fix: 修复爬取视频/帖子最大数设置值较低导致不爬取的问题
This commit is contained in:
parent
e4836847cd
commit
133f978477
|
@ -93,7 +93,7 @@ class BilibiliCrawler(AbstractCrawler):
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
utils.logger.info("[BilibiliCrawler.search] Begin search bilibli keywords")
|
utils.logger.info("[BilibiliCrawler.search] Begin search bilibli keywords")
|
||||||
bili_limit_count = 20 # bilibili limit page fixed value
|
bili_limit_count = min(20, max(1, config.CRAWLER_MAX_NOTES_COUNT)) # bilibili page size: 20 per page, lowered to CRAWLER_MAX_NOTES_COUNT when that is smaller (never below 1)
|
||||||
for keyword in config.KEYWORDS.split(","):
|
for keyword in config.KEYWORDS.split(","):
|
||||||
utils.logger.info(f"[BilibiliCrawler.search] Current search keyword: {keyword}")
|
utils.logger.info(f"[BilibiliCrawler.search] Current search keyword: {keyword}")
|
||||||
page = 1
|
page = 1
|
||||||
|
|
|
@ -84,7 +84,7 @@ class DouYinCrawler(AbstractCrawler):
|
||||||
for keyword in config.KEYWORDS.split(","):
|
for keyword in config.KEYWORDS.split(","):
|
||||||
utils.logger.info(f"[DouYinCrawler.search] Current keyword: {keyword}")
|
utils.logger.info(f"[DouYinCrawler.search] Current keyword: {keyword}")
|
||||||
aweme_list: List[str] = []
|
aweme_list: List[str] = []
|
||||||
dy_limit_count = 10
|
dy_limit_count = min(10, max(1, config.CRAWLER_MAX_NOTES_COUNT))
|
||||||
page = 0
|
page = 0
|
||||||
while (page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
|
while (page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -85,7 +85,7 @@ class KuaishouCrawler(AbstractCrawler):
|
||||||
|
|
||||||
async def search(self):
|
async def search(self):
|
||||||
utils.logger.info("[KuaishouCrawler.search] Begin search kuaishou keywords")
|
utils.logger.info("[KuaishouCrawler.search] Begin search kuaishou keywords")
|
||||||
ks_limit_count = 20 # kuaishou limit page fixed value
|
ks_limit_count = min(20, max(1, config.CRAWLER_MAX_NOTES_COUNT)) # kuaishou page size: 20 per page, lowered to CRAWLER_MAX_NOTES_COUNT when that is smaller (never below 1)
|
||||||
for keyword in config.KEYWORDS.split(","):
|
for keyword in config.KEYWORDS.split(","):
|
||||||
utils.logger.info(f"[KuaishouCrawler.search] Current search keyword: {keyword}")
|
utils.logger.info(f"[KuaishouCrawler.search] Current search keyword: {keyword}")
|
||||||
page = 1
|
page = 1
|
||||||
|
|
|
@ -104,7 +104,7 @@ class WeiboCrawler(AbstractCrawler):
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
utils.logger.info("[WeiboCrawler.search] Begin search weibo keywords")
|
utils.logger.info("[WeiboCrawler.search] Begin search weibo keywords")
|
||||||
weibo_limit_count = 10
|
weibo_limit_count = min(10, max(1, config.CRAWLER_MAX_NOTES_COUNT))
|
||||||
for keyword in config.KEYWORDS.split(","):
|
for keyword in config.KEYWORDS.split(","):
|
||||||
utils.logger.info(f"[WeiboCrawler.search] Current search keyword: {keyword}")
|
utils.logger.info(f"[WeiboCrawler.search] Current search keyword: {keyword}")
|
||||||
page = 1
|
page = 1
|
||||||
|
|
|
@ -96,7 +96,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
|
||||||
async def search(self) -> None:
|
async def search(self) -> None:
|
||||||
"""Search for notes and retrieve their comment information."""
|
"""Search for notes and retrieve their comment information."""
|
||||||
utils.logger.info("[XiaoHongShuCrawler.search] Begin search xiaohongshu keywords")
|
utils.logger.info("[XiaoHongShuCrawler.search] Begin search xiaohongshu keywords")
|
||||||
xhs_limit_count = 20 # xhs limit page fixed value
|
xhs_limit_count = min(20, max(1, config.CRAWLER_MAX_NOTES_COUNT)) # xhs page size: 20 per page, lowered to CRAWLER_MAX_NOTES_COUNT when that is smaller (never below 1)
|
||||||
for keyword in config.KEYWORDS.split(","):
|
for keyword in config.KEYWORDS.split(","):
|
||||||
utils.logger.info(f"[XiaoHongShuCrawler.search] Current search keyword: {keyword}")
|
utils.logger.info(f"[XiaoHongShuCrawler.search] Current search keyword: {keyword}")
|
||||||
page = 1
|
page = 1
|
||||||
|
|
Loading…
Reference in New Issue