Merge pull request #206 from leantli/fix/max_notes_count

fix: resolve the issue where setting the maximum video/post crawl count too low caused nothing to be crawled
程序员阿江-Relakkes 2024-04-04 00:46:42 +08:00 committed by GitHub
commit 208978b88f
5 changed files with 13 additions and 3 deletions
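
Why a low setting stopped crawling entirely: each crawler pages through search results in fixed-size batches and only loops while page * limit_count <= config.CRAWLER_MAX_NOTES_COUNT, so a configured maximum below one page size makes the condition false on the very first iteration. Below is a minimal, self-contained sketch of the failure and of the clamp this PR applies; the function name search_pages and its parameters are simplified stand-ins for illustration, not the repo's real API.

def search_pages(max_notes_count: int, limit_count: int = 20) -> int:
    """Simplified stand-in for the crawlers' search() paging logic."""
    # The fix: never let the configured maximum fall below one page,
    # mirroring the clamp on config.CRAWLER_MAX_NOTES_COUNT in this PR.
    if max_notes_count < limit_count:
        max_notes_count = limit_count
    fetched = 0
    page = 1
    # Without the clamp above, max_notes_count=5 gives 1 * 20 <= 5 -> False,
    # so the loop body never runs and nothing is crawled.
    while page * limit_count <= max_notes_count:
        fetched += limit_count  # stand-in for fetching one page of results
        page += 1
    return fetched

assert search_pages(5) == 20    # before the fix this path fetched 0
assert search_pages(100) == 100

Note the side effect of the chosen fix: the clamp assigns back to the global config.CRAWLER_MAX_NOTES_COUNT, so a value below the platform's page limit is silently raised for the rest of the run.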


@@ -93,7 +93,9 @@ class BilibiliCrawler(AbstractCrawler):
         :return:
         """
         utils.logger.info("[BilibiliCrawler.search] Begin search bilibli keywords")
         bili_limit_count = 20  # bilibili limit page fixed value
+        if config.CRAWLER_MAX_NOTES_COUNT < bili_limit_count:
+            config.CRAWLER_MAX_NOTES_COUNT = bili_limit_count
         for keyword in config.KEYWORDS.split(","):
             utils.logger.info(f"[BilibiliCrawler.search] Current search keyword: {keyword}")
             page = 1


@@ -81,10 +81,12 @@ class DouYinCrawler(AbstractCrawler):
     async def search(self) -> None:
         utils.logger.info("[DouYinCrawler.search] Begin search douyin keywords")
+        dy_limit_count = 10  # douyin limit page fixed value
+        if config.CRAWLER_MAX_NOTES_COUNT < dy_limit_count:
+            config.CRAWLER_MAX_NOTES_COUNT = dy_limit_count
         for keyword in config.KEYWORDS.split(","):
             utils.logger.info(f"[DouYinCrawler.search] Current keyword: {keyword}")
             aweme_list: List[str] = []
-            dy_limit_count = 10
             page = 0
             while (page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
                 try:
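
For concreteness, with config.CRAWLER_MAX_NOTES_COUNT = 5 the condition above first evaluates as (0 + 1) * 10 <= 5, which is False, so the loop body never executes and no videos are fetched; after the clamp the maximum becomes 10 and exactly one page is requested. The same arithmetic applies to the other crawlers with their page sizes of 20 (bilibili, kuaishou, xhs) and 10 (weibo).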


@@ -86,6 +86,8 @@ class KuaishouCrawler(AbstractCrawler):
     async def search(self):
         utils.logger.info("[KuaishouCrawler.search] Begin search kuaishou keywords")
         ks_limit_count = 20  # kuaishou limit page fixed value
+        if config.CRAWLER_MAX_NOTES_COUNT < ks_limit_count:
+            config.CRAWLER_MAX_NOTES_COUNT = ks_limit_count
         for keyword in config.KEYWORDS.split(","):
             utils.logger.info(f"[KuaishouCrawler.search] Current search keyword: {keyword}")
             page = 1


@@ -104,7 +104,9 @@ class WeiboCrawler(AbstractCrawler):
         :return:
         """
         utils.logger.info("[WeiboCrawler.search] Begin search weibo keywords")
-        weibo_limit_count = 10
+        weibo_limit_count = 10  # weibo limit page fixed value
+        if config.CRAWLER_MAX_NOTES_COUNT < weibo_limit_count:
+            config.CRAWLER_MAX_NOTES_COUNT = weibo_limit_count
         for keyword in config.KEYWORDS.split(","):
             utils.logger.info(f"[WeiboCrawler.search] Current search keyword: {keyword}")
             page = 1


@@ -97,6 +97,8 @@ class XiaoHongShuCrawler(AbstractCrawler):
         """Search for notes and retrieve their comment information."""
         utils.logger.info("[XiaoHongShuCrawler.search] Begin search xiaohongshu keywords")
         xhs_limit_count = 20  # xhs limit page fixed value
+        if config.CRAWLER_MAX_NOTES_COUNT < xhs_limit_count:
+            config.CRAWLER_MAX_NOTES_COUNT = xhs_limit_count
         for keyword in config.KEYWORDS.split(","):
             utils.logger.info(f"[XiaoHongShuCrawler.search] Current search keyword: {keyword}")
             page = 1