diff --git a/README.md b/README.md index 99becb5..c5e2612 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ # A: 在config/base_config.py 中 KEYWORDS 参数用于控制需要爬去的关键词 # Q: 可以指定帖子爬去吗? -# A:在config/base_config.py 中 SPECIFIED_ID_LIST 参数用于控制需要指定爬去的帖子ID列表 +# A:在config/base_config.py 中 XHS_SPECIFIED_ID_LIST 参数用于控制需要指定爬取的帖子ID列表 # Q: 刚开始能爬取数据,过一段时间就是失效了? # A:出现这种情况多半是由于你的账号触发了平台风控机制了,❗️❗️请勿大规模对平台进行爬虫,影响平台。 diff --git a/config/base_config.py b/config/base_config.py index 318c58d..38f662f 100644 --- a/config/base_config.py +++ b/config/base_config.py @@ -27,8 +27,8 @@ CRAWLER_MAX_NOTES_COUNT = 20 MAX_CONCURRENCY_NUM = 10 -# specified note id list -SPECIFIED_ID_LIST = [ +# xhs specified note id list +XHS_SPECIFIED_ID_LIST = [ "6422c2750000000027000d88", "64ca1b73000000000b028dd2", "630d5b85000000001203ab41", diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index 87e4c03..ce210fe 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -119,13 +119,13 @@ class XiaoHongShuCrawler(AbstractCrawler): """Get the information and comments of the specified post""" semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM) task_list = [ - self.get_note_detail(note_id=note_id, semaphore=semaphore) for note_id in config.SPECIFIED_ID_LIST + self.get_note_detail(note_id=note_id, semaphore=semaphore) for note_id in config.XHS_SPECIFIED_ID_LIST ] note_details = await asyncio.gather(*task_list) for note_detail in note_details: if note_detail is not None: await xhs_model.update_xhs_note(note_detail) - await self.batch_get_note_comments(config.SPECIFIED_ID_LIST) + await self.batch_get_note_comments(config.XHS_SPECIFIED_ID_LIST) async def get_note_detail(self, note_id: str, semaphore: asyncio.Semaphore) -> Optional[Dict]: