From 098923d74da2bc9d054995a56499e50879885652 Mon Sep 17 00:00:00 2001 From: Relakkes Date: Sat, 18 Nov 2023 15:53:10 +0800 Subject: [PATCH] =?UTF-8?q?refactor:=20=E4=BC=98=E5=8C=96=E4=BB=A3?= =?UTF-8?q?=E7=A0=81-=E5=8F=98=E9=87=8F=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- config/base_config.py | 4 ++-- media_platform/xhs/core.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 99becb5..c5e2612 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ # A: 在config/base_config.py 中 KEYWORDS 参数用于控制需要爬去的关键词 # Q: 可以指定帖子爬去吗? -# A:在config/base_config.py 中 SPECIFIED_ID_LIST 参数用于控制需要指定爬去的帖子ID列表 +# A:在config/base_config.py 中 XHS_SPECIFIED_ID_LIST 参数用于控制需要指定爬去的帖子ID列表 # Q: 刚开始能爬取数据,过一段时间就是失效了? # A:出现这种情况多半是由于你的账号触发了平台风控机制了,❗️❗️请勿大规模对平台进行爬虫,影响平台。 diff --git a/config/base_config.py b/config/base_config.py index 318c58d..38f662f 100644 --- a/config/base_config.py +++ b/config/base_config.py @@ -27,8 +27,8 @@ CRAWLER_MAX_NOTES_COUNT = 20 MAX_CONCURRENCY_NUM = 10 -# specified note id list -SPECIFIED_ID_LIST = [ +# xhs specified note id list +XHS_SPECIFIED_ID_LIST = [ "6422c2750000000027000d88", "64ca1b73000000000b028dd2", "630d5b85000000001203ab41", diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index 87e4c03..ce210fe 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -119,13 +119,13 @@ class XiaoHongShuCrawler(AbstractCrawler): """Get the information and comments of the specified post""" semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM) task_list = [ - self.get_note_detail(note_id=note_id, semaphore=semaphore) for note_id in config.SPECIFIED_ID_LIST + self.get_note_detail(note_id=note_id, semaphore=semaphore) for note_id in config.XHS_SPECIFIED_ID_LIST ] note_details = await asyncio.gather(*task_list) for note_detail in note_details: if note_detail is not None: await xhs_model.update_xhs_note(note_detail) - await self.batch_get_note_comments(config.SPECIFIED_ID_LIST) + await self.batch_get_note_comments(config.XHS_SPECIFIED_ID_LIST) async def get_note_detail(self, note_id: str, semaphore: asyncio.Semaphore) -> Optional[Dict]: