refactor: 优化代码-变量名

This commit is contained in:
Relakkes 2023-11-18 15:53:10 +08:00
parent ecf9a5e893
commit 098923d74d
3 changed files with 5 additions and 5 deletions

View File

@ -77,7 +77,7 @@
# A: 在 config/base_config.py 中，KEYWORDS 参数用于控制需要爬取的关键词
# Q: 可以指定帖子爬取吗？
# A: 在 config/base_config.py 中，SPECIFIED_ID_LIST 参数用于控制需要指定爬取的帖子 ID 列表
# A: 在 config/base_config.py 中，XHS_SPECIFIED_ID_LIST 参数用于控制需要指定爬取的帖子 ID 列表
# Q: 刚开始能爬取数据，过一段时间就失效了？
# A: 出现这种情况多半是由于你的账号触发了平台的风控机制，请勿对平台进行大规模爬取，以免影响平台。

View File

@ -27,8 +27,8 @@ CRAWLER_MAX_NOTES_COUNT = 20
MAX_CONCURRENCY_NUM = 10
# specified note id list
SPECIFIED_ID_LIST = [
# xhs specified note id list
XHS_SPECIFIED_ID_LIST = [
"6422c2750000000027000d88",
"64ca1b73000000000b028dd2",
"630d5b85000000001203ab41",

View File

@ -119,13 +119,13 @@ class XiaoHongShuCrawler(AbstractCrawler):
"""Get the information and comments of the specified post"""
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
task_list = [
self.get_note_detail(note_id=note_id, semaphore=semaphore) for note_id in config.SPECIFIED_ID_LIST
self.get_note_detail(note_id=note_id, semaphore=semaphore) for note_id in config.XHS_SPECIFIED_ID_LIST
]
note_details = await asyncio.gather(*task_list)
for note_detail in note_details:
if note_detail is not None:
await xhs_model.update_xhs_note(note_detail)
await self.batch_get_note_comments(config.SPECIFIED_ID_LIST)
await self.batch_get_note_comments(config.XHS_SPECIFIED_ID_LIST)
async def get_note_detail(self, note_id: str, semaphore: asyncio.Semaphore) -> Optional[Dict]: