refactor: 优化代码-变量名
This commit is contained in:
parent
ecf9a5e893
commit
098923d74d
|
@ -77,7 +77,7 @@
|
|||
# A: 在config/base_config.py 中 KEYWORDS 参数用于控制需要爬取的关键词
|
||||
|
||||
# Q: 可以指定帖子爬取吗?
|
||||
# A:在config/base_config.py 中 SPECIFIED_ID_LIST 参数用于控制需要指定爬取的帖子ID列表
|
||||
# A:在config/base_config.py 中 XHS_SPECIFIED_ID_LIST 参数用于控制需要指定爬取的帖子ID列表
|
||||
|
||||
# Q: 刚开始能爬取数据,过一段时间就失效了?
|
||||
# A:出现这种情况多半是由于你的账号触发了平台风控机制,❗️❗️请勿大规模对平台进行爬取,以免影响平台。
|
||||
|
|
|
@ -27,8 +27,8 @@ CRAWLER_MAX_NOTES_COUNT = 20
|
|||
MAX_CONCURRENCY_NUM = 10
|
||||
|
||||
|
||||
# specified note id list
|
||||
SPECIFIED_ID_LIST = [
|
||||
# xhs specified note id list
|
||||
XHS_SPECIFIED_ID_LIST = [
|
||||
"6422c2750000000027000d88",
|
||||
"64ca1b73000000000b028dd2",
|
||||
"630d5b85000000001203ab41",
|
||||
|
|
|
@ -119,13 +119,13 @@ class XiaoHongShuCrawler(AbstractCrawler):
|
|||
"""Get the information and comments of the specified post"""
|
||||
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
||||
task_list = [
|
||||
self.get_note_detail(note_id=note_id, semaphore=semaphore) for note_id in config.SPECIFIED_ID_LIST
|
||||
self.get_note_detail(note_id=note_id, semaphore=semaphore) for note_id in config.XHS_SPECIFIED_ID_LIST
|
||||
]
|
||||
note_details = await asyncio.gather(*task_list)
|
||||
for note_detail in note_details:
|
||||
if note_detail is not None:
|
||||
await xhs_model.update_xhs_note(note_detail)
|
||||
await self.batch_get_note_comments(config.SPECIFIED_ID_LIST)
|
||||
await self.batch_get_note_comments(config.XHS_SPECIFIED_ID_LIST)
|
||||
|
||||
|
||||
async def get_note_detail(self, note_id: str, semaphore: asyncio.Semaphore) -> Optional[Dict]:
|
||||
|
|
Loading…
Reference in New Issue