refactor: 优化代码-变量名
This commit is contained in:
parent
ecf9a5e893
commit
098923d74d
|
@ -77,7 +77,7 @@
|
||||||
# A: 在config/base_config.py 中 KEYWORDS 参数用于控制需要爬取的关键词
|
# A: 在config/base_config.py 中 KEYWORDS 参数用于控制需要爬取的关键词
|
||||||
|
|
||||||
# Q: 可以爬取指定帖子吗?
|
# Q: 可以爬取指定帖子吗?
|
||||||
# A:在config/base_config.py 中 SPECIFIED_ID_LIST 参数用于控制需要指定爬取的帖子ID列表
|
# A:在config/base_config.py 中 XHS_SPECIFIED_ID_LIST 参数用于控制需要指定爬取的帖子ID列表
|
||||||
|
|
||||||
# Q: 刚开始能爬取数据,过一段时间就失效了?
|
# Q: 刚开始能爬取数据,过一段时间就失效了?
|
||||||
# A:出现这种情况多半是由于你的账号触发了平台的风控机制,❗️❗️请勿对平台进行大规模爬取,以免影响平台。
|
# A:出现这种情况多半是由于你的账号触发了平台的风控机制,❗️❗️请勿对平台进行大规模爬取,以免影响平台。
|
||||||
|
|
|
@ -27,8 +27,8 @@ CRAWLER_MAX_NOTES_COUNT = 20
|
||||||
MAX_CONCURRENCY_NUM = 10
|
MAX_CONCURRENCY_NUM = 10
|
||||||
|
|
||||||
|
|
||||||
# specified note id list
|
# xhs specified note id list
|
||||||
SPECIFIED_ID_LIST = [
|
XHS_SPECIFIED_ID_LIST = [
|
||||||
"6422c2750000000027000d88",
|
"6422c2750000000027000d88",
|
||||||
"64ca1b73000000000b028dd2",
|
"64ca1b73000000000b028dd2",
|
||||||
"630d5b85000000001203ab41",
|
"630d5b85000000001203ab41",
|
||||||
|
|
|
@ -119,13 +119,13 @@ class XiaoHongShuCrawler(AbstractCrawler):
|
||||||
"""Get the information and comments of the specified post"""
|
"""Get the information and comments of the specified post"""
|
||||||
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
||||||
task_list = [
|
task_list = [
|
||||||
self.get_note_detail(note_id=note_id, semaphore=semaphore) for note_id in config.SPECIFIED_ID_LIST
|
self.get_note_detail(note_id=note_id, semaphore=semaphore) for note_id in config.XHS_SPECIFIED_ID_LIST
|
||||||
]
|
]
|
||||||
note_details = await asyncio.gather(*task_list)
|
note_details = await asyncio.gather(*task_list)
|
||||||
for note_detail in note_details:
|
for note_detail in note_details:
|
||||||
if note_detail is not None:
|
if note_detail is not None:
|
||||||
await xhs_model.update_xhs_note(note_detail)
|
await xhs_model.update_xhs_note(note_detail)
|
||||||
await self.batch_get_note_comments(config.SPECIFIED_ID_LIST)
|
await self.batch_get_note_comments(config.XHS_SPECIFIED_ID_LIST)
|
||||||
|
|
||||||
|
|
||||||
async def get_note_detail(self, note_id: str, semaphore: asyncio.Semaphore) -> Optional[Dict]:
|
async def get_note_detail(self, note_id: str, semaphore: asyncio.Semaphore) -> Optional[Dict]:
|
||||||
|
|
Loading…
Reference in New Issue