feat: logger record current search page

Relakkes 2024-06-24 22:24:51 +08:00
parent a0e5a29af8
commit d3eeccbaac
8 changed files with 6 additions and 4 deletions

View File

@@ -87,7 +87,6 @@
- Knowledge Planet (知识星球): high-quality FAQs, best-practice docs, and years of programming + crawling experience, offered as a paid knowledge-planet service; ask questions directly and the author answers them regularly
<p>
<img alt="xingqiu" src="https://nm.zizhi1.com/static/img/8e1312d1f52f2e0ff436ea7196b4e27b.15555424244122T1.webp" style="width: auto;height: 400px" >
<img alt="xingqiu_yhq" src="https://nm.zizhi1.com/static/img/c14da73f0dd9351e7169a36368d5037b.yhq.webp" style="width: auto;height: 400px" >
</p>
Featured articles from the Knowledge Planet:
@@ -95,6 +94,7 @@
- [[Original] Using Playwright to obtain a certain app's X-s parameter at low cost: a flow analysis and retrospective](https://articles.zsxq.com/id_u4lcrvqakuc7.html)
- [MediaCrawler: refactoring the project cache around an abstract-class design](https://articles.zsxq.com/id_4ju73oxewt9j.html)
- [A hands-on guide to building your own IP proxy pool](https://articles.zsxq.com/id_38fza371ladm.html)
- Subscribe to my knowledge service for 1 yuan a day

View File

@@ -106,6 +106,7 @@ class BilibiliCrawler(AbstractCrawler):
page += 1
continue
utils.logger.info(f"[BilibiliCrawler.search] search bilibili keyword: {keyword}, page: {page}")
video_id_list: List[str] = []
videos_res = await self.bili_client.search_video_by_keyword(
keyword=keyword,
@@ -126,7 +127,6 @@ class BilibiliCrawler(AbstractCrawler):
if video_item:
video_id_list.append(video_item.get("View").get("aid"))
await bilibili_store.update_bilibili_video(video_item)
page += 1
await self.batch_get_video_comments(video_id_list)
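For context, the same change repeats in every crawler below: each paginated search loop now logs the current keyword and page before issuing the request. A minimal, self-contained sketch of that pattern (the fake_search_api helper and search function here are illustrative stand-ins, not the project's actual code):

import asyncio
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("crawler")

async def fake_search_api(keyword: str, page: int) -> dict:
    # Hypothetical stand-in for a platform client call such as
    # search_video_by_keyword(...) in the diff above.
    return {"items": [f"{keyword}-{page}-{i}" for i in range(3)]}

async def search(keyword: str, max_pages: int = 3) -> None:
    page = 1
    while page <= max_pages:
        # The pattern added by this commit: record the keyword and page
        # being crawled before each request, so progress shows in the logs.
        logger.info(f"[search] search keyword: {keyword}, page: {page}")
        result = await fake_search_api(keyword, page)
        for item in result["items"]:
            logger.info(f"[search] got item: {item}")
        page += 1

if __name__ == "__main__":
    asyncio.run(search("python"))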

View File

@@ -90,6 +90,7 @@ class DouYinCrawler(AbstractCrawler):
page += 1
continue
try:
utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page}")
posts_res = await self.dy_client.search_info_by_keyword(keyword=keyword,
offset=page * dy_limit_count - dy_limit_count,
publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE)
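A side note on the offset arithmetic above: page * dy_limit_count - dy_limit_count is just (page - 1) * dy_limit_count, i.e. a zero-based item offset derived from a one-based page counter; assuming dy_limit_count were 10, page 1 would request offset 0 and page 2 offset 10.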

View File

@@ -92,7 +92,7 @@ class KuaishouCrawler(AbstractCrawler):
utils.logger.info(f"[KuaishouCrawler.search] Skip page: {page}")
page += 1
continue
utils.logger.info(f"[KuaishouCrawler.search] search kuaishou keyword: {keyword}, page: {page}")
video_id_list: List[str] = []
videos_res = await self.ks_client.search_info_by_keyword(
keyword=keyword,

View File

@@ -108,7 +108,7 @@ class WeiboCrawler(AbstractCrawler):
utils.logger.info(f"[WeiboCrawler.search] Skip page: {page}")
page += 1
continue
utils.logger.info(f"[WeiboCrawler.search] search weibo keyword: {keyword}, page: {page}")
search_res = await self.wb_client.get_note_by_keyword(
keyword=keyword,
page=page,

View File

@@ -102,6 +102,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
continue
try:
utils.logger.info(f"[XiaoHongShuCrawler.search] search xhs keyword: {keyword}, page: {page}")
note_id_list: List[str] = []
notes_res = await self.xhs_client.get_note_by_keyword(
keyword=keyword,

Binary file not shown (image changed; before: 170 KiB, after: 171 KiB).

Binary file not shown (image changed; before: 115 KiB).