diff --git a/config/base_config.py b/config/base_config.py index efccb96..8f203c9 100644 --- a/config/base_config.py +++ b/config/base_config.py @@ -37,7 +37,7 @@ USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name START_PAGE = 1 # 爬取视频/帖子的数量控制 -CRAWLER_MAX_NOTES_COUNT = 100 +CRAWLER_MAX_NOTES_COUNT = 200 # 并发爬虫数量控制 MAX_CONCURRENCY_NUM = 1 @@ -46,7 +46,7 @@ MAX_CONCURRENCY_NUM = 1 ENABLE_GET_IMAGES = False # 是否开启爬评论模式, 默认开启爬评论 -ENABLE_GET_COMMENTS = True +ENABLE_GET_COMMENTS = False # 是否开启爬二级评论模式, 默认不开启爬二级评论 # 老版本项目使用了 db, 则需参考 schema/tables.sql line 287 增加表字段 diff --git a/media_platform/bilibili/client.py b/media_platform/bilibili/client.py index 76fd520..4eec2d9 100644 --- a/media_platform/bilibili/client.py +++ b/media_platform/bilibili/client.py @@ -116,13 +116,17 @@ class BilibiliClient(AbstractApiClient): self.cookie_dict = cookie_dict async def search_video_by_keyword(self, keyword: str, page: int = 1, page_size: int = 20, - order: SearchOrderType = SearchOrderType.DEFAULT): + order: SearchOrderType = SearchOrderType.DEFAULT, + pubtime_begin_s: int = 0, pubtime_end_s: int = 0) -> Dict: + """ KuaiShou web search api :param keyword: 搜索关键词 :param page: 分页参数具体第几页 :param page_size: 每一页参数的数量 :param order: 搜索结果排序,默认位综合排序 + :param pubtime_begin_s: 发布时间开始时间戳 + :param pubtime_end_s: 发布时间结束时间戳 :return: """ uri = "/x/web-interface/wbi/search/type" @@ -131,7 +135,9 @@ class BilibiliClient(AbstractApiClient): "keyword": keyword, "page": page, "page_size": page_size, - "order": order.value + "order": order.value, + "pubtime_begin": pubtime_begin_s, + "pubtime_end": pubtime_end_s } return await self.get(uri, post_data) diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py index d26bd92..6368246 100644 --- a/media_platform/bilibili/core.py +++ b/media_platform/bilibili/core.py @@ -114,6 +114,8 @@ class BilibiliCrawler(AbstractCrawler): page=page, page_size=bili_limit_count, order=SearchOrderType.DEFAULT, + pubtime_begin_s=0, # 
作品发布日期起始时间戳 + pubtime_end_s=0 # 作品发布日期结束时间戳 ) video_list: List[Dict] = videos_res.get("result")