feat: B站搜索接口增加发布日期筛选

This commit is contained in:
Relakkes 2024-10-17 15:11:25 +08:00
parent fa1932e105
commit aa0f920369
3 changed files with 12 additions and 4 deletions

View File

@ -37,7 +37,7 @@ USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name
START_PAGE = 1 START_PAGE = 1
# 爬取视频/帖子的数量控制 # 爬取视频/帖子的数量控制
CRAWLER_MAX_NOTES_COUNT = 100 CRAWLER_MAX_NOTES_COUNT = 200
# 并发爬虫数量控制 # 并发爬虫数量控制
MAX_CONCURRENCY_NUM = 1 MAX_CONCURRENCY_NUM = 1
@ -46,7 +46,7 @@ MAX_CONCURRENCY_NUM = 1
ENABLE_GET_IMAGES = False ENABLE_GET_IMAGES = False
# 是否开启爬评论模式, 默认开启爬评论 # 是否开启爬评论模式, 默认开启爬评论
ENABLE_GET_COMMENTS = True ENABLE_GET_COMMENTS = False
# 是否开启爬二级评论模式, 默认不开启爬二级评论 # 是否开启爬二级评论模式, 默认不开启爬二级评论
# 老版本项目使用了 db, 则需参考 schema/tables.sql line 287 增加表字段 # 老版本项目使用了 db, 则需参考 schema/tables.sql line 287 增加表字段

View File

@ -116,13 +116,17 @@ class BilibiliClient(AbstractApiClient):
self.cookie_dict = cookie_dict self.cookie_dict = cookie_dict
async def search_video_by_keyword(self, keyword: str, page: int = 1, page_size: int = 20, async def search_video_by_keyword(self, keyword: str, page: int = 1, page_size: int = 20,
order: SearchOrderType = SearchOrderType.DEFAULT): order: SearchOrderType = SearchOrderType.DEFAULT,
pubtime_begin_s: int = 0, pubtime_end_s: int = 0) -> Dict:
""" """
Bilibili web search api Bilibili web search api
:param keyword: 搜索关键词 :param keyword: 搜索关键词
:param page: 分页参数具体第几页 :param page: 分页参数具体第几页
:param page_size: 每一页参数的数量 :param page_size: 每一页参数的数量
:param order: 搜索结果排序默认为综合排序 :param order: 搜索结果排序默认为综合排序
:param pubtime_begin_s: 发布时间开始时间戳
:param pubtime_end_s: 发布时间结束时间戳
:return: :return:
""" """
uri = "/x/web-interface/wbi/search/type" uri = "/x/web-interface/wbi/search/type"
@ -131,7 +135,9 @@ class BilibiliClient(AbstractApiClient):
"keyword": keyword, "keyword": keyword,
"page": page, "page": page,
"page_size": page_size, "page_size": page_size,
"order": order.value "order": order.value,
"pubtime_begin": pubtime_begin_s,
"pubtime_end": pubtime_end_s
} }
return await self.get(uri, post_data) return await self.get(uri, post_data)

View File

@ -114,6 +114,8 @@ class BilibiliCrawler(AbstractCrawler):
page=page, page=page,
page_size=bili_limit_count, page_size=bili_limit_count,
order=SearchOrderType.DEFAULT, order=SearchOrderType.DEFAULT,
pubtime_begin_s=0, # 作品发布日期起始时间戳
pubtime_end_s=0 # 作品发布日期结束时间戳
) )
video_list: List[Dict] = videos_res.get("result") video_list: List[Dict] = videos_res.get("result")