diff --git a/README.md b/README.md index 443dabe..ad3335b 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ | 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | 快手 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | -| B 站 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | +| B 站 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | 微博 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | diff --git a/config/base_config.py b/config/base_config.py index b1a592c..7c1fbe2 100644 --- a/config/base_config.py +++ b/config/base_config.py @@ -96,3 +96,9 @@ DY_CREATOR_ID_LIST = [ "MS4wLjABAAAATJPY7LAlaa5X-c8uNdWkvz0jUGgpw4eeXIwu_8BhvqE", # ........................ ] + +# 指定bili创作者ID列表(sec_id) +BILI_CREATOR_ID_LIST = [ + "20813884", + # ........................ +] diff --git a/media_platform/bilibili/client.py b/media_platform/bilibili/client.py index c97f110..5c13e03 100644 --- a/media_platform/bilibili/client.py +++ b/media_platform/bilibili/client.py @@ -208,7 +208,6 @@ class BilibiliClient(AbstractApiClient): if not is_fetch_sub_comments: result.extend(comment_list) continue - # todo handle get sub comments return result async def get_video_all_level_two_comments(self, @@ -230,15 +229,15 @@ class BilibiliClient(AbstractApiClient): :return: """ - pn = 0 + pn = 1 while True: result = await self.get_video_level_two_comments( - video_id, level_one_comment_id, 0, ps, order_mode) + video_id, level_one_comment_id, pn, ps, order_mode) comment_list: List[Dict] = result.get("replies", []) if callback: # 如果有回调函数,就执行回调函数 await callback(video_id, comment_list) await asyncio.sleep(crawl_interval) - if (int(result["page"]["count"]) <= (pn+1) * ps): + if (int(result["page"]["count"]) <= pn * ps): break pn += 1 @@ -268,3 +267,21 @@ class BilibiliClient(AbstractApiClient): } result = await self.get(uri, post_data) return result + + async def get_creator_videos(self, creator_id: str, pn: int, ps: int = 30, order_mode: SearchOrderType = SearchOrderType.LAST_PUBLISH) -> Dict: + """get all videos for a creator + :param creator_id: 创作者 ID + :param pn: 页数 + :param ps: 一页视频数 + :param order_mode: 排序方式 + + :return: + """ + uri = "/x/space/wbi/arc/search" + post_data = { + "mid": creator_id, + "pn": pn, + "ps": ps, + "order": order_mode, + } + return await self.get(uri, post_data) diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py index 220e308..c14391d 100644 --- a/media_platform/bilibili/core.py +++ b/media_platform/bilibili/core.py @@ -75,7 +75,10 @@ class BilibiliCrawler(AbstractCrawler): await self.search() elif config.CRAWLER_TYPE == "detail": # Get the information and comments of the specified post - await self.get_specified_videos() + await self.get_specified_videos(config.BILI_SPECIFIED_ID_LIST) + elif config.CRAWLER_TYPE == "creator": + for creator_id in config.BILI_CREATOR_ID_LIST: + await self.get_creator_videos(int(creator_id)) else: pass utils.logger.info( @@ -173,7 +176,25 @@ class BilibiliCrawler(AbstractCrawler): utils.logger.error( f"[BilibiliCrawler.get_comments] may be been blocked, err:{e}") - async def get_specified_videos(self): + async def get_creator_videos(self, creator_id: int): + """ + get videos for a creator + :return: + """ + ps = 30 + pn = 1 + video_bvids_list = [] + while True: + result = await self.bili_client.get_creator_videos(creator_id, pn, ps) + for video in result["list"]["vlist"]: + video_bvids_list.append(video["bvid"]) + if (int(result["page"]["count"]) <= pn * ps): + break + await asyncio.sleep(random.random()) + pn += 1 + await self.get_specified_videos(video_bvids_list) + + async def get_specified_videos(self, bvids_list: List[str]): """ get specified videos info :return: @@ -181,7 +202,7 @@ class BilibiliCrawler(AbstractCrawler): semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM) task_list = [ self.get_video_info_task(aid=0, bvid=video_id, semaphore=semaphore) for video_id in - config.BILI_SPECIFIED_ID_LIST + bvids_list ] video_details = await asyncio.gather(*task_list) video_aids_list = [] diff --git a/schema/tables.sql b/schema/tables.sql index fa475e1..2e2825b 100644 --- a/schema/tables.sql +++ b/schema/tables.sql @@ -311,4 +311,7 @@ ADD COLUMN `parent_comment_id` VARCHAR(64) DEFAULT NULL COMMENT '父评论ID'; ALTER TABLE `douyin_aweme_comment` ADD COLUMN `parent_comment_id` VARCHAR(64) DEFAULT NULL COMMENT '父评论ID'; +ALTER TABLE `bilibili_video_comment` +ADD COLUMN `parent_comment_id` VARCHAR(64) DEFAULT NULL COMMENT '父评论ID'; + SET FOREIGN_KEY_CHECKS = 1;