From 3b2cc44750b199fe6f29776028f225fc42719a42 Mon Sep 17 00:00:00 2001 From: ZhouXSh <137194818@qq.com> Date: Thu, 18 Jul 2024 20:11:51 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9EB=E7=AB=99=E5=88=9B=E4=BD=9C?= =?UTF-8?q?=E8=80=85=EF=BC=88UP=E4=B8=BB=EF=BC=89=E4=BF=A1=E6=81=AF?= =?UTF-8?q?=E7=88=AC=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base/base_crawler.py | 2 +- media_platform/bilibili/core.py | 1 + schema/tables.sql | 19 +++++++++++ store/bilibili/__init__.py | 18 ++++++++++ store/bilibili/bilibili_store_impl.py | 43 ++++++++++++++++++++++++ store/bilibili/bilibili_store_sql.py | 47 +++++++++++++++++++++++++++ 6 files changed, 129 insertions(+), 1 deletion(-) diff --git a/base/base_crawler.py b/base/base_crawler.py index 2b48643..6c78492 100644 --- a/base/base_crawler.py +++ b/base/base_crawler.py @@ -48,7 +48,7 @@ class AbstractStore(ABC): # TODO support all platform # only xhs is supported, so @abstractmethod is commented - # @abstractmethod + @abstractmethod async def store_creator(self, creator: Dict): pass diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py index 641a9d6..d5060f1 100644 --- a/media_platform/bilibili/core.py +++ b/media_platform/bilibili/core.py @@ -127,6 +127,7 @@ class BilibiliCrawler(AbstractCrawler): if video_item: video_id_list.append(video_item.get("View").get("aid")) await bilibili_store.update_bilibili_video(video_item) + await bilibili_store.update_up_info(video_item) await self.get_bilibili_video(video_item, semaphore) page += 1 await self.batch_get_video_comments(video_id_list) diff --git a/schema/tables.sql b/schema/tables.sql index 2e2825b..7c5d53e 100644 --- a/schema/tables.sql +++ b/schema/tables.sql @@ -46,6 +46,25 @@ CREATE TABLE `bilibili_video_comment` ( KEY `idx_bilibili_vi_video_i_f22873` (`video_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='B 站视频评论'; +-- ---------------------------- +-- 
Table structure for bilibili_up_info
+-- ----------------------------
+DROP TABLE IF EXISTS `bilibili_up_info`;
+CREATE TABLE `bilibili_up_info` (
+    `id` int NOT NULL AUTO_INCREMENT COMMENT '自增ID',
+    `user_id` varchar(64) DEFAULT NULL COMMENT '用户ID',
+    `nickname` varchar(64) DEFAULT NULL COMMENT '用户昵称',
+    `avatar` varchar(255) DEFAULT NULL COMMENT '用户头像地址',
+    `add_ts` bigint NOT NULL COMMENT '记录添加时间戳',
+    `last_modify_ts` bigint NOT NULL COMMENT '记录最后修改时间戳',
+    `total_fans` bigint DEFAULT NULL COMMENT '粉丝数',
+    `total_liked` bigint DEFAULT NULL COMMENT '总获赞数',
+    `user_rank` int DEFAULT NULL COMMENT '用户等级',
+    `is_official` int DEFAULT NULL COMMENT '是否官号',
+    PRIMARY KEY (`id`),
+    KEY `idx_bilibili_vi_user_123456` (`user_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='B 站UP主信息';
+
 -- ----------------------------
 -- Table structure for douyin_aweme
 -- ----------------------------
diff --git a/store/bilibili/__init__.py b/store/bilibili/__init__.py
index 9099bf6..9ba4a21 100644
--- a/store/bilibili/__init__.py
+++ b/store/bilibili/__init__.py
@@ -53,6 +53,40 @@ async def update_bilibili_video(video_item: Dict):
     await BiliStoreFactory.create_store().store_content(content_item=save_content_item)
 
 
+async def update_up_info(video_item: Dict):
+    """
+    Store the uploader (UP) profile attached to a video detail item
+    Args:
+        video_item: video detail dict; the profile is read from its "Card" -> "card" entry
+
+    Returns:
+
+    """
+    video_item_card_list: Dict = video_item.get("Card") or {}
+    video_item_card: Dict = video_item_card_list.get("card") or {}
+    if not video_item_card:
+        # Without a profile there is nothing to key the record on, so skip it
+        # instead of failing the whole crawl on a None lookup.
+        utils.logger.warning(
+            "[store.bilibili.update_up_info] no uploader card in video item, skip")
+        return
+    level_info: Dict = video_item_card.get("level_info") or {}
+    official_verify: Dict = video_item_card.get("official_verify") or {}
+    saver_up_info = {
+        "user_id": str(video_item_card.get("mid")),
+        "nickname": video_item_card.get("name"),
+        "avatar": video_item_card.get("face"),
+        "last_modify_ts": utils.get_current_timestamp(),
+        "total_fans": video_item_card.get("fans"),
+        "total_liked": video_item_card_list.get("like_num"),
+        "user_rank": level_info.get("current_level"),
+        "is_official": official_verify.get("type"),
+    }
+    utils.logger.info(
+        f"[store.bilibili.update_up_info] bilibili user_id:{video_item_card.get('mid')}")
+    await BiliStoreFactory.create_store().store_creator(creator=saver_up_info)
+
+
 async def batch_update_bilibili_video_comments(video_id: str, comments: List[Dict]):
     if not comments:
         return
diff --git a/store/bilibili/bilibili_store_impl.py b/store/bilibili/bilibili_store_impl.py
index 4a07dff..5669db3 100644
--- a/store/bilibili/bilibili_store_impl.py
+++ b/store/bilibili/bilibili_store_impl.py
@@ -85,6 +85,17 @@ class BiliCsvStoreImplement(AbstractStore):
         """
         await self.save_data_to_csv(save_item=comment_item, store_type="comments")
 
+    async def store_creator(self, creator: Dict):
+        """
+        Bilibili creator CSV storage implementation
+        Args:
+            creator: creator item dict
+
+        Returns:
+
+        """
+        await self.save_data_to_csv(save_item=creator, store_type="creators")
+
 
 class BiliDbStoreImplement(AbstractStore):
     async def store_content(self, content_item: Dict):
@@ -129,6 +140,27 @@ class BiliDbStoreImplement(AbstractStore):
         else:
             await update_comment_by_comment_id(comment_id, comment_item=comment_item)
 
+    async def store_creator(self, creator: Dict):
+        """
+        Bilibili creator DB storage implementation
+        Args:
+            creator: creator item dict
+
+        Returns:
+
+        """
+
+        from .bilibili_store_sql import (add_new_creator,
+                                         query_creator_by_creator_id,
+                                         update_creator_by_creator_id)
+        creator_id = creator.get("user_id")
+        creator_detail: Dict = await query_creator_by_creator_id(creator_id=creator_id)
+        if not creator_detail:
+            creator["add_ts"] = utils.get_current_timestamp()
+            await add_new_creator(creator)
+        else:
+            await update_creator_by_creator_id(creator_id, creator_item=creator)
+
 
 class BiliJsonStoreImplement(AbstractStore):
     json_store_path: str = "data/bilibili/json"
@@ -204,3 +236,14 @@ class BiliJsonStoreImplement(AbstractStore):
 
         """
         await self.save_data_to_json(comment_item, "comments")
+
+    async def store_creator(self, creator: Dict):
+        """
+        Bilibili creator JSON storage implementation
+        Args:
+            creator: creator item dict
+
+        Returns:
+
+        """
+        await self.save_data_to_json(creator, "creators")
diff --git 
a/store/bilibili/bilibili_store_sql.py b/store/bilibili/bilibili_store_sql.py
index b521f29..00852ec 100644
--- a/store/bilibili/bilibili_store_sql.py
+++ b/store/bilibili/bilibili_store_sql.py
@@ -100,3 +100,50 @@ async def update_comment_by_comment_id(comment_id: str, comment_item: Dict) -> i
     async_db_conn: AsyncMysqlDB = media_crawler_db_var.get()
     effect_row: int = await async_db_conn.update_table("bilibili_video_comment", comment_item, "comment_id", comment_id)
     return effect_row
+
+
+async def query_creator_by_creator_id(creator_id: str) -> Dict:
+    """
+    Look up a stored UP (creator) row by user id
+    Args:
+        creator_id: value matched against the user_id column
+
+    Returns:
+        the first matching row, or an empty dict when none matches
+    """
+    async_db_conn: AsyncMysqlDB = media_crawler_db_var.get()
+    # Bind creator_id as a parameter instead of interpolating it into the SQL text.
+    # NOTE(review): assumes AsyncMysqlDB.query forwards extra positional args to the driver - confirm.
+    sql: str = "select * from bilibili_up_info where user_id = %s"
+    rows: List[Dict] = await async_db_conn.query(sql, creator_id)
+    return rows[0] if rows else {}
+
+
+async def add_new_creator(creator_item: Dict) -> int:
+    """
+    Insert a new UP (creator) row into bilibili_up_info
+    Args:
+        creator_item: creator fields to insert
+
+    Returns:
+        the value returned by AsyncMysqlDB.item_to_table (annotated here as the new row id)
+    """
+    async_db_conn: AsyncMysqlDB = media_crawler_db_var.get()
+    last_row_id: int = await async_db_conn.item_to_table("bilibili_up_info", creator_item)
+    return last_row_id
+
+
+async def update_creator_by_creator_id(creator_id: str, creator_item: Dict) -> int:
+    """
+    Update the UP (creator) row selected by user_id
+    Args:
+        creator_id: value of the user_id column identifying the row
+        creator_item: creator fields to write
+
+    Returns:
+        the value returned by AsyncMysqlDB.update_table (annotated here as the affected row count)
+    """
+    async_db_conn: AsyncMysqlDB = media_crawler_db_var.get()
+    effect_row: int = await async_db_conn.update_table("bilibili_up_info", creator_item, "user_id", creator_id)
+    return effect_row
+