新增B站创作者(UP主)信息爬取
This commit is contained in:
parent
548271e537
commit
3b2cc44750
|
@ -48,7 +48,7 @@ class AbstractStore(ABC):
|
|||
|
||||
# TODO support all platform
|
||||
# store_creator is declared abstract below, so every concrete store must implement it
|
||||
# @abstractmethod
|
||||
@abstractmethod
async def store_creator(self, creator: Dict):
    """
    Persist one creator record; concrete stores provide the implementation.

    Args:
        creator: creator item dict

    Returns:
        None
    """
|
||||
|
||||
|
|
|
@ -127,6 +127,7 @@ class BilibiliCrawler(AbstractCrawler):
|
|||
if video_item:
|
||||
video_id_list.append(video_item.get("View").get("aid"))
|
||||
await bilibili_store.update_bilibili_video(video_item)
|
||||
await bilibili_store.update_up_info(video_item)
|
||||
await self.get_bilibili_video(video_item, semaphore)
|
||||
page += 1
|
||||
await self.batch_get_video_comments(video_id_list)
|
||||
|
|
|
@ -46,6 +46,25 @@ CREATE TABLE `bilibili_video_comment` (
|
|||
KEY `idx_bilibili_vi_video_i_f22873` (`video_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='B 站视频评论';
|
||||
|
||||
-- ----------------------------
|
||||
-- Table structure for bilibili_up_info
|
||||
-- ----------------------------
|
||||
-- Recreate the table holding Bilibili uploader (UP) profile records.
DROP TABLE IF EXISTS `bilibili_up_info`;
CREATE TABLE `bilibili_up_info` (
    -- surrogate auto-increment key
    `id`             int          NOT NULL AUTO_INCREMENT COMMENT '自增ID',
    -- uploader identity: user id, nickname, avatar URL
    `user_id`        varchar(64)  DEFAULT NULL COMMENT '用户ID',
    `nickname`       varchar(64)  DEFAULT NULL COMMENT '用户昵称',
    `avatar`         varchar(255) DEFAULT NULL COMMENT '用户头像地址',
    -- record creation / last-modification timestamps
    `add_ts`         bigint       NOT NULL COMMENT '记录添加时间戳',
    `last_modify_ts` bigint       NOT NULL COMMENT '记录最后修改时间戳',
    -- follower count, total likes, user level, official-account flag
    `total_fans`     bigint       DEFAULT NULL COMMENT '粉丝数',
    `total_liked`    bigint       DEFAULT NULL COMMENT '总获赞数',
    `user_rank`      int          DEFAULT NULL COMMENT '用户等级',
    `is_official`    int          DEFAULT NULL COMMENT '是否官号',
    PRIMARY KEY (`id`),
    -- secondary (non-unique) index used for lookups by user_id
    KEY `idx_bilibili_vi_user_123456` (`user_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='B 站UP主信息';
|
||||
|
||||
-- ----------------------------
|
||||
-- Table structure for douyin_aweme
|
||||
-- ----------------------------
|
||||
|
|
|
@ -53,6 +53,24 @@ async def update_bilibili_video(video_item: Dict):
|
|||
await BiliStoreFactory.create_store().store_content(content_item=save_content_item)
|
||||
|
||||
|
||||
async def update_up_info(video_item: Dict):
    """
    Extract the uploader (UP) profile from a video detail payload and store it.

    Args:
        video_item: video detail dict. Creator data is read from
            video_item["Card"] ("like_num") and video_item["Card"]["card"]
            ("mid", "name", "face", "fans", "level_info", "official_verify").

    Returns:
        None. The assembled record is passed to the configured store's
        store_creator(). Nothing is stored when the payload has no creator
        card or the card has no "mid".
    """
    # `or {}` keeps every nested lookup safe: a missing or null section must
    # not raise AttributeError on the chained .get() calls.
    video_item_card_list: Dict = video_item.get("Card") or {}
    video_item_card: Dict = video_item_card_list.get("card") or {}

    user_id = video_item_card.get("mid")
    if user_id is None:
        # Without a mid there is no key to store the record under; str(None)
        # would otherwise be persisted as the literal user id "None".
        utils.logger.warning(
            "[store.bilibili.update_up_info] video item has no creator card, skip storing up info")
        return

    level_info: Dict = video_item_card.get("level_info") or {}
    official_verify: Dict = video_item_card.get("official_verify") or {}
    saver_up_info = {
        "user_id": str(user_id),
        "nickname": video_item_card.get("name"),
        "avatar": video_item_card.get("face"),
        "last_modify_ts": utils.get_current_timestamp(),
        "total_fans": video_item_card.get("fans"),
        "total_liked": video_item_card_list.get("like_num"),
        "user_rank": level_info.get("current_level"),
        "is_official": official_verify.get("type"),
    }
    utils.logger.info(
        f"[store.bilibili.update_up_info] bilibili user_id:{user_id}")
    await BiliStoreFactory.create_store().store_creator(creator=saver_up_info)
|
||||
|
||||
|
||||
async def batch_update_bilibili_video_comments(video_id: str, comments: List[Dict]):
|
||||
if not comments:
|
||||
return
|
||||
|
|
|
@ -85,6 +85,17 @@ class BiliCsvStoreImplement(AbstractStore):
|
|||
"""
|
||||
await self.save_data_to_csv(save_item=comment_item, store_type="comments")
|
||||
|
||||
async def store_creator(self, creator: Dict):
    """
    Write one Bilibili creator record through the CSV saver.

    Args:
        creator: creator item dict

    Returns:
        None
    """
    # Creator rows go to their own store type, separate from "comments".
    store_type = "creators"
    await self.save_data_to_csv(save_item=creator, store_type=store_type)
|
||||
|
||||
|
||||
class BiliDbStoreImplement(AbstractStore):
|
||||
async def store_content(self, content_item: Dict):
|
||||
|
@ -129,6 +140,27 @@ class BiliDbStoreImplement(AbstractStore):
|
|||
else:
|
||||
await update_comment_by_comment_id(comment_id, comment_item=comment_item)
|
||||
|
||||
async def store_creator(self, creator: Dict):
    """
    Persist a Bilibili creator record in the database: update the row when
    one already exists for the creator's user id, insert it otherwise.

    Args:
        creator: creator item dict; its "user_id" entry identifies the record

    Returns:
        None
    """
    from .bilibili_store_sql import (add_new_creator,
                                     query_creator_by_creator_id,
                                     update_creator_by_creator_id)

    user_id = creator.get("user_id")
    existing: Dict = await query_creator_by_creator_id(creator_id=user_id)
    if existing:
        await update_creator_by_creator_id(user_id, creator_item=creator)
        return

    # No stored row yet: stamp the creation time before inserting.
    creator["add_ts"] = utils.get_current_timestamp()
    await add_new_creator(creator)
|
||||
|
||||
|
||||
class BiliJsonStoreImplement(AbstractStore):
|
||||
json_store_path: str = "data/bilibili/json"
|
||||
|
@ -204,3 +236,14 @@ class BiliJsonStoreImplement(AbstractStore):
|
|||
|
||||
"""
|
||||
await self.save_data_to_json(comment_item, "comments")
|
||||
|
||||
async def store_creator(self, creator: Dict):
    """
    Creator JSON storage implementation.

    Args:
        creator: creator item dict

    Returns:
        None
    """
    # Creator records go to their own store type, separate from "comments".
    store_type = "creators"
    await self.save_data_to_json(creator, store_type)
|
||||
|
|
|
@ -100,3 +100,50 @@ async def update_comment_by_comment_id(comment_id: str, comment_item: Dict) -> i
|
|||
async_db_conn: AsyncMysqlDB = media_crawler_db_var.get()
|
||||
effect_row: int = await async_db_conn.update_table("bilibili_video_comment", comment_item, "comment_id", comment_id)
|
||||
return effect_row
|
||||
|
||||
|
||||
async def query_creator_by_creator_id(creator_id: str) -> Dict:
    """
    Look up one UP (creator) record by its user id.

    Args:
        creator_id: value matched against bilibili_up_info.user_id

    Returns:
        The first matching row as a dict, or an empty dict when nothing matches.
    """
    async_db_conn: AsyncMysqlDB = media_crawler_db_var.get()
    # Bind creator_id as a query parameter instead of interpolating it into
    # the SQL text, so quotes in the value cannot change the statement.
    # NOTE(review): assumes AsyncMysqlDB.query(sql, *args) forwards the extra
    # positional args to the driver's cursor.execute - confirm in the db wrapper.
    sql: str = "select * from bilibili_up_info where user_id = %s"
    rows: List[Dict] = await async_db_conn.query(sql, creator_id)
    if rows:
        return rows[0]
    return dict()
|
||||
|
||||
|
||||
async def add_new_creator(creator_item: Dict) -> int:
    """
    Insert a new UP (creator) record into bilibili_up_info.

    Args:
        creator_item: column-name -> value dict for the new row

    Returns:
        The value returned by item_to_table (annotated as the last row id).
    """
    db: AsyncMysqlDB = media_crawler_db_var.get()
    return await db.item_to_table("bilibili_up_info", creator_item)
|
||||
|
||||
|
||||
async def update_creator_by_creator_id(creator_id: str, creator_item: Dict) -> int:
    """
    Update the UP (creator) record whose user_id equals creator_id.

    Args:
        creator_id: user id selecting the row(s) to update
        creator_item: column-name -> value dict with the new values

    Returns:
        The value returned by update_table (annotated as the affected row count).
    """
    db: AsyncMysqlDB = media_crawler_db_var.get()
    return await db.update_table("bilibili_up_info", creator_item, "user_id", creator_id)
|
||||
|
||||
|
|
Loading…
Reference in New Issue