feat: xhs支持获取评论的点赞数量
This commit is contained in:
parent
ab7d8142af
commit
65699aa1cb
|
@ -442,9 +442,10 @@ class XiaoHongShuClient(AbstractApiClient):
|
||||||
}
|
}
|
||||||
return await self.post(uri, data=data, return_response=True)
|
return await self.post(uri, data=data, return_response=True)
|
||||||
|
|
||||||
|
@retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
|
||||||
async def get_note_by_id_from_html(self, note_id: str):
|
async def get_note_by_id_from_html(self, note_id: str):
|
||||||
"""
|
"""
|
||||||
通过解析网页版的笔记详情页HTML,获取笔记详情
|
通过解析网页版的笔记详情页HTML,获取笔记详情。该接口可能会出现失败的情况,因此最多尝试3次
|
||||||
copy from https://github.com/ReaJason/xhs/blob/eb1c5a0213f6fbb592f0a2897ee552847c69ea2d/xhs/core.py#L217-L259
|
copy from https://github.com/ReaJason/xhs/blob/eb1c5a0213f6fbb592f0a2897ee552847c69ea2d/xhs/core.py#L217-L259
|
||||||
thanks to ReaJason
|
thanks to ReaJason
|
||||||
Args:
|
Args:
|
||||||
|
|
|
@ -6,6 +6,7 @@ from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
from playwright.async_api import (BrowserContext, BrowserType, Page,
|
from playwright.async_api import (BrowserContext, BrowserType, Page,
|
||||||
async_playwright)
|
async_playwright)
|
||||||
|
from tenacity import RetryError
|
||||||
|
|
||||||
import config
|
import config
|
||||||
from base.base_crawler import AbstractCrawler
|
from base.base_crawler import AbstractCrawler
|
||||||
|
@ -197,6 +198,9 @@ class XiaoHongShuCrawler(AbstractCrawler):
|
||||||
utils.logger.error(
|
utils.logger.error(
|
||||||
f"[XiaoHongShuCrawler.get_note_detail_from_html] have not fund note detail note_id:{note_id}, err: {ex}")
|
f"[XiaoHongShuCrawler.get_note_detail_from_html] have not fund note detail note_id:{note_id}, err: {ex}")
|
||||||
return {}
|
return {}
|
||||||
|
except RetryError as ex:
|
||||||
|
utils.logger.error(
|
||||||
|
f"[XiaoHongShuCrawler.get_note_detail_from_html] Retry error, note_id:{note_id}, err: {ex}")
|
||||||
|
|
||||||
get_note_detail_task_list = [
|
get_note_detail_task_list = [
|
||||||
get_note_detail_from_html_task(note_id=note_id, semaphore=asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)) for
|
get_note_detail_from_html_task(note_id=note_id, semaphore=asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)) for
|
||||||
|
|
|
@ -425,3 +425,7 @@ CREATE TABLE `weibo_creator`
|
||||||
`tag_list` longtext COMMENT '标签列表',
|
`tag_list` longtext COMMENT '标签列表',
|
||||||
PRIMARY KEY (`id`)
|
PRIMARY KEY (`id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='微博博主';
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='微博博主';
|
||||||
|
|
||||||
|
|
||||||
|
ALTER TABLE `xhs_note_comment`
|
||||||
|
ADD COLUMN `like_count` VARCHAR(64) DEFAULT NULL COMMENT '评论点赞数量';
|
|
@ -28,6 +28,14 @@ class XhsStoreFactory:
|
||||||
|
|
||||||
|
|
||||||
def get_video_url_arr(note_item: Dict) -> List:
|
def get_video_url_arr(note_item: Dict) -> List:
|
||||||
|
"""
|
||||||
|
获取视频url数组
|
||||||
|
Args:
|
||||||
|
note_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
if note_item.get('type') != 'video':
|
if note_item.get('type') != 'video':
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
@ -47,6 +55,14 @@ def get_video_url_arr(note_item: Dict) -> List:
|
||||||
|
|
||||||
|
|
||||||
async def update_xhs_note(note_item: Dict):
|
async def update_xhs_note(note_item: Dict):
|
||||||
|
"""
|
||||||
|
更新小红书笔记
|
||||||
|
Args:
|
||||||
|
note_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
note_id = note_item.get("note_id")
|
note_id = note_item.get("note_id")
|
||||||
user_info = note_item.get("user", {})
|
user_info = note_item.get("user", {})
|
||||||
interact_info = note_item.get("interact_info", {})
|
interact_info = note_item.get("interact_info", {})
|
||||||
|
@ -86,6 +102,15 @@ async def update_xhs_note(note_item: Dict):
|
||||||
|
|
||||||
|
|
||||||
async def batch_update_xhs_note_comments(note_id: str, comments: List[Dict]):
|
async def batch_update_xhs_note_comments(note_id: str, comments: List[Dict]):
|
||||||
|
"""
|
||||||
|
批量更新小红书笔记评论
|
||||||
|
Args:
|
||||||
|
note_id:
|
||||||
|
comments:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
if not comments:
|
if not comments:
|
||||||
return
|
return
|
||||||
for comment_item in comments:
|
for comment_item in comments:
|
||||||
|
@ -93,6 +118,15 @@ async def batch_update_xhs_note_comments(note_id: str, comments: List[Dict]):
|
||||||
|
|
||||||
|
|
||||||
async def update_xhs_note_comment(note_id: str, comment_item: Dict):
|
async def update_xhs_note_comment(note_id: str, comment_item: Dict):
|
||||||
|
"""
|
||||||
|
更新小红书笔记评论
|
||||||
|
Args:
|
||||||
|
note_id:
|
||||||
|
comment_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
user_info = comment_item.get("user_info", {})
|
user_info = comment_item.get("user_info", {})
|
||||||
comment_id = comment_item.get("id")
|
comment_id = comment_item.get("id")
|
||||||
comment_pictures = [item.get("url_default", "") for item in comment_item.get("pictures", [])]
|
comment_pictures = [item.get("url_default", "") for item in comment_item.get("pictures", [])]
|
||||||
|
@ -110,12 +144,22 @@ async def update_xhs_note_comment(note_id: str, comment_item: Dict):
|
||||||
"pictures": ",".join(comment_pictures),
|
"pictures": ",".join(comment_pictures),
|
||||||
"parent_comment_id": target_comment.get("id", 0),
|
"parent_comment_id": target_comment.get("id", 0),
|
||||||
"last_modify_ts": utils.get_current_timestamp(),
|
"last_modify_ts": utils.get_current_timestamp(),
|
||||||
|
"like_count": comment_item.get("like_count", 0),
|
||||||
}
|
}
|
||||||
utils.logger.info(f"[store.xhs.update_xhs_note_comment] xhs note comment:{local_db_item}")
|
utils.logger.info(f"[store.xhs.update_xhs_note_comment] xhs note comment:{local_db_item}")
|
||||||
await XhsStoreFactory.create_store().store_comment(local_db_item)
|
await XhsStoreFactory.create_store().store_comment(local_db_item)
|
||||||
|
|
||||||
|
|
||||||
async def save_creator(user_id: str, creator: Dict):
|
async def save_creator(user_id: str, creator: Dict):
|
||||||
|
"""
|
||||||
|
保存小红书创作者
|
||||||
|
Args:
|
||||||
|
user_id:
|
||||||
|
creator:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
user_info = creator.get('basicInfo', {})
|
user_info = creator.get('basicInfo', {})
|
||||||
|
|
||||||
follows = 0
|
follows = 0
|
||||||
|
@ -148,5 +192,16 @@ async def save_creator(user_id: str, creator: Dict):
|
||||||
|
|
||||||
|
|
||||||
async def update_xhs_note_image(note_id, pic_content, extension_file_name):
|
async def update_xhs_note_image(note_id, pic_content, extension_file_name):
|
||||||
|
"""
|
||||||
|
更新小红书笔记图片
|
||||||
|
Args:
|
||||||
|
note_id:
|
||||||
|
pic_content:
|
||||||
|
extension_file_name:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
await XiaoHongShuImage().store_image(
|
await XiaoHongShuImage().store_image(
|
||||||
{"notice_id": note_id, "pic_content": pic_content, "extension_file_name": extension_file_name})
|
{"notice_id": note_id, "pic_content": pic_content, "extension_file_name": extension_file_name})
|
||||||
|
|
Loading…
Reference in New Issue