Merge pull request #278 from ZuWard/main

抖音二级评论
This commit is contained in:
程序员阿江-Relakkes 2024-06-07 13:04:18 +08:00 committed by GitHub
commit c8dbc0bf3d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 45 additions and 8 deletions

View File

@ -18,12 +18,12 @@
> 下面不支持的项目相关的代码架构已经搭建好只需要实现对应的方法即可欢迎大家提交PR
| 平台 | 关键词搜索 | 指定帖子ID爬取 | 二级评论 | 指定创作者主页 | 登录态缓存 | IP代理池 |
|-----|-------|----------|------|--------|-------|-------|
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| 抖音 | ✅ | ✅ | | ✅ | ✅ | ✅ |
| 快手 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
| B 站 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
| 微博 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
|-----|-------|----------|-----|--------|-------|-------|
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| 抖音 | ✅ | ✅ | | ✅ | ✅ | ✅ |
| 快手 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
| B 站 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
| 微博 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
## 使用方法

View File

@ -165,6 +165,23 @@ class DOUYINClient(AbstractApiClient):
headers["Referer"] = urllib.parse.quote(referer_url, safe=':/')
return await self.get(uri, params)
async def get_sub_comments(self, comment_id: str, cursor: int = 0):
"""
获取子评论
"""
uri = "/aweme/v1/web/comment/list/reply/"
params = {
'comment_id': comment_id,
"cursor": cursor,
"count": 20,
"item_type": 0,
}
keywords = request_keyword_var.get()
referer_url = "https://www.douyin.com/search/" + keywords + '?aid=3a3cec5a-9e27-4040-b6aa-ef548c2c1138&publish_time=0&sort_type=0&source=search_history&type=general'
headers = copy.copy(self.headers)
headers["Referer"] = urllib.parse.quote(referer_url, safe=':/')
return await self.get(uri, params)
async def get_aweme_all_comments(
self,
aweme_id: str,
@ -197,7 +214,27 @@ class DOUYINClient(AbstractApiClient):
await asyncio.sleep(crawl_interval)
if not is_fetch_sub_comments:
continue
# todo fetch sub comments
# 获取二级评论
for comment in comments:
reply_comment_total = comment.get("reply_comment_total")
if reply_comment_total > 0:
comment_id = comment.get("cid")
sub_comments_has_more = 1
sub_comments_cursor = 0
while sub_comments_has_more:
sub_comments_res = await self.get_sub_comments(comment_id, sub_comments_cursor)
sub_comments_has_more = sub_comments_res.get("has_more", 0)
sub_comments_cursor = sub_comments_res.get("cursor", 0)
sub_comments = sub_comments_res.get("comments", [])
if not sub_comments:
continue
result.extend(sub_comments)
if callback: # 如果有回调函数,就执行回调函数
await callback(aweme_id, sub_comments)
await asyncio.sleep(crawl_interval)
return result
async def get_user_info(self, sec_user_id: str):

View File

@ -177,8 +177,8 @@ class DouYinCrawler(AbstractCrawler):
await self.dy_client.get_aweme_all_comments(
aweme_id=aweme_id,
crawl_interval=random.random(),
is_fetch_sub_comments=config.ENABLE_GET_SUB_COMMENTS,
callback=douyin_store.batch_update_dy_aweme_comments
)
utils.logger.info(
f"[DouYinCrawler.get_comments] aweme_id: {aweme_id} comments have all been obtained and filtered ...")