抖音二级评论
This commit is contained in:
parent
5e145c31b9
commit
0ba68809a5
12
README.md
12
README.md
|
@ -18,12 +18,12 @@
|
|||
> 下面不支持的项目,相关的代码架构已经搭建好,只需要实现对应的方法即可,欢迎大家提交PR
|
||||
|
||||
| 平台 | 关键词搜索 | 指定帖子ID爬取 | 二级评论 | 指定创作者主页 | 登录态缓存 | IP代理池 |
|
||||
|-----|-------|----------|------|--------|-------|-------|
|
||||
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| 抖音 | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ |
|
||||
| 快手 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
|
||||
| B 站 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
|
||||
| 微博 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
|
||||
|-----|-------|----------|-----|--------|-------|-------|
|
||||
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| 快手 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
|
||||
| B 站 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
|
||||
| 微博 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
|
||||
|
||||
|
||||
## 使用方法
|
||||
|
|
|
@ -165,6 +165,23 @@ class DOUYINClient(AbstractApiClient):
|
|||
headers["Referer"] = urllib.parse.quote(referer_url, safe=':/')
|
||||
return await self.get(uri, params)
|
||||
|
||||
async def get_sub_comments(self, comment_id: str, cursor: int = 0):
|
||||
"""
|
||||
获取子评论
|
||||
"""
|
||||
uri = "/aweme/v1/web/comment/list/reply/"
|
||||
params = {
|
||||
'comment_id': comment_id,
|
||||
"cursor": cursor,
|
||||
"count": 20,
|
||||
"item_type": 0,
|
||||
}
|
||||
keywords = request_keyword_var.get()
|
||||
referer_url = "https://www.douyin.com/search/" + keywords + '?aid=3a3cec5a-9e27-4040-b6aa-ef548c2c1138&publish_time=0&sort_type=0&source=search_history&type=general'
|
||||
headers = copy.copy(self.headers)
|
||||
headers["Referer"] = urllib.parse.quote(referer_url, safe=':/')
|
||||
return await self.get(uri, params)
|
||||
|
||||
async def get_aweme_all_comments(
|
||||
self,
|
||||
aweme_id: str,
|
||||
|
@ -197,7 +214,27 @@ class DOUYINClient(AbstractApiClient):
|
|||
await asyncio.sleep(crawl_interval)
|
||||
if not is_fetch_sub_comments:
|
||||
continue
|
||||
# todo fetch sub comments
|
||||
# 获取二级评论
|
||||
for comment in comments:
|
||||
reply_comment_total = comment.get("reply_comment_total")
|
||||
|
||||
if reply_comment_total > 0:
|
||||
comment_id = comment.get("cid")
|
||||
sub_comments_has_more = 1
|
||||
sub_comments_cursor = 0
|
||||
|
||||
while sub_comments_has_more:
|
||||
sub_comments_res = await self.get_sub_comments(comment_id, sub_comments_cursor)
|
||||
sub_comments_has_more = sub_comments_res.get("has_more", 0)
|
||||
sub_comments_cursor = sub_comments_res.get("cursor", 0)
|
||||
sub_comments = sub_comments_res.get("comments", [])
|
||||
|
||||
if not sub_comments:
|
||||
continue
|
||||
result.extend(sub_comments)
|
||||
if callback: # 如果有回调函数,就执行回调函数
|
||||
await callback(aweme_id, sub_comments)
|
||||
await asyncio.sleep(crawl_interval)
|
||||
return result
|
||||
|
||||
async def get_user_info(self, sec_user_id: str):
|
||||
|
|
|
@ -177,8 +177,8 @@ class DouYinCrawler(AbstractCrawler):
|
|||
await self.dy_client.get_aweme_all_comments(
|
||||
aweme_id=aweme_id,
|
||||
crawl_interval=random.random(),
|
||||
is_fetch_sub_comments=config.ENABLE_GET_SUB_COMMENTS,
|
||||
callback=douyin_store.batch_update_dy_aweme_comments
|
||||
|
||||
)
|
||||
utils.logger.info(
|
||||
f"[DouYinCrawler.get_comments] aweme_id: {aweme_id} comments have all been obtained and filtered ...")
|
||||
|
|
Loading…
Reference in New Issue