fix: 修复抖音关键词搜索bug
This commit is contained in:
parent 2ab14920a3
commit 04cbe549af
|
@ -126,7 +126,8 @@ class DOUYINClient(AbstractApiClient):
|
||||||
offset: int = 0,
|
offset: int = 0,
|
||||||
search_channel: SearchChannelType = SearchChannelType.GENERAL,
|
search_channel: SearchChannelType = SearchChannelType.GENERAL,
|
||||||
sort_type: SearchSortType = SearchSortType.GENERAL,
|
sort_type: SearchSortType = SearchSortType.GENERAL,
|
||||||
publish_time: PublishTimeType = PublishTimeType.UNLIMITED
|
publish_time: PublishTimeType = PublishTimeType.UNLIMITED,
|
||||||
|
search_id: str = ""
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
DouYin Web Search API
|
DouYin Web Search API
|
||||||
|
@ -135,6 +136,7 @@ class DOUYINClient(AbstractApiClient):
|
||||||
:param search_channel:
|
:param search_channel:
|
||||||
:param sort_type:
|
:param sort_type:
|
||||||
:param publish_time: ·
|
:param publish_time: ·
|
||||||
|
:param search_id: ·
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
query_params = {
|
query_params = {
|
||||||
|
@ -149,6 +151,7 @@ class DOUYINClient(AbstractApiClient):
|
||||||
'count': '15',
|
'count': '15',
|
||||||
'need_filter_settings': '1',
|
'need_filter_settings': '1',
|
||||||
'list_type': 'multi',
|
'list_type': 'multi',
|
||||||
|
'search_id': search_id,
|
||||||
}
|
}
|
||||||
if sort_type.value != SearchSortType.GENERAL.value or publish_time.value != PublishTimeType.UNLIMITED.value:
|
if sort_type.value != SearchSortType.GENERAL.value or publish_time.value != PublishTimeType.UNLIMITED.value:
|
||||||
query_params["filter_selected"] = json.dumps({
|
query_params["filter_selected"] = json.dumps({
|
||||||
|
|
|
@ -83,6 +83,7 @@ class DouYinCrawler(AbstractCrawler):
|
||||||
utils.logger.info(f"[DouYinCrawler.search] Current keyword: {keyword}")
|
utils.logger.info(f"[DouYinCrawler.search] Current keyword: {keyword}")
|
||||||
aweme_list: List[str] = []
|
aweme_list: List[str] = []
|
||||||
page = 0
|
page = 0
|
||||||
|
dy_search_id = ""
|
||||||
while (page - start_page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
|
while (page - start_page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
|
||||||
if page < start_page:
|
if page < start_page:
|
||||||
utils.logger.info(f"[DouYinCrawler.search] Skip {page}")
|
utils.logger.info(f"[DouYinCrawler.search] Skip {page}")
|
||||||
|
@ -92,7 +93,8 @@ class DouYinCrawler(AbstractCrawler):
|
||||||
utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page}")
|
utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page}")
|
||||||
posts_res = await self.dy_client.search_info_by_keyword(keyword=keyword,
|
posts_res = await self.dy_client.search_info_by_keyword(keyword=keyword,
|
||||||
offset=page * dy_limit_count - dy_limit_count,
|
offset=page * dy_limit_count - dy_limit_count,
|
||||||
publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE)
|
publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE),
|
||||||
|
search_id=dy_search_id
|
||||||
)
|
)
|
||||||
except DataFetchError:
|
except DataFetchError:
|
||||||
utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed")
|
utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed")
|
||||||
|
@ -103,7 +105,7 @@ class DouYinCrawler(AbstractCrawler):
|
||||||
utils.logger.error(
|
utils.logger.error(
|
||||||
f"[DouYinCrawler.search] search douyin keyword: {keyword} failed,账号也许被风控了。")
|
f"[DouYinCrawler.search] search douyin keyword: {keyword} failed,账号也许被风控了。")
|
||||||
break
|
break
|
||||||
|
dy_search_id = posts_res.get("extra", {}).get("logid", "")
|
||||||
for post_item in posts_res.get("data"):
|
for post_item in posts_res.get("data"):
|
||||||
try:
|
try:
|
||||||
aweme_info: Dict = post_item.get("aweme_info") or \
|
aweme_info: Dict = post_item.get("aweme_info") or \
|
||||||
|
|
Loading…
Reference in New Issue