fix: 修复抖音关键词搜索bug

2024-08-20 03:09:42 +08:00 · 2024-08-20 03:09:42 +08:00 · 04cbe549af
parent 2ab14920a3
commit 04cbe549af
2 changed files with 8 additions and 3 deletions
--- a/media_platform/douyin/client.py
+++ b/media_platform/douyin/client.py
@@ -126,7 +126,8 @@ class DOUYINClient(AbstractApiClient):
            offset: int = 0,
            search_channel: SearchChannelType = SearchChannelType.GENERAL,
            sort_type: SearchSortType = SearchSortType.GENERAL,
-            publish_time: PublishTimeType = PublishTimeType.UNLIMITED
+            publish_time: PublishTimeType = PublishTimeType.UNLIMITED,
            search_id: str = ""
    ):
        """
        DouYin Web Search API
@@ -135,6 +136,7 @@ class DOUYINClient(AbstractApiClient):
        :param search_channel:
        :param sort_type:
        :param publish_time: ·
        :param search_id: ·
        :return:
        """
        query_params = {
@@ -149,6 +151,7 @@ class DOUYINClient(AbstractApiClient):
            'count': '15',
            'need_filter_settings': '1',
            'list_type': 'multi',
            'search_id': search_id,
        }
        if sort_type.value != SearchSortType.GENERAL.value or publish_time.value != PublishTimeType.UNLIMITED.value:
            query_params["filter_selected"] = json.dumps({
--- a/media_platform/douyin/core.py
+++ b/media_platform/douyin/core.py
@@ -83,6 +83,7 @@ class DouYinCrawler(AbstractCrawler):
            utils.logger.info(f"[DouYinCrawler.search] Current keyword: {keyword}")
            aweme_list: List[str] = []
            page = 0
            dy_search_id = ""
            while (page - start_page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
                if page < start_page:
                    utils.logger.info(f"[DouYinCrawler.search] Skip {page}")
@@ -92,7 +93,8 @@ class DouYinCrawler(AbstractCrawler):
                    utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page}")
                    posts_res = await self.dy_client.search_info_by_keyword(keyword=keyword,
                                                                            offset=page * dy_limit_count - dy_limit_count,
-                                                                            publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE)
+                                                                            publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE),
                                                                            search_id=dy_search_id
                                                                            )
                except DataFetchError:
                    utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed")
@@ -103,7 +105,7 @@ class DouYinCrawler(AbstractCrawler):
                    utils.logger.error(
                        f"[DouYinCrawler.search] search douyin keyword: {keyword} failed，账号也许被风控了。")
                    break
-
+                dy_search_id = posts_res.get("extra", {}).get("logid", "")
                for post_item in posts_res.get("data"):
                    try:
                        aweme_info: Dict = post_item.get("aweme_info") or \