From 78c09c4ae11c07d4589dd52ecdee84e9e17ff593 Mon Sep 17 00:00:00 2001 From: liugangdao Date: Thu, 24 Oct 2024 13:57:27 +0800 Subject: [PATCH] =?UTF-8?q?fix:=E7=BF=BB=E9=A1=B5=E6=97=B6search=20id?= =?UTF-8?q?=E4=B8=8D=E5=8F=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- media_platform/xhs/client.py | 3 ++- media_platform/xhs/core.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/media_platform/xhs/client.py b/media_platform/xhs/client.py index ee7f82e..4d83c0b 100644 --- a/media_platform/xhs/client.py +++ b/media_platform/xhs/client.py @@ -190,6 +190,7 @@ class XiaoHongShuClient(AbstractApiClient): async def get_note_by_keyword( self, keyword: str, + search_id: str = "", page: int = 1, page_size: int = 20, sort: SearchSortType = SearchSortType.GENERAL, note_type: SearchNoteType = SearchNoteType.ALL @@ -211,7 +212,7 @@ class XiaoHongShuClient(AbstractApiClient): "keyword": keyword, "page": page, "page_size": page_size, - "search_id": get_search_id(), + "search_id": search_id or get_search_id(), "sort": sort.value, "note_type": note_type.value } diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index ef1011d..6e1da30 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -31,7 +31,7 @@ from var import crawler_type_var, source_keyword_var from .client import XiaoHongShuClient from .exception import DataFetchError from .field import SearchSortType -from .help import parse_note_info_from_note_url +from .help import parse_note_info_from_note_url, get_search_id from .login import XiaoHongShuLogin @@ -112,6 +112,7 @@ class XiaoHongShuCrawler(AbstractCrawler): source_keyword_var.set(keyword) utils.logger.info(f"[XiaoHongShuCrawler.search] Current search keyword: {keyword}") page = 1 + search_id = get_search_id() while (page - start_page + 1) * xhs_limit_count <= config.CRAWLER_MAX_NOTES_COUNT: if page < start_page: utils.logger.info(f"[XiaoHongShuCrawler.search] 
Skip page {page}") @@ -123,6 +124,7 @@ class XiaoHongShuCrawler(AbstractCrawler): note_id_list: List[str] = [] notes_res = await self.xhs_client.get_note_by_keyword( keyword=keyword, + search_id=search_id, page=page, sort=SearchSortType(config.SORT_TYPE) if config.SORT_TYPE != '' else SearchSortType.GENERAL, )