fix: xhs note detail error
This commit is contained in:
parent
00b12ce482
commit
d6fb255bdf
|
@ -7,15 +7,29 @@ from playwright.async_api import BrowserContext, BrowserType
|
|||
class AbstractCrawler(ABC):
    """Abstract base class for crawlers.

    Concrete crawlers (for example ``BilibiliCrawler`` and
    ``XiaoHongShuCrawler``) subclass this and implement every abstract
    coroutine declared below.
    """

    @abstractmethod
    async def start(self):
        """Start the crawler."""

    @abstractmethod
    async def search(self):
        """Run the crawler's search step."""

    @abstractmethod
    async def launch_browser(self, chromium: BrowserType, playwright_proxy: Optional[Dict], user_agent: Optional[str],
                             headless: bool = True) -> BrowserContext:
        """Launch a browser and return its browser context.

        :param chromium: chromium browser type used for the launch
        :param playwright_proxy: playwright proxy settings, or None
        :param user_agent: user agent string, or None
        :param headless: whether to run in headless mode (defaults to True)
        :return: browser context
        """
|
||||
|
||||
|
||||
|
|
1
main.py
1
main.py
|
@ -46,6 +46,7 @@ async def main():
|
|||
await db.close()
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
# asyncio.run(main())
|
||||
|
|
|
@ -262,7 +262,11 @@ class BilibiliCrawler(AbstractCrawler):
|
|||
return None
|
||||
|
||||
async def create_bilibili_client(self, httpx_proxy: Optional[str]) -> BilibiliClient:
|
||||
"""Create xhs client"""
|
||||
"""
|
||||
create bilibili client
|
||||
:param httpx_proxy: httpx proxy
|
||||
:return: bilibili client
|
||||
"""
|
||||
utils.logger.info(
|
||||
"[BilibiliCrawler.create_bilibili_client] Begin create bilibili API client ...")
|
||||
cookie_str, cookie_dict = utils.convert_cookies(await self.browser_context.cookies())
|
||||
|
@ -282,7 +286,11 @@ class BilibiliCrawler(AbstractCrawler):
|
|||
|
||||
@staticmethod
|
||||
def format_proxy_info(ip_proxy_info: IpInfoModel) -> Tuple[Optional[Dict], Optional[Dict]]:
|
||||
"""format proxy info for playwright and httpx"""
|
||||
"""
|
||||
format proxy info for playwright and httpx
|
||||
:param ip_proxy_info: ip proxy info
|
||||
:return: playwright proxy, httpx proxy
|
||||
"""
|
||||
playwright_proxy = {
|
||||
"server": f"{ip_proxy_info.protocol}{ip_proxy_info.ip}:{ip_proxy_info.port}",
|
||||
"username": ip_proxy_info.user,
|
||||
|
@ -300,7 +308,14 @@ class BilibiliCrawler(AbstractCrawler):
|
|||
user_agent: Optional[str],
|
||||
headless: bool = True
|
||||
) -> BrowserContext:
|
||||
"""Launch browser and create browser context"""
|
||||
"""
|
||||
launch browser and create browser context
|
||||
:param chromium: chromium browser
|
||||
:param playwright_proxy: playwright proxy
|
||||
:param user_agent: user agent
|
||||
:param headless: headless mode
|
||||
:return: browser context
|
||||
"""
|
||||
utils.logger.info(
|
||||
"[BilibiliCrawler.launch_browser] Begin create browser context ...")
|
||||
if config.SAVE_LOGIN_STATE:
|
||||
|
|
|
@ -220,7 +220,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
|
|||
"""Get note detail"""
|
||||
async with semaphore:
|
||||
try:
|
||||
note_detail: Dict = await self.xhs_client.get_note_by_id(note_id, xsec_source, xsec_token)
|
||||
note_detail: Dict = await self.xhs_client.get_note_by_id_from_html(note_id)
|
||||
if not note_detail:
|
||||
utils.logger.error(
|
||||
f"[XiaoHongShuCrawler.get_note_detail_async_task] Get note detail error, note_id: {note_id}")
|
||||
|
|
Loading…
Reference in New Issue