fix: 修复微博PC端登录后COOKIE在手机端无法使用的bug

This commit is contained in:
Relakkes 2024-01-06 19:18:07 +08:00
parent fe073801f8
commit 4de14ad6a8
3 changed files with 20 additions and 8 deletions

View File

@ -7,7 +7,7 @@ import asyncio
import copy
import json
import re
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from typing import Any, Callable, Dict, List, Optional
from urllib.parse import urlencode
import httpx
@ -73,6 +73,8 @@ class WeiboClient:
resp_data: Dict = await self.request(method="GET", url=f"{self._host}{uri}", headers=self.headers)
if resp_data.get("login"):
ping_flag = True
else:
utils.logger.error(f"[WeiboClient.pong] cookie may be invalid and again login...")
except Exception as e:
utils.logger.error(f"[WeiboClient.pong] Pong weibo failed: {e}, and try to login again...")
ping_flag = False

View File

@ -37,7 +37,9 @@ class WeiboCrawler(AbstractCrawler):
def __init__(self):
self.index_url = "https://www.weibo.com"
self.mobile_index_url = "https://m.weibo.cn"
self.user_agent = utils.get_user_agent()
self.mobile_user_agent = utils.get_mobile_user_agent()
def init_config(self, platform: str, login_type: str, crawler_type: str):
self.platform = platform
@ -57,13 +59,13 @@ class WeiboCrawler(AbstractCrawler):
self.browser_context = await self.launch_browser(
chromium,
None,
self.user_agent,
self.mobile_user_agent,
headless=config.HEADLESS
)
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
self.context_page = await self.browser_context.new_page()
await self.context_page.goto(self.index_url)
await self.context_page.goto(self.mobile_index_url)
# Create a client to interact with the weibo website.
self.wb_client = await self.create_weibo_client(httpx_proxy_format)
@ -75,7 +77,14 @@ class WeiboCrawler(AbstractCrawler):
context_page=self.context_page,
cookie_str=config.COOKIES
)
await self.context_page.goto(self.index_url)
await asyncio.sleep(1)
await login_obj.begin()
# 登录成功后重定向到手机端的网站再更新手机端登录成功的cookie
utils.logger.info("[WeiboCrawler.start] redirect weibo mobile homepage and update cookies on mobile platform")
await self.context_page.goto(self.mobile_index_url)
await asyncio.sleep(2)
await self.wb_client.update_cookies(browser_context=self.browser_context)
crawler_type_var.set(self.crawler_type)
@ -183,7 +192,7 @@ class WeiboCrawler(AbstractCrawler):
# Download comments
all_comments = await self.wb_client.get_note_all_comments(
note_id=note_id,
crawl_interval=random.random(),
crawl_interval=random.randint(1,10), # 微博对API的限流比较严重所以延时提高一些
)
# Filter comments by keyword

View File

@ -43,6 +43,7 @@ class WeiboLogin(AbstractLogin):
raise ValueError(
"[WeiboLogin.begin] Invalid Login Type Currently only supported qrcode or phone or cookie ...")
@retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
async def check_login_state(self, no_logged_in_session: str) -> bool:
"""
@ -61,8 +62,8 @@ class WeiboLogin(AbstractLogin):
"""If the login dialog box does not pop up automatically, we will manually click the login button"""
dialog_selector = "xpath=//div[@class='woo-modal-main']"
try:
# check dialog box is auto popup and wait for 10 seconds
await self.context_page.wait_for_selector(dialog_selector, timeout=1000 * 10)
# check dialog box is auto popup and wait for 4 seconds
await self.context_page.wait_for_selector(dialog_selector, timeout=1000 * 4)
except Exception as e:
utils.logger.error(
f"[WeiboLogin.popup_login_dialog] login dialog box does not pop up automatically, error: {e}")
@ -71,12 +72,12 @@ class WeiboLogin(AbstractLogin):
# Scroll down by 500 pixels
await self.context_page.mouse.wheel(0,500)
await asyncio.sleep(2)
await asyncio.sleep(0.5)
try:
# click login button
login_button_ele = self.context_page.locator(
"xpath=//a[text()='登录']"
"xpath=//a[text()='登录']",
)
await login_button_ele.click()
await asyncio.sleep(0.5)