feat: 微博二维码登录done

This commit is contained in:
Relakkes 2023-12-30 18:54:21 +08:00
parent 27a2041929
commit 38d6f10bf0
6 changed files with 69 additions and 25 deletions

View File

@ -23,8 +23,8 @@
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
| 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| 快手 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
| B 站 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
| 微博 | ✅ | | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
| B 站 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
| 微博 | ✅ | | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
## 使用方法

View File

@ -14,8 +14,8 @@ IP_PROXY_POOL_COUNT = 2
# 重试时间
RETRY_INTERVAL = 60 * 30 # 30 minutes
# 无头浏览器的标识:True 开启,False 关闭(会打开一个浏览器)
HEADLESS = False
# 设置为True不会打开浏览器(无头浏览器);设置False会打开一个浏览器。小红书如果一直扫码登录不通过,打开浏览器手动过一下滑动验证码
HEADLESS = True
# 是否保存登录状态
SAVE_LOGIN_STATE = True

View File

@ -69,9 +69,12 @@ class WeiboClient:
utils.logger.info("[WeiboClient.pong] Begin pong weibo...")
ping_flag = False
try:
pass
uri = "/api/config"
resp_data: Dict = await self.request(method="GET", url=f"{self._host}{uri}", headers=self.headers)
if resp_data.get("login"):
ping_flag = True
except Exception as e:
utils.logger.error(f"[BilibiliClient.pong] Pong weibo failed: {e}, and try to login again...")
utils.logger.error(f"[WeiboClient.pong] Pong weibo failed: {e}, and try to login again...")
ping_flag = False
return ping_flag

View File

@ -7,9 +7,8 @@
import asyncio
import os
import random
import time
from asyncio import Task
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Optional, Tuple
from playwright.async_api import (BrowserContext, BrowserType, Page,
async_playwright)
@ -19,7 +18,7 @@ from base.base_crawler import AbstractCrawler
from models import weibo
from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool
from tools import utils
from var import comment_tasks_var, crawler_type_var
from var import crawler_type_var
from .client import WeiboClient
from .exception import DataFetchError
@ -37,8 +36,8 @@ class WeiboCrawler(AbstractCrawler):
browser_context: BrowserContext
def __init__(self):
self.index_url = "https://m.weibo.cn"
self.user_agent = utils.get_mobile_user_agent()
self.index_url = "https://www.weibo.com"
self.user_agent = utils.get_user_agent()
def init_config(self, platform: str, login_type: str, crawler_type: str):
self.platform = platform
@ -215,7 +214,7 @@ class WeiboCrawler(AbstractCrawler):
weibo_client_obj = WeiboClient(
proxies=httpx_proxy,
headers={
"User-Agent": self.user_agent,
"User-Agent": utils.get_mobile_user_agent(),
"Cookie": cookie_str,
"Origin": "https://m.weibo.cn",
"Referer": "https://m.weibo.cn",

View File

@ -32,7 +32,7 @@ class WeiboLogin(AbstractLogin):
async def begin(self):
"""Start login weibo"""
utils.logger.info("[WeiboLogin.begin] Begin login Bilibili ...")
utils.logger.info("[WeiboLogin.begin] Begin login weibo ...")
if self.login_type == "qrcode":
await self.login_by_qrcode()
elif self.login_type == "phone":
@ -44,7 +44,7 @@ class WeiboLogin(AbstractLogin):
"[WeiboLogin.begin] Invalid Login Type Currently only supported qrcode or phone or cookie ...")
@retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
async def check_login_state(self) -> bool:
async def check_login_state(self, no_logged_in_session: str) -> bool:
"""
Check if the current login status is successful and return True otherwise return False
retry decorator will retry 20 times if the return value is False, and the retry interval is 1 second
@ -52,22 +52,45 @@ class WeiboLogin(AbstractLogin):
"""
current_cookie = await self.browser_context.cookies()
_, cookie_dict = utils.convert_cookies(current_cookie)
if cookie_dict.get("SESSDATA", "") or cookie_dict.get("DedeUserID"):
current_web_session = cookie_dict.get("WBPSESS")
if current_web_session != no_logged_in_session:
return True
return False
async def popup_login_dialog(self):
    """Make sure the Weibo login dialog is showing on ``self.context_page``.

    First waits up to 10 seconds for the ``woo-modal-main`` dialog to appear
    on its own. If that wait fails for any reason, falls back to sending a
    mouse-wheel event and clicking the "登录" (login) link manually.

    The fallback click is best-effort: a failure there is only logged and
    never raised, since the dialog may have appeared in the meantime.
    """
    dialog_selector = "xpath=//div[@class='woo-modal-main']"
    try:
        # Happy path: the dialog pops up automatically within 10 seconds.
        await self.context_page.wait_for_selector(dialog_selector, timeout=1000 * 10)
        return
    except Exception as e:
        utils.logger.error(
            f"[WeiboLogin.popup_login_dialog] login dialog box does not pop up automatically, error: {e}")
        utils.logger.info(
            "[WeiboLogin.popup_login_dialog] login dialog box does not pop up automatically, we will manually click the login button")

    # Send a wheel event with a vertical delta of 500, then give the page
    # time to settle before looking for the login link.
    # NOTE(review): presumably this scroll is what makes the login link
    # reachable/visible - confirm against the live page.
    await self.context_page.mouse.wheel(0, 500)
    await asyncio.sleep(2)

    try:
        # Click the login link to open the dialog manually.
        login_button_ele = self.context_page.locator(
            "xpath=//a[text()='登录']"
        )
        await login_button_ele.click()
        await asyncio.sleep(0.5)
    except Exception as e:
        # Deliberately swallowed: the click can fail simply because the
        # dialog showed up after the initial wait timed out.
        utils.logger.info(
            f"[WeiboLogin.popup_login_dialog] manually click the login button failed, maybe login dialog appeared: {e}")
async def login_by_qrcode(self):
"""login weibo website and keep webdriver login state"""
utils.logger.info("[WeiboLogin.login_by_qrcode] Begin login weibo by qrcode ...")
# click login button
login_button_ele = self.context_page.locator(
"xpath=//div[@class='right-entry__outside go-login-btn']//div"
)
await login_button_ele.click()
await self.popup_login_dialog()
# find login qrcode
qrcode_img_selector = "//div[@class='login-scan-box']//img"
qrcode_img_selector = "//div[@class='woo-modal-main']//img"
base64_qrcode_img = await utils.find_login_qrcode(
self.context_page,
selector=qrcode_img_selector
@ -81,8 +104,14 @@ class WeiboLogin(AbstractLogin):
asyncio.get_running_loop().run_in_executor(executor=None, func=partial_show_qrcode)
utils.logger.info(f"[WeiboLogin.login_by_qrcode] Waiting for scan code login, remaining time is 20s")
# get not logged session
current_cookie = await self.browser_context.cookies()
_, cookie_dict = utils.convert_cookies(current_cookie)
no_logged_in_session = cookie_dict.get("WBPSESS")
try:
await self.check_login_state()
await self.check_login_state(no_logged_in_session)
except RetryError:
utils.logger.info("[WeiboLogin.login_by_qrcode] Login weibo failed by qrcode login method ...")
sys.exit()

View File

@ -9,9 +9,12 @@ import re
from io import BytesIO
from typing import Dict, List, Optional, Tuple
import httpx
from PIL import Image, ImageDraw
from playwright.async_api import Cookie, Page
from . import utils
async def find_login_qrcode(page: Page, selector: str) -> str:
"""find login qrcode image from target selector"""
@ -19,8 +22,17 @@ async def find_login_qrcode(page: Page, selector: str) -> str:
elements = await page.wait_for_selector(
selector=selector,
)
login_qrcode_img = await elements.get_property("src") # type: ignore
return str(login_qrcode_img)
login_qrcode_img = str(await elements.get_property("src")) # type: ignore
if "http://" in login_qrcode_img or "https://" in login_qrcode_img:
async with httpx.AsyncClient(follow_redirects=True) as client:
utils.logger.info(f"[find_login_qrcode] get qrcode by url:{login_qrcode_img}")
resp = await client.get(login_qrcode_img, headers={"User-Agent": get_user_agent()})
if resp.status_code == 200:
image_data = resp.content
base64_image = base64.b64encode(image_data).decode('utf-8')
return base64_image
raise Exception(f"fetch login image url failed, response message:{resp.text}")
return login_qrcode_img
except Exception as e:
print(e)
@ -29,7 +41,8 @@ async def find_login_qrcode(page: Page, selector: str) -> str:
def show_qrcode(qr_code) -> None: # type: ignore
"""parse base64 encode qrcode image and show it"""
qr_code = qr_code.split(",")[1]
if "," in qr_code:
qr_code = qr_code.split(",")[1]
qr_code = base64.b64decode(qr_code)
image = Image.open(BytesIO(qr_code))