feat: B站二维码、Cookie登录实现

This commit is contained in:
Relakkes 2023-12-04 00:02:00 +08:00
parent a90b411e68
commit 94b5030ef0
3 changed files with 72 additions and 11 deletions

View File

@ -4,8 +4,8 @@
# 仓库描述
**小红书爬虫**，**抖音爬虫**，**快手爬虫**...。
目前能抓取小红书、抖音、快手的视频、图片、评论、点赞、转发等信息。
**小红书爬虫**，**抖音爬虫**，**快手爬虫**，**B站爬虫**...。
目前能抓取小红书、抖音、快手、B站的视频、图片、评论、点赞、转发等信息。
原理:利用[playwright](https://playwright.dev/)搭桥，保留登录成功后的上下文浏览器环境，通过执行JS表达式获取一些加密参数。
通过使用此方式，免去了复现核心加密JS代码，逆向难度大大降低。
@ -21,11 +21,11 @@
## 功能列表
| 平台 | Cookie 登录 | 二维码登录 | 手机号登录 | 关键词搜索 | 指定视频/帖子 ID 爬取 | 登录状态缓存 | 数据保存 | IP 代理池 | 滑块验证码 |
|:---:|:---------:|:-----:|:-----:|:-----:|:-------------:|:------:|:----:|:------:|:-----:|
| 小红书 | | ✅ | ✅ | | ✅ | | | ✅ | ✕ |
| 抖音 | | ✅ | ✅ | | ✅ | | | ✅ | ✅ |
| 快手 | | ✅ | ✕ | | ✅ | | | ✅ | ✕ |
| B 站 | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ |
| 微博 | | ✕ | ✕ | | ✕ | | | ✕ | ✕ |
| 小红书 | | ✅ | ✅ | | ✅ | | | ✅ | ✕ |
| 抖音 | | ✅ | ✅ | | ✅ | | | ✅ | ✅ |
| 快手 | | ✅ | ✕ | | ✅ | | | ✅ | ✕ |
| B 站 | ✅ | ✅ | ✕ | ✅ | ✕ | ✅ | ✅ | ✕ | ✕ |
| 微博 | | ✕ | ✕ | | ✕ | | | ✕ | ✕ |
## 使用方法

View File

@ -34,13 +34,74 @@ class BilibiliLogin(AbstractLogin):
self.cookie_str = cookie_str
async def begin(self):
    """Log in to Bilibili using the method selected by ``self.login_type``.

    ``"qrcode"``, ``"phone"`` and ``"cookie"`` are dispatched to
    ``login_by_qrcode``, ``login_by_mobile`` and ``login_by_cookies``
    respectively.

    Raises:
        ValueError: If ``self.login_type`` is none of the values above.
    """
    utils.logger.info("Begin login Bilibili ...")
    if self.login_type == "qrcode":
        await self.login_by_qrcode()
    elif self.login_type == "phone":
        # NOTE(review): login_by_mobile is an empty stub in this file, so
        # choosing "phone" currently completes without logging in.
        await self.login_by_mobile()
    elif self.login_type == "cookie":
        await self.login_by_cookies()
    else:
        raise ValueError("Invalid Login Type Currently only supported qrcode or phone or cookie ...")
@retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
async def check_login_state(self) -> bool:
    """Report whether the browser context already carries login cookies.

    The result is ``True`` when the ``SESSDATA`` or the ``DedeUserID`` cookie
    has a non-empty value, and ``False`` otherwise.

    The ``retry`` decorator calls this coroutine again while it returns
    ``False``: at most 20 attempts, one second apart. Once the attempts are
    used up a ``RetryError`` is raised to the caller.
    """
    browser_cookies = await self.browser_context.cookies()
    # Only the second element of the converted result is used; it is read
    # with ``.get`` by cookie name.
    _, cookies_by_name = utils.convert_cookies(browser_cookies)
    login_markers = ("SESSDATA", "DedeUserID")
    # any() always yields a real bool, which the retry predicate
    # (``value is False``) depends on.
    return any(cookies_by_name.get(marker) for marker in login_markers)
async def login_by_qrcode(self):
    """Log in to Bilibili by QR code and keep the browser's login state.

    Steps: click the login button on ``self.context_page``, locate the QR
    code image, display it, then poll ``check_login_state`` until login
    cookies appear. After a successful login the coroutine sleeps a few
    seconds to let the page redirect.

    The process is terminated via ``sys.exit()`` when the QR code cannot be
    found or when ``check_login_state`` raises ``RetryError``.
    """
    utils.logger.info("Begin login bilibili by qrcode ...")

    # Click the login button to open the login dialog.
    login_button_ele = self.context_page.locator(
        "xpath=//div[@class='right-entry__outside go-login-btn']//div"
    )
    await login_button_ele.click()

    # Find the login QR code image.
    qrcode_img_selector = "//div[@class='login-scan-box']//img"
    base64_qrcode_img = await utils.find_login_qrcode(
        self.context_page,
        selector=qrcode_img_selector
    )
    if not base64_qrcode_img:
        utils.logger.info("login failed , have not found qrcode please check ....")
        # NOTE(review): sys.exit() without an argument reports exit status 0
        # even though login failed -- confirm whether callers/scripts need a
        # non-zero status here.
        sys.exit()

    # Show the QR code from the loop's default executor so that displaying
    # it does not block the polling below. The returned future is
    # intentionally not awaited.
    partial_show_qrcode = functools.partial(utils.show_qrcode, base64_qrcode_img)
    asyncio.get_running_loop().run_in_executor(executor=None, func=partial_show_qrcode)

    # The 20s mentioned here mirrors the retry policy on check_login_state
    # (20 attempts, 1 second apart).
    utils.logger.info("Waiting for scan code login, remaining time is 20s")
    try:
        await self.check_login_state()
    except RetryError:
        utils.logger.info("Login bilibili failed by qrcode login method ...")
        sys.exit()

    wait_redirect_seconds = 5
    utils.logger.info(f"Login successful then wait for {wait_redirect_seconds} seconds redirect ...")
    await asyncio.sleep(wait_redirect_seconds)
async def login_by_mobile(self):
    """Placeholder for phone-number login; it currently performs no action."""
    return None
async def login_by_cookies(self):
    """Log in by injecting the cookies from ``self.cookie_str`` into the browser context.

    Every name/value pair obtained from
    ``utils.convert_str_cookie_to_dict(self.cookie_str)`` is registered for
    the ``.bilibili.com`` domain with path ``/``.
    """
    utils.logger.info("Begin login bilibili by cookie ...")
    cookies = [
        {
            'name': key,
            'value': value,
            'domain': ".bilibili.com",
            'path': "/"
        }
        for key, value in utils.convert_str_cookie_to_dict(self.cookie_str).items()
    ]
    # add_cookies takes a list, so all cookies go in one call instead of one
    # awaited round trip per cookie. Skip the call when nothing was parsed.
    # NOTE(review): assumes self.browser_context is a Playwright
    # BrowserContext -- confirm.
    if cookies:
        await self.browser_context.add_cookies(cookies)

View File

@ -98,6 +98,6 @@ class KuaishouLogin(AbstractLogin):
await self.browser_context.add_cookies([{
'name': key,
'value': value,
'domain': ".douyin.com",
'domain': ".kuaishou.com",
'path': "/"
}])