feat: B站二维码、Cookie登录实现
This commit is contained in:
parent
a90b411e68
commit
94b5030ef0
14
README.md
14
README.md
|
@ -4,8 +4,8 @@
|
|||
|
||||
# 仓库描述
|
||||
|
||||
**小红书爬虫**,**抖音爬虫**, **快手爬虫**...。
|
||||
目前能抓取小红书、抖音、快手的视频、图片、评论、点赞、转发等信息。
|
||||
**小红书爬虫**,**抖音爬虫**, **快手爬虫**, **B站爬虫**...。
|
||||
目前能抓取小红书、抖音、快手、B站的视频、图片、评论、点赞、转发等信息。
|
||||
|
||||
原理:利用[playwright](https://playwright.dev/)搭桥,保留登录成功后的上下文浏览器环境,通过执行JS表达式获取一些加密参数
|
||||
通过使用此方式,免去了复现核心加密JS代码,逆向难度大大降低。
|
||||
|
@ -21,11 +21,11 @@
|
|||
## 功能列表
|
||||
| 平台 | Cookie 登录 | 二维码登录 | 手机号登录 | 关键词搜索 | 指定视频/帖子 ID 爬取 | 登录状态缓存 | 数据保存 | IP 代理池 | 滑块验证码 |
|
||||
|:---:|:---------:|:-----:|:-----:|:-----:|:-------------:|:------:|:----:|:------:|:-----:|
|
||||
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
|
||||
| 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| 快手 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
|
||||
| B 站 | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ |
|
||||
| 微博 | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ |
|
||||
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
|
||||
| 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| 快手 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
|
||||
| B 站 | ✅ | ✅ | ✕ | ✅ | ✕ | ✅ | ✅ | ✕ | ✕ |
|
||||
| 微博 | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ |
|
||||
|
||||
|
||||
## 使用方法
|
||||
|
|
|
@ -34,13 +34,74 @@ class BilibiliLogin(AbstractLogin):
|
|||
self.cookie_str = cookie_str
|
||||
|
||||
async def begin(self):
|
||||
pass
|
||||
"""Start login xiaohongshu"""
|
||||
utils.logger.info("Begin login Bilibili ...")
|
||||
if self.login_type == "qrcode":
|
||||
await self.login_by_qrcode()
|
||||
elif self.login_type == "phone":
|
||||
await self.login_by_mobile()
|
||||
elif self.login_type == "cookie":
|
||||
await self.login_by_cookies()
|
||||
else:
|
||||
raise ValueError("Invalid Login Type Currently only supported qrcode or phone or cookie ...")
|
||||
|
||||
@retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
|
||||
async def check_login_state(self) -> bool:
|
||||
"""
|
||||
Check if the current login status is successful and return True otherwise return False
|
||||
retry decorator will retry 20 times if the return value is False, and the retry interval is 1 second
|
||||
if max retry times reached, raise RetryError
|
||||
"""
|
||||
current_cookie = await self.browser_context.cookies()
|
||||
_, cookie_dict = utils.convert_cookies(current_cookie)
|
||||
if cookie_dict.get("SESSDATA", "") or cookie_dict.get("DedeUserID"):
|
||||
return True
|
||||
return False
|
||||
|
||||
async def login_by_qrcode(self):
|
||||
pass
|
||||
"""login bilibili website and keep webdriver login state"""
|
||||
utils.logger.info("Begin login bilibili by qrcode ...")
|
||||
|
||||
# click login button
|
||||
login_button_ele = self.context_page.locator(
|
||||
"xpath=//div[@class='right-entry__outside go-login-btn']//div"
|
||||
)
|
||||
await login_button_ele.click()
|
||||
|
||||
# find login qrcode
|
||||
qrcode_img_selector = "//div[@class='login-scan-box']//img"
|
||||
base64_qrcode_img = await utils.find_login_qrcode(
|
||||
self.context_page,
|
||||
selector=qrcode_img_selector
|
||||
)
|
||||
if not base64_qrcode_img:
|
||||
utils.logger.info("login failed , have not found qrcode please check ....")
|
||||
sys.exit()
|
||||
|
||||
# show login qrcode
|
||||
partial_show_qrcode = functools.partial(utils.show_qrcode, base64_qrcode_img)
|
||||
asyncio.get_running_loop().run_in_executor(executor=None, func=partial_show_qrcode)
|
||||
|
||||
utils.logger.info(f"Waiting for scan code login, remaining time is 20s")
|
||||
try:
|
||||
await self.check_login_state()
|
||||
except RetryError:
|
||||
utils.logger.info("Login bilibili failed by qrcode login method ...")
|
||||
sys.exit()
|
||||
|
||||
wait_redirect_seconds = 5
|
||||
utils.logger.info(f"Login successful then wait for {wait_redirect_seconds} seconds redirect ...")
|
||||
await asyncio.sleep(wait_redirect_seconds)
|
||||
|
||||
async def login_by_mobile(self):
|
||||
pass
|
||||
|
||||
async def login_by_cookies(self):
|
||||
pass
|
||||
utils.logger.info("Begin login bilibili by cookie ...")
|
||||
for key, value in utils.convert_str_cookie_to_dict(self.cookie_str).items():
|
||||
await self.browser_context.add_cookies([{
|
||||
'name': key,
|
||||
'value': value,
|
||||
'domain': ".bilibili.com",
|
||||
'path': "/"
|
||||
}])
|
||||
|
|
|
@ -98,6 +98,6 @@ class KuaishouLogin(AbstractLogin):
|
|||
await self.browser_context.add_cookies([{
|
||||
'name': key,
|
||||
'value': value,
|
||||
'domain': ".douyin.com",
|
||||
'domain': ".kuaishou.com",
|
||||
'path': "/"
|
||||
}])
|
||||
|
|
Loading…
Reference in New Issue