feat: B站二维码、Cookie登录实现
This commit is contained in:
parent
a90b411e68
commit
94b5030ef0
14
README.md
14
README.md
|
@ -4,8 +4,8 @@
|
||||||
|
|
||||||
# 仓库描述
|
# 仓库描述
|
||||||
|
|
||||||
**小红书爬虫**,**抖音爬虫**, **快手爬虫**...。
|
**小红书爬虫**,**抖音爬虫**, **快手爬虫**, **B站爬虫**...。
|
||||||
目前能抓取小红书、抖音、快手的视频、图片、评论、点赞、转发等信息。
|
目前能抓取小红书、抖音、快手、B站的视频、图片、评论、点赞、转发等信息。
|
||||||
|
|
||||||
原理:利用[playwright](https://playwright.dev/)搭桥,保留登录成功后的上下文浏览器环境,通过执行JS表达式获取一些加密参数
|
原理:利用[playwright](https://playwright.dev/)搭桥,保留登录成功后的上下文浏览器环境,通过执行JS表达式获取一些加密参数
|
||||||
通过使用此方式,免去了复现核心加密JS代码,逆向难度大大降低。
|
通过使用此方式,免去了复现核心加密JS代码,逆向难度大大降低。
|
||||||
|
@ -21,11 +21,11 @@
|
||||||
## 功能列表
|
## 功能列表
|
||||||
| 平台 | Cookie 登录 | 二维码登录 | 手机号登录 | 关键词搜索 | 指定视频/帖子 ID 爬取 | 登录状态缓存 | 数据保存 | IP 代理池 | 滑块验证码 |
|
| 平台 | Cookie 登录 | 二维码登录 | 手机号登录 | 关键词搜索 | 指定视频/帖子 ID 爬取 | 登录状态缓存 | 数据保存 | IP 代理池 | 滑块验证码 |
|
||||||
|:---:|:---------:|:-----:|:-----:|:-----:|:-------------:|:------:|:----:|:------:|:-----:|
|
|:---:|:---------:|:-----:|:-----:|:-----:|:-------------:|:------:|:----:|:------:|:-----:|
|
||||||
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
|
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
|
||||||
| 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
| 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
| 快手 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
|
| 快手 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
|
||||||
| B 站 | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ |
|
| B 站 | ✅ | ✅ | ✕ | ✅ | ✕ | ✅ | ✅ | ✕ | ✕ |
|
||||||
| 微博 | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ |
|
| 微博 | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ |
|
||||||
|
|
||||||
|
|
||||||
## 使用方法
|
## 使用方法
|
||||||
|
|
|
@ -34,13 +34,74 @@ class BilibiliLogin(AbstractLogin):
|
||||||
self.cookie_str = cookie_str
|
self.cookie_str = cookie_str
|
||||||
|
|
||||||
async def begin(self):
|
async def begin(self):
|
||||||
pass
|
"""Start login Bilibili"""
|
||||||
|
utils.logger.info("Begin login Bilibili ...")
|
||||||
|
if self.login_type == "qrcode":
|
||||||
|
await self.login_by_qrcode()
|
||||||
|
elif self.login_type == "phone":
|
||||||
|
await self.login_by_mobile()
|
||||||
|
elif self.login_type == "cookie":
|
||||||
|
await self.login_by_cookies()
|
||||||
|
else:
|
||||||
|
raise ValueError("Invalid Login Type Currently only supported qrcode or phone or cookie ...")
|
||||||
|
|
||||||
|
@retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
|
||||||
|
async def check_login_state(self) -> bool:
|
||||||
|
"""
|
||||||
|
Check if the current login status is successful and return True otherwise return False
|
||||||
|
retry decorator will retry 20 times if the return value is False, and the retry interval is 1 second
|
||||||
|
if max retry times reached, raise RetryError
|
||||||
|
"""
|
||||||
|
current_cookie = await self.browser_context.cookies()
|
||||||
|
_, cookie_dict = utils.convert_cookies(current_cookie)
|
||||||
|
if cookie_dict.get("SESSDATA", "") or cookie_dict.get("DedeUserID"):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
async def login_by_qrcode(self):
|
async def login_by_qrcode(self):
|
||||||
pass
|
"""login bilibili website and keep webdriver login state"""
|
||||||
|
utils.logger.info("Begin login bilibili by qrcode ...")
|
||||||
|
|
||||||
|
# click login button
|
||||||
|
login_button_ele = self.context_page.locator(
|
||||||
|
"xpath=//div[@class='right-entry__outside go-login-btn']//div"
|
||||||
|
)
|
||||||
|
await login_button_ele.click()
|
||||||
|
|
||||||
|
# find login qrcode
|
||||||
|
qrcode_img_selector = "//div[@class='login-scan-box']//img"
|
||||||
|
base64_qrcode_img = await utils.find_login_qrcode(
|
||||||
|
self.context_page,
|
||||||
|
selector=qrcode_img_selector
|
||||||
|
)
|
||||||
|
if not base64_qrcode_img:
|
||||||
|
utils.logger.info("login failed , have not found qrcode please check ....")
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
|
# show login qrcode
|
||||||
|
partial_show_qrcode = functools.partial(utils.show_qrcode, base64_qrcode_img)
|
||||||
|
asyncio.get_running_loop().run_in_executor(executor=None, func=partial_show_qrcode)
|
||||||
|
|
||||||
|
utils.logger.info(f"Waiting for scan code login, remaining time is 20s")
|
||||||
|
try:
|
||||||
|
await self.check_login_state()
|
||||||
|
except RetryError:
|
||||||
|
utils.logger.info("Login bilibili failed by qrcode login method ...")
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
|
wait_redirect_seconds = 5
|
||||||
|
utils.logger.info(f"Login successful then wait for {wait_redirect_seconds} seconds redirect ...")
|
||||||
|
await asyncio.sleep(wait_redirect_seconds)
|
||||||
|
|
||||||
async def login_by_mobile(self):
|
async def login_by_mobile(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
async def login_by_cookies(self):
|
async def login_by_cookies(self):
|
||||||
pass
|
utils.logger.info("Begin login bilibili by cookie ...")
|
||||||
|
for key, value in utils.convert_str_cookie_to_dict(self.cookie_str).items():
|
||||||
|
await self.browser_context.add_cookies([{
|
||||||
|
'name': key,
|
||||||
|
'value': value,
|
||||||
|
'domain': ".bilibili.com",
|
||||||
|
'path': "/"
|
||||||
|
}])
|
||||||
|
|
|
@ -98,6 +98,6 @@ class KuaishouLogin(AbstractLogin):
|
||||||
await self.browser_context.add_cookies([{
|
await self.browser_context.add_cookies([{
|
||||||
'name': key,
|
'name': key,
|
||||||
'value': value,
|
'value': value,
|
||||||
'domain': ".douyin.com",
|
'domain': ".kuaishou.com",
|
||||||
'path': "/"
|
'path': "/"
|
||||||
}])
|
}])
|
||||||
|
|
Loading…
Reference in New Issue