2023-06-16 11:35:43 +00:00
|
|
|
import re
|
|
|
|
import time
|
2023-06-09 12:41:53 +00:00
|
|
|
import random
|
2023-06-13 06:00:04 +00:00
|
|
|
import base64
|
2023-06-27 15:38:30 +00:00
|
|
|
import logging
|
2023-06-09 12:41:53 +00:00
|
|
|
from io import BytesIO
|
|
|
|
from typing import Optional, Dict, List, Tuple
|
|
|
|
|
2023-06-13 06:00:04 +00:00
|
|
|
from PIL import Image, ImageDraw
|
2023-06-09 12:41:53 +00:00
|
|
|
from playwright.async_api import Cookie
|
|
|
|
from playwright.async_api import Page
|
|
|
|
|
|
|
|
|
|
|
|
async def find_login_qrcode(page: Page, selector: str) -> str:
    """Find the login QR code image and return its ``src`` value.

    Calls ``page.wait_for_selector`` with ``selector`` and reads the ``src``
    property of the element it returns.

    Args:
        page: Playwright page to search.
        selector: Selector of the QR code image element.

    Returns:
        ``str()`` of the ``src`` property handle, or ``""`` when the lookup
        fails for any reason.
    """
    try:
        element = await page.wait_for_selector(selector=selector)
        src_handle = await element.get_property("src")
        return str(src_handle)
    except Exception:
        # Deliberate best-effort: callers receive "" on failure. Log through
        # the logging module (with traceback) instead of a bare print so the
        # failure is visible in the configured log output.
        logging.exception("find login qrcode failed, selector: %s", selector)
        return ""
|
|
|
|
|
|
|
|
|
|
|
|
def show_qrcode(qr_code: str):
    """Decode a base64 QR code image and display it inside a bordered frame.

    The input is split on "," and the second piece is base64-decoded, so the
    expected shape is ``<prefix>,<base64 payload>``.

    Args:
        qr_code: Comma-separated string whose second part is the base64
            encoded image data.
    """
    encoded_payload = qr_code.split(",")[1]
    raw_bytes = base64.b64decode(encoded_payload)
    qr_image = Image.open(BytesIO(raw_bytes))

    # Surround the QR code with a white margin and a thin black frame; the
    # extra quiet zone improves scanning accuracy.
    padding = 10
    qr_width, qr_height = qr_image.size
    canvas_width = qr_width + 2 * padding
    canvas_height = qr_height + 2 * padding

    canvas = Image.new('RGB', (canvas_width, canvas_height), color=(255, 255, 255))
    canvas.paste(qr_image, (padding, padding))

    # The frame runs along the outermost pixels of the canvas.
    frame_box = (0, 0, canvas_width - 1, canvas_height - 1)
    ImageDraw.Draw(canvas).rectangle(frame_box, outline=(0, 0, 0), width=1)

    canvas.show()
|
2023-06-09 12:41:53 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_user_agent() -> str:
    """Return one user-agent string picked at random from a fixed pool."""
    candidates = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36",
    )
    return random.choice(candidates)
|
|
|
|
|
|
|
|
|
|
|
|
def convert_cookies(cookies: Optional[List[Cookie]]) -> Tuple[str, Dict]:
    """Convert a list of cookie mappings into a cookie string and a dict.

    Args:
        cookies: Cookie mappings, each read via its ``name`` and ``value``
            keys. ``None`` or an empty list is accepted.

    Returns:
        A tuple ``(cookie_string, cookie_dict)``: the string joins
        ``name=value`` pairs with ";" and the dict maps name to value (a
        later duplicate name overwrites an earlier one). Returns
        ``("", {})`` when there are no cookies.
    """
    if not cookies:
        return "", {}

    # Read every cookie once, then derive both representations from the pairs.
    pairs = [(item.get('name'), item.get('value')) for item in cookies]
    joined = ";".join(f"{name}={value}" for name, value in pairs)
    return joined, dict(pairs)
|
2023-06-16 11:35:43 +00:00
|
|
|
|
|
|
|
|
2023-06-22 14:43:26 +00:00
|
|
|
def convert_str_cookie_to_dict(cookie_str: str) -> Dict:
    """Parse a ";"-separated cookie string into a ``{name: value}`` dict.

    Args:
        cookie_str: Cookie string such as ``"a=1; b=2"``. May be empty.

    Returns:
        Dict mapping each cookie name to its value. Empty fragments and
        fragments without "=" are skipped. When a name repeats, the last
        occurrence wins.
    """
    cookie_dict = dict()
    if not cookie_str:
        return cookie_dict

    for fragment in cookie_str.split(";"):
        fragment = fragment.strip()
        if not fragment:
            continue
        # Split on the FIRST "=" only: values may themselves contain "="
        # (e.g. base64 padding) and must not be truncated.
        name, separator, value = fragment.partition("=")
        if not separator:
            # Malformed fragment without "=": skip it rather than crash.
            continue
        cookie_dict[name] = value
    return cookie_dict
|
|
|
|
|
|
|
|
|
2023-06-16 11:35:43 +00:00
|
|
|
def get_current_timestamp():
    """Return the current ``time.time()`` value in whole milliseconds."""
    milliseconds = time.time() * 1000
    return int(milliseconds)
|
|
|
|
|
|
|
|
|
|
|
|
def match_interact_info_count(count_str: str) -> int:
    """Extract the first run of digits from ``count_str`` as an integer.

    Args:
        count_str: Text that may contain a number; may be empty or ``None``.

    Returns:
        The first contiguous digit sequence converted to ``int``, or ``0``
        when the input is empty or contains no digits.
    """
    if not count_str:
        return 0

    digits = re.search(r'\d+', count_str)
    if digits is None:
        return 0
    return int(digits.group())
|
2023-06-27 15:38:30 +00:00
|
|
|
|
|
|
|
|
|
|
|
def init_loging_config():
    """Configure root logging and return the "Media Crawler" logger.

    Calls ``logging.basicConfig`` with INFO level and a timestamped format,
    then fetches the named logger from the logging registry.

    Returns:
        The ``logging.Logger`` registered under the name "Media Crawler",
        with its level set to INFO. Earlier versions returned ``None``, so
        callers that ignore the return value are unaffected.
    """
    level = logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s %(name)s %(levelname)s %(message)s ",
        datefmt='%Y-%m-%d %H:%M:%S'
    )
    # Use getLogger rather than instantiating logging.Logger directly: a
    # directly constructed Logger is not registered with the logging manager
    # and is not attached to the root handlers, so creating and discarding
    # one had no effect.
    logger = logging.getLogger("Media Crawler")
    # basicConfig is a no-op when the root logger already has handlers, so
    # set the level on the named logger explicitly.
    logger.setLevel(level)
    return logger
|