2023-06-27 15:38:30 +00:00
|
|
|
from abc import ABC, abstractmethod
|
2023-12-24 16:02:11 +00:00
|
|
|
from typing import Dict, Optional
|
|
|
|
|
|
|
|
from playwright.async_api import BrowserContext, BrowserType
|
2023-06-27 15:38:30 +00:00
|
|
|
|
|
|
|
|
|
|
|
class AbstractCrawler(ABC):
    """Interface that every crawler implementation has to fulfil.

    All four methods are abstract, so a subclass can only be instantiated
    once it overrides each of them. Nothing is implemented at this level.
    """

    @abstractmethod
    def init_config(self, platform: str, login_type: str, crawler_type: str, start_page: int, keyword: str):
        """Synchronous hook that receives the run configuration.

        What is done with the values (storing, validating, ...) is decided
        by the subclass; this base declaration does nothing.
        """

    @abstractmethod
    async def start(self):
        """Coroutine hook for starting the crawler; defined by subclasses."""

    @abstractmethod
    async def search(self):
        """Coroutine hook for the search step; defined by subclasses."""

    @abstractmethod
    async def launch_browser(
        self,
        chromium: BrowserType,
        playwright_proxy: Optional[Dict],
        user_agent: Optional[str],
        headless: bool = True,
    ) -> BrowserContext:
        """Coroutine hook annotated to return a Playwright ``BrowserContext``.

        ``playwright_proxy`` and ``user_agent`` are optional (may be
        ``None``) and ``headless`` defaults to ``True``. How the arguments
        are applied to the browser is left to the subclass.
        """
|
|
|
|
|
2023-06-27 15:38:30 +00:00
|
|
|
|
|
|
|
class AbstractLogin(ABC):
    """Interface for login handlers.

    Declares one entry coroutine plus one coroutine per login method.
    Every method is abstract; no behavior is provided here.
    """

    @abstractmethod
    async def begin(self):
        """Coroutine hook that kicks off the login; defined by subclasses."""

    @abstractmethod
    async def login_by_qrcode(self):
        """Coroutine hook for the QR-code login path; defined by subclasses."""

    @abstractmethod
    async def login_by_mobile(self):
        """Coroutine hook for the mobile login path; defined by subclasses."""

    @abstractmethod
    async def login_by_cookies(self):
        """Coroutine hook for the cookie login path; defined by subclasses."""
|
2024-01-14 14:06:31 +00:00
|
|
|
|
|
|
|
|
|
|
|
class AbstractStore(ABC):
    """Interface for storage backends.

    ``store_content`` and ``store_comment`` must be overridden.
    ``store_creator`` ships with a do-nothing default, so subclasses may
    leave it out.
    """

    @abstractmethod
    async def store_content(self, content_item: Dict):
        """Coroutine hook that receives one content item as a dict."""

    @abstractmethod
    async def store_comment(self, comment_item: Dict):
        """Coroutine hook that receives one comment item as a dict."""

    # TODO: support all platforms.
    # Only xhs is supported so far, which is why this method is deliberately
    # NOT decorated with @abstractmethod.
    async def store_creator(self, creator: Dict):
        """Optional coroutine hook for a creator dict; the default does nothing."""
|
2024-03-30 13:27:25 +00:00
|
|
|
|
2024-04-09 09:21:52 +00:00
|
|
|
class AbstractStoreImage(ABC):
    """Interface for image storage backends.

    The single hook has a do-nothing default and is not abstract, so this
    class can be instantiated as it stands.
    """

    # TODO: support all platforms.
    # Only weibo is supported so far, so the method is deliberately NOT
    # decorated with @abstractmethod.
    async def store_image(self, image_content_item: Dict):
        """Optional coroutine hook for an image item dict; the default does nothing."""
|
2024-03-30 13:27:25 +00:00
|
|
|
|
2024-04-13 12:18:04 +00:00
|
|
|
class AbstractApiClient(ABC):
    """Interface for platform API clients.

    All three coroutines are abstract; concrete clients must supply each
    of them before they can be instantiated.
    """

    @abstractmethod
    async def request(self, method, url, **kwargs):
        """Coroutine hook taking a method, a URL and arbitrary keyword options.

        The meaning of ``kwargs`` and the return value are defined by the
        subclass.
        """

    @abstractmethod
    async def update_cookies(self, browser_context: BrowserContext):
        """Coroutine hook that is handed a Playwright ``BrowserContext``.

        Presumably used to refresh the client's cookies from the browser
        session -- the actual behavior is defined by the subclass.
        """

    @abstractmethod
    async def pong(self):
        """Coroutine hook with no arguments; semantics are defined by subclasses."""
|