Refactor parts of the project

rm 2024-11-22 09:15:58 +08:00
parent a093a7a6f7
commit c6025124e8
10 changed files with 361 additions and 62 deletions

View File

@@ -6,96 +6,113 @@
# torrents sets the page that is scraped for resources; it defaults to torrents.php
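# The remaining keys are not documented in this file. Judging from the code that reads it, flag = 1 appears to
# mark a site the schedulers skip (sections are only processed when flag != 1), and attendance_uri, when
# present, is the path requested for the daily check-in.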
# ===========================【start】 nastools/MoviePilot partner sites 【start】===========================
['鲨鱼PT']
['sharkpt']
name = "鲨鱼PT"
url = "https://sharkpt.net"
cookie = ""
level = 3
flag = 1
['Audiences 观众/奥迪']
['audiences']
name = "观众/奥迪"
url = "https://audiences.me"
cookie = ""
level = 3
['猪猪网']
['piggo']
name = "猪猪网"
url = "https://piggo.me"
cookie = ""
level = 3
['ZmPT 织梦']
['zmpt']
name = "织梦"
url = "https://zmpt.cc"
cookie = ""
level = 3
['杜比PT']
['hddolby']
name = "杜比PT"
url = "https://www.hddolby.com"
cookie = ""
level = 3
label = "网站凉凉"
flag = 1
['自由农场PT']
['0ff']
name = "自由农场PT"
url = "https://pt.0ff.cc"
cookie = ""
level = 3
['HDFans 红豆饭']
['hdfans']
name = "红豆饭"
url = "http://hdfans.org"
cookie = ""
level = 3
['HHanClub 憨憨']
['hhanclub']
name = "憨憨"
url = "https://hhanclub.top"
cookie = ""
level = 3
['WinterSakura 冬樱']
['wintersakura']
name = "冬樱"
url = "https://wintersakura.net"
cookie = ""
level = 3
['Red Leaves 红叶 ']
['leaves']
name = "红叶"
url = "https://leaves.red"
cookie = ""
level = 3
['1PTBar/壹PT']
['1ptba']
name = "壹PT"
url = "https://1ptba.com"
cookie = "c_secure_uid=MTA0ODQx; c_secure_pass=c32037fc1a670e75d797df403e9e5a33; c_secure_ssl=eWVhaA%3D%3D; c_secure_tracker_ssl=eWVhaA%3D%3D; c_secure_login=bm9wZQ%3D%3D"
level = 3
label = "游戏、综艺、电子书、windows软件"
['icc2022']
name = "icc2022"
url = "https://www.icc2022.com"
cookie = "c_secure_uid=MTk0OTI%3D; c_secure_pass=d7f655d5b8e90739f23620b9d24241e1; c_secure_ssl=eWVhaA%3D%3D; c_secure_tracker_ssl=eWVhaA%3D%3D; c_secure_login=bm9wZQ%3D%3D"
level = 3
label = "动漫、综艺、电视剧"
['PTLSP 零食铺']
['ptlsp']
name = "零食铺"
url = "https://www.ptlsp.com"
cookie = ""
level = 3
label = ""
['xingtan 杏坛']
['xingtan']
name = "杏坛"
url = "https://xingtan.one"
cookie = ""
level = 3
label = ""
['PTVicomo 象站']
['ptvicomo']
name = "象站"
url = "https://ptvicomo.net"
cookie = "c_secure_uid=MjE2MTI%3D; c_secure_pass=80fa4d815a5b1df9e9ab62f0388bfc83; c_secure_ssl=eWVhaA%3D%3D; c_secure_tracker_ssl=eWVhaA%3D%3D; c_secure_login=bm9wZQ%3D%3D"
level = 3
label = ""
['AGSV 末日种子库']
['agsvpt']
name = "末日种子库"
url = "https://www.agsvpt.com"
cookie = ""
level = 3
label = ""
['HDKylin 麒麟']
['hdkyl']
name = "麒麟"
url = "https://www.hdkyl.in"
cookie = ""
level = 3
@@ -105,6 +122,7 @@ label = ""
# ===========================【start】 9kg sites 【start】===========================
['pttime']
name = "pttime"
url = "https://www.pttime.org"
cookie = "c_lang_folder=chs; c_secure_uid=OTExNDU%3D; c_secure_pass=b4567a9950c3906657ed8baeb5a39b2d; c_secure_ssl=eWVhaA%3D%3D; c_secure_tracker_ssl=eWVhaA%3D%3D; c_secure_login=bm9wZQ%3D%3D"
level = 2
@@ -112,63 +130,65 @@ label = "9kg"
torrents = "/adults.php"
['2xfree']
name = "2xfree"
url = "https://pt.2xfree.org"
cookie = ""
level = 2
label = "9kg"
['FSM 飞天拉面神教' ]
['fsm']
name = "飞天拉面神教"
url = "https://fsm.name"
cookie = ""
level = 2
label = "纯9kg站点、json数据格式、日本AV、国产AV、AV动漫"
flag = 1
['ilolicon 萝莉控 ' ]
['ilolicon']
name = "萝莉控"
url = "https://share.ilolicon.com"
cookie = ""
level = 2
label = "9kg"
['KamePT ' ]
['kamept']
name = "KamePT"
url = "https://kamept.com"
cookie = ""
level = 2
label = "9kg"
['Kelu 可鲁/可撸' ]
url = "https://our.kelu.one"
cookie = ""
level = 2
label = "9kg、男男"
flag = 1
['M-Team 馒头 ' ]
['m-team']
name = "馒头"
url = "https://kp.m-team.cc"
cookie = ""
level = 1
label = "9kg"
flag = 1
['NicePT 老师 ' ]
['nicept']
name = "老师"
url = "https://www.nicept.net"
cookie = ""
level = 2
label = "9kg"
['Rousi【肉丝】 ' ]
['rousi']
name = "肉丝"
url = "https://rousi.zip"
cookie = "c_secure_uid=MjE0NDM%3D; c_secure_pass=c4065cb7794e2d2dc96b8b2d9fbbb661; c_secure_ssl=eWVhaA%3D%3D; c_secure_tracker_ssl=eWVhaA%3D%3D; c_secure_login=bm9wZQ%3D%3D"
level = 4
label = "9kg"
['TU88 ' ]
['tu88']
name = "TU88"
url = "http://pt.tu88.men"
cookie = ""
level = 4
label = "9kg"
['YDYPT【伊甸园】 ' ]
['hdbd']
name = "伊甸园"
url = "https://pt.hdbd.us"
cookie = ""
level = 4
@@ -176,76 +196,88 @@ label = "9kg"
# ===========================【end】 9kg partner sites 【end】===========================
# ===========================【start】 十三大 sites 【start】===========================
['CHDBits【新岛/金钱岛】 ' ]
['ptchdbits']
name = "新岛/金钱岛"
url = "https://ptchdbits.co"
cookie = ""
level = 1
label = "十三大"
flag = 1
['HDChina【瓷器】 ' ]
['hdchina']
name = "瓷器"
url = "https://hdchina.org"
cookie = ""
level = 1
label = "十三大"
['HDHome【家园】' ]
['hdhome']
name = "家园"
url = "http://hdhome.org"
cookie = ""
level = 1
label = "十三大"
['HDSky【高清天空】' ]
['hdsky']
name = "高清天空"
url = "https://hdsky.me"
cookie = ""
level = 1
label = "十三大"
['KeepFRDS【朋友/月月】' ]
['keepfrds']
name = "朋友/月月"
url = "https://pt.keepfrds.com"
cookie = ""
level = 1
label = "十三大"
['OpenCD【皇后】' ]
['open']
name = "皇后"
url = "https://open.cd"
cookie = ""
level = 1
label = "十三大"
['OurBits【我堡】' ]
['ourbits']
name = "我堡"
url = "https://ourbits.club"
cookie = ""
level = 1
label = "十三大"
flag = 1
['PTerClub【PT之友俱乐部/猫站】' ]
['pterclub']
name = "PT之友俱乐部/猫站"
url = "https://pterclub.com"
cookie = ""
level = 1
label = "十三大"
['SSD【春天/不可说】' ]
['springsunday']
name = "春天/不可说"
url = "https://springsunday.net"
cookie = ""
level = 1
label = "十三大"
flag = 1
['TTG【听听歌/套套哥】' ]
['totheglory']
name = "听听歌/套套哥"
url = "https://totheglory.im"
cookie = ""
level = 1
label = "十三大"
# ===========================【end】 十三大 partner sites 【end】===========================
['pthome 铂金家']
['pthome']
name = "铂金家"
url = "https://www.pthome.net"
cookie = ""
level = 3
label = "影视,有声书,音乐,综合"
['btschool']
name = "bt学校"
url = "https://pt.btschool.club"
cookie = "cf_clearance=sntngeGiRRILzV56kgNcq_LV2OD0R7TBa6idECpWKKE-1732070383-1.2.1.1-mTasoHQiz6L8QH8.bKO.BCWQgito1D_qp87qLxbREUx__xuhM_4U.450bADQp6G76Paah8_6mLr3eEY3yi5yqCFKdYsBwBOyhhRi2QJSOpHSp7GNmWaKYWx72g0GrL.b8dAhhqf2SAzkEyQXs_kWV9s6kONT4HfNjD8ACN8VVJlLhnwjVDiFy1Dc8W3b4okLuCcwIWEo7jmdCJckZzItPjxzGBk5Pc5BAjoq_9MvK73eD2bG9WAhTCw4qObhBsEBQ9ub3MZuJWXv.l96OtB_XasgBPvupytF8j2h4lb2.gp_HqxMn6kK9Nz61kk8xau2Ka3gCmDYJ5OYe6I7KcBXJU8YRHkSFlqh_kg3wQWA0nFjFbQTzyRdsUKhzQa.c6rljxuOCUmgwBgso5ADlf0A56EpptPpFwzfEYmGfjhG3r6HLwT.CTcmmTadkw7oqwbB; c_secure_uid=MTM4NTQ2; c_secure_pass=5eea7234b66710a8f6346d88d00d56a4; c_secure_ssl=eWVhaA%3D%3D; c_secure_tracker_ssl=eWVhaA%3D%3D; c_secure_login=bm9wZQ%3D%3D"
attendance_uri = "/index.php?action=addbonus"
@@ -253,13 +285,15 @@ level = 3
label = "教育、培训"
['聆音']
['soulvoice']
name = "聆音"
url = "https://pt.soulvoice.club"
cookie = ""
level = 3
label = "小说、电子书、有声读物"
['咖啡']
['ptcafe']
name = "咖啡"
url = "https://ptcafe.club"
cookie = "c_secure_uid=MTY1Mzc%3D; c_secure_pass=d5ec244890a07c683d94679d6bb84bb6; c_secure_ssl=eWVhaA%3D%3D; c_secure_tracker_ssl=eWVhaA%3D%3D; c_secure_login=bm9wZQ%3D%3D"
level = 3

View File

@@ -269,7 +269,7 @@ class PtGetData:
        url = section_data.get('url') + uri
        i = 0
        for _ in range(5):
            logging.info(f"开始对:{url} 进行 第 {i} 抓取!")
            logger.info(f"开始对:{url} 进行 第 {i} 抓取!")
            i = i + 1
            try:
                response = requests.get(url, headers=self.headers, timeout=5 * 60)

0
PT/pt_impl/__init__.py Normal file
View File

141
PT/pt_impl/default_pt.py Normal file
View File

@@ -0,0 +1,141 @@
import sys
import time
import requests
from lxml import html as lhtml
from PT.pt_impl.subject_pt import SubjectPt
from loguru import logger
from PT.pt_impl.util import extract_id, check_seed_status, contains_alpha_or_chinese
from dateutil import parser


class DefaultSubject(SubjectPt):

    def __init__(self):
        logger.add("../log/PtGetData_{time:YYYY-MM-DD}.log", rotation="1 day", level="INFO")
        logger.add(sys.stderr, level="INFO")
        self.headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'accept-language': 'zh,zh-CN;q=0.9',
            'cache-control': 'max-age=0',
            'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"',
            'sec-fetch-dest': 'document',
            'sec-fetch-mode': 'navigate',
            'sec-fetch-site': 'same-origin',
            'sec-fetch-user': '?1',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        }

    def request_url(self, url):
        html = ""
        for i in range(5):
            logger.info(f"开始对:{url} 进行 第 {i} 抓取!")
            try:
                response = requests.get(url, headers=self.headers, timeout=5 * 60)
                if response.status_code == 200:
                    html = response.text
                else:
                    logger.error(f"{url} , 出现错误code码是{response.status_code}, {response.text}")
            except Exception as e:
                time.sleep(2)
        else:
            logger.error(f"{url} , 5次出现错误无法访问")
        if len(html) != 0:
            # Parse the HTML text and turn it into an lxml Element object
            return lhtml.fromstring(html)
        return html

    def get_page_num(self, page_html):
        # Read the page number from the last pagination link ("page=N" in its href)
        page_href = page_html.xpath('//td[@class="embedded"]//p[@align="center"][1]//a[last()]/@href')[0]
        pages_str = extract_id(page_href, "page")
        return int(pages_str) if pages_str.isdigit() else 0

    def get_list_data(self, page_html):
        # Parse the torrent list with lxml
        row_follow_tables = page_html.xpath('//table[@class="torrents"]//tr[position() > 1]')
        for row_follow in row_follow_tables:
            html_content = lhtml.tostring(row_follow, encoding='unicode')
            # print(f"html内容{html_content}")
            # Primary title
            first_title = row_follow.xpath('.//table[@class="torrentname"]//a[@title]/@title')[0]
            second_title_s = row_follow.xpath(
                './/table[@class="torrentname"]//td[@class="embedded"]/text()[normalize-space()]'
                '| .//table[@class="torrentname"]//td[@class="embedded"]//font[@title]/text()')
            # Secondary title
            second_title = ""
            for text in second_title_s:
                second_title = contains_alpha_or_chinese(text) if contains_alpha_or_chinese(
                    text) is not None else None
            print(f"标题:{first_title} 二级标题:{second_title}")
            type_id, type_name = "", ""
            type_html = row_follow.xpath('.//td[contains(@class, "rowfollow")][1]//a[@href]')
            for td_element in type_html:
                type_id = extract_id(td_element.xpath('./@href')[0], "cat")
                type_name = td_element.xpath('.//img[@title]/@title')[0]
                # html_content = lhtml.tostring(td_element, encoding='unicode')
            print(f"类型是:{type_id} + ' ' + {type_name}")
            # Seed (promotion) status
            seed_status = 1
            seed_status_html = row_follow.xpath(
                './/table[@class="torrentname"]//td[@class="embedded"]//img[@alt]/@alt')
            if len(seed_status_html) > 0:
                for seed in seed_status_html:
                    s = check_seed_status(seed)
                    if s is not None:
                        seed_status = s
            print(f"种子状态:{seed_status}")
            seeding_status = 0
            seeding_status_html = row_follow.xpath(
                './/table[@class="torrentname"]//div[@title]/@title')
            if len(seeding_status_html) > 0:
                seeding_status = 1
            print(f"做种状态:{seeding_status}")
            comment_count = row_follow.xpath('.//td[@class="rowfollow"][2]//a/text()[normalize-space()]')[0]
            print(f"评论数:{comment_count}")
            upload_time = ""
            upload_time_html = row_follow.xpath('.//span[@title][parent::td]/@title')
            for td_element in upload_time_html:
                try:
                    upload_time = parser.parse(td_element)
                except ValueError:
                    pass
            print(f"资源上传时间:{upload_time}")
            # Resource size
            size_html = row_follow.xpath('.//td[@class="rowfollow"][3]/text()[normalize-space()]')
            size = size_html[0].strip() + '' + size_html[1].strip()
            print(f"资源大小:{size}")
            seed_count = row_follow.xpath('.//td[@class="rowfollow"][4]')[0].text_content().strip()
            print(f"做种数:{seed_count}")
            download_count = row_follow.xpath('.//td[@class="rowfollow"][5]')[0].text_content().strip()
            print(f"下载数:{download_count}")
            completion_count = row_follow.xpath('.//td[@class="rowfollow"][6]')[0].text_content().strip()
            print(f"完成数:{completion_count}")
            publisher = row_follow.xpath('.//td[@class="rowfollow"][7]')[0].text_content().strip()
            print(f"发布者:{publisher}")
            download_link = row_follow.xpath(
                './/table[@class="torrentname"]//*[contains(@class, "download")]/parent::a/@href')[0]
            pt_id = extract_id(download_link, "id")
            # Details page link
            details_link = row_follow.xpath('.//table[@class="torrentname"]//a[@href]/@href')[0]
            print(
                f"PT_ID == {pt_id} 下载链接:/{download_link} 详情链接:/{details_link}")

    def main_this_pt(self, section_data):
        res_txt = f"开始对 [{section_data.get('name')}] 进行操作...,抓取数据:"
        logger.info(res_txt)
        url, cookie = section_data.get('url'), section_data.get('cookie')
        self.headers["cookie"] = cookie
        if len(section_data.get("torrents")) > 1:
            self.torrents_uri = section_data.get("torrents")
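
A minimal usage sketch (illustrative, not part of the commit). As shown above, DefaultSubject does not override SubjectPt's abstract get_detail_data / insert_data, so the sketch stubs them in a throwaway subclass before instantiating; the section dict mirrors one entry of the TOML config, with placeholder values:

from PT.pt_impl.default_pt import DefaultSubject


class ExampleSubject(DefaultSubject):
    # Stubs for the two SubjectPt hooks that DefaultSubject does not (yet) implement
    def get_detail_data(self, detail_html):
        pass

    def insert_data(self, data):
        pass


if __name__ == "__main__":
    subject = ExampleSubject()
    section = {  # one parsed TOML section; placeholder values
        "name": "example",
        "url": "https://tracker.example",
        "cookie": "c_secure_uid=...; c_secure_pass=...",
        "torrents": "/torrents.php",
    }
    subject.main_this_pt(section)
    page = subject.request_url(section["url"] + section["torrents"])
    if isinstance(page, str):  # request_url returns "" when every attempt failed
        print("fetch failed")
    else:
        print("pages:", subject.get_page_num(page))
        subject.get_list_data(page)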

36
PT/pt_impl/subject_pt.py Normal file
View File

@@ -0,0 +1,36 @@
from abc import ABC, abstractmethod


class SubjectPt(ABC):
    # ======= Phase 1 ========
    # 1. Get the number of pages for a given listing page
    # 2. Get the list data from a given page
    # 3. Get the detail data for a given entry
    # 4. Insert the data into the database
    # ======= Phase 2 ========
    # 1. Download torrents
    # 2. Cross-seed torrents
    # 3. Delete torrents

    @abstractmethod
    def request_url(self, url):
        pass

    @abstractmethod
    def get_page_num(self, page_html):
        pass

    @abstractmethod
    def get_list_data(self, page_html):
        pass

    @abstractmethod
    def get_detail_data(self, detail_html):
        pass

    @abstractmethod
    def insert_data(self, data):
        pass

    @abstractmethod
    def main_this_pt(self, section_data):
        pass

22
PT/pt_impl/util.py Normal file
View File

@@ -0,0 +1,22 @@
from urllib.parse import urlparse, parse_qs


def extract_id(url, field):
    # Pull a single query-string parameter (e.g. "id", "cat", "page") out of a URL
    parsed_url = urlparse(url)
    query_params = parse_qs(parsed_url.query)
    return query_params.get(field, [None])[0]


def contains_alpha_or_chinese(input_str):
    s = input_str.strip()
    # Does the string contain any letters?
    has_alpha = any(char.isalpha() for char in s)
    # Does it contain any Chinese characters?
    has_chinese = any('\u4e00' <= char <= '\u9fff' for char in s)
    # Return the stripped string if either is true, otherwise None
    return s if has_alpha or has_chinese else None


def check_seed_status(status):
    # Return the status only if it contains one of the keywords below, otherwise None
    s = ["%", "Free", "free"]
    return status if any(keyword in status for keyword in s) else None
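
A quick sketch of what these helpers return (the example inputs are made up; the URL shapes match what default_pt.py passes in):

from PT.pt_impl.util import extract_id, contains_alpha_or_chinese, check_seed_status

print(extract_id("details.php?id=12345&hit=1", "id"))      # "12345"
print(extract_id("torrents.php?page=42", "page"))           # "42"
print(contains_alpha_or_chinese("  Example.2024.1080p  "))  # "Example.2024.1080p" (contains letters)
print(contains_alpha_or_chinese("2024-11-22"))              # None (no letters or Chinese characters)
print(check_seed_status("50%"))                             # "50%" (contains "%")
print(check_seed_status("Sticky"))                          # None (no "%", "Free" or "free")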

View File

@@ -37,16 +37,17 @@ class PtOperation:
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        }

    async def attendances(self, section_name, section_data):
    async def attendances(self, section_data):
        """
        Check in
        :param section_name:
        :param cookie:
        :param url:
        :return:
        """
        url, cookie, attendance_uri = section_data.get('url'), section_data.get('cookie'), section_data.get(
            'attendance_uri')
        section_name, url, cookie, attendance_uri = section_data.get('name'), \
                                                    section_data.get('url'), \
                                                    section_data.get('cookie'), \
                                                    section_data.get('attendance_uri')
        res_text, log_txt = "", ""
        self.headers["cookie"] = cookie
        # Check-in is only possible when the cookie is not empty
@@ -61,10 +62,10 @@
            for i in range(5):
                try:
                    response_result = requests.get(url, headers=self.headers)
                    print(response_result.status_code)
                    print(self.headers)
                    print("=" * 20)
                    print(response_result.text)
                    # print(response_result.status_code)
                    # print(self.headers)
                    # print("=" * 20)
                    # print(response_result.text)
                    if response_result.status_code == 200 or response_result.text in "签到成功":
                        res_text = '签到成功!'
                        break
@@ -85,12 +86,13 @@
        self.headers["cookie"] = ""

    async def signup(self, section_name, section_data):
    async def signup(self, section_data):
        """
        Whether registration is open
        :return:
        """
        url = section_data.get('url')
        section_name, url = section_data.get('name'), section_data.get('url')
        request_url = url + "/signup.php"
        text = f"网站名:{section_name}, 网址:{request_url}"
        logger.info(f"开始 -->> {text}")
@@ -172,9 +174,9 @@
                    if flag != 1:
                        print(f"Processing section: {section_name} --- {section_data.get('url')}")
                        # Check in
                        self.attendances(section_name, section_data)
                        self.attendances(section_data)
                        # Check whether registration is open
                        self.signup(section_name, section_data)
                        self.signup(section_data)
        except FileNotFoundError:
            print(f"Error: The file '{self.toml_file}' was not found.")
        except toml.TomlDecodeError as e:

View File

@@ -0,0 +1,64 @@
import importlib
from abc import ABC, abstractmethod


# Abstract subject
class Subject(ABC):
    @abstractmethod
    def request(self):
        pass


# Real subject 1
class RealSubjectA(Subject):
    def request(self):
        print("RealSubjectA: Handling request.")


# Real subject 2
class RealSubjectB(Subject):
    def request(self):
        print("RealSubjectB: Handling request.")


# Default proxy target (fallback object)
class DefaultSubject(Subject):
    def request(self):
        print("DefaultSubject: Handling request as fallback.")


# Proxy object
class Proxy(Subject):
    def __init__(self, real_class_name: str):
        self._real_subject = self._create_real_subject(real_class_name)

    def request(self):
        print("Proxy: Additional logic before delegating.")
        self._real_subject.request()
        print("Proxy: Additional logic after delegating.")

    @staticmethod
    def _create_real_subject(class_name: str) -> Subject:
        # Dynamically load the class by name
        try:
            cls = globals().get(class_name)  # look the class up in the current global scope
            if cls is None:
                raise KeyError
            if not issubclass(cls, Subject):
                raise TypeError(f"{class_name} is not a subclass of Subject.")
            return cls()
        except (KeyError, TypeError):
            print(f"Warning: Class '{class_name}' not found or invalid. Using DefaultSubject.")
            return DefaultSubject()


# Client code
def client_code(proxy: Proxy):
    proxy.request()


# Use the proxy, selecting the real class by its name
print("Client: Using RealSubjectA:")
proxy_a = Proxy("RealSubjectA")
client_code(proxy_a)

print("\nClient: Using RealSubjectB:")
proxy_b = Proxy("RealSubjectB")
client_code(proxy_b)

print("\nClient: Attempting to use a non-existent class:")
proxy_invalid = Proxy("NonExistentClass")
client_code(proxy_invalid)
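
The demo above resolves classes from globals() and never uses the importlib it imports. If the eventual goal is to pick a per-site implementation from the PT/pt_impl package by name and fall back to DefaultSubject, a sketch along those lines might look like this (the module/class naming convention here is an assumption, not something the commit defines):

import importlib

from PT.pt_impl.default_pt import DefaultSubject
from PT.pt_impl.subject_pt import SubjectPt


def load_site_subject_class(module_name: str, class_name: str):
    # Resolve PT.pt_impl.<module_name>.<class_name>; fall back to DefaultSubject on any failure
    try:
        module = importlib.import_module(f"PT.pt_impl.{module_name}")
        cls = getattr(module, class_name)
        if not issubclass(cls, SubjectPt):
            raise TypeError(f"{class_name} is not a subclass of SubjectPt")
        return cls
    except (ImportError, AttributeError, TypeError) as err:
        print(f"Warning: {err}; using DefaultSubject.")
        return DefaultSubject


# e.g. a hypothetical site-specific module; falls back until such a module exists
subject_cls = load_site_subject_class("hdhome_pt", "HdHomeSubject")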

View File

@@ -27,7 +27,7 @@ async def attendances_junit(name):
    """
    flag, section_name, section_data = get_section_name_by_name(name)
    if flag:
        await pt_opt.attendances(section_name, section_data)
        await pt_opt.attendances(section_data)


async def signup_junit(name):
@@ -38,7 +38,7 @@ async def signup_junit(name):
    """
    flag, section_name, section_data = get_section_name_by_name(name)
    if flag:
        await PtOperation().signup(section_name, section_data)
        await PtOperation().signup(section_data)


def get_pt_data_junit(name):

View File

@@ -26,7 +26,7 @@ async def attendances():
            if flag != 1 and cookie is not None and len(cookie.strip()) > 0:
                print(f"Processing section: {section_name} --- {section_data.get('url')}")
                # Check in
                await pt_opt.attendances(section_name, section_data)
                await pt_opt.attendances(section_data)
    except FileNotFoundError:
        print(f"Error: The file '{toml_file}' was not found.")
    except toml.TomlDecodeError as e:
@@ -47,8 +47,8 @@ async def signup():
            url, cookie, flag = section_data.get('url'), section_data.get('cookie'), section_data.get('flag')
            if flag != 1:
                print(f"Processing section: {section_name} --- {section_data.get('url')}")
                # Check in
                await pt_opt.signup(section_name, section_data)
                # Registration-open reminder
                await pt_opt.signup(section_data)
    except FileNotFoundError:
        print(f"Error: The file '{toml_file}' was not found.")
    except toml.TomlDecodeError as e:
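
For reference, a minimal way to drive these coroutines directly (the asyncio.run wrapper is an assumption; how the project actually schedules them is not shown in this diff):

import asyncio

asyncio.run(attendances())
asyncio.run(signup())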