diff --git a/PT/test.html b/PT/test.html
index f03eceb..c409282 100644
--- a/PT/test.html
+++ b/PT/test.html
@@ -111,7 +111,7 @@
- 0
+ 0
|
@@ -125,7 +125,7 @@
|
- 186
+ 123
|
diff --git a/PT/test.py b/PT/test.py
index ecf5758..0af380c 100644
--- a/PT/test.py
+++ b/PT/test.py
@@ -4,6 +4,17 @@ import requests
import toml
from lxml import html as lhtml
+
+def contains_alpha_or_chinese(input_str):  # Return the stripped text if it has a letter or CJK character, else None.
+ s = input_str.strip()
+ # True when any character is alphabetic according to str.isalpha().
+ has_alpha = any(char.isalpha() for char in s)
+ # True when any character lies in U+4E00..U+9FFF; str.isalpha() already accepts these, so this is a redundant safeguard.
+ has_chinese = any('\u4e00' <= char <= '\u9fff' for char in s)
+ # Hand back the stripped string on a match; otherwise None.
+ return s if has_alpha or has_chinese else None
+
+
# headers = {
# 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
# 'accept-language': 'zh,zh-CN;q=0.9',
@@ -43,24 +54,74 @@ from lxml import html as lhtml
# print(f"{section_name} , 5次出现错误,无法访问!!!")
# if len(html) == 0:
# break
-with open('/Users/renmeng/Downloads/test.html', 'r', encoding='utf-8') as file:
+with open('test.html', 'r', encoding='utf-8') as file:  # fixture path is now relative to the current working directory
html_code = file.read()
try:
# 使用lxml解析HTML
doc = lhtml.fromstring(html_code)
# 使用XPath获取目标元素
title_elements = doc.xpath('//table[@class="torrents"]//table[@class="torrentname"]//a[@title]/@title')
- print(title_elements[0])
+ print(f"标题:{title_elements[0]}")
+ second_title_s = doc.xpath(
+ '//table[@class="torrents"]//table[@class="torrentname"]//td[@class="embedded"]/text()[normalize-space()]')
+
+ second_title = ""
+ for text in second_title_s:  # NOTE(review): each pass overwrites second_title, so the value left after the loop comes from the last text node
+ second_title = contains_alpha_or_chinese(text)  # None when the node has no letter or CJK character
+ print(f"二级标题:{second_title}")
+
+ seed_status = doc.xpath('//table[@class="torrents"]//table[@class="torrentname"]//td[1]//img[@alt]/@alt')
+ if len(seed_status) > 1:  # index 1 is read next, so two or more alt values are required
+ print(f"种子状态:{seed_status[1]}")
+ else:  # NOTE(review): an empty result still raises IndexError on the next line and is reported by the except handler
+ print(f"种子状态:{seed_status[0]}")
+
+ seeding_status = doc.xpath(
+ '//table[@class="torrents"]//table[@class="torrentname"]//div[@class="progressarea"]/@title')
+ print(f"做种状态:{seeding_status[0]}")
+
+ comment_count = doc.xpath('//td[@class="rowfollow"][2]//a/text()[normalize-space()]')
+ print(f"评论数:{comment_count[0]}")
+
+ upload_time = doc.xpath('//td[contains(@class, "rowfollow")][4]//span/@title')
+ # for td_element in upload_time:
+ # html_content = lhtml.tostring(td_element, encoding='unicode')
+ # print(html_content)
+ print(f"资源上传时间:{upload_time[0]}")
+
+ size = doc.xpath('//td[@class="rowfollow"][3]/text()[normalize-space()]')
+ print(f"资源大小:{size[0].strip() + '' + size[1].strip()}")  # joins the first two non-blank text nodes of the cell
+
+ seed_count = doc.xpath('//td[@class="rowfollow"][4]')[0]
+ print(f"做种数:{seed_count.text_content().strip()}")
+
+ download_count = doc.xpath('//td[@class="rowfollow"][5]')[0]
+ print(f"下载数:{download_count.text_content().strip()}")
+
+ completion_count = doc.xpath('//td[@class="rowfollow"][6]')[0]
+ print(f"完成数:{completion_count.text_content().strip()}")
+
+ publisher = doc.xpath('//td[@class="rowfollow"][7]')[0]
+ print(f"发布者:{publisher.text_content().strip()}")
+ download_link = doc.xpath('//table[@class="torrents"]//table[@class="torrentname"]//img[@class="download"]/parent::a/@href')
+ print(f"下载链接:/{download_link[0]}")
# 详情链接地址
details_link = doc.xpath('//table[@class="torrents"]//table[@class="torrentname"]//a[@href]/@href')
- print(f"/{details_link[0]}")
+ print(f"详情链接:/{details_link[0]}")
+
+ douban_rating = doc.xpath('')  # TODO: expression not written yet; NOTE(review): lxml is expected to reject an empty XPath, sending control to the except handler - confirm
+ print(f"豆瓣评分:/{douban_rating[0]}")
+
+ imdb_rating = doc.xpath('')  # TODO: expression not written yet; same empty placeholder as the Douban lookup
+ print(f"imdb_rating:/{imdb_rating[0]}")
except Exception as e:
+ print(e)  # show the underlying error before the generic message
print(f"页面无法解析,请知晓!!!")
"""
-主键id,来源名称,一级标题,二级标题,种子状态,状态剩余时间,做种状态,评论数,做种数,下载数,完成数,发布者,豆瓣评分,IMDB评分,下载链接,详情链接,资源大小
+主键id,来源名称,一级标题,二级标题,种子状态,状态剩余时间,做种状态,评论数,资源上传时间,资源大小,做种数,下载数,完成数,发布者,豆瓣评分,IMDB评分,下载链接,详情链接
CREATE TABLE IF NOT EXISTS pt_website_data (
id INTEGER PRIMARY KEY,
source_name TEXT NOT NULL,
@@ -70,6 +131,8 @@ CREATE TABLE IF NOT EXISTS pt_website_data (
status_remaining_time TEXT,
seeding_status TEXT,
comment_count INTEGER,
+ upload_time TEXT,
+ size TEXT,
seed_count INTEGER,
download_count INTEGER,
completion_count INTEGER,
@@ -78,7 +141,6 @@ CREATE TABLE IF NOT EXISTS pt_website_data (
imdb_rating REAL,
download_link TEXT,
details_link TEXT,
- size TEXT,
UNIQUE(source_name, first_title, second_title)
);
@@ -88,4 +150,4 @@ CREATE TABLE IF NOT EXISTS pt_website_type (
type_name TEXT NOT NULL,
type_url TEXT NOT NULL
);
-"""
\ No newline at end of file
+"""
|