From ca4e3449131dacddde0d98c2cfbe18b81384cd2c Mon Sep 17 00:00:00 2001
From: rm <renmeng@njhgroup.cn>
Date: Tue, 16 Jan 2024 01:24:19 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=B8=80=E4=BA=9B=E6=9B=B4?=
 =?UTF-8?q?=E6=96=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 PT/test.html |  4 +--
 PT/test.py   | 74 +++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 70 insertions(+), 8 deletions(-)
diff --git a/PT/test.html b/PT/test.html
index f03eceb..c409282 100644
--- a/PT/test.html
+++ b/PT/test.html
@@ -111,7 +111,7 @@
 												</table>
 											</td>
 											<td class="rowfollow">
-												<a href="comment.php?action=add&amp;pid=130681&amp;type=torrent" title="添加评论">0</a>
+												<a href="" title="添加评论">0</a>
 											</td>
 											<td class="rowfollow nowrap">
 												<span title="2024-01-13 22:24:08">
@@ -125,7 +125,7 @@
 											</td>
 											<td class="rowfollow" align="center">
 												<b>
-													<a href="details.php?id=130681&amp;hit=1&amp;dllist=1#seeders">186</a>
+													<a href="details.php?id=130681&amp;hit=1&amp;dllist=1#seeders">123</a>
 												</b>
 											</td>
 											<td class="rowfollow">
diff --git a/PT/test.py b/PT/test.py
index ecf5758..0af380c 100644
--- a/PT/test.py
+++ b/PT/test.py
@@ -4,6 +4,17 @@ import requests
 import toml
 from lxml import html as lhtml
 
+
+def contains_alpha_or_chinese(input_str):
+	"""Return the stripped text if it has a letter or a character in U+4E00..U+9FFF, else None."""
+	s = input_str.strip()
+	# True when at least one character is alphabetic according to str.isalpha().
+	has_alpha = any(char.isalpha() for char in s)
+	# True when at least one character lies in the explicit range checked below.
+	has_chinese = any('\u4e00' <= char <= '\u9fff' for char in s)
+	return s if has_alpha or has_chinese else None
+
+
 # headers = {
 # 	'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
 # 	'accept-language': 'zh,zh-CN;q=0.9',
@@ -43,24 +54,74 @@ from lxml import html as lhtml
 # 				print(f"{section_name} , 5次出现错误，无法访问！！！")
 # 			if len(html) == 0:
 # 				break
-with open('/Users/renmeng/Downloads/test.html', 'r', encoding='utf-8') as file:
+with open('test.html', 'r', encoding='utf-8') as file:
 	html_code = file.read()
 	try:
 		# 使用lxml解析HTML
 		doc = lhtml.fromstring(html_code)
 		# 使用XPath获取目标元素
 		title_elements = doc.xpath('//table[@class="torrents"]//table[@class="torrentname"]//a[@title]/@title')
-		print(title_elements[0])
+		print(f"标题：{title_elements[0]}")
+		second_title_s = doc.xpath(
+			'//table[@class="torrents"]//table[@class="torrentname"]//td[@class="embedded"]/text()[normalize-space()]')
+
+		# Keep the last text node that has letters; a non-matching node must not erase an earlier match.
+		second_title = ""
+		for text in second_title_s:
+			second_title = contains_alpha_or_chinese(text) or second_title
+		print(f"二级标题：{second_title}")
+
+		seed_status = doc.xpath('//table[@class="torrents"]//table[@class="torrentname"]//td[1]//img[@alt]/@alt')
+		# Index 1 needs at least two matches; otherwise fall back to the first match, or None when empty.
+		if len(seed_status) > 1:
+			print(f"种子状态：{seed_status[1]}")
+		else:
+			print(f"种子状态：{seed_status[0] if seed_status else None}")
+
+		seeding_status = doc.xpath(
+			'//table[@class="torrents"]//table[@class="torrentname"]//div[@class="progressarea"]/@title')
+		print(f"做种状态：{seeding_status[0]}")
+
+		comment_count = doc.xpath('//td[@class="rowfollow"][2]//a/text()[normalize-space()]')
+		print(f"评论数：{comment_count[0]}")
+
+		upload_time = doc.xpath('//td[contains(@class, "rowfollow")][4]//span/@title')
+		print(f"资源上传时间：{upload_time[0]}")
+
+		# The size is printed as the first two non-blank text nodes of the cell, stripped and joined.
+		size = doc.xpath('//td[@class="rowfollow"][3]/text()[normalize-space()]')
+		print(f"资源大小：{size[0].strip() + size[1].strip()}")
+
+		seed_count = doc.xpath('//td[@class="rowfollow"][4]')[0]
+		print(f"做种数：{seed_count.text_content().strip()}")
+
+		download_count = doc.xpath('//td[@class="rowfollow"][5]')[0]
+		print(f"下载数：{download_count.text_content().strip()}")
+
+		completion_count = doc.xpath('//td[@class="rowfollow"][6]')[0]
+		print(f"完成数：{completion_count.text_content().strip()}")
+
+		publisher = doc.xpath('//td[@class="rowfollow"][7]')[0]
+		print(f"发布者：{publisher.text_content().strip()}")
+		download_link = doc.xpath('//table[@class="torrents"]//table[@class="torrentname"]//img[@class="download"]/parent::a/@href')
+		print(f"下载链接：/{download_link[0]}")
 		# 详情链接地址
 		details_link = doc.xpath('//table[@class="torrents"]//table[@class="torrentname"]//a[@href]/@href')
-		print(f"/{details_link[0]}")
+		print(f"详情链接：/{details_link[0]}")
+
+		# TODO: rating XPaths are not written yet; doc.xpath('') is rejected by lxml, so use empty results.
+		douban_rating = []
+		print(f"豆瓣评分：/{douban_rating[0] if douban_rating else None}")
+		imdb_rating = []
+		print(f"imdb_rating：/{imdb_rating[0] if imdb_rating else None}")
 
 
 	except Exception as e:
+		print(e)
 		print(f"页面无法解析，请知晓！！！")
 
 """
-主键id,来源名称,一级标题,二级标题,种子状态,状态剩余时间,做种状态,评论数,做种数,下载数,完成数，发布者，豆瓣评分，IMDB评分，下载链接，详情链接,资源大小
+主键id,来源名称,一级标题,二级标题,种子状态,状态剩余时间,做种状态,评论数,资源上传时间,资源大小，做种数,下载数,完成数，发布者，豆瓣评分，IMDB评分，下载链接，详情链接
 CREATE TABLE IF NOT EXISTS pt_website_data (
     id INTEGER PRIMARY KEY,  
     source_name TEXT NOT NULL,
@@ -70,6 +131,8 @@ CREATE TABLE IF NOT EXISTS pt_website_data (
     status_remaining_time TEXT,
     seeding_status TEXT,
     comment_count INTEGER,
+    upload_time TEXT,
+    size TEXT,
     seed_count INTEGER,
     download_count INTEGER,
     completion_count INTEGER,
@@ -78,7 +141,6 @@ CREATE TABLE IF NOT EXISTS pt_website_data (
     imdb_rating REAL,
     download_link TEXT,
     details_link TEXT,
-    size TEXT,
     UNIQUE(source_name, first_title, second_title)
 );
 
@@ -88,4 +150,4 @@ CREATE TABLE IF NOT EXISTS pt_website_type (
     type_name TEXT NOT NULL,
     type_url TEXT NOT NULL
 );
-"""
\ No newline at end of file
+"""