提交一些代码

2024-01-17 18:22:04 +08:00 · 2024-01-17 18:22:04 +08:00 · cba3feaf4a
parent f3e2a05e34
commit cba3feaf4a
4 changed files with 52 additions and 26 deletions
--- a/PT/pt_get_data.py
+++ b/PT/pt_get_data.py
@@ -21,7 +21,7 @@ from urllib.parse import urlparse, parse_qs

 from qnloft_db.sqlite_db_main import SqliteDbMain
 from qnloft_db_model.PtWebsiteData import PtWebsiteData
-
+from dateutil import parser

 def extract_id(url, field):
 	parsed_url = urlparse(url)
@@ -49,7 +49,6 @@ class PtGetData:
 	def __init__(self):
 		logger.add("../log/PtGetData_{time:YYYY-MM-DD}.log", rotation="1 day", level="INFO")
 		logger.add(sys.stderr, level="INFO")
-		self.toml_file = 'PT/pt_config.toml'
 		self.torrents_uri = "/torrents.php?sort=0&type=desc"
 		self.headers = {
 			'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
@@ -81,10 +80,10 @@ class PtGetData:
 				# 解析网页内容
 				self.get_common_analysis(section_name, doc_html)
 				# 获取分页
-				pages = self.get_common_total_page(doc_html)
-				for i in range(0, pages):
-					time.sleep(2)
-					self.get_data_by_page(section_name, section_data, i)
+				# pages = self.get_common_total_page(doc_html)
+				# for i in range(0, pages):
+				# 	time.sleep(2)
+				# 	self.get_data_by_page(section_name, section_data, i)
 			# 数据入库
 			except Exception as e:
 				print(f"页面无法解析，请知晓！！！{e}")
@@ -112,7 +111,6 @@ class PtGetData:
 			# print(f"html内容：{html_content}")
 			# 一级标题
 			first_title = row_follow.xpath('.//table[@class="torrentname"]//a[@title]/@title')[0]
-
 			second_title_s = row_follow.xpath(
 				'.//table[@class="torrentname"]//td[@class="embedded"]/text()[normalize-space()]'
 				'| .//table[@class="torrentname"]//td[@class="embedded"]//font[@title]/text()')
@@ -121,7 +119,7 @@ class PtGetData:
 			for text in second_title_s:
 				second_title = contains_alpha_or_chinese(text) if contains_alpha_or_chinese(
 					text) is not None else None
-
+			print(f"标题：{first_title} 二级标题：{second_title}")
 			type_id, type_name = "", ""
 			type_html = row_follow.xpath('.//td[contains(@class, "rowfollow")][1]//a[@href]')
 			for td_element in type_html:
@@ -150,11 +148,14 @@ class PtGetData:
 			comment_count = row_follow.xpath('.//td[@class="rowfollow"][2]//a/text()[normalize-space()]')[0]
 			print(f"评论数：{comment_count}")

-			upload_time = row_follow.xpath('.//span[@title][parent::td]/@title')
-			# for td_element in upload_time:
-			# 	html_content = lhtml.tostring(td_element, encoding='unicode')
-			# 	print(html_content)
-			print(f"资源上传时间：{upload_time[0]}")
+			upload_time = ""
+			upload_time_html = row_follow.xpath('.//span[@title][parent::td]/@title')
+			for td_element in upload_time_html:
+				try:
+					upload_time = parser.parse(td_element)
+				except ValueError:
+					pass
+			print(f"资源上传时间：{upload_time}")

 			# 资源大小
 			size_html = row_follow.xpath('.//td[@class="rowfollow"][3]/text()[normalize-space()]')
@@ -177,7 +178,8 @@ class PtGetData:
 			pt_id = extract_id(download_link, "id")
 			# 详情链接地址
 			details_link = row_follow.xpath('.//table[@class="torrentname"]//a[@href]/@href')[0]
-			print(f"PT_ID == {pt_id} 标题：{first_title} 二级标题：{second_title} 下载链接：/{download_link} 详情链接：/{details_link}")
+			print(
+				f"PT_ID == {pt_id} 下载链接：/{download_link} 详情链接：/{details_link}")
 			entry = PtWebsiteData(
 				pt_id=pt_id,
 				source_name=section_name,
@@ -244,8 +246,9 @@ class PtGetData:


 def opt(self):
+	toml_file = 'PT/pt_config.toml'
 	try:
-		with open(self.toml_file, 'r', encoding='utf-8') as file:
+		with open(toml_file, 'r', encoding='utf-8') as file:
 			config_data = toml.load(file)
 			# 迭代每个 section
 			for section_name, section_data in config_data.items():
@@ -255,6 +258,20 @@ def opt(self):
 					# 拉取数据
 					self.get_data(section_name, section_data)
 	except FileNotFoundError:
-		print(f"Error: The file '{self.toml_file}' was not found.")
+		print(f"Error: The file '{toml_file}' was not found.")
 	except toml.TomlDecodeError as e:
 		print(f"Error decoding TOML: {e}")
+
+
+if __name__ == '__main__':
+	toml_file = 'pt_config.toml'
+	with open(toml_file, 'r', encoding='utf-8') as file:
+		config_data = toml.load(file)
+		# 迭代每个 section
+		for section_name, section_data in config_data.items():
+			print(f"Processing section: {section_name} --- {section_data.get('url')}")
+			url, cookie, flag = section_data.get('url'), section_data.get('cookie'), section_data.get('flag')
+			if flag != 1 and cookie is not None and len(cookie.strip()) > 0:
+				# 拉取数据
+				PtGetData().get_data(section_name, section_data)
+				break
--- a/PT/test.py
+++ b/PT/test.py
@@ -1,4 +1,5 @@
 import time
+from datetime import datetime

 import pandas as pd
 import requests
@@ -7,7 +8,7 @@ from lxml import html as lhtml
 from urllib.parse import urlparse, parse_qs

 from qnloft_db_model.PtWebsiteData import PtWebsiteData
-
+from dateutil import parser

 def extract_id(url, field) -> bytes:
 	parsed_url = urlparse(url)
@@ -89,14 +90,22 @@ data = {col: [] for col in columns}
 df = pd.DataFrame(data)


-for i in range(0,10):
-	# 创建一行数据
-	row_data = {'pt_id': i}
+def is_date(s):
+	try:
+		datetime.strptime(s, '%Y-%m-%d %H:%M:%S')
+		return True
+	except ValueError:
+		return False

-	# 将一行数据添加到 DataFrame
-	df = df.append(row_data, ignore_index=True)
-print(df)
-"""
+
+my_list = ['置顶促销', '国语配音', '中文字幕', '2021-02-02 13:26:26','2021-02-02','2021-02-02 13:26']
+for item in my_list:
+	try:
+		parsed_date = parser.parse(item)
+		print(parsed_date)
+	except ValueError:
+		pass
+	"""
 主键id,pt资源id,来源名称,一级标题,二级标题,分类id，分类名称
 种子状态,状态剩余时间,做种状态,评论数,资源上传时间,资源大小，
 做种数,下载数,完成数，发布者，豆瓣评分，IMDB评分，下载链接，详情链接
--- a/qnloft_db/sqlite_db_main.py
+++ b/qnloft_db/sqlite_db_main.py
@@ -22,7 +22,7 @@ class SqliteDbMain(DbMain):
 		elif 'macos' in sys_platform.lower():
 			__engine = f"/Users/renmeng/Documents/sqlite_db/{self.database_name}"
 		else:
-			__engine = f"{self.database_name}"
+			__engine = f"../sqlite_db/{self.database_name}"
 		return __engine

 	def __create_sqlite_engine(self):
--- a/qnloft_db_model/PtWebsiteData.py
+++ b/qnloft_db_model/PtWebsiteData.py
@@ -5,7 +5,7 @@ from sqlalchemy import Column, Integer, String, Float, UniqueConstraint
 class PtWebsiteData(declarative_base()):
 	__tablename__ = 'pt_website_data'

-	id = Column(Integer, primary_key=True)
+	id = Column(Integer, primary_key=True, autoincrement=True)
 	# pt资源id
 	pt_id = Column(Integer, nullable=False)
 	# 来源名称