qnloft-spider/junit/test_main.py

60 lines
1.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import random
import time
from curl_cffi import requests
import csv
# Pagination settings: `page` is the first page number the crawl requests.
# NOTE(review): `size` is not passed to get_data() anywhere in the visible
# code, so requests fall back to get_data's own default page size.
page, size = 1, 10
def get_data(page, size=100):
    """Fetch one page of the certificate list and return the decoded JSON.

    Args:
        page: Page number, sent as the ``current`` query parameter.
        size: Number of records per page, sent as the ``size`` query parameter.

    Returns:
        The response body parsed from JSON.

    Raises:
        Whatever ``response.raise_for_status()`` raises when the server
        answers with an HTTP error status, and whatever ``response.json()``
        raises when the body is not valid JSON.
    """
    # Target URL
    url = f"https://service.scctc.org.cn/ucenter/api/certificate/pageList?current={page}&size={size}&certificateCategory=1"
    headers = {
        "host": "service.scctc.org.cn",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
    }
    # Send the GET request for the JSON payload
    response = requests.get(url, headers=headers)
    print(response.text)
    # Surface HTTP errors here instead of letting an error page reach the
    # JSON parser and fail with a less informative message downstream.
    response.raise_for_status()
    return response.json()  # Decode the response body as JSON
def save_data(all_records, file_name="/Users/renmeng/Downloads/scctc.csv"):
    """Write the collected records to a CSV file.

    Args:
        all_records: List of dicts, one per CSV row. When empty, no file is
            written and a notice is printed instead.
        file_name: Destination path of the CSV file. Defaults to the path the
            script has always written to.
    """
    if not all_records:
        print("No data to export.")
        return

    # Build the header from the keys of every record (first-seen order), not
    # only the first one: DictWriter raises ValueError when a row carries a
    # key that is missing from fieldnames.
    csv_columns = list(
        dict.fromkeys(key for record in all_records for key in record)
    )

    # Export to the CSV file; rows lacking a column get an empty cell.
    with open(file_name, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=csv_columns)
        writer.writeheader()  # Write the column titles
        writer.writerows(all_records)  # Write every record
    # Report the path that was actually written.
    print(f"All data exported to {file_name} successfully.")
# Crawl every page of the listing and export whatever was collected.
# Rows are accumulated here so that a failure part-way through still leaves
# the already-fetched rows available to the `finally` clause.
all_records = []
try:
    # The first request reports the total page count and also carries the
    # first page's rows, which must be kept (they were previously dropped).
    data = get_data(page)
    pages = data["data"]["pages"]
    print(pages)
    all_records.extend(data["data"]["records"])
    # Remaining pages: page + 1 through pages, inclusive.
    for i in range(page + 1, pages + 1):
        print(f"开始抓取第 [{i}] 业页数据")
        data = get_data(i)
        records = data["data"]["records"]
        all_records.extend(records)
        # Pause for a random 2-10 seconds between page requests.
        sleep_time = random.randint(2, 10)
        print(f"程序需要休息一下。休息时间是:{sleep_time}s")
        time.sleep(sleep_time)
    print("抓取完毕开始生成csv文件")
except Exception as e:
    # Best effort: report the failure and still save what was fetched so far.
    print(f"程序异常退出!{e}")
finally:
    save_data(all_records)