60 lines
1.6 KiB
Python
60 lines
1.6 KiB
Python
|
import random
|
|||
|
import time
|
|||
|
|
|||
|
from curl_cffi import requests
|
|||
|
import csv
|
|||
|
|
|||
|
# Pagination settings for the crawl.
# The crawl starts from this page number.
page = 1
# Page size. NOTE(review): the visible calls to get_data() do not pass this
# value, so get_data()'s own default is what is actually used - confirm intent.
size = 10
|
|||
|
|
|||
|
|
|||
|
def get_data(page, size=100):
    """Fetch one page of the certificate list and return the decoded JSON.

    Args:
        page: Page number sent as the ``current`` query parameter.
        size: Number of records requested per page (``size`` query parameter).

    Returns:
        The response body parsed as JSON.

    Raises:
        Whatever the HTTP client raises for transport failures or for a
        non-success HTTP status, and whatever ``response.json()`` raises
        when the body is not valid JSON.
    """
    # Target URL
    url = f"https://service.scctc.org.cn/ucenter/api/certificate/pageList?current={page}&size={size}&certificateCategory=1"

    headers = {
        "host": "service.scctc.org.cn",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
    }

    # Send the GET request for the JSON data
    response = requests.get(url, headers=headers)
    print(response.text)
    # Fail loudly on 4xx/5xx instead of trying to JSON-decode an error page.
    response.raise_for_status()
    return response.json()  # Convert the response body to JSON
|
|||
|
|
|||
|
|
|||
|
def save_data(all_records, file_name="/Users/renmeng/Downloads/scctc.csv"):
    """Write the collected records to a CSV file.

    Args:
        all_records: List of dicts, one per CSV row. When empty, nothing is
            written and a notice is printed instead.
        file_name: Destination path of the CSV file. Defaults to the
            location the script has always written to.

    Returns:
        None.
    """
    if not all_records:
        print("No data to export.")
        return

    # Build the header from the keys of every record (first-seen order), not
    # just the first one: csv.DictWriter raises ValueError when a row carries
    # a key that is missing from fieldnames.
    csv_columns = list(
        dict.fromkeys(key for record in all_records for key in record)
    )

    # Export to the CSV file
    with open(file_name, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=csv_columns)
        writer.writeheader()  # Write the column titles
        writer.writerows(all_records)  # Write every record

    # Report the path that was actually written.
    print(f"All data exported to {file_name} successfully.")
|
|||
|
|
|||
|
|
|||
|
# Crawl every page of the listing, then export whatever was collected.
all_records = []
try:
    # The first request also tells us how many pages exist in total.
    data = get_data(page)
    pages = data["data"]["pages"]
    print(pages)
    # Keep the first page's records too; previously they were fetched and
    # then discarded, so the export was missing a whole page.
    all_records.extend(data["data"]["records"])
    for i in range(page + 1, pages + 1):
        print(f"开始抓取第 [{i}] 业页数据")
        data = get_data(i)
        records = data["data"]["records"]
        all_records.extend(records)
        # Random pause between requests; no need to wait after the last page.
        if i < pages:
            sleep_time = random.randint(2, 10)
            print(f"程序需要休息一下。休息时间是:{sleep_time}s")
            time.sleep(sleep_time)
    print("抓取完毕,开始生成csv文件!")
except Exception as e:
    # Top-level boundary: report the failure and still export below.
    print(f"程序异常退出!{e}")
finally:
    # Always save what was gathered, even after a partial crawl.
    save_data(all_records)
|