"""One-shot scraper: page through the certificate listing on
service.scctc.org.cn and export every record to a local CSV file."""
import csv
import random
import time

from curl_cffi import requests

# First page of the paginated listing (1-based).
page = 1
size = 10  # NOTE(review): unused — get_data() always runs with its own default size=100; confirm intended page size.


def get_data(page, size=100):
    """Fetch one page of certificate records from the API.

    Parameters:
        page: 1-based page number to request.
        size: records per page (defaults to 100).

    Returns:
        The decoded JSON response as a dict. The calling code expects a
        "data" key containing "pages" (total page count) and "records"
        (the list of row dicts).
    """
    url = (
        "https://service.scctc.org.cn/ucenter/api/certificate/pageList"
        f"?current={page}&size={size}&certificateCategory=1"
    )

    # Minimal browser-like headers; curl_cffi handles TLS fingerprinting.
    headers = {
        "host": "service.scctc.org.cn",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
    }

    response = requests.get(url, headers=headers)
    print(response.text)
    return response.json()


def save_data(all_records):
    """Write the collected records to a CSV file, one row per record.

    Column headers come from the keys of the first record; assumes every
    record shares the same keys. Prints a notice and writes nothing when
    the list is empty.
    """
    file_name = "/Users/renmeng/Downloads/scctc.csv"
    if not all_records:
        print("No data to export.")
        return

    csv_columns = all_records[0].keys()
    with open(file_name, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(all_records)

    # BUG FIX: the old message claimed "all_records.csv"; report the real path.
    print(f"All data exported to {file_name} successfully.")


all_records = []
try:
    data = get_data(page)
    pages = data["data"]["pages"]
    print(pages)
    # BUG FIX: the first page's records were previously discarded — the
    # loop below starts at page + 1, so page 1 must be collected here.
    all_records.extend(data["data"]["records"])
    for i in range(page + 1, pages + 1):
        print(f"开始抓取第 [{i}] 业页数据")
        data = get_data(i)
        records = data["data"]["records"]
        all_records.extend(records)
        # Random delay between requests to avoid hammering the server.
        sleep_time = random.randint(2, 10)
        print(f"程序需要休息一下。休息时间是:{sleep_time}s")
        time.sleep(sleep_time)
    print("抓取完毕,开始生成csv文件!")
except Exception as e:
    # Broad catch is deliberate for a one-shot scraper: report the error,
    # then fall through so whatever was collected still gets saved.
    print(f"程序异常退出!{e}")
finally:
    save_data(all_records)