"""Alipay bill processing.

cchardet is used for encoding detection because it is more accurate than
chardet.
"""
import codecs
import time
from pathlib import Path

import numpy as np
import pandas as pd

try:
    import cchardet as chardet
except ImportError:
    # Only encoding detection needs the C extension; keep the rest of the
    # module importable and fail with a clear message at detection time.
    chardet = None

# Directory the UTF-8 copy of the bill is written to unless the caller
# passes another one to ``ALiPay``.
DEFAULT_OUTPUT_DIR = '/Users/renmeng/Downloads'


def detection_file_encoding(file_name):
    """Detect the text encoding of a file and print the result.

    Args:
        file_name: Path of the file to inspect. The whole file is read
            into memory in binary mode.

    Returns:
        The encoding name reported by the detector (``None`` when the
        detector could not decide).

    Raises:
        ImportError: If ``cchardet`` is not installed.
    """
    if chardet is None:
        raise ImportError("cchardet is required to detect file encodings")

    with open(file_name, 'rb') as file:
        rawdata = file.read()

    # The detection result carries both the encoding and a confidence score.
    result = chardet.detect(rawdata)
    encoding = result['encoding']
    # The confidence may be missing/None when nothing could be detected;
    # formatting None with ``:.2f`` would raise TypeError.
    confidence = result.get('confidence') or 0.0
    print(f"文件【{file_name}】 编码:{encoding}, 置信度:{confidence:.2f}")
    return encoding


def encoding_conversion(source_file, target_file, source_encoding,
                        target_encoding):
    """Re-encode ``source_file`` into ``target_file``.

    If ``target_file`` already exists it is left untouched and only its
    encoding is detected and returned, so an earlier conversion is reused.

    Args:
        source_file: Path of the file to read.
        target_file: Path of the file to write.
        source_encoding: Encoding used to decode ``source_file``.
        target_encoding: Encoding used to encode ``target_file``.

    Returns:
        The encoding detected on ``target_file``.
    """
    if Path(target_file).exists():
        return detection_file_encoding(target_file)

    # Copy line by line so the whole bill never has to sit in memory.
    with codecs.open(source_file, 'r', encoding=source_encoding) as source, \
            codecs.open(target_file, 'w', encoding=target_encoding) as target:
        for line in source:
            target.write(line)

    encoding = detection_file_encoding(target_file)
    print(f"文件已从 {source_encoding} 编码转换为 {encoding} 编码")
    return encoding


def reset_account_name(name):
    """Map a payment-channel description to a ledger account name.

    Args:
        name: Payment-channel text from the bill. Non-string values (for
            example the NaN pandas produces for an empty cell) are accepted.

    Returns:
        ``"支付宝"``, ``"信用卡"`` or ``"现金"`` depending on which keyword
        the text contains, or ``None`` when nothing matches or ``name`` is
        not a string.
    """
    if not isinstance(name, str):
        # ``"..." in <float NaN>`` would raise TypeError.
        return None
    if "余额宝" in name or '滴滴出行' in name:
        return "支付宝"
    if "信用卡" in name:
        return "信用卡"
    if "借记卡" in name:
        return "现金"
    return None


def _to_amount(value):
    """Convert one bill cell to a float, treating blank/missing as 0."""
    if value is None:
        return 0.0
    if isinstance(value, str):
        value = value.strip()
        return float(value) if value else 0.0
    if pd.isna(value):
        return 0.0
    return float(value)


class ALiPay:
    """Load an exported Alipay bill CSV and normalise it with pandas.

    Attributes set by ``__init__``:
        encoding: Encoding detected on the UTF-8 copy of the bill.
        target_file: Path (str) of the UTF-8 copy of the bill.
    """

    def __init__(self, csv_file, output_dir=DEFAULT_OUTPUT_DIR):
        """Detect the bill's encoding and write a UTF-8 copy of it.

        Args:
            csv_file: Path of the exported bill (str or path-like).
            output_dir: Directory for the converted copy. It must already
                exist. Defaults to ``DEFAULT_OUTPUT_DIR``.
        """
        csv_file = str(csv_file)
        self.encoding = detection_file_encoding(csv_file)

        # Name the copy after the 2nd and 3rd "-"-separated parts of the
        # file name, falling back to a timestamp. Only the base name is
        # split so a "-" in a directory cannot leak a path separator into
        # the generated file name.
        name_parts = Path(csv_file).name.split("-")[1:3]
        rename = "_".join(name_parts) if name_parts else int(time.time())
        self.target_file = str(
            Path(output_dir) / f'支付宝交易账单-{rename}.csv')

        self.encoding = encoding_conversion(source_file=csv_file,
                                            target_file=self.target_file,
                                            source_encoding=self.encoding,
                                            target_encoding="utf-8")

    def get_ali_pay_bill(self):
        """Read the converted bill and return the successful transactions.

        The first two lines of the file are skipped before the header row.
        Rows whose ``交易状态`` is not ``成功`` are dropped, ``创建时间`` is
        reformatted as ``%Y-%m-%d``, ``账户`` is derived from ``支付渠道``
        and ``金额`` is order amount minus refunds minus discount.

        Returns:
            A ``pandas.DataFrame`` with the original columns plus ``账户``
            and ``金额``. The original row index is kept.
        """
        df = pd.read_csv(self.target_file, encoding=self.encoding,
                         skiprows=2)
        # Copy so the column assignments below act on an independent frame.
        df = df.loc[df['交易状态'] == '成功'].copy()

        created = pd.to_datetime(df['创建时间'])
        df['账户'] = df['支付渠道'].apply(reset_account_name)
        df['创建时间'] = created.dt.strftime('%Y-%m-%d')

        # Blank or missing cells count as 0 so one empty refund/discount
        # cell does not turn the whole amount into NaN.
        order_total = df['订单金额(元)'].apply(_to_amount)
        refunded = df['累计退款总额(元)'].apply(_to_amount)
        discount = df['优惠(元)'].apply(_to_amount)
        df['金额'] = order_total - refunded - discount
        return df