83 lines
2.9 KiB
Python
83 lines
2.9 KiB
Python
|
'''
Alipay bill processing.

Encoding detection uses the cchardet package, which detects more accurately than chardet.
'''
|
|||
|
import codecs
|
|||
|
import time
|
|||
|
from pathlib import Path
|
|||
|
|
|||
|
import cchardet as chardet
|
|||
|
import numpy as np
|
|||
|
import pandas as pd
|
|||
|
|
|||
|
|
|||
|
def detection_file_encoding(file_name):
    """Detect the text encoding of a file and report it on stdout.

    The whole file is read as bytes and handed to ``chardet.detect``; the
    detected encoding and its confidence are printed.

    Args:
        file_name: Path of the file to inspect.

    Returns:
        The detector's ``'encoding'`` entry. This may be ``None`` when the
        detector cannot decide (for example on an empty file).
    """
    with open(file_name, 'rb') as file:
        rawdata = file.read()

    # The detection result carries both the encoding and a confidence value.
    result = chardet.detect(rawdata)
    encoding = result['encoding']
    # When no encoding is identified the detector may report the confidence
    # as None; formatting None with ".2f" would raise TypeError, so fall back
    # to 0.0 for display purposes.
    confidence = result['confidence'] or 0.0
    print(f"文件【{file_name}】 编码:{encoding}, 置信度:{confidence:.2f}")
    return encoding
|
|||
|
|
|||
|
|
|||
|
def encoding_conversion(source_file, target_file, source_encoding, target_encoding):
    """Re-encode ``source_file`` into ``target_file`` unless the target exists.

    If ``target_file`` already exists it is reused as-is and only its
    encoding is detected. Otherwise the source is copied line by line,
    decoding with ``source_encoding`` and encoding with ``target_encoding``.

    Args:
        source_file: Path of the file to read.
        target_file: Path of the file to create.
        source_encoding: Encoding used to decode ``source_file``.
        target_encoding: Encoding used to write ``target_file``.

    Returns:
        The encoding detected for ``target_file`` by
        ``detection_file_encoding``.

    Raises:
        Any error raised while reading or writing is propagated after the
        partially written target has been removed.
    """
    file_path = Path(target_file)
    if file_path.exists():
        # A previous run already produced the target; reuse it.
        return detection_file_encoding(target_file)

    # Convert by streaming lines through the codecs readers/writers.
    try:
        with codecs.open(source_file, 'r', encoding=source_encoding) as source, \
                codecs.open(target_file, 'w', encoding=target_encoding) as target:
            for line in source:
                target.write(line)
    except BaseException:
        # A failed conversion must not leave a partial target behind: the
        # existence check above would treat it as a finished result on the
        # next call.
        if file_path.exists():
            file_path.unlink()
        raise

    encoding = detection_file_encoding(target_file)
    print(f"文件已从 {source_encoding} 编码转换为 {encoding} 编码")
    return encoding
|
|||
|
|
|||
|
|
|||
|
def reset_account_name(name):
    """Map a payment-channel description to an account label.

    Checks are applied in order, so a description matching several keywords
    gets the first matching label.

    Args:
        name: Payment-channel text. Non-string values (such as ``None`` or a
            NaN produced by a missing CSV cell) are accepted and treated as
            "no match".

    Returns:
        ``"支付宝"``, ``"信用卡"`` or ``"现金"`` depending on the keyword found,
        or ``None`` when nothing matches.
    """
    # Missing cells arrive as float NaN / None when used with Series.apply;
    # a substring test on those would raise TypeError.
    if not isinstance(name, str):
        return None
    if "余额宝" in name or '滴滴出行' in name:
        return "支付宝"
    if "信用卡" in name:
        return "信用卡"
    if "借记卡" in name:
        return "现金"
    return None
|
|||
|
|
|||
|
|
|||
|
class ALiPay:
    """Convert an Alipay bill CSV export to UTF-8 and load it as a DataFrame.

    Attributes:
        encoding: Encoding detected for the converted copy.
        target_file: Path (str) of the converted copy.
    """

    def __init__(self, csv_file, output_dir='/Users/renmeng/Downloads'):
        """Detect the export's encoding and create a UTF-8 copy of it.

        Args:
            csv_file: Path of the exported bill CSV.
            output_dir: Directory in which the converted copy is stored.
                Defaults to the location the original code hard-coded.
        """
        # Detect the encoding of the raw export.
        self.encoding = detection_file_encoding(csv_file)

        # Name the copy after the 2nd and 3rd "-"-separated parts of the file
        # name (not of the whole path, so a "-" in a directory name cannot
        # leak path separators into the new name); fall back to a timestamp.
        name_parts = Path(csv_file).name.split("-")[1:3]
        rename = "_".join(name_parts) if name_parts else int(time.time())
        self.target_file = str(Path(output_dir) / f'支付宝交易账单-{rename}.csv')

        # Create the converted file and remember the encoding detected for it.
        self.encoding = encoding_conversion(source_file=csv_file,
                                            target_file=self.target_file,
                                            source_encoding=self.encoding,
                                            target_encoding="utf-8")

    @staticmethod
    def _to_amount(value):
        """Convert one amount cell to float; blank or missing cells count as 0."""
        if isinstance(value, str):
            value = value.strip()
            if not value:
                return 0.0
        elif value is None or pd.isna(value):
            return 0.0
        return float(value)

    def get_ali_pay_bill(self):
        """Load the converted bill and derive the account and net amount.

        Only rows whose ``交易状态`` equals ``成功`` are kept. ``创建时间`` is
        reformatted as ``YYYY-MM-DD``, ``账户`` is derived from ``支付渠道`` and
        ``金额`` is order amount minus refunds minus discount.

        Amount cells are parsed uniformly: numeric cells, padded strings and
        blank/missing cells are all accepted, the latter counting as 0.

        Returns:
            The filtered DataFrame with the added ``账户`` and ``金额`` columns.
        """
        # skiprows drops the two lines that precede the header row.
        df = pd.read_csv(self.target_file, encoding=self.encoding, skiprows=2)
        df = df[df['交易状态'] == '成功'].copy()

        # Parse the timestamp and keep only the date part as text.
        df['创建时间'] = pd.to_datetime(df['创建时间']).dt.strftime('%Y-%m-%d')
        df['账户'] = df['支付渠道'].apply(reset_account_name)

        df['金额'] = (
            df['订单金额(元)'].apply(self._to_amount)
            - df['累计退款总额(元)'].apply(self._to_amount)
            - df['优惠(元)'].apply(self._to_amount)
        )
        return df
|