83 lines
2.9 KiB
Python
83 lines
2.9 KiB
Python
'''
Alipay bill processing.

Encoding detection uses the cchardet package, which detects encodings
more accurately than chardet.
'''
|
||
import codecs
|
||
import time
|
||
from pathlib import Path
|
||
|
||
import cchardet as chardet
|
||
import numpy as np
|
||
import pandas as pd
|
||
|
||
|
||
def detection_file_encoding(file_name):
    """Detect the text encoding of a file and print the result.

    The whole file is read as bytes and passed to ``chardet.detect``; the
    detected encoding and its confidence are printed.

    Args:
        file_name: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of the detection result.
        NOTE(review): presumably ``None`` when the detector cannot decide
        (e.g. an empty file) -- confirm against the detector in use.
    """
    with open(file_name, 'rb') as file:
        rawdata = file.read()

    # The detection result carries both the encoding and a confidence value.
    result = chardet.detect(rawdata)
    encoding = result['encoding']
    # Formatting ``None`` with ':.2f' raises TypeError, so a missing
    # confidence is reported as 0 instead of crashing the caller.
    confidence = result['confidence'] or 0.0
    print(f"文件【{file_name}】 编码:{encoding}, 置信度:{confidence:.2f}")
    return encoding
|
||
|
||
|
||
def encoding_conversion(source_file, target_file, source_encoding, target_encoding):
    """Copy a text file into ``target_file`` using a different encoding.

    If ``target_file`` already exists, nothing is written and the encoding
    detected on the existing file is returned.

    Args:
        source_file: Path of the file to read.
        target_file: Path of the file to create.
        source_encoding: Encoding used to decode ``source_file``.
        target_encoding: Encoding used to encode ``target_file``.

    Returns:
        The encoding reported by ``detection_file_encoding`` for
        ``target_file``.
    """
    if Path(target_file).exists():
        return detection_file_encoding(target_file)

    # Built-in open() replaces the deprecated codecs.open(). newline='' on
    # both handles disables newline translation, so line endings are copied
    # through unchanged while the text is transcoded.
    with open(source_file, 'r', encoding=source_encoding, newline='') as source, \
            open(target_file, 'w', encoding=target_encoding, newline='') as target:
        target.writelines(source)

    # Re-detect the written file so the caller gets the name the detector
    # reports for it rather than the requested target_encoding string.
    encoding = detection_file_encoding(target_file)
    print(f"文件已从 {source_encoding} 编码转换为 {encoding} 编码")
    return encoding
|
||
|
||
|
||
def reset_account_name(name):
    """Map a payment-channel description to an account label.

    The first matching rule wins:

    * contains "余额宝" or "滴滴出行" -> "支付宝"
    * contains "信用卡" -> "信用卡"
    * contains "借记卡" -> "现金"

    Args:
        name: Payment-channel text. Non-string values (for example the NaN
            that pandas supplies for an empty cell) are accepted.

    Returns:
        The account label, or ``None`` when ``name`` is not a string or
        matches no rule.
    """
    # Substring tests on a float NaN / None raise TypeError, so treat any
    # non-string value as "no match".
    if not isinstance(name, str):
        return None
    if "余额宝" in name or '滴滴出行' in name:
        return "支付宝"
    if "信用卡" in name:
        return "信用卡"
    if "借记卡" in name:
        return "现金"
    return None
|
||
|
||
|
||
class ALiPay:
    """Load an Alipay bill CSV export into a pandas DataFrame.

    Construction detects the encoding of the source CSV and writes a UTF-8
    copy into ``output_dir``; ``get_ali_pay_bill`` reads that copy.
    """

    def __init__(self, csv_file, output_dir='/Users/renmeng/Downloads'):
        """Prepare the UTF-8 copy of ``csv_file``.

        Args:
            csv_file: Path of the exported bill, as a string.
            output_dir: Directory that receives the converted copy. Defaults
                to the location that was previously hard-coded.
        """
        # Detect the encoding of the source file.
        self.encoding = detection_file_encoding(csv_file)

        # Name the copy after the 2nd and 3rd '-'-separated pieces of the
        # input path; fall back to the current Unix time when there are none.
        name_parts = csv_file.split("-")[1:3]
        suffix = "_".join(name_parts) if name_parts else int(time.time())
        self.target_file = f'{output_dir}/支付宝交易账单-{suffix}.csv'  # target file name

        # Write the converted copy and remember the encoding detected on it.
        self.encoding = encoding_conversion(source_file=csv_file, target_file=self.target_file,
                                            source_encoding=self.encoding,
                                            target_encoding="utf-8")

    @staticmethod
    def _to_amount(value):
        """Convert one amount cell to ``float``; blank or missing cells are 0."""
        if isinstance(value, str):
            value = value.strip()
            return float(value) if value else 0.0
        if value is None or pd.isna(value):
            return 0.0
        return float(value)

    def get_ali_pay_bill(self):
        """Read the converted bill and derive the reporting columns.

        Only rows whose '交易状态' equals '成功' are kept. '创建时间' is
        reformatted as ``YYYY-MM-DD`` text, '账户' is derived from
        '支付渠道', and '金额' is the order amount minus the refund total
        minus the discount.

        Returns:
            The resulting DataFrame.
        """
        # skiprows=2: the first two lines of the file are skipped before the
        # header row is read.
        df = pd.read_csv(self.target_file, encoding=self.encoding, skiprows=2)

        # Copy the filtered frame so the column assignments below do not
        # write into a slice of the original.
        df = df[df['交易状态'] == '成功'].copy()

        # Parse the timestamps, then keep only the date part as text.
        df['创建时间'] = pd.to_datetime(df['创建时间']).dt.strftime('%Y-%m-%d')
        df['账户'] = df['支付渠道'].apply(reset_account_name)

        # All three amount columns go through the same parser so blank,
        # padded, missing, and already-numeric cells behave alike.
        df['金额'] = (
            df['订单金额(元)'].apply(self._to_amount)
            - df['累计退款总额(元)'].apply(self._to_amount)
            - df['优惠(元)'].apply(self._to_amount)
        )
        return df
|