qnloft-spider/理财记账/ali_pay.py

83 lines
2.9 KiB
Python
Raw Permalink Normal View History

2023-11-26 12:31:18 +00:00
'''
Alipay bill processing.

Encoding detection uses the cchardet package, which detects more accurately than chardet.
'''
import codecs
import time
from pathlib import Path
import cchardet as chardet
import numpy as np
import pandas as pd
def detection_file_encoding(file_name):
    """Detect the text encoding of a file and print the result.

    The whole file is read as bytes and handed to ``chardet.detect`` (this
    module imports ``cchardet`` under that name).

    Args:
        file_name: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of the detection result, exactly as reported
        by the detector (it may be ``None`` when nothing could be detected).
    """
    with open(file_name, 'rb') as file:
        rawdata = file.read()

    # The detection result carries both the encoding and a confidence value.
    result = chardet.detect(rawdata)
    encoding = result['encoding']
    confidence = result['confidence']

    # The detector may report no confidence (e.g. for an empty file);
    # applying ":.2f" to None would raise TypeError, so format it separately.
    confidence_text = "N/A" if confidence is None else f"{confidence:.2f}"
    print(f"文件【{file_name}】 编码:{encoding}, 置信度:{confidence_text}")
    return encoding
def encoding_conversion(source_file, target_file, source_encoding, target_encoding):
    """Re-encode a text file into ``target_file`` and return the detected encoding.

    If ``target_file`` already exists, nothing is converted: the existing file
    is inspected and its detected encoding is returned.

    Args:
        source_file: Path of the file to read.
        target_file: Path of the file to create.
        source_encoding: Encoding used to decode ``source_file``.
        target_encoding: Encoding used to write ``target_file``.

    Returns:
        The encoding reported by ``detection_file_encoding`` for ``target_file``.

    Raises:
        Any error raised while reading, decoding or writing is propagated
        unchanged; a partially written ``target_file`` is removed first.
    """
    target_path = Path(target_file)
    if target_path.exists():
        return detection_file_encoding(target_file)

    # Copy line by line through the codecs readers/writers so the text is
    # decoded with the source encoding and encoded with the target encoding.
    try:
        with codecs.open(source_file, 'r', encoding=source_encoding) as source, \
                codecs.open(target_file, 'w', encoding=target_encoding) as target:
            for line in source:
                target.write(line)
    except BaseException:
        # A truncated target would make the next call take the "already
        # exists" shortcut above and silently use broken data, so remove it
        # before re-raising the original error.
        if target_path.exists():
            target_path.unlink()
        raise

    encoding = detection_file_encoding(target_file)
    print(f"文件已从 {source_encoding} 编码转换为 {encoding} 编码")
    return encoding
def reset_account_name(name):
    """Map a payment-channel description to a bookkeeping account name.

    The first matching rule wins:

    * contains "余额宝" or "滴滴出行" -> "支付宝"
    * contains "信用卡" -> "信用卡"
    * contains "借记卡" -> "现金"

    Args:
        name: Payment-channel text of one bill row.

    Returns:
        The account name, or ``None`` when no rule matches or ``name`` is not
        a string (e.g. a NaN cell coming from a pandas column).
    """
    # Non-string cells cannot be searched with ``in``; treat them like an
    # unmatched channel instead of raising TypeError.
    if not isinstance(name, str):
        return None
    if "余额宝" in name or '滴滴出行' in name:
        return "支付宝"
    if "信用卡" in name:
        return "信用卡"
    if "借记卡" in name:
        return "现金"
    return None
class ALiPay:
    """Load an Alipay bill CSV and prepare it for bookkeeping.

    Construction converts the source CSV into a UTF-8 copy inside
    ``output_dir``; ``get_ali_pay_bill`` then reads that copy.
    """

    def __init__(self, csv_file, output_dir='/Users/renmeng/Downloads'):
        """Detect the source encoding and create the UTF-8 working copy.

        Args:
            csv_file: Path of the exported bill CSV.
            output_dir: Directory in which the converted copy is stored.

        The copy is named ``支付宝交易账单-<suffix>.csv`` where ``<suffix>`` is
        the second and third "-"-separated parts of the source file name
        joined by "_", or the current Unix timestamp when the name has no
        such parts.
        """
        # Detect the encoding of the original export.
        source_encoding = detection_file_encoding(csv_file)

        # Split only the file name: a "-" in a parent directory would
        # otherwise pull a path separator into the target file name.
        name_parts = Path(csv_file).name.split("-")[1:3]
        rename = "_".join(name_parts) if name_parts else int(time.time())
        self.target_file = f'{output_dir}/支付宝交易账单-{rename}.csv'  # converted copy

        # Create the converted copy (skipped when it already exists) and keep
        # the encoding detected for it, which is used when reading it back.
        self.encoding = encoding_conversion(source_file=csv_file,
                                            target_file=self.target_file,
                                            source_encoding=source_encoding,
                                            target_encoding="utf-8")

    @staticmethod
    def _to_amount(value):
        """Convert one raw amount cell to ``float``; blank or missing means 0."""
        if isinstance(value, str):
            value = value.strip()
            return float(value) if value else 0.0
        if pd.isna(value):
            return 0.0
        return float(value)

    def get_ali_pay_bill(self):
        """Read the converted bill and return the successful transactions.

        Returns:
            A DataFrame restricted to rows whose '交易状态' is '成功', with
            '创建时间' formatted as ``YYYY-MM-DD`` and two added columns:
            '账户' (from '支付渠道' via ``reset_account_name``) and '金额'
            (order amount minus accumulated refund minus discount).
        """
        # The first two lines of the file are skipped before the header row.
        df = pd.read_csv(self.target_file, encoding=self.encoding, skiprows=2)

        # Keep successful transactions only; copy so that the column
        # assignments below operate on an independent frame.
        df = df[df['交易状态'] == '成功'].copy()

        # Parse the creation time, then render it as a date-only string.
        df['创建时间'] = pd.to_datetime(df['创建时间']).dt.strftime('%Y-%m-%d')
        df['账户'] = df['支付渠道'].apply(reset_account_name)

        # All three amount columns go through the same parser so blank-padded
        # strings, numeric cells and missing values are handled uniformly.
        df['金额'] = (
            df['订单金额(元)'].apply(self._to_amount)
            - df['累计退款总额(元)'].apply(self._to_amount)
            - df['优惠(元)'].apply(self._to_amount)
        )
        return df