qnloft-spider/理财记账/ali_pay.py

83 lines
2.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'''
Alipay bill processing.
Encoding detection uses the cchardet package because it is more accurate than chardet.
'''
import codecs
import time
from pathlib import Path
import cchardet as chardet
import numpy as np
import pandas as pd
def detection_file_encoding(file_name):
    """Detect the text encoding of a file and report it on stdout.

    The whole file is read as bytes and passed to ``chardet.detect`` (this
    module imports ``cchardet`` under the name ``chardet``).

    Args:
        file_name: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of the detection result. It may be ``None``
        when the detector cannot decide (for example, for an empty file).
    """
    with open(file_name, 'rb') as file:
        rawdata = file.read()
    result = chardet.detect(rawdata)
    # The detection result carries both the encoding and a confidence score.
    encoding = result['encoding']
    confidence = result['confidence']
    # The detector can report no confidence at all; formatting None with
    # ":.2f" would raise TypeError, so report 0.00 in that case.
    if confidence is None:
        confidence = 0.0
    print(f"文件【{file_name}】 编码:{encoding}, 置信度:{confidence:.2f}")
    return encoding
def encoding_conversion(source_file, target_file, source_encoding, target_encoding):
    """Re-encode a text file into ``target_file``, reusing an existing target.

    If ``target_file`` already exists, nothing is converted and the detected
    encoding of the existing file is returned. Otherwise ``source_file`` is
    decoded with ``source_encoding`` and written line by line to
    ``target_file`` using ``target_encoding``.

    Args:
        source_file: Path of the file to read.
        target_file: Path of the file to create.
        source_encoding: Encoding used to decode ``source_file``. ``None``
            makes ``open`` fall back to the platform default encoding.
        target_encoding: Encoding used to write ``target_file``.

    Returns:
        The encoding detected for ``target_file`` by
        ``detection_file_encoding``.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeError: If the content cannot be decoded or encoded. The
            incomplete target file is removed before the error propagates.
    """
    file_path = Path(target_file)
    if file_path.exists():
        return detection_file_encoding(target_file)

    finished = False
    try:
        # newline='' keeps the original line endings untouched on both sides.
        with open(source_file, 'r', encoding=source_encoding, newline='') as source:
            with open(target_file, 'w', encoding=target_encoding, newline='') as target:
                for line in source:
                    target.write(line)
        finished = True
    finally:
        # A half-written target would be mistaken for a finished conversion
        # by the exists() shortcut above on the next call, so remove it.
        if not finished and file_path.exists():
            file_path.unlink()

    encoding = detection_file_encoding(target_file)
    print(f"文件已从 {source_encoding} 编码转换为 {encoding} 编码")
    return encoding
def reset_account_name(name):
    """Map a payment-channel description to a bookkeeping account name.

    The checks run in order and the first match wins.

    Args:
        name: Payment-channel text. Non-string values (such as the NaN that
            pandas uses for a missing cell) are accepted and treated as
            "no match".

    Returns:
        ``"支付宝"`` if the text mentions 余额宝 or 滴滴出行, ``"信用卡"`` if
        it mentions 信用卡, ``"现金"`` if it mentions 借记卡, otherwise
        ``None``.
    """
    # A missing cell arrives as float NaN / None; the substring test on it
    # would raise TypeError, so treat it like an unmatched channel.
    if not isinstance(name, str):
        return None
    if "余额宝" in name or '滴滴出行' in name:
        return "支付宝"
    if "信用卡" in name:
        return "信用卡"
    if "借记卡" in name:
        return "现金"
    return None
class ALiPay:
    """Loader for an Alipay bill export in CSV form.

    On construction the export is re-encoded to UTF-8 into a copy under
    ``output_dir``; ``get_ali_pay_bill`` then reads that copy with pandas.

    Attributes set by ``__init__``:
        encoding: Encoding reported for the converted copy.
        target_file: Path (str) of the converted copy.
    """

    def __init__(self, csv_file, output_dir='/Users/renmeng/Downloads'):
        """Convert ``csv_file`` to UTF-8 and remember where the copy lives.

        Args:
            csv_file: Path of the exported bill.
            output_dir: Directory that receives the converted copy. The
                default keeps the location this class has always used.
        """
        # Detect the encoding of the raw export.
        self.encoding = detection_file_encoding(csv_file)
        # The copy is named after the 2nd and 3rd "-"-separated pieces of the
        # file name. Only the file name is split: a hyphen in a directory
        # name must not leak path separators into the new name.
        name_parts = Path(csv_file).name.split("-")[1:3]
        if name_parts:
            rename = "_".join(name_parts)
        else:
            # No hyphen in the name: fall back to the current Unix timestamp.
            rename = int(time.time())
        self.target_file = str(Path(output_dir) / f'支付宝交易账单-{rename}.csv')
        # Create the UTF-8 copy (or reuse an existing one).
        self.encoding = encoding_conversion(source_file=csv_file, target_file=self.target_file,
                                            source_encoding=self.encoding,
                                            target_encoding="utf-8")

    @staticmethod
    def _parse_amount(value):
        """Convert one raw amount cell to ``float``; blank or missing is 0."""
        # Missing cells show up as NaN/None once pandas has parsed the file.
        if pd.isna(value):
            return 0.0
        if isinstance(value, str):
            value = value.strip()
            if not value:
                return 0.0
        return float(value)

    def get_ali_pay_bill(self):
        """Read the converted bill and derive the bookkeeping columns.

        Only rows whose 交易状态 equals 成功 are kept. 创建时间 is reformatted
        as ``YYYY-MM-DD`` text, 账户 is derived from 支付渠道 through
        ``reset_account_name``, and 金额 is computed as
        订单金额(元) - 累计退款总额(元) - 优惠(元).

        Returns:
            pandas.DataFrame: The filtered bill with the added 账户 and 金额
            columns.
        """
        # skiprows=2 skips the first two lines of the file before the header.
        df = pd.read_csv(self.target_file, encoding=self.encoding, skiprows=2)
        df = df.drop(index=df[df['交易状态'] != '成功'].index)
        # Parse the timestamps, then keep only the date part as text.
        df['创建时间'] = pd.to_datetime(df['创建时间']).dt.strftime('%Y-%m-%d')
        df['账户'] = df['支付渠道'].apply(reset_account_name)
        # All three amount columns go through the same parser so that blank,
        # padded, missing and already-numeric cells are handled alike.
        df['金额'] = (
            df['订单金额(元)'].apply(self._parse_amount)
            - df['累计退款总额(元)'].apply(self._parse_amount)
            - df['优惠(元)'].apply(self._parse_amount)
        )
        return df