qnloft-spider/理财记账/ali_pay.py

'''
支付宝账单

cchardet 使用这个包检测比chardet更准确
'''
import codecs
import time
from pathlib import Path

import cchardet as chardet
import numpy as np
import pandas as pd


def detection_file_encoding(file_name):
	"""Detect the text encoding of a file and print the result.

	The file is read in full as bytes and handed to ``chardet.detect``
	(this module imports ``cchardet`` under the name ``chardet``).

	Args:
		file_name: Path of the file to inspect.

	Returns:
		The ``'encoding'`` entry of the detection result. It is returned
		as-is, so it is ``None`` whenever the detector reports no encoding.
	"""
	with open(file_name, 'rb') as file:
		rawdata = file.read()
	result = chardet.detect(rawdata)
	encoding = result['encoding']
	# The detector may report no confidence at all (e.g. nothing to analyse);
	# formatting None with ".2f" would raise TypeError, so fall back to 0.0.
	confidence = result['confidence'] or 0.0
	print(f"文件【{file_name}】 编码：{encoding}, 置信度：{confidence:.2f}")
	return encoding


def encoding_conversion(source_file, target_file, source_encoding, target_encoding):
	"""Re-encode ``source_file`` into ``target_file``.

	If ``target_file`` already exists nothing is converted; the encoding
	detected for the existing file is returned instead.

	Args:
		source_file: Path of the file to read.
		target_file: Path of the file to create.
		source_encoding: Encoding used to decode ``source_file``.
		target_encoding: Encoding used to write ``target_file``.

	Returns:
		The encoding reported by ``detection_file_encoding`` for
		``target_file``.

	Raises:
		Whatever opening, decoding or encoding raises. A partially written
		``target_file`` is removed first, so a later call does not mistake
		it for a finished conversion.
	"""
	file_path = Path(target_file)
	if file_path.exists():
		return detection_file_encoding(target_file)

	try:
		# newline='' disables newline translation on both sides, so only the
		# character encoding of the content changes.
		with open(source_file, 'r', encoding=source_encoding, newline='') as source:
			with open(target_file, 'w', encoding=target_encoding, newline='') as target:
				for line in source:
					target.write(line)
	except Exception:
		# The existence check above would treat a truncated file as a valid
		# cached conversion, so discard it before propagating the error.
		if file_path.exists():
			file_path.unlink()
		raise

	encoding = detection_file_encoding(target_file)
	print(f"文件已从 {source_encoding} 编码转换为 {encoding} 编码")
	return encoding


def reset_account_name(name):
	"""Map a payment-channel description to a bookkeeping account name.

	Args:
		name: Payment channel text. Non-string values (for example the NaN
			that pandas uses for an empty cell) are treated as unmatched
			instead of raising ``TypeError``.

	Returns:
		``"支付宝"`` when ``name`` contains ``"余额宝"`` or ``"滴滴出行"``,
		``"信用卡"`` when it contains ``"信用卡"``, ``"现金"`` when it
		contains ``"借记卡"``, otherwise ``None``. The checks run in that
		order, so the first match wins.
	"""
	if not isinstance(name, str):
		return None
	if "余额宝" in name or '滴滴出行' in name:
		return "支付宝"
	if "信用卡" in name:
		return "信用卡"
	if "借记卡" in name:
		return "现金"
	return None


class ALiPay:
	"""Convert an Alipay bill CSV to UTF-8 and load it as a DataFrame.

	Attributes are set in ``__init__``:
		target_file: Path (str) of the converted copy of the bill.
		encoding: Encoding reported for ``target_file``.
	"""

	def __init__(self, csv_file, output_dir='/Users/renmeng/Downloads'):
		"""Create the converted copy of ``csv_file``.

		Args:
			csv_file: Path of the exported bill (str or path-like).
			output_dir: Directory that receives the converted file. The
				default keeps the location this class has always used.
		"""
		# Detect the encoding of the original export.
		self.encoding = detection_file_encoding(csv_file)
		# The 2nd and 3rd dash-separated parts of the path name the new file
		# (presumably the bill's date - confirm against real export names);
		# without any dash the current Unix timestamp is used instead.
		name_parts = str(csv_file).split("-")[1:3]
		rename = "_".join(name_parts) if name_parts else int(time.time())
		self.target_file = f'{output_dir}/支付宝交易账单-{rename}.csv'
		self.encoding = encoding_conversion(source_file=csv_file, target_file=self.target_file,
											source_encoding=self.encoding,
											target_encoding="utf-8")

	@staticmethod
	def _to_amount(value):
		"""Convert one amount cell to float; blank or missing cells become 0.0."""
		if isinstance(value, str):
			value = value.strip()
			if not value:
				return 0.0
		elif pd.isna(value):
			return 0.0
		return float(value)

	def get_ali_pay_bill(self):
		"""Load the converted bill and derive the bookkeeping columns.

		The first two lines of the file are skipped, only rows whose
		``交易状态`` equals ``'成功'`` are kept, ``创建时间`` is rewritten as
		a ``'%Y-%m-%d'`` string, ``账户`` is derived from ``支付渠道`` with
		``reset_account_name`` and ``金额`` is the order amount minus the
		accumulated refund minus the discount.

		Returns:
			The resulting ``pandas.DataFrame``.
		"""
		df = pd.read_csv(self.target_file, encoding=self.encoding, skiprows=2)
		# Copy so the column assignments below do not write into a view of
		# the unfiltered frame.
		df = df[df['交易状态'] == '成功'].copy()
		created = pd.to_datetime(df['创建时间'])
		df['账户'] = df['支付渠道'].apply(reset_account_name)
		df['创建时间'] = created.dt.strftime('%Y-%m-%d')
		# Every amount column goes through the same parser, so it does not
		# matter whether pandas read the column as text or as numbers.
		df['金额'] = df['订单金额(元)'].apply(self._to_amount) \
					 - df['累计退款总额(元)'].apply(self._to_amount) \
					 - df['优惠(元)'].apply(self._to_amount)
		return df