Initialize project
This commit is contained in:
parent 1e78c893bf
commit 50fa6ae8b8

@@ -0,0 +1,31 @@

## Recruitment Website Data Scraping

### Reference projects

#### Lagou (拉钩网)

> Simple Python scraping examples covering many sites: proxy lists, 58 Daojia, house-price sites, Eastmoney, ITOrange, postal codes, Kangmei TCM, Lagou, Maoyan, investment/financing data, China Judgements Online, Ziroom, Baike, China house prices, NetEase Cloud Music, Qunar, Autohome

- [spider-project](https://github.com/tanjunchen/spider-project)

> A crawler for Lagou job listings, for learning and reference only! Any commercial use is at your own risk

- [lagou_crawler](https://github.com/DE009/lagou_crawler)

#### BOSS Zhipin (BOSS 直聘)

> Crawler reverse-engineering case studies, completed: NetEase Yidun | WeChat mini-program decompilation (百达星系) | 10jqka (同花顺) | RPC decryption | Jiasule (加速乐) | Geetest slider captcha | Juliang Suanshu (巨量算数) | Boss Zhipin | Qichacha | China Minmetals | QQ Music | industrial-policy big-data platform | Qizhidao | Xueqiu (acw_sc__v2) | 1688 | Qimai Data | whggzy | Qiming Tech | mohurd | Endata | OKLink

- [spider_reverse](https://github.com/ChenZixinn/spider_reverse)

> A collection of Python crawler projects, from basics to JS reverse engineering: a basics track, an automation track, an advanced track, and a captcha track. Cases cover major sites (xhs, douyin, weibo, ins, boss job, jd, ...) and teach crawling, anti-crawling, automation, and captcha handling

- [crawlProject](https://github.com/xishandong/crawlProject)

#### Zhaopin (智联招聘)

#### Automatic resume submission

> Submits your resume on Lagou automatically

- [Lagou auto-apply](https://github.com/BeammNotFound/get-jobs-lagou)
- [Boss Zhipin auto-apply](https://github.com/BeammNotFound/get-jobs-boss)
- [51job auto-apply](https://github.com/BeammNotFound/get-jobs-51job)


@@ -0,0 +1,28 @@

[Fetch all search filter conditions](https://www.zhipin.com/wapi/zpgeek/search/job/condition.json)

[Fetch all cities nationwide](https://www.zhipin.com/wapi/zpCommon/data/cityGroup.json)

[Main job-search URL; requires parameters](https://www.zhipin.com/wapi/zpgeek/mobile/search/joblist.json)

- scene: 1
- query: Java
- city: 101010100
- experience:
- payType:
- partTime:
- degree:
- industry:
- scale:
- stage:
- position:
- jobType:
- salary:
- multiBusinessDistrict:
- multiSubway:
- page: 1
- pageSize: 30

[Fetch all keywords related to a search term](https://www.zhipin.com/wapi/zpgeek/search/job/related/word.json?query=关键词)
[Fetch district info for a city](https://www.zhipin.com/wapi/zpgeek/businessDistrict.json?cityCode=101010100)
[Fetch subway line info; needed when filtering by subway line](https://www.zhipin.com/wapi/zpCommon/data/getSubwayByCity?cityCode=101010100)
[Company industry filter data](https://www.zhipin.com/wapi/zpCommon/data/industry.json)
[Job type filter data](https://www.zhipin.com/wapi/zpCommon/data/getCityShowPosition)
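
For orientation, a minimal sketch of querying the mobile joblist endpoint with the parameters documented above. It assumes a valid `__zp_stoken__` cookie has already been generated (the `BossJob` class below does this); the cookie value here is a hypothetical placeholder.

```python
import requests

# Minimal sketch: query the mobile joblist endpoint with the documented params.
cookies = {'__zp_stoken__': '<generated token>'}  # hypothetical placeholder
params = {'scene': 1, 'query': 'Java', 'city': 101010100, 'page': 1, 'pageSize': 30}
resp = requests.get(
    'https://www.zhipin.com/wapi/zpgeek/mobile/search/joblist.json',
    params=params,
    headers={'Accept': 'application/json, text/plain, */*'},
    cookies=cookies,
    timeout=10,
)
# The mobile API returns the job cards as an HTML fragment under zpData.html
print(resp.json().get('zpData', {}).get('html'))
```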

@@ -0,0 +1,367 @@

import json
import time
from csv import DictWriter
from itertools import islice
from typing import Literal, Iterator, Union
from urllib.parse import urlparse, parse_qs

import execjs
import requests
from lxml import etree
from tqdm import tqdm  # used by the commented-out progress bar in show_pro

# Proxy settings
# from Proxy_info import proxies, get_api
# from boss.点选 import BossSlide

# Type control
Accept = Literal['json', 'text', 'contents']
city_code_dict: dict = json.load(open('cityCode.json', 'r', encoding='utf-8'))

# Sleep time between requests (seconds)
sleepTime = 5


class BossJob:
    def __init__(self, js_name: str = '', proxy: dict = None):
        self.isFirst: bool = True  # whether this is the first visit
        self.js_name: str = js_name  # name of the security JS file
        self.seed: str = ''  # random seed
        self.ts: str = ''  # timestamp
        # API list
        self.apiList: list[str] = [
            'https://www.zhipin.com/wapi/zpgeek/mobile/search/joblist.json',  # mobile job search, requires params
            'https://www.zhipin.com/job_detail/',  # no params required
            f'https://www.zhipin.com/web/common/security-js/{self.js_name}.js',  # dynamically loaded security JS
            'https://www.zhipin.com/wapi/zpgeek/search/joblist.json'  # web API
        ]
        # Request headers
        self.headers: dict = {
            'Accept': 'application/json, text/plain, */*',
        }
        self.cookies: dict = {}  # cookies
        self.js = execjs.compile(open('demo.js', 'r', encoding='utf-8').read())  # compiled JS for token generation
        self.stop: bool = False  # stops the mobile search loop
        self.checkEnd: str = ''  # detects whether the mobile search has reached the end
        self.proxy = proxy  # proxy

    # Send a request, retrying up to 5 times
    def ajax_request(self, url: str, params: dict = None, cookies=None) -> requests.Response:
        for _ in range(5):
            try:
                resp = requests.get(url, params=params, headers=self.headers, cookies=cookies, timeout=10,
                                    # proxies=self.proxy
                                    )
                if resp.status_code == 200:
                    return resp
                elif resp.status_code == 403:
                    print("===== got HTTP 403, IP is banned =====")
                    self.show_pro(sleepTime)
                    self.change_ip()
                    continue
                else:
                    print('HTTP Error: %s' % resp.status_code)
                    self.show_pro(sleepTime)
                    continue
            except Exception as e:
                print('Error: ', e)
                print('URL: ', url)
                self.show_pro(sleepTime)
                continue
        raise Exception('Could not get a valid response after 5 attempts...')

    # Initialize a search: obtain the seed/ts used to generate the __zp_stoken__ cookie
    def first_get_seed(self, url: str, params: dict = None, isWeb: bool = False) -> Union[requests.Response, None]:
        if self.isFirst:
            resp = self.ajax_request(url=url, params=params)
            self.isFirst = False
        else:
            resp = self.ajax_request(url=url, params=params, cookies=self.cookies)
        # No redirect occurred and this is not the web API
        if resp.url == url and not isWeb:
            print(f'===== cookie not updated this time: {resp.url} =====')
            return resp
        elif isWeb:
            zpData = resp.json()['zpData']
            self.seed = zpData['seed']
            self.ts = zpData['ts']
            name = zpData['name']
            self.check_js(name)
            return
        # Handle a redirect to the security-check page
        parsedUrl = urlparse(resp.url)
        generatedDict = parse_qs(parsedUrl.query)
        self.seed = generatedDict['seed'][0]
        self.ts = generatedDict['ts'][0]
        name = generatedDict['name'][0]
        self.check_js(name)

    # Mobile job search
    def search_job_mobile(self, position: str, city: str, startPage: int = 1) -> Iterator:
        self.headers.update({
            'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36',
        })
        city_code = city_code_dict.get(city)
        if city_code:
            params: dict = {
                'city': city_code,
                'query': position,
                'page': startPage
            }
            # Initialize the search
            self.first_get_seed(self.apiList[1], params)
            self.update_cookie()
            continuations: list = [params]
            # Simulated pagination
            while continuations:
                continuation = continuations.pop()
                resp = self.ajax_request('https://www.zhipin.com/wapi/zpgeek/mobile/search/joblist.json',
                                         params=continuation, cookies=self.cookies)
                html = resp.json().get('zpData', {}).get('html')
                # New postings exist
                if html and self.stop is False:
                    print(f'===== scraping {position}-{city} page {continuation["page"]} =====')
                    continuation['page'] += 1
                    continuations.append(continuation)
                    # Yield parsed items
                    yield from self.parse_search_html(html)
                    # Throttle the crawl rate
                    self.show_pro(sleepTime)
                elif not html and self.stop is False:
                    print('===== IP is banned =====')
                    continuations.append(continuation)
                    self.show_pro(sleepTime)
                    self.change_ip()
                    self.isFirst = True
                    self.first_get_seed(self.apiList[1], params)
                    self.update_cookie()
                else:
                    print(f'===== scraping {position}-{city} stopped =====')
        else:
            raise Exception(f'Invalid city name: {city}')

    # Web job search
    def search_job_web(self, position: str, city: str, startPage: int = 1, totalPage: int = 1) -> Iterator:
        self.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
        })
        city_code = city_code_dict.get(city)

        if city_code:
            params = {
                'query': position,
                'city': city_code,
                'page': 1,
                'pageSize': '30',
                'scene': '1',
            }
            # First visit
            self.isFirst = True
            self.first_get_seed(self.apiList[3], params=params, isWeb=True)
            page = startPage
            # Pagination control
            while page <= totalPage:
                params.update({'page': page})
                self.update_cookie()
                resp = self.ajax_request(self.apiList[3], params=params, cookies=self.cookies)

                print(f'===== scraping {position}-{city} page {page} =====')
                # Access error: regenerate the cookie
                if resp.json().get('code') == 37:
                    print(f'===== {resp.json().get("message")}, retrying =====')
                    zpData = resp.json()['zpData']
                    self.seed = zpData['seed']
                    self.ts = zpData['ts']
                    self.show_pro(sleepTime)
                    continue
                # IP banned: pause for a while
                elif resp.json().get('code') == 5002:
                    print(f'===== {resp.json().get("message")} =====')
                    self.show_pro(sleepTime)
                    self.change_ip()
                    self.isFirst = True
                    self.first_get_seed(self.apiList[3], params=params, isWeb=True)
                    continue
                # Got data
                searchData = resp.json().get('zpData', {}).get('jobList')
                if searchData:
                    page += 1
                    # Yield to the pipeline
                    yield from self.parse_search_data(searchData)
                    # Take a break
                    self.show_pro(sleepTime)
                    # Seed and timestamp needed for the next request
                    self.seed = resp.cookies['__zp_sseed__']
                    self.ts = resp.cookies['__zp_sts__']
        else:
            raise Exception(f'Invalid city name: {city}')

    # Fetch a detail page by encrypted job id
    def get_job_details_by_id(self, encryptJobId: str) -> str:
        url = self.apiList[1] + encryptJobId + '.html'
        return self.get_job_details_by_url(url)

    # Fetch a detail page by URL
    def get_job_details_by_url(self, url: str) -> str:
        self.headers.update({
            'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36',
        })
        resp = self.first_get_seed(url)
        self.update_cookie()

        if not resp:
            resp = self.ajax_request(url, cookies=self.cookies)
        tree = etree.HTML(resp.text)
        texts = tree.xpath('//div[@class="detail-content"]//text()')
        textList: list = [i.strip() for i in texts if i.strip()]

        if not textList:
            print('===== resetting cookie to fetch the detail page =====')
            self.isFirst = True
            self.show_pro(sleepTime)
            return self.get_job_details_by_url(url)

        return '\n'.join(textList)

    # Save mobile search results to CSV
    def save_job_list_to_csv(self, position: str, city: str, startPage: int = 1, saveCount: int = 100):
        dataSet: Iterator = self.search_job_mobile(position, city, startPage)

        header = ['job_name', 'detail_url', 'pay', 'company_name', 'requirement']
        fp = open(f'mobile-{position}-{city}.csv', 'w', encoding='utf-8', newline='')
        writer = DictWriter(fp, header)
        writer.writeheader()

        for job in islice(dataSet, saveCount):
            job['requirement'] = ';'.join(job['requirement'])
            writer.writerow(job)

    # Save web search results to CSV
    def save_job_list_to_csv_web(self, position: str, city: str, startPage: int = 1, savePage: int = 2):
        dataSet = self.search_job_web(position, city, startPage, savePage)

        header = [
            'jobName', 'encryptJobId', 'salaryDesc', 'jobLabels', 'skills', 'jobExperience',
            'jobDegree', 'cityName', 'brandName', 'brandScaleName', 'welfareList', 'brandIndustry'
        ]
        fp = open(f'web-{position}-{city}.csv', 'w', encoding='utf-8', newline='')
        writer = DictWriter(fp, header)
        writer.writeheader()

        for job in dataSet:
            job['jobLabels'] = ';'.join(job['jobLabels'])
            job['skills'] = ';'.join(job['skills'])
            job['welfareList'] = ';'.join(job['welfareList'])
            writer.writerow(job)

    # Regenerate the __zp_stoken__ cookie
    def update_cookie(self):
        print(f"seed === {self.seed} , ts === {self.ts}")
        __zp = self.js.call('r', self.seed, self.ts)
        self.cookies['__zp_stoken__'] = __zp
        print(f'===== cookie updated: {self.cookies["__zp_stoken__"]}')

    # Parse the mobile search HTML
    def parse_search_html(self, html: str) -> Iterator:
        tree = etree.HTML(html)
        li_list = tree.xpath('//li')

        for num, li in enumerate(li_list, start=1):
            if num == 1:
                if self.checkEnd == li.xpath('./a/@href')[0]:
                    self.stop = True
                    return
                self.checkEnd = li.xpath('./a/@href')[0]

            yield {
                'job_name': li.xpath('./a/div[1]/span[1]/text()')[0],
                'detail_url': 'https://www.zhipin.com' + li.xpath('./a/@href')[0],
                'pay': li.xpath('a/div[1]/span[2]/text()')[0],
                'company_name': li.xpath('./a/div[2]/span[1]/text()')[0],
                'requirement': [r.strip() for r in li.xpath('./a/div[3]//text()') if r.strip()]
            }

    # Check whether the security JS is up to date
    def check_js(self, name):
        if self.js_name != name:
            self.js_name = name
            print(f"===== current JS name -----> {name} =====")
            resp = self.ajax_request(f'https://www.zhipin.com/web/common/security-js/{self.js_name}.js').text
            resp_ = resp.split('module,')
            resp = ''

            # Rewrite the `module` references, otherwise the script is easily flagged as a crawler
            for i in range(len(resp_)):
                resp += resp_[i]
                if i == 0:
                    resp += 'module_,'
                if i == 1:
                    resp += 'module,'

            with open('./jssss.js', 'w', encoding='utf-8') as f:
                f.write(resp)

    # Parse web search results
    @staticmethod
    def parse_search_data(searchData: list[dict]) -> Iterator:
        for job in searchData:
            yield {
                'jobName': job['jobName'],
                'encryptJobId': job['encryptJobId'],
                'salaryDesc': job['salaryDesc'],
                'jobLabels': job['jobLabels'],
                'skills': job['skills'],
                'jobExperience': job['jobExperience'],
                'jobDegree': job['jobDegree'],
                'cityName': job['cityName'],
                'brandName': job['brandName'],
                'brandScaleName': job['brandScaleName'],
                'welfareList': job['welfareList'],
                'brandIndustry': job['brandIndustry']
            }

    # Switch to a new proxy IP (stubbed out)
    @staticmethod
    def change_ip():
        # response = requests.get(
        #     'https://www.zhipin.com/wapi/zpAntispam/v2/geetest/validate',
        #     params=self.__do_verify(),
        #     cookies=self.cookies,
        #     headers=self.headers,
        # )
        # print(response.text)
        pass

    # Show a progress bar while sleeping (stubbed out)
    @staticmethod
    def show_pro(t: int, isOpen: bool = True):
        pass
        # time.sleep(1)
        # if isOpen:
        #     for _ in tqdm(
        #             range(t * 10),
        #             leave=False,
        #             colour='blue',
        #             desc='waiting...',
        #             ascii='*-'
        #     ):
        #         time.sleep(0.1)


if __name__ == '__main__':
    boss = BossJob('8955eed0')
    # Fetch a detail page by URL
    # detail = boss.get_job_details_by_url('https://www.zhipin.com/job_detail/fc823036861698e10nF42NW0GVo~.html')
    # Fetch a detail page by encrypted id
    # detail = boss.get_job_details_by_id('05988daddc5b6afc1n1-3du1FVZW')
    # print(detail)
    # Save data
    # boss.save_job_list_to_csv('python', '上海', saveCount=20)
    # boss.save_job_list_to_csv_web('python', '上海', 2, 2)
    # Web search
    items = boss.search_job_web('python', '上海', 1, 10)
    # Mobile search
    # items = boss.search_job_mobile('web', '上海')
    for item in items:
        print(item)

@@ -0,0 +1,373 @@

{
    "鞍山": 101070300,
    "阿拉善盟": 101081200,
    "安康": 101110700,
    "阿克苏地区": 101131000,
    "阿勒泰地区": 101131500,
    "阿拉尔": 101131700,
    "阿里地区": 101140700,
    "安阳": 101180200,
    "安庆": 101220600,
    "安顺": 101260300,
    "阿坝藏族羌族自治州": 101271900,
    "澳门": 101330100,
    "北京": 101010100,
    "白城": 101060500,
    "白山": 101060800,
    "本溪": 101070500,
    "包头": 101080200,
    "巴彦淖尔": 101080800,
    "保定": 101090200,
    "宝鸡": 101110900,
    "滨州": 101121100,
    "巴音郭楞蒙古自治州": 101130400,
    "博尔塔拉蒙古自治州": 101130500,
    "北屯市": 101132100,
    "白银": 101161000,
    "蚌埠": 101220200,
    "亳州": 101220900,
    "毕节": 101260500,
    "巴中": 101270900,
    "保山": 101290300,
    "百色": 101301000,
    "北海": 101301300,
    "白沙黎族自治县": 101311400,
    "保亭黎族苗族自治县": 101311800,
    "重庆": 101040100,
    "长春": 101060100,
    "朝阳": 101071200,
    "赤峰": 101080500,
    "承德": 101090400,
    "沧州": 101090700,
    "长治": 101100500,
    "昌吉回族自治州": 101130300,
    "昌都": 101140300,
    "常州": 101191100,
    "滁州": 101221000,
    "池州": 101221500,
    "长沙": 101250100,
    "郴州": 101250500,
    "常德": 101250600,
    "成都": 101270100,
    "潮州": 101281500,
    "楚雄彝族自治州": 101291700,
    "崇左": 101300200,
    "澄迈": 101311200,
    "昌江黎族自治县": 101311500,
    "大庆": 101050800,
    "大兴安岭地区": 101051300,
    "大连": 101070200,
    "丹东": 101070600,
    "大同": 101100200,
    "德州": 101120400,
    "东营": 101121200,
    "定西": 101160200,
    "达州": 101270600,
    "德阳": 101271700,
    "东莞": 101281600,
    "东沙群岛": 101282200,
    "德宏傣族景颇族自治州": 101291300,
    "迪庆藏族自治州": 101291500,
    "大理白族自治州": 101291600,
    "儋州": 101310400,
    "东方": 101310900,
    "定安": 101311000,
    "鄂尔多斯": 101080600,
    "鄂州": 101200300,
    "恩施土家族苗族自治州": 101201300,
    "抚顺": 101070400,
    "阜新": 101070900,
    "阜阳": 101220800,
    "福州": 101230100,
    "抚州": 101240400,
    "佛山": 101280800,
    "防城港": 101301400,
    "果洛藏族自治州": 101150600,
    "甘南藏族自治州": 101161400,
    "固原": 101170400,
    "赣州": 101240700,
    "贵阳": 101260100,
    "广安": 101270800,
    "广元": 101271800,
    "甘孜藏族自治州": 101272100,
    "广州": 101280100,
    "桂林": 101300500,
    "贵港": 101300800,
    "哈尔滨": 101050100,
    "黑河": 101050600,
    "鹤岗": 101051100,
    "葫芦岛": 101071400,
    "呼和浩特": 101080100,
    "呼伦贝尔": 101080700,
    "衡水": 101090800,
    "邯郸": 101091000,
    "汉中": 101110800,
    "菏泽": 101121000,
    "哈密": 101130900,
    "和田地区": 101131300,
    "海东": 101150200,
    "海北藏族自治州": 101150300,
    "黄南藏族自治州": 101150400,
    "海南藏族自治州": 101150500,
    "海西蒙古族藏族自治州": 101150800,
    "鹤壁": 101181200,
    "淮安": 101190900,
    "黄冈": 101200500,
    "黄石": 101200600,
    "杭州": 101210100,
    "湖州": 101210200,
    "合肥": 101220100,
    "淮南": 101220400,
    "淮北": 101221100,
    "黄山": 101221600,
    "衡阳": 101250400,
    "怀化": 101251200,
    "惠州": 101280300,
    "河源": 101281200,
    "红河哈尼族彝族自治州": 101291200,
    "贺州": 101300700,
    "河池": 101301200,
    "海口": 101310100,
    "佳木斯": 101050400,
    "鸡西": 101051000,
    "吉林": 101060200,
    "锦州": 101070700,
    "晋中": 101100400,
    "晋城": 101100600,
    "济南": 101120100,
    "济宁": 101120700,
    "金昌": 101160600,
    "酒泉": 101160800,
    "嘉峪关": 101161200,
    "焦作": 101181100,
    "济源": 101181800,
    "荆州": 101200800,
    "荆门": 101201200,
    "嘉兴": 101210300,
    "金华": 101210900,
    "九江": 101240200,
    "吉安": 101240600,
    "景德镇": 101240800,
    "江门": 101281100,
    "揭阳": 101281900,
    "克拉玛依": 101130200,
    "克孜勒苏柯尔克孜自治州": 101131100,
    "喀什地区": 101131200,
    "可克达拉市": 101132200,
    "昆玉市": 101132300,
    "开封": 101180800,
    "昆明": 101290100,
    "辽源": 101060600,
    "辽阳": 101071000,
    "廊坊": 101090600,
    "临汾": 101100700,
    "吕梁": 101101100,
    "临沂": 101120900,
    "聊城": 101121700,
    "拉萨": 101140100,
    "林芝": 101140400,
    "兰州": 101160100,
    "陇南": 101161100,
    "临夏回族自治州": 101161300,
    "洛阳": 101180900,
    "漯河": 101181500,
    "连云港": 101191000,
    "丽水": 101210800,
    "六安": 101221400,
    "龙岩": 101230700,
    "娄底": 101250800,
    "六盘水": 101260600,
    "泸州": 101271000,
    "乐山": 101271400,
    "凉山彝族自治州": 101272000,
    "临沧": 101290800,
    "丽江": 101290900,
    "柳州": 101300300,
    "来宾": 101300400,
    "临高": 101311300,
    "乐东黎族自治县": 101311600,
    "陵水黎族自治县": 101311700,
    "牡丹江": 101050300,
    "马鞍山": 101220500,
    "绵阳": 101270400,
    "眉山": 101271500,
    "梅州": 101280400,
    "茂名": 101282000,
    "那曲": 101140600,
    "南阳": 101180700,
    "南京": 101190100,
    "南通": 101190500,
    "宁波": 101210400,
    "宁德": 101230300,
    "南平": 101230900,
    "南昌": 101240100,
    "南充": 101270500,
    "内江": 101271200,
    "怒江傈僳族自治州": 101291400,
    "南宁": 101300100,
    "盘锦": 101071300,
    "平凉": 101160300,
    "平顶山": 101180500,
    "濮阳": 101181300,
    "莆田": 101230400,
    "萍乡": 101240900,
    "攀枝花": 101270200,
    "普洱": 101290500,
    "齐齐哈尔": 101050200,
    "七台河": 101050900,
    "秦皇岛": 101091100,
    "青岛": 101120200,
    "庆阳": 101160400,
    "潜江": 101201500,
    "衢州": 101211000,
    "泉州": 101230500,
    "黔东南苗族侗族自治州": 101260700,
    "黔南布依族苗族自治州": 101260800,
    "黔西南布依族苗族自治州": 101260900,
    "清远": 101281300,
    "曲靖": 101290200,
    "钦州": 101301100,
    "琼海": 101310600,
    "琼中黎族苗族自治县": 101311900,
    "日照": 101121500,
    "日喀则": 101140200,
    "上海": 101020100,
    "绥化": 101050500,
    "双鸭山": 101051200,
    "四平": 101060300,
    "松原": 101060700,
    "沈阳": 101070100,
    "石家庄": 101090100,
    "朔州": 101100900,
    "商洛": 101110600,
    "石河子": 101131600,
    "双河市": 101132400,
    "山南": 101140500,
    "石嘴山": 101170200,
    "商丘": 101181000,
    "三门峡": 101181700,
    "苏州": 101190400,
    "宿迁": 101191300,
    "十堰": 101201000,
    "随州": 101201100,
    "神农架": 101201700,
    "绍兴": 101210500,
    "宿州": 101220700,
    "三明": 101230800,
    "上饶": 101240300,
    "邵阳": 101250900,
    "遂宁": 101270700,
    "韶关": 101280200,
    "汕头": 101280500,
    "深圳": 101280600,
    "汕尾": 101282100,
    "三亚": 101310200,
    "三沙": 101310300,
    "天津": 101030100,
    "通化": 101060400,
    "铁岭": 101071100,
    "通辽": 101080400,
    "唐山": 101090500,
    "太原": 101100100,
    "铜川": 101111000,
    "泰安": 101120800,
    "吐鲁番": 101130800,
    "塔城地区": 101131400,
    "图木舒克": 101131800,
    "铁门关": 101132000,
    "天水": 101160900,
    "泰州": 101191200,
    "天门": 101201600,
    "台州": 101210600,
    "铜陵": 101221200,
    "铜仁": 101260400,
    "屯昌": 101311100,
    "台湾": 101341100,
    "乌海": 101080300,
    "乌兰察布": 101080900,
    "渭南": 101110500,
    "潍坊": 101120600,
    "威海": 101121300,
    "乌鲁木齐": 101130100,
    "五家渠": 101131900,
    "武威": 101160500,
    "吴忠": 101170300,
    "无锡": 101190200,
    "武汉": 101200100,
    "温州": 101210700,
    "芜湖": 101220300,
    "文山壮族苗族自治州": 101291100,
    "梧州": 101300600,
    "五指山": 101310500,
    "文昌": 101310700,
    "万宁": 101310800,
    "锡林郭勒盟": 101081000,
    "兴安盟": 101081100,
    "邢台": 101090900,
    "忻州": 101101000,
    "西安": 101110100,
    "咸阳": 101110200,
    "新星市": 101132500,
    "西宁": 101150100,
    "新乡": 101180300,
    "许昌": 101180400,
    "信阳": 101180600,
    "徐州": 101190800,
    "襄阳": 101200200,
    "孝感": 101200400,
    "咸宁": 101200700,
    "仙桃": 101201400,
    "宣城": 101221300,
    "厦门": 101230200,
    "新余": 101241000,
    "湘潭": 101250200,
    "湘西土家族苗族自治州": 101251400,
    "西双版纳傣族自治州": 101291000,
    "香港": 101320300,
    "伊春": 101050700,
    "延边朝鲜族自治州": 101060900,
    "营口": 101070800,
    "阳泉": 101100300,
    "运城": 101100800,
    "延安": 101110300,
    "榆林": 101110400,
    "烟台": 101120500,
    "伊犁哈萨克自治州": 101130600,
    "玉树藏族自治州": 101150700,
    "银川": 101170100,
    "扬州": 101190600,
    "盐城": 101190700,
    "宜昌": 101200900,
    "宜春": 101240500,
    "鹰潭": 101241100,
    "益阳": 101250700,
    "岳阳": 101251000,
    "永州": 101251300,
    "宜宾": 101271100,
    "雅安": 101271600,
    "云浮": 101281400,
    "阳江": 101281800,
    "玉溪": 101290400,
    "玉林": 101300900,
    "张家口": 101090300,
    "淄博": 101120300,
    "枣庄": 101121400,
    "张掖": 101160700,
    "中卫": 101170500,
    "郑州": 101180100,
    "周口": 101181400,
    "驻马店": 101181600,
    "镇江": 101190300,
    "舟山": 101211100,
    "漳州": 101230600,
    "株洲": 101250300,
    "张家界": 101251100,
    "遵义": 101260200,
    "自贡": 101270300,
    "资阳": 101271300,
    "珠海": 101280700,
    "肇庆": 101280900,
    "湛江": 101281000,
    "中山": 101281700,
    "昭通": 101290700
}

File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long

@@ -0,0 +1,4 @@

import execjs

# Compile the security JS and generate a __zp_stoken__ value from a seed and
# timestamp -- the same call BossJob.update_cookie makes.
js = execjs.compile(open('demo.js', 'r', encoding='utf-8').read())
print(js.call('r', "1EAWUR51t3ADpSjeK5ywydCLIV2U4WaF93nocYiDXQs=", "1699709623728"))

@@ -0,0 +1,24 @@

# Recruitment Data Scraping

## Lagou Usage Notes

> Note: a Node.js environment is required; see its own docs for installation and setup.

1. First enter the `Job` directory and install three packages:
```shell
npm install crypto-js
npm install jsencrypt
npm install get-random-values
```
Once installed you can run the `la_gou.py` script.

2. If the following error occurs during execution:
```shell
window is not defined
```
locate the `jsencrypt` install directory and add the following code in `node_modules/jsencrypt/bin/jsencrypt.js`:
```javascript
var window = {};
var navigator = {};
```
then run again.

@@ -0,0 +1,267 @@

import time
import json
import execjs
import requests
from lxml import etree
from urllib import parse

aes_key = ""
secret_key_value = ""

with open('lagou.js', 'r', encoding='utf-8') as f:
    lagou_js = execjs.compile(f.read())

UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"

x_anit = {
    "x-anit-forge-code": "0",
    "x-anit-forge-token": None
}

global_cookies = {
    # - If the IP gets blacklisted, access requires login, or you see "too frequent"
    #   warnings, you must fill in the post-login cookies!
    # - All values below must be copied after logging in, especially JSESSIONID;
    #   a JSESSIONID obtained without logging in is invalid!
    # - In testing, three cookies keep the session logged in: login, gate_login_token, _putrc
    # - JSESSIONID is mainly used to obtain the x-anit-forge-code and x-anit-forge-token headers

    # "login": "true",
    # "gate_login_token": "",
    # "_putrc": "",
    # "JSESSIONID": ""
}


def get_user_trace_token() -> str:
    # Fetch the user_trace_token cookie
    json_url = "https://a.lagou.com/json"
    headers = {
        "Host": "a.lagou.com",
        "Referer": "https://www.lagou.com/",
        "User-Agent": UA
    }
    params = {
        "lt": "trackshow",
        "t": "ad",
        "v": 0,
        "dl": "https://www.lagou.com/",
        "dr": "https://www.lagou.com",
        "time": str(int(time.time() * 1000))
    }
    response = requests.get(url=json_url, headers=headers, params=params)
    user_trace_token = response.cookies.get_dict()["user_trace_token"]
    return user_trace_token


def get_lg_stoken(original_data: dict) -> str:
    # Fetch the __lg_stoken__ cookie
    token_url = "https://www.lagou.com/wn/jobs"
    token_headers = {
        "Host": "www.lagou.com",
        "Referer": "https://www.lagou.com/",
        "User-Agent": UA
    }
    params = {
        "kd": original_data["kd"],
        "city": original_data["city"],
        "fromSearch": original_data["fromSearch"],
        "pn": original_data["pn"],
        "px": original_data["px"]
    }
    token_response = requests.get(url=token_url, params=params, headers=token_headers, cookies=global_cookies,
                                  allow_redirects=False)
    if token_response.status_code != 302:
        raise Exception("Unexpected response while fetching the redirect! Check whether global_cookies already contains __lg_stoken__!")
    # Take the 302 redirect target
    security_check_url = token_response.headers["Location"]
    print(f"security_check_url --->>> {security_check_url}")
    if "login" in security_check_url:
        raise Exception("The IP has been thrown into the blacklist! Login is required! Fill in the post-login cookies, or add a proxy!")
    parse_result = parse.urlparse(security_check_url)
    # The URL query string is the object to be encrypted
    security_check_params = parse_result.query
    # The `name` parameter is the filename of the obfuscated JS
    security_check_js_name = parse.parse_qs(security_check_params)["name"][0]

    # Fetch the obfuscated JS
    js_url = "https://www.lagou.com/common-sec/dist/" + security_check_js_name + ".js"
    js_headers = {
        "Host": "www.lagou.com",
        "Referer": security_check_url,
        "User-Agent": UA
    }
    js_response = requests.get(url=js_url, headers=js_headers, cookies=global_cookies).text
    # Complete the JS: add a window object and a helper that returns the __lg_stoken__ value
    lg_js = """
    window = {
        "location": {
            "hostname": "www.lagou.com",
            "search": '?%s'
        }
    }
    function getLgStoken(){
        return window.gt.prototype.a()
    }
    """ % security_check_params + js_response

    lg_stoken = execjs.compile(lg_js).call("getLgStoken")
    print(f"lg_stoken --->>> {lg_stoken}")
    return lg_stoken


def update_cookies(original_data: dict) -> None:
    global global_cookies
    # Fetch user_trace_token
    user_trace_token = get_user_trace_token()
    # Fetch X_HTTP_TOKEN
    x_http_token = lagou_js.call("getXHttpToken", "user_trace_token=" + user_trace_token)
    # First update of the global cookies; fetching __lg_stoken__ below relies on them
    global_cookies.update({
        "user_trace_token": user_trace_token,
        "X_HTTP_TOKEN": x_http_token
    })

    # Fetch __lg_stoken__
    lg_stoken = get_lg_stoken(original_data)
    # Second update of the global cookies
    global_cookies.update({
        "__lg_stoken__": lg_stoken,
    })


def update_aes_key() -> None:
    # Obtain the AES key via JS and activate it through the API; activation returns
    # a secretKeyValue that later request headers need
    global aes_key, secret_key_value
    url = "https://gate.lagou.com/system/agreement"
    headers = {
        "Content-Type": "application/json",
        "Host": "gate.lagou.com",
        "Origin": "https://www.lagou.com",
        "Referer": "https://www.lagou.com/",
        "User-Agent": UA
    }
    encrypt_data = lagou_js.call("getAesKeyAndRsaEncryptData")
    aes_key = encrypt_data["aesKey"]
    rsa_encrypt_data = encrypt_data["rsaEncryptData"]
    data = {"secretKeyDecode": rsa_encrypt_data}
    response = requests.post(url=url, headers=headers, json=data).json()
    secret_key_value = response["content"]["secretKeyValue"]


def update_x_anit(original_data: dict) -> None:
    # Refresh x-anit-forge-code and x-anit-forge-token
    url = "https://www.lagou.com/wn/jobs"
    headers = {
        "Host": "www.lagou.com",
        "Referer": "https://www.lagou.com/",
        "User-Agent": UA
    }
    params = {
        "kd": original_data["kd"],
        "city": original_data["city"]
    }
    print(f"update_x_anit params --->>> {params}")
    response = requests.get(url=url, params=params, headers=headers, cookies=global_cookies)
    print(f"update_x_anit response --->>> {response.text}")
    tree = etree.HTML(response.text)
    next_data_json = json.loads(tree.xpath("//script[@id='__NEXT_DATA__']/text()")[0])
    submit_code = next_data_json["props"]["tokenData"]["submitCode"]
    submit_token = next_data_json["props"]["tokenData"]["submitToken"]
    # Note: JSESSIONID must come from a verified login!
    if not submit_code or not submit_token:
        raise Exception("submitCode & submitToken are empty; check whether JSESSIONID is correct!")
    global x_anit
    x_anit["x-anit-forge-code"] = submit_code
    x_anit["x-anit-forge-token"] = submit_token


def get_header_params(original_data: dict) -> dict:
    # Header parameters needed by the subsequent data request
    # Job-search URL; for company search use https://www.lagou.com/jobs/companyAjax.json instead
    u = "https://www.lagou.com/jobs/v2/positionAjax.json"
    return {
        "traceparent": lagou_js.call("getTraceparent"),
        "X-K-HEADER": secret_key_value,
        "X-S-HEADER": lagou_js.call("getXSHeader", aes_key, original_data, u),
        "X-SS-REQ-HEADER": json.dumps({"secret": secret_key_value})
    }


def get_encrypted_data(original_data: dict) -> str:
    # AES-encrypt the raw request data
    encrypted_data = lagou_js.call("getRequestData", aes_key, original_data)
    return encrypted_data


def get_data(original_data: dict, encrypted_data: str, header_params: dict) -> dict:
    # Send the encrypted payload with the full headers, receive the ciphertext,
    # then AES-decrypt it into plaintext job data
    url = "https://www.lagou.com/jobs/v2/positionAjax.json"
    referer = parse.urljoin("https://www.lagou.com/wn/jobs?", parse.urlencode(original_data))
    headers = {
        # "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Host": "www.lagou.com",
        "Origin": "https://www.lagou.com",
        "Referer": referer,
        "traceparent": header_params["traceparent"],
        "User-Agent": UA,
        "X-K-HEADER": header_params["X-K-HEADER"],
        "X-S-HEADER": header_params["X-S-HEADER"],
        "X-SS-REQ-HEADER": header_params["X-SS-REQ-HEADER"],
    }
    # Add x-anit-forge-code and x-anit-forge-token
    headers.update(x_anit)

    data = {"data": encrypted_data}
    response = requests.post(url=url, headers=headers, cookies=global_cookies, data=data).json()
    if "status" in response:
        if not response["status"] and "操作太频繁" in response["msg"]:
            raise Exception("Failed to fetch data! msg: %s! Try filling in the post-login cookies, or add a proxy!" % response["msg"])
        else:
            raise Exception("Unexpected data response! Check whether the request data is complete!")
    else:
        response_data = response["data"]
        decrypted_data = lagou_js.call("getResponseData", response_data, aes_key)
        return decrypted_data


def main():
    # Initialization flag
    need_init = True

    # The raw search data; e.g. to scrape the 10 newest pages of Java jobs nationwide:
    # for pn in range(1, 10):
    original_data = {
        "city": "北京",  # city scope
        "pn": 1,  # page number
        "kd": "java",  # keyword
        "px": "new",  # sort order; new: newest, default: default
        "fromSearch": "true"
    }

    while need_init:
        # These parameters only need to be set once; later requests reuse them.
        # Fetch the required cookies: user_trace_token, X_HTTP_TOKEN, __lg_stoken__
        update_cookies(original_data)
        # Obtain and activate the AES key; it is unusable until activated
        update_aes_key()
        # When logged in, the job request carries two extra headers:
        # x-anit-forge-code and x-anit-forge-token. In practice omitting them,
        # or sending random values, also works; to be safe, fetch them properly.
        if "login" in global_cookies:
            update_x_anit(original_data)
        need_init = False

    # Build the request-header parameters: X-K-HEADER, X-S-HEADER, X-SS-REQ-HEADER, traceparent
    header_params = get_header_params(original_data)
    # AES-encrypt the raw search data
    encrypted_data = get_encrypted_data(original_data)
    print(f"global_cookies --->>> {global_cookies}")
    print(f"original_data --->>> {original_data}")
    print(f"header_params --->>> {header_params}")
    print(f"encrypted_data --->>> {encrypted_data} key --->>> {aes_key}")
    # Send the request, receive the encrypted data, and decrypt it
    data = get_data(original_data, encrypted_data, header_params)
    print(data["content"]["hrInfoMap"])
    print(data["content"]["positionResult"])


if __name__ == '__main__':
    main()

File diff suppressed because one or more lines are too long

@@ -0,0 +1,6 @@

from urllib import parse

if __name__ == '__main__':
    # URL-encode a Chinese city name and show its raw UTF-8 bytes
    a = parse.quote('北京')
    print(a)
    print("北京".encode(encoding='UTF-8', errors='strict'))

@@ -0,0 +1,248 @@

import sys

import requests
import time
import json
import random

# Global settings
LOGIN_NAME = "dl-renmeng"
LOGIN_PASSWD = "1111"
IS_WORK_DAY = 0


def get_clock_in_data(clock_in_time):
    """Build the clock-in payload based on the current hour."""
    print("Current hour:", clock_in_time.tm_hour)
    # Time presets
    time_type_one = ["00:00", "09:00", "18:00", "23:59"]
    time_type_two = ["00:00:00", "09:00:00", "18:00:00", "23:59:59"]
    if clock_in_time.tm_hour > 9:
        # Afternoon clock-out
        clock_in_data = {
            "time": time_type_one[2],
            "belongtime": time_type_one[2],
            "canSignTime": time_type_one[3],
            "signTime": time.strftime("%H:%M:%S", clock_in_time),
            "date": time.strftime("%Y-%m-%d", clock_in_time),
            "belongdate": time.strftime("%Y-%m-%d", clock_in_time),
            "datetime": f'{time.strftime("%Y-%m-%d", clock_in_time)} {time_type_two[2]}',
            "signSectionTime": f'{time.strftime("%Y-%m-%d", clock_in_time)} {time_type_two[3]}',
            "signSection": f'{time.strftime("%Y-%m-%d", clock_in_time)} {time_type_two[0]}#{time.strftime("%Y-%m-%d", clock_in_time)} {time_type_two[3]}',
            "min": "359",
            "workmins": "480",
            "type": "off",
            "across": "0",
            "islastsign": "1",
            "isYellow": "1",
            "isPunchOpen": "1",
            "isacross": "0",
            "pre": "0",
            "active": "0",
            "needSign": "0",
            "reSign": "1",
            "min_next": "-1",
            "signfrom": "e9pc",
            "serialid": "1",
            "signAcross": "0",
            "signAcross_next": "0",
            "signbelong": "今天",
            "signbelongspan": "今天",
        }
    else:
        # Morning clock-in
        clock_in_data = {
            "time": time_type_one[1],
            "belongtime": time_type_one[1],
            "canSignTime": time_type_one[0],
            "date": time.strftime("%Y-%m-%d", clock_in_time),
            "belongdate": time.strftime("%Y-%m-%d", clock_in_time),
            "datetime": f'{time.strftime("%Y-%m-%d", clock_in_time)} {time_type_two[1]}',
            "signSectionTime": f'{time.strftime("%Y-%m-%d", clock_in_time)} {time_type_two[0]}',
            "signSection": f'{time.strftime("%Y-%m-%d", clock_in_time)} {time_type_two[0]}#{time.strftime("%Y-%m-%d", clock_in_time)} {time_type_two[3]}',
            "min": "540",
            "workmins": "480",
            "isfirstsign": "1",
            "type": "on",
            "across": "0",
            "islastsign": "1",
            "isYellow": "0",
            "isPunchOpen": "1",
            "isacross": "0",
            "pre": "0",
            "active": "1",
            "needSign": "1",
            "min_next": "-1",
            "serialid": "1",
            "signAcross": "0",
            "signAcross_next": "0",
        }
    return clock_in_data


def trusty_sleep(sleep_time):
    """Sleep reliably for the given number of seconds."""
    print(f"Sleeping for {sleep_time} seconds")
    start = time.time()
    while time.time() - start < sleep_time:
        time.sleep(sleep_time - (time.time() - start))


struct_time = time.localtime(time.time())
# Structured time
now_time = time.strftime("%Y%m%d", struct_time)
# now_time = 20220131
print("Current date:", now_time)
url = f"https://api.apihubs.cn/holiday/get?field=workday&date={now_time}&workday=1&cn=1&size=31"
print("Workday lookup url:", url)
print("Sending request ----->>>>>>")
request_result = requests.get(url)
print("Request returned ----->>>>>>", request_result)
# Request succeeded
if request_result.status_code == 200:
    # Parse the JSON
    is_work = json.loads(request_result.text)
    # Data fetched successfully
    if is_work["code"] == 0:
        data_list = is_work["data"]["list"][0] if is_work["data"]["list"] else []
        IS_WORK_DAY = data_list["workday"] if data_list else 0
        print("Is today a workday (1: yes, 0: no):", IS_WORK_DAY)
        if IS_WORK_DAY == 1:
            header = {
                "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36",
                "Accept": "*/*",
                "Accept-Encoding": "gzip, deflate",
                "Accept-Language": "zh-CN,zh;q=0.9",
            }

            print("Starting OA login ----->>>")
            login_form_data = {
                "loginid": LOGIN_NAME,
                "langid": "7",
            }
            login_form_url = "http://oa.njhgroup.cn/api/hrm/login/getLoginForm"
            login_form_result = requests.post(
                login_form_url, headers=header, data=login_form_data
            )
            print(requests.utils.dict_from_cookiejar(login_form_result.cookies))
            randcode = json.loads(login_form_result.text)["qrcode"]["loginkey"]
            print("Got randcode ---->>>", randcode)
            login_data = {
                "loginid": LOGIN_NAME,
                "userpassword": LOGIN_PASSWD,
                "logintype": "1",
                "isie": "false",
            }
            login_cookie = login_form_result.cookies
            # Perform the OA login
            oa_login_url = "http://oa.njhgroup.cn/api/hrm/login/checkLogin"
            login_result = requests.post(
                oa_login_url,
                headers=header,
                data=login_data,
                cookies=requests.utils.dict_from_cookiejar(login_form_result.cookies),
            )
            print(requests.utils.dict_from_cookiejar(login_result.cookies))
            print(login_result.text)
            print("OA login finished ----->>>", login_result.text)

            # Sleep for 10 seconds
            time.sleep(10)
            print("OA refreshing randcode ----->>>")
            ts = int(round(time.time() * 1000))
            refresh_code_url = f"http://oa.njhgroup.cn/rsa/weaver.rsa.GetRsaInfo?ts={ts}"
            refresh_code_result = requests.get(refresh_code_url, headers=header)
            print(refresh_code_result.cookies)
            print("OA randcode refresh finished ----->>>")

            # Assemble the final cookie
            clock_in_cookie = requests.utils.dict_from_cookiejar(
                login_form_result.cookies)
            clock_in_cookie.update(
                requests.utils.dict_from_cookiejar(refresh_code_result.cookies)
            )
            clock_in_cookie.update(
                requests.utils.dict_from_cookiejar(login_result.cookies))

            print("Checking whether today is a leave day ----->>>")
            check_is_work_url = (
                "http://oa.njhgroup.cn/api/kq/myattendance/getHrmKQMonthReportInfo"
            )
            check_is_work_result = requests.post(
                check_is_work_url,
                headers=header,
                data={
                    "typevalue": time.strftime("%Y-%m", struct_time),
                    "loaddata": "1",
                    "type": "2",
                },
                cookies=clock_in_cookie,
            ).text
            # Parse the JSON
            is_work = json.loads(check_is_work_result)
            print("Leave check finished ----->>>")
            print(f"{struct_time.tm_mday}")
            isWorkDay = is_work["result"][f"{struct_time.tm_mday}"]["isWorkDay"]
            workflow = len(is_work["result"][f"{struct_time.tm_mday}"]["workflow"])
            print(f"Is today a workday: {isWorkDay}; leave requests today: {workflow}")
            needSign = False
            if isWorkDay and workflow <= 0:
                needSign = True
            else:
                print("On leave today, skipping clock-in!")
                sys.exit()

            check_is_need_sign_url = "http://oa.njhgroup.cn/api/hrm/kq/attendanceButton/getButtons"
            check_is_need_sign_result = requests.post(
                check_is_need_sign_url,
                headers=header,
                cookies=clock_in_cookie,
            ).text
            check_is_need_sign_timeline = json.loads(check_is_need_sign_result)["timeline"]
            # 0 means no clock-in needed
            need_sign = 0
            # An empty sign_time also means no clock-in needed
            sign_time = ""
            if struct_time.tm_hour < 9:
                # Morning: use the first timeline entry (morning punch)
                need_sign = check_is_need_sign_timeline[0]["needSign"]
                if "signTime" in check_is_need_sign_timeline[0]:
                    sign_time = check_is_need_sign_timeline[0]["signTime"]
                print(f"Morning punch status: ---{need_sign} ----- {sign_time} --- {len(sign_time)}")
            else:
                # Afternoon: use the second timeline entry (afternoon punch)
                need_sign = check_is_need_sign_timeline[1]["needSign"]
                if "signTime" in check_is_need_sign_timeline[1]:
                    sign_time = check_is_need_sign_timeline[1]["signTime"]
                print(f"Afternoon punch status: ---{need_sign} ----- {sign_time} --- {len(sign_time)}")
            # Clock-in allowed
            if need_sign == "1" and len(sign_time) == 0:
                needSign = True
            else:
                print("Already clocked in, nothing to do!")
                sys.exit()
            # If we have not clocked in yet, continue
            if needSign:
                # If it is the afternoon, sleep for a while before clocking in
                if struct_time.tm_hour > 9:
                    # Sleep 5-15 minutes
                    trusty_sleep(random.randint(300, 900))
                print("OA clock-in starting ----->>>")
                # clock_in_cookie["__randcode__"] =
                # Refresh the time
                struct_time = time.localtime(time.time())
                sign_time = time.strftime("%H:%M:%S", struct_time)

                clock_in_url = "http://oa.njhgroup.cn/api/hrm/kq/attendanceButton/punchButton"
                print(
                    "OA clock-in finished ----->>>",
                    requests.post(
                        clock_in_url,
                        headers=header,
                        data=get_clock_in_data(struct_time),
                        cookies=clock_in_cookie,
                    ).text,
                )

@@ -0,0 +1,21 @@

# urllib3 tutorial: https://urllib3.readthedocs.io/en/latest/user-guide.html
# selenium tutorial: https://www.selenium.dev/zh-cn/documentation/webdriver/getting_started/
# Download the latest chromedriver: https://chromedriver.storage.googleapis.com/index.html
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Let webdriver-manager handle the driver binary
# (it downloads chromedriver to /Users/renmeng/.wdm/drivers/chromedriver)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# options = ChromeOptions()
# driver = webdriver.Chrome(options=options)
# Open the page
driver.get("https://www.lagou.com/jobs/list_运维?city=%E6%88%90%E9%83%BD&cl=false&fromSearch=true&labelWords=&suginput=")
wait = WebDriverWait(driver, 10)
# Print the page source
print(driver.page_source)
driver.quit()
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,183 @@
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import hashlib
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
import execjs
|
||||||
|
import requests
|
||||||
|
import urllib3
|
||||||
|
|
||||||
|
|
||||||
|
def des_js(js_str):
|
||||||
|
keys = re.findall(f'DES\.encrypt\((\w+)\s?,\s?(\w+)\s?,\s?(\w+)\)', js_str)
|
||||||
|
text_name, key_name, iv_name = keys[0]
|
||||||
|
key = re.findall(f'const\s+?{key_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
iv = re.findall(f'const\s+?{iv_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
appid_name = re.findall("appId:.*?(\w+),", js_str)[0]
|
||||||
|
appId = re.findall(f"var\s?{appid_name}\s?=.*?'(.*?)'", js_str)[0]
|
||||||
|
param_name = re.findall("data:\s?\{\s?(\w+):.*?}", js_str)[0]
|
||||||
|
|
||||||
|
des_keys = re.findall(f'DES\.decrypt\(data,\s?(\w+),\s?(\w+)\);', js_str)
|
||||||
|
des_dec_key_name, des_dec_iv_name = des_keys[0]
|
||||||
|
|
||||||
|
des_dec_key = re.findall(f'const\s+?{des_dec_key_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
des_dec_iv = re.findall(f'const\s+?{des_dec_iv_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
|
||||||
|
aes_keys = re.findall(f'AES\.decrypt\(data,\s?(\w+),\s?(\w+)\);', js_str)
|
||||||
|
aes_dec_key_name, aes_dec_iv_name = aes_keys[0]
|
||||||
|
aes_dec_key = re.findall(f'const\s+?{aes_dec_key_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
aes_dec_iv = re.findall(f'const\s+?{aes_dec_iv_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
|
||||||
|
method = "GETDAYDATA"
|
||||||
|
obj = {"city": "济南", "month": '201702'}
|
||||||
|
timestamp = int(time.time() * 1000)
|
||||||
|
clienttype = 'WEB'
|
||||||
|
form_data = {
|
||||||
|
"appId": appId,
|
||||||
|
"method": method,
|
||||||
|
"timestamp": timestamp,
|
||||||
|
"clienttype": clienttype,
|
||||||
|
"object": obj,
|
||||||
|
"secret": hashlib.md5(
|
||||||
|
f'{appId}{method}{timestamp}{clienttype}{str(obj)}'.replace("'", '"').replace(' ', '').encode(
|
||||||
|
'utf-8')).hexdigest()
|
||||||
|
}
|
||||||
|
|
||||||
|
base64_d = base64.b64encode(str(form_data).replace("'", '"').replace(' ', '').encode('utf-8')).decode('utf-8')
|
||||||
|
|
||||||
|
result = js.call("des_encrypt", base64_d, key, iv)
|
||||||
|
print(data := {param_name: result})
|
||||||
|
|
||||||
|
url = "https://www.aqistudy.cn/historydata/api/historyapi.php"
|
||||||
|
|
||||||
|
resp = requests.post(url=url, headers=headers, data=data, verify=False)
|
||||||
|
|
||||||
|
print(resp.text)
|
||||||
|
dec_data = js.call('dec_func', resp.text, des_dec_key, des_dec_iv, aes_dec_key, aes_dec_iv)
|
||||||
|
print(json.loads(dec_data))
|
||||||
|
|
||||||
|
|
||||||
|
def aes_js(js_str):
|
||||||
|
keys = re.findall(f'AES\.encrypt\((\w+)\s?,\s?(\w+)\s?,\s?(\w+)\)', js_str)
|
||||||
|
text_name, key_name, iv_name = keys[1]
|
||||||
|
key = re.findall(f'const\s+?{key_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
iv = re.findall(f'const\s+?{iv_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
appid_name = re.findall("appId:.*?(\w+),", js_str)[0]
|
||||||
|
appId = re.findall(f"var\s?{appid_name}\s?=.*?'(.*?)'", js_str)[0]
|
||||||
|
param_name = re.findall("data:\s?\{\s?(\w+):.*?}", js_str)[0]
|
||||||
|
|
||||||
|
des_keys = re.findall(f'DES\.decrypt\(data,\s?(\w+),\s?(\w+)\);', js_str)
|
||||||
|
des_dec_key_name, des_dec_iv_name = des_keys[0]
|
||||||
|
|
||||||
|
des_dec_key = re.findall(f'const\s+?{des_dec_key_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
des_dec_iv = re.findall(f'const\s+?{des_dec_iv_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
|
||||||
|
aes_keys = re.findall(f'AES\.decrypt\(data,\s?(\w+),\s?(\w+)\);', js_str)
|
||||||
|
aes_dec_key_name, aes_dec_iv_name = aes_keys[0]
|
||||||
|
aes_dec_key = re.findall(f'const\s+?{aes_dec_key_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
aes_dec_iv = re.findall(f'const\s+?{aes_dec_iv_name}\s+?=.*?"(.*?)"', js_str)[0]
|
||||||
|
|
||||||
|
method = "GETDAYDATA"
|
||||||
|
obj = {"city": "济南", "month": '201702'}
|
||||||
|
timestamp = int(time.time() * 1000)
|
||||||
|
clienttype = 'WEB'
|
||||||
|
form_data = {
|
||||||
|
"appId": appId,
|
||||||
|
"method": method,
|
||||||
|
"timestamp": timestamp,
|
||||||
|
"clienttype": clienttype,
|
||||||
|
"object": obj,
|
||||||
|
"secret": hashlib.md5(
|
||||||
|
f'{appId}{method}{timestamp}{clienttype}{str(obj)}'.replace("'", '"').replace(' ', '').encode(
|
||||||
|
'utf-8')).hexdigest()
|
||||||
|
}
|
||||||
|
|
||||||
|
base64_d = base64.b64encode(str(form_data).replace("'", '"').replace(' ', '').encode('utf-8')).decode('utf-8')
|
||||||
|
|
||||||
|
result = js.call("aes_encrypt", base64_d, key, iv)
|
||||||
|
print(data := {param_name: result})
|
||||||
|
|
||||||
|
url = "https://www.aqistudy.cn/historydata/api/historyapi.php"
|
||||||
|
|
||||||
|
resp = requests.post(url=url, headers=headers, data=data, verify=False)
|
||||||
|
|
||||||
|
dec_data = js.call('dec_func', resp.text, des_dec_key, des_dec_iv, aes_dec_key, aes_dec_iv)
|
||||||
|
print(json.loads(dec_data))
|
||||||
|
|
||||||
|
|
||||||
|
def bs64_js(js_str):
    appid_name = re.findall(r"appId:.*?(\w+),", js_str)[0]
    appId = re.findall(rf"var\s?{appid_name}\s?=.*?'(.*?)'", js_str)[0]
    param_name = re.findall(r"data:\s?\{\s?(\w+):.*?}", js_str)[0]

    method = "GETDAYDATA"
    obj = {"city": "济南", "month": '202206'}
    timestamp = int(time.time() * 1000)
    clienttype = 'WEB'
    form_data = {
        "appId": appId,
        "method": method,
        "timestamp": timestamp,
        "clienttype": clienttype,
        "object": obj,
        "secret": hashlib.md5(
            f'{appId}{method}{timestamp}{clienttype}{str(obj)}'.replace("'", '"').replace(' ', '').encode(
                'utf-8')).hexdigest()
    }

    # This variant sends the payload base64-encoded only, without AES-encrypting it
    base64_d = base64.b64encode(str(form_data).replace("'", '"').replace(' ', '').encode('utf-8')).decode('utf-8')
    print(data := {param_name: base64_d})

    url = "https://www.aqistudy.cn/historydata/api/historyapi.php"
    resp = requests.post(url=url, headers=headers, data=data, verify=False)

    # Extract DES and AES decryption keys/ivs from the JS, then decode the response
    des_keys = re.findall(r'DES\.decrypt\(data,\s?(\w+),\s?(\w+)\);', js_str)
    des_dec_key_name, des_dec_iv_name = des_keys[0]
    des_dec_key = re.findall(rf'const\s+?{des_dec_key_name}\s?=.*?"(.*?)"', js_str)[0]
    des_dec_iv = re.findall(rf'const\s+?{des_dec_iv_name}\s?=.*?"(.*?)"', js_str)[0]

    aes_keys = re.findall(r'AES\.decrypt\(data,\s?(\w+),\s?(\w+)\);', js_str)
    aes_dec_key_name, aes_dec_iv_name = aes_keys[0]
    aes_dec_key = re.findall(rf'const\s+?{aes_dec_key_name}\s?=.*?"(.*?)"', js_str)[0]
    aes_dec_iv = re.findall(rf'const\s+?{aes_dec_iv_name}\s?=.*?"(.*?)"', js_str)[0]

    dec_data = js.call('dec_func', resp.text, des_dec_key, des_dec_iv, aes_dec_key, aes_dec_iv)
    print(json.loads(dec_data))


if __name__ == '__main__':
    url = "https://www.aqistudy.cn/historydata/daydata.php?city=%E4%BF%9D%E5%AE%9A&month=201910"
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "https://www.aqistudy.cn",
        "Referer": "https://www.aqistudy.cn/historydata/daydata.php?city=%E4%BF%9D%E5%AE%9A&month=202009",
    }
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    req = requests.get(url, headers=headers, verify=False)
    # Locate the obfuscated JS bundle referenced by the page
    js_url = re.findall(r'src="(resource/js/.*?\.min\.js\?v=\d+)"', req.text)[0]
    js_req = requests.get(url=urljoin(url, js_url), headers=headers, verify=False)
    print(js_req.url)

    # Local helper JS contains a 'jscode_pattern' placeholder that gets
    # replaced with the downloaded bundle before compiling
    js_code = open('/Users/renmeng/work_space/python_work/qnloft-get-web-everything/爬虫/aqistudy网站/airHistory_2108.js', 'r', encoding='utf-8').read()
    js_bs64_bs64_code = js_req.text[5:-2]
    js_code = js_code.replace('jscode_pattern', js_bs64_bs64_code)
    js = execjs.compile(js_code)
    res = js.call("get_full_js", js_bs64_bs64_code)
    # print(res)

    # The marker count tells how many base64 layers wrap the real JS,
    # which in turn decides which request/response scheme is in use
    type_len = len(re.findall("dweklxde", res))
    print(type_len)
    base64_str = re.findall("'(.*?)'", res)[0]
    if type_len == 2:
        # double base64 -> DES-encrypted request flow
        target_js = base64.b64decode(base64.b64decode(base64_str)).decode('utf-8')
        des_js(js_str=target_js)
    elif type_len == 1:
        # single base64 -> AES-encrypted request flow
        target_js = base64.b64decode(base64_str).decode('utf-8')
        aes_js(js_str=target_js)
    elif type_len == 0:
        # no wrapping -> plain base64 request flow
        bs64_js(js_str=res)
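The `secret` field in the signed payloads above can be reproduced offline, which is handy for checking the normalization logic without spinning up the JS runtime. A minimal sketch, assuming a placeholder `appId` (the real value is extracted from the deobfuscated bundle):

import hashlib
import time

appId = 'your_app_id'  # placeholder; scraped from the JS bundle in practice
method = 'GETDAYDATA'
obj = {"city": "济南", "month": "201702"}
timestamp = int(time.time() * 1000)
clienttype = 'WEB'

# Single quotes become double quotes and spaces are stripped so the Python
# dict repr matches the JSON string the server-side hash expects.
raw = f'{appId}{method}{timestamp}{clienttype}{str(obj)}'.replace("'", '"').replace(' ', '')
secret = hashlib.md5(raw.encode('utf-8')).hexdigest()
print(secret)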
@ -0,0 +1,11 @@

# Proxy server configuration
proxies = {
    'http': 'http://proxy_server:port',
    'https': 'https://proxy_server:port'
}


# API hook for fetching a fresh proxy
def get_api():
    # The proxy-pool API call goes here;
    # return the proxy server address and port.
    return 'proxy_api_server:port'
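A minimal usage sketch for this config, assuming `get_api` returns a plain `host:port` string (the shape is an assumption; the stub above returns a placeholder):

import requests

from Proxy_info import get_api

proxy = get_api()  # assumed shape: '1.2.3.4:8080'
proxies = {'http': f'http://{proxy}', 'https': f'http://{proxy}'}
# Route a test request through the proxy to confirm the exit IP changed
resp = requests.get('https://httpbin.org/ip', proxies=proxies, timeout=10)
print(resp.json())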
@ -0,0 +1,82 @@

'''
Alipay bill parsing

cchardet detects encodings more accurately than chardet
'''
import codecs
import time
from pathlib import Path

import cchardet as chardet
import pandas as pd


def detection_file_encoding(file_name):  # auto-detect the file encoding
    with open(file_name, 'rb') as file:
        rawdata = file.read()
    # the detection result carries both the encoding and a confidence score
    result = chardet.detect(rawdata)
    encoding = result['encoding']
    confidence = result['confidence']
    print(f"File [{file_name}] encoding: {encoding}, confidence: {confidence:.2f}")
    return encoding


def encoding_conversion(source_file, target_file, source_encoding, target_encoding):  # convert file encoding
    if Path(target_file).exists():
        return detection_file_encoding(target_file)

    # open source and target with codecs and copy line by line, transcoding on the way
    with codecs.open(source_file, 'r', encoding=source_encoding) as source:
        with codecs.open(target_file, 'w', encoding=target_encoding) as target:
            for line in source:
                target.write(line)

    encoding = detection_file_encoding(target_file)
    print(f"File converted from {source_encoding} to {encoding}")
    return encoding


def reset_account_name(name):
    if "余额宝" in name or '滴滴出行' in name:
        return "支付宝"
    elif "信用卡" in name:
        return "信用卡"
    elif "借记卡" in name:
        return "现金"
    # fall back to the raw channel name instead of returning None
    return name


class ALiPay:
    def __init__(self, csv_file):
        # detect the source file's encoding
        self.encoding = detection_file_encoding(csv_file)
        rename = csv_file.split("-")[1:3]
        if len(rename) > 0:
            rename = "_".join(rename)
        else:
            rename = int(time.time())
        self.target_file = f'/Users/renmeng/Downloads/支付宝交易账单-{rename}.csv'  # target file name
        # write a UTF-8 copy, named with the date pulled from the source file name
        self.encoding = encoding_conversion(source_file=csv_file, target_file=self.target_file,
                                            source_encoding=self.encoding,
                                            target_encoding="utf-8")

    def get_ali_pay_bill(self):
        # skiprows skips the preamble lines before the real header row
        df = pd.read_csv(self.target_file, encoding=self.encoding, skiprows=2)
        df = df.drop(index=df[df['交易状态'] != '成功'].index)
        # parse the creation-time column into datetimes
        df['创建时间'] = pd.to_datetime(df['创建时间'])
        df['账户'] = df['支付渠道'].apply(reset_account_name)
        # format the date column as '%Y-%m-%d'
        df['创建时间'] = df['创建时间'].dt.strftime('%Y-%m-%d')
        # net amount = order amount - cumulative refunds - discounts
        df['金额'] = df['订单金额(元)'].apply(lambda x: float(x) if x else 0) \
            - df['累计退款总额(元)'].apply(lambda x: float(x) if x else 0) \
            - df['优惠(元)'].apply(lambda x: 0 if not x.strip() else float(x))
        return df
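A quick usage sketch, assuming a hypothetical path to a downloaded Alipay export; the class transcodes it to UTF-8 and returns the cleaned DataFrame:

# hypothetical file name; real exports follow the account-date-id-... pattern
bill = ALiPay('/Users/renmeng/Downloads/2088000000000000-20230918-demo-买入交易.csv')
df = bill.get_ali_pay_bill()
print(df[['创建时间', '账户', '金额']].head())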
@ -0,0 +1,48 @@

import pandas as pd

from 爬虫.理财记账.ali_pay import ALiPay
from 爬虫.理财记账.zs_bank import ZsBank

# show all columns
pd.set_option('display.max_columns', None)
# show all rows
pd.set_option('display.max_rows', None)
# do not wrap wide output
pd.set_option('expand_frame_repr', False)
# no column-width limit
pd.set_option('display.max_colwidth', None)


class BillHandle:
    def __init__(self, ali_pay_file, zs_bank_file, sheet_name):
        self.ali_pay_file, self.zs_bank_file, self.sheet_name = ali_pay_file, zs_bank_file, sheet_name
        # target ledger schema; both bill sources are normalized into these columns
        self.df = pd.DataFrame(
            columns=['交易类型', '日期', '分类', '子分类', '账户1', '账户2', '金额', '成员', '商家', '项目', '备注'])

    def __init_ali_bill(self):
        ali_pay_data = ALiPay(self.ali_pay_file).get_ali_pay_bill()
        ali_pay_data['交易类型'] = '支出'
        ali_pay_data['日期'] = ali_pay_data['创建时间']
        ali_pay_data['账户1'] = ali_pay_data['账户']
        ali_pay_data['备注'] = ali_pay_data['商品名称'] + "_" + ali_pay_data['对方名称']
        self.df = pd.concat([self.df, ali_pay_data])

    def __init_zs_bank_bill(self):
        zs_bank_data = ZsBank(self.zs_bank_file, self.sheet_name).get_zs_bank_bill()
        zs_bank_data['交易类型'] = '支出'
        zs_bank_data['账户1'] = '信用卡'
        zs_bank_data['备注'] = zs_bank_data['来源'] + "_" + zs_bank_data['详情']
        self.df = pd.concat([self.df, zs_bank_data])

    def bill_opt(self):
        self.__init_ali_bill()
        self.__init_zs_bank_bill()
        df = self.df
        # newest first; see the schema-trim sketch after this file
        df = df.sort_values(by='日期', ascending=False).reset_index(drop=True)
        print(df)


if __name__ == '__main__':
    ali_pay_file = '/Users/renmeng/Downloads/2088102231652088-20230918-108990157-买入交易.csv'
    zs_bank_file = '/Users/renmeng/Downloads/招商银行对账单.xlsx'
    zs_bank_sheet = '8-9月对账单'
    BillHandle(ali_pay_file, zs_bank_file, zs_bank_sheet).bill_opt()
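One design note: `pd.concat` keeps the union of columns, so source-specific columns from the two bills ride along in `self.df`. If only the eleven ledger columns are wanted, a trim step could be added at the end of `bill_opt` (a sketch, not part of the original):

ledger_cols = ['交易类型', '日期', '分类', '子分类', '账户1', '账户2',
               '金额', '成员', '商家', '项目', '备注']
df = df[ledger_cols]  # drop the pass-through source columns before printing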
@ -0,0 +1,44 @@

'''
China Merchants Bank (招商银行) statement parsing
'''
from datetime import datetime

import pandas as pd


def reset_date(date):
    # the statement stores dates as MMDD, so prepend the current year
    current_year = datetime.now().year
    # zero-pad to four digits so strptime sees a full YYYYMMDD string
    date_str = str(current_year) + str(date).zfill(4)
    input_date = datetime.strptime(date_str, '%Y%m%d')
    # format back to the target 'YYYY-MM-DD' form
    return input_date.strftime('%Y-%m-%d')


def pay_source(details):
    # the payment channel is the prefix of the detail string, before the first '-'
    res = ""
    source = details.split('-')[0]
    if source == '京东支付':
        res = '京东'
    elif source == '财付通':
        res = '微信'
    elif source == '支付宝':
        res = '支付宝'
    return res


class ZsBank:
    def __init__(self, bill_file, sheet_name):
        self.df = pd.read_excel(bill_file, sheet_name=sheet_name)

    def get_zs_bank_bill(self):
        # strip thousands separators before casting the amount column to float
        self.df['金额'] = self.df['金额'].astype(str).str.replace(',', '', regex=True).astype(float)
        total_sum = self.df['金额'].sum()
        print(total_sum)
        self.df['日期'] = self.df['日期'].apply(reset_date)
        self.df['来源'] = self.df['详情'].apply(pay_source)
        return self.df
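A quick check of `reset_date`, assuming the statement's 日期 column stores dates as MMDD integers; the output year depends on when it runs:

print(reset_date(805))   # e.g. '2024-08-05' when run in 2024
print(reset_date(1231))  # e.g. '2024-12-31'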