feat: 数据保存支持JSON格式
This commit is contained in:
parent
894dabcf63
commit
4dfa0d3fbf
|
@ -76,6 +76,7 @@
|
|||
### 数据保存
|
||||
- 支持保存到关系型数据库(Mysql、PgSQL等)
|
||||
- 支持保存到csv中(data/目录下)
|
||||
- 支持保存到json中(data/目录下)
|
||||
|
||||
## 如何使用 IP 代理
|
||||
➡️➡️➡️ [IP代理使用方法](docs/代理使用.md)
|
||||
|
|
|
@ -21,7 +21,7 @@ HEADLESS = True
|
|||
SAVE_LOGIN_STATE = True
|
||||
|
||||
# 数据保存类型选项配置,支持三种类型:csv、db、json
|
||||
SAVE_DATA_OPTION = "csv" # csv or db or json
|
||||
SAVE_DATA_OPTION = "json" # csv or db or json
|
||||
|
||||
# 用户浏览器缓存的浏览器文件配置
|
||||
USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name
|
||||
|
|
|
@ -2,8 +2,9 @@
|
|||
# @Author : relakkes@gmail.com
|
||||
# @Time : 2024/1/14 19:34
|
||||
# @Desc : B站存储实现类
|
||||
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
from typing import Dict
|
||||
|
||||
|
@ -122,8 +123,59 @@ class BiliDbStoreImplement(AbstractStore):
|
|||
|
||||
|
||||
class BiliJsonStoreImplement(AbstractStore):
|
||||
json_store_path: str = "data/bilibili"
|
||||
|
||||
def make_save_file_name(self, store_type: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
Args:
|
||||
store_type: Save type contains content and comments(contents | comments)
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
return f"{self.json_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||
|
||||
async def save_data_to_json(self, save_item: Dict, store_type: str):
|
||||
"""
|
||||
Below is a simple way to save it in json format.
|
||||
Args:
|
||||
save_item: save content dict info
|
||||
store_type: Save type contains content and comments(contents | comments)
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
|
||||
save_file_name = self.make_save_file_name(store_type=store_type)
|
||||
save_data = []
|
||||
|
||||
if os.path.exists(save_file_name):
|
||||
async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
|
||||
save_data = json.loads(await file.read())
|
||||
|
||||
save_data.append(save_item)
|
||||
async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
|
||||
await file.write(json.dumps(save_data, ensure_ascii=False))
|
||||
|
||||
async def store_content(self, content_item: Dict):
|
||||
pass
|
||||
"""
|
||||
content JSON storage implementation
|
||||
Args:
|
||||
content_item:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
await self.save_data_to_json(content_item, "contents")
|
||||
|
||||
async def store_comment(self, comment_item: Dict):
|
||||
pass
|
||||
"""
|
||||
comment JSON storage implementatio
|
||||
Args:
|
||||
comment_item:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
await self.save_data_to_json(comment_item, "comments")
|
||||
|
|
|
@ -2,8 +2,9 @@
|
|||
# @Author : relakkes@gmail.com
|
||||
# @Time : 2024/1/14 18:46
|
||||
# @Desc : 抖音存储实现类
|
||||
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
from typing import Dict
|
||||
|
||||
|
@ -122,9 +123,59 @@ class DouyinDbStoreImplement(AbstractStore):
|
|||
|
||||
|
||||
class DouyinJsonStoreImplement(AbstractStore):
|
||||
json_store_path: str = "data/douyin"
|
||||
|
||||
def make_save_file_name(self, store_type: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
Args:
|
||||
store_type: Save type contains content and comments(contents | comments)
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
return f"{self.json_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||
|
||||
async def save_data_to_json(self, save_item: Dict, store_type: str):
|
||||
"""
|
||||
Below is a simple way to save it in json format.
|
||||
Args:
|
||||
save_item: save content dict info
|
||||
store_type: Save type contains content and comments(contents | comments)
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
|
||||
save_file_name = self.make_save_file_name(store_type=store_type)
|
||||
save_data = []
|
||||
|
||||
if os.path.exists(save_file_name):
|
||||
async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
|
||||
save_data = json.loads(await file.read())
|
||||
|
||||
save_data.append(save_item)
|
||||
async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
|
||||
await file.write(json.dumps(save_data, ensure_ascii=False))
|
||||
|
||||
async def store_content(self, content_item: Dict):
|
||||
pass
|
||||
"""
|
||||
content JSON storage implementation
|
||||
Args:
|
||||
content_item:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
await self.save_data_to_json(content_item, "contents")
|
||||
|
||||
async def store_comment(self, comment_item: Dict):
|
||||
pass
|
||||
"""
|
||||
comment JSON storage implementatio
|
||||
Args:
|
||||
comment_item:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
await self.save_data_to_json(comment_item, "comments")
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
# @Time : 2024/1/14 20:03
|
||||
# @Desc : 快手存储实现类
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
from typing import Dict
|
||||
|
||||
|
@ -121,8 +123,59 @@ class KuaishouDbStoreImplement(AbstractStore):
|
|||
|
||||
|
||||
class KuaishouJsonStoreImplement(AbstractStore):
|
||||
json_store_path: str = "data/kuaishou"
|
||||
|
||||
def make_save_file_name(self, store_type: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
Args:
|
||||
store_type: Save type contains content and comments(contents | comments)
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
return f"{self.json_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||
|
||||
async def save_data_to_json(self, save_item: Dict, store_type: str):
|
||||
"""
|
||||
Below is a simple way to save it in json format.
|
||||
Args:
|
||||
save_item: save content dict info
|
||||
store_type: Save type contains content and comments(contents | comments)
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
|
||||
save_file_name = self.make_save_file_name(store_type=store_type)
|
||||
save_data = []
|
||||
|
||||
if os.path.exists(save_file_name):
|
||||
async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
|
||||
save_data = json.loads(await file.read())
|
||||
|
||||
save_data.append(save_item)
|
||||
async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
|
||||
await file.write(json.dumps(save_data, ensure_ascii=False))
|
||||
|
||||
async def store_content(self, content_item: Dict):
|
||||
pass
|
||||
"""
|
||||
content JSON storage implementation
|
||||
Args:
|
||||
content_item:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
await self.save_data_to_json(content_item, "contents")
|
||||
|
||||
async def store_comment(self, comment_item: Dict):
|
||||
pass
|
||||
"""
|
||||
comment JSON storage implementatio
|
||||
Args:
|
||||
comment_item:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
await self.save_data_to_json(comment_item, "comments")
|
||||
|
|
|
@ -15,7 +15,7 @@ class WeibostoreFactory:
|
|||
STORES = {
|
||||
"csv": WeiboCsvStoreImplement,
|
||||
"db": WeiboDbStoreImplement,
|
||||
"json": BiliJsonStoreImplement
|
||||
"json": WeiboJsonStoreImplement
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
# @Time : 2024/1/14 21:35
|
||||
# @Desc : 微博存储实现类
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
from typing import Dict
|
||||
|
||||
|
@ -120,9 +122,60 @@ class WeiboDbStoreImplement(AbstractStore):
|
|||
await WeiboComment.filter(comment_id=comment_id).update(**comment_data.model_dump())
|
||||
|
||||
|
||||
class BiliJsonStoreImplement(AbstractStore):
|
||||
class WeiboJsonStoreImplement(AbstractStore):
|
||||
json_store_path: str = "data/weibo"
|
||||
|
||||
def make_save_file_name(self, store_type: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
Args:
|
||||
store_type: Save type contains content and comments(contents | comments)
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
return f"{self.json_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||
|
||||
async def save_data_to_json(self, save_item: Dict, store_type: str):
|
||||
"""
|
||||
Below is a simple way to save it in json format.
|
||||
Args:
|
||||
save_item: save content dict info
|
||||
store_type: Save type contains content and comments(contents | comments)
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
|
||||
save_file_name = self.make_save_file_name(store_type=store_type)
|
||||
save_data = []
|
||||
|
||||
if os.path.exists(save_file_name):
|
||||
async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
|
||||
save_data = json.loads(await file.read())
|
||||
|
||||
save_data.append(save_item)
|
||||
async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
|
||||
await file.write(json.dumps(save_data, ensure_ascii=False))
|
||||
|
||||
async def store_content(self, content_item: Dict):
|
||||
pass
|
||||
"""
|
||||
content JSON storage implementation
|
||||
Args:
|
||||
content_item:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
await self.save_data_to_json(content_item, "contents")
|
||||
|
||||
async def store_comment(self, comment_item: Dict):
|
||||
pass
|
||||
"""
|
||||
comment JSON storage implementatio
|
||||
Args:
|
||||
comment_item:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
await self.save_data_to_json(comment_item, "comments")
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
# @Time : 2024/1/14 16:58
|
||||
# @Desc : 小红书存储实现类
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
from typing import Dict
|
||||
|
||||
|
@ -120,8 +122,59 @@ class XhsDbStoreImplement(AbstractStore):
|
|||
|
||||
|
||||
class XhsJsonStoreImplement(AbstractStore):
|
||||
json_store_path: str = "data/xhs"
|
||||
|
||||
def make_save_file_name(self, store_type: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
Args:
|
||||
store_type: Save type contains content and comments(contents | comments)
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
return f"{self.json_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||
|
||||
async def save_data_to_json(self, save_item: Dict, store_type: str):
|
||||
"""
|
||||
Below is a simple way to save it in json format.
|
||||
Args:
|
||||
save_item: save content dict info
|
||||
store_type: Save type contains content and comments(contents | comments)
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
|
||||
save_file_name = self.make_save_file_name(store_type=store_type)
|
||||
save_data = []
|
||||
|
||||
if os.path.exists(save_file_name):
|
||||
async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
|
||||
save_data = json.loads(await file.read())
|
||||
|
||||
save_data.append(save_item)
|
||||
async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
|
||||
await file.write(json.dumps(save_data, ensure_ascii=False))
|
||||
|
||||
async def store_content(self, content_item: Dict):
|
||||
pass
|
||||
"""
|
||||
content JSON storage implementation
|
||||
Args:
|
||||
content_item:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
await self.save_data_to_json(content_item, "contents")
|
||||
|
||||
async def store_comment(self, comment_item: Dict):
|
||||
pass
|
||||
"""
|
||||
comment JSON storage implementatio
|
||||
Args:
|
||||
comment_item:
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
await self.save_data_to_json(comment_item, "comments")
|
||||
|
|
Loading…
Reference in New Issue