diff --git a/store/bilibili/bilibili_store_impl.py b/store/bilibili/bilibili_store_impl.py index eea2e81..018244d 100644 --- a/store/bilibili/bilibili_store_impl.py +++ b/store/bilibili/bilibili_store_impl.py @@ -14,7 +14,9 @@ import aiofiles from base.base_crawler import AbstractStore from tools import utils from var import crawler_type_var -def calculatet_number_of_files(file_store_path: str) -> int: + + +def calculate_number_of_files(file_store_path: str) -> int: """计算数据保存文件的前部分排序数字,支持每次运行代码不写到同一个文件中 Args: file_store_path; @@ -23,11 +25,14 @@ def calculatet_number_of_files(file_store_path: str) -> int: """ if not os.path.exists(file_store_path): return 1 - return max([int(file_name.split("_")[0])for file_name in os.listdir(file_store_path)])+1 + try: + return max([int(file_name.split("_")[0])for file_name in os.listdir(file_store_path)])+1 + except ValueError: + return 1 class BiliCsvStoreImplement(AbstractStore): csv_store_path: str = "data/bilibili" - file_count:int=calculatet_number_of_files(csv_store_path) + file_count:int=calculate_number_of_files(csv_store_path) def make_save_file_name(self, store_type: str) -> str: """ make save file name by store type @@ -127,7 +132,7 @@ class BiliDbStoreImplement(AbstractStore): class BiliJsonStoreImplement(AbstractStore): json_store_path: str = "data/bilibili" lock = asyncio.Lock() - file_count:int=calculatet_number_of_files(json_store_path) + file_count:int=calculate_number_of_files(json_store_path) def make_save_file_name(self, store_type: str) -> str: diff --git a/store/douyin/douyin_store_impl.py b/store/douyin/douyin_store_impl.py index 02e61e0..ec5dfc6 100644 --- a/store/douyin/douyin_store_impl.py +++ b/store/douyin/douyin_store_impl.py @@ -14,7 +14,9 @@ import aiofiles from base.base_crawler import AbstractStore from tools import utils from var import crawler_type_var -def calculatet_number_of_files(file_store_path: str) -> int: + + +def calculate_number_of_files(file_store_path: str) -> int: """计算数据保存文件的前部分排序数字,支持每次运行代码不写到同一个文件中 Args: file_store_path; @@ -23,11 +25,14 @@ def calculatet_number_of_files(file_store_path: str) -> int: """ if not os.path.exists(file_store_path): return 1 - return max([int(file_name.split("_")[0])for file_name in os.listdir(file_store_path)])+1 + try: + return max([int(file_name.split("_")[0])for file_name in os.listdir(file_store_path)])+1 + except ValueError: + return 1 class DouyinCsvStoreImplement(AbstractStore): csv_store_path: str = "data/douyin" - file_count:int=calculatet_number_of_files(csv_store_path) + file_count:int=calculate_number_of_files(csv_store_path) def make_save_file_name(self, store_type: str) -> str: """ @@ -129,7 +134,7 @@ class DouyinDbStoreImplement(AbstractStore): class DouyinJsonStoreImplement(AbstractStore): json_store_path: str = "data/douyin" lock = asyncio.Lock() - file_count:int=calculatet_number_of_files(json_store_path) + file_count:int=calculate_number_of_files(json_store_path) def make_save_file_name(self, store_type: str) -> str: """ diff --git a/store/kuaishou/kuaishou_store_impl.py b/store/kuaishou/kuaishou_store_impl.py index c70d7f9..14b477a 100644 --- a/store/kuaishou/kuaishou_store_impl.py +++ b/store/kuaishou/kuaishou_store_impl.py @@ -14,7 +14,9 @@ import aiofiles from base.base_crawler import AbstractStore from tools import utils from var import crawler_type_var -def calculatet_number_of_files(file_store_path: str) -> int: + + +def calculate_number_of_files(file_store_path: str) -> int: """计算数据保存文件的前部分排序数字,支持每次运行代码不写到同一个文件中 Args: file_store_path; @@ -23,12 +25,15 @@ def calculatet_number_of_files(file_store_path: str) -> int: """ if not os.path.exists(file_store_path): return 1 - return max([int(file_name.split("_")[0])for file_name in os.listdir(file_store_path)])+1 + try: + return max([int(file_name.split("_")[0])for file_name in os.listdir(file_store_path)])+1 + except ValueError: + return 1 class KuaishouCsvStoreImplement(AbstractStore): csv_store_path: str = "data/kuaishou" - file_count:int=calculatet_number_of_files(csv_store_path) + file_count:int=calculate_number_of_files(csv_store_path) def make_save_file_name(self, store_type: str) -> str: """ @@ -128,7 +133,7 @@ class KuaishouDbStoreImplement(AbstractStore): class KuaishouJsonStoreImplement(AbstractStore): json_store_path: str = "data/kuaishou" lock = asyncio.Lock() - file_count:int=calculatet_number_of_files(json_store_path) + file_count:int=calculate_number_of_files(json_store_path) def make_save_file_name(self, store_type: str) -> str: diff --git a/store/weibo/weibo_store_impl.py b/store/weibo/weibo_store_impl.py index d0247cb..8bf09b4 100644 --- a/store/weibo/weibo_store_impl.py +++ b/store/weibo/weibo_store_impl.py @@ -15,7 +15,8 @@ from base.base_crawler import AbstractStore from tools import utils from var import crawler_type_var -def calculatet_number_of_files(file_store_path: str) -> int: + +def calculate_number_of_files(file_store_path: str) -> int: """计算数据保存文件的前部分排序数字,支持每次运行代码不写到同一个文件中 Args: file_store_path; @@ -24,12 +25,15 @@ def calculatet_number_of_files(file_store_path: str) -> int: """ if not os.path.exists(file_store_path): return 1 - return max([int(file_name.split("_")[0])for file_name in os.listdir(file_store_path)])+1 + try: + return max([int(file_name.split("_")[0])for file_name in os.listdir(file_store_path)])+1 + except ValueError: + return 1 class WeiboCsvStoreImplement(AbstractStore): csv_store_path: str = "data/weibo" - file_count:int=calculatet_number_of_files(csv_store_path) + file_count:int=calculate_number_of_files(csv_store_path) def make_save_file_name(self, store_type: str) -> str: """ @@ -130,7 +134,7 @@ class WeiboDbStoreImplement(AbstractStore): class WeiboJsonStoreImplement(AbstractStore): json_store_path: str = "data/weibo" lock = asyncio.Lock() - file_count:int=calculatet_number_of_files(json_store_path) + file_count:int=calculate_number_of_files(json_store_path) def make_save_file_name(self, store_type: str) -> str: diff --git a/store/xhs/xhs_store_impl.py b/store/xhs/xhs_store_impl.py index 8ca5ab0..63b5217 100644 --- a/store/xhs/xhs_store_impl.py +++ b/store/xhs/xhs_store_impl.py @@ -15,7 +15,8 @@ from base.base_crawler import AbstractStore from tools import utils from var import crawler_type_var -def calculatet_number_of_files(file_store_path: str) -> int: + +def calculate_number_of_files(file_store_path: str) -> int: """计算数据保存文件的前部分排序数字,支持每次运行代码不写到同一个文件中 Args: file_store_path; @@ -24,12 +25,15 @@ def calculatet_number_of_files(file_store_path: str) -> int: """ if not os.path.exists(file_store_path): return 1 - return max([int(file_name.split("_")[0])for file_name in os.listdir(file_store_path)])+1 + try: + return max([int(file_name.split("_")[0])for file_name in os.listdir(file_store_path)])+1 + except ValueError: + return 1 class XhsCsvStoreImplement(AbstractStore): csv_store_path: str = "data/xhs" - file_count:int=calculatet_number_of_files(csv_store_path) + file_count:int=calculate_number_of_files(csv_store_path) def make_save_file_name(self, store_type: str) -> str: """ @@ -159,7 +163,7 @@ class XhsDbStoreImplement(AbstractStore): class XhsJsonStoreImplement(AbstractStore): json_store_path: str = "data/xhs" lock = asyncio.Lock() - file_count:int=calculatet_number_of_files(json_store_path) + file_count:int=calculate_number_of_files(json_store_path) def make_save_file_name(self, store_type: str) -> str: """