From fc16ab7c5daf722972a7710107755991bbe7e2a6 Mon Sep 17 00:00:00 2001 From: AuYeung Date: Tue, 6 Aug 2024 15:24:23 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E8=BF=87=E6=BB=A4=E7=A9=BA=E7=99=BD?= =?UTF-8?q?=E5=AD=97=E7=AC=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/words.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/words.py b/tools/words.py index d9f0a8c..3ca0778 100644 --- a/tools/words.py +++ b/tools/words.py @@ -27,7 +27,7 @@ class AsyncWordCloudGenerator: async def generate_word_frequency_and_cloud(self, data, save_words_prefix): all_text = ' '.join(item['content'] for item in data) - words = [word for word in jieba.lcut(all_text) if word not in self.stop_words] + words = [word for word in jieba.lcut(all_text) if word not in self.stop_words and len(word.strip()) > 0] word_freq = Counter(words) # Save word frequency to file From 7e9a759d948b1e722f016c1f4ccbc9b547327280 Mon Sep 17 00:00:00 2001 From: Relakkes Date: Wed, 7 Aug 2024 01:02:35 +0800 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8Dwindows=E7=B3=BB?= =?UTF-8?q?=E7=BB=9F=E4=B8=8B=E5=88=9D=E5=A7=8B=E5=8C=96=E8=A1=A8=E7=BB=93?= =?UTF-8?q?=E6=9E=84=E7=BC=96=E7=A0=81=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db.py b/db.py index 335d8ae..13777a6 100644 --- a/db.py +++ b/db.py @@ -85,7 +85,7 @@ async def init_table_schema(): utils.logger.info("[init_table_schema] begin init mysql table schema ...") await init_mediacrawler_db() async_db_obj: AsyncMysqlDB = media_crawler_db_var.get() - async with aiofiles.open("schema/tables.sql", mode="r") as f: + async with aiofiles.open("schema/tables.sql", mode="r", encoding="utf-8") as f: schema_sql = await f.read() await async_db_obj.execute(schema_sql) utils.logger.info("[init_table_schema] mediacrawler table schema init successful")