Add a few command-line arguments
This commit is contained in:
parent
645ec729f6
commit
985ea93caf
|
@ -1,5 +1,6 @@
|
||||||
import argparse
|
import argparse
|
||||||
import config
|
import config
|
||||||
|
from tools.utils import str2bool
|
||||||
|
|
||||||
|
|
||||||
async def parse_cmd():
|
async def parse_cmd():
|
||||||
|
@ -15,6 +16,14 @@ async def parse_cmd():
|
||||||
help='number of start page', default=config.START_PAGE)
|
help='number of start page', default=config.START_PAGE)
|
||||||
parser.add_argument('--keywords', type=str,
|
parser.add_argument('--keywords', type=str,
|
||||||
help='please input keywords', default=config.KEYWORDS)
|
help='please input keywords', default=config.KEYWORDS)
|
||||||
|
parser.add_argument('--get_comment', type=str2bool,
|
||||||
|
help='''whether to crawl level one comment, supported values case insensitive ('yes', 'true', 't', 'y', '1', 'no', 'false', 'f', 'n', '0')''', default=config.ENABLE_GET_COMMENTS)
|
||||||
|
parser.add_argument('--get_sub_comment', type=str2bool,
|
||||||
|
help=''''whether to crawl level two comment, supported values case insensitive ('yes', 'true', 't', 'y', '1', 'no', 'false', 'f', 'n', '0')''', default=config.ENABLE_GET_SUB_COMMENTS)
|
||||||
|
parser.add_argument('--save_data_option', type=str,
|
||||||
|
help='where to save the data (csv or db or json)', choices=['csv', 'db', 'json'], default=config.SAVE_DATA_OPTION)
|
||||||
|
parser.add_argument('--cookies', type=str,
|
||||||
|
help='cookies used for cookie login type', default=config.COOKIES)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
@ -24,3 +33,7 @@ async def parse_cmd():
|
||||||
config.CRAWLER_TYPE = args.type
|
config.CRAWLER_TYPE = args.type
|
||||||
config.START_PAGE = args.start
|
config.START_PAGE = args.start
|
||||||
config.KEYWORDS = args.keywords
|
config.KEYWORDS = args.keywords
|
||||||
|
config.ENABLE_GET_COMMENTS = args.get_comment
|
||||||
|
config.ENABLE_GET_SUB_COMMENTS = args.get_sub_comment
|
||||||
|
config.SAVE_DATA_OPTION = args.save_data_option
|
||||||
|
config.COOKIES = args.cookies
|
||||||
|
|
|
@ -3,7 +3,8 @@ PLATFORM = "xhs"
|
||||||
KEYWORDS = "python,golang"
|
KEYWORDS = "python,golang"
|
||||||
LOGIN_TYPE = "qrcode" # qrcode or phone or cookie
|
LOGIN_TYPE = "qrcode" # qrcode or phone or cookie
|
||||||
COOKIES = ""
|
COOKIES = ""
|
||||||
SORT_TYPE = "popularity_descending" # 具体值参见media_platform.xxx.field下的枚举值,展示只支持小红书
|
# 具体值参见media_platform.xxx.field下的枚举值,展示只支持小红书
|
||||||
|
SORT_TYPE = "popularity_descending"
|
||||||
CRAWLER_TYPE = "search" # 爬取类型,search(关键词搜索) | detail(帖子详情)| creator(创作者主页数据)
|
CRAWLER_TYPE = "search" # 爬取类型,search(关键词搜索) | detail(帖子详情)| creator(创作者主页数据)
|
||||||
|
|
||||||
# 是否开启 IP 代理
|
# 是否开启 IP 代理
|
||||||
|
@ -45,9 +46,9 @@ ENABLE_GET_IMAGES = False
|
||||||
# 是否开启爬评论模式, 默认不开启爬评论
|
# 是否开启爬评论模式, 默认不开启爬评论
|
||||||
ENABLE_GET_COMMENTS = False
|
ENABLE_GET_COMMENTS = False
|
||||||
|
|
||||||
# 是否开启爬二级评论模式, 默认不开启爬二级评论, 目前仅支持 xhs
|
# 是否开启爬二级评论模式, 默认不开启爬二级评论, 目前仅支持 xhs, bilibili
|
||||||
# 老版本项目使用了 db, 则需参考 schema/tables.sql line 287 增加表字段
|
# 老版本项目使用了 db, 则需参考 schema/tables.sql line 287 增加表字段
|
||||||
ENABLE_GET_SUB_COMMENTS = True
|
ENABLE_GET_SUB_COMMENTS = False
|
||||||
|
|
||||||
# 指定小红书需要爬虫的笔记ID列表
|
# 指定小红书需要爬虫的笔记ID列表
|
||||||
XHS_SPECIFIED_ID_LIST = [
|
XHS_SPECIFIED_ID_LIST = [
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from .crawler_util import *
|
from .crawler_util import *
|
||||||
|
@ -18,3 +19,13 @@ def init_loging_config():
|
||||||
|
|
||||||
|
|
||||||
logger = init_loging_config()
|
logger = init_loging_config()
|
||||||
|
|
||||||
|
def str2bool(v):
    """Convert a command-line style flag value into a ``bool``.

    Suitable as an ``argparse`` ``type=`` callable.

    Args:
        v: Either a ``bool`` (returned unchanged) or a string. Strings are
            matched case-insensitively: 'yes', 'true', 't', 'y', '1' map to
            ``True``; 'no', 'false', 'f', 'n', '0' map to ``False``.

    Returns:
        The boolean that ``v`` represents.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is neither a bool nor one of
            the recognised strings.
    """
    if isinstance(v, bool):
        return v

    if isinstance(v, str):
        # Normalise once instead of lower-casing for every comparison.
        lowered = v.lower()
        if lowered in ('yes', 'true', 't', 'y', '1'):
            return True
        if lowered in ('no', 'false', 'f', 'n', '0'):
            return False

    # Reached for unrecognised strings and for non-string input (None, int,
    # ...). The latter previously crashed with AttributeError on ``v.lower()``
    # rather than reporting a bad argument value.
    raise argparse.ArgumentTypeError(
        'Boolean value expected, got {!r}.'.format(v))
|
||||||
|
|
Loading…
Reference in New Issue