From ee95f69065c2ad2a4c86eb41b7e1ee432f65fb33 Mon Sep 17 00:00:00 2001
From: dylan <58234511@qq.com>
Date: Sun, 2 Oct 2022 13:38:20 +0800
Subject: [PATCH] Read cookie from PC_COOKIE env, pass it to JDSpider, auto-install missing deps

---
 jd_comment.py | 86 ++++++++++++++++++++++++++++++---------------------
 jdspider.py   | 44 ++++++++++++++++++--------
 2 files changed, 81 insertions(+), 49 deletions(-)

diff --git a/jd_comment.py b/jd_comment.py
index 08bf8db..82564c8 100644
--- a/jd_comment.py
+++ b/jd_comment.py
@@ -1,7 +1,12 @@
 # -*- coding: utf-8 -*-
-# @Time : 2022/2/8 20:50
-# @Author : @qiu-lzsnmb and @Dimlitter
-# @File : auto_comment_plus.py
+# 自动带图评价、追评、服务评价，需电脑端CK
+# @Time : 2022/10/2 
+# @Author : @qiu-lzsnmb and @Dimlitter @Dylan
+# @File : auto_comment.py
+'''
+new Env('自动评价');
+8 8 2 10 * https://raw.githubusercontent.com/6dylan6/auto_comment/main/jd_comment.py
+'''
 
 import argparse
 import copy
@@ -10,12 +15,24 @@ import os
 import random
 import sys
 import time
-
-import jieba  # just for linting
-import jieba.analyse
 import requests
-import yaml
-from lxml import etree
+
+try:
+    import jieba  # just for linting
+    import jieba.analyse
+    #import yaml
+    from lxml import etree
+except ImportError:  # only a missing package triggers the best-effort install and retry below
+    os.system('pip3 install lxml > /dev/null 2>&1')  # POSIX redirect: os.system uses /bin/sh, where '&>' backgrounds the job
+    os.system('pip3 install jieba > /dev/null 2>&1')
+    os.system('pip3 install zhon > /dev/null 2>&1')
+    import jieba  # just for linting
+    import jieba.analyse
+    #import yaml
+    from lxml import etree
+
+
+
 
 import jdspider
 
@@ -95,7 +112,7 @@ def generation(pname, _class=0, _type=1, opts=None):
     for i, item in enumerate(items):
         opts['logger'].debug('Loop: %d / %d', i + 1, loop_times)
         opts['logger'].debug('Current item: %s', item)
-        spider = jdspider.JDSpider(item)
+        spider = jdspider.JDSpider(item,ck)
         opts['logger'].debug('Successfully created a JDSpider instance')
         # 增加对增值服务的评价鉴别
         if "赠品" in pname or "非实物" in pname or "增值服务" in pname:
@@ -219,7 +236,7 @@ def sunbw(N, opts=None):
                 '//*[@id="main"]/div[2]/div[2]/table')
             opts['logger'].debug('Count of fetched order data: %d', len(elems))
             Order_data.extend(elems)
-    opts['logger'].info(f"当前共有{N['待评价订单']}个需要晒单。")
+    opts['logger'].info(f"当前共有{N['待评价订单']}个需要评价晒单。")
     opts['logger'].debug('Commenting on items')
     for i, Order in enumerate(Order_data):
         if i > 1:
@@ -270,7 +287,7 @@ def sunbw(N, opts=None):
         imgurl = imgdata["imgComments"]["imgList"][0]["imageUrl"]
         opts['logger'].debug('Image URL: %s', imgurl)
 
-        opts['logger'].info(f'\t\t图片url={imgurl}')
+        opts['logger'].info(f'\t图片url={imgurl}')
         # 提交晒单
         opts['logger'].debug('Preparing for commenting')
         url2 = "https://club.jd.com/myJdcomments/saveProductComment.action"
@@ -307,7 +324,6 @@ def review(N, opts=None):
     req_et = []
     Order_data = []
     loop_times = 2
-    print(loop_times)
     opts['logger'].debug('Fetching website data')
     opts['logger'].debug('Total loop times: %d', loop_times)
     for i in range(loop_times):
@@ -350,7 +366,7 @@ def review(N, opts=None):
     opts['logger'].info(f"当前共有{N['待追评']}个需要追评。")
     opts['logger'].debug('Commenting on items')
     for i, Order in enumerate(Order_data):
-        if i > 1:
+        if i + 1 > 1:
             opts['logger'].info(f'\t已评价10个订单，跳出')
             break
         oname = Order.xpath('td[1]/div/div[2]/div/a/text()')[0]
@@ -366,7 +382,7 @@ def review(N, opts=None):
         opts['logger'].debug('oid: %s', oid)
         opts['logger'].info(f'\t开始第{i+1}个订单: {oid}')
         _, context = generation(oname, _type=0, opts=opts)
-        opts['logger'].info(f'\t\t追评内容：{context}')
+        opts['logger'].info(f'\t追评内容：{context}')
         data1 = {
             'orderId': oid,
             'productId': pid,
@@ -384,8 +400,6 @@ def review(N, opts=None):
         opts['logger'].debug('Sleep time (s): %.1f', REVIEW_SLEEP_SEC)
         time.sleep(REVIEW_SLEEP_SEC)
         N['待追评'] -= 1
-        if i + 1 > 1:
-            break
     return N
 
 
@@ -436,7 +450,7 @@ def Service_rating(N, opts=None):
     opts['logger'].info(f"当前共有{N['服务评价']}个需要服务评价。")
     opts['logger'].debug('Commenting on items')
     for i, Order in enumerate(Order_data):
-        if i > 1:
+        if i + 1 > 1:
             opts['logger'].info(f'\t已评价10个订单，跳出')
             break
         oname = Order.xpath('td[1]/div[1]/div[2]/div/a/text()')[0]
@@ -487,7 +501,7 @@ def main(opts=None):
     N = No(opts)
     opts['logger'].debug('N value after executing No(): %s', N)
     if not N:
-        opts['logger'].error('Ck错误，请检查重新抓取！')
+        opts['logger'].error('CK错误，请确认是否电脑版CK！')
         exit()
     if N['待评价订单'] != 0:
         opts['logger'].info("1.开始评价晒单")
@@ -553,11 +567,11 @@ if __name__ == '__main__':
     # NOTE: The alignment number should set to 19 considering the style
     # controling characters. When it comes to file logger, the number should
     # set to 8.
-    formatter = StyleFormatter('%(asctime)s %(levelname)-19s %(message)s')
+    formatter = StyleFormatter('%(asctime)s %(levelname)-19s %(message)s',"%F %T")
     rawformatter = StyleFormatter('%(asctime)s %(levelname)-8s %(message)s', use_style=False)
     console = logging.StreamHandler()
     console.setLevel(_logging_level)
-    console.setFormatter(formatter)
+    console.setFormatter(logging.Formatter('%(message)s'))
     logger.addHandler(console)
     opts['logger'] = logger
     # It's a hack!!!
@@ -595,24 +609,24 @@ if __name__ == '__main__':
     logger.debug('  SERVICE_RATING_SLEEP_SEC: %s', SERVICE_RATING_SLEEP_SEC)
 
     # parse configurations
-    logger.debug('Reading the configuration file')
-    if os.path.exists(USER_CONFIG_PATH):
-        logger.debug('User configuration file exists')
-        _cfg_path = USER_CONFIG_PATH
-    else:
-        logger.debug('User configuration file doesn\'t exist, fallback to the default one')
-        _cfg_path = CONFIG_PATH
-    with open(_cfg_path, 'r', encoding='utf-8') as f:
-        cfg = yaml.safe_load(f)
-    logger.debug('Closed the configuration file')
-    logger.debug('Configurations in Python-dict format: %s', cfg)
-    if "JD_COOKIE" in os.environ:
-    if len (os.environ["PC_COOKIE"]) > 1:
-        ck = os.environ["PC_COOKIE"]
-        logger.info ("已获取并使用Env环境 Cookie")
+    #logger.debug('Reading the configuration file')
+    #if os.path.exists(USER_CONFIG_PATH):
+        #logger.debug('User configuration file exists')
+        #_cfg_path = USER_CONFIG_PATH
+    #else:
+        #logger.debug('User configuration file doesn\'t exist, fallback to the default one')
+        #_cfg_path = CONFIG_PATH
+   # with open(_cfg_path, 'r', encoding='utf-8') as f:
+        #cfg = yaml.safe_load(f)
+        #print()
+    #logger.debug('Closed the configuration file')
+    #logger.debug('Configurations in Python-dict format: %s', cfg)
+    if "PC_COOKIE" in os.environ:
+        if len(os.environ["PC_COOKIE"]) > 1:
+            ck = os.environ["PC_COOKIE"]
+            logger.info ("已获取并使用Env环境 Cookie")
 
     #ck = cfg['user']['cookie']
-
     headers = {
         'cookie': ck.encode("utf-8"),
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
diff --git a/jdspider.py b/jdspider.py
index 084078f..e0a40ec 100644
--- a/jdspider.py
+++ b/jdspider.py
@@ -1,5 +1,5 @@
-# @Time : 2022/2/8 20:50
-# @Author :@Zhang Jiale and @Dimlitter
+# @Time : 2022/10/2
+# @Author :@Zhang Jiale and @Dimlitter @Dylan
 # @File : jdspider.py
 
 import json
@@ -24,7 +24,7 @@ default_logger.addHandler(log_console)
 
 class JDSpider:
     # 爬虫实现类：传入商品类别（如手机、电脑），构造实例。然后调用getData搜集数据。
-    def __init__(self, categlory):
+    def __init__(self, categlory, ck):
         # jD起始搜索页面
         self.startUrl = "https://search.jd.com/Search?keyword=%s&enc=utf-8" % (
             quote(categlory))
@@ -42,11 +42,12 @@ class JDSpider:
             'sec-fetch-site': 'none',
             'sec-fetch-user': '?1',
             'upgrade-insecure-requests': '1',
-            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36'
+            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
         }
         self.productsId = self.getId()
-        self.comtype = {1: "nagetive", 2: "medium", 3: "positive"}
+        self.comtype = {1: "差评", 2: "中评", 3: "好评"}
         self.categlory = categlory
+        self.ck = ck
         self.iplist = {
             'http': [],
             'https': []
@@ -67,15 +68,32 @@ class JDSpider:
         return params, url
 
     def getHeaders(self, productid):  # 和初始的self.header不同，这是搜集某个商品的header，加入了商品id，我也不知道去掉了会怎样。
-        header = {"Referer": "https://item.jd.com/%s.html" % (productid),
-                  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
-                  }
+        header = {
+        "Referer": "https://item.jd.com/%s.html" % (productid),
+        "Host": "sclub.jd.com",
+        "Connection": "keep-alive",
+        "Pragma": "no-cache",
+        "Cache-Control": "no-cache",
+        "sec-ch-ua": '"Chromium";v="21", " Not;A Brand";v="99"',
+        "sec-ch-ua-mobile": "?0",
+        "sec-ch-ua-platform": "Windows",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+        "Sec-Fetch-Site": "none",
+        "Sec-Fetch-Mode": "navigate",
+        "Sec-Fetch-User": "?1",
+        "Sec-Fetch-Dest": "document",
+        "Accept-Encoding": "gzip, deflate, br",
+        "Accept-Language": "zh-CN,zh;q=0.9",
+        "upgrade-insecure-requests": "1",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
+        "Cookie": self.ck
+        }
         return header
 
     def getId(self):  # 获取商品id，为了得到具体商品页面的网址。结果保存在self.productId的数组里
         response = requests.get(self.startUrl, headers=self.headers)
         if response.status_code != 200:
-            default_logger.warning("状态码错误，爬虫连接异常！")
+            default_logger.warning("状态码错误，连接异常！")
         html = etree.HTML(response.text)
         return html.xpath('//li[@class="gl-item"]/@data-sku')
 
@@ -85,10 +103,10 @@ class JDSpider:
 
         comments = []
         scores = []
-        if len(self.productsId) < 6:  # limit the sum of products
+        if len(self.productsId) < 4:  # limit the sum of products
             sum = len(self.productsId)
         else:
-            sum = 5
+            sum = 3
         for j in range(sum):
             id = self.productsId[j]
             header = self.getHeaders(id)
@@ -115,7 +133,7 @@ class JDSpider:
                 if len((res_json['comments'])) == 0:
                     default_logger.warning("本页无评价数据，跳过")
                     break
-                default_logger.info("正在搜集%s %s" %
+                default_logger.info("正在搜集 %s 的%s信息" %
                                     (self.categlory, self.comtype[score]))
                 for cdit in res_json['comments']:
                     comment = cdit['content'].replace(
@@ -123,7 +141,7 @@ class JDSpider:
                     comments.append(comment)
                     scores.append(cdit['score'])
         # savepath = './'+self.categlory+'_'+self.comtype[score]+'.csv'
-        default_logger.warning("已搜集%d 条 %s 评价信息" %
+        default_logger.warning("已搜集%d条%s信息" %
                                (len(comments), self.comtype[score]))
         # 存入列表,简单处理评价
         remarks = []