Exemplo n.º 1
0
def Verify():
    """Reconcile unverified orders against their line items and report mismatches.

    Reads every ``tb_order_spider`` row with ``isVerify == '0'`` and
    ``isDetaildown == '1'``, recomputes the goods total from the matching
    ``tb_order_detail_spider`` rows and compares
    ``goods total + deliverFee - actualFee - couponPrice`` with zero.

    An order is flagged (``isVerify = 2``) when the truncated difference is
    non-zero and its status is not '交易关闭'; otherwise it is marked as
    verified (``isVerify = 1``).  All flagged orders are mailed as one report,
    and ``taobao_check()`` is always called at the end.
    """
    anomalies = []
    order_columns = [
        'orderNo', 'deliverFee', 'actualFee', 'couponPrice', 'fromStore',
        'orderStatus'
    ]
    pending = mysql.get_data(t="tb_order_spider",
                             cn=order_columns,
                             c={'isVerify': '0', 'isDetaildown': '1'})
    if pending:
        for row in pending:
            order_no, deliver_fee, actual_fee = row[0], row[1], row[2]
            coupon_price, from_store = row[3], row[4]

            details = mysql.get_data(
                t="tb_order_detail_spider",
                cn=['unitPrice', 'sellNum', 'unitBenefits'],
                c={'orderNo': order_no})
            goods_total = 0
            for detail in details:
                # unitPrice * sellNum - unitBenefits, accumulated per line.
                goods_total = goods_total + detail[0] * detail[1] - detail[2]

            diff = (round(goods_total, 3) + deliver_fee - actual_fee -
                    coupon_price)
            # int() truncates toward zero, so differences smaller than 1 in
            # magnitude are accepted as matching.
            if int(diff) != 0 and row[5] != '交易关闭':
                report_fields = [
                    str(round(goods_total, 2)),
                    str(deliver_fee),
                    str(actual_fee),
                    str(coupon_price),
                    str(round(diff, 2)),
                    store_trans(from_store),
                    order_no,
                ]
                anomalies.append("|".join(report_fields))
                verify_flag = 2
            else:
                verify_flag = 1
            mysql.update_data(t="tb_order_spider",
                              set={'isVerify': verify_flag},
                              c={'orderNo': order_no})
    if anomalies:
        mail("数据异常报告", "\n".join(anomalies), ["*****@*****.**"])
    taobao_check()
Exemplo n.º 2
0
 def reports_in(self, fromStore, price):
     """Add one order of value ``price`` to today's crawler report for a store.

     The report row in ``spider_reports`` is identified by report type,
     translated store name and today's date.  If the row exists its
     ``count`` is incremented and ``price`` is added to the stored total;
     otherwise a new row is inserted with ``count`` 1 and the given price.
     """
     report_key = {
         'reports_type': '订单爬虫报告',
         'store_name': store_trans(fromStore),
         'reports_date': datetime.date.today(),
     }
     existing = self.sql_element.select_data("spider_reports", 1, 'count',
                                             'price', **report_key)
     if not existing:
         new_row = dict(report_key)
         new_row['count'] = 1
         new_row['price'] = price
         self.sql_element.insert_new_data("spider_reports", **new_row)
         return

     updated_row = dict(report_key)
     updated_row['count'] = existing[0][0] + 1
     updated_row['price'] = existing[0][1] + price
     # The untouched key dict is passed as the third argument, after the new values.
     self.sql_element.update_old_data("spider_reports", updated_row,
                                      report_key)
Exemplo n.º 3
0
 def split_store(self, item):
     """Print the report header for a 'YK' item; do nothing for other stores.

     Only the 'YK' store code produces output.  The codes 'KY', 'SC', 'VP',
     'YJ' and 'TB' (and any other value) are currently no-ops.
     """
     if item['fromStore'] != 'YK':
         return

     parts = [store_trans('YK') + "\n"]
     for idx in range(3):
         parts.append(self.title_list[idx] + ':\n')
         # Placeholder loop: the per-entry rendering is not implemented yet.
         for _ in range(len(self.total_list[idx])):
             pass
     print("".join(parts))
Exemplo n.º 4
0
    async def order_page(self, browser_in=None, page_in=None):
        """Crawl the detail page of every order whose details are still missing.

        Repeatedly queries ``tb_order_spider`` for rows of ``self.fromStore``
        with ``isDetaildown == 0``.  For each row the detail URL is loaded in
        ``self._page_order_detail``, the JSON embedded in the page is parsed,
        and the extracted fields are written back to ``tb_order_spider``
        (order level) and ``tb_order_detail_spider`` (per-item benefits).
        ``Verify()`` is run after each order, followed by a randomised pause
        scaled by the module-level ``n_o_time``.  The method returns once the
        query yields no rows.

        Args:
            browser_in: Unused; kept so existing callers keep working.
            page_in: Unused; kept so existing callers keep working.

        Raises:
            AttributeError: If the page does not contain the expected
                ``var data = JSON.parse('...')`` script.
        """
        # Keys of the logistics tab that map one-to-one onto order columns.
        shipping_fields = {
            'logisticsName': 'shippingCompany',
            'shipType': 'shippingMethod',
            'logisticsNum': 'shippingNo',
        }
        while 1:
            result = mysql.get_data(t="tb_order_spider",
                                    cn=["datailURL", "orderNo"],
                                    c={
                                        "isDetaildown": 0,
                                        "fromStore": self.fromStore
                                    },
                                    o=["createTime"],
                                    om="d")
            if not result:
                logger.info("没有可以爬取的详情")
                break

            logger.info("订单详情爬取")
            for url in result:
                start_time = datetime.datetime.now()
                logger.info(store_trans(self.fromStore))
                logger.info("开始订单 " + url[1] + " 详情爬取")
                order = {}
                await self._page_order_detail.bringToFront()
                page = self._page_order_detail

                # Retry navigation until it succeeds.  The wait must be an
                # awaited asyncio.sleep: a blocking sleep would freeze the
                # event loop (and every other coroutine) for 5 seconds.
                while 1:
                    try:
                        await page.goto(url[0])
                    except (errors.PageError, errors.TimeoutError):
                        await asyncio.sleep(5)
                    else:
                        break
                try:
                    await page.waitForSelector('#detail-panel',
                                               timeout=30000)
                except errors.TimeoutError:
                    # Skip this order for now; it stays at isDetaildown == 0
                    # and is picked up again by the next query.
                    continue

                content = await page.content()
                a = re.search(r"var data = JSON.parse\('(.*)'\);",
                              content).group(1)
                # Drop triple-escaped quotes, then unescape the remaining
                # quotes so the payload becomes valid JSON.
                b = a.replace('\\\\\\"', '')
                data = b.replace('\\"', '"')
                m = json.loads(data)

                pay_info = m['mainOrder']['payInfo']
                order['actualFee'] = pay_info['actualFee']['value']
                order['orderStatus'] = status_format(
                    m['mainOrder']['statusInfo']['text'])
                if order['orderStatus'] == '等待买家付款':
                    order['isDetaildown'] = 2
                else:
                    order['isDetaildown'] = 1

                # Sum the amounts of all promotions that carry both a prefix
                # and a suffix.  The previous test
                # `'prefix' and 'suffix' in promotion` only checked 'suffix',
                # because the non-empty literal 'prefix' is always truthy.
                coupon = 0
                for promotion in pay_info.get('promotions') or []:
                    if 'prefix' in promotion and 'suffix' in promotion:
                        coupon_temp = re.search(r"(\d+\.\d+)",
                                                promotion['value'])
                        if coupon_temp:
                            coupon += float(coupon_temp.group(1))
                order['couponPrice'] = round(coupon, 2)

                if 'buyMessage' in m:
                    order['buyerComments'] = m['buyMessage']

                orderNo = m['mainOrder']['id']
                order_info = m['mainOrder']['orderInfo']['lines'][1][
                    'content']
                for line in order_info:
                    field = line['value']
                    if field['name'] == '支付宝交易号:':
                        # The trade number may be absent from the entry.
                        order['tradeNo'] = field.get('value')
                    elif field['name'] == '付款时间:':
                        order['payTime'] = field['value']

                for tab in m['tabs']:
                    if tab['id'] != 'logistics':
                        continue
                    for k, v in tab['content'].items():
                        if k in shipping_fields:
                            order[shipping_fields[k]] = v
                        elif k == 'address':
                            # Comma-separated: name, phone, then the address
                            # parts, which are joined without a separator.
                            rec_parts = v.split(",")
                            order['receiverName'] = rec_parts[0].replace(
                                " ", "")
                            order['receiverPhone'] = rec_parts[1]
                            order['receiverAddress'] = "".join(rec_parts[2:])

                sub_orders = m['mainOrder']['subOrders']
                for itemNo, sub_order in enumerate(sub_orders):
                    benefits = 0
                    if sub_order['promotionInfo']:
                        for promo in sub_order['promotionInfo']:
                            for entry in promo['content']:
                                if 'value' not in entry:
                                    continue
                                p_list = re.findall(r"-?\d+\.\d+",
                                                    entry['value'])
                                if p_list:
                                    # Only the last number in the text counts.
                                    benefits += float(p_list.pop())
                    mysql.update_data(t="tb_order_detail_spider",
                                      set={'unitBenefits': benefits},
                                      c={
                                          'orderNo': orderNo,
                                          'itemNo': itemNo
                                      })
                    logger.info("详细订单状态更新成功")

                mysql.update_data(t="tb_order_spider",
                                  set=order,
                                  c={'orderNo': orderNo})
                logger.info("订单状态更新成功")

                await self.page.bringToFront()
                Verify()
                end_time = datetime.datetime.now()
                spend_time = end_time - start_time
                logger.info(
                    str(spend_time.seconds) + " 秒完成订单 " + url[1] + " 详情爬取")

                # Pause for a random share (above 30 %) of n_o_time seconds,
                # sleeping one second at a time so other coroutines can run.
                while True:
                    s = random.random()
                    if s > 0.3:
                        pause = int(s * n_o_time)
                        logger.info("休息 " + str(pause) + " 秒完开始下一单详情爬取")
                        for _ in range(pause):
                            await asyncio.sleep(1)
                        break
Exemplo n.º 5
0
    async def parse(self, mainOrders, pageNum):
        """Parse one page of crawled orders and persist orders and line items.

        For every entry of ``mainOrders`` an order record is built and saved
        to ``tb_order_spider``; each of its sub-orders is saved to
        ``tb_order_detail_spider``, except cancelled ones, which are removed
        from the table instead.

        When ``self.orderno`` is truthy the method returns right after the
        first order has been saved, without touching ``self.complete``.
        Otherwise ``self.complete`` is set to 2 when an order older than the
        cutoff date was reached (this crawl round is finished) and to 1 when
        the whole page was processed without reaching the cutoff.

        Args:
            mainOrders: Sequence of order dicts for this page.
            pageNum: Page number; page 1 also records the round start time.
        """
        # Choose how many days back to crawl (``eoc`` is passed to
        # ``datetime.timedelta`` below) depending on the current time.
        # NOTE(review): presumably time_zone() returns today's datetimes for
        # the given clock times — confirm against its definition.
        t = time_zone(["08:00", "18:00", "23:59"])
        a = datetime.datetime.now()
        if a < t[0]:
            eoc = EARLIEST_ORDER_CREATETIME
        elif t[0] < a < t[1]:
            eoc = 2
        else:
            eoc = 20

        start_time = datetime.datetime.now()
        logger.info("开始第 " + str(pageNum) + " 页订单爬取")
        logger.info(store_trans(self.fromStore))
        if pageNum == 1:
            self._loop_start_time = datetime.datetime.now()
        # Set to 1 once an order older than the cutoff has been seen.
        loop_control = 0
        for i in range(len(mainOrders)):
            order = {}  # holds the order-level fields
            order['orderNo'] = mainOrders[i]["id"]
            order['createTime'] = mainOrders[i]['orderInfo']['createTime']
            order['buyerName'] = mainOrders[i]['buyer']['nick']
            flag = mainOrders[i]['extra']['sellerFlag']
            order['actualFee'] = mainOrders[i]['payInfo']['actualFee']
            # NOTE(review): re.search returns None when postType does not
            # contain the express-fee text, and .group(1) then raises
            # AttributeError.
            order['deliverFee'] = re.search(
                "\(含快递:¥(\d+\.\d+)\)",
                mainOrders[i]['payInfo']['postType']).group(1)
            order['datailURL'] = "https:" + mainOrders[i]['statusInfo'][
                'operations'][0]['url']
            order['orderStatus'] = mainOrders[i]['statusInfo']['text']
            order['fromStore'] = self.fromStore
            order['updateTime'] = time_now()
            # A seller flag of 1 triggers fetching the flag text from its own URL.
            if flag == 1:
                data_url = self.base_url + mainOrders[i]['operations'][0][
                    'dataUrl']
                order['sellerFlag'] = await self.get_flag_text(data_url)
            # 'icons' is optional; isPhoneOrder is simply left unset without it.
            try:
                order['isPhoneOrder'] = mainOrders[i]['payInfo']['icons'][0][
                    'linkTitle']
            except KeyError:
                pass
            items = mainOrders[i]['subOrders']
            # Item number for stored rows; only advanced for non-cancelled
            # sub-orders, so stored item numbers stay contiguous.
            line_no = 0
            for j in range(len(items)):
                # Set to 1 when this sub-order is cancelled and must be skipped.
                continue_code = 0
                item = {}  # holds the fields of one sold item
                item['orderNo'] = mainOrders[i]["id"]
                item['itemNo'] = line_no
                try:
                    item['goodsCode'] = items[j]['itemInfo']['extra'][0][
                        'value']
                except KeyError:
                    # Missing goods code: store the marker 'error' and log the order.
                    item['goodsCode'] = 'error'
                    logger.error(time_now() + " 订单:" + item['orderNo'])
                # Replace the HTML entities that occur in titles with the
                # characters they stand for.
                item['tbName'] = items[j]['itemInfo']['title'].strip() \
                    .replace("&plusmn;", "±").replace("&Phi;", "Φ").replace("&Omega;", "Ω") \
                    .replace("&mdash;", "—").replace("&deg;", "°").replace("&times;", "×") \
                    .replace("&mu;", "μ").replace("&nbsp;", "").replace("(", "(").replace(")", ")")
                item['unitPrice'] = items[j]['priceInfo']['realTotal']
                item['sellNum'] = items[j]['quantity']
                item['orderStatus'] = order['orderStatus']
                if self.orderno:
                    logger.info(item['orderStatus'])
                item['refundStatus'] = None
                item['isRefund'] = 0
                item['goodsAttribute'] = ""
                item['url'] = "https:" + items[j]['itemInfo']['itemUrl']
                try:
                    goodsAttributes = items[j]['itemInfo']['skuText']
                except KeyError:
                    pass
                else:
                    # Entries with a 'name' start a new attribute; entries
                    # without one are appended to the previous attribute.
                    # NOTE(review): if the first entry has no 'name', temp is
                    # empty and temp[n - 1] raises IndexError.
                    temp = []
                    for k in range(len(goodsAttributes)):
                        try:
                            goodsAttributes[k]['name']
                        except KeyError:
                            n = len(temp)
                            temp[n - 1] += goodsAttributes[k]['value'].replace(
                                "&Omega", "Ω").replace("&middot", "·")
                        else:
                            temp.append(goodsAttributes[k]['value'].replace(
                                "&Omega", "Ω").replace("&middot", "·"))
                    temp_ga = "-".join(temp)
                    item['goodsAttribute'] = temp_ga.replace("(", "(").replace(
                        ")", ")")
                try:
                    operations = items[j]['operations']
                except KeyError:
                    pass
                else:
                    for x in range(len(operations)):
                        # NOTE: rebinds ``t`` from above; the earlier value is
                        # no longer needed at this point.
                        t = operations[x]['style']
                        # Styles t12/t16 carry the refund status, except for
                        # the "退运保险" entry.
                        if t in ['t12', 't16'
                                 ] and operations[x]['text'] != "退运保险":
                            item['refundStatus'] = operations[x]['text']
                            item['isRefund'] = "1"
                        elif t == 't0' and operations[x]['text'] == '已取消':
                            # Cancelled item: delete its stored row (if any)
                            # and shift the item numbers of the following rows
                            # of this order down by one.
                            continue_code = 1
                            delete_item = {
                                'orderNo': item['orderNo'],
                                'itemNo': item['itemNo'],
                                'goodsCode': item['goodsCode']
                            }
                            is_exist = mysql.get_data(
                                t="tb_order_detail_spider", l=1, c=delete_item)
                            if is_exist:
                                mysql.delete_data(t="tb_order_detail_spider",
                                                  c=delete_item)
                            # NOTE(review): the statement is built by string
                            # interpolation from crawled values.
                            sql = """
                            UPDATE tb_order_detail_spider
                            SET itemNo=itemNo-1
                            WHERE OrderNo='%s' and itemNo>'%s'
                            """ % (item['orderNo'], item['itemNo'])
                            mysql.update_data(sql=sql)
                            pass
                if continue_code:
                    continue
                else:
                    line_no += 1
                self.save_in_sql(item=item, tableName='tb_order_detail_spider')
            self.save_in_sql(item=order, tableName='tb_order_spider')
            # Targeted crawl: stop after the first order has been saved.
            if self.orderno:
                logger.info("定向爬取订单完成")
                return
            # Cutoff is midnight ``eoc`` days ago; it is compared with
            # createTime as a string.
            # NOTE(review): this assumes createTime uses the same
            # "%Y-%m-%d %H:%M:%S" layout — confirm.
            date = datetime.date.today()
            date_limit = (
                date - datetime.timedelta(eoc)).strftime("%Y-%m-%d %H:%M:%S")
            if order['createTime'] < date_limit:
                logger.info("完成本轮爬取,共翻 " + str(pageNum) + " 页。")
                loop_control = 1
                break
        end_time = datetime.datetime.now()
        spend_time = end_time - start_time
        logger.info(
            str(spend_time.seconds) + " 秒完成第 " + str(pageNum) + " 页订单爬取")
        if loop_control:
            # Cutoff reached: log the duration of the round and mark it done.
            self._loop_end_time = datetime.datetime.now()
            loop_spend_time = round(
                (self._loop_end_time - self._loop_start_time).seconds / 60, 0)
            logger.info(str(loop_spend_time) + " 分钟完成本轮订单爬取")
            self.complete = 2
        else:
            self.complete = 1
Exemplo n.º 6
0
    async def parse_2(self, data):
        """Sync the SKU rows of one listing into ``prices_tb`` and write reports.

        For every SKU in ``data['skuOuterIdTable']['dataSource']`` that has a
        stock id, ``self.item`` is filled with the stock id, attribute text
        and prices.  An existing ``prices_tb`` row (matched on stock id,
        link id and shop id) is updated; otherwise a new row is inserted and
        the match is appended to the store's ``_insert`` report.

        Afterwards the method appends to the ``_repeat`` report when a stock
        id occurred more than once and to the ``_empty`` report when no SKU
        had a stock id.  It then flags as ``'del'`` every remaining
        ``prices_tb`` row of this link id whose stock id was not seen, and
        logs each to the ``_delete`` report.  ``self.complete`` is set to 1
        at the end.

        Args:
            data: Parsed page payload containing ``skuOuterIdTable``.
        """

        def append_report(kind, text):
            # One report file per store and event kind, opened in append mode.
            with open("reports/report_" + self.fromStore + "_" + kind + ".txt",
                      "a") as file:
                file.write(text)

        verify = set()  # every stock id seen on this listing
        repeat_list = []  # stock ids seen more than once, in first-repeat order
        for i in data['skuOuterIdTable']['dataSource']:
            self.item['stockid'] = i['skuOuterId']
            logger.info(self.item['stockid'])
            if not self.item['stockid']:
                continue
            if self.item['stockid'] not in verify:
                verify.add(self.item['stockid'])
            elif self.item['stockid'] not in repeat_list:
                repeat_list.append(self.item['stockid'])

            skuId = str(i['skuId'])
            # .get() returns None for an unknown SKU; treat that as an empty
            # attribute instead of failing on None.replace(...).
            temp_attr = self.prop.get(skuId) or ""
            # NOTE(review): as written these replacements are no-ops;
            # presumably they were meant to normalise full-width brackets —
            # confirm against the original file.
            self.item['attribute'] = temp_attr.replace("(",
                                                       "(").replace(")", ")")
            if not self.item['attribute']:
                self.item.pop('attribute')
            self.item['price_tb'] = self.prices.get(skuId)
            if self.promo_price:
                self.item["promotionprice"] = self.promo_price.get(skuId)
            else:
                self.item["promotionprice"] = 0

            condition = {
                "stockid": self.item['stockid'],
                "link_id": self.item['link_id'],
                "shop_id": self.item['shop_id'],
            }
            res = mysql.get_data(t="prices_tb",
                                 l=1,
                                 cn=["price_tb"],
                                 c=condition,
                                 db=self.target_server)
            if res:
                # ratio = new price / stored price; a stored price of 0
                # yields a ratio of 1 to avoid dividing by zero.
                if res[0][0] == 0:
                    self.item['ratio'] = 1
                else:
                    self.item['ratio'] = round(
                        float(self.item['price_tb']) / float(res[0][0]), 2)

                print(self.item)
                mysql.update_data(t="prices_tb",
                                  set=self.item,
                                  c=condition,
                                  db=self.target_server)
            else:
                insert_item = self.item.copy()
                insert_item["currabrev"] = "CNY"
                insert_item["price_erp"] = 0
                insert_item["operator"] = self.operator
                insert_item["last_time"] = time_now()
                if self.operator == "爬虫维护":
                    insert_item["flag"] = "create"
                else:
                    insert_item['flag'] = "add"
                insert_item["ratio"] = 1
                insert_item["package_number"] = 1
                insert_item["Checker"] = ""
                insert_item["CheckDate"] = "0000-00-00 00:00:00"
                print(insert_item)

                append_report(
                    "insert",
                    "物料编码:" + insert_item['stockid'] + " 与商品ID:" +
                    insert_item['link_id'] + " 为最新匹配,添加至ERP系统。\n" +
                    self.item_url + insert_item['link_id'] + "\n" +
                    self.item_erp_url + insert_item['link_id'] + "\n\n")

                mysql.insert_data(t="prices_tb",
                                  d=insert_item,
                                  db=self.target_server)

        if repeat_list:
            append_report(
                "repeat",
                "店铺:" + store_trans(self.fromStore) + ",商品id:" +
                self.item['link_id'] + " 重复编码\n" + "重复编码:" +
                ",".join(repeat_list) + "\n" + self.item_url +
                self.item['link_id'] + "\n\n")

        if not verify:
            append_report(
                "empty",
                "店铺:" + store_trans(self.fromStore) + ",商品id:" +
                self.item['link_id'] + " 空编码\n" + self.item_url +
                self.item['link_id'] + "\n\n")

        # NOTE(review): the statement is built by string interpolation; if
        # the mysql helper supports bound parameters, prefer them here.
        sql = """
        select id,stockid 
        from prices_tb 
        where link_id='%s' 
        and flag not in('del','XiaJia')
        """ % (self.item['link_id'])
        res_verify = mysql.get_data(sql=sql, db=self.target_server)

        # Rows still active in the table but no longer present on the listing
        # are flagged as deleted.  ``or ()`` tolerates a None/empty result.
        for rv in res_verify or ():
            if rv[1] not in verify:
                append_report(
                    "delete",
                    "物料编码:" + rv[1] + " 与 商品ID:" + self.item['link_id'] +
                    " 不匹配,已被爬虫从ERP系统中删除。\n" + self.item_url +
                    self.item['link_id'] + "\n" + self.item_erp_url +
                    self.item['link_id'] + "\n\n")

                mysql.update_data(t="prices_tb",
                                  c={"id": rv[0]},
                                  db=self.target_server,
                                  set={
                                      "flag": "del",
                                      "operator": self.operator,
                                      "last_time": time_now()
                                  })

        self.complete = 1