Exemplo n.º 1
0
async def run():
    """Heartbeat / watchdog loop for this spider host; never returns.

    Each pass (one every 60 seconds):

    1. Calls ``update()``.
    2. Stamps ``spider_monitor.latest_time`` for ``SPIDER_ADDRESS`` with
       ``time_now()`` and reads that row's ``restart_signal``.
    3. Reads the newest ``updateTime`` per store from ``tb_order_spider``
       (stores KY/TB when ``SPIDER_ADDRESS`` is ``"3_floor"``, YJ/YK
       otherwise). If any store's value, compared as a string, sorts before
       ``time_ago(minutes=15)``, a restart is forced.
    4. When a restart is due, resets ``restart_signal`` to 0 and calls
       ``restart()``.
    """
    sql_3_floor = "SELECT MAX(updateTime) as updateTime,fromStore FROM tb_order_spider WHERE fromStore IN ('KY','TB') GROUP BY fromStore"
    sql_other = "SELECT MAX(updateTime) as updateTime,fromStore FROM tb_order_spider WHERE fromStore IN ('YJ','YK') GROUP BY fromStore"

    while True:
        update()

        ms = MySql(db_setting=TEST_SERVER_DB_TEST)
        # Heartbeat: record that this spider host is still alive.
        ms.update(
            t="spider_monitor",
            set={"latest_time": time_now()},
            c={"spider_address": SPIDER_ADDRESS},
        )
        restart_signal = ms.get_one(
            t="spider_monitor",
            cn=["restart_signal"],
            c={"spider_address": SPIDER_ADDRESS},
        )

        sql = sql_3_floor if SPIDER_ADDRESS == "3_floor" else sql_other
        latest_per_store = MySql.cls_get_dict(sql=sql)
        # NOTE(review): this is a string comparison; it assumes time_ago()
        # returns a timestamp string formatted like str(updateTime) - confirm.
        stale_before = time_ago(minutes=15)
        if any(str(row['updateTime']) < stale_before
               for row in latest_per_store):
            restart_signal = 1

        if restart_signal:
            # Clear the flag before restarting so the restart is not repeated.
            ms.update(
                t="spider_monitor",
                set={"restart_signal": 0},
                c={"spider_address": SPIDER_ADDRESS},
            )
            restart()

        # Drop the handle before sleeping (presumably releases the
        # connection - confirm against MySql).
        del ms
        await asyncio.sleep(60)
Exemplo n.º 2
0
def verify():
    """Cross-check order headers against their detail rows; mail mismatches.

    For every ``tb_order_spider`` row with ``isVerify = '0'`` and
    ``isDetaildown = '1'`` the detail rows in ``tb_order_detail_spider`` are
    accumulated as ``unitPrice * sellNum - unitBenefits`` and compared with
    the header via
    ``round(total, 3) + deliverFee - actualFee - couponPrice``.

    * If the absolute difference exceeds 0.0001 and the order status is not
      '交易关闭' (transaction closed), the order is flagged with
      ``isVerify = 2`` / ``isDetaildown = 0`` and a ``|``-separated report
      line is collected.
    * Otherwise the order is marked ``isVerify = 1``.

    All collected report lines are sent in a single mail at the end; nothing
    is mailed when there are none.
    """
    report_lines = []
    ms = MySql()
    orders = ms.get(
        t="tb_order_spider",
        cn=[
            'orderNo', 'deliverFee', 'actualFee', 'couponPrice', 'fromStore',
            'orderStatus'
        ],
        c={'isVerify': '0', 'isDetaildown': '1'},
    )

    # A falsy result (no pending orders) means there is nothing to check.
    for order in orders or ():
        order_no, deliver_fee, actual_fee, coupon_price, from_store = order[:5]

        details = ms.get(
            t="tb_order_detail_spider",
            cn=['unitPrice', 'sellNum', 'unitBenefits'],
            c={'orderNo': order_no},
        )
        total = 0
        for detail in details:
            unit_price, sell_num, unit_benefits = detail[0], detail[1], detail[2]
            # Keep this exact accumulation order so rounding is unchanged.
            total = total + unit_price * sell_num - unit_benefits

        diff = round(total, 3) + deliver_fee - actual_fee - coupon_price
        if abs(diff) > 0.0001 and order[5] != '交易关闭':
            report_lines.append("|".join([
                str(round(total, 2)),
                str(deliver_fee),
                str(actual_fee),
                str(coupon_price),
                str(diff),
                store_trans(from_store),
                order_no,
            ]))
            # Flag the order and request that its details be downloaded again.
            ms.update(
                t="tb_order_spider",
                set={'isVerify': 2, 'isDetaildown': 0},
                c={'orderNo': order_no},
            )
        else:
            ms.update(
                t="tb_order_spider",
                set={'isVerify': 1},
                c={'orderNo': order_no},
            )

    if report_lines:
        mail("数据异常报告", "\n".join(report_lines), ["*****@*****.**"])
Exemplo n.º 3
0
def update():
    """Run ``git pull`` when this spider host has its ``update_signal`` set.

    Reads ``spider_address`` / ``update_signal`` for every row of
    ``spider_monitor``. For each row whose signal is truthy and whose address
    equals ``SPIDER_ADDRESS``, runs ``git pull``, stores the stripped
    UTF-8-decoded output in ``update_result`` and resets ``update_signal``
    to 0.

    NOTE(review): if ``check_output`` is ``subprocess.check_output``, a
    non-zero exit from ``git pull`` raises ``CalledProcessError`` and the
    signal is left set - confirm that this is intended.
    """
    ms = MySql(db_setting=TEST_SERVER_DB_TEST)
    rows = ms.get_dict(t="spider_monitor",
                       cn=["spider_address", "update_signal"])
    for row in rows:
        if not row['update_signal']:
            continue
        if SPIDER_ADDRESS != row['spider_address']:
            continue

        pull_output = check_output(['git', 'pull'])
        ms.update(
            t='spider_monitor',
            set={
                "update_signal": 0,
                "update_result": pull_output.decode('utf-8').strip(),
            },
            c={"spider_address": SPIDER_ADDRESS},
        )
Exemplo n.º 4
0
    def _get_page_num(shop_id):
        """Pick a random page of ``shop_id`` that has not been crawled yet.

        The bookkeeping row lives in ``tb_search_page_info``. A default row
        (20 pages, used pages ``"0"``) is inserted when the shop has none,
        and ``used_page_nums`` / ``spent_time`` are reset when the row's
        ``last_date`` is before today.

        Returns:
            ``(page, used_page_nums, total_page, spent_time)`` where ``page``
            is a random element of ``0..total_page`` not listed in
            ``used_page_nums`` (a list of ints), or ``(0, 0, 0, 0)`` when
            every page has already been used.
        """
        ms = MySql(db_setting=test_db)

        def load_rows():
            # Read the bookkeeping row(s) for this shop.
            return ms.get_dict(t="tb_search_page_info",
                               c={"shop_id": shop_id})

        rows = load_rows()
        if not rows:
            # No row yet: create a default one, then read it back.
            ms.insert(t="tb_search_page_info",
                      d={
                          "shop_id": shop_id,
                          "total_page": 20,
                          "used_page_nums": "0",
                          "last_date": datetime.date.today(),
                          "spent_time": 0
                      })
            rows = load_rows()

        if rows[0]['last_date'] < datetime.date.today():
            # Row is from an earlier day: reset the per-day progress.
            ms.update(t="tb_search_page_info",
                      set={"used_page_nums": "0", "spent_time": 0},
                      c={"shop_id": shop_id})
            rows = load_rows()

        info = rows[0]
        # Pages already crawled, stored as a comma-separated string.
        used_page_nums = list(map(int, info['used_page_nums'].split(",")))
        total_page = info['total_page']
        every_page = set(range(total_page + 1))  # all page numbers
        crawled = set(used_page_nums)  # page numbers already crawled
        remaining = list(every_page - crawled)  # page numbers still to crawl
        if not remaining:
            # Nothing left: every page of this shop has been crawled.
            return 0, 0, 0, 0
        # A random uncrawled page, the crawled pages, the page count and
        # the stored spent_time.
        return (random.choice(remaining), used_page_nums, total_page,
                info['spent_time'])
Exemplo n.º 5
0
    async def save_link_id(self):
        """Back-fill ``link_id`` on detail rows of this store's orders.

        Selects orders of ``self.fromStore`` that have detail rows with
        ``link_id="1"`` and a non-null ``url``. For each order the JSON from
        ``self._get_json`` is read and, for every entry in
        ``data["data"]["subOrderViewDTOs"]``:

        * the detail row whose ``url`` contains the sub-order's
          ``orderNoStr`` is looked up;
        * a ``PriceTBItem`` is filled from that row and saved;
        * the row's ``link_id`` is set to the sub-order's ``itemId``.

        A sub-order with no matching detail row is logged and skipped instead
        of raising ``IndexError``. ``self.completed`` is reset to 0 on entry.

        Returns:
            ``0`` as soon as ``_get_json`` yields nothing for an order
            (remaining orders are not processed); ``None`` otherwise.
        """
        ms = MySql()
        self.completed = 0
        sql = """SELECT url,a.orderNo FROM tb_order_detail_spider a
            JOIN tb_order_spider b ON a.`orderNo`=b.`orderNo`
            WHERE link_id="1" AND b.`fromStore`='{}' AND a.url IS NOT NULL
            GROUP BY a.orderNo
            ORDER BY b.createTime DESC""".format(self.fromStore)
        results = ms.get_dict(sql=sql)
        if not results:
            return

        for result in results:
            logger.info("link_id_spider-" + result['orderNo'])
            data = await self._get_json(result['orderNo'])
            if not data:
                return 0
            for so in data["data"]["subOrderViewDTOs"]:
                price_tb_item = PriceTBItem()
                price_tb_item.link_id = so["itemId"]

                order_no = so["orderNoStr"]
                # NOTE(review): both statements below splice values from the
                # remote response straight into SQL. MySql's parameter API is
                # not visible here, so this is flagged rather than changed.
                lookup_sql = "select * from tb_order_detail_spider where url like '%%{}%%'".format(
                    order_no)
                rows = ms.get_dict(sql=lookup_sql)
                if rows:
                    res = rows[0]
                    price_tb_item.stockid = res['goodsCode']
                    price_tb_item.description = res['tbName']
                    price_tb_item.price_tb = res['unitPrice']
                    price_tb_item.shop_id = store_trans(
                        string=self.fromStore, action="code_2_id")
                    price_tb_item.attribute = res['goodsAttribute']
                    price_tb_item.typeabbrev = self.fromStore
                    update_sql = "update tb_order_detail_spider set link_id='{}' where url like '%{}%'".format(
                        price_tb_item.link_id, order_no)
                    ms.update(sql=update_sql)
                    price_tb_item.save(ms)
                else:
                    # No detail row matches this sub-order: skip it rather
                    # than crash the whole pass on an empty lookup.
                    logger.error("link_id_spider-no detail row for sub order "
                                 + str(order_no))
                # Pause after every sub-order, skipped ones included, so the
                # pacing between requests is unchanged.
                await my_async_sleep(3, True)
    async def do_it(self):
        """Open the quick-edit dialog for the next item that needs updating.

        Flow:

        1. Fetch one ``link_id`` from ``prices_tb`` with ``need_to_update=1``
           for this store's shop id; return ``0`` if there is none.
        2. Bring the page to front and, unless it is already on the item
           manager page, navigate there. A navigation error is logged and
           the method returns ``None``.
        3. Type the ``link_id`` into the item-id filter, submit, and wait up
           to 10 seconds for ``FAST_EDIT_BTN``. On timeout the item is marked
           ``flag="XiaJia"`` / ``need_to_update=0`` and the next ``link_id``
           is tried (return ``0`` when none is left). On success the button
           is clicked and the slider check is run; a truthy slider result
           terminates the process via ``exit``.
        4. Poll once per second until ``self.completed == 4``, then sleep a
           further 15 seconds.

        Returns:
            ``0`` when no item needs updating, ``None`` otherwise.
        """
        shop_id = store_trans(self.fromStore, 'code_2_id')
        ms = MySql()
        sql = "select link_id from prices_tb where need_to_update=1 and shop_id='{}' limit 1".format(
            shop_id)
        link_id = ms.get_one(sql=sql)
        if not link_id:
            return 0
        await self.page.bringToFront()

        try:
            if not re.search(
                    "https://item.manager.taobao.com/taobao/manager/render.htm",
                    self.page.url):
                await self.page.goto(
                    "https://item.manager.taobao.com/taobao/manager/render.htm?tab=on_sale"
                )
        except Exception as e:
            logger.error(str(e) + "manager_page_error")
            return

        query_input = "input[name='queryItemId']"

        async def clear_query_input():
            # Focus the item-id filter and wipe whatever it contains.
            await self.page.focus(query_input)
            for _ in range(20):
                await self.page.keyboard.press("Delete")
                await self.page.keyboard.press("Backspace")

        while True:
            await self.page.waitForSelector(query_input, timeout=0)
            await self.page.keyboard.press('Escape')
            await clear_query_input()
            await self.page.type(query_input, str(link_id),
                                 {'delay': self.login.input_time_random()})
            await self.page.click(".filter-footer button:first-child")
            await self.page.waitForResponse(
                "https://item.manager.taobao.com/taobao/manager/table.htm")
            await asyncio.sleep(1)
            await self.listening(self.page)
            try:
                # Fixed: the keyword was misspelled ``timout``, so the
                # intended 10 s limit was never applied.
                await self.page.waitForSelector(FAST_EDIT_BTN, timeout=10000)
                await self.page.click(FAST_EDIT_BTN)
                restart = await self.login.slider(self.page)
                if restart:
                    exit("滑块验证码失败,退出")
            except errors.TimeoutError:
                # str(): link_id may not be a str (it is wrapped in str()
                # when typed above), and str + non-str raises TypeError.
                logger.info("商品已下架,没有查询到对应的商品ID:" + str(link_id))
                ms.update(t="prices_tb",
                          set={
                              "SpiderDate": time_now(),
                              "need_to_update": 0,
                              "flag": "XiaJia"
                          },
                          c={"link_id": link_id})
                link_id = ms.get_one(sql=sql)
                if not link_id:
                    return 0
                continue
            else:
                await clear_query_input()
                break

        # Wait until self.completed reaches 4; it is not modified here, so
        # it must be updated by code outside this method.
        while self.completed != 4:
            await asyncio.sleep(1)
        await asyncio.sleep(15)
    async def parse_order_detail_item(continue_code, i, main_orders,
                                      sub_orders, tb_order_item, ms):
        """Build and save one ``TBOrderDetailItem`` per entry of ``sub_orders``.

        Each item takes its ``orderNo`` from ``main_orders[i]["id"]``, its
        ``itemNo`` from the position in ``sub_orders`` and its
        ``orderStatus`` from ``tb_order_item``. A missing goods-code key
        yields ``goodsCode = 'error'``; a missing ``skuText`` leaves
        ``goodsAttribute`` unset.

        Operations on a sub-order are inspected:

        * style ``t12`` / ``t16`` with text other than "退运保险" marks the
          item as refunded (``isRefund = "1"``) with that text as status;
        * style ``t0`` with text '已取消' deletes the matching stored row (if
          any), decrements ``itemNo`` of the order's later rows and sets
          ``continue_code``.

        Items are saved only while ``continue_code`` is falsy. Nothing is
        returned.

        NOTE(review): ``continue_code`` is never reset inside the loop, so
        once a cancelled item is seen every later sub-order in this call is
        skipped as well, and the cancel branch rebinds ``ms`` to a fresh
        ``MySql()`` that later ``save`` calls then use - confirm both are
        intended.
        """
        for j, sub_order in enumerate(sub_orders):
            detail = TBOrderDetailItem()
            detail.orderNo = main_orders[i]["id"]
            detail.itemNo = j

            try:
                goods_code = sub_order['itemInfo']['extra'][0]['value']
            except KeyError:
                goods_code = 'error'
            detail.goodsCode = goods_code

            detail.tbName = format_tb_name(sub_order['itemInfo']['title'])
            detail.unitPrice = sub_order['priceInfo']['realTotal']
            detail.sellNum = sub_order['quantity']
            detail.orderStatus = tb_order_item.orderStatus
            detail.url = "https:" + sub_order['itemInfo']['itemUrl']

            try:
                sku_text = sub_order['itemInfo']['skuText']
            except KeyError:
                pass
            else:
                detail.goodsAttribute = format_attribute(sku_text)

            try:
                operations = sub_order['operations']
            except KeyError:
                operations = ()

            for operation in operations:
                style = operation['style']
                if style in ('t12', 't16') and operation['text'] != "退运保险":
                    detail.refundStatus = operation['text']
                    detail.isRefund = "1"
                elif style == 't0' and operation['text'] == '已取消':
                    continue_code = 1
                    cancelled_row = {
                        'orderNo': detail.orderNo,
                        'itemNo': detail.itemNo,
                        'goodsCode': detail.goodsCode
                    }
                    ms = MySql()
                    if ms.get(t="tb_order_detail_spider", l=1,
                              c=cancelled_row):
                        ms.delete(t="tb_order_detail_spider",
                                  c=cancelled_row)
                    # Runs whether or not a row was deleted above.
                    renumber_sql = ("UPDATE tb_order_detail_spider SET itemNo=itemNo-1 "
                                    "WHERE orderNo='{}' "
                                    "AND itemNo>'{}'").format(
                                        detail.orderNo, detail.itemNo)
                    ms.update(sql=renumber_sql)

            if not continue_code:
                detail.save(ms)
Exemplo n.º 8
0
 async def parse(self, html):
     """Store price rows for the item page in ``html``, then go to the next.

     ``self._item`` is stamped with the current time and, when the page
     contains "此宝贝已下架" (item taken off the shelf), flagged ``"XiaJia"``.
     The ``tb_master`` row for the item's ``link_id`` is marked
     ``isUsed = 1`` with ``isMut`` set to 1 when a ``skuMap`` JSON blob was
     found in the page and 0 otherwise.

     * Without ``skuMap``: ``self._item`` is upserted into ``prices_tb``
       keyed by ``link_id``.
     * With ``skuMap``: one row per SKU is upserted keyed by ``skuId``,
       carrying that SKU's price, an attribute label built from the page's
       ``li[data-value]`` elements and, when the item has a positive
       promotion price, the SKU price reduced by the same absolute discount.

     Newly inserted rows get a ``no_match`` stock id, a ``SpiderDate`` of
     ``time_ago(minutes=60)`` and ``need_to_update = 1``. Finally
     ``self._goto_the_next()`` is awaited.
     """
     ms = MySql()
     self._item['SpiderDate'] = time_now()
     # Raw string: ``\{`` is not a valid escape in a plain literal and draws
     # a warning on recent Pythons; the pattern itself is unchanged.
     sku_map = re.search(r'skuMap.*?(\{.*)', html)
     if re.search("此宝贝已下架", html):
         self._item['flag'] = "XiaJia"

     # Both branches flagged tb_master the same way apart from isMut.
     MySql.cls_update(db_setting=TEST_SERVER_DB_TEST,
                      t="tb_master",
                      set={
                          "isUsed": 1,
                          "isMut": 1 if sku_map else 0
                      },
                      c={"link_id": self._item['link_id']})

     if not sku_map:
         res = ms.get_dict(t="prices_tb",
                           c={"link_id": self._item['link_id']})
         if res:
             ms.update(t="prices_tb",
                       set=self._item,
                       c={"link_id": self._item['link_id']})
         else:
             self._item['stockid'] = "no_match"
             self._item['SpiderDate'] = time_ago(minutes=60)
             self._item['need_to_update'] = 1
             ms.insert(t="prices_tb", d=self._item)
         logger.info(self._item)
     else:
         doc = PyQuery(html)
         items = doc("li[data-value]").items()
         logger.debug(items)
         # Map each data-value id to its label. The former ``if items:``
         # guard is dropped: an empty iterable simply skips the loop.
         attr_map = {}
         for item in items:
             # NOTE(review): as written both replace() calls map a character
             # to itself; presumably they were meant to normalise full-width
             # parentheses - confirm against the original file.
             attr_map[item.attr('data-value')] = item.find(
                 'span').text().replace("(", "(").replace(")", ")")
         sku_dict = json.loads(sku_map.group(1))
         count = 1
         for k, v in sku_dict.items():
             sku_result = self._item.copy()
             if self._item['promotionprice'] > 0:
                 # Apply the item-level absolute discount to this SKU.
                 discount = round(
                     float(self._item['price_tb']) -
                     float(self._item['promotionprice']), 4)
                 sku_result['promotionprice'] = round(
                     float(v.get('price')) - float(discount), 4)
             else:
                 sku_result['promotionprice'] = 0
             sku_result['skuId'] = v.get('skuId')
             sku_result['price_tb'] = v.get('price')
             # NOTE(review): attr_map.get() returns None for an id missing
             # from the page, which makes join() raise TypeError.
             sku_result['attribute'] = "-".join([
                 attr_map.get(r) for r in re.sub('^;|;$', "", k).split(";")
             ])
             res = ms.get_dict(t="prices_tb",
                               c={"skuId": sku_result['skuId']})
             if res:
                 ms.update(t="prices_tb",
                           set=sku_result,
                           c={"skuId": sku_result['skuId']})
             else:
                 sku_result['stockid'] = "no_match" + str(count)
                 sku_result['SpiderDate'] = time_ago(minutes=60)
                 sku_result['need_to_update'] = 1
                 ms.insert(t="prices_tb", d=sku_result)
                 count += 1
             logger.info(sku_result)
     del ms
     await self._goto_the_next()