def save_in_sql(self, item, tableName):
    """Upsert *item* into *tableName*.

    An item carrying a 'goodsCode' key is an order-detail row and is keyed
    by (orderNo, itemNo); any other item is an order row keyed by orderNo
    alone.  When updating an existing detail row, 'goodsCode' is removed
    from the SET clause (it is treated as immutable once written).

    NOTE: mutates *item* in the detail-update case (pops 'goodsCode').

    :param item: dict of column -> value to persist
    :param tableName: target table name
    """
    # The original duplicated the whole select/update/insert sequence in
    # both branches; only the key condition (and the goodsCode pop) differ.
    is_detail = 'goodsCode' in item
    if is_detail:
        condition = {'orderNo': item['orderNo'], 'itemNo': item['itemNo']}
    else:
        condition = {'orderNo': item['orderNo']}
    if mysql.get_data(t=tableName, c=condition):
        if is_detail:
            # goodsCode never changes for an existing detail row.
            item.pop("goodsCode")
        mysql.update_data(t=tableName, set=item, c=condition)
    else:
        mysql.insert_data(t=tableName, d=item)
def Verify():
    """Cross-check each pending order's total against its detail rows.

    Orders whose computed total disagrees with the recorded fees (and that
    are not closed) are marked isVerify=2 and reported by mail; matching
    orders are marked isVerify=1.  Always ends with taobao_check().
    """
    anomalies = []
    order_rows = mysql.get_data(
        t="tb_order_spider",
        cn=['orderNo', 'deliverFee', 'actualFee', 'couponPrice',
            'fromStore', 'orderStatus'],
        c={'isVerify': '0', 'isDetaildown': '1'})
    if order_rows:
        for row in order_rows:
            order_no = row[0]
            deliver_fee = row[1]
            actual_fee = row[2]
            coupon_price = row[3]
            from_store = row[4]
            detail_rows = mysql.get_data(
                t="tb_order_detail_spider",
                cn=['unitPrice', 'sellNum', 'unitBenefits'],
                c={'orderNo': order_no})
            subtotal = 0
            for unit_price, sell_num, unit_benefits in detail_rows:
                subtotal += unit_price * sell_num - unit_benefits
            # Positive/negative diff means detail rows disagree with the order.
            diff = round(subtotal, 3) + deliver_fee - actual_fee - coupon_price
            if int(diff) != 0 and row[5] != '交易关闭':
                anomalies.append("|".join([
                    str(round(subtotal, 2)),
                    str(deliver_fee),
                    str(actual_fee),
                    str(coupon_price),
                    str(round(diff, 2)),
                    store_trans(from_store),
                    order_no,
                ]))
                mysql.update_data(t="tb_order_spider",
                                  set={'isVerify': 2},
                                  c={'orderNo': order_no})
            else:
                mysql.update_data(t="tb_order_spider",
                                  set={'isVerify': 1},
                                  c={'orderNo': order_no})
    if anomalies:
        mail("数据异常报告", "\n".join(anomalies), ["*****@*****.**"])
    taobao_check()
def report_in(self, **kwargs):
    """Record one crawl event in update_reports.

    A 'lookup' event bumps the shared lookup counter row; any other flag
    inserts a full report row built from the keyword arguments.
    """
    if kwargs['flag'] == 'lookup':
        current = mysql.get_data("update_reports", l=1, cn=["lookup"],
                                 c={"link_id": "count"}, db=self.db_test)
        mysql.update_data(
            t="update_reports",
            set={'lookup': current[0][0] + 1,
                 'last_time': kwargs['SpiderDate']},
            c={'link_id': 'count', 'shop_id': kwargs['shop_id']})
    else:
        # Copy the straight-through fields, then the timestamp alias.
        for field in ('stockid', 'link_id', 'shop_id', 'price_tb'):
            self.report_item[field] = kwargs[field]
        self.report_item['last_time'] = kwargs['SpiderDate']
        for field in ('attribute', 'flag', 'description'):
            self.report_item[field] = kwargs[field]
        mysql.insert_data(t="update_reports", d=self.report_item,
                          db=self.db_test)
def data_compare(self, **kwargs):
    """Decide whether a crawled price row must be created or updated.

    Returns ("创建", None) when no row exists, ("更新", ratio) when the
    price moved by >= 0.01 or the stored row is stale (> 7 days, or a
    zero SpiderDate), and (None, None) otherwise.
    """
    rows = mysql.get_data(
        t="prices_tb", cn=["price_tb", "SpiderDate"],
        c={"stockid": kwargs["goodsCode"],
           "link_id": kwargs["link_id"],
           "shop_id": self.shop_id(kwargs["fromStore"])})
    if not rows:
        return "创建", None
    stored_price, spider_date = rows[0]
    crawled = float(kwargs['unitPrice'])
    price_delta = crawled - float(stored_price)
    ratio = crawled / float(stored_price)
    if spider_date == '0000-00-00 00:00:00':
        # Zero date means the row was never crawled: force an update.
        return "更新", ratio
    age_days = (datetime.datetime.now() - spider_date).days
    if abs(price_delta) >= 0.01 or age_days > 7:
        self.report_item['price_before'] = stored_price
        return "更新", ratio
    return None, None
async def save(self):
    """Persist the models produced by _parse() into tb_master.

    Existing rows are updated with a flag/narrative describing exactly
    which watched fields changed; unseen link_ids are inserted. Returns 0
    (and stops) when _parse() signals the Taobao account must be switched.
    """
    # (field, flag token, narrative template) for change detection.
    watched = (
        ("price", "price", "更新销售价格:[{}]=>[{}]"),
        ("promotionPrice", "promotion", "更新优惠售价格:[{}]=>[{}]"),
        ("sale_num", "sale", "更新销量:[{}]=>[{}]"),
    )
    async for record in self._parse():
        if not record:
            print("需要要切换淘宝账户")
            return 0
        existing = mysql.get_data(db=test_server, t="tb_master",
                                  c={'link_id': record['link_id']},
                                  dict_result=True)
        if existing:
            old = existing[0]
            flags = ["update"]
            notes = []
            for field, tag, template in watched:
                if old[field] != record[field]:
                    flags.append(tag)
                    notes.append(template.format(old[field], record[field]))
            record['flag'] = "_".join(flags)
            record['narrative'] = ";".join(notes)
            mysql.update_data(db=test_server, t='tb_master', set=record,
                              c={"link_id": record['link_id']})
        else:
            record['flag'] = 'insert'
            mysql.insert_data(db=test_server, t="tb_master", d=record)
async def run():
    """Log in to Taobao via QR code, then capture per-shop session data
    (user agent, cookies, referer and the anti-CSRF form tokens) into the
    user_record table.
    """
    b = await launch(**dev)
    p = await b.newPage()
    await p.setViewport({"width": 1440, "height": 900})
    await p.goto("https://login.taobao.com")
    ms = await p.J(".module-static")
    if ms:
        # The static (password) form is showing: click the switch control
        # to flip over to the QR-code login pane.
        ls = await p.J(".login-switch")
        box = await ls.boundingBox()
        await p.mouse.click(box['x'] + 10, box['y'])
    # Keep retrying until the QR code image is present and screenshotted.
    while 1:
        try:
            await p.waitForSelector("#J_QRCodeImg")
            image = await p.J("#J_QRCodeImg")
            await image.screenshot({'path': './qrcode.png'})
        except Exception as e:
            pass
        else:
            break
    qrcode = mpimg.imread('qrcode.png')  # read qrcode.png from the working directory
    plt.imshow(qrcode)  # display the QR code so the operator can scan it
    plt.axis('off')  # hide the axes
    plt.show()
    await p.waitForNavigation()
    start_url = 'https://shop.taobao.com/'
    sql = "select shop_id from shop_info where shop_id!='88888888'"  # all shop IDs
    shop_infos = mysql.get_data(sql=sql, dict_result=True)
    for shop_info in shop_infos:
        item = {"shop_id": shop_info['shop_id']}
        # Produces e.g. https://shop<ID>.taobao.com/
        url = start_url.replace("shop", "shop" + shop_info["shop_id"])
        await p.goto(url)
        await asyncio.sleep(5)
        await p.waitForSelector(".all-cats-trigger.popup-trigger")
        await p.click(".all-cats-trigger.popup-trigger")
        await asyncio.sleep(5)
        item['user_agent'] = await b.userAgent()
        cookies = await p.cookies()
        item['cookies'] = ";".join(
            [c['name'] + "=" + c['value'] for c in cookies])
        item['refer'] = p.url
        await p.waitForSelector('input[name="_ksTS"]')
        content = await p.content()
        print(content)
        doc = PyQuery(content)
        # _ksTS has the shape "<timestamp>_<suffix>"; the jsonp callback
        # name is derived from the suffix + 1.
        _ksTS = doc('input[name="_ksTS"]').val()
        item['_ksTS'] = _ksTS.split("_").pop()
        item['callback'] = 'jsonp' + str(int(item['_ksTS']) + 1)
        item['mid'] = doc('input[name="mid"]').val()
        item['wid'] = doc('input[name="wid"]').val()
        item['spm'] = doc('div.pagination form input[name="spm"]').val()
        mysql.insert_data(db=test_server, t='user_record', d=item)
        # NOTE(review): unconditional break — only the first shop is ever
        # recorded; looks deliberate but confirm against the caller.
        break
def _get_item():
    """Yield unused tb_master rows one at a time until none remain.

    NOTE: the query is pinned to a single hard-coded link_id, so the same
    row is re-yielded until something flips its isUsed flag.
    """
    query = (
        "SELECT shop_id,link_id,description,price,promotionPrice,sale_num "
        "FROM tb_master WHERE isUsed=0 and link_id='{}' LIMIT 1"
    ).format("586886697621")
    while True:
        rows = mysql.get_data(db=test_server, sql=query, dict_result=True)
        if not rows:
            break
        yield rows[0]
def shop_id(self, fromStore):
    """Return the shop_id for a store abbreviation, or None if unknown.

    Selects the shop_id column explicitly — the sibling implementation
    passes cn=["shop_id"], while this one relied on the table's physical
    column order, which silently breaks if the schema changes.

    :param fromStore: store abbreviation matched against typeabbrev
    """
    res = mysql.get_data(t="shop_info", l=1, cn=["shop_id"],
                         c={"typeabbrev": fromStore, "shopindex": 0})
    if res:
        return res[0][0]
def is_in_database(data, col_name, table_name):
    """Return True if *data* appears in column *col_name* of *table_name*.

    :param data: value to look for
    :param col_name: column to scan
    :param table_name: table to scan
    :rtype: bool
    """
    cursor = mysql.connet_mysql()
    rows = mysql.get_data(cursor, col_name, table_name)
    # any() short-circuits on the first hit instead of materialising the
    # whole column into a list first.
    return any(row[0] == data for row in rows)
def shop_id(self, fromStore):
    """Return the shop_id for a store abbreviation.

    Assumes exactly one matching row exists (raises IndexError otherwise).
    """
    time.sleep(1)  # throttle lookups, preserved from the original
    condition = {"typeabbrev": fromStore, "shopindex": 0}
    rows = mysql.get_data(t="shop_info", l=1, cn=["shop_id"], c=condition)
    return rows[0][0]
def get_link_id(self, **kwargs):
    """Resolve the unique link_id for a (goodsCode, store, attribute)
    triple; returns None unless exactly one row matches."""
    condition = {
        "stockid": kwargs['goodsCode'],
        "shop_id": self.shop_id(fromStore=kwargs['fromStore']),
        "attribute": kwargs['goodsAttribute'],
    }
    rows = mysql.get_data(t="prices_tb", cn=["link_id"], c=condition)
    return rows[0][0] if len(rows) == 1 else None
async def verify(self, p):
    """Handle Taobao's SMS phone-verification overlay on page *p*.

    If the overlay mask never appears within 10s, nothing needs doing.
    Otherwise: on LINUX the code is fetched unattended by polling the
    phone_verify table (filled by an external process); interactively the
    operator types it in.  Finally the code is entered and submitted.
    """
    try:
        await p.waitForSelector("div.aq_overlay_mask", timeout=10000)
    except errors.TimeoutError:
        # No verification overlay — nothing to do.
        pass
    else:
        logger.info("需要要手机验证码")
        if LINUX:
            # Headless path: park a row keyed by a random id and poll it
            # until the external SMS relay writes the code into it.
            test_server = ts.copy()
            test_server['db'] = "test"
            id = random.randint(0, 100)
            mysql.insert_data(db=test_server, t="phone_verify", d={"id": id})
            frames = p.frames
            net_check()
            verify_code = "0"  # "0" is the sentinel for "not received yet"
            while True:
                net_check()
                await frames[1].click(".J_SendCodeBtn")  # (re)send the SMS
                # Poll every 5s for up to 10 minutes per send attempt.
                for i in range(120):
                    await asyncio.sleep(5)
                    res = mysql.get_data(
                        db=test_server,
                        cn=["verify_code"],
                        t="phone_verify",
                        c={"id": id},
                    )
                    verify_code = res[0][0]
                    if verify_code != "0":
                        # Got the code; remove our parking row.
                        mysql.delete_data(db=test_server, t="phone_verify",
                                          c={"id": id})
                        break
                if verify_code != "0":
                    break
                await asyncio.sleep(10)  # back off before resending
        else:
            # Interactive path: operator reads the SMS and types it in.
            frames = p.frames
            net_check()
            await frames[1].click(".J_SendCodeBtn")
            verify_code = input(time_now() + " | 请输入6位数字验证码:")
        # await frames[1].click(".J_SendCodeBtn")
        # verify_code = input(time_now() + " | 请输入6位数字验证码:")
        await frames[1].type(".J_SafeCode", verify_code,
                             {'delay': self.input_time_random() - 50})
        net_check()
        await frames[1].click("#J_FooterSubmitBtn")
async def run_order_detail_spider(self):
    """Pick the store with the most un-downloaded orders and crawl its
    order details; when nothing is pending, fall through to the link
    spider."""
    sql = """
        SELECT COUNT(id),fromStore FROM tb_order_spider
        WHERE isDetaildown=0
        GROUP BY fromStore
        ORDER BY COUNT(id) DESC LIMIT 1
    """
    rows = mysql.get_data(sql=sql)
    if not rows:
        # No pending orders anywhere: hand over to the link spider.
        await self.run_link_spider()
        return
    store = rows[0][1]
    b, p, f = await self.login(**STORE_INFO[store])
    await self.order_detail_spider(p, f)
async def run_link_spider(self):
    """Pick the store with the most unresolved link_ids and crawl them;
    otherwise reset deferred orders and return to the detail spider."""
    sql = """
        SELECT COUNT(a.id),fromStore
        FROM tb_order_detail_spider a
        JOIN tb_order_spider b ON a.`orderNo`=b.`orderNo`
        WHERE link_id="1" AND a.url IS NOT NULL
        GROUP BY fromStore
        ORDER BY COUNT(a.id) DESC
    """
    time.sleep(2)
    rows = mysql.get_data(sql=sql)
    if rows:
        b, p, f = await self.login(**STORE_INFO[rows[0][1]])
        await self.link_spider(p, f)
        return
    # Nothing left to link: requeue orders that were deferred (state 2)
    # and go back to detail crawling.
    mysql.update_data(t="tb_order_spider",
                      set={"isDetaildown": 0},
                      c={"isDetaildown": 2})
    await self.run_order_detail_spider()
def load_day(date_load, online, lmysql, lbigquery):
    """Copy one day of data from MySQL into BigQuery.

    The day's existing BigQuery rows are deleted first.  Batches are read
    from MySQL starting after the last seen index; in online mode the loop
    never ends, sleeping a minute between polls.

    NOTE: the loop/branching consults the global ``args.online`` flag, not
    the *online* parameter (which is only printed) — preserved as-is.

    :param date_load: day to load
    :param online: if True, never finish; load, sleep, load new data
    :param lmysql: MySQL connection
    :param lbigquery: BigQuery connection
    """
    print("Load day, delete " + str(date_load) + " online " + str(online))
    big_query.delete_day(date_load, lbigquery)
    print("Load day " + str(date_load) + " online " + str(online))
    last_index = 0  # last MySQL index read so far
    while True:
        batch = process_daraframe(mysql.get_data(lmysql, date_load, last_index))
        if batch is not None:
            print("Loading " + str(batch.shape))
            # Column 15 is the database index: remember the high-water
            # mark for the next query, then drop it from the frame.
            last_index = batch[15].max()
            batch = batch.drop(columns=15)
            if args.only_print:
                print_lines(batch)
            elif args.online:
                # Online: keep the last five windows for late-arriving data.
                load_df(batch, window_remain=5)
            else:
                # Offline: no future data, nothing to retain.
                load_df(batch, window_remain=0)
        if not args.online:
            break
        sleep(60)
def competitor_data():
    """Flask handler: upsert competitor price rows posted as parallel
    form arrays (stockid[], attribute[], price_tb[], package_number[])."""
    if request.method == 'POST':
        form = request.form
        shared = form.to_dict()
        # Pull the parallel arrays out and strip them from the shared dict.
        stockids = form.getlist("stockid[]")
        shared.pop("stockid[]")
        attributes = form.getlist("attribute[]")
        shared.pop("attribute[]")
        prices = form.getlist("price_tb[]")
        shared.pop("price_tb[]")
        pgns = form.getlist("package_number[]")
        shared.pop("package_number[]")
        for idx, stockid in enumerate(stockids):
            item = shared.copy()
            item['stockid'] = stockid
            if not item['stockid']:
                continue  # blank row in the form — skip it
            item['attribute'] = attributes[idx]
            item['price_tb'] = prices[idx]
            # Per-row package number wins over the form-level fallback;
            # both default to 1 when neither exceeds 1.
            fallback_pgn = item.pop("package_number_t")
            if int(pgns[idx]) > 1:
                item['package_number'] = pgns[idx]
            elif int(fallback_pgn) > 1:
                item['package_number'] = fallback_pgn
            else:
                item['package_number'] = 1
            key = {"stockid": item["stockid"], "link_id": item["link_id"]}
            if mysql.get_data(c=key, t="prices_tb"):
                mysql.update_data(set=item, c=key, t="prices_tb")
            else:
                mysql.insert_data(d=item, t="prices_tb")
        return "添加成功!"
async def _get_html(self, speed=1):
    """Async generator that pages through every shop's item listing.

    Progress (page_num, total_page, spent_time, mail flag) is persisted
    locally via Format._read/_write so a crashed run resumes where it
    stopped.  Yields (html, shop_id) per page, or (0, 0) when a login form
    is detected and the caller must re-authenticate.

    :param speed: pause between page turns, seconds
    """
    sql = "select shop_id from shop_info where shop_id!='88888888'"  # all shop IDs
    shop_infos = mysql.get_data(sql=sql, dict_result=True)
    shop_ids = []
    for shop_info in shop_infos:
        # Total page count previously stored for this shop.
        page_control = Format._read(shop_id=shop_info['shop_id'],
                                    flag="total_page")
        if not page_control:
            page_control = 1000  # no stored total yet: assume a high cap
        shop_ids.append(shop_info['shop_id'])  # remembered for the reset loop below
        url = self.start_url.replace("shop", "shop" + shop_info["shop_id"])  # shop home page
        await self.page.goto(url)
        await self._jump_to_search_page()
        page_num = Format._read(shop_info['shop_id'], "page_num")  # resume point
        while page_num < page_control:
            start_time = time.time()  # timestamp for per-page timing
            try:
                # if page_num:
                await self._goto_last_page_num(page_num + 1)
                await asyncio.sleep(5)
                frames = self.page.frames
                for f in frames:
                    if await f.J("#TPL_username_1"):
                        # Login form detected: signal the caller to re-login.
                        yield 0, 0
                frame = await self.login.get_nc_frame(frames=frames)
                if frame:
                    # Anti-bot slider present — solve it before continuing.
                    await self.login.slider(self.page, 1)
            except Exception as e:
                print(e)
                await asyncio.sleep(5)
                continue
            try:
                await self.page.waitForSelector(".shop-hesper-bd.grid")
            except errors.TimeoutError:
                # Item grid never appeared: treat as end of this shop.
                break
            except Exception as e:
                print(e)
                continue
            # Persist the next page number before yielding, so a crash
            # while the consumer processes this page does not repeat it.
            Format._write(shop_id=shop_info['shop_id'], flag="page_num",
                          value=page_num + 1)
            page_num = Format._read(shop_info['shop_id'], "page_num")
            yield await self.page.content(), shop_info['shop_id']
            # Re-read the total in case the consumer updated it.
            page_control = Format._read(shop_id=shop_info['shop_id'],
                                        flag="total_page")
            await asyncio.sleep(speed)  # pause between page turns
            spent_time_this_page = time.time() - start_time
            spent_time = Format._read(shop_id=shop_info['shop_id'],
                                      flag="spent_time")
            # Accumulate total crawl time for this shop.
            Format._write(shop_id=shop_info['shop_id'], flag="spent_time",
                          value=spent_time + spent_time_this_page)
            is_mail = Format._read(shop_info['shop_id'], "mail")
            if not is_mail:
                Reports().report(shop_info['shop_id'].split(" "))
    # All shops done: clear the per-shop resume/bookkeeping state.
    for shop_id in shop_ids:
        Format._del(shop_id=shop_id, flag="page_num")
        Format._del(shop_id=shop_id, flag="total_page")
        Format._del(shop_id=shop_id, flag="mail")
        Format._del(shop_id=shop_id, flag="spent_time")
report_mail() t1, t2, t3 = 1, 0, 0 elif t[1] < now < t[2] and t2 == 0: report_mail() t1, t2, t3 = 0, 1, 0 elif now > t[2] and t3 == 0: report_mail() t1, t2, t3 = 0, 0, 1 sql = """ SELECT fromStore FROM prices_tb_fix WHERE isComplete='0' GROUP BY fromStore ORDER BY COUNT(link_id) DESC """ ts = test_server.copy() ts['db'] = 'test' res = mysql.get_data(db=ts, sql=sql) if res: b, p, f = loop.run_until_complete( ss.login(**STORE_INFO[res[0][0]])) ptb = PriceTaoBao(ss, b, p, f) loop.run_until_complete(ptb.run()) loop.run_until_complete(p.close()) if len(res) == 1: loop.run_until_complete(b.close()) ss.b = None else: sleep(10) else: b, p, f = loop.run_until_complete(l.login()) ptb = PriceTaoBao(l, b, p, f) loop.run_until_complete(ptb.run())
async def order_detail_spider(self, p, f):
    """Crawl each pending order's detail page for store *f* on page *p*.

    Prefers orders joined against taobaoorders with Flag=8; falls back to
    all un-downloaded orders.  For every order it extracts the embedded
    JSON blob, updates tb_order_spider / tb_order_detail_spider, runs
    Verify(), and finally recurses via run_order_detail_spider().

    Fixes vs. the previous revision:
      * ``p.waitForSelector(...)`` in the slider loop was missing its
        ``await`` — the TimeoutError could never be raised, and the
        coroutine was silently discarded.
      * ``if 'prefix' and 'suffix' in promotions[i]`` only tested
        'suffix' ('prefix' is a truthy literal); both keys are now
        membership-tested as intended.
    """
    sql1 = """
        SELECT datailURL,a.orderNo FROM tb_order_spider a
        JOIN taobaoorders b ON a.orderNo = b.OrderNo
        WHERE isDetaildown=0 AND fromStore='%s' AND b.Flag = 8
        ORDER BY createTime DESC;
    """ % (f)
    sql = """
        SELECT datailURL,orderNo FROM tb_order_spider
        WHERE isDetaildown=0 AND fromStore='%s'
        ORDER BY createTime DESC
    """ % (f)
    results = mysql.get_data(sql=sql1, dict_result=True)
    if not results:
        results = mysql.get_data(sql=sql, dict_result=True)
    if results:
        for result in results:
            order = {}  # columns to write back to tb_order_spider
            url = result['datailURL']
            try:
                net_check()
                await p.goto(url)
            except errors.TimeoutError:
                continue
            slider = await p.J('#nocaptcha')
            if slider:
                # Anti-bot slider: drag it, then retry via the "refresh"
                # link until it stops reappearing.
                while True:
                    print("出现滑块验证码")
                    await asyncio.sleep(2)
                    await p.hover('#nc_1_n1z')
                    await p.mouse.down()
                    await p.mouse.move(
                        2000, 0, {'delay': random.randint(1000, 2000)})
                    await p.mouse.up()
                    try:
                        # FIX: was missing `await`, so TimeoutError never fired.
                        await p.waitForSelector(".nc-lang-cnt a",
                                                timeout=10000)
                        await asyncio.sleep(2)
                        await p.click(".nc-lang-cnt a")
                    except errors.TimeoutError:
                        break
                    except errors.PageError:
                        break
            try:
                await p.waitForSelector('#detail-panel', timeout=30000)
            except Exception:
                continue
            content = await p.content()
            # The order data lives in an inline JSON.parse('...') call;
            # unescape it before json.loads.
            a = re.search("var data = JSON.parse\('(.*)'\);",
                          content).group(1)
            b = a.replace('\\\\\\"', '')
            data = b.replace('\\"', '"')
            m = json.loads(data)
            order['actualFee'] = m['mainOrder']['payInfo']['actualFee'][
                'value']
            order['orderStatus'] = status_format(
                m['mainOrder']['statusInfo']['text'])
            if order['orderStatus'] == '等待买家付款':
                order['isDetaildown'] = 2  # unpaid: revisit later
            else:
                order['isDetaildown'] = 1
            # Sum every promotion amount that has both a prefix and a suffix.
            coupon = 0
            for k, v in m['mainOrder']['payInfo'].items():
                if k == 'promotions':
                    promotions = m['mainOrder']['payInfo']['promotions']
                    for i in range(len(promotions)):
                        # FIX: 'prefix' was a bare truthy literal before.
                        if 'prefix' in promotions[i] and 'suffix' in promotions[i]:
                            coupon_temp = re.search(
                                "(\d+\.\d+)", promotions[i]['value'])
                            if coupon_temp:
                                coupon += float(coupon_temp.group(1))
            order['couponPrice'] = round(coupon, 2)
            for k, v in m.items():
                if k == 'buyMessage':
                    order['buyerComments'] = v
            orderNo = m['mainOrder']['id']
            # Trade number / pay time from the order-info lines.
            order_info = m['mainOrder']['orderInfo']['lines'][1]['content']
            for i in range(len(order_info)):
                if order_info[i]['value']['name'] == '支付宝交易号:':
                    try:
                        order['tradeNo'] = order_info[i]['value']['value']
                    except KeyError:
                        order['tradeNo'] = None
                elif order_info[i]['value']['name'] == '付款时间:':
                    order['payTime'] = order_info[i]['value']['value']
            # Shipping block (carrier, method, tracking no, address).
            ship_info = m['tabs']
            for i in range(len(ship_info)):
                if ship_info[i]['id'] == 'logistics':
                    temp = ship_info[i]['content']
                    for k, v in temp.items():
                        if k == 'logisticsName':
                            order['shippingCompany'] = v
                        elif k == 'shipType':
                            order['shippingMethod'] = v
                        elif k == 'logisticsNum':
                            order['shippingNo'] = v
                        elif k == 'address':
                            rec_info = v
                            order['receiverName'] = rec_info.split(
                                ",")[0].replace(" ", "")
                            order['receiverPhone'] = rec_info.split(",")[1]
                            order['receiverAddress'] = "".join(
                                rec_info.split(",")[2:])
            # Per-line promotion benefits; cancelled sub-orders are skipped
            # and do not consume a line number.
            sub_orders = m['mainOrder']['subOrders']
            line_no = 0
            for i in range(len(sub_orders)):
                if sub_orders[i]['tradeStatus'][0]['content'][0][
                        'value'] == '已取消':
                    continue
                item = {}
                temp = 0
                itemNo = line_no
                line_no += 1
                if sub_orders[i]['promotionInfo']:
                    for j in sub_orders[i]['promotionInfo']:
                        for x in j['content']:
                            for k, v in x.items():
                                if k == 'value':
                                    p_list = re.findall("-?\d+\.\d+", v)
                                    if p_list:
                                        temp += float(p_list.pop())
                item['unitBenefits'] = temp
                mysql.update_data(t="tb_order_detail_spider", set=item,
                                  c={'orderNo': orderNo, 'itemNo': itemNo})
            mysql.update_data(t="tb_order_spider", set=order,
                              c={'orderNo': orderNo})
            Verify()
            # Random jittered pause between orders to look less bot-like.
            while True:
                s = random.random()
                if s > 0.9:
                    for i in range(int(s * n_o_time)):
                        await asyncio.sleep(1)
                        print(">", end="", flush=True)
                    print("")
                    break
    await p.close()
    await self.run_order_detail_spider()
async def fix_data(self, link_id=None):
    """Refresh the price data for one link_id via the item page + the
    seller back-office query form.

    link_id selection depends on MODE: 1 = hard-coded test id, 2 = prompt
    the operator, 3 = pop the next incomplete row from prices_tb_fix.
    Off-shelf items are flagged 'XiaJia' and marked isComplete=2.
    Completion is signalled asynchronously through self.complete, set by
    the request/response interceptors.  Returns 1 when MODE 3 has no work.
    """
    # page = await self.browser.newPage()
    self.complete = 0      # 0 = pending, 1 = done, 2 = special link
    self.prices = {}       # sku id -> listed price
    self.promo_price = {}  # sku/link id -> promotional price
    await asyncio.sleep(2)
    # Clear the back-office query input (select-all + delete).
    await self.page.focus("input[name='queryItemId']")
    await self.page.keyboard.down("ShiftLeft")
    await self.page.keyboard.press("Home")
    await self.page.keyboard.down("ShiftLeft")
    await self.page.keyboard.press("Delete")
    server_name = 'production_server'
    self.sn = server_name
    if not link_id:
        if MODE == 1:
            link_id = "585308692855"  # fixed test item
        elif MODE == 2:
            # Interactive: keep prompting until a 10-20 digit id is given.
            while True:
                link_id = input(time_now() + " | 输入link_id:")
                isMatch = re.match("^\d{10,20}$", link_id)
                if isMatch:
                    break
        elif MODE == 3:
            # Unattended: take the next incomplete fix job for this store.
            sql = """
                SELECT link_id,updateTime,server,operator FROM prices_tb_fix
                WHERE fromStore='%s' and isComplete=0
                ORDER BY flag LIMIT 1
            """ % (self.fromStore)
            res = mysql.get_data(sql=sql, db=self.db_test)
            if res:
                self.target_server = self.server[res[0][2]]
                link_id = res[0][0]
                updateTime = res[0][1]
                server_name = res[0][2]
                self.sn = server_name
                self.operator = res[0][3]
            else:
                return 1  # queue empty
    logger.info(link_id)
    page = await self.browser.newPage()
    await page.setViewport({'width': 1600, 'height': 900})
    net_check()
    await page.goto("https://item.taobao.com/item.htm?id=" + link_id,
                    timeout=0)
    await asyncio.sleep(3)
    error_page = await page.J(".error-notice-hd")  # is the item still on sale?
    offline = await page.J("#J_detail_offline")    # is the item still on sale?
    if error_page or offline:
        # Item is off the shelf: flag it and close the fix job.
        logger.info("商品已下架")
        mysql.update_data(t="prices_tb",
                          set={"flag": "XiaJia",
                               "typeabbrev": self.fromStore},
                          c={"link_id": link_id},
                          db=self.target_server)
        # mysql.update_data(t="tb_order_detail_spider",
        #                   set={"link_id": link_id + "xiajia"},
        #                   c={"link_id": link_id},
        #                   db=self.target_server)
        mysql.update_data(db=self.db_test, t="prices_tb_fix",
                          set={"isComplete": "2",
                               "updateTime": time_now()},
                          c={"link_id": link_id,
                             "server": server_name})
        await page.close()
        return
    else:
        # Poll until the rate/sale counters have rendered as digits.
        while True:
            content = await page.content()
            # print(content)
            doc = pq(content)
            self.common['rates'] = doc.find("#J_RateCounter").text()
            self.common['sales'] = doc.find("#J_SellCounter").text()
            self.common['freight'] = doc.find("#J_WlServiceTitle").text()
            mat1 = re.match("\d+", self.common['sales'])
            mat2 = re.match("\d+", self.common['rates'])
            if mat1 and mat2:
                break
        # Extract (sku data-values, price, sku id) triples — non-empty
        # means the item has multiple attributes.
        res = re.findall('";(.*?);".*?e":"(\d+\.\d+).*?d":"(\d+)"', content)
        if res:
            control = 1       # only click through the first sku combo
            benefit_price = 0  # discount amount shared by all skus
            for r in res:
                data_values = r[0].split(";")
                prop = []
                for data in data_values:
                    prop.append(
                        doc.find("li[data-value='" + data + "'] span").text())
                if control:
                    # Click the first sku's attribute values to reveal the
                    # promo price, then derive the flat discount.
                    for data in data_values:
                        try:
                            await page.click('li[data-value="' + data + '"]')
                        except errors.PageError:
                            pass
                    content_p = await page.content()
                    promo_price = re.findall(
                        '<em id="J_PromoPriceNum".*?>(\d+\.?\d*)</em>',
                        content_p)  # is there a promotion?
                    if len(promo_price) == 1:
                        benefit_price = float(r[1]) - float(promo_price[0])
                    control = 0
                self.prices[r[2]] = r[1]
                prop.reverse()
                self.prop[r[2]] = "-".join(prop)
            for r in res:
                if benefit_price:
                    # Apply the same discount to every sku's list price.
                    self.promo_price[r[2]] = round(
                        float(r[1]) - benefit_price, 2)
        else:
            # Single-attribute item: promo price (if any) keyed by link_id.
            promo_price = re.findall(
                '<em id="J_PromoPriceNum".*?>(\d+.*\d*)</em>',
                content)  # is there a promotion?
            if promo_price:
                self.promo_price[link_id] = promo_price[0]
            else:
                self.promo_price[link_id] = 0
        # print(self.prices)
        # print(self.promo_price)
        await page.close()
        # Drive the back-office query; the interceptors consume the
        # response and eventually set self.complete.
        await self.page.type("input[name='queryItemId']", link_id)
        await self.page.setRequestInterception(True)
        self.page.on('request', self.intercept_request)
        self.page.on('response', self.intercept_response)
        await asyncio.sleep(1)
        net_check()
        await self.page.click(".filter-footer button:first-child")
        while True:
            await asyncio.sleep(1)
            if self.complete == 1:
                res = mysql.get_data(db=self.db_test, t="prices_tb_fix",
                                     c={"link_id": link_id,
                                        "server": server_name})
                if res:
                    mysql.update_data(db=self.db_test, t="prices_tb_fix",
                                      set={"isComplete": "1",
                                           "updateTime": time_now()},
                                      c={"link_id": link_id,
                                         "server": server_name})
                break
            elif self.complete == 2:
                # Special link: mark it so it is handled separately.
                mysql.update_data(db=self.db_test,
                                  t="prices_tb_fix",
                                  set={"spe_link": "1"},
                                  c={
                                      "link_id": link_id,
                                      "server": server_name
                                  })
                break
async def login(self, page=None, **kwargs):
    """Log in to Taobao seller with username/password, handling slider
    captchas and SMS phone verification, then land on the seller home.

    :param page: existing page to reuse; a new one is created if None
    :param kwargs: expects 'username', 'password', 'fromStore'
    :returns: (browser, page, fromStore)
    """
    # shutil.rmtree(CHROME_PROFILE_PATH, True)
    if not page:
        page = await self.get_new_page()
    # Retry the login page until it actually loads.
    while 1:
        try:
            net_check()
            await page.goto(login_url)
        except errors.PageError:
            pass
        except errors.TimeoutError:
            pass
        else:
            break
    # Switch from QR login to the static password form, then type the
    # credentials; reload and retry on stale-element errors.
    while True:
        try:
            await page.waitForSelector(".forget-pwd.J_Quick2Static",
                                       visible=True,
                                       timeout=10000)
            await page.click(".forget-pwd.J_Quick2Static")
        except errors.TimeoutError:
            pass
        except errors.ElementHandleError:
            await page.reload()
            continue
        finally:
            # Whether or not the switch link was there, try to type the
            # credentials; success breaks out of the retry loop.
            try:
                await page.type('#TPL_username_1', kwargs['username'],
                                {'delay': self.input_time_random() - 50})
                await page.type('#TPL_password_1', kwargs['password'],
                                {'delay': self.input_time_random()})
            except errors.ElementHandleError:
                await page.reload()
            else:
                break
    net_check()
    # Detect a slider captcha by probing the page element's style.
    slider = await page.Jeval('#nocaptcha', 'node => node.style')  # slider present?
    if slider:
        print("出现滑块情况判定")
        await self.mouse_slide(p=page)
        await page.click("#J_SubmitStatic")  # submit the login form
        time.sleep(2)
        await self.get_cookie(page)
    else:
        await page.click("#J_SubmitStatic")
    try:
        await page.waitForSelector("#container", timeout=10000)
    except errors.TimeoutError:
        # Seller console never appeared: probably phone verification.
        print("超时需要手机验证!")
        frames = page.frames
        try:
            await frames[1].waitForSelector("button#J_GetCode",
                                            timeout=10000)
        except errors.TimeoutError:
            pass
        else:
            print("需要要手机验证码")
            # Park a row keyed by a random id; an external SMS relay
            # writes the received code into it.
            test_server['db'] = "test"
            id = random.randint(0, 100)
            mysql.insert_data(db=test_server, t="phone_verify",
                              d={"id": id})
            # frames = page.frames
            # await frames[1].click(".J_SendCodeBtn")
            verify_code = "0"  # "0" is the sentinel for "not received yet"
            while True:
                net_check()
                await frames[1].click("button#J_GetCode")  # (re)send the SMS
                # Poll every 5s, up to 10 minutes per send attempt.
                for i in range(120):
                    await asyncio.sleep(5)
                    res = mysql.get_data(
                        db=test_server,
                        cn=["verify_code"],
                        t="phone_verify",
                        c={"id": id},
                    )
                    verify_code = res[0][0]
                    if verify_code != "0":
                        mysql.delete_data(db=test_server,
                                          t="phone_verify",
                                          c={"id": id})
                        break
                if verify_code != "0":
                    break
            await frames[1].type("input#J_Phone_Checkcode", verify_code,
                                 {"delay": self.input_time_random() - 50})
            # await frames[1].type(".J_SafeCode", a, {'delay': self.input_time_random() - 50})
            net_check()
            await frames[1].click("input#submitBtn")
            # await frames[1].click("#J_FooterSubmitBtn")
    net_check()
    await page.goto("https://myseller.taobao.com/home.htm")
    await page.waitForSelector("#container", timeout=30000)
    return self.b, page, kwargs['fromStore']
async def parse(self, data):
    """Process one batch of back-office rows: sync each item into
    prices_tb on the target server, creating/updating rows and soft-
    deleting stale stockid↔link_id matches.

    *data* is the decoded row list, or the string "q" meaning "nothing to
    do".  Multi-attribute items (stockid starting with MUT) and items
    flagged spe_link are expanded by clicking into the sku popup instead.
    Sets self.complete (1 = done, 2 = promo price missing — aborted).
    """
    if data != "q":
        for i in range(len(data)):
            self.item = {}
            self.item = self.common.copy()
            # Stock code is embedded in the second description line.
            self.item['stockid'] = re.search(
                "编码:(.*)",
                data[i]['itemDesc']['desc'][1]['text']).group(1).upper()
            self.item['link_id'] = data[i]['itemId']
            self.item['attribute'] = ""
            self.item['flag'] = "update"
            self.item['typeabbrev'] = self.fromStore
            self.item['shop_id'] = self.shop_id(self.fromStore)
            self.item['SpiderDate'] = time_now()
            # Normalise half-width parens in the description.
            temp_des = data[i]['itemDesc']['desc'][0]['text']
            self.item['description'] = temp_des.replace("(", "(").replace(
                ")", ")")
            self.item['price_tb'] = re.findall(
                "(\d+.?\d*)", data[i]["managerPrice"]['currentPrice'])[0]
            # Promo price was collected earlier (keyed by link_id).
            self.item['promotionprice'] = self.promo_price.get(
                self.item['link_id'])
            # print(self.promo_price)
            sql = "select spe_link from prices_tb_fix where link_id='%s' and server='%s'" % (
                self.item['link_id'], self.sn)
            spe_link_id = mysql.get_data(db=self.db_test, sql=sql,
                                         return_one=True)
            isMut = re.search("^MUT\D*", self.item['stockid'])
            if isMut or spe_link_id:
                # Multi-attribute / special link: open the sku popup and
                # let the interceptors capture the detailed rows.
                await self.page.setRequestInterception(True)
                self.page.on('request', self.intercept_request)
                self.page.on('response', self.intercept_response)
                await asyncio.sleep(1)
                net_check()
                await self.page.click(
                    ".next-table-row td:nth-child(2) div.product-desc-hasImg span:nth-child(2) i"
                )
                await asyncio.sleep(1)
                await self.page.keyboard.press('Escape')
            else:
                # print(self.item)
                if self.item['promotionprice'] is None:
                    # No promo price was collected for this link: report
                    # and abort this batch.
                    mail("price_tb_error",
                         self.fromStore + ":" + self.item['link_id'],
                         ["*****@*****.**"])
                    logger.error("error:" + self.fromStore + " : " +
                                 self.item['link_id'] + " and " +
                                 mysql.concat(self.promo_price, "="))
                    self.complete = 2
                    break
                condition = {
                    "stockid": self.item['stockid'],
                    "link_id": self.item['link_id'],
                    "shop_id": self.item['shop_id'],
                }
                res = mysql.get_data(t="prices_tb", l=1, cn=["id"],
                                     c=condition, db=self.target_server)
                if res:
                    # NOTE(review): ratio divides by res[0][0], which the
                    # query selects as the row *id* — looks suspicious but
                    # is preserved as-is; confirm against the schema.
                    self.item['ratio'] = round(
                        float(self.item['price_tb']) / float(res[0][0]), 2)
                    print(self.item)
                    mysql.update_data(t="prices_tb", set=self.item,
                                      c=condition, db=self.target_server)
                else:
                    # First sighting: insert a full row with ERP defaults.
                    insert_item = self.item.copy()
                    insert_item["currabrev"] = "CNY"
                    insert_item["price_erp"] = 0
                    insert_item["operator"] = self.operator
                    insert_item["last_time"] = time_now()
                    if self.operator == "爬虫维护":
                        insert_item["flag"] = "create"
                    else:
                        insert_item['flag'] = "add"
                    insert_item["ratio"] = 1
                    insert_item["package_number"] = 1
                    insert_item["Checker"] = ""
                    insert_item["CheckDate"] = "0000-00-00 00:00:00"
                    print(insert_item)
                    with open(
                            "reports/report_" + self.fromStore +
                            "_insert.txt", "a") as file:
                        file.writelines("物料编码:" + insert_item['stockid'] +
                                        " 与 商品ID:" + insert_item['link_id'] +
                                        " 为最新匹配,添加至ERP系统。\n" +
                                        self.item_url +
                                        insert_item['link_id'] + "\n" +
                                        self.item_erp_url +
                                        insert_item['link_id'] + "\n\n")
                    mysql.insert_data(t="prices_tb", d=insert_item,
                                      db=self.target_server)
                # Soft-delete any other stockids still mapped to this
                # link_id (stale matches).
                result = mysql.get_data(
                    t="prices_tb",
                    cn=["*"],
                    c={"link_id": self.item['link_id']},
                    db=self.target_server,
                    dict_result=True)
                if len(result) > 1:
                    for r in result:
                        if r['stockid'] != self.item['stockid'] and r[
                                'flag'] != "del":
                            with open(
                                    "reports/report_" + self.fromStore +
                                    "_delete.txt", "a") as file:
                                file.writelines("物料编码:" + r['stockid'] +
                                                " 与 商品ID:" +
                                                self.item['link_id'] +
                                                " 不匹配,已被爬虫从ERP系统中删除。\n" +
                                                self.item_url +
                                                self.item['link_id'] + "\n" +
                                                self.item_erp_url +
                                                self.item['link_id'] +
                                                "\n\n")
                            mysql.update_data(t="prices_tb",
                                              c={"id": r['id']},
                                              db=self.target_server,
                                              set={"flag": "del"})
                    self.complete = 1
                else:
                    pass
                # NOTE(review): original indentation ambiguous here — this
                # final assignment appears to run for every non-MUT item.
                self.complete = 1
async def link_spider(self, p, f):
    """Resolve missing link_ids for store *f*'s order details on page *p*.

    One pending order at a time is looked up through the promotion-query
    endpoint; each sub-order's itemId is written back to
    tb_order_detail_spider, and a prices_tb_fix job is queued/refreshed
    when the price row is missing or stale (> 14 days / zero date).
    Recurses via run_link_spider() when the queue drains.
    """
    test_server["db"] = "test"
    while True:
        # Next detail row that still has the placeholder link_id "1".
        sql = """
            SELECT a.id,url,goodsCode,a.orderNo
            FROM tb_order_detail_spider a
            JOIN tb_order_spider b ON a.`orderNo`=b.`orderNo`
            WHERE link_id="1" AND b.`fromStore`='%s' AND a.url IS NOT NULL
            ORDER BY b.createTime DESC LIMIT 1
        """ % (f)
        url = "https://smf.taobao.com/promotionmonitor/orderPromotionQuery.htm?orderNo="
        results = mysql.get_data(sql=sql, dict_result=True)
        if not results:
            break  # queue drained for this store
        orderno = results[0]['orderNo']
        url += orderno
        await p.goto(url)
        content = await p.content()
        # The endpoint embeds its JSON payload between > and <.
        data = re.findall(">(\{.*?\})<", content)
        order = json.loads(data[0])
        try:
            sub_orders = order["data"]["subOrderViewDTOs"]
        except KeyError:
            continue  # malformed/denied response: try the next order
        for so in sub_orders:
            order_no = so["orderNoStr"]
            link_id = so["itemId"]
            sql = "select goodsCode from tb_order_detail_spider where url like '%%%s%%'" % (
                order_no)
            print(sql)
            goodsCode = mysql.get_data(sql=sql, return_one=True)
            del sql
            # Write the resolved link_id back onto the matching detail rows.
            sql = "update tb_order_detail_spider set link_id='%s' where url like '%%%s%%'" % (
                link_id, order_no)
            mysql.update_data(sql=sql)
            del sql
            sql = """
                SELECT SpiderDate FROM prices_tb
                WHERE link_id='%s' AND stockid='%s'
                AND flag NOT IN ('del','XiaJia')
            """ % (link_id, goodsCode)
            res = mysql.get_data(sql=sql)
            res_fix = mysql.get_data(db=test_server,
                                     dict_result=True,
                                     t='prices_tb_fix',
                                     c={
                                         "link_id": link_id,
                                         "server": "production_server"
                                     })
            if res:
                # Price row exists: queue a fix (flag=1) only when stale.
                spider_date = res[0][0]
                days = 1
                if spider_date != '0000-00-00 00:00:00':
                    days = (datetime.datetime.now() - spider_date).days
                if spider_date == '0000-00-00 00:00:00' or days > 14:
                    if not res_fix:
                        mysql.insert_data(db=test_server,
                                          t="prices_tb_fix",
                                          d={
                                              "link_id": link_id,
                                              "fromStore": f,
                                              "flag": 1
                                          })
                    elif res_fix[0]["isComplete"] != 0:
                        # Reopen the completed fix job.
                        mysql.update_data(db=test_server,
                                          t="prices_tb_fix",
                                          set={
                                              "isComplete": 0,
                                              "flag": 1
                                          },
                                          c={
                                              "link_id": link_id,
                                              "server": "production_server"
                                          })
            else:
                # No price row at all: queue a creation job (flag=0).
                if not res_fix:
                    mysql.insert_data(db=test_server,
                                      t="prices_tb_fix",
                                      d={
                                          "link_id": link_id,
                                          "fromStore": f,
                                          "flag": 0
                                      })
                elif res_fix[0]["isComplete"] != 0:
                    mysql.update_data(db=test_server,
                                      t="prices_tb_fix",
                                      set={
                                          "flag": 0,
                                          "isComplete": 0
                                      },
                                      c={
                                          "link_id": link_id,
                                          "server": "production_server"
                                      })
        sleep(5)  # throttle between orders
    await p.close()
    await self.run_link_spider()
async def parse(self, mainOrders, pageNum):
    """Parse one page of scraped order listings into DB rows.

    Args:
        mainOrders: decoded list of order dicts from the order-list page.
        pageNum: 1-based page number, used for logging and loop bookkeeping.

    Writes one tb_order_spider row per order and one tb_order_detail_spider
    row per sub-order via save_in_sql(). Stops the whole crawl round
    (self.complete = 2) once an order older than the time-of-day-dependent
    cutoff is seen; otherwise sets self.complete = 1.
    """
    # Choose how many days back to crawl based on time of day:
    # before 08:00 use the configured EARLIEST_ORDER_CREATETIME,
    # 08:00-18:00 use 2 days, after 18:00 use 20 days.
    t = time_zone(["08:00", "18:00", "23:59"])
    a = datetime.datetime.now()
    if a < t[0]:
        eoc = EARLIEST_ORDER_CREATETIME
    elif t[0] < a < t[1]:
        eoc = 2
    else:
        eoc = 20
    start_time = datetime.datetime.now()
    logger.info("开始第 " + str(pageNum) + " 页订单爬取")
    logger.info(store_trans(self.fromStore))
    if pageNum == 1:
        self._loop_start_time = datetime.datetime.now()
    loop_control = 0  # set to 1 when the cutoff date is reached
    for i in range(len(mainOrders)):
        order = {}  # accumulates one order's column values
        order['orderNo'] = mainOrders[i]["id"]
        order['createTime'] = mainOrders[i]['orderInfo']['createTime']
        order['buyerName'] = mainOrders[i]['buyer']['nick']
        flag = mainOrders[i]['extra']['sellerFlag']
        order['actualFee'] = mainOrders[i]['payInfo']['actualFee']
        # Shipping fee is embedded in the postType display string.
        order['deliverFee'] = re.search(
            "\(含快递:¥(\d+\.\d+)\)",
            mainOrders[i]['payInfo']['postType']).group(1)
        order['datailURL'] = "https:" + mainOrders[i]['statusInfo'][
            'operations'][0]['url']
        order['orderStatus'] = mainOrders[i]['statusInfo']['text']
        order['fromStore'] = self.fromStore
        order['updateTime'] = time_now()
        if flag == 1:
            # Seller flag set: fetch the flag memo text from its data URL.
            data_url = self.base_url + mainOrders[i]['operations'][0][
                'dataUrl']
            order['sellerFlag'] = await self.get_flag_text(data_url)
        try:
            order['isPhoneOrder'] = mainOrders[i]['payInfo']['icons'][0][
                'linkTitle']
        except KeyError:
            pass  # not a phone order; column simply left unset
    
        items = mainOrders[i]['subOrders']
        line_no = 0  # itemNo counter; only advanced for non-cancelled lines
        for j in range(len(items)):
            continue_code = 0  # 1 => line was cancelled, skip persisting it
            item = {}  # accumulates one sub-order's column values
            item['orderNo'] = mainOrders[i]["id"]
            item['itemNo'] = line_no
            try:
                item['goodsCode'] = items[j]['itemInfo']['extra'][0][
                    'value']
            except KeyError:
                item['goodsCode'] = 'error'
                logger.error(time_now() + " 订单:" + item['orderNo'])
            # Normalize full-width/HTML-entity characters in the title.
            item['tbName'] = items[j]['itemInfo']['title'].strip() \
                .replace("±", "±").replace("Φ", "Φ").replace("Ω", "Ω") \
                .replace("—", "—").replace("°", "°").replace("×", "×") \
                .replace("μ", "μ").replace(" ", "").replace("(", "(").replace(")", ")")
            item['unitPrice'] = items[j]['priceInfo']['realTotal']
            item['sellNum'] = items[j]['quantity']
            item['orderStatus'] = order['orderStatus']
            if self.orderno:
                logger.info(item['orderStatus'])
            item['refundStatus'] = None
            item['isRefund'] = 0
            item['goodsAttribute'] = ""
            item['url'] = "https:" + items[j]['itemInfo']['itemUrl']
            try:
                goodsAttributes = items[j]['itemInfo']['skuText']
            except KeyError:
                pass  # no SKU attributes on this line
            else:
                temp = []
                for k in range(len(goodsAttributes)):
                    try:
                        goodsAttributes[k]['name']
                    except KeyError:
                        # Entry without a 'name' continues the previous
                        # attribute's value string.
                        n = len(temp)
                        temp[n - 1] += goodsAttributes[k]['value'].replace(
                            "&Omega", "Ω").replace("·", "·")
                    else:
                        temp.append(goodsAttributes[k]['value'].replace(
                            "&Omega", "Ω").replace("·", "·"))
                temp_ga = "-".join(temp)
                item['goodsAttribute'] = temp_ga.replace("(", "(").replace(
                    ")", ")")
            try:
                operations = items[j]['operations']
            except KeyError:
                pass  # no operations => no refund/cancel info
            else:
                for x in range(len(operations)):
                    # NOTE: reuses (shadows) the outer `t` time-zone list.
                    t = operations[x]['style']
                    if t in ['t12', 't16'
                             ] and operations[x]['text'] != "退运保险":
                        # Styles t12/t16 carry refund status text.
                        item['refundStatus'] = operations[x]['text']
                        item['isRefund'] = "1"
                    elif t == 't0' and operations[x]['text'] == '已取消':
                        # Line was cancelled: delete any previously stored
                        # row and compact later itemNo values down by one.
                        continue_code = 1
                        delete_item = {
                            'orderNo': item['orderNo'],
                            'itemNo': item['itemNo'],
                            'goodsCode': item['goodsCode']
                        }
                        is_exist = mysql.get_data(
                            t="tb_order_detail_spider", l=1, c=delete_item)
                        if is_exist:
                            mysql.delete_data(t="tb_order_detail_spider",
                                              c=delete_item)
                            sql = """ UPDATE tb_order_detail_spider SET itemNo=itemNo-1 WHERE OrderNo='%s' and itemNo>'%s' """ % (item['orderNo'], item['itemNo'])
                            mysql.update_data(sql=sql)
                        pass
            if continue_code:
                continue
            else:
                line_no += 1
                self.save_in_sql(item=item, tableName='tb_order_detail_spider')
        self.save_in_sql(item=order, tableName='tb_order_spider')
        if self.orderno:
            # Targeted single-order crawl: done after the first order.
            logger.info("定向爬取订单完成")
            return
        date = datetime.date.today()
        date_limit = (
            date - datetime.timedelta(eoc)).strftime("%Y-%m-%d %H:%M:%S")
        if order['createTime'] < date_limit:
            # Orders are newest-first, so everything after this is older too.
            logger.info("完成本轮爬取,共翻 " + str(pageNum) + " 页。")
            loop_control = 1
            break
    end_time = datetime.datetime.now()
    spend_time = end_time - start_time
    logger.info(
        str(spend_time.seconds) + " 秒完成第 " + str(pageNum) + " 页订单爬取")
    if loop_control:
        # Whole round finished: report elapsed minutes since page 1.
        self._loop_end_time = datetime.datetime.now()
        loop_spend_time = round(
            (self._loop_end_time - self._loop_start_time).seconds / 60, 0)
        logger.info(str(loop_spend_time) + " 分钟完成本轮订单爬取")
        self.complete = 2
    else:
        self.complete = 1  # more pages to crawl
async def order_page(self, browser_in=None, page_in=None):
    """Crawl detail pages for orders not yet marked detail-downloaded.

    Args:
        browser_in: unused (kept for interface compatibility; the old
            per-call page logic is commented out in history).
        page_in: unused, same reason.

    For each tb_order_spider row with isDetaildown=0 for this store, loads
    its detail URL in self._page_order_detail, extracts the embedded JSON,
    and updates tb_order_spider (fees, coupon, logistics, receiver) plus
    per-line unitBenefits in tb_order_detail_spider, then runs Verify().
    Loops until no pending orders remain.
    """
    while 1:
        result = mysql.get_data(t="tb_order_spider",
                                cn=["datailURL", "orderNo"],
                                c={
                                    "isDetaildown": 0,
                                    "fromStore": self.fromStore
                                },
                                o=["createTime"],
                                om="d")
        if result:
            logger.info("订单详情爬取")
            for url in result:  # url is a (datailURL, orderNo) row
                start_time = datetime.datetime.now()
                logger.info(store_trans(self.fromStore))
                logger.info("开始订单 " + url[1] + " 详情爬取")
                order = {}  # accumulates columns to update on tb_order_spider
                await self._page_order_detail.bringToFront()
                page = self._page_order_detail
                # Retry navigation until it succeeds.
                while 1:
                    try:
                        await page.goto(url[0])
                    except errors.PageError:
                        sleep(5)
                    except errors.TimeoutError:
                        sleep(5)
                    else:
                        break
                try:
                    await page.waitForSelector('#detail-panel', timeout=30000)
                except errors.TimeoutError:
                    continue  # panel never rendered; move to next order
                content = await page.content()
                # Detail data is embedded as: var data = JSON.parse('...');
                a = re.search("var data = JSON.parse\('(.*)'\);",
                              content).group(1)
                # Strip escaped-escaped quotes, then unescape the rest.
                b = a.replace('\\\\\\"', '')
                data = b.replace('\\"', '"')
                m = json.loads(data)
                order['actualFee'] = m['mainOrder']['payInfo'][
                    'actualFee']['value']
                order['orderStatus'] = status_format(
                    m['mainOrder']['statusInfo']['text'])
                # 2 = awaiting payment (re-crawl later), 1 = done.
                if order['orderStatus'] == '等待买家付款':
                    order['isDetaildown'] = 2
                else:
                    order['isDetaildown'] = 1
                # Sum coupon amounts from promotion entries.
                coupon = 0
                for k, v in m['mainOrder']['payInfo'].items():
                    if k == 'promotions':
                        promotions = m['mainOrder']['payInfo'][
                            'promotions']
                        for i in range(len(promotions)):
                            # BUGFIX: was `'prefix' and 'suffix' in promotions[i]`,
                            # which Python evaluates as
                            # `'prefix' and ('suffix' in promotions[i])` — the
                            # 'prefix' literal is always truthy, so only the
                            # suffix key was ever checked. Require both keys.
                            if ('prefix' in promotions[i]
                                    and 'suffix' in promotions[i]):
                                coupon_temp = re.search(
                                    "(\d+\.\d+)", promotions[i]['value'])
                                if coupon_temp:
                                    coupon += float(coupon_temp.group(1))
                order['couponPrice'] = round(coupon, 2)
                for k, v in m.items():
                    if k == 'buyMessage':
                        order['buyerComments'] = v
                orderNo = m['mainOrder']['id']
                # Order-info lines carry trade number / pay time by label.
                order_info = m['mainOrder']['orderInfo']['lines'][1][
                    'content']
                for i in range(len(order_info)):
                    if order_info[i]['value']['name'] == '支付宝交易号:':
                        try:
                            order['tradeNo'] = order_info[i]['value'][
                                'value']
                        except KeyError:
                            order['tradeNo'] = None
                    elif order_info[i]['value']['name'] == '付款时间:':
                        order['payTime'] = order_info[i]['value']['value']
                # Logistics tab: shipping company/method/number and receiver.
                ship_info = m['tabs']
                for i in range(len(ship_info)):
                    if ship_info[i]['id'] == 'logistics':
                        temp = ship_info[i]['content']
                        for k, v in temp.items():
                            if k == 'logisticsName':
                                order['shippingCompany'] = v
                            elif k == 'shipType':
                                order['shippingMethod'] = v
                            elif k == 'logisticsNum':
                                order['shippingNo'] = v
                            elif k == 'address':
                                # "name,phone,addr parts..." comma-joined.
                                rec_info = v
                                order['receiverName'] = rec_info.split(
                                    ",")[0].replace(" ", "")
                                order['receiverPhone'] = rec_info.split(
                                    ",")[1]
                                order['receiverAddress'] = "".join(
                                    rec_info.split(",")[2:])
                # Per-line promotion benefit: last decimal in each entry.
                sub_orders = m['mainOrder']['subOrders']
                for i in range(len(sub_orders)):
                    item = {}
                    temp = 0
                    itemNo = i
                    if sub_orders[i]['promotionInfo']:
                        for j in sub_orders[i]['promotionInfo']:
                            for x in j['content']:
                                for k, v in x.items():
                                    if k == 'value':
                                        p_list = re.findall(
                                            "-?\d+\.\d+", v)
                                        if p_list:
                                            temp += float(p_list.pop())
                    item['unitBenefits'] = temp
                    mysql.update_data(t="tb_order_detail_spider",
                                      set=item,
                                      c={
                                          'orderNo': orderNo,
                                          'itemNo': itemNo
                                      })
                    logger.info("详细订单状态更新成功")
                mysql.update_data(t="tb_order_spider",
                                  set=order,
                                  c={'orderNo': orderNo})
                logger.info("订单状态更新成功")
                await self.page.bringToFront()
                Verify()
                end_time = datetime.datetime.now()
                spend_time = end_time - start_time
                logger.info(
                    str(spend_time.seconds) + " 秒完成订单 " + url[1]
                    + " 详情爬取")
                # Random pause of s * n_o_time seconds (s > 0.3) before the
                # next order, sleeping in 1s ticks to stay cooperative.
                while True:
                    s = random.random()
                    if s > 0.3:
                        logger.info("休息 " + str(int(s * n_o_time))
                                    + " 秒完开始下一单详情爬取")
                        for i in range(int(s * n_o_time)):
                            await asyncio.sleep(1)
                        break
        else:
            logger.info("没有可以爬取的详情")
            break
def report_mail(self):
    """Build and mail the daily ERP price-maintenance report, then prune.

    Aggregates update_reports rows from the 24 hours ending at today's
    18:05 (grouped by flag and shop), archives the raw rows to a dated CSV
    under ./reports/, mails a per-shop HTML summary, deletes report rows
    older than three days, and resets the lookup counter row.
    """
    d = time_zone(["18:05", "18:05"])
    d1, d2 = d[0], d[1]  # d2 unused; kept from the time_zone tuple
    # Window start: 24 hours before the 18:05 cutoff.
    d = (d1 - datetime.timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
    sql = "SELECT shop_id,flag,COUNT(flag),lookup FROM update_reports " \
          "WHERE last_time < '%s' AND last_time > '%s' " \
          "GROUP BY Flag,shop_id" % (d1, d)
    sql2 = "SELECT * FROM update_reports WHERE last_time < '%s' AND last_time > '%s' " % (
        d1, d)
    res = mysql.get_data(sql=sql, db=self.db_test)
    # Archive the raw (ungrouped) rows to CSV.
    con, c, cd = mysql.connection(self.db_test)
    df = pd.read_sql(sql2, con)
    con.close()
    date = date_now_str()
    df.to_csv("./reports/reports" + date + ".csv")
    out_list = []
    # One summary section per shop (headers reproduced verbatim); the four
    # copy-pasted blocks were folded into _append_shop_lines.
    for shop_id, header in (
            ('115443253', "今日爬虫维护 开源店 价格 :<br>"),
            ('197444037', "今日爬虫维护 玉佳企业店 价格:<br>"),
            ('34933991', "今日爬虫维护 赛宝电子店 价格:<br>"),
            ('68559944', "今日爬虫维护 玉佳电子店 价格:<br>"),
    ):
        out_list.append(header)
        self._append_shop_lines(res, shop_id, out_list)
    mail_reports("爬虫更新erp价格报告", "".join(out_list), date,
                 *["*****@*****.**", '*****@*****.**'])
    # BUGFIX: `dt` was referenced below while its assignment was commented
    # out, which raised NameError every run. Restore the 3-day retention
    # cutoff (and drop the stray debug print of it).
    dt = (d1 - datetime.timedelta(days=3)).strftime("%Y-%m-%d %H:%M:%S")
    sql = "delete from update_reports where last_time<'%s'" % (dt)
    mysql.delete_data(sql=sql, db=self.db_test)
    # NOTE(review): column "loopup" looks like a typo for "lookup" (the
    # SELECT above reads `lookup`) — confirm against the table schema
    # before changing, since the DB may actually contain a `loopup` column.
    mysql.update_data(t="update_reports",
                      set={"loopup": 0},
                      c={"link_id": "count"})

def _append_shop_lines(self, res, shop_id, out_list):
    """Append one shop's create/update/lookup count lines to out_list."""
    for r in res:  # r = (shop_id, flag, COUNT(flag), lookup)
        if r[0] != shop_id:
            continue
        if r[1] == 'create':
            out_list.append('创建了 ' + str(r[2]) + ' 条数据。<br>')
        elif r[1] == 'update':
            out_list.append('更新了 ' + str(r[2]) + ' 条数据。<br>')
        elif r[1] == 'lookup':
            out_list.append('查看了 ' + str(r[3]) + ' 条数据。<br>')
async def parse_2(self, data):
    """Sync one item's SKU price rows into the ERP prices_tb table.

    Args:
        data: decoded SKU table payload; iterates
            data['skuOuterIdTable']['dataSource'].

    For each SKU: updates the matching prices_tb row (recomputing the
    price ratio) or inserts a new one, logging inserts to a per-store
    report file. Afterwards writes repeat/empty-code report files and
    marks prices_tb rows whose stockid no longer appears as flag='del'.
    Sets self.complete = 1 when done.
    """
    verify = []       # stockids seen for this link (non-empty, first occurrence)
    repeat_list = []  # stockids that appeared more than once
    for i in data['skuOuterIdTable']['dataSource']:
        self.item['stockid'] = i['skuOuterId']
        logger.info(self.item['stockid'])
        if not self.item['stockid']:
            continue  # empty outer id: skip; reported below if none at all
        else:
            if self.item['stockid'] not in verify:
                verify.append(self.item['stockid'])
            else:
                if self.item['stockid'] not in repeat_list:
                    repeat_list.append(self.item['stockid'])
        skuId = str(i['skuId'])
        temp_attr = self.prop.get(skuId)
        # NOTE(review): if self.prop lacks this skuId, temp_attr is None and
        # .replace() raises AttributeError — presumably every sku has an
        # entry; confirm against the caller that fills self.prop.
        self.item['attribute'] = temp_attr.replace("(", "(").replace(")", ")")
        if not self.item['attribute']:
            self.item.pop('attribute')  # don't overwrite DB column with ""
        self.item['price_tb'] = self.prices.get(skuId)
        if self.promo_price:
            self.item["promotionprice"] = self.promo_price.get(skuId)
        else:
            self.item["promotionprice"] = 0
        condition = {
            "stockid": self.item['stockid'],
            "link_id": self.item['link_id'],
            "shop_id": self.item['shop_id'],
        }
        res = mysql.get_data(t="prices_tb",
                             l=1,
                             cn=["price_tb"],
                             c=condition,
                             db=self.target_server)
        if res:
            # Existing row: ratio = new price / previously stored price
            # (1 when the stored price is 0 to avoid division by zero).
            if res[0][0] == 0:
                self.item['ratio'] = 1
            else:
                self.item['ratio'] = round(
                    float(self.item['price_tb']) / float(res[0][0]), 2)
            print(self.item)
            mysql.update_data(t="prices_tb",
                              set=self.item,
                              c=condition,
                              db=self.target_server)
        else:
            # New stockid/link/shop combination: insert with ERP defaults.
            insert_item = self.item.copy()
            insert_item["currabrev"] = "CNY"
            insert_item["price_erp"] = 0
            insert_item["operator"] = self.operator
            insert_item["last_time"] = time_now()
            # flag 'create' marks spider-created rows; 'add' marks manual ones.
            if self.operator == "爬虫维护":
                insert_item["flag"] = "create"
            else:
                insert_item['flag'] = "add"
            insert_item["ratio"] = 1
            insert_item["package_number"] = 1
            insert_item["Checker"] = ""
            insert_item["CheckDate"] = "0000-00-00 00:00:00"
            print(insert_item)
            # Append an audit line for the new match to the insert report.
            with open("reports/report_" + self.fromStore + "_insert.txt",
                      "a") as file:
                file.write("物料编码:" + insert_item['stockid'] + " 与商品ID:"
                           + insert_item['link_id'] + " 为最新匹配,添加至ERP系统。\n"
                           + self.item_url + insert_item['link_id'] + "\n"
                           + self.item_erp_url + insert_item['link_id']
                           + "\n\n")
            mysql.insert_data(t="prices_tb",
                              d=insert_item,
                              db=self.target_server)
    if repeat_list:
        # Report duplicate stockids for this item.
        with open("reports/report_" + self.fromStore + "_repeat.txt",
                  "a") as file:
            file.write("店铺:" + store_trans(self.fromStore) + ",商品id:"
                       + self.item['link_id'] + " 重复编码\n" + "重复编码:"
                       + ",".join(repeat_list) + "\n" + self.item_url
                       + self.item['link_id'] + "\n\n")
    if not verify:
        # Report an item whose SKUs all had empty codes.
        with open("reports/report_" + self.fromStore + "_empty.txt",
                  "a") as file:
            file.write("店铺:" + store_trans(self.fromStore) + ",商品id:"
                       + self.item['link_id'] + " 空编码\n" + self.item_url
                       + self.item['link_id'] + "\n\n")
    # Soft-delete ERP rows whose stockid was not seen in this sync.
    sql = """ select id,stockid from prices_tb where link_id='%s' and flag not in('del','XiaJia') """ % (self.item['link_id'])
    res_verify = mysql.get_data(sql=sql, db=self.target_server)
    for rv in res_verify:
        if rv[1] not in verify:
            with open("reports/report_" + self.fromStore + "_delete.txt",
                      "a") as file:
                file.write("物料编码:" + rv[1] + " 与 商品ID:"
                           + self.item['link_id'] + " 不匹配,已被爬虫从ERP系统中删除。\n"
                           + self.item_url + self.item['link_id'] + "\n"
                           + self.item_erp_url + self.item['link_id']
                           + "\n\n")
            mysql.update_data(t="prices_tb",
                              c={"id": rv[0]},
                              db=self.target_server,
                              set={
                                  "flag": "del",
                                  "operator": self.operator,
                                  "last_time": time_now()
                              })
    self.complete = 1