def update_desc():
    """Rewrite every row returned by ``get_all_zh_desc`` in the ``asos`` database.

    For each row, the description (column 2) and the size description
    (column 5) are passed through ``do_trans``; the remaining columns are
    copied unchanged, and the result is written back with
    ``update_product_desc``. Column 6 of the row is not used.

    Returns:
        ``False`` when no connection could be obtained; otherwise ``None``.
    """
    # NOTE(review): connection settings (including the password) are
    # hard-coded here.
    params = DBParams()
    params.host = "172.16.8.147"
    params.port = "3306"
    params.user = "******"
    params.passwd = "123456"
    params.db = "asos"

    connection = get_param_conn(params)
    if connection is None:
        print("没有此数据库")
        return False

    desc_dao = ProductDescDao(connection, connection.cursor())
    rows = desc_dao.get_all_zh_desc()

    # One Product instance is reused for every row; each field that is
    # written to the database is overwritten on every iteration.
    product = Product()
    for row in rows:
        product.spider_product_id = row[0]
        product.name = row[1]
        product.desc = do_trans(row[2])
        product.constitute = row[3]
        product.location = row[4]
        product.size_desc = do_trans(row[5])
        product.language_id = row[7]
        desc_dao.update_product_desc(product)
        # Progress output: id of the row that was just updated.
        print(product.spider_product_id)
def grab_product(self, flag, url):
    """Fetch one product page and persist the product, its images and SKUs.

    Args:
        flag: Source marker stored on the product.
        url: Address of the product page; also stored as the source link.

    When ``ana_product_info`` reports failure nothing is saved and the URL
    is written to the log instead.
    """
    item = Product()
    item.flag = flag            # source
    item.url = url              # source link
    item.status = "1"           # normal status
    item.language_id = "2"      # English
    item.brand = "Ted Baker"    # brand

    page = self.do_visit(url)

    # Bail out early when the product information could not be parsed.
    if not self.ana_product_info(item, page):
        self.log_info("".join([url, " product not saved!"]))
        return

    # Product record first, then its description.
    self.save_product(item)
    self.save_product_desc(item)

    # Parse the image information from the page and store it.
    gallery = ProductImages()
    gallery.spid = item.spider_product_id
    self.ana_product_images(gallery, page)
    self.save_product_images(gallery)

    # Parse and store the SKU information.
    sku = ProductSku()
    self.ana_and_save_product_sku(sku, item)
def grab_product(self, flag, url):
    """Fetch one product page and persist the product, its images and SKUs.

    Image URLs are taken from ``product.images`` (filled in while parsing
    the product information) and stored as one comma-separated string.

    Args:
        flag: Source marker stored on the product.
        url: Address of the product page; also stored as the source link.
    """
    item = Product()
    item.flag = flag            # source
    item.url = url              # source link
    item.status = "1"           # normal status
    item.language_id = "2"      # English

    page = self.do_visit(url)

    # Nothing is saved when the product information could not be parsed.
    if not self.ana_product_info(item, page):
        self.log_info("".join([url, " product not saved!"]))
        return

    self.save_product(item)
    self.save_product_desc(item)

    # Image information.
    gallery = ProductImages()
    gallery.spid = item.spider_product_id
    if len(item.images) > 0:
        gallery.images = ",".join(item.images)
        self.save_product_images(gallery)
    else:
        # Record products that have no images.
        self.log_info("".join([str(gallery.spid), " no images!"]))

    # SKU information.
    sku = ProductSku()
    self.ana_and_save_product_sku(sku, item)
def translate_desc():
    """Translate the rows returned by ``get_en_desc_no_zh`` and store them.

    For every row a new ``Product`` with ``language_id = 1`` is built. Rows
    for which ``is_exists_product_desc`` already reports an entry are
    skipped. Otherwise the name, description, material and size description
    columns are sent through ``GGTranslater.en_to_zh`` when non-empty, and
    the product is saved.

    Returns:
        ``False`` when no connection could be obtained; otherwise ``None``.
    """
    # Set up the database connection.
    # NOTE(review): connection settings (including the password) are
    # hard-coded here.
    params = DBParams()
    params.host = "172.16.8.149"
    params.port = "3306"
    params.user = "******"
    params.passwd = "123456"
    params.db = "test"

    connection = get_param_conn(params)
    if connection is None:
        print("没有此数据库")
        return False

    desc_dao = ProductDescDao(connection, connection.cursor())

    # Set up the Google translation helper.
    translator = GGTranslater()

    # (row index, Product attribute) for each field that gets translated:
    # product name, description, material, size description.
    translated_fields = (
        (1, "name"),
        (2, "desc"),
        (3, "constitute"),
        (5, "size_desc"),
    )

    # Rows whose descriptions need translating.
    for row in desc_dao.get_en_desc_no_zh():
        product = Product()
        product.spider_product_id = row[0]
        product.language_id = 1

        # Already translated: do not translate again.
        if desc_dao.is_exists_product_desc(product.spider_product_id,
                                           product.language_id):
            print("".join([str(product.spider_product_id), " exists!"]))
            continue

        for index, attribute in translated_fields:
            text = row[index]
            # Empty or missing values are left at the Product default.
            if text:
                setattr(product, attribute, translator.en_to_zh(text))

        # Translation finished: store the result.
        desc_dao.save(product)
        print(str(product.spider_product_id))
def grab_product(self, flag, url):
    """Crawl a product from the ``/it/`` site, then its ``/cn/`` description.

    The ``/it/`` variant of ``url`` is fetched first and stored with
    ``language_id = "2"``; the ``/cn/`` variant is fetched afterwards and
    its description stored with ``language_id = "1"``. SKU data comes from
    the ``GetDetailState`` JSON endpoint.

    Args:
        flag: Source marker stored on the product.
        url: Product page address; ``/cn/`` is replaced by ``/it/``.
    """
    # Crawl the Italian site first.
    it_url = url.replace("/cn/", "/it/")
    it_html = self.do_visit(it_url)

    item = Product()
    item.flag = flag
    item.url = it_url           # source link
    item.status = "1"           # on sale
    # The Italian site is crawled first; its description language is
    # recorded as English.
    item.language_id = "2"

    # Products that cannot be parsed are only logged as failed.
    if not self.ana_product_info(item, it_html):
        self.log_info("".join([url, " product not find!"]))
        return

    # Product record and description.
    self.save_product(item)
    self.save_product_desc(item)

    # Parse and store the image information.
    gallery = ProductImages()
    gallery.spid = item.spider_product_id
    self.ana_product_images(gallery, it_html)
    self.save_product_images(gallery)

    # Switch to the Chinese site for the Chinese description.
    cn_url = it_url.replace("/it/", "/cn/")
    cn_html = self.do_visit(cn_url)
    item.language_id = "1"
    # NOTE(review): the result of this second parse is not checked before
    # the description is saved - confirm this is intended.
    self.ana_product_info(item, cn_html)
    self.save_product_desc(item)

    # Ask the SKU endpoint for this product's SKU data.
    sku_endpoint = "".join([
        "https://www.farfetch.cn/it/product/GetDetailState?productId=",
        item.resource_code,
        "&designerId=0",
    ])
    sku_payload = self.get_json(sku_endpoint)

    # Parse and store the SKU information.
    self.ana_and_save_skus(item.spider_product_id, sku_payload)
def grab_product(self, flag, url):
    """Crawl one product page, then fetch its English description as well.

    The ``/it/`` variant of ``url`` is fetched and stored with
    ``language_id = "1"``. Afterwards the same address with ``/zh/``
    replaced by ``/en/`` is fetched and, when it parses, its description is
    stored with ``language_id = "2"``.

    Args:
        flag: Source marker stored on the product.
        url: Product page address; ``/cn/`` is replaced by ``/it/``.
    """
    # Switch the link to the Italian site (it lists prices in euro).
    site_url = url.replace("/cn/", "/it/")
    page = self.do_visit(site_url)

    item = Product()
    item.flag = flag            # source
    item.url = site_url         # source link
    item.gender = "1"           # this site carries women's items only
    item.status = "1"           # normal status
    item.language_id = "1"      # Chinese

    # Nothing is saved when the product information could not be parsed.
    if not self.ana_product_info(item, page):
        self.log_info("".join([site_url, " product not saved!"]))
        return

    # Product record and description.
    self.save_product(item)
    self.save_product_desc(item)

    # Images.
    gallery = ProductImages()
    gallery.spid = item.spider_product_id
    self.ana_product_images(gallery, page)
    self.save_product_images(gallery)

    # SKUs.
    sku = ProductSku()
    sku.spid = item.spider_product_id
    self.ana_and_save_product_sku(sku, page)

    # Switch the language to English.
    english_page = self.do_visit(site_url.replace("/zh/", "/en/"))
    item.language_id = "2"

    # Store the English description only when that page parses.
    if self.ana_product_info(item, english_page):
        self.save_product_desc(item)
def upload_to_db(upc, name):
    """Load ``chart.csv`` and store a product plus its price-change history.

    Each line of ``chart.csv`` is read as ``<timestamp>,<price>,...``. A
    point is kept only when the price differs from the last kept price by
    more than 0.15, so small fluctuations are dropped. The product row is
    inserted first; if that fails the error is printed and the price points
    are still inserted.

    Args:
        upc: Product UPC, used for the product row and as ``item_upc`` on
            every history row.
        name: Product name.

    Raises:
        ValueError: if a price is not a number or a timestamp does not match
            ``%Y-%m-%d %H:%M:%S``.
        IndexError: if a non-blank line has fewer than two columns.
    """
    price_history = []
    with open('chart.csv') as chart_file:
        curr_price = -1
        for line in chart_file:
            # A blank line (e.g. at the end of the file) carries no data.
            if not line.strip():
                continue
            cols = line.split(',')
            price = float(cols[1])
            if abs(price - curr_price) > 0.15:
                curr_price = price
                # Parsed only to validate the timestamp format.
                # NOTE(review): the raw string is what gets stored; confirm
                # whether the column expects a datetime object instead.
                datetime.strptime(cols[0], "%Y-%m-%d %H:%M:%S")
                price_history.append({'date': cols[0], 'price': curr_price})

    product = Product(upc=upc, name=name)
    try:
        session.add(product)
        session.commit()
    except SQLAlchemyError as e:
        # A failed flush/commit leaves the session unusable until it is
        # rolled back; without this the inserts below would raise.
        session.rollback()
        print(e)

    for point in price_history:
        session.add(ProductPriceHistory(item_upc=upc,
                                        price=point['price'],
                                        date=point['date']))
    session.commit()
def grab_product(self, flag, url):
    """Fetch one product page and persist the product, its images and SKUs.

    Image URLs are taken from ``product.images``; entries containing
    ``"-20."`` are dropped before the rest is stored as one comma-separated
    string.

    Args:
        flag: Source marker stored on the product.
        url: Address of the product page; also stored as the source link.
    """
    item = Product()
    item.flag = flag            # source
    item.url = url              # source link
    item.status = "1"           # normal status
    item.language_id = "2"      # English

    page = self.do_visit(url)

    # Nothing is saved when the product information could not be parsed.
    if not self.ana_product_info(item, page):
        self.log_info("".join([url, " product not saved!"]))
        return

    # Product record and description.
    self.save_product(item)
    self.save_product_desc(item)

    gallery = ProductImages()
    gallery.spid = item.spider_product_id
    if len(item.images) > 0:
        # Images with a "-20" suffix are not the ones that are wanted.
        wanted = [img for img in item.images if "-20." not in img]
        gallery.images = ",".join(wanted)
        # Store the image information.
        self.save_product_images(gallery)
    else:
        # Record products that have no images.
        self.log_info("".join([str(gallery.spid), " no images!"]))

    # Parse and store the SKU information.
    sku = ProductSku()
    self.ana_and_save_product_sku(sku, item, page)
def save_product(p_data, pdao, source_url, brand):
    """Build a ``Product`` from a parsed product payload and store it if new.

    Args:
        p_data: Mapping with the keys ``name``, ``gender``, ``categories``,
            ``images``, ``productCode`` and ``id``.
        pdao: DAO providing ``get_id_by_code`` and ``save``.
        source_url: Page the payload came from; stored as the product URL.
        brand: Brand name stored on the product.

    Returns:
        The populated ``Product``. Its ``spider_product_id`` is the existing
        id when ``get_id_by_code`` finds one, otherwise the value returned
        by ``pdao.save``.
    """
    product = Product()
    product.name = p_data['name']
    product.brand = brand
    # "1" for women, "2" for everything else.
    product.gender = "1" if p_data['gender'].lower() == "women" else "2"

    categories = p_data['categories']
    if categories:
        # The last entry of the category list is the one that is stored.
        product.category = categories[-1]['friendlyName']
    else:
        product.category = ""
        # NOTE(review): output the URL of a product without categories;
        # confirm this print belongs to this branch only.
        print(source_url)

    product.status = "1"

    p_images = p_data['images']
    product.images = p_images
    # The colour code comes from the first image; an empty image list
    # yields an empty colour instead of an IndexError.
    product.color = p_images[0]['colourCode'] if p_images else ""

    product.code = p_data['productCode']
    product.resource_code = p_data['id']
    product.flag = "013"
    product.url = source_url

    exists_id = pdao.get_id_by_code(product.resource_code)
    if exists_id is not None:
        # Already stored: reuse its id. The stored record is not updated.
        product.spider_product_id = exists_id
        print("".join([str(exists_id), " is exists!"]))
    else:
        product.spider_product_id = pdao.save(product)
    return product
def find_or_create_product(slug: str) -> "Product":
    """Return the ``Product`` with the given slug, building one if none exists.

    Args:
        slug: Value matched against ``Product.slug``.

    Returns:
        The first matching ``Product`` from the database, or a newly
        constructed ``Product(slug=slug)`` when the query finds nothing.
    """
    product = db.query(Product).filter(Product.slug == slug).first()
    if product is None:
        # NOTE(review): the new instance is not added to the session here;
        # confirm that callers persist it.
        product = Product(slug=slug)
    return product