Пример #1
0
def update_desc():
    """Run stored product descriptions through ``do_trans`` and save them.

    Opens a connection to the ``asos`` database, reads every row returned by
    ``ProductDescDao.get_all_zh_desc()``, passes the description (column 2)
    and the size description (column 5) through ``do_trans`` and writes each
    row back with ``update_product_desc``.

    Returns:
        False when no database connection could be obtained; otherwise the
        function falls off the end and returns None.
    """
    params = DBParams()
    params.host = "172.16.8.147"
    params.port = "3306"
    params.user = "******"
    params.passwd = "123456"
    params.db = "asos"

    connection = get_param_conn(params)
    if connection is None:
        print("没有此数据库")
        return False

    cursor = connection.cursor()
    desc_dao = ProductDescDao(connection, cursor)
    rows = desc_dao.get_all_zh_desc()

    # A single Product is reused as a scratch record; every field that is
    # written to the database is overwritten on each iteration.
    record = Product()
    for row in rows:
        record.spider_product_id = row[0]
        record.name = row[1]
        record.desc = do_trans(row[2])
        record.constitute = row[3]
        record.location = row[4]
        record.size_desc = do_trans(row[5])
        # Column 6 is not used here; the language id is read from column 7.
        record.language_id = row[7]
        desc_dao.update_product_desc(record)
        # Progress output: id of the row that was just updated.
        print(record.spider_product_id)
Пример #2
0
    def grab_product(self, flag, url):
        """Fetch one product page and persist what is parsed from it.

        Args:
            flag: Source identifier stored on the product.
            url: Product page URL; also stored as the product's source link.

        When ``ana_product_info`` reports failure nothing is saved and the
        URL is written to the log instead.
        """
        item = Product()
        item.flag = flag            # source
        item.url = url              # source link
        item.status = "1"           # status: normal
        item.language_id = "2"      # English
        item.brand = "Ted Baker"    # brand

        page = self.do_visit(url)

        # Parse the product info; bail out early when parsing fails.
        if not self.ana_product_info(item, page):
            self.log_info(url + " product not saved!")
            return

        # Persist the product itself, then its description.
        self.save_product(item)
        self.save_product_desc(item)

        # Parse and persist the image information.
        images = ProductImages()
        images.spid = item.spider_product_id
        self.ana_product_images(images, page)
        self.save_product_images(images)

        # Parse and persist the SKU information.
        self.ana_and_save_product_sku(ProductSku(), item)
Пример #3
0
    def grab_product(self, flag, url):
        """Fetch one product page and persist what is parsed from it.

        Args:
            flag: Source identifier stored on the product.
            url: Product page URL; also stored as the product's source link.

        Images are taken from ``product.images`` (filled in by
        ``ana_product_info``) and stored as one comma-separated string.
        Products without images are logged but still get their SKUs saved.
        """
        item = Product()
        item.flag = flag            # source
        item.url = url              # source link
        item.status = "1"           # status: normal
        item.language_id = "2"      # English

        page = self.do_visit(url)

        # Parse the product info; bail out early when parsing fails.
        if not self.ana_product_info(item, page):
            self.log_info(url + " product not saved!")
            return

        # Persist the product itself, then its description.
        self.save_product(item)
        self.save_product_desc(item)

        # Persist the image information.
        images = ProductImages()
        images.spid = item.spider_product_id
        if len(item.images) == 0:
            # Keep a record of products that have no images.
            self.log_info(str(images.spid) + " no images!")
        else:
            images.images = ",".join(item.images)
            self.save_product_images(images)

        # Persist the SKU information.
        self.ana_and_save_product_sku(ProductSku(), item)
Пример #4
0
def translate_desc():
    """Translate English product descriptions to Chinese and store them.

    Connects to the ``test`` database, asks ``ProductDescDao`` for the rows
    returned by ``get_en_desc_no_zh()`` and, for each one that does not yet
    have a description with language id 1, translates the name, description,
    material and size description with ``GGTranslater.en_to_zh`` and saves
    the result as a new description row.

    Returns:
        False when no database connection could be obtained; otherwise the
        function falls off the end and returns None.
    """
    # Initialise the database connection.
    db_params = DBParams()
    db_params.host = "172.16.8.149"
    db_params.port = "3306"
    db_params.user = "******"
    db_params.passwd = "123456"
    db_params.db = "test"
    conn = get_param_conn(db_params)
    if conn is None:
        print("没有此数据库")
        return False
    cur = conn.cursor()
    pddao = ProductDescDao(conn, cur)

    # Initialise the Google translation helper.
    gg_translater = GGTranslater()

    # (Product attribute, row column) pairs that get translated. Column 4 is
    # intentionally absent: the original code never translated it.
    translated_fields = (
        ("name", 1),        # product name
        ("desc", 2),        # description
        ("constitute", 3),  # material
        ("size_desc", 5),   # size description
    )

    # Fetch the product descriptions that need translating.
    need_trans_pds = pddao.get_en_desc_no_zh()
    for item in need_trans_pds:
        product = Product()
        product.spider_product_id = item[0]
        product.language_id = 1
        # Skip products that already have a Chinese translation.
        if pddao.is_exists_product_desc(product.spider_product_id,
                                        product.language_id):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("".join([str(product.spider_product_id), " exists!"]))
            continue
        for attr, column in translated_fields:
            text = item[column]
            # Empty / missing source text is left untranslated and unset.
            if text:
                setattr(product, attr, gg_translater.en_to_zh(text))
        # Translation finished: store the row.
        pddao.save(product)
        print(str(product.spider_product_id))
Пример #5
0
    def grab_product(self, flag, url):
        """Crawl a product from the Italian site, then its Chinese text.

        Args:
            flag: Source identifier stored on the product.
            url: Product URL; any ``/cn/`` segment is switched to ``/it/``
                before the first request.

        The Italian page supplies the product, its first description
        (stored with language id "2") and the images. The Chinese page then
        supplies a second description (language id "1"). SKUs come from the
        ``GetDetailState`` JSON endpoint.
        """
        # Crawl the Italian site first.
        italian_url = url.replace("/cn/", "/it/")
        italian_page = self.do_visit(italian_url)

        item = Product()
        item.flag = flag
        item.url = italian_url      # source link
        item.status = "1"           # status: on shelf
        # The Italian-site data is crawled first and its description is
        # recorded as English.
        item.language_id = "2"

        # Parse the product info; record the failure and stop when it fails.
        if not self.ana_product_info(item, italian_page):
            self.log_info(url + " product not find!")
            return

        # Persist the product itself, then its description.
        self.save_product(item)
        self.save_product_desc(item)

        # Parse and persist the image information.
        images = ProductImages()
        images.spid = item.spider_product_id
        self.ana_product_images(images, italian_page)
        self.save_product_images(images)

        # Switch to the China site to crawl the Chinese description.
        chinese_url = italian_url.replace("/it/", "/cn/")
        chinese_page = self.do_visit(chinese_url)
        item.language_id = "1"      # description language: Chinese
        # NOTE(review): the result of this second parse is not checked, so
        # the description is saved even if parsing the Chinese page fails.
        # Confirm whether that is intended.
        self.ana_product_info(item, chinese_page)
        self.save_product_desc(item)

        # Ask the SKU endpoint for this product's SKU information.
        sku_url = (
            "https://www.farfetch.cn/it/product/GetDetailState?productId="
            + item.resource_code
            + "&designerId=0"
        )
        sku_data = self.get_json(sku_url)
        # Parse and persist the SKU information.
        self.ana_and_save_skus(item.spider_product_id, sku_data)
Пример #6
0
 def grab_product(self, flag, url):
     """Crawl a product from the Italian site in Chinese, then in English.

     Args:
         flag: Source identifier stored on the product.
         url: Product URL; any ``/cn/`` segment is switched to ``/it/``
             before the page is requested.

     The product, images and SKUs are saved from the first page together
     with a description stored under language id "1". The same URL with
     ``/zh/`` replaced by ``/en/`` is then fetched and, if it parses, a
     second description is stored under language id "2".
     """
     # Point the link at the Italian site (it lists prices in euros).
     site_url = url.replace("/cn/", "/it/")
     page = self.do_visit(site_url)

     item = Product()
     item.flag = flag            # source
     item.url = site_url         # source link
     item.gender = "1"           # this site only sells women's items
     item.status = "1"           # status: normal
     item.language_id = "1"      # Chinese

     # Parse the product info; log and stop when it fails.
     if not self.ana_product_info(item, page):
         self.log_info(site_url + " product not saved!")
         return

     # Persist the product itself, then its description.
     self.save_product(item)
     self.save_product_desc(item)

     # Parse and persist the image information.
     images = ProductImages()
     images.spid = item.spider_product_id
     self.ana_product_images(images, page)
     self.save_product_images(images)

     # Parse and persist the SKU information.
     sku = ProductSku()
     sku.spid = item.spider_product_id
     self.ana_and_save_product_sku(sku, page)

     # Switch the language to English.
     english_page = self.do_visit(site_url.replace("/zh/", "/en/"))
     item.language_id = "2"
     # Save the English description only when the page parsed.
     english_ok = self.ana_product_info(item, english_page)
     if english_ok:
         self.save_product_desc(item)
Пример #7
0
def upload_to_db(upc, name):
    """Store a product and its price-change history from ``chart.csv``.

    Each line of ``chart.csv`` is expected to start with a
    ``YYYY-MM-DD HH:MM:SS`` timestamp followed by a price. A point is kept
    only when the price differs from the last kept price by more than 0.15,
    so runs of (nearly) identical prices collapse into their first entry.

    Args:
        upc: Product UPC; also used as ``item_upc`` on every history row.
        name: Product name.

    Raises:
        ValueError: If a line has a malformed timestamp or price.
        IndexError: If a non-blank line has fewer than two columns.
        SQLAlchemyError: If committing the price history fails (the session
            is rolled back before the error is re-raised).
    """
    price_history = []
    # The handle is not called ``csv`` so it does not shadow the stdlib module.
    with open('chart.csv') as chart_file:
        # None means "nothing recorded yet", so the first price is always kept.
        curr_price = None
        for line in chart_file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            cols = line.split(',')
            price = float(cols[1])
            if curr_price is None or abs(price - curr_price) > 0.15:
                curr_price = price
                # Validates the timestamp format only; the raw string is what
                # gets stored, exactly as before.
                datetime.strptime(cols[0], "%Y-%m-%d %H:%M:%S")
                price_history.append({'date': cols[0], 'price': curr_price})

    product = Product(upc=upc, name=name)
    try:
        session.add(product)
        session.commit()
    except SQLAlchemyError as e:
        # A failed commit leaves the session unusable until it is rolled
        # back; without this the history inserts below could never succeed.
        session.rollback()
        print(e)

    for point in price_history:
        pph = ProductPriceHistory(item_upc=upc,
                                  price=point['price'],
                                  date=point['date'])
        session.add(pph)
    try:
        session.commit()
    except SQLAlchemyError:
        # Leave the shared session usable for the caller, then propagate.
        session.rollback()
        raise
Пример #8
0
    def grab_product(self, flag, url):
        """Fetch one product page and persist what is parsed from it.

        Args:
            flag: Source identifier stored on the product.
            url: Product page URL; also stored as the product's source link.

        Images are taken from ``product.images`` (filled in by
        ``ana_product_info``), minus any whose URL contains ``"-20."``, and
        stored as one comma-separated string. Products without images are
        logged but still get their SKUs saved.
        """
        item = Product()
        item.flag = flag            # source
        item.url = url              # source link
        item.status = "1"           # status: normal
        item.language_id = "2"      # English

        page = self.do_visit(url)

        # Parse the product info; bail out early when parsing fails.
        if not self.ana_product_info(item, page):
            self.log_info(url + " product not saved!")
            return

        # Persist the product itself, then its description.
        self.save_product(item)
        self.save_product_desc(item)

        images = ProductImages()
        images.spid = item.spider_product_id
        if len(item.images) == 0:
            # Keep a record of products that have no images.
            self.log_info(str(images.spid) + " no images!")
        else:
            # Images with the "-20" suffix are not the ones we want.
            wanted = [img for img in item.images if "-20." not in img]
            images.images = ",".join(wanted)
            # Persist the image information.
            self.save_product_images(images)

        # Parse and persist the SKU information.
        self.ana_and_save_product_sku(ProductSku(), item, page)
Пример #9
0
def save_product(p_data, pdao, source_url, brand):
    """Build a ``Product`` from scraped data and save it if it is new.

    Args:
        p_data: Mapping with the keys ``name``, ``gender``, ``categories``,
            ``images``, ``productCode`` and ``id``.
        pdao: DAO providing ``get_id_by_code`` and ``save``.
        source_url: Page the data was scraped from; stored as the product URL.
        brand: Brand name stored on the product.

    Returns:
        The populated ``Product``. ``spider_product_id`` is the existing id
        when the resource code is already known, otherwise the value
        returned by ``pdao.save``.
    """
    product = Product()
    product.name = p_data['name']
    product.brand = brand
    # "1" for women, "2" for everything else.
    product.gender = "1" if p_data['gender'].lower() == "women" else "2"

    categories = p_data['categories']
    if categories:
        # The last category in the list is the one that is stored.
        product.category = categories[-1]['friendlyName']
    else:
        product.category = ""
        # Print the URL of products that came without a category.
        print(source_url)

    product.status = "1"
    p_images = p_data['images']
    product.images = p_images
    # The colour code is read from the first image. An empty image list used
    # to raise IndexError; fall back to "" the same way category does.
    # NOTE(review): confirm "" is an acceptable colour for image-less products.
    product.color = p_images[0]['colourCode'] if p_images else ""
    product.code = p_data['productCode']
    product.resource_code = p_data['id']
    product.flag = "013"
    product.url = source_url

    exists_id = pdao.get_id_by_code(product.resource_code)
    if exists_id is not None:
        # Known product: reuse its id; the stored row is not updated.
        product.spider_product_id = exists_id
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("".join([str(exists_id), " is exists!"]))
    else:
        product.spider_product_id = pdao.save(product)
    return product
Пример #10
0
def find_or_create_product(slug: str) -> "Product":
    """Return the ``Product`` with the given slug, creating one if needed.

    Args:
        slug: Value matched against ``Product.slug``.

    Returns:
        The first matching ``Product`` from the database, or a new
        ``Product(slug=slug)`` when there is none. The new instance is
        returned as-is: this function does not add it to ``db`` itself.
    """
    # The annotation previously named ``Group``, but every path returns a
    # ``Product``.
    product = db.query(Product).filter(Product.slug == slug).first()
    if not product:
        product = Product(slug=slug)
    return product