Пример #1
0
def web_import_excel_prod_list(request):
    """
    Save an Excel file uploaded through bootstrap fileinput and queue its import.

    The upload (form field ``file_data``) is written under
    ``settings.MEDIA_ROOT`` using the name produced by ``timefilename`` and is
    then passed to ``import_prod_list.delay`` together with the original file
    name, the brand id and the source id.

    :param request: request object exposing ``data`` (``brand_id`` is
        required, ``source_id`` defaults to ``"0"``) and ``FILES``.
    :return: ``JsonResponse`` wrapping ``{"data": []}`` in every case: missing
        brand id, missing file, success, or failure (the traceback is printed).
    """
    result = {"data": []}

    try:
        brand_id = request.data.get('brand_id')
        source_id = request.data.get('source_id', "0")
        if not brand_id:
            return JsonResponse(result)
        file = request.FILES.get("file_data", None)
        print(settings.MEDIA_ROOT)
        if file:
            fullname = os.path.join(settings.MEDIA_ROOT, timefilename(file.name))
            print(fullname)
            # Binary write, chunk by chunk; the context manager closes the
            # handle even when writing a chunk fails.
            with open(fullname, 'wb+') as destination:
                for chunk in file.chunks():
                    destination.write(chunk)

            # Hand the saved spreadsheet over for processing.
            import_prod_list.delay(fullname, file.name, brand_id, source_id)
        return JsonResponse(result)
    except Exception:
        traceback.print_exc()
        return JsonResponse(result)
Пример #2
0
def compare_zg_xs(request):
    """
    Build a comparison workbook from an uploaded counter spreadsheet.

    Original design notes (translated):

    1) Upload the counter xls (the user must describe it: which field of
       which sheet holds the style code).
    2) Get the product list (the user must supply a keyword so the xls can
       be exported into a temporary directory).
    3) Get the Tmall product list that has already been crawled.
    4) Read both tables and find the differences: 1) taken offline,
       2) price changed, 3) missing (candidates to add).
    5) Produce one xls:
        sheet 1~n: counter
        sheet n+1: our online products
        sheet n+2: our Tmall data
        sheet n+3: the analysis result of step 4

    2019-10-14: read the source, download the online data, read the crawled
    data, and merge everything into one excel file.

    The upload is copied to a temporary ``.xlsx`` file and passed to
    ``pull_products_simple``; the workbook it returns is saved under
    ``settings.MEDIA_ROOT``. The temporary copy is removed afterwards.

    :param request: request whose ``data`` carries ``zgCompare_brand_id``,
        ``zgCompare_xs_kw`` and ``zgCompare_src`` (at least four
        colon-separated values: code, price, online and url columns) and
        whose ``FILES`` carries the upload as ``file_data``.
    :return: ``JsonResponse`` with ``status: False`` and a message when an
        input is missing or malformed; ``Response`` with the saved file name
        on success; ``Response`` with HTTP 500 and the error text on failure.
    """
    tmp_name = None

    try:
        brand_id = request.data.get('zgCompare_brand_id')
        zx_kw = request.data.get('zgCompare_xs_kw')
        zg_cols = request.data.get('zgCompare_src')
        print(brand_id, zg_cols, zx_kw)
        if not brand_id:
            return JsonResponse({"status": False, "message": "没有指定品牌编号"})
        if not zx_kw:
            return JsonResponse({"status": False, "message": "没有指定查询线上商品的关键字"})
        if not zg_cols:
            return JsonResponse({"status": False, "message": "没有指定专柜sheet中列的分布情况"})
        cols = zg_cols.split(":")
        print(cols)
        # Four positions are indexed below; reject a short spec up front
        # instead of failing later with an IndexError.
        if len(cols) < 4:
            return JsonResponse(
                {"status": False, "message": "专柜sheet中列的分布情况格式不正确(需要4个以冒号分隔的字段)"})

        file = request.FILES.get("file_data")
        if not file:
            return JsonResponse({"status": False, "message": "没有上传专柜文件"})

        # delete=False keeps the file on disk after the handle closes so it
        # can be reopened by name; it is removed in the ``finally`` below.
        with NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
            tmp_name = tmp.name
            for chunk in file.chunks():
                tmp.write(chunk)

        wb = pull_products_simple(tmp_name, brand_id,
                                  zx_kw, {"code_col": cols[0], "price_col": cols[1], "online_col": cols[2],
                                          "url_col": cols[3]})
        filename = timefilename("temp.xlsx")
        fullname = os.path.join(settings.MEDIA_ROOT, filename)
        wb.save(fullname)
        return Response({"status": True, "filename": filename}, status=status.HTTP_200_OK)
    except Exception as e:
        traceback.print_exc()
        return Response({"status": False, "message": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    finally:
        # Best-effort cleanup of the temporary upload copy; a failure here
        # must not replace the response already chosen above.
        if tmp_name is not None:
            try:
                os.remove(tmp_name)
            except OSError:
                traceback.print_exc()
def crawl_thread(filename, rule_name, xls_ver, items):
    """
    Crawl ``items`` into a spreadsheet and, when a brand matches, into the database.

    The rule is looked up in ``shop_rules`` by ``rule_name`` and falls back
    to the ``'tmall'`` entry. A brand is considered matched only when exactly
    one ``SCBrand`` row has ``filename`` as its name (case-insensitive) or a
    ``tmall_url`` starting with ``https://<filename>.tmall.com``; its
    ``lastcrawl_time`` is then updated.

    :param filename: shop name; used for the brand lookup, the Tmall host
        name and the output file name (via ``timefilename``).
    :param rule_name: key into ``shop_rules``.
    :param xls_ver: ``'0'`` writes an ``.xls`` file with ``xlwt``; any other
        value writes an ``.xlsx`` file and drives a Chrome webdriver.
    :param items: iterable of items, each passed to ``crawl_one_simple`` /
        ``crawl_one_simple_xlsx`` along with its row index.
    :return: ``None``. Any exception is caught and its traceback printed.
    """
    try:
        rule = shop_rules.get(rule_name)
        if not rule:
            rule = shop_rules.get('tmall')

        hostname = "https://" + filename + ".tmall.com"
        print(hostname)
        brand_id = 0
        # Look the brand up; new rows are only inserted when it is found.
        brand_query = (Q(brand_name__iexact=filename)
                       | Q(tmall_url__istartswith=hostname))
        cnt = SCBrand.objects.filter(brand_query).count()
        print(cnt)
        if cnt == 1:
            brand = SCBrand.objects.get(brand_query)
            brand_id = brand.brand_id
            brand.lastcrawl_time = datetime.now()
            brand.save()

        row0 = ["链接", "货号", "标题", "副标题", "颜色", "尺寸", ""]
        new_rows = []
        file_name = os.path.join(settings.MEDIA_ROOT,
                                 timefilename(filename, 1))
        if xls_ver == '0':
            f = xlwt.Workbook()
            sheet1 = f.add_sheet('商品列表', cell_overwrite_ok=True)

            # Header row (xlwt indices are 0-based).
            for col, title in enumerate(row0):
                sheet1.write(0, col, title)

            # Data rows start right below the header, at index 1.
            for idx, item in enumerate(items, start=1):
                crawl_one_simple(sheet1, rule, idx, item, new_rows, brand_id)

            f.save(file_name + '.xls')
        else:
            f = Workbook()
            sheet1 = f.active
            sheet1.title = "商品列表"
            sheet1.sheet_properties.tabColor = "1072BA"

            # Header row (cell coordinates are 1-based here).
            for col, title in enumerate(row0, start=1):
                sheet1.cell(row=1, column=col, value=title)

            driver = webdriver.Chrome()
            try:
                # Data rows start at row 2, below the header.
                for idx, item in enumerate(items, start=2):
                    crawl_one_simple_xlsx(sheet1, rule, idx, item, new_rows,
                                          brand_id, driver)

                f.save(file_name + '.xlsx')
            finally:
                # Always shut the browser down, even when crawling or
                # saving fails, so no Chrome process is left behind.
                driver.quit()

        # Insert into the database only when a brand_id was found; otherwise
        # many products would not be tied to the correct brand.
        if new_rows and brand_id > 0:
            CrawlProduct.objects.bulk_create(new_rows)

    except Exception:
        traceback.print_exc()
Пример #4
0
                    #print(link[0], one.attrib.get('data-itemid'), title, color,subtitle)

            else:
                break

        # f.save(file_name + '.xlsx')

        return idx
    except Exception as e:
        traceback.print_exc()


if __name__ == '__main__':
    # Script entry point: prepare a workbook with a header row, then crawl
    # one category page starting at row index 1.
    row0 = ["链接", "货号", "标题", "副标题", "颜色"]
    new_rows = []
    file_name = timefilename("coccinelle")

    f = Workbook()
    sheet1 = f.active
    sheet1.title = "商品列表"
    sheet1.sheet_properties.tabColor = "1072BA"

    # Fill the first row with the column headers (columns are 1-based).
    for i, header in enumerate(row0, start=1):
        sheet1.cell(row=1, column=i, value=header)

    idx = 1
    idx = crawl_thread("", "https://www.coccinelle.com/zh/bags-all/", idx)
    # Other category pages that can be crawled the same way:
    # idx = crawl_thread("", "https://www.coccinelle.com/zh/小皮件-3/", idx)
    # idx = crawl_thread("", "https://www.coccinelle.com/zh/配饰-1/", idx)
    # idx = crawl_thread("", "https://www.coccinelle.com/zh/新款/category/皮包/", idx)