def web_import_excel_prod_list(request):
    """Handle a bootstrap-fileinput upload of a product-list Excel file.

    Saves the uploaded "file_data" to MEDIA_ROOT under a timestamped name
    and queues the asynchronous import task (``import_prod_list.delay``).
    Always responds with an empty data payload; errors are printed but not
    surfaced to the client (original best-effort behavior preserved).
    """
    result = {"data": []}
    try:
        brand_id = request.data.get('brand_id')
        source_id = request.data.get('source_id', "0")
        if not brand_id:
            # A brand is mandatory for the import; return the empty payload.
            return JsonResponse(result)
        file = request.FILES.get("file_data", None)
        if file:
            fullname = os.path.join(settings.MEDIA_ROOT, timefilename(file.name))
            # Context manager guarantees the handle is closed even if a
            # chunk write fails (the original leaked the handle on error).
            with open(fullname, 'wb+') as destination:
                for chunk in file.chunks():  # stream upload to disk in chunks
                    destination.write(chunk)
            # Hand the saved workbook off to the async import task.
            import_prod_list.delay(fullname, file.name, brand_id, source_id)
        return JsonResponse(result)
    except Exception as e:
        traceback.print_exc()
        return JsonResponse(result)
def compare_zg_xs(request):
    """Compare an uploaded counter (专柜) price sheet against online data.

    Workflow (original design notes, translated):
      1) The caller uploads the counter xlsx and describes the column
         layout via "zgCompare_src" — a colon-separated string of
         code:price:online-flag:url column positions.
      2) Our online product list is fetched by keyword ("zgCompare_xs_kw").
      3) ``pull_products_simple`` merges both with the crawled Tmall data
         into one workbook, which is saved under MEDIA_ROOT.

    Returns:
        DRF Response {"status": True, "filename": ...} on success, or
        {"status": False, "message": ...} with HTTP 500 on failure.
        Missing parameters get an early JsonResponse with a message.
    """
    try:
        brand_id = request.data.get('zgCompare_brand_id')
        zx_kw = request.data.get('zgCompare_xs_kw')
        zg_cols = request.data.get('zgCompare_src')
        if not brand_id:
            return JsonResponse({"status": False, "message": "没有指定品牌编号"})
        if not zx_kw:
            return JsonResponse({"status": False, "message": "没有指定查询线上商品的关键字"})
        if not zg_cols:
            return JsonResponse({"status": False, "message": "没有指定专柜sheet中列的分布情况"})
        cols = zg_cols.split(":")
        file = request.FILES.get("file_data")
        # Persist the upload so the workbook can be opened by path;
        # delete=False is required on Windows to reopen the file.
        with NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
            for chunk in file.chunks():
                tmp.write(chunk)
        try:
            wb = pull_products_simple(
                tmp.name, brand_id, zx_kw,
                {"code_col": cols[0], "price_col": cols[1],
                 "online_col": cols[2], "url_col": cols[3]})
        finally:
            # delete=False means we own the cleanup — the original leaked
            # one temp file per request.
            os.remove(tmp.name)
        filename = timefilename("temp.xlsx")
        fullname = os.path.join(settings.MEDIA_ROOT, filename)
        wb.save(fullname)
        # The client downloads the saved file by name in a follow-up request.
        return Response({"status": True, "filename": filename},
                        status=status.HTTP_200_OK)
    except Exception as e:
        traceback.print_exc()
        return Response({"status": False, "message": str(e)},
                        status=status.HTTP_500_INTERNAL_SERVER_ERROR)
def crawl_thread(filename, rule_name, xls_ver, items):
    """Crawl a shop's product items into an Excel workbook and, when the
    shop maps to exactly one known brand, bulk-insert the crawled rows.

    Args:
        filename: shop name; also used to derive the Tmall host and the
            timestamped output filename under MEDIA_ROOT.
        rule_name: key into ``shop_rules``; falls back to the 'tmall' rule.
        xls_ver: '0' -> legacy .xls via xlwt; anything else -> .xlsx via
            openpyxl, driving one Selenium Chrome session across items.
        items: iterable of raw crawled items to render into rows.
    """
    try:
        rule = shop_rules.get(rule_name)
        if not rule:
            rule = shop_rules.get('tmall')  # default crawl rule
        hostname = "https://" + filename + ".tmall.com"
        brand_id = 0
        # Resolve the brand by case-insensitive name or Tmall URL prefix;
        # only an unambiguous single match is used, so products are never
        # attached to the wrong brand.
        brand_filter = (Q(brand_name__iexact=filename) |
                        Q(tmall_url__istartswith=hostname))
        cnt = SCBrand.objects.filter(brand_filter).count()
        if cnt == 1:
            brand = SCBrand.objects.get(brand_filter)
            brand_id = brand.brand_id
            brand.lastcrawl_time = datetime.now()
            brand.save()  # record the crawl timestamp on SCBrand
        row0 = ["链接", "货号", "标题", "副标题", "颜色", "尺寸", ""]
        new_rows = []
        file_name = os.path.join(settings.MEDIA_ROOT, timefilename(filename, 1))
        if xls_ver == '0':
            # Legacy .xls output (xlwt, 0-based cells).
            f = xlwt.Workbook()
            sheet1 = f.add_sheet('商品列表', cell_overwrite_ok=True)
            for i in range(0, len(row0)):  # header row
                sheet1.write(0, i, row0[i])
            idx = 1
            for item in items:
                crawl_one_simple(sheet1, rule, idx, item, new_rows, brand_id)
                idx = idx + 1
            f.save(file_name + '.xls')
        else:
            # Modern .xlsx output (openpyxl, 1-based cells).
            f = Workbook()
            sheet1 = f.active
            sheet1.title = "商品列表"
            sheet1.sheet_properties.tabColor = "1072BA"
            for i in range(1, len(row0) + 1):  # header row
                sheet1.cell(row=1, column=i, value=row0[i - 1])
            idx = 1
            driver = webdriver.Chrome()
            try:
                for item in items:
                    # NOTE: original increments before writing, so data rows
                    # start at row 2 (below the header) — preserved.
                    idx = idx + 1
                    crawl_one_simple_xlsx(sheet1, rule, idx, item, new_rows,
                                          brand_id, driver)
                f.save(file_name + '.xlsx')
            finally:
                # Always shut Chrome down, even when a crawl step raises —
                # the original leaked the browser process on error.
                driver.quit()
        # Insert only when a brand was resolved, so products are never
        # stored without the correct brand association.
        if len(new_rows) > 0 and brand_id > 0:
            CrawlProduct.objects.bulk_create(new_rows)
    except Exception as e:
        traceback.print_exc()
#print(link[0], one.attrib.get('data-itemid'), title, color,subtitle) else: break # f.save(file_name + '.xlsx') return idx except Exception as e: traceback.print_exc() if __name__ == '__main__': row0 = ["链接", "货号", "标题", "副标题", "颜色"] new_rows = [] file_name = timefilename("coccinelle") f = Workbook() sheet1 = f.active sheet1.title = "商品列表" sheet1.sheet_properties.tabColor = "1072BA" # 写第一行 for i in range(1, len(row0) + 1): sheet1.cell(row=1, column=i, value=row0[i - 1]) idx = 1 idx = crawl_thread("", "https://www.coccinelle.com/zh/bags-all/", idx) # idx = crawl_thread("", "https://www.coccinelle.com/zh/小皮件-3/", idx) # idx = crawl_thread("", "https://www.coccinelle.com/zh/配饰-1/", idx) # idx = crawl_thread("", "https://www.coccinelle.com/zh/新款/category/皮包/", idx)