def special_brand_loading(self):
    """Load the special-brand file.

    Each useful line of ``self.special_brand_file`` holds exactly two
    tab-separated columns: ``brand_id`` and ``brand_name``. Blank lines,
    lines with another column count, and repeated brand ids (only the first
    occurrence counts) are skipped.

    Returns:
        A ``(cleaned_by_id, extended_names)`` tuple: a dict mapping each
        brand id to its lower-cased, cleaned name, and a list with the
        result of ``self.english_brand_extension`` for each kept brand, in
        file order.
    """
    exchange_map = tool.get_exchange_brand_pair()
    extended_names = []
    cleaned_by_id = {}
    seen_ids = set()

    with open(self.special_brand_file, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            row = raw_line.strip()
            if not row:
                continue

            # Columns: brand_id, brand_name
            columns = row.split("\t")
            if len(columns) != 2:
                continue
            brand_id, brand_name = (col.strip() for col in columns)

            if brand_id in seen_ids:
                continue
            seen_ids.add(brand_id)

            # Swap in the replacement name when the exchange table has one.
            brand_name = exchange_map.get(brand_name, brand_name)
            brand_name = tool.brand_clean(brand_name.lower())

            extended_names.append(self.english_brand_extension(brand_name))
            cleaned_by_id[brand_id] = brand_name

    return cleaned_by_id, extended_names
def brand_recall_local_file(self):
    """Build the brand recall file from a local five-column brand file.

    Reads ``self.in_file`` (tab-separated ``brand_id``, ``brand_name`` and
    three category names), drops brands listed in ``self.brand_del_dict``
    (by name) or ``self.del_brandID_dict`` (by id), and writes one
    tab-separated row per remaining brand to ``self.out_file``:
    ``brand_id, extended_name, original_name, cat1, cat2, cat3``.

    Both files are opened explicitly as UTF-8 so the result does not depend
    on the platform's default encoding.

    Raises:
        OSError: if either file cannot be opened.
    """
    ex_brand_dict = self.brand_exchange_dict
    r_lst = []

    with open(self.in_file, "r", encoding="utf-8") as f1:
        for line in f1:
            line = line.strip()
            if not line:
                continue
            lst1 = line.split("\t")
            if len(lst1) != 5:
                continue
            b_id, ori_b_name, cat1_name, cat2_name, cat3_name = [
                tmp.strip() for tmp in lst1
            ]

            # Skip brands blacklisted by name or by id.
            if ori_b_name in self.brand_del_dict:
                continue
            if b_id in self.del_brandID_dict:
                continue

            # Use the exchange table's replacement name when one exists.
            b_name = ex_brand_dict.get(ori_b_name, ori_b_name)
            b_name = tool.brand_clean(b_name)
            ext_band_name = self.english_brand_extension(b_name)

            r_lst.append("\t".join([
                b_id, ext_band_name, ori_b_name,
                cat1_name, cat2_name, cat3_name,
            ]))

    with open(self.out_file, "w", encoding="utf-8") as f1:
        f1.write("\n".join(r_lst))
def brand_recall(self, output_file="pdd_xjd_brand_recall_info.txt"):
    """Write recall rows for every usable brand in the standard brand file.

    ``self.standard_brand_file`` is read as tab-separated lines with the
    columns ``brand_id, brand_name, cat2_id, cat2, gmv``. Lines that are
    blank, have another column count, carry a one-character brand name, or
    whose id is rejected by ``self.del_brand_id`` are ignored.

    Args:
        output_file: Path of the UTF-8 file to write. Brand rows come first,
            followed by any non-empty rows produced by
            ``self.mijia_special_brand_recall``.
    """
    exchange_map = tool.get_exchange_brand_pair()
    brand_rows = []
    mijia_rows = []

    with open(self.standard_brand_file, "r", encoding="utf-8") as source:
        for raw_line in source:
            row = raw_line.strip()
            if not row:
                continue

            # Columns: brand_id, brand_name, cat2_id, cat2, gmv
            columns = row.split("\t")
            if len(columns) != 5:
                continue
            b_id, original_name, cat2_id, cat2, gmv = (
                col.strip() for col in columns
            )

            if len(original_name) == 1 or self.del_brand_id(b_id):
                continue

            canonical = exchange_map.get(original_name, original_name)
            canonical = tool.brand_clean(canonical.lower())
            extended = self.english_brand_extension(canonical)
            brand_rows.append("\t".join(
                (b_id, original_name, extended, cat2_id, cat2, gmv)))

            mijia_row = self.mijia_special_brand_recall(b_id, cat2_id, cat2)
            if mijia_row != "":
                mijia_rows.append(mijia_row)

    with open(output_file, "w", encoding="utf-8") as sink:
        sink.write("\n".join(brand_rows + mijia_rows))
        sink.flush()
def special_brand_dealing(self, lst1, ex_brand_dict):
    """Return the extended form of the brand name in a two-item record.

    Args:
        lst1: A two-element sequence ``(brand_id, brand_name)``; only the
            name is used.
        ex_brand_dict: Mapping from a brand name to its replacement name.

    Returns:
        The result of ``self.english_brand_extension`` applied to the
        replaced, lower-cased and cleaned brand name.
    """
    _, brand_name = lst1
    canonical = (
        ex_brand_dict[brand_name] if brand_name in ex_brand_dict
        else brand_name
    )
    cleaned = tool.brand_clean(canonical.lower())
    return self.english_brand_extension(cleaned)
def _brand_ext(self, ori_b_name):
    """Return the extended form of a brand name, or "" when there is none.

    The name is first replaced through ``self.brand_exchange_dict`` (when it
    has an entry), stripped, cleaned with ``tool.brand_clean`` and finally
    passed to ``self.english_brand_extension``.

    Args:
        ori_b_name: The original brand name. ``None`` is treated like an
            empty name.

    Returns:
        The extended brand name, or ``""`` if the (replaced) name is
        ``None``, empty, or whitespace only.
    """
    b_name = self.brand_exchange_dict.get(ori_b_name, ori_b_name)
    # A missing name has nothing to extend; treat it like an empty one
    # instead of failing on ``None.strip()``.
    if b_name is None:
        return ""
    b_name = b_name.strip()
    if b_name == "":
        return ""
    b_name = tool.brand_clean(b_name)
    return self.english_brand_extension(b_name)
def brand_recall(self, output_file="brand_recall_info.txt"):
    """Recall standard brands whose extended name contains a special brand.

    ``self.standard_brand_file`` is read as tab-separated lines with five
    columns (``brand_id, brand_name, cat1_id, cat1, gmv``). For every line
    the extended brand name is lower-cased and searched for each
    "/"-separated fragment of the special-brand names; on a hit the row
    ``brand_id, original_name, extended_name, cat1_id, cat1, gmv`` is
    recorded once. Non-empty rows returned by
    ``self.mijia_special_brand_recall`` are appended after the recalled
    rows.

    Args:
        output_file: Path of the UTF-8 file the rows are written to.

    Raises:
        OSError: if the input or output file cannot be opened.
    """
    ex_brand_dict = tool.get_exchange_brand_pair()

    # special_brand_loading() may return either the list of extended names
    # or a (dict, list) pair; accept both shapes.
    loaded = self.special_brand_loading()
    special_brand_list = loaded[1] if isinstance(loaded, tuple) else loaded

    # Split the special-brand names once instead of once per input line.
    # Empty fragments (e.g. from a trailing "/") are dropped: str.split("")
    # raises ValueError, and an empty fragment carries no information.
    fragments = [
        s_name_item
        for s_name in special_brand_list
        for s_name_item in s_name.strip().split("/")
        if s_name_item
    ]

    # A dict is used as an insertion-ordered set of output rows.
    recall_brand_dict = {}
    mijia_lst = []
    idx = 0

    with open(self.standard_brand_file, "r", encoding="utf-8") as f1:
        for line in f1:
            line = line.strip()
            if not line:
                continue
            lst1 = line.split("\t")
            if len(lst1) != 5:
                continue
            b_id, ori_b_name, cat1_id, cat1, gmv = [
                tmp.strip() for tmp in lst1
            ]

            mijia_str = self.mijia_special_brand_recall(b_id, cat1_id, cat1)
            if mijia_str != "":
                mijia_lst.append(mijia_str)

            b_name = tool.brand_clean(
                ex_brand_dict.get(ori_b_name, ori_b_name))
            ext_band_name = self.english_brand_extension(b_name)
            ext_lower = ext_band_name.lower()

            for s_name_item in fragments:
                # Progress indicator for long runs: one tick per comparison.
                idx += 1
                if idx % 1000000 == 0:
                    print("idx: %s" % idx)

                if s_name_item not in ext_lower:
                    continue
                # Recalling on the single character "后" has a very high
                # error rate, so that fragment never triggers a recall.
                if s_name_item == "后":
                    continue

                k = "\t".join([
                    b_id, ori_b_name, ext_band_name, cat1_id, cat1, gmv
                ])
                recall_brand_dict[k] = ''

    r_lst = list(recall_brand_dict) + mijia_lst
    with open(output_file, "w", encoding="utf-8") as f1:
        f1.write("\n".join(r_lst))
def brand_recall(self, output_file="brand_recall_info_tmp_.txt"):
    """Recall standard brands matching a special brand, plus fixed rows.

    ``self.standard_brand_file`` is read as tab-separated lines with five
    columns (``brand_id, brand_name, cat1_id, cat1, gmv``); brands rejected
    by ``self.del_brand_func`` are skipped. For each remaining line the
    lower-cased extended brand name is searched for every "/"-separated
    fragment of ``self.special_brand_list``. On a hit, up to two rows are
    recorded (each only once):

    * the brand with its own level-1 category, unless
      ``self.brand_cat1_relation_del(b_id, cat1_id)`` is truthy;
    * the brand with the second-hand goods category ``100040``, unless
      ``self.brand_cat1_relation_del(b_id, "100040")`` is truthy.

    The output file contains, in order: the recalled rows, the Mijia/Redmi
    rows (per-line special recalls plus two hard-coded Mijia rows), the rows
    from ``self.brand_miss_cat1()``, and the hard-coded PLUS RAPIDE,
    LOORA PWD and ROYALAPOTHIC rows.

    Args:
        output_file: Path of the UTF-8 file the rows are written to.

    Raises:
        OSError: if the input or output file cannot be opened.
    """
    ex_brand_dict = tool.get_exchange_brand_pair()

    # Split and strip the special-brand names once instead of once per
    # input line; empty fragments are ignored.
    fragments = []
    for s_name in self.special_brand_list:
        for s_name_item in s_name.strip().split("/"):
            s_name_item = s_name_item.strip()
            if s_name_item:
                fragments.append(s_name_item)

    # A dict is used as an insertion-ordered set of output rows.
    recall_brand_dict = {}
    mijia_lst = []
    idx = 0

    with open(self.standard_brand_file, "r", encoding="utf-8") as f1:
        for line in f1:
            line = line.strip()
            if not line:
                continue
            lst1 = line.split("\t")
            if len(lst1) != 5:
                continue
            b_id, ori_b_name, cat1_id, cat1, gmv = [
                tmp.strip() for tmp in lst1
            ]
            if self.del_brand_func(b_id):
                continue

            # Mijia extension
            mijia_str = self.mijia_special_brand_recall(b_id, cat1_id, cat1)
            if mijia_str != "":
                mijia_lst.append(mijia_str)

            # Redmi extension (kept in the same list as the Mijia rows)
            redmi_str = self.redmi_special_brand_recall(b_id, cat1_id, cat1)
            if redmi_str != "":
                mijia_lst.append(redmi_str)

            b_name = tool.brand_clean(
                ex_brand_dict.get(ori_b_name, ori_b_name))
            # The lower-cased extended name is used both for matching and
            # in the emitted rows.
            ext_band_name = self.english_brand_extension(b_name).lower()

            for s_name_item in fragments:
                # Progress indicator for long runs: one tick per comparison.
                idx += 1
                if idx % 1000000 == 0:
                    print("idx: %s" % idx)

                if s_name_item not in ext_band_name:
                    continue
                # Recalling on the single character "后" has a very high
                # error rate, so that fragment never triggers a recall.
                if s_name_item == "后":
                    continue
                # Brand 99714433 (sample row: "蓝月亮(纸品)", category
                # 100012, home cleaning / paper products) is never recalled
                # through special-brand matching.
                if b_id == '99714433':
                    continue

                if not self.brand_cat1_relation_del(b_id, cat1_id):
                    k = "\t".join([
                        b_id, ori_b_name, ext_band_name, cat1_id, cat1, gmv
                    ])
                    recall_brand_dict[k] = ''

                # Force-add the second-hand goods level-1 category.
                # NOTE(review): the source's indentation was lost; this
                # check is assumed to be independent of the cat1 relation
                # check above -- confirm against the original file.
                if not self.brand_cat1_relation_del(b_id, "100040"):
                    k_tmp = "\t".join([
                        b_id, ori_b_name, ext_band_name,
                        "100040", "二手商品", '0.0'
                    ])
                    recall_brand_dict[k_tmp] = ''

    def forced_rows(specs):
        """Build hard-coded rows with a GMV of "0.0" from
        (brand_id, name, alias, cat1_id, cat1) tuples."""
        return [
            "\t".join([
                brand_id, name, self.english_brand_extension(alias),
                forced_cat_id, forced_cat, "0.0"
            ])
            for brand_id, name, alias, forced_cat_id, forced_cat in specs
        ]

    # Hard-coded Mijia rows: level-1 categories home appliances and
    # home improvement / building materials.
    mijia_lst += forced_rows((
        ("10698337", "MJ/米家", "MJ/米家/小米米家", "100031", "家用电器"),
        ("10698337", "MJ/米家", "MJ/米家/小米米家", "100034", "家装建材"),
    ))

    pr_alias = "PLUS RAPIDE/GXG旗下品牌PR/GXG旗下PR/GXG旗下男装PR"
    trailing_rows = forced_rows((
        # PLUS RAPIDE: apparel & underwear, sports & outdoors, shoes.
        ("7830374549", "PLUS RAPIDE", pr_alias, "100006", "服饰内衣"),
        ("7830374549", "PLUS RAPIDE", pr_alias, "100008", "运动户外"),
        ("7830374549", "PLUS RAPIDE", pr_alias, "100001", "鞋靴"),
        # LOORA PWD: apparel & underwear, shoes.
        ("99714457", "LOORA PWD/罗拉密码", "LOORA PWD/罗拉密码",
         "100006", "服饰内衣"),
        ("99714457", "LOORA PWD/罗拉密码", "LOORA PWD/罗拉密码",
         "100001", "鞋靴"),
        # ROYALAPOTHIC: beauty & skincare.
        ("11015914", "ROYALAPOTHIC/泊诗蔻", "ROYALAPOTHIC/泊诗蔻",
         "100007", "美妆护肤"),
    ))

    # Brands that are missing their level-1 category.
    miss_lst = self.brand_miss_cat1()

    r_lst = list(recall_brand_dict) + mijia_lst + miss_lst + trailing_rows
    with open(output_file, "w", encoding="utf-8") as f1:
        f1.write("\n".join(r_lst))