def __init__(self):
    """Create the athena and part DAO handles and an empty part-goods map."""
    # Alternative data source kept from the original:
    #   CrawlDao.CrawlDao('dataserver')
    self.athena_dao = CrawlDao.CrawlDao('athena_center')
    self.part_dao = CrawlDao.CrawlDao('modeldatas')

    # goods_uuId -> goods_data
    self.part_goods_data = {}
def __init__(self):
    """Create the DAO handles and the empty bookkeeping containers.

    Every container starts empty; the counters start at zero except
    ``athena_pic_final_id``, which is a hard-coded resume point.
    """
    # Local alternatives kept from the original:
    #   CrawlDao.CrawlDao("athena_center", "local")
    #   CrawlDao.CrawlDao('modeldatas', "local")
    self.athena_dao = CrawlDao.CrawlDao("dataserver", "online_cong")
    self.monkey_dao = CrawlDao.CrawlDao('modeldatas', "online_cong")

    # Actual online car_id and pid.
    self.true_car_id_dict = {}
    # id -> data
    self.true_car_dict = {}
    # athena categories
    self.athena_cate_dict = {}
    # liyang_id -> tqmall_car_id
    self.liyang_online_dict = {}
    # athena_g_id -> third_cate_id
    self.goods_cate_dict = {}
    # goods/car relations that have already been seen
    self.athena_goods_car_list = []
    # The same picture under different models counts as distinct, so the
    # pic_ids already seen are recorded here.
    self.has_show_pic_id_list = []

    # --- redundant-table service ---
    self.online_car_id_set = set()
    self.car_cate_yu_dict = {}
    self.has_append_list = []

    # Last pic_id reached before the run was interrupted.
    self.athena_pic_final_id = 52167

    # --- running ids for the mappings below ---
    self.athena_goods_id = 0
    self.athena_pic_id = 0
    self.athena_sub_id = 0

    # Newly added goods: uuid -> athena_goods_id
    self.goods_dui_dict = {}
    # uuid -> new_data{xx: xx}
    self.goods_dui_data_dict = {}
    # Newly added pictures: uuid -> athena_pic_id
    self.pic_dui_dict = {}
    self.pic_dui_data_dict = {}
    # Newly added subjoin rows: uuid -> athena_subjoin_id
    self.subjoin_dui_dict = {}
    self.subjoin_dui_data_dict = {}

    # --- rows queued for the generated SQL ---
    self.goods_insert_data_list = []
    self.pic_insert_data_list = []
    self.subjoin_insert_data_list = []
    self.car_relation_insert_data_list = []
    self.car_fix_insert_data_list = []
    self.car_cate_insert_data_list = []
def __init__(self):
    """Create the DAO/file helpers, empty work lists, then call init_athena()."""
    self.athena_dao = CrawlDao.CrawlDao("athena", "local")
    self.monkey_dao = CrawlDao.CrawlDao('modeldatas', "local")
    self.fileDao = FileUtil.FileDao()

    # Rows collected for later insert/update statements.
    self.insert_part_data_list = []
    self.updata_part_data_list = []
    self.monkey_updata_goods_list = []

    # cat_name + parent_id -> cat_id
    self.athena_cat_dict = {}

    # Must run last: every attribute above already exists when it is called.
    self.init_athena()
def __init__(self):
    """Load the category-key and goods-category lookup tables.

    ``cate_id_dict`` maps "<parent_id>_<cat_code>" (with "_<vehicle_code>"
    appended for level-3 categories) to the category id.
    ``goods_dict`` maps goods_id to its current cat_id for seller 1.
    All keys and values are stored as strings.
    """
    self.dao = CrawlDao.CrawlDao()
    self.fileDao = FileUtil.FileDao()
    self.stringUtil = StringUtil.StringUtil()

    # p_id + code -> id
    self.cate_id_dict = {}
    category_rows = self.dao.db.get_data(
        'SELECT cat_id,cat_name,cat_code,parent_id,vehicle_code,cat_level '
        'FROM db_category_dian WHERE cat_id > 2999 ')
    for row in category_rows:
        cat_id = str(row['cat_id'])
        cat_code = str(row['cat_code'])
        parent_id = str(row['parent_id'])
        cat_level = str(row['cat_level'])
        vehicle_code = str(row['vehicle_code'])

        key_parts = [parent_id, cat_code]
        if cat_level == '3':
            # Only level-3 keys carry the vehicle code.
            key_parts.append(vehicle_code)
        self.cate_id_dict["_".join(key_parts)] = cat_id

    # g_id -> old_cat_id
    self.goods_dict = {}
    goods_rows = self.dao.db.get_data(
        "select cat_id,goods_id from db_goods where seller_id = 1")
    for row in goods_rows:
        old_cat_id = str(row['cat_id'])
        self.goods_dict[str(row['goods_id'])] = old_cat_id
def __init__(self):
    """Load the new-category key table and the old top-level thumbnails.

    ``cate_dict`` maps "<cat_name>_<cat_level>_<parent_id>" (with
    "_<vehicle_code>" appended for level-3 categories) to cat_id.
    ``old_cat_dict`` maps an old top-level category name to its thumbnail.
    """
    self.dao = CrawlDao.CrawlDao()
    self.fileDao = FileUtil.FileDao()
    self.stringUtil = StringUtil.StringUtil()
    self.table_mapping = 'db_category_mapping'
    self.vehicle_dict = {"商用车": "H", "乘用车": "C"}

    # New categories.
    self.cate_dict = {}
    new_rows = self.dao.db.get_data(
        "select cat_id,cat_name,vehicle_code,cat_level,parent_id "
        "from db_category_new ")
    for row in new_rows:
        cat_id = str(row['cat_id'])
        cat_name = str(row['cat_name'])
        cat_level = str(row['cat_level'])
        vehicle_code = str(row['vehicle_code'])
        parent_id = str(row['parent_id'])

        key_parts = [cat_name, cat_level, parent_id]
        if cat_level == '3':
            # Only level-3 keys carry the vehicle code.
            key_parts.append(vehicle_code)
        self.cate_dict["_".join(key_parts)] = cat_id

    # Image data of the old categories.
    self.old_cat_dict = {}
    old_rows = self.dao.db.get_data(
        "select cat_name,category_thumb from db_category "
        "where cat_id < 2999 and parent_id = 0 and is_deleted = 'N'")
    for row in old_rows:
        old_name = str(row['cat_name'])
        self.old_cat_dict[old_name] = str(row['category_thumb'])
def __init__(self):
    """Fetch the brand and series catalogues from the car-info HTTP API.

    ``brand_dict`` maps brand name (with "-" removed) to brand id.
    ``series_dict`` maps brand id to
    ``{company tagName: {series carName: series id}}``.

    Any failure while fetching or decoding is reported on stdout and
    swallowed (best effort); whatever was collected up to that point is
    kept. Both dictionaries always exist afterwards, even when the very
    first request fails.
    """
    self.dao = CrawlDao.CrawlDao()
    self.http = HttpUtil.HttpUtil()
    self.brand_dict = dict()
    # Created before the try block so the attribute exists even when the
    # brand request raises.
    self.series_dict = dict()

    test_header_url = 'http://app.360cec.com'
    # Production endpoint kept for reference (not used here):
    #   'http://10.162.51.140'
    self.header_url = test_header_url

    try:
        url = self.header_url + '/car/info?pid=0'
        json_result = json.loads(self.http.http_get(url))
        for brand_dic in json_result['data'][0]['list']:
            brand_name = str(brand_dic['name']).replace("-", "")
            self.brand_dict[brand_name] = str(brand_dic['id'])

        # Store series name and id, one request per brand.
        for brand_id in self.brand_dict.values():
            series_result = self.http.http_get(
                self.header_url + '/car/info?pid=' + brand_id)
            company_dict = dict()
            for company in json.loads(series_result)['data']:
                series_dic = dict()
                for series in company['list']:
                    series_dic[series['carName']] = str(series['id'])
                company_dict[company['tagName']] = series_dic
            self.series_dict[brand_id] = company_dict
    except Exception as e:
        # "%s", not "%e": "%e" is the float exponent format and raised a
        # TypeError of its own when handed the error text.
        print("==================get error ================%s" % (e,))
def get_online_car_relation_by_liyang(self, liyang_id=""):
    """Return the online car hierarchy recorded for a Liyang id.

    The row is looked up in ``self.liyang_dict`` first; on a miss it is
    read from ``db_car_all`` and cached there.

    :param liyang_id: value matched against ``db_car_all.l_id``; it is
        concatenated into the SQL text, so it must be a string.
    :return: a new dict with the keys ``car_id``, ``car_name``,
        ``car_brand_id``, ``car_brand``, ``car_series_id``, ``car_series``,
        ``car_model_id``, ``car_model``, ``car_power_id``, ``car_power``,
        ``car_year_id`` and ``car_year``; every value is passed through
        ``str``.
    :raises IndexError: presumably when the query returns no row even
        after the retry (the result is indexed with ``[0]``).
    """
    if liyang_id in self.liyang_dict:
        online_data = dict(self.liyang_dict[liyang_id])
    else:
        # NOTE(review): liyang_id is interpolated straight into the SQL
        # text; confirm it never comes from untrusted input.
        liyang_sql = (
            "select brand,brand_id,series,series_id,model,model_id,"
            "power,power_id,year,year_id,car_models,car_models_id "
            "from db_car_all where l_id = '" + liyang_id + "' limit 1")
        try:
            online_data = self.liyang_dao.db.get_data(liyang_sql)[0]
        except Exception:
            # Covers a missing/stale self.liyang_dao as well as a failed
            # query: (re)create the DAO and try once more. Unlike a bare
            # except, KeyboardInterrupt/SystemExit still propagate.
            self.liyang_dao = CrawlDao.CrawlDao('dataserver', 'online_cong')
            online_data = self.liyang_dao.db.get_data(liyang_sql)[0]
        self.liyang_dict[liyang_id] = online_data

    # (result key, source column), in the order the result is built.
    field_map = (
        ('car_id', 'car_models_id'),
        ('car_name', 'car_models'),
        ('car_brand_id', 'brand_id'),
        ('car_brand', 'brand'),
        ('car_series_id', 'series_id'),
        ('car_series', 'series'),
        ('car_model_id', 'model_id'),
        ('car_model', 'model'),
        ('car_power_id', 'power_id'),
        ('car_power', 'power'),
        ('car_year_id', 'year_id'),
        ('car_year', 'year'),
    )
    return dict(
        (target, str(online_data[source])) for target, source in field_map)
def __init__(self):
    """Create the helpers, constants and caches, loading category/part data.

    Statement order matters: ``init_category()`` and ``init_part()`` run
    before the three lookup dicts at the bottom are assigned.
    """
    self.dao = CrawlDao.CrawlDao("modeldatas")
    self.fileDao = FileUtil.FileDao()
    self.stringUtil = StringUtil.StringUtil()

    self.sql_category_table = 'db_category'
    self.sql_part_table = 'db_category_part'

    # Categories already updated, keyed by name + level + parent_id.
    self.category_have_list = []
    # Parts already updated, keyed by name + level.
    self.part_have_list = []

    self.label_dict = {u'字标': '1', u'灯泡': '2', u'四滤': '3', u'': '0'}
    self.cat_kind_dict = {u'全车件': 1, u'易损件': 0}
    # Marker used when the category kind cannot be determined.
    self.cat_kind_not = '3'

    self.aliss_name_text = ''
    self.label_text = ''

    # db_category: level -> dict(name + parent_id)
    self.level_name = self.init_category()
    # db_category_part: name + cate_id -> dict(part). The return value is
    # deliberately not stored (the original assignment to self.part_name
    # was commented out).
    self.init_part()

    # NOTE(review): these three dicts are assigned only after the two init
    # helpers have run; if those helpers fill them, the contents are
    # discarded here. Confirm the ordering is intended.
    # name + level + parent_id -> id
    self.category_id_dict = {}
    # name + level + parent_id -> vehicle_code
    self.category_code_dict = {}
    # cat_id -> cat_name
    self.cat_id_name = {}

    # Whether the current category is newly added.
    self.is_new = False
def __init__(self):
    """Load the non-deleted 'C'/'CH' parts and index them by id and by name.

    ``part_id_dict`` maps part id to its row; ``part_name_dict`` maps part
    name to the list of rows sharing that name, in query order.
    ``save_sql_set`` (a list, despite the name) starts with the statement
    that defines ``@now_time``.
    """
    self.dao = CrawlDao.CrawlDao("modeldatas")
    self.commodity_table = 'db_monkey_commodity_goods'
    self.part_table = 'db_monkey_part_goods_base'
    self.offer_table = 'db_monkey_offer_goods'

    self.save_sql_set = ["select @now_time := now();"]

    # id -> row
    self.part_id_dict = dict()
    # name -> list(row)
    self.part_name_dict = dict()

    part_sql = (
        "select id,part_name,sum_code,first_cat_id,second_cat_id,"
        "third_cat_id,first_cat_name,second_cat_name,third_cat_name "
        "from db_category_part WHERE is_deleted = 'N' "
        "and vehicle_code in ('C','CH')")
    for part_data in self.dao.db.get_data(part_sql):
        self.part_id_dict[part_data['id']] = part_data
        # Append in place; the earlier version copied the whole per-name
        # list for every additional row with the same name.
        self.part_name_dict.setdefault(
            part_data['part_name'], []).append(part_data)
def __init__(self):
    """Create the modeldatas DAO and the empty uuid / delete-id collectors."""
    # Local alternative kept from the original:
    #   CrawlDao.CrawlDao("modeldatas", "local")
    self.dao = CrawlDao.CrawlDao("modeldatas", "online_cong")

    self.have_goods_uuid_set = set()
    self.need_del_id_list = []
def __init__(self, *name, **kwargs):
    """Create the DAO and blank workbook/writer/sheet placeholders.

    Extra positional and keyword arguments are accepted but not used in
    this method.
    """
    self.dao = CrawlDao.CrawlDao()

    # Assigned elsewhere once a workbook is opened.
    self.wb = None
    self.ew = None
    self.sheet = None

    # attr_key -> column
    self.attr_key_col = {}
def __init__(self, *name, **kwargs):
    """Create the DAO, empty lookup tables and the three fixed section names.

    Extra positional and keyword arguments are accepted but not used in
    this method (the original ``super().__init__`` call was commented out).
    """
    self.measure_unit_dic = {}
    self.dao = CrawlDao.CrawlDao()
    self.index_sql_table = dict()

    self.goods_name = 'goods'
    self.car_name = 'car'
    self.record_name = 'record'
def __init__(self):
    """Load the sum_code -> part id table for all non-deleted parts."""
    self.dao = CrawlDao.CrawlDao("modeldatas")
    self.fileDao = FileUtil.FileDao()

    # str(sum_code) -> int(id)
    self.part_code_id_dict = {}
    rows = self.dao.db.get_data(
        "select id,sum_code from db_category_part where is_deleted = 'N'")
    for row in rows:
        part_id = int(row['id'])
        self.part_code_id_dict[str(row['sum_code'])] = part_id
def __init__(self):
    """Load the old-category -> new-category id mapping."""
    self.fileDao = FileUtil.FileDao()
    self.dao = CrawlDao.CrawlDao()

    # Fetch the mapping between new and old categories.
    rows = self.dao.db.get_data(
        'select my_cat_id,old_cat_id from db_category_relation')

    # int(old_cat_id) -> int(my_cat_id)
    self.relation_table = dict()
    for row in rows:
        new_cat_id = int(row['my_cat_id'])
        self.relation_table[int(row['old_cat_id'])] = new_cat_id
def __init__(self):
    """Load the standard part code -> part name table."""
    self.dao = CrawlDao.CrawlDao("test", "local")

    # Standard part codes, stored as part_code -> part_name.
    self.part_dict = {}
    part_sql = ("SELECT part_name,sum_code FROM db_category_part "
                "WHERE is_deleted = 'N'")
    for row in self.dao.db.get_data(part_sql):
        part_name = str(row['part_name'])
        self.part_dict[str(row['sum_code'])] = part_name
def __init__(self):
    """Create the helpers and the empty picture bookkeeping state."""
    self.dao = CrawlDao.CrawlDao("athena_center", "local")
    self.fileDao = FileUtil.FileDao()
    self.stringUtil = StringUtil.StringUtil()

    # Rows collected for later insert/update statements.
    self.insert_pic_data_list = []
    self.update_goods_car_data_list = []

    # Running maximum picture id (original comment: "mac pic").
    self.max_pic_id = 0
    # {pic_num, pic_index} -> [oe, oe, ...]
    self.pic_dict = {}
def __init__(self):
    """Create the DAOs, empty caches and counters, then call init_data()."""
    # Local alternative kept from the original:
    #   CrawlDao.CrawlDao('test', "local")
    self.dao = CrawlDao.CrawlDao('dataserver', "online_cong")
    self.shop_dao = CrawlDao.CrawlDao('ol_autoparts', "stall")

    # car_category
    self.car_category_list = []
    self.car_category_dict = {}
    # cat -> (car_id, ...)
    self.cat_car_dict = {}
    # cat_id -> cat_name
    self.cat_dict = {}

    # goods brand
    self.brand_id = 849
    self.brand_sum_num = 0
    self.car_sum_num = 0

    # Parent directory for the saved files. Desktop alternative kept from
    # the original: r'/Users/zxg/Desktop/statyunxiu'
    self.file_parent = r'/home/tqmall.pr/statyunxiu'

    # Must run last: every attribute above already exists when it is called.
    self.init_data()
def __init__(self):
    """Create the DAOs, load part goods and pictures, and set up the caches.

    ``init_part_goods()`` and ``init_pic()`` each run right after the dict
    declared just before them, and before the remaining caches exist.
    """
    # Alternative source kept from the original:
    #   CrawlDao.CrawlDao('athena_center')
    self.athena_dao = CrawlDao.CrawlDao('dev_dataserver')
    self.part_dao = CrawlDao.CrawlDao('modeldatas')

    # goods_uuId -> goods_data
    self.part_goods_data = {}
    self.init_part_goods()

    # pic_uuId -> pic_data
    self.part_pic_data = {}
    self.init_pic()

    # oe -> data
    self.center_goods_dict = {}
    # part_goods_uuid -> athena_goods_id
    self.center_part_goods_dict = {}
    # liyang id -> online car data
    self.liyang_dict = {}
    # "goods_id-car_id" pairs already handled
    self.has_goods_car = set()
    # goods_car rows queued for insert
    self.relaton_insert_list = []
def __init__(self):
    """Load the brand table and the online category name/parent tables."""
    self.dao = CrawlDao.CrawlDao()

    # Brands.
    self.brand_dict = self.online_brand()

    # Online categories: cat_id -> cat_name and cat_id -> parent_id.
    self.cat_dict = {}
    self.cat_parent_dict = {}
    rows = self.dao.db.get_data(
        'select cat_id,cat_name,parent_id from db_category')
    for row in rows:
        cat_id = row['cat_id']
        self.cat_dict[cat_id] = row['cat_name']
        self.cat_parent_dict[cat_id] = row['parent_id']
def __init__(self, *name, **kwargs):
    """Set up the output paths and sheet state, then load type/brand names.

    Extra positional and keyword arguments are accepted but not used in
    this method (the original ``super().__init__`` call was commented out).
    """
    self.dao = CrawlDao.CrawlDao()
    self.base_file_name = r'/Users/zxg/Desktop/temp/cn/base.txt'
    self.attr_file_name = r'/Users/zxg/Desktop/temp/cn/attr.txt'
    self.base_insert_list = []
    self.attr_insert_list = []
    self.source = u'商车网'

    # base sheet
    self.car_sheet = None
    # Row belonging to each truck id.
    self.truck_id_dict = {}

    # other sheet
    self.other_sheet = {}
    self.other_sheet_row = 1
    self.other_sheet_max_col = 1

    # Column belonging to each parameter.
    self.attr_column = {}
    # All parameters.
    self.attr_key_array_from_table = {}

    # ----- vehicle types: type id -> name -----
    self.type_name = {}
    type_rows = self.dao.db.get_data(
        "select id,type_name from sc_truck_car_type")
    for row in type_rows:
        type_id = row['id']
        self.type_name[type_id] = row['type_name']

    # ----- brands: car id -> name -----
    self.car_name = {}
    car_rows = self.dao.db.get_data(
        "select id,car_name from sc_truck_car_category")
    for row in car_rows:
        car_id = row['id']
        self.car_name[car_id] = row['car_name']
def __init__(self, *name, **kwargs):
    """Create the output workbook and load type/brand names and parent ids.

    Extra positional and keyword arguments are accepted but not used in
    this method (the original ``super().__init__`` call was commented out).
    """
    self.dao = CrawlDao.CrawlDao()
    self.file_name = r'/Users/zxg/Desktop/text.xls'
    # Create a new excel workbook.
    self.file = xlwt.Workbook(encoding='utf-8')

    # base sheet
    self.car_sheet = None
    # Row belonging to each truck id.
    self.truck_id_dict = {}
    # All "other" sheets.
    self.other_sheet_list = []
    # Sheet belonging to each parameter.
    self.attr_sheet = {}
    # Column belonging to each parameter.
    self.attr_column = {}

    # ----- vehicle types: type id -> name / type id -> pid -----
    self.type_name = {}
    self.type_pid = {}
    type_rows = self.dao.db.get_data(
        "select id,type_name,pid from sc_truck_car_type")
    for row in type_rows:
        type_id = row['id']
        label = row['type_name']
        parent = row['pid']
        self.type_name[type_id] = label
        self.type_pid[type_id] = parent

    # ----- brands: car id -> name / car id -> pid -----
    self.car_name = {}
    self.car_pid = {}
    car_rows = self.dao.db.get_data(
        "select id,car_name,pid from sc_truck_car_category")
    for row in car_rows:
        car_id = row['id']
        label = row['car_name']
        parent = row['pid']
        self.car_name[car_id] = label
        self.car_pid[car_id] = parent
def add_data_to_other_sheet(self, truck_id):
    """Start a new row for ``truck_id`` on the other sheet and fetch its attrs.

    Advances ``self.other_sheet_row``, writes ``truck_id`` into column 1 of
    that row, then queries ``sc_truck_car_attr`` for the truck. If the
    query raises, the error is printed, the DAO is recreated and the query
    is run once more.

    NOTE(review): the fetched rows are not used in the code visible here;
    the method may continue beyond what this view shows.
    """
    self.other_sheet_row += 1
    id_cell = self.other_sheet.cell(row=self.other_sheet_row, column=1)
    id_cell.value = truck_id

    attr_sql = ("select attr_key_id,attr_value from sc_truck_car_attr "
                "where car_id = " + str(truck_id))
    print('attr_sql : %s' % attr_sql)
    try:
        attr_array = self.dao.db.get_data(attr_sql)
    except Exception as e:
        print(e)
        # Retry once on a fresh DAO.
        self.dao = CrawlDao.CrawlDao()
        attr_array = self.dao.db.get_data(attr_sql)
def get_pic_by_id(self, pic_id=""):
    """Return the picture_num/picture_index row for a picture uuid.

    The row is looked up in ``self.pic_dict`` first; on a miss it is read
    from ``db_monkey_part_picture`` and cached there.

    :param pic_id: value matched against the ``uuId`` column; it is
        concatenated into the SQL text, so it must be a string.
    :return: the cached or freshly fetched row object.
    :raises IndexError: presumably when the query returns no row even
        after the retry (the result is indexed with ``[0]``).
    """
    if pic_id in self.pic_dict:
        return self.pic_dict[pic_id]

    # NOTE(review): pic_id is interpolated straight into the SQL text;
    # confirm it never comes from untrusted input.
    pic_sql = ("select picture_num,picture_index from db_monkey_part_picture "
               "where uuId = '" + pic_id + "'")
    try:
        pic_data = self.monkey_dao.db.get_data(pic_sql)[0]
    except Exception:
        # Covers a missing/stale self.monkey_dao as well as a failed query:
        # (re)create the DAO and try once more. Unlike a bare except,
        # KeyboardInterrupt/SystemExit still propagate.
        self.monkey_dao = CrawlDao.CrawlDao('modeldatas', 'online_cong')
        pic_data = self.monkey_dao.db.get_data(pic_sql)[0]

    self.pic_dict[pic_id] = pic_data
    return pic_data
def __init__(self):
    """Create the helpers, load the Tuhu car/oil tables and snapshot their keys."""
    self.dao = CrawlDao.CrawlDao("test", "local")
    self.fileDao = FileUtil.FileDao()

    # car_brand + "_" + company -> {data}
    self.tuhu_car_dict = {}
    # tuhu_car_id -> oil_used
    self.tuhu_oil_dict = {}

    # The loaders run after both dicts exist and before the keys are taken.
    self.initTuhuCar()
    self.initTuhuOil()

    self.tuhu_car_dict_keys = self.tuhu_car_dict.keys()
    self.tuhu_oil_dict_keys = self.tuhu_oil_dict.keys()

    # Cache.
    self.online_car_liyang_cache = {}
def __init__(self):
    """Create the output workbook with its two sheets and load the part data.

    Statement order matters: each ``init_*`` helper runs at the same point
    as before relative to the attributes declared around it.
    """
    self.dao = CrawlDao.CrawlDao("modeldatas")
    self.fileDao = FileUtil.FileDao()

    # Generated excel workbook.
    workbook = Workbook()
    self.write_ew = ExcelWriter(workbook=workbook)
    self.lop_sheet = workbook.create_sheet(u"导入lop数据", 0)
    self.wrong_sheet = workbook.create_sheet(u"错误数据", 1)
    self.init_write_ew()

    # Row counters for what has been saved on each sheet.
    self.save_lop_row_num = 1
    self.save_wrong_row_num = 1

    # Basic info from the part table: sum_code -> data
    self.part_dict = {}
    self.init_part()

    # Parts library: oe -> list(part_code)
    self.part_oe_code_dict = {}
    self.init_part_goods_base()
def get_tqmall_car_by_liyang(self, liyang_id):
    """Return the ``car_models_id`` recorded for a Liyang id.

    The value is looked up in ``self.liyang_online_dict`` first; on a miss
    it is read from ``db_car_all`` and cached there.

    :param liyang_id: value matched against ``db_car_all.l_id``; it is
        concatenated into the SQL text, so it must be a string.
    :return: ``str(car_models_id)`` when a row exists. When no row exists a
        message is printed and the integer ``0`` is returned (and cached).
        The int/str difference is kept as-is for existing callers.
    """
    if liyang_id in self.liyang_online_dict:
        return self.liyang_online_dict[liyang_id]

    # NOTE(review): liyang_id is interpolated straight into the SQL text;
    # confirm it never comes from untrusted input.
    liyang_sql = ("select car_models_id from db_car_all where l_id = '"
                  + liyang_id + "' limit 1")
    try:
        liyang_array = self.athena_dao.db.get_data(liyang_sql)
    except Exception:
        # Recreate the DAO and try once more. Unlike a bare except,
        # KeyboardInterrupt/SystemExit still propagate.
        self.athena_dao = CrawlDao.CrawlDao("dataserver", "online_cong")
        liyang_array = self.athena_dao.db.get_data(liyang_sql)

    if not liyang_array:
        print('liyang_id:%s not exist in db_car_all' % liyang_id)
        car_models_id = 0
    else:
        car_models_id = str(liyang_array[0]['car_models_id'])

    self.liyang_online_dict[liyang_id] = car_models_id
    return car_models_id
def __init__(self):
    """Load the Liyang/car reference data and create the excel work state.

    The three ``init_*`` loaders run after the reference containers exist
    and before the excel-processing containers are created.
    """
    self.dao = CrawlDao.CrawlDao('test', "local")
    self.fileDao = FileUtil.FileDao()
    self.file_parent = r'/Users/zxg/Desktop/lvqing'

    # ----- reference data filled in by the loaders -----
    # brand_factory -> list<map>{leyel_id, series, vehicle_type, model_year,
    # displacement, intake_style, max_power, fuel_type}
    self.liyang_dict_key = "{}_{}_{}"
    self.liyang_dict = {}
    # liyang_id -> online_car_id
    self.liyang_tqmall_dict = {}
    # car_category: list of online car ids
    self.car_category_list = []
    # id -> {brand, series, model, power, year, name}
    self.car_category_dict = {}

    self.init_liyang()
    self.init_liyang_tqmall()
    self.init_car()

    # ----- state used while processing the excel file -----
    # goods_format -> goods_size
    self.goods_dict = {}
    # goods_format -> set(online_car_id)
    self.goods_car_dict = {}
    # online_car_id -> set(goods_format)
    self.car_goods_dict = {}
    # liyang_car_id -> set(goods_format)
    self.liyang_car_goods_dict = {}
    # liyang_id -> {liyang_data}
    self.liyang_id_data_dict = {}
    # Car models not matched yet; starts as an independent copy of the
    # full car list.
    self.not_car_list = copy.deepcopy(self.car_category_list)
    # Rows from the excel file that could not be matched.
    self.wrong_data_list = []
def __init__(self):
    """Load the non-deleted rows of the category data table.

    ``new_data_dict`` maps ``int(cat_id)`` to a dict describing the
    category, with the image/style fields preset to empty strings.
    ``new_data_pid_dict`` maps ``int(parent_id)`` to the list of child
    cat_ids in query order.
    """
    self.dao = CrawlDao.CrawlDao()
    self.sql_category_online_table = 'db_category_dian'
    self.sql_category_data_table = 'db_category_data'
    self.sql_category_cat_label_table = 'db_category_cat_label'

    # Newly added data: id -> category dict
    self.new_data_dict = dict()
    # pid -> list(id)
    self.new_data_pid_dict = dict()

    new_data_sql = (
        "select cat_id,cat_name,parent_id,cat_kind,cat_level,code,"
        "vehicle_code from " + self.sql_category_data_table +
        " where is_deleted= 'N'")
    for new_data in self.dao.db.get_data(new_data_sql):
        cat_id = int(new_data['cat_id'])
        parent_id = int(new_data['parent_id'])
        self.new_data_dict[cat_id] = {
            'cat_name': new_data['cat_name'],
            'cat_kind': new_data['cat_kind'],
            'cat_level': new_data['cat_level'],
            'cat_code': new_data['code'],
            'vehicle_code': new_data['vehicle_code'],
            'category_thumb': '',
            'category_img': '',
            'original_img': '',
            'style': '',
        }
        # Append in place; the earlier version scanned .keys() and copied
        # the child list for every row.
        self.new_data_pid_dict.setdefault(parent_id, []).append(cat_id)
def __init__(self):
    """Load the online brand and attribute names and create empty mappings.

    ``online_brand_name_dict`` / ``online_brand_pic_dict`` map brand_id to
    the brand name and to the stripped app logo value.
    ``online_attr_name_dict`` maps attribute config id to its name.
    The remaining dictionaries start empty.
    """
    self.dao = CrawlDao.CrawlDao()
    self.user = 1

    # E-commerce brand_id -> name / picture.
    self.online_brand_name_dict = dict()
    self.online_brand_pic_dict = dict()
    # Fixed: the column list read "brand_name.brand_logo_app" (a dot where
    # a comma belongs), although both columns are read below.
    db_brand_sql = "select brand_id,brand_name,brand_logo_app " \
                   "from db_brand"
    for db_brand_data in self.dao.db.get_data(db_brand_sql):
        brand_id = db_brand_data['brand_id']
        self.online_brand_name_dict[brand_id] = str(
            db_brand_data['brand_name'])
        self.online_brand_pic_dict[brand_id] = str(
            db_brand_data['brand_logo_app']).strip()

    # E-commerce brand_id -> center brand_id
    self.brand_online_center_dict = dict()
    # E-commerce goods_id -> its cat_id
    self.goods_online_cat_dict = dict()
    # E-commerce goods_id -> center goods uuid
    self.goods_online_uuid_dict = dict()
    # center goodsId -> goods uuid
    self.goods_center_uuid_dict = dict()

    # Attribute id -> attribute name.
    self.online_attr_name_dict = dict()
    db_attr_key_sql = "select id,attr_name from db_attribute_config"
    for db_attr_key_data in self.dao.db.get_data(db_attr_key_sql):
        self.online_attr_name_dict[
            db_attr_key_data['id']] = db_attr_key_data['attr_name']

    # online attr key + cat -> center attr key
    self.attr_key_online_center_dict = dict()
def __init__(self):
    """Load the non-deleted categories and collect those that have a parent.

    ``cate_id_name`` and ``cate_id_parent`` map cat_id to its name and
    parent id; ``second_cat_list`` holds, in cat_id order, every cat_id
    whose parent_id is not 0.
    """
    self.dao = CrawlDao.CrawlDao()
    self.stringUtil = StringUtil.StringUtil()
    self.sql_category_table = 'db_category'
    self.file_name = r'/Users/zxg/Desktop/old_cate.xlsx'

    self.cate_id_name = {}
    self.cate_id_parent = {}
    self.second_cat_list = []

    # Remaining rows.
    rows = self.dao.db.get_data(
        "select cat_id,cat_name,parent_id from db_category "
        "where is_deleted = 'N' order by cat_id")
    for row in rows:
        cat_id = row['cat_id']
        name = row['cat_name']
        parent = row['parent_id']
        self.cate_id_name[cat_id] = name
        self.cate_id_parent[cat_id] = parent
        if int(parent) != 0:
            self.second_cat_list.append(cat_id)