def sub_process_data(self, container, mysql_conn, table, id_field_name):
    """Compare candidate rows against the rows already stored in MySQL and
    queue the changed ones for update.

    :param container: dict holding "compare_list" (candidate rows) and
        "update_list" (output list, mutated in place).
    :param mysql_conn: open MySQL connection used for the per-row lookup.
    :param table: name of the table the rows belong to.
    :param id_field_name: primary-id column used to fetch the stored row.
    :return: None; changed rows are appended to container["update_list"].
    """
    compare_list = container["compare_list"]
    update_list = container["update_list"]
    if not compare_list:
        return
    for data in compare_list:
        # Pull the time fields aside so they do not affect the comparison;
        # they are restored via voluation_time() before queuing the row.
        time_container = self.separate_time(data)
        new_data = ToolSave.sort_item(data)
        # NOTE(review): get_old_data receives the raw `data`, not the sorted
        # `new_data` — looks intentional (only used as lookup key) but confirm.
        old_data = ToolSave.get_old_data(new_data=data, table_name=table,
                                         mysql_conn=mysql_conn,
                                         id_field_name=id_field_name)
        if not old_data:
            # Row does not exist in MySQL yet; nothing to update here.
            continue
        old_data = ToolSave.sort_item(old_data)
        if ToolSave.compare_data(new_data=new_data, old_data=old_data):
            # Row is unchanged; skip it.
            continue
        # Row changed: restore the time fields and queue it for update.
        self.voluation_time(data, time_container)
        update_list.append(data)
def store_data(self, container, mysql_conn, table, sm_instance, id_field_name):
    """Flush the collected rows to MySQL: updates first, then inserts.

    :param container: dict carrying "insert_list" and "update_list".
    :param mysql_conn: open MySQL connection to write through.
    :param table: target table name.
    :param sm_instance: settings-manager instance forwarded to ToolSave.
    :param id_field_name: primary-id column used to match rows on update.
    :return: None.
    """
    pending_updates = container["update_list"]
    pending_inserts = container["insert_list"]
    # Apply updates before inserts — same write order as the rest of the
    # pipeline expects.
    ToolSave.update_mysql_many(mysql_conn, pending_updates, table,
                               id_field_name, sm_instance)
    ToolSave.insert_mysql_many(mysql_conn, pending_inserts, table, sm_instance)
def out_put_data(out_put_queue, sm_instance, dps_instance):
    """Drain ``out_put_queue`` and persist every data dict into MySQL.

    Manages and loads all the storage pipeline classes: each item's
    ``coll_name`` selects a pipeline class from ``dps_instance``, which then
    performs the actual storage. A literal ``'end'`` item terminates the loop.

    :param out_put_queue: queue of data dicts produced by upstream workers.
    :param sm_instance: settings manager providing DB and log configuration.
    :param dps_instance: registry mapping a collection name to its pipeline
        class.
    :return: None.
    """
    import os

    db_mange = sm_instance.get_db_setting_instance()
    log_path_mange = sm_instance.get_log_setting_instance()
    mysql_conn = pymysql.connect(**db_mange.get_save_mysql_normal_params())
    log_dir_full_path = log_path_mange.get_log_dir_full_path()
    # os.path.join keeps the log path portable; the original hard-coded a
    # Windows backslash separator ("{}\outoutData.log").
    log = Logger(filename=os.path.join(log_dir_full_path, "outoutData.log"),
                 level='error')
    q_size = out_put_queue.qsize()
    tq = tqdm(total=q_size, desc="数据处理进度:")
    try:
        while True:
            data_dic = out_put_queue.get()
            if data_dic == 'end':
                # Sentinel pushed by the producer: no more data will arrive.
                print("data_output is end !")
                break
            try:
                coll = data_dic.get("coll_name")
                data_pipeline_func = dps_instance.get_pip_func(coll)
                data_pipeline_func().process_data_dic(data_dic, mysql_conn,
                                                      sm_instance)
                is_update = dps_instance.get_is_update()
                if is_update:
                    # Mark the source record as processed so it is not
                    # picked up again on the next run.
                    ToolSave.update_is_process_status(db_mange,
                                                      data_dic["_id"], coll)
            except Exception as e:
                # One bad record must not kill the whole drain loop;
                # log it and keep consuming.
                log.logger.error(e)
            finally:
                tq.update(1)
    finally:
        mysql_conn.close()
        tq.close()
def sub_process_data(self, container, mysql_conn, table, id_field_name):
    """Decide which candidate rows need a MySQL update by diffing log ids.

    Fetches, in a single query, the ``log_id`` values already stored for the
    candidates' ``car_id`` values; any candidate whose ``log_id`` is absent
    from that result is treated as changed and queued for update.

    :param container: dict with "compare_list" (candidates) and
        "update_list" (output list, mutated in place).
    :param mysql_conn: open MySQL connection used for the lookup.
    :param table: table to compare against.
    :param id_field_name: unused in this variant; kept for interface parity
        with the other pipelines.
    :return: None; changed rows are appended to container["update_list"].
    """
    compare_list = container["compare_list"]
    update_list = container["update_list"]
    if not compare_list:
        return
    # Set comprehensions instead of set([...]): one pass, no throwaway list.
    new_data_log_id_set = {data["log_id"] for data in compare_list}
    new_data_car_id_set = {data["car_id"] for data in compare_list}
    old_data_log_id_set = ToolSave.get_compare_set(
        mysql_conn=mysql_conn,
        table=table,
        id_field_name="log_id",
        condition_field="car_id",
        condition_list=new_data_car_id_set)
    # log_ids present locally but missing in MySQL mark the changed rows.
    diff_log_id_set = new_data_log_id_set - old_data_log_id_set
    for data in compare_list:
        if data["log_id"] in diff_log_id_set:
            print("append update data", data)
            # Drop add_time so the stored insert timestamp is preserved
            # when the row is updated.
            data.pop("add_time", None)
            update_list.append(data)
def get_filter_set(self, table, mysql_conn, id_field_name):
    """Return the set of id values already stored for ``table``.

    The result is cached per table in ``self.id_field_set_dic``, so MySQL is
    queried only on the first call for each table.

    :param table: table whose id set is wanted.
    :param mysql_conn: open MySQL connection; used only on a cache miss.
    :param id_field_name: column whose values make up the set.
    :return: the (possibly cached) id set for ``table``.
    """
    # Cache hit: hand back the previously loaded set.
    if table in self.id_field_set_dic:
        return self.id_field_set_dic[table]
    # Cache miss: load the id set from MySQL and remember it.
    fresh_set = ToolSave.get_filter_set(mysql_conn=mysql_conn,
                                        id_field_name=id_field_name,
                                        table=table)
    self.id_field_set_dic[table] = fresh_set
    return fresh_set
def process_data(self, data, container, id_field_set, mysql_conn, table, id_field_name):
    """Queue ``data`` for insertion unless its id already exists.

    :param data: candidate row (dict) to store.
    :param container: dict with "insert_list"; mutated in place.
    :param id_field_set: set of id values already present in the table.
    :param mysql_conn: unused in this method; kept for interface parity.
    :param table: unused in this method; kept for interface parity.
    :param id_field_name: key under which the row carries its id value.
    :return: None; new rows are appended to container["insert_list"].
    """
    insert_list = container["insert_list"]
    id_field = data.get(id_field_name)
    # test_exist is truthy when the id is already stored; only queue new rows.
    if not ToolSave.test_exist(id_field=id_field, id_field_set=id_field_set):
        print("append insert data")
        insert_list.append(data)