def update_items(self, tab_item, items_data, update_keys=()):
    """
    @summary: batch-update item data in the database
    ---------
    @param tab_item: name of the item table in redis
    @param items_data: list of item.to_dict() dicts
    @param update_keys: columns to update
    ---------
    @result: True if the update succeeded, else False
    """
    to_table = tools.get_info(tab_item, ":s_(.*?)_item", fetch_one=True)
    sql, datas = tools.make_batch_sql(
        to_table,
        items_data,
        update_columns=update_keys or list(items_data[0].keys()),
    )
    update_count = self.to_db.add_batch(sql, datas)
    if update_count is None:
        log.error("更新表 %s 数据失败" % to_table)
    else:
        msg = "共更新 %s 条数据 到 %s" % (update_count // 2, to_table)
        if update_keys:
            msg += " 更新字段为 {}".format(update_keys)
        log.info(msg)

    return update_count is not None
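# Minimal runnable illustration (not framework code) of the update_columns fallback
# used above: when update_keys is empty, every column of the first item is updated.
# The generated SQL is assumed to be an "INSERT ... ON DUPLICATE KEY UPDATE" batch;
# MySQL counts each row changed that way as 2 affected rows, which is why the log
# message divides update_count by 2.
items_data = [{"id": 1, "title": "t1", "status": 2}]
for update_keys in ((), ("title", "status")):
    update_columns = update_keys or list(items_data[0].keys())
    print(update_columns)  # ['id', 'title', 'status'], then ('title', 'status')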
def __export_to_db(self, tab_item, datas, is_update=False, update_keys=()):
    to_table = tools.get_info(tab_item, ":s_(.*?)_item$", fetch_one=True)

    # instrumentation & data validation
    self.check_datas(table=to_table, datas=datas)

    for pipeline in self._pipelines:
        if is_update:
            # the task table is only updated through the mysql pipeline
            if to_table == self._task_table and not isinstance(
                pipeline, MysqlPipeline
            ):
                continue

            if not pipeline.update_items(to_table, datas, update_keys=update_keys):
                log.error(
                    f"{pipeline.__class__.__name__} 更新数据失败. table: {to_table} items: {datas}"
                )
                return False
        else:
            if not pipeline.save_items(to_table, datas):
                log.error(
                    f"{pipeline.__class__.__name__} 保存数据失败. table: {to_table} items: {datas}"
                )
                return False

    # if this is the task table and no mysql pipeline ran above,
    # fall back to the dedicated mysql pipeline to update the task state
    if not self._have_mysql_pipeline and is_update and to_table == self._task_table:
        self.mysql_pipeline.update_items(to_table, datas, update_keys=update_keys)

    return True
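# Hedged sketch of the pipeline interface the loop above relies on: each pipeline
# only needs save_items / update_items returning a truthy value on success.
# ConsolePipeline is a hypothetical example class, not part of this codebase.
class ConsolePipeline:
    def save_items(self, table, items):
        for item in items:
            print(f"save -> {table}: {item}")
        return True  # a falsy return makes __export_to_db log an error and stop

    def update_items(self, table, items, update_keys=()):
        for item in items:
            print(f"update({update_keys}) -> {table}: {item}")
        return True

ConsolePipeline().save_items("news_article", [{"id": 1, "title": "t"}])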
def create(self, sort_keys=False):
    contents = self.get_data()

    json = {}
    for content in contents:
        content = content.strip()
        # skip blank lines and pseudo headers starting with ":"
        if not content or content.startswith(":"):
            continue

        regex = r"([^:\s]*)[:|\s]*(.*)"
        result = tools.get_info(content, regex, fetch_one=True)
        if result[0] in json:
            # duplicate keys are joined with "&"
            json[result[0]] = json[result[0]] + "&" + result[1]
        else:
            json[result[0]] = result[1].strip()

    print(tools.dumps_json(json, sort_keys=sort_keys))
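# Runnable sketch of what create() does to pasted request headers, using re and
# json directly in place of tools.get_info / tools.dumps_json (assumed to wrap
# re.search and json.dumps respectively):
import json as _json
import re

lines = [
    "Host: www.example.com",
    "Accept-Language: zh-CN,zh;q=0.9",
    ":authority: www.example.com",  # pseudo header starting with ':' is skipped
]
headers = {}
for line in lines:
    line = line.strip()
    if not line or line.startswith(":"):
        continue
    key, value = re.search(r"([^:\s]*)[:|\s]*(.*)", line).groups()
    headers[key] = headers[key] + "&" + value if key in headers else value.strip()
print(_json.dumps(headers, indent=4))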
def export_items(self, tab_item, items_data):
    """
    @summary: export item data to the database
    ---------
    @param tab_item: name of the item table in redis
    @param items_data: list of item.to_dict() dicts
    ---------
    @result: True if the export succeeded, else False
    """
    to_table = tools.get_info(tab_item, ":s_(.*?)_item", fetch_one=True)
    sql, datas = tools.make_batch_sql(to_table, items_data)
    add_count = self.to_db.add_batch(sql, datas)
    datas_size = len(datas)
    if add_count is None:
        log.error("导出数据到表 %s 失败" % to_table)
    else:
        log.info(
            "共导出 %s 条数据 到 %s, 重复 %s 条"
            % (datas_size, to_table, datas_size - add_count)
        )

    return add_count is not None
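# Minimal sketch of the duplicate count reported above: add_batch is assumed to
# return the number of rows actually inserted (e.g. with duplicate-ignoring
# inserts), so the difference from the batch size is the number of rows that
# already existed. The values below are illustrative only.
datas_size = 100   # rows sent in this batch
add_count = 97     # rows actually inserted (assumed return value of add_batch)
duplicates = datas_size - add_count
print("exported %s rows, %s duplicates" % (datas_size, duplicates))  # 100 rows, 3 duplicates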
def export_all(
    self,
    tables,
    auto_update=False,
    batch_count=100,
    every_table_per_export_callback=None,
):
    """
    @summary: export all items
    ---------
    @param tables: e.g. "qidian" exports every item table under qidian.
        Table names in the database must follow the naming convention, e.g.
        qidian:comment:s_qidian_book_comment_dynamic_item is exported to
        qidian_book_comment_dynamic
    @param auto_update: whether existing rows are updated automatically
    @param batch_count: number of rows exported per batch
    @param every_table_per_export_callback: callback invoked before each export,
        used to override to_table, auto_update, batch_count for specific tables, e.g.

        def every_table_per_export_callback(to_table, auto_update, batch_count):
            if to_table == 'xxx':
                auto_update = True
            return to_table, auto_update, batch_count
    ---------
    @result:
    """
    tables = (
        self.redisdb.getkeys(tables + "*_item")
        if not isinstance(tables, list)
        else tables
    )
    if not tables:
        log.info("无表数据")

    for table in tables:
        from_table = table
        to_table = tools.get_info(str(from_table), ":s_(.*?)_item", fetch_one=True)

        if callable(every_table_per_export_callback):
            to_table, auto_update, batch_count = every_table_per_export_callback(
                to_table, auto_update, batch_count
            )

        log.info(
            """
            \r正在导出 %s -> %s"""
            % (from_table, to_table)
        )

        self.export(from_table, to_table, auto_update, batch_count)
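# Runnable sketch of the key -> table mapping described in the docstring above,
# using re directly (tools.get_info is assumed to behave like re.search here):
import re

def _to_table(item_key):
    match = re.search(":s_(.*?)_item", item_key)
    return match.group(1) if match else None

assert _to_table("qidian:comment:s_qidian_book_comment_dynamic_item") == "qidian_book_comment_dynamic"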
def __export_to_db(self, tab_item, datas, is_update=False, update_keys=()):
    export_success = False

    # instrumentation & data validation
    to_table = tools.get_info(tab_item, ":s_(.*?)_item$", fetch_one=True)
    item_name = to_table + "_item"
    self.check_datas(table=to_table, datas=datas)

    if setting.ADD_ITEM_TO_MYSQL:  # task tables must be written to mysql
        if isinstance(setting.ADD_ITEM_TO_MYSQL, (list, tuple)):
            for item in setting.ADD_ITEM_TO_MYSQL:
                if item in item_name:
                    export_success = (
                        self._export_data.export_items(tab_item, datas)
                        if not is_update
                        else self._export_data.update_items(
                            tab_item, datas, update_keys=update_keys
                        )
                    )
        else:
            export_success = (
                self._export_data.export_items(tab_item, datas)
                if not is_update
                else self._export_data.update_items(
                    tab_item, datas, update_keys=update_keys
                )
            )

    if setting.ADD_ITEM_TO_REDIS:
        if isinstance(setting.ADD_ITEM_TO_REDIS, (list, tuple)):
            for item in setting.ADD_ITEM_TO_REDIS:
                if item in item_name:
                    self._db.sadd(tab_item, datas)
                    export_success = True
                    log.info("共导出 %s 条数据 到redis %s" % (len(datas), tab_item))
                    break
        else:
            self._db.sadd(tab_item, datas)
            export_success = True
            log.info("共导出 %s 条数据 到redis %s" % (len(datas), tab_item))

    return export_success
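# Hedged sketch of the routing settings read above (values are illustrative, not
# taken from this codebase). A truthy scalar exports every item table; a list or
# tuple exports only item tables whose name contains one of the entries.
ADD_ITEM_TO_MYSQL = True              # every item goes to mysql
ADD_ITEM_TO_REDIS = ["book_comment"]  # only *book_comment*_item goes to redis

# matching check used in __export_to_db:
item_name = "qidian_book_comment_dynamic_item"  # to_table + "_item"
print(any(entry in item_name for entry in ADD_ITEM_TO_REDIS))  # True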