class BaseSaver(object):
    '''
    BaseSaver loads spider output from a JSON file and stores it in one of
    the supported databases (MongoDB, Neo4j or MySQL).

    :Usage:
        saver = BaseSaver(save_mode='mysql')
        saver.data_save('some_file')   # reads <save_path>/some_file.json
    '''

    # Storage back-ends accepted by ``__init__``.
    SAVE_MODES = ('mongodb', 'neo4j', 'mysql')

    def __init__(self, save_mode="neo4j"):
        '''
        Initialize an instance of BaseSaver and connect to the database.

        :Args:
         - save_mode : a str of database to save data in; must be one of
           ``SAVE_MODES``.

        :Raises:
         - RuntimeError : if ``save_mode`` is not a supported mode.
        '''
        if save_mode not in self.SAVE_MODES:
            raise RuntimeError('存储模式指定有误,请输入mongodb、neo4j或者mysql')
        self.save_mode = save_mode
        if self.save_mode == 'mongodb':
            # mongodb initialize: select the database named by 'authSource'.
            print('>>>> we are in mongodb.')
            self.connector = MongoClient(
                **MONGO_CONF)[MONGO_CONF.get('authSource')]
        elif self.save_mode == 'neo4j':
            # neo4j initialize
            print('>>>> we are in neo4j.')
            self.connector = Graph(**NEO_CONF)
        else:
            # mysql initialize: connect, then run the RESORT_SQL statement
            # formatted with the target table name.
            print('>>>> we are in mysql.')
            self.connector = pymysql.connect(**SQL_CONF)
            self.cursor = self.connector.cursor()
            sql = RESORT_SQL.format(table_name)
            print(sql)
            self.cursor.execute(sql)
            self.connector.commit()

    def data_save(self, file_name):
        '''
        Saves spider fetched data into different databases.
        Wipes out the old data and saves the new fetched ones.

        The loaded data is kept on ``self.json_data``. When the file holds
        an empty list the old data is still wiped and nothing is inserted.

        :Args:
         - file_name : a str of file name (without the ``.json`` suffix)
           to fetch data from.

        :Raises:
         - RuntimeError : if the data file does not exist.
        '''
        # Only JSON is read here; other file types can be supported by
        # converting them to JSON before calling this method.
        file_path = os.path.join(save_path, file_name + '.json')
        if not os.access(file_path, os.F_OK):
            raise RuntimeError(f'数据文件{file_path}不存在,请检查数据!')
        with open(file_path, 'r', encoding='utf-8') as file:
            # The file object already decodes UTF-8; json.load() no longer
            # accepts an ``encoding`` keyword on Python 3.9+.
            self.json_data = json.load(file)

        if self.save_mode == 'mongodb':
            print('>>> we are saving to mongodb.')
            # Remove the previous data.
            self.connector.drop_collection(collection)
            # Store the new data; insert_many() rejects an empty list, so
            # skip the call when there is nothing to insert.
            if self.json_data:
                self.connector[collection].insert_many(self.json_data)
        elif self.save_mode == 'neo4j':
            print('>>> we are saving to neo4j.')
            # Remove the previous graph -- use with care.
            self.graph_cleaner()
            # Build the new graph.
            self.graph_builder()
        else:
            print('>>> we are saving to mysql.')
            try:
                # Remove the previous rows -- use with care.
                self.cursor.execute(f"DELETE FROM {table_name}")
                if self.json_data:
                    # Column names come from the keys of the first record.
                    # NOTE(review): they are interpolated into the SQL text
                    # unescaped, so the data file must be trusted.
                    data_key = list(self.json_data[0].keys())
                    sql_key = ','.join(data_key)
                    sql_value = ', '.join(f'%({key})s' for key in data_key)
                    sql = ''' INSERT INTO {0}({1}) VALUES ({2}); '''.format(
                        table_name, sql_key, sql_value)
                    print(sql)
                    self.cursor.executemany(sql, self.json_data)
                self.connector.commit()
            except Exception:
                # Undo the DELETE so a failed insert does not leave the
                # transaction half-applied, then surface the error.
                self.connector.rollback()
                raise

    def graph_cleaner(self):
        '''Knowledge-graph removal hook; a no-op in this base class.'''
        pass

    def graph_builder(self):
        '''Knowledge-graph construction hook; a no-op in this base class.'''
        pass

    def __del__(self):
        '''
        The deconstructor of BaseSaver class.
        Deconstructs an instance of BaseSaver, closes Databases.
        '''
        # __init__ may have raised before these attributes were assigned;
        # finalization must not fail on such a partially built instance.
        mode = getattr(self, 'save_mode', None)
        if mode is None:
            return
        print(f'>>>> closing {mode}.')
        connector = getattr(self, 'connector', None)
        if connector is None:
            return
        if mode == 'mongodb':
            connector.client.close()
        elif mode == 'mysql':
            connector.close()
def commit(db_name: str, tx: Transaction):
    '''
    Commit the transaction ``tx`` through a graph handle for ``db_name``.

    Opens a ``Graph`` on ``URL`` authenticated with ``ADMIN_NAME`` and
    ``ADMIN_PASS``, selects the database by name and passes ``tx`` to the
    handle's ``commit`` method.

    :Args:
     - db_name : a str naming the database the handle is opened on.
     - tx : the transaction to commit.
    '''
    credentials = (ADMIN_NAME, ADMIN_PASS)
    Graph(URL, auth=credentials, name=db_name).commit(tx)