def update_existing_table(config_file, table_name, directory_name, data_file, alias, silent, integrity_cancel):
    """Add new records/columns from a data file and/or genomic files to an existing table.

    :param config_file: path used to locate the project configuration
    :param table_name: table to update ("None" when selecting by alias)
    :param directory_name: directory of genomic files to add ("None" to skip)
    :param data_file: metadata/count file whose contents are added ("None" to skip)
    :param alias: table alias ("None" when selecting by name)
    :param silent: suppress console output when True
    :param integrity_cancel: skip the post-update integrity check when True
    :return: None
    """
    # Keep the caller's original arguments for the final integrity check
    _cfg, _tbl, _al, _sil = config_file, table_name, alias, silent
    config, config_file = ConfigManager.confirm_config_set(config_file)
    if alias != "None":
        table_name = ConfigManager.get_name_by_alias(alias, config)
        if table_name is None:
            print(
                "!! Table does not exist! Run CREATE to add to existing database, or INIT to create in new database !!"
            )
            exit(1)
    if table_name != "None" and table_name not in config.keys():
        print(
            "!! Table does not exist! Run CREATE to add to existing database, or INIT to create in new database !!"
        )
        exit(1)
    cfg = ConfigManager(config, table_name)
    if not silent:
        _update_display_message_prelude(
            config[ConfigKeys.DATABASES][ConfigKeys.db_name],
            config[ConfigKeys.DATABASES][ConfigKeys.rel_work_dir], table_name,
            directory_name, data_file, alias)
    if data_file != "None":
        data_to_add = CountTable(data_file)
    else:
        data_to_add = None
    if directory_name != "None":
        genomic_files_to_add = (_f for _f in os.listdir(directory_name))
    else:
        genomic_files_to_add = ()
    # BUGFIX: the original condition was inverted (`alias == "None"`), which
    # forwarded the literal string "None" as the alias and silently dropped a
    # real alias. Forward the alias only when one was actually provided.
    if alias != "None":
        new_attrs = ClassManager.populate_data_to_existing_table(
            table_name, data_to_add, cfg, genomic_files_to_add, directory_name,
            silent, alias)
    else:
        new_attrs = ClassManager.populate_data_to_existing_table(
            table_name, data_to_add, cfg, genomic_files_to_add, directory_name,
            silent)
    ClassManager.write_class(new_attrs, cfg.classes_file)
    if not silent:
        _update_table_display_message_epilogue()
    if not integrity_cancel:
        integrity_check(_cfg, _tbl, _al, _sil)
def load_table_metadata(config, tbl_name):
    """Open a database session for a single table and map its ORM class.

    :param config: parsed project configuration object
    :param tbl_name: name of the table to open
    :return: tuple of (session, mapped record class, table ConfigManager)
    """
    table_config = ConfigManager(config, tbl_name)
    db_engine = BaseData.get_engine(table_config.db_dir,
                                    table_config.db_name + ".db")
    db_session = BaseData.get_session_from_engine(db_engine)
    orm_table = ClassManager.get_class_orm(tbl_name, db_engine)
    # Build a Record subclass named after the table and map it onto the ORM table
    MappedRecord = type(tbl_name, (Record, ), {})
    mapper(MappedRecord, orm_table)
    # Caller queries this single table and breaks
    return db_session, MappedRecord, table_config
def remove_columns_from_table(config_file, table_name, list_file, alias, silent, integrity_cancel):
    """Remove the columns named in a list file from an existing table.

    The table is first copied to a dated CSV backup, the stored class
    definition is rewritten without the removed columns, and the table is
    dropped and repopulated from the backup.

    :param config_file: path used to locate the project configuration
    :param table_name: table to modify ("None" when selecting by alias)
    :param list_file: file with one column name per line
    :param alias: table alias ("None" when selecting by name)
    :param silent: suppress console output when True
    :param integrity_cancel: skip the post-removal integrity check when True
    :return: None
    """
    # Keep the caller's original arguments for the final integrity check
    _cfg, _tbl, _sil, _al = config_file, table_name, silent, alias
    config, config_file = ConfigManager.confirm_config_set(config_file)
    if alias != "None":
        table_name = ConfigManager.get_name_by_alias(alias, config)
    assert table_name is not None, TableNameAssertString.TABLE_NOT_FOUND
    if list_file == "None":
        raise ListFileNotProvidedError
    cfg = ConfigManager(config, table_name)
    # BUGFIX: read the list file inside a context manager; the original
    # leaked the open file handle
    with open(list_file, "r") as list_fp:
        columns_to_remove = set(line.rstrip("\r\n") for line in list_fp)
    if not silent:
        _remove_columns_display_message_prelude(
            config[ConfigKeys.DATABASES][ConfigKeys.db_name],
            config[ConfigKeys.DATABASES][ConfigKeys.rel_work_dir], table_name,
            alias, columns_to_remove)
    engine = BaseData.get_engine(cfg.db_dir, cfg.db_name + ".db")
    sess = BaseData.get_session_from_engine(engine)
    TableClass = ClassManager.get_class_orm(table_name, engine)
    update_manager = UpdateManager(cfg, ClassManager.get_class_as_dict(cfg),
                                   sess)
    # Back up the current table contents before dropping anything
    table_copy_csv = update_manager.create_table_copy(
        datetime.today().strftime("%Y%m%d"), TableClass, silent)
    # New class attributes: everything except the removed columns
    new_attrs = {
        key: value
        for key, value in ClassManager.correct_dict(
            ClassManager.get_class_as_dict(cfg)).items()
        if key not in columns_to_remove
    }
    UpdatedDBClass, metadata = ClassManager.generate_class(
        cfg.table_name, new_attrs, cfg.db_dir, cfg.db_name, cfg.table_dir)
    ClassManager.write_class(new_attrs, cfg.classes_file)
    cfg.update_config_file(table_name)
    # Drop the old table and reload the surviving columns from the CSV backup
    update_manager.delete_old_table_and_populate(engine, TableClass,
                                                 UpdatedDBClass,
                                                 table_copy_csv, table_name,
                                                 sess, silent)
    if not silent:
        _remove_columns_display_message_epilogue()
    if not integrity_cancel:
        integrity_check(_cfg, _tbl, _al, _sil)
def delete_from_table(config_file, table_name, list_file, alias, silent, integrity_cancel):
    """Delete the records whose ids are listed in a file, removing their files on disk.

    :param config_file: path used to locate the project configuration
    :param table_name: table to delete from ("None" when selecting by alias)
    :param list_file: file with one record id per line
    :param alias: table alias ("None" when selecting by name)
    :param silent: suppress console output when True
    :param integrity_cancel: skip the post-deletion integrity check when True
    :return: None
    """
    # Keep the caller's original arguments for the final integrity check
    _cfg, _tbl, _sil, _al = config_file, table_name, silent, alias
    config, config_file = ConfigManager.confirm_config_set(config_file)
    if alias != "None":
        table_name = ConfigManager.get_name_by_alias(alias, config)
    assert table_name is not None, TableNameAssertString.TABLE_NOT_FOUND
    if list_file == "None":
        raise ListFileNotProvidedError
    cfg = ConfigManager(config, table_name)
    # BUGFIX: read the list file inside a context manager; the original
    # leaked the open file handle
    with open(list_file, "r") as list_fp:
        ids_to_remove = set(line.rstrip("\r\n") for line in list_fp)
    if not silent:
        _delete_records_display_message_prelude(
            config[ConfigKeys.DATABASES][ConfigKeys.db_name],
            config[ConfigKeys.DATABASES][ConfigKeys.rel_work_dir], table_name,
            alias, ids_to_remove)
    engine = BaseData.get_engine(cfg.db_dir, cfg.db_name + ".db")
    sess = BaseData.get_session_from_engine(engine)
    TableClass = ClassManager.get_class_orm(table_name, engine)
    UserClass = type(table_name, (Record,), {})
    mapper(UserClass, TableClass)
    for _id in ids_to_remove:
        print_if_not_silent(silent, " ..Removing record %s" % _id)
        # BUGFIX: query once instead of twice, and skip ids that are not in the
        # table (the original raised AttributeError on None.full_path())
        record = sess.query(UserClass).filter_by(_id=_id).first()
        if record is None:
            continue
        try:
            os.remove(record.full_path())
        except TypeError:
            # full_path() gave a non-path value; preserve original skip behavior
            continue
        except OSError:
            # BUGFIX: file already gone on disk — still delete the DB record
            # (matches the OSError handling in remove_table_from_database)
            pass
        sess.delete(record)
    sess.commit()
    if not silent:
        _remove_columns_display_message_epilogue()
    if not integrity_cancel:
        integrity_check(_cfg, _tbl, _al, _sil)
def integrity_check(config_file, table_name, alias, silent):
    """Check integrity and issues in the project at all levels and write a fix file.

    Function called from dbdm; also invoked by the other table-management
    commands unless their `integrity_cancel` flag is set.

    :param config_file: path used to locate the project configuration
    :param table_name: table to check ("None"/None to check every table)
    :param alias: table alias ("None" when selecting by name)
    :param silent: suppress console output when True
    :return: None
    """
    config, config_file = ConfigManager.confirm_config_set(config_file)
    if alias != "None":
        table_name = ConfigManager.get_name_by_alias(alias, config)
    # No table selected by name or alias: check every table in the project
    if table_name is None or (table_name == "None" and alias == "None"):
        tables_to_search = list(config[ConfigKeys.TABLES_TO_DB].keys())
    else:
        tables_to_search = [
            table_name,
        ]
    # Date-stamped fix-file name; the random suffix avoids collisions between runs
    py_fixfile_name = "%s.%s.fix" % (datetime.today().strftime("%Y%m%d"),
                                     str(randint(1, 1001)))
    if not silent:
        _integrity_check_display_message_prelude(
            config[ConfigKeys.DATABASES][ConfigKeys.db_name],
            config[ConfigKeys.DATABASES][ConfigKeys.working_dir],
            tables_to_search, py_fixfile_name)
    im = IntegrityManager(config, py_fixfile_name, tables_to_search)
    # TODO: Implement table-level checks
    im.initialize_fix_file()
    im.initial_project_check()
    # im.table_check()
    # Run record-level checks for each selected table
    for table in tables_to_search:
        cfg = ConfigManager(config, table)
        engine = BaseData.get_engine(cfg.db_dir, cfg.db_name + ".db")
        sess = BaseData.get_session_from_engine(engine)
        TableClass = ClassManager.get_class_orm(table, engine)
        UserClass = type(table, (Record, ), {})
        # Map to SQL orm
        mapper(UserClass, TableClass)
        im.record_check(sess, UserClass, table)
    # Nothing flagged: delete the (empty) fix file again
    if im.issues_found == 0:
        os.remove(py_fixfile_name)
    if not silent:
        _integrity_check_display_message_epilogue(im.issues_found,
                                                  py_fixfile_name)
    del im
def remove_table_from_database(config_file, table_name, alias, silent, integrity_cancel):
    """Drop a table from its database, deleting record files, class file, and table directory.

    :param config_file: path used to locate the project configuration
    :param table_name: table to remove ("None" when selecting by alias)
    :param alias: table alias ("None" when selecting by name)
    :param silent: suppress console output when True
    :param integrity_cancel: skip the post-removal integrity check when True
    :return: None
    """
    # Remember the caller's arguments before they are rebound below
    saved_cfg, saved_tbl, saved_sil, saved_al = (config_file, table_name,
                                                 silent, alias)
    config, config_file = ConfigManager.confirm_config_set(config_file)
    if alias != "None":
        table_name = ConfigManager.get_name_by_alias(alias, config)
    assert table_name is not None, TableNameAssertString.TABLE_NOT_FOUND
    if not silent:
        _remove_table_display_message_prelude(
            config[ConfigKeys.DATABASES][ConfigKeys.db_name],
            config[ConfigKeys.DATABASES][ConfigKeys.rel_work_dir], table_name,
            alias)
    table_config = ConfigManager(config, table_name)
    db_engine = BaseData.get_engine(table_config.db_dir,
                                    table_config.db_name + ".db")
    db_session = BaseData.get_session_from_engine(db_engine)
    orm_table = ClassManager.get_class_orm(table_name, db_engine)
    MappedRecord = type(table_name, (Record, ), {})
    mapper(MappedRecord, orm_table)
    # Delete each record's file from disk before dropping the table itself
    for db_record in db_session.query(MappedRecord).all():
        if not db_record:
            continue
        print_if_not_silent(silent, " ..Removing record %s" % db_record._id)
        try:
            os.remove(db_record.full_path())
        except OSError:
            continue
    orm_table.drop(db_engine)
    # Scrub the table from the project metadata and on-disk layout
    table_config.remove_table_from_config_file(table_name)
    os.remove(table_config.classes_file)
    shutil.rmtree(table_config.table_dir)
    if not silent:
        _remove_columns_display_message_epilogue()
    if not integrity_cancel:
        integrity_check(saved_cfg, saved_tbl, saved_al, saved_sil)
def get_table(config_path, table_name=None, alias=None):
    """Primary entry point for users importing a project table.

    Resolves the table (by alias when given), opens a database session,
    maps the ORM class, and wraps everything in a RecordList.

    :param config_path: path used to locate the project configuration
    :param table_name: name of the table to load (optional when alias given)
    :param alias: alias of the table to load (optional)
    :return: RecordList over the requested table
    """
    # Load config data from file
    raw_config, config_path = ConfigManager.confirm_config_set(config_path)
    # An alias, when provided, takes precedence over the table name
    if alias:
        table_name = ConfigManager.get_name_by_alias(alias, raw_config)
    table_config = ConfigManager(raw_config, table_name)
    # Open a session against the table's database file
    db_engine = BaseData.get_engine(table_config.db_dir,
                                    table_config.db_name + ".db")
    db_session = BaseData.get_session_from_engine(db_engine)
    # Build and map the dynamic record class for this table
    orm_table = ClassManager.get_class_orm(table_name, db_engine)
    MappedRecord = type(table_name, (Record, ), {})
    mapper(MappedRecord, orm_table)
    return RecordList(db_session, MappedRecord, table_config)
def create_database(db_name, table_name, directory_name, data_file, alias, silent, integrity_cancel):
    """Initialize a new project: directory layout, config file, database, and first table.

    Function called from dbdm that initializes the project/module.

    :param db_name: (str) Name of db (also used as the project directory name)
    :param table_name: (str) Record that will be created
    :param directory_name: (str) Directory with files to add ("None" to skip)
    :param data_file: (str) File with metadata for storing in database ("None" to skip)
    :param alias: (str) Alias registered for the new table
    :param silent: suppress console output when True
    :param integrity_cancel: skip the post-creation integrity check when True
    :return: None
    """
    # Confirm working dir does not exist and that directory with genomes does exist
    assert db_name != "None", CreateDBAssertString.WORKING_DB_NOT_SET
    assert table_name != "None", CreateDBAssertString.TABLE_NAME_NOT_SET
    # BUGFIX: idiomatic truth test instead of `is False` identity comparison
    assert not os.path.isdir(db_name), CreateDBAssertString.WORKING_DIR_EXISTS
    if directory_name != "None":
        assert os.path.isdir(
            directory_name), CreateDBAssertString.SEQUENCE_DIR_NOT_EXISTS
    table_name = table_name.lower()
    if not silent:
        _initialization_display_message_prelude(db_name, db_name, table_name,
                                                directory_name, data_file,
                                                alias)
    # Gather files to commit and initial data to store for each file
    print_if_not_silent(silent, "Beginning process...")
    print_if_not_silent(silent, " Getting files from %s" % directory_name)
    if directory_name != "None":
        genomic_files_to_add = (_f for _f in os.listdir(directory_name))
    else:
        genomic_files_to_add = ()
    data_types = {}
    initial_data = []
    # BUGFIX: compare strings with `!=`, not `is not` — identity comparison of
    # string literals is unreliable and a SyntaxWarning on modern Python
    if data_file != "None":
        _initial_data = CountTable(data_file)
        # Ignore first column name, typically announcing "Name" or "Genome ID"
        # Get names and types of each column in dict
        # Key is header name (which will be used as name of column in database)
        # Value is type of data (int, float, str) that is found for a random genome id's data value
        # TODO: Data type determination requires a uniformity from the .tsv file data. Consider a workaround
        data_types = TypeMapper.get_translated_types(
            _initial_data, TypeMapper.py_type_to_string)
        initial_data.append(_initial_data)
    # Create working directories
    print_if_not_silent(silent,
                        " Creating directories at database root %s" % db_name)
    classes_dir, config_dir, db_dir, table_dir = _create_all_directories(
        db_name, table_name)
    # Create database file
    print_if_not_silent(silent, " Creating database file in %s" % db_dir)
    touch(os.path.join(db_dir, db_name + ".db"))
    # Write configuration info
    config_file = db_name + ".ini"
    print_if_not_silent(
        silent, " Writing database configuration to %s" %
        os.path.join(config_dir, config_file))
    config = Config()
    abs_path_working_dir = os.path.abspath(db_name)
    db_name = os.path.basename(db_name)
    config[ConfigKeys.DATABASES] = {
        ConfigKeys.db_name: db_name,
        ConfigKeys.working_dir: abs_path_working_dir,
        ConfigKeys.rel_work_dir: db_name,
        ConfigKeys.migrations_dir:
        os.path.join(abs_path_working_dir, Directories.MIGRATIONS),
        ConfigKeys.config_dir:
        os.path.join(abs_path_working_dir, Directories.CONFIG),
        ConfigKeys.db_dir:
        os.path.join(abs_path_working_dir, Directories.DATABASE),
        ConfigKeys.rel_db_dir: os.path.join(db_name, Directories.DATABASE),
    }
    config[table_name] = {
        ConfigKeys.rel_classes_dir: os.path.join(db_name, Directories.CLASSES),
    }
    config[ConfigKeys.TABLES_TO_DB] = {
        table_name: db_name,
    }
    config[ConfigKeys.TABLES_TO_ALIAS] = {
        "{}|{}".format(alias, table_name): table_name,
    }
    with open(os.path.join(config_dir, config_file), "w") as W:
        config.write(W)
    # Create table
    print_if_not_silent(
        silent, "Creating new table %s at %s" %
        (table_name, os.path.join(db_dir, db_name)))
    os.makedirs(table_dir)
    ClassManager.create_initial_table_in_db(db_name, db_name, table_name,
                                            data_types, silent, initial=False)
    # Populate table with data from file and genomes
    # Get config file - confirms that it was written correctly
    cfg = ConfigManager(config, table_name)
    # BUGFIX: `!=` instead of `is not` (see above)
    if data_file != "None":
        for _data in initial_data:
            ClassManager.populate_data_to_existing_table(
                table_name, _data, cfg, genomic_files_to_add, directory_name,
                silent)
    if not silent:
        _initialization_display_message_epilogue()
    if not integrity_cancel:
        integrity_check(db_name, table_name, "None", silent)
def create_table_in_existing_database(config_file, table_name, directory_name, data_file, alias, silent, integrity_cancel):
    """Create and populate a new table inside an already-initialized project.

    :param config_file: path used to locate the project configuration
    :param table_name: name of the table to create (must not exist yet)
    :param directory_name: directory of genomic files to add ("None" to skip)
    :param data_file: metadata/count file whose contents are added ("None" to skip)
    :param alias: alias registered for the new table
    :param silent: suppress console output when True
    :param integrity_cancel: skip the post-creation integrity check when True
    :return: None
    """
    assert table_name != "None", CreateDBAssertString.TABLE_NAME_NOT_SET
    assert config_file is not None, ConfigAssertString.CONFIG_FILE_NOT_PASSED
    # Keep the caller's original arguments for the final integrity check
    _cfg, _tbl, _sil, _al = config_file, table_name, silent, alias
    config, config_file = ConfigManager.confirm_config_set(config_file)
    if table_name in config.keys():
        print("!! Record exists, exiting. To update table, use UPDATE !!")
        exit(1)
    if not silent:
        _create_table_display_message_prelude(
            config[ConfigKeys.DATABASES][ConfigKeys.db_name],
            config[ConfigKeys.DATABASES][ConfigKeys.rel_work_dir], table_name,
            directory_name, data_file, alias)
    data_types = {}
    initial_data = []
    # BUGFIX: compare strings with `!=`, not `is not` — identity comparison of
    # string literals is unreliable and a SyntaxWarning on modern Python
    if data_file != "None":
        _initial_data = CountTable(data_file)
        # Ignore first column name, typically announcing "Name" or "Genome ID"
        # Get names and types of each column in dict
        # Key is header name (which will be used as name of column in database)
        # Value is type of data (int, float, str) that is found for a random genome id's data value
        # TODO: Data type determination requires a uniformity from the .tsv file data. Consider a workaround
        data_types = TypeMapper.get_translated_types(
            _initial_data, TypeMapper.py_type_to_string)
        initial_data.append(_initial_data)
    # Gather bio data from folder
    if directory_name != "None":
        genomic_files_to_add = (_f for _f in os.listdir(directory_name))
    else:
        genomic_files_to_add = ()
    # Create new table directories
    # NOTE(review): directories are created before table_name.lower(), so a
    # mixed-case name yields a differently-cased directory than the config
    # entry; create_database lowers first — confirm whether this is intended
    _create_all_directories(
        config[ConfigKeys.DATABASES][ConfigKeys.working_dir], table_name)
    # Update config object with new data
    table_name = table_name.lower()
    config[table_name] = {
        ConfigKeys.rel_classes_dir:
        os.path.join(config[ConfigKeys.DATABASES][ConfigKeys.rel_work_dir],
                     Directories.CLASSES),
    }
    config.set(ConfigKeys.TABLES_TO_DB, table_name,
               config[ConfigKeys.DATABASES][ConfigKeys.db_name])
    config.set(ConfigKeys.TABLES_TO_ALIAS,
               "{}|{}".format(alias, table_name), table_name)
    # Write new config file
    with open(config_file, "w") as W:
        config.write(W)
    # Update ConfigManager object
    cfg = ConfigManager(config, table_name)
    # Create new table and populate with new data
    ClassManager.create_initial_table_in_db(cfg.db_name, cfg.working_dir,
                                            table_name, data_types, silent,
                                            initial=False)
    # BUGFIX: `!=` instead of `is not` (see above)
    if data_file != "None":
        for _data in initial_data:
            ClassManager.populate_data_to_existing_table(
                table_name, _data, cfg, genomic_files_to_add, directory_name,
                silent)
    if not silent:
        _create_table_display_message_epilogue()
    if not integrity_cancel:
        integrity_check(_cfg, _tbl, _al, _sil)
def move_project(config_file, path, integrity_cancel, silent):
    """Move an entire project directory to a new location and update all stored paths.

    :param config_file: path used to locate the project configuration
    :param path: destination directory the project is moved into
    :param integrity_cancel: skip the per-table integrity check when True
    :param silent: suppress console output when True
    :return: None
    """
    assert path != 'None', "Path (-p) does not exist, exiting"
    current_path = os.path.abspath(
        os.path.abspath(os.path.relpath(config_file)))
    path = os.path.abspath(os.path.relpath(path))
    config, config_file = ConfigManager.confirm_config_set(config_file)
    old_path = config[ConfigKeys.DATABASES][ConfigKeys.working_dir]
    # Refuse to "move" the project onto the directory it already lives in
    assert os.path.dirname(old_path) != os.path.abspath(path), \
        "Project exists in directory, cancelling"
    project_name = config[ConfigKeys.DATABASES][ConfigKeys.rel_work_dir]
    if not silent:
        # NOTE(review): the trailing comma makes this statement a 1-tuple
        # expression — harmless but likely unintended
        _move_project_display_message_prelude(project_name, old_path, path),
    # Move directory from old location to new location
    shutil.move(current_path, path)
    # Update config file with new location
    abs_path_working_dir = os.path.abspath(os.path.join(path, project_name))
    db_name = project_name
    config[ConfigKeys.DATABASES] = {
        ConfigKeys.db_name: db_name,
        ConfigKeys.working_dir: abs_path_working_dir,
        ConfigKeys.rel_work_dir: db_name,
        ConfigKeys.migrations_dir:
        os.path.join(abs_path_working_dir, Directories.MIGRATIONS),
        ConfigKeys.config_dir:
        os.path.join(abs_path_working_dir, Directories.CONFIG),
        ConfigKeys.db_dir:
        os.path.join(abs_path_working_dir, Directories.DATABASE),
        ConfigKeys.rel_db_dir: os.path.join(db_name, Directories.DATABASE),
    }
    # Rewrite the config file inside the moved project tree
    with open(
            os.path.join(path, project_name, Directories.CONFIG,
                         os.path.basename(config_file)), "w") as W:
        config.write(W)
    # Update location for each record in each table
    tables_in_database = config[ConfigKeys.TABLES_TO_DB].keys()
    engine = BaseData.get_engine(
        os.path.join(config[ConfigKeys.DATABASES][ConfigKeys.working_dir],
                     Directories.DATABASE),
        config[ConfigKeys.DATABASES][ConfigKeys.rel_work_dir] + ".db")
    sess = BaseData.get_session_from_engine(engine)
    for tbl_name in tables_in_database:
        TableClass = ClassManager.get_class_orm(tbl_name, engine)
        UserClass = type(tbl_name, (Record, ), {})
        # Map to SQL orm
        mapper(UserClass, TableClass)
        # Point every record at the table directory under the new project root
        for record in sess.query(UserClass).all():
            record.location = os.path.join(abs_path_working_dir,
                                           Directories.DATABASE, tbl_name)
        sess.commit()
        # NOTE(review): integrity check runs once per table inside this loop —
        # confirm the repetition is intentional
        if not integrity_cancel:
            integrity_check(abs_path_working_dir, tbl_name, "None", silent)
    if not silent:
        _move_project_display_message_epilogue()