def construct_asm_lookup_table(metadata_db_name, metadata_db_user, metadata_db_host):
    with get_pg_connection_handle(metadata_db_name, metadata_db_user, metadata_db_host) as metadata_connection_handle:
        asm_lookup_table_name = "dbsnp_ensembl_species.EVA2015_snpmapinfo_asm_lookup"
        create_asm_lookup_table(metadata_connection_handle, asm_lookup_table_name)
        for species_info in get_species_info(metadata_connection_handle):
            for snpmapinfo_table_name in get_snpmapinfo_table_names_for_species(species_info):
                distinct_asm, type_of_asm = get_distinct_asm_with_overweight_snps_in_snpmapinfo_table(
                    snpmapinfo_table_name, species_info)
                for asm in distinct_asm:
                    resolved_GCA_accession = resolve_asm_to_GCA_accession(asm, type_of_asm)
                    query = "insert into {0} values ('{1}', '{2}', '{3}', '{4}') " \
                            "on conflict on constraint unique_constraint do nothing" \
                        .format(asm_lookup_table_name, species_info["database_name"], snpmapinfo_table_name,
                                asm, resolved_GCA_accession)
                    logger.info("Executing query: " + query)
                    execute_query(metadata_connection_handle, query)
                    metadata_connection_handle.commit()

def fill_in_table_from_remapping(private_config_xml_file, release_version, reference_directory):
    # Note: a space is required before "from", otherwise the concatenated SQL is invalid
    query_retrieve_info = (
        "select taxonomy, scientific_name, assembly_accession, string_agg(distinct source, ', '), sum(num_ss_ids) "
        "from eva_progress_tracker.remapping_tracker "
        f"where release_version={release_version} "
        "group by taxonomy, scientific_name, assembly_accession")
    with get_metadata_connection_handle("development", private_config_xml_file) as pg_conn:
        for taxonomy, scientific_name, assembly_accession, sources, num_ss_id in get_all_results_for_query(
                pg_conn, query_retrieve_info):
            if num_ss_id == 0:
                # Do not release species with no data
                continue
            should_be_clustered = True
            should_be_released = True
            ncbi_assembly = NCBIAssembly(assembly_accession, scientific_name, reference_directory)
            fasta_path = ncbi_assembly.assembly_fasta_path
            report_path = ncbi_assembly.assembly_report_path
            tempmongo_instance = get_tempmongo_instance(pg_conn, taxonomy)
            release_folder_name = normalise_taxon_scientific_name(scientific_name)
            query_insert = (
                'INSERT INTO eva_progress_tracker.clustering_release_tracker '
                '(sources, taxonomy, scientific_name, assembly_accession, release_version, should_be_clustered, '
                'fasta_path, report_path, tempmongo_instance, should_be_released, release_folder_name) '
                f"VALUES ('{sources}', {taxonomy}, '{scientific_name}', '{assembly_accession}', {release_version}, "
                f"{should_be_clustered}, '{fasta_path}', '{report_path}', '{tempmongo_instance}', {should_be_released}, "
                f"'{release_folder_name}') ON CONFLICT DO NOTHING")
            execute_query(pg_conn, query_insert)

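# A minimal alternative sketch, not part of the original module: the insert above
# interpolates values straight into the SQL text, which breaks if a scientific_name
# ever contains an apostrophe. psycopg2 parameter binding avoids the quoting issue;
# 'insert_release_row' and its tuple argument are assumptions made for illustration.
def insert_release_row(pg_conn, row_values):
    with pg_conn.cursor() as cursor:
        cursor.execute(
            'INSERT INTO eva_progress_tracker.clustering_release_tracker '
            '(sources, taxonomy, scientific_name, assembly_accession, release_version, should_be_clustered, '
            'fasta_path, report_path, tempmongo_instance, should_be_released, release_folder_name) '
            'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT DO NOTHING',
            row_values)
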
def get_dates_from_variant_warehouse(assembly, project, analysis_filename, metadata_handle, mongo_handle,
                                     database_name):
    logger.info("Database name to query: {0}".format(database_name))
    logger.info("Getting counts from files collection for {0}, {1}, {2}".format(assembly, project, analysis_filename))
    files_collection = mongo_handle[database_name]['files_2_0']
    where_clause = []
    for analysis, filename in analysis_filename.items():
        where = {'sid': project, 'fid': analysis, 'fname': filename}
        where_clause.append(where)
    cursor_files = files_collection.find({"$or": where_clause}, {"fid": 1, "sid": 1, "date": 1, "_id": 0})
    for dates in cursor_files:
        analysis = dates['fid']
        date = dates['date']
        logger.info("Update dates for {0}, {1}, {2}".format(assembly, project, analysis))
        # Trailing spaces are required on each fragment so the concatenated SQL stays valid
        update_date_query = "update eva_stats.stats " \
                            "set date_processed = '{3}' " \
                            "where assembly_accession = '{0}' " \
                            "and project_accession = '{1}' " \
                            "and analysis_accession = '{2}'".format(assembly, project, analysis, date)
        execute_query(metadata_handle, update_date_query)

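# Illustrative only (hypothetical accessions and filename): for a single entry in
# analysis_filename, the filter document passed to find() above has this shape.
example_files_filter = {'$or': [{'sid': 'PRJEB1234', 'fid': 'ERZ999999', 'fname': 'example.vcf.gz'}]}
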
def update_assembly_set_in_analysis(self):
    taxonomy = self.eload_cfg.query('submission', 'taxonomy_id')
    analyses = self.eload_cfg.query('submission', 'analyses')
    with self.metadata_connection_handle as conn:
        for analysis_alias, analysis_data in analyses.items():
            assembly_accession = analysis_data['assembly_accession']
            assembly_set_id = get_assembly_set(conn, taxonomy, assembly_accession)
            analysis_accession = self.eload_cfg.query('brokering', 'ena', 'ANALYSIS', analysis_alias)
            # Check if the update is needed
            check_query = (f"select assembly_set_id from evapro.analysis "
                           f"where analysis_accession = '{analysis_accession}';")
            res = get_all_results_for_query(conn, check_query)
            if res and res[0][0] != assembly_set_id:
                if res[0][0]:
                    self.error(f'Previous assembly_set_id {res[0][0]} for {analysis_accession} was wrong and '
                               f'will be updated to {assembly_set_id}')
                analysis_update = (f"update evapro.analysis "
                                   f"set assembly_set_id = '{assembly_set_id}' "
                                   f"where analysis_accession = '{analysis_accession}';")
                execute_query(conn, analysis_update)

def create_multimap_snp_table_and_indices(metadata_connection_handle, dbsnp_species_name, species_info):
    union_of_snpmapinfo_tables_query = " union all ".join(
        ["select snp_id, weight, {0} as assembly from dbsnp_{1}.{2} where weight > 1".format(
            "||'.'||".join(get_snpmapinfo_asm_columns(species_info, table_name)), dbsnp_species_name, table_name)
         for table_name in get_snpmapinfo_tables_with_overweight_snps_for_dbsnp_species(
             metadata_connection_handle, dbsnp_species_name)])
    if len(union_of_snpmapinfo_tables_query) > 0:
        multimap_snp_table_name = "multimap_snps"
        table_creation_query = """
        create table if not exists dbsnp_{0}.{1} as (select distinct * from ({2}) temp);
        """.format(dbsnp_species_name, multimap_snp_table_name, union_of_snpmapinfo_tables_query)
        with get_db_conn_for_species(species_info) as species_connection_handle:
            logger.info("Executing query: " + table_creation_query)
            execute_query(species_connection_handle, table_creation_query)
            for column in ["snp_id", "weight", "assembly"]:
                create_index_on_table(species_connection_handle, "dbsnp_" + dbsnp_species_name,
                                      multimap_snp_table_name, [column])
                vacuum_analyze_table(species_connection_handle, "dbsnp_" + dbsnp_species_name,
                                     multimap_snp_table_name, [column])
            execute_query(species_connection_handle,
                          "grant select on dbsnp_{0}.{1} to dbsnp_ro".format(dbsnp_species_name,
                                                                             multimap_snp_table_name))

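# Illustrative only (hypothetical schema, table and assembly columns): one arm of
# the union built above, for a snpmapinfo table whose assembly columns are
# ('asm_acc', 'asm_version'), would read:
example_union_arm = ("select snp_id, weight, asm_acc||'.'||asm_version as assembly "
                     "from dbsnp_cow_9913.b151_snpmapinfo where weight > 1")
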
def insert_file_into_evapro(file_dict):
    filename = os.path.basename(file_dict['filename'])
    if file_dict['filename'].endswith('.vcf.gz') or file_dict['filename'].endswith('.vcf'):
        file_type = 'vcf'
    elif file_dict['filename'].endswith('.tbi'):
        file_type = 'tabix'
    else:
        raise ValueError('Unsupported file type')
    ftp_file = 'ftp.sra.ebi.ac.uk/vol1/' + file_dict['filename']
    query = ('insert into file '
             '(filename, file_md5, file_type, file_class, file_version, is_current, file_location, ftp_file) '
             f"values ('{filename}', '{file_dict['md5']}', '{file_type}', 'submitted', 1, 1, "
             f"'scratch_folder', '{ftp_file}')")
    with get_metadata_connection_handle(cfg['maven']['environment'], cfg['maven']['settings_file']) as conn:
        logger.info(f'Create file {filename} in the file table')
        execute_query(conn, query)
    file_id = get_file_id_from_md5(file_dict['md5'])
    # Use the file_id as a stand-in for the ENA submission file id
    query = f'update file set ena_submission_file_id={file_id} where file_id={file_id}'
    with get_metadata_connection_handle(cfg['maven']['environment'], cfg['maven']['settings_file']) as conn:
        logger.info('Add file id in place of the ena_submission_file_id')
        execute_query(conn, query)
    return file_id

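# Illustrative only (hypothetical submission path): for
# file_dict['filename'] == 'ERA123/ERZ456/example.vcf.gz', the row inserted above
# gets filename 'example.vcf.gz', file_type 'vcf' and the following ftp_file value.
example_ftp_file = 'ftp.sra.ebi.ac.uk/vol1/ERA123/ERZ456/example.vcf.gz'
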
def insert_browsable_files(self):
    with self.metadata_connection_handle as conn:
        # insert into browsable file table, if files not already there
        files_query = (f"select file_id, ena_submission_file_id,filename,project_accession,assembly_set_id "
                       f"from evapro.browsable_file "
                       f"where project_accession = '{self.project_accession}';")
        rows_in_table = get_all_results_for_query(conn, files_query)
        find_browsable_files_query = (
            "select file.file_id,ena_submission_file_id,filename,project_accession,assembly_set_id "
            "from (select * from analysis_file af "
            "join analysis a on a.analysis_accession = af.analysis_accession "
            "join project_analysis pa on af.analysis_accession = pa.analysis_accession "
            f"where pa.project_accession = '{self.project_accession}' ) myfiles "
            "join file on file.file_id = myfiles.file_id where file.file_type ilike 'vcf';")
        # The expected rows must come from the join query, not from browsable_file itself,
        # otherwise the comparison below always succeeds
        rows_expected = get_all_results_for_query(conn, find_browsable_files_query)
        if len(rows_in_table) > 0:
            if set(rows_in_table) == set(rows_expected):
                self.info('Browsable files already inserted, skipping')
            else:
                self.warning(f'Found {len(rows_in_table)} browsable file rows in the table but they are different '
                             f'from the expected ones: '
                             f'{os.linesep + os.linesep.join([str(row) for row in rows_expected])}')
        else:
            self.info('Inserting browsable files...')
            insert_query = ("insert into browsable_file (file_id,ena_submission_file_id,filename,project_accession,"
                            "assembly_set_id) " + find_browsable_files_query)
            execute_query(conn, insert_query)

def create_table_to_collect_assembly_report_genbank_contigs(private_config_xml_file):
    with psycopg2.connect(get_pg_metadata_uri_for_eva_profile("development", private_config_xml_file),
                          user="******") as metadata_connection_handle:
        create_table_to_store_asm_report_contigs_query = \
            "create table if not exists {0} " \
            "(assembly_accession text, contig_accession text, " \
            "chromosome_name text)".format(asm_report_contigs_table_name)
        execute_query(metadata_connection_handle, create_table_to_store_asm_report_contigs_query)

def get_counts_from_variant_warehouse(assembly, project, analyses, metadata_handle, mongo_handle, database_name):
    logger.info("Database name to query: {0}".format(database_name))
    logger.info("Getting counts from variants collection for {0}, {1}, {2}".format(assembly, project, analyses))
    variants_collection = mongo_handle[database_name]['variants_2_0']
    pipeline = [
        {"$match": {"files.sid": project, "files.fid": {"$in": analyses}}},
        {"$project": {"_id": 0, "files.fid": 1}},
        {"$unwind": "$files"},
        {"$unwind": "$files.fid"},
        {"$match": {"files.fid": {"$in": analyses}}},
        {"$group": {"_id": "$files.fid", "count": {"$sum": 1}}},
        {"$project": {"_id": 0, "files.fid": "$_id", "count": 1}}
    ]
    cursor_variants = variants_collection.aggregate(pipeline=pipeline, allowDiskUse=True)
    for stat in cursor_variants:
        count = stat['count']
        analysis = stat['files']['fid']
        logger.info("Update counts for {0}, {1}, {2}".format(assembly, project, analysis))
        # Trailing spaces are required on each fragment so the concatenated SQL stays valid
        update_counts_query = "update eva_stats.stats " \
                              "set variants_variant_warehouse = {3} " \
                              "where assembly_accession = '{0}' " \
                              "and project_accession = '{1}' " \
                              "and analysis_accession = '{2}'".format(assembly, project, analysis, count)
        execute_query(metadata_handle, update_counts_query)

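# Illustrative shape of one document yielded by the aggregation above (hypothetical
# values): the final $project renames the group _id back to files.fid, so each
# result pairs an analysis accession with its variant count.
example_variant_stat = {'count': 123456, 'files': {'fid': 'ERZ999999'}}
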
def update_browsable_files_with_date(self):
    with self.metadata_connection_handle as conn:
        # update loaded and release date
        release_date = self.eload_cfg.query('brokering', 'ena', 'hold_date')
        release_update = f"update evapro.browsable_file " \
                         f"set loaded = true, eva_release = '{release_date.strftime('%Y%m%d')}' " \
                         f"where project_accession = '{self.project_accession}';"
        execute_query(conn, release_update)

def create_asm_lookup_table(metadata_connection_handle, asm_lookup_table_name):
    asm_lookup_table_creation_query = "create table if not exists " \
                                      "{0} " \
                                      "(database_name text, snpmapinfo_table_name text, " \
                                      "assembly text, assembly_accession text, " \
                                      "constraint unique_constraint unique(database_name, snpmapinfo_table_name, " \
                                      "assembly))".format(asm_lookup_table_name)
    execute_query(metadata_connection_handle, asm_lookup_table_creation_query)
    metadata_connection_handle.commit()

def set_status_failed(self, assembly, taxid):
    query = ('UPDATE eva_progress_tracker.remapping_tracker '
             "SET remapping_status = 'Failed' "
             f"WHERE origin_assembly_accession='{assembly}' AND taxonomy='{taxid}'")
    with get_metadata_connection_handle(cfg['maven']['environment'], cfg['maven']['settings_file']) as pg_conn:
        execute_query(pg_conn, query)

def update_rs_count_for_taxonomy_assembly(pg_conn, rs_count, taxonomy, assembly):
    logger.info(f'updating rs count({rs_count}) for taxonomy({taxonomy}) and assembly({assembly})')
    query_update = (f"UPDATE eva_progress_tracker.clustering_release_tracker "
                    f"SET num_rs_to_release={rs_count} "
                    f"WHERE taxonomy={taxonomy} and assembly_accession='{assembly}'")
    execute_query(pg_conn, query_update)

def create_table_to_collect_possible_assemblies(private_config_xml_file):
    with psycopg2.connect(get_pg_metadata_uri_for_eva_profile("development", private_config_xml_file),
                          user="******") as metadata_connection_handle:
        create_table_to_store_possible_assemblies_query = \
            "create table if not exists {0} " \
            "(genbank_accession text, assembly_accession text, " \
            "primary key (genbank_accession, assembly_accession))".format(possible_assemblies_table_name)
        execute_query(metadata_connection_handle, create_table_to_store_possible_assemblies_query)

def create_table_accession_counts(private_config_xml_file):
    with psycopg2.connect(get_pg_metadata_uri_for_eva_profile("development", private_config_xml_file),
                          user="******") as metadata_connection_handle:
        query_create_table = (
            'CREATE TABLE IF NOT EXISTS eva_stats.submitted_variants_load_counts '
            '(source TEXT, taxid INTEGER, assembly_accession TEXT, project_accession TEXT, date_loaded TIMESTAMP, '
            'number_submitted_variants BIGINT NOT NULL, '
            'primary key(taxid, assembly_accession, project_accession, date_loaded))')
        execute_query(metadata_connection_handle, query_create_table)

def insert_file_analysis_into_evapro(file_dict):
    # Values must follow the declared column order: analysis_accession first, then file_id
    query = (f"insert into analysis_file (ANALYSIS_ACCESSION,FILE_ID) "
             f"values ('{file_dict['analysis_accession']}', {file_dict['file_id']})")
    with get_metadata_connection_handle(cfg['maven']['environment'], cfg['maven']['settings_file']) as conn:
        logger.info(f"Create file {file_dict['file_id']} in the analysis_file table for "
                    f"'{file_dict['analysis_accession']}'")
        execute_query(conn, query)

def remove_file_from_analysis(file_dict):
    query = (f"delete from analysis_file "
             f"where file_id={file_dict['file_id']} and analysis_accession='{file_dict['analysis_accession']}'")
    with get_metadata_connection_handle(cfg['maven']['environment'], cfg['maven']['settings_file']) as conn:
        logger.info(f"Remove file {file_dict['file_id']} from the analysis_file table for "
                    f"'{file_dict['analysis_accession']}'")
        execute_query(conn, query)

def create_table_for_count_validation(private_config_xml_file):
    with psycopg2.connect(get_pg_metadata_uri_for_eva_profile("development", private_config_xml_file),
                          user="******") as metadata_connection_handle:
        query_create_table_for_count_validation = \
            "create table if not exists {0} " \
            "(mongo_host text, database text, collection text, " \
            "document_count bigint not null, report_time timestamp, " \
            "primary key(mongo_host, database, collection, report_time))" \
            .format(mongo_migration_count_validation_table_name)
        execute_query(metadata_connection_handle, query_create_table_for_count_validation)

def write_counts_to_table(private_config_xml_file, counts):
    all_columns = counts[0].keys()
    all_values = [f"({','.join(str(species_counts[c]) for c in all_columns)})" for species_counts in counts]
    insert_query = f"insert into {species_table_name} " \
                   f"({','.join(all_columns)}) " \
                   f"values {','.join(all_values)}"
    with get_metadata_connection_handle('development', private_config_xml_file) as db_conn:
        execute_query(db_conn, insert_query)

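# A minimal alternative sketch, not part of the original module: the values above
# are interpolated unquoted, so this only works while every column is numeric.
# psycopg2's execute_values helper (psycopg2.extras) handles quoting and multi-row
# inserts; 'write_counts_with_execute_values' is an assumed name for illustration.
from psycopg2.extras import execute_values

def write_counts_with_execute_values(db_conn, table_name, counts):
    columns = list(counts[0].keys())
    rows = [tuple(species_counts[c] for c in columns) for species_counts in counts]
    with db_conn.cursor() as cursor:
        execute_values(cursor, f"insert into {table_name} ({','.join(columns)}) values %s", rows)
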
def set_clustering_status(private_config_xml_file, clustering_tracking_table, assembly, tax_id, release_version,
                          status):
    now = datetime.datetime.now().isoformat()
    update_status_query = f"UPDATE {clustering_tracking_table} "
    update_status_query += f"SET clustering_status='{status}'"
    if status == 'Started':
        update_status_query += f", clustering_start='{now}'"
    elif status == 'Completed':
        update_status_query += f", clustering_end='{now}'"
    update_status_query += (f" WHERE assembly_accession='{assembly}' AND taxonomy='{tax_id}' "
                            f"AND release_version={release_version}")
    with get_metadata_connection_handle("development", private_config_xml_file) as metadata_connection_handle:
        execute_query(metadata_connection_handle, update_status_query)

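# Illustrative only (hypothetical table and argument values): for status='Started'
# the statement assembled above takes this shape.
example_status_update = ("UPDATE eva_progress_tracker.clustering_release_tracker "
                         "SET clustering_status='Started', clustering_start='2024-01-01T00:00:00' "
                         "WHERE assembly_accession='GCA_000001405.28' AND taxonomy='9606' AND release_version=4")
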
def create_table_to_collect_mongo_genbank_contigs(private_config_xml_file):
    with psycopg2.connect(get_pg_metadata_uri_for_eva_profile("development", private_config_xml_file),
                          user="******") as metadata_connection_handle:
        # Renamed from create_table_to_store_asm_report_contigs_query: this query
        # creates the mongo genbank contigs table, not the assembly report one
        create_table_to_store_mongo_genbank_contigs_query = \
            "create table if not exists {0} " \
            "(source text, assembly_accession text, " \
            "study text, contig_accession text, chromosome_name text, " \
            "num_entries_in_db bigint, is_contig_in_asm_report boolean, " \
            "primary key(source, assembly_accession, study, " \
            "contig_accession))".format(mongo_genbank_contigs_table_name)
        execute_query(metadata_connection_handle, create_table_to_store_mongo_genbank_contigs_query)

def create_table_for_progress(private_config_xml_file):
    with psycopg2.connect(get_pg_metadata_uri_for_eva_profile("development", private_config_xml_file),
                          user="******") as metadata_connection_handle:
        # Note: the backticks around target_assembly_accession were MySQL syntax and
        # would be rejected by PostgreSQL, so they are dropped here
        query_create_table = (
            'CREATE TABLE IF NOT EXISTS remapping_progress '
            '(source TEXT, taxid INTEGER, scientific_name TEXT, assembly_accession TEXT, '
            'number_of_study INTEGER NOT NULL, '
            'number_submitted_variants BIGINT NOT NULL, release_number INTEGER, target_assembly_accession TEXT, '
            'report_time TIMESTAMP DEFAULT NOW(), progress_status TEXT, start_time TIMESTAMP, '
            'completion_time TIMESTAMP, remapping_version TEXT, nb_variant_extracted INTEGER, '
            'nb_variant_remapped INTEGER, nb_variant_ingested INTEGER, '
            'primary key(source, taxid, assembly_accession, release_number))')
        execute_query(metadata_connection_handle, query_create_table)

def create_stats_table(private_config_xml_file, ftp_table_name):
    with get_metadata_connection_handle('development', private_config_xml_file) as metadata_connection_handle:
        query_create_table = (
            f'CREATE TABLE IF NOT EXISTS {ftp_table_name} '
            '(_index TEXT, _id TEXT, event_ts_txt TEXT, event_ts TIMESTAMP, host TEXT, uhost TEXT,'
            ' request_time TEXT, request_year INTEGER, request_ts TIMESTAMP,'
            ' file_name TEXT, file_size BIGINT, transfer_time INTEGER,'
            ' transfer_type CHAR, direction CHAR, special_action CHAR(4), access_mode CHAR,'
            ' country CHAR(2), region TEXT, city TEXT, domain_name TEXT, isp TEXT, usage_type TEXT,'
            ' primary key(_index, _id))')
        execute_query(metadata_connection_handle, query_create_table)

def update_files_with_ftp_path(self):
    files_query = f"select file_id, filename from evapro.browsable_file " \
                  f"where project_accession = '{self.project_accession}';"
    with self.metadata_connection_handle as conn:
        # update FTP file paths
        rows = get_all_results_for_query(conn, files_query)
        if len(rows) == 0:
            raise ValueError('Something went wrong with loading from ENA')
        for file_id, filename in rows:
            ftp_update = f"update evapro.file " \
                         f"set ftp_file = '/ftp.ebi.ac.uk/pub/databases/eva/{self.project_accession}/{filename}' " \
                         f"where file_id = '{file_id}';"
            execute_query(conn, ftp_update)

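# Illustrative only (hypothetical project and filename): the ftp_file value written
# above follows this pattern.
example_ftp_path = '/ftp.ebi.ac.uk/pub/databases/eva/PRJEB1234/example.vcf.gz'
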
def create_table_for_progress(private_config_xml_file):
    with get_metadata_connection_handle("development", private_config_xml_file) as metadata_connection_handle:
        query_create_table = (
            'CREATE TABLE IF NOT EXISTS eva_progress_tracker.remapping_tracker '
            '(source TEXT, taxonomy INTEGER, scientific_name TEXT, origin_assembly_accession TEXT, '
            'num_studies INTEGER NOT NULL, '
            'num_ss_ids BIGINT NOT NULL, release_version INTEGER, assembly_accession TEXT, '
            'remapping_report_time TIMESTAMP DEFAULT NOW(), remapping_status TEXT, remapping_start TIMESTAMP, '
            'remapping_end TIMESTAMP, remapping_version TEXT, num_ss_extracted INTEGER, '
            'num_ss_remapped INTEGER, num_ss_ingested INTEGER, '
            'primary key(source, taxonomy, origin_assembly_accession, release_version))')
        execute_query(metadata_connection_handle, query_create_table)

def insert_into_stats(metadata_handle, row):
    assembly, project, analysis, file_id, filename, file_type, taxonomy_id = row
    check_exist_query = "select * from eva_stats.stats where assembly_accession = '{0}' and project_accession = " \
                        "'{1}' and analysis_accession = '{2}'".format(assembly, project, analysis)
    if len(get_all_results_for_query(metadata_handle, check_exist_query)) == 0:
        insert_query = "insert into eva_stats.stats(assembly_accession, project_accession, analysis_accession, " \
                       "file_id, filename, file_type, taxonomy_id) values ('{0}','{1}','{2}','{3}','{4}','{5}', " \
                       "{6})".format(assembly, project, analysis, file_id, filename, file_type, taxonomy_id)
        logger.info("Insert data for {0}, {1}, {2} in eva_stats table".format(assembly, project, analysis))
        execute_query(metadata_handle, insert_query)
    else:
        logger.info("Already exists {0}, {1}, {2} in eva_stats table".format(assembly, project, analysis))

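# A minimal alternative sketch (assumption: this would require a unique constraint
# on (assembly_accession, project_accession, analysis_accession), which the source
# does not confirm exists): the select-then-insert pair above could then collapse
# into a single idempotent statement.
def insert_into_stats_idempotent(metadata_handle, row):
    assembly, project, analysis, file_id, filename, file_type, taxonomy_id = row
    insert_query = ("insert into eva_stats.stats(assembly_accession, project_accession, analysis_accession, "
                    "file_id, filename, file_type, taxonomy_id) values ('{0}','{1}','{2}','{3}','{4}','{5}', {6}) "
                    "on conflict do nothing".format(assembly, project, analysis, file_id, filename, file_type,
                                                    taxonomy_id))
    execute_query(metadata_handle, insert_query)
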
def fill_in_from_previous_inventory(private_config_xml_file, release_version):
    # Copy the release-2 inventory of filesystem-only dbSNP species into the tracker
    # for the given release_version (hence the hardcoded release_version=2 filter)
    query = ("select taxonomy_id, scientific_name, assembly, sources, total_num_variants, release_folder_name "
             "from dbsnp_ensembl_species.release_species_inventory "
             "where sources='DBSNP - filesystem' and release_version=2")
    with get_metadata_connection_handle("production", private_config_xml_file) as pg_conn:
        for taxonomy, scientific_name, assembly, sources, total_num_variants, release_folder_name in \
                get_all_results_for_query(pg_conn, query):
            should_be_clustered = False
            should_be_released = False
            query_insert = (
                'INSERT INTO eva_progress_tracker.clustering_release_tracker '
                '(sources, taxonomy, scientific_name, assembly_accession, release_version, should_be_clustered, '
                'should_be_released, release_folder_name) '
                f"VALUES ('{sources}', {taxonomy}, '{scientific_name}', '{assembly}', {release_version}, "
                f"{should_be_clustered}, {should_be_released}, '{release_folder_name}') ON CONFLICT DO NOTHING")
            execute_query(pg_conn, query_insert)

def insert_new_entry_for_taxonomy_assembly(pg_conn, sources, rs_count, release_version, taxonomy, assembly,
                                           reference_directory):
    logger.info(f'inserting rs count({rs_count}) for taxonomy({taxonomy}) and assembly({assembly})')
    scientific_name = get_scientific_name(pg_conn, taxonomy)
    release_folder_name = normalise_taxon_scientific_name(scientific_name)
    ncbi_assembly = NCBIAssembly(assembly, scientific_name, reference_directory)
    fasta_path = ncbi_assembly.assembly_fasta_path
    report_path = ncbi_assembly.assembly_report_path
    tempmongo_instance = get_tempmongo_instance(pg_conn, taxonomy)
    should_be_clustered = False
    should_be_released = True
    query_insert = (
        'INSERT INTO eva_progress_tracker.clustering_release_tracker '
        '(sources, taxonomy, scientific_name, assembly_accession, release_version, should_be_clustered, '
        'fasta_path, report_path, tempmongo_instance, should_be_released, release_folder_name) '
        f"VALUES ('{sources}', {taxonomy}, '{scientific_name}', '{assembly}', {release_version}, "
        f"{should_be_clustered}, '{fasta_path}', '{report_path}', '{tempmongo_instance}', {should_be_released}, "
        f"'{release_folder_name}') ON CONFLICT DO NOTHING")
    execute_query(pg_conn, query_insert)

def store_accessioning_counts(mongo_handle, metadata_handle, collection_name, provided_assemblies):
    collection = mongo_handle["eva_accession_sharded"][collection_name]
    assembly_field = "asm" if collection_name == "clusteredVariantEntity" else "seq"
    stats_table = "rs_stats" if collection_name == "clusteredVariantEntity" else "ss_stats"
    assemblies = provided_assemblies if provided_assemblies else collection.distinct(assembly_field)
    for assembly in assemblies:
        pipeline = [
            {"$match": {assembly_field: assembly}},
            {"$group": {"_id": assembly, "count": {"$sum": 1}}}
        ]
        cursor_stat = collection.aggregate(pipeline=pipeline, allowDiskUse=True)
        for stat in cursor_stat:
            logger.info(stat)
            assembly = stat["_id"]
            count = stat["count"]
            query = "insert into eva_stats.{2} values ('{0}', {1}) " \
                    "on conflict(assembly) do update set num_variants = {1}".format(assembly, count, stats_table)
            execute_query(metadata_handle, query)

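# A minimal equivalent sketch, not part of the original module: because the $group
# stage above uses the constant assembly value as _id, the pipeline is a plain
# per-assembly document count, which pymongo can also express directly.
def count_documents_for_assembly(collection, assembly_field, assembly):
    return collection.count_documents({assembly_field: assembly})
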
def update_loaded_assembly_in_browsable_files(self):
    # find the assembly associated with each browsable file and copy it to the browsable_file table
    query = ('select bf.file_id, a.vcf_reference_accession '
             'from analysis a '
             'join analysis_file af on a.analysis_accession=af.analysis_accession '
             'join browsable_file bf on af.file_id=bf.file_id '
             f"where bf.project_accession='{self.project_accession}';")
    with self.metadata_connection_handle as conn:
        rows = get_all_results_for_query(conn, query)
        if len(rows) == 0:
            raise ValueError('Something went wrong with loading from ENA')
        # Update each file with its associated assembly accession
        for file_id, assembly_accession in rows:
            assembly_update = f"update evapro.browsable_file " \
                              f"set loaded_assembly = '{assembly_accession}' " \
                              f"where file_id = '{file_id}';"
            execute_query(conn, assembly_update)