def run_pgdb_to_sbml(species_multiprocess_data):
    """Convert one species PGDB into an SBML file (multiprocessing worker).

    Args:
        species_multiprocess_data (list): job description, read by index:
            [0] pathname to the species pgdb dir,
            [1] pathname to the species sbml file to write,
            [2] SBML level forwarded as ``sbml_lvl`` to the SBML generator,
            [3] value forwarded as ``no_orphan`` to the PGDB-to-padmet conversion,
            [4] pathname given to ``padmet.generateFile``; the padmet is only
                written when this item is truthy.

    Returns:
        sbml_check (bool): value returned by ``utils.is_valid_path`` for the sbml pathname
    """
    pgdb_dir = species_multiprocess_data[0]
    sbml_path = species_multiprocess_data[1]
    level = species_multiprocess_data[2]
    no_orphan = species_multiprocess_data[3]
    padmet_path = species_multiprocess_data[4]

    species_padmet = pgdb_to_padmet.from_pgdb_to_padmet(
        pgdb_folder=pgdb_dir,
        extract_gene=True,
        no_orphan=no_orphan,
    )

    # Writing the intermediate padmet is optional.
    if padmet_path:
        species_padmet.generateFile(padmet_path)

    sbmlGenerator.padmet_to_sbml(species_padmet, sbml_path, sbml_lvl=level, verbose=False)

    return utils.is_valid_path(sbml_path)
def reduce_network(padmet_file:str, empty_padmet:str, reaction_list:list, sbml_output:str, del_cof:bool=False):
    """Create a sbml starting with the desired reactions.

    Args:
        padmet_file (str): path to padmet containing all reactions
        empty_padmet (str): path to empty padmet that will be filled
        reaction_list (list): coded ids of the reactions to be retrieved
            (each one is decoded with ``convert_from_coded_id``)
        sbml_output (str): path to sbml file to be written
        del_cof (bool): if True, remove from every copied reaction the
            relations whose ``id_out`` is one of the decoded ``COFACTORS``
            before the sbml is written. Defaults to False.
    """
    p_ref = PadmetRef(padmet_file)
    p_spec = PadmetSpec(empty_padmet)

    reaction_ids = [convert_from_coded_id(coded_id)[0] for coded_id in reaction_list]
    for rxn_id in reaction_ids:
        p_spec.copyNode(p_ref, rxn_id)

    if del_cof:
        # Only decode the cofactors when they are needed; a set gives O(1)
        # membership tests inside the nested loop below.
        cofactor_ids = {convert_from_coded_id(coded_id)[0] for coded_id in COFACTORS}
        for rxn_id in reaction_ids:
            # Collect first, delete afterwards: _delRelation presumably edits
            # the relation containers being iterated here (TODO confirm).
            cof_linked_rlt = [
                rlt for rlt in p_spec.dicOfRelationIn[rxn_id]
                if rlt.id_out in cofactor_ids
            ]
            for rel in cof_linked_rlt:
                p_spec._delRelation(rel)

    padmet_to_sbml(p_spec, sbml_output, sbml_lvl=3, verbose=True)
def test_sbml_to_sbml():
    """Check that from_sbml_to_sbml keeps every gene, compound and reaction.

    An SBML is generated from the test PGDB, re-exported through
    from_sbml_to_sbml, and both files are parsed with extract_data_sbml.
    """
    source_sbml = 'fabo.sbml'
    converted_sbml = 'fabo_2.sbml'

    padmet_spec = from_pgdb_to_padmet('test_data/pgdb', extract_gene=True)
    padmet_to_sbml(padmet_spec, source_sbml)
    from_sbml_to_sbml(source_sbml, converted_sbml, 2, cpu=1)

    source_genes, source_cpds, source_rxns = extract_data_sbml(source_sbml)
    converted_genes, converted_cpds, converted_rxns = extract_data_sbml(converted_sbml)

    # The converted file must contain the reference content...
    assert set(FABO_RXNS) <= set(converted_rxns)
    assert set(FABO_CPDS) <= set(converted_cpds)
    assert set(FABO_GENES) <= set(converted_genes)

    # ...and everything that was present in the source file.
    assert set(source_rxns) <= set(converted_rxns)
    assert set(source_cpds) <= set(converted_cpds)
    assert set(source_genes) <= set(converted_genes)

    for generated_file in (source_sbml, converted_sbml):
        os.remove(generated_file)
def test_flux_analysis():
    """Smoke test for flux_analysis on the SBML built from the test PGDB.

    The SBML is written with 'KETOACYLCOATHIOL-RXN' passed as ``obj_fct``.
    There is no assertion: the test passes when nothing raises.
    """
    sbml_path = 'fabo.sbml'

    padmet_spec = from_pgdb_to_padmet('test_data/pgdb', extract_gene=True)
    padmet_to_sbml(padmet_spec, sbml_path, obj_fct='KETOACYLCOATHIOL-RXN')

    flux_analysis(
        sbml_path,
        seeds_file='test_data/seeds.sbml',
        targets_file='test_data/targets.sbml',
        all_species=True,
    )

    os.remove(sbml_path)
def run_merge(run_id, nb_cpu_to_use, verbose, veryverbose=None):
    """Run the merge step: build per-organism outputs, then the panmetabolism.

    The padmets to merge are taken from the first of these folders that
    contains at least one '.padmet' file: structural padmets, filtered
    orthofinder padmets, orthofinder padmets, annotation padmets.
    ``create_output`` is then mapped over them in a process pool, and a
    'panmetabolism.padmet' / 'panmetabolism.sbml' pair is written in the
    networks folder.

    Args:
        run_id: identifier given to ``parse_config_file`` to load the run paths.
        nb_cpu_to_use (int): number of worker processes of the pool.
        verbose (bool): print step banners and the elapsed time.
        veryverbose (bool): verbosity flag forwarded to the padmet functions
            and to ``create_output``. Defaults to None.

    Raises:
        SystemExit: if none of the candidate folders contains a padmet.
    """
    if verbose:
        print('--- Running merge step ---')
    merge_start_time = time.time()

    config_data = parse_config_file(run_id)

    padmet_from_annotation_path = config_data['padmet_from_annotation_path']
    padmet_from_networks_path = config_data['padmet_from_networks_path']
    sbml_from_networks_path = config_data['sbml_from_networks_path']
    database_path = config_data['database_path']
    structural_padmets_path = config_data['structural_padmets_path']
    orthofinder_filtered_path = config_data['orthofinder_filtered_path']
    orthofinder_padmet_path = config_data['orthofinder_padmet_path']
    networks_path = config_data['networks_path']

    # Candidate folders in decreasing priority. All of them are listed up
    # front, so a missing folder is reported even when an earlier one is used.
    candidate_folders = (
        structural_padmets_path,
        orthofinder_filtered_path,
        orthofinder_padmet_path,
        padmet_from_annotation_path,
    )
    padmet_sources = [
        (folder, [name for name in os.listdir(folder) if name.endswith('.padmet')])
        for folder in candidate_folders
    ]

    for folder, padmet_names in padmet_sources:
        if padmet_names:
            padmets = [(name, folder + '/' + name) for name in padmet_names]
            break
    else:
        sys.exit('No padmets have been created, run reconstruction or workflow.')

    study_draft_data = [
        {
            'padmet_path': padmet_path,
            'study_padmet': study_name,
            'padmet_from_networks_path': padmet_from_networks_path,
            'sbml_from_networks_path': sbml_from_networks_path,
            'database_path': database_path,
            'verbose': verbose,
            'veryverbose': veryverbose,
        }
        for study_name, padmet_path in padmets
    ]

    # The pool is created only once there is work to do, and is always
    # closed/joined so worker processes are not left behind on failure.
    aucome_pool = Pool(nb_cpu_to_use)
    try:
        aucome_pool.map(create_output, study_draft_data)
    finally:
        aucome_pool.close()
        aucome_pool.join()

    padmet_to_padmet.padmet_to_padmet(padmet_from_networks_path,
                                      networks_path + '/panmetabolism.padmet',
                                      verbose=veryverbose)
    sbmlGenerator.padmet_to_sbml(padmet=networks_path + '/panmetabolism.padmet',
                                 output=networks_path + '/panmetabolism.sbml',
                                 verbose=veryverbose)

    # Fixed-point formatting: splitting str(float) on '.' breaks on values
    # rendered in scientific notation.
    merge_time = '%.3f' % (time.time() - merge_start_time)

    if verbose:
        print("--- merge step done in: %ss ---" % merge_time)
def test_compare_sbml():
    """Compare two SBMLs that each lack one reaction and check the TSV output.

    'fabo_1.sbml' is built without ACYLCOASYN-RXN and 'fabo_2.sbml' without
    ACYLCOADEHYDROG-RXN. compare_multiple_sbml must then report, for each
    file, every other expected reaction, and the union of the reported
    metabolites must cover FABO_CPDS.
    """
    removed_reaction_by_sbml = (
        ('fabo_1.sbml', 'ACYLCOASYN-RXN'),
        ('fabo_2.sbml', 'ACYLCOADEHYDROG-RXN'),
    )
    for sbml_path, removed_reaction in removed_reaction_by_sbml:
        padmet_spec = from_pgdb_to_padmet('test_data/pgdb', extract_gene=True)
        padmet_spec.delNode(removed_reaction)
        padmet_to_sbml(padmet_spec, sbml_path)

    compare_multiple_sbml('fabo_1.sbml,fabo_2.sbml', 'output_folder')

    def read_presence(tsv_path, decode=None):
        """Return the ids flagged '1' in column 1 and in column 2 of a TSV."""
        in_first, in_second = [], []
        with open(tsv_path, 'r') as tsv_file:
            for row in csv.reader(tsv_file, delimiter='\t'):
                if row[1] == '1':
                    in_first.append(decode(row[0]) if decode else row[0])
                if row[2] == '1':
                    in_second.append(decode(row[0]) if decode else row[0])
        return in_first, in_second

    reactions_1, reactions_2 = read_presence('output_folder/reactions.tsv')

    expected_1 = {rxn for rxn in FABO_RXNS if rxn != 'ACYLCOASYN-RXN'}
    expected_2 = {rxn for rxn in FABO_RXNS if rxn != 'ACYLCOADEHYDROG-RXN'}

    assert expected_1 <= set(reactions_1)
    assert expected_2 <= set(reactions_2)

    # Metabolite ids are stored coded in the TSV, so decode them first.
    metabolites_1, metabolites_2 = read_presence(
        'output_folder/metabolites.tsv',
        decode=lambda coded_id: sbmlPlugin.convert_from_coded_id(coded_id)[0],
    )

    assert set(FABO_CPDS) <= (set(metabolites_1) | set(metabolites_2))

    for sbml_path, _ in removed_reaction_by_sbml:
        os.remove(sbml_path)
    shutil.rmtree('output_folder')
def create_sbml(tmp_sbml_data):
    """Write the SBML of one study from its padmet, unless it already exists.

    Args:
        tmp_sbml_data (dict): job description with the keys 'sbml_file',
            'padmet_file', 'study_name', 'verbose' and 'veryverbose'.
    """
    target_sbml = tmp_sbml_data['sbml_file']
    source_padmet = tmp_sbml_data['padmet_file']
    name = tmp_sbml_data['study_name']
    verbose = tmp_sbml_data['verbose']
    veryverbose = tmp_sbml_data['veryverbose']

    # Nothing to do when the SBML is already there or no padmet was given.
    if os.path.isfile(target_sbml) or not source_padmet:
        return

    if verbose:
        print("Creating sbml from padmet for %s" % name)
    sbmlGenerator.padmet_to_sbml(
        padmet=source_padmet,
        output=target_sbml,
        sbml_lvl=3,
        verbose=veryverbose,
    )
def test_sbmlGenerator():
    """Check that the SBML built from the test PGDB holds the expected content."""
    sbml_path = 'fabo.sbml'

    padmet_to_sbml(from_pgdb_to_padmet('test_data/pgdb', extract_gene=True), sbml_path)

    found_genes, found_compounds, found_reactions = extract_data_sbml(sbml_path)

    assert set(FABO_RXNS) <= set(found_reactions)
    assert set(FABO_CPDS) <= set(found_compounds)
    assert set(FABO_GENES) <= set(found_genes)

    os.remove(sbml_path)
def test_sbml_to_curation_form():
    """Check that the curation form written from an SBML names each reaction."""
    sbml_path = 'fabo.sbml'
    form_path = 'form.txt'
    reaction_names = ['ACYLCOADEHYDROG-RXN', 'ACYLCOASYN-RXN', 'ENOYL-COA-HYDRAT-RXN']

    padmet_spec = from_pgdb_to_padmet('test_data/pgdb', extract_gene=True)
    padmet_to_sbml(padmet_spec, sbml_path)

    # sbml_to_curation is given 'R_'-prefixed coded ids.
    coded_ids = []
    for name in reaction_names:
        coded_ids.append('R_' + sbmlPlugin.convert_to_coded_id(name))

    sbml_to_curation(sbml_path, coded_ids, form_path)
    os.remove(sbml_path)

    with open(form_path, 'r') as form_handle:
        form_content = form_handle.read()

    for name in reaction_names:
        assert name in form_content

    os.remove(form_path)
def test_sbml_to_padmet():
    """Round-trip PGDB -> SBML -> padmet and check the resulting padmet.

    The padmet rebuilt from the SBML must contain the reference reactions,
    compounds and genes, and an empty list of pathways.
    """
    sbml_path = 'fabo.sbml'
    padmet_path = 'fabo.padmet'

    padmet_to_sbml(from_pgdb_to_padmet('test_data/pgdb', extract_gene=True), sbml_path)
    sbml_to_padmetSpec(sbml_path, padmet_path)

    rebuilt_padmet = PadmetSpec(padmet_path)
    pathways, compounds, reactions, genes = extract_data_padmet(rebuilt_padmet)

    assert pathways == []
    assert set(FABO_RXNS) <= set(reactions)
    assert set(FABO_CPDS) <= set(compounds)
    assert set(FABO_GENES) <= set(genes)

    for generated_file in (sbml_path, padmet_path):
        os.remove(generated_file)
def create_output(tmp_study_data):
    """Create the networks padmet and SBML of one study (merge step worker).

    The padmet is written with ``add_spontaneous_reactions`` and the SBML with
    ``sbmlGenerator.padmet_to_sbml``; each file is skipped if it already exists.

    Args:
        tmp_study_data (dict): job description with the keys 'padmet_path',
            'study_padmet', 'verbose', 'veryverbose',
            'padmet_from_networks_path', 'sbml_from_networks_path' and
            'database_path'.
    """
    padmet_path = tmp_study_data['padmet_path']
    # The study name is the padmet file name without its extension and
    # without the 'output_pathwaytools_' marker.
    study_padmet = tmp_study_data['study_padmet'].replace('.padmet', '').replace('output_pathwaytools_', '')
    verbose = tmp_study_data['verbose']
    veryverbose = tmp_study_data['veryverbose']
    padmet_from_networks_path = tmp_study_data['padmet_from_networks_path']
    sbml_from_networks_path = tmp_study_data['sbml_from_networks_path']
    padmet_ref_path = tmp_study_data['database_path']

    # Build each output path once instead of re-concatenating it.
    output_padmet = padmet_from_networks_path + '/' + study_padmet + '.padmet'
    output_sbml = sbml_from_networks_path + '/' + study_padmet + '.sbml'

    if not os.path.exists(output_padmet):
        if verbose:
            print('Create ' + study_padmet +' from ' + padmet_path + ' to ' + padmet_from_networks_path)
        add_spontaneous_reactions(padmet_path, padmet_ref_path, output_padmet)
    else:
        # Message fixed: the word 'in' was missing before the folder.
        print('There is already a padmet for ' + study_padmet + ' in ' + padmet_from_networks_path + '.')

    if not os.path.exists(output_sbml):
        # NOTE(review): the SBML is generated from the input padmet, not from
        # the padmet written above with spontaneous reactions - confirm this
        # is intended.
        sbmlGenerator.padmet_to_sbml(padmet=padmet_path, output=output_sbml, verbose=veryverbose)
    else:
        print('There is already a sbml for ' + study_padmet + ' in ' + sbml_from_networks_path + '.')
def test_extract_rxn_with_gene_assoc():
    """Check that a reaction without gene association is filtered out."""
    full_sbml = 'fabo.sbml'
    filtered_sbml = 'fabo_rxn_with_genes.sbml'

    padmet_to_sbml(from_pgdb_to_padmet('test_data/pgdb', extract_gene=True), full_sbml)

    # Only reactions with a gene association are kept, so 2.3.1.49-RXN
    # should be absent from the filtered file.
    extract_rxn_with_gene_assoc(full_sbml, filtered_sbml, verbose=False)

    sbml_document = libsbml.SBMLReader().readSBML(filtered_sbml)
    sbml_model = sbml_document.getModel()

    kept_reactions = []
    for sbml_reaction in sbml_model.getListOfReactions():
        kept_reactions.append(sbmlPlugin.convert_from_coded_id(sbml_reaction.id)[0])

    # Clean up before asserting so the files are removed even on failure.
    os.remove(full_sbml)
    os.remove(filtered_sbml)

    assert '2.3.1.49-RXN' not in kept_reactions
def run_sbml_to_sbml(multiprocess_data):
    """Rewrite one sbml file at another sbml level (multiprocessing worker).

    The input sbml is loaded as a padmet, whose id is the output file name
    without extension, and written back with the requested level.

    Parameters
    ----------
    multiprocess_data: dict
        job description with the keys 'sbml_file' (input sbml pathname),
        'sbml_output_file' (output sbml pathname) and 'new_sbml_level'

    Returns
    -------
    bool:
        True if the output pathname is empty, is writable, or could be
        created and removed again; False if that probe raises OSError.
    """
    output_path = multiprocess_data['sbml_output_file']
    output_name, _extension = os.path.splitext(os.path.basename(output_path))

    converted_padmet = sbml_to_padmet(sbml=multiprocess_data['sbml_file'],
                                      db=None,
                                      version=None,
                                      source_tool=None,
                                      source_category=None,
                                      source_id=None,
                                      mapping=None,
                                      verbose=None,
                                      padmet_id=output_name)

    sbmlGenerator.padmet_to_sbml(converted_padmet,
                                 output_path,
                                 sbml_lvl=multiprocess_data['new_sbml_level'],
                                 verbose=False)

    # NOTE(review): this checks that the path is writable, not that the sbml
    # file exists; the probe below creates then deletes the file. Confirm
    # this is the intended success criterion.
    if not output_path or os.access(output_path, os.W_OK):
        # path is accessible
        return True

    try:
        open(output_path, 'w').close()
        os.unlink(output_path)
    except OSError:
        return False
    return True
def main():
    """Command-line entry point: dispatch to the matching SBML generator.

    The options are parsed by docopt from the module docstring. Exactly one
    mode is run, in this priority: --padmet (with or without --init_source),
    --reaction, --compound. Nothing is done if none of them is given.
    """
    args = docopt.docopt(__doc__)

    output = args["--output"]
    obj_fct = args["--obj_fct"]
    mnx_chem_xref = args["--mnx_chem_xref"]
    mnx_chem_prop = args["--mnx_chem_prop"]
    sbml_lvl = args["--sbml_lvl"]
    model_id = args["--model_id"]
    verbose = args["-v"]

    if args["--padmet"]:
        padmet_file = args["--padmet"]
        if not args["--init_source"]:
            sbmlGenerator.padmet_to_sbml(padmet_file, output, model_id, obj_fct,
                                         sbml_lvl, mnx_chem_prop, mnx_chem_xref,
                                         verbose)
            return
        init_source = args["--init_source"]
        sbmlGenerator.from_init_source(padmet_file, init_source, output, verbose)
        return

    if args["--reaction"]:
        padmetRef = args["--padmetRef"]
        reactions = args["--reaction"]
        sbmlGenerator.reaction_to_sbml(reactions, output, padmetRef, verbose)
        return

    if args["--compound"]:
        species_compart = args["--compound"]
        sbmlGenerator.compound_to_sbml(species_compart, output, verbose)
def analysis_on_group(group_name, groups, config_data, pvclust, nb_cpu_to_use, verbose):
    """Create reaction dendrogram and extract specific reactions using metabolic networks.

    Nothing is computed if the analysis folder of the group already exists;
    a message asking to delete it is printed instead.

    Args:
        group_name (str): Name of the group from group_template.tsv.
        groups (list): All the species inside the group.
        config_data (dict): Dictionary with all configuration paths
            ('database_path', 'padmet_from_networks_path' and 'analysis_path'
            are read here).
        pvclust (boolean): use also pvclust to create reaction dendrogram
        nb_cpu_to_use (int): number of CPU for multiprocessing
        verbose (bool): Verbose.

    Raises:
        SystemExit: if the group has fewer than two members, or if the padmet
            of one of its members is missing.
    """
    database_path = config_data['database_path']
    padmet_from_networks_path = config_data['padmet_from_networks_path']
    analysis_path = config_data['analysis_path']

    group_analysis_path = analysis_path + '/' + group_name

    if os.path.isdir(group_analysis_path):
        print(
            group_analysis_path +
            ' already exists. Delete it if you want to relaunch the analysis.')
        return

    # An empty group is as meaningless as a single-member one.
    if len(groups) < 2:
        sys.exit('A group must contain more than one member.')

    all_padmet_path = [
        os.path.join(padmet_from_networks_path, name + ".padmet")
        for name in groups
    ]

    for padmet_path in all_padmet_path:
        if not os.path.exists(padmet_path):
            org_name = os.path.splitext(os.path.basename(padmet_path))[0]
            sys.exit(
                "Padmet file of organism %s from group %s not found in %s" %
                (org_name, group_name, padmet_from_networks_path))

    # Load the reference database only once the inputs are validated, so it
    # is not read for nothing when the function exits early.
    padmetRef = PadmetRef(database_path)

    # Compare the padmet to create the reactions.tsv file needed to create the reaction dendrogram.
    compare_padmet.compare_padmet(padmet_path=",".join(all_padmet_path),
                                  output=group_analysis_path,
                                  padmetRef=padmetRef,
                                  verbose=verbose,
                                  number_cpu=nb_cpu_to_use)

    panmetabolism_prefix = group_analysis_path + '/' + group_name + '_panmetabolism'
    padmet_to_padmet.padmet_to_padmet(",".join(all_padmet_path),
                                      panmetabolism_prefix + '.padmet')
    sbmlGenerator.padmet_to_sbml(padmet=panmetabolism_prefix + '.padmet',
                                 output=panmetabolism_prefix + '.sbml',
                                 verbose=verbose)

    dendrogram_reactions_distance.reaction_figure_creation(
        reaction_file=group_analysis_path + '/reactions.tsv',
        output_folder=group_analysis_path + '/dendrogram_output',
        padmetRef_file=database_path,
        pvclust=pvclust,
        verbose=verbose)
parser.add_argument("-s", "--species", required=True, help="padmet species")
parser.add_argument("-p", "--pathway", required=True, help="pathway name")
parser.add_argument("-o", "--output", required=False, help="SBML output filename")

args = parser.parse_args()

# Without an explicit output, name the file after the lower-cased pathway.
outfile = args.output or args.pathway.lower() + ".sbml"

# Reference padmet, then the organism padmet that receives the reactions.
p_ref = PadmetRef(args.reference)
p_spec = PadmetSpec(args.species)

# Keep the 'is_in_pathway' relations of the pathway whose source node is a
# reaction in the reference padmet.
rxn_list = []
for relation in p_ref.dicOfRelationOut[args.pathway]:
    if relation.type != "is_in_pathway":
        continue
    if p_ref.dicOfNode[relation.id_in].type == "reaction":
        rxn_list.append(relation.id_in)

# Copy each reaction into the organism padmet, echoing its id.
for reaction_id in rxn_list:
    print(reaction_id)
    p_spec.copyNode(p_ref, reaction_id)

padmet_to_sbml(p_spec, outfile, sbml_lvl=2, verbose=True)