def test_single_attr_file(self):
    r"""Check that every attribute can be loaded on its own into an rrCache.

    Method: for each ``(attr, length)`` pair in ``self.attributes``, build
    an rrCache restricted to that single attribute, then check that the
    loaded attribute has the expected number of entries.
    """
    for name, expected_len in self.attributes:
        with self.subTest(attr=name, length=expected_len):
            single_cache = rrCache([name], logger=self.logger)
            loaded = single_cache.get(name)
            self.assertEqual(len(loaded), expected_len)
def test_all_attr(self):
    r"""Check the size of every attribute of a fully loaded rrCache.

    Method: build one rrCache with default attributes, then for each
    ``(attr, length)`` pair in ``self.attributes`` check that the loaded
    attribute has the expected number of entries.
    """
    full_cache = rrCache(logger=self.logger)
    for name, expected_len in self.attributes:
        with self.subTest(attr=name, length=expected_len):
            loaded = full_cache.get(name)
            self.assertEqual(len(loaded), expected_len)
def _cli():
    """Command-line entry point of rpcompletion.

    Parses the command line, initialises the logger, loads the rrCache
    attributes needed for completion, runs ``rp_completion`` and writes
    each returned pathway as ``rp_<pathway id>.xml`` into ``args.outdir``.
    Finally logs where the results were stored.
    """
    parser = build_args_parser(
        prog='rpcompletion',
        description='Parse RP2 pathways to generate rpSBML collection of unique and complete (cofactors) pathways',
        m_add_args=add_arguments
    )
    args = parser.parse_args()

    from rptools.__main__ import init
    logger = init(parser, args)

    check_args(args.max_subpaths_filter, args.outdir, logger)

    cache = rrCache(
        attrs=[
            'rr_reactions',
            'template_reactions',
            'cid_strc',
            'deprecatedCompID_compid',
        ]
    )

    pathways = rp_completion(
        rp2_metnet=args.rp2_metnet,
        sink=args.sink,
        rp2paths_compounds=args.rp2paths_compounds,
        rp2paths_pathways=args.rp2paths_pathways,
        cache=cache,
        upper_flux_bound=int(args.upper_flux_bound),
        lower_flux_bound=int(args.lower_flux_bound),
        max_subpaths_filter=args.max_subpaths_filter,
        logger=logger
    )

    # WRITE OUT
    # makedirs(exist_ok=True) also creates missing parent folders and does
    # not fail if the folder appears between a check and the creation.
    from os import makedirs as os_makedirs
    os_makedirs(args.outdir, exist_ok=True)

    # Write out selected pathways
    for pathway in pathways:
        pathway.to_rpSBML().write_to_file(
            os_path.join(
                args.outdir,
                'rp_' + pathway.get_id()
            ) + '.xml'
        )

    # Print the label and the folder on the same console line: the stream
    # terminator is temporarily emptied. It is a class-level attribute, so
    # restore it even if logging fails.
    StreamHandler.terminator = ""
    try:
        logger.info(
            '{color}{typo}Results are stored in {rst}'.format(
                color=fg('white'),
                typo=attr('bold'),
                rst=attr('reset')
            )
        )
    finally:
        StreamHandler.terminator = "\n"
    logger.info(
        '{color}{outdir}\n'.format(
            color=fg('grey_70'),
            outdir=args.outdir
        )
    )
def __init__(self, logger=None):
    """Constructor for the inchikeyMIRIAM class.

    :param logger: Logger to use. When None, a module-level logger set to
        the ERROR level is created.

    Loads the 'deprecatedCID_cid', 'cid_strc' and 'chebi_cid' attributes
    through an rrCache and exposes each of them as an instance attribute
    of the same name.
    """
    if logger is not None:
        self.logger = logger
    else:
        # Create logger
        fallback = logging.getLogger(__name__)
        fallback.setLevel(logging.ERROR)
        fallback.formatter = logging.Formatter('%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s')
        self.logger = fallback
    self.logger.info('Started instance of inchikeyMIRIAM')

    attr_names = ('deprecatedCID_cid', 'cid_strc', 'chebi_cid')
    self.cache = rrCache(list(attr_names), logger=self.logger)
    for attr_name in attr_names:
        setattr(self, attr_name, self.cache.get(attr_name))
def _cli():
    """Command-line entry point of rpextractsink.

    Parses the command line, initialises the logger, loads the 'cid_strc'
    attribute through an rrCache and calls ``genSink`` with the parsed
    arguments.
    """
    parser = build_args_parser(
        prog='rpextractsink',
        description='Generate the sink from a model SBML by specifying the compartment',
        m_add_args=add_arguments
    )
    args = parser.parse_args()

    from rptools.__main__ import init
    logger = init(parser, args)

    genSink(
        rrCache(['cid_strc'], logger=logger),
        args.input_sbml,
        args.output_sink,
        args.remove_dead_end,
        args.compartment_id,
        logger=logger
    )
def create_rp_compound(
    df: pd.DataFrame,
    logger: Logger = getLogger(__name__)
) -> pd.DataFrame:
    '''Associate an rpCompound object with each entry in a dataframe
    describing medium composition. Use rrCache to retrieve information
    about a compound.

    The dataframe is modified in place (an "rp_compound" column is added)
    and also returned. Rows whose compound id is missing, unknown to the
    cache, or for which the rpCompound cannot be built get NaN.

    :param df: a dataframe describing medium composition. Use a
        "compound_id" column.
    :param logger: A logging object to output information

    :type df: pd.DataFrame
    :type logger: Logger

    :return: a dataframe with an additional "rp_compound" column.
    :rtype: pd.DataFrame
    '''
    cache = rr_cache.rrCache(['cid_name'])
    # Fetch the known compound ids once: the list is large, so rebuilding
    # and scanning it for every row is needlessly expensive.
    known_ids = frozenset(cache.get_list_of_compounds())

    def _create_rp_compound(x, cache):
        # Build the rpCompound of one row, or NaN when not possible.
        compound_id = x[__MEDIUM_HEADER_COMPOUND_ID]
        # Search in rrcache.
        if pd.isna(compound_id) or compound_id not in known_ids:
            return np.nan
        # Work on a copy so that renaming the key below never alters the
        # record held by the cache.
        d_compound = dict(cache.get_compound(compound_id))
        if not d_compound:
            return np.nan
        # Fmt: rpCompound expects 'id' where the cache provides 'cid'.
        if 'cid' in d_compound:
            d_compound['id'] = d_compound.pop('cid')
        try:
            return rpCompound(**d_compound)
        except Exception as e:
            # Best effort: report and leave the cell empty. Exception
            # (not BaseException) lets KeyboardInterrupt/SystemExit through.
            logger.error(str(e))
            return np.nan

    df['rp_compound'] = df.apply(_create_rp_compound, axis=1, args=(cache, ))
    return df
class Test_rpExtractSink(TestCase):
    """Tests of genSink against reference sink files.

    Each test runs genSink on the deflated E. coli model and compares the
    produced sink file with a reference CSV stored in the data folder.
    """

    data_path = os_path.join(
        os_path.dirname(__file__),
        'data'
    )
    e_coli_model_path_gz = os_path.join(
        data_path,
        'e_coli_model.sbml.gz'
    )

    # Loaded once at class-definition time and shared by all tests.
    cache = rrCache(
        ['cid_strc']
    )

    def setUp(self):
        self.logger = create_logger(__name__, 'ERROR')

        # Create persistent temp folder
        # to deflate compressed data file so that
        # it remains reachable outside of this method.
        # It has to be removed manually in tearDown() method
        self.temp_d = mkdtemp()

        self.e_coli_model_path = extract_gz(
            self.e_coli_model_path_gz,
            self.temp_d
        )

    def tearDown(self):
        rmtree(self.temp_d)

    def _assert_sink_matches(self, ref_filename, **gensink_kwargs):
        """Run genSink and compare its output file with a reference file.

        :param ref_filename: name of the reference file in ``data_path``.
        :param gensink_kwargs: extra keyword arguments forwarded to genSink
            (e.g. ``remove_dead_end``, ``logger``).
        """
        outfile = NamedTemporaryFile(delete=False)
        outfile.close()
        # Registered before anything can fail so that the temporary file
        # is removed even when genSink raises or the comparison fails.
        self.addCleanup(remove, outfile.name)

        genSink(
            self.cache,
            input_sbml=self.e_coli_model_path,
            output_sink=outfile.name,
            compartment_id='MNXC3',
            **gensink_kwargs
        )

        with open(outfile.name, 'r') as test_f:
            test_content = test_f.read()
        with open(os_path.join(self.data_path, ref_filename), 'r') as ref_f:
            ref_content = ref_f.read()
        self.assertEqual(test_content, ref_content)

    def test_genSink(self):
        """Sink generated while keeping dead-end species."""
        self._assert_sink_matches(
            'output_sink.csv',
            remove_dead_end=False,
            logger=self.logger
        )

    def test_genSink_rmDE(self):
        """Sink generated with dead-end species removed (default logger)."""
        self._assert_sink_matches(
            'output_sink_woDE.csv',
            remove_dead_end=True
        )
def rp_completion(
    rp2_metnet,
    sink,
    rp2paths_compounds,
    rp2paths_pathways,
    cache: rrCache = None,
    upper_flux_bound: float = default_upper_flux_bound,
    lower_flux_bound: float = default_lower_flux_bound,
    max_subpaths_filter: int = default_max_subpaths_filter,
    logger: Logger = getLogger(__name__)
) -> List[rpPathway]:
    """Complete the metabolic pathways generated by RetroPath2.0 and rp2paths.

    Background:
      (1) rp2paths generates a set of master pathways, each of them being
          a set of chemical transformations.
      (2) Each chemical transformation refers to one or multiple reaction
          rules.
      (3) Each reaction rule comes from one or multiple template
          (original) chemical reactions.

    The completion, done for all master pathways of step (1), consists in:
      1. exploring all possible metabolic pathways through steps (2)
         and (3);
      2. putting back chemical species removed during the reaction rules
         building process.

    Parameters
    ----------
    rp2_metnet: str
        Path to the file containing the metabolic network
    sink: str
        Path to the file containing the list of species in the sink
    rp2paths_compounds: str
        Path to the file containing the chemical species involved in
        master metabolic pathways
    rp2paths_pathways: str
        Path to the file containing the master metabolic pathways
    cache: rrCache, optional
        Cache that contains reaction rules data. When None, a cache
        holding 'rr_reactions', 'template_reactions', 'cid_strc' and
        'deprecatedCompID_compid' is loaded.
    upper_flux_bound: float, optional
        Upper flux bound for all new reactions created
        (default: default_upper_flux_bound)
    lower_flux_bound: float, optional
        Lower flux bound for all new reactions created
        (default: default_lower_flux_bound)
    max_subpaths_filter: int, optional
        Number of pathways (best) kept per master pathway
        (default: default_max_subpaths_filter)
    logger: Logger, optional

    Returns
    -------
    List of rpPathway objects
    """
    if cache is None:
        required_attrs = [
            'rr_reactions',
            'template_reactions',
            'cid_strc',
            'deprecatedCompID_compid',
        ]
        cache = rrCache(attrs=required_attrs)

    # --- Read the input files ---
    __rp2paths_compounds_in_cache(
        infile=rp2paths_compounds, cache=cache, logger=logger
    )
    master_pathways, raw_transfos = __read_pathways(
        infile=rp2paths_pathways, logger=logger
    )
    ec_by_transfo = __read_rp2_metnet(infile=rp2_metnet, logger=logger)
    sink_species = __read_sink(infile=sink, logger=logger)

    # --- Complete the transformations ---
    completed_transfos = __complete_transformations(
        transfos=raw_transfos,
        ec_numbers=ec_by_transfo,
        cache=cache,
        logger=logger
    )

    # --- Generate the combinatorics of sub-pathways ---
    # Pathways are built over:
    #  - multiple reaction rules per transformation (TRS) and
    #  - multiple template reactions per reaction rule
    combinatorics = __build_pathway_combinatorics(
        completed_transfos,
        master_pathways,
        cache=cache,
        logger=logger
    )

    # --- Build and rank the sub-pathways ---
    rules = cache.get('rr_reactions')
    compounds = cache.get('cid_strc')
    return __build_all_pathways(
        pathways=combinatorics,
        transfos=completed_transfos,
        sink_molecules=sink_species,
        rr_reactions=rules,
        compounds_cache=compounds,
        max_subpaths_filter=max_subpaths_filter,
        lower_flux_bound=lower_flux_bound,
        upper_flux_bound=upper_flux_bound,
        logger=logger
    )
class Test_rpCompletion(TestCase):
    """Fixtures of the rp_completion test on the 'lycopene' data set.

    The only test of this class is currently commented out; the class
    attributes below are the data it relies on.
    """

    # Presumably read by the test collector to skip this class -- confirm.
    __test__ = False

    # Full cache (default attributes), loaded at class-definition time.
    cache = rrCache()

    # Input files
    data_path = os_path.join(os_path.dirname(__file__), 'data', 'lycopene')
    rp2_pathways = os_path.join(data_path, '1-rp2_pathways.csv')
    rp2paths_compounds = os_path.join(data_path, '2-rp2paths_compounds.tsv')
    rp2paths_pathways = os_path.join(data_path, '3-rp2paths_pathways.csv')

    # Pathway whose content is compared with a JSON reference
    test_file_pattern = 'rp_002_0022'
    rpsbml_xml = '{}_sbml.xml'.format(test_file_pattern)
    rpsbml_json = os_path.join(
        data_path,
        'refs',
        '{}.json'.format(test_file_pattern)
    )

    # Expected output files, mapped to the size they are compared with
    files = {
        'rp_001_0011_sbml.xml': 32217,
        'rp_001_0001_sbml.xml': 32501,
        'rp_001_0006_sbml.xml': 32086,
        'rp_002_0012_sbml.xml': 32214,
        'rp_002_0022_sbml.xml': 32465,
        'rp_002_0002_sbml.xml': 32626,
        'rp_003_0001_sbml.xml': 34943,
        'rp_003_0002_sbml.xml': 35207,
        'rp_003_0010_sbml.xml': 33746,
        'rp_003_0131_sbml.xml': 34530,
        'rp_003_0132_sbml.xml': 34794,
        'rp_003_0140_sbml.xml': 33332,
        'rp_003_0261_sbml.xml': 34658,
        'rp_003_0262_sbml.xml': 34922,
        'rp_003_0270_sbml.xml': 33461,
    }

    def setUp(self):
        self.logger = create_logger(__name__, 'ERROR')

    # Disabled test, kept for reference:
    #
    # def test_rp_completion(self):
    #     with TemporaryDirectory() as temp_d:
    #         temp_d = '/tmp/joan20'
    #         result = rp_completion(
    #             self.cache,
    #             self.rp2_pathways,
    #             self.rp2paths_compounds,
    #             self.rp2paths_pathways,
    #             temp_d,
    #             upper_flux_bound=999999,
    #             lower_flux_bound=0,
    #             max_subpaths_filter=10,
    #             pathway_id='rp_pathway',
    #             compartment_id='MNXC3',
    #             species_group_id='central_species',
    #             sink_species_group_id='rp_sink_species',
    #             pubchem_search=False,
    #             logger=self.logger
    #         )
    #         # Useless to sort files since smiles could be equivalent
    #         # and not equal, then checksum will be different
    #         for file in listdir(temp_d):
    #             self.assertEqual(
    #                 self.files[file],
    #                 Path(os_path.join(temp_d, file)).stat().st_size
    #             )
    #         rpsbml = rpSBML(os_path.join(temp_d, self.rpsbml_xml))
    #         # print(json_dumps(rpsbml.toDict(), indent=4))
    #         # self.assertTrue(False)
    #         # exit()
    #         with open(self.rpsbml_json, 'r') as f:
    #             self.assertDictEqual(rpsbml.toDict(), json_load(f))
    #         # self.assertEqual(os_stat(os_path.join(temp_d, file)).st_size, size)
def test_get_list_of_reaction_rules(self):
    """Check the content and the size of the list of reaction rules.

    The list is fetched once from a cache built with ``attrs=None``; it
    must contain a known rule id and have the expected number of entries.
    """
    cache = rrCache(attrs=None)
    rule_ids = cache.get_list_of_reaction_rules()
    # assertIn reports the missing member on failure, unlike assertTrue.
    self.assertIn('RR-02-f85f00f767901186-16-F', rule_ids)
    self.assertEqual(len(rule_ids), 229862)
def test_get_reaction_rule(self):
    """Check that a known reaction rule is returned as the expected dict."""
    cache = rrCache(attrs=None)
    actual = cache.get_reaction_rule('RR-02-f85f00f767901186-16-F')
    self.assertDictEqual(actual, self.__RR_02_f85f00f767901186_16_F)
def test_get_list_of_reactions(self):
    """Check the content and the size of the list of reactions.

    The list is fetched once from a cache built with ``attrs=None``; it
    must contain a known reaction id and have the expected number of
    entries.
    """
    cache = rrCache(attrs=None)
    reaction_ids = cache.get_list_of_reactions()
    # assertIn reports the missing member on failure, unlike assertTrue.
    self.assertIn('MNXR94688', reaction_ids)
    self.assertEqual(len(reaction_ids), 44045)
def test_get_reaction(self):
    """Check that a known reaction is returned as the expected dict."""
    cache = rrCache(attrs=None)
    actual = cache.get_reaction('MNXR94688')
    self.assertDictEqual(actual, self.__MNXR94688)
def test_get_list_of_compounds(self):
    """Check the content and the size of the list of compounds.

    The list is fetched once from a cache built with ``attrs=None``; it
    must contain a known compound id and have the expected number of
    entries.
    """
    cache = rrCache(attrs=None)
    compound_ids = cache.get_list_of_compounds()
    # assertIn reports the missing member on failure, unlike assertTrue.
    self.assertIn('MNXM2', compound_ids)
    self.assertEqual(len(compound_ids), 655684)
def test_get_compound(self):
    """Check that a known compound is returned as the expected dict."""
    cache = rrCache(attrs=None)
    actual = cache.get_compound('MNXM2')
    self.assertDictEqual(actual, self.__MNXM2)
def setUp(self):
    """Build the pathway fixture shared by the tests.

    Creates the target compound, four reactions (rxn_4 down to rxn_1),
    the FBA and thermodynamics values, the compartment description
    (annotated from an rrCache), the parameters and unit definitions,
    and finally assembles everything into ``self.pathway``.
    """
    # Target compound of the pathway; product of rxn_4 below.
    self.target = rpCompound(
        id='TARGET_0000000001',
        smiles='[H]OC(=O)C([H])=C([H])C([H])=C([H])C(=O)O[H]'
    )
    # NOTE(review): `species` is not referenced again in this method --
    # confirm whether building it is still needed.
    species = {
        # "TARGET_0000000001": rpCompound(
        #     id="TARGET_0000000001",
        #     smiles="[H]OC(=O)C([H])=C([H])C([H])=C([H])C(=O)O[H]",
        #     inchi="InChI=1S/C6H6O4/c7-5(8)3-1-2-4-6(9)10/h1-4H,(H,7,8)(H,9,10)",
        #     inchikey="TXXHDPDFNKHHGW-UHFFFAOYSA-N"
        # ),
        "TARGET_0000000001": self.target,
        "CMPD_0000000010": rpCompound(
            id="CMPD_0000000010",
            smiles="[H]OC(=O)c1c([H])c([H])c(O[H])c(O[H])c1[H]",
            inchi="InChI=1S/C7H6O4/c8-5-2-1-4(7(10)11)3-6(5)9/h1-3,8-9H,(H,10,11)",
            inchikey="YQUVCSBJEUQKSH-UHFFFAOYSA-N"
        ),
        "MNXM23": rpCompound(
            id="MNXM23",
            formula="C3H3O3",
            # NOTE(review): the trailing ']' has no matching '[' -- looks
            # like a typo in the SMILES; confirm before relying on it.
            smiles="CC(=O)C(=O)O]",
            inchi="InChI=1S/C3H4O3/c1-2(4)3(5)6/h1H3,(H,5,6)",
            inchikey="LCTONWCANYUPML-UHFFFAOYSA-N",
            name="pyruvate"
        ),
        "CMPD_0000000025": rpCompound(
            id="CMPD_0000000025",
            smiles="[H]OC(=O)c1c([H])c([H])c([H])c(O[H])c1[H]",
            inchi="InChI=1S/C7H6O3/c8-6-3-1-2-5(4-6)7(9)10/h1-4,8H,(H,9,10)",
            inchikey="IJFXRHURBJZNAO-UHFFFAOYSA-N"
        ),
        "CMPD_0000000003": rpCompound(
            id="CMPD_0000000003",
            smiles="[H]Oc1c([H])c([H])c([H])c([H])c1O[H]",
            inchi="InChI=1S/C6H6O2/c7-5-3-1-2-4-6(5)8/h1-4,7-8H",
            inchikey="YCIMNLLNPGFGHC-UHFFFAOYSA-N"
        ),
        "MNXM337": rpCompound(
            id="MNXM337",
            smiles="[H]OC(=O)C(OC1([H])C([H])=C(C(=O)O[H])C([H])=C([H])C1([H])O[H])=C([H])[H]",
            inchi="InChI=1S/C10H10O6/c1-5(9(12)13)16-8-4-6(10(14)15)2-3-7(8)11/h2-4,7-8,11H,1H2,(H,12,13)(H,14,15)",
            inchikey="WTFXTQVDAKGDEY-UHFFFAOYSA-N"
        ),
        "MNXM2": rpCompound(
            id="MNXM2",
            smiles="[H]O[H]",
            inchi="InChI=1S/H2O/h1H2",
            inchikey="XLYOFNOQVPJJNP-UHFFFAOYSA-N"
        ),
        "MNXM13": rpCompound(
            id="MNXM13",
            smiles="O=C=O",
            inchi="InChI=1S/CO2/c2-1-3",
            inchikey="CURLTUGMZLYLDI-UHFFFAOYSA-N",
            formula="CO2",
            name="CO2"
        ),
        "MNXM5": rpCompound(
            id="MNXM5",
            smiles="N=C(O)c1ccc[n+](C2OC(COP(=O)(O)OP(=O)(O)OCC3OC(n4cnc5c(N)ncnc54)C(OP(=O)(O)O)C3O)C(O)C2O)c1",
            inchi="InChI=1S/C21H28N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1-4,7-8,10-11,13-16,20-21,29-31H,5-6H2,(H7-,22,23,24,25,32,33,34,35,36,37,38,39)/p+1",
            inchikey="XJLXINKUBYWONI-UHFFFAOYSA-O",
            formula="C21H25N7O17P3",
            name="NADP(+)"
        ),
        "MNXM4": rpCompound(
            id="MNXM4",
            smiles="O=O",
            inchi="InChI=1S/O2/c1-2",
            inchikey="MYMOFIZGZYHOMD-UHFFFAOYSA-N"
        ),
        "MNXM1": rpCompound(
            id="MNXM1",
            smiles="[H+]",
            inchi="InChI=1S/p+1",
            inchikey="GPRLSGONYQIRFK-UHFFFAOYSA-N"
        ),
        "MNXM6": rpCompound(
            id="MNXM6",
            smiles="[H]N=C(O[H])C1=C([H])N(C2([H])OC([H])(C([H])([H])OP(=O)(O[H])OP(=O)(O[H])OC([H])([H])C3([H])OC([H])(n4c([H])nc5c(N([H])[H])nc([H])nc54)C([H])(OP(=O)(O[H])O[H])C3([H])O[H])C([H])(O[H])C2([H])O[H])C([H])=C([H])C1([H])[H]",
            inchi="InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1,3-4,7-8,10-11,13-16,20-21,29-31H,2,5-6H2,(H2,23,32)(H,36,37)(H,38,39)(H2,22,24,25)(H2,33,34,35)",
            inchikey="ACFIXJIJDZMPPO-UHFFFAOYSA-N"
        )
    }
    # Reactants and products of rxn_4, as {species id: coefficient}.
    self.reactants = {
        "CMPD_0000000003": 1,
        "MNXM4": 1
    }
    self.products = {
        self.target.get_id(): 1,
        "MNXM1": 2
    }
    # Reaction producing the target.
    self.rxn = rpReaction(
        id="rxn_4",
        ec_numbers=[
            "1.13.11.1"
        ],
        reactants=self.reactants,
        products=self.products
    )
    # All reactions of the pathway; self.rxn comes first and is added to
    # the pathway separately below.
    self.reactions = [
        self.rxn,
        rpReaction(
            id="rxn_3",
            ec_numbers=[
                "4.1.1.63"
            ],
            reactants={
                "CMPD_0000000010": 1,
                "MNXM1": 1
            },
            products={
                "CMPD_0000000003": 1,
                "MNXM13": 1
            }
        ),
        rpReaction(
            id="rxn_2",
            ec_numbers=[
                "1.14.13.23"
            ],
            reactants={
                "CMPD_0000000025": 1,
                "MNXM4": 1,
                "MNXM6": 1,
                "MNXM1": 1
            },
            products={
                "CMPD_0000000010": 1,
                "MNXM2": 1,
                "MNXM5": 1
            }
        ),
        rpReaction(
            id="rxn_1",
            ec_numbers=[
                "4.1.3.45"
            ],
            reactants={
                "MNXM337": 1
            },
            products={
                "CMPD_0000000025": 1,
                "MNXM23": 1
            }
        )
    ]
    # Value passed to set_fba_fraction() at the end of this method.
    self.fba = 0.57290585662576
    # Thermodynamics entries, each added with add_thermo_info() below.
    self.thermo = {
        "dG0_prime": {
            "value": -884.6296371353768,
            "error": 9.819227446307337,
            "units": "kilojoule / mole"
        },
        "dGm_prime": {
            "value": -884.6296371353768,
            "error": 9.819227446307337,
            "units": "kilojoule / mole"
        },
        "dG_prime": {
            "value": -884.6296371353768,
            "error": 9.819227446307337,
            "units": "kilojoule / mole"
        }
    }
    # Per-reaction metadata applied to self.rxn through its setters below.
    self.rp2_transfo_id = 'TRS_0_0_0'
    self.rule_ids = ['RR-02-a0cc0be463ff412f-16-F']
    self.tmpl_rxn_ids = ['MNXR96458']
    self.idx_in_path = 1
    self.rule_score = 0.5982208769718989
    self.id = 'pathway'
    # Cache used only to look up the annotation of the compartment.
    cache = rrCache(
        attrs=[
            'comp_xref',
            'deprecatedCompID_compid',
        ]
    )
    self.compartments = [
        {
            'id': 'MNXC3',
            'name': 'cytosol',
            # 'MNXC3' is first mapped through 'deprecatedCompID_compid',
            # and the result is used as the key into 'comp_xref'.
            'annot': cache.get('comp_xref')[
                cache.get('deprecatedCompID_compid')[
                    'MNXC3'
                ]
            ]
        }
    ]
    self.parameters = {
        "upper_flux_bound": {
            "value": 999999.0,
            "units": "mmol_per_gDW_per_hr"
        },
        "lower_flux_bound": {
            "value": 0.0,
            "units": "mmol_per_gDW_per_hr"
        }
    }
    # NOTE(review): the numeric "kind" values are presumably unit-kind
    # codes of the SBML library in use -- confirm.
    self.unit_def = {
        "mmol_per_gDW_per_hr": [
            {
                "kind": 23,
                "exponent": 1,
                "scale": -3,
                "multiplier": 1.0
            },
            {
                "kind": 8,
                "exponent": 1,
                "scale": 0,
                "multiplier": 1.0
            },
            {
                "kind": 28,
                "exponent": 1,
                "scale": 0,
                "multiplier": 3600.0
            }
        ],
        "kj_per_mol": [
            {
                "kind": 13,
                "exponent": 1,
                "scale": 3,
                "multiplier": 1.0
            },
            {
                "kind": 13,
                "exponent": -1,
                "scale": 1,
                "multiplier": 1.0
            }
        ]
    }
    # Assemble the pathway.
    self.pathway = rpPathway(
        id=self.id,
    )
    self.pathway.set_parameters(self.parameters)
    self.pathway.set_unit_defs(self.unit_def)
    self.rxn.set_rp2_transfo_id(self.rp2_transfo_id)
    self.rxn.set_rule_ids(self.rule_ids)
    self.rxn.set_tmpl_rxn_ids(self.tmpl_rxn_ids)
    self.rxn.set_idx_in_path(self.idx_in_path)
    self.rxn.set_rule_score(self.rule_score)
    # The first reaction is added together with the id of the target.
    self.pathway.add_reaction(rxn=self.rxn, target_id=self.target.get_id())
    # Remaining reactions (self.rxn is skipped: already added above).
    for rxn in self.reactions[1:]:
        self.pathway.add_reaction(rxn)
    self.sink = ['MNXM23', 'MNXM6', 'MNXM13']
    self.pathway.set_sink(self.sink)
    for key, value in self.thermo.items():
        self.pathway.add_thermo_info(key, value)
    self.pathway.set_fba_fraction(self.fba)