def __init__(self, biochem_root='../../Biochemistry/',
             rxns_file='reactions.tsv'):
    """Record the biochemistry file locations and load the compounds.

    Args:
        biochem_root: Directory prefix for the biochemistry data files.
            It is joined to file names by plain string concatenation,
            so it must end with a path separator.
        rxns_file: Name of the tab-delimited reactions table located
            under ``biochem_root``.

    Attributes set:
        BiochemRoot: ``biochem_root`` exactly as given.
        RxnsFile: Path of the reactions table.
        AliasFile: Path of the reaction aliases table.
        Headers: Column names taken from the first row of the reactions
            table (``None`` if the file is empty).
        CompoundsHelper: A ``BiochemPy.Compounds`` instance built with
            its default arguments.
        Compounds_Dict: Whatever ``CompoundsHelper.loadCompounds()``
            returns (presumably keyed by compound id; confirm against
            ``Compounds``).

    Raises:
        IOError/OSError: If the reactions table cannot be opened.
    """
    self.BiochemRoot = biochem_root
    self.RxnsFile = biochem_root + rxns_file
    self.AliasFile = biochem_root + "Aliases/Reactions_Aliases.tsv"

    # Only the header row is needed here. Reading it inside a ``with``
    # block closes the file straight away instead of leaking the handle
    # for the lifetime of the process.
    with open(self.RxnsFile) as rxns_handle:
        reader = DictReader(rxns_handle, dialect='excel-tab')
        # ``fieldnames`` is read lazily from the file, so it has to be
        # accessed while the handle is still open.
        self.Headers = reader.fieldnames

    # Imported at call time, as in the original code (presumably to
    # avoid a circular import within the BiochemPy package).
    from BiochemPy import Compounds
    self.CompoundsHelper = Compounds()
    self.Compounds_Dict = self.CompoundsHelper.loadCompounds()
def __init__(self, biochem_root='../../../Biochemistry/',
             rxns_file='reactions.tsv'):
    """Record the biochemistry file locations and load the compounds.

    Args:
        biochem_root: Location of the biochemistry data directory,
            expressed relative to the directory containing this module.
            It is joined to file names by plain string concatenation,
            so it must end with a path separator.
        rxns_file: Name of the tab-delimited reactions table located
            under the resolved biochemistry root.

    Attributes set:
        BiochemRoot: Directory of this module + '/' + ``biochem_root``.
        RxnsFile: Path of the reactions table.
        AliasFile, NameFile, PwyFile, ECFile: Paths of the
            ``Unique_ModelSEED_Reaction_*`` files under ``Aliases/``.
        Headers: Column names taken from the first row of the reactions
            table (``None`` if the file is empty).
        CompoundsHelper: A ``BiochemPy.Compounds`` instance built with
            its default arguments.
        Compounds_Dict: Whatever ``CompoundsHelper.loadCompounds()``
            returns (presumably keyed by compound id; confirm against
            ``Compounds``).

    Raises:
        IOError/OSError: If the reactions table cannot be opened.
    """
    # Anchor the data directory to this module's location so the paths
    # do not depend on the caller's working directory.
    self.BiochemRoot = os.path.dirname(__file__) + '/' + biochem_root
    self.RxnsFile = self.BiochemRoot + rxns_file

    aliases_root = self.BiochemRoot + "Aliases/"
    self.AliasFile = aliases_root + "Unique_ModelSEED_Reaction_Aliases.txt"
    self.NameFile = aliases_root + "Unique_ModelSEED_Reaction_Names.txt"
    self.PwyFile = aliases_root + "Unique_ModelSEED_Reaction_Pathways.txt"
    self.ECFile = aliases_root + "Unique_ModelSEED_Reaction_ECs.txt"

    # Only the header row is needed here. Reading it inside a ``with``
    # block closes the file straight away instead of leaking the handle
    # for the lifetime of the process.
    with open(self.RxnsFile) as rxns_handle:
        reader = DictReader(rxns_handle, dialect='excel-tab')
        # ``fieldnames`` is read lazily from the file, so it has to be
        # accessed while the handle is still open.
        self.Headers = reader.fieldnames

    # Imported at call time, as in the original code (presumably to
    # avoid a circular import within the BiochemPy package).
    from BiochemPy import Compounds
    self.CompoundsHelper = Compounds()
    self.Compounds_Dict = self.CompoundsHelper.loadCompounds()
if (inchikey not in mnx_inchikey_dict): mnx_inchikey_dict[inchikey] = mnx inchikey = "-".join(inchikey.split('-')[0:2]) if (inchikey not in mnx_inchikey_dict): mnx_inchikey_dict[inchikey] = mnx inchikey = inchikey.split('-')[0] if (inchikey not in mnx_inchikey_dict): mnx_inchikey_dict[inchikey] = mnx file_handle.close() #Here we can cross-check the structures that are in ModelSEED to find ones where #there is a match in eQuilibrator compounds_helper = Compounds() structures_dict = compounds_helper.loadStructures(["InChIKey"], ["ModelSEED"]) seed_mnx_structural_map = dict() for cpd in structures_dict: structure_type = 'InChIKey' if (structure_type not in structures_dict[cpd]): #The load structures function will return all compounds, so have to #Check that the structure is there continue #As these are unique structures, i.e. 1-1 mapping with compound id, #there's only ever one in each list for each compound structure = list(structures_dict[cpd][structure_type].keys())[0] #Here we check on three levels, we check the full string #Then the deprotonated string, then the structure alone
#!/usr/bin/env python import os, sys, re temp = list() header = 1 from BiochemPy import Compounds import pybel from rdkit.Chem import AllChem from rdkit import RDLogger lg = RDLogger.logger() lg.setLevel(RDLogger.ERROR) #Load Structures and Aliases CompoundsHelper = Compounds() Structures_Dict = CompoundsHelper.loadStructures(["SMILE", "InChI"], ["KEGG", "MetaCyc"]) Structures_Root = os.path.dirname(__file__) + "/../../Biochemistry/Structures/" file_handle_dict = dict() for source in "KEGG", "MetaCyc": for struct_type in "InChI", "SMILE": for struct_stage in "Charged", "Original": file_string = "_".join((source, struct_type, struct_stage)) file_name = Structures_Root + source + "/" + struct_type + "_" + struct_stage + "_Formulas_Charges.txt" file_handle_dict[file_string] = open(file_name, "w") resolved_structures = open('Resolved_Structures.txt', 'w') unresolved_structures = open('Unresolved_Structures.txt', 'w') for struct_type in sorted(Structures_Dict.keys()):