#!/usr/bin/env python import os, sys from BiochemPy import Compounds, Reactions, InChIs compounds_helper = Compounds() compounds_dict = compounds_helper.loadCompounds() reactions_helper = Reactions() reactions_dict = reactions_helper.loadReactions() print("\n================") print( "For Section: \"Computation of thermodynamic properties of ModelSEED compounds and reaction\"\n" ) MS_Complete_Structures = dict() with open("../../../Biochemistry/Structures/Unique_ModelSEED_Structures.txt" ) as fh: for line in fh.readlines(): line = line.strip() array = line.split('\t') if ("InChI" in array[5]): MS_Complete_Structures[array[5]] = 1 MNX_Complete_Structures = dict() with open("../../../Biochemistry/Structures/MetaNetX/chem_prop.tsv") as fh: header = 1 for line in fh.readlines(): if (line[0] == "#"): continue
class Reactions:
    """Load, parse, balance, rebuild and save ModelSEED reaction records.

    A "reagent" throughout this class is a dict describing one compound in
    one compartment of a reaction, with the keys ``reagent`` (id built as
    ``<compound>_<compartment><index>``), ``coefficient`` (negative on the
    left-hand side, positive on the right), ``compound``, ``compartment``,
    ``index``, ``name``, ``formula`` and ``charge``.
    """

    def __init__(self,
                 biochem_root='../../../Biochemistry/',
                 rxns_file='reactions.tsv'):
        """Resolve data file paths, read the reaction headers, load compounds.

        Args:
            biochem_root: Biochemistry directory, relative to the directory
                that contains this module.
            rxns_file: Name of the tab-delimited reactions file found in
                ``biochem_root``.
        """
        self.BiochemRoot = os.path.dirname(__file__) + '/' + biochem_root
        self.RxnsFile = self.BiochemRoot + rxns_file
        self.AliasFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Aliases.txt"
        self.NameFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Names.txt"
        self.PwyFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_Pathways.txt"
        self.ECFile = self.BiochemRoot + "Aliases/Unique_ModelSEED_Reaction_ECs.txt"

        # Only the header row is needed here, so the file is closed at once.
        with open(self.RxnsFile) as rxns_fh:
            self.Headers = DictReader(rxns_fh, dialect='excel-tab').fieldnames

        # Function-scope import kept from the original code
        # (presumably avoids a circular import -- TODO confirm).
        from BiochemPy import Compounds
        self.CompoundsHelper = Compounds()
        self.Compounds_Dict = self.CompoundsHelper.loadCompounds()

    def loadReactions(self):
        """Read the reactions file into a dict keyed by reaction id.

        The "aliases", "pathways", "ec_numbers" and "notes" columns are split
        on "|" unless they hold the string "null". "is_transport" and
        "is_obsolete" are converted to int, "deltag" and "deltagerr" to
        float; a value that cannot be converted becomes None.

        Returns:
            dict mapping reaction id to its row dict.
        """
        type_mapping = {
            "is_transport": int,
            "is_obsolete": int,
            "deltag": float,
            "deltagerr": float
        }
        lists = ["aliases", "pathways", "ec_numbers", "notes"]
        # No column currently uses the "key:value|key:value" encoding; the
        # handling below is kept so that adding a column name is enough.
        dicts = []

        rxns_dict = dict()
        with open(self.RxnsFile) as rxns_fh:
            reader = DictReader(rxns_fh, dialect='excel-tab')
            for line in reader:
                for list_type in lists:
                    if line[list_type] != "null":
                        line[list_type] = line[list_type].split("|")
                for dict_type in dicts:
                    if line[dict_type] != "null":
                        entries = line[dict_type].split('|')
                        line[dict_type] = dict()
                        for entry in entries:
                            (entry_key, entry_value) = entry.split(':')
                            line[dict_type][entry_key] = entry_value
                for heading, target_type in type_mapping.items():
                    try:
                        line[heading] = target_type(line[heading])
                    except ValueError:
                        # Generally caused by "null" strings
                        line[heading] = None
                rxns_dict[line['id']] = line
        return rxns_dict

    def parseEquation(self, equation_string):
        """Parse an equation such as "(1) cpd00001[0] <=> (2) cpd00002[1]".

        Tokens are separated by single spaces. A "(n)" token sets the
        coefficient of the next compound; a "cpdNNNNN[c]" token is a compound
        in compartment ``c``; "<=", "=>", "<=>" or "=" switches from the
        left-hand side (negative coefficients) to the right-hand side.

        Returns:
            list of reagent dicts; every reagent gets index 0.
        """
        rxn_cpds_array = list()
        reagent = -1
        coeff = 1
        index = 0
        for text in equation_string.split(" "):
            if text == "+":
                continue

            # Direction marker: everything after it is a product
            if re.search(r'^<?=>?$', text) is not None:
                reagent = 1

            match = re.search(r'^\((\d+(?:\.\d+)?)\)$', text)
            if match is not None:
                coeff = float(match.group(1))
                # Correct for redundant ".0" in floats
                if str(coeff)[-2:] == ".0":
                    coeff = int(round(coeff))

            match = re.search(r'^(cpd\d{5})\[(\d)\]$', text)
            if match is not None:
                # Side of equation
                coeff = coeff * reagent

                (cpd, cpt) = (match.group(1), match.group(2))
                rgt_id = cpd + "_" + cpt + str(index)
                cpt = int(cpt)
                rxn_cpds_array.append({
                    "reagent": rgt_id,
                    "coefficient": coeff,
                    "compound": cpd,
                    "compartment": cpt,
                    "index": index,
                    "name": self.Compounds_Dict[cpd]["name"],
                    "formula": self.Compounds_Dict[cpd]["formula"],
                    "charge": self.Compounds_Dict[cpd]["charge"]
                })

                # Need to reset coeff for next compound
                coeff = 1
        return rxn_cpds_array

    def parseStoich(self, stoichiometry):
        """Parse a stoichiometry string into a list of reagent dicts.

        The string holds ";"-separated entries of the form
        ``coefficient:compound:compartment:index:name`` (the name may itself
        contain ":"). An empty string yields an empty list.
        """
        rxn_cpds_array = list()

        # For empty reaction
        if stoichiometry == "":
            return rxn_cpds_array

        for rgt in stoichiometry.split(";"):
            (coeff, cpd, cpt, index, name) = rgt.split(":", 4)
            rgt_id = cpd + "_" + cpt + index

            coeff = float(coeff)
            # Correct for redundant ".0" in floats
            if str(coeff)[-2:] == ".0":
                coeff = int(round(coeff))

            rxn_cpds_array.append({
                "reagent": rgt_id,
                "coefficient": coeff,
                "compound": cpd,
                "compartment": int(cpt),
                "index": int(index),
                "name": name,
                "formula": self.Compounds_Dict[cpd]["formula"],
                "charge": self.Compounds_Dict[cpd]["charge"]
            })
        return rxn_cpds_array

    def parseStoichOnt(self, stoichiometry):
        """Parse a stoichiometry string into ``{(compound, compartment): coeff}``.

        Compartment and coefficient are kept as the strings found in the
        input. An empty string yields an empty dict.
        """
        rxn_cpds_dict = dict()

        # For empty reaction (the original returned an undefined name here)
        if stoichiometry == "":
            return rxn_cpds_dict

        for rgt in stoichiometry.split(";"):
            (coeff, cpd, cpt, index, name) = rgt.split(":", 4)
            rxn_cpds_dict[(cpd, cpt)] = coeff
        return rxn_cpds_dict

    # The basis for this code, and producing combinations of ontologically related reactions
    # was found in Filipe's code (see commit: 92db86)
    def generateOntologyReactionCodes(self, rxn_id, rxn_cpds, cpds_neighbors):
        """Build reaction codes for every combination of compound swaps.

        Args:
            rxn_id: Reaction identifier (not used by the computation).
            rxn_cpds: dict as returned by ``parseStoichOnt``.
            cpds_neighbors: dict mapping a compound id to an iterable of
                compound ids that may replace it.

        Returns:
            dict mapping each generated code (see ``generateCode``) to the
            tuple of ``(old, new)`` replacements that produced it.
        """
        new_codes = dict()

        # One list of candidate (old, new) swaps per replaceable compound
        replacements = list()
        for cpd_cpt_tuple in rxn_cpds:
            cpd_id = cpd_cpt_tuple[0]
            if cpd_id not in cpds_neighbors:
                continue
            replace_list = [(cpd_id, neighbor_id)
                            for neighbor_id in cpds_neighbors[cpd_id]]
            if len(replace_list) > 0:
                replacements.append(replace_list)

        if len(replacements) == 0:
            return new_codes

        # Iterate through different numbers of compounds to replace
        # i.e. replace 1 compound, replace 2 compounds etc.
        # The output is a list of all the possible combination of replacements to explore
        replacement_product = list()
        for n_cpds in range(1, len(replacements) + 1):
            for entry in itertools.combinations(replacements, n_cpds):
                replacement_product += list(itertools.product(*entry))

        for entry in replacement_product:
            # Regenerate array of cpd dicts for use with generateCode().
            # Several old compounds may map to the same new compound, so the
            # swap is applied per reagent rather than through a dict.
            swapped_rxn_cpds_array = list()
            for (cpd, cpt), coeff in rxn_cpds.items():
                new_cpd = cpd
                for old, new in entry:
                    if cpd == old:
                        new_cpd = new

                reagent = {
                    "reagent": new_cpd + '_' + cpt + '0',
                    "compartment": cpt,
                    "coefficient": float(coeff)
                }
                # Correct for redundant ".0" in floats
                if str(reagent["coefficient"])[-2:] == ".0":
                    reagent["coefficient"] = int(round(reagent["coefficient"]))
                swapped_rxn_cpds_array.append(reagent)

            new_code = self.generateCode(swapped_rxn_cpds_array)
            new_codes[new_code] = entry

        return new_codes

    @staticmethod
    def isTransport(rxn_cpds_array):
        """Return 1 if the reagents span more than one compartment, else 0."""
        compartments = {rgt['compartment'] for rgt in rxn_cpds_array}
        return 1 if len(compartments) > 1 else 0

    def generateCodes(self, rxns_dict, check_obsolete=True):
        """Group reaction ids by their code (see ``generateCode``).

        Reactions whose status is "EMPTY" are skipped. When
        ``check_obsolete`` is False, reactions with ``is_obsolete == 1`` are
        skipped as well.

        Returns:
            dict mapping code to ``{reaction id: 1}``.
        """
        codes_dict = dict()
        for rxn in rxns_dict:
            if rxns_dict[rxn]['status'] == "EMPTY":
                continue
            if check_obsolete is False and rxns_dict[rxn]['is_obsolete'] == 1:
                continue
            rxn_cpds_array = self.parseStoich(rxns_dict[rxn]['stoichiometry'])
            code = self.generateCode(rxn_cpds_array)
            if code not in codes_dict:
                codes_dict[code] = dict()
            codes_dict[code][rxn] = 1
        return codes_dict

    def generateCode(self, rxn_cpds_array):
        """Return a direction-independent string that identifies a reaction.

        Each side is rendered as "|"-joined ``reagent:abs(coefficient)``
        entries and the two sides are sorted before being joined with "|=|".
        """
        # It matters if its a transport reaction, and we include protons when matching transport
        is_transport = self.isTransport(rxn_cpds_array)

        # It matters which side of the equation, so build reagents and products arrays
        reagents = list()
        products = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x: (x["reagent"], x["coefficient"])):
            # skip protons
            if "cpd00067" in rgt["reagent"] and is_transport == 0:
                continue

            entry = rgt["reagent"] + ":" + str(abs(rgt["coefficient"]))
            if rgt["coefficient"] < 0:
                reagents.append(entry)
            if rgt["coefficient"] > 0:
                products.append(entry)

        rgt_string = "|".join(reagents)
        pdt_string = "|".join(products)
        # Sorting the overall strings here helps with matching transporters
        return "|=|".join(sorted([rgt_string, pdt_string]))

    @staticmethod
    def buildStoich(rxn_cpds_array):
        """Serialise reagent dicts back into a stoichiometry string.

        Entries with non-positive coefficients come first, then by reagent
        id. Note that the ``coefficient``, ``compartment`` and ``index``
        values of the passed dicts are converted to strings in place.
        """
        stoichiometry_array = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x: (int(x["coefficient"] > 0), x["reagent"])):
            # Correct for redundant ".0" in floats
            if str(rgt["coefficient"])[-2:] == ".0":
                rgt["coefficient"] = int(round(rgt["coefficient"]))

            rgt["coefficient"] = str(rgt["coefficient"])
            rgt["compartment"] = str(rgt["compartment"])
            rgt["index"] = str(rgt["index"])

            stoichiometry_array.append(":".join([
                rgt["coefficient"], rgt["compound"], rgt["compartment"],
                rgt["index"], rgt["name"]
            ]))
        return ";".join(stoichiometry_array)

    @staticmethod
    def removeCpdRedundancy(rgts_array):
        """Merge reagents that share a reagent id by summing coefficients.

        Reagents whose summed coefficient is zero are dropped. The first dict
        of each surviving reagent is updated in place and reused in the
        returned list.
        """
        rgts_dict = dict()
        for rgt in rgts_array:
            if rgt["reagent"] not in rgts_dict:
                rgts_dict[rgt["reagent"]] = 0
            rgts_dict[rgt["reagent"]] += float(rgt["coefficient"])

        new_rgts_array = list()
        for rgt in rgts_array:
            if rgts_dict[rgt["reagent"]] == 0:
                continue

            rgt["coefficient"] = rgts_dict[rgt["reagent"]]
            # Correct for redundant ".0" in floats
            if str(rgt["coefficient"])[-2:] == ".0":
                rgt["coefficient"] = int(round(rgt["coefficient"]))
            new_rgts_array.append(rgt)

            # Trick to exclude reagent if it appears in array more than once
            rgts_dict[rgt["reagent"]] = 0

        return new_rgts_array

    def balanceReaction(self, rgts_array):
        """Check mass and charge balance of a reaction.

        Returns:
            "EMPTY" for no reagents, "Duplicate reagents" when a reagent id
            occurs more than once, "CPDFORMERROR" when a compound other than
            cpd11632/cpd12713 has a formula with no atoms, "OK" when
            balanced, and otherwise "MI:<atom>:<n>/..." and/or "CI:<n>"
            joined by "|".
        """
        if len(rgts_array) == 0:
            return "EMPTY"

        ########################################
        # Check that each reagent is either a
        # different compound or in a different
        # compartment, and report.
        ########################################
        rgts_dict = dict()
        for rgt in rgts_array:
            if rgt["reagent"] not in rgts_dict:
                rgts_dict[rgt["reagent"]] = 0
            rgts_dict[rgt["reagent"]] += 1

        for rgt in rgts_dict.keys():
            if rgts_dict[rgt] > 1:
                return "Duplicate reagents"

        ########################################
        # Check for duplicate compounds in
        # different compartments, these are
        # balanced directly.
        #######################################
        cpds_coeff_dict = dict()
        for rgt in rgts_array:
            cpd = rgt["compound"]
            if cpd not in cpds_coeff_dict:
                cpds_coeff_dict[cpd] = 0

            # Use float() because you can get real coefficients
            cpds_coeff_dict[cpd] += float(rgt["coefficient"])

        # Build dict of compounds
        cpds_dict = dict()
        for rgt in rgts_array:
            # Skip trans-compartmental compounds
            if cpds_coeff_dict[rgt["compound"]] == 0:
                continue

            # Work on a copy so the caller's reagents keep their coefficients
            proxy_rgt = copy.deepcopy(rgt)
            proxy_rgt["coefficient"] = cpds_coeff_dict[rgt["compound"]]
            cpds_dict[rgt["compound"]] = proxy_rgt

        ########################################
        # Check for duplicate elements, across
        # all compounds, these are balanced
        # directly.
        #######################################
        rxn_net_charge = 0.0
        rxn_net_mass = dict()
        cpdformerror = list()
        for cpd in cpds_dict.keys():
            cpd_atoms = self.CompoundsHelper.parseFormula(
                cpds_dict[cpd]["formula"])

            if len(cpd_atoms.keys()) == 0:
                # Here we can skip photons and electrons
                # They are the valid compounds with no mass
                if cpd == 'cpd11632' or cpd == 'cpd12713':
                    pass
                else:
                    cpdformerror.append(cpd)

            cpd_coeff_charge = float(cpds_dict[cpd]["charge"]) * float(
                cpds_dict[cpd]["coefficient"])
            rxn_net_charge += cpd_coeff_charge

            for atom in cpd_atoms.keys():
                atom_coeff_mass = float(cpd_atoms[atom]) * float(
                    cpds_dict[cpd]["coefficient"])

                if atom not in rxn_net_mass.keys():
                    rxn_net_mass[atom] = 0.0

                rxn_net_mass[atom] += atom_coeff_mass

        if len(cpdformerror) > 0:
            return "CPDFORMERROR"

        # Round out tiny numbers that occur because we add/substract floats
        # Threshold of 1e-6 found to capture all these instances without
        # removing actual small differences in mass.
        for atom in rxn_net_mass.keys():
            if rxn_net_mass[atom] > -1e-6 and rxn_net_mass[atom] < 1e-6:
                rxn_net_mass[atom] = 0

        if rxn_net_charge > -1e-6 and rxn_net_charge < 1e-6:
            rxn_net_charge = 0

        # Report any imbalance
        imbalanced_atoms_array = list()
        for atom in sorted(rxn_net_mass.keys()):
            if rxn_net_mass[atom] == 0:
                continue

            rxn_net_mass[atom] = "{0:.2f}".format(rxn_net_mass[atom])

            # Correct for redundant ".00" in floats
            if rxn_net_mass[atom][-3:] == ".00":
                rxn_net_mass[atom] = str(int(float(rxn_net_mass[atom])))

            imbalanced_atoms_array.append(atom + ":" + rxn_net_mass[atom])

        rxn_net_charge = "{0:.2f}".format(rxn_net_charge)

        # Correct for redundant ".00" in floats
        if rxn_net_charge[-3:] == ".00":
            rxn_net_charge = str(int(float(rxn_net_charge)))

        status = ""
        if len(imbalanced_atoms_array) > 0:
            status = "MI:" + "/".join(imbalanced_atoms_array)

        if rxn_net_charge != "0":
            if len(status) == 0:
                status = "CI:" + rxn_net_charge
            else:
                status += "|CI:" + rxn_net_charge

        if status == "":
            status = "OK"

        return status

    def adjustCompound(self,
                       rxn_cpds_array,
                       compound,
                       adjustment,
                       compartment=0):
        """Subtract ``adjustment`` from a compound's coefficient, in place.

        The compound is added (index 0) when it is not yet present in the
        given compartment and removed when its coefficient reaches zero.

        Returns:
            ``rxn_cpds_array`` when ``adjustment`` is 0, otherwise None (the
            list is modified in place). This asymmetry is kept from the
            original for backward compatibility.
        """
        if adjustment == 0:
            return rxn_cpds_array

        ######################################################################
        # We will always assume to adjust a compound automatically
        # in the compartment indexed as zero, unless otherwise specified.
        # This answers the question of how to handle transporters.
        ######################################################################

        # Check to see if it already exists
        cpd_exists = 0
        cpd_remove = {}
        for rgt in rxn_cpds_array:
            if (rgt["compound"] == compound
                    and rgt["compartment"] == compartment):
                rgt["coefficient"] -= adjustment
                cpd_exists = 1
                if rgt["coefficient"] == 0:
                    cpd_remove = rgt

        if cpd_exists != 1:
            rgt_id = compound + "_" + str(compartment) + "0"

            rxn_cpds_array.append({
                "reagent": rgt_id,
                "coefficient": 0 - adjustment,
                "compound": compound,
                "compartment": compartment,
                "index": 0,
                "name": self.Compounds_Dict[compound]["name"],
                "formula": self.Compounds_Dict[compound]["formula"],
                "charge": self.Compounds_Dict[compound]["charge"]
            })

        if len(cpd_remove.keys()) > 0:
            rxn_cpds_array.remove(cpd_remove)

        # Got to adjust for floats
        for rgt in rxn_cpds_array:
            if str(rgt["coefficient"])[-2:] == ".0":
                rgt["coefficient"] = int(round(rgt["coefficient"]))

        return

    def replaceCompound(self, rxn_cpds_array, old_compound, new_compound):
        """Replace ``old_compound`` by ``new_compound`` in every compartment.

        The coefficient is maintained. The reagent id, name, formula and
        charge are refreshed from the compounds table so that later balance
        checks see the new compound's data.

        Returns:
            True when at least one reagent was replaced, False when
            ``old_compound`` was not found (nothing is changed).
        """
        found_cpd = False
        for rgt in rxn_cpds_array:
            if rgt["compound"] != old_compound:
                continue
            found_cpd = True
            new_entry = self.Compounds_Dict[new_compound]
            rgt["compound"] = new_compound
            rgt["reagent"] = new_compound + "_" + str(rgt["compartment"]) + "0"
            rgt["name"] = new_entry['name']
            # Keep formula/charge in step with the compound id
            rgt["formula"] = new_entry['formula']
            rgt["charge"] = new_entry['charge']

        return found_cpd

    def rebuildReaction(self, reaction_dict, stoichiometry=None):
        """Regenerate the derived text fields of a reaction, in place.

        Sets "code", "equation", "definition" and "compound_ids" on
        ``reaction_dict`` from its stoichiometry and "direction". When
        ``stoichiometry`` is given it is stored on the reaction first;
        otherwise the reaction's own "stoichiometry" is used.
        """
        # Retrieve/Assign stoich
        if stoichiometry is None:
            stoichiometry = reaction_dict['stoichiometry']
        else:
            reaction_dict["stoichiometry"] = stoichiometry

        # Build list of "reagents" and "products"
        rxn_cpds_array = self.parseStoich(stoichiometry)
        reagents_array = list()
        products_array = list()
        compound_ids_dict = dict()
        for rgt in rxn_cpds_array:
            compound_ids_dict[rgt["compound"]] = 1
            if rgt["coefficient"] > 0:
                products_array.append(rgt)
            else:
                reagents_array.append(rgt)

        rgts_str__array = list()
        for rgt in reagents_array:
            id_string = "(" + str(abs(
                rgt["coefficient"])) + ") " + rgt["compound"] + "[" + str(
                    rgt["compartment"]) + "]"
            rgts_str__array.append(id_string)

        equation_array = list()
        code_array = list()
        definition_array = list()

        equation_array.append(" + ".join(rgts_str__array))
        definition_array.append(" + ".join(rgts_str__array))
        # The code omits protons and is always written as reversible
        code_array.append(" + ".join(x for x in rgts_str__array
                                     if "cpd00067" not in x))

        code_array.append("<=>")
        if reaction_dict["direction"] == "=":
            equation_array.append("<=>")
            definition_array.append("<=>")
        elif reaction_dict["direction"] == "<":
            equation_array.append("<=")
            definition_array.append("<=")
        else:
            equation_array.append("=>")
            definition_array.append("=>")

        pdts_str_array = list()
        for rgt in products_array:
            id_string = "(" + str(abs(
                rgt["coefficient"])) + ") " + rgt["compound"] + "[" + str(
                    rgt["compartment"]) + "]"
            pdts_str_array.append(id_string)

        equation_array.append(" + ".join(pdts_str_array))
        definition_array.append(" + ".join(pdts_str_array))
        code_array.append(" + ".join(x for x in pdts_str_array
                                     if "cpd00067" not in x))

        reaction_dict["code"] = " ".join(code_array)
        reaction_dict["equation"] = " ".join(equation_array)
        reaction_dict["definition"] = " ".join(definition_array)
        reaction_dict["compound_ids"] = ";".join(
            sorted(compound_ids_dict.keys()))

        # Replace ids with names in Definition
        for cpd_id in compound_ids_dict.keys():
            if cpd_id in reaction_dict["definition"]:
                reaction_dict["definition"] = reaction_dict[
                    "definition"].replace(cpd_id,
                                          self.Compounds_Dict[cpd_id]["name"])

        # Define if transport?

        return

    def saveECs(self, ecs_dict):
        """Write ``{reaction id: [EC numbers]}`` to the EC file as TSV."""
        ecs_root = os.path.splitext(self.ECFile)[0]

        # Print to TXT
        with open(ecs_root + ".txt", 'w') as ecs_file:
            ecs_file.write("\t".join(("ModelSEED ID", "External ID",
                                      "Source")) + "\n")
            for rxn in sorted(ecs_dict.keys()):
                for name in sorted(ecs_dict[rxn]):
                    ecs_file.write("\t".join((rxn, name, 'Enzyme Class')) +
                                   "\n")

    def saveNames(self, names_dict):
        """Write ``{reaction id: [names]}`` to the names file as TSV."""
        names_root = os.path.splitext(self.NameFile)[0]

        # Print to TXT
        with open(names_root + ".txt", 'w') as names_file:
            names_file.write("\t".join(("ModelSEED ID", "External ID",
                                        "Source")) + "\n")
            for rxn in sorted(names_dict.keys()):
                for name in sorted(names_dict[rxn]):
                    names_file.write("\t".join((rxn, name, 'name')) + "\n")

    def saveAliases(self, alias_dict):
        """Write ``{reaction id: {source: [aliases]}}`` to the alias file."""
        alias_root = os.path.splitext(self.AliasFile)[0]

        # Print to TXT
        with open(alias_root + ".txt", 'w') as alias_file:
            alias_file.write("\t".join(("ModelSEED ID", "External ID",
                                        "Source")) + "\n")
            for rxn in sorted(alias_dict.keys()):
                for source in sorted(alias_dict[rxn].keys()):
                    for alias in sorted(alias_dict[rxn][source]):
                        alias_file.write("\t".join((rxn, alias, source)) +
                                         "\n")

    def saveReactions(self, reactions_dict):
        """Write reactions to ``<reactions file root>.tsv`` and ``.json``.

        In the TSV, list values are joined with "|", dict values are written
        as "key:value" pairs joined with "|", and None is written as "null"
        (the sentinel ``loadReactions`` maps to None). For the JSON output
        every "null" string is replaced by None; this replacement is applied
        to the passed reaction dicts in place.
        """
        rxns_root = os.path.splitext(self.RxnsFile)[0]

        # Print to TSV
        with open(rxns_root + ".tsv", 'w') as rxns_file:
            rxns_file.write("\t".join(self.Headers) + "\n")
            for rxn in sorted(reactions_dict.keys()):
                values_list = list()
                for header in self.Headers:
                    value = reactions_dict[rxn][header]
                    if isinstance(value, list):
                        value = "|".join(value)
                    if isinstance(value, dict):
                        entries = list()
                        for entry in value:
                            entries.append(entry + ':' + value[entry])
                        value = "|".join(entries)
                    if value is None:
                        # Keep the file's own sentinel instead of "None"
                        value = "null"
                    values_list.append(str(value))
                rxns_file.write("\t".join(values_list) + "\n")

        # Re-configure JSON
        new_reactions_dict = list()
        for rxn_id in sorted(reactions_dict):
            rxn_obj = reactions_dict[rxn_id]
            for key in rxn_obj:
                if isinstance(rxn_obj[key], dict):
                    for entry in rxn_obj[key]:
                        if rxn_obj[key][entry] == "null":
                            rxn_obj[key][entry] = None
                if rxn_obj[key] == "null":
                    rxn_obj[key] = None
            new_reactions_dict.append(rxn_obj)

        # Print to JSON
        with open(rxns_root + ".json", 'w') as rxns_file:
            rxns_file.write(
                json.dumps(new_reactions_dict, indent=4, sort_keys=True))

    def loadMSAliases(self, sources_array=None):
        """Load reaction aliases as ``{reaction id: {source: [aliases]}}``.

        Args:
            sources_array: Source names to keep. None or an empty list keeps
                every source, as does a list containing "All". The passed
                list is never modified.
        """
        if sources_array is None or len(sources_array) == 0:
            sources = ["All"]
        else:
            sources = sources_array

        aliases_dict = dict()
        with open(self.AliasFile) as alias_fh:
            reader = DictReader(alias_fh, dialect='excel-tab')
            for line in reader:
                if "rxn" not in line['ModelSEED ID']:
                    continue

                if "All" not in sources and line['Source'] not in sources:
                    continue

                if line['ModelSEED ID'] not in aliases_dict:
                    aliases_dict[line['ModelSEED ID']] = dict()

                # A row may list several sources separated by "|"
                for source in line['Source'].split('|'):
                    if source not in aliases_dict[line['ModelSEED ID']]:
                        aliases_dict[line['ModelSEED ID']][source] = list()

                    aliases_dict[line['ModelSEED ID']][source].append(
                        line['External ID'])

        return aliases_dict

    def loadNames(self):
        """Load reaction names as ``{reaction id: [names]}``."""
        names_dict = dict()
        with open(self.NameFile) as names_fh:
            reader = DictReader(names_fh, dialect='excel-tab')
            for line in reader:
                if "rxn" not in line['ModelSEED ID']:
                    continue

                if line['ModelSEED ID'] not in names_dict:
                    names_dict[line['ModelSEED ID']] = list()

                names_dict[line['ModelSEED ID']].append(line['External ID'])

        return names_dict

    def loadPathways(self):
        """Load pathways as ``{reaction id: {source: [pathway ids]}}``."""
        pathways_dict = dict()
        with open(self.PwyFile) as pwy_fh:
            reader = DictReader(pwy_fh, dialect='excel-tab')
            for line in reader:
                if "rxn" not in line['ModelSEED ID']:
                    continue

                if line['ModelSEED ID'] not in pathways_dict:
                    pathways_dict[line['ModelSEED ID']] = dict()

                if line['Source'] not in pathways_dict[line['ModelSEED ID']]:
                    pathways_dict[line['ModelSEED ID']][
                        line['Source']] = list()

                pathways_dict[line['ModelSEED ID']][line['Source']].append(
                    line['External ID'])

        return pathways_dict

    def loadECs(self):
        """Load EC numbers as ``{reaction id: [EC numbers]}``."""
        ecs_dict = dict()
        with open(self.ECFile) as ecs_fh:
            reader = DictReader(ecs_fh, dialect='excel-tab')
            for line in reader:
                if "rxn" not in line['ModelSEED ID']:
                    continue

                if line['ModelSEED ID'] not in ecs_dict:
                    ecs_dict[line['ModelSEED ID']] = list()

                ecs_dict[line['ModelSEED ID']].append(line['External ID'])

        return ecs_dict
#!/usr/bin/env python import os, sys from csv import DictReader temp = list() header = 1 sys.path.append('../../Libs/Python') from BiochemPy import Reactions, Compounds, InChIs CompoundsHelper = Compounds() Compounds_Dict = CompoundsHelper.loadCompounds() Aliases_Dict = CompoundsHelper.loadMSAliases() Names_Dict = CompoundsHelper.loadNames() Source_Classes = dict() reader = DictReader( open('../../../Biochemistry/Aliases/Source_Classifiers.txt'), dialect='excel-tab') for line in reader: if (line['Source Type'] not in Source_Classes): Source_Classes[line['Source Type']] = dict() Source_Classes[line['Source Type']][line['Source ID']] = 1 for cpd in sorted(Compounds_Dict.keys()): if (cpd not in Aliases_Dict): continue Cpd_Aliases = dict() Alias_Count = 0 for source_type in 'Primary Database', 'Secondary Database', 'Published Model': for source in sorted(Aliases_Dict[cpd].keys()):
class Reactions:
    """Load, parse, balance, rebuild and save ModelSEED reaction records.

    A "reagent" throughout this class is a dict describing one compound in
    one compartment of a reaction, with the keys ``reagent`` (id built as
    ``<compound>_<compartment><index>``), ``coefficient`` (negative on the
    left-hand side, positive on the right), ``compound``, ``compartment``,
    ``index``, ``name``, ``formula`` and ``charge``.
    """

    def __init__(self,
                 biochem_root='../../Biochemistry/',
                 rxns_file='reactions.tsv'):
        """Store data file paths, read the reaction headers, load compounds.

        Args:
            biochem_root: Path prefix of the biochemistry directory.
            rxns_file: Name of the tab-delimited reactions file found in
                ``biochem_root``.
        """
        self.BiochemRoot = biochem_root
        self.RxnsFile = biochem_root + rxns_file
        self.AliasFile = biochem_root + "Aliases/Reactions_Aliases.tsv"

        # Only the header row is needed here, so the file is closed at once.
        with open(self.RxnsFile) as rxns_fh:
            self.Headers = DictReader(rxns_fh, dialect='excel-tab').fieldnames

        # Function-scope import kept from the original code
        # (presumably avoids a circular import -- TODO confirm).
        from BiochemPy import Compounds
        self.CompoundsHelper = Compounds()
        self.Compounds_Dict = self.CompoundsHelper.loadCompounds()

    def loadReactions(self):
        """Read the reactions file into a dict keyed by reaction id.

        The "is_transport" and "is_obsolete" columns are converted to int;
        every other column is kept as the string read from the file.
        """
        rxns_dict = dict()
        with open(self.RxnsFile) as rxns_fh:
            reader = DictReader(rxns_fh, dialect='excel-tab')
            for line in reader:
                for header in ["is_transport", "is_obsolete"]:
                    line[header] = int(line[header])
                rxns_dict[line['id']] = line
        return rxns_dict

    def parseStoich(self, stoichiometry):
        """Parse a stoichiometry string into a list of reagent dicts.

        The string holds ";"-separated entries of the form
        ``coefficient:compound:compartment:index:name`` (the name may itself
        contain ":"). An empty string yields an empty list.
        """
        rxn_cpds_array = list()

        # An empty reaction has no entries to unpack
        if stoichiometry == "":
            return rxn_cpds_array

        for rgt in stoichiometry.split(";"):
            (coeff, cpd, cpt, index, name) = rgt.split(":", 4)
            rgt_id = cpd + "_" + cpt + index

            coeff = float(coeff)
            # Correct for redundant ".0" in floats
            if str(coeff)[-2:] == ".0":
                coeff = int(round(coeff))

            rxn_cpds_array.append({
                "reagent": rgt_id,
                "coefficient": coeff,
                "compound": cpd,
                "compartment": int(cpt),
                "index": int(index),
                "name": name,
                "formula": self.Compounds_Dict[cpd]["formula"],
                "charge": self.Compounds_Dict[cpd]["charge"]
            })
        return rxn_cpds_array

    @staticmethod
    def isTransport(rxn_cpds_array):
        """Return 1 if the reagents span more than one compartment, else 0."""
        compartments = {rgt['compartment'] for rgt in rxn_cpds_array}
        return 1 if len(compartments) > 1 else 0

    def generateCodes(self, rxns_dict):
        """Group reaction ids by their code (see ``generateCode``).

        Reactions whose status is "EMPTY" are skipped.

        Returns:
            dict mapping code to ``{reaction id: 1}``.
        """
        codes_dict = dict()
        for rxn in rxns_dict:
            if rxns_dict[rxn]['status'] == "EMPTY":
                continue
            code = self.generateCode(rxns_dict[rxn]['stoichiometry'])
            if code not in codes_dict:
                codes_dict[code] = dict()
            codes_dict[code][rxn] = 1
        return codes_dict

    def generateCode(self, stoichiometry):
        """Return a direction-independent string that identifies a reaction.

        Each side is rendered as "|"-joined ``reagent:abs(coefficient)``
        entries and the two sides are sorted before being joined with "|=|".
        """
        rxn_cpds_array = self.parseStoich(stoichiometry)

        # It matters if its a transport reaction, and we include protons when matching transport
        is_transport = self.isTransport(rxn_cpds_array)

        # It matters which side of the equation, so build reagents and products arrays
        reagents = list()
        products = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x: (x["reagent"], x["coefficient"])):
            # skip protons
            if "cpd00067" in rgt["reagent"] and is_transport == 0:
                continue

            entry = rgt["reagent"] + ":" + str(abs(rgt["coefficient"]))
            if rgt["coefficient"] < 0:
                reagents.append(entry)
            if rgt["coefficient"] > 0:
                products.append(entry)

        rgt_string = "|".join(reagents)
        pdt_string = "|".join(products)
        # Sorting the overall strings here helps with matching transporters
        return "|=|".join(sorted([rgt_string, pdt_string]))

    @staticmethod
    def buildStoich(rxn_cpds_array):
        """Serialise reagent dicts back into a stoichiometry string.

        Entries with non-positive coefficients come first, then by reagent
        id. Note that the ``coefficient``, ``compartment`` and ``index``
        values of the passed dicts are converted to strings in place.
        """
        stoichiometry_array = list()
        for rgt in sorted(rxn_cpds_array,
                          key=lambda x: (int(x["coefficient"] > 0), x["reagent"])):
            # Correct for redundant ".0" in floats
            if str(rgt["coefficient"])[-2:] == ".0":
                rgt["coefficient"] = int(round(rgt["coefficient"]))

            rgt["coefficient"] = str(rgt["coefficient"])
            rgt["compartment"] = str(rgt["compartment"])
            rgt["index"] = str(rgt["index"])

            stoichiometry_array.append(":".join([
                rgt["coefficient"], rgt["compound"], rgt["compartment"],
                rgt["index"], rgt["name"]
            ]))
        return ";".join(stoichiometry_array)

    def balanceReaction(self, rgts_array):
        """Check mass and charge balance of a reaction.

        The passed reagent dicts are not modified.

        Returns:
            "EMPTY" for no reagents, "ERROR: Duplicate reagents" when a
            reagent id occurs more than once, "CPDFORMERROR" when a compound
            formula yields no atoms, "OK" when balanced, and otherwise
            "MI:<atom>:<n>/..." and/or "CI:<n>" joined by "|".
        """
        if len(rgts_array) == 0:
            return "EMPTY"

        ########################################
        # Check that each reagent is either a
        # different compound or in a different
        # compartment, and report.
        ########################################
        rgts_dict = dict()
        for rgt in rgts_array:
            if rgt["reagent"] not in rgts_dict:
                rgts_dict[rgt["reagent"]] = 0
            rgts_dict[rgt["reagent"]] += 1

        for rgt in rgts_dict.keys():
            if rgts_dict[rgt] > 1:
                return "ERROR: Duplicate reagents"

        ########################################
        # Check for duplicate compounds in
        # different compartments, these are
        # balanced directly.
        #######################################
        cpds_coeff_dict = dict()
        for rgt in rgts_array:
            cpd = rgt["compound"]
            if cpd not in cpds_coeff_dict:
                cpds_coeff_dict[cpd] = 0

            # Use float() because you can get real coefficients
            cpds_coeff_dict[cpd] += float(rgt["coefficient"])

        # Build dict of compounds
        cpds_dict = dict()
        for rgt in rgts_array:
            # Shallow copy: only "coefficient" is replaced, so the caller's
            # reagent keeps its own per-compartment coefficient.
            proxy_rgt = dict(rgt)
            proxy_rgt["coefficient"] = cpds_coeff_dict[rgt["compound"]]
            cpds_dict[rgt["compound"]] = proxy_rgt

        ########################################
        # Check for duplicate elements, across
        # all compounds, these are balanced
        # directly.
        #######################################
        rxn_net_charge = 0.0
        rxn_net_mass = dict()
        for cpd in cpds_dict.keys():
            cpd_atoms = self.CompoundsHelper.parseFormula(
                cpds_dict[cpd]["formula"])

            if len(cpd_atoms.keys()) == 0:
                return "CPDFORMERROR"

            cpd_coeff_charge = float(cpds_dict[cpd]["charge"]) * float(
                cpds_dict[cpd]["coefficient"])
            rxn_net_charge += cpd_coeff_charge

            for atom in cpd_atoms.keys():
                atom_coeff_mass = float(cpd_atoms[atom]) * float(
                    cpds_dict[cpd]["coefficient"])

                if atom not in rxn_net_mass.keys():
                    rxn_net_mass[atom] = 0.0

                rxn_net_mass[atom] += atom_coeff_mass

        # Round out tiny numbers that occur because we add/substract floats
        # Threshold of 1e-6 found to capture all these instances without
        # removing actual small differences in mass.
        for atom in rxn_net_mass.keys():
            if rxn_net_mass[atom] > -1e-6 and rxn_net_mass[atom] < 1e-6:
                rxn_net_mass[atom] = 0

        if rxn_net_charge > -1e-6 and rxn_net_charge < 1e-6:
            rxn_net_charge = 0

        # Report any imbalance
        imbalanced_atoms_array = list()
        for atom in sorted(rxn_net_mass.keys()):
            if rxn_net_mass[atom] == 0:
                continue

            # Correct for redundant ".0" in floats
            if str(rxn_net_mass[atom])[-2:] == ".0":
                rxn_net_mass[atom] = int(round(rxn_net_mass[atom]))

            imbalanced_atoms_array.append(atom + ":" +
                                          str(rxn_net_mass[atom]))

        # Correct for redundant ".0" in floats
        if str(rxn_net_charge)[-2:] == ".0":
            rxn_net_charge = int(rxn_net_charge)

        status = ""
        if len(imbalanced_atoms_array) > 0:
            status = "MI:" + "/".join(imbalanced_atoms_array)

        if rxn_net_charge != 0:
            if len(status) == 0:
                status = "CI:" + str(rxn_net_charge)
            else:
                status += "|CI:" + str(rxn_net_charge)

        if status == "":
            status = "OK"

        return status

    def adjustCompound(self,
                       rxn_cpds_array,
                       compound,
                       adjustment,
                       compartment=0):
        """Subtract ``adjustment`` from a compound's coefficient, in place.

        The compound is added (index 0) when it is not yet present in the
        given compartment and removed when its coefficient reaches zero.

        Returns:
            ``rxn_cpds_array`` when ``adjustment`` is 0, otherwise None (the
            list is modified in place). This asymmetry is kept from the
            original for backward compatibility.
        """
        if adjustment == 0:
            return rxn_cpds_array

        ######################################################################
        # We will always assume to adjust a compound automatically
        # in the compartment indexed as zero, unless otherwise specified.
        # This answers the question of how to handle transporters.
        ######################################################################

        # Check to see if it already exists
        cpd_exists = 0
        cpd_remove = {}
        for rgt in rxn_cpds_array:
            if (rgt["compound"] == compound
                    and rgt["compartment"] == compartment):
                rgt["coefficient"] -= adjustment
                cpd_exists = 1
                if rgt["coefficient"] == 0:
                    cpd_remove = rgt

        if cpd_exists != 1:
            rgt_id = compound + "_" + str(compartment) + "0"

            rxn_cpds_array.append({
                "reagent": rgt_id,
                "coefficient": 0 - adjustment,
                "compound": compound,
                "compartment": compartment,
                "index": 0,
                "name": self.Compounds_Dict[compound]["name"],
                "formula": self.Compounds_Dict[compound]["formula"],
                "charge": self.Compounds_Dict[compound]["charge"]
            })

        if len(cpd_remove.keys()) > 0:
            rxn_cpds_array.remove(cpd_remove)

        return

    def rebuildReaction(self, reaction_dict, stoichiometry):
        """Store a stoichiometry on a reaction and regenerate derived fields.

        Sets "stoichiometry", "code", "equation", "definition" and
        "compound_ids" on ``reaction_dict`` in place, using the reaction's
        "direction" to pick the arrow.
        """
        # Assign stoich
        reaction_dict["stoichiometry"] = stoichiometry

        # Build list of "reagents" and "products"
        rxn_cpds_array = self.parseStoich(stoichiometry)
        reagents_array = list()
        products_array = list()
        compound_ids_dict = dict()
        for rgt in rxn_cpds_array:
            compound_ids_dict[rgt["compound"]] = 1
            if rgt["coefficient"] > 0:
                products_array.append(rgt)
            else:
                reagents_array.append(rgt)

        rgts_str__array = list()
        for rgt in reagents_array:
            id_string = "(" + str(abs(
                rgt["coefficient"])) + ") " + rgt["compound"] + "[" + str(
                    rgt["compartment"]) + "]"
            rgts_str__array.append(id_string)

        equation_array = list()
        code_array = list()
        definition_array = list()

        equation_array.append(" + ".join(rgts_str__array))
        definition_array.append(" + ".join(rgts_str__array))
        # The code omits protons and is always written as reversible
        code_array.append(" + ".join(x for x in rgts_str__array
                                     if "cpd00067" not in x))

        code_array.append("<=>")
        if reaction_dict["direction"] == "=":
            equation_array.append("<=>")
            definition_array.append("<=>")
        elif reaction_dict["direction"] == "<":
            equation_array.append("<=")
            definition_array.append("<=")
        else:
            equation_array.append("=>")
            definition_array.append("=>")

        pdts_str_array = list()
        for rgt in products_array:
            id_string = "(" + str(abs(
                rgt["coefficient"])) + ") " + rgt["compound"] + "[" + str(
                    rgt["compartment"]) + "]"
            pdts_str_array.append(id_string)

        equation_array.append(" + ".join(pdts_str_array))
        definition_array.append(" + ".join(pdts_str_array))
        code_array.append(" + ".join(x for x in pdts_str_array
                                     if "cpd00067" not in x))

        reaction_dict["code"] = " ".join(code_array)
        reaction_dict["equation"] = " ".join(equation_array)
        reaction_dict["definition"] = " ".join(definition_array)
        reaction_dict["compound_ids"] = ";".join(
            sorted(compound_ids_dict.keys()))

        # Replace ids with names in Definition
        for cpd_id in compound_ids_dict.keys():
            if cpd_id in reaction_dict["definition"]:
                reaction_dict["definition"] = reaction_dict[
                    "definition"].replace(cpd_id,
                                          self.Compounds_Dict[cpd_id]["name"])

        return

    def saveReactions(self, reactions_dict):
        """Write reactions to ``<reactions file root>.tsv`` and ``.json``.

        The TSV holds one row per reaction (sorted by id) with the columns in
        ``self.Headers``; the JSON file holds ``reactions_dict`` itself.
        """
        rxns_root = os.path.splitext(self.RxnsFile)[0]

        # Print to TSV
        with open(rxns_root + ".tsv", 'w') as rxns_file:
            rxns_file.write("\t".join(self.Headers) + "\n")
            for rxn in sorted(reactions_dict.keys()):
                rxns_file.write("\t".join(
                    str(reactions_dict[rxn][header])
                    for header in self.Headers) + "\n")

        # Print to JSON
        with open(rxns_root + ".json", 'w') as rxns_file:
            rxns_file.write(
                json.dumps(reactions_dict, indent=4, sort_keys=True))

    def loadMSAliases(self, sources_array=None):
        """Load reaction aliases as ``{reaction id: {source: [aliases]}}``.

        Args:
            sources_array: Source names to keep. None or an empty list
                returns an empty dict without reading the alias file.
        """
        if sources_array is None or len(sources_array) == 0:
            return {}

        aliases_dict = dict()
        with open(self.AliasFile) as alias_fh:
            reader = DictReader(alias_fh, dialect='excel-tab')
            for line in reader:
                if "rxn" not in line['MS ID']:
                    continue

                if line['Source'] not in sources_array:
                    continue

                if line['MS ID'] not in aliases_dict:
                    aliases_dict[line['MS ID']] = dict()

                if line['Source'] not in aliases_dict[line['MS ID']]:
                    aliases_dict[line['MS ID']][line['Source']] = list()

                aliases_dict[line['MS ID']][line['Source']].append(
                    line['External ID'])

        return aliases_dict