def curate_metabolites_references(metabolite_curation=None, metabolites=None):
    """
    Curates references for a metabolite.

    Includes novel references and removes erroneous references to HMDB and
    PubChem for a single metabolite. The metabolite's record changes in place
    within "metabolites", and that same collection is the return value. A
    curation whose identifier is absent from "metabolites" changes nothing.
    Empty or missing (None) curation values mean "no change".

    arguments:
        metabolite_curation (dict<str>): information to change about a
            metabolite
        metabolites (dict<dict>): information about metabolites

    returns:
        (dict<dict>): information about metabolites

    raises:

    """

    # Interpretation.
    # Read every curation value before any change so that an incomplete
    # curation record fails early, regardless of the identifier.
    identifier = metabolite_curation["identifier_novel"]
    curations = (
        (
            "hmdb",
            metabolite_curation["hmdb_novel"],
            metabolite_curation["hmdb_error"],
        ),
        (
            "pubchem",
            metabolite_curation["pubchem_novel"],
            metabolite_curation["pubchem_error"],
        ),
    )
    if identifier not in metabolites:
        return metabolites
    references = metabolites[identifier]["references"]
    # References to HMDB and PubChem follow the same procedure.
    for source, reference_novel, reference_error in curations:
        if reference_novel:
            # Include the novel reference without redundancy.
            references_original = references[source]
            references_original.append(reference_novel)
            references[source] = utility.collect_unique_elements(
                references_original
            )
        if reference_error:
            # Exclude every occurrence of the erroneous reference.
            references[source] = [
                reference for reference in references[source]
                if reference != reference_error
            ]
    return metabolites
def include_reaction_transport_processes(
    reaction_original=None,
    processes_transports=None
):
    """
    Includes information about a reaction's transport processes.

    Combines the processes that the reaction's record already lists with the
    processes in which the reaction participates by transport, and stores the
    unique collection in a copy of the reaction's record. The original record
    does not change here.

    arguments:
        reaction_original (dict): information about a reaction
        processes_transports (dict<dict<list<str>>>): information about
            transports in processes

    returns:
        (dict): information about a reaction

    raises:

    """

    # Determine processes in which the reaction participates by transport.
    transport_processes = collect_reaction_transport_processes(
        reaction=reaction_original,
        processes_transports=processes_transports
    )
    # Combine these with the reaction's own processes without redundancy.
    combined_processes = utility.collect_unique_elements(
        elements_original=reaction_original["processes"] + transport_processes
    )
    # Compile information in a copy of the record.
    reaction_novel = copy.deepcopy(reaction_original)
    reaction_novel["processes"] = combined_processes
    return reaction_novel
def enhance_metabolite_references(
    name=None,
    references_original=None,
    summary_hmdb=None
):
    """
    Enhances a metabolite's references by including references from HMDB.

    Matches the metabolite to entries in HMDB by its current HMDB identifiers
    and by its name, collects the references of those entries, and combines
    them with the metabolite's original references. The original references
    do not change here; the result is a copy.

    arguments:
        name (str): name of metabolite
        references_original (dict): references about a metabolite
        summary_hmdb (dict<dict>): information about metabolites from Human
            Metabolome Database (HMDB)

    returns:
        (dict): references about a metabolite

    raises:

    """

    # Copy information.
    references_novel = copy.deepcopy(references_original)
    # Match the metabolite to entries in HMDB.
    entries_hmdb = utility.match_hmdb_entries_by_identifiers_names(
        identifiers=references_novel["hmdb"],
        names=[name],
        summary_hmdb=summary_hmdb
    )
    # Extract references from the matching entries.
    supplement = collect_hmdb_entries_references(
        keys=entries_hmdb,
        summary_hmdb=summary_hmdb
    )
    # References to HMDB come only from the matching entries.
    # NOTE(review): unlike the other sources, the original HMDB identifiers
    # are not concatenated here - confirm that this replacement is intended.
    references_novel["hmdb"] = utility.collect_unique_elements(
        supplement["hmdb"]
    )
    # Other sources combine original and supplemental references.
    for source in ("pubchem", "chebi", "kegg"):
        references_novel[source] = utility.collect_unique_elements(
            references_original[source] + supplement[source]
        )
    return references_novel
def access_reactions_summary(
    reactions_interest=None,
    reactions=None,
    directory=None
):
    """
    Accesses summary information about reactions of interest.

    Each summary record holds the reaction's identifier and name along with
    its references to MetaNetX, genes, and enzymes, each joined by ";".

    arguments:
        reactions_interest (list<dict<str>>): records with identifiers of
            reactions of interest
        reactions (dict<dict>): information about reactions
        directory (str): path to directory for source and product files; this
            function does not use it

    raises:

    returns:
        (list<dict<str>>): summary information about reactions of interest

    """

    def summarize(identifier):
        # Compose the summary record for a single reaction.
        reaction = reactions[identifier]
        return {
            "identifier": identifier,
            "name": reaction["name"],
            "metanetx": ";".join(reaction["references"]["metanetx"]),
            "gene": ";".join(reaction["references"]["gene"]),
            "enzyme": ";".join(reaction["references"]["enzyme"])
        }

    # Collect unique identifiers of reactions of interest.
    identifiers_unique = utility.collect_unique_elements(
        utility.collect_value_from_records(
            key="identifier",
            records=reactions_interest
        )
    )
    # Return information.
    return [summarize(identifier) for identifier in identifiers_unique]
def define_reaction_node(
    reaction_candidacy_identifier=None,
    reactions_candidacy=None,
    reactions=None,
    metabolites=None,
    compartments=None,
    processes=None
):
    """
    Defines information about a reaction's node.

    The node takes its identifier, name, reversibility, and replicates from
    the candidate reaction, and the names of its metabolites, compartments,
    and processes from the reaction to which the candidate refers.

    arguments:
        reaction_candidacy_identifier (str): identifier of a candidate
            reaction
        reactions_candidacy (dict<dict>): information about candidate
            reactions
        reactions (dict<dict>): information about reactions
        metabolites (dict<dict>): information about metabolites
        compartments (dict<dict>): information about compartments
        processes (dict<dict>): information about processes

    raises:

    returns:
        (dict): information about a reaction's node

    """

    # Access information.
    candidate = reactions_candidacy[reaction_candidacy_identifier]
    reaction = reactions[candidate["reaction"]]

    def collect_participants_unique(key):
        # Unique values of a key across the reaction's participants.
        return utility.collect_unique_elements(
            elements_original=utility.collect_value_from_records(
                key=key,
                records=reaction["participants"]
            )
        )

    def collect_names(identifiers, reference):
        # Names of the records to which the identifiers refer.
        return utility.collect_values_from_records_in_reference(
            key="name",
            identifiers=identifiers,
            reference=reference
        )

    # Compartments.
    compartments_names = collect_names(
        collect_participants_unique("compartment"), compartments
    )
    # Processes.
    processes_names = collect_names(reaction["processes"], processes)
    # Metabolites.
    metabolites_names = collect_names(
        collect_participants_unique("metabolite"), metabolites
    )
    # Compile and return information.
    return {
        "identifier": candidate["identifier"],
        "type": "reaction",
        "entity": reaction["identifier"],
        "name": candidate["name"],
        "reversibility": candidate["reversibility"],
        "metabolites": ";".join(metabolites_names),
        "compartments": ";".join(compartments_names),
        "processes": ";".join(processes_names),
        "replicates": candidate["replicates"],
    }
def extract_hmdb_record_summary(element=None, space=None, spaces=None):
    """
    Extracts information about a metabolite from Human Metabolome Database
    (HMDB).

    The summary includes the primary accession as identifier, the name, the
    synonyms (including the name), all HMDB accessions (secondary and
    primary), and references to PubChem, ChEBI, and KEGG.

    arguments:
        element (object): element within XML tree
        space (str): name of specific name space within XML document
        spaces (dict<str>): name spaces within XML document

    raises:

    returns:
        (dict<str>): information about a metabolite from HMDB

    """

    # Every extraction uses the same name space.
    context = {"space": space, "spaces": spaces}

    # HMDB identifiers: secondary accessions along with the primary one.
    hmdb_primary = extract_subelement_value(
        element=element, tag="accession", **context
    )
    secondary_element = extract_subelement(
        element=element, tag="secondary_accessions", **context
    )
    accessions = extract_subelement_values(
        element=secondary_element, tag="accession", **context
    )
    accessions.append(hmdb_primary)
    references_hmdb = utility.collect_unique_elements(accessions)

    # Name.
    name = extract_subelement_value(element=element, tag="name", **context)

    # Synonyms, including the name itself.
    synonyms_element = extract_subelement(
        element=element, tag="synonyms", **context
    )
    synonyms_values = extract_subelement_values(
        element=synonyms_element, tag="synonym", **context
    )
    synonyms_values.append(name)
    synonyms = utility.collect_unique_elements(synonyms_values)

    # References.
    pubchem_tentative = extract_subelement_value(
        element=element, tag="pubchem_compound_id", **context
    )
    # Multiple entries have references to identifier "0" for PubChem.
    # This identifier is nonsense and erroneous, so treat it as missing.
    reference_pubchem = (
        None if pubchem_tentative == "0" else pubchem_tentative
    )
    reference_chebi = extract_subelement_value(
        element=element, tag="chebi_id", **context
    )
    reference_kegg = extract_subelement_value(
        element=element, tag="kegg_id", **context
    )

    # Compile and return information.
    return {
        "identifier": hmdb_primary,
        "name": name,
        "synonyms": synonyms,
        "references_hmdb": references_hmdb,
        "reference_pubchem": reference_pubchem,
        "reference_chebi": reference_chebi,
        "reference_kegg": reference_kegg
    }
def convert_reactions_export_text(
    reactions=None,
    metabolites=None,
    compartments=None,
    processes=None,
):
    """
    Converts information about reactions to text format for export.

    Converts identifiers of metabolites, compartments, and processes to
    names, and joins each collection of values to a single string.

    arguments:
        reactions (dict<dict>): information about reactions
        metabolites (dict<dict>): information about metabolites
        compartments (dict<dict>): information about compartments
        processes (dict<dict>): information about processes

    returns:
        (list<dict>): information about reactions

    raises:

    """

    def collect_names(identifiers, reference):
        # Names of the records to which the identifiers refer.
        return utility.collect_values_from_records_in_reference(
            key="name",
            identifiers=identifiers,
            reference=reference,
        )

    def collect_role_metabolites(role, participants):
        # Identifiers of metabolites that participate in a specific role.
        return utility.collect_reaction_participants_value(
            key="metabolite",
            criteria={"roles": [role]},
            participants=participants
        )

    def convert(reaction):
        participants = reaction["participants"]
        references = reaction["references"]
        # Compartments.
        compartments_names = collect_names(
            utility.collect_unique_elements(
                elements_original=utility.collect_value_from_records(
                    key="compartment", records=participants
                )
            ),
            compartments
        )
        # Processes.
        processes_names = collect_names(reaction["processes"], processes)
        # Metabolites.
        reactants_names = collect_names(
            collect_role_metabolites("reactant", participants), metabolites
        )
        products_names = collect_names(
            collect_role_metabolites("product", participants), metabolites
        )
        # Compile information.
        # NOTE(review): "processes" joins by ";" whereas every other field
        # joins by "; " - confirm whether this difference is intended.
        return {
            "identifier": reaction["identifier"],
            "name": reaction["name"],
            "reactants": "; ".join(reactants_names),
            "products": "; ".join(products_names),
            "compartments": "; ".join(compartments_names),
            "processes": ";".join(processes_names),
            "reversibility": reaction["reversibility"],
            "reference_metanetx": "; ".join(references["metanetx"]),
            "reference_recon2m2": "; ".join(references["recon2m2"]),
            "reference_gene": "; ".join(references["gene"]),
            "reference_enzyme": "; ".join(references["enzyme"]),
            "reference_kegg": "; ".join(references["kegg"]),
            "reference_reactome": "; ".join(references["reactome"]),
            "reference_metacyc": "; ".join(references["metacyc"]),
            "reference_bigg": "; ".join(references["bigg"]),
        }

    return [convert(reaction) for reaction in reactions.values()]
def convert_reactions_text(reactions=None):
    """
    Converts information about reactions to text format.

    Each record keeps the reaction's scalar attributes and joins each
    collection of values (metabolites, compartments, processes, transports,
    replicates, and references) to a single string with ";".

    arguments:
        reactions (dict<dict>): information about reactions

    returns:
        (list<dict>): information about reactions

    raises:

    """

    def collect_unique(key, records):
        # Unique values of a key across records.
        return utility.collect_unique_elements(
            elements_original=utility.collect_value_from_records(
                key=key, records=records
            )
        )

    def convert(reaction):
        # Participants.
        compartments_unique = collect_unique(
            "compartment", reaction["participants"]
        )
        metabolites_unique = collect_unique(
            "metabolite", reaction["participants"]
        )
        # Transports.
        transport_metabolites = utility.collect_value_from_records(
            key="metabolite", records=reaction["transports"]
        )
        # Each transport holds multiple compartments, hence the distinct
        # utility function for this key.
        transport_compartments_unique = utility.collect_unique_elements(
            elements_original=utility.collect_values_from_records(
                key="compartments", records=reaction["transports"]
            )
        )

        def join_reference(source):
            return ";".join(reaction["references"][source])

        # Compile information.
        return {
            "identifier": reaction["identifier"],
            "name": reaction["name"],
            "equation": reaction["equation"],
            "metabolites": ";".join(metabolites_unique),
            "compartments": ";".join(compartments_unique),
            "processes": ";".join(reaction["processes"]),
            "reversibility": reaction["reversibility"],
            "conversion": reaction["conversion"],
            "dispersal": reaction["dispersal"],
            "transport": reaction["transport"],
            "transport_metabolites": ";".join(transport_metabolites),
            "transport_compartments": ";".join(
                transport_compartments_unique
            ),
            "replication": reaction["replication"],
            "replicates": ";".join(reaction["replicates"]),
            "reference_metanetx": join_reference("metanetx"),
            "reference_recon2m2": join_reference("recon2m2"),
            "reference_gene": join_reference("gene"),
            "reference_enzyme": join_reference("enzyme"),
            "reference_kegg": join_reference("kegg"),
            "reference_reactome": join_reference("reactome"),
            "reference_metacyc": join_reference("metacyc"),
            "reference_bigg": join_reference("bigg"),
            "reference_rhea": join_reference("rhea"),
            "reference_sabiork": join_reference("sabiork"),
            "reference_seed": join_reference("seed")
        }

    return [convert(reaction) for reaction in reactions.values()]
def collect_reaction_transports(reaction=None):
    """
    Collects information about a reaction's transports.

    A metabolite counts as a transport when it participates as both reactant
    and product and its compartments in those two roles differ.

    This procedure applies an overly restrictive definition of transport that
    requires chemically-identical metabolites in two separate compartments.
    Some transports involve chemical conversion of substrates as part of
    transport.

    arguments:
        reaction (dict): information about a reaction

    returns:
        (list<dict>): information about a reaction's transports

    raises:

    """

    participants = reaction["participants"]

    def collect_metabolites(role):
        # Metabolites that participate in a specific role.
        return utility.collect_reaction_participants_value(
            key="metabolite",
            criteria={"roles": [role]},
            participants=participants
        )

    def collect_compartments(metabolite, role):
        # Compartments of a specific metabolite in a specific role.
        return utility.collect_reaction_participants_value(
            key="compartment",
            criteria={"metabolites": [metabolite], "roles": [role]},
            participants=participants
        )

    reactants = collect_metabolites("reactant")
    products = collect_metabolites("product")
    # Collect metabolites that participate as both reactants and products.
    candidates = utility.filter_common_elements(
        list_one=products, list_two=reactants
    )
    transports = []
    for metabolite in candidates:
        # Determine metabolite's compartments as reactant and product.
        origins = collect_compartments(metabolite, "reactant")
        destinations = collect_compartments(metabolite, "product")
        # Without any difference between these compartments, the metabolite
        # does not transport.
        if utility.compare_lists_by_mutual_inclusion(
            list_one=origins, list_two=destinations
        ):
            continue
        transports.append({
            "metabolite": metabolite,
            "compartments": utility.collect_unique_elements(
                elements_original=origins + destinations
            )
        })
    return transports
def curate_processes(
    processes_curation=None,
    processes_original=None,
    reactions_original=None
):
    """
    Curates information about specific processes and relevant reactions.

    Each curation record either removes a process (novel identifier "null"),
    changes a process's identifier, changes a process's name, or both of the
    latter. Original collections of processes and reactions do not change;
    the procedure works on copies.

    Removal of a process also removes its identifier from every reaction's
    processes, so that reactions do not refer to a process that no longer
    exists. Removal of a process does not justify removal of any reactions
    that participate in that process.

    Change of an identifier removes the original process. Reactions change
    to the novel identifier only if a process with the novel identifier
    exists.
    NOTE(review): if the novel identifier does not exist, then the original
    process disappears while reactions still refer to it - confirm intent.

    arguments:
        processes_curation (list<dict<str>>): information to change about
            specific processes
        processes_original (dict<dict>): information about processes
        reactions_original (dict<dict>): information about reactions

    returns:
        (dict<dict<dict>>): information about processes and reactions

    raises:

    """

    # Copy information.
    processes_novel = copy.deepcopy(processes_original)
    reactions_novel = copy.deepcopy(reactions_original)
    for record in processes_curation:
        # Interpretation.
        identifier_original = record["identifier_original"]
        identifier_novel = record["identifier_novel"]
        name_original = record["name_original"]
        name_novel = record["name_novel"]
        if identifier_novel == "null":
            # Remove process.
            if identifier_original in processes_novel:
                del processes_novel[identifier_original]
                # Remove the process from reactions' processes.
                for reaction in reactions_novel.values():
                    if identifier_original in reaction["processes"]:
                        reaction["processes"] = [
                            process for process in reaction["processes"]
                            if process != identifier_original
                        ]
            continue
        if identifier_original != identifier_novel:
            # Change identifier.
            # Remove original.
            processes_novel.pop(identifier_original, None)
            # Replace with novel.
            if identifier_novel in processes_novel:
                for reaction in reactions_novel.values():
                    if identifier_original in reaction["processes"]:
                        processes_replaced = [
                            identifier_novel
                            if process == identifier_original else process
                            for process in reaction["processes"]
                        ]
                        # Replacement can introduce redundancy.
                        reaction["processes"] = (
                            utility.collect_unique_elements(
                                processes_replaced
                            )
                        )
        if name_original != name_novel:
            # Change name.
            if identifier_novel in processes_novel:
                processes_novel[identifier_novel]["name"] = name_novel
    # Compile and return information.
    return {"processes": processes_novel, "reactions": reactions_novel}