示例#1
0
def curate_metabolites_references(metabolite_curation=None, metabolites=None):
    """
    Curates references for a metabolite.

    Includes novel references in and removes erroneous references from the
    metabolite's references to HMDB and PubChem. The metabolite's record
    changes in place within "metabolites", and the function returns that same
    dictionary. An empty or missing (None) value in the curation record means
    that there is nothing to change for that value. If the metabolite's
    identifier is not in "metabolites", then nothing changes.

    arguments:
        metabolite_curation (dict<str>): information to change about a
            metabolite
        metabolites (dict<dict>): information about metabolites

    returns:
        (dict<dict>): information about metabolites

    raises:
        KeyError: if the curation record lacks any of the keys that this
            function reads

    """

    # Interpretation.
    # Read all values before any change so that an incomplete curation record
    # fails before partial change to the metabolite.
    identifier = metabolite_curation["identifier_novel"]
    changes = (
        (
            "hmdb",
            metabolite_curation["hmdb_novel"],
            metabolite_curation["hmdb_error"],
        ),
        (
            "pubchem",
            metabolite_curation["pubchem_novel"],
            metabolite_curation["pubchem_error"],
        ),
    )
    if identifier not in metabolites:
        return metabolites
    references = metabolites[identifier]["references"]
    # Apply identical procedure to each source of references.
    # For each source, include the novel reference before removing the
    # erroneous reference.
    for source, novel, error in changes:
        if novel:
            # Concatenate to a new list rather than append in place so that
            # the previous list object does not change.
            references[source] = utility.collect_unique_elements(
                references[source] + [novel])
        if error:
            references[source] = [
                reference for reference in references[source]
                if reference != error
            ]
    return metabolites
示例#2
0
def include_reaction_transport_processes(reaction_original=None,
                                         processes_transports=None):
    """
    Includes a reaction's transport processes among its processes

    Combines the reaction's own processes with those that
    "collect_reaction_transport_processes" reports for it and keeps unique
    values. The original reaction does not change; the function returns a deep
    copy with the combined processes.

    arguments:
        reaction_original (dict): information about a reaction
        processes_transports (dict<dict<list<str>>>): information about
            transports in processes

    returns:
        (dict): information about a reaction

    raises:

    """

    # Processes in which the reaction participates by transport.
    transport_processes = collect_reaction_transport_processes(
        reaction=reaction_original,
        processes_transports=processes_transports,
    )
    # Union of original processes and transport processes.
    combined_processes = utility.collect_unique_elements(
        elements_original=(
            reaction_original["processes"] + transport_processes
        )
    )
    # Copy the reaction and replace its processes.
    reaction_novel = copy.deepcopy(reaction_original)
    reaction_novel["processes"] = combined_processes
    return reaction_novel
示例#3
0
def enhance_metabolite_references(name=None,
                                  references_original=None,
                                  summary_hmdb=None):
    """
    Supplements a metabolite's references with references from HMDB

    Matches entries in HMDB by the metabolite's original references to HMDB
    and by its name, then collects references from those entries. References
    to PubChem, ChEBI, and KEGG combine the original values with those from
    the entries. References to HMDB come only from the matching entries. The
    original references do not change; the function returns a deep copy.

    arguments:
        name (str): name of metabolite
        references_original (dict): references about a metabolite
        summary_hmdb (dict<dict>): information about metabolites from Human
            Metabolome Database (HMDB)

    returns:
        (dict): references about a metabolite

    raises:

    """

    # Work on a copy to preserve the original references.
    references_novel = copy.deepcopy(references_original)
    # Match entries in HMDB by identifiers and name.
    entries = utility.match_hmdb_entries_by_identifiers_names(
        identifiers=references_novel["hmdb"],
        names=[name],
        summary_hmdb=summary_hmdb,
    )
    # Collect references from the matching entries.
    supplement = collect_hmdb_entries_references(
        keys=entries, summary_hmdb=summary_hmdb
    )
    # References to HMDB come from the matching entries alone.
    references_novel["hmdb"] = utility.collect_unique_elements(
        supplement["hmdb"]
    )
    # Other references combine original and supplemental values.
    for source in ("pubchem", "chebi", "kegg"):
        references_novel[source] = utility.collect_unique_elements(
            references_original[source] + supplement[source]
        )
    return references_novel
示例#4
0
def access_reactions_summary(reactions_interest=None,
                             reactions=None,
                             directory=None):
    """
    Compiles a summary record for each unique reaction of interest.

    Each record gives the reaction's identifier and name along with its
    "metanetx", "gene", and "enzyme" references, each as a single string with
    semicolon delimiters.

    arguments:
        reactions_interest (list<dict<str>>): identifiers of reactions of
            interest
        reactions (dict<dict>): information about reactions
        directory (str): path to directory for source and product files; this
            function does not use it

    raises:

    returns:
        (list<dict<str>>): summary information about reactions of interest

    """

    def summarize(identifier):
        # Compile the summary record for a single reaction.
        reaction = reactions[identifier]
        return {
            "identifier": identifier,
            "name": reaction["name"],
            "metanetx": ";".join(reaction["references"]["metanetx"]),
            "gene": ";".join(reaction["references"]["gene"]),
            "enzyme": ";".join(reaction["references"]["enzyme"]),
        }

    # Unique identifiers of reactions of interest.
    identifiers_unique = utility.collect_unique_elements(
        utility.collect_value_from_records(
            key="identifier", records=reactions_interest
        )
    )
    return [summarize(identifier) for identifier in identifiers_unique]
示例#5
0
def define_reaction_node(
    reaction_candidacy_identifier=None,
    reactions_candidacy=None,
    reactions=None,
    metabolites=None,
    compartments=None,
    processes=None
):
    """
    Compiles the record for a candidate reaction's node.

    The node takes its identifier, name, reversibility, and replicates from
    the candidate reaction. It takes names of metabolites, compartments, and
    processes from the reaction to which the candidate refers, each as a
    single string with semicolon delimiters.

    arguments:
        reaction_candidacy_identifier (str): identifier of a candidate reaction
        reactions_candidacy (dict<dict>): information about candidate reactions
        reactions (dict<dict>): information about reactions
        metabolites (dict<dict>): information about metabolites
        compartments (dict<dict>): information about compartments
        processes (dict<dict>): information about processes

    raises:

    returns:
        (dict): information about a reaction's node

    """

    def collect_names(identifiers, reference):
        # Look up names of entities by their identifiers.
        return utility.collect_values_from_records_in_reference(
            key="name", identifiers=identifiers, reference=reference
        )

    def collect_participant_names(key, reference):
        # Names of the unique entities that the reaction's participants
        # reference under the key.
        values = utility.collect_value_from_records(
            key=key, records=reaction["participants"]
        )
        values_unique = utility.collect_unique_elements(
            elements_original=values
        )
        return collect_names(values_unique, reference)

    # Candidate reaction and the reaction to which it refers.
    candidacy = reactions_candidacy[reaction_candidacy_identifier]
    reaction = reactions[candidacy["reaction"]]
    # Names of compartments, processes, and metabolites.
    names_compartments = collect_participant_names("compartment", compartments)
    names_processes = collect_names(reaction["processes"], processes)
    names_metabolites = collect_participant_names("metabolite", metabolites)
    # Node's record.
    return {
        "identifier": candidacy["identifier"],
        "type": "reaction",
        "entity": reaction["identifier"],
        "name": candidacy["name"],
        "reversibility": candidacy["reversibility"],
        "metabolites": ";".join(names_metabolites),
        "compartments": ";".join(names_compartments),
        "processes": ";".join(names_processes),
        "replicates": candidacy["replicates"],
    }
示例#6
0
def extract_hmdb_record_summary(element=None, space=None, spaces=None):
    """
    Summarizes an entry for a metabolite from Human Metabolome Database
    (HMDB).

    The summary includes the entry's primary accession as identifier, its
    name, its synonyms (with the name), all of its accessions (primary and
    secondary), and its references to PubChem, ChEBI, and KEGG.

    arguments:
        element (object): element within XML tree
        space (str): name of specific name space within XML document
        spaces (dict<str>): name spaces within XML document

    raises:

    returns:
        (dict<str>): information about a metabolite from HMDB

    """

    # All extractions share the same name spaces.
    context = {"space": space, "spaces": spaces}

    # Accessions, primary and secondary.
    accession_primary = extract_subelement_value(
        element=element, tag="accession", **context
    )
    accessions_element = extract_subelement(
        element=element, tag="secondary_accessions", **context
    )
    accessions = extract_subelement_values(
        element=accessions_element, tag="accession", **context
    )
    accessions.append(accession_primary)
    references_hmdb = utility.collect_unique_elements(accessions)

    # Name.
    name = extract_subelement_value(element=element, tag="name", **context)

    # Synonyms, with the name among them.
    synonyms_element = extract_subelement(
        element=element, tag="synonyms", **context
    )
    synonyms_all = extract_subelement_values(
        element=synonyms_element, tag="synonym", **context
    )
    synonyms_all.append(name)
    synonyms = utility.collect_unique_elements(synonyms_all)

    # Reference to PubChem.
    # Multiple entries have references to identifier "0" for PubChem.
    # This identifier is nonsense and erroneous, so discard it.
    pubchem = extract_subelement_value(
        element=element, tag="pubchem_compound_id", **context
    )
    reference_pubchem = None if pubchem == "0" else pubchem

    # References to ChEBI and KEGG.
    reference_chebi = extract_subelement_value(
        element=element, tag="chebi_id", **context
    )
    reference_kegg = extract_subelement_value(
        element=element, tag="kegg_id", **context
    )

    # Summary record.
    return {
        "identifier": accession_primary,
        "name": name,
        "synonyms": synonyms,
        "references_hmdb": references_hmdb,
        "reference_pubchem": reference_pubchem,
        "reference_chebi": reference_chebi,
        "reference_kegg": reference_kegg,
    }
示例#7
0
def convert_reactions_export_text(
    reactions=None,
    metabolites=None,
    compartments=None,
    processes=None,
):
    """
    Converts information about reactions to flat records of text for export.

    Replaces identifiers of metabolites, compartments, and processes by their
    names and joins every collection of values to a single string.

    arguments:
        reactions (dict<dict>): information about reactions
        metabolites (dict<dict>): information about metabolites
        compartments (dict<dict>): information about compartments
        processes (dict<dict>): information about processes


    returns:
        (list<dict>): information about reactions

    raises:

    """

    # Sources of references, in the order of their fields in the record.
    sources = (
        "metanetx", "recon2m2", "gene", "enzyme", "kegg", "reactome",
        "metacyc", "bigg",
    )

    def collect_names(identifiers, reference):
        # Look up names of entities by their identifiers.
        return utility.collect_values_from_records_in_reference(
            key="name",
            identifiers=identifiers,
            reference=reference,
        )

    def collect_role_metabolites(role, reaction):
        # Identifiers of metabolites that participate in the role.
        return utility.collect_reaction_participants_value(
            key="metabolite",
            criteria={"roles": [role]},
            participants=reaction["participants"],
        )

    def convert(reaction):
        # Compartments.
        compartments_all = utility.collect_value_from_records(
            key="compartment", records=reaction["participants"]
        )
        names_compartments = collect_names(
            utility.collect_unique_elements(
                elements_original=compartments_all
            ),
            compartments,
        )
        # Processes.
        names_processes = collect_names(reaction["processes"], processes)
        # Metabolites, separate for reactants and products.
        names_reactants = collect_names(
            collect_role_metabolites("reactant", reaction), metabolites
        )
        names_products = collect_names(
            collect_role_metabolites("product", reaction), metabolites
        )
        # Record.
        # Processes use a delimiter without a space, unlike other fields.
        record = {
            "identifier": reaction["identifier"],
            "name": reaction["name"],
            "reactants": "; ".join(names_reactants),
            "products": "; ".join(names_products),
            "compartments": "; ".join(names_compartments),
            "processes": ";".join(names_processes),
            "reversibility": reaction["reversibility"],
        }
        for source in sources:
            record["reference_" + source] = "; ".join(
                reaction["references"][source]
            )
        return record

    return [convert(reaction) for reaction in reactions.values()]
示例#8
0
def convert_reactions_text(reactions=None):
    """
    Converts information about reactions to flat records of text.

    Keeps identifiers as they are and joins every collection of values to a
    single string with semicolon delimiters.

    arguments:
        reactions (dict<dict>): information about reactions

    returns:
        (list<dict>): information about reactions

    raises:

    """

    # Sources of references, in the order of their fields in the record.
    sources = (
        "metanetx", "recon2m2", "gene", "enzyme", "kegg", "reactome",
        "metacyc", "bigg", "rhea", "sabiork", "seed",
    )

    def collect_unique_participants(key, reaction):
        # Unique values that the reaction's participants have for the key.
        values = utility.collect_value_from_records(
            key=key, records=reaction["participants"]
        )
        return utility.collect_unique_elements(elements_original=values)

    def convert(reaction):
        # Participants.
        compartments_unique = collect_unique_participants(
            "compartment", reaction
        )
        metabolites_unique = collect_unique_participants(
            "metabolite", reaction
        )
        # Transports.
        transports_metabolites = utility.collect_value_from_records(
            key="metabolite", records=reaction["transports"]
        )
        transports_compartments = utility.collect_unique_elements(
            elements_original=utility.collect_values_from_records(
                key="compartments", records=reaction["transports"]
            )
        )
        # Record.
        record = {
            "identifier": reaction["identifier"],
            "name": reaction["name"],
            "equation": reaction["equation"],
            "metabolites": ";".join(metabolites_unique),
            "compartments": ";".join(compartments_unique),
            "processes": ";".join(reaction["processes"]),
            "reversibility": reaction["reversibility"],
            "conversion": reaction["conversion"],
            "dispersal": reaction["dispersal"],
            "transport": reaction["transport"],
            "transport_metabolites": ";".join(transports_metabolites),
            "transport_compartments": ";".join(transports_compartments),
            "replication": reaction["replication"],
            "replicates": ";".join(reaction["replicates"]),
        }
        for source in sources:
            record["reference_" + source] = ";".join(
                reaction["references"][source]
            )
        return record

    return [convert(reaction) for reaction in reactions.values()]
示例#9
0
def collect_reaction_transports(reaction=None):
    """
    Collects information about a reaction's transports.

    A transport is a metabolite that participates in the reaction as both
    reactant and product and whose compartments as reactant differ from its
    compartments as product.

    This definition of transport is overly restrictive since it requires
    chemically-identical metabolites in two separate compartments. Some
    transports involve chemical conversion of substrates as part of transport.

    arguments:
        reaction (dict): information about a reaction

    returns:
        (list<dict>): information about a reaction's transports

    raises:

    """

    def collect_participants(key, criteria):
        # Values for the key from participants that match the criteria.
        return utility.collect_reaction_participants_value(
            key=key,
            criteria=criteria,
            participants=reaction["participants"],
        )

    # Metabolites that participate as both reactants and products.
    reactants = collect_participants("metabolite", {"roles": ["reactant"]})
    products = collect_participants("metabolite", {"roles": ["product"]})
    candidates = utility.filter_common_elements(
        list_one=products, list_two=reactants
    )
    transports = []
    for metabolite in candidates:
        # Metabolite's compartments in each role.
        compartments_reactant = collect_participants(
            "compartment",
            {"metabolites": [metabolite], "roles": ["reactant"]},
        )
        compartments_product = collect_participants(
            "compartment",
            {"metabolites": [metabolite], "roles": ["product"]},
        )
        # Identical compartments in both roles mean no transport.
        if utility.compare_lists_by_mutual_inclusion(
            list_one=compartments_reactant, list_two=compartments_product
        ):
            continue
        transports.append({
            "metabolite": metabolite,
            "compartments": utility.collect_unique_elements(
                elements_original=(
                    compartments_reactant + compartments_product
                )
            ),
        })
    return transports
示例#10
0
def curate_processes(processes_curation=None,
                     processes_original=None,
                     reactions_original=None):
    """
    Curates information about specific processes and relevant reactions.

    Each curation record removes a process, changes a process's identifier, or
    changes a process's name. The function works on deep copies, so the
    original processes and reactions do not change.

    A novel identifier of "null" removes the original process. Reactions keep
    any reference to the removed process.

    A novel identifier that differs from the original removes the original
    process. If the novel identifier belongs to another process, then
    reactions that reference the original identifier reference the novel
    identifier instead. Otherwise reactions keep the original identifier and
    no process takes the novel identifier.

    A novel name that differs from the original becomes the name of the
    process with the novel identifier, if that process exists.

    arguments:
        processes_curation (list<dict<str>>): information to change about
            specific processes
        processes_original (dict<dict>): information about processes
        reactions_original (dict<dict>): information about reactions

    returns:
        (dict<dict<dict>>): information about processes and reactions

    raises:
        KeyError: if a curation record lacks any of the keys that this
            function reads

    """

    # Copy information.
    processes_novel = copy.deepcopy(processes_original)
    reactions_novel = copy.deepcopy(reactions_original)
    for record in processes_curation:
        # Interpretation.
        identifier_original = record["identifier_original"]
        identifier_novel = record["identifier_novel"]
        name_original = record["name_original"]
        name_novel = record["name_novel"]
        if identifier_novel == "null":
            # Remove process.
            processes_novel.pop(identifier_original, None)
            # TODO: also remove the process from any reactions' processes
            # Removal of a process does not justify removal of any
            # reactions that participate in that process.
            continue
        if identifier_original != identifier_novel:
            # Change identifier.
            # Remove original.
            processes_novel.pop(identifier_original, None)
            # Replace with novel.
            if identifier_novel in processes_novel:
                # Change each reaction's record in place.
                # Do not assign to the dictionary's keys during iteration
                # since that risks a change to the dictionary's size.
                for reaction in reactions_novel.values():
                    processes = reaction["processes"]
                    if identifier_original not in processes:
                        continue
                    processes_replaced = [
                        identifier_novel
                        if process == identifier_original else process
                        for process in processes
                    ]
                    # Collect unique values.
                    reaction["processes"] = utility.collect_unique_elements(
                        processes_replaced)
        if name_original != name_novel:
            # Change name.
            if identifier_novel in processes_novel:
                processes_novel[identifier_novel]["name"] = name_novel
    # Compile and return information
    return {"processes": processes_novel, "reactions": reactions_novel}