Пример #1
0
def _setType(types, current_id, padmet):
    """
    Link the element 'current_id' to every class listed in 'types'.

    For each class id, a node of type "class" is registered in
    padmet.dicOfNode when no node with that id exists yet, then a relation
    'current_id is_a_class class_id' is appended to list_of_relation
    (a list defined outside of this function).

    Parameters
    ----------
    types: list
        list of class ids associated to 'current_id'
    current_id: str
        id of the element to link to all the class ids in types
    padmet: padmet.PadmetRef
        padmet instance
    """
    nodes = padmet.dicOfNode
    for class_id in types:
        # The class node id is the class id itself; create it on first sight.
        if class_id not in nodes:
            nodes[class_id] = Node("class", class_id)
        list_of_relation.append(Relation(current_id, "is_a_class", class_id))
Пример #2
0
def add_kegg_pwy(pwy_file, padmetRef, verbose=False,
                 output="/home/maite/Documents/data/bigg/bigg_v2.padmet"):
    """
    Add pathways read from a tab-separated file to padmetRef and write it.

    The first line of pwy_file is skipped; every other non-empty line must
    hold exactly four tab-separated fields: pwy_id, name, ec, rxn_id (the ec
    field is read but not used). For each line:

    - the pathway node 'pwy_id' is fetched from padmetRef.dicOfNode, or
      created with padmetRef.createNode("pathway", pwy_id);
    - a non-empty name is added once to the node misc "COMMON_NAME" list;
    - a non-empty rxn_id already present in padmetRef.dicOfNode gets a
      relation 'rxn_id is_in_pathway pwy_id'; unknown reactions are only
      reported when verbose is True.

    Parameters
    ----------
    pwy_file: str
        path to the tab-separated pathway file
    padmetRef: padmet.PadmetRef
        padmet instance to update
    verbose: bool
        if True print the reactions listed in a pathway but missing in padmetRef
    output: str
        path given to padmetRef.generateFile. The default is the path that was
        previously hard-coded in this function; callers should pass their own.

    Raises
    ------
    ValueError
        if a non-empty line does not contain exactly four fields
    """
    with open(pwy_file, 'r') as f:
        rows = f.read().splitlines()[1:]

    for row in rows:
        # A blank line cannot be unpacked into the four expected fields.
        if not row:
            continue
        pwy_id, name, _ec, rxn_id = row.split("\t")
        try:
            pwy_node = padmetRef.dicOfNode[pwy_id]
        except KeyError:
            pwy_node = padmetRef.createNode("pathway", pwy_id)
        if name:
            # The same pathway appears on several lines (one per reaction):
            # keep each name only once.
            names = pwy_node.misc.setdefault("COMMON_NAME", [])
            if name not in names:
                names.append(name)
        if rxn_id:
            if rxn_id in padmetRef.dicOfNode:
                padmetRef._addRelation(Relation(rxn_id, "is_in_pathway", pwy_id))
            elif verbose:
                print("%s in pwy %s but not in padmet" %
                      (rxn_id, pwy_id))
    padmetRef.generateFile(output)
Пример #3
0
def _setSyns(syns, current_id, padmet):
    """
    Store the synonyms of 'current_id' in its name node.

    The name node id is 'current_id' + "_names". When it is not yet in
    padmet.dicOfNode, the node is created with an empty "LABEL" list and a
    relation 'current_id has_name name_id' is appended to list_of_relation
    (a list defined outside of this function). Each synonym is then added to
    the node misc "LABEL" list unless it is already there, so a synonym is
    stored at most once even when repeated in syns.

    Parameters
    ----------
    syns: list
        list of synonyms of current_id
    current_id: str
        id of the element to link to the name node 'current_id'_names
    padmet: padmet.PadmetRef
        padmet instance
    """
    name_id = current_id+"_names"
    try:
        name_node = padmet.dicOfNode[name_id]
    except KeyError:
        # First synonyms seen for this element: create the name node
        name_node = Node("name", name_id, {"LABEL":[]})
        padmet.dicOfNode[name_id] = name_node
        list_of_relation.append(Relation(current_id, "has_name", name_id))

    # setdefault covers a pre-existing name node that has no "LABEL" entry.
    labels = name_node.misc.setdefault("LABEL", [])
    for syn in syns:
        if syn not in labels:
            labels.append(syn)
Пример #4
0
def _setXrefs(xrefs, current_id, padmet):
    """
    Store the external references of 'current_id' in its xref node.

    The xref node id is 'current_id' + "_xrefs". When it is not yet in
    padmet.dicOfNode, the node is created and a relation
    'current_id has_xref xref_id' is appended to list_of_relation (a list
    defined outside of this function).

    Each reference is parsed with regex_xref (defined outside of this
    function), which must expose the named groups "DB" and "ID". A reference
    that does not match is stored as is under the "GO-TERMS" key. The node
    misc maps each DB to the list of its ids, every id being stored once.

    Parameters
    ----------
    xrefs: list
        list of external references of current_id
    current_id: str
        id of the element to link to the xref node 'current_id'_xrefs
    padmet: padmet.PadmetRef
        padmet instance
    """
    xref_id = current_id+"_xrefs"
    try:
        xref_node = padmet.dicOfNode[xref_id]
    except KeyError:
        # First references seen for this element: create the xref node
        xref_node = Node("xref", xref_id)
        padmet.dicOfNode[xref_id] = xref_node
        list_of_relation.append(Relation(current_id, "has_xref", xref_id))

    for xref in xrefs:
        #an xref is like: (REFSEQ "NP_417401" NIL NIL NIL NIL NIL)
        #in this example DB = REFSEQ and ID = NP_417401
        xref_search = regex_xref.search(xref)
        if xref_search is not None:
            xref_dict = xref_search.groupdict()
            db, _id = xref_dict["DB"], xref_dict["ID"]
        else:
            db, _id = "GO-TERMS", xref
        # An id already recorded must leave the list untouched: resetting the
        # list here would drop every other id stored for the same DB.
        known_ids = xref_node.misc.setdefault(db, [])
        if _id not in known_ids:
            known_ids.append(_id)
Пример #5
0
def add_delete_rxn(data_file, padmetSpec, output, padmetRef=None, source=None, tool=None, category="MANUAL", verbose=False):
    """
    Read a data_file (form created with template_add_delete and filled), for each reaction
    if column 'Action' == 'add':
        add the reaction from padmetRef to padmetSpec.
    elif column 'Action' == 'delete':
        remove the reaction
    An empty 'Action' leaves the reaction untouched; any other value stops
    the program before the output file is written.
    Can't add a reaction without a padmetRef !

    The delimiter of data_file is detected with csv.Sniffer. The columns
    'idRef', 'Comment' and 'Action' are required, 'Genes' is optional.
    'add' and 'delete' are matched case-insensitively.

    the source ensure the traceability of the reaction, its a simple tag ex 'pathway_XX_update'
    if not given the filename of data_file will be used.
    if a tool was used to infer the reaction, define tool='name_of_the_tool'

    For each added reaction a reconstructionData node is created and, when a
    gene association is given, a suppData node plus one 'is_linked_to'
    relation per gene returned by parseGeneAssoc.

    Parameters
    ----------
    data_file: str
        path to file based on template_add_delete
    padmetSpec: padmet.classes.PadmetSpec
        padmet to update
    output: str
        path to the new padmet file
    padmetRef: padmet.classes.PadmetRef
        padmet containing the database of reference
    source: str
        tag associated to the new reactions to create and add, used for traceability
    tool: str
        The eventual tool used to infer the reactions to create and add
    category: str
        The default category of the reaction added manually is 'MANUAL'. Must not be changed.
        Only stored when a tool is given; without tool the category is always 'MANUAL'.
    verbose: bool
        if True print information

    Raises
    ------
    SystemExit
        if a row has an 'Action' other than 'add', 'delete' or ''
    """
    if not source:
        source = os.path.splitext(os.path.basename(data_file))[0]
    source = source.upper()
    if tool:
        tool = tool.upper()
    if not category:
        category = "MANUAL"

    # Suffix of the reconstructionData/suppData node ids, and the way that
    # origin is spelled in the duplicate warnings (MANUAL is quoted there).
    if tool:
        origin, origin_label = tool, tool
    else:
        origin, origin_label = "MANUAL", "'MANUAL'"

    with open(data_file, 'r') as csvfile:
        dialect = csv.Sniffer().sniff(csvfile.read())
        csvfile.seek(0)
        reader = csv.DictReader(csvfile, delimiter=dialect.delimiter)
        for row in reader:
            element_id, comment, genes_assoc = row["idRef"], row["Comment"], row.get("Genes", None)
            # DictReader fills the missing columns of a short row with None.
            raw_action = row["Action"] or ""
            action = raw_action.upper()

            if action == "ADD":
                if padmetRef is None:
                    if verbose:
                        print("No given padmetRef, unable to copy %s" %element_id)
                    continue
                if verbose:
                    print("Adding: %s" %(element_id))
                padmetSpec.copyNode(padmetRef, element_id)

                #reconstructionData:
                reconstructionData_id = element_id+"_reconstructionData_"+origin
                if tool:
                    reconstructionData = {"SOURCE": [source], "CATEGORY":[category], "TOOL":[tool], "COMMENT":[comment]}
                else:
                    reconstructionData = {"SOURCE": [source], "CATEGORY":["MANUAL"], "COMMENT":[comment]}
                if verbose and reconstructionData_id in padmetSpec.dicOfNode:
                    print("Warning: The reaction %s seems to be already added from the same source %s" %(element_id, origin_label))
                reconstructionData_rlt = Relation(element_id, "has_reconstructionData", reconstructionData_id)
                padmetSpec.createNode("reconstructionData", reconstructionData_id, reconstructionData, [reconstructionData_rlt])

                if not genes_assoc:
                    continue

                #suppData:
                suppData_id = element_id+"_SuppData_"+origin
                if verbose and suppData_id in padmetSpec.dicOfNode:
                    print("Warning: The reaction %s seems to be already added from the same source %s" %(element_id, origin_label))
                suppData = {"GENE_ASSOCIATION":[genes_assoc]}
                #create the node suppData and the relation has_suppData
                suppData_rlt = Relation(element_id, "has_suppData", suppData_id)
                padmetSpec.createNode("suppData", suppData_id, suppData, [suppData_rlt])

                all_genes = parseGeneAssoc(genes_assoc)
                if verbose:
                    print("%s is linked to %s genes" %(element_id, len(all_genes)))
                for gene_id in all_genes:
                    #create the gene if not already in the padmet
                    if gene_id not in padmetSpec.dicOfNode:
                        padmetSpec.createNode("gene", gene_id)
                    #check if rxn already linked to gene x
                    linked_rlt = next(
                        (rlt for rlt in padmetSpec.dicOfRelationIn[element_id]
                         if rlt.type == "is_linked_to" and rlt.id_out == gene_id),
                        None)
                    if linked_rlt is None:
                        #rxn not linked to gene x
                        linked_rlt = Relation(element_id, "is_linked_to", gene_id, {"SOURCE:ASSIGNMENT":[source]})
                    else:
                        #rxn already linked to gene x, update misc
                        linked_rlt.misc.setdefault("SOURCE:ASSIGNMENT", []).append(source)
                    padmetSpec._addRelation(linked_rlt)

            elif action == "DELETE":
                if verbose:
                    print("deleting: %s" %(element_id))
                padmetSpec.delNode(element_id)
            elif action == "":
                print("Nothing to do for: %s" %(element_id))
            else:
                print("Action: %s unknown for %s" %(raw_action, element_id))
                print("action must be = 'add' or 'delete' or ''")
                # Same effect as the interactive exit() helper, without
                # depending on the site module being loaded.
                raise SystemExit
    padmetSpec.generateFile(output)
Пример #6
0
def _rxn_creator_ensure_compound(padmetSpec, padmetRef, metabo_id, verbose):
    """Make sure metabo_id is a node of padmetSpec: copy it from padmetRef when possible, else create it."""
    if metabo_id in padmetSpec.dicOfNode:
        return
    if verbose:
        print("%s not in the network" %metabo_id)
    if padmetRef is not None:
        if verbose:
            print("Try to copy from dbref")
        try:
            padmetSpec._copyNodeExtend(padmetRef, metabo_id)
            return
        except KeyError:
            if verbose:
                print("%s not in the padmetRef" %metabo_id)
    if verbose:
        print("creating a new compound")
    padmetSpec.createNode("compound", metabo_id)
    if verbose:
        print("new compound created: id = %s" %metabo_id)


def rxn_creator(data_file, padmetSpec, output, padmetRef=None, source=None, tool=None, category="MANUAL", verbose=False):
    """
    Read a data_file (form created with template_new_rxn and filled), for each reaction
    to create, add the reaction in padmetSpec (only if the id of the reaction is not already in padmetSpec or in padmetRef if given)
    the source ensure the traceability of the reaction, its a simple tag ex 'pathway_XX_update'
    if not given the filename of data_file will be used.
    if a tool was used to infer the reaction, define tool='name_of_the_tool'
    the Padmet of reference padmetRef can be used to check that the reaction id is not
    already in the database and copy information from the database for existing compounds
    strongly recommended to give a padmetRef.

    Format of data_file, as read by this function: one 'attribute<sep>value'
    pair per line, the separator being detected with csv.Sniffer. Empty lines,
    lines starting with '#', lines that do not split into exactly two fields
    and attribute lines placed before the first 'reaction_id' are ignored.
    A 'reaction_id' line starts a new reaction. Attributes used:
    'reversible' ('true' or 'false', else the reaction is skipped), 'comment',
    'linked_gene', 'reactant' and 'product' ('stoichiometry:compound_id:compartment'),
    'pathway' (ids separated by ';').

    Parameters
    ----------
    data_file: str
        path to file based on template_new_rxn()
    padmetSpec: padmet.classes.PadmetSpec
        padmet to update
    output: str
        path to the new padmet file
    padmetRef: padmet.classes.PadmetRef
        padmet containing the database of reference
    source: str
        tag associated to the new reactions to create and add, used for traceability
    tool: str
        The eventual tool used to infer the reactions to create and add
    category: str
        The default category of the reaction added manually is 'MANUAL'. Must not be changed.
        Only stored when a tool is given; without tool the category is always 'MANUAL'.
    verbose: bool
        if True print information
    """
    if not source:
        source = os.path.splitext(os.path.basename(data_file))[0]
    source = source.upper()
    if tool:
        tool = tool.upper()
    if not category:
        category = "MANUAL"

    # Suffix of the reconstructionData/suppData node ids, and the way that
    # origin is spelled in the duplicate warnings (MANUAL is quoted there).
    if tool:
        origin, origin_label = tool, tool
    else:
        origin, origin_label = "MANUAL", "'MANUAL'"

    dict_data = {}
    with open(data_file, 'r') as f:
        all_read = f.read()
    sep = csv.Sniffer().sniff(all_read).delimiter
    current_id = None
    for line in all_read.splitlines():
        if not line or line.startswith("#"):
            continue
        # Unpacking a line that does not hold exactly two fields raises
        # ValueError: such lines are skipped.
        try:
            attrib, value = line.split(sep)
        except ValueError:
            continue
        attrib = attrib.replace(" ", "")
        if attrib == "reaction_id":
            current_id = value
            dict_data[current_id] = {}
        elif current_id is not None:
            dict_data[current_id].setdefault(attrib, []).append(value)

    if verbose:
        print("%s reactions to add" %len(dict_data))
    for reaction_id, reaction_data in dict_data.items():
        if verbose:
            print("check if the id %s is already used" %reaction_id)
        if reaction_id in padmetSpec.dicOfNode:
            print("the id : %s is already associated to an other reaction in padmetSpec, choose an other" %reaction_id)
            continue
        if padmetRef is not None and reaction_id in padmetRef.dicOfNode:
            print("the id : %s is already associated to an other reaction in padmetRef, choose an other" %reaction_id)
            continue

        if verbose:
            print("Adding reaction %s" %reaction_id)
        # A missing 'reversible' attribute is handled like an invalid value.
        reversible = reaction_data.get("reversible", [""])[0].strip().upper()
        if reversible == "TRUE":
            reaction_rev = "REVERSIBLE"
        elif reversible == "FALSE":
            reaction_rev = "LEFT-TO-RIGHT"
        else:
            print("Please choose a value in ['true','false'] for the reversibility of the reaction: %s" %reaction_id)
            continue
        # All the 'comment' lines of the reaction, stored as a list.
        comment = reaction_data.get("comment", [])
        padmetSpec.createNode("reaction", reaction_id, {"DIRECTION":[reaction_rev]})

        #reconstructionData:
        reconstructionData_id = reaction_id+"_reconstructionData_"+origin
        if tool:
            reconstructionData = {"SOURCE": [source], "CATEGORY":[category], "TOOL":[tool], "COMMENT":comment}
        else:
            reconstructionData = {"SOURCE": [source], "CATEGORY":["MANUAL"], "COMMENT":comment}
        if verbose and reconstructionData_id in padmetSpec.dicOfNode:
            print("Warning: The reaction %s seems to be already added from the same source %s" %(reaction_id, origin_label))
        reconstructionData_rlt = Relation(reaction_id, "has_reconstructionData", reconstructionData_id)
        padmetSpec.createNode("reconstructionData", reconstructionData_id, reconstructionData, [reconstructionData_rlt])

        genes_assoc = reaction_data.get("linked_gene", [""])[0]
        if genes_assoc:
            #suppData:
            suppData_id = reaction_id+"_SuppData_"+origin
            if verbose and suppData_id in padmetSpec.dicOfNode:
                print("Warning: The reaction %s seems to be already added from the same source %s" %(reaction_id, origin_label))
            suppData = {"GENE_ASSOCIATION":[genes_assoc]}
            #create the node suppData and the relation has_suppData
            suppData_rlt = Relation(reaction_id, "has_suppData", suppData_id)
            padmetSpec.createNode("suppData", suppData_id, suppData, [suppData_rlt])

            all_genes = parseGeneAssoc(genes_assoc)
            if verbose:
                print("%s is linked to %s genes" %(reaction_id, len(all_genes)))
            for gene_id in all_genes:
                #create the gene if not already in the padmet
                if gene_id not in padmetSpec.dicOfNode:
                    padmetSpec.createNode("gene", gene_id)
                #check if rxn already linked to gene x
                linked_rlt = next(
                    (rlt for rlt in padmetSpec.dicOfRelationIn[reaction_id]
                     if rlt.type == "is_linked_to" and rlt.id_out == gene_id),
                    None)
                if linked_rlt is None:
                    #rxn not linked to gene x
                    linked_rlt = Relation(reaction_id, "is_linked_to", gene_id, {"SOURCE:ASSIGNMENT":[source]})
                else:
                    #rxn already linked to gene x, update misc
                    linked_rlt.misc.setdefault("SOURCE:ASSIGNMENT", []).append(source)
                padmetSpec._addRelation(linked_rlt)

        if verbose:
            print("check if all metabolites are already in the network")
        # Reactants and products are handled the same way; only the attribute
        # read and the relation type differ.
        sides = (("reactant", "consumes", "No reactants defined"),
                 ("product", "produces", "No products defined"))
        for side, rlt_type, missing_msg in sides:
            if side not in reaction_data:
                if verbose:
                    print(missing_msg)
                continue
            for metabo_data in reaction_data[side]:
                stoechio, metabo_id, compart = metabo_data.split(":")
                stoechio = stoechio.replace(",", ".") #in case comma for sep
                _rxn_creator_ensure_compound(padmetSpec, padmetRef, metabo_id, verbose)
                rlt = Relation(reaction_id, rlt_type, metabo_id)
                rlt.misc.update({"STOICHIOMETRY":[stoechio], "COMPARTMENT":[compart]})
                padmetSpec._addRelation(rlt)

        if "pathway" in reaction_data:
            for pwy_id in reaction_data["pathway"][0].split(";"):
                if pwy_id not in padmetSpec.dicOfNode:
                    if verbose:
                        print("%s not in the network" %pwy_id)
                    if padmetRef is not None:
                        if verbose:
                            print("Check if new pathway %s is in dbref" %pwy_id)
                        if pwy_id in padmetRef.dicOfNode:
                            # Neither the pathway nor the relation is created.
                            print("Warning the new pathway %s exist in the dbref, risk of overwritting data, change pwy id" %pwy_id)
                            continue
                    padmetSpec.createNode("pathway", pwy_id)
                    if verbose:
                        print(("new pathway created: id = %s" %pwy_id))
                rlt = Relation(reaction_id, "is_in_pathway", pwy_id)
                padmetSpec._addRelation(rlt)
    if verbose:
        print("Creating output: %s" % output)
    padmetSpec.generateFile(output)
Пример #7
0
def modelSeed_to_padmet(rxn_file, pwy_file, output, verbose=False):
    """
    Create a padmet file from modelSeed reactions and pathways.

    1./ Load the reactions from the json file rxn_file (a mapping
    reaction id -> reaction data), the reaction "rxn12985" (biomass) being
    removed when present. A reaction without "compound_ids" is skipped and
    its id printed.
    2./ For each remaining reaction not yet in the padmet, create a reaction
    node ("reversibility" == ">" gives the direction LEFT-TO-RIGHT, anything
    else REVERSIBLE), create the missing compound nodes listed in its
    "stoichiometry" and queue a 'consumes' (negative stoichiometry) or
    'produces' relation for each of them.
    3./ Read the tab-separated pwy_file (columns "Source ID", "Name",
    "Aliases", "Reactions" with ids separated by '|'), create the missing
    pathway nodes and queue an 'is_in_pathway' relation for each listed
    reaction, without checking that the reaction is in the padmet.
    4./ Register all the queued relations in the padmet and write it to output.

    The queued relations are kept in the module-level list_of_relation, which
    is reset by this function.

    Parameters
    ----------
    rxn_file: str
        path to the json file of modelSeed reactions
    pwy_file: str
        path to the pathway reactions association file from modelSeed
    output: str
        path to the padmet file to create, its basename without extension is
        used as padmet id
    verbose: bool
        if True print information

    Raises
    ------
    FileNotFoundError
        if rxn_file or pwy_file does not exist
    """
    global list_of_relation
    padmet_id = os.path.splitext(os.path.basename(output))[0]
    padmetRef = instantiate_padmet("PadmetRef", None, padmet_id, "MODELSEED",
                                   "1.0", verbose)

    list_of_relation = []

    if not os.path.exists(rxn_file):
        raise FileNotFoundError(
            "No json file of modelSeed reactions (--rxn_file/rxn_file) accessible at "
            + rxn_file)

    if not os.path.exists(pwy_file):
        raise FileNotFoundError(
            "No pathway reactions association file from modelSeed (--pwy_file/pwy_file) accessible at "
            + pwy_file)

    with open(rxn_file) as rxn_handle:
        rxn_data = json.load(rxn_handle)
    #remove biomass rxn, if the file contains it:
    rxn_data.pop("rxn12985", None)

    #compartment codes, from modelSeed github
    compartments = {"0": "c", "1": "e", "2": "p"}

    if verbose: print("updating padmet")
    nb_rxn = len(rxn_data)
    for count, (rxn_id, rxn_dict) in enumerate(rxn_data.items(), start=1):
        if verbose:
            print("reaction: %s, %s/%s" % (rxn_id, count, nb_rxn))
        #reactions without compounds (key missing or empty) are skipped
        if not rxn_dict.get("compound_ids"):
            print(rxn_id)
            continue
        if rxn_id in padmetRef.dicOfNode:
            if verbose: print("%s already in padmet" % rxn_id)
            continue

        if rxn_dict["reversibility"] == ">":
            rxn_direction = "LEFT-TO-RIGHT"
        else:
            rxn_direction = "REVERSIBLE"
        padmetRef.createNode("reaction", rxn_id, {
            "COMMON_NAME": [rxn_dict["name"]],
            "DIRECTION": [rxn_direction]
        })

        for metabo_data in rxn_dict["stoichiometry"].split(";"):
            metabo_data = metabo_data.replace("???", "\"")
            #quoted name: 'stoich:id:compartment...' before the first quote,
            #the name after it. Otherwise the name is the 4th ':' field.
            try:
                metabo_temp, metabo_name = metabo_data.split("\"")[:2]
                metabo_stoich, metabo_id, metabo_compart = metabo_temp.split(
                    ":")[:3]
            except ValueError:
                metabo_stoich, metabo_id, metabo_compart, metabo_name = metabo_data.split(
                    ":")[:4]

            metabo_stoich = float(metabo_stoich)
            #unknown codes are kept as they are
            metabo_compart = compartments.get(metabo_compart, metabo_compart)
            if metabo_id not in padmetRef.dicOfNode:
                padmetRef.createNode("compound", metabo_id,
                                     {"COMMON_NAME": [metabo_name]})
            rlt_type = "consumes" if metabo_stoich < 0 else "produces"
            list_of_relation.append(
                Relation(
                    rxn_id, rlt_type, metabo_id, {
                        "STOICHIOMETRY": [abs(metabo_stoich)],
                        "COMPARTMENT": [metabo_compart]
                    }))

    with open(pwy_file) as csvfile:
        for pwy_raw in csv.DictReader(csvfile, delimiter='\t'):
            pwy_id = pwy_raw["Source ID"]
            if pwy_id not in padmetRef.dicOfNode:
                pwy_names = [pwy_raw["Name"], pwy_raw["Aliases"]]
                padmetRef.createNode("pathway", pwy_id,
                                     {"COMMON_NAME": pwy_names})
            for rxn_id in pwy_raw["Reactions"].split("|"):
                list_of_relation.append(
                    Relation(rxn_id, "is_in_pathway", pwy_id))

    if verbose: print("Adding all relations")
    nb_rlt = len(list_of_relation)
    for count, rlt in enumerate(list_of_relation, start=1):
        if verbose: print("relation %s/%s" % (count, nb_rlt))
        #relations are indexed both by their input and by their output id
        padmetRef.dicOfRelationIn.setdefault(rlt.id_in, []).append(rlt)
        padmetRef.dicOfRelationOut.setdefault(rlt.id_out, []).append(rlt)

    if verbose: print("Generating file: %s" % output)
    padmetRef.generateFile(output)
Пример #8
0
def biggAPI_to_padmet(output, pwy_file=None, verbose=False):
    """
    Extract the BIGG database through its web API and create a padmet file.
    Reactions of biomass are skipped.
    Require internet access !

    1./ Get all universal reaction ids from
    http://bigg.ucsd.edu/api/v2/universal/reactions, skipping every id that
    contains 'biomass' (case-insensitive).
    2./ For each reaction, request its details (name, metabolites with
    stoichiometry and compartment, database links). Reactions with at most
    one metabolite are ignored.
    3./ The direction of a reaction is not set by default in bigg: the
    reaction is requested in every model containing it and is tagged
    'LEFT-TO-RIGHT' when its lower bound is 0 in at least 75% of the models
    that answered, 'REVERSIBLE' otherwise (also when no model answered).
    4./ Store the first id of each database link in an xref node.
    5./ If pwy_file is given, add the kegg pathways, then write the padmet.

    Parameters
    ----------
    output: str
        path to output, the padmet file.
    pwy_file: str
        path to pathway file, add kegg pathways, line:'pwy_id, pwy_name, x, rxn_id'.
    verbose: bool
        if True print information
    """
    now = datetime.now()
    today_date = now.strftime("%Y-%m-%d")
    policyInArray = [
        ['compound', 'has_name', 'name'],
        ['compound', 'has_xref', 'xref'],
        ['compound', 'has_suppData', 'suppData'],
        ['gene', 'has_name', 'name'],
        ['gene', 'has_xref', 'xref'],
        ['gene', 'has_suppData', 'suppData'],
        ['gene', 'codes_for', 'protein'],
        ['pathway', 'has_name', 'name'],
        ['pathway', 'has_xref', 'xref'],
        ['pathway', 'is_in_pathway', 'pathway'],
        ['protein', 'has_name', 'name'],
        ['protein', 'has_xref', 'xref'],
        ['protein', 'has_suppData', 'suppData'],
        ['protein', 'catalyses', 'reaction'],
        ['reaction', 'has_name', 'name'],
        ['reaction', 'has_xref', 'xref'],
        ['reaction', 'has_suppData', 'suppData'],
        ['reaction', 'has_reconstructionData', 'reconstructionData'],
        ['reaction', 'is_in_pathway', 'pathway'],
        ['reaction', 'consumes', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'is_linked_to', 'gene', 'SOURCE:ASSIGNMENT', 'X:Y'],
    ]
    dbNotes = {
        "PADMET": {
            "Creation": today_date,
            "version": "2.6"
        },
        "DB_info": {
            "DB": "BIGG",
            "version": "1.5"
        }
    }
    padmetRef = PadmetRef()
    if verbose: print("setting policy")
    padmetRef.setPolicy(policyInArray)
    if verbose: print("setting dbInfo")
    padmetRef.setInfo(dbNotes)
    list_of_relation = []
    if verbose: print("Getting all reactions ids")
    url_bigg = 'http://bigg.ucsd.edu/api/v2/'
    raw_data = requests.get(url_bigg + "universal/reactions").json()['results']
    # Case-insensitive biomass filter. The ids are upper-cased before the
    # test, so the needle must be upper case too.
    all_reactions_ids = [
        rxn_dict['bigg_id'] for rxn_dict in raw_data
        if "BIOMASS" not in rxn_dict['bigg_id'].upper()
    ]
    nb_reactions = len(all_reactions_ids)
    if verbose: print("%s reactions to extract" % nb_reactions)
    if verbose: print("updating padmet")
    for count, rxn_id in enumerate(all_reactions_ids, start=1):
        if verbose:
            print("reaction: %s, %s/%s" % (rxn_id, count, nb_reactions))
        if rxn_id in padmetRef.dicOfNode:
            if verbose: print("%s already in padmet" % rxn_id)
            continue
        rxn_response = requests.get(url_bigg + "universal/reactions/" + rxn_id)
        rxn_dict = rxn_response.json()

        rxn_metabolites = rxn_dict["metabolites"]
        if len(rxn_metabolites) <= 1:
            continue
        rxn_id = rxn_dict['bigg_id']
        rxn_name = rxn_dict["name"]

        # Ask every model containing the reaction for its bounds.
        async_list = [
            grequests.get(url_bigg + "models/" + model["bigg_id"] +
                          "/reactions/" + rxn_id)
            for model in rxn_dict["models_containing_reaction"]
        ]
        # grequests.map puts None in place of a request that failed: only
        # the models that answered take part in the vote.
        models_responses = [
            r.json() for r in grequests.map(async_list) if r is not None
        ]
        all_lower_bound = [
            i["results"][0]["lower_bound"] for i in models_responses
        ]
        nb_not_rev = all_lower_bound.count(0)
        if verbose:
            print("Reaction not reversible in %s/%s model(s)" %
                  (nb_not_rev, len(all_lower_bound)))
        # Without any model answer there is no evidence of irreversibility:
        # fall back to REVERSIBLE instead of dividing by zero.
        if all_lower_bound and float(nb_not_rev) / len(all_lower_bound) >= 0.75:
            rxn_direction = "LEFT-TO-RIGHT"
            if verbose: print("Reaction not reversible")
        else:
            rxn_direction = "REVERSIBLE"
            if verbose: print("Reaction reversible")
        padmetRef.createNode("reaction", rxn_id, {
            "COMMON_NAME": [rxn_name],
            "DIRECTION": [rxn_direction]
        })

        xref_id = rxn_id + "_xrefs"
        xref_node = padmetRef.createNode("xref", xref_id)
        list_of_relation.append(Relation(rxn_id, "has_xref", xref_id))
        for db, links in rxn_dict["database_links"].items():
            if not links:
                continue
            # Only the first link of each database is kept.
            _id = links[0]["id"]
            db_ids = xref_node.misc.setdefault(db, [])
            if _id not in db_ids:
                db_ids.append(_id)

        for metabo_dict in rxn_metabolites:
            metabo_id = metabo_dict["bigg_id"]
            metabo_name = metabo_dict["name"]
            metabo_compart = metabo_dict["compartment_bigg_id"]
            metabo_stoich = metabo_dict["stoichiometry"]
            if metabo_id not in padmetRef.dicOfNode:
                padmetRef.createNode("compound", metabo_id,
                                     {"COMMON_NAME": [metabo_name]})
            # A negative stoichiometry marks a consumed metabolite.
            rlt_type = "consumes" if metabo_stoich < 0 else "produces"
            list_of_relation.append(
                Relation(
                    rxn_id, rlt_type, metabo_id, {
                        "STOICHIOMETRY": [abs(metabo_stoich)],
                        "COMPARTMENT": [metabo_compart]
                    }))

    if verbose: print("Adding all relations")
    nb_relations = len(list_of_relation)
    for count, rlt in enumerate(list_of_relation, start=1):
        if verbose: print("relation %s/%s" % (count, nb_relations))
        padmetRef.dicOfRelationIn.setdefault(rlt.id_in, []).append(rlt)
        padmetRef.dicOfRelationOut.setdefault(rlt.id_out, []).append(rlt)

    if pwy_file:
        add_kegg_pwy(pwy_file, padmetRef, verbose)
    if verbose: print("Generating file: %s" % output)
    padmetRef.generateFile(output)
Пример #9
0
def modelSeed_to_padmet(rxn_file, pwy_file, output, verbose=False):
    """
    Create a padmet file from ModelSEED reactions and pathways files.

    Each reaction of rxn_file becomes a reaction node, its metabolites
    become compound nodes linked by consumes/produces relations, and each
    line of pwy_file becomes a pathway node linked to its reactions.
    The module-level list_of_relation is reset and filled by this function.

    Parameters
    ----------
    rxn_file: str
        path to a json file, a dict of reaction id to reaction data. The keys
        read are 'compound_ids', 'reversibility', 'name' and 'stoichiometry'
        (metabolites separated by ';', fields separated by ':').
    pwy_file: str
        path to a tab-separated file with the columns 'Source ID', 'Name',
        'Aliases' and 'Reactions' (reaction ids separated by '|').
    output: str
        path to output, the padmet file.
    verbose: bool
        if True print information
    """
    global list_of_relation
    now = datetime.now()
    today_date = now.strftime("%Y-%m-%d")
    policyInArray = [
        ['compound', 'has_name', 'name'],
        ['compound', 'has_xref', 'xref'],
        ['compound', 'has_suppData', 'suppData'],
        ['gene', 'has_name', 'name'],
        ['gene', 'has_xref', 'xref'],
        ['gene', 'has_suppData', 'suppData'],
        ['gene', 'codes_for', 'protein'],
        ['pathway', 'has_name', 'name'],
        ['pathway', 'has_xref', 'xref'],
        ['pathway', 'is_in_pathway', 'pathway'],
        ['protein', 'has_name', 'name'],
        ['protein', 'has_xref', 'xref'],
        ['protein', 'has_suppData', 'suppData'],
        ['protein', 'catalyses', 'reaction'],
        ['reaction', 'has_name', 'name'],
        ['reaction', 'has_xref', 'xref'],
        ['reaction', 'has_suppData', 'suppData'],
        ['reaction', 'has_reconstructionData', 'reconstructionData'],
        ['reaction', 'is_in_pathway', 'pathway'],
        ['reaction', 'consumes', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'is_linked_to', 'gene', 'SOURCE:ASSIGNMENT', 'X:Y'],
    ]
    dbNotes = {
        "PADMET": {
            "Creation": today_date,
            "version": "2.6"
        },
        "DB_info": {
            "DB": "MODELSEED",
            "version": "1.0"
        }
    }
    padmetRef = PadmetRef()
    if verbose: print("setting policy")
    padmetRef.setPolicy(policyInArray)
    if verbose: print("setting dbInfo")
    padmetRef.setInfo(dbNotes)
    list_of_relation = []

    # Close the file as soon as it is parsed.
    with open(rxn_file) as rxn_handle:
        rxn_data = json.load(rxn_handle)
    #remove biomass rxn, without failing when the file does not contain it
    rxn_data.pop("rxn12985", None)
    #compartment codes, from modelSeed github; other codes are kept as is
    compart_codes = {"0": "c", "1": "e", "2": "p"}
    if verbose: print("updating padmet")
    nb_reactions = len(rxn_data)
    for count, (rxn_id, rxn_dict) in enumerate(rxn_data.items(), start=1):
        if verbose:
            print("reaction: %s, %s/%s" % (rxn_id, count, nb_reactions))
        # Reactions without compounds are reported and skipped.
        if not rxn_dict.get("compound_ids"):
            print(rxn_id)
            continue
        if rxn_id in padmetRef.dicOfNode:
            if verbose: print("%s already in padmet" % rxn_id)
            continue
        if rxn_dict["reversibility"] == ">":
            rxn_direction = "LEFT-TO-RIGHT"
        else:
            rxn_direction = "REVERSIBLE"
        padmetRef.createNode("reaction", rxn_id, {
            "COMMON_NAME": [rxn_dict["name"]],
            "DIRECTION": [rxn_direction]
        })

        for metabo_data in rxn_dict["stoichiometry"].split(";"):
            # '???' stands where a double quote is expected (presumably an
            # export artifact of the source file).
            metabo_data = metabo_data.replace("???", "\"")
            try:
                # Quoted name: 'stoich:id:compart:...:"name"'
                metabo_temp, metabo_name = metabo_data.split("\"")[:2]
                metabo_stoich, metabo_id, metabo_compart = metabo_temp.split(
                    ":")[:3]
            except ValueError:
                # No quoted name: the name is the 4th ':' separated field.
                metabo_stoich, metabo_id, metabo_compart, metabo_name = metabo_data.split(
                    ":")[:4]

            metabo_stoich = float(metabo_stoich)
            metabo_compart = compart_codes.get(metabo_compart, metabo_compart)
            if metabo_id not in padmetRef.dicOfNode:
                padmetRef.createNode("compound", metabo_id,
                                     {"COMMON_NAME": [metabo_name]})
            # A negative stoichiometry marks a consumed metabolite.
            rlt_type = "consumes" if metabo_stoich < 0 else "produces"
            list_of_relation.append(
                Relation(
                    rxn_id, rlt_type, metabo_id, {
                        "STOICHIOMETRY": [abs(metabo_stoich)],
                        "COMPARTMENT": [metabo_compart]
                    }))

    with open(pwy_file) as csvfile:
        pwy_raw_data = list(csv.DictReader(csvfile, delimiter='\t'))
    for pwy_raw in pwy_raw_data:
        pwy_id = pwy_raw["Source ID"]
        pwy_names = [pwy_raw["Name"], pwy_raw["Aliases"]]
        if pwy_id not in padmetRef.dicOfNode:
            padmetRef.createNode("pathway", pwy_id, {"COMMON_NAME": pwy_names})
        for rxn_id in pwy_raw["Reactions"].split("|"):
            list_of_relation.append(Relation(rxn_id, "is_in_pathway", pwy_id))

    if verbose: print("Adding all relations")
    nb_relations = len(list_of_relation)
    for count, rlt in enumerate(list_of_relation, start=1):
        if verbose: print("relation %s/%s" % (count, nb_relations))
        padmetRef.dicOfRelationIn.setdefault(rlt.id_in, []).append(rlt)
        padmetRef.dicOfRelationOut.setdefault(rlt.id_out, []).append(rlt)
    if verbose: print("Generating file: %s" % output)
    padmetRef.generateFile(output)
Пример #10
0
def enzrxns_parser(filePath, padmet, dict_protein_gene_id, source, verbose = False):
    """
    from enzrxns.dat: get for each enzymatic reaction, the UNIQUE-ID, COMMON-NAME,
    ENZYME, REACTION and BASIS-FOR-ASSIGNMENT.
    For each enzymatic reaction whose REACTION is already a node of padmet:
    - add its COMMON-NAME(s) to the reaction node (a trailing '_' is removed)
    - for each gene associated to its ENZYME in dict_protein_gene_id, add to
    list_of_relation a relation: reaction is_linked_to gene, with
    SOURCE:ASSIGNMENT = source:BASIS-FOR-ASSIGNMENT ('NA' if not given)
    Uses the module-level regex_purge and list_of_relation.

    Parameters
    ----------
    filePath: str
        path to enzrxns.dat
    padmet: padmet.PadmetRef
        padmet instance
    dict_protein_gene_id: dict
        dict, k = protein id, v = iterable of gene ids
    source: str
        tag used in the SOURCE:ASSIGNMENT of the is_linked_to relations
    verbose: bool
        if True print information
    """
    dict_data = {}
    with open(filePath, 'r', encoding='windows-1252') as f:
        data = (line for line in f.read().splitlines() if not line.startswith("#") and not line == "//")
        for line in data:
            try:
                #lines without ' - ' (no value) raise ValueError and are skipped
                #split once only: a value is allowed to contain ' - '
                attrib, value = line.split(" - ", 1)
            except ValueError:
                continue
            #delete all tags
            value = re.sub(regex_purge,"",value)
            if attrib == "UNIQUE-ID":
                current_id = value
                dict_data[current_id] = {}
            if attrib in ["COMMON-NAME","ENZYME","REACTION","BASIS-FOR-ASSIGNMENT"]:
                dict_data[current_id].setdefault(attrib, []).append(value)

    for dict_values in dict_data.values():
        try:
            rxn_id = dict_values["REACTION"][0]
            rxn_node = padmet.dicOfNode[rxn_id]
        except KeyError:
            #no reaction given, or reaction not in padmet: nothing to update
            continue

        names = [name[:-1] if name.endswith("_") else name
                 for name in dict_values.get("COMMON-NAME",[])]
        if names:
            if "COMMON-NAME" in rxn_node.misc:
                known_names = rxn_node.misc["COMMON-NAME"]
                for name in names:
                    if name not in known_names:
                        known_names.append(name)
            else:
                rxn_node.misc["COMMON-NAME"] = names

        #without ENZYME there is no gene to link: do not reuse the enzyme
        #of a previous entry
        try:
            protein = dict_values["ENZYME"][0]
            genes_id = dict_protein_gene_id[protein]
        except KeyError:
            continue
        try:
            assignment = dict_values["BASIS-FOR-ASSIGNMENT"][0]
        except KeyError:
            assignment = "NA"
        else:
            if assignment.startswith(":"): assignment = assignment[1:]
        for gene_id in genes_id:
            is_linked_rlt = Relation(rxn_id, "is_linked_to", gene_id, {"SOURCE:ASSIGNMENT":[source+":"+assignment]})
            list_of_relation.append(is_linked_rlt)
    if verbose: print("")
Пример #11
0
def pathways_parser(filePath, padmet, verbose = False):
    """
    from pathways.dat: get for each pathway, the UNIQUE-ID, COMMON-NAME,
    TAXONOMIC-RANGE, TYPES, SYNONYMS, DBLINKS, IN-PATHWAY, REACTION-LIST.
    Create a pathway node with node.id = UNIQUE-ID and
    node.misc = {COMMON-NAME:[...], TAXONOMIC-RANGE:[...]}
    - TYPES, SYNONYMS and DBLINKS are handled by _setType, _setSyns and _setXrefs
    - For each IN-PATHWAY: add to list_of_relation a relation
    current pathway is_in_pathway IN-PATHWAY value
    - For each REACTION-LIST: add to list_of_relation a relation
    REACTION-LIST value is_in_pathway current pathway
    Uses the module-level regex_purge and list_of_relation.

    Parameters
    ----------
    filePath: str
        path to pathways.dat
    padmet: padmet.PadmetRef
        padmet instance
    verbose: bool
        if True print information
    """
    dict_data = {}
    with open(filePath, 'r', encoding='windows-1252') as f:
        data = (line for line in f.read().splitlines() if not line.startswith("#") and not line == "//")
        for line in data:
            try:
                #lines without ' - ' (no value) raise ValueError and are skipped
                #split once only: a value is allowed to contain ' - '
                attrib, value = line.split(" - ", 1)
            except ValueError:
                continue
            #delete all tags
            value = re.sub(regex_purge,"",value)
            if attrib == "UNIQUE-ID":
                current_id = value
                dict_data[current_id] = {}
            if attrib in ["COMMON-NAME", "TAXONOMIC-RANGE",\
            "TYPES", "SYNONYMS", "DBLINKS", "IN-PATHWAY", "REACTION-LIST"]:
                dict_data[current_id].setdefault(attrib, []).append(value)

    nb_pathways = str(len(dict_data))
    for count, (pathway_id, dict_values) in enumerate(dict_data.items(), start=1):
        if verbose:
            print("\r%s/%s" %(count, nb_pathways), end="", flush=True)
        pathway_node = Node("pathway", pathway_id)
        padmet.dicOfNode[pathway_id] = pathway_node
        for attrib in ("COMMON-NAME", "TAXONOMIC-RANGE"):
            if attrib in dict_values:
                pathway_node.misc[attrib] = dict_values[attrib]
        #a KeyError means the attribute is not given for this pathway
        for attrib, setter in (("TYPES", _setType), ("SYNONYMS", _setSyns), ("DBLINKS", _setXrefs)):
            try:
                setter(dict_values[attrib], pathway_id, padmet)
            except KeyError:
                pass
        for super_pathway in dict_values.get("IN-PATHWAY", []):
            #add the hierachization info, current pathway is_in_pathway super pathway
            is_in_pathway_rlt = Relation(pathway_id, "is_in_pathway", super_pathway)
            list_of_relation.append(is_in_pathway_rlt)
        for subNode in dict_values.get("REACTION-LIST", []):
            #add the hierachization info, Reaction/pathway is_in_pathway current pathway
            is_in_pathway_rlt = Relation(subNode, "is_in_pathway", pathway_id)
            list_of_relation.append(is_in_pathway_rlt)
    if verbose: print("")
Пример #12
0
def reactions_parser(filePath, padmet, extract_gene, source, verbose = False):
    """
    from reaction.dat: get for each reaction, the UNIQUE-ID, COMMON-NAME, EC-NUMBER,
    REACTION-DIRECTION, TYPES, SYNONYMS, DBLINKS, LEFT, RIGHT
    Only reactions with at least one LEFT or RIGHT are added.
    Create a reaction node with node.id = UNIQUE-ID,
    node.misc = {COMMON-NAME:[COMMON-NAMES], EC-NUMBER:[...], DIRECTION:[direction]}
    - For the direction:
    DIRECTION is REVERSIBLE or LEFT-TO-RIGHT. A reaction given as RIGHT-TO-LEFT is
    stored as LEFT-TO-RIGHT with its LEFT compounds produced and its RIGHT compounds consumed.
    A missing or unrecognised REACTION-DIRECTION gives REVERSIBLE.
    - For each types:
    A type is in fact a class. this information is stocked in padmet as: is_a_class relation btw a node and a class_node
    check if the type is already in the padmet
    if not create a new class_node (var: subClass) with subClass_node.id = type
    Create a relation current node is_a_class type
    - For each Synonyms:
    this information is stocked in padmet as: has_name relation btw a node and a name_node
    create a new name_node with name_node.id = reaction_id+"_names" name_node.misc = {LABEL:[synonyms]}
    Create a relation current node has_name name_node.id
    - For each DBLINKS:
    DBLINKS is parsed with regex_xref to get the db and the id
    this information is stocked in padmet as: has_xref relation btw a node and a xref_node
    create a new xref_node with xref_node.id = reaction_id+"_xrefs" and xref_node.misc = {db:[id]}
    Create a relation current node has_xref xref_node.id
    - For each LEFT/RIGHT:
    the compound node is created if not already in padmet, the stoichiometry (default "1")
    and the compartment are read from the ^COEFFICIENT / ^COMPARTMENT lines that follow.
    The compartment is def_compart_out for CCO-OUT, def_compart_in otherwise.
    Create a relation current node consumes/produces compound
    Uses the module-level regex_purge, list_of_relation, def_compart_in and def_compart_out.

    Parameters
    ----------
    filePath: str
        path to reactions.dat
    padmet: padmet.PadmetRef
        padmet instance
    extract_gene: bool
        if True, add to each reaction a reconstructionData node tagged with source
    source: str
        tag used in the id and the SOURCE of the reconstructionData node
    verbose: bool
        if True print information
    """
    dict_data = {}
    with open(filePath, 'r', encoding='windows-1252') as f:
        data = [line for line in f.read().splitlines() if not line.startswith("#") and not line == "//"]
    nb_lines = len(data)
    for index, line in enumerate(data):
        try:
            #lines without ' - ' (no value) raise ValueError and are skipped
            #split once only: a value is allowed to contain ' - '
            attrib, value = line.split(" - ", 1)
        except ValueError:
            continue
        #delete all tags
        value = re.sub(regex_purge,"",value)
        if attrib == "UNIQUE-ID":
            current_id = value
            dict_data[current_id] = {}
        if attrib in ["COMMON-NAME", "EC-NUMBER", "REACTION-DIRECTION",\
        "TYPES", "SYNONYMS", "DBLINKS"]:
            dict_data[current_id].setdefault(attrib, []).append(value)

        elif attrib in ["LEFT", "RIGHT"]:
            #set default values
            compartment = def_compart_in
            stoichiometry = "1"
            #check if information about stoechiometry and compartment in line + 1 and line + 2
            #near the end of the file the missing lines are read as empty
            first_next_line = data[index+1] if index + 1 < nb_lines else ""
            second_next_line = data[index+2] if index + 2 < nb_lines else ""

            if first_next_line.startswith("^COEFFICIENT"):
                stoichiometry = first_next_line.split(" - ")[1]
                if second_next_line.startswith("^COMPARTMENT"):
                    compartment = second_next_line.split(" - ")[1]
            if first_next_line.startswith("^COMPARTMENT"):
                compartment = first_next_line.split(" - ")[1]
                if second_next_line.startswith("^COEFFICIENT"):
                    stoichiometry = second_next_line.split(" - ")[1]

            #delete all tags
            compartment = re.sub(regex_purge, "", compartment)
            stoichiometry = re.sub(regex_purge, "", stoichiometry)
            dict_data[current_id].setdefault(attrib, []).append((value, stoichiometry, compartment))

    count = 0
    nb_rxn = str(len(dict_data))
    for rxn_id, dict_values in dict_data.items():
        if "LEFT" not in dict_values and "RIGHT" not in dict_values:
            continue
        count += 1
        if verbose:
            print("\r%s/%s: %s" %(count, nb_rxn, rxn_id), end="", flush=True)
        rxn_node = Node("reaction", rxn_id)
        padmet.dicOfNode[rxn_id] = rxn_node
        for attrib in ("COMMON-NAME", "EC-NUMBER"):
            if attrib in dict_values:
                rxn_node.misc[attrib] = dict_values[attrib]

        #no REACTION-DIRECTION is read as REVERSIBLE
        rxn_dir = dict_values.get("REACTION-DIRECTION", ["REVERSIBLE"])[0]
        #is_reversed: the reaction is written RIGHT-TO-LEFT in the file
        is_reversed = False
        if rxn_dir == "REVERSIBLE":
            rxn_direction = "REVERSIBLE"
        elif "LEFT-TO-RIGHT" in rxn_dir:
            #if:LEFT-TO-RIGHT, IRREVERSIBLE-LEFT-TO-RIGHT, PHYSIOL-LEFT-TO-RIGHT
            rxn_direction = "LEFT-TO-RIGHT"
        elif "RIGHT-TO-LEFT" in rxn_dir:
            #stored as LEFT-TO-RIGHT: RIGHT metabolites will be consumed
            #and LEFT metabolites produced
            rxn_direction = "LEFT-TO-RIGHT"
            is_reversed = True
        else:
            #unrecognised value: always set a DIRECTION, the same as when
            #no direction is given
            rxn_direction = "REVERSIBLE"
        rxn_node.misc["DIRECTION"] = [rxn_direction]

        #a KeyError means the attribute is not given for this reaction
        for attrib, setter in (("TYPES", _setType), ("SYNONYMS", _setSyns), ("DBLINKS", _setXrefs)):
            try:
                setter(dict_values[attrib], rxn_id, padmet)
            except KeyError:
                pass
        if extract_gene:
            reconstructionData_id = rxn_id+"_reconstructionData_"+source
            if reconstructionData_id in padmet.dicOfNode and verbose:
                print("Warning: The reaction %s seems to be already added from the same source %s" %(rxn_id, source))
            reconstructionData = {"SOURCE":[source],"TOOL":["PATHWAYTOOLS"],"CATEGORY":["ANNOTATION"]}
            reconstructionData_rlt = Relation(rxn_id,"has_reconstructionData",reconstructionData_id)
            padmet.dicOfNode[reconstructionData_id] = Node("reconstructionData", reconstructionData_id, reconstructionData)
            list_of_relation.append(reconstructionData_rlt)

        #LEFT compounds are consumed and RIGHT compounds produced, the other
        #way round if the reaction is written RIGHT-TO-LEFT
        for side, rlt_type, reversed_rlt_type in (("LEFT", "consumes", "produces"), ("RIGHT", "produces", "consumes")):
            if is_reversed:
                rlt_type = reversed_rlt_type
            for compound_id, stoichiometry, compartment in dict_values.get(side, []):
                if compartment == "CCO-OUT":
                    compartment = def_compart_out
                else:
                    compartment = def_compart_in
                if compound_id not in padmet.dicOfNode:
                    padmet.dicOfNode[compound_id] = Node("compound", compound_id)
                metabo_rlt = Relation(rxn_id, rlt_type, compound_id, {"STOICHIOMETRY": [stoichiometry], "COMPARTMENT": [compartment]})
                list_of_relation.append(metabo_rlt)
    if verbose: print("")
Пример #13
0
def from_pgdb_to_padmet(pgdb_folder, db='NA', version='NA', source='GENOME', extract_gene=False, no_orphan=False, enhanced_db=False, padmetRef_file=None, verbose=False):
    """
    Build a padmet instance from the flat files (.dat) of a PGDB folder.

    Two modes are supported:

    - ``padmetRef_file`` given: a PadmetSpec is created and every reaction
      listed in reactions.dat is copied from the reference padmet. ``db`` and
      ``version`` are then taken from the reference and the arguments are
      ignored. ``enhanced_db`` has no effect in this mode.
    - ``padmetRef_file`` not given: a PadmetRef is created from scratch by
      parsing classes, compounds, reactions and pathways (and optionally
      genes/proteins/enzrxns and metabolic-reactions.xml).

    In both modes, reactions that only consume or only produce compounds are
    deleted before returning.

    Parameters
    ----------
    pgdb_folder: str
        path to pgdb
    db: str
        pgdb name, default is 'NA'
    version: str
        pgdb version, default is 'NA'
    source: str
        tag reactions for traceability, default is 'GENOME' (upper-cased)
    extract_gene: bool
        if true extract genes information
    no_orphan: bool
        if true (and extract_gene is true), remove reactions without genes
        associated, then gene nodes not linked to any reaction
    enhanced_db: bool
        if true, read metabolic-reactions.xml sbml file and add information in final padmet
    padmetRef_file: str
        path to padmetRef corresponding to metacyc in padmet format
    verbose: bool
        if True print information

    Returns
    -------
    padmet.PadmetSpec or padmet.PadmetRef:
        PadmetSpec when padmetRef_file is given, PadmetRef otherwise
    """
    global regex_purge, regex_xref, list_of_relation, def_compart_in, def_compart_out
    # Raw strings: '\|', '\(' and '\S' are not valid str escapes and trigger
    # warnings on recent Python versions when written in a normal literal.
    regex_purge = re.compile(r"<.*?>|\|")
    regex_xref = re.compile(r'^\((?P<DB>\S*)\s*"(?P<ID>\S*)"')
    # Module-level accumulator filled by the parsers, flushed into padmet below.
    list_of_relation = []
    def_compart_in = "c"
    def_compart_out = "e"
    #parsing args
    source = source.upper()

    classes_file, compounds_file, proteins_file, reactions_file, enzrxns_file, pathways_file = \
    [os.path.join(pgdb_folder, _file) for _file in ["classes.dat", "compounds.dat", "proteins.dat", "reactions.dat", "enzrxns.dat", "pathways.dat"]]
    metabolic_reactions = os.path.join(pgdb_folder, "metabolic-reactions.xml") if enhanced_db else None
    genes_file = os.path.join(pgdb_folder, "genes.dat") if extract_gene else None

    today_date = datetime.now().strftime("%Y-%m-%d")
    if padmetRef_file:
        padmet = PadmetSpec()
        padmetRef = PadmetRef(padmetRef_file)
        # db/version of the reference override the function arguments
        version = padmetRef.info["DB_info"]["version"]
        db = padmetRef.info["DB_info"]["DB"]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet.setInfo(dbNotes)

        padmet.setPolicy(padmetRef)
        with open(reactions_file, 'r') as f:
            rxns_id = [line.split(" - ")[1] for line in f.read().splitlines() if line.startswith("UNIQUE-ID")]
        nb_rxns = len(rxns_id)
        for count, rxn_id in enumerate(rxns_id, start=1):
            if verbose: print("%s/%s Copy %s" %(count, nb_rxns, rxn_id))
            try:
                padmet.copyNode(padmetRef, rxn_id)
                reconstructionData_id = rxn_id+"_reconstructionData_"+source
                if verbose and reconstructionData_id in padmet.dicOfNode:
                    print("Warning: The reaction %s seems to be already added from the same source %s" %(rxn_id, source))
                reconstructionData = {"SOURCE":[source],"TOOL":["PATHWAYTOOLS"],"CATEGORY":["ANNOTATION"]}
                reconstructionData_rlt = Relation(rxn_id,"has_reconstructionData",reconstructionData_id)
                padmet.dicOfNode[reconstructionData_id] = Node("reconstructionData", reconstructionData_id, reconstructionData)
                padmet._addRelation(reconstructionData_rlt)

            except TypeError:
                # NOTE(review): presumably raised by copyNode when the id is
                # missing from padmetRef -- confirm against padmet's API.
                print("%s not in padmetRef" %(rxn_id))

        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)

    else:
        POLICY_IN_ARRAY = [['class','is_a_class','class'], ['class','has_name','name'], ['class','has_xref','xref'], ['class','has_suppData','suppData'],
                        ['compound','is_a_class','class'], ['compound','has_name','name'], ['compound','has_xref','xref'], ['compound','has_suppData','suppData'],
                        ['gene','is_a_class','class'], ['gene','has_name','name'], ['gene','has_xref','xref'], ['gene','has_suppData','suppData'], ['gene','codes_for','protein'],
                        ['pathway','is_a_class','class'], ['pathway','has_name','name'], ['pathway','has_xref','xref'], ['pathway','is_in_pathway','pathway'],
                        ['protein','is_a_class','class'], ['protein','has_name','name'], ['protein','has_xref','xref'], ['protein','has_suppData','suppData'], ['protein','catalyses','reaction'],
                        ['protein','is_in_species','class'],
                        ['reaction','is_a_class','class'], ['reaction','has_name','name'], ['reaction','has_xref','xref'], ['reaction','has_suppData','suppData'], ['reaction','has_reconstructionData','reconstructionData'], ['reaction','is_in_pathway','pathway'],
                        ['reaction','consumes','class','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','class','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','consumes','compound','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','compound','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','consumes','protein','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','protein','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','is_linked_to','gene','SOURCE:ASSIGNMENT','X:Y']]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet = PadmetRef()
        if verbose: print("setting policy")
        padmet.setPolicy(POLICY_IN_ARRAY)
        if verbose: print("setting dbInfo")
        padmet.setInfo(dbNotes)

        if verbose: print("parsing classes")
        classes_parser(classes_file, padmet, verbose)

        if verbose: print("parsing compounds")
        compounds_parser(compounds_file, padmet, verbose)

        if verbose: print("parsing reactions")
        reactions_parser(reactions_file, padmet, extract_gene, source, verbose)

        if verbose: print("parsing pathways")
        pathways_parser(pathways_file, padmet, verbose)

        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)

        if metabolic_reactions is not None:
            if verbose: print("enhancing db from metabolic-reactions.xml")
            padmet = enhance_db(metabolic_reactions, padmet, extract_gene, verbose)

    # Flush the relations accumulated by the parsers straight into both
    # relation indexes of the padmet (keyed by id_in and by id_out).
    for rlt in list_of_relation:
        padmet.dicOfRelationIn.setdefault(rlt.id_in, []).append(rlt)
        padmet.dicOfRelationOut.setdefault(rlt.id_out, []).append(rlt)

    if extract_gene and no_orphan:
        all_reactions = [node for node in padmet.dicOfNode.values() if node.type == "reaction"]
        # .get(): a reaction without any outgoing relation has no entry in
        # dicOfRelationIn and is, by definition, not linked to a gene.
        rxn_to_del = [r for r in all_reactions
                      if not any(rlt.type == "is_linked_to" for rlt in padmet.dicOfRelationIn.get(r.id, []))]
        for rxn in rxn_to_del:
            padmet.delNode(rxn.id)
        if verbose:
            print("%s/%s orphan reactions (without gene association) deleted" %(len(rxn_to_del), len(all_reactions)))
        all_genes_linked = {rlt.id_out for rlt in padmet.getAllRelation() if rlt.type == "is_linked_to"}
        all_genes = {node.id for node in padmet.dicOfNode.values() if node.type == "gene"}
        orphan_genes = all_genes - all_genes_linked
        for gene_id in orphan_genes:
            #if verbose: print("Removing gene without gene assoc %s" %gene_id)
            padmet.dicOfNode.pop(gene_id)
        if verbose:
            print("%s/%s orphan genes (not linked to any reactions) deleted" %(len(orphan_genes), len(all_genes)))

    # Drop reactions that have only 'consumes' or only 'produces' relations.
    # Ids are collected first because delNode mutates dicOfNode.
    rxns = [node.id for node in padmet.dicOfNode.values() if node.type == "reaction"]
    for rxn_id in rxns:
        cp_rlts = {rlt.type for rlt in padmet.dicOfRelationIn.get(rxn_id, []) if rlt.type in ("consumes", "produces")}
        if len(cp_rlts) == 1:
            print("rxn only consume or produce, transport ???: %s" %rxn_id)
            padmet.delNode(rxn_id)

    return padmet
Пример #14
0
def enhance_db(metabolic_reactions, padmet, with_genes, verbose = False):
    """
    Parse sbml metabolic_reactions and add to padmet the reactions that are
    not already present (matched on the SBML reaction name).
    if with_genes: also add genes information

    New nodes are written directly into padmet.dicOfNode; the new relations
    (consumes / produces / is_linked_to) are appended to the module-level
    list_of_relation, which the caller is expected to flush into padmet.

    Parameters
    ----------
    metabolic_reactions: str
        path to sbml metabolic-reactions.xml
    padmet: padmet.PadmetRef
        padmet instance
    with_genes: bool
        if true also add genes information.
    verbose: bool
        if True print progress information

    Returns
    -------
    padmet.padmetRef:
        padmet instance with pgdb within pgdb + metabolic-reactions.xml data
    """

    print("loading sbml file: %s" %metabolic_reactions)
    reader = libsbml.SBMLReader()
    document = reader.readSBML(metabolic_reactions)
    for i in range(document.getNumErrors()):
        print(document.getError(i).getMessage())
    model = document.getModel()
    listOfReactions = model.getListOfReactions()
    #recover the reactions that are not in the basic metacyc but in the sbml file
    #use the reactions_name instead of ids because the ids are encoded, the name is the non-encoded version of the id
    padmet_reactions_id = {node.id for node in padmet.dicOfNode.values() if node.type == "reaction"}
    reaction_to_add = [reaction for reaction in listOfReactions
                       if reaction.getName() not in padmet_reactions_id]
    nb_to_add = len(reaction_to_add)
    if verbose: print(str(nb_to_add)+" reactions to add")
    for count, reactionSBML in enumerate(reaction_to_add, start=1):
        reaction_id = reactionSBML.getName()
        if verbose: print(str(count)+"/"+str(nb_to_add)+"\t"+reaction_id)
        if reactionSBML.getReversible():
            reaction_dir = "REVERSIBLE"
        else:
            reaction_dir = "LEFT-TO-RIGHT"
        # Keep any node already registered under this id, create it otherwise.
        if reaction_id not in padmet.dicOfNode:
            padmet.dicOfNode[reaction_id] = Node("reaction", reaction_id, {"DIRECTION": [reaction_dir]})

        for reactant in reactionSBML.getListOfReactants(): #convert ids
            reactant_id, _type, reactant_compart = sbmlPlugin.convert_from_coded_id(reactant.getSpecies())
            if reactant_id not in padmet.dicOfNode:
                # Register the compound under its OWN id; storing it under
                # reaction_id would overwrite the reaction node.
                padmet.dicOfNode[reactant_id] = Node("compound", reactant_id)
            reactant_stoich = reactant.getStoichiometry()
            consumes_rlt = Relation(reaction_id,"consumes",reactant_id, {"STOICHIOMETRY":[reactant_stoich], "COMPARTMENT": [reactant_compart]})
            list_of_relation.append(consumes_rlt)

        for product in reactionSBML.getListOfProducts():
            product_id, _type, product_compart = sbmlPlugin.convert_from_coded_id(product.getSpecies())
            if product_id not in padmet.dicOfNode:
                padmet.dicOfNode[product_id] = Node("compound", product_id)
            product_stoich = product.getStoichiometry()
            produces_rlt = Relation(reaction_id,"produces",product_id,{"STOICHIOMETRY": [product_stoich], "COMPARTMENT": [product_compart]})
            list_of_relation.append(produces_rlt)

        if with_genes:
            notes = sbmlPlugin.parseNotes(reactionSBML)
            if "GENE_ASSOCIATION" in notes:
                #Using sbmlPlugin to recover all genes associated to the reaction
                listOfGenes = sbmlPlugin.parseGeneAssoc(notes["GENE_ASSOCIATION"][0])
                for gene in listOfGenes:
                    # A missing key raises KeyError (not TypeError), so test
                    # membership explicitly and create the gene node on demand.
                    if gene not in padmet.dicOfNode:
                        padmet.dicOfNode[gene] = Node("gene", gene)
                    is_linked_rlt = Relation(reaction_id, "is_linked_to", gene)
                    list_of_relation.append(is_linked_rlt)
    return padmet