示例#1
0
def extract_rxn_with_gene_assoc(sbml, output, verbose=False):
    """
    Write a copy of a sbml keeping only the reactions associated to a gene.

    A reaction is kept when its parsed notes contain the key
    'GENE_ASSOCIATION'. Every error reported by libsbml after reading the
    input is printed.

    Parameters
    ----------
    sbml: str
        sbml input handed to libsbml.SBMLReader.readSBML
    output: str
        pathname of the output sbml
    verbose: bool
        accepted for interface compatibility, not used by this function
    """
    reader = libsbml.SBMLReader()
    sbml_document = reader.readSBML(sbml)
    nb_errors = sbml_document.getNumErrors()
    for error_index in range(nb_errors):
        print(sbml_document.getError(error_index).getMessage())

    reactions = sbml_document.getModel().getListOfReactions()

    # Ids are collected before any removal so that the list of reactions is
    # never modified while it is being iterated.
    ids_without_genes = [
        rxn.getId() for rxn in reactions
        if "GENE_ASSOCIATION" not in parseNotes(rxn)
    ]
    for rxn_id in ids_without_genes:
        reactions.remove(rxn_id)

    libsbml.writeSBMLToFile(sbml_document, output)
示例#2
0
def extract_data_sbml(sbml_filepath):
    """
    Read a sbml file and list its genes, compounds and reactions.

    Genes are recovered from the 'GENE_ASSOCIATION' note of each reaction
    with sbmlPlugin.parseGeneAssoc; each gene appears once, in order of
    first appearance. Compound and reaction ids are the first element
    returned by sbmlPlugin.convert_from_coded_id for each element id.

    Parameters
    ----------
    sbml_filepath: str
        path to sbml file

    Returns
    -------
    tuple
        (genes, id_compounds, id_reactions), three lists
    """
    def decoded_ids(elements):
        # Keep only the first field returned by the id decoder.
        return [
            sbmlPlugin.convert_from_coded_id(element.id)[0]
            for element in elements
        ]

    reader = libsbml.SBMLReader()
    document = reader.readSBML(sbml_filepath)
    model = document.getModel()

    species_list = model.getListOfSpecies()
    reaction_list = model.getListOfReactions()

    genes = []
    for rxn in reaction_list:
        rxn_notes = sbmlPlugin.parseNotes(rxn)
        if "GENE_ASSOCIATION" not in rxn_notes:
            continue
        # Using sbmlPlugin to recover all genes associated to the reaction
        linked_genes = sbmlPlugin.parseGeneAssoc(
            rxn_notes["GENE_ASSOCIATION"][0])
        for gene in linked_genes:
            if gene in genes:
                continue
            genes.append(gene)

    id_compounds = decoded_ids(species_list)
    id_reactions = decoded_ids(reaction_list)

    return genes, id_compounds, id_reactions
示例#3
0
def check_ids(model_metabolic, model_faa, cutoff, verbose=False):
    """
    Check that the gene ids of a metabolic network are found in a faa file.

    faa gene ids are the record ids of the fasta file: >GENE_ID ....
    metabolic network gene ids are read from the note section of each
    reaction, GENE_ASSOCIATION: gene_id-1 or gene_id-2

    Parameters
    ----------
    model_metabolic: str
        path to sbml file
    model_faa: str
        path to fasta faa file
    cutoff: int or float
        threshold on the genes of the model found in the faa; the check
        passes when the ratio of missing genes is <= 1 - cutoff
    verbose: bool
        if True print the ratio of missing genes (and their ids on failure)

    Returns
    -------
    bool
        True if no gene is missing or if the ratio of missing genes is
        <= 1 - cutoff, False otherwise

    Raises
    ------
    SystemExit
        if no gene is found in the metabolic network
    """
    reader = libsbml.SBMLReader()
    document = reader.readSBML(model_metabolic)
    model = document.getModel()
    # The number of errors is queried but its value is not used.
    document.getNumErrors()

    # Gather every gene id referenced by a reaction of the network.
    model_metabolic_ids = set()
    for rxn in model.getListOfReactions():
        gene_assoc = sp.parseNotes(rxn).get("GENE_ASSOCIATION", [None])[0]
        if gene_assoc is not None:
            model_metabolic_ids.update(sp.parseGeneAssoc(gene_assoc))

    with open(model_faa, "r") as faa_handle:
        model_faa_ids = {
            record.id for record in SeqIO.parse(faa_handle, "fasta")
        }

    missing_genes = model_metabolic_ids.difference(model_faa_ids)
    if not model_metabolic_ids:
        raise SystemExit("No genes found in model metabolic")
    missing_ratio = float(len(missing_genes)) / float(len(model_metabolic_ids))

    # All genes of the network are in the faa.
    if missing_ratio == 0:
        if verbose:
            print("all genes of the model_metabolic are in the model_faa")
        return True

    # Some genes are missing but within the tolerance given by the cutoff.
    if missing_ratio <= float(1 - cutoff):
        if verbose:
            print("Only %.2f%% genes of the model_metabolic are not in the model_faa" % (missing_ratio*100))
        return True

    if verbose:
        print("%s%% genes of the model_metabolic are not in the model_faa" % (missing_ratio*100))
        print(";".join(missing_genes))
    return False
示例#4
0
def dict_data_to_sbml(dict_data,
                      dict_orthogroups=None,
                      dict_orthologues=None,
                      strict_match=True):
    """
    Create the sbml of a study organism from the sbml of a model organism
    and the orthology links between their genes.

    1./ Read dict_orthogroups (or dict_orthologues) and map each gene of the
        model to the genes of the study organism sharing an orthology link.
    2./ Read the sbml of the model and keep the reactions having a
        GENE_ASSOCIATION note.
    3./ For each reaction:
        Parse genes associated to sub part (ex: (gene-a and gene-b) or gene-c) = [(gene-a,gene-b), gene-c]
        Check if study org have orthologue for every gene of at least one
        sub part (gene-a, gene-b) or gene-c
        if yes: keep the reaction and replace genes ids by study org genes ids
    4./ Remove the other reactions and the species they leave unused, then
        write the new sbml file.

    Nothing is written (the function returns None early) when the two
    organisms share no orthologue or when no reaction can be kept.

    Parameters
    ----------
    dict_data: dict
        {'study_id': study_id,
        'model_id' : model_id,
        'sbml_template': path to sbml of model,
        'output': path to the output sbml,
        'verbose': bool, if true print information (optional)
        }
    dict_orthogroups: dict
        k = orthogroup_id, v = {k = organism id, v = genes of the organism}
    dict_orthologues: dict
        k = model_id, v = {k = gene id of the model,
        v = {k = study_id, v = genes of the study organism}}.
        Only read when dict_orthogroups is empty or None.
    strict_match: bool
        accepted for interface compatibility, not used by this function

    Raises
    ------
    ValueError
        if neither dict_orthogroups nor dict_orthologues is given
    """
    study_id = dict_data['study_id']
    model_id = dict_data['model_id']
    sbml_template = dict_data['sbml_template']
    output = dict_data['output']
    verbose = dict_data.get('verbose')

    # k = gene id of the model, v = genes ids of the study organism
    sub_dict_orth = {}
    if dict_orthogroups:
        if verbose:
            print(
                "*Extracting orthogroups data to create sbml of {0} from {1}".
                format(study_id, model_id))
        for orthogroup in dict_orthogroups.values():
            try:
                all_to_compare_genes = orthogroup[model_id]
                all_study_genes = orthogroup[study_id]
            except KeyError:
                # Orthogroup without genes of both organisms: no link to add.
                continue
            for to_compare_gene in all_to_compare_genes:
                sub_dict_orth.setdefault(to_compare_gene,
                                         set()).update(all_study_genes)
    elif dict_orthologues:
        if verbose:
            print(
                "*Extracting orthologues data to create sbml of {0} from {1}".
                format(study_id, model_id))
        for gene_id, gene_dict in dict_orthologues[model_id].items():
            try:
                sub_dict_orth[gene_id] = gene_dict[study_id]
            except KeyError:
                # This model gene has no orthologue in the study organism.
                pass
    else:
        # The exception used to be built without being raised, which let the
        # function carry on and fail later with a NameError.
        raise ValueError("Must give one dict of orthogroups or orthologue")

    if not sub_dict_orth:
        if verbose:
            print("\t{0} and {1} don't share any ortholgue".format(
                study_id, model_id))
        return

    reader = libsbml.SBMLReader()
    document_to_compare = reader.readSBML(sbml_template)
    for i in range(document_to_compare.getNumErrors()):
        print(document_to_compare.getError(i).getMessage())
    model_to_compare = document_to_compare.getModel()

    # Pair each reaction with its (non empty) gene association so the notes
    # are parsed only once per reaction.
    rxn_with_genes = []
    for rxn in model_to_compare.getListOfReactions():
        ga = sp.parseNotes(rxn).get("GENE_ASSOCIATION", [None])[0]
        if ga:
            rxn_with_genes.append((rxn, ga))
    if verbose:
        print("\tSbml of {0} contains {1}/{2} reactions with genes assocation".
              format(model_id, len(rxn_with_genes),
                     len(model_to_compare.getListOfReactions())))

    # k = reaction id to keep, v = gene association rewritten with study genes
    dict_rxn_ga = {}
    for rxn, ga in rxn_with_genes:
        # Rewrite the association with the operators expected by gbr.
        ga_for_gbr = re.sub(r" or ", "|", ga)
        ga_for_gbr = re.sub(r" and ", "&", ga_for_gbr)
        ga_for_gbr = re.sub(r"\s", "", ga_for_gbr)
        if re.search(r"\||&", ga_for_gbr):
            to_compare_ga_subsets = list(gbr.compile_input(ga_for_gbr))
        else:
            # Single gene: nothing to expand, only drop the parentheses.
            ga_for_gbr = re.sub(r"\(|\)", "", ga_for_gbr)
            to_compare_ga_subsets = [[ga_for_gbr]]

        # ex: to_compare_ga_subsets = [('a','c','d'),('c',)]
        #     sub_dict_orth = {'a':['a_a'],'c':['c_c'], 'd':['d_d']}
        study_ga_subsets = []
        for to_compare_subset in to_compare_ga_subsets:
            study_subset = set()
            for gene in to_compare_subset:
                if gene not in sub_dict_orth:
                    # One gene without orthologue invalidates the subset.
                    study_subset = set()
                    break
                study_subset.update(sub_dict_orth[gene])
            if study_subset:
                study_ga_subsets.append(study_subset)

        if study_ga_subsets:
            # Genes are sorted so the written association is deterministic.
            study_ga = " or ".join(
                "(" + " and ".join(sorted(subset)) + ")"
                for subset in study_ga_subsets)
            if verbose:
                print("\t\tAdding %s" % rxn.id)
                print("\t\tGENE_ASSOCIATION: %s" % (study_ga))
            dict_rxn_ga[rxn.id] = study_ga

    if not dict_rxn_ga:
        if verbose:
            print(
                "\tNo reaction added from {0} to {1} because of missing orthologues"
                .format(model_id, study_id))
        return

    # Ids are collected before removing so the list of reactions is not
    # modified while it is iterated.
    rxn_id_to_remove = set(
        rxn.id for rxn in model_to_compare.getListOfReactions()
    ).difference(dict_rxn_ga)
    if verbose:
        print("\tRemoving %s unused reactions" % len(rxn_id_to_remove))
    for rxn_id in rxn_id_to_remove:
        model_to_compare.removeReaction(rxn_id)

    cpd_id_to_preserve = set()
    for rxn_id, study_ga in dict_rxn_ga.items():
        rxn = model_to_compare.getElementBySId(rxn_id)
        # update notes: same notes, with the rewritten gene association
        notes_in_dict = sp.parseNotes(rxn)
        notes_in_dict["GENE_ASSOCIATION"] = [study_ga]
        notes = "<body xmlns=\"http://www.w3.org/1999/xhtml\">"
        for k, v_list in notes_in_dict.items():
            for v in v_list:
                notes += "<p>" + k + ": " + v + "</p>"
        notes += "</body>"
        rxn.setNotes(notes)
        cpd_id_to_preserve.update(
            p.getSpecies() for p in rxn.getListOfProducts())
        cpd_id_to_preserve.update(
            r.getSpecies() for r in rxn.getListOfReactants())

    # Drop the species that are no longer used by any kept reaction.
    all_species = [cpd.id for cpd in model_to_compare.getListOfSpecies()]
    for cpd_id in all_species:
        if cpd_id not in cpd_id_to_preserve:
            model_to_compare.removeSpecies(cpd_id)

    # The model id is the output file name without its extension.
    new_id = os.path.basename(os.path.splitext(output)[0])
    model_to_compare.setId(new_id)
    libsbml.writeSBMLToFile(document_to_compare, output)
示例#5
0
def sbml_to_curation(sbml_file,
                     rxn_list,
                     output,
                     extract_gene=False,
                     comment="N.A",
                     verbose=False):
    """
    Read a sbml file, check that every reaction id of rxn_list is in the
    sbml, then create the form describing these reactions.
    This form can then be used with manual_curation.py

    For each reaction the form contains tab separated lines: reaction_id,
    comment, reversible, linked_gene, a header line, then one line per
    reactant and per product formatted as stoichio:compound_id:compart,
    followed by an empty line.

    Parameters
    ----------
    sbml_file: str
        path to sbml file
    rxn_list: list
        list of reaction ids, ids must be identical to the ones in the sbml,
        careful with encoded ids.
    output: str
        path to the form to create
    extract_gene: bool
        if true extract genes association
    comment: str
        Comment why the reaction will be added in the network for traceability.
    verbose: bool
        if True print information

    Raises
    ------
    FileNotFoundError
        if sbml_file does not exist
    ValueError
        if a reaction id of rxn_list is not in the sbml
    """
    if not os.path.exists(sbml_file):
        raise FileNotFoundError(
            "No SBML file (--sbml/sbml_file) accessible at " + sbml_file)

    reader = libsbml.SBMLReader()
    document = reader.readSBML(sbml_file)
    for i in range(document.getNumErrors()):
        print(document.getError(i).getMessage())
    model = document.getModel()
    listOfReactions = model.getListOfReactions()

    # check if reactions id are in model; the ids of the model are gathered
    # once instead of once per requested reaction.
    if verbose:
        print("Check if reaction(s) are in sbml file")
    known_rxn_ids = {r.id for r in listOfReactions}
    for rxn_id in rxn_list:
        if rxn_id not in known_rxn_ids:
            raise ValueError("/!\\ reaction %s not found" % rxn_id)
        if verbose:
            print("reaction %s found" % rxn_id)

    # create form output
    with open(output, 'w') as f:

        def write_row(*fields):
            # One tab separated line of the form.
            f.write("\t".join(fields) + "\n")

        for rxn_id in rxn_list:
            rxn_sbml = listOfReactions.getElementBySId(rxn_id)
            rxn_id_decoded = convert_from_coded_id(rxn_id)[0]
            if verbose:
                print("extracting reaction %s, decoded id as %s" %
                      (rxn_id, rxn_id_decoded))
            write_row("reaction_id", rxn_id_decoded)
            write_row("comment", comment)
            write_row("reversible", "true" if rxn_sbml.reversible else "false")

            # linked_gene stays empty when genes are not requested or when
            # the reaction has no GENE_ASSOCIATION note.
            gene_assoc = ""
            if extract_gene:
                try:
                    gene_assoc = parseNotes(rxn_sbml)["GENE_ASSOCIATION"][0]
                except KeyError:
                    pass
            write_row("linked_gene", gene_assoc)

            write_row("#reactant/product", "#stoichio:compound_id:compart")
            participants = (
                ("reactant", rxn_sbml.getListOfReactants()),
                ("product", rxn_sbml.getListOfProducts()),
            )
            for role, species_refs in participants:
                for species_ref in species_refs:
                    stoich = str(abs(species_ref.getStoichiometry()))
                    compound_id, _, compart = convert_from_coded_id(
                        species_ref.getSpecies())
                    write_row(role, ":".join([stoich, compound_id, compart]))
            f.write("\n")
示例#6
0
def enhance_db(metabolic_reactions, padmet, with_genes, verbose = False):
    """
    Parse sbml metabolic_reactions and add reactions in padmet
    if with_genes: add also genes information

    A reaction of the sbml is added when its name is not the id of a
    reaction node of padmet. Compound and gene nodes are created only when
    their id is not already a key of padmet.dicOfNode.

    Parameters
    ----------
    metabolic_reactions: str
        path to sbml metabolic-reactions.xml
    padmet: padmet.PadmetRef
        padmet instance
    with_genes: bool
        if true also add genes information.
    verbose: bool
        if True print the progression

    Returns
    -------
    padmet.padmetRef:
        the padmet instance given as input, updated in place with the nodes
        created from metabolic-reactions.xml
    """
    print("loading sbml file: %s" %metabolic_reactions)
    reader = libsbml.SBMLReader()
    document = reader.readSBML(metabolic_reactions)
    for i in range(document.getNumErrors()):
        print(document.getError(i).getMessage())
    model = document.getModel()
    listOfReactions = model.getListOfReactions()
    # recover the reactions that are not in the padmet but in the sbml file
    # use the reactions name instead of ids because the ids are encoded, the name is the non-encoded version of the id
    padmet_reactions_id = {node.id for node in padmet.dicOfNode.values() if node.type == "reaction"}
    reaction_to_add = [reaction for reaction in listOfReactions
                       if reaction.getName() not in padmet_reactions_id]
    nb_to_add = len(reaction_to_add)
    if verbose: print(str(nb_to_add)+" reactions to add")

    # NOTE(review): list_of_relation is not defined in this function; it is
    # assumed to be provided by the enclosing module - confirm, otherwise the
    # relations built below are lost (or a NameError is raised).
    for count, reactionSBML in enumerate(reaction_to_add, start=1):
        reaction_id = reactionSBML.getName()
        if verbose: print(str(count)+"/"+str(nb_to_add)+"\t"+reaction_id)
        if reactionSBML.getReversible():
            reaction_dir = "REVERSIBLE"
        else:
            reaction_dir = "LEFT-TO-RIGHT"
        # The id may already be used by a node that is not a reaction.
        if reaction_id not in padmet.dicOfNode:
            padmet.dicOfNode[reaction_id] = Node("reaction", reaction_id, {"DIRECTION": [reaction_dir]})

        participants = (
            ("consumes", reactionSBML.getListOfReactants()),
            ("produces", reactionSBML.getListOfProducts()),
        )
        for relation_type, species_refs in participants:
            for species_ref in species_refs:
                # convert ids
                compound_id, _type, compart = sbmlPlugin.convert_from_coded_id(species_ref.getSpecies())
                if compound_id not in padmet.dicOfNode:
                    # Registered under the compound id: reactants used to be
                    # stored under the reaction id, overwriting the reaction.
                    padmet.dicOfNode[compound_id] = Node("compound", compound_id)
                stoich = species_ref.getStoichiometry()
                rlt = Relation(reaction_id, relation_type, compound_id, {"STOICHIOMETRY": [stoich], "COMPARTMENT": [compart]})
                list_of_relation.append(rlt)

        if with_genes:
            notes = sbmlPlugin.parseNotes(reactionSBML)
            if "GENE_ASSOCIATION" in notes:
                # Using sbmlPlugin to recover all genes associated to the reaction
                listOfGenes = sbmlPlugin.parseGeneAssoc(notes["GENE_ASSOCIATION"][0])
                for gene in listOfGenes:
                    # A missing key raises KeyError (not TypeError), so the
                    # presence of the gene is tested explicitly.
                    if gene not in padmet.dicOfNode:
                        padmet.dicOfNode[gene] = Node("gene", gene)
                    is_linked_rlt = Relation(reaction_id, "is_linked_to", gene)
                    list_of_relation.append(is_linked_rlt)
    return padmet