Пример #1
0
def createSequence(modrefseq, filtered_ids=None, description=None):
    """
    Collect the protein sequences stored as reaction annotations in an SBML
    model and return them as sequence records.

     - *modrefseq* the SBML model file that is read with `cbmpy.readSBML3FBC`
     - *filtered_ids* [default=None] the reaction ids whose annotations are
       scanned, if None all reaction ids of the model are used
     - *description* [default=None] the description stored on every record,
       if None the file name part of *modrefseq* is used

    Returns a dictionary that maps each annotation key, with its
    'gbank_seq_' prefix removed, to a `Bio.SeqRecord.SeqRecord`. Only the
    first occurrence of a key is kept.

    """
    emod = cbmpy.readSBML3FBC(modrefseq)
    if len(emod.genes) == 0:
        emod.createGeneAssociationsFromAnnotations()

    if description is None:
        secDescr = os.path.split(modrefseq)[-1]
    else:
        secDescr = description
    if filtered_ids is None:
        filtered_ids = emod.getReactionIds()
    # Build the lookup once: a set makes the per-reaction membership test O(1).
    wanted_ids = set(filtered_ids)

    geneseq = {}
    for r_ in emod.reactions:
        if r_.getId() not in wanted_ids:
            continue
        for a in r_.annotation:
            if not a.startswith('gbank_seq_'):
                continue
            newName = a.replace('gbank_seq_', '')
            if newName in geneseq:
                # the first sequence found for a name wins
                continue
            if oldbiopython:
                # this branch passes an explicit protein alphabet to Seq
                seq = Bio.Seq.Seq(
                    r_.getAnnotation(a), Bio.Alphabet.ProteinAlphabet()
                )
            else:
                seq = Bio.Seq.Seq(r_.getAnnotation(a))

            geneseq[newName] = Bio.SeqRecord.SeqRecord(
                seq,
                id=newName,
                name=newName,
                description=secDescr,
                annotations={"molecule_type": "protein"},
            )
    print('{} genseqs added'.format(len(geneseq)))
    return geneseq
Пример #2
0
 def roundTripModelV1(self, model):
     """
     Write *model* to 'rtv1.xml' inside ``self.cDir`` and return the model
     obtained by reading that file back with validation switched on.
     """
     # NOTE(review): the method name says V1 but the writer used is
     # writeSBML3FBCV2 - confirm which FBC version is intended.
     out_name = 'rtv1.xml'
     cbmpy.writeSBML3FBCV2(model, out_name, directory=self.cDir)
     # release the in-memory model before the copy is loaded from disk
     del model
     written_file = os.path.join(self.cDir, out_name)
     return cbmpy.readSBML3FBC(written_file, xoptions={'validate': True})
Пример #3
0
 def setup_class(klass):
     """
     Run once per class before any tests: load every file in DATA whose
     name starts with 'BIGG2_' and store the models in ``klass.m`` keyed
     by file name.
     """
     klass.m = {}
     for fname in DATA:
         if not fname.startswith('BIGG2_'):
             continue
         model_path = os.path.join(MDIR, fname)
         klass.m[fname] = cbmpy.readSBML3FBC(
             model_path, xoptions={'validate': True}
         )
Пример #4
0
 def test_run_fba_cobra(self):
     """
     Load the iJR904 model from its COBRA SBML file and from its SBML3 FBC
     file, run FBA on both and assert the two results are almost equal.
     """
     print('\n\nThis test fails under Linux.\n\n')
     cobra_model = cbmpy.readCOBRASBML(
         'L2CBR_iJR904.glc.xml',
         work_dir=MDIR,
         output_dir=self.CDIR,
         delete_intermediate=True,
     )
     fbc_model = cbmpy.readSBML3FBC(
         'L3FBCV1_iJR904.glc.xml',
         work_dir=MDIR,
         xoptions={'validate': True},
     )
     fba1 = cbmpy.doFBA(cobra_model)
     fba2 = cbmpy.doFBA(fbc_model)
     # the models are no longer needed once both results are available
     del cobra_model, fbc_model
     assert_almost_equal(fba1, fba2)
Пример #5
0
def createSeqplusModelMetaIdx(fmod, oid, oclass, metadraft_lib_model):
    """
    Create a MetaDraft template model index file from a seqplus model file::

     - *fmod* the seqplus model file e.g. 'EstherDB.xml'
     - *oid* the model unique shortname e.g. 'edb'
     - *oclass* the model category e.g. 'vu'
     - *metadraft_lib_model* the target MetaDraft lib_model directory

     'edb', 'vu', "<path>"

    Two files are written next to the input file, both named
    '(<oclass>-<oid>)-(<input name>).seqplus': a '-link.json' index and an
    '.xml' copy of the model. Any '-' in *oid* and *oclass* is removed
    before they are combined. Nothing is returned.

    """

    import cbmpy

    dmod = cbmpy.readSBML3FBC(fmod)
    fgb = os.path.abspath(fmod)
    input_path, fmod = os.path.split(fgb)
    # '-' is the separator between class and id, so strip it from both parts
    oclass = oclass.replace('-', '')
    oid = oid.replace('-', '')
    oid = '{}-{}'.format(oclass, oid)
    if fmod.endswith('.xml'):
        fmod = fmod[:-4]
    fmod = '({})-({}).seqplus'.format(oid, fmod)
    print(fmod)

    linkDict = {}
    linkDict[oid] = {}
    linkDict["__idx__"] = {}
    LD = linkDict[oid]
    LD['genbank_in'] = fgb
    LD['sbml_in'] = fgb
    LD['data_path'] = input_path
    LD['gene2reaction'] = dmod.getAllProteinGeneAssociations()
    # the global index maps every key of 'gene2reaction' back to this model
    for g_ in LD['gene2reaction']:
        linkDict['__idx__'][g_] = oid
    LD['reaction2gene'] = dmod.getAllGeneProteinAssociations()
    LD['taxon_id'] = "unknown"
    LD['sbml_out'] = os.path.join(metadraft_lib_model, "{}.xml".format(fmod))
    LD['sbml_out_generic'] = os.path.join(metadraft_lib_model, "{}.xml".format(fmod))
    LD['fasta_out'] = None

    # the context manager closes the file even if serialisation fails
    link_file = os.path.join(input_path, '{}-link.json'.format(fmod))
    with open(link_file, 'w') as Fj:
        json.dump(linkDict, Fj, indent=1, separators=(',', ': '))

    cbmpy.writeSBML3FBC(
        dmod,
        os.path.join(input_path, fmod + '.xml'),
        add_cbmpy_annot=True,
        add_cobra_annot=False,
        add_groups=False,
    )
Пример #6
0
 def setup_class(klass):
     """
     This method is run once for each class before any tests are run.

     Every file in DATA is loaded with the reader that matches its name
     prefix ('L2CBR_', 'L2FBA_', 'L3FBCV1_' or 'L3FBCV2_') and stored in
     ``klass.m`` keyed by file name; files with any other prefix are
     skipped.
     """
     # Set the output directory first: the COBRA reader below needs it, and
     # there is no `self` in this class-level hook.
     klass.CDIR = cDir
     klass.m = {}
     for m in DATA:
         cmod = None
         if m.startswith('L2CBR_'):
             cmod = cbmpy.readCOBRASBML(m, work_dir=MDIR,
                                        output_dir=klass.CDIR, delete_intermediate=True)
         elif m.startswith('L2FBA_'):
             cmod = cbmpy.readSBML2FBA(os.path.join(MDIR, m))
         elif m.startswith('L3FBCV1_') or m.startswith('L3FBCV2_'):
             cmod = cbmpy.readSBML3FBC(os.path.join(MDIR, m),
                                       xoptions={'validate': True})
         if cmod is not None:
             klass.m[m] = cmod
Пример #7
0
import os, time, numpy
cDir = os.path.dirname(os.path.abspath(os.sys.argv[0]))
import cbmpy as cbm

# Run FBA and multi-core FVA on a model, then repeat both on a clone in which
# every reaction returned by getReactionIds('R_EX_') has been split in two.
model_name = 'iAF692.xml'
model_path = os.path.join(cDir, 'models', model_name)
cmod = cbm.readSBML3FBC(model_path)
cmodS = cmod.clone()

# --- original model ---
plain_name = model_name.replace('.xml', '')
cbm.doFBA(cmod)
fva, fvan = cbm.CBMultiCore.runMultiCoreFVA(cmod, procs=4)
cbm.CBWrite.writeFVAtoCSV(fva, fvan, plain_name, fbaObj=cmod)
cbm.writeModelToExcel97(cmod, plain_name)

# --- split model ---
exch_reactions = cmod.getReactionIds('R_EX_')
dump = []
# reactions are defined back to front so forward is "o" and backward is "i"
for xr_ in exch_reactions:
    fwd_id = xr_.replace('R_EX_', 'R_EXo_')
    bwd_id = xr_.replace('R_EX_', 'R_EXi_')
    split_result = cbm.CBTools.splitSingleReversibleReaction(
        cmodS, xr_, fwd_id, bwd_id
    )
    dump.append(split_result)

split_name = model_name.replace('.xml', '.split')
cbm.doFBA(cmodS)
fva, fvan = cbm.CBMultiCore.runMultiCoreFVA(cmodS, procs=4)
cbm.CBWrite.writeFVAtoCSV(fva, fvan, split_name, fbaObj=cmodS)
cbm.writeSBML3FBC(cmodS, model_name.replace('.xml', '.split.xml'))
cbm.CBWrite.writeModelToExcel97(cmodS, split_name)
Пример #8
0
import cbmpy as cbm

from examples_and_results.helpers_ECM_calc import *
"""CONSTANTS"""
model_name = "iND750"
# For a bigger computation, try:
# input_file_name = "bacteroid_ECMinputAll.csv"

# The model is read from the "models" folder of the current working directory
model_dir = os.path.join(os.getcwd(), "models")
model_path = os.path.join(model_dir, "{}.xml".format(model_name))
mod = cbm.readSBML3FBC(model_path)

# Use the dead-end reactions when there are any; otherwise fall back to the
# first column of the dead-end metabolite result and an empty reaction list.
pairs = cbm.CBTools.findDeadEndReactions(mod)
if len(pairs):
    external_metabolites, external_reactions = list(zip(*pairs))
else:
    dead_end_columns = list(zip(*cbm.CBTools.findDeadEndMetabolites(mod)))
    external_metabolites = dead_end_columns[0]
    external_reactions = []

# External according to Urbanczik
ext_urbanczik = [
    'ac',
    'acald',
    'ala__L',
    'co2',
    'csn',
    'ergst',
    'etoh',
    'gam6p',
    'glc__D',
    'hdcea',
    'ocdcea',
    'ocdcya',
    'so4',
    'xylt',
    'zymst',
    'nh4',
    'asp__L',
    'ser__L',
    'fum',
    'gly',
    'thr__L',
]
force_feed = ['ac']

ext_urbanczik_inds = [
    ind for ind, metab in enumerate(external_metabolites)
    if metab[2:-2] in ext_urbanczik
Пример #9
0
 def test_load_L3FBCV1_iJR904(self):
     """Read 'L3FBCV1_iJR904.glc.xml' from MDIR with validation and assert the result is not equal to None."""
     model_file = os.path.join(MDIR, 'L3FBCV1_iJR904.glc.xml')
     loaded = cbmpy.readSBML3FBC(model_file, xoptions={'validate': True})
     assert_not_equal(loaded, None)
Пример #10
0
def checkModelLocusTags(sbml, genbank, allow_gene_names=False):
    """
    Checks the gene identifiers (assuming they are locus tags) against a genbank file of the same organism

    - *sbml* the model SBML (*.xml) file
    - *genbank* the associated GenBank (*.gbk) full file(s) (including CDS annotations and sequences), a single
      file name or a list of file names
    - *allow_gene_names* allow gene names, non-unique as gene identifiers if locus tags are not present. USE WITH CAUTION!!!

    The result of the check is also written to a CSV file named after *sbml* with '.xml' replaced by
    '.seqcheck.csv'.

    Returns the tuple (good, updated, noseq, unknown, cmod, gprMap, gprMapAnnot, no_locus_tag):

    - *good* gene labels found directly in the GenBank CDS map
    - *updated* dictionary of model label --> new locus tag, resolved through 'old_locus_tag'
    - *noseq* list of (model label, new locus tag) pairs whose CDS entry was not added to the CDS map
    - *unknown* gene labels that could not be matched at all
    - *cmod* the model, annotated with information from the GenBank record
    - *gprMap* dictionary of CDS name (or gene name) --> CDS record
    - *gprMapAnnot* dictionary of record id --> record annotations (without 'references')
    - *no_locus_tag* gene names that were used in place of a missing locus tag

    """

    cmod = cbmpy.readSBML3FBC(sbml)

    if isinstance(genbank, str):
        genbank = [genbank]

    gprMap = {}
    gprMapAnnot = {}
    gnoprMap = {}
    fileNum = 0
    no_locus_tag = []
    # Set once for the whole run so that gene names used in an earlier file are
    # still reported, and so the flag exists even when no file is processed.
    USING_GENE_NAME = False
    for G in genbank:
        print('CheckModelLocusTags is processing: {}'.format(G))
        # count the records of this file only
        cntr = 0
        for seq_record in SeqIO.parse(G, "genbank"):
            cntr += 1
        if cntr > 1:
            print("INFO: Multiple sequences encountered in file: {}".format(G))

        # Add the annotations of a GenBank record to the model. This is done
        # for the first file only; after the loop above `seq_record` is the
        # last record that was parsed from it.
        if fileNum == 0:
            record_annot = seq_record.annotations.copy()
            gprMapAnnot[seq_record.id] = record_annot
            # Remove the references before the generic annotation loop below
            # and handle every one of them, not only the first.
            references = record_annot.pop('references', None)
            if references is None:
                print('checkModelLocusTags: no references')
            else:
                for ref_ in references:
                    if ref_.pubmed_id != '':
                        cmod.addMIRIAMannotation(
                            'isDescribedBy', 'PubMed', ref_.pubmed_id
                        )
            # the record id and name do not depend on references being present
            cmod.setAnnotation('genbank_id', seq_record.id)
            cmod.setAnnotation('genbank_name', seq_record.name)

            # global features: qualifiers of the first 'source' feature
            features = [f.qualifiers for f in seq_record.features if f.type == 'source']
            if len(features) > 0:
                features = features[0]
            for f_ in features:
                if f_ == 'db_xref':
                    for ff_ in features[f_]:
                        if ff_.startswith('taxon:'):
                            cmod.setAnnotation(
                                'genbank_taxon_id', ff_.replace('taxon:', '')
                            )
                            break
                cmod.setAnnotation('genbank_{}'.format(f_), features[f_])
            for r_ in record_annot:
                cmod.setAnnotation('genbank_{}'.format(r_), record_annot[r_])
            fileNum += 1

        # the context manager closes the file even if parsing fails
        with open(G, 'r') as GBFile:
            for cds in Bio.SeqIO.InsdcIO.GenBankCdsFeatureIterator(GBFile):
                if cds.seq is not None:
                    if cds.name != '<unknown name>':
                        gprMap[cds.name] = cds
                    # this is dangerous as there is no defined 1:1 mapping
                    elif 'gene' in cds.annotations and allow_gene_names:
                        gprMap[cds.annotations['gene']] = cds
                        USING_GENE_NAME = True
                        no_locus_tag.append(cds.annotations['gene'])
                    else:
                        gnoprMap[cds.name] = cds
                else:
                    gnoprMap[cds.name] = cds

    # Each entry is the list of old locus tags followed by the current name.
    oldtags = [
        gprMap[a].annotations['old_locus_tag'].split(' ') + [a]
        for a in gprMap
        if 'old_locus_tag' in gprMap[a].annotations
    ]
    oldtags2 = [
        gnoprMap[a].annotations['old_locus_tag'].split(' ') + [a]
        for a in gnoprMap
        if 'old_locus_tag' in gnoprMap[a].annotations
    ]

    # old locus tag --> current name, for CDS entries in gprMap (old2new)
    # and for those in gnoprMap (old2new2)
    old2new = {}
    old2new2 = {}

    for x in oldtags:
        for y in x:
            if y != x[-1]:
                old2new[y] = x[-1]

    for x in oldtags2:
        for y in x:
            if y != x[-1]:
                old2new2[y] = x[-1]

    print('\n\nChecking locus tags\n===================')
    updated = {}
    unknown = []
    noseq = []
    good = []
    geneIDs = cmod.getGeneLabels()
    with open(sbml.replace('.xml', '.seqcheck.csv'), 'w') as F:
        for g_ in geneIDs:
            if g_ not in gprMap:
                if g_ in old2new:
                    print('old {} --> new {}'.format(g_, old2new[g_]))
                    updated[g_] = old2new[g_]
                    F.write('UPDATED,{},{}\n'.format(g_, old2new[g_]))
                elif g_ in old2new2:
                    print('NoSEQ: old {} --> new {}'.format(g_, old2new2[g_]))
                    noseq.append((g_, old2new2[g_]))
                    F.write('NOSEQ,{},{}\n'.format(g_, old2new2[g_]))
                else:
                    print('UNKNOWN gene: {}'.format(g_))
                    unknown.append(g_)
                    F.write('UNKNOWN,{}\n'.format(g_))
            else:
                good.append(g_)
    if USING_GENE_NAME:
        print(
            '\nINFO: model contained {} genes without locus tags. For these genes '
            'the /gene name was used instead. This gene name may not be unique, please check model!!'.format(
                len(no_locus_tag)
            )
        )

    return good, updated, noseq, unknown, cmod, gprMap, gprMapAnnot, no_locus_tag
Пример #11
0
 def setup_class(klass):
     """
     Run once per class before any tests: store the 'cbmpy_test_core'
     model, a sample SBO term and the working directory on the class.
     """
     core_model = cbmpy.readSBML3FBC('cbmpy_test_core')
     klass.mcore = core_model
     klass.sboterm1 = 'SBO:1234567'
     klass.cDir = cDir