Пример #1
0
    def test01GetDicts(self):
        """
        ensure we can create, save and retrieve gene2go and go2gene dictionaries

        Any existing 'terms.pickle' in the working directory is deleted before
        create_dicts is called with that path; the dictionaries are then read
        back with load_dicts and their sizes are printed.
        """

        termsPath = 'terms.pickle'
        # remove a leftover pickle from an earlier run before regenerating it
        if os.path.exists(termsPath):
            os.remove(termsPath)

        go = GeneOntology([self.taxId],upass=UPASS,idType='ncbi',useIea=True,
                          aspect='biological_process')
        go.create_dicts(termsPath)
        gene2go, go2gene = go.load_dicts(termsPath)
        print("there are %s genes"%(len(gene2go.keys())))
        print("there are %s terms"%(len(go2gene.keys())))
Пример #2
0
from htsint.blast import BlastMapper

# specify main variables (biological_process, molecular_function, cellular_component)
# All generated files are written beneath ./demo, created on first use.
homeDir = os.path.join(".","demo")
if not os.path.isdir(homeDir):
    os.mkdir(homeDir)

_aspect = 'bp'
aspect = 'biological_process'

# Create a term graph
# The terms pickle is only generated when it is missing; the graph is
# (re)created on every run.
go = GeneOntology(["8364","8355"],useIea=False,aspect=aspect)
termsPath = os.path.join(homeDir,"go-terms.pickle")
graphPath = os.path.join(homeDir,"go-graph.pickle")
if not os.path.exists(termsPath):
    go.create_dicts(termsPath)
gene2go,go2gene = go.load_dicts(termsPath)
G = go.create_gograph(termsPath=termsPath,graphPath=graphPath)
print("%s genes have at least one annotation"%(len(gene2go.keys())))
print("Term graph for with %s nodes successfully created."%(len(G.nodes())))

# Calculate term distances (most time consuming step)
# Skipped entirely when the distances file already exists.
termDistancePath = os.path.join(homeDir,"term-distances.npy")
if not os.path.exists(termDistancePath):
    td = TermDistances(termsPath,graphPath)
    print("total distances to evaluate: %s"%td.totalDistances)
    timeStart = time.time()
    # NOTE(review): cpus=30 is hard-coded -- confirm it suits the target host
    td.run_with_multiprocessing(termDistancePath,cpus=30)
    # elapsed wall-clock time formatted as HH:MM:SS; written in call form so
    # the line parses on Python 3 and matches the other print calls here
    print(time.strftime('%H:%M:%S', time.gmtime(time.time()-timeStart)))

# Calculate gene distances
Пример #3
0
# Create the output directory on first use.
if not os.path.exists(gsaDir):
    os.mkdir(gsaDir)

## imports and run settings
from htsint import GeneOntology, TermDistances
taxaList = ['9606']
aspect = "biological_process"
_aspect = 'bp'
useIea = True

# Ontology handle and the cache files, whose names carry the aspect tag
go = GeneOntology(taxaList, useIea=useIea, aspect=aspect)
graphPath = os.path.join(gsaDir, "go-graph-%s.pickle" % _aspect)
termsPath = os.path.join(gsaDir, "go-terms-%s.pickle" % _aspect)

# Generate the term dictionaries only when no pickle exists yet, passing
# the ids from geneList as `accepted`; then load them from disk.
geneIds = geneList.keys()
if not os.path.exists(termsPath):
    go.create_dicts(termsPath, accepted=geneIds)
gene2go, go2gene = go.load_dicts(termsPath)
print("pathway genes with terms: %s/%s" % (len(gene2go.keys()), len(geneIds)))

# The graph is built only when its pickle is absent, so G is bound only
# in that case.
if not os.path.exists(graphPath):
    G = go.create_gograph(termsPath=termsPath, graphPath=graphPath)
    print("Term graph for with %s nodes successfully created." % (len(G.nodes())))

# Calculate term distances (skipped when the .npy file already exists)
termDistancePath = os.path.join(gsaDir, "term-distances-%s.npy" % _aspect)
if not os.path.exists(termDistancePath):
    td = TermDistances(termsPath, graphPath)
    print("total distances to evaluate: %s" % td.totalDistances)
    td.run_with_multiprocessing(termDistancePath, cpus=7)
Пример #4
0
# The analysis directory must exist before any cache file is written.
if not os.path.exists(gsaDir):
    os.mkdir(gsaDir)

## bring in the htsint classes and fix the analysis parameters
from htsint import GeneOntology, TermDistances
_aspect = 'bp'
aspect = "biological_process"
useIea = True
taxaList = ['9606']
go = GeneOntology(taxaList, useIea=useIea, aspect=aspect)

# Cache file locations; _aspect is embedded in each file name.
termsPath = os.path.join(gsaDir, "go-terms-%s.pickle" % _aspect)
graphPath = os.path.join(gsaDir, "go-graph-%s.pickle" % _aspect)

# Term dictionaries: created once (with geneIds passed as `accepted`),
# then always loaded from the pickle.
geneIds = geneList.keys()
if not os.path.exists(termsPath):
    go.create_dicts(termsPath, accepted=geneIds)
gene2go, go2gene = go.load_dicts(termsPath)
print(
    "pathway genes with terms: %s/%s" % (len(gene2go.keys()), len(geneIds))
)

# Term graph: only built (and G only bound) when no graph pickle exists.
if not os.path.exists(graphPath):
    G = go.create_gograph(termsPath=termsPath, graphPath=graphPath)
    print(
        "Term graph for with %s nodes successfully created." % (len(G.nodes()))
    )

# Calculate term distances
termDistancePath = os.path.join(gsaDir, "term-distances-%s.npy" % _aspect)
if not os.path.exists(termDistancePath):
    td = TermDistances(termsPath, graphPath)
    print("total distances to evaluate: %s" % td.totalDistances)
    td.run_with_multiprocessing(termDistancePath, cpus=7)

# Calculate gene distances
Пример #5
0
from htsint.blast import BlastMapper

# specify main variables (biological_process, molecular_function, cellular_component)
# Every file produced below is stored under ./demo, created if missing.
homeDir = os.path.join(".", "demo")
if not os.path.isdir(homeDir):
    os.mkdir(homeDir)

_aspect = 'bp'
aspect = 'biological_process'

# Create a term graph
# The terms pickle is generated only when absent; the graph is (re)created
# on each run.
go = GeneOntology(["8364", "8355"], useIea=False, aspect=aspect)
termsPath = os.path.join(homeDir, "go-terms.pickle")
graphPath = os.path.join(homeDir, "go-graph.pickle")
if not os.path.exists(termsPath):
    go.create_dicts(termsPath)
gene2go, go2gene = go.load_dicts(termsPath)
G = go.create_gograph(termsPath=termsPath, graphPath=graphPath)
print("%s genes have at least one annotation" % (len(gene2go.keys())))
print("Term graph for with %s nodes successfully created." % (len(G.nodes())))

# Calculate term distances (most time consuming step)
# Nothing is recomputed when the distances file is already on disk.
termDistancePath = os.path.join(homeDir, "term-distances.npy")
if not os.path.exists(termDistancePath):
    td = TermDistances(termsPath, graphPath)
    print("total distances to evaluate: %s" % td.totalDistances)
    timeStart = time.time()
    # NOTE(review): cpus=30 is hard-coded -- confirm it suits the target host
    td.run_with_multiprocessing(termDistancePath, cpus=30)
    # report elapsed wall-clock time as HH:MM:SS; the call form keeps this
    # line valid on Python 3 and consistent with the other print calls
    print(time.strftime('%H:%M:%S', time.gmtime(time.time() - timeStart)))

# Calculate gene distances