Пример #1
0
def start_app_from_args(args):
    """Populate the module-level databases and tool settings from *args*.

    Reads ``args.tmp``, ``args.clustalo.name``, ``args.databases`` and
    ``args.genomes``. Every result is stored in a module-level global
    (``homDB``, ``xrefDB``, ``opDB``, ``tssDB``, ``sorfDB``, ``pfamDB``,
    ``genomDB``, ``tmpfolder``, ``clustalobin``); nothing is returned.
    """
    global homDB, genomDB, xrefDB, opDB, sorfDB, pfamDB, tssDB
    global tmpfolder, clustalobin

    tmpfolder = args.tmp
    clustalobin = args.clustalo.name

    # All database files live below this root directory.
    dbRoot = args.databases
    homdbDir = dbRoot + "/homdb/"

    homDB = HomologyDatabase.loadFromFile(homdbDir + "/hpdb_full_new")
    xrefDB = XRefDatabase(
        gobo=dbRoot + "/obos/go.obo",
        fileName=homdbDir + "/hpdb_full_xref",
    )
    opDB = OperonDB.from_cs_operons(dbRoot + "/sharma/operons.xlsx")
    tssDB = TSSDB.from_cs_tss(dbRoot + "/sharma/tss.xlsx")
    sorfDB = SORFDB.from_cs_sorfs(dbRoot + "/sharma/sorfs.xlsx")
    pfamDB = PfamResultDB.from_folder(dbRoot + "/pfam/")

    # The genome database is created with loadAll=False; the genomes of the
    # organisms known to the homology database are then loaded explicitly.
    genomDB = GenomeDB(args.genomes, loadAll=False)
    for organism in homDB.get_all_organisms():
        genomDB.loadGenome(organism)
            if printAlignment:
                print(orgJT[1])
                #print("WL Changes", orgIT, orgJT, orgSubMatrix[('L', 'W')])
                #print(alignment[0])
                #print(alignment[1])

        for x in orgSubMatrix:
            orgSubMatRel[x] = orgSubMatrix[x] / aaLength

        print("W content")
        for org in orgAACounts:
            print(org, "W", orgAACounts[org]['W'])

        return orgSubMatrix, orgSubMatRel, aaLength, orgSubMatrixDir

    hpHomolDB = HomologyDatabase.loadFromFile(fileLocation + "/hpp12_hp")
    cbHomolDB = HomologyDatabase.loadFromFile(fileLocation + "../cbdb/" +
                                              "/cbj")

    genomeDB = GenomeDB(fileLocation + "/genomes/")

    genomeDB.loadGenome(fileLocation + "/genomes/CP001217.gb")
    genomeDB.loadGenome(fileLocation + "/genomes/AE000511.gb")

    genomeDB.fileExtension = '.gbff'
    genomeDB.fileFormat = 'gb'

    genomeDB.loadGenome(fileLocation + "../cbdb/genomes/NC003912.gbff")
    genomeDB.loadGenome(fileLocation + "../cbdb/genomes/NC002163.gbff")

    matrix = matlist.blosum80
Пример #3
0
import glob

import os

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord

from database.genomedb import GenomeDB
from database.homDBAnalyser import HomDBAnalyser
from database.homologydb import HomologyDatabase

if __name__ == '__main__':
    baseDIR = '/mnt/c/dev/data/haas/homdb/'

    homDB = HomologyDatabase.loadFromFile(baseDIR + "/hpp_split")

    # Homology-cluster selections kept side by side. Only the LAST entry is
    # bound to ``homs`` and processed by the loop below.
    homSelections = (
        ['HOMID1448', 'HOMID1742', 'HOMID1692', 'HOMID1795', 'HOMID2024',
         'HOMID2027', 'HOMID1621', 'HOMID1338', 'HOMID1672', 'HOMID1693'],
        ['HOMID1286'],
        ['HOMID933', 'HOMID1354', 'HOMID1792', 'HOMID1621', 'HOMID1165',
         'HOMID2171', 'HOMID283'],
        ['HOMID933', 'HOMID1354'],
        ['sp_HOMID2426', 'sp_HOMID2488'],
        ['HOMID403', 'HOMID1649', 'HOMID981', 'HOMID607', 'HOMID346'],
    )
    homs = homSelections[-1]

    promLen = 50

    for homid in homs:

        alignSeqs = []
Пример #4
0
import editdistance

from database.genomedb import GenomeDB
from database.homologydb import HomologyDatabase

if __name__ == '__main__':

    genomeLocation = '/home/users/joppich/ownCloud/data/hpyloriDB/genomes/'

    homDB = HomologyDatabase.loadFromFile(
        "/home/proj/projekte/dataintegration/hpyloriDB/hpp12.homdb")
    genDB = GenomeDB(genomeLocation)

    # Walk every homology group and collect the sequence of each member.
    for homGroup in homDB.homologies:

        entries = homDB.homologies[homGroup]
        allSeqs = []

        for seqID in entries:

            # seqID[0] is used as the genome key, seqID[1] as the key of the
            # sequence inside that genome.
            orgID = seqID[0]

            # Genomes are loaded on first use only.
            if orgID not in genDB.genomes:
                genDB.loadGenome(genomeLocation + "/" + orgID + ".gb")

            seq = genDB.get_sequence(orgID, seqID[1])
            allSeqs.append(seq)

        # Nothing to do for a group without any sequences.
        if not allSeqs:
            continue
Пример #5
0
def distance(r1, r2):
    """Return the gap between two ranges, or 0 when there is none.

    Each range is an indexable pair whose element 0 is the start and whose
    element 1 is the end. The result is ``start_of_upper - end_of_lower``
    when both ranges are well-formed (start <= end) and the lower range ends
    strictly before the upper one starts. In every other case (overlapping,
    touching, or a range with start > end) the result is 0.
    """
    # Order the ranges so that ``lower`` is the one that sorts first.
    lower, upper = sorted((r1, r2))

    # Guard: the lower range must be well-formed and end before the upper
    # range begins.
    if not (lower[0] <= lower[1] < upper[0]):
        return 0

    # Guard: both input ranges must be well-formed.
    if not all(rng[0] <= rng[1] for rng in (r1, r2)):
        return 0

    return upper[0] - lower[1]


if __name__ == '__main__':

    # Base directory holding the homology database file and the genomes folder.
    fileLocation = "/mnt/c/dev/data/haas/homdb/"

    # Load the homology database; the genome database is created with
    # loadAll=False and filled explicitly in the loop below.
    homDB = HomologyDatabase.loadFromFile(fileLocation + "/hpp_split")
    genomDB = GenomeDB(fileLocation + "/genomes", loadAll=False)

    allorgs = homDB.get_all_organisms()

    # Load the genome of every organism known to the homology database.
    for org in allorgs:
        genomDB.loadGenome(org)

    # Two hand-picked groups of organism identifiers.
    # NOTE(review): what 'extra' and 'mc' stand for is not documented in this
    # script - TODO confirm with the author.
    extra = ['AE001439', 'CP009259']
    mc = [
        '4_N1-031C1', '2_N1-025A2', '14_1-20A_UB64', '13_N5-004A1',
        '3_N1-029C1', '11_N4-029C2', '10_N2-085C2', '1_N1-024A1'
    ]
    nmc = [
        x for x in allorgs
        if not x in mc and not x in extra and not x.startswith("6_")
Пример #6
0
    for homID in interestHomCluster:
        val = homDB.get_homology_cluster(homID)

        mcc = sum([1 for x in val if x in mc])
        nmcc = sum([1 for x in val if x in nmc])

        print(homID, "MC", mcc, "NMC", nmcc)

    print([int(x.replace('HOMID', '')) for x in interestHomCluster])


if __name__ == '__main__':

    # Base directory holding the homology database file and the genomes folder.
    fileLocation = "/mnt/c/dev/data/haas/homdb/"

    # Load the "combed" homology database; the genome database is created
    # with loadAll=False and filled explicitly further down.
    homDB = HomologyDatabase.loadFromFile(fileLocation + "/combed")
    genomDB = GenomeDB(fileLocation + "/genomes", loadAll=False)

    # The bare string literal below is an expression statement with no
    # effect: the code inside it is disabled. It holds a snippet that copied
    # every entry of homDB.combinations into homDB.homologies and saved the
    # database to the "combed" file.
    """
    for combid in homDB.combinations:
        elems = homDB.combinations[combid]

        homDB.homologies[combid] = elems

    homDB.finalize()
    homDB.save_to_file(fileLocation + "combed")
    """

    # Load the genome of every organism known to the homology database.
    for orgname in homDB.get_all_organisms():
        genomDB.loadGenome(orgname)
    # get_all_organisms() is called a second time here; its result is
    # materialised as a list.
    allorgs = list(homDB.get_all_organisms())
Пример #7
0
import sys
import os
from collections import defaultdict

sys.path.insert(0, str(os.path.dirname(os.path.realpath(__file__))) + "/../")

import math

from database.genomedb import GenomeDB
from database.homologydb import HomologyDatabase
from utils.utils import fileLocation

if __name__ == '__main__':

    # fileLocation is imported from utils.utils.
    homolDB = HomologyDatabase.loadFromFile(fileLocation + "/hpp12_hp")
    genomeDB = GenomeDB(fileLocation + "/genomes/")

    allowedOrgs = ['CP001217', 'AE000511']

    # compareAA is a pair of lists of single-letter codes (presumably
    # amino-acid one-letter codes - TODO confirm). The first assignment is
    # overwritten immediately, so only the second pair is in effect.
    compareAA = (['W'], ['F', 'G', 'A'])
    compareAA = (['W'], ['H', 'F', 'Y', 'P', 'K'])

    # Further alternative comparison sets, currently disabled:
    #compareAA = (['W', 'M'], ['H', 'F', 'Y', 'P', 'K'])
    #compareAA = (['W', 'M'], ['F', 'G', 'A'])


    def calculateDifferences(orgI, orgJ, allAA):

        allDiffs = list()
        foundGenes = 0
Пример #8
0
                        '--output',
                        type=argparse.FileType('w'),
                        help='input',
                        required=True)
    parser.add_argument('--redo',
                        action='store_true',
                        help='input',
                        default=False)

    restrictOrgs = ['AE001439', 'AE000511', 'CP001217']
    restrictOrgs = None

    args = parser.parse_args()

    print("Loading Hom DB")
    homDB = HomologyDatabase.loadFromFile(args.location.name)

    print("Loading Genomes")
    genomDB = GenomeDB(os.path.dirname(args.location.name) + "/genomes",
                       loadAll=False)

    allorgs = homDB.get_all_organisms()

    if restrictOrgs:
        allorgs = restrictOrgs

    for org in allorgs:
        genomDB.loadGenome(org)

    print("Loading HomDB analyser")
    analyse = HomDBAnalyser(homDB, genomDB, loadAll=False)
Пример #9
0
from database.genomedb import GenomeDB
from database.homDBAnalyser import HomDBAnalyser
from database.homologydb import HomologyDatabase

if __name__ == '__main__':
    baseDIR = '/mnt/c/dev/data/haas/homdb/'

    genomeDB = GenomeDB(baseDIR + "/genomes", loadAll=False)
    homDB = HomologyDatabase.loadFromFile(baseDIR + "/hpp_comb")

    analyse = HomDBAnalyser(homDB, genomeDB)

    def printHOM(homid):
        """Print the alignment of one homology cluster and summarise it.

        *homid* is the numeric part of the cluster id; the cluster looked up
        is ``'HOMID' + str(homid)``. The id is printed first, then each
        aligned record (sorted by record id) as ``sequence id``.

        Returns a tuple ``(cluster_id, longest, sequences)`` where *longest*
        is the longest sequence with the '-' gap characters removed (the
        first one wins on ties) and *sequences* is a set of
        ``(ungapped_sequence, record_id)`` pairs.
        """
        print(homid)

        clusterID = 'HOMID' + str(homid)
        alignment = analyse.cluster_align(clusterID)

        longestSeq = ""
        seenSeqs = set()

        for record in sorted(alignment._records, key=lambda r: r.id):
            # Strip the alignment gaps to recover the raw sequence.
            ungapped = str(record.seq).replace('-', '')
            seenSeqs.add((ungapped, record.id))

            # Strictly longer only, so the earliest record wins a tie.
            if len(ungapped) > len(longestSeq):
                longestSeq = ungapped

            print(record.seq, record.id)

        return (clusterID, longestSeq, set(seenSeqs))
Пример #10
0
from database.homologydb import HomologyDatabase
from utils.utils import fileLocation
from xrefs.GeneIdentity import GeneIdentity
from xrefs.uniprotStore import UniprotStore

# Load the full homology database; fileLocation is imported from utils.utils.
hpHomolDB = HomologyDatabase.loadFromFile(fileLocation + "/hpdb_full")

# Flat list of the elements of every organism in the database, in iteration
# order (duplicates are not removed).
allIDS = []

for org in hpHomolDB.get_all_organisms():

    allOrgElems = hpHomolDB.get_organism_elements(org)

    for x in allOrgElems:
        allIDS.append(x)

print("Fetching information for", len(allIDS), "gene ids")

finalDF = None
# Number of ids handled per loop iteration below.
chunksize = 1000

# Process allIDS in consecutive slices of at most `chunksize` elements.
for i in range(0, len(allIDS), chunksize):

    # Exclusive end index of the current chunk, clamped to the list length.
    imax = min([i + chunksize, len(allIDS)])

    chunkElems = allIDS[i:imax]

    # Progress output: start index, end index, chunk size.
    print(i, imax, len(chunkElems))

    # A new UniprotStore is constructed for every chunk.
    up = UniprotStore()
    allConvertedIDs = up.fetch(GeneIdentity.GENE_NAME,