Пример #1
0
def testTranslateAndCountHits(hits):
    """Check countHits and translateHits against known values for `hits`.

    `hits` is indexed by read id below and each value is compared to a list
    of hit names. The second half of the test inspects `hits` itself after
    calling translateHits, so that call is expected to rewrite it in place.
    """
    hitTotal, hitCounts = countHits(hits)
    myAssertEq(hitTotal, 29)
    for organism, expectedCount in (("Prochlorococcus", 10), ('root', 7)):
        myAssertEq(hitCounts[organism], expectedCount)

    # Map of original hit name -> replacement name
    renames = {
        'Bacteria <prokaryote>': 'other',
        'root': 'other',
        'Candidatus Pelagibacter': 'Pelagibacter',
    }
    translateHits(hits, renames)
    for readId, expectedHits in (('000178_2410_1152', ['other']),
                                 ('000093_2435_2228', ['Pelagibacter'])):
        myAssertEq(hits[readId], expectedHits)
Пример #2
0
def testTranslateAndCountHits(hits):
    """Verify hit counting, then hit-name translation, on a known hit map.

    The expected numbers (29 hits in total, 10 for Prochlorococcus, 7 for
    root) are fixed values for the test data. After translateHits runs, the
    same `hits` object is inspected, so the renaming is expected to happen
    in place.
    """
    countResult = countHits(hits)
    total, perName = countResult
    myAssertEq(total, 29)
    myAssertEq(perName["Prochlorococcus"], 10)
    myAssertEq(perName['root'], 7)

    # Original hit name -> name it should be replaced with
    nameMap = {'Bacteria <prokaryote>': 'other',
               'root': 'other',
               'Candidatus Pelagibacter': 'Pelagibacter'}
    translateHits(hits, nameMap)

    expectedAfter = [
        ('000178_2410_1152', ['other']),
        ('000093_2435_2228', ['Pelagibacter']),
    ]
    for readId, wanted in expectedAfter:
        myAssertEq(hits[readId], wanted)
Пример #3
0
def testReadKoFile(koFile):
    """Check readKOFile output for the "PATHWAY" and "EC" fields of `koFile`.

    K00397 must be absent from the pathway map but present in the EC map;
    K00399 must appear in both with the values listed below.
    """
    pathwayMap = readKOFile(koFile, "PATHWAY")
    assert "K00397" not in pathwayMap
    expectedPathways = ["ko00680  Methane metabolism",
                        "ko01200  Carbon metabolism"]
    myAssertEq(pathwayMap["K00399"], expectedPathways)

    ecMap = readKOFile(koFile, "EC")
    for ko, expectedEc in (("K00397", ["EC:1.8.99.-"]),
                           ("K00399", ["EC:2.8.4.1"])):
        myAssertEq(ecMap[ko], expectedEc)
Пример #4
0
def test_read_ncbi(ndir):
    """Load the taxonomy under `ndir` and spot-check a few known entries.

    Returns the taxonomy's id map so callers can reuse the loaded data.
    """
    # Second positional argument is passed as True
    # (presumably "also load names" -- confirm against readTaxonomy).
    taxonomy = readTaxonomy(ndir, True)
    idMap = taxonomy.idMap
    nameMap = taxonomy.nameMap
    myAssertEq(len(idMap), 783145)
    myAssertEq(len(nameMap), 1101991)

    # pick some random things to check
    node = idMap[123456]
    myAssertEq(node.name, 'Psammomoya choretroides')
    # Both spellings must resolve to the very same node object
    for spelling in ('Psammomoya choretroides',
                     'Psammomoya choretroides (F.Muell.) Diels & Loes.'):
        myAssertIs(nameMap[simplifyString(spelling)], node)
    myAssertEq(idMap[123499].parent.id, 50537)

    return idMap
Пример #5
0
def test_read_ncbi(ndir):
    """Read the taxonomy in `ndir` and verify sizes plus a few sample nodes.

    The id map of the loaded taxonomy is returned.
    """
    loadNames = True
    tax = readTaxonomy(ndir, loadNames)
    byId, byName = tax.idMap, tax.nameMap

    # Expected sizes for the taxonomy dump used by this test
    myAssertEq(len(byId), 783145)
    myAssertEq(len(byName), 1101991)

    # pick some random things to check
    sample = byId[123456]
    myAssertEq(sample.name, 'Psammomoya choretroides')

    shortKey = simplifyString('Psammomoya choretroides')
    myAssertIs(byName[shortKey], sample)

    longKey = simplifyString(
        'Psammomoya choretroides (F.Muell.) Diels & Loes.')
    myAssertIs(byName[longKey], sample)

    parentOfOther = byId[123499].parent
    myAssertEq(parentOfOther.id, 50537)

    return byId
Пример #6
0
def testParseHits(testFile):
    """Exercise the per-line hit extractors, then parseHits on `testFile`.

    Returns the dict of read id -> hits built from `testFile`, so later
    tests can reuse it.
    """
    # test line parsing methods
    cells = [1, 2, 3, 4, "(4,5)", "6,7"]

    # Read taken from cell 0, single hit taken from cell 2
    (read, hitIter) = _getReadHitsSimple(cells, 0, 2, None)
    hits = list(hitIter)
    myAssertEq(read, 1)
    myAssertEq(len(hits), 1)
    myAssertEq(hits[0], 3)

    # Read taken from cell 1, cell 5 split on ','
    (read, hitIter) = _getReadHitsSep(cells, 1, 5, ',')
    hits = list(hitIter)
    myAssertEq(read, 2)
    myAssertEq(hits, ['6', '7'])

    # Read taken from cell 3, every other cell is a hit. A copy of cells is
    # passed (presumably because the helper modifies its argument -- confirm).
    (read, hitIter) = _getReadHitsAll(list(cells), 3, -1, None)
    hits = list(hitIter)
    myAssertEq(read, 4)
    myAssertEq(len(hits), 5)
    myAssertEq(hits, [1, 2, 3, "(4,5)", "6,7"])

    # give it a test file
    # The iterator is drained inside the `with` block so the handle is
    # closed deterministically instead of being leaked.
    hits = {}
    with open(testFile) as hitStream:
        for r, h in parseHits(hitStream, 0, -1, True, None):
            hits[r] = h
    logging.debug(repr(hits))
    myAssertEq(len(hits), 29)
    myAssertEq(hits['000023_2435_2174'], ['Prochlorococcus'])
    myAssertEq(hits['000178_2410_1152'], ['Bacteria <prokaryote>'])
    myAssertEq(hits['000093_2435_2228'], ['Candidatus Pelagibacter'])

    return hits
Пример #7
0
def testParseHits(testFile):
    """Test the line-level hit extractors and parseHits on a sample file.

    The first three sections feed a hand-built row to each `_getReadHits*`
    helper and check the (read, hits) result. The last section parses
    `testFile` and checks a few known reads.

    Returns the read id -> hits dict parsed from `testFile`.
    """
    # test line parsing methods
    cells = [1, 2, 3, 4, "(4,5)", "6,7"]

    # Simple: read from index 0, one hit from index 2
    (read, hitIter) = _getReadHitsSimple(cells, 0, 2, None)
    hits = list(hitIter)
    myAssertEq(read, 1)
    myAssertEq(len(hits), 1)
    myAssertEq(hits[0], 3)

    # Separated: read from index 1, hits are cell 5 split on the separator
    (read, hitIter) = _getReadHitsSep(cells, 1, 5, ',')
    hits = list(hitIter)
    myAssertEq(read, 2)
    myAssertEq(hits, ['6', '7'])

    # All: read from index 3, remaining cells are the hits. The helper gets
    # its own copy of the row (NOTE(review): likely because it mutates it).
    (read, hitIter) = _getReadHitsAll(list(cells), 3, -1, None)
    hits = list(hitIter)
    myAssertEq(read, 4)
    myAssertEq(len(hits), 5)
    myAssertEq(hits, [1, 2, 3, "(4,5)", "6,7"])

    # give it a test file
    # Consume parseHits while the file is still open, then let the context
    # manager close it; the original never closed the handle.
    with open(testFile) as inputHandle:
        hits = {}
        for r, h in parseHits(inputHandle, 0, -1, True, None):
            hits[r] = h
    logging.debug(repr(hits))
    myAssertEq(len(hits), 29)
    myAssertEq(hits['000023_2435_2174'], ['Prochlorococcus'])
    myAssertEq(hits['000178_2410_1152'], ['Bacteria <prokaryote>'])
    myAssertEq(hits['000093_2435_2228'], ['Candidatus Pelagibacter'])

    return hits
Пример #8
0
def testReadKeggFile(keggFile):
    """Check readKeggFile against known entries of `keggFile`.

    The file is read with several values of the second argument
    ("DESCRIPTION", "PATHWAY", 2, 3 and "3") and selected KO entries are
    compared to their expected values. The level given as the integer 3 and
    as the string "3" must produce identical results for every key.

    Raises:
        AssertionError: when the int-3 and str-"3" maps disagree for a key
            (message names the key and both values).
    """
    kDmap = readKeggFile(keggFile, "DESCRIPTION")
    myAssertEq(kDmap["K09630"], ["PRSS36; protease, serine, 36 [EC:3.4.21.-]"])
    kPmap = readKeggFile(keggFile, "PATHWAY")
    assert "K00397" in kPmap
    myAssertEq(kPmap["K00399"], ["01200 Carbon metabolism", "00680 Methane metabolism", "01000 Enzymes"])
    k2map = readKeggFile(keggFile, 2)
    # Level-2 names are compared case-insensitively
    myAssertEq(k2map["K13810"][1].lower(), "Carbohydrate Metabolism".lower())
    myAssertEq(k2map["K13810"][0].lower(), "Overview".lower())
    myAssertEq(k2map["K00399"][1].lower(), "Energy Metabolism".lower())
    myAssertEq(k2map["K00399"][0].lower(), "Overview".lower())
    k3map = readKeggFile(keggFile, 3)
    myAssertEq(
        k3map["K13810"],
        [
            "01230 Biosynthesis of amino acids",
            "00010 Glycolysis / Gluconeogenesis",
            "00030 Pentose phosphate pathway",
            "00500 Starch and sucrose metabolism",
            "00520 Amino sugar and nucleotide sugar metabolism",
            "01000 Enzymes",
            "01000 Enzymes",
        ],
    )
    myAssertEq(k3map["K00399"], ["01200 Carbon metabolism", "00680 Methane metabolism", "01000 Enzymes"])
    myAssertEq(k3map["K03404"], ["00860 Porphyrin and chlorophyll metabolism", "01000 Enzymes"])
    myAssertEq(k3map["K01976"], ["01000 Enzymes"])
    myAssertEq(
        k3map["K07347"],
        ["02000 Transporters", "02044 Secretion system", "02035 Bacterial motility proteins", "05133 Pertussis"],
    )
    myAssertEq(k3map["K09630"], ["01000 Enzymes", "01002 Peptidases"])
    k3mapQ = readKeggFile(keggFile, "3")
    # Iterate the dict directly: dict.iterkeys() exists only on Python 2,
    # while plain iteration yields the keys on both Python 2 and 3.
    for k in k3map:
        try:
            myAssertEq(k3map[k], k3mapQ[k])
        except AssertionError:
            # Re-raise with the offending key and both values. The original
            # misspelled the exception name, which raised NameError here.
            raise AssertionError("level 3 classes for %s do not match:\n%s\n%s" % (k, k3map[k], k3mapQ[k]))
Пример #9
0
def testParseGeneKOMap(koFile):
    """Check parseGeneKOMap(koFile) for a handful of known gene ids.

    Each gene id must map to exactly the listed KO identifiers, in order.
    """
    geneToKos = parseGeneKOMap(koFile)
    expectations = (
        ("dpe:Dper_GL25993", ["K00001"]),
        ("rpc:RPC_2974", ["K00001"]),
        ("pic:PICST_59568", ["K00100"]),
        ("bbp:BBPR_1508", ["K02755", "K02756", "K02757"]),
        ("sfv:SFV_2242", ["K02769", "K02770"]),
        ("fma:FMG_0161", ["K02982"]),
    )
    for geneId, expectedKos in expectations:
        myAssertEq(geneToKos[geneId], expectedKos)