Пример #1
0
    def testParse(self):
        """Round-trip the SCOP test files through ``Scop`` and check lookups.

        The cla, des and hie test files are read into memory, parsed into a
        ``Scop`` object, written back out and compared with the input.
        Afterwards a few domain/node lookups are spot-checked, including
        lookups that are expected to find nothing.
        """
        # One context manager per file: each handle is closed as soon as it
        # has been read, even when a read raises.
        with open("./SCOP/dir.cla.scop.txt_test") as f:
            cla = f.read()
        with open("./SCOP/dir.des.scop.txt_test") as f:
            des = f.read()
        with open("./SCOP/dir.hie.scop.txt_test") as f:
            hie = f.read()

        scop = Scop(StringIO(cla), StringIO(des), StringIO(hie))

        # cla records are compared line by line through the helper instead
        # of as one string.
        # NOTE: zip() stops at the shorter sequence, so surplus lines on
        # either side are not examined by this loop.
        cla_out = StringIO()
        scop.write_cla(cla_out)
        lines = zip(cla.rstrip().split('\n'),
                    cla_out.getvalue().rstrip().split('\n'))
        for expected_line, line in lines:
            self.assertTrue(self._compare_cla_lines(expected_line, line))

        # des and hie output must reproduce the input exactly.
        des_out = StringIO()
        scop.write_des(des_out)
        self.assertEqual(des_out.getvalue(), des)

        hie_out = StringIO()
        scop.write_hie(hie_out)
        self.assertEqual(hie_out.getvalue(), hie)

        domain = scop.getDomainBySid("d1hbia_")
        self.assertEqual(domain.sunid, 14996)

        domains = scop.getDomains()
        self.assertEqual(len(domains), 14)
        self.assertEqual(domains[4].sunid, 14988)

        # Unknown identifiers must yield None rather than raising.
        dom = scop.getNodeBySunid(-111)
        self.assertIsNone(dom)
        dom = scop.getDomainBySid("no such domain")
        self.assertIsNone(dom)
Пример #2
0
    def testParse(self):
        """Parse the SCOP test files, write them back and verify the result.

        Builds a ``Scop`` from in-memory copies of the cla/des/hie test
        files, checks that each ``write_*`` method reproduces its input, and
        then checks a handful of lookups by sid and sunid.
        """
        # Read each input inside its own ``with`` block so that no handle
        # stays open if a later open() or read() fails.
        with open("./SCOP/dir.cla.scop.txt_test") as f:
            cla = f.read()
        with open("./SCOP/dir.des.scop.txt_test") as f:
            des = f.read()
        with open("./SCOP/dir.hie.scop.txt_test") as f:
            hie = f.read()

        scop = Scop(StringIO(cla), StringIO(des), StringIO(hie))

        # The cla output is checked pairwise with the comparison helper.
        # NOTE: zip() truncates to the shorter input, so a difference in
        # line count alone is not detected here.
        cla_out = StringIO()
        scop.write_cla(cla_out)
        lines = zip(cla.rstrip().split("\n"),
                    cla_out.getvalue().rstrip().split("\n"))
        for expected_line, line in lines:
            self.assertTrue(self._compare_cla_lines(expected_line, line))

        # The des and hie output is compared verbatim with the input text.
        des_out = StringIO()
        scop.write_des(des_out)
        self.assertEqual(des_out.getvalue(), des)

        hie_out = StringIO()
        scop.write_hie(hie_out)
        self.assertEqual(hie_out.getvalue(), hie)

        domain = scop.getDomainBySid("d1hbia_")
        self.assertEqual(domain.sunid, 14996)

        domains = scop.getDomains()
        self.assertEqual(len(domains), 14)
        self.assertEqual(domains[4].sunid, 14988)

        # Lookups of identifiers that do not exist return None.
        dom = scop.getNodeBySunid(-111)
        self.assertIsNone(dom)
        dom = scop.getDomainBySid("no such domain")
        self.assertIsNone(dom)
Пример #3
0
    def testGetAscendent(self):
        """Check ``getAscendent`` for existing and non-existing ascendents."""
        scop = Scop(dir_path="SCOP", version="test")
        domain = scop.getDomainBySid("d1hbia_")

        # get the fold
        fold = domain.getAscendent("cf")
        self.assertEqual(fold.sunid, 46457)

        # get the superfamily
        sf = domain.getAscendent("superfamily")
        self.assertEqual(sf.sunid, 46458)

        # px has no px ascendent
        px = domain.getAscendent("px")
        self.assertIsNone(px)

        # an sf has no px ascendent
        px2 = sf.getAscendent("px")
        self.assertIsNone(px2)
Пример #4
0
    def testGetAscendent(self):
        """Verify ascendent lookup from a domain and from a superfamily node."""
        scop = Scop(dir_path="SCOP", version="test")
        domain = scop.getDomainBySid("d1hbia_")

        # get the fold
        fold = domain.getAscendent('cf')
        self.assertEqual(fold.sunid, 46457)

        # get the superfamily
        sf = domain.getAscendent('superfamily')
        self.assertEqual(sf.sunid, 46458)

        # px has no px ascendent: the lookup must return None
        px = domain.getAscendent('px')
        self.assertIsNone(px)

        # an sf has no px ascendent: the lookup must return None
        px2 = sf.getAscendent('px')
        self.assertIsNone(px2)
Пример #5
0
class AstralTests(unittest.TestCase):
    """Tests for ``Astral`` sequence and domain-list access on the test data."""

    def setUp(self):
        """Create a test-version ``Scop`` and an ``Astral`` attached to it."""
        self.scop = Scop(dir_path="SCOP", version="test")
        self.astral = Astral(scop=self.scop, dir_path="SCOP", version="test")

    def testGetSeq(self):
        """Sequences are retrievable by sid and by domain object."""
        for sid, expected in (("d3sdha_", "AAAAA"), ("d4hbib_", "KKKKK")):
            sequence = self.astral.getSeqBySid(sid)
            self.assertEqual(str(sequence), expected)

        domain = self.scop.getDomainBySid("d3sdha_")
        sequence = self.astral.getSeq(domain)
        self.assertEqual(str(sequence), "AAAAA")

    def testConstructWithCustomFile(self):
        """An ``Astral`` built from an explicit FASTA file serves sequences."""
        fasta_path = "SCOP/scopseq-test/astral-scopdom-seqres-all-test.fa"
        custom_astral = Astral(
            scop=Scop(dir_path="SCOP", version="test"),
            astral_file=fasta_path)
        for sid, expected in (("d3sdha_", "AAAAA"), ("d4hbib_", "KKKKK")):
            self.assertEqual(str(custom_astral.getSeqBySid(sid)), expected)

    def testGetDomainsFromFile(self):
        """Domains listed in an id file come back in the expected order."""
        id_file = "SCOP/scopseq-test/astral-scopdom-seqres-sel-gs-bib-20-test.id"
        found = self.astral.getAstralDomainsFromFile(id_file)

        expected_sids = ("d3sdha_", "d4hbib_", "d5hbia_")
        self.assertEqual(len(found), 3)
        for position, sid in enumerate(expected_sids):
            self.assertEqual(found[position].sid, sid)

    def testGetDomainsClustered(self):
        """Clustering by identity and by e-value returns the expected sets."""
        by_identity = self.astral.domainsClusteredById(20)
        expected_sids = ("d3sdha_", "d4hbib_", "d5hbia_")
        self.assertEqual(len(by_identity), 3)
        for position, sid in enumerate(expected_sids):
            self.assertEqual(by_identity[position].sid, sid)

        by_evalue = self.astral.domainsClusteredByEv(1e-15)
        self.assertEqual(len(by_evalue), 1)
Пример #6
0
class AstralTests(unittest.TestCase):
    """Checks ``Astral`` lookups against the bundled SCOP/ASTRAL test files."""

    # Domain sids expected, in order, from the 20% identity selection.
    _SIDS_ID20 = ["d3sdha_", "d4hbib_", "d5hbia_"]

    def setUp(self):
        """Prepare ``self.scop`` and ``self.astral`` from the test directory."""
        scop = Scop(dir_path="SCOP", version="test")
        self.scop = scop
        self.astral = Astral(scop=scop, dir_path="SCOP", version="test")

    def testGetSeq(self):
        """Fetch sequences by sid string and by domain object."""
        astral = self.astral
        first = str(astral.getSeqBySid('d3sdha_'))
        self.assertEqual(first, "AAAAA")
        second = str(astral.getSeqBySid('d4hbib_'))
        self.assertEqual(second, "KKKKK")

        dom = self.scop.getDomainBySid('d3sdha_')
        from_domain = str(astral.getSeq(dom))
        self.assertEqual(from_domain, "AAAAA")

    def testConstructWithCustomFile(self):
        """Construct ``Astral`` from a named sequence file and query it."""
        scop = Scop(dir_path="SCOP", version="test")
        seq_file = "SCOP/scopseq-test/astral-scopdom-seqres-all-test.fa"
        astral = Astral(scop=scop, astral_file=seq_file)
        first = str(astral.getSeqBySid('d3sdha_'))
        self.assertEqual(first, "AAAAA")
        second = str(astral.getSeqBySid('d4hbib_'))
        self.assertEqual(second, "KKKKK")

    def testGetDomainsFromFile(self):
        """Load a domain list from an id file and check its contents."""
        domains = self.astral.getAstralDomainsFromFile(
            "SCOP/scopseq-test/astral-scopdom-seqres-sel-gs-bib-20-test.id")

        self.assertEqual(len(domains), 3)
        for index, sid in enumerate(self._SIDS_ID20):
            self.assertEqual(domains[index].sid, sid)

    def testGetDomainsClustered(self):
        """Check the clustered domain lists for identity 20 and e-value 1e-15."""
        id_clustered = self.astral.domainsClusteredById(20)
        self.assertEqual(len(id_clustered), 3)
        for index, sid in enumerate(self._SIDS_ID20):
            self.assertEqual(id_clustered[index].sid, sid)

        ev_clustered = self.astral.domainsClusteredByEv(1e-15)
        self.assertEqual(len(ev_clustered), 1)
Пример #7
0
    def testConstructFromDirectory(self):
        """Build a ``Scop`` from the test directory and look up one domain."""
        scop = Scop(dir_path="SCOP", version="test")
        # assertIsInstance reports the actual type when the check fails.
        self.assertIsInstance(scop, Scop)

        domain = scop.getDomainBySid("d1hbia_")
        self.assertEqual(domain.sunid, 14996)
Пример #8
0
    def testConstructFromDirectory(self):
        """Construct ``Scop`` via ``dir_path``/``version`` and query a domain."""
        scop = Scop(dir_path="SCOP", version="test")
        # Dedicated type assertion instead of assertTrue(isinstance(...)),
        # which only reports "False is not true" on failure.
        self.assertIsInstance(scop, Scop)

        domain = scop.getDomainBySid("d1hbia_")
        self.assertEqual(domain.sunid, 14996)
Пример #9
0
    # NOTE(review): this excerpt is the body of a compound statement whose
    # header lies above the visible lines; scop40, fold1, scop100_hie,
    # sf_sunid, n_splits, test_n_splits and test_data are all defined there.

    # Keep the scop40 entries whose position is listed in fold1[1].
    # presumably fold1 is a (train_idx, test_idx) pair from an earlier split
    # -- TODO confirm against the code above this excerpt.
    samples = np.array([v for i, v in enumerate(scop40) if i in fold1[1]])
    # sids of every 'px' descendent of the scop100 node with sunid sf_sunid
    px = np.array([x.sid for x in scop100_hie.getNodeBySunid(sf_sunid).getDescendents('px')])
    # keep only the values present in both samples and px
    isect = np.intersect1d(samples, px)
    # take the first shuffled split of isect (test_n_splits folds) and use the
    # entries indexed by its second element as test data; fold1 is rebound here
    # presumably KFold is scikit-learn's, so element [1] is the held-out fold
    # -- TODO confirm the import
    fold1 = next(KFold(n_splits=test_n_splits, shuffle=True).split(isect))
    tests = np.array([v for i, v in enumerate(isect) if i in fold1[1]])
    np.save(Path(f'data/test/scop40_{n_splits}fold_sf{sf_sunid}_testdata_{test_n_splits}fold.npy'), tests)
    # training data: every value of samples that is not in tests
    train = np.setdiff1d(samples, tests)
    np.save(Path(f'data/train/scop40_{n_splits}fold_trainingdata.npy'), train)
    # {sf: [sid]} mapping for making alignment pairs in the same superfamily;
    # the key is the sccs of each domain's 'sf' ascendent
    hie = {}
    for i in train:
        dom = scop100_hie.getDomainBySid(i)
        if dom:
            sf = dom.getAscendent('sf').sccs
        else:
            # FIXME: getDomainBySid returned a falsy value for this sid and the
            # cause is not understood; such sids are skipped for now.
            continue
        if sf in hie:
            hie[sf].append(i)
        else:
            hie[sf] = [i]
    # NOTE(review): the handle returned by Path.open('wb') is never closed
    # explicitly here.
    pickle.dump(hie, Path(f'data/train/scop40_{n_splits}fold_sf{sf_sunid}_hie.pkl').open('wb'))
else:
    # Alternative branch (its matching header is outside this excerpt):
    # train is every scop40 entry not in test_data; test is test_data itself.
    train = np.array([x for x in scop40 if x not in test_data])
    np.save(Path(f'data/train/scop40_{n_splits}fold.npy'), train)
    test = np.array(test_data)
    np.save(Path(f'data/test/scop40_{n_splits}fold.npy'), test)