Example No. 1
 def test_init1(self):
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     # default DSV, dialect and delimiter sniffed
     dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname)
     self.assertFalse(dsv1.isCreated())
     self.assertEqual(',', dsv1.dialect.delimiter)
     dsv1.close()
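The fixtures these excerpts reference (self.dbm, self.testdb, self.num_dsv_path, self.test_dtname, self.array1, ...) come from each test case's setUp, which is not reproduced here. A minimal sketch of such a fixture, modelled on the setUp methods shown in the class-based examples further down (TEST_INVARIANTS, DBManager) and using a hypothetical 'numdata.dsv' file name, could look like this:

 def setUp(self):
     # hypothetical fixture, reconstructed for illustration only; the real suite
     # takes its paths from TEST_INVARIANTS, as the later setUp examples do
     self.test_data_root = TEST_INVARIANTS['test_data_root']
     self.test_write_root = TEST_INVARIANTS['test_write_root']
     self.testdb = 'DB1'
     self.test_dtname = 'Test1'
     self.dbm = DBManager(self.test_write_root)
     # comma-delimited numeric sample file; the file name is a placeholder
     self.num_dsv_path = os.path.abspath(
         os.path.join(self.test_data_root, 'numdata.dsv'))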
Example No. 2
 def test_init6(self):
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     # delimiter sniffed, comment resolved successfully
     dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname, comment='#')
     self.assertFalse(dsv1.isCreated())
     self.assertEqual('#', dsv1.comment)
     dsv1.close()
Example No. 3
 def test_init3(self):
     dsv_fh = DSV.getHandle(self.num_dsv_path)
     dsv = DSV(self.dbm, self.testdb, dsv_fh, dtname=self.test_dtname)
     dsv.create()
     dsv.close()
     with self.assertRaises(Error):
         DataSet(dbtable=dsv, cols='BBB')
Example No. 4
 def test_recache2(self):
     dsv_fh = DSV.getHandle(self.num_dsv_path)
     # default DSV, dialect and delimiter sniffed
     dsv = DSV(self.dbm, self.testdb, dsv_fh, dtname=self.test_dtname)
     dsv.create()
     dsv.loadAll()
     # data set by default spanning all rows and all columns
     ds = DataSet(dbtable=dsv)
     dsv.close()
     numpy.testing.assert_array_almost_equal(self.array1, ds.array)
     # wipe out underlying table
     wipe_cs = dsv.db.cursor()
     wipe_cs.execute('delete from "%s";' % self.test_dtname)
     dsv.db.commit()
     wipe_cs.execute('vacuum;')
     dsv.db.commit()
     # NOTE: before numpy 1.6.0, an empty file passed to loadtxt() raises IOError;
     # with 1.6.0+ it only issues a warning
     if check_min_numpy_version(1, 6, 0):
         # perform recache (we shall see empty array)
         # suppress numpy warning of empty source file
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             ds.recache()
         numpy.testing.assert_array_almost_equal(self.empty_array, ds.array)
     else:
         with self.assertRaises(Error):
             ds.recache()
Example No. 5
 def test_build1(self):
     geneidmap = GeneIDMapHGNCGPL()
     # GPL96.txt
     gpl_path = os.path.abspath(os.path.join(self.test_data_root, 'gpl96_sample.txt'))
     gpl_comment = '#'
     gpl_delimiter = '\t'
     gpl_fh = DSV.getHandle(gpl_path)
     gpl_dsv = DSV(self.dbm, self.testdb, gpl_fh, dtname=self.annoTable, delimiter=gpl_delimiter, comment=gpl_comment)
     gpl_dsv.create()
     gpl_dsv.loadAll()
     gpl_dsv.close()
     # HGNC.tsv
     hgnc_path = os.path.abspath(os.path.join(self.test_data_root, 'hgnc_sample.txt'))
     hgnc_comment = '#'
     hgnc_delimiter = '\t'
     hgnc_fh = DSV.getHandle(hgnc_path)
     hgnc_dsv = DSV(self.dbm, self.testdb, hgnc_fh, dtname=self.hgncTable, delimiter=hgnc_delimiter, comment=hgnc_comment)
     hgnc_dsv.create()
     hgnc_dsv.loadAll()
     hgnc_dsv.close()
     # build test map
     geneidmap.build(gpl_dsv, hgnc_dsv, self.testdb)
     self.assertTrue(geneidmap.built)
     self.assertIsInstance(geneidmap.dbt, DBTable)
     fwdmap = dict(geneidmap.gene2emid.getFwdMap())
     self.assertEqual(self.ref_fwd1, fwdmap)
     bwdmap = dict(geneidmap.gene2emid.getBwdMap())
     self.assertEqual(self.ref_bwd1, bwdmap)
Example No. 6
 def test_close1(self):
     dsv2_fh = DSV.getHandle(self.anno_dsv_path)
     dsv2 = DSV(self.dbm, self.testdb, dsv2_fh, dtname=self.test_dtname, comment=self.anno_comment)
     dsv2.create()
     dsv2.loadAll()
     dsv2.close()
     with self.assertRaises(Error):
         dsv2.loadAll()
Example No. 7
 def test_init4(self):
     dsv_fh = DSV.getHandle(self.num_dsv_path)
     dsv = DSV(self.dbm, self.testdb, dsv_fh, dtname=self.test_dtname)
     dsv.create()
     dsv.close()
     with self.assertRaises(Error):
         DataSet(dbtable=dsv,
                 rows=self.sample_rows_none,
                 cols=self.sample_cols_none)
Example No. 8
 def test_init10(self):
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     # delimiter sniffed, header extracted (default), ID not resolved
     dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname, make_missing_ID_column=False)
     self.assertSequenceEqual(self.num_dsv_actual_header, dsv1.header)
     dsv1.create()
     self.assertTrue(dsv1.isCreated())
     self.assertTrue(dsv1.isEmpty())
     dsv1.close()
Example No. 9
 def test_init2(self):
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     # predefined delimiter, resolved successfully
     # NOTE: class does not check if delimiter is valid at this point
     dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname, delimiter='\t')
     self.assertFalse(dsv1.isCreated())
     self.assertEqual(csv.get_dialect('excel-tab'), dsv1.dialect)
     self.assertEqual('\t', dsv1.dialect.delimiter)
     dsv1.close()
Example No. 10
 def test_init9(self):
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     # delimiter sniffed, header extracted (default), ID resolved
     dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname)
     self.assertSequenceEqual(self.num_dsv_desired_header, dsv1.header)
     dsv1.create()
     self.assertTrue(dsv1.isCreated())
     self.assertTrue(dsv1.isEmpty())
     dsv1.close()
Example No. 11
 def test_init6(self):
     dsv_fh = DSV.getHandle(self.num_dsv_path)
     # default DSV, dialect and delimiter sniffed
     dsv = DSV(self.dbm, self.testdb, dsv_fh, dtname=self.test_dtname)
     dsv.create()
     dsv.loadAll()
     dsv.close()
     # data set by default spanning all rows and all columns
     ds = DataSet(dbtable=dsv)
     numpy.testing.assert_array_almost_equal(self.array1, ds.array)
Example No. 12
 def test_init12(self):
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     # delimiter sniffed, header supplied (proper length)
     our_header = tuple(['C%d' % n for n in range(1, len(self.num_dsv_actual_header) + 1)])
     dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname, header=our_header)
     self.assertSequenceEqual(our_header, dsv1.header)
     dsv1.create()
     self.assertTrue(dsv1.isCreated())
     self.assertTrue(dsv1.isEmpty())
     dsv1.close()
Example No. 13
 def test_init11(self):
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     # delimiter sniffed, header auto-generated
     dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname, header=())
     ref_header = tuple(['%d' % n for n in range(1, len(self.num_dsv_actual_header) + 1)])
     self.assertSequenceEqual(ref_header, dsv1.header)
     dsv1.create()
     self.assertTrue(dsv1.isCreated())
     self.assertTrue(dsv1.isEmpty())
     dsv1.close()
Example No. 14
 def test_init7(self):
     dsv_fh = DSV.getHandle(self.num_dsv_path)
     # default DSV, dialect and delimiter sniffed
     dsv = DSV(self.dbm, self.testdb, dsv_fh, dtname=self.test_dtname)
     dsv.create()
     dsv.loadAll()
     dsv.close()
     # get only first row
     ds = DataSet(dbtable=dsv, rows=self.sample_rows_1)
     numpy.testing.assert_array_almost_equal(self.array_v1, ds.array)
Example No. 15
 def test_loadall3(self):
     dsv2_fh = DSV.getHandle(self.anno_dsv_path)
     dsv2 = DSV(self.dbm, self.testdb, dsv2_fh, dtname=self.test_dtname, comment=self.anno_comment)
     dsv2.create()
     st = dsv2.loadAll(debug=True)
     dsv2.close()
     ref_st = ['insert into "Test1" values ("1007_s_at","U48705","","Homo sapiens","Mar 11, 2009",'
                                            '"Exemplar sequence","Affymetrix Proprietary Database",'
                                            '"U48705 /FEATURE=mRNA /DEFINITION=HSU48705 Human receptor tyrosine kinase DDR gene, complete cds",'
                                            '"U48705","discoidin domain receptor tyrosine kinase 1",'
                                            '"DDR1","780","NM_001954 /// NM_013993 /// NM_013994",'
                                            '"0006468 // protein amino acid phosphorylation // inferred from electronic annotation ///'
                                            ' 0007155 // cell adhesion // traceable author statement ///'
                                            ' 0007155 // cell adhesion // inferred from electronic annotation ///'
                                            ' 0007169 // transmembrane receptor protein tyrosine kinase signaling pathway // inferred from electronic annotation",'
                                            '"0005887 // integral to plasma membrane // traceable author statement ///'
                                            ' 0016020 // membrane // inferred from electronic annotation ///'
                                            ' 0016021 // integral to membrane // inferred from electronic annotation",'
                                            '"0000166 // nucleotide binding // inferred from electronic annotation ///'
                                            ' 0004672 // protein kinase activity // inferred from electronic annotation ///'
                                            ' 0004713 // protein tyrosine kinase activity // inferred from electronic annotation ///'
                                            ' 0004714 // transmembrane receptor protein tyrosine kinase activity // traceable author statement ///'
                                            ' 0004714 // transmembrane receptor protein tyrosine kinase activity // inferred from electronic annotation ///'
                                            ' 0004872 // receptor activity // inferred from electronic annotation ///'
                                            ' 0005515 // protein binding // inferred from physical interaction ///'
                                            ' 0005524 // ATP binding // inferred from electronic annotation ///'
                                            ' 0016301 // kinase activity // inferred from electronic annotation ///'
                                            ' 0016740 // transferase activity // inferred from electronic annotation")',
             'insert into "Test1" values ("1053_at","M87338","","H**o sapiens","Mar 11, 2009",'
                                            '"Exemplar sequence","GenBank",'
                                            '"M87338 /FEATURE= /DEFINITION=HUMA1SBU Human replication factor C, 40-kDa subunit (A1) mRNA, complete cds",'
                                            '"M87338","replication factor C (activator 1) 2, 40kDa",'
                                            '"RFC2","5982","NM_002914 /// NM_181471",'
                                            '"0006260 // DNA replication // not recorded ///'
                                            ' 0006260 // DNA replication // inferred from electronic annotation ///'
                                            ' 0006297 // nucleotide-excision repair, DNA gap filling // not recorded",'
                                            '"0005634 // nucleus // inferred from electronic annotation ///'
                                            ' 0005654 // nucleoplasm // not recorded ///'
                                            ' 0005663 // DNA replication factor C complex // inferred from direct assay ///'
                                            ' 0005663 // DNA replication factor C complex // inferred from electronic annotation",'
                                            '"0000166 // nucleotide binding // inferred from electronic annotation ///'
                                            ' 0003677 // DNA binding // inferred from electronic annotation ///'
                                            ' 0003689 // DNA clamp loader activity // inferred from electronic annotation ///'
                                            ' 0005515 // protein binding // inferred from physical interaction ///'
                                            ' 0005524 // ATP binding // traceable author statement ///'
                                            ' 0005524 // ATP binding // inferred from electronic annotation ///'
                                            ' 0017111 // nucleoside-triphosphatase activity // inferred from electronic annotation")']
     self.assertSequenceEqual(ref_st, st)
Example No. 16
 def test_loadall2(self):
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     # default DSV
     dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname)
     self.assertSequenceEqual(self.num_dsv_desired_header, dsv1.header)
     dsv1.create()
     dsv1.loadAll()
     dsv1.close()
     # low level checks
     cs = dsv1.db.cursor()
     cs.execute('select %s from %s' % (self.num_dsv_desired_header[0], dsv1.name))
     rres = cs.fetchall()
     res = [str(r[0]) for r in rres]
     self.assertSequenceEqual(self.num_rows, res)
     cs.execute('select %s from %s' % (self.num_dsv_desired_header[3], dsv1.name))
     rres = cs.fetchall()
     res = [str(r[0]) for r in rres]
     self.assertSequenceEqual(self.num_column_3, res)
Example No. 17
 def test_loadall1(self):
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     # default DSV
     dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname)
     self.assertSequenceEqual(self.num_dsv_desired_header, dsv1.header)
     dsv1.create()
     self.assertTrue(dsv1.isCreated())
     self.assertTrue(dsv1.isEmpty())
     # load from file
     st = dsv1.loadAll(debug=True)
     dsv1.close()
     ref_st = ['insert into "Test1" values ("V1","7.29865639942","7.1839394018853","8.08785988003525","8.43784327460378","7.56725674896063","7.17150350961048",'
                                            '"8.23772125375395","7.26860393651388","6.74186036580687","7.55493056104098","7.37521470969549","6.35468766815909",'
                                            '"7.03794441889888","6.75197742759923","7.26608934160658","8.70335292880697","6.85443361759566","7.59055769774248",'
                                            '"8.01751559655053","6.99993079846214","7.10871523619365","7.65161630470663","6.71058065426046","6.64437907655326",'
                                            '"6.93172233805358","7.61870427987243","6.9634175191832","6.37433009206648","6.34485366708736","6.0977075555399",'
                                            '"6.9061361459302","6.54264897912374","6.31961323363347","6.16533391728077","6.90481905323935","6.7168440158265",'
                                            '"7.22535319774288","6.20123577217092","6.93391118518623","6.82985307889579","6.35468239627533","7.09693639659124",'
                                            '"7.60449775270475","7.12266778930967","6.35835046528365","6.76414046791","6.17508883882112","6.52508274039929",'
                                            '"7.11162248509395","6.89152906126555","6.49949720627377","6.69448041622817","6.37526926527225","5.80401273298264",'
                                            '"7.12987703240072","6.05831629170905","6.81624397767137","6.66820808623227","6.64998519558867","6.42308111524492",'
                                            '"7.58672787003923","3.84767749509431","6.71665724008276","6.35468766815909","6.54859953448512","7.23447515724748",'
                                            '"6.70007125889196","6.28445976227631","6.75206243946758","6.7168440158265","6.55922419484843","6.93675713126568",'
                                            '"6.80067557800434","6.50103393612957","6.91542815411986","6.19960368164491","7.6448783709798","6.2125929974423",'
                                            '"6.35468766815909","7.32784699996015","6.14659907126786","6.7168440158265","6.8825610653412","6.72831600642366",'
                                            '"6.46374697412319","5.79584776993902","6.0825372527799","7.1204899554919","6.39620062779895","6.35814627516342",'
                                            '"6.35814627516342")',
               'insert into "Test1" values ("V2","2.38904325749261","2.37588862645719","2.37310583895584","2.38904325749261","2.42091222425779","2.38904325749261",'
                                            '"2.38626046999126","2.38904325749261","2.38904325749261","2.41002306956031","2.38904325749261","2.38904325749261",'
                                            '"2.38904325749261","2.37310583895584","2.38626046999126","2.34429782913723","2.38904325749261","2.98112952430922",'
                                            '"2.34553574786241","2.37310583895584","2.39660701797421","2.38904325749261","2.40955866820479","2.38626046999126",'
                                            '"2.35577218230877","2.39443448171899","2.34433277775847","2.69053923836483","2.38430054425455","2.86158891209344",'
                                            '"2.34595261411454","2.89813268468409","2.42777977950130","2.38626046999126","2.44904175049461","3.55795174775419",'
                                            '"2.66896481156844","2.38626046999126","2.71772299956764","2.61602731442131","2.56996895766296","3.86202701130675",'
                                            '"2.38904325749261","2.35577218230877","2.60505670342601","3.12697260562512","2.38904325749261","3.15740854425796",'
                                            '"2.65364423092787","2.45124596034905","3.14913252263311","2.38904325749261","2.39700474393300","2.38904325749261",'
                                            '"2.46188514405506","3.23873137510437","2.55373906857937","3.39601442806742","3.16936129560691","3.18777558546775",'
                                            '"2.38904325749261","2.38904325749261","2.38626046999126","2.34553574786241","2.35577218230877","2.38624782221570",'
                                            '"2.35577218230877","2.38904325749261","2.74265374191966","2.37188401381886","2.37588862645719","2.38904325749261",'
                                            '"2.35577218230877","2.35121946858936","2.49946444329392","2.38904325749261","2.34553574786241","2.93960156829307",'
                                            '"2.38904325749261","2.39182604499395","2.38904325749261","2.35315614841910","2.47149945385376","2.38626046999126",'
                                            '"2.39596753440869","2.38904325749261","2.40223987191512","2.34715558421848","2.38210356896247","2.34719053283972",'
                                            '"2.76820667786915")']
     self.assertSequenceEqual(ref_st, st)
Example No. 18
 def test_init5(self):
     dsv_fh = DSV.getHandle(self.num_dsv_path)
     # default DSV, dialect and delimiter sniffed
     dsv = DSV(self.dbm, self.testdb, dsv_fh, dtname=self.test_dtname)
     # data set by default spanning all rows and all columns
     # data not loaded, we shall see empty array
     dsv.create()
     dsv.close()
     # NOTE: before numpy 1.6.0, an empty file passed to loadtxt() raises IOError;
     # with 1.6.0+ it only issues a warning
     if check_min_numpy_version(1, 6, 0):
         # suppress numpy warning of empty source file
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             ds = DataSet(dbtable=dsv)
         numpy.testing.assert_equal(self.empty_array, ds.array)
     else:
         with self.assertRaises(Error):
             DataSet(dbtable=dsv)
Example No. 19
 def test_build1(self):
     pkcidmap = PKCIDMapGOGPL()
     # GPL96.txt
     #        gpl_path = os.path.abspath(os.path.join(self.test_data_root, 'GPL96.txt.bz2'))
     gpl_path = os.path.abspath(
         os.path.join(self.test_data_root, 'gpl96_sample.txt'))
     gpl_comment = '#'
     gpl_delimiter = '\t'
     gpl_fh = DSV.getHandle(gpl_path)
     gpl_dsv = DSV(self.dbm,
                   self.testdb,
                   gpl_fh,
                   dtname=self.annoTable,
                   delimiter=gpl_delimiter,
                   comment=gpl_comment)
     gpl_dsv.create()
     gpl_dsv.loadAll()
     gpl_dsv.close()
     pkcidmap.build(gpl_dsv, self.testdb)
     self.assertTrue(pkcidmap.built)
     self.assertIsInstance(pkcidmap.dbt, DBTable)
     # test bidirectional map
     bwdmap = pkcidmap.pkc2emid.getBwdMap()
     fwdmap = pkcidmap.pkc2emid.getFwdMap()
     # check valid probes and terms for existence
     for ref_probe, ref_terms in self.test_bwd.iteritems():
         # backward map
         self.assertIn(ref_probe, bwdmap)
         terms = bwdmap[ref_probe]
         self.assertEqual(ref_terms, terms)
         # forward map
         for term in terms:
             self.assertIn(term, fwdmap)
             probes = fwdmap[term]
             self.assertIn(ref_probe, probes)
     # check control probes for nonexistence
     for test_ctrl in self.test_ctrl_probes:
         # backward map
         self.assertNotIn(test_ctrl, bwdmap)
         # forward map
         for probes in fwdmap.values():
             self.assertNotIn(test_ctrl, probes)
Example No. 20
 def test_em2annotation1(self):
     geneidmap = GeneIDMapGPL()
     # GPL96.txt
     gpl_path = os.path.abspath(
         os.path.join(self.test_data_root, 'gpl96_sample.txt'))
     gpl_comment = '#'
     gpl_delimiter = '\t'
     gpl_fh = DSV.getHandle(gpl_path)
     gpl_dsv = DSV(self.dbm,
                   self.testdb,
                   gpl_fh,
                   dtname=self.annoTable,
                   delimiter=gpl_delimiter,
                   comment=gpl_comment)
     gpl_dsv.create()
     gpl_dsv.loadAll()
     gpl_dsv.close()
     geneidmap.build(gpl_dsv, None, self.testdb)
     em2a = get_em2annotation(geneidmap.dbt)
     self.assertEqual(self.ref_em2a, em2a)
Example No. 21
 def test_loadall4(self):
     dsv2_fh = DSV.getHandle(self.anno_dsv_path)
     dsv2 = DSV(self.dbm, self.testdb, dsv2_fh, dtname=self.test_dtname, comment=self.anno_comment)
     self.assertSequenceEqual(self.anno_header, dsv2.header)
     dsv2.create()
     dsv2.loadAll()
     dsv2.close()
     # low level checks
     cs = dsv2.db.cursor()
     cs.execute('select %s from %s' % (self.anno_header[0], dsv2.name))
     rres = cs.fetchall()
     res = [str(r[0]) for r in rres]
     self.assertSequenceEqual(self.anno_rows, res)
     cols = ','.join([quote(c) for c in self.anno_columns])
     cs.execute('select %s from %s' % (cols, dsv2.name))
     rres = cs.fetchall()
     res = {}
     for ix, ac in enumerate(self.anno_columns):
         res[ac] = [str(r[ix]) for r in rres]
     self.assertDictEqual(self.anno_columns_dict, res)
Example No. 22
class TestHGNC1(unittest.TestCase):
    def setUp(self):
        self.test_data_root = TEST_INVARIANTS['test_data_root']
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.hgncTable = 'HGNC'
        self.dbm = DBManager(self.test_write_root)
        #
        hgnc_path = os.path.abspath(
            os.path.join(self.test_data_root, 'hgnc_sample_2.txt'))
        hgnc_comment = '#'
        hgnc_delimiter = '\t'
        hgnc_fh = DSV.getHandle(hgnc_path)
        self.hgnc_dsv = DSV(self.dbm,
                            self.testdb,
                            hgnc_fh,
                            dtname=self.hgncTable,
                            delimiter=hgnc_delimiter,
                            comment=hgnc_comment)
        self.hgnc_dsv.create()
        self.hgnc_dsv.loadAll()
        self.hgnc_dsv.close()
        #
        self.withdrawn_pattern = '%~withdrawn'
        self.symbol_col = 'Approved Symbol'
        #
        # NOTE: we use unicode since we do not reparse immediately after querying
        self.ref_previous1 = {
            u'NTRK4': [u'DDR1'],
            u'PTK3A': [u'DDR1'],
            u'NEP': [u'DDR1'],
            u'CAK': [u'DDR1'],
            u'EDDR1': [u'DDR1'],
            u'C19orf72': [u'DCAF15'],
        }
        self.ref_synonyms1 = {
            u'A1': [u'RFC2'],
            u'BEHAB': [u'BCAN'],
            u'CD167': [u'DDR1'],
            u'CSPG7': [u'BCAN'],
            u'DRC3': [u'EPS8L1'],
            u'FLJ20258': [u'EPS8L1'],
            u'MGC13038': [u'BCAN'],
            u'MGC23164': [u'EPS8L1'],
            u'MGC4642': [u'EPS8L1'],
            u'MGC99481': [u'DCAF15'],
            u'RFC40': [u'RFC2'],
            u'RTK6': [u'DDR1']
        }

    def tearDown(self):
        self.dbm.close()
        db1_path = os.path.abspath('%s/%s.db' %
                                   (self.test_write_root, self.testdb))
        rootdb_path = os.path.abspath('%s/%s.root.db' %
                                      (self.test_write_root, SYSTEM_NAME_LC))
        if os.path.exists(db1_path):
            os.remove(db1_path)
        if os.path.exists(rootdb_path):
            os.remove(rootdb_path)
        self.dbm = None

    def test_correctHGNCApprovedSymbols1(self):
        correctHGNCApprovedSymbols(self.hgnc_dsv)
        # check that no symbol has '~withdrawn' suffix
        c = self.hgnc_dsv.db.cursor()
        st = 'select "%s" from %s where "%s" like "%s"' % (
            self.symbol_col, self.hgncTable, self.symbol_col,
            self.withdrawn_pattern)
        c.execute(st)
        res = list([r for r in c])
        self.assertEqual([], res)
        c.close()

    def test_generateHGNCPreviousSymbols1(self):
        previous_dt = generateHGNCPreviousSymbols(self.hgnc_dsv, self.testdb)
        res = previous_dt.getAll(as_dict=True, dict_on_rows=True)
        self.assertEqual(self.ref_previous1, res)

    def test_generateHGNCSynonyms1(self):
        synonyms_dt = generateHGNCSynonyms(self.hgnc_dsv, self.testdb)
        res = synonyms_dt.getAll(as_dict=True, dict_on_rows=True)
        self.assertEqual(self.ref_synonyms1, res)
Example No. 23
class TestLabels1(unittest.TestCase):

    def setUp(self):
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.test_data_root = TEST_INVARIANTS['test_data_root']
        self.sample_data_root = self.test_write_root
        self.testdb = 'DB1'
        self.dbm = DBManager(self.sample_data_root)
        #
        self.test_dtname1 = 'LABELS1'
        self.lab1_dsv_path = os.path.abspath(os.path.join(self.test_data_root, 'labels1.dsv'))
        self.lab1_fh = DSV.getHandle(self.lab1_dsv_path, 'rb')
        self.lab1_dsv = DSV(self.dbm, self.testdb, self.lab1_fh, dtname=self.test_dtname1)
        self.lab1_dsv.create()
        self.lab1_dsv.loadAll()
        self.lab1_dsv.close()
        self.lab1_cnt = {'A1': '1', 'A2': '1', 'A3': '1', 'A4': '1', 'B1': '-1', 'B2': '-1', 'B3': '-1', 'B4': '-1', }
        self.lab1_samples1 = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']
        self.lab1_resp1 = ['1', '1', '1', '1', '-1', '-1', '-1', '-1']
        self.lab1_samples_resp1 = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']
        self.lab1_samples2 = ['B4', 'B3', 'B2', 'B1', 'A4', 'A3', 'A2', 'A1']
        self.lab1_resp2 = ['-1', '-1', '-1', '-1', '1', '1', '1', '1']
        self.lab1_samples_resp2 = ['B4', 'B3', 'B2', 'B1', 'A4', 'A3', 'A2', 'A1']
        self.lab1_samples3 = ['A1', 'B1', 'A2', 'B2', 'A3', 'B3', 'A4', 'B4']
        self.lab1_resp3 = ['1', '-1', '1', '-1', '1', '-1', '1', '-1']
        self.lab1_samples_resp3 = ['A1', 'B1', 'A2', 'B2', 'A3', 'B3', 'A4', 'B4']
        self.lab1_samples4 = ['A1', 'B1', 'B4', 'A3']
        self.lab1_resp4 = ['1', '-1', '-1', '1']
        self.lab1_samples_resp4 = ['A1', 'B1', 'B4', 'A3']
        self.lab1_samples5 = ['A1', 'B1', None, 'A3']
        self.lab1_resp5 = ['1', '-1', '1']
        self.lab1_samples_resp5 = ['A1', 'B1', 'A3']
        # NOTE: the adjacent literals 'B4' 'QQQ7546dsfsdfs453' concatenate into one unknown sample name
        self.lab1_samples6 = ['A1', 'XXX1', 'B4' 'QQQ7546dsfsdfs453']
        self.lab1_resp6 = ['1']
        self.lab1_samples_resp6 = ['A1']
        #
        self.test_dtname2 = 'LABELS2'
        self.lab2_dsv_path = os.path.abspath(os.path.join(self.test_data_root, 'labels2.dsv'))
        self.lab2_fh = DSV.getHandle(self.lab2_dsv_path, 'rb')
        self.lab2_dsv = DSV(self.dbm, self.testdb, self.lab2_fh, dtname=self.test_dtname2)
        self.lab2_dsv.create()
        self.lab2_dsv.loadAll()
        self.lab2_dsv.close()
        self.lab2_cntN = {'A1': '1', 'A2': '1', 'A3': '1', 'A4': '1',
                          'B1': '-1', 'B2': '-1', 'B3': '-1', 'B4': '-1',
                          'PP': '0', 'QQ': '0', 'RR': '0', 'SS': '0'}
        self.lab2_cntY = {'A1': '1', 'A2': '1', 'A3': '1', 'A4': '1',
                          'B1': '-1', 'B2': '-1', 'B3': '-1', 'B4': '-1'}
        self.lab2_samples_order1 = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']
        self.lab2_samples_resp1 = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']
        self.lab2_samples_order2 = ['A1', 'A2', 'QQ', 'A3', 'A4', 'SS', 'B1', 'B2', 'B3', 'RR', 'B4', 'PP']
        self.lab2_samples_resp2 = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']

    def tearDown(self):
        self.dbm.close()
        db1_path = os.path.abspath('%s/%s.db' % (self.test_write_root, self.testdb))
        rootdb_path = os.path.abspath('%s/%s.root.db' % (self.test_write_root, SYSTEM_NAME_LC))
        if os.path.exists(db1_path):
            os.remove(db1_path)
        if os.path.exists(rootdb_path):
            os.remove(rootdb_path)
        self.dbm = None

    def test_init1(self):
        lab = Labels(self.lab1_dsv)
        self.assertEqual(self.lab1_cnt, lab.labels)
        self.assertEqual('0', lab.unused_sample_label)

    def test_init2(self):
        lab = Labels(self.lab1_dsv, unused_sample_label='XXX')
        self.assertEqual('XXX', lab.unused_sample_label)
        self.assertEqual(self.lab1_cnt, lab.labels)

    def test_getLabels1(self):
        lab = Labels(self.lab1_dsv)
        resp1 = lab.getLabels(self.lab1_samples1)
        self.assertEqual(self.lab1_resp1, resp1)
        resp2 = lab.getLabels(self.lab1_samples2)
        self.assertEqual(self.lab1_resp2, resp2)
        resp3 = lab.getLabels(self.lab1_samples3)
        self.assertEqual(self.lab1_resp3, resp3)
        resp4 = lab.getLabels(self.lab1_samples4)
        self.assertEqual(self.lab1_resp4, resp4)
        resp5 = lab.getLabels(self.lab1_samples5)
        self.assertEqual(self.lab1_resp5, resp5)
        resp6 = lab.getLabels(self.lab1_samples6)
        self.assertEqual(self.lab1_resp6, resp6)

    def test_getLabels2(self):
        lab = Labels(self.lab1_dsv)
        resp1 = lab.getLabels(self.lab1_samples1, as_array=True)
        num1 = np.array([float(l) for l in self.lab1_resp1])
        np.testing.assert_array_equal(resp1, num1)
        resp2 = lab.getLabels(self.lab1_samples2, as_array=True)
        num2 = np.array([float(l) for l in self.lab1_resp2])
        np.testing.assert_array_equal(resp2, num2)
        resp3 = lab.getLabels(self.lab1_samples3, as_array=True)
        num3 = np.array([float(l) for l in self.lab1_resp3])
        np.testing.assert_array_equal(resp3, num3)
        resp4 = lab.getLabels(self.lab1_samples4, as_array=True)
        num4 = np.array([float(l) for l in self.lab1_resp4])
        np.testing.assert_array_equal(resp4, num4)
        resp5 = lab.getLabels(self.lab1_samples5, as_array=True)
        num5 = np.array([float(l) for l in self.lab1_resp5])
        np.testing.assert_array_equal(resp5, num5)
        resp6 = lab.getLabels(self.lab1_samples6, as_array=True)
        num6 = np.array([float(l) for l in self.lab1_resp6])
        np.testing.assert_array_equal(resp6, num6)

    def test_getLabels3(self):
        lab = Labels(self.lab2_dsv)
        self.assertNotEqual(self.lab2_cntN, lab.labels)
        self.assertEqual(self.lab2_cntY, lab.labels)
        resp1 = lab.getLabels(self.lab1_samples1)
        self.assertEqual(self.lab1_resp1, resp1)
        resp2 = lab.getLabels(self.lab1_samples2)
        self.assertEqual(self.lab1_resp2, resp2)
        resp3 = lab.getLabels(self.lab1_samples3)
        self.assertEqual(self.lab1_resp3, resp3)
        resp4 = lab.getLabels(self.lab1_samples4)
        self.assertEqual(self.lab1_resp4, resp4)
        resp5 = lab.getLabels(self.lab1_samples5)
        self.assertEqual(self.lab1_resp5, resp5)
        resp6 = lab.getLabels(self.lab1_samples6)
        self.assertEqual(self.lab1_resp6, resp6)

    def test_getLabels4(self):
        lab = Labels(self.lab2_dsv)
        resp1 = lab.getLabels(self.lab1_samples1, as_array=True)
        num1 = np.array([float(l) for l in self.lab1_resp1])
        np.testing.assert_array_equal(resp1, num1)
        resp2 = lab.getLabels(self.lab1_samples2, as_array=True)
        num2 = np.array([float(l) for l in self.lab1_resp2])
        np.testing.assert_array_equal(resp2, num2)
        resp3 = lab.getLabels(self.lab1_samples3, as_array=True)
        num3 = np.array([float(l) for l in self.lab1_resp3])
        np.testing.assert_array_equal(resp3, num3)
        resp4 = lab.getLabels(self.lab1_samples4, as_array=True)
        num4 = np.array([float(l) for l in self.lab1_resp4])
        np.testing.assert_array_equal(resp4, num4)
        resp5 = lab.getLabels(self.lab1_samples5, as_array=True)
        num5 = np.array([float(l) for l in self.lab1_resp5])
        np.testing.assert_array_equal(resp5, num5)
        resp6 = lab.getLabels(self.lab1_samples6, as_array=True)
        num6 = np.array([float(l) for l in self.lab1_resp6])
        np.testing.assert_array_equal(resp6, num6)

    def test_getSamples1(self):
        lab1 = Labels(self.lab1_dsv)
        samples1 = lab1.getSamples(self.lab1_samples1)
        self.assertEqual(self.lab1_samples_resp1, samples1)
        samples2 = lab1.getSamples(self.lab1_samples2)
        self.assertEqual(self.lab1_samples_resp2, samples2)
        samples3 = lab1.getSamples(self.lab1_samples3)
        self.assertEqual(self.lab1_samples_resp3, samples3)
        samples4 = lab1.getSamples(self.lab1_samples4)
        self.assertEqual(self.lab1_samples_resp4, samples4)
        samples5 = lab1.getSamples(self.lab1_samples5)
        self.assertEqual(self.lab1_samples_resp5, samples5)
        samples6 = lab1.getSamples(self.lab1_samples6)
        self.assertEqual(self.lab1_samples_resp6, samples6)

    def test_getSamples2(self):
        lab2 = Labels(self.lab2_dsv)
        samples1 = lab2.getSamples(self.lab2_samples_order1)
        self.assertEqual(self.lab2_samples_resp1, samples1)
        samples2 = lab2.getSamples(self.lab2_samples_order2)
        self.assertEqual(self.lab2_samples_resp2, samples2)
Example No. 24
class TestPKDrivenDBDataManager1(unittest.TestCase):
    def setUp(self):
        self.test_data_root = TEST_INVARIANTS['test_data_root']
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.test_dtname = 'Test1'
        self.dbm = DBManager(self.test_write_root)
        # ssdata.dsv
        self.ssdata_dsv_path = os.path.abspath(
            os.path.join(self.test_data_root, 'ssdata.dsv'))
        self.ssdata_comment = '#'
        ssdata_dsv_fh = DSV.getHandle(self.ssdata_dsv_path)
        self.ssdata_dsv1 = DSV(self.dbm,
                               self.testdb,
                               ssdata_dsv_fh,
                               dtname=self.test_dtname,
                               comment=self.ssdata_comment)
        self.ssdata_dsv1.create()
        self.ssdata_dsv1.loadAll()
        self.ssdata_dsv1.close()
        self.ssdata_samples = ('S1', 'S2', 'S3', 'S4', 'S5')
        # pkcidmap
        self.pkc2id1 = {
            'PKC1': ('R1', 'R2'),
            'PKC2': ('R3', ),
            'PKC3': ('R4', 'R5')
        }
        self.pkc1 = ('PKC1', 'PKC2', 'PKC3')
        self.ss_cols1 = '*'
        self.ref_ss1 = [
            ({
                'pkcID': 'PKC1',
                'dtable': self.ssdata_dsv1,
                'rows': ['R1', 'R2'],
                'cols': list(self.ssdata_samples)
            }, None),
            ({
                'pkcID': 'PKC2',
                'dtable': self.ssdata_dsv1,
                'rows': ['R3'],
                'cols': list(self.ssdata_samples)
            }, None),
            ({
                'pkcID': 'PKC3',
                'dtable': self.ssdata_dsv1,
                'rows': ['R4', 'R5'],
                'cols': list(self.ssdata_samples)
            }, None),
        ]
        self.ss_cols2 = ('S1', 'S4', 'S5')
        self.ref_ss2 = [
            ({
                'pkcID': 'PKC1',
                'dtable': self.ssdata_dsv1,
                'rows': ['R1', 'R2'],
                'cols': list(self.ss_cols2)
            }, None),
            ({
                'pkcID': 'PKC2',
                'dtable': self.ssdata_dsv1,
                'rows': ['R3'],
                'cols': list(self.ss_cols2)
            }, None),
            ({
                'pkcID': 'PKC3',
                'dtable': self.ssdata_dsv1,
                'rows': ['R4', 'R5'],
                'cols': list(self.ss_cols2)
            }, None),
        ]
        self.pkc2 = ('PKC3', 'PKC1')
        self.ref_ss3 = [
            ({
                'pkcID': 'PKC3',
                'dtable': self.ssdata_dsv1,
                'rows': ['R4', 'R5'],
                'cols': list(self.ss_cols2)
            }, None),
            ({
                'pkcID': 'PKC1',
                'dtable': self.ssdata_dsv1,
                'rows': ['R1', 'R2'],
                'cols': list(self.ss_cols2)
            }, None),
        ]
        self.ref_ss4 = [
            (None,
             numpy.array([
                 [2.44753543273, 42.9497086717, 30.8331998765],
                 [42.1888598933, 39.1743921225, 15.9744094108],
             ])),
            (None,
             numpy.array([
                 [16.5734780715, 14.8233987496, 21.7385342744],
                 [60.0958378228, 98.4321570519, 71.9193619126],
             ])),
        ]
        # for categorization tests
        self.pkc2id2 = {'PKC1': ('R1', 'R2', 'R3', 'R4'), 'PKC2': ('R5', )}
        self.pkc3 = ('PKC1', 'PKC2')
        self.size_thr = 3
        self.cat1 = SubsetSizeCategorizer(self.size_thr)
        self.exp_categories1 = ['>', '<=']
        self.cat2 = NullCategorizer()
        self.exp_categories2 = [self.cat2.NULL] * len(self.pkc3)

    def tearDown(self):
        self.dbm.close()
        db1_path = os.path.abspath('%s/%s.db' %
                                   (self.test_write_root, self.testdb))
        rootdb_path = os.path.abspath('%s/%s.root.db' %
                                      (self.test_write_root, SYSTEM_NAME_LC))
        if os.path.exists(db1_path):
            os.remove(db1_path)
        if os.path.exists(rootdb_path):
            os.remove(rootdb_path)
        self.dbm = None

    def test_init1(self):
        pkdm = PKDrivenDBDataManager(self.ssdata_dsv1,
                                     MockPKCIDMap(self.pkc2id1))
        self.assertEqual(self.pkc2id1, pkdm.pkcidmap.pkc2emid)
        self.assertSequenceEqual(self.ssdata_samples, pkdm.all_samples)

    def test_getSubset1(self):
        pkdm = PKDrivenDBDataManager(self.ssdata_dsv1,
                                     MockPKCIDMap(self.pkc2id1))
        ss = [
            pkdm.getSubset(pkc,
                           forSamples=self.ss_cols1,
                           get_ssinfo=True,
                           get_dataset=False) for pkc in self.pkc1
        ]
        self.assertEqual(self.ref_ss1, ss)

    def test_getSubset2(self):
        pkdm = PKDrivenDBDataManager(self.ssdata_dsv1,
                                     MockPKCIDMap(self.pkc2id1))
        ss = [
            pkdm.getSubset(pkc,
                           forSamples=self.ss_cols2,
                           get_ssinfo=True,
                           get_dataset=False) for pkc in self.pkc1
        ]
        self.assertEqual(self.ref_ss2, ss)

    def test_getSubset3(self):
        pkdm = PKDrivenDBDataManager(self.ssdata_dsv1,
                                     MockPKCIDMap(self.pkc2id1))
        ss = [
            pkdm.getSubset(pkc,
                           forSamples=self.ss_cols2,
                           get_ssinfo=True,
                           get_dataset=False) for pkc in self.pkc2
        ]
        self.assertEqual(self.ref_ss3, ss)

    def test_getSubset4(self):
        pkdm = PKDrivenDBDataManager(self.ssdata_dsv1,
                                     MockPKCIDMap(self.pkc2id1))
        ss = [
            pkdm.getSubset(pkc,
                           forSamples=self.ss_cols2,
                           get_ssinfo=False,
                           get_dataset=True) for pkc in self.pkc2
        ]
        for refss, actss in zip(self.ref_ss4, ss):
            self.assertEqual(refss[0], actss[0])
            numpy.testing.assert_array_equal(refss[1], actss[1].array)

    def test_categorizeSubset1(self):
        pkdm = PKDrivenDBDataManager(self.ssdata_dsv1,
                                     MockPKCIDMap(self.pkc2id2))
        ss_dss = [
            pkdm.getSubset(pkc,
                           forSamples=self.ss_cols2,
                           get_ssinfo=False,
                           get_dataset=True)[1] for pkc in self.pkc3
        ]
        ss_categories = [
            PKDrivenDBDataManager.categorizeSubset(ss_ds, self.cat1)
            for ss_ds in ss_dss
        ]
        self.assertSequenceEqual(self.exp_categories1, ss_categories)

    def test_categorizeSubset2(self):
        pkdm = PKDrivenDBDataManager(self.ssdata_dsv1,
                                     MockPKCIDMap(self.pkc2id2))
        ss_dss = [
            pkdm.getSubset(pkc,
                           forSamples=self.ss_cols2,
                           get_ssinfo=False,
                           get_dataset=True)[1] for pkc in self.pkc3
        ]
        ss_categories = [
            PKDrivenDBDataManager.categorizeSubset(ss_ds, self.cat2)
            for ss_ds in ss_dss
        ]
        self.assertSequenceEqual(self.exp_categories2, ss_categories)
Example No. 25
class TestPKDrivenDBSubsetHierarchy1(unittest.TestCase):
    def setUp(self):
        self.test_data_root = TEST_INVARIANTS['test_data_root']
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.test_dtname = 'Test1'
        self.dbm = DBManager(self.test_write_root)
        # ssdata.dsv
        self.ssdata_dsv_path = os.path.abspath(
            os.path.join(self.test_data_root, 'ssdata.dsv'))
        self.ssdata_comment = '#'
        ssdata_dsv_fh = DSV.getHandle(self.ssdata_dsv_path)
        self.ssdata_dsv1 = DSV(self.dbm,
                               self.testdb,
                               ssdata_dsv_fh,
                               dtname=self.test_dtname,
                               comment=self.ssdata_comment)
        self.ssdata_dsv1.create()
        self.ssdata_dsv1.loadAll()
        self.ssdata_dsv1.close()
        self.ssdata_samples = ('S1', 'S2', 'S3', 'S4', 'S5')
        # hierarchy tests
        self.pkc2id = {'PKC1': ('R1', 'R2', 'R3', 'R4'), 'PKC2': ('R5', )}
        self.pkc = ('PKC1', 'PKC2')
        self.size_thr1 = 2
        self.cat1 = SubsetSizeCategorizer(self.size_thr1, ID='Cat1')
        self.cat1_uniq_le = self.cat1.uniquifyCategory(
            self.cat1.ROW_SIZE_LESSER)
        self.cat1_uniq_gt = self.cat1.uniquifyCategory(
            self.cat1.ROW_SIZE_GREATER)
        self.size_thr2 = 3
        self.cat2 = SubsetSizeCategorizer(self.size_thr2, ID='Cat2')
        self.cat2_uniq_le = self.cat2.uniquifyCategory(
            self.cat2.ROW_SIZE_LESSER)
        self.cat2_uniq_gt = self.cat2.uniquifyCategory(
            self.cat2.ROW_SIZE_GREATER)
        self.size_thr3 = 0
        self.cat3 = SubsetSizeCategorizer(self.size_thr3, ID='Cat3')
        self.cat3_uniq_le = self.cat3.uniquifyCategory(
            self.cat3.ROW_SIZE_LESSER)
        self.cat3_uniq_gt = self.cat3.uniquifyCategory(
            self.cat3.ROW_SIZE_GREATER)
        self.cinst = {
            'Cat1': self.cat1,
            'Cat2': self.cat2,
            'Cat3': self.cat3,
        }
        self.cmap1 = ['Cat1', 'Cat2', 'Cat3']
        self.cmap2 = ['Cat3', 'Cat1', 'Cat2']
        self.symbols = list(self.pkc)

    def tearDown(self):
        self.dbm.close()
        db1_path = os.path.abspath('%s/%s.db' %
                                   (self.test_write_root, self.testdb))
        rootdb_path = os.path.abspath('%s/%s.root.db' %
                                      (self.test_write_root, SYSTEM_NAME_LC))
        if os.path.exists(db1_path):
            os.remove(db1_path)
        if os.path.exists(rootdb_path):
            os.remove(rootdb_path)
        self.dbm = None

    def test_init1(self):
        pkdm = PKDrivenDBDataManager(self.ssdata_dsv1,
                                     MockPKCIDMap(self.pkc2id))
        pkdss = PKDrivenDBSubsetHierarchy(pkdm, self.ssdata_samples)
        pkdss.build(self.cmap1, self.cinst, self.symbols)
        # ['Cat1', 'Cat2', 'Cat3']
        expected_hierarchy = dict()
        expected_hierarchy.update({None: self.cat1.id})
        expected_hierarchy.update({self.cat1_uniq_le: self.cat2.id})
        expected_hierarchy.update({self.cat1_uniq_gt: self.cat2.id})
        expected_hierarchy.update({self.cat2_uniq_le: self.cat3.id})
        expected_hierarchy.update({self.cat2_uniq_gt: self.cat3.id})
        expected_hierarchy.update({self.cat3_uniq_gt: None})
        expected_hierarchy.update({self.cat3_uniq_le: None})
        self.assertEqual(expected_hierarchy, pkdss.hierarchy)
        #            [PKC1 PKC2]
        # Cat1  [PKC1]>2     [PKC2]<=2
        # Cat2  [PKC1]>3     [PKC2]<=3
        # Cat3  [PKC1]>0     [PKC2]>0
        expected_symboltree = dict()
        expected_symboltree.update(
            {None: {
                self.cat1_uniq_gt: ['PKC1'],
                self.cat1_uniq_le: ['PKC2']
            }})
        expected_symboltree.update(
            {self.cat1_uniq_gt: {
                self.cat2_uniq_gt: ['PKC1']
            }})
        expected_symboltree.update(
            {self.cat1_uniq_le: {
                self.cat2_uniq_le: ['PKC2']
            }})
        expected_symboltree.update(
            {self.cat2_uniq_gt: {
                self.cat3_uniq_gt: ['PKC1']
            }})
        expected_symboltree.update(
            {self.cat2_uniq_le: {
                self.cat3_uniq_gt: ['PKC2']
            }})
        self.assertEqual(expected_symboltree, pkdss.symboltree)

    def test_init2(self):
        pkdm = PKDrivenDBDataManager(self.ssdata_dsv1,
                                     MockPKCIDMap(self.pkc2id))
        pkdss = PKDrivenDBSubsetHierarchy(pkdm, self.ssdata_samples)
        pkdss.build(self.cmap2, self.cinst, self.symbols)
        # ['Cat3', 'Cat1', 'Cat2']
        expected_hierarchy = dict()
        expected_hierarchy.update({None: self.cat3.id})
        expected_hierarchy.update({self.cat3_uniq_le: self.cat1.id})
        expected_hierarchy.update({self.cat3_uniq_gt: self.cat1.id})
        expected_hierarchy.update({self.cat1_uniq_le: self.cat2.id})
        expected_hierarchy.update({self.cat1_uniq_gt: self.cat2.id})
        expected_hierarchy.update({self.cat2_uniq_gt: None})
        expected_hierarchy.update({self.cat2_uniq_le: None})
        self.assertEqual(expected_hierarchy, pkdss.hierarchy)
        #                   [PKC1 PKC2]
        # Cat3      [PKC1 PKC2]>0      []<=0
        # Cat1    [PKC1]>2  [PKC2]<=2
        # Cat2    [PKC1]>3  [PKC2]<=3
        expected_symboltree = dict()
        expected_symboltree.update(
            {None: {
                self.cat3_uniq_gt: ['PKC1', 'PKC2']
            }})
        expected_symboltree.update({
            self.cat3_uniq_gt: {
                self.cat1_uniq_gt: ['PKC1'],
                self.cat1_uniq_le: ['PKC2']
            }
        })
        expected_symboltree.update(
            {self.cat1_uniq_gt: {
                self.cat2_uniq_gt: ['PKC1']
            }})
        expected_symboltree.update(
            {self.cat1_uniq_le: {
                self.cat2_uniq_le: ['PKC2']
            }})
        self.assertEqual(expected_symboltree, pkdss.symboltree)
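
Taken together, the examples follow one recurring pattern: obtain a handle with DSV.getHandle, wrap it in a DSV bound to a DBManager-managed database, create and bulk-load the backing table, then read the data back through DataSet (or Labels). A compact sketch of that flow, assuming the same imports as the test modules above and using placeholder arguments, might look like this:

def load_and_read(dbm, dbname, path, dtname):
    # illustrative only: 'path' and 'dtname' are placeholders supplied by the caller
    fh = DSV.getHandle(path)
    # dialect and delimiter are sniffed by default; pass delimiter=/comment= to override
    dsv = DSV(dbm, dbname, fh, dtname=dtname)
    dsv.create()      # create the backing database table
    dsv.loadAll()     # bulk-load the file contents into it
    dsv.close()
    # a DataSet spans all rows and columns by default
    ds = DataSet(dbtable=dsv)
    return ds.array   # numpy array view of the loaded data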