Пример #1
0
 def setUp(self):
     """Prepare roots, a DBManager and reference values for num.dsv/anno.dsv.

     This method only records paths and reference values as attributes;
     the reference values are presumably the expected content of the two
     sample files (confirm against the test data).
     """
     data_root = TEST_INVARIANTS['test_data_root']
     write_root = TEST_INVARIANTS['test_write_root']
     self.test_data_root = data_root
     self.test_write_root = write_root
     self.testdb = 'DB1'
     self.test_cols = ('A', 'B', 'C')
     self.test_dtname = 'Test1'
     self.dbm = DBManager(write_root)

     # num.dsv: the actual header starts with an empty cell, while the
     # desired header carries DSV_DEFAULT_ID_COLUMN in that position.
     self.num_dsv_path = os.path.abspath(os.path.join(data_root, 'num.dsv'))
     sample_columns = ['S%d' % idx for idx in range(91)]
     self.num_dsv_actual_header = [''] + sample_columns
     self.num_dsv_desired_header = [DSV_DEFAULT_ID_COLUMN] + sample_columns
     self.num_rows = ['V1', 'V2']
     self.num_column_3 = ['8.08785988003525', '2.37310583895584']

     # anno.dsv
     self.anno_dsv_path = os.path.abspath('%s/%s' % (data_root, 'anno.dsv'))
     self.anno_comment = '#'
     self.anno_header = [
         'ID',
         'GB_ACC',
         'SPOT_ID',
         'Species Scientific Name',
         'Annotation Date',
         'Sequence Type',
         'Sequence Source',
         'Target Description',
         'Representative Public ID',
         'Gene Title',
         'Gene Symbol',
         'ENTREZ_GENE_ID',
         'RefSeq Transcript ID',
         'Gene Ontology Biological Process',
         'Gene Ontology Cellular Component',
         'Gene Ontology Molecular Function',
     ]
     self.anno_rows = ['1007_s_at', '1053_at']
     self.anno_columns = ['GB_ACC', 'Gene Symbol']
     self.anno_columns_dict = {
         'GB_ACC': ['U48705', 'M87338'],
         'Gene Symbol': ['DDR1', 'RFC2'],
     }
Пример #2
0
 def test_DBManager5(self):
     """Check that the in-memory database is registered and usable.

     The manager must list 'memdb' among its databases and locations,
     report ':memory:' as its location, and allow a table to be created
     and dropped through a cursor.
     """
     dbm = DBManager(self.test_write_root)
     # try/finally guarantees the manager (and cursor) are closed even when
     # an assertion or SQL statement fails, so no handle leaks into tearDown.
     try:
         db = dbm.memdb
         self.assertIn('memdb', dbm.db)
         self.assertIn('memdb', dbm.db_loc)
         self.assertEqual(':memory:', dbm.getDBloc('memdb'))
         cs = db.cursor()
         try:
             cs.execute('create table A(a TEXT);')
             cs.execute('drop table A;')
         finally:
             cs.close()
     finally:
         dbm.close()
Пример #3
0
 def setUp(self):
     """Create and populate the test table, then open a cursor on its DB."""
     self.test_write_root = TEST_INVARIANTS['test_write_root']
     self.testdb = 'DB1'
     self.test_cols = ('A', 'B', 'C')
     self.test_dtname = 'Test1'
     self.dbm = DBManager(self.test_write_root)

     # Table with columns test_cols, filled from the class' row generator.
     table = self.dt1 = DBTable(self.dbm, self.testdb, self.test_cols,
                                name=self.test_dtname)
     table.create()
     table.load(self.__gen1())

     # Raw cursor on the table's database, used by the tests for queries.
     self.rcs = table.db.cursor()
Пример #4
0
 def setUp(self):
     """Prepare roots, a DBManager and the reference headers for num.dsv."""
     data_root = TEST_INVARIANTS['test_data_root']
     write_root = TEST_INVARIANTS['test_write_root']
     self.test_data_root = data_root
     self.test_write_root = write_root
     self.testdb = 'DB1'
     self.test_cols = ('A', 'B', 'C')
     self.test_dtname = 'Test1'
     self.dbm = DBManager(write_root)

     # num.dsv: the actual header starts with an empty cell, while the
     # desired header carries DSV_DEFAULT_ID_COLUMN in that position.
     self.num_dsv_path = os.path.abspath(os.path.join(data_root, 'num.dsv'))
     sample_columns = ['S%d' % idx for idx in range(91)]
     self.num_dsv_actual_header = [''] + sample_columns
     self.num_dsv_desired_header = [DSV_DEFAULT_ID_COLUMN] + sample_columns
Пример #5
0
 def setUp(self):
     """Load labels1.dsv and labels2.dsv and record the reference answers.

     For each sample query list (``lab1_samplesN``) the matching
     ``lab1_respN`` / ``lab1_samples_respN`` attributes hold the labels and
     samples the tests compare against.
     """
     write_root = TEST_INVARIANTS['test_write_root']
     data_root = TEST_INVARIANTS['test_data_root']
     self.test_write_root = write_root
     self.test_data_root = data_root
     self.sample_data_root = write_root
     self.testdb = 'DB1'
     self.dbm = DBManager(self.sample_data_root)

     group_a = ['A1', 'A2', 'A3', 'A4']
     group_b = ['B1', 'B2', 'B3', 'B4']

     # ---- LABELS1
     self.test_dtname1 = 'LABELS1'
     self.lab1_dsv_path = os.path.abspath(os.path.join(data_root, 'labels1.dsv'))
     self.lab1_fh = DSV.getHandle(self.lab1_dsv_path, 'rb')
     dsv1 = self.lab1_dsv = DSV(self.dbm, self.testdb, self.lab1_fh,
                                dtname=self.test_dtname1)
     dsv1.create()
     dsv1.loadAll()
     dsv1.close()
     # query 1: all samples in file order
     self.lab1_samples1 = group_a + group_b
     self.lab1_resp1 = ['1'] * 4 + ['-1'] * 4
     self.lab1_samples_resp1 = list(self.lab1_samples1)
     self.lab1_cnt = dict(zip(self.lab1_samples1, self.lab1_resp1))
     # query 2: all samples, reversed
     self.lab1_samples2 = self.lab1_samples1[::-1]
     self.lab1_resp2 = self.lab1_resp1[::-1]
     self.lab1_samples_resp2 = list(self.lab1_samples2)
     # query 3: groups interleaved
     self.lab1_samples3 = ['A1', 'B1', 'A2', 'B2', 'A3', 'B3', 'A4', 'B4']
     self.lab1_resp3 = ['1', '-1'] * 4
     self.lab1_samples_resp3 = list(self.lab1_samples3)
     # query 4: subset
     self.lab1_samples4 = ['A1', 'B1', 'B4', 'A3']
     self.lab1_resp4 = ['1', '-1', '-1', '1']
     self.lab1_samples_resp4 = list(self.lab1_samples4)
     # query 5: subset containing None
     self.lab1_samples5 = ['A1', 'B1', None, 'A3']
     self.lab1_resp5 = ['1', '-1', '1']
     self.lab1_samples_resp5 = ['A1', 'B1', 'A3']
     # query 6: mostly unknown samples
     # NOTE(review): 'B4' 'QQQ7546dsfsdfs453' are adjacent literals (no
     # comma), so they form ONE element 'B4QQQ7546dsfsdfs453'. Kept as-is
     # because lab1_resp6 expects only the label of 'A1'; confirm intent.
     self.lab1_samples6 = ['A1', 'XXX1', 'B4' 'QQQ7546dsfsdfs453']
     self.lab1_resp6 = ['1']
     self.lab1_samples_resp6 = ['A1']

     # ---- LABELS2
     self.test_dtname2 = 'LABELS2'
     self.lab2_dsv_path = os.path.abspath(os.path.join(data_root, 'labels2.dsv'))
     self.lab2_fh = DSV.getHandle(self.lab2_dsv_path, 'rb')
     dsv2 = self.lab2_dsv = DSV(self.dbm, self.testdb, self.lab2_fh,
                                dtname=self.test_dtname2)
     dsv2.create()
     dsv2.loadAll()
     dsv2.close()
     # cntY: labelled samples only; cntN: additionally the '0'-labelled ones
     self.lab2_cntY = dict(zip(group_a + group_b, ['1'] * 4 + ['-1'] * 4))
     self.lab2_cntN = dict(self.lab2_cntY)
     self.lab2_cntN.update({'PP': '0', 'QQ': '0', 'RR': '0', 'SS': '0'})
     self.lab2_samples_order1 = group_a + group_b
     self.lab2_samples_resp1 = group_a + group_b
     self.lab2_samples_order2 = ['A1', 'A2', 'QQ', 'A3', 'A4', 'SS',
                                 'B1', 'B2', 'B3', 'RR', 'B4', 'PP']
     self.lab2_samples_resp2 = group_a + group_b
Пример #6
0
 def setUp(self):
     """Load ssdata.dsv into the test DB and build three size categorizers.

     The categorizers use thresholds 2, 3 and 0; for each one the
     uniquified "lesser" and "greater" category names are cached.
     """
     self.test_data_root = TEST_INVARIANTS['test_data_root']
     self.test_write_root = TEST_INVARIANTS['test_write_root']
     self.testdb = 'DB1'
     self.test_dtname = 'Test1'
     self.dbm = DBManager(self.test_write_root)

     # ssdata.dsv
     self.ssdata_dsv_path = os.path.abspath(
         os.path.join(self.test_data_root, 'ssdata.dsv'))
     self.ssdata_comment = '#'
     handle = DSV.getHandle(self.ssdata_dsv_path)
     dsv = self.ssdata_dsv1 = DSV(self.dbm, self.testdb, handle,
                                  dtname=self.test_dtname,
                                  comment=self.ssdata_comment)
     dsv.create()
     dsv.loadAll()
     dsv.close()
     self.ssdata_samples = ('S1', 'S2', 'S3', 'S4', 'S5')

     # hierarchy tests
     self.pkc2id = {'PKC1': ('R1', 'R2', 'R3', 'R4'), 'PKC2': ('R5', )}
     self.pkc = ('PKC1', 'PKC2')

     self.size_thr1 = 2
     cat1 = self.cat1 = SubsetSizeCategorizer(self.size_thr1, ID='Cat1')
     self.cat1_uniq_le = cat1.uniquifyCategory(cat1.ROW_SIZE_LESSER)
     self.cat1_uniq_gt = cat1.uniquifyCategory(cat1.ROW_SIZE_GREATER)

     self.size_thr2 = 3
     cat2 = self.cat2 = SubsetSizeCategorizer(self.size_thr2, ID='Cat2')
     self.cat2_uniq_le = cat2.uniquifyCategory(cat2.ROW_SIZE_LESSER)
     self.cat2_uniq_gt = cat2.uniquifyCategory(cat2.ROW_SIZE_GREATER)

     self.size_thr3 = 0
     cat3 = self.cat3 = SubsetSizeCategorizer(self.size_thr3, ID='Cat3')
     self.cat3_uniq_le = cat3.uniquifyCategory(cat3.ROW_SIZE_LESSER)
     self.cat3_uniq_gt = cat3.uniquifyCategory(cat3.ROW_SIZE_GREATER)

     # categorizer instances by ID, plus two orderings of those IDs
     self.cinst = {'Cat1': cat1, 'Cat2': cat2, 'Cat3': cat3}
     self.cmap1 = ['Cat1', 'Cat2', 'Cat3']
     self.cmap2 = ['Cat3', 'Cat1', 'Cat2']
     self.symbols = list(self.pkc)
Пример #7
0
 def test_DBManager2(self):
     """Check DB file location and that reopening returns the same DB.

     'TestDB1' must be located at '<test_write_root>/TestDB1.db', and two
     consecutive ``getDB('TestDB2')`` calls must compare equal.
     """
     dbm = DBManager(self.test_write_root)
     # try/finally guarantees the manager is closed even when an assertion
     # fails, so no open handle leaks into tearDown.
     try:
         dbm.getDB('TestDB1')
         db1_path = dbm.getDBloc('TestDB1')
         ref1_path = os.path.abspath(
             os.path.join(self.test_write_root, 'TestDB1.db'))
         self.assertEqual(ref1_path, db1_path)
         db_created = dbm.getDB('TestDB2')
         db_opened = dbm.getDB('TestDB2')
         self.assertEqual(db_created, db_opened)
     finally:
         dbm.close()
Пример #8
0
 def setUp(self):
     """Load the HGNC sample file and build the reference symbol mappings.

     ``ref_previous1`` and ``ref_synonyms1`` map an old/alternative symbol
     to a one-element list holding the approved symbol.
     """
     self.test_data_root = TEST_INVARIANTS['test_data_root']
     self.test_write_root = TEST_INVARIANTS['test_write_root']
     self.testdb = 'DB1'
     self.hgncTable = 'HGNC'
     self.dbm = DBManager(self.test_write_root)

     # tab-delimited HGNC sample with '#' comments
     source_path = os.path.abspath(
         os.path.join(self.test_data_root, 'hgnc_sample_2.txt'))
     handle = DSV.getHandle(source_path)
     dsv = self.hgnc_dsv = DSV(self.dbm, self.testdb, handle,
                               dtname=self.hgncTable,
                               delimiter='\t',
                               comment='#')
     dsv.create()
     dsv.loadAll()
     dsv.close()

     self.withdrawn_pattern = '%~withdrawn'
     self.symbol_col = 'Approved Symbol'

     # NOTE: we use unicode since we do not reparse immediately after querying
     # Reference data is written grouped by approved symbol and expanded to
     # the {other symbol: [approved symbol]} shape the tests compare with.
     previous_by_approved = (
         (u'DDR1', (u'NTRK4', u'PTK3A', u'NEP', u'CAK', u'EDDR1')),
         (u'DCAF15', (u'C19orf72', )),
     )
     self.ref_previous1 = {}
     for approved, previous_symbols in previous_by_approved:
         for previous in previous_symbols:
             self.ref_previous1[previous] = [approved]

     synonyms_by_approved = (
         (u'RFC2', (u'A1', u'RFC40')),
         (u'BCAN', (u'BEHAB', u'CSPG7', u'MGC13038')),
         (u'DDR1', (u'CD167', u'RTK6')),
         (u'EPS8L1', (u'DRC3', u'FLJ20258', u'MGC23164', u'MGC4642')),
         (u'DCAF15', (u'MGC99481', )),
     )
     self.ref_synonyms1 = {}
     for approved, synonyms in synonyms_by_approved:
         for synonym in synonyms:
             self.ref_synonyms1[synonym] = [approved]
Пример #9
0
 def setUp(self):
     """Prepare a DBManager, probe lists and the reference probe->GO mapping."""
     data_root = TEST_INVARIANTS['test_data_root']
     write_root = TEST_INVARIANTS['test_write_root']
     self.test_data_root = data_root
     self.test_write_root = write_root
     self.testdb = 'DB1'
     self.annoTable = 'MA_GPL_ANNO'
     self.dbm = DBManager(write_root)

     # three first and three last (non-control)
     self.test_probes = ['1007_s_at', '1053_at', '117_at',
                         '91826_at', '91920_at', '91952_at']
     # three last control probes
     self.test_ctrl_probes = ['AFFX-TrpnX-3_at',
                              'AFFX-TrpnX-5_at',
                              'AFFX-TrpnX-M_at']
     # reference bwd mapping
     self.test_bwd = {
         '1007_s_at': {
             'GO:0000166', 'GO:0004672', 'GO:0004713', 'GO:0004714',
             'GO:0004872', 'GO:0005515', 'GO:0005524', 'GO:0005887',
             'GO:0006468', 'GO:0007155', 'GO:0007169', 'GO:0016020',
             'GO:0016021', 'GO:0016301', 'GO:0016740',
         },
         '1053_at': {
             'GO:0000166', 'GO:0003677', 'GO:0003689', 'GO:0005515',
             'GO:0005524', 'GO:0005634', 'GO:0005654', 'GO:0005663',
             'GO:0006260', 'GO:0006297', 'GO:0017111',
         },
         '117_at': {
             'GO:0000166', 'GO:0005524', 'GO:0006950', 'GO:0006986',
         },
         '91826_at': {'GO:0004872', 'GO:0005737', 'GO:0016301'},
         '91920_at': {
             'GO:0005488', 'GO:0005529', 'GO:0005540', 'GO:0005576',
             'GO:0005578', 'GO:0005634', 'GO:0005730', 'GO:0005737',
             'GO:0005739', 'GO:0007155', 'GO:0016020', 'GO:0016021',
             'GO:0031225',
         },
         '91952_at': {'GO:0006511'},
     }
Пример #10
0
 def _createDB(self, env):
     """Create the DB location and a DBManager rooted in it.

     Reads 'rootsm', 'root_output_location' and 'dbm_location' from ``env``,
     creates the combined location through the 'rootsm' object, and stores
     the location ID ('dbm_location_id') and the new manager ('dbm') back
     into ``env``.
     """
     storage = env.var('rootsm')
     output_location = env.var('root_output_location')
     dbm_sublocation = env.var('dbm_location')

     # location ID = output location and DBM part joined with the separator
     location_id = storage.sublocation_separator.join(
         (output_location, dbm_sublocation))
     storage.createLocation(location_id)
     env.addVar('dbm_location_id', location_id)

     location_path = storage.getLocation(location_id)
     manager = DBManager(arbitrary_data_root=location_path)
     env.addVar('dbm', manager)

     message = 'Created DB manager in %s with root DB ID: %s' % (
         location_path, manager.rootdb_key)
     env.logger.info(message)
Пример #11
0
    def __init__(self, name=None, root_path=None, create_dbm=False):
        r"""
Parameters
----------
name : string/None
    name of the current instance; it will be used to identify all managed locations;
    if None, the name is generated randomly (UUID4)

root_path : string/None
    directory path that refers to the root of locations that will be managed by
    this instance; if None, default root path will be used ('~/.kdvs/')

create_dbm : boolean
    if True, default :class:`~kdvs.core.db.DBManager` instance will be created
    as well, rooted on specified root path; False by default

See Also
--------
uuid
os.path.expanduser
        """
        # ---- resolve instance name
        if name is None:
            self.name = uuid.uuid4().hex
        else:
            self.name = name
        # ---- resolve root_path
        self.def_root_path = os.path.expanduser('~/.%s/' % (SYSTEM_NAME_LC))
        if root_path is None:
            self.root_path = self.def_root_path
        else:
            self.root_path = root_path
        self.abs_root_path = os.path.abspath(self.root_path)
        # ---- check if root path is available and writable
        if not os.path.exists(self.abs_root_path):
            raise Error('Could not access root path %s for manager %s!' %
                        (quote(self.abs_root_path), quote(self.name)))
        if not self._check_path_writable(self.abs_root_path):
            raise Error('Could not write to root path %s of manager %s!' %
                        (quote(self.abs_root_path), quote(self.name)))
        # ---- setup locations management
        self.locations = {}
        self.sublocation_separator = SUBLOCATION_SEPARATOR
        # add ROOT location
        self.root_location_id = 'ROOT_%s' % self.name
        self.locations[self.root_location_id] = self.abs_root_path
        # ---- resolve DBM
        if create_dbm:
            # create default DBManager with file-based SQLite3 provider
            self.dbm = DBManager(self.root_path)
        else:
            self.dbm = None
Пример #12
0
class TestDTShelve1(unittest.TestCase):
    """Tests for DBShelve backed by a DBManager in the test write root."""

    def setUp(self):
        """Create a fresh DBManager rooted at the test write root."""
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.dbm = DBManager(self.test_write_root)

    def tearDown(self):
        """Close the manager and delete the DB files it may have created."""
        self.dbm.close()
        leftovers = (
            os.path.abspath('%s/%s.db' %
                            (self.test_write_root, self.testdb)),
            os.path.abspath('%s/%s.root.db' %
                            (self.test_write_root, SYSTEM_NAME_LC)),
        )
        for db_file in leftovers:
            if os.path.exists(db_file):
                os.remove(db_file)
        self.dbm = None

    def test_init1(self):
        """A new shelve exposes the template's column names as key/val."""
        shelve = DBShelve(self.dbm, self.testdb, None)
        actual_columns = (shelve.key, shelve.val)
        self.assertSequenceEqual(DBSHELVE_TMPL['columns'], actual_columns)
Пример #13
0
 def setUp(self):
     """Prepare a DBManager and the reference gene-symbol/probe mappings.

     ``ref_fwd1`` maps symbol -> probes, ``ref_bwd1`` maps probe -> symbols
     and ``ref_em2a`` maps probe -> list of six annotation values.
     """
     data_root = TEST_INVARIANTS['test_data_root']
     write_root = TEST_INVARIANTS['test_write_root']
     self.test_data_root = data_root
     self.test_write_root = write_root
     self.testdb = 'DB1'
     self.annoTable = 'ANNO_GPL'
     self.hgncTable = 'HGNC'
     self.dbm = DBManager(write_root)

     self.ref_fwd1 = {
         '': {'AFFX-TrpnX-3_at', 'AFFX-TrpnX-5_at', 'AFFX-TrpnX-M_at'},
         'BCAN': {'91920_at'},
         'DDR1': {'1007_s_at'},
         'EPS8L1': {'91826_at'},
         'HSPA6': {'117_at'},
         'LOC90379': {'91952_at'},
         'RFC2': {'1053_at'},
     }
     self.ref_bwd1 = {
         '1007_s_at': {'DDR1'},
         '1053_at': {'RFC2'},
         '117_at': {'HSPA6'},
         '91826_at': {'EPS8L1'},
         '91920_at': {'BCAN'},
         '91952_at': {'LOC90379'},
         'AFFX-TrpnX-3_at': {''},
         'AFFX-TrpnX-5_at': {''},
         'AFFX-TrpnX-M_at': {''},
     }
     self.ref_em2a = {
         '1007_s_at': ['DDR1', 'AFFX:U48705', 'U48705', '780', '', ''],
         '1053_at': ['RFC2', 'GB:M87338', 'M87338', '5982', '', ''],
         '117_at': ['HSPA6', 'AFFX:X51757', 'X51757', '3310', '', ''],
         '91826_at': ['EPS8L1', 'GB:AI219073', 'AI219073', '54869', '', ''],
         '91920_at': ['BCAN', 'GB:AI205180', 'AI205180', '63827', '', ''],
         '91952_at': ['LOC90379', 'GB:AI363375', 'AI363375', '90379', '', ''],
         'AFFX-TrpnX-3_at': ['', 'AFFX:AFFX-TrpnX-3', '', '', '', ''],
         'AFFX-TrpnX-5_at': ['', 'AFFX:AFFX-TrpnX-5', '', '', '', ''],
         'AFFX-TrpnX-M_at': ['', 'AFFX:AFFX-TrpnX-M', '', '', '', ''],
     }
Пример #14
0
 def setUp(self):
     """Create a fresh DBManager rooted at the test write root."""
     write_root = TEST_INVARIANTS['test_write_root']
     self.test_write_root = write_root
     self.testdb = 'DB1'
     self.dbm = DBManager(write_root)
Пример #15
0
class TestLabels1(unittest.TestCase):
    """Tests for Labels built from the labels1.dsv and labels2.dsv files."""

    def setUp(self):
        """Load both label files and record the reference answers.

        For each sample query list (``lab1_samplesN``) the matching
        ``lab1_respN`` / ``lab1_samples_respN`` attributes hold the labels
        and samples the tests compare against.
        """
        write_root = TEST_INVARIANTS['test_write_root']
        data_root = TEST_INVARIANTS['test_data_root']
        self.test_write_root = write_root
        self.test_data_root = data_root
        self.sample_data_root = write_root
        self.testdb = 'DB1'
        self.dbm = DBManager(self.sample_data_root)

        group_a = ['A1', 'A2', 'A3', 'A4']
        group_b = ['B1', 'B2', 'B3', 'B4']

        # ---- LABELS1
        self.test_dtname1 = 'LABELS1'
        self.lab1_dsv_path = os.path.abspath(os.path.join(data_root, 'labels1.dsv'))
        self.lab1_fh = DSV.getHandle(self.lab1_dsv_path, 'rb')
        dsv1 = self.lab1_dsv = DSV(self.dbm, self.testdb, self.lab1_fh,
                                   dtname=self.test_dtname1)
        dsv1.create()
        dsv1.loadAll()
        dsv1.close()
        # query 1: all samples in file order
        self.lab1_samples1 = group_a + group_b
        self.lab1_resp1 = ['1'] * 4 + ['-1'] * 4
        self.lab1_samples_resp1 = list(self.lab1_samples1)
        self.lab1_cnt = dict(zip(self.lab1_samples1, self.lab1_resp1))
        # query 2: all samples, reversed
        self.lab1_samples2 = self.lab1_samples1[::-1]
        self.lab1_resp2 = self.lab1_resp1[::-1]
        self.lab1_samples_resp2 = list(self.lab1_samples2)
        # query 3: groups interleaved
        self.lab1_samples3 = ['A1', 'B1', 'A2', 'B2', 'A3', 'B3', 'A4', 'B4']
        self.lab1_resp3 = ['1', '-1'] * 4
        self.lab1_samples_resp3 = list(self.lab1_samples3)
        # query 4: subset
        self.lab1_samples4 = ['A1', 'B1', 'B4', 'A3']
        self.lab1_resp4 = ['1', '-1', '-1', '1']
        self.lab1_samples_resp4 = list(self.lab1_samples4)
        # query 5: subset containing None
        self.lab1_samples5 = ['A1', 'B1', None, 'A3']
        self.lab1_resp5 = ['1', '-1', '1']
        self.lab1_samples_resp5 = ['A1', 'B1', 'A3']
        # query 6: mostly unknown samples
        # NOTE(review): 'B4' 'QQQ7546dsfsdfs453' are adjacent literals (no
        # comma), so they form ONE element 'B4QQQ7546dsfsdfs453'. Kept as-is
        # because lab1_resp6 expects only the label of 'A1'; confirm intent.
        self.lab1_samples6 = ['A1', 'XXX1', 'B4' 'QQQ7546dsfsdfs453']
        self.lab1_resp6 = ['1']
        self.lab1_samples_resp6 = ['A1']

        # ---- LABELS2
        self.test_dtname2 = 'LABELS2'
        self.lab2_dsv_path = os.path.abspath(os.path.join(data_root, 'labels2.dsv'))
        self.lab2_fh = DSV.getHandle(self.lab2_dsv_path, 'rb')
        dsv2 = self.lab2_dsv = DSV(self.dbm, self.testdb, self.lab2_fh,
                                   dtname=self.test_dtname2)
        dsv2.create()
        dsv2.loadAll()
        dsv2.close()
        # cntY: labelled samples only; cntN: additionally the '0'-labelled ones
        self.lab2_cntY = dict(zip(group_a + group_b, ['1'] * 4 + ['-1'] * 4))
        self.lab2_cntN = dict(self.lab2_cntY)
        self.lab2_cntN.update({'PP': '0', 'QQ': '0', 'RR': '0', 'SS': '0'})
        self.lab2_samples_order1 = group_a + group_b
        self.lab2_samples_resp1 = group_a + group_b
        self.lab2_samples_order2 = ['A1', 'A2', 'QQ', 'A3', 'A4', 'SS',
                                    'B1', 'B2', 'B3', 'RR', 'B4', 'PP']
        self.lab2_samples_resp2 = group_a + group_b

    def tearDown(self):
        """Close the manager and delete the DB files it may have created."""
        self.dbm.close()
        leftovers = (
            os.path.abspath('%s/%s.db' % (self.test_write_root, self.testdb)),
            os.path.abspath('%s/%s.root.db' % (self.test_write_root, SYSTEM_NAME_LC)),
        )
        for db_file in leftovers:
            if os.path.exists(db_file):
                os.remove(db_file)
        self.dbm = None

    def _lab1_label_cases(self):
        """Return (query samples, expected labels) pairs for queries 1-6."""
        return [
            (self.lab1_samples1, self.lab1_resp1),
            (self.lab1_samples2, self.lab1_resp2),
            (self.lab1_samples3, self.lab1_resp3),
            (self.lab1_samples4, self.lab1_resp4),
            (self.lab1_samples5, self.lab1_resp5),
            (self.lab1_samples6, self.lab1_resp6),
        ]

    def _check_labels(self, lab):
        """Assert that getLabels returns the expected label lists."""
        for query, expected in self._lab1_label_cases():
            answer = lab.getLabels(query)
            self.assertEqual(expected, answer)

    def _check_labels_as_array(self, lab):
        """Assert that getLabels(as_array=True) matches the float labels."""
        for query, expected in self._lab1_label_cases():
            answer = lab.getLabels(query, as_array=True)
            numeric = np.array([float(value) for value in expected])
            np.testing.assert_array_equal(answer, numeric)

    def test_init1(self):
        """Default construction reads all labels; unused label is '0'."""
        lab = Labels(self.lab1_dsv)
        self.assertEqual(self.lab1_cnt, lab.labels)
        self.assertEqual('0', lab.unused_sample_label)

    def test_init2(self):
        """A custom unused-sample label is stored on the instance."""
        lab = Labels(self.lab1_dsv, unused_sample_label='XXX')
        self.assertEqual('XXX', lab.unused_sample_label)
        self.assertEqual(self.lab1_cnt, lab.labels)

    def test_getLabels1(self):
        """Labels from labels1.dsv, returned as lists."""
        self._check_labels(Labels(self.lab1_dsv))

    def test_getLabels2(self):
        """Labels from labels1.dsv, returned as numeric arrays."""
        self._check_labels_as_array(Labels(self.lab1_dsv))

    def test_getLabels3(self):
        """Labels from labels2.dsv: '0'-labelled samples are not kept."""
        lab = Labels(self.lab2_dsv)
        self.assertNotEqual(self.lab2_cntN, lab.labels)
        self.assertEqual(self.lab2_cntY, lab.labels)
        self._check_labels(lab)

    def test_getLabels4(self):
        """Labels from labels2.dsv, returned as numeric arrays."""
        self._check_labels_as_array(Labels(self.lab2_dsv))

    def test_getSamples1(self):
        """getSamples on labels1.dsv keeps only known samples, in query order."""
        lab1 = Labels(self.lab1_dsv)
        cases = [
            (self.lab1_samples1, self.lab1_samples_resp1),
            (self.lab1_samples2, self.lab1_samples_resp2),
            (self.lab1_samples3, self.lab1_samples_resp3),
            (self.lab1_samples4, self.lab1_samples_resp4),
            (self.lab1_samples5, self.lab1_samples_resp5),
            (self.lab1_samples6, self.lab1_samples_resp6),
        ]
        for query, expected in cases:
            answer = lab1.getSamples(query)
            self.assertEqual(expected, answer)

    def test_getSamples2(self):
        """getSamples on labels2.dsv drops the '0'-labelled samples."""
        lab2 = Labels(self.lab2_dsv)
        cases = [
            (self.lab2_samples_order1, self.lab2_samples_resp1),
            (self.lab2_samples_order2, self.lab2_samples_resp2),
        ]
        for query, expected in cases:
            answer = lab2.getSamples(query)
            self.assertEqual(expected, answer)
Пример #16
0
 def test_DBManager1(self):
     """Check that the root DB file exists at the expected location.

     After constructing a DBManager, ``self.rootdb_path`` must exist and
     equal the absolute location reported for the manager's root DB key.
     """
     dbm = DBManager(self.test_write_root)
     # try/finally guarantees the manager is closed even when an assertion
     # fails, so no open handle leaks into tearDown.
     try:
         self.assertTrue(os.path.exists(self.rootdb_path))
         rootdb_loc = os.path.abspath(dbm.getDBloc(dbm.rootdb_key))
         self.assertEqual(self.rootdb_path, rootdb_loc)
     finally:
         dbm.close()
Пример #17
0
class TestDBResult1(unittest.TestCase):
    """Tests for DBResult reading from a three-column DBTable.

    The table holds one row per uppercase ASCII letter, e.g.
    ('A1', 'A2', 'A3'); some tests load the same rows 100 times to
    exercise different row buffer sizes.
    """

    def __gen1(self):
        """Yield one row tuple per uppercase letter: ('A1', 'A2', 'A3'), ..."""
        nums = range(1, len(self.test_cols) + 1)
        for l in string.ascii_uppercase:
            yield tuple(["%s%s" % (l, n) for n in nums])

    def _column_suffixes(self):
        """Return the 1-based column numbers as strings: ['1', '2', '3']."""
        return ["%d" % (ix + 1) for ix, _ in enumerate(self.test_cols)]

    def _expected_rows(self, repeats):
        """Yield the unicode reference rows of the table loaded `repeats` times."""
        numsuffs = self._column_suffixes()
        for _ in range(repeats):
            for l in string.ascii_uppercase:
                yield tuple([u'%s%s' % (l, ns) for ns in numsuffs])

    def _load_100x(self):
        """Load the base rows 99 more times (setUp already loaded them once)."""
        for _ in range(99):
            self.dt1.load(self.__gen1())

    def setUp(self):
        """Create and populate the test table, then open a cursor on its DB."""
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.test_cols = ('A', 'B', 'C')
        self.test_dtname = 'Test1'
        self.dbm = DBManager(self.test_write_root)
        self.dt1 = DBTable(self.dbm,
                           self.testdb,
                           self.test_cols,
                           name=self.test_dtname)
        self.dt1.create()
        self.dt1.load(self.__gen1())
        self.rcs = self.dt1.db.cursor()

    def tearDown(self):
        """Close the manager and delete the DB files it may have created."""
        self.dbm.close()
        db1_path = os.path.abspath('%s/%s.db' %
                                   (self.test_write_root, self.testdb))
        rootdb_path = os.path.abspath('%s/%s.root.db' %
                                      (self.test_write_root, SYSTEM_NAME_LC))
        for db_file in (db1_path, rootdb_path):
            if os.path.exists(db_file):
                os.remove(db_file)
        self.dbm = None

    def testDBR_get1(self):
        """get() yields the same rows as the table's own get()."""
        self.rcs.execute('select * from %s' % self.test_dtname)
        dbr = DBResult(self.dt1, self.rcs)
        # get generator within cursor size limits
        res = list(dbr.get())
        ref_res = list(self.dt1.get())
        self.assertSequenceEqual(ref_res, res)

    def testDBR_get2(self):
        """getAll() with defaults returns all rows as a sequence."""
        self.rcs.execute('select * from %s' % self.test_dtname)
        dbr = DBResult(self.dt1, self.rcs)
        # get all as iterable within cursor size limits
        res = dbr.getAll()
        ref_res = list(self.dt1.get())
        self.assertSequenceEqual(ref_res, res)

    def testDBR_get3(self):
        """getAll() with both dict flags off returns all rows as a sequence."""
        self.rcs.execute('select * from %s' % self.test_dtname)
        dbr = DBResult(self.dt1, self.rcs)
        # get all as iterable within cursor size limits
        res = dbr.getAll(as_dict=False, dict_on_rows=False)
        ref_res = list(self.dt1.get())
        self.assertSequenceEqual(ref_res, res)

    def testDBR_get4(self):
        """getAll(as_dict=True) keyed on columns maps column -> values."""
        self.rcs.execute('select * from %s' % self.test_dtname)
        dbr = DBResult(self.dt1, self.rcs)
        # get all as dict within cursor size limits, keyed on columns
        res = dbr.getAll(as_dict=True, dict_on_rows=False)
        ref_res = {}
        for ix, c in enumerate(self.test_cols):
            ref_res[c] = [
                u"%s%d" % (l, ix + 1) for l in string.ascii_uppercase
            ]
        self.assertDictEqual(ref_res, res)

    def testDBR_get5(self):
        """getAll(as_dict=True) keyed on rows maps first cell -> other cells."""
        self.rcs.execute('select * from %s' % self.test_dtname)
        dbr = DBResult(self.dt1, self.rcs)
        # get all as dict within cursor size limits, keyed on rows
        res = dbr.getAll(as_dict=True, dict_on_rows=True)
        ref_res = {}
        numsuffs = self._column_suffixes()
        for l in string.ascii_uppercase:
            key = u'%s%s' % (l, numsuffs[0])
            vls = [u'%s%s' % (l, ns) for ns in numsuffs[1:]]
            # membership must be tested on the dict key, not on the letter
            if key not in ref_res:
                ref_res[key] = vls
        self.assertDictEqual(ref_res, res)

    def testDBR_get6(self):
        """get() with rowbufsize=100 returns all rows of a 100x load."""
        # generate 100x load
        self._load_100x()
        self.rcs.execute('select * from %s' % self.test_dtname)
        # get all as iterable with limited cursor size, 26 internal loops
        dbr = DBResult(self.dt1, self.rcs, rowbufsize=100)
        # get all results at once
        res = list(dbr.get())
        ref_res = list(self._expected_rows(100))
        self.assertSequenceEqual(ref_res, res)

    def testDBR_get7(self):
        """get() with rowbufsize=1000 returns all rows of a 100x load."""
        # generate 100x load
        self._load_100x()
        self.rcs.execute('select * from %s' % self.test_dtname)
        # get all as iterable with limited cursor size, 3 internal loops
        dbr = DBResult(self.dt1, self.rcs, rowbufsize=1000)
        # get all results at once
        res = list(dbr.get())
        ref_res = list(self._expected_rows(100))
        self.assertSequenceEqual(ref_res, res)

    def testDBR_get8(self):
        """Iterating get() row by row matches the reference row stream."""
        # generate 100x load
        self._load_100x()
        self.rcs.execute('select * from %s' % self.test_dtname)
        # get all as iterable with limited cursor size, 26 internal loops
        dbr = DBResult(self.dt1, self.rcs, rowbufsize=100)

        ref_gen = self._expected_rows(100)
        # iterate over single results; builtin next() works on Python 2.6+
        # and Python 3, unlike the generator's Python 2-only .next() method
        for rtup in dbr.get():
            self.assertEqual(next(ref_gen), rtup)
        # every reference row must have been consumed, otherwise the result
        # was silently shorter than expected
        self.assertRaises(StopIteration, next, ref_gen)
Пример #18
0
class TestGeneIDMapHGNCGPL1(unittest.TestCase):
    """Tests of GeneIDMapHGNCGPL built from the gpl96_sample.txt and
    hgnc_sample.txt fixture files."""

    def setUp(self):
        """Create the DB manager and the reference mappings."""
        self.test_data_root = TEST_INVARIANTS['test_data_root']
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.annoTable = 'ANNO_GPL'
        self.hgncTable = 'HGNC'
        self.dbm = DBManager(self.test_write_root)
        # expected forward map, compared with gene2emid.getFwdMap()
        self.ref_fwd1 = {
            '': {'91952_at', 'AFFX-TrpnX-3_at', 'AFFX-TrpnX-5_at', 'AFFX-TrpnX-M_at'},
            'BCAN': {'91920_at'},
            'DDR1': {'1007_s_at'},
            'EPS8L1': {'91826_at'},
            'HSPA6': {'117_at'},
            'RFC2': {'1053_at'},
        }
        # expected backward map, compared with gene2emid.getBwdMap()
        self.ref_bwd1 = {
            '1007_s_at': {'DDR1'},
            '1053_at': {'RFC2'},
            '117_at': {'HSPA6'},
            '91826_at': {'EPS8L1'},
            '91920_at': {'BCAN'},
            '91952_at': {''},
            'AFFX-TrpnX-3_at': {''},
            'AFFX-TrpnX-5_at': {''},
            'AFFX-TrpnX-M_at': {''},
        }
        # expected result of get_em2annotation(): six annotation values per ID
        self.ref_em2a = {
            '1007_s_at': ['DDR1', 'AFFX:U48705', 'U48705', '780',
                          'ENSG00000204580', 'NM_013994'],
            '1053_at': ['RFC2', 'GB:M87338', 'M87338', '5982',
                        'ENSG00000049541', 'NM_181471'],
            '117_at': ['HSPA6', 'AFFX:X51757', 'X51757', '3310',
                       'ENSG00000173110', 'NM_002155'],
            '91826_at': ['EPS8L1', 'GB:AI219073', 'AI219073', '54869',
                         '', 'NM_017729'],
            '91920_at': ['BCAN', 'GB:AI205180', 'AI205180', '63827',
                         'ENSG00000132692', 'NM_021948'],
            '91952_at': ['', 'GB:AI363375', 'AI363375', '', '', ''],
            'AFFX-TrpnX-3_at': ['', 'AFFX:AFFX-TrpnX-3', '', '', '', ''],
            'AFFX-TrpnX-5_at': ['', 'AFFX:AFFX-TrpnX-5', '', '', '', ''],
            'AFFX-TrpnX-M_at': ['', 'AFFX:AFFX-TrpnX-M', '', '', '', ''],
        }

    def tearDown(self):
        """Close the DB manager and delete the database files it left behind."""
        try:
            self.dbm.close()
        finally:
            # remove the files even when close() fails, so that a broken run
            # does not leave stale databases for the following tests
            leftovers = (
                os.path.join(self.test_write_root, '%s.db' % self.testdb),
                os.path.join(self.test_write_root, '%s.root.db' % SYSTEM_NAME_LC),
            )
            for leftover in leftovers:
                leftover = os.path.abspath(leftover)
                if os.path.exists(leftover):
                    os.remove(leftover)
            self.dbm = None

    def _load_dsv(self, filename, dtname):
        """Create, fully load and close a tab-delimited DSV table.

        :param filename: fixture file name below ``self.test_data_root``
        :param dtname: name of the data table to create in ``self.testdb``
        :return: the closed DSV instance
        """
        path = os.path.abspath(os.path.join(self.test_data_root, filename))
        handle = DSV.getHandle(path)
        dsv = DSV(self.dbm, self.testdb, handle, dtname=dtname, delimiter='\t', comment='#')
        dsv.create()
        dsv.loadAll()
        dsv.close()
        return dsv

    def _build_geneidmap(self):
        """Return a GeneIDMapHGNCGPL built from the GPL and HGNC samples."""
        geneidmap = GeneIDMapHGNCGPL()
        gpl_dsv = self._load_dsv('gpl96_sample.txt', self.annoTable)
        hgnc_dsv = self._load_dsv('hgnc_sample.txt', self.hgncTable)
        geneidmap.build(gpl_dsv, hgnc_dsv, self.testdb)
        return geneidmap

    def test_init1(self):
        # a fresh map is not built and has no backing table
        geneidmap = GeneIDMapHGNCGPL()
        self.assertFalse(geneidmap.built)
        self.assertIsNone(geneidmap.dbt)

    def test_build1(self):
        # building sets the flag, creates a DBTable and yields both maps
        geneidmap = self._build_geneidmap()
        self.assertTrue(geneidmap.built)
        self.assertIsInstance(geneidmap.dbt, DBTable)
        fwdmap = dict(geneidmap.gene2emid.getFwdMap())
        self.assertEqual(self.ref_fwd1, fwdmap)
        bwdmap = dict(geneidmap.gene2emid.getBwdMap())
        self.assertEqual(self.ref_bwd1, bwdmap)

    def test_em2annotation1(self):
        # annotations extracted from the built table match the reference
        geneidmap = self._build_geneidmap()
        em2a = get_em2annotation(geneidmap.dbt)
        self.assertEqual(self.ref_em2a, em2a)
Пример #19
0
class TestDSV2(unittest.TestCase):
    """Tests of DSV.loadAll() and DSV.close() using the num.dsv and anno.dsv
    fixture files."""

    def setUp(self):
        """Create the DB manager and the reference data for both fixtures."""
        self.test_data_root = TEST_INVARIANTS['test_data_root']
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.test_cols = ('A', 'B', 'C')
        self.test_dtname = 'Test1'
        self.dbm = DBManager(self.test_write_root)
        # num.dsv
        self.num_dsv_path = os.path.abspath(os.path.join(self.test_data_root, 'num.dsv'))
        num_cols = ['S%d' % n for n in range(91)]
        # header with an empty first column name ...
        self.num_dsv_actual_header = [''] + num_cols
        # ... and the header the DSV instance is expected to expose instead
        self.num_dsv_desired_header = [DSV_DEFAULT_ID_COLUMN] + num_cols
        self.num_rows = ['V1', 'V2']
        self.num_column_3 = ['8.08785988003525', '2.37310583895584']
        # anno.dsv
        self.anno_dsv_path = os.path.abspath(os.path.join(self.test_data_root, 'anno.dsv'))
        self.anno_comment = '#'
        self.anno_header = ['ID', 'GB_ACC', 'SPOT_ID', 'Species Scientific Name', 'Annotation Date', 'Sequence Type',
                            'Sequence Source', 'Target Description', 'Representative Public ID', 'Gene Title', 'Gene Symbol',
                            'ENTREZ_GENE_ID', 'RefSeq Transcript ID', 'Gene Ontology Biological Process',
                            'Gene Ontology Cellular Component', 'Gene Ontology Molecular Function']
        self.anno_rows = ['1007_s_at', '1053_at']
        self.anno_columns = ['GB_ACC', 'Gene Symbol']
        self.anno_columns_dict = {'GB_ACC' : ['U48705', 'M87338'], 'Gene Symbol' : ['DDR1', 'RFC2']}

    def tearDown(self):
        """Close the DB manager and delete the database files it left behind."""
        try:
            self.dbm.close()
        finally:
            # remove the files even when close() fails, so that a broken run
            # does not leave stale databases for the following tests
            leftovers = (
                os.path.join(self.test_write_root, '%s.db' % self.testdb),
                os.path.join(self.test_write_root, '%s.root.db' % SYSTEM_NAME_LC),
            )
            for leftover in leftovers:
                leftover = os.path.abspath(leftover)
                if os.path.exists(leftover):
                    os.remove(leftover)
            self.dbm = None

    def test_loadall1(self):
        # loadAll(debug=True) on num.dsv: the returned statements must equal
        # the expected insert statements
        dsv1_fh = DSV.getHandle(self.num_dsv_path)
        # default DSV
        dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname)
        self.assertSequenceEqual(self.num_dsv_desired_header, dsv1.header)
        dsv1.create()
        self.assertTrue(dsv1.isCreated())
        self.assertTrue(dsv1.isEmpty())
        # load from file
        st = dsv1.loadAll(debug=True)
        dsv1.close()
        ref_st = ['insert into "Test1" values ("V1","7.29865639942","7.1839394018853","8.08785988003525","8.43784327460378","7.56725674896063","7.17150350961048",'
                                               '"8.23772125375395","7.26860393651388","6.74186036580687","7.55493056104098","7.37521470969549","6.35468766815909",'
                                               '"7.03794441889888","6.75197742759923","7.26608934160658","8.70335292880697","6.85443361759566","7.59055769774248",'
                                               '"8.01751559655053","6.99993079846214","7.10871523619365","7.65161630470663","6.71058065426046","6.64437907655326",'
                                               '"6.93172233805358","7.61870427987243","6.9634175191832","6.37433009206648","6.34485366708736","6.0977075555399",'
                                               '"6.9061361459302","6.54264897912374","6.31961323363347","6.16533391728077","6.90481905323935","6.7168440158265",'
                                               '"7.22535319774288","6.20123577217092","6.93391118518623","6.82985307889579","6.35468239627533","7.09693639659124",'
                                               '"7.60449775270475","7.12266778930967","6.35835046528365","6.76414046791","6.17508883882112","6.52508274039929",'
                                               '"7.11162248509395","6.89152906126555","6.49949720627377","6.69448041622817","6.37526926527225","5.80401273298264",'
                                               '"7.12987703240072","6.05831629170905","6.81624397767137","6.66820808623227","6.64998519558867","6.42308111524492",'
                                               '"7.58672787003923","3.84767749509431","6.71665724008276","6.35468766815909","6.54859953448512","7.23447515724748",'
                                               '"6.70007125889196","6.28445976227631","6.75206243946758","6.7168440158265","6.55922419484843","6.93675713126568",'
                                               '"6.80067557800434","6.50103393612957","6.91542815411986","6.19960368164491","7.6448783709798","6.2125929974423",'
                                               '"6.35468766815909","7.32784699996015","6.14659907126786","6.7168440158265","6.8825610653412","6.72831600642366",'
                                               '"6.46374697412319","5.79584776993902","6.0825372527799","7.1204899554919","6.39620062779895","6.35814627516342",'
                                               '"6.35814627516342")',
                  'insert into "Test1" values ("V2","2.38904325749261","2.37588862645719","2.37310583895584","2.38904325749261","2.42091222425779","2.38904325749261",'
                                               '"2.38626046999126","2.38904325749261","2.38904325749261","2.41002306956031","2.38904325749261","2.38904325749261",'
                                               '"2.38904325749261","2.37310583895584","2.38626046999126","2.34429782913723","2.38904325749261","2.98112952430922",'
                                               '"2.34553574786241","2.37310583895584","2.39660701797421","2.38904325749261","2.40955866820479","2.38626046999126",'
                                               '"2.35577218230877","2.39443448171899","2.34433277775847","2.69053923836483","2.38430054425455","2.86158891209344",'
                                               '"2.34595261411454","2.89813268468409","2.42777977950130","2.38626046999126","2.44904175049461","3.55795174775419",'
                                               '"2.66896481156844","2.38626046999126","2.71772299956764","2.61602731442131","2.56996895766296","3.86202701130675",'
                                               '"2.38904325749261","2.35577218230877","2.60505670342601","3.12697260562512","2.38904325749261","3.15740854425796",'
                                               '"2.65364423092787","2.45124596034905","3.14913252263311","2.38904325749261","2.39700474393300","2.38904325749261",'
                                               '"2.46188514405506","3.23873137510437","2.55373906857937","3.39601442806742","3.16936129560691","3.18777558546775",'
                                               '"2.38904325749261","2.38904325749261","2.38626046999126","2.34553574786241","2.35577218230877","2.38624782221570",'
                                               '"2.35577218230877","2.38904325749261","2.74265374191966","2.37188401381886","2.37588862645719","2.38904325749261",'
                                               '"2.35577218230877","2.35121946858936","2.49946444329392","2.38904325749261","2.34553574786241","2.93960156829307",'
                                               '"2.38904325749261","2.39182604499395","2.38904325749261","2.35315614841910","2.47149945385376","2.38626046999126",'
                                               '"2.39596753440869","2.38904325749261","2.40223987191512","2.34715558421848","2.38210356896247","2.34719053283972",'
                                               '"2.76820667786915")']
        self.assertSequenceEqual(ref_st, st)

    def test_loadall2(self):
        # after loadAll() on num.dsv, the ID column and the fourth header
        # column read back from the database must match the reference values
        dsv1_fh = DSV.getHandle(self.num_dsv_path)
        # default DSV
        dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname)
        self.assertSequenceEqual(self.num_dsv_desired_header, dsv1.header)
        dsv1.create()
        dsv1.loadAll()
        dsv1.close()
        # low level checks
        cs = dsv1.db.cursor()
        try:
            cs.execute('select %s from %s' % (self.num_dsv_desired_header[0], dsv1.name))
            rres = cs.fetchall()
            res = [str(r[0]) for r in rres]
            self.assertSequenceEqual(self.num_rows, res)
            cs.execute('select %s from %s' % (self.num_dsv_desired_header[3], dsv1.name))
            rres = cs.fetchall()
            res = [str(r[0]) for r in rres]
            self.assertSequenceEqual(self.num_column_3, res)
        finally:
            # release the cursor even when an assertion fails
            cs.close()

    def test_loadall3(self):
        # loadAll(debug=True) on anno.dsv: the returned statements must equal
        # the expected insert statements
        dsv2_fh = DSV.getHandle(self.anno_dsv_path)
        dsv2 = DSV(self.dbm, self.testdb, dsv2_fh, dtname=self.test_dtname, comment=self.anno_comment)
        dsv2.create()
        st = dsv2.loadAll(debug=True)
        dsv2.close()
        ref_st = ['insert into "Test1" values ("1007_s_at","U48705","","Homo sapiens","Mar 11, 2009",'
                                               '"Exemplar sequence","Affymetrix Proprietary Database",'
                                               '"U48705 /FEATURE=mRNA /DEFINITION=HSU48705 Human receptor tyrosine kinase DDR gene, complete cds",'
                                               '"U48705","discoidin domain receptor tyrosine kinase 1",'
                                               '"DDR1","780","NM_001954 /// NM_013993 /// NM_013994",'
                                               '"0006468 // protein amino acid phosphorylation // inferred from electronic annotation ///'
                                               ' 0007155 // cell adhesion // traceable author statement ///'
                                               ' 0007155 // cell adhesion // inferred from electronic annotation ///'
                                               ' 0007169 // transmembrane receptor protein tyrosine kinase signaling pathway // inferred from electronic annotation",'
                                               '"0005887 // integral to plasma membrane // traceable author statement ///'
                                               ' 0016020 // membrane // inferred from electronic annotation ///'
                                               ' 0016021 // integral to membrane // inferred from electronic annotation",'
                                               '"0000166 // nucleotide binding // inferred from electronic annotation ///'
                                               ' 0004672 // protein kinase activity // inferred from electronic annotation ///'
                                               ' 0004713 // protein tyrosine kinase activity // inferred from electronic annotation ///'
                                               ' 0004714 // transmembrane receptor protein tyrosine kinase activity // traceable author statement ///'
                                               ' 0004714 // transmembrane receptor protein tyrosine kinase activity // inferred from electronic annotation ///'
                                               ' 0004872 // receptor activity // inferred from electronic annotation ///'
                                               ' 0005515 // protein binding // inferred from physical interaction ///'
                                               ' 0005524 // ATP binding // inferred from electronic annotation ///'
                                               ' 0016301 // kinase activity // inferred from electronic annotation ///'
                                               ' 0016740 // transferase activity // inferred from electronic annotation")',
                  'insert into "Test1" values ("1053_at","M87338","","Homo sapiens","Mar 11, 2009",'
                                               '"Exemplar sequence","GenBank",'
                                               '"M87338 /FEATURE= /DEFINITION=HUMA1SBU Human replication factor C, 40-kDa subunit (A1) mRNA, complete cds",'
                                               '"M87338","replication factor C (activator 1) 2, 40kDa",'
                                               '"RFC2","5982","NM_002914 /// NM_181471",'
                                               '"0006260 // DNA replication // not recorded ///'
                                               ' 0006260 // DNA replication // inferred from electronic annotation ///'
                                               ' 0006297 // nucleotide-excision repair, DNA gap filling // not recorded",'
                                               '"0005634 // nucleus // inferred from electronic annotation ///'
                                               ' 0005654 // nucleoplasm // not recorded ///'
                                               ' 0005663 // DNA replication factor C complex // inferred from direct assay ///'
                                               ' 0005663 // DNA replication factor C complex // inferred from electronic annotation",'
                                               '"0000166 // nucleotide binding // inferred from electronic annotation ///'
                                               ' 0003677 // DNA binding // inferred from electronic annotation ///'
                                               ' 0003689 // DNA clamp loader activity // inferred from electronic annotation ///'
                                               ' 0005515 // protein binding // inferred from physical interaction ///'
                                               ' 0005524 // ATP binding // traceable author statement ///'
                                               ' 0005524 // ATP binding // inferred from electronic annotation ///'
                                               ' 0017111 // nucleoside-triphosphatase activity // inferred from electronic annotation")']
        self.assertSequenceEqual(ref_st, st)

    def test_loadall4(self):
        # after loadAll() on anno.dsv, the ID column and two selected
        # annotation columns read back from the database must match
        dsv2_fh = DSV.getHandle(self.anno_dsv_path)
        dsv2 = DSV(self.dbm, self.testdb, dsv2_fh, dtname=self.test_dtname, comment=self.anno_comment)
        self.assertSequenceEqual(self.anno_header, dsv2.header)
        dsv2.create()
        dsv2.loadAll()
        dsv2.close()
        # low level checks
        cs = dsv2.db.cursor()
        try:
            cs.execute('select %s from %s' % (self.anno_header[0], dsv2.name))
            rres = cs.fetchall()
            res = [str(r[0]) for r in rres]
            self.assertSequenceEqual(self.anno_rows, res)
            cols = ','.join([quote(c) for c in self.anno_columns])
            cs.execute('select %s from %s' % (cols, dsv2.name))
            rres = cs.fetchall()
            # regroup the fetched rows into one value list per column name
            res = {}
            for ix, ac in enumerate(self.anno_columns):
                res[ac] = [str(r[ix]) for r in rres]
            self.assertDictEqual(self.anno_columns_dict, res)
        finally:
            # release the cursor even when an assertion fails
            cs.close()

    def test_close1(self):
        # loading again after close() must raise Error
        dsv2_fh = DSV.getHandle(self.anno_dsv_path)
        dsv2 = DSV(self.dbm, self.testdb, dsv2_fh, dtname=self.test_dtname, comment=self.anno_comment)
        dsv2.create()
        dsv2.loadAll()
        dsv2.close()
        with self.assertRaises(Error):
            dsv2.loadAll()
Пример #20
0
class TestPKDrivenDBDataManager1(unittest.TestCase):
    """Exercises PKDrivenDBDataManager against a table loaded from the
    ssdata.dsv fixture file."""

    def setUp(self):
        """Load ssdata.dsv into a data table and prepare reference values."""
        self.test_data_root = TEST_INVARIANTS['test_data_root']
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.test_dtname = 'Test1'
        self.dbm = DBManager(self.test_write_root)

        # ssdata.dsv: create the table, load everything, close the DSV
        fixture = os.path.join(self.test_data_root, 'ssdata.dsv')
        self.ssdata_dsv_path = os.path.abspath(fixture)
        self.ssdata_comment = '#'
        handle = DSV.getHandle(self.ssdata_dsv_path)
        self.ssdata_dsv1 = DSV(self.dbm, self.testdb, handle,
                               dtname=self.test_dtname,
                               comment=self.ssdata_comment)
        self.ssdata_dsv1.create()
        self.ssdata_dsv1.loadAll()
        self.ssdata_dsv1.close()
        self.ssdata_samples = ('S1', 'S2', 'S3', 'S4', 'S5')

        # pkcidmap
        self.pkc2id1 = {
            'PKC1': ('R1', 'R2'),
            'PKC2': ('R3', ),
            'PKC3': ('R4', 'R5')
        }
        self.pkc1 = ('PKC1', 'PKC2', 'PKC3')
        self.pkc2 = ('PKC3', 'PKC1')
        self.ss_cols1 = '*'
        self.ss_cols2 = ('S1', 'S4', 'S5')

        def expected_info(pkc_id, sample_cols):
            # reference (subset info, dataset) pair for info-only requests;
            # the dataset slot is None
            description = {
                'pkcID': pkc_id,
                'dtable': self.ssdata_dsv1,
                'rows': list(self.pkc2id1[pkc_id]),
                'cols': list(sample_cols)
            }
            return (description, None)

        # all samples / selected samples / selected samples in pkc2 order
        self.ref_ss1 = [expected_info(p, self.ssdata_samples) for p in self.pkc1]
        self.ref_ss2 = [expected_info(p, self.ss_cols2) for p in self.pkc1]
        self.ref_ss3 = [expected_info(p, self.ss_cols2) for p in self.pkc2]
        # dataset-only requests: the info slot is None
        self.ref_ss4 = [
            (None,
             numpy.array([
                 [2.44753543273, 42.9497086717, 30.8331998765],
                 [42.1888598933, 39.1743921225, 15.9744094108],
             ])),
            (None,
             numpy.array([
                 [16.5734780715, 14.8233987496, 21.7385342744],
                 [60.0958378228, 98.4321570519, 71.9193619126],
             ])),
        ]

        # for categorization tests
        self.pkc2id2 = {'PKC1': ('R1', 'R2', 'R3', 'R4'), 'PKC2': ('R5', )}
        self.pkc3 = ('PKC1', 'PKC2')
        self.size_thr = 3
        self.cat1 = SubsetSizeCategorizer(self.size_thr)
        self.exp_categories1 = ['>', '<=']
        self.cat2 = NullCategorizer()
        self.exp_categories2 = [self.cat2.NULL for _ in self.pkc3]

    def tearDown(self):
        """Close the DB manager and delete the database files if present."""
        self.dbm.close()
        db1_path = os.path.abspath(
            '%s/%s.db' % (self.test_write_root, self.testdb))
        rootdb_path = os.path.abspath(
            '%s/%s.root.db' % (self.test_write_root, SYSTEM_NAME_LC))
        for stale in (db1_path, rootdb_path):
            if os.path.exists(stale):
                os.remove(stale)
        self.dbm = None

    def _subsets(self, pkc2id, pkcs, samples, want_info, want_dataset):
        """Return getSubset() results for each ID in ``pkcs``, obtained from
        a fresh manager over the fixture table and a mock map of ``pkc2id``."""
        manager = PKDrivenDBDataManager(self.ssdata_dsv1, MockPKCIDMap(pkc2id))
        results = []
        for pkc in pkcs:
            results.append(manager.getSubset(pkc,
                                             forSamples=samples,
                                             get_ssinfo=want_info,
                                             get_dataset=want_dataset))
        return results

    def _categories(self, categorizer):
        """Categorize the datasets of all ``self.pkc3`` subsets."""
        fetched = self._subsets(self.pkc2id2, self.pkc3, self.ss_cols2,
                                False, True)
        datasets = [pair[1] for pair in fetched]
        return [PKDrivenDBDataManager.categorizeSubset(ds, categorizer)
                for ds in datasets]

    def test_init1(self):
        # the manager keeps the mock map and exposes all sample names
        manager = PKDrivenDBDataManager(self.ssdata_dsv1,
                                        MockPKCIDMap(self.pkc2id1))
        self.assertEqual(self.pkc2id1, manager.pkcidmap.pkc2emid)
        self.assertSequenceEqual(self.ssdata_samples, manager.all_samples)

    def test_getSubset1(self):
        # info only, all samples ('*')
        actual = self._subsets(self.pkc2id1, self.pkc1, self.ss_cols1,
                               True, False)
        self.assertEqual(self.ref_ss1, actual)

    def test_getSubset2(self):
        # info only, selected samples
        actual = self._subsets(self.pkc2id1, self.pkc1, self.ss_cols2,
                               True, False)
        self.assertEqual(self.ref_ss2, actual)

    def test_getSubset3(self):
        # info only, selected samples, IDs requested in pkc2 order
        actual = self._subsets(self.pkc2id1, self.pkc2, self.ss_cols2,
                               True, False)
        self.assertEqual(self.ref_ss3, actual)

    def test_getSubset4(self):
        # dataset only: info slot and array contents must match the reference
        actual = self._subsets(self.pkc2id1, self.pkc2, self.ss_cols2,
                               False, True)
        for expected_pair, actual_pair in zip(self.ref_ss4, actual):
            self.assertEqual(expected_pair[0], actual_pair[0])
            numpy.testing.assert_array_equal(expected_pair[1],
                                             actual_pair[1].array)

    def test_categorizeSubset1(self):
        # size-based categorizer
        self.assertSequenceEqual(self.exp_categories1,
                                 self._categories(self.cat1))

    def test_categorizeSubset2(self):
        # null categorizer
        self.assertSequenceEqual(self.exp_categories2,
                                 self._categories(self.cat2))
Пример #21
0
 def test_DBManager0(self):
     # A manager that is closed right after construction must report empty
     # `db` and `db_loc` dictionaries.
     manager = DBManager(self.test_write_root)
     manager.close()
     for attribute in ('db', 'db_loc'):
         self.assertDictEqual(getattr(manager, attribute), {})
Пример #22
0
class TestPKDrivenDBSubsetHierarchy1(unittest.TestCase):
    """Checks the hierarchy and symbol tree built by PKDrivenDBSubsetHierarchy.

    The same three size categorizers are applied in two different orders
    (``cmap1`` and ``cmap2``) to two symbols, 'PKC1' and 'PKC2'.
    """

    @staticmethod
    def _size_categorizer(threshold, cat_id):
        """Build a size categorizer together with its uniquified categories.

        Returns a tuple ``(categorizer, uniq_lesser, uniq_greater)`` where the
        last two are ``uniquifyCategory`` applied to ``ROW_SIZE_LESSER`` and
        ``ROW_SIZE_GREATER`` respectively.
        """
        categorizer = SubsetSizeCategorizer(threshold, ID=cat_id)
        uniq_lesser = categorizer.uniquifyCategory(categorizer.ROW_SIZE_LESSER)
        uniq_greater = categorizer.uniquifyCategory(
            categorizer.ROW_SIZE_GREATER)
        return categorizer, uniq_lesser, uniq_greater

    def setUp(self):
        """Load ssdata.dsv into a data table and prepare the categorizers."""
        self.test_data_root = TEST_INVARIANTS['test_data_root']
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.test_dtname = 'Test1'
        self.dbm = DBManager(self.test_write_root)
        # ssdata.dsv
        dsv_file = os.path.join(self.test_data_root, 'ssdata.dsv')
        self.ssdata_dsv_path = os.path.abspath(dsv_file)
        self.ssdata_comment = '#'
        handle = DSV.getHandle(self.ssdata_dsv_path)
        table = DSV(self.dbm,
                    self.testdb,
                    handle,
                    dtname=self.test_dtname,
                    comment=self.ssdata_comment)
        self.ssdata_dsv1 = table
        table.create()
        table.loadAll()
        table.close()
        self.ssdata_samples = ('S1', 'S2', 'S3', 'S4', 'S5')
        # hierarchy tests
        self.pkc2id = {'PKC1': ('R1', 'R2', 'R3', 'R4'), 'PKC2': ('R5', )}
        self.pkc = ('PKC1', 'PKC2')
        self.size_thr1 = 2
        (self.cat1, self.cat1_uniq_le,
         self.cat1_uniq_gt) = self._size_categorizer(self.size_thr1, 'Cat1')
        self.size_thr2 = 3
        (self.cat2, self.cat2_uniq_le,
         self.cat2_uniq_gt) = self._size_categorizer(self.size_thr2, 'Cat2')
        self.size_thr3 = 0
        (self.cat3, self.cat3_uniq_le,
         self.cat3_uniq_gt) = self._size_categorizer(self.size_thr3, 'Cat3')
        self.cinst = dict(Cat1=self.cat1, Cat2=self.cat2, Cat3=self.cat3)
        self.cmap1 = ['Cat1', 'Cat2', 'Cat3']
        self.cmap2 = ['Cat3', 'Cat1', 'Cat2']
        self.symbols = list(self.pkc)

    def tearDown(self):
        """Close the manager and delete the database files it left behind."""
        self.dbm.close()
        leftovers = (
            os.path.abspath('%s/%s.db' % (self.test_write_root, self.testdb)),
            os.path.abspath('%s/%s.root.db' %
                            (self.test_write_root, SYSTEM_NAME_LC)),
        )
        for db_file in leftovers:
            if os.path.exists(db_file):
                os.remove(db_file)
        self.dbm = None

    def _build(self, cmap):
        """Return a hierarchy built over all samples with the given order."""
        manager = PKDrivenDBDataManager(self.ssdata_dsv1,
                                        MockPKCIDMap(self.pkc2id))
        hierarchy = PKDrivenDBSubsetHierarchy(manager, self.ssdata_samples)
        hierarchy.build(cmap, self.cinst, self.symbols)
        return hierarchy

    def test_init1(self):
        """Categorizer order Cat1 -> Cat2 -> Cat3."""
        pkdss = self._build(self.cmap1)
        # ['Cat1', 'Cat2', 'Cat3']
        expected_hierarchy = {
            None: self.cat1.id,
            self.cat1_uniq_le: self.cat2.id,
            self.cat1_uniq_gt: self.cat2.id,
            self.cat2_uniq_le: self.cat3.id,
            self.cat2_uniq_gt: self.cat3.id,
            self.cat3_uniq_gt: None,
            self.cat3_uniq_le: None,
        }
        self.assertEqual(expected_hierarchy, pkdss.hierarchy)
        #            [PKC1 PKC2]
        # Cat1  [PKC1]>2     [PKC2]<=2
        # Cat2  [PKC1]>3     [PKC2]<=3
        # Cat3  [PKC1]>0     [PKC2]>0
        expected_symboltree = {
            None: {
                self.cat1_uniq_gt: ['PKC1'],
                self.cat1_uniq_le: ['PKC2'],
            },
            self.cat1_uniq_gt: {self.cat2_uniq_gt: ['PKC1']},
            self.cat1_uniq_le: {self.cat2_uniq_le: ['PKC2']},
            self.cat2_uniq_gt: {self.cat3_uniq_gt: ['PKC1']},
            self.cat2_uniq_le: {self.cat3_uniq_gt: ['PKC2']},
        }
        self.assertEqual(expected_symboltree, pkdss.symboltree)

    def test_init2(self):
        """Categorizer order Cat3 -> Cat1 -> Cat2."""
        pkdss = self._build(self.cmap2)
        # ['Cat3', 'Cat1', 'Cat2']
        expected_hierarchy = {
            None: self.cat3.id,
            self.cat3_uniq_le: self.cat1.id,
            self.cat3_uniq_gt: self.cat1.id,
            self.cat1_uniq_le: self.cat2.id,
            self.cat1_uniq_gt: self.cat2.id,
            self.cat2_uniq_gt: None,
            self.cat2_uniq_le: None,
        }
        self.assertEqual(expected_hierarchy, pkdss.hierarchy)
        #                   [PKC1 PKC2]
        # Cat3      [PKC1 PKC2]>0      []<=0
        # Cat1    [PKC1]>2  [PKC2]<=2
        # Cat2    [PKC1]>3  [PKC2]<=3
        expected_symboltree = {
            None: {self.cat3_uniq_gt: ['PKC1', 'PKC2']},
            self.cat3_uniq_gt: {
                self.cat1_uniq_gt: ['PKC1'],
                self.cat1_uniq_le: ['PKC2'],
            },
            self.cat1_uniq_gt: {self.cat2_uniq_gt: ['PKC1']},
            self.cat1_uniq_le: {self.cat2_uniq_le: ['PKC2']},
        }
        self.assertEqual(expected_symboltree, pkdss.symboltree)
Пример #23
0
 def test_DBManager6(self):
     """Opened databases are registered, listed in the root DB and closable.

     Three databases are opened; closing one by name must drop only that
     entry, and a final close() must empty both registries.
     """
     names = ['TestDB1', 'TestDB2', 'TestDB3']
     manager = DBManager(self.test_write_root)
     for name in names:
         manager.getDB(name)
     for name in names:
         self.assertIn(name, manager.db)
         self.assertIn(name, manager.db_loc)
     # the root database keeps one row per opened database
     root = manager.getDB(manager.rootdb_key)
     cursor = root.cursor()
     cursor.execute('select db from DB')
     listed = [str(row[0]) for row in cursor.fetchall()]
     self.assertSequenceEqual(names, listed)
     cursor.close()
     manager.close('TestDB2')
     after_close = (
         ('TestDB1', self.assertIn),
         ('TestDB2', self.assertNotIn),
         ('TestDB3', self.assertIn),
     )
     for name, check in after_close:
         check(name, manager.db)
         check(name, manager.db_loc)
     manager.close()
     self.assertDictEqual(manager.db, {})
     self.assertDictEqual(manager.db_loc, {})
Пример #24
0
class TestDTShelve2(unittest.TestCase):
    """Exercises the dict-like interface of DBShelve.

    Covers item get/set/delete, ``len``, ``in``, ``keys``/``values``,
    ``update``, ``clear`` and ``view`` on a shelve stored in database 'DB1'.
    """

    def setUp(self):
        """Create a fresh manager and a shelve bound to the test database."""
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.dbm = DBManager(self.test_write_root)
        self.dbsh = DBShelve(self.dbm, self.testdb, None)

    def tearDown(self):
        """Empty the shelve, close the manager and remove database files."""
        self.dbsh.clear()
        self.dbm.close()
        db1_path = os.path.abspath('%s/%s.db' %
                                   (self.test_write_root, self.testdb))
        rootdb_path = os.path.abspath('%s/%s.root.db' %
                                      (self.test_write_root, SYSTEM_NAME_LC))
        for db_path in (db1_path, rootdb_path):
            if os.path.exists(db_path):
                os.remove(db_path)
        self.dbm = None

    def _store_abc(self):
        """Store the three entries every test starts from."""
        self.dbsh['A'] = '1'
        self.dbsh['B'] = '2'
        self.dbsh['C'] = '3'

    def test_getset1(self):
        """Stored values can be read back and are counted by len()."""
        self._store_abc()
        self.assertEqual('1', self.dbsh['A'])
        self.assertEqual('2', self.dbsh['B'])
        self.assertEqual('3', self.dbsh['C'])
        self.assertEqual(3, len(self.dbsh))

    def test_contains1(self):
        """Membership test distinguishes stored from unknown keys."""
        self._store_abc()
        self.assertTrue('A' in self.dbsh)
        self.assertFalse('XXX' in self.dbsh)

    def test_del1(self):
        """A deleted key raises KeyError and disappears from keys/values."""
        self._store_abc()
        del self.dbsh['B']
        with self.assertRaises(KeyError):
            self.dbsh['B']
        self.assertSequenceEqual(['A', 'C'], self.dbsh.keys())
        self.assertSequenceEqual(['1', '3'], self.dbsh.values())

    def test_update1(self):
        """update() accepts key/value pairs through the ``items`` keyword."""
        self._store_abc()
        self.dbsh.update(items=(('D', '4'), ('E', '5')))
        self.assertEqual(5, len(self.dbsh))
        self.assertEqual('4', self.dbsh['D'])
        self.assertEqual('5', self.dbsh['E'])

    def test_update2(self):
        """update() accepts a plain dict."""
        self._store_abc()
        self.dbsh.update({'D': '4', 'E': '5'})
        self.assertEqual(5, len(self.dbsh))
        self.assertEqual('4', self.dbsh['D'])
        self.assertEqual('5', self.dbsh['E'])

    def test_clear1(self):
        """clear() removes every entry."""
        self._store_abc()
        self.dbsh.clear()
        self.assertEqual(0, len(self.dbsh))
        # One assertRaises context per key: inside a shared context the first
        # KeyError ends the block, so the remaining lookups would never run.
        for key in ('A', 'B', 'C'):
            with self.assertRaises(KeyError):
                self.dbsh[key]

    def test_view1(self):
        """view() returns the content as a dict, empty after clear()."""
        self._store_abc()
        self.assertEqual({'A': '1', 'B': '2', 'C': '3'}, self.dbsh.view())
        self.dbsh.clear()
        self.assertEqual({}, self.dbsh.view())
Пример #25
0
 def setUp(self):
     """Create a database manager and a shelve bound to database 'DB1'."""
     write_root = TEST_INVARIANTS['test_write_root']
     db_name = 'DB1'
     manager = DBManager(write_root)
     self.test_write_root = write_root
     self.testdb = db_name
     self.dbm = manager
     self.dbsh = DBShelve(manager, db_name, None)
Пример #26
0
class TestHGNC1(unittest.TestCase):
    """Tests for the HGNC helpers working on a sample HGNC table.

    ``setUp`` loads 'hgnc_sample_2.txt' (tab-delimited, '#' comments) into
    table 'HGNC' of database 'DB1'; the tests then check symbol correction
    and the generated previous-symbol and synonym tables.
    """

    def setUp(self):
        """Load the HGNC sample file and define the reference mappings."""
        self.test_data_root = TEST_INVARIANTS['test_data_root']
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.hgncTable = 'HGNC'
        self.dbm = DBManager(self.test_write_root)
        #
        hgnc_path = os.path.abspath(
            os.path.join(self.test_data_root, 'hgnc_sample_2.txt'))
        hgnc_comment = '#'
        hgnc_delimiter = '\t'
        hgnc_fh = DSV.getHandle(hgnc_path)
        self.hgnc_dsv = DSV(self.dbm,
                            self.testdb,
                            hgnc_fh,
                            dtname=self.hgncTable,
                            delimiter=hgnc_delimiter,
                            comment=hgnc_comment)
        self.hgnc_dsv.create()
        self.hgnc_dsv.loadAll()
        self.hgnc_dsv.close()
        #
        self.withdrawn_pattern = '%~withdrawn'
        self.symbol_col = 'Approved Symbol'
        #
        # NOTE: we use unicode since we do not reparse immediately after querying
        self.ref_previous1 = {
            u'NTRK4': [u'DDR1'],
            u'PTK3A': [u'DDR1'],
            u'NEP': [u'DDR1'],
            u'CAK': [u'DDR1'],
            u'EDDR1': [u'DDR1'],
            u'C19orf72': [u'DCAF15'],
        }
        self.ref_synonyms1 = {
            u'A1': [u'RFC2'],
            u'BEHAB': [u'BCAN'],
            u'CD167': [u'DDR1'],
            u'CSPG7': [u'BCAN'],
            u'DRC3': [u'EPS8L1'],
            u'FLJ20258': [u'EPS8L1'],
            u'MGC13038': [u'BCAN'],
            u'MGC23164': [u'EPS8L1'],
            u'MGC4642': [u'EPS8L1'],
            u'MGC99481': [u'DCAF15'],
            u'RFC40': [u'RFC2'],
            u'RTK6': [u'DDR1']
        }

    def tearDown(self):
        """Close the manager and delete the database files it left behind."""
        self.dbm.close()
        db1_path = os.path.abspath('%s/%s.db' %
                                   (self.test_write_root, self.testdb))
        rootdb_path = os.path.abspath('%s/%s.root.db' %
                                      (self.test_write_root, SYSTEM_NAME_LC))
        for db_path in (db1_path, rootdb_path):
            if os.path.exists(db_path):
                os.remove(db_path)
        self.dbm = None

    def test_correctHGNCApprovedSymbols1(self):
        """After correction no approved symbol matches the withdrawn pattern."""
        correctHGNCApprovedSymbols(self.hgnc_dsv)
        # check that no symbol has '~withdrawn' suffix
        # Identifiers cannot be bound, so only they are interpolated; the LIKE
        # pattern is passed as a parameter instead of a double-quoted literal,
        # which SQLite would first try to resolve as a column name.
        st = 'select "%s" from %s where "%s" like ?' % (
            self.symbol_col, self.hgncTable, self.symbol_col)
        c = self.hgnc_dsv.db.cursor()
        try:
            c.execute(st, (self.withdrawn_pattern, ))
            res = c.fetchall()
        finally:
            # release the cursor even if the query itself fails
            c.close()
        self.assertEqual([], res)

    def test_generateHGNCPreviousSymbols1(self):
        """Generated previous-symbol table equals ``ref_previous1``."""
        previous_dt = generateHGNCPreviousSymbols(self.hgnc_dsv, self.testdb)
        res = previous_dt.getAll(as_dict=True, dict_on_rows=True)
        self.assertEqual(self.ref_previous1, res)

    def test_generateHGNCSynonyms1(self):
        """Generated synonym table equals ``ref_synonyms1``."""
        synonyms_dt = generateHGNCSynonyms(self.hgnc_dsv, self.testdb)
        res = synonyms_dt.getAll(as_dict=True, dict_on_rows=True)
        self.assertEqual(self.ref_synonyms1, res)
Пример #27
0
class TestDSV1(unittest.TestCase):
    """Constructor behaviour of DSV on num.dsv: delimiter, comment, header."""

    def setUp(self):
        """Create the manager and the reference headers for num.dsv."""
        self.test_data_root = TEST_INVARIANTS['test_data_root']
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.test_cols = ('A', 'B', 'C')
        self.test_dtname = 'Test1'
        self.dbm = DBManager(self.test_write_root)
        # num.dsv
        self.num_dsv_path = os.path.abspath(os.path.join(self.test_data_root, 'num.dsv'))
        sample_names = ['S%d' % idx for idx in range(91)]
        # header as stored in the file: empty first field, then the samples
        self.num_dsv_actual_header = [''] + sample_names
        # header expected once the default ID column name is filled in
        self.num_dsv_desired_header = [DSV_DEFAULT_ID_COLUMN] + sample_names

    def tearDown(self):
        """Close the manager and delete the database files it left behind."""
        self.dbm.close()
        leftovers = (
            os.path.abspath('%s/%s.db' % (self.test_write_root, self.testdb)),
            os.path.abspath('%s/%s.root.db' % (self.test_write_root, SYSTEM_NAME_LC)),
        )
        for db_file in leftovers:
            if os.path.exists(db_file):
                os.remove(db_file)
        self.dbm = None

    def _make_dsv(self, handle, **options):
        """Construct a DSV over ``handle`` in the test DB and test table."""
        return DSV(self.dbm, self.testdb, handle, dtname=self.test_dtname, **options)

    def _create_check_close(self, dsv):
        """Create the table, assert it exists and is empty, then close."""
        dsv.create()
        self.assertTrue(dsv.isCreated())
        self.assertTrue(dsv.isEmpty())
        dsv.close()

    def test_init1(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # default DSV, dialect and delimiter sniffed
        dsv = self._make_dsv(handle)
        self.assertFalse(dsv.isCreated())
        self.assertEqual(',', dsv.dialect.delimiter)
        dsv.close()

    def test_init2(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # predefined delimiter, resolved successfully
        # NOTE: class does not check if delimiter is valid at this point
        dsv = self._make_dsv(handle, delimiter='\t')
        self.assertFalse(dsv.isCreated())
        self.assertEqual(csv.get_dialect('excel-tab'), dsv.dialect)
        self.assertEqual('\t', dsv.dialect.delimiter)
        dsv.close()

    def test_init3(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # delimiter not resolved
        with self.assertRaises(Error):
            self._make_dsv(handle, delimiter='AAAAAA')

    def test_init4(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # delimiter not resolved
        with self.assertRaises(Error):
            self._make_dsv(handle, delimiter=('A',))

    def test_init5(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # delimiter not resolved
        with self.assertRaises(Error):
            self._make_dsv(handle, delimiter=100)

    def test_init6(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # delimiter sniffed, comment resolved successfully
        dsv = self._make_dsv(handle, comment='#')
        self.assertFalse(dsv.isCreated())
        self.assertEqual('#', dsv.comment)
        dsv.close()

    def test_init7(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # delimiter sniffed, comment not resolved
        with self.assertRaises(Error):
            self._make_dsv(handle, comment=('#',))

    def test_init8(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # delimiter sniffed, comment not resolved
        with self.assertRaises(Error):
            self._make_dsv(handle, comment=1000)

    def test_init9(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # delimiter sniffed, header extracted (default), ID resolved
        dsv = self._make_dsv(handle)
        self.assertSequenceEqual(self.num_dsv_desired_header, dsv.header)
        self._create_check_close(dsv)

    def test_init10(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # delimiter sniffed, header extracted (default), ID not resolved
        dsv = self._make_dsv(handle, make_missing_ID_column=False)
        self.assertSequenceEqual(self.num_dsv_actual_header, dsv.header)
        self._create_check_close(dsv)

    def test_init11(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # delimiter sniffed, header auto-generated
        dsv = self._make_dsv(handle, header=())
        column_count = len(self.num_dsv_actual_header)
        numbered = tuple('%d' % pos for pos in range(1, column_count + 1))
        self.assertSequenceEqual(numbered, dsv.header)
        self._create_check_close(dsv)

    def test_init12(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # delimiter sniffed, header supplied (proper length)
        column_count = len(self.num_dsv_actual_header)
        supplied = tuple('C%d' % pos for pos in range(1, column_count + 1))
        dsv = self._make_dsv(handle, header=supplied)
        self.assertSequenceEqual(supplied, dsv.header)
        self._create_check_close(dsv)

    def test_init13(self):
        handle = DSV.getHandle(self.num_dsv_path)
        # delimiter sniffed, header supplied (improper length)
        doubled_count = len(self.num_dsv_actual_header) * 2
        supplied = tuple('C%d' % pos for pos in range(1, doubled_count + 1))
        with self.assertRaises(Error):
            self._make_dsv(handle, header=supplied)
Пример #28
0
    def setUp(self):
        """Prepare the manager, the num.dsv path and reference numeric arrays.

        Sets up row/column selectors (empty, single row 'V1', the 'ID'
        column), an empty array of shape (1, 0), a two-row reference array
        with 91 values per row and a one-row reference array, plus generated
        row and column labels.
        """
        self.test_data_root = TEST_INVARIANTS['test_data_root']
        self.test_write_root = TEST_INVARIANTS['test_write_root']
        self.testdb = 'DB1'
        self.test_dtname = 'Test1'
        self.dbm = DBManager(self.test_write_root)
        # num.dsv
        self.num_dsv_path = os.path.abspath('%s/%s' %
                                            (self.test_data_root, 'num.dsv'))
        # row / column selectors used by the tests
        self.sample_rows_none = ()
        self.sample_rows_1 = ('V1', )
        self.sample_cols_none = ()
        self.sample_cols_with_id = ['ID']

        # zero-column array with a single row, i.e. shape (1, 0)
        self.empty_array = numpy.array([]).reshape((1, 0))
        # two rows of 91 values each
        # NOTE(review): presumably the numeric content of num.dsv -- confirm
        self.array1 = numpy.array([
            [
                7.29865639942, 7.1839394018853, 8.08785988003525,
                8.43784327460378, 7.56725674896063, 7.17150350961048,
                8.23772125375395, 7.26860393651388, 6.74186036580687,
                7.55493056104098, 7.37521470969549, 6.35468766815909,
                7.03794441889888, 6.75197742759923, 7.26608934160658,
                8.70335292880697, 6.85443361759566, 7.59055769774248,
                8.01751559655053, 6.99993079846214, 7.10871523619365,
                7.65161630470663, 6.71058065426046, 6.64437907655326,
                6.93172233805358, 7.61870427987243, 6.9634175191832,
                6.37433009206648, 6.34485366708736, 6.0977075555399,
                6.9061361459302, 6.54264897912374, 6.31961323363347,
                6.16533391728077, 6.90481905323935, 6.7168440158265,
                7.22535319774288, 6.20123577217092, 6.93391118518623,
                6.82985307889579, 6.35468239627533, 7.09693639659124,
                7.60449775270475, 7.12266778930967, 6.35835046528365,
                6.76414046791, 6.17508883882112, 6.52508274039929,
                7.11162248509395, 6.89152906126555, 6.49949720627377,
                6.69448041622817, 6.37526926527225, 5.80401273298264,
                7.12987703240072, 6.05831629170905, 6.81624397767137,
                6.66820808623227, 6.64998519558867, 6.42308111524492,
                7.58672787003923, 3.84767749509431, 6.71665724008276,
                6.35468766815909, 6.54859953448512, 7.23447515724748,
                6.70007125889196, 6.28445976227631, 6.75206243946758,
                6.7168440158265, 6.55922419484843, 6.93675713126568,
                6.80067557800434, 6.50103393612957, 6.91542815411986,
                6.19960368164491, 7.6448783709798, 6.2125929974423,
                6.35468766815909, 7.32784699996015, 6.14659907126786,
                6.7168440158265, 6.8825610653412, 6.72831600642366,
                6.46374697412319, 5.79584776993902, 6.0825372527799,
                7.1204899554919, 6.39620062779895, 6.35814627516342,
                6.35814627516342
            ],
            [
                2.38904325749261, 2.37588862645719, 2.37310583895584,
                2.38904325749261, 2.42091222425779, 2.38904325749261,
                2.38626046999126, 2.38904325749261, 2.38904325749261,
                2.41002306956031, 2.38904325749261, 2.38904325749261,
                2.38904325749261, 2.37310583895584, 2.38626046999126,
                2.34429782913723, 2.38904325749261, 2.98112952430922,
                2.34553574786241, 2.37310583895584, 2.39660701797421,
                2.38904325749261, 2.40955866820479, 2.38626046999126,
                2.35577218230877, 2.39443448171899, 2.34433277775847,
                2.69053923836483, 2.38430054425455, 2.86158891209344,
                2.34595261411454, 2.89813268468409, 2.42777977950130,
                2.38626046999126, 2.44904175049461, 3.55795174775419,
                2.66896481156844, 2.38626046999126, 2.71772299956764,
                2.61602731442131, 2.56996895766296, 3.86202701130675,
                2.38904325749261, 2.35577218230877, 2.60505670342601,
                3.12697260562512, 2.38904325749261, 3.15740854425796,
                2.65364423092787, 2.45124596034905, 3.14913252263311,
                2.38904325749261, 2.39700474393300, 2.38904325749261,
                2.46188514405506, 3.23873137510437, 2.55373906857937,
                3.39601442806742, 3.16936129560691, 3.18777558546775,
                2.38904325749261, 2.38904325749261, 2.38626046999126,
                2.34553574786241, 2.35577218230877, 2.38624782221570,
                2.35577218230877, 2.38904325749261, 2.74265374191966,
                2.37188401381886, 2.37588862645719, 2.38904325749261,
                2.35577218230877, 2.35121946858936, 2.49946444329392,
                2.38904325749261, 2.34553574786241, 2.93960156829307,
                2.38904325749261, 2.39182604499395, 2.38904325749261,
                2.35315614841910, 2.47149945385376, 2.38626046999126,
                2.39596753440869, 2.38904325749261, 2.40223987191512,
                2.34715558421848, 2.38210356896247, 2.34719053283972,
                2.76820667786915
            ],
        ])
        # single-row array; the values appear to repeat the first row of
        # array1 (presumably the row selected by sample_rows_1 -- confirm)
        self.array_v1 = numpy.array([
            [
                7.29865639942, 7.1839394018853, 8.08785988003525,
                8.43784327460378, 7.56725674896063, 7.17150350961048,
                8.23772125375395, 7.26860393651388, 6.74186036580687,
                7.55493056104098, 7.37521470969549, 6.35468766815909,
                7.03794441889888, 6.75197742759923, 7.26608934160658,
                8.70335292880697, 6.85443361759566, 7.59055769774248,
                8.01751559655053, 6.99993079846214, 7.10871523619365,
                7.65161630470663, 6.71058065426046, 6.64437907655326,
                6.93172233805358, 7.61870427987243, 6.9634175191832,
                6.37433009206648, 6.34485366708736, 6.0977075555399,
                6.9061361459302, 6.54264897912374, 6.31961323363347,
                6.16533391728077, 6.90481905323935, 6.7168440158265,
                7.22535319774288, 6.20123577217092, 6.93391118518623,
                6.82985307889579, 6.35468239627533, 7.09693639659124,
                7.60449775270475, 7.12266778930967, 6.35835046528365,
                6.76414046791, 6.17508883882112, 6.52508274039929,
                7.11162248509395, 6.89152906126555, 6.49949720627377,
                6.69448041622817, 6.37526926527225, 5.80401273298264,
                7.12987703240072, 6.05831629170905, 6.81624397767137,
                6.66820808623227, 6.64998519558867, 6.42308111524492,
                7.58672787003923, 3.84767749509431, 6.71665724008276,
                6.35468766815909, 6.54859953448512, 7.23447515724748,
                6.70007125889196, 6.28445976227631, 6.75206243946758,
                6.7168440158265, 6.55922419484843, 6.93675713126568,
                6.80067557800434, 6.50103393612957, 6.91542815411986,
                6.19960368164491, 7.6448783709798, 6.2125929974423,
                6.35468766815909, 7.32784699996015, 6.14659907126786,
                6.7168440158265, 6.8825610653412, 6.72831600642366,
                6.46374697412319, 5.79584776993902, 6.0825372527799,
                7.1204899554919, 6.39620062779895, 6.35814627516342,
                6.35814627516342
            ],
        ])
        # generated labels: 2 row names and 91 column names, matching the
        # dimensions of array1
        self.array1_rows = ['__R%d__' % i for i in range(2)]
        self.array1_cols = ['__S%d__' % i for i in range(91)]
Пример #29
0
 def setUp(self):
     """Load ssdata.dsv and define the reference subsets and categorizers."""
     self.test_data_root = TEST_INVARIANTS['test_data_root']
     self.test_write_root = TEST_INVARIANTS['test_write_root']
     self.testdb = 'DB1'
     self.test_dtname = 'Test1'
     self.dbm = DBManager(self.test_write_root)
     # ssdata.dsv
     dsv_file = os.path.join(self.test_data_root, 'ssdata.dsv')
     self.ssdata_dsv_path = os.path.abspath(dsv_file)
     self.ssdata_comment = '#'
     handle = DSV.getHandle(self.ssdata_dsv_path)
     table = DSV(self.dbm,
                 self.testdb,
                 handle,
                 dtname=self.test_dtname,
                 comment=self.ssdata_comment)
     self.ssdata_dsv1 = table
     table.create()
     table.loadAll()
     table.close()
     self.ssdata_samples = ('S1', 'S2', 'S3', 'S4', 'S5')
     # pkcidmap
     self.pkc2id1 = {
         'PKC1': ('R1', 'R2'),
         'PKC2': ('R3', ),
         'PKC3': ('R4', 'R5')
     }

     def expected_info(pkc_ids, columns):
         # one (subset info, None) pair per class, rows taken from pkc2id1
         pairs = []
         for pkc_id in pkc_ids:
             info = {
                 'pkcID': pkc_id,
                 'dtable': self.ssdata_dsv1,
                 'rows': list(self.pkc2id1[pkc_id]),
                 'cols': list(columns),
             }
             pairs.append((info, None))
         return pairs

     self.pkc1 = ('PKC1', 'PKC2', 'PKC3')
     self.ss_cols1 = '*'
     # all samples
     self.ref_ss1 = expected_info(self.pkc1, self.ssdata_samples)
     self.ss_cols2 = ('S1', 'S4', 'S5')
     # selected samples only
     self.ref_ss2 = expected_info(self.pkc1, self.ss_cols2)
     self.pkc2 = ('PKC3', 'PKC1')
     # selected samples, classes requested in a different order
     self.ref_ss3 = expected_info(self.pkc2, self.ss_cols2)
     # expected data arrays for pkc2 restricted to ss_cols2
     values_pkc3 = numpy.array([
         [2.44753543273, 42.9497086717, 30.8331998765],
         [42.1888598933, 39.1743921225, 15.9744094108],
     ])
     values_pkc1 = numpy.array([
         [16.5734780715, 14.8233987496, 21.7385342744],
         [60.0958378228, 98.4321570519, 71.9193619126],
     ])
     self.ref_ss4 = [(None, values_pkc3), (None, values_pkc1)]
     # for categorization tests
     self.pkc2id2 = {'PKC1': ('R1', 'R2', 'R3', 'R4'), 'PKC2': ('R5', )}
     self.pkc3 = ('PKC1', 'PKC2')
     self.size_thr = 3
     self.cat1 = SubsetSizeCategorizer(self.size_thr)
     self.exp_categories1 = ['>', '<=']
     self.cat2 = NullCategorizer()
     self.exp_categories2 = [self.cat2.NULL for _ in self.pkc3]
Пример #30
0
 def test_DBManager4(self):
     """An empty tuple is rejected as a database key."""
     manager = DBManager(self.test_write_root)
     bad_key = ()
     # getDB raises for the invalid key, getDBloc just reports no location
     self.assertRaises(Error, manager.getDB, bad_key)
     location = manager.getDBloc(bad_key)
     self.assertIsNone(location)
     manager.close()