예제 #1
0
    def test_query_incl_rsids_multiple(self):
        """Check get_incl_rsids when two rsids are passed directly as strings.

        The path returned by the query is read back through ``qctool`` and the
        resulting table is compared, cell by cell, against the values expected
        for rs2000082 and rs2000142.
        """
        # prepare
        query = GenoQuery(get_repository_path('example01'))

        # run
        output_path = query.get_incl_rsids(2, ['rs2000082', 'rs2000142'])

        # validate: the query must have produced an actual file
        assert output_path is not None
        assert isfile(output_path)

        table = qctool(output_path)

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)

        # presumably 6 leading columns plus three ('N.aa', 'N.ab', 'N.bb')
        # for each of 300 samples -- TODO confirm against the qctool helper
        expected_columns = 6 + 300 * 3
        expected_rows = 2
        assert table.shape[1] == expected_columns
        assert table.shape[0] == expected_rows

        # one distinct rsid per returned row
        assert len(table['rsid'].unique()) == expected_rows

        # (row label, column name, expected value)
        expected_cells = (
            (0, 'rsid', 'rs2000082'),
            (1, 'rsid', 'rs2000142'),

            (0, 'allele1', 'A'),
            (0, 'allele2', 'T'),

            (1, 'allele1', 'T'),
            (1, 'allele2', 'G'),

            (0, '1.aa', 0.0016),
            (0, '1.ab', 0.8613),
            (0, '1.bb', 0.1371),

            (0, '300.aa', 0.0234),
            (0, '300.ab', 0.0148),
            (0, '300.bb', 0.9618),

            (1, '1.aa', 0.9619),
            (1, '1.ab', 0.0015),
            (1, '1.bb', 0.0366),

            (1, '300.aa', 0.0185),
            (1, '300.ab', 0.1408),
            (1, '300.bb', 0.8407),
        )
        for row, column, expected in expected_cells:
            assert table.loc[row, column] == expected

        # one distinct position per returned row
        assert len(table['pos'].unique()) == expected_rows

        for row, position in enumerate((6192, 10750)):
            assert table.loc[row, 'pos'] == position
예제 #2
0
    def test_query_incl_rsids_using_file(self):
        """Check get_incl_rsids when the rsids are supplied through a file path.

        The path returned by the query is read back through ``qctool`` and the
        resulting table is compared, cell by cell, against the values expected
        for the five rsids, in the order the test expects them to come back.
        """
        # prepare
        query = GenoQuery(get_repository_path('example01'))
        # the file lists the rsids unordered; the output is expected ordered
        rsids_path = get_repository_path('example01/rsids01.txt')

        # run
        output_path = query.get_incl_rsids(2, [rsids_path])

        # validate: the query must have produced an actual file
        assert output_path is not None
        assert isfile(output_path)

        table = qctool(output_path)

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)

        # presumably 6 leading columns plus three ('N.aa', 'N.ab', 'N.bb')
        # for each of 300 samples -- TODO confirm against the qctool helper
        expected_columns = 6 + 300 * 3
        expected_rows = 5
        assert table.shape[1] == expected_columns
        assert table.shape[0] == expected_rows

        # one distinct rsid per returned row
        assert len(table['rsid'].unique()) == expected_rows

        # (row label, column name, expected value)
        expected_cells = (
            (0, 'rsid', 'rs2000000'),
            (1, 'rsid', 'rs2000020'),
            (2, 'rsid', 'rs2000079'),
            (3, 'rsid', 'rs2000138'),
            (4, 'rsid', 'rs2000149'),

            (0, 'allele1', 'A'),
            (0, 'allele2', 'G'),

            (1, 'allele1', 'G'),
            (1, 'allele2', 'C'),

            (2, 'allele1', 'C'),
            (2, 'allele2', 'A'),

            (3, 'allele1', 'A'),
            (3, 'allele2', 'G'),

            (4, 'allele1', 'G'),
            (4, 'allele2', 'T'),

            (0, '1.aa', 0.9440),
            (0, '1.ab', 0.0298),
            (0, '1.bb', 0.0262),

            (1, '2.aa', 0.1534),
            (1, '2.ab', 0.7249),
            (1, '2.bb', 0.1218),

            (2, '3.aa', 0.9357),
            (2, '3.ab', 0.0047),
            (2, '3.bb', 0.0596),

            (3, '1.aa', 0.8246),
            (3, '1.ab', 0.0686),
            (3, '1.bb', 0.1068),

            (4, '2.aa', 0.0137),
            (4, '2.ab', 0.0953),
            (4, '2.bb', 0.8909),
        )
        for row, column, expected in expected_cells:
            assert table.loc[row, column] == expected

        # one distinct position per returned row
        assert len(table['pos'].unique()) == expected_rows

        for row, position in enumerate((100, 1623, 5925, 10447, 11226)):
            assert table.loc[row, 'pos'] == position