示例#1
0
    def test_query_incl_range_upper_limit_only(self):
        # prepare
        genoq = GenoQuery(get_repository_path('example01'))

        # run
        bgen_file = genoq.get_incl_range(chr=1, stop=276)

        # validate
        assert bgen_file is not None
        assert isfile(bgen_file)

        results = qctool(bgen_file)

        assert results is not None
        assert hasattr(results, 'shape')
        assert hasattr(results, 'columns')
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 3

        rsid_values = results['rsid'].unique()
        assert len(rsid_values) == 3
        assert results.loc[0, 'rsid'] == 'rs1'
        assert results.loc[1, 'rsid'] == 'rs2'
        assert results.loc[2, 'rsid'] == 'rs3'

        assert results.loc[0, 'allele1'] == 'G'
        assert results.loc[0, 'allele2'] == 'A'

        assert results.loc[1, 'allele1'] == 'G'
        assert results.loc[1, 'allele2'] == 'C'

        assert results.loc[2, 'allele1'] == 'C'
        assert results.loc[2, 'allele2'] == 'A'

        assert results.loc[0, '1.aa'] == 0.7491
        assert results.loc[0, '1.ab'] == 0.0133
        assert results.loc[0, '1.bb'] == 0.2376

        assert results.loc[1, '2.aa'] == 0.8654
        assert results.loc[1, '2.ab'] == 0.1041
        assert results.loc[1, '2.bb'] == 0.0306

        assert results.loc[2, '300.aa'] == 0.0828
        assert results.loc[2, '300.ab'] == 0.7752
        assert results.loc[2, '300.bb'] == 0.1421

        pos_values = results['pos'].unique()
        assert len(pos_values) == 3
        assert results.loc[0, 'pos'] == 100
        assert results.loc[1, 'pos'] == 181
        assert results.loc[2, 'pos'] == 276
示例#2
0
    def test_query_incl_rsids_multiple(self):
        # prepare
        genoq = GenoQuery(get_repository_path('example01'))

        # run
        bgen_file = genoq.get_incl_rsids(2, ['rs2000082', 'rs2000142'])

        # validate
        assert bgen_file is not None
        assert isfile(bgen_file)

        results = qctool(bgen_file)

        assert results is not None
        assert hasattr(results, 'shape')
        assert hasattr(results, 'columns')
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 2

        rsid_values = results['rsid'].unique()
        assert len(rsid_values) == 2
        assert results.loc[0, 'rsid'] == 'rs2000082'
        assert results.loc[1, 'rsid'] == 'rs2000142'

        assert results.loc[0, 'allele1'] == 'A'
        assert results.loc[0, 'allele2'] == 'T'

        assert results.loc[1, 'allele1'] == 'T'
        assert results.loc[1, 'allele2'] == 'G'

        assert results.loc[0, '1.aa'] == 0.0016
        assert results.loc[0, '1.ab'] == 0.8613
        assert results.loc[0, '1.bb'] == 0.1371

        assert results.loc[0, '300.aa'] == 0.0234
        assert results.loc[0, '300.ab'] == 0.0148
        assert results.loc[0, '300.bb'] == 0.9618

        assert results.loc[1, '1.aa'] == 0.9619
        assert results.loc[1, '1.ab'] == 0.0015
        assert results.loc[1, '1.bb'] == 0.0366

        assert results.loc[1, '300.aa'] == 0.0185
        assert results.loc[1, '300.ab'] == 0.1408
        assert results.loc[1, '300.bb'] == 0.8407

        pos_values = results['pos'].unique()
        assert len(pos_values) == 2
        assert results.loc[0, 'pos'] == 6192
        assert results.loc[1, 'pos'] == 10750
示例#3
0
    def test_genotype_temp_files_removed_in_server_side(self):
        # Prepare
        shutil.rmtree('/tmp/ukbrest2tmp/', ignore_errors=True)
        genoq = GenoQuery(get_repository_path('example01'),
                          tmpdir='/tmp/ukbrest2tmp/')

        # Configure
        app.app.config['TESTING'] = True
        app.app.config['genoquery'] = genoq
        test_client = app.app.test_client()

        # Run
        response = test_client.get(
            '/ukbrest/api/v1.0/genotype/1/positions/100/276')

        # Validate
        assert response.status_code == 200, response.status_code

        bgen_file = self._save_file(response)

        results = qctool(bgen_file)

        assert results is not None
        assert hasattr(results, 'shape')
        assert hasattr(results, 'columns')
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 3

        assert os.path.isdir('/tmp/ukbrest2tmp/')
        assert len(os.listdir('/tmp/ukbrest2tmp/')) == 0
示例#4
0
    def setUp(self,
              data_dir='example01',
              bgen_names='chr{:d}impv1.bgen',
              bgenix_path='bgenix',
              user_pass_line=None):
        super(TestRestApiGenotype, self).setUp()

        # Load data
        genoq = GenoQuery(get_repository_path(data_dir),
                          bgen_names=bgen_names,
                          bgenix_path=bgenix_path)

        # Configure
        app.app.config['testing'] = True
        app.app.config['auth'] = None
        app.app.config['genoquery'] = genoq

        if user_pass_line is not None:
            f = tempfile.NamedTemporaryFile(delete=False)
            f.close()

            with open(f.name, 'w') as fi:
                fi.write(user_pass_line)

            ph = PasswordHasher(f.name, method='pbkdf2:sha256')
            app.app.config['auth'] = ph.setup_http_basic_auth()

        self.app = app.app.test_client()
示例#5
0
def setup_app(app, ph):
    # Add GenoQuery object
    genoq = GenoQuery(**config.get_genoquery_parameters())
    app.config.update({'genoquery': genoq})

    # Add Pheno2SQL object
    p2sql = Pheno2SQL(**config.get_pheno2sql_parameters())
    app.config.update({'pheno2sql': p2sql})

    # Add auth object
    auth = ph.setup_http_basic_auth()
    app.config.update({'auth': auth})
示例#6
0
    def test_query_incl_range_temp_directory(self):
        # prepare
        shutil.rmtree('/tmp/ukbrest_different/', ignore_errors=True)
        genoq = GenoQuery(get_repository_path('example01'),
                          tmpdir='/tmp/ukbrest_different/')

        # run
        bgen_file = genoq.get_incl_range(chr=1, start=100, stop=276)

        # validate
        assert bgen_file is not None
        assert isfile(bgen_file)

        results = qctool(bgen_file)

        assert results is not None
        assert hasattr(results, 'shape')
        assert hasattr(results, 'columns')
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 3

        assert isdir('/tmp/ukbrest_different/')
        assert len(os.listdir('/tmp/ukbrest_different/')) == 1
示例#7
0
    from ukbrest import config
    from ukbrest.common.utils.misc import update_parameters_from_args, parameter_empty

    logger = config.logger
    parser = config.get_argparse_arguments()

    args = parser.parse_args()

    # GenoQuery
    genoq_parameters = config.get_genoquery_parameters()
    genoq_parameters = update_parameters_from_args(genoq_parameters, args)

    if parameter_empty(genoq_parameters, 'genotype_path'):
        logger.warning('--genotype-path missing')

    genoq = GenoQuery(**genoq_parameters)
    app.config.update({'genoquery': genoq})

    # Pheno2SQL
    pheno2sql_parameters = config.get_pheno2sql_parameters()
    pheno2sql_parameters = update_parameters_from_args(pheno2sql_parameters,
                                                       args)

    if parameter_empty(pheno2sql_parameters, 'db_uri'):
        parser.error('--db-uri missing')

    p2sql = Pheno2SQL(**pheno2sql_parameters)

    app.config.update({'pheno2sql': p2sql})

    ph = PasswordHasher(args.users_file, method='pbkdf2:sha256')
示例#8
0
    def test_query_incl_range_lower_and_upper_limits_at_end(self):
        # prepare
        genoq = GenoQuery(get_repository_path('example01'))

        # run
        bgen_file = genoq.get_incl_range(chr=1, start=18058, stop=18389)

        # validate
        assert bgen_file is not None
        assert isfile(bgen_file)

        results = qctool(bgen_file)

        assert results is not None
        assert hasattr(results, 'shape')
        assert hasattr(results, 'columns')
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 5

        rsid_values = results['rsid'].unique()
        assert len(rsid_values) == 5
        assert results.loc[0, 'rsid'] == 'rs246'
        assert results.loc[1, 'rsid'] == 'rs247'
        assert results.loc[2, 'rsid'] == 'rs248'
        assert results.loc[3, 'rsid'] == 'rs249'
        assert results.loc[4, 'rsid'] == 'rs250'

        assert results.loc[0, 'allele1'] == 'C'
        assert results.loc[0, 'allele2'] == 'A'

        assert results.loc[1, 'allele1'] == 'T'
        assert results.loc[1, 'allele2'] == 'C'

        assert results.loc[2, 'allele1'] == 'G'
        assert results.loc[2, 'allele2'] == 'C'

        assert results.loc[3, 'allele1'] == 'G'
        assert results.loc[3, 'allele2'] == 'A'

        assert results.loc[4, 'allele1'] == 'T'
        assert results.loc[4, 'allele2'] == 'C'

        assert results.loc[0, '1.aa'] == 0.0537
        assert results.loc[0, '1.ab'] == 0.9160
        assert results.loc[0, '1.bb'] == 0.0302

        assert results.loc[1, '2.aa'] == 0.0698
        assert results.loc[1, '2.ab'] == 0.9116
        assert results.loc[1, '2.bb'] == 0.0186

        assert results.loc[2, '300.aa'] == 0.0826
        assert results.loc[2, '300.ab'] == 0.0316
        assert results.loc[2, '300.bb'] == 0.8858

        assert results.loc[3, '299.aa'] == 0.7988
        assert results.loc[3, '299.ab'] == 0.1666
        assert results.loc[3, '299.bb'] == 0.0346

        assert results.loc[4, '150.aa'] == 0.0773
        assert results.loc[4, '150.ab'] == 0.8683
        assert results.loc[4, '150.bb'] == 0.0544

        pos_values = results['pos'].unique()
        assert len(pos_values) == 5
        assert results.loc[0, 'pos'] == 18058
        assert results.loc[1, 'pos'] == 18139
        assert results.loc[2, 'pos'] == 18211
        assert results.loc[3, 'pos'] == 18294
        assert results.loc[4, 'pos'] == 18389
示例#9
0
    def test_query_incl_rsids_using_file(self):
        # prepare
        genoq = GenoQuery(get_repository_path('example01'))
        # rsids are not ordered in the file, but they should be returned ordered
        rsids_file = get_repository_path('example01/rsids01.txt')

        # run
        bgen_file = genoq.get_incl_rsids(2, [rsids_file])

        # validate
        assert bgen_file is not None
        assert isfile(bgen_file)

        results = qctool(bgen_file)

        assert results is not None
        assert hasattr(results, 'shape')
        assert hasattr(results, 'columns')
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 5

        rsid_values = results['rsid'].unique()
        assert len(rsid_values) == 5
        assert results.loc[0, 'rsid'] == 'rs2000000'
        assert results.loc[1, 'rsid'] == 'rs2000020'
        assert results.loc[2, 'rsid'] == 'rs2000079'
        assert results.loc[3, 'rsid'] == 'rs2000138'
        assert results.loc[4, 'rsid'] == 'rs2000149'

        assert results.loc[0, 'allele1'] == 'A'
        assert results.loc[0, 'allele2'] == 'G'

        assert results.loc[1, 'allele1'] == 'G'
        assert results.loc[1, 'allele2'] == 'C'

        assert results.loc[2, 'allele1'] == 'C'
        assert results.loc[2, 'allele2'] == 'A'

        assert results.loc[3, 'allele1'] == 'A'
        assert results.loc[3, 'allele2'] == 'G'

        assert results.loc[4, 'allele1'] == 'G'
        assert results.loc[4, 'allele2'] == 'T'

        assert results.loc[0, '1.aa'] == 0.9440
        assert results.loc[0, '1.ab'] == 0.0298
        assert results.loc[0, '1.bb'] == 0.0262

        assert results.loc[1, '2.aa'] == 0.1534
        assert results.loc[1, '2.ab'] == 0.7249
        assert results.loc[1, '2.bb'] == 0.1218

        assert results.loc[2, '3.aa'] == 0.9357
        assert results.loc[2, '3.ab'] == 0.0047
        assert results.loc[2, '3.bb'] == 0.0596

        assert results.loc[3, '1.aa'] == 0.8246
        assert results.loc[3, '1.ab'] == 0.0686
        assert results.loc[3, '1.bb'] == 0.1068

        assert results.loc[4, '2.aa'] == 0.0137
        assert results.loc[4, '2.ab'] == 0.0953
        assert results.loc[4, '2.bb'] == 0.8909

        pos_values = results['pos'].unique()
        assert len(pos_values) == 5
        assert results.loc[0, 'pos'] == 100
        assert results.loc[1, 'pos'] == 1623
        assert results.loc[2, 'pos'] == 5925
        assert results.loc[3, 'pos'] == 10447
        assert results.loc[4, 'pos'] == 11226
示例#10
0
    def test_query_incl_range_using_file(self):
        # prepare
        genoq = GenoQuery(get_repository_path('example01'))
        # positions are not ordered in the file, but they should be returned ordered
        positions_file = get_repository_path('example01/positions01.txt')

        # run
        bgen_file = genoq.get_incl_range_from_file(2, positions_file)

        # validate
        assert bgen_file is not None
        assert isfile(bgen_file)

        results = qctool(bgen_file)

        assert results is not None
        assert hasattr(results, 'shape')
        assert hasattr(results, 'columns')
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 5

        rsid_values = results['rsid'].unique()
        assert len(rsid_values) == 5
        assert results.loc[0, 'rsid'] == 'rs2000003'
        assert results.loc[1, 'rsid'] == 'rs2000008'
        assert results.loc[2, 'rsid'] == 'rs2000094'
        assert results.loc[3, 'rsid'] == 'rs2000118'
        assert results.loc[4, 'rsid'] == 'rs2000149'

        assert results.loc[0, 'allele1'] == 'C'
        assert results.loc[0, 'allele2'] == 'G'

        assert results.loc[1, 'allele1'] == 'T'
        assert results.loc[1, 'allele2'] == 'A'

        assert results.loc[2, 'allele1'] == 'C'
        assert results.loc[2, 'allele2'] == 'G'

        assert results.loc[3, 'allele1'] == 'T'
        assert results.loc[3, 'allele2'] == 'C'

        assert results.loc[4, 'allele1'] == 'G'
        assert results.loc[4, 'allele2'] == 'T'

        assert results.loc[0, '1.aa'] == 0.7889
        assert results.loc[0, '1.ab'] == 0.1538
        assert results.loc[0, '1.bb'] == 0.0573

        assert results.loc[1, '2.aa'] == 0.8776
        assert results.loc[1, '2.ab'] == 0.0670
        assert results.loc[1, '2.bb'] == 0.0554

        assert results.loc[2, '3.aa'] == 0.0553
        assert results.loc[2, '3.ab'] == 0.0939
        assert results.loc[2, '3.bb'] == 0.8509

        assert results.loc[3, '1.aa'] == 0.1219
        assert results.loc[3, '1.ab'] == 0.8459
        assert results.loc[3, '1.bb'] == 0.0323

        assert results.loc[4, '2.aa'] == 0.0137
        assert results.loc[4, '2.ab'] == 0.0953
        assert results.loc[4, '2.bb'] == 0.8909

        pos_values = results['pos'].unique()
        assert len(pos_values) == 5
        assert results.loc[0, 'pos'] == 300
        assert results.loc[1, 'pos'] == 661
        assert results.loc[2, 'pos'] == 7181
        assert results.loc[3, 'pos'] == 8949
        assert results.loc[4, 'pos'] == 11226