Exemplo n.º 1
0
    def test_genotype_temp_files_removed_in_server_side(self):
        """Check the GenoQuery tmpdir is empty after a positions request is served.

        A dedicated scratch directory is handed to GenoQuery; once the
        response has been received and parsed, that directory must still
        exist but contain no entries.
        """
        scratch_dir = '/tmp/ukbrest2tmp/'

        # Remove leftovers from previous runs so the final emptiness check
        # only reflects this request.
        shutil.rmtree(scratch_dir, ignore_errors=True)
        query_engine = GenoQuery(get_repository_path('example01'),
                                 tmpdir=scratch_dir)

        # Wire the query engine into the application and get a client.
        app.app.config['TESTING'] = True
        app.app.config['genoquery'] = query_engine
        client = app.app.test_client()

        reply = client.get('/ukbrest/api/v1.0/genotype/1/positions/100/276')

        assert reply.status_code == 200, reply.status_code

        # The payload is stored to disk and parsed with qctool.
        parsed = qctool(self._save_file(reply))

        assert parsed is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(parsed, attribute)
        # presumably 6 descriptive columns plus 3 columns for each of 300
        # samples -- TODO confirm against the fixture
        assert parsed.shape[1] == 6 + 300 * 3
        assert parsed.shape[0] == 3

        # The scratch directory survives, but nothing is left inside it.
        assert os.path.isdir(scratch_dir)
        assert not os.listdir(scratch_dir)
Exemplo n.º 2
0
    def test_process_users_file_file_does_not_exist_test00(self):
        """Process a users file located at a path that is expected not to exist.

        There are no assertions: the test passes as long as
        ``process_users_file`` returns without raising.
        """
        missing_users_path = get_repository_path('no/existing/file/here.txt')

        hasher = PasswordHasher(missing_users_path, method='pbkdf2:sha256')
        hasher.process_users_file()
Exemplo n.º 3
0
    def test_process_users_file_one_password_hashed_rest_not_test01(self):
        """Process a users file in which only the 'adams' entry must stay untouched.

        The fixture is copied to a '.bak' file so the original is never
        modified.  After processing, every password except the one of
        'adams' must differ from the value in the original file, while the
        'adams' password must be identical to the original one.  All
        resulting passwords must be 93 characters long.
        """
        # prepare
        orig_user_file = get_repository_path('wsgi/test01/users.txt')
        users_file = orig_user_file + '.bak'
        copyfile(orig_user_file, users_file)

        try:
            orig_users = self.load_data(orig_user_file)

            # run
            ph = PasswordHasher(users_file, method='pbkdf2:sha256')
            ph.process_users_file()

            # evaluate
            assert os.path.isfile(users_file)

            users = self.load_data(users_file)

            assert len(users) == 3
            for user, password in users.items():
                assert user in orig_users, user

                details = '{} / {} / {}'.format(user, password,
                                                orig_users[user])
                if user != 'adams':
                    assert password != orig_users[user], details
                else:
                    # Compare against the ORIGINAL file: comparing with
                    # users[user] would check the value against itself and
                    # could never fail.
                    assert password == orig_users[user], details

                assert len(password) == 93, len(password)
        finally:
            # Always drop the working copy, even when an assertion fails.
            if os.path.isfile(users_file):
                os.remove(users_file)
Exemplo n.º 4
0
    def test_process_users_file_already_hashed_test00(self):
        """Processing a users file twice must not change the stored passwords.

        The fixture is copied to a '.bak' file and processed once; the
        result is loaded as the reference.  The file is then processed a
        second time and every password must equal the reference value and
        be 93 characters long.
        """
        # prepare
        orig_user_file = get_repository_path('wsgi/test00/users.txt')
        users_file = orig_user_file + '.bak'
        copyfile(orig_user_file, users_file)

        try:
            orig_users = self.load_data(orig_user_file)

            # First pass: produces the reference contents.
            ph = PasswordHasher(users_file, method='pbkdf2:sha256')
            ph.process_users_file()
            users = self.load_data(users_file)

            # run (second pass over the already processed file)
            ph = PasswordHasher(users_file, method='pbkdf2:sha256')
            ph.process_users_file()

            # evaluate
            assert os.path.isfile(users_file)

            new_users = self.load_data(users_file)

            assert len(users) == 3
            # The second pass must neither drop nor add entries.
            assert len(new_users) == 3, len(new_users)
            for user, password in new_users.items():
                assert user in orig_users, user
                assert password == users[user], password + ' / ' + users[user]
                assert len(password) == 93, len(password)
        finally:
            # Always drop the working copy, even when an assertion fails.
            if os.path.isfile(users_file):
                os.remove(users_file)
Exemplo n.º 5
0
    def setUp(self,
              data_dir='example01',
              bgen_names='chr{:d}impv1.bgen',
              bgenix_path='bgenix',
              user_pass_line=None):
        """Configure the application with a GenoQuery and create a test client.

        Args:
            data_dir: repository-relative directory handed to GenoQuery.
            bgen_names: file name pattern forwarded to GenoQuery.
            bgenix_path: bgenix executable path forwarded to GenoQuery.
            user_pass_line: when not None, this text is written to a
                temporary users file and HTTP basic auth is set up from it
                through PasswordHasher; otherwise auth stays None.
        """
        super(TestRestApiGenotype, self).setUp()

        query_engine = GenoQuery(get_repository_path(data_dir),
                                 bgen_names=bgen_names,
                                 bgenix_path=bgenix_path)

        # NOTE(review): the key is lower-case 'testing'; Flask's own switch
        # is 'TESTING' -- confirm whether the lower-case spelling is intended.
        settings = app.app.config
        settings['testing'] = True
        settings['auth'] = None
        settings['genoquery'] = query_engine

        if user_pass_line is not None:
            # delete=False keeps the file on disk after the handle is
            # closed so PasswordHasher can open it by name.
            with tempfile.NamedTemporaryFile(mode='w',
                                             delete=False) as users_file:
                users_file.write(user_pass_line)

            hasher = PasswordHasher(users_file.name, method='pbkdf2:sha256')
            settings['auth'] = hasher.setup_http_basic_auth()

        self.app = app.app.test_client()
Exemplo n.º 6
0
    def test_get_iterator_repeated_variant_positions(self):
        """Iterate a BGEN file in which two variants share position 418.

        All 11 items must be returned; selected items are checked for their
        metadata and for a few dosage values, each compared both with the
        dot product of the listed probabilities and with a literal value.
        """
        reader = BGENDosage(
            get_repository_path('set06_repeated_positions/chr1impv1.bgen'))

        variants = list(reader.items(n_rows_cached=5))
        assert len(variants) == 11, len(variants)

        # (item index, position, allele0, allele1, rsid,
        #  [(sample index, probabilities, expected dosage), ...])
        expectations = [
            # snp 1
            (0, 100, 'T', 'A', 'rs1', [
                (0, [0.06817, 0.27690, 0.65493], 1.5867),
                (19, [0.00219, 0.08983, 0.90798], 1.9057),
            ]),
            # snp 5
            (4, 418, 'T', 'A', 'rs5', [
                (0, [0.09158, 0.16910, 0.73933], 1.6477),
                (1, [0.09820, 0.09934, 0.80246], 1.7042),
                (19, [0.02833, 0.93189, 0.03978], 1.0114),
            ]),
            # snp 6 (same position and rsid as snp 5, different allele1)
            (5, 418, 'T', 'C', 'rs5', [
                (0, [0.00598, 0.02878, 0.96524], 1.9592),
                (1, [0.01553, 0.14800, 0.83647], 1.8209),
                (19, [0.08347, 0.02509, 0.89144], 1.8079),
            ]),
            # snp last
            (10, 839, 'G', 'A', 'rs10', [
                (0, [0.03161, 0.82957, 0.13882], 1.1072),
                (19, [0.96104, 0.03167, 0.00729], 0.0462),
            ]),
        ]

        for index, position, allele0, allele1, rsid, samples in expectations:
            variant = variants[index]
            assert variant.chr == 1
            assert variant.position == position
            assert variant.allele0 == allele0
            assert variant.allele1 == allele1
            assert variant.rsid == rsid
            assert variant.dosages.shape == (20, )
            for sample, probabilities, dosage in samples:
                assert truncate(variant.dosages[sample]) == truncate(
                    np.dot(probabilities, [0, 1, 2])) == dosage
Exemplo n.º 7
0
    def test_genotype_rsids_using_file_http_auth_with_credentials(self):
        """Post an rsids file with valid basic-auth credentials and expect data.

        Authentication is enabled through ``setUp(user_pass_line=...)``; the
        request carries matching credentials, so a 200 response is expected
        whose payload parses into 5 rows.
        """
        # Prepare
        self.setUp(user_pass_line='user: thepassword2')

        rsids_file = get_repository_path('example01/rsids01.txt')

        # Run
        # The upload handle is opened in a context manager so it is closed
        # deterministically, whatever the outcome of the request.
        with open(rsids_file, 'rb') as rsids_handle:
            response = self.app.post(
                '/ukbrest/api/v1.0/genotype/2/rsids',
                data={'file': (rsids_handle, rsids_file)},
                headers=self._get_http_basic_auth_header('user',
                                                         'thepassword2'),
            )

        # Validate
        assert response.status_code == 200, response.status_code

        bgen_file = self._save_file(response)

        results = qctool(bgen_file)

        assert results is not None
        assert hasattr(results, 'shape')
        assert hasattr(results, 'columns')
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 5
Exemplo n.º 8
0
    def test_init(self):
        """Constructing a BGENDosage for an existing fixture yields an object."""
        bgen_path = get_repository_path('set00/chr1impv1.bgen')

        reader = BGENDosage(bgen_path)

        assert reader is not None
Exemplo n.º 9
0
    def test_postload_load_samples_data_no_eid_column(self):
        """Load the 'samples_data03' fixture and expect no tables to be created.

        After ``load_samples_data`` runs, neither 'samplesqc' nor
        'relatedness' may exist in the public schema.
        """
        fixture_dir = get_repository_path('postloader/samples_data03')

        loader = Postloader(POSTGRESQL_ENGINE)
        loader.load_samples_data(fixture_dir)

        engine = create_engine(POSTGRESQL_ENGINE)

        # Template asking PostgreSQL whether a table exists in 'public'.
        exists_sql = """
            SELECT EXISTS (
                SELECT 1 FROM pg_tables
                WHERE schemaname = 'public' AND tablename = '{}'
            )"""

        for table_name in ('samplesqc', 'relatedness'):
            outcome = pd.read_sql(exists_sql.format(table_name), engine)

            assert not outcome.iloc[0, 0]
Exemplo n.º 10
0
    def test_performance_get_iterator_with_cache(self):
        """Iterating with n_rows_cached=200 must be at least 3x faster than with 1.

        Both runs read the same file through a fresh BGENDosage and must
        return the same number of items.
        """

        def timed_iteration(rows_cached):
            # Returns the materialised items and the elapsed wall-clock time.
            reader = BGENDosage(get_repository_path('set00/chr2impv1.bgen'))

            started = time()
            items = list(reader.items(n_rows_cached=rows_cached))
            return items, time() - started

        no_cache_results, no_cache_time = timed_iteration(1)
        cache_results, cache_time = timed_iteration(200)

        assert len(no_cache_results) == len(cache_results)
        assert cache_time * 3.0 <= no_cache_time, (cache_time, no_cache_time)
Exemplo n.º 11
0
    def test_postload_samples_data_check_constrains_exist(self):
        """Load samples data with custom id columns/separators and check keys.

        Both 'samplesqc' and 'relatedness' must exist afterwards, and the
        constraint query for 'pk_%%' must report exactly one column, 'eid',
        for each of them.
        """
        fixture_dir = get_repository_path('postloader/samples_data04')

        id_columns = {
            'relatedness.txt': 'ID1',
            'samplesqc.txt': 'ID',
        }
        field_separators = {
            'relatedness.txt': '\t',
            'samplesqc.txt': ',',
        }

        loader = Postloader(POSTGRESQL_ENGINE)
        loader.load_samples_data(fixture_dir,
                                 identifier_columns=id_columns,
                                 separators=field_separators)

        # Template asking PostgreSQL whether a table exists in 'public'.
        exists_sql = """
            SELECT EXISTS (
                SELECT 1 FROM pg_tables
                WHERE schemaname = 'public' AND tablename = '{}'
            )"""

        for table_name in ('samplesqc', 'relatedness'):
            # The table must have been created.
            outcome = pd.read_sql(exists_sql.format(table_name),
                                  create_engine(POSTGRESQL_ENGINE))

            assert outcome.iloc[0, 0]

            # Its primary key must consist of the single column 'eid'.
            pk_sql = self._get_table_contrains(table_name,
                                               relationship_query='pk_%%')
            pk_rows = pd.read_sql(pk_sql, create_engine(POSTGRESQL_ENGINE))
            assert pk_rows is not None
            assert not pk_rows.empty
            pk_columns = pk_rows['column_name'].tolist()
            assert len(pk_columns) == 1
            assert 'eid' in pk_columns
Exemplo n.º 12
0
    def test_basic_call(self):
        """Run the load-data script with its environment set and expect exit code 0."""
        # Environment the script is configured through.
        os.environ.update({
            GENOTYPE_PATH_ENV: get_repository_path('pheno2sql/example12/'),
            PHENOTYPE_PATH: get_repository_path('pheno2sql/example12/'),
            DB_URI_ENV: POSTGRESQL_ENGINE,
            GENOTYPE_BGEN_SAMPLE: 'impv2.sample',
            LOAD_DATA_VACUUM: "yes",
        })

        # The setup helpers run only after the environment is in place.
        _setup_genotype_path()
        _setup_phenotype_path()
        _setup_db_uri()

        command = ['python', self.load_data_path]

        exit_status = call(command)
        assert exit_status == 0
Exemplo n.º 13
0
    def test_postload_codings_table_many_tab_characters_and_na(self):
        """Load the 'codings04_many_tabs' fixture and verify the codings rows.

        The 'codings' table must exist, expose at least the columns
        data_coding/coding/meaning and hold exactly five rows, the last of
        which has the literal meaning 'N/A'.
        """
        fixture_dir = get_repository_path('postloader/codings04_many_tabs')

        loader = Postloader(POSTGRESQL_ENGINE)
        loader.load_codings(fixture_dir)

        # The codings table must have been created.
        exists_sql = """
            SELECT EXISTS (
                SELECT 1 FROM pg_tables
                WHERE schemaname = 'public' AND tablename = '{}'
            )"""
        outcome = pd.read_sql(exists_sql.format('codings'),
                              create_engine(POSTGRESQL_ENGINE))

        assert outcome.iloc[0, 0]

        codings = pd.read_sql(
            "select * from codings order by data_coding, coding",
            create_engine(POSTGRESQL_ENGINE))
        assert codings is not None
        expected_columns = ['data_coding', 'coding', 'meaning']
        assert len(codings.columns) >= len(expected_columns)
        assert all(x in codings.columns for x in expected_columns)

        assert not codings.empty
        assert codings.shape[0] == 5

        # Rows in query order: (data_coding, coding, meaning).
        expected_rows = [
            (7, '0', 'No'),
            (7, '1', 'Yes'),
            (9, '0', 'Female'),
            (9, '1', 'Male'),
            (9, '2', 'N/A'),
        ]
        for cidx, (data_coding, coding, meaning) in enumerate(expected_rows):
            assert codings.loc[cidx, 'data_coding'] == data_coding
            assert codings.loc[cidx, 'coding'] == coding
            assert codings.loc[cidx, 'meaning'] == meaning
Exemplo n.º 14
0
    def test_postload_load_samples_data_one_file(self):
        """Load the 'samples_data01' fixture and verify the 'samplesqc' table.

        The table must exist, contain exactly the four expected columns
        (indexed by 'eid') and four rows with the listed values.
        """
        fixture_dir = get_repository_path('postloader/samples_data01')

        loader = Postloader(POSTGRESQL_ENGINE)
        loader.load_samples_data(fixture_dir)

        engine = create_engine(POSTGRESQL_ENGINE)

        # The samplesqc table must have been created.
        exists_sql = """
            SELECT EXISTS (
                SELECT 1 FROM pg_tables
                WHERE schemaname = 'public' AND tablename = '{}'
            )"""
        outcome = pd.read_sql(exists_sql.format('samplesqc'), engine)

        assert outcome.iloc[0, 0]

        samplesqc = pd.read_sql("select * from samplesqc order by eid asc",
                                create_engine(POSTGRESQL_ENGINE),
                                index_col='eid')
        assert samplesqc is not None
        expected_columns = [
            'ccolumn_name_0_0', 'canothercolumn_0_0', 'cthird_column_0_0',
            'cother_measure_col_umn_0_0'
        ]
        assert len(samplesqc.columns) == len(expected_columns)
        assert all(x in samplesqc.columns for x in expected_columns)

        assert not samplesqc.empty
        assert samplesqc.shape[0] == 4

        # eid -> values, listed in the order of expected_columns.
        expected_rows = [
            (10, ('UKBB', 'Batch', 'SomeValue', 8.33992)),
            (20, ('Other', 'Some', 'AnotherValue', -772.1234)),
            (30, ('Other12', 'Some12', 'AnotherValue12', -0.000001234)),
            (2222240, ('Other13', 'Some13', 'AnotherValue13', 0.051234)),
        ]
        for eid, values in expected_rows:
            for column, value in zip(expected_columns, values):
                assert samplesqc.loc[eid, column] == value
Exemplo n.º 15
0
    def test_verify_password_users_file_does_not_exist_test01(self):
        """No credentials verify when the users file path is expected not to exist."""
        missing_users_path = get_repository_path('no/existing/file/here.txt')

        hasher = PasswordHasher(missing_users_path, method='pbkdf2:sha256')
        hasher.process_users_file()

        # Every user/password pair must be rejected.
        rejected_credentials = [
            ('milton', 'whatever'),
            ('john', 'mypassword'),
            ('adams', 'anotherpassword'),
            ('james', 'mypassword'),
        ]
        for user, password in rejected_credentials:
            assert not hasher.verify_password(user, password)
Exemplo n.º 16
0
    def test_verify_password_users_file_empty_test01(self):
        """No credentials verify for the 'wsgi/test02' users file working copy."""
        source_users_path = get_repository_path('wsgi/test02/users.txt')
        # NOTE(review): nothing in this test creates the '.bak' file, so
        # this presumably exercises a missing file rather than an empty
        # one -- confirm whether a copy step is missing.
        working_users_path = source_users_path + '.bak'

        hasher = PasswordHasher(working_users_path, method='pbkdf2:sha256')
        hasher.process_users_file()

        # Every user/password pair must be rejected.
        rejected_credentials = [
            ('milton', 'whatever'),
            ('john', 'mypassword'),
            ('adams', 'anotherpassword'),
            ('james', 'mypassword'),
        ]
        for user, password in rejected_credentials:
            assert not hasher.verify_password(user, password)
Exemplo n.º 17
0
    def test_genotype_rsids_using_file_http_auth_no_credentials(self):
        """Post an rsids file without credentials while auth is on; expect 401."""
        # Prepare
        self.setUp(user_pass_line='user: thepassword2')

        rsids_file = get_repository_path('example01/rsids01.txt')

        # Run
        # The upload handle is opened in a context manager so it is closed
        # deterministically, whatever the outcome of the request.
        with open(rsids_file, 'rb') as rsids_handle:
            response = self.app.post(
                '/ukbrest/api/v1.0/genotype/2/rsids',
                data={'file': (rsids_handle, rsids_file)})

        # Validate
        assert response.status_code == 401, response.status_code
Exemplo n.º 18
0
    def test_query_incl_range_upper_limit_only(self):
        """Query chromosome 1 with only ``stop=276`` and verify the three variants.

        The returned BGEN file must exist and parse into three rows whose
        rsids, alleles, positions and selected probability columns match
        the expected values.
        """
        query_engine = GenoQuery(get_repository_path('example01'))

        bgen_file = query_engine.get_incl_range(chr=1, stop=276)

        assert bgen_file is not None
        assert isfile(bgen_file)

        results = qctool(bgen_file)

        assert results is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(results, attribute)
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 3

        # rsids, in row order
        assert len(results['rsid'].unique()) == 3
        for row, rsid in enumerate(['rs1', 'rs2', 'rs3']):
            assert results.loc[row, 'rsid'] == rsid

        # alleles, in row order
        expected_alleles = [('G', 'A'), ('G', 'C'), ('C', 'A')]
        for row, (allele1, allele2) in enumerate(expected_alleles):
            assert results.loc[row, 'allele1'] == allele1
            assert results.loc[row, 'allele2'] == allele2

        # (row, sample column prefix, values of the .aa/.ab/.bb columns)
        expected_probabilities = [
            (0, '1', (0.7491, 0.0133, 0.2376)),
            (1, '2', (0.8654, 0.1041, 0.0306)),
            (2, '300', (0.0828, 0.7752, 0.1421)),
        ]
        for row, sample, values in expected_probabilities:
            for suffix, value in zip(('aa', 'ab', 'bb'), values):
                assert results.loc[row, sample + '.' + suffix] == value

        # positions, in row order
        assert len(results['pos'].unique()) == 3
        for row, position in enumerate([100, 181, 276]):
            assert results.loc[row, 'pos'] == position
Exemplo n.º 19
0
    def test_query_incl_rsids_multiple(self):
        """Query chromosome 2 for two rsids and verify both returned variants.

        The returned BGEN file must exist and parse into two rows whose
        rsids, alleles, positions and selected probability columns match
        the expected values.
        """
        query_engine = GenoQuery(get_repository_path('example01'))

        bgen_file = query_engine.get_incl_rsids(2, ['rs2000082', 'rs2000142'])

        assert bgen_file is not None
        assert isfile(bgen_file)

        results = qctool(bgen_file)

        assert results is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(results, attribute)
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 2

        # rsids, in row order
        assert len(results['rsid'].unique()) == 2
        for row, rsid in enumerate(['rs2000082', 'rs2000142']):
            assert results.loc[row, 'rsid'] == rsid

        # alleles, in row order
        expected_alleles = [('A', 'T'), ('T', 'G')]
        for row, (allele1, allele2) in enumerate(expected_alleles):
            assert results.loc[row, 'allele1'] == allele1
            assert results.loc[row, 'allele2'] == allele2

        # (row, sample column prefix, values of the .aa/.ab/.bb columns)
        expected_probabilities = [
            (0, '1', (0.0016, 0.8613, 0.1371)),
            (0, '300', (0.0234, 0.0148, 0.9618)),
            (1, '1', (0.9619, 0.0015, 0.0366)),
            (1, '300', (0.0185, 0.1408, 0.8407)),
        ]
        for row, sample, values in expected_probabilities:
            for suffix, value in zip(('aa', 'ab', 'bb'), values):
                assert results.loc[row, sample + '.' + suffix] == value

        # positions, in row order
        assert len(results['pos'].unique()) == 2
        for row, position in enumerate([6192, 10750]):
            assert results.loc[row, 'pos'] == position
Exemplo n.º 20
0
    def test_genotype_positions_different_file_naming_chr1_wrong_bgen_name(
            self):
        """Expect a 400 naming the bad pattern when bgen_names matches no file.

        'example01' is copied to 'example02' and its chr1-3 BGEN files (and
        their .bgi indexes) are renamed to 'ukb_chrN.*'.  The application is
        then configured with ``bgen_names='wrong.bgen'``; the positions
        request must fail with status 400 and a JSON 'message' mentioning
        'wrong.bgen'.  The copied directory is removed whether or not the
        assertions pass.
        """
        # Prepare
        data_dir = get_repository_path('example02')

        if os.path.isdir(data_dir):
            shutil.rmtree(data_dir)

        try:
            shutil.copytree(get_repository_path('example01'), data_dir)

            # Rename chrNimpv1.bgen[.bgi] -> ukb_chrN.bgen[.bgi] for N=1..3.
            for chromosome in (1, 2, 3):
                for suffix in ('.bgen', '.bgen.bgi'):
                    shutil.move(
                        os.path.join(
                            data_dir,
                            'chr{:d}impv1{}'.format(chromosome, suffix)),
                        os.path.join(
                            data_dir,
                            'ukb_chr{:d}{}'.format(chromosome, suffix)))

            self.setUp(data_dir='example02', bgen_names='wrong.bgen')

            # Run
            response = self.app.get(
                '/ukbrest/api/v1.0/genotype/1/positions/100/276')

            # Validate
            assert response.status_code == 400, response.status_code
            data = json.loads(response.data.decode('utf-8'))
            assert 'message' in data, data
            assert 'wrong.bgen' in data['message'], data['message']
        finally:
            # Clean up even on failure; ignore_errors keeps a cleanup
            # problem from masking the real assertion error.
            shutil.rmtree(data_dir, ignore_errors=True)
Exemplo n.º 21
0
    def test_get_last_row(self):
        """Fetch row -1 of the chr1 fixture and verify metadata and dosages.

        Each checked dosage is rounded to four decimals and compared both
        with the dot product of the listed probabilities and with a
        literal value.
        """
        reader = BGENDosage(get_repository_path('set00/chr1impv1.bgen'))

        row = reader.get_row(-1)

        assert row is not None

        # Attribute name -> expected value.  'maf' is not checked (its
        # assertions were disabled in the original test).
        expected_fields = [
            ('chr', 1),
            ('rsid', 'rs250'),
            ('position', 18389),
            ('allele0', 'T'),
            ('allele1', 'C'),
        ]
        for field, value in expected_fields:
            assert hasattr(row, field)
            assert getattr(row, field) == value

        assert hasattr(row, 'dosages')
        assert row.dosages is not None
        assert hasattr(row.dosages, 'shape')
        assert len(row.dosages) == 300

        # (sample index, probabilities, expected dosage); trailing
        # comments keep the labels used in the original test.
        expected_dosages = [
            (0, [0.04713, 0.94817, 0.00470], 0.9576),  # 1
            (2, [0.07355, 0.50369, 0.42276], 1.3492),  # NA (plink)
            (8, [0.01488, 0.07935, 0.90576], 1.8909),  # 2
            (12, [0.95152, 0.02008, 0.02840], 0.0769),  # 0
        ]
        for sample, probabilities, dosage in expected_dosages:
            observed = row.dosages[sample]
            assert round(observed, 4) == round(
                np.dot(probabilities, [0, 1, 2]), 4) == dosage, observed
Exemplo n.º 22
0
    def test_get_first_row(self):
        """Fetch row 0 of the chr1 fixture and verify metadata and dosages.

        Each checked dosage is rounded to four decimals and compared both
        with the dot product of the listed probabilities and with a
        literal value.
        """
        reader = BGENDosage(get_repository_path('set00/chr1impv1.bgen'))

        row = reader.get_row(0)

        assert row is not None

        # Attribute name -> expected value.  'maf' is not checked (its
        # assertions were disabled in the original test).
        expected_fields = [
            ('chr', 1),
            ('rsid', 'rs1'),
            ('position', 100),
            ('allele0', 'G'),
            ('allele1', 'A'),
        ]
        for field, value in expected_fields:
            assert hasattr(row, field)
            assert getattr(row, field) == value

        assert hasattr(row, 'dosages')
        assert row.dosages is not None
        assert hasattr(row.dosages, 'shape')
        assert len(row.dosages) == 300

        # (sample index, probabilities, expected dosage); trailing
        # comments keep the labels used in the original test.
        expected_dosages = [
            (0, [0.74909, 0.01333, 0.23758], 0.4885),  # NA
            (2, [0.05437, 0.91567, 0.02996], 0.9756),  # 1
            (3, [0.00650, 0.02577, 0.96773], 1.9612),  # 2
            (5, [0.95803, 0.03895, 0.00302], 0.0450),  # 0
        ]
        for sample, probabilities, dosage in expected_dosages:
            observed = row.dosages[sample]
            assert round(observed, 4) == round(
                np.dot(probabilities, [0, 1, 2]), 4) == dosage, observed
Exemplo n.º 23
0
    def test_postload_codings_check_constrains_exist(self):
        """Load the 'codings03_tree' fixture and verify keys and indexes.

        The 'codings' table must exist; the 'pk_%%' constraint query must
        report exactly data_coding/coding/meaning, and the 'ix_%%' query
        exactly data_coding/coding/node_id/parent_id/selectable.
        """
        fixture_dir = get_repository_path('postloader/codings03_tree')

        loader = Postloader(POSTGRESQL_ENGINE)
        loader.load_codings(fixture_dir)

        # The codings table must have been created.
        exists_sql = """
            SELECT EXISTS (
                SELECT 1 FROM pg_tables
                WHERE schemaname = 'public' AND tablename = '{}'
            )"""
        outcome = pd.read_sql(exists_sql.format('codings'),
                              create_engine(POSTGRESQL_ENGINE))

        assert outcome.iloc[0, 0]

        # (relationship pattern, columns expected in the query result)
        constraint_checks = [
            # primary key
            ('pk_%%', ['data_coding', 'coding', 'meaning']),
            # indexes
            ('ix_%%', ['data_coding', 'coding', 'node_id', 'parent_id',
                       'selectable']),
        ]
        for pattern, expected_columns in constraint_checks:
            constraint_sql = self._get_table_contrains(
                'codings', relationship_query=pattern)
            found = pd.read_sql(constraint_sql,
                                create_engine(POSTGRESQL_ENGINE))
            assert found is not None
            assert not found.empty
            columns = found['column_name'].tolist()
            assert len(columns) == len(expected_columns)
            for column in expected_columns:
                assert column in columns
Exemplo n.º 24
0
    def test_get_iterator(self):
        """Iterate the chr2 fixture and verify the first, 100th and last items.

        All 150 items must be returned.  Dosage checks either compare with
        both the dot product of the listed probabilities and a literal, or
        (when no probabilities are listed) with the literal only.
        """
        reader = BGENDosage(get_repository_path('set00/chr2impv1.bgen'))

        variants = list(reader.items(n_rows_cached=10))
        assert len(variants) == 150

        # (item index, position, allele0, allele1, rsid,
        #  [(sample index, probabilities or None, expected dosage), ...])
        # Entries with None are compared with the literal only: the
        # original test left their probability-based comparison commented
        # out, noting a value that differs in the last digit.
        expectations = [
            # snp 1
            (0, 100, 'A', 'G', 'rs2000000', [
                (0, [0.94401, 0.02976, 0.02623], 0.0822),
                (2, [0.00658, 0.92760, 0.06582], 1.0592),
            ]),
            # snp middle
            (99, 7516, 'T', 'A', 'rs2000099', [
                (0, None, 1.1071),
                (5, [0.04327, 0.89103, 0.06570], 1.0224),
            ]),
            # snp last
            (149, 11226, 'G', 'T', 'rs2000149', [
                (1, None, 1.8772),
                (2, None, 1.7562),
            ]),
        ]

        for index, position, allele0, allele1, rsid, samples in expectations:
            variant = variants[index]
            assert variant.chr == 2
            assert variant.position == position
            assert variant.allele0 == allele0
            assert variant.allele1 == allele1
            assert variant.rsid == rsid
            assert variant.dosages.shape == (300, )
            for sample, probabilities, dosage in samples:
                if probabilities is None:
                    assert truncate(variant.dosages[sample]) == dosage
                else:
                    assert truncate(variant.dosages[sample]) == truncate(
                        np.dot(probabilities, [0, 1, 2])) == dosage
Exemplo n.º 25
0
    def test_genotype_rsids_bgenix_not_in_path(self):
        """Expect a 400 mentioning the bgenix path when the executable is missing.

        The application is configured with a bgenix path that is expected
        not to exist; the JSON error 'message' must mention both 'bgenix'
        and the configured path.
        """
        # Prepare
        self.setUp(bgenix_path='/path/not/found/bgenix')

        rsids_file = get_repository_path('example01/rsids01.txt')

        # Run
        # The upload handle is opened in a context manager so it is closed
        # deterministically, whatever the outcome of the request.
        with open(rsids_file, 'rb') as rsids_handle:
            response = self.app.post(
                '/ukbrest/api/v1.0/genotype/2/rsids',
                data={'file': (rsids_handle, rsids_file)})

        # Validate
        assert response.status_code == 400, response.status_code
        data = json.loads(response.data.decode('utf-8'))
        assert 'message' in data, data
        assert 'bgenix' in data['message'], data['message']
        assert '/path/not/found/bgenix' in data['message'], data['message']
Exemplo n.º 26
0
    def test_genotype_positions_using_file_wrong_format(self):
        """Expect a 400 with bgenix output when the positions file is malformed.

        The JSON error must carry a 'message' mentioning 'bgenix' and an
        'output' field containing the bgenix banner and the offending spec.
        """
        # Prepare
        positions_file = get_repository_path('example01/positions01_bug.txt')

        # Run
        # The upload handle is opened in a context manager so it is closed
        # deterministically, whatever the outcome of the request.
        with open(positions_file, 'rb') as positions_handle:
            response = self.app.post(
                '/ukbrest/api/v1.0/genotype/2/positions',
                data={'file': (positions_handle, positions_file)})

        # Validate
        assert response.status_code == 400, response.status_code
        data = json.loads(response.data.decode('utf-8'))

        assert 'message' in data, data
        assert 'bgenix' in data['message'], data['message']

        assert 'output' in data, data
        assert 'Welcome to bgenix' in data['output'], data['output']
        assert 'spec="02:8949/8949"' in data['output'], data['output']
Exemplo n.º 27
0
    def test_get_last_row_other_chromosome(self):
        """Check the row returned by ``get_row(-1)`` for ``chr2impv1.bgen``.

        The row must expose the expected variant attributes and a dosage
        vector of length 300 whose selected entries match the dosage computed
        from reference genotype probabilities.
        """
        # Prepare
        bgen_path = get_repository_path('set00/chr2impv1.bgen')
        reader = BGENDosage(bgen_path)

        # Run
        last_row = reader.get_row(-1)

        # Validate
        assert last_row is not None

        expected_fields = (
            ('chr', 2),
            ('rsid', 'rs2000149'),
            ('position', 11226),
            ('allele0', 'G'),
            ('allele1', 'T'),
        )
        for field_name, expected_value in expected_fields:
            assert hasattr(last_row, field_name)
            assert getattr(last_row, field_name) == expected_value

        # The 'maf' attribute check (expected value 0.4722) is disabled in
        # this test.

        assert hasattr(last_row, 'dosages')
        dosages = last_row.dosages
        assert dosages is not None
        assert hasattr(dosages, 'shape')
        assert len(dosages) == 300

        # Each entry: (sample index, genotype probabilities, expected dosage).
        # The dosage is the probabilities weighted by the allele counts 0/1/2.
        allele_counts = [0, 1, 2]
        reference = (
            (0, [0.94620, 0.05350, 0.00030], 0.0541),
            (1, [0.01373, 0.09532, 0.89094], 1.8772),
            (299, [0.04675, 0.93974, 0.01351], 0.9668),
        )
        for sample_idx, probabilities, expected_dosage in reference:
            observed = dosages[sample_idx]
            computed = round(np.dot(probabilities, allele_counts), 4)
            assert round(observed, 4) == computed == expected_dosage, observed
Exemplo n.º 28
0
    def test_postload_codings_negative_coding(self):
        """Load the ``codings02_negative`` fixture and check the stored rows.

        After ``Postloader.load_codings`` runs, the ``codings`` table must
        exist and contain exactly two rows for data coding 13, whose coding
        values are the strings ``'-1'`` and ``'-3'``.
        """
        # prepare
        directory = get_repository_path('postloader/codings02_negative')

        # run
        pl = Postloader(POSTGRESQL_ENGINE)
        pl.load_codings(directory)

        # validate
        # One engine serves both queries instead of building a new one for
        # each read.
        db_engine = create_engine(POSTGRESQL_ENGINE)

        ## Check codings table exists
        table = pd.read_sql(
            """
            SELECT EXISTS (
                SELECT 1 FROM pg_tables
                WHERE schemaname = 'public' AND tablename = '{}'
            )""".format('codings'), db_engine)

        assert table.iloc[0, 0]

        codings = pd.read_sql(
            "select * from codings order by data_coding, coding",
            db_engine)
        assert codings is not None
        expected_columns = ['data_coding', 'coding', 'meaning']
        assert len(codings.columns) >= len(expected_columns)
        assert all(x in codings.columns for x in expected_columns)

        # Expected rows, in the order produced by the query's ORDER BY.
        expected_rows = [
            (13, '-1', 'Date uncertain or unknown'),
            (13, '-3', 'Preferred not to answer'),
        ]

        assert not codings.empty
        assert codings.shape[0] == len(expected_rows)

        for cidx, (data_coding, coding, meaning) in enumerate(expected_rows):
            assert codings.loc[cidx, 'data_coding'] == data_coding
            assert codings.loc[cidx, 'coding'] == coding
            assert codings.loc[cidx, 'meaning'] == meaning
Exemplo n.º 29
0
    def test_get_second_row(self):
        """Check the row returned by ``get_row(1)`` for ``chr1impv1.bgen``.

        The row must expose the expected variant attributes and a dosage
        vector of length 300; the first and last dosages are compared with
        reference values rounded to four decimals.
        """
        # Prepare
        bgen_path = get_repository_path('set00/chr1impv1.bgen')
        reader = BGENDosage(bgen_path)

        # Run
        second_row = reader.get_row(1)

        # Validate
        assert second_row is not None

        expected_fields = (
            ('chr', 1),
            ('rsid', 'rs2'),
            ('position', 181),
            ('allele0', 'G'),
            ('allele1', 'C'),
        )
        for field_name, expected_value in expected_fields:
            assert hasattr(second_row, field_name)
            assert getattr(second_row, field_name) == expected_value

        # The 'maf' attribute check (expected value 0.4894) is disabled in
        # this test.

        assert hasattr(second_row, 'dosages')
        dosages = second_row.dosages
        assert dosages is not None
        assert hasattr(dosages, 'shape')
        assert len(dosages) == 300

        # First sample: dosage is the genotype probabilities weighted by the
        # allele counts 0/1/2.
        first_observed = dosages[0]
        first_computed = round(
            np.dot([0.75232, 0.11725, 0.13043], [0, 1, 2]), 4)
        assert round(first_observed, 4) == first_computed == 0.3781, \
            first_observed

        # Last sample: only the literal is compared. The reference
        # probabilities (0.00937, 0.13421, 0.85642) give 1.84705, which sits
        # on a four-decimal rounding boundary -- presumably why the computed
        # comparison is not used here.
        last_rounded = round(dosages[299], 4)
        assert last_rounded == 1.8471
Exemplo n.º 30
0
    def test_postload_codings_vacuum(self):
        """Check that the ``codings`` table is vacuumed and analyzed after load.

        After ``Postloader.load_codings`` runs, ``pg_stat_user_tables`` is
        polled until ``codings`` is listed with non-null ``last_vacuum`` and
        ``last_analyze``, or the attempt budget is exhausted.
        """
        import time  # function-scope import: only the polling loop needs it

        # prepare
        directory = get_repository_path('postloader/codings03_tree')

        # run
        pl = Postloader(POSTGRESQL_ENGINE)
        pl.load_codings(directory)

        # Validate
        db_engine = create_engine(POSTGRESQL_ENGINE)

        ## Check codings table exists
        table = pd.read_sql(
            """
            SELECT EXISTS (
                SELECT 1 FROM pg_tables
                WHERE schemaname = 'public' AND tablename = '{}'
            )""".format('codings'), db_engine)

        assert table.iloc[0, 0]

        vacuum_query = """
                select relname, last_vacuum, last_analyze
                from pg_stat_user_tables
                where schemaname = 'public' and last_vacuum is not null and last_analyze is not null
            """
        max_attempts = 150
        poll_interval = 0.1  # seconds between attempts

        # FIXME waits for vacuum to finish.
        # Poll with a short pause instead of a tight loop, and keep waiting
        # until 'codings' itself is reported: stopping at the first non-empty
        # result could pick up rows that belong only to other tables.
        vacuum_data = pd.DataFrame()
        tables = []
        for _ in range(max_attempts):
            vacuum_data = pd.read_sql(vacuum_query, db_engine)
            tables = vacuum_data['relname'].tolist()
            if 'codings' in tables:
                break
            time.sleep(poll_interval)

        assert not vacuum_data.empty
        assert 'codings' in tables, tables