Exemplo n.º 1
0
    def test_query_columns_with_dots(self):
        # Query a column whose name contains a dot ("TVC.counts"), once with
        # a "less than" and once with a "greater than" comparison.
        first_frame = ff.load(self.tab2_file)
        below = ff.dffilter(["TVC.counts < 3"], first_frame)
        self.assertEqual(below.shape, (8, 99))

        # Filtering always happens in place on the frame handed to dffilter,
        # so the file is loaded again before the second query.
        second_frame = ff.load(self.tab2_file)
        above = ff.dffilter(["TVC.counts > 3"], second_frame)
        self.assertEqual(above.shape, (1, 99))
Exemplo n.º 2
0
    def test_reverse_the_filter_condition_similar_words_and_or(self):
        # Negated match ("not_contains") whose pattern mixes a "\b" prefix
        # with an "|" alternative.
        # NOTE(review): presumably the pattern is interpreted as a regular
        # expression by dffilter -- confirm against its implementation.
        frame = ff.load(file_test("DOT.column.tab"))
        cond = r'ExonicFunc.refGene not_contains \bsynonymous SNV|deletion'

        remaining = ff.dffilter([cond], frame)
        self.assertEqual(remaining.shape, (7, 99))
Exemplo n.º 3
0
    def test_reverse_the_filter_condition_similar_ords_and_or_json(self):
        # Filter conditions are read from a JSON fixture ("slashb.json")
        # instead of being written inline, then applied to DOT.column.tab.
        # The context manager closes the fixture file deterministically;
        # a bare json.load(open(...)) leaves the handle open until garbage
        # collection and triggers a ResourceWarning.
        with open(file_test("slashb.json")) as conds_file:
            conds = json.load(conds_file)
        df = ff.load(file_test("DOT.column.tab"))

        new_df = ff.dffilter(conds, df)

        self.assertEqual(new_df.shape, (4, 99))
Exemplo n.º 4
0
    def test_num_columns_that_fails_cast_to_str_coerced_into_nan(self):
        # Checks that the CG46 column of the loaded file holds a NaN.
        frame = ff.load(self.tab4_file)

        # NaN is not equal to itself, so comparing the column with itself
        # is a trick that keeps only the rows where CG46 is NaN.
        nan_probe = 'CG46 != CG46'
        nan_rows = ff.dffilter([nan_probe], frame)

        self.assertEqual(nan_rows.shape, (1, 76))
Exemplo n.º 5
0
    def test_DF_can_be_filtered_with_OR_string_fields(self):
        # Two conditions, each carrying its own "|" alternative: one on the
        # numeric PopFreqMax column and one on the Func.refGene text column.
        frame = ff.load(self.tab_file)
        numeric_either = 'PopFreqMax < 0.01 | PopFreqMax > 0.99'
        text_either = 'Func.refGene contains exonic|intronic'

        kept = ff.dffilter([numeric_either, text_either], frame)
        self.assertEqual(kept.shape, (53, 151))
Exemplo n.º 6
0
    def test_DF_can_be_filtered_by_numeric_AND_string_conditions(self):
        # One numeric comparison and one text "contains" condition are
        # passed together in a single dffilter call.
        frame = ff.load(self.tab_file)
        expected_shape = (2, 151)

        result = ff.dffilter(
            ['PopFreqMax < 0.01', 'Func.refGene contains exonic'], frame
        )
        self.assertEqual(result.shape, expected_shape)
Exemplo n.º 7
0
    def test_DF_can_be_filtered_by_two_conditions(self):
        # An equality condition on Ref combined with a "contains" condition
        # on Func.refGene.
        frame = ff.load(self.tab_file)
        ref_is_a = 'Ref == "A"'
        func_is_exonic = 'Func.refGene contains exonic'

        filtered = ff.dffilter([ref_is_a, func_is_exonic], frame)

        self.assertEqual(filtered.shape, (13, 151))
Exemplo n.º 8
0
    def test_DF_can_be_filtered_by_one_condition_query(self):
        # A single equality condition on the Ref column.
        frame = ff.load(self.tab_file)
        only_g = ff.dffilter(['Ref == "G"'], frame)

        self.assertEqual(only_g.shape, (60, 151))
Exemplo n.º 9
0
    def test_thousand_genomes_column_filtering(self):
        # Numeric comparison on "1000G_ALL", a column whose name starts with
        # a digit.
        frame = ff.load(file_test("DOT.column.tab"))
        threshold_cond = '1000G_ALL > 0.2'

        above_threshold = ff.dffilter([threshold_cond], frame)
        self.assertEqual(above_threshold.shape, (1, 99))
Exemplo n.º 10
0
    def test_news_filter_2017_03(self):
        # Numeric comparison on the gnomAD_exome_ALL column.
        frame = ff.load(file_test("DOT.column.tab"))
        expected_shape = (2, 99)

        filtered = ff.dffilter(['gnomAD_exome_ALL > 0.99'], frame)
        self.assertEqual(filtered.shape, expected_shape)
Exemplo n.º 11
0
    def test_reverse_the_filter_condition_multiple(self):
        # Negated match ("not_contains") where the text to exclude is made
        # of several words separated by a space.
        frame = ff.load(file_test("DOT.column.tab"))
        exclusion = 'ExonicFunc.refGene not_contains frameshift deletion'

        survivors = ff.dffilter([exclusion], frame)
        self.assertEqual(survivors.shape, (8, 99))
Exemplo n.º 12
0
    def test_reverse_the_filter_condition_single(self):
        # Negated match ("not_contains") with a single-word value.
        frame = ff.load(file_test("DOT.column.tab"))
        exclusion = 'Func.refGene not_contains intronic'

        survivors = ff.dffilter([exclusion], frame)
        self.assertEqual(survivors.shape, (4, 99))
Exemplo n.º 13
0
    def test_num_columns_with_commas(self):
        # Numeric comparison on ExAC_ALL using the "floats_comma.tab"
        # fixture located in the test_files directory next to this module.
        # NOTE(review): the fixture name suggests floats written with a
        # decimal comma -- confirm against the file's contents.
        here = dirname(__file__)
        fixture_path = join(here, "test_files", "floats_comma.tab")
        frame = ff.load(fixture_path)

        at_most = ff.dffilter(['ExAC_ALL <= 0.1'], frame)
        self.assertEqual(at_most.shape, (5, 2))
Exemplo n.º 14
0
    def test_NaN_in_str_fields_dont_break_the_filter(self):
        # A text "contains" condition must still work on this fixture.
        # NOTE(review): per the test name, the fixture presumably has NaN
        # values in the Func.refGene column -- confirm against the file.
        frame = ff.load(self.tab3_file)
        text_cond = 'Func.refGene contains exonic|intronic'

        matched = ff.dffilter([text_cond], frame)

        self.assertEqual(matched.shape, (1, 76))
Exemplo n.º 15
0
    def test_multiple_weirdness_can_function(self):
        # Two mutually exclusive comparisons on the dotted "TVC.counts"
        # column are applied together; the expected result has no rows.
        frame = ff.load(self.tab2_file)
        lower = "TVC.counts < 3"
        upper = "TVC.counts > 3"

        nothing_left = ff.dffilter([lower, upper], frame)

        self.assertEqual(nothing_left.shape, (0, 99))
Exemplo n.º 16
0
    def test_non_existent_columns_doesnt_break_code(self):
        # A condition on a column named "Imaginary" must not raise.
        # NOTE(review): per the test name that column does not exist in the
        # fixture, and (9, 99) is presumably the unfiltered shape -- confirm.
        frame = ff.load(self.tab2_file)
        bogus_cond = "Imaginary < 3"

        untouched = ff.dffilter([bogus_cond], frame)

        self.assertEqual(untouched.shape, (9, 99))