def test_query_columns_with_dots(self):
    # Query a column whose name contains a dot ("TVC.counts") with two
    # opposite comparisons and check the resulting shape of each.
    expectations = (
        ("TVC.counts < 3", (8, 99)),
        ("TVC.counts > 3", (1, 99)),
    )
    for condition, expected_shape in expectations:
        # Reload for every case: the filtering of df is always in place.
        frame = ff.load(self.tab2_file)
        filtered = ff.dffilter([condition], frame)
        self.assertEqual(filtered.shape, expected_shape)
def test_reverse_the_filter_condition_similar_words_and_or(self):
    # Negated match (not_contains) whose pattern carries a "\b" anchor and
    # a "|" alternation; the filtered result is expected to have shape (7, 99).
    fixture_path = file_test("DOT.column.tab")
    frame = ff.load(fixture_path)
    negated_condition = r'ExonicFunc.refGene not_contains \bsynonymous SNV|deletion'
    filtered = ff.dffilter([negated_condition], frame)
    self.assertEqual(filtered.shape, (7, 99))
def test_reverse_the_filter_condition_similar_ords_and_or_json(self):
    # Same kind of check as the inline-condition tests, but the conditions
    # are read from the "slashb.json" fixture instead of being written here.
    # The fixture is opened in a `with` block so the file handle is closed
    # deterministically (the previous bare open() leaked it).
    with open(file_test("slashb.json")) as conditions_file:
        conds = json.load(conditions_file)
    df = ff.load(file_test("DOT.column.tab"))
    new_df = ff.dffilter(conds, df)
    self.assertEqual(new_df.shape, (4, 99))
def test_num_columns_that_fails_cast_to_str_coerced_into_nan(self):
    # Comparing a column with itself is a trick to check if the column has
    # a NaN: NaN is not equal to itself, so only NaN rows satisfy "!=".
    nan_probe = 'CG46 != CG46'
    frame = ff.load(self.tab4_file)
    filtered = ff.dffilter([nan_probe], frame)
    self.assertEqual(filtered.shape, (1, 76))
def test_DF_can_be_filtered_with_OR_string_fields(self):
    # Two conditions, each containing a "|": one on the numeric PopFreqMax
    # column and one "contains" match on Func.refGene.
    frame = ff.load(self.tab_file)
    numeric_either_tail = 'PopFreqMax < 0.01 | PopFreqMax > 0.99'
    string_either_word = 'Func.refGene contains exonic|intronic'
    filtered = ff.dffilter([numeric_either_tail, string_either_word], frame)
    self.assertEqual(filtered.shape, (53, 151))
def test_DF_can_be_filtered_by_numeric_AND_string_conditions(self):
    # One numeric comparison plus one string "contains" condition, passed
    # together in a single call.
    frame = ff.load(self.tab_file)
    numeric_condition = 'PopFreqMax < 0.01'
    string_condition = 'Func.refGene contains exonic'
    filtered = ff.dffilter([numeric_condition, string_condition], frame)
    expected_shape = (2, 151)
    self.assertEqual(filtered.shape, expected_shape)
def test_DF_can_be_filtered_by_two_conditions(self):
    # An equality query on Ref combined with a "contains" match on
    # Func.refGene.
    frame = ff.load(self.tab_file)
    filtered = ff.dffilter(
        [
            'Ref == "A"',
            'Func.refGene contains exonic',
        ],
        frame,
    )
    self.assertEqual(filtered.shape, (13, 151))
def test_DF_can_be_filtered_by_one_condition_query(self):
    # Simplest case: a single equality condition on the Ref column.
    single_condition = ['Ref == "G"']
    frame = ff.load(self.tab_file)
    filtered = ff.dffilter(single_condition, frame)
    self.assertEqual(filtered.shape, (60, 151))
def test_thousand_genomes_column_filtering(self):
    # The column name "1000G_ALL" starts with a digit; check that a numeric
    # comparison on it still yields the expected shape.
    frame = ff.load(file_test("DOT.column.tab"))
    threshold_condition = '1000G_ALL > 0.2'
    filtered = ff.dffilter([threshold_condition], frame)
    self.assertEqual(filtered.shape, (1, 99))
def test_news_filter_2017_03(self):
    # Numeric comparison on the gnomAD_exome_ALL column.
    fixture_path = file_test("DOT.column.tab")
    frame = ff.load(fixture_path)
    filtered = ff.dffilter(['gnomAD_exome_ALL > 0.99'], frame)
    expected_shape = (2, 99)
    self.assertEqual(filtered.shape, expected_shape)
def test_reverse_the_filter_condition_multiple(self):
    # Negated match (not_contains) where the value to exclude is made of
    # more than one word ("frameshift deletion").
    frame = ff.load(file_test("DOT.column.tab"))
    negated_condition = 'ExonicFunc.refGene not_contains frameshift deletion'
    filtered = ff.dffilter([negated_condition], frame)
    self.assertEqual(filtered.shape, (8, 99))
def test_reverse_the_filter_condition_single(self):
    # Negated match (not_contains) with a single-word value.
    fixture_path = file_test("DOT.column.tab")
    frame = ff.load(fixture_path)
    filtered = ff.dffilter(['Func.refGene not_contains intronic'], frame)
    self.assertEqual(filtered.shape, (4, 99))
def test_num_columns_with_commas(self):
    # Uses the "floats_comma.tab" fixture located in the "test_files"
    # directory next to this module; presumably its numbers use a comma as
    # decimal separator (per the fixture name) - verify against the file.
    this_dir = dirname(__file__)
    fixture_path = join(this_dir, "test_files", "floats_comma.tab")
    frame = ff.load(fixture_path)
    numeric_condition = ['ExAC_ALL <= 0.1']
    filtered = ff.dffilter(numeric_condition, frame)
    self.assertEqual(filtered.shape, (5, 2))
def test_NaN_in_str_fields_dont_break_the_filter(self):
    # A "contains" match must work on the tab3 fixture, which (per the test
    # name) presumably has NaN values in string fields.
    frame = ff.load(self.tab3_file)
    contains_condition = 'Func.refGene contains exonic|intronic'
    filtered = ff.dffilter([contains_condition], frame)
    self.assertEqual(filtered.shape, (1, 76))
def test_multiple_weirdness_can_function(self):
    # Two mutually exclusive conditions on the same dotted column, passed
    # together; the expected result has zero rows and all 99 columns.
    frame = ff.load(self.tab2_file)
    below_three = "TVC.counts < 3"
    above_three = "TVC.counts > 3"
    filtered = ff.dffilter([below_three, above_three], frame)
    self.assertEqual(filtered.shape, (0, 99))
def test_non_existent_columns_doesnt_break_code(self):
    # A condition that names a column not expected to exist ("Imaginary")
    # must not raise; the result is expected to have shape (9, 99).
    frame = ff.load(self.tab2_file)
    unknown_column_condition = ["Imaginary < 3"]
    filtered = ff.dffilter(unknown_column_condition, frame)
    expected_shape = (9, 99)
    self.assertEqual(filtered.shape, expected_shape)