Exemplo n.º 1
0
 def test_search_missing_values(self, caplog):
     """Check that search_missing_values() logs a message for colI and colJ.

     Reads ``sample-missing-values.tsv`` as a "real location" dataset and
     inspects the captured log text for one message per affected column.
     """
     datafile = os.path.join(here, dir_data, "sample-missing-values.tsv")
     dataset = wrapper.Dataset(datafile, "real location", error=0.01)
     dataset.read_datafile()
     dataset.search_missing_values()
     expected_messages = (
         "Missing values found in column: colI",
         "Missing values found in column: colJ",
     )
     for message in expected_messages:
         assert message in caplog.text
Exemplo n.º 2
0
 def test_clean_column_names(self, caplog):
     """Check that clean_column_names() logs every expected column rename.

     Reads ``sample-column-names.tsv`` and expects one log message per
     renamed column (parentheses, accented characters, brackets/slash).
     """
     datafile = os.path.join(here, dir_data, "sample-column-names.tsv")
     dataset = wrapper.Dataset(datafile, "real location", error=0.01)
     dataset.read_datafile()
     dataset.clean_column_names()
     # Nothing is logged past this point, so the log text is read only once.
     log_output = caplog.text
     expected_renames = (
         "Column 'gene(name)' renamed to 'gene_name_'",
         "Column 'coléèà' renamed to 'col_'",
         "Column 'col[]()/' renamed to 'col_'",
     )
     for message in expected_renames:
         assert message in log_output
Exemplo n.º 3
0
 def test_check_data_type_discrete_OK(self, caplog):
     """Check the log output of check_data_type() on a "discrete" dataset.

     Reads ``sample-discrete.tsv`` and expects the table dimensions plus
     the number of distinct values for colD and colE in the captured log.
     """
     datafile = os.path.join(here, dir_data, "sample-discrete.tsv")
     dataset = wrapper.Dataset(datafile, "discrete")
     dataset.read_datafile()
     dataset.check_data_type()
     expected_messages = (
         "10 rows and 3 columns",
         "Column 'colD': 2 different values",
         "Column 'colE': 3 different values",
     )
     for message in expected_messages:
         assert message in caplog.text
Exemplo n.º 4
0
 def test_check_data_type_real_location_not_OK(self, caplog):
     """Check that check_data_type() raises CastFloat64Error.

     ``sample-discrete.tsv`` is loaded as a "real location" dataset; the
     read itself must log the table dimensions, and the subsequent type
     check must fail with a float-conversion error message.
     """
     datafile = os.path.join(here, dir_data, "sample-discrete.tsv")
     dataset = wrapper.Dataset(datafile, "real location", error=0.01)
     dataset.read_datafile()
     # The dimensions are checked before the type check is attempted.
     assert "10 rows and 3 columns" in caplog.text
     expected_error = "could not convert string to float"
     with pytest.raises(wrapper.CastFloat64Error, match=expected_error):
         dataset.check_data_type()
Exemplo n.º 5
0
 def test_check_data_type_real_location_OK(self, caplog):
     """Check the log output of check_data_type() on "real location" data.

     Reads ``sample-real-location.tsv`` and expects the table dimensions
     and a mention of columns colA, colB and colC in the captured log.
     """
     datafile = os.path.join(here, dir_data, "sample-real-location.tsv")
     dataset = wrapper.Dataset(datafile, "real location", error=0.01)
     dataset.read_datafile()
     dataset.check_data_type()
     # Nothing is logged past this point, so the log text is read only once.
     log_output = caplog.text
     expected_fragments = (
         "10 rows and 4 columns",
         "Column 'colA'",
         "Column 'colB'",
         "Column 'colC'",
     )
     for fragment in expected_fragments:
         assert fragment in log_output
Exemplo n.º 6
0
 def test_init(self):
     """Check the attribute values of a freshly constructed Dataset.

     Builds a Dataset with an empty input file name and the "merged" data
     type, then verifies each attribute the constructor is expected to set.
     """
     ds = wrapper.Dataset("", "merged")
     assert ds.input_file == ""
     assert ds.data_type == "merged"
     # Use identity, not equality, for None: `== None` defers to the
     # object's __eq__, which may be overridden (an array-like value such
     # as a DataFrame would compare element-wise and make the assert raise
     # instead of failing cleanly).
     assert ds.error is None
     assert ds.df is None
     assert ds.column_meta == {}
     assert ds.separator_char == "\t"
     assert ds.missing_char == ""
Exemplo n.º 7
0
 def test_read_datafile(self, caplog):
     """Check that read_datafile() logs the dimensions of the loaded table.

     Reads ``sample-real-location.tsv`` and looks for the row/column count
     in the captured log text.
     """
     datafile = os.path.join(here, dir_data, "sample-real-location.tsv")
     dataset = wrapper.Dataset(datafile, "real location", error=0.01)
     dataset.read_datafile()
     log_output = caplog.text
     assert "10 rows and 4 columns" in log_output
Exemplo n.º 8
0
 def test_check_duplicate_col_names(self):
     """Check that duplicate column names raise DuplicateColumnNameError.

     The Dataset is created with an empty file name and then pointed at
     ``input-dup-col.tsv`` before check_duplicate_col_names() is called.
     """
     duplicated_file = os.path.join(here, dir_data, "input-dup-col.tsv")
     dataset = wrapper.Dataset("", "merged")
     dataset.input_file = duplicated_file
     expected_error = "Found duplicate column names"
     with pytest.raises(wrapper.DuplicateColumnNameError,
                        match=expected_error):
         dataset.check_duplicate_col_names()