def test_converts_nan_to_empty_strings(self):
    """A NaN cell is expected to become an empty string after cleaning."""
    frame = pd.DataFrame({"A": ["A", "B ", " C", np.nan]})
    want = pd.DataFrame({"A": ["A", "B", "C", ""]})

    # remove_whitespace is called for its in-place effect on ``frame``.
    remove_whitespace(frame)

    assert_frame_equal(frame, want)
def test_handles_empty_strings(self):
    """A whitespace-only cell is expected to collapse to an empty string."""
    frame = pd.DataFrame({"A": ["A", "B ", " C", " "]})
    want = pd.DataFrame({"A": ["A", "B", "C", ""]})

    # remove_whitespace is called for its in-place effect on ``frame``.
    remove_whitespace(frame)

    assert_frame_equal(frame, want)
def test_remove_leading_and_trailing_spaces_from_dataframe(self):
    """Outer spaces are stripped in every column; inner spaces are kept."""
    frame = pd.DataFrame(
        {
            "A": ["A", "B ", " C", "D ", " Ed ", " 1 "],
            "B": ["Aa", "Bb ", " Cc", "Dd ", " Ed Ed ", " 11 "],
        }
    )
    want = pd.DataFrame(
        {
            "A": ["A", "B", "C", "D", "Ed", "1"],
            "B": ["Aa", "Bb", "Cc", "Dd", "Ed Ed", "11"],
        }
    )

    # remove_whitespace is called for its in-place effect on ``frame``.
    remove_whitespace(frame)

    assert_frame_equal(frame, want)
def test_ignores_numeric_columns(self):
    """Integer and float columns must come through unchanged."""
    ints = [1, 2, 3]
    floats = [1.1, 2.2, 3.3]
    frame = pd.DataFrame({"A": ["A", "B ", " C"], "B": ints, "C": floats})
    want = pd.DataFrame({"A": ["A", "B", "C"], "B": ints, "C": floats})

    # remove_whitespace is called for its in-place effect on ``frame``.
    remove_whitespace(frame)

    assert_frame_equal(frame, want)
def create_taxa_list_df(df):
    """Build a de-duplicated taxa table from ``df``.

    Keeps only the columns named in ``taxa_rank_fields + taxa_fields +
    pdbd_fields``, runs ``remove_whitespace`` and
    ``add_normalized_name_column`` on the result, discards rows whose
    ``normalized_name`` equals the empty string, and finally keeps the first
    row for each ``(normalized_name, taxon_group)`` pair.  The frame's shape
    is printed to stdout after each stage.

    Args:
        df: Source data handed to ``pd.DataFrame(df, columns=fields)``.

    Returns:
        The filtered, de-duplicated ``pd.DataFrame``.
    """
    fields = taxa_rank_fields + taxa_fields + pdbd_fields
    print("fields:", fields)

    filtered_taxa = pd.DataFrame(df, columns=fields)
    # Called for its effect on ``filtered_taxa``; the return value is unused.
    remove_whitespace(filtered_taxa)
    print("initial df: ", filtered_taxa.shape)

    # Defined elsewhere; presumably adds the "normalized_name" column that is
    # read below -- confirm against its definition.
    add_normalized_name_column(filtered_taxa)

    # Filter with a positional boolean mask rather than ``drop(index_labels)``:
    # ``drop`` is label-based, so with a non-unique index it would also remove
    # valid rows that merely share a label with a blank-name row.
    # ``fillna(False)`` keeps rows whose comparison result is missing, the
    # same rows the previous label-based selection left in place.
    is_blank = (filtered_taxa["normalized_name"] == "").fillna(False)
    filtered_taxa = filtered_taxa[~is_blank]
    print("remove nontaxa df: ", filtered_taxa.shape)

    # Reassign instead of ``inplace=True`` so the filtered frame above is
    # never mutated in place.
    filtered_taxa = filtered_taxa.drop_duplicates(
        subset=["normalized_name", "taxon_group"], keep="first"
    )
    print("drop duplicates df: ", filtered_taxa.shape)
    return filtered_taxa