def drop_rows_with_any_null_values(data: dd = None) -> dd:
    """
    Drop every row that contains at least one null value.

    The input is not modified; the filtered result of ``dropna`` is returned.

    :param data: dask dataframe to filter (must be supplied; the ``None``
        default has no ``dropna`` method and will fail)
    :return: dataframe without the rows that held a null in any column
    """
    rows_without_nulls = data.dropna(how="any")
    return rows_without_nulls
# Example #2
# 0
    def impute_nulls(self, data: dataframe):
        """
        Fill missing values column by column, then drop rows still incomplete.

        Columns listed in ``self.cols["CONTINUOUS"]`` are filled with their
        column mean. Columns listed in ``self.cols["CATEGORICAL"]["STRING"]``
        and ``self.cols["CATEGORICAL"]["NUMERIC"]`` are filled with the first
        row of their computed mode. The filled columns are assigned back onto
        ``data`` itself before the final ``dropna``.

        :param data: dataframe whose reductions expose
            ``.compute(num_workers=...)`` (presumably a dask dataframe --
            confirm against callers)
        :return: dataframe with every row that still contains a null removed
        """
        # Continuous columns: the means are computed eagerly so they can be
        # passed to fillna as concrete per-column fill values.
        continuous = self.cols["CONTINUOUS"]
        column_means = data[continuous].mean(axis=0, skipna=True).compute(
            num_workers=self.workers)
        data[continuous] = data[continuous].fillna(column_means, axis=0)

        # Categorical columns: string-valued and numeric-valued groups are
        # handled together, each filled with its own mode.
        categorical = self.cols["CATEGORICAL"]
        mode_columns: list = categorical["STRING"] + categorical["NUMERIC"]
        mode_table = data[mode_columns].mode(dropna=True).compute(
            num_workers=self.workers)
        for name in mode_columns:
            # Row 0 of the mode table is used as the single fill value.
            fill_value = mode_table[name].iloc[0]
            data[name] = data[name].fillna(fill_value, axis=0)

        # Rows with nulls in any column not covered above are discarded.
        return data.dropna(how="any")
def remove_papers_with_null_cols(dask_df: dd, cols: List[str]) -> dd:
    """
    Drop the rows whose values are null in every one of the given columns.

    A row is kept as long as at least one column in ``cols`` is non-null
    (``how="all"``); columns outside ``cols`` are ignored when deciding.
    The input dataframe is not modified.

    :param dask_df: dask dataframe to filter
    :param cols: names of the columns inspected for nulls
    :return: filtered dataframe (the previous ``-> None`` annotation was
        wrong: the result of ``dropna`` has always been returned)
    """
    return dask_df.dropna(subset=cols, how="all")