示例#1
0
    def comapare_dataframes(
            cls,
            df1: DataFrame,
            df2: DataFrame,
            excluded_keys: Union[List, str, None] = None) -> bool:
        """
        Compare two DataFrames for an exact match, ignoring row order.

        Both frames are converted with ``toPandas()``, sorted by every
        remaining column of ``df1`` and then checked with
        ``pandas.testing.assert_frame_equal``.

        :param df1: processed data
        :type df1: DataFrame
        :param df2: gold standard expected data
        :type df2: DataFrame
        :param excluded_keys: column name, or list/tuple of column names,
            dropped from both frames before the comparison; ``None`` or an
            empty list excludes nothing, optional
        :type excluded_keys: Union[List, str, None]
        :return: True when the frames match
        :rtype: bool
        :raises AssertionError: the DataFrames do not match
        """
        # Normalise to a list of keys. ``None`` must become "no keys" rather
        # than ``[None]``, which would be forwarded to ``drop`` as a column.
        if excluded_keys is None:
            keys_to_drop = []
        elif isinstance(excluded_keys, (list, tuple)):
            keys_to_drop = list(excluded_keys)
        else:
            keys_to_drop = [excluded_keys]

        df1 = df1.drop(*keys_to_drop)
        df2 = df2.drop(*keys_to_drop)

        # The first item of every ``dtypes`` entry is used as the column
        # name; sorting by all of them makes the check row-order independent.
        sort_columns = [column_info[0] for column_info in df1.dtypes]
        df1_sorted = df1.toPandas().sort_values(by=sort_columns,
                                                ignore_index=True)
        df2_sorted = df2.toPandas().sort_values(by=sort_columns,
                                                ignore_index=True)
        assert_frame_equal(df1_sorted, df2_sorted)
        return True
def create_input_widgets(years: DataFrame, countries: DataFrame, ratings: DataFrame, dbutils: DBUtils):
    """Register the "year", "country" and "rating" dropdown widgets.

    :param years: frame whose first row holds the minimum year in its first
        column and the maximum year in its second column
    :param countries: frame whose first column lists the selectable countries
    :param ratings: frame whose first column lists the selectable ratings;
        ``None`` entries are skipped
    :param dbutils: utilities object whose ``widgets.dropdown`` is called to
        create each widget
    """
    # Convert once and reuse the row instead of calling toPandas() per bound.
    year_bounds = years.toPandas().values[0]
    # Plain ints so that range() works even if the bounds arrive as floats.
    min_year = int(year_bounds[0])
    max_year = int(year_bounds[1])

    country_list = [row[0] for row in countries.toPandas().values.tolist()]
    # Filtering (rather than list.remove) does not fail when no None exists.
    rating_list = sorted(
        row[0] for row in ratings.toPandas().values.tolist()
        if row[0] is not None
    )

    # Keep "C" as the preferred default, but fall back to the first available
    # rating when "C" is not a choice.
    # NOTE(review): presumably the widget API rejects a default that is not
    # among the choices - confirm.
    default_rating = "C"
    if rating_list and default_rating not in rating_list:
        default_rating = rating_list[0]

    year_choices = [str(year) for year in range(min_year, max_year + 1)]
    dbutils.widgets.dropdown("year", str(min_year), year_choices, "Select year")
    dbutils.widgets.dropdown("country", country_list[0], country_list, "Select country")
    dbutils.widgets.dropdown("rating", default_rating, rating_list, "Select rating")
def plot_defaults_per_country(df: DataFrame):
    """Draw a bar chart of "Defaults" per "Country" for the current widgets.

    The "year" and "rating" widget values are read first and used only in
    the chart title. When ``df`` has no rows nothing is drawn.

    :param df: frame providing the "Country" and "Defaults" columns
    :return: whatever ``display`` returns for the axes, or None if ``df``
        is empty
    """
    year = dbutils.widgets.get("year")  # noqa: F821
    rating = dbutils.widgets.get("rating")  # noqa: F821

    first_rows = df.head(1)
    if not first_rows:
        # Nothing to plot.
        return None

    plot_data = df.toPandas()
    chart = sns.barplot(x="Country", y="Defaults", data=plot_data)
    chart.set_title(f"Defaults per Country of {rating} rating during {year}")
    return display(chart)  # noqa: F821
def plot_defaults_per_month(df: DataFrame):
    """Draw a bar chart of "Defaults" per "Month" for the current widgets.

    The "year", "country" and "rating" widget values are read first and used
    only in the chart title. When ``df`` has no rows nothing is drawn.

    :param df: frame providing the "Month" and "Defaults" columns
    :return: whatever ``display`` returns for the axes, or None if ``df``
        is empty
    """
    year = dbutils.widgets.get("year")  # noqa: F821
    country = dbutils.widgets.get("country")  # noqa: F821
    rating = dbutils.widgets.get("rating")  # noqa: F821

    first_rows = df.head(1)
    if not first_rows:
        # Nothing to plot.
        return None

    plot_data = df.toPandas()
    chart = sns.barplot(x="Month", y="Defaults", data=plot_data)
    chart.set_title(f"Defaults per Month in {year} in {country} of {rating} rating")
    return display(chart)  # noqa: F821