Exemplo n.º 1
0
def get_classes(df):
    """Return the columns of ``df`` that are listed as features.

    Columns are returned in the order ``df`` yields them. Any column that
    ``parse.get_features_list`` does not report (the Index column and the
    columns that do not contain exclusively numeric values) is left out.
    """
    selected = parse.get_features_list(df)
    return [column for column in df if column in selected]
Exemplo n.º 2
0
def my_scatter_plot(df):
    """Display a scatter plot with one marker trace per feature column.

    Only the columns reported by ``parse.get_features_list`` are drawn, so
    the Index column and the columns that do not contain exclusively
    numeric values are skipped. The figure is shown, nothing is returned.
    """
    figure = go.Figure()
    selected = parse.get_features_list(df)
    plotted = [column for column in df if column in selected]

    for column in plotted:
        trace = go.Scatter(
            y=df[column],
            name=column,
            opacity=0.8,
            mode='markers',
        )
        figure.add_trace(trace)

    figure.update_layout(
        title="Normalized grades for each class",
        xaxis_title="Index",
        yaxis_title="Normalized grades",
    )
    figure.show()
Exemplo n.º 3
0
def plot_hist(df):
    """Show, for every feature column, overlaid per-house histograms.

    Each column reported by ``parse.get_features_list`` gets its own
    subplot, filled row by row in a grid that is four columns wide. Within
    a subplot, one semi-transparent histogram is drawn per entry of
    ``cst.houses``, coloured through ``cst.houses_colors``.

    The grid has at least four rows (the historical 4x4 layout) and grows
    by whole rows when there are more than 16 features, so a wider dataset
    no longer triggers an ``IndexError`` on the axes array.
    """
    n_cols = 4
    feat_list = parse.get_features_list(df)
    features = [feature for feature in df if feature in feat_list]

    # Ceiling division without importing math; never fewer than 4 rows so
    # the layout is unchanged for datasets that already fit in 4x4.
    n_rows = max(4, -(-len(features) // n_cols))
    _fig, axs = plt.subplots(n_rows, n_cols)

    for position, feature in enumerate(features):
        row, col = divmod(position, n_cols)
        ax = axs[row, col]
        for house in cst.houses:
            ax.hist(get_feature_per_house(df, house, feature),
                    bins=25,
                    alpha=0.5,
                    color=cst.houses_colors[house])
        # The title does not depend on the house: set it once per subplot.
        ax.set_title(feature)

    plt.tight_layout()
    plt.show()
Exemplo n.º 4
0
def my_histogram(df):
    """Display a bar chart of the standard deviation of each feature column.

    For every column reported by ``parse.get_features_list`` the count,
    mean and standard deviation are obtained from the ``describe`` helpers
    (``get_count``, ``get_mean``, ``get_std``). One bar is drawn per
    feature, labelled with the column name. The chart is shown, nothing is
    returned.
    """
    selected = parse.get_features_list(df)
    labels = []
    deviations = []

    for column in df:
        if column in selected:
            values = df[column]
            n_values = describe.get_count(values)
            mean = describe.get_mean(n_values, values)
            deviations.append(describe.get_std(n_values, mean, values))
            labels.append(column)

    # One bar position per plotted feature.
    positions = range(len(labels))
    plt.bar(positions, deviations, color=cst.colors)
    plt.xticks(positions, labels, rotation=-45, fontsize=6, ha="left")
    plt.title(
        "Standard deviation between the student's grades for each feature \n"
        "(less std means that the student's grades are homogeneous)"
    )
    plt.show()
Exemplo n.º 5
0
def test_describe(df, describe, print_describe=True):
    """Check hand-computed statistics against ``df.describe()``.

    ``describe`` is indexed as ``describe[feature][statistic]`` and is
    compared, value by value, with the reference table returned by
    ``df.describe()``. Every pair that ``np.isclose`` rejects is reported
    through ``warn_diff`` and counted.

    When ``print_describe`` is exactly ``True``, both tables are printed
    with ``tabulate``.

    Returns the number of mismatching values.
    """
    feat_list = parse.get_features_list(df)
    # Reference values from pandas, keeping the columns from the first
    # listed feature onwards.
    control_values = df.describe().loc[:, feat_list[0]:]

    mismatches = 0
    # Only the columns listed as features are compared.
    for feature in (column for column in df if column in feat_list):
        reference = control_values[feature]
        for value in reference.keys():
            ours = describe[feature][value]
            theirs = reference[value]
            if np.isclose(ours, theirs):
                continue
            # Significant difference between our result and the reference.
            mismatches += 1
            warn_diff(feature, value, ours, theirs)

    if print_describe is True:
        table_options = dict(headers="keys", tablefmt="fancy_grid", floatfmt=".6f")
        print(tabulate(describe, **table_options))
        print(tabulate(control_values, **table_options))

    return mismatches