def featimp_by_steps(model, steps, sort_step, top, cols):
    """
    Return pd.DataFrame of top feature importances by selected steps.

    Importances are read from model.extras.feature_importances["test"],
    one "s=<step>" column per step, indexed by feature.

    Args:
        model: Model to get importances for.
        steps: Steps to include in table, in column order.
        sort_step: Step to sort table by (descending). Its "s=<step>"
            column must exist, i.e. sort_step must be one of steps.
        top: Top number of features to include. Rows at positions
            0..top are kept, which is top + 1 rows.
        cols: Feature list. Not used by this function; kept so existing
            callers keep working.

    Returns:
        pd.DataFrame indexed by feature, sorted by the sort_step column.

    Raises:
        ValueError: If steps is empty.
    """
    df = None
    for step in steps:
        fi_dict = model.extras.feature_importances["test"][step]
        # assumes fi.reorder_fi_dict yields "feature" and "importance"
        # columns -- TODO confirm against the fi module.
        step_df = (
            pd.DataFrame(fi.reorder_fi_dict(fi_dict))
            .rename(columns={"importance": f"s={step}"})
            .set_index("feature")
        )
        # Seed the table on the first iteration rather than comparing
        # step values, so unordered steps cannot reset what was joined.
        df = step_df if df is None else df.join(step_df)

    if df is None:
        raise ValueError("steps must contain at least one step")

    df = df.sort_values(by=[f"s={sort_step}"], ascending=False)
    # NOTE(review): this keeps top + 1 rows, not top -- confirm intended.
    df = df.iloc[: top + 1]

    return df
Exemplo n.º 2
0
def featimp_by_steps(model, period, steps, sort_step, top, cols):
    """
    Return pd.DataFrame of top feature importances by all selected steps,
    sorted by selected step.

    Importances are read from model.extras.feature_importances[period],
    one "s=<step>" column per step, indexed by feature.

    Args:
        model: Model to extract feature importances from.
        period (str): Period to extract feature importances from.
        steps (list): Steps to include in table, in column order.
        sort_step (int): Step to sort table by (descending). Its
            "s=<step>" column must exist, i.e. it must be one of steps.
        top (int): Top number of features to include. Rows at positions
            0..top are kept, which is top + 1 rows.
        cols (list): Feature list. Not used by this function; kept so
            existing callers keep working.

    Returns:
        pd.DataFrame indexed by feature, sorted by the sort_step column.

    Raises:
        ValueError: If steps is empty.
    """
    df = None
    for step in steps:
        fi_dict = model.extras.feature_importances[period][step]
        # assumes fi.reorder_fi_dict yields "feature" and "importance"
        # columns -- TODO confirm against the fi module.
        step_df = (
            pd.DataFrame(fi.reorder_fi_dict(fi_dict))
            .rename(columns={"importance": f"s={step}"})
            .set_index("feature")
        )
        # Seed the table on the first iteration rather than comparing
        # step values, so unordered steps cannot reset what was joined.
        df = step_df if df is None else df.join(step_df)

    if df is None:
        raise ValueError("steps must contain at least one step")

    df = df.sort_values(by=[f"s={sort_step}"], ascending=False)
    # NOTE(review): this keeps top + 1 rows, not top -- confirm intended.
    df = df.iloc[: top + 1]
    return df
Exemplo n.º 3
0
# For every model, build its feature-importance table and hand it to
# fi.write_fi_tex together with a per-model, per-level .tex path.
for model in models:
    fi_cm = featimp_by_steps(
        model=model,
        steps=steps,
        sort_step=sort_step,
        top=top,
        cols=model.cols_features,
    )
    fi_table = pd.DataFrame(fi_cm)
    fi_tex_path = os.path.join(
        out_paths["features"],
        f"impurity_imp_{model.name}_{level}.tex",
    )
    fi.write_fi_tex(fi_table, fi_tex_path)

# Step whose "s=<step>" column orders the table, and the row-slice bound.
sort_step = 3
top = 30

# For every model, assemble one permutation-importance column per step
# (indexed by feature), sort by the sort_step column and write the result
# through fi.write_fi_tex.
for model in models:
    # Reset per model so a table is never carried over between models.
    pi_df = None
    for step in steps:
        pi_dict = model.extras.permutation_importances["test"][step]["test"]
        # assumes fi.reorder_fi_dict yields "feature" and "importance"
        # columns -- TODO confirm against the fi module.
        step_df = pd.DataFrame(fi.reorder_fi_dict(pi_dict))
        step_df = step_df.rename(columns={"importance": f"s={step}"})
        step_df = step_df.set_index("feature")
        # Seed the table on the first iteration rather than comparing
        # step values, so unordered steps cannot reset what was joined.
        pi_df = step_df if pi_df is None else pi_df.join(step_df)

    if pi_df is None:
        raise ValueError("steps must contain at least one step")

    pi_df = pi_df.sort_values(by=[f"s={sort_step}"], ascending=False)
    # NOTE(review): this keeps top + 1 rows, not top -- confirm intended.
    pi_df = pi_df.iloc[: top + 1]

    fi.write_fi_tex(
        pi_df,
        os.path.join(out_paths["features"],
                     f"permutation_imp_{model.name}.tex"))