示例#1
0
def determine_errors(df_val, tree, ml_task):
    """Score a tree's predictions against the ``label`` column of ``df_val``.

    Args:
        df_val: Validation data exposing a ``label`` attribute with the true
            values (presumably a pandas DataFrame -- confirm with the caller).
        tree: Tree object forwarded unchanged to ``decision_tree_predictions``.
        ml_task: ``"regression"`` selects mean squared error; any other value
            selects the mismatch count.

    Returns:
        The mean of the squared differences between predictions and labels
        for regression, otherwise the number of positions where the
        prediction differs from the label.
    """
    predicted = decision_tree_predictions(df_val, tree)
    observed = df_val.label

    if ml_task != "regression":
        # Non-regression scoring: count how many predictions miss the label.
        mismatches = predicted != observed
        return sum(mismatches)

    # Regression scoring: mean squared error.
    residuals = predicted - observed
    return (residuals ** 2).mean()
示例#2
0
def random_forest_predictions(test_df, forest):
    """Combine the predictions of every tree in ``forest`` by majority vote.

    Args:
        test_df: Data passed unchanged to ``decision_tree_predictions`` for
            each tree.
        forest: Indexable, sized collection of trees (``forest[i]`` for
            ``i`` in ``range(len(forest))``).

    Returns:
        Column ``0`` of the row-wise mode over the per-tree predictions,
        i.e. one voted value per row of the assembled prediction table.
    """
    # One column per tree, named "tree_<position>".
    votes = pd.DataFrame(
        {
            "tree_{}".format(position): decision_tree_predictions(
                test_df, tree=forest[position]
            )
            for position in range(len(forest))
        }
    )

    # mode(axis=1) may yield several columns when values tie; keep the first.
    return votes.mode(axis=1)[0]
示例#3
0
def random_forest_predictions(test_df, forest):
    """Return the most frequent per-row prediction across all trees.

    Args:
        test_df: Data handed to ``decision_tree_predictions`` for each tree.
        forest: Indexable, sized collection of trees.

    Returns:
        Column ``0`` of the row-wise mode of the per-tree predictions
        (a pandas Series).
    """
    tree_positions = range(len(forest))

    # Column labels and the matching prediction results, in tree order.
    column_labels = ["tree_{}".format(pos) for pos in tree_positions]
    tree_outputs = [
        decision_tree_predictions(test_df, tree=forest[pos])
        for pos in tree_positions
    ]

    # Table layout: one row per entry in the predictions, one column per tree.
    vote_table = pd.DataFrame(dict(zip(column_labels, tree_outputs)))

    # Majority vote: take the first column of the row-wise mode.
    row_modes = vote_table.mode(axis=1)
    return row_modes[0]
示例#4
0
def random_forest_predictions(test_df, forest, ml_task):
    """Aggregate per-tree predictions into a single forest prediction.

    Args:
        test_df: Data handed to ``decision_tree_predictions`` for each tree.
        forest: Indexable, sized collection of trees.
        ml_task: ``"regression"`` averages the trees' outputs per row; any
            other value takes the row-wise mode (majority vote).

    Returns:
        The row-wise mean of the per-tree predictions for regression,
        otherwise column ``0`` of their row-wise mode.
    """
    # Gather every tree's output under a "tree_<position>" column.
    per_tree = pd.DataFrame(
        {
            "tree_{}".format(position): decision_tree_predictions(
                test_df, tree=forest[position]
            )
            for position in range(len(forest))
        }
    )

    if ml_task != "regression":
        # Voting: first column of the row-wise mode.
        return per_tree.mode(axis=1)[0]

    # Regression: average across trees for each row.
    return per_tree.mean(axis=1)