# `ems` appears to be a project-local evaluation-measures module (confusion
# matrix, precision, recall, ...); `res` is a fitted statsmodels Logit result.
def get_all_eval_measures(res, endog, include_prc=False):
    predict = res.predict()
    measures = {}
    pred_table = ems.cm(predict, endog)
    measures["precision"] = ems.precision(pred_table)
    measures["recall"] = ems.recall(pred_table)
    measures["accuracy"] = ems.accuracy(pred_table)
    measures["f_score"] = ems.fscore_measure(pred_table)
    measures["rmse"] = ems.rmse(predict, endog)
    measures["mae"] = ems.mae(predict, endog)
    measures["auc"] = ems.auc(predict, endog)
    measures["llf"] = res.llf
    measures["aic"] = res.aic
    measures["bic"] = res.bic
    measures["prsquared"] = res.prsquared
    measures["df_model"] = res.df_model
    tn, fp, fn, tp = map(float, pred_table.flatten())  # with 1 as the positive label
    measures["tn"] = tn
    measures["fn"] = fn
    measures["fp"] = fp
    measures["tp"] = tp
    print "In eval measures function."
    if include_prc:
        ## Include the precision recall values
        prc = ems.prc(predict, endog, float_precision=3)
        measures["prc"] = prc
    return measures
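The `ems` module itself is not shown on this page, so the exact helpers cannot be inspected here. As a rough, self-contained sketch of what the confusion-matrix based entries amount to (thresholding predicted probabilities at 0.5 and treating 1 as the positive label), something along these lines would produce comparable numbers; the function name and the 0.5 threshold are illustrative assumptions, not part of the original project.

import numpy as np

def confusion_measures_sketch(predict, endog, threshold=0.5):
    """Illustrative stand-in: confusion-matrix measures with 1 as the positive label."""
    pred_label = (np.asarray(predict, dtype=float) >= threshold).astype(int)
    actual = np.asarray(endog).astype(int)
    tp = float(np.sum((actual == 1) & (pred_label == 1)))
    fp = float(np.sum((actual == 0) & (pred_label == 1)))
    fn = float(np.sum((actual == 1) & (pred_label == 0)))
    tn = float(np.sum((actual == 0) & (pred_label == 0)))
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    accuracy = (tp + tn) / max(tp + tn + fp + fn, 1.0)
    f_score = (2 * precision * recall / (precision + recall)
               if (precision + recall) else 0.0)
    return {"tn": tn, "fp": fp, "fn": fn, "tp": tp,
            "precision": precision, "recall": recall,
            "accuracy": accuracy, "f_score": f_score}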
Example #2
# Variant that takes the predicted probabilities directly instead of a results object.
def get_all_eval_measures(predict, endog, include_prc=False):
    measures = {}
    pred_table = ems.cm(predict, endog)
    measures["precision"] = ems.precision(pred_table)
    measures["recall"] = ems.recall(pred_table)
    measures["accuracy"] = ems.accuracy(pred_table)
    measures["f_score"] = ems.fscore_measure(pred_table)
    measures["rmse"] = ems.rmse(predict, endog)
    measures["mae"] = ems.mae(predict, endog)
    measures["auc"] = ems.auc(predict, endog)
    tn, fp, fn, tp = map(float, pred_table.flatten())  # with 1 as the positive label
    measures["tn"] = tn
    measures["fn"] = fn
    measures["fp"] = fp
    measures["tp"] = tp
    measures["tpr"] = tp * 1. / (tp + fn)
    measures["fpr"] = fp * 1. / (fp + tn)
    print "In eval measures function."
    if include_prc:
        print "Generating PRC AND ROC"
        ## Include the precision recall values
        prc = ems.prc(predict, endog, float_precision=3)
        measures["prc"] = prc
        roc = ems.roc(predict, endog, float_precision=3)
        measures["roc"] = roc
    return measures
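This variant also reports TPR/FPR and an ROC curve. The exact return format of `ems.roc` is not visible on this page; as an assumption-labeled sketch, an ROC curve can be traced by sweeping the decision threshold and recording (FPR, TPR) pairs, which is the general technique such a helper would implement.

import numpy as np

def roc_sketch(predict, endog, n_thresholds=101):
    """Sketch: (fpr, tpr) pairs traced by sweeping the classification threshold."""
    predict = np.asarray(predict, dtype=float)
    actual = np.asarray(endog).astype(int)
    points = []
    for t in np.linspace(0.0, 1.0, n_thresholds):
        pred_label = (predict >= t).astype(int)
        tp = float(np.sum((actual == 1) & (pred_label == 1)))
        fn = float(np.sum((actual == 1) & (pred_label == 0)))
        fp = float(np.sum((actual == 0) & (pred_label == 1)))
        tn = float(np.sum((actual == 0) & (pred_label == 0)))
        tpr = tp / (tp + fn) if (tp + fn) else 0.0
        fpr = fp / (fp + tn) if (fp + tn) else 0.0
        points.append((round(fpr, 3), round(tpr, 3)))
    return points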
Example #3
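The imports this example relies on are not shown on the page. `patsy` and statsmodels provide `dmatrices` and `Logit`; `ems`, `plot_prc`, and `save_data` look like project-local helpers. A minimal, assumed preamble:

import patsy
from statsmodels.discrete.discrete_model import Logit
# `ems` (evaluation measures), `plot_prc`, and `save_data` are assumed to be
# project-local helpers imported from elsewhere in the original codebase.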
def fit_model(df,
              formula,
              title="Full",
              fp=None,
              filename="Model",
              save=False):
    """
  Function to fit model, collect stats and save predictions and model.
  df: dataframe
  formula: formula
  title: title of model (Default: "Full")
  fp: File pointer (Default: None)
  filename: Model and data file prefix ("Model")
  save: Weather to save predictions, model or both or none ["Both", "Data", "Model", False] (Default: False)
  """
    if df.shape[0] < 10:
        print "Too less instances. Skipping. Make sure you have atleast 10 instances."
        return None, None
    print "Modelling Model[%s] with instances %s" % (title, df.shape[0])
    print "Using formula:\n %s" % (formula)
    print "Generating patsy matrices"
    y, X = patsy.dmatrices(formula, df, return_type="dataframe")
    print "Initializing model"
    model = Logit(y, X)
    print "Fitting model"
    res = model.fit()
    print title, "\n", res.summary2()
    print "Confusion Matrix:", res.pred_table()
    precision = ems.precision(res.pred_table())
    recall = ems.recall(res.pred_table())
    accuracy = ems.accuracy(res.pred_table())
    f_score = ems.fscore_measure(res.pred_table())
    rmse = ems.rmse(res.predict(), model.endog)
    mae = ems.mae(res.predict(), model.endog)
    auc = ems.auc(res.predict(), model.endog)
    prc = ems.prc(res.predict(), model.endog)
    prc_filename = "%s.pdf" % filename
    plot_prc(prc, prc_filename)
    evaluation_metrics = "[Model Measures]: Confusion Matrix: %s\nRMSE: %s\tMAE: %s\tAUC: %s\nPrecision: %s\tRecall: %s\tAccuracy: %s\tF1-Score: %s\nPRC:\n%s" % (
        res.pred_table(), rmse, mae, auc, precision, recall, accuracy, f_score,
        prc_filename)
    print(evaluation_metrics)
    print("[save=%s]" % save, "" if save else "Not", "Saving Model to %s" % filename)
    if fp is not None:
        print >> fp, "Modelling Model[%s] with instances %s" % (title,
                                                                df.shape[0])
        print >> fp, "Using formula:\n %s" % (formula)
        print >> fp, title, "\n", res.summary2()
        print >> fp, evaluation_metrics
        print >> fp, "[save=%s]" % save, "" if save else "Not", "Saving Model to %s" % filename
    model_save, data_save = False, False
    if save == "Both":
        model_save, data_save = True, True
    if save == "Model" or model_save:
        model_file = "%s.pkl" % filename
        res.save(model_file, remove_data=True)  # Save model
    if save == "Data" or data_save:
        data_file = "%s.data.txt" % filename  # Include predictions
        print "df.index", df.index
        save_data(df[["from_id", "is_self_cite"]],
                  res.predict(),
                  filename=data_file)
    print "Done Saving"
    return model, res
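A hedged usage sketch for fit_model: the DataFrame columns below (`is_self_cite`, `from_id`, `n_prior_papers`) are illustrative, chosen to mirror the column names referenced in the save branch, and running it still assumes the project-local helpers (`ems`, `plot_prc`, `save_data`) are importable.

import pandas as pd

# Hypothetical data; at least 10 rows are needed to pass the size check above.
df = pd.DataFrame({
    "is_self_cite":   [0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0],
    "from_id":        list(range(12)),
    "n_prior_papers": [1, 5, 2, 8, 6, 1, 0, 9, 3, 7, 4, 2],
})

with open("model_log.txt", "w") as fp:
    model, res = fit_model(df,
                           "is_self_cite ~ n_prior_papers",
                           title="Self-citation",
                           fp=fp,
                           filename="self_cite_model",
                           save=False)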