def get_all_eval_measures(res, endog, include_prc=False):
    """Collect evaluation measures for a fitted binary model result.

    res: fitted model result exposing predict(), llf, aic, bic,
        prsquared and df_model (e.g. a statsmodels Logit result).
    endog: observed binary labels, with 1 treated as the positive class.
    include_prc: when True, also attach precision-recall curve values.

    Returns a dict mapping measure name -> value.

    NOTE(review): a later definition of get_all_eval_measures(predict,
    endog, ...) in this module shadows this one at import time.
    """
    predict = res.predict()
    cm = ems.cm(predict, endog)
    measures = {
        "precision": ems.precision(cm),
        "recall": ems.recall(cm),
        "accuracy": ems.accuracy(cm),
        "f_score": ems.fscore_measure(cm),
        "rmse": ems.rmse(predict, endog),
        "mae": ems.mae(predict, endog),
        "auc": ems.auc(predict, endog),
        # Fit statistics taken straight off the result object.
        "llf": res.llf,
        "aic": res.aic,
        "bic": res.bic,
        "prsquared": res.prsquared,
        "df_model": res.df_model,
    }
    # Confusion-matrix cell counts, with 1 as the positive label.
    tn, fp, fn, tp = map(float, cm.flatten())
    measures["tn"] = tn
    measures["fn"] = fn
    measures["fp"] = fp
    measures["tp"] = tp
    print("In eval measures function.")
    if include_prc:
        # Include the precision recall values
        measures["prc"] = ems.prc(predict, endog, float_precision=3)
    return measures
def get_all_eval_measures(predict, endog, include_prc=False):
    """Collect evaluation measures for a vector of predictions.

    predict: predicted scores/labels for each instance.
    endog: observed binary labels, with 1 treated as the positive class.
    include_prc: when True, also attach precision-recall and ROC curve
        values.

    Returns a dict mapping measure name -> value.
    """
    measures = {}
    pred_table = ems.cm(predict, endog)
    measures["precision"] = ems.precision(pred_table)
    measures["recall"] = ems.recall(pred_table)
    measures["accuracy"] = ems.accuracy(pred_table)
    measures["f_score"] = ems.fscore_measure(pred_table)
    measures["rmse"] = ems.rmse(predict, endog)
    measures["mae"] = ems.mae(predict, endog)
    measures["auc"] = ems.auc(predict, endog)
    # Confusion-matrix cell counts, WRT to 1 as positive label.
    tn, fp, fn, tp = map(float, pred_table.flatten())
    measures["tn"] = tn
    measures["fn"] = fn
    measures["fp"] = fp
    measures["tp"] = tp
    # Guard the rate denominators: if endog contains no positive (tp+fn == 0)
    # or no negative (fp+tn == 0) instances, the original expressions raised
    # ZeroDivisionError. Report 0.0 for the undefined rate instead.
    positives = tp + fn
    negatives = fp + tn
    measures["tpr"] = tp / positives if positives else 0.0
    measures["fpr"] = fp / negatives if negatives else 0.0
    print("In eval measures function.")
    if include_prc:
        print("Generating PRC AND ROC")
        ## Include the precision recall values
        measures["prc"] = ems.prc(predict, endog, float_precision=3)
        measures["roc"] = ems.roc(predict, endog, float_precision=3)
    return measures
def fit_model(df, formula, title="Full", fp=None, filename="Model", save=False): """ Function to fit model, collect stats and save predictions and model. df: dataframe formula: formula title: title of model (Default: "Full") fp: File pointer (Default: None) filename: Model and data file prefix ("Model") save: Weather to save predictions, model or both or none ["Both", "Data", "Model", False] (Default: False) """ if df.shape[0] < 10: print "Too less instances. Skipping. Make sure you have atleast 10 instances." return None, None print "Modelling Model[%s] with instances %s" % (title, df.shape[0]) print "Using formula:\n %s" % (formula) print "Generating patsy matrices" y, X = patsy.dmatrices(formula, df, return_type="dataframe") print "Initializing model" model = Logit(y, X) print "Fitting model" res = model.fit() print title, "\n", res.summary2() print "Confusion Matrix:", res.pred_table() precision = ems.precision(res.pred_table()) recall = ems.recall(res.pred_table()) accuracy = ems.accuracy(res.pred_table()) f_score = ems.fscore_measure(res.pred_table()) rmse = ems.rmse(res.predict(), model.endog) mae = ems.mae(res.predict(), model.endog) auc = ems.auc(res.predict(), model.endog) prc = ems.prc(res.predict(), model.endog) prc_filename = "%s.pdf" % filename plot_prc(prc, prc_filename) evaluation_metrics = "[Model Measures]: Confusion Matrix: %s\nRMSE: %s\tMAE: %s\tAUC: %s\nPrecision: %s\tRecall: %s\tAccuracy: %s\tF1-Score: %s\nPRC:\n%s" % ( res.pred_table(), rmse, mae, auc, precision, recall, accuracy, f_score, prc_filename) print evaluation_metrics print "[save=%s]" % save, "" if save else "Not", "Saving Model to %s" % filename if fp is not None: print >> fp, "Modelling Model[%s] with instances %s" % (title, df.shape[0]) print >> fp, "Using formula:\n %s" % (formula) print >> fp, title, "\n", res.summary2() print >> fp, evaluation_metrics print >> fp, "[save=%s]" % save, "" if save else "Not", "Saving Model to %s" % filename model_save, data_save = False, False 
if save == "Both": model_save, data_save = True, True if save == "Model" or model_save: model_file = "%s.pkl" % filename res.save(model_file, remove_data=True) # Save model if save == "Data" or data_save: data_file = "%s.data.txt" % filename # Include predictions print "df.index", df.index save_data(df[["from_id", "is_self_cite"]], res.predict(), filename=data_file) print "Done Saving" return model, res