def build_and_evaluate_user_model(X, y):
    """Train, cross-validate, and persist a per-user logistic-regression model.

    The pickled category model in ``categories.pickle`` turns ``X`` into
    class-probability features.  A ``LogisticRegression`` is evaluated on
    those features with 5-fold cross-validation (ROC AUC per fold), then a
    final model is fit on all of the data, pickled to
    ``BASEDIR + "user_models/" + username + ".pickle"``, and returned.

    ``BASEDIR`` and ``username`` are not parameters; they are read from the
    enclosing (module) scope and must be defined before this is called.

    Args:
        X: Samples accepted by the category model's ``predict_proba``.  Must
            support indexing with the index arrays yielded by
            ``KFold.split`` (presumably a NumPy array or sparse matrix --
            confirm against callers).
        y: Labels aligned with ``X``; must support the same index-array
            indexing.  Column 1 of the predicted probabilities is used as
            the ROC score, so binary labels are assumed.

    Returns:
        The ``LogisticRegression`` fit on all of ``X``/``y``.  Its ad-hoc
        ``auc`` attribute holds the mean of the per-fold ROC AUC values.
    """
    # NOTE(security): pickle.load executes arbitrary code embedded in the
    # file; "categories.pickle" must come from a trusted source.
    with open("categories.pickle", 'rb') as f:
        category_model = pickle.load(f)

    # The category model is fixed, so its features are computed once and
    # sliced per fold instead of being recomputed for every train/test split.
    # NOTE(review): assumes predict_proba scores each row independently of
    # the other rows in the batch -- confirm for the pickled model.
    features = category_model.predict_proba(X)

    kf = KFold(n_splits=5)
    aucs = []
    for train, test in kf.split(X):
        fold_model = LogisticRegression()
        fold_model.fit(features[train], y[train])
        # Column 1 holds the probability of the second class in classes_.
        y_pred = fold_model.predict_proba(features[test])[:, 1]
        fpr, tpr, _ = metrics.roc_curve(y[test], y_pred)
        aucs.append(metrics.auc(fpr, tpr))

    # Final model: a fresh estimator trained on every sample, annotated with
    # the cross-validated AUC so the score travels with the pickled model.
    user_model = LogisticRegression()
    user_model.auc = sum(aucs) / len(aucs)
    user_model.fit(features, y)

    # This report is computed on the training data itself (in-sample).
    print(metrics.classification_report(y, user_model.predict(features)))

    # Build the destination once so the file written and the message agree.
    model_path = BASEDIR + "user_models/" + username + ".pickle"
    with open(model_path, 'wb') as f:
        pickle.dump(user_model, f)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Model written out to " + model_path)
    return user_model