def tune(train_fn, param_vals, train_feats, train_labels, val_feats, val_labels):
    # Preallocate one accuracy slot per hyperparameter value.
    train_accs = np.ndarray(len(param_vals))
    val_accs = np.ndarray(len(param_vals))

    for i, val in enumerate(param_vals):
        # Train with the current hyperparameter value and score on both splits.
        theta, theta_0 = train_fn(train_feats, train_labels, val)

        train_preds = p1.classify(train_feats, theta, theta_0)
        train_accs[i] = p1.accuracy(train_preds, train_labels)

        val_preds = p1.classify(val_feats, theta, theta_0)
        val_accs[i] = p1.accuracy(val_preds, val_labels)

    return train_accs, val_accs
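# Usage sketch (illustrative, not part of the original module): `train_fn` is any
# trainer with the signature train_fn(feats, labels, param) -> (theta, theta_0).
# The name p1.perceptron below is an assumption about what p1 exposes; substitute
# whichever trainer the project actually provides.
def tune_epochs(train_feats, train_labels, val_feats, val_labels, Ts=(1, 5, 10, 25, 50)):
    # Sweep the hyperparameter values and pick the one with the best validation accuracy.
    train_accs, val_accs = tune(p1.perceptron, list(Ts),
                                train_feats, train_labels, val_feats, val_labels)
    best_T = list(Ts)[int(np.argmax(val_accs))]
    return best_T, train_accs, val_accs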
def mainClassification():
    folds = 10
    n_classifiers = classifiers.len_classify()
    cm, acc = [], []
    result_cm, result_acc = [0] * folds, [0] * folds
    for i in range(folds):
        result_cm[i], result_acc[i] = [0] * n_classifiers, [0] * n_classifiers

    data = preprocessing.readData()
    samples, targets = preprocessing.splitSamples(data)
    kfold = preprocessing.crossValidation(folds)

    for train, test in kfold.split(samples, targets):
        X_train, X_test = samples[train], samples[test]
        y_train, y_test = targets[train], targets[test]
        for i in range(n_classifiers):
            y_pred = classifiers.classify(X_train, y_train, X_test, i)
            fold_cm, fold_acc = classifiers.mensureAcc(y_pred, y_test)
            cm.append(fold_cm)
            acc.append(fold_acc)

    # Results were appended fold by fold, classifier by classifier, so the flat
    # index is fold * n_classifiers + classifier; reshape into folds x classifiers.
    for j in range(n_classifiers):
        for k in range(folds):
            result_cm[k][j] = cm[k * n_classifiers + j]
            result_acc[k][j] = acc[k * n_classifiers + j]

    utils.writeCSV("all_classifiers_confusion-matrix.csv", result_cm)
    utils.writeCSV("all_classifiers_accuracy.csv", result_acc)
    print("Finish!!\n")
def subset_run(data, n_seed=5, splits=10, methods=['sfs'], estimators=['rdforest']):
    X = np.array(data.drop('HPYLORI', axis=1))
    y = np.array(data.HPYLORI)

    if not os.path.exists('data'):
        os.makedirs('data')
    if not os.path.exists('results'):
        os.makedirs('results')

    outer_names = create_empty_dic(estimators, methods)
    for seed in range(n_seed):
        skf = StratifiedKFold(n_splits=splits, random_state=seed, shuffle=True)
        split_num = 0
        names = create_empty_dic(estimators, methods)
        for train, test in skf.split(X, y):
            split_num += 1
            X_train, X_test = X[train], X[test]
            y_train, y_test = y[train], y[test]
            X_train, X_test = dp.impute(X_train), dp.impute(X_test)
            for estimator in estimators:
                for s in methods:
                    result = e.classify(estimator, X_train, X_test, y_train, y_test)
                    filename = data_path + estimator + '_' + s + '_' + str(split_num) + '_' + str(seed)
                    names[estimator][s].append(filename)
                    result.to_csv(filename + '.csv')
        create_interim_csv(names, outer_names, seed, splits)
        delete_interim_csv(names)
    file_list = create_final_csv(outer_names, n_seed)
    return file_list
def execute_subset_run(index, run, estimators, methods):
    X_train, X_test = run[0], run[1]
    y_train, y_test = run[2], run[3]
    for estimator in estimators:
        for method in methods:
            result = e.classify(estimator, X_train, X_test, y_train, y_test)
            filename = data_path + estimator + method + '_' + str(index)
            result.to_csv(filename + '.csv')
def execute_a_run(index, run, features, estimators, methods):
    for i in range(len(methods)):
        method = methods[i]
        selection = features[i]
        X_train, X_test = run[0], run[1]
        y_train, y_test = run[2], run[3]
        X_train, X_test = X_train[:, selection], X_test[:, selection]
        for estimator in estimators:
            result = e.classify(estimator, X_train, X_test, y_train, y_test)
            filename = data_path + estimator + method + '_' + str(index)
            result.to_csv(filename + '.csv')
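# Illustrative helper (not in the original code): execute_subset_run and
# execute_a_run both expect `run` to be a 4-tuple (X_train, X_test, y_train,
# y_test). A minimal sketch of building such runs with the same StratifiedKFold
# and dp.impute steps used elsewhere in this module; the helper name is an
# assumption.
def build_runs(X, y, splits=5, seed=0):
    runs = []
    skf = StratifiedKFold(n_splits=splits, random_state=seed, shuffle=True)
    for train, test in skf.split(X, y):
        # Impute each side of the split independently, as subset_run/normal_run do.
        X_train, X_test = dp.impute(X[train]), dp.impute(X[test])
        runs.append((X_train, X_test, y[train], y[test]))
    return runs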
def normal_run(data, n_seed=2, splits=5, methods=['infogain_10'], estimators=['rdforest']):
    X = np.array(data.drop('HPYLORI', axis=1))
    y = np.array(data.HPYLORI)

    if not os.path.exists('data'):
        os.makedirs('data')
    if not os.path.exists('results'):
        os.makedirs('results')

    # Counts how often each feature is selected across seeds and folds.
    features = np.zeros(X.shape[1])
    outer_names = create_empty_dic(estimators, methods)
    for seed in range(n_seed):
        skf = StratifiedKFold(n_splits=splits, random_state=seed, shuffle=True)
        split_num = 0
        names = create_empty_dic(estimators, methods)
        for train, test in skf.split(X, y):
            split_num += 1
            X_train, X_test = X[train], X[test]
            y_train, y_test = y[train], y[test]
            X_train, X_test = dp.impute(X_train), dp.impute(X_test)
            for s in methods:
                # Method strings encode "<selector>_<n_features>", e.g. "infogain_10".
                selector = s.split('_')[0]
                n_features = int(s.split('_')[1])
                selection = fselect.run_feature_selection(selector, X_train, y_train, n_features)
                for i in selection:
                    features[i] += 1
                # Index into the full fold data so one method's selection does not
                # clobber the columns available to the next method.
                X_train_sel, X_test_sel = X_train[:, selection], X_test[:, selection]
                for estimator in estimators:
                    result = e.classify(estimator, X_train_sel, X_test_sel, y_train, y_test)
                    filename = data_path + estimator + s + '_' + str(split_num) + '_' + str(seed)
                    names[estimator][s].append(filename)
                    result.to_csv(filename + '.csv')
        create_interim_csv(names, outer_names, seed, splits)
        delete_interim_csv(names)
    print(outer_names)
    file_list = create_final_csv(outer_names, n_seed)
    return file_list
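# Usage sketch (illustrative): normal_run expects a pandas DataFrame whose
# 'HPYLORI' column is the binary target. The CSV path and the wrapper name
# below are assumptions, not part of the original pipeline.
def run_default_pipeline(csv_path='hpylori.csv'):
    import pandas as pd  # local import to keep the sketch self-contained
    data = pd.read_csv(csv_path)
    # Defaults: 2 seeds x 5 stratified folds, information-gain selection of
    # 10 features, evaluated with a random forest.
    return normal_run(data, n_seed=2, splits=5,
                      methods=['infogain_10'], estimators=['rdforest'])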
def runClassify(preProcessingMethod, forceBalance, proportional, nseed, explanation, gridSearch, generatePickle, hasPlotLibs, paralled, nJobs, listOfClassifiers, outfileName, nCV, measureProbas):

    positiveOutputFile = "positive-%s.pk" % (explanation)
    validationPosOutputFile = "positive-validation.pk"
    negativeOutputFile = "negative-%s.pk" % (explanation)
    validationNegOutputFile = "negative-validation.pk"
    testOutputFile = "test-%s.pk" % (explanation)

    logging.info("Using seed: %d", nseed)
    logging.info("Loading: %s and %s", positiveOutputFile, negativeOutputFile)
    logging.info("Processing method used: %s", preProcessingMethod)

    if forceBalance > 0:
        logging.warning("Forcing only %s examples for each dataset", forceBalance)

    if proportional > 0:
        logging.warning("Using proportional representation. %s percent of the base.", proportional)

    if forceBalance > 0 and proportional > 0:
        logging.error("ERROR! YOU SHOULD CHOOSE EITHER FORCEBALANCE OR PROPORTIONAL DATA!")
        print "ERROR! YOU SHOULD CHOOSE EITHER FORCEBALANCE OR PROPORTIONAL DATA!"
        exit(0)

    ####
    ### Load Datasets
    ##
    #
    logging.info("Loading the datasets...")
    with open(negativeOutputFile, 'rb') as input:
        negativeFV = pickle.load(input)

    with open(validationNegOutputFile, 'rb') as input:
        validationNegFV = pickle.load(input)

    with open(positiveOutputFile, 'rb') as input:
        positiveFV = pickle.load(input)

    with open(validationPosOutputFile, 'rb') as input:
        validationPosFV = pickle.load(input)

    with open(testOutputFile, 'rb') as input:
        testFV = pickle.load(input)
    logging.info("Loaded")

    testFV = sorted(testFV.iteritems(), key=lambda k: int(k[0]))

    logging.info("Transforming datasets into Dictionaries...")
    ld1, ll1 = transformeInDict(sorted(negativeFV.iteritems()), nseed, forceBalance, proportional)
    ld2, ll2 = transformeInDict(sorted(positiveFV.iteritems()), nseed, forceBalance, proportional)
    ldTest, llTest = transformeInDict(testFV, nseed, forceBalance, proportional)

    valldNeg, valllNeg = transformeInDict(sorted(validationNegFV.iteritems()), nseed, forceBalance, proportional)
    valldPos, valllPos = transformeInDict(sorted(validationPosFV.iteritems()), nseed, forceBalance, proportional)
    valY = np.array(valllNeg + valllPos)
    valDicts = valldNeg + valldPos

    logging.info("Transformed")

    listOfDicts = ld1 + ld2
    listOfLabels = ll1 + ll2
    y = np.array(listOfLabels)

    greatestClass = 0 if len(ll1) > len(ll2) else 1
    y_greatest = np.array((len(ll1) + len(ll2)) * [greatestClass])

    logging.info("Using %d negative examples -- class %s" % (len(ll1), ll1[0]))
    logging.info("Using %d positive examples -- class %s" % (len(ll2), ll2[0]))

    baselines = calculateBaselines(y, y_greatest)

    logging.info("Vectorizing dictionaries...")
    vec, X_noProcess = vectorizeData(listOfDicts)
    if X_noProcess != []:
        logging.info("Feature Names: %s", vec.get_feature_names())
    logging.info("Vectorized")

    logging.info("Preprocessing data")
    X = preprocessing(X_noProcess, preProcessingMethod)
    #print "X_noProcess ----> ", X_noProcess
    #print "X ---> ", X
    logging.info("Data preprocessed")

    # Prepare test data:
    Xtest = vec.transform(ldTest).toarray()
    Xtest = preprocessing(Xtest, preProcessingMethod)

    valX = vec.transform(valDicts).toarray()
    valX = preprocessing(valX, preProcessingMethod)

    ####
    ### Shuffle samples (TODO: Cross-validation)
    ##
    #
    logging.info("Shuffling the data...")
    n_samples = len(y)
    newIndices = shuffleIndices(n_samples, nseed)
    X = X[newIndices]
    y = y[newIndices]

    n_samples_val = len(valY)
    newIndices = shuffleIndices(n_samples_val, nseed)
    valX = valX[newIndices]
    valY = valY[newIndices]

    logging.debug("X - %s", X)
    # Shuffle samples
    logging.info("Shuffled")

    ####
    ### Run classifiers
    ##
    #
    precRecall, roc = {}, {}
    results = []
    logging.info("Running classifiers...")

    if "dmfc" in listOfClassifiers:
        dmfc = DummyClassifier(strategy='most_frequent')
        results.append(classify(dmfc, "DummyMostFrequent", X, y, nCV, nJobs, baselines, {"measureProbas": measureProbas}, Xtest))
    # ================================================================
    if "nbc" in listOfClassifiers or "nb" in listOfClassifiers:
        nbc = GaussianNB()
        results.append(classify(nbc, "Naive Bayes", X, y, nCV, nJobs, baselines, {"measureProbas": measureProbas}, Xtest))
    # ================================================================
    if "knnc" in listOfClassifiers or "knn" in listOfClassifiers:
        knnc = KNeighborsClassifier(n_neighbors=classifyParameters["KNN-K"])
        results.append(classify(knnc, "KNN", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridKNN, "measureProbas": measureProbas}, Xtest))
    # ================================================================
    if "lrc" in listOfClassifiers or "lgr" in listOfClassifiers or "lr" in listOfClassifiers:
        lrc = LogisticRegression(C=classifyParameters["LR-C"])
        results.append(classify(lrc, "Logistic Regression", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridLR, "measureProbas": measureProbas}, Xtest, valX, valY))
    # ================================================================
    if "dtc" in listOfClassifiers:
        dtc = DecisionTreeClassifier(criterion=classifyParameters["DT-criterion"],
                                     max_features=classifyParameters["DT-max_features"])
        results.append(classify(dtc, "Decision Tree", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridDT, "measureProbas": measureProbas}, Xtest))
    # ================================================================
    if "svmc" in listOfClassifiers or "svm" in listOfClassifiers:
        #if SVMKernel == "linear":
        #    svmc = LinearSVC(C=classifyParameters["SVM-C"], class_weight=classifyParameters["SVM-class_weight"])
        #else:
        #    svmc = SVC(kernel=classifyParameters["SVM-kernel"], cache_size=classifyParameters["SVM-cacheSize"], C=classifyParameters["SVM-C"], max_iter=classifyParameters["SVM-maxIter"], probability=measureProbas, gamma=classifyParameters["SVM-gamma"], class_weight=classifyParameters["SVM-class_weight"])
        #results.append(classify(svmc, "SVM", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridSVM, "measureProbas": measureProbas}, Xtest))
        pass
    # ================================================================
    if "etc" in listOfClassifiers:
        etc = ExtraTreesClassifier(random_state=0, n_jobs=nJobs, n_estimators=classifyParameters["ETC-n_estimators"], criterion=classifyParameters["ETC-criterion"], max_features=classifyParameters["ETC-max_features"])
        results.append(classify(etc, "Random Forest", X, y, nCV, nJobs, baselines, {"tryToMeasureFeatureImportance": measureProbas, "featuresOutFilename": (outfileName + ".pk"), "featureNames": vec.get_feature_names(), "useGridSearch": gridSearch, "gridParameters": gridETC, "measureProbas": measureProbas}, Xtest, valX, valY))
    # ================================================================
    if "sgd" in listOfClassifiers:
        sgd = SGDClassifier(n_jobs=nJobs)
        results.append(classify(sgd, "SGD", X, y, nCV, nJobs, baselines, {"featuresOutFilename": (outfileName + ".pk"), "featureNames": vec.get_feature_names(), "useGridSearch": gridSearch, "gridParameters": gridSGD, "measureProbas": measureProbas}, Xtest, valX, valY))
    # ================================================================
    if "gbc" in listOfClassifiers:
        gbc = GradientBoostingClassifier(n_estimators=300, subsample=0.6, max_depth=4, random_state=nseed)
        results.append(classify(gbc, "GBC", X, y, nCV, nJobs, baselines, {"featuresOutFilename": (outfileName + ".pk"), "featureNames": vec.get_feature_names(), "useGridSearch": gridSearch, "gridParameters": gridSGD, "measureProbas": measureProbas}, Xtest, valX, valY))
    # ================================================================

    precRecall, roc = getCurves(results)
    roc["Random Classifier"] = ([0, 1], [0, 1])

    plotGraph(precRecall, fileName=PRECRECALLNAME, xlabel="Recall", ylabel="Precision", generatePickle=generatePickle, hasPlotLibs=hasPlotLibs)
    plotGraph(roc, fileName=ROCNAME, xlabel="False Positive Rate", ylabel="True Positive Rate", generatePickle=generatePickle, hasPlotLibs=hasPlotLibs)

    fo = open(outfileName, "a")
    listProbas = []
    for r in results:
        clfName = r[0]
        resultMetrics = r[1]
        fo.write("%s, %.3f, %.3f, %.3f, %.3f\n" % (clfName, 100.0*resultMetrics.acc, 100.0*resultMetrics.sf1, 100.0*resultMetrics.mf1, 100.0*resultMetrics.wf1))
        print "%s, %.3f, %.3f, %.3f, %.3f" % (clfName, 100.0*resultMetrics.acc, 100.0*resultMetrics.sf1, 100.0*resultMetrics.mf1, 100.0*resultMetrics.wf1)

        yTraining = r[4]
        yTrainingProbas = r[5]
        yTest = r[6]
        yTestProbas = r[7]

        writeOutput(clfName + ".csv", yTest)
        listProbas.append(yTestProbas)
        #for t, p in zip(yTest, yTestProbas):
        #    print t, p

    mergedYTest = voting(listProbas)
    writeOutput("merged.csv", mergedYTest)

    fo.close()
    logging.info("Done")
def runClassify(preProcessingMethod, forceBalance, proportional, minNumberOfQueries, nseed, explanation, healthUsers, gridSearch, generatePickle, hasPlotLibs, paralled, nJobs, listOfClassifiers, groupsToUse, usingIncremental, outfileName, nCV, measureProbas, incrementalVector):

    if healthUsers:
        positiveOutputFile = "healthUser-%d-%s.pk" % (minNumberOfQueries, explanation)
        negativeOutputFile = "notHealthUser-%d-%s.pk" % (minNumberOfQueries, explanation)
    else:
        negativeOutputFile = "regularUser-%d-%s.pk" % (minNumberOfQueries, explanation)
        positiveOutputFile = "medicalUser-%d-%s.pk" % (minNumberOfQueries, explanation)

    logging.info("Using seed: %d", nseed)
    logging.info("Loading: %s and %s", positiveOutputFile, negativeOutputFile)
    logging.info("Processing method used: %s", preProcessingMethod)

    if forceBalance > 0:
        logging.warning("Forcing only %s examples for each dataset", forceBalance)

    if proportional > 0:
        logging.warning("Using proportional representation. %s percent of the base.", proportional)

    if forceBalance > 0 and proportional > 0:
        logging.error("ERROR! YOU SHOULD CHOOSE EITHER FORCEBALANCE OR PROPORTIONAL DATA!")
        print "ERROR! YOU SHOULD CHOOSE EITHER FORCEBALANCE OR PROPORTIONAL DATA!"
        exit(0)

    ####
    ### Load Datasets
    ##
    #
    logging.info("Loading the datasets...")
    with open(negativeOutputFile, 'rb') as input:
        negativeUserFV = pickle.load(input)

    with open(positiveOutputFile, 'rb') as input:
        positiveUserFV = pickle.load(input)
    logging.info("Loaded")

    logging.info("Transforming datasets into Dictionaries...")
    if usingIncremental:
        negativeUserFV, ll1 = transformeInIncrementalDict(negativeUserFV, nseed, forceBalance, proportional, groupsToUse, incrementalVector)
        positiveUserFV, ll2 = transformeInIncrementalDict(positiveUserFV, nseed, forceBalance, proportional, groupsToUse, incrementalVector)
        ld1, ld2 = [], []

        lm1 = len(negativeUserFV)
        if lm1 != len(positiveUserFV):
            logging.error("ERROR MAP SIZES ARE NOT EQUAL!")
            print "ERROR MAP SIZES ARE NOT EQUAL!"
            exit(0)

        incrementalFV = defaultdict(list)
        for i in range(lm1):
            incrementalFV[i] = negativeUserFV[i] + positiveUserFV[i]
    else:
        ld1, ll1 = transformeInDict(negativeUserFV, nseed, forceBalance, proportional, groupsToUse)
        ld2, ll2 = transformeInDict(positiveUserFV, nseed, forceBalance, proportional, groupsToUse)

    # Free memory
    del positiveUserFV
    del negativeUserFV

    logging.info("Transformed")

    listOfDicts = ld1 + ld2
    listOfLabels = ll1 + ll2
    y = np.array(listOfLabels)

    greatestClass = 0 if len(ll1) > len(ll2) else 1
    y_greatest = np.array((len(ll1) + len(ll2)) * [greatestClass])

    logging.info("Using %d regular users -- class %s" % (len(ll1), ll1[0]))
    logging.info("Using %d medical users -- class %s" % (len(ll2), ll2[0]))

    baselines = calculateBaselines(y, y_greatest)

    logging.info("Vectorizing dictionaries...")
    vec, X_noProcess = vectorizeData(listOfDicts)
    if X_noProcess != []:
        logging.info("Feature Names: %s", vec.get_feature_names())
    logging.info("Vectorized")

    logging.info("Preprocessing data")
    X = preprocessing(X_noProcess, preProcessingMethod)
    #print "X_noProcess ----> ", X_noProcess
    #print "X ---> ", X
    logging.info("Data preprocessed")

    if usingIncremental:
        incrementalFV = [preprocessing(vec.fit_transform(l).toarray(), preProcessingMethod) for k, l in incrementalFV.iteritems()]
    else:
        incrementalFV = None

    ####
    ### Shuffle samples (TODO: Cross-validation)
    ##
    #
    logging.info("Shuffling the data...")
    n_samples = len(y)
    newIndices = shuffleIndices(n_samples, nseed)
    if X != []:
        X = X[newIndices]
    y = y[newIndices]
    if usingIncremental:
        incrementalFV = [fv[newIndices] for fv in incrementalFV]

    logging.debug("X - %s", X)
    # Shuffle samples
    logging.info("Shuffled")

    ####
    ### Run classifiers
    ##
    #
    precRecall, roc = {}, {}
    clfrs = []
    logging.info("Running classifiers...")

    if "dmfc" in listOfClassifiers:
        dmfc = DummyClassifier(strategy='most_frequent')
        clfrs.append((dmfc, "DummyMostFrequent", X, y, nCV, nJobs, baselines, {"measureProbas": measureProbas}))
    # ================================================================
    if "dsc" in listOfClassifiers:
        dsc = DummyClassifier(strategy='stratified')
        clfrs.append((dsc, "DummyStratified", X, y, nCV, nJobs, baselines, {"measureProbas": measureProbas}))
    # ================================================================
    if "duc" in listOfClassifiers:
        duc = DummyClassifier(strategy='uniform')
        clfrs.append((duc, "DummyUniform", X, y, nCV, nJobs, baselines, {"measureProbas": measureProbas}))
    # ================================================================
    if "nbc" in listOfClassifiers or "nb" in listOfClassifiers:
        nbc = GaussianNB()
        clfrs.append((nbc, "Naive Bayes", X, y, nCV, nJobs, baselines, {"measureProbas": measureProbas}))
    # ================================================================
    if "knnc" in listOfClassifiers or "knn" in listOfClassifiers:
        knnc = KNeighborsClassifier(n_neighbors=classifyParameters["KNN-K"])
        clfrs.append((knnc, "KNN", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridKNN, "measureProbas": measureProbas}))
    # ================================================================
    if "lrc" in listOfClassifiers:
        lrc = LogisticRegression(C=classifyParameters["LR-C"])
        clfrs.append((lrc, "Logistic Regression", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridLR, "measureProbas": measureProbas}))
    # ================================================================
    if "dtc" in listOfClassifiers:
        dtc = DecisionTreeClassifier(criterion=classifyParameters["DT-criterion"],
                                     max_features=classifyParameters["DT-max_features"])
        clfrs.append((dtc, "Decision Tree", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridDT, "measureProbas": measureProbas}))
    # ================================================================
    if "svmc" in listOfClassifiers or "svm" in listOfClassifiers:
        if SVMKernel == "linear":
            svmc = LinearSVC(C=classifyParameters["SVM-C"], class_weight=classifyParameters["SVM-class_weight"])
        else:
            svmc = SVC(kernel=classifyParameters["SVM-kernel"], cache_size=classifyParameters["SVM-cacheSize"], C=classifyParameters["SVM-C"], max_iter=classifyParameters["SVM-maxIter"], probability=measureProbas, gamma=classifyParameters["SVM-gamma"], class_weight=classifyParameters["SVM-class_weight"])
        clfrs.append((svmc, "SVM", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridSVM, "measureProbas": measureProbas}))
    # ================================================================
    if "etc" in listOfClassifiers:
        etc = ExtraTreesClassifier(random_state=0, n_jobs=nJobs, n_estimators=classifyParameters["ETC-n_estimators"], criterion=classifyParameters["ETC-criterion"], max_features=classifyParameters["ETC-max_features"])
        clfrs.append((etc, "Random Forest", X, y, nCV, nJobs, baselines, {"tryToMeasureFeatureImportance": True, "featureNames": vec.get_feature_names(), "useGridSearch": gridSearch, "gridParameters": gridETC, "measureProbas": measureProbas, "featuresOutFilename": (outfileName + ".pk")}))

    results = []
    if paralled:
        from scoop import futures
        results = futures.map(parallelClassify, clfrs)
    else:
        if "dmfc" in listOfClassifiers:
            results.append(classify(dmfc, "DummyMostFrequent", X, y, nCV, nJobs, baselines, {"measureProbas": measureProbas}, incremental=incrementalFV))
        if "dsc" in listOfClassifiers:
            results.append(classify(dsc, "DummyStratified", X, y, nCV, nJobs, baselines, {"measureProbas": measureProbas}, incremental=incrementalFV))
        if "duc" in listOfClassifiers:
            results.append(classify(duc, "DummyUniform", X, y, nCV, nJobs, baselines, {"measureProbas": measureProbas}, incremental=incrementalFV))
        if "nbc" in listOfClassifiers or "nb" in listOfClassifiers:
            results.append(classify(nbc, "Naive Bayes", X, y, nCV, nJobs, baselines, {"measureProbas": measureProbas}, incremental=incrementalFV))
        if "knnc" in listOfClassifiers or "knn" in listOfClassifiers:
            results.append(classify(knnc, "KNN", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridKNN, "measureProbas": measureProbas}, incremental=incrementalFV))
        if "lrc" in listOfClassifiers:
            results.append(classify(lrc, "Logistic Regression", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridLR, "measureProbas": measureProbas}, incremental=incrementalFV))
        if "dtc" in listOfClassifiers:
            results.append(classify(dtc, "Decision Tree", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridDT, "measureProbas": measureProbas}, incremental=incrementalFV))
        if "svmc" in listOfClassifiers or "svm" in listOfClassifiers:
            results.append(classify(svmc, "SVM", X, y, nCV, nJobs, baselines, {"useGridSearch": gridSearch, "gridParameters": gridSVM, "measureProbas": measureProbas}, incremental=incrementalFV))
        if "etc" in listOfClassifiers:
            results.append(classify(etc, "Random Forest", X, y, nCV, nJobs, baselines, {"tryToMeasureFeatureImportance": measureProbas, "featuresOutFilename": (outfileName + ".pk"), "featureNames": vec.get_feature_names(), "useGridSearch": gridSearch, "gridParameters": gridETC, "measureProbas": measureProbas}, incremental=incrementalFV))

    precRecall, roc = getCurves(results)
    roc["Random Classifier"] = ([0, 1], [0, 1])

    plotGraph(precRecall, fileName=PRECRECALLNAME, xlabel="Recall", ylabel="Precision", generatePickle=generatePickle, hasPlotLibs=hasPlotLibs)
    plotGraph(roc, fileName=ROCNAME, xlabel="False Positive Rate", ylabel="True Positive Rate", generatePickle=generatePickle, hasPlotLibs=hasPlotLibs)

    fo = open(outfileName, "a")
    for r in results:
        label = r[0]
        resultMetrics = r[1]
        if usingIncremental:
            for i, part in zip(range(len(incrementalVector)), incrementalVector):
                fo.write("%s, Partition %d, %.3f, %.3f, %.3f, %.3f\n" % (label, part/10, 100.0*(resultMetrics.acc[i]), 100.0*resultMetrics.sf1[i], 100.0*resultMetrics.mf1[i], 100.0*resultMetrics.wf1[i]))
                print "%s, Partition %d, %.3f, %.3f, %.3f, %.3f" % (label, part/10, 100.0*(resultMetrics.acc[i]), 100.0*resultMetrics.sf1[i], 100.0*resultMetrics.mf1[i], 100.0*resultMetrics.wf1[i])
            print "Means ----- %s, %.3f, %.3f, %.3f, %.3f" % (label, 100.0*(np.mean(resultMetrics.acc)), 100.0*np.mean(resultMetrics.sf1), 100.0*np.mean(resultMetrics.mf1), 100.0*np.mean(resultMetrics.wf1))
        else:
            fo.write("%s, %.3f, %.3f, %.3f, %.3f\n" % (label, 100.0*resultMetrics.acc, 100.0*resultMetrics.sf1, 100.0*resultMetrics.mf1, 100.0*resultMetrics.wf1))
            print "%s, %.3f, %.3f, %.3f, %.3f" % (label, 100.0*resultMetrics.acc, 100.0*resultMetrics.sf1, 100.0*resultMetrics.mf1, 100.0*resultMetrics.wf1)

    fo.close()
    logging.info("Done")