Example #1
def tune(train_fn, param_vals, train_feats, train_labels, val_feats,
         val_labels):
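    # Train once per candidate hyperparameter value and record train/validation accuracy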
    train_accs = np.zeros(len(param_vals))
    val_accs = np.zeros(len(param_vals))

    for i, val in enumerate(param_vals):
        theta, theta_0 = train_fn(train_feats, train_labels, val)

        train_preds = p1.classify(train_feats, theta, theta_0)
        train_accs[i] = p1.accuracy(train_preds, train_labels)

        val_preds = p1.classify(val_feats, theta, theta_0)
        val_accs[i] = p1.accuracy(val_preds, val_labels)

    return train_accs, val_accs
Example #2
def mainClassification():
    folds = 10
    n_classifiers = classifiers.len_classify()
    cm, acc = [], []
    result_cm, result_acc = [0] * folds, [0] * folds

    for i in range(folds):
        result_cm[i], result_acc[i] = [0] * n_classifiers, [0] * n_classifiers

    data = preprocessing.readData()
    samples, targets = preprocessing.splitSamples(data)
    kfold = preprocessing.crossValidation(folds)

    for train, test in kfold.split(samples, targets):
        X_train, X_test = samples[train], samples[test]
        y_train, y_test = targets[train], targets[test]

        for i in range(n_classifiers):
            y_pred = classifiers.classify(X_train, y_train, X_test, i)
            fold_cm, fold_acc = classifiers.mensureAcc(y_pred, y_test)
            cm.append(fold_cm)
            acc.append(fold_acc)

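    # Rearrange the flat per-fold, per-classifier results into folds x n_classifiers tables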
    for j in range(n_classifiers):
        for k in range(folds):
            result_cm[k][j] = cm[k * n_classifiers + j]
            result_acc[k][j] = acc[k * n_classifiers + j]
    utils.writeCSV("all_classifiers_confusion-matrix.csv", result_cm)
    utils.writeCSV("all_classifiers_accuracy.csv", result_acc)
    print("Finish!!\n")
Example #3
def subset_run(data, n_seed=5, splits=10, methods=['sfs'], estimators=['rdforest']):
    X = np.array(data.drop('HPYLORI',axis=1))
    y = np.array(data.HPYLORI)

    if not os.path.exists('data'):
        os.makedirs('data')
    if not os.path.exists('results'):
        os.makedirs('results')
    
    outer_names = create_empty_dic(estimators, methods)

    for seed in range(n_seed):
        skf = StratifiedKFold(n_splits=splits, random_state=seed, shuffle=True)
        split_num = 0
        names = create_empty_dic(estimators, methods)
        
        for train, test in skf.split(X,y):
            split_num += 1
            X_train, X_test = X[train], X[test]
            y_train, y_test = y[train], y[test]
            X_train, X_test = dp.impute(X_train), dp.impute(X_test)

            for estimator in estimators:
                for s in methods:
                    result = e.classify(estimator, X_train, X_test, y_train, y_test)
                    filename = data_path+estimator+'_'+s+'_'+str(split_num)+'_'+str(seed)
                    names[estimator][s].append(filename)
                    result.to_csv(filename+'.csv')

        create_interim_csv(names, outer_names, seed, splits)
        delete_interim_csv(names)

    file_list = create_final_csv(outer_names, n_seed)
    
    return file_list
Example #4
def execute_subset_run(index, run, estimators, methods):
    X_train, X_test = run[0], run[1]
    y_train, y_test = run[2], run[3]
    for estimator in estimators:
        for method in methods:
            result = e.classify(estimator, X_train, X_test, y_train, y_test)
            filename = data_path + estimator + method + '_' + str(index)
            result.to_csv(filename + '.csv')
Example #5
def execute_a_run(index, run, features, estimators, methods):
    for i in range(len(methods)):
        method = methods[i]
        selection = features[i]
        X_train, X_test = run[0], run[1]
        y_train, y_test = run[2], run[3]
        X_train, X_test = X_train[:, selection], X_test[:, selection]
        for estimator in estimators:
            result = e.classify(estimator, X_train, X_test, y_train, y_test)
            filename = data_path + estimator + method + '_' + str(index)
            result.to_csv(filename + '.csv')
Example #6
def normal_run(data,
               n_seed=2,
               splits=5,
               methods=['infogain_10'],
               estimators=['rdforest']):
    X = np.array(data.drop('HPYLORI', axis=1))
    y = np.array(data.HPYLORI)

    if not os.path.exists('data'):
        os.makedirs('data')
    if not os.path.exists('results'):
        os.makedirs('results')

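    # Selection counts: how many times each feature is chosen across seeds, folds and methods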
    features = np.zeros(X.shape[1])
    outer_names = create_empty_dic(estimators, methods)

    for seed in range(n_seed):
        skf = StratifiedKFold(n_splits=splits, random_state=seed, shuffle=True)
        split_num = 0
        names = create_empty_dic(estimators, methods)

        for train, test in skf.split(X, y):
            split_num += 1
            X_train, X_test = X[train], X[test]
            y_train, y_test = y[train], y[test]
            X_train, X_test = dp.impute(X_train), dp.impute(X_test)

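            # Method names encode the selector and feature count, e.g. "infogain_10"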
            for s in methods:
                selector = s.split('_')[0]
                n_features = int(s.split('_')[1])
                selection = fselect.run_feature_selection(
                    selector, X_train, y_train, n_features)
                for i in selection:
                    features[i] += 1

                # Slice into new names so the full matrices stay intact for any
                # later selection methods in this loop
                X_train_sel, X_test_sel = X_train[:, selection], X_test[:, selection]

                for estimator in estimators:

                    result = e.classify(estimator, X_train_sel, X_test_sel,
                                        y_train, y_test)
                    filename = data_path + estimator + s + '_' + str(
                        split_num) + '_' + str(seed)
                    names[estimator][s].append(filename)
                    result.to_csv(filename + '.csv')

        create_interim_csv(names, outer_names, seed, splits)
        delete_interim_csv(names)
    print(outer_names)
    file_list = create_final_csv(outer_names, n_seed)

    return file_list
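All of the runs above share the same core loop: a StratifiedKFold split repeated over several seeds, one classifier trained per fold, and per-fold accuracies collected for aggregation. A minimal self-contained sketch of that pattern, with synthetic data and RandomForestClassifier standing in for the project's own preprocessing, feature-selection and classify helpers:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
accs = []
for seed in range(2):  # n_seed
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    for train, test in skf.split(X, y):
        clf = RandomForestClassifier(random_state=seed)  # a fresh model per fold
        clf.fit(X[train], y[train])
        accs.append(accuracy_score(y[test], clf.predict(X[test])))
print(np.mean(accs))  # mean accuracy over all folds and seeds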
Example #7
def runClassify(preProcessingMethod, forceBalance, proportional, nseed, explanation, gridSearch, generatePickle, hasPlotLibs, paralled, nJobs, listOfClassifiers, outfileName, nCV, measureProbas):
   
    positiveOutputFile = "positive-%s.pk" % (explanation)
    validationPosOutputFile = "positive-validation.pk"
    negativeOutputFile = "negative-%s.pk" % (explanation)
    validationNegOutputFile = "negative-validation.pk"
    testOutputFile = "test-%s.pk" % (explanation)

    logging.info("Using seed: %d", nseed)
    logging.info("Loading: %s and %s", positiveOutputFile, negativeOutputFile)
    logging.info("Processing method used: %s", preProcessingMethod)

    if forceBalance > 0:
        logging.warning("Forcing only %s examples for each dataset",forceBalance)

    if proportional > 0:
        logging.warning("Using proportional representation. %s percente of the base.",proportional)
    
    if forceBalance > 0 and proportional > 0:
        logging.error("ERROR! YOU SHOULD CHOOSE OR FORCEBALANCE OR PROPORTIONAL DATA!")
        print "ERROR! YOU SHOULD CHOOSE OR FORCEBALANCE OR PROPORTIONAL DATA!"
        exit(0)

    ####
    ### Load Datasets
    ##
    #
    logging.info("Loading the datasets...")
    with open(negativeOutputFile, 'rb') as input:
        negativeFV = pickle.load(input)
    
    with open(validationNegOutputFile, 'rb') as input:
        validationNegFV = pickle.load(input)
    
    with open(positiveOutputFile, 'rb') as input:
        positiveFV = pickle.load(input)
    
    with open(validationPosOutputFile, 'rb') as input:
        validationPosFV = pickle.load(input)

    with open(testOutputFile, 'rb') as input:
        testFV = pickle.load(input)
    logging.info("Loaded")

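    # Order the held-out test examples by their numeric id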
    testFV = sorted(testFV.iteritems(), key=lambda k: int(k[0])) 

    logging.info("Transforming datasets into Dictionaries...")
    ld1, ll1 = transformeInDict(sorted(negativeFV.iteritems()), nseed, forceBalance, proportional)
    ld2, ll2 = transformeInDict(sorted(positiveFV.iteritems()), nseed, forceBalance, proportional)
    ldTest, llTest = transformeInDict(testFV, nseed, forceBalance, proportional)

    valldNeg, valllNeg = transformeInDict(sorted(validationNegFV.iteritems()), nseed, forceBalance, proportional)
    valldPos, valllPos = transformeInDict(sorted(validationPosFV.iteritems()), nseed, forceBalance, proportional)
    valY = np.array( valllNeg + valllPos)
    valDicts = valldNeg + valldPos
    
    logging.info("Transformed")
    
    listOfDicts = ld1 + ld2
    listOfLabels = ll1 + ll2
    y = np.array( listOfLabels )
    
    greatestClass = 0 if len(ll1) > len(ll2) else 1
    y_greatest =  np.array((len(ll1) + len(ll2)) * [greatestClass] )

    logging.info("Using %d positive examples -- class %s" % (len(ll1), ll1[0]))
    logging.info("Using %d negative examples -- class %s" % (len(ll2), ll2[0]))
    
    baselines = calculateBaselines(y, y_greatest)
    
    logging.info("Vectorizing dictionaries...")
    vec, X_noProcess = vectorizeData(listOfDicts) 
    if X_noProcess != []:
        logging.info("Feature Names: %s", vec.get_feature_names())
    logging.info("Vectorized")
   
    logging.info("Preprocessing data")
    X = preprocessing(X_noProcess, preProcessingMethod)
    #print "X_noProcess ----> ", X_noProcess
    #print "X ---> ", X
    logging.info("Data preprocessed")

    #Prepare Test data: 
    Xtest = vec.transform(ldTest).toarray()
    Xtest = preprocessing(Xtest, preProcessingMethod)

    valX = vec.transform(valDicts).toarray()
    valX = preprocessing(valX, preProcessingMethod)
    
    ####
    ### Shuffle samples  (TODO: Cross-validation)
    ##
    #
    logging.info("Shuffling the data...")
    n_samples = len(y)
    newIndices = shuffleIndices(n_samples, nseed)
    X = X[newIndices]
    y = y[newIndices]

    n_samples_val = len(valY)
    newIndices = shuffleIndices(n_samples_val, nseed)
    valX = valX[newIndices]
    valY = valY[newIndices]

    logging.debug("X - %s", X)
    # Shuffle samples
    logging.info("Shuffled")
    
    ####
    ### Run classifiers
    ##
    #
    precRecall, roc = {}, {}
    results = []

    logging.info("Running classifiers...")
    
    if "dmfc" in listOfClassifiers:
        dmfc = DummyClassifier(strategy='most_frequent')
        results.append(classify(dmfc, "DummyMostFrequent", X, y, nCV, nJobs, baselines, {"measureProbas":measureProbas}, Xtest))
    # ================================================================
    if "nbc" in listOfClassifiers or "nb" in listOfClassifiers:
        nbc = GaussianNB()
        results.append(classify(nbc, "Naive Bayes", X, y, nCV, nJobs, baselines, {"measureProbas":measureProbas}, Xtest))
    # ================================================================
    if "knnc" in listOfClassifiers or "knn" in listOfClassifiers:
        knnc = KNeighborsClassifier(n_neighbors=classifyParameters["KNN-K"])
        results.append(classify(knnc, "KNN", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridKNN, "measureProbas":measureProbas}, Xtest))
    # ================================================================
    if "lrc" in listOfClassifiers or "lgr" in listOfClassifiers or "lr" in listOfClassifiers:
        lrc = LogisticRegression(C=classifyParameters["LR-C"])
        results.append(classify(lrc, "Logistic Regression", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridLR, "measureProbas":measureProbas}, Xtest, valX, valY))
    # ================================================================
    if "dtc" in listOfClassifiers:
        dtc = DecisionTreeClassifier( criterion=classifyParameters["DT-criterion"], max_features=classifyParameters["DT-max_features"] )
        results.append(classify(dtc, "Decision Tree", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridDT, "measureProbas":measureProbas}, Xtest))
    # ================================================================
    if "svmc" in listOfClassifiers or "svm" in listOfClassifiers:
        #if SVMKernel == "linear":
        #    svmc = LinearSVC(C=classifyParameters["SVM-C"], class_weight=classifyParameters["SVM-class_weight"])
        #else:
        #    svmc = SVC(kernel=classifyParameters["SVM-kernel"], cache_size=classifyParameters["SVM-cacheSize"], C=classifyParameters["SVM-C"], max_iter=classifyParameters["SVM-maxIter"], probability=measureProbas, gamma=classifyParameters["SVM-gamma"], class_weight=classifyParameters["SVM-class_weight"])
        #results.append(classify(svmc, "SVM", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridSVM, "measureProbas":measureProbas}, Xtest))
        pass
    # ================================================================
    if "etc" in listOfClassifiers:
        etc = ExtraTreesClassifier(random_state=0, n_jobs=nJobs, n_estimators=classifyParameters["ETC-n_estimators"], criterion=classifyParameters["ETC-criterion"], max_features=classifyParameters["ETC-max_features"])
        results.append(classify(etc, "Random Forest", X, y, nCV, nJobs, baselines, {"tryToMeasureFeatureImportance":measureProbas, "featuresOutFilename":(outfileName + ".pk"), "featureNames":vec.get_feature_names(), "useGridSearch":gridSearch, "gridParameters":gridETC, "measureProbas":measureProbas}, Xtest, valX, valY))
    
    # ================================================================
    if "sgd" in listOfClassifiers:
        sgd = SGDClassifier(n_jobs=nJobs)
        results.append(classify(sgd, "SGD", X, y, nCV, nJobs, baselines, {"featuresOutFilename":(outfileName + ".pk"), "featureNames":vec.get_feature_names(), "useGridSearch":gridSearch, "gridParameters":gridSGD, "measureProbas":measureProbas}, Xtest, valX, valY))

    # ================================================================
    if "gbc" in listOfClassifiers:
        gbc = GradientBoostingClassifier(n_estimators=300,subsample=0.6,max_depth=4,random_state=nseed)
        results.append(classify(gbc, "GBC", X, y, nCV, nJobs, baselines, {"featuresOutFilename":(outfileName + ".pk"), "featureNames":vec.get_feature_names(), "useGridSearch":gridSearch, "gridParameters":gridSGD, "measureProbas":measureProbas}, Xtest, valX, valY))
    # ================================================================
    
    
    precRecall, roc = getCurves(results)
    roc["Random Classifier"] = ([0,1],[0,1])

    plotGraph(precRecall, fileName=PRECRECALLNAME, xlabel="Recall", ylabel="Precision", generatePickle=generatePickle, hasPlotLibs=hasPlotLibs)
    plotGraph(roc, fileName=ROCNAME, xlabel="False Positive Rate", ylabel="True Positive Rate", generatePickle=generatePickle, hasPlotLibs=hasPlotLibs)
   
    fo = open(outfileName, "a")

    listProbas = []
    for r in results:
        clfName = r[0]
        resultMetrics = r[1]
        fo.write("%s, %.3f, %.3f, %.3f, %.3f\n" % (clfName, 100.0*resultMetrics.acc, 100.0*resultMetrics.sf1, 100.0*resultMetrics.mf1, 100.0*resultMetrics.wf1))
        print "%s, %.3f, %.3f, %.3f, %.3f" % (clfName, 100.0*resultMetrics.acc, 100.0*resultMetrics.sf1, 100.0*resultMetrics.mf1, 100.0*resultMetrics.wf1)
        
        yTraining = r[4]
        yTrainingProbas = r[5]
        yTest = r[6]
        yTestProbas = r[7]
        writeOutput(clfName + ".csv", yTest)
        
        listProbas.append(yTestProbas)
        #for t,p in zip(yTest, yTestProbas):
        #    print t, p

    mergedYTest = voting(listProbas)
    writeOutput("merged.csv", mergedYTest)


    fo.close()
    logging.info("Done")
Example #8
def runClassify(preProcessingMethod, forceBalance, proportional, minNumberOfQueries, nseed, explanation, healthUsers, gridSearch, generatePickle, hasPlotLibs, paralled, nJobs, listOfClassifiers, groupsToUse, usingIncremental, outfileName, nCV, measureProbas, incrementalVector):
   
    if healthUsers:
        positiveOutputFile = "healthUser-%d-%s.pk" % (minNumberOfQueries, explanation)
        negativeOutputFile = "notHealthUser-%d-%s.pk" % (minNumberOfQueries, explanation)
    else:
        negativeOutputFile = "regularUser-%d-%s.pk" % (minNumberOfQueries, explanation)
        positiveOutputFile = "medicalUser-%d-%s.pk" % (minNumberOfQueries, explanation)
    
    logging.info("Using seed: %d", nseed)
    logging.info("Loading: %s and %s", positiveOutputFile, negativeOutputFile)
    logging.info("Processing method used: %s", preProcessingMethod)

    if forceBalance > 0:
        logging.warning("Forcing only %s examples for each dataset",forceBalance)

    if proportional > 0:
        logging.warning("Using proportional representation. %s percente of the base.",proportional)
    
    if forceBalance > 0 and proportional > 0:
        logging.error("ERROR! YOU SHOULD CHOOSE OR FORCEBALANCE OR PROPORTIONAL DATA!")
        print "ERROR! YOU SHOULD CHOOSE OR FORCEBALANCE OR PROPORTIONAL DATA!"
        exit(0)

    ####
    ### Load Datasets
    ##
    #
    logging.info("Loading the datasets...")
    with open(negativeOutputFile, 'rb') as input:
        negativeUserFV = pickle.load(input)
    
    with open(positiveOutputFile, 'rb') as input:
        positiveUserFV = pickle.load(input)
    logging.info("Loaded")

    logging.info("Transforming datasets into Dictionaries...")
    if usingIncremental:
        negativeUserFV,ll1 = transformeInIncrementalDict(negativeUserFV, nseed, forceBalance, proportional, groupsToUse, incrementalVector)
        positiveUserFV,ll2 = transformeInIncrementalDict(positiveUserFV, nseed, forceBalance, proportional, groupsToUse, incrementalVector)
        ld1, ld2 = [], []

        lm1 = len(negativeUserFV)
        if lm1 != len(positiveUserFV):
            logging.error("ERROR MAP SIZES ARE NOT EQUAL!")
            print "ERROR MAP SIZES ARE NOT EQUAL!"
            exit(0)

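        # One combined feature list per partition index: negative users first, then positive users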
        incrementalFV = defaultdict(list)
        for i in range(lm1):
            incrementalFV[i] = negativeUserFV[i] + positiveUserFV[i]
        
    else:
        ld1, ll1 = transformeInDict(negativeUserFV, nseed, forceBalance, proportional, groupsToUse)
        ld2, ll2 = transformeInDict(positiveUserFV, nseed, forceBalance, proportional, groupsToUse)
    #Free memory
    del positiveUserFV
    del negativeUserFV

    logging.info("Transformed")
    
    listOfDicts = ld1 + ld2
    listOfLabels = ll1 + ll2
    y = np.array( listOfLabels )
    
    greatestClass = 0 if len(ll1) > len(ll2) else 1
    y_greatest =  np.array((len(ll1) + len(ll2)) * [greatestClass] )

    logging.info("Using %d regular users -- class %s" % (len(ll1), ll1[0]))
    logging.info("Using %d medical users -- class %s" % (len(ll2), ll2[0]))
    
    baselines = calculateBaselines(y, y_greatest)
    
    logging.info("Vectorizing dictionaries...")
    vec, X_noProcess = vectorizeData(listOfDicts) 
    if X_noProcess != []:
        logging.info("Feature Names: %s", vec.get_feature_names())
    logging.info("Vectorized")
   
    logging.info("Preprocessing data")
    X = preprocessing(X_noProcess, preProcessingMethod)
    #print "X_noProcess ----> ", X_noProcess
    #print "X ---> ", X
    logging.info("Data preprocessed")

    if usingIncremental:
        incrementalFV = [preprocessing(vec.fit_transform(l).toarray(), preProcessingMethod) for k, l in incrementalFV.iteritems()]
    else:
        incrementalFV = None

    ####
    ### Shuffle samples  (TODO: Cross-validation)
    ##
    #
    logging.info("Shuffling the data...")
    n_samples = len(y)
    newIndices = shuffleIndices(n_samples, nseed)
    if X != []:
        X = X[newIndices]
    y = y[newIndices]
    if usingIncremental:
        incrementalFV = [ fv[newIndices] for fv in incrementalFV ]

    logging.debug("X - %s", X)
    # Shuffle samples
    logging.info("Shuffled")
    
    ####
    ### Run classifiers
    ##
    #
    precRecall, roc = {}, {}
    clfrs = []

    logging.info("Running classifiers...")
    
    if "dmfc" in listOfClassifiers:
        dmfc = DummyClassifier(strategy='most_frequent')
        clfrs.append( (dmfc, "DummyMostFrequent", X, y, nCV, nJobs, baselines, {"measureProbas":measureProbas}) )
    # ================================================================
    if "dsc" in listOfClassifiers:
        dsc = DummyClassifier(strategy='stratified')
        clfrs.append( (dsc, "DummyStratified", X, y, nCV, nJobs, baselines, {"measureProbas":measureProbas}) )
    # ================================================================
    if "duc" in listOfClassifiers:
        duc = DummyClassifier(strategy='uniform')
        clfrs.append( (duc, "DummyUniform", X, y, nCV, nJobs, baselines, {"measureProbas":measureProbas}) )
    # ================================================================
    if "nbc" in listOfClassifiers or "nb" in listOfClassifiers:
        nbc = GaussianNB()
        clfrs.append( (nbc, "Naive Bayes", X, y, nCV, nJobs, baselines, {"measureProbas":measureProbas}) )
    # ================================================================
    if "knnc" in listOfClassifiers or "knn" in listOfClassifiers:
        knnc = KNeighborsClassifier(n_neighbors=classifyParameters["KNN-K"])
        clfrs.append( (knnc, "KNN", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridKNN, "measureProbas":measureProbas}) )
    # ================================================================
    if "lrc" in listOfClassifiers:
        lrc = LogisticRegression(C=classifyParameters["LR-C"])
        clfrs.append( (lrc, "Logistic Regression", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridLR, "measureProbas":measureProbas}))
    # ================================================================
    if "dtc" in listOfClassifiers:
        dtc = DecisionTreeClassifier( criterion=classifyParameters["DT-criterion"], max_features=classifyParameters["DT-max_features"] )
        clfrs.append( (dtc, "Decision Tree", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridDT, "measureProbas":measureProbas}) )
    # ================================================================
    if "svmc" in listOfClassifiers or "svm" in listOfClassifiers:
        if SVMKernel == "linear":
            svmc = LinearSVC(C=classifyParameters["SVM-C"], class_weight=classifyParameters["SVM-class_weight"])
        else:
            svmc = SVC(kernel=classifyParameters["SVM-kernel"], cache_size=classifyParameters["SVM-cacheSize"], C=classifyParameters["SVM-C"], max_iter=classifyParameters["SVM-maxIter"], probability=measureProbas, gamma=classifyParameters["SVM-gamma"], class_weight=classifyParameters["SVM-class_weight"])

        clfrs.append( (svmc, "SVM", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridSVM, "measureProbas":measureProbas}) )
    # ================================================================
    if "etc" in listOfClassifiers:
        etc = ExtraTreesClassifier(random_state=0, n_jobs=nJobs, n_estimators=classifyParameters["ETC-n_estimators"], criterion=classifyParameters["ETC-criterion"], max_features=classifyParameters["ETC-max_features"])
        clfrs.append( (etc, "Random Forest", X, y, nCV, nJobs, baselines, {"tryToMeasureFeatureImportance":True, "featureNames":vec.get_feature_names(), "useGridSearch":gridSearch, "gridParameters":gridETC, "measureProbas":measureProbas, "featuresOutFilename":(outfileName + ".pk")}) )
    
    results = []
    if paralled:
        from scoop import futures
        results = futures.map(parallelClassify,clfrs)
    else:
        if "dmfc" in listOfClassifiers:
            results.append(classify(dmfc, "DummyMostFrequent", X, y, nCV, nJobs, baselines, {"measureProbas":measureProbas}, incremental=incrementalFV))
        if "dsc" in listOfClassifiers:
            results.append(classify(dsc, "DummyStratified", X, y, nCV, nJobs, baselines, {"measureProbas":measureProbas}, incremental=incrementalFV))
        if "duc" in listOfClassifiers:
            results.append(classify(duc, "DummyUniform", X, y, nCV, nJobs, baselines, {"measureProbas":measureProbas}, incremental=incrementalFV))
        if "nbc" in listOfClassifiers or "nb" in listOfClassifiers:
            results.append(classify(nbc, "Naive Bayes", X, y, nCV, nJobs, baselines, {"measureProbas":measureProbas}, incremental=incrementalFV))
        if "knnc" in listOfClassifiers or "knn" in listOfClassifiers:
            results.append(classify(knnc, "KNN", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridKNN, "measureProbas":measureProbas}, incremental=incrementalFV))
        if "lrc" in listOfClassifiers:
            results.append(classify(lrc, "Logistic Regression", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridLR, "measureProbas":measureProbas}, incremental=incrementalFV))
        if "dtc" in listOfClassifiers:
            results.append(classify(dtc, "Decision Tree", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridDT, "measureProbas":measureProbas}, incremental=incrementalFV))
        if "svmc" in listOfClassifiers or "svm" in listOfClassifiers:
            results.append(classify(svmc, "SVM", X, y, nCV, nJobs, baselines, {"useGridSearch":gridSearch, "gridParameters":gridSVM, "measureProbas":measureProbas}, incremental=incrementalFV))
        if "etc" in listOfClassifiers:
            results.append(classify(etc, "Random Forest", X, y, nCV, nJobs, baselines, {"tryToMeasureFeatureImportance":measureProbas, "featuresOutFilename":(outfileName + ".pk"), "featureNames":vec.get_feature_names(), "useGridSearch":gridSearch, "gridParameters":gridETC, "measureProbas":measureProbas}, incremental=incrementalFV))

    precRecall, roc = getCurves(results)
    roc["Random Classifier"] = ([0,1],[0,1])

    plotGraph(precRecall, fileName=PRECRECALLNAME, xlabel="Recall", ylabel="Precision", generatePickle=generatePickle, hasPlotLibs=hasPlotLibs)
    plotGraph(roc, fileName=ROCNAME, xlabel="False Positive Rate", ylabel="True Positive Rate", generatePickle=generatePickle, hasPlotLibs=hasPlotLibs)
   
    fo = open(outfileName, "a")

    for r in results:
        label = r[0]
        resultMetrics = r[1]
        if usingIncremental:
            for i, part in enumerate(incrementalVector):
                fo.write("%s, Partition %d, %.3f, %.3f, %.3f, %.3f\n" % (label, part/10, 100.0*(resultMetrics.acc[i]), 100.0*resultMetrics.sf1[i], 100.0*resultMetrics.mf1[i], 100.0*resultMetrics.wf1[i]))
                print "%s, Partition %d, %.3f, %.3f, %.3f, %.3f" % (label, part/10, 100.0*(resultMetrics.acc[i]), 100.0*resultMetrics.sf1[i], 100.0*resultMetrics.mf1[i], 100.0*resultMetrics.wf1[i])
            
            print "Means ----- %s, %.3f, %.3f, %.3f, %.3f" % (label, 100.0*(np.mean(resultMetrics.acc)), 100.0*np.mean(resultMetrics.sf1), 100.0*np.mean(resultMetrics.mf1), 100.0*np.mean(resultMetrics.wf1))
        else:
            fo.write("%s, %.3f, %.3f, %.3f, %.3f\n" % (label, 100.0*resultMetrics.acc, 100.0*resultMetrics.sf1, 100.0*resultMetrics.mf1, 100.0*resultMetrics.wf1))
            print "%s, %.3f, %.3f, %.3f, %.3f" % (label, 100.0*resultMetrics.acc, 100.0*resultMetrics.sf1, 100.0*resultMetrics.mf1, 100.0*resultMetrics.wf1)

    fo.close()
    logging.info("Done")