def loadDataset(identifier): d = DataSetLoader() x = d.LoadDataSet(identifier) print 'X', x.shape y = d.LoadDataSetClasses(identifier) print 'Y', y.shape y = numpy.transpose(y.astype(numpy.int64)) print 'Y', y.shape target = [] y = list(y) print "y before manual transform =", y for i in y: target.append(int(i)) print len(y) print y return x, y
def loadDataset(identifier): d = DataSetLoader() x = d.LoadDataSet(identifier) print 'X', x.shape y = d.LoadDataSetClasses(identifier) print 'Y', y.shape #y=numpy.transpose(y.astype(numpy.int64)) y = sklearn.utils.validation.column_or_1d(y, warn=True) print 'Y', y.shape target = [] y = list(y) print "y before manual transform =", y for i in y: target.append(int(i)) print len(y) print y return x, y
# NOTE(review): whitespace-mangled paste — a nested-loop driver script is
# collapsed onto this one physical line, so everything after the first inline
# "#print size" is dead text, and the chunk is truncated mid-loop-body.
# Left verbatim rather than reconstructed.  Intended logic (from the tokens):
# open 'mcc/B-Full-mccResults<dataset>.txt', write a CSV-style header, then for
# every classifier/method/size/preprocessing/validation combination load the
# B train/test sets and flatten the transposed label arrays into lists of ints
# ("chaipee will fix it later on" marks that workaround as temporary).
dataset = "B" f = open('mcc/B-Full-mccResults' + dataset + '.txt', 'w') f.write( "dataset, size, method, classifier, validationTechnique, mc, timeTaken, extra info" ) for classifierName in classifiers: for method in methods: for size in sizes: for preproc in preprocessings: for validation in validationTechnique: #print size #print method d = DataSetLoader() X_train = d.LoadDataSet("B_train") y_train = d.LoadDataSetClasses("B_train") X_test = d.LoadDataSet("B_test") y_test = d.LoadDataSetClasses("B_test") #chaipee will fix it later on y_train = numpy.transpose(y_train) print y_train.shape targets = list(y_train) y_train = [] for i in targets: #print i y_train.append(int(i)) y_test = numpy.transpose(y_test) print y_test.shape targets = list(y_test)
# NOTE(review): whitespace-mangled paste — sklearn imports plus a
# method x size experiment loop collapsed onto one physical line; everything
# after the inline "#chaipee will fix it later on" comment is dead text, and
# the chunk is truncated mid-loop.  Left verbatim.  Intended logic: for each
# method in MRMR/JMI/JMIM and each size, load dataset "A", flatten the
# transposed label array to a list of ints, then restrict X_train to the
# feature indices pickled in 'datasetA_pickles/selected_indices_<method>.joblib.pkl'.
from sklearn.pipeline import make_pipeline from sklearn import metrics from sklearn import preprocessing from sklearn.ensemble import VotingClassifier from sklearn.model_selection import LeaveOneOut sizes = ['10', '50', '100', '150', '200', '250'] methods = ['MRMR', 'JMI', 'JMIM'] for method in methods: for size in sizes: print size print method import time d = DataSetLoader() X_train = d.LoadDataSet("A") y_train = d.LoadDataSetClasses("A") print X_train.shape print y_train.shape #chaipee will fix it later on y_train = numpy.transpose(y_train) print y_train.shape targets = list(y_train) y_train = [] for i in targets: #print i y_train.append(int(i)) #print len(y_train) #first run indices indices = joblib.load('datasetA_pickles/selected_indices_' + method + '.joblib.pkl') X_train = X_train[:, indices]
# NOTE(review): whitespace-mangled paste that begins with a '#', so the whole
# physical line is currently dead text.  It mixes two fragments: (1) the tail
# of a feature-selection function (prints feat_selector.ranking_, filters X
# via feat_selector.transform, returns [X_filtered, selected_indices]) whose
# header is outside this view, and (2) the start of a B_train driver script
# that flattens labels to ints and iterates method x size — truncated at the
# end.  Left verbatim rather than reconstructed.
# check ranking of features print (feat_selector.ranking_) print (len(feat_selector.ranking_)) selected_indices=feat_selector.ranking_ # call transform() on X to filter it down to selected features X_filtered = feat_selector.transform(X) return [X_filtered,selected_indices] d = DataSetLoader(); x = d.LoadDataSet("B_train"); y=d.LoadDataSetClasses("B_train"); print y.shape y=numpy.transpose(y) print x.shape print y.shape target=[] y=list(y) for i in y: target.append(int(i)) print len(y) sizes=['10','50','100','150','200','250'] methods=['MRMR','JMI','JMIM'] for method in methods: for size in sizes: print size print method
# Driver prologue: load dataset "A" and hand it to the mRMR subset selector.
import scipy.io
import numpy
from DataSetLoaderLib import DataSetLoader
import csv
#Used for storing and loading the trained classifier
from sklearn.externals import joblib

# Visually separate this run's console output from any previous text.
for _ in range(4):
    print("")

loader = DataSetLoader()
variables = loader.LoadDataSet("A")
targets = loader.LoadDataSetClasses("A")

"""
convert an array to csv
http://stackoverflow.com/questions/16482895/convert-a-numpy-array-to-a-csv-string-and-a-csv-string-back-to-a-numpy-array
targetsString = ','.join(['%d' % num for num in targets[0]])
variablesString = ','.join(['%.5f' % num for num in variables[0]])
numpy.fromstring(targetsString, sep=',')
load a csv to an array
http://stackoverflow.com/questions/13381815/python-csv-text-file-to-arrayi-j
"""

# SelectSubSetmRMR is defined elsewhere in this file; it returns the filtered
# feature matrix together with the chosen feature indices.
selected_indices = []
[subset, selected_indices] = SelectSubSetmRMR(variables, targets)
# NOTE(review): whitespace-mangled paste, truncated MID-STATEMENT at
# `actuals.replace("\n",` — cannot be reconstructed from here; left verbatim.
# Intended logic: resolve per-dataset LIG tables via eval("<Dataset>_LIGs"),
# load the <Dataset>_test matrix and labels, flatten labels to ints (dataset
# "C" keeps them as a numpy array instead), then join them into a comma-
# separated `actuals` string.  HACK: eval() on Dataset-derived names is
# fragile — a dict lookup would be safer; flagged, not changed.
results = "" LIGs = eval(Dataset + "_LIGs") LIG_Accuracies = eval(Dataset + "_LIG_Accuracies") start_time = time.time() padding = 0 #load the dataset d = DataSetLoader() #X_train_full = d.LoadDataSet(Dataset+"_train"); #y_train = d.LoadDataSetClasses(Dataset+"_train"); #targets=list(numpy.transpose(y_train)) #y_train=[] #for i in targets: # y_train.append(int(i)) X_validate_full = d.LoadDataSet(Dataset + "_test") y_validate = d.LoadDataSetClasses(Dataset + "_test") print("Dimensions of validation data and labels:", X_validate_full.shape, y_validate.shape) targets = list(numpy.transpose(y_validate)) y_validate = [] if Dataset == "C": y_validate = numpy.array(targets) #y_validate[y_validate == 0] = -1 else: for i in targets: y_validate.append(int(i)) y_test = y_validate actuals = ','.join([str(elem) for elem in y_test]) actuals = actuals.replace("\n",
# NOTE(review): whitespace-mangled paste — a per-dataset experiment loop
# collapsed onto one physical line (dead after the inline '#"LOOCV",' comment)
# and truncated inside the method/size loops at "#first run indices".
# Left verbatim.  Intended logic: for each dataset, append a timestamp to
# 'mcc/mccResults<dataset>.txt', load the train/test matrices and labels via
# DataSetLoader, print their shapes, then iterate over methods and sizes.
classifiers = ["RandomForest", "AdaBoost", "DT", "ExtraTree", "MLP", "SVM"] validationTechniques = ["10FoldCV"] #"LOOCV", preps = ["Standard", "Robust", "Quantile", "Imputer"] basePath = '' #needed when we want to run it locally #Iterating over each method for dataset in datasets: f = open('mcc/mccResults' + dataset + '.txt', 'a') f.write('\n{date:%Y-%m-%d_%H:%M:%S}'.format(date=datetime.datetime.now())) #f.write("dataset, size, method, classifier, validationTechnique, mc, timeTaken, cv.max, cv.mean, cv.min, cv.std, preprocessing"); print "Dataset = ", dataset #initiating datasetloader object d = DataSetLoader() #loading relevant Data and coresponding labels of dataset A X_train_full = d.LoadDataSet(dataset + "_train") y_train = d.LoadDataSetClasses(dataset + "_train") X_validate_full = d.LoadDataSet(dataset + "_test") y_validate = d.LoadDataSetClasses(dataset + "_test") print("Dimensions of training data and labels:", X_train_full.shape, y_train.shape) print("Dimensions of validation data and labels:", X_validate_full.shape, y_validate.shape) #READY with Dataset, going to perform the main loop now for method in methods: #Iterating over each size for size in sizes: print("Size and method:", size, method) #first run indices
# NOTE(review): whitespace-mangled paste mixing two fragments: (1) the tail of
# a myThread worker method — its opening `try:` is outside this view, leaving
# an orphan bare `except:` that swallows every error with only "Error Occured"
# (flagged: should catch specific exceptions) before releasing threadLock —
# and (2) the start of a thread-spawning driver that loads B_train, sets up a
# threading.Lock, and creates a myThread per column slice G[:, vals-original:vals].
# Truncated mid-loop at the end; left verbatim rather than reconstructed.
joblib.dump(values, 'selected_indices' + '_' + useMethod + '.joblib.pkl', compress=9) except: print "Error Occured" threadLock.release() print len(values) print "Exiting " + self.name return threads = [] d = DataSetLoader() G = d.LoadDataSet("B_train") targets = d.LoadDataSetClasses("B_train") print "Dataset loaded" G = numpy.asarray(G) targets = numpy.asarray(targets) threadLock = threading.Lock() print G.shape vals = 649 original = 649 for i in range(0, 1547): print "vals= " + str(vals) + "\n" # Create new threads thread = myThread(i, "Thread-" + str(i), vals - original, G[:, vals - original:vals], targets)
# NOTE(review): whitespace-mangled paste — the dataset "A" mcc driver loop on
# one physical line (dead after the inline '#datasets = ...' comment) and
# truncated mid-loop.  Left verbatim.  Intended logic: write a header to
# 'mcc/mccResultsA.txt', then for every classifier/method/size/preprocessing
# combination load dataset "A", flatten the transposed labels to ints, and
# restrict X_train to the indices pickled per method.
methods = ['MRMR','JMI','JMIM'] validationTechnique = ['LOOCV',"10FoldCV"] preprocessing = ['Standard','Imputer','Robust','Quantile'] #datasets = ["A","B"] classifiers = ["MLP","SVM","AdaBoost","DT","RandomForest","ExtraTree"] dataset = "A" f=open('mcc/mccResults'+dataset+'.txt','w'); f.write("dataset, size, method, classifier, validationTechnique, mc, timeTaken, extra info"); for classifierName in classifiers: for method in methods: for size in sizes: for preproc in preprocessing: d = DataSetLoader(); X_train= d.LoadDataSet(dataset); y_train = d.LoadDataSetClasses(dataset); #print X_train.shape #print y_train.shape #chaipee will fix it later on y_train=numpy.transpose(y_train) #print y_train.shape targets=list(y_train) y_train=[] for i in targets: #print i y_train.append(int(i)) #print len(y_train) #first run indices indices= joblib.load('dataset'+dataset+'_pickles/selected_indices_'+method+'.joblib.pkl') X_train=X_train[:,indices]
# NOTE(review): whitespace-mangled paste beginning with '#', so the whole
# physical line is currently dead text; also truncated at the end of the view.
# Left verbatim.  BUG (to fix when restoring): near the end,
# `y_test=numpy.transpose(y_train)` and the following `targets=list(y_train)`
# reuse y_train — presumably copy-paste from the y_train branch and both were
# meant to operate on y_test (compare the parallel chunk for dataset "B"
# earlier in this file).  Verify against the original script before fixing.
#validationTechnique = ['LOOCV',"10FoldCV"] -- NOT USED??? #preprocessing = ['','NP'] #datasets = ["A","B"] classifiers = ["MLP","SVM","AdaBoost","DT","RandomForest","ExtraTree"] f=open('mcc/mccResultsC.txt','w'); f.write("dataset, size, method, classifier, validationTechnique, mc, timeTaken"); for classifierName in classifiers: for method in methods: for size in sizes: #print size #print method d = DataSetLoader(); X_train= d.LoadDataSet("C_train"); y_train = d.LoadDataSetClasses("C_train"); X_test= d.LoadDataSet("C_test"); y_test = d.LoadDataSetClasses("C_test"); #chaipee will fix it later on y_train=numpy.transpose(y_train) print y_train.shape targets=list(y_train) y_train=[] for i in targets: #print i y_train.append(int(i)) y_test=numpy.transpose(y_train) print y_test.shape targets=list(y_train)