def CrossValidation(estimator, data, rowstart, rowfinish, attributes, CV, CVparams):
    """Run cross-validation on a row slice of `data` with the CV scheme named by `CV`.

    `data` is a dict with "inputdata", "outputdata" and optionally "groupdata"
    arrays; `CVparams` is forwarded to the CV-object factory.
    Returns whatever cv.cross_validation returns.
    """
    CVObject = cv.GetCVObject(CV, **CVparams)
    X = data["inputdata"][rowstart:rowfinish]
    y = data["outputdata"][rowstart:rowfinish]
    # group labels are optional; `in data` is the idiomatic membership test
    # (original used `in data.keys()`, which builds an unnecessary view)
    groups = data["groupdata"][rowstart:rowfinish] if "groupdata" in data else None
    return cv.cross_validation(estimator, X, y, groups, attributes, CVObject)
def neural_network(layers, lamb, theta_matrices, inputs, outputs, dataset_file):
    """Train a network via cv.run and return the final thetas and gradients.

    layers         -- list of layer sizes
    lamb           -- regularization strength
    theta_matrices -- initial weight matrices, one per layer transition
    inputs/outputs -- parallel sequences of training examples
    dataset_file   -- passed through to cv.run (presumably for logging/IO)
    """
    network = np.array(layers)
    # The original copied theta_matrices element-by-element (seeding with the
    # first entry, then skipping and re-adding it) — that is just a full copy.
    thetas = np.array(theta_matrices)
    regularization = lamb
    # pair each input with its expected output
    examples = [[inputs[i], outputs[i]] for i in range(len(inputs))]
    thetas_finais, gradientes_finais = cv.run(examples, thetas, regularization,
                                              network, dataset_file)
    #j_value = calculate_j(examples, thetas_finais, regularization, network)
    #print(j_value)
    return thetas_finais, gradientes_finais
def start(self):
    """Run the full ML job pipeline end to end.

    Returns True on success, False if any stage raised; failures are logged
    and the job status is set to "Errored".
    """
    # perform some logging
    self.jlogger.info("Starting job with job id {}".format(self.job_id))
    self.jlogger.debug("Job Config: {}".format(self.config))
    self.jlogger.debug("Job Other Data: {}".format(self.job_data))
    try:
        rud.ReadUserData(self)
        fg.FeatureGeneration(self, is_train=True)
        pp.Preprocessing(self, is_train=True)
        fs.FeatureSelection(self, is_train=True)
        fe.FeatureExtraction(self, is_train=True)
        clf.Classification(self)
        cv.CrossValidation(self)
        tsg.TestSetGeneration(self)
        tspp.TestSetPreprocessing(self)
        tsprd.TestSetPrediction(self)
        job_success_status = True
    except Exception:
        # narrowed from bare `except:`, which would also swallow
        # SystemExit and KeyboardInterrupt
        job_success_status = False
        helper.update_running_job_status(self.job_id, "Errored")
        self.jlogger.exception("Exception occurred in ML Job {} ".format(
            self.job_id))
    return job_success_status
def run(self, train_returns, train_sizes, bin_number):
    """Bin the training sizes, score predictions against returns, and report."""
    binner = CrossValidation.CrossVal(train_sizes, bin_number)
    size_bins = binner.get_bins()
    decile_scores = self.predict(train_returns, size_bins)
    self.interpret_scores(decile_scores)
    print(decile_scores)  # decile scores
def buildTestsTrainings(self):
    """
    Do the cross validation with the protein classes
    """
    self.crossValidation = CrossValidation.CrossValidation()
    # register every enzyme-class protein list with the validator
    protein_classes = (
        self.oxidoreductaseProteinList,
        self.transferaseProteinList,
        self.hydrolaseProteinList,
        self.lyaseProteinList,
        self.isomeraseProteinList,
        self.ligaseProteinList,
    )
    for protein_list in protein_classes:
        self.crossValidation.addClass(protein_list)
def iterativeTester(self):
    """Sweep the iteration budget and record cross-validated error per step.

    Empirical notes from earlier runs:

    Ionosphere data:
      lr = 20,    eps = 0, tests = 50, iterstep = 5  -> oscillates around best answer
      lr = 1,     eps = 0, tests = 50, iterstep = 5  -> reaches answer quickly
      lr = 0.01,  eps = 0, tests = 50, iterstep = 5  -> not enough time to converge
    Adult data:
      lr = 1,     eps = 0, tests = 10,  iterstep = 1000 -> oscillates a lot
      lr = 1,     eps = 0, tests = 100, iterstep = 10   -> still lots of oscillating
      lr = 0.001, eps = 0, tests = 100, iterstep = 10   -> still lots of oscillating
    Glass data:
      lr = 0.02, eps = 0.01, tests = 10, iterstep = 5000 -> good results (over 0.965)
    Auto-mpg data:
      lr = 0.02, eps = 0.01, tests = 10, iterstep = 5000 -> good results (over 0.8 except first run)
    """
    # Settings
    test_lr = 0.001
    test_eps = 0
    tests = 100000
    iterStep = 10
    self.results = [0] * tests

    # Test iteratively: each run gets a larger iteration budget
    for run_index in range(tests):
        validator = CV.LogRegCrossValidation(self.data, self.target, self.k,
                                             lr=test_lr, eps=test_eps,
                                             iterations=(run_index + 1) * iterStep)
        validator.crossValidation()
        self.results[run_index] = validator.averageError()

    # Plot results
    self.plotResults()
def test_addClass(self):
    """ test adding a class """
    validator = CrossValidation.CrossValidation()
    self.class1 = self.createClass('1', 148)
    self.class2 = self.createClass('2', 17)
    for protein_class in (self.class1, self.class2):
        validator.addClass(protein_class)
    self.assertCrossValidation(validator)
def run_part3(trainX, trainY, testX, testY, lr, eps, max_iter, lmd_reg, k=1): valid_loss = [] smp_num, dim_num = trainX.shape test_num = smp_num / k # sample number of a validation set random_index = random.sample(xrange(0, smp_num), smp_num) # for randomized split for lmd in lmd_reg: loss = cv.CrossValidation(trainX, trainY, lr, eps, max_iter, lmd, k, test_num, random_index) valid_loss.append(loss) print "for diff lambda, their final validation loss are:", valid_loss
def run(dominios, targets, anotacoes, atributos, incluiParticipante): folds = cross.crossValidationParticipant(6, anotacoes) diceTotal = [] masiTotal = [] acuraciaTotal = 0.0 results = [] acertosT = {} totalT = {} for participante in folds.keys(): resultadoTotal, dice, masi, acuracia = exp4.run(dominios, targets, folds[participante], atributos, {}, incluiParticipante) diceTotal.extend(dice) masiTotal.extend(masi) acuraciaTotal = acuraciaTotal + acuracia for resultados in resultadoTotal: acertos = resultados[0] total = resultados[1] for atributo in acertos.keys(): if atributo not in acertosT: acertosT[atributo] = 0.0 totalT[atributo] = 0.0 acertosT[atributo] = acertosT[atributo] + acertos[atributo] totalT[atributo] = totalT[atributo] + total[atributo] results.append([acertosT, totalT]) print "\n" print "General:" print 50 * "*" print "Expressions: " print "Dice: " + str(np.mean(diceTotal)) print "Masi: " + str(np.mean(masiTotal)) print "Accuracy: " + str(acuraciaTotal / len(diceTotal)) print "\n" print "Attributes:" print 15 * "-" for atributo in acertosT.keys(): print "Attribute: " + str(atributo) print "Accuracy: " + str(acertosT[atributo] / totalT[atributo]) print 10 * "-" return results, diceTotal, masiTotal, acuraciaTotal
def __init__(self, path_folder, winlen):
    """Load all wave files under path_folder and prepare MFCC features.

    winlen -- analysis window length in seconds; defaults to 0.025 when None.
    """
    # identity check (`is not None`) instead of equality `!= None`
    self.winlen_ = winlen if winlen is not None else 0.025
    self.reader_ = WaveReader.WaveReader(path_folder)
    (self.signals, self.rate) = self.reader_.read_all()
    self.converter = WaveToMfcc.WaveToMfcc(self.signals, self.rate,
                                           self.winlen_, nfilt=30, ncep=7)
    self.gmm_table_ = []
    # 2-fold split over the speakers discovered by the converter
    self.cross_split = CrossValidation.CrossValidation(
        self.converter.list_of_speakers, 2)
    self.results_ = np.array([])
    self.rr_ = np.array([])
def run(self, train_corpus, freq_type, stopwords, train_returns, bin_number, filename):
    """Score documents against a negative-word lexicon and evaluate binned predictions."""
    vectorizer = MatrixVectorizer.Vectorizer(train_corpus, freq_type, stopwords)
    train_count_matrix = vectorizer.get_count_matrix()
    # project the negative-word lexicon into the same vector space as the corpus
    negative_word_list = self.create_negative_stuff(filename)
    negative_word_matrix = vectorizer.transform_new_data(negative_word_list)
    document_scores = self.score_documents(negative_word_matrix, train_count_matrix)
    binner = CrossValidation.CrossVal(document_scores, bin_number)
    score_bins = binner.get_bins()
    decile_scores = self.predict(train_returns, score_bins)
    self.interpret_scores(decile_scores)
    print(decile_scores)
def run(self, train_corpus, freq_type, stopwords, test_corpus, svals, reduce_type,
        test_returns, bin_number, train_returns, method):
    """Vectorize train/test corpora, reduce dimensionality with one shared
    projection, then fit/predict against binned test returns."""
    vectorizer = MatrixVectorizer.Vectorizer(train_corpus, freq_type, stopwords)
    train_counts = vectorizer.get_count_matrix()
    test_counts = vectorizer.transform_new_data(test_corpus)
    # the reducer is fitted on training counts and reused for the test matrix
    reducer = DimensionReducer.Reducer(train_counts)
    reduced_train = reducer.reduce_dimension(svals, reduce_type)
    reduced_test = reducer.reduce_more_data(test_counts.todense())
    binner = CrossValidation.CrossVal(test_returns, bin_number)
    return_bins = binner.get_bins()
    scores = self.fit_predict(reduced_train, train_returns, reduced_test,
                              return_bins, method)
    self.interpret_scores(scores)
def getSolution_5_a_1(IrisData, fold, patternDimension, labelIndex, FlowerDict): ## 10-fold cross validation errorRateList, gaussianMatrix = crvd.crossValidate(IrisData, fold, patternDimension, labelIndex) sizeOfErrorList = len(errorRateList) numOfGroup = len(gaussianMatrix) numOfGaussianEachGroup = len(gaussianMatrix[0]) assert sizeOfErrorList == numOfGroup ## first print overall error averageErrorRate = sum(errorRateList) / len(errorRateList) string = str.format("errorRate: {0:.4f}%", averageErrorRate * 100) string += "\n [" ## now print parameters of each data for i in range(0, numOfGroup): print print "Cross Validation Set " + str(i + 1) + ": \\\\" print "\htab Error of this Testing Set: " + str.format( "${0:.2f}\%$ \n", errorRateList[i] * 100) gaussians = gaussianMatrix[i] for j in range(0, numOfGaussianEachGroup): tempGaussian = gaussians[j] print "\\textbf{For class " + FlowerDict.get( tempGaussian.getLabel()) + "}:" tempGaussian.showMuInTex() print "\\vspace{-1cm}" tempGaussian.showSigmaInTex() print if (i + 1) % 2 == 0: print "\\newpage" ## overall summary for error for i in range(0, numOfGroup): tempError = errorRateList[i] string += " " + str.format("{0:.3f}%", tempError * 100) + "," string += "] \n" print string
def test_getNumberTests(self):
    """ Tests the getNumberTests method """
    crossValidation = CrossValidation.CrossValidation()
    # (input size, expected (tests, remainder)) pairs
    cases = [
        (73, (7, 10)),
        (117, (12, 9)),
        (131, (13, 14)),
        (51, (5, 6)),
        (47, (5, 2)),
        (17, (1, 8)),
    ]
    for size, expected in cases:
        self.assertEqual(crossValidation.getNumberTests(size), expected)
# Pipeline: preprocess -> extract features (TF-IDF, word2vec, combined) and
# cross-validate each representation, timing every stage into `waktu`.
print("=== Preprocessing ===")
pr.praproses_data(data_input, data_clean)

# --- Load Kamus Kata (unique word) ---
print("=== Fitur Freq Perdoc & Alldoc ===")
fitur_onedoc, fitur_alldoc = Regex.load_fitur_postag(data_clean)

# '''
print("=== Bag Of Words ===")
bow = tfidf.bagofword(fitur_alldoc)

start_time1 = time.time()
# --- Load Feature Extraction Using TF IDF---
print("=== NEW Feature Extraction TfIdf ===")
hasil_ekstraksi_tfidf, bow = tfidf.main(fitur_onedoc, fitur_alldoc, result_tfidf)
h_loss_tfidf = cr.cross_validation(result_tfidf, data_label, path_hasil_tfidf, metode_tfidf)
waktu.write("TF-IDF " + "--- %s seconds ---" % (time.time() - start_time1) + '\n')

start_time1 = time.time()
# --- Load Feature Extraction Using Vector ---
print("=== NEW Feature Extraction Vector ===")
model = vector.load_w2vec_model(path_model)
hasil_ekstraksi_w2vec = vector.feature_extraction(model, fitur_onedoc, result_word2vec)
h_loss_vector = cr.cross_validation(result_word2vec, data_label, path_hasil_word2vec, metode_w2vec)
waktu.write("W2VEC " + "--- %s seconds ---" % (time.time() - start_time1) + '\n')

start_time1 = time.time()
# --- Load Feature Extraction Using TF IDF Concat Vector---
print("=== NEW Feature Extraction TfIdf & Vector ===")
hasil_ekstraksi_gabungan = gabungan.load_weight_gabungan(result_tfidf, bow, model, result_gabungan)
h_loss_gabungan = cr.cross_validation(result_gabungan, data_label, path_hasil_gabungan, metode_gabungan)
from collections import OrderedDict


def full_tree(validation_data, attribute_matrix):
    """Grow a complete decision tree over validation_data, print it, return it."""
    tree = dt.DecisionTree()
    dt.select_node_id(tree, validation_data, attribute_matrix, True)
    dt.add_branch(tree, validation_data, attribute_matrix)
    dt.split_examples(tree, validation_data, attribute_matrix, True)
    print("root attribute selected:" + tree.node_id)
    dt.print_tree(tree)
    return tree


# uncomment to test CrossValidation
if __name__ == '__main__':
    #arquivo = "dadosBenchmark_validacaoAlgoritmoAD.csv"
    arquivo = "vertebra.csv"
    #arquivo = "dataset_191_wine-1.csv"
    #arquivo = "dataset_31_credit-g.csv"
    data, attribute_matrix = CsvReader.read_csv(arquivo)
    #decision_tree = full_tree(data, attribute_matrix)
    cs.run(data, attribute_matrix)
# Standardize the held-out test matrix and add an intercept column,
# mirroring the preprocessing applied to the training matrix X.
X_test = data_set_test
X_test = (X_test - X_test.mean()) / X_test.std()
X_test.insert(0, "Intercept", 1)
X_test = np.matrix(X_test)

# Initial Thetas
theta = np.matrix(np.zeros(shape=X.shape[1]))

# Parameters
learning_rate = 0.01
iteration = 500

print("\nRunning Linear Regression On Whole Set")
result = gd.gradient_descent(X, y, theta, learning_rate, iteration)
gd.plot_graph(iteration, result[1])
final_predictions = X.dot(result[0].T)
mae = gd.mean_absolute_error(final_predictions, y)
print("Mean Absolute Error: {0}".format(mae))

print("\nRunning Linear Regression On Split Sets")
splits = cv.cross_validation_split(data_set, 5)
cv.perform_gradient_on_splits(splits, learning_rate, iteration)

# Predict the test set with the whole-set fit and write the submission file.
prediction_of_test_set = X_test.dot(result[0].T)
prediction_df = pd.DataFrame(prediction_of_test_set)
prediction_df.columns = ['SalePrice']
df_submission = pd.concat([data_test['Id'], prediction_df], axis=1)
df_submission.to_csv('data/Submission.csv', index=False)
import sys

# Generate the data from the basis function
if len(sys.argv) == 1:
    # Generate the order of the random true polynomial function
    trueOrder = random.randint(1, 10)
    D = Data.genData(trueOrder)
elif sys.argv[1] == "nonpoly":
    D = Data.genNonPoly()
else:
    raise Exception("Invalid command line argument")

# In the following, D is the data set which has all the x values as its
# first entry and the y values as its second.
error, order = CV.kFoldErrorChoose(D[0], D[1], 10, 5)

# Graph the points on the base polynomial
Graph.lineColor(D[0], D[1], 'red')

# Add Gaussian noise to the data outputs
D[1] = Data.addGaussianNoise(D[1], 1.0 / 2000)

# Graph them as points in blue
Graph.pointsSimple(D[0], D[1])

# Estimate the coefficients of the polynomial with best order
fit = Regression.polyTrain(D[0], D[1], order)

# Get the function's estimates for the training x values
z = [fit(i) for i in D[0]]
def prepClassfier(classfier, predictFunc, classfierParam, classfierName='default', doCV=kFold_CV_Active):
    """Load the data for `classfierName` and either cross-validate or train once.

    When doCV is truthy, runs k-fold cross validation over the parameter grid;
    otherwise trains a single classifier with the middle parameter value.
    """
    # naiveBayes reads from CSV; everything else reads the MNIST binary files
    source = input_csv_train if classfierName == 'naiveBayes' else (input_mnist_data_train, input_mnist_labels_train)
    dataAndLabels = loadDataAndLabels(num_mnist_images, source, classfierName)
    data = np.array(dataAndLabels[0])
    labels = np.array(dataAndLabels[1])
    if doCV:  # `== True` would reject truthy non-bool flags
        return cv.crossValidation(data, labels, classfier, predictFunc, classfierParam, num_fold)
    return classfier(data, labels, classfierParam[len(classfierParam) // 2])
'''
Paper Title: Road Surface Recognition Based on DeepSense Neural Network using Accelerometer Data
Created by ITS Lab, Institute of Computer Science, University of Tartu
'''
from model import DeepSense
import CrossValidation

if __name__ == '__main__':
    # Build the network, then fold the data and train with cross-validation.
    network = DeepSense.DeepSenseTS(preprocess=True)
    validator = CrossValidation.CV(network=network)
    validator.create_folds()
    validator.train_on_cv()
import pprint
import BuildTree
import CrossValidation
import datasets.DataLenses as Lenses
import datasets.DataMushrooms as Mushrooms
import RelativeFrequency

# Import DataFrame from DataMushrooms class
mushrooms_data = Mushrooms.data
# Import DataFrame from DataMushrooms class
lenses_data = Lenses.data

""" First part: Design and Implementation """
# Build tree using ID3 algorithm for Lenses data
print('Lenses data solution tree:')
pprint.pprint(BuildTree.build_tree(lenses_data))
print()

# Build tree using ID3 algorithm for Mushrooms data
print('Mushrooms data solution tree:')
pprint.pprint(BuildTree.build_tree(mushrooms_data))

""" Second part: Experimentation """
print('Cross-validation over the Mushrooms data:')
# Perform cross-validation over the mushrooms dataset in 10 folds
CrossValidation.cross_validation(mushrooms_data)
def execute_softmax(X_train, y_train, OX_test, oy_test):
    """5-fold CV over the training data; per fold, tune a Softmax classifier
    on a learning-rate/regularization grid and evaluate the best on the
    (fixed) test set. Prints per-fold and average test accuracy."""
    learning_rates = [1e-5, 1e-8]
    regularization_strengths = [10e2, 10e4]
    results = {}
    best_val = -1
    best_softmax = None
    # X_train = getCIFAR_as_32Pixels_Image(X_train)
    # OX_test = getCIFAR_as_32Pixels_Image(OX_test)
    accuracy = []
    totalAccuracy = 0.0
    ## Implementing Cross Validation
    crossValidObj = CrossValidation(5, X_train, y_train)
    foldsGen = crossValidObj.generateTrainAndTest()
    for fold in range(5):
        next(foldsGen)
        X_test = OX_test
        X_train = crossValidObj.train
        y_train = crossValidObj.labels_train
        X_val = crossValidObj.test
        y_val = crossValidObj.labels_test

        # Preprocessing: reshape the image data into rows
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_val = np.reshape(X_val, (X_val.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))

        # Normalize the data: subtract the mean image
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

        # Add bias dimension and transform into columns
        X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
        X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
        X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T

        softmax_sgd = Softmax()
        tic = time.time()
        losses_sgd = softmax_sgd.train(X_train, y_train, method='sgd',
                                       batch_size=200, learning_rate=1e-6,
                                       reg=1e5, num_iters=1000,
                                       verbose=False, vectorized=True)
        toc = time.time()
        y_train_pred_sgd = softmax_sgd.predict(X_train)[0]
        print('Training accuracy: %f' % (np.mean(y_train == y_train_pred_sgd)))
        y_val_pred_sgd = softmax_sgd.predict(X_val)[0]
        print('Validation accuracy: %f' % (np.mean(y_val == y_val_pred_sgd)))

        # Choose the best hyperparameters by tuning on the validation set
        sweep_count = 0
        interval = 5
        for learning_rate in np.linspace(learning_rates[0], learning_rates[1], num=interval):
            sweep_count += 1
            print('The current iteration is %d/%d' % (sweep_count, interval))
            for reg in np.linspace(regularization_strengths[0], regularization_strengths[1], num=interval):
                softmax = Softmax()
                softmax.train(X_train, y_train, method='sgd', batch_size=200,
                              learning_rate=learning_rate, reg=reg,
                              num_iters=1000, verbose=False, vectorized=True)
                y_train_pred = softmax.predict(X_train)[0]
                y_val_pred = softmax.predict(X_val)[0]
                train_accuracy = np.mean(y_train == y_train_pred)
                val_accuracy = np.mean(y_val == y_val_pred)
                results[(learning_rate, reg)] = (train_accuracy, val_accuracy)
                if val_accuracy > best_val:
                    best_val = val_accuracy
                    best_softmax = softmax

        # Print out the results
        for learning_rate, reg in sorted(results):
            train_accuracy, val_accuracy = results[(learning_rate, reg)]
            print('learning rate %e and regularization %e, \n \
 the training accuracy is: %f and validation accuracy is: %f.\n' %
                  (learning_rate, reg, train_accuracy, val_accuracy))

        # Evaluate the best classifier of this fold on the held-out test set
        y_test_predict_result = best_softmax.predict(X_test)
        y_test_predict = y_test_predict_result[0]
        test_accuracy = np.mean(oy_test == y_test_predict)
        accuracy.append(test_accuracy)
        totalAccuracy += test_accuracy
        print('The test accuracy is: %f' % test_accuracy)
    print(accuracy)
    avgAccuracy = totalAccuracy / 5.0
    print('Average Accuracy: %f' % avgAccuracy)
## Replace my references with references to your answers to those assignments.
## IMPORTANT NOTE !!
## Remember to install the Pillow library (which is required to execute 'import PIL')
## Remember to install Pytorch: https://pytorch.org/get-started/locally/ (if you want GPU you need to figure out CUDA...)
from PIL import Image
import torchvision
import torchvision.transforms as transforms
import torch
import numpy as np

import Assignment5Support
import EvaluationsStub
import CrossValidation

# Module-level 3-fold cross-validation helper shared by the pipeline.
crossValidation = CrossValidation.CrossValidation(3)

import Featurize

if __name__ == '__main__':
    kDataPath = '../dataset_B_Eye_Images'

    (xRaw, yRaw) = Assignment5Support.LoadRawData(kDataPath,
                                                  includeLeftEye=True,
                                                  includeRightEye=True)
    (xTrainRaw, yTrainRaw, xTestRaw, yTestRaw) = Assignment5Support.TrainTestSplit(
        xRaw, yRaw, percentTest=.25)

    print('Train is %f percent closed.' % (sum(yTrainRaw) / len(yTrainRaw)))
    print('Test is %f percent closed.' % (sum(yTestRaw) / len(yTestRaw)))

    # Load the images and then convert them into tensors (no normalization)
    xTrainImages = [Image.open(path) for path in xTrainRaw]
    xTrain, yTrain = Featurize.Featurize(xTrainImages, yTrainRaw)

    print(f'Training data size: {xTrain.size()}')
def main(): print "" print "\t+----------------------------------------------------------------+" print "\t| |" print "\t| CROSS VALIDATION OF LEARNING FORM EXAMPLE MODULES (LEM1) |" print "\t| RULE INDUCTION ALGORITHM |" print "\t| Author : Madhu Chegondi |" print "\t| KUID : m136c192 |" print "\t| |" print "\t+----------------------------------------------------------------+" print "" dataFile = raw_input("\tEnter Name Of DataFile : ") while (True): if (dataFile): try: dfp = open('Data/' + dataFile, 'r') # This Program assumes that first 2 lines of the input data filename have # < a a a d > # [ attribute1 attribute2 attribute3 decision ] header1 = dfp.readline() header2 = dfp.readline().strip().split() AttNames = header2[1:-1] DesName = header2[-2] attr = [] decisions = [] for line in dfp: if re.match(r'^\!.*', line) or line.strip() == '': continue line.strip() values = line.split() rawData = {} des = {} for i in range(len(values) - 1): try: if (type(float(values[i])) == float): rawData[AttNames[i]] = float(values[i]) except ValueError: rawData[AttNames[i]] = values[i] attr.append(rawData) des[DesName] = values[-1] decisions.append(des.items()) break except: print "\t\tERROR: Enter A Valid File Name\n" dataFile = raw_input("\tEnter Name Of DataFile : ") else: dataFile = raw_input("\tEnter Name Of DataFile : ") print "\n\tCROSS VALIDATION TECHNIQUES" print "\t\t1. BOOTSTRAP CROSS VALIDATION" print "\t\t2. LEAVING ONE OUT CROSS VALIDATION" choice = raw_input("\n\tENTER YOUR CHOICE OF CROSS VALIDATION (1 or 2) : ") while True: if choice == '1' or choice == '2': break else: choice = raw_input( "\tENTER YOUR CHOICE OF CROSS VALIDATION (1 or 2) : ") samples = None if choice == '1': method = 'Bootstrap' print "\n\tCONFIGURING BOOTSTRAP" samples = raw_input( "\t\tHow many samples do you wish to create (default 200 samples) : " ) else: method = 'LeaveOneOut' CrossValidation.CrossValidation(attr, decisions, DesName, method, samples, dataFile)
# from hcluster import * import ParserStars as parser import CrossValidation as cross import Experiment1 as exp1 import SVMValidatedExperiment as exp2 import SVMValidatedExperiment2 as exp3 import ExperimentDecisionTree as exp4 import ValidatedExperimentIndividual as exp5 def initialize(): anotacoes = parser.parseAnnotation() dominios = parser.parseDominio() participantes = {} atributos = ["type", "size", "colour", "hpos", "vpos", "near", "left", "right", "below", "above", "in-front-of"] targets = {"01f-t1n":"h", "01f-t1r":"h", "01f-t2n":"h", "01f-t2r":"h", "01o-t1n":"h", "01o-t1r":"h", "01o-t2n":"h", "01o-t2r":"h", "02f-t1n":"o", "02f-t1r":"o", "02f-t2n":"o", "02f-t2r":"o", "02o-t1n":"o", "02o-t1r":"o", "02o-t2n":"o", "02o-t2r":"o", "03f-t1n":"m", "03f-t1r":"m", "03f-t2n":"m", "03f-t2r":"m", "03o-t1n":"m", "03o-t1r":"m", "03o-t2n":"m", "03o-t2r":"m", "04f-t1n":"a", "04f-t1r":"a", "04f-t2n":"a", "04f-t2r":"a", "04o-t1n":"a", "04o-t1r":"a", "04o-t2n":"a", "04o-t2r":"a", "05f-t1n":"m", "05f-t2n":"m", "05f-t1r":"m", "05f-t2r":"m", "05o-t1n":"m", "05o-t1r":"m", "05o-t2n":"m", "05o-t2r":"m", "06f-t1n":"h", "06f-t1r":"h", "06f-t2n":"h", "06f-t2r":"h", "06o-t1n":"h", "06o-t1r":"h", "06o-t2n":"h", "06o-t2r":"h", "07f-t1n":"i", "07f-t1r":"i", "07f-t2n":"i", "07f-t2r":"i", "07o-t1n":"i", "07o-t1r":"i", "07o-t2n":"i", "07o-t2r":"i", "08f-t1n":"a", "08f-t1r":"a", "08f-t2n":"a", "08f-t2r":"a", "08o-t1n":"a", "08o-t1r":"a", "08o-t2n":"a", "08o-t2r":"a" } return dominios, targets, anotacoes, atributos, participantes if __name__ == '__main__': dominios, targets, anotacoes, atributos, participantes = initialize() folds = cross.crossValidation(10, anotacoes) print "Machine Learning sem ID" # exp5.run(dominios, targets, anotacoes, atributos, False) exp2.run(dominios, targets, folds, atributos, {}, False) print "\n\n" print "Machine Learning com ID" # exp5.run(dominios, targets, anotacoes, atributos, True) exp2.run(dominios, targets, folds, atributos, {}, True)
def testDataset(csvFile):
    """Read the dataset from csvFile and run a 5-fold unknown-data test."""
    # only examples/attributes/targetAttr are needed; the rest is discarded
    attributes, targetAttr, examples, trainingSet, lista = dataDefinition(csvFile)
    cv.unknownDataTest(examples, attributes, targetAttr, 5)
def executeSVM(X_train, y_train, OX_test, oy_test):
    """5-fold CV over the training data; per fold, tune an SVM classifier on a
    learning-rate/regularization grid and evaluate the best on the (fixed)
    test set. Prints per-fold and average test accuracy."""
    learning_rates = [1e-5, 1e-8]
    regularization_strengths = [10e2, 10e4]
    results = {}
    best_val = -1
    best_svm = None
    # X_train = getCIFAR_as_32Pixels_Image(X_train)
    # OX_test = getCIFAR_as_32Pixels_Image(OX_test)
    accuracy = []
    totalAccuracy = 0.0
    ## Implementing Cross Validation
    crossValidObj = CrossValidation(5, X_train, y_train)
    foldsGen = crossValidObj.generateTrainAndTest()
    for fold in range(5):
        next(foldsGen)
        X_test = OX_test
        X_train = crossValidObj.train
        y_train = crossValidObj.labels_train
        X_val = crossValidObj.test
        y_val = crossValidObj.labels_test

        # Preprocessing: reshape the image data into rows
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_val = np.reshape(X_val, (X_val.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))

        # Normalize the data: subtract the mean image
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

        # Add bias dimension and transform into columns
        X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
        X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
        X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T

        SVM_sgd = SVM()
        losses_sgd = SVM_sgd.train(X_train, y_train, method='sgd',
                                   batch_size=200, learning_rate=1e-6,
                                   reg=1e5, num_iters=1000,
                                   verbose=False, vectorized=True)
        y_train_pred_sgd = SVM_sgd.predict(X_train)[0]
        print('Training accuracy: %f' % (np.mean(y_train == y_train_pred_sgd)))
        y_val_pred_sgd = SVM_sgd.predict(X_val)[0]
        print('Validation accuracy: %f' % (np.mean(y_val == y_val_pred_sgd)))

        # Tune hyperparameters on the validation split
        sweep_count = 0
        interval = 5
        for learning_rate in np.linspace(learning_rates[0], learning_rates[1], num=interval):
            sweep_count += 1
            print('The current iteration is %d/%d' % (sweep_count, interval))
            for reg in np.linspace(regularization_strengths[0], regularization_strengths[1], num=interval):
                svm = SVM()
                svm.train(X_train, y_train, method='sgd', batch_size=200,
                          learning_rate=learning_rate, reg=reg,
                          num_iters=1000, verbose=False, vectorized=True)
                y_train_pred = svm.predict(X_train)[0]
                y_val_pred = svm.predict(X_val)[0]
                train_accuracy = np.mean(y_train == y_train_pred)
                val_accuracy = np.mean(y_val == y_val_pred)
                results[(learning_rate, reg)] = (train_accuracy, val_accuracy)
                if val_accuracy > best_val:
                    best_val = val_accuracy
                    best_svm = svm

        # Print out the results
        for learning_rate, reg in sorted(results):
            train_accuracy, val_accuracy = results[(learning_rate, reg)]
            print('learning rate %e and regularization %e, \n \
 the training accuracy is: %f and validation accuracy is: %f.\n' %
                  (learning_rate, reg, train_accuracy, val_accuracy))
        print(accuracy)

        # Evaluate the best classifier of this fold on the held-out test set
        y_test_predict_result = best_svm.predict(X_test)
        y_test_predict = y_test_predict_result[0]
        test_accuracy = np.mean(oy_test == y_test_predict)
        accuracy.append(test_accuracy)
        totalAccuracy += test_accuracy
        print('The test accuracy is: %f' % test_accuracy)
    print(accuracy)
    avgAccuracy = totalAccuracy / 5.0
    print('Average Accuracy: %f' % avgAccuracy)
# Load the referral-source dataset, rank features two ways, and sweep ANN
# layer/momentum proposals over the chosen feature selection.
dataset = DataSet(filename='../config/referral_source.txt')

path_to_file = os.path.join(gen_dir, filename + '.csv')
df = pd.read_csv(path_to_file, sep=',')

# univariate (k-best) feature ranking; 'value' is the label column
sf_univarate = dataset.univariate_selection(data=df, k_best=(len(df.loc[0]) - 1))
sf_univarate.insert(0, 'value')

# tree-based feature-importance ranking
sf_importance = dataset.f_importance(data=df, n_attrs=(len(df.loc[0]) - 1))
sf_importance.insert(0, 'value')

nm = NetworkModel()
ds = CrossValidation()
result_list = []

if feature_selection_name == 'univ':
    f_selection = sf_univarate
elif feature_selection_name == 'impot':
    f_selection = sf_importance

tmp_row = []
for layers in ann_proposal:
    for momentum in momentum_proposal:
        model = nm.create_model(layers=layers,
                                input_size=number_of_features,
                                output_size=len(set(df['value'])),
                                momentum=momentum)
def EvaluateCompletionMain(data,mask,method,useRelation,execTimes,logger,information,unobservedRates = None,alpha=None,ranks=None): """ 数値実験本体 """ global log log = logger varianceTimes = execTimes L = data["L"] X = data["X"] normX = numpy.linalg.norm(X) #X = X / normX if not useRelation: L = [None for i in range(X.ndim)] alpha = [1] #if unobservedRates == None: # unobservedRates = array([0.5,0.75,0.9]) # #unobservedRates = array([0.75,0.9,0.95]) # #unobservedRates = unobservedRates[::-1] #if alpha == None: # #alpha =[pow(10,x) for x in [-4,-3,-2,-1,0,1]] #for L # alpha =[pow(10,x) for x in [-5,-4,-3,-2,-1]] #for L # alpha =[pow(10,x) for x in [-7,-6,-5,-4,-3,-2,-1]] #for L #if ranks == None: # #ranks = [2,3,5] # ranks = [5,7,9] # ranks = [40] # ranks = [5,10,15] # ranks=[35] # #ranks = [7] shape = X.shape #alphaはLにしか関係ない if all(map(lambda i:i==None,L)): print "hogehogehogehogehogehoge" alpha = [1] maskAxis = 1 elems = prod(X.shape) print elems, "kdkdkdkd" if mask == "Random": targetelems = elems print "MASKING: RANDOM" def createObservedTensor(data): data = array(data) X = zeros(elems) def setter(index): X[index] = 1 vectset = vectorize(setter) vectset(data) return X.reshape(shape) elif mask == "Fiber": targetelems = elems / X.shape[maskAxis] print "MASKING: FIBER" def createObservedTensor(data): data = array(data) S = zeros(targetelems) def setter(index): S[index] = 1 vectset = vectorize(setter) vectset(data) X = zeros(elems).reshape(shape) if maskAxis == 0: S = S.reshape(shape[1],shape[2]) for i in xrange(shape[0]): X[i,:,:] = S elif maskAxis == 1: S = S.reshape(shape[0],shape[2]) for i in xrange(shape[1]): X[:,i,:] = S elif maskAxis == 2: S = S.reshape(shape[0],shape[1]) for i in xrange(shape[2]): X[:,:,i] = S return X.reshape(shape) elif mask == "Slice": targetelems = X.shape[maskAxis] print "MASKING: SLICE" def createObservedTensor(data): data = array(data) S = zeros(targetelems) def setter(index): S[index] = 1 vectset = vectorize(setter) vectset(data) X = 
zeros(elems).reshape(shape) if maskAxis == 0: for i in xrange(X.shape[1]): for j in xrange(X.shape[2]): X[:,i,j] = S elif maskAxis == 1: for i in xrange(X.shape[0]): for j in xrange(X.shape[2]): X[i,:,j] = S elif maskAxis == 2: for i in xrange(X.shape[0]): for j in xrange(X.shape[1]): X[i,j,:] = S return X.reshape(shape) evalDataGenerator = lambda separatingNumber,unobservedRate,targetIndeces:dataGenerator( int(targetelems* unobservedRate),separatingNumber,unobservedRate, targetIndeces) hpOptimDataGenerator = lambda separatingNumber,unobservedRate,targetIndeces:dataGenerator( min(int(targetelems* unobservedRate * 0.5),int(len(targetIndeces) * 0.5)), separatingNumber,unobservedRate, targetIndeces) def dataGenerator(hiddens,separatingNumber,unobservedRate,targetIndeces): #print elems #print len(targetIndeces) rs = Toolbox.GenerateRandomSeparation(targetIndeces, hiddens) return Toolbox.Take(rs,separatingNumber) import CompletionMethods import Decomposition if method in ["Tucker","TuckerSum"]: decomposition = Decomposition.TuckerSum() completionMethod = CompletionMethods.Tucker(X,L,decomposition) elif method in ["CP","CPSum"]: print "hogehogehgoehgoe" decomposition = Decomposition.CPSum() completionMethod = CompletionMethods.CP(X,L,decomposition) elif method == "TuckerProd": decomposition = Decomposition.TuckerProd() completionMethod = CompletionMethods.Tucker(X,L,decomposition) elif method == "CPProd": decomposition = Decomposition.CPProd() completionMethod = CompletionMethods.CP(X,L,decomposition) elif method == "CPWOPT": completionMethod = CompletionMethods.CPWOPT(X,L) if not useRelation: alpha = [0] elif method == "CPWOPTProd": completionMethod = CompletionMethods.CPWOPTProd(X,L) if not useRelation: alpha = [0] #convert list of indeces to binary tensor completionMethod.createObservedTensor = createObservedTensor estimator = completionMethod.estimator def lossFunction(estimation,evalData): #evalData = Toolbox.Take(evalData,500) W = createObservedTensor(evalData) 
Y=estimation return numpy.linalg.norm((Y - X)*W) * sqrt(1.0*elems / len(evalData)) trainingData = range(targetelems) log.WriteLine("Start Evaluatig method:" + method + " ") log.WriteLine("Using Relation Data" if useRelation else "Without Relation Data") log.WriteLine("Ranks for Estimation:"+str(ranks)) log.WriteLine("Unobserved Rates:"+str(unobservedRates)) log.WriteLine("HyperParameters alpha to try:"+str(alpha)) log.WriteLine("hyperParameters rank to try:" + str(ranks)) print type(information) information["setting"]={} information["setting"]["method"] = method information["setting"]["using relation data"] = useRelation information["setting"]["rank for estimation"] = ranks information["setting"]["fraction of unobserved elements"] = unobservedRates information["setting"]["tested alpha"] = alpha information["setting"]["tested rank"] = ranks information["result"]={} for unobservedRate in unobservedRates: information["result"][unobservedRate]={} #log.WriteLine("unobserved rate, "+str(unobservedRate)+ " ") time = varianceTimes import CrossValidation print unobservedRate, "kkkkkkkk" parameters = [(a,rank) for a in alpha for rank in ranks] errors = CrossValidation.Evaluate( trainingData, estimator, lossFunction, functools.partial(evalDataGenerator,time,unobservedRate), parameters, functools.partial(hpOptimDataGenerator,1,unobservedRate)) #[log.Write(", " + str(error),False) for error in errors] for error in errors: e = error["error"] param = error["param"] log.WriteLine("unobserved, "+ str(unobservedRate)+", bestparam,"+str(param)+", error, "+str(e)) if not param in information["result"][unobservedRate]: information["result"][unobservedRate][param]=[] information["result"][unobservedRate][param].append(e) print "score logged:", e log.WriteLine() return information
import numpy as np
from scipy.stats import wilcoxon, chisquare
# import matplotlib.pyplot as plt
# from hcluster import *
import Assurance as ass
import Parser as parser
import Experiment1 as exp1
import Experiment5 as exp5
import Experiment6 as exp6
import CrossValidation as cross
import SVMValidatedExperiment as exp7


def initialize():
    """Parse the trials and declare the attribute set used by the experiments."""
    trials = parser.parse()
    atributos = ["type", "orientation", "age", "hairColour", "hasBeard",
                 "hasHair", "hasGlasses", "hasShirt", "hasTie", "hasSuit",
                 "x-dimension", "y-dimension"]
    return trials, atributos


if __name__ == '__main__':
    trials, atributos = initialize()
    # trials = exp1.run(trials, atributos)
    folds = cross.crossValidation(10, trials)
    # exp5.run(folds, atributos, 0.7)
    # exp6.run(folds, atributos, 0.7)
    # exp7.run(trials, folds, atributos, {}, False)
    exp7.run(trials, folds, atributos, {}, True)
# Downstream analysis pipeline: gene selection, DAVID/STRING annotation,
# network generation, Weka cross-validation, then report writing.
print("Then press Enter to continue...")
raw_input()

GetGenes.Sort(fps, labels)
GetGenes.getDiff_Badge(fps, labels)
GetGenes.nuID2enterzID(fps, labels)

import David
David.davidCall(fps, labels)

import String
String.stringCall(path, fps, labels)
String.genEdgeList(path, fps, labels)
String.genNetworkInput(path, fps, labels)
String.genNetwork(path, progpath)
String.annoNetwork(path, progpath, fps, labels)

import CrossValidation
CrossValidation.exprToArff(path, fps, labels)
CrossValidation.syncArffFeatures(path, fps, labels)
CrossValidation.callWeka(fps, labels)

import WriteReport
WriteReport.writeDocReport(path, IOpath, fps, labels)
WriteReport.writeXlsReport(path, IOpath, fps, labels)