def csvConversion(old_filepath, new_filepath):
    """Convert the text file at ``old_filepath`` into a .csv file.

    The text file is opened read-only and handed, together with both paths,
    to ``csvFileEvaluation.createCSV``. Afterwards the ``rows`` and
    ``columns`` attributes of the evaluation object are printed.

    Args:
        old_filepath: Path of the existing .txt file to read.
        new_filepath: Forwarded to ``createCSV``; presumably the destination
            of the generated .csv file (confirm against ``createCSV``).
    """
    # Open the .txt file; the context manager guarantees it is closed even
    # when createCSV (or a print) raises, which the manual close() did not.
    with open(old_filepath, "r") as source:
        evaluation = csvFileEvaluation()

        # Create csv with identified .txt file rows and columns
        evaluation.createCSV(source, old_filepath, new_filepath)

        # Print rows and columns reported for the converted file
        print("\nNew .csv file dimensions: ")
        print("Rows: ", evaluation.rows)
        print("Columns: ", evaluation.columns)
y_train, y_test = Y[train_index], Y[test_index] # Naive Bayes elapsedTraining, elapsedTesting, y_pred = nbClassifier(X_train, y_train, X_test) # Calculate the different accuracy scores cm_metrics = Calculate_Metrics(y_test, y_pred) metrics_dict = cm_metrics.calculateScore() # Calculate the run time elapsed = (timeit.default_timer() - start_time) * 1000 metrics_dict.update({'Run Time(MSec)': elapsed}) # Create metrics for each fold pdf_indi = printPDF(metrics_dict) pdf_indi.create_pdf_indi(test_spec_dict, test_description, test_path_indi, str(fold) + '_' + testTimeConvertet + '_' + hostname) metrics_array.append(metrics_dict) # Calculate the overall score for all the folds metrics_mean_dict = cm_metrics.calculate_mean_score(metrics_array, folds) # # Create PDF pdf = printPDF(metrics_mean_dict) pdf.create_pdf(test_spec_dict, test_description, test_path, '_' + testTimeConvertet + '_' + hostname) # Create CSV csv = printCSV(metrics_mean_dict, test_spec_dict) csv.createCSV(path, csvfile_name)
def runTest(self, path, treeSize, trainingFile, test_description, type,
            traning_size, create_pdf=False):
    """Run a k-fold cross-validated decision-tree test and write a CSV report.

    The training set is loaded from ``trainingFile``, split into folds with
    ``KFold``, and each fold is classified with ``dtClassifier``. Per-fold
    metrics are collected, averaged over all folds and written to a CSV file
    in ``path``. Optionally, PDF reports are produced for every fold and for
    the averaged result.

    Args:
        path: Base output directory. It is concatenated directly with
            sub-directory names, so it must end with a path separator.
        treeSize: Value passed to ``dtClassifier`` and recorded as 'Depth';
            also used to name the output sub-directory and files.
        trainingFile: Training set location handed to ``loadTestSet``.
        test_description: Description forwarded to the PDF writers.
        type: Recorded under 'Sample Type' in the test specification.
            (Shadows the builtin; the name is kept for caller compatibility.)
        traning_size: Only used as part of the CSV file name.
        create_pdf: When true, write per-fold and overall PDF reports.

    Returns:
        The name of the CSV file that was written (without directory).

    Raises:
        OSError: If an output directory cannot be created and does not
            already exist.
    """

    def _ensure_dir(directory):
        # Create the directory (including parents); an already existing
        # directory is fine, any other failure is propagated.
        try:
            os.makedirs(directory)
        except OSError:
            if not os.path.isdir(directory):
                raise

    ## Overall configurations
    hostname = socket.gethostname()
    _ensure_dir(path)

    ## Individual test configurations
    testTimeConvertet = time.strftime("(%Y-%m-%d)-(%H-%M-%S)")
    csvfile_name = (str(treeSize) + '_' + testTimeConvertet + '_' +
                    str(traning_size) + '.csv')
    test_path = path + str(treeSize) + '/'
    test_path_indi = test_path + 'Folds/'
    _ensure_dir(test_path)
    _ensure_dir(test_path_indi)

    # Single-argument parenthesised print behaves identically on Python 2
    # and Python 3.
    print("DT Test: " + str(treeSize) + " Time: " + testTimeConvertet)

    # Training set path setup
    traning_set = trainingFile

    # Load training set
    test_set = loadTestSet(traning_set)
    X, Y = test_set.loadTestSet()

    # Set number of folds
    folds = 6
    kf = KFold(n_splits=folds)
    # NOTE(review): the return value is unused; the call is kept only to
    # preserve the original call sequence - confirm it can be dropped.
    kf.get_n_splits(X)

    metrics_array = []
    test_spec_dict = {
        'Classifier': 'DT',
        'Depth': treeSize,
        'Date': testTimeConvertet,
        'Host': hostname,
        'Training Set': traning_set,
        'Sample Type': type
    }

    total_training = 0
    total_testing = 0
    for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
        start_time = timeit.default_timer()
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        # Train and evaluate the decision tree on this fold.
        elapsedTraining, elapsedTesting, y_pred = dtClassifier(
            treeSize, X_train, y_train, X_test)
        total_training += elapsedTraining
        total_testing += elapsedTesting

        # Calculate the different accuracy scores
        cm_metrics = Calculate_Metrics(y_test, y_pred)
        metrics_dict = cm_metrics.calculateScore()

        # Calculate the run time of the whole fold in milliseconds
        elapsed = (timeit.default_timer() - start_time) * 1000
        metrics_dict.update({'Run Time(MSec)': elapsed})

        if create_pdf:
            # Create metrics report for the fold
            pdf_indi = printPDF(metrics_dict)
            pdf_indi.create_pdf_indi(
                test_spec_dict, test_description, test_path_indi,
                str(fold) + '_' + testTimeConvertet + '_' + hostname)

        metrics_array.append(metrics_dict)

    # Calculate the overall score for all the folds. The metrics object of
    # the last fold is used to invoke the averaging routine.
    metrics_mean_dict = cm_metrics.calculate_mean_score(metrics_array, folds)

    # Average over the actual number of folds instead of a hard-coded 6.
    print('Time in ms for training: ' + str(total_training / folds))
    print('Time in ms for testing: ' + str(total_testing / folds))

    if create_pdf:
        # Create PDF
        pdf = printPDF(metrics_mean_dict)
        pdf.create_pdf(
            test_spec_dict, test_description, test_path,
            str(treeSize) + '_' + testTimeConvertet + '_' + hostname)

    # Create CSV
    csv = printCSV(metrics_mean_dict, test_spec_dict)
    csv.createCSV(path, csvfile_name)

    return csvfile_name