def get_predictions(self, query_manager):
    """Train a categorical Weka classifier and predict the test attribute.

    Builds train/test ARFF files, shells out to Weka (with libsvm on the
    classpath) to train and score, parses the prediction output and the
    training log for accuracy metrics, and cleans up all scratch files.

    Returns a Test_result, or None when only null values are being
    replaced and the test set contains none.
    """
    # Scratch filenames, timestamped to avoid collisions between runs.
    test_filename = "test" + str(int(time.time())) + ".arff"
    train_filename = "train" + str(int(time.time())) + ".arff"
    train_log = "train_log" + str(int(time.time())) + ".arff"
    result_filename = "results" + str(int(time.time())) + ".txt"

    # Creates (or clears) the files that are used by the binary.
    IS_NUM_TEST = False
    file_creation_info = test_file_creation(IS_NUM_TEST, self.using_pca,
                                            test_filename, train_filename,
                                            query_manager, self)
    target_values = file_creation_info["target_values"]
    target_value_null = file_creation_info["target_value_null"]
    attribute_indexes = file_creation_info["attribute_indexes"]
    cat_att_mapping = file_creation_info["cat_att_mapping"]

    # If only null values are being replaced and the test set has none,
    # there is nothing to predict: clean up and bail out.
    if not self.MAKE_ALL_PREDS and target_value_null.count(True) == 0:
        os.remove(test_filename)
        os.remove(train_filename)
        return None

    # Optional feature-selection pass (may replace the ARFF files).
    acc_est = {}
    if self.use_feature_selection:
        (test_filename, train_filename, selected_attributes) = feature_selection(
            test_filename, train_filename, query_manager,
            file_creation_info, self, IS_NUM_TEST)
        acc_est["selected attributes"] = selected_attributes

    # Train the model, then score the test set with it.
    # NOTE(review): commands are built by string concatenation and run via
    # os.system; filenames are internally generated so injection risk is low,
    # but subprocess.run([...]) would be safer.
    model_name = "saved_model" + str(int(time.time()))
    path_spef_weka = os.path.join(path, "models", "weka.jar")
    path_spef_libsvm = os.path.join(path, "models", "libsvm.jar")
    train_string = ("java -Xmx1024m -cp " + path_spef_weka + ":"
                    + path_spef_libsvm + " " + self.test_classifier
                    + " -d " + model_name + " " + self.test_options
                    + " -t " + train_filename + " >> " + train_log)
    test_string = ("java -Xmx1024m -cp " + path_spef_weka + ":"
                   + path_spef_libsvm + " " + self.test_classifier
                   + " -l " + model_name + " -T " + test_filename
                   + " -p 0 >> " + result_filename)
    self.printOut.pLog("PRED- Training model")
    os.system(train_string)
    self.printOut.pLog("PRED- Making predictions")
    os.system(test_string)

    # Gathering results for each test instance.
    self.printOut.pLog("PRED- Getting results")
    prediction_list = []
    probability_list = []
    correctly_imputed = 0
    non_null_count = 0
    index = 0
    collect_results = False
    with open(result_filename) as f:
        for line in f:
            line_list = line.split()
            # Getting results (Weka emits "inst#,actual,predicted,..."
            # rows; the predicted field looks like "idx:label").
            if collect_results and len(line_list) > 1:
                pred_pair = line_list[2].split(":")  # renamed: don't shadow builtin `tuple`
                prediction = str(pred_pair[1])
                if not target_value_null[index] and \
                        prediction == str(target_values[index]):
                    correctly_imputed += 1
                if not target_value_null[index]:
                    non_null_count += 1
                prediction_list.append(prediction)
                probability_list.append(1)
                index += 1
            # Predictions start after the "inst#" header line.
            if line.find("inst#") > -1:
                collect_results = True

    # Gathering accuracy estimations from the training log.
    cross_val_info = False
    with open(train_log) as f:
        for line in f:
            # Getting all performance related metrics.
            if cross_val_info:
                line = line.rstrip('\n')
                line = line.rstrip('\t')
                line = line.rstrip('\b')
                line = line.rstrip(' %')
                if line.find('Correctly Classified Instances') > -1 \
                        or line.find('Kappa statistic') > -1:
                    fields = line.split(' ')  # renamed: don't shadow builtin `list`
                    if len(fields) > 1:
                        attribute = fields[0]
                        value = float(fields[len(fields) - 1])
                        acc_est[attribute] = value
            # Finding the cross-validation section of the log.
            if line.find('Stratified cross-validation') > -1:
                cross_val_info = True
            elif line.find('Confusion Matrix') > -1:
                cross_val_info = False

    # Actual performance stats. Guard the divide-by-zero that occurred
    # when every test target was null (non_null_count == 0).
    if non_null_count > 0:
        acc_est["Actual Correctly Imputed Percent"] = \
            (float(correctly_imputed) / non_null_count) * 100
    else:
        acc_est["Actual Correctly Imputed Percent"] = 0.0

    # Removing files used for the test.
    os.remove(train_log)
    os.remove(result_filename)
    os.remove(test_filename)
    os.remove(train_filename)
    os.remove(model_name)

    # Add number of test instances to the accuracy estimation.
    current_test_num = query_manager.current_test_block.parcel_count
    acc_est["test instance count"] = current_test_num
    acc_est["block number"] = len(query_manager.used_blocks)

    return Test_result(self.test_type, self.test_attribute,
                       prediction_list, probability_list, acc_est)
def get_predictions(self, query_manager):
    """Train a numeric Weka regressor and predict the test attribute.

    Builds train/test ARFF files, shells out to Weka to train and score,
    parses predictions plus cross-validation metrics (and the IBk k-value
    search result) from the training log, and cleans up scratch files.

    Returns a Test_result ("Num"), or None when only null values are being
    replaced and the test set contains none.
    """
    # Scratch filenames, timestamped to avoid collisions between runs.
    test_filename = "test" + str(int(time.time())) + ".arff"
    train_filename = "train" + str(int(time.time())) + ".arff"
    train_log = "train_log" + str(int(time.time())) + ".arff"
    result_filename = "results" + str(int(time.time())) + ".txt"

    # Creates (or clears) the files that are used by the binary.
    IS_NUM_TEST = True
    file_creation_info = test_file_creation(IS_NUM_TEST, self.using_pca,
                                            test_filename, train_filename,
                                            query_manager, self)
    target_values = file_creation_info["target_values"]
    target_value_null = file_creation_info["target_value_null"]
    attribute_indexes = file_creation_info["attribute_indexes"]

    # If only null values are being replaced and the test set has none,
    # there is nothing to predict: clean up and bail out.
    if not self.MAKE_ALL_PREDS and target_value_null.count(True) == 0:
        os.remove(test_filename)
        os.remove(train_filename)
        return None

    # Optional feature-selection pass (may replace the ARFF files).
    acc_est = {}
    if self.use_feature_selection:
        (test_filename, train_filename, selected_attributes) = feature_selection(
            test_filename, train_filename, query_manager,
            file_creation_info, self, IS_NUM_TEST)
        acc_est["selected attributes"] = selected_attributes

    # Train the model, then score the test set with it.
    model_name = "saved_model" + str(int(time.time()))
    path_spef_weka = os.path.join(path, "models", "weka.jar")
    train_string = ("java -Xmx1024m -cp " + path_spef_weka + " "
                    + self.test_classifier + " -d " + model_name + " "
                    + self.test_options + " -t " + train_filename
                    + " >> " + train_log)
    test_string = ("java -Xmx1024m -cp " + path_spef_weka + " "
                   + self.test_classifier + " -l " + model_name
                   + " -T " + test_filename + " -p 0 >> " + result_filename)
    self.printOut.pLog("PRED- Training model")
    os.system(train_string)
    self.printOut.pLog("PRED- Making predictions")
    os.system(test_string)

    # Gathering results for each test instance.
    self.printOut.pLog("PRED- Getting results")
    prediction_list = []
    confidence_list = []
    # For stat keeping.
    absolute_diff_list = []
    relative_diff_list = []
    index = 0
    collect_results = False
    with open(result_filename) as f:
        for line in f:
            line_list = line.split()
            # Getting results.
            if collect_results and len(line_list) > 1:
                prediction = float(line_list[2])
                prediction_list.append(prediction)
                confidence_list.append(0.0)
                # Getting difference between predicted and actual results
                # for non-null values.
                if not target_value_null[index]:
                    actual = float(target_values[index])
                    diff = math.fabs(actual - prediction)
                    absolute_diff_list.append(diff)
                    if actual > 0:
                        relative_diff_list.append(diff / actual)
                    else:
                        # Sentinel: relative error undefined for actual <= 0.
                        relative_diff_list.append(-1)
                index += 1
            # Predictions start after the "inst#" header line.
            if line.find("inst#") > -1:
                collect_results = True

    # Gathering accuracy estimations from the training log.
    cross_val_info = False
    get_k_value = False
    with open(train_log) as f:
        for line in f:
            # Getting all performance related metrics.
            if cross_val_info:
                line = line.rstrip('\n')
                line = line.rstrip('\t')
                line = line.rstrip('\b')
                line = line.rstrip(' %')
                fields = line.split(' ')  # renamed: don't shadow builtin `list`
                if len(fields) > 1:
                    attribute = fields[0]
                    value = float(fields[len(fields) - 1])
                    acc_est[attribute] = value
            # Getting parameter search results ("using N ..." line that
            # follows the IB1 classifier header).
            if get_k_value and line.find('using') > -1:
                fields = line.split(' ')
                k = int(fields[1])
                acc_est["1 Parameter: k value"] = k
                get_k_value = False
            # Finding cross-validation info.
            if line.find('Cross-validation') > -1:
                cross_val_info = True
            # Finding k-value info.
            if line.find('IB1 instance-based classifier') > -1:
                get_k_value = True

    # Adding actual performance statistics. Guard the empty case:
    # numpy.mean/std of an empty array is NaN (plus a RuntimeWarning)
    # when every test target was null.
    if absolute_diff_list:
        absolute_diff_array = numpy.array(absolute_diff_list)
        relative_diff_array = numpy.array(relative_diff_list)
        acc_est["2 On test data: mean absolute diff"] = numpy.mean(absolute_diff_array)
        acc_est["2 On test data: std absolute diff"] = numpy.std(absolute_diff_array)
        acc_est["2 On test data: mean relative diff"] = numpy.mean(relative_diff_array)
        acc_est["2 On test data: std relative diff"] = numpy.std(relative_diff_array)
    else:
        acc_est["2 On test data: mean absolute diff"] = 0.0
        acc_est["2 On test data: std absolute diff"] = 0.0
        acc_est["2 On test data: mean relative diff"] = 0.0
        acc_est["2 On test data: std relative diff"] = 0.0

    # Add number of test instances to the accuracy estimation.
    current_test_num = query_manager.current_test_block.parcel_count
    acc_est["test instance count"] = current_test_num / query_manager.group_max
    acc_est["block number"] = ((len(query_manager.used_blocks) - 1)
                               * query_manager.group_max
                               + query_manager.group_count)

    # Removing files.
    os.remove(test_filename)
    os.remove(train_filename)
    os.remove(train_log)
    os.remove(result_filename)
    os.remove(model_name)

    return Test_result("Num", self.test_attribute, prediction_list,
                       confidence_list, acc_est)
def get_predictions(self, query_manager):
    """Train a categorical Weka classifier and predict the test attribute.

    Builds train/test ARFF files, shells out to Weka (with libsvm on the
    classpath) to train and score, parses the prediction output and the
    training log for accuracy metrics, and cleans up all scratch files.

    Returns a Test_result, or None when only null values are being
    replaced and the test set contains none.
    """
    # Scratch filenames, timestamped to avoid collisions between runs.
    test_filename = "test" + str(int(time.time())) + ".arff"
    train_filename = "train" + str(int(time.time())) + ".arff"
    train_log = "train_log" + str(int(time.time())) + ".arff"
    result_filename = "results" + str(int(time.time())) + ".txt"

    # Creates (or clears) the files that are used by the binary.
    IS_NUM_TEST = False
    file_creation_info = test_file_creation(IS_NUM_TEST, self.using_pca,
                                            test_filename, train_filename,
                                            query_manager, self)
    target_values = file_creation_info["target_values"]
    target_value_null = file_creation_info["target_value_null"]
    attribute_indexes = file_creation_info["attribute_indexes"]
    cat_att_mapping = file_creation_info["cat_att_mapping"]

    # If only null values are being replaced and the test set has none,
    # there is nothing to predict: clean up and bail out.
    if not self.MAKE_ALL_PREDS and target_value_null.count(True) == 0:
        os.remove(test_filename)
        os.remove(train_filename)
        return None

    # Optional feature-selection pass (may replace the ARFF files).
    acc_est = {}
    if self.use_feature_selection:
        (test_filename, train_filename, selected_attributes) = feature_selection(
            test_filename, train_filename, query_manager,
            file_creation_info, self, IS_NUM_TEST)
        acc_est["selected attributes"] = selected_attributes

    # Train the model, then score the test set with it.
    model_name = "saved_model" + str(int(time.time()))
    path_spef_weka = os.path.join(path, "models", "weka.jar")
    path_spef_libsvm = os.path.join(path, "models", "libsvm.jar")
    train_string = ("java -Xmx1024m -cp " + path_spef_weka + ":"
                    + path_spef_libsvm + " " + self.test_classifier
                    + " -d " + model_name + " " + self.test_options
                    + " -t " + train_filename + " >> " + train_log)
    test_string = ("java -Xmx1024m -cp " + path_spef_weka + ":"
                   + path_spef_libsvm + " " + self.test_classifier
                   + " -l " + model_name + " -T " + test_filename
                   + " -p 0 >> " + result_filename)
    self.printOut.pLog("PRED- Training model")
    os.system(train_string)
    self.printOut.pLog("PRED- Making predictions")
    os.system(test_string)

    # Gathering results for each test instance.
    self.printOut.pLog("PRED- Getting results")
    prediction_list = []
    probability_list = []
    correctly_imputed = 0
    non_null_count = 0
    index = 0
    collect_results = False
    with open(result_filename) as f:
        for line in f:
            line_list = line.split()
            # Getting results (the predicted field looks like "idx:label").
            if collect_results and len(line_list) > 1:
                pred_pair = line_list[2].split(":")  # renamed: don't shadow builtin `tuple`
                prediction = str(pred_pair[1])
                if not target_value_null[index] and \
                        prediction == str(target_values[index]):
                    correctly_imputed += 1
                if not target_value_null[index]:
                    non_null_count += 1
                prediction_list.append(prediction)
                probability_list.append(1)
                index += 1
            # Predictions start after the "inst#" header line.
            if line.find("inst#") > -1:
                collect_results = True

    # Gathering accuracy estimations from the training log.
    cross_val_info = False
    with open(train_log) as f:
        for line in f:
            # Getting all performance related metrics.
            if cross_val_info:
                line = line.rstrip('\n')
                line = line.rstrip('\t')
                line = line.rstrip('\b')
                line = line.rstrip(' %')
                if line.find('Correctly Classified Instances') > -1 \
                        or line.find('Kappa statistic') > -1:
                    fields = line.split(' ')  # renamed: don't shadow builtin `list`
                    if len(fields) > 1:
                        attribute = fields[0]
                        value = float(fields[len(fields) - 1])
                        acc_est[attribute] = value
            # Finding the cross-validation section of the log.
            if line.find('Stratified cross-validation') > -1:
                cross_val_info = True
            elif line.find('Confusion Matrix') > -1:
                cross_val_info = False

    # Actual performance stats. Guard the divide-by-zero that occurred
    # when every test target was null (non_null_count == 0).
    if non_null_count > 0:
        acc_est["Actual Correctly Imputed Percent"] = \
            (float(correctly_imputed) / non_null_count) * 100
    else:
        acc_est["Actual Correctly Imputed Percent"] = 0.0

    # Removing files used for the test.
    os.remove(train_log)
    os.remove(result_filename)
    os.remove(test_filename)
    os.remove(train_filename)
    os.remove(model_name)

    # Add number of test instances to the accuracy estimation.
    current_test_num = query_manager.current_test_block.parcel_count
    acc_est["test instance count"] = current_test_num
    acc_est["block number"] = len(query_manager.used_blocks)

    return Test_result(self.test_type, self.test_attribute,
                       prediction_list, probability_list, acc_est)