def tran_file(filename):
    # Parse a tab-separated training file (relation \t name1 \t name2 \t title \t label)
    # into per-relation examples, build bag-of-words features over the titles,
    # and train/evaluate a classifier per relation.
    # dic_attr = name(filename1)
    fin = open(filename)
    # task1.trainSentence.clean
    dic_train = collections.defaultdict(dict)
    titlelist = []
    for line in fin:
        line = line.strip().split("\t")
        # Keyed by relation, then by a running per-relation example index.
        dic_train[line[0]][len(dic_train[line[0]])] = {"label": int(line[4]), "title": line[3], "pair": (line[1], line[2])}
        titlelist.append(line[3])
    dic_features = collections.defaultdict(dict)
    unigram = getGrams(titlelist, 3, 1)
    # NOTE(review): identical arguments to the unigram call above — presumably
    # the n-gram order should be 2 here; confirm getGrams' signature.
    bigram = getGrams(titlelist, 3, 1)
    dic_func = {"同居": tongju, "昔日情敌": xiriqingdi, "闺蜜": guimi, "朋友": pengyou, "分手": fenshou, "老师": laoshi, "同学": tongxue, "前女友": qiannvyou, "翻版": fanban, "妻子": qizi, "撞衫": zhuangshan, "同为校花": tongweixiaohua, "绯闻女友": feiwennvyou, "偶像": ouxiang, "暧昧": aimei, "传闻不和": chuanwenbuhe, "老乡":laoxiang}
    for relation in dic_train:
        namelist = []; labels = []; titlelist = []
        for id in dic_train[relation]:
            dic_ = dic_train[relation][id]
            namelist.append((len(namelist), dic_["pair"][0], dic_["pair"][1]))
            labels.append(dic_["label"])
            titlelist.append(dic_["title"])
        logging.info("%s:\t%d"%(relation, len(titlelist)))
        # features = dic_func[relation](namelist, dic_attr)
        uni_features = build_BOW(titlelist, unigram, 1)
        bi_features = build_BOW(titlelist, bigram, 1)
        # NOTE(review): the first assignment is dead — `features` ends up being
        # bi_features only. The commented-out loop below suggests the two
        # feature sets were meant to be concatenated; confirm intent.
        features = uni_features
        features = bi_features
        # for index, feature in enumerate(features):
        # # features[index] += uni_features[index]
        # features[index] += bi_features[index]
        dic_features[relation]["features"] = features
        dic_features[relation]["label"] = labels
        dic_features[relation]["namelist"] = namelist
        classifier(features, labels)
def main():
    """Interactive driver: optionally scrape images, remove outliers, and
    build an averaged image, prompting the user before each stage."""
    choice = input(
        "Do you want to scrape (s) new images or use an existing (e) folder? (s/e): "
    )
    folder = scrape() if choice == 's' else None
    answer = input(
        "\nWould you like to use classifier.py to remove outliers? (y/n): "
    )
    if answer != 'n':
        classifier(folder)
    answer = input("\nWould you like to use average.py to generate an image? (y/n): "
                   )
    if answer != 'n':
        average(folder)
    return
def trainingModel(self):
    # Two-stage pipeline implemented as a small state machine:
    # state 0 aligns/pre-processes the images, state 1 trains the classifier.
    self.registerWorking.finishThread.emit()  # notify the worker thread it can finish
    state = 0
    while True:
        if state == 0:
            # Pre-process
            obj = preprocessing(self.input_datadir, self.output_datadir)
            nrof_images_total, nrof_successfully_aligned = obj.alignProcessing(
            )
            print('Total number of images: %d' % nrof_images_total)
            print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
            state += 1
        # Classifier
        elif state == 1:
            print("Training Start")
            objModel = classifier(
                mode='TRAIN',
                datadir=self.datadir,
                modeldir=self.modeldir,
                classifierFilename=self.classifier_filename)
            get_file = objModel.main()
            # NOTE(review): sys.exit terminates the whole process here, so the
            # state increment and the final `break` branch are unreachable.
            sys.exit("All Done")
            state += 1
        else:
            break
def welcome():
    """Render the home page; on POST, classify the submitted comment
    as 'Spam' or 'Ham' and show the result."""
    if request.method != 'POST':
        # Plain GET: show the form without any results.
        return render_template('home.html', results=False)
    comment = request.form['comment']
    if classifier(comment):
        label = 'Spam'
    else:
        label = 'Ham'
    return render_template('home.html',
                           comment=comment,
                           label=label,
                           results=True)
def runIncrementalClustering(sClusterDirectory, sNewFPDirectory):
    # Incrementally assign new fingerprints to existing clusters when their
    # Jaccard distance to any cluster member is <= 0.2, then cluster whatever
    # remains unassigned with single-linkage clustering.
    newBSList = []
    fileList = []
    bitsetfromDirectory(sNewFPDirectory, newBSList, fileList)
    for i in range(len(fileList)):
        fileList[i] = join(sNewFPDirectory, fileList[i])
    # Pairwise distances among the new fingerprints themselves.
    pairDistanceList = []
    d_ok = all_pair_distance(newBSList, JACCARD, pairDistanceList)
    for root, dirs, files in os.walk(sClusterDirectory):
        for directory in dirs:
            sCurrentClusterDirectory = join(sClusterDirectory, directory)
            clusterBSList = []
            clusterFileList = []
            bitsetfromDirectory(sCurrentClusterDirectory, clusterBSList, clusterFileList)
            # Distances from each new fingerprint to each member of this cluster.
            groupDistanceList = []
            group_distance(newBSList, clusterBSList, JACCARD, groupDistanceList)
            setInsert = Set([])
            for i in range(len(groupDistanceList)):
                flag = False
                for j in range(len(groupDistanceList[i])):
                    if groupDistanceList[i][j] <= 0.2:
                        flag = True
                        break
                if flag:
                    setInsert.add(i)
                    # Also absorb new items that are close to item i itself.
                    for j in range(len(pairDistanceList[i])):
                        if pairDistanceList[i][j] <= 0.2:
                            setInsert.add(j)
            # Delete from highest index down so earlier indices remain valid.
            indexList = sorted(setInsert, reverse=True)
            for index in indexList:
                shutil.copy(fileList[index], sCurrentClusterDirectory)
                del newBSList[index]
                del fileList[index]
                # Keep the pairwise matrix consistent with the shrunken lists.
                for i in range(len(pairDistanceList)):
                    del pairDistanceList[i][index]
                del pairDistanceList[index]
    # Cluster the leftover fingerprints that matched no existing cluster.
    classf = classifier(newBSList, pairDistanceList, fileList)
    classf.setThreshold(0.2)
    classf.clustering(SINGLE_LINKAGE)
    classf.separateFilesToClusters(sClusterDirectory, 1)
def runDirectoryClustering(sDirectory):
    """Cluster all fingerprints in sDirectory using single-linkage clustering
    at Jaccard-distance threshold 0.2, then print the resulting clusters."""
    bitsets = []
    distances = []
    names = []
    bitsetfromDirectory(sDirectory, bitsets, names)
    ok = all_pair_distance(bitsets, JACCARD, distances)
    clusterer = classifier(bitsets, distances, [])
    clusterer.setThreshold(0.2)
    clusterer.clustering(SINGLE_LINKAGE)
    clusterer.printClusters(1)
def accuracy():
    """Measure Naive Bayes accuracy on "validation.data" and print it.

    Each line of the file is whitespace-separated; field 0 is the gold label
    and field 1 is the input handed to the solver.
    """
    solver = classifier()
    correct = 0.0
    total = 0.0
    with open("validation.data", "r") as f:
        data = f.read().splitlines()
    for row in data:
        instance = row.split()
        predict = solver.solve(instance[1])
        if (instance[0] == predict):
            correct = correct + 1
        total = total + 1
    # Parenthesized print is valid on both Python 2 and 3; the original used
    # the Python 2 print statement, which is a syntax error on Python 3.
    print("Naive Bayes accuracy: %f" % (correct / total))
def runClustering(sPrefix):
    """Load fingerprints from CCS data identified by sPrefix, cluster them
    (single linkage, Jaccard threshold 0.2) and print the clusters."""
    bitsets, distances = [], []
    bitsetFromCCS(sPrefix, bitsets)
    ok = all_pair_distance(bitsets, JACCARD, distances)
    clusterer = classifier(bitsets, distances, [])
    clusterer.setThreshold(0.2)
    clusterer.clustering(SINGLE_LINKAGE)
    clusterer.printClusters(1)
def accuracy():
    """Measure Naive Bayes accuracy on "validation.data" and print it.

    Each line of the file is whitespace-separated; field 0 is the gold label
    and field 1 is the input handed to the solver.
    """
    # Cleaned up: dropped the redundant statement-terminating semicolons and
    # replaced the Python 2 print statement with a parenthesized print that is
    # valid on both Python 2 and 3.
    solver = classifier()
    correct = 0.0
    total = 0.0
    with open("validation.data", "r") as f:
        data = f.read().splitlines()
    for row in data:
        instance = row.split()
        predict = solver.solve(instance[1])
        if (instance[0] == predict):
            correct = correct + 1
        total = total + 1
    print("Naive Bayes accuracy: %f" % (correct / total))
def runIncrementalClusteringFromOneDirectory(sFPDirectory, threshold, sDstDirectory):
    """Cluster every fingerprint found in sFPDirectory (single linkage at the
    given distance threshold) and write each cluster under sDstDirectory."""
    fingerprints, files = [], []
    bitsetfromDirectory(sFPDirectory, fingerprints, files)
    distances = []
    ok = all_pair_distance_numpy(fingerprints, distances)
    clusterer = classifier(fingerprints, distances, files)
    clusterer.setThreshold(threshold)
    clusterer.clustering(SINGLE_LINKAGE)
    clusterer.separateFilesToClusters(sDstDirectory, 1)
def classify(over_sampl, tf_idf, use_idf, pca, alphas, neighbors, slack, estimators, portion):
    """
    input:
        over_sampl: string variable to indicate the name of oversampling method
        tf_idf: boolean variable to indicate whether to use tf or not
        use_idf: boolean variable to indicate whether to use idf or not
        pca: int variable to indicate whether to use PCA or not (<=0 means no, yes otherwise)
        alphas: NB tuning parameter
        neighbors: KNN tuning parameter
        slack: SVM tuning parameter
        estimators: GradientBoosting, AdaBoost tuning parameter
        portion: which airline data to work with (None means all airlines)
    """
    # Build the run description. str(portion) is used because portion may
    # legitimately be None (all airlines); concatenating None directly raised
    # TypeError in the original.
    if not tf_idf:
        if pca > 0:
            # PCA only applies to tf/tf-idf vectors, not Word2Vec.
            return None
        message = ("Preprocessing used is Word2Vec & Over Sampling method is "
                   + over_sampl + " data portion " + str(portion))
    elif use_idf:
        message = ("Preprocessing used is tf-idf & Over Sampling method is "
                   + over_sampl + " PCA dimension = " + str(pca)
                   + " data portion " + str(portion))
    else:
        message = ("Preprocessing used is tf & Over Sampling method is "
                   + over_sampl + " PCA dimension = " + str(pca)
                   + " data portion " + str(portion))
    # load dataset
    ds = get_dataset()
    X_train, X_test, Y_train, Y_test = ds.load_data(tf_idf=tf_idf, use_idf=use_idf, use_pca=pca, airway_name=portion)
    # Over-sample the training split only, never the test split.
    if over_sampl == "RandomOverSampler":
        X_train, Y_train = RandomOverSampler().fit_sample(X_train, Y_train)
    elif over_sampl == "SMOTE":
        X_train, Y_train = SMOTE().fit_sample(X_train, Y_train)
    elif over_sampl == "ADASYN":
        X_train, Y_train = ADASYN().fit_sample(X_train, Y_train)
    clas = classifier()
    print(message)
    SVM_result, GB_result, AB_result, KNN_result, NB_result = clas.classify(
        X_train, X_test, Y_train, Y_test)
    compare_performance(SVM_result, GB_result, AB_result, KNN_result, NB_result, message)
def classify(i, p):
    # Classify PE files: when i == 0, p holds a single file path wrapped in a
    # list-of-lists; when i == 1, p[0] is a directory whose files are all
    # classified. Results are persisted with cPickle in either case.
    differ = i
    if (differ == 0):
        # p=[]
        # print("\n\n\n")
        # print(p)
        name = []
        # p.append("E:/guifinal/pe/hh.exe")
        print("\n\n\n")
        print(p)
        print("\n\n\n")
        # p=[['E:/guifinal/pe/HelpCtr.exe']]
        pp = p[:]
        # NOTE(review): the loop variable shadows the parameter `i`, and the
        # local `str` shadows the builtin for the rest of this function.
        for i in p:
            str = i[0]
            str = str[::-1]
            print("in for loop")
            print(str)
            # str="exe.rtCpleH/ep/lanifiug/:E"
            # Scan the reversed path up to the first "/" to extract the basename.
            for j in range(0, len(str)):
                if (str[j] == "/"):
                    break
            str = str[0:j:1]
            str = str[::-1]
            print(str)
            name.append(str)
        name  # NOTE(review): no-op expression statement
        col = funt.n
        print("\n\n\n")
        print(name)
        print(p)
        print(col)
        print("\n\n\n")
        df = create_df(col, p[0], name)
        pred = classifier(df)
        file = [pp, pred]
        with open('file.pkl', 'wb') as fid:
            cPickle.dump(file, fid)
    if (differ == 1):
        # p=[]
        # p.append("E:/gui/pe/")
        print("\n\n\n")
        path = p[0]
        path = path + '/'
        print(path)
        print("\n\n\n")
        col = funt.n
        file_list = [f for f in os.listdir(path)]
        print(file_list)
        name = file_list[:]
        for i in range(0, len(file_list)):
            file_list[i] = path + file_list[i]
        print("\n\n\n Below for")
        print(file_list)
        print("\n")
        print(name)
        df = create_df(col, file_list, name)
        pred = classifier(df)
        folder = [path, name, pred]
        with open('folder.pkl', 'wb') as fid:
            cPickle.dump(folder, fid)
    # NOTE(review): if i is neither 0 nor 1, `pred` is unbound here.
    return pred
test_sentence_df=get_frame_feature(test_sentence_df) test_sentence_df=get_pos_tag(test_sentence_df) test_df=get_arg_pairs(test_sentence_df) test_df=merge_sen_df(test_sentence_df,test_df) test_df=get_arg_cosine_simialrity(test_df) test_df=get_lf_cosine_similarity(test_df) test_df=get_rf_cosine_similarity(test_df) test_df=get_entailment_score(test_df) test_df=get_pos_similarity(test_df) #classification,precision,f1 score is printed and final dataframe with results is returned print "Training and testing "+str((fold_no+1)) result_df=classifier(test_df,train_df,train_process_file_path) if int(fold_no+1)==1 : final_results_df=result_df else: final_results_df=pd.concat([final_results_df,result_df]).reset_index() #preparing plot data plot_data = final_results_df[['True_label', 'Classification_result', 'Probability of result']] plot_data.columns = ['gold_role', 'srl_role', 'srl_score'] d = {} for rid, rdata in plot_data.iterrows(): grole = rdata['gold_role'] srole = rdata['srl_role'] sscore = rdata['srl_score'] d[rid] = (grole, (srole, sscore))
def main():
    # Anchor-style explanation loop: for each of num_instances test instances,
    # perturb the left/right context, search candidate anchors, and keep the
    # highest-coverage anchor whose precision lower bound exceeds tau.
    start_whole = time.time()
    tau = 0.75            # precision threshold an anchor must beat
    num_instances = 200
    pert_left = True
    pert_right = True
    batch_size = 3
    epsilon = 0.25        # KL-LUCB tolerance — presumably; confirm against Anchor.bbest_anchors
    delta = 0.05          # confidence parameter
    initial_value = 20
    list_anchors = [''] * num_instances
    mean_vector = [0] * num_instances
    instance_counter = 0
    b = classifier('Olaf')
    for instance_index in range(0, num_instances):
        previous_anchor = []
        coverage_astar = 0.2  # minimum coverage a candidate anchor must improve on
        start = time.time()
        # Perturb left and right context separately; the model handle `b` and
        # per-instance metadata are threaded back out of each call.
        perturbed_instances_left, instance_sentiment, instance_left, b, instance_info = Anchor.get_perturbations(
            True, False, b, instance_index)
        perturbed_instances_right, instance_sentiment, instance_right, b, instance_info = Anchor.get_perturbations(
            False, True, b, instance_index)
        instance = instance_left + instance_right
        print('instance', instance)
        # Stitch left/right perturbations back into full sentences, pairwise.
        perturbed_instances = [''] * len(perturbed_instances_right)
        for i in range(len(perturbed_instances_left)):
            perturbed_instances[i] = perturbed_instances_left[i] + perturbed_instances_right[i]
        print('pert instances', perturbed_instances)
        possible_anchor_list = Anchor.possible_anchor(previous_anchor, instance, coverage_astar, perturbed_instances)
        while possible_anchor_list != []:
            bbest = Anchor.bbest_anchors(
                batch_size, possible_anchor_list, epsilon, delta,
                perturbed_instances_left, perturbed_instances_right,
                instance_sentiment, initial_value, pert_left, pert_right, b,
                instance_info[0], instance_info[1], instance_info[2],
                instance_info[3], instance_info[4], instance_info[5],
                instance_info[6])
            print('bbest', bbest, type(bbest))
            # Only consider candidates whose precision lower bound clears tau.
            if max(Anchor.get_lb_vector(bbest)) > tau:
                for y in range(len(bbest)):
                    if bbest[y][1] > tau:
                        anchor_cov = Anchor.get_coverage(bbest[y][3], perturbed_instances)
                        if anchor_cov > coverage_astar:
                            coverage_astar = anchor_cov
                            bbest_anchor = bbest[y]
            # Grow the search frontier from the current best candidates.
            previous_anchor = [bbest[j][3] for j in range(len(bbest))]
            possible_anchor_list = Anchor.possible_anchor(previous_anchor, instance, coverage_astar, perturbed_instances)
        end = time.time()
        print(end - start)
        # NOTE(review): if no candidate ever clears tau, bbest_anchor is unbound
        # here (or stale from a previous iteration) — confirm intended behavior.
        anchor_mean = bbest_anchor[0]
        final_anchor = bbest_anchor[3]
        print('anchor', final_anchor, anchor_mean)
        mean_vector[instance_index] = anchor_mean
        list_anchors[instance_index] = final_anchor
        instance_counter += 1
        print('mean vector', mean_vector)
        print('instance counter', instance_counter)
    # Fidelity summary over all instances.
    fid_mean = statistics.mean(mean_vector)
    fid_stdev = statistics.stdev(mean_vector)
    end_whole = time.time()
    print('mean vector', mean_vector)
    print('anchor list', list_anchors)
    print('fidelity', fid_mean, fid_stdev)
    print(end_whole - start_whole)
    return bbest
# Build two encoder/decoder pairs (domains "1" and "5") plus cross-domain
# decodings whose outputs feed a classifier head.
e1 = Encoder()
d1 = Decoder()
e5 = Encoder()
d5 = Decoder()
i1 = Input(input_shape)
i5 = Input(input_shape)
auto1 = d1(e1(i1))      # within-domain reconstruction 1 -> 1
auto5 = d5(e5(i5))      # within-domain reconstruction 5 -> 5
cross51 = d1(e5(i5))    # cross-domain 5 -> 1
cross15 = d5(e1(i1))    # cross-domain 1 -> 5
D15 = classifier(cross15)
D51 = classifier(cross51)
M11 = Model(i1, auto1)
M15 = Model(i1, D15)
M55 = Model(i5, auto5)
M51 = Model(i5, D51)
# Reconstruction models are trained with MSE; the classification paths with
# categorical cross-entropy. All use RMSProp.
M11.compile(optimizer='RMSProp', loss='mse', metrics=['accuracy'])
M55.compile(optimizer='RMSProp', loss='mse', metrics=['accuracy'])
M15.compile(optimizer='RMSProp', loss='categorical_crossentropy', metrics=['accuracy'])
M51.compile(optimizer='RMSProp', loss='categorical_crossentropy', metrics=['accuracy'])
def choose_classifier(dataset, class_number, model_type, model, classifier, D, hyper_para, train_data, test_data, test_label, no_train_data, no_test_data, inm, relu, m, s):
    # Extract deep features for the train and test sets with `model`, then
    # compute per-sample test scores with either the OC_CNN classifier head or
    # a linear one-class SVM, depending on hyper_para.classifier_type.
    if(hyper_para.verbose==True):
        print('Extracting features.....')
    # Disk-backed buffers keep large feature matrices out of RAM.
    train_features = np.memmap('../../temp_files/train_features_temp.bin', dtype='float32', mode='w+', shape=(no_train_data,hyper_para.D))
    train_features = torch.from_numpy(train_features)
    for i in range(no_train_data):
        temp = model(torch.autograd.Variable(train_data[i:(i+1)].cuda().contiguous().float())).float()
        temp = temp.view(1,1,hyper_para.D)
        temp = inm(temp)   # normalization layer passed in by the caller — confirm inm's type
        temp = relu(temp.view(hyper_para.D))
        train_features[i:(i+1)] = temp.data.cpu()
    train_data = None   # drop the local reference to the big training tensor
    if(hyper_para.verbose==True):
        print('Features extracted.')
    ## test on the test set
    test_features = np.memmap('../../temp_files/test_features_temp.bin', dtype='float32', mode='w+', shape=(no_test_data,hyper_para.D))
    test_scores = np.memmap('../../temp_files/test_scores_temp.bin', dtype='float32', mode='w+', shape=(no_test_data,1))
    test_features = torch.from_numpy(test_features)
    if(hyper_para.verbose==True):
        print('Computing test scores and AUC....')
    area_under_curve=0.0
    if(hyper_para.classifier_type=='OC_CNN'):
        # Score with the trained classifier head; class-1 output is the score.
        test_scores = torch.from_numpy(test_scores)
        k=0
        print np.shape(test_features)
        start = time.time()
        for j in range(no_test_data):
            temp = model(AddNoise(torch.autograd.Variable(test_data[j:(j+1)].cuda().contiguous().float()), hyper_para.sigma1)).float()
            temp = temp.view(1,1,hyper_para.D)
            temp = inm(temp)
            temp = temp.view(hyper_para.D)
            test_features[k:(k+1)] = temp.data.cpu()
            test_scores[k:(k+1)] = classifier(relu(temp)).data.cpu()[1]
            # print(classifier(relu(temp)).data.cpu())
            k = k+1
        end = time.time()
        print(end-start)
        test_scores = test_scores.numpy()
        test_features = test_features.numpy()
        train_features = train_features.numpy()
        # Min-max normalise scores into [0, 1].
        test_scores = (test_scores-np.min(test_scores))/(np.max(test_scores)-np.min(test_scores))
    elif(hyper_para.classifier_type=='OC_SVM_linear'):
        # train one-class svm on the extracted training features
        oc_svm_clf = svm.OneClassSVM(kernel='linear', nu=float(hyper_para.N))
        oc_svm_clf.fit(train_features.numpy())
        k=0
        mean_kwn = np.zeros( (no_test_data,1) )
        for j in range(no_test_data):
            temp = model(torch.autograd.Variable(test_data[j:(j+1)].cuda().contiguous().float())).float()
            temp = temp.view(1,1,hyper_para.D)
            temp = inm(temp)
            temp = temp.view(hyper_para.D)
            test_features[k:(k+1)] = temp.data.cpu()
            temp = np.reshape(relu(temp).data.cpu().numpy(), (1, hyper_para.D))
            test_scores[k:(k+1)] = oc_svm_clf.decision_function(temp)[0]
            k = k+1
        test_features = test_features.numpy()
        train_features = train_features.numpy()
        joblib.dump(oc_svm_clf,'../../save_folder/saved_models/'+dataset+'/classifier/'+str(class_number) +'/'+ model_type+'_OCCNNlin' +'_'+ str(hyper_para.iterations)+'_'+ str(hyper_para.lr) +'_'+ str(hyper_para.sigma) +'_'+ str(hyper_para.N) +'.pkl')
    fpr, tpr, thresholds = metrics.roc_curve(test_label, test_scores)
    if(hyper_para.verbose==True):
        print('Test scores and AUC computed.')
    # NOTE(review): area_under_curve is returned still at 0.0 — the ROC curve
    # is computed but metrics.auc(fpr, tpr) is never called; confirm intent.
    return area_under_curve, train_features, test_scores, test_features
X_test = np.concatenate(X_test, axis=0) # extract labels of every considered points y_train = [] for cloud, indices in zip(train_clouds, train_indices): y_train.append(cloud.labels[indices]) y_train = np.concatenate(y_train) y_test = [] for cloud, indices in zip(test_clouds, test_indices): y_test.append(cloud.labels[indices]) y_test = np.concatenate(y_test) # eventually train the classifier, predict and evaluate labels # (here, 'cloud' is the last element of test_clouds) clf = classifier(X_train, X_test, y_train, y_test, cloud.label_names) rf = clf.random_forest() y_pred, measures = clf.evaluate(rf, results_dir) t1 = time.time() print('Done in %.0f seconds' % (t1 - t0)) print("Evaluation : {}% of points from the testing set were correctly classified.\n".format(np.round(measures["accuracy"],2)*100)) mess = "Other available measures (considered classes : {}): \n\t- recall by class (%) : {}\n\t- precision by class (%) : {}\n\t- F by class (%) : {}\n\t- mean recall : {}%\n\t- mean precision : {}%\n\t- global F : {}%" print(mess.format("'"+"', '".join([cloud.label_names[l] for l in measures["considered_labels"]])+"'", format_val(measures["recall_by_class"]), format_val(measures["precision_by_class"]), format_val(measures["F_by_class"]), format_val(measures["mean_recall"]), format_val(measures["mean_precision"]), format_val(measures["global_F"])))
"PaleSkin", "PointyNose", "RecedingHairline", "RosyCheeks", "Sideburn", "Smiling", "StraightHair", "WavyHair", "WearingEarrings", "WearingHat", "WearingLipstick", "WearingNecklace", "WearingNecktie", "Young" ] ### Change me! ### target_indices = feature_names.index( "Smiling" ) # Feel free to change this value to any string from feature_names! noise = get_noise(n_images, z_dim).to(device).requires_grad_() for i in range(grad_steps): opt.zero_grad() fake = gen(noise) fake_image_history += [fake] fake_classes_score = classifier(fake)[:, target_indices].mean() fake_classes_score.backward() noise.data = calculate_updated_noise(noise, 1 / grad_steps) plt.rcParams['figure.figsize'] = [n_images * 2, grad_steps * 2] show_tensor_images(torch.cat(fake_image_history[::skip], dim=2), num_images=n_images, nrow=n_images) fake_image_history = [] ### Change me! ### target_indices = feature_names.index( "Smiling" ) # Feel free to change this value to any string from feature_names from earlier! other_indices = [ cur_idx != target_indices for cur_idx, _ in enumerate(feature_names)
# Quick demo of the classifier on two samples — one Spanish, one English
# (presumably a language/text classifier; confirm against classifier module).
from classifier import *

setup()
# The strings below are runtime test inputs — do not translate or alter them.
string1 = "Volvió a casarse en 1118 con Agnés de Garlande, hija de Anseau de Garlande, señor de Rochefort-en-Yvelines, y de Beatrice de Rochefort. De esta unión nacieron"
string2 = "Our teacher warned him not to be late again"
print(classifier(string1))
print(classifier(string2))
if real_exp: f.write('Starting training...\n') total_iter = 0 for epoch in range(epoch_num): corrects = 0.0 for i, data in enumerate(train_dataloader, 0): if total_iter % validate_frequency == 0: data = next(iter(validation_dataloader)) inputs = data["image"] labels = data["class"] inputs, labels = Variable(inputs), Variable(labels) output = classifier(inputs) loss = criterion(output, labels) temp = output[:, 1].data.numpy() temp = np.apply_along_axis(lambda x: np.rint(np.exp(x)), 0, temp) temp = torch.from_numpy(temp).long() num = torch.sum(temp == labels.data) if type(num) is not int: num = num.item() accuracy = num / float(batch_size) update = None if draw_validation_graphs is None else 'append' draw_validation_graphs = vis.line( X=np.array([total_iter]), Y=np.array([loss.data[0]]),
def runDayClustering(sFamilyName, sFPDirectory, sMD5Directory, sDstDirectory, threshold):
    # Day-by-day incremental clustering of one malware family:
    # day 0 seeds the clusters; each later day's samples are merged into the
    # closest existing cluster (distance <= threshold) or clustered afresh.
    # (Python 2 code: `cmp`, `Set`, print statements.)
    familyFiles = [
        f for f in listdir(sMD5Directory)
        if isfile(join(sMD5Directory, f)) and f.endswith('.top5.family.txt')
    ]
    familyFiles.sort()
    # Collect the MD5s labelled with sFamilyName on the first day.
    fFirstDay = open(join(sMD5Directory, familyFiles[0]))
    md5List = []
    while True:
        line = fFirstDay.readline()
        if not line:
            break
        tmpList = line.split()
        if cmp(sFamilyName, tmpList[1]) == 0:
            md5List.append(tmpList[0])
    fFirstDay.close()
    fpFiles = []
    bsList = []
    # Directory name is the family file name minus the '.top5.family.txt' suffix.
    sCurrentFPDirectory = join(sFPDirectory, familyFiles[0][0:-16])
    bitsetfromDirectoryMD5(sCurrentFPDirectory, bsList, fpFiles, md5List)
    #print len(bsList), len(fpFiles)
    distanceList = []
    d_ok = all_pair_distance(bsList, JACCARD, distanceList)
    # Seed the destination directory with day-0 clusters.
    classf = classifier(bsList, distanceList, fpFiles)
    classf.setThreshold(threshold)
    classf.clustering(SINGLE_LINKAGE)
    classf.separateFilesToClusters(sDstDirectory, 1)
    index = 1
    while index < len(familyFiles):
        print index, familyFiles[index]
        fDay = open(join(sMD5Directory, familyFiles[index]))
        md5List = []
        while True:
            line = fDay.readline()
            if not line:
                break
            tmpList = line.split()
            if cmp(sFamilyName, tmpList[1]) == 0:
                md5List.append(tmpList[0])
        fDay.close()
        newBSList = []
        newFileList = []
        sCurrentFPDirectory = join(sFPDirectory, familyFiles[index][:-16])
        bitsetfromDirectoryMD5(sCurrentFPDirectory, newBSList, newFileList, md5List)
        pairDistanceList = []
        d_ok = all_pair_distance(newBSList, JACCARD, pairDistanceList)
        # Try to absorb today's samples into existing clusters.
        for root, dirs, files in os.walk(sDstDirectory):
            for directory in dirs:
                sCurrentClusterDirectory = join(sDstDirectory, directory)
                clusterBSList = []
                clusterFileList = []
                bitsetfromDirectory(sCurrentClusterDirectory, clusterBSList, clusterFileList)
                groupDistanceList = []
                group_distance(newBSList, clusterBSList, JACCARD, groupDistanceList)
                setInsert = Set([])
                for i in range(len(groupDistanceList)):
                    flag = False
                    for j in range(len(groupDistanceList[i])):
                        if groupDistanceList[i][j] <= threshold:
                            flag = True
                            break
                    if flag:
                        setInsert.add(i)
                        # Transitively absorb samples close to sample i itself.
                        for j in range(len(pairDistanceList[i])):
                            if pairDistanceList[i][j] <= threshold:
                                setInsert.add(j)
                # Delete from the back so remaining indices stay valid.
                indexList = sorted(setInsert, reverse=True)
                for indexDel in indexList:
                    shutil.copy(newFileList[indexDel], sCurrentClusterDirectory)
                    del newBSList[indexDel]
                    del newFileList[indexDel]
                    for i in range(len(pairDistanceList)):
                        del pairDistanceList[i][indexDel]
                    del pairDistanceList[indexDel]
        # Whatever was not absorbed forms new clusters.
        if len(newFileList) > 0:
            classf1 = classifier(newBSList, pairDistanceList, newFileList)
            classf1.setThreshold(threshold)
            classf1.clustering(SINGLE_LINKAGE)
            classf1.separateFilesToClusters(sDstDirectory, 1)
        index += 1
def main_uni():
    # LIME-style explanation of an aspect-based sentiment classifier:
    # perturb each instance's left/right context, fit a local multinomial
    # logistic regression on the perturbations, record hit rate and fidelity,
    # then pick B representative instances via (W)SP submodular pick.
    begin = time.time()
    model = 'Maria'
    isWSP = False
    batch_size = 200  #we have to implement a batch size to get the predictions of the perturbed instances
    num_samples = 5000  #has to be divisible by batch size
    seed = 2020
    width = 1.0       # kernel width for the locality weights
    K = 5  # number of coefficients to check
    B = 10  # number of instances to get
    input_file = 'data/programGeneratedData/300remainingtestdata2016.txt'
    model_path = 'trainedModelOlaf/2016/-18800'
    f = classifier(model)
    dict = f.get_Allinstances()  # NOTE(review): shadows the builtin dict
    r = check_random_state(seed)
    if (isWSP):
        write_path = 'data/Lime/WSPfh' + model + str(2016) + 'final'
    else:
        write_path = 'data/Lime/SPfh' + model + str(2016) + 'final'
    #Estimating Lime with multinominal logistic regression
    fidelity = []
    correct_hit = 0
    x_left = dict['x_left']
    x_left_len = dict['x_left_len']
    x_right = dict['x_right']
    x_right_len = dict['x_right_len']
    target_word = dict['target']
    target_words_len = dict['target_len']
    y_true = dict['y_true']
    true_label = dict['true_label']
    pred = dict['pred']
    size = dict['size']
    left_words = []
    right_words = []
    all_words = []
    targets = []
    x_len = []
    coefs = []
    # Black-box predictions on the original (unperturbed) instances.
    pred_b, prob = f.get_allProb(x_left, x_left_len, x_right, x_right_len, y_true, target_word, target_words_len, size, size)
    with open(write_path + '.txt', 'w') as results:
        for index in range(size):
            x_inverse_left, x_lime_left, x_lime_left_len = lime_perturbation(
                r, x_left[index], x_left_len[index], num_samples)
            x_inverse_right, x_lime_right, x_lime_right_len = lime_perturbation(
                r, x_right[index], x_right_len[index], num_samples)
            target_lime_word = np.tile(target_word[index], (num_samples, 1))
            target_lime_word_len = np.tile(target_words_len[index], (num_samples))
            y_lime_true = np.tile(y_true[index], (num_samples, 1))
            # predicting the perturbations
            pred_c, probabilities = f.get_allProb(
                x_lime_left, x_lime_left_len, x_lime_right, x_lime_right_len,
                y_lime_true, target_lime_word, target_lime_word_len,
                batch_size, num_samples)
            # Sentiment classes absent from the perturbation predictions.
            neg_labels = labels(pred_c)
            # Getting the weights
            x_w = np.append(x_left[index][0:x_left_len[index]], x_right[index][0:x_right_len[index]])
            x_w_len = x_left_len[index] + x_right_len[index]
            x_len.append(x_w_len)
            x_lime_len = x_lime_left_len + x_lime_right_len
            x_lime = np.concatenate((x_lime_left, x_lime_right), axis=1)
            weights_all = get_weights(f, x_w, x_lime, x_w_len, x_lime_len, width)
            model_all = LogisticRegression(multi_class='ovr', solver='newton-cg')
            n_neg_labels = len(neg_labels)
            x_all = np.concatenate((x_inverse_left, x_inverse_right), axis=1)
            if n_neg_labels > 0:
                # Pad with zero-weight dummy rows so every class is present for
                # fitting, then strip the padding again.
                for label in neg_labels:
                    pred_c = np.append(pred_c, label)
                    x_all = np.concatenate(
                        (x_all, np.zeros(
                            (1, x_left_len[index] + x_right_len[index]))),
                        axis=0)
                    weights_all = np.append(weights_all, 0)
                model_all.fit(x_all, pred_c, sample_weight=weights_all)
                pred_c = pred_c[:-n_neg_labels]
                x_all = x_all[:-n_neg_labels, :]
            else:
                model_all.fit(x_all, pred_c, sample_weight=weights_all)
            yhat = model_all.predict(x_all)
            # Hit: the surrogate agrees with the black box on the original instance.
            if (int(yhat[0]) == int(pred_b[index])):
                correct_hit += 1
            _, acc = compare_preds(yhat, pred_c)
            fidelity.append(acc)
            # words:
            left_words.append(f.get_String_Sentence(x_lime_left[0]))
            right_words.append(f.get_String_Sentence(x_lime_right[0]))
            all_words.append(
                f.get_String_Sentence(x_lime_left[0]) +
                f.get_String_Sentence(x_lime_right[0]))
            targets.append(f.get_String_Sentence(target_word[index]))
            coefs.append(model_all.coef_)
            intercept = model_all.intercept_
            classes = model_all.classes_
            results.write('Instance ' + str(index) + ':' + '\n')
            results.write('True Label: ' + str(true_label[index]) +
                          ', Predicted label: ' + str(int(pred[index])) + '\n')
            results.write('\n')
            results.write('Intercept: ' + str(intercept) + '\n')
            results.write('\n')
            results.write('Left words: ' + str(left_words[index]) + '\n')
            results.write('\n')
            temp = right_words.copy()
            temp[index].reverse()
            results.write('Right words: ' + str(temp[index]) + '\n')
            results.write('\n')
            results.write('All words: ' + str(all_words[index]) + '\n')
            results.write('Target words: ' + str(targets[index]) + '\n')
            results.write('\n')
            results.write(
                '________________________________________________________' + '\n')
        # --- select the K strongest coefficients per instance ---
        neg_coefs_k = []
        neu_coefs_k = []
        pos_coefs_k = []
        all_coefs_k = []
        e_ij = []
        sum_coefs_k = []
        all_words_k = []
        dict_I = {}
        for i in range(size):
            K = 4
            if (K > int(x_len[i])):
                K = int(x_len[i])
            ##getting the B instances according to (W)SP
            neg_coefs = coefs[i][0]
            neu_coefs = coefs[i][1]
            pos_coefs = coefs[i][2]
            sum_coefs = np.zeros(len(neg_coefs))
            for k in range(len(neg_coefs)):
                # NOTE(review): neg_coefs appears twice in this sum and
                # neu_coefs is never added — likely a typo; confirm intent.
                sum_coefs[k] += np.absolute(neg_coefs[k]) + np.absolute(
                    pos_coefs[k]) + np.absolute(neg_coefs[k])
            coefs_maxargs = np.argpartition(sum_coefs, -K)[-K:]
            neg_coefs_k.append(neg_coefs[coefs_maxargs])
            neu_coefs_k.append(neu_coefs[coefs_maxargs])
            pos_coefs_k.append(pos_coefs[coefs_maxargs])
            sum_coefs_k.append(sum_coefs[coefs_maxargs])
            e_ij.append(sum_coefs[coefs_maxargs])
            all_coefs_k.append(
                [neg_coefs_k[i], neu_coefs_k[i], pos_coefs_k[i]])
            temp = np.array(all_words[i])
            all_words_k.append(temp[coefs_maxargs])
            # Global word-importance accumulator for submodular pick.
            for j, word in enumerate(all_words_k[i]):
                if (inDict(dict_I, word)):
                    dict_I[word] += e_ij[i][j]
                else:
                    dict_I[word] = e_ij[i][j]
            results.write('Instance: ' + str(i))
            results.write('k words: ' + str(all_words_k[i]) + '\n')
            results.write('\n')
            results.write('Neg coefs k: ' + str(neg_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('Neu coefs k: ' + str(neu_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('Pos coefs k: ' + str(pos_coefs_k[i]) + '\n')
            results.write('\n')
            results.write(
                '________________________________________________________' + '\n')
        results.close()  # redundant inside `with`, but harmless
    # Submodular pick of B representative instances.
    picked_instances_all = WSP(dict_I, all_words_k, sum_coefs_k, B, isWSP)
    with open(write_path + 'K_instances' + '.txt', 'w') as results:
        for i in picked_instances_all:
            results.write('picked instance ' + str(i) + ":")
            results.write(' True Label: ' + str(true_label[i]) +
                          ', Predicted label: ' + str(int(pred[i])) + '\n')
            results.write('\n')
            results.write('Sentence: ' + str(left_words[i]) + str(targets[i]) + str(right_words[i]) + '\n')
            results.write('\n')
            results.write('coefs: ' + str(coefs[i]) + '\n')
            results.write('\n')
            results.write('k words: ' + str(all_words_k[i]) + '\n')
            results.write('\n')
            results.write('Neg coefs k: ' + str(neg_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('Neu coefs k: ' + str(neu_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('Pos coefs k: ' + str(pos_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('target: ' + str(targets[i]) + '\n')
            results.write(
                '___________________________________________________________________' + '\n')
        # Summary statistics.
        results.write('\n')
        results.write('Hit Rate measure:' + '\n')
        results.write('Correct: ' + str(correct_hit) + ' hit rate: ' +
                      str(correct_hit / size) + '\n')
        results.write('\n')
        results.write('Fidelity All measure: ' + '\n')
        mean = np.mean(fidelity)
        std = np.std(fidelity)
        results.write('Mean: ' + str(mean) + ' std: ' + str(std))
    end = time.time()
    print('It took: ' + str(end - begin) + ' Seconds')
def getResult(self):
    """Run the extraction and preprocessing stages, then return the
    classifier's output."""
    dataextraction()
    preprocess()
    return classifier()
def main_pos():
    """Estimate LIME explanations (via one-vs-rest logistic regression) for a
    sentiment classifier, measure hit rate and fidelity, and write per-instance
    explanations plus (W)SP-picked representative instances to text files.

    Side effects: reads the classifier's instances, writes three report files
    under write_path, and prints progress to stdout. Returns nothing.
    """
    begin = time.time()
    model = 'Maria'
    #isWSP = False
    # batch size used when predicting the perturbed instances
    batch_size = 200  #we have to implement a batch size to get the predictions of the perturbed instances
    num_samples = 5000  #has to be divisible by batch size
    seed = 2020
    width = 1.0  # kernel width for the LIME proximity weights
    K = 5  # number of coefficients to check
    B = 10  # number of instances to get
    nlp = en_core_web_lg.load()
    neighbors = Neighbors(nlp)
    f = classifier(model)
    # NOTE(review): `dict` shadows the builtin; consider renaming.
    dict = f.get_Allinstances()
    # NOTE(review): `r` is never used below — seeding appears to have no effect here.
    r = check_random_state(seed)
    write_path = 'data/Lime2/test' + model + str(2016) + 'final'
    #Estimating Lime with multinominal logistic regression
    n_all_features = len(f.word_id_mapping)
    fidelity = []
    correct_hit = 0
    x_left = dict['x_left']
    x_left_len = dict['x_left_len']
    x_right = dict['x_right']
    x_right_len = dict['x_right_len']
    target_word = dict['target']
    target_words_len = dict['target_len']
    y_true = dict['y_true']
    true_label = dict['true_label']
    pred = dict['pred']
    size = dict['size']
    left_words = []
    right_words = []
    all_words = []
    targets = []
    x_len = []
    coefs = []
    # NOTE(review): this overrides the real dataset size — looks like a debug
    # limit to only process the first 10 instances; confirm before production.
    size = 10
    # black-box predictions for the ORIGINAL (unperturbed) instances
    pred_b, prob = f.get_allProb(x_left, x_left_len, x_right, x_right_len,
                                 y_true, target_word, target_words_len, size,
                                 size)
    original_x = []
    with open(write_path + '.txt', 'w') as results:
        for index in range(size):
            # perturb left context and right context separately
            pertleft, instance_sentiment, text, _, x = get_perturbations(
                True, False, neighbors, f, index, num_samples)
            pertright, instance_sentiment, text, _, x = get_perturbations(
                False, True, neighbors, f, index, num_samples)
            orig_left_x = x_left[index]
            orig_right_x = x_right[index]
            # Z: bag-of-words matrix of the perturbed samples,
            # X: bag-of-words row for the original instance
            Z = np.zeros((num_samples, n_all_features))
            X = np.zeros((n_all_features))
            X[orig_left_x] += 1
            X[orig_right_x] += 1
            X = X.reshape(1, -1)
            predictions_f = []
            x_lime = np.zeros(
                (num_samples, x_left_len[index] + x_right_len[index]))
            x_lime_left = np.zeros((num_samples, FLAGS.max_sentence_len))
            x_lime_right = np.zeros((num_samples, FLAGS.max_sentence_len))
            print('Time after perturbation: ' + str(time.time() - begin) +
                  ' Seconds')
            # encode every perturbed sentence back into word ids
            for i in range(num_samples):
                x_left_ids = f.to_input(pertleft[i].split())
                x_right_ids = f.to_input(pertright[i].split())
                x_lime_left[i, :] = x_left_ids
                x_lime_right[i, :] = x_right_ids
                x_lime[i, 0:x_left_len[index] +
                       x_right_len[index]] = np.append(
                           x_left_ids[0][0:x_left_len[index]],
                           x_right_ids[0][0:x_right_len[index]])
                Z[i, x_left_ids] += 1
                Z[i, x_right_ids] += 1
                #pred_f, _ = f.get_prob(x_left_ids, x[1], x_right_ids, x[3], x[4], x[5], x[6])
                #predictions_f.append(pred_f)
            # broadcast target/label info to every perturbed sample
            target_lime_word = np.tile(target_word[index], (num_samples, 1))
            target_lime_word_len = np.tile(target_words_len[index],
                                           (num_samples))
            y_lime_true = np.tile(y_true[index], (num_samples, 1))
            x_lime_left_len = np.tile(x[1], (num_samples))
            x_lime_right_len = np.tile(x[3], (num_samples))
            # predicting the perturbations
            predictions_f, _ = f.get_allProb(x_lime_left, x_lime_left_len,
                                             x_lime_right, x_lime_right_len,
                                             y_lime_true, target_lime_word,
                                             target_lime_word_len, batch_size,
                                             num_samples)
            # classes that never occur among the predictions (needed so the
            # surrogate model sees all classes)
            neg_labels = labels(predictions_f)
            # Getting the weights
            orig_x = np.append(orig_left_x[0:x_left_len[index]],
                               orig_right_x[0:x_right_len[index]])
            original_x.append(orig_x)
            orig_x_len = int(x_left_len[index] + x_right_len[index])
            x_len.append(orig_x_len)
            z_len = np.tile(orig_x_len, num_samples)
            x_lime = np.asarray(x_lime, int)
            weights_all = get_weights(f, orig_x, x_lime, orig_x_len, z_len,
                                      width)
            # interpretable surrogate: one-vs-rest multinomial logistic regression
            model_all = LogisticRegression(multi_class='ovr',
                                           solver='newton-cg')
            n_neg_labels = len(neg_labels)
            if n_neg_labels > 0:
                # pad one zero-weight dummy row per missing class so fit()
                # sees every class, then strip the padding afterwards
                for label in neg_labels:
                    predictions_f = np.append(predictions_f, label)
                    Z = np.concatenate((Z, np.zeros((1, n_all_features))),
                                       axis=0)
                    weights_all = np.append(weights_all, 0)
                model_all.fit(Z, predictions_f, sample_weight=weights_all)
                predictions_f = predictions_f[:-n_neg_labels]
                Z = Z[:-n_neg_labels, :]
            else:
                model_all.fit(Z, predictions_f, sample_weight=weights_all)
            # hit rate: does the surrogate agree with the black box on the original?
            yhat = model_all.predict(X)
            if (int(yhat[0]) == int(pred_b[index])):
                correct_hit += 1
            print(pertleft[0].split())
            print(pertright[0].split())
            print(x_lime)
            get_predStats(predictions_f)
            print('Current instance: ' + str(index))
            print('Correct hit: ' + str(correct_hit))
            print('Current runtime: ' + str(time.time() - begin) + ' seconds')
            # fidelity: surrogate accuracy on the perturbed samples
            yhat = model_all.predict(Z)
            _, acc = compare_preds(yhat, predictions_f)
            fidelity.append(acc)
            # words:
            left_words.append(f.get_String_Sentence(orig_left_x))
            right_words.append(f.get_String_Sentence(orig_right_x))
            all_words.append(
                f.get_String_Sentence(orig_left_x) +
                f.get_String_Sentence(orig_right_x))
            targets.append(f.get_String_Sentence(target_word[index]))
            coefs.append(model_all.coef_)
            intercept = model_all.intercept_
            # NOTE(review): `classes` is assigned but never used.
            classes = model_all.classes_
            results.write('Instance ' + str(index) + ':' + '\n')
            results.write('True Label: ' + str(true_label[index]) +
                          ', Predicted label: ' + str(int(pred[index])) + '\n')
            results.write('\n')
            results.write('Intercept: ' + str(intercept) + '\n')
            results.write('\n')
            results.write('Left words: ' + str(left_words[index]) + '\n')
            results.write('\n')
            # NOTE(review): .copy() is shallow, so reverse() also mutates
            # right_words[index] in place — confirm this is intended.
            temp = right_words.copy()
            temp[index].reverse()
            results.write('Right words: ' + str(temp[index]) + '\n')
            results.write('\n')
            results.write('All words: ' + str(all_words[index]) + '\n')
            results.write('Target words: ' + str(targets[index]) + '\n')
            results.write('\n')
            results.write(
                '________________________________________________________' +
                '\n')
        # ---- pick the top-K words per instance and score them for (W)SP ----
        neg_coefs_k = []
        neu_coefs_k = []
        pos_coefs_k = []
        all_coefs_k = []
        e_ij = []
        sum_coefs_k = []
        all_words_k = []
        dict_I = {}
        for i in range(size):
            # NOTE(review): K is re-fixed to 4 here although K = 5 was set above.
            K = 4
            if (K > int(x_len[i])):
                K = int(x_len[i])
            ##getting the B instances according to (W)SP
            neg_coefs = coefs[i][0]
            neu_coefs = coefs[i][1]
            pos_coefs = coefs[i][2]
            sum_coefs = np.zeros(len(neg_coefs))
            for j in original_x[i]:
                # NOTE(review): neg_coefs appears twice and neu_coefs is
                # missing from the sum — likely a bug; confirm the intended
                # importance is |neg| + |neu| + |pos|.
                sum_coefs[j] += np.absolute(neg_coefs[j]) + np.absolute(
                    pos_coefs[j]) + np.absolute(neg_coefs[j])
            coefs_maxargs = np.argpartition(sum_coefs, -K)[-K:]
            neg_coefs_k.append(neg_coefs[coefs_maxargs])
            neu_coefs_k.append(neu_coefs[coefs_maxargs])
            pos_coefs_k.append(pos_coefs[coefs_maxargs])
            sum_coefs_k.append(sum_coefs[coefs_maxargs])
            e_ij.append(sum_coefs[coefs_maxargs])
            all_coefs_k.append(
                [neg_coefs_k[i], neu_coefs_k[i], pos_coefs_k[i]])
            all_words_k.append(f.get_String_Sentence(coefs_maxargs))
            #temp = np.array(all_words[i])
            #all_words_k.append(temp[coefs_maxargs])
            # accumulate global word importance I(word) across instances
            for j, word in enumerate(all_words_k[i]):
                if (inDict(dict_I, word)):
                    dict_I[word] += e_ij[i][j]
                else:
                    dict_I[word] = e_ij[i][j]
            results.write('Instance: ' + str(i) + '\n')
            results.write('k words: ' + str(all_words_k[i]) + '\n')
            results.write('\n')
            results.write('Neg coefs k: ' + str(neg_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('Neu coefs k: ' + str(neu_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('Pos coefs k: ' + str(pos_coefs_k[i]) + '\n')
            results.write('\n')
            results.write(
                '________________________________________________________' +
                '\n')
        # NOTE(review): redundant — the `with` block already closes the file.
        results.close()
    # ---- report the B instances picked by weighted submodular pick (WSP) ----
    picked_instances_all = WSP(dict_I, all_words_k, sum_coefs_k, B, True)
    with open(write_path + 'B_instances' + 'WSP.txt', 'w') as results:
        for i in picked_instances_all:
            results.write('picked instance ' + str(i) + ":")
            results.write(' True Label: ' + str(true_label[i]) +
                          ', Predicted label: ' + str(int(pred[i])) + '\n')
            results.write('\n')
            results.write('Sentence: ' + str(left_words[i]) + str(targets[i]) +
                          str(right_words[i]) + '\n')
            results.write('\n')
            results.write('coefs: ' + str(coefs[i]) + '\n')
            results.write('\n')
            results.write('k words: ' + str(all_words_k[i]) + '\n')
            results.write('\n')
            results.write('Neg coefs k: ' + str(neg_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('Neu coefs k: ' + str(neu_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('Pos coefs k: ' + str(pos_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('target: ' + str(targets[i]) + '\n')
            results.write(
                '___________________________________________________________________'
                + '\n')
        # summary metrics for the WSP report
        results.write('\n')
        results.write('Hit Rate measure:' + '\n')
        results.write('Correct: ' + str(correct_hit) + ' hit rate: ' +
                      str(correct_hit / size) + '\n')
        results.write('\n')
        results.write('Fidelity All measure: ' + '\n')
        mean = np.mean(fidelity)
        std = np.std(fidelity)
        results.write('Mean: ' + str(mean) + ' std: ' + str(std))
    # ---- same report for plain submodular pick (SP) ----
    picked_instances_all = WSP(dict_I, all_words_k, sum_coefs_k, B, False)
    with open(write_path + 'B_instances' + 'SP.txt', 'w') as results:
        for i in picked_instances_all:
            results.write('picked instance ' + str(i) + ":")
            results.write(' True Label: ' + str(true_label[i]) +
                          ', Predicted label: ' + str(int(pred[i])) + '\n')
            results.write('\n')
            results.write('Sentence: ' + str(left_words[i]) + str(targets[i]) +
                          str(right_words[i]) + '\n')
            results.write('\n')
            results.write('coefs: ' + str(coefs[i]) + '\n')
            results.write('\n')
            results.write('k words: ' + str(all_words_k[i]) + '\n')
            results.write('\n')
            results.write('Neg coefs k: ' + str(neg_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('Neu coefs k: ' + str(neu_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('Pos coefs k: ' + str(pos_coefs_k[i]) + '\n')
            results.write('\n')
            results.write('target: ' + str(targets[i]) + '\n')
            results.write(
                '___________________________________________________________________'
                + '\n')
        # summary metrics for the SP report
        results.write('\n')
        results.write('Hit Rate measure:' + '\n')
        results.write('Correct: ' + str(correct_hit) + ' hit rate: ' +
                      str(correct_hit / size) + '\n')
        results.write('\n')
        results.write('Fidelity All measure: ' + '\n')
        mean = np.mean(fidelity)
        std = np.std(fidelity)
        results.write('Mean: ' + str(mean) + ' std: ' + str(std))
    end = time.time()
    print('It took: ' + str(end - begin) + ' Seconds')
# Evaluate the defense against each adversarial-attack dataset and build a
# 10x10 (flattened to 100) confusion matrix per attack, then a combined one.
# Assumes `adv_accuracy` (dict: attack name -> np.zeros(100)), `model`,
# `classifier`, `Dataset` and `generate` are defined earlier in this file —
# TODO confirm.
for adv in adv_accuracy:
    # init dataset for TESTING
    data = np.load('../data/{}_xs_mnist.npy'.format(adv))
    adv_data = np.load('../data/{}_advs_mnist.npy'.format(adv))
    labels = np.load('../data/{}_ys_mnist.npy'.format(adv))
    dataset = Dataset(data, adv_data, labels)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=128,
                                             shuffle=True,
                                             num_workers=1)
    for image, adv_img, label in dataloader:
        image = image.cuda()
        adv_img = adv_img.cuda()
        label = label.cuda()
        # get model output
        def_out, _, _, _ = model(adv_img)
        adv_out_class = classifier(def_out)
        # get model predicted class
        adversarial_class = torch.argmax(adv_out_class, 1)
        # update confusion matrix
        # NOTE(review): torch tensors have no .astype() (that is a NumPy
        # method) — this line likely raises AttributeError; a CUDA tensor also
        # cannot index a NumPy array, and fancy-index `+= 1` does not
        # accumulate duplicate indices within a batch. Verify this path ran.
        adv_accuracy[adv][(adversarial_class * 10 + label).astype(int)] += 1

# sum the per-attack confusion matrices into one combined matrix
output = np.zeros(100)
for adv in adv_accuracy:
    output += adv_accuracy[adv]
generate(output.tolist())
# Vehicle-detection training script: load car / not-car image sets, then train
# (or reload) a classifier over them.
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from scipy.ndimage.measurements import label
# from moviepy.editor import VideoFileClip
# from IPython.display import HTML
from loadImages import *
from featureFunctions import featureFunctions
import extractor
from drawWindows import *
# NOTE(review): this module import is immediately shadowed by the
# `from classifier import classifier` below — one of the two is redundant.
import classifier
from heatMap import *
from classifier import classifier

# Load car and not car images
cars = loadImages("./vehicles/", "cars")
notcars = loadImages("./non-vehicles/", "notcars")

# We can save the trained classifier in a .pkl file and then retrieve it to train the classifier
# NOTE(review): `os` is not imported in this visible block — presumably pulled
# in by one of the star imports above; verify.
if os.path.isfile("models/trained_model.p"):
    print("Model already present, retrieving to use it")
    train_model = False
else:
    print("Training a classifier using new images")
    train_model = True

# Train a classifier (or load the saved one when train_model is False)
c = classifier(cars, notcars, train_model)
c.classify()
# Import the classifier *function* from the classifier module.
# Fix: `import classifier` bound the module object, so the call below raised
# "TypeError: 'module' object is not callable" — import the callable instead.
from classifier import classifier

# Classify every image under pet_images/; returns the results dictionary.
class_dict = classifier("pet_images")
def train_classifier(filename):
    """Train a CelebA attribute classifier and checkpoint it to `filename`.

    Trains on all 40 CelebA attribute labels with BCE-with-logits loss,
    periodically plotting the loss curve and saving the state dict.
    Assumes module-level globals `batch_size`, `device`, `transforms`,
    `DataLoader`, `CelebA`, `Classifier`, `torch`, `nn`, `tqdm` — TODO confirm.
    """
    import seaborn as sns
    import matplotlib.pyplot as plt

    # You can run this code to train your own classifier, but there is a provided pretrained one.
    # If you'd like to use this, just run "train_classifier(filename)"
    # to train and save a classifier on the label indices to that filename.

    # Target all the classes, so that's how many the classifier will learn
    label_indices = range(40)

    # training hyperparameters
    n_epochs = 3
    display_step = 500       # steps between loss plots / checkpoints
    lr = 0.001
    beta_1 = 0.5             # Adam betas
    beta_2 = 0.999
    image_size = 64
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    dataloader = DataLoader(CelebA(".",
                                   split='train',
                                   download=True,
                                   transform=transform),
                            batch_size=batch_size,
                            shuffle=True)

    classifier = Classifier(n_classes=len(label_indices)).to(device)
    class_opt = torch.optim.Adam(classifier.parameters(),
                                 lr=lr,
                                 betas=(beta_1, beta_2))
    criterion = nn.BCEWithLogitsLoss()

    cur_step = 0
    classifier_losses = []
    # classifier_val_losses = []
    for epoch in range(n_epochs):
        # Dataloader returns the batches
        for real, labels in tqdm(dataloader):
            real = real.to(device)
            # keep only the targeted attribute columns, as floats for BCE
            labels = labels[:, label_indices].to(device).float()

            class_opt.zero_grad()
            class_pred = classifier(real)
            class_loss = criterion(class_pred, labels)
            class_loss.backward()  # Calculate the gradients
            class_opt.step()  # Update the weights
            classifier_losses += [
                class_loss.item()
            ]  # Keep track of the average classifier loss

            ## Visualization code ##
            if cur_step % display_step == 0 and cur_step > 0:
                class_mean = sum(
                    classifier_losses[-display_step:]) / display_step
                print(f"Step {cur_step}: Classifier loss: {class_mean}")
                step_bins = 20
                # bucket the per-step losses so the plot is smoothed
                x_axis = sorted([
                    i * step_bins
                    for i in range(len(classifier_losses) // step_bins)
                ] * step_bins)
                # NOTE(review): positional args to sns.lineplot are deprecated
                # in newer seaborn — may need x=/y= keywords; confirm version.
                sns.lineplot(x_axis,
                             classifier_losses[:len(x_axis)],
                             label="Classifier Loss")
                plt.legend()
                plt.show()
                # checkpoint the classifier weights
                torch.save({"classifier": classifier.state_dict()}, filename)
            cur_step += 1
p_y = np.array([ 0.04259472890229834, 0.05155736977549028, 0.05075871860857219, 0.05208980388676901, 0.051024935664211554, 0.052533498979501284, 0.051646108794036735, 0.052533498979501284, 0.052888455053687104, 0.0527109770165942, 0.05306593309078002, 0.0527109770165942, 0.05244475996095483, 0.0527109770165942, 0.052622237998047744, 0.05315467210932647, 0.04836276510781791, 0.05004880646020055, 0.04117490460555506, 0.033365870973467035 ]) #read in the training data print("reading the traing data...") print("") tm = training_matrix() nb_c = classifier(tm, p_y) #classifying... really slow because I'm a terrible programmer print( "classifying... this may take a while on UNM machines. It takes like 15s on my personal laptop, but it took like 2 mins on campus" ) print("") #make p_x_given_y matrix nb_c.makeMAPMatrix(alpha) #classify nb_c.classify() print("accuracy: ") print(nb_c.calculateAccuracy(True)) print("classifiers 100 most important words: ") ent = entropy(nb_c, p_y, tm)
def clustering(distance, FileList, sDstDirectory, threshold):
    """Group FileList into clusters and write each cluster to disk.

    Builds a classifier over the pairwise `distance` data, cuts the
    single-linkage dendrogram at `threshold`, and writes the resulting
    clusters under `sDstDirectory` (cluster numbering starts at 1).
    """
    clf = classifier(distance, FileList)
    clf.setThreshold(threshold)
    clf.clustering(SINGLE_LINKAGE)
    clf.separateFilesToClusters(sDstDirectory, 1)
# One iteration of a training sweep: rebuild the MNIST split, train a
# classifier for 15000 steps, then tear everything down and reset the TF graph.
# NOTE(review): uses `i`, `main_path`, `lab_classes`, `unlab_classes`,
# `num_lab_classes`, `count_labels` from an enclosing loop/scope not visible
# here — confirm against the surrounding file.
create_set('MNIST', './data', lab_classes, 100, unlab_classes, 50)
data = dataset(num_lab_classes, './data', 128)
# per-iteration output directory
secondary_path = main_path + '/loop_' + str(i)
if not os.path.exists(secondary_path):
    os.makedirs(secondary_path)
#generator = FCGenerator(data.img_size_x, data.img_size_y,
#                        data.img_size_z)
#critic = FCCritic("Critic", data.img_size_x, data.img_size_y,
#                  data.img_size_z, num_lab_classes + 1)
# classifier head has one extra output class (num_lab_classes + 1)
network = FCCritic("classifier", data.img_size_x, data.img_size_y,
                   data.img_size_z, num_lab_classes + 1)
classify = classifier(network=network, dataset=data, steps=15000)
# NOTE(review): "TRANING" typo is a runtime log string — left unchanged.
print("TRANING STARTED")
temp = classify.call(secondary_path, main_path)
# free the graph objects before resetting the default graph
del network
del classify
del data
tf.compat.v1.reset_default_graph()
print("TRAINING FINISHED")
count_labels += 1
def OC_CNN(dataset, model_type, class_number, hyper_para):
    """Train and evaluate a One-Class CNN for one target class of `dataset`.

    The feature extractor (`model`) is fine-tuned and a small `classifier`
    head is trained to separate instance-normalized real features from
    Gaussian pseudo-negatives, using cross-entropy. After training, an
    evaluation classifier computes AUC, the trained networks are saved, and
    results are written to a .mat file.

    Args:
        dataset: dataset name used by load_dataset and for save paths.
        model_type: backbone name (e.g. 'vggface') for choose_network.
        class_number: index of the one class treated as "normal".
        hyper_para: hyperparameter namespace (lr, sigma, sigma1, D,
            batch_size, iterations, stats_freq, gpu_flag, verbose, ...).

    Returns:
        area_under_curve: ROC AUC of the one-class detector.
    """
    running_loss, inm, relu, mean, cov, imagenet_mean, imagenet_std, classifier = get_fuv(
        hyper_para, model_type)

    if hyper_para.verbose:
        print('Loading dataset ' + dataset + '...')
    train_data, test_data, test_label = load_dataset(
        dataset, class_number, imagenet_mean, imagenet_std, hyper_para)
    if hyper_para.verbose:
        print(dataset + ' dataset loaded.')

    no_train_data = np.shape(train_data.numpy())[0]
    no_test_data = np.shape(test_data.numpy())[0]

    ### choose one network which produces D dimensional features
    if hyper_para.verbose:
        print('Loading network ' + hyper_para.model_type + '...')
    model = choose_network(model_type, hyper_para.pre_trained_flag)
    if hyper_para.verbose:
        print('Network ' + hyper_para.model_type + ' loaded.')

    # running statistics for periodic logging
    running_cc = 0.0

    if hyper_para.gpu_flag:
        inm.cuda()
        relu.cuda()
        model.cuda()
        classifier.cuda()

    model.train()
    classifier.train()

    ### optimizer for model training (for this work we restrict to only fine-tuning FC layers)
    if model_type == 'vggface':
        model_optimizer = optim.Adam(model[-5:].parameters(), lr=hyper_para.lr)
    else:
        model_optimizer = optim.Adam(model.classifier.parameters(),
                                     lr=hyper_para.lr)
    classifier_optimizer = optim.Adam(classifier.parameters(),
                                      lr=hyper_para.lr)

    # loss functions
    cross_entropy_criterion = nn.CrossEntropyLoss()

    for i in range(int(hyper_para.iterations)):
        # sample a random mini-batch of in-class training images
        rand_id = np.asarray(
            random.sample(range(no_train_data), int(hyper_para.batch_size)))
        rand_id = torch.from_numpy(rand_id)
        # get the inputs
        inputs = torch.autograd.Variable(train_data[rand_id].cuda()).float()
        # labels: 0 = real features, 1 = Gaussian pseudo-negatives
        labels = np.concatenate((np.zeros(
            (int(hyper_para.batch_size), )), np.ones(
                (int(hyper_para.batch_size), ))),
                                axis=0)
        labels = torch.from_numpy(labels)
        labels = torch.autograd.Variable(labels.cuda()).long()
        # pseudo-negative samples drawn from N(0, sigma^2) in feature space
        gaussian_data = np.random.normal(
            0, hyper_para.sigma, (int(hyper_para.batch_size), hyper_para.D))
        gaussian_data = torch.from_numpy(gaussian_data)

        # forward: extract features from noisy inputs, instance-normalize,
        # concatenate with Gaussian negatives, and classify
        out1 = model(AddNoise(inputs, hyper_para.sigma1))
        out1 = out1.view(int(hyper_para.batch_size), 1, hyper_para.D)
        out1 = inm(out1)
        out1 = out1.view(int(hyper_para.batch_size), hyper_para.D)
        out2 = torch.autograd.Variable(gaussian_data.cuda()).float()
        out = torch.cat((out1, out2), 0)
        out = relu(out)
        out = classifier(out)

        # backward + optimize both the backbone and the classifier head
        model_optimizer.zero_grad()
        classifier_optimizer.zero_grad()
        cc = cross_entropy_criterion(out, labels)
        loss = cc
        loss.backward()
        model_optimizer.step()
        classifier_optimizer.step()

        # print statistics
        running_cc += cc.data
        running_loss += loss.data
        if hyper_para.verbose:
            if (i % (hyper_para.stats_freq) == (hyper_para.stats_freq - 1)):
                # print every stats_frequency batches
                line = hyper_para.BLUE + '[' + str(format(i+1, '8d')) + '/'+ str(format(int(hyper_para.iterations), '8d')) + ']' + hyper_para.ENDC + \
                    hyper_para.GREEN + ' loss: ' + hyper_para.ENDC + str(format(running_loss/hyper_para.stats_freq, '1.8f')) + \
                    hyper_para.GREEN + ' cc: ' + hyper_para.ENDC + str(format(running_cc/hyper_para.stats_freq, '1.8f'))
                print(line)
                running_loss = 0.0
                running_cc = 0.0

    classifier.eval()
    model.eval()
    relu.eval()

    # FIX: the original passed a bare, undefined name `D` (NameError at
    # runtime); the feature dimensionality lives on hyper_para.D, as used
    # throughout the training loop above.
    area_under_curve, train_features, test_scores, test_features = choose_classifier(
        dataset, class_number, model_type, model, classifier, hyper_para.D,
        hyper_para, train_data, test_data, test_label, no_train_data,
        no_test_data, inm, relu, imagenet_mean, imagenet_std)

    classifier.cpu()
    model.cpu()
    relu.cpu()

    torch.save(model,'../../save_folder/saved_models/'+dataset+'/model/'+str(class_number)+'/'+model_type +'_'+ str(hyper_para.iterations)+'_'+ str(hyper_para.lr) +'_'+ str(hyper_para.sigma) +'.pth')
    # FIX: the original saved `model` again under the classifier path; the
    # classifier head is what belongs here (matching the "model, classifier,
    # features and results saved." message below).
    torch.save(classifier,'../../save_folder/saved_models/'+dataset+'/classifier/'+str(class_number)+'/'+model_type +'_'+ str(hyper_para.iterations)+'_'+ str(hyper_para.lr) +'_'+ str(hyper_para.sigma) +'.pth')
    scipy.io.savemat('../../save_folder/results/'+dataset+'/'+ str(class_number) +'/'+ model_type +'_OCCNN123_'+ str(hyper_para.iterations) +'_'+ str(hyper_para.lr) +'_'+ str(hyper_para.sigma) +'.mat',
                     {'auc':area_under_curve,
                      'train_features':train_features,
                      'test_scores':test_scores,
                      'test_features':test_features,
                      'test_label':test_label })
    if hyper_para.verbose:
        print('model, classifier, features and results saved.')

    return area_under_curve
+ "\n and was trained for " + str(best_epoch)
+ " epoches with batch size = " + str(best_b_size)
+ " and drop out percentage = " + str(best_drop_perc))
# NOTE(review): the expression above continues a parenthesized call (likely a
# print) that begins before this chunk — left as a continuation.


if __name__ == '__main__':
    # classifier tuning parameters
    neighbors = [1,3,5,7,10,15]
    slack = [1,.1,.2,.25,2,5,10,20]
    estimators = [50,100,200]
    # define net tuning parameters
    # drop_perc: vector of length 5 defining the percent of dropout at every layer
    level1_drop = [.2,.25,.28,.3]
    level2_drop = [.2,.25,.28,.3]
    level3_drop = [.5,.3,.35,.4]
    level4_drop = [.3,.35,.4,.45]
    # NOTE(review): the final `5` here is likely a typo for `.5` — confirm.
    level5_drop = [.35,.4,.45,5]
    # get all combinations of the per-layer dropout rates
    drop_percs = [level1_drop,level2_drop,level3_drop,level4_drop,level5_drop]
    drop_percs = list(itertools.product(*drop_percs))
    batch_sizes = [16,32,64,128]
    epochs = [32,65,100,150]
    num_classes = 10
    valid_size = .2
    dropout = True
    data_loader = get_dataset()
    cnn_ds, ds = data_loader.data_preproc(use_pca=0)
    clas = classifier()
    # call CNN grid search over dropout/batch-size/epoch combinations
    CNN_best_perform(drop_percs,batch_sizes,epochs)
    # call the rest of classifiers (kNN / SVM / ensembles) on the same data
    clas.classify(ds,neighbors = neighbors,slack = slack,estimators = estimators)