def test_mini_batch():
    """Check ``accuracy`` on small batches whose top-1 result is known."""
    # Two identical rows whose largest score sits at index 0.
    scores = torch.FloatTensor([[0.1, 0.0]] * 2)
    labels = torch.LongTensor([0, 0])
    assert accuracy(scores, labels)[0].cpu().numpy() == 100.0

    # Six rows of six scores; the arg-max columns are 5, 1, 2, 3, 4, 5.
    rows = [
        [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
        [0.1, 0.7, 0.3, 0.4, 0.5, 0.6],
        [0.1, 0.2, 0.8, 0.4, 0.5, 0.6],
        [0.1, 0.2, 0.3, 0.9, 0.5, 0.6],
        [0.1, 0.2, 0.3, 0.4, 1.0, 0.6],
        [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
    ]

    # Labels equal to the arg-max of every row: everything is correct.
    scores = torch.FloatTensor(rows)
    labels = torch.LongTensor([5, 1, 2, 3, 4, 5])
    assert accuracy(scores, labels)[0].cpu().numpy() == 100.0

    # Same scores, every label set to 1: only the second row matches.
    scores = torch.FloatTensor(rows)
    labels = torch.LongTensor([1] * 6)
    np.testing.assert_almost_equal(
        accuracy(scores, labels)[0].cpu().numpy(), 100 / 6.0)
def test_no_batch():
    """Check ``accuracy`` on single samples promoted to a batch of one."""
    # (scores, label, expected accuracy); the first case is the sanity check
    # with two equal scores, for which the test pins 0.0.
    cases = (
        ([0.0, 0.0], 0, 0.0),
        ([0.0, 1.0], 1, 100.0),
        ([0.2, 0.5, 0.7], 2, 100.0),
    )
    for scores, label, expected in cases:
        output = torch.FloatTensor(scores).unsqueeze(0)
        target = torch.LongTensor([label])
        assert accuracy(output, target)[0].cpu().numpy() == expected
def validate_sentences(self, data, sent_sampler, model, transformation):
    """Evaluate ``model.classifier`` on one batch of sentences.

    Args:
        data: Tuple ``(x, mask, y)``; ``y`` is iterated to build one-hot
            targets before being handed to the model.
        sent_sampler: Object whose ``unique_labels`` length fixes the number
            of one-hot columns.
        model: Object providing ``eval``, ``prepare_data_for_classifier``,
            ``is_cuda`` and ``classifier``.
        transformation: Forwarded unchanged to
            ``model.prepare_data_for_classifier``.

    Returns:
        Tuple ``(acc, prec, rec, loss, confusion_matrix)`` where ``prec`` and
        ``rec`` are dicts keyed by class index.
    """
    model.eval()
    x, mask, y = data

    # Build the one-hot targets from the raw labels now: ``y`` is rebound to
    # the model-ready version a few lines below.
    true_y = np.zeros(shape=(len(y), len(sent_sampler.unique_labels)),
                      dtype=np.int32)
    for idx, current_y in enumerate(y):
        true_y[idx, current_y] = 1

    x, mask, y = model.prepare_data_for_classifier(x, mask, y, transformation)
    if model.is_cuda:
        x = x.cuda()
        y = y.cuda()
        mask = mask.cuda()

    loss = model.classifier.get_loss(x, mask, y).data.cpu().numpy()
    # The classifier's second output is used as the per-class scores.
    probs = model.classifier(x, mask)[1].data.cpu().numpy()

    acc = evaluation.accuracy(predicted_probs=probs, true_y=true_y)
    prec = {}
    rec = {}
    for cls in range(true_y.shape[1]):
        prec[cls] = evaluation.precision_by_class(probs, true_y, cls)
        rec[cls] = evaluation.recall_by_class(probs, true_y, cls)
    return acc, prec, rec, loss, evaluation.build_confusion_matrix(
        probs, true_y)
def main():
    """Fit ``SVMModel(2)`` on moving-average features and print test accuracy."""
    ema_periods = (10, 12, 20, 26, 50, 100, 200)
    sma_periods = (5, 10, 15, 20, 50, 100, 200)
    features = (['EMA%d' % period for period in ema_periods] +
                ['SMA%d' % period for period in sma_periods])
    label = ['Class']

    df_train, df_test = Load_data()

    # Selecting with a one-element list yields a 2-D block, hence the ravel().
    Xtrain, Ytrain = df_train[features].values, df_train[label].values.ravel()
    Xtest, Ytest = df_test[features].values, df_test[label].values.ravel()

    svm_poly = SVMModel(2)
    svm_poly.train(Xtrain, Ytrain)

    Ypredicted = np.array(svm_poly.predict(Xtest))
    print(eval.accuracy(prediction=Ypredicted, true_class=Ytest))
    print(Ypredicted)
def test(test_model, test_data, test_labels, show_mistake=False):
    """Predict on the test set, optionally list deviations, print metrics.

    Args:
        test_model: Model exposing ``predict(data, verbose=0)``.
        test_data: Sequences of token ids (looked up in ``idx_to_word``).
        test_labels: Per-sample labels; column 1 is compared with the
            prediction's column 1.
        show_mistake: When true, print every sample whose predicted value in
            column 1 differs from its label.

    Returns:
        Tuple ``(accuracy, f1, recall, precision)``.
    """
    predictions = test_model.predict(test_data, verbose=0)

    # PRINT WRONG PREDICTIONS
    if show_mistake:
        for i, prediction in enumerate(predictions):
            stress_probability = prediction[1]
            expected = test_labels[i][1]
            score = abs(expected - stress_probability)
            if score > 0:
                # Every word is followed by a single space, the last included.
                seq = "".join(
                    idx_to_word[token].strip() + " " for token in test_data[i])
                print(seq, ",", score, ",", expected, stress_probability)

    # TEST PERFORMANCE
    res_accu = eval.accuracy(predictions, test_labels)
    res_f1 = eval.fscore(predictions, test_labels)
    res_recall = eval.recall(predictions, test_labels)
    res_precision = eval.precision(predictions, test_labels)

    print('Test Accuracy: %.3f' % res_accu)
    print('Test F1-score: %.3f' % res_f1)
    print('Test Recall: %.3f' % res_recall)
    print('Test Precision: %.3f' % res_precision)
    return res_accu, res_f1, res_recall, res_precision
def online_evaluate(gtmat, pred):
    """Return ``(accuracy, precision, recall)`` for one batch.

    ``pred`` is reduced to class indices with an arg-max over dimension 1 and
    ``gtmat`` is flattened; both are handed to the metrics as NumPy arrays.
    """
    predicted = torch.argmax(pred.cpu(), dim=1).long()
    truth = gtmat.view(-1).cpu().numpy()
    predicted = predicted.numpy()
    return (
        accuracy(truth, predicted),
        precision(truth, predicted),
        recall(truth, predicted),
    )
def train(train_loader, model, criterion, optimizer, epoch, print_freq,
          summary_writer):
    """Run one training epoch and log the running averages.

    Args:
        train_loader: Iterable yielding ``(input, target)`` batches.
        model: Network to train; switched to train mode here.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used in the progress line and as the summary step.
        print_freq: Print a progress line every ``print_freq`` batches.
        summary_writer: Object exposing ``add_scalar(tag, value, step)``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (inputs, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # ``async`` is a reserved word since Python 3.7; ``non_blocking`` is
        # the keyword PyTorch uses for the same option.
        target = target.cuda(non_blocking=True)

        # compute output (tensors carry autograd state themselves, so no
        # Variable wrapper is needed)
        output = model(inputs)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        # ``.item()`` instead of ``.data[0]``: the loss is a 0-dim tensor and
        # cannot be indexed.
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1[0], inputs.size(0))
        top5.update(prec5[0], inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))

    summary_writer.add_scalar('data/losses_avg', losses.avg, epoch)
    summary_writer.add_scalar('data/top1_avg', top1.avg, epoch)
    summary_writer.add_scalar('data/top5_avg', top5.avg, epoch)
def ELM(numberofHiddenNeurons, train, test, ActivationFunction,
        baseclasser=False):
    """Train an Extreme Learning Machine and evaluate it on ``test``.

    Args:
        numberofHiddenNeurons: Number of hidden-layer units.
        train: Training data; wrapped in ``ELMDataStruct`` unless
            ``baseclasser`` is set, in which case it is used as given.
        test: Test data, handled like ``train``.
        ActivationFunction: ``'rbf'`` selects ``getRBF``; any other value is
            passed on to ``getH``.
        baseclasser: When ``True`` only the predicted labels are returned.

    Returns:
        ``answer`` (one 1-based label per test sample) when ``baseclasser`` is
        ``True``, otherwise ``(acc, gmean, Rn, trainTime)``.
    """
    if baseclasser == False:
        trainStr, testStr = ELMDataStruct(train), ELMDataStruct(test)
    else:
        trainStr, testStr = train, test

    started = time()
    # Random input weights scaled into [-1, 1) and random biases.
    inputWeight = random.random(
        size=(numberofHiddenNeurons, trainStr.numOfFeature)) * 2 - 1
    biasOfHiddenNeurons = random.random(size=(numberofHiddenNeurons, 1))

    # Up to tempH the size is (number of hidden nodes, number of samples).
    # NOTE(review): ``*`` is a matrix product only if X is a numpy matrix.
    tempH = inputWeight * trainStr.X.T
    tempH = tempH + tile(biasOfHiddenNeurons, (1, trainStr.numOfData))

    print('ActivationFunction:', ActivationFunction)
    if ActivationFunction == 'rbf':
        H = getRBF(trainStr.X, inputWeight, biasOfHiddenNeurons,
                   numberofHiddenNeurons)
    else:
        H = getH(tempH, ActivationFunction)

    # Size of outputWeight: (NumberofHiddenNeurons, numOfClass).
    outputWeight = (linalg.pinv(H.T) * trainStr.labelsMatrix.T)
    trainTime = time() - started

    # Same projection for the test samples.
    tempTest = inputWeight * testStr.X.T
    tempTest = tempTest + tile(biasOfHiddenNeurons, (1, testStr.numOfData))
    if ActivationFunction == 'rbf':
        H_test = getRBF(testStr.X, inputWeight, biasOfHiddenNeurons,
                        numberofHiddenNeurons)
    else:
        H_test = getH(tempTest, ActivationFunction)
    Y = (H_test.T * outputWeight).A

    # Predicted label = 1-based position of the first largest output.
    answer = ones((testStr.numOfData, 1))
    for k in range(testStr.numOfData):
        row = Y[k, :]
        answer[k, 0] = row.tolist().index(max(row)) + 1

    acc = accuracy(answer, testStr.y)
    print('trainTime:', trainTime)
    gmean, Rn = G_mean(answer, testStr.y, testStr.numOfClass)

    if baseclasser == True:
        return answer
    return acc, gmean, Rn, trainTime
def eval_test(self, X, y):
    """Run the model on ``X`` and print accuracy, precision, recall and F1.

    Args:
        X: Inputs fed to ``self.model.X``.
        y: Ground truth passed to the ``evaluation`` helpers.
    """
    out = self.sess.run(self.model.out, feed_dict={self.model.X: X})
    acc = evaluation.accuracy(out, y)
    # Single-argument print() parses on Python 2 and 3 alike and emits the
    # same text the old print statement did.
    print('Test accuracy: %s' % (acc,))
    prec, rec, f1 = evaluation.prec_rec(out, y)
    print('prec: %s rec: %s f1: %s' % (prec, rec, f1))
def evaluation_index(y_pred, y_tar):
    """Flatten all predictions/targets and compute RMSE, correlation, accuracy.

    Args:
        y_pred: Non-empty sequence of arrays with a ``reshape`` method.
        y_tar: Sequence of target arrays, indexed in step with ``y_pred``.

    Returns:
        Tuple ``(rmse, r, accuracy_rate)``.

    Raises:
        IndexError: If ``y_pred`` is empty or ``y_tar`` is shorter than it.
    """
    count = len(y_pred)
    if count == 0:
        raise IndexError('evaluation_index needs at least one prediction')

    # Flatten each piece first and join once; growing the result pairwise
    # inside a loop re-copies everything on every step.
    y_pred_cat = np.concatenate([y_pred[j].reshape(-1) for j in range(count)])
    y_target_cat = np.concatenate([y_tar[j].reshape(-1) for j in range(count)])

    rmse = compute_RMSE(y_pred_cat, y_target_cat)
    r = correlation_coefficient(y_pred_cat, y_target_cat)
    accuracy_rate = accuracy(y_pred_cat, y_target_cat)
    return rmse, r, accuracy_rate
def evaluate_diso(y_label, y_conv, output, sess):
    """Compute cross-entropy loss and accuracy for two-column predictions.

    Args:
        y_label: Ground-truth values fed to a ``[None, 2]`` float placeholder.
        y_conv: Predicted values fed to a ``[None, 2]`` float placeholder.
        output: Writable file-like object, or ``None`` to skip writing.
        sess: TensorFlow session used to run the two metrics.

    Returns:
        Tuple ``(result, accuracy)`` where ``result`` is ``[loss, accuracy]``.
    """
    # NOTE(review): new placeholders and metric ops are created on every call;
    # in graph mode that presumably grows the default graph each time. Confirm
    # whether callers invoke this in a loop.
    y_placeholder = tf.placeholder(tf.float32, shape=[None, 2])
    y_conv_placeholder = tf.placeholder(tf.float32, shape=[None, 2])
    result = sess.run(
        [evaluation.loss_cross_entropy(y_placeholder, y_conv_placeholder),
         evaluation.accuracy(y_placeholder, y_conv_placeholder)],
        feed_dict={y_placeholder: y_label, y_conv_placeholder: y_conv})

    # Identity test against None; the strings are only built when written.
    if output is not None:
        output.write('\t'.join(str(res) for res in result) + '\n')
    return result, result[1]
def runExperiment(dataPath, resultPath):
    """Compare plain and fairness-constrained MAP inference on datasets 1-3.

    For every dataset and every fairness measure code the baseline score and
    accuracy are written once per epsilon, followed by the scores and
    accuracies obtained with ``fair_map_inference`` for each epsilon. The
    whole report is written to ``resultPath``.
    """
    epsilons = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5]
    fairMeasureCodes = ['RD', 'RR', 'RC']
    separator = '---------------------------' + '\n'

    report = []
    for i in range(1, 4):
        print(i)
        dataset_dir = dataPath + str(i) + '/'
        report.append('dataset No.' + str(i) + '\n')
        report.append(separator)
        report.append(separator)
        rules, hard_rules, counts, atoms = ground(dataset_dir)

        for code in fairMeasureCodes:
            print(code)
            results = map_inference(rules, hard_rules)
            accuracyScore = accuracy(dataset_dir, results, atoms)
            score = evaluate(results, counts, code)
            report.append('----------' + code + '---------------' + '\n')
            report.append('----------PSL--------------' + '\n')

            # The baseline does not depend on epsilon; it is repeated once per
            # epsilon so its columns line up with the FairPSL rows.
            report.append((str(score) + '\t') * len(epsilons))
            report.append('\n' + (str(accuracyScore) + '\t') * len(epsilons) +
                          '\n' + '----------FairPSL----------' + '\n')

            fair_scores = []
            fair_accuracies = []
            for epsilon in epsilons:
                print(epsilon)
                results = fair_map_inference(rules, hard_rules, counts,
                                             epsilon, code)
                accuracyScore = accuracy(dataset_dir, results, atoms)
                fair_accuracies.append(str(accuracyScore) + '\t')
                score = evaluate(results, counts, code)
                fair_scores.append(str(score) + '\t')
            report.append(''.join(fair_scores) + '\n')
            report.append(''.join(fair_accuracies) + '\n')

        report.append(separator)
        report.append(separator)

    with open(resultPath, 'w') as f:
        print(''.join(report), file=f)
def main(args, config):
    """Run the pipeline: download, prepare, train, evaluate, upload.

    Args:
        args: Object exposing ``user`` and ``password``, forwarded to the
            download and upload helpers.
        config: Mapping read for the keys ``'min_train_cohort'``,
            ``'min_test_cohort'``, ``'model'``, ``'hyperparameters'``,
            ``'metric'`` and ``'model_name'``.
    """
    start_time = time.time()
    banner = 20 * '='

    # 1) Read data from database
    print(banner)
    print('1. Downloading data...')
    data = download_data(user=args.user, password=args.password,
                         tb_name='sketch.train_data_2')
    oh = download_data(user=args.user, password=args.password,
                       tb_name='sketch.ode_school')

    # 2) Data preparation
    print(banner)
    print('2. Data processing...')
    # TODO: args can be the filter of which vars to use
    data = data_preparation(data, oh)
    train_data, test_data = train_val_test_split(
        data, config['min_train_cohort'], config['min_test_cohort'])

    # 3) Model
    print(banner)
    print('3. Training model...')
    model = model_dict[config['model']]
    clf = model(train_data, args=config['hyperparameters'])

    # 4) Compute metric in validation set
    print(banner)
    print('4. Evaluation model in validation set...')
    metric = metric_dict[config['metric']](clf, test_data)
    # The template has to be formatted explicitly; passing the values as extra
    # print() arguments printed the raw '{}: {}' text in front of them.
    print('{}: {}'.format(config['metric'], metric))

    # We print test and train accuracy
    train_accuracy = accuracy(clf, train_data)
    print('Train accuracy: ', train_accuracy)
    test_accuracy = accuracy(clf, test_data)
    print('Test accuracy: ', test_accuracy)

    # 5) Upload result to postgres
    print(banner)
    print('5. Uploading result to database...')
    upload_result(config['model_name'], config['metric'], metric,
                  args.user, args.password)

    print(banner)
    print('Finished in {} seconds'.format(time.time() - start_time))
def bagging_ELM(name, numberofHiddenNeurons, Type='W1', C=64,
                ActivationFunction='sig'):
    """Train 10 bagged WELM base classifiers and score their majority vote.

    Args:
        name: Dataset identifier passed to ``loadData``.
        numberofHiddenNeurons: Hidden-layer size of every base classifier.
        Type: Forwarded to ``WELM``.
        C: Forwarded to ``WELM``.
        ActivationFunction: Forwarded to ``WELM``.

    Returns:
        Tuple ``(acc, gmean, Rn, trainTime)`` computed on the voted labels.
    """
    train, test = loadData(name)
    shapeOfAnswer = []
    numOfBaseClasser = 10
    trainStr = ELMDataStruct(train)
    testStr = ELMDataStruct(test)

    beginTrainTime = time()
    for i in range(numOfBaseClasser):
        print('Begin %d th train' % (i + 1))
        baggingTrain = dataBagging(trainStr)
        baggingTrainStr = ELMDataStruct(baggingTrain)
        answer = WELM(numberofHiddenNeurons, baggingTrainStr, testStr, Type,
                      ActivationFunction, C, baseclasser=True)
        # One column of predicted labels per base classifier.
        if i == 0:
            answerMatrix = answer
            shapeOfAnswer = shape(answer)
        else:
            answerMatrix = column_stack((answerMatrix, answer))
    outputAnswer = zeros((shapeOfAnswer))
    endTrainTime = time()
    trainTime = endTrainTime - beginTrainTime

    # Majority vote per test sample; the strict ``>`` means ties go to the
    # lowest class label.
    for j in range(shapeOfAnswer[0]):
        voteAnswer = 1
        maxVoteNum = 0
        for k in range(trainStr.numOfClass):
            voteNum = sum(answerMatrix[j, :] == (k + 1))
            if voteNum > maxVoteNum:
                maxVoteNum = voteNum
                voteAnswer = k + 1
        outputAnswer[j] = voteAnswer

    # Score the ensemble vote. The previous code scored ``answer``, i.e. only
    # the last base classifier, and never used the vote.
    acc = accuracy(outputAnswer, testStr.y)
    print('-' * 20, 'Bagging result', '-' * 20)
    print('Bagging trainTime:', trainTime)
    gmean, Rn = G_mean(outputAnswer, testStr.y, testStr.numOfClass)
    print('-' * 20, 'Bagging result', '-' * 20)
    return acc, gmean, Rn, trainTime
def batch_processor(model, data, train_mode):
    """Run one training batch and collect loss and accuracy for logging.

    Args:
        model: Callable invoked as ``model(data, return_loss=True)`` and
            returning ``(pred, loss)``.
        data: Three-element batch whose last element holds the labels.
        train_mode: Must be truthy; only training is supported.

    Returns:
        dict with ``loss``, ``log_vars`` (ordered: loss, acc) and
        ``num_samples``.
    """
    assert train_mode
    pred, loss = model(data, return_loss=True)

    log_vars = OrderedDict()
    log_vars['loss'] = loss.item()

    # TODO: remove pad_label when computing batch accuracy
    _, _, targets = data
    predicted = torch.argmax(pred.cpu(), dim=1).long()
    targets = targets.cpu().numpy()
    log_vars['acc'] = accuracy(targets, predicted.numpy())

    return dict(loss=loss, log_vars=log_vars, num_samples=len(data[-1]))
def task_3_logistic(x, y, x_test, y_test, args):
    """Average test accuracy of logistic regression per training-set size.

    For each of 20 sizes between 10 and 200, 50 models are trained on rows
    drawn at random (indices may repeat) and their test accuracies averaged.

    Args:
        x, y: Training features and labels, indexable by an integer array.
        x_test, y_test: Held-out features and labels.
        args: ``(lr, eps, regularization)`` forwarded to ``fit``.

    Returns:
        Tuple ``(accuracies, sizes)``.
    """
    sizes = np.linspace(10, 200, num=20)
    N = y.shape[0]

    accuracies = []
    for size in sizes:
        trial_scores = []
        for _ in range(50):
            picked = np.random.randint(int(N), size=int(size))
            m = LogisticRegression(x[picked], y[picked])
            m.fit(lr=args[0], eps=args[1], regularization=args[2])
            cm = evaluation.confusion_matrix(y_test, m.predict(x_test))
            trial_scores.append(evaluation.accuracy(cm))
        accuracies.append(sum(trial_scores) / 50)
    return accuracies, sizes
def evaluate_link(class_match_set, class_nonmatch_set, true_match_set,
                  all_comparisons):
    """Print accuracy, precision, recall and F-measure of a linkage result."""
    # Linkage evaluation
    linkage_result = evaluation.confusion_matrix(
        class_match_set, class_nonmatch_set, true_match_set, all_comparisons)

    # All four measures are computed before anything is printed.
    report = (
        (' Accuracy: %.6f', evaluation.accuracy(linkage_result)),
        (' Precision: %.6f', evaluation.precision(linkage_result)),
        (' Recall: %.6f', evaluation.recall(linkage_result)),
        (' F-measure: %.6f', evaluation.fmeasure(linkage_result)),
    )

    print('Linkage evaluation:')
    for template, value in report:
        print(template % (value))
    print('')
def task_3_naive(df, test_df, label, cont=None, cat=None, bin=None):
    """Average test accuracy of naive Bayes per training-set size.

    For each of 20 sizes between 10 and 200, 25 models are trained on rows
    drawn at random (labels may repeat) and their test accuracies averaged.

    Args:
        df: Training frame; rows are selected with ``df.loc`` on random
            integers. NOTE(review): assumes a 0..N-1 integer index.
        test_df: Held-out frame; ``m.predict`` is applied to each row.
        label: Name of the label column.
        cont: Continuous feature columns (defaults to an empty list).
        cat: Categorical feature columns (defaults to an empty list).
        bin: Binary feature columns (defaults to an empty list).

    Returns:
        Tuple ``(accuracies, sizes)``.
    """
    # ``None`` sentinels instead of ``[]`` defaults: a list default is created
    # once and shared by every call, so any mutation by the model would leak
    # into later calls.
    cont = [] if cont is None else cont
    cat = [] if cat is None else cat
    bin = [] if bin is None else bin

    accuracies = []
    sizes = np.linspace(10, 200, num=20)
    N = df.shape[0]
    for size in sizes:
        acc = 0
        for i in range(25):
            print(size, i)
            rand = np.random.randint(int(N), size=int(size))
            m = NaiveBayes(df.loc[rand], label, continuous=cont,
                           categorical=cat, binary=bin)
            pred = test_df.apply(m.predict, axis=1)
            cm = evaluation.confusion_matrix(test_df[label].to_numpy(),
                                             pred.to_numpy())
            acc += evaluation.accuracy(cm)
        accuracies.append(acc / 25)
    return accuracies, sizes
def validation_step(self, batch, batch_idx):
    """Log loss, accuracy, F1 and p_real for every loader in ``batch``.

    Args:
        batch: Mapping from loader name to an ``(x, y)`` pair. Only the
            ``"u"`` and ``"test"`` entries have a slot in the returned dict.
        batch_idx: Unused.

    Returns:
        dict mapping loader name to its ``loss``, ``acc`` and ``f1`` values.
    """
    metric_dict = {
        "u": {"loss": 0, "acc": 0, "f1": 0},
        "test": {"loss": 0, "acc": 0, "f1": 0},
    }

    # Loop through unlabelled and test loaders to calculate metrics
    for key, data in batch.items():
        x, y = data
        logits, y_pred, _ = self.D(x)

        ## Loss ##
        # cross_entropy normalises its input itself, so it takes the raw
        # logits rather than y_pred.
        # NOTE(review): assumes y_pred is the normalised form of logits;
        # confirm against self.D.
        loss = F.cross_entropy(logits, y)
        self.log(f"{key}/loss", loss)
        metric_dict[f"{key}"]["loss"] = loss.item()

        ## Accuracy ##
        acc = accuracy(y_pred, y)
        self.log(f"{key}/accuracy", acc)
        metric_dict[f"{key}"]["acc"] = acc.item()

        ## F1 score ##
        f1 = self.f1(y_pred, y)
        self.log(f"{key}/f1", f1)
        metric_dict[f"{key}"]["f1"] = f1.item()

        ## Probability of dataset being real ##
        p_real = UPSoftmax(logits)
        self.log(f"{key}/p_real", p_real)

    return metric_dict
def validation_step(self, batch, batch_idx):
    """Log loss, accuracy and F1 for every loader contained in ``batch``.

    ``batch`` maps a loader name to an ``(x, y)`` pair; each metric is logged
    under ``"<name>/<metric>"``. Nothing is returned.
    """
    # Loop through unlabelled and test loaders to calculate metrics
    for split, (inputs, targets) in batch.items():
        logits, predictions, _ = self.D(inputs)

        ## Loss ##
        self.log(f"{split}/loss", F.cross_entropy(logits, targets))

        ## Accuracy ##
        self.log(f"{split}/accuracy", accuracy(predictions, targets))

        ## F1 score ##
        self.log(f"{split}/f1", self.f1(predictions, targets))
def test(test_loader, model, criterion, print_freq):
    """Evaluate ``model`` on ``test_loader`` and build a confusion matrix.

    Args:
        test_loader: Iterable yielding ``(input, target)`` batches.
        model: Network to evaluate; switched to eval mode here.
        criterion: Loss called as ``criterion(output, target)``.
        print_freq: Print a progress line every ``print_freq`` batches.

    Returns:
        The average top-1 precision over the whole loader.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    pred_labels = np.zeros([0, ])
    GT_labels = np.zeros([0, ])

    # ``torch.no_grad()`` replaces the removed ``volatile=True`` flag.
    with torch.no_grad():
        for i, (inputs, target) in enumerate(test_loader):
            # ``async`` is a reserved word since Python 3.7; ``non_blocking``
            # is the keyword PyTorch uses for the same option.
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(inputs)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            # ``.item()`` instead of ``.data[0]``: the loss is a 0-dim tensor.
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1[0], inputs.size(0))
            top5.update(prec5[0], inputs.size(0))

            # Keep the top-1 prediction and the label of every sample for the
            # confusion matrix.
            _, pred = output.data.topk(1, 1, True, True)
            pred_labels = np.concatenate(
                [pred_labels, pred.cpu().numpy().flatten()], axis=0)
            GT_labels = np.concatenate(
                [GT_labels, target.cpu().numpy().flatten()], axis=0)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(test_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    categories = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad',
                  'Surprise']
    build_confusion_mtx(GT_labels, pred_labels, categories)
    return top1.avg
def main():
    """Train and evaluate ``Predictor`` with repeated 5-fold cross validation.

    The 5 folds are run ``manytimes_n`` (8) times. For every fold a fresh
    model is trained for up to ``n_epoch`` epochs on mini-batches; every 20
    epochs it is scored on the fold's test split and the early-stopping state
    is updated. Per-fold results are appended to the ``*_folds`` lists, a loss
    curve is saved under ``figure/`` and up to 10 sufficiently good models are
    saved under ``<data_dir>/model``. The averaged results are printed last.
    """
    # reading in
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", default='data/sampling', help='determine the base dir of the dataset document')
    parser.add_argument("--sample_n", default=1000, type=int, help='starting image index of preprocessing')
    parser.add_argument("--evidence_n", default=20, type=int, help='how many top/bottom tiles to pick from')
    parser.add_argument("--repl_n", default=3, type=int, help='how many resampled replications')
    parser.add_argument("--image_split", action='store_true', help='if use image_split')
    parser.add_argument("--batch_size", default=50, type=int, help="batch size")
    parser.add_argument("--stage_two", action='store_true', help='if only use stage two patients')
    parser.add_argument("--changhai", action='store_true', help='if use additional data')
    args = parser.parse_args()
    feature_size = 32
    #gpu = "cuda:0"
    gpu = None
    # 5-folds cross validation
    dataloader = CVDataLoader(args, gpu, feature_size)
    n_epoch = 800
    lr = 0.0005
    if args.stage_two:
        weight_decay = 0.008
    else:
        weight_decay = 0.005
    manytimes_n = 8
    if not os.path.isdir('figure'):
        os.mkdir('figure')
    if not os.path.isdir(os.path.join(args.data_dir, 'model')):
        os.mkdir(os.path.join(args.data_dir, 'model'))
    # Best test-split result of every fold, over all repetitions.
    acc_folds = []
    auc_folds = []
    c_index_folds = []
    f1_folds = []
    f1_folds_pos = []
    total_round = 0
    model_count = 0
    loss_function = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(0.8))
    for _ in range(manytimes_n):  # averaging
        for i in range(5):
            train_history = []
            test_history = []
            minimum_loss = None
            auc_fold = None
            acc_fold = None
            early_stop_count = 0
            model = Predictor(evidence_size=args.evidence_n, layers=(100, 50, 1), feature_size=feature_size)
            # model.apply(weight_init)
            if gpu:
                model = model.to(gpu)
            optimizer = torch.optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)
            # optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
            dataloader.set_fold(i)
            X_test, Y_test, df_test = dataloader.get_test()
            #
            X_train, Y_train, df_train = dataloader.get_train()
            print('starting fold %d' % i)
            for epoch in range(n_epoch):
                #result = model(X_train)
                #loss = nn.functional.binary_cross_entropy(result, Y_train) + nn.functional.mse_loss(result, Y_train)
                # loss = nn.functional.mse_loss(result, Y_train)
                #loss.backward()
                #optimizer.step()
                #optimizer.zero_grad()
                # batch input
                for X_train_batch, Y_train_batch, df_train_batch in dataloader:
                    # print(X_train_batch.shape)
                    result = model(X_train_batch)
                    loss = loss_function(result, Y_train_batch)
                    loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()
                # The training metrics below compare ``result`` (output for the
                # last mini-batch) with that same mini-batch, so the train
                # variables are rebound to it.
                X_train, Y_train, df_train = X_train_batch, Y_train_batch, df_train_batch
                if epoch % 20 == 0:
                    result_test = model(X_test)
                    loss_test = loss_function(result_test, Y_test)
                    #loss_test = nn.functional.mse_loss(result_test, Y_test)
                    acc_train, acc_test = accuracy(result, Y_train), accuracy(
                        result_test, Y_test)
                    auc_train, auc_test = auc(result, Y_train), auc(
                        result_test, Y_test)
                    if args.changhai:
                        c_index_train, c_index_test = 0, 0
                    else:
                        c_index_train, c_index_test = c_index(
                            result, df_train), c_index(result_test, df_test)
                    recall_train, recall_test = recall(result, Y_train), recall(
                        result_test, Y_test)
                    precision_train, precision_test = precision(
                        result, Y_train), precision(result_test, Y_test)
                    # ``*_pos`` holds the default f1; the plain ``f1_*`` names
                    # hold the value computed with ``negative=True``.
                    f1_train_pos, f1_test_pos = f1(result, Y_train), f1(
                        result_test, Y_test)
                    f1_train, f1_test = f1(result, Y_train, negative=True), f1(result_test, Y_test, negative=True)
                    train_history.append(
                        (epoch, loss, acc_train, auc_train, c_index_train))
                    test_history.append(
                        (epoch, loss_test, acc_test, auc_test, c_index_test))
                    if epoch % 40 == 0:
                        print(
                            "%s epoch:%d loss:%.3f/%.3f acc:%.3f/%.3f auc:%.3f/%.3f c_index:%.3f/%.3f recall:%.3f/%.3f prec:%.3f/%.3f f1:%.3f/%.3f f1(neg):%.3f/%.3f"
                            % (time.strftime(
                                '%m.%d %H:%M:%S', time.localtime(
                                    time.time())), epoch, loss, loss_test,
                               acc_train, acc_test, auc_train, auc_test,
                               c_index_train, c_index_test, recall_train,
                               recall_test, precision_train, precision_test,
                               f1_train_pos,
                               f1_test_pos, f1_train, f1_test))
                    # early stop
                    # A new best needs the test loss to drop by more than 0.5 %.
                    if minimum_loss is None or minimum_loss * 0.995 > loss_test:
                        # if minimum_loss is None or minimum_loss > loss_test:
                        # Skip this checkpoint entirely while the
                        # negative-class f1 on the training batch is still 0.
                        if f1_train == 0:
                            continue
                        minimum_loss = loss_test
                        auc_fold = auc_test
                        acc_fold = acc_test
                        c_index_fold = c_index_test
                        f1_fold_pos = f1_test_pos
                        f1_fold = f1_test
                        early_stop_count = 0
                    elif auc_test > auc_fold and auc_test > 0.5 and acc_test >= acc_fold:
                        # Loss did not improve enough, but AUC did and accuracy
                        # did not get worse: also accepted as the new best.
                        minimum_loss = loss_test
                        auc_fold = auc_test
                        acc_fold = acc_test
                        c_index_fold = c_index_test
                        f1_fold_pos = f1_test_pos
                        f1_fold = f1_test
                        early_stop_count = 0
                    else:
                        early_stop_count += 1
                    # NOTE(review): the pairing of the ``elif`` below (and the
                    # nesting of ``if epoch > 500``) was reconstructed from
                    # flattened text; verify against the original file.
                    if early_stop_count > 2 and epoch > 100:
                        if args.stage_two:
                            if auc_fold > 0.55:
                                print('early stop at epoch %d' % epoch)
                                break
                        elif early_stop_count > 3:
                            print('early stop at epoch %d' % epoch)
                            break
                    # Late in training a new optimizer is created with a lower
                    # learning rate and a higher weight decay.
                    if epoch > 500:
                        optimizer = torch.optim.RMSprop(
                            model.parameters(),
                            lr * 0.6,
                            weight_decay=weight_decay * 1.2)
            train_history = np.array(train_history)
            test_history = np.array(test_history)
            acc_folds.append(acc_fold)
            auc_folds.append(auc_fold)
            f1_folds.append(f1_fold)
            f1_folds_pos.append(f1_fold_pos)
            c_index_folds.append(c_index_fold)
            # Column 0 is the epoch, column 1 the loss.
            plt.plot(train_history[:, 0], train_history[:, 1], label='train')
            plt.plot(test_history[:, 0], test_history[:, 1], label='test')
            plt.legend()
            plt.savefig('figure/sample_%d_fold%d.png' % (args.sample_n, i))
            plt.cla()
            if acc_fold > 0.7 and auc_fold > 0.6 and model_count < 10:
                model.save(args.data_dir + "/model/model_%d" % model_count)
                model_count += 1
            print("acc:%.3f\tauc:%.3f\tc_index:%.3f\tf1:%.3f" %
                  (acc_fold, auc_fold, c_index_fold, f1_fold))
            total_round += 1
            if gpu:
                del dataloader.X_train, dataloader.Y_train, dataloader.X_test, dataloader.Y_test
                del X_test, Y_test, X_train, Y_train, model, optimizer
                torch.cuda.empty_cache()
    # Averages assume exactly 5 folds per repetition.
    print('CV-acc:%.3f CV-auc:%.3f CV-c-index:%.3f f1:%.3f f1(neg):%.3f' %
          (sum(acc_folds) / 5 / manytimes_n, sum(auc_folds) / 5 / manytimes_n,
           sum(c_index_folds) / 5 / manytimes_n,
           sum(f1_folds_pos) / 5 / manytimes_n,
           sum(f1_folds) / 5 / manytimes_n))
# Blocking quality: share of true matches kept by blocking (pairs
# completeness) and share of candidate pairs that are true matches (pairs
# quality). ``rr`` is computed before this point.
pc = evaluation.pairs_completeness(cand_rec_id_pair_list, true_match_set)
pq = evaluation.pairs_quality(cand_rec_id_pair_list, true_match_set)

print('Blocking evaluation:')
print(' Reduction ratio: %.3f' % (rr))
print(' Pairs completeness: %.3f' % (pc))
print(' Pairs quality: %.3f' % (pq))
print('')

# Linkage evaluation
#
# All four measures are derived from one confusion matrix.
linkage_result = evaluation.confusion_matrix(class_match_set, class_nonmatch_set, true_match_set, all_comparisons)
accuracy = evaluation.accuracy(linkage_result)
precision = evaluation.precision(linkage_result)
recall = evaluation.recall(linkage_result)
fmeasure = evaluation.fmeasure(linkage_result)

print('Linkage evaluation:')
print(' Accuracy: %.3f' % (accuracy))
print(' Precision: %.3f' % (precision))
print(' Recall: %.3f' % (recall))
print(' F-measure: %.3f' % (fmeasure))
print('')

# Total runtime is the sum of the four timed stages.
linkage_time = loading_time + blocking_time + comparison_time + \
    classification_time
print('Total runtime required for linkage: %.3f sec' % (linkage_time))
# Get cross validation accuracy for 5-fold cv
print("Ionosphere validation accuracy (default parameters):")
evaluation.cross_validation(5, ionosphere_train_features, ionosphere_train_labels, model=LogisticRegression)

# Grid search for optimal hyperparameters
print("Ionosphere grid search hyperparameters:")
ionosphere_max_val_acc, ionosphere_arg_max = evaluation.grid_search(learning_rates=lrs, epsilons=eps, lambdas=lamdas, x=ionosphere_train_features, y=ionosphere_train_labels, model=LogisticRegression)

# Accuracy on test split - train with best hyperparameters
# ``ionosphere_arg_max`` is read as (learning rate, epsilon, regularization).
print("Ionosphere test accuracy:")
logistic_ionosphere = LogisticRegression(ionosphere_train_features, ionosphere_train_labels)
logistic_ionosphere.fit(lr=ionosphere_arg_max[0], eps=ionosphere_arg_max[1], regularization=ionosphere_arg_max[2])
ionosphere_prediction = logistic_ionosphere.predict(ionosphere_test_features)
cm_ionosphere = evaluation.confusion_matrix(ionosphere_test_labels, ionosphere_prediction)
# Recall is reported through ``evaluation.true_positive``.
print("Accuracy:", evaluation.accuracy(cm_ionosphere), "Precision:", evaluation.precision(cm_ionosphere), "Recall:", evaluation.true_positive(cm_ionosphere), "F1:", evaluation.f_score(cm_ionosphere))

# 5-fold CV for naive bayes
print("Ionosphere validation accuracy (naive bayes):")
evaluation.cross_validation_naive(5, ionosphere_dataset.train_data, NaiveBayes, ionosphere_dataset.label_column, ionosphere_dataset.feature_columns)

# Every feature column is treated as continuous for the naive Bayes model.
naive_ionosphere = NaiveBayes(ionosphere_dataset.train_data, ionosphere_dataset.label_column, continuous=ionosphere_dataset.feature_columns)
print("Ionosphere test accuracy (naive bayes):")
# The model predicts one row at a time.
ionosphere_pred_naive = ionosphere_dataset.test_data.apply(naive_ionosphere.predict, axis=1)
cm_ionosphere_naive = evaluation.confusion_matrix(ionosphere_test_labels, ionosphere_pred_naive.to_numpy())
print("Accuracy:", evaluation.accuracy(cm_ionosphere_naive), "Precision:", evaluation.precision(cm_ionosphere_naive), "Recall:", evaluation.true_positive(cm_ionosphere_naive), "F1:",
      evaluation.f_score(cm_ionosphere_naive))

# Abalone -----
# Running sums of the three banded accuracies over all saved models.
# ``mean_acc`` and ``times`` are initialised before this point.
mean_b1 = 0
mean_b2 = 0
mean_b3 = 0
for i, file in enumerate(files):
    # Drop the previous model's backend state before loading the next one.
    K.clear_session()
    model = load_model(save_path + '/' + file,
                       custom_objects={
                           "Flip_Attention": Flip_Attention,
                           "lossFunction": customLoss(K.variable(np.ones((1, 1))), 0.2)
                       })
    evaluate = model.evaluate(x_test, y_test)
    score = model.predict(test_embed)
    mean_acc += evaluate[1]

    # Accuracy for three progressively wider (lower, upper) bands.
    bais_acc1 = accuracy(score, label, 0.45, 0.55)
    mean_b1 += float(bais_acc1)
    bais_acc2 = accuracy(score, label, 0.40, 0.60)
    mean_b2 += float(bais_acc2)
    bais_acc3 = accuracy(score, label, 0.35, 0.65)
    mean_b3 += float(bais_acc3)
    print(file, evaluate[1], bais_acc1, bais_acc2, bais_acc3)
    # if evaluate[1] > max_acc:
    #     max_acc = evaluate[1]
    #     best_name = file
    del model
    gc.collect()
    # Every value is separated by a space; the three band accuracies used to
    # be glued together into one unreadable number.
    logging.info(' '.join([file, str(evaluate[1]), str(bais_acc1),
                           str(bais_acc2), str(bais_acc3)]))
print('mean_acc:', mean_acc / times)
print('mean_b1:', mean_b1 / times)
def train(self, epochs=10):
    """Train for ``epochs`` epochs and evaluate on the test set after each.

    After the last step of every epoch the test set is predicted one sample
    at a time; mean loss and accuracy are printed and appended to
    ``log.txt``. When training ends the checkpoint is saved, the global epoch
    counter is updated and the accuracy, loss and confusion-matrix plots are
    generated.

    Args:
        epochs: Number of epochs to run in this call.
    """
    # Load the dataset unless all four splits are already present.
    if not (self.__xtrain and self.__ytrain and self.__xtest
            and self.__ytest):
        self.__load_dataset()

    # Open a writer to write summaries.
    self.__writer = tf.summary.FileWriter(self.__TMP_DIR,
                                          self.__session.graph)
    for epoch in range(epochs):
        average_loss = 0
        num_steps = len(self.__flow)
        progress = ('Epoch ' + str(epoch + 1 + self.__GLOBAL_EPOCH) + '/' +
                    str(epochs + self.__GLOBAL_EPOCH))
        for step in tqdm.tqdm(range(num_steps), desc=progress):
            batch, label = self.__flow.next()
            run_metadata = tf.RunMetadata()
            _, l = self.__session.run([self.__train_op, self.__loss],
                                      feed_dict={
                                          self.__images: batch,
                                          self.__labels: label
                                      },
                                      run_metadata=run_metadata)
            average_loss += l

            # print loss and accuracy on test set at the end of each epoch
            if step == num_steps - 1:
                y_true = []
                y_pred = []
                for i in range(len(self.__xtest)):
                    prediction = self.__session.run(
                        self.__labels_predicted,
                        feed_dict={self.__images: [self.__xtest[i]]},
                        run_metadata=run_metadata)
                    y_true.append(self.__ytest[i])
                    y_pred.append(prediction[0])
                accuracy = ev.accuracy(y_true, y_pred)
                # ``num_steps`` losses were summed, so divide by num_steps.
                # Dividing by ``step`` was off by one and divided by zero
                # when there was a single step.
                mean_loss = average_loss / num_steps
                print('Loss:', str(mean_loss), '\tAccuracy:', accuracy)
                with open(self.__TMP_DIR + '/log.txt', 'a',
                          encoding='utf8') as f:
                    f.write(str(accuracy) + ' ' + str(mean_loss) + '\n')

            # Attach the run metadata of the very last training step.
            if step == (num_steps - 1) and epoch + 1 == epochs:
                s = self.__session.run(self.__global_step)
                self.__writer.add_run_metadata(run_metadata,
                                               'step%d' % step,
                                               global_step=s)

    self.__saver.save(self.__session,
                      os.path.join(self.__TMP_DIR, 'model.ckpt'))
    # NOTE(review): this path has no separator before 'epoch.txt', unlike
    # '/log.txt' above; confirm whether __TMP_DIR ends with a slash.
    dp.global_epoch(self.__TMP_DIR + 'epoch.txt',
                    update=self.__GLOBAL_EPOCH + epochs)
    self.__writer.close()

    pg.generate_accuracy_plot(data_dir=self.__TMP_DIR)
    pg.generate_loss_plot(data_dir=self.__TMP_DIR)
    # y_true / y_pred come from the evaluation of the final epoch.
    conf_mat = ev.confusion_matrix(y_true, y_pred, len(self.__SENTIMENTS))
    pg.generate_confusion_matrix_plot(conf_mat,
                                      self.__SENTIMENTS,
                                      data_dir=self.__TMP_DIR)
    pg.generate_confusion_matrix_plot(conf_mat,
                                      self.__SENTIMENTS,
                                      normalize=True,
                                      data_dir=self.__TMP_DIR)
def run_scheme(scheme, descriptor_type, descriptor_param, num_clusters, clf_params, plotGraphs, PCAon, num_cols): print "Running scheme with the following parameters: " print "Scheme num: " + str(scheme) + ", BoVW: num_clusters=" + str(num_clusters) +\ "; SVM: params:" + str(clf_params) + ";\n plotGraphs=" + str(plotGraphs) +\ "; PCA_on=" + str(PCAon) start = time.time() # 1) Read the train and test files train_images_filenames = cPickle.load( open('train_images_filenames.dat', 'r')) test_images_filenames = cPickle.load(open('test_images_filenames.dat', 'r')) train_labels = cPickle.load(open('train_labels.dat', 'r')) test_labels = cPickle.load(open('test_labels.dat', 'r')) print 'Loaded ' + str( len(train_images_filenames)) + ' training images filenames\ with classes ', set(train_labels) print 'Loaded ' + str( len(test_images_filenames)) + ' testing images filenames\ with classes ', set(test_labels) # 2) Extract features (train) D, Train_descriptors, kpt_dense, pca_train, sclr_train = computeTraining_descriptors( descriptor_type, descriptor_param, train_images_filenames, train_labels, PCAon, num_cols) # 3) Reduce number of features by PCA (reducing m=128 cols) # Computed internally in computeTraining_descriptors() # 4) Compute codebook codebook = computeCodebook(num_clusters, D, descriptor_type, descriptor_param, PCAon) # 5) Get training BoVW train_VW = getBoVW_train(codebook, num_clusters, Train_descriptors) # 6) Train SVM clf, train_scaler, D_scaled = clf_train(train_VW, train_labels, clf_params) # 7) Get test BoVW test_VW = getBoVW_test(codebook, num_clusters, test_images_filenames, descriptor_type, descriptor_param, kpt_dense, PCAon, pca_train, sclr_train) # 8) Get evaluation (accuracy, f-score, graphs, etc.) predictions = clf_predict(clf, clf_params, train_scaler, test_VW, D_scaled) # Get metrics and graphs: # We need to implement our own for latter integration with the rest of the project # Accuracy, F-score (multi-class=> average? add up?) 
acc = accuracy(test_labels, predictions) prec = precision(test_labels, predictions) rec = recall(test_labels, predictions) f1sc = f1score(test_labels, predictions) cm = confusionMatrix(test_labels, predictions) hits, misses = HitsAndMisses(cm) print "Confusion matrix:\n" print(str(cm)) print("\n") print "Results (metrics):\n" + "Accuracy= {:04.2f}%\n" \ "Precision= {:04.2f}%\n" \ "Recall= {:04.2f}%\n" \ "F1-score= {:04.2f}%\n" \ "Hits(TP)={:d}\n" \ "Misses(FN)={:d}\n".format( 100*acc, 100*prec, 100*rec, 100*f1sc, hits, misses) print("\n") if plotGraphs: # Plot confusion matrix (and any other graph) print "Plotting confusion matrix..." plotConfusionMatrix(cm, test_labels) end = time.time() print 'Everything done in ' + str(end - start) + ' secs.'
zh = energy_multi_random(X, z, 30) a = accuracy(z, zh) print "Random / Energy:", a """ for d in [5, 10, 15, 20, 25, 30, 50, 100, 200, 300, 500, 1000, 2000, 5000]: n = 1000 m1 = np.zeros(d) m1[range(0, d, 2)] = 1 s1 = np.eye(d) m2 = np.zeros(d) m2[range(0, d, 2)] = -1 s2 = np.eye(d) X, z = two_gaussians(m1, s1, m2, s2, n) zh = kmeans(X) a_kmeans = accuracy(z, zh) Y = pca_projection(X) zh = kmeans(Y) a_pca = accuracy(z, zh) zh = kmeans_multi_random(X, z, 100) a_krandom = accuracy(z, zh) zh = energy_multi_random(X, z, 100) a_erandom = accuracy(z, zh) print "%i & %f & %f & %f & %f \\\\" % (d, a_kmeans, a_pca, a_krandom, a_erandom)
num_of_documents = args.qDocs for p_doc in list(enumerate(corpora.positives[:num_of_documents])): print "extracting ngrams from positive documents" print p_doc[0] pp.extract_ngrams(p_doc[1], stopwords=args.stopwords) clear() for n_doc in list(enumerate(corpora.negatives[:num_of_documents])): print "extracting ngrams from negative documents" print n_doc[0] pp.extract_ngrams(n_doc[1], stopwords=args.stopwords) clear() print "____________________CLASSIFICATION STAGE____________________" all_documents = corpora.positives[:num_of_documents] + corpora.negatives[:num_of_documents] classifier = classification.OhanaBrendan(all_documents) classifier.rule = args.tags classifier.term_counting() print "____________________EVALUATION STAGE____________________" print args print print "Precision" print str(eval.precision(len(corpora.positives[:num_of_documents]), corpora.negatives[:num_of_documents]) * decimal.Decimal(100)) + ' %' print "Recall" print str(eval.recall(len(corpora.positives[:num_of_documents]), corpora.positives[:num_of_documents]) * decimal.Decimal(100)) + ' %' print "Accuracy" print str(eval.accuracy(len(corpora.positives), len(corpora.negatives), all_documents) * decimal.Decimal(100)) + ' %'
def test_gcn_e(model, cfg, logger):
    """Run connectivity inference, convert peaks to clusters and evaluate.

    Predicted connections are loaded from ``pred_conns.npz`` in
    ``cfg.work_dir`` when that file exists and ``cfg.force`` is false;
    otherwise they are either drawn at random (``cfg.random_conns``) or
    produced by ``test(model, dataset, cfg, logger)``. The resulting peaks
    are turned into labels with ``peaks_to_labels``, optionally saved
    (``cfg.save_output``) and evaluated when the dataset has labels.

    Args:
        model: Model passed through to ``test``.
        cfg: Configuration object; ``cfg.test_data`` is updated in place
            with the entries of ``cfg.model['kwargs']``.
        logger: Logger used for progress and interim accuracy messages.
    """
    for k, v in cfg.model['kwargs'].items():
        setattr(cfg.test_data, k, v)
    dataset = build_dataset(cfg.model['type'], cfg.test_data)

    pred_peaks = dataset.peaks
    pred_dist2peak = dataset.dist2peak

    ofn_pred = osp.join(cfg.work_dir, 'pred_conns.npz')
    if osp.isfile(ofn_pred) and not cfg.force:
        # NOTE(review): in this cached path pred_peaks / pred_dist2peak are
        # not extended with the loaded connections - confirm this is
        # intended.
        data = np.load(ofn_pred)
        pred_conns = data['pred_conns']
        inst_num = data['inst_num']
        if inst_num != dataset.inst_num:
            logger.warning(
                'instance number in {} is different from dataset: {} vs {}'.
                format(ofn_pred, inst_num, len(dataset)))
    else:
        if cfg.random_conns:
            pred_conns = []
            for nbr, dist, idx in zip(dataset.subset_nbrs,
                                      dataset.subset_dists,
                                      dataset.subset_idxs):
                # Keep one row per sample so that pred_conns can be indexed
                # as pred_conns[:, i] by the interim evaluation below, the
                # same way as the model-predicted connections.
                sample_conns = []
                for _ in range(cfg.max_conn):
                    pred_rel_nbr = np.random.choice(np.arange(len(nbr)))
                    pred_abs_nbr = nbr[pred_rel_nbr]
                    pred_peaks[idx].append(pred_abs_nbr)
                    pred_dist2peak[idx].append(dist[pred_rel_nbr])
                    sample_conns.append(pred_rel_nbr)
                pred_conns.append(sample_conns)
            pred_conns = np.array(pred_conns)
        else:
            pred_conns = test(model, dataset, cfg, logger)
            for pred_rel_nbr, nbr, dist, idx in zip(pred_conns,
                                                    dataset.subset_nbrs,
                                                    dataset.subset_dists,
                                                    dataset.subset_idxs):
                pred_abs_nbr = nbr[pred_rel_nbr]
                pred_peaks[idx].extend(pred_abs_nbr)
                pred_dist2peak[idx].extend(dist[pred_rel_nbr])
        inst_num = dataset.inst_num

    if len(pred_conns) > 0:
        logger.info(
            'pred_conns (nbr order): mean({:.1f}), max({}), min({})'.format(
                pred_conns.mean(), pred_conns.max(), pred_conns.min()))

    if not dataset.ignore_label and cfg.eval_interim:
        subset_gt_labels = dataset.subset_gt_labels
        for i in range(cfg.max_conn):
            pred_peaks_labels = np.array([
                dataset.idx2lb[pred_peaks[idx][i]]
                for idx in dataset.subset_idxs
            ])

            acc = accuracy(pred_peaks_labels, subset_gt_labels)
            logger.info(
                '[{}-th] accuracy of pred_peaks labels ({}): {:.4f}'.format(
                    i, len(pred_peaks_labels), acc))

            # the rule for nearest nbr is only appropriate when nbrs is sorted
            nearest_idxs = np.where(pred_conns[:, i] == 0)[0]
            acc = accuracy(pred_peaks_labels[nearest_idxs],
                           subset_gt_labels[nearest_idxs])
            logger.info(
                '[{}-th] accuracy of pred labels (nearest: {}): {:.4f}'.format(
                    i, len(nearest_idxs), acc))

            not_nearest_idxs = np.where(pred_conns[:, i] > 0)[0]
            acc = accuracy(pred_peaks_labels[not_nearest_idxs],
                           subset_gt_labels[not_nearest_idxs])
            logger.info(
                '[{}-th] accuracy of pred labels (not nearest: {}): {:.4f}'.
                format(i, len(not_nearest_idxs), acc))

    with Timer('Peaks to clusters (th_cut={})'.format(cfg.tau)):
        pred_labels = peaks_to_labels(pred_peaks, pred_dist2peak, cfg.tau,
                                      inst_num)

    if cfg.save_output:
        logger.info(
            'save predicted connectivity and labels to {}'.format(ofn_pred))
        if not osp.isfile(ofn_pred) or cfg.force:
            np.savez_compressed(ofn_pred,
                                pred_conns=pred_conns,
                                inst_num=inst_num)

        # save clustering results
        idx2lb = list2dict(pred_labels, ignore_value=-1)

        folder = '{}_gcne_k_{}_th_{}_ig_{}'.format(cfg.test_name, cfg.knn,
                                                   cfg.th_sim,
                                                   cfg.test_data.ignore_ratio)
        opath_pred_labels = osp.join(cfg.work_dir, folder,
                                     'tau_{}_pred_labels.txt'.format(cfg.tau))
        mkdir_if_no_exists(opath_pred_labels)
        write_meta(opath_pred_labels, idx2lb, inst_num=inst_num)

    # evaluation
    if not dataset.ignore_label:
        print('==> evaluation')
        for metric in cfg.metrics:
            evaluate(dataset.gt_labels, pred_labels, metric)

        # H and C-scores: the benchmark takes dicts keyed by the stringified
        # sample index.
        gt_dict = {}
        pred_dict = {}
        for i, gt_label in enumerate(dataset.gt_labels):
            gt_dict[str(i)] = gt_label
            pred_dict[str(i)] = pred_labels[i]
        bm = ClusteringBenchmark(gt_dict)
        scores = bm.evaluate_vmeasure(pred_dict)
        # fmi_scores = bm.evaluate_fowlkes_mallows_score(pred_dict)
        print(scores)
print print "Some documents couldn't be predicted, then they was assigned with None and will not be evaluated" positive_docs_non_predicted = 0 list_of_true_negative_documents = [] for tn in corpora.negatives[:num_of_documents]: if tn.predicted_polarity: list_of_true_negative_documents.append(tn) else: positive_docs_non_predicted += 1 negative_docs_non_predicted = 0 list_of_true_positive_documents = [] for tp in corpora.positives[:num_of_documents]: if tp.predicted_polarity: list_of_true_positive_documents.append(tp) else: negative_docs_non_predicted += 1 print "Positive docs non predicted: " + str(positive_docs_non_predicted) print "Negative docs non predicted: " + str(negative_docs_non_predicted) print print "Precision" print str(eval.precision(len(corpora.positives[:num_of_documents]), list_of_true_negative_documents, ref=0.5) * decimal.Decimal(100)) + ' %' print "Recall" print str(eval.recall(len(corpora.positives[:num_of_documents]), list_of_true_positive_documents, ref=0.5) * decimal.Decimal(100)) + ' %' print "Accuracy" print str(eval.accuracy(len(corpora.positives), len(corpora.negatives), list_of_true_positive_documents + list_of_true_negative_documents, ref=0.5) * decimal.Decimal(100)) + ' %'
def test_gcn_v(model, cfg, logger):
    """Run confidence inference, cluster the results and export the images.

    Predicted confidences are loaded from disk when a cached ``.npz`` file
    exists and ``cfg.force`` is false; otherwise they are produced by
    ``test(model, dataset, cfg, logger)``. Confidences are converted to
    peaks and then to cluster labels, which are optionally saved and
    evaluated. With ``cfg.use_gcn_feat`` the kNN graph is rebuilt from the
    GCN features and the clustering is repeated with ``cfg.tau``. Finally
    every image listed in the JSON index is copied into a directory named
    after its predicted cluster.

    Args:
        model: Model passed through to ``test``.
        cfg: Configuration object; ``cfg.test_data`` is updated in place
            with the entries of ``cfg.model['kwargs']``.
        logger: Logger used for progress and interim accuracy messages.

    Raises:
        RuntimeError: If confidences came from the cache but the GCN
            feature file has to be written (the features are only
            available from a fresh inference run).
    """
    # Local imports, as in the original export code.
    import json
    import os
    import shutil

    for k, v in cfg.model['kwargs'].items():
        setattr(cfg.test_data, k, v)
    dataset = build_dataset(cfg.model['type'], cfg.test_data)

    folder = '{}_gcnv_k_{}_th_{}'.format(cfg.test_name, cfg.knn, cfg.th_sim)
    oprefix = osp.join(cfg.work_dir, folder)
    oname = osp.basename(rm_suffix(cfg.load_from))
    opath_pred_confs = osp.join(oprefix, 'pred_confs', '{}.npz'.format(oname))

    # Only a fresh inference run yields the GCN features.
    gcn_feat = None
    if osp.isfile(opath_pred_confs) and not cfg.force:
        data = np.load(opath_pred_confs)
        pred_confs = data['pred_confs']
        inst_num = data['inst_num']
        if inst_num != dataset.inst_num:
            logger.warning(
                'instance number in {} is different from dataset: {} vs {}'.
                format(opath_pred_confs, inst_num, len(dataset)))
    else:
        pred_confs, gcn_feat = test(model, dataset, cfg, logger)
        inst_num = dataset.inst_num

    logger.info('pred_confs: mean({:.4f}). max({:.4f}), min({:.4f})'.format(
        pred_confs.mean(), pred_confs.max(), pred_confs.min()))

    logger.info('Convert to cluster')
    with Timer('Predition to peaks'):
        pred_dist2peak, pred_peaks = confidence_to_peaks(
            dataset.dists, dataset.nbrs, pred_confs, cfg.max_conn)

    if not dataset.ignore_label and cfg.eval_interim:
        # evaluate the intermediate results
        for i in range(cfg.max_conn):
            num = len(dataset.peaks)
            # Samples without an i-th peak keep their own index as peak.
            pred_peaks_i = np.arange(num)
            peaks_i = np.arange(num)
            for j in range(num):
                if len(pred_peaks[j]) > i:
                    pred_peaks_i[j] = pred_peaks[j][i]
                if len(dataset.peaks[j]) > i:
                    peaks_i[j] = dataset.peaks[j][i]
            acc = accuracy(pred_peaks_i, peaks_i)
            logger.info('[{}-th conn] accuracy of peak match: {:.4f}'.format(
                i + 1, acc))

            # Fraction of samples whose peak carries the same label.
            acc = 0.
            for idx, peak in enumerate(pred_peaks_i):
                acc += int(dataset.idx2lb[peak] == dataset.idx2lb[idx])
            acc /= len(pred_peaks_i)
            logger.info(
                '[{}-th conn] accuracy of peak label match: {:.4f}'.format(
                    i + 1, acc))

    with Timer('Peaks to clusters (th_cut={})'.format(cfg.tau_0)):
        pred_labels = peaks_to_labels(pred_peaks, pred_dist2peak, cfg.tau_0,
                                      inst_num)

    if cfg.save_output:
        logger.info('save predicted confs to {}'.format(opath_pred_confs))
        mkdir_if_no_exists(opath_pred_confs)
        np.savez_compressed(opath_pred_confs,
                            pred_confs=pred_confs,
                            inst_num=inst_num)

        # save clustering results
        idx2lb = list2dict(pred_labels, ignore_value=-1)

        opath_pred_labels = osp.join(
            cfg.work_dir, folder, 'tau_{}_pred_labels.txt'.format(cfg.tau_0))
        logger.info('save predicted labels to {}'.format(opath_pred_labels))
        mkdir_if_no_exists(opath_pred_labels)
        write_meta(opath_pred_labels, idx2lb, inst_num=inst_num)

    # evaluation
    if not dataset.ignore_label:
        print('==> evaluation')
        for metric in cfg.metrics:
            evaluate(dataset.gt_labels, pred_labels, metric)

    if cfg.use_gcn_feat:
        # gcn_feat is saved to disk for GCN-E
        opath_feat = osp.join(oprefix, 'features', '{}.bin'.format(oname))
        if not osp.isfile(opath_feat) or cfg.force:
            if gcn_feat is None:
                raise RuntimeError(
                    'GCN features are unavailable because predictions were '
                    'loaded from {}; rerun with cfg.force enabled'.format(
                        opath_pred_confs))
            mkdir_if_no_exists(opath_feat)
            write_feat(opath_feat, gcn_feat)

        name = rm_suffix(osp.basename(opath_feat))
        prefix = oprefix
        ds = BasicDataset(name=name,
                          prefix=prefix,
                          dim=cfg.model['kwargs']['nhid'],
                          normalize=True)
        ds.info()

        # use top embedding of GCN to rebuild the kNN graph
        with Timer('connect to higher confidence with use_gcn_feat'):
            knn_prefix = osp.join(prefix, 'knns', name)
            knns = build_knns(knn_prefix,
                              ds.features,
                              cfg.knn_method,
                              cfg.knn,
                              is_rebuild=True)
            dists, nbrs = knns2ordered_nbrs(knns)

            pred_dist2peak, pred_peaks = confidence_to_peaks(
                dists, nbrs, pred_confs, cfg.max_conn)
            pred_labels = peaks_to_labels(pred_peaks, pred_dist2peak, cfg.tau,
                                          inst_num)

        # save clustering results
        if cfg.save_output:
            oname_meta = '{}_gcn_feat'.format(name)
            opath_pred_labels = osp.join(
                oprefix, oname_meta, 'tau_{}_pred_labels.txt'.format(cfg.tau))
            mkdir_if_no_exists(opath_pred_labels)

            idx2lb = list2dict(pred_labels, ignore_value=-1)
            write_meta(opath_pred_labels, idx2lb, inst_num=inst_num)

        # evaluation
        if not dataset.ignore_label:
            print('==> evaluation')
            for metric in cfg.metrics:
                evaluate(dataset.gt_labels, pred_labels, metric)

    # Export: copy every indexed image into a directory named after its
    # predicted cluster. The index maps an image path to its position in
    # pred_labels.
    # NOTE(review): both paths are hard-coded to one machine - consider
    # moving them into cfg.
    index_path = r'/home/finn/research/data/clustering_data/test_index.json'
    with open(index_path, 'r', encoding='utf-8') as index_file:
        img_labels = json.load(index_file)

    output = r'/home/finn/research/data/clustering_data/mr_gcn_output'
    for label in set(pred_labels):
        os.makedirs(os.path.join(output, f'cluter_{label}'), exist_ok=True)

    for image, label_idx in img_labels.items():
        cluster_dir = os.path.join(output, f'cluter_{pred_labels[label_idx]}')
        shutil.copy2(image, os.path.join(cluster_dir,
                                         os.path.basename(image)))