def train(epoch):
    """Run one full-batch training step on the GCN and report validation metrics.

    Args:
        epoch: zero-based epoch index, used only for logging.

    Returns:
        The validation NLL loss for this epoch (legacy ``Tensor.data[0]``
        scalar access — this file targets an old PyTorch version).

    NOTE(review): relies on module-level globals (``model``, ``optimizer``,
    ``features``, ``adj``, ``labels``, ``idx_train``, ``idx_val``, ``args``).
    """
    t = time.time()
    model.train()
    optimizer.zero_grad()
    # Full-batch forward pass over the whole graph.
    output = model(features, adj)
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    if not args.fastmode:
        # Evaluate validation set performance separately,
        # deactivates dropout during validation run.
        model.eval()
        output = model(features, adj)

    # In fastmode the training-mode output (with dropout active) is reused here.
    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.data[0]),
          'acc_train: {:.4f}'.format(acc_train.data[0]),
          'loss_val: {:.4f}'.format(loss_val.data[0]),
          'acc_val: {:.4f}'.format(acc_val.data[0]),
          'time: {:.4f}s'.format(time.time() - t))
    return loss_val.data[0]
def main():
    """Train and cross-validate a random forest on the normalized feature set.

    Python 2 script: reads the training CSV, preprocesses and extracts
    features, selects features with the forest, then reports per-fold and
    mean accuracy / log-loss over a 5-fold stratified CV.
    """
    logging.info("[Normalized + Feature Selection] Features: Mean, Std")
    print "Reading data..."
    X, Y = utils.read_data("../files/train.csv")
    print "Preprocessing..."
    X = preprocess(X)
    print "Extracting Features..."
    X = extractFeatures(X)
    Y = [int(x) for x in Y]
    X, Y = np.array(X), np.array(Y)
    # Sorted unique labels; used to map class indices for the log-loss.
    classMap = sorted(list(set(Y)))
    accs = []
    rf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    logging.info(rf)
    print "Selecting Features..."
    X = selectFeatures(X, Y, rf)
    folds = 5
    # Old sklearn API: StratifiedKFold(labels, n_folds).
    stf = cross_validation.StratifiedKFold(Y, folds)
    logging.info("CV Folds: " + str(folds))
    loss = []
    print "Testing..."
    for i, (train, test) in enumerate(stf):
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        rf.fit(X_train, y_train)
        predicted = rf.predict(X_test)
        probs = rf.predict_proba(X_test)
        # Clip probabilities away from 0/1 so the log-loss stays finite.
        probs = [[min(max(x, 0.001), 0.999) for x in y] for y in probs]
        loss.append(utils.logloss(probs, y_test, classMap))
        accs.append(utils.accuracy(predicted, y_test))
        logging.info("Accuracy(Fold {0}): ".format(i) + str(accs[len(accs) - 1]))
        logging.info("Loss(Fold {0}): ".format(i) + str(loss[len(loss) - 1]))
    logging.info("Mean Accuracy: " + str(np.mean(accs)))
    logging.info("Mean Loss: " + str(np.mean(loss)))
def run_model(train_file, train_labfile, test_file=None, valid_ratio=0.1,
              batchsize=240, epoch=10, neurons=36, n_hiddenlayer=2, lr=1e-2,
              base_dir='../Data/', save_prob=False, dropout_rate=0.2):
    """Build, train and evaluate the deep neural network with dropout.

    Args:
        train_file: training data file name under ``base_dir``.
        train_labfile: training label file name under ``base_dir``.
        test_file: optional test file; predictions are produced if given.
        valid_ratio: fraction of rows held out for validation.
        batchsize / epoch / neurons / n_hiddenlayer / lr / dropout_rate:
            training hyper-parameters forwarded to the model construction
            and training helpers.
        save_prob: when True, also dump per-frame probabilities to disk.
    """
    print("Start")
    st = datetime.now()
    data = load_data(base_dir + train_file)
    label_data, label_map = load_label(base_dir + train_labfile)

    # window size = 9, output = 48 phonemes
    n_input = data.shape[1] * 9
    n_output = 48
    # First N rows are used for training; the remainder for validation.
    N = int(data.shape[0] * (1 - valid_ratio))
    print("Done loading data. Start constructing the model...")
    functions = construct_DNN(n_input, n_output, archi=neurons,
                              n_hid_layers=n_hiddenlayer, lr=lr,
                              dropout_rate=dropout_rate)
    gradient_update, feed_forward = functions
    print("Finish constructing the model. Start Training...")
    result = train_model(N, epoch, batchsize, gradient_update, feed_forward,
                         data, label_data, n_output, dropout_rate)
    obj_history, valid_accu, cache = result

    # train accuracy
    train_accu = accuracy(0, N, data, feed_forward, n_output, label_data,
                          cache, dropout_rate)
    print("Training Accuracy: %.4f %%" % (100 * train_accu))

    # validation
    valid_accu = accuracy(N, data.shape[0], data, feed_forward, n_output,
                          label_data, cache, dropout_rate)
    print("Validation Accuracy: %.4f %%" % (100 * valid_accu))

    if save_prob:
        # Dump probabilities for the entire training file.
        accuracy(0, data.shape[0], data, feed_forward, n_output, label_data,
                 cache, dropout_rate, save_pred=True, save_name='ytrain_prob')

    if test_file:
        test_predict(base_dir + test_file, label_map, feed_forward, base_dir,
                     dropout_rate, save_prob=save_prob)
    print("Done, Using %s." % str(datetime.now() - st))
def compute_test():
    """Evaluate the trained GCN on the test split and print loss/accuracy.

    NOTE(review): uses module-level globals (``model``, ``features``, ``adj``,
    ``labels``, ``idx_test``) and legacy ``Tensor.data[0]`` scalar access,
    consistent with the old PyTorch version used elsewhere in this file.
    """
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.data[0]),
          "accuracy= {:.4f}".format(acc_test.data[0]))
def train_model(N, epoch, batchsize, gradient_update, feed_forward, data,
                label_data, n_output, dropout_rate):
    """Train the deep neural network with windowed minibatches.

    Each sample is a 9-frame context window (4 frames of left/right context
    around a center index), zero-padded ("silence") at the sequence edges.

    Args:
        N: number of rows used for training; rows >= N form the validation set.
        epoch: maximum number of passes over the training indices.
        batchsize: minibatch size.
        gradient_update: compiled update function (X, Y, learning-flag) -> cost.
        feed_forward: compiled inference function, used via ``accuracy``.
        data: frame-level features (pandas-style, indexed with ``.iloc``).
        label_data: frame labels, converted via ``gen_y_hat``.
        n_output: number of output classes.
        dropout_rate: forwarded to the validation ``accuracy`` call.

    Returns:
        (obj_history, valid_accu, cache): per-epoch objective values,
        per-epoch validation accuracies, and the label-encoding cache.
    """
    train_start = datetime.now()
    obj_history = []
    valid_accu = []
    cache = {}
    for j in range(epoch):
        # Permute center indices; +4 below keeps a full 4-frame left context.
        indexes = np.random.permutation(N - 8)
        objective = 0
        # train the model
        for i in range(int(N / batchsize)):
            if i % 1000 == 0:
                gc.collect()
            # make the minibatch data
            use_inds = indexes[i * batchsize:(i + 1) * batchsize] + 4
            batch_X = []
            for ind in use_inds:
                if ind < 4:
                    # NOTE(review): with the +4 shift above, ind >= 4 always,
                    # so this left-padding branch appears unreachable here —
                    # presumably kept for symmetry with other callers; verify.
                    sils = np.zeros((4 - ind) * data.shape[1])
                    dat = data.iloc[:(ind + 5)].values.ravel()
                    batch_X.append(np.concatenate((sils, dat)))
                elif ind > (N - 5):
                    # Right edge: pad missing future frames with zeros.
                    dat = data.iloc[(ind - 4):].values.ravel()
                    sils = np.zeros((5 - N + ind) * data.shape[1])
                    batch_X.append(np.concatenate((dat, sils)))
                else:
                    # Interior frame: full 9-frame window.
                    dat = data.iloc[(ind - 4):(ind + 5)].values.ravel()
                    batch_X.append(dat)
            batch_Y = [gen_y_hat(ind, n_output, data, label_data, cache)
                       for ind in use_inds]
            # update the model
            objective += gradient_update(batch_X, batch_Y, 1)
        obj_history.append(objective / int(N / batchsize))
        print('\tepoch: %d; obj: %.4f' % (j + 1, obj_history[-1]))
        # validation set
        valid_accu.append(accuracy(N, data.shape[0], data, feed_forward,
                                   n_output, label_data, cache, dropout_rate))
        print("\tCost: %.4f; valid accu: %.2f %%, %.4f seconds used.\n" %
              (obj_history[-1], 100 * valid_accu[-1],
               (datetime.now() - train_start).total_seconds()))
        # early stop: once accuracy has moved at all, stop on a >2% relative
        # drop from the previous epoch.
        if (valid_accu[0] != valid_accu[-1]):
            if valid_accu[-2] * 0.98 > valid_accu[-1]:
                print("Validation accuracy starts decreasing, stop training")
                break
    return obj_history, valid_accu, cache
def calculate_accuracy(self, y):
    """Return the size-weighted majority-vote accuracy over the clusters.

    For each of the ``self.nc`` clusters in ``self.z``, every member is
    predicted as the cluster's modal label; per-cluster accuracies are
    weighted by cluster size and averaged over ``self.n`` points.
    """
    weighted_sum = 0
    for cluster_idx in range(self.nc):
        members = self.z[cluster_idx]
        # Majority label of this cluster, tiled to one prediction per member.
        majority = utils.mode(y[members])
        predictions = mat.repmat(majority, len(members), 1)
        weighted_sum = weighted_sum + len(members) * utils.accuracy(y[members], predictions)
    return weighted_sum / self.n
def train(train_loader, model, criterion, optimizer, epoch, use_cuda):
    """Run one training epoch and return (average loss, average top-1 accuracy).

    Legacy PyTorch code: uses ``Variable`` wrappers, ``loss.data[0]`` scalar
    access and ``cuda(async=True)`` (``async`` later became a keyword), so this
    file targets PyTorch < 0.4 on Python < 3.7.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=len(train_loader))
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda(async=True)
        inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.data[0], inputs.size(0))
        top1.update(prec1[0], inputs.size(0))
        top5.update(prec5[0], inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(train_loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def calculate_accuracy(self, y):
    """Return the size-weighted majority-vote accuracy of the clustering.

    For each of the ``self.k`` cluster ids in ``self.c``, every member is
    predicted as the cluster's modal label; per-cluster accuracies are
    weighted by cluster size and averaged over ``self.n`` points.
    """
    weighted_sum = 0
    for cluster_id in range(self.k):
        members = np.where(self.c == cluster_id)[0]
        # Majority label of this cluster, tiled to one prediction per member.
        majority = utils.mode(y[members])
        predictions = mat.repmat(majority, members.size, 1)
        weighted_sum = weighted_sum + members.size * utils.accuracy(y[members], predictions)
    return weighted_sum / self.n
def test_classify_all(self):
    """One-vs-all logistic regression should reach ~95% training accuracy.

    Trains K one-vs-all classifiers on the bias-augmented design matrix and
    compares argmax predictions against labels where class 10 stands for 0
    (Octave/MATLAB convention in the source data).
    """
    m, n = self.X.shape
    _, K = self.y.shape
    # Prepend a column of ones as the bias/intercept term.
    X = np.hstack((np.ones((m, 1)), self.X))
    initial_theta = np.zeros((n + 1, K))
    lamda = 0.1
    theta = classifyall(initial_theta, X, self.y, lamda)
    hypo = hypothesis(X, theta)
    predicted_y = hypo.argmax(axis=1)
    # Remap label 10 back to digit 0 (the dataset's MATLAB-style encoding).
    expected_y = np.array([d if d != 10 else 0 for d in self.data['y'].reshape(-1)])
    acc = accuracy(predicted_y, expected_y)
    self.assertAlmostEqual(acc, 94.9, places=0)  # I can't get 94.9, only 94.64.... close enough I guess
def test_prediction(self):
    """Feed-forward with the pretrained weights should score ~97.5% accuracy."""
    m, _ = self.X.shape
    # Prepend a column of ones as the bias/intercept term.
    X = np.hstack((np.ones((m, 1)), self.X))
    predictions = feedforward(X, self.theta1, self.theta2)
    # because self.y uses 10 for 0, so the vectorized y representation is shifted.
    # if y=10, the output layer will look like [0,0,0,0,0,0,0,0,0,1], so argmax == 9 (i.e. 10 in octave/matlab, which represents class 0)
    # if y=1 , the output layer will look like [1,0,0,0,0,0,0,0,0,0], so argmax == 0 (i.e. 1 in octave/matlab, which represents class 1)
    # so the fix is, we minus 1 on all elements on y, so the argmax will be 0 indexed, which is good for python.
    expected = (self.y - 1).reshape(-1)
    acc = accuracy(predictions, expected)
    self.assertAlmostEqual(acc, 97.5, places=1)
def test_neuralnetwork(self):
    """End-to-end training of the two-layer network should exceed 93% accuracy.

    Python 2 test: initializes both weight matrices uniformly in
    [-epsilon, epsilon], trains with regularization, then compares
    predictions to labels shifted by one (MATLAB 1-based class encoding).
    """
    lamda = 1.0
    epsilon = 0.12
    # Random symmetric initialization to break symmetry between units.
    initial_theta1 = generate_theta(self.theta1.shape, epsilon)
    initial_theta2 = generate_theta(self.theta2.shape, epsilon)
    initial_theta = unrolltheta(initial_theta1, initial_theta2)
    optimized_theta1, optimized_theta2 = nnTrain(initial_theta, self.s_1,
                                                 self.s_2, self.K, self.X,
                                                 self.y, lamda)
    predictions = nnPredict(optimized_theta1, optimized_theta2, self.X)
    # Shift labels to 0-based indexing (10 in the data encodes digit 0).
    expected = (self.orig_y - 1).reshape(-1)
    acc = accuracy(predictions, expected)
    print "Accuracy: {} (Should be around 95%, plus or minus 1% due to random initialization)".format(acc)
    self.assertGreater(acc, 93)
def dann_loss(source_samples, target_samples, weight, scope=None):
    """Adds the domain adversarial (DANN) loss.

    Args:
      source_samples: a tensor of shape [num_samples, num_features].
      target_samples: a tensor of shape [num_samples, num_features].
      weight: the weight of the loss.
      scope: optional name scope for summary tags.

    Returns:
      a scalar tensor representing the correlation loss value.
    """
    with tf.variable_scope('dann'):
        batch_size = tf.shape(source_samples)[0]
        # Stack source (label 0) above target (label 1) samples.
        samples = tf.concat(axis=0, values=[source_samples, target_samples])
        samples = slim.flatten(samples)
        domain_selection_mask = tf.concat(
            axis=0, values=[tf.zeros((batch_size, 1)), tf.ones((batch_size, 1))])

        # Perform the gradient reversal and be careful with the shape.
        grl = grl_ops.gradient_reversal(samples)
        grl = tf.reshape(grl, (-1, samples.get_shape().as_list()[1]))

        # Small domain classifier: 100-unit hidden layer + sigmoid output.
        grl = slim.fully_connected(grl, 100, scope='fc1')
        logits = slim.fully_connected(grl, 1, activation_fn=None, scope='fc2')
        domain_predictions = tf.sigmoid(logits)

    domain_loss = tf.losses.log_loss(
        domain_selection_mask, domain_predictions, weights=weight)
    domain_accuracy = utils.accuracy(
        tf.round(domain_predictions), domain_selection_mask)

    # Abort the step early if the loss went non-finite (NaN/Inf).
    assert_op = tf.Assert(tf.is_finite(domain_loss), [domain_loss])
    with tf.control_dependencies([assert_op]):
        tag_loss = 'losses/domain_loss'
        tag_accuracy = 'losses/domain_accuracy'
        if scope:
            tag_loss = scope + tag_loss
            tag_accuracy = scope + tag_accuracy
        tf.summary.scalar(tag_loss, domain_loss)
        tf.summary.scalar(tag_accuracy, domain_accuracy)

    return domain_loss
def kfold(classification_algorithm, k):
    """Run k-fold cross-validation and return mean accuracy/precision/recall/F1.

    Args:
        classification_algorithm: callable ``(train, validation) -> classification``
            where ``train`` is a dict with "plus"/"minus" example lists.
        k: number of folds; fold files are loaded via ``utils.load_train(i)``.

    Returns:
        Dict mapping each metric name to its mean over the k folds.

    Bug fix: the original averaging loop was ``for k in res: res[k] /= k``,
    which rebound ``k`` to each string key and tried to divide a number by a
    string (TypeError) — and never divided by the fold count at all. The loop
    variable is now distinct, and totals are divided by the number of folds.
    """
    res = {"accuracy": 0, "precision": 0, "recall": 0, "f1": 0}
    for i in range(1, k + 1):
        # Fold i is the validation set; all other folds form the training set.
        validation = utils.load_train(i)
        validation = validation["plus"] + validation["minus"]
        train = {"plus": [], "minus": []}
        for j in range(1, k + 1):
            if j != i:
                extension = utils.load_train(j)
                train["plus"].extend(extension["plus"])
                train["minus"].extend(extension["minus"])
        classification = classification_algorithm(train, validation)
        res["accuracy"] += utils.accuracy(classification)
        res["precision"] += utils.precision(classification)
        res["recall"] += utils.recall(classification)
        res["f1"] += utils.F1_score(classification)
    # Average each accumulated metric over the k folds.
    for metric in res:
        res[metric] /= k
    print(res)
    return res
def main():
    """Baseline evaluation: constant-probability / constant-class predictor.

    Python 2 script. Reports the log-loss of uniform 0.001 probabilities and
    the accuracy of always predicting class 1, over a 5-fold stratified CV —
    a floor for comparing real models against.
    """
    X, Y = utils.read_data("../files/train_10.csv")
    n_target = len(set(Y))
    Y = map(int, Y)
    folds = 5
    # Old sklearn API: StratifiedKFold(labels, n_folds).
    stf = cross_validation.StratifiedKFold(Y, folds)
    loss = []
    accs = []
    classMap = sorted(list(set(Y)))
    X, Y = np.array(X), np.array(Y)
    print "Testing..."
    for i, (train, test) in enumerate(stf):
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        # Dummy predictions: constant probability for every class.
        probs = [[0.001 for x in range(n_target)] for y in range(len(y_test))]
        loss.append(utils.logloss(probs, y_test, classMap))
        # Dummy accuracy: always predict class 1.
        accs.append(utils.accuracy([1]*len(y_test), y_test))
        print "Accuracy(Fold {0}): ".format(i) + str(accs[len(accs) - 1])
        print "Loss(Fold {0}): ".format(i) + str(loss[len(loss) - 1])
    print "Mean Accuracy: " + str(np.mean(accs))
    print "Mean Loss: " + str(np.mean(loss))
def evaluate(segmentation_module, loader, args, dev_id, result_queue):
    """Evaluate a segmentation model on one device and stream stats to master.

    For each batch, averages multi-scale predictions (one per resized image in
    ``img_data``), takes the per-pixel argmax, computes accuracy and
    intersection/union counts, and pushes them onto ``result_queue`` for the
    aggregating master process.
    """
    segmentation_module.eval()

    for i, batch_data in enumerate(loader):
        # process data
        batch_data = batch_data[0]
        seg_label = as_numpy(batch_data['seg_label'][0])
        img_resized_list = batch_data['img_data']

        with torch.no_grad():
            segSize = (seg_label.shape[0], seg_label.shape[1])
            pred = torch.zeros(1, args.num_class, segSize[0], segSize[1])

            for img in img_resized_list:
                feed_dict = batch_data.copy()
                feed_dict['img_data'] = img
                # Strip fields the module does not consume before device copy.
                del feed_dict['img_ori']
                del feed_dict['info']
                feed_dict = async_copy_to(feed_dict, dev_id)

                # forward pass; average the scores over all scales.
                pred_tmp = segmentation_module(feed_dict, segSize=segSize)
                pred = pred + pred_tmp.cpu() / len(args.imgSize)

            _, preds = torch.max(pred.data.cpu(), dim=1)
            preds = as_numpy(preds.squeeze(0))

        # calculate accuracy and SEND THEM TO MASTER
        acc, pix = accuracy(preds, seg_label)
        intersection, union = intersectionAndUnion(preds, seg_label,
                                                   args.num_class)
        result_queue.put_nowait((acc, pix, intersection, union))

        # visualization
        if args.visualize:
            visualize_result(
                (batch_data['img_ori'], seg_label, batch_data['info']),
                preds, args)
def main():
    """Cross-validate an SVM (SVC with probability output) on the sample data.

    Python 2 script: 5-fold stratified CV reporting per-fold and mean
    accuracy / log-loss, with predicted probabilities clipped away from 0/1
    so the log-loss stays finite.
    """
    X, Y = utils.read_data("../files/train_10.csv")
    Y = map(int, Y)
    folds = 5
    # Old sklearn API: StratifiedKFold(labels, n_folds).
    stf = cross_validation.StratifiedKFold(Y, folds)
    loss = []
    svc = svm.SVC(probability=True)
    accs = []
    classMap = sorted(list(set(Y)))
    X, Y = np.array(X), np.array(Y)
    print "Testing..."
    for i, (train, test) in enumerate(stf):
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        svc.fit(X_train, y_train)
        predicted = svc.predict(X_test)
        probs = svc.predict_proba(X_test)
        # Clip probabilities away from 0/1 so the log-loss stays finite.
        probs = [[min(max(x, 0.001), 0.999) for x in y] for y in probs]
        loss.append(utils.logloss(probs, y_test, classMap))
        accs.append(utils.accuracy(predicted, y_test))
        print "Accuracy(Fold {0}): ".format(i) + str(accs[len(accs) - 1])
        print "Loss(Fold {0}): ".format(i) + str(loss[len(loss) - 1])
    print "Mean Accuracy: " + str(np.mean(accs))
    print "Mean Loss: " + str(np.mean(loss))
def train_one_epoch(model, criterion, optimizer, data_loader, epoch, print_freq, amp_level=None, scaler=None): model.train() # training log train_reader_cost = 0.0 train_run_cost = 0.0 total_samples = 0 acc1 = 0.0 acc5 = 0.0 reader_start = time.time() batch_past = 0 for batch_idx, (image, target) in enumerate(data_loader): train_reader_cost += time.time() - reader_start train_start = time.time() if amp_level is not None: with paddle.amp.auto_cast(level=amp_level): output = model(image) loss = criterion(output, target.astype("int64")) scaled = scaler.scale(loss) scaled.backward() scaler.minimize(optimizer, scaled) else: output = model(image) loss = criterion(output, target.astype("int64")) loss.backward() optimizer.step() optimizer.clear_grad() train_run_cost += time.time() - train_start acc = utils.accuracy(output, target, topk=(1, 5)) acc1 += acc[0].item() acc5 += acc[1].item() total_samples += image.shape[0] batch_past += 1 if batch_idx % print_freq == 0: msg = "[Epoch {}, iter: {}] top1: {:.5f}, top5: {:.5f}, lr: {:.5f}, loss: {:.5f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {}, avg_ips: {:.5f} images/sec.".format( epoch, batch_idx, acc1 / batch_past, acc5 / batch_past, optimizer.get_lr(), loss.item(), train_reader_cost / batch_past, (train_reader_cost + train_run_cost) / batch_past, total_samples / batch_past, total_samples / (train_reader_cost + train_run_cost)) if paddle.distributed.get_rank() <= 0: print(msg) sys.stdout.flush() train_reader_cost = 0.0 train_run_cost = 0.0 total_samples = 0 acc1 = 0.0 acc5 = 0.0 batch_past = 0 reader_start = time.time()
def train(train_loader, model, criterion, optimizer, lr_schedule, epoch):
    """Train an L0-regularized model for one epoch; return the average top-1 error.

    Tracks expected FLOPs / expected L0 per step, clamps each layer's gate
    parameters after every optimizer step, and logs epoch-level sparsity
    statistics to TensorBoard.

    NOTE(review): ``top1`` accumulates *error* (100 - precision), not accuracy.
    """
    global total_steps, exp_flops, exp_l0, args, writer, param_num
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    model.train()
    lr_schedule.step(epoch=epoch)
    end = time.time()
    for i, (input_, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        total_steps += 1
        if torch.cuda.is_available():
            target = target.cuda(non_blocking=True)
            input_ = input_.cuda()
        input_var = torch.autograd.Variable(input_)
        target_var = torch.autograd.Variable(target)

        # compute output; criterion also receives the model (for the L0 penalty term).
        output = model(input_var)
        loss = criterion(output, target_var, model)

        prec1 = accuracy(output.data, target, topk=(1,))[0]
        losses.update(loss.item(), input_.size(0))
        top1.update(100 - prec1.item(), input_.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # clamp the parameters (keeps each layer's gates in their valid range)
        layers = model.layers if not args.multi_gpu else model.module.layers
        for k, layer in enumerate(layers):
            layer.constrain_parameters()

        # Track expected FLOPs / L0 norm per optimization step.
        e_fl, e_l0 = model.get_exp_flops_l0() if not args.multi_gpu else \
            model.module.get_exp_flops_l0()
        exp_flops.append(e_fl)
        exp_l0.append(e_l0)
        if writer is not None:
            writer.add_scalar('stats_comp/exp_flops', e_fl, total_steps)
            writer.add_scalar('stats_comp/exp_l0', e_l0, total_steps)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # input()
        if i % args.print_freq == 0:
            print(' Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Err@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1))

    # Epoch-level sparsity diagnostics.
    # NOTE(review): placement of this tail relative to the print-freq branch is
    # reconstructed from collapsed source — the epoch-indexed TensorBoard tags
    # indicate epoch level; confirm against the original layout.
    w_sparsity = model.get_w_sparsity()
    print('sparsity w:', w_sparsity)
    nonzero_weight = countnonZeroWeights(model)
    print('Number of nonzero weights: ', nonzero_weight)
    neuron = model.count_active_neuron()
    print('Number of active neurons: ', neuron)
    reg_neuron = model.count_reg_neuron_sparsity()
    print('Regularized neuron sparsity: ', reg_neuron)

    # log to TensorBoard
    if writer is not None:
        writer.add_scalar('train/loss', losses.avg, epoch)
        writer.add_scalar('train/err', top1.avg, epoch)
        writer.add_scalar('w_sparsity/epoch', w_sparsity, epoch)
        writer.add_scalar('sparsity', 1 - (nonzero_weight / param_num), epoch)
        writer.add_scalar('neuron sparsity',
                          1 - (neuron / model.count_total_neuron()), epoch)
        writer.add_scalar('active neuron', neuron, epoch)
        writer.add_scalar('regularized neuron sparsity', reg_neuron, epoch)

    return top1.avg
def train(train_loader, model, criterion, optimizer, epoch, num_epochs,
          log_iter=1, logger=None, tag='train'):
    """Train for one epoch with a tqdm progress bar; return (top1, top5, loss) averages.

    Args:
        train_loader / model / criterion / optimizer: standard PyTorch objects.
        epoch, num_epochs: current and total epoch counts (for display/logging).
        log_iter: log per-iteration scalars every ``log_iter`` global iterations.
        logger: TensorBoard-style writer with ``add_scalar``.
        tag: label prefix used in scalar names and the progress bar.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.train()
    start_time = time.time()
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), ncols=175,
                desc='[{tag}]'.format(tag=tag.upper()))
    for i, (images, target) in pbar:
        # measure data loading time
        data_time.update(time.time() - start_time)
        images = images.cuda()
        target = target.cuda()

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output, target, topk=(1, 5))  # returns tensors!
        losses.update(loss.item(), images.size(0))
        top1.update(prec1.item(), images.size(0))
        top5.update(prec5.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - start_time)
        # Global iteration index across epochs, used for scalar logging.
        iter_num = epoch * len(train_loader) + i + 1
        if iter_num % log_iter == 0:
            logger.add_scalar('({})loss'.format(tag), losses.val, iter_num)
            logger.add_scalar('({})top1'.format(tag), top1.val, iter_num)
            logger.add_scalar('({})top5'.format(tag), top5.val, iter_num)
        pbar.set_description(
            '[{tag}] ep {epoch}/{num_epochs}\t'
            'loss: {loss.val:.4f} ({loss.avg:.4f})\t'
            'prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
            'prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'
            'fetch {data_time.val:.3f} ({data_time.avg:.3f})\t'
            '{img_sec:.2f} im/s ({img_sec_avg:.2f}))'.format(
                tag=tag.upper(), epoch=epoch + 1, num_epochs=num_epochs,
                loss=losses, top1=top1, top5=top5, data_time=data_time,
                img_sec=len(images) / batch_time.val,
                img_sec_avg=len(images) / batch_time.avg))
        start_time = time.time()

    # Epoch-level averages.
    logger.add_scalar('({})avg_loss'.format(tag), losses.avg, epoch + 1)
    logger.add_scalar('({})avg_top1'.format(tag), top1.avg, epoch + 1)
    logger.add_scalar('({})avg_top5'.format(tag), top5.avg, epoch + 1)
    return top1.avg, top5.avg, losses.avg
temperature=None, tanh_constant=None, entropy_reduction="mean") criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, eps=1e-3, weight_decay=2e-6) lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, T_max=args.epochs, eta_min=1e-5) trainer = TextNASTrainer( model, loss=criterion, metrics=lambda output, target: {"acc": accuracy(output, target)}, reward_function=accuracy, optimizer=optimizer, callbacks=[LRSchedulerCallback(lr_scheduler)], batch_size=args.batch_size, num_epochs=args.epochs, dataset_train=None, dataset_valid=None, train_loader=train_loader, valid_loader=valid_loader, test_loader=test_loader, log_frequency=args.log_frequency, mutator=mutator, mutator_lr=2e-3, mutator_steps=500, mutator_steps_aggregate=1,
import classif_regres x, y = utils.abrir_dados_balance_scale( './bases/balance-scale/balance-scale.data') # a) Considerando uma distribuicao Gaussiana dos atributos; k = 10 acc = np.zeros(k) for i in range(0, k): ind_rand = np.arange(0, y.size) np.random.shuffle(ind_rand) # indices em ordem aleatoria. ind_train = ind_rand[0:.75 * y.size] ind_test = ind_rand[.75 * y.size:] yhat = classif_regres.naive_bayes( x[ind_train, :], y[ind_train], x[ind_test, :], probmod='gauss') acc[i] = utils.accuracy(y[ind_test], yhat) print 'LETRA A - Utilizando probabilidade gaussiana.' print 'A acuracia media eh de {:3.3f} % e seu d.p. eh de {:3.3f} %\n'.format( 100 * np.mean(acc), 100 * np.std(acc)) # b) Discretizando os valores (em 5 partes cada atributo); k = 10 acc = np.zeros(k) for i in range(0, k): ind_rand = np.arange(0, y.size) np.random.shuffle(ind_rand) # indices em ordem aleatoria. ind_train = ind_rand[0:.75 * y.size] ind_test = ind_rand[.75 * y.size:] yhat = classif_regres.naive_bayes( x[ind_train, :], y[ind_train], x[ind_test, :], probmod='freq') acc[i] = utils.accuracy(y[ind_test], yhat)
def train(model, device, args, *, val_interval, bn_process=False,
          all_iters=None, cs=None):
    """Train a one-shot supernet for ``val_interval`` iterations on sampled paths.

    Each iteration samples a candidate architecture from ``cs`` (uniform FLOPs
    sampler), runs one optimization step on it, and periodically logs running
    top-1/top-5 error. Returns the updated global iteration counter.

    Args (keyword-only):
        val_interval: number of iterations to run before the caller validates.
        bn_process: when True, adjust BN momentum each iteration.
        all_iters: global iteration counter carried across calls.
        cs: candidate sampler exposing ``get_uniform_sample_cand()``.
    """
    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    t1 = time.time()
    Top1_err, Top5_err = 0.0, 0.0
    model.train()
    for iters in range(1, val_interval + 1):
        scheduler.step()
        if bn_process:
            adjust_bn_momentum(model, iters)

        all_iters += 1
        d_st = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)
        data_time = time.time() - d_st

        # get_random_cand = lambda:tuple(np.random.randint(4) for i in range(20))
        # flops_l, flops_r, flops_step = 290, 360, 10
        # bins = [[i, i+flops_step] for i in range(flops_l, flops_r, flops_step)]
        # def get_uniform_sample_cand(*,timeout=500):
        #     idx = np.random.randint(len(bins))
        #     l, r = bins[idx]
        #     for i in range(timeout):
        #         cand = get_random_cand()
        #         if l*1e6 <= get_cand_flops(cand) <= r*1e6:
        #             return cand
        #     return get_random_cand()
        # cand_gen = get_uniform_sample_cand()
        # print("First cand gen: ", cand_gen)

        # Sample one candidate architecture (path through the supernet).
        cand_gen = cs.get_uniform_sample_cand()
        # print("Can_gen: ", cand_gen)

        output = model(data, cand_gen)
        loss = loss_function(output, target)
        optimizer.zero_grad()
        loss.backward()

        # Drop all-zero gradients (parameters not on the sampled path) so the
        # optimizer's momentum/weight-decay do not touch them.
        for p in model.parameters():
            if p.grad is not None and p.grad.sum() == 0:
                p.grad = None
        optimizer.step()

        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        Top1_err += 1 - prec1.item() / 100
        Top5_err += 1 - prec5.item() / 100

        if all_iters % args.display_interval == 0:
            printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, scheduler.get_lr()[0], loss.item()) + \
                        'Top-1 err = {:.6f},\t'.format(Top1_err / args.display_interval) + \
                        'Top-5 err = {:.6f},\t'.format(Top5_err / args.display_interval) + \
                        'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / args.display_interval)
            logging.info(printInfo)
            t1 = time.time()
            Top1_err, Top5_err = 0.0, 0.0

        if all_iters % args.save_interval == 0:
            save_checkpoint(args.exp_name, {
                'state_dict': model.state_dict(),
            }, all_iters)

    return all_iters
def validate(self):
    """Evaluate the model on the validation loader and (in train mode) checkpoint.

    Legacy PyTorch (< 0.4): uses ``Variable(..., volatile=True)``,
    ``cuda(async=True)`` and ``loss.data[0]``. Restores the model's previous
    train/eval state before returning. When ``self.cmd == 'train'`` it also
    updates the best top-1/top-5 metrics and saves (and, if best, copies) a
    checkpoint.

    Raises:
        ValueError: if the validation loss becomes NaN.
    """
    batch_time = utils.AverageMeter()
    losses = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()

    # Remember training state so it can be restored at the end.
    training = self.model.training
    self.model.eval()

    end = time.time()
    for batch_idx, (imgs, target, img_files, class_ids) in tqdm.tqdm(
            enumerate(self.val_loader), total=len(self.val_loader),
            desc='Valid iteration={} epoch={}'.format(self.iteration, self.epoch),
            ncols=80, leave=False):
        gc.collect()
        if self.cuda:
            imgs, target = imgs.cuda(), target.cuda(async=True)
        imgs = Variable(imgs, volatile=True)
        target = Variable(target, volatile=True)

        output = self.model(imgs)
        loss = self.criterion(output, target)
        if np.isnan(float(loss.data[0])):
            raise ValueError('loss is nan while validating')

        # measure accuracy and record loss
        prec1, prec5 = utils.accuracy(output.data, target.data, topk=(1, 5))
        losses.update(loss.data[0], imgs.size(0))
        top1.update(prec1[0], imgs.size(0))
        top5.update(prec5[0], imgs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % self.print_freq == 0:
            log_str = 'Test: [{0}/{1}/{top1.count:}]\tepoch: {epoch:}\titer: {iteration:}\t' \
                      'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss: {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Prec@1: {top1.val:.3f} ({top1.avg:.3f})\t' \
                      'Prec@5: {top5.val:.3f} ({top5.avg:.3f})\t'.format(
                          batch_idx, len(self.val_loader),
                          epoch=self.epoch, iteration=self.iteration,
                          batch_time=batch_time, loss=losses,
                          top1=top1, top5=top5)
            print(log_str)
            self.print_log(log_str)

    if self.cmd == 'train':
        is_best = top1.avg > self.best_top1
        self.best_top1 = max(top1.avg, self.best_top1)
        self.best_top5 = max(top5.avg, self.best_top5)

    log_str = 'Test_summary: [{0}/{1}/{top1.count:}] epoch: {epoch:} iter: {iteration:}\t' \
              'BestPrec@1: {best_top1:.3f}\tBestPrec@5: {best_top5:.3f}\t' \
              'Time: {batch_time.avg:.3f}\tLoss: {loss.avg:.4f}\t' \
              'Prec@1: {top1.avg:.3f}\tPrec@5: {top5.avg:.3f}\t'.format(
                  batch_idx, len(self.val_loader),
                  epoch=self.epoch, iteration=self.iteration,
                  best_top1=self.best_top1, best_top5=self.best_top5,
                  batch_time=batch_time, loss=losses, top1=top1, top5=top5)
    print(log_str)
    self.print_log(log_str)

    # NOTE(review): checkpointing is grouped under cmd == 'train' since
    # ``is_best`` is only defined there — reconstructed from collapsed source;
    # confirm against the original layout.
    if self.cmd == 'train':
        checkpoint_file = os.path.join(self.checkpoint_dir, 'checkpoint.pth.tar')
        torch.save({
            'epoch': self.epoch,
            'iteration': self.iteration,
            'arch': self.model.__class__.__name__,
            'optim_state_dict': self.optim.state_dict(),
            'model_state_dict': self.model.state_dict(),
            'best_top1': self.best_top1,
            'batch_time': batch_time,
            'losses': losses,
            'top1': top1,
            'top5': top5,
        }, checkpoint_file)
        if is_best:
            shutil.copy(checkpoint_file,
                        os.path.join(self.checkpoint_dir, 'model_best.pth.tar'))
        if (self.epoch + 1) % 10 == 0:  # save each 10 epoch
            shutil.copy(checkpoint_file,
                        os.path.join(self.checkpoint_dir,
                                     'checkpoint-{}.pth.tar'.format(self.epoch)))

    if training:
        self.model.train()
def test(testloader, model, criterion, epoch, use_cuda):
    """Evaluate the model and collect features/scores/labels for the whole set.

    The model returns ``(scores, features)``; ``criterion`` is a list of two
    losses (cross-entropy on scores, low-rank loss on features) that are
    summed into a total loss. Per-batch outputs are concatenated so callers
    get the full feature matrix.

    Returns:
        (losses.avg, top1.avg, features, scores, labels)
    """
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    lowrank = AverageMeter()
    total_loss = AverageMeter()

    # switch to evaluate mode
    model.eval()

    features = None
    end = time.time()
    bar = Bar('Processing', max=len(testloader))
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)

            # save features
            features_i = outputs[1].data.cpu().numpy()
            scores_i = outputs[0].data.cpu().numpy()
            labels_i = targets.data.cpu().numpy()
            # First batch initializes the accumulators; later batches append.
            # NOTE(review): ``np.any(features)`` is also falsy for an all-zero
            # first batch, which would re-initialize — presumably never happens
            # in practice; verify.
            if not np.any(features):
                features = np.copy(features_i)
                scores = np.copy(scores_i)
                labels = np.copy(labels_i)
            else:
                features = np.concatenate((features, features_i), 0)
                scores = np.concatenate((scores, scores_i), 0)
                labels = np.concatenate((labels, labels_i), 0)

            # criterion is a list composed of crossentropy loss and lowrank loss.
            losses_list = [-1, -1]
            # output_Var contains scores in the first element and features in the second element
            loss = 0
            for cix, crit in enumerate(criterion):
                losses_list[cix] = crit(outputs[cix], targets)
                loss += losses_list[cix]

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs[0].data, targets.data, topk=(1, 5))
            losses.update(losses_list[0].item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))
            total_loss.update(loss.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '[{epoch: d}] ({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f} | lowrank: {lowrank: .4f} | total loss: {total_loss: .4f} '.format(
                epoch=epoch,
                batch=batch_idx + 1,
                size=len(testloader),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=losses.avg,
                top1=top1.avg,
                top5=top5.avg,
                lowrank=lowrank.avg,
                total_loss=total_loss.avg,
            )
            bar.next()
    bar.finish()
    return (losses.avg, top1.avg, features, scores, labels)
train_loss_log = AverageMeter() train_acc_log = AverageMeter() val_loss_log = AverageMeter() val_acc_log = AverageMeter() for i, (images, labels) in enumerate(train_loader): # Convert torch tensor to Variable images = Variable(images.to(device)) labels = Variable(labels.to(device)) # Forward + Backward + Optimize optimizer.zero_grad() # zero the gradient buffer outputs = net(images) train_loss = criterion(outputs, labels) train_loss.backward() optimizer.step() prec1, prec5 = accuracy(outputs.data, labels.data, topk=(1, 5)) train_loss_log.update(train_loss.item(), images.size(0)) train_acc_log.update(prec1.item(), images.size(0)) if (i + 1) % 100 == 0: print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Acc: %.8f' % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size, train_loss_log.avg, train_acc_log.avg)) # Test the Model net.eval() correct = 0 loss = 0 total = 0 for images, labels in test_loader: images = Variable(images).to(device)
def train(config, writer, logger, train_loader, valid_loader, model, w_optim,
          lr, epoch):
    """Fine-tune the searched network for one epoch with optional step-capped validation.

    Logs per-step loss/top-1/top-5 to TensorBoard and, when
    ``config.finetune_max_steps`` is set, validates after every step and stops
    early once the global step counter reaches that cap.
    """
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    losses = utils.AverageMeter()

    # Global step index across epochs (for TensorBoard x-axis).
    cur_step = epoch * len(train_loader)
    writer.add_scalar('train/lr', lr, cur_step)

    for step, (trn_X, trn_y) in enumerate(train_loader):
        trn_X, trn_y = trn_X.cuda(non_blocking=True), trn_y.cuda(
            non_blocking=True)
        N = trn_X.size(0)

        model.train()
        w_optim.zero_grad()
        logits = model(trn_X)
        loss = model.criterion(logits, trn_y)
        loss.backward()
        if config.w_grad_clip != 0:
            # gradient clipping
            nn.utils.clip_grad_norm_(model.weights(), config.w_grad_clip)
        w_optim.step()

        prec1, prec5 = utils.accuracy(logits, trn_y, topk=(1, 5))
        losses.update(loss.item(), N)
        top1.update(prec1.item(), N)
        top5.update(prec5.item(), N)

        # Log every step in capped mode, otherwise at print_freq and last step.
        if config.finetune_max_steps is not None or step % config.print_freq == 0 or step == len(
                train_loader) - 1:
            logger.info(
                "Train: [{:2d}/{}] Step {:03d}/{:03d} Loss {losses.avg:.3f} "
                "Prec@1 {top1.val:.1%} ({top1.avg:.1%}) Prec@5 {top5.val:.1%} ({top5.avg:.1%})"
                .format(epoch + 1, config.finetune_epochs, step,
                        len(train_loader) - 1, losses=losses, top1=top1,
                        top5=top5))

        writer.add_scalar('train/loss', loss.item(), cur_step)
        writer.add_scalar('train/top1', prec1.item(), cur_step)
        writer.add_scalar('train/top5', prec5.item(), cur_step)
        cur_step += 1

        if config.finetune_max_steps is not None:
            # Validate every step and stop once the global step cap is reached.
            validate(config, writer, logger, valid_loader, model, epoch,
                     cur_step, total_epochs=config.finetune_epochs)
            if cur_step >= config.finetune_max_steps:
                break

    logger.info("Train: [{:2d}/{}] Final Prec@1 {:.4%}".format(
        epoch + 1, config.finetune_epochs, top1.avg))
def train(model, device, args, *, val_interval, bn_process=False, all_iters=None, arch_loader=None, arch_batch=100):
    """Train a weight-sharing supernet for `val_interval` iterations.

    For every data batch, `arch_batch` architectures are sampled from
    `arch_loader` and each one is forwarded/backwarded on the same batch
    before a single optimizer step. Returns the updated `all_iters`
    global iteration counter.
    """
    print("start training...")
    assert arch_loader is not None
    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    # train_dataprovider = args.train_dataprovider
    train_loader = args.train_loader
    t1 = time.time()
    Top1_err, Top5_err = 0.0, 0.0
    model.train()
    for iters in range(1, val_interval + 1):
        if bn_process:
            adjust_bn_momentum(model, iters)
        all_iters += 1
        d_st = time.time()
        for data, target in train_loader:
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)
            data_time = time.time() - d_st
            arch_batches = arch_loader.sample_batch_arc(arch_batch)
            optimizer.zero_grad()
            for i in range(len(arch_batches)):
                # one sampled architecture at a time; gradients accumulate
                # across all sampled architectures before the single step
                # with torch.cuda.amp.autocast():
                output = model(data, arch_batches[i])
                loss = loss_function(output, target)
                loss.backward()
                # drop all-zero grads (paths unused by this architecture)
                for p in model.parameters():
                    if p.grad is not None and p.grad.sum() == 0:
                        p.grad = None
                # acc1, acc5 = accuracy(output, target, topk=(1, 5))
                # print("\rsmall batch acc1:", acc1.item() / 100, end='')
            torch.nn.utils.clip_grad_norm_(model.parameters(), 20)
            optimizer.step()
            scheduler.step()
            # accuracy of the *last* sampled architecture only
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            Top1_err += 1 - prec1.item() / 100
            Top5_err += 1 - prec5.item() / 100
            # if all_iters % args.display_interval == 0:
            # NOTE(review): `if True:` disables the display_interval gate,
            # so errors are averaged over display_interval but reset every
            # batch — looks like leftover debug code; confirm.
            if True:
                printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, scheduler.get_lr()[0], loss.item()) + \
                    'Top-1 err = {:.6f},\t'.format(Top1_err / args.display_interval) + \
                    'Top-5 err = {:.6f},\t'.format(Top5_err / args.display_interval) + \
                    'data_time = {:.6f},\ttrain_time = {:.6f}'.format(
                        data_time, (time.time() - t1) / args.display_interval)
                logging.info(printInfo)
                t1 = time.time()
                Top1_err, Top5_err = 0.0, 0.0
            if all_iters % args.save_interval == 0:
                save_checkpoint({
                    'state_dict': model.state_dict(),
                }, all_iters)
    return all_iters
def validate(val_loader, model, criterion, epoch, epoch_to_save_log, logger, tag='val'):
    """Evaluate `model` over `val_loader`; return average top-1 accuracy.

    Progress is rendered with tqdm; per-epoch averages are written to the
    TensorBoard `logger` under scalar names derived from `tag`.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to evaluate mode
    model.eval()
    pbar = tqdm(enumerate(val_loader), total=len(val_loader), ncols=180,
                desc='[{tag}]'.format(tag=tag.upper()))
    with torch.no_grad():
        end = time.time()
        for i, (images, target) in pbar:
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            # compute output
            output = model(images)
            loss = criterion(output, target)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            pbar.set_description(
                '[{tag}] epoch {epoch}\t'
                'loss: {loss.val:.4f} ({loss.avg:.4f})\t'
                'prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                'prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'
                '{img_sec:.2f} im/s ({img_sec_avg:.2f} im/s))'.format(
                    tag=tag.upper(), epoch=epoch + 1, loss=losses,
                    top1=top1, top5=top5,
                    img_sec=len(images) / batch_time.val,
                    img_sec_avg=len(images) / batch_time.avg))
    # epoch-level summaries for TensorBoard (1-indexed epoch)
    logger.add_scalar('({})avg_loss'.format(tag), losses.avg, epoch_to_save_log + 1)
    logger.add_scalar('({})avg_top1'.format(tag), top1.avg, epoch_to_save_log + 1)
    logger.add_scalar('({})avg_top5'.format(tag), top5.avg, epoch_to_save_log + 1)
    print(
        '[{tag}] epoch {epoch}: Loss: {loss.avg:.3f} '
        'Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(tag=tag.upper(),
                                                             epoch=epoch + 1,
                                                             loss=losses,
                                                             top1=top1,
                                                             top5=top5))
    return top1.avg
# (fragment, Python 2) Decipherment experiment over 4 plaintexts: build
# output paths, generate random ciphers, encipher, print frequency
# statistics, then decode and score each ciphertext.
output_filenames.append('./test_output/deciphered_' + str(i) + '.txt')
ciphers = [utils.random_cipher() for i in range(4)]
ciphertexts = [utils.encipher(ciphers[i], plaintexts[i]) for i in range(4)]

""" STATISTICS """
test_mode = True
if test_mode:
    # Print simple frequency statistics for each plaintext.
    for text in plaintexts:
        symbol_freqs = utils.symbol_freq(text, sort=True)
        freqs_followed_by = utils.freq_followed_by(text, ' ', sort=True)
        most_frequent_words = utils.frequent_word_freqs(text, sort=True)
        #print "5 most frequent symbols: {}".format(symbol_freqs[:5])
        #print "Top by freq followed by space: {}".format(freqs_followed_by[:4])
        print "Most frequent words: {}".format(most_frequent_words[:5])

# Decode each ciphertext and score the result against its plaintext.
accuracies = []
for i in range(4):
    f = decode(ciphertexts[i], output_filenames[i])
    accuracy = utils.accuracy(f, ciphertexts[i], plaintexts[i])
    print "Accuracy: {}".format(accuracy)
    accuracies.append(accuracy)
print accuracies
def validate(val_loader, model, criterion):
    """Per-attribute validation for a multi-head attribute classifier.

    For each sample only its *target* attribute head is scored; per-attribute
    meters are indexed by the selected-attribute index. Returns
    (loss_avg, prec1_avg, top1_avg) where top1_avg is the per-attribute list.
    """
    bar = Bar('Processing', max=len(val_loader))
    batch_time = AverageMeter()
    data_time = AverageMeter()
    # one meter per selected attribute
    losses = [AverageMeter() for _ in range(len(args.selected_attrs))]
    top1 = [AverageMeter() for _ in range(len(args.selected_attrs))]
    # switch to evaluate mode
    model.eval()
    loss_avg = 0
    prec1_avg = 0
    top1_avg = []
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_idx) in enumerate(val_loader):
            # measure data loading time
            data_time.update(time.time() - end)
            target = target.cuda(non_blocking=True)
            # compute output
            output = model(input)
            # measure accuracy and record loss
            loss = []
            prec1 = []
            count = 0
            for j in range(len(output)):
                # map head index to selected-attribute index (-1 = not selected)
                idx = org_idexs2selected_idxes[j]
                for batch_idx in range(input.size(0)):
                    b_target_idx = target_idx[batch_idx]
                    if idx != -1 and idx == b_target_idx:
                        count += 1
                        cur_output = output[j][batch_idx]
                        cur_output = cur_output.reshape((1, 2))  # binary head
                        cur_target = target[batch_idx, idx]
                        cur_target = cur_target.reshape((1, ))
                        cur_loss = criterion(cur_output, cur_target)
                        cur_prec1 = accuracy(cur_output, cur_target, topk=(1, ))
                        loss.append(cur_loss)
                        prec1.append(cur_prec1)
                        losses[idx].update(loss[-1].item(), 1)
                        top1[idx].update(prec1[-1][0].item(), 1)
            assert count == input.size(
                0
            )  # since for each sample, we only calcuate its target attribute
            losses_avg = [losses[k].avg for k in range(len(losses))]
            top1_avg = [top1[k].avg for k in range(len(top1))]
            loss_avg = sum(losses_avg) / len(losses_avg)
            prec1_avg = sum(top1_avg) / len(top1_avg)
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f}'.format(
                batch=i + 1,
                size=len(val_loader),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=loss_avg,
                top1=prec1_avg,
            )
            bar.next()
    bar.finish()
    return (loss_avg, prec1_avg, top1_avg)
        # (fragment) Tallies one prediction against every class label; the
        # `else` below pairs with a correctness check above this chunk.
        if core_value != classification:
            confusion_matrices[core_value]["tn"] += 1
    else:
        # This is a false positive for this classification
        confusion_matrices[classification]["fp"] += 1
        # A FP for this one is a FN for the correct one
        confusion_matrices[record[arff.attr_position["class"]]]["fn"] += 1

# Aggregate micro/macro precision, recall, F1 and accuracy for this CV run.
results_dict = {}
results_dict["mp"] = utils.micro_precision(arff.attributes[arff.attr_position["class"]][1], confusion_matrices)
results_dict["mr"] = utils.micro_recall(arff.attributes[arff.attr_position["class"]][1], confusion_matrices)
# NOTE(review): `micfo_f1` looks like a typo for `micro_f1`, but the name
# must match whatever `utils` actually exports — verify before renaming.
results_dict["mf1"] = utils.micfo_f1(arff.attributes[arff.attr_position["class"]][1], confusion_matrices)
results_dict["Mp"] = utils.macro_precision(arff.attributes[arff.attr_position["class"]][1], confusion_matrices)
results_dict["Mr"] = utils.macro_recall(arff.attributes[arff.attr_position["class"]][1], confusion_matrices)
results_dict["Mf1"] = utils.macro_f1(arff.attributes[arff.attr_position["class"]][1], confusion_matrices)
results_dict["ac"] = utils.accuracy(arff.attributes[arff.attr_position["class"]][1], confusion_matrices)
print("Micro Precision " + str(run_num) + ": " + str(results_dict["mp"]))
print("Micro Recall " + str(run_num) + ": " + str(results_dict["mr"]))
print("Micro F1 " + str(run_num) + ": " + str(results_dict["mf1"]))
print("Macro Precision " + str(run_num) + ": " + str(results_dict["Mp"]))
print("Macro Recall " + str(run_num) + ": " + str(results_dict["Mr"]))
print("Macro F1 " + str(run_num) + ": " + str(results_dict["Mf1"]))
print("Accuracy " + str(run_num) + ": " + str(results_dict["ac"]))
validation_results.append(results_dict)
# Push the test data back into the training data
arff.data.extend(training_records)
# Get the averages
# Smoke-test loop over `train_loader`: prints/visualizes the first batches,
# then runs one training step per batch (stops after `cnt` exceeds 10).
for batch_image, batch_label in train_loader:
    image = batch_image[0, :]
    image = image.numpy()
    # image=np.array(image)
    image = image.transpose(1, 2, 0)  # channel order [c,h,w] -> [h,w,c]
    #image_processing.cv_show_image("image", image)
    print("batch_image.shape:{},batch_label:{}".format(batch_image.shape, batch_label))
    # batch_x, batch_y = Variable(batch_x), Variable(batch_y)
    cnt += 1
    if cnt > 10:
        break
    # BUG FIX: `async` is a reserved keyword since Python 3.7 (SyntaxError);
    # PyTorch renamed the argument to `non_blocking`.
    batch_image, batch_label = batch_image.cuda(), batch_label.cuda(non_blocking=True)
    # BUG FIX: `volatile=True` is deprecated and contradicted the
    # loss.backward() below (volatile disabled autograd); plain Variables
    # keep gradients enabled for the training step.
    batch_image, batch_label = torch.autograd.Variable(batch_image), torch.autograd.Variable(batch_label)
    outputs = model(batch_image)
    loss = criterion(outputs, batch_label)
    # BUG FIX: topk=(1) is just the int 1; accuracy() expects a tuple.
    prec1 = accuracy(outputs.data, batch_label.data, topk=(1,))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# '''
# The two approaches below: with TorchDataset(repeat=None) the loader can
# iterate forever; exiting the loop is governed by max_iterate.
# '''
# train_data = TorchDataset(filename=train_filename, image_dir=image_dir, repeat=None)
# train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=False)
# # [2] second iteration method
# for step, (batch_image, batch_label) in enumerate(train_loader):
#     image = batch_image[0, :]
#     image = image.numpy()  # image=np.array(image)
#     image = image.transpose(1, 2, 0)  # channel order [c,h,w] -> [h,w,c]
def test(val_loader, model, criterion, epoch, use_cuda):
    """Evaluate `model` on `val_loader`; return (avg loss, avg top-1).

    Progress is rendered with tqdm (the old `Bar` code is retired).
    """
    global best_acc
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to evaluate mode
    model.eval()
    end = time.time()
    bar = tqdm(total=len(val_loader))
    for batch_idx, (inputs, targets) in enumerate(val_loader):
        bar.update(1)
        # measure data loading time
        data_time.update(time.time() - end)
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        # BUG FIX: Variable(volatile=True) has been deprecated since
        # PyTorch 0.4 (it became a no-op); torch.no_grad() is the supported
        # way to disable autograd during evaluation.
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, targets)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # plot progress. BUG FIX: "'N/A' or bar.elapsed_td" always evaluated
        # to 'N/A' (truthy left operand) — the dead alternative is removed;
        # tqdm has no elapsed_td/eta_td anyway.
        bar.set_description(
            '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'
            .format(
                batch=batch_idx + 1,
                size=len(val_loader),
                data=data_time.avg,
                bt=batch_time.avg,
                total='N/A',
                eta='N/A',
                loss=losses.avg,
                top1=top1.avg,
                top5=top5.avg,
            ))
    bar.close()
    return (losses.avg, top1.avg)
# For each set of parameters in 'grid_search', train and evaluate softmax classifier. # Save search history in dictionary 'results'. # - KEY: tuple of (# of epochs, learning rate) # - VALUE: accuracy on validation data # Save the best validation accuracy and optimized model in 'best_acc' and 'best_model'. for ep, lr in grid_search: # Make model & optimizer model = SoftmaxClassifier(num_features, num_label) optim = SGD() model.train(train_x, train_y, ep, batch_size, lr, optim) pred, prob = model.eval(valid_x) valid_acc = accuracy(pred, valid_y) print('Accuracy on valid data : %f\n' % valid_acc) results[ep, lr] = valid_acc if valid_acc > best_acc: best_acc = valid_acc best_model = model for ep, lr in sorted(results): valid_acc = results[(ep, lr)] print('# epochs : %d lr : %e valid accuracy : %f' % (ep, lr, valid_acc)) print('best validation accuracy achieved: %f' % best_acc) # Evaluate best model on test data
def train_eopch(train_loader, model, optimizer, criterion, epoch, epochs, total_loss):
    """Run one training epoch (name kept as-is — callers use `train_eopch`).

    Adjusts the learning rate per batch, adds a group-lasso penalty from all
    LearnedGroupConv layers when enabled, and returns
    (top1.avg, top5.avg, losses.avg, last lr, accumulated total_loss).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    learned_module_list = []  # add all the LGC layers into the list
    # Switch to train mode
    model.train()
    # Find all learned convs to prepare for group lasso loss
    for m in model.modules():
        if m.__str__().startswith('LearnedGroupConv'):
            learned_module_list.append(m)
    running_lr = None
    beginTime = time.time()
    for i, (input, target) in enumerate(train_loader):
        # overall training progress in [0, 1), used for lr and model schedule
        progress = float(epoch * len(train_loader) + i) / (epochs * len(train_loader))
        args.progress = progress
        lr = adjust_learning_rate(optimizer, epoch, args, batch=i,
                                  nBatch=len(train_loader), method=args.lr_type)
        if running_lr is None:
            running_lr = lr
        data_time.update(time.time() - beginTime)
        # input Tensor CPU --> GPU
        if torch.cuda.is_available():
            input = input.cuda()
            target = target.cuda()
        # Tensor --> Variable --> model
        inputVar = Variable(input, requires_grad=True)
        targetVar = Variable(target)
        # compute the result
        output, _ = model(inputVar, progress)
        loss = criterion(output, targetVar)
        # Add group lasso loss to the basic loss value
        if args.group_lasso_lambda > 0:
            lasso_loss = 0
            for m in learned_module_list:
                lasso_loss = lasso_loss + m.lasso_loss
            loss = loss + args.group_lasso_lambda * lasso_loss
        total_loss += loss.item()
        # Measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))
        # Compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_time.update(time.time() - beginTime)
        # BUG FIX: the per-batch timer was never restarted (the old code
        # assigned an unused variable `end`), so data_time/batch_time grew
        # cumulatively from the start of the epoch instead of measuring
        # each batch. Restart the timer here.
        beginTime = time.time()
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f}\t'  # ({batch_time.avg:.3f})
                  'Data {data_time.val:.3f}\t'  # ({data_time.avg:.3f})
                  'Loss {loss.val:.4f}\t'  # ({loss.avg:.4f})
                  'Prec@1 {top1.val:.3f}\t'  # ({top1.avg:.3f})
                  'Prec@5 {top5.val:.3f}\t'  # ({top5.avg:.3f})
                  'lr {lr: .4f}'.format(epoch, i, len(train_loader),
                                        batch_time=batch_time,
                                        data_time=data_time, loss=losses,
                                        top1=top1, top5=top5, lr=lr))
    return top1.avg, top5.avg, losses.avg, lr, total_loss
import numpy as np import data import utils VALIDATION_SPLIT_PATH = "validation_split_v1.pkl" if len(sys.argv) != 2: sys.exit("Usage: eval_predictions.py <validation_predictions_path>") path = sys.argv[1] predictions = np.load(path) split = np.load(VALIDATION_SPLIT_PATH) labels_valid = data.labels_train[split["indices_valid"]] loss = utils.log_loss(predictions, labels_valid) acc = utils.accuracy(predictions, labels_valid) loss_std = utils.log_loss_std(predictions, labels_valid) print "Validation loss:\t\t\t%.6f" % loss print "Classification accuracy:\t\t%.2f%%" % (acc * 100) print "Validation loss std:\t%.6f" % loss_std print for k in xrange(5): acc_k = utils.accuracy_topn(predictions, labels_valid, n=k + 1) print "Top-%d accuracy:\t\t%.2f%%" % (k + 1, acc_k * 100)
    # (fragment) Tail of a momentum-SGD training function whose signature is
    # above this chunk; `velocity` and `gamma` are defined there.
    minibatches = get_minibatch(X_train, y_train)
    for iter in range(1, 100 + 1):
        # sample a random minibatch each step
        idx = np.random.randint(0, len(minibatches))
        X_mini, y_mini = minibatches[idx]
        grad = get_minibatch_grad(model, X_mini, y_mini)
        for layer in grad:
            # classical momentum update with fixed learning rate 1e-3
            velocity[layer] = gamma * velocity[layer] + 1e-3 * grad[layer]
            model[layer] += velocity[layer]
    return model


if __name__ == '__main__':
    X, y = make_moons(n_samples=5000, random_state=42, noise=0.1)
    x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=123)
    mean_accuracy = []
    # average test accuracy over 15 random initializations
    for j in range(15):
        model = make_network()
        model = sgd(model, x_train, y_train)
        y_pred = predict(model, x_test, y_test)
        acc = accuracy(y_test, y_pred)
        mean_accuracy.append(acc)
    print(np.mean(mean_accuracy))
def validate(model_name, args):
    """Build `model_name`, evaluate it on the dataset at args.data, and
    return an OrderedDict of top-1/top-5/loss/param-count results."""
    # create model
    model = create_model(
        model_name,
        num_classes=args.num_classes,
        pretrained=args.pretrained,
        checkpoint_path=args.checkpoint)
    param_count = sum([m.numel() for m in model.parameters()])
    # NOTE(review): message uses args.model while the function receives
    # model_name — presumably identical; confirm against the caller.
    print('Model %s created, param count: %d' % (args.model, param_count))
    data_config = resolve_data_config(model, args)
    criterion = nn.CrossEntropyLoss()
    if not args.no_cuda:
        if args.num_gpu > 1:
            model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
        else:
            model = model.cuda()
        criterion = criterion.cuda()
    loader = create_loader(
        Dataset(args.data),
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=not args.no_cuda,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        crop_pct=data_config['crop_pct'])
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            if not args.no_cuda:
                target = target.cuda()
                input = input.cuda()
            # compute output
            output = model(input)
            loss = criterion(output, target)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s) \t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(loader), batch_time=batch_time,
                          rate_avg=input.size(0) / batch_time.avg,
                          loss=losses, top1=top1, top5=top5))
    results = OrderedDict(
        model=model_name,
        top1=round(top1.avg, 3), top1_err=round(100 - top1.avg, 3),
        top5=round(top5.avg, 3), top5_err=round(100 - top5.avg, 3),
        loss=round(losses.avg, 4),
        param_count=round(param_count / 1e6, 2))
    print(' * Prec@1 {:.3f} ({:.3f}) Prec@5 {:.3f} ({:.3f})'.format(
        results['top1'], results['top1_err'], results['top5'], results['top5_err']))
    return results
# (fragment) Finish building the Gluon network begun above this chunk,
# then train a 10-class softmax classifier for 5 epochs.
with net.name_scope():
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(10))
net.initialize()

# step 3 : Loss function
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# step 4 : Optimizer
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

# step 5 : Train
import utils
for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():  # record ops for autodiff
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # trainer.step scales the update by 1/batch_size internally
        trainer.step(batch_size)
        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)
    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss / len(train_data), train_acc / len(train_data), test_acc))
def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
    """One training epoch with a quantization MSE penalty; returns
    (avg loss, avg top-1 accuracy).

    On the last batch, activation recording is switched on (unless
    args.fixbit) so quantization statistics can be collected, and switched
    off again after the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    bar = Bar('Processing', max=len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if batch_idx == len(trainloader) - 1:
            if not args.fixbit:
                resnet.RecordActivation = True
        # switch to train mode
        model.train()
        # measure data loading time
        data_time.update(time.time() - end)
        if use_cuda:
            # BUG FIX: `async` is a reserved keyword since Python 3.7
            # (SyntaxError); PyTorch renamed the argument `non_blocking`.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(
            inputs), torch.autograd.Variable(targets)
        # compute output; add the quantization MSE penalty kept on the module
        outputs = model(inputs)
        loss = criterion(outputs, targets) + resnet.loss_MSE
        print("In method train: loss_MSE = " + str(resnet.loss_MSE) +
              " total loss = " + str(loss))
        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.data.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(trainloader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    resnet.RecordActivation = False
    return (losses.avg, top1.avg)
def train(trainloader, model, criterion, optimizer, look_up_table, epoch, use_cuda):
    """One training epoch with periodic TRP low-rank approximation.

    Every `period` batches the model is re-approximated via
    low_rank_approx, and (when enabled) a nuclear-norm gradient term from
    `sub` is added before the optimizer step. Returns (avg loss, avg top-1).
    """
    # switch to train mode
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    bar = Bar('Processing', max=len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if batch_idx % period == 0:
            model, sub = low_rank_approx(model, look_up_table,
                                         criterion=EnergyThreshold(0.9),
                                         use_trp=args.trp, type=args.type)
        # measure data loading time
        data_time.update(time.time() - end)
        if use_cuda:
            # BUG FIX: `async` is a reserved keyword since Python 3.7
            # (SyntaxError); PyTorch renamed the argument `non_blocking`.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        writer.add_scalar('Loss/train', losses.avg)
        writer.add_scalar('Accuracy/train', top1.avg)
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        # apply nuclear norm regularization
        if args.nuclear_weight is not None and batch_idx % period == 0:
            for name, m in model.named_modules():
                if name in look_up_table:
                    m.weight.grad.data.add_(args.nuclear_weight * sub[name])
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(trainloader),
            data=data_time.avg,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
h1 = relu(nd.dot(X, W1) + b1)# 隐含层输出 非线性激活 output = nd.dot(h1, W2) + b2 return output ##Softmax和交叉熵损失函数 ## softmax 回归实现 exp(Xi)/(sum(exp(Xi))) 归一化概率 使得 10类概率之和为1 #交叉熵损失函数 softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss() ## 开始训练 learning_rate = .5#学习率 epochs = 7 ##训练迭代训练集 次数 for epoch in range(epochs):##每迭代一次训练集 train_loss = 0.##损失 train_acc = 0. ##准确度 for data, label in train_data:#训练集 with autograd.record():#自动微分 output = net(data)#模型输出 向前传播 loss = softmax_cross_entropy(output, label)#计算损失 loss.backward()#向后传播 utils.SGD(params, learning_rate/batch_size)#随机梯度下降 训练更新参数 学习率递减 train_loss += nd.mean(loss).asscalar()#损失 train_acc += utils.accuracy(output, label)#准确度 test_acc = utils.evaluate_accuracy(test_data, net)#测试集测试 print("E次数 %d. 损失: %f, 训练准确度 %f, 测试准确度%f" % ( epoch, train_loss/len(train_data), train_acc/len(train_data), test_acc))
def train(train_loader, model, criterion, optimizer, epoch, use_cuda, logger):
    """One training epoch over a DALI pipeline loader.

    Returns (losses.avg, top1.avg, top5.avg) — plus iter_count as a fourth
    element when a scheduled-restart optimizer (srsgd/sradam/...) is used.
    Epoch 0 is excluded from the global timing meters (warm-up).
    """
    global batch_time_global, data_time_global
    # switch to train mode
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    # DALI loader exposes _size; round up to a whole number of batches
    train_loader_len = int(train_loader._size / args.train_batch) + 1
    bar = Bar('Processing', max=train_loader_len)
    for batch_idx, data in enumerate(train_loader):
        # measure data loading time
        data_time_lap = time.time() - end
        data_time.update(data_time_lap)
        if epoch > 0:
            data_time_global.update(data_time_lap)
        # DALI batches arrive as a list of dicts
        inputs = data[0]["data"]
        targets = data[0]["label"].squeeze().cuda().long()
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)
        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(to_python_float(loss.data), inputs.size(0))
        top1.update(to_python_float(prec1), inputs.size(0))
        top5.update(to_python_float(prec5), inputs.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # for restarting
        if args.optimizer.lower() == 'srsgd' or args.optimizer.lower() == 'sradam' or args.optimizer.lower() == 'sradamw' or args.optimizer.lower() == 'srradam':
            iter_count, iter_total = optimizer.update_iter()
        # measure elapsed time
        batch_time_lap = time.time() - end
        batch_time.update(batch_time_lap)
        if epoch > 0:
            batch_time_global.update(batch_time_lap)
        end = time.time()
        # plot progress
        bar.suffix = '(Epoch {epoch}, {batch}/{size}) Data: {data:.3f}s/{data_global:.3f}s | Batch: {bt:.3f}s/{bt_global:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            epoch=epoch,
            batch=batch_idx + 1,
            size=train_loader_len,
            data=data_time.val,
            data_global=data_time_global.avg,
            bt=batch_time.val,
            bt_global=batch_time_global.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
        logger.file.write(bar.suffix)
    bar.finish()
    if args.optimizer.lower() == 'srsgd' or args.optimizer.lower() == 'sradam' or args.optimizer.lower() == 'sradamw' or args.optimizer.lower() == 'srradam':
        return (losses.avg, top1.avg, top5.avg, iter_count)
    else:
        return (losses.avg, top1.avg, top5.avg)
)  # (fragment) closes a call begun above this chunk

# MegEngine training loop: cross-entropy on each generated batch, per-epoch
# average loss/accuracy report, final state dict saved to disk.
total_epochs = 10
for epoch in range(total_epochs):
    total_loss = 0
    batch_generator = dataset.batch_generator()
    accs = 0
    step_count = 0
    for step, (batch_data, batch_label) in enumerate(batch_generator):
        data.set_value(batch_data)
        label.set_value(batch_label)
        optimizer.zero_grad()  # zero the parameter gradients
        prob = model(data)
        loss = F.cross_entropy(prob, label)  # cross-entropy loss
        total_loss += loss.numpy().item()
        optimizer.backward(loss)  # backprop to compute gradients
        optimizer.step()  # update parameters from gradients
        acc = accuracy(prob.numpy(), batch_label)
        accs += acc
        step_count += 1
        #print("step: {}, loss: {}, acc: {}".format(step, loss, )
        #print(step, loss)
    print("epoch: {}, average loss {}, dataset len: {}, acc: {}".format(
        epoch, total_loss / len(dataset), len(dataset), accs / step_count))

path = '/tmp/save.mge'
mge.save(model.state_dict(), path)
# (fragment) Remainder of one supervised training epoch; `loss_record`,
# `epoch`, `model`, `optimizer`, `total_train_time` are defined above.
acc_record = AverageMeter()
start = time.time()
for x, target in train_loader:
    optimizer.zero_grad()
    x = x.cuda()
    target = target.type(torch.long).cuda()
    output = model(x)
    loss = F.cross_entropy(output, target)
    loss.backward()
    optimizer.step()
    # top-1 accuracy for this batch
    batch_acc = accuracy(output, target, topk=(1, ))[0]
    loss_record.update(loss.item(), x.size(0))
    acc_record.update(batch_acc.item(), x.size(0))
run_time = time.time() - start
total_train_time += run_time
info = '\n train_Epoch:{:03d}/{:03d}\t run_time:{:.3f}\t cls_loss:{:.3f}\t cls_acc:{:.2f}'.format(
    epoch + 1, args.epochs, run_time, loss_record.avg, acc_record.avg)
print(info)

##########################
## Testing Stage
model.eval()
acc_record = AverageMeter()
loss_record = AverageMeter()
print("="*80)
# (fragment) Re-evaluate on a perturbed copy of Fashion-MNIST, then run
# FGM and PGD adversarial attacks against the trained NTK model and
# report top-1/top-5 accuracy on the adversarial examples.
x_train, y_train, x_test, y_test = datasets.get_dataset("fashion_mnist", 1024, 128, perturb=True)
validate(
    val_loader=datasets.minibatch(
        x_test, y_test, batch_size=128, train_epochs=1, key=None
    ),
    model=apply_fn,
    params=params,
    criterion=criterion,
    epoch=20,
    batch_size=128,
    num_images=len(x_test),
)
# Reload the clean dataset for the adversarial evaluations below.
x_train, y_train, x_test, y_test = datasets.get_dataset("fashion_mnist", 1024, 128)

print("=> Running FGM attack against resulting NTK")
now = time.time()
# eps=0.3 under the L-inf norm
x_test_fgm = fast_gradient_method(model, x_test, 0.3, np.inf)
y_test_fgm = model(x_test_fgm)
print(f"Took {time.time() - now:0.2f}s")
print(accuracy(y_test_fgm, y_test, topk=(1, 5)))

print("=> Running PGD attack against resulting NTK")
now = time.time()
# eps=0.3, step 0.01, 40 iterations, L-inf norm
x_test_pgd = projected_gradient_descent(model, x_test, 0.3, 0.01, 40, np.inf)
y_test_pgd = model(x_test_pgd)
print(f"Took {time.time() - now:0.2f}s")
print(accuracy(y_test_pgd, y_test, topk=(1, 5)))
# (fragment, Python 2) Batched prediction over X followed by metric
# collection; `n`, `X`, `y`, `preds`, the all_* accumulators, `subset`,
# `Print`, `epoch` and `num_epochs` are defined above this chunk.
num_batches = n // batch_size
for i in range(num_batches):
    idx = range(i*batch_size, (i+1)*batch_size)
    x_batch = X[idx]
    out = predict(x_batch)
    preds.append(out)

# Handle the leftover samples that do not fill a whole batch.
if num_batches * batch_size < n:
    # Computing rest
    rest = n - num_batches * batch_size
    idx = range(n-rest, n)
    x_batch = X[idx]
    out = predict(x_batch)
    preds.append(out)

# Making metadata
predictions = np.concatenate(preds, axis = 0)
acc_eval = utils.accuracy(predictions, y)
all_accuracy.append(acc_eval)
auc_eval = utils.auc(predictions, y)
all_auc.append(auc_eval)
roc_eval_fpr, roc_eval_tpr, roc_eval_thresholds = utils.roc(predictions, y)
all_roc_fpr.append(roc_eval_fpr)
all_roc_tpr.append(roc_eval_tpr)
all_roc_thresholds.append(roc_eval_thresholds)
if Print:
    print " validating: %s loss" % subset
    print " average evaluation accuracy (%s): %.5f" % (subset, acc_eval)
    print " average evaluation AUC (%s): %.5f" % (subset, auc_eval)
    print
print "Epoch %d of %d" % (epoch + 1, num_epochs)
# (fragment) One GCN training epoch: single full-batch forward pass, loss
# on the training split, then validation metrics from the *same* outputs.
t = time.time()
model.train()
optimizer.zero_grad()
# print('features/adj',features,adj)
output = model(features, adj)
# print('output',output, output.shape)
# print(output[idx_train], labels[idx_train])
# print(output[idx_train].shape, labels[idx_train].shape)
loss_train = F.cross_entropy(output[idx_train], labels[idx_train])
acc_train = accuracy(output[idx_train], labels[idx_train])
loss_train.backward()
optimizer.step()
# NOTE(review): validation reuses the pre-update, train-mode outputs
# (dropout active, no model.eval() re-run) — confirm this is intended.
loss_val = F.cross_entropy(output[idx_val], labels[idx_val])
acc_val = accuracy(output[idx_val], labels[idx_val])
print('Epoch: {:04d}'.format(epoch + 1),
      'loss_train: {:.4f}'.format(loss_train.item()),
      'acc_train: {:.4f}'.format(acc_train.item()),
      'loss_val: {:.4f}'.format(loss_val.item()),
      'acc_val: {:.4f}'.format(acc_val.item()),
      'time: {:.4f}s'.format(time.time() - t))

"Testing the model"
    def train(self, learning_schedule = {0: 0.015, 500: 0.0015, 800: 0.00015, 1000: 0.000015}, momentum = 0.9, max_epochs=3000, save_every = 20, save_path = os.getcwd()):
        """Train with Nesterov momentum and an iteration-keyed lr schedule
        (Python 2 / Theano).

        Checkpoints every `save_every` iterations, tracks train/dev error
        for plotting, saves the best-on-dev parameters, and early-stops via
        a patience counter.

        NOTE(review): the mutable-dict default for `learning_schedule` is
        shared across calls — safe only because it is never mutated here.
        """
        self.save_every = save_every
        self.metadata_tmp_path = save_path+"/model_params.pkl"
        self.learning_rate_schedule = learning_schedule
        # shared variable so the schedule can change it mid-training
        self.learning_rate = theano.shared(np.float32(self.learning_rate_schedule[0]))
        self.momentum = momentum
        #for trainer
        self.updates = nn.updates.nesterov_momentum(self.loss, self.all_params, self.learning_rate, self.momentum)
        train_fn = self.nesterov_trainer() #nesterov with momentum.
        train_set_iterator = DataLoader(os.getcwd(),train_test_valid='train')
        best_dev_loss = numpy.inf
        dev_set_iterator = DataLoader(os.getcwd(), train_test_valid='valid')
        dev_set_iterator.build_unequal_samples_map()
        #for loading the data onto the gpu
        #create_train_gen = lambda: train_set_iterator.create_gen(max_epochs)
        patience = 1000
        patience_increase = 2.
        improvement_threshold = 0.995
        done_looping = False
        print '... training the model'
        start_time = time.clock()
        epoch = 0
        timer = None
        #for plotting
        self._costs = []
        self._train_errors = []
        self._dev_errors = []
        while (epoch < max_epochs) and (not done_looping):
            losses_train = []
            losses = []
            avg_costs = []
            timer = time.time()
            for iteration, (x, y) in enumerate(train_set_iterator):
                # learning-rate schedule is keyed on the iteration index
                if iteration in self.learning_rate_schedule:
                    lr = np.float32(self.learning_rate_schedule[iteration])
                    print " setting learning rate to %.7f" % lr
                    self.learning_rate.set_value(lr)
                print " load training data onto GPU"
                avg_cost = train_fn(x, y)
                if np.isnan(avg_cost):
                    raise RuntimeError("NaN DETECTED.")
                if type(avg_cost) == list:
                    avg_costs.append(avg_cost[0])
                else:
                    avg_costs.append(avg_cost)
                #for saving the batch
                if ((iteration + 1) % save_every) == 0:
                    print
                    print "Saving metadata, parameters"
                    with open(self.metadata_tmp_path, 'w') as f:
                        pickle.dump({'losses_train': avg_costs,'param_values': nn.layers.get_all_param_values(self.output_layer)}, f, pickle.HIGHEST_PROTOCOL)
            mean_train_loss = numpy.mean(avg_costs)
            #print " mean training loss:\t\t%.6f" % mean_train_loss
            #losses_train.append(mean_train_loss)
            #accuracy assessment (on the last minibatch of the epoch)
            output = utils.one_hot(self.predict_(x)(),m=20)
            train_loss = utils.log_loss(output, y)
            acc = 1 - utils.accuracy(output, y)
            losses.append(train_loss)
            del output
            del x
            del y
            print(' epoch %i took %f seconds' % (epoch, time.time() - timer))
            print(' epoch %i, avg costs %f' % (epoch, mean_train_loss))
            print(' epoch %i, training error %f' % (epoch, acc))
            #for plotting
            self._costs.append(mean_train_loss)
            self._train_errors.append(acc)
            #valid accuracy on one random dev batch
            xd,yd = dev_set_iterator.random_batch()
            valid_output = utils.one_hot(self.predict_(xd)(),m=20)
            valid_acc = 1 - utils.accuracy(valid_output, yd)
            self._dev_errors.append(valid_acc)
            del valid_output
            del xd
            del yd
            if valid_acc < best_dev_loss:
                # new best model: remember parameters and checkpoint them
                best_dev_loss = valid_acc
                best_params = copy.deepcopy(self.all_params )
                print('!!! epoch %i, validation error of best model %f' % (epoch, valid_acc))
                print
                print "Saving best performance parameters"
                with open(self.metadata_tmp_path, 'w') as f:
                    pickle.dump({'losses_train': avg_costs,'param_values': nn.layers.get_all_param_values(self.output_layer)}, f, pickle.HIGHEST_PROTOCOL)
                # NOTE(review): best_dev_loss was just updated, so this
                # comparison is against the *new* best — verify the intended
                # patience semantics.
                if (valid_acc < best_dev_loss * improvement_threshold):
                    patience = max(patience, iteration * patience_increase)
            if patience <= iteration:
                done_looping = True
                break
            epoch += 1
def train(self, learning_schedule = {0: 0.0015, 700: 0.00015, 800: 0.000015}, momentum = 0.9, max_epochs=3000, save_every = 20, save_path = os.getcwd()): self.save_every = save_every self.metadata_tmp_path = save_path+"/model_params.pkl" self.learning_rate_schedule = learning_schedule self.learning_rate = theano.shared(np.float32(self.learning_rate_schedule[0])) self.momentum = momentum #for trainer self.updates = nn.updates.nesterov_momentum(self.loss, self.all_params, self.learning_rate, self.momentum) train_fn = self.nesterov_trainer() #nesterov with momentum. train_set_iterator = DataLoader(os.getcwd(),train_test_valid='train') best_dev_loss = numpy.inf #for loading the data onto the gpu #create_train_gen = lambda: train_set_iterator.create_gen(max_epochs) patience = 1000 patience_increase = 2. improvement_threshold = 0.995 done_looping = False print '... training the model' start_time = time.clock() epoch = 0 timer = None #for plotting self._costs = [] self._train_errors = [] self._dev_errors = [] while (epoch < max_epochs) and (not done_looping): losses_train = [] losses = [] avg_costs = [] timer = time.time() for iteration, (x, y) in enumerate(train_set_iterator): if iteration in learning_rate_schedule: lr = np.float32(learning_rate_schedule[iteration]) print " setting learning rate to %.7f" % lr learning_rate.set_value(lr) print " load training data onto GPU" avg_cost = train_fn(x, y) if np.isnan(avg_cost): raise RuntimeError("NaN DETECTED.") if type(avg_cost) == list: avg_costs.append(avg_cost[0]) else: avg_costs.append(avg_cost) #for saving the batch if ((iteration + 1) % save_every) == 0: print print "Saving metadata, parameters" with open(metadata_tmp_path, 'w') as f: pickle.dump({'losses_train': avg_costs,'param_values': nn.layers.get_all_param_values(self.output_layer)}, f, pickle.HIGHEST_PROTOCOL) mean_train_loss = numpy.mean(avg_costs) #print " mean training loss:\t\t%.6f" % mean_train_loss #losses_train.append(mean_train_loss) #accuracy assessment 
output = utils.one_hot(predict_(x)()) train_loss = utils.log_loss(output, y) acc = 1 - accuracy(output, y) losses.append(train_loss) del output del x del y print(' epoch %i took %f seconds' % (epoch, time.time() - timer)) print(' epoch %i, avg costs %f' % (epoch, mean_train_loss)) print(' epoch %i, training error %f' % (epoch, acc)) #for plotting self._costs.append(mean_train_loss) self._train_errors.append(acc) #dev_errors = numpy.mean(dev_scoref()) #valid accuracy dev_set_iterator = DataLoader(os.getcwd(), train_test_valid='valid') #too many open files xd,yd = dev_set_iterator.create_batch_matrix(random.sample(dev_set_iterator.files,128)) valid_output = utils.one_hot(predict_(xd)()) valid_acc = 1 - utils.accuracy(valid_test, yd) self._dev_errors.append(valid_acc) del x del y if valid_acc < best_dev_loss: best_dev_loss = valid_acc best_params = copy.deepcopy(all_params) print('!!! epoch %i, validation error of best model %f' % (epoch, valid_acc)) print print "Saving best performance parameters" with open(metadata_tmp_path, 'w') as f: pickle.dump({'losses_train': avg_costs,'param_values': nn.layers.get_all_param_values(self.output_layer)}, f, pickle.HIGHEST_PROTOCOL) if (valid_acc < best_dev_loss * improvement_threshold): patience = max(patience, iteration * patience_increase) if patience <= iteration: done_looping = True break epoch += 1
def train_epoch(self):
    """Train self.model for one epoch over self.train_loader.

    Tracks timing/loss/top-1/top-5 meters, validates every
    self.interval_validate iterations, steps self.lr_scheduler once at the
    end of the epoch, and logs a per-iteration line every self.print_freq
    iterations plus one epoch summary line.

    Raises:
        ValueError: if the training loss becomes NaN.
    """
    batch_time = utils.AverageMeter()
    data_time = utils.AverageMeter()
    losses = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    self.model.train()
    self.optim.zero_grad()
    end = time.time()
    for batch_idx, (imgs, target, img_files, class_ids) in tqdm.tqdm(
            enumerate(self.train_loader), total=len(self.train_loader),
            desc='Train epoch={}, iter={}'.format(self.epoch, self.iteration),
            ncols=80, leave=False):
        # Global iteration index across epochs.
        iteration = batch_idx + self.epoch * len(self.train_loader)
        data_time.update(time.time() - end)
        gc.collect()
        # Skip batches already processed when resuming from a checkpoint:
        # only the batch immediately following self.iteration is trained.
        if self.iteration != 0 and (iteration - 1) != self.iteration:
            continue  # for resuming
        self.iteration = iteration
        if (self.iteration + 1) % self.interval_validate == 0:
            self.validate()
        if self.cuda:
            # NOTE(review): `async=True` is a SyntaxError on Python 3.7+ and was
            # renamed to non_blocking in PyTorch 0.4 -- this code targets an
            # older torch/Python; confirm the pinned versions.
            imgs, target = imgs.cuda(), target.cuda(async=True)
        imgs, target = Variable(imgs), Variable(target)
        output = self.model(imgs)
        loss = self.criterion(output, target)
        # loss.data[0] is the pre-PyTorch-0.4 scalar access (now loss.item()).
        if np.isnan(float(loss.data[0])):
            raise ValueError('loss is nan while training')
    # measure accuracy and record loss
        prec1, prec5 = utils.accuracy(output.data, target.data, topk=(1, 5))
        losses.update(loss.data[0], imgs.size(0))
        top1.update(prec1[0], imgs.size(0))
        top5.update(prec5[0], imgs.size(0))
        self.optim.zero_grad()
        loss.backward()
        self.optim.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if self.iteration % self.print_freq == 0:
            log_str = 'Train: [{0}/{1}/{top1.count:}]\tepoch: {epoch:}\titer: {iteration:}\t' \
                'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                'Data: {data_time.val:.3f} ({data_time.avg:.3f})\t' \
                'Loss: {loss.val:.4f} ({loss.avg:.4f})\t' \
                'Prec@1: {top1.val:.3f} ({top1.avg:.3f})\t' \
                'Prec@5: {top5.val:.3f} ({top5.avg:.3f})\tlr {lr:.6f}'.format(
                    batch_idx, len(self.train_loader), epoch=self.epoch,
                    iteration=self.iteration,
                    lr=self.optim.param_groups[0]['lr'],
                    batch_time=batch_time, data_time=data_time,
                    loss=losses, top1=top1, top5=top5)
            print(log_str)
            self.print_log(log_str)
    if self.lr_scheduler is not None:
        self.lr_scheduler.step()  # update lr
    # Epoch summary line (uses the running averages of all meters).
    log_str = 'Train_summary: [{0}/{1}/{top1.count:}]\tepoch: {epoch:}\titer: {iteration:}\t' \
        'Time: {batch_time.avg:.3f}\tData: {data_time.avg:.3f}\t' \
        'Loss: {loss.avg:.4f}\tPrec@1: {top1.avg:.3f}\tPrec@5: {top5.avg:.3f}\tlr {lr:.6f}'.format(
            batch_idx, len(self.train_loader), epoch=self.epoch,
            iteration=self.iteration, lr=self.optim.param_groups[0]['lr'],
            batch_time=batch_time, data_time=data_time,
            loss=losses, top1=top1, top5=top5)
    print(log_str)
    self.print_log(log_str)
# Validation pass: run every val batch through encoder -> middle -> decoder,
# collect logits and labels, then report top-1/2/3 accuracy.
# NOTE(review): unlike the test loop below, this loop is NOT wrapped in
# torch.no_grad(), so it builds autograd graphs it never uses -- confirm
# whether that is intentional.
res, gt = [], []
for step, pack in enumerate(valloader):
    img, y, imgp = pack  # image tensor, label, image path
    if opt.gpu:
        img = img.cuda()
    out = encode(img)
    out = middle(out)
    out = decode(out).cpu()
    res.append(out)
    gt.append(y)
res = torch.cat(res, 0)
gt = torch.cat(gt, 0)
top1, top2, top3 = utils.accuracy(res, gt, (1,)), utils.accuracy(res, gt, (2,)), utils.accuracy(res, gt, (3,))
print('val acc is {:.4f}, {:.4f}, {:.4f}'.format(top1[0].item(), top2[0].item(), top3[0].item()))
# Test pass: same pipeline under no_grad. The accumulation/accuracy step for
# this loop continues beyond this excerpt.
with torch.no_grad():
    res, gt = [], []
    for step, pack in enumerate(testloader):
        img, y, imgp = pack
        if opt.gpu:
            img = img.cuda()
        out = encode(img)
        out = middle(out)
        out = decode(out).cpu()
        res.append(out)
        gt.append(y)
def train(train_loader, model, criterion, optimizer, epoch, use_cuda):
    """Train `model` for one epoch with optional cutmix/mixup/cutout augmentation.

    Uses a data_prefetcher instead of iterating the loader directly, scales the
    loss through NVIDIA apex amp, and (in distributed runs) reduces metrics
    across workers. Accuracy/loss meters are only updated every
    args.print_freq batches, so the returned averages are sampled, not exact.

    Returns:
        (losses.avg, top1.avg): average loss and top-1 precision over the
        sampled batches.
    """
    printflag = False  # ensures the augmentation banner prints only once
    # switch to train mode
    model.train()
    torch.set_grad_enabled(True)
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    # Progress bar only on the rank-0 process.
    if args.local_rank == 0:
        bar = Bar('Processing', max=len(train_loader))
    show_step = len(train_loader) // 10
    prefetcher = data_prefetcher(train_loader)
    inputs, targets = prefetcher.next()
    batch_idx = -1
    while inputs is not None:
        # for batch_idx, (inputs, targets) in enumerate(train_loader):
        batch_idx += 1
        batch_size = inputs.size(0)
        # Drop the trailing partial batch.
        if batch_size < args.train_batch:
            break
        # measure data loading time
        # if use_cuda:
        #     inputs, targets = inputs.cuda(), targets.cuda(async=True)
        # inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)
        # print_flag (distinct from printflag above) asks the el2 optimizer to
        # print diagnostics roughly 10 times per epoch on rank 0.
        if (batch_idx) % show_step == 0 and args.local_rank == 0:
            print_flag = True
        else:
            print_flag = False
        if args.cutmix:
            if printflag == False:
                print('using cutmix !')
                printflag = True
            inputs, targets_a, targets_b, lam = cutmix_data(inputs, targets, args.cutmix_prob, use_cuda)
            outputs = model(inputs)
            loss_func = mixup_criterion(targets_a, targets_b, lam)
            old_loss = loss_func(criterion, outputs)
        elif args.mixup:
            if printflag == False:
                print('using mixup !')
                printflag = True
            inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, args.alpha, use_cuda)
            outputs = model(inputs)
            loss_func = mixup_criterion(targets_a, targets_b, lam)
            old_loss = loss_func(criterion, outputs)
        elif args.cutout:
            if printflag == False:
                print('using cutout !')
                printflag = True
            inputs = cutout_data(inputs, args.cutout_size, use_cuda)
            outputs = model(inputs)
            old_loss = criterion(outputs, targets)
        else:
            outputs = model(inputs)
            old_loss = criterion(outputs, targets)
        # compute gradient and do SGD step
        optimizer.zero_grad()
        # loss.backward()
        # apex amp scales the loss for mixed-precision training.
        with amp.scale_loss(old_loss, optimizer) as loss:
            loss.backward()
        if args.el2:
            optimizer.step(print_flag=print_flag)
        else:
            optimizer.step()
        if batch_idx % args.print_freq == 0:
            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)
            # to_python_float incurs a host<->device sync
            losses.update(to_python_float(reduced_loss), inputs.size(0))
            top1.update(to_python_float(prec1), inputs.size(0))
            top5.update(to_python_float(prec5), inputs.size(0))
            torch.cuda.synchronize()
            # measure elapsed time (averaged over the print_freq interval)
            batch_time.update((time.time() - end) / args.print_freq)
            end = time.time()
            if args.local_rank == 0:
                # plot progress
                bar.suffix = '({batch}/{size}) | Batch: {bt:.3f}s | Total: {total:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                    batch=batch_idx + 1,
                    size=len(train_loader),
                    bt=batch_time.val,
                    total=bar.elapsed_td,
                    loss=losses.avg,
                    top1=top1.avg,
                    top5=top5.avg,
                )
                bar.next()
        if (batch_idx) % show_step == 0 and args.local_rank == 0:
            print('E%d' % (epoch) + bar.suffix)
        inputs, targets = prefetcher.next()
    if args.local_rank == 0:
        bar.finish()
    return (losses.avg, top1.avg)
def test(epoch, device, test_data_loader, model, E_model, G1_model, G2_model, D1_model, D2_model, test_file, num_class):
    """Evaluate partial-video recognition with the GAN completion pipeline.

    For each batch: extract features with `model`, subsample a partial clip at
    each ratio in ratio_list, encode it (E_model), generate a fake full clip
    (G2_model), and classify both the generated clip and the raw partial clip
    with D1_model. `outputs` averages the two scores; `OR_outputs` uses only
    the partial-clip score (reported as OR_top1/OR_top5).

    Returns:
        (losses.avg, top1.avg): average loss and top-1 precision.
    """
    criterion = nn.CrossEntropyLoss()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    OR_top1 = AverageMeter()
    OR_top5 = AverageMeter()
    model.eval()
    end = time.time()
    bar = Bar('Processing', max=len(test_data_loader))
    # ratio_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    ratio_list = [0.5]  # observation ratio(s) of each video to evaluate
    num_total_correct = torch.zeros(10)
    num_total = torch.zeros(10)
    for i_batch, (inputs, targets, dists) in enumerate(test_data_loader):
        data_time.update(time.time() - end)
        # start = time.time()
        # inputs = inputs.cuda()
        # label_batched = targets.cuda()
        # dists = dists.cuda()
        inputs = inputs.to(device)
        label_batched = targets.to(device)
        dists = dists.to(device)
        with torch.no_grad():
            _, data_batched = model(inputs, dists)
            length_full = data_batched.size(1)
            # data_batched = data_batched.permute(1,0,2) # B*L*D -> L*B*D
            for ratio in ratio_list:
                # Keep only the first `ratio` fraction of each sequence.
                X_partial, length_partial = vdpro.sample_data(
                    data_batched, length_full, ratio)
                max_len_partial = length_partial
                # temporal pooling
                # X_partial = X_partial.permute(0, 2, 1)
                # X_partial = F.avg_pool1d(X_partial, max_len_partial, stride=1)
                # X_partial = torch.squeeze(X_partial, dim=2)
                # X_partial = util.norm_data(X_partial)
                # forward: encode the partial clip, then generate a "full" clip
                # conditioned on progress label 1.0 (i.e. fully observed).
                z_sample = E_model(X_partial)
                progress_label = vdpro.GetProgressLabel(1.0)
                X_gen_full = G2_model(
                    z_sample, progress_label)  # generate fake full videos
                D_real_score1 = D1_model(X_gen_full)
                output1 = D_real_score1
                # forward again: classify the raw partial clip directly
                D_real_score2 = D1_model(X_partial)
                output2 = D_real_score2
                OR_outputs = output2  # torch.mean(output1, output2)
                outputs = (output1 + output2) / 2  # torch.mean(output1, output2)
                loss = criterion(outputs, label_batched)
                max_value, max_index = torch.max(outputs.data, 1)
                prec1, prec5 = accuracy(outputs.data, label_batched.data,
                                        topk=(1, 5))
                OR_prec1, OR_prec5 = accuracy(OR_outputs.data, label_batched.data,
                                              topk=(1, 5))
                # NOTE(review): loss.data[0] / prec1[0] are pre-PyTorch-0.4
                # scalar accesses (now .item()); confirm the pinned torch version.
                losses.update(loss.data[0], inputs.size(0))
                top1.update(prec1[0], inputs.size(0))
                top5.update(prec5[0], inputs.size(0))
                OR_top1.update(OR_prec1[0], inputs.size(0))
                OR_top5.update(OR_prec5[0], inputs.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f} | OR_top1: {OR_top1: .4f} | OR_top5: {OR_top5: .4f}'.format(
            batch=i_batch + 1,
            size=len(test_data_loader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
            OR_top1=OR_top1.avg,
            OR_top5=OR_top5.avg,
        )
        bar.next()
    bar.finish()
    # model.train()
    return (losses.avg, top1.avg)
# Exercise script (Python 2) on the wine dataset: estimate the asymptotic
# error rate of the 1-nearest-neighbour classifier.
x, y = utils.abrir_dados_wine('./bases/wine/wine.data')
n = x.shape[0]  # number of samples
m = x.shape[1]  # number of features
# a) The number of training samples such that the difference between the
# asymptotic error and the error rate is at most 2%. Use the Nearest
# Neighbour estimates and the accuracy metric.
print '\nLETRA A'
# Cross-validated accuracy of 1-NN (k-fold with n folds = leave-one-out).
acc, contingencia, pr = classif_regres.kfold_cross_validation(
    x, y, n, utils.accuracy, classif_regres.knn, 1)
pc = 1 - acc  # cross-validated error estimate
# Resubstitution: classify the training set with itself.
yhat = classif_regres.knn(x, y, x, 1)
acc = utils.accuracy(y, yhat)
pr = 1 - acc  # resubstitution error estimate
pinf = (pc + pr) / 2  # asymptotic error estimate (average of the two)
# pinf = 0.1151
print 'Taxa de erro assimptotica = ' + str(pinf)
# NOTE(review): precedence here is pc - (pr / (2*pinf)); the surrounding text
# suggests the intended formula may be (pc - pr) / (2*pinf) -- confirm.
print 'Diferenca entre erro assimptotico e taxa de erro usando todas as amostras eh de {:3.3f}%'.format(100*(pc-pr/(2*pinf)))
# deltaf = np.zeros(n)
# rpt = 10
# for k in range(0, rpt):
#     delta = np.array([])
#     for tam in range(1, n + 1):
def test(val_loader, model, criterion, epoch, use_cuda):
    """Run one evaluation pass over val_loader.

    Drives the loop with a data_prefetcher, reduces loss/accuracy across
    distributed workers, and renders a progress bar only on the rank-0
    process.

    Returns:
        (loss_meter.avg, acc1_meter.avg): average loss and top-1 precision.
    """
    global best_acc
    step_time = AverageMeter()
    loss_meter = AverageMeter()
    acc1_meter = AverageMeter()
    acc5_meter = AverageMeter()
    model.eval()  # deactivate dropout/batch-norm updates
    tick = time.time()
    if args.local_rank == 0:
        bar = Bar('Processing', max=len(val_loader))
    prefetcher = data_prefetcher(val_loader)
    inputs, targets = prefetcher.next()
    step = -1
    while inputs is not None:
        step += 1
        # Forward pass only -- no gradients needed during evaluation.
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, targets)
        # Per-batch accuracy and loss, reduced across workers.
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        reduced_loss = reduce_tensor(loss.data)
        prec1 = reduce_tensor(prec1)
        prec5 = reduce_tensor(prec5)
        # to_python_float incurs a host<->device sync
        n_samples = inputs.size(0)
        loss_meter.update(to_python_float(reduced_loss), n_samples)
        acc1_meter.update(to_python_float(prec1), n_samples)
        acc5_meter.update(to_python_float(prec5), n_samples)
        step_time.update(time.time() - tick)
        tick = time.time()
        if args.local_rank == 0:
            bar.suffix = 'Valid({batch}/{size}) | Batch: {bt:.3f}s | Total: {total:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                batch=step + 1,
                size=len(val_loader),
                bt=step_time.avg,
                total=bar.elapsed_td,
                loss=loss_meter.avg,
                top1=acc1_meter.avg,
                top5=acc5_meter.avg,
            )
            bar.next()
        inputs, targets = prefetcher.next()
    if args.local_rank == 0:
        print(bar.suffix)
        bar.finish()
    return (loss_meter.avg, acc1_meter.avg)
import numpy.matlib x, y = utils.abrir_dados_iris('./bases/iris/iris.data') for k in range(0, 4): print '\nInitializing test {:d}'.format(k+1) ytypes = np.union1d(y, y) new_y = np.zeros((x.shape[0], ytypes.size)) for i in range(0, x.shape[0]): new_y[i, np.where(np.reshape( numpy.matlib.repmat(y[i], 1, ytypes.size), (1, -1)) == ytypes)[1][0]] = 1 ind_rand = np.arange(0, y.shape[0]) np.random.shuffle(ind_rand) # indices em ordem aleatoria. ind_train = ind_rand[0:.75 * y.shape[0]] ind_test = ind_rand[.75 * y.shape[0]:] mlp = MultiLayerPerceptron() mlp.train(x[ind_train, :], new_y[ind_train, :], 5, eta=0.01, alpha=0.7, MAX_EPOCH=1000) yhat = mlp.activate(x[ind_test, :]) yhat = ytypes[np.argmax(yhat, 1)] acc = utils.accuracy(y[ind_test, :], yhat) print 'Acuracia = {:3.3f}'.format(100 * acc)
def train(train_queue, valid_queue, model, lr, architect, criterion, optimizer, num_classes):
    """One epoch of DARTS-style search-phase training.

    :param train_queue: Data loader that randomly picks the samples in the Dataset,
        as defined in the previous procedure
    :param valid_queue: Data loader that randomly picks the samples in the Dataset,
        as defined in the previous procedure; supplies the minibatch used for the
        architecture-parameter update
    :param model: Model of the network
    :param lr: learning rate (forwarded to architect.step)
    :param architect: object performing the architecture-parameter update
    :param criterion: Criterion(Function over which the loss of the model shall be computed)
    :param optimizer: weights optimizer
    :param num_classes: number of output classes; precision@(num_classes // 2)
        is tracked alongside precision@1
    :return: (top1.avg, objs.avg) -- average precision@1 and average loss
    """
    global CLASSES_WINE, n_epoch
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    manual_report_freq = 2  # log every 2 steps
    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        # FIX: the previous bare `input.cuda()` / `target.cuda()` lines were dead
        # code -- Tensor.cuda() is not in-place and their results were discarded;
        # the Variables below perform the actual device transfer.
        input = Variable(input, requires_grad=False).cuda()
        # FIX: `.cuda(async=True)` is a SyntaxError on Python 3.7+; the kwarg was
        # renamed to `non_blocking` in PyTorch 0.4 with identical semantics.
        target = Variable(target, requires_grad=False).cuda(non_blocking=True)
        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = Variable(input_search, requires_grad=False).cuda()
        target_search = Variable(target_search, requires_grad=False).cuda(non_blocking=True)
        # Architecture update first, then the weight update below.
        architect.step(input, target, input_search, target_search, lr, optimizer,
                       unrolled=args.unrolled)
        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        # NOTE(review): clip_grad_norm is deprecated in favour of clip_grad_norm_;
        # kept as-is for compatibility with the pinned torch version.
        nn.utils.clip_grad_norm(model.parameters(), args.grad_clip)
        optimizer.step()
        prec1, prec5 = utils.accuracy(logits, target, topk=(1, num_classes // 2))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)
        if step % manual_report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return top1.avg, objs.avg
def evaluate(predictions, splitassignments, labels, roc_fname=None, prc_fname=None):
    """Evaluate prediction results, optionally per cross-validation split.

    Parameters:
        predictions: per-example classifier outputs (SVM scores).
        splitassignments: per-example split index, or None for a single split.
        roc_fname / prc_fname: if given, the ROC/PRC plot of the LAST split is
            written to this file (earlier splits are plotted without a file).

    Returns:
        (res_str, mean_roc, mean_prc, mean_acc): human-readable report plus the
        split-averaged auROC, auPRC, and accuracy.
    """
    res_str = ""
    cv = 1
    # Number of splits = highest split index + 1.
    if splitassignments != None:
        for split in splitassignments:
            if split + 1 > cv:
                cv = int(split + 1)
    if cv > 1:
        res_str = "Evaluating on %i examples in %i splits\n" % (len(labels), cv)
    else:
        res_str = "Evaluating on %i examples\n" % len(labels)
    # NOTE: cv*[[]] aliases one list cv times, but the loop below immediately
    # rebinds every slot to a fresh list, so no aliasing bug results.
    output_splits = cv * [[]]
    label_splits = cv * [[]]
    for i in xrange(cv):
        label_splits[i] = []
        output_splits[i] = []
    # Bucket predictions/labels by split.
    for i in xrange(0, len(labels)):
        if cv > 1:
            split = int(splitassignments[i])
        else:
            split = 0
        output_splits[split].append(predictions[i])
        label_splits[split].append(labels[i])
    error = []
    sum_accuracy = 0.0
    sum_roc = 0.0
    sum_prc = 0.0
    for split in xrange(cv):
        res_str += 'Split %d\n' % (split + 1)
        LTE = label_splits[split]
        svmout = output_splits[split]
        numpos = 0
        for l in LTE:
            if l == 1:
                numpos += 1
        # ROC/PRC only make sense when both classes are present in the split.
        istwoclass = numpos > 0 and numpos < len(LTE)
        res_str += ' number of positive examples = %i\n' % numpos
        res_str += ' number of negative examples = %i\n' % (len(LTE) - numpos)
        if istwoclass:
            auROC = calcroc(svmout, LTE)
            res_str += ' Area under ROC curve = %2.1f %%\n' % (100.0 * auROC)
            sum_roc += auROC
            if roc_fname != None:
                # Only the last split's plot is saved to roc_fname.
                if split != cv - 1:
                    plots.plotroc(svmout, LTE, split == cv - 1, None, "ROC curve of SVM, split %i" % (split + 1))
                else:
                    plots.plotroc(svmout, LTE, split == cv - 1, roc_fname, "ROC curve of SVM, split %i" % (split + 1))
            auPRC = calcprc(svmout, LTE)
            res_str += ' Area under PRC curve = %2.1f %%\n' % (100.0 * auPRC)
            sum_prc += auPRC
            if prc_fname != None:
                # Only the last split's plot is saved to prc_fname.
                if split != cv - 1:
                    plots.plotprc(svmout, LTE, None, "PRC curve of SVM, split %i" % (split + 1))
                else:
                    plots.plotprc(svmout, LTE, prc_fname, "PRC curve of SVM, split %i" % (split + 1))
        acc = accuracy(svmout, LTE)
        res_str += ' accuracy (at threshold 0) = %2.1f %% \n' % (100.0 * acc)
        sum_accuracy += acc
    # Overall positive count for the averaged summary.
    numpos = 0
    for l in labels:
        if l == 1:
            numpos += 1
    mean_roc = sum_roc / cv
    mean_prc = sum_prc / cv
    mean_acc = sum_accuracy / cv
    res_str += 'Averages\n'
    res_str += ' number of positive examples = %i\n' % round(numpos / cv)
    res_str += ' number of negative examples = %i\n' % round((len(labels) - numpos) / cv)
    res_str += ' Area under ROC curve = %2.1f %%\n' % (100.0 * mean_roc)
    res_str += ' Area under PRC curve = %2.1f %%\n' % (100.0 * mean_prc)
    res_str += ' accuracy (at threshold 0) = %2.1f %% \n' % (100.0 * mean_acc)
    return (res_str, mean_roc, mean_prc, mean_acc)
# Fragment of an outer evaluation loop (outputs/losses/e/start_time/prev_time
# are bound earlier, outside this excerpt).
# Copy the current evaluation chunk into the Theano shared variables so the
# compiled function reads it from the GPU.
for x_shared, x_chunk_eval in zip(xs_shared, xs_chunk_eval):
    x_shared.set_value(x_chunk_eval)
outputs_chunk = []
for b in xrange(num_batches_chunk_eval):
    out = compute_output(b)
    outputs_chunk.append(out)
outputs_chunk = np.vstack(outputs_chunk)
outputs_chunk = outputs_chunk[:chunk_length_eval]  # truncate to the right length
outputs.append(outputs_chunk)
outputs = np.vstack(outputs)
loss = utils.log_loss(outputs, labels)
acc = utils.accuracy(outputs, labels)
print " loss:\t%.6f" % loss
print " acc:\t%.2f%%" % (acc * 100)
print
losses.append(loss)
del outputs  # free the stacked predictions before the next chunk
# Progress/ETA bookkeeping: extrapolate remaining time from chunks done so far.
now = time.time()
time_since_start = now - start_time
time_since_prev = now - prev_time
prev_time = now
est_time_left = time_since_start * (float(config.num_chunks_train - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
eta = datetime.now() + timedelta(seconds=est_time_left)
eta_str = eta.strftime("%c")