def training(params):
    # TRIAL_BUDGET is obtained from nni
    for i in range(params["TRIAL_BUDGET"]):
        if i == 0:
            # in the first training round no init_model is available
            booster = lgb.train(params,
                                feval=my_accuracy,
                                train_set=train,
                                valid_sets=val_1,
                                early_stopping_rounds=params["early_stopping_rounds"],
                                num_boost_round=params["num_boost_round"])
        else:
            booster = lgb.train(params,
                                feval=my_accuracy,
                                train_set=train,
                                valid_sets=val_1,
                                init_model=booster,
                                early_stopping_rounds=params["early_stopping_rounds"],
                                num_boost_round=params["num_boost_round"])
        # obtain validation score on val_2
        preds = booster.predict(val_2.data)
        score = accuracy_score(val_2.label.astype("int"), np.argmax(preds, axis=1))
        nni.report_intermediate_result(score)
    nni.report_final_result(score)
def on_epoch_end(self, epoch, logs=None):
    """ Run on end of each epoch """
    logs = logs or {}
    LOG.debug(logs)
    # Should this be val_acc or val_accuracy? Keras versions report this key inconsistently.
    nni.report_intermediate_result(logs["val_accuracy"])
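# A defensive variant of the callback above (a minimal sketch, not taken from the
# original project; the class name NNIReportCallback is hypothetical): different
# Keras / TensorFlow versions expose the validation accuracy as either "val_acc"
# or "val_accuracy", so reading whichever key is present avoids the inconsistency
# the comment above asks about.
import nni
from tensorflow import keras

class NNIReportCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        # prefer the TF 2.x key, fall back to the older Keras key
        val_acc = logs.get("val_accuracy", logs.get("val_acc"))
        if val_acc is not None:
            nni.report_intermediate_result(val_acc)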
def main(params):
    '''
    Main function: build the mnist network, run it, and send results to NNI.
    '''
    # Import data
    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
    print('Mnist download data done.')
    logger.debug('Mnist download data done.')

    # Create the model
    # Build the graph for the deep net
    mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
                                 channel_2_num=params['channel_2_num'],
                                 conv_size=params['conv_size'],
                                 hidden_size=params['hidden_size'],
                                 pool_size=params['pool_size'],
                                 learning_rate=params['learning_rate'])
    mnist_network.build_network()
    logger.debug('Mnist build network done.')

    # Write log
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s', graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(params['batch_num']):
            batch = mnist.train.next_batch(params['batch_size'])
            mnist_network.train_step.run(feed_dict={
                mnist_network.images: batch[0],
                mnist_network.labels: batch[1],
                mnist_network.keep_prob: 1 - params['dropout_rate']
            })

            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(feed_dict={
                    mnist_network.images: mnist.test.images,
                    mnist_network.labels: mnist.test.labels,
                    mnist_network.keep_prob: 1.0
                })
                nni.report_intermediate_result(test_acc)
                logger.debug('test accuracy %g', test_acc)
                logger.debug('Pipe send intermediate result done.')

        test_acc = mnist_network.accuracy.eval(feed_dict={
            mnist_network.images: mnist.test.images,
            mnist_network.labels: mnist.test.labels,
            mnist_network.keep_prob: 1.0
        })
        nni.report_final_result(test_acc)
        logger.debug('Final result is %g', test_acc)
        logger.debug('Send final result done.')
def fit(self, train_loader, validation_loader):
    for e in range(self.config.n_epochs):
        if self.config.verbose:
            lr = self.optimizer.param_groups[0]['lr']
            timestamp = datetime.datetime.now().utcnow().isoformat()
            self.log(f'\n{timestamp}\nLR: {lr}')

        t = time.time()
        summary_loss = self.train_one_epoch(train_loader)
        self.log(f'[RESULT]: Train. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
        self.save(f'{self.base_dir}/last-checkpoint.bin')

        t = time.time()
        summary_loss = self.validation(validation_loader)
        self.log(f'[RESULT]: Val. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
        nni.report_intermediate_result(summary_loss.avg)
        # logger.debug(summary_loss.avg)

        if summary_loss.avg < self.best_summary_loss:
            self.best_summary_loss = summary_loss.avg
            self.model.eval()
            self.save(f'{self.base_dir}/best-checkpoint-{str(self.epoch).zfill(3)}epoch.bin')
            # for path in sorted(glob(f'{self.base_dir}/best-checkpoint-*epoch.bin'))[:-3]:
            #     os.remove(path)

        if self.config.validation_scheduler:
            self.scheduler.step(metrics=summary_loss.avg)

        self.epoch += 1
    nni.report_final_result(summary_loss.avg)
def fitF1(self, batched_X_train, batched_y_train):
    for epoch in range(self.EPOCHS):
        preds = []
        truePreds = []
        for batch_idx, (X_batch, y_batch) in enumerate(zip(batched_X_train, batched_y_train)):
            var_X_batch = Variable(
                torch.nn.utils.rnn.pad_sequence(
                    [self.vectors[X] for X in X_batch]
                ).permute(1, 0, 2)).float().to(self.device)
            var_y_batch = Variable(torch.from_numpy(y_batch)).float().to(self.device)
            self.optimizer.zero_grad()
            output = self.model(var_X_batch)
            loss = self.error(output, var_y_batch)
            loss.backward()
            self.optimizer.step()

            preds = preds + [round(float(x)) for X in output.data for x in X]
            truePreds = truePreds + [round(float(x)) for X in var_y_batch for x in X]
            if batch_idx % 50 == 0:
                nni.report_intermediate_result(f1_score(truePreds, preds))

            del var_X_batch
            del var_y_batch
            del loss
            del output
            torch.cuda.empty_cache()
def evaluate_mlp(agent, env, max_steps, use_nni=False, report_avg=None, eval_repeat=1):
    print("Evaluating agent over {} episodes".format(eval_repeat))
    evaluation_returns = []
    for _ in range(eval_repeat):
        state = env.reset()
        episode_reward = 0.
        for _ in range(max_steps):
            with torch.no_grad():
                action, _, _, _ = agent.act(state, True)
            next_state, reward, done, _ = env.step(action)
            state = next_state
            episode_reward += reward
            if done:  # currently all situations end with a done
                break
        evaluation_returns.append(episode_reward)
    eval_avg = sum(evaluation_returns) / len(evaluation_returns)
    print("Ave. evaluation return =", eval_avg)
    if use_nni:
        if eval_repeat == 1:
            nni.report_intermediate_result(eval_avg)
        elif eval_repeat > 1 and report_avg is not None:
            metric = (report_avg + eval_avg) / 2
            nni.report_final_result(metric)
    return eval_avg
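# A hypothetical usage sketch of evaluate_mlp above (agent, env and the step budget
# are assumed to exist): a quick single-episode evaluation reports an intermediate
# result, and a later multi-episode evaluation averages its return with the earlier
# one via report_avg and reports that average as the final result.
quick_avg = evaluate_mlp(agent, env, max_steps=500, use_nni=True, eval_repeat=1)
evaluate_mlp(agent, env, max_steps=500, use_nni=True, report_avg=quick_avg, eval_repeat=5)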
def run_train(epochs, model, train_iterator, valid_iterator, optimizer, criterion, model_type):
    best_valid_loss = float('inf')
    for epoch in range(epochs):
        # train the model
        train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
        # evaluate the model
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
        # save the best model
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), 'saved_weights' + '_' + model_type + '.pt')

        print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}%')
        print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc * 100:.2f}%')
        nni.report_intermediate_result(valid_acc)
def after_epoch(self, state):
    epoch = int(state['epoch'].numpy())
    val_metric = self.learner.metric_history.get_metric(self.metric, "eval", epoch, epoch)
    if val_metric:
        import nni
        nni.report_intermediate_result(val_metric)
def execute_runner(runners, is_nni=False):
    train_losses = []
    train_accuracies = []
    test_intermediate_results = []
    test_losses = []
    test_accuracies = []
    for idx_r, runner in enumerate(runners):
        rs = runner.run(verbose=2)
        train_losses.append(rs[0])
        train_accuracies.append(rs[1])
        test_intermediate_results.append(rs[2])
        test_losses.append(rs[3]["loss"])
        test_accuracies.append(rs[3]["acc"])
        '''if idx_r == 0:
            plot_graphs(rs)'''
    if is_nni:
        mean_intermediate_res = np.mean(test_intermediate_results, axis=0)
        for i in mean_intermediate_res:
            nni.report_intermediate_result(i)
        nni.report_final_result(np.mean(test_accuracies))

    runners[-1].logger.info("*" * 15 + "Final accuracy train: %3.4f" % np.mean(train_accuracies))
    runners[-1].logger.info("*" * 15 + "Std accuracy train: %3.4f" % np.std(train_accuracies))
    runners[-1].logger.info("*" * 15 + "Final accuracy test: %3.4f" % np.mean(test_accuracies))
    runners[-1].logger.info("*" * 15 + "Std accuracy test: %3.4f" % np.std(test_accuracies))
    runners[-1].logger.info("Finished")
    return
def train(train_loader, dev_loader, device, epochs):
    '''
    params = {'num_conv_layers': 2, 'filter1_size': 3, 'filter2_size': 4, 'filter3_size': 3,
              'num_filters1': 5, 'num_filters2': 10, 'num_filters3': 10, 'max_pool_size': 2,
              'hidden1_size': 1024, 'hidden2_size': 512, 'hidden3_size': 128,
              'learning_rate': 0.0001, 'weight_decay': 1e-05}
    '''
    params = nni.get_next_parameter()
    loss_function = nn.CrossEntropyLoss()
    losses = []
    model = GCommandClassifier(device, params, torch.rand([100, 1, 161, 101]))
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'], weight_decay=params['weight_decay'])

    prev_dev_acc = 0
    dev_acc = 0
    for epoch in range(epochs):
        print('epoch:', epoch + 1)
        loss = train_epoch(train_loader, model, loss_function, optimizer, device)
        losses.append(loss)
        acc = evaluate(model, train_loader, device)
        print('train accuracy:', acc)
        prev_dev_acc = dev_acc
        dev_acc = evaluate(model, dev_loader, device)
        print('validation accuracy:', dev_acc)
        nni.report_intermediate_result(dev_acc)
        # early stopping
        if epoch >= 10 and dev_acc < 0.6:
            break
    nni.report_final_result(dev_acc)
    return model
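# A minimal sketch (not part of the original trial code) of keeping a script like
# train() above runnable both inside and outside an NNI experiment: merge the
# tuner's parameters over the defaults shown in the docstring. The assumption that
# nni.get_next_parameter() returns an empty dict in standalone mode may vary by
# NNI version.
import nni

def get_params():
    default_params = {'learning_rate': 0.0001, 'weight_decay': 1e-05}  # subset of the defaults above
    tuned_params = nni.get_next_parameter() or {}  # {} when no tuner is attached (assumed)
    params = dict(default_params)
    params.update(tuned_params)
    return params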
def main():
    data_dir = '/tmp/tensorflow/mnist/input_data'
    mnist = input_data.read_data_sets(data_dir, one_hot=True)
    logger.debug('Mnist download data done.')

    mnist_network = MnistNetwork()
    mnist_network.build_network()
    logger.debug('Mnist build network done.')

    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        batch_num = 200
        for i in range(batch_num):
            batch_size = nni.choice(50, 250, 500, name='batch_size')
            batch = mnist.train.next_batch(batch_size)
            dropout_rate = nni.choice(1, 5, name='dropout_rate')
            mnist_network.train_step.run(feed_dict={
                mnist_network.x: batch[0],
                mnist_network.y: batch[1],
                mnist_network.keep_prob: dropout_rate
            })
            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(feed_dict={
                    mnist_network.x: mnist.test.images,
                    mnist_network.y: mnist.test.labels,
                    mnist_network.keep_prob: 1.0
                })
                nni.report_intermediate_result(test_acc)

        test_acc = mnist_network.accuracy.eval(feed_dict={
            mnist_network.x: mnist.test.images,
            mnist_network.y: mnist.test.labels,
            mnist_network.keep_prob: 1.0
        })
        nni.report_final_result(test_acc)
def on_epoch_end(self, epoch):
    # TODO: find a way to retrieve metrics or evaluate the model on my own (meters = AverageMeterGroup() ...),
    # see https://nni.readthedocs.io/en/latest/_modules/nni/nas/pytorch/enas/trainer.html
    meters = ...
    if epoch >= self.epochs:
        nni.report_final_result(meters)
    else:
        nni.report_intermediate_result(meters)
def train(self, show_plot=False, apply_nni=False, validate_rate=10):
    self._init_loss_and_acc_vec()
    # calc number of iterations in the current epoch
    len_data = len(self._train_loader)
    last_epoch = list(range(self._epochs))[-1]
    for epoch_num in range(self._epochs):
        # iterate over the batches of the current epoch
        for batch_index, (sequence, label, missing_values) in enumerate(self._train_loader):
            sequence, label, missing_values = self._to_gpu(sequence, label, missing_values)
            # print progress
            self._model.train()
            output = self._model(sequence)  # calc output of current model on the current batch
            """
            print("label:")
            print(label.shape)
            print("seq:")
            print(sequence.shape)
            print("output:")
            print(output.shape)
            print(output.squeeze(dim=2).shape)
            print(label.float().squeeze(dim=1).shape)
            """
            loss = self._loss_func(output.squeeze(dim=self._dim), label.float(), missing_values)  # calculate loss
            # print(loss)
            loss.backward()                # back propagation
            self._model.optimizer.step()   # update weights
            self._model.zero_grad()        # zero gradients
            if PRINT_PROGRESS:
                self._print_progress(batch_index, len_data, job=TRAIN_JOB)
        self._train_label_and_output = (label, output)

        # validate and print progress
        # /---------------------- FOR NNI -------------------------
        if epoch_num % validate_rate == 0:
            # validate on dev set anyway
            save_true_and_pred = True
            self._validate(self._dev_loader, save_true_and_pred, job=DEV_JOB)
            torch.cuda.empty_cache()
            # report dev result as an intermediate result
            if apply_nni:
                test_loss = self._print_dev_loss
                nni.report_intermediate_result(test_loss)
            # validate on train set as well and display results
            else:
                torch.cuda.empty_cache()
                self._validate(self._train_valid_loader, save_true_and_pred, job=TRAIN_JOB)
                self._print_info(jobs=[TRAIN_JOB, DEV_JOB])

        if self._early_stop and epoch_num > 30 and self._print_dev_loss > np.max(self._loss_vec_dev[-30:]):
            break

    # report final results
    if apply_nni:
        test_loss = np.max(self._print_dev_loss)
        nni.report_final_result(test_loss)
    if show_plot:
        self._plot_acc_dev()
def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
    logs = kwargs.get('logs')
    if self.hp_metric in logs.keys():
        metric = logs.get(self.hp_metric)
        METRICS.append(metric)
        nni.report_intermediate_result(metric)
def on_epoch_end(self, epoch, logs={}):
    '''
    Run on end of each epoch
    '''
    LOG.debug(logs)
    nni.report_intermediate_result(logs["val_categorical_accuracy"])
def main(args, experiment_id, trial_id):
    use_cuda = not args['no_cuda'] and torch.cuda.is_available()
    torch.set_num_threads(4)
    torch.manual_seed(args['seed'])
    device = torch.device("cuda" if use_cuda else "cpu")
    batch_size = args['batch_size']
    hidden_size = args['hidden_size']

    train_loader, test_loader = data_loader(batch_size)
    model = Net(hidden_size=hidden_size).to(device)
    optimizer = optim.SGD(model.parameters(), lr=args['lr'], momentum=args['momentum'])

    for epoch in range(1, args['epochs'] + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test_acc = test(args, model, device, test_loader)
        # report intermediate result
        nni.report_intermediate_result(test_acc)
        logger.debug('test accuracy %g', test_acc)
        logger.debug('Pipe send intermediate result done.')

    torch.save(
        model.state_dict(),
        f'{os.path.join(os.getcwd())}/model_outputs/{experiment_id}-{trial_id}-model.pth'
    )

    test_acc = test(args, model, device, test_loader)
    # report final result
    nni.report_final_result(test_acc)
    logger.debug('Final result is %g', test_acc)
    output_logger.info(f'{experiment_id}|{trial_id}|{params}|{test_acc:0.6f}')
    logger.debug('Send final result done.')
def fit(self, batched_X_train, batched_y_train):
    for epoch in range(self.EPOCHS):
        correct = 0
        for batch_idx, (X_batch, y_batch) in enumerate(zip(batched_X_train, batched_y_train)):
            var_X_batch = Variable(
                torch.nn.utils.rnn.pad_sequence(
                    [self.vectors[X] for X in X_batch]
                ).permute(1, 0, 2)).float().to(self.device)
            var_y_batch = Variable(torch.from_numpy(y_batch)).float().to(self.device)
            self.optimizer.zero_grad()
            output = self.model(var_X_batch)
            loss = self.error(output, var_y_batch)
            loss.backward()
            self.optimizer.step()

            # Total correct predictions
            predicted = output.data.round()
            correct += (predicted == var_y_batch).sum()
            # print(correct)
            if batch_idx % 50 == 0:
                nni.report_intermediate_result(
                    float(correct * 100) / float(6 * BATCH_SIZE * (batch_idx + 1)))
                # print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                #     epoch, batch_idx * len(X_batch), len(batched_X_train),
                #     100. * batch_idx / len(batched_X_train), loss.data,
                #     float(correct * 100) / float(6 * BATCH_SIZE * (batch_idx + 1))))

            del var_X_batch
            del var_y_batch
            del loss
            del output
            del predicted
            torch.cuda.empty_cache()
async def query_trial_metrics(self):
    start_t = getattr(self, "_trial_start_time", None)
    if start_t is None:
        logger.info(f"Trial({self.cfg_hash}) has not started!")
    else:
        logger.info(
            f"Trial({self.cfg_hash}) has started {(datetime.now() - start_t).total_seconds()} secs"
        )
    curr_latest_epoch, intermediate_metrics, final_val = self.metrics_reporter.query_metrics(self.latest_epoch)
    if curr_latest_epoch is None:
        return
    if curr_latest_epoch is not None and intermediate_metrics is not None:
        for metrics in intermediate_metrics:
            logger.info(f"report_intermediate_result:{metrics}")
            if os.getenv(ENV_KEY_TRIAL_IN_NNI, None):
                nni.report_intermediate_result(metrics)  # still in the testing stage, so the nni API cannot be called yet
        self.latest_epoch = curr_latest_epoch
    if final_val is not None and self.final_val is None:
        # final val read for the first time
        self.final_val = final_val
        logger.info(f"report_final_result:{self.final_val}")
        if os.getenv(ENV_KEY_TRIAL_IN_NNI, None):
            nni.report_final_result(self.final_val)
        self._trial_finished_future.set_result(self.final_val)
def run(params):
    """ Distributed Synchronous SGD Example """
    rank = dist.get_rank()
    torch.manual_seed(1234)
    train_set, bsz = partition_dataset()
    model = Net()
    optimizer = optim.SGD(model.parameters(), lr=params['learning_rate'], momentum=params['momentum'])
    num_batches = ceil(len(train_set.dataset) / float(bsz))

    total_loss = 0.0
    for epoch in range(3):
        epoch_loss = 0.0
        for data, target in train_set:
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            epoch_loss += loss.item()
            loss.backward()
            average_gradients(model)
            optimizer.step()
        # logger.debug('Rank: ', rank, ', epoch: ', epoch, ': ', epoch_loss / num_batches)
        if rank == 0:
            nni.report_intermediate_result(epoch_loss / num_batches)
        total_loss += (epoch_loss / num_batches)

    total_loss /= 3
    logger.debug('Final loss: {}'.format(total_loss))
    if rank == 0:
        nni.report_final_result(total_loss)
def __call__(self):
    while self.cur_epoch < self.hp.max_epoch:
        try:
            if self.optimizer_config.lr_update:
                utils.adjust_learning_rate(self.optimizer, self.cur_epoch,
                                           self.hp.max_epoch, self.optimizer_config.lr)
            else:
                utils.set_learning_rate(self.optimizer, self.hp.lr, False)
            _ = self.train_epoch_dataset_first(self.cur_epoch, 'trn')
        except KeyboardInterrupt:
            self.handle_exception()
            print('Exit control menu.')

        # nni.report_intermediate_result(0.5)
        # if self.cur_epoch % self.config.val_interval == 0:
        try:
            ret_val, val_loss, val_eval = self.train_epoch_dataset_first(self.cur_epoch, 'val')
            ret_tst, tst_loss, tst_eval = self.train_epoch_dataset_first(self.cur_epoch, 'tst')
            if self.hp.evaluation == 'loss':
                self.undec = self.recoder.push_loss(self.cur_epoch, self.undec, val_loss, ret_tst)
                if self.hp.nni:
                    nni.report_intermediate_result(tst_loss)
            elif self.hp.evaluation == 'acc':
                self.undec = self.recoder.push_eval(self.cur_epoch, self.undec, val_eval, ret_tst)
                if self.hp.nni:
                    nni.report_intermediate_result(tst_eval)
            else:
                raise ValueError('Unknown evaluation.')
            if self.undec == 0:
                self.save_checkpoint()
        except KeyboardInterrupt:
            print('Skipping val and test because ctrl + c was detected.')

        self.cur_epoch += 1
        if self.undec >= self.hp.stop_val_dec:
            print('Val_loss hasn\'t decreased in the last [{}] epochs, stopping training early.'
                  .format(self.hp.stop_val_dec))
            break

    if self.hp.evaluation == 'loss':
        fin_epoch, fin_loss = self.recoder.pop_via_loss()
        if self.hp.nni:
            nni.report_final_result(fin_loss)
        print(f'[{self.cur_epoch}] epochs complete, output results = {fin_loss} at epoch [{fin_epoch}], seed = {self.hp.seed}.')
    elif self.hp.evaluation == 'acc':
        fin_epoch, fin_eval = self.recoder.pop_via_eval()
        if self.hp.nni:
            nni.report_final_result(fin_eval)
        print(f'[{self.cur_epoch}] epochs complete, output results = {fin_eval} at epoch [{fin_epoch}], seed = {self.hp.seed}.')
    else:
        raise ValueError('Unknown evaluation.')
    self.evaluate()
def test(final=False):
    model.eval()
    z = model(data.x, data.edge_index)
    evaluator = MulticlassEvaluator()
    if args.dataset == 'WikiCS':
        accs = []
        for i in range(20):
            acc = log_regression(z, dataset, evaluator, split=f'wikics:{i}', num_epochs=800)['acc']
            accs.append(acc)
        acc = sum(accs) / len(accs)
    else:
        acc = log_regression(z, dataset, evaluator, split='rand:0.1',
                             num_epochs=3000, preload_split=split)['acc']

    if final and use_nni:
        nni.report_final_result(acc)
    elif use_nni:
        nni.report_intermediate_result(acc)
    return acc
def _validate(self):
    all_val_outputs = {idx: [] for idx in range(self.n_model)}
    for batch_idx, multi_model_batch in enumerate(zip(*self._val_dataloaders)):
        xs = []
        ys = []
        for idx, batch in enumerate(multi_model_batch):
            x, y = self.training_step_before_model(batch, batch_idx, f'cuda:{idx}')
            xs.append(x)
            ys.append(y)
        if len(ys) != len(xs):
            raise ValueError('len(ys) should be equal to len(xs)')

        y_hats = self.multi_model(*xs)
        for output_idx, yhat in enumerate(y_hats):
            if len(ys) == len(y_hats):
                acc = self.validation_step_after_model(xs[output_idx], ys[output_idx], yhat)
            elif len(ys) == 1:
                acc = self.validation_step_after_model(xs[0], ys[0].to(yhat.get_device()), yhat)
            else:
                raise ValueError('len(ys) should be either 1 or len(y_hats)')
            all_val_outputs[output_idx].append(acc)

    report_acc = {}
    for idx in all_val_outputs:
        avg_acc = np.mean([x['val_acc'] for x in all_val_outputs[idx]]).item()
        report_acc[self.kwargs['model_kwargs'][idx]['model_id']] = avg_acc
    nni.report_intermediate_result(report_acc)
    return report_acc
def valid(epoch):
    net.eval()
    stats = adl.Accumulator()
    with torch.no_grad():
        for inputs, targets in validloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            stats["loss_sum"] += loss.item() * targets.size(0)
            _, predicted = outputs.max(1)
            stats["total"] += targets.size(0)
            stats["correct"] += predicted.eq(targets).sum().item()

    with stats.synchronized():
        stats["loss_avg"] = stats["loss_sum"] / stats["total"]
        stats["accuracy"] = stats["correct"] / stats["total"]
        writer.add_scalar("Loss/Valid", stats["loss_avg"], epoch)
        writer.add_scalar("Accuracy/Valid", stats["accuracy"], epoch)
        if adaptdl.env.replica_rank() == 0:
            nni.report_intermediate_result(stats["accuracy"])
        print("Valid:", stats)
    return stats["accuracy"]
def on_epoch_end(self, epoch, logs=None):
    """Reports intermediate accuracy to NNI framework"""
    # TensorFlow 2.0 API reference claims the key is `val_acc`, but in fact it's `val_accuracy`
    if 'val_acc' in logs:
        nni.report_intermediate_result(logs['val_acc'])
    else:
        nni.report_intermediate_result(logs['val_accuracy'])
def evaluate_model(model_cls):
    # "model_cls" is a class, need to instantiate
    model = model_cls()

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    transf = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
    train_loader = DataLoader(MNIST('data/mnist', download=True, transform=transf),
                              batch_size=64, shuffle=True)
    test_loader = DataLoader(MNIST('data/mnist', download=True, train=False, transform=transf),
                             batch_size=64)

    for epoch in range(3):
        # train the model for one epoch
        train_epoch(model, device, train_loader, optimizer, epoch)
        # test the model for one epoch
        accuracy = test_epoch(model, device, test_loader)
        # call report intermediate result. Result can be float or dict
        nni.report_intermediate_result(accuracy)

    # report final test result
    nni.report_final_result(accuracy)
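# The comment above notes that the reported result can be a float or a dict. A
# minimal sketch of the dict form (illustrative values, not from the original
# evaluator): when a dict is reported, NNI expects a "default" key, which the
# tuner uses as the optimization metric; additional keys are simply recorded.
import nni

accuracy = 0.93  # placeholder value for illustration
nni.report_intermediate_result({'default': accuracy, 'progress': 0.5})
nni.report_final_result({'default': accuracy})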
def _log(self, logs, iterator=None):
    if self.epoch is not None:
        logs["epoch"] = self.epoch
    if self.global_step is None:
        # when logging evaluation metrics without training
        self.global_step = 0
    if self.tb_writer:
        for k, v in logs.items():
            if isinstance(v, (int, float)):
                self.tb_writer.add_scalar(k, v, self.global_step)
            else:
                logger.warning(
                    "Trainer is attempting to log a value of "
                    '"%s" of type %s for key "%s" as a scalar. '
                    "This invocation of Tensorboard's writer.add_scalar() "
                    "is incorrect so we dropped this attribute.",
                    v,
                    type(v),
                    k,
                )
        self.tb_writer.flush()
    output = {**logs, **{"step": self.global_step}}

    #### nni
    if (nni is not None) and ('eval_token-f1' in logs):
        nni.report_intermediate_result(logs['eval_token-f1'])
    ####

    if 'eval_loss' in output.keys():
        self.history.append(output)
    if iterator is not None:
        iterator.write(output)
    else:
        logger.info(output)
def main():
    # global args, config
    #
    # args = parser.parse_args()
    #
    # with open(args.config) as rPtr:
    #     config = EasyDict(yaml.load(rPtr))
    #
    # config.save_path = os.path.dirname(args.config)
    #
    # # Random seed
    # torch.manual_seed(config.seed)
    # torch.cuda.manual_seed(config.seed)
    # np.random.seed(config.seed)
    # random.seed(config.seed)

    # Datasets
    train_transform = transforms.Compose([
        transforms.RandomCrop((32, 32), padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.243, 0.262))
    ])
    val_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.243, 0.262))
    ])
    trainset = Datasets.CIFAR10(root='data', train=True, download=True, transform=train_transform)
    trainloader = Data.DataLoader(trainset, batch_size=config.batch_size, shuffle=True,
                                  num_workers=config.workers)
    testset = Datasets.CIFAR10(root='data', train=False, download=True, transform=val_transform)
    testloader = Data.DataLoader(testset, batch_size=config.batch_size, shuffle=False,
                                 num_workers=config.workers)

    # Model
    model = resnet32()
    model = model.cuda()

    # Optimizer
    criterion = LabelSmoothing(config.label_smoothing)
    optimizer = optim.SGD(model.parameters(), lr=config.lr_scheduler.base_lr,
                          momentum=config.momentum, weight_decay=config.weight_decay)

    # LR scheduler
    lr_scheduler = CosineAnnealing(optimizer, len(trainloader) * config.max_iter)

    global PCA, Writer
    PCA = PerClassAccuracy(num_classes=config.num_classes)
    Writer = SummaryWriter(config.save_path + '/events')

    BEST_mAP = 0.0
    for iter_idx in range(config.max_iter):
        train(model, iter_idx, criterion, lr_scheduler, optimizer, trainloader)
        mAP = val(model, iter_idx, criterion, testloader)
        if mAP > BEST_mAP:
            BEST_mAP = mAP
        nni.report_intermediate_result(mAP)

    nni.report_final_result(BEST_mAP)
    Writer.close()
def on_epoch_end(self, epoch, logs=None): """ Run on end of each epoch """ if logs is None: logs = dict() logger.debug(logs) nni.report_intermediate_result(logs["acc"])
def export(self, estimator, export_path, checkpoint_path, eval_result, is_the_final_export):
    import nni
    result = eval_result["top_1_accuracy"]
    if is_the_final_export:
        nni.report_final_result(result)
    else:
        nni.report_intermediate_result(result)
def on_validation_epoch_end(self, trainer: Trainer, pl_module):
    if trainer.global_rank != 0:
        return
    if trainer.running_sanity_check:
        return
    if trainer.logged_metrics and 'val_ppl' in trainer.logged_metrics:
        nni.report_intermediate_result(trainer.logged_metrics['val_ppl'])