def training(params):

    # TRIAL_BUDGET is obtained from nni
    for i in range(params["TRIAL_BUDGET"]):

        if i == 0:

            # in first training round no init_model is available
            booster = lgb.train(params, feval=my_accuracy, train_set=train, valid_sets=val_1,
                                early_stopping_rounds=params["early_stopping_rounds"],
                                num_boost_round=params["num_boost_round"])

            # obtain validation score on val_2
            preds = booster.predict(val_2.data)
            score = accuracy_score(val_2.label.astype("int"), np.argmax(preds, axis=1))

            nni.report_intermediate_result(score)

        else:

            booster = lgb.train(params, feval=my_accuracy, train_set=train, valid_sets=val_1,
                                init_model=booster, early_stopping_rounds=params["early_stopping_rounds"],
                                num_boost_round=params["num_boost_round"])

            # obtain validation score on val_2
            preds = booster.predict(val_2.data)
            score = accuracy_score(val_2.label.astype("int"), np.argmax(preds, axis=1))

            nni.report_intermediate_result(score)

    nni.report_final_result(score)
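
Since the comment above says TRIAL_BUDGET is obtained from nni, a minimal sketch of a matching trial entry point is shown below; the default values and key names are illustrative assumptions, not part of the original example.

import nni

if __name__ == "__main__":
    # Illustrative defaults only; the original example does not show them.
    DEFAULT_PARAMS = {
        "num_boost_round": 50,
        "early_stopping_rounds": 10,
        "TRIAL_BUDGET": 4,
    }
    tuned_params = nni.get_next_parameter()  # hyperparameters proposed by the tuner
    DEFAULT_PARAMS.update(tuned_params)
    training(DEFAULT_PARAMS)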
Example #2
 def on_epoch_end(self, epoch, logs={}):
     """
     Run on end of each epoch
     """
     LOG.debug(logs)
     # Should this be val_acc or val_accuracy? Keras behaves inconsistently across versions.
     nni.report_intermediate_result(logs["val_accuracy"])
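
Which key is present in logs depends on the Keras version; Example #24 below falls back between the two names. A minimal sketch of such a defensive callback, assuming a standard keras.callbacks.Callback subclass:

import nni
from tensorflow import keras

class ReportIntermediates(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        # Older Keras versions log 'val_acc'; newer ones log 'val_accuracy'.
        metric = logs.get("val_accuracy", logs.get("val_acc"))
        if metric is not None:
            nni.report_intermediate_result(metric)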
Example #3
File: mnist.py  Project: zwt233/nni
def main(params):
    '''
    Main function, build mnist network, run and send result to NNI.
    '''
    # Import data
    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
    print('Mnist download data done.')
    logger.debug('Mnist download data done.')

    # Create the model
    # Build the graph for the deep net
    mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
                                 channel_2_num=params['channel_2_num'],
                                 conv_size=params['conv_size'],
                                 hidden_size=params['hidden_size'],
                                 pool_size=params['pool_size'],
                                 learning_rate=params['learning_rate'])
    mnist_network.build_network()
    logger.debug('Mnist build network done.')

    # Write log
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s', graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(params['batch_num']):
            batch = mnist.train.next_batch(params['batch_size'])
            mnist_network.train_step.run(
                feed_dict={
                    mnist_network.images: batch[0],
                    mnist_network.labels: batch[1],
                    mnist_network.keep_prob: 1 - params['dropout_rate']
                })

            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(
                    feed_dict={
                        mnist_network.images: mnist.test.images,
                        mnist_network.labels: mnist.test.labels,
                        mnist_network.keep_prob: 1.0
                    })

                nni.report_intermediate_result(test_acc)
                logger.debug('test accuracy %g', test_acc)
                logger.debug('Pipe send intermediate result done.')

        test_acc = mnist_network.accuracy.eval(
            feed_dict={
                mnist_network.images: mnist.test.images,
                mnist_network.labels: mnist.test.labels,
                mnist_network.keep_prob: 1.0
            })

        nni.report_final_result(test_acc)
        logger.debug('Final result is %g', test_acc)
        logger.debug('Send final result done.')
Example #4
    def fit(self, train_loader, validation_loader):
        for e in range(self.config.n_epochs):
            if self.config.verbose:
                lr = self.optimizer.param_groups[0]['lr']
                timestamp = datetime.datetime.utcnow().isoformat()
                self.log(f'\n{timestamp}\nLR: {lr}')

            t = time.time()
            summary_loss = self.train_one_epoch(train_loader)

            self.log(
                f'[RESULT]: Train. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
            self.save(f'{self.base_dir}/last-checkpoint.bin')

            t = time.time()
            summary_loss = self.validation(validation_loader)

            self.log(
                f'[RESULT]: Val. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
            nni.report_intermediate_result(summary_loss.avg)
            # logger.debug(summary_loss.avg)
            if summary_loss.avg < self.best_summary_loss:
                self.best_summary_loss = summary_loss.avg
                self.model.eval()
                self.save(f'{self.base_dir}/best-checkpoint-{str(self.epoch).zfill(3)}epoch.bin')
                # for path in sorted(glob(f'{self.base_dir}/best-checkpoint-*epoch.bin'))[:-3]:
                #     os.remove(path)

            if self.config.validation_scheduler:
                self.scheduler.step(metrics=summary_loss.avg)

            self.epoch += 1
        nni.report_final_result(summary_loss.avg)
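
This example reports a validation loss (lower is better) as both the intermediate and final metric, so the tuner has to be configured to minimize. A hedged sketch of the experiment-side setup using NNI's Python API; the trial command, search space, and budgets are placeholders, not taken from the original project.

from nni.experiment import Experiment

experiment = Experiment('local')
experiment.config.trial_command = 'python fit_trial.py'   # hypothetical trial script
experiment.config.trial_code_directory = '.'
experiment.config.search_space = {'lr': {'_type': 'loguniform', '_value': [1e-5, 1e-2]}}
experiment.config.tuner.name = 'TPE'
# The trial reports a loss, so ask the tuner to minimize instead of maximize.
experiment.config.tuner.class_args['optimize_mode'] = 'minimize'
experiment.config.max_trial_number = 20
experiment.config.trial_concurrency = 1
experiment.run(8080)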
Example #5
    def fitF1(self, batched_X_train, batched_y_train):
        for epoch in range(self.EPOCHS):
            preds = []
            truePreds = []
            for batch_idx, (X_batch, y_batch) in enumerate(
                    zip(batched_X_train, batched_y_train)):
                var_X_batch = Variable(
                    torch.nn.utils.rnn.pad_sequence([
                        self.vectors[X] for X in X_batch
                    ]).permute(1, 0, 2)).float().to(self.device)
                var_y_batch = Variable(torch.from_numpy(y_batch)).float().to(
                    self.device)
                self.optimizer.zero_grad()
                output = self.model(var_X_batch)
                loss = self.error(output, var_y_batch)
                loss.backward()
                self.optimizer.step()

                preds = preds + [
                    round(float(x)) for X in output.data for x in X
                ]
                truePreds = truePreds + [
                    round(float(x)) for X in var_y_batch for x in X
                ]

                if batch_idx % 50 == 0:
                    nni.report_intermediate_result(f1_score(truePreds, preds))
                del var_X_batch
                del var_y_batch
                del loss
                del output
                torch.cuda.empty_cache()
Example #6
def evaluate_mlp(agent, env, max_steps, use_nni=False, report_avg=None, eval_repeat=1):
    print("Evaluating agent over {} episodes".format(eval_repeat))
    evaluation_returns = []
    for _ in range(eval_repeat):
        state = env.reset()
        episode_reward = 0.
        for _ in range(max_steps):
            with torch.no_grad():
                action, _, _, _ = agent.act(state, True)
                next_state, reward, done, _ = env.step(action)

                state = next_state
                episode_reward += reward
            if done:  # currently all situations end with a done
                break

        evaluation_returns.append(episode_reward)
    eval_avg = sum(evaluation_returns) / len(evaluation_returns)
    print("Ave. evaluation return =", eval_avg)

    if use_nni:
        if eval_repeat == 1:
            nni.report_intermediate_result(eval_avg)
        elif eval_repeat > 1 and report_avg is not None:
            metric = (report_avg + eval_avg) / 2
            nni.report_final_result(metric)
    return eval_avg
Example #7
File: train_test.py  Project: galhev/NNI
def run_train(epochs, model, train_iterator, valid_iterator, optimizer,
              criterion, model_type):
    best_valid_loss = float('inf')

    for epoch in range(epochs):

        # train the model
        train_loss, train_acc = train(model, train_iterator, optimizer,
                                      criterion)

        # evaluate the model
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

        # save the best model
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(),
                       'saved_weights' + '_' + model_type + '.pt')

        print(
            f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}%'
        )
        print(
            f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc * 100:.2f}%'
        )
        nni.report_intermediate_result(valid_acc)
Example #8
 def after_epoch(self, state):
     epoch = int(state['epoch'].numpy())
     val_metric = self.learner.metric_history.get_metric(
         self.metric, "eval", epoch, epoch)
     if val_metric:
         import nni
         nni.report_intermediate_result(val_metric)
Example #9
def execute_runner(runners, is_nni=False):
    train_losses = []
    train_accuracies = []
    test_intermediate_results = []
    test_losses = []
    test_accuracies = []
    for idx_r, runner in enumerate(runners):
        rs = runner.run(verbose=2)
        train_losses.append(rs[0])
        train_accuracies.append(rs[1])
        test_intermediate_results.append(rs[2])
        test_losses.append(rs[3]["loss"])
        test_accuracies.append(rs[3]["acc"])
        '''if idx_r == 0:
            plot_graphs(rs)'''
    if is_nni:
        mean_intermediate_res = np.mean(test_intermediate_results, axis=0)
        for i in mean_intermediate_res:
            nni.report_intermediate_result(i)
        nni.report_final_result(np.mean(test_accuracies))

    runners[-1].logger.info("*" * 15 + "Final accuracy train: %3.4f" %
                            np.mean(train_accuracies))
    runners[-1].logger.info("*" * 15 + "Std accuracy train: %3.4f" %
                            np.std(train_accuracies))
    runners[-1].logger.info("*" * 15 + "Final accuracy test: %3.4f" %
                            np.mean(test_accuracies))
    runners[-1].logger.info("*" * 15 + "Std accuracy test: %3.4f" %
                            np.std(test_accuracies))
    runners[-1].logger.info("Finished")
    return
Example #10
def train(train_loader, dev_loader, device, epochs):
    '''
    params = {'num_conv_layers': 2, 'filter1_size': 3, 'filter2_size': 4,
              'filter3_size': 3, 'num_filters1': 5, 'num_filters2': 10,
              'num_filters3': 10, 'max_pool_size': 2, 'hidden1_size': 1024,
              'hidden2_size': 512, 'hidden3_size': 128,
              'learning_rate': 0.0001, 'weight_decay': 1e-05}
    '''
    params = nni.get_next_parameter()
    loss_function = nn.CrossEntropyLoss()
    losses = []
    model = GCommandClassifier(device, params, torch.rand([100, 1, 161, 101]))
    model.to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=params['learning_rate'],
                           weight_decay=params['weight_decay'])
    prev_dev_acc = 0
    dev_acc = 0
    for epoch in range(epochs):
        print('epoch:', epoch + 1)
        loss = train_epoch(train_loader, model, loss_function, optimizer,
                           device)
        losses.append(loss)
        acc = evaluate(model, train_loader, device)
        print('train accuracy:', acc)
        prev_dev_acc = dev_acc
        dev_acc = evaluate(model, dev_loader, device)
        print('validation accuracy:', dev_acc)
        nni.report_intermediate_result(dev_acc)
        # early stopping
        if epoch >= 10 and dev_acc < 0.6:
            break
    nni.report_final_result(dev_acc)
    return model
Example #11
def main():
    data_dir = '/tmp/tensorflow/mnist/input_data'
    mnist = input_data.read_data_sets(data_dir, one_hot=True)
    logger.debug('Mnist download data done.')
    mnist_network = MnistNetwork()
    mnist_network.build_network()
    logger.debug('Mnist build network done.')
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())
    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        batch_num = 200
        for i in range(batch_num):
            batch_size = nni.choice(50, 250, 500, name='batch_size')
            batch = mnist.train.next_batch(batch_size)
            dropout_rate = nni.choice(1, 5, name='dropout_rate')
            mnist_network.train_step.run(feed_dict={mnist_network.x: batch[
                0], mnist_network.y: batch[1], mnist_network.keep_prob:
                dropout_rate})
            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(feed_dict={
                    mnist_network.x: mnist.test.images, mnist_network.y:
                    mnist.test.labels, mnist_network.keep_prob: 1.0})
                nni.report_intermediate_result(test_acc)
        test_acc = mnist_network.accuracy.eval(feed_dict={mnist_network.x:
            mnist.test.images, mnist_network.y: mnist.test.labels,
            mnist_network.keep_prob: 1.0})
        nni.report_final_result(test_acc)
Example #12
 def on_epoch_end(self, epoch):
     # TODO: find a way to retrieve metrics or evaluate the model on my own (meters = AverageMeterGroup() ...), see https://nni.readthedocs.io/en/latest/_modules/nni/nas/pytorch/enas/trainer.html
     meters = ...
     if epoch >= self.epochs:
         nni.report_final_result(meters)
     else:
         nni.report_intermediate_result(meters)
Example #13
    def train(self, show_plot=False, apply_nni=False, validate_rate=10):
        self._init_loss_and_acc_vec()
        # calc number of iteration in current epoch
        len_data = len(self._train_loader)
        last_epoch = list(range(self._epochs))[-1]
        for epoch_num in range(self._epochs):
            # calc number of iteration in current epoch
            for batch_index, (sequence, label, missing_values) in enumerate(self._train_loader):
                sequence, label, missing_values = self._to_gpu(sequence, label, missing_values)
                # print progress
                self._model.train()
                output = self._model(sequence)                  # calc output of current model on the current batch
                """
                print("label:")
                print(label.shape)
                print("seq:")
                print(sequence.shape)
                print("output:")
                print(output.shape)
                print(output.squeeze(dim=2).shape)
                print(label.float().squeeze(dim=1).shape)
                """
                loss = self._loss_func(output.squeeze(dim=self._dim), label.float(), missing_values)  # calculate loss
                # print(loss)
                loss.backward()                                 # back propagation
                self._model.optimizer.step()                    # update weights
                self._model.zero_grad()                         # zero gradients

                if PRINT_PROGRESS:
                    self._print_progress(batch_index, len_data, job=TRAIN_JOB)

                self._train_label_and_output = (label, output)
            # validate and print progress

            # /----------------------  FOR NNI  -------------------------
            if epoch_num % validate_rate == 0:
                # validate on dev set anyway
                save_true_and_pred = True
                self._validate(self._dev_loader, save_true_and_pred, job=DEV_JOB)
                torch.cuda.empty_cache()
                # report dev result as an intermediate result
                if apply_nni:
                    test_loss = self._print_dev_loss
                    nni.report_intermediate_result(test_loss)
                # validate on train set as well and display results
                else:
                    torch.cuda.empty_cache()
                    self._validate(self._train_valid_loader, save_true_and_pred, job=TRAIN_JOB)
                    self._print_info(jobs=[TRAIN_JOB, DEV_JOB])

            if self._early_stop and epoch_num > 30 and self._print_dev_loss > np.max(self._loss_vec_dev[-30:]):
                break

        # report final results
        if apply_nni:
            test_loss = np.max(self._print_dev_loss)
            nni.report_final_result(test_loss)

        if show_plot:
            self._plot_acc_dev()
Example #14
 def on_log(self, args: TrainingArguments, state: TrainerState,
            control: TrainerControl, **kwargs):
     logs = kwargs.get('logs')
     if self.hp_metric in logs.keys():
         metric = logs.get(self.hp_metric)
         METRICS.append(metric)
         nni.report_intermediate_result(metric)
Example #15
    def on_epoch_end(self, epoch, logs={}):
        '''
        Run on end of each epoch
        '''
        LOG.debug(logs)

        nni.report_intermediate_result(logs["val_categorical_accuracy"])
Example #16
def main(args, experiment_id, trial_id):
    use_cuda = not args['no_cuda'] and torch.cuda.is_available()
    torch.set_num_threads(4)
    torch.manual_seed(args['seed'])
    device = torch.device("cuda" if use_cuda else "cpu")

    batch_size = args['batch_size']
    hidden_size = args['hidden_size']

    train_loader, test_loader = data_loader(batch_size)

    model = Net(hidden_size=hidden_size).to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=args['lr'],
                          momentum=args['momentum'])

    for epoch in range(1, args['epochs'] + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test_acc = test(args, model, device, test_loader)

        # report intermediate result
        nni.report_intermediate_result(test_acc)
        logger.debug('test accuracy %g', test_acc)
        logger.debug('Pipe send intermediate result done.')
        torch.save(
            model.state_dict(),
            f'{os.path.join(os.getcwd())}/model_outputs/{experiment_id}-{trial_id}-model.pth'
        )

    test_acc = test(args, model, device, test_loader)
    # report final result
    nni.report_final_result(test_acc)
    logger.debug('Final result is %g', test_acc)
    output_logger.info(f'{experiment_id}|{trial_id}|{params}|{test_acc:0.6f}')
    logger.debug('Send final result done.')
Example #17
    def fit(self, batched_X_train, batched_y_train):
        for epoch in range(self.EPOCHS):
            correct = 0
            for batch_idx, (X_batch, y_batch) in enumerate(
                    zip(batched_X_train, batched_y_train)):
                var_X_batch = Variable(
                    torch.nn.utils.rnn.pad_sequence([
                        self.vectors[X] for X in X_batch
                    ]).permute(1, 0, 2)).float().to(self.device)
                var_y_batch = Variable(torch.from_numpy(y_batch)).float().to(
                    self.device)
                self.optimizer.zero_grad()
                output = self.model(var_X_batch)
                loss = self.error(output, var_y_batch)
                loss.backward()
                self.optimizer.step()

                # Total correct predictions
                predicted = output.data.round()
                correct += (predicted == var_y_batch).sum()
                #print(correct)
                if batch_idx % 50 == 0:
                    nni.report_intermediate_result(
                        float(correct * 100) / float(6 * BATCH_SIZE *
                                                     (batch_idx + 1)))
                    #print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                    #    epoch, batch_idx*len(X_batch), len(batched_X_train), 100.*batch_idx / len(batched_X_train), loss.data, float(correct*100) / float(6 * BATCH_SIZE*(batch_idx+1))))
                del var_X_batch
                del var_y_batch
                del loss
                del output
                del predicted
                torch.cuda.empty_cache()
Example #18
    async def query_trial_metrics(self):
        start_t = getattr(self, "_trial_start_time", None)
        if start_t is None:
            logger.info(f"Trial({self.cfg_hash}) is not started!")
        else:
            logger.info(
                f"Trial({self.cfg_hash}) has started {(datetime.now() - start_t).total_seconds()} secs"
            )

        curr_latest_epoch, intermediate_metrics, final_val = self.metrics_reporter.query_metrics(
            self.latest_epoch)
        if curr_latest_epoch is None:
            return
        if curr_latest_epoch is not None and intermediate_metrics is not None:
            for metrics in intermediate_metrics:
                logger.info(f"report_intermediate_result:{metrics}")
                if os.getenv(ENV_KEY_TRIAL_IN_NNI, None):
                    nni.report_intermediate_result(
                        metrics)  # still in the testing phase; the nni API cannot be called yet
            self.latest_epoch = curr_latest_epoch
        if final_val is not None and self.final_val is None:  # the first time the final val is read
            self.final_val = final_val
            logger.info(f"report_final_result:{self.final_val}")
            if os.getenv(ENV_KEY_TRIAL_IN_NNI, None):
                nni.report_final_result(self.final_val)
            self._trial_finished_future.set_result(self.final_val)
Example #19
def run(params):
    """ Distributed Synchronous SGD Example """
    rank = dist.get_rank()
    torch.manual_seed(1234)
    train_set, bsz = partition_dataset()
    model = Net()
    model = model
    optimizer = optim.SGD(model.parameters(), lr=params['learning_rate'], momentum=params['momentum'])

    num_batches = ceil(len(train_set.dataset) / float(bsz))
    total_loss = 0.0
    for epoch in range(3):
        epoch_loss = 0.0
        for data, target in train_set:
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            epoch_loss += loss.item()
            loss.backward()
            average_gradients(model)
            optimizer.step()
        #logger.debug('Rank: ', rank, ', epoch: ', epoch, ': ', epoch_loss / num_batches)
        if rank == 0:
            nni.report_intermediate_result(epoch_loss / num_batches)
        total_loss += (epoch_loss / num_batches)
    total_loss /= 3
    logger.debug('Final loss: {}'.format(total_loss))
    if rank == 0:
        nni.report_final_result(total_loss)
Example #20
    def __call__(self):
        while self.cur_epoch < self.hp.max_epoch:
            try:
                if self.optimizer_config.lr_update:
                    utils.adjust_learning_rate(self.optimizer, self.cur_epoch,
                                               self.hp.max_epoch,
                                               self.optimizer_config.lr)
                else:
                    utils.set_learning_rate(self.optimizer, self.hp.lr, False)
                _ = self.train_epoch_dataset_first(self.cur_epoch, 'trn')
            except KeyboardInterrupt:
                self.handle_exception()
                print('Exit control menu.')
            # nni.report_intermediate_result(0.5)
            # if self.cur_epoch % self.config.val_interval == 0:
            try:
                ret_val, val_loss, val_eval = self.train_epoch_dataset_first(
                    self.cur_epoch, 'val')
                ret_tst, tst_loss, tst_eval = self.train_epoch_dataset_first(
                    self.cur_epoch, 'tst')
                if self.hp.evaluation == 'loss':
                    self.undec = self.recoder.push_loss(
                        self.cur_epoch, self.undec, val_loss, ret_tst)
                    if self.hp.nni:
                        nni.report_intermediate_result(tst_loss)
                elif self.hp.evaluation == 'acc':
                    self.undec = self.recoder.push_eval(
                        self.cur_epoch, self.undec, val_eval, ret_tst)
                    if self.hp.nni:
                        nni.report_intermediate_result(tst_eval)
                else:
                    raise ValueError('Unknown evaluation.')
                if self.undec == 0: self.save_checkpoint()
            except KeyboardInterrupt:
                print('Skipping val and test because Ctrl+C was detected.')

            self.cur_epoch += 1
            if self.undec >= self.hp.stop_val_dec:
                print(
                    'Val_loss hasn\'t decreased in the last [{}] epochs, stopping training early.'
                    .format(self.hp.stop_val_dec))
                break

        if self.hp.evaluation == 'loss':
            fin_epoch, fin_loss = self.recoder.pop_via_loss()
            if self.hp.nni:
                nni.report_final_result(fin_loss)
            print(
                f'[{self.cur_epoch}] epochs complete, output results = {fin_loss} at epoch [{fin_epoch}], seed = {self.hp.seed}.'
            )
        elif self.hp.evaluation == 'acc':
            fin_epoch, fin_eval = self.recoder.pop_via_eval()
            if self.hp.nni:
                nni.report_final_result(fin_eval)
            print(
                f'[{self.cur_epoch}] epochs complete, output results = {fin_eval} at epoch [{fin_epoch}], seed = {self.hp.seed}.'
            )
        else:
            raise ValueError('Unknown evaluation.')
        self.evaluate()
Example #21
File: train.py  Project: CreaterLL/GCA
def test(final=False):
    model.eval()
    z = model(data.x, data.edge_index)

    evaluator = MulticlassEvaluator()
    if args.dataset == 'WikiCS':
        accs = []
        for i in range(20):
            acc = log_regression(z,
                                 dataset,
                                 evaluator,
                                 split=f'wikics:{i}',
                                 num_epochs=800)['acc']
            accs.append(acc)
        acc = sum(accs) / len(accs)
    else:
        acc = log_regression(z,
                             dataset,
                             evaluator,
                             split='rand:0.1',
                             num_epochs=3000,
                             preload_split=split)['acc']

    if final and use_nni:
        nni.report_final_result(acc)
    elif use_nni:
        nni.report_intermediate_result(acc)

    return acc
Example #22
    def _validate(self):
        all_val_outputs = {idx: [] for idx in range(self.n_model)}
        for batch_idx, multi_model_batch in enumerate(
                zip(*self._val_dataloaders)):
            xs = []
            ys = []
            for idx, batch in enumerate(multi_model_batch):
                x, y = self.training_step_before_model(batch, batch_idx,
                                                       f'cuda:{idx}')
                xs.append(x)
                ys.append(y)
            if len(ys) != len(xs):
                raise ValueError('len(ys) should be equal to len(xs)')

            y_hats = self.multi_model(*xs)

            for output_idx, yhat in enumerate(y_hats):
                if len(ys) == len(y_hats):
                    acc = self.validation_step_after_model(
                        xs[output_idx], ys[output_idx], yhat)
                elif len(ys) == 1:
                    acc = self.validation_step_after_model(
                        xs[0], ys[0].to(yhat.get_device()), yhat)
                else:
                    raise ValueError(
                        'len(ys) should be either 1 or len(y_hats)')
                all_val_outputs[output_idx].append(acc)

        report_acc = {}
        for idx in all_val_outputs:
            avg_acc = np.mean([x['val_acc']
                               for x in all_val_outputs[idx]]).item()
            report_acc[self.kwargs['model_kwargs'][idx]['model_id']] = avg_acc
        nni.report_intermediate_result(report_acc)
        return report_acc
Example #23
def valid(epoch):
    net.eval()
    stats = adl.Accumulator()
    with torch.no_grad():
        for inputs, targets in validloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            stats["loss_sum"] += loss.item() * targets.size(0)
            _, predicted = outputs.max(1)
            stats["total"] += targets.size(0)
            stats["correct"] += predicted.eq(targets).sum().item()

    with stats.synchronized():
        stats["loss_avg"] = stats["loss_sum"] / stats["total"]
        stats["accuracy"] = stats["correct"] / stats["total"]
        writer.add_scalar("Loss/Valid", stats["loss_avg"], epoch)
        writer.add_scalar("Accuracy/Valid", stats["accuracy"], epoch)

        if adaptdl.env.replica_rank() == 0:
            nni.report_intermediate_result(stats["accuracy"])

        print("Valid:", stats)
        return stats["accuracy"]
Example #24
 def on_epoch_end(self, epoch, logs=None):
     """Reports intermediate accuracy to NNI framework"""
     # TensorFlow 2.0 API reference claims the key is `val_acc`, but in fact it's `val_accuracy`
     if 'val_acc' in logs:
         nni.report_intermediate_result(logs['val_acc'])
     else:
         nni.report_intermediate_result(logs['val_accuracy'])
Example #25
File: hello_nas.py  Project: yinfupai/nni
def evaluate_model(model_cls):
    # "model_cls" is a class, need to instantiate
    model = model_cls()

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    transf = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    train_loader = DataLoader(MNIST('data/mnist',
                                    download=True,
                                    transform=transf),
                              batch_size=64,
                              shuffle=True)
    test_loader = DataLoader(MNIST('data/mnist',
                                   download=True,
                                   train=False,
                                   transform=transf),
                             batch_size=64)

    for epoch in range(3):
        # train the model for one epoch
        train_epoch(model, device, train_loader, optimizer, epoch)
        # test the model for one epoch
        accuracy = test_epoch(model, device, test_loader)
        # call report intermediate result. Result can be float or dict
        nni.report_intermediate_result(accuracy)

    # report final test result
    nni.report_final_result(accuracy)
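
In the hello_nas.py tutorial this function is not called directly; the NAS engine instantiates each sampled model class and passes it in. A brief sketch of how the function is typically wired up, assuming the NNI 2.x Retiarii API:

from nni.retiarii.evaluator import FunctionalEvaluator

# The evaluator wraps evaluate_model; the NAS engine calls it once per sampled model.
evaluator = FunctionalEvaluator(evaluate_model)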
Example #26
 def _log(self, logs, iterator=None):
     if self.epoch is not None:
         logs["epoch"] = self.epoch
     if self.global_step is None:
         # when logging evaluation metrics without training
         self.global_step = 0
     if self.tb_writer:
         for k, v in logs.items():
             if isinstance(v, (int, float)):
                 self.tb_writer.add_scalar(k, v, self.global_step)
             else:
                 logger.warning(
                     "Trainer is attempting to log a value of "
                     '"%s" of type %s for key "%s" as a scalar. '
                     "This invocation of Tensorboard's writer.add_scalar() "
                     "is incorrect so we dropped this attribute.",
                     v,
                     type(v),
                     k,
                 )
         self.tb_writer.flush()
     output = {**logs, **{"step": self.global_step}}
     #### nni
     if (nni is not None) and ('eval_token-f1' in logs):
         nni.report_intermediate_result(logs['eval_token-f1'])
     ####
     if 'eval_loss' in output.keys():
         self.history.append(output)
     if iterator is not None:
         iterator.write(output)
     else:
         logger.info(output)
Example #27
def main():
    # global args, config
    #
    # args = parser.parse_args()
    #
    # with open(args.config) as rPtr:
    #     config = EasyDict(yaml.load(rPtr))
    #
    # config.save_path = os.path.dirname(args.config)
    #
    # # Random seed
    # torch.manual_seed(config.seed)
    # torch.cuda.manual_seed(config.seed)
    # np.random.seed(config.seed)
    # random.seed(config.seed)

    # Datasets
    train_transform = transforms.Compose([
        transforms.RandomCrop((32, 32), padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.243, 0.262))
    ])
    val_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.243, 0.262))
    ])

    trainset = Datasets.CIFAR10(root='data', train=True, download=True, transform=train_transform)
    trainloader = Data.DataLoader(trainset, batch_size=config.batch_size, shuffle=True, num_workers=config.workers)

    testset = Datasets.CIFAR10(root='data', train=False, download=True, transform=val_transform)
    testloader = Data.DataLoader(testset, batch_size=config.batch_size, shuffle=False, num_workers=config.workers)

    # Model
    model = resnet32()
    model = model.cuda()

    # Optimizer
    criterion = LabelSmoothing(config.label_smoothing)
    optimizer = optim.SGD(model.parameters(), lr=config.lr_scheduler.base_lr, momentum=config.momentum,
            weight_decay=config.weight_decay)

    # LR scheduler
    lr_scheduler = CosineAnnealing(optimizer, len(trainloader) * config.max_iter)

    global PCA, Writer
    PCA = PerClassAccuracy(num_classes=config.num_classes)
    Writer = SummaryWriter(config.save_path + '/events')
    BEST_mAP = 0.0
    for iter_idx in range(config.max_iter):
        train(model, iter_idx, criterion, lr_scheduler, optimizer, trainloader)
        mAP = val(model, iter_idx, criterion, testloader)
        if mAP > BEST_mAP:
            BEST_mAP = mAP
        nni.report_intermediate_result(mAP)
    nni.report_final_result(BEST_mAP)

    Writer.close()
Example #28
 def on_epoch_end(self, epoch, logs=None):
     """
     Run on end of each epoch
     """
     if logs is None:
         logs = dict()
     logger.debug(logs)
     nni.report_intermediate_result(logs["acc"])
Example #29
 def export(self, estimator, export_path, checkpoint_path, eval_result,
            is_the_final_export):
     import nni
     result = eval_result["top_1_accuracy"]
     if is_the_final_export:
         nni.report_final_result(result)
     else:
         nni.report_intermediate_result(result)
Example #30
    def on_validation_epoch_end(self, trainer: Trainer, pl_module):
        if trainer.global_rank != 0:
            return

        if trainer.running_sanity_check:
            return

        if trainer.logged_metrics and 'val_ppl' in trainer.logged_metrics:
            nni.report_intermediate_result(trainer.logged_metrics['val_ppl'])