Example #1
    def evaluate_model(self, model_filename, imgs_validation, msks_validation):
        """
        Evaluate the best model on the validation dataset
        """

        model = K.models.load_model(model_filename,
                                    custom_objects=self.custom_objects)

        K.backend.set_learning_phase(0)
        start_inference = time.time()
        print("Evaluating model on test dataset. Please wait...")
        metrics = model.evaluate(imgs_validation,
                                 msks_validation,
                                 batch_size=self.batch_size,
                                 verbose=1)
        elapsed_time = time.time() - start_inference
        print("{} images in {:.2f} seconds => {:.3f} images per "
              "second inference".format(
                  imgs_validation.shape[0], elapsed_time,
                  imgs_validation.shape[0] / elapsed_time))

        for idx, metric in enumerate(metrics):
            print("Test dataset {} = {:.4f}".format(model.metrics_names[idx],
                                                    metric))
            foundations.log_metric(model.metrics_names[idx], metric)
Example #2
    def start(self):
        self.current_epoch = 0
        for epoch in range(self.num_epochs):
            self.current_epoch = epoch
            self.step()
            self.validate()
        if settings.USE_FOUNDATIONS:
            for key, value in self.history_best.items():
                foundations.log_metric(key, float(value))
Example #3
    def on_epoch_end(self, epoch, logs=None):
        y_val = self.y_val
        datagen_val = DataGenerator(self.x_val, mode='test')
        y_pred = self.model.predict_generator(datagen_val, use_multiprocessing=False, max_queue_size=50)
        y_pred_labels = np.zeros((len(y_pred)))
        y_pred_labels[y_pred.flatten() > 0.5] = 1

        self._val_f1 = f1_score(y_val, y_pred_labels.astype(int))
        print(f"val_f1: {self._val_f1:.4f}")

        self.f1_score_value.append(self._val_f1)
        if self.save_model:
            if self._val_f1 >= max(self.f1_score_value):
                print("F1 score has improved. Saving model.")
                self.model.save(self.model_save_filename)

        try:
            foundations.log_metric('epoch_val_f1_score', self._val_f1)
            foundations.log_metric('best_f1_score', max(self.f1_score_value))
        except Exception as e:
            print(e)
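
A minimal sketch of wiring this callback into training, assuming the method above lives in a keras.callbacks.Callback subclass (F1Callback is a hypothetical name; the constructor arguments are inferred from the attributes the method reads):

# Hypothetical wiring; F1Callback, x_val, y_val, x_train, y_train and
# model come from the surrounding training script.
f1_cb = F1Callback(x_val=x_val, y_val=y_val,
                   save_model=True,
                   model_save_filename='best_f1_model.h5')

model.fit(x_train, y_train, epochs=10, callbacks=[f1_cb])  # on_epoch_end runs after each epoch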
Example #4
    def validate(self):
        self.eval()

        for inputs, labels, data in self.val_dl:
            loss, output = self.forward(inputs, labels)
            output = output.detach().cpu()
            self.meter_val.update(labels, output, loss.item())

        dices, iou, loss = self.meter_val.log_metric()
        selection_metric = loss

        if selection_metric <= self.best_metric:
            self.best_metric = selection_metric
            print(f'>>> Saving best model metric={selection_metric:.4f}')
            checkpoint = {'model': self.model}
            torch.save(checkpoint, 'checkpoints/best_model.pth')
            if settings.USE_FOUNDATIONS:
                foundations.save_artifact('checkpoints/best_model.pth', key='best_model_checkpoint')

                foundations.log_metric("train_loss", float(np.mean(self.meter_train.losses)))
                foundations.log_metric("val_loss", float(loss))
                foundations.log_metric("val_dice", float(dices[0]))
                foundations.log_metric("val_iou", float(iou))

        try:
            inputs, labels, data = next(self.visual_iter)
        except StopIteration:
            self.visual_iter = iter(self.val_dl)
            inputs, labels, data = next(self.visual_iter)

        _, output = self.forward(inputs, labels)
        output = torch.sigmoid(output.detach().cpu())
        self.writer.add_images(f'validate/{self.current_epoch}_inputs.png', self.unnorm(inputs), self.current_epoch)
        self.writer.add_images(f'validate/{self.current_epoch}_mask.png', labels, self.current_epoch)
        self.writer.add_images(f'validate/{self.current_epoch}_predict.png',  output, self.current_epoch)
        print(f'Epoch {self.current_epoch}: val loss={loss:.4f} | val iou={iou:.4f}')
Example #5
import foundations
from foundations_contrib.global_state import current_foundations_context, redis_connection

foundations.log_metric('ugh', 10)

with open('thomas_text.txt', 'w') as f:
    f.write('ugh_square')

foundations.save_artifact('thomas_text.txt', 'just_some_artifact')
foundations.log_param('blah', 20)

redis_connection.set('foundations_testing_job_id', current_foundations_context().pipeline_context().job_id)
Example #6
import numpy as np
import foundations

model_params = {
    'num_freq_bin': 240,
    'num_conv_blocks': 8,
    'num_conv_filters': 32,
    'spatial_dropout_fraction': 0.05,
    'num_dense_layers': 1,
    'num_dense_neurons': 50,
    'dense_dropout': 0,
    'learning_rate': 0.0001,
    'epochs': 100,
    'batch_size': 156,
    'residual_con': 2,
    'use_default': True,
    'model_save_dir': 'fitted_objects'
}

for k, v in model_params.items():
    foundations.log_param(k, v)

train_accuracy = np.random.rand()
foundations.log_metric("train_accuracy", train_accuracy)
foundations.log_metric("val_accuracy", train_accuracy * 0.85)

# foundations.save_artifact('visualize_inference_spectrogram.png', key='spectrogram')
Example #7
def log_predictions_for_assertion(predictions):
    for prediction in predictions["Survived"]:
        foundations.log_metric("predictions", prediction)
Example #8
import foundations
import sys

foundations.log_metric("Task", sys.argv[1])
Example #9
import foundations
from time import sleep

foundations.log_metric("metric_int", 1)
foundations.log_metric("metric_large_int", 8888888888888888888888888)
foundations.log_metric("metric_list_of_ints", [1, 2])
foundations.log_metric("metric_long_list_of_ints", [
    1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
    2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
])
foundations.log_metric("metric_long_list_of_long_ints", [
    8888888888888888888888888, 8888888888888888888888888,
    8888888888888888888888888, 8888888888888888888888888,
    8888888888888888888888888, 8888888888888888888888888,
    8888888888888888888888888, 8888888888888888888888888,
    8888888888888888888888888, 8888888888888888888888888,
    8888888888888888888888888, 8888888888888888888888888,
    8888888888888888888888888, 8888888888888888888888888,
    8888888888888888888888888, 8888888888888888888888888,
    8888888888888888888888888, 8888888888888888888888888,
    8888888888888888888888888, 8888888888888888888888888,
    8888888888888888888888888, 8888888888888888888888888,
    8888888888888888888888888
])
foundations.log_metric("metric_mixed_type", 1)
for i in range(20):
    foundations.log_metric("metric_repeat", i)
    sleep(.1)
Example #10
from foundations import log_metric
from foundations.global_state import redis_connection, current_foundations_context

log_metric('hello', 1)
log_metric('hello', 2)
log_metric('world', 3)
Example #11
def train(train_dl, val_base_dl, val_augment_dl, display_dl_iter, model,
          optimizer, n_epochs, max_lr, scheduler, criterion, train_source):
    records = Records()
    best_metric = 0.

    os.makedirs('checkpoints', exist_ok=True)

    for epoch in range(n_epochs):
        train_one_epoch(epoch, model, train_dl, max_lr, optimizer, criterion,
                        scheduler, records)
        validate(model, val_base_dl, criterion, records, data_name='base')
        validate(model,
                 val_augment_dl,
                 criterion,
                 records,
                 data_name='augment')

        if train_source == 'both':
            selection_metric = [
                getattr(records, 'base_val_accs')[-1],
                getattr(records, 'augment_val_accs')[-1]
            ]
            selection_metric = np.mean(selection_metric)

        else:
            selection_metric = getattr(records, f"{train_source}_val_accs")[-1]

        if selection_metric >= best_metric:
            print(
                f'>>> Saving best model metric={selection_metric:.4f} compared to previous best {best_metric:.4f}'
            )
            best_metric = selection_metric
            checkpoint = {
                'model': model,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            torch.save(checkpoint, 'checkpoints/best_model.pth')
            foundations.save_artifact('checkpoints/best_model.pth',
                                      key='pretrained_model_checkpoint')

        display_filename = f'{epoch}_display.png'
        display_predictions_on_image(model,
                                     val_base_dl.dataset.cached_path,
                                     display_dl_iter,
                                     name=display_filename)

        # Save eyeball plot to Atlas GUI
        foundations.save_artifact(display_filename, key=f'{epoch}_display')

        # Save metrics plot
        visualize_metrics(records,
                          extra_metric=extra_metric,
                          name='metrics.png')

        # Save metrics plot to Atlas GUI
        foundations.save_artifact('metrics.png', key='metrics_plot')

    # Log metrics to GUI
    if train_source == 'both':
        avg_metric = [
            getattr(records, 'base_val_accs'),
            getattr(records, 'augment_val_accs')
        ]
        avg_metric = np.mean(avg_metric, axis=0)
        max_index = np.argmax(avg_metric)

    else:
        max_index = np.argmax(getattr(records, f'{train_source}_val_accs'))

    useful_metrics = records.get_metrics()
    for metric in useful_metrics:
        foundations.log_metric(metric,
                               float(getattr(records, metric)[max_index]))
Example #12
def add(x, y):
    result = x + y
    foundations.log_metric('Score', result)
    return result
Example #13
import foundations

foundations.log_metric('name', 'job1')

deployment = foundations.submit(command=["job2.py"])
deployment.wait_for_deployment_to_complete(wait_seconds=10)
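
foundations.submit launches the second script as a job of its own. A companion job2.py for this pattern could be as small as the following hypothetical sketch:

import foundations

# Hypothetical contents of job2.py, mirroring the metric logged by job1.
foundations.log_metric('name', 'job2')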
Example #14
import foundations
from time import sleep

foundations.log_metric("metric_str", str(1.))
foundations.log_metric(
    "metric_long_str",
    "asdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdf"
)
foundations.log_metric("metric_long_list_of_str", [
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
    "qwe",
Example #15
def train(train_dl, val_dl, test_dl, val_dl_iter, model, optimizer, scheduler,
          criterion, params):
    n_epochs = params['n_epochs']
    max_lr = params['max_lr']
    val_rate = params['val_rate']
    batch_repeat = params['batch_repeat']
    records = Records()
    best_metric = 1e9

    os.makedirs('checkpoints', exist_ok=True)

    for epoch in range(n_epochs):
        train_one_epoch(epoch, model, train_dl, max_lr, optimizer, criterion,
                        scheduler, records, batch_repeat)
        if epoch % val_rate == 0:
            validate(model, val_dl, criterion, records)
            # validate(model, test_dl, criterion, records)

            selection_metric = getattr(records, "val_losses")[-1]

            if selection_metric <= best_metric:
                print(
                    f'>>> Saving best model metric={selection_metric:.4f} compared to previous best {best_metric:.4f}'
                )
                best_metric = selection_metric
                checkpoint = {'model': model}

                torch.save(checkpoint, 'checkpoints/best_model.pth')
                if settings.USE_FOUNDATIONS:
                    foundations.save_artifact('checkpoints/best_model.pth',
                                              key='best_model_checkpoint')

            # Save eyeball plot to Atlas GUI
            if settings.USE_FOUNDATIONS:
                display_filename = f'{epoch}_display.png'
                try:
                    data = next(val_dl_iter)
                except StopIteration:
                    val_dl_iter = iter(val_dl)
                    data = next(val_dl_iter)
                # display_predictions_on_image(model, data, name=display_filename)
                # foundations.save_artifact(display_filename, key=f'{epoch}_display')

            # Save metrics plot
            visualize_metrics(records,
                              extra_metric=extra_metric,
                              name='metrics.png')

            # Save metrics plot to Atlas GUI
            if settings.USE_FOUNDATIONS:
                foundations.save_artifact('metrics.png', key='metrics_plot')

    # Log metrics to GUI
    max_index = np.argmin(getattr(records, 'val_losses'))

    useful_metrics = records.get_useful_metrics()
    for metric in useful_metrics:
        if settings.USE_FOUNDATIONS:
            foundations.log_metric(metric,
                                   float(getattr(records, metric)[max_index]))
        else:
            print(metric, float(getattr(records, metric)[max_index]))
Example #16
def train(train_dl, val_dl, test_dl, val_dl_iter, model, optimizer, scheduler,
          criterion, params, train_sampler, val_sampler, rank):
    n_epochs = params['n_epochs']
    max_lr = params['max_lr']
    val_rate = params['val_rate']
    batch_repeat = params['batch_repeat']
    history_best = {}
    best_metric = 0

    if rank == 0:
        os.makedirs('checkpoints', exist_ok=True)
        os.makedirs('tensorboard', exist_ok=True)
        if settings.USE_FOUNDATIONS:
            foundations.set_tensorboard_logdir('tensorboard')
        writer = SummaryWriter("tensorboard")
    else:
        writer = None

    for epoch in range(n_epochs):
        train_records = DistributedClassificationMeter(writer=writer,
                                                       phase="train",
                                                       epoch=epoch,
                                                       workers=params["gpus"],
                                                       criterion=criterion)
        if train_sampler:
            train_sampler.set_epoch(epoch)
        train_one_epoch(epoch, model, train_dl, max_lr, optimizer, criterion,
                        scheduler, train_records, batch_repeat, rank, writer,
                        params)
        if epoch % val_rate == 0:
            val_records = DistributedClassificationMeter(
                writer=writer,
                phase="validation",
                epoch=epoch,
                workers=params["gpus"],
                criterion=criterion)
            if val_sampler:
                val_sampler.set_epoch(epoch)
            validate(model, val_dl, criterion, val_records, rank)

            # Remember to flip the greater-than/less-than when you change this!!!
            # aaaa remember to change the initial value too
            info = val_records.log_metric(write_scalar=False)
            selection_metric = info["acc"]

            if selection_metric >= best_metric and rank == 0:
                print(
                    f'>>> Saving best model metric={selection_metric:.4f} compared to previous best {best_metric:.4f}'
                )
                best_metric = selection_metric
                checkpoint = {
                    'model': model.module.state_dict(),
                    'params': params
                }
                history_best = {
                    "train_" + key: value
                    for key, value in train_records.get_metric().items()
                }
                for key, value in val_records.get_metric().items():
                    history_best["val_" + key] = value

                torch.save(checkpoint, 'checkpoints/best_model.pth')
                if settings.USE_FOUNDATIONS:
                    foundations.save_artifact('checkpoints/best_model.pth',
                                              key='best_model_checkpoint')

    # Log metrics to GUI
    if rank == 0:
        for metric, value in history_best.items():
            if settings.USE_FOUNDATIONS:
                foundations.log_metric(metric, float(value))
            else:
                print(metric, float(value))
Example #17
import foundations

foundations.log_metric('hello', 20)
foundations.set_tag('this_tag', value='this_value')
foundations.set_tag('that_tag', value='that_value')
Example #18
import foundations
foundations.log_metric('int_metric', '5' * 5000)
Example #19
import foundations
from foundations import set_tag

from model import *

set_tag('model', 'cnn')


def print_words():
    print('Hello World!')


print_words()

addition_result = add(82, 2)
set_tag('Loss', addition_result)

subtraction_result = subtract(44, 2)
foundations.log_metric('Accuracy', subtraction_result)

cached_subtraction_result = subtract(44, 2)
foundations.log_metric('Cached_accuracy', cached_subtraction_result)
Example #20
    def evaluate(self, xtrain, ytrain, xval, yval, num_examples=1):
        ytrain_pred = self.predict_labels(xtrain, raw_prob=True)
        yval_pred = self.predict_labels(xval, raw_prob=True)
        try:
            self.optimum_threshold_filename = f"model_threshold_{'_'.join(str(v) for k, v in model_params.items())}.npy"
            self.opt_threshold = np.load(os.path.join(model_params['model_save_dir'],
                                                      self.optimum_threshold_filename)).item()
            print(f"loaded optimum threshold: {self.opt_threshold}")
        except Exception:
            self.opt_threshold = 0.5

        ytrain_pred_labels = self.get_labels_from_prob(ytrain_pred, threshold=self.opt_threshold)
        yval_pred_labels = self.get_labels_from_prob(yval_pred, threshold=self.opt_threshold)

        train_accuracy = accuracy_score(ytrain, ytrain_pred_labels)
        val_accuracy = accuracy_score(yval, yval_pred_labels)

        train_f1_score = f1_score(ytrain, ytrain_pred_labels)
        val_f1_score = f1_score(yval, yval_pred_labels)
        print (f"train accuracy: {train_accuracy}, train_f1_score: {train_f1_score},"
               f"val accuracy: {val_accuracy}, val_f1_score: {val_f1_score} ")

        try:
            foundations.log_metric('train_accuracy', np.round(train_accuracy, 2))
            foundations.log_metric('val_accuracy', np.round(val_accuracy, 2))
            foundations.log_metric('train_f1_score', np.round(train_f1_score, 2))
            foundations.log_metric('val_f1_score', np.round(val_f1_score, 2))
            foundations.log_metric('optimum_threshold', np.round(self.opt_threshold, 2))
        except Exception as e:
            print(e)

        # True Positive Example
        ind_tp = np.argwhere(np.equal((yval_pred_labels + yval).astype(int), 2)).reshape(-1, )

        # True Negative Example
        ind_tn = np.argwhere(np.equal((yval_pred_labels + yval).astype(int), 0)).reshape(-1, )

        # False Positive Example
        ind_fp = np.argwhere(np.greater(yval_pred_labels, yval)).reshape(-1, )

        # False Negative Example
        ind_fn = np.argwhere(np.greater(yval, yval_pred_labels)).reshape(-1, )


        path_to_save_spetrograms = './spectrograms'
        if not os.path.isdir(path_to_save_spetrograms):
            os.makedirs(path_to_save_spetrograms)
        specs_saved = os.listdir(path_to_save_spetrograms)
        if len(specs_saved) > 0:
            for file_ in specs_saved:
                os.remove(os.path.join(path_to_save_spetrograms, file_))

        ind_random_tp = np.random.choice(ind_tp, num_examples).reshape(-1,)
        tp_x = [xtrain[i] for i in ind_random_tp]

        ind_random_tn = np.random.choice(ind_tn, num_examples).reshape(-1,)
        tn_x = [xtrain[i] for i in ind_random_tn]

        ind_random_fp = np.random.choice(ind_fp, num_examples).reshape(-1,)
        fp_x = [xtrain[i] for i in ind_random_fp]

        ind_random_fn = np.random.choice(ind_fn, num_examples).reshape(-1,)
        fn_x = [xtrain[i] for i in ind_random_fn]

        print("Plotting spectrograms to show what the hell the model has learned")
        for i in range(num_examples):
            plot_spectrogram(tp_x[i], path=os.path.join(path_to_save_spetrograms, f'true_positive_{i}.png'))
            plot_spectrogram(tn_x[i], path=os.path.join(path_to_save_spetrograms, f'true_negative_{i}.png'))
            plot_spectrogram(fp_x[i], path=os.path.join(path_to_save_spetrograms, f'false_positive_{i}.png'))
            plot_spectrogram(fn_x[i], path=os.path.join(path_to_save_spetrograms, f'false_negative_{i}.png'))

        try:
            foundations.save_artifact(os.path.join(path_to_save_spetrograms, f'true_positive_{i}.png'), key='true_positive_example')
            foundations.save_artifact(os.path.join(path_to_save_spetrograms, f'true_negative_{i}.png'), key='true_negative_example')
            foundations.save_artifact(os.path.join(path_to_save_spetrograms, f'false_positive_{i}.png'), key='false_positive_example')
            foundations.save_artifact(os.path.join(path_to_save_spetrograms, f'false_negative_{i}.png'), key='false_negative_example')

        except Exception as e:
            print(e)
Example #21
import foundations
from time import sleep

foundations.log_metric("metric_float", 1.)
foundations.log_metric("metric_large_float", 999999999.8888888888888888)
foundations.log_metric("metric_list_of_floats", [1., 2.])
foundations.log_metric("metric_long_list_of_floats", [1., 2., 1., 2., 1., 2., 1., 2., 1., 2., 1., 2., 1., 2., 1., 2., 1., 2., 1., 2., ])
foundations.log_metric("metric_long_list_of_long_floats", [999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888,
                                                    999999999.8888888888888888])

foundations.log_metric("metric_mixed_type", 2.222)
for i in range(20):
    foundations.log_metric("metric_repeat", i / 3.)
    sleep(.1)
Example #22
    def training_loop(iteration):
        """The main training loop encapsulated in a function."""
        step = 0
        epoch = 0
        print("Running training loop")
        while True:
            sess.run(dataset.train_initializer)
            epoch += 1

            # End training if we have passed the epoch limit.
            if epoch > NUM_EPOCHS:
                break

            start_time = time.time()
            # One training epoch.
            print("Epoch: {} out of {}".format(epoch,
                                               NUM_EPOCHS))  #training_len[1]))
            while True:
                try:
                    step += 1

                    # End training if we have passed the step limit.
                    # training_len = ('iterations', 50000)
                    if training_len[0] == 'iterations' and step > training_len[1]:
                        return

                    # Train.

                    step_time = time.time()
                    records = sess.run([
                        optimize, model.loss, model.targets, model.outputs,
                        model.inputs
                    ] + model.train_summaries,
                                       {dataset.handle: train_handle})[1:]
                    loss, targets, outputs, inputs = records[:4]
                    records = records[4:]

                    record_summaries(step, records, train_file)

                    if step % 10 == 0:
                        logger.info(
                            "Step {} - Loss: {} - Time per step: {}".format(
                                step, loss,
                                time.time() - step_time))

                    collect_test_summaries(step)

                except tf.errors.OutOfRangeError:
                    break
            logger.info("Time for epoch: {}".format(time.time() - start_time))

        outputs = output_to_rgb(outputs)
        targets = output_to_rgb(targets)

        inputs_artifact_path = save_image(
            inputs, 'inputs_{}'.format(iteration) + '.png')
        targets_artifact_path = save_image(
            targets, 'targets_{}'.format(iteration) + '.png')
        outputs_artifact_path = save_image(
            outputs, 'outputs_{}'.format(iteration) + '.png')

        tensorboard_path = 'lottery_ticket/{}/unet/summaries/'.format(
            iteration)
        tensorboard_file = os.path.join(tensorboard_path,
                                        os.listdir(tensorboard_path)[0])

        f9s.save_artifact(tensorboard_file, 'tensorboard_{}'.format(iteration))

        f9s.log_metric('loss_{}'.format(iteration), float(loss))

        f9s.save_artifact(inputs_artifact_path, 'inputs_{}'.format(iteration))
        f9s.save_artifact(targets_artifact_path,
                          'targets_{}'.format(iteration))
        f9s.save_artifact(outputs_artifact_path,
                          'outputs_{}'.format(iteration))

        # End of training loop.
        return
Example #23
import foundations

foundations.log_metric('key', 'value')
foundations.set_tag('key', value='value')
foundations.log_param('param', 'param_value')
print('Hello World!')
Example #24
import foundations

print("I should not print out because the worker image is invalid")
foundations.log_metric("Worker", "invalid image")
params = load_parameters()
seed_everything(params['seed'])
log_params(params)

params = parse_params(params)
print(params)

model = CIFAR_Module(params).cuda()
lr_logger = LearningRateLogger()
logger = TensorBoardLogger("../logs", name=params["backbone"])
if USE_FOUNDATIONS:
    from foundations import set_tensorboard_logdir
    set_tensorboard_logdir(f'../logs/{params["backbone"]}')

checkpoint_callback = ModelCheckpoint(save_top_k=1,
                                      monitor='acc',
                                      prefix=str(params["seed"]))
t_params = get_trainer_params(params)
trainer = Trainer(callbacks=[lr_logger],
                  logger=logger,
                  checkpoint_callback=checkpoint_callback,
                  **t_params)
trainer.fit(model)

if USE_FOUNDATIONS and checkpoint_callback.best_model_path != "":
    from foundations import log_metric, save_artifact
    save_artifact(checkpoint_callback.best_model_path,
                  key='best_model_checkpoint')
    log_metric("val_acc", float(checkpoint_callback.best_model_score))

print("Training finished")
Example #26
import foundations
import json

params = foundations.load_parameters()
foundations.log_metric('how_i_lern', params['learning_rate'])
foundations.log_metric('first_boi', params['layers'][0]['neurons'])
foundations.log_metric('second_boi', params['layers'][1]['neurons'])
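
For these lookups to succeed, the job's parameter file needs a learning_rate key and a layers list of objects with a neurons key. A hypothetical script that writes a matching parameters.json (the conventional file name for Atlas parameters; the values here are made up):

import json

params = {
    "learning_rate": 0.001,
    "layers": [
        {"neurons": 128},
        {"neurons": 64},
    ],
}

# Written next to the job's entrypoint so foundations.load_parameters()
# can pick it up.
with open("parameters.json", "w") as f:
    json.dump(params, f)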
Example #27
"""
This sample main.py shows basic Atlas functionality.
In this script, we will log some arbitrary values & artifacts that can be viewed in the Atlas GUI
"""

import foundations

depth = 3
epochs = 5
batch_size = 256
lrate = 1e-3


# Log some hyper-parameters
foundations.log_param('depth', depth)
foundations.log_params({'epochs': epochs,
                        'batch_size': batch_size,
                        'learning_rate': lrate})

# Log some metrics
accuracy = 0.9
loss = 0.1
foundations.log_metric('accuracy', accuracy)
foundations.log_metric('loss', loss)

# Log an artifact that is already saved to disk
foundations.save_artifact('README.txt', 'Project_README')
Example #28
def get_and_log_python_path_as_metric():
    import sys

    python_path = sys.executable
    foundations.log_metric("python_path", python_path)
Example #29
import foundations
from foundations import set_tag
from foundations_contrib.global_state import current_foundations_job

from model import *

set_tag('model', 'cnn')


def print_words():
    print(f'Job \'{current_foundations_job().job_id}\' deployed')
    print('Hello World!')


print_words()

addition_result = add(82, 2)
set_tag('Loss', addition_result)

subtraction_result = subtract(44, 2)
foundations.log_metric('Accuracy', subtraction_result)
Example #30
def experiment(make_dataset,
               make_model,
               train_model,
               prune_masks,
               iterations,
               presets=None):
    """Run the lottery ticket experiment for the specified number of iterations.

    Args:
      make_dataset: A function that, when called with no arguments, will create an
        object that descends from dataset_base.
      make_model: A function that, when called with four arguments (input_tensor,
        label_tensor, presets, masks), creates a model object that descends from
        model_base. Presets and masks are optional.
      train_model: A function that, when called with four arguments (session,
        pruning iteration number, dataset, model), trains the model using the
        dataset and returns the model's initial and final weights as dictionaries.
      prune_masks: A function that, when called with two arguments (dictionary of
        current masks, dictionary of final weights), returns a new dictionary of
        masks that have been pruned. Each dictionary key is the name of a tensor
        in the network; each value is a numpy array containing the values of the
        tensor (1/0 values for mask, weights for the dictionary of final weights).
      iterations: The number of pruning iterations to perform.
      presets: (optional) The presets to use for the first iteration of training.
        In the form of a dictionary where each key is the name of a tensor and
        each value is a numpy array of the values to which that tensor should
        be initialized.
    """

    # A helper function that trains the network once according to the behavior
    # determined internally by the train_model function.
    logger.info('Training once according to the base model behaviour')

    def train_once(iteration, presets=None, masks=None):
        tf.reset_default_graph()
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        dataset = make_dataset()
        input_tensor, label_tensor = dataset.placeholders
        model = make_model(input_tensor,
                           label_tensor,
                           presets=presets,
                           masks=masks)
        return train_model(sess, iteration, dataset, model)

    # Run once normally.
    initial, final = train_once(0, presets=presets)

    logger.info('Create the initial masks with no weights pruned.')
    masks = {}
    for k, v in initial.items():
        masks[k] = np.ones(v.shape)

    logger.info('Begin the training loop.')

    for iteration in range(1, iterations + 1):
        logger.info('Prune the network, iteration {}'.format(iteration))
        masks = prune_masks(masks, final)

        num_weights = int(sum([v.sum() for v in masks.values()]))
        f9s.log_metric('num_weights_{}'.format(iteration), num_weights)

        logger.info('Train the network again after pruning')
        _, final = train_once(iteration, presets=initial, masks=masks)
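
The docstring above fixes the prune_masks contract: it receives the current masks and the final weights (both dicts of numpy arrays keyed by tensor name) and returns a new dict of pruned masks. A minimal magnitude-pruning sketch satisfying that contract; the helper name and the 10% default rate are assumptions for illustration, not taken from the source:

import numpy as np

def prune_by_magnitude(masks, final_weights, prune_fraction=0.1):
    """Zero out the smallest-magnitude surviving weights in each tensor."""
    new_masks = {}
    for name, mask in masks.items():
        weights = final_weights[name]
        # Consider only weights that are still unmasked.
        alive = np.abs(weights[mask == 1])
        if alive.size == 0:
            new_masks[name] = mask
            continue
        # Magnitude below which surviving weights are pruned away.
        cutoff = np.percentile(alive, prune_fraction * 100)
        new_masks[name] = np.where(np.abs(weights) >= cutoff, mask, 0)
    return new_masks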