Example #1
    def __init__(self, json_path, data_dir, validate, ckpt_dir, log_dir,
                 restore):

        self.params = Params(json_path)
        self.valid = 1 if validate == '1' else 0
        self.model = face_model(self.params)

        self.lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            self.params.learning_rate,
            decay_steps=10000,
            decay_rate=0.96,
            staircase=True)
        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=self.lr_schedule,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=0.1)

        self.checkpoint = tf.train.Checkpoint(
            model=self.model,
            optimizer=self.optimizer,
            train_steps=tf.Variable(0, dtype=tf.int64),
            valid_steps=tf.Variable(0, dtype=tf.int64),
            epoch=tf.Variable(0, dtype=tf.int64))
        self.ckptmanager = tf.train.CheckpointManager(self.checkpoint,
                                                      ckpt_dir, 3)

        if self.params.triplet_strategy == "batch_all":
            self.loss = batch_all_triplet_loss

        elif self.params.triplet_strategy == "batch_hard":
            self.loss = batch_hard_triplet_loss

        elif self.params.triplet_strategy == "batch_adaptive":
            self.loss = adapted_triplet_loss

        current_time = datetime.datetime.now().strftime("%d-%m-%Y_%H%M%S")
        log_dir += current_time + '/train/'
        self.train_summary_writer = tf.summary.create_file_writer(log_dir)

        if restore == '1' and self.ckptmanager.latest_checkpoint:
            self.checkpoint.restore(self.ckptmanager.latest_checkpoint)
            print(
                f'\nRestored from Checkpoint : {self.ckptmanager.latest_checkpoint}\n'
            )

        else:
            print('\nInitializing from scratch\n')

        self.train_dataset, self.train_samples = get_dataset(
            data_dir, self.params, 'train')

        if self.valid:
            self.valid_dataset, self.valid_samples = get_dataset(
                data_dir, self.params, 'val')
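
For reference, the staircase ExponentialDecay above holds the learning rate constant within each 10000-step window; a minimal standalone sketch of what it computes (the initial_lr value is illustrative, not from the source):

def staircase_lr(step, initial_lr=0.01, decay_steps=10000, decay_rate=0.96):
    # lr(step) = initial_lr * decay_rate ** floor(step / decay_steps)
    return initial_lr * decay_rate ** (step // decay_steps)

assert staircase_lr(0) == 0.01
assert abs(staircase_lr(10000) - 0.0096) < 1e-9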
Example #2
def evaluate(save_path, checkpoint_name="weights.ckpt"):
    # Load config
    config = parse_gin_config(os.path.join(save_path, "config.gin"))
    gin.parse_config_files_and_bindings(
        [os.path.join(save_path, "config.gin")], bindings=[""])

    # Dynamically create dataset generators
    train, valid, test, meta_data = get_dataset(
        batch_size=config['train.batch_size'], seed=config['train.seed'])

    # Load model (a bit hacky, but necessary because load_from_checkpoint seems to fail)
    ckpt_path = os.path.join(save_path, checkpoint_name)
    ckpt = torch.load(ckpt_path)
    model = models.__dict__[config['train.model']]()
    summary(model)
    pl_module = SupervisedLearning(model, lr=0.0)
    pl_module.load_state_dict(ckpt['state_dict'])

    # NOTE: This fails, probably due to a bug in PyTorch Lightning; the code above manually does something similar.
    # ckpt_path = os.path.join(save_path, checkpoint_name)
    # pl_module = SupervisedLearning.load_from_checkpoint(ckpt_path)

    trainer = pl.Trainer()
    results, = trainer.test(model=pl_module,
                            test_dataloaders=test,
                            ckpt_path=ckpt_path)
    logger.info(results)
    out_path = os.path.join(save_path,
                            "eval_results_{}.json".format(checkpoint_name))
    with open(out_path, "w") as f:
        json.dump(results, f)
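
The manual restore above is the standard PyTorch state_dict pattern; a self-contained sketch of the same pattern (the Linear model and file name are illustrative assumptions):

import torch

# Save and re-load a checkpoint dict keyed by 'state_dict', as above.
model = torch.nn.Linear(4, 2)
torch.save({"state_dict": model.state_dict()}, "weights.ckpt")

ckpt = torch.load("weights.ckpt")
model.load_state_dict(ckpt["state_dict"])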
Example #3
def model_search(dataset,
                 backbone,
                 val_split,
                 imgsize,
                 batch_size,
                 output_path,
                 gpu_cnt,
                 debug_mode=False):
    """
    Function for model search
    :param dataset: dataset path
    :param backbone: one of the 'Load_Base_Model' file, please refer to ./src/load_base_model.py file
    :param output_path: model .h5 file output
    :param gpu_cnt: the gpu number to use
    """
    model_path, log_path = make_file(output_path, backbone)

    gen_train, gen_valid, params = get_dataset(dataset_path=dataset,
                                               model_path=model_path,
                                               batch_size=batch_size,
                                               imgsize=imgsize,
                                               val_split=val_split,
                                               debug=debug_mode)

    ht = HyperTuner(data_params=params,
                    imgsize=imgsize,
                    backbone=backbone,
                    gen_train=gen_train,
                    gen_valid=gen_valid,
                    model_path=model_path,
                    log_path=log_path,
                    gpu_cnt=gpu_cnt)

    # HyperTuning Optimization
    ht.optimize()
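
A hypothetical invocation (all argument values are illustrative, not from the source):

# model_search(dataset='./data/images',
#              backbone='EfficientNetB0',
#              val_split=0.2,
#              imgsize=224,
#              batch_size=32,
#              output_path='./outputs',
#              gpu_cnt=1)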
Example #4
def train(save_path, model, lr=0.1, batch_size=128, callbacks=[]):
    # Dynamically create dataset generators
    train, valid, test, meta_data = get_dataset(batch_size=batch_size)

    # Dynamically create the model
    model = models.__dict__[model]()
    summary(model)
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Dynamically create callbacks
    callbacks_constructed = []
    for name in callbacks:
        clbk = get_callback(name, verbose=0)
        if clbk is not None:
            callbacks_constructed.append(clbk)

    # Pass everything to the training loop
    steps_per_epoch = (len(meta_data['x_train']) - 1) // batch_size + 1
    training_loop(model=model,
                  optimizer=optimizer,
                  loss_function=loss_function,
                  metrics=[acc],
                  train=train,
                  valid=test,  # NOTE: validation here uses the test split
                  meta_data=meta_data,
                  steps_per_epoch=steps_per_epoch,
                  save_path=save_path,
                  config=_CONFIG,
                  use_tb=True,
                  custom_callbacks=callbacks_constructed)
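
The steps_per_epoch expression above is integer ceiling division; a quick check of the identity (the sample sizes are illustrative):

import math

n, b = 50000, 128
assert (n - 1) // b + 1 == (n + b - 1) // b == math.ceil(n / b) == 391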
Example #5
def train(save_path,
          model,
          batch_size=128,
          seed=777,
          callbacks=[],
          resume=True,
          evaluate=True):
    # Dynamically create dataset generators
    train, valid, test, meta_data = get_dataset(batch_size=batch_size,
                                                seed=seed)

    # Dynamically create the model
    model = models.__dict__[model]()
    summary(model)

    # Dynamically create callbacks
    callbacks_constructed = []
    for name in callbacks:
        clbk = get_callback(name, verbose=0)
        if clbk is not None:
            callbacks_constructed.append(clbk)

    if not resume and os.path.exists(os.path.join(save_path, "last.ckpt")):
        raise IOError(
            "Please clear folder before running or pass train.resume=True")

    # Create the module and pass it to training
    checkpoint_callback = ModelCheckpoint(
        filepath=os.path.join(save_path, "weights"),
        verbose=True,
        save_last=True,  # For resumability
        monitor='valid_acc',
        mode='max')
    pl_module = supervised_training.SupervisedLearning(model,
                                                       meta_data=meta_data)
    trainer = training_loop(train,
                            valid,
                            pl_module=pl_module,
                            checkpoint_callback=checkpoint_callback,
                            callbacks=callbacks_constructed,
                            save_path=save_path)

    # Evaluate
    if evaluate:
        results, = trainer.test(test_dataloaders=test)
        logger.info(results)
        with open(os.path.join(save_path, "eval_results.json"), "w") as f:
            json.dump(results, f)
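
Note that PyTorch Lightning's trainer.test returns a list with one results dict per test dataloader, hence the single-element unpacking above; a toy illustration (the values are made up):

outputs = [{"test_acc": 0.93}]  # illustrative return value of trainer.test
results, = outputs
assert results["test_acc"] == 0.93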
Example #6
def combine_weights(weights_init, weights_final, config, step=1):
    train, valid, test, meta_data = get_dataset(
        batch_size=config['train.batch_size'], seed=config['train.seed'])
    results = {'freq': [], 'train': [], 'valid': [], 'test': []}
    num_steps = int(1 / step)
    print("step_size: {step}  | num_of_steps: {num_steps}")

    for i in range(num_steps + 1):
        freq = i * step
        weights_temp = collections.OrderedDict()
        for k in weights_init:
            weights_temp[k] = (
                1 - freq) * weights_init[k] + freq * weights_final[k]
            # Assignment should already place the key at the end of the
            # OrderedDict, but call move_to_end explicitly to be safe.
            weights_temp.move_to_end(k)
        print("freq: {}".format(freq))
        results_step = calculate_acc(weights_temp, meta_data, config, train,
                                     valid, test)

        results['freq'].append(freq)
        for k, v in results_step.items():
            results[k].append(v)

    return results
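
The inner loop is a straight-line interpolation per parameter, w(f) = (1 - f) * w_init + f * w_final; a minimal tensor sketch of the same computation:

import torch

w_init, w_final = torch.zeros(3), torch.ones(3)
for freq in (0.0, 0.5, 1.0):
    w = (1 - freq) * w_init + freq * w_final
    print(freq, w)  # 0.0 -> [0,0,0], 0.5 -> [0.5,0.5,0.5], 1.0 -> [1,1,1]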
Example #7
def main():
    args = vars(parser.parse_args())
    check_args(args)
    set_seeds(2020)
    model_cfg = config.ModelConfig(args["model_config"])
    run_cfg = config.RunConfig(args["run_config"],
                               eval=True,
                               sanity_check=args["sanity_check"])
    output, writer, save_prefix = set_output(args, "eval_wrn_log")
    os.environ['CUDA_VISIBLE_DEVICES'] = (args["device"]
                                          if args["device"] is not None else "")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data_parallel = torch.cuda.device_count() > 1
    config.print_configs(args, [model_cfg, run_cfg], device, output)

    ## Loading datasets
    start = Print(" ".join(['start loading datasets:', args["dataset"]]),
                  output)
    dataset_test, dataset_info = get_dataset(args["dataset"],
                                             test=True,
                                             sanity_check=args["sanity_check"])
    iterator_test = torch.utils.data.DataLoader(dataset_test,
                                                run_cfg.batch_size_eval,
                                                shuffle=True,
                                                num_workers=2)
    end = Print(
        " ".join(['loaded',
                  str(len(dataset_test)), 'dataset_test samples']), output)
    Print(" ".join(['elapsed time:', str(end - start)]), output, newline=True)

    ## initialize a model
    start = Print('start initializing a model', output)
    model_cfg.set_num_channels_classes(dataset_info["num_channels"],
                                       dataset_info["num_classes"])
    model_cfg.set_dropout_rate(run_cfg.dropout_rate)
    model = WideResNet(model_cfg)
    end = Print('end initializing a model', output)
    Print("".join(['elapsed time:', str(end - start)]), output, newline=True)

    ## setup trainer configurations
    start = Print('start setting trainer configurations', output)
    if not data_parallel: model = model.to(device)
    else: model = nn.DataParallel(model.to(device))
    criterion = nn.CrossEntropyLoss(reduction="none")
    run_cfg.set_adv(dataset_info, device)
    trainer = Trainer(model, criterion, run_cfg, std=True, adv=True, test=True)
    trainer.load(args["checkpoint"], save_prefix, device, output)
    end = Print('end setting trainer configurations', output)
    Print("".join(['elapsed time:', str(end - start)]), output, newline=True)

    ## evaluate a model
    start = Print('start evaluating a model', output)
    Print(trainer.get_headline(), output)
    ### test
    for B, batch in enumerate(iterator_test):
        batch = [t.to(device) if type(t) is torch.Tensor else t for t in batch]
        trainer.std_evaluate(batch)
        trainer.adv_evaluate(batch)
        if B % 2 == 0:
            print('# test {:.1%}'.format(B / len(iterator_test)),
                  end='\r',
                  file=sys.stderr)
    print(' ' * 150, end='\r', file=sys.stderr)

    ### print log and save models
    trainer.log(output, writer)

    end = Print('end evaluating a model', output)
    Print("".join(['elapsed time:', str(end - start)]), output, newline=True)
    if output is not sys.stdout: output.close()
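
The end='\r' prints above implement an in-place progress display on stderr; a minimal standalone sketch of the same pattern:

import sys

total = 200
for B in range(total):
    if B % 2 == 0:
        print('# test {:.1%}'.format(B / total), end='\r', file=sys.stderr)
print(' ' * 150, end='\r', file=sys.stderr)  # clear the progress line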
Example #8
def train(save_path,
          model,
          datasets=['cifar10'],
          optimizer="SGD",
          data_seed=777,
          seed=777,
          batch_size=128,
          lr=0.0,
          wd=0.0,
          nesterov=False,
          checkpoint_monitor='val_categorical_accuracy:0',
          loss='ce',
          steps_per_epoch=-1,
          momentum=0.9,
          testing=False,
          testing_reload_best_val=True,
          callbacks=[]):
    np.random.seed(seed)

    # Create dataset generators (seeded)
    datasets = [
        get_dataset(d, seed=data_seed, batch_size=batch_size) for d in datasets
    ]

    # Create model
    model = models.__dict__[model](input_shape=datasets[0][-1]['input_shape'],
                                   n_classes=datasets[0][-1]['num_classes'])
    logger.info("# of parameters " +
                str(sum([np.prod(p.shape) for p in model.trainable_weights])))
    model.summary()
    if loss == 'ce':
        loss_function = tf.keras.losses.categorical_crossentropy
    else:
        raise NotImplementedError()

    if optimizer == "SGD":
        optimizer = SGD(learning_rate=lr, momentum=momentum, nesterov=nesterov)
    elif optimizer == "Adam":
        optimizer = Adam(learning_rate=lr)
    else:
        raise NotImplementedError()

    # Create callbacks
    callbacks_constructed = []
    for name in callbacks:
        clbk = get_callback(name, verbose=0)
        if clbk is not None:
            callbacks_constructed.append(clbk)
        else:
            raise NotImplementedError(f"Did not find callback {name}")

    # Pass everything to the training loop
    metrics = [categorical_accuracy]

    if steps_per_epoch == -1:
        steps_per_epoch = (datasets[0][-1]['n_examples_train'] + batch_size -
                           1) // batch_size

    training_loop(model=model,
                  optimizer=optimizer,
                  loss_function=loss_function,
                  metrics=metrics,
                  datasets=datasets,
                  weight_decay=wd,
                  save_path=save_path,
                  config=_CONFIG,
                  steps_per_epoch=steps_per_epoch,
                  use_tb=True,
                  checkpoint_monitor=checkpoint_monitor,
                  custom_callbacks=callbacks_constructed,
                  seed=seed)

    if testing:
        if testing_reload_best_val:
            model = restore_model(model,
                                  os.path.join(save_path, "model_best_val.h5"))

        m_val = evaluate(model, [datasets[0][1]], loss_function, metrics)
        m_test = evaluate(model, [datasets[0][2]], loss_function, metrics)

        logger.info("Saving")
        eval_results = {}
        for k in m_test:
            eval_results['test_' + k] = float(m_test[k])
        for k in m_val:
            eval_results['val_' + k] = float(m_val[k])
        logger.info(eval_results)
        with open(os.path.join(save_path, "eval_results.json"), "w") as f:
            json.dump(eval_results, f)
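
The two prefixing loops at the end are equivalent to dict comprehensions; a minimal sketch (the metric values are made up):

m_val, m_test = {"acc": 0.91}, {"acc": 0.89}  # illustrative values
eval_results = {f"test_{k}": float(v) for k, v in m_test.items()}
eval_results.update({f"val_{k}": float(v) for k, v in m_val.items()})
assert eval_results == {"test_acc": 0.89, "val_acc": 0.91}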
Example #9
def main(_):
    """Builds and trains a sentiment classification RNN."""

    # prevent tf from accessing GPU
    tf.config.experimental.set_visible_devices([], "GPU")

    # Get and save config
    config = argparser.parse_args('main')
    logging.info(json.dumps(config, indent=2))

    with uv.start_run(
            experiment_name=config['save']['mlflow_expname'],
            run_name=config['save']['mlflow_runname']), uv.active_reporter(
                MLFlowReporter()):

        reporters.save_config(config)

        uv.report_params(reporters.flatten(config))

        prng_key = random.PRNGKey(config['run']['seed'])

        # Load data.
        vocab_size, train_dset, test_dset = data.get_dataset(config['data'])

        # Build network.
        cell = model_utils.get_cell(config['model']['cell_type'],
                                    num_units=config['model']['num_units'])

        init_fun, apply_fun, _, _ = network.build_rnn(
            vocab_size, config['model']['emb_size'], cell,
            config['model']['num_outputs'])

        loss_fun, acc_fun = optim_utils.loss_and_accuracy(
            apply_fun, config['model'], config['optim'])

        _, initial_params = init_fun(
            prng_key,
            (config['data']['batch_size'], config['data']['max_pad']))

        initial_params = model_utils.initialize(initial_params,
                                                config['model'])

        # get optimizer
        opt, get_params, opt_state, step_fun = optim_utils.optimization_suite(
            initial_params, loss_fun, config['optim'])

        ## Scope setup
        # Reporter setup
        data_store = {}
        reporter = reporters.build_reporters(config['save'], data_store)
        # Static state for scope
        static_state = {
            'acc_fun': acc_fun,
            'loss_fun': loss_fun,
            'param_extractor': get_params,
            'test_set': test_dset
        }

        oscilloscope = m.MetricCallback(static_state)

        def interval_trigger(interval):
            def function_to_return(x):
                return x % interval == 0

            return function_to_return

        oscilloscope.add_measurement({
            'name': 'test_acc',
            'trigger': interval_trigger(config['save']['measure_test']),
            'function': measurements.measure_test_acc
        })
        oscilloscope.add_measurement({
            'name': 'shuffled_test_acc',
            'trigger': interval_trigger(config['save']['measure_test']),
            'function': measurements.measure_shuffled_acc
        })
        oscilloscope.add_measurement({
            'name': 'train_acc',
            'trigger': interval_trigger(config['save']['measure_train']),
            'function': measurements.measure_batch_acc
        })
        oscilloscope.add_measurement({
            'name': 'train_loss',
            'trigger': interval_trigger(config['save']['measure_train']),
            'function': measurements.measure_batch_loss
        })
        oscilloscope.add_measurement({
            'name': 'l2_norm',
            'trigger': interval_trigger(config['save']['measure_test']),
            'function': measurements.measure_l2_norm
        })
        # Train
        global_step = 0
        loss = np.nan
        for epoch in range(config['optim']['num_epochs']):

            for batch_num, batch in enumerate(tfds.as_numpy(train_dset)):
                dynamic_state = {
                    'opt_state': opt_state,
                    'batch_train_loss': loss,
                    'batch': batch
                }

                step_measurements = oscilloscope.measure(
                    int(global_step), dynamic_state)
                if step_measurements is not None:
                    reporter.report_all(int(global_step), step_measurements)

                global_step, opt_state, loss = step_fun(
                    global_step, opt_state, batch)

                if global_step % config['save']['checkpoint_interval'] == 0:
                    params = get_params(opt_state)
                    np_params = np.asarray(params, dtype=object)
                    reporters.save_dict(config, np_params,
                                        f'checkpoint_{global_step}')

        final_measurements = oscilloscope.measure(
            int(global_step),
            dynamic_state,
            measurement_list=['test_acc', 'shuffled_test_acc'])
        reporter.report_all(int(global_step), final_measurements)

        final_params = {
            'params': np.asarray(get_params(opt_state), dtype=object)
        }
        reporters.save_dict(config, final_params, 'final_params')
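
interval_trigger returns a closure that fires every `interval` steps; the same helper in isolation:

def interval_trigger(interval):
    def function_to_return(x):
        return x % interval == 0

    return function_to_return

every_100 = interval_trigger(100)
assert every_100(0) and every_100(200) and not every_100(150)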
Example #10
def main(_):
    BASE_FOLDER = f'results/yelp/jointsweep/{FLAGS.epochs}Epochs/{FLAGS.arch}_eta_{FLAGS.eta}_L2_{FLAGS.l2}_*'
    data_folder = glob.glob(BASE_FOLDER)

    assert len(data_folder) == 1
    data_folder = data_folder[0]

    with open(os.path.join(data_folder, 'config.json')) as f:
        config = json.load(f)

    with open(os.path.join(data_folder, 'test_acc.jsonl')) as f:
        x = json_lines.reader(f)
        print("Non shuffled acc (recorded):")
        print(list(x)[-1]['value'])

    vocab_size, train_dset, test_dset = data.get_dataset(config['data'])

    cell = model_utils.get_cell(config['model']['cell_type'],
                                num_units=config['model']['num_units'])
    init_fun, apply_fun, emb_apply, readout_apply = network.build_rnn(
        vocab_size,
        config['model']['emb_size'],
        cell,
        num_outputs=config['model']['num_outputs'])
    emb_init, emb_apply = renn.embedding(vocab_size,
                                         config['model']['emb_size'])
    network_params = model_utils.load_params(
        os.path.join(data_folder, 'final_params'))
    emb_params, rnn_params, readout_params = network_params

    print("Loaded model and dataset")

    test_acc = measurements.AverageMeter()
    for i, batch in enumerate(tfds.as_numpy(test_dset)):
        if FLAGS.shuffle:
            batch = au.shuffle_words(batch)
        batch_final_states = au.rnn_end_states(cell, batch, rnn_params,
                                               emb_params, emb_apply)
        logits = readout_apply(readout_params, np.vstack(batch_final_states))
        predictions = np.argmax(logits, axis=1)

        curr_acc = np.mean(predictions == batch['labels'])
        test_acc.update(curr_acc, len(batch['index']))

        print(i, len(batch['index']))

        # Free per-batch intermediates to keep memory bounded.
        del batch_final_states
        del logits
        del predictions
        del batch

    if FLAGS.shuffle:
        print("Shuffled accuracy")
    else:
        print("Non-shuffled accuracy")
    print(test_acc.avg)
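
measurements.AverageMeter is not defined in this snippet; it presumably resembles the usual weighted running-average helper, sketched here as an assumption:

# Hypothetical sketch; the actual measurements.AverageMeter is not shown.
class AverageMeter:
    def __init__(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, value, n=1):
        # Accumulate a value weighted by its batch size n.
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count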