Example #1
    def train(self):
        """
		Performs the training process given the configuration passed in
		"""
        if self.inference:
            raise ValueError("Tried to train a model in inference mode.")
        xent_epochs = self.config.max_epochs
        trainer = training.Trainer(
            self.save_fn,
            max_epochs=xent_epochs,
            max_stalled_steps=self.config.max_stalled_steps)

        with tf.Session() as sess:
            if self.model_load:
                self.train_checkpoint.restore(
                    self.model_load).assert_consumed().run_restore_ops()
                warnings.warn(
                    "You are reloading a model for training. This feature"
                    " is still not fully implemented. It restores the state"
                    " of the model variables and optimizer but not the number"
                    " of stalled steps, the validation cost record, or the"
                    " state of the shuffled corpora")
                print("Restored model at {}".format(self.model_load))
            else:
                sess.run(tf.global_variables_initializer())
            training.training_loop(
                sess,
                self.model,
                trainer,
                self.data,
                self.train_feeds,
                self.infer_feeds,
                train_batch_size=self.config.train_batch_size,
                valid_batch_size=self.config.infer_batch_size,
                min_epochs_before_validation=1)
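
The restore chain above is the `tf.train.Checkpoint` pattern for TF 1.x graph mode. A minimal self-contained sketch of the same idiom (the tracked variable and the checkpoint directory are illustrative stand-ins for the class's model, optimizer, and save location):

import tensorflow as tf  # TF 1.x, matching the tf.Session usage above

# Stand-in for the model/optimizer variables tracked by the real checkpoint.
step = tf.train.get_or_create_global_step()
checkpoint = tf.train.Checkpoint(step=step)
ckpt_path = tf.train.latest_checkpoint("/tmp/ckpts")  # None if nothing saved yet

with tf.Session() as sess:
    if ckpt_path:
        status = checkpoint.restore(ckpt_path)
        status.assert_consumed()   # raises if any saved value went unmatched
        status.run_restore_ops()   # graph mode: the restore ops must be run
    else:
        sess.run(tf.global_variables_initializer())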
Example #2
def main():
    print("Loading data from pickle...")
    t = datetime.datetime.now()
    train_lines, charmap, inv_charmap = load_data_from_pickle("dubsmash")
    print("Finished in:", datetime.datetime.now() - t)

    args = {
        'lambda_': 10,
        'n_critic_iters_per_generator_iter': 10,
        'batch_size': 128,
        'lr': 1e-4,
        'adam_beta1': 0.5,
        'adam_beta2': 0.9,
        'iterations': 199000,
        'continue_training': True,
        'netG_checkpoint': "Checkpoints/netG-10300012:39:32AM_12-05-20",
        'netD_checkpoint': "Checkpoints/netD-10300012:39:32AM_12-05-20",
    }

    training_loop(train_lines, charmap, inv_charmap, dataloader, args)
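
The hyperparameters above (`lambda_=10`, ten critic iterations per generator iteration, Adam betas 0.5/0.9) match the WGAN-GP recipe, so `lambda_` is presumably the gradient-penalty weight. A minimal PyTorch sketch of that penalty term; `netD` and the sample tensors are assumptions, not code from this project:

import torch

def gradient_penalty(netD, real, fake, lambda_=10):
    # Interpolate between real and fake samples with a per-sample alpha.
    eps_shape = (real.size(0),) + (1,) * (real.dim() - 1)
    alpha = torch.rand(eps_shape, device=real.device)
    interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    # Penalize the critic's gradient norm for deviating from 1.
    grads = torch.autograd.grad(outputs=netD(interp).sum(), inputs=interp,
                                create_graph=True)[0]
    grads = grads.view(grads.size(0), -1)
    return lambda_ * ((grads.norm(2, dim=1) - 1) ** 2).mean()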
Example #3
    print("Please select an option below by entering the number:")
    print("1. Train using default settings in AirSim")
    print("2. Train using default settings without simulation")
    print("3. Train using custom settings")
    print("4. Evaluation Mode")
    print("5. View the results of the last training session in text UI")
    print("6. View the results of the last training session in AirSim")
    print("7. Change the environment settings")
    print("Enter Q at any time to quit")
    menu_in = input()

    if menu_in == "1":
        # train in airsim
        wait = input(
            "WARNING: Ensure your AirSim Unreal Environment is running")
        training.training_loop(True)

    elif menu_in == "2":
        # train without sim
        training.training_loop(False)

    elif menu_in == "3":
        # custom settings
        # if user has run custom settings, offer rerun
        if custom_set:
            if validation("Rerun with the previous custom settings? Y/N",
                          False, "Y", "N") == 0:
                # run with last settings
                training.training_loop(is_airsim_in, reuse_in, blue_in, red_in,
                                       episodes_in, steps_in, paths_in)
                continue
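
The `continue` at the end of this branch only makes sense inside a loop, so the excerpt is evidently the body of a menu loop (and `custom_set`, `is_airsim_in`, and the other `*_in` variables are set earlier in it). A minimal sketch of the assumed enclosing structure:

while True:
    print("Please select an option below by entering the number:")
    # ... option listing as above ...
    menu_in = input()
    if menu_in.upper() == "Q":
        break
    if menu_in == "1":
        # ... branches as in the excerpt; `continue` returns to the menu
        pass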
Example #4
def main_process():

    tf.logging.set_verbosity(tf.logging.INFO)
    args = manage_arguments()

    output_dir = args.output_dir

    # define the output directory of the experiment
    output_dir += "/model-fc-{}-{}-{}-{}-{}-{}-{}".format(
        args.fc_size, args.num_fc, args.kernel_size,
        args.strides, args.pool_size,
        str(args.dropout_rate).replace('.', '_'),
        str(args.learning_rate).replace('.', '_')
    )
    tf.gfile.MakeDirs(output_dir)

    # prepare the dataset if necessary
    if not os.path.isfile(os.path.join(args.data_dir, 'Train_Images.TFRecord')):
        tf.gfile.MakeDirs(args.data_dir)
        dataloader.prepare_datasets(args.data_dir)

    tf.enable_eager_execution()

    # define the train/valid dataloaders
    ds_train = dataloader.input_dataset_fn(
        batch_size=args.batch_size,
        image_file=os.path.join(args.data_dir, 'Train_Images.TFRecord'),
        label_file=os.path.join(args.data_dir, 'Train_Labels.TFRecord'),
        repeat=True, shuffle=True, drop_remainder=False,
        data_augmentation=True,
    )
    steps_per_epoch = int(math.ceil(24287 / args.batch_size))  # 24287 images in the train split
    ds_eval = dataloader.input_dataset_fn(
        batch_size=args.batch_size,
        image_file=os.path.join(args.data_dir, 'Eval_Images.TFRecord'),
        label_file=os.path.join(args.data_dir, 'Eval_Labels.TFRecord'),
        repeat=False, shuffle=False, drop_remainder=False,
        data_augmentation=False,
    )
    validation_steps = int(math.ceil(8095 / args.batch_size))  # 8095 images in the eval split

    # define the model
    conv_list = [(2, 64), (2, 128), (3, 256)]  # presumably (layers, filters) per conv block
    dense_list = [args.fc_size] * args.num_fc
    dense_list.append(17)  # 17 sigmoid outputs, one per label (multi-label task)
    network_model = model.create_model(
        input_shape=(224, 224, 3),
        conv_list=conv_list,
        dense_list=dense_list,
        kernel_size=args.kernel_size,
        strides=args.strides,
        pool_size=args.pool_size,
        dropout_rate=args.dropout_rate,
        output_activation='sigmoid',
        layer_activation='relu'
    )
    network_model.summary()

    # define the loss function, metrics, and optimizer
    loss_fn = tf.keras.losses.BinaryCrossentropy()
    metrics = [
        tf.keras.metrics.BinaryAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall()
    ]
    best_metric_indicator_index = 0  # index into `metrics`: track BinaryAccuracy for best-model selection
    optimizer = tf.keras.optimizers.Adam(learning_rate=args.learning_rate)

    # Set up the mlflow experiment name and, if a previous run saved its
    # run_id, retrieve it so training resumes into the same mlflow run.
    # Then log the hyperparameters, run the training process, and finally
    # log the returned metrics and return them.
    out_data = None
    mlflow.set_experiment(args.mlflow_experiment_name)
    run_id = training.get_mlflow_run_id(output_dir)
    run_metrics = None
    with mlflow.start_run(run_id=run_id) as run:
        run_id = run.info.run_id
        training.save_mlflow_run_id(run_id, output_dir)
        mlflow.log_param("pool_size", args.pool_size)
        mlflow.log_param("strides", args.strides)
        mlflow.log_param("kernel_size", args.kernel_size)
        mlflow.log_param("fc_size", args.fc_size)
        mlflow.log_param("num_fc", args.num_fc)
        mlflow.log_param("dropout_rate", args.dropout_rate)
        mlflow.log_param("learning_rate", args.learning_rate)
        mlflow.log_param("num_epochs", args.num_epochs)
        out_data = training.training_loop(
            network_model, optimizer, loss_fn,
            ds_train, args.num_epochs, steps_per_epoch, metrics=metrics,
            ds_eval=ds_eval, validation_steps=validation_steps,
            validation_freq=1, step_log_freq=20, checkpoint_path=output_dir,
            chkpt_freq=1, best_metric_coef=1.0,
            best_metric_indicator_index=best_metric_indicator_index,
            num_max_chkpts=args.num_max_chkpts,
            num_max_best_model=args.num_max_best_model, verbose=True
        )
        if out_data is not None:
            for k, v in out_data.items():
                mlflow.log_metric(k, v)

        run_metrics = out_data

    return run_metrics
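
`training.get_mlflow_run_id` and `training.save_mlflow_run_id` are project-specific helpers; judging by their use above, they persist the run id under the output directory so that a restarted experiment resumes logging into the same mlflow run. One plausible implementation, with the file name as an assumption:

import os

RUN_ID_FILE = "mlflow_run_id.txt"  # assumed file name

def get_mlflow_run_id(output_dir):
    # Return the saved run id, or None so mlflow.start_run creates a new run.
    path = os.path.join(output_dir, RUN_ID_FILE)
    if os.path.isfile(path):
        with open(path) as f:
            return f.read().strip()
    return None

def save_mlflow_run_id(run_id, output_dir):
    with open(os.path.join(output_dir, RUN_ID_FILE), "w") as f:
        f.write(run_id)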
Example #5
    def train(self, train_affect=False):
        """
		Performs the training process given the configuration passed in

		:param bool train_affect: Whether to train using cross-entropy or affective loss
		"""
        if self.inference:
            raise ValueError("Tried to train a model in inference mode.")
        xent_epochs = self.config.max_epochs

        trainer = training.Trainer(
            self.save_fn,
            max_epochs=xent_epochs,
            max_stalled_steps=self.config.max_stalled_steps)

        with tf.Session() as sess:
            if self.model_load:
                warnings.warn(
                    "You are reloading a model for training. This feature"
                    " is still not fully implemented. It restores the state"
                    " of the model variables and optimizer but not the number"
                    " of stalled steps, the validation cost record, or the"
                    " state of the shuffled corpora")
                self.train_checkpoint.restore(
                    self.model_load).assert_consumed().run_restore_ops()
                print("Restored model at {}".format(self.model_load))
            else:
                sess.run(tf.global_variables_initializer())

            training.training_loop(
                sess,
                self.model,
                trainer,
                self.data,
                self.train_feeds,
                self.infer_feeds,
                train_batch_size=self.config.train_batch_size,
                valid_batch_size=self.config.infer_batch_size,
                min_epochs_before_validation=1)

            if train_affect:
                affect_epochs = (trainer.epochs_completed //
                                 4) + 1 * (trainer.epochs_completed < 4)
                total_epochs = trainer.epochs_completed + affect_epochs
                self.train_feeds[self.model.train_affect] = True
                print(
                    "Switching from cross-entropy to maximum affective content . . ."
                )

                affect_trainer = training.Trainer(
                    self.checkpoint_best,
                    self.checkpoint_latest,
                    self.save_fn,
                    epochs_completed=trainer.epochs_completed,
                    max_epochs=total_epochs,
                    saver=trainer.saver,
                    best_valid_cost=trainer.best_valid_cost)
                training.training_loop(
                    sess,
                    self.model,
                    affect_trainer,
                    self.data,
                    self.train_feeds,
                    self.infer_feeds,
                    train_batch_size=self.config.train_batch_size,
                    valid_batch_size=self.config.infer_batch_size)
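
A note on the `affect_epochs` arithmetic above: it allots a quarter (rounded down) of the completed cross-entropy epochs, with a floor of one. For instance, `trainer.epochs_completed = 10` yields `10 // 4 + 1 * False = 2` affect epochs, while `epochs_completed = 3` yields `0 + 1 * True = 1`.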
Example #6
save_path = './pokemon_results_' + type1 + '_' + type2 + '/'

# Get data
Xdset = dataloader.PokemonData(type=type1, data_path='./images_with_types')
dataloader_X = DataLoader(Xdset, batch_size=8, shuffle=True)
Ydset = dataloader.PokemonData(type=type2, data_path='./images_with_types')
dataloader_Y = DataLoader(Ydset, batch_size=8, shuffle=True)

# Get the model and train
G_XtoY, G_YtoX, D_X, D_Y = network.create_model()
losses, G_XtoY, G_YtoX, D_X, D_Y = training.training_loop(
    dataloader_X=dataloader_X,
    dataloader_Y=dataloader_Y,
    n_epochs=4000,
    G_XtoY=G_XtoY,
    G_YtoX=G_YtoX,
    D_X=D_X,
    D_Y=D_Y,
    lr=0.0005,
    beta1=0.5,
    beta2=0.999,
    save_path=save_path)

# save the model
checkpoint_dir = './Checkpoints/'
G_XtoY_path = os.path.join(checkpoint_dir, 'G_XtoY.pkl')
G_YtoX_path = os.path.join(checkpoint_dir, 'G_YtoX.pkl')
D_X_path = os.path.join(checkpoint_dir, 'D_X.pkl')
D_Y_path = os.path.join(checkpoint_dir, 'D_Y.pkl')
torch.save(G_XtoY.state_dict(), G_XtoY_path)
torch.save(G_YtoX.state_dict(), G_YtoX_path)
torch.save(D_X.state_dict(), D_X_path)
torch.save(D_Y.state_dict(), D_Y_path)
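
To use these checkpoints later, the same architectures can be rebuilt and the saved state dicts loaded back; a minimal sketch, assuming `network.create_model()` reconstructs identical architectures:

G_XtoY, G_YtoX, D_X, D_Y = network.create_model()
G_XtoY.load_state_dict(torch.load(G_XtoY_path))
G_YtoX.load_state_dict(torch.load(G_YtoX_path))
D_X.load_state_dict(torch.load(D_X_path))
D_Y.load_state_dict(torch.load(D_Y_path))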
Example #7
def main():
    """
        Define training pipelines
    """

    dataset_train, dataset_valid = generate_default_training_pipeline(
        tfr_path,
        channels,
        n_modes,
        validation_split,
        batch_size,
        shuffle_buffer,
        n_prefetch,
        cpu=False)
    """
        Define model
    """

    model = model_cnn_mlp(channels, nz, nx, n_modes, cpu=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=['mean_absolute_error'])
    model_loss = tf.keras.losses.MeanSquaredError()
    """
        Training loop
    """

    training_loop(dataset_train, dataset_valid, save_path, model_name, model,
                  optimizer, model_loss, epochs)

    # model_name = f"Ret_flow-reconstruction_yp{yp_flow:03d}"

    # start_time = time.time()

    # train_loss = tf.metrics.Mean()
    # valid_loss = tf.metrics.Mean()

    # for epoch in range(1, epochs + 1):

    # train_loss.reset_states()

    # for (X_target, Y_target) in dataset_train:

    # pred_loss, mae = model.train_on_batch(X_target, Y_target)
    # train_loss.update_state(pred_loss)

    # for (X_target, Y_target) in dataset_valid:
    # valid_pred_loss, valid_mae = model.test_on_batch(X_target, Y_target)
    # valid_loss.update_state(valid_pred_loss)

    # end_time = time.time()

    # if epoch > 10:

    #  model.optimizer.lr = 0.001 * tf.math.exp(0.1 * (10 - epoch))

    # print(f'Epoch {epoch:04d}/{epochs:04d}, loss: {train_loss.result().numpy()}, val_loss: {valid_loss.result().numpy()}, elapsed time from start: {end_time - start_time}')

    # predictor_name = models_path + model_name + '_predictor.tf'
    # predictor.save(predictor_name)

    return
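
The commented-out draft above suggests what this project's `training_loop` does; below is a sketch consistent with it, built on the standard Keras `train_on_batch`/`test_on_batch` API. The `optimizer` and `model_loss` parameters only mirror the call site (`compile()` already bound them), and the save step is an assumption:

import tensorflow as tf

def training_loop(dataset_train, dataset_valid, save_path, model_name,
                  model, optimizer, model_loss, epochs):
    train_loss = tf.metrics.Mean()
    valid_loss = tf.metrics.Mean()
    for epoch in range(1, epochs + 1):
        train_loss.reset_states()
        valid_loss.reset_states()
        for X_target, Y_target in dataset_train:
            pred_loss, mae = model.train_on_batch(X_target, Y_target)
            train_loss.update_state(pred_loss)
        for X_target, Y_target in dataset_valid:
            valid_pred_loss, valid_mae = model.test_on_batch(X_target, Y_target)
            valid_loss.update_state(valid_pred_loss)
        print(f'Epoch {epoch:04d}/{epochs:04d}, '
              f'loss: {train_loss.result().numpy()}, '
              f'val_loss: {valid_loss.result().numpy()}')
    model.save(save_path + model_name + '.tf')  # save convention assumed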
Example #8
    if phase == 'single_annotator':  # branch name assumed; the listing is truncated here
        for annotator in annotators:  # loop implied by the `annotator` uses below
            learning_rates = get_learning_rates(LR_INT[0], LR_INT[1],
                                                NUM_DRAWS_PHASES[0])
            solver_params_copy = solver_params.copy()
            solver_params_copy.update({
                'save_at': SAVE_MODEL_AT_PHASES[0],
            })
            fit_params_copy = fit_params.copy()
            fit_params_copy.update({
                'epochs': EPOCHS_PHASES[0],
                'basic_only': True,
                'single_annotator': annotator,
            })
            training_loop(dataset,
                          BATCH_SIZES,
                          learning_rates,
                          local_folder,
                          EPOCHS_PHASES[0],
                          solver_params_copy,
                          fit_params_copy,
                          phase_path=phase,
                          annotator_path=annotator)

    if phase == 'pretraining':
        learning_rates = get_learning_rates(LR_INT[0], LR_INT[1],
                                            NUM_DRAWS_PHASES[1])
        solver_params_copy = solver_params.copy()
        solver_params_copy.update({
            'save_at': SAVE_MODEL_AT_PHASES[1],
        })
        fit_params_copy = fit_params.copy()
        fit_params_copy.update({
            'epochs': EPOCHS_PHASES[1],
        })
        # ... (source listing truncated here) ...
        f.close()
        dataset.remove_pseudo_labels()

    if METHOD == 'majority_voting':
        mv_labels = {
            data_item: Counter(
                label[0] for label in data[data_item].values()
            ).most_common(1)[0][0]
            for data_item in data
        }
        with open(labels_path, "wb") as f:
            pickle.dump(mv_labels, f)
        dataset.remove_pseudo_labels()

    if TRAINING and mode == 'train':
        sample_label_map = {}
        with open(labels_path, 'rb') as f:
            sample_label_map = pickle.load(f)
        dataset.use_custom_labels(sample_label_map)
        learning_rates = get_learning_rates(
            LR_INT[0], LR_INT[1], NUM_DRAWS)
        epochs = EPOCHS
        if USE_EPOCH_FACTOR:
            epochs = EPOCHS * epoch_factor
        fit_params = {
            'return_f1': True,
            'deep_randomization': DEEP_RANDOMIZATION,
            'early_stopping_interval': EARLY_STOPPING_INTERVAL,
            'epochs': epochs,
            'pretrained_basic': False,
            'basic_only': True,
        }
        training_loop(dataset, BATCH_SIZES, learning_rates, local_folder,
                      epochs, solver_params, fit_params)
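
`get_learning_rates(low, high, num_draws)` is also project-specific; given how it feeds a random search over learning rates, a plausible implementation draws them log-uniformly from the interval:

import numpy as np

def get_learning_rates(low, high, num_draws):
    # Log-uniform draws are the usual choice for learning-rate search.
    exponents = np.random.uniform(np.log10(low), np.log10(high), num_draws)
    return (10.0 ** exponents).tolist()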