Example #1
def get_compiled_model(config):

  # Parameters useful for all the models
  kwargs = {
    'input_size': config['model']['input_size'],
    'changing_ids': config['model']['changing_ids'],
    'num_classes': config['model']['num_classes'],
    'factor': config['model']['factor'],
    'upstride_type': config['model']['upstride_type'],
    'tf2upstride_strategy': config['model']['conversion_params']['tf2up_strategy'],
    'upstride2tf_strategy': config['model']['conversion_params']['up2tf_strategy'],
    'weight_decay': config['optimizer']['weight_decay'],
  }

  # for architecture search
  if config['load_searched_arch']:
    kwargs['load_searched_arch'] = config['load_searched_arch']

  model = model_name_to_class[config['model']['name']](**kwargs).build()
  model.summary()
  # calculate FLOPs for the model
  if config['model']['calculate_flops']:
    calc_flops = metrics.count_flops_efficient(model, config['model']['upstride_type'])
    print(f"Total FLOPs for {config['model']['name']}: {calc_flops}")
  optimizer = get_optimizer(config['optimizer'])
  model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                metrics=['accuracy', 'top_k_categorical_accuracy'])
  # return the optimizer so it can be saved in the checkpoint
  return model, optimizer
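
For reference, get_compiled_model above only reads a handful of nested config keys. The dict below is a minimal sketch of a config that would satisfy it; every concrete value is an illustrative assumption, not a default from the project.

# Hypothetical config illustrating the keys read by get_compiled_model above.
config = {
    'model': {
        'name': 'ResNet18',            # assumed to be a key of model_name_to_class
        'input_size': [224, 224, 3],
        'changing_ids': [],
        'num_classes': 1000,
        'factor': 1,
        'upstride_type': -1,
        'calculate_flops': False,
        'conversion_params': {
            'tf2up_strategy': '',
            'up2tf_strategy': 'default',
        },
    },
    'optimizer': {'name': 'sgd', 'lr': 0.1, 'weight_decay': 1e-4},
    'load_searched_arch': None,
}
# model, optimizer = get_compiled_model(config)  # assuming the module above is in scope
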
Example #2
def get_model(args):
    model = model_name_to_class[args['model_name']](
        args['framework'], args['factor'], args['input_size'],
        args['num_classes'], args['n_layers_before_tf'], False).model
    model.summary()
    optimizer = get_optimizer(args['optimizer'])
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy', 'top_k_categorical_accuracy'])
    # return the optimizer so it can be saved in the checkpoint
    return model, optimizer
Example #3

def build_model(hp):
    factor = hp.Choice('factor',
                       get_values_from_args(args['factor']),
                       ordered=True)
    framework = hp.Choice('framework', args['frameworks'])
    model = model_name_to_class[args['model_name']](framework,
                                                    factor,
                                                    args['input_size'],
                                                    args['num_classes'],
                                                    hp=hp).model
    model.compile(optimizer=get_optimizer(args['optimizer']),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
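
The hp.Choice calls above follow the Keras Tuner HyperParameters API, so build_model can presumably be handed directly to a tuner. A minimal sketch, assuming the keras_tuner package and placeholder directory and dataset names:

# Hedged sketch: wiring build_model into Keras Tuner. Dataset names are placeholders,
# and the definitions above (args, get_values_from_args, ...) are assumed to be in scope.
import keras_tuner as kt

tuner = kt.RandomSearch(
    hypermodel=build_model,        # the function defined above
    objective='val_accuracy',
    max_trials=10,
    directory='tuner_logs',
    project_name='factor_search',
)
# tuner.search(train_dataset, validation_data=val_dataset, epochs=5)
# best_model = tuner.get_best_models(num_models=1)[0]
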
Example #4
def get_model(args):
  load_arch = args['load_searched_arch'] if args['load_searched_arch'] else None
  model = model_name_to_class[args['model_name']](args['framework'], # TODO replace args[] by args['model']
                                                  args['conversion_params'],
                                                  args['factor'],
                                                  args['input_size'],
                                                  args['num_classes'],
                                                  args['n_layers_before_tf'],
                                                  False,
                                                  load_searched_arch=load_arch,
                                                  args=args).model
  model.summary()
  optimizer = get_optimizer(args['optimizer'])
  model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                metrics=['accuracy', 'top_k_categorical_accuracy'])
  # return the optimizer so it can be saved in the checkpoint
  return model, optimizer
Example #5
def progressive_training(model, config, train_dataset, val_dataset, callbacks,
                         latest_epoch, max_queue_size=16, optimizer=None):
  """Trains the model using progressive resizing

  Image resolution is modified during training following config["resizing_sizes"].
  The portion of the total number of epochs dedicated to each size
  is defined in config["resizing_training_portions"].

  example:
  image resolution = 768 x 512
  config["resizing_sizes"] = [0.5, 1]
  config["resizing_training_portions"] = [0.7,0.3]
  config['num_epochs'] = 100
   ->
  For the 0.7*100=70 first epochs the network will train
      with (0.5*768 x 0.5*512) = (384 x 256) resolution
  For the 0.3*100=30 following epochs, the network will train
      with (1*768 x 1*512) = (768 x 512) resolution

  if config["reset_after_resize"] is True, the callbacks and optimizer will be
  reset after each change in resolution.
  """

  sizes_list = config["resizing_sizes"]
  resizing_portions = config["resizing_training_portions"]
  assert len(sizes_list) == len(resizing_portions), "resizing_sizes and resizing_portions must have the same length"
  assert abs(sum(resizing_portions) - 1.0) < 1e-6, "the sum of resizing_portions must equal 1"

  reset = config["reset_after_resize"]
  if reset:
    callbacks_tmp = [copy.deepcopy(callbacks) for _ in range(len(sizes_list))]

  total_epochs = config['num_epochs']
  epochs_by_portion = [int(total_epochs * resizing_portion) for resizing_portion in resizing_portions]
  epochs_by_portion[-1] = total_epochs - sum(epochs_by_portion[:-1])  # to ensure the right total number of epochs
  current_epoch = 0


  for element in train_dataset:
    original_size = element[0].shape  # N, H, W, C
    break

  for i, size in enumerate(sizes_list):
    if latest_epoch >= current_epoch + epochs_by_portion[i]:
      current_epoch += epochs_by_portion[i]
      continue
    current_epoch = max(latest_epoch, current_epoch)

    if abs(size-1) > 0.001:
      resize = tf.keras.layers.experimental.preprocessing.Resizing(int(original_size[1] * size),
                                                                   int(original_size[2] * size))
      resized_train_set = train_dataset.map(lambda x, y: (resize(x), y)).prefetch(tf.data.experimental.AUTOTUNE)
    else:
      resized_train_set = train_dataset
    if reset:
      callbacks = callbacks_tmp[i]
      optimizer = get_optimizer(config['optimizer'])
      model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                    metrics=['accuracy', 'top_k_categorical_accuracy'])

    model.fit(x=resized_train_set,
              validation_data=val_dataset,
              epochs=current_epoch + epochs_by_portion[i],
              callbacks=callbacks,
              max_queue_size=max_queue_size,
              initial_epoch=current_epoch
              )
    current_epoch += epochs_by_portion[i]
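
The scheduling arithmetic described in the docstring can be checked in isolation. The sketch below reuses the docstring's own example values and reproduces the epoch split and the resized resolutions:

# Sketch of the scheduling arithmetic from the docstring above
# (values copied from the docstring example).
sizes_list = [0.5, 1]
resizing_portions = [0.7, 0.3]
total_epochs = 100
original_hw = (768, 512)

epochs_by_portion = [int(total_epochs * p) for p in resizing_portions]
epochs_by_portion[-1] = total_epochs - sum(epochs_by_portion[:-1])
print(epochs_by_portion)  # [70, 30]

for size, n_epochs in zip(sizes_list, epochs_by_portion):
    h, w = int(original_hw[0] * size), int(original_hw[1] * size)
    print(f"{n_epochs} epochs at {h}x{w}")
# 70 epochs at 384x256
# 30 epochs at 768x512
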
Example #6
def train(args):
    # config_tf2(args['configuration']['xla'])
    # Create log, checkpoint and export directories
    checkpoint_dir, log_dir, export_dir = create_env_directories(
        args, get_experiment_name(args))
    train_log_dir = os.path.join(log_dir, 'train')
    val_log_dir = os.path.join(log_dir, 'validation')
    arch_log_dir = os.path.join(log_dir, 'arch')
    summary_writers = {
        'train': tf.summary.create_file_writer(train_log_dir),
        'val': tf.summary.create_file_writer(val_log_dir),
        'arch': tf.summary.create_file_writer(arch_log_dir)
    }

    # Prepare the 3 datasets
    train_weight_dataset = dataloader.get_dataset(
        args['dataloader'],
        transformation_list=args['dataloader']['train_list'],
        num_classes=args["num_classes"],
        split='train_weights')
    train_arch_dataset = dataloader.get_dataset(
        args['dataloader'],
        transformation_list=args['dataloader']['train_list'],
        num_classes=args["num_classes"],
        split='train_arch')
    val_dataset = dataloader.get_dataset(
        args['dataloader'],
        transformation_list=args['dataloader']['val_list'],
        num_classes=args["num_classes"],
        split='test')

    # define model, optimizer and checkpoint callback
    setup_mp(args)
    model = model_name_to_class[args['model_name']](
        args['framework'],
        input_shape=args['input_size'],
        label_dim=args['num_classes']).model
    model.summary()

    alchemy_api.send_model_info(model, args['server'])
    weights, arch_params = fbnetv2.split_trainable_weights(model)
    weight_opt = get_optimizer(args['optimizer'])
    arch_opt = get_optimizer(args['arch_search']['optimizer'])
    model_checkpoint_cb, latest_epoch = init_custom_checkpoint_callbacks(
        {'model': model}, checkpoint_dir, args['max_checkpoints'],
        args['checkpoint_freq'])
    callbacks = [model_checkpoint_cb]

    temperature_decay_fn = fbnetv2.exponential_decay(
        args['arch_search']['temperature']['init_value'],
        args['arch_search']['temperature']['decay_steps'],
        args['arch_search']['temperature']['decay_rate'])

    lr_decay_fn = CosineDecay(
        args['optimizer']['lr'],
        alpha=args["optimizer"]["lr_decay_strategy"]["lr_params"]["alpha"],
        total_epochs=args['num_epochs'])

    lr_decay_fn_arch = CosineDecay(args['arch_search']['optimizer']['lr'],
                                   alpha=0.000001,
                                   total_epochs=args['num_epochs'])

    metrics = {
        'arch': {
            'latency_reg_loss': tf.keras.metrics.Mean()
        },
        'train': {
            'total_loss': tf.keras.metrics.Mean(),
            'accuracy': tf.keras.metrics.CategoricalAccuracy(),
            'cross_entropy_loss': tf.keras.metrics.Mean(),
        },
        'val': {
            'accuracy': tf.keras.metrics.CategoricalAccuracy(),
            'cross_entropy_loss': tf.keras.metrics.Mean(),
        }
    }

    train_step = get_train_step_function(model, weights, weight_opt,
                                         metrics['train'])
    train_step_arch = get_train_step_arch_function(model, arch_params,
                                                   arch_opt, metrics['train'],
                                                   metrics['arch'])
    evaluation_step = get_eval_step_function(model, metrics['val'])

    for epoch in range(latest_epoch, args['num_epochs']):
        print(f'Epoch: {epoch}/{args["num_epochs"]}')
        # Update both LR
        weight_opt.learning_rate = lr_decay_fn(epoch)
        arch_opt.learning_rate = lr_decay_fn_arch(epoch)
        # Updating the weight parameters using a subset of the training data
        for step, (x_batch, y_batch) in tqdm.tqdm(
                enumerate(train_weight_dataset, start=1)):
            train_step(x_batch, y_batch)
        # Evaluate the model on validation subset
        for x_batch, y_batch in val_dataset:
            evaluation_step(x_batch, y_batch)
        # Handle metrics
        template = f"Weights updated, Epoch {epoch}"
        template = metrics_processing(metrics, summary_writers,
                                      ['train', 'val'], template, epoch)
        template += f", lr: {float(weight_opt.learning_rate)}"
        print(template)

        new_temperature = temperature_decay_fn(epoch)
        with summary_writers['train'].as_default():
            tf.summary.scalar('temperature', new_temperature, step=epoch)
        define_temperature(new_temperature)

        if epoch >= args['arch_search']['num_warmup']:
            # Updating the architectural parameters on another subset
            for step, (x_batch, y_batch) in tqdm.tqdm(
                    enumerate(train_arch_dataset, start=1)):
                train_step_arch(x_batch, y_batch)
            # Evaluate the model on validation subset
            for x_batch, y_batch in val_dataset:
                evaluation_step(x_batch, y_batch)
            # Handle metrics
            template = f'Architecture updated, Epoch {epoch}'
            template = metrics_processing(metrics,
                                          summary_writers,
                                          ['train', 'val', 'arch'],
                                          template,
                                          epoch,
                                          postfix='_arch')
            template += f", lr: {float(arch_opt.learning_rate)}"
            print(template)
        # saving moved outside of the condition so arch params are saved from the beginning
        fbnetv2.save_arch_params(model, epoch, log_dir)

        # manually call the callbacks
        for callback in callbacks:
            callback.on_epoch_end(epoch, logs=None)

    print("Training Completed!!")

    print("Architecture params: ")
    print(arch_params)
    fbnetv2.post_training_analysis(
        model, args['arch_search']['exported_architecture'])
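
The loop above relies on fbnetv2.exponential_decay(init_value, decay_steps, decay_rate) returning an epoch-indexed schedule, but its implementation is not shown in this example. A minimal sketch, assuming the usual form init * rate**(epoch / decay_steps); the real fbnetv2 helper may differ:

# Hedged sketch of an epoch-indexed exponential decay schedule matching the
# (init_value, decay_steps, decay_rate) signature used above.
def exponential_decay(init_value, decay_steps, decay_rate):
    def decay_fn(epoch):
        return init_value * decay_rate ** (epoch / decay_steps)
    return decay_fn

temperature_decay_fn = exponential_decay(5.0, decay_steps=10, decay_rate=0.9)
print([round(temperature_decay_fn(e), 3) for e in (0, 10, 50)])  # [5.0, 4.5, 2.952]
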
Example #7
def train(args):
    # config_tf2(args['configuration']['xla'])
    # Create log, checkpoint and export directories
    checkpoint_dir, log_dir, export_dir = create_env_directories(
        args, get_experiment_name(args))

    train_weight_dataset = dataloader.get_dataset(
        args['dataloader'],
        transformation_list=args['dataloader']['train_list'],
        num_classes=args["num_classes"],
        split='train_weights')
    train_arch_dataset = dataloader.get_dataset(
        args['dataloader'],
        transformation_list=args['dataloader']['train_list'],
        num_classes=args["num_classes"],
        split='train_arch')
    val_dataset = dataloader.get_dataset(
        args['dataloader'],
        transformation_list=args['dataloader']['val_list'],
        num_classes=args["num_classes"],
        split='validation')

    setup_mp(args)

    # define model, optimizer and checkpoint callback
    model = model_name_to_class[args['model_name']](
        args['framework'],
        input_shape=args['input_size'],
        label_dim=args['num_classes']).model
    model.summary()
    alchemy_api.send_model_info(model, args['server'])
    weight_opt = get_optimizer(args['optimizer'])
    arch_opt = get_optimizer(args['arch_optimizer_param'])
    model_checkpoint_cb, latest_epoch = init_custom_checkpoint_callbacks(
        {'model': model}, checkpoint_dir)

    weights, arch_params = split_trainable_weights(model)
    temperature_decay_fn = exponential_decay(
        args['temperature']['init_value'], args['temperature']['decay_steps'],
        args['temperature']['decay_rate'])

    lr_decay_fn = CosineDecay(
        args['optimizer']['lr'],
        alpha=args["optimizer"]["lr_decay_strategy"]["lr_params"]["alpha"],
        total_epochs=args['num_epochs'])

    loss_fn = CategoricalCrossentropy()
    accuracy_metric = CategoricalAccuracy()
    loss_metric = Mean()
    val_accuracy_metric = CategoricalAccuracy()
    val_loss_metric = Mean()

    train_log_dir = os.path.join(args['log_dir'], 'train')
    val_log_dir = os.path.join(args['log_dir'], 'validation')
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    val_summary_writer = tf.summary.create_file_writer(val_log_dir)

    @tf.function
    def train_step(x_batch, y_batch):
        with tf.GradientTape() as tape:
            y_hat = model(x_batch, training=True)
            loss = loss_fn(y_batch, y_hat)

        accuracy_metric.update_state(y_batch, y_hat)
        loss_metric.update_state(loss)
        grads = tape.gradient(loss, weights)
        weight_opt.apply_gradients(zip(grads, weights))

    @tf.function
    def train_step_arch(x_batch, y_batch):
        with tf.GradientTape() as tape:
            y_hat = model(x_batch, training=False)
            loss = loss_fn(y_batch, y_hat)

        accuracy_metric.update_state(y_batch, y_hat)
        loss_metric.update_state(loss)
        grads = tape.gradient(loss, arch_params)
        arch_opt.apply_gradients(zip(grads, arch_params))

    @tf.function
    def evaluation_step(x_batch, y_batch):
        y_hat = model(x_batch, training=False)
        loss = loss_fn(y_batch, y_hat)

        val_accuracy_metric.update_state(y_batch, y_hat)
        val_loss_metric.update_state(loss)

    for epoch in range(latest_epoch, args['num_epochs']):
        print(f'Epoch: {epoch}/{args["num_epochs"]}')

        weight_opt.learning_rate = lr_decay_fn(epoch)

        # Updating the weight parameters using a subset of the training data
        for step, (x_batch, y_batch) in tqdm.tqdm(
                enumerate(train_weight_dataset, start=1)):
            train_step(x_batch, y_batch)

        # Evaluate the model on validation subset
        for x_batch, y_batch in val_dataset:
            evaluation_step(x_batch, y_batch)

        train_accuracy = accuracy_metric.result()
        train_loss = loss_metric.result()
        val_accuracy = val_accuracy_metric.result()
        val_loss = val_loss_metric.result()

        template = f'Weights updated, Epoch {epoch}, Train Loss: {float(train_loss)}, Train Accuracy: ' \
            f'{float(train_accuracy)}, Val Loss: {float(val_loss)}, Val Accuracy: {float(val_accuracy)}, ' \
            f'lr: {float(weight_opt.learning_rate)}'
        print(template)

        new_temperature = temperature_decay_fn(epoch)

        with train_summary_writer.as_default():
            tf.summary.scalar('loss', train_loss, step=epoch)
            tf.summary.scalar('accuracy', train_accuracy, step=epoch)
            tf.summary.scalar('temperature', new_temperature, step=epoch)

        with val_summary_writer.as_default():
            tf.summary.scalar('loss', val_loss, step=epoch)
            tf.summary.scalar('accuracy', val_accuracy, step=epoch)

        # Resetting metrics for reuse
        accuracy_metric.reset_states()
        loss_metric.reset_states()
        val_accuracy_metric.reset_states()
        val_loss_metric.reset_states()

        if epoch >= 10:
            # Updating the architectural parameters on another subset
            for step, (x_batch, y_batch) in tqdm.tqdm(
                    enumerate(train_arch_dataset, start=1)):
                train_step_arch(x_batch, y_batch)

            # Evaluate the model on validation subset
            for x_batch, y_batch in val_dataset:
                evaluation_step(x_batch, y_batch)

            train_accuracy = accuracy_metric.result()
            train_loss = loss_metric.result()
            val_accuracy = val_accuracy_metric.result()
            val_loss = val_loss_metric.result()

            template = f'Arch params updated, Epoch {epoch}, Train Loss: {float(train_loss)}, Train Accuracy: ' \
                f'{float(train_accuracy)}, Val Loss: {float(val_loss)}, Val Accuracy: {float(val_accuracy)}'
            print(template)
            with train_summary_writer.as_default():
                tf.summary.scalar('loss_after_arch_params_update',
                                  train_loss,
                                  step=epoch)
                tf.summary.scalar('accuracy_after_arch_params_update',
                                  train_accuracy,
                                  step=epoch)

            with val_summary_writer.as_default():
                tf.summary.scalar('loss_after_arch_params_update',
                                  val_loss,
                                  step=epoch)
                tf.summary.scalar('accuracy_after_arch_params_update',
                                  val_accuracy,
                                  step=epoch)

            # Resetting metrics for reuse
            accuracy_metric.reset_states()
            loss_metric.reset_states()
            val_accuracy_metric.reset_states()
            val_loss_metric.reset_states()

        define_temperature(new_temperature)

    print("Training Completed!!")

    print("Architecture params: ")
    print(arch_params)
    post_training_analysis(model, args['exported_architecture'])
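
Note that CosineDecay here is constructed with (lr, alpha, total_epochs) and called once per epoch, which does not match the step-indexed tf.keras.experimental.CosineDecay schedule, so it is presumably a project-level helper not shown in this listing. A minimal sketch of such an epoch-indexed cosine schedule, under that assumption:

import math

# Hedged sketch of an epoch-indexed cosine schedule matching the calls above:
#   lr_decay_fn = CosineDecay(initial_lr, alpha=..., total_epochs=...)
#   weight_opt.learning_rate = lr_decay_fn(epoch)
# The project's real CosineDecay may differ; this only illustrates the shape.
class CosineDecay:
    def __init__(self, initial_lr, alpha=0.0, total_epochs=100):
        self.initial_lr = initial_lr
        self.alpha = alpha            # floor, as a fraction of initial_lr
        self.total_epochs = total_epochs

    def __call__(self, epoch):
        progress = min(epoch, self.total_epochs) / self.total_epochs
        cosine = 0.5 * (1.0 + math.cos(math.pi * progress))
        return self.initial_lr * ((1.0 - self.alpha) * cosine + self.alpha)

lr_decay_fn = CosineDecay(0.1, alpha=0.01, total_epochs=90)
print(lr_decay_fn(0), lr_decay_fn(90))  # 0.1 at the start, ~0.001 at the end
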