Example #1
def Test():

    # By default, TF 1.x uses reference variables, which are not supported by
    # the SavedModel V1 importer. To use the SavedModel V1 importer, resource
    # variables must be enabled.
    tf.enable_resource_variables()

    tf.compat.v1.disable_eager_execution()

    x = tf.constant([[1.0], [1.0], [1.0]])
    y = tf.get_variable(name='y',
                        shape=(1, 3),
                        initializer=tf.random_normal_initializer(),
                        trainable=True)
    r = tf.matmul(x, y)

    tensor_info_x = tf.saved_model.utils.build_tensor_info(x)
    tensor_info_r = tf.saved_model.utils.build_tensor_info(r)

    signature_def = tf.saved_model.signature_def_utils.build_signature_def(
        inputs={'x': tensor_info_x},
        outputs={'r': tensor_info_r},
        method_name=tf.saved_model.PREDICT_METHOD_NAME)

    # Create two signatures that share the same variable.
    return {'basic': signature_def, 'basic_2': signature_def}
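The two signatures above are returned but never written to disk in the example itself. Below is a minimal, assumed sketch of how such a signature map could be exported with the TF1 SavedModelBuilder; the export_signatures() wrapper and the '/tmp/saved_model_v1' path are illustrative placeholders, not part of the original example.

import tensorflow.compat.v1 as tf

def export_signatures(export_dir='/tmp/saved_model_v1'):  # assumed helper, not in the original
    signature_def_map = Test()  # builds the graph and returns the two signatures
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
        builder.add_meta_graph_and_variables(
            sess,
            [tf.saved_model.SERVING],
            signature_def_map=signature_def_map)
        builder.save()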
Example #2
 def export_saved_model(self, **kwargs):
     tf.enable_resource_variables()
     driver = inference.ServingDriver(self.model_name, self.ckpt_path,
                                      self.image_size)
     driver.build(min_score_thresh=kwargs.get('min_score_thresh', 0.2),
                  max_boxes_to_draw=kwargs.get('max_boxes_to_draw', 50))
     driver.export(self.saved_model_dir)
Example #3
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  runner = abps_runners.EvalRunner(
      root_dir=FLAGS.root_dir,
      env_name=suite_atari.game(name=FLAGS.game_name),
      **get_run_args())
  runner.run()
Example #4
def main(_):
    logging.set_verbosity(logging.INFO)
    tf.enable_resource_variables()
    if FLAGS.select_policy_way == 'independent':
        runner = baseline_runners.EvalRunner(
            root_dir=FLAGS.root_dir,
            env_name=suite_atari.game(name=FLAGS.game_name),
            **get_run_args())
    runner.run()
Example #5
def main(_):
  tf.disable_eager_execution()
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  runner = abps_runners.TrainRunner(
      root_dir=FLAGS.root_dir,
      env_name=suite_atari.game(name=FLAGS.game_name),
      **get_run_args())
  runner.run()
Example #6
 def export_saved_model(self, **kwargs):
     """Export a saved model for inference."""
     tf.enable_resource_variables()
     driver = inference.ServingDriver(self.model_name,
                                      self.ckpt_path,
                                      enable_ema=self.enable_ema)
     driver.build(params_override=self.model_overrides,
                  min_score_thresh=kwargs.get('min_score_thresh', 0.2),
                  max_boxes_to_draw=kwargs.get('max_boxes_to_draw', 50))
     driver.export(self.saved_model_dir)
Example #7
 def export_saved_model(self, **kwargs):
     """Export a saved model for inference."""
     tf.enable_resource_variables()
     driver = inference.ServingDriver(self.model_name,
                                      self.ckpt_path,
                                      enable_ema=self.enable_ema,
                                      use_xla=self.use_xla,
                                      data_format=self.data_format,
                                      **kwargs)
     driver.build(params_override=self.model_overrides)
     driver.export(self.saved_model_dir)
Example #8
File: main.py Project: yyht/lamb
def main(argv, tuner=None):
    """Main function."""

    assert argv is None or len(argv) == 1, (
        'This program expects no non-option arguments. Got {}.'.format(argv))

    tf.enable_resource_variables()
    lamb_flags.initialize()

    if FLAGS.use_old_linear_names:
        utils._BIAS_VARIABLE_NAME = 'biases'  # pylint: disable=protected-access
        utils._WEIGHTS_VARIABLE_NAME = 'weights'  # pylint: disable=protected-access

    # Set seeds. The tensorflow seed is set later.
    random.seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)

    # Load the files.
    assert FLAGS.training_file, 'No training file specified.'
    training_file_data = read_corpus(FLAGS.training_file)
    if FLAGS.test_file and FLAGS.eval_on_test:
        test_file_data = read_corpus(FLAGS.test_file)
    else:
        test_file_data = corpus.Corpus(data=[])

    # Let's assemble the 'folds': training and eval set combinations,
    # plus the vocabulary.
    folds = []

    def add_fold(training_corpus, eval_corpus, test_corpus):
        fold = _make_fold(training_corpus, eval_corpus, test_corpus)
        logging.info('number of examples in fold %d', len(folds))
        logging.info('  training: %d', fold[0]['training'].size())
        logging.info('  valid: %d', fold[0]['valid'].size())
        logging.info('  test: %d', fold[0]['test'].size())
        folds.append(fold)

    if FLAGS.crossvalidate:
        logging.info('Doing cross-validation.')
        assert FLAGS.validation_file == ''  # pylint: disable=g-explicit-bool-comparison
        for _ in six.moves.range(FLAGS.crossvalidation_rounds):
            for training_set, validation_set in utils.cv_splits(
                    training_file_data.data(), FLAGS.crossvalidation_folds):
                add_fold(corpus.Corpus(data=training_set),
                         corpus.Corpus(data=validation_set), test_file_data)
    else:
        logging.info('Using dedicated eval data.')
        assert FLAGS.validation_file, 'No eval file specified.'
        validation_file_data = read_corpus(FLAGS.validation_file)
        add_fold(training_file_data, validation_file_data, test_file_data)

    experiment = Experiment(lamb_flags.get_config(), FLAGS.experiment_dir,
                            tuner)
    experiment.run_training(folds=folds)
Example #9
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_v2_behavior()
  tf.enable_resource_variables()
  tf.enable_control_flow_v2()
  logging.info('Executing eagerly: %s', tf.executing_eagerly())
  logging.info('parsing config files: %s', FLAGS.gin_file)
  gin.parse_config_files_and_bindings(
      FLAGS.gin_file, FLAGS.gin_bindings, skip_unknown=True)

  trainer.train(root_dir, eval_metrics_callback=metrics_callback)
Example #10
 def export_saved_model(self, **kwargs):
     """Export a saved model for inference."""
     tf.enable_resource_variables()
     driver = inference.ServingDriver(
         self.model_name,
         self.ckpt_path,
         batch_size=self.batch_size,
         use_xla=self.use_xla,
         model_params=self.model_config.as_dict(),
         **kwargs)
     driver.build()
     driver.export(self.saved_model_dir, self.tflite_path, self.tensorrt)
Example #11
def main(_):
  tf.disable_v2_behavior()
  tf.enable_resource_variables()
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  if FLAGS.use_hpu:
    if FLAGS.use_bf16:
      if not is_workaround_enabled('FORCE_FP32'):
        os.environ['TF_BF16_CONVERSION'] = FLAGS.bf16_config_path
      else:
        print("Warning! BF16 precision is not supported in inference mode. "
              "Switching back to fp32...")
    if is_workaround_enabled('DISABLE_DYNAMIC_SHAPES'):
      os.environ['TF_ENABLE_DYNAMIC_SHAPES'] = 'false'
    from habana_frameworks.tensorflow import load_habana_module
    load_habana_module()
    prepare_recipe_cache()

  if FLAGS.score_file:
    filename = os.path.expanduser(FLAGS.score_file)
    if not tf.gfile.Exists(filename):
      raise ValueError("The file to score doesn't exist: %s" % filename)
    results = score_file(filename)
    if not FLAGS.decode_to_file:
      raise ValueError("To score a file, specify --decode_to_file for results.")
    write_file = tf.gfile.Open(os.path.expanduser(FLAGS.decode_to_file), "w")
    for score in results:
      write_file.write("%.6f\n" % score)
    write_file.close()
    return

  hp = create_hparams()
  hp.add_hparam("use_hpu", FLAGS.use_hpu)
  decode_hp = create_decode_hparams()
  run_config = trainer.create_run_config(hp)
  if FLAGS.disable_grappler_optimizations:
    run_config.session_config.graph_options.rewrite_options.disable_meta_optimizer = True

  # summary-hook in tf.estimator.EstimatorSpec requires
  # hparams.model_dir to be set.
  hp.add_hparam("model_dir", run_config.model_dir)

  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      run_config,
      decode_hparams=decode_hp,
      use_tpu=FLAGS.use_tpu)

  decode(estimator, hp, decode_hp)
Example #12
def main(argv):
    del argv
    tf.enable_resource_variables()
    tf.disable_eager_execution()
    params = PARAMETERS[FLAGS.model]
    learned_model = core_model.EncodeProcessDecode(output_size=params['size'],
                                                   latent_size=128,
                                                   num_layers=2,
                                                   message_passing_steps=15)
    model = params['model'].Model(learned_model)
    if FLAGS.mode == 'train':
        learner(model, params)
    elif FLAGS.mode == 'eval':
        evaluator(model, params)
Example #13
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  if FLAGS.select_policy_way == 'independent':
    # runner = abps_runners.TrainIndependRunner(
    #     root_dir=FLAGS.root_dir,
    #     env_name=suite_atari.game(name=FLAGS.game_name),
    #     **get_run_args())
    runner = baseline_runners.PBTRunner(
        root_dir=FLAGS.root_dir,
        env_name=suite_atari.game(name=FLAGS.game_name),
        **get_run_args())
  elif FLAGS.select_policy_way == 'controller':
    runner = baseline_runners.PBTController(
        root_dir=FLAGS.root_dir,
        env_name=suite_atari.game(name=FLAGS.game_name),
        **get_run_args())
  runner.run()
Example #14
def main(argv):
    del argv  # Unused.

    # If using update_damping_immediately, resource variables must be enabled
    # (although they probably will be by default on TPUs).
    if FLAGS.update_damping_immediately:
        tf.enable_resource_variables()

    tf.set_random_seed(FLAGS.seed)
    # Invert using cholesky decomposition + triangular solve.  This is the only
    # code path for matrix inversion supported on TPU right now.
    kfac.utils.set_global_constants(posdef_inv_method='cholesky')
    kfac.fisher_factors.set_global_constants(
        eigenvalue_decomposition_threshold=10000)

    if not FLAGS.use_sua_approx:
        if FLAGS.use_custom_patches_op:
            kfac.fisher_factors.set_global_constants(
                use_patches_second_moment_op=True)
        else:
            # Temporary measure to save memory with giant batches:
            kfac.fisher_factors.set_global_constants(
                sub_sample_inputs=True, inputs_to_extract_patches_factor=0.1)

    config = make_tpu_run_config(FLAGS.master, FLAGS.seed, FLAGS.model_dir,
                                 FLAGS.iterations_per_loop,
                                 FLAGS.save_checkpoints_steps)

    estimator = contrib_tpu.TPUEstimator(use_tpu=True,
                                         model_fn=_model_fn,
                                         config=config,
                                         train_batch_size=FLAGS.batch_size,
                                         eval_batch_size=1024)

    estimator.train(input_fn=mnist_input_fn,
                    max_steps=FLAGS.train_steps,
                    hooks=[])
Example #15
from __future__ import division, print_function
import random
import scipy
import scipy.io
import numpy as np
import tensorflow.compat.v1 as tf
import Environment_marl_test
import os
from replay_memory import ReplayMemory
import sys

tf.enable_resource_variables()
tf.disable_eager_execution()

os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'

my_config = tf.ConfigProto()
my_config.gpu_options.allow_growth = True


class Agent(object):
    def __init__(self, memory_entry_size):
        self.discount = 1
        self.double_q = True
        self.memory_entry_size = memory_entry_size
        self.memory = ReplayMemory(self.memory_entry_size)


# ################## SETTINGS ######################
up_lanes = [
    i / 2.0 for i in [
Example #16
def set_tf_options():
    # By default, TF 1.x uses reference variables, which are not supported by
    # the SavedModel V1 importer. To use the SavedModel V1 importer, resource
    # variables must be enabled.
    tf.enable_resource_variables()
    tf.compat.v1.disable_eager_execution()
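A minimal usage sketch (assumed, not from any of the projects above): set_tf_options() must run before any variables are created so that they are built as resource variables.

import tensorflow.compat.v1 as tf

set_tf_options()
v = tf.get_variable('v', shape=(), initializer=tf.zeros_initializer())
print(type(v).__name__)  # expected to be a ResourceVariable rather than a RefVariable
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(v))  # 0.0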
Example #17
    "Jonas Eschle <*****@*****.**>",
    "Albert Puig <*****@*****.**",
    "Rafael Silva Coutinho <*****@*****.**>",
]

__all__ = [
    "ztf", "z", "constraint", "pdf", "minimize", "loss", "core", "data",
    "func", "Parameter", "ComposedParameter", "ComplexParameter",
    "convert_to_parameter", "Space", "convert_to_space", "supports", "run",
    "settings"
]

#  Copyright (c) 2019 zfit
import tensorflow.compat.v1 as tf

tf.enable_resource_variables()  # forward compat
tf.enable_v2_tensorshape()  # forward compat
tf.disable_eager_execution()

from . import ztf  # legacy
from . import ztf as z
from .settings import ztypes

# tf.get_variable_scope().set_use_resource(True)
# tf.get_variable_scope().set_dtype(ztypes.float)

from . import constraint, pdf, minimize, loss, core, data, func, param
from .core.parameter import Parameter, ComposedParameter, ComplexParameter, convert_to_parameter
from .core.limits import Space, convert_to_space, supports
from .core.data import Data
Example #18
def main(_):
    tf.disable_v2_behavior()
    tf.enable_resource_variables()

    if FLAGS.hparams is None:
        hparams = hparams_flags.hparams_from_flags()
    else:
        hparams = hparams_lib.HParams(FLAGS.hparams)

    cluster = None
    if FLAGS.use_tpu and FLAGS.master is None:
        if FLAGS.tpu_name:
            cluster = tf.distribute.cluster_resolver.TPUClusterResolver(
                FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
        else:
            cluster = tf.distribute.cluster_resolver.TPUClusterResolver()
            tf.config.experimental_connect_to_cluster(cluster)
            tf.tpu.experimental.initialize_tpu_system(cluster)

    session_config = tf.ConfigProto()
    # Workaround for https://github.com/tensorflow/tensorflow/issues/26411 where
    # convolutions (used in blurring) get confused about data-format when used
    # inside a tf.data pipeline that is run on GPU.
    if (tf.test.is_built_with_cuda()
            and not hparams.input_data.preprocessing.defer_blurring):
        # RewriterConfig.OFF = 2
        session_config.graph_options.rewrite_options.layout_optimizer = 2
    run_config = tf.estimator.tpu.RunConfig(
        master=FLAGS.master,
        cluster=cluster,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=FLAGS.save_interval_steps,
        keep_checkpoint_max=FLAGS.max_checkpoints_to_keep,
        keep_checkpoint_every_n_hours=(FLAGS.keep_checkpoint_interval_secs /
                                       (60.0 * 60.0)),
        log_step_count_steps=100,
        session_config=session_config,
        tpu_config=tf.estimator.tpu.TPUConfig(
            iterations_per_loop=FLAGS.steps_per_loop,
            per_host_input_for_training=True,
            experimental_host_call_every_n_steps=FLAGS.summary_interval_steps,
            tpu_job_name='train_tpu_worker' if FLAGS.mode == 'train' else None,
            eval_training_input_configuration=(
                tf.estimator.tpu.InputPipelineConfig.SLICED if FLAGS.use_tpu
                else tf.estimator.tpu.InputPipelineConfig.PER_HOST_V1)))
    params = {
        'hparams': hparams,
        'use_tpu': FLAGS.use_tpu,
        'data_dir': FLAGS.data_dir,
    }
    estimator = tf.estimator.tpu.TPUEstimator(
        model_fn=model_fn,
        use_tpu=FLAGS.use_tpu,
        config=run_config,
        params=params,
        train_batch_size=hparams.bs,
        eval_batch_size=hparams.eval.batch_size)

    if hparams.input_data.input_fn not in dir(inputs):
        raise ValueError(f'Unknown input_fn: {hparams.input_data.input_fn}')
    input_fn = getattr(inputs, hparams.input_data.input_fn)

    training_set_size = inputs.get_num_train_images(hparams)
    steps_per_epoch = training_set_size / hparams.bs
    stage_1_epochs = hparams.stage_1.training.train_epochs
    stage_2_epochs = hparams.stage_2.training.train_epochs
    total_steps = int((stage_1_epochs + stage_2_epochs) * steps_per_epoch)

    num_eval_examples = inputs.get_num_eval_images(hparams)
    eval_steps = num_eval_examples // hparams.eval.batch_size

    if FLAGS.mode == 'eval':
        for ckpt_str in tf.train.checkpoints_iterator(
                FLAGS.model_dir,
                min_interval_secs=FLAGS.eval_interval_secs,
                timeout=60 * 60):
            result = estimator.evaluate(input_fn=input_fn,
                                        checkpoint_path=ckpt_str,
                                        steps=eval_steps)
            estimator.export_saved_model(
                os.path.join(FLAGS.model_dir, 'exports'),
                lambda: input_fn(tf.estimator.ModeKeys.PREDICT, params),
                checkpoint_path=ckpt_str)
            if result['global_step'] >= total_steps:
                return
    else:  # 'train' or 'train_then_eval'.
        estimator.train(input_fn=input_fn, max_steps=total_steps)
        if FLAGS.mode == 'train_then_eval':
            result = estimator.evaluate(input_fn=input_fn, steps=eval_steps)
            estimator.export_saved_model(
                os.path.join(FLAGS.model_dir, 'exports'),
                lambda: input_fn(tf.estimator.ModeKeys.PREDICT, params))
Example #19
    def __init__(self,
                 use_xla=False,
                 optimizer=None,
                 mixed_precision=False,
                 single_device=False,
                 optimizer_type='adamw',
                 learning_rate=5e-5,
                 num_train_epochs=1,
                 train_steps=0,
                 num_warmup_steps=0,
                 warmup_proportion=0.,
                 gradient_accumulation_steps=1,
                 max_checkpoints=1,
                 max_grad=1.0,
                 decay_method='poly',
                 logging=True):
        """
        trainer基类
        :param use_xla: 是否使用xla优化
        :param optimizer: 自定义优化器,若是不传入,需要定义下方的优化器参数
        :param optimizer_type: 优化器类型,目前支持 tfbert.optimization.create_optimizer内部的优化器
        :param learning_rate: 学习率
        :param num_train_epochs: 训练轮次
        :param train_steps: 每一轮训练步数
        :param gradient_accumulation_steps: 梯度累积步数
        :param max_checkpoints: 最大保持的ckpt数量
        :param max_grad: 最大梯度,超过进行裁剪
        :param warmup_proportion: warmup比例
        :param num_warmup_steps: warmup步数,如果传入了warmup_proportion,就不需要传了
        :param decay_method: 学习率衰减方法,见 tfbert.optimization.create_optimizer方法
        :param mixed_precision: 是否使用混合精度
        :param single_device: 是否只使用一个卡,否则使用全部卡
        :param logging: 是否显示 tf logging日志
        """
        utils.setup_xla_flags()
        if logging:
            tf.logging.set_verbosity(tf.logging.INFO)

        # Get the devices from the environment variables
        self.devices = utils.devices()
        if single_device:
            self.devices = [self.devices[0]]

        # Optimization op
        self.train_op = None

        self.grads_and_vars = None

        self.train_outputs = {}
        self.eval_outputs = {}
        self.test_outputs = {}

        sess_conf = tf.ConfigProto()
        if use_xla:
            sess_conf.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
            if mixed_precision:
                tf.enable_resource_variables()
        sess_conf.gpu_options.allow_growth = True
        sess_conf.allow_soft_placement = True

        self.session = tf.Session(config=sess_conf)
        self.saver = None
        self.inited = False
        self.compiled = False
        self.finished_build = False

        self.num_train_epochs = num_train_epochs
        self.max_checkpoints = max_checkpoints
        self.max_grad = max_grad
        self.num_train_steps = (train_steps * num_train_epochs //
                                gradient_accumulation_steps)
        self.learning_rate = learning_rate
        self.num_warmup_steps = num_warmup_steps
        self.warmup_proportion = warmup_proportion
        if warmup_proportion > 0:
            self.num_warmup_steps = self.num_train_steps * warmup_proportion

        self.gradient_accumulation_steps = gradient_accumulation_steps
        self.decay_method = None if self.num_train_steps == 0 else decay_method
        self.optimizer_type = optimizer_type
        self.optimizer = optimizer
        self.mixed_precision = mixed_precision

        self.global_step = 0  # global step count
        self.forward_steps = 0  # number of forward passes
        self.global_step_changed = False  # whether the optimization step changed; avoids duplicate evaluation during gradient accumulation
Example #20
def main(argv):
  tf.disable_v2_behavior()
  tf.enable_resource_variables()

  if FLAGS.use_hpu and FLAGS.recipe_cache:
    prepare_recipe_cache()

  if FLAGS.use_horovod:
    if FLAGS.use_hpu:
      from TensorFlow.common.horovod_helpers import hvd_init, horovod_enabled, hvd
      hvd_init()
      assert horovod_enabled()
      if FLAGS.recipe_cache:
        # Other ranks should wait for recipe cache to be removed.
        # This operation can't be done before hvd_init.
        from mpi4py import MPI
        MPI.COMM_WORLD.Barrier()
    else:
      import horovod.tensorflow as hvd
      hvd.init()
      assert hvd.size() > 1
      os.environ['CUDA_VISIBLE_DEVICES'] = str(hvd.local_rank())

  if FLAGS.use_hpu:
    if FLAGS.use_bf16:
      os.environ['TF_BF16_CONVERSION'] = FLAGS.bf16_config_path

    dyn_shapes_flag = 'TF_ENABLE_DYNAMIC_SHAPES'
    if dyn_shapes_flag not in os.environ:
      os.environ[dyn_shapes_flag] = 'false'

    from habana_frameworks.tensorflow import load_habana_module  # noqa
    load_habana_module()

  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  # If we just have to print the registry, do that and exit early.
  maybe_log_registry_and_exit()

  # Create HParams.
  if argv:
    set_hparams_from_args(argv[1:])
  if FLAGS.schedule != "run_std_server":
    hparams = create_hparams()
  if FLAGS.gpu_automatic_mixed_precision:
    setattr(hparams, "gpu_automatic_mixed_precision", True)
  if FLAGS.deterministic_dataset:
    hparams.add_hparam("deterministic_dataset", True)

  hparams.add_hparam("use_horovod", FLAGS.use_horovod)
  hparams.add_hparam("use_hpu", FLAGS.use_hpu)
  if FLAGS.use_horovod:
    hparams.add_hparam("hvd_worker_id", hvd.rank())
    hparams.add_hparam("hvd_size", hvd.size())

  if FLAGS.schedule == "run_std_server":
    run_std_server()
  trainer_lib.set_random_seed(FLAGS.random_seed)

  if FLAGS.generate_data:
    generate_data()

  exp_fn = create_experiment_fn()
  exp = exp_fn(create_run_config(hparams), hparams)
  if is_chief():
    save_metadata(hparams)

  with dump_callback():
    execute_schedule(exp)
Example #21
def main(_):

    # If using update_damping_immediately, resource variables must be enabled.
    if FLAGS.update_damping_immediately:
        tf.enable_resource_variables()

    if not FLAGS.use_sua_approx:
        if FLAGS.use_custom_patches_op:
            kfac.fisher_factors.set_global_constants(
                use_patches_second_moment_op=True)
        else:
            # Temporary measure to save memory with giant batches:
            kfac.fisher_factors.set_global_constants(
                sub_sample_inputs=True, inputs_to_extract_patches_factor=0.2)

    tf.set_random_seed(FLAGS.seed)
    (train_op, opt, batch_loss, batch_error, batch_size_schedule, batch_size,
     eval_loss, eval_error, eval_loss_avg,
     eval_error_avg) = construct_train_quants()

    global_step = tf.train.get_or_create_global_step()

    if FLAGS.optimizer == 'kfac':
        # We need to put the control dependency on train_op here so that we are
        # guaranteed to get the up-to-date values of these various quantities.
        # Otherwise there is a race condition and we might get the old values,
        # nondeterministically. Another solution would be to get these values in
        # a separate sess.run call, but this can sometimes cause problems with
        # training frameworks that use hooks (see the comments below).
        with tf.control_dependencies([train_op]):
            learning_rate = opt.learning_rate
            momentum = opt.momentum
            damping = opt.damping
            rho = opt.rho
            qmodel_change = opt.qmodel_change

    # Without setting allow_soft_placement=True there will be problems when
    # the optimizer tries to place certain ops like "mod" on the GPU (which isn't
    # supported).
    config = tf.ConfigProto(allow_soft_placement=True)

    # Train model.

    # It's good practice to put everything into a single sess.run call. The
    # reason is that certain "training frameworks" like to run hooks at each
    # sess.run call, and there is an implicit expectation that there will only
    # be one sess.run call every "iteration" of the "optimizer". For example,
    # a framework might try to print the loss at each sess.run call, causing
    # the mini-batch to be advanced, thus completely breaking the "cached
    # batch" mechanism that the damping adaptation method may rely on. (Plus
    # there will also be the extra cost of having to reevaluate the loss
    # twice.)  That being said we don't completely do that here because it's
    # inconvenient.
    with tf.train.MonitoredTrainingSession(save_checkpoint_secs=30,
                                           config=config) as sess:
        for _ in range(FLAGS.train_steps):
            i = sess.run(global_step)

            if FLAGS.use_batch_size_schedule:
                batch_size_ = batch_size_schedule[min(
                    i,
                    len(batch_size_schedule) - 1)]
            else:
                batch_size_ = FLAGS.batch_size

            if FLAGS.optimizer == 'kfac':
                (_, batch_loss_, batch_error_, learning_rate_, momentum_,
                 damping_, rho_, qmodel_change_) = sess.run(
                     [
                         train_op, batch_loss, batch_error, learning_rate,
                         momentum, damping, rho, qmodel_change
                     ],
                     feed_dict={batch_size: batch_size_})
            else:
                _, batch_loss_, batch_error_ = sess.run(
                    [train_op, batch_loss, batch_error],
                    feed_dict={batch_size: batch_size_})

            # Print training stats.
            tf.logging.info('iteration: %d', i)
            tf.logging.info(
                'mini-batch size: %d | mini-batch loss = %f | mini-batch error = %f ',
                batch_size_, batch_loss_, batch_error_)

            if FLAGS.optimizer == 'kfac':
                tf.logging.info('learning_rate = %f | momentum = %f',
                                learning_rate_, momentum_)
                tf.logging.info('damping = %f | rho = %f | qmodel_change = %f',
                                damping_, rho_, qmodel_change_)

            # "Eval" here means just compute stuff on the full training set.
            if (i + 1) % FLAGS.eval_every == 0:
                eval_loss_, eval_error_, eval_loss_avg_, eval_error_avg_ = sess.run(
                    [eval_loss, eval_error, eval_loss_avg, eval_error_avg])
                tf.logging.info(
                    '-----------------------------------------------------')
                tf.logging.info('eval_loss = %f | eval_error = %f', eval_loss_,
                                eval_error_)
                tf.logging.info('eval_loss_avg = %f | eval_error_avg = %f',
                                eval_loss_avg_, eval_error_avg_)
                tf.logging.info(
                    '-----------------------------------------------------')
            else:
                tf.logging.info('----')
Example #22
def run_finetuning(train_tfrecord,
                   dev_tfrecord,
                   train_eval_fun=None,
                   use_tpu=False,
                   additional_train_params=None):
    """Main function to train and eval BLEURT."""

    logging.info("Initializing BLEURT training pipeline.")

    bleurt_params = checkpoint_lib.get_bleurt_params_from_flags_or_ckpt()
    max_seq_length = bleurt_params["max_seq_length"]
    bert_config_file = bleurt_params["bert_config_file"]
    init_checkpoint = bleurt_params["init_checkpoint"]

    logging.info("Creating input data pipeline.")
    logging.info("Train/Eval batch size: {}".format(str(FLAGS.batch_size)))

    train_input_fn = input_fn_builder(train_tfrecord,
                                      seq_length=max_seq_length,
                                      is_training=True,
                                      batch_size=FLAGS.batch_size,
                                      drop_remainder=use_tpu)

    dev_input_fn = input_fn_builder(dev_tfrecord,
                                    seq_length=max_seq_length,
                                    is_training=False,
                                    batch_size=FLAGS.batch_size,
                                    drop_remainder=use_tpu)

    logging.info("Creating model.")
    bert_config = modeling.BertConfig.from_json_file(bert_config_file)
    num_train_steps = FLAGS.num_train_steps
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=use_tpu,
                                use_one_hot_embeddings=use_tpu,
                                n_hidden_layers=FLAGS.n_hidden_layers,
                                hidden_layers_width=FLAGS.hidden_layers_width,
                                dropout_rate=FLAGS.dropout_rate)

    logging.info("Creating TF Estimator.")
    exporters = [
        tf.estimator.BestExporter(
            "bleurt_best",
            serving_input_receiver_fn=_serving_input_fn_builder(
                max_seq_length),
            event_file_pattern="eval_default/*.tfevents.*",
            compare_fn=_model_comparator,
            exports_to_keep=1)
    ]
    tf.enable_resource_variables()

    logging.info("*** Entering the Training / Eval phase ***")
    if not additional_train_params:
        additional_train_params = {}
    train_eval_fun(model_fn=model_fn,
                   train_input_fn=train_input_fn,
                   eval_input_fn=dev_input_fn,
                   exporters=exporters,
                   **additional_train_params)
Example #23
def main(argv):
    del argv  # Unused.

    tf.enable_resource_variables()
    tf.set_random_seed(FLAGS.seed)
    set_lr_schedule()
    set_custom_sparsity_map()
    folder_stub = os.path.join(FLAGS.training_method, str(FLAGS.end_sparsity),
                               str(FLAGS.maskupdate_begin_step),
                               str(FLAGS.maskupdate_end_step),
                               str(FLAGS.maskupdate_frequency),
                               str(FLAGS.drop_fraction),
                               str(FLAGS.label_smoothing),
                               str(FLAGS.weight_decay))

    output_dir = FLAGS.output_dir
    if FLAGS.use_folder_stub:
        output_dir = os.path.join(output_dir, folder_stub)

    export_dir = os.path.join(output_dir, 'export_dir')

    # we pass the updated eval and train string to the params dictionary.
    params = {}
    params['output_dir'] = output_dir
    params['training_method'] = FLAGS.training_method
    params['use_tpu'] = FLAGS.use_tpu

    dataset_func = functools.partial(
        imagenet_input.ImageNetInput,
        data_dir=FLAGS.data_directory,
        transpose_input=False,
        num_parallel_calls=FLAGS.num_parallel_calls,
        use_bfloat16=False)
    imagenet_train, imagenet_eval = [
        dataset_func(is_training=is_training) for is_training in [True, False]
    ]

    run_config = tpu_config.RunConfig(
        master=FLAGS.master,
        model_dir=output_dir,
        save_checkpoints_steps=FLAGS.steps_per_checkpoint,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False),
        tpu_config=tpu_config.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_cores,
            tpu_job_name=FLAGS.tpu_job_name))

    classifier = tpu_estimator.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=resnet_model_fn_w_pruning,
        params=params,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)

    cpu_classifier = tpu_estimator.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=resnet_model_fn_w_pruning,
        params=params,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        export_to_tpu=False,
        eval_batch_size=FLAGS.eval_batch_size)

    if FLAGS.num_eval_images % FLAGS.eval_batch_size != 0:
        raise ValueError(
            'eval_batch_size (%d) must evenly divide num_eval_images(%d)!' %
            (FLAGS.eval_batch_size, FLAGS.num_eval_images))

    eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size
    if FLAGS.mode == 'eval_once':
        ckpt_path = os.path.join(output_dir, FLAGS.eval_once_ckpt_prefix)
        dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
        classifier.evaluate(input_fn=dataset.input_fn,
                            steps=eval_steps,
                            checkpoint_path=ckpt_path,
                            name='{0}'.format(FLAGS.eval_once_ckpt_prefix))
    elif FLAGS.mode == 'eval':
        # Run evaluation when there's a new checkpoint
        for ckpt in evaluation.checkpoints_iterator(output_dir):
            tf.logging.info('Starting to evaluate.')
            try:
                dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
                classifier.evaluate(input_fn=dataset.input_fn,
                                    steps=eval_steps,
                                    checkpoint_path=ckpt,
                                    name='eval')
                # Terminate eval job when final checkpoint is reached
                global_step = int(os.path.basename(ckpt).split('-')[1])
                if global_step >= FLAGS.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d' %
                        global_step)
                    break

            except tf.errors.NotFoundError:
                tf.logging.info('Checkpoint no longer exists, skipping checkpoint.')

    else:
        global_step = estimator._load_global_step_from_checkpoint_dir(
            output_dir)
        # Session run hooks to export model for prediction
        export_hook = ExportModelHook(cpu_classifier, export_dir)
        hooks = [export_hook]

        if FLAGS.mode == 'train':
            tf.logging.info('start training...')
            classifier.train(input_fn=imagenet_train.input_fn,
                             hooks=hooks,
                             max_steps=FLAGS.train_steps)
        else:
            assert FLAGS.mode == 'train_and_eval'
            tf.logging.info('start training and eval...')
            while global_step < FLAGS.train_steps:
                next_checkpoint = min(global_step + FLAGS.steps_per_eval,
                                      FLAGS.train_steps)
                classifier.train(input_fn=imagenet_train.input_fn,
                                 max_steps=next_checkpoint)
                global_step = next_checkpoint
                tf.logging.info('Completed training up to step: %d', global_step)
                classifier.evaluate(input_fn=imagenet_eval.input_fn,
                                    steps=eval_steps)
Example #24
def a(demand_size):
    tf.enable_resource_variables()
    tf.disable_eager_execution()

    os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'

    my_config = tf.ConfigProto()
    my_config.gpu_options.allow_growth=True



    class Agent(object):
        def __init__(self, memory_entry_size):
            self.discount = 1
            self.double_q = True
            self.memory_entry_size = memory_entry_size
            self.memory = ReplayMemory(self.memory_entry_size)


    # ################## SETTINGS ######################
    up_lanes = [i/2.0 for i in [3.5/2,3.5/2 + 3.5,250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2]]
    down_lanes = [i/2.0 for i in [250-3.5-3.5/2,250-3.5/2,500-3.5-3.5/2,500-3.5/2,750-3.5-3.5/2,750-3.5/2]]
    left_lanes = [i/2.0 for i in [3.5/2,3.5/2 + 3.5,433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2]]
    right_lanes = [i/2.0 for i in [433-3.5-3.5/2,433-3.5/2,866-3.5-3.5/2,866-3.5/2,1299-3.5-3.5/2,1299-3.5/2]]

    width = 750/2
    height = 1298/2

    # This main file is for testing only
    IS_TRAIN = 0 # hard-coded to 0
    IS_TEST = 1-IS_TRAIN

    label = 'marl_model'
    label_sarl = 'sarl_model'

    n_veh = 4
    n_neighbor = 1
    n_RB = n_veh

    env = Environment_marl_test.Environ(down_lanes, up_lanes, left_lanes, right_lanes, width, height, n_veh, n_neighbor, demand_size)
    env.new_random_game()  # initialize parameters in env

    n_episode = 3000
    n_step_per_episode = int(env.time_slow/env.time_fast)
    epsi_final = 0.02
    epsi_anneal_length = int(0.8*n_episode)
    mini_batch_step = n_step_per_episode
    target_update_step = n_step_per_episode*4

    n_episode_test = 5  # test episodes

    ######################################################


    def get_state(env, idx=(0,0), ind_episode=1., epsi=0.02):
        """ Get state from the environment """

        # V2I_channel = (env.V2I_channels_with_fastfading[idx[0], :] - 80) / 60
        V2I_fast = (env.V2I_channels_with_fastfading[idx[0], :] - env.V2I_channels_abs[idx[0]] + 10)/35

        # V2V_channel = (env.V2V_channels_with_fastfading[:, env.vehicles[idx[0]].destinations[idx[1]], :] - 80) / 60
        V2V_fast = (env.V2V_channels_with_fastfading[:, env.vehicles[idx[0]].destinations[idx[1]], :] - env.V2V_channels_abs[:, env.vehicles[idx[0]].destinations[idx[1]]] + 10)/35

        V2V_interference = (-env.V2V_Interference_all[idx[0], idx[1], :] - 60) / 60

        V2I_abs = (env.V2I_channels_abs[idx[0]] - 80) / 60.0
        V2V_abs = (env.V2V_channels_abs[:, env.vehicles[idx[0]].destinations[idx[1]]] - 80)/60.0

        load_remaining = np.asarray([env.demand[idx[0], idx[1]] / env.demand_size])
        time_remaining = np.asarray([env.individual_time_limit[idx[0], idx[1]] / env.time_slow])

        # return np.concatenate((np.reshape(V2V_channel, -1), V2V_interference, V2I_abs, V2V_abs, time_remaining, load_remaining, np.asarray([ind_episode, epsi])))
        return np.concatenate((V2I_fast, np.reshape(V2V_fast, -1), V2V_interference, np.asarray([V2I_abs]), V2V_abs, time_remaining, load_remaining, np.asarray([ind_episode, epsi])))


    def get_state_sarl(env, idx=(0,0), ind_episode=1., epsi=0.02):
        """ Get state from the environment """

        # V2I_channel = (env.V2I_channels_with_fastfading[idx[0], :] - 80) / 60
        V2I_fast = (env.V2I_channels_with_fastfading[idx[0], :] - env.V2I_channels_abs[idx[0]] + 10)/35

        # V2V_channel = (env.V2V_channels_with_fastfading[:, env.vehicles[idx[0]].destinations[idx[1]], :] - 80) / 60
        V2V_fast = (env.V2V_channels_with_fastfading[:, env.vehicles[idx[0]].destinations[idx[1]], :] - env.V2V_channels_abs[:, env.vehicles[idx[0]].destinations[idx[1]]] + 10)/35

        V2V_interference = (-env.V2V_Interference_all_sarl[idx[0], idx[1], :] - 60) / 60

        V2I_abs = (env.V2I_channels_abs[idx[0]] - 80) / 60.0
        V2V_abs = (env.V2V_channels_abs[:, env.vehicles[idx[0]].destinations[idx[1]]] - 80)/60.0

        load_remaining = np.asarray([env.demand_sarl[idx[0], idx[1]] / env.demand_size])
        time_remaining = np.asarray([env.individual_time_limit_sarl[idx[0], idx[1]] / env.time_slow])

        # return np.concatenate((np.reshape(V2V_channel, -1), V2V_interference, V2I_abs, V2V_abs, time_remaining, load_remaining, np.asarray([ind_episode, epsi])))
        return np.concatenate((V2I_fast, np.reshape(V2V_fast, -1), V2V_interference, np.asarray([V2I_abs]), V2V_abs, time_remaining, load_remaining, np.asarray([ind_episode, epsi])))


    # -----------------------------------------------------------
    n_hidden_1 = 500
    n_hidden_2 = 250
    n_hidden_3 = 120
    n_input = len(get_state(env=env))
    n_output = n_RB * len(env.V2V_power_dB_List)

    g = tf.Graph()
    with g.as_default():
        # ============== Training network ========================
        x = tf.placeholder(tf.float32, [None, n_input])

        w_1 = tf.Variable(tf.truncated_normal([n_input, n_hidden_1], stddev=0.1))
        w_2 = tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2], stddev=0.1))
        w_3 = tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3], stddev=0.1))
        w_4 = tf.Variable(tf.truncated_normal([n_hidden_3, n_output], stddev=0.1))

        b_1 = tf.Variable(tf.truncated_normal([n_hidden_1], stddev=0.1))
        b_2 = tf.Variable(tf.truncated_normal([n_hidden_2], stddev=0.1))
        b_3 = tf.Variable(tf.truncated_normal([n_hidden_3], stddev=0.1))
        b_4 = tf.Variable(tf.truncated_normal([n_output], stddev=0.1))

        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, w_1), b_1))
        layer_1_b = tf.layers.batch_normalization(layer_1)
        layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1_b, w_2), b_2))
        layer_2_b = tf.layers.batch_normalization(layer_2)
        layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2_b, w_3), b_3))
        layer_3_b = tf.layers.batch_normalization(layer_3)
        y = tf.nn.relu(tf.add(tf.matmul(layer_3, w_4), b_4))
        g_q_action = tf.argmax(y, axis=1)

        # compute loss
        g_target_q_t = tf.placeholder(tf.float32, None, name="target_value")
        g_action = tf.placeholder(tf.int32, None, name='g_action')
        action_one_hot = tf.one_hot(g_action, n_output, 1.0, 0.0, name='action_one_hot')
        q_acted = tf.reduce_sum(y * action_one_hot, reduction_indices=1, name='q_acted')

        g_loss = tf.reduce_mean(tf.square(g_target_q_t - q_acted), name='g_loss')
        optim = tf.train.RMSPropOptimizer(learning_rate=0.001, momentum=0.95, epsilon=0.01).minimize(g_loss)

        # ==================== Prediction network ========================
        x_p = tf.placeholder(tf.float32, [None, n_input])

        w_1_p = tf.Variable(tf.truncated_normal([n_input, n_hidden_1], stddev=0.1))
        w_2_p = tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2], stddev=0.1))
        w_3_p = tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3], stddev=0.1))
        w_4_p = tf.Variable(tf.truncated_normal([n_hidden_3, n_output], stddev=0.1))

        b_1_p = tf.Variable(tf.truncated_normal([n_hidden_1], stddev=0.1))
        b_2_p = tf.Variable(tf.truncated_normal([n_hidden_2], stddev=0.1))
        b_3_p = tf.Variable(tf.truncated_normal([n_hidden_3], stddev=0.1))
        b_4_p = tf.Variable(tf.truncated_normal([n_output], stddev=0.1))

        layer_1_p = tf.nn.relu(tf.add(tf.matmul(x_p, w_1_p), b_1_p))
        layer_1_p_b = tf.layers.batch_normalization(layer_1_p)

        layer_2_p = tf.nn.relu(tf.add(tf.matmul(layer_1_p_b, w_2_p), b_2_p))
        layer_2_p_b = tf.layers.batch_normalization(layer_2_p)

        layer_3_p = tf.nn.relu(tf.add(tf.matmul(layer_2_p_b, w_3_p), b_3_p))
        layer_3_p_b = tf.layers.batch_normalization(layer_3_p)

        y_p = tf.nn.relu(tf.add(tf.matmul(layer_3_p_b, w_4_p), b_4_p))

        g_target_q_idx = tf.placeholder('int32', [None, None], 'output_idx')
        target_q_with_idx = tf.gather_nd(y_p, g_target_q_idx)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()


    def predict(sess, s_t, ep, test_ep = False):

        n_power_levels = len(env.V2V_power_dB_List)
        if np.random.rand() < ep and not test_ep:
            pred_action = np.random.randint(n_RB*n_power_levels)
        else:
            pred_action = sess.run(g_q_action, feed_dict={x: [s_t]})[0]
        return pred_action

    def predict_sarl(sess, s_t):
        pred_action = sess.run(g_q_action, feed_dict={x: [s_t]})[0]
        return pred_action


    def q_learning_mini_batch(current_agent, current_sess):
        """ Training a sampled mini-batch """

        batch_s_t, batch_s_t_plus_1, batch_action, batch_reward = current_agent.memory.sample()

        if current_agent.double_q:  # double q-learning
            pred_action = current_sess.run(g_q_action, feed_dict={x: batch_s_t_plus_1})
            q_t_plus_1 = current_sess.run(target_q_with_idx, {x_p: batch_s_t_plus_1, g_target_q_idx: [[idx, pred_a] for idx, pred_a in enumerate(pred_action)]})
            batch_target_q_t = current_agent.discount * q_t_plus_1 + batch_reward
        else:
            q_t_plus_1 = current_sess.run(y_p, {x_p: batch_s_t_plus_1})
            max_q_t_plus_1 = np.max(q_t_plus_1, axis=1)
            batch_target_q_t = current_agent.discount * max_q_t_plus_1 + batch_reward

        _, loss_val = current_sess.run([optim, g_loss], {g_target_q_t: batch_target_q_t, g_action: batch_action, x: batch_s_t})
        return loss_val


    def update_target_q_network(sess):
        """ Update target q network once in a while """

        sess.run(w_1_p.assign(sess.run(w_1)))
        sess.run(w_2_p.assign(sess.run(w_2)))
        sess.run(w_3_p.assign(sess.run(w_3)))
        sess.run(w_4_p.assign(sess.run(w_4)))

        sess.run(b_1_p.assign(sess.run(b_1)))
        sess.run(b_2_p.assign(sess.run(b_2)))
        sess.run(b_3_p.assign(sess.run(b_3)))
        sess.run(b_4_p.assign(sess.run(b_4)))


    def save_models(sess, model_path):
        """ Save models to the current directory with the name filename """

        current_dir = os.path.dirname(os.path.realpath(__file__))
        model_path = os.path.join(current_dir, "model/" + model_path)
        if not os.path.exists(os.path.dirname(model_path)):
            os.makedirs(os.path.dirname(model_path))
        saver.save(sess, model_path, write_meta_graph=False)


    def load_models(sess, model_path):
        """ Restore models from the current directory with the name filename """

        dir_ = os.path.dirname(os.path.realpath(__file__))
        model_path = os.path.join(dir_, "model/" + model_path)
        saver.restore(sess, model_path)


    def print_weight(sess, target=False):
        """ debug """

        if not target:
            print(sess.run(w_1[0, 0:4]))
        else:
            print(sess.run(w_1_p[0, 0:4]))


    # --------------------------------------------------------------
    agents = []
    sesses = []
    for ind_agent in range(n_veh * n_neighbor):  # initialize agents
        # print("Initializing agent", ind_agent)
        agent = Agent(memory_entry_size=len(get_state(env)))
        agents.append(agent)

        sess = tf.Session(graph=g,config=my_config)
        sess.run(init)
        sesses.append(sess)

    agent_sarl = Agent(memory_entry_size=len(get_state(env)))
    sess_sarl = tf.Session(graph=g,config=my_config)
    sess_sarl.run(init)

    # -------------- Testing --------------
    if IS_TEST:
        print("\nRestoring the model...")

        for i in range(n_veh):
            for j in range(n_neighbor):
                model_path = label + '/agent_' + str(i * n_neighbor + j)
                load_models(sesses[i * n_neighbor + j], model_path)
        # restore the single-agent model
        model_path_single = label_sarl + '/agent'
        load_models(sess_sarl, model_path_single)

        V2I_rate_list = []
        V2V_success_list = []

        V2I_rate_list_rand = []
        V2V_success_list_rand = []

        V2I_rate_list_sarl = []
        V2V_success_list_sarl = []

        V2I_rate_list_dpra = []
        V2V_success_list_dpra = []

        rate_marl = np.zeros([n_episode_test, n_step_per_episode, n_veh, n_neighbor])
        rate_rand = np.zeros([n_episode_test, n_step_per_episode, n_veh, n_neighbor])
        demand_marl = env.demand_size * np.ones([n_episode_test, n_step_per_episode+1, n_veh, n_neighbor])
        demand_rand = env.demand_size * np.ones([n_episode_test, n_step_per_episode+1, n_veh, n_neighbor])

        action_all_testing_sarl = np.zeros([n_veh, n_neighbor, 2], dtype='int32')
        action_all_testing_dpra = np.zeros([n_veh, n_neighbor, 2], dtype='int32')
        for idx_episode in range(n_episode_test):
            
            if idx_episode%100 == 0:
                print(demand_size, '----- Episode', idx_episode, '-----')

            env.renew_positions()
            env.renew_neighbor()
            env.renew_channel()
            env.renew_channels_fastfading()

            env.demand = env.demand_size * np.ones((env.n_Veh, env.n_neighbor))
            env.individual_time_limit = env.time_slow * np.ones((env.n_Veh, env.n_neighbor))
            env.active_links = np.ones((env.n_Veh, env.n_neighbor), dtype='bool')

            env.demand_rand = env.demand_size * np.ones((env.n_Veh, env.n_neighbor))
            env.individual_time_limit_rand = env.time_slow * np.ones((env.n_Veh, env.n_neighbor))
            env.active_links_rand = np.ones((env.n_Veh, env.n_neighbor), dtype='bool')

            env.demand_sarl = env.demand_size * np.ones((env.n_Veh, env.n_neighbor))
            env.individual_time_limit_sarl = env.time_slow * np.ones((env.n_Veh, env.n_neighbor))
            env.active_links_sarl = np.ones((env.n_Veh, env.n_neighbor), dtype='bool')

            env.demand_dpra = env.demand_size * np.ones((env.n_Veh, env.n_neighbor))
            env.individual_time_limit_dpra = env.time_slow * np.ones((env.n_Veh, env.n_neighbor))
            env.active_links_dpra = np.ones((env.n_Veh, env.n_neighbor), dtype='bool')

            V2I_rate_per_episode = []
            V2I_rate_per_episode_rand = []
            V2I_rate_per_episode_sarl = []
            V2I_rate_per_episode_dpra = []

            for test_step in range(n_step_per_episode):
                # trained models
                action_all_testing = np.zeros([n_veh, n_neighbor, 2], dtype='int32')
                for i in range(n_veh):
                    for j in range(n_neighbor):
                        state_old = get_state(env, [i, j], 1, epsi_final)
                        action = predict(sesses[i*n_neighbor+j], state_old, epsi_final, True)
                        action_all_testing[i, j, 0] = action % n_RB  # chosen RB
                        action_all_testing[i, j, 1] = int(np.floor(action / n_RB))  # power level

                action_temp = action_all_testing.copy()
                V2I_rate, V2V_success, V2V_rate = env.act_for_testing(action_temp)
                V2I_rate_per_episode.append(np.sum(V2I_rate))  # sum V2I rate in bps

                rate_marl[idx_episode, test_step,:,:] = V2V_rate
                demand_marl[idx_episode, test_step+1,:,:] = env.demand

                # random baseline
                action_rand = np.zeros([n_veh, n_neighbor, 2], dtype='int32')
                action_rand[:, :, 0] = np.random.randint(0, n_RB, [n_veh, n_neighbor]) # band
                action_rand[:, :, 1] = np.random.randint(0, len(env.V2V_power_dB_List), [n_veh, n_neighbor]) # power
                V2I_rate_rand, V2V_success_rand, V2V_rate_rand = env.act_for_testing_rand(action_rand)
                V2I_rate_per_episode_rand.append(np.sum(V2I_rate_rand))  # sum V2I rate in bps

                rate_rand[idx_episode, test_step, :, :] = V2V_rate_rand
                demand_rand[idx_episode, test_step+1,:,:] = env.demand_rand

                # SARL
                remainder = test_step % (n_veh * n_neighbor)
                i = int(np.floor(remainder/n_neighbor))
                j = remainder % n_neighbor
                state_sarl = get_state_sarl(env, [i, j], 1, epsi_final)
                action = predict_sarl(sess_sarl, state_sarl)
                action_all_testing_sarl[i, j, 0] = action % n_RB  # chosen RB
                action_all_testing_sarl[i, j, 1] = int(np.floor(action / n_RB))  # power level
                action_temp_sarl = action_all_testing_sarl.copy()
                V2I_rate_sarl, V2V_success_sarl, V2V_rate_sarl = env.act_for_testing_sarl(action_temp_sarl)
                V2I_rate_per_episode_sarl.append(np.sum(V2I_rate_sarl))  # sum V2I rate in bps

                # # Used as V2I upper bound only, no V2V transmission
                # action_all_testing_dpra[i, j, 0] = 0  # chosen RB
                # action_all_testing_dpra[i, j, 1] = 3  # power level, fixed to -100 dBm, no V2V transmission
                #
                # action_temp_dpra = action_all_testing_dpra.copy()
                # V2I_rate_dpra, V2V_success_dpra, V2V_rate_dpra = env.act_for_testing_dpra(action_temp_dpra)
                # V2I_rate_per_episode_dpra.append(np.sum(V2I_rate_dpra))  # sum V2I rate in bps

                # # V2V Upper bound only, centralized maxV2V
                # The following applies to n_veh = 4 and n_neighbor = 1 only
                action_dpra = np.zeros([n_veh, n_neighbor, 2], dtype='int32')
                # n_power_level = len(env.V2V_power_dB_List)
                n_power_level = 1
                store_action = np.zeros([(n_RB*n_power_level)**4, 4])
                rate_all_dpra = []
                t = 0
                # for i in range(n_RB*len(env.V2V_power_dB_List)):\
                for i in range(n_RB):
                    for j in range(n_RB):
                        for m in range(n_RB):
                            for n in range(n_RB):
                                action_dpra[0, 0, 0] = i % n_RB
                                action_dpra[0, 0, 1] = int(np.floor(i / n_RB))  # power level

                                action_dpra[1, 0, 0] = j % n_RB
                                action_dpra[1, 0, 1] = int(np.floor(j / n_RB))  # power level

                                action_dpra[2, 0, 0] = m % n_RB
                                action_dpra[2, 0, 1] = int(np.floor(m / n_RB))  # power level

                                action_dpra[3, 0, 0] = n % n_RB
                                action_dpra[3, 0, 1] = int(np.floor(n / n_RB))  # power level

                                action_temp_findMax = action_dpra.copy()
                                V2I_rate_findMax, V2V_rate_findMax = env.Compute_Rate(action_temp_findMax)
                                rate_all_dpra.append(np.sum(V2V_rate_findMax))

                                store_action[t, :] = [i,j,m,n]
                                t += 1

                i = store_action[np.argmax(rate_all_dpra), 0]
                j = store_action[np.argmax(rate_all_dpra), 1]
                m = store_action[np.argmax(rate_all_dpra), 2]
                n = store_action[np.argmax(rate_all_dpra), 3]

                action_testing_dpra = np.zeros([n_veh, n_neighbor, 2], dtype='int32')

                action_testing_dpra[0, 0, 0] = i % n_RB
                action_testing_dpra[0, 0, 1] = int(np.floor(i / n_RB))  # power level

                action_testing_dpra[1, 0, 0] = j % n_RB
                action_testing_dpra[1, 0, 1] = int(np.floor(j / n_RB))  # power level

                action_testing_dpra[2, 0, 0] = m % n_RB
                action_testing_dpra[2, 0, 1] = int(np.floor(m / n_RB))  # power level

                action_testing_dpra[3, 0, 0] = n % n_RB
                action_testing_dpra[3, 0, 1] = int(np.floor(n / n_RB))  # power level

                V2I_rate_findMax, V2V_rate_findMax = env.Compute_Rate(action_testing_dpra)
                check_sum = np.sum(V2V_rate_findMax)

                action_temp_dpra = action_testing_dpra.copy()
                V2I_rate_dpra, V2V_success_dpra, V2V_rate_dpra = env.act_for_testing_dpra(action_temp_dpra)
                V2I_rate_per_episode_dpra.append(np.sum(V2I_rate_dpra))  # sum V2I rate in bps

                # update the environment and compute interference
                env.renew_channels_fastfading()
                env.Compute_Interference(action_temp)
                env.Compute_Interference_sarl(action_temp_sarl)
                env.Compute_Interference_dpra(action_temp_dpra)

                if test_step == n_step_per_episode - 1:
                    V2V_success_list.append(V2V_success)
                    V2V_success_list_rand.append(V2V_success_rand)
                    V2V_success_list_sarl.append(V2V_success_sarl)
                    V2V_success_list_dpra.append(V2V_success_dpra)

            V2I_rate_list.append(np.mean(V2I_rate_per_episode))
            V2I_rate_list_rand.append(np.mean(V2I_rate_per_episode_rand))
            V2I_rate_list_sarl.append(np.mean(V2I_rate_per_episode_sarl))
            V2I_rate_list_dpra.append(np.mean(V2I_rate_per_episode_dpra))

            # print('marl', round(np.average(V2I_rate_per_episode), 2), 'sarl', round(np.average(V2I_rate_per_episode_sarl), 2), 'rand', round(np.average(V2I_rate_per_episode_rand), 2), 'dpra', round(np.average(V2I_rate_per_episode_dpra), 2))
            # print('marl', V2V_success_list[idx_episode], 'sarl', V2V_success_list_sarl[idx_episode], 'rand', V2V_success_list_rand[idx_episode], 'dpra', V2V_success_list_dpra[idx_episode])



        # Aggregate the metrics to return; the return statement is placed after the
        # summary prints and file dumps below so that they still execute.
        results = [
            demand_size,
            round(np.average(V2I_rate_list), 2),
            round(np.average(V2V_success_list), 4),
            round(np.average(V2I_rate_list_sarl), 2),
            round(np.average(V2V_success_list_sarl), 4),
            round(np.average(V2I_rate_list_rand), 2),
            round(np.average(V2V_success_list_rand), 4),
            round(np.average(V2I_rate_list_dpra), 2),
            round(np.average(V2V_success_list_dpra), 4),
        ]


        print('-------- marl -------------')
        print('n_veh:', n_veh, ', n_neighbor:', n_neighbor)
        print('Sum V2I rate:', round(np.average(V2I_rate_list), 2), 'Mbps')
        print('Pr(V2V success):', round(np.average(V2V_success_list), 4))
        #
        print('-------- sarl -------------')
        print('n_veh:', n_veh, ', n_neighbor:', n_neighbor)
        print('Sum V2I rate:', round(np.average(V2I_rate_list_sarl), 2), 'Mbps')
        print('Pr(V2V success):', round(np.average(V2V_success_list_sarl), 4))

        print('-------- random -------------')
        print('n_veh:', n_veh, ', n_neighbor:', n_neighbor)
        print('Sum V2I rate:', round(np.average(V2I_rate_list_rand), 2), 'Mbps')
        print('Pr(V2V success):', round(np.average(V2V_success_list_rand), 4))

        print('-------- DPRA -------------')
        print('n_veh:', n_veh, ', n_neighbor:', n_neighbor)
        print('Sum V2I rate:', round(np.average(V2I_rate_list_dpra), 2), 'Mbps')
        print('Pr(V2V success):', round(np.average(V2V_success_list_dpra), 4))

    # The name "DPRA" is used for historical reasons. Not really the case...

        with open("Data.txt", "a") as f:
            f.write('-------- marl, ' + label + '------\n')
            f.write('n_veh: ' + str(n_veh) + ', n_neighbor: ' + str(n_neighbor) + '\n')
            f.write('Sum V2I rate: ' + str(round(np.average(V2I_rate_list), 5)) + ' Mbps\n')
            f.write('Pr(V2V): ' + str(round(np.average(V2V_success_list), 5)) + '\n')
            f.write('-------- sarl, ' + label_sarl + '------\n')
            f.write('n_veh: ' + str(n_veh) + ', n_neighbor: ' + str(n_neighbor) + '\n')
            f.write('Sum V2I rate: ' + str(round(np.average(V2I_rate_list_sarl), 5)) + ' Mbps\n')
            f.write('Pr(V2V): ' + str(round(np.average(V2V_success_list_sarl), 5)) + '\n')
            f.write('-------- random ------------\n')
            f.write('Rand Sum V2I rate: ' + str(round(np.average(V2I_rate_list_rand), 5)) + ' Mbps\n')
            f.write('Rand Pr(V2V): ' + str(round(np.average(V2V_success_list_rand), 5)) + '\n')
            f.write('-------- DPRA ------------\n')
            f.write('Dpra Sum V2I rate: ' + str(round(np.average(V2I_rate_list_dpra), 5)) + ' Mbps\n')
            f.write('Dpra Pr(V2V): ' + str(round(np.average(V2V_success_list_dpra), 5)) + '\n')
            f.write('----Payload----\n')
            f.write(str(env.demand_size) + '\n')


        current_dir = os.path.dirname(os.path.realpath(__file__))
        marl_path = os.path.join(current_dir, "model/" + label + '/rate_marl.mat')
        scipy.io.savemat(marl_path, {'rate_marl': rate_marl})
        rand_path = os.path.join(current_dir, "model/" + label + '/rate_rand.mat')
        scipy.io.savemat(rand_path, {'rate_rand': rate_rand})

        demand_marl_path = os.path.join(current_dir, "model/" + label + '/demand_marl.mat')
        scipy.io.savemat(demand_marl_path, {'demand_marl': demand_marl})
        demand_rand_path = os.path.join(current_dir, "model/" + label + '/demand_rand.mat')
        scipy.io.savemat(demand_rand_path, {'demand_rand': demand_rand})

        # Return the aggregated metrics once all reports and .mat files have been written.
        return results


    # close sessions
    for sess in sesses:
        sess.close()
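Note on the brute-force V2V upper bound in the example above: the four nested loops hard-code n_veh = 4 and n_neighbor = 1. Below is a minimal generalized sketch, assuming env.Compute_Rate accepts the same [n_veh, n_neighbor, 2] action layout and that only the channel (resource block) is enumerated while the power level stays fixed; the function name and arguments are illustrative, not part of the original project.

import itertools

import numpy as np


def brute_force_v2v_upper_bound(env, n_veh, n_RB, n_neighbor=1):
    # Enumerate every RB assignment over all V2V links and keep the one that
    # maximizes the sum V2V rate, mirroring the hard-coded search above.
    best_rate = -np.inf
    best_action = None
    for assignment in itertools.product(range(n_RB), repeat=n_veh):
        action = np.zeros([n_veh, n_neighbor, 2], dtype='int32')
        for veh, rb in enumerate(assignment):
            action[veh, 0, 0] = rb  # channel (resource block)
            action[veh, 0, 1] = 0   # power level fixed, as in the example above
        _, v2v_rate = env.Compute_Rate(action)
        if np.sum(v2v_rate) > best_rate:
            best_rate, best_action = np.sum(v2v_rate), action.copy()
    return best_action, best_rate

The search space grows as n_RB ** n_veh, so this is only practical for the small settings used in the example.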
Example #25
0
File: model.py  Project: johntzwei/bleurt
def run_finetuning(train_set,
                   dev_set,
                   scratch_dir,
                   train_tfrecord,
                   dev_tfrecord,
                   train_eval_fun=None,
                   use_tpu=False,
                   additional_train_params=None):
    """Main function to train and eval BLEURT."""

    logging.info("Initializing BLEURT training pipeline.")

    bleurt_params = checkpoint_lib.get_bleurt_params_from_flags_or_ckpt()
    max_seq_length = bleurt_params["max_seq_length"]
    bert_config_file = bleurt_params["bert_config_file"]
    init_checkpoint = bleurt_params["init_checkpoint"]

    logging.info("Creating input data pipeline.")
    logging.info("Train/Eval batch size: {}".format(str(FLAGS.batch_size)))

    # Build a reverse dictionary that maps each hashed group name back to its
    # year|language-pair identifier.
    logging.info("Starting to populate reverse group dictionary.")
    train_df = pd.read_json(train_set, lines=True)
    dev_df = pd.read_json(dev_set, lines=True)
    examples_df = pd.concat([train_df, dev_df])
    # group_hash_dict is assumed to be defined at module level; it is populated below.
    # group_hash_dict = {}
    for g in examples_df['group'].unique():
        h = hash_md5_16(g)
        year_lp = '|'.join(g.split('|')[1:])
        group_hash_dict[h] = year_lp

        # Debugging: log one group-to-hash mapping per group.
        logging.info(f"Example - {g}:{h}:{group_hash_dict[h]}")
    logging.info("Group hash dict populated!")

    # Optionally, save the dictionary to a file for debugging purposes:
    # with open(os.path.join(scratch_dir, 'group_hash_dict'), 'w') as f:
    #     f.write(str(group_hash_dict) + '\n')

    train_input_fn = input_fn_builder(train_tfrecord,
                                      seq_length=max_seq_length,
                                      is_training=True,
                                      batch_size=FLAGS.batch_size,
                                      drop_remainder=use_tpu)

    dev_input_fn = input_fn_builder(dev_tfrecord,
                                    seq_length=max_seq_length,
                                    is_training=False,
                                    batch_size=FLAGS.batch_size,
                                    drop_remainder=use_tpu)

    logging.info("Creating model.")
    bert_config = modeling.BertConfig.from_json_file(bert_config_file)
    num_train_steps = FLAGS.num_train_steps
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=use_tpu,
                                use_one_hot_embeddings=use_tpu,
                                n_hidden_layers=FLAGS.n_hidden_layers,
                                hidden_layers_width=FLAGS.hidden_layers_width,
                                dropout_rate=FLAGS.dropout_rate)

    logging.info("Creating TF Estimator.")
    exporters = [
        tf.estimator.BestExporter(
            "bleurt_best",
            serving_input_receiver_fn=_serving_input_fn_builder(
                max_seq_length),
            event_file_pattern="eval_default/*.tfevents.*",
            compare_fn=_model_comparator,
            exports_to_keep=1)
    ]
    tf.enable_resource_variables()

    logging.info("*** Entering the Training / Eval phase ***")
    if not additional_train_params:
        additional_train_params = {}
    train_eval_fun(model_fn=model_fn,
                   train_input_fn=train_input_fn,
                   eval_input_fn=dev_input_fn,
                   exporters=exporters,
                   **additional_train_params)
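The helper hash_md5_16 used above to key the reverse group dictionary is not defined in this excerpt. Purely as an illustrative assumption, consistent with how it is called (mapping a group-name string to a short, stable hash key), one possible implementation is sketched below; the real definition lives elsewhere in the project and may differ.

import hashlib


def hash_md5_16(text):
    # Hypothetical helper (not from the original project): first 16 hex characters
    # of the MD5 digest of the UTF-8 encoded group name.
    return hashlib.md5(text.encode('utf-8')).hexdigest()[:16]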