Example #1
 def __init__(self,
              *,
              alpha=0.003,
              gamma_kwargs=DEFAULT_KWARGS["gamma_kwargs"],
              **kwargs):
     super(Reinforce, self).__init__(**kwargs)
     self.alpha = alpha
     self.gamma_kwargs = gamma_kwargs
     self.gamma_scheduler = get_scheduler(gamma_kwargs)
     self.schedulers += (self.gamma_scheduler, )
     self.targets_ph = tf_v1.placeholder("float32",
                                         shape=(None, 1),
                                         name="target_ph")
     self.states_ph = tf_v1.placeholder("float32",
                                        shape=[None, *self.obs_shape],
                                        name="states_ph")
     if self.policy_type == "DiscretePolicy":
         self.actions_ph = tf_v1.placeholder("int32",
                                             shape=(None, ),
                                             name="actions_ph")
     else:
         self.actions_ph = tf_v1.placeholder("float32",
                                             shape=(None,
                                                    *self.action_shape),
                                             name="actions_ph")
     # Used to sample the trajectory
     self.field_names = ("state", "action", "reward")
     self.scalar_summaries += ("gamma", )
Example #2
 def __init__(self,
              *,
              lr_kwargs=DEFAULT_KWARGS["lr_kwargs"],
              layers=None,
              preprocessors=None,
              **kwargs):
     super(DiscretePolicy, self).__init__(**kwargs)
     self.lr_kwargs = lr_kwargs
     self.lr_scheduler = get_scheduler(lr_kwargs)
     self.schedulers += (self.lr_scheduler, )
     # Placeholders
     self.lr_ph = tf_v1.placeholder("float32",
                                    shape=[],
                                    name=f"{self.scope}/lr_ph")
     if layers is None:
         layers = DEFAULT_LAYERS
     layers[-1]["units"] = self.action_size
     self.layers = layers
     self.preprocessors = preprocessors
     self.model = NeuralNetwork(self.scope,
                                input_shapes=[self.obs_shape],
                                layers=self.layers,
                                preprocessors=self.preprocessors)
     # Loss parameters
     self._loss = None
     self.train_op = None
     # Summary parameters
     self.scalar_summaries_tf += ("loss", )
     self.scalar_summaries += ("lr", )
Example #3
    def __init__(self, masker, task_lst, vocabs, optimizer, args):
        """
        :param model: 模型
        :param description: 模型描述
        :param task_lst: 任务列表
        :param optimizer: 优化器
        :param log_path: TensorboardX存储文件夹
        :param save_path: 模型存储位置
        :param accumulation_steps: 累积梯度
        :param print_every: 评估间隔
        """
        self.logger = fastNLP.logger

        self.masker = masker
        self.task_lst = task_lst
        self.save_path = args.save_path
        self.description = args.exp_name
        self.optim = optimizer
        self.vocabs = vocabs
        n_steps = (int(
            len(task_lst) * len(task_lst[0].train_set) * 100 / args.batch_size)
                   + 1)
        args.n_steps = n_steps
        self.epoch_scheduler = get_scheduler(args, self.optim)
        self.scheduler = None
        self.logger.info('Using scheduler {}'.format(self.scheduler))
        self.accumulation_steps = args.accumulation_steps
        self.print_every = args.print_every
        self.batch_size = args.batch_size
        self.save_ep = args.save_ep

        include_tasks = args.tasks
        if include_tasks is None:
            self.empty_tasks = set()
        else:
            self.empty_tasks = set(range(len(
                self.task_lst))) - set(include_tasks)

        self.steps = 0
        self.best_acc = 0
        self.best_epoch = 0

        self.metrics = []
        for t in task_lst:
            if has_acc(t.task_name):
                self.metrics.append(AccuracyMetric())
            else:
                self.metrics.append(
                    SpanFPreRecMetric(
                        self.vocabs[t.task_name],
                        encoding_type="bioes"
                        if t.task_name == "ner" else "bio",
                    ))
        # self.logger.info(self.metrics)

        tb_path = "eval" if args.evaluate else "train"
        self.summary_writer = SummaryWriter(os.path.join(
            args.tb_path, tb_path))
Example #4
 def __init__(self,
              lr_kwargs=DEFAULT_KWARGS["lr_kwargs"],
              gamma_kwargs=DEFAULT_KWARGS["gamma_kwargs"],
              reward_scale=1.0,
              **kwargs):
     super(Sarsa, self).__init__(**kwargs)
     self.lr_kwargs = lr_kwargs
     self.gamma_kwargs = gamma_kwargs
     self.lr_scheduler = get_scheduler(lr_kwargs)
     self.gamma_scheduler = get_scheduler(gamma_kwargs)
     self.schedulers += (self.lr_scheduler, self.gamma_scheduler)
     self.reward_scale = reward_scale
     self.field_names = ("state", "action", "reward", "next_state", "done")
     self.q_net = QNetwork(input_shapes=[self.obs_shape],
                           output_size=self.action_size,
                           layers=self._layers,
                           preprocessors=self.preprocessors,
                           scope="q_network")
     self.target_q = self.q_net
     # Placeholders
     self.lr_ph = tf_v1.placeholder("float32", shape=[], name="lr_ph")
     self.gamma_ph = tf_v1.placeholder("float32", shape=[], name="gamma_ph")
     self.states_ph = tf_v1.placeholder("float32",
                                        shape=[None, *self.obs_shape],
                                        name="states_ph")
     self.actions_ph = tf_v1.placeholder("int32",
                                         shape=[None],
                                         name="actions_ph")
     self.rewards_ph = tf_v1.placeholder("float32",
                                         shape=[None],
                                         name="rewards_ph")
     self.next_states_ph = tf_v1.placeholder("float32",
                                             shape=[None, *self.obs_shape],
                                             name="next_states_ph")
     self.dones_ph = tf_v1.placeholder("float32",
                                       shape=[None],
                                       name="dones_ph")
     self.next_actions_ph = tf_v1.placeholder("int32",
                                              shape=[None],
                                              name="next_actions_ph")
     # Summary ops
     self.summary_init_objects += (self.q_net, )
     self.scalar_summaries += ("gamma", "lr")
Example #5
 def __init__(self,
              *,
              reward_scale=1.0,
              gamma_kwargs=DEFAULT_KWARGS["gamma_kwargs"],
              alpha_lr_kwargs=DEFAULT_KWARGS["alpha_lr_kwargs"],
              q_lr_kwargs=DEFAULT_KWARGS["q_lr_kwargs"],
              tau=5e-3,
              update_interval=1,
              num_q_nets=2,
              auto_ent=True,
              target_entropy="auto",
              init_log_alpha=0.0,
              **kwargs):
     super(SAC, self).__init__(**kwargs)
     assert num_q_nets > 1, f"Minimum number of Q networks is 2, but got '{num_q_nets}'"
     self.reward_scale = reward_scale
     self.q_lr_kwargs = q_lr_kwargs
     self.gamma_kwargs = gamma_kwargs
     self.alpha_lr_kwargs = alpha_lr_kwargs
     self.q_lr_scheduler = get_scheduler(q_lr_kwargs)
     self.gamma_scheduler = get_scheduler(gamma_kwargs)
     self.alpha_lr_scheduler = get_scheduler(alpha_lr_kwargs)
     self.schedulers += (self.q_lr_scheduler, self.gamma_scheduler, self.alpha_lr_scheduler)
     self.tau = tau
     self.update_interval = update_interval
     self.num_q_nets = num_q_nets
     self.auto_ent = auto_ent
     self.init_log_alpha = init_log_alpha
     if self.auto_ent:
         assert target_entropy == "auto" or isinstance(target_entropy, (int, float))
         self.target_entropy = -float(self.action_size) if target_entropy == "auto" else target_entropy
         self.log_alpha_tf = tf_v1.get_variable('log_alpha', dtype="float32", initializer=float(init_log_alpha))
     else:
         self.target_entropy = None
         self.log_alpha_tf = tf_v1.constant(init_log_alpha, dtype="float32")
     self.alpha_tf = tf_v1.exp(self.log_alpha_tf)
     self.log_alpha = 0.0
     self.alpha_loss_tf = None
     self.alpha_train_op = None
     self.critics = []
     self.targets = []
     # Placeholders
     self.q_lr_ph = tf_v1.placeholder("float32", shape=[], name="q_lr_ph")
     self.alpha_lr_ph = tf_v1.placeholder("float32", shape=[], name="alpha_lr_ph")
     self.gamma_ph = tf_v1.placeholder("float32", shape=[], name="gamma_ph")
     self.states_ph = tf_v1.placeholder("float32", shape=[None, *self.obs_shape], name="states_ph")
     self.actions_ph = tf_v1.placeholder("float32", shape=[None, self.action_size], name="actions_ph")
     self.rewards_ph = tf_v1.placeholder("float32", shape=[None], name="rewards_ph")
     self.next_states_ph = tf_v1.placeholder("float32", shape=[None, *self.obs_shape], name="next_states_ph")
     self.dones_ph = tf_v1.placeholder("float32", shape=[None], name="dones_ph")
     # Summary parameters
     self.alpha_loss = None
     self.scalar_summaries += ("alpha_loss", "log_alpha", "alpha", "q_lr", "alpha_lr")
Example #6
 def __init__(self,
              *,
              tau=0.003,
              update_interval=10,
              lr_kwargs=DEFAULT_KWARGS["lr_kwargs"],
              gamma_kwargs=DEFAULT_KWARGS["gamma_kwargs"],
              sigma_kwargs=DEFAULT_KWARGS["sigma_kwargs"],
              reward_scale=1.0,
              **kwargs):
     super(DDPG, self).__init__(**kwargs)
     self.tau = tau
     self.reward_scale = reward_scale
     self.update_interval = update_interval
     self.lr_kwargs = lr_kwargs
     self.gamma_kwargs = gamma_kwargs
     self.sigma_kwargs = sigma_kwargs
     self.lr_scheduler = get_scheduler(lr_kwargs)
     self.gamma_scheduler = get_scheduler(gamma_kwargs)
     self.sigma_scheduler = get_scheduler(sigma_kwargs)
     self.schedulers += (self.lr_scheduler, self.gamma_scheduler,
                         self.sigma_scheduler)
     self.critic = None
     self.target = None
     # Placeholders
     self.lr_ph = tf_v1.placeholder("float32", shape=[], name="lr_ph")
     self.gamma_ph = tf_v1.placeholder("float32", shape=[], name="gamma_ph")
     self.states_ph = tf_v1.placeholder("float32",
                                        shape=[None, *self.obs_shape],
                                        name="states_ph")
     self.actions_ph = tf_v1.placeholder("float32",
                                         shape=[None, self.action_size],
                                         name="actions_ph")
     self.rewards_ph = tf_v1.placeholder("float32",
                                         shape=[None],
                                         name="rewards_ph")
     self.next_states_ph = tf_v1.placeholder("float32",
                                             shape=[None, *self.obs_shape],
                                             name="next_states_ph")
     self.dones_ph = tf_v1.placeholder("float32",
                                       shape=[None],
                                       name="dones_ph")
     # Summary parameters
     self.scalar_summaries += ("gamma", "sigma", "lr")
Example #7
 def __init__(self, *, lr_kwargs=DEFAULT_KWARGS["lr_kwargs"], **kwargs):
     super(A2C, self).__init__(**kwargs)
     self.critic = QNetwork(input_shapes=[self.obs_shape],
                            output_size=1,
                            layers=self._layers,
                            preprocessors=self.preprocessors,
                            scope="critic")
     self.lr_kwargs = lr_kwargs
     self.lr_scheduler = get_scheduler(lr_kwargs)
     self.schedulers += (self.lr_scheduler, )
     # Placeholders
     self.lr_ph = tf_v1.placeholder("float32", shape=(), name="lr_ph")
     self.summary_init_objects += (self.critic, )
     self.scalar_summaries += ("lr", )
Example #8
 def __init__(self,
              *,
              eps_kwargs=DEFAULT_KWARGS["eps_kwargs"],
              explore_ratio=0.60,
              explore_exploit_interval=20,
              **kwargs):
     super(GreedyEpsilonPolicy, self).__init__(**kwargs)
     self.eps_kwargs = eps_kwargs
     self.eps_scheduler = get_scheduler(eps_kwargs)
     self.schedulers += (self.eps_scheduler, )
     self.scalar_summaries += ("eps", )
     ######################### Experimental feature ##########################
     """
     Idea:
     Instead of choosing actions greedily with some random actions based on the epsilon value, 
     we introduce some epoch intervals periodically where the policy operates deterministically
     by temporarily setting the epsilon value to 0. 
     """
     # TODO: Implement it as a scheduler in all policies
     self.explore_ratio = explore_ratio
     self.explore_exploit_interval = explore_exploit_interval
     self.explore_interval = self.explore_ratio*self.explore_exploit_interval
     self.eps_mask = 1
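
A minimal sketch (not from the source) of how the explore/exploit toggling described in the idea above could be applied per epoch; `update_eps_mask` and the multiplication of the scheduled epsilon by `eps_mask` are assumed helpers, not part of the original class:
 def update_eps_mask(self, epoch):
     # Position of the current epoch within one explore/exploit cycle
     phase = epoch % self.explore_exploit_interval
     # Explore (eps_mask = 1) for the first `explore_interval` epochs of the cycle,
     # then act greedily (eps_mask = 0) for the remainder
     self.eps_mask = 1 if phase < self.explore_interval else 0

 def effective_eps(self, eps):
     # Epsilon actually used for action selection; forced to 0 during exploit epochs
     return self.eps_mask * eps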
Example #9
class OffPolicyAlgorithm(BaseAlgorithm):
    PARAMETERS = BaseAlgorithm.PARAMETERS.union({
        "batch_size_kwargs", "num_init_exp_samples", "max_init_exp_timestep",
        "buffer_size"
    })

    def __init__(self,
                 *,
                 batch_size_kwargs=DEFAULT_KWARGS["batch_size_kwargs"],
                 num_init_exp_samples=10000,
                 max_init_exp_timestep="auto",
                 buffer_size=1_000_000,
                 **kwargs):
        super(OffPolicyAlgorithm, self).__init__(**kwargs)
        self.buffer_size = buffer_size
        self.replay_buffer = ReplayBuffer(size=buffer_size)
        self.batch_size_kwargs = batch_size_kwargs
        self.batch_size_scheduler = get_scheduler(batch_size_kwargs)
        self.schedulers += (self.batch_size_scheduler, )
        self.num_init_exp_samples = None if num_init_exp_samples is None else int(
            num_init_exp_samples)
        self.max_init_exp_timestep = (self.max_episode_steps
                                      if max_init_exp_timestep == "auto"
                                      else max_init_exp_timestep)
        self.scalar_summaries += ("replay_buffer_size", "batch_size")
Example #10
def pretrain(cfg):
    print(cfg.pretty())
    pretrain_config_validator(cfg)
    fix_seed(cfg.seed)

    controller = load_pretrained_weights(
        NAO(**cfg.controller).to(0), cfg.pretrained_model_path)
    models = {'trunk': controller}
    dataset = get_dataset(seed=cfg.seed, **cfg.dataset)
    optimizers = {
        'trunk_optimizer':
        get_optimizer(parameters=models['trunk'].parameters(), **cfg.optimizer)
    }
    lr_schedulers = {
        'trunk_scheduler_by_iteration':
        get_scheduler(optimizer=optimizers['trunk_optimizer'], **cfg.scheduler)
    }
    loss_funcs = {
        'reconstruction_loss': torch.nn.NLLLoss(),
        'metric_loss': get_loss(**cfg.loss)
    }
    mining_funcs = {"tuple_miner": get_miner(**cfg.miner)}
    visualizers = [umap.UMAP(**params) for params in cfg.visualizers]
    end_of_iteration_hook = TensorboardHook(visualizers).end_of_iteration_hook
    end_of_epoch_hook = ModelSaverHook().end_of_epoch_hook
    get_trainer(
        models=models,
        optimizers=optimizers,
        lr_schedulers=lr_schedulers,
        loss_funcs=loss_funcs,
        mining_funcs=mining_funcs,
        dataset=dataset,
        end_of_iteration_hook=end_of_iteration_hook,
        end_of_epoch_hook=end_of_epoch_hook,
        **cfg.trainer,
    ).train()
Example #11
def train(cfg):
    print(cfg.pretty())
    train_config_validator(cfg)
    fix_seed(cfg.seed)

    writer = SummaryWriter(log_dir='logs')
    controller = load_pretrained_weights(
        NAO(**cfg.controller).to(0), cfg.pretrained_model_path)
    dataset = get_dataset(writer=writer, seed=cfg.seed, **cfg.dataset)
    optimizer = get_optimizer(parameters=_get_target_parameters(
        controller, cfg.freeze_encoder_decoder),
                              **cfg.optimizer)
    lr_scheduler = get_scheduler(optimizer=optimizer, **cfg.scheduler)
    end_of_epoch_hook = ModelSaverHook().end_of_epoch_hook

    get_trainer(
        controller=controller,
        dataset=dataset,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        writer=writer,
        end_of_epoch_hook=end_of_epoch_hook,
        **cfg.trainer,
    ).train()
Example #12
 def __init__(self,
              *,
              lr_kwargs,
              layers=None,
              preprocessors=None,
              learn_std=True,
              std_value=0.1,
              mu_range=None,
              log_std_range=None,
              **kwargs):
     super(GaussianPolicy, self).__init__(**kwargs)
     self.std_value = std_value
     self.lr_kwargs = lr_kwargs
     self.lr_scheduler = get_scheduler(lr_kwargs)
     self.schedulers += (self.lr_scheduler, )
     self.learn_std = learn_std
     self.mu_range = (-2.0, 2.0) if mu_range is None else mu_range
     self.log_std_range = (-10,
                           0.3) if log_std_range is None else log_std_range
     assert not self.discrete_action_space, "Action space for the Gaussian Policy must be continuous!"
     # Placeholders
     self.lr_ph = tf_v1.placeholder("float32", shape=(), name="lr_ph")
     # Create model
     if layers is None:
         layers = DEFAULT_LAYERS
     self.layers = layers
     self.preprocessors = preprocessors
     self.base_model = NeuralNetwork(self.scope,
                                     input_shapes=[self.obs_shape],
                                     layers=self.layers,
                                     preprocessors=self.preprocessors)
     self.mu = tf_v1.keras.layers.Dense(self.action_size, activation=None)(
         self.base_model.output)
     self.mu = tf_v1.clip_by_value(self.mu, *self.mu_range)
     if self.learn_std:
         self.log_std = tf_v1.keras.layers.Dense(self.action_size)(
             self.base_model.output)
         self.log_std = tf_v1.clip_by_value(self.log_std,
                                            *self.log_std_range)
         self.std = tf_v1.exp(self.log_std)
         self.raw_action_model = tf_v1.keras.Model(
             inputs=[self.base_model.input], outputs=[self.mu, self.std])
     else:
         self.std = tf_v1.constant([std_value] * self.action_size,
                                   dtype="float32")
         self.raw_action_model = tf_v1.keras.Model(
             inputs=[self.base_model.input], outputs=[self.mu])
     batch_size = tf_v1.shape(self.mu)[0]
     norm_dist = tfd.Normal(loc=tf_v1.zeros(self.action_size),
                            scale=tf_v1.ones(self.action_size))
     z = norm_dist.sample(batch_size)
     raw_actions = self.mu + z * self.std  # Reparameterization trick
     self.actions = tf_v1.tanh(raw_actions)
     self.deterministic_actions = tf_v1.tanh(self.mu)
     self.model = tf_v1.keras.Model(inputs=[self.base_model.input],
                                    outputs=[self.actions])
     # Loss parameters
     self._loss = None
     self.train_op = None
     # Summary parameters
     self.scalar_summaries += ("lr", )
     self.scalar_summaries_tf += ("loss", "mean_log_actions", "min_mu",
                                  "mean_mu", "max_mu", "min_std",
                                  "mean_std", "max_std")
     self.histogram_summaries_tf += ("actions", "mu", "std", "log_actions")
Example #13
 def configure_optimizers(self):
     self.optimizer = get_optimizer(self.hparams, self.models)
     scheduler = get_scheduler(self.hparams, self.optimizer)
     return [self.optimizer], [scheduler]