Example #1
 def __init__(self,
              input_dim=None,
              hidden_dim=200,
              output_dim=100,
              batch_size=1,
              p_dropout=0.2,
              num_layers=2):
     super(LSTMModel, self).__init__()
     self.hidden_dim = hidden_dim
     self.batch_size = batch_size
     self.bidirectional = True
     self.num_layers = num_layers
     self.bidir_mult = 2 if self.bidirectional else 1
     self.dimension_mult = self.num_layers * self.bidir_mult
     # The LSTM takes sequences of spectrograms/MFCCs as inputs, and outputs hidden states
     # with dimensionality hidden_dim.
     self.lstm = to_gpu(
         nn.LSTM(input_dim,
                 hidden_dim,
                 bidirectional=self.bidirectional,
                 num_layers=self.num_layers,
                 dropout=p_dropout))
     self.dropout_1 = nn.Dropout(p_dropout)
     # The linear layer that maps from hidden state space to tag space
     self.hidden2tag = to_gpu(
         nn.Linear(hidden_dim * self.bidir_mult, output_dim))
     self.reset_hidden()
Example #2
 def predict(self, obs, action=None, remember_step=True):
     #obs_orig = obs
     try:
         # past actions come in as numpy
         obs = to_gpu(torch.from_numpy(obs))
     except TypeError:
         pass  # obs is None on the first step, or is already a tensor
     phi = self.network.phi_body(obs, remember_step=remember_step)
     phi_a = self.network.actor_body(phi, remember_step=remember_step)
     phi_v = self.network.critic_body(phi, remember_step=remember_step)
     logits = self.network.fc_action(phi_a)
     v = self.network.fc_critic(phi_v)
     if self.mask_gen is not None:
         if not remember_step:
             # mask_gen is stateful, so we need to copy it if we don't want to remember the step
             tmp_mask_gen = copy.deepcopy(self.mask_gen)
             mask = tmp_mask_gen(obs)
         else:
             mask = self.mask_gen(obs)
         logits = logits - to_gpu(torch.Tensor(1e6 * (1 - mask)))
     dist = torch.distributions.Categorical(logits=logits)
     if action is None:
         action = dist.sample().to(torch.int64)
     log_prob = dist.log_prob(action).unsqueeze(-1)
     return action, log_prob, dist.entropy().unsqueeze(-1), v
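The 1e6 subtraction above is the usual large-negative-logit trick: a standalone sketch (plain torch, nothing from this codebase) showing that a masked action's probability underflows to zero, so it can never be sampled.

import torch

logits = torch.tensor([2.0, 1.0, 0.5])
mask = torch.tensor([1.0, 0.0, 1.0])  # action 1 is forbidden
masked = logits - 1e6 * (1 - mask)
print(torch.distributions.Categorical(logits=masked).probs)
# tensor([0.8176, 0.0000, 0.1824]) -- the forbidden action gets effectively zero probability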
Example #3
 def __getitem__(self, idx):
     if idx < self.batches:
         x = to_gpu(torch.randn(self.x_shape))
         y = to_gpu(self.model(Variable(x)).data)
         return (x, y)
     else:
         raise IndexError()  # map-style datasets signal the end with IndexError; StopIteration misbehaves under PEP 479
Example #4
 def __init__(self, model, output_dim, drop_rate=0.2):
     super().__init__()
     self.z_size = output_dim
     self.model = model
     #self.dropout = nn.Dropout(drop_rate)
     self.fc_mu = to_gpu(nn.Linear(self.model.output_shape[-1], output_dim))
     self.fc_var = to_gpu(nn.Linear(self.model.output_shape[-1], output_dim))
     self.fc_skew = to_gpu(nn.Linear(self.model.output_shape[-1], output_dim))
     self.output_shape = [None, output_dim]
Example #5
 def scoring_fun(x):
     if isinstance(x, (tuple, list)):
         x = {'actions': x[0], 'smiles': x[1]}
     out_x = to_gpu(x['actions'])
     end_of_slice = randint(3, out_x.size()[1])
     #TODO inject random slicing back
     out_x = out_x[:, 0:end_of_slice]
     smiles = x['smiles']
     scores = to_gpu(
         torch.from_numpy(property_scorer(smiles).astype(np.float32)))
     return out_x, scores
Example #6
def to_pytorch(x):
    if isinstance(x, np.ndarray):
        if x.dtype == np.bool_:
            # convert booleans to 0.0/1.0 floats
            return to_gpu(torch.from_numpy(x.astype(np.float32)))
        elif np.issubdtype(x.dtype, np.floating):
            return to_gpu(torch.from_numpy(x)).to(torch.float32)
        else:
            return to_gpu(torch.from_numpy(x))
    else:
        return x
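A usage sketch for to_pytorch (assuming numpy, torch, and this codebase's to_gpu are in scope): boolean and float arrays come back as float32 tensors, other dtypes keep their natural mapping, and non-arrays pass through unchanged.

print(to_pytorch(np.array([True, False])))     # tensor([1., 0.]), on GPU if available
print(to_pytorch(np.array([1.5, 2.5])).dtype)  # torch.float32
print(to_pytorch(np.array([1, 2])).dtype)      # torch.int64 (platform-dependent)
print(to_pytorch('not an array'))              # 'not an array', returned unchanged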
Example #7
    def __init__(
            self,
            num_actions,
            max_seq_len,
            n_layers=6,  #6
            n_head=6,  #8,
            d_k=16,  #64,
            d_v=16,  #64,
            d_model=128,  #512,
            d_inner_hid=256,  #1024,
            drop_rate=0.1,
            enc_output_size=76,
            batch_size=None):

        super().__init__()  # TODO: properly integrate this with the parent
        n_position = max_seq_len + 1  # Why the +1? Because of the dummy prev action for first step
        self.max_seq_len = max_seq_len
        self.d_model = d_model
        self.enc_output_size = enc_output_size
        self.batch_size = batch_size

        self.position_enc = nn.Embedding(n_position,
                                         d_model,
                                         padding_idx=Constants.PAD)
        self.position_enc.weight.data = position_encoding_init(
            n_position, d_model)
        self.position_enc.weight.requires_grad = False  # will this suffice to make them not trainable?

        # TODO: do we want relu after embedding? Probably not; make consistent
        self.embedder = nn.Embedding(
            num_actions, d_model,
            padding_idx=num_actions - 1)  # assume the padding index is the largest action value
        self.dropout = nn.Dropout(drop_rate)

        self.layer_stack = nn.ModuleList([
            DecoderLayerStep(d_model,
                             d_inner_hid,
                             n_head,
                             d_k,
                             d_v,
                             dropout=drop_rate) for _ in range(n_layers)
        ])
        # make sure encoder output has correct dim
        if enc_output_size != self.d_model:
            self.enc_output_transform = to_gpu(
                nn.Linear(enc_output_size, self.d_model))
        else:
            self.enc_output_transform = lambda x: x
        self.dec_output_transform = to_gpu(nn.Linear(self.d_model,
                                                     num_actions))
        self.all_actions = None
        self.output_shape = [None, self.max_seq_len, num_actions]
Example #8
 def init_hidden(self, batch_size=None):
     if batch_size is None:
         batch_size = self.batch_size
     # Before we've done anything, we don't have any hidden state.
     # Refer to the PyTorch documentation to see exactly
     # why they have this dimensionality.
     # The axes semantics are (num_layers * num_directions, minibatch_size, hidden_dim)
     return (autograd.Variable(
         to_gpu(
             torch.zeros(self.dimension_mult, batch_size,
                         self.hidden_dim))),
             autograd.Variable(
                 to_gpu(
                     torch.zeros(self.dimension_mult, batch_size,
                                 self.hidden_dim))))
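A standalone shape check for those axes (plain torch, mirroring Example #1's 2-layer bidirectional setup, where dimension_mult = num_layers * bidir_mult = 4):

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=2, bidirectional=True)
h0 = torch.zeros(4, 3, 20)  # (num_layers * num_directions, batch, hidden_dim)
c0 = torch.zeros(4, 3, 20)
out, (hn, cn) = lstm(torch.randn(5, 3, 10), (h0, c0))
print(out.shape)  # torch.Size([5, 3, 40]): directions are concatenated,
                  # hence hidden2tag's input size of hidden_dim * bidir_mult in Example #1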
Example #9
def a2c_sequence(name='a2c_sequence', task=None, body=None):
    config = Config()
    config.num_workers = batch_size  # same thing as batch size; batch_size comes from the enclosing scope
    config.task_fn = lambda: task
    config.optimizer_fn = lambda params: torch.optim.RMSprop(params, lr=0.0007)
    config.network_fn = lambda state_dim, action_dim: \
                            to_gpu(CategoricalActorCriticNet(state_dim,
                                                      action_dim,
                                                      body,
                                                      gpu=0,
                                                      mask_gen=mask_gen))
    #config.policy_fn = SamplePolicy # not used
    config.state_normalizer = lambda x: x
    config.reward_normalizer = lambda x: x
    config.discount = 0.99
    config.use_gae = False  # TODO: for now, MUST be False as our RNN network isn't compatible with it
    config.gae_tau = 0.97
    config.entropy_weight = 0.01
    config.rollout_length = 5
    config.gradient_clip = 0.5
    config.logger = logging.getLogger()  # get_logger(file_name='deep_rl_a2c', skip=True)
    config.logger.info('test')
    config.max_steps = 100000
    dash_name = 'DeepRL'
    visdom = Dashboard(dash_name)
    run_iterations(MyA2CAgent(config), visdom, invalid_value=invalid_value)
Example #10
    def forward(self, src_seq, src_pos=None):
        '''
        Embed the source sequence, with optional position specification
        :param src_seq: batch x num_steps long or batch x num_steps x src_vocab one-hot or float
        :param src_pos: batch x num_steps long, or None
        :return:
        '''
        if isinstance(src_seq, tuple):
            predefined_emb = src_seq[1]
            src_seq = src_seq[0]
        else:
            predefined_emb = None

        src_seq, src_seq_for_masking = self.normalizer(src_seq)
        if src_seq.dtype == torch.int64:  # indices of discrete actions
            enc_input = self.src_word_emb(src_seq)
            if self.include_predefined:
                enc_input = torch.cat([enc_input, predefined_emb], dim=2)
                enc_input = self.transform_to_d_model(enc_input)
        elif src_seq.dtype == torch.float32 and len(src_seq.size()) == 3:  # the input is continuous
            enc_input = src_seq
            if self.include_predefined:
                enc_input = torch.cat([enc_input, predefined_emb], dim=2)
            enc_input = self.transform_to_d_model(enc_input)
        else:
            raise ValueError('src_seq must be int64 indices or a float32 batch x steps x features tensor')

        # Position Encoding addition
        if self.encode_position:
            if src_pos is None:
                batch_size = src_seq.size()[0]
                seq_len = src_seq.size()[1]
                src_pos = to_gpu(torch.arange(seq_len).unsqueeze(0).expand(batch_size, seq_len).type(LongTensor))

            enc_input += self.position_enc(src_pos)
        return enc_input, src_seq_for_masking
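position_encoding_init (referenced in Example #7) isn't shown in these examples; here is a minimal sketch of the standard sinusoid table it is assumed to build, with row 0 kept all-zero for the PAD position:

import numpy as np
import torch

def position_encoding_init(n_position, d_model):
    table = np.array([
        [pos / np.power(10000, 2 * (i // 2) / d_model) for i in range(d_model)]
        if pos != 0 else np.zeros(d_model)
        for pos in range(n_position)])
    table[1:, 0::2] = np.sin(table[1:, 0::2])  # even dimensions
    table[1:, 1::2] = np.cos(table[1:, 1::2])  # odd dimensions
    return torch.from_numpy(table).float()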
Example #11
    def forward(self, last_action=None, last_action_pos=None):
        '''
        One step of the RNN model
        :param enc_output: batch x z_size, so don't support sequences
        :param last_action: batch of ints, all equaling None for first step
        :param last_action_pos: ignored, used by the attention decoder, here just to get the signature right
        :return:
        '''

        if self.hidden is None:  # first step after reset
            # need to do it here as batch size might be different for each sequence
            self.hidden = self.init_hidden(batch_size=self.batch_size)
            self.one_hot_action = to_gpu(
                torch.zeros(self.batch_size, self.output_feature_size))

        encoded = self.encode(self.enc_output, last_action)

        # copy the latent state to length of sequence, instead of sampling inputs
        embedded = F.relu(self.fc_input(self.batch_norm(encoded))) \
            .view(self.batch_size, 1, self.hidden_n) \
            .repeat(1, self.max_seq_length, 1)
        embedded = self.dropout_1(embedded)
        # run the GRU on it
        out_3, self.hidden = self.gru_1(embedded, self.hidden)
        # tmp has dim (batch_size*seq_len)xhidden_n, so we can apply the linear transform to it
        tmp = self.dropout_2(out_3.contiguous().view(-1, self.hidden_n))
        out = self.fc_out(tmp).view(self.batch_size, self.max_seq_length,
                                    self.output_feature_size)

        # just return the logits
        #self.hidden = None
        return out  #, hidden_1
Example #12
    def encode(self, x):
        '''

        :param x: a numpy array batch x seq x feature
        :return:
        '''
        out, hidden = self.forward(to_gpu(Variable(FloatTensor(x))))
        return out.data.cpu().numpy()
Example #13
 def __init__(self, model):
     '''
     Wrapper for a continuous decoder that doesn't look at the last action chosen, e.g. a simple RNN
     :param model:
     '''
     super().__init__()
     self.model = to_gpu(model)
     self.model.eval()
Example #14
def to_variable(x):
    if isinstance(x, tuple):
        return tuple(to_variable(xi) for xi in x)
    elif isinstance(x, np.ndarray):
        return to_gpu(torch.from_numpy(x))
    elif not isinstance(x, Variable):
        return Variable(x)
    else:
        return x
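Usage sketch (assuming numpy and this codebase's to_gpu are in scope): tuples are converted element-wise and numpy arrays become (GPU) tensors.

out = to_variable((np.zeros(3), np.ones(2)))  # tuple of two GPU tensors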
Example #15
 def decode(self, z):
     '''
     Converts a batch of latent space vectors into a batch of action ints
     :param z: batch x z_size
     :return: smiles: list(str) of len batch, actions: LongTensor batch_size x max_seq_len
     '''
     actions, logits = self.decoder(to_gpu(z))
     smiles = self.decode_from_actions(actions)
     return smiles, actions
Example #16
 def forward(self, last_action, *args, **kwargs):
     '''
     One step of the RNN model
     :param last_action: batch of ints, all equaling None for first step
     :return: batch x feature_len zeros
     '''
     self.register_step()
     if self.output_shape[0] is None:
         self.output_shape[0] = len(last_action)
     return self.dummy_fc(to_gpu(torch.zeros(*self.output_shape)))
Example #17
        def gen():
            iter1 = iter(self.main_loader)
            iter2 = iter(self.valid_ds)
            iter3 = iter(self.invalid_ds)
            while True:
                # make sure we iterate fully over the first dataset, others will likely be shorter
                x1 = next(iter1).float()
                try:
                    x2 = next(iter2).float()
                except StopIteration:
                    iter2 = iter(self.valid_ds)
                    x2 = next(iter2).float()
                try:
                    x3 = next(iter3).float()
                except StopIteration:
                    iter3 = iter(self.invalid_ds)
                    x3 = next(iter3).float()

                x = to_gpu(torch.cat([x1, x2, x3], dim=0))
                y = to_gpu(torch.zeros([len(x), 1]))
                y[:(len(x1) + len(x2))] = 1
                yield x, y
Example #18
 def __init__(self, config):
     '''
     A new agent gets spawned for every new sequence, reusing the same network
     So this is where we need to init_encoder_output for the network,
     so it knows a new sequence has started
     :param config: the DeepRL config object
     '''
     super().__init__(config)
     try:
         self.dummy_enc_output = to_gpu(torch.zeros(config.num_workers, 5))  # 5 just because :)
         self.network.network.phi_body.model.init_encoder_output(self.dummy_enc_output)
     except AttributeError:
         pass  # the body model may not implement init_encoder_output
Example #19
    def forward(self, last_action=None, last_action_pos=None, remember_step=True):
        '''
        One step of the RNN model
        :param enc_output: batch x z_size, so don't support sequences
        :param last_action: batch of ints, all equaling None for first step
        :param last_action_pos: ignored, used by the attention decoder, here just to get the signature right
        :return: batch x steps x feature_len
        '''
        # check we don't exceed max sequence length
        # TODO: use parent's method instead
        if self.n == self.max_seq_length:
            raise StopIteration()

        if remember_step:
            self.n += self.steps

        if self.enc_output is None:
            self.batch_size = len(last_action)
            self.enc_output = torch.zeros(self.batch_size, self.z_size, device=device)

        if self.hidden is None: # first step after reset
            # need to do it here as batch size might be different for each sequence
            self.hidden = self.init_hidden(batch_size=self.batch_size)
            self.one_hot_action = to_gpu(torch.zeros(self.batch_size, self.output_feature_size))

        encoded = self.encode(self.enc_output, last_action)

        # copy the latent state to length of sequence, instead of sampling inputs
        embedded = F.relu(self.fc_input(
            encoded
            #self.layer_norm(encoded) \
            #self.batch_norm(encoded) # we don't want to batch norm one-hot encoded actions!
                )) \
            .view(self.batch_size, 1, self.hidden_n) \
            .repeat(1, self.steps, 1)
        embedded = self.dropout_1(embedded)
        # run the GRU on it
        out_3, new_hidden = self.gru_1(embedded, self.hidden)
        if remember_step:
            self.hidden = new_hidden

        # don't need the linear mapping below as that'll be done by the relevant head
        # tmp has dim (batch_size*seq_len)xhidden_n, so we can apply the linear transform to it
        tmp = self.dropout_2(out_3.contiguous().view(-1, self.hidden_n))
        out = self.fc_out(tmp).view(self.batch_size,
                                    self.steps,
                                    self.output_feature_size)

        return out
Example #20
 def __init__(self,
              encoder=None,
              decoder=None,
              sample_z=True,
              epsilon_std=0.01,
              z_size=None,
              return_mu_log_var=True):
     '''
     Initialize the autoencoder
     :param encoder: A model mapping batches of one-hot sequences (batch x seq x num_actions) to batches of logits
     :param decoder: Model mapping latent z (batch x z_size) to batches of one-hot sequences, and corresponding logits
     :param sample_z: Whether to sample z ~ N(mu, std) or just take z = mu
     :param epsilon_std: Scaling factor for sampling; low values help convergence
     https://github.com/mkusner/grammarVAE/issues/7
     '''
     super(VariationalAutoEncoderHead, self).__init__()
     self.sample_z = sample_z
     self.encoder = to_gpu(encoder)
     self.decoder = to_gpu(decoder)
     self.epsilon_std = epsilon_std
     # TODO: should I be using the multipleOutputHead instead?
     self.mu_var_layer = to_gpu(MeanVarianceSkewHead(self.encoder, z_size))
     self.output_shape = [None, z_size]
     self.return_mu_log_var = return_mu_log_var
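For reference, sample_z and epsilon_std typically combine in a reparameterization step along these lines; this is a minimal sketch of the usual trick, an assumption rather than this class's actual forward:

import torch

def sample_latent(mu, log_var, sample_z=True, epsilon_std=0.01):
    if not sample_z:
        return mu  # deterministic: z = mu
    std = torch.exp(0.5 * log_var)
    return mu + epsilon_std * torch.randn_like(mu) * std  # z ~ N(mu, (epsilon_std * std)^2)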
Example #21
def get_encoder(feature_len=12,
                max_seq_length=15,
                cnn_encoder_params={
                    'kernel_sizes': (2, 3, 4),
                    'filters': (2, 3, 4),
                    'dense_size': 100
                },
                drop_rate=0.0,
                encoder_type='cnn',
                rnn_encoder_hidden_n=200):
    if encoder_type == 'rnn':
        rnn_model = SimpleRNN(hidden_n=rnn_encoder_hidden_n,
                              feature_len=feature_len,
                              drop_rate=drop_rate)
        encoder = to_gpu(
            AttentionAggregatingHead(rnn_model, drop_rate=drop_rate))

    elif encoder_type == 'cnn':
        encoder = to_gpu(
            SimpleCNNEncoder(params=cnn_encoder_params,
                             max_seq_length=max_seq_length,
                             feature_len=feature_len,
                             drop_rate=drop_rate))
    elif encoder_type == 'attention':
        encoder = to_gpu(
            AttentionAggregatingHead(TransformerEncoder(
                feature_len,
                max_seq_length,
                dropout=drop_rate,
                padding_idx=feature_len - 1),
                                     drop_rate=drop_rate))

    else:
        raise NotImplementedError()

    return encoder
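Usage sketch (the argument values just echo the defaults above):

cnn_encoder = get_encoder(feature_len=12, max_seq_length=15, encoder_type='cnn')
rnn_encoder = get_encoder(feature_len=12, encoder_type='rnn', rnn_encoder_hidden_n=200)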
Example #22
def concat(x):
    if isinstance(x, (list, tuple)):
        if isinstance(x[0], torch.Tensor):
            return to_gpu(torch.cat(x, dim=0))
        elif isinstance(x[0], (list, tuple)):
            out = []
            for i in x:
                out += i
            return out
            #return [concat(elem) for elem in zip(*x)]
        elif isinstance(x[0], dict):  # covers OrderedDict too
            raise NotImplementedError(
                "Can't handle loaders returning dicts yet")
        else:
            return x
    else:
        raise ValueError("Can only concatenate lists and tuples as yet")
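Usage sketch (assuming torch and this codebase's to_gpu are in scope):

merged = concat([torch.ones(2, 3), torch.zeros(1, 3)])  # (3, 3) tensor, on GPU if available
flat = concat([[1, 2], [3]])                            # [1, 2, 3]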
Example #23
    def forward(self, input_seq, hidden=None):
        '''

        :param input_seq: batch_size x seq_len x feature_len
        :param hidden: hidden state from earlier
        :return: batch_size x hidden_n
        '''
        batch_size = input_seq.size()[0]
        if hidden is None:
            hidden = Variable(to_gpu(
                torch.zeros(self.dimension_mult, batch_size, self.hidden_n)),
                              requires_grad=False)
            # self.init_hidden(batch_size)

        # run the GRU on it
        gru_out, hidden = self.gru_1(input_seq, hidden)
        out = self.normalize_output(self.dropout(F.relu(gru_out)))
        return out
Example #24
    def forward(self,
                last_action=None,
                last_action_pos=None,
                remember_step=True):
        '''
        One step of the RNN model
        :param enc_output: batch x z_size, so don't support sequences
        :param last_action: batch of ints, all equaling None for first step
        :param last_action_pos: ignored, used by the attention decoder, here just to get the signature right
        :return: batch x steps x feature_len
        '''
        # check we don't exceed max sequence length
        if self.n == self.max_seq_length:
            raise StopIteration()
        if remember_step:
            self.n += self.steps

        if self.one_hot_action is None:  # first step after reset
            # need to do it here as batch size might be different for each sequence
            self.one_hot_action = to_gpu(
                torch.zeros(self.batch_size, self.output_feature_size))

        encoded = self.encode(self.enc_output, last_action)

        # copy the latent state to length of sequence, instead of sampling inputs
        embedded = F.relu(self.fc_input(
                                        #self.batch_norm(encoded)
                                        encoded
                                        )) \
            .view(self.batch_size, 1, self.hidden_n)# \
        #.repeat(1, self.steps, 1)
        out = self.dropout_1(embedded)
        # run the GRUs on it
        for dec_layer in self.layer_stack:
            out = dec_layer(out, remember_step)
        # tmp has dim (batch_size*seq_len)xhidden_n, so we can apply the linear transform to it
        #tmp = self.dropout_2(out.contiguous().view(-1, self.hidden_n))

        tmp = out.contiguous().view(-1, self.hidden_n)
        out = self.fc_out(tmp).view(self.batch_size, 1,
                                    self.output_feature_size)

        return out
Example #25
 def forward(self, inputs):
     '''
     Calculates logits and per-action value estimates for simple deep Q-learning
     :param inputs: history of actions, batch_size x max_len of ints
     :return: actions, the decoder logits, and the value estimate derived from them
     '''
     actions = inputs
     batch_size = len(actions)
     orig_policy = self.decoder.policy
     self.decoder.policy = PolicyFromTarget(actions)
     # TODO: replace this ugly ugly hack!
     true_z_size = 56  # self.decoder.z_size
     _, logits = self.decoder.forward(
         to_gpu(torch.zeros(batch_size, true_z_size)))
     self.decoder.policy = orig_policy
     # todo: a better head, now values and policy too entangled
     value_est = logits  #F.tanh(logits)
     return actions, logits, value_est
Example #26
 def __init__(self,
              stepper: Stepper,
              policy: SimplePolicy,
              task=None,
              mask_gen=None,
              batch_size=None):
     '''
     A simple discrete decoder, alternating getting logits from model and actions from policy
     :param stepper:
     :param policy: choose an action from the logits, can be max, or random sample,
     or choose from pre-determined target sequence. Only depends on current logits + history,
     can't handle multi-step strategies like beam search
     :param mask_gen: takes in one-hot encoding of previous action (for now that's all we care about)
     :param task: environment that returns rewards and whether the episode is finished
     '''
     super().__init__()
     self.stepper = to_gpu(stepper)
     self.policy = policy
     self.task = task
     self.bypass_actions = False  # legacy
     # self.mask_gen = mask_gen
     self.output_shape = [None, None, self.stepper.output_shape[-1]]
     self.batch_size = batch_size
Example #27
    def forward(self, model_out, target_x):
        """gives the batch normalized Variational Error."""
        model_out_x, mu, log_var = model_out
        batch_size = target_x.size()[0]
        seq_len = target_x.size()[1]
        z_size = mu.size()[1]
        model_out_x = F.softmax(model_out_x, dim=2)
        #following mkusner/grammarVAE
        BCE = seq_len * self.bce_loss(model_out_x, target_x)
        # this normalizer is for when we're not sampling so only have mus, not sigmas
        avg_mu = torch.sum(mu, dim=0) / batch_size
        var = torch.mm(mu.t(), mu) / batch_size
        var_err = var - Variable(to_gpu(torch.eye(z_size)))
        var_err = torch.tanh(var_err) * var_err  # so it's ~ x^2 asymptotically, not x^4
        mom_err = (avg_mu * avg_mu).sum() / z_size + var_err.sum() / (z_size * z_size)
        if self.sample_z:
            # see Appendix B from VAE paper:
            # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
            # https://arxiv.org/abs/1312.6114
            # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
            KLD_element = (1 + log_var - mu * mu - log_var.exp())
            KLD = -0.5 * torch.mean(KLD_element)
            KLD_ = KLD.data.item()
            my_loss = BCE + self.reg_weight * KLD
        else:
            my_loss = BCE + self.reg_weight * mom_err
            KLD_ = 0
        if not self.training:
            # ignore regularizers when computing validation loss
            my_loss = BCE

        self.metrics = OrderedDict([('BCE', BCE.data.item()), ('KLD', KLD_),
                                    ('ME', mom_err.data.item())])
        return my_loss
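A quick numeric check of the KL term above: with mu = 0 and log_var = 0 (i.e. the posterior already equals N(0, 1)), each KLD_element is 1 + 0 - 0 - 1 = 0, so the divergence vanishes.

import torch

mu = torch.zeros(4, 8)
log_var = torch.zeros(4, 8)
kld = -0.5 * torch.mean(1 + log_var - mu * mu - log_var.exp())
assert kld.item() == 0.0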
Example #28
 def my_gen():
     for _ in range(1000):
         yield to_gpu(torch.zeros(BATCH_SIZE, settings['z_size']))
Example #29
def get_decoder(
        molecules=True,
        grammar=True,
        z_size=200,
        decoder_hidden_n=200,
        feature_len=12,  # TODO: remove this
        max_seq_length=15,
        drop_rate=0.0,
        decoder_type='step',
        task=None,
        node_policy=None,
        rule_policy=None,
        reward_fun=lambda x: -1 * np.ones(len(x)),
        batch_size=None,
        priors=True):
    codec = get_codec(molecules, grammar, max_seq_length)

    if decoder_type == 'old':
        stepper = ResettingRNNDecoder(z_size=z_size,
                                      hidden_n=decoder_hidden_n,
                                      feature_len=codec.feature_len(),
                                      max_seq_length=max_seq_length,
                                      steps=max_seq_length,
                                      drop_rate=drop_rate)
        stepper = OneStepDecoderContinuous(stepper)
    elif 'graph' in decoder_type and decoder_type not in [
            'attn_graph', 'rnn_graph'
    ]:
        return get_node_decoder(grammar, max_seq_length, drop_rate,
                                decoder_type, rule_policy, reward_fun,
                                batch_size, priors)

    elif decoder_type in ['attn_graph', 'rnn_graph']:  # deprecated
        assert 'hypergraph' in grammar, "Only the hypergraph grammar can be used with attn_graph decoder type"
        if 'attn' in decoder_type:
            encoder = GraphEncoder(grammar=codec.grammar,
                                   d_model=512,
                                   drop_rate=drop_rate,
                                   model_type='transformer')
        elif 'rnn' in decoder_type:
            encoder = GraphEncoder(grammar=codec.grammar,
                                   d_model=512,
                                   drop_rate=drop_rate,
                                   model_type='rnn')

        model = MultipleOutputHead(
            model=encoder,
            output_spec={
                'node': 1,  # used to select the next node to expand
                'action': codec.feature_len()  # used to select the action for the chosen node
            },
            drop_rate=drop_rate)

        # don't support using this model in VAE-style models yet
        model.init_encoder_output = lambda x: None
        mask_gen = HypergraphMaskGenerator(max_len=max_seq_length,
                                           grammar=codec.grammar)
        mask_gen.priors = priors
        # bias=codec.grammar.get_log_frequencies())
        if node_policy is None:
            node_policy = SoftmaxRandomSamplePolicy()
        if rule_policy is None:
            rule_policy = SoftmaxRandomSamplePolicy()
        if 'node' in decoder_type:
            stepper = GraphDecoderWithNodeSelection(model,
                                                    node_policy=node_policy,
                                                    rule_policy=rule_policy)
            env = GraphEnvironment(mask_gen,
                                   reward_fun=reward_fun,
                                   batch_size=batch_size)
            decoder = DecoderWithEnvironmentNew(stepper, env)
        else:

            stepper = GraphDecoder(model=model, mask_gen=mask_gen)
            decoder = to_gpu(
                SimpleDiscreteDecoderWithEnv(stepper,
                                             rule_policy,
                                             task=task,
                                             batch_size=batch_size))
        return decoder, stepper

    else:
        if decoder_type == 'step':
            stepper = SimpleRNNDecoder(z_size=z_size,
                                       hidden_n=decoder_hidden_n,
                                       feature_len=codec.feature_len(),
                                       max_seq_length=max_seq_length,
                                       drop_rate=drop_rate,
                                       use_last_action=False)

        elif decoder_type == 'action':
            stepper = SimpleRNNDecoder(
                z_size=z_size,  # + feature_len,
                hidden_n=decoder_hidden_n,
                feature_len=codec.feature_len(),
                max_seq_length=max_seq_length,
                drop_rate=drop_rate,
                use_last_action=True)

        elif decoder_type == 'action_resnet':
            stepper = ResNetRNNDecoder(
                z_size=z_size,  # + feature_len,
                hidden_n=decoder_hidden_n,
                feature_len=codec.feature_len(),
                max_seq_length=max_seq_length,
                drop_rate=drop_rate,
                use_last_action=True)

        elif decoder_type == 'attention':
            stepper = SelfAttentionDecoderStep(num_actions=codec.feature_len(),
                                               max_seq_len=max_seq_length,
                                               drop_rate=drop_rate,
                                               enc_output_size=z_size)
        elif decoder_type == 'random':
            stepper = RandomDecoder(feature_len=codec.feature_len(),
                                    max_seq_length=max_seq_length)
        else:
            raise NotImplementedError('Unknown decoder type: ' +
                                      str(decoder_type))

    if grammar is not False and '_graph' not in decoder_type:
        # add a masking layer
        mask_gen = get_codec(molecules, grammar, max_seq_length).mask_gen
        stepper = MaskingHead(stepper, mask_gen)

    policy = SoftmaxRandomSamplePolicy()  # bias=codec.grammar.get_log_frequencies()

    decoder = to_gpu(
        SimpleDiscreteDecoderWithEnv(
            stepper, policy, task=task,
            batch_size=batch_size))  # , bypass_actions=True))

    return decoder, stepper
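A hypothetical usage sketch (arguments echo the defaults above; in real use you would also pass a task and a reward_fun):

decoder, stepper = get_decoder(molecules=True,
                               grammar=True,
                               z_size=200,
                               decoder_type='step',
                               batch_size=32)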
Example #30
 def init_hidden(self, batch_size):
     # NOTE: assumes only 1 layer, no bidirectionality
     h1 = Variable(to_gpu(torch.zeros(1, batch_size, self.hidden_n)),
                   requires_grad=False)
     return h1