def __init__(self, num_layers, input_size, rnn_size, dropout, rnn_type='rnn'):
    super(StackedRNN, self).__init__()
    assert rnn_type in ['rnn', 'gru']
    self.dropout = nn.Dropout(dropout)
    self.num_layers = num_layers
    for i in range(num_layers):
        if rnn_type == 'rnn':
            layer = nn.RNNCell(input_size, rnn_size)
        elif rnn_type == 'gru':
            layer = nn.GRUCell(input_size, rnn_size)
        self.add_module('layer_%d' % i, layer)
        input_size = rnn_size
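A minimal forward sketch for the StackedRNN above, assuming the usual stacked-cell convention (dropout between layers but not after the last one); this is an illustration, not the original author's forward method.

def forward(self, x, hidden):
    # x: (batch, input_size); hidden: list of per-layer states (batch, rnn_size)
    h_new = []
    for i in range(self.num_layers):
        layer = getattr(self, 'layer_%d' % i)
        h_i = layer(x, hidden[i])        # one step of layer i
        h_new.append(h_i)
        x = h_i
        if i + 1 != self.num_layers:     # dropout between layers only
            x = self.dropout(x)
    return x, h_new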
def __init__(self, numLayers=1, inputDim=1, hiddenDim=32, outputDim=1, device='cpu'):
    """An example stacked Elman RNN with a linear output head.

    # Arguments
        numLayers: number of RNN layers
        inputDim: dimensionality of input feature
        hiddenDim: dimensionality of hidden feature in each RNNCell
        outputDim: dimensionality of output feature
    """
    super(RNN, self).__init__()
    self.device = device
    self.numLayers = numLayers
    self.hiddenDim = hiddenDim
    self.RNNLayers = torch.nn.ModuleList()
    for i in range(numLayers):
        if i == 0:
            RNNLayer = nn.RNNCell(inputDim, hiddenDim)
        else:
            RNNLayer = nn.RNNCell(hiddenDim, hiddenDim)
        self.RNNLayers.append(RNNLayer)
    self.linear = nn.Linear(hiddenDim, outputDim)
def __init__(self, column_units, rnn_units=128):
    # rnn_units was an undefined free variable in the original;
    # it is assumed to be a constructor parameter here (default 128 is illustrative)
    super(Model, self).__init__()
    self.cnn = nn.Sequential(
        nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(16),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
    )
    # 16 channels on a 32x32 input, spatially halved by the pooling layer
    self.rnn = nn.RNNCell(16 * (32 // 2) * (32 // 2), rnn_units)
    self.classifier = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(rnn_units, column_units),
    )
def __init__(self, args, data):
    super().__init__()
    self.n_input = 1
    self.m = data.m
    self.w = args.window
    self.hid = args.n_hidden
    self.rnn_cell = nn.RNNCell(input_size=self.n_input, hidden_size=self.hid)
    self.V = Parameter(torch.Tensor(self.hid, 1))
    self.Wx = Parameter(torch.Tensor(self.hid, self.n_input))
    self.Wtlt = Parameter(torch.Tensor(self.hid, self.hid))
    self.Wh = Parameter(torch.Tensor(self.hid, self.hid))
    self.init_weights()
    self.out = nn.Linear(self.hid, 1)
def __init__(self, input_size, rnn_hidden, use_gpu, rnn_type='RNN'):
    super(core_network, self).__init__()
    self.input_size = input_size
    self.rnn_hidden = rnn_hidden
    self.use_gpu = use_gpu
    self.rnn_type = rnn_type
    if rnn_type == 'RNN':
        self.rnn = nn.RNNCell(input_size, rnn_hidden, bias=True, nonlinearity='relu')
    elif rnn_type == 'LSTM':
        self.rnn = nn.LSTMCell(input_size, rnn_hidden, bias=True)
def test_rnn():
    @batch
    def simple_rnn(x, h0, cell):
        h = h0
        for xt in x.unbind(1):
            h = cell(xt, h)
        return h

    def SimpleRNN(cell):
        def inner(x, h0):
            return simple_rnn(x, h0, cell)
        return inner

    mb_test(SimpleRNN(nn.RNNCell(2, 2)),
            (4, (True, 3), (False, 2)),
            (4, (False, 2)))
def __init__(self, input_size, hidden_size, batch_first=False):
    """
    Args:
        input_size (int): size of the input vectors
        hidden_size (int): size of the hidden state vectors
        batch_first (bool): whether the 0th dimension is batch
    """
    super(ElmanRNN, self).__init__()
    self.rnn_cell = nn.RNNCell(input_size, hidden_size)
    self.batch_first = batch_first
    self.hidden_size = hidden_size
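A forward sketch for this ElmanRNN, unrolling the cell over the time dimension; it assumes the conventional (seq, batch, feat) layout internally and is an illustration, not the original source.

def forward(self, x_in, initial_hidden=None):
    if self.batch_first:
        batch_size, seq_size, _ = x_in.size()
        x_in = x_in.permute(1, 0, 2)   # -> (seq, batch, feat)
    else:
        seq_size, batch_size, _ = x_in.size()
    if initial_hidden is None:
        initial_hidden = x_in.new_zeros(batch_size, self.hidden_size)
    hidden_t = initial_hidden
    hiddens = []
    for t in range(seq_size):
        hidden_t = self.rnn_cell(x_in[t], hidden_t)  # one Elman step
        hiddens.append(hidden_t)
    hiddens = torch.stack(hiddens)     # (seq, batch, hidden)
    if self.batch_first:
        hiddens = hiddens.permute(1, 0, 2)
    return hiddens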
def __init__(self, agent, vocab_size, embed_dim, hidden_size, max_len,
             temperature, cell='rnn', force_eos=True,
             trainable_temperature=False, straight_through=False):
    super(RnnSenderGS, self).__init__()
    self.agent = agent
    self.force_eos = force_eos
    self.max_len = max_len
    if self.force_eos:
        assert self.max_len > 1, "Cannot force eos when max_len is below 2"
        self.max_len -= 1  # reserve one position for the forced eos symbol
    self.hidden_to_output = nn.Linear(hidden_size, vocab_size)
    self.embedding = nn.Linear(vocab_size, embed_dim)
    self.sos_embedding = nn.Parameter(torch.zeros(embed_dim))
    self.embed_dim = embed_dim
    self.vocab_size = vocab_size
    if not trainable_temperature:
        self.temperature = temperature
    else:
        self.temperature = torch.nn.Parameter(
            torch.tensor([temperature]), requires_grad=True)
    self.straight_through = straight_through

    cell = cell.lower()
    if cell == 'rnn':
        self.cell = nn.RNNCell(input_size=embed_dim, hidden_size=hidden_size)
    elif cell == 'gru':
        self.cell = nn.GRUCell(input_size=embed_dim, hidden_size=hidden_size)
    elif cell == 'lstm':
        self.cell = nn.LSTMCell(input_size=embed_dim, hidden_size=hidden_size)
    else:
        raise ValueError(f"Unknown RNN Cell: {cell}")
    self.reset_parameters()
def __init__(self, cell_type="lstm", input_size=1, hidden_size=20, output_size=1, nonlinearity="tanh"): super(lstm_rnn_gru, self).__init__() self.input_size = input_size self.hidden_size = hidden_size self.output_size = output_size self.nonlinearity = nonlinearity.lower() assert self.nonlinearity in ['tanh', 'relu'] self.cell_type = cell_type.lower() if self.cell_type == "lstm": self.layer1 = nn.LSTMCell(input_size=self.input_size, hidden_size=self.hidden_size) self.layer2 = nn.LSTMCell(input_size=self.hidden_size, hidden_size=self.output_size) elif self.cell_type == "rnn": self.layer1 = nn.RNNCell(input_size=self.input_size, hidden_size=self.hidden_size, nonlinearity=self.nonlinearity) self.layer2 = nn.RNNCell(input_size=self.hidden_size, hidden_size=self.output_size, nonlinearity=self.nonlinearity) elif self.cell_type == "gru": self.layer1 = nn.GRUCell(input_size=self.input_size, hidden_size=self.hidden_size) self.layer2 = nn.GRUCell(input_size=self.hidden_size, hidden_size=self.output_size) else: raise ("Please enter a good cell type (LSTM/RNN/GRU)") self.layer1.weight_hh.data.normal_(0.0, 0.1) self.layer1.weight_ih.data.normal_(0.0, 0.1) self.layer2.weight_hh.data.normal_(0.0, 0.1) self.layer2.weight_ih.data.normal_(0.0, 0.1)
def __init__(self, column_units, score, d_rate, inv, rnn_units=128):
    # rnn_units was an undefined free variable in the original;
    # it is assumed to be a constructor parameter here (default 128 is illustrative)
    super(Model, self).__init__()
    self.cnn = nn.Sequential(
        nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(16),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2))
    self.rnn = nn.RNNCell(16 * (32 // 2) * (32 // 2), rnn_units)
    self.classifier = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(rnn_units, column_units),
    )
    mask = selected_dropout.dropout_mask(score, d_rate, inv)
    self.register_buffer('mask', mask)
def __init__(self, *args, **kwargs):
    super(Net, self).__init__()
    self.state_dim = 4
    self.input_dim = kwargs.get('input_dim', 2)
    self.output_dim = kwargs.get('output_dim', 2)
    self.hidden_dim = kwargs.get('hidden_dim', 32)
    self.dist_threshold = kwargs.get('dist_threshold', -1.0)
    self.rtype = kwargs.get('rtype', 'lstm')
    self.attention_params = kwargs.get('attention_params', {})
    self.use_vel = kwargs.get('use_vel', True)
    self.use_mask = kwargs.get('use_mask', False)
    self.FQA_block = FQA(self.input_dim, self.hidden_dim, **self.attention_params)
    self.vel_predictor = VelPredictor(self.input_dim, self.hidden_dim, self.output_dim)
    if self.rtype == 'lstm':
        self.rnn_cell = nn.LSTMCell(self.input_dim, self.hidden_dim)
    elif self.rtype == 'gru':
        self.rnn_cell = nn.GRUCell(self.input_dim, self.hidden_dim)
    elif self.rtype == 'rnn_tanh':
        self.rnn_cell = nn.RNNCell(self.input_dim, self.hidden_dim, nonlinearity='tanh')
    elif self.rtype == 'rnn_relu':
        self.rnn_cell = nn.RNNCell(self.input_dim, self.hidden_dim, nonlinearity='relu')
def build_model(self):
    self.rnn = nn.RNNCell(self.input_size, self.hidden_size, nonlinearity='relu').cuda()
    self.fc = nn.Linear(self.hidden_size, self.num_classes).cuda()
    self.criterion = nn.CrossEntropyLoss().cuda()
    # self.optimizer = torch.optim.Adam(self.parameters(), lr=self.lr_prm)
    self.optimizer = torch.optim.RMSprop(self.parameters(), lr=self.lr, alpha=0.99,
                                         eps=1e-08, weight_decay=0,
                                         momentum=self.momentum, centered=False)
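A hedged training-step sketch for the model built above; `inputs` (seq_len, batch, input_size) and `labels` are illustrative names, and both are assumed to already live on the GPU since the modules are moved there with .cuda().

def train_step(self, inputs, labels):
    # unroll the ReLU RNNCell over the sequence, classify the final state
    hx = inputs.new_zeros(inputs.size(1), self.hidden_size)
    for t in range(inputs.size(0)):
        hx = self.rnn(inputs[t], hx)
    loss = self.criterion(self.fc(hx), labels)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    return loss.item()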
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5):
    super(RNNModel, self).__init__()
    self.drop = nn.Dropout(dropout)
    if rnn_type in ['LSTM', 'GRU']:
        self.encoder = getattr(nn, rnn_type + 'Cell')(ninp, nhid)
        self.decoder = getattr(nn, rnn_type + 'Cell')(ninp, nhid)
    else:
        try:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
        except KeyError:
            raise ValueError("""An invalid option for `--model` was supplied,
                             options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
        self.encoder = nn.RNNCell(ninp, nhid, nonlinearity=nonlinearity)
        self.decoder = nn.RNNCell(ninp, nhid, nonlinearity=nonlinearity)
    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers
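One detail this RNNModel must handle downstream: nn.LSTMCell returns an (h, c) tuple while nn.GRUCell and nn.RNNCell return a single tensor. A minimal step-helper sketch, hypothetical rather than from the original source:

def _step(self, cell, emb_t, hidden):
    hidden = cell(emb_t, hidden)                   # (h, c) for LSTM, h otherwise
    output = hidden[0] if isinstance(hidden, tuple) else hidden
    return self.drop(output), hidden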
def __init__(self, model_dim=None, mlp_dim=None, num_classes=None,
             word_embedding_dim=None, initial_embeddings=None, **kwargs):
    super(Net, self).__init__()
    self.word_embedding_dim = word_embedding_dim
    self.model_dim = model_dim
    self.initial_embeddings = initial_embeddings
    self.rnn = nn.RNNCell(word_embedding_dim, model_dim)
    self.l0 = nn.Linear(model_dim, mlp_dim)
    self.l1 = nn.Linear(mlp_dim, num_classes)
def __init__(self, u_size, v_size, t_size, emb_dim_u=32, emb_dim_v=32, emb_dim_t=16,
             hidden_dim=32, nb_cnt=15, sampling_list=None, vid_coor_nor=None,
             vid_pop=None, dropout=0.5, mod=0):
    super(AttentionModelNew, self).__init__()
    self.u_size = u_size
    self.v_size = v_size
    self.t_size = t_size
    self.emb_dim_u = emb_dim_u
    self.emb_dim_v = emb_dim_v
    self.emb_dim_t = emb_dim_t
    self.hidden_dim = hidden_dim
    self.nb_cnt = nb_cnt
    self.sampling_list = sampling_list
    self.vid_coor_nor = vid_coor_nor
    self.vid_pop = vid_pop
    self.dropout = dropout
    self.mod = mod
    # Python 3: materialize the dict view before handing it to KDTree
    self.tree = KDTree(list(self.vid_coor_nor.values()))
    self.embedder_u = nn.Embedding(self.u_size, self.emb_dim_u)
    self.embedder_v = nn.Embedding(self.v_size, self.emb_dim_v)
    self.embedder_t = nn.Embedding(self.t_size, self.emb_dim_t)
    self.rid_sampling_info = {}
    self.rnn_short = nn.RNNCell(self.emb_dim_v, self.hidden_dim)
    self.rnn_long = nn.GRUCell(self.emb_dim_v, self.hidden_dim)
    self.decoder_hl = IndexLinear(self.hidden_dim, v_size)
    self.decoder_hs = IndexLinear(self.hidden_dim, v_size)
    self.decoder_t = IndexLinear(self.emb_dim_t, v_size)
    self.decoder_u = IndexLinear(self.emb_dim_u, v_size)
    if self.mod == 0:
        self.merger_weight = nn.Parameter(torch.ones(1, 5) / 5.0)
    elif self.mod == 1:
        self.merger_weight = nn.Parameter(torch.ones(1, 6) / 6.0)
    elif self.mod in {2, 3}:
        self.merger_weight_al = []
        for _ in range(7):  # range, not Python 2 xrange
            self.merger_weight_al.append(nn.Parameter(torch.ones(1, 6) / 6.0))
    self.att_dim = self.emb_dim_t + self.hidden_dim * 2
    self.att_M = nn.Parameter(torch.ones(self.att_dim, self.att_dim) / self.att_dim)
    # TODO change back: zero out everything outside the three diagonal blocks
    for i in range(self.att_dim):
        for j in range(self.att_dim):
            if i < self.hidden_dim and j < self.hidden_dim:
                continue
            if self.hidden_dim <= i < self.hidden_dim * 2 and self.hidden_dim <= j < self.hidden_dim * 2:
                continue
            if i >= self.hidden_dim * 2 and j >= self.hidden_dim * 2:
                continue
            self.att_M.data[i, j] = 0.0
    self.att_merger = nn.Linear(2, 1, bias=False)
    self.att_merger.weight.data[0, 0] = 0.5
    self.att_merger.weight.data[0, 1] = -0.5
def __init__(self, bit_len, batch_size, fea_len_low, fea_len_mid, cgc, gap=16):
    super(imgNet, self).__init__()
    self.gap = gap
    self.lstm_hidden = 1024
    self.hidden = 1024
    self.bit_len = bit_len
    self.batch_size = batch_size
    self.cgc = cgc
    # note: despite the name, this is a vanilla RNNCell, not an LSTM
    self.lstm = nn.RNNCell(self.hidden, self.lstm_hidden)
    self.lstm.bias_ih.data.fill_(0)
    self.lstm.bias_hh.data.fill_(0)
    self.actor_linear = make_fc_layer(self.lstm_hidden, 1)
    self.low_fc = nn.Sequential(
        nn.Linear(fea_len_low, self.hidden), nn.ReLU(inplace=True), nn.Dropout(p=0.5),
        nn.Linear(self.hidden, self.hidden), nn.ReLU(inplace=True), nn.Dropout(p=0.5))
    _initialize_weights(self.low_fc)
    self.mid_fc = nn.Sequential(
        nn.Linear(fea_len_mid, self.hidden), nn.ReLU(inplace=True), nn.Dropout(p=0.5),
        nn.Linear(self.hidden, self.hidden), nn.ReLU(inplace=True), nn.Dropout(p=0.5))
    _initialize_weights(self.mid_fc)
    self.opt2 = optim.SGD(self.lstm.parameters(), lr=0.001, momentum=0.9, weight_decay=0.005)
    self.opt3 = optim.SGD(self.actor_linear.parameters(), lr=0.001, momentum=0.9, weight_decay=0.005)
    self.opt4 = optim.SGD(self.low_fc.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)
    self.opt5 = optim.SGD(self.mid_fc.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)
def __init__(self, cell_type, input_size, hidden_size, use_cuda):
    super(S2S_BA_Model, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.use_cuda = use_cuda
    self.cell_type = cell_type
    if self.cell_type not in ['rnn', 'gru', 'lstm']:
        raise ValueError(self.cell_type,
                         " is not an appropriate cell type. Please select one of rnn, gru, or lstm.")
    if self.cell_type == 'rnn':
        self.Ecell = nn.RNNCell(self.input_size, self.hidden_size)
        self.Dcell = nn.RNNCell(1 + self.hidden_size, self.hidden_size)
    elif self.cell_type == 'gru':
        self.Ecell = nn.GRUCell(self.input_size, self.hidden_size)
        self.Dcell = nn.GRUCell(1 + self.hidden_size, self.hidden_size)
    elif self.cell_type == 'lstm':
        self.Ecell = nn.LSTMCell(self.input_size, self.hidden_size)
        self.Dcell = nn.LSTMCell(1 + self.hidden_size, self.hidden_size)
    self.Wattn_energies = nn.Linear(self.hidden_size * 2, self.hidden_size)
    self.Wusage = nn.Linear(self.hidden_size, 1)
    self.Wout = nn.Linear(1 + self.hidden_size * 2, self.hidden_size)
    self.v = nn.Parameter(torch.rand(self.hidden_size))
    stdv = 1. / math.sqrt(self.v.size(0))
    self.v.data.normal_(mean=0, std=stdv)  # overrides the uniform init with N(0, stdv)
    self.init()
def __init__(
    self,
    agent,
    vocab_size,
    embed_dim,
    hidden_size,
    max_len,
    temperature,
    cell="rnn",
    trainable_temperature=False,
    straight_through=False,
):
    super(RnnSenderGS, self).__init__()
    self.agent = agent
    assert max_len >= 1, "Cannot have a max_len below 1"
    self.max_len = max_len
    self.hidden_to_output = nn.Linear(hidden_size, vocab_size)
    self.embedding = nn.Linear(vocab_size, embed_dim)
    self.sos_embedding = nn.Parameter(torch.zeros(embed_dim))
    self.embed_dim = embed_dim
    self.vocab_size = vocab_size
    if not trainable_temperature:
        self.temperature = temperature
    else:
        self.temperature = torch.nn.Parameter(
            torch.tensor([temperature]), requires_grad=True)
    self.straight_through = straight_through

    cell = cell.lower()
    if cell == "rnn":
        self.cell = nn.RNNCell(input_size=embed_dim, hidden_size=hidden_size)
    elif cell == "gru":
        self.cell = nn.GRUCell(input_size=embed_dim, hidden_size=hidden_size)
    elif cell == "lstm":
        self.cell = nn.LSTMCell(input_size=embed_dim, hidden_size=hidden_size)
    else:
        raise ValueError(f"Unknown RNN Cell: {cell}")
    self.reset_parameters()
def __init__(self, agent, vocab_size, embed_dim, hidden_size, cell='rnn'):
    super(RnnReceiverGS, self).__init__()
    self.agent = agent
    cell = cell.lower()
    if cell == 'rnn':
        self.cell = nn.RNNCell(input_size=embed_dim, hidden_size=hidden_size)
    elif cell == 'gru':
        self.cell = nn.GRUCell(input_size=embed_dim, hidden_size=hidden_size)
    elif cell == 'lstm':
        self.cell = nn.LSTMCell(input_size=embed_dim, hidden_size=hidden_size)
    else:
        raise ValueError(f"Unknown RNN Cell: {cell}")
    self.embedding = nn.Linear(vocab_size, embed_dim)
def __init__(self, opt):
    super(RNN, self).__init__()
    self.module_name = 'RNN'
    self.opt = opt
    self.input_size = opt.input_size
    self.output_size = opt.output_size
    self.encoder_hidden_size = opt.encoder_hidden_size
    self.decoder_hidden_size = opt.decoder_hidden_size
    print('input_size:', self.input_size, 'output_size:', self.output_size)
    self.encoder = nn.RNN(self.input_size, self.encoder_hidden_size, 1)
    self.decoder_in = nn.Linear(self.encoder_hidden_size, self.output_size)
    self.decoder = nn.RNNCell(self.input_size, self.decoder_hidden_size)
    self.out_linear = nn.Linear(
        self.decoder_hidden_size + self.output_size, self.output_size)
def __init__(self):
    super(CaptionNet, self).__init__()

    # Make VGG net
    self.vgg = VGG(make_layers(VGG_MODEL_CFG))
    self.vgg.load_state_dict(torch.load(VGG_MODEL_FILE))

    # Recurrent layer
    self.rnn_cell = nn.RNNCell(
        input_size=WORDVEC_SIZE,
        hidden_size=RNN_HIDDEN_SIZE,
        nonlinearity='relu',
    )

    # Linear layer to convert hidden layer to word in vocab
    self.hidden_to_vocab = nn.Linear(RNN_HIDDEN_SIZE, VOCABULARY_SIZE)
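A greedy decoding sketch for CaptionNet. It is hypothetical: the snippet does not show how the VGG features condition the RNN, so the sketch assumes an initial hidden state h0 derived from them, and an `embed(idx)` lookup returning WORDVEC_SIZE vectors; neither appears in the original.

def greedy_decode(self, h0, embed, start_idx, max_len=20):
    h, word = h0, start_idx
    out = []
    for _ in range(max_len):
        h = self.rnn_cell(embed(word), h)              # one recurrent step
        word = self.hidden_to_vocab(h).argmax(dim=-1)  # most likely next word
        out.append(word)
    return out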
def __init__(self, u_size, v_size, emb_dim=50, nb_cnt=100, sampling_list=None, mod=0):
    super(JNTM, self).__init__()
    self.emb_dim = emb_dim
    self.u_size = u_size
    self.v_size = v_size
    self.nb_cnt = nb_cnt
    self.sampling_list = sampling_list
    self.mod = mod
    self.rnn_cell = nn.RNNCell(emb_dim, emb_dim)
    self.gru_cell = nn.GRUCell(emb_dim, emb_dim)
    self.embedder_u = nn.Embedding(u_size, emb_dim)
    self.embedder_v = nn.Embedding(v_size, emb_dim)
    if mod == 0:
        self.decoder = IndexLinear(emb_dim * 3, v_size)
    else:
        self.decoder = IndexLinear(emb_dim * 2, v_size)
def __init__(self, input_size=100, hidden_size=200, output_size=1, out_nlin='linear'):
    super(RNN, self).__init__()
    self.hidden_size = hidden_size
    self.input_size = input_size
    self.rnn = nn.RNNCell(input_size, hidden_size, nonlinearity='relu')
    self.i2o = nn.Linear(hidden_size, output_size)
    if out_nlin == 'linear':
        self.non_linearity = nn.Identity()
    elif out_nlin == 'sigmoid':
        self.non_linearity = nn.Sigmoid()
def __init__(self, choice, triple=False):
    super(Controller_NAS, self).__init__(choice, triple)
    self.space = len(PRIMITIVES_NAS)
    self.num_action = 5
    self.choice = choice
    if choice == 'RNN':
        self.controller = nn.RNNCell(input_size=self.space, hidden_size=self.space)
    elif choice == 'PURE':
        self._arch_parameters = []
        for _ in range(self.num_action):
            alpha = torch.ones([1, self.space], dtype=torch.float, device='cuda') / self.space
            alpha = alpha + torch.randn(self.space, device='cuda') * 1e-2
            self._arch_parameters.append(Variable(alpha, requires_grad=True))
def __init__(self, input_size, hidden_size, batch_first=False):
    """
    Args:
        input_size (int): the size of the input feature vector
        hidden_size (int): the size of the hidden state vectors
        batch_first (bool): flag whether the batch is the 0th dimension
            in the input tensor
    """
    # call the base initialization
    super(ElmanRNN, self).__init__()
    # Define the model
    self.rnn_cell = nn.RNNCell(input_size, hidden_size)
    # store the rest of the parameters
    self.batch_first = batch_first
    self.hidden_size = hidden_size
def __init__(self, coref_tagger):
    super(CorefTagger, self).__init__()
    self.coref_tagger = coref_tagger
    # for param in self.coref_tagger.parameters():  # freeze the model
    #     param.requires_grad = False
    decoder_size = self.coref_tagger.Decoder.out_features
    self.Review = nn.RNNCell(decoder_size * 3, 64, nonlinearity='tanh')
    self.h0 = nn.Parameter(torch.zeros(64).type(torch.cuda.FloatTensor))
    self.Harmonize = nn.Linear(64, 3)
    self.optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()),
                               lr=0.005, momentum=0.9, weight_decay=1e-4)
    self.label_constraint = self.coref_tagger.label_constraint
def __init__(self, u_size, v_size, t_size, emb_dim_u=32, emb_dim_v=32, emb_dim_t=16,
             hidden_dim=32, nb_cnt=100, sampling_list=None, vid_coor_rad=None,
             vid_pop=None, dropout=0.5):
    super(SpatioTemporalModelNCF, self).__init__()
    self.emb_dim_u = emb_dim_u
    self.emb_dim_v = emb_dim_v
    self.emb_dim_t = emb_dim_t
    self.hidden_dim = hidden_dim
    self.u_size = u_size
    self.v_size = v_size
    self.t_size = t_size
    self.nb_cnt = nb_cnt
    self.dropout = dropout
    self.sampling_list = sampling_list
    self.vid_coor_rad = vid_coor_rad
    self.vid_pop = vid_pop
    # Python 3: materialize the dict view before handing it to BallTree
    self.tree = BallTree(list(vid_coor_rad.values()), leaf_size=40, metric='haversine')
    self.dist_metric = DistanceMetric.get_metric('haversine')
    self.uid_rid_sampling_info = {}
    for uid in range(0, u_size):
        self.uid_rid_sampling_info[uid] = {}
    self.rnn_short = nn.RNNCell(self.emb_dim_v, self.hidden_dim)  # TODO check GRU
    self.rnn_long = nn.GRUCell(self.emb_dim_v, self.hidden_dim)
    self.embedder_u = nn.Embedding(self.u_size, self.emb_dim_u)
    self.embedder_v = nn.Embedding(self.v_size, self.emb_dim_v)
    self.embedder_v_context = nn.Embedding(self.v_size, self.hidden_dim)
    self.embedder_t = nn.Embedding(self.t_size, self.emb_dim_t)
    dim_merged = self.hidden_dim * 2 + self.emb_dim_u + self.emb_dim_t + self.emb_dim_v
    # integer division: layer sizes must be ints in Python 3
    self.ff1 = nn.Linear(dim_merged, dim_merged // 2)
    self.ff2 = nn.Linear(dim_merged // 2, dim_merged // 4)
    self.ff3 = nn.Linear(dim_merged // 4, 1)
def __init__(self, input_size, read_size, output_size,
             custom_initialization=False, discourage_pop=False,
             hidden_size=16, n_args=4, **kwargs):
    super(PDARNNSimpleStructController, self).__init__(input_size, read_size,
                                                       output_size, n_args=n_args)
    for param_name, arg_value in kwargs.items():
        unused_init_param(param_name, arg_value, self)

    self._hidden = None
    self._cell_state = None

    # Create an RNN Module object
    nn_input_size = self._input_size + self._read_size
    nn_output_size = self._n_args + self._read_size * 2 + self._output_size  # unused: replaced by the separate heads below
    self._rnn = nn.RNNCell(nn_input_size, hidden_size)

    self._linear_nargs = nn.Linear(hidden_size, self._n_args)
    self._sigmoid_nargs = Sigmaid.apply
    # self._sigmoid_nargs = Sigmaid()
    self._linear_v1 = nn.Linear(hidden_size, self._read_size)
    self._tanh_v1 = nn.Tanh()
    self._linear_v2 = nn.Linear(hidden_size, self._read_size)
    self._tanh_v2 = nn.Tanh()
    self._linear_o = nn.Linear(hidden_size, self._output_size)
    self._tanh_o = nn.Tanh()

    if custom_initialization:
        PDARNNSimpleStructController.init_normal(self._rnn.weight_hh)
        PDARNNSimpleStructController.init_normal(self._rnn.weight_ih)
        self._rnn.bias_hh.data.fill_(0)
        self._rnn.bias_ih.data.fill_(0)
        # the original referenced a nonexistent self._linear here; the
        # instruction-slot head self._linear_nargs is the likely target
        PDARNNSimpleStructController.init_normal(self._linear_nargs.weight)
        self._linear_nargs.bias.data.fill_(0)

    if discourage_pop:
        self._linear_nargs.bias.data[0] = -1.  # Discourage popping
        if n_args >= 4:
            self._linear_nargs.bias.data[2] = 1.  # Encourage reading
            self._linear_nargs.bias.data[3] = 1.  # Encourage writing
def __init__(self, cfg):
    super(BaseACT, self).__init__()
    self.cfg = cfg
    if cfg.MODEL.GRU:
        controller = nn.GRUCell(cfg.INPUT.DIM + 1, cfg.MODEL.CONTROLLER.HIDDEN_SIZE)
    elif cfg.MODEL.LSTM:
        controller = nn.LSTMCell(cfg.INPUT.DIM + 1, cfg.MODEL.CONTROLLER.HIDDEN_SIZE)
    else:
        controller = nn.RNNCell(cfg.INPUT.DIM + 1, cfg.MODEL.CONTROLLER.HIDDEN_SIZE)
    if cfg.MODEL.RNN_BASELINE:
        self.cell = SRNCell(cfg, controller)
    else:
        self.cell = ACTCell(cfg, controller)
def __init__(self, params):
    super(Policy, self).__init__()
    # for p in params:
    #     setattr(self, p, params[p])
    self.input_size = params['input_size']
    self.hidden_size = params['hidden_size']
    self.batch_size = params['batch_size']
    self.output_size = params['output_size']
    self.rnn_cell = nn.RNNCell(self.input_size, self.hidden_size)
    self.hx = torch.zeros(self.batch_size, self.hidden_size)
    self.hidden_states = []
    self.linear = nn.Linear(self.hidden_size, self.output_size)
    self.saved_log_probs = []
    self.rewards = []
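A REINFORCE-style action-selection sketch consistent with the buffers above (saved_log_probs, rewards, hidden_states); this is an assumption-laden illustration, not the original author's method. `state` is expected to have shape (batch_size, input_size).

def select_action(self, state):
    self.hx = self.rnn_cell(state, self.hx)          # update recurrent state
    self.hidden_states.append(self.hx)
    probs = torch.softmax(self.linear(self.hx), dim=-1)
    dist = torch.distributions.Categorical(probs)
    action = dist.sample()
    self.saved_log_probs.append(dist.log_prob(action))  # for the policy-gradient loss
    return action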