def forward(self, x):
    """Run the 3-layer gated RNN over x and accumulate regularization losses.

    Returns (outputs, res_loss, seasonal_loss, smooth_loss):
      - res_loss pulls the layer-1 output toward zero (residual component),
      - seasonal_loss pulls the layer-2 output toward its value T steps back,
      - smooth_loss pulls the layer-3 output toward a moving average of
        itself and penalizes its per-step standard deviation.
    """
    T = 7  # seasonality lag, in time steps
    batch_size = x.shape[0]
    hidden1 = self.init_hidden_state(batch_size)
    hidden2 = self.init_hidden_state(batch_size)
    hidden3 = self.init_hidden_state(batch_size)
    u1 = self.init_gate_state(batch_size)
    u2 = self.init_gate_state(batch_size)
    outputs = []
    season_cached = []  # output2 values from the last T steps
    smooth_cached = []  # previous step's output3 (holds at most one entry)
    res_loss = 0
    seasonal_loss = 0
    smooth_loss = 0
    for input_t in x.split(1, dim=1):
        hidden1 = self.layer1(input_t.squeeze(1), hidden1)
        # Layers 2 and 3 update through convex gates u1/u2 (leaky update).
        hidden2 = u1 * self.layer2(hidden1, hidden2) + (1 - u1) * hidden2
        hidden3 = u2 * self.layer3(hidden2, hidden3) + (1 - u2) * hidden3
        output1 = torch.tanh(self.h2o1(hidden1))
        output2 = torch.tanh(self.h2o2(hidden2))
        output3 = torch.tanh(self.h2o3(hidden3))
        output = output1 + output2 + output3
        u1 = self.get_gate_state(hidden1, hidden2, self.gate1)
        u2 = self.get_gate_state(hidden2, hidden3, self.gate2)
        outputs += [output]
        # Residual component should stay near zero.
        res_loss += F.mse_loss(output1, to_gpu(torch.zeros_like(output1)))
        if len(season_cached) == T:
            s_w = season_cached.pop(0)
            seasonal_loss += F.mse_loss(output2, s_w)
        # Build the smoothing target as a detached COPY.
        # BUG FIX: the original `smooth = output3.data` aliased output3's
        # storage, so the in-place writes below overwrote output3 itself —
        # the MSE term then compared a tensor with its own storage (loss
        # contribution vanished) and the mutation corrupted autograd for
        # the `output` already appended above. `.clone()` breaks the alias.
        if smooth_cached == []:
            ma = moving_average(output3.cpu().data.numpy(), N=5)
            smooth = output3.data.clone()
            smooth[:, 2:-2] = torch.Tensor(ma)
        else:
            tmp = torch.cat((smooth_cached.pop(), output3), 1)[:, 12:]
            ma = moving_average(tmp.cpu().data.numpy(), N=5)[:, 10:]
            smooth = output3.data.clone()
            smooth[:, :-2] = torch.Tensor(ma)
        smooth_loss += F.mse_loss(output3, to_gpu(smooth))
        var = torch.std(output3, dim=1)
        smooth_loss += F.mse_loss(var, to_gpu(torch.zeros_like(var)))
        season_cached.append(output2)
        smooth_cached.append(output3)
    outputs = torch.stack(outputs, 1).squeeze(2)
    return outputs, res_loss, seasonal_loss, smooth_loss
def forward(self, x):
    """Dilated 3-layer RNN forward pass.

    Returns (outputs, res_loss, seasonal_loss, smooth_loss): residual pull
    toward zero on layer 1, lag-T seasonal consistency on layer 2, and a
    low-variance (smoothness) penalty on layer 3.
    """
    T = 7  # seasonality lag, in time steps
    batch_size = x.shape[0]
    h0 = self.init_hidden_state(batch_size)
    # Dilated recurrence: layer k consumes the hidden state from
    # dilation[k] steps back, so each FIFO is pre-filled with h0.
    cache1 = [h0] * self.dilation[0]
    cache2 = [h0] * self.dilation[1]
    cache3 = [h0] * self.dilation[2]
    outputs = []
    s_cached = []  # output2 history for the seasonal penalty
    res_loss = 0
    seasonal_loss = 0
    smooth_loss = 0
    for step_input in x.split(1, dim=1):
        h1 = cache1.pop(0)
        h2 = cache2.pop(0)
        h3 = cache3.pop(0)
        hidden1 = self.layer1(step_input.squeeze(1), h1)
        hidden2 = self.layer2(hidden1, h2)
        hidden3 = self.layer3(hidden2, h3)
        cache1.append(hidden1)
        cache2.append(hidden2)
        cache3.append(hidden3)
        out1 = torch.tanh(self.h2o1(hidden1))
        out2 = torch.tanh(self.h2o2(hidden2))
        out3 = torch.tanh(self.h2o3(hidden3))
        combined = out1 + out2 + out3
        # Residual component should stay near zero.
        res_loss += F.mse_loss(out1, to_gpu(torch.zeros_like(out1)))
        if len(s_cached) == T:
            seasonal_loss += F.mse_loss(out2, s_cached.pop(0))
        # Low per-step spread keeps the third component smooth.
        var = torch.std(out3, dim=1)
        smooth_loss += F.mse_loss(var, to_gpu(torch.zeros_like(var)))
        s_cached.append(out2)
        outputs.append(combined)
    outputs = torch.stack(outputs, 1).squeeze(2)
    return outputs, res_loss, seasonal_loss, smooth_loss
def forward(self, x):
    """Gated 3-layer RNN forward pass.

    Returns (outputs, res_loss, seasonal_loss, smooth_loss): residual pull
    toward zero on layer 1, lag-T seasonal consistency on layer 2, and a
    smoothness penalty on layer 3 (lag-2 mean matching + low variance).
    """
    T = 7  # seasonality lag, in time steps
    batch_size = x.shape[0]
    hidden1 = self.init_hidden_state(batch_size)
    hidden2 = self.init_hidden_state(batch_size)
    hidden3 = self.init_hidden_state(batch_size)
    u1 = self.init_gate_state(batch_size)
    u2 = self.init_gate_state(batch_size)
    outputs = []
    s_cached1 = []  # output2 history (seasonal penalty, lag T)
    s_cached2 = []  # output3 history (mean-matching penalty, lag 2)
    res_loss = 0
    seasonal_loss = 0
    smooth_loss = 0
    for input_t in x.split(1, dim=1):
        hidden1 = self.layer1(input_t.squeeze(1), hidden1)
        # Layers 2 and 3 update through convex gates u1/u2 (leaky update).
        hidden2 = u1 * self.layer2(hidden1, hidden2) + (1 - u1) * hidden2
        hidden3 = u2 * self.layer3(hidden2, hidden3) + (1 - u2) * hidden3
        output1 = torch.tanh(self.h2o1(hidden1))
        output2 = torch.tanh(self.h2o2(hidden2))
        output3 = torch.tanh(self.h2o3(hidden3))
        output = output1 + output2 + output3
        u1 = self.get_gate_state(hidden1, hidden2, self.gate1)
        u2 = self.get_gate_state(hidden2, hidden3, self.gate2)
        outputs += [output]
        res_loss += F.mse_loss(output1, to_gpu(torch.zeros_like(output1)))
        if len(s_cached1) == T:
            s_w = s_cached1.pop(0)
            seasonal_loss += F.mse_loss(output2, s_w)
        if len(s_cached2) == 2:
            s_2 = s_cached2.pop(0)
            smooth_loss += 30 * F.mse_loss(s_2.mean(dim=1), output3.mean(dim=1))
        var = torch.std(output3, dim=1)
        smooth_loss += F.mse_loss(var, to_gpu(torch.zeros_like(var)))
        s_cached1.append(output2)
        # BUG FIX: s_cached2 was never filled in the original, so the
        # lag-2 mean-matching branch above was dead code; cache output3
        # each step as the `len(s_cached2) == 2` check clearly intends.
        s_cached2.append(output3)
    outputs = torch.stack(outputs, 1).squeeze(2)
    return outputs, res_loss, seasonal_loss, smooth_loss
def forward(self, x):
    """Two-layer HM-LSTM forward; returns the summed per-layer projections."""
    h_t1, c_t1, z_t1, h_t2, c_t2, z_t2 = self.init_hidden()
    # The bottom boundary signal feeding layer 1 is always "on".
    z_one = to_gpu(torch.ones(1, 1))
    outputs = []
    for step in x.split(1, dim=1):
        h_t1, c_t1, z_t1 = self.cell_1(
            c_t1, step.squeeze(1), h_t1, h_t2, z_t1, z_one)
        h_t2, c_t2, z_t2 = self.cell_2(
            c_t2, h_t1, h_t2, None, z_t2, z_t1)
        outputs.append(self.l1_h2o(h_t1) + self.l2_h2o(h_t2))
    return torch.stack(outputs, 1).squeeze(2)
def forward(self, c, h_bottom, h, h_top, z, z_bottom):
    """One step of an HM-LSTM cell.

    Combines the recurrent, top-down, and bottom-up contributions (the
    latter two masked by the boundary signals z and z_bottom), splits the
    pre-activation into LSTM gates plus a boundary logit, and applies the
    HM-LSTM update/copy/flush state transition.

    Returns (h_new, c_new, z_new).
    """
    # h_bottom.size = bottom_size * batch_size
    # s_recur = torch.mm(self.W_01, h_bottom)  # possibly a typo in an
    # earlier revision — the recurrent path should use h with U_11.
    s_recur = torch.mm(h, self.U_11)
    if not self.last_layer:
        s_topdown_ = torch.mm(h_top, self.U_21)
        # Top-down input only flows when the boundary z fired.
        s_topdown = z.expand_as(s_topdown_) * s_topdown_
    else:
        s_topdown = to_gpu(torch.zeros(s_recur.size()))
    # s_bottomup_ = torch.mm(self.U_11, h)  # possibly a typo in an
    # earlier revision — bottom-up should use h_bottom with W_01.
    s_bottomup_ = torch.mm(h_bottom, self.W_01)
    # Bottom-up input only flows when the lower layer's boundary fired.
    s_bottomup = z_bottom.expand_as(s_bottomup_) * s_bottomup_
    f_s = s_recur + s_topdown + s_bottomup + self.bias.unsqueeze(
        0).expand_as(s_recur)
    # f_s packs [f, i, o, g, z_hat] along dim 1:
    # size = (4 * hidden_size + 1) * batch_size.
    # NOTE: F.sigmoid / F.tanh are deprecated (removed in recent PyTorch);
    # use the torch.* equivalents.
    f = torch.sigmoid(f_s[:, 0:self.hidden_size])
    i = torch.sigmoid(f_s[:, self.hidden_size:self.hidden_size * 2])
    o = torch.sigmoid(f_s[:, self.hidden_size * 2:self.hidden_size * 3])
    g = torch.tanh(f_s[:, self.hidden_size * 3:self.hidden_size * 4])
    z_hat = hard_sigm(
        self.a, f_s[:, self.hidden_size * 4:self.hidden_size * 4 + 1])
    one = to_gpu(torch.ones(f.size()))
    z = z.expand_as(f)
    z_bottom = z_bottom.expand_as(f)
    # HM-LSTM transition: FLUSH (z=1) re-initializes the cell, COPY
    # (z=0, z_bottom=0) keeps it, UPDATE (z=0, z_bottom=1) is the usual
    # LSTM cell update.
    c_new = z * (i * g) + (one - z) * (one - z_bottom) * c + (
        one - z) * z_bottom * (f * c + i * g)
    h_new = (one - z) * (one - z_bottom) * h + (
        z + (one - z) * z_bottom) * o * torch.tanh(c_new)
    # Straight-through binarization of the boundary logit.
    z_new = self.binary(z_hat)
    return h_new, c_new, z_new
def init_hidden(self):
    """Zero-initialized (h, c, z) states for both layers.

    Returns the 6-tuple (h_t1, c_t1, z_t1, h_t2, c_t2, z_t2).
    """
    states = []
    for _ in range(2):  # one (h, c, z) triple per layer
        states.append(to_gpu(torch.zeros(1, self.hidden_size)))  # h
        states.append(to_gpu(torch.zeros(1, self.hidden_size)))  # c
        states.append(to_gpu(torch.zeros(1, 1)))                 # z
    return tuple(states)
def self_forecast(self, x, step):
    """Encode the observed sequence x, then forecast `step` steps ahead
    by feeding each prediction back in as the next input.

    Returns the full output sequence (observed + forecast horizon).
    """
    x = x.unsqueeze(0)  # add batch dim (non-batch input)
    h_t1, c_t1, z_t1, h_t2, c_t2, z_t2 = self.init_hidden()
    z_one = to_gpu(torch.ones(1, 1))
    outputs = []
    # Warm-up: consume the observed sequence.
    for input_t in x.split(1, dim=1):
        h_t1, c_t1, z_t1 = self.cell_1(
            c_t1, input_t.squeeze(1), h_t1, h_t2, z_t1, z_one)
        h_t2, c_t2, z_t2 = self.cell_2(
            c_t2, h_t1, h_t2, None, z_t2, z_t1)
        output = self.l1_h2o(h_t1) + self.l2_h2o(h_t2)
        outputs.append(output)
    # Closed loop: the previous prediction becomes the next input.
    for _ in range(step - 1):
        h_t1, c_t1, z_t1 = self.cell_1(
            c_t1, output, h_t1, h_t2, z_t1, z_one)
        h_t2, c_t2, z_t2 = self.cell_2(
            c_t2, h_t1, h_t2, None, z_t2, z_t1)
        output = self.l1_h2o(h_t1) + self.l2_h2o(h_t2)
        outputs.append(output)
    return torch.stack(outputs, 1).squeeze(2)[0]
def forecast(self, x):
    """Run the model over x and return (total, layer-1, layer-2) sequences,
    each with the batch dimension stripped."""
    x = x.unsqueeze(0)  # add batch dim (non-batch input)
    h_t1, c_t1, z_t1, h_t2, c_t2, z_t2 = self.init_hidden()
    z_one = to_gpu(torch.ones(1, 1))
    totals, parts1, parts2 = [], [], []
    for input_t in x.split(1, dim=1):
        h_t1, c_t1, z_t1 = self.cell_1(
            c_t1, input_t.squeeze(1), h_t1, h_t2, z_t1, z_one)
        h_t2, c_t2, z_t2 = self.cell_2(
            c_t2, h_t1, h_t2, None, z_t2, z_t1)
        o1 = self.l1_h2o(h_t1)
        o2 = self.l2_h2o(h_t2)
        parts1.append(o1)
        parts2.append(o2)
        totals.append(o1 + o2)
    stacked = [torch.stack(seq, 1).squeeze(2)[0]
               for seq in (totals, parts1, parts2)]
    return stacked[0], stacked[1], stacked[2]
def init_hidden_state(self, batch_size):
    """All-zeros hidden state of shape (batch_size, hidden_size)."""
    state = torch.zeros(batch_size, self.hidden_size)
    return to_gpu(state)
def init_attention(self, batch_size, n=0):
    """Initial attention of shape (batch_size, input_dim):
    all zeros when n == 0, all ones otherwise."""
    maker = torch.zeros if n == 0 else torch.ones
    return to_gpu(maker(batch_size, self.input_dim))
def init_gate_state(self, batch_size):
    """All-ones gate state (gates start fully open)."""
    gate = torch.ones(batch_size, self.hidden_size)
    return to_gpu(gate)
if __name__ == '__main__':
    # Training-script setup: parse args, create a TensorBoard writer,
    # build the model/dataset, and prepare loss, metric, and optimizer.
    args = get_args()
    # Log directory: auto-generated by SummaryWriter when --flag is absent,
    # otherwise runs/pi/<flag>.
    if args.flag is None:
        writer = SummaryWriter(log_dir=None)
        # Recover the auto-generated directory from the writer itself.
        writer_path = list(writer.all_writers.keys())[0]
    else:
        writer_path = os.path.join('runs/pi', args.flag)
        writer = SummaryWriter(log_dir=writer_path)
    # Persist the run configuration next to the event files.
    with open(os.path.join(writer_path, 'config.json'), 'w') as f:
        f.write(json.dumps(vars(args), indent=4))
    model = to_gpu(
        NN(input_dim=336, hidden=[128], output_dim=24, bn=0, dropout=0))
    # He initialization for the first linear layer and the output head.
    nn.init.kaiming_normal_(model.layers.Linear0.weight)
    nn.init.kaiming_normal_(model.h2o.weight)
    dataset = DailyDataset_nn(N=2000, W=14)
    testX, testY = dataset.get_io(start_date='2016-01-01',
                                  end_date='2016-06-30')
    loader = get_loader(dataset, batch_size=64, shuffle=True, num_workers=1)
    with torch.no_grad():
        testX = to_gpu(testX)
        # NOTE(review): testY appears to be de-normalized by TOTAL_STD while
        # testX stays as-is — confirm against the evaluation code.
        testY = to_gpu(testY) * TOTAL_STD
    criterion = nn.MSELoss()
    metric = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)