def jojo_12(self, input_vars_125, var_1203, var_1159, input_vars_128,
            input_vars_123, input_vars_124, input_vars_122, input_vars_127,
            input_vars_130, input_vars_131, input_vars_129):
    var_1208 = torch.batch_norm(var_1203, input_vars_122, input_vars_123,
                                input_vars_124, input_vars_125,
                                False, 0.1, 1e-05, True)
    var_1210 = torch.add(var_1208, var_1159, alpha=1)
    var_1211 = torch.relu_(var_1210)
    var_1230 = torch._convolution(var_1211, input_vars_127, None, [1, 1], [1, 1],
                                  [1, 1], False, [0, 0], 1, False, False, True)
    var_1235 = torch.batch_norm(var_1230, input_vars_128, input_vars_129,
                                input_vars_130, input_vars_131,
                                False, 0.1, 1e-05, True)
    return var_1211, var_1235
def jojo_14(self, var_1127, input_vars_104, input_vars_111, input_vars_110,
            input_vars_112, input_vars_106, input_vars_109, input_vars_113,
            var_1083, input_vars_105, input_vars_107):
    var_1132 = torch.batch_norm(var_1127, input_vars_104, input_vars_105,
                                input_vars_106, input_vars_107,
                                False, 0.1, 1e-05, True)
    var_1151 = torch._convolution(var_1083, input_vars_109, None, [2, 2], [0, 0],
                                  [1, 1], False, [0, 0], 1, False, False, True)
    var_1156 = torch.batch_norm(var_1151, input_vars_110, input_vars_111,
                                input_vars_112, input_vars_113,
                                False, 0.1, 1e-05, True)
    var_1158 = torch.add(var_1132, var_1156, alpha=1)
    var_1159 = torch.relu_(var_1158)
    return var_1159
def jojo_5(self, input_vars_50, var_586, input_vars_51, input_vars_49,
           input_vars_57, input_vars_64, input_vars_52, input_vars_55,
           input_vars_61, input_vars_63, input_vars_58, input_vars_56,
           input_vars_53, input_vars_65, input_vars_62, input_vars_59):
    var_587 = torch.relu_(var_586)
    var_606 = torch._convolution(var_587, input_vars_49, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_611 = torch.batch_norm(var_606, input_vars_50, input_vars_51,
                               input_vars_52, input_vars_53,
                               False, 0.1, 1e-05, True)
    var_612 = torch.relu_(var_611)
    var_631 = torch._convolution(var_612, input_vars_55, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_636 = torch.batch_norm(var_631, input_vars_56, input_vars_57,
                               input_vars_58, input_vars_59,
                               False, 0.1, 1e-05, True)
    var_638 = torch.add(var_636, var_587, alpha=1)
    var_639 = torch.relu_(var_638)
    var_658 = torch._convolution(var_639, input_vars_61, None, [2, 2], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_663 = torch.batch_norm(var_658, input_vars_62, input_vars_63,
                               input_vars_64, input_vars_65,
                               False, 0.1, 1e-05, True)
    return var_663, var_639
def jojo_6(self, input_vars_44, input_vars_34, input_vars_37, var_459,
           input_vars_45, var_508, input_vars_31, input_vars_38, input_vars_41,
           input_vars_47, input_vars_40, input_vars_43, input_vars_33,
           input_vars_35, input_vars_46, input_vars_32, input_vars_39):
    var_510 = torch.add(var_508, var_459, alpha=1)
    var_511 = torch.relu_(var_510)
    var_530 = torch._convolution(var_511, input_vars_31, None, [2, 2], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_535 = torch.batch_norm(var_530, input_vars_32, input_vars_33,
                               input_vars_34, input_vars_35,
                               False, 0.1, 1e-05, True)
    var_536 = torch.relu_(var_535)
    var_555 = torch._convolution(var_536, input_vars_37, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_560 = torch.batch_norm(var_555, input_vars_38, input_vars_39,
                               input_vars_40, input_vars_41,
                               False, 0.1, 1e-05, True)
    var_579 = torch._convolution(var_511, input_vars_43, None, [2, 2], [0, 0],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_584 = torch.batch_norm(var_579, input_vars_44, input_vars_45,
                               input_vars_46, input_vars_47,
                               False, 0.1, 1e-05, True)
    return var_560, var_584
def jojo_25(self, input_vars_15, input_vars_14, input_vars_8, input_vars_11,
            input_vars_10, input_vars_17, input_vars_9, var_681, input_vars_16,
            input_vars_7, input_vars_13):
    var_695 = torch.max_pool2d(var_681, [3, 3], [2, 2], [1, 1], [1, 1], False)
    var_714 = torch._convolution(var_695, input_vars_7, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_719 = torch.batch_norm(var_714, input_vars_8, input_vars_9,
                               input_vars_10, input_vars_11,
                               False, 0.1, 1e-05, True)
    var_720 = torch.relu_(var_719)
    var_739 = torch._convolution(var_720, input_vars_13, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_744 = torch.batch_norm(var_739, input_vars_14, input_vars_15,
                               input_vars_16, input_vars_17,
                               False, 0.1, 1e-05, True)
    var_746 = torch.add(var_744, var_695, alpha=1)
    return var_746
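# The traced jojo_* fragments above all follow the same conv -> batch_norm ->
# relu -> conv -> batch_norm -> residual-add pattern (eval-mode batch norm,
# momentum 0.1, eps 1e-05). A minimal idiomatic sketch of that pattern as a
# module; BasicResidualBlock is a hypothetical name, not the original source:
import torch
import torch.nn as nn

class BasicResidualBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        out = torch.relu_(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        return torch.relu_(out + x)  # residual connection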
def jojo_7(self, input_vars_25, var_431, input_vars_16, input_vars_13,
           input_vars_15, input_vars_23, input_vars_19, input_vars_21,
           input_vars_22, var_407, input_vars_17, input_vars_14,
           input_vars_20):
    var_432 = torch.relu_(var_431)
    var_451 = torch._convolution(var_432, input_vars_13, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_456 = torch.batch_norm(var_451, input_vars_14, input_vars_15,
                               input_vars_16, input_vars_17,
                               False, 0.1, 1e-05, True)
    var_458 = torch.add(var_456, var_407, alpha=1)
    var_459 = torch.relu_(var_458)
    var_478 = torch._convolution(var_459, input_vars_19, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_483 = torch.batch_norm(var_478, input_vars_20, input_vars_21,
                               input_vars_22, input_vars_23,
                               False, 0.1, 1e-05, True)
    var_484 = torch.relu_(var_483)
    var_503 = torch._convolution(var_484, input_vars_25, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    return var_459, var_503
def jojo_19(self, var_851, input_vars_71, input_vars_70, input_vars_67,
            input_vars_55, input_vars_57, input_vars_56, var_900,
            input_vars_62, input_vars_65, input_vars_73, input_vars_61,
            input_vars_68, input_vars_63, input_vars_69, input_vars_59,
            input_vars_64, input_vars_58):
    var_919 = torch._convolution(var_851, input_vars_55, None, [2, 2], [0, 0],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_924 = torch.batch_norm(var_919, input_vars_56, input_vars_57,
                               input_vars_58, input_vars_59,
                               False, 0.1, 1e-05, True)
    var_926 = torch.add(var_900, var_924, alpha=1)
    var_927 = torch.relu_(var_926)
    var_946 = torch._convolution(var_927, input_vars_61, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_951 = torch.batch_norm(var_946, input_vars_62, input_vars_63,
                               input_vars_64, input_vars_65,
                               False, 0.1, 1e-05, True)
    var_952 = torch.relu_(var_951)
    var_971 = torch._convolution(var_952, input_vars_67, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_976 = torch.batch_norm(var_971, input_vars_68, input_vars_69,
                               input_vars_70, input_vars_71,
                               False, 0.1, 1e-05, True)
    var_978 = torch.add(var_976, var_927, alpha=1)
    var_979 = torch.relu_(var_978)
    var_998 = torch._convolution(var_979, input_vars_73, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    return var_979, var_998
def _instance_norm(raw, input, running_mean=None, running_var=None, weight=None,
                   bias=None, use_input_stats=True, momentum=0.1, eps=1e-5):
    # TODO: handle batch size != 1 via view operations
    print("WARNING: Instance normalization is exported to Caffe as BatchNorm, "
          "so the batch size should be 1")
    if running_var is not None or weight is not None:
        # TODO: support the affine=True or track_running_stats=True case
        raise NotImplementedError("InstanceNorm with affine=True or "
                                  "track_running_stats=True is not implemented")
    x = torch.batch_norm(input, weight, bias, running_mean, running_var,
                         use_input_stats, momentum, eps,
                         torch.backends.cudnn.enabled)
    bottom_blobs = [log.blobs(input)]
    layer_name1 = log.add_layer(name='instance_norm')
    top_blobs = log.add_blobs([x], name='instance_norm_blob')
    layer1 = caffe_net.Layer_param(name=layer_name1, type='BatchNorm',
                                   bottom=bottom_blobs, top=top_blobs)
    if running_mean is None or running_var is None:
        # No global stats: normalization is performed over the current mini-batch.
        layer1.batch_norm_param(use_global_stats=0, eps=eps)
        running_mean = torch.zeros(input.size()[1])
        running_var = torch.ones(input.size()[1])
    else:
        layer1.batch_norm_param(use_global_stats=1, eps=eps)
    running_mean_clone = running_mean.clone()
    running_var_clone = running_var.clone()
    layer1.add_data(running_mean_clone.cpu().numpy(),
                    running_var_clone.cpu().numpy(), np.array([1.0]))
    log.cnet.add_layer(layer1)
    if weight is not None and bias is not None:
        layer_name2 = log.add_layer(name='bn_scale')
        layer2 = caffe_net.Layer_param(name=layer_name2, type='Scale',
                                       bottom=top_blobs, top=top_blobs)
        layer2.param.scale_param.bias_term = True
        layer2.add_data(weight.cpu().data.numpy(), bias.cpu().data.numpy())
        log.cnet.add_layer(layer2)
    return x
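# The converter above relies on the fact that, for batch size 1, instance norm
# is exactly batch norm computed over the current mini-batch. A minimal sketch
# checking that equivalence (assumes affine=False and no running stats):
import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 8, 8)  # batch size must be 1 for the equivalence
bn_out = torch.batch_norm(x, None, None, None, None, True, 0.1, 1e-5,
                          torch.backends.cudnn.enabled)
in_out = F.instance_norm(x, eps=1e-5)
assert torch.allclose(bn_out, in_out, atol=1e-5)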
def forward(ctx, input, running_mean, running_var, weight=None, bias=None,
            training=False, momentum=0.1, eps=1e-5):
    # Training is not currently supported for torchexplain.
    # if training:
    #     size = input.size()
    #     # from operator import mul
    #     # from functools import reduce
    #     # if reduce(mul, size[2:], size[0]) == 1
    #     size_prods = size[0]
    #     for i in range(len(size) - 2):
    #         size_prods *= size[i + 2]
    #     if size_prods == 1:
    #         raise ValueError('Expected more than 1 value per channel when '
    #                          'training, got input size {}'.format(size))
    output = torch.batch_norm(input, weight, bias, running_mean, running_var,
                              training, momentum, eps,
                              torch.backends.cudnn.enabled)
    ctx.save_for_backward(input, output.clone())
    ctx.hparams = (weight, bias, running_mean, running_var, eps)
    return output
def forward(self, x, opt):
    # Break up the channels of x.
    batch_size = x.size(0)
    in_channels = x.size(1)
    x = x.view(-1, 1, x.size(2), x.size(3))
    # Perform optics individually on each channel.
    meas = self.optics(x)
    # Put the channels back together.
    meas = meas.view(batch_size, in_channels, meas.size(2), meas.size(3))
    # Normalize mask measurements.
    if opt.normalize_feats:
        # 1: normalize to have a max of 1.
        # view takes the max over the 3D image; unsqueeze restores the
        # dimensions needed for elementwise division.
        meas = meas / meas.view(meas.size(0), -1).max(
            dim=1)[0].unsqueeze(1).unsqueeze(2).unsqueeze(3)
        # 2: normalize [0, 1] to [-1, 1].
        mean = torch.Tensor([0.5, 0.5, 0.5]).cuda(opt.gpu)
        var = torch.Tensor([0.25, 0.25, 0.25]).cuda(opt.gpu)  # std squared
        meas = torch.batch_norm(meas, None, None, mean, var, False, 0, 0,
                                torch.backends.cudnn.enabled)
    # Forward through the VGG net.
    output = self.features(meas)
    output = output.view(output.size(0), -1)
    output = self.classifier(output)
    return output
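# With running_mean = 0.5 and running_var = 0.25 (std 0.5), the eval-mode
# batch_norm above computes (x - 0.5) / 0.5, mapping [0, 1] onto [-1, 1].
# A CPU sketch of that step in isolation (tensor shapes are assumptions):
import torch

meas = torch.rand(2, 3, 8, 8)  # values in [0, 1]
mean = torch.tensor([0.5, 0.5, 0.5])
var = torch.tensor([0.25, 0.25, 0.25])
out = torch.batch_norm(meas, None, None, mean, var, False, 0.0, 0.0,
                       torch.backends.cudnn.enabled)
assert out.min() >= -1.0 and out.max() <= 1.0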
def jojo_20(self, input_vars_45, input_vars_44, var_848, var_799,
            input_vars_47, input_vars_46, input_vars_49, input_vars_43):
    var_850 = torch.add(var_848, var_799, alpha=1)
    var_851 = torch.relu_(var_850)
    var_870 = torch._convolution(var_851, input_vars_43, None, [2, 2], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_875 = torch.batch_norm(var_870, input_vars_44, input_vars_45,
                               input_vars_46, input_vars_47,
                               False, 0.1, 1e-05, True)
    var_876 = torch.relu_(var_875)
    var_895 = torch._convolution(var_876, input_vars_49, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    return var_851, var_895
def forward(self, inp):
    out = self.bn(inp)
    out = out.view(1, inp.size(0) * self.num_groups, -1)
    out = torch.batch_norm(out, None, None, None, None, True, 0, self.eps, True)
    out = out.view(inp.size(0), self.num_groups, -1)
    out = self.weight * out + self.bias
    out = out.view_as(inp)
    return out
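# The view trick above implements group normalization with batch_norm: folding
# each (sample, group) pair into its own "channel" of a batch-of-one tensor
# makes training-mode batch_norm compute exactly the per-group statistics.
# A sketch of the equivalence against F.group_norm (shapes are assumptions):
import torch
import torch.nn.functional as F

N, C, H, W, G = 4, 6, 5, 5, 3
x = torch.randn(N, C, H, W)
eps = 1e-5
y = torch.batch_norm(x.view(1, N * G, -1), None, None, None, None,
                     True, 0.0, eps, torch.backends.cudnn.enabled)
assert torch.allclose(y.view_as(x), F.group_norm(x, G, eps=eps), atol=1e-5)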
def _nn_functional_batch_norm(input, running_mean, running_var, weight=None,
                              bias=None, training=False, momentum=0.1, eps=1e-5):
    return torch.batch_norm(input, weight, bias, running_mean, running_var,
                            training, momentum, eps,
                            torch.backends.cudnn.enabled)
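# Note the argument-order difference: the wrapper takes the running stats
# before weight/bias (mirroring torch.nn.functional.batch_norm), while the
# raw op takes weight/bias first. A quick sketch checking the two agree:
import torch
import torch.nn.functional as F

x = torch.randn(2, 3, 4, 4)
rm, rv = torch.zeros(3), torch.ones(3)
w, b = torch.ones(3), torch.zeros(3)
out = _nn_functional_batch_norm(x, rm, rv, w, b, training=False)
assert torch.allclose(out, F.batch_norm(x, rm, rv, w, b, training=False))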
def forward(self, x):
    return torch.batch_norm(
        x,
        self.weight,
        self.bias,
        self.running_mean,
        self.running_var,
        False,  # training
        self.exponential_average_factor,
        self.eps,
        False,  # cudnn_enabled
    )
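# In eval mode (training=False) the call above reduces to the closed form
# (x - running_mean) / sqrt(running_var + eps) * weight + bias, applied per
# channel. A sketch verifying that against the raw op (shapes assumed):
import torch

x = torch.randn(2, 3, 4, 4)
rm, rv = torch.randn(3), torch.rand(3) + 0.5
w, b = torch.randn(3), torch.randn(3)
eps = 1e-5
ref = torch.batch_norm(x, w, b, rm, rv, False, 0.1, eps, False)
c = lambda t: t.view(1, -1, 1, 1)  # broadcast a per-channel vector
manual = (x - c(rm)) / torch.sqrt(c(rv) + eps) * c(w) + c(b)
assert torch.allclose(ref, manual, atol=1e-5)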
def jojo_4(self, input_vars_69, var_683, input_vars_75, input_vars_71,
           input_vars_68, input_vars_70, input_vars_74, input_vars_77,
           input_vars_76, var_639, input_vars_73):
    var_688 = torch.batch_norm(var_683, input_vars_68, input_vars_69,
                               input_vars_70, input_vars_71,
                               False, 0.1, 1e-05, True)
    var_707 = torch._convolution(var_639, input_vars_73, None, [2, 2], [0, 0],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_712 = torch.batch_norm(var_707, input_vars_74, input_vars_75,
                               input_vars_76, input_vars_77,
                               False, 0.1, 1e-05, True)
    return var_712, var_688
def jojo_8(self, input_vars_7, input_vars_5, input_vars_4, input_vars_1,
           input_vars_2, input_vars_3, input_vars_0):
    var_387 = torch._convolution(input_vars_0, input_vars_1, None, [2, 2],
                                 [3, 3], [1, 1], False, [0, 0], 1,
                                 False, False, True)
    var_392 = torch.batch_norm(var_387, input_vars_2, input_vars_3,
                               input_vars_4, input_vars_5,
                               False, 0.1, 1e-05, True)
    var_393 = torch.relu_(var_392)
    var_407 = torch.max_pool2d(var_393, [3, 3], [2, 2], [1, 1], [1, 1], False)
    var_426 = torch._convolution(var_407, input_vars_7, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    return var_407, var_426
def backward(ctx, grad_output):
    # |z|-rule as proposed in "BatchNorm Decomposition for Deep Neural
    # Network Optimisation".
    input, weight = ctx.saved_tensors
    bias, running_mean, running_var, training, momentum, eps = ctx.hparams
    output = torch.batch_norm(input, weight, bias, running_mean, running_var,
                              training, momentum, eps,
                              torch.backends.cudnn.enabled)
    w_in = torch.zeros_like(input)
    b_in = torch.zeros_like(input)
    for w in range(len(weight)):
        w_in[:, w, ...] = abs(weight[w] * input[:, w, ...])
        b_in[:, w, ...] = w_in[:, w, ...] + abs(bias[w])
    out = w_in / b_in
    norm_grad = out * grad_output
    # f = (input.grad).cpu()
    # mdl = "sf"
    # plt.imsave(f"{save_path}{mdl}.png", f)
    return norm_grad, None, None, None, None, None, None, None
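# The per-channel loop above computes the ratio |w*x| / (|w*x| + |b|) that
# scales the incoming gradient. It can be written vectorized; a sketch
# assuming a 4D (N, C, H, W) input (z_ratio is a hypothetical helper name):
import torch

def z_ratio(input, weight, bias):
    # broadcast the per-channel weight/bias over the spatial dimensions
    w_in = (weight.view(1, -1, 1, 1) * input).abs()
    b_in = w_in + bias.view(1, -1, 1, 1).abs()
    return w_in / b_in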
def jojo_2(self, input_vars_89, input_vars_85, input_vars_87, var_740,
           input_vars_86, var_715, input_vars_88):
    var_759 = torch._convolution(var_740, input_vars_85, None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_764 = torch.batch_norm(var_759, input_vars_86, input_vars_87,
                               input_vars_88, input_vars_89,
                               False, 0.1, 1e-05, True)
    var_766 = torch.add(var_764, var_715, alpha=1)
    return var_766
def jojo_1(self, input_vars_95, input_vars_92, input_vars_94, var_786,
           input_vars_93):
    var_791 = torch.batch_norm(var_786, input_vars_92, input_vars_93,
                               input_vars_94, input_vars_95,
                               False, 0.1, 1e-05, True)
    return var_791
def jojo_5(self, input_vars_170, input_vars_172, var_1411, input_vars_171,
           input_vars_173):
    var_1416 = torch.batch_norm(var_1411, input_vars_170, input_vars_171,
                                input_vars_172, input_vars_173,
                                False, 0.1, 1e-05, True)
    return var_1416
def forward(self, x):
    assert self.params_set, 'model.set_params(...) must be called before the forward pass'
    return torch.batch_norm(x, self.scale, self.bias, self.running_mean,
                            self.running_var, False, self.momentum, self.eps,
                            torch.backends.cudnn.enabled)
def jojo_6(self, input_vars_166, input_vars_164, input_vars_167, var_1366,
           input_vars_165, input_vars_163):
    var_1367 = torch.relu_(var_1366.clone())
    var_1386 = torch._convolution(var_1367, input_vars_163, None, [1, 1],
                                  [1, 1], [1, 1], False, [0, 0], 1,
                                  False, False, True)
    var_1391 = torch.batch_norm(var_1386, input_vars_164, input_vars_165,
                                input_vars_166, input_vars_167,
                                False, 0.1, 1e-05, True)
    return var_1391, var_1367
def jojo_11(self, input_vars_135, var_1255, var_1211, input_vars_134,
            input_vars_136, input_vars_137):
    var_1260 = torch.batch_norm(var_1255, input_vars_134, input_vars_135,
                                input_vars_136, input_vars_137,
                                False, 0.1, 1e-05, True)
    var_1262 = torch.add(var_1260, var_1211, alpha=1)
    return var_1262
def forward(
        self,
        obs,
        deterministic=False,
        return_log_prob=False,
        pol_idx=None,
        optimize_policies=True,
):
    """
    Args:
        obs (Tensor): Observation(s)
        deterministic (bool):
        return_log_prob (bool):
        pol_idx (int):
        optimize_policies (bool):

    Returns:
        action (Tensor):
        pol_info (dict):
    """
    h = obs
    nbatch = obs.shape[0]

    # ############# #
    # Shared Layers #
    # ############# #
    if self.sfc_input is not None:
        # h = self.sfc_input(h)
        if nbatch > 1:
            h = self.sfc_input(h)
        else:
            h = torch.batch_norm(
                h,
                self.sfc_input.weight,
                self.sfc_input.bias,
                self.sfc_input.running_mean,
                self.sfc_input.running_var,
                True,  # TODO: True or False??
                self.sfc_input.momentum,
                self.sfc_input.eps,
                torch.backends.cudnn.enabled)

    for ss, fc in enumerate(self._sfcs):
        h = fc(h)
        if self._mixture_layer_norm:
            h = self._sfc_norms[ss](h)
        h = self._hidden_activation(h)

    # ############## #
    # Multi Policies #
    # ############## #
    hs = [h.clone() for _ in range(self._n_subpolicies)]

    # Hidden Layers
    if len(self._pfcs) > 0:
        for pp in range(self._n_subpolicies):
            for ii, fc in enumerate(self._pfcs[pp]):
                hs[pp] = fc(hs[pp])
                if self._policies_layer_norm:
                    hs[pp] = self._pfc_norms[pp][ii](hs[pp])
                hs[pp] = self._hidden_activation(hs[pp])

    subpol_means = \
        [self._pol_output_activation(self._pfc_lasts[pp](hs[pp]))
         for pp in range(self._n_subpolicies)]

    subpols = torch.cat(subpol_means, dim=-1)

    if torch.isnan(subpols).any():
        raise ValueError('Some subpols are NAN:', subpols)

    # ############## #
    # Mixing Weights #
    # ############## #
    mh = torch.cat([h.clone(), subpols], dim=-1)  # N x dZ

    if not optimize_policies:
        mh = mh.detach()

    if len(self._mfcs) > 0:
        for mm, mfc in enumerate(self._mfcs):
            mh = mfc(mh)
            if self._mixture_layer_norm:
                mh = self._norm_mfcs[mm](mh)
            mh = self._hidden_activation(mh)

    # NO nonlinear transformation
    mpol_mean = self.mfc_last(mh)

    if self.mfc_softmax is not None:
        raise NotImplementedError
        # mixture_coeff = self.mfc_softmax(mixture_coeff)

    # Final Policy
    final_pol_inputs = [
        ii.unsqueeze(-2) for ii in (subpol_means + [mpol_mean])
    ]
    fph = torch.cat(final_pol_inputs, dim=-2)

    for ff, fpfc in enumerate(self._fpfcs):
        fph = fpfc(fph)
        if self._final_policy_layer_norm:
            fph = self._norm_mfcs[ff](fph)
        fph = self._hidden_activation(fph)

    means = self._final_pol_output_activation(self.fpfc_last(fph))
    log_stds = self._final_pol_output_activation(
        self.fpfc_last_log_std(fph))
    log_stds = torch.clamp(log_stds, LOG_SIG_MIN, LOG_SIG_MAX)
    stds = torch.exp(log_stds)
    variances = torch.pow(stds, 2)

    if pol_idx is None:
        index = self._compo_pol_idx
    else:
        index = self._pols_idxs[pol_idx]

    mean = \
        torch.index_select(means, dim=-2, index=index).squeeze(-2)
    std = \
        torch.index_select(stds, dim=-2, index=index).squeeze(-2)
    log_std = \
        torch.index_select(log_stds, dim=-2, index=index).squeeze(-2)
    variance = \
        torch.index_select(variances, dim=-2, index=index).squeeze(-2)

    means = \
        torch.index_select(means, dim=-2, index=self._pols_idxs).squeeze(-2)
    stds = \
        torch.index_select(stds, dim=-2, index=self._pols_idxs).squeeze(-2)
    log_stds = \
        torch.index_select(log_stds, dim=-2, index=self._pols_idxs).squeeze(-2)
    variances = \
        torch.index_select(variances, dim=-2, index=self._pols_idxs).squeeze(-2)

    pre_tanh_value = None
    log_prob = None
    entropy = None
    mean_action_log_prob = None
    log_probs = None
    pre_tanh_values = None

    mixture_coeff = ptu.ones((nbatch, self.n_heads, self.action_dim))

    if deterministic:
        action = torch.tanh(mean)
        actions = torch.tanh(means)
    else:
        noise = self._normal_dist.sample((nbatch,))
        pre_tanh_value = std * noise + mean
        pre_tanh_values = stds * noise.unsqueeze(1) + means
        action = torch.tanh(pre_tanh_value)
        actions = torch.tanh(pre_tanh_values)

        if return_log_prob:
            # Log probability: Main Policy
            log_prob = -((pre_tanh_value - mean) ** 2) / (2 * variance) \
                - torch.log(std) - math.log(math.sqrt(2 * math.pi))
            log_prob -= torch.log(1. - action**2 + self._epsilon)
            log_prob = log_prob.sum(dim=-1, keepdim=True)

            # Log probability: Sub-Policies
            log_probs = -((pre_tanh_values - means) ** 2) / (2 * variances) \
                - torch.log(stds) - math.log(math.sqrt(2 * math.pi))
            log_probs -= torch.log(1. - actions**2 + self._epsilon)
            log_probs = log_probs.sum(dim=-1, keepdim=True)

    if torch.isnan(action).any():
        raise ValueError('ACTION NAN')
    if torch.isnan(actions).any():
        raise ValueError('ACTION NAN')

    info_dict = dict(
        mean=mean,
        log_std=log_std,
        log_prob=log_prob,
        entropy=entropy,
        std=std,
        mean_action_log_prob=mean_action_log_prob,
        pre_tanh_value=pre_tanh_value,
        # log_mixture_coeff=log_mixture_coeff,
        mixing_coeff=mixture_coeff,
        pol_actions=actions,
        pol_means=means,
        pol_stds=stds,
        pol_log_stds=log_stds,
        pol_log_probs=log_probs,
        pol_pre_tanh_values=pre_tanh_values,
    )

    return action, info_dict
def forward(
        self,
        obs,
        deterministic=False,
        return_log_prob=False,
        pol_idx=None,
        optimize_policies=True,
):
    """
    Args:
        obs (Tensor): Observation(s)
        deterministic (bool):
        return_log_prob (bool):
        pol_idx (int):
        optimize_policies (bool):

    Returns:
        action (Tensor):
        pol_info (dict):
    """
    # pol_idx = int(0)
    h = obs
    nbatch = obs.shape[0]

    # ############# #
    # Shared Layers #
    # ############# #
    if self.sfc_input is not None:
        # h = self.sfc_input(h)
        if nbatch > 1:
            h = self.sfc_input(h)
        else:
            h = torch.batch_norm(
                h,
                self.sfc_input.weight,
                self.sfc_input.bias,
                self.sfc_input.running_mean,
                self.sfc_input.running_var,
                True,  # TODO: True or False??
                self.sfc_input.momentum,
                self.sfc_input.eps,
                torch.backends.cudnn.enabled
            )

    for ss, fc in enumerate(self._sfcs):
        h = fc(h)
        if self._mixture_layer_norm:
            h = self._sfc_norms[ss](h)
        h = self._hidden_activation(h)

    # ############## #
    # Multi Policies #
    # ############## #
    hs = [h.clone() for _ in range(self._n_subpolicies)]

    # Hidden Layers
    if len(self._pfcs) > 0:
        for pp in range(self._n_subpolicies):
            for ii, fc in enumerate(self._pfcs[pp]):
                hs[pp] = fc(hs[pp])
                if self._policies_layer_norm:
                    hs[pp] = self._pfc_norms[pp][ii](hs[pp])
                hs[pp] = self._hidden_activation(hs[pp])

    # Last Mean Layers
    means_list = \
        [(self._pol_output_activation(self._pfc_lasts[pp](hs[pp]))).unsqueeze(dim=1)
         for pp in range(self._n_subpolicies)]
    means = torch.cat(means_list, dim=1)

    # Last Log-Std Layers
    if self.stds is None:
        log_stds_list = [
            (self._pol_output_activation(
                self._pfc_log_std_lasts[pp](hs[pp])
            )).unsqueeze(dim=1)
            for pp in range(self._n_subpolicies)]
        log_stds = torch.cat(log_stds_list, dim=1)
        log_stds = torch.clamp(log_stds, min=LOG_SIG_MIN, max=LOG_SIG_MAX)
        stds = torch.exp(log_stds)
        variances = stds**2
    else:
        stds = self.stds
        variances = stds**2
        log_stds = self.log_std

    # ############## #
    # Mixing Weights #
    # ############## #
    mh = h.clone()

    if len(self._mfcs) > 0:
        for mm, mfc in enumerate(self._mfcs):
            mh = mfc(mh)
            if self._mixture_layer_norm:
                mh = self._norm_mfcs[mm](mh)
            mh = self._hidden_activation(mh)

    # NO nonlinear transformation
    log_mixture_coeff = \
        self.mfc_last(mh).reshape(-1, self._n_subpolicies, self.action_dim)
    # log_mixture_coeff = torch.clamp(log_mixture_coeff,
    #                                 min=LOG_MIX_COEFF_MIN,
    #                                 max=LOG_MIX_COEFF_MAX)  # NxK

    # if self.mfc_softmax is not None:
    mixture_coeff = self.mfc_softmax(log_mixture_coeff)
    # mixture_coeff = torch.exp(log_mixture_coeff) \
    #     / torch.sum(torch.exp(log_mixture_coeff), dim=-1, keepdim=True)

    if torch.isnan(log_mixture_coeff).any():
        raise ValueError('Some mixture coeff(s) is(are) NAN:',
                         log_mixture_coeff)
    if torch.isnan(means).any():
        raise ValueError('Some means are NAN:', means)
    if torch.isnan(stds).any():
        raise ValueError('Some stds are NAN:', stds)

    if pol_idx is None:
        # TODO: CHECK IF NOT PROPAGATING GRADIENTS HERE IS A PROBLEM
        # Sample latent variables
        z = Multinomial(
            logits=log_mixture_coeff.transpose(-2, -1)
        ).sample().transpose(-2, -1)  # NxK

        # Choose the corresponding mixture component
        mean = torch.sum(means*z, dim=-2)
        std = torch.sum(stds*z, dim=-2)
        log_std = torch.sum(log_stds*z, dim=-2)
        variance = torch.sum(variances*z, dim=-2)
    else:
        index = self._pols_idxs[pol_idx]
        mean = \
            torch.index_select(means, dim=1, index=index).squeeze(1)
        std = \
            torch.index_select(stds, dim=1, index=index).squeeze(1)
        log_std = \
            torch.index_select(log_stds, dim=1, index=index).squeeze(1)
        variance = \
            torch.index_select(variances, dim=1, index=index).squeeze(1)

    pre_tanh_value = None
    log_prob = None
    entropy = None
    mean_action_log_prob = None
    log_probs = None
    pre_tanh_values = None

    if deterministic:
        action = torch.tanh(mean)
        actions = torch.tanh(means)
    else:
        noise = self._normal_dist.sample((nbatch,))
        pre_tanh_value = std*noise + mean
        pre_tanh_values = stds*noise.unsqueeze(1) + means
        action = torch.tanh(pre_tanh_value)
        actions = torch.tanh(pre_tanh_values)

        if return_log_prob:
            # temp_pre_tanh_vals = pre_tanh_values
            # temp_actions = actions
            temp_pre_tanh_vals = pre_tanh_value.unsqueeze(-2).expand(
                (nbatch, self.n_heads, self.action_dim))
            temp_actions = action.unsqueeze(-2).expand(
                (nbatch, self.n_heads, self.action_dim))

            # Log probability: Sub-Policies | log(x|z)
            # log_probs = -((pre_tanh_values - means) ** 2) / (2 * variances) \
            temp_log_probs = -((temp_pre_tanh_vals - means) ** 2) / (2 * variances) \
                - log_stds - math.log(math.sqrt(2 * math.pi))
            # log_probs -= torch.log(1. - temp_actions**2 + self._epsilon)

            # Log probability: Main Policy
            log_prob = (torch.logsumexp(temp_log_probs.detach() + log_mixture_coeff,
                                        dim=-2, keepdim=True)
                        - torch.logsumexp(log_mixture_coeff, dim=-2,
                                          keepdim=True)
                        ).squeeze(-2)
            log_prob -= torch.log(1. - action**2 + self._epsilon)
            log_prob = log_prob.sum(dim=-1, keepdim=True)

            log_probs = -((pre_tanh_values - means) ** 2) / (2 * variances) \
                - log_stds - math.log(math.sqrt(2 * math.pi))
            log_probs = log_probs.sum(dim=-1, keepdim=True)

            if (torch.abs(log_probs) > 1e5).any():
                print('---MEAN0--')
                print(means[:, 0, :])
                print('-----')
                print('---MEAN1--')
                print(means[:, 1, :])
                print('-----')
                print('--STD---')
                print(stds[:, 1, :])
                print('-----')
                print('--PRE_TANH---')
                print(temp_pre_tanh_vals[:, 1, :])
                print('-----')
                print('--LOG_PROB---')
                print(log_probs[:, 1])
                raise ValueError

            if torch.isnan(log_prob).any():
                raise ValueError('LOG_PROB NAN')
            if torch.isnan(log_probs).any():
                raise ValueError('LOG_PROBS NAN')

    if torch.isnan(action).any():
        raise ValueError('ACTION NAN')
    if torch.isnan(actions).any():
        raise ValueError('ACTION NAN')

    info_dict = dict(
        mean=mean,
        log_std=log_std,
        log_prob=log_prob,
        entropy=entropy,
        std=std,
        mean_action_log_prob=mean_action_log_prob,
        pre_tanh_value=pre_tanh_value,
        # log_mixture_coeff=log_mixture_coeff,
        mixing_coeff=mixture_coeff,
        pol_actions=actions,
        pol_means=means,
        pol_stds=stds,
        pol_log_stds=log_stds,
        pol_log_probs=log_probs,
        pol_pre_tanh_values=pre_tanh_values,
    )

    return action, info_dict
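# Both policies above fall back to the raw torch.batch_norm op for
# single-sample batches because an nn.BatchNorm1d module refuses batch size 1
# in training mode, while the raw op performs no such check. A minimal sketch:
import torch
import torch.nn as nn

bn = nn.BatchNorm1d(4).train()
x = torch.randn(1, 4)
try:
    bn(x)
except ValueError as err:
    print(err)  # "Expected more than 1 value per channel when training ..."
# the raw op accepts the same input without complaint; note that with one
# sample the batch variance is zero, so the output is simply bn.bias
y = torch.batch_norm(x, bn.weight, bn.bias, bn.running_mean, bn.running_var,
                     True, bn.momentum, bn.eps, torch.backends.cudnn.enabled)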
def forward_(self, input_vars):
    var_407, var_426 = torch.utils.checkpoint.checkpoint(
        self.jojo_8, input_vars[7], input_vars[5], input_vars[4],
        input_vars[1], input_vars[2], input_vars[3], input_vars[0])
    var_431 = torch.batch_norm(var_426, input_vars[8], input_vars[9],
                               input_vars[10], input_vars[11],
                               False, 0.1, 1e-05, True)
    var_459, var_503 = torch.utils.checkpoint.checkpoint(
        self.jojo_7, input_vars[25], var_431, input_vars[16], input_vars[13],
        input_vars[15], input_vars[23], input_vars[19], input_vars[21],
        input_vars[22], var_407, input_vars[17], input_vars[14],
        input_vars[20])
    var_508 = torch.batch_norm(var_503, input_vars[26], input_vars[27],
                               input_vars[28], input_vars[29],
                               False, 0.1, 1e-05, True)
    var_560, var_584 = torch.utils.checkpoint.checkpoint(
        self.jojo_6, input_vars[44], input_vars[34], input_vars[37], var_459,
        input_vars[45], var_508, input_vars[31], input_vars[38],
        input_vars[41], input_vars[47], input_vars[40], input_vars[43],
        input_vars[33], input_vars[35], input_vars[46], input_vars[32],
        input_vars[39])
    var_586 = torch.add(var_560, var_584, alpha=1)
    var_663, var_639 = torch.utils.checkpoint.checkpoint(
        self.jojo_5, input_vars[50], var_586, input_vars[51], input_vars[49],
        input_vars[57], input_vars[64], input_vars[52], input_vars[55],
        input_vars[61], input_vars[63], input_vars[58], input_vars[56],
        input_vars[53], input_vars[65], input_vars[62], input_vars[59])
    var_664 = torch.relu_(var_663)
    var_683 = torch._convolution(var_664, input_vars[67], None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_712, var_688 = torch.utils.checkpoint.checkpoint(
        self.jojo_4, input_vars[69], var_683, input_vars[75], input_vars[71],
        input_vars[68], input_vars[70], input_vars[74], input_vars[77],
        input_vars[76], var_639, input_vars[73])
    var_714 = torch.add(var_688, var_712, alpha=1)
    var_715 = torch.utils.checkpoint.checkpoint(self.jojo_3, var_714)
    var_734 = torch._convolution(var_715, input_vars[79], None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_739 = torch.batch_norm(var_734, input_vars[80], input_vars[81],
                               input_vars[82], input_vars[83],
                               False, 0.1, 1e-05, True)
    var_740 = torch.relu_(var_739)
    var_766 = torch.utils.checkpoint.checkpoint(
        self.jojo_2, input_vars[89], input_vars[85], input_vars[87], var_740,
        input_vars[86], var_715, input_vars[88])
    var_767 = torch.relu_(var_766)
    var_786 = torch._convolution(var_767, input_vars[91], None, [2, 2], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_791 = torch.utils.checkpoint.checkpoint(
        self.jojo_1, input_vars[95], input_vars[92], input_vars[94], var_786,
        input_vars[93])
    var_792 = torch.relu_(var_791)
    var_811 = torch._convolution(var_792, input_vars[97], None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_816 = torch.batch_norm(var_811, input_vars[98], input_vars[99],
                               input_vars[100], input_vars[101],
                               False, 0.1, 1e-05, True)
    var_835 = torch._convolution(var_767, input_vars[103], None, [2, 2], [0, 0],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_840 = torch.batch_norm(var_835, input_vars[104], input_vars[105],
                               input_vars[106], input_vars[107],
                               False, 0.1, 1e-05, True)
    var_842 = torch.add(var_816, var_840, alpha=1)
    var_843 = torch.relu_(var_842)
    var_862 = torch._convolution(var_843, input_vars[109], None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_867 = torch.batch_norm(var_862, input_vars[110], input_vars[111],
                               input_vars[112], input_vars[113],
                               False, 0.1, 1e-05, True)
    var_868 = torch.relu_(var_867)
    var_887 = torch._convolution(var_868, input_vars[115], None, [1, 1], [1, 1],
                                 [1, 1], False, [0, 0], 1, False, False, True)
    var_892 = torch.batch_norm(var_887, input_vars[116], input_vars[117],
                               input_vars[118], input_vars[119],
                               False, 0.1, 1e-05, True)
    var_894 = torch.add(var_892, var_843, alpha=1)
    var_895 = torch.relu_(var_894)
    var_911 = self.layer_0(var_895)
    var_914 = torch.flatten(var_911, 1, -1)
    var_915 = torch.utils.checkpoint.checkpoint(self.jojo_0, input_vars[121])
    var_918 = torch.addmm(input_vars[122], var_914, var_915, beta=1, alpha=1)
    return var_918
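# forward_ wraps each jojo_* fragment in torch.utils.checkpoint.checkpoint,
# which drops intermediate activations during the forward pass and recomputes
# them in backward to save memory. A minimal self-contained sketch of the
# same idea (segment is a hypothetical stand-in for one jojo_* fragment):
import torch
import torch.nn.functional as F
import torch.utils.checkpoint

def segment(x, w):
    # an activation-heavy segment whose intermediates we don't want to keep
    return torch.relu(F.conv2d(x, w, padding=1))

x = torch.randn(1, 3, 16, 16, requires_grad=True)
w = torch.randn(3, 3, 3, 3, requires_grad=True)
y = torch.utils.checkpoint.checkpoint(segment, x, w)
y.sum().backward()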
def jojo_13(self, input_vars_117, input_vars_119, input_vars_118,
            input_vars_116, var_1178):
    var_1183 = torch.batch_norm(var_1178, input_vars_116, input_vars_117,
                                input_vars_118, input_vars_119,
                                False, 0.1, 1e-05, True)
    var_1184 = torch.relu_(var_1183)
    return var_1184
def jojo_15(self, input_vars_100, var_1082, input_vars_97, input_vars_99,
            input_vars_98, input_vars_101):
    var_1083 = torch.relu_(var_1082.clone())
    var_1102 = torch._convolution(var_1083, input_vars_97, None, [2, 2],
                                  [1, 1], [1, 1], False, [0, 0], 1,
                                  False, False, True)
    var_1107 = torch.batch_norm(var_1102, input_vars_98, input_vars_99,
                                input_vars_100, input_vars_101,
                                False, 0.1, 1e-05, True)
    return var_1107, var_1083
def jojo_16(self, input_vars_95, input_vars_94, input_vars_92, var_1075,
            input_vars_93):
    var_1080 = torch.batch_norm(var_1075, input_vars_92, input_vars_93,
                                input_vars_94, input_vars_95,
                                False, 0.1, 1e-05, True)
    return var_1080
def jojo_17(self, input_vars_89, var_1031, input_vars_86, input_vars_85,
            input_vars_87, input_vars_88):
    var_1050 = torch._convolution(var_1031, input_vars_85, None, [1, 1],
                                  [1, 1], [1, 1], False, [0, 0], 1,
                                  False, False, True)
    var_1055 = torch.batch_norm(var_1050, input_vars_86, input_vars_87,
                                input_vars_88, input_vars_89,
                                False, 0.1, 1e-05, True)
    var_1056 = torch.relu_(var_1055)
    return var_1056