def genotype(self):
    """Decode the current architecture parameters into a ``Genotype``.

    Cells are stacked with s0 = output of the cell before the previous one
    and s1 = output of the previous cell.  Per the original author's note,
    in an "up" cell s0 is a horizontal input that cannot take an up
    operation (unlike "down" cells, where both inputs are down-sampled);
    that distinction is delegated to ``GenoParser.parse`` via ``cell_type``.

    Returns:
        A ``Genotype`` holding the parsed down/up genes; both cell types
        concatenate nodes ``2 .. _meta_node_num + 1``.
    """

    def _as_probs(alphas):
        # Softmax over the last axis, detached from the graph, as NumPy.
        return F.softmax(alphas, dim=-1).detach().cpu().numpy()

    parser = GenoParser(self._meta_node_num)
    gene_down = parser.parse(_as_probs(self.alphas_normal_down),
                             _as_probs(self.alphas_down),
                             cell_type='down')
    gene_up = parser.parse(_as_probs(self.alphas_normal_up),
                           _as_probs(self.alphas_up),
                           cell_type='up')

    concat = range(2, self._meta_node_num + 2)
    return Genotype(down=gene_down, down_concat=concat,
                    up=gene_up, up_concat=concat)
def forward(self, E, mask=False, static_dim=0):
    """Apply single-head self-attention to ``E``.

    Args:
        E: input tensor; the last two axes are treated as (sequence, feature).
        mask: when truthy, normalise the scores with ``masked_softmax`` and a
            0/1 mask instead of a plain softmax.
        static_dim: when > 0 the mask starts from all ones; otherwise it is
            lower-triangular.

    Returns:
        The attention-weighted projection ``softmax(QK^T / scale) @ V``.
    """
    queries = self.WQ(E)
    keys = self.WK(E)
    W = torch.matmul(queries, keys.transpose(-1, -2)) / self.scale_factor

    if not mask:
        W = F.softmax(W, -1)
    else:
        if static_dim > 0:
            M = W.new_ones(W.shape[1:])
            # NOTE(review): M is already all ones, so these two writes change
            # nothing -- possibly 0 was intended for the static/dynamic
            # cross blocks.  Confirm with the author.
            M[:static_dim, static_dim:] = 1
            M[static_dim:, :static_dim] = 1
        else:
            M = torch.tril(W.new_ones(W.shape[1:]))
        # Positions whose raw score is exactly 0 are additionally masked out.
        M = torch.where(W == 0, torch.zeros_like(M), M)
        W = masked_softmax(W, M)

    return torch.matmul(W, self.WV(E))
def forward(self, query, keys, values):
    """Scaled dot-product attention of one query per batch element.

    The ``bmm`` calls below fix the layouts actually required:
        query:  [B x Q]
        keys:   [B x T x K]   (with K == Q)
        values: [B x T x V]
    NOTE(review): the original comments described time-major [T x B x *]
    inputs and a [B x V] result; the code as written does not transpose the
    batch axis or squeeze the result -- confirm against callers.

    Returns:
        (weights [B x 1 x T], linear_combination [B x 1 x V])
    """
    q = query.unsqueeze(1)                  # [B x Q] -> [B x 1 x Q]
    k_t = keys.transpose(1, 2)              # [B x T x K] -> [B x K x T]
    scores = torch.bmm(q, k_t)              # [B x 1 x T]
    # Scale in place, then normalise over the T axis.
    weights = F.softmax(scores.mul_(self.scale), dim=2)
    linear_combination = torch.bmm(weights, values)  # [B x 1 x V]
    return weights, linear_combination
def compute_actor_loss(self, trajectory, advantages):
    """Policy-gradient actor loss with an entropy bonus.

    Args:
        trajectory: sequence of transitions; ``transition[0]`` is a state
            tensor (concatenated along dim 0) and ``transition[1]`` the
            action index taken.
        advantages: advantage estimates, multiplied against the per-step
            negative log-probabilities reshaped to ``(-1, 1)``.
            NOTE(review): presumably shaped (N, 1); a 1-D tensor would
            broadcast to (N, N) -- confirm with the caller.

    Returns:
        Scalar tensor ``mean(-log pi(a|s) * A) - entropy_scaling * H``,
        where ``H`` is the entropy summed over all steps.
    """
    states = torch.cat([transition[0] for transition in trajectory]).to(self.device)
    actions = torch.FloatTensor(
        [transition[1] for transition in trajectory]).to(self.device)

    action_logits, _ = self.model.forward(states)
    # Build the distribution from logits directly: going through an explicit
    # softmax and ``probs=`` clamps saturated probabilities to eps, which
    # distorts log-probs and kills their gradient for confident policies.
    action_dists = Categorical(logits=action_logits)

    entropy = action_dists.entropy().sum()

    policy_loss = -action_dists.log_prob(actions).view(-1, 1) * advantages
    policy_loss = policy_loss.mean()

    return policy_loss - self.entropy_scaling * entropy
def forward(self, x):
    """Run the backbone and return ``(label_out, rank_out)``.

    ``label_out`` is the sigmoid of the ``fc`` head on the flattened
    features.  ``rank_out`` is a softmax (dim 1) of ``rank_classifier``
    applied to ``label_out`` when ``self.cat`` is truthy, otherwise to the
    flattened features themselves.
    """
    backbone = (
        self.conv1, self.bn1, self.relu, self.maxpool,
        self.layer1, self.layer2, self.layer3, self.layer4,
        self.avgpool,
    )
    feats = x
    for stage in backbone:
        feats = stage(feats)
    feats = torch.flatten(feats, 1)

    label_out = torch.sigmoid(self.fc(feats))
    if self.cat:
        rank_input = label_out
    else:
        rank_input = feats
    rank_out = F.softmax(self.rank_classifier(rank_input), dim=1)
    return label_out, rank_out
def forward(self, inp, mask):
    """Pool ``inp`` over dim 1 with attention (or uniform) weights.

    Args:
        inp: tensor pooled over dim 1; weights are broadcast over the last
            axis.
        mask: boolean tensor, True at positions to exclude.

    Side effects:
        Stores CPU copies of the weights and the aggregated features in
        ``self.last_weights`` / ``self.last_features``.

    Returns:
        The weighted sum over dim 1.
    """
    if self.att:
        scores = self.att(inp).squeeze(-1)
        # Overwrite masked positions in place before normalising.
        scores = scores.masked_fill_(mask, self.pre_softmax_mask_fill)
        weights = F.softmax(scores, dim=1)
    else:
        # No attention module: uniform weights over unmasked positions.
        keep = mask.logical_not().type_as(inp)
        weights = keep / keep.sum(dim=1)[:, None]
    self.last_weights = weights.detach().cpu()

    to_agg = self.agg(inp) if self.agg_dims else inp
    self.last_features = to_agg.detach().cpu()

    return (to_agg * weights.unsqueeze(-1).expand_as(to_agg)).sum(1)
def forward(self, inputs, hidden, encoder_outputs):
    """Run one decoding step with attention over ``encoder_outputs``.

    Returns:
        (log-probabilities over the output vocabulary, new GRU hidden state,
        attention weights over the encoder positions).
    """
    embedded = self.dropout(self.embedding(inputs).view(1, 1, -1))
    token = embedded[0]

    # Attention weights come from the current token and previous hidden state.
    attn_logits = self.attn(torch.cat((token, hidden[0]), 1))
    attn_weights = F.softmax(attn_logits, dim=1)
    context = torch.bmm(attn_weights.unsqueeze(0),
                        encoder_outputs.unsqueeze(0))

    combined = self.attn_combine(torch.cat((token, context[0]), 1))
    gru_in = F.relu(combined.unsqueeze(0))
    output, hidden = self.gru(gru_in, hidden)

    output = F.log_softmax(self.out(output[0]), dim=1)
    return output, hidden, attn_weights
def forward(self, s, enc_output):
    """Additive attention weights of a decoder state over encoder outputs.

    :param s: (batch_size, dec_hid_dim)
    :param enc_output: (seq_len, batch_size, enc_hidden_dim)
    :return: attention weights of shape (batch_size, seq_len); each row
        sums to 1
    """
    src_len = enc_output.shape[0]

    # The Linear layers expect batch_size as the first axis:
    #   s          -> (batch_size, seq_len, dec_hid_dim)
    #   enc_output -> (batch_size, seq_len, enc_hidden_dim)
    s = s.unsqueeze(1).repeat(1, src_len, 1)
    enc_output = enc_output.transpose(0, 1)

    energy = torch.tanh(self.attn(torch.cat((s, enc_output), dim=2)))
    # (batch_size, seq_len, 1) -> (batch_size, seq_len)
    score = self.v(energy).squeeze(2)

    # Name the axis explicitly: the implicit choice is deprecated.  For this
    # 2-D score it resolved to dim=1, so the result is unchanged.
    return F.softmax(score, dim=1)
def forward(self, h, encoder_out):
    """Attend over ``encoder_out`` per step, then run the LSTM decoder on ``h``.

    Both ``h`` and ``encoder_out`` are indexed by time step on dim 0 and by
    batch on dim 1.

    NOTE(review): the attended sequence built in the loop is never consumed;
    the decoder is fed ``h`` directly.  This looks unintended, but it is
    preserved here because changing the decoder input would alter model
    behaviour -- confirm with the author.

    Returns:
        The decoder output sequence.
    """
    attended_encoder_out = torch.zeros_like(encoder_out)
    encoder_bf = torch.transpose(encoder_out, 0, 1)  # batch-first view

    for step in range(self.max_seq_len):
        h_step = h[step]
        scores = self.attn(torch.cat((h_step, encoder_out[step]), 1))
        weights = F.softmax(scores, dim=1)
        context = torch.bmm(weights.unsqueeze(1), encoder_bf)[:, 0, :]
        attended_encoder_out[step] = self.attn_combine(
            torch.cat((h_step, context), 1))

    batch_size = h.size(1)
    state_shape = (1, batch_size, self.hidden_size)
    h0 = torch.zeros(*state_shape, device=h.device)
    c0 = torch.zeros(*state_shape, device=h.device)
    decoder_out, (ht, ct) = self.decoder(h, (h0, c0))
    return decoder_out
def forward(self, lstm_output):
    """Word-level attention over LSTM outputs.

    Args:
        lstm_output: tensor whose dim 0 indexes sequences and dim 1 indexes
            positions within a sequence; the softmax runs over dim 1.

    Returns:
        (s_i, attns): ``attns`` holds the attention-weighted outputs (same
        shape as ``lstm_output``); ``s_i`` is their sum over dim 1 after
        dropout, with a leading singleton axis for the following FC layers.
    """
    # eq 5 (with tanh): hidden representation of every position
    u_it = torch.tanh(self.dropout(self.word_attn(lstm_output)))
    # eq 6: importance weights, normalised over positions
    a_it = F.softmax(self.dropout(self.context_vec(u_it)), dim=1)
    # eq 7: weight every output.  One broadcast multiply replaces the
    # per-sequence loop that grew a tensor with repeated ``torch.cat``
    # (quadratic copying) and needed a module-level ``device``.
    attns = a_it * lstm_output
    s_i = torch.sum(self.dropout(attns), 1)
    # unsqueeze to hand back to the FC layers
    s_i = s_i.unsqueeze(0)
    return s_i, attns
def __call__(self, outputs, targets): loss_dice = 0 eps = 1e-7 smooth = 1. outputs = F.softmax(outputs, dim=1) for cls in range(self.num_classes): jaccard_target = (targets == cls).float() jaccard_output = outputs[:, cls] intersection = (jaccard_output * jaccard_target).sum() if self.class_weights is not None: w = self.class_weights[cls] else: w = 1. union = jaccard_output.sum() + jaccard_target.sum() # loss -= torch.log((intersection + eps) / (union - intersection + eps)) * self.jaccard_weight loss_dice += w * (1 - (2. * intersection + smooth) / (union + smooth + eps)) # three kinds of loss formulas: (1) 1 - iou (2) -iou (3) -torch.log(iou) return loss_dice / self.num_classes
def attention(
    self,
    char_embed_matrix,
    batch_size,
    hid,
):
    """Re-weight ``hid`` with attention scores from the character embeddings.

    ``char_embed_matrix`` is flattened to 2-D on its last axis, combined
    additively with the projected hidden state, scored by ``v_c`` and
    normalised over dim 1 after reshaping to
    ``(batch_size, hid.squeeze(0).size(-1))``.

    NOTE(review): the original inline shape comments ("(b*6, 2h)",
    "(b, 1)") do not obviously match that reshape -- confirm the intended
    shapes with the author.

    Returns:
        ``(attn_score.unsqueeze(0) * hid).squeeze(1)``.
    """
    hid_2d = hid.squeeze(0)
    flat_chars = char_embed_matrix.contiguous().view(
        -1, char_embed_matrix.size(2))

    att = self.v_c(torch.tanh(self.Wchar(flat_chars) + self.Wh(hid_2d)))
    attn_score = F.softmax(att.view(batch_size, hid_2d.size()[-1]), dim=1)

    # Element-wise re-weighting of the hidden state (not a bmm).
    return (attn_score.unsqueeze(0) * hid).squeeze(1)
def forward(self, query, keys, keys_length):
    """Length-masked attention of ``query`` over ``keys``.

    Parameters
    ----------
    query: 2D tensor, [B, H]
    keys: 3D tensor, [B, T, H]
    keys_length: 1D tensor, [B]; number of valid leading positions per row

    Returns
    -------
    outputs: 2D tensor, [B, H] if ``self.return_scores`` is False,
        otherwise the attention weights [B, T]

    Notes
    -----
    A row whose length is 0 has every score masked to -inf, so its softmax
    is NaN.
    """
    batch_size, max_length, dim = keys.size()

    query = query.unsqueeze(1).expand(-1, max_length, -1)
    din_all = torch.cat([query, keys, query - keys, query * keys], dim=-1)
    din_all = din_all.view(batch_size * max_length, -1)

    outputs = self.mlp(din_all)
    outputs = self.fc(outputs).view(batch_size, max_length)  # [B, T]

    # Scale
    outputs = outputs / (dim ** 0.5)

    # Mask: position t of row b is valid iff t < keys_length[b]
    positions = torch.arange(max_length, device=keys_length.device)
    mask = positions.unsqueeze(0) < keys_length.view(-1, 1)
    outputs = outputs.masked_fill(~mask.to(outputs.device), float("-inf"))

    # Activation
    outputs = F.softmax(outputs, dim=1)  # [B, T]

    if not self.return_scores:
        # Weighted sum.  Squeeze only the singleton query axis: a bare
        # ``squeeze()`` also dropped the batch axis when B == 1.
        outputs = torch.matmul(outputs.unsqueeze(1), keys).squeeze(1)  # [B, H]

    return outputs
def attention(self, dec_inp, encoder_attn, encoder_out, dec_h):
    """Additive attention over the encoder outputs for one decoder step.

    Shapes (from the original author's annotations):
        dec_h:        (bs, em_sz_dec)
        encoder_attn: (bs, enc_seq_len, nh)
        encoder_out:  (bs, enc_seq_len, nh)

    Returns:
        attn_out: ``dec_inp`` concatenated with the context vector on dim 1.
        attn_weights: (bs, enc_seq_len, 1), normalised over enc_seq_len.

    Raises:
        ValueError: if the projected hidden state does not have ``self.nh``
            features, or the context does not end in a singleton axis.
    """
    hid_att = self.attn_hidden(dec_h)  # (bs, nh)
    if hid_att.shape[-1] != self.nh:
        raise ValueError(
            'Hidden attn output shape {} not equal to defeined hidden size {}'
            .format(hid_att.shape, self.nh))

    # Broadcast the hidden projection over every encoder position.
    u = torch.tanh(encoder_attn + hid_att.unsqueeze(1))
    # ``@`` broadcasts V (nh, 1) over the batch, which bmm would not.
    z = u @ self.V
    attn_weights = F.softmax(z, 1)

    # (bs, nh, enc_seq_len) x (bs, enc_seq_len, 1) -> (bs, nh, 1)
    context = torch.bmm(encoder_out.permute(0, 2, 1), attn_weights)
    if context.shape[-1] != 1:
        raise ValueError(context.shape)

    attn_out = torch.cat([dec_inp, context[:, :, 0]], dim=1)
    return attn_out, attn_weights
def rnn_sample(self, controller: Controller, **kwargs):
    """Sample one value per search dimension from the controller.

    At each of the ``self.space`` steps the step's decoder produces logits
    from the LSTM hidden state, an index is drawn from the resulting
    categorical distribution, and the step's encoder embeds that index as
    the next LSTM input.

    Returns:
        (values, log_prob): the sampled values mapped through
        ``self.ind_to_val`` and the summed log-probability of the draws.
    """
    sampled_values = []
    total_log_prob = 0

    embedding = controller.start_of_sequence
    h, c = controller.lstm_cell(embedding)

    for step in range(self.space):
        logits = controller.decoders[step](h).squeeze(0)
        probas = F.softmax(logits, dim=0)
        log_probas = F.log_softmax(logits, dim=0)

        # Stochastic draw (not argmax) so the policy explores.
        choice = torch.multinomial(probas, num_samples=1)[0]
        embedding = controller.encoders[step](choice.unsqueeze(0))
        h, c = controller.lstm_cell(embedding, (h, c))

        index = choice.item()
        sampled_values.append(self.ind_to_val[index])
        total_log_prob += log_probas[index]

    return sampled_values, total_log_prob
def forward(self, pred, target): b, c, h, w = pred.size() #val, ind = torch.max(target) target = target.view(-1) valid_mask = target.ne(self.ignore_label) #print(target[1],'...',valid_mask.long()[1]) target = target * valid_mask.long() num_valid = valid_mask.sum() prob = F.softmax(pred, dim=1) prob = (prob.transpose(0, 1)).reshape(c, -1) if self.min_kept > num_valid: # logger.info('Labels: {}'.format(num_valid)) print('Labels: {}'.format(num_valid)) elif num_valid > 0: prob = prob.masked_fill_(~valid_mask, 1) mask_prob = prob[target, torch.arange(len(target), dtype=torch.long)] #output, counts = torch.unique_consecutive(target, return_counts=True) #print('prob.shape: ',prob.shape,' mask_prob.shape: ',mask_prob.shape,' val: ',val) threshold = self.thresh if self.min_kept > 0: index = mask_prob.argsort() threshold_index = index[min(len(index), self.min_kept) - 1] if mask_prob[threshold_index] > self.thresh: threshold = mask_prob[threshold_index] kept_mask = mask_prob.le(threshold) target = target * kept_mask.long( ) # these are thought to be hard examples valid_mask = valid_mask * kept_mask # logger.info('Valid Mask: {}'.format(valid_mask.sum())) target = target.masked_fill_(~valid_mask, self.ignore_label) target = target.view(b, h, w) return self.criterion(pred, target)
def forward(self, X, X_padding_mask=None, coverage=None, dropout=0.1):
    """Scaled dot-product self-attention over ``X``, averaged over positions.

    ``X`` serves as query, key and value at once.

    Args:
        X: (L, B, H) sequence-first features.
        X_padding_mask: accepted for interface compatibility; currently
            unused.
        coverage: accepted for interface compatibility; currently unused.
        dropout: drop probability applied to the attention distribution
            while the module is in training mode.

    Returns:
        context: (B, H), the attended values averaged over the L positions.
        attn_dist: (B, L, L) attention distribution (after dropout).
    """
    X_dim = X.size(-1)
    X_bf = X.transpose(0, 1)  # -> (B, L, H), shared by query/key/value

    # (B, L, H) x (B, H, L) -> (B, L, L)
    scores = torch.matmul(X_bf, X_bf.transpose(-2, -1)) / math.sqrt(X_dim)
    attn_dist = F.softmax(scores, dim=-1)  # (B, L, L)

    # F.dropout defaults to training=True, which kept dropout active at
    # evaluation time; follow the module's mode instead.
    attn_dist = F.dropout(attn_dist, p=dropout,
                          training=getattr(self, "training", True))

    # (B, L, L) x (B, L, H) -> (B, L, H)
    context = torch.matmul(attn_dist, X_bf)

    # Average over the L positions.
    context = context.sum(1) / context.size(1)

    return context, attn_dist
def forward(self, mask, query=None, proj_key=None, value=None):
    """Additive attention of a decoder query over projected encoder keys.

    Args:
        mask: marks valid source positions (non-zero = valid); positions
            where ``mask == 0`` receive zero attention.
        query: decoder state; projected here with ``self.query_layer``.
        proj_key: encoder states, already projected by the caller.
        value: encoder states to be averaged.

    Returns:
        context: attention-weighted sum of ``value`` -- [B, 1, 2D]
        alphas: attention weights -- [B, 1, M]; also stored on
            ``self.alphas``.
    """
    assert mask is not None, "mask is required"

    # Project the query; the keys were projected ahead of time.
    query = self.query_layer(query)

    # Calculate scores.
    scores = self.energy_layer(torch.tanh(query + proj_key))
    scores = scores.squeeze(2).unsqueeze(1)

    # Mask out invalid positions.  The mask marks valid positions, so fill
    # where it equals 0.  Done out of place rather than through ``.data``,
    # which hid the write from autograd's version tracking.
    scores = scores.masked_fill(mask == 0, -float('inf'))

    # Turn scores to probabilities.
    alphas = F.softmax(scores, dim=-1)
    self.alphas = alphas

    # The context vector is the weighted sum of the values.
    context = torch.bmm(alphas, value)

    return context, alphas
def forward(self, input_states: torch.Tensor) -> torch.Tensor:
    """
    Combine the per-source states with learned softmax gates.

    :param input_states: tensor of shape
        (group_size, num_srcs, tgt_time_steps, embed_dim)
    :return: gated sum over sources, shape
        (group_size, tgt_time_steps, embed_dim)
    """
    assert input_states.size(1) == self.n_sources
    group_size, _, tgt_time_steps, _ = input_states.shape

    # (group_size, tgt_time_steps, num_srcs, embed_dim)
    per_step = input_states.transpose(1, 2)
    # Concatenate all sources' features at each time step.
    input_states_flat = per_step.contiguous().view(
        group_size, tgt_time_steps, -1)

    hidden = torch.tanh(self._hidden_projection(input_states_flat))
    # (group_size, tgt_time_steps, num_srcs): one gate per source
    gate_values = F.softmax(self._gate_projection(hidden), dim=-1)

    # Weight every source by its gate, then sum the sources out.
    return (per_step * gate_values.unsqueeze(-1)).sum(dim=-2)
def forward(self, query, enc_out, src_lens, u_align):
    """Length-masked additive attention weights over source positions.

    Args:
        query: decoder states indexed by layer on dim 0; only the top
            (last) layer is used.
        enc_out: encoder outputs; only ``size(0)`` (max source length) is
            read.
        src_lens: 1-D tensor with the true source length of each batch
            element.
        u_align: pre-projected encoder states, broadcast against the
            projected query.

    Returns:
        Attention weights of shape (max_len, batch, 1), normalised over the
        source axis (dim 0); padded positions get weight 0.
    """
    # Mask of valid (non-padding) positions.  Build the position index on
    # the same device as ``src_lens``: a CPU arange cannot be compared with
    # lengths that live on another device.
    max_len = enc_out.size(0)
    positions = torch.arange(max_len, device=src_lens.device)[None, :]
    mask = (positions < src_lens[:, None]).transpose(0, 1)  # (max_len, batch)

    # Alignment scores from the top decoder layer.
    top_layer = query[query.size(0) - 1, :, :].unsqueeze(0)
    w_align = self.linear_q(top_layer)
    tanh_align = torch.tanh(w_align + u_align)
    score_align = self.linear_v(tanh_align).squeeze(2)
    score_align = score_align.masked_fill(
        ~mask.to(score_align.device), float('-inf'))

    # Attention weights, normalised over source positions.
    attn_weights = F.softmax(score_align, dim=0)
    return attn_weights.unsqueeze(2)
def forward(self, f):
    """Predict normalised heatmaps and their expected plane coordinates.

    The heatmaps from ``self.conv`` are normalised per (batch, joint) map --
    by a softmax scaled with ``self.w`` when ``normalization_method`` is
    ``'softmax'``, otherwise by ReLU followed by division by the map's sum.
    Each coordinate is the heatmap-weighted sum of ``self.filter[0]`` /
    ``self.filter[1]``.

    Returns:
        (heatmaps [B, J, H, W], plane_coordinates [B, J, 2])
    """
    heatmaps = self.conv(f)
    B, J, H, W = heatmaps.shape

    if self.normalization_method == 'softmax':
        # Softmax over all H*W locations of each map.
        flat = heatmaps.view(B, J, -1)
        heatmaps = F.softmax(self.w * flat, dim=2).view(B, J, H, W)
    else:
        # Sum normalisation on the rectified map.
        heatmaps = F.relu(heatmaps, inplace=True)
        heatmaps = heatmaps + 1e-14  # prevent all zero heatmap
        heatmaps = heatmaps / heatmaps.sum(dim=(2, 3), keepdim=True)

    coords = [
        torch.sum(self.filter[axis].view(1, 1, H, W) * heatmaps,
                  dim=(2, 3)).unsqueeze(-1)
        for axis in (0, 1)
    ]
    plane_coordinates = torch.cat(coords, dim=2)
    return heatmaps, plane_coordinates
image[..., 2] = (image[..., 2] - image[..., 2].mean()) / image[..., 2].std() image = np.expand_dims(image, axis=0) image = torch.from_numpy(image) image = image.permute(0, 3, 1, 2) count = 0 total_time = 0.0 while True: count += 1 t = float(cv2.getTickCount()) outputs = net(image) outputs = F.softmax(outputs) total_time += (float(cv2.getTickCount()) - t) / cv2.getTickFrequency() print(outputs, '\t execute time: ', total_time / count) # 生成一个样本供网络前向传播 forward() example = torch.rand(1, 3, input_size, input_size) # 使用 torch.jit.trace 生成 torch.jit.ScriptModule 来跟踪 traced_script_module = torch.jit.trace(net, example) count = 0 total_time = 0.0 while True: count += 1 t = float(cv2.getTickCount())
def nms(self, conf, loc, priors, conf_threshold=0.01, iou_threshold=0.45,
        top_k=200, use_trained_model=True):
    '''
    Description:
        Greedy per-class non-maximum suppression on the first image of a
        batch.
    Arguments:
        conf: class scores; ``conf[0]`` is softmax-normalised over its
            class axis (dim 1), column 0 being the background class
        loc: box regression output; ``loc[0]`` is decoded against
            ``priors`` with variances [0.1, 0.2] and scaled by 300
            (presumably the input image size -- confirm)
        priors: prior (anchor) boxes
        conf_threshold: float, default=0.01; detections scoring at or
            below this are dropped
        iou_threshold: float, default=0.45; a box is suppressed when its
            IoU with an already kept box of the same class is not below
            this value
        top_k: int, default=200; only the ``3 * top_k`` best-scoring
            detections overall enter NMS
        use_trained_model: when True the class index is shifted by one so
            that index 0 stays reserved for background
    Returns:
        (res_score, res_bbox, res_cls): parallel lists with the kept
        scores, boxes and class indices
    '''
    # 1. get the conf_score, conf_cls and bboxes
    loc = loc.cpu()
    priors = priors.cpu()
    loc_ = decode(loc[0], priors, [0.1, 0.2]) * 300
    # Explicit class axis: the implicit choice is deprecated (it resolved
    # to dim=1 for this 2-D input, so results are unchanged).
    conf_ = F.softmax(conf[0], dim=1)

    # ignore the bkg class
    conf_score, conf_cls = torch.max(conf_[:, 1:], dim=1)
    if use_trained_model:
        conf_cls += 1

    conf_mask = conf_score > conf_threshold
    conf_score, conf_cls, loc_ = (conf_score[conf_mask], conf_cls[conf_mask],
                                  loc_[conf_mask])

    conf_score, conf_idx = torch.sort(conf_score, descending=True)
    conf_cls, bboxes = conf_cls[conf_idx], loc_[conf_idx]

    res_score, res_bbox, res_cls = [], [], []

    # Bound the work: keep only the best-scoring candidates overall.
    conf_score = conf_score[:top_k * 3]
    conf_cls = conf_cls[:top_k * 3]
    bboxes = bboxes[:top_k * 3]

    # 2. greedy NMS, one foreground class at a time (classes 1..20)
    for class_idx in range(1, 21):
        class_mask = (conf_cls == class_idx)
        if torch.sum(class_mask) == 0:
            continue

        # At most 200 candidates per class.
        conf_score_ = conf_score[class_mask][:200]
        bboxes_ = bboxes[class_mask][:200]
        wh = bboxes_[:, 2:] - bboxes_[:, :2]
        areaes = wh[:, 0] * wh[:, 1]

        while len(conf_score_) > 0:
            # Candidates are sorted by score, so the head is the best one
            # left: keep it.
            cur_bbox = bboxes_[0]
            cur_score = conf_score_[0]
            cur_area = areaes[0]
            res_score.append(cur_score)
            res_bbox.append(cur_bbox)
            res_cls.append(class_idx)

            conf_score_ = conf_score_[1:]
            bboxes_ = bboxes_[1:]
            areaes = areaes[1:]
            if len(conf_score_) == 0:
                break

            # Intersection of the kept box with every remaining candidate.
            max_x1 = torch.clamp(bboxes_[:, 0], min=float(cur_bbox[0]))
            max_y1 = torch.clamp(bboxes_[:, 1], min=float(cur_bbox[1]))
            min_x2 = torch.clamp(bboxes_[:, 2], max=float(cur_bbox[2]))
            min_y2 = torch.clamp(bboxes_[:, 3], max=float(cur_bbox[3]))
            w = torch.clamp(min_x2 - max_x1, min=0)
            h = torch.clamp(min_y2 - max_y1, min=0)
            intercests = w * h
            iou = intercests / (cur_area + areaes - intercests)

            # Honour the caller's threshold (it used to be hard-coded to
            # 0.45, silently ignoring ``iou_threshold``).
            iou_mask = iou < iou_threshold
            bboxes_ = bboxes_[iou_mask]
            conf_score_ = conf_score_[iou_mask]
            areaes = areaes[iou_mask]

    return res_score, res_bbox, res_cls
def gumbel_softmax_sample(logits, temperature):
    """Draw a sample from the Gumbel-Softmax distribution.

    Gumbel noise shaped like ``logits`` is added, the sum is divided by
    ``temperature``, and the result is softmax-normalised over dim 1.
    """
    noise = sample_gumbel(logits.shape, tens_type=type(logits.data))
    perturbed = logits + noise
    return F.softmax(perturbed / temperature, dim=1)
# Rescale per-video RGB scores with a softmax temperature and write one JSON
# file per input file into dst_dir.
args = parser.parse_args()
src_dir = args.src_dir
dst_dir = args.dst_dir
temp_scaling = args.temp_scaling
temp_tensor = torch.FloatTensor([temp_scaling])
print("Temperature tensor: ", temp_tensor)

filelist = os.listdir(src_dir)
for file in filelist:
    src_file = os.path.join(src_dir, file)
    try:
        with open(src_file) as json_data:
            rgb_scores = json.load(json_data)
    except (OSError, ValueError):
        # Unreadable file or invalid JSON: skip it.  (A bare ``except`` also
        # swallowed KeyboardInterrupt and real programming errors.)
        print("Exception")
        continue
    print("Video", file)

    rgb_window_scores = []
    output_dict = defaultdict(lambda: defaultdict(list))
    for key in rgb_scores:
        score_tensor = torch.FloatTensor(rgb_scores[key]["rgb_scores"])
        # Temperature scaling: divide the scores before the softmax.
        score_tensor = score_tensor / temp_tensor
        rgb_score = F.softmax(score_tensor, dim=-1)
        rgb_score = rgb_score.cpu().detach().numpy().flatten()
        output_dict[key]["rgb_scores"] = rgb_score.tolist()

    with open(os.path.join(dst_dir, file), 'w') as outfile:
        json.dump(output_dict, outfile)
# Softmax regression on (x_train, y_train) with a manual cross-entropy cost.
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

W = torch.zeros((4, 3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# Set up the optimizer.
optimizer = optim.Adam([W, b], lr=0.1)

nb_epochs = 50000
for epoch in range(nb_epochs + 1):

    # Compute the cost.
    logits = x_train.matmul(W) + b  # or .mm or @
    hypothesis = F.softmax(logits, dim=1)
    y_one_hot = torch.zeros_like(hypothesis)
    y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
    # Cross-entropy needs log-softmax of the logits.  The previous version
    # applied softmax to ``hypothesis``, which is already a softmax output,
    # flattening the distribution and yielding a wrong cost and gradient.
    cost = (y_one_hot * -F.log_softmax(logits, dim=1)).sum(dim=1).mean()

    # Improve H(x) using the cost.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log every 100 epochs.
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))
def forward(self, x):
    """Run ``self.kernel`` on ``x`` with softmax-normalised alphas.

    The four architecture parameters are normalised over their last axis
    and passed in the order alpha1_down, alpha1_up, alpha2_down, alpha2_up.
    """
    alphas = (self.alpha1_down, self.alpha1_up,
              self.alpha2_down, self.alpha2_up)
    weights = [F.softmax(alpha, dim=-1) for alpha in alphas]
    return self.kernel(x, *weights)
def forward(self, x):
    """Three-layer tanh MLP ending in a softmax over the last axis.

    The softmax axis is named explicitly: calling ``F.softmax`` without
    ``dim`` is deprecated.  For 1-D and 2-D inputs ``dim=-1`` is the axis
    the implicit rule selected, so those results are unchanged.
    """
    l1 = F.tanh(self.l1(x))
    l2 = F.tanh(self.l2(l1))
    pred = F.softmax(self.l3(l2), dim=-1)
    return pred
def softmax(input, dim=None):
    """Return ``F.softmax`` of ``input`` along ``dim``.

    ``dim`` is forwarded unchanged, so leaving it as ``None`` falls back to
    PyTorch's implicit (deprecated) axis choice.
    """
    return F.softmax(input, dim=dim)
def forward(self, input_):
    """Pass ``input_`` through while training; softmax it otherwise.

    Outside training mode the input is normalised over its last axis.
    """
    if self.training:
        return input_
    return F.softmax(input_, dim=-1)