def triplet_loss(x, M=0.5):
    '''Triplet margin loss on cosine similarity.

    See Wang et al. 2015, Doersch et al. 2017.
    Expects embeddings interleaved as [anchor, positive, negative, ...].
    '''
    x_1 = x[0::3]  # anchors
    x_2 = x[1::3]  # positives
    x_3 = x[2::3]  # negatives
    x_pos = F.cosine_similarity(x_1, x_2, dim=1)
    x_neg = F.cosine_similarity(x_1, x_3, dim=1)
    # hinge: the positive pair should be at least M more similar than the
    # negative pair (the original computed relu(x_pos - x_neg + M), which
    # penalizes similar positives)
    return F.relu(x_neg - x_pos + M).mean()
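# Usage sketch: assumes torch / torch.nn.functional are imported and that
# embeddings arrive interleaved as (anchor, positive, negative) triples,
# as the slicing above implies.
import torch
import torch.nn.functional as F

x = torch.randn(12, 128, requires_grad=True)  # 4 interleaved triples of 128-d embeddings
loss = triplet_loss(x, M=0.5)
loss.backward()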
def pdist(self, fX):
    """Compute pdist à-la scipy.spatial.distance.pdist

    Parameters
    ----------
    fX : (n, d) torch.Tensor
        Embeddings.

    Returns
    -------
    distances : (n * (n-1) / 2,) torch.Tensor
        Condensed pairwise distance matrix
    """
    n_sequences, _ = fX.size()
    distances = []
    for i in range(n_sequences - 1):
        if self.metric in ('cosine', 'angular'):
            d = 1. - F.cosine_similarity(
                fX[i, :].expand(n_sequences - 1 - i, -1),
                fX[i+1:, :], dim=1, eps=1e-8)
            if self.metric == 'angular':
                d = torch.acos(torch.clamp(1. - d, -1 + 1e-6, 1 - 1e-6))
        elif self.metric == 'euclidean':
            d = F.pairwise_distance(
                fX[i, :].expand(n_sequences - 1 - i, -1),
                fX[i+1:, :], p=2, eps=1e-06).view(-1)
        distances.append(d)
    return torch.cat(distances)
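# Sanity-check sketch of the condensed layout: _Cfg is a hypothetical
# stand-in for the owning object (only its `metric` attribute is read),
# and the method is called as a plain module-level function.
import torch

class _Cfg:
    metric = 'cosine'  # or 'angular' / 'euclidean'

fX = torch.randn(5, 16)
d = pdist(_Cfg(), fX)
assert d.shape == (5 * 4 // 2,)  # condensed form, like scipy.spatial.distance.pdist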
def forward(self, input, target):
    input = input.view(input.shape[0], 1, -1)
    target = target.view(input.shape[0], 1, -1)
    input_features = self._transform(input).view(input.shape[0], -1)
    target_features = self._transform(target).view(input.shape[0], -1)
    if self.cosine_similarity:
        spectral_error = -F.cosine_similarity(input_features, target_features).mean()
        return spectral_error
    else:
        return ((input_features - target_features) ** 2).mean()
def multi_perspective_match(vector1: torch.Tensor,
                            vector2: torch.Tensor,
                            weight: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Calculate multi-perspective cosine matching between time-steps of vectors
    of the same length.

    Parameters
    ----------
    vector1 : ``torch.Tensor``
        A tensor of shape ``(batch, seq_len, hidden_size)``
    vector2 : ``torch.Tensor``
        A tensor of shape ``(batch, seq_len or 1, hidden_size)``
    weight : ``torch.Tensor``
        A tensor of shape ``(num_perspectives, hidden_size)``

    Returns
    -------
    A tuple of two tensors consisting of multi-perspective matching results.
    The first one is of shape (batch, seq_len, 1), the second one is of
    shape (batch, seq_len, num_perspectives).
    """
    assert vector1.size(0) == vector2.size(0)
    assert weight.size(1) == vector1.size(2) == vector2.size(2)
    # (batch, seq_len, 1)
    similarity_single = F.cosine_similarity(vector1, vector2, 2).unsqueeze(2)
    # (1, 1, num_perspectives, hidden_size)
    weight = weight.unsqueeze(0).unsqueeze(0)
    # (batch, seq_len, num_perspectives, hidden_size)
    vector1 = weight * vector1.unsqueeze(2)
    vector2 = weight * vector2.unsqueeze(2)
    similarity_multi = F.cosine_similarity(vector1, vector2, dim=3)
    return similarity_single, similarity_multi
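# Shape-checking sketch of the function above (random tensors, sizes illustrative).
import torch

batch, seq_len, hidden, perspectives = 2, 7, 32, 8
v1 = torch.randn(batch, seq_len, hidden)
v2 = torch.randn(batch, seq_len, hidden)
w = torch.randn(perspectives, hidden)
single, multi = multi_perspective_match(v1, v2, w)
assert single.shape == (batch, seq_len, 1)
assert multi.shape == (batch, seq_len, perspectives)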
def cos(a, b):
    """Pairwise cosine similarity: returns an (a_size, b_size) matrix."""
    if 1 == len(a.shape):
        assert len(a.shape) == len(b.shape)
        a_duplicate = a.repeat(1, 1)
        b_duplicate = b.repeat(1, 1)
    else:
        vec_dim = a.shape[-1]
        assert b.shape[-1] == vec_dim
        a_size = a.shape[0]
        b_size = b.shape[0]
        # (a_size, b_size, vec_dim): every row of a paired with every row of b
        a_duplicate = torch.t(a).repeat(b_size, 1, 1).transpose(1, 2).transpose(0, 1)
        b_duplicate = b.repeat(a_size, 1, 1)
    return F.cosine_similarity(a_duplicate, b_duplicate, dim=-1)
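# Usage sketch. On recent PyTorch the same pairwise matrix can be obtained by
# broadcasting, without the repeat/transpose gymnastics above.
import torch
import torch.nn.functional as F

a = torch.randn(3, 8)
b = torch.randn(5, 8)
sim = cos(a, b)  # (3, 5): sim[i, j] = cosine(a[i], b[j])
sim_broadcast = F.cosine_similarity(a[:, None, :], b[None, :, :], dim=-1)
assert torch.allclose(sim, sim_broadcast, atol=1e-6)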
def track(self, f2):
    z2 = self.projector(f2)
    p2 = self.predictor(z2)
    s1 = F.cosine_similarity(self.z1, p2, dim=1)
    s2 = F.cosine_similarity(self.p1, z2, dim=1)
    return s1, s2
def otf_bt(self, batch, lambda_xe, backprop_temperature):
    """
    On the fly back-translation.
    """
    params = self.params
    lang1, sent1, len1 = batch['lang1'], batch['sent1'], batch['len1']
    lang2, sent2, len2 = batch['lang2'], batch['sent2'], batch['len2']
    lang3, sent3, len3 = batch['lang3'], batch['sent3'], batch['len3']
    if lambda_xe == 0:
        logger.warning("Unused generated CPU batch for direction %s-%s-%s!"
                       % (lang1, lang2, lang3))
        return
    lang1_id = params.lang2id[lang1]
    lang2_id = params.lang2id[lang2]
    lang3_id = params.lang2id[lang3]
    direction = (lang1, lang2, lang3)
    assert direction in params.pivo_directions
    loss_fn = self.decoder.loss_fn[lang3_id]
    n_words2 = params.n_words[lang2_id]
    n_words3 = params.n_words[lang3_id]
    self.encoder.train()
    self.decoder.train()

    # prepare batch
    sent1, sent2, sent3 = sent1.cuda(), sent2.cuda(), sent3.cuda()
    bs = sent1.size(1)

    if backprop_temperature == -1:
        # lang2 -> lang3
        encoded = self.encoder(sent2, len2, lang_id=lang2_id)
    else:
        # lang1 -> lang2
        encoded = self.encoder(sent1, len1, lang_id=lang1_id)
        scores = self.decoder(encoded, sent2[:-1], lang_id=lang2_id)
        assert scores.size() == (len2.max() - 1, bs, n_words2)

        # lang2 -> lang3
        bos = torch.cuda.FloatTensor(1, bs, n_words2).zero_()
        bos[0, :, params.bos_index[lang2_id]] = 1
        sent2_input = torch.cat([bos, F.softmax(scores / backprop_temperature, -1)], 0)
        encoded = self.encoder(sent2_input, len2, lang_id=lang2_id)

    if params.lambda_ubwe > 0:
        lang1_dico = self.data['dico'][lang1]
        lang2_dico = self.data['dico'][lang2]
        enc_lang1_embedding = self.encoder.embeddings[lang1_id].weight.data
        enc_lang2_embedding = self.encoder.embeddings[lang2_id].weight.data
        (lang1_dicopairs, lang2_dicopairs) = build_dictionary(
            enc_lang1_embedding, enc_lang2_embedding, cuda=True)
        # lang1_dicopairs ==> [[s1, t1], [s2, t2], ...]
        langemb12_loss = torch.sum(1 - F.cosine_similarity(
            self.encoder.embeddings[lang1_id](lang1_dicopairs[:, 0]),
            self.encoder.embeddings[lang2_id](lang1_dicopairs[:, 1]),
            1)) / lang1_dicopairs.size(0)
        langemb21_loss = torch.sum(1 - F.cosine_similarity(
            self.encoder.embeddings[lang2_id](lang2_dicopairs[:, 0]),
            self.encoder.embeddings[lang1_id](lang2_dicopairs[:, 1]),
            1)) / lang2_dicopairs.size(0)
        lagreement = langemb12_loss + langemb21_loss
        lagreement = params.lambda_ubwe * lagreement
        self.stats['lagreement_loss_%s_%s' % (lang1, lang2)] = self.stats.get(
            'lagreement_loss_%s_%s' % (lang1, lang2), []) + [lagreement.item()]

    # cross-entropy scores / loss
    scores = self.decoder(encoded, sent3[:-1], lang_id=lang3_id)
    xe_loss = loss_fn(scores.view(-1, n_words3), sent3[1:].view(-1))
    self.stats['xe_costs_%s_%s_%s' % direction].append(xe_loss.item())
    assert lambda_xe > 0
    loss = lambda_xe * xe_loss
    if params.lambda_ubwe > 0:
        loss = loss + lagreement

    # check NaN
    if (loss != loss).data.any():
        logger.error("NaN detected")
        exit()

    # optimizer
    assert params.otf_update_enc or params.otf_update_dec
    to_update = []
    if params.otf_update_enc:
        to_update.append('enc')
    if params.otf_update_dec:
        to_update.append('dec')
    self.zero_grad(to_update)
    loss.backward()
    self.update_params(to_update)

    # number of processed sentences / words
    self.stats['processed_s'] += len3.size(0)
    self.stats['processed_w'] += len3.sum()
def forward(self, x):
    if isinstance(x, Variable):
        _, _, h, w = x.size()
    elif isinstance(x, tuple) or isinstance(x, list):
        var_input = x
        while not isinstance(var_input, Variable):
            var_input = var_input[0]
        _, _, h, w = var_input.size()
    else:
        raise RuntimeError('unknown input type: ', type(x))

    if self.backbone in ('resnet18', 'resnet50', 'resnet101', 'resnet152'):
        # pre-trained ResNet feature
        x = self.pretrained.conv1(x)
        x = self.pretrained.bn1(x)
        x = self.pretrained.relu(x)
        x = self.pretrained.maxpool(x)
        x = self.pretrained.layer1(x)
        x = self.pretrained.layer2(x)
        x = self.pretrained.layer3(x)
        x = self.pretrained.layer4(x)

        extract_patches = x.unfold(2, 3, 1).unfold(3, 3, 1)  # 64, 512, 6, 6, 3, 3
        patches = extract_patches.permute(0, 2, 3, 1, 4, 5).contiguous().view(
            x.shape[0], -1, 512, 3, 3)  # 64, 36, 512, 3, 3
        patch_pool = patches.contiguous().view(
            patches.shape[0], patches.shape[1], 512, -1).mean(3)  # 64, 36, 512, 9 => 64, 36, 512
        sub_patch_pool = patches[:, :, :, 1:2, 1:2].squeeze(-1).squeeze(-1)  # 64 x 36 x 512
        cos_sim_x = F.cosine_similarity(
            patch_pool, sub_patch_pool, 2) / 0.5 - 1  # extent of texture information (normalized)
        cos_sim_y = 1. - cos_sim_x  # extent of shape information

        patches_texture = []
        patches_shape = []
        for i in range(patches.shape[1]):
            patches_texture.append(self.head(patches[:, i, :, :, :]))
            patches_shape.append(self.pool(patches[:, i, :, :, :]))
        patches_texture = torch.stack(patches_texture, 1)
        patches_shape = torch.stack(patches_shape, 1)

        # GAT networks
        patches_texture = self.intradomain_texture_1(patches_texture)
        patches_shape = self.intradomain_shape_1(patches_shape)
        patches_texture, patches_shape = self.interdomain_1(
            x=patches_texture, y=patches_shape, x_cos=cos_sim_x, y_cos=cos_sim_y)
        patches_texture = self.intradomain_texture_2(patches_texture)
        patches_shape = self.intradomain_shape_2(patches_shape)
        patches_texture, patches_shape = self.interdomain_2(
            x=patches_texture, y=patches_shape, x_cos=cos_sim_x, y_cos=cos_sim_y)

        texture_vector = torch.matmul(patches_texture.permute(0, 2, 1), self.W_feature).squeeze()
        shape_vector = torch.matmul(patches_shape.permute(0, 2, 1), self.W_shape).squeeze()
        output = torch.cat([texture_vector, shape_vector], dim=-1)
        x = self.fc_layer(output)
    else:
        x = self.pretrained(x)
    return x
def get_similarities(embeddings):
    pos_embed, neg_embed, query_embed = embeddings
    pos_similarity = F.cosine_similarity(query_embed, pos_embed)
    neg_similarity = F.cosine_similarity(query_embed, neg_embed)
    return pos_similarity, neg_similarity, query_embed.size(0)
def forward(self, x, return_positive_pairs = False):
    shape, device, prob_flip = x.shape, x.device, self.prob_rand_hflip

    rand_flip_fn = lambda t: torch.flip(t, dims = (-1,))

    flip_image_one, flip_image_two = rand_true(prob_flip), rand_true(prob_flip)
    flip_image_one_fn = rand_flip_fn if flip_image_one else identity
    flip_image_two_fn = rand_flip_fn if flip_image_two else identity

    cutout_coordinates_one, _ = cutout_coordinates(x, self.cutout_ratio_range)
    cutout_coordinates_two, _ = cutout_coordinates(x, self.cutout_ratio_range)

    image_one_cutout = cutout_and_resize(x, cutout_coordinates_one, mode = self.cutout_interpolate_mode)
    image_two_cutout = cutout_and_resize(x, cutout_coordinates_two, mode = self.cutout_interpolate_mode)

    image_one_cutout = flip_image_one_fn(image_one_cutout)
    image_two_cutout = flip_image_two_fn(image_two_cutout)

    image_one_cutout, image_two_cutout = self.augment1(image_one_cutout), self.augment2(image_two_cutout)

    proj_pixel_one, proj_instance_one = self.online_encoder(image_one_cutout)
    proj_pixel_two, proj_instance_two = self.online_encoder(image_two_cutout)

    image_h, image_w = shape[2:]

    proj_image_shape = proj_pixel_one.shape[2:]
    proj_image_h, proj_image_w = proj_image_shape

    coordinates = torch.meshgrid(
        torch.arange(image_h, device = device),
        torch.arange(image_w, device = device)
    )

    coordinates = torch.stack(coordinates).unsqueeze(0).float()
    coordinates /= math.sqrt(image_h ** 2 + image_w ** 2)
    coordinates[:, 0] *= proj_image_h
    coordinates[:, 1] *= proj_image_w

    proj_coors_one = cutout_and_resize(coordinates, cutout_coordinates_one, output_size = proj_image_shape, mode = self.coord_cutout_interpolate_mode)
    proj_coors_two = cutout_and_resize(coordinates, cutout_coordinates_two, output_size = proj_image_shape, mode = self.coord_cutout_interpolate_mode)

    proj_coors_one = flip_image_one_fn(proj_coors_one)
    proj_coors_two = flip_image_two_fn(proj_coors_two)

    proj_coors_one, proj_coors_two = map(lambda t: rearrange(t, 'b c h w -> (b h w) c'), (proj_coors_one, proj_coors_two))
    pdist = nn.PairwiseDistance(p = 2)

    num_pixels = proj_coors_one.shape[0]

    proj_coors_one_expanded = proj_coors_one[:, None].expand(num_pixels, num_pixels, -1).reshape(num_pixels * num_pixels, 2)
    proj_coors_two_expanded = proj_coors_two[None, :].expand(num_pixels, num_pixels, -1).reshape(num_pixels * num_pixels, 2)

    distance_matrix = pdist(proj_coors_one_expanded, proj_coors_two_expanded)
    distance_matrix = distance_matrix.reshape(num_pixels, num_pixels)

    positive_mask_one_two = distance_matrix < self.distance_thres
    positive_mask_two_one = positive_mask_one_two.t()

    with torch.no_grad():
        target_encoder = self._get_target_encoder()
        target_proj_pixel_one, target_proj_instance_one = target_encoder(image_one_cutout)
        target_proj_pixel_two, target_proj_instance_two = target_encoder(image_two_cutout)

    # flatten all the pixel projections
    flatten = lambda t: rearrange(t, 'b c h w -> b c (h w)')
    target_proj_pixel_one, target_proj_pixel_two = list(map(flatten, (target_proj_pixel_one, target_proj_pixel_two)))

    # get total number of positive pixel pairs
    positive_pixel_pairs = positive_mask_one_two.sum()

    # get instance level loss
    pred_instance_one = self.online_predictor(proj_instance_one)
    pred_instance_two = self.online_predictor(proj_instance_two)

    loss_instance_one = loss_fn(pred_instance_one, target_proj_instance_two.detach())
    loss_instance_two = loss_fn(pred_instance_two, target_proj_instance_one.detach())
    instance_loss = (loss_instance_one + loss_instance_two).mean()

    if positive_pixel_pairs == 0:
        ret = (instance_loss, 0) if return_positive_pairs else instance_loss
        return ret

    if not self.use_pixpro:
        # calculate pix contrast loss
        proj_pixel_one, proj_pixel_two = list(map(flatten, (proj_pixel_one, proj_pixel_two)))

        similarity_one_two = F.cosine_similarity(proj_pixel_one[..., :, None], target_proj_pixel_two[..., None, :], dim = 1) / self.similarity_temperature
        similarity_two_one = F.cosine_similarity(proj_pixel_two[..., :, None], target_proj_pixel_one[..., None, :], dim = 1) / self.similarity_temperature

        loss_pix_one_two = -torch.log(
            similarity_one_two.masked_select(positive_mask_one_two[None, ...]).exp().sum() /
            similarity_one_two.exp().sum()
        )

        loss_pix_two_one = -torch.log(
            similarity_two_one.masked_select(positive_mask_two_one[None, ...]).exp().sum() /
            similarity_two_one.exp().sum()
        )

        pix_loss = (loss_pix_one_two + loss_pix_two_one) / 2
    else:
        # calculate pix pro loss
        propagated_pixels_one = self.propagate_pixels(proj_pixel_one)
        propagated_pixels_two = self.propagate_pixels(proj_pixel_two)

        propagated_pixels_one, propagated_pixels_two = list(map(flatten, (propagated_pixels_one, propagated_pixels_two)))

        propagated_similarity_one_two = F.cosine_similarity(propagated_pixels_one[..., :, None], target_proj_pixel_two[..., None, :], dim = 1)
        propagated_similarity_two_one = F.cosine_similarity(propagated_pixels_two[..., :, None], target_proj_pixel_one[..., None, :], dim = 1)

        loss_pixpro_one_two = - propagated_similarity_one_two.masked_select(positive_mask_one_two[None, ...]).mean()
        loss_pixpro_two_one = - propagated_similarity_two_one.masked_select(positive_mask_two_one[None, ...]).mean()

        pix_loss = (loss_pixpro_one_two + loss_pixpro_two_one) / 2

    # total loss
    loss = pix_loss * self.alpha + instance_loss
    ret = (loss, positive_pixel_pairs) if return_positive_pairs else loss
    return ret
def scoring(self, qt_repr, cand_repr):
    sim = F.cosine_similarity(qt_repr, cand_repr)
    return sim
def translate_vector(self, batch, data):
    def assemble_src_representation(src):
        src_representation = None
        for b_id in range(src.size()[0]):
            a = torch.squeeze(src[b_id], 1)
            sr = torch.cumsum(a, dim=0)[src.size()[1] - 1]
            if src_representation is None:
                src_representation = sr.unsqueeze(0)
            else:
                src_representation = torch.cat((src_representation, sr.unsqueeze(0)), 0)
        return src_representation

    # Encoder forward.
    src = inputters.make_features(batch, 'src', data.data_type)
    if self.model.encoder is not None:
        if len(src.size()) == 4:
            src = torch.squeeze(src, 2)
        enc_final, memory_bank, lengths = self.model.encoder(src, None)
        src_sizes = src.size()
        if hasattr(self.model.decoder, 'd_model'):
            scores = []
            if self.model.decoder.decoder_type == 'vecdif_multi':
                if hasattr(batch.dataset.examples[0], "eos_np"):
                    eos_np = torch.from_numpy(
                        batch.dataset.examples[0].eos_np.astype(np.float32))
                else:
                    eos_np = None
                src_representation = assemble_src_representation(src)
                src_lengths = src.size()
                final_vectors = []
                covered_target = torch.zeros((src_lengths[0], 512), dtype=torch.float)
                eos_angle = 0
                for target_id in range(4):
                    prev_prediction = torch.zeros(1, self.model.decoder.d_model)
                    scores.append([])
                    for i in range(src_lengths[1]):
                        decoder_outputs, score = self.model.decoder(
                            i, src, memory_bank, prev_prediction, enc_final[0],
                            covered_target, src_representation, target_id)
                        prev_prediction = decoder_outputs
                        covered_target += decoder_outputs.detach()
                        scores[target_id].append(score.item())
                    if eos_np is not None:
                        eos_angle = F.cosine_similarity(prev_prediction, eos_np, dim=1).item()
                        if eos_angle > 0.9 and target_id > 0:
                            print("EOS reached " + str(eos_angle))
                            break
                    else:
                        final_vectors.append(prev_prediction)
                        break
                    final_vectors.append(prev_prediction)
                return torch.cat(final_vectors), np.array(scores)
            else:
                src_representation = assemble_src_representation(src)
                prev_prediction = None
                for i in range(src_sizes[1]):
                    decoder_outputs, score = self.model.decoder(
                        i, src, memory_bank, prev_prediction, enc_final[0],
                        source_vector=src_representation)
                    prev_prediction = decoder_outputs
                    scores.append(score.item())
                return prev_prediction, np.array(scores)
        else:
            enc_state = self.model.decoder.init_decoder_state(src, memory_bank, enc_final)
            decoder_outputs, score = self.model.decoder(
                src, memory_bank=memory_bank, state=enc_state, memory_lengths=None)
    else:
        decoder_outputs, dec_state, attns = self.model.decoder(src, memory_lengths=None)

    # Generator forward.
    ret = self.model.generator.forward(decoder_outputs.squeeze(0))
    return ret, []
def get_n_closest_vectors(vec, vector_table, n=10):
    # Higher cosine similarity means closer, so sort in descending order
    # (the original sorted ascending, which returned the farthest vectors).
    sim = F.cosine_similarity(vector_table, vec.unsqueeze(dim=1).transpose(0, 1))
    index_sorted = sim.argsort(descending=True)
    return index_sorted[:n]
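# Usage sketch with random data (table rows are candidate vectors).
import torch

table = torch.randn(100, 64)
query = torch.randn(64)
top = get_n_closest_vectors(query, table, n=5)
assert top.shape == (5,)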
def run(pointcloud_path, out_dir, decoder_type='siren', resume=True, **kwargs):
    """
    test_implicit_siren_noisy_wNormals
    """
    device = torch.device('cuda:0')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # data
    points, normals = np.split(
        read_ply(pointcloud_path).astype('float32'), (3,), axis=1)
    pmax, pmin = points.max(axis=0), points.min(axis=0)
    scale = (pmax - pmin).max()
    pcenter = (pmax + pmin) / 2
    points = (points - pcenter) / scale * 1.5
    scale_mat = scale_mat_inv = np.identity(4)
    scale_mat[[0, 1, 2], [0, 1, 2]] = 1 / scale * 1.5
    scale_mat[[0, 1, 2], [3, 3, 3]] = - pcenter / scale * 1.5
    scale_mat_inv = np.linalg.inv(scale_mat)
    normals = normals @ np.linalg.inv(scale_mat[:3, :3].T)
    object_bounding_sphere = np.linalg.norm(points, axis=1).max()
    pcl = trimesh.Trimesh(vertices=points, vertex_normals=normals, process=False)
    pcl.export(os.path.join(out_dir, "input_pcl.ply"), vertex_normal=True)
    assert np.abs(points).max() < 1

    dataset = torch.utils.data.TensorDataset(
        torch.from_numpy(points), torch.from_numpy(normals))
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=6000, num_workers=1, shuffle=True,
        collate_fn=tolerating_collate,
    )
    gt_surface_pts_all = torch.from_numpy(points).unsqueeze(0).float()
    gt_surface_normals_all = torch.from_numpy(normals).unsqueeze(0).float()
    gt_surface_normals_all = F.normalize(gt_surface_normals_all, dim=-1)

    if kwargs['use_off_normal_loss']:
        # subsample from pointset
        sub_idx = torch.randperm(gt_surface_normals_all.shape[1])[:20000]
        gt_surface_pts_sub = torch.index_select(gt_surface_pts_all, 1, sub_idx).to(device=device)
        gt_surface_normals_sub = torch.index_select(gt_surface_normals_all, 1, sub_idx).to(device=device)
        from DSS.core.cloud import denoise_normals
        gt_surface_normals_sub = denoise_normals(
            gt_surface_pts_sub, gt_surface_normals_sub, neighborhood_size=30)

    if decoder_type == 'siren':
        decoder_params = {
            'dim': 3,
            "out_dims": {'sdf': 1},
            "c_dim": 0,
            "hidden_size": 256,
            'n_layers': 3,
            "first_omega_0": 30,
            "hidden_omega_0": 30,
            "outermost_linear": True,
        }
        decoder = Siren(**decoder_params)
    elif decoder_type == 'sdf':
        decoder_params = {
            'dim': 3,
            "out_dims": {'sdf': 1},
            "c_dim": 0,
            "hidden_size": 512,
            'n_layers': 8,
            'bias': 1.0,
        }
        decoder = SDF(**decoder_params)
    else:
        raise ValueError
    print(decoder)
    decoder = decoder.to(device)

    # training
    total_iter = 30000
    optimizer = torch.optim.Adam(decoder.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [10000, 20000], gamma=0.5)

    shape = Shape(gt_surface_pts_all.cuda(),
                  n_points=gt_surface_pts_all.shape[1] // 8,
                  normals=gt_surface_normals_all.cuda())

    # initialize siren with sphere_initialization
    checkpoint_io = CheckpointIO(out_dir, model=decoder, optimizer=optimizer)
    load_dict = dict()
    if resume:
        models_avail = [f for f in os.listdir(out_dir) if f[-3:] == '.pt']
        if len(models_avail) > 0:
            models_avail.sort()
            load_dict = checkpoint_io.load(models_avail[-1])
    it = load_dict.get('it', 0)
    if it > 0:
        try:
            iso_point_files = [f for f in os.listdir(out_dir) if f[-7:] == 'iso.ply']
            iso_point_iters = [int(os.path.basename(f[:-len('_iso.ply')])) for f in iso_point_files]
            iso_point_iters = np.array(iso_point_iters)
            idx = np.argmax(iso_point_iters[(iso_point_iters - it) <= 0])
            iso_point_file = np.array(iso_point_files)[(iso_point_iters - it) <= 0][idx]
            iso_points = torch.from_numpy(read_ply(os.path.join(out_dir, iso_point_file))[..., :3])
            shape.points = iso_points.to(device=shape.points.device).view(1, -1, 3)
            print('Loaded iso-points from %s' % iso_point_file)
        except Exception as e:
            pass

    # loss
    eikonal_loss = NormalLengthLoss(reduction='mean')

    # start training
    save_ply(os.path.join(out_dir, 'in_iso_points.ply'), shape.points.cpu().view(-1, 3))
    iso_points = shape.points
    iso_points_normal = None
    while True:
        if it > total_iter:
            checkpoint_io.save('model_{:04d}.pt'.format(it), it=it)
            mesh = get_surface_high_res_mesh(
                lambda x: decoder(x).sdf.squeeze(), resolution=512)
            mesh.apply_transform(scale_mat_inv)
            mesh.export(os.path.join(out_dir, "final.ply"))
            break

        for batch in data_loader:
            gt_surface_pts, gt_surface_normals = batch
            gt_surface_pts.unsqueeze_(0)
            gt_surface_normals.unsqueeze_(0)
            gt_surface_pts = gt_surface_pts.to(device=device).detach()
            gt_surface_normals = gt_surface_normals.to(device=device).detach()

            optimizer.zero_grad()
            decoder.train()
            loss = defaultdict(float)

            lambda_surface_sdf = 1e3
            lambda_surface_normal = 1e2
            if kwargs['warm_up'] >= 0 and it >= kwargs['warm_up']:
                lambda_surface_sdf = kwargs['lambda_surface_sdf']
                lambda_surface_normal = kwargs['lambda_surface_normal']

            # debug
            if (it - kwargs['warm_up']) % 1000 == 0:
                # generate iso surface
                with torch.autograd.no_grad():
                    box_size = (object_bounding_sphere * 2 + 0.2,) * 3
                    imgs = plot_cuts(lambda x: decoder(x).sdf.squeeze().detach(),
                                     box_size=box_size, max_n_eval_pts=10000,
                                     thres=0.0, imgs_per_cut=1,
                                     save_path=os.path.join(out_dir, '%010d_iso.html' % it))
                    mesh = get_surface_high_res_mesh(
                        lambda x: decoder(x).sdf.squeeze(), resolution=200)
                    mesh.apply_transform(scale_mat_inv)
                    mesh.export(os.path.join(out_dir, '%010d_mesh.ply' % it))

            if it % 2000 == 0:
                checkpoint_io.save('model.pt', it=it)

            pred_surface_grad = gradient(gt_surface_pts.clone(), lambda x: decoder(x).sdf)

            # every once in a while update shape and points
            # sample points in space and on the shape
            # use iso points to weigh data points loss
            weights = 1.0
            if kwargs['warm_up'] >= 0 and it >= kwargs['warm_up']:
                if it == kwargs['warm_up'] or kwargs['resample_every'] > 0 and (it - kwargs['warm_up']) % kwargs['resample_every'] == 0:
                    iso_points = shape.get_iso_points(
                        iso_points + 0.1 * (torch.rand_like(iso_points) - 0.5),
                        decoder, ear=kwargs['ear'],
                        outlier_tolerance=kwargs['outlier_tolerance'])
                    iso_points_normal = estimate_pointcloud_normals(iso_points.view(1, -1, 3), 8, False)
                    if kwargs['denoise_normal']:
                        iso_points_normal = denoise_normals(iso_points, iso_points_normal, num_points=None)
                        iso_points_normal = iso_points_normal.view_as(iso_points)
                elif iso_points_normal is None:
                    iso_points_normal = estimate_pointcloud_normals(iso_points.view(1, -1, 3), 8, False)

                # TODO: use gradient from network or neighborhood?
                iso_points_g = gradient(iso_points.clone(), lambda x: decoder(x).sdf)
                if it == kwargs['warm_up'] or kwargs['resample_every'] > 0 and (it - kwargs['warm_up']) % kwargs['resample_every'] == 0:
                    save_ply(os.path.join(out_dir, '%010d_iso.ply' % it),
                             iso_points.cpu().detach().view(-1, 3),
                             normals=iso_points_g.view(-1, 3).detach().cpu())

                if kwargs['weight_mode'] == 1:
                    weights = get_iso_bilateral_weights(
                        gt_surface_pts, gt_surface_normals, iso_points, iso_points_g).detach()
                elif kwargs['weight_mode'] == 2:
                    weights = get_laplacian_weights(
                        gt_surface_pts, gt_surface_normals, iso_points, iso_points_g).detach()
                elif kwargs['weight_mode'] == 3:
                    weights = get_heat_kernel_weights(
                        gt_surface_pts, gt_surface_normals, iso_points, iso_points_g).detach()

                if (it - kwargs['warm_up']) % 1000 == 0 and kwargs['weight_mode'] != -1:
                    print("min {:.4g}, max {:.4g}, std {:.4g}, mean {:.4g}".format(
                        weights.min(), weights.max(), weights.std(), weights.mean()))
                    colors = scaler_to_color(1 - weights.view(-1).cpu().numpy(), cmap='Reds')
                    save_ply(os.path.join(out_dir, '%010d_batch_weight.ply' % it),
                             (to_homogen(gt_surface_pts).cpu().detach().numpy() @ scale_mat_inv.T)[..., :3].reshape(-1, 3),
                             colors=colors)

                sample_idx = torch.randperm(iso_points.shape[1])[:min(10000, iso_points.shape[1])]
                iso_points_sampled = iso_points.detach()[:, sample_idx, :]
                iso_points_sdf = decoder(iso_points_sampled.detach()).sdf
                loss_iso_points_sdf = iso_points_sdf.abs().mean() * kwargs['lambda_iso_sdf'] \
                    * iso_points_sdf.nelement() / (iso_points_sdf.nelement() + 8000)
                loss['loss_sdf_iso'] = loss_iso_points_sdf.detach()
                loss['loss'] += loss_iso_points_sdf

                # TODO: predict iso_normals from local_frame
                iso_normals_sampled = iso_points_normal.detach()[:, sample_idx, :]
                iso_g_sampled = iso_points_g[:, sample_idx, :]
                loss_normals = torch.mean(
                    (1 - F.cosine_similarity(iso_normals_sampled, iso_g_sampled, dim=-1).abs())) \
                    * kwargs['lambda_iso_normal'] \
                    * iso_points_sdf.nelement() / (iso_points_sdf.nelement() + 8000)
                loss['loss_normal_iso'] = loss_normals.detach()
                loss['loss'] += loss_normals

            idx = torch.randperm(gt_surface_pts.shape[1]).to(
                device=gt_surface_pts.device)[:(gt_surface_pts.shape[1] // 2)]
            tmp = torch.index_select(gt_surface_pts, 1, idx)
            space_pts = torch.cat([
                torch.rand_like(tmp) * 2 - 1,
                torch.randn_like(tmp, device=tmp.device, dtype=tmp.dtype) * 0.1 + tmp], dim=1)
            space_pts.requires_grad_(True)
            pred_space_sdf = decoder(space_pts).sdf
            pred_space_grad = torch.autograd.grad(
                pred_space_sdf, [space_pts], [torch.ones_like(pred_space_sdf)],
                create_graph=True)[0]

            # 1. eikonal term
            loss_eikonal = (eikonal_loss(pred_surface_grad) + eikonal_loss(pred_space_grad)) * kwargs['lambda_eikonal']
            loss['loss_eikonal'] = loss_eikonal.detach()
            loss['loss'] += loss_eikonal

            # 2. SDF loss on surface points
            pred_surface_sdf = decoder(gt_surface_pts).sdf
            loss_sdf = torch.mean(weights * pred_surface_sdf.abs()) * lambda_surface_sdf
            if kwargs['warm_up'] >= 0 and it >= kwargs['warm_up'] and kwargs['lambda_iso_sdf'] != 0:
                loss_sdf = loss_sdf * pred_surface_sdf.nelement() / (pred_surface_sdf.nelement() + iso_points_sdf.nelement())

            if kwargs['use_sal_loss'] and iso_points is not None:
                dists, idxs, _ = knn_points(space_pts.view(1, -1, 3),
                                            iso_points.view(1, -1, 3).detach(), K=1)
                dists = dists.view_as(pred_space_sdf)
                idxs = idxs.view_as(pred_space_sdf)
                loss_inter = ((eps_sqrt(dists).sqrt() - pred_space_sdf.abs()) ** 2).mean() * kwargs['lambda_inter_sal']
            else:
                alpha = (it / total_iter + 1) * 100
                loss_inter = torch.exp(-alpha * pred_space_sdf.abs()).mean() * kwargs['lambda_inter_sdf']

            loss_sald = torch.tensor(0.0).cuda()
            if kwargs['use_off_normal_loss'] and it < 1000:
                dists, idxs, _ = knn_points(space_pts.view(1, -1, 3),
                                            gt_surface_pts_sub.view(1, -1, 3).cuda(), K=1)
                knn_normal = knn_gather(gt_surface_normals_sub.cuda().view(1, -1, 3), idxs).view(1, -1, 3)
                direction_correctness = -F.cosine_similarity(knn_normal, pred_space_grad, dim=-1)
                direction_correctness[direction_correctness < 0] = 0
                loss_sald = torch.mean(direction_correctness * torch.exp(-2 * dists)) * 2

            # 3. normal direction
            loss_normals = torch.mean(
                weights * (1 - F.cosine_similarity(gt_surface_normals, pred_surface_grad, dim=-1))) * lambda_surface_normal
            if kwargs['warm_up'] >= 0 and it >= kwargs['warm_up'] and kwargs['lambda_iso_normal'] != 0:
                loss_normals = loss_normals * gt_surface_normals.nelement() / (gt_surface_normals.nelement() + iso_normals_sampled.nelement())

            loss['loss_sdf'] = loss_sdf.detach()
            loss['loss_inter'] = loss_inter.detach()
            loss['loss_normals'] = loss_normals.detach()
            loss['loss_sald'] = loss_sald
            loss['loss'] += loss_sdf
            loss['loss'] += loss_inter
            loss['loss'] += loss_sald
            loss['loss'] += loss_normals

            loss['loss'].backward()
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), max_norm=1.)
            optimizer.step()
            scheduler.step()

            if it % 20 == 0:
                print("iter {:05d} {}".format(it, ', '.join(
                    ['{}: {}'.format(k, v.item()) for k, v in loss.items()])))
            it += 1
def compute(self, vec1, vec2, **kwargs):
    """Cosine distance: 1 - F.cosine_similarity."""
    return 1 - F.cosine_similarity(vec1, vec2)
def postprocesses_trajectories(policy, sample_batch, other_agent_batches=None, episode=None):
    """
    Postprocesses individual trajectories.

    Inputs are numpy arrays with shape [Time, Feature Dims...] or [Time]
    if there is only one feature. Note that inputs are not batched.
    Computes advantages.
    """
    # --------------------------------
    # Compute FuN manager intrinsic rewards
    # --------------------------------
    horizon = policy.config['fun_horizon']
    seq_len = sample_batch[SampleBatch.REWARDS].shape[0]
    manager_latent_state = torch.Tensor(sample_batch['manager_latent_state'])
    manager_goal = torch.Tensor(sample_batch['manager_goal'])
    fun_intrinsic_reward = np.zeros_like(sample_batch[SampleBatch.REWARDS])
    for i in range(seq_len):
        reward = 0.0
        for j in range(1, horizon + 1):
            if i - j >= 0:
                manager_latent_state_current = manager_latent_state[i]
                manager_latent_state_prev = manager_latent_state[i - j]
                manager_latent_state_diff = manager_latent_state_current - manager_latent_state_prev
                manager_goal_prev = manager_goal[i - j]
                reward = reward + F.cosine_similarity(
                    manager_latent_state_diff, manager_goal_prev, dim=0)
        fun_intrinsic_reward[i] = reward / horizon
    sample_batch['fun_intrinsic_reward'] = fun_intrinsic_reward

    # --------------------------------
    # Compute ICM exploration intrinsic rewards
    # --------------------------------
    _ = policy.model.icm_forward(
        torch.Tensor(sample_batch[SampleBatch.OBS]),
        torch.Tensor(sample_batch[SampleBatch.NEXT_OBS]))
    exploration_rewards = 0.005 * policy.model.icm_fwd_forward(
        torch.Tensor(sample_batch[SampleBatch.ACTIONS]))
    exploration_rewards = exploration_rewards.numpy()
    sample_batch['exploration_rewards'] = exploration_rewards

    # --------------------------------
    # Estimate last reward if trajectory was truncated
    # --------------------------------
    completed = sample_batch[SampleBatch.DONES][-1]
    if completed:
        manager_last_r = 0.0
        worker_last_r = 0.0
    else:
        # Trajectory has been truncated, estimate final reward using the
        # value function from the terminal observation and
        # internal recurrent state if any
        next_state = []
        for i in range(policy.num_state_tensors()):
            next_state.append(sample_batch['state_out_{}'.format(i)][-1])
        manager_last_r, worker_last_r = policy._value(
            sample_batch[SampleBatch.NEXT_OBS][-1],
            sample_batch[SampleBatch.ACTIONS][-1],
            sample_batch[SampleBatch.REWARDS][-1], *next_state)
        manager_last_r = manager_last_r[0]
        worker_last_r = worker_last_r[0]

    # --------------------------------
    # Add ICM exploration intrinsic reward to manager
    # and compute manager advantages / value targets
    # --------------------------------
    original_rewards = sample_batch[SampleBatch.REWARDS]
    sample_batch[SampleBatch.REWARDS] += 0.9 * exploration_rewards

    # Compute advantages and value targets for the manager
    sample_batch[SampleBatch.VF_PREDS] = sample_batch['manager_values']
    sample_batch = compute_advantages(sample_batch, manager_last_r,
                                      policy.config['gamma'],
                                      policy.config['lambda'],
                                      policy.config['use_gae'],
                                      policy.config['use_critic'])
    sample_batch['manager_advantages'] = sample_batch[Postprocessing.ADVANTAGES]
    sample_batch['manager_value_targets'] = sample_batch[Postprocessing.VALUE_TARGETS]

    # --------------------------------
    # Add FuN manager and ICM exploration intrinsic rewards to worker
    # and compute worker advantages / value targets
    # --------------------------------
    sample_batch[SampleBatch.REWARDS] = original_rewards
    sample_batch[SampleBatch.REWARDS] += 0.9 * fun_intrinsic_reward
    sample_batch[SampleBatch.REWARDS] += 0.1 * exploration_rewards

    # Compute advantages and value targets for the worker
    sample_batch[SampleBatch.VF_PREDS] = sample_batch['worker_values']
    sample_batch = compute_advantages(sample_batch, worker_last_r,
                                      policy.config['gamma'],
                                      policy.config['lambda'],
                                      policy.config['use_gae'],
                                      policy.config['use_critic'])
    sample_batch['worker_advantages'] = sample_batch[Postprocessing.ADVANTAGES]
    sample_batch['worker_value_targets'] = sample_batch[Postprocessing.VALUE_TARGETS]

    # WARNING: These values are only used temporarily. Do not use:
    #   sample_batch[SampleBatch.REWARDS]
    #   sample_batch[SampleBatch.VF_PREDS]
    #   sample_batch[Postprocessing.ADVANTAGES]
    #   sample_batch[Postprocessing.VALUE_TARGETS]
    return sample_batch
def actor_critic_loss(policy, model, dist_class, train_batch):
    assert policy.is_recurrent(), "policy must be recurrent"

    seq_lens = train_batch['seq_lens']
    batch_size = seq_lens.shape[0]
    max_seq_len = torch.max(seq_lens)
    mask_orig = sequence_mask(seq_lens, max_seq_len)
    mask = torch.reshape(mask_orig, [-1])

    horizon = policy.config['fun_horizon']
    manager_horizon_mask = mask_orig.clone()
    manager_horizon_mask[:, -horizon:] = False
    manager_horizon_mask = manager_horizon_mask.reshape(-1)

    _ = model.icm_forward(train_batch[SampleBatch.OBS],
                          train_batch[SampleBatch.NEXT_OBS])
    icm_fwd_loss = model.icm_fwd_forward(train_batch[SampleBatch.ACTIONS])
    icm_inv_loss = model.icm_inv_forward(train_batch[SampleBatch.ACTIONS])
    icm_loss = 0.995 * icm_fwd_loss + 0.005 * icm_inv_loss
    icm_loss = torch.sum(icm_loss * mask)
    icm_loss /= batch_size * max_seq_len
    policy.icm_loss = icm_loss

    # Hacky way of passing data from sample batch to train batch
    model.random_select = train_batch['random_select'].reshape((batch_size, -1))
    model.random_goal = train_batch['random_goal'].reshape((batch_size, max_seq_len, -1))

    logits, _ = model.from_batch(train_batch)
    manager_values, worker_values = model.value_function()
    manager_latent_state, manager_goal = model.manager_features()

    manager_latent_state_future = torch.roll(manager_latent_state, -horizon, 1)
    manager_latent_state_diff = (manager_latent_state_future - manager_latent_state).detach()
    policy.manager_loss = 10.0 * -torch.sum(
        train_batch['manager_advantages'] *
        F.cosine_similarity(manager_latent_state_diff, manager_goal, dim=-1).reshape(-1) *
        manager_horizon_mask) / (batch_size * max_seq_len)

    dist = dist_class(logits, model)
    log_probs = dist.logp(train_batch[SampleBatch.ACTIONS])
    policy.entropy = 3e-4 * -torch.sum(dist.entropy() * mask) / (batch_size * max_seq_len)
    policy.pi_err = 0.1 * -torch.sum(
        train_batch['worker_advantages'] * log_probs.reshape(-1) * mask) / (batch_size * max_seq_len)
    policy.manager_value_err = torch.sum(
        torch.pow((manager_values.reshape(-1) - train_batch['manager_value_targets']) * mask,
                  2.0)) / (batch_size * max_seq_len)
    policy.worker_value_err = 0.01 * torch.sum(
        torch.pow((worker_values.reshape(-1) - train_batch['worker_value_targets']) * mask,
                  2.0)) / (batch_size * max_seq_len)

    overall_err = sum([
        policy.pi_err,
        policy.manager_value_err,
        policy.worker_value_err,
        policy.entropy,
        policy.manager_loss,
        policy.icm_loss,
    ])
    return overall_err
def track(self, f2):
    x2 = self.pool(f2)
    z2 = self.projector(x2)
    s = F.cosine_similarity(self.z1, z2, dim=1)
    return s
def pearson_similarity(x1, x2, dim=-1, eps=1e-8):
    centered_x1 = x1 - x1.mean(dim=dim, keepdim=True)
    centered_x2 = x2 - x2.mean(dim=dim, keepdim=True)
    return F.cosine_similarity(centered_x1, centered_x2, dim=dim, eps=eps).unsqueeze(dim=-1)
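# Quick check that this matches the textbook definition: Pearson correlation
# is cosine similarity of mean-centered inputs (torch.corrcoef needs PyTorch >= 1.10).
import torch

x1 = torch.randn(4, 10)
x2 = torch.randn(4, 10)
r = pearson_similarity(x1, x2)  # (4, 1), values in [-1, 1]
corr = torch.corrcoef(torch.stack([x1[0], x2[0]]))[0, 1]
assert torch.allclose(r[0, 0], corr, atol=1e-6)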
def translate(self, src_path=None, src_data_iter=None,
              tgt_path=None, tgt_data_iter=None,
              src_dir=None, batch_size=None, attn_debug=False,
              translate_size=-1, force_target_split=False):
    """
    Translate content of `src_data_iter` (if not None) or `src_path`
    and get gold scores if one of `tgt_data_iter` or `tgt_path` is set.

    Note: batch_size must not be None
    Note: one of ('src_path', 'src_data_iter') must not be None

    Args:
        src_path (str): filepath of source data
        src_data_iter (iterator): an iterator generating source data,
            e.g. it may be a list or an opened file
        tgt_path (str): filepath of target data
        tgt_data_iter (iterator): an iterator generating target data
        src_dir (str): source directory path
            (used for Audio and Image datasets)
        batch_size (int): size of examples per mini-batch
        attn_debug (bool): enables the attention logging

    Returns:
        (`list`, `list`)

        * all_scores is a list of `batch_size` lists of `n_best` scores
        * all_predictions is a list of `batch_size` lists
          of `n_best` predictions
    """
    assert src_data_iter is not None or src_path is not None

    if batch_size is None:
        raise ValueError("batch_size must be set")
    data = inputters.build_dataset(self.fields, self.data_type,
                                   src_path=src_path,
                                   src_data_iter=src_data_iter,
                                   tgt_path=tgt_path,
                                   tgt_data_iter=tgt_data_iter,
                                   src_dir=src_dir,
                                   sample_rate=self.sample_rate,
                                   window_size=self.window_size,
                                   window_stride=self.window_stride,
                                   window=self.window,
                                   use_filter_pred=self.use_filter_pred,
                                   image_channel_size=self.image_channel_size)

    if self.cuda:
        cur_device = "cuda"
    else:
        cur_device = "cpu"

    if not self.decoder_type.startswith("vecdif"):
        data_iter = inputters.OrderedIterator(
            dataset=data, device=cur_device,
            batch_size=batch_size, train=False, sort=False,
            sort_within_batch=True, shuffle=False)
    else:
        data_iter = inputters.USEIterator(
            dataset=data, batch_size=batch_size,
            device=cur_device, train=False, sort=False,
            sort_within_batch=True, repeat=False, shuffle=False,
            use_port=self.use_port, force_target_split=force_target_split)

    builder = onmt.translate.TranslationBuilder(
        data, self.fields, self.n_best, self.replace_unk, tgt_path)

    # Statistics
    counter = count(1)
    pred_score_total, pred_words_total = 0, 0
    gold_score_total, gold_words_total = 0, 0

    all_scores = []
    all_predictions = []
    all_vectors = []
    i = 0
    penalty = torch.tensor([[0.0]], dtype=torch.float)
    il_nietrafionych = 0  # Polish: number of missed (over-generated) vectors

    for batch in data_iter:
        if not self.decoder_type.startswith("vecdif"):
            batch_data = self.translate_batch(batch, data, fast=self.fast, i=i)
            translations = builder.from_batch(batch_data)
            for trans in translations:
                local_scores = []
                for si in range(self.n_best):
                    if i < len(trans.pred_scores):
                        local_scores.append(trans.pred_scores[i].item())
                all_scores += local_scores
                pred_score_total += trans.pred_scores[0]
                pred_words_total += len(trans.pred_sents[0])
                if tgt_path is not None:
                    gold_score_total += trans.gold_score
                    gold_words_total += len(trans.gold_sent) + 1

                n_best_preds = [" ".join(pred) for pred in trans.pred_sents[:self.n_best]]
                all_predictions += [n_best_preds]
                self.out_file.write('\n'.join(n_best_preds) + '\n')
                self.out_file.flush()

                if self.verbose:
                    sent_number = next(counter)
                    output = trans.log(sent_number)
                    if self.logger:
                        self.logger.info(output)
                    else:
                        os.write(1, output.encode('utf-8'))

                # Debug attention.
                if attn_debug:
                    preds = trans.pred_sents[0]
                    preds.append('</s>')
                    attns = trans.attns[0].tolist()
                    if self.data_type == 'text':
                        srcs = trans.src_raw
                    else:
                        srcs = [str(item) for item in range(len(attns[0]))]
                    header_format = "{:>10.10} " + "{:>10.7} " * len(srcs)
                    row_format = "{:>10.10} " + "{:>10.7f} " * len(srcs)
                    output = header_format.format("", *srcs) + '\n'
                    for word, row in zip(preds, attns):
                        max_index = row.index(max(row))
                        row_format = row_format.replace(
                            "{:>10.7f} ", "{:*>10.7f} ", max_index + 1)
                        row_format = row_format.replace(
                            "{:*>10.7f} ", "{:>10.7f} ", max_index)
                        output += row_format.format(word, *row) + '\n'
                        row_format = "{:>10.10} " + "{:>10.7f} " * len(srcs)
                    os.write(1, output.encode('utf-8'))
            if translate_size > 0 and len(all_predictions) > translate_size:
                break
        else:
            batch_data, scores = self.translate_vector(batch, data)
            p = batch_data
            t = batch.tgt
            if self.report_score:
                t_size = t.size()
                if len(t_size) == 3:
                    pred_adapted = p[:t_size[len(t_size) - 2]].unsqueeze(0)
                    t_adapted = t[:, :p.size()[0]]
                else:
                    pred_adapted = p
                    t_adapted = t
                v1 = F.cosine_similarity(pred_adapted, t_adapted, dim=len(t.size()) - 1)
                for over_gen in range(abs(p.size()[0] - t.size()[len(t_size) - 2])):
                    v1 = torch.cat((v1, penalty), 1)
                    il_nietrafionych += 1
                v2 = torch.acos(v1)
                print("angle " + str(math.degrees(v2.item())) + " radians " + str(v2.item()))
                all_scores.append(math.degrees(v2.item()))
                all_predictions.append([p.detach().numpy().tolist()])
                all_vectors.append(p.detach().numpy())
            else:
                ret = p.detach().numpy().tolist()
                all_scores.append(scores.tolist())
                all_predictions.append(ret)
        i += 1

    if self.report_score:
        iv = 0
        degrs = []
        for v in all_vectors:
            if iv == 0:
                iv += 1
                continue
            v1 = all_vectors[iv - 1]
            v1 = F.cosine_similarity(torch.from_numpy(v), torch.from_numpy(v1),
                                     dim=(len(v1.shape) - 1))
            v1 = torch.acos(v1)
            degr = math.degrees(v1)
            degrs.append(degr)
            iv += 1
        if self.decoder_type.startswith("vecdif"):
            all_scores = np.array(all_scores)
            degrs = np.array(degrs)
            print("Average error " + str(np.mean(all_scores)) +
                  " std dev= " + str(np.std(all_scores)) +
                  " sum = " + str(np.sum(all_scores)) +
                  " len = " + str(all_scores.shape[0]))
            print("average angle between predictions " + str(np.mean(degrs)))
        msg = self._report_score('PRED', pred_score_total, pred_words_total)
        if self.logger:
            self.logger.info(msg)
        else:
            print(msg)
        if tgt_path is not None:
            msg = self._report_score('GOLD', gold_score_total, gold_words_total)
            if self.logger:
                self.logger.info(msg)
            else:
                print(msg)
            if self.report_bleu:
                msg = self._report_bleu(tgt_path)
                if self.logger:
                    self.logger.info(msg)
                else:
                    print(msg)
            if self.report_rouge:
                msg = self._report_rouge(tgt_path)
                if self.logger:
                    self.logger.info(msg)
                else:
                    print(msg)

    if self.dump_beam:
        import json
        json.dump(self.translator.beam_accum,
                  codecs.open(self.dump_beam, 'w', 'utf-8'))
    return all_scores, all_predictions
def run_tfidf(n_gram, stop=False):
    print("*************************************")
    path = android_corpus_file
    fopen = gzip.open if path.endswith(".gz") else open
    lines = []
    id_to_index = {}
    i = 0
    with fopen(path) as corpus:
        print("Reading query corpus")
        for line in corpus:
            query_id, title, body = line.split("\t")
            lines.append(title.strip() + " " + body.strip())
            id_to_index[query_id] = i
            i += 1
    if stop:
        vectorizer = TfidfVectorizer(ngram_range=(1, n_gram), stop_words='english')
    else:
        vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform(lines).toarray()
    # np.array(lines).shape   -> (42970,)
    # np.array(vectors).shape -> (42970, 36404)

    print("Creating meter")
    m = meter.AUCMeter()
    cos_sims = []
    labels = []
    with open(android_test_pos) as test_file:
        print("Reading test.pos")
        for line in test_file:
            qid, rid = line.strip().split(" ")
            q_vec = vectors[id_to_index[qid]]
            r_vec = vectors[id_to_index[rid]]
            q_emb = Variable(torch.FloatTensor(q_vec))
            r_emb = Variable(torch.FloatTensor(r_vec))
            cos_sim = F.cosine_similarity(q_emb, r_emb, dim=0, eps=1e-6)
            cos_sims.append(cos_sim.data[0])
            labels.append(1)
    print("Adding positive output and target to meter")
    m.add(torch.FloatTensor(cos_sims), torch.IntTensor(labels))

    with open(android_test_neg) as test_file:
        cos_sims = []
        labels = []
        i = 0
        print("Reading test.neg")
        for line in test_file:
            qid, rid = line.strip().split(" ")
            q_vec = vectors[id_to_index[qid]]
            r_vec = vectors[id_to_index[rid]]
            q_emb = Variable(torch.FloatTensor(q_vec))
            r_emb = Variable(torch.FloatTensor(r_vec))
            cos_sim = F.cosine_similarity(q_emb, r_emb, dim=0, eps=1e-6)
            cos_sims.append(cos_sim.data[0])
            labels.append(0)
            i += 1
            if i % 4000 == 0:
                print("index: ", i)
    m.add(torch.FloatTensor(cos_sims), torch.IntTensor(labels))
    cos_sims = []
    labels = []
    print(m.value(max_fpr=0.05))
    print("*************************************")
def get_knns(embeddings, document_id, k=10):
    x = embeddings[document_id]
    distances = 1 - F.cosine_similarity(x, embeddings, dim=-1)
    sort_keys = torch.argsort(distances)
    # skip index 0 of the sorted order: it is the query document itself
    return sort_keys[1:k + 1], distances[sort_keys][1:k + 1]
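# Usage sketch; the query's own (zero-distance) entry is skipped by the
# [1:k+1] slice above.
import torch

embeddings = torch.randn(50, 32)
nn_idx, nn_dist = get_knns(embeddings, document_id=0, k=5)
assert nn_idx.shape == (5,) and nn_dist.shape == (5,)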
def score(self, x, y):
    return F.cosine_similarity(x, y, dim=1)
def evaluate(model):
    questions = preprocessing.id_to_question
    eval_dev_data = []
    eval_test_data = []
    dev_ids = sorted(preprocessing.dev_set.keys())
    test_ids = sorted(preprocessing.test_set.keys())

    def score_split(ids, question_set, positive_index_map, eval_data):
        # shared evaluation loop (the original duplicated this code verbatim
        # for the dev and test sets)
        for i in range(len(ids)):
            print(str(i) + '/' + str(len(ids)))
            qr = ids[i]

            # the word matrices for each sequence in the entire batch
            batch_title_data = []
            batch_body_data = []
            # the ordered, true lengths of each sequence before padding
            batch_title_lengths = []
            batch_body_lengths = []
            # masks for title and body
            batch_title_mask = []
            batch_body_mask = []

            # question of interest
            q = questions[qr]
            pos_indices = positive_index_map[qr]
            batch_title_data.append(preprocessing.sentence_to_embeddings(q[0]))
            batch_title_lengths.append(
                1.0 / min(preprocessing.MAX_SEQUENCE_LENGTH, len(q[0])))
            batch_title_mask.append(qr_lstm.get_mask(q[0], qr_lstm.HIDDEN_SIZES[2]))
            batch_body_data.append(preprocessing.sentence_to_embeddings(q[1]))
            batch_body_lengths.append(
                1.0 / min(preprocessing.MAX_SEQUENCE_LENGTH, len(q[1])))
            batch_body_mask.append(qr_lstm.get_mask(q[1], qr_lstm.HIDDEN_SIZES[2]))

            # negative examples
            for c in question_set[qr][1]:
                cand = questions[c]
                batch_title_data.append(preprocessing.sentence_to_embeddings(cand[0]))
                batch_title_lengths.append(
                    1.0 / min(preprocessing.MAX_SEQUENCE_LENGTH, len(cand[0])))
                batch_title_mask.append(qr_lstm.get_mask(cand[0], qr_lstm.HIDDEN_SIZES[2]))
                batch_body_data.append(preprocessing.sentence_to_embeddings(cand[1]))
                batch_body_lengths.append(
                    1.0 / min(preprocessing.MAX_SEQUENCE_LENGTH, len(cand[1])))
                batch_body_mask.append(qr_lstm.get_mask(cand[1], qr_lstm.HIDDEN_SIZES[2]))

            # convert batch data and lengths to Variables
            batch_title_data = preprocessing.to_float_variable(batch_title_data)
            batch_body_data = preprocessing.to_float_variable(batch_body_data)
            batch_title_lengths = preprocessing.to_float_variable(batch_title_lengths)
            batch_body_lengths = preprocessing.to_float_variable(batch_body_lengths)
            batch_title_mask = preprocessing.to_float_variable(batch_title_mask)
            batch_body_mask = preprocessing.to_float_variable(batch_body_mask)

            forward_start = time.time()
            title_states, title_out = model(batch_title_data)
            print("the title forward lstm took ", time.time() - forward_start)
            forward_start = time.time()
            body_states, body_out = model(batch_body_data)
            print("the body forward lstm took ", time.time() - forward_start)

            ############################################
            ## Re-arrange Data For Cosine Calculation ##
            ############################################
            title_states = title_states * batch_title_mask
            body_states = body_states * batch_body_mask

            # mean pooling of the hidden states of each question's title and body sequences
            title_states = torch.sum(title_states, dim=1, keepdim=False)
            averaged_title_states = title_states * batch_title_lengths.repeat(
                title_states.size(dim=1), 1).t()
            body_states = torch.sum(body_states, dim=1, keepdim=False)
            averaged_body_states = body_states * batch_body_lengths.repeat(
                body_states.size(dim=1), 1).t()

            # take the average between the title and body representations
            # for the final representation
            final_question_reps = (averaged_title_states + averaged_body_states).div(2)

            ###############################################
            ## Calculate Cosines and Construct Eval Data ##
            ###############################################
            cosine_scores = F.cosine_similarity(
                final_question_reps[1:], final_question_reps[0], -1)
            scores = list(cosine_scores.data)
            sorted_eval = [x for _, x in sorted(zip(scores, pos_indices), reverse=True)]
            eval_data.append(sorted_eval)

    ## Dev Set
    score_split(dev_ids, preprocessing.dev_set,
                preprocessing.dev_positive_indices, eval_dev_data)
    ## Test Set
    score_split(test_ids, preprocessing.test_set,
                preprocessing.test_positive_indices, eval_test_data)

    evaluation_of_dev = Evaluation(eval_dev_data)
    evaluation_of_test = Evaluation(eval_test_data)
    print("\n")
    print("DEV")
    print("\n")
    print("MAP", evaluation_of_dev.MAP())
    print("MRR", evaluation_of_dev.MRR())
    print("P@1", evaluation_of_dev.Precision(1))
    print("P@5", evaluation_of_dev.Precision(5))
    print("\n")
    print("TEST")
    print("\n")
    print("MAP", evaluation_of_test.MAP())
    print("MRR", evaluation_of_test.MRR())
    print("P@1", evaluation_of_test.Precision(1))
    print("P@5", evaluation_of_test.Precision(5))
# Set Loader
train_loader = torch.utils.data.DataLoader(train, batch_size=args.batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val, batch_size=500, shuffle=False)
test_loader = torch.utils.data.DataLoader(test, batch_size=500, shuffle=False)
print("Dataloaders generated {}".format(timeSince(start)), file=Logger)

# nicer euclidean similarity matrix at
# https://discuss.pytorch.org/t/build-your-own-loss-function-in-pytorch/235/7
# np.sqrt(sum((mat[valtestdex[i]]-mat[traindex[j]])**2))
# TODO: better euclidean knn implementation!
valtestdex = np.concatenate([val.expn_dex, test.expn_dex])
traindex = train.expn_dex
simmat = torch.zeros(7, 49)
mat = torch.Tensor(np.vstack([np.zeros((1, 30)), np.load('mu.npy')])).cuda()
for i in range(7):
    for j in range(49):
        simmat[i, j] = F.cosine_similarity(mat[valtestdex[i]], mat[traindex[j]], dim=0).item()
k_weights, k_nearest = simmat.sort(descending=False)
# args.num_k=1
k_weights, k_nearest = k_weights[:, :args.num_k], k_nearest[:, :args.num_k]
k_weights = F.normalize(k_weights, p=1, dim=1)
tensor1 = torch.zeros(7, 49)
tensor1.scatter_(1, k_nearest, k_weights)
tensor2 = torch.zeros(57, 49)
tensor2[valtestdex, :] = tensor1
tensor2 = tensor2.cuda()
# take the (7, 49) matrix and expand it so it is easy to index into with geneexpr

# Loss function
criterion = torch.nn.BCEWithLogitsLoss(size_average=False)
def calculate_loss(self, a, b):
    a = pitchyaw_to_vector(a)
    b = pitchyaw_to_vector(b)
    sim = F.cosine_similarity(a, b, dim=1, eps=1e-8)
    sim = F.hardtanh_(sim, min_val=-1 + 1e-8, max_val=1 - 1e-8)
    return torch.acos(sim) * self._to_degrees
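# Sketch of the angular-error core above with precomputed 3D unit vectors
# (pitchyaw_to_vector is external and bypassed here): clamped cosine -> degrees.
import math
import torch
import torch.nn.functional as F

a = F.normalize(torch.randn(4, 3), dim=1)
b = F.normalize(torch.randn(4, 3), dim=1)
sim = F.cosine_similarity(a, b, dim=1).clamp(-1 + 1e-8, 1 - 1e-8)
err_deg = torch.acos(sim) * (180.0 / math.pi)  # per-sample angular error in degrees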
def chamfer_distance(
    x,
    y,
    x_lengths=None,
    y_lengths=None,
    x_normals=None,
    y_normals=None,
    weights=None,
    batch_reduction: Union[str, None] = "mean",
    point_reduction: str = "mean",
):
    """
    Chamfer distance between two pointclouds x and y.

    Args:
        x: FloatTensor of shape (N, P1, D) or a Pointclouds object representing
            a batch of point clouds with at most P1 points in each batch element,
            batch size N and feature dimension D.
        y: FloatTensor of shape (N, P2, D) or a Pointclouds object representing
            a batch of point clouds with at most P2 points in each batch element,
            batch size N and feature dimension D.
        x_lengths: Optional LongTensor of shape (N,) giving the number of points
            in each cloud in x.
        y_lengths: Optional LongTensor of shape (N,) giving the number of points
            in each cloud in y.
        x_normals: Optional FloatTensor of shape (N, P1, D).
        y_normals: Optional FloatTensor of shape (N, P2, D).
        weights: Optional FloatTensor of shape (N,) giving weights for
            batch elements for reduction operation.
        batch_reduction: Reduction operation to apply for the loss across the
            batch, can be one of ["mean", "sum"] or None.
        point_reduction: Reduction operation to apply for the loss across the
            points, can be one of ["mean", "sum"].

    Returns:
        2-element tuple containing

        - **loss**: Tensor giving the reduced distance between the pointclouds
          in x and the pointclouds in y.
        - **loss_normals**: Tensor giving the reduced cosine distance of normals
          between pointclouds in x and pointclouds in y. Returns None if
          x_normals and y_normals are None.
    """
    _validate_chamfer_reduction_inputs(batch_reduction, point_reduction)

    x, x_lengths, x_normals = _handle_pointcloud_input(x, x_lengths, x_normals)
    y, y_lengths, y_normals = _handle_pointcloud_input(y, y_lengths, y_normals)

    return_normals = x_normals is not None and y_normals is not None

    N, P1, D = x.shape
    P2 = y.shape[1]

    # Check if inputs are heterogeneous and create a lengths mask.
    is_x_heterogeneous = ~(x_lengths == P1).all()
    is_y_heterogeneous = ~(y_lengths == P2).all()
    x_mask = (torch.arange(P1, device=x.device)[None] >= x_lengths[:, None])  # shape [N, P1]
    y_mask = (torch.arange(P2, device=y.device)[None] >= y_lengths[:, None])  # shape [N, P2]

    if y.shape[0] != N or y.shape[2] != D:
        raise ValueError("y does not have the correct shape.")
    if weights is not None:
        if weights.size(0) != N:
            raise ValueError("weights must be of shape (N,).")
        if not (weights >= 0).all():
            raise ValueError("weights cannot be negative.")
        if weights.sum() == 0.0:
            weights = weights.view(N, 1)
            if batch_reduction in ["mean", "sum"]:
                return (
                    (x.sum((1, 2)) * weights).sum() * 0.0,
                    (x.sum((1, 2)) * weights).sum() * 0.0,
                )
            return ((x.sum((1, 2)) * weights) * 0.0,
                    (x.sum((1, 2)) * weights) * 0.0)

    cham_norm_x = x.new_zeros(())
    cham_norm_y = x.new_zeros(())

    x_nn = knn_points(x, y, lengths1=x_lengths, lengths2=y_lengths, K=1)
    y_nn = knn_points(y, x, lengths1=y_lengths, lengths2=x_lengths, K=1)

    cham_x = x_nn.dists[..., 0]  # (N, P1)
    cham_y = y_nn.dists[..., 0]  # (N, P2)

    if is_x_heterogeneous:
        cham_x[x_mask] = 0.0
    if is_y_heterogeneous:
        cham_y[y_mask] = 0.0

    if weights is not None:
        cham_x *= weights.view(N, 1)
        cham_y *= weights.view(N, 1)

    if return_normals:
        # Gather the normals using the indices and keep only value for k=0
        x_normals_near = knn_gather(y_normals, x_nn.idx, y_lengths)[..., 0, :]
        y_normals_near = knn_gather(x_normals, y_nn.idx, x_lengths)[..., 0, :]

        cham_norm_x = 1 - torch.abs(
            F.cosine_similarity(x_normals, x_normals_near, dim=2, eps=1e-6))
        cham_norm_y = 1 - torch.abs(
            F.cosine_similarity(y_normals, y_normals_near, dim=2, eps=1e-6))

        if is_x_heterogeneous:
            cham_norm_x[x_mask] = 0.0
        if is_y_heterogeneous:
            cham_norm_y[y_mask] = 0.0

        if weights is not None:
            cham_norm_x *= weights.view(N, 1)
            cham_norm_y *= weights.view(N, 1)

    # Apply point reduction
    cham_x = cham_x.sum(1)  # (N,)
    cham_y = cham_y.sum(1)  # (N,)
    if return_normals:
        cham_norm_x = cham_norm_x.sum(1)  # (N,)
        cham_norm_y = cham_norm_y.sum(1)  # (N,)
    if point_reduction == "mean":
        cham_x /= x_lengths
        cham_y /= y_lengths
        if return_normals:
            cham_norm_x /= x_lengths
            cham_norm_y /= y_lengths

    if batch_reduction is not None:
        # batch_reduction is "mean" or "sum"
        cham_x = cham_x.sum()
        cham_y = cham_y.sum()
        if return_normals:
            cham_norm_x = cham_norm_x.sum()
            cham_norm_y = cham_norm_y.sum()
        if batch_reduction == "mean":
            div = weights.sum() if weights is not None else N
            cham_x /= div
            cham_y /= div
            if return_normals:
                cham_norm_x /= div
                cham_norm_y /= div

    cham_dist = cham_x + cham_y
    cham_normals = cham_norm_x + cham_norm_y if return_normals else None

    return cham_dist, cham_normals
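# Hedged usage sketch for chamfer_distance above. It assumes the pytorch3d
# helpers the function calls (knn_points, knn_gather, _handle_pointcloud_input,
# _validate_chamfer_reduction_inputs) are in scope; the equivalent library entry
# point is pytorch3d.loss.chamfer_distance.
import torch

x = torch.randn(2, 128, 3)                    # batch of 2 clouds, 128 points each
y = torch.randn(2, 256, 3)                    # second batch, 256 points each
loss, loss_normals = chamfer_distance(x, y)   # loss_normals is None without normals
print(loss)                                   # scalar: mean over points and batch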
def _similarity(self, k, β):
    k = k.view(self.batch_size, 1, -1)
    w = F.softmax(
        β * F.cosine_similarity(self.memory + 1e-16, k + 1e-16, dim=-1),
        dim=1)
    return w
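# Hedged, self-contained sketch of the content-based addressing above
# (NTM-style): cosine similarity of a key against every memory row, sharpened
# by a temperature β and normalized to a distribution with softmax.
import torch
import torch.nn.functional as F

batch_size, n_slots, slot_dim = 2, 8, 16
memory = torch.randn(batch_size, n_slots, slot_dim)
k = torch.randn(batch_size, slot_dim)
beta = 5.0
w = F.softmax(beta * F.cosine_similarity(memory + 1e-16,
                                         k.view(batch_size, 1, -1) + 1e-16,
                                         dim=-1), dim=1)
print(w.shape)   # (2, 8); each row sums to 1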
def evaluateFromList(self, test_list, test_path, nDataLoaderThread, eval, print_interval=100, num_eval=10, **kwargs):

    self.__model__.eval()

    lines = []
    files = []
    feats = {}
    tstart = time.time()

    ## Read all lines
    with open(test_list) as f:
        lines = f.readlines()

    ## Get a list of unique file names
    files = sum([x.strip().split()[-2:] for x in lines], [])
    setfiles = list(set(files))
    setfiles.sort()

    ## Define test data loader
    test_dataset = test_dataset_loader(setfiles, test_path, num_eval=num_eval, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=nDataLoaderThread,
        drop_last=False,
    )

    ## Extract features for every file
    for idx, data in enumerate(test_loader):
        inp1 = data[0][0].cuda()
        ref_feat = self.__model__(inp1).detach().cpu()
        feats[data[1][0]] = ref_feat
        telapsed = time.time() - tstart

        if idx % print_interval == 0:
            sys.stdout.write(
                "\rReading %d of %d: %.2f Hz, embedding size %d"
                % (idx, len(setfiles), idx / telapsed, ref_feat.size()[1]))

    print('')
    all_scores = []
    all_labels = []
    all_trials = []
    tstart = time.time()

    ## Read trial pairs and compute all scores
    for idx, line in enumerate(lines):

        data = line.split()

        ## Append random label if missing
        if len(data) == 2:
            data = [random.randint(0, 1)] + data

        ref_feat = feats[data[1]].cuda()
        com_feat = feats[data[2]].cuda()

        if self.__model__.module.__L__.test_normalize:
            ref_feat = F.normalize(ref_feat, p=2, dim=1)
            com_feat = F.normalize(com_feat, p=2, dim=1)

        if eval:
            dist = F.cosine_similarity(
                ref_feat.unsqueeze(-1),
                com_feat.unsqueeze(-1)).detach().cpu().numpy()
            score = numpy.mean(dist)
        else:
            dist = F.pairwise_distance(
                ref_feat.unsqueeze(-1),
                com_feat.unsqueeze(-1).transpose(0, 2)).detach().cpu().numpy()
            score = -1 * numpy.mean(dist)

        all_scores.append(score)
        all_labels.append(int(data[0]))
        all_trials.append(data[1] + " " + data[2])

        if idx % print_interval == 0:
            telapsed = time.time() - tstart
            sys.stdout.write("\rComputing %d of %d: %.2f Hz" % (idx, len(lines), idx / telapsed))
            sys.stdout.flush()

    if eval:
        all_scores = preprocessing.MinMaxScaler().fit_transform(
            numpy.asarray(all_scores).reshape(-1, 1))

    print('')

    return (all_scores, all_labels, all_trials)
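# Hedged, self-contained sketch related to the scoring step above. The snippet
# pairs the i-th eval segment of one file with the i-th segment of the other; a
# common variant scores *all* segment pairs. After L2-normalization, cosine
# similarity reduces to a dot product, so one matrix product gives every pair.
import torch
import torch.nn.functional as F

ref = F.normalize(torch.randn(10, 512), p=2, dim=1)   # 10 eval segments
com = F.normalize(torch.randn(10, 512), p=2, dim=1)
score = (ref @ com.t()).mean().item()   # mean all-pairs cosine similarity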
def score(emb1, emb2):
    dist = F.cosine_similarity(emb1, emb2, dim=-1, eps=1e-08)
    return dist
def forward(self,
            context_1: torch.Tensor,
            mask_1: torch.Tensor,
            context_2: torch.Tensor,
            mask_2: torch.Tensor) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
    # pylint: disable=arguments-differ
    """
    Given the forward (or backward) representations of sentence1 and sentence2,
    apply four bilateral matching functions between them in one direction.

    Parameters
    ----------
    context_1 : ``torch.Tensor``
        Tensor of shape (batch_size, seq_len1, hidden_dim) representing the
        encoding of the first sentence.
    mask_1 : ``torch.Tensor``
        Binary Tensor of shape (batch_size, seq_len1), indicating which
        positions in the first sentence are padding (0) and which are not (1).
    context_2 : ``torch.Tensor``
        Tensor of shape (batch_size, seq_len2, hidden_dim) representing the
        encoding of the second sentence.
    mask_2 : ``torch.Tensor``
        Binary Tensor of shape (batch_size, seq_len2), indicating which
        positions in the second sentence are padding (0) and which are not (1).

    Returns
    -------
    A tuple of matching vectors for the two sentences, each of which is a list
    of matching vectors of shape (batch, seq_len, num_perspectives or 1).
    """
    assert (not mask_2.requires_grad) and (not mask_1.requires_grad)
    assert context_1.size(-1) == context_2.size(-1) == self.hidden_dim

    # (batch,)
    len_1 = get_lengths_from_binary_sequence_mask(mask_1)
    len_2 = get_lengths_from_binary_sequence_mask(mask_2)

    # (batch, seq_len*)
    mask_1, mask_2 = mask_1.float(), mask_2.float()

    # explicitly set masked weights to zero
    # (batch_size, seq_len*, hidden_dim)
    context_1 = context_1 * mask_1.unsqueeze(-1)
    context_2 = context_2 * mask_2.unsqueeze(-1)

    # array to keep the matching vectors for the two sentences
    matching_vector_1: List[torch.Tensor] = []
    matching_vector_2: List[torch.Tensor] = []

    # Step 0. unweighted cosine
    # First calculate the cosine similarities between each forward
    # (or backward) contextual embedding and every forward (or backward)
    # contextual embedding of the other sentence.
    # (batch, seq_len1, seq_len2)
    cosine_sim = F.cosine_similarity(context_1.unsqueeze(-2),
                                     context_2.unsqueeze(-3), dim=3)

    # (batch, seq_len*, 1)
    cosine_max_1 = masked_max(cosine_sim, mask_2.unsqueeze(-2), dim=2, keepdim=True)
    cosine_mean_1 = masked_mean(cosine_sim, mask_2.unsqueeze(-2), dim=2, keepdim=True)
    cosine_max_2 = masked_max(cosine_sim.permute(0, 2, 1), mask_1.unsqueeze(-2),
                              dim=2, keepdim=True)
    cosine_mean_2 = masked_mean(cosine_sim.permute(0, 2, 1), mask_1.unsqueeze(-2),
                                dim=2, keepdim=True)

    matching_vector_1.extend([cosine_max_1, cosine_mean_1])
    matching_vector_2.extend([cosine_max_2, cosine_mean_2])

    # Step 1. Full-Matching
    # Each time step of forward (or backward) contextual embedding of one sentence
    # is compared with the last time step of the forward (or backward)
    # contextual embedding of the other sentence.
    if self.with_full_match:

        # (batch, 1, hidden_dim)
        if self.is_forward:
            last_position_1 = (len_1 - 1).clamp(min=0)
            last_position_1 = last_position_1.view(-1, 1, 1).expand(-1, 1, self.hidden_dim)
            last_position_2 = (len_2 - 1).clamp(min=0)
            last_position_2 = last_position_2.view(-1, 1, 1).expand(-1, 1, self.hidden_dim)

            context_1_last = context_1.gather(1, last_position_1)
            context_2_last = context_2.gather(1, last_position_2)
        else:
            context_1_last = context_1[:, 0:1, :]
            context_2_last = context_2[:, 0:1, :]

        # (batch, seq_len*, num_perspectives)
        matching_vector_1_full = multi_perspective_match(context_1,
                                                         context_2_last,
                                                         self.full_match_weights)
        matching_vector_2_full = multi_perspective_match(context_2,
                                                         context_1_last,
                                                         self.full_match_weights_reversed)

        matching_vector_1.extend(matching_vector_1_full)
        matching_vector_2.extend(matching_vector_2_full)

    # Step 2. Maxpooling-Matching
    # Each time step of forward (or backward) contextual embedding of one sentence
    # is compared with every time step of the forward (or backward)
    # contextual embedding of the other sentence, and only the max value of each
    # dimension is retained.
    if self.with_maxpool_match:
        # (batch, seq_len1, seq_len2, num_perspectives)
        matching_vector_max = multi_perspective_match_pairwise(context_1,
                                                               context_2,
                                                               self.maxpool_match_weights)

        # (batch, seq_len*, num_perspectives)
        matching_vector_1_max = masked_max(matching_vector_max,
                                           mask_2.unsqueeze(-2).unsqueeze(-1),
                                           dim=2)
        matching_vector_1_mean = masked_mean(matching_vector_max,
                                             mask_2.unsqueeze(-2).unsqueeze(-1),
                                             dim=2)
        matching_vector_2_max = masked_max(matching_vector_max.permute(0, 2, 1, 3),
                                           mask_1.unsqueeze(-2).unsqueeze(-1),
                                           dim=2)
        matching_vector_2_mean = masked_mean(matching_vector_max.permute(0, 2, 1, 3),
                                             mask_1.unsqueeze(-2).unsqueeze(-1),
                                             dim=2)

        matching_vector_1.extend([matching_vector_1_max, matching_vector_1_mean])
        matching_vector_2.extend([matching_vector_2_max, matching_vector_2_mean])

    # Step 3. Attentive-Matching
    # Each forward (or backward) similarity is taken as the weight
    # of the forward (or backward) contextual embedding, and calculate an
    # attentive vector for the sentence by weighted summing all its
    # contextual embeddings.
    # Finally match each forward (or backward) contextual embedding
    # with its corresponding attentive vector.

    # (batch, seq_len1, seq_len2, hidden_dim)
    att_2 = context_2.unsqueeze(-3) * cosine_sim.unsqueeze(-1)
    # (batch, seq_len1, seq_len2, hidden_dim)
    att_1 = context_1.unsqueeze(-2) * cosine_sim.unsqueeze(-1)

    if self.with_attentive_match:
        # (batch, seq_len*, hidden_dim)
        att_mean_2 = masked_softmax(att_2.sum(dim=2), mask_1.unsqueeze(-1))
        att_mean_1 = masked_softmax(att_1.sum(dim=1), mask_2.unsqueeze(-1))

        # (batch, seq_len*, num_perspectives)
        matching_vector_1_att_mean = multi_perspective_match(context_1,
                                                             att_mean_2,
                                                             self.attentive_match_weights)
        matching_vector_2_att_mean = multi_perspective_match(context_2,
                                                             att_mean_1,
                                                             self.attentive_match_weights_reversed)
        matching_vector_1.extend(matching_vector_1_att_mean)
        matching_vector_2.extend(matching_vector_2_att_mean)

    # Step 4. Max-Attentive-Matching
    # Pick the contextual embeddings with the highest cosine similarity as the
    # attentive vector, and match each forward (or backward) contextual
    # embedding with its corresponding attentive vector.
    if self.with_max_attentive_match:
        # (batch, seq_len*, hidden_dim)
        att_max_2 = masked_max(att_2, mask_2.unsqueeze(-2).unsqueeze(-1), dim=2)
        att_max_1 = masked_max(att_1.permute(0, 2, 1, 3),
                               mask_1.unsqueeze(-2).unsqueeze(-1), dim=2)

        # (batch, seq_len*, num_perspectives)
        matching_vector_1_att_max = multi_perspective_match(context_1,
                                                            att_max_2,
                                                            self.max_attentive_match_weights)
        matching_vector_2_att_max = multi_perspective_match(context_2,
                                                            att_max_1,
                                                            self.max_attentive_match_weights_reversed)

        matching_vector_1.extend(matching_vector_1_att_max)
        matching_vector_2.extend(matching_vector_2_att_max)

    return matching_vector_1, matching_vector_2
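# Hedged, self-contained sketch of the broadcasting trick used in Step 0 above:
# unsqueezing to (B, L1, 1, H) and (B, 1, L2, H) lets a single
# F.cosine_similarity call produce the full (B, L1, L2) similarity matrix.
import torch
import torch.nn.functional as F

context_1 = torch.randn(4, 7, 32)   # (batch, seq_len1, hidden)
context_2 = torch.randn(4, 9, 32)   # (batch, seq_len2, hidden)
cosine_sim = F.cosine_similarity(context_1.unsqueeze(-2),   # (4, 7, 1, 32)
                                 context_2.unsqueeze(-3),   # (4, 1, 9, 32)
                                 dim=3)
print(cosine_sim.shape)             # torch.Size([4, 7, 9])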
def calculate_loss(self, flow, target_mask, layer, mask=None, use_bilinear_sampling=False):
    target_vgg = self.target_vgg[layer]
    source_vgg = self.source_vgg[layer]
    [b, c, h, w] = target_vgg.shape
    # maps = F.interpolate(maps, [h,w]).view(b,-1)
    flow = F.interpolate(flow, [h, w])
    target_mask = F.interpolate(target_mask.float(), [h, w])
    flow = flow.view(3, self.opt.batchSize, flow.size(1), h, w)
    target_mask = target_mask.view(3, self.opt.batchSize, target_mask.size(1), h, w)

    target_all = target_vgg.view(b, c, -1)                  # [b C N2]
    source_all = source_vgg.view(b, c, -1).transpose(1, 2)  # [b N2 C]

    source_norm = source_all / (source_all.norm(dim=2, keepdim=True) + self.eps)
    target_norm = target_all / (target_all.norm(dim=1, keepdim=True) + self.eps)
    try:
        correction = torch.bmm(source_norm, target_norm)    # [b N2 N2]
    except RuntimeError:
        # Print the offending shapes, then re-raise (the original bare except
        # fell through and crashed later on the undefined `correction`).
        print("An exception occurred")
        print(source_norm.shape)
        print(target_norm.shape)
        raise
    (correction_max, max_indices) = torch.max(correction, dim=1)

    # interpolate with bilinear sampling
    if use_bilinear_sampling:
        input_sample = self.bilinear_warp(source_vgg, flow).view(b, c, -1)
    else:
        input_sample = self.resample(source_vgg, flow[0]) * target_mask[0]
        input_sample += self.resample(source_vgg, flow[1]) * target_mask[1]
        input_sample += self.resample(source_vgg, flow[2]) * target_mask[2]
        input_sample = input_sample.view(b, c, -1)

    correction_sample = F.cosine_similarity(input_sample, target_all)  # [b 1 N2]
    loss_map = torch.exp(-correction_sample / (correction_max + self.eps))
    if mask is None:
        loss = torch.mean(loss_map) - torch.exp(torch.tensor(-1).type_as(loss_map))
    else:
        mask = F.interpolate(mask, size=(target_vgg.size(2), target_vgg.size(3)))
        mask = mask.view(-1, target_vgg.size(2) * target_vgg.size(3))
        loss_map = loss_map - torch.exp(torch.tensor(-1).type_as(loss_map))
        loss = torch.sum(mask * loss_map) / (torch.sum(mask) + self.eps)

    # Debug visualization (left commented out by the original author):
    # print(correction_sample[0,2076:2082])
    # print(correction_max[0,2076:2082])
    # coor_x = [32,32]
    # coor = max_indices[0,32+32*64]
    # coor_y = [int(coor%64), int(coor/64)]
    # source = F.interpolate(self.source, [64,64])
    # target = F.interpolate(self.target, [64,64])
    # source_i = source[0]
    # target_i = target[0]
    # source_i = source_i.view(3, -1)
    # source_i[:,coor]=-1
    # source_i[0,coor]=1
    # source_i = source_i.view(3,64,64)
    # target_i[:,32,32]=-1
    # target_i[0,32,32]=1
    # lists = str(int(torch.rand(1)*100))
    # img_numpy = util.tensor2im(source_i.data)
    # util.save_image(img_numpy, 'source'+lists+'.png')
    # img_numpy = util.tensor2im(target_i.data)
    # util.save_image(img_numpy, 'target'+lists+'.png')

    return loss
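# Hedged, self-contained sketch of the correlation step above: after
# L2-normalizing the channel dimension, a batched matmul of (b, N, C) with
# (b, C, N) yields the (b, N, N) matrix of cosine similarities between all
# source and target feature locations.
import torch

b, c, n = 2, 64, 16 * 16
source = torch.randn(b, n, c)
target = torch.randn(b, c, n)
eps = 1e-8
source = source / (source.norm(dim=2, keepdim=True) + eps)
target = target / (target.norm(dim=1, keepdim=True) + eps)
correlation = torch.bmm(source, target)   # (b, n, n), entries in [-1, 1]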
def train(self):
    save_dict = {}
    self.feature.train()
    ep_bar = tqdm(range(self.opt.epoch))
    for ep in ep_bar:
        batch_bar = tqdm(self.train_dataloader)
        for batch, data in enumerate(batch_bar):
            it = ep * len(self.train_dataloader) + batch

            ###############################################################
            # 0. Get the data
            ###############################################################
            if self.opt.hard_mining:
                bases, positives, negative_candidates, confidences = data
                bases = bases.cuda()
                positives = positives.cuda()
                negative_candidates = negative_candidates.cuda()
                confidences = confidences.cuda()

                # Pick the hardest negative (highest cosine similarity to the
                # base embedding) out of each set of candidates.
                negatives = torch.zeros(bases.size()[0], 3, self.opt.npoints).cuda()
                for i, (negative_candidate, base) in enumerate(zip(negative_candidates, bases)):
                    base = base.unsqueeze(0).repeat((10, 1, 1))
                    negative_embedding, _, _ = self.feature(negative_candidate)
                    base_embedding, _, _ = self.feature(base)
                    output = F.cosine_similarity(negative_embedding, base_embedding, dim=1)
                    scores = output.detach().cpu().numpy()
                    idx = np.argmax(scores)
                    negatives[i] = negative_candidate[idx]
            else:
                bases, positives, negatives, confidences = data
                bases = bases.cuda()
                positives = positives.cuda()
                negatives = negatives.cuda()
                confidences = confidences.cuda()

            ###############################################################
            # 1. Get Features
            ###############################################################
            bases_features, _, _ = self.feature(bases)
            positives_features, _, _ = self.feature(positives)
            negatives_features, _, _ = self.feature(negatives)

            ###############################################################
            # 2. Compute triplet loss
            ###############################################################
            if self.opt.confidence:
                loss, _, _ = self.triplet_loss(bases_features, positives_features,
                                               negatives_features, confidences)
            else:
                loss, _, _ = self.triplet_loss(bases_features, positives_features,
                                               negatives_features, None)

            # Aggregate the loss logs
            save_dict['loss'] = loss.item()
            if it % self.opt.log_iter == 0:
                self.log('train', save_dict, ep, it, batch_bar)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        batch_bar.close()
        if ep % self.opt.save_iter == 0:
            self.save_model(ep)
        if ep % self.opt.val_iter == 0:
            self.val(ep, it, batch_bar)
    ep_bar.close()
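# Hedged, self-contained sketch of the hard-negative selection above: among a
# set of candidate embeddings, the "hardest" negative is the one most similar
# to the anchor. The `hardest_negative` helper is hypothetical, standing in for
# the snippet's per-sample loop over self.feature outputs.
import torch
import torch.nn.functional as F

def hardest_negative(anchor_emb, candidate_embs):
    # anchor_emb: (d,); candidate_embs: (k, d)
    sims = F.cosine_similarity(candidate_embs, anchor_emb.unsqueeze(0), dim=1)
    return candidate_embs[sims.argmax()]

anchor = torch.randn(128)
candidates = torch.randn(10, 128)
neg = hardest_negative(anchor, candidates)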
def train_seq2seq_batch_newloss(data_batch, model, optimizer, pos_weight, device, beta=0.7):
    sigmoid = torch.nn.Sigmoid()
    document = data_batch['document']
    label = data_batch['labels']
    input_length = data_batch['input_length']
    indicators = data_batch['indicators']
    padded_lengths = data_batch['padded_lengths']
    # summary_representation = data_batch['summary_representation']
    sentence_lengths = data_batch['sentence_lengths']
    # sentences_batch = data_batch['sentences']
    # references_batch = data_batch['summary_text']
    # rouge_matrix_batch = data_batch['rouge_matrix']
    total_data = torch.sum(input_length)
    end = torch.clamp(torch.cumsum(padded_lengths, 1), 0, input_length[0])
    begin = torch.cat((torch.zeros((len(input_length), 1), dtype=torch.long), end[:, :-1]), 1)
    if torch.cuda.is_available():
        document = document.to(device)
        label = label.to(device)
        input_length = input_length.to(device)
        indicators = indicators.to(device)
        end = end.to(device)
        begin = begin.to(device)
        # summary_representation = summary_representation.to(device)

    out = model(document, input_length, indicators, begin, end, device)
    output, _ = pad_packed_sequence(document)
    out1 = out.squeeze(-1)
    scores = sigmoid(out1)
    scores = scores.permute(1, 0)
    mask = label.gt(-1).float()

    # Option 1: weighted cross-entropy over the sentence labels
    loss_ce = F.binary_cross_entropy_with_logits(out, label, weight=mask,
                                                 reduction='sum',
                                                 pos_weight=pos_weight)

    o1 = output.permute(1, 0, 2).unsqueeze(3)
    o1.requires_grad = False
    # sim_mat = F.cosine_similarity(o1, o1.permute(0, 3, 2, 1), dim=2)
    o1 = o1.cpu()
    # Zero the diagonal so a sentence's similarity with itself is ignored.
    mask = (torch.eye(o1.shape[1], o1.shape[1]) == 1)
    sim_mat = torch.stack([
        F.cosine_similarity(o1[i], o1[i].permute(2, 1, 0), dim=1).masked_fill_(mask, 0)
        for i in range(o1.size()[0])
    ], 0)
    sim_mat = sim_mat.to(device)

    # Redundancy penalty s^T S s: large when similar sentences both score high.
    loss_redundancy = torch.sum(
        torch.bmm(torch.bmm(scores.unsqueeze(1), sim_mat), scores.unsqueeze(2)))
    loss = (1 - beta) * loss_ce + beta * loss_redundancy

    model.zero_grad()
    loss.backward()
    optimizer.step()
    l = loss.data
    del document, label, input_length, indicators, end, begin, loss, out, sim_mat, loss_redundancy
    torch.cuda.empty_cache()
    return l, total_data
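# Hedged, self-contained sketch of the redundancy penalty above: with a
# zero-diagonal sentence-similarity matrix S and selection scores s, the scalar
# s^T S s grows when highly similar sentence pairs both receive high scores.
import torch

n = 5
s = torch.rand(1, n)          # sentence scores in [0, 1], batch of 1
S = torch.rand(1, n, n)       # stand-in for the cosine similarity matrix
S = S.masked_fill(torch.eye(n, dtype=torch.bool), 0.0)  # ignore self-similarity
redundancy = torch.bmm(torch.bmm(s.unsqueeze(1), S), s.unsqueeze(2)).sum()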
def train(self, epoch, max_epoch, writer, print_freq=10, fixbase_epoch=0, open_layers=None):
    losses_t = AverageMeter()
    losses_x = AverageMeter()
    accs = AverageMeter()
    accs_b = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    center_loss = CenterLoss(num_classes=751, feat_dim=4608)
    # center_loss_h = CenterLoss(num_classes=751, feat_dim=256)

    self.model.train()
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print('* Only train {} (epoch: {}/{})'.format(open_layers, epoch + 1, fixbase_epoch))
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    num_batches = len(self.train_loader)
    end = time.time()
    layer_nums = 3
    for batch_idx, data in enumerate(self.train_loader):
        data_time.update(time.time() - end)

        imgs, pids = self._parse_data_for_train(data)
        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()

        self.optimizer.zero_grad()
        outputs, features, h, b, cls_score, b_classify = self.model(imgs)
        pids_g = self.parse_pids(pids, self.datamanager.num_train_pids)
        x = features
        pids_ap = pids_g.cuda()
        # AP_loss = aploss_criterion(b_classify, pids_ap)

        # Similarity-preserving term: the pairwise cosine similarity of the
        # hash codes b should match that of the real-valued features x.
        target_b = F.cosine_similarity(b[:pids_g.size(0) // 2], b[pids_g.size(0) // 2:])
        target_x = F.cosine_similarity(x[:pids_g.size(0) // 2], x[pids_g.size(0) // 2:])
        loss1 = F.mse_loss(target_b, target_x)
        # Quantization term: push |h| towards 1.
        loss2 = torch.mean(torch.abs(torch.pow(
            torch.abs(h) - Variable(torch.ones(h.size()).cuda()), 3)))
        loss_greedy = loss1 + 0.1 * loss2
        loss_batchhard_hash = self.compute_hashbatchhard(b, pids)

        loss_t = self._compute_loss(self.criterion_t, features, pids)
        loss_x = (self._compute_loss(self.criterion_x, outputs, pids)
                  + self._compute_loss(self.criterion_x, b_classify, pids)
                  + self._compute_loss(self.criterion_x, cls_score, pids))
        centerloss = 0  # center_loss(features, pids) # + center_loss_h(h, pids)
        centerloss = centerloss * 0.0005
        loss = (centerloss + self.weight_t * loss_t + self.weight_x * loss_x
                + loss_greedy + loss_batchhard_hash * 2)  # + AP_loss
        loss.backward()
        self.optimizer.step()

        batch_time.update(time.time() - end)
        losses_t.update(loss_t.item(), pids.size(0))
        losses_x.update(loss_x.item(), pids.size(0))
        accs.update(metrics.accuracy(outputs, pids)[0].item())
        accs_b.update(metrics.accuracy(b_classify, pids)[0].item())

        if (batch_idx + 1) % print_freq == 0:
            # estimate remaining time
            eta_seconds = batch_time.avg * (
                num_batches - (batch_idx + 1) + (max_epoch - (epoch + 1)) * num_batches)
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print(
                'Epoch: [{0}/{1}][{2}/{3}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'Loss_t {loss_t.val:.4f} ({loss_t.avg:.4f})\t'
                'Loss_x {loss_x.val:.4f} ({loss_x.avg:.4f})\t'
                'Loss_g {loss_g:.4f}\t'
                'Loss_p {loss_p:.4f}\t'
                'Loss_cl {loss_cl:.4f}\t'
                'Acc {acc.val:.2f} ({acc.avg:.2f})\t'
                'Acc_b {acc_b.val:.2f} ({acc_b.avg:.2f})\t'
                'Lr {lr:.6f}\t'
                'eta {eta}'.format(
                    epoch + 1,
                    max_epoch,
                    batch_idx + 1,
                    num_batches,
                    batch_time=batch_time,
                    data_time=data_time,
                    loss_t=losses_t,
                    loss_x=losses_x,
                    loss_g=loss_greedy,
                    loss_p=loss_batchhard_hash,
                    loss_cl=centerloss,
                    # loss_ap=AP_loss,
                    acc=accs,
                    acc_b=accs_b,
                    lr=self.optimizer.param_groups[0]['lr'],
                    eta=eta_str
                )
            )

        if writer is not None:
            n_iter = epoch * num_batches + batch_idx
            writer.add_scalar('Train/Time', batch_time.avg, n_iter)
            writer.add_scalar('Train/Data', data_time.avg, n_iter)
            writer.add_scalar('Train/Loss_t', losses_t.avg, n_iter)
            writer.add_scalar('Train/Loss_x', losses_x.avg, n_iter)
            writer.add_scalar('Train/Acc', accs.avg, n_iter)
            writer.add_scalar('Train/Lr', self.optimizer.param_groups[0]['lr'], n_iter)

        end = time.time()

    if self.scheduler is not None:
        self.scheduler.step()
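# Hedged, self-contained sketch of the similarity-preserving term above: split
# the batch in half, compare the pairwise cosine similarities of surrogate hash
# codes b against those of the real-valued features x, and penalize the gap
# with MSE (the tanh codes here are stand-ins for the model's outputs).
import torch
import torch.nn.functional as F

b_codes = torch.tanh(torch.randn(8, 64))   # surrogate binary codes in (-1, 1)
x_feats = torch.randn(8, 256)
target_b = F.cosine_similarity(b_codes[:4], b_codes[4:])
target_x = F.cosine_similarity(x_feats[:4], x_feats[4:])
loss = F.mse_loss(target_b, target_x)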
def cos_v1(a, b):
    for bb in b:
        yield F.cosine_similarity(a, bb)
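# Hedged sketch: cos_v1 above yields one vector of similarities per element of
# b; broadcasting computes the full (len(a) x len(b)) matrix in a single call.
import torch
import torch.nn.functional as F

a = torch.randn(3, 8)
b = torch.randn(5, 8)
sim = F.cosine_similarity(a.unsqueeze(1), b.unsqueeze(0), dim=-1)  # (3, 5)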
def predict_redundancy_max(score_batch, ids, src_txt_list, hyp_path, ref_path,
                           tgt_txt, word_length_limit, sent_length_limit,
                           output, device, lamb, attn_weight=None):
    # score_batch = [batch, seq_len]
    correct_num = 0
    summaryfile_batch = []
    reffile_batch = []
    selections = []
    # dim = output.size()[-1]
    # dim = output[0].size()[-1]
    for i in range(len(src_txt_list)):
        summary = []
        scores = score_batch[i, :len(src_txt_list[i])]
        sorted_linenum = [
            x for _, x in sorted(zip(scores, list(range(len(src_txt_list[i])))),
                                 reverse=True)
        ]
        wc = 0
        uc = 0
        selected_ids = []
        summary_representation = []
        # sentence representations: (seq_len, dim, 1)
        all_sent = output[:scores.size()[0], i, :].unsqueeze(2)
        while len(selected_ids) <= len(src_txt_list[i]):
            j = sorted_linenum[0]
            summary.append(' '.join(src_txt_list[i][j]))
            selected_ids.append(j)
            summary_representation.append(output[j, i, :])
            s = torch.stack(summary_representation, 1).unsqueeze(0)
            # Redundancy of every sentence = max cosine similarity to any
            # sentence already in the summary.
            redundancy_score = torch.max(F.cosine_similarity(all_sent, s, 1), 1)[0]
            scores[j] = -100  # never pick the same sentence twice
            # MMR-style rescoring: trade relevance off against redundancy.
            final_scores = lamb * scores - ((1 - lamb) * redundancy_score)
            sorted_linenum = [
                x for _, x in sorted(zip(final_scores, list(range(len(src_txt_list[i])))),
                                     reverse=True)
            ]
            wc += len(src_txt_list[i][j])
            uc += 1
            if uc >= sent_length_limit:
                break
            if wc >= word_length_limit:
                break
        summary = '\n'.join(summary)
        selections.append(selected_ids)
        fname = hyp_path + ids[i] + '.txt'
        with open(fname, 'w') as of:   # the original left this handle unclosed
            of.write(summary)
        summaryfile_batch.append(fname)
        refname = ref_path + ids[i] + '.txt'
        with open(refname, 'w') as of:
            of.write(tgt_txt[i])
        reffile_batch.append(refname)
    return summaryfile_batch, reffile_batch, selections
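# Hedged, self-contained sketch of the selection rule above (MMR-style):
# rescore the remaining sentences as lamb * relevance minus (1 - lamb) times
# the max cosine similarity to the summary built so far, then greedily pick the
# argmax. `mmr_rescore` is a hypothetical helper, not part of the snippet.
import torch
import torch.nn.functional as F

def mmr_rescore(scores, sent_vecs, summary_vecs, lamb=0.6):
    # scores: (n,) relevance; sent_vecs: (n, d); summary_vecs: (k, d)
    redundancy = F.cosine_similarity(sent_vecs.unsqueeze(1),
                                     summary_vecs.unsqueeze(0), dim=-1).max(1)[0]
    return lamb * scores - (1 - lamb) * redundancy

scores = torch.rand(6)
sent_vecs = torch.randn(6, 32)
summary_vecs = torch.randn(2, 32)   # two sentences already selected
print(mmr_rescore(scores, sent_vecs, summary_vecs))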