def __init__(self, pos_wt=0.0, qtn_wt=0.0, learn_flag=False):
    """Loss module combining position and quaternion distance terms.

    Args:
        pos_wt: initial weight of the position term.
        qtn_wt: initial weight of the quaternion term.
        learn_flag: when True, both weights are trainable parameters.
    """
    super().__init__()
    self.learn_flag = learn_flag
    # Pairwise (default p=2) distances for the two pose components.
    self.pos_loss = PairwiseDistance()
    self.qtn_loss = PairwiseDistance()
    # Register the weights as parameters so they move with the module and
    # can optionally be optimized alongside the model.
    self.pos_wt = torch.nn.Parameter(
        torch.Tensor([pos_wt]), requires_grad=learn_flag)
    self.qtn_wt = torch.nn.Parameter(
        torch.Tensor([qtn_wt]), requires_grad=learn_flag)
def __init__(self, lambda_reconstruction=1, lambda_gdl=0):
    """Baur-style reconstruction loss.

    Args:
        lambda_reconstruction: weight of the reconstruction term.
        lambda_gdl: weight of the gradient-difference term. Defaults to 0,
            matching the previously hard-coded value (backward compatible).
    """
    # BUG FIX: the original called ``super(BaurLoss).__init__()``, which
    # initializes the *unbound super proxy itself* rather than the parent
    # class, so ``nn.Module.__init__`` never ran and module state
    # (_parameters, _buffers, hooks, .to()/.cuda() support) was missing.
    super().__init__()
    self.lambda_reconstruction = lambda_reconstruction
    self.lambda_gdl = lambda_gdl
    # Each loss flattens every sample to a vector, takes the per-sample
    # Lp distance between prediction and target, and sums over the batch.
    self.l1_loss = lambda x, y: PairwiseDistance(p=1)(
        x.view(x.shape[0], -1), y.view(y.shape[0], -1)).sum()
    self.l2_loss = lambda x, y: PairwiseDistance(p=2)(
        x.view(x.shape[0], -1), y.view(y.shape[0], -1)).sum()
def create_classes_data_frame(dataset_name, distance="cosine", tsne_dimension=2):
    """Create and pickle a classes dataframe for the specified dataset.

    The dataset must be registered in the project settings. The frame is
    pickled to ``<dataset_dir>/classes.pickle`` before returning, to prevent
    re-calculating things on later runs.

    Args:
        dataset_name: the name of the dataset.
        distance: which distance function to be used for nearest neighbor
            computation. Either 'cosine' or 'pairwise'
            (Default value = "cosine")
        tsne_dimension: the dimensions for the lower dimensional vector
            projections (Default value = 2)

    Returns:
        a pandas DataFrame with "classes" (cleaned class names), "vectors"
        (document embeddings), "distances" (per-class distance row against
        every other class) and "tsne" (low dimensional projection) columns.
    """
    dataset_dir = DATA_SOURCES[dataset_name]["images"]
    paths = classes_set(dataset_dir)

    # BUG FIX: the frame was previously created with columns
    # ["class", "vector", "tsne"] that the code below never populated (it
    # fills "classes"/"vectors"/"distances"/"tsne" instead), leaving dead
    # all-NaN columns in the pickled output. Start from an empty frame.
    classes = pd.DataFrame()
    classes["classes"] = sorted(paths)

    tqdm.pandas(desc="Removing special characters.")
    classes["classes"] = classes["classes"].progress_apply(
        lambda cls: " ".join(re.split(r"[_\-]", cls)))

    tqdm.pandas(desc="Applying full clean.")
    classes["classes"] = classes["classes"].progress_apply(full_clean)

    tqdm.pandas(desc="Creating document vectors.")
    vectors = torch.tensor(
        np.vstack(classes["classes"].progress_apply(document_vector)))
    # BUG FIX: pandas cannot store a 2-D array in a single column (raises
    # "Data must be 1-dimensional"); store one row-tensor per cell instead.
    classes["vectors"] = list(vectors)

    p_dist = PairwiseDistance(p=2) if distance == "pairwise" else CosineSimilarity()
    distances = p_dist(
        # distance from every node to every node:
        vectors.repeat_interleave(vectors.shape[0], 0),  # each index repeated N times
        vectors.repeat(vectors.shape[0], 1),  # the index range repeated N times
    ).reshape(
        vectors.shape[0], -1
    )  # 2-D matrix with shape [N, N]
    classes["distances"] = list(distances)  # one distance row per class

    classes["tsne"] = list(torch.tensor(
        TSNE(n_components=tsne_dimension).fit_transform(vectors)))

    # BUG FIX: the file handle passed to pickle.dump was never closed.
    with open(os.path.join(dataset_dir, "classes.pickle"), "wb") as handle:
        pickle.dump(classes, handle)
    return classes
def evaluate_model(settings_model: ModelSettings, settings_data: DataSettings):
    """Evaluate a checkpointed embedding model on the validation set.

    Builds the model architecture, restores it from the configured
    checkpoint, runs `evaluate` with an L2 distance on the validation
    loader (writing a ROC figure to the output dir), and pretty-prints
    the resulting metrics.
    """
    val_loader: DataLoader = get_validation_data_loader(
        settings_model, settings_data)
    l2_distance: Module = PairwiseDistance(2).cuda()

    network: Module = ModelBuilder.build(
        settings_model.model_architecture,
        settings_model.embedding_dim,
        imagenet_pretrained=False,
    )
    network = network.cuda()

    checkpoint = load_checkpoint(
        checkpoint_path=settings_data.checkpoint_path, model=network)
    network = checkpoint.model
    network.eval()

    # ROC plot is named after the checkpoint's last completed epoch.
    roc_path: Path = settings_data.output_dir.joinpath(
        f"roc_eval_{checkpoint.epoch}.png")
    metrics: EvaluationMetrics = evaluate(
        network, l2_distance, val_loader, roc_path)
    pprint(dataclasses.asdict(metrics))
def ccdist(self, input1, input2):
    """Calculate the pair-wise distance between two empirical samples.

    Args:
        input1: tensor of shape (n, d).
        input2: tensor of shape (m, d).

    Returns:
        tensor of shape (n, m) where entry [i, j] is the L2 distance
        between input1[i] and input2[j].
    """
    n, m = len(input1), len(input2)
    pdist = PairwiseDistance(p=2)
    out = torch.empty(n, m)
    # Row by row: broadcast one sample of input1 against all of input2.
    for row, sample in enumerate(input1):
        out[row] = pdist(sample.expand(m, -1), input2)
    return out
def nearest_center_face_point(self, point: torch.Tensor) -> torch.Tensor:
    """Return the face center of this box that lies closest to ``point``.

    Args:
        point: tensor (3,) x,y,z coordinate — an arbitrary point in 3d
            space.

    Returns:
        tensor (3,) x,y,z coordinate — the closest of the six face
        centers.
    """
    centers = self.get_face_centers()
    # One L2 distance per face: every center against the same point.
    dists = PairwiseDistance()(centers, point.expand(6, 3))
    return centers[dists.argmin()]
def nearest_center_face_distance_from_point(
        self, point: torch.Tensor) -> torch.Tensor:
    """Distance from ``point`` to this box's closest face center.

    Args:
        point: tensor (3,) coordinate to check distance from this box's
            face centers.

    Returns:
        scalar tensor — the smallest of the six face-center distances.
    """
    face_centers = self.get_face_centers()
    dists = PairwiseDistance()(face_centers, point.expand(6, 3))
    return dists.min()
def get_top_k(query_embedding, queried_embeddings, k, distance):
    """Returns the distances and indices of the k nearest embeddings in the
    `queried_embeddings` tensor to the `query_embedding` tensor.

    Args:
        query_embedding: tensor with the embedding of the query image.
        queried_embeddings: tensor with the stacked embeddings of the
            queried dataset.
        k: the number of most similar images to be returned.
        distance: which distance function to be used for nearest neighbor
            computation. Either 'cosine' or 'pairwise'

    Returns:
        the closest k embeddings in the `queried_embeddings` tensor to the
        `query_embedding`. A 2-tuple of shape-`[k]` tensors (scores,
        indices). For 'pairwise' the scores are L2 distances (smaller is
        closer); otherwise cosine similarities (larger is closer).
    """
    if distance == "pairwise":
        scores = PairwiseDistance(p=2)(queried_embeddings, query_embedding)
        # BUG FIX: for a *distance* metric the nearest neighbors are the
        # SMALLEST values; the previous unconditional torch.topk(...) took
        # the largest k, i.e. the k farthest embeddings.
        return torch.topk(scores, k, largest=False)
    scores = CosineSimilarity()(queried_embeddings, query_embedding)
    return torch.topk(scores, k)  # similarity: largest = closest
def __init__(self, name, model, data_set, optimizer, scheduler, criterion,
             plot, batch_size=64, max_epoch=50, log_interval=15):
    """Set up the training run: data loaders, distance, and hyperparams.

    Args:
        name: run identifier.
        model: the network to train.
        data_set: 3-tuple of (train, validation, test) datasets.
        optimizer / scheduler / criterion: optimization components.
        plot: plotting helper.
        batch_size: samples per batch for all three loaders.
        max_epoch: number of epochs to run.
        log_interval: batches between log lines.
    """
    train_data, val_data, test_data = data_set
    self.batch_size = batch_size
    # Training shuffles and pins memory; evaluation loaders keep order.
    # All three drop the final partial batch.
    self.train_set = DataLoader(train_data, batch_size=batch_size,
                                shuffle=True, pin_memory=True,
                                drop_last=True)
    self.test_set = DataLoader(test_data, batch_size=batch_size,
                               shuffle=False, pin_memory=True,
                               drop_last=True)
    self.validate_set = DataLoader(val_data, batch_size=batch_size,
                                   shuffle=False, pin_memory=False,
                                   drop_last=True)
    self.distance = PairwiseDistance()
    self.name = name
    self.model = model
    self.optimizer = optimizer
    self.scheduler = scheduler
    self.criterion = criterion
    self.plot = plot
    self.max_epoch = max_epoch
    self.log_interval = log_interval
    self.best_accuracy = 0
def __init__(self, name, model, data_set, optimizer, scheduler, criterion,
             plot, batch_size=64, max_epoch=50, log_interval=15):
    """Set up a run over parallel dataset splits.

    Args:
        name: run identifier.
        model: the network to train.
        data_set: 3-tuple (train, val, test), each an iterable of parallel
            datasets; one DataLoader is built per element.
        optimizer / scheduler / criterion: optimization components.
        plot: plotting helper.
        batch_size: samples per batch for every loader.
        max_epoch: number of epochs to run.
        log_interval: batches between log lines.
    """
    train_parts, val_parts, test_parts = data_set
    self.batch_size = batch_size

    def _loaders(parts):
        # One loader per parallel dataset; partial final batches dropped.
        return tuple(DataLoader(part, batch_size=batch_size, drop_last=True)
                     for part in parts)

    self.train_set = _loaders(train_parts)
    # Sizes are taken from the first split; presumably all parallel splits
    # are the same length.
    self.train_batches = len(self.train_set[0])
    self.train_len = len(self.train_set[0].dataset)
    self.test_set = _loaders(test_parts)
    self.test_batches = len(self.test_set[0])
    self.validate_set = _loaders(val_parts)
    self.validate_len = len(self.validate_set[0].dataset)
    self.distance = PairwiseDistance()
    self.optimizer = optimizer
    self.scheduler = scheduler
    self.criterion = criterion
    self.model = model
    self.plot = plot
    self.max_epoch = max_epoch
    self.log_interval = log_interval
    self.name = name
    self.best_accuracy = 0
noisy_train_data = [{ 'text': [x['text']], 'label': x['label'] } for x in original_train_data] for item in noisy_train_data_raw: idx = item['idx'] noisy_train_data[idx]['text'].append(item['text']) learning_rate_lst = [5e-8, 5e-7] batch_size_lst = [5] original_loss_tradeoff_lst = [0.75, 0.50, 1.0] # 1.0 means no stability loss standard_loss_fn = F.cross_entropy stability_loss_fn = PairwiseDistance(p=2) # L2 distance for stability loss for learning_rate in learning_rate_lst: for batch_size in batch_size_lst: for original_loss_tradeoff in original_loss_tradeoff_lst: is_this_model_trained = False model_prefix = 'NAT_{}_finetune_lr{}_bs{}_tradeoff{}'.format(checkpoint.split('/')[-1].replace('.pt', ''), \ learning_rate, batch_size, original_loss_tradeoff) done_model_lst = [x for x in os.listdir(best_model_save_path)] for done_model in done_model_lst: if (model_prefix in done_model_lst): is_this_model_trained = True if (is_this_model_trained): print('{} is already trained, continue...'.format( model_prefix))
import numpy as np
import torch
from torch.nn import PairwiseDistance

# Device handles: prefer CUDA when available; ``cpu`` kept as an explicit
# fallback handle.
cpu = torch.device("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Euclidean (p=2) distance between embedding pairs.
l2_dist = PairwiseDistance(2)


def rank1(embeddings_anc, embeddings_pos, clf):
    """Rank-1 identification rate over anchor/positive embedding pairs.

    For each row i of the matrix produced by ``conjagate_matrix``, counts a
    hit when the column with the minimal value is i itself — i.e. the
    anchor's best match is its own positive.

    Args:
        embeddings_anc: anchor embeddings, aligned index-for-index with
            ``embeddings_pos``.
        embeddings_pos: positive embeddings.
        clf: forwarded to ``conjagate_matrix`` (defined elsewhere in this
            file); presumably a classifier/scorer — TODO confirm.

    Returns:
        float in [0, 1]: fraction of anchors whose nearest entry is their
        own pair.
    """
    n = len(embeddings_anc)
    n_good = 0
    # A[i][j]: score/distance of anchor i vs positive j (smaller = closer,
    # given the argmin below).
    A = conjagate_matrix(embeddings_anc, embeddings_pos, clf)
    for i, anc_base_dists in enumerate(A):
        j = np.argmin(anc_base_dists)
        if i == j:
            n_good += 1
        # print(i,j)
    return n_good / n


def roc_curve(embeddings_anc, embeddings_pos, clf):
    """Sweep thresholds over the normalized score matrix to build a ROC.

    NOTE(review): this function appears truncated in this chunk — the loop
    computes (tpr, fpr) per threshold but the visible code never appends
    them to ``tprs``/``fprs`` or returns; the remainder lies outside this
    view.
    """
    A = conjagate_matrix(embeddings_anc, embeddings_pos, clf)
    # Min-max normalize scores to [0, 1] so thresholds are comparable.
    A = (A - A.min()) / (A.max() - A.min())
    trshs = []
    # Seed both rate lists with the (0, 0) ROC origin.
    tprs = [0]
    fprs = [0]
    # Every distinct score value is a candidate threshold, in ascending order.
    for th in np.sort(np.unique(A.ravel())):
        tpr, fpr = tpr_fpr(A, th)
def train(
    settings_model: ModelSettings,
    settings_data: DataSettings,
    settings_federated: FederatedSettings,
):
    """Run federated training of a face-embedding model.

    Prepares the output directory tree, builds the model (restoring from a
    checkpoint when configured), loads the local and remote face
    meta-datasets, and delegates the training loop to
    ``federated_training``.
    """
    output_dir: Path = settings_data.output_dir
    logs_dir = output_dir.joinpath("logs")
    plots_dir = output_dir.joinpath("plots")
    checkpoints_dir = output_dir.joinpath("checkpoints")
    tensorboard_dir = output_dir.joinpath("tensorboard")
    # NOTE(review): the tensorboard directory is not created here (matches
    # prior behavior) — presumably Tensorboard creates it itself; confirm.
    for directory in (logs_dir, plots_dir, checkpoints_dir):
        directory.mkdir(exist_ok=True, parents=True)

    start_epoch: int = 0
    global_step: int = 0
    data_loader_validate: DataLoader = get_validation_data_loader(
        settings_model, settings_data)

    model: Module = ModelBuilder.build(
        settings_model.model_architecture,
        settings_model.embedding_dim,
        settings_model.pretrained_on_imagenet,
    )
    print("Using {} model architecture.".format(
        settings_model.model_architecture))

    # Load model to GPU or multiple GPUs if available
    if torch.cuda.is_available():
        print("Using single-gpu training.")
        model.cuda()

    # Resume training state when a checkpoint path is configured.
    if settings_data.checkpoint_path:
        checkpoint = load_checkpoint(settings_data.checkpoint_path, model, None)
        model = checkpoint.model
        start_epoch = checkpoint.epoch
        global_step = checkpoint.global_step

    # Start Training loop
    local_meta_dataset = FaceMetaDataset(
        root_dir=settings_data.dataset_local_dir,
        csv_name=settings_data.dataset_local_csv_file,
        min_images_per_class=2,
    )
    remote_meta_dataset = FaceMetaDataset(
        root_dir=settings_data.dataset_remote_dir,
        csv_name=settings_data.dataset_remote_csv_file,
        min_images_per_class=1,
    )
    l2_distance = PairwiseDistance(2).cuda()
    tensorboard = Tensorboard(tensorboard_dir)

    federated_training(
        model=model,
        global_step=global_step,
        start_epoch=start_epoch,
        face_local_meta_dataset=local_meta_dataset,
        face_remote_meta_dataset=remote_meta_dataset,
        validate_dataloader=data_loader_validate,
        settings_federated=settings_federated,
        settings_model=settings_model,
        tensorboard=tensorboard,
        distance_fn=l2_distance,
        checkpoint_path=checkpoints_dir,
    )