def main(args, _=None):
    """Run ``catalyst-contrib project-embeddings`` script.

    Loads feature vectors from ``args.in_npy`` and per-row metadata from
    ``args.in_csv``, optionally subsamples rows and loads thumbnail images,
    then writes a Tensorboard embedding-projector log to ``args.out_dir``.
    """
    df = pd.read_csv(args.in_csv)
    os.makedirs(args.out_dir, exist_ok=True)

    if args.meta_cols is not None:
        meta_header = args.meta_cols.split(",")
    else:
        meta_header = None

    features = np.load(args.in_npy, mmap_mode="r")
    assert len(df) == len(features)

    if args.num_rows is not None:
        # BUGFIX: sample without replacement so rows are not duplicated
        # (np.random.choice defaults to replace=True), and cap the sample
        # size at the number of available rows.
        num_rows = min(args.num_rows, len(df))
        indices = np.random.choice(len(df), num_rows, replace=False)
        features = features[indices, :]
        df = df.iloc[indices]

    if args.img_col is not None:
        img_data = _load_image_data(
            rootpath=args.img_rootpath,
            paths=df[args.img_col].values,
            img_size=args.img_size,
        )
    else:
        img_data = None

    if meta_header is not None:
        # Tensorboard metadata is TSV-based: collapse newlines/whitespace.
        metadata = df[meta_header].values.tolist()
        metadata = [[
            str(text)
            .replace("\n", " ")
            .replace(r"\s", " ")
            .replace(r"\s\s+", " ")
            .strip()
            for text in texts
        ] for texts in metadata]
        assert len(metadata) == len(features)
    elif args.img_col is not None:

        def _image_name(s):
            # BUGFIX: previously indexed splitted[1], which raised
            # IndexError for paths without a '/'; the last component is
            # always splitted[-1].
            return s.rsplit("/", 1)[-1]

        metadata = [_image_name(str(path)) for path in df[args.img_col].values]
    else:
        metadata = None

    summary_writer = SummaryWriter(args.out_dir)
    summary_writer.add_embedding(features, label_img=img_data, metadata=metadata)
    summary_writer.close()

    print(f"Done. Run `tensorboard --logdir={args.out_dir}` "
          + "to view in Tensorboard")
def normal_feature_visualization():
    """Write t-SNE features with labels/images to a Tensorboard embedding.

    Reads the module-level ``feature_data_tsne``, ``label_data`` and
    ``label_img`` globals and logs them to the ``normal_run`` directory.
    """
    global feature_data_tsne, label_data, label_img
    writer = SummaryWriter(
        comment="Label & Inference Visualization", log_dir="normal_run")
    # BUGFIX: removed leftover `import pdb; pdb.set_trace()` debugger
    # breakpoint that halted every run of this function.
    writer.add_embedding(feature_data_tsne,
                         metadata=label_data,
                         label_img=label_img,
                         global_step=0)
    writer.close()  # flush and finalize the event file
def val_epoch(epoch, data_loader, model, criterion, opt, logger):
    """Run one validation epoch; optionally log an embedding of outputs.

    Returns ``(epoch, avg_loss, avg_acc)`` and logs the epoch averages
    through ``logger``.
    """
    print('validation at epoch {}'.format(epoch))
    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()

    end_time = time.time()
    write_embedding = True
    writer = None
    embedding_log = 20  # log embeddings every N epochs

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(data_loader):
            data_time.update(time.time() - end_time)

            if not opt.no_cuda:
                # BUGFIX: `async` became a reserved keyword in Python 3.7
                # (SyntaxError); PyTorch renamed the kwarg to non_blocking.
                targets = targets.cuda(non_blocking=True)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            acc = calculate_accuracy(outputs, targets)

            losses.update(loss.item(), inputs.size(0))
            accuracies.update(acc, inputs.size(0))

            batch_time.update(time.time() - end_time)
            end_time = time.time()

            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, i + 1, len(data_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, acc=accuracies))

            if write_embedding and epoch % embedding_log == 0:
                if writer is None:
                    writer = SummaryWriter(comment='_embedding_val_' + str(i))
                n_iter = (epoch * len(data_loader)) + i
                # Use the middle frame of the clip as the thumbnail.
                middle_frame = math.floor(inputs.data.shape[2] / 2)
                writer.add_embedding(
                    outputs.data,
                    metadata=targets.data,
                    label_img=torch.squeeze(
                        inputs.data[:, :, middle_frame, :, :], 2),
                    global_step=n_iter)

    logger.log({'epoch': epoch, 'loss': losses.avg, 'acc': accuracies.avg})
    return epoch, losses.avg, accuracies.avg
def main():
    """Visualize pretrained embeddings in tensorboard.

    Expects two command-line arguments: the metadata file and the vector
    file, both tab-separated (delimiter='\t').

    Example:
        >>python embedding.py meta_data.tsv vectors.tsv
        >>tensorboard logdir= ./
        Then open localhost:6060 (or the URL printed in the terminal) and
        pick "projector" from the drop-down menu.
    """
    argv = sys.argv
    print(argv)

    # metadata labels and the embedding vectors, both TSV
    label = np.array(pd.read_csv(str(argv[1]), sep='\t'))
    data = np.array(pd.read_csv(str(argv[2]), sep='\t'))

    # flatten the single metadata column to 1-D
    label = label.reshape(label.shape[0])

    writer = SummaryWriter()
    writer.add_embedding(data, metadata=label)
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
    print("Working")
def vis(ids, embeddings, images, args):
    """Log embeddings with id metadata and image thumbnails to tensorboard."""
    print("visualizing embeddings...\n")
    log_dir = os.path.join(
        CONF.PATH.OUTPUT_EMBEDDING, "{}/tensorboard".format(args.path))
    writer = SummaryWriter(log_dir)
    writer.add_embedding(embeddings, metadata=ids, label_img=images)
    writer.close()
def get_tsne_embeddings_last_three_tasks(self, dataset, model): from tensorboardX import SummaryWriter # Test final model on last 3 tasks: model.eval() tag = '_diff_{}'.format(self.args.diff) for t in [17, 18, 19]: all_images, all_labels, all_shared, all_private = [], [], [], [] writer = SummaryWriter() for itr, (data, target, tt, td) in enumerate(dataset[t]['tsne']): x = data.to(device=self.device) y = target.to(device=self.device, dtype=torch.long) tt = tt.to(device=self.device) output = model.forward(x, x, tt, t) shared_out, private_out = model.get_encoded_ftrs(x, x, t) # print (shared_out.size()) all_shared.append(shared_out) all_private.append(private_out) all_images.append(x) all_labels.append(y) writer.add_embedding(mat=torch.stack(all_shared, dim=1).data, label_img=torch.stack(all_images, dim=1).data, metadata=list(range(1, 6)), tag='Shared_{}_{}'.format(t, tag)) # ,metadata_header=list(range(1,6))) writer.add_embedding(mat=torch.stack(all_private, dim=1).data, label_img=torch.stack(all_images, dim=1).data, metadata=list(range(1, 6)), tag='Private_{}_{}'.format(t, tag)) # ,metadata_header=list(range(1,6))) writer.close()
def _save_embeddings(da_embs, dset):
    """Save anchor-tile embeddings (plus tile thumbnails) for the
    tensorboard projector under `runs/`."""
    tile_type = "anchor"
    da_embs = da_embs.sel(tile_type=tile_type)

    # before we get the images we remove all the transforms so that we get
    # the original RGB image
    dset.transform = None

    nc, nx, ny = dset[0][0].shape
    ntiles = int(da_embs.tile_id.count())
    label_img = torch.zeros((ntiles, nc, nx, ny))
    for idx, tile_id in enumerate(tqdm(da_embs.tile_id.values)):
        label_img[idx] = dset[tile_id][0]

    writer = SummaryWriter()
    writer.add_embedding(
        da_embs.transpose("tile_id", "emb_dim").values,
        label_img=label_img,
    )
    writer.close()

    print(
        """
embeddings saved for tensorboard to `runs/`

now start tensorboard:

    $> tensorboard --logdir runs

and open a browser to view the tensorboard embedding projector:

    http://localhost:6006/#projector
"""
    )
def prepareDatasetAndLogging(args):
    """Build train/test loaders for (fashion-)MNIST plus logging hooks.

    Returns ``(tensorboard_writer, callbacklist, train_loader,
    test_loader)``.  Also seeds the tensorboard projector with 100
    color-inverted test images.

    Raises:
        ValueError: if ``args.dataset`` is not 'mnist' or 'fashion_mnist'.
    """
    # choose the dataset
    if args.dataset == 'mnist':
        DatasetClass = datasets.MNIST
    elif args.dataset == 'fashion_mnist':
        DatasetClass = datasets.FashionMNIST
    else:
        raise ValueError('unknown dataset: ' + args.dataset +
                         ' try mnist or fashion_mnist')

    training_run_name = timeStamped(args.dataset + '_' + args.name)
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

    # Create the dataset, mnist or fashion_mnist
    dataset_dir = os.path.join(args.data_dir, args.dataset)
    training_run_dir = os.path.join(args.data_dir, training_run_name)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))])
    train_dataset = DatasetClass(
        dataset_dir, train=True, download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True, **kwargs)
    test_dataset = DatasetClass(dataset_dir, train=False, transform=transform)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=args.test_batch_size, shuffle=True, **kwargs)

    # Set up visualization and progress status update code
    callback_params = {'epochs': args.epochs,
                       'samples': len(train_loader) * args.batch_size,
                       'steps': len(train_loader),
                       'metrics': {'acc': np.array([]),
                                   'loss': np.array([]),
                                   'val_acc': np.array([]),
                                   'val_loss': np.array([])}}
    output_on_train_end = os.sys.stdout if args.print_log else None
    # BUGFIX: the CSV path previously concatenated directory and file name
    # without a separator ("<dir><name>.csv"); join them properly.
    csv_path = os.path.join(training_run_dir, training_run_name + '.csv')
    callbacklist = callbacks.CallbackList(
        [callbacks.BaseLogger(),
         callbacks.TQDMCallback(),
         callbacks.CSVLogger(filename=csv_path,
                             output_on_train_end=output_on_train_end)])
    callbacklist.set_params(callback_params)

    tensorboard_writer = SummaryWriter(
        log_dir=training_run_dir,
        comment=args.dataset + '_embedding_training')

    # show some image examples in tensorboard projector with inverted color
    # NOTE(review): test_data/test_labels are deprecated in newer
    # torchvision (use .data/.targets) — kept for compatibility here.
    images = 255 - test_dataset.test_data[:100].float()
    label = test_dataset.test_labels[:100]
    features = images.view(100, 784)
    tensorboard_writer.add_embedding(features, metadata=label,
                                     label_img=images.unsqueeze(1))
    return tensorboard_writer, callbacklist, train_loader, test_loader
def main(args, _=None):
    """Project features with metadata (and optional images) to tensorboard.

    Reads row metadata from ``args.in_csv`` and features from
    ``args.in_npy`` (rows assumed to be positionally aligned), then writes
    an embedding-projector log to ``args.out_dir``.

    Raises:
        ValueError: if ``args.meta_cols`` is not provided.
    """
    df = pd.read_csv(args.in_csv)
    os.makedirs(args.out_dir, exist_ok=True)

    if args.meta_cols is not None:
        meta_header = args.meta_cols.split(",")
    else:
        raise ValueError("meta-cols must not be None")

    features = np.load(args.in_npy, mmap_mode="r")

    if args.n_rows is not None:
        df = df.sample(n=args.n_rows)
        # BUGFIX: keep the feature rows aligned with the sampled dataframe;
        # previously `features` was left untouched, so metadata and images
        # no longer matched their embedding vectors.
        # NOTE(review): assumes df had a default RangeIndex aligned with
        # the npy rows before sampling — confirm for the input CSV.
        features = features[df.index.to_numpy(), :]

    if args.img_col is not None:
        image_names = [
            path.join(args.img_datapath, name)
            for name in df[args.img_col].values
        ]
        img_data = np.stack(
            [load_image(name, args.img_size) for name in image_names],
            axis=0)
        # HWC uint8 -> NCHW float in [0, 1] for the projector sprites
        img_data = (img_data.transpose(
            (0, 3, 1, 2)) / 255.0).astype(np.float32)
        img_data = torch.from_numpy(img_data)
    else:
        img_data = None

    summary_writer = SummaryWriter(args.out_dir)
    summary_writer.add_embedding(features,
                                 metadata=df[meta_header].astype(str).values,
                                 label_img=img_data,
                                 metadata_header=meta_header)

    print(f"Done. Run `tensorboard --logdir={args.out_dir}` "
          f"to view in Tensorboard")
def get_tsne_embeddings_first_ten_tasks(self, dataset, model):
    """Collect shared/private encoder features across the first 10 tasks
    and log them as two tensorboard embeddings."""
    from tensorboardX import SummaryWriter
    model.eval()
    tag_ = '_diff_{}'.format(self.args.diff)
    all_images, all_shared, all_private = [], [], []
    # Test final model on first 10 tasks:
    writer = SummaryWriter()
    for t in range(10):
        for itr, (data, _, tt, td) in enumerate(dataset[t]['tsne']):
            x = data.to(device=self.device)
            tt = tt.to(device=self.device)
            # forward pass; only the encoded features are kept below
            output = model.forward(x, x, tt, t)
            shared_out, private_out = model.get_encoded_ftrs(x, x, t)
            all_shared.append(shared_out)
            all_private.append(private_out)
            all_images.append(x)
    print(torch.stack(all_shared).size())
    # NOTE(review): `tag` is a list here, but add_embedding conventionally
    # expects a single string tag — verify against the tensorboardX
    # version in use.
    tag = ['Shared10_{}_{}'.format(tag_, i) for i in range(1, 11)]
    writer.add_embedding(mat=torch.stack(all_shared, dim=1).data,
                         label_img=torch.stack(all_images, dim=1).data,
                         metadata=list(range(1, 11)),
                         tag=tag)  # , metadata_header=list(range(1,11))
    tag = ['Private10_{}_{}'.format(tag_, i) for i in range(1, 11)]
    writer.add_embedding(mat=torch.stack(all_private, dim=1).data,
                         label_img=torch.stack(all_images, dim=1).data,
                         metadata=list(range(1, 11)),
                         tag=tag)  # ,metadata_header=list(range(1,11))
    writer.close()
def test(args, model, data_loader, show_image_on_board=True,
         show_all_embedding=False):
    """Embed up to 1000 samples and log them to tensorboard, either with
    image thumbnails (show_image_on_board) or with text labels."""
    model.eval()
    writer = SummaryWriter()
    weights, images, labels = [], [], []

    with torch.no_grad():
        for idx, (image, cat) in enumerate(data_loader):
            if idx == 1000:  # cap the number of visualized points
                break
            image = image.to(device)
            cat = cat.to(device)
            labels.append(idx2label[cat.item()])
            images.append(image.squeeze(0).numpy())

            # choose which inputs feed the predictor
            if not show_image_on_board:
                embedded_vec = model.predict(x=None, category=cat)
            elif show_all_embedding:
                embedded_vec = model.predict(x=image, category=cat)
            else:
                embedded_vec = model.predict(x=image, category=None)
            weights.append(embedded_vec.squeeze(0).numpy())

    weights = torch.FloatTensor(weights)
    images = torch.FloatTensor(images)
    if show_image_on_board:
        writer.add_embedding(weights, label_img=images)
    else:
        writer.add_embedding(weights, metadata=labels)
    print("done")
class LogHandler:
    """Thin wrapper around a tensorboardX SummaryWriter that prefixes
    every tag with a module identifier."""

    def __init__(self, logdir, module_id):
        self.logdir = logdir
        self.log = SummaryWriter(self.logdir)
        self.module_id = module_id

    def scalar(self, key, val, step):
        '''
        val can either be a scalar or a dictionary
        e.g. {'a': 3, 'b': 2} to plot e.g. the values of a and b
        onto the same graph
        '''
        tag = '{}_{}'.format(self.module_id, key)
        if isinstance(val, dict):
            self.log.add_scalars(tag, val, step)
        else:
            self.log.add_scalar(tag, val, step)

    def text(self, key, val, step):
        self.log.add_text('{}_{}'.format(self.module_id, key), val, step)

    def image(self, key, val, step):
        self.log.add_image('{}_{}'.format(self.module_id, key), val, step)

    def figure(self, key, val, step):
        self.log.add_figure('{}_{}'.format(self.module_id, key), val, step)

    def embedding(self, key, val, meta, step):
        # the embedding tag is the prefixed key itself
        self.log.add_embedding(val, tag=key, metadata=meta, global_step=step)
def visualize(args):
    """Load the best checkpoint for ``args.model_id`` and log the decoder's
    label vectors (for all open-type labels) as a tensorboard embedding."""
    saved_path = constant.EXP_ROOT
    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    model.eval()
    model.load_state_dict(
        torch.load(saved_path + '/' + args.model_id + '_best.pt')["state_dict"])
    label2id = constant.ANS2ID_DICT["open"]
    visualize = SummaryWriter("../visualize/" + args.model_id)
    # label_list = ["person", "leader", "president", "politician", "organization", "company", "athlete","adult", "male", "man", "television_program", "event"]
    label_list = list(label2id.keys())
    ids = [label2id[_] for _ in label_list]
    if args.gcn:
        # connection_matrix = model.decoder.label_matrix + model.decoder.weight * model.decoder.affinity
        connection_matrix = model.decoder.label_matrix + model.decoder.weight * model.decoder.affinity
        # row-normalized propagation of the decoder weights over the graph
        label_vectors = model.decoder.transform(
            connection_matrix.mm(model.decoder.linear.weight)
            / connection_matrix.sum(1, keepdim=True))
    else:
        label_vectors = model.decoder.linear.weight.data
    # pick only the rows for the labels of interest
    interested_vectors = torch.index_select(
        label_vectors, 0, torch.tensor(ids).to(torch.device("cuda")))
    visualize.add_embedding(interested_vectors,
                            metadata=label_list,
                            label_img=None)
class Logger:
    """Aggregates training statistics and mirrors them to the console,
    tensorboard (optional) and sacred (currently disabled)."""

    def __init__(self, console_logger):
        self.console_logger = console_logger
        self.use_tb = False
        self.use_sacred = False
        self.use_hdf = False
        # key -> list of (t, value) pairs
        self.stats = defaultdict(lambda: [])

    def setup_tb(self, directory_name):
        """Enable tensorboard logging into *directory_name*."""
        # Import here so it doesn't have to be installed if you don't use it
        from tensorboard_logger import configure, log_value
        configure(directory_name)
        self.tb_logger = log_value
        self.use_tb = True
        from tensorboardX import SummaryWriter
        self.writer = SummaryWriter(directory_name + "-latent")

    def setup_sacred(self, sacred_run_dict):
        """Sacred logging is deliberately disabled: this is a no-op.

        FIX: the original body had a bare ``return`` followed by
        unreachable statements (``self.sacred_info = ...``); the dead code
        is removed and the disable made explicit.
        """
        return

    def log_stat(self, key, value, t, to_sacred=True):
        """Record (t, value) for *key* and forward to enabled sinks."""
        self.stats[key].append((t, value))
        if self.use_tb:
            self.tb_logger(key, value, t)
        if self.use_sacred and to_sacred:
            if key in self.sacred_info:
                self.sacred_info["{}_T".format(key)].append(t)
                self.sacred_info[key].append(value)
            else:
                self.sacred_info["{}_T".format(key)] = [t]
                self.sacred_info[key] = [value]

    def log_vec(self, mat, metadata, global_step, tag):
        """Log an embedding matrix with metadata (tensorboard only)."""
        if self.use_tb:
            self.writer.add_embedding(mat, metadata,
                                      global_step=global_step, tag=tag)

    def print_recent_stats(self):
        """Print a windowed mean of every tracked stat to the console."""
        log_str = "Recent Stats | t_env: {:>10} | Episode: {:>8}\n".format(
            *self.stats["episode"][-1])
        i = 0
        for (k, v) in sorted(self.stats.items()):
            if k == "episode":
                continue
            i += 1
            # epsilon is reported instantaneously, others as a 5-step mean
            window = 5 if k != "epsilon" else 1
            item = "{:.4f}".format(
                np.mean([x[1] for x in self.stats[k][-window:]]))
            log_str += "{:<25}{:>8}".format(k + ":", item)
            log_str += "\n" if i % 4 == 0 else "\t"
        self.console_logger.info(log_str)
def test_embedding_square(self):
    """Smoke-test add_embedding with square (32x32) label images."""
    writer = SummaryWriter(comment='sq')
    feats = torch.rand(228, 256)
    imgs = torch.rand(228, 3, 32, 32)
    n = imgs.shape[0]
    # darken images progressively so points are visually distinguishable
    for idx in range(n):
        imgs[idx] *= (float(idx) + 60) / (n + 60)
    writer.add_embedding(feats, label_img=imgs, global_step=2)
def _main(args):
    """
    Main routine of script to generate embeddings.

    Parameters
    ----------
    args : argparse.Namespace
        contains all arguments parsed from input

    Returns
    -------
    None
    """
    with open(args.results_file, 'rb') as f:
        features, preds, labels, filenames = pickle.load(f)

    if args.tensorboard:
        writer = SummaryWriter(log_dir=os.path.dirname(args.output_file))
        # load thumbnails in parallel (currently unused by add_embedding)
        with Pool(16) as pool:
            images = pool.map(_load_thumbnail, filenames)
        writer.add_embedding(
            torch.from_numpy(features),
            metadata=torch.from_numpy(labels),
            # label_img=torch.from_numpy(np.array(images)).unsqueeze(1))
            label_img=None)
        return

    viz_img = _make_embedding(features=features, labels=labels,
                              embedding=args.embedding,
                              three_d=args.three_d)
    cv2.imwrite(args.output_file, viz_img)
def compute_embeddings_lfw(args, dataset, model, batch_size,
                           dump_embeddings=False,
                           pdist=lambda x, y: 1. - F.cosine_similarity(x, y),
                           flipped_embeddings=False):
    """Computes embeddings of all images from the LFW dataset using PyTorch

    Returns a list of dicts ``{'score', 'is_same', 'idx'}``, one per image
    pair; when ``dump_embeddings`` is True, also writes all embeddings to a
    timestamped tensorboard log under ./logs/.
    """
    val_loader = DataLoader(dataset, batch_size=batch_size, num_workers=4,
                            shuffle=False)
    scores_with_gt = []
    embeddings = []
    ids = []
    for batch_idx, data in enumerate(tqdm(val_loader, 'Computing embeddings')):
        images_1 = data['img1']
        images_2 = data['img2']
        is_same = data['is_same']
        if torch.cuda.is_available() and args.devices[0] != -1:
            images_1 = images_1.cuda()
            images_2 = images_2.cuda()
        emb_1 = model(images_1)
        emb_2 = model(images_2)
        if flipped_embeddings:
            # average each embedding with its horizontally-flipped
            # counterpart (test-time augmentation)
            images_1_flipped = flip_tensor(images_1, 3)
            images_2_flipped = flip_tensor(images_2, 3)
            emb_1_flipped = model(images_1_flipped)
            emb_2_flipped = model(images_2_flipped)
            emb_1 = (emb_1 + emb_1_flipped) * .5
            emb_2 = (emb_2 + emb_2_flipped) * .5
        # per-pair distance score (default: cosine distance)
        scores = pdist(emb_1, emb_2).data.cpu().numpy()
        for i, _ in enumerate(scores):
            scores_with_gt.append({
                'score': scores[i],
                'is_same': is_same[i],
                # global pair index across batches
                'idx': batch_idx * batch_size + i
            })
        if dump_embeddings:
            id0 = data['id0']
            id1 = data['id1']
            ids.append(id0)
            ids.append(id1)
            to_dump_1 = emb_1.data.cpu()
            to_dump_2 = emb_2.data.cpu()
            embeddings.append(to_dump_1)
            embeddings.append(to_dump_2)
    if dump_embeddings:
        total_emb = np.concatenate(embeddings, axis=0)
        total_ids = np.concatenate(ids, axis=0)
        log_path = './logs/{:%Y_%m_%d_%H_%M}'.format(datetime.datetime.now())
        writer = SummaryWriter(log_path)
        writer.add_embedding(torch.from_numpy(total_emb), total_ids)
    return scores_with_gt
def visualize_embeddings(v):
    """Visualizes loaded vectors from pretrained embeddings into tensorboard.

    Args:
        v: The torchtext.vocab.Vector object that contains weights of
            the embeddings.
    """
    writer = SummaryWriter()
    # itos supplies one text label per embedding row
    writer.add_embedding(v.vectors, v.itos)
def test_embedding_fail(self):
    """Non-square label images (16x32) must trip an AssertionError."""
    with self.assertRaises(AssertionError):
        writer = SummaryWriter(comment='shouldfail')
        feats = torch.rand(228, 256)
        imgs = torch.rand(228, 3, 16, 32)
        count = imgs.shape[0]
        for idx in range(count):
            imgs[idx] *= (float(idx) + 60) / (count + 60)
        writer.add_embedding(feats, label_img=imgs, global_step=2)
def load_then_visualize_embeddings(path):
    """Visualizes pretrained embeddings into tensorboard.

    Args:
        path: Path to the pretrained vector file.
    """
    writer = SummaryWriter()
    vectors = vocab.Vectors(path)
    # itos supplies one text label per embedding row
    writer.add_embedding(vectors.vectors, vectors.itos)
def train(args):
    """Train the word-embedding model; logs train/PIP losses to tensorboard
    and periodically saves checkpoints and an embedding projection."""
    if args.multi_node:
        init_process(args)
    device = args.device
    text_loader = TextDataLoader(args.batch_size, args.multi_node,
                                 args.num_workers, args.data_dir,
                                 args.dataset, args.window_size,
                                 args.neg_sample_size, args.remove_th,
                                 args.subsample_th, args.embed_size,
                                 args.is_character, args.seed)
    eval_loader = EvalDataLoader(args.batch_size, args.num_workers,
                                 args.data_dir)
    if args.is_character:
        model = word_embed_ng(args.vocab_size, args.char_embed_size,
                              args.hidden_size, args.num_layer, args.dropout,
                              args.mlp_size, args.embed_size,
                              args.neg_sample_size, args.bidirectional,
                              args.device)
    else:
        model = SGNS(len(text_loader.dataset.vocabs), args.embed_size)
    if args.load_model is not None:
        # NOTE(review): path built by plain concatenation — assumes
        # args.log_dir ends with a separator; confirm.
        model.load_state_dict(
            torch.load(args.log_dir + args.load_model,
                       map_location=lambda storage, loc: storage))
    if args.multi_gpu:
        print("Let's use", args.num_gpu, "GPUs!")
        model = nn.DataParallel(model,
                                device_ids=[i for i in range(args.num_gpu)])
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    writer = SummaryWriter(args.log_dir)
    trainer = Trainer(args, model, optimizer, writer, text_loader)
    for epoch in range(args.epochs):
        epoch += 1  # 1-based epoch numbering
        trainer.monitor_loss = 0
        trainer.epoch = epoch
        start_time = time.time()
        # NOTE(review): evaluate() is called again below on rank 0 and this
        # result is discarded — looks redundant; confirm whether this first
        # call has needed side effects before removing it.
        piploss = evaluate(args, model, text_loader, eval_loader)
        loss = trainer.train_epoch()
        # only rank 0 logs/saves in the multi-node setting
        if not args.multi_node or (args.multi_node
                                   and distributed.get_rank() == 0):
            piploss = evaluate(args, model, text_loader, eval_loader)
            print(
                '====> Epoch: {} Average loss: {:.4f} / PIP loss: {:.4f} / Time: {:.4f}'
                .format(epoch, loss / len(text_loader.dataset), piploss,
                        time.time() - start_time))
            writer.add_scalar('Epoch time', time.time() - start_time, epoch)
            writer.add_scalar('PIP loss', piploss, epoch)
            writer.add_scalar('Train loss', loss / len(text_loader.dataset),
                              epoch)
            if epoch % args.save_interval == 0:
                torch.save(model.state_dict(),
                           os.path.join(args.log_dir, 'model.pt'))
                if not args.is_character:
                    # project word vectors with vocabulary labels
                    features = plot_embedding(args, model, text_loader)
                    writer.add_embedding(features,
                                         metadata=text_loader.vocabs,
                                         global_step=epoch)
class Visualizer:
    """Tensorboard visualizer that also periodically dumps result images
    to disk under ``self.savedir``."""

    def __init__(self, tb_path):
        self.tb_path = tb_path
        if os.path.exists(tb_path):
            # FIX: use shutil.rmtree instead of `os.system('rm -r ...')` —
            # portable and safe for paths with spaces/shell metacharacters.
            import shutil
            shutil.rmtree(tb_path)
        self.writer = SummaryWriter(tb_path)
        self.savedir = '/storage/armand/results/thesis/loAE'
        self.eval_every = 20  # dump result images every N epochs

    def add_scalar(self, scalar_dict, epoch, global_step=None):
        """Dispatch each entry of *scalar_dict* to the appropriate writer
        call based on its type/tag; lists and arrays are skipped."""
        for tag, scalar in scalar_dict.items():
            if isinstance(scalar, dict):
                self.writer.add_scalars(tag, scalar, epoch)
            elif isinstance(scalar, plt.Figure):
                # FIX: `plt.figure.Figure` raised AttributeError — pyplot
                # exposes the Figure class as `plt.Figure`.
                # NOTE(review): assumes `plt` is matplotlib.pyplot; confirm
                # against the module's imports.
                self.writer.add_figure(tag, scalar, epoch)
            elif tag == 'Embedding' or tag == 'Original-Domain':
                self.writer.add_embedding(scalar, tag=tag,
                                          global_step=global_step)
            elif isinstance(scalar, (list, np.ndarray)):
                continue
            else:
                self.writer.add_scalar(tag, scalar, epoch)

    def add_images(self, image_dict, epoch, global_step=None, prefix=None):
        """Log image grids to tensorboard and, every ``eval_every`` epochs,
        save them as PNGs under the results directory."""
        for tag, images in image_dict.items():
            if prefix is not None:
                tag = '{}/{}'.format(prefix, tag)
            images = torch.clamp(images, -1, 1)
            images = vutils.make_grid(images, nrow=images.size(0),
                                      normalize=True, range=(-1, 1))
            # Save images of results
            if epoch % self.eval_every == 0 and epoch != 0:
                case = self.tb_path.split('/')[-2]
                resImageDir = os.path.join(self.savedir, 'figures', case)
                if not os.path.exists(resImageDir):
                    os.makedirs(resImageDir)
                # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2;
                # switch to imageio.imwrite if SciPy is upgraded.
                scipy.misc.imsave(
                    os.path.join(
                        resImageDir,
                        prefix + '_step-' + str(global_step).zfill(5) +
                        '_epoch-' + str(epoch).zfill(3) + '.png'),
                    images[:, :130].permute(1, 2, 0))
            self.writer.add_image(tag, images, global_step)
class TensorboardLogger(Callback):
    """Fastai-style callback that mirrors training metrics, parameter
    histograms, embedding weights and (best effort) the model graph to
    tensorboard.

    Parameters
    ----------
    model : the model being trained
    md : model-data object providing ``path`` and ``trn_dl``
    log_name : subdirectory name for this run's event files
    metrics_names : extra metric names reported on epoch end
    path : root log directory (defaults to ``md.path``/logs)
    histogram_freq : log parameter histograms every N iterations
    """

    def __init__(self, model, md, log_name, metrics_names=None, path=None,
                 histogram_freq=100):
        super().__init__()
        self.model = model
        self.md = md
        # FIX: avoid the mutable default argument `metrics_names=[]`
        self.metrics_names = ["validation_loss"]
        self.metrics_names += metrics_names or []
        self.histogram_freq = histogram_freq
        path = path or os.path.join(md.path, "logs")
        self.log_dir = os.path.join(path, log_name)

    def on_train_begin(self):
        self.iteration = 0
        self.epoch = 0
        self.writer = SummaryWriter(log_dir=self.log_dir)

    def on_batch_begin(self):
        pass

    def on_phase_begin(self):
        pass

    def on_epoch_end(self, metrics):
        self.epoch += 1
        for val, name in zip(metrics, self.metrics_names):
            self.writer.add_scalar(name, val, self.iteration)
        # log raw embedding-layer weights for the projector
        for name, emb in self.model.named_children():
            if isinstance(emb, nn.Embedding):
                self.writer.add_embedding(list(emb.parameters())[0],
                                          global_step=self.iteration,
                                          tag=name)

    def on_phase_end(self):
        pass

    def on_batch_end(self, loss):
        self.iteration += 1
        self.writer.add_scalar("loss", loss, self.iteration)
        if self.iteration % self.histogram_freq == 0:
            for name, param in self.model.named_parameters():
                self.writer.add_histogram(name, param, self.iteration)

    def on_train_end(self):
        # graph tracing can fail on models with unsupported ops; degrade
        # gracefully instead of aborting the run
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                dummy_input = tuple(next(iter(self.md.trn_dl))[:-1])
                self.writer.add_graph(self.model, dummy_input)
        except Exception as e:
            print("Unable to create graph.")
            print(e)
        self.writer.close()
def visualize(self):
    """Dump the CBOW embedding matrix with word labels to
    tensorboard/cbow for the projector."""
    logdir = "tensorboard/cbow"
    os.makedirs(logdir, exist_ok=True)
    weights = self.model.embedding.weight.data
    # one label per embedding row, in index order
    labels = [self.idx2word[i] for i in range(len(self.word2idx))]
    writer = SummaryWriter(logdir)
    writer.add_embedding(weights, metadata=labels)
    writer.close()
def normal_kmeans(dataloader1, dataloader2, N1, N2, k, key_name, DC=None):
    """Run k-means over weak/strong features, log cluster statistics and a
    tensorboard embedding, then (when a DC helper is supplied) extract
    segments for the dominant cluster.

    Relies on module-level ``args`` and ``current_time`` globals.
    """
    print("### Start Normal Kmeans:{}".format(key_name))
    features1 = normal_feature(dataloader1, N1)
    features2 = normal_feature(dataloader2, N2)
    print("weak:", features1.shape)
    print("strong:", features2.shape)
    I, J, loss = new_run_kmeans(features1, features2, k, verbose=args.verbose)

    LOG_DIR = "./normal/{}".format(current_time)
    writer = SummaryWriter(log_dir=LOG_DIR)
    label_cnt = "/".join([LOG_DIR, "label_cnt.tsv"])

    # Find the cluster with the most strong-feature members.
    maximum = 0
    spe_number = 0  # BUGFIX: previously unbound when every cluster was empty
    for number in range(k):
        cnt = 0
        for j in range(len(J)):
            if number == J[j]:
                cnt = cnt + 1
        print("{}:{}".format(number, cnt))
        if cnt > maximum:
            maximum = cnt
            spe_number = number
    print("Specified_number:{}\n".format(spe_number))

    # Count label frequencies among the weak-feature assignments.
    LABEL_CNT = {}
    labels = I
    for i in labels:
        if i not in LABEL_CNT:
            LABEL_CNT[i] = 1
        else:
            LABEL_CNT[i] += 1
    LABEL_CNT = sorted(LABEL_CNT.items())
    print(LABEL_CNT)

    with open(label_cnt, "a") as label_cnt_file:
        label_cnt_file.write("{}\n".format(key_name))
        label_cnt_file.write("{}\n".format(LABEL_CNT))
        label_cnt_file.write("{}\n".format(spe_number))

    writer.add_embedding(features1, metadata=labels)
    writer.close()

    # BUGFIX: guard against the default DC=None — these calls previously
    # raised AttributeError when no DC helper was supplied.
    if DC is not None:
        # extract segments belonging to the specified cluster
        DC.SED_Single(key_name, I, spe_number)
        # turn the extracted segments into strong-label data
        DC.create_csv_wav_file_Single(key_name, resume="normal")
def test_embedding(self):
    """Smoke-test add_embedding with plain and two-column metadata."""
    writer = SummaryWriter()
    feats = torch.Tensor([[1, 2, 3], [5, 4, 1], [3, 7, 7]])
    all_labels = torch.Tensor([33, 44, 55])
    imgs = torch.zeros(3, 3, 5, 5)
    writer.add_embedding(feats, metadata=all_labels, label_img=imgs,
                         global_step=2)

    # second call: pair each label with a dataset split name
    dataset_label = ['test'] * 2 + ['train'] * 2
    all_labels = list(zip(all_labels, dataset_label))
    writer.add_embedding(feats, metadata=all_labels, label_img=imgs,
                         metadata_header=['digit', 'dataset'],
                         global_step=2)
def main(target_file_path, wordlist_file_path, log_file_path):
    """Load embedding vectors and their word list from disk, then log
    them to tensorboard for the projector."""
    writer = SummaryWriter(log_file_path)

    # one word per line, stripped of surrounding whitespace
    with open(wordlist_file_path, 'r') as f:
        word_list = [line.strip() for line in f]

    embeddings = np.loadtxt(target_file_path)
    vocab_size, embbeing_len = np.shape(embeddings)

    # wrap the matrix in a frozen nn.Embedding layer
    embedding = T.nn.Embedding(vocab_size, embbeing_len)
    embedding.weight.data.copy_(T.from_numpy(embeddings))
    embedding.weight.requires_grad = False

    writer.add_embedding(embedding.weight.data, word_list, global_step=1)
    writer.close()
class Logger:
    """Checkpoint + tensorboard logging helper tracking a global step and
    the best metric value seen so far."""

    def __init__(self, ckpt_path, tsbd_path, global_step=0,
                 best_metric_val=float('-inf')):
        for p in (ckpt_path, tsbd_path):
            if not os.path.exists(p):
                os.makedirs(p)
        self.ckpt_path = ckpt_path
        self.writer = SummaryWriter(tsbd_path)
        self.global_step = global_step
        self.best_metric_val = best_metric_val

    def reset(self):
        # restart step counting from scratch
        self.global_step = 0

    def step(self, step):
        self.global_step += step

    def add_scalar(self, name, val):
        self.writer.add_scalar(name, val, self.global_step)
        # flush immediately so scalars are visible without waiting
        self.writer.file_writer.flush()

    def add_image(self, name, img):
        self.writer.add_image(name, img, self.global_step)

    def add_histogram(self, tag, values, bins=1000):
        self.writer.add_histogram(tag, values, self.global_step, bins)

    def add_embedding(self, tag, feats, labels):
        """Log a graph of embeddings of given features with labels"""
        self.writer.add_embedding(mat=feats, tag=tag, metadata=labels,
                                  global_step=self.global_step)

    def save_ckpt(self, state, cur_metric_val):
        latest = os.path.join(self.ckpt_path, 'checkpoint.pth')
        best = os.path.join(self.ckpt_path, 'best_model.pth')
        torch.save(state, latest)
        if cur_metric_val > self.best_metric_val:
            # new best: promote the latest checkpoint to best_model.pth
            shutil.copyfile(latest, best)
            self.best_metric_val = cur_metric_val

    def save_ckpt_iter(self, state, iter):
        target = os.path.join(self.ckpt_path,
                              'checkpoint_' + str(iter) + '.pth')
        torch.save(state, target)
def tb_train2():
    """Demo: exercise most tensorboardX writer features with dummy data."""
    import torchvision.utils as vutils
    import torchvision.models as models
    from torchvision import datasets
    resnet18 = models.resnet18(False)
    writer = SummaryWriter()
    sample_rate = 44100
    freqs = [262, 294, 330, 349, 392, 440, 440, 440, 440, 440, 440]
    for n_iter in range(100):
        dummy_s1 = torch.rand(1)
        dummy_s2 = torch.rand(1)
        # data grouping by `slash`
        writer.add_scalar('data/scalar1', dummy_s1[0], n_iter)
        writer.add_scalar('data/scalar2', dummy_s2[0], n_iter)
        writer.add_scalars('data/scalar_group',
                           {'xsinx': n_iter * np.sin(n_iter),
                            'xcosx': n_iter * np.cos(n_iter),
                            'arctanx': np.arctan(n_iter)}, n_iter)
        dummy_img = torch.rand(32, 3, 64, 64)  # output from network
        if n_iter % 10 == 0:
            x = vutils.make_grid(dummy_img, normalize=True, scale_each=True)
            writer.add_image('Image', x, n_iter)
        dummy_audio = torch.zeros(sample_rate * 2)
        # BUGFIX: iterate over the audio buffer's own length — the loop
        # previously ranged over x.size(0) (the image grid's first
        # dimension), leaving all but the first few samples at zero.
        for i in range(dummy_audio.size(0)):
            # amplitude of sound should in [-1, 1]
            dummy_audio[i] = np.cos(freqs[n_iter // 10] * np.pi * float(i) /
                                    float(sample_rate))
        writer.add_audio('myAudio', dummy_audio, n_iter,
                         sample_rate=sample_rate)
        writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)
        for name, param in resnet18.named_parameters():
            writer.add_histogram(name, param.clone().cpu().data.numpy(),
                                 n_iter)
        # needs tensorboard 0.4RC or later
        writer.add_pr_curve('xoxo', np.random.randint(2, size=100),
                            np.random.rand(100), n_iter)
    dataset = datasets.MNIST('mnist', train=False, download=True)
    # NOTE(review): test_data/test_labels are deprecated in newer
    # torchvision (use .data/.targets) — kept for compatibility here.
    images = dataset.test_data[:100].float()
    label = dataset.test_labels[:100]
    features = images.view(100, 784)
    writer.add_embedding(features, metadata=label,
                         label_img=images.unsqueeze(1))
    # export scalar data to JSON for external processing
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
class TensorboardLogger(Callback):
    """Fastai (v1) callback that mirrors training state to tensorboard:
    scalar metrics, embedding-layer weights, parameter histograms and a
    best-effort model graph."""
    # dataclass-style fields; NOTE(review): assumes a @dataclass decorator
    # (not visible here) generates __init__ — confirm at the class site.
    learn: Learner
    run_name: str
    histogram_freq: int = 100
    path: str = None

    def __post_init__(self):
        # default log root lives next to the learner's data
        self.path = self.path or os.path.join(self.learn.path, "logs")
        self.log_dir = os.path.join(self.path, self.run_name)

    def on_train_begin(self, **kwargs):
        self.writer = SummaryWriter(log_dir=self.log_dir)

    def on_epoch_end(self, **kwargs):
        iteration = kwargs["iteration"]
        metrics = kwargs["last_metrics"]
        metrics_names = ["valid_loss"
                         ] + [o.__name__ for o in self.learn.metrics]
        for val, name in zip(metrics, metrics_names):
            self.writer.add_scalar(name, val, iteration)
        # log raw embedding-layer weights for the projector
        for name, emb in self.learn.model.named_children():
            if isinstance(emb, nn.Embedding):
                self.writer.add_embedding(list(emb.parameters())[0],
                                          global_step=iteration, tag=name)

    def on_batch_end(self, **kwargs):
        iteration = kwargs["iteration"]
        loss = kwargs["last_loss"]
        self.writer.add_scalar("learning_rate", self.learn.opt.lr, iteration)
        self.writer.add_scalar("momentum", self.learn.opt.mom, iteration)
        self.writer.add_scalar("loss", loss, iteration)
        if iteration % self.histogram_freq == 0:
            for name, param in self.learn.model.named_parameters():
                self.writer.add_histogram(name, param, iteration)

    def on_train_end(self, **kwargs):
        # graph tracing can fail on models with unsupported ops; degrade
        # gracefully instead of aborting the run
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                dummy_input = next(iter(self.learn.data.train_dl))[0]
                self.writer.add_graph(self.learn.model, tuple(dummy_input))
        except Exception as e:
            print("Unable to create graph.")
            print(e)
        self.writer.close()
class TensorBoardProjector(Callback):
    """The TensorBoardProjector callback is used to write images from the validation pass to Tensorboard using the
    TensorboardX library.

    It accumulates up to ``num_images`` validation samples across batches and,
    once enough are collected, writes them as embedding(s): the raw data once
    (epoch 0 only, if ``write_data``) and the model features each epoch
    (if ``write_features``).
    """

    def __init__(self, log_dir='./logs',
                 comment='torchbearer',
                 num_images=100,
                 avg_pool_size=1,
                 avg_data_channels=True,
                 write_data=True,
                 write_features=True,
                 features_key=torchbearer.Y_PRED):
        """Construct a TensorBoardProjector callback which writes images to the given directory and, if required,
        associated features.

        :param log_dir: The tensorboard log path for output
        :type log_dir: str
        :param comment: Descriptive comment to append to path
        :type comment: str
        :param num_images: The number of images to write
        :type num_images: int
        :param avg_pool_size: Size of the average pool to perform on the image. This is recommended to reduce the
            overall image sizes and improve latency
        :type avg_pool_size: int
        :param avg_data_channels: If True, the image data will be averaged in the channel dimension
        :type avg_data_channels: bool
        :param write_data: If True, the raw data will be written as an embedding
        :type write_data: bool
        :param write_features: If True, the image features will be written as an embedding
        :type write_features: bool
        :param features_key: The key in state to use for the embedding. Typically model output but can be used to
            show features from any layer of the model.
        :type features_key: str
        """
        self.log_dir = log_dir
        self.comment = comment
        self.num_images = num_images
        self.avg_pool_size = avg_pool_size
        self.avg_data_channels = avg_data_channels
        self.write_data = write_data
        self.write_features = write_features
        self.features_key = features_key

        # Writer is created lazily in on_start (model name is needed for the path).
        self._writer = None
        # True once the embedding(s) for the current epoch have been written;
        # gates all per-step work below.
        self.done = False

    def on_start(self, state):
        """Open a SummaryWriter under ``log_dir/<ModelClass>_<comment>``."""
        log_dir = os.path.join(self.log_dir, state[torchbearer.MODEL].__class__.__name__ + '_' + self.comment)
        self._writer = SummaryWriter(log_dir=log_dir)

    def on_step_validation(self, state):
        """Accumulate samples from this validation batch; write embeddings once
        ``num_images`` samples have been collected, then mark ``done``."""
        if not self.done:
            x = state[torchbearer.X].data.clone()

            # Promote (C, H, W)-style 3-D input to 4-D by adding a channel dim
            # so avg_pool2d below applies uniformly.
            if len(x.size()) == 3:
                x = x.unsqueeze(1)

            # Downscale thumbnails to keep the projector payload small.
            x = F.avg_pool2d(x, self.avg_pool_size).data

            data = None
            # Raw-data embedding is only collected on the first epoch.
            if state[torchbearer.EPOCH] == 0 and self.write_data:
                if self.avg_data_channels:
                    data = torch.mean(x, 1)
                else:
                    data = x

                data = data.view(data.size(0), -1)  # flatten per sample

            feature = None
            if self.write_features:
                feature = state[self.features_key].data.clone()
                feature = feature.view(feature.size(0), -1)  # flatten per sample

            label = state[torchbearer.Y_TRUE].data.clone()

            if state[torchbearer.BATCH] == 0:
                # First batch of the epoch: (re)initialise the accumulators,
                # taking at most num_images samples.
                remaining = self.num_images if self.num_images < label.size(0) else label.size(0)

                self._images = x[:remaining].to('cpu')
                self._labels = label[:remaining].to('cpu')

                if data is not None:
                    self._data = data[:remaining].to('cpu')
                if feature is not None:
                    self._features = feature[:remaining].to('cpu')
            else:
                # Subsequent batches: append only as many samples as are still
                # needed to reach num_images.
                remaining = self.num_images - self._labels.size(0)
                if remaining > label.size(0):
                    remaining = label.size(0)

                self._images = torch.cat((self._images, x[:remaining].to('cpu')), dim=0)
                self._labels = torch.cat((self._labels, label[:remaining].to('cpu')), dim=0)

                if data is not None:
                    self._data = torch.cat((self._data, data[:remaining].to('cpu')), dim=0)
                if feature is not None:
                    self._features = torch.cat((self._features, feature[:remaining].to('cpu')), dim=0)

            if self._labels.size(0) >= self.num_images:
                # Enough samples collected — flush the embeddings.
                if state[torchbearer.EPOCH] == 0 and self.write_data:
                    # global_step=-1 marks the one-off raw-data embedding.
                    self._writer.add_embedding(self._data, metadata=self._labels, label_img=self._images, tag='data',
                                               global_step=-1)
                if self.write_features:
                    self._writer.add_embedding(self._features, metadata=self._labels, label_img=self._images,
                                               tag='features', global_step=state[torchbearer.EPOCH])
                self.done = True

    def on_end_epoch(self, state):
        """Re-arm the callback so features are written again next epoch."""
        if self.write_features:
            self.done = False

    def on_end(self, state):
        """Flush and close the writer at the end of the run."""
        self._writer.close()
def main(_run, _log, seed, dataset, filter_class_ids, input_image_size,
         patch_size, batch_size, num_epochs, loss_lambda):
    """Train a ``SemanticAutoencoder`` under a ``sacred`` run.

    Trains for ``num_epochs`` epochs, logging step/epoch losses to
    TensorBoard; every 5th epoch re-projects each prototype onto its nearest
    training patch, and every 1000th epoch saves prototype visualizations,
    test-set reconstructions and intermediate weights. A final prototype
    embedding is written at the last epoch.

    :param _run: sacred run object (observer 0 provides the run directory)
    :param _log: sacred logger
    :param seed: torch RNG seed
    :param dataset: key into DATASET_MAP / DATA_DIR_MAP
    :param filter_class_ids: list of int class ids to train on, or None for all
    :param input_image_size: model/dataset input image size
    :param patch_size: prototype patch size
    :param batch_size: training batch size
    :param num_epochs: number of training epochs
    :param loss_lambda: lambda weight for CustomLoss2
    :return: loss of the final epoch, or None if the user aborts the
        "train on all classes" confirmation prompt
    """
    # Set the RNG seed for torch
    torch.manual_seed(seed)

    # Check input parameters are in expected format
    assert filter_class_ids is None or type(filter_class_ids) is list
    if type(filter_class_ids) is list:
        assert all(type(class_id) is int for class_id in filter_class_ids)
    else:
        # No class filter: this trains on everything, so ask for confirmation.
        _log.warning('Training on all classes!!')
        confirm = input('Continue? [y/n] ')
        if confirm.lower() != 'y':
            return None

    # Provision the `sacred` run directory for this experiment
    RUN_DIR = _run.observers[0].dir
    LOGS_DIR, IMAGES_DIR, PROTOTYPES_DIR = _provision_run_dir(RUN_DIR)

    # Initialize log writer for tensorboard
    writer = SummaryWriter(LOGS_DIR)

    # Load datasets for training and testing
    Dataset = DATASET_MAP[dataset]
    data_dir = DATA_DIR_MAP[dataset]
    train_dataset, train_dataset_with_non_random_transformation, \
        test_dataset = Dataset.load_dataset_splits(
            data_dir, input_image_size, filter_class_ids)

    # Initialize the data loader
    train_dataloader = DataLoader(
        train_dataset, collate_fn=Dataset.custom_collate_fn,
        batch_size=batch_size, shuffle=True)

    # Define variables for attributes (labels are 1-based; 0 is the padding
    # row of the prototype embedding table — see the final add_embedding).
    num_attributes = train_dataset.num_attributes
    all_attribute_labels = range(1, num_attributes + 1)
    attribute_names = [train_dataset.get_attribute(al).name
                       for al in all_attribute_labels]

    # Initialize the model
    model = _make_cuda(SemanticAutoencoder(
        input_image_size, patch_size, num_attributes))

    # Initialize the loss function and optimizer
    epoch_loss = None
    criterion = _make_cuda(CustomLoss2(lambda_val=loss_lambda))
    # BUG FIX: `ifilter` is Python 2's itertools.ifilter and raises NameError
    # on Python 3; the builtin `filter` is the direct equivalent. Only
    # parameters with requires_grad are optimized.
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()))

    # Initiate training
    pbar, steps = tqdm(range(1, num_epochs + 1)), 0
    for epoch in pbar:
        epoch_loss = 0.
        model.train()  # Setting the model in training mode for training
        for image, label, attribute_labels, padding_idx in train_dataloader:
            steps += 1         # Incrementing the global step
            model.zero_grad()  # Clearing the gradients for each mini-batch

            # Create the input variable and get the output from the model
            x = _make_cuda(torch.autograd.Variable(image))
            z, z_patches, reconstructed_x = model(x)

            # Get the associated prototypes for each image in the batch
            prototype_labels = _make_cuda(attribute_labels)
            positive_prototypes = model.prototypes(prototype_labels)

            # Get the *non-associated* prototypes for each image in the batch
            negative_prototypes = list()
            for img_al in attribute_labels:
                negative_al = _make_cuda(torch.LongTensor(list(filter(
                    lambda al: al not in img_al, all_attribute_labels))))
                negative_prototypes.append(model.prototypes(negative_al))

            # Compute the loss
            loss = criterion(reconstructed_x, z_patches,
                             positive_prototypes, padding_idx, x,
                             negative_prototypes=negative_prototypes)

            # Do backprop and update the weights
            loss.backward()
            optimizer.step()

            # Update the epoch loss and add the step loss to tensorboard.
            # Convert the loss tensor to a float once and reuse it for both.
            step_loss = loss.item()
            epoch_loss += step_loss
            writer.add_scalar('loss/step_loss', step_loss, steps)

        # Add the epoch loss to tensorboard and update the progressbar
        writer.add_scalar('loss/epoch_loss', epoch_loss, steps)
        pbar.set_postfix(epoch_loss=epoch_loss)

        model.eval()  # Setting the model in evaluation mode for testing
        if (epoch % 5 == 0) or (epoch == num_epochs):
            # Compute the nearest patch for each prototype
            nearest_patches_for_prototypes = \
                model.get_nearest_patches_for_prototypes(
                    train_dataset_with_non_random_transformation)

            # Update each prototype to be equal to the nearest patch
            model.reproject_prototypes(nearest_patches_for_prototypes)

            # (epoch % 1000 == 0) implies (epoch % 5 == 0), so nesting here is
            # equivalent to a sibling check and keeps the dependency on
            # nearest_patches_for_prototypes explicit.
            if (epoch % 1000 == 0) or (epoch == num_epochs):
                # Save the prototype visualization
                save_prototype_patch_visualization(
                    model, train_dataset_with_non_random_transformation,
                    nearest_patches_for_prototypes, PROTOTYPES_DIR)

                # Save the reconstructed images for the test dataset
                # for every 1000 epochs
                for i_, (image, image_label, attribute_labels, _) \
                        in enumerate(test_dataset):
                    x = image.view((1,) + image.size())
                    x = _make_cuda(torch.autograd.Variable(x))
                    z, z_patches, reconstructed_x = model(x)

                    reconstructed_image = \
                        get_image_from_tensor(reconstructed_x)
                    reconstructed_image.save(
                        os.path.join(IMAGES_DIR, '%d-%d.png' % (epoch, i_)))

                # Save the intermediate model
                model.save_weights(os.path.join(RUN_DIR, MODEL_FILE_NAME))

        # Add the prototype embeddings to tensorboard at the end
        if epoch == num_epochs:
            # Row 0 of the embedding table is the padding prototype; skip it
            # so rows align with attribute_names (labels 1..num_attributes).
            writer.add_embedding(
                model.prototypes.weight[1:],
                metadata=attribute_names,
                global_step=steps)

    # Save the final model and commit the tensorboard logs
    model.save_weights(os.path.join(RUN_DIR, MODEL_FILE_NAME))
    writer.close()

    return epoch_loss