Exemplo n.º 1
0
def main(args, _=None):
    """Run ``catalyst-contrib project-embeddings`` script.

    Loads a feature matrix (``.npy``) and a metadata CSV, optionally
    subsamples rows and loads thumbnail images, then writes everything to
    a TensorBoard embedding-projector log in ``args.out_dir``.
    """
    df = pd.read_csv(args.in_csv)
    os.makedirs(args.out_dir, exist_ok=True)

    # Optional comma-separated list of CSV columns used as point labels.
    if args.meta_cols is not None:
        meta_header = args.meta_cols.split(",")
    else:
        meta_header = None

    # mmap avoids loading the whole feature matrix into RAM at once.
    features = np.load(args.in_npy, mmap_mode="r")
    assert len(df) == len(features), "CSV and feature matrix rows differ"

    if args.num_rows is not None:
        # Sample distinct rows when possible; fall back to sampling with
        # replacement only when more rows are requested than exist.  (The
        # original always sampled with replacement, silently duplicating
        # points in the projector.)
        replace = args.num_rows > len(df)
        indices = np.random.choice(len(df), args.num_rows, replace=replace)
        features = features[indices, :]
        df = df.iloc[indices]

    if args.img_col is not None:
        img_data = _load_image_data(rootpath=args.img_rootpath,
                                    paths=df[args.img_col].values,
                                    img_size=args.img_size)
    else:
        img_data = None

    if meta_header is not None:
        # Collapse newlines/whitespace so labels render on one line.
        metadata = df[meta_header].values.tolist()
        metadata = [[
            str(text).replace("\n", " ").replace(r"\s",
                                                 " ").replace(r"\s\s+",
                                                              " ").strip()
            for text in texts
        ] for texts in metadata]
        assert len(metadata) == len(features)
    elif args.img_col is not None:
        # No metadata columns: fall back to the image file name as label.
        def _image_name(s):
            # splitted[-1] also handles paths without any "/": the original
            # indexed splitted[1], which raised IndexError in that case
            # (rsplit always returns at least one element).
            return s.rsplit("/", 1)[-1]

        metadata = [_image_name(str(path)) for path in df[args.img_col].values]
    else:
        metadata = None

    summary_writer = SummaryWriter(args.out_dir)
    summary_writer.add_embedding(
        features,
        label_img=img_data,
        metadata=metadata,
    )
    summary_writer.close()

    print(f"Done. Run `tensorboard --logdir={args.out_dir}` " +
          "to view in Tensorboard")
Exemplo n.º 2
0
 def normal_feature_visualization():
    """Write t-SNE features with labels and images to TensorBoard.

    Reads the module-level ``feature_data_tsne``, ``label_data`` and
    ``label_img`` globals and logs them as one embedding at step 0.
    """
    global feature_data_tsne, label_data, label_img
    writer = SummaryWriter(
        comment="Label & Inference Visualization", log_dir="normal_run")
    # Removed leftover debugger breakpoint (`import pdb; pdb.set_trace()`),
    # which halted every non-interactive run of this function.
    writer.add_embedding(feature_data_tsne,
        metadata=label_data, label_img=label_img, global_step=0)
Exemplo n.º 3
0
def val_epoch(epoch, data_loader, model, criterion, opt, logger):
    """Run one validation epoch; return ``(epoch, avg_loss, avg_acc)``.

    Prints per-batch progress, records epoch averages via ``logger``, and
    every ``embedding_log`` epochs writes an embedding of the model
    outputs (with mid-clip frames as thumbnails) to TensorBoard.
    """
    print('validation at epoch {}'.format(epoch))

    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()

    end_time = time.time()

    write_embedding = True
    writer = None  # created lazily, only on embedding-log epochs
    embedding_log = 20

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(data_loader):
            data_time.update(time.time() - end_time)

            if not opt.no_cuda:
                # `async=True` is a SyntaxError on Python 3.7+ (`async`
                # became a reserved word); `non_blocking` is the
                # documented replacement with identical semantics.
                targets = targets.cuda(non_blocking=True)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            acc = calculate_accuracy(outputs, targets)

            losses.update(loss.item(), inputs.size(0))
            accuracies.update(acc, inputs.size(0))

            batch_time.update(time.time() - end_time)
            end_time = time.time()

            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch,
                      i + 1,
                      len(data_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      acc=accuracies))

            if write_embedding and epoch % embedding_log == 0:
                if writer is None:
                    writer = SummaryWriter(comment='_embedding_val_'+str(i))
                n_iter = (epoch * len(data_loader)) + i
                # Use the middle frame of the clip as the thumbnail.
                middle_frame = math.floor(inputs.data.shape[2] / 2)
                writer.add_embedding(
                    outputs.data,
                    metadata=targets.data,
                    label_img=torch.squeeze(
                        inputs.data[:, :, middle_frame, :, :], 2),
                    global_step=n_iter)

    if writer is not None:
        # Flush embedding data to disk; the original leaked the writer.
        writer.close()

    logger.log({'epoch': epoch, 'loss': losses.avg, 'acc': accuracies.avg})

    return epoch, losses.avg, accuracies.avg
Exemplo n.º 4
0
def main():
    """Visualize embeddings from TSV files in TensorBoard's projector.

    Takes two command-line arguments: a metadata TSV file followed by a
    TSV file of the corresponding vectors. Both files must be
    tab-separated (delimiter='\\t').

    Example uses:
    >>python embedding.py meta_data.tsv vectors.tsv
    >>tensorboard logdir= ./

    Go to localhost:6060 (or the address printed in the terminal) in a
    browser and select "projector" from the drop-down menu to visualize
    the embedding.
    """
    read_data = sys.argv
    print(read_data)
    if len(read_data) < 3:
        # Fail with a clear usage message instead of a bare IndexError.
        raise SystemExit(
            "usage: python embedding.py <meta_data.tsv> <vectors.tsv>")

    # read metadata
    label = pd.read_csv(str(read_data[1]), sep='\t')
    # read vectors
    data = pd.read_csv(str(read_data[2]), sep='\t')

    # converting to numpy arrays
    label = np.array(label)
    data = np.array(data)

    # flatten metadata to a 1-D array of labels
    label = label.reshape(label.shape[0])

    writer = SummaryWriter()
    writer.add_embedding(data, metadata=label)
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
    print("Working")
Exemplo n.º 5
0
def vis(ids, embeddings, images, args):
    """Write ``embeddings`` (labelled by ``ids``, with ``images`` as
    thumbnails) to a TensorBoard projector log under the run's path."""
    print("visualizing embeddings...\n")
    log_dir = os.path.join(CONF.PATH.OUTPUT_EMBEDDING,
                           "{}/tensorboard".format(args.path))
    writer = SummaryWriter(log_dir)
    writer.add_embedding(embeddings, metadata=ids, label_img=images)
    writer.close()
Exemplo n.º 6
0
    def get_tsne_embeddings_last_three_tasks(self, dataset, model):
        """Log shared/private encoder embeddings for the last 3 tasks.

        For each of tasks 17-19, creates a fresh ``SummaryWriter`` run
        and writes the stacked shared and private features (with their
        input images as thumbnails) for TensorBoard's projector.
        """
        from tensorboardX import SummaryWriter

        # Test final model on last 3 tasks:
        model.eval()
        tag = '_diff_{}'.format(self.args.diff)

        for t in [17, 18, 19]:
            all_images, all_labels, all_shared, all_private = [], [], [], []
            writer = SummaryWriter()
            for itr, (data, target, tt, td) in enumerate(dataset[t]['tsne']):
                x = data.to(device=self.device)
                y = target.to(device=self.device, dtype=torch.long)
                tt = tt.to(device=self.device)
                output = model.forward(x, x, tt, t)
                shared_out, private_out = model.get_encoded_ftrs(x, x, t)

                all_shared.append(shared_out)
                all_private.append(private_out)
                all_images.append(x)
                all_labels.append(y)

            writer.add_embedding(mat=torch.stack(all_shared, dim=1).data,
                                 label_img=torch.stack(all_images, dim=1).data,
                                 metadata=list(range(1, 6)),
                                 tag='Shared_{}_{}'.format(t, tag))
            writer.add_embedding(mat=torch.stack(all_private, dim=1).data,
                                 label_img=torch.stack(all_images, dim=1).data,
                                 metadata=list(range(1, 6)),
                                 tag='Private_{}_{}'.format(t, tag))
            # Close each per-task writer: the original closed only the
            # last one after the loop, leaking the first two writers.
            writer.close()
Exemplo n.º 7
0
def _save_embeddings(da_embs, dset):
    """Write anchor-tile embeddings (with RGB thumbnails) to TensorBoard.

    Parameters
    ----------
    da_embs : DataArray-like
        Embeddings with ``tile_type``, ``tile_id`` and ``emb_dim`` dims.
    dset : dataset
        Indexable dataset yielding ``(image, ...)`` tuples per tile id.
    """
    tile_type = "anchor"
    da_embs = da_embs.sel(tile_type=tile_type)

    # before we get the images we remove all the transforms so that we get
    # the original RGB image
    dset.transform = None

    # Pre-allocate the thumbnail tensor from the first tile's shape.
    # (The original also assigned a dead `label_img = []` here that was
    # immediately overwritten.)
    img_first_tile = dset[0][0]
    nc, nx, ny = img_first_tile.shape
    ntiles = int(da_embs.tile_id.count())
    label_img = torch.zeros((ntiles, nc, nx, ny))

    for i, tile_id in enumerate(tqdm(da_embs.tile_id.values)):
        label_img[i] = dset[tile_id][0]

    writer = SummaryWriter()
    writer.add_embedding(
        da_embs.transpose("tile_id", "emb_dim").values,
        label_img=label_img,
    )
    writer.close()

    print(
        """
    embeddings saved for tensorboard to `runs/`
    now start tensorboard:
        $> tensorboard --logdir runs
    and open a browser to view the tensorboard embedding projector:
        http://localhost:6006/#projector
    """
    )
def prepareDatasetAndLogging(args):
    """Build train/test loaders plus progress callbacks and a TB writer.

    Returns ``(tensorboard_writer, callbacklist, train_loader,
    test_loader)`` for the chosen dataset (mnist or fashion_mnist).
    """
    # choose the dataset
    if args.dataset == 'mnist':
        DatasetClass = datasets.MNIST
    elif args.dataset == 'fashion_mnist':
        DatasetClass = datasets.FashionMNIST
    else:
        raise ValueError('unknown dataset: ' + args.dataset + ' try mnist or fashion_mnist')

    training_run_name = timeStamped(args.dataset + '_' + args.name)

    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

    # Create the dataset, mnist or fashion_mnist
    dataset_dir = os.path.join(args.data_dir, args.dataset)
    training_run_dir = os.path.join(args.data_dir, training_run_name)
    train_dataset = DatasetClass(
        dataset_dir, train=True, download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ]))
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True, **kwargs)
    test_dataset = DatasetClass(
        dataset_dir, train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=args.test_batch_size, shuffle=True, **kwargs)

    # Set up visualization and progress status update code
    callback_params = {'epochs': args.epochs,
                       'samples': len(train_loader) * args.batch_size,
                       'steps': len(train_loader),
                       'metrics': {'acc': np.array([]),
                                   'loss': np.array([]),
                                   'val_acc': np.array([]),
                                   'val_loss': np.array([])}}
    if args.print_log:
        output_on_train_end = os.sys.stdout
    else:
        output_on_train_end = None

    # Use os.path.join for the CSV path: the original concatenated the
    # directory and file name without a separator, writing the CSV next
    # to (not inside) the run directory under a mangled name.
    callbacklist = callbacks.CallbackList(
        [callbacks.BaseLogger(),
         callbacks.TQDMCallback(),
         callbacks.CSVLogger(filename=os.path.join(training_run_dir,
                                                   training_run_name + '.csv'),
                             output_on_train_end=output_on_train_end)])
    callbacklist.set_params(callback_params)

    tensorboard_writer = SummaryWriter(log_dir=training_run_dir, comment=args.dataset + '_embedding_training')

    # show some image examples in tensorboard projector with inverted color
    # NOTE(review): `test_data`/`test_labels` are deprecated attribute names
    # in newer torchvision (now `data`/`targets`) — confirm pinned version.
    images = 255 - test_dataset.test_data[:100].float()
    label = test_dataset.test_labels[:100]
    features = images.view(100, 784)
    tensorboard_writer.add_embedding(features, metadata=label, label_img=images.unsqueeze(1))
    return tensorboard_writer, callbacklist, train_loader, test_loader
Exemplo n.º 9
0
def main(args, _=None):
    """Project embeddings from a CSV + ``.npy`` pair into TensorBoard.

    Loads features and metadata, optionally subsamples rows and attaches
    thumbnail images, then writes an embedding-projector log to
    ``args.out_dir``.
    """
    df = pd.read_csv(args.in_csv)
    os.makedirs(args.out_dir, exist_ok=True)

    if args.meta_cols is not None:
        meta_header = args.meta_cols.split(",")
    else:
        raise ValueError("meta-cols must not be None")

    features = np.load(args.in_npy, mmap_mode="r")
    assert len(df) == len(features), "CSV and feature matrix rows differ"

    if args.n_rows is not None:
        # Sample row positions and apply them to BOTH the dataframe and
        # the feature matrix.  The original only sampled `df`, leaving
        # `features` misaligned (and of a different length) from the
        # metadata passed to add_embedding.
        indices = np.random.choice(len(df), args.n_rows, replace=False)
        df = df.iloc[indices]
        features = features[indices, :]

    if args.img_col is not None:
        image_names = [
            path.join(args.img_datapath, name)
            for name in df[args.img_col].values
        ]
        # HWC uint8 -> NCHW float32 in [0, 1], as add_embedding expects.
        img_data = np.stack(
            [load_image(name, args.img_size) for name in image_names], axis=0)
        img_data = (img_data.transpose(
            (0, 3, 1, 2)) / 255.0).astype(np.float32)
        img_data = torch.from_numpy(img_data)
    else:
        img_data = None

    summary_writer = SummaryWriter(args.out_dir)
    summary_writer.add_embedding(features,
                                 metadata=df[meta_header].astype(str).values,
                                 label_img=img_data,
                                 metadata_header=meta_header)
    # Flush to disk; the original never closed the writer.
    summary_writer.close()

    print(f"Done. Run `tensorboard --logdir={args.out_dir}` "
          f"to view in Tensorboard")
Exemplo n.º 10
0
    def get_tsne_embeddings_first_ten_tasks(self, dataset, model):
        """Log shared/private encoder features for the first 10 tasks.

        Accumulates per-batch shared and private encodings (plus the raw
        input images) over tasks 0-9, then writes them as two TensorBoard
        embeddings — one for shared features, one for private.
        """
        from tensorboardX import SummaryWriter

        model.eval()

        # Suffix identifying this run's `diff` setting in embedding tags.
        tag_ = '_diff_{}'.format(self.args.diff)
        all_images, all_shared, all_private = [], [], []

        # Test final model on first 10 tasks:
        writer = SummaryWriter()
        for t in range(10):
            for itr, (data, _, tt, td) in enumerate(dataset[t]['tsne']):
                x = data.to(device=self.device)
                tt = tt.to(device=self.device)
                output = model.forward(x, x, tt, t)
                shared_out, private_out = model.get_encoded_ftrs(x, x, t)
                all_shared.append(shared_out)
                all_private.append(private_out)
                all_images.append(x)

        # Debug print of the stacked shared-feature shape.
        print(torch.stack(all_shared).size())

        # NOTE(review): `tag` is passed as a *list* below, while
        # tensorboardX documents add_embedding's `tag` as a string —
        # confirm this works with the pinned tensorboardX version.
        tag = ['Shared10_{}_{}'.format(tag_, i) for i in range(1, 11)]
        writer.add_embedding(mat=torch.stack(all_shared, dim=1).data,
                             label_img=torch.stack(all_images, dim=1).data,
                             metadata=list(range(1, 11)),
                             tag=tag)  #, metadata_header=list(range(1,11)))

        tag = ['Private10_{}_{}'.format(tag_, i) for i in range(1, 11)]
        writer.add_embedding(mat=torch.stack(all_private, dim=1).data,
                             label_img=torch.stack(all_images, dim=1).data,
                             metadata=list(range(1, 11)),
                             tag=tag)  #,metadata_header=list(range(1,11)))
        writer.close()
Exemplo n.º 11
0
def test(args,
         model,
         data_loader,
         show_image_on_board=True,
         show_all_embedding=False):
    """Collect up to 1000 embeddings from ``model`` and log them to TB.

    Parameters
    ----------
    show_image_on_board : bool
        If True, attach the input images as projector thumbnails;
        otherwise label points with their category names.
    show_all_embedding : bool
        If True (and images are shown), condition the prediction on the
        category as well as the image.
    """
    model.eval()
    writer = SummaryWriter()
    weights = []
    images = []
    labels = []
    with torch.no_grad():
        for i, (image, cat) in enumerate(data_loader):
            if i == 1000:
                break
            image = image.to(device)
            cat = cat.to(device)
            labels.append(idx2label[cat.item()])
            # .cpu() before .numpy(): CUDA tensors cannot be converted
            # directly (the original crashed whenever `device` was a GPU).
            images.append(image.squeeze(0).cpu().numpy())

            if show_image_on_board:
                if show_all_embedding:
                    embedded_vec = model.predict(x=image, category=cat)
                else:
                    embedded_vec = model.predict(x=image, category=None)
            else:
                embedded_vec = model.predict(x=None, category=cat)
            weights.append(embedded_vec.squeeze(0).cpu().numpy())

    weights = torch.FloatTensor(weights)
    images = torch.FloatTensor(images)
    if show_image_on_board:
        writer.add_embedding(weights, label_img=images)
    else:
        writer.add_embedding(weights, metadata=labels)
    # Flush to disk; the original leaked the writer.
    writer.close()
    print("done")
Exemplo n.º 12
0
class LogHandler:
    """Thin wrapper around a ``SummaryWriter`` that prefixes every tag
    with a module identifier."""

    def __init__(self, logdir, module_id):
        self.logdir = logdir
        self.log = SummaryWriter(self.logdir)
        self.module_id = module_id

    def _tag(self, key):
        """Build the full '<module_id>_<key>' tag for a log entry."""
        return '{}_{}'.format(self.module_id, key)

    def scalar(self, key, val, step):
        '''
        val can either be a scalar or a dictionary e.g.
        {'a': 3, 'b': 2} to plot e.g. the values of a and b
        onto the same graph
        '''
        if isinstance(val, dict):
            self.log.add_scalars(self._tag(key), val, step)
        else:
            self.log.add_scalar(self._tag(key), val, step)

    def text(self, key, val, step):
        self.log.add_text(self._tag(key), val, step)

    def image(self, key, val, step):
        self.log.add_image(self._tag(key), val, step)

    def figure(self, key, val, step):
        self.log.add_figure(self._tag(key), val, step)

    def embedding(self, key, val, meta, step):
        # Note: embeddings use the raw key as tag, without the prefix.
        self.log.add_embedding(val, tag=key, metadata=meta, global_step=step)
def visualize(args):
    """Load the best checkpoint and project selected label embeddings.

    Writes the label-embedding vectors for every label in the open-set
    vocabulary to a TensorBoard projector run named after the model id.
    """
    saved_path = constant.EXP_ROOT
    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    model.eval()
    model.load_state_dict(
        torch.load(saved_path + '/' + args.model_id +
                   '_best.pt')["state_dict"])

    label2id = constant.ANS2ID_DICT["open"]
    # Renamed from `visualize`: the original shadowed this function's own
    # name with the writer object.
    writer = SummaryWriter("../visualize/" + args.model_id)
    label_list = list(label2id.keys())
    ids = [label2id[_] for _ in label_list]
    if args.gcn:
        # GCN head: propagate label vectors through the weighted affinity
        # graph (row-normalized) before projecting.
        connection_matrix = model.decoder.label_matrix + model.decoder.weight * model.decoder.affinity
        label_vectors = model.decoder.transform(
            connection_matrix.mm(model.decoder.linear.weight) /
            connection_matrix.sum(1, keepdim=True))
    else:
        label_vectors = model.decoder.linear.weight.data

    interested_vectors = torch.index_select(
        label_vectors, 0,
        torch.tensor(ids).to(torch.device("cuda")))
    writer.add_embedding(interested_vectors,
                         metadata=label_list,
                         label_img=None)
    # Flush to disk; the original never closed the writer.
    writer.close()
Exemplo n.º 14
0
class Logger:
    """Aggregates training statistics and fans them out to the console,
    TensorBoard, and (optionally) a sacred run."""

    def __init__(self, console_logger):
        # Console logger used by `print_recent_stats`.
        self.console_logger = console_logger

        self.use_tb = False
        self.use_sacred = False
        self.use_hdf = False

        # key -> list of (t, value) pairs, in insertion order.
        self.stats = defaultdict(lambda: [])

    def setup_tb(self, directory_name):
        """Enable TensorBoard logging under ``directory_name``."""
        # Import here so it doesn't have to be installed if you don't use it
        from tensorboard_logger import configure, log_value
        configure(directory_name)
        self.tb_logger = log_value
        self.use_tb = True

        from tensorboardX import SummaryWriter
        self.writer = SummaryWriter(directory_name + "-latent")

    def setup_sacred(self, sacred_run_dict):
        """Enable sacred logging.

        NOTE(review): the bare ``return`` below disables this method —
        the code after it never runs, so ``use_sacred`` stays False.
        Confirm whether that is intentional before removing it.
        """
        return
        self.sacred_info = sacred_run_dict.info
        self.use_sacred = True

    def log_stat(self, key, value, t, to_sacred=True):
        """Record ``(t, value)`` for ``key`` and forward to enabled sinks."""
        self.stats[key].append((t, value))

        if self.use_tb:
            self.tb_logger(key, value, t)

        if self.use_sacred and to_sacred:
            if key in self.sacred_info:
                self.sacred_info["{}_T".format(key)].append(t)
                self.sacred_info[key].append(value)
            else:
                self.sacred_info["{}_T".format(key)] = [t]
                self.sacred_info[key] = [value]

    def log_vec(self, mat, metadata, global_step, tag):
        """Write an embedding matrix to TensorBoard (no-op if TB is off)."""
        if self.use_tb:
            self.writer.add_embedding(mat,
                                      metadata,
                                      global_step=global_step,
                                      tag=tag)

    def print_recent_stats(self):
        """Log a windowed-mean summary of all recorded stats to the console."""
        log_str = "Recent Stats | t_env: {:>10} | Episode: {:>8}\n".format(
            *self.stats["episode"][-1])
        i = 0
        for (k, v) in sorted(self.stats.items()):
            if k == "episode":
                continue
            i += 1
            # epsilon is reported instantaneously; everything else is a
            # 5-sample moving average.
            window = 5 if k != "epsilon" else 1
            item = "{:.4f}".format(
                np.mean([x[1] for x in self.stats[k][-window:]]))
            log_str += "{:<25}{:>8}".format(k + ":", item)
            log_str += "\n" if i % 4 == 0 else "\t"
        self.console_logger.info(log_str)
Exemplo n.º 15
0
 def test_embedding_square(self):
     """Smoke-test add_embedding with square (32x32) thumbnail images."""
     writer = SummaryWriter(comment='sq')
     features = torch.rand(228, 256)
     thumbs = torch.rand(228, 3, 32, 32)
     count = thumbs.shape[0]
     for idx in range(count):
         # Scale brightness with the index so a gradient is visible.
         thumbs[idx] *= (float(idx) + 60) / (count + 60)
     writer.add_embedding(features, label_img=thumbs, global_step=2)
Exemplo n.º 16
0
def _main(args):
    """
    Main routine of script to generate embeddings.
    Parameters
    ----------
    args : argparse.Namespace
        contains all arguments parsed from input

    Returns
    -------
    None

    """
    with open(args.results_file, 'rb') as f:
        features, preds, labels, filenames = pickle.load(f)

    if not args.tensorboard:
        # Static path: render an embedding image and write it to disk.
        viz_img = _make_embedding(features=features,
                                  labels=labels,
                                  embedding=args.embedding,
                                  three_d=args.three_d)
        cv2.imwrite(args.output_file, viz_img)
        return

    # TensorBoard path: thumbnails are loaded in parallel, though (as in
    # the original) they are not attached to the embedding (label_img=None).
    writer = SummaryWriter(log_dir=os.path.dirname(args.output_file))
    with Pool(16) as pool:
        images = pool.map(_load_thumbnail, filenames)
    writer.add_embedding(
        torch.from_numpy(features),
        metadata=torch.from_numpy(labels),
        label_img=None)
def compute_embeddings_lfw(args,
                           dataset,
                           model,
                           batch_size,
                           dump_embeddings=False,
                           pdist=lambda x, y: 1. - F.cosine_similarity(x, y),
                           flipped_embeddings=False):
    """Computes embeddings of all images from the LFW dataset using PyTorch.

    Iterates over the LFW pair dataset, embeds both images of every
    pair, and records a distance score together with the ground-truth
    "is same person" flag.

    Args:
        args: parsed CLI namespace; only ``args.devices`` is read here.
        dataset: LFW pairs dataset yielding dicts with 'img1', 'img2',
            'is_same' (and 'id0'/'id1' when dumping embeddings).
        model: embedding network mapping an image batch to vectors.
        batch_size: batch size for the evaluation DataLoader.
        dump_embeddings: when True, also collect all embeddings/ids and
            write them to a timestamped TensorBoard projector log.
        pdist: pairwise distance function; defaults to cosine distance.
        flipped_embeddings: when True, average each embedding with the
            embedding of the horizontally flipped image.

    Returns:
        List of dicts with keys 'score', 'is_same', 'idx' — one per pair.
    """
    val_loader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=4,
                            shuffle=False)
    scores_with_gt = []
    embeddings = []
    ids = []

    for batch_idx, data in enumerate(tqdm(val_loader, 'Computing embeddings')):
        images_1 = data['img1']
        images_2 = data['img2']
        is_same = data['is_same']
        if torch.cuda.is_available() and args.devices[0] != -1:
            images_1 = images_1.cuda()
            images_2 = images_2.cuda()
        emb_1 = model(images_1)
        emb_2 = model(images_2)
        if flipped_embeddings:
            # Test-time augmentation: average with the flipped image's
            # embedding for a more stable representation.
            images_1_flipped = flip_tensor(images_1, 3)
            images_2_flipped = flip_tensor(images_2, 3)
            emb_1_flipped = model(images_1_flipped)
            emb_2_flipped = model(images_2_flipped)
            emb_1 = (emb_1 + emb_1_flipped) * .5
            emb_2 = (emb_2 + emb_2_flipped) * .5
        scores = pdist(emb_1, emb_2).data.cpu().numpy()

        for i, _ in enumerate(scores):
            # 'idx' is the global pair index across all batches.
            scores_with_gt.append({
                'score': scores[i],
                'is_same': is_same[i],
                'idx': batch_idx * batch_size + i
            })

        if dump_embeddings:
            id0 = data['id0']
            id1 = data['id1']
            ids.append(id0)
            ids.append(id1)
            to_dump_1 = emb_1.data.cpu()
            to_dump_2 = emb_2.data.cpu()
            embeddings.append(to_dump_1)
            embeddings.append(to_dump_2)

    if dump_embeddings:
        total_emb = np.concatenate(embeddings, axis=0)
        total_ids = np.concatenate(ids, axis=0)
        log_path = './logs/{:%Y_%m_%d_%H_%M}'.format(datetime.datetime.now())
        writer = SummaryWriter(log_path)
        writer.add_embedding(torch.from_numpy(total_emb), total_ids)

    return scores_with_gt
Exemplo n.º 18
0
def visualize_embeddings(v):
	"""Log pretrained embedding weights to TensorBoard's projector.

	Args:
		v: The torchtext.vocab.Vector object that contains weights of
			the embeddings.
	"""
	SummaryWriter().add_embedding(v.vectors, v.itos)
Exemplo n.º 19
0
 def test_embedding_fail(self):
     """add_embedding must reject non-square label images (3x16x32)."""
     with self.assertRaises(AssertionError):
         writer = SummaryWriter(comment='shouldfail')
         feats = torch.rand(228, 256)
         imgs = torch.rand(228, 3, 16, 32)
         count = imgs.shape[0]
         for idx in range(count):
             imgs[idx] *= (float(idx) + 60) / (count + 60)
         writer.add_embedding(feats, label_img=imgs, global_step=2)
Exemplo n.º 20
0
def load_then_visualize_embeddings(path):
	"""Load pretrained vectors from ``path`` and log them to TensorBoard.

	Args:
		path: Path to the pretrained vector file.
	"""
	vectors = vocab.Vectors(path)
	SummaryWriter().add_embedding(vectors.vectors, vectors.itos)
Exemplo n.º 21
0
def train(args):
    """Train a word-embedding model (SGNS or character-level) and log to TB.

    Builds the data loaders and model, then runs ``args.epochs`` epochs,
    logging loss / PIP-loss scalars, checkpointing periodically, and
    writing the embedding matrix to TensorBoard's projector.
    """
    if args.multi_node:
        init_process(args)
    device = args.device
    text_loader = TextDataLoader(args.batch_size, args.multi_node,
                                 args.num_workers, args.data_dir, args.dataset,
                                 args.window_size, args.neg_sample_size,
                                 args.remove_th, args.subsample_th,
                                 args.embed_size, args.is_character, args.seed)
    eval_loader = EvalDataLoader(args.batch_size, args.num_workers,
                                 args.data_dir)
    if args.is_character:
        model = word_embed_ng(args.vocab_size, args.char_embed_size,
                              args.hidden_size, args.num_layer, args.dropout,
                              args.mlp_size, args.embed_size,
                              args.neg_sample_size, args.bidirectional,
                              args.device)
    else:
        model = SGNS(len(text_loader.dataset.vocabs), args.embed_size)
    if args.load_model is not None:
        model.load_state_dict(
            torch.load(args.log_dir + args.load_model,
                       map_location=lambda storage, loc: storage))
    if args.multi_gpu:
        print("Let's use", args.num_gpu, "GPUs!")
        model = nn.DataParallel(model,
                                device_ids=[i for i in range(args.num_gpu)])
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    writer = SummaryWriter(args.log_dir)
    trainer = Trainer(args, model, optimizer, writer, text_loader)
    for epoch in range(args.epochs):
        epoch += 1  # report 1-based epoch numbers
        trainer.monitor_loss = 0
        trainer.epoch = epoch
        start_time = time.time()
        loss = trainer.train_epoch()
        # Evaluate only where the result is used (logging rank).  The
        # original also ran an unconditional `evaluate` before training
        # each epoch whose result was immediately discarded, doubling
        # evaluation cost for nothing.
        if not args.multi_node or (args.multi_node
                                   and distributed.get_rank() == 0):
            piploss = evaluate(args, model, text_loader, eval_loader)
            print(
                '====> Epoch: {} Average loss: {:.4f} / PIP loss: {:.4f} / Time: {:.4f}'
                .format(epoch, loss / len(text_loader.dataset), piploss,
                        time.time() - start_time))
            writer.add_scalar('Epoch time', time.time() - start_time, epoch)
            writer.add_scalar('PIP loss', piploss, epoch)
            writer.add_scalar('Train loss', loss / len(text_loader.dataset),
                              epoch)
            if epoch % args.save_interval == 0:
                torch.save(model.state_dict(),
                           os.path.join(args.log_dir, 'model.pt'))
                if not args.is_character:
                    features = plot_embedding(args, model, text_loader)
                    writer.add_embedding(features,
                                         metadata=text_loader.vocabs,
                                         global_step=epoch)
Exemplo n.º 22
0
class Visualizer:
    """Writes scalars, figures, embeddings, and image grids to TensorBoard
    (and periodically saves result images to disk)."""

    def __init__(self, tb_path):
        self.tb_path = tb_path

        # Start each run from a clean event directory.
        if os.path.exists(tb_path):
            os.system('rm -r {}'.format(tb_path))

        self.writer = SummaryWriter(tb_path)
        self.savedir = '/storage/armand/results/thesis/loAE'
        self.eval_every = 20  # save result images every N epochs

    def add_scalar(self, scalar_dict, epoch, global_step=None):
        """Dispatch each entry of ``scalar_dict`` to the matching call:
        dicts -> add_scalars, figures -> add_figure, the special
        'Embedding'/'Original-Domain' tags -> add_embedding, lists and
        arrays are skipped, anything else -> add_scalar.
        """
        for tag, scalar in scalar_dict.items():
            if isinstance(scalar, dict):
                self.writer.add_scalars(tag, scalar, epoch)
            elif isinstance(scalar, plt.Figure):
                # Fixed: `plt.figure` is a function, so the original
                # `plt.figure.Figure` raised AttributeError at runtime;
                # the Figure class is exposed as `plt.Figure`.
                self.writer.add_figure(tag, scalar, epoch)
            elif tag == 'Embedding' or tag == 'Original-Domain':
                self.writer.add_embedding(scalar,
                                          tag=tag,
                                          global_step=global_step)
            elif isinstance(scalar, list) or isinstance(scalar, np.ndarray):
                continue
            else:
                self.writer.add_scalar(tag, scalar, epoch)

    def add_images(self, image_dict, epoch, global_step=None, prefix=None):
        """Log image grids to TensorBoard; every ``eval_every`` epochs
        also save a cropped copy of each grid under ``self.savedir``."""
        for tag, images in image_dict.items():
            if prefix is not None:
                tag = '{}/{}'.format(prefix, tag)
            images = torch.clamp(images, -1, 1)
            images = vutils.make_grid(images,
                                      nrow=images.size(0),
                                      normalize=True,
                                      range=(-1, 1))
            '''Save images of results'''
            if epoch % self.eval_every == 0 and epoch != 0:
                case = self.tb_path.split('/')[-2]
                resImageDir = os.path.join(self.savedir, 'figures', case)
                if not os.path.exists(resImageDir):
                    os.makedirs(resImageDir)
                # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2;
                # confirm the pinned SciPy version or migrate to imageio.
                scipy.misc.imsave(
                    os.path.join(
                        resImageDir,
                        prefix + '_step-' + str(global_step).zfill(5) +
                        '_epoch-' + str(epoch).zfill(3) + '.png'),
                    images[:, :130].permute(1, 2, 0))

            self.writer.add_image(tag, images, global_step)
Exemplo n.º 23
0
class TensorboardLogger(Callback):
    """Callback that mirrors training metrics, parameter histograms,
    embedding layers, and (at the end) the model graph to TensorBoard."""

    def __init__(self, model, md, log_name, metrics_names=None, path=None, histogram_freq=100):
        """
        Args:
            model: the model being trained (scanned for Embedding layers).
            md: model-data object providing ``.path`` and ``.trn_dl``.
            log_name: subdirectory name for this run's event files.
            metrics_names: optional extra metric names reported on epoch
                end (in addition to "validation_loss").
            path: base log directory; defaults to ``<md.path>/logs``.
            histogram_freq: write parameter histograms every N iterations.
        """
        super().__init__()
        self.model = model
        self.md = md
        self.metrics_names = ["validation_loss"]
        # `None` default instead of a mutable `[]`: a shared default list
        # is a classic Python pitfall (visible across all instances).
        self.metrics_names += metrics_names or []
        self.histogram_freq = histogram_freq

        path = path or os.path.join(md.path, "logs")
        self.log_dir = os.path.join(path, log_name)

    def on_train_begin(self):
        self.iteration = 0
        self.epoch = 0
        self.writer = SummaryWriter(log_dir=self.log_dir)

    def on_batch_begin(self):
        pass

    def on_phase_begin(self):
        pass

    def on_epoch_end(self, metrics):
        self.epoch += 1

        for val, name in zip(metrics, self.metrics_names):
            self.writer.add_scalar(name, val, self.iteration)

        # Project every Embedding layer's weight matrix.
        for name, emb in self.model.named_children():
            if isinstance(emb, nn.Embedding):
                self.writer.add_embedding(list(emb.parameters())[0], global_step=self.iteration, tag=name)

    def on_phase_end(self):
        pass

    def on_batch_end(self, loss):
        self.iteration += 1
        self.writer.add_scalar("loss", loss, self.iteration)

        if self.iteration % self.histogram_freq == 0:
            for name, param in self.model.named_parameters():
                self.writer.add_histogram(name, param, self.iteration)

    def on_train_end(self):
        # Graph export needs a traced forward pass; report failures (e.g.
        # models the tracer cannot handle) instead of crashing the run.
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                dummy_input = tuple(next(iter(self.md.trn_dl))[:-1])
                self.writer.add_graph(self.model, dummy_input)
        except Exception as e:
            print("Unable to create graph.")
            print(e)


        self.writer.close()
Exemplo n.º 24
0
    def visualize(self):
        """Export the CBOW embedding matrix (word-labelled) for the
        TensorBoard projector under ``tensorboard/cbow``."""
        out_dir = "tensorboard/cbow"
        os.makedirs(out_dir, exist_ok=True)

        weights = self.model.embedding.weight.data
        words = [self.idx2word[idx] for idx in range(len(self.word2idx))]

        board = SummaryWriter(out_dir)
        board.add_embedding(weights, metadata=words)
        board.close()
def normal_kmeans(dataloader1, dataloader2, N1, N2, k, key_name, DC=None):
    """Run k-means over weak/strong features, log counts and embeddings.

    :param dataloader1: loader for the weakly-labelled split.
    :param dataloader2: loader for the strongly-labelled split.
    :param N1: number of samples expected from ``dataloader1``.
    :param N2: number of samples expected from ``dataloader2``.
    :param k: number of clusters.
    :param key_name: name of the sound-event key being processed.
    :param DC: optional post-processing helper; when given, segment
        extraction and strong-label conversion are run for ``key_name``.
    """
    from collections import Counter

    print("### Start Normal Kmeans:{}".format(key_name))
    features1 = normal_feature(dataloader1, N1)
    features2 = normal_feature(dataloader2, N2)
    print("weak:", features1.shape)
    print("strong:", features2.shape)

    I, J, loss = new_run_kmeans(features1, features2, k, verbose=args.verbose)
    LOG_DIR = "./normal/{}".format(current_time)
    writer = SummaryWriter(log_dir=LOG_DIR)
    label_cnt = "/".join([LOG_DIR, "label_cnt.tsv"])

    # Count cluster sizes in one pass (the original used an O(k*|J|)
    # nested loop).
    j_counts = Counter(J)
    for number in range(k):
        print("{}:{}".format(number, j_counts[number]))
    # First cluster with the maximal count; the original left spe_number
    # undefined (NameError) when every cluster was empty — default to 0.
    spe_number = max(range(k), key=lambda c: j_counts[c]) if k > 0 else 0
    print("Specified_number:{}\n".format(spe_number))

    labels = I
    LABEL_CNT = sorted(Counter(labels).items())
    print(LABEL_CNT)

    with open(label_cnt, "a") as label_cnt_file:
        label_cnt_file.write("{}\n".format(key_name))
        label_cnt_file.write("{}\n".format(LABEL_CNT))
        label_cnt_file.write("{}\n".format(spe_number))

    writer.add_embedding(features1, metadata=labels)
    writer.close()

    # The original dereferenced DC unconditionally even though it defaults
    # to None; guard so a missing helper skips post-processing instead of
    # raising AttributeError.
    if DC is not None:
        # Extract the segments for the specified key.
        DC.SED_Single(key_name, I, spe_number)
        # Convert the extracted segments into strong-label data.
        DC.create_csv_wav_file_Single(key_name, resume="normal")
Exemplo n.º 26
0
    def test_embedding(self):
        """Smoke-test add_embedding, with and without a metadata header."""
        writer = SummaryWriter()
        feats = torch.Tensor([[1, 2, 3], [5, 4, 1], [3, 7, 7]])
        labels = torch.Tensor([33, 44, 55])
        imgs = torch.zeros(3, 3, 5, 5)

        # Single-column metadata: one label per embedding row.
        writer.add_embedding(feats, metadata=labels, label_img=imgs,
                             global_step=2)

        # Two-column metadata: pair each label with a dataset-split tag
        # (zip truncates to the shorter sequence).
        split_tags = ['test'] * 2 + ['train'] * 2
        paired = list(zip(labels, split_tags))
        writer.add_embedding(feats, metadata=paired, label_img=imgs,
                             metadata_header=['digit', 'dataset'],
                             global_step=2)
Exemplo n.º 27
0
def main(target_file_path, wordlist_file_path, log_file_path):
    """Project pre-trained word embeddings into tensorboard.

    :param target_file_path: text file with one embedding vector per row.
    :param wordlist_file_path: file with one vocabulary word per line,
        aligned with the rows of ``target_file_path``.
    :param log_file_path: output directory for the tensorboard event files.
    """
    writer = SummaryWriter(log_file_path)
    with open(wordlist_file_path, 'r') as f:
        word_list = [line.strip() for line in f]
    embeddings = np.loadtxt(target_file_path)
    # No need to route the matrix through an nn.Embedding layer just to copy
    # its weights out again; convert directly. .float() keeps the fp32 dtype
    # the embedding-layer copy used to produce.
    mat = T.from_numpy(embeddings).float()
    writer.add_embedding(mat, metadata=word_list, global_step=1)
    writer.close()
Exemplo n.º 28
0
class Logger:
    """Bundles tensorboard logging and checkpoint saving for a training run."""

    def __init__(self,
                 ckpt_path,
                 tsbd_path,
                 global_step=0,
                 best_metric_val=float('-inf')):
        # Make sure both output directories exist before anything is written.
        for directory in (ckpt_path, tsbd_path):
            if not os.path.exists(directory):
                os.makedirs(directory)
        self.ckpt_path = ckpt_path
        self.writer = SummaryWriter(tsbd_path)
        self.global_step = global_step
        self.best_metric_val = best_metric_val

    def reset(self):
        """Rewind the global step counter to zero."""
        self.global_step = 0

    def step(self, step):
        """Advance the global step counter by ``step``."""
        self.global_step += step

    def add_scalar(self, name, val):
        """Log a scalar and flush immediately so it appears right away."""
        self.writer.add_scalar(name, val, self.global_step)
        self.writer.file_writer.flush()

    def add_image(self, name, img):
        """Log an image at the current global step."""
        self.writer.add_image(name, img, self.global_step)

    def add_histogram(self, tag, values, bins=1000):
        """Log a histogram of ``values`` at the current global step."""
        self.writer.add_histogram(tag, values, self.global_step, bins)

    def add_embedding(self, tag, feats, labels):
        """Log a graph of embeddings of given features with labels"""
        self.writer.add_embedding(mat=feats,
                                  tag=tag,
                                  metadata=labels,
                                  global_step=self.global_step)

    def save_ckpt(self, state, cur_metric_val):
        """Save the latest checkpoint; copy it to best_model.pth on improvement."""
        latest = os.path.join(self.ckpt_path, 'checkpoint.pth')
        best = os.path.join(self.ckpt_path, 'best_model.pth')
        torch.save(state, latest)
        if cur_metric_val > self.best_metric_val:
            shutil.copyfile(latest, best)
            self.best_metric_val = cur_metric_val

    def save_ckpt_iter(self, state, iter):
        """Save a checkpoint tagged with the given iteration number."""
        # NOTE: the parameter name `iter` shadows the builtin but is kept
        # for backward compatibility with keyword callers.
        torch.save(state, os.path.join(self.ckpt_path,
                                       'checkpoint_' + str(iter) + '.pth'))
Exemplo n.º 29
0
def tb_train2():
    """Demo that exercises the main SummaryWriter APIs.

    Logs scalars, scalar groups, image grids, audio, text, histograms,
    a PR curve and finally an MNIST embedding, then exports scalars to
    JSON. NOTE(review): export_scalars_to_json exists on tensorboardX's
    SummaryWriter but not torch.utils.tensorboard's — confirm which one
    this file imports.
    """
    import torchvision.utils as vutils
    import torchvision.models as models
    from torchvision import datasets

    resnet18 = models.resnet18(False)
    writer = SummaryWriter()
    sample_rate = 44100
    freqs = [262, 294, 330, 349, 392, 440, 440, 440, 440, 440, 440]

    for n_iter in range(100):
        dummy_s1 = torch.rand(1)
        dummy_s2 = torch.rand(1)
        # data grouping by `slash`
        writer.add_scalar('data/scalar1', dummy_s1[0], n_iter)
        writer.add_scalar('data/scalar2', dummy_s2[0], n_iter)

        writer.add_scalars('data/scalar_group', {'xsinx': n_iter * np.sin(n_iter),
                                                 'xcosx': n_iter * np.cos(n_iter),
                                                 'arctanx': np.arctan(n_iter)}, n_iter)

        dummy_img = torch.rand(32, 3, 64, 64)  # output from network
        if n_iter % 10 == 0:
            x = vutils.make_grid(dummy_img, normalize=True, scale_each=True)
            writer.add_image('Image', x, n_iter)

            dummy_audio = torch.zeros(sample_rate * 2)
            # BUG FIX: iterate over the audio buffer's own length. The
            # original used range(x.size(0)), but x is the image grid here
            # (3 channels), so only the first 3 audio samples were filled.
            for i in range(dummy_audio.size(0)):
                # amplitude of sound should in [-1, 1]
                dummy_audio[i] = np.cos(freqs[n_iter // 10] * np.pi * float(i) / float(sample_rate))
            writer.add_audio('myAudio', dummy_audio, n_iter, sample_rate=sample_rate)

            writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)

            for name, param in resnet18.named_parameters():
                writer.add_histogram(name, param.clone().cpu().data.numpy(), n_iter)

            # needs tensorboard 0.4RC or later
            writer.add_pr_curve('xoxo', np.random.randint(2, size=100), np.random.rand(100), n_iter)

    dataset = datasets.MNIST('mnist', train=False, download=True)
    images = dataset.test_data[:100].float()
    label = dataset.test_labels[:100]

    # Flatten each 28x28 image into a 784-dim embedding vector.
    features = images.view(100, 784)
    writer.add_embedding(features, metadata=label, label_img=images.unsqueeze(1))

    # export scalar data to JSON for external processing
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
class TensorboardLogger(Callback):
    """Callback that mirrors fastai training progress into tensorboard."""

    learn: Learner
    run_name: str
    histogram_freq: int = 100
    path: str = None

    def __post_init__(self):
        # Default log location lives under the learner's own path.
        if not self.path:
            self.path = os.path.join(self.learn.path, "logs")
        self.log_dir = os.path.join(self.path, self.run_name)

    def on_train_begin(self, **kwargs):
        """Open the tensorboard writer at the start of training."""
        self.writer = SummaryWriter(log_dir=self.log_dir)

    def on_epoch_end(self, **kwargs):
        """Log validation metrics and embedding matrices for the epoch."""
        iteration = kwargs["iteration"]
        metrics = kwargs["last_metrics"]
        names = ["valid_loss"] + [fn.__name__ for fn in self.learn.metrics]

        for metric_name, value in zip(names, metrics):
            self.writer.add_scalar(metric_name, value, iteration)

        # Project the weight matrix of each direct nn.Embedding child.
        for child_name, child in self.learn.model.named_children():
            if isinstance(child, nn.Embedding):
                weight = next(iter(child.parameters()))
                self.writer.add_embedding(weight,
                                          global_step=iteration,
                                          tag=child_name)

    def on_batch_end(self, **kwargs):
        """Log optimizer state and loss; histograms every histogram_freq steps."""
        iteration = kwargs["iteration"]
        loss = kwargs["last_loss"]

        self.writer.add_scalar("learning_rate", self.learn.opt.lr, iteration)
        self.writer.add_scalar("momentum", self.learn.opt.mom, iteration)
        self.writer.add_scalar("loss", loss, iteration)

        if iteration % self.histogram_freq == 0:
            for param_name, param in self.learn.model.named_parameters():
                self.writer.add_histogram(param_name, param, iteration)

    def on_train_end(self, **kwargs):
        """Best-effort model-graph export, then close the writer."""
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                sample = next(iter(self.learn.data.train_dl))[0]
                self.writer.add_graph(self.learn.model, tuple(sample))
        except Exception as e:
            print("Unable to create graph.")
            print(e)
        self.writer.close()
Exemplo n.º 31
0
class TensorBoardProjector(Callback):
    """The TensorBoardProjector callback is used to write images from the validation pass to Tensorboard using the
    TensorboardX library.
    """

    def __init__(self, log_dir='./logs',
                 comment='torchbearer',
                 num_images=100,
                 avg_pool_size=1,
                 avg_data_channels=True,
                 write_data=True,
                 write_features=True,
                 features_key=torchbearer.Y_PRED):
        """Construct a TensorBoardProjector callback which writes images to the given directory and, if required,
        associated features.

        :param log_dir: The tensorboard log path for output
        :type log_dir: str
        :param comment: Descriptive comment to append to path
        :type comment: str
        :param num_images: The number of images to write
        :type num_images: int
        :param avg_pool_size: Size of the average pool to perform on the image. This is recommended to reduce the
        overall image sizes and improve latency
        :type avg_pool_size: int
        :param avg_data_channels: If True, the image data will be averaged in the channel dimension
        :type avg_data_channels: bool
        :param write_data: If True, the raw data will be written as an embedding
        :type write_data: bool
        :param write_features: If True, the image features will be written as an embedding
        :type write_features: bool
        :param features_key: The key in state to use for the embedding. Typically model output but can be used to show
        features from any layer of the model.
        :type features_key: str
        """
        self.log_dir = log_dir
        self.comment = comment
        self.num_images = num_images
        self.avg_pool_size = avg_pool_size
        self.avg_data_channels = avg_data_channels
        self.write_data = write_data
        self.write_features = write_features
        self.features_key = features_key

        # Writer is created lazily in on_start once the model name is known.
        self._writer = None

        # Set True once num_images rows have been accumulated and written;
        # reset per epoch in on_end_epoch when features are being logged.
        self.done = False

    def on_start(self, state):
        """Open the writer under a model-name + comment subdirectory."""
        log_dir = os.path.join(self.log_dir, state[torchbearer.MODEL].__class__.__name__ + '_' + self.comment)
        self._writer = SummaryWriter(log_dir=log_dir)

    def on_step_validation(self, state):
        """Accumulate images/labels (and optionally data/features) across
        validation batches; write the embeddings once num_images rows exist.
        """
        if not self.done:
            x = state[torchbearer.X].data.clone()

            # Promote greyscale (N, H, W) input to (N, 1, H, W).
            if len(x.size()) == 3:
                x = x.unsqueeze(1)

            # Downsample thumbnails to keep the projector payload small.
            x = F.avg_pool2d(x, self.avg_pool_size).data

            data = None

            # Raw data is only written once, on the first epoch.
            if state[torchbearer.EPOCH] == 0 and self.write_data:
                if self.avg_data_channels:
                    data = torch.mean(x, 1)
                else:
                    data = x

                # Flatten each image into a single embedding row.
                data = data.view(data.size(0), -1)

            feature = None

            if self.write_features:
                feature = state[self.features_key].data.clone()
                feature = feature.view(feature.size(0), -1)

            label = state[torchbearer.Y_TRUE].data.clone()

            if state[torchbearer.BATCH] == 0:
                # First batch of the pass: start fresh accumulators on CPU.
                remaining = self.num_images if self.num_images < label.size(0) else label.size(0)

                self._images = x[:remaining].to('cpu')
                self._labels = label[:remaining].to('cpu')

                if data is not None:
                    self._data = data[:remaining].to('cpu')

                if feature is not None:
                    self._features = feature[:remaining].to('cpu')
            else:
                # Later batches: append only as many rows as still needed.
                remaining = self.num_images - self._labels.size(0)

                if remaining > label.size(0):
                    remaining = label.size(0)

                self._images = torch.cat((self._images, x[:remaining].to('cpu')), dim=0)
                self._labels = torch.cat((self._labels, label[:remaining].to('cpu')), dim=0)

                if data is not None:
                    self._data = torch.cat((self._data, data[:remaining].to('cpu')), dim=0)

                if feature is not None:
                    self._features = torch.cat((self._features, feature[:remaining].to('cpu')), dim=0)

            # Enough rows collected: flush the embedding(s) and stop
            # accumulating for this epoch.
            if self._labels.size(0) >= self.num_images:
                if state[torchbearer.EPOCH] == 0 and self.write_data:
                    self._writer.add_embedding(self._data, metadata=self._labels, label_img=self._images, tag='data', global_step=-1)
                if self.write_features:
                    self._writer.add_embedding(self._features, metadata=self._labels, label_img=self._images, tag='features', global_step=state[torchbearer.EPOCH])
                self.done = True

    def on_end_epoch(self, state):
        """Re-arm the accumulator so features are written again next epoch."""
        if self.write_features:
            self.done = False

    def on_end(self, state):
        """Close the writer when training/evaluation finishes."""
        self._writer.close()
Exemplo n.º 32
0
def main(_run, _log,
         seed,
         dataset,
         filter_class_ids,
         input_image_size,
         patch_size,
         batch_size,
         num_epochs,
         loss_lambda):
    """Train a SemanticAutoencoder with prototype reprojection.

    Trains for ``num_epochs``, reprojecting prototypes to their nearest
    patches every 5 epochs, saving prototype visualizations and test
    reconstructions every 1000 epochs, and logging losses plus the final
    prototype embeddings to tensorboard.

    :return: the final epoch's accumulated loss, or None if the user
        declines to train on all classes.
    """
    # Set the RNG seed for torch
    torch.manual_seed(seed)

    # Check input parameters are in expected format
    assert filter_class_ids is None or type(filter_class_ids) is list
    if type(filter_class_ids) is list:
        assert all(type(class_id) is int for class_id in filter_class_ids)
    else:
        _log.warning('Training on all classes!!')
        confirm = input('Continue? [y/n] ')
        if confirm.lower() != 'y':
            return None

    # Provision the `sacred` run directory for this experiment
    RUN_DIR = _run.observers[0].dir
    LOGS_DIR, IMAGES_DIR, PROTOTYPES_DIR = _provision_run_dir(RUN_DIR)

    # Initialize log writer for tensorboard
    writer = SummaryWriter(LOGS_DIR)

    # Load datasets for training and testing
    Dataset = DATASET_MAP[dataset]
    data_dir = DATA_DIR_MAP[dataset]
    train_dataset, train_dataset_with_non_random_transformation, \
        test_dataset = Dataset.load_dataset_splits(
            data_dir, input_image_size, filter_class_ids)

    # Initialize the data loader
    train_dataloader = DataLoader(
        train_dataset, collate_fn=Dataset.custom_collate_fn,
        batch_size=batch_size, shuffle=True)

    # Define variables for attributes (labels are 1-based; 0 is padding)
    num_attributes = train_dataset.num_attributes
    all_attribute_labels = range(1, num_attributes + 1)
    attribute_names = [train_dataset.get_attribute(al).name
                       for al in all_attribute_labels]

    # Initialize the model
    model = _make_cuda(SemanticAutoencoder(
        input_image_size, patch_size, num_attributes))

    # Initialize the loss function and optimizer.
    # BUG FIX: `ifilter` is Python-2-only (itertools.ifilter) and raises
    # NameError on Python 3; the builtin `filter` behaves correctly on both.
    epoch_loss = None
    criterion = _make_cuda(CustomLoss2(lambda_val=loss_lambda))
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()))

    # Initiate training
    pbar, steps = tqdm(range(1, num_epochs + 1)), 0
    for epoch in pbar:
        epoch_loss = 0.

        model.train()  # Setting the model in training mode for training
        for image, label, attribute_labels, padding_idx in train_dataloader:
            steps += 1  # Incrementing the global step
            model.zero_grad()  # Clearing the gradients for each mini-batch

            # Create the input variable and get the output from the model
            x = _make_cuda(torch.autograd.Variable(image))
            z, z_patches, reconstructed_x = model(x)

            # Get the associated prototypes for each image in the batch
            prototype_labels = _make_cuda(attribute_labels)
            positive_prototypes = model.prototypes(prototype_labels)

            # Get the *non-associated* prototypes for each image in the batch
            negative_prototypes = list()
            for img_al in attribute_labels:
                negative_al = _make_cuda(torch.LongTensor(list(filter(
                    lambda al: al not in img_al,
                    all_attribute_labels))))
                negative_prototypes.append(model.prototypes(negative_al))

            # Compute the loss
            loss = criterion(reconstructed_x, z_patches,
                             positive_prototypes, padding_idx, x,
                             negative_prototypes=negative_prototypes)

            # Do backprop and update the weights
            loss.backward()
            optimizer.step()

            # Update the epoch loss and add the step loss to tensorboard
            epoch_loss += loss.item()
            writer.add_scalar('loss/step_loss', loss, steps)

        # Add the epoch loss to tensorboard and update the progressbar
        writer.add_scalar('loss/epoch_loss', epoch_loss, steps)
        pbar.set_postfix(epoch_loss=epoch_loss)

        model.eval()  # Setting the model in evaluation mode for testing
        if (epoch % 5 == 0) or (epoch == num_epochs):
            # Compute the nearest patch for each prototype
            nearest_patches_for_prototypes = \
                model.get_nearest_patches_for_prototypes(
                    train_dataset_with_non_random_transformation)

            # Update each prototype to be equal to the nearest patch
            model.reproject_prototypes(nearest_patches_for_prototypes)

            if (epoch % 1000 == 0) or (epoch == num_epochs):
                # Save the prototype visualization
                save_prototype_patch_visualization(
                    model, train_dataset_with_non_random_transformation,
                    nearest_patches_for_prototypes, PROTOTYPES_DIR)

                # Save the reconstructed images for the test dataset
                # for every 1000 epochs
                for i_, (image, image_label, attribute_labels, _) \
                        in enumerate(test_dataset):
                    x = image.view((1,) + image.size())
                    x = _make_cuda(torch.autograd.Variable(x))
                    z, z_patches, reconstructed_x = model(x)

                    reconstructed_image = \
                        get_image_from_tensor(reconstructed_x)
                    reconstructed_image.save(
                        os.path.join(IMAGES_DIR, '%d-%d.png' % (epoch, i_)))

                # Save the intermediate model
                model.save_weights(os.path.join(RUN_DIR, MODEL_FILE_NAME))

        # Add the prototype embeddings to tensorboard at the end
        # (row 0 is the padding prototype, hence the [1:] slice)
        if epoch == num_epochs:
            writer.add_embedding(
                model.prototypes.weight[1:],
                metadata=attribute_names,
                global_step=steps)

    # Save the final model and commit the tensorboard logs
    model.save_weights(os.path.join(RUN_DIR, MODEL_FILE_NAME))
    writer.close()

    return epoch_loss