def visualisation(self):
    """Export the embedding, its sprite image and its metadata for TensorBoard.

    Obtains the embedding array, images and labels from
    ``self.process_embedding()``, writes the sprite image and metadata through
    ``self.create_sprite_image`` / ``self.create_meta_data``, writes the
    projector config into ``LOG_DIR`` and saves a checkpoint of the embedding
    variable there.

    Returns:
        None
    """
    # get embedding_arrays, sprite_images, meta_data
    embedding_array, imgs, labels = self.process_embedding()
    self.create_sprite_image(imgs)
    self.create_meta_data(labels)
    emb = tf.Variable(embedding_array, name=TENSOR_NAME)

    # embedding configs (kept under a separate name so the array above is not
    # shadowed by the proto entry)
    config = projector.ProjectorConfig()
    embedding_info = config.embeddings.add()
    embedding_info.tensor_name = emb.name
    embedding_info.metadata_path = META_FILE
    embedding_info.sprite.image_path = SPRITE_FILE
    embedding_info.sprite.single_image_dim.extend([32, 32])

    # save data; the writer and the session are released even if saving fails
    summary_writer = tf.summary.FileWriter(LOG_DIR)
    try:
        projector.visualize_embeddings(summary_writer, config)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.save(sess, os.path.join(LOG_DIR, "emb"),
                       self.predictor.global_step)
    finally:
        summary_writer.close()
def initialize_training(self):
    """Create the train op, session, summaries, saver and projector config.

    Sets ``self.train``, ``self.sess``, ``self.summaries``,
    ``self.train_writer`` and ``self.saver``, then registers the ``alpha``
    embedding plus either a single ``rho`` embedding or one ``rho_<t>`` per
    time step (when ``self.dynamic`` is true) with the projector.
    """
    self.train = tf.train.AdamOptimizer().minimize(self.loss)

    self.sess = tf.Session()
    with self.sess.as_default():
        tf.global_variables_initializer().run()

    variable_summaries('alpha', self.alpha)
    with tf.name_scope('objective'):
        scalars = (
            ('loss', self.loss),
            ('priors', self.log_prior),
            ('ll_pos', self.ll_pos),
            ('ll_neg', self.ll_neg),
        )
        for tag, tensor in scalars:
            tf.summary.scalar(tag, tensor)
    self.summaries = tf.summary.merge_all()
    self.train_writer = tf.summary.FileWriter(self.logdir, self.sess.graph)
    self.saver = tf.train.Saver()

    # Every registered embedding shares the same metadata file.
    tensor_names = ['model/embeddings/alpha']
    if self.dynamic:
        tensor_names.extend(
            'model/embeddings/rho_' + str(t) for t in range(self.T))
    else:
        tensor_names.append('model/embeddings/rho')

    config = projector.ProjectorConfig()
    for tensor_name in tensor_names:
        entry = config.embeddings.add()
        entry.tensor_name = tensor_name
        entry.metadata_path = '../vocab.tsv'
    projector.visualize_embeddings(self.train_writer, config)
def register_embedding(weights, labels, log_dir) -> None:
    """Save a checkpoint and a label file, and point the projector at them.

    Args:
        weights: tf.Variable with the weights of the embedding layer to be
            displayed.
        labels: list of labels corresponding to the weights.
        log_dir: Directory into which to store the checkpoint, the metadata
            file and the projector config, as a `str`.
    """
    # Create a checkpoint from embedding, the filename and key are
    # name of the tensor. It must live in `log_dir` (not a module-level
    # directory) so it sits next to the config written below.
    checkpoint = tf.train.Checkpoint(embedding=weights)
    checkpoint.save(os.path.join(log_dir, "embedding.ckpt"))

    # Save labels separately, one per line.
    with open(os.path.join(log_dir, METADATA_FNAME), "w") as f:
        for label in labels:
            f.write("{}\n".format(label))

    # Set up config
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    # The name of the tensor will be suffixed by `/.ATTRIBUTES/VARIABLE_VALUE`
    embedding.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
    embedding.metadata_path = METADATA_FNAME
    projector.visualize_embeddings(log_dir, config)
def visualisation(vectors):
    """Export a set of high-dimensional vectors for the TensorBoard projector.

    Args:
        vectors: the vectors to visualise, a list shaped [SAMPLE_NUM, dim].

    Returns:
        None
    """
    # The projector visualises TensorFlow variables, so wrap the vectors in one
    # and checkpoint it under the key "embedding".
    embedding_var = tf.Variable(vectors)
    ckpt = tf.train.Checkpoint(embedding=embedding_var)
    ckpt.save(os.path.join(LOG_DIR, "embedding.ckpt"))

    # ProjectorConfig describes what gets written to the log directory.
    config = projector.ProjectorConfig()
    # Register one embedding to visualise.
    entry = config.embeddings.add()
    # Name under which the variable is stored in the checkpoint.
    entry.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
    # A metadata (label) file is optional and is intentionally not set here;
    # without one, every point is drawn in the same colour.
    # The sprite image is optional as well; without it each point is shown as
    # a small dot instead of its picture.
    sprite = entry.sprite
    sprite.image_path = SPRITE_FILE
    # Thumbnail size used to cut the right picture out of the sprite image.
    sprite.single_image_dim.extend([IMAGE_SIZE, IMAGE_SIZE])
    # Write what the projector needs into the log directory.
    projector.visualize_embeddings(LOG_DIR, config)
def write_embeddings(self, Wv, name="WordVectors"):
    """Checkpoint an embedding matrix and register it with the projector.

    Args:
        Wv: (numpy.ndarray) |V| x d matrix of word embeddings
        name: name given to the TensorFlow variable holding the matrix
    """
    fresh_graph = tf.Graph()
    with fresh_graph.as_default(), tf.Session() as session:
        # Load the matrix into a variable and checkpoint it at step 0.
        embedding_var = tf.Variable(Wv, name=name, dtype=tf.float32)
        session.run(tf.global_variables_initializer())
        tf.train.Saver().save(session, self.CHECKPOINT_FILE, 0)

        # Write the projector config pointing at the vocabulary file.
        config = projector.ProjectorConfig()
        entry = config.embeddings.add()
        entry.tensor_name = embedding_var.name
        entry.metadata_path = self.VOCAB_FILE_BASE
        summary_writer = tf.summary.FileWriter(self.LOGDIR)
        projector.visualize_embeddings(summary_writer, config)

    rows, cols = Wv.shape[0], Wv.shape[1]
    msg = "Saved {s0:d} x {s1:d} embedding matrix '{name}'"
    msg += " to LOGDIR='{logdir}'"
    print(msg.format(s0=rows, s1=cols, name=name, logdir=self.LOGDIR))

    print("To view, run:")
    print("\n tensorboard --logdir=\"{logdir}\"\n".format(logdir=self.LOGDIR))
    print("and navigate to the \"Embeddings\" tab in the web interface.")
def _visualize():
    """Encode every sample file and export the encodings for the projector.

    For each ``*sample*.npz`` file under ``cfg.Paths.samples`` the ``sample``
    array is preprocessed, encoded with ``best_encoder`` and collected; the
    part of the file name before the first underscore is written as its
    metadata label. The stacked encodings are checkpointed into
    ``cfg.Paths.latent_space`` together with the projector config.
    """
    log.info("Visualizing all samples with tensorboard/t-sne...")
    samples = glob(cfg.Paths.samples + "/*sample*.npz")
    metadata = os.path.join(cfg.Paths.latent_space, 'metadata.txt')

    vecs = []
    with open(metadata, 'w') as m:
        for sample in samples:
            # basename() copes with mixed separators; the glob pattern above
            # always inserts "/" regardless of os.sep.
            composer = os.path.basename(sample).split("_")[0]
            m.write(f"{composer}\n")
            # Close the archive as soon as the array has been read.
            with np.load(sample) as archive:
                raw = archive['sample']
            encoding = best_encoder.predict(dataset.preprocess_single(raw))
            vecs.append(np.squeeze(encoding))

    c = projector.ProjectorConfig()
    vectors_var = tf.Variable(np.asarray(vecs), trainable=False, name='vectors')
    embed = c.embeddings.add()
    embed.tensor_name = vectors_var.name
    embed.metadata_path = metadata

    sess = tf.InteractiveSession()
    try:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(cfg.Paths.latent_space, 'meta.ckpt'))
        writer = tf.summary.FileWriter(cfg.Paths.latent_space, sess.graph)
        try:
            projector.visualize_embeddings(writer, c)
        finally:
            writer.close()
    finally:
        # Release the session even when saving or writing fails.
        sess.close()
def _get_projector_data(user_embedding, user_indices, item_embedding,
                        item_indices):
    """Sample both embeddings, stack them, and build a projector config.

    Args:
      user_embedding: a (num_users x embedding_dim) embedding of users.
      user_indices: the indices to take from the user embedding.
      item_embedding: a (num_items x embedding_dim) embedding of items.
      item_indices: the indices to take from the item embedding.

    Returns:
      A tuple of (sample, config):
        sample: the assign op that stores the gathered user rows followed by
          the gathered item rows in the projector variable.
        config: a ProjectorConfig for the sample.
    """
    gathered = [
        tf.gather(user_embedding, user_indices),
        tf.gather(item_embedding, item_indices),
    ]
    combined_samples = tf.concat(gathered, 0)

    # The variable named PROJECTOR_NAME is what the config below refers to.
    projector_var = tf.get_variable(constants.PROJECTOR_NAME,
                                    combined_samples.shape)
    sample = tf.assign(projector_var, combined_samples)

    config = projector.ProjectorConfig()
    entry = config.embeddings.add()
    entry.tensor_name = constants.PROJECTOR_NAME
    entry.metadata_path = constants.PROJECTOR_PATH
    return sample, config
def log_tensorboard(embedding_vectors, embedding_name, log_dir, metadata=None):
    """Write an embedding, optional labels and a projector config to log_dir.

    Args:
        embedding_vectors: values used to initialise the embedding variable.
        embedding_name: name given to the TensorFlow variable.
        log_dir: directory receiving the projector config (created if
            missing, including intermediate directories).
        metadata: optional iterable of labels, written one per line to
            ``metadata_file`` inside ``log_dir``.
    """
    # makedirs(exist_ok=True) handles nested paths and avoids the
    # check-then-create race of isdir() + mkdir().
    os.makedirs(log_dir, exist_ok=True)

    tensor_embedding = tf.Variable(embedding_vectors, name=embedding_name)

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = tensor_embedding.name

    # set metadata
    if metadata is not None:
        # `metadata_file` is expected to be defined at module level.
        with open(os.path.join(log_dir, metadata_file), 'w') as f:
            for label in metadata:
                f.write(f'{label}\n')
        embedding.metadata_path = metadata_file

    # Sprites for image embeddings could be configured here through
    # embedding.sprite.image_path and embedding.sprite.single_image_dim.

    projector.visualize_embeddings(log_dir, config)

    # Must pass list or dict
    saver = tf.compat.v1.train.Saver([tensor_embedding])
    # NOTE(review): `log_dir` is used as the checkpoint prefix itself, not as
    # a directory to save into — confirm this is the intended location.
    saver.save(sess=None, global_step=0, save_path=log_dir)
def generate_embedding_op(vec, N, spritesize=50):
    """Create a variable that stores embeddings for some of your data points.

    :vec: tensor in your graph that outputs embedded vectors
    :N: number of data points you're storing
    :spritesize: number of pixels for your sprites

    Returns
    :store_embeddings: graph op to update your embedding vector
    :config: projector config
    """
    # The second dimension of `vec` gives the width of the storage variable.
    embedding_dim = vec.get_shape().as_list()[1]
    storage = tf.get_variable(
        "dense_embeddings",
        shape=[N, embedding_dim],
        initializer=tf.initializers.random_uniform(),
    )
    store_embeddings = tf.assign(storage, vec)

    config = projector.ProjectorConfig()
    entry = config.embeddings.add()
    entry.tensor_name = storage.name
    entry.metadata_path = "metadata.tsv"
    entry.sprite.image_path = "sprites.png"
    entry.sprite.single_image_dim.extend([spritesize, spritesize])
    return store_embeddings, config
def register_embedding(feature, img_data, labels, log_dir) -> None:
    """Checkpoint the features and write the projector config for them.

    Two things happen here: the feature variable is saved to a checkpoint,
    and ``projector_config.pbtxt`` is written. The sprite image and the label
    metadata file are produced by ``sprite_and_meta_writer``.
    """
    # Make sure the output directory exists.
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Variable holding the features; the name used here is also the key the
    # checkpoint stores it under.
    tensor_key = "embedding"
    embedding_var = K.variable(feature, name=tensor_key)

    # 1. Save the embedding variable to the checkpoint file.
    ckpt = tf.train.Checkpoint(embedding=embedding_var)
    ckpt.save(os.path.join(log_dir, "em.ckpt"))

    # Create the sprite image file and the label metadata file.
    sprite_and_meta_writer(img_data, labels, log_dir)

    # 2. Configure the projector (projector_config.pbtxt).
    config = projector.ProjectorConfig()
    # `embeddings` is a repeated field, so add() creates the entry.
    entry = config.embeddings.add()
    # The name of the tensor will be suffixed by `/.ATTRIBUTES/VARIABLE_VALUE`
    entry.tensor_name = tensor_key + "/.ATTRIBUTES/VARIABLE_VALUE"
    entry.metadata_path = METADATA_FNAME
    entry.sprite.image_path = SPRITES_FNAME
    thumbnail_dims = [img_data.shape[1], img_data.shape[2]]
    entry.sprite.single_image_dim.extend(thumbnail_dims)
    projector.visualize_embeddings(log_dir, config)
def save_embeddings_metadata(log_dir, variable_name, vocabulary_file,
                             num_oov_buckets=1):
    """Registers an embedding variable for visualization in TensorBoard.

    Copies :obj:`vocabulary_file` into a metadata file inside
    :obj:`log_dir` (appending the unknown-token labels) and registers
    :obj:`variable_name` with that metadata file in
    ``projector_config.pbtxt``, creating or updating the config as needed.

    Args:
      log_dir: The active log directory.
      variable_name: The variable name in the checkpoint.
      vocabulary_file: The associated vocabulary file.
      num_oov_buckets: The number of additional unknown tokens.
    """
    # Assume it ends with /.ATTRIBUTES/VALUE
    name_parts = variable_name.split("/")[:-2]
    filename = "%s.txt" % "_".join(name_parts)
    metadata_path = os.path.join(log_dir, filename)

    with tf.io.gfile.GFile(vocabulary_file, mode="rb") as src, \
            tf.io.gfile.GFile(metadata_path, mode="wb") as dst:
        ws_index = 0
        for line in src:
            # The TensorBoard code checks line.trim().length == 0 when loading
            # the metadata file so make sure lines are not dropped.
            visible = line.decode("utf-8").replace(u"\uFEFF", u"").strip()
            if visible:
                dst.write(line)
            else:
                dst.write(tf.compat.as_bytes("<whitespace%d>\n" % ws_index))
                ws_index += 1
        if num_oov_buckets == 1:
            dst.write(b"<unk>\n")
        else:
            for i in range(num_oov_buckets):
                dst.write(tf.compat.as_bytes("<unk%d>\n" % i))

    config = projector.ProjectorConfig()

    # If the projector file exists, load it.
    config_path = os.path.join(log_dir, "projector_config.pbtxt")
    if tf.io.gfile.exists(config_path):
        with tf.io.gfile.GFile(config_path, mode="rb") as config_file:
            text_format.Merge(config_file.read(), config)

    # If this embedding is already registered, just update the metadata path;
    # the else branch runs only when no existing entry matched.
    for meta in config.embeddings:
        if meta.tensor_name == variable_name:
            meta.metadata_path = filename
            break
    else:
        embedding = config.embeddings.add()
        embedding.tensor_name = variable_name
        embedding.metadata_path = filename

    with tf.io.gfile.GFile(config_path, "w") as config_file:
        config_file.write(text_format.MessageToString(config))
def tb_projector(X_test, y_test, log_dir):
    """Export data and labels for the TensorBoard projector.

    :param X_test: the data to display
    :param y_test: the labels, numeric
    :param log_dir: output directory
    :return: None; writes the metadata file, a checkpoint and the projector
        config into ``log_dir``
    """
    # A parenthesised single-argument print behaves the same as the print
    # statement on Python 2 and is valid syntax on Python 3.
    print("展示数据: %s" % str(X_test.shape))
    print("展示标签: %s" % str(y_test.shape))
    print("日志目录: %s" % str(log_dir))

    metadata = os.path.join(log_dir, 'metadata.tsv')

    images = tf.Variable(X_test)

    # Write the labels to the metadata file, one per line.
    with open(metadata, 'w') as metadata_file:
        for row in y_test:
            metadata_file.write('%d\n' % row)

    with tf.Session() as sess:
        saver = tf.train.Saver([images])  # only the data variable is saved
        sess.run(images.initializer)  # initialise the variable
        saver.save(sess, os.path.join(log_dir, 'images.ckpt'))

        config = projector.ProjectorConfig()
        # One can add multiple embeddings.
        embedding = config.embeddings.add()
        embedding.tensor_name = images.name
        # Link this tensor to its metadata file (e.g. labels).
        embedding.metadata_path = metadata
        # Saves a config file that TensorBoard will read during startup.
        projector.visualize_embeddings(tf.summary.FileWriter(log_dir), config)
def visualize_in_tensorboard(embedding_model, output_dirname=None,
                             host="127.0.0.1"):
    """Export a word-embedding model's vectors for the TensorBoard projector.

    Writes a metadata file with one word per line, a checkpoint of the
    vectors and the projector config into ``<cwd>/<output_dirname>``, then
    prints the command that launches TensorBoard.

    Args:
        embedding_model: model exposing ``wv.index2word``, ``wv.vectors`` and
            ``model_name``.
        output_dirname: output directory name; defaults to the part of
            ``embedding_model.model_name`` before the first dot.
        host: host shown in the printed TensorBoard command.
    """
    tf.compat.v1.disable_eager_execution()

    if output_dirname is None:
        output_dirname = embedding_model.model_name.split('.')[0]

    meta_file = f"{output_dirname}_metadata.tsv"
    output_path = os.path.join(os.getcwd(), output_dirname)
    if not os.path.isdir(output_path):
        os.mkdir(output_path)

    # One vocabulary word per line, encoded as UTF-8 bytes.
    with open(os.path.join(output_path, meta_file), "wb") as file_metadata:
        for word in embedding_model.wv.index2word:
            encoded_line = gensim.utils.to_utf8(word) + gensim.utils.to_utf8("\n")
            file_metadata.write(encoded_line)

    # The variable is only referenced through the graph (by the saver).
    embedding = tf.Variable(embedding_model.wv.vectors,
                            trainable=False,
                            name=f"{output_dirname}_tensor")
    init_op = tf.compat.v1.global_variables_initializer()
    saver = tf.compat.v1.train.Saver()

    with tf.compat.v1.Session() as sess:
        sess.run(init_op)
        writer = tf.compat.v1.summary.FileWriter(output_path, sess.graph)

        # Register the tensor and its metadata with the projector.
        config = projector.ProjectorConfig()
        entry = config.embeddings.add()
        entry.tensor_name = f"{output_dirname}_tensor"
        entry.metadata_path = meta_file
        projector.visualize_embeddings(writer, config)

        checkpoint_prefix = os.path.join(output_path,
                                         f"{output_dirname}_metadata.ckpt")
        saver.save(sess, checkpoint_prefix)

    print(f"Run `tensorboard --logdir={output_path} --host {host}` to run visualize result on tensorboard")
def visualize_embeddings(lexicon, embed_lookup):
    """Export leading lexicon entries and their embeddings for the projector.

    Iterates ``lexicon`` in order, pairing each label with
    ``embed_lookup[i]``, and stops after the entry whose index first exceeds
    5000. Labels, a checkpoint and the projector config are written under
    ``log/projector``.
    """
    projector_dir = os.path.join('log', 'projector')

    labels, embeds = [], []
    for i, label in enumerate(lexicon):
        labels.append(label)
        embeds.append(embed_lookup[i])
        if i > 5000:
            break

    if not os.path.exists(projector_dir):
        os.makedirs(projector_dir)

    embeddings = tf.Variable(np.array(embeds), name='embeddings')
    meta_path = os.path.join(projector_dir, 'metadata.tsv')
    embeddings_path = os.path.join(projector_dir, 'embeddings.ckpt')

    with open(meta_path, 'w') as f:
        for label in labels:
            f.write('%s\n' % label)

    with tf.Session() as sess:
        saver = tf.train.Saver([embeddings])
        sess.run(embeddings.initializer)
        saver.save(sess, embeddings_path)

        writer = tf.summary.FileWriter(projector_dir)
        config = projector.ProjectorConfig()
        entry = config.embeddings.add()
        entry.tensor_name = embeddings.name
        # The metadata path is relative to the log directory.
        entry.metadata_path = 'metadata.tsv'
        projector.visualize_embeddings(writer, config)

    print('embeddings visualised in tensorboard')
def register_embeddings(embeddings_name=EMBEDDINGS_TENSOR_NAME,
                        meta_data_fname=META_DATA_FNAME,
                        log_dir=LOG_DIR):
    """Write a projector config with a single embedding entry into log_dir.

    Args:
        embeddings_name: tensor name recorded for the embedding.
        meta_data_fname: metadata path recorded for the embedding.
        log_dir: directory passed to ``projector.visualize_embeddings``.
    """
    projector_config = projector.ProjectorConfig()
    entry = projector_config.embeddings.add()
    entry.tensor_name = embeddings_name
    entry.metadata_path = meta_data_fname
    projector.visualize_embeddings(log_dir, projector_config)
def save_embeddings(images_features_labels, save_dir):
    '''
    Save embeddings (with corresponding labels and images) to a directory.

    Point tensorboard to that directory with tensorboard --logdir=<save_dir>
    and your embeddings will be viewable.

    Arguments:
    images_features_labels : dict
        each key is the desired name for that embedding, and each value is a
        list of [images, embeddings, labels] where images are a numpy array
        of images between 0. and 1. of shape [N*W*H*D] or [N*H*W] if
        grayscale (or None if no images), embeddings is a numpy array of
        shape [N*D], and labels is a numpy array of something that can be
        converted to string (or None if no labels available)
    save_dir : str
        path to save tensorboard checkpoints

    Raises:
    AssertionError if the dictionary is empty.
    '''
    assert len(list(images_features_labels.keys())), 'Nothing in dictionary!'

    # Make directory if necessary
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Reset graph and initialise file writer
    tf.reset_default_graph()
    writer = tf.summary.FileWriter(os.path.join(save_dir), graph=None)
    try:
        config = projector.ProjectorConfig()

        # For each embedding name in the provided dictionary of embeddings
        for name, (ims, fts, labs) in images_features_labels.items():
            # Save sprites and metadata
            if labs is not None:
                metadata_path = os.path.join(save_dir, name + '-metadata.tsv')
                save_metadata(labs, metadata_path)
            if ims is not None:
                sprites_path = os.path.join(save_dir, name + '.png')
                save_sprite_image(ims, path=sprites_path,
                                  invert=len(ims.shape) < 4)

            # Make a variable with the embeddings we want to visualise
            embedding_var = tf.Variable(fts, name=name, trainable=False)

            # Add this to our config with the image and metadata properties;
            # paths are given relative to save_dir.
            embedding = config.embeddings.add()
            embedding.tensor_name = embedding_var.name
            if labs is not None:
                embedding.metadata_path = name + '-metadata.tsv'
            if ims is not None:
                embedding.sprite.image_path = name + '.png'
                embedding.sprite.single_image_dim.extend(ims[0].shape)

        # Save the embeddings
        projector.visualize_embeddings(writer, config)
        # The session is scoped so it is closed once the checkpoint is saved.
        with tf.Session() as sess:
            saver = tf.train.Saver(max_to_keep=1)
            sess.run(tf.global_variables_initializer())
            saver.save(sess, os.path.join(save_dir, 'ckpt'))
    finally:
        # Release the event-file writer even if something above failed.
        writer.close()
def export_projector_data(embeddings, meta_path, logs_path):
    """Checkpoint embeddings and write the matching projector config.

    Args:
        embeddings: values used to initialise the embedding variable.
        meta_path: metadata path recorded in the projector config.
        logs_path: directory receiving the checkpoint and the config.
    """
    embeddings_var = tf.Variable(embeddings, name='embeddings')
    checkpoint = tf.train.Checkpoint(embedding=embeddings_var)
    checkpoint.save(os.path.join(logs_path, 'embeddings.ckpt'))

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    # An object-based checkpoint stores the variable under its Checkpoint
    # keyword ("embedding") suffixed by `/.ATTRIBUTES/VARIABLE_VALUE`, not
    # under the variable's own name, so the config must use that key.
    embedding.tensor_name = 'embedding/.ATTRIBUTES/VARIABLE_VALUE'
    embedding.metadata_path = meta_path
    projector.visualize_embeddings(logs_path, config)
def _visualize(embedding, vocab_file, num_oov_buckets=1):
    """Register an embedding and return the projector config read from disk.

    Calls ``text_inputter.save_embeddings_metadata`` for ``log_dir``, asserts
    that ``projector_config.pbtxt`` now exists there, and parses it.
    """
    text_inputter.save_embeddings_metadata(
        log_dir, embedding, vocab_file, num_oov_buckets=num_oov_buckets)

    config_path = os.path.join(log_dir, "projector_config.pbtxt")
    self.assertTrue(os.path.exists(config_path))

    loaded_config = projector.ProjectorConfig()
    with open(config_path) as config_file:
        serialized = config_file.read()
        text_format.Merge(serialized, loaded_config)
    return loaded_config
def testVisualizeEmbeddings(self):
    """The config written through a FileWriter round-trips from disk."""
    # Build a dummy configuration with one embedding entry.
    expected = projector.ProjectorConfig()
    expected.model_checkpoint_path = 'test'
    entry = expected.embeddings.add()
    entry.tensor_name = 'tensor1'
    entry.metadata_path = 'metadata1'

    # Save the configuration to a temporary dir through the API.
    temp_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, temp_dir)
    writer = tf.summary.FileWriter(temp_dir)
    projector.visualize_embeddings(writer, expected)

    # Parse what was written and compare it with the original.
    config_file = os.path.join(temp_dir, 'projector_config.pbtxt')
    with tf.gfile.GFile(config_file) as f:
        actual = projector.ProjectorConfig()
        text_format.Parse(f.read(), actual)
    self.assertEqual(expected, actual)
def create_dummy_config():
    """Return a ProjectorConfig with a checkpoint path and one embedding."""
    dummy_embedding = projector.EmbeddingInfo(
        tensor_name="tensor1",
        metadata_path="metadata1",
    )
    return projector.ProjectorConfig(
        model_checkpoint_path="test",
        embeddings=[dummy_embedding],
    )
def test_visualize_embeddings_with_logdir(self):
    """The config written via a logdir path round-trips from disk."""
    logdir = self.get_temp_dir()
    expected = create_dummy_config()
    projector.visualize_embeddings(logdir, expected)

    # Parse the file that was written and compare it with the original.
    written_path = os.path.join(logdir, 'projector_config.pbtxt')
    with tf.io.gfile.GFile(written_path) as f:
        actual = projector.ProjectorConfig()
        text_format.Parse(f.read(), actual)
    self.assertEqual(expected, actual)
def main():
    """Embed every document plus one query and export them for TensorBoard.

    Loads the documents, appends a hard-coded query, writes one metadata
    label per document, computes a vector per document with
    ``get_document_vector`` and saves the stacked vectors as a checkpoint
    together with the projector config.
    """
    print(pg_model)
    bg = bottle_neck_graph(pg_model, None)

    # Documents and their ids, plus the tokenised query appended at the end.
    keys, sample = load_data(raw_data)
    query_sample = "Having issues with SDB start blt --project --sdb-go".split()
    sample.append(query_sample)
    total_documents = len(sample)

    # Metadata for tensorboard: one label per document, the query last.
    with open(metadata_path, 'w') as f:
        for key in keys:
            f.write("doc_id_" + str(key) + '\n')
        f.write("doc_id_query" + '\n')

    # Collect all document vectors into a [total_documents, embedding_size]
    # array.
    doc_vectors = [get_document_vector(bg, sam) for sam in sample]
    embeds = np.reshape(np.asarray(doc_vectors),
                        [total_documents, embedding_size])
    print(embeds.shape)

    # Set up tensorboard for visualizing.
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
        # The placeholder is not fed anywhere in this function; it is kept so
        # the exported graph stays the same.
        input_ids = tf.placeholder(tf.int32, shape=[None], name="input")
        with tf.name_scope('embeddings'):
            embeddings = tf.Variable(initial_value=embeds,
                                     name="doc_embeddings")

        writer = tf.summary.FileWriter(log_path, sess.graph)
        tf.global_variables_initializer().run()
        tf.train.Saver().save(sess, ckptdata_path)

        print("setting up tensorboard projector...")
        config = projector.ProjectorConfig()
        embedding_config = config.embeddings.add()
        embedding_config.tensor_name = embeddings.name
        embedding_config.metadata_path = metadata_path
        projector.visualize_embeddings(tf.summary.FileWriter(model_path),
                                       config)
def output(self):
    """Checkpoint the test-set embeddings and write the projector config.

    Runs ``self.embedding_model.predict`` on ``self.x_test``, saves the
    result under the checkpoint key ``embedding`` in ``self.log_dir`` and
    registers it with ``self.metadata`` as the metadata path.
    """
    predicted = self.embedding_model.predict(self.x_test)
    ckpt = tf.train.Checkpoint(embedding=tf.Variable(predicted))
    ckpt.save(os.path.join(self.log_dir, "embedding.ckpt"))

    # Set up config
    config = projector.ProjectorConfig()
    entry = config.embeddings.add()
    # The name of the tensor will be suffixed by `/.ATTRIBUTES/VARIABLE_VALUE`
    entry.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
    entry.metadata_path = self.metadata
    projector.visualize_embeddings(self.log_dir, config)
def register_embedding(self):
    """Write the projector config for the embedding into ``self.log_dir``.

    The sprite image and thumbnail size are added only when
    ``self.show_images`` is truthy.
    """
    config = projector.ProjectorConfig()
    entry = config.embeddings.add()
    entry.tensor_name = EMBEDDINGS
    entry.metadata_path = METAFILE

    if self.show_images:
        # Square thumbnails of side self.image_size.
        thumbnail_dims = (self.image_size, self.image_size)
        entry.sprite.image_path = SPRITESFILE
        entry.sprite.single_image_dim.extend(thumbnail_dims)

    projector.visualize_embeddings(self.log_dir, config)
def _add_emb_vis(self, embedding_var):
    """Set up the word-embedding visualization in Tensorboard.

    Described here: https://www.tensorflow.org/get_started/embedding_viz
    Writes the vocab metadata file into the ``train`` directory under
    ``FLAGS.log_root``, then writes the projector config file pointing to it.
    """
    train_dir = os.path.join(FLAGS.log_root, "train")

    # Metadata file first: the config below references its path.
    vocab_metadata_path = os.path.join(train_dir, "vocab_metadata.tsv")
    self._vocab.write_metadata(vocab_metadata_path)

    summary_writer = tf.summary.FileWriter(train_dir)
    config = projector.ProjectorConfig()
    entry = config.embeddings.add()
    entry.tensor_name = embedding_var.name
    entry.metadata_path = vocab_metadata_path
    projector.visualize_embeddings(summary_writer, config)
def visualize_data(data: CSVData, ids: List[str], name: str, log_directory: str = LOG_DIRECTORY):
    """Create the files the Tensorboard projector needs to show the data.

    Creates `.tsv` and `.pbtxt` files in the log directory. To use it in
    Tensorboard, run "tensorboard --logdir <log directory path>" and choose
    PROJECTOR from the dropdown list.

    Parameters
    ----------
    data (CSVData): np array containing the data to be visualized
    ids (List[str]): ID for each vector contained in the data array
        (i.e. `len(ids) == data.shape[0]`)
    name (str): Name for the dataset contained in the data array; must not
        contain spaces, otherwise an error is printed and nothing is written
    log_directory (str, optional): Directory name to store the created
        Tensorboard logfiles in. Defaults to LOG_DIRECTORY
    """
    # The directory is created before the name is validated.
    Path(log_directory).mkdir(parents=True, exist_ok=True)

    if " " in name:
        print("Visualization error: Please specify a name without whitespaces")
        return

    data_file_name = f"{name}.tsv"
    metadata_file_name = f"{name}_metadata.tsv"

    # Tab-separated vectors, one row per line.
    data_path = path.join(log_directory, data_file_name)
    np.savetxt(data_path, data, delimiter="\t", fmt="%f")

    # One id per line, in the same order as the rows above.
    metadata_path = path.join(log_directory, metadata_file_name)
    with open(metadata_path, "w") as metadata_writer:
        for data_id in ids:
            metadata_writer.write(f"{data_id}\n")

    config = projector.ProjectorConfig()
    entry = cast(Any, config).embeddings.add()
    entry.tensor_path = data_file_name
    entry.metadata_path = metadata_file_name
    entry.tensor_name = name
    projector.visualize_embeddings(log_directory, config)

    print(
        f'Run "tensorboard --logdir {log_directory}" and choose PROJECTOR',
        "to see the data visualization\n",
    )
def embedding_projector_files(source_tokenizer, target_tokenizer, model,
                              sentence_pair, log_dir, agg='mean'):
    """Write projector files for source and target sentence embeddings.

    For every (source, target) pair an aggregated sentence vector is computed
    on each side with ``tokenize_and_aggregate``; both metadata files get the
    same two-column rows. The vectors are then passed through
    ``display_embedding_shape``, checkpointed and registered with the
    projector.

    Returns:
        Tuple of (source sentences, target sentences, source sentence
        vectors, target sentence vectors).
    """
    # Drop the first and last rows of each embedding matrix (per the original
    # comment these are the start and end token embeddings).
    target_embedding_layer = model.layers[1].get_weights()[0][1:-1, :]
    source_embedding_layer = model.layers[0].get_weights()[0][1:-1, :]

    source_sentences, target_sentences = [], []
    source_sentence_vector, target_sentence_vector = [], []

    source_meta_path = os.path.join(log_dir, 'metadata_source.tsv')
    target_meta_path = os.path.join(log_dir, 'metadata_target.tsv')
    with open(source_meta_path, "w", encoding='utf-8') as out_meta_source, \
            open(target_meta_path, "w", encoding='utf-8') as out_meta_target:
        # Header row, identical in both files.
        out_meta_source.write('source' + "\t" + 'target' + "\n")
        out_meta_target.write('source' + "\t" + 'target' + "\n")

        for source, target in sentence_pair:
            source_vec = tokenize_and_aggregate(
                source, source_tokenizer, agg, source_embedding_layer)
            target_vec = tokenize_and_aggregate(
                target, target_tokenizer, agg, target_embedding_layer)

            out_meta_source.write(source + "\t" + target + "\n")
            out_meta_target.write(source + "\t" + target + "\n")

            source_sentences.append(source)
            target_sentences.append(target)
            source_sentence_vector.append(source_vec)
            target_sentence_vector.append(target_vec)

    source_sentence_vector = display_embedding_shape(
        source_sentence_vector, source_sentences, 'source')
    target_sentence_vector = display_embedding_shape(
        target_sentence_vector, target_sentences, 'target')

    checkpoint = tf.train.Checkpoint(
        source_embedding=tf.Variable(source_sentence_vector),
        target_embedding=tf.Variable(target_sentence_vector))

    config = projector.ProjectorConfig()
    for side in ('source', 'target'):
        config = save_checkpoint_create_config(checkpoint, config, side,
                                               log_dir)
    projector.visualize_embeddings(log_dir, config)

    return (source_sentences, target_sentences, source_sentence_vector,
            target_sentence_vector)
def projector_add(embedding, writer, SPRITES=None, LABELS=None, IMG_DIM=None):
    """Register an embedding tensor with the projector through a writer.

    Args:
        embedding: object whose ``name`` is recorded as the tensor name.
        writer: passed to ``projector.visualize_embeddings``.
        SPRITES: optional sprite image path.
        LABELS: optional metadata path.
        IMG_DIM: optional width and height of a single thumbnail,
            like [28, 28].
    """
    config = projector.ProjectorConfig()
    entry = config.embeddings.add()
    entry.tensor_name = embedding.name

    # Each optional field is set only when a truthy value was supplied.
    if SPRITES:
        entry.sprite.image_path = SPRITES
    if LABELS:
        entry.metadata_path = LABELS
    if IMG_DIM:
        entry.sprite.single_image_dim.extend(IMG_DIM)

    projector.visualize_embeddings(writer, config)
def embedding_tensorboard(model, lookup_dict, embedd):
    """Export the weights of one embedding layer for the TensorBoard projector.

    Reads the weights of the layer named ``embedding-<embedd>``, writes one
    label per embedding row to ``metadata.tsv``, checkpoints the weights and
    writes the projector config, all under ``logs/<embedd>``.

    Args:
        model: model exposing ``get_layer(name).get_weights()``.
        lookup_dict: mapping whose values are row indices; used to look up
            the label of each row when ``embedd == "type"``.
        embedd: suffix of the embedding layer name and of the log directory.
    """
    # Set up a logs directory, so Tensorboard knows where to look for files.
    # makedirs also creates the parent "logs" directory when it is missing,
    # which a plain mkdir would fail on.
    log_dir = 'logs/%s' % embedd
    os.makedirs(log_dir, exist_ok=True)

    column_string = "combined_type" if embedd == "type" else "Gen"

    # Fetch the layer weights once and reuse them for labels and checkpoint.
    embedding_matrix = model.get_layer('embedding-%s' % embedd).get_weights()[0]

    output_embeddings_type_df = pd.DataFrame(embedding_matrix).reset_index()
    # Assigning four column names requires exactly three embedding dimensions.
    output_embeddings_type_df.columns = [
        column_string, 'embedding_1', 'embedding_2', 'embedding_3'
    ]
    # The first column is the row index produced by reset_index().
    labels = output_embeddings_type_df.iloc[:, 0:1].values

    if embedd == "type":

        def get_key(val):
            for key, value in lookup_dict.items():
                if val == value:
                    return key

        labels = [get_key(label) for label in labels]
    else:
        # NOTE(review): the flattening step is placed in this branch because
        # applying it to the looked-up keys above would split them apart —
        # confirm against the original layout.
        labels += 1
        labels = [y for x in labels for y in x]

    # Save labels separately, one per line.
    with open(os.path.join(log_dir, 'metadata.tsv'), "w") as f:
        for label in labels:
            f.write("{}\n".format(label))

    weights = tf.Variable(embedding_matrix)
    # Create a checkpoint from embedding, the filename and key are
    # name of the tensor.
    checkpoint = tf.train.Checkpoint(embedding=weights)
    checkpoint.save(os.path.join(log_dir, "embedding.ckpt"))

    # Set up config
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    # The name of the tensor will be suffixed by `/.ATTRIBUTES/VARIABLE_VALUE`
    embedding.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
    embedding.metadata_path = 'metadata.tsv'
    projector.visualize_embeddings(log_dir, config)
def _create_projector_config(
    path_metadata: str, path_sprite: Optional[str]
) -> projector.ProjectorConfig:
    """Build the projector config used to show the embedding in Tensorboard.

    The sprite image and its thumbnail size are added only when
    ``path_sprite`` is given.
    """
    projector_config = projector.ProjectorConfig()
    entry = projector_config.embeddings.add()
    entry.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
    entry.metadata_path = path_metadata

    if path_sprite is None:
        return projector_config

    entry.sprite.image_path = path_sprite
    entry.sprite.single_image_dim.extend(IMAGE_SIZE_IN_SPRITE)
    return projector_config