def gen_y_test(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Dataset functions
    entityvectorpath = args.ev
    relationvectorpath = args.rv
    entityvector = loadvector(entityvectorpath)
    relationvector = loadvector(relationvectorpath)
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')

    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab, vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab, vector, padding=args.padding)

    print('Loading datasets.')
    # save y_test
    test2 = Data(args.test_data, input_vocab, output_vocab_entity, output_vocab_relation)
    test2.load()
    target_list1 = test2.targets1
    # target_list2 = test2.targets2

    path = './results/y_test'
    with open(path, 'w') as f:
        for i in range(len(target_list1)):
            # f.write(str(i) + '\t' + target_list1[i] + '\t' + target_list2[i] + '\n')
            f.write(str(i) + '\t' + target_list1[i] + '\n')
    print('ytest in file')

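# The functions above and below call loadvector() but never define it. A minimal sketch of
# what it is assumed to do (hypothetical; the project's real loader may use another format):
# read a word2vec-style text file and return a {token: vector} dict of numpy arrays.
import numpy as np

def loadvector(path):
    """Load a whitespace-separated embedding file: token followed by its float components."""
    vectors = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split()
            if len(parts) < 2:
                continue  # skip headers or blank lines
            vectors[parts[0]] = np.asarray(parts[1:], dtype=np.float32)
    return vectors
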
def __init__(self, padding=None):
    """
    Visualizes attention maps
    :param padding: the padding to use for the sequences.
    """
    self.padding = padding
    self.input_vocab = Vocabulary('./data/human_vocab.json', padding=padding)
    self.output_vocab = Vocabulary('./data/machine_vocab.json', padding=padding)

def __init__(self, opts):
    self.opts = opts
    self.src_length = opts.sequence_length
    self.tgt_length = 11  # YYYY-MM-DD<eot>
    self.host_embeddings = opts.host_embeddings
    self.input_vocab = Vocabulary("./data/human_vocab.json", padding=self.src_length)
    self.output_vocab = Vocabulary("./data/machine_vocab.json", padding=self.tgt_length)
    self.src_vocab_size = self.input_vocab.size()
    self.tgt_vocab_size = self.output_vocab.size()

def main(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    # Dataset functions
    input_vocab = Vocabulary('./data/human_vocab.json', padding=args.padding)
    output_vocab = Vocabulary('./data/machine_vocab.json', padding=args.padding)

    print('Loading datasets.')
    training = Data(args.training_data, input_vocab, output_vocab)
    validation = Data(args.validation_data, input_vocab, output_vocab)
    training.load()
    validation.load()
    training.transform()
    validation.transform()
    print('Datasets Loaded.')

    print('Compiling Model.')
    model = simpleNMT(pad_length=args.padding,
                      n_chars=input_vocab.size(),
                      n_labels=output_vocab.size(),
                      embedding_learnable=False,
                      encoder_units=256,
                      decoder_units=256,
                      trainable=True,
                      return_probabilities=False)
    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', all_acc])
    print('Model Compiled.')

    print('Training. Ctrl+C to end early.')
    try:
        model.fit_generator(generator=training.generator(args.batch_size),
                            steps_per_epoch=100,
                            validation_data=validation.generator(args.batch_size),
                            validation_steps=100,
                            callbacks=[cp],
                            workers=1,
                            verbose=1,
                            epochs=args.epochs)
    except KeyboardInterrupt as e:
        print('Model training stopped early.')

    print('Model training complete.')
    run_examples(model, input_vocab, output_vocab)

def __init__(self, padding=None,
             input_vocab=SAMPLE_HUMAN_VOCAB,
             output_vocab=SAMPLE_MACHINE_VOCAB):
    """
    Visualizes attention maps
    :param padding: the padding to use for the sequences.
    :param input_vocab: the location of the input human vocabulary file
    :param output_vocab: the location of the output machine vocabulary file
    """
    self.padding = padding
    self.input_vocab = Vocabulary(input_vocab, padding=padding)
    self.output_vocab = Vocabulary(output_vocab, padding=padding)

def main(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Dataset functions
    entityvectorpath = args.ev
    relationvectorpath = args.rv
    entityvector = loadvector(entityvectorpath)
    relationvector = loadvector(relationvectorpath)
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')

    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab, vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab, vector, padding=args.padding)

    print('Loading datasets.')
    training = Data(args.training_data, input_vocab, output_vocab_entity, output_vocab_relation)
    validation = Data(args.validation_data, input_vocab, output_vocab_entity, output_vocab_relation)
    test = Data(args.test_data, input_vocab, output_vocab_entity, output_vocab_relation)
    training.load()
    validation.load()
    test.load()
    training.transform(vector)
    validation.transform(vector)
    test.transform(vector)
    print('Datasets Loaded.')

    print('Compiling Model.')
    model = simpleNMT2(pad_length=args.padding,
                       n_chars=100,
                       entity_labels=output_vocab_entity.size(),
                       relation_labels=output_vocab_relation.size(),
                       dim=100,
                       embedding_learnable=False,
                       encoder_units=args.units,
                       decoder_units=args.units,
                       trainable=True,
                       return_probabilities=False)
    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print('Model Compiled.')

    print('Training. Ctrl+C to end early.')
    try:
        hist = model.fit([training.inputs1, training.inputs2, training.inputs3,
                          training.inputs4, training.inputs5],
                         [training.targets1],
                         epochs=args.epochs,
                         batch_size=args.batch_size,
                         validation_split=0.05)
    except KeyboardInterrupt as e:
        print('Model training stopped early.')

    model.save('./savemodel/model1.h5')
    print('Model training complete.')

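# A possible command-line wrapper for the entity/relation training main() above, assuming
# the argument names it reads (ev, rv, invocab, evocab, revocab, padding, units, epochs,
# batch_size and the three data paths). Defaults and help texts are illustrative only.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train the joint entity/relation model.')
    parser.add_argument('--ev', help='entity vector file')
    parser.add_argument('--rv', help='relation vector file')
    parser.add_argument('--invocab', help='input vocabulary JSON')
    parser.add_argument('--evocab', help='entity vocabulary JSON')
    parser.add_argument('--revocab', help='relation vocabulary JSON')
    parser.add_argument('--padding', type=int, default=50)
    parser.add_argument('--units', type=int, default=256)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--training_data')
    parser.add_argument('--validation_data')
    parser.add_argument('--test_data')
    main(parser.parse_args())
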
def testmodel(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Dataset functions
    entityvectorpath = args.ev
    relationvectorpath = args.rv
    entityvector = loadvector(entityvectorpath)
    relationvector = loadvector(relationvectorpath)
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')

    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab, vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab, vector, padding=args.padding)

    print('Loading datasets.')
    test = Data(args.test_data, input_vocab, output_vocab_entity, output_vocab_relation)
    test.load()
    test.transform(vector)
    print('Test Datasets Loaded.')

    model = load_model('./savemodel/model1.h5',
                       custom_objects={'AttentionLayer': AttentionLayer})
    print('Model Loaded. Start test.')

    # prediction = model.predict([test.inputs1, test.inputs2, test.inputs3, test.inputs4, test.inputs5])
    prediction = model.predict([test.inputs1, test.inputs2, test.inputs3])

    # /result/y_pre
    p_prediction1 = list(prediction.flatten())
    # p_prediction2 = list(prediction[1].flatten())
    # num_entity = output_vocab_entity.size()
    num_relation = output_vocab_relation.size()

    # Reshape the flat score vector into one row of [label_index, score] pairs per example
    # for m in range(int(len(p_prediction)/num)):
    #     prediction_list.append('')
    prediction_list1 = [[0 for col in range(num_relation)]
                        for row in range(int(len(p_prediction1) / num_relation))]
    # prediction_list2 = [[0 for col in range(num_entity)]
    #                     for row in range(int(len(p_prediction2) / num_entity))]
    for i in range(len(p_prediction1)):
        j = int(i / num_relation)
        k = i % num_relation
        prediction_list1[j][k] = [k, p_prediction1[i]]
    # for i in range(len(p_prediction2)):
    #     j = int(i / num_entity)
    #     k = i % num_entity
    #     prediction_list2[j][k] = [k, p_prediction2[i]]

    pretarget1 = []
    pretarget2 = []
    for i in range(len(prediction_list1)):
        templist1 = prediction_list1[i]
        templist1.sort(key=takeSecond, reverse=True)
        templist11 = output_vocab_relation.int_to_string(templist1)
        pretarget1.append(templist11[:5])
        pretarget2.append(templist1)

    listinfile(pretarget1, './results/y_pre1')
    listinfile(pretarget2, './results/y_pre2')
    print('ypre1 in file')

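# testmodel() relies on two helpers that are not shown in this section. Sketches of
# plausible implementations follow (assumptions, not the project's actual code):
# takeSecond is used as a sort key over [label_index, score] pairs, and listinfile
# writes one prediction list per line.

def takeSecond(pair):
    """Sort key: return the score from a [label_index, score] pair."""
    return pair[1]

def listinfile(items, path):
    """Write one list entry per line, prefixed with its index and a tab."""
    with open(path, 'w') as f:
        for i, item in enumerate(items):
            f.write(str(i) + '\t' + str(item) + '\n')
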
def __init__(self, padding=None,
             input_vocab=SAMPLE_HUMAN_VOCAB,
             output_vocab=SAMPLE_MACHINE_VOCAB):
    """
    Visualizes attention maps
    :param padding: the padding to use for the sequences.
    :param input_vocab: the location of the input human vocabulary file
    :param output_vocab: the location of the output machine vocabulary file
    """
    self.padding = padding
    self.input_vocab = Vocabulary(input_vocab, padding=padding)
    self.output_vocab = Vocabulary(output_vocab, padding=padding)

def main(args):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    # Dataset functions
    input_vocab = Vocabulary('./data/human_vocab.json', padding=args.padding)
    output_vocab = Vocabulary('./data/machine_vocab.json', padding=args.padding)

    print('Loading datasets.')
    training = Data(args.training_data, input_vocab, output_vocab)
    validation = Data(args.validation_data, input_vocab, output_vocab)
    training.load()
    validation.load()
    training.transform()
    validation.transform()
    print('Datasets Loaded.')

    print('Compiling Model.')
    model = simpleNMT(pad_length=args.padding,
                      n_chars=input_vocab.size(),
                      n_labels=output_vocab.size(),
                      embedding_learnable=False,
                      encoder_units=256,
                      decoder_units=256,
                      trainable=True,
                      return_probabilities=False)
    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', all_acc])
    print('Model Compiled.')

    print('Training. Ctrl+C to end early.')
    try:
        kwargs = dict(generator=training.generator(args.batch_size),
                      steps_per_epoch=100,
                      validation_data=validation.generator(args.batch_size),
                      validation_steps=100,
                      callbacks=[cp],
                      workers=1,
                      verbose=1,
                      epochs=args.epochs)
        model.fit_generator(**kwargs)
    except KeyboardInterrupt as e:
        print('Model training stopped early.')

    print('Model training complete.')
    run_examples(model, input_vocab, output_vocab)

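# A minimal command-line entry point for the main() variants above, assuming the
# argument names they read (gpu, padding, training_data, validation_data, batch_size,
# epochs). The defaults below are illustrative, not the project's published settings.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train the date-normalisation NMT model.')
    parser.add_argument('--gpu', default='0')
    parser.add_argument('--padding', type=int, default=50)
    parser.add_argument('--training_data', default='./data/training.csv')
    parser.add_argument('--validation_data', default='./data/validation.csv')
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=10)
    main(parser.parse_args())
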
class Visualizer(object):

    def __init__(self, padding=None,
                 input_vocab=SAMPLE_HUMAN_VOCAB,
                 output_vocab=SAMPLE_MACHINE_VOCAB):
        """
        Visualizes attention maps
        :param padding: the padding to use for the sequences.
        :param input_vocab: the location of the input human vocabulary file
        :param output_vocab: the location of the output machine vocabulary file
        """
        self.padding = padding
        self.input_vocab = Vocabulary(input_vocab, padding=padding)
        self.output_vocab = Vocabulary(output_vocab, padding=padding)

    def set_models(self, pred_model, proba_model):
        """
        Sets the models to use
        :param pred_model: the prediction model
        :param proba_model: the model that outputs the activation maps
        """
        self.pred_model = pred_model
        self.proba_model = proba_model

    def attention_map(self, text):
        """
        Text to visualize the attention map for.
        """
        # encode the string
        d = self.input_vocab.string_to_int(text)

        # get the output sequence
        predicted_text = run_example(self.pred_model, self.input_vocab,
                                     self.output_vocab, text)
        text_ = list(text) + ['<eot>'] + ['<unk>'] * self.input_vocab.padding

        # get the lengths of the string
        input_length = len(text) + 1
        output_length = predicted_text.index('<eot>') + 1

        # get the activation map
        activation_map = np.squeeze(
            self.proba_model.predict(np.array([d])))[0:output_length, 0:input_length]

        # import seaborn as sns
        plt.clf()
        f = plt.figure(figsize=(8, 8.5))
        ax = f.add_subplot(1, 1, 1)

        # add image
        i = ax.imshow(activation_map, interpolation='nearest', cmap='gray')

        # add colorbar
        cbaxes = f.add_axes([0.2, 0, 0.6, 0.03])
        cbar = f.colorbar(i, cax=cbaxes, orientation='horizontal')
        cbar.ax.set_xlabel('Probability', labelpad=2)

        # add labels
        ax.set_yticks(range(output_length))
        ax.set_yticklabels(predicted_text[:output_length])
        ax.set_xticks(range(input_length))
        ax.set_xticklabels(text_[:input_length], rotation=45)
        ax.set_xlabel('Input Sequence')
        ax.set_ylabel('Output Sequence')

        # add grid and legend
        ax.grid()
        # ax.legend(loc='best')

        f.savefig(os.path.join(HERE, 'attention_maps',
                               text.replace('/', '') + '.pdf'),
                  bbox_inches='tight')
        f.show()

class Nmt(object):

    def __init__(self, opts):
        self.opts = opts
        self.src_length = opts.sequence_length
        self.tgt_length = 11  # YYYY-MM-DD<eot>
        self.host_embeddings = opts.host_embeddings
        self.input_vocab = Vocabulary("./data/human_vocab.json", padding=self.src_length)
        self.output_vocab = Vocabulary("./data/machine_vocab.json", padding=self.tgt_length)
        self.src_vocab_size = self.input_vocab.size()
        self.tgt_vocab_size = self.output_vocab.size()

    def _build_dataset(self):
        self.start_id = start_id(self.output_vocab)
        self.end_id = end_id(self.output_vocab)
        data_file = ("./data/validation.csv"
                     if self.opts.infer else "./data/training.csv")
        data = Data(data_file, self.input_vocab, self.output_vocab)
        data.load()
        transform(data)
        vocab = (self.input_vocab, self.output_vocab)
        self.generator = DataGenerator(data, vocab, self.opts,
                                       self.start_id, self.end_id)
        items = next(self.generator)
        output_types = {i: tf.dtypes.as_dtype(items[i].dtype) for i in items}
        output_shapes = {i: tf.TensorShape(items[i].shape) for i in items}
        total_bytes = 0
        for i in items:
            total_bytes += items[i].nbytes
        dataset = tf.data.Dataset.from_generator(self.generator,
                                                 output_types=output_types,
                                                 output_shapes=output_shapes)
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset,
                                                       "InfeedQueue",
                                                       replication_factor=1)
        data_init = infeed_queue.initializer
        return dataset, infeed_queue, data_init, vocab

    def infer(self):
        with tf.device("cpu"):
            dataset, infeed_queue, data_init, vocab = self._build_dataset()
            outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name="outfeed")

        if self.host_embeddings:
            src_embedding = Nmt._build_embedding(
                self.src_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="source_embedding",
            )
            tgt_embedding = Nmt._build_embedding(
                self.tgt_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="tgt_embedding",
            )

        def build_common(src_embedding, tgt_embedding, source):
            input_, encoder_outputs, encoder_state = self._build_encoder(src_embedding, source)
            samples, logits = self._build_decoder(encoder_outputs,
                                                  encoder_state,
                                                  tgt_embedding,
                                                  None,
                                                  train=False)
            outfeed = outfeed_queue.enqueue({"samples": samples})
            return outfeed

        def build_infer(source):
            src_embedding = Nmt._build_embedding(
                self.src_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="source_embedding",
            )
            tgt_embedding = Nmt._build_embedding(
                self.tgt_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="tgt_embedding",
            )
            return build_common(src_embedding, tgt_embedding, source)

        def build_infer_host_embeddings(source):
            nonlocal src_embedding, tgt_embedding
            return build_common(src_embedding, tgt_embedding, source)

        with ipu_scope("/device:IPU:0"):
            build = build_infer_host_embeddings if self.host_embeddings else build_infer
            batch = ipu_compiler.compile(lambda: loops.repeat(
                1, build, infeed_queue=infeed_queue, inputs=[]))

        # Create a restoring object
        saver = tf.train.Saver()

        ipu_options = util.get_config(report_n=0)
        utils.configure_ipu_system(ipu_options)
        session = tf.Session()

        checkpoint = CHECKPOINT_FILE + ("host_ckpt" if self.opts.host_embeddings else "ckpt")
        saver.restore(session, checkpoint)
        session.run(data_init)

        if self.host_embeddings:
            batch = [
                batch,
                src_embedding(1, 1, False),
                tgt_embedding(1, 1, False)
            ]
        result_queue = outfeed_queue.dequeue()

        # Run a dummy value to force the graph compilation
        session.run(batch)
        result = session.run(result_queue)
        predictions = result["samples"]
        print_data(self.generator.query, vocab[0], predictions, vocab[1])

        while True:
            session.run(batch)
            result = session.run(result_queue)
            predictions = result["samples"]
            print_data(self.generator.query, vocab[0], predictions, vocab[1])
            if not self.opts.interact:
                break

    def train(self):
        with tf.device("cpu"):
            dataset, infeed_queue, data_init, vocab = self._build_dataset()
            outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name="outfeed")

        if self.host_embeddings:
            src_embedding = Nmt._build_embedding(
                self.src_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="source_embedding",
            )
            tgt_embedding = Nmt._build_embedding(
                self.tgt_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="tgt_embedding",
            )

        def build_common(src_embedding, tgt_embedding, source, target, label, mask):
            nonlocal outfeed_queue
            input_, encoder_outputs, encoder_state = self._build_encoder(src_embedding, source)
            samples, logits = self._build_decoder(encoder_outputs,
                                                  encoder_state,
                                                  tgt_embedding,
                                                  target,
                                                  train=True)
            loss = self._build_optimiser(logits, label, mask)
            outfeed = outfeed_queue.enqueue({"loss": loss, "logits": logits})
            return outfeed

        def build_train(source, target, label, mask):
            src_embedding = Nmt._build_embedding(
                self.src_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="source_embedding",
            )
            tgt_embedding = Nmt._build_embedding(
                self.tgt_vocab_size,
                self.opts.embedding_size,
                self.opts.host_embeddings,
                name="tgt_embedding",
            )
            return build_common(src_embedding, tgt_embedding, source, target, label, mask)

        def build_train_host_embeddings(source, target, label, mask):
            nonlocal src_embedding, tgt_embedding
            return build_common(src_embedding, tgt_embedding, source, target, label, mask)

        with ipu_scope("/device:IPU:0"):
            build = build_train_host_embeddings if self.host_embeddings else build_train
            batch = ipu_compiler.compile(lambda: loops.repeat(
                self.opts.batches_per_step,
                build,
                infeed_queue=infeed_queue,
                inputs=[],
            ))

        # Create a restoring object
        saver = tf.train.Saver()

        if self.opts.save_graph:
            # Dump the graph to a logdir
            writer = tf.summary.FileWriter(
                os.path.join("./logs", "NMT",
                             time.strftime("%Y%m%d_%H%M%S_%Z")))
            writer.add_graph(tf.get_default_graph())

        ipu_options = util.get_config(report_n=0)
        utils.configure_ipu_system(ipu_options)
        session = tf.Session()

        checkpoint = CHECKPOINT_FILE + ("host_ckpt" if self.opts.host_embeddings else "ckpt")
        if self.opts.ckpt:
            saver.restore(session, checkpoint)
        else:
            utils.move_variable_initialization_to_cpu()
            session.run(tf.global_variables_initializer())
        session.run(data_init)
        print("Init done.")

        if self.host_embeddings:
            batch = [
                batch,
                src_embedding(self.opts.batches_per_step, 1),
                tgt_embedding(self.opts.batches_per_step, 1),
            ]
        result_queue = outfeed_queue.dequeue()
        session.run(batch)  # Warmup
        best_loss = float("Inf")
        for e in range(self.opts.iterations):
            start = time.time()
            session.run(batch)
            result = session.run(result_queue)
            l = result["loss"]
            avg_loss = np.mean(l)
            duration = (time.time() - start) / self.opts.batches_per_step

            print("Step: {:>5}. Average Loss {:.3}. Items/sec {:.4}. Tokens/sec {}".format(
                (e + 1),
                avg_loss,
                self.opts.batch_size / duration,
                self.opts.batch_size * (self.src_length + self.tgt_length) / duration,
            ))

            if avg_loss < best_loss:
                best_loss = avg_loss
                saver.save(session, checkpoint)

    @staticmethod
    def _build_embedding(vocab_size, embedding_size, host_embeddings, name="embedding"):
        if host_embeddings:
            embedding = embedding_ops.create_host_embedding(
                name,
                shape=[vocab_size, embedding_size],
                dtype=DTYPE,
                optimizer_spec=embedding_ops.HostEmbeddingOptimizerSpec(0.03),
                initializer=tf.initializers.random_uniform(maxval=1.0, dtype=DTYPE),
            )
        else:
            with tf.variable_scope("embedding", dtype=DTYPE, use_resource=True) as scope:
                # Random embedding
                embedding = tf.get_variable(
                    name,
                    [vocab_size, embedding_size],
                    scope.dtype,
                    initializer=tf.initializers.random_uniform(maxval=1.0, dtype=scope.dtype),
                    trainable=True,
                )
        return embedding

    @staticmethod
    def _build_cell(num_units, num_layers):
        if num_layers == 1:
            return tf.contrib.rnn.BasicLSTMCell(num_units,
                                                forget_bias=forget_bias,
                                                state_is_tuple=False)
        cell_list = []
        for i in range(num_layers):
            cell_list.append(
                tf.contrib.rnn.BasicLSTMCell(num_units,
                                             forget_bias=forget_bias,
                                             state_is_tuple=False))
        return tf.contrib.rnn.MultiRNNCell(cell_list)

    def _build_encoder(self, embedding, source):
        with tf.variable_scope("input", dtype=DTYPE, use_resource=True):
            if self.host_embeddings:
                encoder_emb_inp = embedding.lookup(source)
            else:
                encoder_emb_inp = tf.nn.embedding_lookup(embedding, source)

        with tf.variable_scope("encoder", dtype=DTYPE, use_resource=True) as scope:
            dtype = scope.dtype
            cell = Nmt._build_cell(self.opts.num_units, self.opts.num_layers)

            if self.opts.bi:
                outputs, states = tf.nn.bidirectional_dynamic_rnn(
                    cell,
                    Nmt._build_cell(self.opts.num_units, self.opts.num_layers),
                    encoder_emb_inp,
                    dtype=dtype,
                    time_major=time_major,
                    swap_memory=False,
                )
                encoder_outputs = tf.add_n(outputs)
                encoder_state = states[0] + states[1]
            else:
                encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
                    cell,
                    encoder_emb_inp,
                    dtype=dtype,
                    time_major=time_major,
                    swap_memory=False,
                )
        return source, encoder_outputs, encoder_state

    def _build_decoder(self, encoder_outputs, encoder_state, embedding, target=None, train=False):
        with tf.variable_scope("decoder", dtype=DTYPE, use_resource=True) as decoder_scope:
            dtype = decoder_scope.dtype
            tgt_length = self.src_length * 2
            decoder_num_units = self.opts.num_units
            atten_num_units = self.opts.num_units

            # RNN Cell
            cell = Nmt._build_cell(decoder_num_units, self.opts.num_layers)
            initial_state = encoder_state

            # Attention wrapper
            if self.opts.attention:
                cell = self._build_attention(encoder_outputs, cell)
                initial_state = tf.contrib.seq2seq.AttentionWrapperState(
                    cell_state=encoder_state,
                    attention=tf.zeros([self.opts.batch_size, atten_num_units], dtype),
                    time=tf.constant(0, tf.int32),
                    alignments=tf.zeros([self.opts.batch_size, self.src_length], dtype),
                    alignment_history=(),
                    attention_state=tf.zeros([self.opts.batch_size, self.src_length], dtype),
                )

            # Projection Layer
            projection_layer = tf.layers.Dense(units=self.tgt_vocab_size,
                                               use_bias=False,
                                               name="projection")

            if train:
                tgt_length = self.tgt_length
                if self.host_embeddings:
                    decoder_emb_inp = embedding.lookup(target)
                else:
                    decoder_emb_inp = tf.nn.embedding_lookup(embedding, target)
                helper = TrainingHelperNoCond(
                    decoder_emb_inp,
                    np.full([self.opts.batch_size], tgt_length, dtype=np.int32),
                    time_major=time_major,
                )
            else:
                # Inference
                tgt_sos_id = self.start_id
                tgt_eos_id = self.end_id
                start_tokens = np.full([self.opts.batch_size], tgt_sos_id, dtype=np.int32)
                end_token = tgt_eos_id
                if self.host_embeddings:
                    helper = GreedyEmbeddingHelperNoCond(
                        lambda i: embedding.lookup(i), start_tokens, end_token)
                else:
                    helper = GreedyEmbeddingHelperNoCond(embedding, start_tokens, end_token)

            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell,
                helper,
                initial_state=initial_state,
                output_layer=projection_layer if not train else None,  # applied per timestep
            )

            # Dynamic decoding
            outputs, final_context_state, _ = dynamic_decode(  # Contains the XLA check
                decoder,
                maximum_iterations=tgt_length,  # Required for static TensorArrays
                output_time_major=time_major,
                swap_memory=False,
                scope=decoder_scope,
            )
            if train:
                # Specify dynamic shapes to avoid Assert
                logits = outputs.rnn_output
                logits.set_shape([tgt_length, self.opts.batch_size, atten_num_units])
                logits = projection_layer(logits)
                return outputs.sample_id, logits
            else:
                samples = outputs.sample_id
                samples.set_shape([tgt_length, self.opts.batch_size])
                return samples, outputs.rnn_output

    def _build_attention(self, encoder_outputs, decoder_cell):
        with tf.variable_scope("attention", dtype=DTYPE, use_resource=True) as scope:
            # Attention is batch major
            inputs = tf.transpose(encoder_outputs, [1, 0, 2])

            if self.opts.attention == "luong":
                attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                    self.opts.num_units,
                    inputs,
                    dtype=scope.dtype,
                )
            else:
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    self.opts.num_units,
                    inputs,
                    dtype=scope.dtype,
                )
            return AttentionWrapperNoAssert(decoder_cell, attention_mechanism)

    def _build_optimiser(self, logits, labels, mask):
        with tf.variable_scope("loss", use_resource=True):
            # Logits is dynamic so an Assert is added to check shapes
            crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels, logits=logits)
            train_loss = tf.reduce_sum(crossent * mask) / self.opts.batch_size

        # Calculate and clip gradients
        params = tf.trainable_variables()
        gradients = tf.gradients(train_loss, params)
        clipped_gradients = [
            grad if grad is None else tf.clip_by_norm(grad, max_gradient_norm)
            for grad in gradients
        ]
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        update_step = optimizer.apply_gradients(zip(clipped_gradients, params))
        with tf.control_dependencies([update_step]):
            mean_loss = tf.reduce_mean(train_loss, name="train_loss")
        return mean_loss

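# How the Nmt class above might be driven, assuming an options object exposing the
# fields it reads (sequence_length, host_embeddings, infer, embedding_size, num_units,
# num_layers, bi, attention, batch_size, batches_per_step, iterations, save_graph,
# ckpt, interact). The values below are illustrative, not the project's defaults.
class Options(object):
    sequence_length = 20
    host_embeddings = False
    infer = False
    embedding_size = 32
    num_units = 128
    num_layers = 1
    bi = False
    attention = "luong"
    batch_size = 32
    batches_per_step = 100
    iterations = 1000
    save_graph = False
    ckpt = False
    interact = False

opts = Options()
model = Nmt(opts)
if opts.infer:
    model.infer()
else:
    model.train()
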
class Visualizer(object):

    def __init__(self, padding=None,
                 input_vocab=SAMPLE_HUMAN_VOCAB,
                 output_vocab=SAMPLE_MACHINE_VOCAB):
        """
        Visualizes attention maps
        :param padding: the padding to use for the sequences.
        :param input_vocab: the location of the input human vocabulary file
        :param output_vocab: the location of the output machine vocabulary file
        """
        self.padding = padding
        self.input_vocab = Vocabulary(input_vocab, padding=padding)
        self.output_vocab = Vocabulary(output_vocab, padding=padding)

    def set_models(self, pred_model, proba_model):
        """
        Sets the models to use
        :param pred_model: the prediction model
        :param proba_model: the model that outputs the activation maps
        """
        self.pred_model = pred_model
        self.proba_model = proba_model

    def attention_map(self, text):
        """
        Text to visualize the attention map for.
        """
        # encode the string
        d = self.input_vocab.string_to_int(text)
        print('d: ', d)

        # get the output sequence
        predicted_text = run_example(self.pred_model, self.input_vocab,
                                     self.output_vocab, text)
        print('predicted_text: ', predicted_text)
        text_ = list(text) + ['<eot>'] + ['<unk>'] * self.input_vocab.padding

        # get the lengths of the string
        input_length = len(text) + 1
        output_length = predicted_text.index('<eot>') + 1

        # get the activation map
        activation_map = np.squeeze(
            self.proba_model.predict(np.array([d])))[0:output_length, 0:input_length]
        print('activation_map: ', activation_map)
        # example output:
        # [[1.04707105e-05 1.22802967e-05 8.08871482e-06 2.06340337e-05
        #   9.13377789e-06 8.17141245e-06 2.89358250e-05 1.30348863e-05
        #   3.70874773e-06 1.70587246e-05 7.16923250e-06 4.97975234e-05
        #   4.53671564e-05 2.57728461e-05 2.45305255e-05 3.59793594e-05
        #   1.75800902e-04 3.21106811e-04 2.58878747e-04 9.57598037e-04]
        #  [2.88392557e-03 2.04139692e-03 8.94600758e-04 1.82232610e-03
        #   ...

        # import seaborn as sns
        plt.clf()
        f = plt.figure(figsize=(8, 8.5))
        ax = f.add_subplot(1, 1, 1)

        # add image
        i = ax.imshow(activation_map, interpolation='nearest', cmap='gray')  # show the weights in grayscale

        # add colorbar
        cbaxes = f.add_axes([0.2, 0, 0.6, 0.03])
        cbar = f.colorbar(i, cax=cbaxes, orientation='horizontal')
        cbar.ax.set_xlabel('Probability', labelpad=2)

        # add labels
        ax.set_yticks(range(output_length))
        ax.set_yticklabels(predicted_text[:output_length])
        ax.set_xticks(range(input_length))
        ax.set_xticklabels(text_[:input_length], rotation=45)
        ax.set_xlabel('Input Sequence')
        ax.set_ylabel('Output Sequence')

        # add grid and legend
        ax.grid()
        # ax.legend(loc='best')

        f.savefig(os.path.join(HERE, 'attention_maps',
                               text.replace('/', '') + '.pdf'),
                  bbox_inches='tight')
        f.show()

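# Example use of the Visualizer, assuming a simpleNMT-style builder (as in the training
# functions above) that can return both a prediction model and an attention-probability
# model. The weight path and builder arguments here are hypothetical.
viz = Visualizer(padding=50)
pred_model = simpleNMT(pad_length=50,
                       n_chars=viz.input_vocab.size(),
                       n_labels=viz.output_vocab.size(),
                       embedding_learnable=False,
                       encoder_units=256,
                       decoder_units=256,
                       trainable=False,
                       return_probabilities=False)
proba_model = simpleNMT(pad_length=50,
                        n_chars=viz.input_vocab.size(),
                        n_labels=viz.output_vocab.size(),
                        embedding_learnable=False,
                        encoder_units=256,
                        decoder_units=256,
                        trainable=False,
                        return_probabilities=True)
pred_model.load_weights('./weights/NMT.hdf5')   # hypothetical checkpoint path
proba_model.load_weights('./weights/NMT.hdf5')
viz.set_models(pred_model, proba_model)
viz.attention_map('Saturday 9 May 2018')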