def build_model(sess, graph, loss_model):
    """Builds the word2vec TensorFlow graph and wraps it in a Word2Vec model."""
    model = None
    with graph.as_default():
        # Ops and variables pinned to the CPU because of missing GPU implementation.
        with tf.device('/cpu:0'):
            # Input data.
            train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
            train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
            valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
            global_step = tf.Variable(0, trainable=False)

            # Look up embeddings for the input (center) words.
            embeddings = tf.Variable(
                tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
            embed = tf.nn.embedding_lookup(embeddings, train_inputs)

            # Softmax (output) weights used by the cross-entropy loss.
            sm_weights = tf.Variable(
                tf.truncated_normal([vocabulary_size, embedding_size],
                                    stddev=1.0 / math.sqrt(embedding_size)))

            # Get context embeddings from the labels.
            true_w = tf.nn.embedding_lookup(sm_weights, train_labels)
            true_w = tf.reshape(true_w, [-1, embedding_size])

            # Construct the variables for the NCE loss.
            nce_weights = tf.Variable(
                tf.truncated_normal([vocabulary_size, embedding_size],
                                    stddev=1.0 / math.sqrt(embedding_size)))
            nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

            if loss_model == 'cross_entropy':
                loss = tf.reduce_mean(tf_func.cross_entropy_loss(embed, true_w))
            else:
                # Sample negative examples with unigram probability.
                sample = np.random.choice(vocabulary_size, num_sampled,
                                          p=unigram_prob, replace=False)
                # Negative samples that gave the best accuracy for the NCE loss model:
                # [1717, 3137, 6, 448, 56, 614, 2857, 115, 2799, 18, 3, 44, 36, 13, 20,
                #  1, 1614, 23, 932, 299, 1585, 3663, 422, 2153, 2, 5224, 10, 22, 3320,
                #  24, 1463, 79, 31, 222, 15986, 3178, 20188, 569, 102, 5, 6145, 27, 57,
                #  9, 2251, 5545, 1449, 758, 8, 1772, 47, 237, 0, 32, 13934, 224, 29,
                #  6628, 15, 16, 4105, 339, 3310, 597]
                loss = tf.reduce_mean(
                    tf_func.nce_loss(embed, nce_weights, nce_biases,
                                     train_labels, sample, unigram_prob))

            # tf.summary.scalar('loss', loss)

            # Construct the SGD optimizer using a learning rate of 1.0.
            optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(
                loss, global_step=global_step)

            # Compute the cosine similarity between minibatch examples and all embeddings.
            norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
            normalized_embeddings = embeddings / norm
            valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
                                                      valid_dataset)
            similarity = tf.matmul(valid_embeddings, normalized_embeddings,
                                   transpose_b=True)

            saver = tf.train.Saver(tf.global_variables())

            # Save summary
            # summary = tf.summary.merge_all()
            # summary_writer = tf.summary.FileWriter(summary_path + '/summary', sess.graph)
            summary = None
            summary_writer = None

            # Initialize all variables in the default session before training starts.
            tf.global_variables_initializer().run()
            print("Initialized")

            model = Word2Vec(train_inputs, train_labels, loss, optimizer, global_step,
                             embeddings, normalized_embeddings, valid_embeddings,
                             similarity, saver, summary, summary_writer)

    return model
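
# --- Illustrative only: not part of the original script ---------------------
# The cross-entropy branch above delegates to tf_func.cross_entropy_loss,
# which lives in a separate module not shown here. The sketch below is an
# assumption about what a batch-softmax cross entropy for word2vec could look
# like (inputs = center-word embeddings v_c, true_w = context weights u_o);
# the repository's actual implementation may differ.
def _cross_entropy_loss_sketch(inputs, true_w):
    """Hypothetical stand-in for tf_func.cross_entropy_loss.

    Computes -log(exp(u_o^T v_c) / sum_w exp(u_w^T v_c)) per example, where
    the normalizing sum runs over the context words of the current batch.
    """
    # u_o^T v_c for each (center, context) pair: shape [batch_size]
    a = tf.reduce_sum(tf.multiply(inputs, true_w), axis=1)
    # All pairwise scores u_w^T v_c within the batch: shape [batch_size, batch_size]
    scores = tf.matmul(inputs, true_w, transpose_b=True)
    # Log of the batch-local partition function: shape [batch_size]
    b = tf.log(tf.reduce_sum(tf.exp(scores), axis=1))
    return b - a  # per-example loss; reduced with tf.reduce_mean by the caller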
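
# --- Usage sketch: not part of the original script ---------------------------
# A minimal illustration of how build_model might be driven, assuming the
# module-level constants referenced above (batch_size, num_sampled, etc.) are
# defined, that the Word2Vec object exposes its constructor arguments as
# attributes of the same names, and that a batching helper exists (the
# generate_batch call below is hypothetical).
def _example_training_loop(loss_model='nce', num_steps=1000):
    graph = tf.Graph()
    with tf.Session(graph=graph) as session:
        # build_model initializes variables in the default session, so it must
        # be called inside this `with` block.
        model = build_model(session, graph, loss_model)
        for step in range(num_steps):
            batch_inputs, batch_labels = generate_batch(batch_size)  # hypothetical helper
            _, loss_val = session.run(
                [model.optimizer, model.loss],
                feed_dict={model.train_inputs: batch_inputs,
                           model.train_labels: batch_labels})
            if step % 100 == 0:
                print("step %d: loss %.4f" % (step, loss_val))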