def load_validation(self):
    # Read the validation set in batches through the queue-based DataReader.
    data_reader = utils.DataReader(data_filename="input_seqs_validation", batch_size=16)
    inputs_seqs_batch, outputs_batch = data_reader.read(False, 1)

    init_op = tf.group(tf.initialize_all_variables(), tf.initialize_local_variables())
    sess = tf.Session()
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    self.validation_inputs = []
    self.validation_targets = []
    try:
        # Collect batches until the reader's queue is exhausted (OutOfRangeError).
        while not coord.should_stop():
            input_data, targets = sess.run([inputs_seqs_batch, outputs_batch])
            self.validation_inputs.append(input_data)
            self.validation_targets.append(targets)
    except tf.errors.OutOfRangeError:
        pass
    finally:
        coord.request_stop()
        coord.join(threads)
        sess.close()

    # Flatten the collected batches into [num_examples, input_length] and [num_examples, 1] arrays.
    self.validation_inputs = np.array(self.validation_inputs).reshape([-1, self.config.input_length])
    self.validation_targets = np.array(self.validation_targets).reshape([-1, 1])
def vxn_tests(self):
    reader = utils.DataReader(data_filename="input_seqs_validation", batch_size=16)
    input_batch, output = reader.read(False, 1)

    init_op = tf.group(tf.initialize_all_variables(), tf.initialize_local_variables())
    session = tf.Session()
    session.run(init_op)
    coordinator = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=session, coord=coordinator)

    self.vxn_input = []
    self.vxn_answers = []
    try:
        while not coordinator.should_stop():
            input_data, ans = session.run([input_batch, output])
            self.vxn_input.append(input_data)
            self.vxn_answers.append(ans)
    except tf.errors.OutOfRangeError:
        pass
    finally:
        coordinator.request_stop()
        coordinator.join(threads)
        session.close()

    tmp = [-1, self.configuration.input_length]
    self.vxn_input = np.array(self.vxn_input).reshape(tmp)
    self.vxn_answers = np.array(self.vxn_answers).reshape([-1, 1])
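# A minimal evaluation sketch showing how the validation arrays collected above could be
# fed back through the graph built in build_graph() below. It assumes the attribute names
# from load_validation() and the first build_graph() variant (`input_data`, `targets`,
# `accuracy`); the method name `run_validation` and the chunking by config.batch_size are
# illustrative assumptions, not part of the original code.
def run_validation(self, sess):
    batch_size = self.config.batch_size
    accuracies = []
    # Feed the pre-loaded validation arrays batch by batch via feed_dict.
    for start in range(0, len(self.validation_inputs), batch_size):
        feed = {self.input_data: self.validation_inputs[start:start + batch_size],
                self.targets: self.validation_targets[start:start + batch_size]}
        accuracies.append(sess.run(self.accuracy, feed_dict=feed))
    return float(np.mean(accuracies))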
def build_graph(self):
    config = self.config
    self.reader = utils.DataReader(seq_len=config.seq_length,
                                   batch_size=config.batch_size,
                                   data_filename=config.data_filename)
    # Fast-weights RNN cell with layer normalization.
    self.cell = LayerNormFastWeightsBasicRNNCell(num_units=config.rnn_size)

    self.input_data = tf.placeholder(tf.int32, [None, config.input_length])
    self.targets = tf.placeholder(tf.int32, [None, 1])
    self.initial_state = self.cell.zero_state(tf.shape(self.targets)[0], tf.float32)
    self.initial_fast_weights = self.cell.zero_fast_weights(tf.shape(self.targets)[0], tf.float32)

    with tf.variable_scope("input_embedding"):
        embedding = tf.get_variable("embedding", [config.vocab_size, config.embedding_size])
        inputs = tf.split(1, config.input_length,
                          tf.nn.embedding_lookup(embedding, self.input_data))
        inputs = [tf.squeeze(input, [1]) for input in inputs]

    with tf.variable_scope("send_to_rnn"):
        # Unroll the cell over the embedded time steps, reusing variables after the first step.
        state = (self.initial_state, self.initial_fast_weights)
        output = None
        for i, input in enumerate(inputs):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            output, state = self.cell(input, state)

    with tf.variable_scope("softmax"):
        # Project the final RNN output onto the vocabulary; the argmax is the prediction.
        softmax_w = tf.get_variable("softmax_w", [config.rnn_size, config.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [config.vocab_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.output = tf.cast(tf.reshape(tf.arg_max(self.probs, 1), [-1, 1]), tf.int32)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.output, self.targets), tf.float32))

    # Sequence loss over the single target symbol per example.
    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [tf.reshape(self.targets, [-1])],
                                            [tf.ones([config.batch_size])],
                                            config.vocab_size)
    self.cost = tf.reduce_mean(loss)
    self.final_state = state

    # self.lr = tf.Variable(0.001, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), config.grad_clip)
    optimizer = tf.train.AdamOptimizer()  # self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    self.summary_accuracy = tf.scalar_summary('accuracy', self.accuracy)
    tf.scalar_summary('cost', self.cost)
    self.summary_all = tf.merge_all_summaries()
def build_graph(self):
    config = self.configuration
    self.reader = utils.DataReader(seq_len=config.seq_length,
                                   batch_size=config.batch_size,
                                   data_filename=config.data_filename)
    self.cell = FWRNNCell(num_units=config.rnn_size)

    self.input_data = tf.placeholder(tf.int32, [None, config.input_length])
    self.answers = tf.placeholder(tf.int32, [None, 1])
    self.initial_state = self.cell.zero_state(tf.shape(self.answers)[0], tf.float32)
    self.fw_initial = self.cell.fw_zero(tf.shape(self.answers)[0], tf.float32)

    with tf.variable_scope("emb_input"):
        embedding = tf.get_variable("emb", [config.size_chars, config.embedding_size])
        inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data),
                          config.input_length, 1)
        inputs = [tf.squeeze(input, [1]) for input in inputs]

    with tf.variable_scope("rnn_desig"):
        state = (self.initial_state, self.fw_initial)
        output = None
        for i, input in enumerate(inputs):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            output, state = self.cell(input, state)

    with tf.variable_scope("softmax"):
        softmax_w = tf.get_variable("softmax_w", [config.rnn_size, config.size_chars])
        softmax_b = tf.get_variable("softmax_b", [config.size_chars])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.p = tf.nn.softmax(self.logits)
        self.output = tf.cast(tf.reshape(tf.arg_max(self.p, 1), [-1, 1]), tf.int32)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.output, self.answers), tf.float32))

    loss = legacy_seq2seq.sequence_loss_by_example([self.logits],
                                                   [tf.reshape(self.answers, [-1])],
                                                   [tf.ones([config.batch_size])],
                                                   config.size_chars)
    self.cost = tf.reduce_mean(loss)
    self.end_state = state

    train_vars = tf.trainable_variables()
    gradients, _ = tf.clip_by_global_norm(tf.gradients(self.cost, train_vars), config.grad_clip)
    optimizer = tf.train.AdamOptimizer()
    self.train_op = optimizer.apply_gradients(zip(gradients, train_vars))

    self.summary_accuracy = tf.summary.scalar('accuracy', self.accuracy)
    tf.summary.scalar('cost', self.cost)
    self.summary_all = tf.summary.merge_all()
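# A minimal single-epoch training sketch for the graph above. It mirrors the queue-based
# pattern of load_validation(): DataReader.read() yields batch tensors that are evaluated
# with sess.run() and then fed through the placeholders. The method name `train_one_epoch`
# and running exactly one pass over the data are illustrative assumptions, not original code.
def train_one_epoch(self):
    input_batch, target_batch = self.reader.read(False, 1)
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess = tf.Session()
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            # Pull a numpy batch from the queue, then feed it to the training op.
            inputs, answers = sess.run([input_batch, target_batch])
            _, cost = sess.run([self.train_op, self.cost],
                               feed_dict={self.input_data: inputs, self.answers: answers})
    except tf.errors.OutOfRangeError:
        pass
    finally:
        coord.request_stop()
        coord.join(threads)
        sess.close()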
def main():
    #input_file = 'data/edges3437_50remained_G1.pkl'
    #input_file = 'data/facebook_4k_50remained_G1.pkl'
    #input_file = 'data/1k_fb_50remained.pkl'
    input_file = 'data/calls_500_50remained.pkl'
    #input_file = 'data/emails_500_50remained.pkl'

    # Build node2vec-style random walks over the input graph (p = q = 1, i.e. unbiased walks).
    nx_G = read_nxgraph(input_file)
    G = custome_Graph.Graph(nx_G, is_directed=False, p=1, q=1)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(num_walks=10, walk_length=80)

    # Dump the walks as whitespace-separated "sentences" for the word2vec-style reader.
    t0 = time()
    with open('tmp.txt', 'w') as file:
        for walk in walks:
            for i in walk:
                file.write('%s ' % (i))
            file.write('\n')
    print('write into tmp.txt file in %.4fs' % (time() - t0))

    data = utils.DataReader('tmp.txt', min_count=0)
    dataset = utils.Word2vecDataset(data, window_size=5)
    dataloader = DataLoader(dataset, batch_size=16, shuffle=False,
                            num_workers=0, collate_fn=dataset.collate)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    emb_size = len(data.word2id)
    emb_dimension = 64  #128
    skip_gram_model = SkipGramModel(emb_size, emb_dimension).to(device)
    skip_gram_model.device = device

    for iteration in range(16):
        print("\n\n\nIteration: " + str(iteration + 1))
        #optimizer = optim.SparseAdam(skip_gram_model.parameters(), lr=0.001)
        optimizer = optim.Adam(skip_gram_model.parameters(), lr=0.025)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(dataloader))

        running_loss = 0.0
        for i, sample_batched in enumerate(tqdm(dataloader)):
            # Skip degenerate batches with fewer than two positive pairs.
            if len(sample_batched[0]) > 1:
                pos_u = sample_batched[0]
                pos_v = sample_batched[1]
                neg_v = sample_batched[2]

                scheduler.step()
                optimizer.zero_grad()
                loss = skip_gram_model.forward(pos_u, pos_v, neg_v)
                loss.backward()
                optimizer.step()

                # Exponential moving average of the loss for logging.
                running_loss = running_loss * 0.9 + loss.item() * 0.1
                if i > 0 and i % 500 == 0:
                    print(" Loss: " + str(running_loss))

    skip_gram_model.save_embedding(data.id2word, 'out.vec')
def __init__(self, config=None, mode=None):
    self.config = config
    self.mode = mode
    self.reader = utils.DataReader(seq_len=config.seq_length,
                                   batch_size=config.batch_size,
                                   data_filename=config.data_filename)
    self.cell = rnn_cell.BasicLSTMCell(config.rnn_size, state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32, [None, config.input_length])
    self.targets = tf.placeholder(tf.int32, [None, 1])
    self.initial_state = self.cell.zero_state(tf.shape(self.targets)[0], tf.float32)

    with tf.variable_scope("input_embedding"):
        embedding = tf.get_variable("embedding", [config.vocab_size, config.rnn_size])
        inputs = tf.split(1, config.input_length,
                          tf.nn.embedding_lookup(embedding, self.input_data))
        inputs = [tf.squeeze(input, [1]) for input in inputs]

    with tf.variable_scope("send_to_rnn"):
        state = self.initial_state
        output = None
        for i, input in enumerate(inputs):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            output, state = self.cell(input, state)

    with tf.variable_scope("softmax"):
        softmax_w = tf.get_variable("softmax_w", [config.rnn_size, config.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [config.vocab_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [tf.reshape(self.targets, [-1])],
                                            [tf.ones([config.batch_size])],
                                            config.vocab_size)
    self.cost = tf.reduce_mean(loss)
    self.final_state = state

    # self.lr = tf.Variable(0.001, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), config.grad_clip)
    optimizer = tf.train.AdamOptimizer()  # self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def get_python_data():
    dr = utils.DataReader(FILEPATH)
    return dr.jsonify()
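# Illustrative usage sketch for get_python_data(). It assumes FILEPATH is a module-level
# constant pointing at a file this DataReader can parse, and that jsonify() returns a
# JSON-serialisable structure; both are assumptions, not guarantees from the original code.
if __name__ == "__main__":
    records = get_python_data()
    print(records)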