def combine_regression_data():
    # Merge two regression data files into a single output file.
    # expt=[5, 3] matches the number of per-column filters given for each
    # file in d_filter below.
    utils.write_data(
        input("Output File: "),
        utils.join_data(
            [input("file 1: "), input("file 2: ")],
            expt=[5, 3],
            d_filter=[[
                utils.sanitize_lower, utils.sanitize_lower, float, float, float
            ], [utils.sanitize_lower, utils.sanitize_lower, float]]))
def sample_topk(self, prefix, end_tag=[u' ', u'.', u',', u'\n', u'\t'],
                k=5, B=100, N=30):
    # B, N: width, depth of beam search
    prefix = u" " + prefix
    prefix = utils.split_raw(prefix, self.text_modeling)
    prefix = np.array(list(map(self.vocab.get, prefix)))
    current_state = self.model.initial_rnn_state(1)
    # Feed the prefix (except its last token) to build up the RNN state.
    for x in prefix[:-1]:
        _, current_state, _ = self.model.sample_output(
            self.sess, x, current_state)
    # Map end-of-sequence tags to vocabulary ids (-1 if unknown).
    end_tag = [self.vocab.get(e, -1) for e in end_tag]
    # not so efficient beam search. just initial version
    candidates = [{
        'p': 1.,
        'seq': prefix[1:],
        'rnn_state': current_state,
        'done': False
    }]
    for depth in range(len(prefix) - 1, N):
        new_candidates = []
        for d in candidates:
            if d['done']:
                new_candidates.append(d)
                continue
            _, next_rnn_state, next_x_prob = self.model.sample_output(
                self.sess, d['seq'][-1], d['rnn_state'])
            # Expand the candidate with every possible next token.
            for next_x in range(len(next_x_prob)):
                next_p = d['p'] * next_x_prob[next_x]
                next_seq = np.concatenate((d['seq'], [next_x]))
                done = next_x in end_tag
                new_candidates.append({
                    'p': next_p,
                    'seq': next_seq,
                    'rnn_state': next_rnn_state,
                    'done': done
                })
        # Keep only the B most probable candidates for the next step.
        new_candidates.sort(key=lambda x: x['p'], reverse=True)
        candidates = new_candidates[:B]
    # Decode the k best candidates back into text, paired with their probabilities.
    result = []
    for idx in range(min(k, len(candidates))):
        d = candidates[idx]
        seq = list(map(self.inv_vocab.get, d['seq']))
        result.append((utils.join_data(seq, self.text_modeling), d['p']))
    return result
def random_sentence(self, prefix=u"", seq_length=100):
    prefix = u" " + prefix
    prefix = utils.split_raw(prefix, self.text_modeling)
    prefix = np.array(list(map(self.vocab.get, prefix)))
    current_state = self.model.initial_rnn_state(1)
    output = []
    # Feed the prefix tokens to warm up the RNN state.
    for x in prefix[:-1]:
        output.append(x)
        _, current_state, _ = self.model.sample_output(
            self.sess, x, current_state)
    x = prefix[-1]
    output.append(x)
    # Sample one token at a time until the requested length is reached.
    while len(output) < seq_length:
        x, current_state, _ = self.model.sample_output(
            self.sess, x, current_state)
        output.append(x)
    output = list(map(self.inv_vocab.get, output))
    output_str = utils.join_data(output, self.text_modeling)
    return output_str
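# A minimal usage sketch (hypothetical): the class name and constructor below
# are assumptions, not taken from this file. It only illustrates how
# sample_topk and random_sentence might be called once a trained model,
# session, and vocab are available on the instance.
#
#     sampler = Sampler(sess, model, vocab, inv_vocab, text_modeling)  # hypothetical ctor
#     for text, prob in sampler.sample_topk(u"the", k=3, B=50, N=20):
#         print(text, prob)
#     print(sampler.random_sentence(prefix=u"once upon", seq_length=80))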
def setup_data(npath, ppath):
    from ml_data import SequenceDinucLabelsProperties
    global data
    global limits
    global lengths
    global shared_input_length
    # Join data into a single input vector
    data = join_data(SequenceDinucLabelsProperties(npath, ppath))
    # Get lengths of input vectors
    lengths = get_input_lengths(data)
    # Define limits of data type on single vector
    limits = calc_limits(data)
    # Shared input vector length (hardcoded for this data set)
    shared_input_length = 79
def setup_data(npath, ppath):
    from ml_data import SimpleHistData
    from ml_data import DinucCrossCovarData
    global data
    global limits
    global lengths
    global shared_input_length
    # Join data into a single input vector
    data = join_data(
        SimpleHistData(npath, ppath, k=4, upto=True),
        DinucCrossCovarData(npath, ppath, k=3, upto=True),
    )
    # Get lengths of input vectors
    lengths = get_input_lengths(data)
    # Define limits of data type on single vector
    limits = calc_limits(data)
    # Compute shared input vector length from the end of the last limit
    shared_input_length = limits[-1][-1]
if args.load_dir is not None:
    print "Continue from {}".format(args.load_dir)
    saver.restore(sess, args.load_dir)

start_time = time.time()
for epoch in range(args.n_epochs):
    losses = []
    n_batch = train_loader.n_batch(args.batch_size, args.seq_length)
    for idx, (batch_x, batch_y) in enumerate(
            train_loader.get_batch(args.batch_size, args.seq_length)):
        loss = model.run_train_op(sess, train_op, batch_x, batch_y,
                                  model.initial_rnn_state(args.batch_size))
        losses.append(loss)
        print "Epoch {} ({} / {}), loss: {:.4f}, elapsed time: {:.1f}s".format(
            epoch, idx, n_batch, loss, time.time() - start_time)
    # Log the mean loss for this epoch and checkpoint the model.
    writer.add_summary(
        sess.run(loss_summary, feed_dict={loss_log: np.mean(losses)}), epoch)
    saver.save(sess, args.save_dir)

# Sample 100 tokens from the trained model, starting from a space character.
output = []
x = train_loader.vocab.get(unichr(32))
current_state = model.initial_rnn_state(1)
for _ in range(100):
    x, current_state, _ = model.sample_output(sess, x, current_state)
    output.append(x)
output = list(map(train_loader.inv_vocab.get, output))
print output
print utils.join_data(output, args.text_modeling)