def test2():
    hiddenDim = 3
    classes = 2
    vocab = Vocab()
    vocab.construct(["i", "am", "a", "an", "student"])
    tree = tr.Tree("(0 (0 I) (1 (1 am) (0 (0 a) (0 student))))")
    rnn = RecursiveNeuralNet(hiddenDim, classes, vocab)
    rnn.initialize_matrices(
        W=np.matrix([[1.0, -1.0, 0.9, -0.6, 0.2, 0.0],
                     [-0.3, 1.2, 0.0, 0.4, -0.4, 0.0],
                     [-0.8, 0.1, 1.1, 0.0, -2.0, 0.0]], dtype=np.float32),
        b=np.matrix([[-0.4], [0.0], [0.0]], dtype=np.float32),
        Ws=np.matrix([[0.0, 0.0, -0.5], [1.0, -0.9, 0.0]], dtype=np.float32),
        bs=np.matrix([[-0.4], [0.0]], dtype=np.float32),
        L=np.matrix([[0.4, -0.3, -0.1], [0.6, -0.3, 0.4], [0.04, -0.08, 1.25],
                     [0.2, 0.5, 0.6], [0.1, 0.7, 0.6], [0.1, 0.3, 0.0]],
                    dtype=np.float32))
    result = rnn.forward_prop(tree.root)
    rnn.backward_prop(tree.root)
    return {'dW': rnn.dW, 'db': rnn.db, 'dWs': rnn.dWs, 'dbs': rnn.dbs}
def __init__(self, embed_size, hidden_size, vocabList, device, dropout_rate=0.2):
    super(RNN, self).__init__()
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.vocab = Vocab(vocabList)
    self.model_embeddings = loadWordEmbedding(self.vocab)
    self.device = device
    self.lstm = nn.LSTM(embed_size, hidden_size)
    self.wy_projection = nn.Linear(hidden_size, hidden_size, bias=False)  # called Wy in the paper
    self.wh_projection = nn.Linear(hidden_size, hidden_size, bias=False)  # called Wh in the paper
    self.w_projection = nn.Linear(hidden_size, 1, bias=False)             # called w in the paper
    self.wp_projection = nn.Linear(hidden_size, hidden_size, bias=False)  # called Wp in the paper
    self.wx_projection = nn.Linear(hidden_size, hidden_size, bias=False)  # called Wx in the paper
    self.dropout = nn.Dropout(dropout_rate)
    self.fc = nn.Linear(hidden_size, 3)
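# Hedged sketch (an assumption about intent, NOT this class's actual forward pass):
# one common way Wy/Wh/w-style projections are combined into word-by-word attention
# over LSTM outputs Y given a summary state h. `model` is an instance of the class above.
import torch
import torch.nn.functional as F

def attention_sketch(model, Y, h):
    # Y: (batch, seq_len, hidden_size), h: (batch, hidden_size)
    M = torch.tanh(model.wy_projection(Y) + model.wh_projection(h).unsqueeze(1))
    alpha = F.softmax(model.w_projection(M).squeeze(-1), dim=1)   # (batch, seq_len)
    r = torch.bmm(alpha.unsqueeze(1), Y).squeeze(1)               # (batch, hidden_size)
    return torch.tanh(model.wp_projection(r) + model.wx_projection(h))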
def test3():
    hiddenDim = 3
    classes = 2
    vocab = Vocab()
    vocab.construct(["i", "hate", "cat", "fur"])
    tree = tr.Tree("(0 (0 I) (1 (1 hate) (1 (1 cat) (0 (0 cat) (0 fur)))))")
    rnn = RecursiveNeuralNet(hiddenDim, classes, vocab)
    rnn.initialize_matrices(
        W=np.matrix([[1.0, -1.0, 0.5, -0.6, 0.2, -0.8],
                     [-0.3, 1.2, 2.0, 0.4, -0.4, 0.2],
                     [-0.8, 0.9, 1.1, 1.0, -2.0, 0.1]], dtype=np.float32),
        b=np.matrix([[-0.4], [0.3], [0.4]], dtype=np.float32),
        Ws=np.matrix([[0.2, 0.1, -0.5], [1.2, -0.9, 0.3]], dtype=np.float32),
        bs=np.matrix([[-0.4], [0.5]], dtype=np.float32),
        L=np.matrix([[0.4, -0.3, -0.1], [0.6, -0.3, 0.4], [0.04, -0.08, 1.25],
                     [0.2, 0.7, 0.6], [0.2, 0.7, 0.6]], dtype=np.float32))
    result = rnn.forward_prop(tree.root)
    rnn.backward_prop(tree.root)
    return {'dW': rnn.dW, 'db': rnn.db, 'dWs': rnn.dWs, 'dbs': rnn.dbs}
def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data( 300, 70, 100) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) self.w2v_vocab, w2v_embd, embedding_dict = self.load_w2v() self.embedding_dim = len(w2v_embd[0]) self.w2v_vocab_size = len(self.w2v_vocab) self.vocab_size = len(self.vocab) embeddings_tmp = [] for i in range(self.vocab_size): item = self.vocab.decode(i) if item in self.w2v_vocab: embeddings_tmp.append(embedding_dict[item]) # print("Found word {}".format(item)) else: # print("Couldn't find {}.".format(item)) rand_num = np.random.uniform(low=-0.2, high=0.2, size=self.embedding_dim) embeddings_tmp.append(rand_num) self.embed = np.asarray(embeddings_tmp)
def main():
    config = get_config(mode='test')
    vocab = Vocab()
    vocab.load(config.word2id_path, config.id2word_path)
    print(f'Vocabulary size: {vocab.vocab_size}')
    config.vocab_size = vocab.vocab_size

    if config.users:
        test_users = load_pickle(config.convs_users_path)
        config.user_size = max([x for xx in test_users for x in xx]) + 1
        print(f'User size: {config.user_size}')
    else:
        test_users = None

    data_loader = get_loader(convs=load_pickle(config.convs_path),
                             convs_length=load_pickle(config.conversations_length_path),
                             utterances_length=load_pickle(config.utterances_length_path),
                             vocab=vocab,
                             batch_size=config.batch_size,
                             shuffle=False,
                             convs_users=test_users)

    model_solver = getattr(solvers, "Solver{}".format(config.model))
    test_solver = model_solver(config, None, data_loader, vocab=vocab, is_train=False)
    test_solver.build()
    test_solver.export_samples()
def load_own_data(self, filename, filename2, filename3, debug=False, encoding='utf-8'):
    """Loads starter word-vectors and train/dev/test data."""
    self.vocab = Vocab()
    self.vocab.construct(get_datafile(filename))
    # self.vocab.construct(get_ptb_dataset('train'))
    self.encoded_train = np.array(
        [self.vocab.encode(word) for word in get_datafile(filename, encoding=encoding)],
        dtype=np.int32)
    self.encoded_valid = np.array(
        [self.vocab.encode(word) for word in get_datafile(filename2, encoding=encoding)],
        dtype=np.int32)
    self.encoded_test = np.array(
        [self.vocab.encode(word) for word in get_datafile(filename3, encoding=encoding)],
        dtype=np.int32)
    if debug:
        num_debug = 1024
        self.encoded_train = self.encoded_train[:num_debug]
        self.encoded_valid = self.encoded_valid[:num_debug]
        self.encoded_test = self.encoded_test[:num_debug]
def train(config):
    vocab = Vocab(config)
    train_data = vocab.get_train_dev_test()
    train1 = [(x[0] + ' ' + x[1], x[2]) for x in train_data]
    train2 = [(x[1] + ' ' + x[0], x[2]) for x in train_data]
    train_data = train1 + train2

    train_dataset = BuildDataSet(train_data)
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    train_load = DataLoader(dataset=train_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            collate_fn=collate_fn,
                            sampler=train_sampler)

    for model_name in config.model_name:
        if config.local_rank in [0, -1]:
            msg = 'model_name:{},train_nums:{},train_iter:{},batch_size:{}'
            print(msg.format(model_name, len(train_data), len(train_load), config.batch_size))
        train_process(config, train_load, train_sampler, model_name)
        torch.distributed.barrier()
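# Hedged sketch: train() above relies on an already-initialised torch.distributed
# process group (DistributedSampler, local_rank, barrier). This is one common setup,
# assuming one process per GPU and a config.local_rank supplied by the launcher.
import torch

def init_distributed(config):
    if config.local_rank != -1:
        torch.cuda.set_device(config.local_rank)
        torch.distributed.init_process_group(backend='nccl')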
def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data. """ self.vocab = Vocab() self.vocab.construct( get_dataset(self.config.merged_data, self.config.ingredients_data)) self.encoded_train = np.array([ self.vocab.encode(word) for word in get_dataset( self.config.encoded_train, self.config.ingredients_data) ], dtype=np.int32) self.encoded_valid = np.array([ self.vocab.encode(word) for word in get_dataset( self.config.encoded_valid, self.config.ingredients_data) ], dtype=np.int32) self.encoded_test = np.array([ self.vocab.encode(word) for word in get_dataset( self.config.encoded_test, self.config.ingredients_data) ], dtype=np.int32) if debug: num_debug = 1024 * 3 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug]
def test1():
    hiddenDim = 3
    classes = 2
    vocab = Vocab()
    vocab.construct(["i", "love", "apple", "juice"])
    tree = tr.Tree("(1 (0 I) (1 (1 love) (1 (1 love) (0 (0 apple) (0 juice)))))")
    rnn = RecursiveNeuralNet(hiddenDim, classes, vocab)
    rnn.initialize_matrices(
        W=np.matrix([[1.0, 2.0, 0.0, -0.4, 0.2, -0.8],
                     [-0.5, 1.0, 2.0, 0.0, -0.4, 0.2],
                     [-0.6, 0.9, 1.1, 1.0, -2.0, 0.0]], dtype=np.float32),
        b=np.matrix([[-0.4], [0.5], [0.2]], dtype=np.float32),
        Ws=np.matrix([[0.0, 0.1, -0.2], [1.4, -0.7, 0.1]], dtype=np.float32),
        bs=np.matrix([[-0.1], [0.4]], dtype=np.float32),
        L=np.matrix([[0.4, -0.3, -0.1], [0.1, 0.1, 0.2], [0.04, -0.9, 1.2],
                     [0.2, 0.5, 0.6], [0.2, 0.5, 0.6]], dtype=np.float32))
    result = rnn.forward_prop(tree.root)
    rnn.backward_prop(tree.root)
    return {'dW': rnn.dW, 'db': rnn.db, 'dWs': rnn.dWs, 'dbs': rnn.dbs}
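# Hedged sketch: a small driver for test1/test2/test3 above. It only assumes the
# gradient dictionaries they return and that each gradient has the shape of its
# parameter for hiddenDim=3, classes=2 (as set inside the tests).
import numpy as np

def check_grad_shapes(grads, hidden_dim=3, classes=2):
    expected = {'dW': (hidden_dim, 2 * hidden_dim),
                'db': (hidden_dim, 1),
                'dWs': (classes, hidden_dim),
                'dbs': (classes, 1)}
    for name, shape in expected.items():
        assert np.asarray(grads[name]).shape == shape, (name, np.asarray(grads[name]).shape, shape)

if __name__ == '__main__':
    for fn in (test1, test2, test3):
        check_grad_shapes(fn())
        print('{}: gradient shapes OK'.format(fn.__name__))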
def __init__(self, input_dim, hid_dim, n_layers, n_heads, pf_dim, dropout, device,
             vocabList, max_length=100):
    super().__init__()
    self.device = device
    self.vocab = Vocab(vocabList)
    self.embed_size = input_dim
    self.n_heads = n_heads
    # self.tok_embedding = nn.Embedding(input_dim, hid_dim)
    # self.pos_embedding = nn.Embedding(max_length, hid_dim)
    self.tok_embedding = loadWordEmbedding(self.vocab)
    self.pos_embedding = loadPosEmbedding(max_length, hid_dim)
    self.layers = nn.ModuleList([
        EncoderLayer(hid_dim, n_heads, pf_dim, dropout, device)
        for _ in range(n_layers)
    ])
    self.dropout = nn.Dropout(dropout)
    self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)
    self.drop = nn.Dropout(dropout)
    self.classify = nn.Linear(hid_dim, 5)
def main(_):
    if not os.path.exists(FLAGS.output_dir):
        print('Creating directory: %s' % FLAGS.output_dir)
        os.mkdir(FLAGS.output_dir)

    desc_counter = Counter()
    attr_counter = Counter()
    partial_counts = defaultdict(Counter)

    print('Processing data...')
    n = len(FLAGS.inputs)
    for i, fname in enumerate(FLAGS.inputs):
        print('File %i of %i: %s' % (i, n, fname))
        with open(fname, 'r') as f:
            data = json.load(f)
        for product in data:
            desc = product['clean_text'].split() + \
                product['clean_title'].split()
            desc_counter.update(desc)
            for attr, value in product['specs'].items():
                attr_counter.update((attr,))
                partial_counts[attr].update((value,))

    # Filter values
    partial_counts = {attr: {value: count for value, count in counts.items()
                             if count >= FLAGS.min_value}
                      for attr, counts in partial_counts.items()}
    # Remove singular attributes
    singular = {x for x, y in partial_counts.items() if len(y) <= 1}
    attr_counter = Counter({x: y for x, y in attr_counter.items() if x not in singular})
    partial_counts = {x: y for x, y in partial_counts.items() if x not in singular}
    # Filter attrs
    if FLAGS.max_attr is not None:
        attr_counter = {x: y for x, y in attr_counter.most_common(FLAGS.max_attr)}
    # Filter desc
    desc_counter = Counter({x: y for x, y in desc_counter.items() if y >= FLAGS.min_desc})

    desc_vocab = Vocab.build_from_counter(desc_counter)
    attr_vocab = Vocab.build_from_counter(attr_counter)
    value_set = ValueSet.build_from_partial_counts(partial_counts)

    print('Writing to disk...')
    desc_fname = os.path.join(FLAGS.output_dir, 'desc.txt')
    with open(desc_fname, 'w') as f:
        desc_vocab.write(f)
    attr_fname = os.path.join(FLAGS.output_dir, 'attr.txt')
    with open(attr_fname, 'w') as f:
        attr_vocab.write(f)
    value_fname = os.path.join(FLAGS.output_dir, 'value.txt')
    with open(value_fname, 'w') as f:
        value_set.write(f)
    stats_fname = os.path.join(FLAGS.output_dir, 'stats.txt')
    with open(stats_fname, 'w') as f:
        f.write('num_attrs: %i\n' % len(attr_vocab))
        f.write('num_vals: %i\n' % len(value_set))
        f.write('num_words: %i\n' % len(desc_vocab))
    print('Done')
def prep_to_entityduet_format():
    train_file = os.path.join(args.data_dir, 'train.prep.pairwise')
    dev_file = os.path.join(args.data_dir, 'test.prep.pointwise')
    test_file = os.path.join(args.data_dir, 'test.prep.pointwise')
    vocab_file = os.path.join(args.data_dir, 'vocab')
    emb_file = os.path.join(args.data_dir, 'w2v')
    train_file_out = os.path.join(args.out_dir, 'train_pair.pkl')
    dev_file_out = os.path.join(args.out_dir, 'dev.pkl')
    test_file_out = os.path.join(args.out_dir, 'test.pkl')
    vocab_file_out = os.path.join(args.out_dir, 'vocab.txt')
    emb_file_out = os.path.join(args.out_dir, 'embed.txt')

    def id_map_fn(ids):
        return [id + 1 for id in ids]

    def label_map_fn(label):
        if label > 0:
            return 1
        return 0

    # save train, dev, test data
    for in_file, out_file in [(train_file, train_file_out),
                              (dev_file, dev_file_out),
                              (test_file, test_file_out)]:
        transformed_data = []
        print('transforming {} ...'.format(in_file))
        if in_file.endswith('pointwise'):
            mode = 1
            func = int
        elif in_file.endswith('pairwise'):
            mode = 2
            func = float
        for sample in prep_file_iterator(in_file, method='sample', func=func, parse=True):
            if mode == 1:
                transformed_data.append(
                    (id_map_fn(sample.query), id_map_fn(sample.doc),
                     label_map_fn(sample.label), sample.qid))
            elif mode == 2:
                transformed_data.append(
                    (id_map_fn(sample.query), id_map_fn(sample.doc1),
                     id_map_fn(sample.doc2)))
        print('saving to {}'.format(out_file))
        with open(out_file, 'wb') as fout:
            pickle.dump(transformed_data, fout, protocol=2)

    # save vocab
    print('saving to {}'.format(vocab_file_out))
    vocab = Vocab(filepath=vocab_file, file_format=args.format)
    words = ['<PAD>'] + vocab.get_word_list()
    with open(vocab_file_out, 'w') as fout:
        fout.write('\n'.join(words) + '\n')

    # save emb
    print('saving to {}'.format(emb_file_out))
    wv = WordVector(filepath=emb_file, first_line=args.first_line)
    vector = np.concatenate([np.zeros_like(wv.vectors[:1]), wv.vectors], axis=0)
    vector.dump(emb_file_out)
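# Hedged sketch: reading back the pairwise training pickle written above. The path is
# hypothetical; the record layout is the (query_ids, doc1_ids, doc2_ids) triple built
# in the pairwise branch of the loop.
import pickle

with open('out_dir/train_pair.pkl', 'rb') as fin:   # hypothetical args.out_dir path
    pairs = pickle.load(fin)
query_ids, doc1_ids, doc2_ids = pairs[0]            # one pairwise record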
def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))
def load_vocab(self, debug):
    self.vocab = Vocab()
    if debug:
        self.vocab.construct(get_words_dataset('dev'))
    else:
        self.vocab.construct(get_words_dataset('train'))
    self.vocab.build_embedding_matrix(self.config.word_embed_size)
    self.embedding_matrix = self.vocab.embedding_matrix
def load_data(self):
    self.vocab = Vocab()
    self.vocab.construct(get_dataset('train'))
    self.encoded_train = np.array(
        [self.vocab.encode(word) for word in get_dataset('train')],
        dtype=np.int32)
    self.encoded_valid = np.array(
        [self.vocab.encode(word) for word in get_dataset('valid')],
        dtype=np.int32)
def __init__(self):
    self.batch_size = 32
    self.embed_size = 300
    self.label_size = 3
    self.max_epochs = 30
    self.lr = 0.01
    self.use_pretrained_embeddings = True
    # Fix the embeddings parameters during training
    self.fix_embeddings = False
    self.vocab = Vocab()
def load_data(self, debug=False):
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))
    self.encoded_train = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('train')],
        dtype=np.int32)
    self.encoded_valid = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
        dtype=np.int32)
    self.encoded_test = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('test')],
        dtype=np.int32)
    if debug:
        num_debug = 1024
        self.encoded_train = self.encoded_train[:num_debug]
        self.encoded_valid = self.encoded_valid[:num_debug]
        self.encoded_test = self.encoded_test[:num_debug]
def word_vector_transform():
    print('loading word vector ...')
    wv = WordVector(filepath=args.word_vector_path, first_line=True)
    vocab = Vocab(filepath=os.path.join(args.data_dir, 'vocab'), file_format='ir')
    print('transforming ...')
    wv.transform(vocab.get_word_list(),
                 oov_filepath=os.path.join(args.data_dir, 'oov.txt'),
                 oov_at_end=True)
    print('saving ...')
    wv.save_to_file(os.path.join(args.data_dir, 'w2v'))
class WhoseLineModel(object):

    def __init__(self, config):
        self.config = config
        self.load_data(debug=False)
        self.add_common_model_vars()

    def load_data(self, debug=False):
        self.wordvecs = gensim.models.Word2Vec.load_word2vec_format(
            self.config.wordvecpath, binary=False)
        self.vocab = Vocab()
        self.vocab.construct(self.wordvecs.index2word)
        self.embedding_matrix = np.vstack([
            self.wordvecs[self.vocab.index_to_word[i]]
            for i in range(len(self.vocab))
        ])
        # next line is "unk" surgery cf. https://groups.google.com/forum/#!searchin/globalvectors/unknown/globalvectors/9w8ZADXJclA/X6f0FgxUnMgJ
        self.embedding_matrix[0, :] = np.mean(self.embedding_matrix, axis=0)

        chapter_split = load_chapter_split(self.config.datasplitpath)
        self.speakers = Speakers()
        for line in open(self.config.datapath):
            ch, speaker, line = line.split("\t")
            if chapter_split[ch] == 0:
                self.speakers.add_speaker(speaker)
        self.speakers.prune(self.config.speaker_count - 1)  # -1 for OTHER

        self.train_data = []
        self.dev_data = []
        self.test_data = []
        oldch = None
        for ln in open(self.config.datapath):
            ch, speaker, line = ln.split("\t")
            encoded_line = (np.array(
                [self.vocab.encode(word) for word in line.split()],
                dtype=np.int32), self.speakers.encode(speaker))
            if chapter_split[ch] == 0:
                dataset = self.train_data
            elif chapter_split[ch] == 1:
                dataset = self.dev_data
            else:
                dataset = self.test_data
            if self.config.batch_size == "chapter":
                if ch == oldch:
                    dataset[-1].append(encoded_line)
                else:
                    dataset.append([encoded_line])
            else:
                dataset.append(encoded_line)
            oldch = ch

    def add_common_model_vars(self):
        with tf.variable_scope("word_vectors"):
            self.tf_embedding_matrix = tf.constant(self.embedding_matrix, name="embedding")
def k_fold(config):
    vocab = Vocab(config)
    # vocab.add_words()
    # vocab.build_bert_vocab()
    train, test = vocab.get_train_dev_test()

    test_data = [(x[0] + ' ' + x[1], x[2]) for x in test]
    test_dataset = BuildDataSet(test_data)
    test_load = DataLoader(dataset=test_dataset,
                           batch_size=config.batch_size,
                           shuffle=False,
                           collate_fn=collate_fn)

    kf = KFold(n_splits=config.kfold, shuffle=False, random_state=config.seed)
    for k, (train_index, dev_index) in enumerate(kf.split(train)):
        # pdb.set_trace()
        train_data, valid_data = train[train_index], train[dev_index]
        train1 = [(x[0] + ' ' + x[1], x[2]) for x in train_data]
        train2 = [(x[1] + ' ' + x[0], x[2]) for x in train_data]
        train_data = train1 + train2
        valid_data = [(x[0] + ' ' + x[1], x[2]) for x in valid_data]

        train_dataset = BuildDataSet(train_data)
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        train_load = DataLoader(dataset=train_dataset,
                                batch_size=config.batch_size,
                                shuffle=False,
                                collate_fn=collate_fn,
                                sampler=train_sampler)

        valid_dataset = BuildDataSet(valid_data)
        valid_sampler = torch.utils.data.distributed.DistributedSampler(valid_dataset)
        valid_load = DataLoader(dataset=valid_dataset,
                                batch_size=config.batch_size,
                                shuffle=False,
                                collate_fn=collate_fn,
                                sampler=valid_sampler)

        if config.local_rank in [0, -1]:
            msg = '{} fold,train_nums:{},train_iter:{},dev_nums:{},dev_iter:{},batch_size:{},test_nums:{},test_iter:{}'
            print(msg.format(k + 1, len(train_data), len(train_load), len(valid_data),
                             len(valid_load), config.batch_size, len(test_data), len(test_load)))
        train_process(config, train_load, valid_load, test_load, k, train_sampler)
        torch.distributed.barrier()
def word_vector_transform():
    print('loading word vector ...')
    wv = WordVector(filepath=args.word_vector_path, first_line=args.first_line)
    vocab = Vocab(filepath=os.path.join(args.data_dir, 'vocab'), file_format=args.format)
    print('transforming ...')
    wv.transform(vocab.get_word_list(),
                 oov_filepath=os.path.join(args.data_dir, 'oov.txt'),
                 oov_at_end=False)  # don't use oov_at_end because it is problematic
    print('saving ...')
    wv.save_to_file(os.path.join(args.data_dir, 'w2v'))
def __init__(self, embed_size, hidden_size, vocabList, device):
    super(RNN, self).__init__()
    self.vocab = Vocab(vocabList)
    self.device = device
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.model_embeddings = loadWordEmbedding(self.vocab)
    self.lstm = nn.LSTM(embed_size, hidden_size, bidirectional=True)
    self.classify = nn.Linear(2 * hidden_size, 5)
    self.drop = nn.Dropout(0.8)
    self.activate = nn.ReLU()
def load_data(self, LOAD_DATA=False):
    """Loads train/dev/test data and builds vocabulary."""
    if LOAD_DATA:
        # only initialize the Vocab class because of the embedding matrix
        self.vocab = Vocab()
    else:
        self.train_data, self.dev_data, self.test_data = tr.simplified_data(600, 40)
        # self.train_data, self.dev_data, self.test_data = tr.simplified_data(2000, 500)

        # build vocab from training data
        self.vocab = Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))
def __init__(self, hparams: HyperParams, data_split: str, sep_line="\n"):
    assert Splits.check_split(data_split)
    self.hparams = hparams
    self.root = Path(self.hparams.root)
    self.data_split = data_split
    self.sep_line = sep_line
    self.path_data = self.download()
    self.lines = self.preprocess_data()
    self.vocab = Vocab(list(self.sep_line.join(self.lines)))
    self.text = self.train_val_test_split()
    self.tensor = self.get_sequences()
    if self.hparams.verbose:
        self.show_samples()
        print(dict(vocab_size=len(self.vocab)))
def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data( 700, 100, 200) # build vocab from training data self.vocab = Vocab() # train_sents = [t.get_words() for t in self.train_data] # self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) all_sents = [t.get_words() for t in self.train_data] + [ t.get_words() for t in self.dev_data ] + [t.get_words() for t in self.test_data] self.vocab.construct(list(itertools.chain.from_iterable(all_sents))) for k in self.vocab.word_to_index.keys(): print '\t {} : {}'.format(k, self.vocab.word_to_index[k])
def preprocess(self):
    self.log.info('Getting Vocabulary...')
    if os.path.exists(os.path.join(self.config.data_path, 'vocab.pkl')):
        with open(os.path.join(self.config.data_path, 'vocab.pkl'), 'rb') as fr:
            vocab = pickle.load(fr)
    else:
        if not self.config.debug:
            with open(os.path.join(self.config.data_path, 'vocab.pkl'), 'wb') as fw:
                vocab = Vocab(self.config)
                pickle.dump(vocab, fw)
        else:
            vocab = Vocab(self.config)
    return vocab
class RNN(nn.Module):

    def __init__(self, embed_size, hidden_size, vocabList, device):
        super(RNN, self).__init__()
        self.vocab = Vocab(vocabList)
        self.device = device
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.model_embeddings = loadWordEmbedding(self.vocab)
        self.lstm = nn.LSTM(embed_size, hidden_size, bidirectional=True)
        self.classify = nn.Linear(2 * hidden_size, 5)
        self.drop = nn.Dropout(0.8)
        self.activate = nn.ReLU()

    def forward(self, data):
        x = self.vocab.to_input_tensor(data, self.device, -1)
        x = self.model_embeddings(x)
        x = x.permute(1, 0, 2)      # (seq_len, batch, input_size)
        # rnn, lstm
        x, (_, _) = self.lstm(x)    # x: (seq_len, batch, num_direction*hidden_size)
        x = self.activate(x)
        x = x.permute(1, 2, 0)      # x: (batch, num_direction*hidden_size, seq_len)
        # max pooling over the sequence dimension
        x = torch.max(x, dim=2)[0]
        # dropout to prevent overfitting
        x = self.drop(x)
        # fully connected classification layer
        x = self.classify(x)
        return F.log_softmax(x, dim=1)
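# Hedged usage sketch for the classifier above: vocab_list and loadWordEmbedding are
# project-specific and assumed available, and the batch is raw tokenised sentences
# because forward() calls vocab.to_input_tensor itself.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RNN(embed_size=300, hidden_size=256, vocabList=vocab_list, device=device).to(device)
batch = [['i', 'love', 'apple', 'juice'], ['i', 'hate', 'cat', 'fur']]
log_probs = model(batch)            # (batch, 5) log-probabilities
preds = log_probs.argmax(dim=1)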
def load_data(self):
    pair_fname = '../lastfm_train_mappings.txt'
    lyrics_path = '../data/lyrics/train/'

    # X_train is a list of all examples. each example is a 2-len list. each element is a list of words in lyrics.
    # word_counts is a dictionary that maps
    if self.config.debug:
        X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(
            pair_fname, lyrics_path, '../glove.6B.50d.txt',
            threshold_down=0, threshold_up=float('inf'), npos=100, nneg=100)
    else:
        X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(
            pair_fname, lyrics_path,
            threshold_down=100, threshold_up=4000, npos=10000, nneg=10000)

    self.labels_train = np.zeros((len(X_train), self.config.n_class))
    self.labels_train[range(len(X_train)), l_train] = 1

    x = collections.Counter(l_train)
    for k in x.keys():
        print('class:', k, x[k])
    print('')

    self.vocab = Vocab()
    self.vocab.construct(self.word_counts.keys())
    self.wv = self.vocab.get_wv('../glove.6B.50d.txt')

    with open('word_hist.csv', 'w') as f:
        for w in self.word_counts.keys():
            f.write(w + ',' + str(self.word_counts[w]) + '\n')

    self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps))  # need to handle this better.
    self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps))
    for i in range(len(X_train)):
        self.encoded_train_1[i, :len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]]
        self.encoded_train_2[i, :len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]]

    self.sequence_len1 = np.array(seq_len1)
    self.sequence_len2 = np.array(seq_len2)
def build_vocab(args):
    f = open(args.embed)
    embed_dim = int(next(f).split()[1])
    word2id = {}
    id2word = {}
    word2id[PAD_TOKEN] = PAD_IDX
    word2id[UNK_TOKEN] = UNK_IDX
    id2word[PAD_IDX] = PAD_TOKEN
    id2word[UNK_IDX] = UNK_TOKEN
    embed_list = []
    # fill PAD and UNK vector
    embed_list.append([0 for _ in range(embed_dim)])
    embed_list.append([0 for _ in range(embed_dim)])
    # build Vocab
    for line in f:
        tokens = line.split()
        word = tokens[0]
        vector = [float(num) for num in tokens[1:]]
        embed_list.append(vector)
        word2id[word] = len(word2id)
        id2word[len(id2word)] = word
    embed = torch.FloatTensor(embed_list)
    vocab = Vocab(embed, word2id, id2word)
    torch.save(vocab, args.vocab)
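# Hedged sketch: reloading the vocab saved by build_vocab and mapping tokens to ids.
# The path is hypothetical, and this assumes the Vocab object keeps word2id as an
# attribute (as its constructor arguments above suggest).
import torch

vocab = torch.load('vocab.pt')      # hypothetical args.vocab path
ids = [vocab.word2id.get(tok, UNK_IDX) for tok in ['the', 'quick', 'brown', 'fox']]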
def read_edge(self, filename: Path):
    if "txt" in filename.suffix:
        read_func = read_txt
    elif "csv" in filename.suffix:
        read_func = read_csv
    else:
        read_func = read_txt

    node_list = list()
    for row in read_func(filename):
        node_list.append(row[0])
        node_list.append(row[1])
    self._vocab = Vocab(collections.Counter(node_list))

    edge_array = []
    for row in read_func(filename):
        n1 = self._vocab.stoi[row[0]]
        n2 = self._vocab.stoi[row[1]]
        t = int(row[2])
        edge_array.append([n1, n2, t])
        edge_array.append([n2, n1, t])
    edge_array = np.asarray(edge_array, dtype=np.int32)

    self._adj = coo_matrix((np.ones(len(edge_array)),
                            (edge_array[:, 0], edge_array[:, 1])),
                           shape=(len(self._vocab), len(self._vocab)))
    self._adj_t = coo_matrix((edge_array[:, 2],
                              (edge_array[:, 0], edge_array[:, 1])),
                             shape=(len(self._vocab), len(self._vocab)))
    self._adj_csr = self._adj.tocsr() + sp.eye(len(self._vocab))
    self._adj = self._adj_csr.tocoo()
    self._adj_t_csr = self._adj_t.tocsr()
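# Hedged helper: querying the CSR adjacency built above by node name. read_edge itself
# uses the vocab's stoi mapping; the inverse itos mapping is assumed here (torchtext-style
# Counter-based vocabularies expose both).
def neighbours(graph, name):
    idx = graph._vocab.stoi[name]
    row = graph._adj_csr.getrow(idx)
    return [graph._vocab.itos[j] for j in row.indices if j != idx]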
def main():
    with open(data_file, 'rb') as f:
        data = pickle.load(f)
    vocab = Vocab(data)
    sg_loader = create_skipgram_dataset(chorales=data['train'], vocab=vocab, batch_size=32)
    sg_model, sg_losses = train_skipgram(vocab, sg_loader)
def prep_data(trees, X_vocab=None, y_vocab=None):
    update_vocab = False
    if X_vocab is None:
        X_vocab, y_vocab = Vocab(), Vocab()
        update_vocab = True
    X, y = [], []
    for tree in tqdm(trees):
        if len(tree.tokens) < 2:
            continue
        # TODO accumulate features without iterating over all states
        try:
            for state, decision in tree.iter_oracle_states():
                feats = state.extract_features()
                if update_vocab:
                    X_vocab.add_words(feats)
                    y_vocab.add_word(decision)
                X.append([X_vocab.encode(f) for f in feats])
                y.append(y_vocab.encode(decision))
        except:
            pass
    return X, y, X_vocab, y_vocab
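# Hedged usage sketch: build the vocabularies on the training trees once, then pass
# them back in (frozen) when encoding dev/test. train_trees and dev_trees are
# placeholders for lists of the Tree objects prep_data expects.
X_train, y_train, X_vocab, y_vocab = prep_data(train_trees)
X_dev, y_dev, _, _ = prep_data(dev_trees, X_vocab=X_vocab, y_vocab=y_vocab)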
def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug]
def load_data(self):
    pair_fname = '../lastfm_train_mappings.txt'
    lyrics_path = '../lyrics/data/lyrics/train/'

    # X_train is a list of all examples. each example is a 2-len list. each element is a list of words in lyrics.
    # word_counts is a dictionary that maps
    X_train, l_train, self.word_counts, self.config.max_steps = get_data(
        pair_fname, lyrics_path, threshold=100, n_class=self.config.n_class)

    self.labels_train = np.zeros((len(X_train), self.config.n_class))
    self.labels_train[range(len(X_train)), l_train] = 1

    self.vocab = Vocab()
    self.vocab.construct(self.word_counts.keys())

    self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps))  # need to handle this better.
    self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps))
    for i in range(len(X_train)):
        self.encoded_train_1[i, :len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]]
        self.encoded_train_2[i, :len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]]
class Model(): def __init__(self, config): self.config = config self.load_data(debug=False) self.build_model() def load_vocab(self,debug): self.vocab = Vocab() if debug: self.vocab.construct(get_words_dataset('dev')) else: self.vocab.construct(get_words_dataset('train')) self.vocab.build_embedding_matrix(self.config.word_embed_size) self.embedding_matrix = self.vocab.embedding_matrix def load_data(self, debug=False): """ Loads starter word-vectors and train/dev/test data. """ self.load_vocab(debug) config = self.config if debug: # Load the training set train_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'dev', 'post')) ( self.sent1_train, self.sent2_train, self.len1_train, self.len2_train, self.y_train ) = zip(*train_data) self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train) self.len1_train, self.len2_train = ( np.array(self.len1_train), np.array(self.len2_train) ) self.y_train = np.array(self.y_train) print('# training examples: %d' %len(self.y_train)) # Load the validation set dev_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_dev, self.sent2_dev, self.len1_dev, self.len2_dev, self.y_dev ) = zip(*dev_data) self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev) self.len1_dev, self.len2_dev = ( np.array(self.len1_dev), np.array(self.len2_dev) ) self.y_dev = np.array(self.y_dev) print('# dev examples: %d' %len(self.y_dev)) # Load the test set test_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_test, self.sent2_test, self.len1_test, self.len2_test, self.y_test ) = zip(*test_data) self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test) self.len1_test, self.len2_test = ( np.array(self.len1_test), np.array(self.len2_test) ) self.y_test = np.array(self.y_test) print('# test examples: %d' %len(self.y_test)) else: # Load the training set train_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'train', 'post')) ( self.sent1_train, self.sent2_train, self.len1_train, self.len2_train, self.y_train ) = zip(*train_data) self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train) self.len1_train, self.len2_train = ( np.array(self.len1_train), np.array(self.len2_train) ) self.y_train = np.array(self.y_train) print('# training examples: %d' %len(self.y_train)) # Load the validation set dev_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'dev', 'post')) ( self.sent1_dev, self.sent2_dev, self.len1_dev, self.len2_dev, self.y_dev ) = zip(*dev_data) self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev) self.len1_dev, self.len2_dev = ( np.array(self.len1_dev), np.array(self.len2_dev) ) self.y_dev = np.array(self.y_dev) print('# dev examples: %d' %len(self.y_dev)) # Load the test set test_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_test, self.sent2_test, self.len1_test, self.len2_test, self.y_test ) = zip(*test_data) self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test) self.len1_test, self.len2_test = ( np.array(self.len1_test), np.array(self.len2_test) ) self.y_test = np.array(self.y_test) print('# test examples: %d' %len(self.y_test)) print('min len: ', np.min(self.len2_train)) def build_model(self): config = self.config k = config.sentence_embed_size L = config.sent_len # input tensors self.sent1_ph = 
tf.placeholder(tf.int32, shape=[None, L], name='sent1') self.sent2_ph = tf.placeholder(tf.int32, shape=[None, L], name='sent2') self.len1_ph = tf.placeholder(tf.int32, shape=[None], name='len1') self.len2_ph = tf.placeholder(tf.int32, shape=[None], name='len2') self.labels_ph = tf.placeholder(tf.float32, shape=[None, config.label_size], name='label') self.kp_ph = tf.placeholder(tf.float32, name='kp') kp = self.kp_ph # set embedding matrix to pretrained embedding init_embeds = tf.constant(self.embedding_matrix, dtype='float32') word_embeddings = tf.get_variable( dtype='float32', name='word_embeddings', initializer=init_embeds, trainable=False) # no fine-tuning of word embeddings x1 = tf.nn.embedding_lookup(word_embeddings, self.sent1_ph) x2 = tf.nn.embedding_lookup(word_embeddings, self.sent2_ph) x1, x2 = tf.nn.dropout(x1, kp), tf.nn.dropout(x2, kp) def lstmn(x, length, scope): with tf.variable_scope(scope): W_h = tf.get_variable(name='W_h', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_hs = tf.get_variable(name='W_hs', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_x = tf.get_variable(name='W_x', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_M = tf.get_variable(name='b_M', initializer=tf.zeros([L, k])) w = tf.get_variable(name='w', shape=[k, 1], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_a = tf.get_variable(name='b_a', initializer=tf.zeros([L])) W_rnn_h_i = tf.get_variable(name='W_rnn_h_i', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_rnn_x_i = tf.get_variable(name='W_rnn_x_i', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_rnn_i = tf.get_variable(name='b_rnn_i', initializer=tf.zeros([k])) W_rnn_h_f = tf.get_variable(name='W_rnn_h_f', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_rnn_x_f = tf.get_variable(name='W_rnn_x_f', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_rnn_f = tf.get_variable(name='b_rnn_f', initializer=tf.zeros([k])) W_rnn_h_o = tf.get_variable(name='W_rnn_h_o', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_rnn_x_o = tf.get_variable(name='W_rnn_x_o', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_rnn_o = tf.get_variable(name='b_rnn_o', initializer=tf.zeros([k])) W_rnn_h_c = tf.get_variable(name='W_rnn_h_c', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_rnn_x_c = tf.get_variable(name='W_rnn_x_c', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_rnn_c = tf.get_variable(name='b_rnn_c', initializer=tf.zeros([k])) c0 = tf.zeros([tf.shape(length)[0], k]) h0 = tf.zeros([tf.shape(length)[0], k]) hst_1 = tf.zeros([tf.shape(length)[0], k]) Cl, Hl = [c0], [h0] for t in range(L): Ct_1 = tf.stack(Cl, axis=1) Ht_1 = tf.stack(Hl, axis=1) H_mod = tf.reshape(Ht_1, [-1, k]) xt = x[:,t,:] Xt = tf.reshape(tf.tile(xt, [1, t+1]), [-1, t+1, k]) Xt_mod = tf.reshape(Xt, [-1, k]) Hst_1 = tf.reshape(tf.tile(hst_1, [1, t+1]), [-1, t+1, k]) Hst_1_mod = tf.reshape(Hst_1, [-1, k]) Mt = tf.nn.tanh( tf.reshape(tf.matmul(H_mod, W_h), [-1, t+1, k]) + tf.reshape(tf.matmul(Xt_mod, W_x), [-1, t+1, k]) + tf.reshape(tf.matmul(Hst_1_mod, W_hs), [-1, t+1, k]) + b_M[:t+1]) Mt_w = tf.matmul(tf.reshape(Mt, [-1, k]), w) alphat = tf.nn.softmax(tf.reshape(Mt_w, [-1, 1, t+1]) + b_a[:t+1]) cst = tf.reshape(tf.matmul(alphat, Ct_1), [-1, k]) hst = tf.reshape(tf.matmul(alphat, Ht_1), [-1, k]) hst_1 = hst it = 
tf.sigmoid(tf.matmul(hst, W_rnn_h_i) + tf.matmul(xt, W_rnn_x_i) + b_rnn_i) ft = tf.sigmoid(tf.matmul(hst, W_rnn_h_f) + tf.matmul(xt, W_rnn_x_f) + b_rnn_f) ot = tf.sigmoid(tf.matmul(hst, W_rnn_h_o) + tf.matmul(xt, W_rnn_x_o) + b_rnn_o) cht = tf.nn.tanh(tf.matmul(hst, W_rnn_h_c) + tf.matmul(xt, W_rnn_x_c) + b_rnn_c) ct = ft*cst + it*cht ht = ot*tf.nn.tanh(ct) Cl.append(ct) Hl.append(ht) return ( tf.transpose(tf.stack(Hl), [1, 0, 2]), tf.transpose(tf.stack(Cl), [1, 0, 2]) ) H1, _ = lstmn(x1, self.len1_ph, 'lstmn1') H2, _ = lstmn(x2, self.len2_ph, 'lstmn2') def get_last_relevant_output(out, seq_len): rng = tf.range(0, tf.shape(seq_len)[0]) indx = tf.stack([rng, seq_len - 1], 1) last = tf.gather_nd(out, indx) return last h1 = get_last_relevant_output(H1, self.len1_ph) h2 = get_last_relevant_output(H2, self.len2_ph) h_s = tf.concat([h1, h2], 1) y = h_s # MLP classifier on top hidden_sizes = config.hidden_sizes for layer, size in enumerate(hidden_sizes): if layer > 0: previous_size = hidden_sizes[layer-1] else: previous_size = 2*k W = tf.get_variable(name='W{}'.format(layer), shape=[previous_size, size], initializer=tf.contrib.layers.xavier_initializer(), regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b = tf.get_variable(name='b{}'.format(layer), initializer=tf.zeros([size])) y = tf.nn.relu(tf.matmul(y, W) + b) W_softmax = tf.get_variable(name='W_softmax', shape=[hidden_sizes[-1], config.label_size], initializer=tf.contrib.layers.xavier_initializer(), regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_softmax = tf.get_variable(name='b_softmax', initializer=tf.zeros([config.label_size])) logits = tf.matmul(y, W_softmax) + b_softmax cross_entropy_loss = tf.reduce_mean( tf.losses.softmax_cross_entropy(self.labels_ph, logits) ) reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) self.loss = cross_entropy_loss #+ tf.add_n(reg_losses) self.train_op = ( tf.train.AdamOptimizer(learning_rate=config.lr) .minimize(self.loss) ) self.probs = tf.nn.softmax(logits) self.predictions = tf.argmax(self.probs, 1) correct_prediction = tf.equal( tf.argmax(self.labels_ph, 1), self.predictions) self.correct_predictions = tf.reduce_sum(tf.cast(correct_prediction, 'int32')) def create_feed_dict(self, sent1_batch, sent2_batch, len1_batch, len2_batch, label_batch, keep_prob): feed_dict = { self.sent1_ph: sent1_batch, self.sent2_ph: sent2_batch, self.len1_ph: len1_batch, self.len2_ph: len2_batch, self.labels_ph: label_batch, self.kp_ph: keep_prob } return feed_dict def run_epoch(self, session, sent1_data, sent2_data, len1_data, len2_data, input_labels, verbose=100): orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y = ( sent1_data, sent2_data, len1_data, len2_data, input_labels ) kp = self.config.kp total_loss = [] total_correct_examples = 0 total_processed_examples = 0 total_steps = int( orig_sent1.shape[0] / self.config.batch_size) for step, (sent1, sent2, len1, len2, y) in enumerate( data_iterator(orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y, batch_size=self.config.batch_size, label_size=self.config.label_size)): feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp) loss, total_correct, _ = session.run( [self.loss, self.correct_predictions, self.train_op], feed_dict=feed) total_processed_examples += len(y) total_correct_examples += total_correct total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : loss = {}'.format( step, total_steps, np.mean(total_loss))) sys.stdout.flush() if verbose: sys.stdout.write('\r') sys.stdout.flush() return 
np.mean(total_loss), total_correct_examples / float(total_processed_examples), total_loss def predict(self, session, sent1_data, sent2_data, len1_data, len2_data, y=None): """Make predictions from the provided model.""" # If y is given, the loss is also calculated # We deactivate dropout by setting it to 1 kp = 1.0 losses = [] results = [] if np.any(y): data = data_iterator(sent1_data, sent2_data, len1_data, len2_data, y, batch_size=self.config.batch_size, label_size=self.config.label_size, shuffle=False) else: data = data_iterator(sent1_data, sent2_data, len1_data, len2_data, batch_size=self.config.batch_size, label_size=self.config.label_size, shuffle=False) for step, (sent1, sent2, len1, len2, y) in enumerate(data): feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp) if np.any(y): loss, preds = session.run( [self.loss, self.predictions], feed_dict=feed) losses.append(loss) else: preds = session.run(self.predictions, feed_dict=feed) results.extend(preds) return np.mean(losses), np.array(results)
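# Hedged sketch: driving the LSTMN sentence-pair Model above for one epoch. It assumes a
# Config object exists and that variables are initialised in a fresh TF1 session;
# run_epoch returns (mean loss, accuracy, per-step losses) as defined above.
model = Model(config)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss, acc, _ = model.run_epoch(sess, model.sent1_train, model.sent2_train,
                                   model.len1_train, model.len2_train, model.y_train)
    print('train loss {:.4f}, train acc {:.3f}'.format(loss, acc))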
class RNN_Model(): def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) def inference(self, tree, predict_only_root=False): """For a given tree build the RNN models computation graph up to where it may be used for inference. Args: tree: a Tree object on which to build the computation graph for the RNN Returns: softmax_linear: Output tensor with the computed logits. """ node_tensors = self.add_model(tree.root) if predict_only_root: node_tensors = node_tensors[tree.root] else: node_tensors = [tensor for node, tensor in node_tensors.iteritems() if node.label!=2] node_tensors = tf.concat(0, node_tensors) return self.add_projections(node_tensors) def add_model_vars(self): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. ''' embed_size = self.config.embed_size vocab_size = len(self.vocab) output_size = self.config.label_size with tf.variable_scope('Composition'): ### YOUR CODE HERE embedding = tf.get_variable("embedding", shape=(vocab_size, embed_size)) W1 = tf.get_variable("W1", shape=(2 * embed_size, embed_size)) b1 = tf.get_variable("b1", shape=(1, embed_size)) ### END YOUR CODE with tf.variable_scope('Projection'): ### YOUR CODE HERE U = tf.get_variable("U", shape=(embed_size, output_size)) bs = tf.get_variable("bs", shape=(1, output_size)) ### END YOUR CODE self.optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr) # dummy_total is a simple sum to ensure that the variables for the AdamOptimizer # are created for initialization and before restore the variables later. # It should never actually get executed. dummy_total = tf.constant(0.0) for v in tf.trainable_variables(): dummy_total +=tf.reduce_sum(v) self.dummy_minimizer = self.optimizer.minimize(dummy_total) # we then initialize variables, and because of the self.dummy_minimizer, # all of the necessary variable/slot pairs get added and included in the # saver variables def add_model(self, node): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here --Hint: Store a node's vector representation in node.tensor so it can be used by it's parent-- Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). 
Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ with tf.variable_scope('Composition', reuse=True): ### YOUR CODE HERE embedding = tf.get_variable("embedding") W1 = tf.get_variable("W1") b1 = tf.get_variable("b1") ### END YOUR CODE # THOUGHT: Batch together all leaf nodes and all non leaf nodes node_tensors = OrderedDict() curr_node_tensor = None if node.isLeaf: ### YOUR CODE HERE curr_node_tensor = tf.gather(embedding, tf.constant([node.label]), name="leaf_lookup") ### END YOUR CODE else: node_tensors.update(self.add_model(node.left)) node_tensors.update(self.add_model(node.right)) ### YOUR CODE HERE left = node_tensors[node.left] right = node_tensors[node.right] concat = tf.concat(1, [left, right]) composition = tf.matmul(concat, W1) + b1 # TODO save on number of zero tensors... curr_node_tensor = tf.maximum(composition, tf.zeros_like(composition)) ### END YOUR CODE node_tensors[node] = curr_node_tensor return node_tensors def add_projections(self, node_tensors): """Add projections to the composition vectors to compute the raw sentiment scores Hint: Reuse the "Projection" variable_scope here Args: node_tensors: tensor(?, embed_size) Returns: output: tensor(?, label_size) """ logits = None ### YOUR CODE HERE with tf.variable_scope('Projection', reuse=True): U = tf.get_variable("U") bs = tf.get_variable("bs") # NOTE: tf.add supports Broadcast logits = tf.matmul(node_tensors, U) + bs ### END YOUR CODE return logits def loss(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ loss = None # YOUR CODE HERE labels = tf.convert_to_tensor(labels, dtype=tf.int64) softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels) l2 = self.config.l2 with tf.variable_scope('Composition', reuse=True): W1 = tf.get_variable("W1") with tf.variable_scope('Projection', reuse=True): U = tf.get_variable("U") l2_loss = tf.nn.l2_loss(W1) + tf.nn.l2_loss(U) l2_loss *= l2 loss = tf.reduce_sum(softmax_loss) + l2_loss # END YOUR CODE return loss def training(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.GradientDescentOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: tensor 0-D Returns: train_op: tensorflow op for training. 
""" train_op = None # YOUR CODE HERE train_op = self.optimizer.minimize(loss) # END YOUR CODE return train_op def predictions(self, y): """Returns predictions from sparse scores Args: y: tensor(?, label_size) Returns: predictions: tensor(?,1) """ predictions = None # YOUR CODE HERE # pick max of softmax predictions in each batch predictions = tf.argmax(tf.nn.softmax(tf.cast(y, tf.float64)), dimension=1) # END YOUR CODE return predictions def __init__(self, config): self.config = config self.load_data() def predict(self, trees, weights_path, get_loss = False): """Make predictions from the provided model.""" results = [] losses = [] for i in xrange(int(math.ceil(len(trees)/float(RESET_AFTER)))): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() saver = tf.train.Saver() saver.restore(sess, weights_path) for tree in trees[i*RESET_AFTER: (i+1)*RESET_AFTER]: logits = self.inference(tree, True) predictions = self.predictions(logits) root_prediction = sess.run(predictions)[0] if get_loss: root_label = tree.root.label loss = sess.run(self.loss(logits, [root_label])) losses.append(loss) results.append(root_prediction) return results, losses def run_epoch(self, new_model = False, verbose=True): step = 0 loss_history = [] while step < len(self.train_data): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() if new_model: init = tf.initialize_all_variables() sess.run(init) new_model = False else: saver = tf.train.Saver() #saver.restore(sess, './weights/%s.temp'%self.config.model_name) saver.restore(sess, './weights_l2/%s.temp'%self.config.model_name) for _ in xrange(RESET_AFTER): if step>=len(self.train_data): break tree = self.train_data[step] logits = self.inference(tree) labels = [l for l in tree.labels if l!=2] loss = self.loss(logits, labels) train_op = self.training(loss) loss, _ = sess.run([loss, train_op]) loss_history.append(loss) if verbose: sys.stdout.write('\r{} / {} : loss = {}'.format( step, len(self.train_data), np.mean(loss_history))) sys.stdout.flush() step+=1 saver = tf.train.Saver() if not os.path.exists("./weights_l2"): os.makedirs("./weights_l2") saver.save(sess, './weights_l2/%s.temp'%self.config.model_name, write_meta_graph=False) train_preds, _ = self.predict(self.train_data, './weights_l2/%s.temp'%self.config.model_name) val_preds, val_losses = self.predict(self.dev_data, './weights_l2/%s.temp'%self.config.model_name, get_loss=True) train_labels = [t.root.label for t in self.train_data] val_labels = [t.root.label for t in self.dev_data] train_acc = np.equal(train_preds, train_labels).mean() val_acc = np.equal(val_preds, val_labels).mean() print print 'Training acc (only root node): {}'.format(train_acc) print 'Valiation acc (only root node): {}'.format(val_acc) print self.make_conf(train_labels, train_preds) print self.make_conf(val_labels, val_preds) return train_acc, val_acc, loss_history, np.mean(val_losses) def train(self, verbose=True): complete_loss_history = [] train_acc_history = [] val_acc_history = [] prev_epoch_loss = float('inf') best_val_loss = float('inf') best_val_epoch = 0 stopped = -1 for epoch in xrange(self.config.max_epochs): print 'epoch %d'%epoch if epoch==0: train_acc, val_acc, loss_history, val_loss = self.run_epoch(new_model=True) else: train_acc, val_acc, loss_history, val_loss = self.run_epoch() complete_loss_history.extend(loss_history) train_acc_history.append(train_acc) val_acc_history.append(val_acc) #lr annealing epoch_loss = np.mean(loss_history) if 
epoch_loss>prev_epoch_loss*self.config.anneal_threshold: self.config.lr/=self.config.anneal_by print 'annealed lr to %f'%self.config.lr prev_epoch_loss = epoch_loss #save if model has improved on val if val_loss < best_val_loss: shutil.copyfile('./weights_l2/%s.temp'%self.config.model_name, './weights_l2/%s'%self.config.model_name) best_val_loss = val_loss best_val_epoch = epoch # if model has not imprvoved for a while stop if epoch - best_val_epoch > self.config.early_stopping: stopped = epoch #break if verbose: sys.stdout.write('\r') sys.stdout.flush() print '\n\nstopped at %d\n'%stopped writeToResults('%s,%s,%s,%s,%s'%(self.config.model_name,stopped,complete_loss_history[-1],train_acc_history[-1],val_acc_history[-1])) return { 'loss_history': complete_loss_history, 'train_acc_history': train_acc_history, 'val_acc_history': val_acc_history, } def make_conf(self, labels, predictions): confmat = np.zeros([2, 2]) for l,p in itertools.izip(labels, predictions): confmat[l, p] += 1 return confmat
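# Hedged sketch: a typical entry point for the recursive RNN_Model above. Config is a
# hypothetical container for the hyperparameters it reads (embed_size, label_size, lr,
# l2, max_epochs, ...); train() returns the history dict shown at the end of train().
if __name__ == '__main__':
    config = Config()
    model = RNN_Model(config)
    stats = model.train(verbose=True)
    print('final train acc: {:.3f}'.format(stats['train_acc_history'][-1]))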
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE self.input_placeholder = tf.placeholder(tf.int32, shape=[None, self.config.num_steps], name='Input') self.labels_placeholder = tf.placeholder(tf.float32, shape=[None, self.config.num_steps], name='Target') self.dropout_placeholder = tf.placeholder(tf.int64, name='Dropout') ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ # The embedding lookup is currently only implemented for the CPU with tf.device('/cpu:0'): ### YOUR CODE HERE embeddings = tf.get_variable('Embedding', [len(self.vocab), self.config.embed_size], trainable=True) inputs = tf.nn.embedding_lookup(embeddings, self.input_placeholder) inputs = [tf.squeeze(x, [1]) for x in tf.split(1, self.config.num_steps, inputs)] ### END YOUR CODE return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). 
Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ ### YOUR CODE HERE with tf.name_scope('Projection Layer'): U = tf.get_variable('U', [self.config.hidden_size, len(self.vocab)]) b2 = tf.get_variable('b2', len(self.vocab)) outputs = [tf.nn.softmax(tf.matmul(o,U)+b2) for o in rnn_outputs] ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])] cross_entropy = sequence_loss([output], [tf.reshape(self.labels_placeholder,[-1])], all_ones, len(self.vocab)) tf.add_to_collection('total_loss', cross_entropy) loss = tf.add_n(tf.get_collection('total_loss')) ### END YOUR CODE return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ ### YOUR CODE HERE optimizer = tf.train.AdamOptimizer(self.config.lr) train_op = optimizer.minimize(loss) ### END YOUR CODE return train_op def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs] # Reshape the output into len(vocab) sized chunks - the -1 says as many as # needed to evenly divide output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)]) self.calculate_loss = self.add_loss_op(output) self.train_step = self.add_training_op(self.calculate_loss) def add_model(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. Note that you may NOT use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) 
Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE with tf.variable_scope('InputDropout'): inputs = [tf.nn.dropout(x, self.dropout_placeholder) for x in inputs] with tf.variable_scope('RNN') as scope: self.initial_state = tf.zeros([self.config.batch_size, self.config.hidden_size]) state = self.initial_state rnn_outputs = [] for tstep, current_input in enumerate(inputs): if tstep > 0: scope.reuse_variables() H = tf.get_variable('H', [self.config.hidden_size, self.config.hidden_size]) I = tf.get_variable('I', [self.config.embed_size, self.config.hidden_size]) b1 = tf.get_variable('b1', [self.config.hidden_size]) state = tf.nn.sigmoid(tf.matmul(state, H) + tf.matmul(current_input, I) + b1) rnn_outputs.append(state) self.final_state = rnn_outputs[-1] with tf.variable_scope('RNNDropout'): rnn_outputs = [tf.nn.dropout(x, self.dropout_placeholder) for x in rnn_outputs] ### END YOUR CODE return rnn_outputs def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1.0 total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = {self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp} loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
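# --- Illustrative sketch (not part of the original model code) ---
# The RNNLM above wires the recurrence and projection into the TF graph; the short
# NumPy sketch below re-states the same update with made-up toy sizes, which can be
# handy for checking the shapes of H, I, b_1 and U, b_2. All values here are invented.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

batch_size, embed_size, hidden_size, vocab_size, num_steps = 4, 6, 5, 10, 3

rng = np.random.RandomState(0)
H = rng.randn(hidden_size, hidden_size) * 0.1   # hidden-to-hidden
I = rng.randn(embed_size, hidden_size) * 0.1    # input-to-hidden
b1 = np.zeros(hidden_size)
U = rng.randn(hidden_size, vocab_size) * 0.1    # projection to vocab scores
b2 = np.zeros(vocab_size)

inputs = [rng.randn(batch_size, embed_size) for _ in range(num_steps)]
state = np.zeros((batch_size, hidden_size))     # initial_state analogue

outputs = []
for x_t in inputs:
    state = sigmoid(state.dot(H) + x_t.dot(I) + b1)   # same update as add_model
    scores = state.dot(U) + b2                        # same shape as add_projection output
    outputs.append(scores)

print([o.shape for o in outputs])   # num_steps arrays of (batch_size, vocab_size)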
class RNN_Model(): def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) def inference(self, tree, predict_only_root=False): """For a given tree build the RNN models computation graph up to where it may be used for inference. Args: tree: a Tree object on which to build the computation graph for the RNN Returns: softmax_linear: Output tensor with the computed logits. """ node_tensors = self.add_model(tree.root) if predict_only_root: node_tensors = node_tensors[tree.root] else: node_tensors = [tensor for node, tensor in node_tensors.iteritems() if node.label!=2] node_tensors = tf.concat(0, node_tensors) return self.add_projections(node_tensors) def add_model_vars(self): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. ''' with tf.variable_scope('Composition'): ### YOUR CODE HERE embed_size = self.config.embed_size #epsilon = 0.4 #initializer = tf.random_uniform_initializer(-epsilon, epsilon) initializer = None embedding = tf.get_variable('embedding', [len(self.vocab), self.config.embed_size], initializer=initializer) W1 = tf.get_variable("W1", [2 * embed_size, embed_size], initializer=initializer) b1 = tf.get_variable("b1", [1, embed_size], initializer=initializer) ### END YOUR CODE with tf.variable_scope('Projection'): ### YOUR CODE HERE U = tf.get_variable("U", [embed_size, self.config.label_size], initializer=initializer) bs = tf.get_variable("bs", [1, self.config.label_size], initializer=initializer) ### END YOUR CODE def add_model(self, node): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here Hint: Store a node's vector representation in node.tensor so it can be used by it's parent Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). 
Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ with tf.variable_scope('Composition', reuse=True): ### YOUR CODE HERE embedding = tf.get_variable("embedding") W1 = tf.get_variable("W1") b1 = tf.get_variable("b1") ### END YOUR CODE node_tensors = OrderedDict() curr_node_tensor = None if node.isLeaf: ### YOUR CODE HERE curr_node_tensor = tf.gather(embedding, [self.vocab.encode(node.word)]) ### END YOUR CODE else: node_tensors.update(self.add_model(node.left)) node_tensors.update(self.add_model(node.right)) ### YOUR CODE HERE node_input = tf.concat(1, [node_tensors[node.left], node_tensors[node.right]]) curr_node_tensor = tf.matmul(node_input, W1) + b1 curr_node_tensor = tf.nn.relu(curr_node_tensor) ### END YOUR CODE node_tensors[node] = curr_node_tensor return node_tensors def add_projections(self, node_tensors): """Add projections to the composition vectors to compute the raw sentiment scores Hint: Reuse the "Projection" variable_scope here Args: node_tensors: tensor(?, embed_size) Returns: output: tensor(?, label_size) """ logits = None ### YOUR CODE HERE with tf.variable_scope("Projection", reuse=True): U = tf.get_variable("U") bs = tf.get_variable("bs") multi = tf.matmul(node_tensors, U) logits = multi + bs ### END YOUR CODE return logits def loss(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ loss = None # YOUR CODE HERE cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels) cost = tf.reduce_sum(cost) with tf.variable_scope("Composition", reuse=True): W1 = tf.get_variable("W1") with tf.variable_scope("Projection", reuse=True): U = tf.get_variable("U") regularization = tf.nn.l2_loss(W1) + tf.nn.l2_loss(U) loss = cost + self.config.l2 * regularization #loss = cost + self.config.l2 * tf.nn.l2_loss(W1) # END YOUR CODE return loss def training(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.GradientDescentOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: tensor 0-D Returns: train_op: tensorflow op for training. 
""" train_op = None # YOUR CODE HERE optim = tf.train.GradientDescentOptimizer(self.config.lr) #optim = tf.train.AdamOptimizer(0.003) train_op = optim.minimize(loss) # END YOUR CODE return train_op def predictions(self, y): """Returns predictions from sparse scores Args: y: tensor(?, label_size) Returns: predictions: tensor(?,1) """ predictions = None # YOUR CODE HERE yhat = tf.nn.softmax(y) predictions = tf.argmax(yhat, 1) #predictions = tf.Print(predictions,[yhat, predictions], summarize=30) # END YOUR CODE return predictions def __init__(self, config): self.config = config self.load_data() def predict(self, trees, weights_path, get_loss = False): """Make predictions from the provided model.""" results = [] losses = [] for i in xrange(int(math.ceil(len(trees)/float(RESET_AFTER)))): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() saver = tf.train.Saver() saver.restore(sess, weights_path) for tree in trees[i*RESET_AFTER: (i+1)*RESET_AFTER]: logits = self.inference(tree, True) predictions = self.predictions(logits) root_prediction = sess.run(predictions)[0] if get_loss: root_label = tree.root.label loss = sess.run(self.loss(logits, [root_label])) losses.append(loss) results.append(root_prediction) return results, losses def run_epoch(self, new_model = False, verbose=True): step = 0 loss_history = [] while step < len(self.train_data): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() if new_model: init = tf.initialize_all_variables() sess.run(init) new_model = False else: saver = tf.train.Saver() saver.restore(sess, './weights/%s.temp'%self.config.model_name) for _ in xrange(RESET_AFTER): if step>=len(self.train_data): break tree = self.train_data[step] logits = self.inference(tree) labels = [l for l in tree.labels if l!=2] loss = self.loss(logits, labels) train_op = self.training(loss) loss, _ = sess.run([loss, train_op]) loss_history.append(loss) if verbose: sys.stdout.write('\r{} / {} : loss = {}'.format( step, len(self.train_data), np.mean(loss_history))) sys.stdout.flush() step+=1 saver = tf.train.Saver() if not os.path.exists("./weights"): os.makedirs("./weights") saver.save(sess, './weights/%s.temp'%self.config.model_name) train_preds, _ = self.predict(self.train_data, './weights/%s.temp'%self.config.model_name) val_preds, val_losses = self.predict(self.dev_data, './weights/%s.temp'%self.config.model_name, get_loss=True) train_labels = [t.root.label for t in self.train_data] val_labels = [t.root.label for t in self.dev_data] train_acc = np.equal(train_preds, train_labels).mean() val_acc = np.equal(val_preds, val_labels).mean() print print 'Training acc (only root node): {}'.format(train_acc) print 'Valiation acc (only root node): {}'.format(val_acc) print self.make_conf(train_labels, train_preds) print self.make_conf(val_labels, val_preds) return train_acc, val_acc, loss_history, np.mean(val_losses) def train(self, verbose=True): complete_loss_history = [] train_acc_history = [] val_acc_history = [] prev_epoch_loss = float('inf') #best_val_loss = float('inf') best_val_acc = 0 best_val_epoch = 0 stopped = -1 for epoch in xrange(self.config.max_epochs): print 'epoch %d'%epoch if epoch==0: train_acc, val_acc, loss_history, val_loss = self.run_epoch(new_model=True) else: train_acc, val_acc, loss_history, val_loss = self.run_epoch() complete_loss_history.extend(loss_history) train_acc_history.append(train_acc) val_acc_history.append(val_acc) #lr annealing epoch_loss = np.mean(loss_history) if 
epoch_loss > prev_epoch_loss * self.config.anneal_threshold:
                self.config.lr /= self.config.anneal_by
                print 'annealed lr to %f' % self.config.lr
            prev_epoch_loss = epoch_loss
            # save if model has improved on val
            print 'validation loss %f' % val_loss
            #if val_loss < best_val_loss:
            if val_acc > best_val_acc:
                shutil.copyfile('./weights/%s.temp' % self.config.model_name,
                                './weights/%s' % self.config.model_name)
                #best_val_loss = val_loss
                best_val_acc = val_acc
                best_val_epoch = epoch
            # if model has not improved for a while, stop
            if epoch - best_val_epoch > self.config.early_stopping:
                stopped = epoch
                #break
        if verbose:
            sys.stdout.write('\r')
            sys.stdout.flush()
        print '\n\nstopped at %d\n' % stopped
        return {
            'loss_history': complete_loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history,
        }

    def make_conf(self, labels, predictions):
        confmat = np.zeros([2, 2])
        for l, p in itertools.izip(labels, predictions):
            confmat[l, p] += 1
        return confmat
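# --- Illustrative sketch (not part of the original model code) ---
# Outside of TensorFlow, the composition rule that add_model reuses at every tree node
# is a single affine map over the concatenated child vectors followed by a ReLU. The
# sketch below uses a stand-in Node class and toy sizes; it is only an illustration.
import numpy as np

class Node(object):
    def __init__(self, word=None, left=None, right=None):
        self.word, self.left, self.right = word, left, right
        self.isLeaf = word is not None

embed_size, vocab = 4, {'i': 0, 'hate': 1, 'cats': 2}
rng = np.random.RandomState(0)
embedding = rng.randn(len(vocab), embed_size) * 0.1
W1 = rng.randn(2 * embed_size, embed_size) * 0.1
b1 = np.zeros((1, embed_size))

def compose(node):
    """Return a (1, embed_size) vector for node, composing children bottom-up."""
    if node.isLeaf:
        return embedding[[vocab[node.word]], :]           # tf.gather analogue
    left, right = compose(node.left), compose(node.right)
    h = np.concatenate([left, right], axis=1).dot(W1) + b1
    return np.maximum(h, 0.0)                             # ReLU, as in add_model

tree = Node(left=Node('i'), right=Node(left=Node('hate'), right=Node('cats')))
print(compose(tree).shape)   # (1, embed_size)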
class Model_RNN(LanguageModel): def load_data(self): pair_fname = '../lastfm_train_mappings.txt' lyrics_path = '../lyrics/data/lyrics/train/' # X_train is a list of all examples. each examples is a 2-len list. each element is a list of words in lyrics. # word_counts is a dictionary that maps X_train, l_train, self.word_counts, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold=100, n_class=self.config.n_class) self.labels_train = np.zeros((len(X_train),self.config.n_class)) self.labels_train[range(len(X_train)),l_train] = 1 self.vocab = Vocab() self.vocab.construct(self.word_counts.keys()) self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps)) for i in range(len(X_train)): self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]] self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]] def add_placeholders(self): self.X1 = tf.placeholder(tf.int32, shape=(None, self.config.max_steps), name='X1') self.X2 = tf.placeholder(tf.int32, shape=(None, self.config.max_steps), name='X2') self.labels = tf.placeholder(tf.float32, shape=(None, self.config.n_class), name='labels') #self.initial_state = tf.placeholder(tf.float32, shape=(None, self.config.hidden_size), name='initial_state') self.seq_len1 = tf.placeholder(tf.int32, shape=(None), name='seq_len1') # for variable length sequences self.seq_len2 = tf.placeholder(tf.int32, shape=(None), name='seq_len2') # for variable length sequences def add_embedding(self): L = tf.get_variable('L', shape=(len(self.word_counts.keys()), self.config.embed_size), dtype=tf.float32) inputs1 = tf.nn.embedding_lookup(L, self.X1) # self.X1 is batch_size x self.config.max_steps inputs2 = tf.nn.embedding_lookup(L, self.X2) # input2 is batch_size x self.config.max_steps x self.config.embed_size inputs1 = tf.split(1, self.config.max_steps, inputs1) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size inputs1 = [tf.squeeze(x) for x in inputs1] inputs2 = tf.split(1, self.config.max_steps, inputs2) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size inputs2 = [tf.squeeze(x) for x in inputs2] print 'onh' print inputs1[0].get_shape return inputs1, inputs2 def add_model(self, inputs1, inputs2, seq_len1, seq_len2): #self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32) print 'adsf add_model' self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32) rnn_outputs = [] rnn_outputs1 = [] rnn_outputs2 = [] h_curr1 = self.initial_state h_curr2 = self.initial_state print 'nthgnghn' with tf.variable_scope('rnn'): Whh = tf.get_variable('Whh', shape=(self.config.hidden_size,self.config.hidden_size), dtype=tf.float32) Wxh = tf.get_variable('Wxh', shape=(self.config.embed_size,self.config.hidden_size), dtype=tf.float32) b1 = tf.get_variable('bhx', shape=(self.config.hidden_size,), dtype=tf.float32) print Wxh.get_shape print inputs1[0].get_shape print inputs2[0].get_shape for i in range(self.config.max_steps): h_curr2 = tf.matmul(h_curr2,Whh) h_curr2 += tf.matmul(inputs2[i],Wxh) h_curr2 += b1 h_curr2 = tf.sigmoid(h_curr2) h_curr1 = tf.sigmoid(tf.matmul(h_curr1,Whh) + tf.matmul(inputs1[i],Wxh) + b1) rnn_outputs1.append(h_curr1) rnn_outputs2.append(h_curr2) rnn_states = [tf.concat(1, [rnn_outputs1[i], rnn_outputs2[i]]) for i in 
range(self.config.max_steps)] return rnn_states def add_projection(self, rnn_states): # rnn_outputs is a list of length batch_size of lengths = seq_len. Where each list element is ??. I think. Whc = tf.get_variable('Whc', shape=(2*self.config.hidden_size,self.config.n_class)) bhc = tf.get_variable('bhc', shape=(self.config.n_class,)) projections = tf.matmul(rnn_states[-1],Whc) + bhc # in case we stop short sequences, the rnn_state in further time_steps should be unch return projections def add_loss_op(self, y): loss = tf.nn.softmax_cross_entropy_with_logits(y, self.labels) loss = tf.reduce_sum(loss) return loss def add_training_op(self, loss): #train_op = tf.train.AdamOptimizer(learning_rate=self.config.lr).minimize(loss) train_op = tf.train.GradientDescentOptimizer(learning_rate=self.config.lr).minimize(loss) return train_op def __init__(self, config): self.config = config self.load_data() self.add_placeholders() print 'adsf __init__' print self.X1.get_shape self.inputs1, self.inputs2 = self.add_embedding() self.rnn_states = self.add_model(self.inputs1, self.inputs2, self.seq_len1, self.seq_len2) self.projections = self.add_projection(self.rnn_states) self.loss = self.add_loss_op(self.projections) self.train_step = self.add_training_op(self.loss) self.predictions = tf.argmax(tf.nn.softmax(self.projections),1) self.correct_predictions = tf.equal(self.predictions,tf.argmax(self.labels,1)) self.correct_predictions = tf.reduce_sum(tf.cast(self.correct_predictions, 'int32')) def run_epoch(self, session, X1, X2, labels, train_op, verbose=10): # X and y are 2D np arrays print 'adsf run_epoch' config = self.config #state = tf.zeros([self.config.batch_size, self.config.hidden_size]) state = self.initial_state.eval() data_len = np.shape(X1)[0] index = np.arange(data_len) np.random.shuffle(index) n_batches = data_len // self.config.batch_size loss = 0.0 for batch_num in range(n_batches): print 'sadf batch_num', str(batch_num) x1_batch = X1[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :] x2_batch = X2[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :] seq_len_batch1 = [1 for i in range(X1.shape[0])] seq_len_batch2 = [1 for i in range(X1.shape[0])] labels_batch = labels[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]] print 'qwer', x1_batch.shape print 'qwer', x2_batch.shape feed_dict = {self.X1: x1_batch, self.X2: x2_batch, self.labels: labels_batch, self.seq_len1: seq_len_batch1, self.seq_len2: seq_len_batch2} #self.initial_state: state} loss, total_correct, _ = session.run([self.loss, self.correct_predictions, train_op], feed_dict=feed_dict) total_loss.append(loss) if verbose and (batch_num+1)%verbose==0: sys.stdout.write('\r{} / {} : pp = {}'.format(batch_num+1, n_batches, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
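# --- Illustrative sketch (not part of the original model code) ---
# Model_RNN encodes the two lyric sequences with one shared set of recurrent weights
# and classifies the concatenation of their final states. The NumPy sketch below shows
# that shared-weight ("siamese") pattern with invented toy shapes and random inputs.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

batch, embed, hidden, n_class, max_steps = 2, 4, 3, 2, 5
rng = np.random.RandomState(0)
Whh = rng.randn(hidden, hidden) * 0.1
Wxh = rng.randn(embed, hidden) * 0.1
bh = np.zeros(hidden)
Whc = rng.randn(2 * hidden, n_class) * 0.1
bhc = np.zeros(n_class)

def encode(steps):
    """Run the shared recurrence over a list of (batch, embed) inputs."""
    h = np.zeros((batch, hidden))
    for x_t in steps:
        h = sigmoid(h.dot(Whh) + x_t.dot(Wxh) + bh)
    return h

seq1 = [rng.randn(batch, embed) for _ in range(max_steps)]
seq2 = [rng.randn(batch, embed) for _ in range(max_steps)]
logits = np.concatenate([encode(seq1), encode(seq2)], axis=1).dot(Whc) + bhc
print(logits.shape)   # (batch, n_class)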
""" Forward function accepts input data and returns a Variable of output data """ self.node_list = [] root_node = self.walk_tree(x.root) all_nodes = torch.cat(self.node_list) #now I need to project out return all_nodes def main(): print("do nothing") if __name__ == '__main__': train_data, dev_data, test_data = tr.simplified_data(train_size, 100, 200) vocab = Vocab() train_sents = [t.get_words() for t in train_data] vocab.construct(list(itertools.chain.from_iterable(train_sents))) model = RNN_Model(vocab, embed_size=50) main() lr = 0.01 loss_history = [] optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0.0) # params (iterable): iterable of parameters to optimize or dicts defining # parameter groups # lr (float): learning rate # momentum (float, optional): momentum factor (default: 0) # weight_decay (float, optional): weight decay (L2 penalty) (default: 0) #torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0, weight_decay=0) # print(model.fcl._parameters['weight'])
class Model_RNN(LanguageModel): def load_data(self): pair_fname = '../lastfm_train_mappings.txt' lyrics_path = '../data/lyrics/train/' # X_train is a list of all examples. each examples is a 2-len list. each element is a list of words in lyrics. # word_counts is a dictionary that maps if self.config.debug: X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, '../glove.6B.50d.txt', threshold_down=0, threshold_up=float('inf'), npos=100, nneg=100) else: X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold_down=100, threshold_up=4000, npos=10000, nneg=10000) self.labels_train = np.zeros((len(X_train),self.config.n_class)) self.labels_train[range(len(X_train)),l_train] = 1 x = collections.Counter(l_train) for k in x.keys(): print 'class:', k, x[k] print '' self.vocab = Vocab() self.vocab.construct(self.word_counts.keys()) self.wv = self.vocab.get_wv('../glove.6B.50d.txt') with open('word_hist.csv', 'w') as f: for w in self.word_counts.keys(): f.write(w+','+str(self.word_counts[w])+'\n') self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps)) for i in range(len(X_train)): self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]] self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]] self.sequence_len1 = np.array(seq_len1) self.sequence_len2 = np.array(seq_len2) def add_placeholders(self): self.X1 = tf.placeholder(tf.int32, shape=(None, self.config.max_steps), name='X1') self.X2 = tf.placeholder(tf.int32, shape=(None, self.config.max_steps), name='X2') self.labels = tf.placeholder(tf.float32, shape=(None, self.config.n_class), name='labels') #self.initial_state = tf.placeholder(tf.float32, shape=(None, self.config.hidden_size), name='initial_state') self.seq_len1 = tf.placeholder(tf.int32, shape=(None), name='seq_len1') # for variable length sequences self.seq_len2 = tf.placeholder(tf.int32, shape=(None), name='seq_len2') # for variable length sequences def add_embedding(self): #L = tf.get_variable('L', shape=(len(self.vocab), self.config.embed_size), dtype=tf.float32) L = tf.Variable(tf.convert_to_tensor(self.wv, dtype=tf.float32), name='L') #L = tf.constant(tf.convert_to_tensor(self.wvi), dtype=tf.float32, name='L') inputs1 = tf.nn.embedding_lookup(L, self.X1) # self.X1 is batch_size x self.config.max_steps inputs2 = tf.nn.embedding_lookup(L, self.X2) # input2 is batch_size x self.config.max_steps x self.config.embed_size inputs1 = tf.split(1, self.config.max_steps, inputs1) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size inputs1 = [tf.squeeze(x, squeeze_dims=[1]) for x in inputs1] inputs2 = tf.split(1, self.config.max_steps, inputs2) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size inputs2 = [tf.squeeze(x, squeeze_dims=[1]) for x in inputs2] return inputs1, inputs2 def add_model_rnn(self, inputs1, inputs2, seq_len1, seq_len2): #self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32) self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32) rnn_outputs = [] rnn_outputs1 = [] rnn_outputs2 = [] h_curr1 = self.initial_state h_curr2 = self.initial_state with tf.variable_scope('rnn'): Whh = tf.get_variable('Whh', 
shape=(self.config.hidden_size,self.config.hidden_size), dtype=tf.float32) Wxh = tf.get_variable('Wxh', shape=(self.config.embed_size,self.config.hidden_size), dtype=tf.float32) b1 = tf.get_variable('bhx', shape=(4*self.config.hidden_size,), dtype=tf.float32) for i in range(self.config.max_steps): if self.config.batch_size==1: if i==seq_len1[0]: breaka tmp = tf.matmul(h_curr1,Whh) + tf.matmul(inputs1[i],Wxh) + b1 rnn_outputs1.append(h_curr1) for i in range(self.config.max_steps): if self.config.batch_size==1: if i==seq_len2[0]: breaka h_curr2 = tf.sigmoid(tf.matmul(h_curr2,Whh) + tf.matmul(inputs2[i],Wxh) + b1) rnn_outputs2.append(h_curr2) #lstm_states = [tf.concat(1, [rnn_outputs1[i], rnn_outputs2[i]]) for i in range(self.config.max_steps)] rnn_final_states = tf.concat(1, [rnn_outputs1[-1], rnn_outputs2[-1]]) return rnn_final_states def add_model_lstm(self, inputs1, inputs2, seq_len1, seq_len2): #self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32) self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32) lstm_outputs1 = [] lstm_outputs2 = [] h_curr1 = self.initial_state h_curr2 = self.initial_state cell1 = self.initial_state cell2 = self.initial_state with tf.variable_scope('lstm'): Whc = tf.get_variable('Whh', shape=(self.config.hidden_size,4*self.config.hidden_size), dtype=tf.float32, initializer=tf.random_normal_initializer()) Wxc = tf.get_variable('Wxh', shape=(self.config.embed_size,4*self.config.hidden_size), dtype=tf.float32, initializer=tf.random_normal_initializer()) b1 = tf.get_variable('bhx', shape=(self.config.hidden_size,), dtype=tf.float32, initializer=tf.random_normal_initializer()) for i in range(self.config.max_steps): if self.config.batch_size==1: if i==seq_len1[0]: break ifog1 = tf.matmul(h_curr1,Whc) + tf.matmul(inputs1[i],Wxc) i1, f1, o1, g1 = tf.split(1, 4, ifog1) i1 = tf.sigmoid(i1) f1 = tf.sigmoid(f1) o1 = tf.sigmoid(o1) g1 = tf.tanh(g1) cell1 = f1*cell1 + i1*g1 h_curr1 = o1*tf.tanh(cell1) lstm_outputs1.append(h_curr1) for i in range(self.config.max_steps): if self.config.batch_size==1: if i==seq_len2[0]: break ifog2 = tf.matmul(h_curr2,Whc) + tf.matmul(inputs2[i],Wxc) i2, f2, o2, g2 = tf.split(1, 4, ifog2) i2 = tf.sigmoid(i2) f2 = tf.sigmoid(f2) o2 = tf.sigmoid(o2) g2 = tf.tanh(g2) cell2 = f2*cell2 + i2*g2 h_curr2 = o2*tf.tanh(cell2) lstm_outputs2.append(h_curr2) lstm_final_states = tf.concat(1, [lstm_outputs1[-1], lstm_outputs2[-1]]) return lstm_final_states def add_final_projections(self, rnn_final_states): # rnn_outputs is a list of length batch_size of lengths = seq_len. Where each list element is ??. I think. 
Whu = tf.get_variable('Whu', shape=(2*self.config.hidden_size,self.config.n_class), initializer=tf.random_normal_initializer()) bhu = tf.get_variable('bhu', shape=(self.config.n_class,), initializer=tf.random_normal_initializer()) final_projections = tf.matmul(rnn_final_states,Whu) + bhu # in case we stop short sequences, the rnn_state in further time_steps should be unch return final_projections def add_loss_op(self, y): loss = tf.nn.softmax_cross_entropy_with_logits(y, self.labels) loss = tf.reduce_mean(loss) return loss def add_training_op(self, loss): #train_op = tf.train.AdamOptimizer(learning_rate=self.config.lr).minimize(loss) train_op = tf.train.GradientDescentOptimizer(learning_rate=self.config.lr).minimize(loss) return train_op def __init__(self, config): self.config = config self.load_data() self.add_placeholders() self.inputs1, self.inputs2 = self.add_embedding() if self.config.model=='rnn': self.final_hidden_states = self.add_model_rnn(self.inputs1, self.inputs2, self.seq_len1, self.seq_len2) elif self.config.model=='lstm': self.final_hidden_states = self.add_model_lstm(self.inputs1, self.inputs2, self.seq_len1, self.seq_len2) self.final_projections = self.add_final_projections(self.final_hidden_states) self.loss = self.add_loss_op(self.final_projections) self.train_step = self.add_training_op(self.loss) self.predictions = tf.argmax(tf.nn.softmax(self.final_projections),1) self.correct_predictions = tf.equal(self.predictions,tf.argmax(self.labels,1)) self.correct_predictions = tf.reduce_sum(tf.cast(self.correct_predictions, 'int32')) def run_epoch(self, session, X1, X2, labels, sequence_len1, sequence_len2, train_op, verbose=10): # X and y are 2D np arrays config = self.config #state = tf.zeros([self.config.batch_size, self.config.hidden_size]) state = self.initial_state.eval() data_len = np.shape(X1)[0] index = np.arange(data_len) np.random.shuffle(index) n_batches = data_len // self.config.batch_size loss = 0.0 total_loss = [] total_correct = 0 all_preds = -np.ones((data_len,)) for batch_num in range(n_batches): x1_batch = X1[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :] x2_batch = X2[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :] labels_batch = labels[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :] seq_len_batch1 = sequence_len1[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]] seq_len_batch2 = sequence_len2[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]] feed_dict = {self.X1: x1_batch, self.X2: x2_batch, self.labels: labels_batch, self.seq_len1: seq_len_batch1, self.seq_len2: seq_len_batch2} #self.initial_state: state} loss, preds, correct, final_projections, _ = session.run([self.loss, self.predictions, self.correct_predictions, self.final_projections, train_op], feed_dict=feed_dict) #print str(batch_num)+'/'+str(n_batches)+' : '+str(final_projections[0][0])+' '+str(final_projections[0][1]) total_loss.append(loss) total_correct += correct all_preds[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]] = preds if verbose and (batch_num+1)%verbose==0: sys.stdout.write('\r{} / {} : loss = {:.4f} : train_acc = {:.2f}%'.format(batch_num+1, n_batches, np.mean(total_loss), 100.0*total_correct/((batch_num+1)*self.config.batch_size))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.mean(total_loss), all_preds
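# --- Illustrative sketch (not part of the original model code) ---
# add_model_lstm packs the four LSTM gates into one matmul and splits the result.
# The NumPy sketch below performs one cell step with the same i, f, o, g layout;
# shapes are toy values, and, like the TF code above, no bias is added to the
# gate pre-activation.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

batch, embed, hidden = 2, 4, 3
rng = np.random.RandomState(0)
Whc = rng.randn(hidden, 4 * hidden) * 0.1
Wxc = rng.randn(embed, 4 * hidden) * 0.1

h = np.zeros((batch, hidden))
cell = np.zeros((batch, hidden))
x_t = rng.randn(batch, embed)

ifog = h.dot(Whc) + x_t.dot(Wxc)                 # (batch, 4*hidden)
i, f, o, g = np.split(ifog, 4, axis=1)           # same order as tf.split(1, 4, ifog)
i, f, o, g = sigmoid(i), sigmoid(f), sigmoid(o), np.tanh(g)
cell = f * cell + i * g
h = o * np.tanh(cell)
print(h.shape, cell.shape)   # (batch, hidden) each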
class Model(): def __init__(self, config): self.config = config self.load_data() self.build_model() def load_vocab(self,debug): self.vocab = Vocab() if debug: self.vocab.construct(get_words_dataset('dev')) else: self.vocab.construct(get_words_dataset('train')) self.vocab.build_embedding_matrix(self.config.word_embed_size) self.embedding_matrix = self.vocab.embedding_matrix def load_data(self, debug=False): """ Loads starter word-vectors and train/dev/test data. """ self.load_vocab(debug) config = self.config if debug: # Load the training set train_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'dev', 'post')) ( self.sent1_train, self.sent2_train, self.len1_train, self.len2_train, self.y_train ) = zip(*train_data) self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train) self.len1_train, self.len2_train = ( np.array(self.len1_train), np.array(self.len2_train) ) self.y_train = np.array(self.y_train) print('# training examples: %d' %len(self.y_train)) # Load the validation set dev_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_dev, self.sent2_dev, self.len1_dev, self.len2_dev, self.y_dev ) = zip(*dev_data) self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev) self.len1_dev, self.len2_dev = ( np.array(self.len1_dev), np.array(self.len2_dev) ) self.y_dev = np.array(self.y_dev) print('# dev examples: %d' %len(self.y_dev)) # Load the test set test_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_test, self.sent2_test, self.len1_test, self.len2_test, self.y_test ) = zip(*test_data) self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test) self.len1_test, self.len2_test = ( np.array(self.len1_test), np.array(self.len2_test) ) self.y_test = np.array(self.y_test) print('# test examples: %d' %len(self.y_test)) else: # Load the training set train_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'train', 'post')) ( self.sent1_train, self.sent2_train, self.len1_train, self.len2_train, self.y_train ) = zip(*train_data) self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train) self.len1_train, self.len2_train = ( np.array(self.len1_train), np.array(self.len2_train) ) self.y_train = np.array(self.y_train) print('# training examples: %d' %len(self.y_train)) # Load the validation set dev_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'dev', 'post')) ( self.sent1_dev, self.sent2_dev, self.len1_dev, self.len2_dev, self.y_dev ) = zip(*dev_data) self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev) self.len1_dev, self.len2_dev = ( np.array(self.len1_dev), np.array(self.len2_dev) ) self.y_dev = np.array(self.y_dev) print('# dev examples: %d' %len(self.y_dev)) # Load the test set test_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_test, self.sent2_test, self.len1_test, self.len2_test, self.y_test ) = zip(*test_data) self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test) self.len1_test, self.len2_test = ( np.array(self.len1_test), np.array(self.len2_test) ) self.y_test = np.array(self.y_test) print('# test examples: %d' %len(self.y_test)) print('min len: ', np.min(self.len2_train)) def build_model(self): config = self.config k = config.sentence_embed_size L = config.sent_len # input tensors self.sent1_ph = tf.placeholder(tf.int32, 
shape=[None, L], name='sent1') self.sent2_ph = tf.placeholder(tf.int32, shape=[None, L], name='sent2') self.len1_ph = tf.placeholder(tf.int32, shape=[None], name='len1') self.len2_ph = tf.placeholder(tf.int32, shape=[None], name='len2') self.labels_ph = tf.placeholder(tf.float32, shape=[None, config.label_size], name='label') self.kp_ph = tf.placeholder(tf.float32, name='kp') kp = self.kp_ph # set embedding matrix to pretrained embedding init_embeds = tf.constant(self.embedding_matrix, dtype='float32') word_embeddings = tf.get_variable( dtype='float32', name='word_embeddings', initializer=init_embeds, trainable=False) # no fine-tuning of word embeddings # x1 and x2 have shape (?, L, k) x1 = tf.nn.embedding_lookup(word_embeddings, self.sent1_ph) x2 = tf.nn.embedding_lookup(word_embeddings, self.sent2_ph) x1, x2 = tf.nn.dropout(x1, kp), tf.nn.dropout(x2, kp) # encode premise sentence with 1st LSTM with tf.variable_scope('rnn1'): cell1 = tf.contrib.rnn.LSTMCell(num_units=k, state_is_tuple=True) cell1 = tf.contrib.rnn.DropoutWrapper(cell1, input_keep_prob=kp, output_keep_prob=kp) out1, fstate1 = tf.nn.dynamic_rnn( cell=cell1, inputs=x1, sequence_length=self.len1_ph, dtype=tf.float32) # encode hypothesis with 2nd LSTM # using final state of 1st LSTM as initial state with tf.variable_scope('rnn2'): cell2 = tf.contrib.rnn.LSTMCell(num_units=k, state_is_tuple=True) cell2 = tf.contrib.rnn.DropoutWrapper(cell2, input_keep_prob=kp, output_keep_prob=kp) out2, fstate2 = tf.nn.dynamic_rnn( cell=cell2, inputs=x2, sequence_length=self.len2_ph, initial_state=fstate1, dtype=tf.float32) Y = out1 Y_mod =tf.reshape(Y, [-1, k]) W_y = tf.get_variable(name='W_y', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_h = tf.get_variable(name='W_h', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_M = tf.get_variable(name='b_M', initializer=tf.zeros([L, k])) W_r = tf.get_variable(name='W_r', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_t = tf.get_variable(name='W_t', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_r = tf.get_variable(name='b_r', initializer=tf.zeros([k])) w = tf.get_variable(name='w', shape=[k, 1], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_a = tf.get_variable(name='b_a', initializer=tf.zeros([L])) rt_1 = tf.zeros([tf.shape(self.len1_ph)[0], k]) attention = [] r_outputs = [] for t in range(L): ht = out2[:,t,:] Ht = tf.reshape(tf.tile(ht, [1, L]), [-1, L, k]) Ht_mod = tf.reshape(Ht, [-1, k]) Rt_1 = tf.reshape(tf.tile(rt_1, [1, L]), [-1, L, k]) Rt_1_mod = tf.reshape(Rt_1, [-1, k]) Mt = tf.nn.tanh( tf.reshape(tf.matmul(Y_mod, W_y), [-1, L, k]) + tf.reshape(tf.matmul(Ht_mod, W_h), [-1, L, k]) + tf.reshape(tf.matmul(Rt_1_mod, W_r), [-1, L, k]) ) Mt_w = tf.matmul(tf.reshape(Mt, [-1, k]), w) alphat = tf.nn.softmax(tf.reshape(Mt_w, [-1, 1, L]) ) alphat_Y = tf.reshape(tf.matmul(alphat, Y), [-1, k]) rt = alphat_Y + tf.nn.tanh(tf.matmul(rt_1, W_t) ) rt_1 = rt attention.append(alphat) r_outputs.append(rt) r_outputs = tf.stack(r_outputs) self.attention = tf.stack(attention) r_outputs = tf.transpose(r_outputs, [1, 0, 2]) def get_last_relevant_output(out, seq_len): rng = tf.range(0, tf.shape(seq_len)[0]) indx = tf.stack([rng, seq_len - 1], 1) last = tf.gather_nd(out, indx) return last rN = get_last_relevant_output(r_outputs, self.len2_ph) hN = get_last_relevant_output(out2, self.len2_ph) W_p = tf.get_variable(name='W_p', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_x = 
tf.get_variable(name='W_x', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_hs = tf.get_variable(name='b_hs', initializer=tf.zeros([k])) # sentence pair representation h_s = tf.nn.tanh(tf.matmul(rN, W_p) + tf.matmul(hN, W_x) ) y = h_s # MLP classifier on top hidden_sizes = config.hidden_sizes for layer, size in enumerate(hidden_sizes): if layer > 0: previous_size = hidden_sizes[layer-1] else: previous_size = k W = tf.get_variable(name='W{}'.format(layer), shape=[previous_size, size], initializer=tf.contrib.layers.xavier_initializer(), regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b = tf.get_variable(name='b{}'.format(layer), initializer=tf.zeros([size])) y = tf.nn.relu(tf.matmul(y, W) + b) y = tf.nn.dropout(y, kp) W_softmax = tf.get_variable(name='W_softmax', shape=[hidden_sizes[-1], config.label_size], initializer=tf.contrib.layers.xavier_initializer(), regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_softmax = tf.get_variable(name='b_softmax', initializer=tf.zeros([config.label_size])) logits = tf.matmul(y, W_softmax) + b_softmax cross_entropy_loss = tf.reduce_mean( tf.losses.softmax_cross_entropy(self.labels_ph, logits) ) reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) self.loss = cross_entropy_loss #+ tf.add_n(reg_losses) optimizer = tf.train.AdamOptimizer(learning_rate=config.lr) gradients, variables = zip(*optimizer.compute_gradients(self.loss)) gradients, _ = tf.clip_by_global_norm(gradients, config.max_grad_norm) self.train_op = optimizer.apply_gradients(zip(gradients, variables)) self.probs = tf.nn.softmax(logits) self.predictions = tf.argmax(self.probs, 1) correct_prediction = tf.equal( tf.argmax(self.labels_ph, 1), self.predictions) self.correct_predictions = tf.reduce_sum(tf.cast(correct_prediction, 'int32')) def create_feed_dict(self, sent1_batch, sent2_batch, len1_batch, len2_batch, label_batch, keep_prob): feed_dict = { self.sent1_ph: sent1_batch, self.sent2_ph: sent2_batch, self.len1_ph: len1_batch, self.len2_ph: len2_batch, self.labels_ph: label_batch, self.kp_ph: keep_prob } return feed_dict def run_epoch(self, session, sent1_data, sent2_data, len1_data, len2_data, input_labels, verbose=100): orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y = ( sent1_data, sent2_data, len1_data, len2_data, input_labels ) kp = self.config.kp total_loss = [] total_correct_examples = 0 total_processed_examples = 0 total_steps = int( orig_sent1.shape[0] / self.config.batch_size) for step, (sent1, sent2, len1, len2, y) in enumerate( data_iterator(orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y, batch_size=self.config.batch_size, label_size=self.config.label_size)): feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp) loss, total_correct, _ = session.run( [self.loss, self.correct_predictions, self.train_op], feed_dict=feed) total_processed_examples += len(y) total_correct_examples += total_correct total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : loss = {}'.format( step, total_steps, np.mean(total_loss))) sys.stdout.flush() if verbose: sys.stdout.write('\r') sys.stdout.flush() return np.mean(total_loss), total_correct_examples / float(total_processed_examples), total_loss def predict(self, session, sent1_data, sent2_data, len1_data, len2_data, y=None): """Make predictions from the provided model.""" # If y is given, the loss is also calculated # We deactivate dropout by setting it to 1 kp = 1.0 losses = [] results = [] if np.any(y): data = data_iterator(sent1_data, 
sent2_data, len1_data, len2_data, y, batch_size=self.config.batch_size, label_size=self.config.label_size, shuffle=False) else: data = data_iterator(sent1_data, sent2_data, len1_data, len2_data, batch_size=self.config.batch_size, label_size=self.config.label_size, shuffle=False) for step, (sent1, sent2, len1, len2, y) in enumerate(data): feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp) if np.any(y): loss, preds = session.run( [self.loss, self.predictions], feed_dict=feed) losses.append(loss) else: preds = session.run(self.predictions, feed_dict=feed) results.extend(preds) return np.mean(losses), np.array(results) def get_attention(self, session, sent1, sent2): kp = 1.0 sent1 = utils.encode_sentence(self.vocab, sent1) print(sent1) sent2 = utils.encode_sentence(self.vocab, sent2) print(sent2) sent1 = utils.pad_sentence(self.vocab, sent1, self.config.sent_len, 'post') sent2 = utils.pad_sentence(self.vocab, sent2, self.config.sent_len, 'post') len1, len2 = np.array([len(sent1)]), np.array([len(sent2)]) sent1_arr = np.array(sent1).reshape((1,-1)) sent2_arr = np.array(sent2).reshape((1,-1)) y = np.array([0,1,0]).reshape((1,-1)) feed = self.create_feed_dict(sent1_arr, sent2_arr, len1, len2, y, kp) preds, alphas = session.run([self.predictions, self.attention], feed_dict=feed) return preds, alphas
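# --- Illustrative sketch (not part of the original model code) ---
# The attention loop in build_model computes, for each hypothesis step t, a match
# tensor M_t over the premise outputs Y, attention weights alpha_t, and a running
# representation r_t. One such step is re-stated in NumPy below with invented toy
# shapes: M_t = tanh(Y W_y + h_t W_h + r_{t-1} W_r), alpha_t = softmax(M_t w),
# r_t = alpha_t Y + tanh(r_{t-1} W_t).
import numpy as np

batch, L, k = 2, 5, 4
rng = np.random.RandomState(0)
Y = rng.randn(batch, L, k) * 0.1          # premise LSTM outputs
h_t = rng.randn(batch, k) * 0.1           # hypothesis LSTM output at step t
r_prev = np.zeros((batch, k))             # r_{t-1}
W_y, W_h, W_r, W_t_mat = (rng.randn(k, k) * 0.1 for _ in range(4))
w = rng.randn(k, 1) * 0.1

M_t = np.tanh(Y.dot(W_y) + (h_t.dot(W_h) + r_prev.dot(W_r))[:, None, :])   # (batch, L, k)
scores = M_t.dot(w).squeeze(-1)                                            # (batch, L)
alpha_t = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)       # softmax over premise positions
r_t = np.einsum('bl,blk->bk', alpha_t, Y) + np.tanh(r_prev.dot(W_t_mat))   # (batch, k)
print(alpha_t.shape, r_t.shape)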
import sys
import os

from utils import Vocab
import numpy as np
import pickle

if __name__ == "__main__":
    # Create a set of all words
    all_words = set()
    vocab = Vocab()
    count_files = 0
    for name in ['test', 'train', 'val']:
        filename = name + '_tokens.txt'
        f = open(filename, 'r')
        for line in f:
            sp_line = line.strip().split()
            for token in sp_line:
                all_words.add(token)
                vocab.add_word(token)
        f.close()

    glove_dir = '/media/sf_kickstarter/CS224D/Project/glove.840B.300d'
    glove_f = open(os.path.join(glove_dir, 'glove.840B.300d.txt'), 'r')
    embedding_matrix = np.zeros((len(vocab.word_to_index), 300))
    count = 0
    for line in glove_f:
        line_sp = line.strip().split()
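# --- Illustrative sketch (not part of the original script) ---
# The script above is cut off inside the GloVe loop. A plausible continuation is
# sketched below as a standalone helper; it is an assumption, not the original code.
# It copies GloVe rows for in-vocabulary words; the pickling step and the output
# filename in the commented usage are made up for illustration.
import numpy as np

def fill_embedding_matrix(glove_file, word_to_index, dim=300):
    """Return a (len(word_to_index), dim) matrix with GloVe rows where available."""
    matrix = np.zeros((len(word_to_index), dim))
    found = 0
    for line in glove_file:
        parts = line.rstrip().split(' ')
        word, values = parts[0], parts[1:]
        if word in word_to_index and len(values) == dim:
            matrix[word_to_index[word]] = np.asarray(values, dtype=np.float32)
            found += 1
    return matrix, found

# Example use, mirroring the variables defined in the script above (pickle is
# already imported there; 'embedding_matrix.pkl' is a hypothetical filename):
#     embedding_matrix, count = fill_embedding_matrix(glove_f, vocab.word_to_index)
#     with open('embedding_matrix.pkl', 'wb') as out_f:
#         pickle.dump(embedding_matrix, out_f)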
class RNN_Model(): def __init__(self, config): self.config = config self.load_data() self.merged_summaries = None self.summary_writer = None self.is_a_leaf = tf.placeholder(tf.bool, [None], name="is_a_leaf") self.left_child = tf.placeholder(tf.int32, [None], name="lchild") self.right_child = tf.placeholder(tf.int32, [None], name="rchild") self.word_index = tf.placeholder(tf.int32, [None], name="word_index") self.labelholder = tf.placeholder(tf.int32, [None], name="labels_holder") self.add_model_vars() self.tensor_array = tf.TensorArray(tf.float32, size=0, dynamic_size=True, clear_after_read=False, infer_shape=False) #tensor array stores the vectors (embedded or composed) self.tensor_array_op = None self.prediction = None self.logits = None self.root_logits = None self.root_predict = None self.root_loss = None self.full_loss = None self.training_op = None #tensor_array_op is the operation on the TensorArray # private functions used to construct the graph. def _embed_word(self, word_index): with tf.variable_scope("Composition", reuse=True) as scope: print(scope.name) embedding = tf.get_variable("embedding") print(embedding.name) return tf.expand_dims(tf.gather(embedding, word_index), 0) # private functions used to construct the graph. def _combine_children(self, left_index, right_index): left_tensor = self.tensor_array.read(left_index) right_tensor = self.tensor_array.read(right_index) with tf.variable_scope('Composition', reuse=True): W1 = tf.get_variable('W1') b1 = tf.get_variable('b1') return tf.nn.relu(tf.matmul(tf.concat(1, [left_tensor, right_tensor]), W1) + b1) # i is the index (over data stored in the placeholders) # identical type[out] = type[in]; can be used in while_loop # so first iteration -> puts left most leaf on the tensorarray (and increments i) # next iteration -> puts next left most (leaf on stack) and increments i # .... 
# until all the leaves are on the stack in the correct order # starts combining the leaves after and adding to the stack def _loop_over_tree(self, tensor_array, i): is_leaf = tf.gather(self.is_a_leaf, i) word_idx = tf.gather(self.word_index, i) left_child = tf.gather(self.left_child, i) right_child = tf.gather(self.right_child, i) node_tensor = tf.cond(is_leaf, lambda : self._embed_word(word_idx), lambda : self._combine_children(left_child, right_child)) tensor_array = tensor_array.write(i, node_tensor) i = tf.add(i,1) return tensor_array, i def construct_tensor_array(self): loop_condition = lambda tensor_array, i: \ tf.less(i, tf.squeeze(tf.shape(self.is_a_leaf))) #iterate over all leaves + composition tensor_array_op = tf.while_loop(cond=loop_condition, body=self._loop_over_tree, loop_vars=[self.tensor_array, 0], parallel_iterations=1)[0] return tensor_array_op def inference_op(self, predict_only_root=False): if predict_only_root: return self.root_logits_op() return self.logits_op() def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) def add_model_vars(self): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. ''' with tf.variable_scope('Composition') as scope: ### YOUR CODE HERE #initializer=initializer=tf.random_normal_initializer(0,3) print(scope.name) embedding = tf.get_variable("embedding", [self.vocab.total_words, self.config.embed_size]) print(embedding.name) W1 = tf.get_variable("W1", [2 * self.config.embed_size, self.config.embed_size]) b1 = tf.get_variable("b1", [1, self.config.embed_size]) l2_loss = tf.nn.l2_loss(W1) tf.add_to_collection(name="l2_loss", value=l2_loss) variable_summaries(embedding, embedding.name) variable_summaries(W1, W1.name) variable_summaries(b1, b1.name) ### END YOUR CODE with tf.variable_scope('Projection'): ### YOUR CODE HERE U = tf.get_variable("U", [self.config.embed_size, self.config.label_size]) bs = tf.get_variable("bs", [1, self.config.label_size]) variable_summaries(U, U.name) variable_summaries(bs, bs.name) l2_loss = tf.nn.l2_loss(U) tf.add_to_collection(name="l2_loss", value=l2_loss) ### END YOUR CODE def add_model(self): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here Hint: Store a node's vector representation in node.tensor so it can be used by it's parent Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). 
Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ if self.tensor_array_op is None: self.tensor_array_op = self.construct_tensor_array() return self.tensor_array_op def add_projections_op(self, node_tensors): """Add projections to the composition vectors to compute the raw sentiment scores Hint: Reuse the "Projection" variable_scope here Args: node_tensors: tensor(?, embed_size) Returns: output: tensor(?, label_size) """ logits = None ### YOUR CODE HERE with tf.variable_scope("Projection", reuse=True): U = tf.get_variable("U") bs = tf.get_variable("bs") logits = tf.matmul(node_tensors, U) + bs ### END YOUR CODE return logits def logits_op(self): #this is an operation on the updated tensor_array if self.logits is None: self.logits = self.add_projections_op(self.tensor_array_op.concat()) return self.logits def root_logits_op(self): #construct once if self.root_logits is None: self.root_logits = self.add_projections_op(self.tensor_array_op.read(self.tensor_array_op.size() -1)) return self.root_logits def root_prediction_op(self): if self.root_predict is None: self.root_predict = tf.squeeze(tf.argmax(self.root_logits_op(), 1)) return self.root_predict def full_loss_op(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ if self.full_loss is None: loss = None # YOUR CODE HERE l2_loss = self.config.l2 * tf.add_n(tf.get_collection("l2_loss")) idx = tf.where(tf.less(self.labelholder,2)) logits = tf.gather(logits, idx) labels = tf.gather(labels, idx) objective_loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)) loss = objective_loss + l2_loss tf.summary.scalar("loss_l2", l2_loss) tf.summary.scalar("loss_objective", tf.reduce_sum(objective_loss)) tf.summary.scalar("loss_total", loss) self.full_loss = loss # END YOUR CODE return self.full_loss def loss_op(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ if self.root_loss is None: #construct once guard loss = None # YOUR CODE HERE l2_loss = self.config.l2 * tf.add_n(tf.get_collection("l2_loss")) objective_loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)) loss = objective_loss + l2_loss tf.summary.scalar("root_loss_l2", l2_loss) tf.summary.scalar("root_loss_objective", tf.reduce_sum(objective_loss)) tf.summary.scalar("root_loss_total", loss) self.root_loss = loss # END YOUR CODE return self.root_loss def training(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.GradientDescentOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: tensor 0-D Returns: train_op: tensorflow op for training. 
""" if self.training_op is None: # YOUR CODE HERE optimizer = tf.train.AdamOptimizer(self.config.lr)#tf.train.GradientDescentOptimizer(self.config.lr) #optimizer = tf.train.AdamOptimizer(self.config.lr) self.training_op = optimizer.minimize(loss) # END YOUR CODE return self.training_op def predictions(self, y): """Returns predictions from sparse scores Args: y: tensor(?, label_size) Returns: predictions: tensor(?,1) """ if self.prediction is None: # YOUR CODE HERE self.prediction = tf.argmax(y, dimension=1) # END YOUR CODE return self.prediction def build_feed_dict(self, in_node): nodes_list = [] tr.leftTraverse(in_node, lambda node, args: args.append(node), nodes_list) node_to_index = OrderedDict() for idx, i in enumerate(nodes_list): node_to_index[i] = idx feed_dict = { self.is_a_leaf : [ n.isLeaf for n in nodes_list ], self.left_child : [ node_to_index[n.left] if not n.isLeaf else -1 for n in nodes_list ], self.right_child : [ node_to_index[n.right] if not n.isLeaf else -1 for n in nodes_list ], self.word_index : [ self.vocab.encode(n.word) if n.word else -1 for n in nodes_list ], self.labelholder : [ n.label for n in nodes_list ] } return feed_dict def predict(self, trees, weights_path, get_loss = False): """Make predictions from the provided model.""" results = [] losses = [] logits = self.root_logits_op() #evaluation is based upon the root node root_loss = self.loss_op(logits=logits, labels=self.labelholder[-1:]) root_prediction_op = self.root_prediction_op() with tf.Session() as sess: saver = tf.train.Saver() saver.restore(sess, weights_path) for t in trees: feed_dict = self.build_feed_dict(t.root) if get_loss: root_prediction, loss = sess.run([root_prediction_op, root_loss], feed_dict=feed_dict) losses.append(loss) results.append(root_prediction) else: root_prediction = sess.run(root_prediction_op, feed_dict=feed_dict) results.append(root_prediction) return results, losses #need to rework this: (OP creation needs to be made independent of using OPs) def run_epoch(self, new_model = False, verbose=True, epoch=0): loss_history = [] random.shuffle(self.train_data) with tf.Session() as sess: if new_model: add_model_op = self.add_model() logits = self.logits_op() loss = self.full_loss_op(logits=logits, labels=self.labelholder) train_op = self.training(loss) init = tf.global_variables_initializer() sess.run(init) else: saver = tf.train.Saver() saver.restore(sess, './weights/%s.temp'%self.config.model_name) logits = self.logits_op() loss = self.full_loss_op(logits=logits, labels=self.labelholder) train_op = self.training(loss) for step, tree in enumerate(self.train_data): feed_dict = self.build_feed_dict(tree.root) loss_value, _ = sess.run([loss, train_op], feed_dict=feed_dict) loss_history.append(loss_value) if verbose: sys.stdout.write('\r{} / {} : loss = {}'.format( step+1, len(self.train_data), np.mean(loss_history))) sys.stdout.flush() saver = tf.train.Saver() if not os.path.exists("./weights"): os.makedirs("./weights") #print('./weights/%s.temp'%self.config.model_name) saver.save(sess, './weights/%s.temp'%self.config.model_name) train_preds, _ = self.predict(self.train_data, './weights/%s.temp'%self.config.model_name) val_preds, val_losses = self.predict(self.dev_data, './weights/%s.temp'%self.config.model_name, get_loss=True) train_labels = [t.root.label for t in self.train_data] val_labels = [t.root.label for t in self.dev_data] train_acc = np.equal(train_preds, train_labels).mean() val_acc = np.equal(val_preds, val_labels).mean() print() print('Training acc (only root node): 
{}'.format(train_acc))
        print('Validation acc (only root node): {}'.format(val_acc))
        print(self.make_conf(train_labels, train_preds))
        print(self.make_conf(val_labels, val_preds))
        return train_acc, val_acc, loss_history, np.mean(val_losses)

    def train(self, verbose=True):
        complete_loss_history = []
        train_acc_history = []
        val_acc_history = []
        prev_epoch_loss = float('inf')
        best_val_loss = float('inf')
        best_val_epoch = 0
        stopped = -1
        for epoch in range(self.config.max_epochs):
            print('epoch %d' % epoch)
            if epoch == 0:
                train_acc, val_acc, loss_history, val_loss = self.run_epoch(new_model=True, epoch=epoch)
            else:
                train_acc, val_acc, loss_history, val_loss = self.run_epoch(epoch=epoch)
            complete_loss_history.extend(loss_history)
            train_acc_history.append(train_acc)
            val_acc_history.append(val_acc)
            # lr annealing
            epoch_loss = np.mean(loss_history)
            if epoch_loss > prev_epoch_loss * self.config.anneal_threshold:
                self.config.lr /= self.config.anneal_by
                print('annealed lr to %f' % self.config.lr)
            prev_epoch_loss = epoch_loss
            # save if model has improved on val
            if val_loss < best_val_loss:
                shutil.copyfile('./weights/%s.temp' % self.config.model_name,
                                './weights/%s' % self.config.model_name)
                best_val_loss = val_loss
                best_val_epoch = epoch
            # if model has not improved for a while, stop
            if epoch - best_val_epoch > self.config.early_stopping:
                stopped = epoch
                #break
        if verbose:
            sys.stdout.write('\r')
            sys.stdout.flush()
        print('\n\nstopped at %d\n' % stopped)
        return {
            'loss_history': complete_loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history,
        }

    def make_conf(self, labels, predictions):
        confmat = np.zeros([2, 2])
        for l, p in zip(labels, predictions):
            confmat[l, p] += 1
        return confmat
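# --- Illustrative sketch (not part of the original model code) ---
# The TensorArray version's while_loop can only combine children whose slots have
# already been written, so build_feed_dict must feed nodes in a bottom-up order.
# The pure-Python sketch below shows one ordering (post-order) that satisfies that
# constraint, using a stand-in Node class; it does not claim to reproduce
# tr.leftTraverse exactly.
class Node(object):
    def __init__(self, word=None, label=0, left=None, right=None):
        self.word, self.label, self.left, self.right = word, label, left, right
        self.isLeaf = word is not None

def flatten_bottom_up(node, out=None):
    """Append nodes children-first (post-order)."""
    if out is None:
        out = []
    if not node.isLeaf:
        flatten_bottom_up(node.left, out)
        flatten_bottom_up(node.right, out)
    out.append(node)
    return out

root = Node(left=Node('not'), right=Node(left=Node('that'), right=Node('bad')))
nodes = flatten_bottom_up(root)
index = {n: i for i, n in enumerate(nodes)}
# Every internal node's children sit at smaller indices, as the while_loop requires.
assert all(index[n.left] < index[n] and index[n.right] < index[n]
           for n in nodes if not n.isLeaf)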
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE self.input_placeholder = tf.placeholder(tf.int32, shape=(None, self.config.num_steps)) self.labels_placeholder = tf.placeholder(tf.int32, shape=(None, self.config.num_steps)) self.dropout_placeholder = tf.placeholder(tf.float32, shape=None) ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ # The embedding lookup is currently only implemented for the CPU with tf.device('/cpu:0'): ### YOUR CODE HERE L = tf.Variable(tf.random_uniform([len(self.vocab), self.config.embed_size], -1.0, 1.0), name="L") # Shape of input_placeholder : (batch_size, num_steps) # Shape of embed : (num_steps, batch_size, embed_size) embed = tf.nn.embedding_lookup(L, tf.transpose(self.input_placeholder, perm=[1,0])) inputs = [tf.squeeze(ts, [0]) for ts in tf.split(0, self.config.num_steps, embed)] ### END YOUR CODE return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size(LIBIN edited)). 
        Returns:
            outputs: List of length num_steps, each a tensor of shape
                     (batch_size, len(vocab))
        """
        ### YOUR CODE HERE
        with tf.variable_scope("projection", initializer=xavier_weight_init(), reuse=None):
            U = tf.get_variable("U", shape=(self.config.hidden_size, len(self.vocab)))
            b2 = tf.get_variable("b2", shape=(len(self.vocab),))
            outputs = [tf.matmul(ts, U) + b2 for ts in rnn_outputs]
        ### END YOUR CODE
        return outputs

    def add_loss_op(self, output):
        """Adds loss ops to the computational graph.

        Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.
              Check https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py

        Args:
            output: List of length num_steps, each a tensor of shape
                    (batch_size, len(self.vocab)) (the per-step logits).
        Returns:
            loss: A 0-d tensor (scalar)
        """
        ### YOUR CODE HERE
        # output shape  : [num_steps * (batch_size, len(self.vocab))]
        # targets shape : [num_steps * (batch_size, )]
        # weights shape : [num_steps * (batch_size, )]
        targets = [tf.squeeze(ts, [1])
                   for ts in tf.split(1, self.config.num_steps, self.labels_placeholder)]
        weights = [tf.ones((self.config.batch_size, )) for step in xrange(self.config.num_steps)]
        loss = sequence_loss(output, targets, weights)
        ### END YOUR CODE
        return loss

    def add_training_op(self, loss):
        """Sets up the training Ops.

        Creates an optimizer and applies the gradients to all trainable
        variables. The Op returned by this function is what must be passed to
        the `sess.run()` call to cause the model to train. See

        https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer

        for more information.

        Hint: Use tf.train.AdamOptimizer for this model.
              Calling optimizer.minimize() will return a train_op object.

        Args:
            loss: Loss tensor, from cross_entropy_loss.
        Returns:
            train_op: The Op for training.
        """
        ### YOUR CODE HERE
        optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr, beta1=0.9,
                                           beta2=0.999, epsilon=1e-08, use_locking=False)
        train_op = optimizer.minimize(loss)
        ### END YOUR CODE
        return train_op

    def __init__(self, config):
        self.config = config
        self.load_data(debug=False)
        self.add_placeholders()
        self.inputs = self.add_embedding()
        self.rnn_outputs = self.add_model(self.inputs)
        self.outputs = self.add_projection(self.rnn_outputs)

        # We want to check how well we correctly predict the next word.
        # We cast o to float64 as there are numerical issues at hand
        # (i.e. sum(output of softmax) = 1.00000298179 and not 1).
        self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs]

        # Reshape the output into len(vocab) sized chunks - the -1 says as many
        # as needed to evenly divide. `output` is a single long tensor,
        # concatenated in order from all short sequences in the current batch;
        # each row holds the unnormalized scores over the vocabulary for the
        # current word. Note that `output` is not used below: the loss is
        # computed directly from the per-step list self.outputs.
        output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)])
        self.calculate_loss = self.add_loss_op(self.outputs)
        self.train_step = self.add_training_op(self.calculate_loss)

    def add_model(self, inputs):
        """Creates the RNN LM model.

        In the space provided below, you need to implement the equations for
        the RNN LM model. Note that you may NOT use built in rnn_cell functions
        from tensorflow.

        Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial
              state for the RNN.
              Add this to self as instance variable self.initial_state
              (Don't change variable name)
        Hint: Add the last RNN output to self as instance variable
              self.final_state
              (Don't change variable name)
        Hint: Make sure to apply dropout to the inputs and the outputs.
        Hint: Use a variable scope (e.g. "RNN") to define RNN variables.
        Hint: Perform an explicit for-loop over inputs. You can use
              scope.reuse_variables() to ensure that the weights used at each
              iteration (each time-step) are the same. (Make sure you don't call
              this for iteration 0 though or nothing will be initialized!)
        Hint: Here are the dimensions of the various variables you will need to create:
              H: (hidden_size, hidden_size)
              I: (embed_size, hidden_size)
              b_1: (hidden_size,)

        Args:
            inputs: List of length num_steps, each of whose elements should be
                    a tensor of shape (batch_size, embed_size).
        Returns:
            outputs: List of length num_steps, each of whose elements should be
                     a tensor of shape (batch_size, hidden_size)
        """
        ### YOUR CODE HERE
        rnn_outputs = []
        self.initial_state = tf.zeros([self.config.batch_size, self.config.hidden_size])
        with tf.variable_scope("RNN", initializer=xavier_weight_init(), reuse=None):
            H = tf.get_variable("H", shape=(self.config.hidden_size, self.config.hidden_size))
            I = tf.get_variable("I", shape=(self.config.embed_size, self.config.hidden_size))
            b1 = tf.get_variable("b1", shape=(self.config.hidden_size, ))
            prev_h = self.initial_state
            for step_input in inputs:
                step_input = tf.nn.dropout(step_input, self.dropout_placeholder)
                prev_h = tf.sigmoid(tf.matmul(prev_h, H) + tf.matmul(step_input, I) + b1)
                #prev_h = tf.nn.dropout(prev_h, self.dropout_placeholder)
                rnn_outputs.append(prev_h)
            self.final_state = prev_h
        ### END YOUR CODE
        return rnn_outputs

    def run_epoch(self, session, data, train_op=None, verbose=10):
        config = self.config
        dp = config.dropout
        if not train_op:
            train_op = tf.no_op()
            dp = 1
        total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
        total_loss = []
        state = self.initial_state.eval()
        for step, (x, y) in enumerate(
                ptb_iterator(data, config.batch_size, config.num_steps)):
            # We need to pass in the initial state and retrieve the final state
            # to give the RNN proper history.
            feed = {self.input_placeholder: x,
                    self.labels_placeholder: y,
                    self.initial_state: state,
                    self.dropout_placeholder: dp}
            loss, state, _ = session.run(
                [self.calculate_loss, self.final_state, train_op], feed_dict=feed)
            total_loss.append(loss)
            if verbose and step % verbose == 0:
                # The derivation of pp can be checked in question Q3-(a)
                sys.stdout.write('\r{} / {} : pp = {}'.format(
                    step, total_steps, np.exp(np.mean(total_loss))))
                sys.stdout.flush()
        if verbose:
            sys.stdout.write('\r')
        return np.exp(np.mean(total_loss))
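# For reference, the recurrence implemented in add_model above is
# h_t = sigmoid(h_{t-1} H + x_t I + b_1). The NumPy sketch below mirrors that
# per-step loop outside of TensorFlow; the sizes and random weights are
# illustrative assumptions, not the assignment's actual configuration.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# illustrative sizes
batch_size, embed_size, hidden_size, num_steps = 4, 5, 3, 6
rng = np.random.RandomState(0)

H = rng.randn(hidden_size, hidden_size) * 0.1   # hidden-to-hidden weights
I = rng.randn(embed_size, hidden_size) * 0.1    # input-to-hidden weights
b1 = np.zeros(hidden_size)

# one (batch_size, embed_size) input per time step, like the `inputs` list
inputs = [rng.randn(batch_size, embed_size) for _ in range(num_steps)]

h = np.zeros((batch_size, hidden_size))         # plays the role of initial_state
rnn_outputs = []
for x_t in inputs:
    # h_t = sigmoid(h_{t-1} H + x_t I + b_1)
    h = sigmoid(h.dot(H) + x_t.dot(I) + b1)
    rnn_outputs.append(h)

final_state = h
print(len(rnn_outputs), rnn_outputs[0].shape)   # 6 (4, 3)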
class RNNLM_Model(LanguageModel):

    def load_data(self, debug=False):
        """Loads starter word-vectors and train/dev/test data."""
        self.vocab = Vocab()
        self.vocab.construct(get_ptb_dataset('train'))
        self.encoded_train = np.array(
            [self.vocab.encode(word) for word in get_ptb_dataset('train')],
            dtype=np.int32)
        self.encoded_valid = np.array(
            [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
            dtype=np.int32)
        #self.encoded_test = np.array(
        #    [self.vocab.encode(word) for word in get_ptb_dataset('test')],
        #    dtype=np.int32)
        if debug:
            num_debug = 1024
            self.encoded_train = self.encoded_train[:num_debug]  # truncate training data
            self.encoded_valid = self.encoded_valid[:num_debug]
            #self.encoded_test = self.encoded_test[:num_debug]  # test loading is disabled above

    def add_placeholders(self):
        self.input_placeholder = tf.placeholder(tf.int32, (None, self.config.num_steps))
        # labels are integer word indices, as required by sequence_loss
        self.labels_placeholder = tf.placeholder(tf.int32, (None, self.config.num_steps))
        self.dropout_placeholder = tf.placeholder(tf.float32)

    def add_embedding(self):
        # map one-hot word indices to word vectors
        inputs = []
        with tf.device('/cpu:0'):
            L = tf.get_variable("Embedding", (len(self.vocab), self.config.embed_size))
            tensors = tf.nn.embedding_lookup(L, self.input_placeholder)
            split_tensors = tf.split(1, self.config.num_steps, tensors)
            for tensor in split_tensors:
                inputs.append(tf.squeeze(tensor, [1]))
        return inputs  # a list of num_steps tensors

    def add_projection(self, rnn_outputs):
        # project hidden states to scores over the vocabulary
        with tf.variable_scope("projection"):
            U = tf.get_variable("U", shape=(self.config.hidden_size, len(self.vocab)))
            b_2 = tf.get_variable("b_2", shape=(len(self.vocab),))
            # these are logits; softmax is applied by the loss / predictions
            outputs = [tf.matmul(x, U) + b_2 for x in rnn_outputs]
        return outputs

    def add_loss_op(self, output):
        # compute the sequence (cross-entropy) loss
        loss = sequence_loss([output],
                             [tf.reshape(self.labels_placeholder, [-1])],
                             [tf.ones([self.config.batch_size * self.config.num_steps])])
        return loss

    def add_training_op(self, loss):
        # minimize the loss with Adam
        optimizer = tf.train.AdamOptimizer(self.config.lr)
        train_op = optimizer.minimize(loss)
        return train_op

    def __init__(self, config):
        self.config = config
        self.load_data(debug=False)
        self.add_placeholders()
        self.inputs = self.add_embedding()
        self.rnn_outputs = self.add_model(self.inputs)
        self.outputs = self.add_projection(self.rnn_outputs)
        self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs]
        output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)])
        self.calculate_loss = self.add_loss_op(output)
        self.train_step = self.add_training_op(self.calculate_loss)

    def add_model(self, inputs):
        hidden_size = self.config.hidden_size
        embed_size = self.config.embed_size
        batch_size = self.config.batch_size
        with tf.variable_scope("RNN"):
            H = tf.get_variable("H", shape=(hidden_size, hidden_size))
            I = tf.get_variable("I", shape=(embed_size, hidden_size))
            b_1 = tf.get_variable("b_1", shape=(hidden_size,))
            self.initial_state = tf.zeros([batch_size, hidden_size])
            pre_h = self.initial_state
            rnn_outputs = []
            for step in inputs:
                step = tf.nn.dropout(step, self.dropout_placeholder)
                pre_h = tf.sigmoid(tf.matmul(pre_h, H) + tf.matmul(step, I) + b_1)
                rnn_outputs.append(tf.nn.dropout(pre_h, self.dropout_placeholder))
            self.final_state = pre_h
        return rnn_outputs

    def run_epoch(self, session, data, train_op=None, verbose=10):
        config = self.config
        dp = config.dropout
        if not train_op:
            train_op = tf.no_op()
            dp = 1
        # total number of iterations over this dataset
        total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
        total_loss = []
        state = self.initial_state.eval()
        for step, (x, y) in enumerate(
                ptb_iterator(data, config.batch_size, config.num_steps)):
            feed = {self.input_placeholder: x,
                    self.labels_placeholder: y,
                    self.initial_state: state,
                    self.dropout_placeholder: dp}
            loss, state, _ = session.run(
                [self.calculate_loss, self.final_state, train_op], feed_dict=feed)
            total_loss.append(loss)
            if verbose and step % verbose == 0:
                sys.stdout.write('\r{} / {} : pp = {}'.format(
                    step, total_steps, np.exp(np.mean(total_loss))))
                sys.stdout.flush()
        if verbose:
            sys.stdout.write('\r')
        return np.exp(np.mean(total_loss))
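# Both run_epoch implementations report perplexity as the exponential of the
# mean per-batch cross-entropy, pp = exp((1/N) * sum_t CE_t). A minimal sketch
# of that calculation with made-up loss values (assumed to be average
# cross-entropies in nats from equally sized batches, so the simple mean is valid):

import numpy as np

# per-batch average cross-entropy losses, as collected in total_loss
total_loss = [6.2, 5.9, 5.6, 5.4]

perplexity = np.exp(np.mean(total_loss))
print(perplexity)   # ~322, i.e. exp(5.775)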