Exemplo n.º 1
0
 def load_data(self, debug=False):
     """Build the vocabulary and encode the train/valid/test splits.

     A ``Vocab`` is constructed from the 'train' split returned by
     ``get_ptb_dataset``. Every word of each split is then passed through
     ``self.vocab.encode`` and stored as an ``np.int32`` array in
     ``self.encoded_train``, ``self.encoded_valid`` and
     ``self.encoded_test``.

     Args:
         debug: When true, each encoded array is truncated to its first
             1024 entries.
     """
     self.vocab = Vocab()
     self.vocab.construct(get_ptb_dataset('train'))
     self.encoded_train = np.array(
         [self.vocab.encode(word) for word in get_ptb_dataset('train')],
         dtype=np.int32)
     # The validation split must be loaded here: the debug branch below
     # slices self.encoded_valid, which otherwise would not exist yet.
     self.encoded_valid = np.array(
         [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
         dtype=np.int32)
     self.encoded_test = np.array(
         [self.vocab.encode(word) for word in get_ptb_dataset('test')],
         dtype=np.int32)
     if debug:
         num_debug = 1024
         self.encoded_train = self.encoded_train[:num_debug]
         self.encoded_valid = self.encoded_valid[:num_debug]
         self.encoded_test = self.encoded_test[:num_debug]
 def load_data(self, debug=False):
   """Build the vocabulary and encode the train/valid/test splits.

   The vocabulary is constructed from the 'train' split; each split is
   then encoded word by word into an int32 array stored on the instance
   as ``encoded_train``, ``encoded_valid`` and ``encoded_test``. With
   ``debug`` set, every array is cut down to its first 1024 entries.
   """
   self.vocab = Vocab()
   self.vocab.construct(get_ptb_dataset('train'))

   def _encode_split(split_name):
     # Encode every word of one split with the shared vocabulary.
     encoded = [self.vocab.encode(token)
                for token in get_ptb_dataset(split_name)]
     return np.array(encoded, dtype=np.int32)

   self.encoded_train = _encode_split('train')
   self.encoded_valid = _encode_split('valid')
   self.encoded_test = _encode_split('test')

   if not debug:
     return
   limit = 1024
   for attr in ('encoded_train', 'encoded_valid', 'encoded_test'):
     setattr(self, attr, getattr(self, attr)[:limit])
Exemplo n.º 3
0
 def load_data(self, debug=False):
     """Construct the vocabulary and store the encoded data splits.

     The 'train' split feeds ``Vocab.construct``. Afterwards the
     'train', 'valid' and 'test' splits are each mapped through
     ``self.vocab.encode`` and kept as int32 arrays on the instance.
     When ``debug`` is true only the first 1024 entries of each array
     are retained.
     """
     self.vocab = Vocab()
     self.vocab.construct(get_ptb_dataset('train'))

     train_ids = [self.vocab.encode(tok) for tok in get_ptb_dataset('train')]
     self.encoded_train = np.array(train_ids, dtype=np.int32)

     valid_ids = [self.vocab.encode(tok) for tok in get_ptb_dataset('valid')]
     self.encoded_valid = np.array(valid_ids, dtype=np.int32)

     test_ids = [self.vocab.encode(tok) for tok in get_ptb_dataset('test')]
     self.encoded_test = np.array(test_ids, dtype=np.int32)

     if debug:
         # Keep only a small prefix of every split.
         head = slice(None, 1024)
         self.encoded_train = self.encoded_train[head]
         self.encoded_valid = self.encoded_valid[head]
         self.encoded_test = self.encoded_test[head]
Exemplo n.º 4
0
def load_data(debug=False):
    """Build a vocabulary and return the encoded data splits.

    The vocabulary is constructed from the 'train' split. Each of the
    'train', 'valid' and 'test' splits is then encoded word by word
    into an int32 array.

    Args:
        debug: When true, each array is truncated to its first 1024
            entries.

    Returns:
        A tuple ``(encoded_train, encoded_valid, encoded_test, vocab)``.
    """
    vocab = Vocab()
    vocab.construct(get_ptb_dataset('train'))

    def _encode_split(split_name):
        # Encode one split with the vocabulary built above.
        ids = [vocab.encode(token) for token in get_ptb_dataset(split_name)]
        return np.array(ids, dtype=np.int32)

    encoded_train = _encode_split('train')
    encoded_valid = _encode_split('valid')
    encoded_test = _encode_split('test')

    if debug:
        limit = 1024
        encoded_train, encoded_valid, encoded_test = (
            encoded_train[:limit],
            encoded_valid[:limit],
            encoded_test[:limit],
        )
    return encoded_train, encoded_valid, encoded_test, vocab
  def load_data(self, debug=False):
    """Load the encoded data splits and build the embedding constant.

    A ``Vocab`` is constructed from the 'train' split; the 'train',
    'valid' and 'test' splits are encoded word by word into int32 arrays
    stored as ``self.encoded_train``, ``self.encoded_valid`` and
    ``self.encoded_test``. Word vectors are then read from
    ``data/ner/wordVectors.txt`` (with their words in
    ``data/ner/vocab.txt``) and arranged into a
    ``(len(self.vocab), self.config.embed_size)`` matrix that is stored
    as the float32 TensorFlow constant ``self.L``.

    Args:
      debug: When true, each encoded array is truncated to its first
        1024 entries.

    Raises:
      ValueError: If a vocabulary word appears more than once in the
        word list file (``ndarray.item`` requires a single match).
    """
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))
    self.encoded_train = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('train')],
        dtype=np.int32)
    self.encoded_valid = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
        dtype=np.int32)
    self.encoded_test = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('test')],
        dtype=np.int32)
    if debug:
      num_debug = 1024
      self.encoded_train = self.encoded_train[:num_debug]
      self.encoded_valid = self.encoded_valid[:num_debug]
      self.encoded_test = self.encoded_test[:num_debug]

    # Load word vectors and the word list that labels their rows.
    all_embeddings = np.loadtxt("data/ner/wordVectors.txt")
    all_words = np.genfromtxt("data/ner/vocab.txt", dtype='str')

    # L is the embedding matrix: row i holds the vector for vocab index i.
    L = np.zeros((len(self.vocab), self.config.embed_size))
    for i in range(len(self.vocab)):
      word = self.vocab.index_to_word[i]
      matches = np.where(all_words == word)[0]

      if matches.shape[0] == 0:
        # Word has no pretrained vector: fall back to the first row
        # (presumably the unknown-word vector -- confirm against the
        # data files).
        L[i, :] = all_embeddings[0, :]
      else:
        # ndarray.item() is the supported replacement for the removed
        # np.asscalar and likewise requires exactly one match.
        L[i, :] = all_embeddings[matches.item(), :]

    self.L = tf.constant(L, dtype=tf.float32)
import sys
import time
import numpy as np
from copy import deepcopy
from utils import calculate_perplexity, get_ptb_dataset, Vocab
from utils import ptb_iterator, sample
from model import LanguageModel
import tensorflow as tf
from tensorflow.contrib.seq2seq import sequence_loss

debug = True

# Build the vocabulary from the 'train' split, then encode every word of
# the train/valid/test splits into int32 arrays.
vocab = Vocab()
vocab.construct(get_ptb_dataset('train'))
encoded_train = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32)
encoded_valid = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32)
encoded_test = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32)
if debug:
    # Debug runs keep only the first 1024 entries of each split.
    num_debug = 1024
    encoded_train = encoded_train[:num_debug]
    encoded_valid = encoded_valid[:num_debug]
    encoded_test = encoded_test[:num_debug]

# Parenthesized form prints the same text on Python 2 and Python 3.
print('****** LOADED DATA')
'''**********************************************************************************************************'''

# Hyper Parameters
Exemplo n.º 7
0
import sys
import time
import numpy as np
from copy import deepcopy
from utils import calculate_perplexity, get_ptb_dataset, Vocab
from utils import ptb_iterator, sample
from model import LanguageModel
import tensorflow as tf
from tensorflow.contrib.seq2seq import sequence_loss

# Run-mode switches.
test_on_ptb = True
generate_fun = True
debug = True

# Build the vocabulary from the 'train' split, then encode every word of
# the train/valid/test splits into int32 arrays.
vocab = Vocab()
vocab.construct(get_ptb_dataset('train'))
encoded_train = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32)
encoded_valid = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32)
encoded_test = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32)
if debug:
    # Debug runs keep only the first 1024 entries of each split.
    num_debug = 1024
    encoded_train = encoded_train[:num_debug]
    encoded_valid = encoded_valid[:num_debug]
    encoded_test = encoded_test[:num_debug]

# Parenthesized form prints the same text on Python 2 and Python 3.
print('****** LOADED DATA')

# Hyperparameters.
lr = 0.01
batch_size = 1
embed_size = 50
hidden_size = 100