Example #1
 def __init__(self, train=False):
     # load data from pickle and npy files
     self.metadata, idx_q, idx_a = data.load_data(PATH='datasets/twitter/')
     (trainX, trainY), (testX, testY), (validX, validY) = \
         data_utils.split_dataset(idx_q, idx_a)

     # parameters
     xseq_len = trainX.shape[-1]
     yseq_len = trainY.shape[-1]
     batch_size = 16
     xvocab_size = len(self.metadata['idx2w'])
     yvocab_size = xvocab_size
     emb_dim = 1024
     importlib.reload(seq2seq_wrapper)
     self.model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                          yseq_len=yseq_len,
                                          xvocab_size=xvocab_size,
                                          yvocab_size=yvocab_size,
                                          ckpt_path='ckpt/twitter/',
                                          emb_dim=emb_dim,
                                          num_layers=3)
     if train:
         val_batch_gen = data_utils.rand_batch_gen(validX, validY, 32)
         train_batch_gen = data_utils.rand_batch_gen(
             trainX, trainY, batch_size)
         sess = self.model.train(train_batch_gen, val_batch_gen)
     self.sess = self.model.restore_last_session()
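A minimal sketch of querying the restored model, assuming the class above is named Bot, that metadata carries the 'w2idx'/'idx2w' lookups seen in the other examples, and that predict() takes a time-major id batch as in Example #17:

import numpy as np

bot = Bot()                     # hypothetical name for the class above
w2idx, idx2w = bot.metadata['w2idx'], bot.metadata['idx2w']
ids = [w2idx.get(w, w2idx.get('unk', 0)) for w in 'how are you'.split()]
ids += [0] * (25 - len(ids))    # pad to the encoder length (25 is an assumption)
question = np.array(ids, dtype=np.int32).reshape(-1, 1)   # (seq_len, batch=1)
answer_ids = bot.model.predict(bot.sess, question)[0]
print(' '.join(idx2w[i] for i in answer_ids if i))        # skip zero padding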
Example #2
    def __init__(
        self,
        path="/home/james/PycharmProjects/flaskChatbot/app/seq2seq_backend/seq2seq_model.ckpt-44000"
    ):
        self.metadata, self.idx_q, self.idx_a = data.load_data(
            PATH='/home/james/PycharmProjects/flaskChatbot/app/'
                 'seq2seq_backend/datasets/cornell_corpus/')
        self.path = path
        (trainX, trainY), (testX, testY), (validX, validY) = \
            data_utils.split_dataset(self.idx_q, self.idx_a)

        # parameters
        xseq_len = trainX.shape[-1]
        yseq_len = trainY.shape[-1]
        batch_size = 32
        xvocab_size = len(self.metadata['idx2w'])
        yvocab_size = xvocab_size
        emb_dim = 1024

        import seq2seq_wrapper

        # In[7]:

        self.model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                             yseq_len=yseq_len,
                                             xvocab_size=xvocab_size,
                                             yvocab_size=yvocab_size,
                                             ckpt_path='ckpt/cornell_corpus/',
                                             emb_dim=emb_dim,
                                             num_layers=3)
        self.sess = tf.Session()
        saver = tf.train.Saver()
        saver.restore(self.sess, self.path)
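If the checkpoint number is not known in advance, the newest checkpoint in a directory can be restored instead, mirroring the tf.train.latest_checkpoint pattern of Example #14; the directory here is illustrative:

        ckpt = tf.train.latest_checkpoint('ckpt/cornell_corpus/')
        if ckpt is not None:
            saver.restore(self.sess, ckpt)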
Example #3
def getModel():
    # load data from pickle and npy files
    metadata, idx_q, idx_a = data.load_data(PATH='datasets/cornell_corpus/')
    (trainX, trainY), (testX, testY), (validX, validY) = data_utils.split_dataset(idx_q, idx_a)
    train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, 32)

    # print(len(trainX))
    test_batch_gen = data_utils.rand_batch_gen(testX, testY, 256)
    input_ = next(test_batch_gen)[0]
    xseq_len = 25
    yseq_len = 25
    batch_size = 16
    xvocab_size = len(metadata['idx2w'])
    yvocab_size = xvocab_size
    emb_dim = 1024
    model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                    yseq_len=yseq_len,
                                    xvocab_size=xvocab_size,
                                    yvocab_size=yvocab_size,
                                    ckpt_path='ckpt/cornell_corpus/',
                                    emb_dim=emb_dim,
                                    num_layers=3)
    sess = model.restore_last_session()
    output = model.predict(sess, input_[0:25])
    #print(output)
    return model, sess
Example #4
def chatbot(txt):
    #chatbot code here 
    # Importing the dataset
    metadata, idx_q, idx_a = data_preprocessing.load_data(PATH = './')
    # Splitting the dataset into the Training set and the Test set
    (trainX, trainY), (testX, testY), (validX, validY) = data_utils_1.split_dataset(idx_q, idx_a)
    # Embedding
    xseq_len = trainX.shape[-1]
    yseq_len = trainY.shape[-1]
    batch_size = 16
    vocab_twit = metadata['idx2w']
    xvocab_size = len(metadata['idx2w'])  
    yvocab_size = xvocab_size
    emb_dim = 1024
    idx2w, w2idx, limit = data_utils_2.get_metadata()
    # Building the seq2seq model
    model = seq2seq_wrapper.Seq2Seq(xseq_len = xseq_len,
                                yseq_len = yseq_len,
                                xvocab_size = xvocab_size,
                                yvocab_size = yvocab_size,
                                ckpt_path = './weights',
                                emb_dim = emb_dim,
                                num_layers = 3)
    # Loading the weights and Running the session
    session = model.restore_last_session()
    # Getting the ChatBot predicted answer
    def respond(question):
        encoded_question = data_utils_2.encode(question, w2idx, limit['maxq'])
        answer = model.predict(session, encoded_question)[0]
        return data_utils_2.decode(answer, idx2w) 
    # Setting up the chat. An optional voice-input loop (pyttsx3 +
    # speech_recognition) is left disabled below:
    # while True:
    #     engine = pyttsx3.init()
    #     engine.runAndWait()
    #     try:
    #         r = sr.Recognizer()
    #         mic = sr.Microphone()
    #         with mic as source:
    #             r.adjust_for_ambient_noise(source)
    #             audio = r.listen(source)
    #             print('You :')
    #             x = r.recognize_google(audio)
    #             print(x)
    #     except:
    #         continue
    #     question = x.lower()
    #     if question == 'good bye':
    #         print('Ok Bye')
    #         break
    #     print(respond(question))
    question = txt
    return respond(question)
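A sample call; note that chatbot() rebuilds the whole model on every invocation, so an interactive loop would hoist everything before respond() out of the function:

print(chatbot('how are you'))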
Example #5
def build_model():
    # Building the seq2seq model
    xseq_len, yseq_len, xvocab_size, yvocab_size, emb_dim = load_data()
    model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                    yseq_len=yseq_len,
                                    xvocab_size=xvocab_size,
                                    yvocab_size=yvocab_size,
                                    ckpt_path='./weights',
                                    emb_dim=emb_dim,
                                    num_layers=3)
    return model
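Typical use, following the restore pattern of the other examples on this page:

model = build_model()
sess = model.restore_last_session()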
Example #6
def get_model():
    importlib.reload(d_data)
    importlib.reload(IE_data)

    d_metadata, d_idx_q, d_idx_a = d_data.load_data(PATH='../datasets/danny/')
    i_metadata, i_idx_q, i_idx_a = IE_data.load_data(PATH='../datasets/IE/')

    (d_trainX, d_trainY), (d_testX, d_testY), (d_validX, d_validY) = data_utils.split_dataset(d_idx_q, d_idx_a)
    (i_trainX, i_trainY), (i_testX, i_testY), (i_validX, i_validY) = data_utils.split_dataset(i_idx_q, i_idx_a)

    d_model = seq2seq_wrapper.Seq2Seq(
        xseq_len=d_trainX.shape[-1],
        yseq_len=d_trainY.shape[-1],
        xvocab_size=len(d_metadata['idx2w']),
        yvocab_size=len(d_metadata['idx2w']),
        ckpt_path='../ckpt/danny/',
        loss_path='',
        metadata=d_metadata,
        emb_dim=1024,
        num_layers=3
    )

    i_model = seq2seq_wrapper.Seq2Seq(
        xseq_len=i_trainX.shape[-1],
        yseq_len=i_trainY.shape[-1],
        xvocab_size=len(i_metadata['idx2w']),
        yvocab_size=len(i_metadata['idx2w']),
        ckpt_path='../ckpt/IE/',
        loss_path='',
        metadata=i_metadata,
        emb_dim=1024,
        num_layers=3
    )

    d_sess = d_model.restore_last_session()
    i_sess = i_model.restore_last_session()

    return d_model, i_model, d_sess, i_sess, d_metadata, i_metadata
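A sketch of serving both restored models from one process; the routing rule here is purely illustrative:

d_model, i_model, d_sess, i_sess, d_metadata, i_metadata = get_model()

def reply(question_ids, domain='danny'):
    # pick the danny or IE model; question_ids is a time-major id batch
    model, sess = (d_model, d_sess) if domain == 'danny' else (i_model, i_sess)
    return model.predict(sess, question_ids)[0]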
Example #7
    def __init__(self, params):
        self.params = params
        self.lm_a = lm_wrapper.LM(params.lm_a)
        self.lm_b = lm_wrapper.LM(params.lm_b)

        self.seq2seq_ab = seq2seq_wrapper.Seq2Seq(
            xseq_len=params.seq2seq.max_len_A,
            yseq_len=params.seq2seq.max_len_B,
            xvocab_size=params.seq2seq.vocab_size_A,
            yvocab_size=params.seq2seq.vocab_size_B,
            ckpt_path=params.seq2seq.ckpt_path_AB,
            emb_dim=params.seq2seq.emb_dim,
            num_layers=params.seq2seq.num_layers,
            model_name='seq2seq_ab')
        self.seq2seq_ba = seq2seq_wrapper.Seq2Seq(
            xseq_len=params.seq2seq.max_len_B,
            yseq_len=params.seq2seq.max_len_A,
            xvocab_size=params.seq2seq.vocab_size_B,
            yvocab_size=params.seq2seq.vocab_size_A,
            ckpt_path=params.seq2seq.ckpt_path_BA,
            emb_dim=params.seq2seq.emb_dim,
            num_layers=params.seq2seq.num_layers,
            model_name='seq2seq_ba')
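Since the two directions are paired, a quick consistency check is to run a batch A→B and feed the result back through B→A; the open sessions and the time-major input/batch-major output shapes assumed here follow the conventions of the other examples:

    def round_trip(self, sess_ab, sess_ba, batch_a):
        # batch_a: (max_len_A, batch) id matrix; predict() returns
        # batch-major ids, so transpose before re-encoding
        out_b = self.seq2seq_ab.predict(sess_ab, batch_a)
        return self.seq2seq_ba.predict(sess_ba, out_b.T)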
Example #8
xvocab_size = metadata["xvocab_size"]
yvocab_size = metadata["yvocab_size"]
emb_dim = metadata["emb_dim"]
use_lstm = metadata["use_lstm"]
num_layers = metadata["num_layers"]

print("Initialzing model with:")
print("xseq_len=%s, yseq_len=%s" % (xseq_len, yseq_len))
print("xvocab_size=%s, yvocab_size=%s" % (xvocab_size, yvocab_size))
print("emb_dim=%s" % emb_dim)
print("num_layers=%s" % num_layers)

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                emb_dim=emb_dim,
                                num_layers=num_layers,
                                use_lstm=use_lstm)

vocab = data.load_vocab(FLAGS.dataset_name)

print("loaded vocabulary")
assert len(vocab["word2id"].keys()) == xvocab_size

print(len(vocab["word2id"].keys()))

saver_path = logdir
checkpoint_file = tf.train.get_checkpoint_state(saver_path)
ckpt_path = checkpoint_file.model_checkpoint_path
Example #9
# parameters
xseq_len = trainX.shape[-1]
yseq_len = trainY.shape[-1]
batch_size = 16
xvocab_size = len(metadata['idx2w'])
yvocab_size = xvocab_size
emb_dim = 1024

import seq2seq_wrapper

# In[7]:

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='./ckpt/cornell_corpus/',
                                emb_dim=emb_dim,
                                num_layers=3,
                                epochs=30000)

# In[8]:

val_batch_gen = data_utils.rand_batch_gen(validX, validY, 32)
train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, batch_size)

# In[9]:
#sess = model.restore_last_session()
sess = model.train(train_batch_gen, val_batch_gen)
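To continue from the last checkpoint instead of training from scratch, restore first and hand the session to train(), as the commented-out line above hints; that train() accepts an existing session is an assumption:

sess = model.restore_last_session()
sess = model.train(train_batch_gen, val_batch_gen, sess)  # sess argument assumed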
Example #10
(small_validX, small_validY) = (validX[:100], validA[:100])

# In[17]:

import seq2seq_wrapper

# In[23]:

importlib.reload(seq2seq_wrapper)

# In[47]:

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='ckpt/',
                                emb_dim=emb_dim,
                                num_layers=1,
                                epochs=500)

# In[21]:

val_batch_gen = data_utils.rand_batch_gen(small_validX, small_validY, 100)
test_batch_gen = data_utils.rand_batch_gen(small_testX, small_testY, 100)
train_batch_gen = data_utils.rand_batch_gen(trainX_filter_10, trainA_filter_10,
                                            batch_size)
train_batch_gen_story = data_utils.rand_batch_gen(trainX_filter_10,
                                                  trainA_filter_10, 1)

# In[ ]:
sess = model.restore_last_session()
Example #11
xseq_len = trainX.shape[-1]
yseq_len = trainY.shape[-1]
batch_size = 32
xvocab_size = len(metadata['idx2w'])
yvocab_size = xvocab_size
emb_dim = 1024

import seq2seq_wrapper

# In[7]:

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='ckpt/danny/',
                                loss_path='ckpt/danny/preset/',
                                metadata=metadata,
                                emb_dim=emb_dim,
                                num_layers=3,
                                epochs=10001)

# In[8]:

val_batch_gen = data_utils.rand_batch_gen(validX, validY, 16)
train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, batch_size)

# In[9]:
#sess = model.restore_last_session()
sess = model.train(train_batch_gen, val_batch_gen)
Example #12
(x_train, y_train), (x_test, y_test), (x_valid, y_valid) = \
    data_utils.split_data(idx_descriptions, idx_headings)  # pair (x, y) per split

#define parameters
xseq_length = x_train.shape[-1]
yseq_length = y_train.shape[-1]
batch_size = config.batch_size
xvocab_size = len(article_metadata['idx2word'])
yvocab_size = xvocab_size
checkpoint_path = path.join(config.path_outputs, 'checkpoint')

print(checkpoint_path)

#define model
model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_length,
                                yseq_len=yseq_length,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                emb_dim=config.embedding_dim,
                                num_layers=3,
                                ckpt_path=checkpoint_path)

val_batch_gen = data_utils.generate_random_batch(x_valid, y_valid,
                                                 config.batch_size)
train_batch_gen = data_utils.generate_random_batch(x_train, y_train,
                                                   config.batch_size)

sess = model.restore_last_session()
sess = model.train(train_batch_gen, val_batch_gen)
Example #13
print("xseq_len=%s, yseq_len=%s" % (xseq_len, yseq_len))
print("xvocab_size=%s, yvocab_size=%s" % (xvocab_size, yvocab_size))
print("emb_dim=%s" % emb_dim)

print("Training with:")
print("training set size=%s" % train_q.shape[0])
print("test set size=%s" % test_q.shape[0])

logdir = "./logs/"
logdir += str(int(time.time())) if not FLAGS.logdir else FLAGS.logdir
print("Checkpoint saved at %s" % logdir)

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                emb_dim=emb_dim,
                                num_layers=FLAGS.num_layers,
                                learning_rate=FLAGS.learning_rate,
                                use_lstm=FLAGS.use_lstm)

eval_batch_gen = data_utils.rand_batch_gen(test_q, test_a, batch_size)
train_batch_gen = data_utils.rand_batch_gen(train_q, train_a, batch_size)

# create session for training
gpu_options = tf.GPUOptions(
    per_process_gpu_memory_fraction=FLAGS.memory_usage_percentage / 100)
session_conf = tf.ConfigProto(allow_soft_placement=True,
                              gpu_options=gpu_options)
sess = tf.Session(config=session_conf)
# init all variables
sess.run(tf.global_variables_initializer())
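The FLAGS above come from tf.app.flags; a matching definition block would look roughly like this (names inferred from usage here, defaults are assumptions):

flags = tf.app.flags
flags.DEFINE_integer('num_layers', 3, 'number of stacked RNN layers')
flags.DEFINE_float('learning_rate', 0.0001, 'optimizer step size')
flags.DEFINE_boolean('use_lstm', False, 'use LSTM cells instead of GRU')
flags.DEFINE_integer('memory_usage_percentage', 90, 'GPU memory cap in percent')
flags.DEFINE_string('logdir', '', 'checkpoint directory override')
FLAGS = flags.FLAGS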
Example #14
testX = np.load('test_q.npy')
testY = np.load('test_a.npy')

with open('metadata.pkl', 'rb') as f:
    metadata = pickle.load(f)

xseq_len = testX.shape[-1]
yseq_len = testY.shape[-1]
xvocab_size = len(metadata['idx2w'])
yvocab_size = xvocab_size

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='ckpt/',
                                emb_dim=emb_dim,
                                num_layers=3,
                                mode=seq2seq_wrapper.ATTENTION_MODE)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

checkpoint_file = tf.train.latest_checkpoint(read_ckpt_dir)
saver.restore(sess, checkpoint_file)

start = 0
end = batch_size
while start < len(testX):
    x = testX[start:end]
    y = testY[start:end]  # the original sliced testX here again; evident typo
    # batches are stored row-major, so transpose to the time-major layout
    # that predict() expects elsewhere on this page
    output = model.predict(sess, x.T)
    start, end = end, end + batch_size
Example #15
yvocab_size = xvocab_size
zvocab_size = xvocab_size
emb_dim = 1536

>>>>>>> 0d81aa006b893195d432ed2dfbbf7f930ec62226
# In[]
import seq2seq_wrapper

# In[23]:

importlib.reload(seq2seq_wrapper)


# In[47]:

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                zseq_len=zseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                zvocab_size=zvocab_size,
                                ckpt_path='ckpt/',
                                emb_dim=emb_dim,
                                num_layers=1,
                                epochs=5)
Example #16
# In[4]:

import seq2seq_wrapper

# In[ ]:

import importlib
importlib.reload(seq2seq_wrapper)

# In[5]:

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='ckpt/twitter/',
                                emb_dim=emb_dim,
                                num_layers=3)

# In[6]:

val_batch_gen = data_utils.rand_batch_gen(validX, validY, 256)
test_batch_gen = data_utils.rand_batch_gen(testX, testY, 256)
train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, batch_size)

# In[9]:

sess = model.train(train_batch_gen, val_batch_gen)

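After train() returns the live session, the test generator above gives a quick spot check; the predict() usage follows the other examples on this page:

test_x, _ = next(test_batch_gen)
output = model.predict(sess, test_x)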
Example #17
(trainX, trainY), (testX, testY), (validX, validY) = data_utils.split_dataset(idx_q, idx_a)

# parameters
xseq_len = trainX.shape[-1]
yseq_len = trainY.shape[-1]
xvocab_size = len(metadata['idx2w'])
yvocab_size = xvocab_size
emb_dim = 1024

import seq2seq_wrapper

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='ckpt/IE/',
                                loss_path='',
                                metadata=metadata,
                                emb_dim=emb_dim,
                                num_layers=3)

sess = model.restore_last_session()
print('\nReady to test!\n')
input_txt = input()

while not input_txt == '[End]':
    question = data.split_sentence(input_txt, metadata)
    input_ = question.T
    output_ = model.predict(sess, input_)
    answer = data_utils.decode(sequence=output_[0],
                               lookup=metadata['idx2w'],
                               separator=' ')  # separator kwarg is an assumption
    print(answer)
    input_txt = input()
Example #18
xvocab_size = len(metadata['idx2w'])  
yvocab_size = xvocab_size
emb_dim = 1024
idx2w, w2idx, limit = data_utils_2.get_metadata()



########## PART 2 - BUILDING THE SEQ2SEQ MODEL ##########



# Building the seq2seq model
model = seq2seq_wrapper.Seq2Seq(xseq_len = xseq_len,
                                yseq_len = yseq_len,
                                xvocab_size = xvocab_size,
                                yvocab_size = yvocab_size,
                                ckpt_path = './weights',
                                emb_dim = emb_dim,
                                num_layers = 3)



########## PART 3 - TRAINING THE SEQ2SEQ MODEL ##########



# See the Training in seq2seq_wrapper.py



########## PART 4 - TESTING THE SEQ2SEQ MODEL ##########
# parameters
xseq_len = trainX.shape[-1]
yseq_len = trainY.shape[-1]
batch_size = 128
xvocab_size = len(metadata['idx2w'])
yvocab_size = xvocab_size
emb_dim = 1024

import seq2seq_wrapper

# In[7]:

model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='ckpt/opensubtitle/',
                                emb_dim=emb_dim,
                                num_layers=4)


val_batch_gen = data_utils.rand_batch_gen(validX, validY, 32)
train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, batch_size)

if FLAGS.restore:
    sess = model.restore_last_session()

sess = model.train(train_batch_gen, val_batch_gen)
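As written, the session restored under FLAGS.restore is discarded, because train() is then called unconditionally. If the wrapper's train() accepts an existing session (an assumption, mirroring the sketch after Example #9), the restored weights can be carried through:

sess = model.restore_last_session() if FLAGS.restore else None
sess = model.train(train_batch_gen, val_batch_gen, sess)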