Exemplo n.º 1
0
Arquivo: nn.py Projeto: joydeb28/NLP
def predict(sentence, model):
    """Tag every whitespace-separated word of ``sentence`` with a label.

    Each word is paired with a placeholder ``O`` label (newline-terminated),
    passed through ``addCharInformatioin``, ``createMatrices``, ``padding``
    and ``createBatches``, and then fed to ``model`` one batch entry at a
    time. The arg-max class of each position is translated to a label name
    through the module-level ``idx2Label`` mapping.

    Args:
        sentence: Text to tag; it is split on whitespace.
        model: Object whose ``predict([tokens, casing, char], verbose=False)``
            returns class scores for a batch holding a single entry.

    Returns:
        A list of ``(word, label)`` tuples derived from the last batch
        entry. The same list is printed before it is returned.
    """
    words = sentence.split()

    # Wrap the sentence as a one-element dataset of [word, label] pairs.
    dataset = addCharInformatioin([[[word, 'O\n'] for word in words]])
    matrices = createMatrices(dataset, word2Idx, label2Idx, case2Idx, char2Idx)
    batches, _ = createBatches(padding(matrices))

    predLabels = []
    for tokens, casing, char, _labels in batches:
        # Add a leading axis so each entry is presented as a batch of one.
        model_inputs = [
            np.asarray([tokens]),
            np.asarray([casing]),
            np.asarray([char]),
        ]
        scores = model.predict(model_inputs, verbose=False)[0]
        predLabels.append(scores.argmax(axis=-1))  # winning class per position

    entity_labels = [
        (words[position], idx2Label[int(class_idx)])
        for position, class_idx in enumerate(predLabels[-1])
    ]
    print("predLabels", entity_labels)

    return entity_labels
Exemplo n.º 2
0
Arquivo: helper.py Projeto: avinik/Al
def test(model, test_set, idx2Label, package):
    """Score ``model`` on ``test_set`` and report precision, recall and F1.

    The set is batched with ``createBatches``, tagged with ``tag_dataset``
    and scored with ``compute_f1``; the three scores are printed with three
    decimals.

    Args:
        model: Model handed to ``tag_dataset``.
        test_set: Data accepted by ``createBatches``.
        idx2Label: Mapping handed to ``compute_f1`` together with the labels.
        package: Extra argument forwarded unchanged to ``tag_dataset``.

    Returns:
        The tuple ``(precision, recall, f1)`` produced by ``compute_f1``.
    """
    batches, _ = createBatches(test_set)
    predicted, expected = tag_dataset(batches, model, package)
    pre_test, rec_test, f1_test = compute_f1(predicted, expected, idx2Label)

    summary = "Test-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (
        pre_test, rec_test, f1_test)
    print(summary)
    return pre_test, rec_test, f1_test
Exemplo n.º 3
0
Arquivo: helper.py Projeto: avinik/Al
def train(model, train_set, epochs, package):
    """Fit ``model`` on ``train_set`` for ``epochs`` passes and return it.

    The training set is grouped with ``createBatches`` and consumed one
    minibatch at a time through ``iterate_minibatches``. The inputs handed
    to ``model.fit`` depend on ``package.modelName``:

    * ``"LSTM_word"``: token indices only.
    * ``"LSTM_word_char"``: token, casing and character indices.

    For any other model name no ``fit`` call is made and the model is
    returned unchanged.

    Args:
        model: Object exposing ``fit(inputs, labels, verbose=0)``.
        train_set: Data accepted by ``createBatches``.
        epochs: Number of passes over the training batches.
        package: Object whose ``modelName`` attribute selects the inputs.

    Returns:
        The same ``model`` object that was passed in.
    """
    train_batch, train_batch_len = createBatches(train_set)

    for epoch in range(epochs):
        # Show the epoch 1-based so the last banner reads "Epoch N/N"
        # instead of stopping at "Epoch N-1/N".
        print("Epoch %d/%d" % (epoch + 1, epochs))
        a = Progbar(len(train_batch_len))
        for i, batch in enumerate(
                iterate_minibatches(train_batch, train_batch_len)):
            labels, tokens, casing, char = batch

            if package.modelName == "LSTM_word":
                inputs = [tokens]
            elif package.modelName == "LSTM_word_char":
                inputs = [tokens, casing, char]
            else:
                inputs = None  # unknown model name: skip fitting

            if inputs is not None:
                with tf.device('/gpu:0'):
                    model.fit(inputs, labels, verbose=0)

            # ``i`` is the 0-based index of the batch just processed; the
            # progress bar expects the number of completed steps.
            a.update(i + 1)
        print(' ')
    return model
Exemplo n.º 4
0
# Character vocabulary: index 0 is reserved for padding and 1 for unknown
# characters; every character of the literal below gets the next free index.
char2Idx = {"PADDING": 0, "UNKNOWN": 1}
for c in " 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.,-_()[]{}!?:;#'\"/\\%$`&=*+@^~|":
    char2Idx[c] = len(char2Idx)

# Convert each sentence collection to index matrices, then pad them.
train_set = padding(
    createMatrices(trainSentences, word2Idx, label2Idx, case2Idx, char2Idx))
dev_set = padding(
    createMatrices(devSentences, word2Idx, label2Idx, case2Idx, char2Idx))
test_set = padding(
    createMatrices(testSentences, word2Idx, label2Idx, case2Idx, char2Idx))

# Inverse label map (index -> label), persisted together with the word map.
# NOTE(review): these are dicts written with np.save, so reading them back
# presumably needs np.load(..., allow_pickle=True) -- confirm at the loader.
idx2Label = {v: k for k, v in label2Idx.items()}
np.save("models/idx2Label.npy", idx2Label)
np.save("models/word2Idx.npy", word2Idx)

# Group each padded set into batches; the second value is kept alongside.
train_batch, train_batch_len = createBatches(train_set)
dev_batch, dev_batch_len = createBatches(dev_set)
test_batch, test_batch_len = createBatches(test_set)

# Word branch: int32 indices looked up in the frozen wordEmbeddings matrix.
words_input = Input(shape=(None, ), dtype='int32', name='words_input')
words = Embedding(input_dim=wordEmbeddings.shape[0],
                  output_dim=wordEmbeddings.shape[1],
                  weights=[wordEmbeddings],
                  trainable=False)(words_input)
# Casing branch: int32 indices looked up in the frozen caseEmbeddings matrix.
casing_input = Input(shape=(None, ), dtype='int32', name='casing_input')
casing = Embedding(output_dim=caseEmbeddings.shape[1],
                   input_dim=caseEmbeddings.shape[0],
                   weights=[caseEmbeddings],
                   trainable=False)(casing_input)
character_input = Input(shape=(
    None,
Exemplo n.º 5
0
# Casing categories and the integer index assigned to each of them.
case2Idx = {
    'numeric': 0,
    'allLower': 1,
    'allUpper': 2,
    'initialUpper': 3,
    'other': 4,
    'mainly_numeric': 5,
    'contains_digit': 6,
    'PADDING_TOKEN': 7
}
# Identity matrix: row i is the one-hot vector for casing index i.
caseEmbeddings = np.identity(len(case2Idx), dtype='float32')

# Convert the training sentences to index matrices, then pad them.
train_set = padding(
    createMatrices(trainSentences, word2Idx, label2Idx, case2Idx, char2Idx))

# Group the padded set into batches; the second value is kept alongside.
train_batch, train_batch_len = createBatches(train_set)

# Word branch: int32 indices looked up in the frozen wordEmbeddings matrix.
words_input = Input(shape=(None, ), dtype='int32', name='words_input')
words = Embedding(input_dim=wordEmbeddings.shape[0],
                  output_dim=wordEmbeddings.shape[1],
                  weights=[wordEmbeddings],
                  trainable=False)(words_input)
# Casing branch: int32 indices looked up in the frozen one-hot matrix above.
casing_input = Input(shape=(None, ), dtype='int32', name='casing_input')
casing = Embedding(output_dim=caseEmbeddings.shape[1],
                   input_dim=caseEmbeddings.shape[0],
                   weights=[caseEmbeddings],
                   trainable=False)(casing_input)
# Character branch: variable first axis, fixed size 52 on the second.
# NOTE(review): 52 is presumably the per-word character length produced by
# padding() -- confirm before changing either side.
character_input = Input(shape=(
    None,
    52,
), name='char_input')
Exemplo n.º 6
0
 def createBatches(self):
     """Batch the train, dev and test sets and store the results on self.

     For each split ``<name>`` the module-level ``createBatches`` is called
     on ``self.<name>_set``; its two return values are stored as
     ``self.<name>_batch`` and ``self.<name>_batch_len``.
     """
     for split in ("train", "dev", "test"):
         batch, batch_len = createBatches(getattr(self, split + "_set"))
         setattr(self, split + "_batch", batch)
         setattr(self, split + "_batch_len", batch_len)
Exemplo n.º 7
0
Arquivo: nn.py Projeto: avinik/Al
# Make sure the embedding table is a NumPy array.
wordEmbeddings = np.array(wordEmbeddings)

# Character vocabulary: index 0 is reserved for padding and 1 for unknown
# characters; every character of ``s`` gets the next free index.
char2Idx = {"PADDING":0, "UNKNOWN":1}
s = " 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.,-_()[]{}!?:;#'\"/\\%$`&=*+@^~|<>"
# Extra whitespace and non-ASCII code points appended to the vocabulary.
# NOTE(review): \x92-\x97 and \xc2 look like stray Windows-1252 / UTF-8
# bytes, presumably seen in the raw dataset -- confirm the file encoding.
s = s + '\t' + '\n' + '\x97' + '\x92'+'\x93' + '\x94' + '\xc2'
for c in s:
    char2Idx[c] = len(char2Idx)


# Convert each sentence collection to index matrices, then pad them.
train_set = padding(createMatrices(trainSentences,word2Idx,  label2Idx, case2Idx, char2Idx))
learn_set = padding(createMatrices(learnSentences,word2Idx, label2Idx, case2Idx,char2Idx))
test_set = padding(createMatrices(testSentences, word2Idx, label2Idx, case2Idx,char2Idx))

# Inverse label map: index -> label.
idx2Label = {v: k for k, v in label2Idx.items()}

# Group each padded set into batches; the second value is kept alongside.
train_batch,train_batch_len = createBatches(train_set)
learn_batch,learn_batch_len = createBatches(learn_set)
test_batch,test_batch_len = createBatches(test_set)


# modelPackage bundles the embeddings, index maps, model name and dataset name.
modelPackage = ModelPackage(wordEmbeddings, caseEmbeddings, word2Idx, label2Idx, char2Idx, modelName, datasetName)

print(modelPackage.modelName)

# Build the model from the package and store it back on the package.
model = createModel(modelPackage)
modelPackage.model = model
# plot_model(model, to_file='model.png')

# Metric placeholders, both initialised to zero.
# NOTE(review): `recoil` presumably means recall -- confirm before renaming.
precision = 0
recoil = 0