Example #1
    print("\nCompiling model\n")
    tgt = Input(shape=(max_sents, SENT_DIM), dtype='float32')
    srcs = Input(shape=(max_sents, SENT_DIM), dtype='float32')
    encode = Bidirectional(LSTM(SENT_DIM // 2,
                                return_sequences=False,
                                dropout=0.0,
                                recurrent_dropout=0.0),
                           input_shape=(max_sents, SENT_DIM))
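    # Bidirectional concatenates the two directions, so SENT_DIM // 2 units
    # per direction yield a document vector of size SENT_DIM again.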
    tgt_vec = encode(tgt)
    src_vec = encode(srcs)
    # attention = _Attention(max_sents,SENT_DIM,dropout=0.2)(tgt,srcs)
    # align1= _SoftAlignment(max_sents,SENT_DIM)(srcs,attention)
    # align2= _SoftAlignment(max_sents,SENT_DIM)(tgt,attention,transpose=True)
    # vec_l = _Comparison(max_sents,SENT_DIM,dropout=0.2)(tgt,align1)
    # vec_r = _Comparison(max_sents,SENT_DIM,dropout=0.2)(srcs,align2)
    pds = _Entailment(SENT_DIM, NUM_CLASSES, dropout=0.4)(tgt_vec, src_vec)
    model = Model(inputs=[tgt, srcs], outputs=pds)
    model.summary()
    model.compile(loss="categorical_crossentropy",
                  optimizer=Adam(lr=0.001),
                  metrics=["accuracy"])

    #get_attention_matrix = K.function([model.layers[0].input,model.layers[1].input,K.learning_phase()],[model.layers[3].output])

    NUM_EPOCHS = 10
    BATCH_SIZE = 10
    MODEL_NAME = "dlnd_bilstm_mlp_best_model_fold_" + str(fold - 1) + ".h5"
    train_target_vecs, valid_target_vecs, train_source_vecs, valid_source_vecs, train_golds, valid_golds = train_test_split(
        target_vecs[train],
        source_vecs[train],
        gold[train])  # split kwargs are truncated in the source; sklearn defaults assumed
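_Entailment is a project-specific classification head used in all three examples but defined in none of them. A minimal sketch of what such a head could look like, assuming it concatenates the two encoded document vectors and classifies them with a dropout-regularized MLP (all names and sizes below are illustrative, not the project's actual layer):

from keras.layers import Dense, Dropout, concatenate

def entailment_head(hidden_dim, num_classes, dropout=0.2):
    """Hypothetical stand-in for the undefined _Entailment layer."""
    def apply(tgt_vec, src_vec):
        merged = concatenate([tgt_vec, src_vec])   # (batch, 2 * hidden_dim)
        x = Dropout(dropout)(merged)
        x = Dense(hidden_dim, activation='relu')(x)
        x = Dropout(dropout)(x)
        return Dense(num_classes, activation='softmax')(x)
    return apply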

Example #2

def process_topic():
#     LOG.debug('Processing topic: %s', topic)
#     ids = [idx for idx in range(len(subtopics)) if subtopics[idx].split('/')[0] == topic]
#     tgt_ids = [all_tgt_ids[idx] for idx in ids]
#     src_ids = [all_src_ids[idx] for idx in ids]
#     golds = [all_golds[idx] for idx in ids]
    tgt_ids = all_tgt_ids
    src_ids = all_src_ids
    golds = all_golds

    max_sents = max(len(doc) for doc in src_ids + tgt_ids)
    LOG.debug("Max sentences: %d", max_sents)

    # target_vecs = [np.vstack([vocab[sent_idx] for sent_idx in doc]) for doc in tgt_ids]
    # source_vecs = [np.vstack([vocab[sent_idx] for sent_idx in doc]) for doc in src_ids]
    # target_vecs = np.array([pad_or_truncate1(mat,max_sents) for mat in target_vecs])
    # source_vecs = np.array([pad_or_truncate1(mat,max_sents) for mat in source_vecs])
    tgt_ids = np.array([pad_or_truncate1(doc, max_sents) for doc in tgt_ids])
    src_ids = np.array([pad_or_truncate1(doc, max_sents) for doc in src_ids])
    gold_list = list(golds)
    golds = to_categorical(golds)

    train, test = train_test_split(
                    list(range(len(gold_list))),
                    train_size=0.8,
                    random_state=9274,
                    shuffle=True,
                    stratify=gold_list)
    LOG.debug("Compiling model")
    emb = Embedding(
            vocab.shape[0],
            vocab.shape[1],
            weights=[vocab],
            input_length=max_sents,
            trainable=False)
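    # vocab is assumed to hold one precomputed SENT_DIM-dimensional vector per
    # sentence; trainable=False keeps these pretrained rows fixed.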
    # tgt = Input(shape=(max_sents,SENT_DIM), dtype='float32')
    # srcs = Input(shape=(max_sents,SENT_DIM), dtype='float32')
    tgt = Input(shape=(max_sents,), dtype='int32')
    with tf.device('/device:CPU:0'):
        tgt_emb = emb(tgt)

    srcs = Input(shape=(max_sents,), dtype='int32')
    with tf.device('/device:CPU:0'):
        srcs_emb = emb(srcs)

    encode = Bidirectional(LSTM(SENT_DIM // 2, return_sequences=False,
                                dropout=0.0, recurrent_dropout=0.0),
                           input_shape=(max_sents, SENT_DIM))
    # tgt_vec = encode(tgt)
    # src_vec = encode(srcs)
    tgt_vec = encode(tgt_emb)
    src_vec = encode(srcs_emb)
    pds = _Entailment(SENT_DIM, NUM_CLASSES, dropout=0.2)(tgt_vec, src_vec)
    model = Model(inputs=[tgt, srcs], outputs=pds)
    model.summary()
    model.compile(loss="categorical_crossentropy",
                  optimizer=Adam(lr=0.001),
                  metrics=["accuracy"])

    NUM_EPOCHS = 25 
    BATCH_SIZE = 32 

    LOG.debug("Training model")
    model.fit(x=[tgt_ids[train], src_ids[train]], y=golds[train],
              batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, shuffle=True, verbose=2)

    preds = model.predict([tgt_ids[test], src_ids[test]])
    preds = np.argmax(preds, axis=1)
    gold_test = np.argmax(golds[test], axis=1)
    test_acc = accuracy_score(gold_test, preds)
    LOG.debug("Testing accuracy: %0.3f", test_acc)
    LOG.debug("Confusion matrix:\n%s\n\n", confusion_matrix(gold_test, preds))

    # Write completed topic
    # with open(completed_topics_file, 'a') as fh:
    #     fh.write('\n{}'.format(topic))

    # Cleanup memory
    del model
    gc.collect()
    K.clear_session()
    tf.reset_default_graph()
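
pad_or_truncate1 is called above but never defined in the snippet. A minimal sketch, assuming each document is a plain list of sentence indices and that index 0 is reserved for padding (both assumptions, not confirmed by the source):

def pad_or_truncate1(doc, max_len, pad_value=0):
    """Pad with pad_value, or truncate, so the result has exactly max_len items."""
    if len(doc) >= max_len:
        return doc[:max_len]
    return doc + [pad_value] * (max_len - len(doc))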
Example #3
    if fold <= folds_complete:
        fold += 1
        continue
    print("\nFold: " + str(fold))
    fold += 1
    print("\nCompiling model\n")
    tgt = Input(shape=(max_sents, SENT_DIM), dtype='float32')
    srcs = Input(shape=(max_sents, SENT_DIM), dtype='float32')
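    # The layer names suggest the decomposable-attention pipeline of Parikh et
    # al. (2016): score sentence pairs, softly align each document against the
    # other, compare aligned pairs, then aggregate and classify.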
    attention = _Attention(max_sents, SENT_DIM, dropout=0.2)(tgt, srcs)
    align1 = _SoftAlignment(max_sents, SENT_DIM)(srcs, attention)
    align2 = _SoftAlignment(max_sents, SENT_DIM)(tgt,
                                                 attention,
                                                 transpose=True)
    vec_l = _Comparison(max_sents, SENT_DIM, dropout=0.2)(tgt, align1)
    vec_r = _Comparison(max_sents, SENT_DIM, dropout=0.2)(srcs, align2)
    pds = _Entailment(SENT_DIM, NUM_CLASSES, dropout=0.2)(vec_l, vec_r)
    model = Model(inputs=[tgt, srcs], outputs=pds)
    model.summary()
    model.compile(loss="categorical_crossentropy",
                  optimizer=Adam(lr=0.001),
                  metrics=["accuracy"])

    cb = [
        ModelCheckpoint("temp2_model.hdf5",
                        monitor="val_loss",
                        verbose=1,
                        save_weights_only=True,
                        save_best_only=True)
    ]

    get_attention_matrix = K.function(
        [model.layers[0].input, model.layers[1].input, K.learning_phase()],
        [model.layers[3].output])  # signature taken from the commented-out helper in Example #1
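
A backend function built this way takes a list of input arrays plus the learning-phase flag (0 for inference) and returns a list of output arrays. A hedged usage sketch, assuming tgt_batch and src_batch are batches already padded to max_sents and that layer 3 is indeed the attention layer:

# Hypothetical inference-time call; 0 disables dropout via the learning phase.
attention_scores = get_attention_matrix([tgt_batch, src_batch, 0])[0]
print(attention_scores.shape)  # expected: (batch_size, max_sents, max_sents)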