示例#1
0
# Vocabulary mapping and embedding values, as returned by
# data_helper.load_embedding2.
word2idx, word_embeddings = data_helper.load_embedding2(
    embedding_path, True)


# word to id: every split is replaced in place by the first value returned
# from utils.word2id; the second returned value is kept as "*_length".
train_sources, train_sources_length = utils.word2id(
    train_sources, word2idx, seq_length)
train_targets, train_targets_length = utils.word2id(
    train_targets, word2idx, seq_length)


dev_sources, dev_sources_length = utils.word2id(
    dev_sources, word2idx, seq_length)
dev_targets, dev_targets_length = utils.word2id(
    dev_targets, word2idx, seq_length)

test_sources, test_sources_length = utils.word2id(
    test_sources, word2idx, seq_length)
test_targets, test_targets_length = utils.word2id(
    test_targets, word2idx, seq_length)

# Score targets built with class_num as the second argument
# (presumably one distribution over class_num classes per score -- confirm
# against utils.build_porbs).
train_score_probs = utils.build_porbs(
    train_scores, class_num)
dev_score_probs = utils.build_porbs(
    dev_scores, class_num)
test_score_probs = utils.build_porbs(
    test_scores, class_num)

def kl_distance(y_true, y_pred):
    """Return the mean symmetrised KL divergence between two batches.

    Both inputs are clipped to the range [1e-10, 1] first, so neither the
    ratios nor the logarithms ever see a zero. For each row (the sums run
    over axis 1) the two directed divergences KL(y_true || y_pred) and
    KL(y_pred || y_true) are averaged; the result is the mean of those
    per-row values.
    """
    lower, upper = 1e-10, 1.
    p = kb.clip(y_true, lower, upper)
    q = kb.clip(y_pred, lower, upper)

    # Directed divergences, one value per row.
    forward = kb.sum(p * kb.log(p / q), axis=1)
    backward = kb.sum(q * kb.log(q / p), axis=1)

    symmetric = (forward + backward) / 2.0
    return kb.mean(symmetric)

def pearson(y_true, y_pred):
    """Opening statements of a metric named ``pearson``.

    NOTE(review): only the first three statements are visible in this
    excerpt. The correlation computation and the return statement appear to
    have been cut off, so as shown the function falls through and returns
    None.
    """
    # Column of the values 0..5, reshaped to [class_num, 1]; the reshape can
    # only succeed when class_num == 6 because the range is hard-coded.
    # NOTE(review): this line uses the alias `K` while every other call in
    # the function uses `kb` -- confirm both names are imported.
    scores = kb.reshape(K.arange(0, 6, dtype='float32'), [class_num, 1])
    # Dot each row with the score column and flatten to 1-D (presumably the
    # expected score per sample if rows are probabilities -- TODO confirm).
    y_true = kb.reshape(kb.dot(y_true, scores), [-1])
    y_pred = kb.reshape(kb.dot(y_pred, scores), [-1])
# Hyper-parameters (used outside the visible part of this script).
drop_out_rate = 0.5
regularizer_rate = 0.004

print("loading data...")

# Sentence pairs and their scores read from graph_path.
graph_sources, graph_targets, graph_scores = (
    data_helper.load_cross_lang_sentence_data(graph_path, False))

# Vocabulary mapping and embedding values from embedding_path.
word2idx, word_embeddings = data_helper.load_embedding(
    embedding_path, True)

# Replace both sides of every pair by the first value returned from
# utils.word2id; the second returned value is kept as "*_length".
graph_sources, graph_sources_length = utils.word2id(
    graph_sources, word2idx, seq_length)
graph_targets, graph_targets_length = utils.word2id(
    graph_targets, word2idx, seq_length)

# Score targets built with class_num as the second argument.
graph_score_probs = utils.build_porbs(
    graph_scores, class_num)


def kl_distance(y_true, y_pred):
    """Return the mean symmetrised KL divergence between two batches.

    Both inputs are clipped to the range [1e-10, 1] first, so neither the
    ratios nor the logarithms ever see a zero. For each row (the sums run
    over axis 1) the two directed divergences KL(y_true || y_pred) and
    KL(y_pred || y_true) are averaged; the result is the mean of those
    per-row values.
    """
    floor, ceiling = 1e-10, 1.
    truth = kb.clip(y_true, floor, ceiling)
    guess = kb.clip(y_pred, floor, ceiling)

    # Directed divergences, one value per row.
    truth_to_guess = kb.sum(truth * kb.log(truth / guess), axis=1)
    guess_to_truth = kb.sum(guess * kb.log(guess / truth), axis=1)

    per_row = (truth_to_guess + guess_to_truth) / 2.0
    return kb.mean(per_row)


def pearson(y_true, y_pred):
    """Opening statements of a metric named ``pearson``.

    NOTE(review): only the first three statements are visible in this
    excerpt. The correlation computation and the return statement appear to
    have been cut off, so as shown the function falls through and returns
    None.
    """
    # Column of the values 0..5, reshaped to [class_num, 1]; the reshape can
    # only succeed when class_num == 6 because the range is hard-coded.
    # NOTE(review): this line uses the alias `K` while every other call in
    # the function uses `kb` -- confirm both names are imported.
    scores = kb.reshape(K.arange(0, 6, dtype='float32'), [class_num, 1])
    # Dot each row with the score column and flatten to 1-D (presumably the
    # expected score per sample if rows are probabilities -- TODO confirm).
    y_true = kb.reshape(kb.dot(y_true, scores), [-1])
    y_pred = kb.reshape(kb.dot(y_pred, scores), [-1])
# Replace every split by the first value returned from utils.word2id, called
# with FLAGS.seq_length; the second returned value is kept as "*_length".
train_sources, train_sources_length = utils.word2id(
    train_sources, word2idx, FLAGS.seq_length)
train_targets, train_targets_length = utils.word2id(
    train_targets, word2idx, FLAGS.seq_length)

dev_sources, dev_sources_length = utils.word2id(
    dev_sources, word2idx, FLAGS.seq_length)
dev_targets, dev_targets_length = utils.word2id(
    dev_targets, word2idx, FLAGS.seq_length)

test_sources, test_sources_length = utils.word2id(
    test_sources, word2idx, FLAGS.seq_length)
test_targets, test_targets_length = utils.word2id(
    test_targets, word2idx, FLAGS.seq_length)

# Score targets built with FLAGS.class_num as the second argument.
train_scores_prob = utils.build_porbs(
    train_scores, FLAGS.class_num)
dev_scores_prob = utils.build_porbs(
    dev_scores, FLAGS.class_num)
test_scores_prob = utils.build_porbs(
    test_scores, FLAGS.class_num)

# Disabled alternative that would overwrite the raw scores instead:
# train_scores = utils.normalize_probs(train_scores)
# dev_scores = utils.normalize_probs(dev_scores)
# test_scores = utils.normalize_probs(test_scores)

# Whole seconds since the epoch, as a string.
time_stamp = str(
    int(time.time()))

# Training
# ==================================================

# Everything below runs with a freshly created graph as the default graph.
with tf.Graph().as_default():
    # Session created with default options while that graph is the default.
    session = tf.Session()
    with session.as_default():
        # NOTE(review): the body of this block is cut off in this excerpt.
示例#4
0
    FLAGS.test_path)

# train_source_features, train_target_features = utils.get_all_handcraft_features(train_sources, train_targets, FLAGS.seq_length)
# dev_source_features, dev_target_features = utils.get_all_handcraft_features(dev_sources, dev_targets, FLAGS.seq_length)
# test_source_features, test_target_features = utils.get_all_handcraft_features(test_sources, test_targets, FLAGS.seq_length)

# Vocabulary mapping and embedding values from FLAGS.embedding_path.
word2idx, word_embeddings = data_helper.load_embedding(
    FLAGS.embedding_path, True)

# Each split is replaced by whatever utils.word2id returns for it
# (no separate length value is unpacked here).
train_sources = utils.word2id(
    train_sources, word2idx, FLAGS.seq_length)
train_targets = utils.word2id(
    train_targets, word2idx, FLAGS.seq_length)
dev_sources = utils.word2id(
    dev_sources, word2idx, FLAGS.seq_length)
dev_targets = utils.word2id(
    dev_targets, word2idx, FLAGS.seq_length)
test_sources = utils.word2id(
    test_sources, word2idx, FLAGS.seq_length)
test_targets = utils.word2id(
    test_targets, word2idx, FLAGS.seq_length)

# Score targets are built for the dev and test splits only.
dev_score_probs = utils.build_porbs(
    dev_scores, FLAGS.class_num)
test_score_probs = utils.build_porbs(
    test_scores, FLAGS.class_num)

print("Train/Dev split: {:d}/{:d}".format(
    len(train_scores), len(dev_scores)))

# Whole seconds since the epoch, as a string.
time_stamp = str(
    int(time.time()))
# Training
# ==================================================

# Everything below runs with a freshly created graph as the default graph.
with tf.Graph().as_default():
    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.7  # use 70% of the GPU memory
    # Let the GPU allocation grow on demand instead of reserving it up front.
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    with session.as_default():
        # Define training procedure
        # NOTE(review): the rest of this block is cut off in this excerpt.