Пример #1
0
def generate_sonnet_rhyme(num_states):
    """Generate a 14-line sonnet whose lines are seeded with paired words.

    A reversed HMM is loaded (``is_reversed=True``), each line is generated by
    ``generate_emission_seeded`` from a seed word, and every sample is reversed
    in place before formatting.  Seed pairs are drawn from a single randomly
    chosen entry of ``rhyme_dict`` and placed on lines (0, 2), (1, 3) of each
    of the three quatrains and on the final two lines (12, 13).

    Args:
        num_states: Number of hidden states; forwarded to ``train.load``.

    Returns:
        Whatever ``format_sonnet`` returns for the 14 generated samples.
    """
    hmm = train.load(num_states, is_reversed=True)
    # Use a context manager so the corpus file handle is closed promptly
    # instead of being left for the garbage collector.
    with open('data/shakespeare.txt') as corpus:
        text = corpus.read()

    obs, vocab, inv_vocab = preprocess.get_observations(text)
    rhyme_dict = preprocess.build_rhyme_dict(text, vocab)
    lengths = preprocess.get_lengths(obs)
    punctuation = preprocess.get_punctuation(text)

    # The length distribution does not change between draws; build the
    # candidate/probability lists once rather than on every iteration.
    length_choices = list(lengths.keys())
    length_probs = list(lengths.values())

    samples = [None] * 14
    for quatrain in range(3):
        for line in range(2):
            couplet = random.sample(random.choice(rhyme_dict), 2)
            length = np.random.choice(length_choices, p=length_probs)

            # Lines `line` and `line + 2` of the quatrain share a seed pair
            # and the same sampled length.
            first = quatrain * 4 + line
            samples[first] = generate_emission_seeded(
                hmm, length, couplet[0])
            samples[first + 2] = generate_emission_seeded(
                hmm, length, couplet[1])

    # NOTE(review): the closing couplet reuses the `length` sampled for the
    # last pair of the third quatrain (no fresh draw) -- kept as in the
    # original; confirm this is intended.
    couplet = random.sample(random.choice(rhyme_dict), 2)
    samples[12] = generate_emission_seeded(hmm, length, couplet[0])
    samples[13] = generate_emission_seeded(hmm, length, couplet[1])

    # The model was loaded with is_reversed=True, so flip each sample back.
    for sample in samples:
        sample.reverse()

    return format_sonnet(samples, inv_vocab, punctuation)
Пример #2
0
def predict_BERT(para, feature="radical", use_bert=True):
    """Load a trained ``ModelLib.FGN`` model and tag the module-level test data.

    ``para`` is updated in place with vocabulary sizes, the tag count and
    model flags before the model is built, and the weights are loaded from
    ``para["model_path"]``.  The inputs passed to ``model.predict`` depend on
    ``feature``.  Predictions are arg-maxed per position, truncated to the
    lengths reported by ``get_lengths(x1_test)`` and mapped through ``tags``;
    the reference labels come from the module-level ``y_test``.

    Args:
        para: Mutable configuration mapping; must contain ``"model_path"``.
        feature: One of ``""``, ``"radical"``, ``"pinyin"``, ``"img"`` or
            ``"img&radical"``.
        use_bert: Forwarded to ``ModelLib.FGN``.

    Returns:
        A ``(tag_pred_y, tag_val_y)`` tuple of per-sequence tag lists.

    Raises:
        ValueError: If ``feature`` is not one of the supported values
            (previously this surfaced as an ``UnboundLocalError``).
    """
    para['tag_num'] = len(tags)
    para['rad_vocab_size'] = len(rad2id) + 1
    para['radical_vocab_size'] = len(radical2id) + 1
    para['pinyin_vocab_size'] = len(pinyin2id) + 1
    para["word_num"] = len(word2id) + 1
    para["fea_embed"] = None

    bool_x_test = creat_bool_x(test_x)

    if "img" in feature:
        para["img_embed_weight"] = img_embed
    para["is_trainable"] = False
    model = ModelLib.FGN(para, feature=feature, use_bert=use_bert)
    model.load_weights(filepath=para["model_path"])

    if feature == "":
        pred_y = model.predict([x1_test, x2_test], batch_size=64, verbose=1)
    elif feature == "radical":
        # Fixed: the radical features must come from the test split to match
        # x1_test/x2_test (the original passed radical_train here, while the
        # "img&radical" branch below already uses radical_test).
        pred_y = model.predict([x1_test, x2_test, radical_test],
                               batch_size=64,
                               verbose=1)
    elif feature == "pinyin":
        # NOTE(review): this pairs test inputs with `pinyin_train`, which
        # looks like the same train/test mix-up as the radical branch had.
        # No test-split pinyin array is visible from here, so it is left
        # unchanged -- confirm and switch to the test split if one exists.
        pred_y = model.predict([x1_test, x2_test, pinyin_train],
                               batch_size=64,
                               verbose=1)
    elif feature == "img":
        pred_y = model.predict([x1_test, x2_test, test_x, bool_x_test],
                               batch_size=64,
                               verbose=1)
    elif feature == "img&radical":
        pred_y = model.predict(
            [x1_test, x2_test, test_x, bool_x_test, radical_test],
            batch_size=64,
            verbose=1)
    else:
        raise ValueError("unsupported feature: %r" % (feature,))

    lengths = get_lengths(x1_test)
    tag_pred_y = []
    tag_val_y = []
    for i, y in enumerate(pred_y):
        # Pick the highest-scoring tag index at every position.
        y = [numpy.argmax(dim) for dim in y]
        print(lengths[i])
        # Keep only the first lengths[i] positions of the sequence.
        p_y = y[:lengths[i]]
        print(p_y)
        v_y = y_test[i][:lengths[i]].flatten()
        print(v_y)
        tag_pred_y.append([tags[dim] for dim in p_y])
        tag_val_y.append([tags[dim] for dim in v_y])
    return tag_pred_y, tag_val_y
Пример #3
0
def generate_sonnet(num_states):
    """Generate a 14-line sonnet from an HMM loaded via ``train.load``.

    For each of the 14 lines a length is drawn with ``np.random.choice`` from
    the keys of ``preprocess.get_lengths(obs)``, using the corresponding
    values as probabilities, and a line is produced by ``generate_emission``.

    Args:
        num_states: Number of hidden states; forwarded to ``train.load``.

    Returns:
        Whatever ``format_sonnet`` returns for the 14 generated samples.
    """
    hmm = train.load(num_states)
    # Use a context manager so the corpus file handle is closed promptly
    # instead of being left for the garbage collector.
    with open('data/shakespeare.txt') as corpus:
        text = corpus.read()

    obs, _, inv_vocab = preprocess.get_observations(text)
    lengths = preprocess.get_lengths(obs)
    punctuation = preprocess.get_punctuation(text)

    # The length distribution is fixed; build the lists once, not per line.
    length_choices = list(lengths.keys())
    length_probs = list(lengths.values())

    samples = []
    for _ in range(14):
        length = np.random.choice(length_choices, p=length_probs)
        samples.append(generate_emission(hmm, length))

    return format_sonnet(samples, inv_vocab, punctuation)
Пример #4
0
def result_proess(pred_y, x1_test, tags):
    """Turn raw model scores into per-sequence tag lists.

    Each entry of ``pred_y`` is arg-maxed position by position, cut to the
    length reported by ``get_lengths(x1_test)`` for that sequence, and mapped
    through ``tags``.  The reference labels are read from the module-level
    ``y_test`` (not a parameter), truncated the same way and flattened.

    Args:
        pred_y: Iterable of per-sequence score rows.
        x1_test: Input passed to ``get_lengths`` to obtain sequence lengths.
        tags: Indexable lookup from tag index to tag.

    Returns:
        A ``(predicted, reference)`` tuple of per-sequence tag lists.
    """
    seq_lens = get_lengths(x1_test)
    predicted_tags, reference_tags = [], []
    for idx, scores in enumerate(pred_y):
        best_ids = [numpy.argmax(step) for step in scores]
        n_tokens = seq_lens[idx]
        reference_ids = y_test[idx][:n_tokens].flatten()
        predicted_tags.append([tags[k] for k in best_ids[:n_tokens]])
        reference_tags.append([tags[k] for k in reference_ids])
    return predicted_tags, reference_tags
Пример #5
0
def predict_bert(para):
    """Run ``ModelLib.BERT_MODEL`` on the data at ``para["test_path"]``.

    Sets ``para['tag_num']`` from the module-level ``tags``, builds the model,
    loads weights from ``para["model_path"]`` and predicts on the input loaded
    by ``load_path_bert``.  Sequence lengths come from the module-level
    ``val_x`` and reference labels from the module-level ``val_y``.  For every
    sequence the length, the truncated predicted indices and the truncated
    reference indices are printed.

    Args:
        para: Configuration mapping with ``"model_path"``, ``"test_path"`` and
            ``"sep"``; updated in place with ``'tag_num'``.

    Returns:
        A ``(predicted, reference)`` tuple of per-sequence tag lists.
    """
    para['tag_num'] = len(tags)

    model = ModelLib.BERT_MODEL(para)
    model.load_weights(filepath=para["model_path"])

    bert_inputs = load_path_bert(para["test_path"], sep=para["sep"])
    seq_lens = get_lengths(val_x)
    scores = model.predict(bert_inputs)

    predicted_tags, reference_tags = [], []
    for idx, sample_scores in enumerate(scores):
        best_ids = [numpy.argmax(step) for step in sample_scores]
        n_tokens = seq_lens[idx]
        print(n_tokens)
        pred_ids = best_ids[:n_tokens]
        print(pred_ids)
        reference_ids = val_y[idx][:n_tokens].flatten()
        print(reference_ids)
        predicted_tags.append([tags[k] for k in pred_ids])
        reference_tags.append([tags[k] for k in reference_ids])
    return predicted_tags, reference_tags