def __init__(self):
    """Set up the vocabulary, embeddings, network layers and output paths."""
    self.char_dict = wordDict()  # vocabulary (word dictionary)
    self.char2vec = word2Vec()   # word vectors (gensim)
    self.embedding = self.char2vec.get_embedding()
    self._build_layers()
    # Checkpoint directory per sentence index (index 0 unused).
    # NOTE(review): indices 1 and 3 share './models/new' while index 2 uses
    # './models/thirdSentence/' (trailing slash differs) — confirm intended.
    self.model_path = [
        None,
        './models/new',
        './models/thirdSentence/',
        './models/new'
    ]
    self.loss_path = [f'./result/loss/{i}_sentence' for i in range(4)]
    # Fix: dropped a pointless f-prefix on a string with no placeholders.
    self.attention_img_path = ['./result/attention/']
def __init__(self):
    """Build the pronunciation dictionary from the pinyin data file.

    Maps each character present in the word dictionary to a list of
    (pinyin-without-tone, tone-number) tuples.
    """
    self.char_dict = wordDict()
    self._pron_dict = dict()
    # Fix: specify the encoding explicitly (the rest of the project opens
    # files with encoding='utf-8'); the default is platform-dependent.
    with open(_pinyin_path, 'r', encoding='utf-8') as fin:
        for line in fin:  # iterate lazily; readlines() copy not needed
            toks = line.strip().split()
            # First token is the character's Unicode code point in hex.
            ch = chr(int(toks[0], 16))
            if ch not in self.char_dict:
                continue  # skip characters outside the vocabulary
            self._pron_dict[ch] = []
            for tok in toks[1:]:
                # Each remaining token is pinyin followed by a one-digit
                # tone, e.g. 'zhong1' -> ('zhong', 1).
                self._pron_dict[ch].append((tok[:-1], int(tok[-1])))
def _gen_word2vec():
    """Train a word2vec model on the poem corpus and save the embedding matrix.

    Words absent from the trained model keep a random uniform vector.
    """
    print("Generating word2vec model ...")
    word_dict = wordDict()
    poems = Poems()
    # Each poem is four sentences; concatenate them into one token sequence.
    poems = [poem[0] + poem[1] + poem[2] + poem[3] for poem in poems]
    # min_count=1: keep low-frequency words (the corpus has many of them).
    # NOTE(review): this is the gensim < 4.0 API ('size', 'ch in model',
    # 'model[ch]'); gensim >= 4.0 needs vector_size=... and model.wv —
    # confirm the pinned gensim version before changing.
    model = models.Word2Vec(poems, size=WORD_VEC_DIM, min_count=1)
    # Start from random vectors so out-of-model words still get one.
    embedding = uniform(-1.0, 1.0, [len(word_dict), WORD_VEC_DIM])
    for i, ch in enumerate(word_dict):
        if ch in model:
            embedding[i, :] = model[ch]
    np.save(word2vec_path, embedding)
def _gen_poems():
    """Filter the raw corpora down to poems fully covered by the dictionary.

    Writes one poem per line to poems_path with sentences joined by '|'.
    Keeps only poems whose first sentence has exactly 3 words and whose
    every word is present in the word dictionary.
    """
    print("Parsing poems ...")
    word_dict = wordDict()
    with open(poems_path, 'w', encoding='utf-8') as fout:
        for corpus in _corpus_list:
            corpus_file = os.path.join(raw_dir, corpus)
            with open(corpus_file, 'r', encoding='utf-8') as fin:
                for line in fin:
                    sentences = split_sentences(line)
                    # Fix: guard against an empty sentence list to avoid
                    # an IndexError on malformed corpus lines.
                    if not sentences or len(sentences[0].split()) != 3:
                        continue
                    # Keep the poem only if every word is in the dictionary
                    # (word2int returns a negative value for unknown words).
                    if all(word_dict.word2int(ch) >= 0
                           for sentence in sentences
                           for ch in sentence.strip().split()):
                        fout.write('|'.join(sentences) + '\n')
            print("Finished parsing %s." % corpus)
from paths import save_dir
from pron_dict import PronDict
from random import random
from singleton import Singleton
from utils import WORD_VEC_DIM, NUM_OF_SENTENCES
import numpy as np
import os
import sys
import tensorflow.compat.v1 as tf
import time
import matplotlib.pyplot as plt

# Training hyper-parameters.
BATCH_SIZE = 128
NUM_UNITS = 128          # RNN hidden-state size
LEN_PER_SENTENCE = 5     # presumably words/characters per sentence — TODO confirm

_model_path = os.path.join(save_dir, 'model')

# NOTE(review): wordDict is not imported in this chunk — presumably imported
# elsewhere in the file; `len(wordDict())` would be the idiomatic call.
WORD_DICT_SIZE = wordDict().__len__()

# Checkpoint directory to LOAD per sentence index (all share the pair model).
model_load_path = {
    0: './models/pair',
    1: './models/pair',
    2: './models/pair',
    3: './models/pair',
    4: './models/pair',
}
# Checkpoint directory to SAVE per sentence index.
model_save_path = {
    0: './models/pair',
    1: './models/sentence1',
    2: './models/sentence2',
    3: './models/sentence3',
    4: './models/sentence4',
}
# NOTE(review): this dict continues beyond the visible chunk.
result_save_path = {
def __init__(self):
    """Load the precomputed embedding matrix and the word dictionary."""
    # NOTE(review): regeneration was disabled by parking it in the
    # docstring position; original disabled code was:
    #   if not check_uptodate(word2vec_path): _gen_word2vec()
    self.embedding = np.load(word2vec_path)
    self.word_dict = wordDict()
from plan import Planner
from generator_2021 import Generator
from typing import Set, List
import os

from word_vec2 import word2Vec
from word_dict import wordDict

# Silence TensorFlow C++ logging (errors only).
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'

# For testing purpose.
if __name__ == '__main__':
    word_dict = wordDict()
    word2vec = word2Vec()
    generator = Generator()
    run_id = 0
    while True:
        run_id += 1
        hint: str = input("Type first sentence: ")
        print(hint)
        # Keep only characters known to the dictionary.
        # NOTE(review): other modules treat any negative word2int() result
        # as "unknown" (`< 0`); `!= -1` assumes -1 is the only failure
        # value — confirm against wordDict.word2int.
        keywords = [ch for ch in hint if word_dict.word2int(ch) != -1]
        if not keywords:
            # Fix: avoid IndexError when no input character is known.
            print("No known characters in input; please try again.")
            continue
        keyword = keywords[0]
        # Pad the keyword list to 5 entries with words similar to the
        # first keyword (len(keywords) is evaluated before reassignment).
        keywords = keywords[1:] + word2vec.similar_word_(
            keyword, 5 - len(keywords))
        keywords = [keyword] + [w for w in keywords if w != keyword]
        print("Keywords: ", keywords)
        # Space-separate the characters of each keyword.
        keywords = [' '.join(w) for w in keywords]
        # Fix: generate_by_multiple_models returns a str (split() is
        # called on it below), not List[str] as originally annotated.
        poem: str = generator.generate_by_multiple_models(
            keywords, 0, './result/demo', '', f'{run_id}')
        output = ''.join(poem.split()).strip('^').replace('$', '\n')
        print("Poem: \n", output)
        with open("./result/demo/result.txt", 'a', encoding='utf-8') as f:
            f.write("Input: " + hint + '\n')