def train(args):
    """Load a trained RNN checkpoint and run it on the query sentence.

    NOTE(review): despite the name, this function performs inference, not
    training — it tokenizes ``args.query``, converts it to vocabulary ids,
    restores weights from ``args.checkpoint_path`` and prints the model's
    prediction for the first preprocessed sequence.
    """
    ckpt = args.checkpoint_path
    sentence = args.query

    # Tokenize the query with GiNZA and keep the surface form of each token.
    nlp = spacy.load('ja_ginza_nopn')
    tokens = [tok.orth_ for tok in nlp(sentence)]

    # Hyperparameters — presumably these must match the checkpoint; confirm.
    batch_size, T = 32, 32
    emb_size, hidden_size = 128, 128
    dropout, lr = 0.0, 1e-3

    # Map tokens to ids using the stored vocabulary (capped at 50000 entries).
    data_gen = DataForGenerator(batch_size=batch_size, T=T)
    data_gen.load_vocab('./vocab.csv', vocab_size=50000)
    words_id, _ = data_gen.preprocess([tokens], None)
    vocab_size = len(data_gen.vocab.word2id)
    print("Vocab size: ", vocab_size)

    model = RNNModel(
        batch_size=batch_size,
        vocab_size=vocab_size,
        emb_size=emb_size,
        hidden_size=hidden_size,
        T=T,
        dropout=dropout,
        lr=lr,
        model_path=None)
    model.load_weights(ckpt)

    print(tokens)
    print(words_id)
    pred = model.predict(words_id[0])
    print(pred)
    print(pred.shape)
def predict_dajare(args):
    """Score a dajare (Japanese pun) sentence with a trained RNN model.

    Args:
        args: parsed CLI namespace providing ``dajare`` (raw sentence),
            ``weights_path`` (model checkpoint file) and
            ``vocab_data_path`` (vocabulary file).

    Returns:
        The model's prediction output; index 0 corresponds to the input
        sentence.
    """
    dajare_raw = args.dajare
    weights_path = args.weights_path
    vocab_data_path = args.vocab_data_path

    # Tokenize the raw sentence and map each token to its vocabulary id.
    tokenizer = TokenizerSpacy()
    dajare_words = tokenizer.tokenize_sentence(dajare_raw)
    logging.info(dajare_words)
    vocab = Vocab(vocab_data_path)
    dajare_labeled = vocab.convert_word2id(dajare_words)
    logging.info(dajare_labeled)

    # Hyperparameters — presumably must match the loaded checkpoint; confirm.
    batch_size = 30
    T = 25
    emb_size = 128
    hidden_size = 128
    dropout = 0.0
    lr = 1e-3
    vocab_size = vocab.vocab_num

    model = RNNModel(batch_size=batch_size, vocab_size=vocab_size,
                     emb_size=emb_size, hidden_size=hidden_size, T=T,
                     dropout=dropout, lr=lr, model_path=None)
    model.print_fn = logging.info
    model.load_weights(weights_path)

    # BUG FIX: predict() was previously called on its own output
    # (model.predict(model.predict(...))); run inference exactly once.
    probability = model.predict(np.array([dajare_labeled], dtype=np.float32))
    # BUG FIX: logging.info('Probability:', x) has no % placeholder for the
    # extra arg, which triggers a logging-internal formatting error instead
    # of printing the value; use lazy %-formatting correctly.
    logging.info('Probability: %s', probability[0])
    return probability