예제 #1
0
def main(args):
    """Print phonetic transcriptions for each non-blank line of a UTF-8 file.

    For every non-blank line of ``args.filename`` this prints the stripped
    line, the result of ``get_phonetic_transcription`` with all whitespace
    removed, the result of ``get_phonetic_transcription_juman``, and then an
    empty line.

    Args:
        args: Object exposing ``verbose`` and ``filename`` attributes.
            ``args.verbose`` is also stored in the module-level ``verbose``
            global.

    Returns:
        Always 0.
    """
    global verbose
    verbose = args.verbose

    table_term_vowel = utils.load_csv_to_dict(
        path.join(DIR_ROOT, 'data/term_vowel_table.csv'))
    with codecs.open(args.filename, encoding='utf-8') as f:
        for i, line in enumerate(f, 1):
            # Progress report every 10 lines when verbose output is enabled.
            if verbose and i % 10 == 0:
                logger.info(i)
            # Lines read from a file keep their newline, so a length check
            # never detects blank lines; test the stripped text instead.
            text = line.strip()
            if not text:
                continue
            # codecs.open(..., encoding=...) already yields decoded text;
            # calling .decode() on it again fails for non-ASCII input.
            result1 = get_phonetic_transcription(line, table_term_vowel)
            result2 = get_phonetic_transcription_juman(line)
            # Print the text directly: routing it through a byte-string
            # format would force an implicit ASCII encode on Python 2.
            print(text)
            print(''.join(result1.split()))
            print(result2)
            print('')
    return 0
예제 #2
0
from os import path
import sys
import codecs

import rhyme
import utils
u"""Make features

DEMO:
python make_features.py
"""

# Absolute path of the directory that contains this script.
DIR_SCRIPT = path.dirname(path.abspath(__file__))
# Root directory used to locate data files; currently the script directory.
DIR_ROOT = DIR_SCRIPT  # TODO: move this file to ./features

# Loaded once at import time from data/term_vowel_table.csv under DIR_ROOT.
# NOTE(review): presumably a term -> vowel lookup table, judging by the file
# name — confirm against utils.load_csv_to_dict and the CSV itself.
table_term_vowel = utils.load_csv_to_dict(
    path.join(DIR_ROOT, 'data/term_vowel_table.csv'))


def calc_Jaccard_similarity(BoW1, BoW2):
    """Gather the combined and shared words of two bag-of-words lists.

    NOTE(review): only the beginning of this definition is visible here (no
    return statement is shown). Judging by its name it presumably goes on to
    derive a Jaccard similarity from ``all_words`` and ``common_words`` —
    confirm against the full source.

    Args:
        BoW1: Words of the first text; asserted to be a ``list``.
        BoW2: Words of the second text; asserted to be a ``list``.
    """
    # Variant that does not allow duplicates (kept for reference)
    # all_words = list(set(BoW1.extend(BoW2)))
    # common_words = list(set(BoW1) & set(BoW2))

    # Variant that allows duplicates
    assert isinstance(BoW1, list)
    assert isinstance(BoW2, list)
    # Concatenation keeps repeated words from both inputs.
    all_words = BoW1 + BoW2
    common_words = []
    # Every element of BoW1 that also occurs in BoW2 is recorded, once per
    # occurrence in BoW1.
    for word in BoW1:
        if word in BoW2:
            common_words.append(word)
예제 #3
0
import sys
import codecs

import rhyme
import utils

u"""Make features

DEMO:
python make_features.py
"""

# Absolute path of the directory that contains this script.
DIR_SCRIPT = path.dirname(path.abspath(__file__))
# Root directory used to locate data files; currently the script directory.
DIR_ROOT = DIR_SCRIPT  # TODO: move this file to ./features

# Loaded once at import time from data/term_vowel_table.csv under DIR_ROOT.
# NOTE(review): presumably a term -> vowel lookup table, judging by the file
# name — confirm against utils.load_csv_to_dict and the CSV itself.
table_term_vowel = utils.load_csv_to_dict(path.join(DIR_ROOT,
                                                    'data/term_vowel_table.csv'))

def calc_Jaccard_similarity(BoW1, BoW2):
    """Gather the combined and shared words of two bag-of-words lists.

    NOTE(review): only the beginning of this definition is visible here (no
    return statement is shown). Judging by its name it presumably goes on to
    derive a Jaccard similarity from ``all_words`` and ``common_words`` —
    confirm against the full source.

    Args:
        BoW1: Words of the first text; asserted to be a ``list``.
        BoW2: Words of the second text; asserted to be a ``list``. Matched
            words are removed from this list in place.
    """
    # Variant that does not allow duplicates (kept for reference)
    # all_words = list(set(BoW1.extend(BoW2)))
    # common_words = list(set(BoW1) & set(BoW2))

    # Variant that allows duplicates
    assert isinstance(BoW1, list)
    assert isinstance(BoW2, list)
    # Built before the loop below, so it still holds every word of BoW2.
    all_words = BoW1+BoW2
    common_words = []
    for word in BoW1:
        if word in BoW2:
            common_words.append(word)
            # Consume the matched occurrence so it cannot be matched again;
            # this mutates the caller's BoW2.
            BoW2.remove(word)
예제 #4
0
# Load the pickled table that is indexed by word id to recover each word.
# NOTE: unpickling can execute arbitrary code; only load inv_vocab.pkl from a
# trusted source.
with open("inv_vocab.pkl", "rb") as _inv_vocab_file:
    inv_vocab = cPickle.load(_inv_vocab_file)

# Second dimension of the embedding weight matrix.
n_units = model.embed.W.shape[1]

# Array module used to build model inputs: cupy on GPU, numpy on CPU.
xp = cuda.cupy if args.gpu >= 0 else np

# Place the model on the device selected by args.gpu (negative means CPU).
if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
    model.to_gpu()
else:
    model.to_cpu()

# load mild yankee dict
mild_yankee_dict = utils.load_csv_to_dict("mild_dict.csv")


def force_mild(state, line, word_id):
    """Feed ``word_id`` to the model, then force its replacement words.

    The word is run through ``forward_one_step``. If its text (looked up in
    ``inv_vocab``) is a key of ``mild_yankee_dict``, each whitespace-separated
    word of the mapped value is appended to ``line`` and fed through
    ``forward_one_step`` in turn.

    Args:
        state: Model state passed to, and returned by, ``forward_one_step``.
        line: Object with ``append``; receives the forced words in place.
        word_id: Index of the current word in ``inv_vocab``.

    Returns:
        The prediction from the last ``forward_one_step`` call.

    NOTE(review): the updated state is only rebound locally and is not
    returned; callers observe it only if ``forward_one_step`` mutates
    ``state`` in place — confirm.
    """
    cur_word = xp.array([word_id], dtype=np.int32)
    state, predict = forward_one_step(cur_word, state, train=False)
    # Compute the lookup key once; ``in`` replaces the deprecated has_key().
    word = unicode(inv_vocab[word_id])
    if word in mild_yankee_dict:
        for next_word in mild_yankee_dict[word].split():
            line.append(next_word)
            next_word_id = xp.array([vocab[next_word]], dtype=np.int32)
            state, predict = forward_one_step(next_word_id, state, train=False)
    return predict


def forward_one_step(x_data, state, train=True):
    if args.gpu >= 0:
예제 #5
0
# Load the pickled table that is indexed by word id to recover each word.
# NOTE: unpickling can execute arbitrary code; only load inv_vocab.pkl from a
# trusted source.
with open("inv_vocab.pkl", "rb") as _inv_vocab_file:
    inv_vocab = cPickle.load(_inv_vocab_file)

# Second dimension of the embedding weight matrix.
n_units = model.embed.W.shape[1]

# Array module used to build model inputs: cupy on GPU, numpy on CPU.
xp = cuda.cupy if args.gpu >= 0 else np

# Place the model on the device selected by args.gpu (negative means CPU).
if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
    model.to_gpu()
else:
    model.to_cpu()

# load mild yankee dict
mild_yankee_dict = utils.load_csv_to_dict("mild_dict.csv")


def force_mild(state, line, word_id):
    """Feed ``word_id`` to the model, then force its replacement words.

    The word is run through ``forward_one_step``. If its text (looked up in
    ``inv_vocab``) is a key of ``mild_yankee_dict``, each whitespace-separated
    word of the mapped value is appended to ``line`` and fed through
    ``forward_one_step`` in turn.

    Args:
        state: Model state passed to, and returned by, ``forward_one_step``.
        line: Object with ``append``; receives the forced words in place.
        word_id: Index of the current word in ``inv_vocab``.

    Returns:
        The prediction from the last ``forward_one_step`` call.

    NOTE(review): the updated state is only rebound locally and is not
    returned; callers observe it only if ``forward_one_step`` mutates
    ``state`` in place — confirm.
    """
    cur_word = xp.array([word_id], dtype=np.int32)
    state, predict = forward_one_step(cur_word, state, train=False)
    # Compute the lookup key once; ``in`` replaces the deprecated has_key().
    word = unicode(inv_vocab[word_id])
    if word in mild_yankee_dict:
        for next_word in mild_yankee_dict[word].split():
            line.append(next_word)
            next_word_id = xp.array([vocab[next_word]], dtype=np.int32)
            state, predict = forward_one_step(next_word_id, state, train=False)
    return predict

    
def forward_one_step(x_data, state, train=True):
    if args.gpu >= 0: