def tmp_replace_word(text, noun_list_dict, num=10):
    tagger = util.create_tagger()
    result_list = []
    for _ in range(num):
        replaced, _ = replace_noun_by_similar_word(
            text,
            noun_list_dict,
            tagger,
            {}  # not used
        )
        result_list.append(replaced)
    return result_list
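# NOTE: replace_noun_by_similar_word is used throughout this listing but is not
# defined here. A minimal sketch of the assumed behaviour (the name suffix, the
# MeCab output format, and the shape of noun_list_dict are assumptions, not the
# original implementation): parse the text, and for each noun that has an entry
# in noun_list_dict pick one of its similar words at random, recording the
# choice in used_word so repeated nouns are converted consistently.
import random

def replace_noun_by_similar_word_sketch(text, noun_list_dict, tagger, used_word):
    replaced = text
    for token in tagger.parse(text).split('\n'):
        if '\t' not in token:
            continue  # skip 'EOS' and empty lines
        surface, feature = token.split('\t', 1)
        if not feature.startswith('名詞'):
            continue  # only nouns are replaced
        if surface not in used_word:
            candidates = noun_list_dict.get(surface)
            if not candidates:
                continue  # no similar words known for this noun
            used_word[surface] = random.choice(candidates)
        replaced = replaced.replace(surface, used_word[surface])
    return replaced, used_word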
def output_word_list(name_char_topn, noun_topn):
    '''
    Output the word lists as JSON.
    '''
    # name_char_topn and noun_topn are for tuning the similarity lookups
    model = util.load_model(const.WORD2VEC_MODEL_PATH)
    tagger = util.create_tagger()

    name_char_dict, noun_dict = crate_target_word_dict(
        model, tagger, name_char_topn, noun_topn)

    write_to_json(const.NAME_CHARCTER_LIST_FILE, name_char_dict)
    write_to_json(const.SIMILAR_NOUN_LIST_FILE, noun_dict)
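# write_to_json and read_json_to_dict are small JSON helpers assumed by this
# listing but not shown. A plausible minimal sketch (the UTF-8 encoding and
# indentation are assumptions):
import json

def write_to_json_sketch(file_path, data):
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

def read_json_to_dict_sketch(file_path):
    with open(file_path, encoding='utf-8') as f:
        return json.load(f)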
def tmp(target, noun_topn=9):

    model = util.load_model(const.WORD2VEC_MODEL_PATH)
    tagger = util.create_tagger()

    similar_noun_dict = {}

    tab_divided_word_token_list = tagger.parse(target).split('\n')
    noun_list = create_noun_list(tab_divided_word_token_list)
    noun_results = create_similar_noun_dict(
        model, tagger, noun_list, noun_topn)
    similar_noun_dict.update(noun_results)

    return similar_noun_dict
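# create_noun_list and create_similar_noun_dict are also not part of this
# listing. Minimal sketches of the assumed behaviour: the first pulls noun
# surfaces out of MeCab's tab-divided output ("surface\tPOS,..." per line),
# the second looks each noun up in the word2vec model and keeps the topn most
# similar words. The gensim-style most_similar call is an assumption, and the
# original create_similar_noun_dict also receives the tagger, which this
# sketch omits.
def create_noun_list_sketch(tab_divided_word_token_list):
    nouns = []
    for token in tab_divided_word_token_list:
        if '\t' not in token:
            continue  # skip 'EOS' and blank lines
        surface, feature = token.split('\t', 1)
        if feature.startswith('名詞') and surface not in nouns:
            nouns.append(surface)
    return nouns

def create_similar_noun_dict_sketch(model, noun_list, topn):
    similar = {}
    for noun in noun_list:
        try:
            candidates = model.most_similar(noun, topn=topn)
        except KeyError:
            continue  # noun is not in the model's vocabulary
        similar[noun] = [word for word, _score in candidates]
    return similar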
def random_generate_ese_bungo_one():
    '''
    Pick and convert just one quote at random.
    '''
    tagger = util.create_tagger()
    source_dict = read_json_to_dict(const.ORIGINAL_NOVEL_FILE)
    noun_list_dict = read_json_to_dict(const.SIMILAR_NOUN_LIST_FILE)

    author_name, novel_list = random.choice(list(source_dict.items()))
    novel = random.choice(novel_list)
    title = novel['title']
    quote = random.choice(novel['quotes'])

    used_word = {}
    generated_quote, used_word = replace_noun_by_similar_word(
        quote, noun_list_dict, tagger, used_word)
    generated_title, used_word = replace_noun_by_similar_word(
        title, noun_list_dict, tagger, used_word)
    generated_name = generate_fake_name(author_name)

    print(author_name, title, quote)
    print('')
    print(generated_name, generated_title, generated_quote)
    print('---')

    original = {
        'author': author_name,
        'title': title,
        'quote': quote,
        'url': novel['url']
    }
    generated = {
        'author': generated_name,
        'title': generated_title,
        'quote': generated_quote
    }
    return original, generated
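# generate_fake_name is not defined in this listing. Given the
# NAME_CHARCTER_LIST_FILE produced by output_word_list above, it presumably
# swaps individual characters of the author's name for similar characters.
# A hypothetical sketch; the extra name_char_dict parameter is an assumption,
# since the real function takes only the author name:
import random

def generate_fake_name_sketch(author_name, name_char_dict):
    fake = ''
    for char in author_name:
        candidates = name_char_dict.get(char)
        fake += random.choice(candidates) if candidates else char
    return fake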
def generate_fake_books_and_quotes_tmp(num=5):
    tagger = util.create_tagger()
    noun_list_dict = read_json_to_dict(const.SIMILAR_NOUN_LIST_FILE_TMP)

    source_dict = read_json_to_dict(const.ORIGINAL_NOVEL_SOURCE_TMP)
    results = []

    for author_name, data in source_dict.items():
        print('-----------')
        print(author_name)
        for book in data['novels']:
            title = book['title']
            for quote_text in book['quotes']:
                for _ in range(num):
                    # Keep used_word per work (title + quote) so the same word is always
                    # converted the same way. When a title word also appears in the body,
                    # e.g. 「メロスは激怒した。」 ("Melos was furious.") in 『走れメロス』 ("Run, Melos!"),
                    # converting the occurrences separately makes the result less interesting.
                    used_word = {}

                    fake_quote_text, used_word = replace_noun_by_similar_word(
                        quote_text, noun_list_dict, tagger, used_word)

                    fake_title, used_word = replace_noun_by_similar_word(
                        title, noun_list_dict, tagger, used_word)

                    generated_data = {
                        # 'author_id': author_id,
                        'fake_name': generate_fake_name(author_name,
                                                        is_tmp=True),
                        # 'book_id': book_id,
                        'fake_title': fake_title,
                        # 'quote_id': quote_id,
                        'fake_text': fake_quote_text,
                    }
                    results.append(generated_data)
    return results
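# Hypothetical driver for the temporary generator above; the output path is a
# placeholder:
def dump_tmp_fakes_sketch(path='fake_books_and_quotes_tmp.json'):
    fakes = generate_fake_books_and_quotes_tmp(num=3)
    write_to_json(path, fakes)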
def decorate_with_logging(target_text_list, beautiful_word_list, topn):
    '''
    Loop over the texts, decorate them beautifully, and print the results.
    '''
    main_word = beautiful_word_list[0]

    print(f'{main_word}化開始...')
    print('')
    pn_dict = decorator.get_pn_dict1()
    model = util.load_model(const.WORD2VEC_MODEL_PATH)
    tagger = util.create_tagger()

    all_result = []
    for target_text in target_text_list:
        result = decorator.decorate(model, tagger, pn_dict, target_text,
                                    beautiful_word_list, topn)
        all_result.append(result)

    print(f'{main_word}化完了')
    print('')

    print('')
    print('***************')
    print('変更結果もれCheck')
    print('***************')
    not_decorated_list = [
        result[3] for result in all_result if len(result[3]) != 0
    ]
    if len(not_decorated_list) == 0:
        print(f'全て{main_word}化!')
    else:
        for not_decorated in not_decorated_list:
            print(not_decorated)

    print('')
    print('***************')
    print(f'使用{main_word}語チェック')
    print('***************')
    for beautiful_word in beautiful_word_list:
        print('')
        print('------------------')
        print(f'{beautiful_word}によって{main_word}化された単語')
        print('------------------')
        # Convert each history to a tuple so it can go into a set for deduplication below
        res = [
            tuple(decorate_history) for result in all_result
            for decorate_history in result[4]
            if decorate_history[0] == beautiful_word
        ]
        # Remove duplicates
        res = list(set(res))
        if len(res) == 0:
            print('なし!')
        else:
            for r in res:
                nega_word = r[1]
                posi_word = r[2]
                print(f'{nega_word} to {posi_word}')

    print('')
    print('***************')
    print('変更結果Check')
    print('***************')
    for result in all_result:
        print('-----------')
        print(result[0])
        print('↓')
        print(result[1])
        print('')
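# decorate_with_logging above indexes the value returned by decorator.decorate
# as a 5-element sequence. The layout inferred from that usage (an assumption,
# not documented in this listing):
#   result[0]  original text
#   result[1]  decorated text
#   result[2]  unused here
#   result[3]  words that could not be decorated (empty when everything matched)
#   result[4]  decoration history entries of the form (beautiful_word, nega_word, posi_word)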
def generate_fake_books_and_quotes(num=5, author_id=None):
    '''
    Convert each phrase in the source data num times.
    '''
    tagger = util.create_tagger()
    noun_list_dict = read_json_to_dict(const.SIMILAR_NOUN_LIST_FILE)

    results = []

    conn = get_connect()
    with conn:
        cur = conn.cursor()

        # Author
        authors = []
        if author_id is None:
            authors = get_all_authors(cur)
        else:
            authors = get_author(cur, author_id)
            print(authors)

        for author in authors:
            author_id = author[0]
            author_name = author[1]

            # Book
            books = get_all_books_by_author(cur, author_id)
            for book in books:

                book_id = book[0]
                title = book[1]
                url = book[2]

                # Quote
                quotes = get_all_quotes_by_book(cur, book_id)
                for quote in quotes:
                    quote_id = quote[0]
                    quote_text = quote[1]

                    for _ in range(num):
                        # Keep used_word per work (title + quote) so the same word is always
                        # converted the same way. When a title word also appears in the body,
                        # e.g. 「メロスは激怒した。」 ("Melos was furious.") in 『走れメロス』 ("Run, Melos!"),
                        # converting the occurrences separately makes the result less interesting.
                        used_word = {}

                        fake_quote_text, used_word = replace_noun_by_similar_word(
                            quote_text, noun_list_dict, tagger, used_word)

                        fake_title, used_word = replace_noun_by_similar_word(
                            title, noun_list_dict, tagger, used_word)

                        generated_data = {
                            'author_id': author_id,
                            'fake_name': generate_fake_name(author_name),
                            'book_id': book_id,
                            'fake_title': fake_title,
                            'quote_id': quote_id,
                            'fake_text': fake_quote_text,
                        }
                        results.append(generated_data)
    return results
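# The database helpers used above (get_connect, get_all_authors, get_author,
# get_all_books_by_author, get_all_quotes_by_book) are not part of this
# listing. A minimal sqlite3-based sketch; the database path and the table and
# column names are assumptions inferred from how the rows are unpacked above:
import sqlite3

def get_connect_sketch(db_path='ese_bungo.db'):
    return sqlite3.connect(db_path)

def get_all_authors_sketch(cur):
    return cur.execute('SELECT id, name FROM authors').fetchall()

def get_author_sketch(cur, author_id):
    return cur.execute('SELECT id, name FROM authors WHERE id = ?',
                       (author_id,)).fetchall()

def get_all_books_by_author_sketch(cur, author_id):
    return cur.execute('SELECT id, title, url FROM books WHERE author_id = ?',
                       (author_id,)).fetchall()

def get_all_quotes_by_book_sketch(cur, book_id):
    return cur.execute('SELECT id, text FROM quotes WHERE book_id = ?',
                       (book_id,)).fetchall()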
def generate_ese_bungo_all(num=1, is_tmp=True):
    '''
    Convert each phrase in the source data num times.
    '''
    tagger = util.create_tagger()
    source_dict = read_json_to_dict(const.ORIGINAL_NOVEL_FILE)
    noun_list_dict = read_json_to_dict(const.SIMILAR_NOUN_LIST_FILE)

    results = []
    loop_cnt = 0

    orginal_list = []

    a_c = 0  # number of authors
    n_c = 0  # number of works
    q_c = 0  # number of quotes
    for author_name, novel_list in source_dict.items():
        a_c += 1
        for novel in novel_list:
            n_c += 1
            title = novel['title']
            for quote in novel['quotes']:
                q_c += 1
                orginal_list.append([author_name, title, quote, novel['url']])
                orginal_list_idx = len(orginal_list) - 1
                used_quote = []
                for _ in range(num):
                    loop_cnt = loop_cnt + 1

                    # Keep used_word per work (title + quote) so the same word is always
                    # converted the same way. When a title word also appears in the body,
                    # e.g. 「メロスは激怒した。」 ("Melos was furious.") in 『走れメロス』 ("Run, Melos!"),
                    # converting the occurrences separately makes the result less interesting.
                    used_word = {}

                    generated_quote, used_word = replace_noun_by_similar_word(
                        quote, noun_list_dict, tagger, used_word)
                    # Skip quotes that were already generated; seeing the same one
                    # repeatedly is boring (for the demo site).
                    if generated_quote in used_quote:
                        continue
                    else:
                        used_quote.append(generated_quote)

                    generated_title, used_word = replace_noun_by_similar_word(
                        title, noun_list_dict, tagger, used_word)

                    generated_name = generate_fake_name(author_name)

                    print(author_name, title, quote)
                    print('')
                    print(generated_name, generated_title, generated_quote)
                    print('---')
                    fake = [
                        generated_name, generated_title, generated_quote,
                        orginal_list_idx
                    ]
                    results.append(fake)

    print(f'author :{a_c}, novel :{n_c}, quote :{q_c}')
    print('loop_cnt:', loop_cnt)
    print('total:', len(results))

    return orginal_list, results
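# Hypothetical driver that runs the full generator and stores both lists; the
# file names are placeholders:
def dump_all_ese_bungo_sketch():
    originals, fakes = generate_ese_bungo_all(num=2)
    write_to_json('original_quotes.json', originals)
    write_to_json('fake_quotes.json', fakes)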
def tmp():
    tagger = util.create_tagger()
    source_dict = read_json_to_dict(const.ORIGINAL_NOVEL_FILE_TMP)
    noun_list_dict = read_json_to_dict(const.SIMILAR_NOUN_LIST_FILE)