Example #1
    @classmethod
    def preprocess(cls, args):
        # Word-level input is split on whitespace; any other input type
        # is treated character by character.
        split_words = args.input_type == 'word'

        os.makedirs(args.data_dir_out, exist_ok=True)
        basename = os.path.basename(args.in_data)

        # Index the input file; the helper returns one
        # (offsets, lengths, counter) triple per file, hence the
        # single-element unpacking.
        ((offsets, lengths,
          counter), ) = cls.get_indices_and_vocabulary([args.in_data],
                                                       split_words, args.lower,
                                                       not args.no_progress,
                                                       args.report_every)

        # Persist per-line byte offsets and token lengths next to the data.
        out_offsets = os.path.join(args.data_dir_out, basename + '.idx.npy')
        out_lengths = os.path.join(args.data_dir_out, basename + '.len.npy')
        np.save(out_offsets, offsets)
        np.save(out_lengths, lengths)

        # Build the vocabulary from the token counts, prune it to the
        # requested size and frequency threshold, and write it out.
        dictionary = Dictionary()
        for word, count in counter.items():
            dictionary.add_symbol(word, count)

        dictionary.finalize(nwords=args.vocab_size,
                            threshold=args.vocab_threshold or -1)

        dictionary.save(os.path.join(args.data_dir_out, 'dict'))
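The single-element unpacking above works because get_indices_and_vocabulary returns one (offsets, lengths, counter) triple per input file. The helper itself is not shown on this page; the following is only a minimal sketch of its likely shape for plain UTF-8 text, with the parameter names after split_words guessed from the call site (progress reporting is accepted but omitted here):

    import collections

    import numpy as np

    def get_indices_and_vocabulary(files, split_words, lower=False,
                                   progress=True, report_every=100000):
        # Hypothetical reconstruction: yields one (offsets, lengths, counter)
        # triple per input file. progress/report_every are accepted for
        # signature compatibility but ignored in this sketch.
        results = []
        for path in files:
            offsets, lengths = [], []
            counter = collections.Counter()
            with open(path, 'rb') as f:
                while True:
                    offset = f.tell()
                    line = f.readline()
                    if not line:  # EOF
                        break
                    text = line.decode('utf-8').strip()
                    if lower:
                        text = text.lower()
                    tokens = text.split() if split_words else list(text)
                    offsets.append(offset)       # byte offset of the line
                    lengths.append(len(tokens))  # tokens per line
                    counter.update(tokens)
            results.append((np.array(offsets), np.array(lengths), counter))
        return tuple(results)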
Example #2
    def preprocess(args):
        split_words = args.input_type == 'word'

        os.makedirs(args.data_dir_out, exist_ok=True)
        train_clean_name = os.path.basename(args.train_clean)
        # The noisy file is optional; when present it is indexed in the
        # same pass as the clean file.
        source_files = [args.train_clean]
        if args.train_noisy is not None:
            source_files.append(args.train_noisy)

        outputs = get_indices_and_vocabulary(source_files, split_words,
                                             args.lower, not args.no_progress,
                                             args.report_every)

        if args.train_noisy is not None:
            train_noisy_name = os.path.basename(args.train_noisy)
            (offsets, lengths, counter), \
            (noisy_offsets, noisy_lengths, noisy_counter) = outputs
            # Merge the noisy-side counts so one vocabulary covers both inputs.
            counter.update(noisy_counter)

            # Only the noisy offsets are persisted here; noisy_lengths is
            # left unused by this snippet.
            noisy_offset_filename = os.path.join(args.data_dir_out,
                                                 train_noisy_name + '.idx.npy')
            np.save(noisy_offset_filename, noisy_offsets)
        else:
            ((offsets, lengths, counter), ) = outputs

        out_offsets = os.path.join(args.data_dir_out,
                                   train_clean_name + '.idx.npy')
        out_lengths = os.path.join(args.data_dir_out,
                                   train_clean_name + '.len.npy')
        np.save(out_offsets, offsets)
        np.save(out_lengths, lengths)

        # Reuse an existing dictionary when one is supplied; otherwise
        # build one from the (merged) token counts.
        if args.vocab is not None:
            dictionary = Dictionary.load(args.vocab)
        else:
            dictionary = Dictionary()
            for word, count in counter.items():
                dictionary.add_symbol(word, count)

        dictionary.finalize(nwords=args.vocab_size,
                            threshold=args.vocab_threshold or -1)
        dictionary.save(os.path.join(args.data_dir_out, 'dict'))
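This snippet reads a number of attributes off args. A hypothetical argparse front end that would satisfy it looks roughly like the sketch below; the flag spellings and defaults are illustrative assumptions (argparse maps '--train-clean' to args.train_clean), not the project's actual CLI:

    import argparse

    # Hypothetical CLI: each flag mirrors one attribute the snippet reads.
    parser = argparse.ArgumentParser(
        description='Binarize clean/noisy training text')
    parser.add_argument('--train-clean', required=True)
    parser.add_argument('--train-noisy', default=None)
    parser.add_argument('--data-dir-out', required=True)
    parser.add_argument('--input-type', default='word',
                        help="'word' splits on whitespace; anything else "
                             "is treated character by character")
    parser.add_argument('--lower', action='store_true')
    parser.add_argument('--no-progress', action='store_true')
    parser.add_argument('--report-every', type=int, default=100000)
    parser.add_argument('--vocab', default=None,
                        help='existing dictionary to reuse instead of '
                             'building one')
    parser.add_argument('--vocab-size', type=int, default=-1)
    parser.add_argument('--vocab-threshold', type=int, default=None)

    args = parser.parse_args()
    preprocess(args)  # assumes preprocess is in scope as defined above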
Example #3
    @classmethod
    def preprocess(cls, args):
        split_words = args.input_type == 'word'

        os.makedirs(args.data_dir_out, exist_ok=True)
        src_name = os.path.basename(args.in_src)
        tgt_name = os.path.basename(args.in_tgt)

        # Index source and target in one pass; one
        # (offsets, lengths, counter) triple comes back per file.
        (src_offsets, src_lengths, src_counter), \
        (tgt_offsets, tgt_lengths, tgt_counter) = \
            cls.get_indices_and_vocabulary((args.in_src, args.in_tgt),
                                           split_words,
                                           args.lower,
                                           not args.no_progress,
                                           args.report_every)

        out_offsets_src = os.path.join(args.data_dir_out,
                                       src_name + '.idx.npy')
        out_lengths_src = os.path.join(args.data_dir_out,
                                       src_name + '.len.npy')
        np.save(out_offsets_src, src_offsets)
        np.save(out_lengths_src, src_lengths)

        # Build one dictionary per side; they are merged below when a
        # joint vocabulary is requested.
        src_dictionary = Dictionary()
        for word, count in src_counter.items():
            src_dictionary.add_symbol(word, count)

        out_offsets_tgt = os.path.join(args.data_dir_out,
                                       tgt_name + '.idx.npy')
        out_lengths_tgt = os.path.join(args.data_dir_out,
                                       tgt_name + '.len.npy')
        np.save(out_offsets_tgt, tgt_offsets)
        np.save(out_lengths_tgt, tgt_lengths)

        tgt_dictionary = Dictionary()
        for word, count in tgt_counter.items():
            tgt_dictionary.add_symbol(word, count)

        if args.join_vocab:
            # A joint vocabulary has to cover both sides, so fold the
            # target symbols and counts into the source dictionary
            # before pruning, then save a single 'dict' file.
            src_dictionary.update(tgt_dictionary)
            src_dictionary.finalize(nwords=args.src_vocab_size,
                                    threshold=args.vocab_threshold or -1)

            src_dictionary.save(os.path.join(args.data_dir_out, 'dict'))

        else:
            # Separate vocabularies: prune and save each side independently.
            src_dictionary.finalize(nwords=args.src_vocab_size,
                                    threshold=args.vocab_threshold or -1)
            tgt_dictionary.finalize(nwords=args.tgt_vocab_size,
                                    threshold=args.vocab_threshold or -1)

            src_dictionary.save(os.path.join(args.data_dir_out, 'src.dict'))
            tgt_dictionary.save(os.path.join(args.data_dir_out, 'tgt.dict'))
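Downstream code has to mirror this layout: one shared 'dict' file when join_vocab is set, separate 'src.dict' and 'tgt.dict' files otherwise. Sharing a single dictionary is what typically makes tied source/target embeddings possible, while separate dictionaries keep each side's vocabulary budget independent. A minimal sketch of a loader making that contract explicit (load_dictionaries is not part of the snippet, and Dictionary is the same class used above):

    import os

    def load_dictionaries(data_dir, join_vocab):
        # Hypothetical counterpart to the save logic above.
        if join_vocab:
            # Both sides share one dictionary object.
            joint = Dictionary.load(os.path.join(data_dir, 'dict'))
            return joint, joint
        src = Dictionary.load(os.path.join(data_dir, 'src.dict'))
        tgt = Dictionary.load(os.path.join(data_dir, 'tgt.dict'))
        return src, tgt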