def test_add_from_counter_n_most_values_with_default(self):
    """Check `add_from_counter` when both a size limit and extra values are given.

    With ``n_most_values=4`` and two entries in ``add_values``, the test
    expects the extra values to take ids 0 and 1, followed only by the two
    most frequent counter keys ("c" then "a").
    """
    frequencies = Counter({"a": 3, "b": 1, "c": 4, "d": 1})
    expected_mapping = {"<SOS>": 0, "<EOS>": 1, "c": 2, "a": 3}

    vocabulary = Vocabulary()
    vocabulary.add_from_counter(
        "token_to_id",
        frequencies,
        n_most_values=4,
        add_values=["<SOS>", "<EOS>"],
    )

    self.assertDictEqual(vocabulary.token_to_id, expected_mapping)
def _vocab_from_counters(
    config: PreprocessingConfig, token_counter: Counter, target_counter: Counter, type_counter: Counter
) -> Vocabulary:
    """Build a `Vocabulary` from the token, target and type counters.

    Three fields are filled through `Vocabulary.add_from_counter`:

    * ``token_to_id`` from ``token_counter``, sized by
      ``config.subtoken_vocab_max_size``; special tokens depend on
      ``config.wrap_name``.
    * ``label_to_id`` from ``target_counter``, sized by
      ``config.target_vocab_max_size``; special tokens depend on
      ``config.wrap_target``.
    * ``type_to_id`` from ``type_counter``, with ``-1`` passed as the size
      argument; special tokens depend on ``config.wrap_path``.

    :param config: preprocessing settings providing the wrap flags and size limits
    :param token_counter: occurrence counts of tokens
    :param target_counter: occurrence counts of targets
    :param type_counter: occurrence counts of types
    :return: the populated vocabulary
    """

    def special_tokens(wrapped):
        # A fresh list is built on every call so the three fields never share one.
        if wrapped:
            return [SOS, EOS, PAD, UNK]
        return [PAD, UNK]

    result = Vocabulary()

    token_specials = special_tokens(config.wrap_name)
    result.add_from_counter("token_to_id", token_counter, config.subtoken_vocab_max_size, token_specials)

    label_specials = special_tokens(config.wrap_target)
    result.add_from_counter("label_to_id", target_counter, config.target_vocab_max_size, label_specials)

    type_specials = special_tokens(config.wrap_path)
    # NOTE(review): -1 presumably means "no size limit" -- confirm against Vocabulary.add_from_counter.
    result.add_from_counter("type_to_id", type_counter, -1, type_specials)

    return result
def test_add_from_counter_all_values(self):
    """Check `add_from_counter` when neither a size limit nor extra values are given.

    The test expects every counter key to receive an id in descending count
    order, with the tied keys "b" and "d" keeping their order in the counter.
    """
    frequencies = Counter({"a": 3, "b": 1, "c": 4, "d": 1})
    expected_mapping = {"c": 0, "a": 1, "b": 2, "d": 3}

    vocabulary = Vocabulary()
    vocabulary.add_from_counter("token_to_id", frequencies)

    self.assertDictEqual(vocabulary.token_to_id, expected_mapping)
def test_add_from_counter_raise_error(self):
    """Check that `add_from_counter` raises `ValueError` for the field name "unknown_field"."""
    frequencies = Counter({"a": 3, "b": 1, "c": 4, "d": 1})
    vocabulary = Vocabulary()

    # Callable form of assertRaises: the call is made by the assertion itself.
    self.assertRaises(ValueError, vocabulary.add_from_counter, "unknown_field", frequencies)