def test_merge_dicts(self):
    """Verify that ``merge_dicts_`` updates its first argument in place.

    The expectation encoded here: a key present in both dicts ends up with
    the sum of the two values, while keys found in only one dict keep
    their value.
    """
    target = {"a": 3, "b": 4}
    addition = {"b": 5, "c": 6}

    # The return value is deliberately ignored; only the mutation of
    # ``target`` is checked.
    merge_dicts_(target, addition)

    self.assertEqual({"a": 3, "b": 9, "c": 6}, target)
def calc_stats(dest_dir, threshold):
    """Aggregate per-project logging statistics and pick projects to skip.

    For every ``(stats, project name)`` pair produced by ``file_mapper``,
    the share of ``WITH_LOGGING`` entries among all entries
    (``WITH_LOGGING`` + ``NO_LOGGING``) is compared with ``threshold``.
    Projects at or below the threshold are collected for exclusion; the
    stats of all other projects are merged into one dictionary through
    ``merge_dicts_``.

    Args:
        dest_dir: Location handed to ``file_mapper`` (presumably the
            directory holding the label files -- confirm against caller).
        threshold: Minimum logging share expressed in percent; it is
            scaled by 0.01 before the comparison.

    Returns:
        A tuple ``(projects_to_ignore, res_logged_stats)`` where the first
        item lists the names of the projects whose logging share is less
        than or equal to the threshold and the second item holds the
        merged stats of the remaining projects.
    """
    projects_to_ignore = []
    res_logged_stats = {}
    # Loop-invariant: convert the percentage into a fraction once.
    min_share = threshold * 0.01

    for logged_stats, proj_name in file_mapper(
            dest_dir, calc_logged_stats, ContextsDataset.LABEL_FILE_EXT):
        with_logging = logged_stats[WITH_LOGGING]
        total = with_logging + logged_stats[NO_LOGGING]
        # A project with no entries at all used to raise ZeroDivisionError.
        # Its logging share is treated as zero so it follows the same path
        # as any other project without logging.
        share = float(with_logging) / total if total else 0.0

        if share <= min_share:
            projects_to_ignore.append(proj_name)
        else:
            merge_dicts_(res_logged_stats, logged_stats)

    return projects_to_ignore, res_logged_stats
def add_vocab(self, partial_vocab: 'PartialVocab') -> List[str]:
    """Fold the word counts and statistics of another vocab into this one.

    The word counts of ``partial_vocab`` are combined with this object's
    counts through ``merge_dicts_``, the file counter is increased by the
    other vocab's ``n_files``, and the stats history is extended with the
    other vocab's stats followed by one fresh entry describing the state
    after the merge.

    Args:
        partial_vocab: The vocab whose ``merged_word_counts``, ``n_files``
            and ``stats`` are absorbed.

    Returns:
        The second element of the ``merge_dicts_`` result (the newly seen
        words, judging by the name -- confirm against ``merge_dicts_``).
    """
    merged_counts, added_words = merge_dicts_(
        self.merged_word_counts, partial_vocab.merged_word_counts)
    self.merged_word_counts = merged_counts

    vocab_size = len(merged_counts)
    self.n_files += partial_vocab.n_files

    # Snapshot after the merge: files seen so far, vocabulary size and the
    # counts stored under the two non-English placeholder keys.
    non_eng_count = merged_counts[placeholders['non_eng']]
    non_eng_content_count = merged_counts[placeholders['non_eng_content']]
    snapshot = (self.n_files, vocab_size,
                non_eng_count, non_eng_content_count)

    history_tail = partial_vocab.stats + [snapshot]
    self.stats.extend(history_tail)
    return added_words
def add_vocab(self, partial_vocab) -> List[str]:
    """Absorb another ``PartialVocab`` into this one.

    After the type check, the word counts of ``partial_vocab`` are
    combined with this object's counts through ``merge_dicts_``, the file
    counter grows by the other vocab's ``n_files``, and the stats history
    receives the other vocab's stats plus one new entry describing the
    state after the merge.

    Args:
        partial_vocab: The vocab to absorb; must be a ``PartialVocab``.

    Returns:
        The second element of the ``merge_dicts_`` result (the newly seen
        words, judging by the name -- confirm against ``merge_dicts_``).

    Raises:
        TypeError: If ``partial_vocab`` is not a ``PartialVocab`` instance.
    """
    if isinstance(partial_vocab, PartialVocab):
        merged_counts, added_words = merge_dicts_(
            self.merged_word_counts, partial_vocab.merged_word_counts)
        self.merged_word_counts = merged_counts

        vocab_size = len(merged_counts)
        self.n_files += partial_vocab.n_files

        # Snapshot after the merge: files seen so far, vocabulary size and
        # the counts stored under the two non-English placeholder keys.
        snapshot = (
            self.n_files,
            vocab_size,
            merged_counts[placeholders['non_eng']],
            merged_counts[placeholders['non_eng_content']],
        )

        history_tail = partial_vocab.stats + [snapshot]
        self.stats.extend(history_tail)
        return added_words

    raise TypeError(
        f'Vocab must be a PartialVocab, but is {type(partial_vocab)}')