Exemplo n.º 1
0
def update_symbols(data: MergedDataset, symbols: SymbolIdDict) -> SymbolIdDict:
    """Rebuild the symbol dictionary from the symbols actually used in *data*.

    Every entry's ``serialized_symbol_ids`` is decoded with *symbols* and the
    resulting symbols are collected into a set. A new ``SymbolIdDict`` is
    created from that set via ``init_from_symbols_with_pad`` using
    ``DEFAULT_PADDING_SYMBOL``. If the new dictionary's symbol collection
    differs from the one in *symbols*, each entry in *data* is re-encoded in
    place against the new dictionary.

    Args:
        data: Dataset whose entries carry ``serialized_symbol_ids``; entries
            may be mutated in place.
        symbols: Dictionary the entries are currently encoded with.

    Returns:
        The newly built ``SymbolIdDict`` (returned whether or not any entry
        was re-encoded).
    """
    used_symbols: Set[str] = set()
    for item in data.items():
        used_symbols.update(symbols.get_symbols(item.serialized_symbol_ids))

    rebuilt = SymbolIdDict.init_from_symbols_with_pad(
        used_symbols, pad_symbol=DEFAULT_PADDING_SYMBOL)

    # NOTE(review): when the symbol collections match, entries are left
    # untouched; this presumably relies on both dictionaries assigning the
    # same ids to the same symbols -- confirm against SymbolIdDict.
    if rebuilt.get_all_symbols() == symbols.get_all_symbols():
        return rebuilt

    for item in data.items():
        decoded = symbols.get_symbols(item.serialized_symbol_ids)
        item.serialized_symbol_ids = rebuilt.get_serialized_ids(decoded)
    return rebuilt
Exemplo n.º 2
0
    def make_common_symbol_ids(self) -> SymbolIdDict:
        """Give every dataset in ``self.data`` one shared symbol dictionary.

        The symbols of all datasets' ``symbol_ids`` are united and a single
        ``SymbolIdDict`` is built from them via ``init_from_symbols_with_pad``
        using ``DEFAULT_PADDING_SYMBOL``. Each entry of each dataset is then
        decoded with that dataset's previous dictionary and re-encoded with
        the shared one, after which the dataset's ``symbol_ids`` is replaced.

        Returns:
            The shared ``SymbolIdDict`` now referenced by every dataset.
        """
        merged_symbols: Set[str] = set()
        for dataset in self.data:
            merged_symbols |= dataset.symbol_ids.get_all_symbols()

        shared_ids = SymbolIdDict.init_from_symbols_with_pad(
            merged_symbols, pad_symbol=DEFAULT_PADDING_SYMBOL)

        for dataset in self.data:
            # Keep the old mapping at hand: entries are still encoded with it
            # until the dataset's dictionary is swapped below.
            previous_ids = dataset.symbol_ids
            for item in dataset.data.items():
                decoded = previous_ids.get_symbols(item.serialized_symbol_ids)
                item.serialized_symbol_ids = shared_ids.get_serialized_ids(
                    decoded)
            dataset.symbol_ids = shared_ids

        return shared_ids