示例#1
0
 def __init__(self, table_name, join_keys, df, join_name):
     """Set up the actor that serves rows of a single data table.

     Every join key and every column label of ``df`` is qualified as
     ``"<table_name>.<name>"``. The relabeling is done by assigning to
     ``df.columns``, so the object passed in by the caller is modified.

     Args:
         table_name: Name of the table; used as the label prefix, in the
             indicator column name and in the index file path.
         join_keys: Unqualified names of the join-key columns.
         df: Table data exposing a ``columns`` attribute (presumably a
             pandas DataFrame -- confirm against the caller).
         join_name: Leading path component of the index file handed to
             ``rustlib.IndexProvider``.
     """
     prefix = f"{table_name}."
     qualified_keys = [f"{prefix}{key}" for key in join_keys]

     self.table = table_name
     self.df = df
     self.join_keys = qualified_keys
     # Relabel after the join keys are built; both use the same prefix.
     self.df.columns = [f"{prefix}{label}" for label in self.df.columns]
     self.indicator_column = f"__in_{table_name}"

     # Sample columns are all qualified columns except the join keys.
     self.sample_columns = [
         label for label in self.df.columns if label not in qualified_keys
     ]

     index_path = f"{join_name}/{table_name}.pk.indices"
     # The second argument is the NULL value defined elsewhere in this file.
     self.index_provider = rustlib.IndexProvider(index_path, NULL)
     log.info(f"DataTableActor of `{table_name}` is ready.")
示例#2
0
    def __init__(self, table, jct, join_spec):
        """Set up the join-count-table actor for ``table``.

        The constructor looks up the predecessors of ``table`` in
        ``join_spec.join_tree`` and takes one of two paths:

        * exactly one parent: the qualified join-key names of both sides
          are stored, ``self._insert_null_to_jct()`` is called, and its
          return value is passed to ``rustlib.IndexProvider`` together
          with the ``<join_name>/<table>.jk.indices`` path;
        * otherwise: ``self.jct_distribution`` is built from the
          ``"<table>.weight"`` entry of ``jct``.

        Args:
            table: Name of the table this actor is responsible for.
            jct: Join count table, indexable by qualified column name.
            join_spec: Object providing ``join_tree``, ``join_graph`` and
                ``join_name``.
        """
        self.jct = jct
        self.table = table

        parents = list(join_spec.join_tree.predecessors(table))
        # More than one parent is not expected for a table in the tree.
        assert len(parents) <= 1, parents

        if len(parents) != 1:
            # No parent: only the weight distribution is needed.
            weight_column = f"{self.table}.weight"
            self.jct_distribution = get_distribution(self.jct[weight_column])
        else:
            (parent,) = parents
            key_by_table = join_spec.join_graph[parent][table]["join_keys"]
            self.table_join_key = f"{table}.{key_by_table[table]}"
            self.parent_join_key = f"{parent}.{key_by_table[parent]}"
            # The join-key attributes are set before this call, as in the
            # original order -- presumably the helper reads them; verify.
            null_row_offset = self._insert_null_to_jct()
            index_path = f"{join_spec.join_name}/{table}.jk.indices"
            self.index_provider = rustlib.IndexProvider(
                index_path, null_row_offset)

        log.info(f"JoinCountTableActor `{table}` is ready.")