def root_tables(self):
    """Return the root tables underlying this node's two operands.

    When ``util.all_of`` reports that both operand operations match
    ``(Join, Projection)``, the operand operations themselves are
    returned as a two-element list.  Otherwise the result of
    ``ir.distinct_roots(self.left, self.right)`` is returned.
    """
    both_opaque = util.all_of(
        [self.left.op(), self.right.op()], (Join, Projection)
    )
    if not both_opaque:
        return ir.distinct_roots(self.left, self.right)

    # Unraveling is not possible
    return [self.left.op(), self.right.op()]
def remap_overlapping_column_names(table_op, root_table, data_columns):
    """Map column names found in the data back to unsuffixed schema names.

    Parameters
    ----------
    table_op : TableNode
        The ``TableNode`` we're selecting from.
    root_table : TableNode
        The root table of the expression we're selecting from.
    data_columns : set or frozenset
        The available columns to select from

    Returns
    -------
    mapping : OrderedDict[str, str] or None
        ``None`` when ``table_op`` is not an ``ops.Join``.  Otherwise a
        map from possibly-suffixed column names (as present in
        ``data_columns``) to column names without suffixes, ordered as
        in ``root_table.schema.names``.  Schema columns that are absent
        from ``data_columns`` under both spellings are omitted.
    """
    if not isinstance(table_op, ops.Join):
        return None

    left_root, right_root = ir.distinct_roots(table_op.left, table_op.right)
    suffix_by_root = {
        left_root: constants.LEFT_JOIN_SUFFIX,
        right_root: constants.RIGHT_JOIN_SUFFIX,
    }

    mapping = OrderedDict()
    for plain_name in root_table.schema.names:
        # The suffix is looked up per column (not hoisted) so that an
        # empty schema never touches the suffix table.
        present = {
            plain_name,
            plain_name + suffix_by_root[root_table],
        } & data_columns
        if present:
            mapping[toolz.first(present)] = plain_name
    return mapping
def compute_projection_column_expr(expr, parent, data, scope=None, **kwargs):
    """Compute a single projected column expression against ``data``.

    Parameters
    ----------
    expr : expression
        The column expression being projected.  Its ``_name`` attribute,
        when present, names the result.
    parent : operation
        The projection being computed; ``parent.table.op()`` is the
        operation of the table being selected from.
    data : table-like
        The materialized data for the parent table.  It must support
        ``in``, ``[]``, ``.loc`` and ``.columns``
        (presumably a ``pandas.DataFrame`` -- confirm against callers).
    scope : mapping, optional
        Existing execution scope.  ``None`` is treated as an empty scope.
    **kwargs
        Forwarded unchanged to ``execute``.

    Returns
    -------
    result
        The computed column, renamed to the expression's name (or, on
        the plain column-selection path, to the column's own name when
        the expression is unnamed).

    Raises
    ------
    KeyError
        If a plain column reference is not present in ``data`` and the
        parent table is not a join.
    AssertionError
        If a non-trivial expression has no name.
    """
    result_name = getattr(expr, '_name', None)
    op = expr.op()
    parent_table_op = parent.table.op()

    if isinstance(op, ir.TableColumn):
        # slightly faster path for simple column selection
        name = op.name

        if name in data:
            return data[name].rename(result_name or name)

        if not isinstance(parent_table_op, ops.Join):
            raise KeyError(name)

        # The column is not present under its plain name, so look it up
        # under the join suffix that belongs to its root table.
        (root_table,) = op.root_tables()
        left_root, right_root = ir.distinct_roots(
            parent_table_op.left, parent_table_op.right
        )
        suffixes = {
            left_root: constants.LEFT_JOIN_SUFFIX,
            right_root: constants.RIGHT_JOIN_SUFFIX,
        }
        return data.loc[:, name + suffixes[root_table]].rename(
            result_name or name
        )

    data_columns = frozenset(data.columns)

    additional_scope = {
        t: map_new_column_names_to_data(
            remap_overlapping_column_names(parent_table_op, t, data_columns),
            data,
        )
        for t in op.root_tables()
    }

    # ``toolz.merge`` feeds every argument to ``dict.update``, which
    # rejects ``None``; normalise the default so that omitting ``scope``
    # works instead of raising TypeError.
    base_scope = {} if scope is None else scope
    new_scope = toolz.merge(base_scope, additional_scope)

    result = execute(expr, new_scope, **kwargs)
    assert result_name is not None, 'Column selection name is None'
    return result.rename(result_name)
def root_tables(self):
    """Return the distinct root tables of every expression in this node.

    ``self.args`` is unpacked as ``(cases, results, default)``.  The
    cases and results are concatenated, the default is appended when it
    is not ``None``, and everything is handed to ``ir.distinct_roots``.
    """
    case_exprs, result_exprs, default_expr = self.args
    exprs = case_exprs + result_exprs

    if default_expr is None:
        return ir.distinct_roots(*exprs)

    exprs.append(default_expr)
    return ir.distinct_roots(*exprs)