def evaluate(self, verbose=False, passes=None):
    """Evaluate the lazy expression and return it as a pandas Series.

    Args:
        verbose: Forwarded unchanged to ``LazyOpResult.evaluate``.
        passes: Forwarded unchanged to ``LazyOpResult.evaluate``.

    Returns:
        pd.Series: The evaluated column. When ``self.index_type`` is set,
        the series is indexed by the evaluated index and that index is
        named ``self.index_name``.
    """
    if self.index_type is None:
        column = LazyOpResult.evaluate(self, verbose=verbose, passes=passes)
        return pd.Series(column)

    # With an index, the expression is decoded as a struct holding two
    # vectors: the index first, then the values.
    indexed_type = WeldStruct(
        [WeldVec(self.index_type), WeldVec(self.weld_type)])
    index, column = LazyOpResult(self.expr, indexed_type, 0).evaluate(
        verbose=verbose, passes=passes)
    series = pd.Series(column, index)
    # `inplace` is passed by keyword: recent pandas releases no longer
    # accept it positionally.
    series.index.rename(self.index_name, inplace=True)
    return series
def count(self):
    """Build a lazy count over this object's expression.

    Returns:
        LazyOpResult: Wraps ``grizzly_impl.count(self.expr, self.weld_type)``
        with result type ``WeldInt()`` and dimension 0. No ``evaluate`` call
        is made here.
    """
    count_expr = grizzly_impl.count(self.expr, self.weld_type)
    return LazyOpResult(count_expr, WeldInt(), 0)
def unique(self):
    """Build a lazy "unique" operation over this object's expression.

    Returns:
        LazyOpResult: Wraps ``grizzly_impl.unique(self.expr, self.weld_type)``
        and keeps this object's ``weld_type`` and ``dim``. No ``evaluate``
        call is made here.
    """
    unique_expr = grizzly_impl.unique(self.expr, self.weld_type)
    return LazyOpResult(unique_expr, self.weld_type, self.dim)
def sum(self):
    """Build a lazy "+" aggregation over this object's expression.

    Returns:
        LazyOpResult: Wraps ``grizzly_impl.aggr`` called with operator
        ``"+"`` and initial value ``0``; the result keeps this object's
        ``weld_type`` and has dimension 0. No ``evaluate`` call is made here.
    """
    sum_expr = grizzly_impl.aggr(self.expr, "+", 0, self.weld_type)
    return LazyOpResult(sum_expr, self.weld_type, 0)
def group_eval(objs, passes=None):
    """Evaluate several lazy objects together and convert them to pandas.

    Args:
        objs: Sequence of ``SeriesWeld`` / ``DataFrameWeldExpr`` objects.
        passes: Forwarded unchanged to the grouped ``evaluate`` call.

    Returns:
        list: One ``pd.Series`` per ``SeriesWeld`` and one ``pd.DataFrame``
        per ``DataFrameWeldExpr``, in the order of ``objs``.
    """
    # Queue exactly one lazy result per input object so that results[i]
    # always corresponds to objs[i] in the conversion loop below.
    lazy_results = []
    for ob in objs:
        if isinstance(ob, SeriesWeld) and ob.index_type is not None:
            # Indexed series decode as a struct of (index vector, values).
            weld_type = WeldStruct(
                [WeldVec(ob.index_type), WeldVec(ob.weld_type)])
        else:
            # NOTE(review): dimension 0 is used for every queued entry,
            # including index-less series -- confirm that this is the right
            # decoding for a series without an index.
            weld_type = ob.weld_type
        lazy_results.append(LazyOpResult(ob.expr, weld_type, 0))

    results = group(lazy_results).evaluate((True, -1), passes=passes)

    pd_results = []
    for ob, result in zip(objs, results):
        if isinstance(ob, SeriesWeld):
            if ob.index_type is not None:
                index, column = result
                series = pd.Series(column, index)
                # `inplace` is passed by keyword: recent pandas releases no
                # longer accept it positionally.
                series.index.rename(ob.index_name, inplace=True)
            else:
                # Build the series from this object's own result; the
                # previous code appended a `series` variable that was either
                # unbound or left over from an earlier iteration.
                series = pd.Series(result)
            pd_results.append(series)
        if isinstance(ob, DataFrameWeldExpr):
            if ob.is_pivot:
                index, pivot, columns = result
                df_dict = {
                    column_name: pivot[pos]
                    for pos, column_name in enumerate(columns)
                }
                pd_results.append(pd.DataFrame(df_dict, index=index))
            else:
                df_dict = {
                    column_name: result[pos]
                    for pos, column_name in enumerate(ob.column_names)
                }
                pd_results.append(pd.DataFrame(df_dict))
    return pd_results
def get_column(self, column_name, column_type, index):
    """Extract one column from this object's expression and evaluate it.

    Args:
        column_name: Not used by this method.
        column_type: Weld type given to the extracted column's result.
        index: Passed to ``grizzly_impl.get_column`` to select the column.

    Returns:
        Whatever ``LazyOpResult.evaluate()`` returns for the extracted
        column, which is wrapped with dimension 1.
    """
    column_expr = grizzly_impl.get_column(self.expr, self.weld_type, index)
    lazy_column = LazyOpResult(column_expr, column_type, 1)
    return lazy_column.evaluate()
def std(self):
    """Build the lazy per-group standard deviation of this column.

    Note that it is by default normalized by n - 1.

    # TODO, what does pandas do for multiple grouping columns?
    # Currently we are just going to use one grouping column

    Returns:
        SeriesWeld: A ``WeldDouble`` series whose index type and index name
        are those of the first grouping column.
    """
    std_expr = grizzly_impl.groupby_std(
        [self.column],
        [self.column_type],
        self.grouping_columns,
        self.grouping_column_types,
    )
    unzipped_columns = grizzly_impl.unzip_columns(
        std_expr,
        self.grouping_column_types + [WeldDouble()],
    )
    index_type = self.grouping_column_types[0]
    index_expr = LazyOpResult(
        grizzly_impl.get_field(unzipped_columns, 0), index_type, 1)
    # Field 1 is declared as WeldDouble, matching the column types passed to
    # unzip_columns above (with a single grouping column) and the type of
    # the returned series; it was previously labelled with the grouping
    # column's type.
    column_expr = LazyOpResult(
        grizzly_impl.get_field(unzipped_columns, 1), WeldDouble(), 1)
    group_expr = utils.group([index_expr, column_expr])
    return SeriesWeld(
        group_expr.expr,
        WeldDouble(),
        index_type=index_type,
        index_name=self.grouping_column_names[0],
    )
def group(exprs):
    """Combine several lazy results into one lazy struct-valued result.

    Args:
        exprs: Objects exposing ``weld_type``, ``dim`` and ``expr``; each
            ``expr`` must expose ``obj_id``.

    Returns:
        LazyOpResult: Dimension 0, typed as a ``WeldStruct`` of the
        members' converted types. Its Weld code is a struct literal listing
        the members' object ids, and each member is registered as a
        dependency under its id.
    """
    field_types = [to_weld_type(member.weld_type, member.dim)
                   for member in exprs]
    member_objs = [member.expr for member in exprs]

    grouped = WeldObject(grizzly_impl.encoder_, grizzly_impl.decoder_)
    struct_type = WeldStruct(field_types)
    member_ids = [obj.obj_id for obj in member_objs]

    # Merge every member into the new object before emitting its code.
    for obj in member_objs:
        grouped.update(obj)
    grouped.weld_code = "{%s}" % ", ".join(member_ids)

    # Dependencies are registered only after all updates have been applied.
    for obj in member_objs:
        grouped.dependencies[obj.obj_id] = obj

    return LazyOpResult(grouped, struct_type, 0)
def __sub__(self, other):
    """Build the lazy element-wise difference ``self - other``.

    Args:
        other: Object exposing ``index_type`` and ``expr``. When its
            ``index_type`` is set, only field 1 of its expression (the
            values) takes part in the subtraction.

    Returns:
        SeriesWeld: Carries this series' ``weld_type``, ``df`` and
        ``column_name``. When this series has an index, the result is
        grouped with that index and keeps ``index_type`` / ``index_name``;
        otherwise the plain difference series is returned.
    """
    has_index = self.index_type is not None
    if has_index:
        index = grizzly_impl.get_field(self.expr, 0)
        left = grizzly_impl.get_field(self.expr, 1)
    else:
        left = self.expr

    if other.index_type is not None:
        right = grizzly_impl.get_field(other.expr, 1)
    else:
        right = other.expr

    if not has_index:
        # Without an index there is nothing to group the values with; this
        # path previously failed with UnboundLocalError on `index`.
        return SeriesWeld(
            grizzly_impl.element_wise_op(left, right, "-", self.weld_type),
            self.weld_type,
            self.df,
            self.column_name)

    index_expr = LazyOpResult(index, self.index_type, 0)
    sub_expr = SeriesWeld(
        grizzly_impl.element_wise_op(left, right, "-", self.weld_type),
        self.weld_type,
        self.df,
        self.column_name)
    index_sub_expr = utils.group([index_expr, sub_expr])
    return SeriesWeld(
        index_sub_expr.expr,
        self.weld_type,
        self.df,
        self.column_name,
        self.index_type,
        self.index_name)
def __getattr__(self, key):
    """Resolve the ``values`` attribute; reject every other name.

    Args:
        key (str): Name of the attribute being looked up.

    Returns:
        ``self.df.values`` when ``self.predicates`` is None. Otherwise,
        when ``self.df.values`` is a numpy array, a ``LazyOpResult``
        wrapping ``grizzly_impl.filter`` applied to those values with
        ``self.predicates.expr``.

    Raises:
        AttributeError: For any other key, or when predicates are set and
            ``self.df.values`` is not a numpy array. ``AttributeError`` is
            an ``Exception`` subclass, so existing ``except Exception``
            handlers still work, while ``hasattr`` and three-argument
            ``getattr`` now behave normally.
    """
    if key == 'values':
        if self.predicates is None:
            return self.df.values
        values = self.df.values
        if isinstance(values, np.ndarray):
            weld_type = grizzly_impl.numpy_to_weld_type_mapping[
                str(values.dtype)]
            dim = values.ndim
            return LazyOpResult(
                grizzly_impl.filter(values, self.predicates.expr, weld_type),
                weld_type,
                dim)
    raise AttributeError("Attr %s does not exist" % key)