def _project(dataframe, project_q): if not project_q: return dataframe assert_list("project", project_q) if project_q == [["count"]]: # Special case for count only, ~equal to SQL count(*) return DataFrame.from_dict({"count": [len(dataframe)]}) aggregate_fns, alias_expressions = classify_expressions(project_q) if aggregate_fns and alias_expressions: raise_malformed("Cannot mix aliasing and aggregation functions", project_q) if isinstance(dataframe, DataFrameGroupBy): dataframe = _aggregate(dataframe, project_q, aggregate_fns) elif aggregate_fns: return _aggregate_without_group_by(dataframe, project_q, aggregate_fns) elif alias_expressions: dataframe = _alias(dataframe, alias_expressions) else: # Nothing to do here pass columns = [e if type(e) is not list else e[1] for e in project_q] try: return dataframe[columns] except KeyError: missing_columns = set(columns) - set(dataframe.columns.values) raise_malformed("Selected columns not in table", list(missing_columns))
def _group_by(dataframe, group_by_q): if not group_by_q: return dataframe assert_list("group_by", group_by_q) try: return dataframe.groupby(group_by_q, as_index=False) except KeyError: raise_malformed("Group by column not in table", group_by_q)
def filter(self, dataframe, filter_q):
    """Filter *dataframe* using the where clause *filter_q*.

    :param dataframe: The frame to filter.
    :param filter_q: The where clause as a list expression. A falsy value
        means "no filtering".
    :return: ``dataframe`` itself when *filter_q* is falsy, otherwise the
        result of ``dataframe.query`` on the string built by
        ``self._build_filter``.

    ``SyntaxError`` and ``ValueError`` raised while querying are reported
    through ``raise_malformed`` (defined elsewhere, presumably raising).
    """
    if not filter_q:
        return dataframe

    assert_list('where', filter_q)
    query_expression = self._build_filter(filter_q)

    try:
        # The query expression may contain references to variables in env,
        # so the name ``env`` has to exist as a local right here, in the
        # same function that calls ``query``. Do not rename or move it.
        env = self.env  # noqa
        return dataframe.query(query_expression)
    except SyntaxError:
        raise_malformed('Syntax error in where clause', filter_q)
    except ValueError:
        raise_malformed('Invalid type in comparison in where clause', filter_q)

    # Only reached if raise_malformed returns instead of raising.
    return dataframe
def _order_by(dataframe, order_q): if not order_q: return dataframe assert_list("order_by", order_q) if not all(isinstance(c, basestring) for c in order_q): raise_malformed("Invalid order by format", order_q) columns = [e[1:] if e.startswith("-") else e for e in order_q] ascending = [not e.startswith("-") for e in order_q] try: return dataframe.sort_values(by=columns, ascending=ascending) except KeyError: raise_malformed("Order by column not in table", columns)
def pandas_filter(df, filter_q):
    """Filter *df* using the where clause *filter_q*.

    :param df: The frame to filter.
    :param filter_q: The where clause as a list expression. A falsy value
        means "no filtering".
    :return: ``df`` itself when *filter_q* is falsy, otherwise ``df``
        indexed by whatever ``_do_pandas_filter(df, filter_q)`` returns
        (presumably a boolean row mask; confirm against that helper).
    """
    if not filter_q:
        return df

    assert_list('where', filter_q)
    row_selector = _do_pandas_filter(df, filter_q)
    return df[row_selector]