def __build_columns(dataset, dframe, functions, name, no_index):
    """Apply every function to `dframe` and collect the named results.

    :param dataset: Forwarded to each function as its extra positional
        argument.
    :param dframe: The frame the functions are applied to (with `axis=1`).
    :param functions: Iterable of callables, each invoked through
        `dframe.apply`.
    :param name: Base name for the results; it is passed through
        `make_unique` against the names of the results built so far.
    :param no_index: When truthy, each result has its index reset (the old
        index is dropped) before being collected.

    :returns: A list holding one result per function, in iteration order.
    """
    built = []

    for func in functions:
        result = dframe.apply(func, axis=1, args=(dataset, ))

        # Only names of results already collected count as taken.
        taken_names = [previous.name for previous in built]
        result.name = make_unique(name, taken_names)

        if no_index:
            result = result.reset_index(drop=True)

        built.append(result)

    return built
def __build_columns(dataset, dframe, functions, name, no_index):
    """Build one named column per function by applying it to `dframe`.

    :param dataset: Extra positional argument handed to each function.
    :param dframe: The frame each function is applied to (with `axis=1`).
    :param functions: Iterable of callables to apply.
    :param name: Requested column name; made unique via `make_unique`
        relative to the columns produced earlier in this call.
    :param no_index: If truthy, reset each column's index, dropping the
        previous one.

    :returns: The list of produced columns, in the order of `functions`.
    """
    produced = []

    for fn in functions:
        extra_args = (dataset,)
        series = dframe.apply(fn, axis=1, args=extra_args)

        # Names already claimed by earlier columns in this call.
        claimed = [earlier.name for earlier in produced]
        series.name = make_unique(name, claimed)

        if no_index:
            series = series.reset_index(drop=True)

        produced.append(series)

    return produced
def __check_name_and_make_unique(self, name, dataset):
    """Reject a name already used as a label, otherwise make it unique.

    :param name: The candidate name.
    :param dataset: The dataset whose `labels` and `schema` are consulted.

    :raises: `UniqueCalculationError` if `name` is among the dataset's
        labels.
    :returns: The result of `make_unique` for `name` against the keys of
        the dataset's schema.
    """
    labels = dataset.labels

    if name not in labels:
        return make_unique(name, dataset.schema.keys())

    raise UniqueCalculationError(name, labels)
def _check_name_and_make_unique(name, dataset):
    """Validate `name` against the dataset labels and return a unique name.

    :param name: The name to validate and make unique.
    :param dataset: The dataset providing `labels` and `schema`.

    :raises: `UniqueCalculationError` when `name` is already one of the
        dataset's labels.
    :returns: `name` passed through `make_unique` against the schema keys.
    """
    existing_labels = dataset.labels
    already_taken = name in existing_labels

    if already_taken:
        raise UniqueCalculationError(name, existing_labels)

    schema_keys = dataset.schema.keys()
    return make_unique(name, schema_keys)
def parse_columns(self, formula, name, dframe=None):
    """Parse `formula` and apply the resulting functions to a frame.

    :param formula: The formula handed to `self.parser.parse_formula`.
    :param name: Base name for the resulting columns; made unique via
        `make_unique` against the columns built earlier in this call.
    :param dframe: The frame to apply the functions to (with `axis=1`).
        Defaults to `self.dataset.dframe()` when None.

    :returns: A list with one named column per parsed function.
    """
    if dframe is None:
        dframe = self.dataset.dframe()

    parsed_functions = self.parser.parse_formula(formula)
    results = []

    for func in parsed_functions:
        # The parser context is read on every iteration, as the extra
        # positional argument for the applied function.
        extra_args = (self.parser.context, )
        series = dframe.apply(func, axis=1, args=extra_args)

        used_names = [earlier.name for earlier in results]
        series.name = make_unique(name, used_names)

        results.append(series)

    return results
def save(self, dataset, formula, name, group_str=None):
    """Validate a formula, store it as a calculation and schedule it.

    The formula (and the groups split out of `group_str`, if any) is
    validated by a `Calculator` for `dataset`.  A record for the
    calculation is then saved in the **pending** state and a
    `calculate_task` is handed to `call_async`; per the original notes the
    calculation becomes **ready** once processing has finished.

    For aggregations an unset group is stored as the empty string.  For
    non-aggregations `name` is made unique against the dataset's labels
    and schema keys.

    :param dataset: The DataSet to save the calculation for.
    :param formula: The formula to save.
    :param name: The name of the formula.
    :param group_str: Columns to group on.
    :type group_str: String, list of strings, or None.

    :raises: `ParseError` if an invalid formula was supplied.
    :returns: The record that was saved.
    """
    calculator = Calculator(dataset)

    # Validation doubles as the check that the formula is parsable.
    if group_str:
        groups = self.split_groups(group_str)
    else:
        groups = []

    aggregation = calculator.validate(formula, groups)

    if not aggregation:
        # Non-aggregations must not clash with existing names.
        taken_names = dataset.labels + dataset.schema.keys()
        name = make_unique(name, taken_names)
    elif not group_str:
        # Aggregation without a group: normalise the group to ''.
        group_str = ''

    record = {}
    record[DATASET_ID] = dataset.dataset_id
    record[self.AGGREGATION] = aggregation
    record[self.FORMULA] = formula
    record[self.GROUP] = group_str
    record[self.NAME] = name
    record[self.STATE] = self.STATE_PENDING

    super(self.__class__, self).save(record)

    call_async(calculate_task, self, dataset)

    return record