def summarize(dataset, dframe, groups, no_cache, update=False):
    """Return summary statistics for `dframe`, using the dataset's stats cache.

    Every entry of `groups` must satisfy `dataset.is_factor`; otherwise a
    ColumnTypeError is raised before any work is done.

    The cache key is `dataset.join_groups(groups)`, falling back to
    `dataset.ALL` when that is falsy.  Statistics are recomputed when
    `no_cache` is set, when nothing truthy is cached under the key, or when
    `update` is set.  Unless `no_cache` is set, the recomputed statistics are
    written back through `dataset.update`; with `update` they are first
    merged with the cached entry via `combine_dicts`.

    :param dataset: Object exposing `is_factor`, `join_groups`, `stats`,
        `update`, `ALL` and `STATS`.
    :param dframe: Data passed to `summarize_df` / `summarize_with_groups`.
    :param groups: Iterable of column names to group on; may be empty.
    :param no_cache: When truthy, always recompute and never persist.
    :param update: When truthy, recompute and merge into the cached entry.

    :raises ColumnTypeError: If a group is not a factor column.
    :returns: The result of `dict_from_mongo` on the statistics, wrapped as
        `{cache_key: stats}` when `groups` is truthy.
    """
    # Grouping is only allowed on factor columns; reject the first offender.
    for column in groups:
        if dataset.is_factor(column):
            continue
        raise ColumnTypeError("group: '%s' is not a dimension." % column)

    cache_key = dataset.join_groups(groups) or dataset.ALL

    cached_stats = dataset.stats
    summary = cached_stats.get(cache_key)

    # A cache hit requires caching to be enabled, a truthy cached entry and
    # no explicit update request; anything else forces a recomputation.
    cache_hit = not no_cache and summary and not update
    if not cache_hit:
        if groups:
            summary = summarize_with_groups(dframe, groups, dataset)
        else:
            summary = summarize_df(dframe, dataset)

        if not no_cache:
            if update:
                # Merge the fresh statistics into whatever was cached before.
                previous = cached_stats.get(cache_key, {})
                summary = combine_dicts(previous, summary)

            cached_stats.update({cache_key: summary})
            dataset.update({dataset.STATS: dict_for_mongo(cached_stats)})

    result = dict_from_mongo(summary)
    return {cache_key: result} if groups else result
def summarize(dataset, dframe, groups, no_cache, update=False):
    """Summarize `dframe`, optionally per group, honoring the stats cache.

    Raises a ColumnTypeError if any name in `groups` is not reported as a
    factor by `dataset.is_factor`.

    Statistics are looked up in `dataset.stats` under the joined group
    string (or `dataset.ALL` when the joined string is falsy).  They are
    recomputed if `no_cache` or `update` is truthy, or if the cached entry is
    missing/falsy.  Recomputed values are persisted with `dataset.update`
    only when `no_cache` is falsy; when `update` is truthy they are combined
    with the existing cached entry first.

    The return value is `dict_from_mongo(stats)`; when `groups` is truthy it
    is nested under the group string as `{group_str: stats}`.
    """
    # Do not allow grouping by non-factor (e.g. numeric) columns.
    for name in groups:
        is_dimension = dataset.is_factor(name)
        if not is_dimension:
            raise ColumnTypeError("group: '%s' is not a dimension." % name)

    key = dataset.join_groups(groups) or dataset.ALL

    all_stats = dataset.stats
    stats_for_key = all_stats.get(key)

    must_compute = no_cache or not stats_for_key or update
    if must_compute:
        stats_for_key = (
            summarize_with_groups(dframe, groups, dataset)
            if groups
            else summarize_df(dframe, dataset)
        )

        persist = not no_cache
        if persist and update:
            # Fold the new statistics into the previously cached ones.
            stats_for_key = combine_dicts(all_stats.get(key, {}), stats_for_key)

        if persist:
            all_stats.update({key: stats_for_key})
            dataset.update({dataset.STATS: dict_for_mongo(all_stats)})

    decoded = dict_from_mongo(stats_for_key)

    if not groups:
        return decoded

    return {key: decoded}
def summarize(dataset, dframe, groups, group_str, no_cache):
    """Return summary statistics for `dframe` under the key `group_str`.

    Each entry of `groups` other than `dataset.ALL` must be a dimension
    according to `dataset.schema.is_dimension`; otherwise a ColumnTypeError
    is raised.

    Statistics are recomputed when `no_cache` is truthy or when nothing
    truthy is stored under `group_str` in `dataset.stats`.  The recomputed
    value is always placed into that stats mapping, but it is only persisted
    through `dataset.update` when `no_cache` is falsy.

    :param dataset: Object exposing `schema`, `stats`, `update`, `ALL` and
        `STATS`.
    :param dframe: Data passed to `summarize_df` / `summarize_with_groups`.
    :param groups: Iterable of group column names.
    :param group_str: Cache key; `dataset.ALL` selects the ungrouped summary.
    :param no_cache: When truthy, always recompute and skip persisting.

    :raises ColumnTypeError: If a group is not a dimension.
    :returns: `dict_from_mongo` of the statistics; nested as
        `{group_str: stats}` unless `group_str` equals `dataset.ALL`.
    """
    # Only dimensions may be grouped on; the ALL pseudo-group is exempt.
    for column in groups:
        if column != dataset.ALL:
            if not dataset.schema.is_dimension(column):
                raise ColumnTypeError(
                    "group: '%s' is not a dimension." % column)

    cached_stats = dataset.stats

    if no_cache or not cached_stats.get(group_str):
        if group_str == dataset.ALL:
            fresh = summarize_df(dframe, dataset=dataset)
        else:
            fresh = summarize_with_groups(dframe, groups, dataset)

        # The in-memory mapping is refreshed even when persistence is
        # skipped, because the return value below is read back from it.
        cached_stats.update({group_str: fresh})

        if not no_cache:
            dataset.update({dataset.STATS: dict_for_mongo(cached_stats)})

    result = dict_from_mongo(cached_stats.get(group_str))

    if group_str == dataset.ALL:
        return result

    return {group_str: result}