def get_example_messages_by_groups(self, groups, filters=None, excludes=None):
    """Return the messages belonging to the union of the selected groups.

    :param groups: ids of the groups to draw messages from. Overridden by
        any filter on the ``groups`` dimension, and reduced by any exclude
        on the ``groups`` dimension.
    :param filters: list of filter specs; each is a dict holding a
        ``dimension`` object plus the keyword arguments handed to
        ``dimension.filter``.
    :param excludes: list of exclude specs, same layout as ``filters``,
        handed to ``dimension.exclude``.
    :return: a raw queryset over the UNION of the per-group queries, or an
        empty queryset when no group is left after include/exclude.
    """
    # Fresh lists per call instead of shared mutable default arguments.
    filters = [] if filters is None else filters
    excludes = [] if excludes is None else excludes

    def group_ids(specs):
        # Group ids named by the specs that target the 'groups' dimension.
        return [int(spec['value']) for spec in specs
                if spec['dimension'].key == 'groups']

    def without_dimension(spec):
        # The dimension object is the dispatch target, not a filter argument.
        return {key: value for key, value in spec.items()
                if key != "dimension"}

    included = group_ids(filters)
    if included:
        groups = included
    excluded = set(group_ids(excludes))
    groups = [group for group in groups if group not in excluded]

    if not groups:
        # Nothing to union; avoid building an empty SQL statement.
        return Message.objects.none()

    group_querysets = []
    for group in groups:
        messages = self.groups.get(id=group).messages
        for spec in filters:
            messages = spec["dimension"].filter(
                messages, **without_dimension(spec))
        for spec in excludes:
            # Read the arguments from the current spec, not from the list.
            messages = spec["dimension"].exclude(
                messages, **without_dimension(spec))
        group_querysets.append(messages)

    # Django cannot union these querysets directly here, so the SQL of each
    # one is stitched together by hand and run as a raw query.
    query = " UNION ".join(
        "(%s)" % (utils.quote(str(queryset.query)))
        for queryset in group_querysets)
    query = utils.convert_boolean(query)
    return Message.objects.raw(query)
def groups_domain(self, dimension, queryset_all, group_querysets, desired_bins=None):
    """Compute the levels of ``dimension`` for a set of groups, with labels.

    For a related-categorical dimension the levels come from a raw query
    over the UNION of every group's query. For any other dimension they
    come from ``dimension.get_domain`` applied to ``queryset_all``.

    :return: ``(domain, labels)`` where ``labels`` is whatever
        ``dimension.get_domain_labels`` produces for ``domain``.
    """
    if not dimension.is_related_categorical():
        levels = dimension.get_domain(queryset_all, bins=desired_bins)
    else:
        # Stitch the per-group SQL into one UNION statement.
        union_sql = " UNION ".join(
            "(%s)" % (utils.quote(str(member.query)))
            for member in group_querysets)
        levels = group_messages_by_dimension_with_raw_query(
            union_sql, dimension, fetchall)

    return levels, dimension.get_domain_labels(levels)
def groups_domain(self, dimension, queryset_all, group_querysets, desired_bins=None):
    """Return ``(domain, labels)`` for ``dimension`` across the given groups.

    Related-categorical dimensions are resolved with a raw query over the
    UNION of all group queries; every other dimension is resolved by
    ``dimension.get_domain`` on ``queryset_all``. Labels always come from
    ``dimension.get_domain_labels``.
    """
    if dimension.is_related_categorical():
        # One parenthesised SELECT per group, glued together with UNION.
        selects = ["(%s)" % (utils.quote(str(member.query)))
                   for member in group_querysets]
        found = group_messages_by_dimension_with_raw_query(
            " UNION ".join(selects), dimension, fetchall)
    else:
        found = dimension.get_domain(queryset_all, bins=desired_bins)

    found_labels = dimension.get_domain_labels(found)
    return found, found_labels
def get_example_messages_by_groups(self, groups, filters=None, excludes=None):
    """Return the messages belonging to the union of the selected groups.

    :param groups: ids of the groups to draw messages from. Overridden by
        any filter on the ``groups`` dimension, and reduced by any exclude
        on the ``groups`` dimension.
    :param filters: list of filter specs; each is a dict holding a
        ``dimension`` object plus the keyword arguments handed to
        ``dimension.filter``.
    :param excludes: list of exclude specs, same layout as ``filters``,
        handed to ``dimension.exclude``.
    :return: a raw queryset over the UNION of the per-group queries, or an
        empty queryset when no group is left after include/exclude.
    """
    # Fresh lists per call instead of shared mutable default arguments.
    filters = [] if filters is None else filters
    excludes = [] if excludes is None else excludes

    def group_ids(specs):
        # Group ids named by the specs that target the 'groups' dimension.
        return [int(spec['value']) for spec in specs
                if spec['dimension'].key == 'groups']

    def without_dimension(spec):
        # The dimension object is the dispatch target, not a filter argument.
        return {key: value for key, value in spec.items()
                if key != "dimension"}

    included = group_ids(filters)
    if included:
        groups = included
    excluded = set(group_ids(excludes))
    groups = [group for group in groups if group not in excluded]

    if not groups:
        # Nothing to union; avoid building an empty SQL statement.
        return Message.objects.none()

    group_querysets = []
    for group in groups:
        messages = self.groups.get(id=group).messages
        for spec in filters:
            messages = spec["dimension"].filter(
                messages, **without_dimension(spec))
        for spec in excludes:
            # Read the arguments from the current spec, not from the list.
            messages = spec["dimension"].exclude(
                messages, **without_dimension(spec))
        group_querysets.append(messages)

    # Django cannot union these querysets directly here, so the SQL of each
    # one is stitched together by hand and run as a raw query.
    query = " UNION ".join(
        "(%s)" % (utils.quote(str(queryset.query)))
        for queryset in group_querysets)
    query = utils.convert_boolean(query)
    return Message.objects.raw(query)
def generate(self, dataset, filters=None, exclude=None, page_size=100, page=None, search_key=None, groups=None):
    """
    Generate a complete data group table response.

    This includes 'table', which provides the non-zero
    message frequency for each combination of primary and secondary dimension values,
    respecting the filters.

    It also includes 'domains', which provides, for both
    primary and secondary dimensions, the levels of the
    dimension irrespective of filters (except on those actual dimensions).

    When ``groups`` (a list of group ids) is given, one table is rendered
    per group. With no secondary dimension the rows of all groups are merged
    into a single 'table', each row tagged with its group id under 'groups';
    otherwise the result carries 'tables', one entry per group.

    :param dataset: dataset whose messages are tabulated.
    :param filters: optional list of filter specs (dicts with a 'dimension').
    :param exclude: optional list of exclude specs (dicts with a 'dimension').
    :param page_size: number of primary levels per page when paging.
    :param page: 1-based page number; paging only applies when there is no
        filter on the primary dimension and no secondary dimension.
    :param search_key: optional key passed to ``filter_search_key`` when paging.
    :param groups: optional list of group ids.
    :return: the result dict ('max_page' is added when paging), or None when
        the requested page holds no level.
    """
    primary = self.primary_dimension
    secondary = self.secondary_dimension

    def clip_to_dataset_window(queryset):
        # Drop messages without a time and keep those inside the dataset's
        # time span widened by 10% on each side.
        queryset = queryset.exclude(time__isnull=True)
        if dataset.start_time and dataset.end_time:
            span = dataset.end_time - dataset.start_time
            margin = timedelta(seconds=span.total_seconds() * 0.1)
            queryset = queryset.filter(time__gte=dataset.start_time - margin,
                                       time__lte=dataset.end_time + margin)
        return queryset

    def apply_specs(queryset):
        # Apply every filter spec, then every exclude spec.
        if filters is not None:
            for spec in filters:
                queryset = spec['dimension'].filter(queryset, **spec)
        if exclude is not None:
            for spec in exclude:
                queryset = spec['dimension'].exclude(queryset, **spec)
        return queryset

    def last_spec_for(specs, dimension):
        # The last spec targeting the dimension, or None.
        found = None
        if specs is not None:
            for spec in specs:
                if spec['dimension'] == dimension:
                    found = spec
        return found

    def caps_levels(dimension):
        # True when the mode limits categorical dimensions to the top levels.
        return (self.mode in ('enable_others', 'omit_others')
                and dimension.is_categorical())

    def restrict_to_levels(queryset, dimension, levels):
        # Keep only the messages whose value is one of the levels.
        return queryset.filter(utils.levels_or(dimension.field_name, levels))

    def cap_levels(domain, labels):
        # Truncate levels (and labels, if any) to MAX_CATEGORICAL_LEVELS.
        domain = domain[:MAX_CATEGORICAL_LEVELS]
        if labels is not None:
            labels = labels[:MAX_CATEGORICAL_LEVELS]
        return domain, labels

    def page_levels(domain, labels):
        # Slice out the requested page; None when the page holds no level.
        if search_key is not None:
            domain, labels = self.filter_search_key(domain, labels, search_key)
        start = (page - 1) * page_size
        end = min(start + page_size, len(domain))
        # Explicit floor division keeps this an integer on every Python.
        page_count = (len(domain) // page_size) + 1
        # A page that starts exactly at the end is empty as well, hence >=.
        if len(domain) == 0 or start >= len(domain):
            return None
        domain = domain[start:end]
        if labels is not None:
            labels = labels[start:end]
        return domain, labels, page_count

    unfiltered_queryset = clip_to_dataset_window(dataset.message_set.all())
    queryset = apply_specs(unfiltered_queryset)

    primary_filter = last_spec_for(filters, primary)
    # paging the first dimension, this is for the filter distribution
    paging = primary_filter is None and secondary is None and page is not None

    domains = {}
    domain_labels = {}
    max_page = None
    # flag is true if the dimension is categorical and has more than
    # MAX_CATEGORICAL_LEVELS levels
    primary_flag = False
    secondary_flag = False

    if groups is None:
        queryset_for_others = None

        # Include the domains for primary and (secondary) dimensions
        domain, labels = self.domain(primary, unfiltered_queryset,
                                     primary_filter,
                                     last_spec_for(exclude, primary))
        if paging:
            paged = page_levels(domain, labels)
            if paged is None:
                return None
            domain, labels, max_page = paged
            queryset = restrict_to_levels(queryset, primary, domain)
        elif caps_levels(primary) and len(domain) > MAX_CATEGORICAL_LEVELS:
            primary_flag = True
            domain, labels = cap_levels(domain, labels)
            queryset_for_others = queryset
            queryset = restrict_to_levels(queryset, primary, domain)

        domains[primary.key] = domain
        if labels is not None:
            domain_labels[primary.key] = labels

        if secondary:
            domain, labels = self.domain(secondary, unfiltered_queryset,
                                         last_spec_for(filters, secondary),
                                         last_spec_for(exclude, secondary))
            if caps_levels(secondary) and len(domain) > MAX_CATEGORICAL_LEVELS:
                secondary_flag = True
                domain, labels = cap_levels(domain, labels)
                if queryset_for_others is None:
                    queryset_for_others = queryset
                queryset = restrict_to_levels(queryset, secondary, domain)

            domains[secondary.key] = domain
            if labels is not None:
                domain_labels[secondary.key] = labels

        # Render a table
        table = self.render(queryset)

        if self.mode == "enable_others" and queryset_for_others is not None:
            # adding others to the results
            table_for_others = self.render_others(
                queryset_for_others, domains, primary_flag, secondary_flag)
            table = list(table)
            table.extend(table_for_others)

        results = {
            'table': table,
            'domains': domains,
            'domain_labels': domain_labels
        }
    else:
        queryset_all = queryset

        group_querysets = []
        group_labels = []
        for group in groups:
            group_obj = groups_models.Group.objects.get(id=group)
            if group_obj.order > 0:
                group_labels.append("#%d %s" % (group_obj.order, group_obj.name))
            else:
                group_labels.append("%s" % (group_obj.name))
            group_querysets.append(
                apply_specs(clip_to_dataset_window(group_obj.messages)))

        # The domains are computed over the union of the groups by
        # groups_domain, because the group querysets are not unioned via
        # the ORM here.
        domain, labels = self.groups_domain(primary, queryset_all, group_querysets)
        if paging:
            paged = page_levels(domain, labels)
            if paged is None:
                return None
            domain, labels, max_page = paged
        elif caps_levels(primary) and len(domain) > MAX_CATEGORICAL_LEVELS:
            primary_flag = True
            domain, labels = cap_levels(domain, labels)

        domains[primary.key] = domain
        if labels is not None:
            domain_labels[primary.key] = labels

        if secondary:
            domain, labels = self.groups_domain(secondary, queryset_all, group_querysets)
            if caps_levels(secondary) and len(domain) > MAX_CATEGORICAL_LEVELS:
                secondary_flag = True
                domain, labels = cap_levels(domain, labels)

            domains[secondary.key] = domain
            if labels is not None:
                domain_labels[secondary.key] = labels

        group_tables = []
        for group_queryset in group_querysets:
            shown = group_queryset
            if caps_levels(primary):
                shown = restrict_to_levels(shown, primary, domains[primary.key])
            if secondary and caps_levels(secondary):
                shown = restrict_to_levels(shown, secondary, domains[secondary.key])

            # Render a table
            if primary.key == "words":
                table = group_messages_by_words_with_raw_query(
                    utils.quote(str(shown.query)), fetchall_table)
            else:
                table = self.render(shown)

            if self.mode == "enable_others":
                # adding others to the results, computed from the group's
                # messages before the level restriction
                table_for_others = self.render_others(
                    group_queryset, domains, primary_flag, secondary_flag)
                table = list(table)
                table.extend(table_for_others)

            group_tables.append(table)

        if secondary is None:
            final_table = []
            for idx, group_table in enumerate(group_tables):
                # Tag and collect in one pass so each row is visited once.
                for item in group_table:
                    item['groups'] = groups[idx]
                    final_table.append(item)

            domains['groups'] = groups
            domain_labels['groups'] = group_labels

            results = {
                'table': final_table,
                'domains': domains,
                'domain_labels': domain_labels
            }
        else:
            tables = [
                {
                    'group_id': groups[idx],
                    'group_name': group_labels[idx],
                    'table': group_table
                }
                for idx, group_table in enumerate(group_tables)
            ]
            results = {
                'tables': tables,
                'domains': domains,
                'domain_labels': domain_labels
            }

    if max_page is not None:
        results['max_page'] = max_page

    return results
def generate(self, dataset, filters=None, exclude=None, page_size=100, page=None, search_key=None, groups=None):
    """
    Generate a complete data group table response.

    This includes 'table', which provides the non-zero
    message frequency for each combination of primary and secondary dimension values,
    respecting the filters.

    It also includes 'domains', which provides, for both
    primary and secondary dimensions, the levels of the
    dimension irrespective of filters (except on those actual dimensions).

    When ``groups`` (a list of group ids) is given, one table is rendered
    per group. With no secondary dimension the rows of all groups are merged
    into a single 'table', each row tagged with its group id under 'groups';
    otherwise the result carries 'tables', one entry per group.

    :param dataset: dataset whose messages are tabulated.
    :param filters: optional list of filter specs (dicts with a 'dimension').
    :param exclude: optional list of exclude specs (dicts with a 'dimension').
    :param page_size: number of primary levels per page when paging.
    :param page: 1-based page number; paging only applies when there is no
        filter on the primary dimension and no secondary dimension.
    :param search_key: optional key passed to ``filter_search_key`` when paging.
    :param groups: optional list of group ids.
    :return: the result dict ('max_page' is added when paging), or None when
        the requested page holds no level.
    """
    primary = self.primary_dimension
    secondary = self.secondary_dimension

    def clip_to_dataset_window(queryset):
        # Drop messages without a time and keep those inside the dataset's
        # time span widened by 10% on each side.
        queryset = queryset.exclude(time__isnull=True)
        if dataset.start_time and dataset.end_time:
            span = dataset.end_time - dataset.start_time
            margin = timedelta(seconds=span.total_seconds() * 0.1)
            queryset = queryset.filter(time__gte=dataset.start_time - margin,
                                       time__lte=dataset.end_time + margin)
        return queryset

    def apply_specs(queryset):
        # Apply every filter spec, then every exclude spec.
        if filters is not None:
            for spec in filters:
                queryset = spec['dimension'].filter(queryset, **spec)
        if exclude is not None:
            for spec in exclude:
                queryset = spec['dimension'].exclude(queryset, **spec)
        return queryset

    def last_spec_for(specs, dimension):
        # The last spec targeting the dimension, or None.
        found = None
        if specs is not None:
            for spec in specs:
                if spec['dimension'] == dimension:
                    found = spec
        return found

    def caps_levels(dimension):
        # True when the mode limits categorical dimensions to the top levels.
        return (self.mode in ('enable_others', 'omit_others')
                and dimension.is_categorical())

    def restrict_to_levels(queryset, dimension, levels):
        # Keep only the messages whose value is one of the levels.
        return queryset.filter(utils.levels_or(dimension.field_name, levels))

    def cap_levels(domain, labels):
        # Truncate levels (and labels, if any) to MAX_CATEGORICAL_LEVELS.
        domain = domain[:MAX_CATEGORICAL_LEVELS]
        if labels is not None:
            labels = labels[:MAX_CATEGORICAL_LEVELS]
        return domain, labels

    def page_levels(domain, labels):
        # Slice out the requested page; None when the page holds no level.
        if search_key is not None:
            domain, labels = self.filter_search_key(domain, labels, search_key)
        start = (page - 1) * page_size
        end = min(start + page_size, len(domain))
        # Explicit floor division keeps this an integer on every Python.
        page_count = (len(domain) // page_size) + 1
        # A page that starts exactly at the end is empty as well, hence >=.
        if len(domain) == 0 or start >= len(domain):
            return None
        domain = domain[start:end]
        if labels is not None:
            labels = labels[start:end]
        return domain, labels, page_count

    unfiltered_queryset = clip_to_dataset_window(dataset.message_set.all())
    queryset = apply_specs(unfiltered_queryset)

    primary_filter = last_spec_for(filters, primary)
    # paging the first dimension, this is for the filter distribution
    paging = primary_filter is None and secondary is None and page is not None

    domains = {}
    domain_labels = {}
    max_page = None
    # flag is true if the dimension is categorical and has more than
    # MAX_CATEGORICAL_LEVELS levels
    primary_flag = False
    secondary_flag = False

    if groups is None:
        queryset_for_others = None

        # Include the domains for primary and (secondary) dimensions
        domain, labels = self.domain(primary, unfiltered_queryset,
                                     primary_filter,
                                     last_spec_for(exclude, primary))
        if paging:
            paged = page_levels(domain, labels)
            if paged is None:
                return None
            domain, labels, max_page = paged
            queryset = restrict_to_levels(queryset, primary, domain)
        elif caps_levels(primary) and len(domain) > MAX_CATEGORICAL_LEVELS:
            primary_flag = True
            domain, labels = cap_levels(domain, labels)
            queryset_for_others = queryset
            queryset = restrict_to_levels(queryset, primary, domain)

        domains[primary.key] = domain
        if labels is not None:
            domain_labels[primary.key] = labels

        if secondary:
            domain, labels = self.domain(secondary, unfiltered_queryset,
                                         last_spec_for(filters, secondary),
                                         last_spec_for(exclude, secondary))
            if caps_levels(secondary) and len(domain) > MAX_CATEGORICAL_LEVELS:
                secondary_flag = True
                domain, labels = cap_levels(domain, labels)
                if queryset_for_others is None:
                    queryset_for_others = queryset
                queryset = restrict_to_levels(queryset, secondary, domain)

            domains[secondary.key] = domain
            if labels is not None:
                domain_labels[secondary.key] = labels

        # Render a table
        table = self.render(queryset)

        if self.mode == "enable_others" and queryset_for_others is not None:
            # adding others to the results
            table_for_others = self.render_others(
                queryset_for_others, domains, primary_flag, secondary_flag)
            table = list(table)
            table.extend(table_for_others)

        results = {
            'table': table,
            'domains': domains,
            'domain_labels': domain_labels
        }
    else:
        queryset_all = queryset

        group_querysets = []
        group_labels = []
        for group in groups:
            group_obj = groups_models.Group.objects.get(id=group)
            if group_obj.order > 0:
                group_labels.append("#%d %s" % (group_obj.order, group_obj.name))
            else:
                group_labels.append("%s" % (group_obj.name))
            group_querysets.append(
                apply_specs(clip_to_dataset_window(group_obj.messages)))

        # The domains are computed over the union of the groups by
        # groups_domain, because the group querysets are not unioned via
        # the ORM here.
        domain, labels = self.groups_domain(primary, queryset_all, group_querysets)
        if paging:
            paged = page_levels(domain, labels)
            if paged is None:
                return None
            domain, labels, max_page = paged
        elif caps_levels(primary) and len(domain) > MAX_CATEGORICAL_LEVELS:
            primary_flag = True
            domain, labels = cap_levels(domain, labels)

        domains[primary.key] = domain
        if labels is not None:
            domain_labels[primary.key] = labels

        if secondary:
            domain, labels = self.groups_domain(secondary, queryset_all, group_querysets)
            if caps_levels(secondary) and len(domain) > MAX_CATEGORICAL_LEVELS:
                secondary_flag = True
                domain, labels = cap_levels(domain, labels)

            domains[secondary.key] = domain
            if labels is not None:
                domain_labels[secondary.key] = labels

        group_tables = []
        for group_queryset in group_querysets:
            shown = group_queryset
            if caps_levels(primary):
                shown = restrict_to_levels(shown, primary, domains[primary.key])
            if secondary and caps_levels(secondary):
                shown = restrict_to_levels(shown, secondary, domains[secondary.key])

            # Render a table
            if primary.key == "words":
                table = group_messages_by_words_with_raw_query(
                    utils.quote(str(shown.query)), fetchall_table)
            else:
                table = self.render(shown)

            if self.mode == "enable_others":
                # adding others to the results, computed from the group's
                # messages before the level restriction
                table_for_others = self.render_others(
                    group_queryset, domains, primary_flag, secondary_flag)
                table = list(table)
                table.extend(table_for_others)

            group_tables.append(table)

        if secondary is None:
            final_table = []
            for idx, group_table in enumerate(group_tables):
                # Tag and collect in one pass so each row is visited once.
                for item in group_table:
                    item['groups'] = groups[idx]
                    final_table.append(item)

            domains['groups'] = groups
            domain_labels['groups'] = group_labels

            results = {
                'table': final_table,
                'domains': domains,
                'domain_labels': domain_labels
            }
        else:
            tables = [
                {
                    'group_id': groups[idx],
                    'group_name': group_labels[idx],
                    'table': group_table
                }
                for idx, group_table in enumerate(group_tables)
            ]
            results = {
                'tables': tables,
                'domains': domains,
                'domain_labels': domain_labels
            }

    if max_page is not None:
        results['max_page'] = max_page

    return results