Exemplo n.º 1
0
    def _extract_key_values(self, dict_, unique_keys):
        '''\
        Look up every key of ``unique_keys`` in ``dict_`` with
        ``deep_get`` and collect the results.

        ``dict_``
             A dict like object
        ``unique_keys``
             A list of strings

        Returns: A ``tuple`` with one value per key, in the order the
        keys appear in ``unique_keys``.

        Raises: :exc:`KeyError` as soon as the lookup of a key yields
        ``None``.
        '''
        values = []
        for key in unique_keys:
            value = deep_get(dict_, key)
            # A ``None`` result is treated as "key missing". The error is
            # raised here because (per the original author's note)
            # deep_get does not raise KeyError itself.
            if value is None:
                raise KeyError(
                    "Unique key %s missing from dict: %s" % (key, dict_))
            values.append(value)
        return tuple(values)
Exemplo n.º 2
0
 def sorted_extract(drilldown):
     """Return a sorted list with one value list per cell of *drilldown*.

     For each cell the values of 'from.name', 'to.name', 'num_entries'
     and 'amount' are looked up (in that order) with ``deep_get``.
     """
     wanted = ('from.name', 'to.name', 'num_entries', 'amount')
     rows = [[deep_get(cell, key) for key in wanted] for cell in drilldown]
     # ``rows`` is a fresh list, so sorting it in place gives the same
     # result as returning ``sorted(rows)``.
     rows.sort()
     return rows
Exemplo n.º 3
0
    def _cell_id_for_row(self, row, query_dimensions):
        """Compute the cell id of *row* for the given *query_dimensions*.

        For every dimension the value is read from *row* with
        ``util.deep_get`` and reduced to one or two key parts:

        * a non-dict value is used as is,
        * a dict with a truthy 'from.day' contributes 'from.day' and
          'to.day',
        * otherwise a dict contributes its '_id', or failing that its
          'name'; a dict with neither contributes nothing.

        Returns: the result of ``util.hash_values`` applied to the
        utf8 encoded unicode representation of all key parts.
        """
        def to_utf8(part):
            return unicode(part).encode('utf8')

        parts = []
        for dimension in query_dimensions:
            value = util.deep_get(row, dimension)

            if not isinstance(value, dict):
                parts.append(value)
                continue

            from_day = util.deep_get(value, 'from.day')
            if from_day:
                parts.append(from_day)
                parts.append(util.deep_get(value, 'to.day'))
            elif '_id' in value:
                parts.append(value['_id'])
            elif 'name' in value:
                parts.append(value['name'])

        return util.hash_values(map(to_utf8, parts))
Exemplo n.º 4
0
def assert_order(result, keys, expect):
    """Assert that the drilldown cells of *result* come in the order *expect*.

    For every key in *keys* the values of all cells in
    ``result['drilldown']`` are collected with ``deep_get``.  With exactly
    one key the plain list of values is compared against *expect*;
    otherwise the per-key lists are zipped together, so the comparison
    is made against one tuple per cell.
    """
    columns = [[deep_get(cell, key) for cell in result['drilldown']]
               for key in keys]

    actual = columns[0] if len(columns) == 1 else zip(*columns)

    message = ('Not the expected order. result: %s, expected: %s' %
               (actual, expect))
    h.assert_equal(actual, expect, message)
Exemplo n.º 5
0
        def make_new_cell(cell_id):
            """Build a fresh cell dict with id *cell_id* from the current row.

            ``row``, ``query_dimensions``, ``used_time_dimensions``,
            ``util`` and ``self`` are taken from the surrounding scope.
            The returned cell carries 'amount' (0.0 if the row has no
            'amount' key) and 'num_entries' set to 1.
            """
            cell = {'_id': cell_id}

            for key in query_dimensions:
                # 'time' is never copied as such; only the year and/or
                # month are collected, depending on what is in use.
                if key == 'time':
                    if 'year' in used_time_dimensions:
                        year = util.deep_get(row, 'time.from.year')
                        cell['year'] = int(year)
                    if 'month' in used_time_dimensions:
                        month = util.deep_get(row, 'time.from.month')
                        # only the last two characters are the month
                        cell['month'] = int(month[-2:])
                    continue

                value = util.deep_get(row, key)
                if isinstance(value, dict):
                    if util.deep_get(value, 'from.day'):
                        # a dict with a 'from.day' is copied unchanged
                        cell[key] = row[key]
                        continue

                    # any other dict is reduced to a fixed set of keys
                    subdict = dict(
                        (subkey, value[subkey])
                        for subkey in ('name', 'label', 'color',
                                       '_id', 'ref', 'taxonomy')
                        if subkey in value)
                    if not subdict.get('name'):
                        # create a name so we can rely on it,
                        # e.g. in queries
                        subdict['name'] = str(subdict['_id'])
                    cell[key] = subdict
                elif isinstance(value, self.simpletypes):
                    cell[key] = value

            cell['amount'] = row.get('amount', 0.0)
            cell['num_entries'] = 1
            return cell
Exemplo n.º 6
0
    def _sort(self, cells, order):
        '''
        Order the *cells* according to one or more *order* criteria.

        ``cells``
            A list of cells
        ``order``
            See :meth:`query`. Each criterion is unpacked into a
            ``(dimension, direction)`` pair; ``direction`` is passed
            as the ``reverse`` argument of the sort. ``None`` leaves
            *cells* untouched.

        Returns: The sorted `list` of cells
        '''
        if order is None:
            return cells

        # Apply the criteria from last to first: sorting is stable, so
        # the criterion applied last (the first one in *order*) wins and
        # the others only break ties.
        for criterion in reversed(order):
            (dimension, direction) = criterion

            def key_getter(cell):
                return deep_get(cell, dimension)

            cells = sorted(cells, key=key_getter, reverse=direction)
        return cells
Exemplo n.º 7
0
    def compute(self):
        """
        Create the cube. This processes all entries of the dataset,
        aggregates cells based on the dimensions of the cube and
        saves them into a mongodb collection in the cubes namespace.

        Rows that share the same values for all queried dimensions are
        merged into one cell whose 'amount' is the sum of the row
        amounts and whose 'num_entries' is the number of merged rows.
        A missing or ``None`` amount counts as 0.0.

        An already computed collection for this cube is dropped before
        the new cells are inserted. The number of cells is stored in
        ``self.dataset['cubes'][self.name]['num_cells']`` and the
        dataset is saved.

        Raises: :exc:`AssertionError` if the dimension values of a row
        can not be used as a dictionary key (are not hashable).
        """
        log.debug("compute cube for dataset '%s', cube name: '%s', " \
                  "dimensions: '%s'",
                  self.dataset.name, self.name, ', '.join(self.dimensions))
        begin = time.time()

        # query fields: We query for all dimensions of the cube plus
        # 'amount'. 'year' and 'month' are not queried directly: if one
        # of them is used we query for 'time' instead and read the
        # year/month from 'time.from.year' / 'time.from.month' below.
        # time is a required field for entries, and some datasets
        # add a dimension for time, others don't.
        # If we specify cubes, we do it with 'year' (and maybe 'month')
        query_dimensions = set(self.dimensions)
        used_time_dimensions = query_dimensions.intersection(['year', 'month'])
        additional_dimensions = ['amount']
        if used_time_dimensions:
            query_dimensions = query_dimensions - used_time_dimensions
            additional_dimensions.append('time')
        query_dimensions = query_dimensions.union(additional_dimensions)
        cursor = _aggregation_query(self.dataset, {},
                                    fields=list(query_dimensions),
                                    as_class=dict)

        # maps the tuple of dimension values of a row to its cell
        cells = {}
        for row in cursor:
            # Reduce every dimension value to something that can be part
            # of a dict key: dicts become a (from.day, to.day) tuple,
            # their '_id' as string, or their 'name'.
            cell_key_values = []
            for dimension in query_dimensions:
                value = deep_get(row, dimension)
                if isinstance(value, dict):
                    from_day = deep_get(value, 'from.day')
                    if from_day:
                        value = (from_day, deep_get(value, 'to.day'))
                    elif '_id' in value:
                        value = str(value['_id'])
                    elif 'name' in value:
                        value = value['name']
                cell_key_values.append(value)
            cell_key = tuple(cell_key_values)

            try:
                cell = cells.get(cell_key, None)
            except TypeError:
                # the lookup hashes the key; an unhashable value (e.g. a
                # dict that matched none of the cases above) ends here
                raise AssertionError("Value must be hash()able: %s" %
                                     repr(cell_key))

            if cell is None:
                new_cell = {}
                for key in query_dimensions:
                    # handle dates especially, collect year and month
                    if key == 'time':
                        if 'year' in used_time_dimensions:
                            value = int(deep_get(row, 'time.from.year'))
                            new_cell['year'] = value
                        if 'month' in used_time_dimensions:
                            # only the last two characters are the month
                            value = int(deep_get(row, 'time.from.month')[-2:])
                            new_cell['month'] = value
                        continue

                    value = deep_get(row, key)
                    if isinstance(value, dict):
                        from_day = deep_get(value, 'from.day')
                        if from_day:
                            # a dict with a 'from.day' is copied unchanged
                            new_cell[key] = row[key]
                            continue

                    if isinstance(value, dict):
                        # any other dict is reduced to a fixed set of keys
                        subdict = {}
                        for subkey in ('name', 'label', 'color',
                                       '_id', 'ref', 'taxonomy'):
                            if subkey in value:
                                subdict[subkey] = value[subkey]
                        if not subdict.get('name'):
                            # create a name so we can rely on it,
                            # e.g. in queries
                            # NOTE(review): raises KeyError for a dict
                            # that has neither a 'name' nor an '_id'.
                            subdict['name'] = subdict['_id']

                        new_cell[key] = subdict
                    elif isinstance(value, self.simpletypes):
                        new_cell[key] = value
                # if the row has no (or a falsy) amount set 0.0
                new_cell['amount'] = row.get('amount') or 0.0
                # new_cell['entries'] = [row['_id']]
                new_cell['num_entries'] = 1
                cells[cell_key] = new_cell
            else:
                # Treat a missing or None amount as 0.0, exactly as for
                # a new cell. ``row.get('amount', 0.0)`` is not enough:
                # it returns None when the key exists with a None value,
                # and ``+= None`` raises a TypeError.
                cell['amount'] += row.get('amount') or 0.0
                cell['num_entries'] += 1
                # cell['entries'].append(row['_id'])

        # remove a collection if there is one
        if self.is_computed():
            self.db.drop_collection(self.collection_name)
        collection = self.db[self.collection_name]
        for cell in cells.itervalues():
            collection.insert(cell)

        #for dimension in query_dimensions.union(used_time_dimensions):
        #    collection.ensure_index([(dimension, ASCENDING)])
        #    collection.ensure_index([(dimension, DESCENDING)])

        self.dataset['cubes'][self.name]['num_cells'] = len(cells)
        self.dataset.save()
        log.debug("Done. Took: %ds", int(time.time() - begin))