示例#1
0
def test_strip_split():
    '''
    Exercise ``strip_split`` from ``metriqueu.utils``.

    Inputs covered: a comma separated string with uneven whitespace,
    ``None``, an already-built list, and a dict, which is expected to
    be rejected with TypeError.
    '''
    from metriqueu.utils import strip_split

    a_lst = ['a', 'b', 'c', 'd', 'e']
    a_str = 'a, b,     c,    d , e'
    assert strip_split(a_str) == a_lst
    assert strip_split(None) == []
    assert strip_split(a_lst) == a_lst

    # A bare try/except/pass also succeeds when nothing is raised at all,
    # so fail explicitly if the call returns normally.
    try:
        strip_split({})
    except TypeError:
        pass
    else:
        raise AssertionError('strip_split({}) did not raise TypeError')
示例#2
0
    def extract(self, exclude_fields=None, force=False,
                last_update=None, parse_timestamp=None, **kwargs):
        '''
        Extract routine for SQL based cubes.

        Works out which object ids to pull and which fields to pull for
        them, then delegates to the plain or the threaded extract worker
        depending on ``self.config.batch_size``.

        :param exclude_fields:
            Fields to leave out; normalized with ``strip_split`` and
            removed from ``self.fields``.
        :param force:
            If False (default), only ids reported by the enabled delta
            checks ('delta_mtime', 'delta_new_ids') are extracted, and
            only when the 'delta' property is set.
            If True, every distinct id found in the cube's table is
            extracted.
            If it is a list of oids, only those oids are extracted.
        :param last_update:
            Handed to the mtime delta lookup.
        :param parse_timestamp:
            Handed to the mtime delta lookup; when None, the cube's
            'parse_timestamp' property is used instead.

        Unknown kwargs are accepted and ignored.
        '''
        if parse_timestamp is None:
            parse_timestamp = self.get_property('parse_timestamp', None, True)

        excluded = strip_split(exclude_fields)

        # the three supported shapes of `force` are mutually exclusive
        if force is True:
            # full run: collect every known object id
            table = self.get_property('table')
            id_column = self.get_property('column')
            query = 'SELECT DISTINCT %s.%s FROM %s.%s' % (
                table, id_column, self.db, table)
            rows = self.proxy.fetchall(query)
            oids = self._extract_row_ids(rows)
        elif force is False:
            oids = []
            if self.get_property('delta', None, True):
                # delta run: objects updated since last mtime ...
                if self.get_property('delta_mtime', None, False):
                    changed = self._get_mtime_id_delta(last_update,
                                                       parse_timestamp)
                    oids.extend(changed)
                # ... plus ids not seen before
                if self.get_property('delta_new_ids', None, True):
                    oids.extend(self._get_new_ids())
        elif isinstance(force, list):
            # caller supplied the exact oids to extract
            oids = force
        else:
            oids = []

        oids = sorted(set(oids))

        # fixes the 'index' of the sql columns, so that each column of an
        # extracted row can be mapped back to its field
        field_order = list(set(self.fields).difference(excluded))

        if self.config.batch_size <= 0:
            return self._extract(oids, field_order)
        return self._extract_threaded(oids, field_order)
示例#3
0
 def get_fields(self, owner, cube, fields=None):
     '''
     Return back a dict of (field, 0/1) pairs, where
     the matching fields have 1.

     :param owner: required; rejected through ``self._raise(400, ...)``
                   when empty
     :param cube: required; rejected the same way when empty
     :param fields: fields to include (anything ``strip_split``
                    accepts), or '__all__' / '~' for whole objects
     :returns: None when whole objects are requested, otherwise the
               dict of field flags
     '''
     if not (owner and cube):
         self._raise(400, "owner and cube required")
     # Let the logger interpolate: eager '%' formatting raises TypeError
     # when `fields` is a tuple with more than one element.
     logger.debug('... fields: %s', fields)
     if fields in ['__all__', '~']:
         # None will make pymongo return back entire objects
         return None
     # to return `_id`, it must be included in fields
     _fields = {'_id': 0, '_oid': 1, '_start': 1, '_end': 1}
     _fields.update((f, 1) for f in strip_split(fields))
     return _fields
示例#4
0
    def get_fields(self, owner, cube, fields=None):
        '''
        Return back a dict of (field, 0/1) pairs, where
        the matching fields have 1.

        :param cube: cube name
        :param owner: username of cube owner
        :param fields: list of fields to query (anything ``strip_split``
                       accepts), or '__all__' / '~' for whole objects
        :returns: None when whole objects are requested, otherwise the
                  dict of field flags
        '''
        if not (owner and cube):
            self._raise(400, "owner and cube required")
        # Let the logger interpolate: eager '%' formatting raises TypeError
        # when `fields` is a tuple with more than one element.
        logger.debug('... fields: %s', fields)
        if fields in ['__all__', '~']:
            # None indicates a request should return back whole objs
            return None
        # to return `_id`, it must be included in fields
        _fields = {'_id': 0, '_oid': 1, '_start': 1, '_end': 1}
        _fields.update((f, 1) for f in strip_split(fields))
        return _fields