def eval(self, env):
    """
    Evaluate ``self.body`` once per item produced by ``self.source``.

    The source expression is evaluated immediately against ``env``. The
    per-item evaluation of the body is deferred: it lives in a generator
    function that is handed to ``RepeatableIterator``.

    When ``self.name`` is truthy each item is made available to the body via
    ``env.bind(self.name, item)``; otherwise the body sees
    ``env.replace(item)``.
    """
    source_result = self.source.eval(env)

    # Default arguments pin ``env`` and ``source_result`` to the values they
    # have right now (Python closure workaround).
    def iterate(env=env, source_result=source_result):
        # Decide once how an item extends the environment, then run one loop.
        if self.name:
            def env_for(item):
                return env.bind(self.name, item)
        else:
            def env_for(item):
                return env.replace(item)

        for item in source_result:
            yield self.body.eval(env_for(item))

    return RepeatableIterator(iterate)
def test_iteration(self):
    """
    Check that RepeatableIterator can be consumed repeatedly, reports
    emptiness through ``bool()``, and does not consume its source eagerly.
    """
    class LazinessException(Exception):
        pass

    def plain_numbers():
        for number in range(1, 100):
            yield number

    def fails_after_ten():
        # Yields 1..10, then blows up: anything that reads past the tenth
        # item is not being lazy.
        for number in range(1, 100):
            if number <= 10:
                yield number
            else:
                raise LazinessException('Not lazy enough')

    expected = list(range(1, 100))

    # Baseline: a bare generator is exhausted after a single pass, which is
    # the situation RepeatableIterator is meant to fix.
    iterator = plain_numbers()
    assert list(iterator) == expected
    assert list(iterator) == []

    # Wrapped in RepeatableIterator, every pass yields the full sequence.
    iterator = RepeatableIterator(plain_numbers)
    assert list(iterator) == expected
    assert list(iterator) == expected
    assert bool(iterator) is True

    iterator = RepeatableIterator(lambda: (i for i in []))
    assert bool(iterator) is False

    # Laziness: taking 5 items must not reach the failing 11th item ...
    iterator = RepeatableIterator(fails_after_ten)
    assert list(islice(iterator, 5)) == list(range(1, 6))

    # ... while asking for 15 items has to hit it.
    try:
        list(islice(iterator, 15))
    except LazinessException:
        pass
    else:
        raise Exception('Should have failed')
def lookup(self, name):
    """
    Look ``name`` up in this environment's bindings.

    ``name`` may be a string, which is first passed through ``self.parse``,
    or an already-built ``jsonpath.JSONPath`` expression, which is used as
    is. Any other type raises ``NotFound(unwrap_val(name))``.

    Returns a RepeatableIterator over the datums produced by calling
    ``find`` on the expression with the bindings. The search itself only
    runs when the result is iterated.
    """
    is_text = isinstance(name, six.string_types)
    if not is_text and not isinstance(name, jsonpath.JSONPath):
        raise NotFound(unwrap_val(name))
    jsonpath_expr = self.parse(name) if is_text else name

    # The default argument captures the expression for the generator below.
    def matching_datums(jsonpath_expr=jsonpath_expr):
        for datum in jsonpath_expr.find(self.__bindings):
            # HACK: The auto id from jsonpath_rw is good, but it is lost once
            # callers take ``.value``, so write it into dict values that do
            # not already carry an 'id'. This mutates the matched dict.
            if isinstance(datum.value, dict) and 'id' not in datum.value:
                datum.value['id'] = jsonpath.AutoIdForDatum(datum).value
            yield datum

    return RepeatableIterator(matching_datums)
def iterate(self, resource, paginator, params=None):
    """
    Iterate over a list endpoint, one page at a time.

    Assumes the endpoint behaves like a tastypie list endpoint: every batch
    returned by ``self.get`` is expected to carry ``meta`` (``total_count``,
    ``offset``, ``limit``, ``next``) and ``objects`` (each with an ``id``).

    ``params`` is copied, so the caller's dict is never modified. Follow-up
    pages use whatever ``paginator.next_page_params_from_batch`` returns;
    a falsy value ends the iteration.

    Returns a RepeatableIterator wrapping the paging generator.
    """
    params = dict(params or {})

    # NOTE(review): block nesting was reconstructed on the assumption that
    # the "next page" handling and the checkpoint call only run for
    # non-empty batches -- confirm against the canonical file.
    def iterate_resource(resource=resource, params=params):
        previous_ids = set()

        while True:
            fetch_start = datetime.utcnow()
            batch = self.get(resource, params)
            meta = batch['meta']

            if meta['total_count']:
                total_count = int(meta['total_count'])
            else:
                total_count = 'unknown'
            logger.debug(
                'Received %s-%s of %s',
                meta['offset'],
                int(meta['offset']) + int(meta['limit']),
                total_count
            )

            objects = batch['objects']
            if not objects:
                # An empty page ends the iteration without a checkpoint.
                return

            # Objects whose id appeared in the previous page are skipped.
            for obj in objects:
                if obj['id'] not in previous_ids:
                    yield obj

            finished = True
            if meta['next']:
                previous_ids = {obj['id'] for obj in objects}
                params = paginator.next_page_params_from_batch(batch)
                finished = not params

            self.checkpoint(fetch_start)
            if finished:
                return

    return RepeatableIterator(iterate_resource)
def iterate(self, resource, paginator, params=None):
    """
    Iterate over what the API would have returned for this request.

    Batches are read through ``self.get`` from the query set of the mock API
    built by ``_get_mock_api`` and serialized with ``mock_api.serialize``.
    The offset advances by ``self.limit`` per batch.

    Returns a RepeatableIterator wrapping the paging generator.
    """
    from commcare_export.cli import logger

    # ``resource`` is either 'form' or 'case'.
    # ``params`` are the api params, e.g.
    # {'limit': 1000, u'type': u'pregnant_mother', 'order_by': 'server_date_modified'}
    params = dict(params or {})
    mock_api = _get_mock_api(resource, self.project, params)

    # NOTE(review): block nesting was reconstructed on the assumption that
    # the offset bookkeeping and the checkpoint call only run for non-empty
    # batches -- confirm against the canonical file.
    def iterate_resource(resource=resource, params=params):
        previous_ids = set()
        offset = 0
        total_count = mock_api.query_set.count()

        while True:
            batch = self.get(mock_api.query_set, offset, params)
            records = [mock_api.serialize(item) for item in batch]
            logger.info('Received {}-{} of {}'.format(
                offset, offset + self.limit, total_count))

            if not records:
                # An empty batch ends the iteration without a checkpoint.
                return

            # Records whose id appeared in the previous batch are skipped.
            for record in records:
                if record['id'] not in previous_ids:
                    yield record

            # The comparison uses the offset *before* it is advanced.
            has_more = offset < total_count
            if has_more:
                previous_ids = {record['id'] for record in records}
                offset += self.limit

            self.checkpoint(paginator, records)
            if not has_more:
                return

    from commcare_export.repeatable_iterator import RepeatableIterator
    return RepeatableIterator(iterate_resource)
def test_or(self):
    """
    Exercise the built-in "or": it returns the first argument unless that
    argument is None, and only touches the second argument when it has to.
    """
    env = BuiltInEnv()

    def either(first, second):
        return Apply(Reference("or"), first, second)

    assert either(Literal(None), Literal(2)).eval(env) == 2

    # Consuming this iterator past its first item calls die(...).
    laziness_iterator = RepeatableIterator(
        lambda: (i if i < 1 else die('Not lazy enough') for i in range(2)))
    lazy_fallback = Literal(laziness_iterator)

    # Non-None first arguments -- including falsy ones -- are returned
    # without the fallback being consumed.
    assert either(Literal(1), lazy_fallback).eval(env) == 1
    assert either(Literal(''), lazy_fallback).eval(env) == ''
    assert either(Literal(0), lazy_fallback).eval(env) == 0

    # A None first argument forces the fallback, which then fails.
    with pytest.raises(LazinessException):
        either(Literal(None), lazy_fallback).eval(env)

    env = env | JsonPathEnv({'a': {'c': 'c val'}})
    missing = Reference('a.b')
    assert either(missing, Reference('a.c')).eval(env) == 'c val'
    assert either(missing, Reference('a.d')).eval(env) is None
def test_flatmap(self):
    """
    Check that FlatMap concatenates the body's result for every source item
    and that it pulls from its source lazily.
    """
    env = BuiltInEnv() | DictEnv({})

    # Items 0..3 are fine; reaching the fifth item calls die(...).
    laziness_iterator = RepeatableIterator(lambda: ({
        'a': range(i)
    } if i < 4 else die('Not lazy enough') for i in range(12)))

    def flat_map(source, body):
        return FlatMap(source=Literal(source), body=body).eval(env)

    # A literal body is repeated once per source item.
    mixed_rows = [{'a': [1]}, {'a': 'foo'}, {'a': [3, 4]}]
    constant_result = list(flat_map(mixed_rows, Literal([1, 2, 3])))
    assert constant_result == [1, 2, 3, 1, 2, 3, 1, 2, 3]

    # A reference body is looked up per item and the results are flattened.
    list_rows = [{'a': [1]}, {'a': [2]}, {'a': [3, 4]}]
    assert list(flat_map(list_rows, Reference('a'))) == [1, 2, 3, 4]

    # Taking only the first six values never reaches the failing item.
    first_six = islice(flat_map(laziness_iterator, Reference('a')), 6)
    assert list(first_six) == [0, 0, 1, 0, 1, 2]

    # Draining everything must hit it.
    try:
        list(flat_map(laziness_iterator, Reference('a')))
    except LazinessException:
        pass
    else:
        raise Exception('Should have failed')
def test_map(self):
    """
    Check that Map evaluates its body once per source item and that it
    pulls from its source lazily.
    """
    env = BuiltInEnv() | DictEnv({})

    # Items 0..4 are fine; reaching the sixth item calls die(...).
    laziness_iterator = RepeatableIterator(lambda: ({
        'a': i
    } if i < 5 else die('Not lazy enough') for i in range(12)))

    def mapped(source, body):
        return Map(source=Literal(source), body=body).eval(env)

    def rows():
        # Built fresh for every use, as the original builds a new literal
        # for each assertion.
        return [{'a': 1}, {'a': 2}, {'a': 3}]

    # A literal body yields the same value for every item.
    assert list(mapped(rows(), Literal(1))) == [1, 1, 1]

    # A reference body is resolved against each item in turn.
    assert list(mapped(rows(), Reference('a'))) == [1, 2, 3]

    # Taking only the first five values never reaches the failing item.
    first_five = islice(mapped(laziness_iterator, Reference('a')), 5)
    assert list(first_five) == [0, 1, 2, 3, 4]

    # Draining everything must hit it.
    try:
        list(mapped(laziness_iterator, Reference('a')))
    except LazinessException:
        pass
    else:
        raise Exception('Should have failed')
def emitted_tables(self):
    """
    Return a RepeatableIterator over the tables emitted by ``self.left``
    followed by those emitted by ``self.right``.

    Neither side's ``emitted_tables()`` is called here; both calls happen
    inside the function handed to RepeatableIterator.
    """
    def combined_tables():
        left_tables = self.left.emitted_tables()
        right_tables = self.right.emitted_tables()
        return chain(left_tables, right_tables)

    return RepeatableIterator(combined_tables)
def iterate(self, resource, paginator, params=None, checkpoint_manager=None):
    """
    Iterate over a list endpoint, one page at a time.

    Assumes the endpoint is a list endpoint, and makes a lot of assumptions
    that it is like a tastypie endpoint: every batch returned by
    ``self.get`` is expected to carry ``meta`` (``total_count``, ``next``)
    and ``objects`` (each with an ``id``).

    ``params`` is copied, so the caller's dict is never modified. Follow-up
    pages use whatever ``paginator.next_page_params_from_batch`` returns; a
    falsy value ends the iteration.

    Raises ResourceRepeatException (while iterating) once the request
    parameters have stayed unchanged for RESOURCE_REPEAT_LIMIT consecutive
    requests.

    Returns a RepeatableIterator wrapping the paging generator.
    """
    params = dict(params or {})

    # NOTE(review): block nesting was reconstructed on the assumption that
    # the "next page" handling and the checkpoint call only run for
    # non-empty batches -- confirm against the canonical file.
    def iterate_resource(resource=resource, params=params):
        more_to_fetch = True
        # Ids from the previous page; objects repeated on the next page are
        # not yielded a second time.
        last_batch_ids = set()
        total_count = None
        fetched = 0
        # Number of consecutive requests made with unchanged params.
        repeat_counter = 0
        last_params = None

        while more_to_fetch:
            if params == last_params:
                repeat_counter += 1
            else:
                repeat_counter = 0
            if repeat_counter >= RESOURCE_REPEAT_LIMIT:
                raise ResourceRepeatException(
                    "Requested resource '{}' {} times with same parameters"
                    .format(resource, repeat_counter))

            batch = self.get(resource, params)
            # Snapshot the params just used, for the repeat check above.
            last_params = copy.copy(params)
            # (Re)read the total when it is not known yet, or when at least
            # that many objects have already been counted; the running
            # count restarts whenever that happens.
            if not total_count or total_count == 'unknown' or fetched >= total_count:
                total_count = int(
                    batch['meta']['total_count']
                ) if batch['meta']['total_count'] else 'unknown'
                fetched = 0

            fetched += len(batch['objects'])
            logger.debug('Received %s of %s', fetched, total_count)

            if not batch['objects']:
                # An empty page ends the iteration without a checkpoint.
                more_to_fetch = False
            else:
                for obj in batch['objects']:
                    if obj['id'] not in last_batch_ids:
                        yield obj

                if batch['meta']['next']:
                    last_batch_ids = {
                        obj['id'] for obj in batch['objects']
                    }
                    params = paginator.next_page_params_from_batch(batch)
                    if not params:
                        more_to_fetch = False
                else:
                    more_to_fetch = False

                # The last argument is True when this was the final batch.
                self.checkpoint(checkpoint_manager, paginator, batch, not more_to_fetch)

    return RepeatableIterator(iterate_resource)
def default_to_json(obj):
    """
    Convert ``obj`` to a JSON-compatible value.

    Objects that expose a ``toJSON`` attribute are converted by calling it;
    everything else is handed to ``RepeatableIterator.to_jvalue``.
    """
    if not hasattr(obj, 'toJSON'):
        return RepeatableIterator.to_jvalue(obj)
    return obj.toJSON()