def parser(stream, objconf, tuples, **kwargs):
    """ Parses the pipe content

    Args:
        stream (Iter[dict]): The source. Note: this shares the `tuples`
            iterator, so consuming it will consume `tuples` as well.

        objconf (obj): the item independent configuration (an Objectify
            instance).

        tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf)
            `item` is an element in the source stream and `objconf` is the
            item configuration (an Objectify instance). Note: this shares
            the `stream` iterator, so consuming it will consume `stream`
            as well.

        kwargs (dict): Keyword arguments.

    Kwargs:
        others (List[Iter(dict)]): List of streams to join

    Returns:
        Iter(dict): The output stream

    Examples:
        >>> from itertools import repeat
        >>>
        >>> stream = ({'x': x} for x in range(5))
        >>> other1 = ({'x': x + 5} for x in range(5))
        >>> other2 = ({'x': x + 10} for x in range(5))
        >>> kwargs = {'others': [other1, other2]}
        >>> tuples = zip(stream, repeat(None))
        >>> len(list(parser(stream, None, tuples, **kwargs)))
        15
    """
    # Flatten the extra streams into one iterator, then append them
    # after the primary stream.
    joined_others = multiplex(kwargs['others'])
    return chain(stream, joined_others)
def output(self):
    """Asynchronously run the pipeline over the source and yield the
    resulting output stream (inlineCallbacks-style generator).
    """
    source = yield self.source
    async_pipeline = partial(self.async_pipe, **self.kwargs)

    if self.mapify:
        # Fan the pipeline out over the source items, then flatten the
        # per-item results back into a single stream.
        mapped = yield ait.async_map(async_pipeline, source, self.connections)
        result = multiplex(mapped)
    else:
        result = yield async_pipeline(source)

    return_value(result)
def output(self):
    # Run the pipeline over the source, optionally in parallel via the
    # worker pool, and return the flattened output stream.
    pipeline = partial(self.pipe, **self.kwargs)

    if self.parallelize:
        # Pair every source item with the pipeline callable so `listpipe`
        # can apply it inside the pool workers.
        zipped = zip(self.source, repeat(pipeline))
        mapped = self.map(listpipe, zipped, chunksize=self.chunksize)
    elif self.mapify:
        mapped = self.map(pipeline, self.source)

    if self.parallelize and not self.reuse_pool:
        # Tear the pool down once the work is submitted, unless the
        # caller asked to keep it for reuse.
        self.pool.close()
        self.pool.join()

    # NOTE(review): if `parallelize` is set while `mapify` is not, the
    # parallel-mapped result is discarded and `pipeline(self.source)` runs
    # serially instead — presumably `parallelize` implies `mapify`
    # elsewhere; confirm that invariant.
    return multiplex(mapped) if self.mapify else pipeline(self.source)
def wrapper(items=None, **kwargs):
    """Operator wrapper: dispatch `items` through the wrapped pipe.

    Args:
        items (Iter[dict]): the source items (defaults to an empty
            iterator).
        kwargs (dict): per-call configuration merged over the module
            defaults.

    Yields (sync mode) or return_value's (async mode) the output stream.
    """
    module_name = wrapper.__module__.split('.')[-1]
    wrapper.__dict__['name'] = module_name

    defaults = {
        'dictize': True, 'ftype': 'pass', 'ptype': 'pass',
        'objectify': True, 'emit': True, 'assign': module_name}

    combined = cdicts(self.defaults, defaults, self.opts, kwargs)
    extracted = 'extract' in combined
    pdictize = combined.get('listize') if extracted else True
    combined.setdefault('pdictize', pdictize)

    conf = {k: combined[k] for k in self.defaults}
    conf.update(kwargs.get('conf', {}))
    combined.update({'conf': conf})

    # replace conf with dictized version so we can access its
    # attributes even if we already extracted a value
    updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
    kwargs.update(updates)

    items = items or iter([])
    _INPUT = map(DotDict, items) if combined.get('dictize') else items
    bfuncs = get_broadcast_funcs(**combined)
    types = {combined['ftype'], combined['ptype']}

    if types.difference({'pass', 'none'}):
        dfuncs = get_dispatch_funcs(**combined)
    else:
        dfuncs = None

    pairs = (dispatch(item, bfuncs, dfuncs=dfuncs) for item in _INPUT)
    parsed, _ = dispatch(DotDict(), bfuncs, dfuncs=dfuncs)

    # - operators can't skip items
    # - purposely setting both variables to maps of the same iterable
    #   since only one is intended to be used at any given time
    # - `tuples` is an iterator of tuples of the first two `parsed`
    #   elements
    tuples = ((p[0][0], p[0][1]) for p in pairs)
    orig_stream = (p[0][0] for p in pairs)
    objconf = parsed[1]

    # `async` became a reserved keyword in Python 3.7, so the attribute
    # must be read via getattr — `self.async` is a SyntaxError there.
    if getattr(self, 'async'):
        stream = yield pipe(orig_stream, objconf, tuples, **kwargs)
    else:
        stream = pipe(orig_stream, objconf, tuples, **kwargs)

    sub_type = 'aggregator' if hasattr(stream, 'keys') else 'composer'
    wrapper.__dict__['sub_type'] = sub_type

    # operators can only assign one value per item and can't skip items
    _, assignment = get_assignment(stream, False, **combined)

    if combined.get('emit'):
        stream = assignment
    else:
        # Wrap each assigned value in a singleton iterator, attach it to
        # a fresh dict under `key`, then flatten back into one stream.
        singles = (iter([v]) for v in assignment)
        key = combined.get('assign')
        assigned = (assign({}, s, key, one=True) for s in singles)
        stream = utils.multiplex(assigned)

    if getattr(self, 'async'):
        return_value(stream)
    else:
        for s in stream:
            yield s
def fetch(self):
    """Fetch all source urls"""
    # In parallel mode, hand the pool a chunksize; otherwise call the
    # plain map with no extra keyword arguments.
    if self.parallel:
        mapped = self.map(getpipe, self.zargs, chunksize=self.chunksize)
    else:
        mapped = self.map(getpipe, self.zargs)

    return multiplex(mapped)
def async_fetch(self):
    """Fetch all source urls"""
    # Map the async getter over the zipped args, bounded by the
    # connection limit, then flatten the results into one stream.
    mapped = yield ait.async_map(async_get_pipe, self.zargs, self.connections)
    return_value(multiplex(mapped))
def wrapper(items=None, **kwargs):
    """Operator wrapper: dispatch `items` through the wrapped pipe.

    Args:
        items (Iter[dict]): the source items (defaults to an empty
            iterator).
        kwargs (dict): per-call configuration merged over the module
            defaults.

    Yields (sync mode) or return_value's (async mode) the output stream.
    """
    module_name = wrapper.__module__.split('.')[-1]
    wrapper.__dict__['name'] = module_name

    defaults = {
        'dictize': True, 'ftype': 'pass', 'ptype': 'pass',
        'objectify': True, 'emit': True, 'assign': module_name}

    combined = cdicts(self.defaults, defaults, self.opts, kwargs)
    extracted = 'extract' in combined
    pdictize = combined.get('listize') if extracted else True
    combined.setdefault('pdictize', pdictize)

    conf = {k: combined[k] for k in self.defaults}
    conf.update(kwargs.get('conf', {}))
    combined.update({'conf': conf})

    # replace conf with dictized version so we can access its
    # attributes even if we already extracted a value
    updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
    kwargs.update(updates)

    items = items or iter([])
    _INPUT = map(DotDict, items) if combined.get('dictize') else items
    bfuncs = get_broadcast_funcs(**combined)
    types = {combined['ftype'], combined['ptype']}

    if types.difference({'pass', 'none'}):
        dfuncs = get_dispatch_funcs(**combined)
    else:
        dfuncs = None

    pairs = (dispatch(item, bfuncs, dfuncs=dfuncs) for item in _INPUT)
    parsed, _ = dispatch(DotDict(), bfuncs, dfuncs=dfuncs)

    # - operators can't skip items
    # - purposely setting both variables to maps of the same iterable
    #   since only one is intended to be used at any given time
    # - `tuples` is an iterator of tuples of the first two `parsed`
    #   elements
    tuples = ((p[0][0], p[0][1]) for p in pairs)
    orig_stream = (p[0][0] for p in pairs)
    objconf = parsed[1]

    # `async` became a reserved keyword in Python 3.7, so the attribute
    # must be read via getattr — `self.async` is a SyntaxError there.
    if getattr(self, 'async'):
        stream = yield pipe(orig_stream, objconf, tuples, **kwargs)
    else:
        stream = pipe(orig_stream, objconf, tuples, **kwargs)

    sub_type = 'aggregator' if hasattr(stream, 'keys') else 'composer'
    wrapper.__dict__['sub_type'] = sub_type

    # operators can only assign one value per item and can't skip items
    _, assignment = get_assignment(stream, False, **combined)

    if combined.get('emit'):
        stream = assignment
    else:
        # Wrap each assigned value in a singleton iterator, attach it to
        # a fresh dict under `key`, then flatten back into one stream.
        singles = (iter([v]) for v in assignment)
        key = combined.get('assign')
        assigned = (assign({}, s, key, one=True) for s in singles)
        stream = utils.multiplex(assigned)

    if getattr(self, 'async'):
        return_value(stream)
    else:
        for s in stream:
            yield s