def __call__(self, pipe):
    """Creates a wrapper that allows a sync/async pipe to process a
    stream of items.

    Args:
        pipe (func): A function of 4 args (stream, objconf, tuples) and
            a `**kwargs`. TODO: document args & kwargs.

    Returns:
        func: A function of 1 arg (items) and a `**kwargs`.

    Examples:
        >>> from riko.bado import react, _issync
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.bado.util import maybeDeferred
        >>>
        >>> opts = {
        ...     'ftype': 'text', 'extract': 'times', 'listize': True,
        ...     'pdictize': False, 'emit': True, 'field': 'content',
        ...     'objectify': False}
        ...
        >>> wrapper = operator(**opts)
        >>>
        >>> def pipe1(stream, objconf, tuples, **kwargs):
        ...     for content, times in tuples:
        ...         value = 'say "%s" %s times!' % (content, times[0])
        ...         yield {kwargs['assign']: value}
        ...
        >>> def pipe2(stream, objconf, tuples, **kwargs):
        ...     word_cnt = sum(len(content.split()) for content in stream)
        ...     return {kwargs['assign']: word_cnt}
        ...
        >>> wrapped_pipe1 = wrapper(pipe1)
        >>> wrapped_pipe2 = wrapper(pipe2)
        >>> items = [{'content': 'hello world'}, {'content': 'bye world'}]
        >>> kwargs = {'conf': {'times': 'three'}, 'assign': 'content'}
        >>> response = {'content': 'say "hello world" three times!'}
        >>>
        >>> next(wrapped_pipe1(items, **kwargs)) == response
        True
        >>> next(wrapped_pipe2(items, **kwargs)) == {'content': 4}
        True
        >>> async_wrapper = operator(isasync=True, **opts)
        >>>
        >>> # async pipes don't have to return a deferred,
        >>> # they work fine either way
        >>> def async_pipe1(stream, objconf, tuples, **kwargs):
        ...     for content, times in tuples:
        ...         value = 'say "%s" %s times!' % (content, times[0])
        ...         yield {kwargs['assign']: value}
        ...
        >>> # this is an admittedly contrived example to show how you would
        >>> # call an async function
        >>> @coroutine
        ... def async_pipe2(stream, objconf, tuples, **kwargs):
        ...     words = (len(content.split()) for content in stream)
        ...     word_cnt = yield maybeDeferred(sum, words)
        ...     return_value({kwargs['assign']: word_cnt})
        ...
        >>> wrapped_async_pipe1 = async_wrapper(async_pipe1)
        >>> wrapped_async_pipe2 = async_wrapper(async_pipe2)
        >>>
        >>> @coroutine
        ... def run(reactor):
        ...     r1 = yield wrapped_async_pipe1(items, **kwargs)
        ...     print(next(r1) == response)
        ...     r2 = yield wrapped_async_pipe2(items, **kwargs)
        ...     print(next(r2) == {'content': 4})
        ...
        >>> if _issync:
        ...     True
        ...     True
        ... else:
        ...     try:
        ...         react(run, _reactor=FakeReactor())
        ...     except SystemExit:
        ...         pass
        True
        True
    """
    @wraps(pipe)
    def wrapper(items=None, **kwargs):
        module_name = wrapper.__module__.split('.')[-1]
        wrapper.__dict__['name'] = module_name

        defaults = {
            'dictize': True, 'ftype': 'pass', 'ptype': 'pass',
            'objectify': True, 'emit': True, 'assign': module_name}

        # later dicts win: decorator opts override defaults, call-time
        # kwargs override everything
        combined = cdicts(self.defaults, defaults, self.opts, kwargs)
        extracted = 'extract' in combined
        pdictize = combined.get('listize') if extracted else True
        combined.setdefault('pdictize', pdictize)
        conf = {k: combined[k] for k in self.defaults}
        conf.update(kwargs.get('conf', {}))
        combined.update({'conf': conf})

        # replace conf with dictized version so we can access its
        # attributes even if we already extracted a value
        updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
        kwargs.update(updates)

        items = items or iter([])
        _INPUT = map(DotDict, items) if combined.get('dictize') else items
        bfuncs = get_broadcast_funcs(**combined)
        types = {combined['ftype'], combined['ptype']}

        if types.difference({'pass', 'none'}):
            dfuncs = get_dispatch_funcs(**combined)
        else:
            dfuncs = None

        pairs = (dispatch(item, bfuncs, dfuncs=dfuncs) for item in _INPUT)
        parsed, _ = dispatch(DotDict(), bfuncs, dfuncs=dfuncs)

        # - operators can't skip items
        # - purposely setting both variables to maps of the same iterable
        #   since only one is intended to be used at any given time
        # - `tuples` is an iterator of tuples of the first two `parsed`
        #   elements
        tuples = ((p[0][0], p[0][1]) for p in pairs)
        orig_stream = (p[0][0] for p in pairs)
        objconf = parsed[1]

        # NOTE(review): renamed from `self.async` — `async` is a reserved
        # keyword since Python 3.7 (PEP 492) and was a SyntaxError here.
        # `isasync` matches the `operator(isasync=True)` doctest usage;
        # confirm `__init__` stores the flag under this name.
        if self.isasync:
            stream = yield pipe(orig_stream, objconf, tuples, **kwargs)
        else:
            stream = pipe(orig_stream, objconf, tuples, **kwargs)

        # a dict-like result means the pipe aggregated the stream into a
        # single item; otherwise it composed a new stream
        sub_type = 'aggregator' if hasattr(stream, 'keys') else 'composer'
        wrapper.__dict__['sub_type'] = sub_type

        # operators can only assign one value per item and can't skip items
        _, assignment = get_assignment(stream, False, **combined)

        if combined.get('emit'):
            stream = assignment
        else:
            singles = (iter([v]) for v in assignment)
            key = combined.get('assign')
            assigned = (assign({}, s, key, one=True) for s in singles)
            stream = utils.multiplex(assigned)

        if self.isasync:
            return_value(stream)
        else:
            for s in stream:
                yield s

    wrapper.__dict__['type'] = 'operator'
    return coroutine(wrapper) if self.isasync else wrapper
def __call__(self, pipe):
    """Creates a wrapper that allows a sync/async pipe to process a
    stream of items.

    Args:
        pipe (func): A function of 4 args (stream, objconf, tuples) and
            a `**kwargs`. TODO: document args & kwargs.

    Returns:
        func: A function of 1 arg (items) and a `**kwargs`.

    Examples:
        >>> from riko.bado import react, _issync
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.bado.util import maybeDeferred
        >>>
        >>> opts = {
        ...     'ftype': 'text', 'extract': 'times', 'listize': True,
        ...     'pdictize': False, 'emit': True, 'field': 'content',
        ...     'objectify': False}
        ...
        >>> wrapper = operator(**opts)
        >>>
        >>> def pipe1(stream, objconf, tuples, **kwargs):
        ...     for content, times in tuples:
        ...         value = 'say "%s" %s times!' % (content, times[0])
        ...         yield {kwargs['assign']: value}
        ...
        >>> def pipe2(stream, objconf, tuples, **kwargs):
        ...     word_cnt = sum(len(content.split()) for content in stream)
        ...     return {kwargs['assign']: word_cnt}
        ...
        >>> wrapped_pipe1 = wrapper(pipe1)
        >>> wrapped_pipe2 = wrapper(pipe2)
        >>> items = [{'content': 'hello world'}, {'content': 'bye world'}]
        >>> kwargs = {'conf': {'times': 'three'}, 'assign': 'content'}
        >>> response = {'content': 'say "hello world" three times!'}
        >>>
        >>> next(wrapped_pipe1(items, **kwargs)) == response
        True
        >>> next(wrapped_pipe2(items, **kwargs)) == {'content': 4}
        True
        >>> async_wrapper = operator(isasync=True, **opts)
        >>>
        >>> # async pipes don't have to return a deferred,
        >>> # they work fine either way
        >>> def async_pipe1(stream, objconf, tuples, **kwargs):
        ...     for content, times in tuples:
        ...         value = 'say "%s" %s times!' % (content, times[0])
        ...         yield {kwargs['assign']: value}
        ...
        >>> # this is an admittedly contrived example to show how you would
        >>> # call an async function
        >>> @coroutine
        ... def async_pipe2(stream, objconf, tuples, **kwargs):
        ...     words = (len(content.split()) for content in stream)
        ...     word_cnt = yield maybeDeferred(sum, words)
        ...     return_value({kwargs['assign']: word_cnt})
        ...
        >>> wrapped_async_pipe1 = async_wrapper(async_pipe1)
        >>> wrapped_async_pipe2 = async_wrapper(async_pipe2)
        >>>
        >>> @coroutine
        ... def run(reactor):
        ...     r1 = yield wrapped_async_pipe1(items, **kwargs)
        ...     print(next(r1) == response)
        ...     r2 = yield wrapped_async_pipe2(items, **kwargs)
        ...     print(next(r2) == {'content': 4})
        ...
        >>> if _issync:
        ...     True
        ...     True
        ... else:
        ...     try:
        ...         react(run, _reactor=FakeReactor())
        ...     except SystemExit:
        ...         pass
        True
        True
    """
    @wraps(pipe)
    def wrapper(items=None, **kwargs):
        module_name = wrapper.__module__.split('.')[-1]
        wrapper.__dict__['name'] = module_name

        defaults = {
            'dictize': True, 'ftype': 'pass', 'ptype': 'pass',
            'objectify': True, 'emit': True, 'assign': module_name}

        # later dicts win: decorator opts override defaults, call-time
        # kwargs override everything
        combined = cdicts(self.defaults, defaults, self.opts, kwargs)
        extracted = 'extract' in combined
        pdictize = combined.get('listize') if extracted else True
        combined.setdefault('pdictize', pdictize)
        conf = {k: combined[k] for k in self.defaults}
        conf.update(kwargs.get('conf', {}))
        combined.update({'conf': conf})

        # replace conf with dictized version so we can access its
        # attributes even if we already extracted a value
        updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
        kwargs.update(updates)

        items = items or iter([])
        _INPUT = map(DotDict, items) if combined.get('dictize') else items
        bfuncs = get_broadcast_funcs(**combined)
        types = {combined['ftype'], combined['ptype']}

        if types.difference({'pass', 'none'}):
            dfuncs = get_dispatch_funcs(**combined)
        else:
            dfuncs = None

        pairs = (dispatch(item, bfuncs, dfuncs=dfuncs) for item in _INPUT)
        parsed, _ = dispatch(DotDict(), bfuncs, dfuncs=dfuncs)

        # - operators can't skip items
        # - purposely setting both variables to maps of the same iterable
        #   since only one is intended to be used at any given time
        # - `tuples` is an iterator of tuples of the first two `parsed`
        #   elements
        tuples = ((p[0][0], p[0][1]) for p in pairs)
        orig_stream = (p[0][0] for p in pairs)
        objconf = parsed[1]

        # NOTE(review): renamed from the garbled `self. async` — `async` is
        # a reserved keyword since Python 3.7 (PEP 492). `isasync` matches
        # the `operator(isasync=True)` doctest usage; confirm `__init__`
        # stores the flag under this name.
        if self.isasync:
            stream = yield pipe(orig_stream, objconf, tuples, **kwargs)
        else:
            stream = pipe(orig_stream, objconf, tuples, **kwargs)

        # a dict-like result means the pipe aggregated the stream into a
        # single item; otherwise it composed a new stream
        sub_type = 'aggregator' if hasattr(stream, 'keys') else 'composer'
        wrapper.__dict__['sub_type'] = sub_type

        # operators can only assign one value per item and can't skip items
        _, assignment = get_assignment(stream, False, **combined)

        if combined.get('emit'):
            stream = assignment
        else:
            singles = (iter([v]) for v in assignment)
            key = combined.get('assign')
            assigned = (assign({}, s, key, one=True) for s in singles)
            stream = utils.multiplex(assigned)

        if self.isasync:
            return_value(stream)
        else:
            for s in stream:
                yield s

    wrapper.__dict__['type'] = 'operator'
    return coroutine(wrapper) if self.isasync else wrapper
def __call__(self, pipe):
    """Creates a sync/async pipe that processes individual items.

    Args:
        pipe (Iter[dict]): The entry to process

    Yields:
        dict: item

    Returns:
        Deferred: twisted.internet.defer.Deferred generator of items

    Examples:
        >>> from riko.bado import react, _issync
        >>> from riko.bado.mock import FakeReactor
        >>>
        >>> kwargs = {
        ...     'ftype': 'text', 'extract': 'times', 'listize': True,
        ...     'pdictize': False, 'emit': True, 'field': 'content',
        ...     'objectify': False}
        ...
        >>> @processor(**kwargs)
        ... def pipe(content, times, skip, **kwargs):
        ...     if skip:
        ...         stream = kwargs['stream']
        ...     else:
        ...         value = 'say "%s" %s times!' % (content, times[0])
        ...         stream = {kwargs['assign']: value}
        ...
        ...     return stream, skip
        ...
        >>> # async pipes don't have to return a deferred,
        >>> # they work fine either way
        >>> @processor(isasync=True, **kwargs)
        ... def async_pipe(content, times, skip, **kwargs):
        ...     if skip:
        ...         stream = kwargs['stream']
        ...     else:
        ...         value = 'say "%s" %s times!' % (content, times[0])
        ...         stream = {kwargs['assign']: value}
        ...
        ...     return stream, skip
        ...
        >>> item = {'content': 'hello world'}
        >>> kwargs = {'conf': {'times': 'three'}, 'assign': 'content'}
        >>> response = {'content': 'say "hello world" three times!'}
        >>> next(pipe(item, **kwargs)) == response
        True
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x) == response)
        ...     d = async_pipe(item, **kwargs)
        ...     return d.addCallbacks(callback, logger.error)
        ...
        >>> if _issync:
        ...     True
        ... else:
        ...     try:
        ...         react(run, _reactor=FakeReactor())
        ...     except SystemExit:
        ...         pass
        True
    """
    @wraps(pipe)
    def wrapper(item=None, **kwargs):
        module_name = wrapper.__module__.split('.')[-1]

        defaults = {
            'dictize': True, 'ftype': 'pass', 'ptype': 'pass',
            'objectify': True}

        # later dicts win: decorator opts override defaults, call-time
        # kwargs override everything
        combined = cdicts(self.defaults, defaults, self.opts, kwargs)
        is_source = combined['ftype'] == 'none'
        def_assign = 'content' if is_source else module_name
        extracted = 'extract' in combined
        pdictize = combined.get('listize') if extracted else True

        combined.setdefault('assign', def_assign)
        combined.setdefault('emit', is_source)
        combined.setdefault('pdictize', pdictize)
        conf = {k: combined[k] for k in self.defaults}
        conf.update(kwargs.get('conf', {}))
        combined.update({'conf': conf})

        # replace conf with dictized version so we can access its
        # attributes even if we already extracted a value
        updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
        kwargs.update(updates)

        item = item or {}
        _input = DotDict(item) if combined.get('dictize') else item
        bfuncs = get_broadcast_funcs(**combined)
        types = {combined['ftype'], combined['ptype']}

        if types.difference({'pass', 'none'}):
            dfuncs = get_dispatch_funcs(**combined)
        else:
            dfuncs = None

        parsed, orig_item = dispatch(_input, bfuncs, dfuncs=dfuncs)

        # NOTE(review): renamed from `self.async` — `async` is a reserved
        # keyword since Python 3.7 (PEP 492) and was a SyntaxError here.
        # `isasync` matches the `processor(isasync=True)` doctest usage;
        # confirm `__init__` stores the flag under this name.
        if self.isasync:
            stream, skip = yield pipe(*parsed, stream=orig_item, **kwargs)
        else:
            stream, skip = pipe(*parsed, stream=orig_item, **kwargs)

        one, assignment = get_assignment(stream, skip, **combined)

        if skip or combined.get('emit'):
            stream = assignment
        elif not skip:
            key = combined.get('assign')
            stream = assign(_input, assignment, key, one=one)

        if self.isasync:
            return_value(stream)
        else:
            for s in stream:
                yield s

    is_source = self.opts.get('ftype') == 'none'
    wrapper.__dict__['name'] = wrapper.__module__.split('.')[-1]
    wrapper.__dict__['type'] = 'processor'
    wrapper.__dict__['sub_type'] = 'source' if is_source else 'transformer'
    return coroutine(wrapper) if self.isasync else wrapper
def __call__(self, pipe):
    """Creates a sync/async pipe that processes individual items.

    Args:
        pipe (Iter[dict]): The entry to process

    Yields:
        dict: item

    Returns:
        Deferred: twisted.internet.defer.Deferred generator of items

    Examples:
        >>> from riko.bado import react, _issync
        >>> from riko.bado.mock import FakeReactor
        >>>
        >>> kwargs = {
        ...     'ftype': 'text', 'extract': 'times', 'listize': True,
        ...     'pdictize': False, 'emit': True, 'field': 'content',
        ...     'objectify': False}
        ...
        >>> @processor(**kwargs)
        ... def pipe(content, times, skip, **kwargs):
        ...     if skip:
        ...         stream = kwargs['stream']
        ...     else:
        ...         value = 'say "%s" %s times!' % (content, times[0])
        ...         stream = {kwargs['assign']: value}
        ...
        ...     return stream, skip
        ...
        >>> # async pipes don't have to return a deferred,
        >>> # they work fine either way
        >>> @processor(isasync=True, **kwargs)
        ... def async_pipe(content, times, skip, **kwargs):
        ...     if skip:
        ...         stream = kwargs['stream']
        ...     else:
        ...         value = 'say "%s" %s times!' % (content, times[0])
        ...         stream = {kwargs['assign']: value}
        ...
        ...     return stream, skip
        ...
        >>> item = {'content': 'hello world'}
        >>> kwargs = {'conf': {'times': 'three'}, 'assign': 'content'}
        >>> response = {'content': 'say "hello world" three times!'}
        >>> next(pipe(item, **kwargs)) == response
        True
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x) == response)
        ...     d = async_pipe(item, **kwargs)
        ...     return d.addCallbacks(callback, logger.error)
        ...
        >>> if _issync:
        ...     True
        ... else:
        ...     try:
        ...         react(run, _reactor=FakeReactor())
        ...     except SystemExit:
        ...         pass
        True
    """
    @wraps(pipe)
    def wrapper(item=None, **kwargs):
        module_name = wrapper.__module__.split('.')[-1]

        defaults = {
            'dictize': True, 'ftype': 'pass', 'ptype': 'pass',
            'objectify': True}

        # later dicts win: decorator opts override defaults, call-time
        # kwargs override everything
        combined = cdicts(self.defaults, defaults, self.opts, kwargs)
        is_source = combined['ftype'] == 'none'
        def_assign = 'content' if is_source else module_name
        extracted = 'extract' in combined
        pdictize = combined.get('listize') if extracted else True

        combined.setdefault('assign', def_assign)
        combined.setdefault('emit', is_source)
        combined.setdefault('pdictize', pdictize)
        conf = {k: combined[k] for k in self.defaults}
        conf.update(kwargs.get('conf', {}))
        combined.update({'conf': conf})

        # replace conf with dictized version so we can access its
        # attributes even if we already extracted a value
        updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
        kwargs.update(updates)

        item = item or {}
        _input = DotDict(item) if combined.get('dictize') else item
        bfuncs = get_broadcast_funcs(**combined)
        types = {combined['ftype'], combined['ptype']}

        if types.difference({'pass', 'none'}):
            dfuncs = get_dispatch_funcs(**combined)
        else:
            dfuncs = None

        parsed, orig_item = dispatch(_input, bfuncs, dfuncs=dfuncs)

        # NOTE(review): renamed from the garbled `self. async` — `async` is
        # a reserved keyword since Python 3.7 (PEP 492). `isasync` matches
        # the `processor(isasync=True)` doctest usage; confirm `__init__`
        # stores the flag under this name.
        if self.isasync:
            stream, skip = yield pipe(*parsed, stream=orig_item, **kwargs)
        else:
            stream, skip = pipe(*parsed, stream=orig_item, **kwargs)

        one, assignment = get_assignment(stream, skip, **combined)

        if skip or combined.get('emit'):
            stream = assignment
        elif not skip:
            key = combined.get('assign')
            stream = assign(_input, assignment, key, one=one)

        if self.isasync:
            return_value(stream)
        else:
            for s in stream:
                yield s

    is_source = self.opts.get('ftype') == 'none'
    wrapper.__dict__['name'] = wrapper.__module__.split('.')[-1]
    wrapper.__dict__['type'] = 'processor'
    wrapper.__dict__['sub_type'] = 'source' if is_source else 'transformer'
    return coroutine(wrapper) if self.isasync else wrapper