Example No. 1
    def __call__(self, pipe):
        """Creates a wrapper that allows a sync/async pipe to processes a
        stream of items

        Args:
            pipe (func): A function of 3 args (stream, objconf, tuples)
                plus `**kwargs`. TODO: document args & kwargs.

        Returns:
            func: A function of 1 arg (items) plus `**kwargs`.

        Examples:
            >>> from riko.bado import react, _issync
            >>> from riko.bado.mock import FakeReactor
            >>> from riko.bado.util import maybeDeferred
            >>>
            >>> opts = {
            ...     'ftype': 'text', 'extract': 'times', 'listize': True,
            ...     'pdictize': False, 'emit': True, 'field': 'content',
            ...     'objectify': False}
            ...
            >>> wrapper = operator(**opts)
            >>>
            >>> def pipe1(stream, objconf, tuples, **kwargs):
            ...     for content, times in tuples:
            ...         value = 'say "%s" %s times!' % (content, times[0])
            ...         yield {kwargs['assign']: value}
            ...
            >>> def pipe2(stream, objconf, tuples, **kwargs):
            ...     word_cnt = sum(len(content.split()) for content in stream)
            ...     return {kwargs['assign']: word_cnt}
            ...
            >>> wrapped_pipe1 = wrapper(pipe1)
            >>> wrapped_pipe2 = wrapper(pipe2)
            >>> items = [{'content': 'hello world'}, {'content': 'bye world'}]
            >>> kwargs = {'conf': {'times': 'three'}, 'assign': 'content'}
            >>> response = {'content': 'say "hello world" three times!'}
            >>>
            >>> next(wrapped_pipe1(items, **kwargs)) == response
            True
            >>> next(wrapped_pipe2(items, **kwargs)) == {'content': 4}
            True
            >>> async_wrapper = operator(isasync=True, **opts)
            >>>
            >>> # async pipes don't have to return a deferred,
            >>> # they work fine either way
            >>> def async_pipe1(stream, objconf, tuples, **kwargs):
            ...     for content, times in tuples:
            ...         value = 'say "%s" %s times!' % (content, times[0])
            ...         yield {kwargs['assign']: value}
            ...
            >>> # this is an admittedly contrived example to show how you would
            >>> # call an async function
            >>> @coroutine
            ... def async_pipe2(stream, objconf, tuples, **kwargs):
            ...     words = (len(content.split()) for content in stream)
            ...     word_cnt = yield maybeDeferred(sum, words)
            ...     return_value({kwargs['assign']: word_cnt})
            ...
            >>> wrapped_async_pipe1 = async_wrapper(async_pipe1)
            >>> wrapped_async_pipe2 = async_wrapper(async_pipe2)
            >>>
            >>> @coroutine
            ... def run(reactor):
            ...     r1 = yield wrapped_async_pipe1(items, **kwargs)
            ...     print(next(r1) == response)
            ...     r2 = yield wrapped_async_pipe2(items, **kwargs)
            ...     print(next(r2) == {'content': 4})
            ...
            >>> if _issync:
            ...     True
            ...     True
            ... else:
            ...     try:
            ...         react(run, _reactor=FakeReactor())
            ...     except SystemExit:
            ...         pass
            True
            True
        """
        @wraps(pipe)
        def wrapper(items=None, **kwargs):
            module_name = wrapper.__module__.split('.')[-1]
            wrapper.__dict__['name'] = module_name

            defaults = {
                'dictize': True, 'ftype': 'pass', 'ptype': 'pass',
                'objectify': True, 'emit': True, 'assign': module_name}

            combined = cdicts(self.defaults, defaults, self.opts, kwargs)
            extracted = 'extract' in combined
            pdictize = combined.get('listize') if extracted else True

            combined.setdefault('pdictize', pdictize)
            conf = {k: combined[k] for k in self.defaults}
            conf.update(kwargs.get('conf', {}))
            combined.update({'conf': conf})

            # replace conf with dictized version so we can access its
            # attributes even if we already extracted a value
            updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
            kwargs.update(updates)

            items = items or iter([])
            _INPUT = map(DotDict, items) if combined.get('dictize') else items
            bfuncs = get_broadcast_funcs(**combined)
            types = {combined['ftype'], combined['ptype']}

            if types.difference({'pass', 'none'}):
                dfuncs = get_dispatch_funcs(**combined)
            else:
                dfuncs = None

            pairs = (dispatch(item, bfuncs, dfuncs=dfuncs) for item in _INPUT)
            parsed, _ = dispatch(DotDict(), bfuncs, dfuncs=dfuncs)

            # - operators can't skip items
            # - purposely setting both variables to maps of the same iterable
            #   since only one is intended to be used at any given time
            # - `tuples` is an iterator of tuples of the first two `parsed`
            #   elements
            tuples = ((p[0][0], p[0][1]) for p in pairs)
            orig_stream = (p[0][0] for p in pairs)
            objconf = parsed[1]

            if self.async:
                stream = yield pipe(orig_stream, objconf, tuples, **kwargs)
            else:
                stream = pipe(orig_stream, objconf, tuples, **kwargs)

            sub_type = 'aggregator' if hasattr(stream, 'keys') else 'composer'
            wrapper.__dict__['sub_type'] = sub_type

            # operators can only assign one value per item and can't skip items
            _, assignment = get_assignment(stream, False, **combined)

            if combined.get('emit'):
                stream = assignment
            else:
                singles = (iter([v]) for v in assignment)
                key = combined.get('assign')
                assigned = (assign({}, s, key, one=True) for s in singles)
                stream = utils.multiplex(assigned)

            if self.async:
                return_value(stream)
            else:
                for s in stream:
                    yield s

        wrapper.__dict__['type'] = 'operator'
        return coroutine(wrapper) if self.async else wrapper
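
The wrapper above layers configuration from several sources (hard-coded defaults, the decorator's stored defaults and opts, and call-time kwargs), normalizes incoming items into DotDicts, calls the wrapped pipe once over the whole stream, and then either emits the result directly or assigns it back onto items under the `assign` key. A minimal, self-contained sketch of that layered-config decorator pattern (illustrative only, not riko's actual API; `operator_like` and `count_items` are invented names):

    from functools import wraps

    def operator_like(**opts):
        """Decorator factory: later config layers override earlier ones."""
        def decorator(pipe):
            @wraps(pipe)
            def wrapper(items=None, **kwargs):
                defaults = {'emit': True, 'assign': pipe.__name__}
                # precedence: call-time kwargs > decorator opts > defaults
                combined = {**defaults, **opts, **kwargs}
                result = pipe(items or [], **combined)
                if combined['emit']:
                    yield result
                else:
                    yield {combined['assign']: result}
            return wrapper
        return decorator

    @operator_like(emit=False, assign='count')
    def count_items(items, **kwargs):
        # aggregate the whole stream into a single value, like an operator pipe
        return sum(1 for _ in items)

    print(next(count_items([{'a': 1}, {'a': 2}])))  # -> {'count': 2}
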
Example No. 2
    def __call__(self, pipe):
        """Creates a wrapper that allows a sync/async pipe to processes a
        stream of items

        Args:
            pipe (func): A function of 3 args (stream, objconf, tuples)
                plus `**kwargs`. TODO: document args & kwargs.

        Returns:
            func: A function of 1 arg (items) plus `**kwargs`.

        Examples:
            >>> from riko.bado import react, _issync
            >>> from riko.bado.mock import FakeReactor
            >>> from riko.bado.util import maybeDeferred
            >>>
            >>> opts = {
            ...     'ftype': 'text', 'extract': 'times', 'listize': True,
            ...     'pdictize': False, 'emit': True, 'field': 'content',
            ...     'objectify': False}
            ...
            >>> wrapper = operator(**opts)
            >>>
            >>> def pipe1(stream, objconf, tuples, **kwargs):
            ...     for content, times in tuples:
            ...         value = 'say "%s" %s times!' % (content, times[0])
            ...         yield {kwargs['assign']: value}
            ...
            >>> def pipe2(stream, objconf, tuples, **kwargs):
            ...     word_cnt = _sum(len(content.split()) for content in stream)
            ...     return {kwargs['assign']: word_cnt}
            ...
            >>> wrapped_pipe1 = wrapper(pipe1)
            >>> wrapped_pipe2 = wrapper(pipe2)
            >>> items = [{'content': 'hello world'}, {'content': 'bye world'}]
            >>> kwargs = {'conf': {'times': 'three'}, 'assign': 'content'}
            >>> response = {'content': 'say "hello world" three times!'}
            >>>
            >>> next(wrapped_pipe1(items, **kwargs)) == response
            True
            >>> next(wrapped_pipe2(items, **kwargs)) == {'content': 4}
            True
            >>> async_wrapper = operator(isasync=True, **opts)
            >>>
            >>> # async pipes don't have to return a deferred,
            >>> # they work fine either way
            >>> def async_pipe1(stream, objconf, tuples, **kwargs):
            ...     for content, times in tuples:
            ...         value = 'say "%s" %s times!' % (content, times[0])
            ...         yield {kwargs['assign']: value}
            ...
            >>> # this is an admittedly contrived example to show how you would
            >>> # call an async function
            >>> @coroutine
            ... def async_pipe2(stream, objconf, tuples, **kwargs):
            ...     words = (len(content.split()) for content in stream)
            ...     word_cnt = yield maybeDeferred(_sum, words)
            ...     return_value({kwargs['assign']: word_cnt})
            ...
            >>> wrapped_async_pipe1 = async_wrapper(async_pipe1)
            >>> wrapped_async_pipe2 = async_wrapper(async_pipe2)
            >>>
            >>> @coroutine
            ... def run(reactor):
            ...     r1 = yield wrapped_async_pipe1(items, **kwargs)
            ...     print(next(r1) == response)
            ...     r2 = yield wrapped_async_pipe2(items, **kwargs)
            ...     print(next(r2) == {'content': 4})
            ...
            >>> if _issync:
            ...     True
            ...     True
            ... else:
            ...     try:
            ...         react(run, _reactor=FakeReactor())
            ...     except SystemExit:
            ...         pass
            True
            True
        """
        @wraps(pipe)
        def wrapper(items=None, **kwargs):
            module_name = wrapper.__module__.split('.')[-1]
            wrapper.__dict__['name'] = module_name

            defaults = {
                'dictize': True,
                'ftype': 'pass',
                'ptype': 'pass',
                'objectify': True,
                'emit': True,
                'assign': module_name
            }

            combined = cdicts(self.defaults, defaults, self.opts, kwargs)
            extracted = 'extract' in combined
            pdictize = combined.get('listize') if extracted else True

            combined.setdefault('pdictize', pdictize)
            conf = {k: combined[k] for k in self.defaults}
            conf.update(kwargs.get('conf', {}))
            combined.update({'conf': conf})

            # replace conf with dictized version so we can access its
            # attributes even if we already extracted a value
            updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
            kwargs.update(updates)

            items = items or iter([])
            _INPUT = map(DotDict, items) if combined.get('dictize') else items
            bfuncs = get_broadcast_funcs(**combined)
            types = {combined['ftype'], combined['ptype']}

            if types.difference({'pass', 'none'}):
                dfuncs = get_dispatch_funcs(**combined)
            else:
                dfuncs = None

            pairs = (dispatch(item, bfuncs, dfuncs=dfuncs) for item in _INPUT)
            parsed, _ = dispatch(DotDict(), bfuncs, dfuncs=dfuncs)

            # - operators can't skip items
            # - purposely setting both variables to maps of the same iterable
            #   since only one is intended to be used at any given time
            # - `tuples` is an iterator of tuples of the first two `parsed`
            #   elements
            tuples = ((p[0][0], p[0][1]) for p in pairs)
            orig_stream = (p[0][0] for p in pairs)
            objconf = parsed[1]

            if self.async:
                stream = yield pipe(orig_stream, objconf, tuples, **kwargs)
            else:
                stream = pipe(orig_stream, objconf, tuples, **kwargs)

            sub_type = 'aggregator' if hasattr(stream, 'keys') else 'composer'
            wrapper.__dict__['sub_type'] = sub_type

            # operators can only assign one value per item and can't skip items
            _, assignment = get_assignment(stream, False, **combined)

            if combined.get('emit'):
                stream = assignment
            else:
                singles = (iter([v]) for v in assignment)
                key = combined.get('assign')
                assigned = (assign({}, s, key, one=True) for s in singles)
                stream = utils.multiplex(assigned)

            if self.async:
                return_value(stream)
            else:
                for s in stream:
                    yield s

        wrapper.__dict__['type'] = 'operator'
        return coroutine(wrapper) if self.async else wrapper
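
Note how the same generator body handles both modes: the sync branch iterates it directly, while the async branch wraps it with `coroutine(...)` so that each `yield pipe(...)` resolves any deferred before execution continues. A rough, self-contained illustration of that "conditionally wrap the same generator body" idea (not riko's code; `run_to_completion` and `make_pipe` are invented stand-ins):

    def run_to_completion(genfunc):
        """Stand-in for a coroutine decorator: drive the generator and return
        its final yielded value (a real one would also resolve deferreds)."""
        def driver(*args, **kwargs):
            result = None
            for result in genfunc(*args, **kwargs):
                pass
            return result
        return driver

    def make_pipe(isasync=False):
        def wrapper(items):
            total = 0
            for item in items:
                total += item
                yield total  # sync callers consume these values one by one
        # same body, two calling conventions
        return run_to_completion(wrapper) if isasync else wrapper

    sync_pipe = make_pipe()
    driven_pipe = make_pipe(isasync=True)
    print(list(sync_pipe([1, 2, 3])))  # -> [1, 3, 6]
    print(driven_pipe([1, 2, 3]))      # -> 6
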
Example No. 3
    def __call__(self, pipe):
        """Creates a sync/async pipe that processes individual items

        Args:
            pipe (func): A function that receives the parsed item values plus a
                `skip` flag and `**kwargs`, and returns a (stream, skip) tuple.

        Yields:
            dict: item

        Returns:
            Deferred: twisted.internet.defer.Deferred generator of items

        Examples:
            >>> from riko.bado import react, _issync
            >>> from riko.bado.mock import FakeReactor
            >>>
            >>> kwargs = {
            ...     'ftype': 'text', 'extract': 'times', 'listize': True,
            ...     'pdictize': False, 'emit': True, 'field': 'content',
            ...     'objectify': False}
            ...
            >>> @processor(**kwargs)
            ... def pipe(content, times, skip, **kwargs):
            ...     if skip:
            ...         stream = kwargs['stream']
            ...     else:
            ...         value = 'say "%s" %s times!' % (content, times[0])
            ...         stream = {kwargs['assign']: value}
            ...
            ...     return stream, skip
            ...
            >>> # async pipes don't have to return a deferred,
            >>> # they work fine either way
            >>> @processor(isasync=True, **kwargs)
            ... def async_pipe(content, times, skip, **kwargs):
            ...     if skip:
            ...         stream = kwargs['stream']
            ...     else:
            ...         value = 'say "%s" %s times!' % (content, times[0])
            ...         stream = {kwargs['assign']: value}
            ...
            ...     return stream, skip
            ...
            >>> item = {'content': 'hello world'}
            >>> kwargs = {'conf': {'times': 'three'}, 'assign': 'content'}
            >>> response = {'content': 'say "hello world" three times!'}
            >>> next(pipe(item, **kwargs)) == response
            True
            >>>
            >>> def run(reactor):
            ...     callback = lambda x: print(next(x) == response)
            ...     d = async_pipe(item, **kwargs)
            ...     return d.addCallbacks(callback, logger.error)
            ...
            >>> if _issync:
            ...     True
            ... else:
            ...     try:
            ...         react(run, _reactor=FakeReactor())
            ...     except SystemExit:
            ...         pass
            True
        """
        @wraps(pipe)
        def wrapper(item=None, **kwargs):
            module_name = wrapper.__module__.split('.')[-1]

            defaults = {
                'dictize': True, 'ftype': 'pass', 'ptype': 'pass',
                'objectify': True}

            combined = cdicts(self.defaults, defaults, self.opts, kwargs)
            is_source = combined['ftype'] == 'none'
            def_assign = 'content' if is_source else module_name
            extracted = 'extract' in combined
            pdictize = combined.get('listize') if extracted else True

            combined.setdefault('assign', def_assign)
            combined.setdefault('emit', is_source)
            combined.setdefault('pdictize', pdictize)
            conf = {k: combined[k] for k in self.defaults}
            conf.update(kwargs.get('conf', {}))
            combined.update({'conf': conf})
            # replace conf with dictized version so we can access its
            # attributes even if we already extracted a value
            updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
            kwargs.update(updates)

            item = item or {}
            _input = DotDict(item) if combined.get('dictize') else item
            bfuncs = get_broadcast_funcs(**combined)
            types = {combined['ftype'], combined['ptype']}

            if types.difference({'pass', 'none'}):
                dfuncs = get_dispatch_funcs(**combined)
            else:
                dfuncs = None

            parsed, orig_item = dispatch(_input, bfuncs, dfuncs=dfuncs)

            if self.async:
                stream, skip = yield pipe(*parsed, stream=orig_item, **kwargs)
            else:
                stream, skip = pipe(*parsed, stream=orig_item, **kwargs)

            one, assignment = get_assignment(stream, skip, **combined)

            if skip or combined.get('emit'):
                stream = assignment
            elif not skip:
                key = combined.get('assign')
                stream = assign(_input, assignment, key, one=one)

            if self.async:
                return_value(stream)
            else:
                for s in stream:
                    yield s

        is_source = self.opts.get('ftype') == 'none'
        wrapper.__dict__['name'] = wrapper.__module__.split('.')[-1]
        wrapper.__dict__['type'] = 'processor'
        wrapper.__dict__['sub_type'] = 'source' if is_source else 'transformer'
        return coroutine(wrapper) if self.async else wrapper
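
Compared with the operator wrapper, the processor wrapper calls the wrapped function once per item, expects a `(stream, skip)` tuple back, and then either passes the item through (skip), emits the raw value, or assigns it onto the item under the configured key. A stripped-down analogue of that per-item emit/assign flow (illustrative only; `processor_like` and `shout` are invented names, not riko's API):

    from functools import wraps

    def processor_like(**opts):
        def decorator(pipe):
            @wraps(pipe)
            def wrapper(item=None, **kwargs):
                combined = {**{'emit': False, 'assign': pipe.__name__}, **opts, **kwargs}
                item = item or {}
                value, skip = pipe(item, **combined)
                if skip:
                    yield item  # pass the item through untouched
                elif combined['emit']:
                    yield value  # emit the raw value
                else:
                    yield {**item, combined['assign']: value}  # assign onto a copy
            return wrapper
        return decorator

    @processor_like(assign='shouted')
    def shout(item, **kwargs):
        return item.get('content', '').upper(), False

    print(next(shout({'content': 'hello world'})))
    # -> {'content': 'hello world', 'shouted': 'HELLO WORLD'}
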
Example No. 4
    def __call__(self, pipe):
        """Creates a sync/async pipe that processes individual items

        Args:
            pipe (func): A function that receives the parsed item values plus a
                `skip` flag and `**kwargs`, and returns a (stream, skip) tuple.

        Yields:
            dict: item

        Returns:
            Deferred: twisted.internet.defer.Deferred generator of items

        Examples:
            >>> from riko.bado import react, _issync
            >>> from riko.bado.mock import FakeReactor
            >>>
            >>> kwargs = {
            ...     'ftype': 'text', 'extract': 'times', 'listize': True,
            ...     'pdictize': False, 'emit': True, 'field': 'content',
            ...     'objectify': False}
            ...
            >>> @processor(**kwargs)
            ... def pipe(content, times, skip, **kwargs):
            ...     if skip:
            ...         stream = kwargs['stream']
            ...     else:
            ...         value = 'say "%s" %s times!' % (content, times[0])
            ...         stream = {kwargs['assign']: value}
            ...
            ...     return stream, skip
            ...
            >>> # async pipes don't have to return a deferred,
            >>> # they work fine either way
            >>> @processor(isasync=True, **kwargs)
            ... def async_pipe(content, times, skip, **kwargs):
            ...     if skip:
            ...         stream = kwargs['stream']
            ...     else:
            ...         value = 'say "%s" %s times!' % (content, times[0])
            ...         stream = {kwargs['assign']: value}
            ...
            ...     return stream, skip
            ...
            >>> item = {'content': 'hello world'}
            >>> kwargs = {'conf': {'times': 'three'}, 'assign': 'content'}
            >>> response = {'content': 'say "hello world" three times!'}
            >>> next(pipe(item, **kwargs)) == response
            True
            >>>
            >>> def run(reactor):
            ...     callback = lambda x: print(next(x) == response)
            ...     d = async_pipe(item, **kwargs)
            ...     return d.addCallbacks(callback, logger.error)
            ...
            >>> if _issync:
            ...     True
            ... else:
            ...     try:
            ...         react(run, _reactor=FakeReactor())
            ...     except SystemExit:
            ...         pass
            True
        """
        @wraps(pipe)
        def wrapper(item=None, **kwargs):
            module_name = wrapper.__module__.split('.')[-1]

            defaults = {
                'dictize': True,
                'ftype': 'pass',
                'ptype': 'pass',
                'objectify': True
            }

            combined = cdicts(self.defaults, defaults, self.opts, kwargs)
            is_source = combined['ftype'] == 'none'
            def_assign = 'content' if is_source else module_name
            extracted = 'extract' in combined
            pdictize = combined.get('listize') if extracted else True

            combined.setdefault('assign', def_assign)
            combined.setdefault('emit', is_source)
            combined.setdefault('pdictize', pdictize)
            conf = {k: combined[k] for k in self.defaults}
            conf.update(kwargs.get('conf', {}))
            combined.update({'conf': conf})
            # replace conf with dictized version so we can access its
            # attributes even if we already extracted a value
            updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
            kwargs.update(updates)

            item = item or {}
            _input = DotDict(item) if combined.get('dictize') else item
            bfuncs = get_broadcast_funcs(**combined)
            types = {combined['ftype'], combined['ptype']}

            if types.difference({'pass', 'none'}):
                dfuncs = get_dispatch_funcs(**combined)
            else:
                dfuncs = None

            parsed, orig_item = dispatch(_input, bfuncs, dfuncs=dfuncs)

            if self.async:
                stream, skip = yield pipe(*parsed, stream=orig_item, **kwargs)
            else:
                stream, skip = pipe(*parsed, stream=orig_item, **kwargs)

            one, assignment = get_assignment(stream, skip, **combined)

            if skip or combined.get('emit'):
                stream = assignment
            elif not skip:
                key = combined.get('assign')
                stream = assign(_input, assignment, key, one=one)

            if self.async:
                return_value(stream)
            else:
                for s in stream:
                    yield s

        is_source = self.opts.get('ftype') == 'none'
        wrapper.__dict__['name'] = wrapper.__module__.split('.')[-1]
        wrapper.__dict__['type'] = 'processor'
        wrapper.__dict__['sub_type'] = 'source' if is_source else 'transformer'
        return coroutine(wrapper) if self.async else wrapper
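
Both decorators also attach introspection metadata to the returned function through `wrapper.__dict__`, so downstream code can classify a pipe without calling it. A small sketch of reading those attributes (attribute names come from the code above; `describe` and `pipe_func` are invented):

    def describe(pipe_func):
        """Summarize a decorated pipe's metadata."""
        return {
            'name': getattr(pipe_func, 'name', None),
            'type': getattr(pipe_func, 'type', None),  # 'processor' or 'operator'
            # for operators, name and sub_type are only set once the wrapper has run
            'sub_type': getattr(pipe_func, 'sub_type', None),
        }
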