def async_parser(stream, rules, tuples, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        stream (Iter[dict]): The source. Note: this shares the `tuples`
            iterator, so consuming it will consume `tuples` as well.

        rules (List[obj]): the item independent rules (Objectify instances).

        tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf)
            `item` is an element in the source stream and `objconf` is the
            item configuration (an Objectify instance). Note: this shares
            the `stream` iterator, so consuming it will consume `stream`
            as well.

        kwargs (dict): Keyword arguments.

    Kwargs:
        conf (dict): The pipe configuration.

    Returns:
        List(dict): Deferred output stream

    Examples:
        >>> from itertools import repeat
        >>> from riko.bado import react, _issync
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(x[0] == {'content': 4})
        ...     kwargs = {'sort_key': 'content', 'sort_dir': 'desc'}
        ...     rule = Objectify(kwargs)
        ...     stream = ({'content': x} for x in range(5))
        ...     tuples = zip(stream, repeat(rule))
        ...     d = async_parser(stream, [rule], tuples, **kwargs)
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> if _issync:
        ...     True
        ... else:
        ...     try:
        ...         react(run, _reactor=FakeReactor())
        ...     except SystemExit:
        ...         pass
        True
    """
    # Fold each rule over the stream in turn. `tuples` and `kwargs` are
    # part of the common parser signature and are intentionally unused here.
    deferred_stream = ait.async_reduce(reducer, rules, stream)
    return deferred_stream
@coroutine
def async_parser(item, rules, skip=False, **kwargs):
    r""" Asynchronously parses the pipe content

    Note: the body is a generator that uses ``yield``/``return_value``, so
    the ``@coroutine`` decorator is required for callers to receive a
    Deferred (the doctest below calls ``d.addCallbacks``); without it the
    call would return a bare generator object.

    Args:
        item (obj): The entry to process (a DotDict instance)
        rules (List[obj]): the parsed rules (Objectify instances).
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Deferred: twisted.internet.defer.Deferred dict

    Examples:
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> item = DotDict({'content': 'hello world', 'title': 'greeting'})
        >>> match = r'(\w+)\s(\w+)'
        >>> replace = '$2wide'
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(x['content'])
        ...     rule = {'field': 'content', 'match': match, 'replace': replace}
        ...     conf = {'rule': rule, 'multi': False, 'convert': True}
        ...     rules = [Objectify(rule)]
        ...     kwargs = {'stream': item, 'conf': conf}
        ...     d = async_parser(item, rules, **kwargs)
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        worldwide
    """
    multi = kwargs['conf']['multi']
    # Patterns are only pre-compiled in single-substitution mode; multi mode
    # handles its own pattern grouping below.
    recompile = not multi

    @coroutine
    def async_reducer(item, rules):
        # Every rule in this group targets the same field (grouped by
        # 'field' below), so read the field from the first rule.
        field = rules[0]['field']
        word = item.get(field, **kwargs)
        grouped = group_by(rules, 'flags')

        # In multi mode, substitute one grouped pass per distinct regex
        # flags value; otherwise apply each rule individually.
        group_rules = [g[1] for g in grouped] if multi else rules
        reducer = multi_substitute if multi else substitute
        replacement = yield ait.coop_reduce(reducer, group_rules, word)
        combined = merge([item, {field: replacement}])
        return_value(DotDict(combined))

    if skip:
        # Skipping the parse: pass the original stream item through.
        item = kwargs['stream']
    else:
        # Normalize the rules, group them by target field, then fold each
        # field's rule group over the item.
        new_rules = [get_new_rule(r, recompile=recompile) for r in rules]
        grouped = group_by(new_rules, 'field')
        field_rules = [g[1] for g in grouped]
        item = yield ait.async_reduce(async_reducer, field_rules, item)

    return_value(item)