def wrapper(item=None, **kwargs):
    """Run the wrapped ``pipe`` on a single item and produce its stream.

    The effective options are built by combining ``self.defaults``, the
    local defaults below, ``self.opts`` and ``kwargs`` with ``cdicts``.
    The item is then dispatched through the broadcast/dispatch functions
    and the parsed pieces are handed to ``pipe``. The pipe result is
    either emitted directly or assigned onto the input item under the
    ``assign`` key.

    ``self``, ``pipe``, ``return_value`` and the helper functions are free
    names resolved outside this block (the enclosing scope is not visible
    here).

    Args:
        item (dict): The entry to process. A falsy value is replaced
            with ``{}``.
        kwargs (dict): Option overrides. A ``conf`` entry, if present, is
            layered over the values collected from the combined options.

    Yields:
        In the synchronous case, each element of the resulting stream.
        In the async case, the value returned by ``pipe`` is yielded once
        and the final stream is passed to ``return_value``.
    """
    module_name = wrapper.__module__.split('.')[-1]

    defaults = {
        'dictize': True, 'ftype': 'pass', 'ptype': 'pass',
        'objectify': True}

    combined = cdicts(self.defaults, defaults, self.opts, kwargs)
    # an `ftype` of 'none' marks a pipe that takes no input field; such
    # pipes assign to 'content' and emit by default
    is_source = combined['ftype'] == 'none'
    def_assign = 'content' if is_source else module_name
    extracted = 'extract' in combined
    pdictize = combined.get('listize') if extracted else True

    combined.setdefault('assign', def_assign)
    combined.setdefault('emit', is_source)
    combined.setdefault('pdictize', pdictize)
    conf = {k: combined[k] for k in self.defaults}
    conf.update(kwargs.get('conf', {}))
    combined.update({'conf': conf})

    # replace conf with dictized version so we can access its
    # attributes even if we already extracted a value
    updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
    kwargs.update(updates)

    item = item or {}
    _input = DotDict(item) if combined.get('dictize') else item
    bfuncs = get_broadcast_funcs(**combined)
    types = {combined['ftype'], combined['ptype']}

    # dispatch functions are only needed when a field/piece type requires
    # an actual conversion
    if types.difference({'pass', 'none'}):
        dfuncs = get_dispatch_funcs(**combined)
    else:
        dfuncs = None

    parsed, orig_item = dispatch(_input, bfuncs, dfuncs=dfuncs)

    # `async` is a reserved keyword from Python 3.7 on, so the attribute
    # can no longer be spelled `self.async`; look it up by name instead.
    if getattr(self, 'async'):
        stream, skip = yield pipe(*parsed, stream=orig_item, **kwargs)
    else:
        stream, skip = pipe(*parsed, stream=orig_item, **kwargs)

    one, assignment = get_assignment(stream, skip, **combined)

    if skip or combined.get('emit'):
        stream = assignment
    else:
        key = combined.get('assign')
        stream = assign(_input, assignment, key, one=one)

    # keyword-safe lookup, same as for the pipe call
    if getattr(self, 'async'):
        return_value(stream)
    else:
        for s in stream:
            yield s
def parser(_, attrs, skip, **kwargs):
    """Build an item from a sequence of key/value attributes.

    Args:
        _ (None): Ignored
        attrs (Iter[obj]): Attributes; each must expose ``key`` and
            ``value``
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (only read when `skip` is true)

    Returns:
        Tuple[dict, bool]: Tuple of (stream, skip). `stream` is the
            original item when skipping, otherwise a DotDict built from
            the attribute pairs.

    Examples:
        >>> from riko.lib.utils import Objectify
        >>> attrs = [
        ...     {'key': 'title', 'value': 'the title'},
        ...     {'key': 'desc', 'value': 'the desc'}]
        >>> result, skip = parser(None, map(Objectify, attrs), False)
        >>> result == {'title': 'the title', 'desc': 'the desc'}
        True
    """
    if skip:
        return kwargs['stream'], skip

    pairs = ((attr.key, attr.value) for attr in attrs)
    return DotDict(pairs), skip
def parser(item, objconf, skip, **kwargs):
    """Build an RSS-style item from the pipe configuration.

    Each configuration key is translated through ``RSS`` (falling back to
    the key itself) and each configuration value is looked up on `item`
    with the value itself passed as the default.

    Args:
        item (obj): The entry to process (a DotDict instance)
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments (also forwarded to ``item.get``)

    Kwargs:
        stream (dict): The original item (only read when `skip` is true)

    Returns:
        Tuple[dict, bool]: Tuple of (stream, skip)

    Examples:
        >>> from riko.lib.dotdict import DotDict
        >>> from riko.lib.utils import Objectify
        >>>
        >>> item = DotDict()
        >>> conf = {'guid': 'a1', 'mediaThumbURL': 'image.png'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': item}
        >>> result, skip = parser(item, objconf, False, **kwargs)
        >>> result == {'media:thumbnail': {'url': 'image.png'}, 'y:id': 'a1'}
        True
    """
    if skip:
        return kwargs['stream'], skip

    translated = (
        (RSS.get(name, name), item.get(field, field, **kwargs))
        for name, field in objconf.items())

    return DotDict(translated), skip
def meta_reducer(item, rules):
    """Apply a group of substitution rules to one field of `item`.

    The field name is taken from the first rule. ``kwargs`` and ``multi``
    are free names resolved outside this block.

    Args:
        item (obj): The entry to process; must support ``get``
        rules (List[dict]): Rules that all target the same field; the
            first one must have a 'field' key

    Returns:
        DotDict: `item` combined with the replaced field value
    """
    field = rules[0]['field']
    word = item.get(field, **kwargs)
    grouped = utils.group_by(rules, 'flags')

    if multi:
        # presumably each group is a (flags, rules) pair -- only the second
        # element is kept
        group_rules = [pair[1] for pair in grouped]
        reducer = utils.multi_substitute
    else:
        group_rules = rules
        reducer = utils.substitute

    replacement = reduce(reducer, group_rules, word)
    updated = cdicts(item, {field: replacement})
    return DotDict(updated)
def async_reducer(item, rules):
    """Asynchronously apply a group of substitution rules to one field.

    Mirrors the synchronous reducer but folds the rules with
    ``ait.coop_reduce`` and hands the result to ``return_value``.
    ``kwargs`` and ``multi`` are free names resolved outside this block.

    Args:
        item (obj): The entry to process; must support ``get``
        rules (List[dict]): Rules that all target the same field; the
            first one must have a 'field' key

    Yields:
        The object returned by ``ait.coop_reduce``; the value sent back
        into the generator is used as the replacement.
    """
    field = rules[0]['field']
    word = item.get(field, **kwargs)
    grouped = utils.group_by(rules, 'flags')

    if multi:
        group_rules = [pair[1] for pair in grouped]
        reducer = utils.multi_substitute
    else:
        group_rules = rules
        reducer = utils.substitute

    replacement = yield ait.coop_reduce(reducer, group_rules, word)
    return_value(DotDict(cdicts(item, {field: replacement})))
def parser(item, params, skip, **kwargs):
    """Parse the pipe content into a URL item.

    Joins the configured path segments onto the configured base, appends
    the url-encoded query parameters and passes the result to
    ``cast_url``.

    Args:
        item (obj): The entry to process (a DotDict instance)
        params (List[obj]): Query parameters; each must expose ``key``
            and ``value``
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (only read when `skip` is true)
        conf (dict): The pipe configuration; 'base' and 'path' are read

    Returns:
        Tuple (dict, bool): Tuple of (item, skip)

    Examples:
        >>> from riko.lib.utils import Objectify
        >>>
        >>> item = DotDict()
        >>> params = {'key': 's', 'value': 'gm'}
        >>> path = [{'value': 'rss'}, {'value': 'headline'}]
        >>> base = 'http://finance.yahoo.com'
        >>> conf = {'base': base, 'path': path, 'params': params}
        >>> kwargs = {'stream': item, 'conf': conf}
        >>> result = parser(item, [Objectify(params)], False, **kwargs)[0]
        >>> sorted(result.keys()) == [
        ...     'fragment', 'netloc', 'params', 'path', 'query', 'scheme',
        ...     'url']
        True
        >>> result['url'] == 'http://finance.yahoo.com/rss/headline?s=gm'
        True
    """
    if skip:
        return kwargs['stream'], skip

    # `conf` is removed so it isn't forwarded to `get_value` below
    conf = kwargs.pop('conf')
    segments = (
        get_value(item, DotDict(piece), **kwargs)
        for piece in conf.get('path'))

    query = urlencode([(param.key, param.value) for param in params])
    location = urljoin(conf['base'], '/'.join(segments))
    url = '%s?%s' % (location, query)
    return cast_url(url), skip
def get_broadcast_funcs(**kwargs):
    """Build the three per-item functions used to parse a pipe's input.

    Args:
        kwargs (dict): The combined pipe options. The keys read here are
            `conf`, `extract`, `listize`, `pdictize`, `ptype` and `ftype`;
            the full set is forwarded to ``utils.get_field`` and
            ``utils.get_skip``, and everything except `conf` to
            ``utils.parse_conf``.

    Returns:
        Tuple[func, func, func]: (get_field, get_pieces, get_skip)
    """
    kw = utils.Objectify(kwargs, conf={})

    # narrow the conf down to the extracted entry when one was requested
    if kw.extract:
        pieces = kw.conf[kw.extract]
    else:
        pieces = kw.conf

    no_conf = remove_keys(kwargs, 'conf')
    noop = partial(utils.cast, _type='none')

    if kw.listize:
        listed = utils.listize(pieces)

        if kw.pdictize:
            piece_defs = map(DotDict, listed)
        else:
            piece_defs = listed

        conf_parser = partial(utils.parse_conf, **no_conf)
        pfuncs = [partial(conf_parser, conf=piece) for piece in piece_defs]

        def get_pieces(item):
            # one parsed result per listed conf piece
            return utils.broadcast(item, *pfuncs)
    elif kw.ptype != 'none':
        if kw.pdictize and pieces:
            conf = DotDict(pieces)
        else:
            conf = pieces

        get_pieces = partial(utils.parse_conf, conf=conf, **no_conf)
    else:
        get_pieces = noop

    ffunc = partial(utils.get_field, **kwargs)

    if kw.ftype == 'none':
        get_field = noop
    else:
        get_field = ffunc

    get_skip = partial(utils.get_skip, **kwargs)
    return (get_field, get_pieces, get_skip)
def wrapper(items=None, **kwargs):
    """Run the wrapped ``pipe`` on a whole stream of items.

    The effective options are built by combining ``self.defaults``, the
    local defaults below, ``self.opts`` and ``kwargs`` with ``cdicts``.
    Every item is dispatched through the broadcast/dispatch functions and
    the resulting iterators are handed to ``pipe`` together with the
    parsed configuration. The pipe result is either emitted directly or
    each value is assigned to a new item under the ``assign`` key.

    As a side effect, ``wrapper.name`` is set to the module name and
    ``wrapper.sub_type`` to 'aggregator' (pipe result has ``keys``) or
    'composer'.

    ``self``, ``pipe``, ``return_value`` and the helper functions are free
    names resolved outside this block (the enclosing scope is not visible
    here).

    Args:
        items (Iter[dict]): The entries to process. A falsy value is
            replaced with an empty iterator.
        kwargs (dict): Option overrides. A ``conf`` entry, if present, is
            layered over the values collected from the combined options.

    Yields:
        In the synchronous case, each element of the resulting stream.
        In the async case, the value returned by ``pipe`` is yielded once
        and the final stream is passed to ``return_value``.
    """
    module_name = wrapper.__module__.split('.')[-1]
    wrapper.__dict__['name'] = module_name

    defaults = {
        'dictize': True, 'ftype': 'pass', 'ptype': 'pass',
        'objectify': True, 'emit': True, 'assign': module_name}

    combined = cdicts(self.defaults, defaults, self.opts, kwargs)
    extracted = 'extract' in combined
    pdictize = combined.get('listize') if extracted else True

    combined.setdefault('pdictize', pdictize)
    conf = {k: combined[k] for k in self.defaults}
    conf.update(kwargs.get('conf', {}))
    combined.update({'conf': conf})

    # replace conf with dictized version so we can access its
    # attributes even if we already extracted a value
    updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
    kwargs.update(updates)

    items = items or iter([])
    _INPUT = map(DotDict, items) if combined.get('dictize') else items
    bfuncs = get_broadcast_funcs(**combined)
    types = {combined['ftype'], combined['ptype']}

    # dispatch functions are only needed when a field/piece type requires
    # an actual conversion
    if types.difference({'pass', 'none'}):
        dfuncs = get_dispatch_funcs(**combined)
    else:
        dfuncs = None

    pairs = (dispatch(item, bfuncs, dfuncs=dfuncs) for item in _INPUT)
    # dispatch an empty item once to obtain the parsed conf
    parsed, _ = dispatch(DotDict(), bfuncs, dfuncs=dfuncs)

    # - operators can't skip items
    # - purposely setting both variables to maps of the same iterable
    #   since only one is intended to be used at any given time
    # - `tuples` is an iterator of tuples of the first two `parsed`
    #   elements
    tuples = ((p[0][0], p[0][1]) for p in pairs)
    orig_stream = (p[0][0] for p in pairs)
    objconf = parsed[1]

    # `async` is a reserved keyword from Python 3.7 on, so the attribute
    # can no longer be spelled `self.async`; look it up by name instead.
    if getattr(self, 'async'):
        stream = yield pipe(orig_stream, objconf, tuples, **kwargs)
    else:
        stream = pipe(orig_stream, objconf, tuples, **kwargs)

    sub_type = 'aggregator' if hasattr(stream, 'keys') else 'composer'
    wrapper.__dict__['sub_type'] = sub_type

    # operators can only assign one value per item and can't skip items
    _, assignment = get_assignment(stream, False, **combined)

    if combined.get('emit'):
        stream = assignment
    else:
        singles = (iter([v]) for v in assignment)
        key = combined.get('assign')
        assigned = (assign({}, s, key, one=True) for s in singles)
        stream = utils.multiplex(assigned)

    # keyword-safe lookup, same as for the pipe call
    if getattr(self, 'async'):
        return_value(stream)
    else:
        for s in stream:
            yield s
def assign(item, assignment, key, one=False):
    """Yield `item` combined with the assigned value stored under `key`.

    Args:
        item (dict): The entry to extend
        assignment (Iter): Iterator of the value(s) to assign
        key (str): The key to store the value(s) under
        one (bool): If true, assign only the first element of
            `assignment`; otherwise assign the full list of its elements
            (default: False).

    Yields:
        DotDict: `item` combined with ``{key: value}``. Nothing is
            yielded when `one` is true and `assignment` is exhausted.
    """
    if one:
        try:
            value = next(assignment)
        except StopIteration:
            # Under PEP 479 (Python 3.7+) a StopIteration escaping a
            # generator body is converted into a RuntimeError. End the
            # generator explicitly, which is what the bare `next` call did
            # before that change.
            return
    else:
        value = list(assignment)

    yield DotDict(cdicts(item, {key: value}))
def reducer(item, rule):
    """Apply one rename rule to `item`.

    Args:
        item (obj): The entry to process; must support ``get``
        rule (obj): The rule; ``field``, ``newval`` and ``copy`` are read

    Returns:
        DotDict: The base item combined with the new entry. The base is
            `item` itself when ``rule.copy`` is truthy, otherwise the
            result of ``remove_keys(item, rule.field)``. The new entry
            (``rule.newval`` mapped to the value of ``rule.field``) is
            only added when ``rule.newval`` is truthy.
    """
    if rule.newval:
        additions = {rule.newval: item.get(rule.field)}
    else:
        additions = {}

    if rule.copy:
        base = item
    else:
        base = remove_keys(item, rule.field)

    return DotDict(cdicts(base, additions))