示例#1
0
        def wrapper(item=None, **kwargs):
            """Process a single item with the wrapped ``pipe`` function.

            Merges the decorator's defaults and options with the call-time
            ``kwargs``, parses ``item`` into the positional arguments handed
            to ``pipe``, calls ``pipe`` and then either emits the result as
            is or assigns it onto the input item.

            Args:
                item (dict): The entry to process. A falsy value is replaced
                    with an empty dict.
                kwargs (dict): Call-time options. They are passed to
                    ``merge`` after ``self.defaults``, the built-in defaults
                    below and ``self.opts``.

            Kwargs:
                conf (dict): Overrides applied on top of the values collected
                    for the keys of ``self.defaults``.

            Yields:
                The items of the resulting stream when not running async. In
                async mode the result of ``pipe`` is obtained via ``yield``
                and the final stream is handed to ``return_value`` instead.
            """
            module_name = wrapper.__module__.split('.')[-1]

            defaults = {
                'dictize': True,
                'ftype': 'pass',
                'ptype': 'pass',
                'objectify': True
            }

            combined = merge([self.defaults, defaults, self.opts, kwargs])

            # A pipe whose field type is 'none' is treated as a source: its
            # output is emitted by default and assigned to 'content' rather
            # than to the module name.
            is_source = combined['ftype'] == 'none'
            def_assign = 'content' if is_source else module_name
            extracted = 'extract' in combined
            pdictize = combined.get('listize') if extracted else True

            combined.setdefault('assign', def_assign)
            combined.setdefault('emit', is_source)
            combined.setdefault('pdictize', pdictize)

            # Only the keys declared in ``self.defaults`` make up the conf;
            # an explicit ``conf`` kwarg overrides them.
            conf = {k: combined[k] for k in self.defaults}
            conf.update(kwargs.get('conf', {}))
            combined.update({'conf': conf})

            dictize = combined.get('dictize')
            uconf = DotDict(conf) if dictize else conf
            kwargs.update({'conf': uconf, 'assign': combined.get('assign')})

            item = item or {}
            _input = DotDict(item) if dictize else item
            bfuncs = get_broadcast_funcs(**combined)
            skip = get_skip(_input, **combined)
            types = set() if skip else {combined['ftype'], combined['ptype']}

            # Dispatch functions are only built when at least one of the
            # types is something other than 'pass' / 'none'.
            if types.difference({'pass', 'none'}):
                dfuncs = get_dispatch_funcs(**combined)
            else:
                dfuncs = None

            parsed, orig_item = _dispatch(_input, bfuncs, dfuncs=dfuncs)
            kwargs.update({'skip': skip, 'stream': orig_item})

            # ``async`` is a reserved word since Python 3.7, so the attribute
            # cannot be spelled ``self.async`` without a SyntaxError.
            is_async = getattr(self, 'async')

            if is_async:
                stream = yield pipe(*parsed, **kwargs)
            else:
                stream = pipe(*parsed, **kwargs)

            one, assignment = get_assignment(stream, skip=skip, **combined)

            if skip or combined.get('emit'):
                stream = assignment
            else:
                stream = assign(_input, assignment, one=one, **combined)

            if is_async:
                return_value(stream)
            else:
                for s in stream:
                    yield s
def gen_records(src, *paths, report_date=None, blacklist=None, **kwargs):
    """Yield flat records extracted from the JSON document ``src``.

    Args:
        src (str): JSON text; it is parsed with ``loads`` and wrapped in a
            ``DotDict``.
        *paths (str): ``path`` and, optionally, ``subpath``. ``path`` selects
            the records inside the document. ``subpath`` selects a list of
            child records inside each record. If the number of paths is not
            exactly two, only the first one is used.
        report_date (str): Date string in ``S3_DATE_FORMAT``. Despite the
            ``None`` default a value is required, since it is parsed
            unconditionally.
        blacklist (Iterable[str]): Keys to drop from every emitted record.
            ``None`` (the default) means no keys are dropped.
        **kwargs: Keyword arguments (see below).

    Kwargs:
        change (dict): Mapping of original key -> emitted key.
        nested_path (str): Key of a nested structure that is flattened and
            regrouped into one record per group (used only when no
            ``subpath`` is given).
        listize (bool): Treat the value found at ``path`` as a single record
            instead of a list of records.

    Yields:
        dict: One cleaned and renamed record at a time.

    Note:
        Every source record gets a ``date`` key set in place before it is
        processed.
    """
    data = DotDict(loads(src))
    change = kwargs.get("change") or {}
    nested_path = kwargs.get("nested_path", "")
    report_date_iso = dt.strptime(report_date, S3_DATE_FORMAT).isoformat()

    # Normalise once so that ``blacklist + [...]`` below also works with the
    # default of ``None`` (or with any non-list iterable).
    blacklist = list(blacklist or [])

    def finalize(record):
        """Drop blacklisted keys and apply the ``change`` renames."""
        clean_record = dfilter(record, blacklist)
        return {change.get(k, k): v for k, v in clean_record.items()}

    def digits_key(pair):
        """Group key built from the numbers in a flattened key.

        A key is like 'race-7-description'.
        """
        return "-".join(re.findall(r"\d+", pair[0]))

    try:
        path, subpath = paths
    except ValueError:
        path, subpath = paths[0], None

    records = data.get(path, [])

    if records and kwargs.get("listize"):
        records = [records]

    for record in records:
        record["date"] = report_date_iso

        if subpath:
            # Only the top-level key of a dotted subpath exists on the
            # record itself, so that is the one excluded from the reference.
            top_key = subpath.partition(".")[0]
            reference_record = dfilter(record, blacklist + [top_key])

            for new_record in DotDict(record)[subpath]:
                # Parent values win over child values on key collisions.
                yield finalize({**new_record, **reference_record})
        elif nested_path:
            reference_record = dfilter(record, [nested_path])
            nested = record.get(nested_path)

            if nested:
                # NOTE(review): ``groupby`` only merges consecutive items;
                # this assumes ``flatten`` yields pairs already ordered by
                # their numeric part -- confirm.
                for _, group in groupby(flatten(nested), digits_key):
                    new_record = {re.sub(r"\d+-", "", k): v for k, v in group}
                    yield finalize({**new_record, **reference_record})
        else:
            yield finalize(record)
示例#3
0
def parser(item, objconf, skip=False, **kwargs):
    """ Builds an item from the pipe configuration

    Every conf key is translated through ``RSS`` (keys without a translation
    are kept as is) and every conf value is looked up in ``item`` via
    ``item.get(value, value, **kwargs)``, i.e. with the value itself passed
    as the default.

    Args:
        item (obj): The entry to process (a DotDict instance)
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        dict: ``kwargs['stream']`` if `skip` is set, otherwise a new DotDict

    Examples:
        >>> from riko.dotdict import DotDict
        >>> from meza.fntools import Objectify
        >>>
        >>> item = DotDict()
        >>> conf = {'guid': 'a1', 'mediaThumbURL': 'image.png'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': item}
        >>> result = parser(item, objconf, **kwargs)
        >>> result == {'media:thumbnail': {'url': 'image.png'}, 'y:id': 'a1'}
        True
    """
    if skip:
        return kwargs['stream']

    pairs = (
        (RSS.get(key, key), item.get(value, value, **kwargs))
        for key, value in objconf.items())

    return DotDict(pairs)
示例#4
0
def parser(_, attrs, skip=False, **kwargs):
    """ Builds an item from a list of key/value attributes

    Args:
        _ (None): Ignored
        attrs (List[dict]): Attributes, each exposing `key` and `value`
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        dict: ``kwargs['stream']`` if `skip` is set, otherwise a new DotDict
            mapping each attribute's key to its value

    Examples:
        >>> from meza.fntools import Objectify
        >>> attrs = [
        ...     {'key': 'title', 'value': 'the title'},
        ...     {'key': 'desc', 'value': 'the desc'}]
        >>> result = parser(None, map(Objectify, attrs))
        >>> result == {'title': 'the title', 'desc': 'the desc'}
        True
    """
    pairs = ((attr.key, attr.value) for attr in attrs)

    if skip:
        return kwargs['stream']

    return DotDict(pairs)
示例#5
0
 def meta_reducer(item, rules):
     """Apply the substitution rules to one field of `item`.

     The field name is taken from the first rule. `kwargs` and `multi` come
     from the enclosing scope. When `multi` is set, the rules are grouped by
     their 'flags' and each group is reduced with `multi_substitute`;
     otherwise every rule is reduced with `substitute`.

     Returns:
         DotDict: `item` merged with the rewritten field
     """
     target = rules[0]['field']
     original = item.get(target, **kwargs)
     by_flags = group_by(rules, 'flags')

     if multi:
         group_rules = [pair[1] for pair in by_flags]
         replacement = reduce(multi_substitute, group_rules, original)
     else:
         replacement = reduce(substitute, rules, original)

     return DotDict(merge([item, {target: replacement}]))
示例#6
0
 def async_reducer(item, rules):
     """Apply the substitution rules to one field of `item` (async variant).

     The field name is taken from the first rule. `kwargs` and `multi` come
     from the enclosing scope. The reduction is delegated to
     `ait.coop_reduce`, whose result is obtained via `yield`; the merged item
     is then handed to `return_value` as a DotDict.
     """
     target = rules[0]['field']
     original = item.get(target, **kwargs)
     by_flags = group_by(rules, 'flags')

     if multi:
         group_rules = [pair[1] for pair in by_flags]
         reducer = multi_substitute
     else:
         group_rules = rules
         reducer = substitute

     replacement = yield ait.coop_reduce(reducer, group_rules, original)
     return_value(DotDict(merge([item, {target: replacement}])))
示例#7
0
def parser(item, params, skip=False, **kwargs):
    """ Builds a url from the pipe configuration

    The path segments in ``conf['path']`` are resolved with `get_value`,
    joined with '/' and appended to ``conf['base']``; `params` is url-encoded
    into the query string.

    Args:
        item (obj): The entry to process (a DotDict instance)
        params (List[dict]): Query parameters, each exposing `key` and `value`
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item
        conf (dict): The pipe configuration (must contain 'base' and 'path')

    Returns:
        dict: ``kwargs['stream']`` if `skip` is set, otherwise the result of
            `cast_url` for the assembled url

    Examples:
        >>> from meza.fntools import Objectify
        >>>
        >>> item = DotDict()
        >>> params = {'key': 's', 'value': 'gm'}
        >>> path = [{'value': 'rss'}, {'value': 'headline'}]
        >>> base = 'http://finance.yahoo.com'
        >>> conf = {'base': base, 'path': path, 'params': params}
        >>> kwargs = {'stream': item, 'conf': conf}
        >>> result = parser(item, [Objectify(params)], **kwargs)
        >>> sorted(result.keys()) == [
        ...     'fragment', 'netloc', 'params', 'path', 'query', 'scheme',
        ...     'url']
        True
        >>> result['url'] == 'http://finance.yahoo.com/rss/headline?s=gm'
        True
    """
    if skip:
        return kwargs['stream']

    # `conf` is removed so it is not forwarded to `get_value`
    conf = kwargs.pop('conf')
    segments = (
        get_value(item, DotDict(piece), **kwargs)
        for piece in conf.get('path'))

    query = urlencode([(param.key, param.value) for param in params])
    location = urljoin(conf['base'], '/'.join(segments))
    return cast_url('%s?%s' % (location, query))
示例#8
0
文件: __init__.py 项目: hifly/riko
def get_broadcast_funcs(**kwargs):
    """Build the pair of functions used to parse an item.

    Args:
        kwargs (dict): Pipe options. The ones read here are `conf`,
            `extract`, `listize`, `pdictize`, `ptype` and `ftype`.

    Returns:
        Tuple[func, func]: ``(ffunc, get_pieces)`` where `ffunc` extracts the
            field (a no-op cast when `ftype` is 'none') and `get_pieces`
            parses the conf pieces (a no-op cast when `listize` is unset and
            `ptype` is 'none').
    """
    kw = Objectify(kwargs, conf={})
    pieces = kw.conf[kw.extract] if kw.extract else kw.conf
    no_conf = remove_keys(kwargs, 'conf')
    noop = partial(cast, _type='none')

    if kw.listize:
        # one parser per piece, all applied to the same item
        piece_defs = listize(pieces)

        if kw.pdictize:
            piece_defs = map(DotDict, piece_defs)

        parse = partial(parse_conf, **no_conf)
        pfuncs = [partial(parse, conf=piece) for piece in piece_defs]

        def get_pieces(item):
            return broadcast(item, *pfuncs)
    elif kw.ptype != 'none':
        if kw.pdictize and pieces:
            conf = DotDict(pieces)
        else:
            conf = pieces

        get_pieces = partial(parse_conf, conf=conf, **no_conf)
    else:
        get_pieces = noop

    if kw.ftype == 'none':
        ffunc = noop
    else:
        ffunc = partial(get_field, **kwargs)

    return (ffunc, get_pieces)
示例#9
0
def reducer(item, rule):
    """Apply one rename rule to `item`.

    The value of ``rule.field`` is stored under ``rule.newval`` (if set).
    Unless ``rule.copy`` is truthy, the original field is removed.

    Returns:
        DotDict: The merged result
    """
    if rule.newval:
        addition = {rule.newval: item.get(rule.field)}
    else:
        addition = {}

    if rule.copy:
        base = item
    else:
        base = remove_keys(item, rule.field)

    return DotDict(merge([base, addition]))
示例#10
0
文件: __init__.py 项目: hifly/riko
        def wrapper(items=None, **kwargs):
            """Process a whole stream of items with the wrapped ``pipe``.

            Merges the decorator's defaults and options with the call-time
            ``kwargs``, parses every item, hands the parsed stream to
            ``pipe`` and then either emits the result as is or assigns each
            resulting value to a fresh item.

            Args:
                items (Iter[dict]): The stream to process. A falsy value is
                    replaced with an empty iterator.
                kwargs (dict): Call-time options. They are passed to
                    ``merge`` after ``self.defaults``, the built-in defaults
                    below and ``self.opts``.

            Kwargs:
                conf (dict): Overrides applied on top of the values collected
                    for the keys of ``self.defaults``.

            Yields:
                The items of the resulting stream when not running async. In
                async mode the result of ``pipe`` is obtained via ``yield``
                and the final stream is handed to ``return_value`` instead.
            """
            module_name = wrapper.__module__.split('.')[-1]
            wrapper.__dict__['name'] = module_name

            defaults = {
                'dictize': True, 'ftype': 'pass', 'ptype': 'pass',
                'objectify': True, 'emit': True, 'assign': module_name}

            combined = merge([self.defaults, defaults, self.opts, kwargs])
            extracted = 'extract' in combined
            pdictize = combined.get('listize') if extracted else True

            combined.setdefault('pdictize', pdictize)

            # Only the keys declared in ``self.defaults`` make up the conf;
            # an explicit ``conf`` kwarg overrides them.
            conf = {k: combined[k] for k in self.defaults}
            conf.update(kwargs.get('conf', {}))
            combined.update({'conf': conf})

            # replace conf with dictized version so we can access its
            # attributes even if we already extracted a value
            updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
            kwargs.update(updates)

            items = items or iter([])
            _INPUT = map(DotDict, items) if combined.get('dictize') else items
            bfuncs = get_broadcast_funcs(**combined)
            types = {combined['ftype'], combined['ptype']}

            # Dispatch functions are only built when at least one of the
            # types is something other than 'pass' / 'none'.
            if types.difference({'pass', 'none'}):
                dfuncs = get_dispatch_funcs(**combined)
            else:
                dfuncs = None

            pairs = (_dispatch(item, bfuncs, dfuncs=dfuncs) for item in _INPUT)

            # Dispatching an empty item yields the parsed conf on its own.
            parsed, _ = _dispatch(DotDict(), bfuncs, dfuncs=dfuncs)

            # - operators can't skip items
            # - purposely setting both variables to maps of the same iterable
            #   since only one is intended to be used at any given time
            # - `tuples` is an iterator of tuples of the first two `parsed`
            #   elements
            tuples = ((p[0][0], p[0][1]) for p in pairs)
            orig_stream = (p[0][0] for p in pairs)
            objconf = parsed[1]

            # ``async`` is a reserved word since Python 3.7, so the attribute
            # cannot be spelled ``self.async`` without a SyntaxError.
            is_async = getattr(self, 'async')

            if is_async:
                stream = yield pipe(orig_stream, objconf, tuples, **kwargs)
            else:
                stream = pipe(orig_stream, objconf, tuples, **kwargs)

            # A mapping-like result marks an aggregator; anything else is
            # treated as a composer.
            sub_type = 'aggregator' if hasattr(stream, 'keys') else 'composer'
            wrapper.__dict__['sub_type'] = sub_type

            # operators can only assign one value per item and can't skip items
            _, assignment = get_assignment(stream, **combined)

            if combined.get('emit'):
                stream = assignment
            else:
                singles = (iter([v]) for v in assignment)
                key = combined.get('assign')
                assigned = (assign({}, s, key, one=True) for s in singles)
                stream = multiplex(assigned)

            if is_async:
                return_value(stream)
            else:
                for s in stream:
                    yield s
示例#11
0
文件: __init__.py 项目: hifly/riko
def assign(item, assignment, key, one=False):
    """Yield `item` with the assignment stored under `key`.

    Args:
        item (dict): The item to extend
        assignment (Iter): Iterator of the values to assign
        key (str): The key to store the value(s) under
        one (bool): Store only the first value instead of the full list

    Yields:
        DotDict: The merged item (exactly one)
    """
    if one:
        value = next(assignment)
    else:
        value = list(assignment)

    merged = merge([item, {key: value}])
    yield DotDict(merged)
示例#12
0
def assign(item, assignment, **kwargs):
    """Yield `item` with the assignment stored under the `assign` key.

    Args:
        item (dict): The item to extend
        assignment (Iter): Iterator of the values to assign
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): The key to store the value(s) under
        one (bool): Store only the first value instead of the full list
        dictize (bool): Wrap the merged item in a DotDict

    Yields:
        dict: The merged item (exactly one)
    """
    key = kwargs.get('assign')

    if kwargs.get('one'):
        value = next(assignment)
    else:
        value = list(assignment)

    merged = merge([item, {key: value}])

    if kwargs.get('dictize'):
        yield DotDict(merged)
    else:
        yield merged