Пример #1
0
def asyncPipeLoop(context=None, _INPUT=None, conf=None, embed=None, **kwargs):
    """Asynchronously run the embedded submodule over the input items.

    Operator module. Not loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : asyncPipe like object (twisted Deferred iterable of items)
    embed : the submodule to run, i.e., asyncPipe*(context, _INPUT, conf)
        Most modules can act as sub-modules; User inputs and Operators are
        the exception.

    conf : {
        'assign_part': {'value': <all or first>},
        'assign_to': {'value': <assigned field name>},
        'emit_part': {'value': <all or first>},
        'mode': {'value': <assign or EMIT>},
        'with': {'value': <looped field name or blank>},
        'embed': {'value': {'conf': <module conf>}}
    }

    Returns
    -------
    _OUTPUT : twisted.internet.defer.Deferred generator of items

    Notes
    -----
    The body uses ``yield`` together with ``returnValue``, so it is
    presumably meant to run under twisted's ``inlineCallbacks``; no decorator
    is visible in this excerpt -- confirm at the definition site.
    """
    # Store the embedded-module callback on `opts` (defined outside this
    # function) before the options are merged with the caller's kwargs.
    opts.update({
        'cust_func': get_cust_func(
            context, conf, embed, parse_embed, **kwargs),
    })
    merged_opts = cdicts(opts, kwargs)
    split_items = yield asyncGetSplits(_INPUT, conf, **merged_opts)
    loop_results = yield asyncStarMap(asyncParseResult, split_items)
    returnValue(utils.multiplex(loop_results))
Пример #2
0
def asyncPipeFetchdata(context=None, _INPUT=None, conf=None, **kwargs):
    """Asynchronously fetch data described by `conf` and emit its items.

    Parameters
    ----------
    context : not used by this function body
    _INPUT : forwarded to asyncGetParsed
    conf : forwarded to asyncGetSplits
    kwargs : merged with `opts` via cdicts and forwarded to asyncGetSplits

    Returns
    -------
    Hands ``utils.multiplex`` of the generated items to ``returnValue``.

    Notes
    -----
    The body uses ``yield`` together with ``returnValue``, so it is
    presumably meant to run under twisted's ``inlineCallbacks``; no decorator
    is visible in this excerpt -- confirm at the definition site.
    """
    merged_opts = cdicts(opts, kwargs)
    # The splits are requested for conf alone (None is passed as the input);
    # only the first element of the result is used for parsing.
    conf_funcs = yield asyncGetSplits(None, conf, **merged_opts)
    parsed_input = yield asyncGetParsed(_INPUT, conf_funcs[0])
    fetched = yield asyncStarMap(asyncParseResult, parsed_input)
    returnValue(utils.multiplex(imap(utils.gen_items, fetched)))
Пример #3
0
def asyncPipeFetchdata(context=None, _INPUT=None, conf=None, **kwargs):
    """Asynchronously fetch data described by `conf` and emit its items.

    Parameters
    ----------
    context : not used by this function body
    _INPUT : forwarded to asyncGetParsed
    conf : forwarded to asyncGetSplits
    kwargs : merged with `opts` via cdicts and forwarded to asyncGetSplits

    Returns
    -------
    Hands ``utils.multiplex`` of the generated items to ``returnValue``.

    Notes
    -----
    The body uses ``yield`` together with ``returnValue``, so it is
    presumably meant to run under twisted's ``inlineCallbacks``; no decorator
    is visible in this excerpt -- confirm at the definition site.
    """
    merged_opts = cdicts(opts, kwargs)
    # The splits are requested for conf alone (None is passed as the input);
    # only the first element of the result is used for parsing.
    conf_funcs = yield asyncGetSplits(None, conf, **merged_opts)
    parsed_input = yield asyncGetParsed(_INPUT, conf_funcs[0])
    fetched = yield asyncStarMap(asyncParseResult, parsed_input)
    returnValue(utils.multiplex(imap(utils.gen_items, fetched)))
Пример #4
0
def pipe_loop(context=None, _INPUT=None, conf=None, embed=None, **kwargs):
    """Run the embedded submodule over the input items.

    Operator module. Not loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipe2py.modules pipe like object (iterable of items)
    embed : the submodule to run, i.e., pipe_*(context, _INPUT, conf)
        Most modules can act as sub-modules; User inputs and Operators are
        the exception.

    conf : {
        'assign_part': {'value': <all or first>},
        'assign_to': {'value': <assigned field name>},
        'emit_part': {'value': <all or first>},
        'mode': {'value': <assign or EMIT>},
        'with': {'value': <looped field name or blank>},
        'embed': {'value': {'conf': <module conf>}}
    }

    Returns
    -------
    _OUTPUT : generator of items
    """
    # Store the embedded-module callback on `opts` (defined outside this
    # function) before the options are merged with the caller's kwargs.
    opts.update({
        'cust_func': get_cust_func(
            context, conf, embed, parse_embed, **kwargs),
    })
    merged_opts = cdicts(opts, kwargs)
    split_items = get_splits(_INPUT, conf, **merged_opts)
    loop_results = starmap(parse_result, split_items)
    return utils.multiplex(loop_results)
Пример #5
0
def asyncParseResult(urls, _, _pass):
    """Asynchronously fetch each URL and emit the parsed feed entries.

    Parameters
    ----------
    urls : passed to get_urls to obtain the URLs to fetch
    _ : unused
    _pass : unused

    Returns
    -------
    Hands ``utils.multiplex`` of the per-feed entries to ``returnValue``.

    Notes
    -----
    The body uses ``yield`` together with ``returnValue``, so it is
    presumably meant to run under twisted's ``inlineCallbacks``; no decorator
    is visible in this excerpt -- confirm at the definition site.
    """
    # The parser is invoked through maybeDeferred. A deferToThread based
    # variant, partial(deferToThread, speedparser.parse), was left disabled.
    deferred_parse = partial(maybeDeferred, speedparser.parse)
    url_strings = get_urls(urls)
    pages = yield asyncImap(getPage, url_strings)
    feeds = yield asyncImap(deferred_parse, pages)
    per_feed_entries = imap(utils.gen_entries, feeds)
    returnValue(utils.multiplex(per_feed_entries))
Пример #6
0
def asyncParseResult(urls, _, _pass):
    """Asynchronously fetch each URL and emit the parsed feed entries.

    Parameters
    ----------
    urls : passed to get_urls to obtain the URLs to fetch
    _ : unused
    _pass : unused

    Returns
    -------
    Hands ``utils.multiplex`` of the per-feed entries to ``returnValue``.

    Notes
    -----
    The body uses ``yield`` together with ``returnValue``, so it is
    presumably meant to run under twisted's ``inlineCallbacks``; no decorator
    is visible in this excerpt -- confirm at the definition site.
    """
    # The parser is invoked through maybeDeferred. A deferToThread based
    # variant, partial(deferToThread, speedparser.parse), was left disabled.
    deferred_parse = partial(maybeDeferred, speedparser.parse)
    url_strings = get_urls(urls)
    pages = yield asyncImap(getPage, url_strings)
    feeds = yield asyncImap(deferred_parse, pages)
    per_feed_entries = imap(utils.gen_entries, feeds)
    returnValue(utils.multiplex(per_feed_entries))
Пример #7
0
def pipe_fetchdata(context=None, _INPUT=None, conf=None, **kwargs):
    """Fetch and parse an XML or JSON file (source module). Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipeforever pipe or an iterable of items or fields
    conf : {
        'URL': {'value': <url>},
        'path': {'value': <dot separated path to data list>}
    }

    Returns
    -------
    _OUTPUT : items (the result of utils.multiplex)

    Examples
    --------
    >>> from os import path as p
    >>> from pipe2py.modules.pipeforever import pipe_forever
    >>> parent = p.dirname(p.dirname(__file__))
    >>> abspath = p.abspath(p.join(parent, 'data', 'gigs.json'))
    >>> path = 'value.items'
    >>> url = "file://%s" % abspath
    >>> conf = {'URL': {'value': url}, 'path': {'value': path}}
    >>> pipe_fetchdata(_INPUT=pipe_forever(), conf=conf).next().keys()[:5]
    [u'y:repeatcount', u'description', u'pubDate', u'title', u'y:published']
    >>> abspath = p.abspath(p.join(parent, 'data', 'places.xml'))
    >>> path = 'appointment'
    >>> url = "file://%s" % abspath
    >>> conf = {'URL': {'value': url}, 'path': {'value': path}}
    >>> sorted(pipe_fetchdata(_INPUT=pipe_forever(), conf=conf).next().keys())
    ['alarmTime', 'begin', 'duration', 'places', 'subject', 'uid']
    >>> conf = {'URL': {'value': url}, 'path': {'value': ''}}
    >>> sorted(pipe_fetchdata(_INPUT=pipe_forever(), conf=conf).next().keys())
    ['appointment', 'reminder']
    """
    # todo: iCal and KML
    merged_opts = cdicts(opts, kwargs)
    # The splits are requested for conf alone (None is passed as the input);
    # only the first element of the result is used for parsing.
    conf_funcs = get_splits(None, conf, **merged_opts)
    parsed_input = get_parsed(_INPUT, conf_funcs[0])
    fetched = starmap(parse_result, parsed_input)
    return utils.multiplex(imap(utils.gen_items, fetched))
Пример #8
0
def pipe_fetchdata(context=None, _INPUT=None, conf=None, **kwargs):
    """Fetch and parse an XML or JSON file (source module). Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipeforever pipe or an iterable of items or fields
    conf : {
        'URL': {'value': <url>},
        'path': {'value': <dot separated path to data list>}
    }

    Returns
    -------
    _OUTPUT : items (the result of utils.multiplex)

    Examples
    --------
    >>> from os import path as p
    >>> from pipe2py.modules.pipeforever import pipe_forever
    >>> parent = p.dirname(p.dirname(__file__))
    >>> abspath = p.abspath(p.join(parent, 'data', 'gigs.json'))
    >>> path = 'value.items'
    >>> url = "file://%s" % abspath
    >>> conf = {'URL': {'value': url}, 'path': {'value': path}}
    >>> pipe_fetchdata(_INPUT=pipe_forever(), conf=conf).next().keys()[:5]
    [u'y:repeatcount', u'description', u'pubDate', u'title', u'y:published']
    >>> abspath = p.abspath(p.join(parent, 'data', 'places.xml'))
    >>> path = 'appointment'
    >>> url = "file://%s" % abspath
    >>> conf = {'URL': {'value': url}, 'path': {'value': path}}
    >>> sorted(pipe_fetchdata(_INPUT=pipe_forever(), conf=conf).next().keys())
    ['alarmTime', 'begin', 'duration', 'places', 'subject', 'uid']
    >>> conf = {'URL': {'value': url}, 'path': {'value': ''}}
    >>> sorted(pipe_fetchdata(_INPUT=pipe_forever(), conf=conf).next().keys())
    ['appointment', 'reminder']
    """
    # todo: iCal and KML
    merged_opts = cdicts(opts, kwargs)
    # The splits are requested for conf alone (None is passed as the input);
    # only the first element of the result is used for parsing.
    conf_funcs = get_splits(None, conf, **merged_opts)
    parsed_input = get_parsed(_INPUT, conf_funcs[0])
    fetched = starmap(parse_result, parsed_input)
    return utils.multiplex(imap(utils.gen_items, fetched))
Пример #9
0
def asyncPipeStringtokenizer(context=None, _INPUT=None, conf=None, **kwargs):
    """Asynchronously split a string into tokens delimited by separators.

    String module. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : twisted Deferred iterable of items or strings
    conf : {
        'to-str': {'value': <delimiter>},
        'dedupe': {'type': 'bool', 'value': <1>},
        'sort': {'type': 'bool', 'value': <1>}
    }
        Modified in place: 'to-str' is removed and its value stored under
        'delimiter'.

    Returns
    -------
    _OUTPUT : twisted.internet.defer.Deferred generator of items

    Notes
    -----
    The body uses ``yield`` together with ``returnValue``, so it is
    presumably meant to run under twisted's ``inlineCallbacks``; no decorator
    is visible in this excerpt -- confirm at the definition site.
    """
    # Move 'to-str' to 'delimiter'; when 'to-str' is absent, fall back to
    # whatever plain dict lookup finds under 'delimiter' (None if missing).
    fallback = dict.get(conf, 'delimiter')
    conf['delimiter'] = conf.pop('to-str', fallback)

    merged_opts = cdicts(opts, kwargs)
    split_items = yield asyncGetSplits(_INPUT, conf, **merged_opts)
    dispatch_funcs = get_async_dispatch_funcs()
    dispatched = yield asyncDispatch(split_items, *dispatch_funcs)
    deferred_parse = partial(maybeDeferred, parse_result)
    token_items = yield asyncStarMap(deferred_parse, dispatched)
    returnValue(utils.multiplex(token_items))
Пример #10
0
def pipe_stringtokenizer(context=None, _INPUT=None, conf=None, **kwargs):
    """Split a string into tokens delimited by separators.

    String module. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or strings
    conf : {
        'to-str': {'value': <delimiter>},
        'dedupe': {'type': 'bool', 'value': <1>},
        'sort': {'type': 'bool', 'value': <1>}
    }
        Modified in place: 'to-str' is removed and its value stored under
        'delimiter'.

    Returns
    -------
    _OUTPUT : generator of items
    """
    # Move 'to-str' to 'delimiter'; when 'to-str' is absent, fall back to
    # whatever plain dict lookup finds under 'delimiter' (None if missing).
    fallback = dict.get(conf, 'delimiter')
    conf['delimiter'] = conf.pop('to-str', fallback)

    merged_opts = cdicts(opts, kwargs)
    split_items = get_splits(_INPUT, conf, **merged_opts)
    dispatch_funcs = get_dispatch_funcs()
    dispatched = utils.dispatch(split_items, *dispatch_funcs)
    token_items = starmap(parse_result, dispatched)
    return utils.multiplex(token_items)
Пример #11
0
def asyncPipeStringtokenizer(context=None, _INPUT=None, conf=None, **kwargs):
    """Asynchronously split a string into tokens delimited by separators.

    String module. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : twisted Deferred iterable of items or strings
    conf : {
        'to-str': {'value': <delimiter>},
        'dedupe': {'type': 'bool', 'value': <1>},
        'sort': {'type': 'bool', 'value': <1>}
    }
        Modified in place: 'to-str' is removed and its value stored under
        'delimiter'.

    Returns
    -------
    _OUTPUT : twisted.internet.defer.Deferred generator of items

    Notes
    -----
    The body uses ``yield`` together with ``returnValue``, so it is
    presumably meant to run under twisted's ``inlineCallbacks``; no decorator
    is visible in this excerpt -- confirm at the definition site.
    """
    # Move 'to-str' to 'delimiter'; when 'to-str' is absent, fall back to
    # whatever plain dict lookup finds under 'delimiter' (None if missing).
    fallback = dict.get(conf, 'delimiter')
    conf['delimiter'] = conf.pop('to-str', fallback)

    merged_opts = cdicts(opts, kwargs)
    split_items = yield asyncGetSplits(_INPUT, conf, **merged_opts)
    dispatch_funcs = get_async_dispatch_funcs()
    dispatched = yield asyncDispatch(split_items, *dispatch_funcs)
    deferred_parse = partial(maybeDeferred, parse_result)
    token_items = yield asyncStarMap(deferred_parse, dispatched)
    returnValue(utils.multiplex(token_items))
Пример #12
0
def asyncPipeFetch(context=None, _INPUT=None, conf=None, **kwargs):
    """Asynchronously fetch and parse one or more feeds into their entries.

    Source module. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : asyncPipe like object (twisted Deferred iterable of items)
    conf : {
        'URL': [
            {'type': 'url', 'value': <url1>},
            {'type': 'url', 'value': <url2>},
            {'type': 'url', 'value': <url3>},
        ]
    }

    Returns
    -------
    _OUTPUT : twisted.internet.defer.Deferred generator of items

    Notes
    -----
    The body uses ``yield`` together with ``returnValue``, so it is
    presumably meant to run under twisted's ``inlineCallbacks``; no decorator
    is visible in this excerpt -- confirm at the definition site.
    """
    merged_opts = cdicts(opts, kwargs)
    # Only the 'URL' part of the configuration takes part in the split.
    url_conf = conf['URL']
    url_splits = yield asyncGetSplits(_INPUT, url_conf, **merged_opts)
    feed_items = yield asyncStarMap(asyncParseResult, url_splits)
    returnValue(utils.multiplex(feed_items))
Пример #13
0
def pipe_fetch(context=None, _INPUT=None, conf=None, **kwargs):
    """Fetch and parse one or more feeds into their entries.

    Source module. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipeforever pipe or an iterable of items or fields
    conf : {
        'URL': [
            {'type': 'url', 'value': <url1>},
            {'type': 'url', 'value': <url2>},
            {'type': 'url', 'value': <url3>},
        ]
    }

    Returns
    -------
    _OUTPUT : generator of items
    """
    merged_opts = cdicts(opts, kwargs)
    # Only the 'URL' part of the configuration takes part in the split.
    url_conf = conf['URL']
    url_splits = get_splits(_INPUT, url_conf, **merged_opts)
    feed_items = starmap(parse_result, url_splits)
    return utils.multiplex(feed_items)
Пример #14
0
def pipe_stringtokenizer(context=None, _INPUT=None, conf=None, **kwargs):
    """Split a string into tokens delimited by separators.

    String module. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or strings
    conf : {
        'to-str': {'value': <delimiter>},
        'dedupe': {'type': 'bool', 'value': <1>},
        'sort': {'type': 'bool', 'value': <1>}
    }
        Modified in place: 'to-str' is removed and its value stored under
        'delimiter'.

    Returns
    -------
    _OUTPUT : generator of items
    """
    # Move 'to-str' to 'delimiter'; when 'to-str' is absent, fall back to
    # whatever plain dict lookup finds under 'delimiter' (None if missing).
    fallback = dict.get(conf, 'delimiter')
    conf['delimiter'] = conf.pop('to-str', fallback)

    merged_opts = cdicts(opts, kwargs)
    split_items = get_splits(_INPUT, conf, **merged_opts)
    dispatch_funcs = get_dispatch_funcs()
    dispatched = utils.dispatch(split_items, *dispatch_funcs)
    token_items = starmap(parse_result, dispatched)
    return utils.multiplex(token_items)
Пример #15
0
def asyncPipeFetch(context=None, _INPUT=None, conf=None, **kwargs):
    """Asynchronously fetch and parse one or more feeds into their entries.

    Source module. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : asyncPipe like object (twisted Deferred iterable of items)
    conf : {
        'URL': [
            {'type': 'url', 'value': <url1>},
            {'type': 'url', 'value': <url2>},
            {'type': 'url', 'value': <url3>},
        ]
    }

    Returns
    -------
    _OUTPUT : twisted.internet.defer.Deferred generator of items

    Notes
    -----
    The body uses ``yield`` together with ``returnValue``, so it is
    presumably meant to run under twisted's ``inlineCallbacks``; no decorator
    is visible in this excerpt -- confirm at the definition site.
    """
    merged_opts = cdicts(opts, kwargs)
    # Only the 'URL' part of the configuration takes part in the split.
    url_conf = conf['URL']
    url_splits = yield asyncGetSplits(_INPUT, url_conf, **merged_opts)
    feed_items = yield asyncStarMap(asyncParseResult, url_splits)
    returnValue(utils.multiplex(feed_items))
Пример #16
0
def pipe_fetch(context=None, _INPUT=None, conf=None, **kwargs):
    """Fetch and parse one or more feeds into their entries.

    Source module. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipeforever pipe or an iterable of items or fields
    conf : {
        'URL': [
            {'type': 'url', 'value': <url1>},
            {'type': 'url', 'value': <url2>},
            {'type': 'url', 'value': <url3>},
        ]
    }

    Returns
    -------
    _OUTPUT : generator of items
    """
    merged_opts = cdicts(opts, kwargs)
    # Only the 'URL' part of the configuration takes part in the split.
    url_conf = conf['URL']
    url_splits = get_splits(_INPUT, url_conf, **merged_opts)
    feed_items = starmap(parse_result, url_splits)
    return utils.multiplex(feed_items)
Пример #17
0
def parse_result(urls, _, _pass):
    """Download each URL, parse it as a feed and emit the feed entries.

    Parameters
    ----------
    urls : passed to get_urls to obtain the URLs to open
    _ : unused
    _pass : unused

    Returns
    -------
    The result of utils.multiplex over the per-feed entries that
    utils.gen_entries produces from each speedparser.parse result.
    """
    from contextlib import closing

    def read_url(url):
        # Close the response as soon as the body has been read instead of
        # leaving the open handle to the garbage collector.
        with closing(urlopen(url)) as response:
            return response.read()

    str_urls = get_urls(urls)
    # imap keeps the downloads lazy: a URL is only opened when the
    # corresponding item is consumed downstream.
    contents = imap(read_url, str_urls)
    parsed = imap(speedparser.parse, contents)
    entries = imap(utils.gen_entries, parsed)
    return utils.multiplex(entries)
Пример #18
0
def get_output(_INPUT, **kwargs):
    """Chain the input items with the items of every `_OTHER*` keyword.

    Parameters
    ----------
    _INPUT : passed through utils.finitize before chaining
    kwargs : only values whose key starts with '_OTHER' are used; they are
        combined with utils.multiplex

    Returns
    -------
    itertools.chain of the finitized input followed by the multiplexed
    `_OTHER*` values.
    """
    extra_items = utils.multiplex(
        stream for name, stream in kwargs.iteritems()
        if name.startswith('_OTHER'))
    return chain(utils.finitize(_INPUT), extra_items)
Пример #19
0
def parse_result(urls, _, _pass):
    """Download each URL, parse it as a feed and emit the feed entries.

    Parameters
    ----------
    urls : passed to get_urls to obtain the URLs to open
    _ : unused
    _pass : unused

    Returns
    -------
    The result of utils.multiplex over the per-feed entries that
    utils.gen_entries produces from each speedparser.parse result.
    """
    from contextlib import closing

    def read_url(url):
        # Close the response as soon as the body has been read instead of
        # leaving the open handle to the garbage collector.
        with closing(urlopen(url)) as response:
            return response.read()

    str_urls = get_urls(urls)
    # imap keeps the downloads lazy: a URL is only opened when the
    # corresponding item is consumed downstream.
    contents = imap(read_url, str_urls)
    parsed = imap(speedparser.parse, contents)
    entries = imap(utils.gen_entries, parsed)
    return utils.multiplex(entries)