Пример #1
0
def classic_iterator(urls,
                     reader=func.chain_reader,
                     input_stream=(func.map_input_stream, ),
                     notifier=func.notifier,
                     params=None,
                     ddfs=None):
    """
    Yield the records of `urls` as the classic map interface would see them.

    A classic worker is configured with `reader` and `input_stream`; each
    input returned by ``util.inputlist`` is first handed to `notifier` and
    then read record by record through that worker's map input opener.

    :type  reader: :func:`disco.classic.worker.func.input_stream`
    :param reader: shortcut for the last input stream applied
                   (given to the worker as ``map_reader``).

    :type  input_stream: sequence of :func:`disco.classic.worker.func.input_stream`
    :param input_stream: used to read from a custom file format
                         (given to the worker as ``map_input_stream``).

    :type  notifier: :func:`disco.classic.worker.func.notifier`
    :param notifier: called with each input just before it is read.

    :param params: forwarded to ``worker.opener('map', 'in', params)``.

    :param ddfs: when truthy, used as ``DISCO_MASTER`` for the settings
                 passed to ``util.inputlist``; otherwise default
                 settings are used.
    """
    from disco.worker import Input
    from disco.worker.classic.worker import Worker

    classic_worker = Worker(map_reader=reader, map_input_stream=input_stream)

    if ddfs:
        settings = DiscoSettings(DISCO_MASTER=ddfs)
    else:
        settings = DiscoSettings()

    for source in util.inputlist(urls, settings=settings):
        notifier(source)
        # A fresh opener is requested for every input, after the notifier ran.
        open_input = classic_worker.opener('map', 'in', params)
        records = Input(source, open=open_input)
        for rec in records:
            yield rec
Пример #2
0
Файл: core.py Проект: hmas/disco
def classic_iterator(urls,
                     reader=task_io.chain_reader,
                     input_stream=(func.map_input_stream, ),
                     notifier=func.notifier,
                     params=None,
                     ddfs=None):
    """
    Yield the records of `urls` as the classic map interface would see them.

    Every input returned by ``util.inputlist`` is rewritten with
    ``proxy_url(..., to_master=False)`` before it is announced to `notifier`
    and read. A string input is rewritten directly; a tuple input yields a
    tuple of rewritten items; any other iterable yields a list of them.

    :type  reader: :func:`disco.worker.task_io.input_stream`
    :param reader: shortcut for the last input stream applied
                   (given to the worker as ``map_reader``).

    :type  input_stream: sequence of :func:`disco.worker.task_io.input_stream`
    :param input_stream: used to read from a custom file format
                         (given to the worker as ``map_input_stream``).

    :type  notifier: :func:`disco.func.notifier`
    :param notifier: called with each rewritten input just before it is read.

    :param params: forwarded to ``worker.opener('map', 'in', params)``.

    :param ddfs: when truthy, used as ``DISCO_MASTER`` for the settings
                 passed to ``util.inputlist``; otherwise default
                 settings are used.
    """
    from disco.worker import Input
    from disco.worker.classic.worker import Worker

    classic_worker = Worker(map_reader=reader, map_input_stream=input_stream)

    if ddfs:
        settings = DiscoSettings(DISCO_MASTER=ddfs)
    else:
        settings = DiscoSettings()

    for entry in util.inputlist(urls, settings=settings):
        if isinstance(entry, basestring):
            target = proxy_url(entry, to_master=False)
        else:
            # Non-string inputs are treated as collections of urls; the
            # container type is kept for tuples, everything else is a list.
            target = [proxy_url(item, to_master=False) for item in entry]
            if isinstance(entry, tuple):
                target = tuple(target)

        notifier(target)
        open_input = classic_worker.opener('map', 'in', params)
        records = Input(target, open=open_input)
        for rec in records:
            yield rec
Пример #3
0
def sorted_iterator(urls,
                    reader=func.chain_reader,
                    input_stream=(func.map_input_stream, ),
                    notifier=func.notifier,
                    params=None,
                    ddfs=None):
    """
    Return a ``SortedIterator`` over the inputs named by `urls`.

    Unlike a generator, this function walks every input up front: each one
    is passed to `notifier`, wrapped in an ``Input`` using the classic
    worker's map input opener, and collected if the resulting object is
    truthy. The collected list is then handed to ``SortedIterator``.

    :param urls: inputs to expand with ``util.inputlist``.
    :param reader: given to the classic worker as ``map_reader``.
    :param input_stream: given to the classic worker as ``map_input_stream``.
    :param notifier: called with each input before its ``Input`` is created.
    :param params: forwarded to ``worker.opener('map', 'in', params)``.
    :param ddfs: when truthy, used as ``DISCO_MASTER`` for the settings
                 passed to ``util.inputlist``; otherwise default
                 settings are used.
    """
    from disco.worker import Input
    from disco.worker.classic.worker import Worker

    classic_worker = Worker(map_reader=reader, map_input_stream=input_stream)

    if ddfs:
        settings = DiscoSettings(DISCO_MASTER=ddfs)
    else:
        settings = DiscoSettings()

    streams = []
    for source in util.inputlist(urls, settings=settings):
        notifier(source)
        open_input = classic_worker.opener('map', 'in', params)
        stream = Input(source, open=open_input)
        # Falsy Input objects are left out of the merge.
        if not stream:
            continue
        streams.append(stream)

    return SortedIterator(streams)
Пример #4
0
def sorted_iterator(urls,
                    reader=func.chain_reader,
                    input_stream=(func.map_input_stream,),
                    notifier=func.notifier,
                    params=None,
                    ddfs=None):
    """
    Collect an ``Input`` per entry of `urls` and wrap them in a
    ``SortedIterator``.

    All entries produced by ``util.inputlist`` are processed before this
    function returns: `notifier` is called with the entry, an ``Input`` is
    created for it with the classic worker's map input opener, and the
    ``Input`` is kept only when it evaluates as true.

    :param urls: inputs to expand with ``util.inputlist``.
    :param reader: given to the classic worker as ``map_reader``.
    :param input_stream: given to the classic worker as ``map_input_stream``.
    :param notifier: called with each entry before its ``Input`` is created.
    :param params: forwarded to ``worker.opener('map', 'in', params)``.
    :param ddfs: when truthy, used as ``DISCO_MASTER`` for the settings
                 passed to ``util.inputlist``; otherwise default
                 settings are used.
    """
    from disco.worker import Input
    from disco.worker.classic.worker import Worker

    map_worker = Worker(map_reader=reader, map_input_stream=input_stream)

    if ddfs:
        settings = DiscoSettings(DISCO_MASTER=ddfs)
    else:
        settings = DiscoSettings()

    kept = []
    for entry in util.inputlist(urls, settings=settings):
        notifier(entry)
        opener = map_worker.opener('map', 'in', params)
        candidate = Input(entry, open=opener)
        if not candidate:
            # Nothing usable came back for this entry; skip it.
            continue
        kept.append(candidate)

    return SortedIterator(kept)