示例#1
0
def send_pipeline_and_wait_finished():
    """Start ten pipelines and wait until the manager reports all finished.

    Every pipeline is built from the same two-entry job mapping
    (``worker_1 -> worker_2`` and ``worker_2 -> worker_3``) and carries its
    loop index as ``data``.

    Returns:
        dict: ``duration`` (the ``duration`` attribute of the last pipeline
        created), ``real_duration`` (difference of the two ``time()``
        readings taken around the wait loop) and the manager's
        ``finished_pipelines`` / ``started_pipelines`` counters, read after
        the manager has been disconnected.

    Raises:
        AssertionError: if, right after sending, the manager does not report
            10 started and 0 finished pipelines.
    """
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    # try/finally: the manager must be disconnected even when an assertion
    # fails or one of the manager calls raises.
    try:
        pipelines = []
        for i in range(10):
            pipeline = Pipeline(
                {
                    Job(u'worker_1'): Job(u'worker_2'),
                    Job(u'worker_2'): Job(u'worker_3')
                },
                data={'index': i})
            pipeline_manager.start(pipeline)
            pipelines.append(pipeline)
        assert pipeline_manager.started_pipelines == 10
        assert pipeline_manager.finished_pipelines == 0
        start = time()
        # Called only so that ``finished`` itself gets exercised; its return
        # value is deliberately ignored.
        pipeline_manager.finished(pipelines[0])
        # NOTE(review): 0.5 is presumably a timeout in seconds -- confirm
        # against PipelineManager.update.
        while (pipeline_manager.finished_pipelines <
               pipeline_manager.started_pipelines):
            pipeline_manager.update(0.5)
        end = time()
    finally:
        pipeline_manager.disconnect()
    return {
        # The original read the leaked loop variable; this is the same
        # object, named explicitly.
        'duration': pipelines[-1].duration,
        'real_duration': end - start,
        'finished_pipelines': pipeline_manager.finished_pipelines,
        'started_pipelines': pipeline_manager.started_pipelines
    }
示例#2
0
def verify_PipelineManager_exceptions():
    """Record whether two misuse cases of ``PipelineManager`` raise ValueError.

    ``pipeline_1`` is started once; then the function tries

    1. to start ``pipeline_1`` a second time, and
    2. to call ``finished`` for ``pipeline_2``, which was never started
       through this manager.

    Returns:
        dict: ``raise_1`` / ``raise_2`` are True when the respective call
        raised ``ValueError``; ``started_at`` is ``pipeline_1.started_at``
        read after the manager has been disconnected.
    """
    pipeline_1 = Pipeline({
        Job(u'worker_1'): Job(u'worker_2'),
        Job(u'worker_2'): Job(u'worker_3')
    })
    pipeline_2 = Pipeline({Job(u'worker_1'): Job(u'worker_2')})
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    raise_1, raise_2 = False, False
    # try/finally: the initial start() is unguarded and the probes below only
    # catch ValueError, so any other failure would otherwise skip disconnect.
    try:
        pipeline_manager.start(pipeline_1)
        try:
            pipeline_manager.start(pipeline_1)
        except ValueError:
            raise_1 = True
        try:
            pipeline_manager.finished(pipeline_2)
        except ValueError:
            raise_2 = True
    finally:
        pipeline_manager.disconnect()

    return {
        'raise_1': raise_1,
        'raise_2': raise_2,
        'started_at': pipeline_1.started_at
    }
示例#3
0
def verify_PipelineManager_exceptions():
    """Probe ``PipelineManager`` for ValueError on two invalid calls.

    After starting ``pipeline_1`` once, the function attempts to start it
    again and to ask whether ``pipeline_2`` (never started here) is
    finished, noting for each attempt whether ``ValueError`` was raised.

    Returns:
        dict: ``raise_1`` (second ``start`` raised ``ValueError``),
        ``raise_2`` (``finished`` on ``pipeline_2`` raised ``ValueError``)
        and ``started_at`` (``pipeline_1.started_at``, read after
        disconnecting).
    """
    pipeline_1 = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                           Job(u'worker_2'): Job(u'worker_3')})
    pipeline_2 = Pipeline({Job(u'worker_1'): Job(u'worker_2')})
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    raise_1, raise_2 = False, False
    try:
        pipeline_manager.start(pipeline_1)
        try:
            pipeline_manager.start(pipeline_1)
        except ValueError:
            raise_1 = True
        try:
            pipeline_manager.finished(pipeline_2)
        except ValueError:
            raise_2 = True
    finally:
        # Always release the manager, including when an exception other
        # than the expected ValueError escapes from the calls above.
        pipeline_manager.disconnect()

    return {'raise_1': raise_1, 'raise_2': raise_2,
            'started_at': pipeline_1.started_at}
示例#4
0
def verify_PipelineManager_exceptions(api='tcp://localhost:5550',
                                      broadcast='tcp://localhost:5551'):
    """Record whether two invalid ``PipelineManager`` calls raise ValueError.

    ``pipeline_1`` is started once; the function then tries to start it a
    second time and to call ``finished`` for ``pipeline_2``, which was never
    started through this manager.

    Args:
        api: address passed as ``PipelineManager(api=...)``. Defaults to the
            value that used to be hard-coded.
        broadcast: address passed as ``PipelineManager(broadcast=...)``.
            Defaults to the value that used to be hard-coded.

    Returns:
        dict: ``raise_1`` / ``raise_2`` tell whether the respective call
        raised ``ValueError``; ``started_at`` is ``pipeline_1.started_at``
        read after the manager has been disconnected.
    """
    pipeline_1 = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                           Job(u'worker_2'): Job(u'worker_3')})
    pipeline_2 = Pipeline({Job(u'worker_1'): Job(u'worker_2')})
    pipeline_manager = PipelineManager(api=api, broadcast=broadcast)
    raise_1, raise_2 = False, False
    # try/finally: disconnect even if the unguarded first start() or a
    # non-ValueError exception aborts the checks.
    try:
        pipeline_manager.start(pipeline_1)
        try:
            pipeline_manager.start(pipeline_1)
        except ValueError:
            raise_1 = True
        try:
            pipeline_manager.finished(pipeline_2)
        except ValueError:
            raise_2 = True
    finally:
        pipeline_manager.disconnect()

    return {'raise_1': raise_1, 'raise_2': raise_2,
            'started_at': pipeline_1.started_at}
示例#5
0
def send_pipeline_and_wait_finished():
    """Send ten pipelines and loop until the manager counts all as finished.

    All pipelines share the same job mapping (``worker_1 -> worker_2``,
    ``worker_2 -> worker_3``); the loop index is attached as ``data``.

    Returns:
        dict: ``duration`` is the ``duration`` attribute of the last
        pipeline created, ``real_duration`` is the difference between the
        ``time()`` readings taken before and after waiting, and
        ``finished_pipelines`` / ``started_pipelines`` are the manager's
        counters read after disconnecting.

    Raises:
        AssertionError: if the manager's counters are not 10 started and
            0 finished immediately after the pipelines are sent.
    """
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    try:
        pipelines = []
        for i in range(10):
            pipeline = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                                 Job(u'worker_2'): Job(u'worker_3')},
                                data={'index': i})
            pipeline_manager.start(pipeline)
            pipelines.append(pipeline)
        assert pipeline_manager.started_pipelines == 10
        assert pipeline_manager.finished_pipelines == 0
        start = time()
        pipeline_manager.finished(pipelines[0])  # only for testing this method
        # NOTE(review): the argument to update() is presumably a timeout in
        # seconds -- confirm against PipelineManager.update.
        while (pipeline_manager.finished_pipelines <
               pipeline_manager.started_pipelines):
            pipeline_manager.update(0.5)
        end = time()
    finally:
        # Runs on failure too, so a failed assertion or an error raised by
        # the manager no longer leaves the connection open.
        pipeline_manager.disconnect()
    # ``pipelines[-1]`` is the object the original reached through the
    # leaked loop variable ``pipeline``.
    return {'duration': pipelines[-1].duration, 'real_duration': end - start,
            'finished_pipelines': pipeline_manager.finished_pipelines,
            'started_pipelines': pipeline_manager.started_pipelines}
示例#6
0
def send_pipeline_and_wait_finished(api='tcp://localhost:5550',
                                    broadcast='tcp://localhost:5551'):
    """Start one pipeline and poll the manager until it reports it finished.

    Args:
        api: address passed as ``PipelineManager(api=...)``. Defaults to the
            value that used to be hard-coded.
        broadcast: address passed as ``PipelineManager(broadcast=...)``.
            Defaults to the value that used to be hard-coded.

    Returns:
        dict: ``duration`` (``pipeline.duration``, read after disconnecting)
        and ``real_duration`` (seconds between the ``time.time()`` readings
        taken just after ``start`` and just after the polling loop ends).
    """
    import time

    pipeline = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                         Job(u'worker_2'): Job(u'worker_3')})
    pipeline_manager = PipelineManager(api=api, broadcast=broadcast)
    # try/finally: the polling loop has no upper bound, so make sure the
    # manager is disconnected when it is interrupted or a call raises.
    try:
        pipeline_manager.start(pipeline)
        start = time.time()
        while not pipeline_manager.finished(pipeline):
            time.sleep(0.1)  # pause between polls
        end = time.time()
    finally:
        pipeline_manager.disconnect()
    return {'duration': pipeline.duration, 'real_duration': end - start}
示例#7
0
def main():
    """Send one pipeline per URL, wait for all of them, then print a report.

    For every URL a JSON file ``/tmp/<index>.data`` containing
    ``{"url": <url>}`` is written and a pipeline is started with that
    filename as its data. Once the manager reports every pipeline finished,
    the pipeline durations are printed and each data file is read back and
    summarised.

    NOTE(review): the summary expects the keys ``download_duration``,
    ``number_of_words`` and ``number_of_links`` in each file; nothing in this
    function writes them, so they are presumably added by the workers --
    confirm.

    Raises:
        KeyError: if a data file lacks one of the keys used in the summary.
    """
    pipeline_definition = {Job('Downloader'): (Job('GetTextAndWords'),
                                               Job('GetLinks'))}
    urls = ['http://www.fsf.org', 'https://creativecommons.org',
            'https://github.com', 'http://emap.fgv.br',
            'https://twitter.com/turicas']
    # One data file per URL; computed once and reused for writing and for
    # reading the results back.
    filenames = ['/tmp/{}.data'.format(index) for index in range(len(urls))]

    pipeline_manager = PipelineManager(api='tcp://127.0.0.1:5555',
                                       broadcast='tcp://127.0.0.1:5556')
    # Every print below uses the single-argument parenthesised form, which
    # produces the same output as a print statement and as print().
    try:
        print('Sending pipelines...')
        my_pipelines = []
        for url, filename in zip(urls, filenames):
            with open(filename, 'w') as fp:
                fp.write(json.dumps({'url': url}))
            pipeline = Pipeline(pipeline_definition,
                                data={'filename': filename})
            pipeline_manager.start(pipeline)
            my_pipelines.append(pipeline)
            print('  Sent pipeline for url={}'.format(url))

        print('Waiting for pipelines to finish...')
        pipelines_finished = 0
        while pipelines_finished < len(urls):
            # Ask the manager about every pipeline on each round.
            counter = sum(1 for pipeline in my_pipelines
                          if pipeline_manager.finished(pipeline))
            # Report only when the count changed since the previous round.
            if counter != pipelines_finished:
                print(' # of finished pipelines: {}'.format(counter))
                pipelines_finished = counter
    finally:
        # The manager is not used past this point; release it even when
        # sending or waiting fails or is interrupted.
        pipeline_manager.disconnect()

    durations = [str(pipeline.duration) for pipeline in my_pipelines]
    print('Pipeline durations (in seconds) = {}'.format(', '.join(durations)))

    for filename in filenames:
        with open(filename) as fp:
            data = json.loads(fp.read())
        print('  url={url}, download_duration={download_duration}, '
              'number_of_words={number_of_words}, '
              'number_of_links={number_of_links}'.format(**data))