Пример #1
0
def verify_PipelineManager_exceptions():
    """Exercise the error paths of ``PipelineManager`` and report the outcome.

    Starts ``pipeline_1`` once, then records whether starting it a second
    time and asking ``finished`` about a pipeline that was never started each
    raise ``ValueError``.

    Returns:
        dict with keys ``'raise_1'`` (second ``start`` raised ``ValueError``),
        ``'raise_2'`` (``finished`` on the unstarted pipeline raised
        ``ValueError``) and ``'started_at'`` (``pipeline_1.started_at``).
    """
    pipeline_1 = Pipeline({
        Job(u'worker_1'): Job(u'worker_2'),
        Job(u'worker_2'): Job(u'worker_3')
    })
    pipeline_2 = Pipeline({Job(u'worker_1'): Job(u'worker_2')})
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    raise_1, raise_2 = False, False
    # Always disconnect, even if an exception other than ValueError escapes,
    # so the manager is not left connected.
    try:
        pipeline_manager.start(pipeline_1)
        try:
            pipeline_manager.start(pipeline_1)
        except ValueError:
            raise_1 = True
        try:
            pipeline_manager.finished(pipeline_2)
        except ValueError:
            raise_2 = True
    finally:
        pipeline_manager.disconnect()

    return {
        'raise_1': raise_1,
        'raise_2': raise_2,
        'started_at': pipeline_1.started_at
    }
Пример #2
0
def send_pipeline_and_wait_finished():
    """Start ten pipelines, wait until the manager reports all finished.

    Returns:
        dict with the ``duration`` of the last pipeline started, the
        wall-clock ``real_duration`` spent waiting, and the manager's
        ``finished_pipelines`` / ``started_pipelines`` counters.
    """
    manager = PipelineManager(api=API_ADDRESS, broadcast=BROADCAST_ADDRESS)
    started = []
    for index in range(10):
        graph = {
            Job(u'worker_1'): Job(u'worker_2'),
            Job(u'worker_2'): Job(u'worker_3'),
        }
        current = Pipeline(graph, data={'index': index})
        manager.start(current)
        started.append(current)

    assert manager.started_pipelines == 10
    assert manager.finished_pipelines == 0

    began = time()
    manager.finished(started[0])  # only for testing this method
    while manager.finished_pipelines < manager.started_pipelines:
        manager.update(0.5)
    ended = time()
    manager.disconnect()

    # The reported duration belongs to the last pipeline that was started.
    report = {
        'duration': started[-1].duration,
        'real_duration': ended - began,
        'finished_pipelines': manager.finished_pipelines,
        'started_pipelines': manager.started_pipelines,
    }
    return report
Пример #3
0
    def test_repr(self):
        """``repr`` of the manager shows the submitted and finished counts.

        The manager's API request/reply and broadcast poll/receive methods
        are replaced with stubs that serve canned ids and messages.
        """
        manager = PipelineManager(api=API_ADDRESS,
                                  broadcast=BROADCAST_ADDRESS)
        pending_ids = [uuid4().hex for _ in range(10)]
        # Remember the first three ids before the reply stub consumes the list.
        finished_ids = pending_ids[:3]

        def fake_reply():
            return {'pipeline id': pending_ids.pop()}

        manager.send_api_request = lambda x: None
        manager.get_api_reply = fake_reply

        pipelines = []
        for index in range(10):
            pipelines.append(Pipeline({Job('A', data={'index': index}):
                                       Job('B')}))
        for item in pipelines:
            manager.start(item)

        self.assertEqual(repr(manager),
                         '<PipelineManager: 10 submitted, 0 finished>')

        template = 'pipeline finished: id={}, duration=0.1'
        messages = [template.format(finished_id)
                    for finished_id in finished_ids]
        # Popped from the end: three positive polls, then a negative one.
        poll_results = [False, True, True, True]

        manager.broadcast_poll = lambda timeout: poll_results.pop()
        manager.broadcast_receive = lambda: messages.pop()
        manager.update(0.1)

        self.assertEqual(repr(manager),
                         '<PipelineManager: 10 submitted, 3 finished>')
Пример #4
0
def send_pipeline():
    """Start one three-worker pipeline and report its id around ``start``.

    Returns:
        tuple ``(before, pipeline_id, after)``: ``pipeline.id`` read before
        ``start``, the value returned by ``start``, and ``pipeline.id`` read
        after the manager was disconnected.
    """
    graph = {
        Job(u'worker_1'): Job(u'worker_2'),
        Job(u'worker_2'): Job(u'worker_3'),
    }
    pipeline = Pipeline(graph)
    manager = PipelineManager(api='tcp://localhost:5550',
                              broadcast='tcp://localhost:5551')
    id_before_start = pipeline.id
    returned_id = manager.start(pipeline)
    manager.disconnect()
    return (id_before_start, returned_id, pipeline.id)
Пример #5
0
def send_pipeline():
    """Submit a single pipeline and return its id before and after starting.

    Returns:
        3-tuple of ``pipeline.id`` before ``start``, the return value of
        ``start``, and ``pipeline.id`` once the manager has disconnected.
    """
    workers = {}
    workers[Job(u'worker_1')] = Job(u'worker_2')
    workers[Job(u'worker_2')] = Job(u'worker_3')
    pipeline = Pipeline(workers)

    manager = PipelineManager(api=API_ADDRESS, broadcast=BROADCAST_ADDRESS)
    initial_id = pipeline.id
    assigned_id = manager.start(pipeline)
    manager.disconnect()

    return initial_id, assigned_id, pipeline.id
Пример #6
0
    def test_repr(self):
        """Check ``repr(PipelineManager)`` before and after three finish.

        API and broadcast methods on the manager are swapped for local stubs
        that hand out pre-generated ids and canned "finished" messages.
        """
        expected_before = '<PipelineManager: 10 submitted, 0 finished>'
        expected_after = '<PipelineManager: 10 submitted, 3 finished>'

        manager = PipelineManager(api=API_ADDRESS,
                                  broadcast=BROADCAST_ADDRESS)
        id_pool = [uuid4().hex for _ in range(10)]
        all_ids = list(id_pool)

        def next_reply():
            # Ids are handed out from the end of the pool.
            return {'pipeline id': id_pool.pop()}

        def ignore_request(x):
            return None

        manager.send_api_request = ignore_request
        manager.get_api_reply = next_reply

        pipelines = [Pipeline({Job('A', data={'index': number}): Job('B')})
                     for number in range(10)]
        for entry in pipelines:
            manager.start(entry)
        self.assertEqual(repr(manager), expected_before)

        messages = []
        for finished_id in all_ids[:3]:
            messages.append(
                'pipeline finished: id={}, duration=0.1'.format(finished_id))
        answers = [False, True, True, True]

        def stub_poll(timeout):
            return answers.pop()

        def stub_receive():
            return messages.pop()

        manager.broadcast_poll = stub_poll
        manager.broadcast_receive = stub_receive
        manager.update(0.1)
        self.assertEqual(repr(manager), expected_after)
Пример #7
0
def send_pipeline():
    """Start a pipeline through the manager and capture its id transitions.

    Returns:
        tuple of (``pipeline.id`` before ``start``, value returned by
        ``start``, ``pipeline.id`` after ``disconnect``).
    """
    first_hop = (Job(u'worker_1'), Job(u'worker_2'))
    second_hop = (Job(u'worker_2'), Job(u'worker_3'))
    pipeline = Pipeline(dict([first_hop, second_hop]))

    manager = PipelineManager(api=API_ADDRESS, broadcast=BROADCAST_ADDRESS)
    observed = [pipeline.id]
    observed.append(manager.start(pipeline))
    manager.disconnect()
    observed.append(pipeline.id)
    return tuple(observed)
Пример #8
0
 def test_should_return_all_pipelines(self):
     """``manager.pipelines`` contains exactly the pipelines that were started.

     API request/reply methods are stubbed; each reply carries a fresh id.
     """
     manager = PipelineManager(api=API_ADDRESS,
                               broadcast=BROADCAST_ADDRESS)
     manager.send_api_request = lambda x: None
     manager.get_api_reply = lambda: {'pipeline id': uuid4().hex}

     expected = []
     for index in range(10):
         graph = {
             Job(u'worker_1'): Job(u'worker_2'),
             Job(u'worker_2'): Job(u'worker_3'),
         }
         candidate = Pipeline(graph, data={'index': index})
         manager.start(candidate)
         expected.append(candidate)

     # Order is irrelevant, so compare as sets.
     self.assertEqual(set(manager.pipelines), set(expected))
Пример #9
0
def main():
    pipeline_definition = {Job('Downloader'): (Job('GetTextAndWords'),
                                               Job('GetLinks'))}
    urls = ['http://www.fsf.org', 'https://creativecommons.org',
            'https://github.com', 'http://emap.fgv.br',
            'https://twitter.com/turicas']

    pipeline_manager = PipelineManager(api='tcp://127.0.0.1:5555',
                                       broadcast='tcp://127.0.0.1:5556')
    print 'Sending pipelines...'
    my_pipelines = []
    for index, url in enumerate(urls):
        filename = '/tmp/{}.data'.format(index)
        data = json.dumps({'url': url})
        with open(filename, 'w') as fp:
            fp.write(data)
        pipeline = Pipeline(pipeline_definition, data={'filename': filename})
        pipeline_manager.start(pipeline)
        my_pipelines.append(pipeline)
        print '  Sent pipeline for url={}'.format(url)

    print 'Waiting for pipelines to finish...'
    pipelines_finished = 0
    while pipelines_finished < len(urls):
        counter = 0
        for pipeline in my_pipelines:
            if pipeline_manager.finished(pipeline):
                counter += 1
        if counter != pipelines_finished:
            print ' # of finished pipelines: {}'.format(counter)
            pipelines_finished = counter

    durations = [str(pipeline.duration) for pipeline in my_pipelines]
    print 'Pipeline durations (in seconds) = {}'.format(', '.join(durations))

    for index, url in enumerate(urls):
        filename = '/tmp/{}.data'.format(index)
        with open(filename) as fp:
            data = json.loads(fp.read())
        print ('  url={url}, download_duration={download_duration}, '
               'number_of_words={number_of_words}, '
               'number_of_links={number_of_links}'.format(**data))
Пример #10
0
def main():
    """Benchmark pipeline submission and wait for all pipelines to finish.

    Starts ``NUMBER_OF_PIPELINES`` pipelines and, for each one, appends a
    tab-separated row (sequence number, time taken by ``start``, virtual
    memory size, resident set size of this process) to a file named after
    the pipeline count and the version given as the first command-line
    argument. Progress is written to stdout. Afterwards it loops until every
    pipeline reports ``finished``.
    """
    stdout_write = sys.stdout.write
    stdout_flush = sys.stdout.flush
    pipeline_manager = PipelineManager(api=ROUTER_API,
                                       broadcast=ROUTER_BROADCAST)
    pipeline_definition = {Job('Dummy1'): Job('Dummy2')}
    process = psutil.Process(os.getpid())
    version = sys.argv[1]
    filename = 'test-{}_pipelines-pypelinin-{}.dat'.format(
        NUMBER_OF_PIPELINES, version)
    my_pipelines = []
    # The context manager closes (and flushes) the stats file even when
    # starting a pipeline raises; rows are only written inside this loop.
    with open(filename, 'w') as data:
        for i in xrange(NUMBER_OF_PIPELINES):
            pipeline = Pipeline(pipeline_definition, data={'index': i})
            start_time = time()
            pipeline_manager.start(pipeline)
            end_time = time()
            my_pipelines.append(pipeline)
            memory_info = process.get_memory_info()
            info = (i + 1, end_time - start_time, memory_info.vms,
                    memory_info.rss)
            data.write('{}\t{}\t{}\t{}\n'.format(*info))
            # Only refresh the progress line every UPDATE_INTERVAL pipelines.
            if (i + 1) % UPDATE_INTERVAL == 0:
                stdout_write('\r{} out of {}'.format(i + 1,
                                                     NUMBER_OF_PIPELINES))
                stdout_flush()
    stdout_write('\rfinished sending pipelines! \o/\n')

    stdout_write('Waiting for pipelines to finish...\n')
    pipelines_finished = 0
    finished = pipeline_manager.finished
    while pipelines_finished < NUMBER_OF_PIPELINES:
        finished(my_pipelines[0])  # just need one call to update state of all
        counter = [pipeline.finished for pipeline in my_pipelines].count(True)
        if counter != pipelines_finished:
            stdout_write('\r # of finished pipelines: {}/{}'.format(
                counter, NUMBER_OF_PIPELINES))
            stdout_flush()
            pipelines_finished = counter
    stdout_write('\n')
Пример #11
0
def verify_PipelineManager_exceptions():
    """Check which ``PipelineManager`` misuse cases raise ``ValueError``.

    ``pipeline_1`` is started once; the function then records whether a
    second ``start`` of the same pipeline, and a ``finished`` query for the
    never-started ``pipeline_2``, each raise ``ValueError``.

    Returns:
        dict with ``'raise_1'``, ``'raise_2'`` (booleans for the two cases
        above) and ``'started_at'`` (``pipeline_1.started_at``).
    """
    pipeline_1 = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                           Job(u'worker_2'): Job(u'worker_3')})
    pipeline_2 = Pipeline({Job(u'worker_1'): Job(u'worker_2')})
    pipeline_manager = PipelineManager(api='tcp://localhost:5550',
                                       broadcast='tcp://localhost:5551')
    raise_1, raise_2 = False, False
    # try/finally guarantees the manager is disconnected even when an
    # exception other than the expected ValueError propagates.
    try:
        pipeline_manager.start(pipeline_1)
        try:
            pipeline_manager.start(pipeline_1)
        except ValueError:
            raise_1 = True
        try:
            pipeline_manager.finished(pipeline_2)
        except ValueError:
            raise_2 = True
    finally:
        pipeline_manager.disconnect()

    return {'raise_1': raise_1, 'raise_2': raise_2,
            'started_at': pipeline_1.started_at}
Пример #12
0
def verify_PipelineManager_exceptions():
    """Report whether ``PipelineManager`` rejects two kinds of misuse.

    After starting ``pipeline_1`` once, the function tries to start it again
    and to query ``finished`` for ``pipeline_2`` (which was never started),
    noting for each whether ``ValueError`` was raised.

    Returns:
        dict with boolean ``'raise_1'`` and ``'raise_2'`` for the two
        attempts, plus ``'started_at'`` taken from ``pipeline_1.started_at``.
    """
    pipeline_1 = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                           Job(u'worker_2'): Job(u'worker_3')})
    pipeline_2 = Pipeline({Job(u'worker_1'): Job(u'worker_2')})
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    raise_1, raise_2 = False, False
    # Disconnect in a finally clause so an unexpected exception does not
    # leave the manager connected.
    try:
        pipeline_manager.start(pipeline_1)
        try:
            pipeline_manager.start(pipeline_1)
        except ValueError:
            raise_1 = True
        try:
            pipeline_manager.finished(pipeline_2)
        except ValueError:
            raise_2 = True
    finally:
        pipeline_manager.disconnect()

    return {'raise_1': raise_1, 'raise_2': raise_2,
            'started_at': pipeline_1.started_at}
Пример #13
0
def send_pipeline_and_wait_finished():
    """Submit ten pipelines and block until the manager has seen all finish.

    Returns:
        dict with ``'duration'`` (of the last pipeline submitted),
        ``'real_duration'`` (seconds spent in the wait loop) and the
        manager's ``'finished_pipelines'`` / ``'started_pipelines'`` counts.
    """
    manager = PipelineManager(api=API_ADDRESS, broadcast=BROADCAST_ADDRESS)

    submitted = []
    for number in range(10):
        item = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                         Job(u'worker_2'): Job(u'worker_3')},
                        data={'index': number})
        manager.start(item)
        submitted.append(item)
    assert manager.started_pipelines == 10
    assert manager.finished_pipelines == 0

    t0 = time()
    manager.finished(submitted[0])  # only for testing this method
    while manager.finished_pipelines < manager.started_pipelines:
        manager.update(0.5)
    t1 = time()
    manager.disconnect()

    last = submitted[-1]
    result = {}
    result['duration'] = last.duration
    result['real_duration'] = t1 - t0
    result['finished_pipelines'] = manager.finished_pipelines
    result['started_pipelines'] = manager.started_pipelines
    return result
Пример #14
0
def main():
    pipeline_definition = {Job('Downloader'): (Job('GetTextAndWords'),
                                               Job('GetLinks'))}
    urls = ['http://www.fsf.org', 'https://creativecommons.org',
            'http://emap.fgv.br', 'https://twitter.com/turicas',
            'http://www.pypln.org', 'http://www.zeromq.org',
            'http://www.python.org', 'http://www.mongodb.org',
            'http://github.com', 'http://pt.wikipedia.org']

    pipeline_manager = PipelineManager(api='tcp://127.0.0.1:5555',
                                       broadcast='tcp://127.0.0.1:5556')
    print 'Sending pipelines...'
    start_time = time()
    my_pipelines = []
    for index, url in enumerate(urls):
        filename = '/tmp/{}.dat'.format(index)
        data = json.dumps({'url': url})
        with open(filename, 'w') as fp:
            fp.write(data)
        pipeline = Pipeline(pipeline_definition, data={'filename': filename})
        pipeline_manager.start(pipeline)
        print '  Sent pipeline for url={}'.format(url)

    print
    print 'Waiting for pipelines to finish...'
    total_pipelines = pipeline_manager.started_pipelines
    finished_pipelines = 0
    while finished_pipelines < total_pipelines:
        pipeline_manager.update(0.5)
        finished_pipelines = pipeline_manager.finished_pipelines
        percentual = 100 * (float(finished_pipelines) / total_pipelines)
        sys.stdout.write('\rFinished pipelines: {}/{} ({:5.2f}%)'\
                         .format(finished_pipelines, total_pipelines,
                                 percentual))
        sys.stdout.flush()
    end_time = time()
    print '\rAll pipelines finished in {} seconds'.format(end_time - start_time)

    durations = [pipeline.duration for pipeline in pipeline_manager.pipelines]
    average_duration = sum(durations) / len(durations)
    print 'Average pipeline duration (seconds) = {} (min={}, max={})'\
          .format(average_duration, min(durations), max(durations))
    print

    print 'Some data saved by store:'
    for index, url in enumerate(urls):
        filename = '/tmp/{}.dat'.format(index)
        with open(filename) as fp:
            data = json.loads(fp.read())
        print ('  url={url}, download_duration={download_duration}, '
               'number_of_words={number_of_words}, '
               'number_of_links={number_of_links}'.format(**data))
Пример #15
0
def send_pipeline_and_wait_finished():
    """Start one pipeline and poll every 0.1 s until it is reported finished.

    Returns:
        dict with ``'duration'`` (``pipeline.duration``) and
        ``'real_duration'`` (wall-clock seconds spent polling).
    """
    import time

    graph = {
        Job(u'worker_1'): Job(u'worker_2'),
        Job(u'worker_2'): Job(u'worker_3'),
    }
    pipeline = Pipeline(graph)
    manager = PipelineManager(api='tcp://localhost:5550',
                              broadcast='tcp://localhost:5551')
    manager.start(pipeline)

    began = time.time()
    while True:
        if manager.finished(pipeline):
            break
        time.sleep(0.1)
    ended = time.time()

    manager.disconnect()
    return {'duration': pipeline.duration, 'real_duration': ended - began}
Пример #16
0
 def test_should_return_all_pipelines(self):
     """Every started pipeline must show up in ``manager.pipelines``.

     The manager's API request/reply methods are replaced with stubs; each
     stubbed reply returns a newly generated id.
     """
     def discard_request(x):
         return None

     def reply_with_new_id():
         return {'pipeline id': uuid4().hex}

     manager = PipelineManager(api=API_ADDRESS,
                               broadcast=BROADCAST_ADDRESS)
     manager.send_api_request = discard_request
     manager.get_api_reply = reply_with_new_id

     iterations = 10
     started = []
     index = 0
     while index < iterations:
         item = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                          Job(u'worker_2'): Job(u'worker_3')},
                         data={'index': index})
         manager.start(item)
         started.append(item)
         index += 1

     known = set(manager.pipelines)
     self.assertEqual(known, set(started))
Пример #17
0
def create_pipeline(data):
    """Start a pipeline built from ``default_pipeline`` carrying ``data``.

    A new ``PipelineManager`` is created from ``settings.ROUTER_API`` and
    ``settings.ROUTER_BROADCAST`` on every call. Nothing is returned.
    """
    router_client = PipelineManager(settings.ROUTER_API,
                                    settings.ROUTER_BROADCAST)
    router_client.start(Pipeline(default_pipeline, data=data))