def send_pipeline_and_wait_finished():
    """Start ten pipelines and block until the manager reports all finished.

    Each pipeline is built from the same Job mapping and carries
    ``data={'index': i}``. After starting them, the function checks the
    manager's counters, calls ``finished`` once on the first pipeline and
    then calls ``update`` repeatedly until ``finished_pipelines`` reaches
    ``started_pipelines``.

    Returns:
        dict with keys ``duration`` (``duration`` attribute of the last
        pipeline started), ``real_duration`` (wall-clock seconds spent
        waiting), ``finished_pipelines`` and ``started_pipelines`` (the
        manager's counters, read after disconnecting).

    Raises:
        AssertionError: if the manager's counters are not 10 started and
            0 finished right after the pipelines were started.
    """
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    pipelines = []
    # The manager must be disconnected even when an assertion or a manager
    # call fails; otherwise its connection is left open.
    try:
        for i in range(10):
            pipeline = Pipeline(
                {
                    Job(u'worker_1'): Job(u'worker_2'),
                    Job(u'worker_2'): Job(u'worker_3'),
                },
                data={'index': i})
            pipeline_manager.start(pipeline)
            pipelines.append(pipeline)
        assert pipeline_manager.started_pipelines == 10
        assert pipeline_manager.finished_pipelines == 0

        start = time()
        pipeline_manager.finished(pipelines[0])  # only for testing this method
        while (pipeline_manager.finished_pipelines <
               pipeline_manager.started_pipelines):
            # NOTE(review): 0.5 is presumably a timeout in seconds -- confirm
            # against PipelineManager.update.
            pipeline_manager.update(0.5)
        end = time()
    finally:
        pipeline_manager.disconnect()

    return {
        # Same object the original read through the leaked loop variable.
        'duration': pipelines[-1].duration,
        'real_duration': end - start,
        'finished_pipelines': pipeline_manager.finished_pipelines,
        'started_pipelines': pipeline_manager.started_pipelines,
    }
def verify_PipelineManager_exceptions():
    """Record whether PipelineManager raises ValueError on misuse.

    One pipeline is started; the function then tries to start that same
    pipeline a second time and to call ``finished`` with a pipeline that
    was never started, noting for each call whether ValueError was raised.

    Returns:
        dict with ``raise_1`` (True if the second ``start`` raised
        ValueError), ``raise_2`` (True if ``finished`` on the unstarted
        pipeline raised ValueError) and ``started_at`` (``started_at``
        attribute of the started pipeline, read after disconnecting).
    """
    started = Pipeline({
        Job(u'worker_1'): Job(u'worker_2'),
        Job(u'worker_2'): Job(u'worker_3'),
    })
    never_started = Pipeline({Job(u'worker_1'): Job(u'worker_2')})

    manager = PipelineManager(api=API_ADDRESS, broadcast=BROADCAST_ADDRESS)
    manager.start(started)

    # Flags stay False unless the corresponding call raises ValueError.
    outcome = {'raise_1': False, 'raise_2': False}
    try:
        manager.start(started)
    except ValueError:
        outcome['raise_1'] = True
    try:
        manager.finished(never_started)
    except ValueError:
        outcome['raise_2'] = True

    manager.disconnect()
    outcome['started_at'] = started.started_at
    return outcome
def verify_PipelineManager_exceptions():
    """Exercise the two ValueError paths of PipelineManager.

    Starts a pipeline, then (1) starts it again and (2) asks whether a
    pipeline that was never started has finished. Each attempt is recorded
    as True when it raised ValueError and False when it returned normally.

    Returns:
        dict with keys ``raise_1``, ``raise_2`` (the two recorded outcomes,
        in the order above) and ``started_at`` (``started_at`` attribute of
        the started pipeline, read after disconnecting).
    """
    tracked = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                        Job(u'worker_2'): Job(u'worker_3')})
    untracked = Pipeline({Job(u'worker_1'): Job(u'worker_2')})
    client = PipelineManager(api=API_ADDRESS, broadcast=BROADCAST_ADDRESS)
    client.start(tracked)

    # Attempt 1: starting the same pipeline a second time.
    try:
        client.start(tracked)
    except ValueError:
        second_start_raised = True
    else:
        second_start_raised = False

    # Attempt 2: querying a pipeline the manager was never given.
    try:
        client.finished(untracked)
    except ValueError:
        unknown_finished_raised = True
    else:
        unknown_finished_raised = False

    client.disconnect()
    return {
        'raise_1': second_start_raised,
        'raise_2': unknown_finished_raised,
        'started_at': tracked.started_at,
    }
def verify_PipelineManager_exceptions():
    """Report whether misuse of PipelineManager raises ValueError.

    Connects to the manager at ``tcp://localhost:5550`` (api) and
    ``tcp://localhost:5551`` (broadcast), starts one pipeline, then probes
    two calls: starting that pipeline again, and calling ``finished`` with
    a pipeline that was never started.

    Returns:
        dict with ``raise_1`` and ``raise_2`` (True when the respective
        probe raised ValueError) and ``started_at`` (``started_at``
        attribute of the started pipeline, read after disconnecting).
    """
    def raises_value_error(method, pipeline):
        # True when method(pipeline) raises ValueError, False when it
        # returns; any other exception propagates.
        try:
            method(pipeline)
        except ValueError:
            return True
        return False

    first = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                      Job(u'worker_2'): Job(u'worker_3')})
    second = Pipeline({Job(u'worker_1'): Job(u'worker_2')})
    manager = PipelineManager(api='tcp://localhost:5550',
                              broadcast='tcp://localhost:5551')
    manager.start(first)

    restart_raised = raises_value_error(manager.start, first)
    unknown_raised = raises_value_error(manager.finished, second)

    manager.disconnect()
    return {'raise_1': restart_raised,
            'raise_2': unknown_raised,
            'started_at': first.started_at}
def send_pipeline_and_wait_finished():
    """Start ten pipelines, wait for all of them, and report timings.

    Ten pipelines sharing the same Job mapping are started, each with
    ``data={'index': i}``. The manager's ``started_pipelines`` and
    ``finished_pipelines`` counters are asserted, ``finished`` is called
    once on the first pipeline, and ``update`` is called in a loop until
    the finished counter catches up with the started counter.

    Returns:
        dict with keys ``duration`` (``duration`` attribute of the last
        pipeline started), ``real_duration`` (elapsed wall-clock seconds
        around the wait), ``finished_pipelines`` and ``started_pipelines``
        (manager counters, read after disconnecting).

    Raises:
        AssertionError: if, right after starting, the manager does not
            report 10 started and 0 finished pipelines.
    """
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    pipelines = []
    try:
        for i in range(10):
            pipeline = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                                 Job(u'worker_2'): Job(u'worker_3')},
                                data={'index': i})
            pipeline_manager.start(pipeline)
            pipelines.append(pipeline)
        assert pipeline_manager.started_pipelines == 10
        assert pipeline_manager.finished_pipelines == 0

        start = time()
        pipeline_manager.finished(pipelines[0])  # only for testing this method
        while (pipeline_manager.finished_pipelines <
               pipeline_manager.started_pipelines):
            # NOTE(review): the 0.5 argument looks like a timeout in
            # seconds -- confirm against PipelineManager.update.
            pipeline_manager.update(0.5)
        end = time()
    finally:
        # Runs on failure too, so a failed assertion or manager error does
        # not leave the manager connected.
        pipeline_manager.disconnect()

    last_pipeline = pipelines[-1]  # what the loop variable pointed to
    return {'duration': last_pipeline.duration,
            'real_duration': end - start,
            'finished_pipelines': pipeline_manager.finished_pipelines,
            'started_pipelines': pipeline_manager.started_pipelines}
def send_pipeline_and_wait_finished():
    """Start a single pipeline and poll until the manager says it finished.

    Connects to ``tcp://localhost:5550`` (api) and ``tcp://localhost:5551``
    (broadcast), starts the pipeline, then checks ``finished`` repeatedly,
    sleeping 0.1 seconds after every negative answer.

    Returns:
        dict with ``duration`` (the pipeline's ``duration`` attribute, read
        after disconnecting) and ``real_duration`` (wall-clock seconds
        spent in the polling loop).
    """
    import time

    job_graph = {Job(u'worker_1'): Job(u'worker_2'),
                 Job(u'worker_2'): Job(u'worker_3')}
    pipeline = Pipeline(job_graph)
    manager = PipelineManager(api='tcp://localhost:5550',
                              broadcast='tcp://localhost:5551')
    manager.start(pipeline)

    began = time.time()
    while True:
        if manager.finished(pipeline):
            break
        time.sleep(0.1)
    ended = time.time()

    manager.disconnect()
    return {'duration': pipeline.duration, 'real_duration': ended - began}
def main():
    """Send one pipeline per URL, wait for all of them, and print results.

    For every URL a JSON file ``/tmp/<index>.data`` containing
    ``{'url': url}`` is written, and a pipeline is started with
    ``data={'filename': <that file>}``. The function then polls the manager
    until every pipeline is reported finished, prints the pipeline
    durations, and finally re-reads each data file and prints its ``url``,
    ``download_duration``, ``number_of_words`` and ``number_of_links``
    fields (presumably filled in by the workers -- this function itself
    only writes ``url``).
    """
    # Single-argument parenthesized prints produce the same output as the
    # Python 2 print statement.
    definition = {Job('Downloader'): (Job('GetTextAndWords'), Job('GetLinks'))}
    urls = [
        'http://www.fsf.org',
        'https://creativecommons.org',
        'https://github.com',
        'http://emap.fgv.br',
        'https://twitter.com/turicas',
    ]
    # One data file per URL, named after the URL's position in the list.
    filenames = ['/tmp/{}.data'.format(position)
                 for position in range(len(urls))]
    manager = PipelineManager(api='tcp://127.0.0.1:5555',
                              broadcast='tcp://127.0.0.1:5556')

    print('Sending pipelines...')
    sent = []
    for url, filename in zip(urls, filenames):
        payload = json.dumps({'url': url})
        with open(filename, 'w') as out:
            out.write(payload)
        new_pipeline = Pipeline(definition, data={'filename': filename})
        manager.start(new_pipeline)
        sent.append(new_pipeline)
        print(' Sent pipeline for url={}'.format(url))

    print('Waiting for pipelines to finish...')
    finished_so_far = 0
    total = len(urls)
    while finished_so_far < total:
        # Every pipeline is queried on every pass, in submission order.
        finished_now = sum(1 for candidate in sent
                           if manager.finished(candidate))
        if finished_now != finished_so_far:
            print(' # of finished pipelines: {}'.format(finished_now))
            finished_so_far = finished_now

    duration_texts = []
    for done in sent:
        duration_texts.append(str(done.duration))
    print('Pipeline durations (in seconds) = {}'.format(
        ', '.join(duration_texts)))

    summary = (' url={url}, download_duration={download_duration}, '
               'number_of_words={number_of_words}, '
               'number_of_links={number_of_links}')
    for filename in filenames:
        with open(filename) as src:
            record = json.loads(src.read())
        print(summary.format(**record))