示例#1
0
 def setUp(self):
     """Create a ZMQ context, bind fake router sockets and build the
     sample pipeline used by the tests."""
     self.context = zmq.Context()
     self.start_router_sockets()
     # Simple linear pipeline: worker_1 -> worker_2 -> worker_3.
     self.pipeline = Pipeline({
         Job(u'worker_1'): Job(u'worker_2'),
         Job(u'worker_2'): Job(u'worker_3')
     })
示例#2
0
    def test_equal_not_equal_hash(self):
        """Pipelines with the same graph (and data) must compare equal and
        hash consistently; a different graph or different data must make
        them unequal."""
        job_1, job_2, job_3, job_4 = (Job('spam'), Job('eggs'), Job('ham'),
                                      Job('python'))
        pipeline_1 = Pipeline({job_1: job_2, job_2: (job_3, job_4)})
        pipeline_2 = Pipeline({job_1: job_2, job_2: (job_3, job_4)})
        pipeline_3 = Pipeline({job_1: job_2, job_2: job_3, job_3: job_4})
        # Equality and inequality must both be symmetric.
        self.assertTrue(pipeline_1 == pipeline_2)
        self.assertTrue(pipeline_2 == pipeline_1)
        self.assertTrue(pipeline_1 != pipeline_3)
        self.assertTrue(pipeline_3 != pipeline_1)

        my_set = set([pipeline_1, pipeline_2, pipeline_3])  #test __hash__
        self.assertIn(pipeline_1, my_set)
        self.assertIn(pipeline_2, my_set)
        self.assertIn(pipeline_3, my_set)

        # Same graph but attached `data` must also take part in equality.
        pipeline_with_data = Pipeline({
            job_1: job_2,
            job_2: (job_3, job_4)
        },
                                      data={'python': 42})
        pipeline_with_data_2 = Pipeline({
            job_1: job_2,
            job_2: (job_3, job_4)
        },
                                        data={'python': 42})
        self.assertTrue(pipeline_with_data == pipeline_with_data_2)
        self.assertTrue(pipeline_with_data_2 == pipeline_with_data)
        self.assertTrue(pipeline_1 != pipeline_with_data)
        self.assertTrue(pipeline_with_data != pipeline_1)
示例#3
0
def verify_PipelineManager_exceptions():
    """Run in a child process: check that PipelineManager raises ValueError
    when starting an already-started pipeline and when queried about a
    pipeline it never started.

    Returns a dict with the two "did it raise" flags and the started
    pipeline's start timestamp so the parent test can assert on them.
    """
    pipeline_1 = Pipeline({
        Job(u'worker_1'): Job(u'worker_2'),
        Job(u'worker_2'): Job(u'worker_3')
    })
    pipeline_2 = Pipeline({Job(u'worker_1'): Job(u'worker_2')})
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    pipeline_manager.start(pipeline_1)
    raise_1, raise_2 = False, False
    try:
        # Starting the same pipeline twice must be rejected.
        pipeline_manager.start(pipeline_1)
    except ValueError:
        raise_1 = True
    try:
        # Asking about a pipeline that was never started must be rejected.
        pipeline_manager.finished(pipeline_2)
    except ValueError:
        raise_2 = True

    pipeline_manager.disconnect()
    return {
        'raise_1': raise_1,
        'raise_2': raise_2,
        'started_at': pipeline_1.started_at
    }
示例#4
0
 def send_pipeline(self, pipeline_definition):
     """Build a Pipeline from `pipeline_definition` (keys: 'graph', 'data',
     'pipeline id') and send it serialized over the API socket, echoing
     the caller-provided pipeline id."""
     pipeline = Pipeline(pipeline_definition['graph'],
                         data=pipeline_definition['data'])
     self.api.send_json({
         'pipeline': pipeline.serialize(),
         'pipeline id': pipeline_definition['pipeline id']
     })
示例#5
0
 def test_deserialize(self):
     """deserialize(serialize(p)) must yield an equal pipeline whose own
     serialization round-trips unchanged."""
     job_1, job_2, job_3, job_4, job_5 = (Job('spam'), Job('eggs'),
                                          Job('ham'), Job('python'),
                                          Job('answer_42'))
     # job_5 has no successor and the pipeline carries a data payload.
     pipeline = Pipeline({job_1: job_2, job_2: (job_3, job_4), job_5: None},
                         data={'key': 42})
     serialized = pipeline.serialize()
     new_pipeline = Pipeline.deserialize(serialized)
     self.assertEqual(pipeline, new_pipeline)
     self.assertEqual(serialized, new_pipeline.serialize())
示例#6
0
    def test_repr(self):
        """repr(Pipeline) must list every job name; ordering is
        unspecified, so any permutation of the names is accepted."""
        result = repr(Pipeline({Job('A'): Job('B'), Job('B'): Job('C')}))
        # Build all 6 orderings of the three names instead of listing them.
        names = ('A', 'B', 'C')
        expected_list = ['<Pipeline: {}, {}, {}>'.format(x, y, z)
                         for x in names for y in names for z in names
                         if len(set((x, y, z))) == 3]
        self.assertIn(result, expected_list)

        # A pipeline carrying data marks it with a "data=..." suffix.
        result = repr(Pipeline({Job('A'): None}, data={'a': 'test'}))
        self.assertEqual('<Pipeline: A, data=...>', result)
示例#7
0
 def test_deserialize(self):
     """deserialize(serialize(p)) must yield an equal pipeline whose own
     serialization round-trips unchanged."""
     job_1, job_2, job_3, job_4, job_5 = (Job('spam'), Job('eggs'),
                                          Job('ham'), Job('python'),
                                          Job('answer_42'))
     # job_5 has no successor and the pipeline carries a data payload.
     pipeline = Pipeline({
         job_1: job_2,
         job_2: (job_3, job_4),
         job_5: None
     },
                         data={'key': 42})
     serialized = pipeline.serialize()
     new_pipeline = Pipeline.deserialize(serialized)
     self.assertEqual(pipeline, new_pipeline)
     self.assertEqual(serialized, new_pipeline.serialize())
示例#8
0
def send_pipeline_and_wait_finished():
    """Run in a child process: start 10 pipelines, then poll update()
    until the manager reports all of them finished.

    Returns the last pipeline's reported duration, the wall-clock wait
    time and the started/finished counters for the parent to assert on.
    """
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    pipelines = []
    for i in range(10):
        pipeline = Pipeline(
            {
                Job(u'worker_1'): Job(u'worker_2'),
                Job(u'worker_2'): Job(u'worker_3')
            },
            data={'index': i})
        pipeline_manager.start(pipeline)
        pipelines.append(pipeline)
    assert pipeline_manager.started_pipelines == 10
    assert pipeline_manager.finished_pipelines == 0
    start = time()
    pipeline_manager.finished(pipelines[0])  # only for testing this method
    # Poll (0.5s timeout per round) until every started pipeline finished.
    while pipeline_manager.finished_pipelines < pipeline_manager.started_pipelines:
        pipeline_manager.update(0.5)
    end = time()
    pipeline_manager.disconnect()
    return {
        'duration': pipeline.duration,
        'real_duration': end - start,
        'finished_pipelines': pipeline_manager.finished_pipelines,
        'started_pipelines': pipeline_manager.started_pipelines
    }
示例#9
0
    def test_repr(self):
        """repr(PipelineManager) must show submitted/finished counters.

        API and broadcast interactions are stubbed out so no real router
        is needed: replies hand back pre-generated pipeline ids and the
        fake broadcast feeds three "pipeline finished" messages.
        """
        pipeline_manager = PipelineManager(api=API_ADDRESS,
                                           broadcast=BROADCAST_ADDRESS)
        pipeline_ids = [uuid4().hex for i in range(10)]
        pipeline_ids_copy = pipeline_ids[:]
        # Stub the API so start() succeeds without a live router.
        pipeline_manager.send_api_request = lambda x: None
        pipeline_manager.get_api_reply = \
                lambda: {'pipeline id': pipeline_ids.pop()}
        pipelines = [Pipeline({Job('A', data={'index': i}): Job('B')}) \
                     for i in range(10)]
        for pipeline in pipelines:
            pipeline_manager.start(pipeline)

        result = repr(pipeline_manager)
        self.assertEqual(result, '<PipelineManager: 10 submitted, 0 finished>')

        # Announce three finished pipelines; the poll list ends with False
        # (popped last-first) so update() stops reading after three messages.
        messages = [
            'pipeline finished: id={}, duration=0.1'.format(pipeline_id)
            for pipeline_id in pipeline_ids_copy[:3]
        ]
        poll = [False, True, True, True]

        def new_poll(timeout):
            return poll.pop()

        def new_broadcast_receive():
            return messages.pop()

        pipeline_manager.broadcast_poll = new_poll
        pipeline_manager.broadcast_receive = new_broadcast_receive
        pipeline_manager.update(0.1)
        result = repr(pipeline_manager)
        self.assertEqual(result, '<PipelineManager: 10 submitted, 3 finished>')
示例#10
0
    def test_str_and_save_dot(self):
        """str(pipeline) must render the graph in DOT format and
        save_dot() must write the same text (plus a trailing newline)."""
        pipeline = Pipeline({Job('A'): Job('B'), Job('C'): None})
        result = str(pipeline)
        # A job without a successor points at the pseudo-node "(None)".
        expected = dedent('''
        digraph graphname {
            "A";
            "C";
            "B";

            "A" -> "B";
            "C" -> "(None)";
        }
        ''').strip()
        self.assertEqual(result, expected)

        # Tuple keys fan several source jobs into the same successors.
        pipeline = Pipeline({(Job('A'), Job('B'), Job('C')): [Job('D')],
                             Job('E'): (Job('B'), Job('F'))})
        result = str(pipeline)
        expected = dedent('''
        digraph graphname {
            "A";
            "C";
            "B";
            "E";
            "D";
            "F";

            "A" -> "D";
            "B" -> "D";
            "C" -> "D";
            "E" -> "B";
            "E" -> "F";
        }
        ''').strip()

        self.assertEqual(result, expected)
        # Round-trip through a real file; save_dot appends a newline.
        temp_file = NamedTemporaryFile(delete=False)
        temp_file.close()
        pipeline.save_dot(temp_file.name)
        temp_file = open(temp_file.name)
        file_contents = temp_file.read()
        temp_file.close()
        self.assertEqual(expected + '\n', file_contents)
        unlink(temp_file.name)
示例#11
0
 def test_jobs(self):
     """`jobs` must contain every job mentioned anywhere in the graph,
     whether it appears as a source, a destination or both."""
     pipeline = Pipeline({
         Job('A'): [Job('B')],
         Job('B'): [Job('C'), Job('D'), Job('E')],
         Job('Z'): [Job('W')],
         Job('W'): Job('A')
     })
     expected = set(Job(name) for name in ('A', 'B', 'C', 'D', 'E',
                                           'W', 'Z'))
     self.assertEqual(set(pipeline.jobs), expected)
示例#12
0
    def test_serialize(self):
        """serialize() must emit the normalized edge list plus the data
        payload as a tuple of items, and round-trip via deserialize()."""
        job_1, job_2, job_3, job_4 = (Job('spam'), Job('eggs'), Job('ham'),
                                      Job('python'))
        pipeline = Pipeline({job_1: job_2, job_2: (job_3, job_4)})
        result = pipeline.serialize()
        expected = {'graph': ((job_1.serialize(), job_2.serialize()),
                              (job_2.serialize(), job_3.serialize()),
                              (job_2.serialize(), job_4.serialize())),
                    'data': None}
        expected = tuple(expected.items())

        # Compare as dicts so edge/item ordering does not matter.
        result = dict(result)
        expected = dict(expected)
        result['graph'] = dict(result['graph'])
        expected['graph'] = dict(expected['graph'])
        self.assertEqual(result, expected)

        pipeline = Pipeline({job_1: job_2}, data={'python': 42})
        self.assertEqual(pipeline, Pipeline.deserialize(pipeline.serialize()))
示例#13
0
def send_pipeline():
    """Run in a child process: start one pipeline and return its id
    before start(), the id returned by start() and the id stored on the
    pipeline afterwards."""
    pipeline = Pipeline({
        Job(u'worker_1'): Job(u'worker_2'),
        Job(u'worker_2'): Job(u'worker_3')
    })
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    before = pipeline.id
    pipeline_id = pipeline_manager.start(pipeline)
    pipeline_manager.disconnect()
    return before, pipeline_id, pipeline.id
示例#14
0
    def test_validate_graph(self):
        """Graph validation must reject graphs without a starter node and
        graphs containing cycles (ValueError in both cases)."""
        #should have at least one starter node
        with self.assertRaises(ValueError):
            Pipeline({Job('A'): Job('A')})
        with self.assertRaises(ValueError):
            Pipeline({Job('A'): [Job('B')], Job('B'): [Job('A')]})

        #should not have cycles
        with self.assertRaises(ValueError):
            Pipeline({
                Job('A'): [Job('B')],
                Job('B'): [Job('C')],
                Job('C'): [Job('B')]
            })
        with self.assertRaises(ValueError):
            Pipeline({
                Job('A'): [Job('B')],
                Job('B'): [Job('C')],
                Job('C'): [Job('D')],
                Job('D'): [Job('B')]
            })
示例#15
0
    def test_get_starters(self):
        """`starters` must be the jobs that are never a destination of any
        edge, i.e. the entry points of the graph."""
        result = Pipeline({Job('A'): []}).starters
        expected = (Job('A'), )
        self.assertEqual(set(result), set(expected))

        result = Pipeline({Job('A'): [], Job('B'): []}).starters
        expected = (Job('A'), Job('B'))
        self.assertEqual(set(result), set(expected))

        result = Pipeline({Job('A'): [Job('B')], Job('B'): []}).starters
        expected = (Job('A'), )
        self.assertEqual(set(result), set(expected))

        result = Pipeline({
            Job('A'): [Job('B')],
            Job('B'): [Job('C'), Job('D'), Job('E')],
            Job('Z'): [Job('W')]
        }).starters
        expected = (Job('A'), Job('Z'))
        self.assertEqual(set(result), set(expected))

        # Tuple keys: every member of the tuple is a starter.
        result = Pipeline({(Job('A'), Job('B'), Job('C')): Job('D')}).starters
        expected = [Job('A'), Job('B'), Job('C')]
        self.assertEqual(set(result), set(expected))

        # B is not a starter here because the edge E -> B makes it a
        # destination.
        result = Pipeline({
            (Job('A'), Job('B'), Job('C')): [Job('D')],
            Job('E'): (Job('B'), Job('F'))
        }).starters
        expected = (Job('A'), Job('C'), Job('E'))
        self.assertEqual(set(result), set(expected))
示例#16
0
def main():
    """Python 2 demo: run a fixed URL list through a
    Downloader -> (GetTextAndWords, GetLinks) pipeline and report timings.

    Each pipeline exchanges its input/output through a JSON file in /tmp
    keyed by the URL's index; workers are expected to update that file.
    """
    pipeline_definition = {Job('Downloader'): (Job('GetTextAndWords'),
                                               Job('GetLinks'))}
    urls = ['http://www.fsf.org', 'https://creativecommons.org',
            'http://emap.fgv.br', 'https://twitter.com/turicas',
            'http://www.pypln.org', 'http://www.zeromq.org',
            'http://www.python.org', 'http://www.mongodb.org',
            'http://github.com', 'http://pt.wikipedia.org']

    pipeline_manager = PipelineManager(api='tcp://127.0.0.1:5555',
                                       broadcast='tcp://127.0.0.1:5556')
    print 'Sending pipelines...'
    start_time = time()
    my_pipelines = []  # NOTE(review): never appended to below -- looks unused
    for index, url in enumerate(urls):
        # Seed the per-pipeline data file the workers will read/update.
        filename = '/tmp/{}.dat'.format(index)
        data = json.dumps({'url': url})
        with open(filename, 'w') as fp:
            fp.write(data)
        pipeline = Pipeline(pipeline_definition, data={'filename': filename})
        pipeline_manager.start(pipeline)
        print '  Sent pipeline for url={}'.format(url)

    print
    print 'Waiting for pipelines to finish...'
    total_pipelines = pipeline_manager.started_pipelines
    finished_pipelines = 0
    # Poll until all pipelines finished, showing progress on one line.
    while finished_pipelines < total_pipelines:
        pipeline_manager.update(0.5)
        finished_pipelines = pipeline_manager.finished_pipelines
        percentual = 100 * (float(finished_pipelines) / total_pipelines)
        sys.stdout.write('\rFinished pipelines: {}/{} ({:5.2f}%)'\
                         .format(finished_pipelines, total_pipelines,
                                 percentual))
        sys.stdout.flush()
    end_time = time()
    print '\rAll pipelines finished in {} seconds'.format(end_time - start_time)

    durations = [pipeline.duration for pipeline in pipeline_manager.pipelines]
    average_duration = sum(durations) / len(durations)
    print 'Average pipeline duration (seconds) = {} (min={}, max={})'\
          .format(average_duration, min(durations), max(durations))
    print

    # Show what the store worker wrote back into each data file.
    print 'Some data saved by store:'
    for index, url in enumerate(urls):
        filename = '/tmp/{}.dat'.format(index)
        with open(filename) as fp:
            data = json.loads(fp.read())
        print ('  url={url}, download_duration={download_duration}, '
               'number_of_words={number_of_words}, '
               'number_of_links={number_of_links}'.format(**data))
0
 def test_pipeline_should_propagate_data_among_jobs(self):
     """The pipeline's data must be propagated to every job, and each job
     must get a back-reference to its pipeline."""
     job_1 = Job('w1')
     job_2 = Job('w2')
     job_3 = Job('w3')
     pipeline_data = {'python': 42}
     pipeline = Pipeline({job_1: job_2, job_2: job_3}, data=pipeline_data)
     self.assertEqual(pipeline.data, pipeline_data)
     self.assertEqual(job_1.data, pipeline_data)
     self.assertEqual(job_2.data, pipeline_data)
     self.assertEqual(job_3.data, pipeline_data)
     self.assertEqual(job_1.pipeline, pipeline)
     self.assertEqual(job_2.pipeline, pipeline)
     self.assertEqual(job_3.pipeline, pipeline)
示例#18
0
    def test_should_send_add_pipeline_with_serialized_pipeline(self):
        """start() must send an 'add pipeline' command carrying the
        serialized pipeline; the child process blocks until we reply."""
        result, pool = run_in_parallel(send_pipeline)
        message = self.api.recv_json()
        received = Pipeline.deserialize(message['pipeline']).serialize()
        expected = self.pipeline.serialize()
        self.assertEqual(set(message.keys()), set(['command', 'pipeline']))
        self.assertEqual(message['command'], 'add pipeline')
        self.assertEqual(received, expected)

        # Reply so the child's start() call can return.
        pipeline_id = uuid4().hex
        self.api.send_json({'answer': 'pipeline accepted',
                            'pipeline id': pipeline_id})
        result.get()
        pool.terminate()
示例#19
0
    def test_should_send_add_pipeline_with_serialized_pipeline(self):
        """start() must send an 'add pipeline' command carrying the
        serialized pipeline; the child process blocks until we reply."""
        result, pool = run_in_parallel(send_pipeline)
        message = self.api.recv_json()
        received = Pipeline.deserialize(message['pipeline']).serialize()
        expected = self.pipeline.serialize()
        self.assertEqual(set(message.keys()), set(['command', 'pipeline']))
        self.assertEqual(message['command'], 'add pipeline')
        self.assertEqual(received, expected)

        # Reply so the child's start() call can return.
        pipeline_id = uuid4().hex
        self.api.send_json({
            'answer': 'pipeline accepted',
            'pipeline id': pipeline_id
        })
        result.get()
        pool.terminate()
示例#20
0
 def test_should_return_all_pipelines(self):
     """`pipelines` must expose every pipeline ever started through the
     manager (API stubbed out, no live router needed)."""
     pipeline_manager = PipelineManager(api=API_ADDRESS,
                                        broadcast=BROADCAST_ADDRESS)
     pipeline_manager.send_api_request = lambda x: None
     pipeline_manager.get_api_reply = lambda: {'pipeline id': uuid4().hex}
     iterations = 10
     pipelines = []
     for i in range(iterations):
         pipeline = Pipeline(
             {
                 Job(u'worker_1'): Job(u'worker_2'),
                 Job(u'worker_2'): Job(u'worker_3')
             },
             data={'index': i})
         pipeline_manager.start(pipeline)
         pipelines.append(pipeline)
     self.assertEqual(set(pipeline_manager.pipelines), set(pipelines))
示例#21
0
def main():
    """Python 2 benchmark: submit NUMBER_OF_PIPELINES two-job pipelines,
    logging per-submission latency and process memory to a .dat file, then
    wait until every pipeline is reported finished."""
    # Bind hot-loop attribute lookups to locals.
    stdout_write = sys.stdout.write
    stdout_flush = sys.stdout.flush
    pipeline_manager = PipelineManager(api=ROUTER_API,
                                       broadcast=ROUTER_BROADCAST)
    pipeline_definition = {Job('Dummy1'): Job('Dummy2')}
    process = psutil.Process(os.getpid())
    version = sys.argv[1]  # pypelinin version label for the output filename
    filename = 'test-{}_pipelines-pypelinin-{}.dat'.format(
        NUMBER_OF_PIPELINES, version)
    data = open(filename, 'w')
    my_pipelines = []
    for i in xrange(NUMBER_OF_PIPELINES):
        pipeline = Pipeline(pipeline_definition, data={'index': i})
        start_time = time()
        pipeline_manager.start(pipeline)
        end_time = time()
        my_pipelines.append(pipeline)
        # Record: count, submission latency, virtual and resident memory.
        memory_info = process.get_memory_info()
        info = (i + 1, end_time - start_time, memory_info.vms, memory_info.rss)
        data.write('{}\t{}\t{}\t{}\n'.format(*info))
        if (i + 1) % UPDATE_INTERVAL == 0:
            stdout_write('\r{} out of {}'.format(i + 1, NUMBER_OF_PIPELINES))
            stdout_flush()
    stdout_write('\rfinished sending pipelines! \o/\n')

    stdout_write('Waiting for pipelines to finish...\n')
    pipelines_finished = 0
    finished = pipeline_manager.finished
    # Busy-wait: each finished() call refreshes the state of all pipelines.
    while pipelines_finished < NUMBER_OF_PIPELINES:
        finished(my_pipelines[0])  # just need one call to update state of all
        counter = [pipeline.finished for pipeline in my_pipelines].count(True)
        if counter != pipelines_finished:
            stdout_write('\r # of finished pipelines: {}/{}'.format(
                counter, NUMBER_OF_PIPELINES))
            stdout_flush()
            pipelines_finished = counter
    stdout_write('\n')
    data.close()
示例#22
0
    def test_str_and_save_dot(self):
        """str(pipeline) must render the graph in DOT format and
        save_dot() must write the same text (plus a trailing newline)."""
        pipeline = Pipeline({Job('A'): Job('B'), Job('C'): None})
        result = str(pipeline)
        # A job without a successor points at the pseudo-node "(None)".
        expected = dedent('''
        digraph graphname {
            "A";
            "C";
            "B";

            "A" -> "B";
            "C" -> "(None)";
        }
        ''').strip()
        self.assertEqual(result, expected)

        # Tuple keys fan several source jobs into the same successors.
        pipeline = Pipeline({
            (Job('A'), Job('B'), Job('C')): [Job('D')],
            Job('E'): (Job('B'), Job('F'))
        })
        result = str(pipeline)
        expected = dedent('''
        digraph graphname {
            "A";
            "C";
            "B";
            "E";
            "D";
            "F";

            "A" -> "D";
            "B" -> "D";
            "C" -> "D";
            "E" -> "B";
            "E" -> "F";
        }
        ''').strip()

        self.assertEqual(result, expected)
        # Round-trip through a real file; save_dot appends a newline.
        temp_file = NamedTemporaryFile(delete=False)
        temp_file.close()
        pipeline.save_dot(temp_file.name)
        temp_file = open(temp_file.name)
        file_contents = temp_file.read()
        temp_file.close()
        self.assertEqual(expected + '\n', file_contents)
        unlink(temp_file.name)
示例#23
0
    def test_serialize(self):
        """serialize() must emit the normalized edge list plus the data
        payload as a tuple of items, and round-trip via deserialize()."""
        job_1, job_2, job_3, job_4 = (Job('spam'), Job('eggs'), Job('ham'),
                                      Job('python'))
        pipeline = Pipeline({job_1: job_2, job_2: (job_3, job_4)})
        result = pipeline.serialize()
        expected = {
            'graph':
            ((job_1.serialize(), job_2.serialize()),
             (job_2.serialize(), job_3.serialize()), (job_2.serialize(),
                                                      job_4.serialize())),
            'data':
            None
        }
        expected = tuple(expected.items())

        # Compare as dicts so edge/item ordering does not matter.
        result = dict(result)
        expected = dict(expected)
        result['graph'] = dict(result['graph'])
        expected['graph'] = dict(expected['graph'])
        self.assertEqual(result, expected)

        pipeline = Pipeline({job_1: job_2}, data={'python': 42})
        self.assertEqual(pipeline, Pipeline.deserialize(pipeline.serialize()))
示例#24
0
 def send_pipeline(self, pipeline_definition):
     """Build a Pipeline from `pipeline_definition` (keys: 'graph', 'data',
     'pipeline id') and send it serialized over the API socket."""
     definition = pipeline_definition
     pipeline = Pipeline(definition['graph'], data=definition['data'])
     message = {
         'pipeline': pipeline.serialize(),
         'pipeline id': definition['pipeline id'],
     }
     self.api.send_json(message)
示例#25
0
class PipelineManagerTest(unittest.TestCase):
    """Integration tests for PipelineManager against fake router sockets.

    setUp binds a REP socket (API) and a PUB socket (broadcast) in this
    process, so the manager under test talks to us instead of a real
    router daemon. Helper functions (send_pipeline, etc.) run in a child
    process via run_in_parallel and report results back for assertions.
    """

    def setUp(self):
        self.context = zmq.Context()
        self.start_router_sockets()
        # Simple linear pipeline reused by the send_pipeline helper tests.
        self.pipeline = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                                  Job(u'worker_2'): Job(u'worker_3')})

    def tearDown(self):
        self.close_sockets()
        self.context.term()

    def start_router_sockets(self):
        """Bind the fake router endpoints: REP for API, PUB for broadcast."""
        self.api = self.context.socket(zmq.REP)
        self.broadcast = self.context.socket(zmq.PUB)
        self.api.bind(API_ADDRESS)
        self.broadcast.bind(BROADCAST_ADDRESS)

    def close_sockets(self):
        self.api.close()
        self.broadcast.close()

    def test_repr(self):
        """repr(PipelineManager) must show submitted/finished counters."""
        pipeline_manager = PipelineManager(api=API_ADDRESS,
                                           broadcast=BROADCAST_ADDRESS)
        pipeline_ids = [uuid4().hex for i in range(10)]
        pipeline_ids_copy = pipeline_ids[:]
        # Stub the API so start() succeeds without a live router.
        pipeline_manager.send_api_request = lambda x: None
        pipeline_manager.get_api_reply = \
                lambda: {'pipeline id': pipeline_ids.pop()}
        pipelines = [Pipeline({Job('A', data={'index': i}): Job('B')}) \
                     for i in range(10)]
        for pipeline in pipelines:
            pipeline_manager.start(pipeline)

        result = repr(pipeline_manager)
        self.assertEqual(result, '<PipelineManager: 10 submitted, 0 finished>')

        # Feed three "pipeline finished" broadcasts; the poll list ends
        # with False (popped last-first) so update() stops reading.
        messages = ['pipeline finished: id={}, duration=0.1'.format(pipeline_id)
                    for pipeline_id in pipeline_ids_copy[:3]]
        poll = [False, True, True, True]

        def new_poll(timeout):
            return poll.pop()

        def new_broadcast_receive():
            return messages.pop()

        pipeline_manager.broadcast_poll = new_poll
        pipeline_manager.broadcast_receive = new_broadcast_receive
        pipeline_manager.update(0.1)
        result = repr(pipeline_manager)
        self.assertEqual(result, '<PipelineManager: 10 submitted, 3 finished>')

    def test_should_send_add_pipeline_with_serialized_pipeline(self):
        """start() must send an 'add pipeline' command carrying the
        serialized pipeline."""
        result, pool = run_in_parallel(send_pipeline)
        message = self.api.recv_json()
        received = Pipeline.deserialize(message['pipeline']).serialize()
        expected = self.pipeline.serialize()
        self.assertEqual(set(message.keys()), set(['command', 'pipeline']))
        self.assertEqual(message['command'], 'add pipeline')
        self.assertEqual(received, expected)

        # Reply so the child's start() call can return.
        pipeline_id = uuid4().hex
        self.api.send_json({'answer': 'pipeline accepted',
                            'pipeline id': pipeline_id})
        result.get()
        pool.terminate()

    def test_should_save_pipeline_id_on_pipeline_object(self):
        """The id assigned by the router must be stored on the pipeline."""
        result, pool = run_in_parallel(send_pipeline)
        message = self.api.recv_json()
        pipeline_id = uuid4().hex
        self.api.send_json({'answer': 'pipeline accepted',
                            'pipeline id': pipeline_id})
        received = result.get()
        pool.terminate()
        # (id before start, id returned by start(), id after start)
        self.assertEqual(received, (None, pipeline_id, pipeline_id))

    def test_should_subscribe_to_broadcast_to_wait_for_finished_pipeline(self):
        """The manager must learn about finished pipelines via broadcast."""
        result, pool = run_in_parallel(send_pipeline_and_wait_finished)
        pipeline_ids = []
        for i in range(10):
            message = self.api.recv_json()
            pipeline_id = uuid4().hex
            self.api.send_json({'answer': 'pipeline accepted',
                                'pipeline id': pipeline_id})
            pipeline_ids.append(pipeline_id)
        sleep(1)  # give the subscriber time to connect before publishing
        for pipeline_id in pipeline_ids:
            self.broadcast.send('pipeline finished: id={}, duration=1.23456'\
                                .format(pipeline_id))
        received = result.get()
        pool.terminate()
        self.assertEqual(received['duration'], 1.23456)
        self.assertTrue(received['real_duration'] > 1)
        # Bug fix: assertTrue(x, 10) treated 10 as the failure *message*
        # and never compared anything -- these must be equality checks.
        self.assertEqual(received['finished_pipelines'], 10)
        self.assertEqual(received['started_pipelines'], 10)

    def test_should_raise_ValueError_in_some_cases(self):
        """start() on an already-started pipeline and finished() on an
        unknown pipeline must both raise ValueError (checked in child)."""
        result, pool = run_in_parallel(verify_PipelineManager_exceptions)
        message = self.api.recv_json()
        pipeline_id = uuid4().hex
        self.api.send_json({'answer': 'pipeline accepted',
                            'pipeline id': pipeline_id})
        start_time = time()
        received = result.get()
        pool.terminate()
        self.assertTrue(received['raise_1'])
        self.assertTrue(received['raise_2'])
        # started_at must be close to "now" (within 100ms of this process).
        started_at = received['started_at']
        self.assertTrue(start_time - 0.1 <= started_at <= start_time + 0.1)

    def test_should_return_all_pipelines(self):
        """`pipelines` must expose every pipeline ever started."""
        pipeline_manager = PipelineManager(api=API_ADDRESS,
                                           broadcast=BROADCAST_ADDRESS)
        pipeline_manager.send_api_request = lambda x: None
        pipeline_manager.get_api_reply = lambda: {'pipeline id': uuid4().hex}
        iterations = 10
        pipelines = []
        for i in range(iterations):
            pipeline = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                                 Job(u'worker_2'): Job(u'worker_3')},
                                data={'index': i})
            pipeline_manager.start(pipeline)
            pipelines.append(pipeline)
        self.assertEqual(set(pipeline_manager.pipelines), set(pipelines))
示例#26
0
 def setUp(self):
     """Create a ZMQ context, bind fake router sockets and build the
     sample pipeline used by the tests."""
     self.context = zmq.Context()
     self.start_router_sockets()
     # Simple linear pipeline: worker_1 -> worker_2 -> worker_3.
     self.pipeline = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                               Job(u'worker_2'): Job(u'worker_3')})
示例#27
0
def create_pipeline(data):
    """Start `default_pipeline` with the given data on the router
    configured in settings.

    NOTE(review): the manager is neither returned nor disconnected --
    verify that dropping it immediately is intended.
    """
    manager = PipelineManager(settings.ROUTER_API, settings.ROUTER_BROADCAST)
    pipeline = Pipeline(default_pipeline, data=data)
    manager.start(pipeline)
示例#28
0
 def test_only_accept_Job_objects(self):
     """Graph entries that are not Job instances must raise ValueError."""
     with self.assertRaises(ValueError):
         Pipeline({'test': 123})
示例#29
0
    def test_normalize(self):
        """The constructor must normalize every input form (single job,
        list/tuple of jobs, tuple keys) into a flat `_graph` edge list,
        using None as the successor of terminal jobs."""
        result = Pipeline({Job('A'): Job('B')})._graph
        expected = [(Job('A'), Job('B'))]
        self.assertEqual(set(result), set(expected))

        # A one-element list value is equivalent to the bare job.
        result = Pipeline({Job('A'): [Job('B')]})._graph
        expected = [(Job('A'), Job('B'))]
        self.assertEqual(set(result), set(expected))

        # Tuples on both sides are unpacked as well.
        result = Pipeline({(Job('A'), ): (Job('B'), )})._graph
        expected = [(Job('A'), Job('B'))]
        self.assertEqual(set(result), set(expected))

        # A tuple key produces one edge per source job.
        result = Pipeline({(Job('A'), Job('C')): Job('B')})._graph
        expected = [(Job('A'), Job('B')), (Job('C'), Job('B'))]
        self.assertEqual(set(result), set(expected))

        # Tuple key x list value: full cartesian product of edges.
        graph = {(Job('A'), Job('C')): [Job('B'), Job('D'), Job('E')]}
        result = Pipeline(graph)._graph
        expected = [(Job('A'), Job('B')), (Job('A'), Job('D')),
                    (Job('A'), Job('E')), (Job('C'), Job('B')),
                    (Job('C'), Job('D')), (Job('C'), Job('E'))]
        self.assertEqual(set(result), set(expected))

        # A job with no successors maps to (job, None).
        result = Pipeline({
            Job('ABC'): []
        })._graph  # problem here if use string
        expected = [(Job('ABC'), None)]
        self.assertEqual(set(result), set(expected))

        result = Pipeline({Job('A'): [], Job('B'): []})._graph
        expected = [(Job('A'), None), (Job('B'), None)]
        self.assertEqual(set(result), set(expected))

        result = Pipeline({Job('A'): [Job('B')], Job('B'): []})._graph
        expected = [(Job('A'), Job('B')), (Job('B'), None)]
        self.assertEqual(set(result), set(expected))

        result = Pipeline({
            Job('QWE'): [Job('B')],
            Job('B'): [Job('C'), Job('D'), Job('E')],
            Job('Z'): [Job('W')]
        })._graph
        expected = [(Job('QWE'), Job('B')), (Job('B'), Job('C')),
                    (Job('B'), Job('D')), (Job('B'), Job('E')),
                    (Job('Z'), Job('W'))]
        self.assertEqual(set(result), set(expected))

        result = Pipeline({(Job('A'), Job('B'), Job('C')): [Job('D')]})._graph
        expected = [(Job('A'), Job('D')), (Job('B'), Job('D')),
                    (Job('C'), Job('D'))]
        self.assertEqual(set(result), set(expected))

        result = Pipeline({
            (Job('A'), Job('B'), Job('C')): [Job('D')],
            Job('E'): (Job('B'), Job('F'))
        })._graph
        expected = [(Job('A'), Job('D')), (Job('B'), Job('D')),
                    (Job('C'), Job('D')), (Job('E'), Job('B')),
                    (Job('E'), Job('F'))]
        self.assertEqual(set(result), set(expected))
示例#30
0
 def test_default_attributes(self):
     """A freshly built pipeline has no data, no id and no sent jobs."""
     pipeline = Pipeline({Job('test'): None})
     self.assertEqual(pipeline.data, None)
     self.assertEqual(pipeline.id, None)
     self.assertEqual(pipeline.jobs, (Job('test'), ))
     self.assertEqual(pipeline.sent_jobs, set())
示例#31
0
class PipelineManagerTest(unittest.TestCase):
    """Integration-style tests for ``PipelineManager``.

    The test case binds local ZeroMQ REP (API) and PUB (broadcast) sockets
    that impersonate the router daemon, then exercises a ``PipelineManager``
    either directly (with its network methods stubbed out) or from a helper
    function running in a separate process via ``run_in_parallel``.
    """

    def setUp(self):
        # One zmq context per test; sockets are bound before building the
        # sample three-worker pipeline used by the helper functions.
        self.context = zmq.Context()
        self.start_router_sockets()
        self.pipeline = Pipeline({
            Job(u'worker_1'): Job(u'worker_2'),
            Job(u'worker_2'): Job(u'worker_3')
        })

    def tearDown(self):
        # Close the sockets first, then terminate the context they came from.
        self.close_sockets()
        self.context.term()

    def start_router_sockets(self):
        """Bind the fake router's API (REP) and broadcast (PUB) sockets."""
        self.api = self.context.socket(zmq.REP)
        self.broadcast = self.context.socket(zmq.PUB)
        self.api.bind(API_ADDRESS)
        self.broadcast.bind(BROADCAST_ADDRESS)

    def close_sockets(self):
        self.api.close()
        self.broadcast.close()

    def test_repr(self):
        """repr() reflects submitted and finished pipeline counts."""
        pipeline_manager = PipelineManager(api=API_ADDRESS,
                                           broadcast=BROADCAST_ADDRESS)
        pipeline_ids = [uuid4().hex for i in range(10)]
        # Keep a copy: pipeline_ids is consumed (pop) by the stubbed reply.
        pipeline_ids_copy = pipeline_ids[:]
        # Stub out the network layer so start() never touches a socket.
        pipeline_manager.send_api_request = lambda x: None
        pipeline_manager.get_api_reply = \
                lambda: {'pipeline id': pipeline_ids.pop()}
        pipelines = [Pipeline({Job('A', data={'index': i}): Job('B')}) \
                     for i in range(10)]
        for pipeline in pipelines:
            pipeline_manager.start(pipeline)

        result = repr(pipeline_manager)
        self.assertEqual(result, '<PipelineManager: 10 submitted, 0 finished>')

        # Fake broadcast traffic: three "pipeline finished" messages.
        messages = [
            'pipeline finished: id={}, duration=0.1'.format(pipeline_id)
            for pipeline_id in pipeline_ids_copy[:3]
        ]
        # Popped from the end: three truthy polls, then False — presumably
        # update() polls until this returns False (TODO confirm against
        # PipelineManager.update).
        poll = [False, True, True, True]

        def new_poll(timeout):
            return poll.pop()

        def new_broadcast_receive():
            return messages.pop()

        pipeline_manager.broadcast_poll = new_poll
        pipeline_manager.broadcast_receive = new_broadcast_receive
        pipeline_manager.update(0.1)
        result = repr(pipeline_manager)
        self.assertEqual(result, '<PipelineManager: 10 submitted, 3 finished>')

    def test_should_send_add_pipeline_with_serialized_pipeline(self):
        """start() must send an 'add pipeline' command with the pipeline."""
        # send_pipeline runs in another process and talks to our REP socket.
        result, pool = run_in_parallel(send_pipeline)
        message = self.api.recv_json()
        # Round-trip through deserialize/serialize to compare canonical forms.
        received = Pipeline.deserialize(message['pipeline']).serialize()
        expected = self.pipeline.serialize()
        self.assertEqual(set(message.keys()), set(['command', 'pipeline']))
        self.assertEqual(message['command'], 'add pipeline')
        self.assertEqual(received, expected)

        # REP socket must reply before the helper process can finish.
        pipeline_id = uuid4().hex
        self.api.send_json({
            'answer': 'pipeline accepted',
            'pipeline id': pipeline_id
        })
        result.get()
        pool.terminate()

    def test_should_save_pipeline_id_on_pipeline_object(self):
        """The id returned by the router is stored on the Pipeline object."""
        result, pool = run_in_parallel(send_pipeline)
        message = self.api.recv_json()
        pipeline_id = uuid4().hex
        self.api.send_json({
            'answer': 'pipeline accepted',
            'pipeline id': pipeline_id
        })
        received = result.get()
        pool.terminate()
        # Helper returns (id before start, id after start, start()'s result).
        self.assertEqual(received, (None, pipeline_id, pipeline_id))

    def test_should_subscribe_to_broadcast_to_wait_for_finished_pipeline(self):
        """Manager learns about finished pipelines via the PUB socket."""
        result, pool = run_in_parallel(send_pipeline_and_wait_finished)
        pipeline_ids = []
        # Accept ten pipelines over the REP socket.
        for i in range(10):
            message = self.api.recv_json()
            pipeline_id = uuid4().hex
            self.api.send_json({
                'answer': 'pipeline accepted',
                'pipeline id': pipeline_id
            })
            pipeline_ids.append(pipeline_id)
        # Give the subscriber time to connect before publishing (PUB drops
        # messages sent while no subscriber is attached — slow-joiner).
        sleep(1)
        for pipeline_id in pipeline_ids:
            self.broadcast.send('pipeline finished: id={}, duration=1.23456'\
                                .format(pipeline_id))
        received = result.get()
        pool.terminate()
        # Duration is parsed from the broadcast message ...
        self.assertEqual(received['duration'], 1.23456)
        # ... while real_duration covers the >1s sleep above.
        self.assertTrue(received['real_duration'] > 1)
        self.assertTrue(received['finished_pipelines'], 10)
        self.assertTrue(received['started_pipelines'], 10)

    def test_should_raise_ValueError_in_some_cases(self):
        """Helper records whether the expected ValueErrors were raised."""
        result, pool = run_in_parallel(verify_PipelineManager_exceptions)
        message = self.api.recv_json()
        pipeline_id = uuid4().hex
        self.api.send_json({
            'answer': 'pipeline accepted',
            'pipeline id': pipeline_id
        })
        start_time = time()
        received = result.get()
        pool.terminate()
        self.assertTrue(received['raise_1'])
        self.assertTrue(received['raise_2'])
        # The recorded start timestamp must be close to the moment we
        # accepted the pipeline above.
        started_at = received['started_at']
        self.assertTrue(start_time - 0.1 <= started_at <= start_time + 0.1)

    def test_should_return_all_pipelines(self):
        """PipelineManager.pipelines exposes every started pipeline."""
        pipeline_manager = PipelineManager(api=API_ADDRESS,
                                           broadcast=BROADCAST_ADDRESS)
        # Stub the network layer: start() only needs a fresh id per call.
        pipeline_manager.send_api_request = lambda x: None
        pipeline_manager.get_api_reply = lambda: {'pipeline id': uuid4().hex}
        iterations = 10
        pipelines = []
        for i in range(iterations):
            pipeline = Pipeline(
                {
                    Job(u'worker_1'): Job(u'worker_2'),
                    Job(u'worker_2'): Job(u'worker_3')
                },
                data={'index': i})
            pipeline_manager.start(pipeline)
            pipelines.append(pipeline)
        self.assertEqual(set(pipeline_manager.pipelines), set(pipelines))
示例#32
0
class PipelineManagerTest(unittest.TestCase):
    def setUp(self):
        """Create a zmq context, bind the fake router sockets and build
        the three-worker sample pipeline the helper functions expect."""
        self.context = zmq.Context()
        self.start_router_sockets()
        graph = {
            Job(u'worker_1'): Job(u'worker_2'),
            Job(u'worker_2'): Job(u'worker_3'),
        }
        self.pipeline = Pipeline(graph)

    def tearDown(self):
        """Release the zmq resources created in setUp: sockets first,
        then the context they belong to."""
        self.close_sockets()
        self.context.term()

    def start_router_sockets(self):
        """Bind the fake router's API (REP) and broadcast (PUB) sockets.

        Uses the module-level address constants instead of duplicating the
        'tcp://127.0.0.1:555x' literals, so the bound addresses cannot
        drift from the ones the helper functions (send_pipeline, etc.)
        and the sibling PipelineManagerTest connect to.
        """
        self.api = self.context.socket(zmq.REP)
        self.broadcast = self.context.socket(zmq.PUB)
        self.api.bind(API_ADDRESS)
        self.broadcast.bind(BROADCAST_ADDRESS)

    def close_sockets(self):
        """Close both router-side sockets (called from tearDown)."""
        self.api.close()
        self.broadcast.close()

    def test_should_send_add_pipeline_with_serialized_pipeline(self):
        """start() must send an 'add pipeline' command carrying the
        serialized pipeline over the API socket."""
        # send_pipeline runs in another process and talks to our REP socket.
        result, pool = run_in_parallel(send_pipeline)
        message = self.api.recv_json()
        # Round-trip through deserialize/serialize to compare canonical forms.
        received = Pipeline.deserialize(message['pipeline']).serialize()
        expected = self.pipeline.serialize()
        self.assertEqual(set(message.keys()), set(['command', 'pipeline']))
        self.assertEqual(message['command'], 'add pipeline')
        self.assertEqual(received, expected)

        # The REP socket must reply before the helper process can finish.
        pipeline_id = uuid4().hex
        self.api.send_json({'answer': 'pipeline accepted',
                            'pipeline id': pipeline_id})
        result.get()
        pool.terminate()

    def test_should_save_pipeline_id_on_pipeline_object(self):
        """The id returned by the router must be stored on the Pipeline."""
        result, pool = run_in_parallel(send_pipeline)
        message = self.api.recv_json()
        pipeline_id = uuid4().hex
        self.api.send_json({'answer': 'pipeline accepted',
                            'pipeline id': pipeline_id})
        received = result.get()
        pool.terminate()
        # Helper returns (id before start, id after start, start()'s result).
        self.assertEqual(received, (None, pipeline_id, pipeline_id))

    def test_should_subscribe_to_broadcast_to_wait_for_finished_pipeline(self):
        """The manager learns about finished pipelines via the PUB socket."""
        result, pool = run_in_parallel(send_pipeline_and_wait_finished)
        message = self.api.recv_json()
        pipeline_id = uuid4().hex
        self.api.send_json({'answer': 'pipeline accepted',
                            'pipeline id': pipeline_id})
        # Give the subscriber time to connect before publishing (PUB drops
        # messages sent while no subscriber is attached — slow-joiner).
        sleep(1)
        self.broadcast.send('pipeline finished: id={}, duration=1.23456'\
                            .format(pipeline_id))
        received = result.get()
        pool.terminate()
        # Duration is parsed from the broadcast message ...
        self.assertEqual(received['duration'], 1.23456)
        # ... while real_duration covers the >1s sleep above.
        self.assertTrue(received['real_duration'] > 1)

    def test_should_raise_ValueError_in_some_cases(self):
        """The helper records whether the expected ValueErrors were raised
        and when the pipeline was started."""
        result, pool = run_in_parallel(verify_PipelineManager_exceptions)
        message = self.api.recv_json()
        pipeline_id = uuid4().hex
        self.api.send_json({'answer': 'pipeline accepted',
                            'pipeline id': pipeline_id})
        start_time = time()
        received = result.get()
        pool.terminate()
        self.assertTrue(received['raise_1'])
        self.assertTrue(received['raise_2'])
        # The recorded start timestamp must be close to the moment we
        # accepted the pipeline above.
        started_at = received['started_at']
        self.assertTrue(start_time - 0.1 <= started_at <= start_time + 0.1)