def test_write_pipeline(self):
    """Test that the provided entity gets written to the repository."""
    entity = PipelineEntity.get_mock_object()

    with self.assertRaises(ResourceNotFoundException):
        PipelineService.fetch_pipeline_by_id(entity.id)

    PipelineService.write_pipeline(entity)

    self.assertEquals(
        PipelineService.fetch_pipeline_by_id(entity.id),
        entity,
    )
def test_run_pipeline_by_id(self, mock_graph, mock_process):
    node1_uuid = uuid.UUID('8bce531f-1c75-4526-b584-62a9e716933f')
    node2_uuid = uuid.UUID('de1b8346-0a26-4aad-b245-bb5cabf0daed')
    node3_uuid = uuid.UUID('cd227069-66cc-4ed3-8112-1cb2e85c4917')

    mock_graph.return_value = {
        'graph': {
            node1_uuid: set([
                node2_uuid,
            ]),
            node2_uuid: set([
                node3_uuid,
            ]),
        },
        'source_ids': [
            node1_uuid,
        ],
    }

    mock_process.side_effect = [
        'fake_response1',
        'fake_response2',
        None,  # last output doesn't return anything
    ]

    PipelineService.run_pipeline_by_id(self.pipeline.id)

    mock_graph.assert_called_once_with(self.pipeline.id)
    self.assertEquals(mock_process.call_count, 3)

    # node1.process
    self.assertEquals(
        mock_process.call_args_list[0][0],
        (node1_uuid, None),
    )

    # node2.process
    self.assertEquals(
        mock_process.call_args_list[1][0],
        (node2_uuid, 'fake_response1'),
    )

    # node3.process
    self.assertEquals(
        mock_process.call_args_list[2][0],
        (node3_uuid, 'fake_response2'),
    )
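# For context: a minimal sketch of the traversal the test above assumes.
# This is NOT the actual PipelineService implementation; the helper names
# `build_graph_for_pipeline` and `process_task` are hypothetical stand-ins
# for whatever `mock_graph` and `mock_process` patch. It only illustrates
# the contract the assertions check: start from `source_ids` and pass each
# task's return value to its children.
def _run_pipeline_sketch(pipeline_id):
    graph = build_graph_for_pipeline(pipeline_id)  # hypothetical helper

    # (task_id, data from parent) pairs, seeded with the source tasks.
    queue = [(task_id, None) for task_id in graph['source_ids']]
    while queue:
        task_id, data = queue.pop(0)
        result = process_task(task_id, data)  # hypothetical helper
        for child_id in graph['graph'].get(task_id, set()):
            queue.append((child_id, result))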
def test_fetch_pipeline_by_id(self, mock_fetch):
    """Test that you can fetch a PipelineEntity by ID."""
    mock_fetch.return_value = self.pipeline

    entity = PipelineService.fetch_pipeline_by_id(self.pipeline.id)

    self.assertEquals(
        entity.id,
        self.pipeline.id,
    )
from ocelot.services import datastores
from ocelot.services.pipeline import PipelineService

if __name__ == '__main__':
    datastores.create_tables()
    datastores.initialize()

    # XKCD
    # PipelineService.run_pipeline_by_id(
    #     '9aa97f4e-387d-4d06-ac00-f4fc344514da',
    # )

    # Taraval
    PipelineService.run_pipeline_by_id(
        '86b48bf6-6e55-4a2e-991a-9c15f1f77b80',
    )
def test_run_pipeline_by_id_post_run(self, mock_post_run):
    """Test that the post-run schedule hook is called when running the pipeline."""
    PipelineService.run_pipeline_by_id(self.pipeline.id)

    mock_post_run.assert_called_once_with(self.pipeline.id)
def test_stop_processing_exception_only_stops_one_path(
    self,
    mock_graph,
    mock_process,
):
    """Test that a StopProcessingException only stops one path.

    Given:

        A -> B -> C
        A -> D -> E

    If B raises a `StopProcessingException`, D and E should still be
    called, but C should not.
    """
    a = uuid.UUID('a722396f-8890-438c-a553-408b494491e9')
    b = uuid.UUID('45e81c1a-361a-4276-9df7-29cea6b0f6f2')
    c = uuid.UUID('8c55b14b-75d4-4e6e-a7c9-a1e325279d77')
    d = uuid.UUID('ec7b034f-2ab1-45b4-8da4-c12db71191bc')
    e = uuid.UUID('d8c8b029-5ca7-4602-95b9-af720de934e8')

    graph = {
        'graph': {
            a: set([
                b,
                d,
            ]),
            b: set([
                c,
            ]),
            d: set([
                e,
            ]),
        },
        'source_ids': [
            a,
        ],
    }

    def fake_process(task_id, data):
        if task_id == a:
            return 'a_response'
        elif task_id == b:
            raise StopProcessingException
        elif task_id == d:
            return 'd_response'
        elif task_id == e:
            return None  # last doesn't return anything

    mock_process.side_effect = fake_process
    mock_graph.return_value = graph

    PipelineService.run_pipeline_by_id(self.pipeline.id)

    self.assertEquals(mock_process.call_count, 4)

    # a.process
    self.assertEquals(
        mock_process.call_args_list[0][0],
        (a, None),
    )

    # b.process
    self.assertEquals(
        mock_process.call_args_list[1][0],
        (b, 'a_response'),
    )

    # d.process
    self.assertEquals(
        mock_process.call_args_list[2][0],
        (d, 'a_response'),
    )

    # e.process
    self.assertEquals(
        mock_process.call_args_list[3][0],
        (e, 'd_response'),
    )
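# A compact variant of the earlier traversal sketch showing how the
# branch-stopping behaviour tested above could work: catching
# StopProcessingException skips enqueueing that task's children, so
# sibling branches keep running. `build_graph_for_pipeline` and
# `process_task` are the same hypothetical helpers as before; this is
# not the actual PipelineService code.
def _run_pipeline_with_stop_sketch(pipeline_id):
    graph = build_graph_for_pipeline(pipeline_id)  # hypothetical helper

    queue = [(task_id, None) for task_id in graph['source_ids']]
    while queue:
        task_id, data = queue.pop(0)
        try:
            result = process_task(task_id, data)  # hypothetical helper
        except StopProcessingException:
            continue  # drop only this path; queued siblings still run
        for child_id in graph['graph'].get(task_id, set()):
            queue.append((child_id, result))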
def import_pipeline(cls, pipeline_data):
    if not pipeline_data.get('pipeline'):
        raise Exception('pipeline data is required')
    if not pipeline_data.get('pipeline_schedule'):
        raise Exception('pipeline schedule data is required')
    if not pipeline_data.get('tasks'):
        raise Exception('task data is required')
    if not pipeline_data.get('task_connections'):
        raise Exception('task connections are required')

    # Create Pipeline
    pipeline_entity = PipelineEntity(pipeline_data['pipeline'])
    pipeline_entity.validate()

    # Create PipelineSchedule
    pipeline_schedule_entity = PipelineScheduleEntity(pipeline_data['pipeline_schedule'])
    pipeline_schedule_entity.pipeline_id = pipeline_entity.id
    pipeline_schedule_entity.next_run_at = (
        PipelineScheduleService.calculate_next_run_at_for_schedule(
            pipeline_schedule_entity,
        )
    )
    pipeline_schedule_entity.validate()

    # Create Tasks
    task_entities = []
    task_alias_to_id = {}
    for task_alias, task_data in pipeline_data['tasks'].items():
        task_entity = TaskEntity(task_data)
        task_entity.validate()

        task_entities.append(task_entity)
        task_alias_to_id[task_alias] = task_entity.id

    # Create TaskConnections
    task_connection_entities = []
    for from_alias, to_aliases in pipeline_data['task_connections'].items():
        from_task_id = task_alias_to_id[from_alias]

        for to_alias in to_aliases:
            task_connection_entity = TaskConnectionEntity({
                'from_task_id': from_task_id,
                'to_task_id': task_alias_to_id[to_alias],
                'pipeline_id': pipeline_entity.id,
            })
            task_connection_entity.validate()

            task_connection_entities.append(task_connection_entity)

    # Save Pipeline
    PipelineService.write_pipeline(pipeline_entity)

    # Save PipelineSchedule (enabled = False)
    PipelineScheduleService.write_pipeline_schedule(pipeline_schedule_entity)

    # Save Tasks
    for task_entity in task_entities:
        TaskService.write_task(task_entity)

    # Save TaskConnections
    for task_connection_entity in task_connection_entities:
        TaskConnectionService.write_task_connection(task_connection_entity)

    return pipeline_entity
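# A hedged example of the shape `import_pipeline` expects, inferred only
# from the key checks and loops above. The top-level keys are real; the
# inner field names ('name', 'cron', 'type') are illustrative guesses, not
# the actual entity schemas, and the importer class name is hypothetical.
example_pipeline_data = {
    'pipeline': {
        'name': 'example pipeline',  # hypothetical PipelineEntity field
    },
    'pipeline_schedule': {
        # hypothetical PipelineScheduleEntity field; pipeline_id and
        # next_run_at are filled in by import_pipeline itself
        'cron': '0 * * * *',
    },
    'tasks': {
        # alias -> TaskEntity data; aliases are only used to wire connections
        'fetch': {'type': 'fetch_url'},   # hypothetical TaskEntity fields
        'store': {'type': 'store_file'},
    },
    'task_connections': {
        # from-alias -> iterable of to-aliases
        'fetch': ['store'],
    },
}

# pipeline_entity = SomePipelineImporter.import_pipeline(example_pipeline_data)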
import time

from ocelot.lib import logging
from ocelot.services import datastores
from ocelot.services.pipeline import PipelineService
from ocelot.services.pipeline_schedule import PipelineScheduleService

log = logging.getLogger('ocelot.scheduler')

SLEEP_SECONDS = 10

if __name__ == '__main__':
    datastores.create_tables()
    datastores.initialize()

    log.info('Starting scheduler')

    try:
        while True:
            pipeline_schedules = PipelineScheduleService.fetch_schedules_to_run()
            log.info('Found {} pipelines to run'.format(len(pipeline_schedules)))

            # Run the batch fetched above rather than fetching a second
            # time, which could return a different set of schedules.
            for schedule in pipeline_schedules:
                PipelineService.run_pipeline_by_id(schedule.pipeline_id)

            time.sleep(SLEEP_SECONDS)
    except KeyboardInterrupt:
        pass

    log.info('Shutting down scheduler')
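# For context, a hedged sketch of what fetch_schedules_to_run is assumed to
# select: enabled schedules whose next_run_at is due. This is an assumption
# based on the fields used elsewhere (next_run_at in import_pipeline, the
# "enabled" comment there), not the actual repository query.
from datetime import datetime

def fetch_schedules_to_run_sketch(schedules):
    now = datetime.utcnow()
    return [
        schedule for schedule in schedules
        if schedule.enabled and schedule.next_run_at <= now
    ]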