@classmethod
def run_pipeline_by_id(cls, id):
    """Runs a pipeline by id.

    :param str id: id of the pipeline to run
    """
    log.info('Running pipeline: {}'.format(id))

    PipelineScheduleService.pre_run_schedule(id)

    # Build the task graph for the pipeline
    graph_data = TaskConnectionService.build_graph_for_pipeline(id)

    # Seed the queue with the pipeline's source tasks, which receive no
    # input data, then process the graph breadth-first
    queue = deque()
    for source_id in graph_data['source_ids']:
        queue.appendleft((source_id, None))

    while len(queue):
        task_id, data = queue.pop()

        try:
            task_response = TaskService.process_task_with_data(task_id, data)

            try:
                # Feed this task's response to each downstream task
                for next_id in graph_data['graph'][task_id]:
                    queue.appendleft((next_id, task_response))
            except KeyError:
                # Task has no downstream connections; this branch is done
                pass
        except StopProcessingException:
            # The task asked to stop, so its downstream tasks are skipped
            pass

    PipelineScheduleService.post_run_schedule(id)
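# For reference, a minimal sketch of the graph structure the traversal
# above consumes. The task ids here are hypothetical; the real mapping
# comes from TaskConnectionService.build_graph_for_pipeline.
example_graph_data = {
    # Tasks with no inbound connections; the pipeline starts from these
    'source_ids': ['task-extract'],
    # Adjacency list: task id -> ids of the tasks fed by its response.
    # Sink tasks have no entry, which is why the traversal treats a
    # KeyError as the end of a branch.
    'graph': {
        'task-extract': ['task-transform'],
        'task-transform': ['task-load'],
    },
}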
def test_write_pipeline_schedule(self):
    """Test that we can write an entity to the repository."""
    self.install_fixture('pipeline_schedule_interval')

    entity = PipelineScheduleMapper.to_entity(self.pipeline_schedule_interval)
    entity.schedule = '150'

    PipelineScheduleService.write_pipeline_schedule(entity)

    self._assert_pipeline_attribute_equals(
        self.pipeline.id,
        'schedule',
        '150',
    )
def _assert_pipeline_attribute_equals(self, pipeline_id, attribute, expected):
    """Assert that an attribute of the pipeline's schedule equals expected."""
    schedule = PipelineScheduleService.fetch_schedule_for_pipeline(
        pipeline_id,
    )
    self.assertEqual(
        getattr(schedule, attribute),
        expected,
    )
def test_update_next_run_at_interval_never_ran(self):
    """Test that next_run_at = current time + interval."""
    self.install_fixture('pipeline_schedule_interval')

    # Update next_run_at
    PipelineScheduleService.update_next_run_at_for_pipeline(
        self.pipeline.id,
    )

    # Assert next_run_at is updated to current time + interval, since the
    # schedule has never run and last_run_at is empty
    schedule = PipelineScheduleService.fetch_schedule_for_pipeline(
        self.pipeline.id,
    )
    self._assert_pipeline_attribute_equals(
        self.pipeline.id,
        'next_run_at',
        datetime.utcnow() + timedelta(seconds=int(schedule.schedule)),
    )
def test_update_next_run_at_cron(self):
    """Test that next_run_at = croniter parse."""
    self.install_fixture('pipeline_schedule_cron')

    # Set last_run_at to expected
    PipelineScheduleService.update_last_run_at_for_pipeline(
        self.pipeline.id,
    )

    # Update next_run_at
    PipelineScheduleService.update_next_run_at_for_pipeline(
        self.pipeline.id,
    )

    # Assert that next_run_at is expected
    self._assert_pipeline_attribute_equals(
        self.pipeline.id,
        'next_run_at',
        datetime(2014, 2, 1, 0, 5),
    )
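# A minimal sketch of the cron arithmetic the assertion above expects,
# assuming the 'pipeline_schedule_cron' fixture stores a five-minute cron
# expression and pins last_run_at to 2014-02-01 00:00 (both assumptions;
# the real values live in the fixture).
from datetime import datetime

from croniter import croniter

next_run = croniter('*/5 * * * *', datetime(2014, 2, 1)).get_next(datetime)
assert next_run == datetime(2014, 2, 1, 0, 5)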
def test_lock_schedule_for_pipeline(self):
    """Test that the schedule gets locked."""
    self.install_fixture('pipeline_schedule_interval')

    # Assert not locked
    self._assert_pipeline_attribute_equals(
        self.pipeline.id,
        'locked',
        False,
    )

    # Lock pipeline
    PipelineScheduleService.lock_schedule_for_pipeline(self.pipeline.id)

    # Assert locked
    self._assert_pipeline_attribute_equals(
        self.pipeline.id,
        'locked',
        True,
    )
def test_fetch_schedule_for_pipeline(self):
    """Test that you can fetch schedules for a pipeline."""
    self.install_fixture('pipeline_schedule_interval')

    self.assertEqual(
        PipelineScheduleService.fetch_schedule_for_pipeline(
            self.pipeline.id,
        ),
        PipelineScheduleMapper.to_entity(self.pipeline_schedule_interval),
    )
def test_update_next_run_at_manual(self):
    """Test that next_run_at = None for manual schedules."""
    self.install_fixture('pipeline_schedule_manual')

    # Update last_run_at
    PipelineScheduleService.update_last_run_at_for_pipeline(
        self.pipeline.id,
    )

    # Update next_run_at
    PipelineScheduleService.update_next_run_at_for_pipeline(
        self.pipeline.id,
    )

    # Assert next_run_at is updated to None
    self._assert_pipeline_attribute_equals(
        self.pipeline.id,
        'next_run_at',
        None,
    )
def test_fetch_schedules_to_run(self):
    """Test that schedules that need to run are returned."""
    self.install_fixture('pipeline_schedule_interval')

    self.assertEqual(
        PipelineScheduleService.fetch_schedules_to_run(),
        [
            PipelineScheduleMapper.to_entity(
                self.pipeline_schedule_interval,
            ),
        ],
    )
def test_update_last_run_at(self):
    """Test that last_run_at is updated to the latest date."""
    self.install_fixture('pipeline_schedule_interval')

    # Assert that last_run_at is empty
    self._assert_pipeline_attribute_equals(
        self.pipeline.id,
        'last_run_at',
        None,
    )

    # Set last_run_at to expected
    PipelineScheduleService.update_last_run_at_for_pipeline(
        self.pipeline.id,
    )

    # Assert that last_run_at is expected
    self._assert_pipeline_attribute_equals(
        self.pipeline.id,
        'last_run_at',
        datetime(2014, 2, 1),
    )
@mock.patch.object(
    PipelineScheduleService,
    'update_next_run_at_for_pipeline',
)
def test_post_run_updates_next_run_at(self, mock_update):
    """Test that next_run_at is updated."""
    self.install_fixture('pipeline_schedule_interval')

    PipelineScheduleService.post_run_schedule(self.pipeline.id)

    mock_update.assert_called_once_with(self.pipeline.id)
# Patch target assumed from the lock/unlock naming used elsewhere in
# this suite
@mock.patch.object(
    PipelineScheduleService,
    'unlock_schedule_for_pipeline',
)
def test_post_run_schedule_unlocks(self, mock_unlock):
    """Test that the schedule is unlocked."""
    self.install_fixture('pipeline_schedule_interval')

    PipelineScheduleService.post_run_schedule(self.pipeline.id)

    mock_unlock.assert_called_once_with(self.pipeline.id)
@classmethod
def import_pipeline(cls, pipeline_data):
    """Validates and persists a full pipeline definition.

    :param dict pipeline_data: pipeline, pipeline_schedule, tasks, and
        task_connections data
    """
    if not pipeline_data.get('pipeline'):
        raise Exception('pipeline data is required')
    if not pipeline_data.get('pipeline_schedule'):
        raise Exception('pipeline schedule data is required')
    if not pipeline_data.get('tasks'):
        raise Exception('task data is required')
    if not pipeline_data.get('task_connections'):
        raise Exception('task connections are required')

    # Create Pipeline
    pipeline_entity = PipelineEntity(pipeline_data['pipeline'])
    pipeline_entity.validate()

    # Create PipelineSchedule
    pipeline_schedule_entity = PipelineScheduleEntity(
        pipeline_data['pipeline_schedule'],
    )
    pipeline_schedule_entity.pipeline_id = pipeline_entity.id
    pipeline_schedule_entity.next_run_at = (
        PipelineScheduleService.calculate_next_run_at_for_schedule(
            pipeline_schedule_entity,
        )
    )
    pipeline_schedule_entity.validate()

    # Create Tasks, remembering the id generated for each alias
    task_entities = []
    task_alias_to_id = {}
    for task_alias, task_data in pipeline_data['tasks'].items():
        task_entity = TaskEntity(task_data)
        task_entity.validate()

        task_entities.append(task_entity)
        task_alias_to_id[task_alias] = task_entity.id

    # Create TaskConnections, resolving aliases to task ids
    task_connection_entities = []
    for from_alias, to_aliases in pipeline_data['task_connections'].items():
        from_task_id = task_alias_to_id[from_alias]

        for to_alias in to_aliases:
            task_connection_entity = TaskConnectionEntity({
                'from_task_id': from_task_id,
                'to_task_id': task_alias_to_id[to_alias],
                'pipeline_id': pipeline_entity.id,
            })
            task_connection_entity.validate()

            task_connection_entities.append(task_connection_entity)

    # Everything validated; now persist

    # Save Pipeline
    PipelineService.write_pipeline(pipeline_entity)

    # Save PipelineSchedule (enabled = False)
    PipelineScheduleService.write_pipeline_schedule(pipeline_schedule_entity)

    # Save Tasks
    for task_entity in task_entities:
        TaskService.write_task(task_entity)

    # Save TaskConnections
    for task_connection_entity in task_connection_entities:
        TaskConnectionService.write_task_connection(task_connection_entity)

    return pipeline_entity
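# A minimal sketch of the pipeline_data payload import_pipeline expects.
# The four top-level keys are the ones validated above; the nested field
# names are illustrative assumptions, not a confirmed schema.
example_pipeline_data = {
    'pipeline': {'name': 'nightly-etl'},
    'pipeline_schedule': {'type': 'interval', 'schedule': '3600'},
    # Tasks are keyed by alias; each alias is resolved to the generated
    # task id when the connections are built
    'tasks': {
        'extract': {'type': 'extract-task'},
        'load': {'type': 'load-task'},
    },
    # from-alias -> list of to-aliases
    'task_connections': {
        'extract': ['load'],
    },
}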
import time

from ocelot.lib import logging
from ocelot.services import datastores
from ocelot.services.pipeline import PipelineService
from ocelot.services.pipeline_schedule import PipelineScheduleService

log = logging.getLogger('ocelot.scheduler')

SLEEP_SECONDS = 10

if __name__ == '__main__':
    datastores.create_tables()
    datastores.initialize()

    log.info('Starting scheduler')

    try:
        while True:
            pipeline_schedules = PipelineScheduleService.fetch_schedules_to_run()
            log.info('Found {} pipelines to run'.format(len(pipeline_schedules)))

            # Run each due pipeline, reusing the list fetched above rather
            # than querying a second time
            for schedule in pipeline_schedules:
                PipelineService.run_pipeline_by_id(schedule.pipeline_id)

            time.sleep(SLEEP_SECONDS)
    except KeyboardInterrupt:
        pass

    log.info('Shutting down scheduler')