def put(self, workflow_id):
    parser = reqparse.RequestParser()
    parser.add_argument('config', type=dict, required=True,
                        help='config is empty')
    parser.add_argument('forkable', type=bool, required=True,
                        help='forkable is empty')
    parser.add_argument('comment')
    data = parser.parse_args()

    workflow = _get_workflow(workflow_id)
    if workflow.config:
        raise ResourceConflictException(
            'Resetting workflow is not allowed')

    workflow.comment = data['comment']
    workflow.forkable = data['forkable']
    workflow.set_config(dict_to_workflow_definition(data['config']))
    workflow.update_target_state(WorkflowState.READY)
    # Commit before waking the scheduler so it observes the new state.
    db.session.commit()
    scheduler.wakeup(workflow_id)
    logging.info('updated workflow %d target_state to %s',
                 workflow.id, workflow.target_state)
    return {'data': workflow.to_dict()}, HTTPStatus.OK
def post(self):
    parser = reqparse.RequestParser()
    parser.add_argument('name', required=True, help='name is empty')
    parser.add_argument('project_id', type=int, required=True,
                        help='project_id is empty')
    # TODO: should verify if the config is compatible with
    # the workflow template
    parser.add_argument('config', type=dict, required=True,
                        help='config is empty')
    parser.add_argument('forkable', type=bool, required=True,
                        help='forkable is empty')
    parser.add_argument('forked_from', type=int, required=False,
                        help='fork from base workflow')
    parser.add_argument('reuse_job_names', type=list, required=False,
                        location='json', help='fork and inherit jobs')
    parser.add_argument('peer_reuse_job_names', type=list,
                        required=False, location='json',
                        help='peer fork and inherit jobs')
    parser.add_argument('fork_proposal_config', type=dict,
                        required=False, help='fork and edit peer config')
    parser.add_argument('comment')
    data = parser.parse_args()
    name = data['name']
    if Workflow.query.filter_by(name=name).first() is not None:
        raise ResourceConflictException(
            'Workflow {} already exists.'.format(name))

    # form to proto buffer
    template_proto = dict_to_workflow_definition(data['config'])
    workflow = Workflow(name=name,
                        comment=data['comment'],
                        project_id=data['project_id'],
                        forkable=data['forkable'],
                        forked_from=data['forked_from'],
                        state=WorkflowState.NEW,
                        target_state=WorkflowState.READY,
                        transaction_state=TransactionState.READY)
    if workflow.forked_from is not None:
        fork_config = dict_to_workflow_definition(
            data['fork_proposal_config'])
        # TODO: more validations
        if len(fork_config.job_definitions) != \
                len(template_proto.job_definitions):
            raise InvalidArgumentException(
                'Forked workflow\'s template does not match base workflow')
        workflow.set_fork_proposal_config(fork_config)
        workflow.set_reuse_job_names(data['reuse_job_names'])
        workflow.set_peer_reuse_job_names(data['peer_reuse_job_names'])
    workflow.set_config(template_proto)
    db.session.add(workflow)
    db.session.commit()
    logging.info('Inserted a workflow to db')
    scheduler.wakeup(workflow.id)
    return {'data': workflow.to_dict()}, HTTPStatus.CREATED
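# A hedged usage sketch, not part of the handler above: what a fork
# request to this endpoint could look like through a Flask test client.
# The payload keys mirror the parser arguments; the `client` fixture,
# the /api/v2/workflows route, and all concrete values are assumptions
# for illustration only.
resp = client.post(
    '/api/v2/workflows',
    json={
        'name': 'fork-of-base-workflow',       # must be globally unique
        'project_id': 1,
        'config': base_config_dict,            # same job count as the base
        'forkable': True,
        'forked_from': 42,                     # id of the base workflow
        'reuse_job_names': ['raw-data-job'],   # jobs inherited locally
        'peer_reuse_job_names': ['raw-data-job'],
        'fork_proposal_config': proposed_peer_config_dict,
    })
assert resp.status_code == HTTPStatus.CREATED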
def _check_workflow_state(self, workflow_id, state, target_state,
                          transaction_state):
    while True:
        time.sleep(1)
        scheduler.wakeup(workflow_id)
        resp = self.get_helper('/api/v2/workflows/%d' % workflow_id)
        if resp.status_code != HTTPStatus.OK:
            continue
        if resp.json['data']['state'] == state and \
                resp.json['data']['target_state'] == target_state and \
                resp.json['data']['transaction_state'] == transaction_state:
            return
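# A hedged usage sketch: how a test could use the polling helper above to
# wait for a two-party transaction to settle. The id and state triple are
# illustrative; target_state 'INVALID' means no transition is pending and
# transaction_state 'READY' means the two-phase commit has finished.
self._check_workflow_state(workflow_id=10,
                           state='RUNNING',
                           target_state='INVALID',
                           transaction_state='READY')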
def post(self, dataset_id: int):
    parser = reqparse.RequestParser()
    parser.add_argument('event_time', type=int)
    parser.add_argument('files',
                        required=True,
                        type=list,
                        location='json',
                        help=_FORMAT_ERROR_MESSAGE.format('files'))
    parser.add_argument('move', type=bool)
    parser.add_argument('comment', type=str)
    body = parser.parse_args()
    event_time = body.get('event_time')
    files = body.get('files')
    # parse_args() yields None for an absent argument, so a .get()
    # default would never apply; coerce to False explicitly.
    move = body.get('move') or False
    comment = body.get('comment')

    dataset = Dataset.query.filter_by(id=dataset_id).first()
    if dataset is None:
        raise NotFoundException()
    if event_time is None and dataset.type == DatasetType.STREAMING:
        raise InvalidArgumentException(
            details='data_batch.event_time is empty')
    # TODO: PSI dataset should not allow multiple batches

    # Create batch
    batch = DataBatch(
        dataset_id=dataset.id,
        # Fall back to the current timestamp when the type is PSI
        event_time=datetime.datetime.fromtimestamp(
            event_time or datetime.datetime.now().timestamp()),
        comment=comment,
        state=BatchState.NEW,
        move=move,
    )
    batch_details = dataset_pb2.DataBatch()
    root_dir = current_app.config.get('STORAGE_ROOT')
    batch_folder_name = batch.event_time.strftime('%Y%m%d%H%M%S')
    for file_path in files:
        file = batch_details.files.add()
        file.source_path = file_path
        file_name = file_path.split('/')[-1]
        file.destination_path = f'{root_dir}/dataset/{dataset.id}' \
                                f'/batch/{batch_folder_name}/{file_name}'
    batch.set_details(batch_details)
    db.session.add(batch)
    db.session.commit()
    db.session.refresh(batch)
    scheduler.wakeup(data_batch_ids=[batch.id])
    return {'data': batch.to_dict()}
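# A hedged usage sketch: posting a new batch to a STREAMING dataset.
# The `client` fixture and the /api/v2/datasets/<id>/batches route are
# assumptions for illustration; only `files` is required by the parser,
# while `event_time` is mandatory for STREAMING datasets per the check
# above and also names the batch folder (strftime '%Y%m%d%H%M%S').
resp = client.post(
    '/api/v2/datasets/1/batches',
    json={
        'event_time': 1609459200,  # Unix seconds
        'files': ['/data/upload/part-0000', '/data/upload/part-0001'],
        'move': False,             # copy the files instead of moving them
        'comment': 'first batch',
    })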
def patch(self, workflow_id):
    parser = reqparse.RequestParser()
    parser.add_argument('target_state', type=str, required=True,
                        help='target_state is empty')
    target_state = parser.parse_args()['target_state']

    workflow = _get_workflow(workflow_id)
    try:
        workflow.update_target_state(WorkflowState[target_state])
        db.session.commit()
        logging.info('updated workflow %d target_state to %s',
                     workflow.id, workflow.target_state)
        scheduler.wakeup(workflow.id)
    # WorkflowState[...] raises KeyError for unknown state names.
    except (ValueError, KeyError) as e:
        raise InvalidArgumentException(details=str(e)) from e
    return {'data': workflow.to_dict()}, HTTPStatus.OK
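# A hedged usage sketch: driving a state transition through the handler
# above. PATCHing target_state only records the intent; the scheduler,
# once woken, performs the actual transition. The route and `client`
# fixture are assumptions for illustration.
resp = client.patch('/api/v2/workflows/1',
                    json={'target_state': 'RUNNING'})
# The response still carries the old `state`; poll the GET endpoint
# until state == 'RUNNING' after the scheduler commits the change.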
def patch(self, workflow_id):
    parser = reqparse.RequestParser()
    parser.add_argument('target_state', type=str, required=False,
                        default=None, help='target_state is empty')
    parser.add_argument('forkable', type=bool)
    parser.add_argument('config', type=dict, required=False,
                        default=None, help='updated config')
    data = parser.parse_args()

    workflow = _get_workflow(workflow_id)

    forkable = data['forkable']
    if forkable is not None:
        workflow.forkable = forkable
        db.session.commit()

    target_state = data['target_state']
    if target_state:
        try:
            db.session.refresh(workflow)
            workflow.update_target_state(WorkflowState[target_state])
            db.session.commit()
            logging.info('updated workflow %d target_state to %s',
                         workflow.id, workflow.target_state)
            scheduler.wakeup(workflow.id)
        # WorkflowState[...] raises KeyError for unknown state names.
        except (ValueError, KeyError) as e:
            raise InvalidArgumentException(details=str(e)) from e

    config = data['config']
    if config:
        try:
            db.session.refresh(workflow)
            if workflow.target_state != WorkflowState.INVALID or \
                    workflow.state not in \
                    [WorkflowState.READY, WorkflowState.STOPPED]:
                raise NoAccessException('Cannot edit running workflow')
            config_proto = dict_to_workflow_definition(data['config'])
            workflow.set_config(config_proto)
            db.session.commit()
        except ValueError as e:
            raise InvalidArgumentException(details=str(e)) from e
    return {'data': workflow.to_dict()}, HTTPStatus.OK
def post(self):
    parser = reqparse.RequestParser()
    parser.add_argument('name', required=True, help='name is empty')
    parser.add_argument('project_id', type=int, required=True,
                        help='project_id is empty')
    # TODO: should verify if the config is compatible with
    # the workflow template
    parser.add_argument('config', type=dict, required=True,
                        help='config is empty')
    parser.add_argument('forkable', type=bool, required=True,
                        help='forkable is empty')
    parser.add_argument('forked_from', type=int, required=False,
                        help='fork from base workflow')
    parser.add_argument('comment')
    data = parser.parse_args()
    name = data['name']
    if Workflow.query.filter_by(name=name).first() is not None:
        raise ResourceConflictException(
            'Workflow {} already exists.'.format(name))

    # form to proto buffer
    template_proto = dict_to_workflow_definition(data['config'])
    workflow = Workflow(name=name,
                        comment=data['comment'],
                        project_id=data['project_id'],
                        forkable=data['forkable'],
                        forked_from=data['forked_from'],
                        state=WorkflowState.NEW,
                        target_state=WorkflowState.READY,
                        transaction_state=TransactionState.READY)
    workflow.set_config(template_proto)
    db.session.add(workflow)
    db.session.commit()
    logging.info('Inserted a workflow to db')
    scheduler.wakeup(workflow.id)
    return {'data': workflow.to_dict()}, HTTPStatus.CREATED
def put(self, workflow_id):
    parser = reqparse.RequestParser()
    parser.add_argument('config', type=dict, required=True,
                        help='config is empty')
    parser.add_argument('forkable', type=bool, required=True,
                        help='forkable is empty')
    parser.add_argument('create_job_flags', type=list, required=False,
                        location='json',
                        help='flags in common.CreateJobFlag')
    parser.add_argument('batch_update_interval', type=int,
                        required=False,
                        help='cron interval of the workflow, in minutes')
    parser.add_argument('comment')
    data = parser.parse_args()

    workflow = _get_workflow(workflow_id)
    if workflow.config:
        raise ResourceConflictException(
            'Resetting workflow is not allowed')

    batch_update_interval = data['batch_update_interval']
    if batch_update_interval:
        start_or_stop_cronjob(batch_update_interval, workflow)

    workflow.comment = data['comment']
    workflow.forkable = data['forkable']
    workflow.set_config(dict_to_workflow_definition(data['config']))
    workflow.set_create_job_flags(data['create_job_flags'])
    workflow.update_target_state(WorkflowState.READY)
    db.session.commit()
    scheduler.wakeup(workflow_id)
    logging.info('updated workflow %d target_state to %s',
                 workflow.id, workflow.target_state)
    return {'data': workflow.to_dict()}, HTTPStatus.OK
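# A hedged usage sketch: configuring a workflow before its first run.
# Supplying batch_update_interval registers a cron entry through
# start_or_stop_cronjob so the workflow is restarted every N minutes.
# The route, `client` fixture, and `config_dict` are assumptions.
resp = client.put(
    '/api/v2/workflows/1',
    json={
        'config': config_dict,        # full workflow definition
        'forkable': False,
        'batch_update_interval': 60,  # restart hourly
    })
assert resp.status_code == HTTPStatus.OK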
def test_workflow_commit(self):
    # test the committing stage for workflow creation
    workflow_def = make_workflow_template()
    workflow = Workflow(
        id=20,
        name='job_test1',
        comment='this is a test workflow',
        config=workflow_def.SerializeToString(),
        project_id=1,
        forkable=True,
        state=WorkflowState.NEW,
        target_state=WorkflowState.READY,
        transaction_state=TransactionState.PARTICIPANT_COMMITTING)
    db.session.add(workflow)
    db.session.commit()
    scheduler.wakeup(20)
    self._wait_until(
        lambda: Workflow.query.get(20).state == WorkflowState.READY)
    workflow = Workflow.query.get(20)
    self.assertEqual(len(workflow.get_jobs()), 2)
    self.assertEqual(workflow.get_jobs()[0].state, JobState.NEW)
    self.assertEqual(workflow.get_jobs()[1].state, JobState.NEW)

    # test the committing stage for workflow running
    workflow.target_state = WorkflowState.RUNNING
    workflow.transaction_state = TransactionState.PARTICIPANT_COMMITTING
    db.session.commit()
    scheduler.wakeup(20)
    self._wait_until(
        lambda: Workflow.query.get(20).state == WorkflowState.RUNNING)
    workflow = Workflow.query.get(20)
    self._wait_until(
        lambda: workflow.get_jobs()[0].state == JobState.STARTED)
    self.assertEqual(workflow.get_jobs()[1].state, JobState.WAITING)
    workflow = Workflow.query.get(20)
    for job in workflow.owned_jobs:
        job.state = JobState.COMPLETED
    self.assertEqual(workflow.to_dict()['state'], 'COMPLETED')
    workflow.get_jobs()[0].state = JobState.FAILED
    self.assertEqual(workflow.to_dict()['state'], 'FAILED')

    # test the committing stage for workflow stopping
    workflow.target_state = WorkflowState.STOPPED
    workflow.transaction_state = TransactionState.PARTICIPANT_COMMITTING
    for job in workflow.owned_jobs:
        job.state = JobState.STARTED
    db.session.commit()
    scheduler.wakeup(20)
    self._wait_until(
        lambda: Workflow.query.get(20).state == WorkflowState.STOPPED)
    workflow = Workflow.query.get(20)
    self._wait_until(
        lambda: workflow.get_jobs()[0].state == JobState.STOPPED)
    self.assertEqual(workflow.get_jobs()[1].state, JobState.STOPPED)
def patch(self, workflow_id):
    parser = reqparse.RequestParser()
    parser.add_argument('target_state', type=str, required=False,
                        default=None, help='target_state is empty')
    parser.add_argument('state', type=str, required=False,
                        default=None, help='state is empty')
    parser.add_argument('forkable', type=bool)
    parser.add_argument('metric_is_public', type=bool)
    parser.add_argument('config', type=dict, required=False,
                        default=None, help='updated config')
    data = parser.parse_args()

    workflow = _get_workflow(workflow_id)

    forkable = data['forkable']
    if forkable is not None:
        workflow.forkable = forkable
        db.session.flush()

    metric_is_public = data['metric_is_public']
    if metric_is_public is not None:
        workflow.metric_is_public = metric_is_public
        db.session.flush()

    target_state = data['target_state']
    if target_state:
        try:
            if WorkflowState[target_state] == WorkflowState.RUNNING:
                for job in workflow.owned_jobs:
                    try:
                        generate_job_run_yaml(job)
                        # TODO: check if peer variables are valid
                    except RuntimeError as e:
                        raise ValueError(
                            f'Invalid variable when trying to format '
                            f'job {job.name}: {str(e)}')
            workflow.update_target_state(WorkflowState[target_state])
            db.session.flush()
            logging.info('updated workflow %d target_state to %s',
                         workflow.id, workflow.target_state)
            scheduler.wakeup(workflow.id)
        # WorkflowState[...] raises KeyError for unknown state names.
        except (ValueError, KeyError) as e:
            raise InvalidArgumentException(details=str(e)) from e

    state = data['state']
    if state:
        try:
            # An assert here would surface as a 500 instead of a 400.
            if state != 'INVALID':
                raise ValueError(
                    'Can only set state to INVALID for invalidation')
            workflow.invalidate()
            db.session.flush()
            logging.info('invalidate workflow %d', workflow.id)
        except ValueError as e:
            raise InvalidArgumentException(details=str(e)) from e

    config = data['config']
    if config:
        try:
            if workflow.target_state != WorkflowState.INVALID or \
                    workflow.state not in \
                    [WorkflowState.READY, WorkflowState.STOPPED]:
                raise NoAccessException('Cannot edit running workflow')
            config_proto = dict_to_workflow_definition(data['config'])
            workflow.set_config(config_proto)
            db.session.flush()
        except ValueError as e:
            raise InvalidArgumentException(details=str(e)) from e

    db.session.commit()
    return {'data': workflow.to_dict()}, HTTPStatus.OK
def post(self):
    parser = reqparse.RequestParser()
    parser.add_argument('name', required=True, help='name is empty')
    parser.add_argument('project_id', type=int, required=True,
                        help='project_id is empty')
    # TODO: should verify if the config is compatible with
    # the workflow template
    parser.add_argument('config', type=dict, required=True,
                        help='config is empty')
    parser.add_argument('forkable', type=bool, required=True,
                        help='forkable is empty')
    parser.add_argument('forked_from', type=int, required=False,
                        help='fork from base workflow')
    parser.add_argument('create_job_flags', type=list, required=False,
                        location='json',
                        help='flags in common.CreateJobFlag')
    parser.add_argument('peer_create_job_flags', type=list,
                        required=False, location='json',
                        help='peer flags in common.CreateJobFlag')
    parser.add_argument('fork_proposal_config', type=dict,
                        required=False, help='fork and edit peer config')
    parser.add_argument('comment')
    data = parser.parse_args()
    name = data['name']
    if Workflow.query.filter_by(name=name).first() is not None:
        raise ResourceConflictException(
            'Workflow {} already exists.'.format(name))

    # form to proto buffer
    template_proto = dict_to_workflow_definition(data['config'])
    workflow = Workflow(
        name=name,
        # 20 bytes: a DNS-1035 label must start with an alphabetic
        # character; the substring uuid[:19] has no collision in
        # 10 million draws.
        uuid=f'u{uuid4().hex[:19]}',
        comment=data['comment'],
        project_id=data['project_id'],
        forkable=data['forkable'],
        forked_from=data['forked_from'],
        state=WorkflowState.NEW,
        target_state=WorkflowState.READY,
        transaction_state=TransactionState.READY)
    workflow.set_create_job_flags(data['create_job_flags'])
    if workflow.forked_from is not None:
        fork_config = dict_to_workflow_definition(
            data['fork_proposal_config'])
        # TODO: more validations
        if len(fork_config.job_definitions) != \
                len(template_proto.job_definitions):
            raise InvalidArgumentException(
                'Forked workflow\'s template does not match base workflow')
        workflow.set_fork_proposal_config(fork_config)
        # TODO: check that federated jobs have the same reuse policy
        # on both sides
        workflow.set_peer_create_job_flags(data['peer_create_job_flags'])
    workflow.set_config(template_proto)
    db.session.add(workflow)
    db.session.commit()
    logging.info('Inserted a workflow to db')
    scheduler.wakeup(workflow.id)
    return {'data': workflow.to_dict()}, HTTPStatus.CREATED
def patch(self, workflow_id):
    parser = reqparse.RequestParser()
    parser.add_argument('target_state', type=str, required=False,
                        default=None, help='target_state is empty')
    parser.add_argument('state', type=str, required=False,
                        help='state is empty')
    parser.add_argument('forkable', type=bool)
    parser.add_argument('metric_is_public', type=bool)
    parser.add_argument('config', type=dict, required=False,
                        help='updated config')
    parser.add_argument('create_job_flags', type=list, required=False,
                        location='json',
                        help='flags in common.CreateJobFlag')
    parser.add_argument('batch_update_interval', type=int,
                        required=False,
                        help='restart interval of the workflow, '
                             'in minutes')
    data = parser.parse_args()

    workflow = _get_workflow(workflow_id)

    # Restart the workflow every batch_update_interval minutes.
    batch_update_interval = data['batch_update_interval']
    if batch_update_interval:
        start_or_stop_cronjob(batch_update_interval, workflow)

    forkable = data['forkable']
    if forkable is not None:
        workflow.forkable = forkable
        db.session.flush()

    metric_is_public = data['metric_is_public']
    if metric_is_public is not None:
        workflow.metric_is_public = metric_is_public
        db.session.flush()

    target_state = data['target_state']
    if target_state:
        try:
            if WorkflowState[target_state] == WorkflowState.RUNNING:
                for job in workflow.owned_jobs:
                    try:
                        generate_job_run_yaml(job)
                        # TODO: check if peer variables are valid
                    except Exception as e:  # pylint: disable=broad-except
                        raise ValueError(
                            f'Invalid variable when trying to format '
                            f'job {job.name}: {str(e)}')
            workflow.update_target_state(WorkflowState[target_state])
            db.session.flush()
            logging.info('updated workflow %d target_state to %s',
                         workflow.id, workflow.target_state)
        # WorkflowState[...] raises KeyError for unknown state names.
        except (ValueError, KeyError) as e:
            raise InvalidArgumentException(details=str(e)) from e

    state = data['state']
    if state:
        try:
            # An assert here would surface as a 500 instead of a 400.
            if state != 'INVALID':
                raise ValueError(
                    'Can only set state to INVALID for invalidation')
            workflow.invalidate()
            db.session.flush()
            logging.info('invalidate workflow %d', workflow.id)
        except ValueError as e:
            raise InvalidArgumentException(details=str(e)) from e

    config = data['config']
    if config:
        try:
            if workflow.target_state != WorkflowState.INVALID or \
                    workflow.state not in \
                    [WorkflowState.READY, WorkflowState.STOPPED]:
                raise NoAccessException('Cannot edit running workflow')
            config_proto = dict_to_workflow_definition(data['config'])
            workflow.set_config(config_proto)
            db.session.flush()
        except ValueError as e:
            raise InvalidArgumentException(details=str(e)) from e

    create_job_flags = data['create_job_flags']
    if create_job_flags:
        jobs = workflow.get_jobs()
        if len(create_job_flags) != len(jobs):
            raise InvalidArgumentException(
                details='Number of job defs does not match number '
                        f'of create_job_flags {len(jobs)} '
                        f'vs {len(create_job_flags)}')
        workflow.set_create_job_flags(create_job_flags)
        flags = workflow.get_create_job_flags()
        for i, job in enumerate(jobs):
            if job.workflow_id == workflow.id:
                job.is_disabled = flags[i] == \
                    common_pb2.CreateJobFlag.DISABLED

    db.session.commit()
    scheduler.wakeup(workflow.id)
    return {'data': workflow.to_dict()}, HTTPStatus.OK
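# A hedged usage sketch: disabling the second of three jobs through
# create_job_flags. The list is positional, one flag per job definition.
# DISABLED is referenced by the handler above; NEW is assumed to be the
# default-create value in the same common_pb2.CreateJobFlag enum. The
# route and `client` fixture are assumptions for illustration.
resp = client.patch(
    '/api/v2/workflows/1',
    json={'create_job_flags': [
        common_pb2.CreateJobFlag.NEW,
        common_pb2.CreateJobFlag.DISABLED,  # marks job 2 is_disabled
        common_pb2.CreateJobFlag.NEW,
    ]})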