Example #1
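    # PUT: submit the initial config for an existing workflow. Refuses to
    # overwrite a workflow that already has a config, then stores
    # comment/forkable/config, moves target_state to READY and wakes the
    # scheduler before committing the transaction.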
    def put(self, workflow_id):
        parser = reqparse.RequestParser()
        parser.add_argument('config',
                            type=dict,
                            required=True,
                            help='config is empty')
        parser.add_argument('forkable',
                            type=bool,
                            required=True,
                            help='forkable is empty')
        parser.add_argument('comment')
        data = parser.parse_args()

        workflow = _get_workflow(workflow_id)
        if workflow.config:
            raise ResourceConflictException(
                'Resetting workflow is not allowed')

        workflow.comment = data['comment']
        workflow.forkable = data['forkable']
        workflow.set_config(dict_to_workflow_definition(data['config']))
        workflow.update_target_state(WorkflowState.READY)
        scheduler.wakeup(workflow_id)
        db.session.commit()
        logging.info('update workflow %d target_state to %s', workflow.id,
                     workflow.target_state)
        return {'data': workflow.to_dict()}, HTTPStatus.OK
Example #2
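    # POST: create a new workflow, optionally forked from an existing one.
    # For forks, the proposed config must contain the same number of job
    # definitions as the base template. The scheduler is woken up after the
    # new record is committed.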
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument('name', required=True, help='name is empty')
        parser.add_argument('project_id', type=int, required=True,
                            help='project_id is empty')
        # TODO: should verify if the config is compatible with
        # workflow template
        parser.add_argument('config', type=dict, required=True,
                            help='config is empty')
        parser.add_argument('forkable', type=bool, required=True,
                            help='forkable is empty')
        parser.add_argument('forked_from', type=int, required=False,
                            help='fork from base workflow')
        parser.add_argument('reuse_job_names', type=list, required=False,
                            location='json', help='fork and inherit jobs')
        parser.add_argument('peer_reuse_job_names', type=list,
                            required=False, location='json',
                            help='peer fork and inherit jobs')
        parser.add_argument('fork_proposal_config', type=dict, required=False,
                            help='fork and edit peer config')
        parser.add_argument('comment')
        data = parser.parse_args()

        name = data['name']
        if Workflow.query.filter_by(name=name).first() is not None:
            raise ResourceConflictException(
                'Workflow {} already exists.'.format(name))

        # form to proto buffer
        template_proto = dict_to_workflow_definition(data['config'])
        workflow = Workflow(name=name, comment=data['comment'],
                            project_id=data['project_id'],
                            forkable=data['forkable'],
                            forked_from=data['forked_from'],
                            state=WorkflowState.NEW,
                            target_state=WorkflowState.READY,
                            transaction_state=TransactionState.READY)

        if workflow.forked_from is not None:
            fork_config = dict_to_workflow_definition(
                data['fork_proposal_config'])
            # TODO: more validations
            if len(fork_config.job_definitions) != \
                    len(template_proto.job_definitions):
                raise InvalidArgumentException(
                    'Forked workflow\'s template does not match base workflow')
            workflow.set_fork_proposal_config(fork_config)
            workflow.set_reuse_job_names(data['reuse_job_names'])
            workflow.set_peer_reuse_job_names(data['peer_reuse_job_names'])

        workflow.set_config(template_proto)
        db.session.add(workflow)
        db.session.commit()
        logging.info('Inserted a workflow to db')
        scheduler.wakeup(workflow.id)
        return {'data': workflow.to_dict()}, HTTPStatus.CREATED
Example #3
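    # Test helper: repeatedly wake the scheduler and poll the workflow API
    # until the workflow reaches the expected combination of state,
    # target_state and transaction_state.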
    def _check_workflow_state(self, workflow_id, state, target_state,
                              transaction_state):
        while True:
            time.sleep(1)
            scheduler.wakeup(workflow_id)
            resp = self.get_helper('/api/v2/workflows/%d' % workflow_id)
            if resp.status_code != HTTPStatus.OK:
                continue
            if resp.json['data']['state'] == state and \
                    resp.json['data']['target_state'] == target_state and \
                    resp.json['data']['transaction_state'] == transaction_state:
                return
Example #4
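    # POST: register a new data batch for a dataset. Streaming datasets
    # require an explicit event_time; otherwise the current timestamp is
    # used. Each source file gets a destination path under STORAGE_ROOT,
    # and the scheduler is woken up with the new batch id.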
    def post(self, dataset_id: int):
        parser = reqparse.RequestParser()
        parser.add_argument('event_time', type=int)
        parser.add_argument('files',
                            required=True,
                            type=list,
                            location='json',
                            help=_FORMAT_ERROR_MESSAGE.format('files'))
        parser.add_argument('move', type=bool)
        parser.add_argument('comment', type=str)
        body = parser.parse_args()
        event_time = body.get('event_time')
        files = body.get('files')
        move = body.get('move', False)
        comment = body.get('comment')

        dataset = Dataset.query.filter_by(id=dataset_id).first()
        if dataset is None:
            raise NotFoundException()
        if event_time is None and dataset.type == DatasetType.STREAMING:
            raise InvalidArgumentException(
                details='data_batch.event_time is empty')
        # TODO: PSI dataset should not allow multi batches

        # Create batch
        batch = DataBatch(
            dataset_id=dataset.id,
            # Use current timestamp to fill when type is PSI
            event_time=datetime.datetime.fromtimestamp(
                event_time or datetime.datetime.now().timestamp()),
            comment=comment,
            state=BatchState.NEW,
            move=move,
        )
        batch_details = dataset_pb2.DataBatch()
        root_dir = current_app.config.get('STORAGE_ROOT')
        batch_folder_name = batch.event_time.strftime('%Y%m%d%H%M%S')
        for file_path in files:
            file = batch_details.files.add()
            file.source_path = file_path
            file_name = file_path.split('/')[-1]
            file.destination_path = f'{root_dir}/dataset/{dataset.id}' \
                                    f'/batch/{batch_folder_name}/{file_name}'
        batch.set_details(batch_details)
        db.session.add(batch)
        db.session.commit()
        db.session.refresh(batch)
        scheduler.wakeup(data_batch_ids=[batch.id])
        return {'data': batch.to_dict()}
Example #5
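    # PATCH: update only the workflow's target_state, commit and wake the
    # scheduler; invalid transitions surface as InvalidArgumentException.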
    def patch(self, workflow_id):
        parser = reqparse.RequestParser()
        parser.add_argument('target_state',
                            type=str,
                            required=True,
                            help='target_state is empty')
        target_state = parser.parse_args()['target_state']

        workflow = _get_workflow(workflow_id)
        try:
            workflow.update_target_state(WorkflowState[target_state])
            db.session.commit()
            logging.info('updated workflow %d target_state to %s', workflow.id,
                         workflow.target_state)
            scheduler.wakeup(workflow.id)
        except ValueError as e:
            raise InvalidArgumentException(details=str(e)) from e
        return {'data': workflow.to_dict()}, HTTPStatus.OK
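The handler in Example #5 only accepts a target_state. A minimal client-side sketch of driving such an endpoint with the requests library is shown below; the '/api/v2/workflows/<id>' path is taken from the test helper in Example #3, while the host, port and missing authentication are placeholders, not part of the original code.

import requests

# Assumed base URL; host and port are illustrative placeholders, and any
# authentication headers the real server expects are omitted here.
BASE_URL = 'http://localhost:8080/api/v2'

def start_workflow(workflow_id: int) -> dict:
    # Ask the server to move the workflow towards RUNNING; the PATCH
    # handler commits the change and wakes the scheduler, which then
    # performs the actual state transition asynchronously.
    resp = requests.patch(f'{BASE_URL}/workflows/{workflow_id}',
                          json={'target_state': 'RUNNING'})
    resp.raise_for_status()
    return resp.json()['data']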
Example #6
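    # PATCH: update forkable, target_state and/or config independently.
    # The scheduler is woken after a successful target_state change, and
    # the config may only be edited while the workflow is not running.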
    def patch(self, workflow_id):
        parser = reqparse.RequestParser()
        parser.add_argument('target_state', type=str, required=False,
                            default=None, help='target_state is empty')
        parser.add_argument('forkable', type=bool)
        parser.add_argument('config', type=dict, required=False,
                            default=None, help='updated config')
        data = parser.parse_args()

        workflow = _get_workflow(workflow_id)

        forkable = data['forkable']
        if forkable is not None:
            workflow.forkable = forkable
            db.session.commit()

        target_state = data['target_state']
        if target_state:
            try:
                db.session.refresh(workflow)
                workflow.update_target_state(WorkflowState[target_state])
                db.session.commit()
                logging.info('updated workflow %d target_state to %s',
                             workflow.id, workflow.target_state)
                scheduler.wakeup(workflow.id)
            except ValueError as e:
                raise InvalidArgumentException(details=str(e)) from e

        config = data['config']
        if config:
            try:
                db.session.refresh(workflow)
                if workflow.target_state != WorkflowState.INVALID or \
                        workflow.state not in \
                        [WorkflowState.READY, WorkflowState.STOPPED]:
                    raise NoAccessException('Cannot edit running workflow')
                config_proto = dict_to_workflow_definition(data['config'])
                workflow.set_config(config_proto)
                db.session.commit()
            except ValueError as e:
                raise InvalidArgumentException(details=str(e)) from e

        return {'data': workflow.to_dict()}, HTTPStatus.OK
Example #7
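    # POST: create a new workflow without fork-specific handling; rejects
    # duplicate names, converts the config dict to a protobuf definition,
    # commits the record and wakes the scheduler.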
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument('name', required=True, help='name is empty')
        parser.add_argument('project_id', type=int, required=True,
                            help='project_id is empty')
        # TODO: should verify if the config is compatible with
        # workflow template
        parser.add_argument('config', type=dict, required=True,
                            help='config is empty')
        parser.add_argument('forkable', type=bool, required=True,
                            help='forkable is empty')
        parser.add_argument('forked_from', type=int, required=False,
                            help='fork from base workflow')
        parser.add_argument('comment')
        data = parser.parse_args()

        name = data['name']
        if Workflow.query.filter_by(name=name).first() is not None:
            raise ResourceConflictException(
                'Workflow {} already exists.'.format(name))

        # form to proto buffer
        template_proto = dict_to_workflow_definition(data['config'])
        workflow = Workflow(name=name, comment=data['comment'],
                            project_id=data['project_id'],
                            forkable=data['forkable'],
                            forked_from=data['forked_from'],
                            state=WorkflowState.NEW,
                            target_state=WorkflowState.READY,
                            transaction_state=TransactionState.READY)
        workflow.set_config(template_proto)
        db.session.add(workflow)
        db.session.commit()
        logging.info('Inserted a workflow to db')
        scheduler.wakeup(workflow.id)
        return {'data': workflow.to_dict()}, HTTPStatus.CREATED
Example #8
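    # PUT: configure a workflow, optionally scheduling a periodic cronjob
    # via batch_update_interval (in minutes) and setting create_job_flags.
    # An already-configured workflow cannot be reset; on success the change
    # is committed and the scheduler is woken up.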
    def put(self, workflow_id):
        parser = reqparse.RequestParser()
        parser.add_argument('config', type=dict, required=True,
                            help='config is empty')
        parser.add_argument('forkable', type=bool, required=True,
                            help='forkable is empty')
        parser.add_argument('create_job_flags', type=list, required=False,
                            location='json',
                            help='flags in common.CreateJobFlag')
        parser.add_argument(
            'batch_update_interval',
            type=int,
            required=False,
            help='interval time for cronjob of workflow in minute')
        parser.add_argument('comment')
        data = parser.parse_args()

        workflow = _get_workflow(workflow_id)
        if workflow.config:
            raise ResourceConflictException(
                'Resetting workflow is not allowed')

        batch_update_interval = data['batch_update_interval']
        if batch_update_interval:
            start_or_stop_cronjob(batch_update_interval, workflow)

        workflow.comment = data['comment']
        workflow.forkable = data['forkable']
        workflow.set_config(dict_to_workflow_definition(data['config']))
        workflow.set_create_job_flags(data['create_job_flags'])
        workflow.update_target_state(WorkflowState.READY)
        db.session.commit()
        scheduler.wakeup(workflow_id)
        logging.info('update workflow %d target_state to %s',
                     workflow.id, workflow.target_state)
        return {'data': workflow.to_dict()}, HTTPStatus.OK
Example #9
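    # Unit test: drive a workflow through the committing stage of creation,
    # running and stopping by setting target_state / transaction_state
    # directly, waking the scheduler, and polling the database until the
    # expected workflow and job states are reached.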
    def test_workflow_commit(self):
        # test the committing stage for workflow creating
        workflow_def = make_workflow_template()
        workflow = Workflow(
            id=20,
            name='job_test1',
            comment='a test workflow',
            config=workflow_def.SerializeToString(),
            project_id=1,
            forkable=True,
            state=WorkflowState.NEW,
            target_state=WorkflowState.READY,
            transaction_state=TransactionState.PARTICIPANT_COMMITTING)
        db.session.add(workflow)
        db.session.commit()
        scheduler.wakeup(20)
        self._wait_until(
            lambda: Workflow.query.get(20).state == WorkflowState.READY)
        workflow = Workflow.query.get(20)
        self.assertEqual(len(workflow.get_jobs()), 2)
        self.assertEqual(workflow.get_jobs()[0].state, JobState.NEW)
        self.assertEqual(workflow.get_jobs()[1].state, JobState.NEW)

        # test the committing stage for workflow running
        workflow.target_state = WorkflowState.RUNNING
        workflow.transaction_state = TransactionState.PARTICIPANT_COMMITTING
        db.session.commit()
        scheduler.wakeup(20)
        self._wait_until(
            lambda: Workflow.query.get(20).state == WorkflowState.RUNNING)
        workflow = Workflow.query.get(20)
        self._wait_until(
            lambda: workflow.get_jobs()[0].state == JobState.STARTED)
        self.assertEqual(workflow.get_jobs()[1].state, JobState.WAITING)
        workflow = Workflow.query.get(20)
        for job in workflow.owned_jobs:
            job.state = JobState.COMPLETED
        self.assertEqual(workflow.to_dict()['state'], 'COMPLETED')
        workflow.get_jobs()[0].state = JobState.FAILED
        self.assertEqual(workflow.to_dict()['state'], 'FAILED')
        # test the committing stage for workflow stopping
        workflow.target_state = WorkflowState.STOPPED
        workflow.transaction_state = TransactionState.PARTICIPANT_COMMITTING
        for job in workflow.owned_jobs:
            job.state = JobState.STARTED
        db.session.commit()
        scheduler.wakeup(20)
        self._wait_until(
            lambda: Workflow.query.get(20).state == WorkflowState.STOPPED)
        workflow = Workflow.query.get(20)
        self._wait_until(
            lambda: workflow.get_jobs()[0].state == JobState.STOPPED)
        self.assertEqual(workflow.get_jobs()[1].state, JobState.STOPPED)
Example #10
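    # PATCH: update forkable, metric_is_public, target_state, state and
    # config. Before switching to RUNNING, each owned job's YAML is
    # generated to validate its variables; state may only be set to INVALID
    # to invalidate the workflow, and the scheduler is woken after a
    # successful target_state update.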
    def patch(self, workflow_id):
        parser = reqparse.RequestParser()
        parser.add_argument('target_state', type=str, required=False,
                            default=None, help='target_state is empty')
        parser.add_argument('state', type=str, required=False,
                            default=None, help='state is empty')
        parser.add_argument('forkable', type=bool)
        parser.add_argument('metric_is_public', type=bool)
        parser.add_argument('config', type=dict, required=False,
                            default=None, help='updated config')
        data = parser.parse_args()

        workflow = _get_workflow(workflow_id)

        forkable = data['forkable']
        if forkable is not None:
            workflow.forkable = forkable
            db.session.flush()

        metric_is_public = data['metric_is_public']
        if metric_is_public is not None:
            workflow.metric_is_public = metric_is_public
            db.session.flush()

        target_state = data['target_state']
        if target_state:
            try:
                if WorkflowState[target_state] == WorkflowState.RUNNING:
                    for job in workflow.owned_jobs:
                        try:
                            generate_job_run_yaml(job)
                        # TODO: check if peer variables is valid
                        except RuntimeError as e:
                            raise ValueError(
                                f'Invalid Variable when try '
                                f'to format the job {job.name}:{str(e)}')
                workflow.update_target_state(WorkflowState[target_state])
                db.session.flush()
                logging.info('updated workflow %d target_state to %s',
                             workflow.id, workflow.target_state)
                scheduler.wakeup(workflow.id)
            except ValueError as e:
                raise InvalidArgumentException(details=str(e)) from e

        state = data['state']
        if state:
            try:
                assert state == 'INVALID', \
                    'Can only set state to INVALID for invalidation'
                workflow.invalidate()
                db.session.flush()
                logging.info('invalidate workflow %d', workflow.id)
            except ValueError as e:
                raise InvalidArgumentException(details=str(e)) from e

        config = data['config']
        if config:
            try:
                if workflow.target_state != WorkflowState.INVALID or \
                        workflow.state not in \
                        [WorkflowState.READY, WorkflowState.STOPPED]:
                    raise NoAccessException('Cannot edit running workflow')
                config_proto = dict_to_workflow_definition(data['config'])
                workflow.set_config(config_proto)
                db.session.flush()
            except ValueError as e:
                raise InvalidArgumentException(details=str(e)) from e

        db.session.commit()
        return {'data': workflow.to_dict()}, HTTPStatus.OK
Example #11
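    # POST: create a workflow with a generated DNS-1035-safe uuid and
    # per-job creation flags; a fork request must reference a base template
    # with the same number of job definitions. The record is committed and
    # the scheduler is woken up afterwards.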
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument('name', required=True, help='name is empty')
        parser.add_argument('project_id',
                            type=int,
                            required=True,
                            help='project_id is empty')
        # TODO: should verify if the config is compatible with
        # workflow template
        parser.add_argument('config',
                            type=dict,
                            required=True,
                            help='config is empty')
        parser.add_argument('forkable',
                            type=bool,
                            required=True,
                            help='forkable is empty')
        parser.add_argument('forked_from',
                            type=int,
                            required=False,
                            help='fork from base workflow')
        parser.add_argument('create_job_flags',
                            type=list,
                            required=False,
                            location='json',
                            help='flags in common.CreateJobFlag')
        parser.add_argument('peer_create_job_flags',
                            type=list,
                            required=False,
                            location='json',
                            help='peer flags in common.CreateJobFlag')
        parser.add_argument('fork_proposal_config',
                            type=dict,
                            required=False,
                            help='fork and edit peer config')
        parser.add_argument('comment')
        data = parser.parse_args()
        name = data['name']
        if Workflow.query.filter_by(name=name).first() is not None:
            raise ResourceConflictException(
                'Workflow {} already exists.'.format(name))

        # form to proto buffer
        template_proto = dict_to_workflow_definition(data['config'])
        workflow = Workflow(
            name=name,
            # 20 bytes
            # a DNS-1035 label must start with an
            # alphabetic character. substring uuid[:19] has
            # no collision in 10 million draws
            uuid=f'u{uuid4().hex[:19]}',
            comment=data['comment'],
            project_id=data['project_id'],
            forkable=data['forkable'],
            forked_from=data['forked_from'],
            state=WorkflowState.NEW,
            target_state=WorkflowState.READY,
            transaction_state=TransactionState.READY)
        workflow.set_create_job_flags(data['create_job_flags'])

        if workflow.forked_from is not None:
            fork_config = dict_to_workflow_definition(
                data['fork_proposal_config'])
            # TODO: more validations
            if len(fork_config.job_definitions) != \
                    len(template_proto.job_definitions):
                raise InvalidArgumentException(
                    'Forked workflow\'s template does not match base workflow')
            workflow.set_fork_proposal_config(fork_config)
            # TODO: check that federated jobs have
            #       same reuse policy on both sides
            workflow.set_peer_create_job_flags(data['peer_create_job_flags'])

        workflow.set_config(template_proto)
        db.session.add(workflow)
        db.session.commit()
        logging.info('Inserted a workflow to db')
        scheduler.wakeup(workflow.id)
        return {'data': workflow.to_dict()}, HTTPStatus.CREATED
Example #12
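    # PATCH: combined update handler. Optionally (re)schedules a cronjob
    # via batch_update_interval, updates flags, target_state, state and
    # config, and syncs create_job_flags onto the owned jobs' is_disabled
    # field before committing and waking the scheduler.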
    def patch(self, workflow_id):
        parser = reqparse.RequestParser()
        parser.add_argument('target_state', type=str, required=False,
                            default=None, help='target_state is empty')
        parser.add_argument('state',
                            type=str,
                            required=False,
                            help='state is empty')
        parser.add_argument('forkable', type=bool)
        parser.add_argument('metric_is_public', type=bool)
        parser.add_argument('config',
                            type=dict,
                            required=False,
                            help='updated config')
        parser.add_argument('create_job_flags', type=list, required=False,
                            location='json',
                            help='flags in common.CreateJobFlag')
        parser.add_argument('batch_update_interval',
                            type=int,
                            required=False,
                            help='interval for restart workflow in minute')
        data = parser.parse_args()

        workflow = _get_workflow(workflow_id)

        # start workflow every interval time
        batch_update_interval = data['batch_update_interval']
        if batch_update_interval:
            start_or_stop_cronjob(batch_update_interval, workflow)

        forkable = data['forkable']
        if forkable is not None:
            workflow.forkable = forkable
            db.session.flush()

        metric_is_public = data['metric_is_public']
        if metric_is_public is not None:
            workflow.metric_is_public = metric_is_public
            db.session.flush()

        target_state = data['target_state']
        if target_state:
            try:
                if WorkflowState[target_state] == WorkflowState.RUNNING:
                    for job in workflow.owned_jobs:
                        try:
                            generate_job_run_yaml(job)
                        # TODO: check if peer variables is valid
                        except Exception as e:  # pylint: disable=broad-except
                            raise ValueError(
                                f'Invalid Variable when try '
                                f'to format the job {job.name}:{str(e)}')
                workflow.update_target_state(WorkflowState[target_state])
                db.session.flush()
                logging.info('updated workflow %d target_state to %s',
                             workflow.id, workflow.target_state)
            except ValueError as e:
                raise InvalidArgumentException(details=str(e)) from e

        state = data['state']
        if state:
            try:
                assert state == 'INVALID', \
                    'Can only set state to INVALID for invalidation'
                workflow.invalidate()
                db.session.flush()
                logging.info('invalidate workflow %d', workflow.id)
            except ValueError as e:
                raise InvalidArgumentException(details=str(e)) from e

        config = data['config']
        if config:
            try:
                if workflow.target_state != WorkflowState.INVALID or \
                        workflow.state not in \
                        [WorkflowState.READY, WorkflowState.STOPPED]:
                    raise NoAccessException('Cannot edit running workflow')
                config_proto = dict_to_workflow_definition(data['config'])
                workflow.set_config(config_proto)
                db.session.flush()
            except ValueError as e:
                raise InvalidArgumentException(details=str(e)) from e

        create_job_flags = data['create_job_flags']
        if create_job_flags:
            jobs = workflow.get_jobs()
            if len(create_job_flags) != len(jobs):
                raise InvalidArgumentException(
                    details='Number of job defs does not match number '
                            f'of create_job_flags {len(jobs)} '
                            f'vs {len(create_job_flags)}')
            workflow.set_create_job_flags(create_job_flags)
            flags = workflow.get_create_job_flags()
            for i, job in enumerate(jobs):
                if job.workflow_id == workflow.id:
                    job.is_disabled = flags[i] == \
                                      common_pb2.CreateJobFlag.DISABLED

        db.session.commit()
        scheduler.wakeup(workflow.id)
        return {'data': workflow.to_dict()}, HTTPStatus.OK