Exemplo n.º 1
0
def _onUpload(event):
    """
    Look at uploads containing references related to this plugin. If found,
    they are used to link item task outputs back to a job document.
    """
    try:
        ref = json.loads(event.info.get('reference', ''))
    except ValueError:
        return

    if isinstance(ref, dict) and ref.get('type') == 'item_tasks.output':
        jobModel = Job()
        tokenModel = Token()
        token = event.info['currentToken']

        if tokenModel.hasScope(token, 'item_tasks.job_write:%s' % ref['jobId']):
            job = jobModel.load(ref['jobId'], force=True, exc=True)
        else:
            job = jobModel.load(
                ref['jobId'], level=AccessType.WRITE, user=event.info['currentUser'], exc=True)

        file = event.info['file']
        item = Item().load(file['itemId'], force=True)

        # Add link to job model to the output item
        jobModel.updateJob(job, otherFields={
            'itemTaskBindings.outputs.%s.itemId' % ref['id']: item['_id']
        })

        # Also a link in the item to the job that created it
        item['createdByJob'] = job['_id']
        Item().save(item)
Exemplo n.º 2
0
def _onUpload(event):
    """
    Look at uploads containing references related to this plugin. If found,
    they are used to link item task outputs back to a job document.
    """
    try:
        ref = json.loads(event.info.get('reference', ''))
    except ValueError:
        return

    if isinstance(ref, dict) and ref.get('type') == 'item_tasks.output':
        jobModel = Job()
        tokenModel = Token()
        token = event.info['currentToken']

        if tokenModel.hasScope(token, 'item_tasks.job_write:%s' % ref['jobId']):
            job = jobModel.load(ref['jobId'], force=True, exc=True)
        else:
            job = jobModel.load(
                ref['jobId'], level=AccessType.WRITE, user=event.info['currentUser'], exc=True)

        file = event.info['file']
        item = Item().load(file['itemId'], force=True)

        # Add link to job model to the output item
        jobModel.updateJob(job, otherFields={
            'itemTaskBindings.outputs.%s.itemId' % ref['id']: item['_id']
        })

        # Also a link in the item to the job that created it
        item['createdByJob'] = job['_id']
        Item().save(item)
def run(job):
    jobModel = Job()
    jobModel.updateJob(job, status=JobStatus.RUNNING)

    src_workspace_id, dest_workspace_id = job["args"]
    user = job["kwargs"]["user"]
    tale = job["kwargs"]["tale"]

    try:
        parent = Folder().load(src_workspace_id,
                               user=user,
                               exc=True,
                               level=AccessType.READ)
        workspace = Folder().load(dest_workspace_id, user=user, exc=True)
        Folder().copyFolderComponents(parent, workspace, user, None)
        tale["status"] = TaleStatus.READY
        Tale().updateTale(tale)
        jobModel.updateJob(job,
                           status=JobStatus.SUCCESS,
                           log="Copying finished")
    except Exception:
        tale["status"] = TaleStatus.ERROR
        Tale().updateTale(tale)
        t, val, tb = sys.exc_info()
        log = "%s: %s\n%s" % (t.__name__, repr(val), traceback.extract_tb(tb))
        jobModel.updateJob(job, status=JobStatus.ERROR, log=log)
        raise
Exemplo n.º 4
0
def run(job):
    jobModel = Job()
    jobModel.updateJob(job, status=JobStatus.RUNNING)

    try:
        newFile = createThumbnail(**job['kwargs'])
        log = 'Created thumbnail file %s.' % newFile['_id']
        jobModel.updateJob(job, status=JobStatus.SUCCESS, log=log)
    except Exception:
        t, val, tb = sys.exc_info()
        log = '%s: %s\n%s' % (t.__name__, repr(val), traceback.extract_tb(tb))
        jobModel.updateJob(job, status=JobStatus.ERROR, log=log)
        raise
Exemplo n.º 5
0
def run(job):
    jobModel = Job()
    jobModel.updateJob(job, status=JobStatus.RUNNING)

    try:
        newFile = createThumbnail(**job['kwargs'])
        log = 'Created thumbnail file %s.' % newFile['_id']
        jobModel.updateJob(job, status=JobStatus.SUCCESS, log=log)
    except Exception:
        t, val, tb = sys.exc_info()
        log = '%s: %s\n%s' % (t.__name__, repr(val), traceback.extract_tb(tb))
        jobModel.updateJob(job, status=JobStatus.ERROR, log=log)
        raise
Exemplo n.º 6
0
    def testConfigureItemTaskFromSlicerCli(self):
        # Create a new item that will become a task
        item = Item().createItem(name='placeholder',
                                 creator=self.admin,
                                 folder=self.privateFolder)

        # Create task to introspect container
        with mock.patch('girder.plugins.jobs.models.job.Job.scheduleJob'
                        ) as scheduleMock:
            resp = self.request('/item/%s/item_task_slicer_cli_description' %
                                item['_id'],
                                method='POST',
                                params={
                                    'image': 'johndoe/foo:v5',
                                    'args': json.dumps(['--foo', 'bar'])
                                },
                                user=self.admin)
            self.assertStatusOk(resp)
            self.assertEqual(resp.json['_modelType'], 'job')
            self.assertEqual(len(scheduleMock.mock_calls), 1)
            job = scheduleMock.mock_calls[0][1][0]
            self.assertEqual(job['handler'], 'worker_handler')
            self.assertEqual(job['itemTaskId'], item['_id'])
            self.assertEqual(job['kwargs']['outputs']['_stdout']['method'],
                             'PUT')
            self.assertTrue(
                job['kwargs']['outputs']['_stdout']['url'].endswith(
                    'item/%s/item_task_slicer_cli_xml' % item['_id']))
            token = job['kwargs']['outputs']['_stdout']['headers'][
                'Girder-Token']

        # Task should not be registered until we get the callback
        resp = self.request('/item_task', user=self.admin)
        self.assertStatusOk(resp)
        self.assertEqual(resp.json, [])

        # Image and args should be stored in the item metadata
        item = Item().load(item['_id'], force=True)
        self.assertEqual(item['meta']['itemTaskSpec']['docker_image'],
                         'johndoe/foo:v5')
        self.assertEqual(item['meta']['itemTaskSlicerCliArgs'],
                         ['--foo', 'bar'])

        # Simulate callback from introspection job
        with open(os.path.join(os.path.dirname(__file__),
                               'slicer_cli.xml')) as f:
            xml = f.read()

        resp = self.request('/item/%s/item_task_slicer_cli_xml' % item['_id'],
                            method='PUT',
                            params={
                                'setName': True,
                                'setDescription': True
                            },
                            token=token,
                            body=xml,
                            type='application/xml')
        self.assertStatusOk(resp)

        # We should only be able to see tasks we have read access on
        resp = self.request('/item_task')
        self.assertStatusOk(resp)
        self.assertEqual(resp.json, [])

        resp = self.request('/item_task', user=self.admin)
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 1)
        self.assertEqual(resp.json[0]['_id'], str(item['_id']))

        item = Item().load(item['_id'], force=True)
        self.assertEqual(item['name'], 'PET phantom detector CLI')
        self.assertEqual(
            item['description'],
            u'**Description**: Detects positions of PET/CT pocket phantoms in PET image.\n\n'
            u'**Author(s)**: Girder Developers\n\n**Version**: 1.0\n\n'
            u'**License**: Apache 2.0\n\n**Acknowledgements**: *none*\n\n'
            u'*This description was auto-generated from the Slicer CLI XML specification.*'
        )
        self.assertTrue(item['meta']['isItemTask'])
        self.assertEqual(
            item['meta']['itemTaskSpec'], {
                'mode':
                'docker',
                'docker_image':
                'johndoe/foo:v5',
                'container_args': [
                    '--foo', 'bar', '--InputImage=$input{--InputImage}',
                    '--MaximumLineStraightnessDeviation=$input{--MaximumLineStraightnessDeviation}',
                    '--MaximumRadius=$input{--MaximumRadius}',
                    '--MaximumSphereDistance=$input{--MaximumSphereDistance}',
                    '--MinimumRadius=$input{--MinimumRadius}',
                    '--MinimumSphereActivity=$input{--MinimumSphereActivity}',
                    '--MinimumSphereDistance=$input{--MinimumSphereDistance}',
                    '--SpheresPerPhantom=$input{--SpheresPerPhantom}',
                    '$flag{--StrictSorting}',
                    '--DetectedPoints=$output{--DetectedPoints}'
                ],
                'inputs': [{
                    'description': 'Input image to be analysed.',
                    'format': 'image',
                    'name': 'InputImage',
                    'type': 'image',
                    'id': '--InputImage',
                    'target': 'filepath'
                }, {
                    'description':
                    'Used for eliminating detections which are not in a straight line. '
                    'Unit: multiples of geometric average of voxel spacing',
                    'format':
                    'number',
                    'default': {
                        'data': 1.0
                    },
                    'type':
                    'number',
                    'id':
                    '--MaximumLineStraightnessDeviation',
                    'name':
                    'MaximumLineStraightnessDeviation'
                }, {
                    'description':
                    'Used for eliminating too big blobs. Unit: millimeter [mm]',
                    'format': 'number',
                    'default': {
                        'data': 20.0
                    },
                    'type': 'number',
                    'id': '--MaximumRadius',
                    'name': 'MaximumRadius'
                }, {
                    'description':
                    'Signifies maximum distance between adjacent sphere centers [mm]. '
                    'Used to separate phantoms from tumors.',
                    'format':
                    'number',
                    'default': {
                        'data': 40.0
                    },
                    'type':
                    'number',
                    'id':
                    '--MaximumSphereDistance',
                    'name':
                    'MaximumSphereDistance'
                }, {
                    'description':
                    'Used for eliminating too small blobs. Unit: millimeter [mm]',
                    'format': 'number',
                    'default': {
                        'data': 3.0
                    },
                    'type': 'number',
                    'id': '--MinimumRadius',
                    'name': 'MinimumRadius'
                }, {
                    'description':
                    'Used for thresholding in blob detection. '
                    'Unit: becquerels per milliliter [Bq/ml]',
                    'format':
                    'number',
                    'default': {
                        'data': 5000.0
                    },
                    'type':
                    'number',
                    'id':
                    '--MinimumSphereActivity',
                    'name':
                    'MinimumSphereActivity'
                }, {
                    'description':
                    'Signifies minimum distance between adjacent sphere centers [mm]. '
                    'Used to separate phantoms from tumors.',
                    'format':
                    'number',
                    'default': {
                        'data': 30.0
                    },
                    'type':
                    'number',
                    'id':
                    '--MinimumSphereDistance',
                    'name':
                    'MinimumSphereDistance'
                }, {
                    'description':
                    'What kind of phantom are we working with here?',
                    'format': 'number-enumeration',
                    'default': {
                        'data': 3
                    },
                    'type': 'number-enumeration',
                    'id': '--SpheresPerPhantom',
                    'name': 'SpheresPerPhantom',
                    'values': [2, 3]
                }, {
                    'description':
                    'Controls whether spheres within a phantom must have descending '
                    'activities. If OFF, they can have approximately same activities '
                    '(within 15%).',
                    'format':
                    'boolean',
                    'default': {
                        'data': False
                    },
                    'type':
                    'boolean',
                    'id':
                    '--StrictSorting',
                    'name':
                    'StrictSorting'
                }],
                'outputs': [{
                    'description':
                    'Fiducial points, one for each detected sphere. '
                    'Will be multiple of 3.',
                    'format':
                    'new-file',
                    'name':
                    'DetectedPoints',
                    'type':
                    'new-file',
                    'id':
                    '--DetectedPoints',
                    'target':
                    'filepath'
                }]
            })

        # Shouldn't be able to run the task if we don't have execute permission flag
        Folder().setUserAccess(self.privateFolder,
                               user=self.user,
                               level=AccessType.READ,
                               save=True)
        resp = self.request('/item_task/%s/execution' % item['_id'],
                            method='POST',
                            user=self.user)
        self.assertStatus(resp, 403)

        # Grant the user permission, and run the task
        from girder.plugins.item_tasks.constants import ACCESS_FLAG_EXECUTE_TASK
        Folder().setUserAccess(self.privateFolder,
                               user=self.user,
                               level=AccessType.WRITE,
                               flags=ACCESS_FLAG_EXECUTE_TASK,
                               currentUser=self.admin,
                               save=True)

        inputs = {
            '--InputImage': {
                'mode': 'girder',
                'resource_type': 'item',
                'id': str(item['_id'])
            },
            '--MaximumLineStraightnessDeviation': {
                'mode': 'inline',
                'data': 1
            },
            '--MaximumRadius': {
                'mode': 'inline',
                'data': 20
            },
            '--MaximumSphereDistance': {
                'mode': 'inline',
                'data': 40
            },
            '--MinimumRadius': {
                'mode': 'inline',
                'data': 3
            },
            '--MinimumSphereActivity': {
                'mode': 'inline',
                'data': 5000
            },
            '--MinimumSphereDistance': {
                'mode': 'inline',
                'data': 30
            },
            '--SpheresPerPhantom': {
                'mode': 'inline',
                'data': 3
            },
            '--StrictSorting': {
                'mode': 'inline',
                'data': False
            }
        }

        outputs = {
            '--DetectedPoints': {
                'mode': 'girder',
                'parent_id': str(self.privateFolder['_id']),
                'parent_type': 'folder',
                'name': 'test.txt'
            }
        }

        # Ensure task was scheduled
        with mock.patch('girder.plugins.jobs.models.job.Job.scheduleJob'
                        ) as scheduleMock:
            resp = self.request('/item_task/%s/execution' % item['_id'],
                                method='POST',
                                user=self.user,
                                params={
                                    'inputs': json.dumps(inputs),
                                    'outputs': json.dumps(outputs)
                                })
            self.assertEqual(len(scheduleMock.mock_calls), 1)
        self.assertStatusOk(resp)
        job = resp.json
        self.assertEqual(job['_modelType'], 'job')
        self.assertNotIn('kwargs', job)  # ordinary user can't see kwargs

        from girder.plugins.jobs.models.job import Job
        jobModel = Job()
        job = jobModel.load(job['_id'], force=True)
        output = job['kwargs']['outputs']['--DetectedPoints']

        # Simulate output from the worker
        contents = b'Hello world'
        resp = self.request(path='/file',
                            method='POST',
                            token=output['token'],
                            params={
                                'parentType': output['parent_type'],
                                'parentId': output['parent_id'],
                                'name': output['name'],
                                'size': len(contents),
                                'mimeType': 'text/plain',
                                'reference': output['reference']
                            })
        self.assertStatusOk(resp)

        uploadId = resp.json['_id']
        fields = [('offset', 0), ('uploadId', uploadId)]
        files = [('chunk', output['name'], contents)]
        resp = self.multipartRequest(path='/file/chunk',
                                     fields=fields,
                                     files=files,
                                     token=output['token'])
        self.assertStatusOk(resp)
        file = resp.json
        self.assertEqual(file['_modelType'], 'file')
        self.assertEqual(file['size'], 11)
        self.assertEqual(file['mimeType'], 'text/plain')
        file = File().load(file['_id'], force=True)

        # Make sure temp token is removed once we change job status to final state
        job = jobModel.load(job['_id'], force=True)
        self.assertIn('itemTaskTempToken', job)

        from girder.plugins.jobs.constants import JobStatus
        # Transition through states to SUCCESS
        job = jobModel.updateJob(job, status=JobStatus.QUEUED)
        job = jobModel.updateJob(job, status=JobStatus.RUNNING)
        job = jobModel.updateJob(job, status=JobStatus.SUCCESS)

        self.assertNotIn('itemTaskTempToken', job)
        self.assertIn('itemTaskBindings', job)

        # Wait for async data.process event to bind output provenance
        start = time.time()
        while time.time() - start < 15:
            job = jobModel.load(job['_id'], force=True)

            if 'itemId' in job['itemTaskBindings']['outputs'][
                    '--DetectedPoints']:
                break
            else:
                time.sleep(0.2)
        else:
            raise Exception('Output binding did not occur in time')

        self.assertEqual(
            job['itemTaskBindings']['outputs']['--DetectedPoints']['itemId'],
            file['itemId'])
Exemplo n.º 7
0
class JobsTestCase(base.TestCase):
    def setUp(self):
        base.TestCase.setUp(self)

        self.users = [User().createUser(
            'usr' + str(n), 'passwd', 'tst', 'usr', '*****@*****.**' % n)
            for n in range(3)]

        from girder.plugins.jobs.models.job import Job
        self.jobModel = Job()

    def testJobs(self):
        self.job = None

        def schedule(event):
            self.job = event.info
            if self.job['handler'] == 'my_handler':
                self.job['status'] = JobStatus.RUNNING
                self.job = self.jobModel.save(self.job)
                self.assertEqual(self.job['args'], ('hello', 'world'))
                self.assertEqual(self.job['kwargs'], {'a': 'b'})

        events.bind('jobs.schedule', 'test', schedule)

        # Create a job
        job = self.jobModel.createJob(
            title='Job Title', type='my_type', args=('hello', 'world'),
            kwargs={'a': 'b'}, user=self.users[1], handler='my_handler',
            public=False)
        self.assertEqual(self.job, None)
        self.assertEqual(job['status'], JobStatus.INACTIVE)

        # Schedule the job, make sure our handler was invoked
        self.jobModel.scheduleJob(job)
        self.assertEqual(self.job['_id'], job['_id'])
        self.assertEqual(self.job['status'], JobStatus.RUNNING)

        # Since the job is not public, user 2 should not have access
        path = '/job/%s' % job['_id']
        resp = self.request(path, user=self.users[2])
        self.assertStatus(resp, 403)
        resp = self.request(path, user=self.users[2], method='PUT')
        self.assertStatus(resp, 403)
        resp = self.request(path, user=self.users[2], method='DELETE')
        self.assertStatus(resp, 403)

        # If no user is specified, we should get a 401 error
        resp = self.request(path, user=None)
        self.assertStatus(resp, 401)

        # Make sure user who created the job can see it
        resp = self.request(path, user=self.users[1])
        self.assertStatusOk(resp)

        # We should be able to update the job as the user who created it
        resp = self.request(path, method='PUT', user=self.users[1], params={
            'log': 'My log message\n'
        })
        self.assertStatusOk(resp)

        # We should be able to create a job token and use that to update it too
        token = self.jobModel.createJobToken(job)
        resp = self.request(path, method='PUT', params={
            'log': 'append message',
            'token': token['_id']
        })
        self.assertStatusOk(resp)
        # We shouldn't get the log back in this case
        self.assertNotIn('log', resp.json)

        # Do a fetch on the job itself to get the log
        resp = self.request(path, user=self.users[1])
        self.assertStatusOk(resp)
        self.assertEqual(
            resp.json['log'], ['My log message\n', 'append message'])

        # Test overwriting the log and updating status
        resp = self.request(path, method='PUT', params={
            'log': 'overwritten log',
            'overwrite': 'true',
            'status': JobStatus.SUCCESS,
            'token': token['_id']
        })
        self.assertStatusOk(resp)
        self.assertNotIn('log', resp.json)
        self.assertEqual(resp.json['status'], JobStatus.SUCCESS)

        job = self.jobModel.load(job['_id'], force=True, includeLog=True)
        self.assertEqual(job['log'], ['overwritten log'])

        # We should be able to delete the job as the user who created it
        resp = self.request(path, user=self.users[1], method='DELETE')
        self.assertStatusOk(resp)
        job = self.jobModel.load(job['_id'], force=True)
        self.assertIsNone(job)

    def testLegacyLogBehavior(self):
        # Force save a job with a string log to simulate a legacy job record
        job = self.jobModel.createJob(
            title='legacy', type='legacy', user=self.users[1], save=False)
        job['log'] = 'legacy log'
        job = self.jobModel.save(job, validate=False)

        self.assertEqual(job['log'], 'legacy log')

        # Load the record, we should now get the log as a list
        job = self.jobModel.load(job['_id'], force=True, includeLog=True)
        self.assertEqual(job['log'], ['legacy log'])

    def testListJobs(self):
        job = self.jobModel.createJob(title='A job', type='t', user=self.users[1], public=False)
        anonJob = self.jobModel.createJob(title='Anon job', type='t')
        # Ensure timestamp for public job is strictly higher (ms resolution)
        time.sleep(0.1)
        publicJob = self.jobModel.createJob(
            title='Anon job', type='t', public=True)

        # User 1 should be able to see their own jobs
        resp = self.request('/job', user=self.users[1], params={
            'userId': self.users[1]['_id']
        })
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 1)
        self.assertEqual(resp.json[0]['_id'], str(job['_id']))

        # User 2 should not see user 1's jobs in the list
        resp = self.request('/job', user=self.users[2], params={
            'userId': self.users[1]['_id']
        })
        self.assertEqual(resp.json, [])

        # Omitting a userId should assume current user
        resp = self.request('/job', user=self.users[1])
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 1)
        self.assertEqual(resp.json[0]['_id'], str(job['_id']))

        # Explicitly passing "None" should show anonymous jobs
        resp = self.request('/job', user=self.users[0], params={
            'userId': 'none'
        })
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 2)
        self.assertEqual(resp.json[0]['_id'], str(publicJob['_id']))
        self.assertEqual(resp.json[1]['_id'], str(anonJob['_id']))

        # Non-admins should only see public anon jobs
        resp = self.request('/job', params={'userId': 'none'})
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 1)
        self.assertEqual(resp.json[0]['_id'], str(publicJob['_id']))

    def testListAllJobs(self):
        self.jobModel.createJob(title='user 0 job', type='t', user=self.users[0], public=False)
        self.jobModel.createJob(title='user 1 job', type='t', user=self.users[1], public=False)
        self.jobModel.createJob(title='user 1 job', type='t', user=self.users[1], public=True)
        self.jobModel.createJob(title='user 2 job', type='t', user=self.users[2])
        self.jobModel.createJob(title='anonymous job', type='t')
        self.jobModel.createJob(title='anonymous public job', type='t2', public=True)

        # User 0, as a site admin, should be able to see all jobs
        resp = self.request('/job/all', user=self.users[0])
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 6)

        # Test deprecated listAll method
        jobs = list(self.jobModel.listAll(limit=0, offset=0, sort=None, currentUser=self.users[0]))
        self.assertEqual(len(jobs), 6)

        # get with filter
        resp = self.request('/job/all', user=self.users[0], params={
            'types': json.dumps(['t']),
            'statuses': json.dumps([0])
        })
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 5)

        # get with unmet filter conditions
        resp = self.request('/job/all', user=self.users[0], params={
            'types': json.dumps(['nonexisttype'])
        })
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 0)

        # User 1, as non site admin, should encounter http 403 (Forbidden)
        resp = self.request('/job/all', user=self.users[1])
        self.assertStatus(resp, 403)

        # Not authenticated user should encounter http 401 (unauthorized)
        resp = self.request('/job/all')
        self.assertStatus(resp, 401)

    def testFiltering(self):
        job = self.jobModel.createJob(title='A job', type='t', user=self.users[1], public=True)

        job['_some_other_field'] = 'foo'
        job = self.jobModel.save(job)

        resp = self.request('/job/%s' % job['_id'])
        self.assertStatusOk(resp)
        self.assertTrue('created' in resp.json)
        self.assertTrue('_some_other_field' not in resp.json)
        self.assertTrue('kwargs' not in resp.json)
        self.assertTrue('args' not in resp.json)

        resp = self.request('/job/%s' % job['_id'], user=self.users[0])
        self.assertTrue('kwargs' in resp.json)
        self.assertTrue('args' in resp.json)

        self.jobModel.exposeFields(level=AccessType.READ, fields={'_some_other_field'})
        self.jobModel.hideFields(level=AccessType.READ, fields={'created'})

        resp = self.request('/job/%s' % job['_id'])
        self.assertStatusOk(resp)
        self.assertEqual(resp.json['_some_other_field'], 'foo')
        self.assertTrue('created' not in resp.json)

    def testJobProgressAndNotifications(self):
        job = self.jobModel.createJob(title='a job', type='t', user=self.users[1], public=True)

        path = '/job/%s' % job['_id']
        resp = self.request(path)
        self.assertEqual(resp.json['progress'], None)
        self.assertEqual(resp.json['timestamps'], [])

        resp = self.request(path, method='PUT', user=self.users[1], params={
            'progressTotal': 100,
            'progressCurrent': 3,
            'progressMessage': 'Started',
            'notify': 'false',
            'status': JobStatus.QUEUED
        })
        self.assertStatusOk(resp)
        self.assertEqual(resp.json['progress'], {
            'total': 100,
            'current': 3,
            'message': 'Started',
            'notificationId': None
        })

        # The status update should make it so we now have a timestamp
        self.assertEqual(len(resp.json['timestamps']), 1)
        self.assertEqual(resp.json['timestamps'][0]['status'], JobStatus.QUEUED)
        self.assertIn('time', resp.json['timestamps'][0])

        # If the status does not change on update, no timestamp should be added
        resp = self.request(path, method='PUT', user=self.users[1], params={
            'status': JobStatus.QUEUED
        })
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json['timestamps']), 1)
        self.assertEqual(resp.json['timestamps'][0]['status'], JobStatus.QUEUED)

        # We passed notify=false, so we should only have the job creation notification
        resp = self.request(path='/notification/stream', method='GET',
                            user=self.users[1], isJson=False,
                            params={'timeout': 0})
        messages = self.getSseMessages(resp)
        self.assertEqual(len(messages), 1)

        # Update progress with notify=true (the default)
        resp = self.request(path, method='PUT', user=self.users[1], params={
            'progressCurrent': 50,
            'progressMessage': 'Something bad happened',
            'status': JobStatus.ERROR
        })
        self.assertStatusOk(resp)
        self.assertNotEqual(resp.json['progress']['notificationId'], None)

        # We should now see three notifications (job created + job status + progress)
        resp = self.request(path='/notification/stream', method='GET',
                            user=self.users[1], isJson=False,
                            params={'timeout': 0})
        messages = self.getSseMessages(resp)
        job = self.jobModel.load(job['_id'], force=True)
        self.assertEqual(len(messages), 3)
        creationNotify = messages[0]
        progressNotify = messages[1]
        statusNotify = messages[2]

        self.assertEqual(creationNotify['type'], 'job_created')
        self.assertEqual(creationNotify['data']['_id'], str(job['_id']))
        self.assertEqual(statusNotify['type'], 'job_status')
        self.assertEqual(statusNotify['data']['_id'], str(job['_id']))
        self.assertEqual(int(statusNotify['data']['status']), JobStatus.ERROR)
        self.assertNotIn('kwargs', statusNotify['data'])
        self.assertNotIn('log', statusNotify['data'])

        self.assertEqual(progressNotify['type'], 'progress')
        self.assertEqual(progressNotify['data']['title'], job['title'])
        self.assertEqual(progressNotify['data']['current'], float(50))
        self.assertEqual(progressNotify['data']['state'], 'error')
        self.assertEqual(progressNotify['_id'], str(job['progress']['notificationId']))

    def testDotsInKwargs(self):
        kwargs = {
            '$key.with.dots': 'value',
            'foo': [{
                'moar.dots': True
            }]
        }
        job = self.jobModel.createJob(title='dots', type='x', user=self.users[0], kwargs=kwargs)

        # Make sure we can update a job and notification creation works
        self.jobModel.updateJob(job, status=JobStatus.QUEUED, notify=True)

        self.assertEqual(job['kwargs'], kwargs)

        resp = self.request('/job/%s' % job['_id'], user=self.users[0])
        self.assertStatusOk(resp)
        self.assertEqual(resp.json['kwargs'], kwargs)

        job = self.jobModel.load(job['_id'], force=True)
        self.assertEqual(job['kwargs'], kwargs)
        job = self.jobModel.filter(job, self.users[0])
        self.assertEqual(job['kwargs'], kwargs)
        job = self.jobModel.filter(job, self.users[1])
        self.assertFalse('kwargs' in job)

    def testLocalJob(self):
        job = self.jobModel.createLocalJob(
            title='local', type='local', user=self.users[0], kwargs={
                'hello': 'world'
            }, module='plugin_tests.local_job_impl')

        self.jobModel.scheduleJob(job)

        job = self.jobModel.load(job['_id'], force=True, includeLog=True)
        self.assertEqual(job['log'], ['job ran!'])

        job = self.jobModel.createLocalJob(
            title='local', type='local', user=self.users[0], kwargs={
                'hello': 'world'
            }, module='plugin_tests.local_job_impl', function='fail')

        self.jobModel.scheduleJob(job)

        job = self.jobModel.load(job['_id'], force=True, includeLog=True)
        self.assertEqual(job['log'], ['job failed'])

    def testValidateCustomStatus(self):
        job = self.jobModel.createJob(title='test', type='x', user=self.users[0])

        def validateStatus(event):
            if event.info == 1234:
                event.preventDefault().addResponse(True)

        def validTransitions(event):
            if event.info['status'] == 1234:
                event.preventDefault().addResponse([JobStatus.INACTIVE])

        with self.assertRaises(ValidationException):
            self.jobModel.updateJob(job, status=1234)  # Should fail

        with events.bound('jobs.status.validate', 'test', validateStatus), \
                events.bound('jobs.status.validTransitions', 'test', validTransitions):
            self.jobModel.updateJob(job, status=1234)  # Should work

            with self.assertRaises(ValidationException):
                self.jobModel.updateJob(job, status=4321)  # Should fail

    def testValidateCustomStrStatus(self):
        job = self.jobModel.createJob(title='test', type='x', user=self.users[0])

        def validateStatus(event):
            states = ['a', 'b', 'c']

            if event.info in states:
                event.preventDefault().addResponse(True)

        def validTransitions(event):
            if event.info['status'] == 'a':
                event.preventDefault().addResponse([JobStatus.INACTIVE])

        with self.assertRaises(ValidationException):
            self.jobModel.updateJob(job, status='a')

        with events.bound('jobs.status.validate', 'test', validateStatus), \
                events.bound('jobs.status.validTransitions', 'test', validTransitions):
            self.jobModel.updateJob(job, status='a')
            self.assertEqual(job['status'], 'a')

        with self.assertRaises(ValidationException), \
                events.bound('jobs.status.validate', 'test', validateStatus):
            self.jobModel.updateJob(job, status='foo')

    def testUpdateOtherFields(self):
        job = self.jobModel.createJob(title='test', type='x', user=self.users[0])
        job = self.jobModel.updateJob(job, otherFields={'other': 'fields'})
        self.assertEqual(job['other'], 'fields')

    def testCancelJob(self):
        job = self.jobModel.createJob(title='test', type='x', user=self.users[0])
        # add to the log
        job = self.jobModel.updateJob(job, log='entry 1\n')
        # Reload without the log
        job = self.jobModel.load(id=job['_id'], force=True)
        self.assertEqual(len(job.get('log', [])), 0)
        # Cancel
        job = self.jobModel.cancelJob(job)
        self.assertEqual(job['status'], JobStatus.CANCELED)
        # Reloading should still have the log and be canceled
        job = self.jobModel.load(id=job['_id'], force=True, includeLog=True)
        self.assertEqual(job['status'], JobStatus.CANCELED)
        self.assertEqual(len(job.get('log', [])), 1)

    def testCancelJobEndpoint(self):
        job = self.jobModel.createJob(title='test', type='x', user=self.users[0])

        # Ensure requires write perms
        jobCancelUrl = '/job/%s/cancel' % job['_id']
        resp = self.request(jobCancelUrl, user=self.users[1], method='PUT')
        self.assertStatus(resp, 403)

        # Try again with the right user
        jobCancelUrl = '/job/%s/cancel' % job['_id']
        resp = self.request(jobCancelUrl, user=self.users[0], method='PUT')
        self.assertStatusOk(resp)
        self.assertEqual(resp.json['status'], JobStatus.CANCELED)

    def testJobsTypesAndStatuses(self):
        self.jobModel.createJob(title='user 0 job', type='t1', user=self.users[0], public=False)
        self.jobModel.createJob(title='user 1 job', type='t2', user=self.users[1], public=False)
        self.jobModel.createJob(title='user 1 job', type='t3', user=self.users[1], public=True)
        self.jobModel.createJob(title='user 2 job', type='t4', user=self.users[2])
        self.jobModel.createJob(title='anonymous job', type='t5')
        self.jobModel.createJob(title='anonymous public job', type='t6', public=True)

        # User 1, as non site admin, should encounter http 403 (Forbidden)
        resp = self.request('/job/typeandstatus/all', user=self.users[1])
        self.assertStatus(resp, 403)

        # Admin user gets all types and statuses
        resp = self.request('/job/typeandstatus/all', user=self.users[0])
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json['types']), 6)
        self.assertEqual(len(resp.json['statuses']), 1)

        # standard user gets types and statuses of its own jobs
        resp = self.request('/job/typeandstatus', user=self.users[1])
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json['types']), 2)
        self.assertEqual(len(resp.json['statuses']), 1)

    def testDefaultParentId(self):
        job = self.jobModel.createJob(title='Job', type='Job', user=self.users[0])
        # If not specified parentId should be None
        self.assertEquals(job['parentId'], None)

    def testIsParentIdCorrect(self):
        parentJob = self.jobModel.createJob(
            title='Parent Job', type='Parent Job', user=self.users[0])

        childJob = self.jobModel.createJob(
            title='Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)
        # During initialization parent job should be set correctly
        self.assertEqual(childJob['parentId'], parentJob['_id'])

    def testSetParentCorrectly(self):
        parentJob = self.jobModel.createJob(
            title='Parent Job', type='Parent Job', user=self.users[0])
        childJob = self.jobModel.createJob(title='Child Job', type='Child Job', user=self.users[0])

        self.jobModel.setParentJob(childJob, parentJob)

        # After setParentJob method is called parent job should be set correctly
        self.assertEqual(childJob['parentId'], parentJob['_id'])

    def testParentCannotBeEqualToChild(self):
        childJob = self.jobModel.createJob(title='Child Job', type='Child Job', user=self.users[0])

        # Cannot set a job as it's own parent
        with self.assertRaises(ValidationException):
            self.jobModel.setParentJob(childJob, childJob)

    def testParentIdCannotBeOverridden(self):
        parentJob = self.jobModel.createJob(
            title='Parent Job', type='Parent Job', user=self.users[0])

        anotherParentJob = self.jobModel.createJob(
            title='Another Parent Job', type='Parent Job', user=self.users[0])

        childJob = self.jobModel.createJob(
            title='Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)

        with self.assertRaises(ValidationException):
            # If parent job is set, cannot be overridden
            self.jobModel.setParentJob(childJob, anotherParentJob)

    def testListChildJobs(self):
        parentJob = self.jobModel.createJob(
            title='Parent Job', type='Parent Job', user=self.users[0])

        childJob = self.jobModel.createJob(
            title='Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)

        self.jobModel.createJob(
            title='Another Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)

        # Should return a list with 2 jobs
        self.assertEquals(len(list(self.jobModel.listChildJobs(parentJob))), 2)
        # Should return an empty list
        self.assertEquals(len(list(self.jobModel.listChildJobs(childJob))), 0)

    def testListChildJobsRest(self):
        parentJob = self.jobModel.createJob(
            title='Parent Job', type='Parent Job', user=self.users[0])

        childJob = self.jobModel.createJob(
            title='Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)

        self.jobModel.createJob(
            title='Another Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)

        resp = self.request('/job', user=self.users[0],
                            params={'parentId': str(parentJob['_id'])})
        resp2 = self.request('/job', user=self.users[0],
                             params={'parentId': str(childJob['_id'])})

        self.assertStatusOk(resp)
        self.assertStatusOk(resp2)

        # Should return a list with 2 jobs
        self.assertEquals(len(resp.json), 2)
        # Should return an empty list
        self.assertEquals(len(resp2.json), 0)

    def testCreateJobRest(self):
        resp = self.request('/job', method='POST',
                            user=self.users[0],
                            params={'title': 'job', 'type': 'job'})
        # If user does not have the necessary token status is 403
        self.assertStatus(resp, 403)

        token = Token().createToken(scope=REST_CREATE_JOB_TOKEN_SCOPE)

        resp2 = self.request(
            '/job', method='POST', token=token, params={'title': 'job', 'type': 'job'})
        # If user has the necessary token status is 200
        self.assertStatusOk(resp2)

    def testJobStateTransitions(self):
        job = self.jobModel.createJob(
            title='user 0 job', type='t1', user=self.users[0], public=False)

        # We can't move straight to SUCCESS
        with self.assertRaises(ValidationException):
            job = self.jobModel.updateJob(job, status=JobStatus.SUCCESS)

        self.jobModel.updateJob(job, status=JobStatus.QUEUED)
        self.jobModel.updateJob(job, status=JobStatus.RUNNING)
        self.jobModel.updateJob(job, status=JobStatus.ERROR)

        # We shouldn't be able to move backwards
        with self.assertRaises(ValidationException):
            self.jobModel.updateJob(job, status=JobStatus.QUEUED)
        with self.assertRaises(ValidationException):
            self.jobModel.updateJob(job, status=JobStatus.RUNNING)
        with self.assertRaises(ValidationException):
            self.jobModel.updateJob(job, status=JobStatus.INACTIVE)

    def testJobSaveEventModification(self):
        def customSave(event):
            kwargs = json_util.loads(event.info['kwargs'])
            kwargs['key2'] = 'newvalue'
            event.info['kwargs'] = json_util.dumps(kwargs)

        job = self.jobModel.createJob(title='A job', type='t', user=self.users[1], public=True)

        job['kwargs'] = {'key1': 'value1', 'key2': 'value2'}
        with events.bound('model.job.save', 'test', customSave):
            job = self.jobModel.save(job)
            self.assertEqual(job['kwargs']['key2'], 'newvalue')
Exemplo n.º 8
0
    def testConfigureItemTaskFromSlicerCli(self):
        # Create a new item that will become a task
        item = Item().createItem(name='placeholder', creator=self.admin, folder=self.privateFolder)

        # Create task to introspect container
        with mock.patch('girder.plugins.jobs.models.job.Job.scheduleJob') as scheduleMock:
            resp = self.request(
                '/item/%s/item_task_slicer_cli_description' % item['_id'], method='POST', params={
                    'image': 'johndoe/foo:v5',
                    'args': json.dumps(['--foo', 'bar'])
                }, user=self.admin)
            self.assertStatusOk(resp)
            self.assertEqual(resp.json['_modelType'], 'job')
            self.assertEqual(len(scheduleMock.mock_calls), 1)
            job = scheduleMock.mock_calls[0][1][0]
            self.assertEqual(job['handler'], 'worker_handler')
            self.assertEqual(job['itemTaskId'], item['_id'])
            self.assertEqual(job['kwargs']['outputs']['_stdout']['method'], 'PUT')
            self.assertTrue(job['kwargs']['outputs']['_stdout']['url'].endswith(
                'item/%s/item_task_slicer_cli_xml' % item['_id']))
            token = job['kwargs']['outputs']['_stdout']['headers']['Girder-Token']

        # Task should not be registered until we get the callback
        resp = self.request('/item_task', user=self.admin)
        self.assertStatusOk(resp)
        self.assertEqual(resp.json, [])

        # Image and args should be stored in the item metadata
        item = Item().load(item['_id'], force=True)
        self.assertEqual(item['meta']['itemTaskSpec']['docker_image'], 'johndoe/foo:v5')
        self.assertEqual(item['meta']['itemTaskSlicerCliArgs'], ['--foo', 'bar'])

        # Simulate callback from introspection job
        with open(os.path.join(os.path.dirname(__file__), 'slicer_cli.xml')) as f:
            xml = f.read()

        resp = self.request(
            '/item/%s/item_task_slicer_cli_xml' % item['_id'], method='PUT', params={
                'setName': True,
                'setDescription': True
            }, token=token, body=xml, type='application/xml')
        self.assertStatusOk(resp)

        # We should only be able to see tasks we have read access on
        resp = self.request('/item_task')
        self.assertStatusOk(resp)
        self.assertEqual(resp.json, [])

        resp = self.request('/item_task', user=self.admin)
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 1)
        self.assertEqual(resp.json[0]['_id'], str(item['_id']))

        item = Item().load(item['_id'], force=True)
        self.assertEqual(item['name'], 'PET phantom detector CLI')
        self.assertEqual(
            item['description'],
            u'**Description**: Detects positions of PET/CT pocket phantoms in PET image.\n\n'
            u'**Author(s)**: Girder Developers\n\n**Version**: 1.0\n\n'
            u'**License**: Apache 2.0\n\n**Acknowledgements**: *none*\n\n'
            u'*This description was auto-generated from the Slicer CLI XML specification.*'
        )
        self.assertTrue(item['meta']['isItemTask'])
        self.assertEqual(item['meta']['itemTaskSpec'], {
            'mode': 'docker',
            'docker_image': 'johndoe/foo:v5',
            'container_args': [
                '--foo', 'bar', '--InputImage=$input{--InputImage}',
                '--MaximumLineStraightnessDeviation=$input{--MaximumLineStraightnessDeviation}',
                '--MaximumRadius=$input{--MaximumRadius}',
                '--MaximumSphereDistance=$input{--MaximumSphereDistance}',
                '--MinimumRadius=$input{--MinimumRadius}',
                '--MinimumSphereActivity=$input{--MinimumSphereActivity}',
                '--MinimumSphereDistance=$input{--MinimumSphereDistance}',
                '--SpheresPerPhantom=$input{--SpheresPerPhantom}', '$flag{--StrictSorting}',
                '--DetectedPoints=$output{--DetectedPoints}'
            ],
            'inputs': [{
                'description': 'Input image to be analysed.',
                'format': 'image',
                'name': 'InputImage', 'type': 'image', 'id': '--InputImage',
                'target': 'filepath'
            }, {
                'description': 'Used for eliminating detections which are not in a straight line. '
                               'Unit: multiples of geometric average of voxel spacing',
                'format': 'number',
                'default': {'data': 1.0},
                'type': 'number',
                'id': '--MaximumLineStraightnessDeviation',
                'name': 'MaximumLineStraightnessDeviation'
            }, {
                'description': 'Used for eliminating too big blobs. Unit: millimeter [mm]',
                'format': 'number', 'default': {'data': 20.0},
                'type': 'number',
                'id': '--MaximumRadius',
                'name': 'MaximumRadius'
            }, {
                'description': 'Signifies maximum distance between adjacent sphere centers [mm]. '
                               'Used to separate phantoms from tumors.',
                'format': 'number', 'default': {'data': 40.0},
                'type': 'number',
                'id': '--MaximumSphereDistance',
                'name': 'MaximumSphereDistance'
            }, {
                'description': 'Used for eliminating too small blobs. Unit: millimeter [mm]',
                'format': 'number',
                'default': {'data': 3.0},
                'type': 'number',
                'id': '--MinimumRadius',
                'name': 'MinimumRadius'
            }, {
                'description': 'Used for thresholding in blob detection. '
                               'Unit: becquerels per milliliter [Bq/ml]',
                'format': 'number', 'default': {'data': 5000.0},
                'type': 'number',
                'id': '--MinimumSphereActivity',
                'name': 'MinimumSphereActivity'
            }, {
                'description': 'Signifies minimum distance between adjacent sphere centers [mm]. '
                               'Used to separate phantoms from tumors.',
                'format': 'number',
                'default': {'data': 30.0},
                'type': 'number',
                'id': '--MinimumSphereDistance',
                'name': 'MinimumSphereDistance'
            }, {
                'description': 'What kind of phantom are we working with here?',
                'format': 'number-enumeration',
                'default': {'data': 3},
                'type': 'number-enumeration',
                'id': '--SpheresPerPhantom',
                'name': 'SpheresPerPhantom',
                'values': [2, 3]
            }, {
                'description': 'Controls whether spheres within a phantom must have descending '
                               'activities. If OFF, they can have approximately same activities '
                               '(within 15%).',
                'format': 'boolean',
                'default': {'data': False},
                'type': 'boolean',
                'id': '--StrictSorting',
                'name': 'StrictSorting'
            }],
            'outputs': [{
                'description': 'Fiducial points, one for each detected sphere. '
                               'Will be multiple of 3.',
                'format': 'new-file',
                'name': 'DetectedPoints',
                'type': 'new-file',
                'id': '--DetectedPoints',
                'target': 'filepath'
            }]
        })

        # Shouldn't be able to run the task if we don't have execute permission flag
        Folder().setUserAccess(
            self.privateFolder, user=self.user, level=AccessType.READ, save=True)
        resp = self.request(
            '/item_task/%s/execution' % item['_id'], method='POST', user=self.user)
        self.assertStatus(resp, 403)

        # Grant the user permission, and run the task
        from girder.plugins.item_tasks.constants import ACCESS_FLAG_EXECUTE_TASK
        Folder().setUserAccess(
            self.privateFolder, user=self.user, level=AccessType.WRITE,
            flags=ACCESS_FLAG_EXECUTE_TASK, currentUser=self.admin, save=True)

        inputs = {
            '--InputImage': {
                'mode': 'girder',
                'resource_type': 'item',
                'id': str(item['_id'])
            },
            '--MaximumLineStraightnessDeviation': {
                'mode': 'inline',
                'data': 1
            },
            '--MaximumRadius': {
                'mode': 'inline',
                'data': 20
            },
            '--MaximumSphereDistance': {
                'mode': 'inline',
                'data': 40
            },
            '--MinimumRadius': {
                'mode': 'inline',
                'data': 3
            },
            '--MinimumSphereActivity': {
                'mode': 'inline',
                'data': 5000
            },
            '--MinimumSphereDistance': {
                'mode': 'inline',
                'data': 30
            },
            '--SpheresPerPhantom': {
                'mode': 'inline',
                'data': 3},
            '--StrictSorting': {
                'mode': 'inline',
                'data': False
            }
        }

        outputs = {
            '--DetectedPoints': {
                'mode': 'girder',
                'parent_id': str(self.privateFolder['_id']),
                'parent_type': 'folder',
                'name': 'test.txt'
            }
        }

        # Ensure task was scheduled
        with mock.patch('girder.plugins.jobs.models.job.Job.scheduleJob') as scheduleMock:
            resp = self.request(
                '/item_task/%s/execution' % item['_id'], method='POST', user=self.user, params={
                    'inputs': json.dumps(inputs),
                    'outputs': json.dumps(outputs)
                })
            self.assertEqual(len(scheduleMock.mock_calls), 1)
        self.assertStatusOk(resp)
        job = resp.json
        self.assertEqual(job['_modelType'], 'job')
        self.assertNotIn('kwargs', job)  # ordinary user can't see kwargs

        from girder.plugins.jobs.models.job import Job
        jobModel = Job()
        job = jobModel.load(job['_id'], force=True)
        output = job['kwargs']['outputs']['--DetectedPoints']

        # Simulate output from the worker
        contents = b'Hello world'
        resp = self.request(
            path='/file', method='POST', token=output['token'], params={
                'parentType': output['parent_type'],
                'parentId': output['parent_id'],
                'name': output['name'],
                'size': len(contents),
                'mimeType': 'text/plain',
                'reference': output['reference']
            })
        self.assertStatusOk(resp)

        uploadId = resp.json['_id']
        fields = [('offset', 0), ('uploadId', uploadId)]
        files = [('chunk', output['name'], contents)]
        resp = self.multipartRequest(
            path='/file/chunk', fields=fields, files=files, token=output['token'])
        self.assertStatusOk(resp)
        file = resp.json
        self.assertEqual(file['_modelType'], 'file')
        self.assertEqual(file['size'], 11)
        self.assertEqual(file['mimeType'], 'text/plain')
        file = File().load(file['_id'], force=True)

        # Make sure temp token is removed once we change job status to final state
        job = jobModel.load(job['_id'], force=True)
        self.assertIn('itemTaskTempToken', job)

        from girder.plugins.jobs.constants import JobStatus
        # Transition through states to SUCCESS
        job = jobModel.updateJob(job, status=JobStatus.QUEUED)
        job = jobModel.updateJob(job, status=JobStatus.RUNNING)
        job = jobModel.updateJob(job, status=JobStatus.SUCCESS)

        self.assertNotIn('itemTaskTempToken', job)
        self.assertIn('itemTaskBindings', job)

        # Wait for async data.process event to bind output provenance
        start = time.time()
        while time.time() - start < 15:
            job = jobModel.load(job['_id'], force=True)

            if 'itemId' in job['itemTaskBindings']['outputs']['--DetectedPoints']:
                break
            else:
                time.sleep(0.2)
        else:
            raise Exception('Output binding did not occur in time')

        self.assertEqual(
            job['itemTaskBindings']['outputs']['--DetectedPoints']['itemId'], file['itemId'])
Exemplo n.º 9
0
class JobsTestCase(base.TestCase):
    def setUp(self):
        base.TestCase.setUp(self)

        self.users = [User().createUser(
            'usr' + str(n), 'passwd', 'tst', 'usr', '*****@*****.**' % n)
            for n in range(3)]

        from girder.plugins.jobs.models.job import Job
        self.jobModel = Job()

    def testJobs(self):
        self.job = None

        def schedule(event):
            self.job = event.info
            if self.job['handler'] == 'my_handler':
                self.job['status'] = JobStatus.RUNNING
                self.job = self.jobModel.save(self.job)
                self.assertEqual(self.job['args'], ('hello', 'world'))
                self.assertEqual(self.job['kwargs'], {'a': 'b'})

        events.bind('jobs.schedule', 'test', schedule)

        # Create a job
        job = self.jobModel.createJob(
            title='Job Title', type='my_type', args=('hello', 'world'),
            kwargs={'a': 'b'}, user=self.users[1], handler='my_handler',
            public=False)
        self.assertEqual(self.job, None)
        self.assertEqual(job['status'], JobStatus.INACTIVE)

        # Schedule the job, make sure our handler was invoked
        self.jobModel.scheduleJob(job)
        self.assertEqual(self.job['_id'], job['_id'])
        self.assertEqual(self.job['status'], JobStatus.RUNNING)

        # Since the job is not public, user 2 should not have access
        path = '/job/%s' % job['_id']
        resp = self.request(path, user=self.users[2])
        self.assertStatus(resp, 403)
        resp = self.request(path, user=self.users[2], method='PUT')
        self.assertStatus(resp, 403)
        resp = self.request(path, user=self.users[2], method='DELETE')
        self.assertStatus(resp, 403)

        # Make sure user who created the job can see it
        resp = self.request(path, user=self.users[1])
        self.assertStatusOk(resp)

        # We should be able to update the job as the user who created it
        resp = self.request(path, method='PUT', user=self.users[1], params={
            'log': 'My log message\n'
        })
        self.assertStatusOk(resp)

        # We should be able to create a job token and use that to update it too
        token = self.jobModel.createJobToken(job)
        resp = self.request(path, method='PUT', params={
            'log': 'append message',
            'token': token['_id']
        })
        self.assertStatusOk(resp)
        # We shouldn't get the log back in this case
        self.assertNotIn('log', resp.json)

        # Do a fetch on the job itself to get the log
        resp = self.request(path, user=self.users[1])
        self.assertStatusOk(resp)
        self.assertEqual(
            resp.json['log'], ['My log message\n', 'append message'])

        # Test overwriting the log and updating status
        resp = self.request(path, method='PUT', params={
            'log': 'overwritten log',
            'overwrite': 'true',
            'status': JobStatus.SUCCESS,
            'token': token['_id']
        })
        self.assertStatusOk(resp)
        self.assertNotIn('log', resp.json)
        self.assertEqual(resp.json['status'], JobStatus.SUCCESS)

        job = self.jobModel.load(job['_id'], force=True, includeLog=True)
        self.assertEqual(job['log'], ['overwritten log'])

        # We should be able to delete the job as the user who created it
        resp = self.request(path, user=self.users[1], method='DELETE')
        self.assertStatusOk(resp)
        job = self.jobModel.load(job['_id'], force=True)
        self.assertIsNone(job)

    def testLegacyLogBehavior(self):
        # Force save a job with a string log to simulate a legacy job record
        job = self.jobModel.createJob(
            title='legacy', type='legacy', user=self.users[1], save=False)
        job['log'] = 'legacy log'
        job = self.jobModel.save(job, validate=False)

        self.assertEqual(job['log'], 'legacy log')

        # Load the record, we should now get the log as a list
        job = self.jobModel.load(job['_id'], force=True, includeLog=True)
        self.assertEqual(job['log'], ['legacy log'])

    def testListJobs(self):
        job = self.jobModel.createJob(title='A job', type='t', user=self.users[1], public=False)
        anonJob = self.jobModel.createJob(title='Anon job', type='t')
        # Ensure timestamp for public job is strictly higher (ms resolution)
        time.sleep(0.1)
        publicJob = self.jobModel.createJob(
            title='Anon job', type='t', public=True)

        # User 1 should be able to see their own jobs
        resp = self.request('/job', user=self.users[1], params={
            'userId': self.users[1]['_id']
        })
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 1)
        self.assertEqual(resp.json[0]['_id'], str(job['_id']))

        # User 2 should not see user 1's jobs in the list
        resp = self.request('/job', user=self.users[2], params={
            'userId': self.users[1]['_id']
        })
        self.assertEqual(resp.json, [])

        # Omitting a userId should assume current user
        resp = self.request('/job', user=self.users[1])
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 1)
        self.assertEqual(resp.json[0]['_id'], str(job['_id']))

        # Explicitly passing "None" should show anonymous jobs
        resp = self.request('/job', user=self.users[0], params={
            'userId': 'none'
        })
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 2)
        self.assertEqual(resp.json[0]['_id'], str(publicJob['_id']))
        self.assertEqual(resp.json[1]['_id'], str(anonJob['_id']))

        # Non-admins should only see public anon jobs
        resp = self.request('/job', params={'userId': 'none'})
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 1)
        self.assertEqual(resp.json[0]['_id'], str(publicJob['_id']))

    def testListAllJobs(self):
        self.jobModel.createJob(title='user 0 job', type='t', user=self.users[0], public=False)
        self.jobModel.createJob(title='user 1 job', type='t', user=self.users[1], public=False)
        self.jobModel.createJob(title='user 1 job', type='t', user=self.users[1], public=True)
        self.jobModel.createJob(title='user 2 job', type='t', user=self.users[2])
        self.jobModel.createJob(title='anonymous job', type='t')
        self.jobModel.createJob(title='anonymous public job', type='t2', public=True)

        # User 0, as a site admin, should be able to see all jobs
        resp = self.request('/job/all', user=self.users[0])
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 6)

        # Test deprecated listAll method
        jobs = list(self.jobModel.listAll(limit=0, offset=0, sort=None, currentUser=self.users[0]))
        self.assertEqual(len(jobs), 6)

        # get with filter
        resp = self.request('/job/all', user=self.users[0], params={
            'types': json.dumps(['t']),
            'statuses': json.dumps([0])
        })
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 5)

        # get with unmet filter conditions
        resp = self.request('/job/all', user=self.users[0], params={
            'types': json.dumps(['nonexisttype'])
        })
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json), 0)

        # User 1, as non site admin, should encounter http 403 (Forbidden)
        resp = self.request('/job/all', user=self.users[1])
        self.assertStatus(resp, 403)

        # Not authenticated user should encounter http 401 (unauthorized)
        resp = self.request('/job/all')
        self.assertStatus(resp, 401)

    def testFiltering(self):
        job = self.jobModel.createJob(title='A job', type='t', user=self.users[1], public=True)

        job['_some_other_field'] = 'foo'
        job = self.jobModel.save(job)

        resp = self.request('/job/%s' % job['_id'])
        self.assertStatusOk(resp)
        self.assertTrue('created' in resp.json)
        self.assertTrue('_some_other_field' not in resp.json)
        self.assertTrue('kwargs' not in resp.json)
        self.assertTrue('args' not in resp.json)

        resp = self.request('/job/%s' % job['_id'], user=self.users[0])
        self.assertTrue('kwargs' in resp.json)
        self.assertTrue('args' in resp.json)

        self.jobModel.exposeFields(level=AccessType.READ, fields={'_some_other_field'})
        self.jobModel.hideFields(level=AccessType.READ, fields={'created'})

        resp = self.request('/job/%s' % job['_id'])
        self.assertStatusOk(resp)
        self.assertEqual(resp.json['_some_other_field'], 'foo')
        self.assertTrue('created' not in resp.json)

    def testJobProgressAndNotifications(self):
        job = self.jobModel.createJob(title='a job', type='t', user=self.users[1], public=True)

        path = '/job/%s' % job['_id']
        resp = self.request(path)
        self.assertEqual(resp.json['progress'], None)
        self.assertEqual(resp.json['timestamps'], [])

        resp = self.request(path, method='PUT', user=self.users[1], params={
            'progressTotal': 100,
            'progressCurrent': 3,
            'progressMessage': 'Started',
            'notify': 'false',
            'status': JobStatus.QUEUED
        })
        self.assertStatusOk(resp)
        self.assertEqual(resp.json['progress'], {
            'total': 100,
            'current': 3,
            'message': 'Started',
            'notificationId': None
        })

        # The status update should make it so we now have a timestamp
        self.assertEqual(len(resp.json['timestamps']), 1)
        self.assertEqual(resp.json['timestamps'][0]['status'], JobStatus.QUEUED)
        self.assertIn('time', resp.json['timestamps'][0])

        # If the status does not change on update, no timestamp should be added
        resp = self.request(path, method='PUT', user=self.users[1], params={
            'status': JobStatus.QUEUED
        })
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json['timestamps']), 1)
        self.assertEqual(resp.json['timestamps'][0]['status'], JobStatus.QUEUED)

        # We passed notify=false, so we should only have the job creation notification
        resp = self.request(path='/notification/stream', method='GET',
                            user=self.users[1], isJson=False,
                            params={'timeout': 0})
        messages = self.getSseMessages(resp)
        self.assertEqual(len(messages), 1)

        # Update progress with notify=true (the default)
        resp = self.request(path, method='PUT', user=self.users[1], params={
            'progressCurrent': 50,
            'progressMessage': 'Something bad happened',
            'status': JobStatus.ERROR
        })
        self.assertStatusOk(resp)
        self.assertNotEqual(resp.json['progress']['notificationId'], None)

        # We should now see three notifications (job created + job status + progress)
        resp = self.request(path='/notification/stream', method='GET',
                            user=self.users[1], isJson=False,
                            params={'timeout': 0})
        messages = self.getSseMessages(resp)
        job = self.jobModel.load(job['_id'], force=True)
        self.assertEqual(len(messages), 3)
        creationNotify = messages[0]
        progressNotify = messages[1]
        statusNotify = messages[2]

        self.assertEqual(creationNotify['type'], 'job_created')
        self.assertEqual(creationNotify['data']['_id'], str(job['_id']))
        self.assertEqual(statusNotify['type'], 'job_status')
        self.assertEqual(statusNotify['data']['_id'], str(job['_id']))
        self.assertEqual(int(statusNotify['data']['status']), JobStatus.ERROR)
        self.assertNotIn('kwargs', statusNotify['data'])
        self.assertNotIn('log', statusNotify['data'])

        self.assertEqual(progressNotify['type'], 'progress')
        self.assertEqual(progressNotify['data']['title'], job['title'])
        self.assertEqual(progressNotify['data']['current'], float(50))
        self.assertEqual(progressNotify['data']['state'], 'error')
        self.assertEqual(progressNotify['_id'], str(job['progress']['notificationId']))

    def testDotsInKwargs(self):
        kwargs = {
            '$key.with.dots': 'value',
            'foo': [{
                'moar.dots': True
            }]
        }
        job = self.jobModel.createJob(title='dots', type='x', user=self.users[0], kwargs=kwargs)

        # Make sure we can update a job and notification creation works
        self.jobModel.updateJob(job, status=JobStatus.QUEUED, notify=True)

        self.assertEqual(job['kwargs'], kwargs)

        resp = self.request('/job/%s' % job['_id'], user=self.users[0])
        self.assertStatusOk(resp)
        self.assertEqual(resp.json['kwargs'], kwargs)

        job = self.jobModel.load(job['_id'], force=True)
        self.assertEqual(job['kwargs'], kwargs)
        job = self.jobModel.filter(job, self.users[0])
        self.assertEqual(job['kwargs'], kwargs)
        job = self.jobModel.filter(job, self.users[1])
        self.assertFalse('kwargs' in job)

    def testLocalJob(self):
        job = self.jobModel.createLocalJob(
            title='local', type='local', user=self.users[0], kwargs={
                'hello': 'world'
            }, module='plugin_tests.local_job_impl')

        self.jobModel.scheduleJob(job)

        job = self.jobModel.load(job['_id'], force=True, includeLog=True)
        self.assertEqual(job['log'], ['job ran!'])

        job = self.jobModel.createLocalJob(
            title='local', type='local', user=self.users[0], kwargs={
                'hello': 'world'
            }, module='plugin_tests.local_job_impl', function='fail')

        self.jobModel.scheduleJob(job)

        job = self.jobModel.load(job['_id'], force=True, includeLog=True)
        self.assertEqual(job['log'], ['job failed'])

    def testValidateCustomStatus(self):
        job = self.jobModel.createJob(title='test', type='x', user=self.users[0])

        def validateStatus(event):
            if event.info == 1234:
                event.preventDefault().addResponse(True)

        def validTransitions(event):
            if event.info['status'] == 1234:
                event.preventDefault().addResponse([JobStatus.INACTIVE])

        with self.assertRaises(ValidationException):
            self.jobModel.updateJob(job, status=1234)  # Should fail

        with events.bound('jobs.status.validate', 'test', validateStatus), \
                events.bound('jobs.status.validTransitions', 'test', validTransitions):
            self.jobModel.updateJob(job, status=1234)  # Should work

            with self.assertRaises(ValidationException):
                self.jobModel.updateJob(job, status=4321)  # Should fail

    def testValidateCustomStrStatus(self):
        job = self.jobModel.createJob(title='test', type='x', user=self.users[0])

        def validateStatus(event):
            states = ['a', 'b', 'c']

            if event.info in states:
                event.preventDefault().addResponse(True)

        def validTransitions(event):
            if event.info['status'] == 'a':
                event.preventDefault().addResponse([JobStatus.INACTIVE])

        with self.assertRaises(ValidationException):
            self.jobModel.updateJob(job, status='a')

        with events.bound('jobs.status.validate', 'test', validateStatus), \
                events.bound('jobs.status.validTransitions', 'test', validTransitions):
            self.jobModel.updateJob(job, status='a')
            self.assertEqual(job['status'], 'a')

        with self.assertRaises(ValidationException), \
                events.bound('jobs.status.validate', 'test', validateStatus):
            self.jobModel.updateJob(job, status='foo')

    def testUpdateOtherFields(self):
        job = self.jobModel.createJob(title='test', type='x', user=self.users[0])
        job = self.jobModel.updateJob(job, otherFields={'other': 'fields'})
        self.assertEqual(job['other'], 'fields')

    def testCancelJob(self):
        job = self.jobModel.createJob(title='test', type='x', user=self.users[0])
        # add to the log
        job = self.jobModel.updateJob(job, log='entry 1\n')
        # Reload without the log
        job = self.jobModel.load(id=job['_id'], force=True)
        self.assertEqual(len(job.get('log', [])), 0)
        # Cancel
        job = self.jobModel.cancelJob(job)
        self.assertEqual(job['status'], JobStatus.CANCELED)
        # Reloading should still have the log and be canceled
        job = self.jobModel.load(id=job['_id'], force=True, includeLog=True)
        self.assertEqual(job['status'], JobStatus.CANCELED)
        self.assertEqual(len(job.get('log', [])), 1)

    def testCancelJobEndpoint(self):
        job = self.jobModel.createJob(title='test', type='x', user=self.users[0])

        # Ensure requires write perms
        jobCancelUrl = '/job/%s/cancel' % job['_id']
        resp = self.request(jobCancelUrl, user=self.users[1], method='PUT')
        self.assertStatus(resp, 403)

        # Try again with the right user
        jobCancelUrl = '/job/%s/cancel' % job['_id']
        resp = self.request(jobCancelUrl, user=self.users[0], method='PUT')
        self.assertStatusOk(resp)
        self.assertEqual(resp.json['status'], JobStatus.CANCELED)

    def testJobsTypesAndStatuses(self):
        self.jobModel.createJob(title='user 0 job', type='t1', user=self.users[0], public=False)
        self.jobModel.createJob(title='user 1 job', type='t2', user=self.users[1], public=False)
        self.jobModel.createJob(title='user 1 job', type='t3', user=self.users[1], public=True)
        self.jobModel.createJob(title='user 2 job', type='t4', user=self.users[2])
        self.jobModel.createJob(title='anonymous job', type='t5')
        self.jobModel.createJob(title='anonymous public job', type='t6', public=True)

        # User 1, as non site admin, should encounter http 403 (Forbidden)
        resp = self.request('/job/typeandstatus/all', user=self.users[1])
        self.assertStatus(resp, 403)

        # Admin user gets all types and statuses
        resp = self.request('/job/typeandstatus/all', user=self.users[0])
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json['types']), 6)
        self.assertEqual(len(resp.json['statuses']), 1)

        # standard user gets types and statuses of its own jobs
        resp = self.request('/job/typeandstatus', user=self.users[1])
        self.assertStatusOk(resp)
        self.assertEqual(len(resp.json['types']), 2)
        self.assertEqual(len(resp.json['statuses']), 1)

    def testDefaultParentId(self):
        job = self.jobModel.createJob(title='Job', type='Job', user=self.users[0])
        # If not specified parentId should be None
        self.assertEquals(job['parentId'], None)

    def testIsParentIdCorrect(self):
        parentJob = self.jobModel.createJob(
            title='Parent Job', type='Parent Job', user=self.users[0])

        childJob = self.jobModel.createJob(
            title='Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)
        # During initialization parent job should be set correctly
        self.assertEqual(childJob['parentId'], parentJob['_id'])

    def testSetParentCorrectly(self):
        parentJob = self.jobModel.createJob(
            title='Parent Job', type='Parent Job', user=self.users[0])
        childJob = self.jobModel.createJob(title='Child Job', type='Child Job', user=self.users[0])

        self.jobModel.setParentJob(childJob, parentJob)

        # After setParentJob method is called parent job should be set correctly
        self.assertEqual(childJob['parentId'], parentJob['_id'])

    def testParentCannotBeEqualToChild(self):
        childJob = self.jobModel.createJob(title='Child Job', type='Child Job', user=self.users[0])

        # Cannot set a job as it's own parent
        with self.assertRaises(ValidationException):
            self.jobModel.setParentJob(childJob, childJob)

    def testParentIdCannotBeOverridden(self):
        parentJob = self.jobModel.createJob(
            title='Parent Job', type='Parent Job', user=self.users[0])

        anotherParentJob = self.jobModel.createJob(
            title='Another Parent Job', type='Parent Job', user=self.users[0])

        childJob = self.jobModel.createJob(
            title='Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)

        with self.assertRaises(ValidationException):
            # If parent job is set, cannot be overridden
            self.jobModel.setParentJob(childJob, anotherParentJob)

    def testListChildJobs(self):
        parentJob = self.jobModel.createJob(
            title='Parent Job', type='Parent Job', user=self.users[0])

        childJob = self.jobModel.createJob(
            title='Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)

        self.jobModel.createJob(
            title='Another Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)

        # Should return a list with 2 jobs
        self.assertEquals(len(list(self.jobModel.listChildJobs(parentJob))), 2)
        # Should return an empty list
        self.assertEquals(len(list(self.jobModel.listChildJobs(childJob))), 0)

    def testListChildJobsRest(self):
        parentJob = self.jobModel.createJob(
            title='Parent Job', type='Parent Job', user=self.users[0])

        childJob = self.jobModel.createJob(
            title='Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)

        self.jobModel.createJob(
            title='Another Child Job', type='Child Job', user=self.users[0], parentJob=parentJob)

        resp = self.request('/job', user=self.users[0],
                            params={'parentId': str(parentJob['_id'])})
        resp2 = self.request('/job', user=self.users[0],
                             params={'parentId': str(childJob['_id'])})

        self.assertStatusOk(resp)
        self.assertStatusOk(resp2)

        # Should return a list with 2 jobs
        self.assertEquals(len(resp.json), 2)
        # Should return an empty list
        self.assertEquals(len(resp2.json), 0)

    def testCreateJobRest(self):
        resp = self.request('/job', method='POST',
                            user=self.users[0],
                            params={'title': 'job', 'type': 'job'})
        # If user does not have the necessary token status is 403
        self.assertStatus(resp, 403)

        token = Token().createToken(scope=REST_CREATE_JOB_TOKEN_SCOPE)

        resp2 = self.request(
            '/job', method='POST', token=token, params={'title': 'job', 'type': 'job'})
        # If user has the necessary token status is 200
        self.assertStatusOk(resp2)

    def testJobStateTransitions(self):
        job = self.jobModel.createJob(
            title='user 0 job', type='t1', user=self.users[0], public=False)

        # We can't move straight to SUCCESS
        with self.assertRaises(ValidationException):
            job = self.jobModel.updateJob(job, status=JobStatus.SUCCESS)

        self.jobModel.updateJob(job, status=JobStatus.QUEUED)
        self.jobModel.updateJob(job, status=JobStatus.RUNNING)
        self.jobModel.updateJob(job, status=JobStatus.ERROR)

        # We shouldn't be able to move backwards
        with self.assertRaises(ValidationException):
            self.jobModel.updateJob(job, status=JobStatus.QUEUED)
        with self.assertRaises(ValidationException):
            self.jobModel.updateJob(job, status=JobStatus.RUNNING)
        with self.assertRaises(ValidationException):
            self.jobModel.updateJob(job, status=JobStatus.INACTIVE)
Exemplo n.º 10
0
def run(job):
    jobModel = Job()
    jobModel.updateJob(job, status=JobStatus.RUNNING)

    lookup_kwargs, = job["args"]
    user = User().load(job["userId"], force=True)
    tale = Tale().load(job["kwargs"]["taleId"], user=user)
    spawn = job["kwargs"]["spawn"]
    asTale = job["kwargs"]["asTale"]
    token = Token().createToken(user=user, days=0.5)

    progressTotal = 3 + int(spawn)
    progressCurrent = 0

    try:
        # 0. Spawn instance in the background
        if spawn:
            instance = Instance().createInstance(tale, user, token, spawn=spawn)

        # 1. Register data using url
        progressCurrent += 1
        jobModel.updateJob(
            job,
            status=JobStatus.RUNNING,
            progressTotal=progressTotal,
            progressCurrent=progressCurrent,
            progressMessage="Registering external data",
        )
        dataIds = lookup_kwargs.pop("dataId")
        base_url = lookup_kwargs.get("base_url", DataONELocations.prod_cn)
        dataMap = pids_to_entities(
            dataIds, user=user, base_url=base_url, lookup=True
        )  # DataONE shouldn't be here
        imported_data = register_dataMap(
            dataMap,
            getOrCreateRootFolder(CATALOG_NAME),
            "folder",
            user=user,
            base_url=base_url,
        )

        if dataMap[0]["repository"].lower().startswith("http"):
            resource = Item().load(imported_data[0], user=user, level=AccessType.READ)
            resourceType = "item"
        else:
            resource = Folder().load(imported_data[0], user=user, level=AccessType.READ)
            resourceType = "folder"

        data_set = [
            {
                "itemId": imported_data[0],
                "mountPath": resource["name"],
                "_modelType": resourceType,
            }
        ]

        if asTale:
            if resourceType == "folder":
                # Create a dataset with the content of root ds folder,
                # so that it looks nicely and it's easy to copy to workspace later on
                workspace_data_set = [
                    {
                        "itemId": folder["_id"],
                        "mountPath": folder["name"],
                        "_modelType": "folder ",
                    }
                    for folder in Folder().childFolders(
                        parentType="folder", parent=resource, user=user
                    )
                ]
                workspace_data_set += [
                    {
                        "itemId": item["_id"],
                        "mountPath": item["name"],
                        "_modelType": "item",
                    }
                    for item in Folder().childItems(resource)
                ]
            else:
                workspace_data_set = data_set

            # 2. Create a session
            # TODO: yay circular dependencies! IMHO we really should merge
            # wholetale and wt_data_manager plugins...
            from girder.plugins.wt_data_manager.models.session import Session

            # Session is created so that we can easily copy files to workspace,
            # without worrying about how to handler transfers. DMS will do that for us <3
            session = Session().createSession(user, dataSet=workspace_data_set)

            # 3. Copy data to the workspace using WebDAVFS
            progressCurrent += 1
            jobModel.updateJob(
                job,
                status=JobStatus.RUNNING,
                log="Copying files to workspace",
                progressTotal=progressTotal,
                progressCurrent=progressCurrent,
                progressMessage="Copying files to workspace",
            )
            girder_root = "http://localhost:{}".format(
                config.getConfig()["server.socket_port"]
            )
            with WebDAVFS(
                girder_root,
                login=user["login"],
                password="******".format(**token),
                root="/tales/{_id}".format(**tale),
            ) as destination_fs, DMSFS(
                str(session["_id"]), girder_root + "/api/v1", str(token["_id"])
            ) as source_fs:
                copy_fs(source_fs, destination_fs)
                sanitize_binder(destination_fs)

            Session().deleteSession(user, session)
        else:
            # 3. Update Tale's dataSet
            update_citations = {_["itemId"] for _ in tale["dataSet"]} ^ {
                _["itemId"] for _ in data_set
            }
            tale["dataSet"] = data_set
            tale = Tale().updateTale(tale)

            if update_citations:
                eventParams = {"tale": tale, "user": user}
                event = events.trigger("tale.update_citation", eventParams)
                if len(event.responses):
                    tale = Tale().updateTale(event.responses[-1])

        # Tale is ready to be built
        tale = Tale().load(tale["_id"], user=user)  # Refresh state
        tale["status"] = TaleStatus.READY
        tale = Tale().updateTale(tale)

        # 4. Wait for container to show up
        if spawn:
            progressCurrent += 1
            jobModel.updateJob(
                job,
                status=JobStatus.RUNNING,
                log="Waiting for a Tale container",
                progressTotal=progressTotal,
                progressCurrent=progressCurrent,
                progressMessage="Waiting for a Tale container",
            )

            sleep_step = 10
            timeout = 15 * 60
            while instance["status"] == InstanceStatus.LAUNCHING and timeout > 0:
                time.sleep(sleep_step)
                instance = Instance().load(instance["_id"], user=user)
                timeout -= sleep_step
            if timeout <= 0:
                raise RuntimeError(
                    "Failed to launch instance {}".format(instance["_id"])
                )
        else:
            instance = None

    except Exception:
        tale = Tale().load(tale["_id"], user=user)  # Refresh state
        tale["status"] = TaleStatus.ERROR
        tale = Tale().updateTale(tale)
        t, val, tb = sys.exc_info()
        log = "%s: %s\n%s" % (t.__name__, repr(val), traceback.extract_tb(tb))
        jobModel.updateJob(
            job,
            progressTotal=progressTotal,
            progressCurrent=progressTotal,
            progressMessage="Task failed",
            status=JobStatus.ERROR,
            log=log,
        )
        raise

    # To get rid of ObjectId's, dates etc.
    tale = json.loads(
        json.dumps(tale, sort_keys=True, allow_nan=False, cls=JsonEncoder)
    )
    instance = json.loads(
        json.dumps(instance, sort_keys=True, allow_nan=False, cls=JsonEncoder)
    )

    jobModel.updateJob(
        job,
        status=JobStatus.SUCCESS,
        log="Tale created",
        progressTotal=progressTotal,
        progressCurrent=progressTotal,
        progressMessage="Tale created",
        otherFields={"result": {"tale": tale, "instance": instance}},
    )
def run(job):
    jobModel = Job()
    jobModel.updateJob(job, status=JobStatus.RUNNING)

    tale_dir, manifest_file = job["args"]
    user = User().load(job["userId"], force=True)
    tale = Tale().load(job["kwargs"]["taleId"], user=user)
    token = Token().createToken(user=user,
                                days=0.5,
                                scope=(TokenScope.USER_AUTH,
                                       REST_CREATE_JOB_TOKEN_SCOPE))

    progressTotal = 3
    progressCurrent = 0

    try:
        os.chdir(tale_dir)
        with open(manifest_file, "r") as manifest_fp:
            manifest = json.load(manifest_fp)

        # 1. Register data
        progressCurrent += 1
        jobModel.updateJob(
            job,
            status=JobStatus.RUNNING,
            progressTotal=progressTotal,
            progressCurrent=progressCurrent,
            progressMessage="Registering external data",
        )
        dataIds = [obj["identifier"] for obj in manifest["Datasets"]]
        dataIds += [
            obj["uri"] for obj in manifest["aggregates"]
            if obj["uri"].startswith("http")
        ]
        if dataIds:
            dataMap = pids_to_entities(
                dataIds,
                user=user,
                base_url=DataONELocations.prod_cn,
                lookup=True)  # DataONE shouldn't be here
            register_dataMap(
                dataMap,
                getOrCreateRootFolder(CATALOG_NAME),
                "folder",
                user=user,
                base_url=DataONELocations.prod_cn,
            )

        # 2. Construct the dataSet
        dataSet = []
        for obj in manifest["aggregates"]:
            if "bundledAs" not in obj:
                continue
            uri = obj["uri"]
            fobj = File().findOne(
                {"linkUrl": uri})  # TODO: That's expensive, use something else
            if fobj:
                dataSet.append({
                    "itemId": fobj["itemId"],
                    "_modelType": "item",
                    "mountPath": obj["bundledAs"]["filename"],
                })
            # TODO: handle folders

        # 3. Update Tale's dataSet
        update_citations = {_["itemId"]
                            for _ in tale["dataSet"]
                            } ^ {_["itemId"]
                                 for _ in dataSet}
        tale["dataSet"] = dataSet
        tale = Tale().updateTale(tale)

        if update_citations:
            eventParams = {"tale": tale, "user": user}
            event = events.trigger("tale.update_citation", eventParams)
            if len(event.responses):
                tale = Tale().updateTale(event.responses[-1])

        # 4. Copy data to the workspace using WebDAVFS (if it exists)
        progressCurrent += 1
        jobModel.updateJob(
            job,
            status=JobStatus.RUNNING,
            progressTotal=progressTotal,
            progressCurrent=progressCurrent,
            progressMessage="Copying files to workspace",
        )
        orig_tale_id = pathlib.Path(manifest_file).parts[0]
        for workdir in ("workspace", "data/workspace", None):
            if workdir:
                workdir = os.path.join(orig_tale_id, workdir)
                if os.path.isdir(workdir):
                    break

        if workdir:
            password = "******".format(**token)
            root = "/tales/{_id}".format(**tale)
            url = "http://localhost:{}".format(
                config.getConfig()["server.socket_port"])
            with WebDAVFS(url,
                          login=user["login"],
                          password=password,
                          root=root) as webdav_handle:
                copy_fs(OSFS(workdir), webdav_handle)

        # Tale is ready to be built
        tale = Tale().load(tale["_id"], user=user)  # Refresh state
        tale["status"] = TaleStatus.READY
        tale = Tale().updateTale(tale)

        progressCurrent += 1
        jobModel.updateJob(
            job,
            status=JobStatus.SUCCESS,
            log="Tale created",
            progressTotal=progressTotal,
            progressCurrent=progressCurrent,
            progressMessage="Tale created",
        )
    except Exception:
        tale = Tale().load(tale["_id"], user=user)  # Refresh state
        tale["status"] = TaleStatus.ERROR
        tale = Tale().updateTale(tale)
        t, val, tb = sys.exc_info()
        log = "%s: %s\n%s" % (t.__name__, repr(val), traceback.extract_tb(tb))
        jobModel.updateJob(job, status=JobStatus.ERROR, log=log)
        raise