Пример #1
0
    def setup_job_dir(self, data_files, job_exe_id):
        """Creates the directory layout for a job execution and fetches its input files

        :param data_files: Dict with each file parameter name mapping to a bool indicating if the parameter accepts
            multiple files (True) and a relative directory path
        :type data_files: dict of str -> tuple(bool, str)
        :param job_exe_id: The job execution ID
        :type job_exe_id: int
        """

        input_dir = get_job_exe_input_data_dir(job_exe_id)
        input_work_dir = get_job_exe_input_work_dir(job_exe_id)
        output_dir = get_job_exe_output_data_dir(job_exe_id)
        output_work_dir = get_job_exe_output_work_dir(job_exe_id)

        # Pull down every input file for this execution into the input directory
        self.retrieve_input_data_files(input_dir, input_work_dir, data_files)

        # Prepare an upload directory for each workspace that will receive output files
        for workspace in Workspace.objects.filter(id__in=self.get_output_workspace_ids()):
            ScaleFile.objects.setup_upload_dir(output_dir, output_work_dir, workspace)
Пример #2
0
    def cleanup_job_execution(self, job_exe):
        """See :meth:`job.execution.job_exe_cleaner.JobExecutionCleaner.cleanup_job_execution`
        """

        logger.info('Cleaning up a non-system job')

        exe_id = job_exe.id
        dl_dir = get_job_exe_input_data_dir(exe_id)
        dl_work_dir = get_job_exe_input_work_dir(exe_id)
        ul_dir = get_job_exe_output_data_dir(exe_id)
        ul_work_dir = get_job_exe_output_work_dir(exe_id)

        logger.info('Cleaning up download directory')
        ScaleFile.objects.cleanup_download_dir(dl_dir, dl_work_dir)

        logger.info('Cleaning up upload directories')
        output_workspace_ids = job_exe.job.get_job_data().get_output_workspace_ids()
        for workspace in Workspace.objects.filter(id__in=output_workspace_ids):
            logger.info('Cleaning up upload directory for workspace %s', workspace.name)
            ScaleFile.objects.cleanup_upload_dir(ul_dir, ul_work_dir, workspace)

        # Remove the work directory used when moving parsed source files, if it was ever created
        move_work_dir = os.path.join(ul_work_dir, 'move_source_file_in_workspace')
        if os.path.exists(move_work_dir):
            logger.info('Cleaning up work directory for moving parsed source files')
            ScaleFile.objects.cleanup_move_dir(move_work_dir)
            logger.info('Deleting %s', move_work_dir)
            os.rmdir(move_work_dir)

        delete_normal_job_exe_dir_tree(exe_id)
Пример #3
0
    def test_file_in_command(self, mock_retrieve_call, mock_os_mkdir, mock_get_one_file, mock_setup_upload):
        """Tests that a single 'file' input is substituted for its parameter in the command arguments"""

        job_exe_id = 1
        job_input_dir = get_job_exe_input_data_dir(job_exe_id)
        input_file_path = os.path.join(job_input_dir, 'file1', 'foo.txt')

        # Stub the input retrieval so no real file download occurs
        def new_retrieve(arg1, arg2, arg3):
            return {u'file1_out': [input_file_path]}

        mock_retrieve_call.side_effect = new_retrieve
        mock_get_one_file.side_effect = lambda arg1: input_file_path
        job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
        job_interface_dict[u'command_arguments'] = u'${file1}'
        job_interface_dict[u'input_data'] = [{u'name': u'file1', u'type': u'file', 'required': True}]
        job_data_dict[u'input_data'].append({u'name': u'file1', u'file_id': 1})
        job_data_dict[u'output_data'].append({u'name': u'file1_out', u'workspace_id': self.workspace.id})

        job_interface = JobInterface(job_interface_dict)
        job_data = JobData(job_data_dict)
        job_environment = JobEnvironment(job_environment_dict)

        job_interface.perform_pre_steps(job_data, job_environment, job_exe_id)
        job_command_arguments = job_interface.fully_populate_command_argument(job_data, job_environment, job_exe_id)
        self.assertEqual(job_command_arguments, input_file_path, u'expected a different command from pre_steps')
        mock_setup_upload.assert_called_once_with(get_job_exe_output_data_dir(job_exe_id), get_job_exe_output_work_dir(job_exe_id), self.workspace)
Пример #4
0
    def setup_job_dir(self, data_files, job_exe_id):
        """Sets up the directory structure for a job execution and downloads the given files

        :param data_files: Dict with each file parameter name mapping to a bool indicating if the parameter accepts
            multiple files (True) and a relative directory path
        :type data_files: dict of str -> tuple(bool, str)
        :param job_exe_id: The job execution ID
        :type job_exe_id: int
        """

        download_dir = get_job_exe_input_data_dir(job_exe_id)
        download_work_dir = get_job_exe_input_work_dir(job_exe_id)
        upload_dir = get_job_exe_output_data_dir(job_exe_id)
        upload_work_dir = get_job_exe_output_work_dir(job_exe_id)

        # Download the job execution input files
        self.retrieve_input_data_files(download_dir, download_work_dir, data_files)

        # Set up upload directories for output workspace
        workspace_ids = self.get_output_workspace_ids()
        for workspace in Workspace.objects.filter(id__in=workspace_ids):
            ScaleFile.objects.setup_upload_dir(upload_dir, upload_work_dir, workspace)

        # If the upload dir did not get created (e.g. no output files), make sure it gets created for results manifests
        if not os.path.exists(upload_dir):
            logger.info("Creating %s", upload_dir)
            # 0o755 == 0755; the 0o form is valid on both Python 2.6+ and Python 3
            os.makedirs(upload_dir, mode=0o755)
Пример #5
0
    def cleanup_job_execution(self, job_exe):
        """See :meth:`job.execution.job_exe_cleaner.JobExecutionCleaner.cleanup_job_execution`
        """

        logger.info('Cleaning up a non-system job')

        job_exe_id = job_exe.id
        input_dir = get_job_exe_input_data_dir(job_exe_id)
        input_work_dir = get_job_exe_input_work_dir(job_exe_id)
        output_dir = get_job_exe_output_data_dir(job_exe_id)
        output_work_dir = get_job_exe_output_work_dir(job_exe_id)

        logger.info('Cleaning up download directory')
        ScaleFile.objects.cleanup_download_dir(input_dir, input_work_dir)

        logger.info('Cleaning up upload directories')
        ids = job_exe.job.get_job_data().get_output_workspace_ids()
        for workspace in Workspace.objects.filter(id__in=ids):
            logger.info('Cleaning up upload directory for workspace %s', workspace.name)
            ScaleFile.objects.cleanup_upload_dir(output_dir, output_work_dir, workspace)

        # The move work directory only exists if parsed source files were moved within a workspace
        move_work_dir = os.path.join(output_work_dir, 'move_source_file_in_workspace')
        if os.path.exists(move_work_dir):
            logger.info('Cleaning up work directory for moving parsed source files')
            ScaleFile.objects.cleanup_move_dir(move_work_dir)
            logger.info('Deleting %s', move_work_dir)
            os.rmdir(move_work_dir)

        delete_normal_job_exe_dir_tree(job_exe_id)
Пример #6
0
    def test_file_in_command(self, mock_retrieve_call, mock_os_mkdir, mock_get_one_file, mock_setup_upload):
        """Tests command population when the interface declares a single 'file' input"""

        job_exe_id = 1
        job_input_dir = file_system.get_job_exe_input_data_dir(job_exe_id)
        input_file_path = os.path.join(job_input_dir, 'file1', 'foo.txt')

        # Stub out input retrieval and single-file lookup
        def new_retrieve(arg1, arg2, arg3):
            return {'file1_out': [input_file_path]}

        mock_retrieve_call.side_effect = new_retrieve
        mock_get_one_file.side_effect = lambda arg1: input_file_path

        job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
        job_interface_dict['command_arguments'] = '${file1}'
        job_interface_dict['input_data'] = [{'name': 'file1', 'type': 'file', 'required': True}]
        job_data_dict['input_data'].append({'name': 'file1', 'file_id': self.file.id})
        job_data_dict['output_data'].append({'name': 'file1_out', 'workspace_id': self.workspace.id})

        job_interface = JobInterface(job_interface_dict)
        job_data = JobData(job_data_dict)
        job_environment = JobEnvironment(job_environment_dict)

        job_interface.perform_pre_steps(job_data, job_environment, job_exe_id)
        job_command_arguments = job_interface.fully_populate_command_argument(job_data, job_environment, job_exe_id)
        self.assertEqual(job_command_arguments, input_file_path, 'expected a different command from pre_steps')
        mock_setup_upload.assert_called_once_with(file_system.get_job_exe_output_data_dir(job_exe_id),
                                                  file_system.get_job_exe_output_work_dir(job_exe_id),
                                                  self.workspace)
Пример #7
0
    def cleanup_job_execution(self, job_exe):
        '''See :meth:`job.execution.job_exe_cleaner.JobExecutionCleaner.cleanup_job_execution`
        '''

        logger.info('Cleaning up a non-system job')

        exe_id = job_exe.id
        download_dir = get_job_exe_input_data_dir(exe_id)
        download_work_dir = get_job_exe_input_work_dir(exe_id)
        upload_dir = get_job_exe_output_data_dir(exe_id)
        upload_work_dir = get_job_exe_output_work_dir(exe_id)

        logger.info('Cleaning up download directory')
        ScaleFile.objects.cleanup_download_dir(download_dir, download_work_dir)

        logger.info('Cleaning up upload directories')
        for workspace in Workspace.objects.filter(id__in=job_exe.job.get_job_data().get_output_workspace_ids()):
            logger.info('Cleaning up upload directory for workspace %s', workspace.name)
            ScaleFile.objects.cleanup_upload_dir(upload_dir, upload_work_dir, workspace)

        # Persist the execution metrics once cleanup has finished
        save_job_exe_metrics(job_exe)
Пример #8
0
    def fully_populate_command_argument(self, job_data, job_environment, job_exe_id):
        '''Return the command arguments string with every parameter substituted.

        Any required pre-steps (see are_pre_steps_needed) must already have been run. Property
        parameters are filled in from job_data, each 'file'/'files' parameter resolves to a path
        under the job execution input directory, and ${job_output_dir} resolves to the job
        execution output directory.
        Throws a :class:`job.configuration.interface.exceptions.InvalidEnvironment` if the necessary
        pre-steps have not been performed

        :param job_data: The job data
        :type job_data: :class:`job.configuration.data.job_data.JobData`
        :param job_environment: The job environment
        :type job_environment: :class:`job.configuration.environment.job_environment.JobEnvironment`
        :param job_exe_id: The job execution ID
        :type job_exe_id: int
        '''
        # TODO: don't ignore job_environment
        args = self.populate_command_argument_properties(job_data)

        input_dir = get_job_exe_input_data_dir(job_exe_id)
        output_dir = get_job_exe_output_data_dir(job_exe_id)

        for entry in self.definition['input_data']:
            name = entry['name']
            if entry['type'] == 'file':
                # Single file: substitute the one file found in the parameter's directory
                file_path = self._get_one_file_from_directory(os.path.join(input_dir, name))
                args = self._replace_command_parameter(args, name, file_path)
            elif entry['type'] == 'files':
                # TODO: verify folder exists
                # Multiple files: substitute the parameter's directory itself
                args = self._replace_command_parameter(args, name, os.path.join(input_dir, name))

        return self._replace_command_parameter(args, 'job_output_dir', output_dir)
Пример #9
0
    def test_files_in_command(self, mock_retrieve_call, mock_os_mkdir, mock_setup_upload):
        """Tests command population when the interface declares a multi-file 'files' input"""

        # Stub the input retrieval so no real download happens
        def new_retrieve(arg1, arg2, arg3):
            return {'files1_out': ['/test/file1/foo.txt', '/test/file1/bar.txt']}

        mock_retrieve_call.side_effect = new_retrieve

        job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
        job_interface_dict['command_arguments'] = '${files1}'
        job_interface_dict['input_data'] = [{'name': 'files1', 'type': 'files', 'required': True}]
        job_data_dict['input_data'].append({'name': 'files1', 'file_ids': [1, 2, 3]})
        job_data_dict['output_data'].append({'name': 'files1_out', 'workspace_id': self.workspace.id})

        job_interface = JobInterface(job_interface_dict)
        job_data = JobData(job_data_dict)
        job_environment = JobEnvironment(job_environment_dict)
        job_exe_id = 1
        job_input_dir = file_system.get_job_exe_input_data_dir(job_exe_id)

        job_interface.perform_pre_steps(job_data, job_environment, 1)
        job_command_arguments = job_interface.fully_populate_command_argument(job_data, job_environment, job_exe_id)
        # A 'files' parameter resolves to the parameter's whole input directory
        expected = os.path.join(job_input_dir, 'files1')
        self.assertEqual(job_command_arguments, expected, 'expected a different command from pre_steps')
        mock_setup_upload.assert_called_once_with(file_system.get_job_exe_output_data_dir(job_exe_id),
                                                  file_system.get_job_exe_output_work_dir(job_exe_id),
                                                  self.workspace)
Пример #10
0
    def fully_populate_command_argument(self, job_data, job_environment, job_exe_id):
        '''Return a command arguments string with all parameters fully substituted.

        Pre-steps (see are_pre_steps_needed), when required, must be run before calling this.
        Properties are populated from job_data; each 'file' or 'files' input is replaced with a
        path under the job execution input directory; ${job_output_dir} is replaced with the job
        execution output directory.
        Throws a :class:`job.configuration.interface.exceptions.InvalidEnvironment` if the necessary
        pre-steps have not been performed

        :param job_data: The job data
        :type job_data: :class:`job.configuration.data.job_data.JobData`
        :param job_environment: The job environment
        :type job_environment: :class:`job.configuration.environment.job_environment.JobEnvironment`
        :param job_exe_id: The job execution ID
        :type job_exe_id: int
        '''
        # TODO: don't ignore job_environment
        command_arguments = self.populate_command_argument_properties(job_data)

        exe_input_dir = get_job_exe_input_data_dir(job_exe_id)
        exe_output_dir = get_job_exe_output_data_dir(job_exe_id)

        for input_data in self.definition['input_data']:
            input_name = input_data['name']
            input_type = input_data['type']
            param_dir = os.path.join(exe_input_dir, input_name)
            if input_type == 'file':
                # Single file: replace the parameter with the one file in its directory
                one_file = self._get_one_file_from_directory(param_dir)
                command_arguments = self._replace_command_parameter(command_arguments, input_name, one_file)
            elif input_type == 'files':
                # TODO: verify folder exists
                # Multiple files: replace the parameter with its directory
                command_arguments = self._replace_command_parameter(command_arguments, input_name, param_dir)

        command_arguments = self._replace_command_parameter(command_arguments, 'job_output_dir', exe_output_dir)
        return command_arguments
Пример #11
0
    def setup_job_dir(self, data_files, job_exe_id):
        '''Builds the job execution directory structure and downloads the given input files

        :param data_files: Dict with each file parameter name mapping to a bool indicating if the parameter accepts
            multiple files (True) and a relative directory path
        :type data_files: dict of str -> tuple(bool, str)
        :param job_exe_id: The job execution ID
        :type job_exe_id: int
        '''

        in_data_dir = get_job_exe_input_data_dir(job_exe_id)
        in_work_dir = get_job_exe_input_work_dir(job_exe_id)
        out_data_dir = get_job_exe_output_data_dir(job_exe_id)
        out_work_dir = get_job_exe_output_work_dir(job_exe_id)

        # Fetch the execution's input files into the input data directory
        self.retrieve_input_data_files(in_data_dir, in_work_dir, data_files)

        # Create an upload directory per output workspace
        output_workspace_ids = self.get_output_workspace_ids()
        for workspace in Workspace.objects.filter(id__in=output_workspace_ids):
            ScaleFile.objects.setup_upload_dir(out_data_dir, out_work_dir, workspace)