    def test_collection_of_resource_uuids(self):
        '''
        To ensure that we don't erase crucial data resources in a workspace,
        we have a utility function that scans through the executed operations
        of a workspace and returns a list of the "used" resource UUIDs.

        Here, we test that it returns the expected list.
        '''
        # first test one where we expect an empty list-- no resources
        # are used or created:
        f = os.path.join(
            TESTDIR,
            'simple_op_test.json'
        )
        d = read_operation_json(f)
        mock_inputs = {
            'some_string': 'abc'
        }
        result = collect_resource_uuids(d['inputs'], mock_inputs)
        self.assertEqual(result, [])

        # test empty output/input dict:
        mock_outputs = {}
        result = collect_resource_uuids(d['outputs'], mock_outputs)
        self.assertEqual(result, [])

        # test a non-empty return
        f = os.path.join(
            TESTDIR,
            'valid_workspace_operation.json'
        )
        d = read_operation_json(f)
        mock_outputs = {
            'norm_counts': 'abc',
            'dge_table': 'xyz'
        }
        result = collect_resource_uuids(d['outputs'], mock_outputs)
        self.assertEqual(result, ['abc', 'xyz'])

        # test if one of the DataResource outputs was not used (which is fine)
        # and the output value was assigned to None
        mock_outputs = {
            'norm_counts': None,
            'dge_table': 'xyz'
        }
        result = collect_resource_uuids(d['outputs'], mock_outputs)
        self.assertEqual(result, ['xyz'])

        # test if there is some discrepancy in the expected and actual inputs
        # or outputs
        mock_outputs = {
            'junk': 'abc'
        }
        with self.assertRaises(Exception):
            collect_resource_uuids(d['outputs'], mock_outputs)
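
# For context, a minimal sketch of `collect_resource_uuids`, inferred purely
# from the assertions above (the 'DataResource' type name is an assumption;
# the real implementation lives elsewhere in the codebase):
def collect_resource_uuids_sketch(io_spec, submitted_values):
    uuids = []
    for key, value in submitted_values.items():
        # a submitted key absent from the operation spec (e.g. 'junk')
        # signals a discrepancy, as the final assertion expects:
        if key not in io_spec:
            raise Exception('Unexpected key: {k}'.format(k=key))
        spec = io_spec[key]['spec']
        # only resource-type entries contribute UUIDs; entries left
        # unused (a value of None) are skipped:
        if spec['attribute_type'] == 'DataResource' and value is not None:
            uuids.append(value)
    return uuids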
    def add_dummy_operation(self):
        # use a valid operation spec contained in the test folder
        op_spec_file = os.path.join(settings.BASE_DIR, 'api', 'tests',
                                    'operation_test_files',
                                    'valid_operation.json')
        d = read_operation_json(op_spec_file)
        d['id'] = str(uuid.uuid4())
        d['git_hash'] = 'abcd'
        d['repository_url'] = 'https://github.com/some-repo/'
        d['repo_name'] = 'some-repo'
        op_serializer = validate_operation(d)

        # need to make a directory with dummy files to use the
        # `save_operation` function
        dummy_dir_path = os.path.join('/tmp', 'dummy_op')
        os.makedirs(dummy_dir_path, exist_ok=True)
        op = op_serializer.get_instance()
        op_data = OperationSerializer(op).data
        save_operation(op_data, dummy_dir_path, True)
        OperationDbModel.objects.create(id=op.id, name=op.name)

        # use a valid operation spec contained in the test folder
        op_spec_file = os.path.join(settings.BASE_DIR, 'api', 'tests',
                                    'operation_test_files',
                                    'valid_workspace_operation.json')
        d = read_operation_json(op_spec_file)
        d['id'] = str(uuid.uuid4())
        d['git_hash'] = 'abcd'
        d['repository_url'] = 'https://github.com/some-repo/'
        d['repo_name'] = 'some-repo'
        op_serializer = validate_operation(d)

        # need to make a directory with dummy files to use the
        # `save_operation` function
        dummy_dir_path = os.path.join('/tmp', 'dummy_op2')
        os.makedirs(dummy_dir_path, exist_ok=True)
        op = op_serializer.get_instance()
        op_data = OperationSerializer(op).data
        save_operation(op_data, dummy_dir_path, True)
        OperationDbModel.objects.create(id=op.id, name=op.name)
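
    # The two blocks above differ only in the spec file and the staging
    # directory, so a refactor into a helper (hypothetical name
    # `_register_dummy_op`) could remove the duplication:
    def _register_dummy_op(self, spec_filename, dummy_dirname):
        op_spec_file = os.path.join(settings.BASE_DIR, 'api', 'tests',
                                    'operation_test_files', spec_filename)
        d = read_operation_json(op_spec_file)
        d['id'] = str(uuid.uuid4())
        d['git_hash'] = 'abcd'
        d['repository_url'] = 'https://github.com/some-repo/'
        d['repo_name'] = 'some-repo'
        op_serializer = validate_operation(d)

        dummy_dir_path = os.path.join('/tmp', dummy_dirname)
        os.makedirs(dummy_dir_path, exist_ok=True)
        op = op_serializer.get_instance()
        op_data = OperationSerializer(op).data
        save_operation(op_data, dummy_dir_path, True)
        OperationDbModel.objects.create(id=op.id, name=op.name)

    # with the helper, `add_dummy_operation` reduces to two calls:
    # self._register_dummy_op('valid_operation.json', 'dummy_op')
    # self._register_dummy_op('valid_workspace_operation.json', 'dummy_op2')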
    def test_basic_user_inputs(self):
        '''
        This tests that the proper validation happens
        when comparing the user-submitted values and the input
        specifications. Here, the values are all valid.
        '''
        f = os.path.join(TESTDIR, 'sample_for_basic_types.json')
        d = read_operation_json(f)

        # some valid user inputs corresponding to the input specifications
        sample_inputs = {
            'int_type': 10,
            'positive_int_type': 3,
            'nonnegative_int_type': 0,
            'bounded_int_type': 2,
            'float_type': 0.2,
            'bounded_float_type': 0.4,
            'positive_float_type': 0.01,
            'nonnegative_float_type': 0.1,
            'string_type': 'abc',
            'boolean_type': True,
            'option_string_type': 'abc'
        }

        for key, val in sample_inputs.items():
            spec_object = d['inputs'][key]['spec']
            spec_type = spec_object['attribute_type']
            submitted_input_or_output_class = submitted_operation_input_or_output_mapping[
                spec_type]
            submitted_input_or_output_class(self.regular_user_1, None, None,
                                            key, val, spec_object)
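
    # For reference, a plausible shape for one entry of
    # 'sample_for_basic_types.json', inferred from how these tests index into
    # it. The type name 'BoundedInteger' and the concrete values are
    # assumptions, not taken from the actual file:
    EXAMPLE_SPEC_ENTRY = {
        'bounded_int_type': {
            'spec': {
                'attribute_type': 'BoundedInteger',  # assumed type name
                'min': 0,     # bounds are read as spec['min'] / spec['max']
                'max': 5,
                'default': 2  # substituted when an optional input is omitted
            }
        }
    }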
    def test_feature_inputs(self):
        '''
        Tests that the inputs are properly validated when they
        correspond to an input type of `Feature`
        '''
        f = os.path.join(TESTDIR, 'feature_set_test.json')
        d = read_operation_json(f)

        clazz = submitted_operation_input_or_output_mapping['Feature']

        valid_feature_1 = {
            'id': 'foo',
            'attributes': {
                'treatment': {
                    'attribute_type': 'String',
                    'value': 'A'
                }
            }
        }
        valid_feature_2 = {'id': 'foo'}
        invalid_feature = {'attributes': {'treatment': 'A'}}

        # test that we are fine with a valid input:
        x = clazz(self.regular_user_1, None, None, 'xyz', valid_feature_1,
                  d['inputs']['feature_type'])
        y = clazz(self.regular_user_1, None, None, 'xyz', valid_feature_2,
                  d['inputs']['feature_type'])
        self.assertDictEqual(x.get_value(), valid_feature_1)
        self.assertDictEqual(y.get_value(), {'id': 'foo', 'attributes': {}})
        with self.assertRaises(ValidationError):
            clazz(self.regular_user_1, None, None, 'xyz', invalid_feature,
                  d['inputs']['feature_type'])

    def test_user_input_validation(self, mock_get_operation_instance_data):
        '''
        Test that we receive back an appropriate object following
        successful validation. All the inputs below are valid
        '''
        f = os.path.join(
            TESTDIR,
            'sample_for_basic_types_no_default.json'
        )
        d = read_operation_json(f)
        mock_get_operation_instance_data.return_value = d

        # some valid user inputs corresponding to the input specifications
        sample_inputs = {
            'int_no_default_type': 10,
            'positive_int_no_default_type': 3,
            'nonnegative_int_no_default_type': 0,
            'bounded_int_no_default_type': 2,
            'float_no_default_type': 0.2,
            'bounded_float_no_default_type': 0.4,
            'positive_float_no_default_type': 0.01,
            'nonnegative_float_no_default_type': 0.1,
            'string_no_default_type': 'abc',
            'boolean_no_default_type': True
        }

        workspaces = Workspace.objects.all()
        if len(workspaces) == 0:
            raise ImproperlyConfigured('Need at least one Workspace to run this test.')
        validate_operation_inputs(self.regular_user_1, 
            sample_inputs, self.db_op, self.workspace)

    def test_list_attr_inputs(self, mock_get_operation_instance_data):
        '''
        Test the case where inputs are of a list type (e.g. a list of strings)
        Check that it all validates as expected
        '''
        f = os.path.join(
            TESTDIR,
            'valid_op_with_list_inputs.json'
        )
        d = read_operation_json(f)
        mock_get_operation_instance_data.return_value = d
        l1 = ['https://foo.com/bar', 'https://foo.com/baz']
        l2 = ['abc', 'def']
        inputs = {
            'link_list': l1,
            'regular_string_list': l2
        }
        ops = OperationDbModel.objects.all()
        op = ops[0]
        result = validate_operation_inputs(self.regular_user_1,
                inputs, op, None)
        self.assertIsNone(result['optional_input'])
        self.assertCountEqual(result['link_list'].get_value(), l1)
        self.assertCountEqual(result['regular_string_list'].get_value(), l2)

    def test_read_operation_json(self, mock_read_local_file):
        # test that a properly formatted file returns
        # a dict as expected:
        fp = open(self.filepath)
        mock_read_local_file.return_value = fp
        d = read_operation_json(self.filepath)
        self.assertDictEqual(d, self.valid_dict)
        # close the handle we passed to the mock to avoid a ResourceWarning:
        fp.close()
    def convert(self, input_key, user_input, op_dir, staging_dir):
        operation_json_filepath = os.path.join(
            op_dir, settings.OPERATION_SPEC_FILENAME)
        op_spec = read_operation_json(operation_json_filepath)
        spec = op_spec['inputs'][input_key]['spec']
        min_val = spec['min']
        max_val = spec['max']
        f = BoundedFloatAttribute(user_input, min=min_val, max=max_val)
        return {input_key: f.value}
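
# A hypothetical usage of the converter above. `BoundedFloatConverter` is an
# assumed name for the class that owns `convert`, and the spec under `op_dir`
# is assumed to define min/max bounds for a 'p_val' input:
def demo_bounded_float_conversion(op_dir, staging_dir):
    converter = BoundedFloatConverter()  # hypothetical class name
    # an in-range value is validated and passed through unchanged;
    # out-of-range values raise from BoundedFloatAttribute:
    converted = converter.convert('p_val', 0.05, op_dir, staging_dir)
    assert converted == {'p_val': 0.05}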
    def test_defaults_for_non_required_inputs(self):
        '''
        Certain inputs may not be required by the user. In that case, check that
        the defaults are properly entered as the value
        '''
        f = os.path.join(TESTDIR, 'sample_for_basic_types.json')
        d = read_operation_json(f)

        # create an object for each input, passing None as the submitted
        # value; the spec's default should be used without raising:
        for key, op_input in d['inputs'].items():
            spec_object = op_input['spec']
            spec_type = spec_object['attribute_type']
            submitted_input_or_output_class = submitted_operation_input_or_output_mapping[
                spec_type]
            # can pass None for the workspace arg since we don't use it when checking the basic types
            submitted_input_or_output_class(self.regular_user_1, None, None,
                                            key, None, spec_object)
    def test_no_default_for_required_param(self, mock_get_operation_instance_data):
        '''
        Test that a missing required parameter triggers a validation error
        '''
        f = os.path.join(
            TESTDIR,
            'required_without_default.json'
        )
        d = read_operation_json(f)
        mock_get_operation_instance_data.return_value = d

        # one input was optional, one required. An empty payload
        # qualifies as a problem since it's missing the required key
        sample_inputs = {}

        with self.assertRaises(ValidationError):
            validate_operation_inputs(self.regular_user_1, 
                sample_inputs, self.db_op, self.workspace)
    def test_optional_without_default_becomes_none(self, mock_get_operation_instance_data):
        '''
        Generally, Operations with optional inputs should have defaults. However,
        if that is violated, the "input" should be assigned to be None
        '''
        f = os.path.join(
            TESTDIR,
            'optional_without_default.json'
        )
        d = read_operation_json(f)
        mock_get_operation_instance_data.return_value = d

        # the only input is optional, so this is technically fine.
        sample_inputs = {}

        final_inputs = validate_operation_inputs(self.regular_user_1,
            sample_inputs, self.db_op, self.workspace)
        self.assertIsNone(final_inputs['optional_int_type'])
    def test_optional_value_overridden(self, mock_get_operation_instance_data):
        '''
        Test that the optional parameter is overridden when given
        '''
        f = os.path.join(
            TESTDIR,
            'required_without_default.json'
        )
        d = read_operation_json(f)
        mock_get_operation_instance_data.return_value = d

        sample_inputs = {
            'required_int_type': 22,
            'optional_int_type': 33
        }

        final_inputs = validate_operation_inputs(self.regular_user_1, 
            sample_inputs, self.db_op, self.workspace)
        self.assertEqual(final_inputs['required_int_type'].submitted_value, 22)
        self.assertEqual(final_inputs['optional_int_type'].submitted_value, 33)
    def test_optional_boolean_value_filled_by_default(self, mock_get_operation_instance_data):
        '''
        Test that a missing optional boolean parameter gets the default value
        '''
        f = os.path.join(
            TESTDIR,
            'valid_op_with_default_bool.json'
        )
        d = read_operation_json(f)
        mock_get_operation_instance_data.return_value = d

        # the boolean input is optional and has a default, so an empty
        # payload is fine; the default should be filled in.
        sample_inputs = {}

        final_inputs = validate_operation_inputs(self.regular_user_1, 
            sample_inputs, self.db_op, self.workspace)
        self.assertEqual(final_inputs['some_boolean'].submitted_value, False)
        expected_default = d['inputs']['some_boolean']['spec']['default']
        self.assertEqual(
            final_inputs['some_boolean'].submitted_value, expected_default)
    def test_bad_basic_user_inputs(self):
        '''
        This tests that the proper validation happens
        when comparing the user-submitted values and the input
        specifications. Here, the user inputs violate the type
        constraints
        '''
        f = os.path.join(TESTDIR, 'sample_for_basic_types_no_default.json')
        d = read_operation_json(f)

        # some invalid user inputs, each violating its input specification
        sample_inputs = {
            'int_no_default_type': 10.5,
            'positive_int_no_default_type': -3,
            'nonnegative_int_no_default_type': -10,
            'bounded_int_no_default_type': 22222,
            'float_no_default_type': 'abc',
            'bounded_float_no_default_type': 10000.4,
            'positive_float_no_default_type': -10.01,
            'nonnegative_float_no_default_type': -0.1,
            'string_no_default_type': '.*',
            'boolean_no_default_type': 'abc',
            'option_string_no_default_type': 'zzz'
        }

        # try to create objects for each- ensure they raise an exception:
        for key, val in sample_inputs.items():
            spec_object = d['inputs'][key]['spec']
            spec_type = spec_object['attribute_type']
            submitted_input_or_output_class = submitted_operation_input_or_output_mapping[
                spec_type]
            with self.assertRaises(ValidationError):
                # can pass None for the workspace arg since we don't use it when checking the basic types
                # Also pass None for the Operation argument. None of the basic attributes require that.
                submitted_input_or_output_class(self.regular_user_1, None,
                                                None, key, val, spec_object)
def ingest_dir(staging_dir, op_uuid, git_hash, repo_name, repository_url, overwrite=False):

    # Parse the JSON file defining this new Operation:
    operation_json_filepath = os.path.join(staging_dir, settings.OPERATION_SPEC_FILENAME)
    j = read_operation_json(operation_json_filepath)

    # extra parameters for an Operation that are not required
    # to be specified by the developer who wrote the `Operation`
    add_required_keys_to_operation(j, id=op_uuid,
        git_hash=git_hash,
        repository_url=repository_url,
        repo_name=repo_name
    )

    # attempt to validate the data for the operation:
    try:
        op_serializer = validate_operation(j)
    except ValidationError as ex:
        logger.error('A validation error was raised when validating'
            ' the information parsed from {path}. Exception was: {ex}.\n'
            ' Full info was: {j}'.format(
                path=operation_json_filepath,
                j=json.dumps(j, indent=2),
                ex=ex
            )
        )
        raise
    except Exception as ex:
        logger.error('An unexpected error was raised when validating'
            ' the information parsed from {path}. Exception was: {ex}.\n'
            ' Full info was: {j}'.format(
                path=operation_json_filepath,
                j=json.dumps(j, indent=2),
                ex=ex
            )
        )
        raise

    # get an instance of the Operation (the data structure, NOT the database model)
    op = op_serializer.get_instance()
    op_data = op.to_dict()
    logger.info('After parsing operation spec, we have: {spec}'.format(spec=op_data))

    # check that the required files, etc. are there for the particular run mode:
    check_required_files(op_data, staging_dir)

    # handle any operation-specific resources/files:
    handle_operation_specific_resources(op_data, staging_dir, op_uuid)

    # prepare any elements required for running the operation:
    prepare_operation(op_data, staging_dir, repo_name, git_hash)

    # save the operation in a final location:
    save_operation(op_data, staging_dir, overwrite)

    # update the database instance.
    try:
        o = OperationDbModel.objects.get(id=op.id)
        o.name = op.name
        o.active = True
        o.successful_ingestion = True
        o.workspace_operation = op_data['workspace_operation']
        o.save()
    except OperationDbModel.DoesNotExist:
        logger.error('Could not find the Operation corresponding to'
            ' id={u}'.format(u=op_uuid)
        )
        raise Exception('Encountered an issue when trying to update an Operation'
            ' database instance after ingesting from the repository.'
        )
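
# A plausible (assumed) implementation of `add_required_keys_to_operation`,
# given how it is called above: it merges the machine-generated fields into
# the operation dict parsed from the developer-written spec file:
def add_required_keys_to_operation_sketch(op_dict, **kwargs):
    # id, git_hash, repository_url, and repo_name are injected here rather
    # than being required of the operation developer:
    op_dict.update(kwargs)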
    def test_check_for_resource_operations_case3(self, mock_get_operation_instance_data):
        '''
        When removing a Resource from a Workspace, we need to ensure
        we are not removing a file that has been used in one or more 
        ExecutedOperations.

        Below, we check where a file HAS been used, but the analysis
        failed. Hence, it's safe to remove since it was not used to
        create anything.
        '''
        # need to create an ExecutedOperation that is based on a known
        # Operation and part of an existing workspace. Also need to ensure
        # that there is a Resource that is being used in that Workspace

        all_workspaces = Workspace.objects.all()
        workspace_with_resource = None
        for w in all_workspaces:
            if len(w.resources.all()) > 0:
                workspace_with_resource = w
                break
        if workspace_with_resource is None:
            raise ImproperlyConfigured('Need at least one Workspace that has'
                 ' at least a single Resource.'
            )

        ops = Operation.objects.all()
        if len(ops) > 0:
            op = ops[0]
        else:
            raise ImproperlyConfigured('Need at least one Operation'
                ' to use for this test'
            )
        
        f = os.path.join(
            TESTDIR,
            'valid_workspace_operation.json'
        )
        op_data = read_operation_json(f)
        mock_get_operation_instance_data.return_value = op_data
        executed_op_pk = uuid.uuid4()
        # the op_data we get from above has two outputs, one of which
        # is a DataResource. Just to be sure everything is consistent
        # between the spec and our mocked inputs below, we do this assert:
        input_keyset = list(op_data['inputs'].keys())
        self.assertCountEqual(input_keyset, ['count_matrix', 'p_val'])

        mock_used_resource = workspace_with_resource.resources.all()[0]
        mock_validated_inputs = {
            'count_matrix': str(mock_used_resource.pk),
            'p_val': 0.01
        }
        ex_op = WorkspaceExecutedOperation.objects.create(
            id=executed_op_pk,
            owner=self.regular_user_1,
            workspace=workspace_with_resource,
            job_name='abc',
            inputs=mock_validated_inputs,
            outputs={},
            operation=op,
            mode=op_data['mode'],
            status=ExecutedOperation.COMPLETION_ERROR,
            job_failed=True
        )
        was_used = check_for_resource_operations(mock_used_resource, workspace_with_resource)
        self.assertFalse(was_used)
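
# Consistent with the test above, a sketch (not the actual implementation) of
# how `check_for_resource_operations` could treat failed runs. It assumes
# `get_operation_instance_data` (the function mocked in the test) takes the
# Operation and returns the parsed spec, and reuses `collect_resource_uuids`
# from the first example:
def check_for_resource_operations_sketch(resource, workspace):
    executed_ops = WorkspaceExecutedOperation.objects.filter(workspace=workspace)
    for ex_op in executed_ops:
        # a failed run created nothing from the resource,
        # so it does not count as a "use":
        if ex_op.job_failed:
            continue
        op_data = get_operation_instance_data(ex_op.operation)
        used_uuids = collect_resource_uuids(op_data['inputs'], ex_op.inputs)
        if str(resource.pk) in used_uuids:
            return True
    return False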
    def test_observation_set_inputs(self):
        '''
        Tests that the inputs are properly validated when they
        correspond to an input type of `ObservationSet`
        '''
        f = os.path.join(TESTDIR, 'obs_set_test.json')
        d = read_operation_json(f)

        clazz = submitted_operation_input_or_output_mapping['ObservationSet']

        valid_obs_1 = {
            'id': 'foo',
            'attributes': {
                'treatment': {
                    'attribute_type': 'String',
                    'value': 'A'
                }
            }
        }
        valid_obs_2 = {
            'id': 'bar',
            'attributes': {
                'treatment': {
                    'attribute_type': 'String',
                    'value': 'B'
                }
            }
        }

        valid_obs_set = {
            'multiple': True,
            'elements': [valid_obs_1, valid_obs_2]
        }

        # test that we are fine with a valid input:
        x = clazz(self.regular_user_1, None, None, 'xyz', valid_obs_set,
                  d['inputs']['obs_set_type'])
        val = x.get_value()
        self.assertEqual(val['multiple'], valid_obs_set['multiple'])
        self.assertCountEqual(val['elements'], valid_obs_set['elements'])

        # an empty element set is technically valid
        empty_obs_set = {'multiple': True, 'elements': []}
        x = clazz(self.regular_user_1, None, None, 'xyz', empty_obs_set,
                  d['inputs']['obs_set_type'])
        val = x.get_value()
        self.assertCountEqual(val['elements'], [])

        invalid_obs_set = {
            'multiple': False,
            'elements': [valid_obs_1, valid_obs_2]
        }
        # the >1 elements coupled with multiple=False makes this an invalid ObservationSet
        with self.assertRaises(ValidationError):
            clazz(self.regular_user_1, None, None, 'xyz', invalid_obs_set,
                  d['inputs']['obs_set_type'])

        valid_obs_set = {
            'multiple': True,
            'elements': [
                valid_obs_1,
                {'id': 'baz'}  # missing the 'attributes' key, but that is OK
            ]
        }
        clazz(self.regular_user_1, None, None, 'xyz', valid_obs_set,
              d['inputs']['obs_set_type'])

        invalid_obs_set = {
            'multiple': True,
            'elements': [
                valid_obs_1,
                {}  # missing the 'id' key, which is required
            ]
        }
        # missing 'id' causes the nested Observation to be invalid
        with self.assertRaises(ValidationError):
            clazz(self.regular_user_1, None, None, 'xyz', invalid_obs_set,
                  d['inputs']['obs_set_type'])
    def test_feature_set_inputs(self):
        '''
        Tests that the inputs are properly validated when they
        correspond to an input type of `FeatureSet`
        '''
        f = os.path.join(TESTDIR, 'feature_set_test.json')
        d = read_operation_json(f)

        clazz = submitted_operation_input_or_output_mapping['FeatureSet']

        valid_feature_1 = {'id': 'foo', 'attributes': {}}
        valid_feature_2 = {'id': 'bar', 'attributes': {}}

        valid_feature_set = {
            'multiple': True,
            'elements': [valid_feature_1, valid_feature_2]
        }

        # test that we are fine with a valid input:
        x = clazz(self.regular_user_1, None, None, 'xyz', valid_feature_set,
                  d['inputs']['feature_set_type'])
        val = x.get_value()
        self.assertEqual(val['multiple'], valid_feature_set['multiple'])
        self.assertCountEqual(val['elements'], valid_feature_set['elements'])

        # this featureset has zero elements. It's technically valid
        empty_feature_set = {'multiple': True, 'elements': []}
        x = clazz(self.regular_user_1, None, None, 'xyz', empty_feature_set,
                  d['inputs']['feature_set_type'])
        val = x.get_value()
        self.assertCountEqual(val['elements'], [])

        invalid_feature_set = {
            'multiple': False,
            'elements': [valid_feature_1, valid_feature_2]
        }
        # the >1 elements coupled with multiple=False makes this an invalid FeatureSet
        with self.assertRaises(ValidationError):
            clazz(self.regular_user_1, None, None, 'xyz', invalid_feature_set,
                  d['inputs']['feature_set_type'])

        valid_feature_set2 = {
            'multiple': True,
            'elements': [
                valid_feature_1,
                {'id': 'bar'}  # missing the 'attributes' key, but that is OK
            ]
        }
        x = clazz(self.regular_user_1, None, None, 'xyz', valid_feature_set2,
                  d['inputs']['feature_set_type'])
        # note that we compare against the original valid_feature_set, not
        # valid_feature_set2: the validation methods fill in an empty
        # 'attributes' key, and valid_feature_set2 was deliberately
        # constructed without one.
        val = x.get_value()
        self.assertEqual(val['multiple'], valid_feature_set['multiple'])
        self.assertCountEqual(val['elements'], valid_feature_set['elements'])

        invalid_feature_set = {
            'multiple': True,
            'elements': [
                valid_feature_1,
                {}  # missing the 'id' key, which is required
            ]
        }
        # missing 'id' causes the nested Feature to be invalid
        with self.assertRaises(ValidationError):
            clazz(self.regular_user_1, None, None, 'xyz', invalid_feature_set,
                  d['inputs']['feature_set_type'])