def test_nullable_observation_set_serializer(self):
    '''
    Depending on our needs, we can permit attributes of Observation
    instances to have null values. Therefore, we created the
    NullableObservationSet class which holds a set (`elements`) of
    NullableObservations. Check that it works and that the original,
    non-nullable implementation rejects the null value.
    '''
    # One element carries a null attribute value, one a real value.
    element_with_null = {
        'id': 'foo',
        'attributes': {
            'keyA': {
                'attribute_type': 'String',
                'value': None
            }
        }
    }
    element_with_value = {
        'id': 'bar',
        'attributes': {
            'keyA': {
                'attribute_type': 'String',
                'value': 'abc'
            }
        }
    }
    payload = {
        'multiple': True,
        'elements': [element_with_null, element_with_value]
    }

    # The strict serializer must reject the null attribute value...
    strict_serializer = ObservationSetSerializer(data=payload)
    self.assertFalse(strict_serializer.is_valid())

    # ...while the nullable variant accepts it.
    nullable_serializer = NullableObservationSetSerializer(data=payload)
    self.assertTrue(nullable_serializer.is_valid())
def __init__(self, user, operation, workspace, key, submitted_value,
             input_or_output_spec):
    '''
    Initialize by delegating to the parent class, then validate the
    submitted value as an ObservationSet. A validation failure is
    re-raised keyed by the input's name so callers can attribute the
    error to the offending field.
    '''
    super().__init__(user, operation, workspace, key,
                     submitted_value, input_or_output_spec)

    # Verify that the ObservationSet is valid by using the serializer.
    serializer = ObservationSetSerializer(data=self.submitted_value)
    try:
        serializer.is_valid(raise_exception=True)
    except ValidationError as ex:
        # Re-key the error detail under the submitted input's key.
        raise ValidationError({key: ex.detail})

    # Set the instance:
    self.instance = serializer.get_instance()
def test_metadata_correct_case2(self):
    '''
    Typically, the metadata is collected following a successful
    validation. Do that here.

    Parses the test file independently and checks that the extracted
    metadata contains the expected observation and feature sets and no
    parent operation.
    '''
    m = IntegerMatrix()
    resource_path = os.path.join(TESTDIR, 'test_integer_matrix.tsv')
    metadata = m.extract_metadata(resource_path)

    # Parse the test file to ensure we extracted the right content.
    # Fix: the original opened the file twice and never closed either
    # handle; read it once inside a context manager instead.
    with open(resource_path) as fh:
        lines = fh.readlines()

    # First row: header holding the sample (observation) names.
    header = lines[0].strip().split('\t')
    samplenames = header[1:]
    obs_list = [Observation(x) for x in samplenames]

    # Remaining rows: first column holds the gene (feature) names.
    gene_list = [line.split('\t')[0] for line in lines[1:]]
    feature_list = [Feature(x) for x in gene_list]

    obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
    feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data
    self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
    self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])
def test_metadata_correct(self):
    '''
    Check that metadata extracted from an annotation table matches an
    independently parsed ObservationSet, and that no feature set or
    parent operation is reported.
    '''
    resource_path = os.path.join(TESTDIR, 'three_column_annotation.tsv')
    t = AnnotationTable()
    column_dict = {}
    obs_list = []
    # Fix: the original left the file handle open; use a context manager.
    with open(resource_path) as fh:
        for i, line in enumerate(fh):
            contents = line.strip().split('\t')
            if i == 0:
                # Header row: map column position -> attribute name.
                for j, c in enumerate(contents[1:]):
                    column_dict[j] = c
            else:
                # Data row: first column is the sample name, the rest
                # are attribute values keyed by the header mapping.
                samplename = contents[0]
                attr_dict = {}
                for j, v in enumerate(contents[1:]):
                    attr = UnrestrictedStringAttribute(v)
                    attr_dict[column_dict[j]] = attr
                obs = Observation(samplename, attr_dict)
                obs_list.append(obs)

    expected_obs_set = ObservationSetSerializer(
        ObservationSet(obs_list)).data
    metadata = t.extract_metadata(resource_path, 'tsv')
    self.assertEqual(metadata[OBSERVATION_SET_KEY], expected_obs_set)
    self.assertIsNone(metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])
def test_metadata_correct_case2(self):
    '''
    Typically, the metadata is collected following a successful
    validation. However, here we don't validate. Check that it goes
    and collects the table in the process.
    '''
    m = Matrix()
    resource_path = os.path.join(TESTDIR, 'test_matrix.tsv')
    metadata = m.extract_metadata(resource_path, 'tsv')

    # Parse the test file to ensure we extracted the right content.
    # Fix: the original opened the file twice and never closed either
    # handle; read it once inside a context manager instead.
    with open(resource_path) as fh:
        lines = fh.readlines()

    # First row: header holding the sample (observation) names.
    header = lines[0].strip().split('\t')
    samplenames = header[1:]
    obs_list = [Observation(x) for x in samplenames]
    obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data

    self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
    # NOTE: the feature-set comparison was removed when the feature
    # metadata was dropped (large JSON objects caused database issues);
    # the dead feature-set construction that supported it was removed
    # along with the commented-out assertion.
    self.assertIsNone(metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])
def extract_metadata(self, resource_path, parent_op_pk=None):
    '''
    Extract the metadata for a table resource: the rows become a
    FeatureSet and the columns become an ObservationSet. Both are
    serialized into `self.metadata`, which is returned.
    '''
    super().extract_metadata(resource_path, parent_op_pk)

    # The FeatureSet comes from the rows:
    features = FeatureSet(
        [Feature(row_id) for row_id in self.table.index])
    self.metadata[DataResource.FEATURE_SET] = \
        FeatureSetSerializer(features).data

    # The ObservationSet comes from the cols:
    observations = ObservationSet(
        [Observation(col_id) for col_id in self.table.columns])
    self.metadata[DataResource.OBSERVATION_SET] = \
        ObservationSetSerializer(observations).data

    return self.metadata
def extract_metadata(self, resource_path, parent_op_pk=None):
    '''
    When we extract the metadata from an AnnotationTable, we expect
    the Observation instances to be the rows. Additional columns
    specify attributes of each Observation, which we incorporate.
    '''
    super().extract_metadata(resource_path, parent_op_pk)

    # Build the Observation instances (rows + attribute columns)
    # via the parent helper, then serialize them into the metadata.
    observations = super().prep_metadata(Observation)
    serialized = ObservationSetSerializer(
        ObservationSet(observations)).data
    self.metadata[DataResource.OBSERVATION_SET] = serialized

    return self.metadata
def extract_metadata(self, resource_path, file_extension, parent_op_pk=None):
    '''
    Extract the metadata for a matrix resource. Only the columns are
    serialized (as an ObservationSet); the row-derived FeatureSet was
    intentionally removed because the resulting JSON objects were too
    large to insert into the database.
    '''
    super().extract_metadata(resource_path, file_extension, parent_op_pk)

    # NOTE: FeatureSet extraction from the rows was deliberately
    # dropped — large JSON payloads caused database insertion issues.

    # The ObservationSet comes from the cols:
    observations = ObservationSet(
        [Observation(col_id) for col_id in self.table.columns])
    self.metadata[DataResource.OBSERVATION_SET] = \
        ObservationSetSerializer(observations).data

    return self.metadata