Пример #1
0
    def test_valid_schemas(self):
        """Test that the schema creation APIs properly validate the schemas
        as they are constructed.
        """
        good_schema = DataEntryMetadataSchema()
        good_schema.addAttr(Double("attr1"))
        good_schema.addAttr(String("attr2"))

        bad_schema = DataEntryMetadataSchema()
        self.assertRaises(ValueError, bad_schema.addAttr, str)
Пример #2
0
 def test_valid_schemas(self):
     """Test that the schema creation APIs properly validate the schemas
     as they are constructed.
     """
     good_schema = DataEntryMetadataSchema()
     good_schema.addAttr(Double("attr1"))
     good_schema.addAttr(String("attr2"))
     
     bad_schema = DataEntryMetadataSchema()
     self.assertRaises(ValueError, bad_schema.addAttr, str)        
Пример #3
0
    def test_data_metadata(self):
        work = self.ingester_platform.createUnitOfWork()
        schema = DataEntryMetadataSchema("Quality Assurance")
        schema.addAttr(Double("value"))
        schema.addAttr(String("description"))
        work.post(schema)
        work.commit()
        self.schemas.append(schema)

        ingested_schema = self.ingester_platform.getSchema(schema.id)
        self.compare_schema_attrs(ingested_schema.attrs, schema.attrs)
        self.assertEquals(ingested_schema.name, schema.name)
Пример #4
0
 def test_schema_attributes(self):
     schema = DataEntryMetadataSchema()
     schema.addAttr(Double("one"))
     schema.addAttr(String("two"))
     self.assertEquals("one", schema.attrs["one"].name)
     self.assertEquals("two", schema.attrs["two"].name)
     self.assertTrue(isinstance(schema.attrs["one"], Double))
     self.assertTrue(isinstance(schema.attrs["two"], String))
     
     schema_dict = self.marshaller.obj_to_dict(schema)
     
     schema_obj = self.marshaller.dict_to_obj(schema_dict)
     
     self.assertEquals("one", schema_obj.attrs["one"].name)
     self.assertEquals("two", schema_obj.attrs["two"].name)
     self.assertTrue(isinstance(schema_obj.attrs["one"], Double))
     self.assertTrue(isinstance(schema_obj.attrs["two"], String))
Пример #5
0
    def test_schema_attributes(self):
        schema = DataEntryMetadataSchema()
        schema.addAttr(Double("one"))
        schema.addAttr(String("two"))
        self.assertEquals("one", schema.attrs["one"].name)
        self.assertEquals("two", schema.attrs["two"].name)
        self.assertTrue(isinstance(schema.attrs["one"], Double))
        self.assertTrue(isinstance(schema.attrs["two"], String))

        schema_dict = self.marshaller.obj_to_dict(schema)

        schema_obj = self.marshaller.dict_to_obj(schema_dict)

        self.assertEquals("one", schema_obj.attrs["one"].name)
        self.assertEquals("two", schema_obj.attrs["two"].name)
        self.assertTrue(isinstance(schema_obj.attrs["one"], Double))
        self.assertTrue(isinstance(schema_obj.attrs["two"], String))
Пример #6
0
    def test_data_metadata(self):
        work = self.ingester_platform.createUnitOfWork()
        schema = DataEntryMetadataSchema("Quality Assurance")
        schema.addAttr(Double("value"))
        schema.addAttr(String("description"))
        work.post(schema)
        work.commit()
        self.schemas.append(schema)

        ingested_schema = self.ingester_platform.getSchema(schema.id)
        self.compare_schema_attrs(ingested_schema.attrs, schema.attrs)
        self.assertEquals(ingested_schema.name, schema.name)
Пример #7
0
    def test_api_usage(self):
#       User data that is created by filling out the provisioning interface workflow steps.
        #   General
        title = "Test project"
        data_manager = "A Person"
        project_lead = "Another Person"

        #   Metadata
        project_region = Region("Test Region", ((1, 1), (2, 2),(2,1), (1,1)))

        #   Methods & Datasets
        loc1 = Location(11.0, 11.0, "Test Site", 100)
        loc2 = Location(11.0, 11.0, "Test Site", 100)
        loc3 = Location(12.0, 11.0, "Test Site", 100)

        temp_work = self.ingester_platform.createUnitOfWork()
        temperature_schema = DataEntrySchema("Test Temp Schema")
        temperature_schema.addAttr(Double("temperature"))
        temp_work.post(temperature_schema)
        temp_work.commit()

        air_temperature_schema = DataEntrySchema("Air Temp Schema")
        air_temperature_schema.extends = [temperature_schema.id]
        air_temperature_schema = self.ingester_platform.post(air_temperature_schema)

        second_level_inheritence_schema = DataEntrySchema("Second Inheritence")
        second_level_inheritence_schema.extends = [air_temperature_schema.id]
        second_level_inheritence_schema = self.ingester_platform.post(second_level_inheritence_schema)

        # Check the name is set
        temperature_schema_1 = self.ingester_platform.getSchema(temperature_schema.id)
        self.assertIsNotNone(temperature_schema.name)
        self.assertEquals(temperature_schema.name, temperature_schema_1.name)
        
        file_schema = DataEntrySchema()
        file_schema.addAttr(FileDataType("file"))
        file_schema = self.ingester_platform.post(file_schema)

        dataset1 = Dataset(location=None, schema=temperature_schema.id)
        dataset2 = Dataset(location=None, schema=file_schema.id, data_source=PullDataSource("http://test.com", "file_handle", processing_script="file://d:/processing_scripts/awsome_processing.py"))

#        dataset3 = Dataset(None, file_schema, PullDataSource("http://test.com", "file_handle"), CustomSampling("file://d:/sampling_scripts/awsome_sampling.py"), "file://d:/processing_scripts/awsome_processing.py")

        self.cleanup_files.append(dataset2.data_source.processing_script)
#        self.cleanup_files.push(dataset3.sampling.script)
#        self.cleanup_files.push(dataset3.processing_script)

#       Provisioning admin accepts the submitted project
        work = self.ingester_platform.createUnitOfWork()

        work.post(project_region)    # Save the region

        loc1.region = project_region.id                  # Set the datasets location to use the projects region
        work.post(loc1)                        # Save the location
        dataset1.location = loc1.id                            # Set the datasets location
        work.post(dataset1)                # Save the dataset

        loc2.region = project_region.id
        work.post(loc2)
        dataset2.location = loc2.id
        work.post(dataset2)

        work.commit()

        # Region, location and dataset id's will be saved to the project within the provisioning system in some way


#       User searches for datasets

        # TODO: Nigel? - Define searching api
        found_dataset_id = dataset1.id                  # The dataset that has an extended file schema

#       User manually enters data
        timestamp = datetime.datetime.now()
        data_entry_1 = DataEntry(found_dataset_id, timestamp)
        data_entry_1['temperature'] = 27.8                # Add the extended schema items
        data_entry_1 = self.ingester_platform.post(data_entry_1)
        self.assertIsNotNone(data_entry_1.id)

        timestamp2 = timestamp + datetime.timedelta(seconds=1)
        data_entry_2 = DataEntry(found_dataset_id, timestamp2)
        data_entry_2['temperature'] = 27.8                # Add the extended schema items
        data_entry_2 = self.ingester_platform.post(data_entry_2)
        
        self.assertEquals(2, len(self.ingester_platform.search(DataEntrySearchCriteria(found_dataset_id), 0, 10).results))
        result = self.ingester_platform.search(DataEntrySearchCriteria(found_dataset_id), 0, 1)
        self.assertEquals(2, result.count)
        self.assertEquals(1, len(result.results))
        self.assertEquals(1, len(self.ingester_platform.search(DataEntrySearchCriteria(found_dataset_id), 1, 1).results))
        
        result = self.ingester_platform.search(DataEntrySearchCriteria(found_dataset_id), 2, 1)
        self.assertEquals(0, len(result.results))
                
        self.assertEquals(0, len(self.ingester_platform.search(DataEntrySearchCriteria(found_dataset_id, 
                                 end_time=timestamp-datetime.timedelta(seconds=60)), 0, 10).results))
        self.assertEquals(0, len(self.ingester_platform.search(DataEntrySearchCriteria(found_dataset_id, 
                                 start_time=timestamp+datetime.timedelta(seconds=60)), 0, 10).results))
        self.assertEquals(2, len(self.ingester_platform.search(DataEntrySearchCriteria(found_dataset_id, 
                                 start_time=timestamp-datetime.timedelta(seconds=60),
                                 end_time=timestamp+datetime.timedelta(seconds=60)), 0, 10).results))

        work = self.ingester_platform.createUnitOfWork()
        data_entry_3 = DataEntry(dataset2.id, datetime.datetime.now())
        data_entry_3['file'] = FileObject(f_handle=open(os.path.join(
                    os.path.dirname(jcudc24ingesterapi.__file__), "tests/test_ingest.xml")), 
                    mime_type="text/xml")
        work.post(data_entry_3)
        work.commit()
        self.assertIsNotNone(data_entry_3.id)
        
        f_in = self.ingester_platform.getDataEntryStream(dataset2.id, data_entry_3.id, "file")
        self.assertIsNotNone(f_in)
        data = f_in.read()
        f_in.close()
        self.assertLess(0, len(data), "Expected data in file")

#       User enters quality assurance metadata
        quality_metadata_schema = DatasetMetadataSchema()
        quality_metadata_schema.addAttr(String("unit"))
        quality_metadata_schema.addAttr(String("description"))
        quality_metadata_schema.addAttr(Double("value"))
        quality_metadata_schema = self.ingester_platform.post(quality_metadata_schema)
        
        entered_metadata = DatasetMetadataEntry(data_entry_1.dataset, quality_metadata_schema.id)
        entered_metadata['unit'] = "%"
        entered_metadata['description'] = "Percent error"
        entered_metadata['value'] = 0.98

        entered_metadata = self.ingester_platform.post(entered_metadata)
        
        # Now find that metadata
        results = self.ingester_platform.search(DatasetMetadataSearchCriteria(data_entry_1.dataset),0 , 10).results
        self.assertEqual(1, len(results))
        
        
        data_entry_md_schema = DataEntryMetadataSchema("test")
        data_entry_md_schema.addAttr(String("description"))
        data_entry_md_schema.addAttr(Double("value"))
        data_entry_md_schema = self.ingester_platform.post(data_entry_md_schema)
        calibration = DataEntryMetadataEntry(metadata_schema_id=int(data_entry_md_schema.id), dataset_id=dataset2.id, object_id=data_entry_3.id)
        calibration["description"] = "Test"
        calibration["value"] = 1.2

        calibration2 = DataEntryMetadataEntry(metadata_schema_id=int(data_entry_md_schema.id), dataset_id=dataset2.id, object_id=data_entry_3.id)
        calibration2["description"] = "Test2"
        calibration2["value"] = 2.3
        calibration2 = self.ingester_platform.post(calibration2)

        calibrations = self.ingester_platform.search(DataEntryMetadataSearchCriteria(int(81), int(3648)), offset=0, limit=1000)
        self.assertEquals(1, len(calibrations.results))
        self.assertEquals(calibrations.results[0].schema_id, data_entry_md_schema.id)

        self.ingester_platform.delete(calibration2)
        self.ingester_platform.delete(calibration)
        self.ingester_platform.delete(data_entry_md_schema)

#       User changes sampling rate
# FIXME: This test is going to be changed to be done by editing the dataset
#        sampling_rate_changed = Metadata(dataset1.id, type(dataset1), SampleRateMetadataSchema())
#        sampling_rate_changed.change_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
#        sampling_rate_changed.sampling = CustomSampling("file://d:/sampling_scripts/awsome_sampling.py")
#
#        try:
#            sampling_rate_changed = self.ingester_platform.post(sampling_rate_changed)
#            assert(sampling_rate_changed.metadata_id is None, "Sampling rate change failed")
#        except:
#            assert(True, "Sampling rate change failed")

#       User wants some random metadata specific to their project
# FIXME: Not sure what use case this is trying to demonstrate
#        random_metadata_schema =  DataEntryMetadataSchema()
#        random_metadata_schema.addAttr('random_field', Double())

#        random_metadata = Metadata(data_entry.data_entry_id, type(data_entry), random_metadata_schema)
#        random_metadata.random_field = 1.5

#        try:
#            random_metadata = self.ingester_platform.post(random_metadata)
#            assert(random_metadata.metadata_id is None, "random_metadata failed")
#        except:
#            assert(True, "random_metadata failed")

#       User changes the data source of the dataset
        new_data_source = PullDataSource("http://test.com/new_data", "file_handle")
        dataset1.data_source = new_data_source
        dataset1 = self.ingester_platform.post(dataset1)
        self.assertNotEqual(None, dataset1)

#       External, 3rd party searches for data
        # TODO: external 3rd parties should be able to use the api to get data without authentication
        # TODO: I'm not sure exactly how this should work, but the search api could be open access (need spam limitations or something?)

#       Project is disabled/finished
        work = self.ingester_platform.createUnitOfWork()
        work.disable(dataset1.id)
        work.disable(dataset2.id)
        work.commit()

#       Project is obsolete and data should be deleted
        work = self.ingester_platform.createUnitOfWork()
        work.delete(dataset1.id)
        work.delete(dataset2.id)
        work.commit()
Пример #8
0
    def test_api_usage(self):
        #       User data that is created by filling out the provisioning interface workflow steps.
        #   General
        title = "Test project"
        data_manager = "A Person"
        project_lead = "Another Person"

        #   Metadata
        project_region = Region("Test Region",
                                ((1, 1), (2, 2), (2, 1), (1, 1)))

        #   Methods & Datasets
        loc1 = Location(11.0, 11.0, "Test Site", 100)
        loc2 = Location(11.0, 11.0, "Test Site", 100)
        loc3 = Location(12.0, 11.0, "Test Site", 100)

        temp_work = self.ingester_platform.createUnitOfWork()
        temperature_schema = DataEntrySchema("Test Temp Schema")
        temperature_schema.addAttr(Double("temperature"))
        temp_work.post(temperature_schema)
        temp_work.commit()

        air_temperature_schema = DataEntrySchema("Air Temp Schema")
        air_temperature_schema.extends = [temperature_schema.id]
        air_temperature_schema = self.ingester_platform.post(
            air_temperature_schema)

        second_level_inheritence_schema = DataEntrySchema("Second Inheritence")
        second_level_inheritence_schema.extends = [air_temperature_schema.id]
        second_level_inheritence_schema = self.ingester_platform.post(
            second_level_inheritence_schema)

        # Check the name is set
        temperature_schema_1 = self.ingester_platform.getSchema(
            temperature_schema.id)
        self.assertIsNotNone(temperature_schema.name)
        self.assertEquals(temperature_schema.name, temperature_schema_1.name)

        file_schema = DataEntrySchema()
        file_schema.addAttr(FileDataType("file"))
        file_schema = self.ingester_platform.post(file_schema)

        dataset1 = Dataset(location=None, schema=temperature_schema.id)
        dataset2 = Dataset(
            location=None,
            schema=file_schema.id,
            data_source=PullDataSource(
                "http://test.com",
                "file_handle",
                processing_script=
                "file://d:/processing_scripts/awsome_processing.py"))

        #        dataset3 = Dataset(None, file_schema, PullDataSource("http://test.com", "file_handle"), CustomSampling("file://d:/sampling_scripts/awsome_sampling.py"), "file://d:/processing_scripts/awsome_processing.py")

        self.cleanup_files.append(dataset2.data_source.processing_script)
        #        self.cleanup_files.push(dataset3.sampling.script)
        #        self.cleanup_files.push(dataset3.processing_script)

        #       Provisioning admin accepts the submitted project
        work = self.ingester_platform.createUnitOfWork()

        work.post(project_region)  # Save the region

        loc1.region = project_region.id  # Set the datasets location to use the projects region
        work.post(loc1)  # Save the location
        dataset1.location = loc1.id  # Set the datasets location
        work.post(dataset1)  # Save the dataset

        loc2.region = project_region.id
        work.post(loc2)
        dataset2.location = loc2.id
        work.post(dataset2)

        work.commit()

        # Region, location and dataset id's will be saved to the project within the provisioning system in some way

        #       User searches for datasets

        # TODO: Nigel? - Define searching api
        found_dataset_id = dataset1.id  # The dataset that has an extended file schema

        #       User manually enters data
        timestamp = datetime.datetime.now()
        data_entry_1 = DataEntry(found_dataset_id, timestamp)
        data_entry_1['temperature'] = 27.8  # Add the extended schema items
        data_entry_1 = self.ingester_platform.post(data_entry_1)
        self.assertIsNotNone(data_entry_1.id)

        timestamp2 = timestamp + datetime.timedelta(seconds=1)
        data_entry_2 = DataEntry(found_dataset_id, timestamp2)
        data_entry_2['temperature'] = 27.8  # Add the extended schema items
        data_entry_2 = self.ingester_platform.post(data_entry_2)

        self.assertEquals(
            2,
            len(
                self.ingester_platform.search(
                    DataEntrySearchCriteria(found_dataset_id), 0, 10).results))
        result = self.ingester_platform.search(
            DataEntrySearchCriteria(found_dataset_id), 0, 1)
        self.assertEquals(2, result.count)
        self.assertEquals(1, len(result.results))
        self.assertEquals(
            1,
            len(
                self.ingester_platform.search(
                    DataEntrySearchCriteria(found_dataset_id), 1, 1).results))

        result = self.ingester_platform.search(
            DataEntrySearchCriteria(found_dataset_id), 2, 1)
        self.assertEquals(0, len(result.results))

        self.assertEquals(
            0,
            len(
                self.ingester_platform.search(
                    DataEntrySearchCriteria(found_dataset_id,
                                            end_time=timestamp -
                                            datetime.timedelta(seconds=60)), 0,
                    10).results))
        self.assertEquals(
            0,
            len(
                self.ingester_platform.search(
                    DataEntrySearchCriteria(found_dataset_id,
                                            start_time=timestamp +
                                            datetime.timedelta(seconds=60)), 0,
                    10).results))
        self.assertEquals(
            2,
            len(
                self.ingester_platform.search(
                    DataEntrySearchCriteria(
                        found_dataset_id,
                        start_time=timestamp - datetime.timedelta(seconds=60),
                        end_time=timestamp + datetime.timedelta(seconds=60)),
                    0, 10).results))

        work = self.ingester_platform.createUnitOfWork()
        data_entry_3 = DataEntry(dataset2.id, datetime.datetime.now())
        data_entry_3['file'] = FileObject(f_handle=open(
            os.path.join(os.path.dirname(jcudc24ingesterapi.__file__),
                         "tests/test_ingest.xml")),
                                          mime_type="text/xml")
        work.post(data_entry_3)
        work.commit()
        self.assertIsNotNone(data_entry_3.id)

        f_in = self.ingester_platform.getDataEntryStream(
            dataset2.id, data_entry_3.id, "file")
        self.assertIsNotNone(f_in)
        data = f_in.read()
        f_in.close()
        self.assertLess(0, len(data), "Expected data in file")

        #       User enters quality assurance metadata
        quality_metadata_schema = DatasetMetadataSchema()
        quality_metadata_schema.addAttr(String("unit"))
        quality_metadata_schema.addAttr(String("description"))
        quality_metadata_schema.addAttr(Double("value"))
        quality_metadata_schema = self.ingester_platform.post(
            quality_metadata_schema)

        entered_metadata = DatasetMetadataEntry(data_entry_1.dataset,
                                                quality_metadata_schema.id)
        entered_metadata['unit'] = "%"
        entered_metadata['description'] = "Percent error"
        entered_metadata['value'] = 0.98

        entered_metadata = self.ingester_platform.post(entered_metadata)

        # Now find that metadata
        results = self.ingester_platform.search(
            DatasetMetadataSearchCriteria(data_entry_1.dataset), 0, 10).results
        self.assertEqual(1, len(results))

        data_entry_md_schema = DataEntryMetadataSchema("test")
        data_entry_md_schema.addAttr(String("description"))
        data_entry_md_schema.addAttr(Double("value"))
        data_entry_md_schema = self.ingester_platform.post(
            data_entry_md_schema)
        calibration = DataEntryMetadataEntry(metadata_schema_id=int(
            data_entry_md_schema.id),
                                             dataset_id=dataset2.id,
                                             object_id=data_entry_3.id)
        calibration["description"] = "Test"
        calibration["value"] = 1.2

        calibration2 = DataEntryMetadataEntry(metadata_schema_id=int(
            data_entry_md_schema.id),
                                              dataset_id=dataset2.id,
                                              object_id=data_entry_3.id)
        calibration2["description"] = "Test2"
        calibration2["value"] = 2.3
        calibration2 = self.ingester_platform.post(calibration2)

        calibrations = self.ingester_platform.search(
            DataEntryMetadataSearchCriteria(int(81), int(3648)),
            offset=0,
            limit=1000)
        self.assertEquals(1, len(calibrations.results))
        self.assertEquals(calibrations.results[0].schema_id,
                          data_entry_md_schema.id)

        self.ingester_platform.delete(calibration2)
        self.ingester_platform.delete(calibration)
        self.ingester_platform.delete(data_entry_md_schema)

        #       User changes sampling rate
        # FIXME: This test is going to be changed to be done by editing the dataset
        #        sampling_rate_changed = Metadata(dataset1.id, type(dataset1), SampleRateMetadataSchema())
        #        sampling_rate_changed.change_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
        #        sampling_rate_changed.sampling = CustomSampling("file://d:/sampling_scripts/awsome_sampling.py")
        #
        #        try:
        #            sampling_rate_changed = self.ingester_platform.post(sampling_rate_changed)
        #            assert(sampling_rate_changed.metadata_id is None, "Sampling rate change failed")
        #        except:
        #            assert(True, "Sampling rate change failed")

        #       User wants some random metadata specific to their project
        # FIXME: Not sure what use case this is trying to demonstrate
        #        random_metadata_schema =  DataEntryMetadataSchema()
        #        random_metadata_schema.addAttr('random_field', Double())

        #        random_metadata = Metadata(data_entry.data_entry_id, type(data_entry), random_metadata_schema)
        #        random_metadata.random_field = 1.5

        #        try:
        #            random_metadata = self.ingester_platform.post(random_metadata)
        #            assert(random_metadata.metadata_id is None, "random_metadata failed")
        #        except:
        #            assert(True, "random_metadata failed")

        #       User changes the data source of the dataset
        new_data_source = PullDataSource("http://test.com/new_data",
                                         "file_handle")
        dataset1.data_source = new_data_source
        dataset1 = self.ingester_platform.post(dataset1)
        self.assertNotEqual(None, dataset1)

        #       External, 3rd party searches for data
        # TODO: external 3rd parties should be able to use the api to get data without authentication
        # TODO: I'm not sure exactly how this should work, but the search api could be open access (need spam limitations or something?)

        #       Project is disabled/finished
        work = self.ingester_platform.createUnitOfWork()
        work.disable(dataset1.id)
        work.disable(dataset2.id)
        work.commit()

        #       Project is obsolete and data should be deleted
        work = self.ingester_platform.createUnitOfWork()
        work.delete(dataset1.id)
        work.delete(dataset2.id)
        work.commit()