def test_dataset_data_source_unit(self):
    """Builds a simple schema/location/dataset hierarchy in a unit of work
    and checks that commit rewrites the data source's dataset reference."""
    unit = UnitOfWork(None)

    schema1 = DataEntrySchema("base1")
    schema1.addAttr(FileDataType("file"))
    schema_id = unit.post(schema1)

    loc = Location(10.0, 11.0)
    loc.name = "Location"
    loc_id = unit.post(loc)

    dataset1 = Dataset()
    dataset1.schema = schema_id
    dataset1.location = loc_id
    dataset1_id = unit.post(dataset1)

    dataset2 = Dataset()
    dataset2.schema = schema_id
    dataset2.location = loc_id
    dataset2.data_source = DatasetDataSource(dataset1_id, "")
    dataset2_id = unit.post(dataset2)

    ret = self.service.commit(unit, None)
    found = False
    for r in ret:
        if isinstance(r, Dataset) and dataset1_id == r.correlationid:
            dataset1_id = r.id
        elif isinstance(r, Dataset) and dataset2_id == r.correlationid:
            self.assertEqual(dataset1_id, r.data_source.dataset_id,
                             "Data source dataset_id was not updated")
            found = True
    self.assertTrue(found, "Didn't find the dataset with the dataset data source")
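# A minimal sketch of how the correlation-id lookup in the loop above could be
# factored into a reusable helper. It assumes, as the test does, that commit()
# returns objects whose `correlationid` matches the placeholder id handed out
# by unit.post() and whose `id` is the persisted id; the helper name is
# hypothetical, not part of the API.
def resolve_persisted_ids(commit_results):
    """Map each posted object's correlation id to its persisted id."""
    return {r.correlationid: r.id for r in commit_results}

# Usage sketch: ids = resolve_persisted_ids(ret); real_id = ids[dataset2_id]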
def test_dataset_persist(self):
    """Persists a dataset and checks optimistic locking via the version field."""
    schema = DataEntrySchema("base1")
    schema.addAttr(FileDataType("file"))
    schema = self.service.persist(schema)

    loc = Location(10.0, 11.0)
    loc.name = "Location"
    loc = self.service.persist(loc)

    dataset = Dataset()
    dataset.schema = schema.id
    dataset.location = loc.id
    dataset1 = self.service.persist(dataset)
    self.assertEqual(1, dataset1.version)

    # Persisting with an out-of-date version must fail
    dataset1.version = 0
    self.assertRaises(StaleObjectError, self.service.persist, dataset1)

    # Restoring the current version lets the save through and bumps the version
    dataset1.version = 1
    dataset2 = self.service.persist(dataset1)
    self.assertEqual(2, dataset2.version)
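# A minimal sketch of a retry loop around the optimistic-locking behaviour
# exercised above. It assumes persist() raises StaleObjectError when the
# object's version lags the stored one, and that get_dataset() (used in
# test_data_types below) returns the current version. The helper name and
# retry policy are illustrative only.
def persist_with_retry(service, dataset, attempts=3):
    """Retry persist(), refreshing the version after each conflict."""
    for _ in range(attempts):
        try:
            return service.persist(dataset)
        except StaleObjectError:
            # Someone else saved first: pick up the latest version and retry.
            dataset.version = service.get_dataset(dataset.id).version
    raise StaleObjectError("Gave up after %d attempts" % attempts)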
def test_data_types(self):
    """Checks schema type validation, data source updates, active-dataset
    queries, search, and a simple ingest."""
    schema1 = DatasetMetadataSchema("schema1")
    schema1.addAttr(FileDataType("file"))
    schema1a = self.service.persist(schema1)
    self.assertEqual(1, len(schema1a.attrs))

    schema2 = DataEntrySchema("schema2")
    schema2.addAttr(FileDataType("file"))
    schema2.addAttr(Double("x"))
    schema2a = self.service.persist(schema2)

    loc = Location(10.0, 11.0)
    loca = self.service.persist(loc)

    dataset = Dataset()
    dataset.schema = schema1a.id
    dataset.location = loca.id
    # We're trying to use a dataset metadata schema, so this should fail
    self.assertRaises(ValueError, self.service.persist, dataset)

    # Now we're using the correct type of schema
    dataset.schema = schema2a.id
    dataset1a = self.service.persist(dataset)

    dataset1b = self.service.get_dataset(dataset1a.id)
    self.assertEqual(dataset1a.id, dataset1b.id)
    self.assertDictEqual(dataset1a.__dict__, dataset1b.__dict__)

    # Update and add a data source
    dataset1b.data_source = PullDataSource("http://www.abc.net.au", None,
                                           recursive=False, field="file",
                                           processing_script="TEST",
                                           sampling=PeriodicSampling(10000))
    dataset1b.enabled = True
    dataset1c = self.service.persist(dataset1b)
    self.assertNotEqual(None, dataset1c.data_source)
    self.assertEqual("TEST", dataset1c.data_source.processing_script)
    self.assertNotEqual(None, dataset1c.data_source.sampling)

    datasets = self.service.get_active_datasets()
    self.assertEqual(1, len(datasets))
    self.assertNotEqual(None, datasets[0].data_source)
    self.assertEqual("TEST", datasets[0].data_source.processing_script)
    self.assertNotEqual(None, datasets[0].data_source.sampling)

    # Test with criteria
    datasets = self.service.get_active_datasets(kind="pull_data_source")
    self.assertEqual(1, len(datasets))
    datasets = self.service.get_active_datasets(kind="push_data_source")
    self.assertEqual(0, len(datasets))

    schema1b = self.service.get_schema(schema1a.id)
    self.assertEqual(schema1a.id, schema1b.id)

    datasets = self.service.search("dataset")
    self.assertEqual(1, len(datasets))
    schemas = self.service.search("data_entry_schema")
    self.assertEqual(1, len(schemas))
    schemas = self.service.search("dataset_metadata_schema")
    self.assertEqual(1, len(schemas))
    locs = self.service.search("location")
    self.assertEqual(1, len(locs))

    # Test ingest
    data_entry_1 = DataEntry(dataset1b.id, datetime.datetime.now())
    data_entry_1['x'] = 27.8
    data_entry_1 = self.service.persist(data_entry_1)
    self.assertIsNotNone(data_entry_1.id)
def test_api_usage(self):
    """Walks through the provisioning workflow end to end: schemas,
    locations, datasets, data entries, metadata, and dataset lifecycle."""
    # User data that is created by filling out the provisioning interface
    # workflow steps.

    # General
    title = "Test project"
    data_manager = "A Person"
    project_lead = "Another Person"

    # Metadata
    project_region = Region("Test Region", ((1, 1), (2, 2), (2, 1), (1, 1)))

    # Methods & Datasets
    loc1 = Location(11.0, 11.0, "Test Site", 100)
    loc2 = Location(11.0, 11.0, "Test Site", 100)
    loc3 = Location(12.0, 11.0, "Test Site", 100)

    temp_work = self.ingester_platform.createUnitOfWork()
    temperature_schema = DataEntrySchema("Test Temp Schema")
    temperature_schema.addAttr(Double("temperature"))
    temp_work.post(temperature_schema)
    temp_work.commit()

    air_temperature_schema = DataEntrySchema("Air Temp Schema")
    air_temperature_schema.extends = [temperature_schema.id]
    air_temperature_schema = self.ingester_platform.post(air_temperature_schema)

    second_level_inheritance_schema = DataEntrySchema("Second Inheritance")
    second_level_inheritance_schema.extends = [air_temperature_schema.id]
    second_level_inheritance_schema = self.ingester_platform.post(
        second_level_inheritance_schema)

    # Check the name is set
    temperature_schema_1 = self.ingester_platform.getSchema(temperature_schema.id)
    self.assertIsNotNone(temperature_schema.name)
    self.assertEqual(temperature_schema.name, temperature_schema_1.name)

    file_schema = DataEntrySchema()
    file_schema.addAttr(FileDataType("file"))
    file_schema = self.ingester_platform.post(file_schema)

    dataset1 = Dataset(location=None, schema=temperature_schema.id)
    dataset2 = Dataset(location=None, schema=file_schema.id,
                       data_source=PullDataSource(
                           "http://test.com", "file_handle",
                           processing_script="file://d:/processing_scripts/awsome_processing.py"))
    # dataset3 = Dataset(None, file_schema,
    #                    PullDataSource("http://test.com", "file_handle"),
    #                    CustomSampling("file://d:/sampling_scripts/awsome_sampling.py"),
    #                    "file://d:/processing_scripts/awsome_processing.py")

    self.cleanup_files.append(dataset2.data_source.processing_script)
    # self.cleanup_files.append(dataset3.sampling.script)
    # self.cleanup_files.append(dataset3.processing_script)

    # Provisioning admin accepts the submitted project
    work = self.ingester_platform.createUnitOfWork()
    work.post(project_region)        # Save the region
    loc1.region = project_region.id  # Set the location to use the project's region
    work.post(loc1)                  # Save the location
    dataset1.location = loc1.id      # Set the dataset's location
    work.post(dataset1)              # Save the dataset
    loc2.region = project_region.id
    work.post(loc2)
    dataset2.location = loc2.id
    work.post(dataset2)
    work.commit()

    # Region, location and dataset IDs will be saved to the project within
    # the provisioning system in some way.

    # User searches for datasets
    # TODO: Nigel - define the searching API
    found_dataset_id = dataset1.id  # The dataset that has an extended file schema

    # User manually enters data
    timestamp = datetime.datetime.now()
    data_entry_1 = DataEntry(found_dataset_id, timestamp)
    data_entry_1['temperature'] = 27.8  # Add the extended schema items
    data_entry_1 = self.ingester_platform.post(data_entry_1)
    self.assertIsNotNone(data_entry_1.id)

    timestamp2 = timestamp + datetime.timedelta(seconds=1)
    data_entry_2 = DataEntry(found_dataset_id, timestamp2)
    data_entry_2['temperature'] = 27.8  # Add the extended schema items
    data_entry_2 = self.ingester_platform.post(data_entry_2)

    # Paged search over the dataset's data entries
    self.assertEqual(2, len(self.ingester_platform.search(
        DataEntrySearchCriteria(found_dataset_id), 0, 10).results))

    result = self.ingester_platform.search(
        DataEntrySearchCriteria(found_dataset_id), 0, 1)
    self.assertEqual(2, result.count)
    self.assertEqual(1, len(result.results))

    self.assertEqual(1, len(self.ingester_platform.search(
        DataEntrySearchCriteria(found_dataset_id), 1, 1).results))

    result = self.ingester_platform.search(
        DataEntrySearchCriteria(found_dataset_id), 2, 1)
    self.assertEqual(0, len(result.results))

    # Time-bounded searches
    self.assertEqual(0, len(self.ingester_platform.search(
        DataEntrySearchCriteria(
            found_dataset_id,
            end_time=timestamp - datetime.timedelta(seconds=60)),
        0, 10).results))
    self.assertEqual(0, len(self.ingester_platform.search(
        DataEntrySearchCriteria(
            found_dataset_id,
            start_time=timestamp + datetime.timedelta(seconds=60)),
        0, 10).results))
    self.assertEqual(2, len(self.ingester_platform.search(
        DataEntrySearchCriteria(
            found_dataset_id,
            start_time=timestamp - datetime.timedelta(seconds=60),
            end_time=timestamp + datetime.timedelta(seconds=60)),
        0, 10).results))

    # Ingest a file-based data entry
    work = self.ingester_platform.createUnitOfWork()
    data_entry_3 = DataEntry(dataset2.id, datetime.datetime.now())
    data_entry_3['file'] = FileObject(
        f_handle=open(os.path.join(os.path.dirname(jcudc24ingesterapi.__file__),
                                   "tests/test_ingest.xml")),
        mime_type="text/xml")
    work.post(data_entry_3)
    work.commit()
    self.assertIsNotNone(data_entry_3.id)

    f_in = self.ingester_platform.getDataEntryStream(
        dataset2.id, data_entry_3.id, "file")
    self.assertIsNotNone(f_in)
    data = f_in.read()
    f_in.close()
    self.assertLess(0, len(data), "Expected data in file")

    # User enters quality assurance metadata
    quality_metadata_schema = DatasetMetadataSchema()
    quality_metadata_schema.addAttr(String("unit"))
    quality_metadata_schema.addAttr(String("description"))
    quality_metadata_schema.addAttr(Double("value"))
    quality_metadata_schema = self.ingester_platform.post(quality_metadata_schema)

    entered_metadata = DatasetMetadataEntry(data_entry_1.dataset,
                                            quality_metadata_schema.id)
    entered_metadata['unit'] = "%"
    entered_metadata['description'] = "Percent error"
    entered_metadata['value'] = 0.98
    entered_metadata = self.ingester_platform.post(entered_metadata)

    # Now find that metadata
    results = self.ingester_platform.search(
        DatasetMetadataSearchCriteria(data_entry_1.dataset), 0, 10).results
    self.assertEqual(1, len(results))

    data_entry_md_schema = DataEntryMetadataSchema("test")
    data_entry_md_schema.addAttr(String("description"))
    data_entry_md_schema.addAttr(Double("value"))
    data_entry_md_schema = self.ingester_platform.post(data_entry_md_schema)

    calibration = DataEntryMetadataEntry(
        metadata_schema_id=int(data_entry_md_schema.id),
        dataset_id=dataset2.id, object_id=data_entry_3.id)
    calibration["description"] = "Test"
    calibration["value"] = 1.2
    calibration = self.ingester_platform.post(calibration)

    calibration2 = DataEntryMetadataEntry(
        metadata_schema_id=int(data_entry_md_schema.id),
        dataset_id=dataset2.id, object_id=data_entry_3.id)
    calibration2["description"] = "Test2"
    calibration2["value"] = 2.3
    calibration2 = self.ingester_platform.post(calibration2)

    # Search for the calibrations attached to data_entry_3
    calibrations = self.ingester_platform.search(
        DataEntryMetadataSearchCriteria(dataset2.id, data_entry_3.id),
        offset=0, limit=1000)
    self.assertEqual(2, len(calibrations.results))
    self.assertEqual(calibrations.results[0].schema_id, data_entry_md_schema.id)

    self.ingester_platform.delete(calibration2)
    self.ingester_platform.delete(calibration)
    self.ingester_platform.delete(data_entry_md_schema)

    # User changes sampling rate
    # FIXME: This test is going to be changed to be done by editing the dataset
    # sampling_rate_changed = Metadata(dataset1.id, type(dataset1),
    #                                  SampleRateMetadataSchema())
    # sampling_rate_changed.change_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    # sampling_rate_changed.sampling = CustomSampling("file://d:/sampling_scripts/awsome_sampling.py")
    #
    # sampling_rate_changed = self.ingester_platform.post(sampling_rate_changed)
    # assert sampling_rate_changed.metadata_id is not None, "Sampling rate change failed"

    # User wants some random metadata specific to their project
    # FIXME: Not sure what use case this is trying to demonstrate
    # random_metadata_schema = DataEntryMetadataSchema()
    # random_metadata_schema.addAttr(Double('random_field'))
    # random_metadata = Metadata(data_entry.data_entry_id, type(data_entry),
    #                            random_metadata_schema)
    # random_metadata.random_field = 1.5
    # random_metadata = self.ingester_platform.post(random_metadata)
    # assert random_metadata.metadata_id is not None, "random_metadata failed"

    # User changes the data source of the dataset
    new_data_source = PullDataSource("http://test.com/new_data", "file_handle")
    dataset1.data_source = new_data_source
    dataset1 = self.ingester_platform.post(dataset1)
    self.assertNotEqual(None, dataset1)

    # External, 3rd party searches for data
    # TODO: External 3rd parties should be able to use the API to get data
    # without authentication. It's not clear exactly how this should work,
    # but the search API could be open access (with rate limiting or similar
    # spam protection).

    # Project is disabled/finished
    work = self.ingester_platform.createUnitOfWork()
    work.disable(dataset1.id)
    work.disable(dataset2.id)
    work.commit()

    # Project is obsolete and data should be deleted
    work = self.ingester_platform.createUnitOfWork()
    work.delete(dataset1.id)
    work.delete(dataset2.id)
    work.commit()
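# A minimal sketch of draining a paged search, built on the same
# search(criteria, offset, limit) call exercised in the test above, whose
# result object exposes `results` and `count`. The generator name and default
# page size are illustrative, not part of the API.
def iter_search_results(platform, criteria, page_size=100):
    """Yield every result for `criteria`, fetching one page at a time."""
    offset = 0
    while True:
        page = platform.search(criteria, offset, page_size)
        for item in page.results:
            yield item
        offset += len(page.results)
        if not page.results or offset >= page.count:
            break

# Usage sketch:
#   for entry in iter_search_results(self.ingester_platform,
#                                    DataEntrySearchCriteria(found_dataset_id)):
#       process(entry)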