Example #1
    def test_file_rename_or_move(self):
        # test that files that are part of the TimeSeries logical
        # file object (LFO) can't be moved or renamed

        self.sqlite_file_obj = open(self.sqlite_file, 'rb')
        self._create_composite_resource()
        res_file = self.composite_resource.files.first()

        # extract metadata from the sqlite file
        TimeSeriesLogicalFile.set_file_type(self.composite_resource,
                                            res_file.id, self.user)

        # test renaming of files that are associated with timeseries LFO - should raise exception
        self.assertEqual(self.composite_resource.files.count(), 1)

        base_path = "data/contents/ODM2_Multi_Site_One_Variable/{}"
        src_path = base_path.format('ODM2_Multi_Site_One_Variable.sqlite')
        tgt_path = base_path.format('ODM2_Multi_Site_One_Variable_1.sqlite')
        with self.assertRaises(DRF_ValidationError):
            move_or_rename_file_or_folder(self.user,
                                          self.composite_resource.short_id,
                                          src_path, tgt_path)
        # TODO: test for renaming csv file when we implement csv file

        # test moving the files associated with timeseries LFO
        tgt_path = 'data/contents/new_folder/ODM2_Multi_Site_One_Variable.sqlite'
        with self.assertRaises(DRF_ValidationError):
            move_or_rename_file_or_folder(self.user,
                                          self.composite_resource.short_id,
                                          src_path, tgt_path)

        # TODO: test for moving csv file when we implement csv file

        self.composite_resource.delete()
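
Every example in this section calls a _create_composite_resource helper that is not shown. A minimal sketch of what it presumably does, assuming it simply wraps hydroshare.create_resource with the already-opened file (the helper body and its default title are assumptions, not the project's actual code):

    def _create_composite_resource(self, title='Untitled Resource',
                                   file_to_upload=None):
        # hypothetical reconstruction of the shared test helper: upload the
        # already-opened file object unless the caller supplies its own
        if file_to_upload is None:
            file_to_upload = UploadedFile(
                file=self.sqlite_file_obj,
                name=os.path.basename(self.sqlite_file_obj.name))
        self.composite_resource = hydroshare.create_resource(
            resource_type='CompositeResource',
            owner=self.user,
            title=title,
            files=(file_to_upload,)
        )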
Example #2
    def _test_invalid_csv_file(self, invalid_csv_file_name):
        invalid_csv_file_obj = self._get_invalid_csv_file_obj(
            invalid_csv_file_name)

        file_to_upload = UploadedFile(file=invalid_csv_file_obj,
                                      name=os.path.basename(
                                          invalid_csv_file_obj.name))
        self._create_composite_resource(title='Untitled Resource',
                                        file_to_upload=file_to_upload)

        self.assertEqual(self.composite_resource.files.all().count(), 1)
        res_file = self.composite_resource.files.first()

        # check that the resource file is associated with GenericLogicalFile
        self.assertEqual(res_file.has_logical_file, True)
        self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
        # check that there is one GenericLogicalFile object
        self.assertEqual(GenericLogicalFile.objects.count(), 1)

        # check that there is no TimeSeriesLogicalFile object
        self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)

        # trying to set this invalid csv file to timeseries file type should raise
        # ValidationError
        with self.assertRaises(ValidationError):
            TimeSeriesLogicalFile.set_file_type(self.composite_resource,
                                                res_file.id, self.user)

        # test that the invalid file did not get deleted
        self.assertEqual(self.composite_resource.files.all().count(), 1)
        res_file = self.composite_resource.files.first()
        # check that the resource file is still associated with generic logical file
        self.assertEqual(res_file.has_logical_file, True)
        self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
        self.composite_resource.delete()
Example #3
    def test_sqlite_set_file_type_to_timeseries(self):
        # here we use a valid sqlite file to set the TimeSeries
        # file type, which includes metadata extraction
        self.sqlite_file_obj = open(self.sqlite_file, 'rb')
        self._create_composite_resource(title='Untitled Resource')

        self.assertEqual(self.composite_resource.files.all().count(), 1)
        res_file = self.composite_resource.files.first()

        # check that the resource file is associated with GenericLogicalFile
        self.assertEqual(res_file.has_logical_file, True)
        self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
        # check that there is one GenericLogicalFile object
        self.assertEqual(GenericLogicalFile.objects.count(), 1)

        # check that there is no TimeSeriesLogicalFile object
        self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)

        # set the sqlite file to TimeSeries file type
        TimeSeriesLogicalFile.set_file_type(self.composite_resource,
                                            res_file.id, self.user)
        # test extracted metadata
        assert_time_series_file_type_metadata(self)
        # test file level keywords
        # res_file = self.composite_resource.files.first()
        # logical_file = res_file.logical_file
        # self.assertEqual(len(logical_file.metadata.keywords), 1)
        # self.assertEqual(logical_file.metadata.keywords[0], 'Snow water equivalent')
        self.composite_resource.delete()
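
The shared assert_time_series_file_type_metadata helper is also not shown in this section. A hedged sketch of the checks it presumably performs, assembled only from assertions that appear in the other examples here (the exact body is an assumption):

def assert_time_series_file_type_metadata(test_case, expected_file_folder=None):
    # hypothetical reconstruction of the shared assertion helper
    res_file = test_case.composite_resource.files.first()
    test_case.assertEqual(res_file.logical_file_type_name,
                          "TimeSeriesLogicalFile")
    logical_file = res_file.logical_file
    # metadata extraction should have produced file-level metadata objects
    test_case.assertTrue(logical_file.metadata.sites.count() > 0)
    test_case.assertTrue(logical_file.metadata.variables.count() > 0)
    test_case.assertTrue(logical_file.metadata.methods.count() > 0)
    test_case.assertTrue(logical_file.metadata.processing_levels.count() > 0)
    test_case.assertTrue(logical_file.metadata.time_series_results.count() > 0)
    if expected_file_folder is not None:
        test_case.assertEqual(res_file.file_folder, expected_file_folder)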
Example #4
    def test_timeseries_file_type_folder_delete(self):
        # when a file is set to TimeSeriesLogicalFile type, the system
        # automatically creates a folder using the name of the file
        # that was used to set the file type.
        # Here we need to test that when that folder gets deleted, all files
        # in that folder get deleted, the logical file object gets deleted, and
        # the associated metadata objects get deleted
        self.sqlite_file_obj = open(self.sqlite_file, 'rb')
        self._create_composite_resource(title='Untitled Resource')

        self.assertEqual(self.composite_resource.files.all().count(), 1)
        res_file = self.composite_resource.files.first()
        # set the sqlite file to TimeSeries file type
        TimeSeriesLogicalFile.set_file_type(self.composite_resource,
                                            res_file.id, self.user)
        res_file = self.composite_resource.files.first()

        # test that we have one logical file of type TimeSeries
        self.assertEqual(TimeSeriesLogicalFile.objects.count(), 1)
        self.assertEqual(TimeSeriesFileMetaData.objects.count(), 1)
        # delete the folder for the logical file
        folder_path = "data/contents/ODM2_Multi_Site_One_Variable"
        remove_folder(self.user, self.composite_resource.short_id, folder_path)
        # there should be no content files
        self.assertEqual(self.composite_resource.files.count(), 0)

        # there should not be any timeseries logical file or metadata file
        self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)
        self.assertEqual(TimeSeriesFileMetaData.objects.count(), 0)
        # test that all file level metadata deleted
        # there should be no Site metadata objects
        self.assertTrue(Site.objects.count() == 0)
        # there should be no Variable metadata objects
        self.assertTrue(Variable.objects.count() == 0)
        # there should be no Method metadata objects
        self.assertTrue(Method.objects.count() == 0)
        # there should be no ProcessingLevel metadata objects
        self.assertTrue(ProcessingLevel.objects.count() == 0)
        # there should be no TimeSeriesResult metadata objects
        self.assertTrue(TimeSeriesResult.objects.count() == 0)

        # there should not be any CV type records
        self.assertEqual(CVVariableType.objects.all().count(), 0)
        self.assertEqual(CVVariableName.objects.all().count(), 0)
        self.assertEqual(CVSpeciation.objects.all().count(), 0)
        self.assertEqual(CVElevationDatum.objects.all().count(), 0)
        self.assertEqual(CVSiteType.objects.all().count(), 0)
        self.assertEqual(CVMethodType.objects.all().count(), 0)
        self.assertEqual(CVUnitsType.objects.all().count(), 0)
        self.assertEqual(CVStatus.objects.all().count(), 0)
        self.assertEqual(CVMedium.objects.all().count(), 0)
        self.assertEqual(CVAggregationStatistic.objects.all().count(), 0)

        self.composite_resource.delete()
Example #5
    def _test_file_metadata_on_file_delete(self, ext):
        self.sqlite_file_obj = open(self.sqlite_file, 'rb')
        self._create_composite_resource()
        res_file = self.composite_resource.files.first()
        # set the sqlite file to TimeSeries file type
        TimeSeriesLogicalFile.set_file_type(self.composite_resource,
                                            res_file.id, self.user)

        # test that we have one logical file of type TimeSeries
        self.assertEqual(TimeSeriesLogicalFile.objects.count(), 1)
        self.assertEqual(TimeSeriesFileMetaData.objects.count(), 1)
        # delete content file specified by extension (ext parameter)
        res_file = hydroshare.utils.get_resource_files_by_extension(
            self.composite_resource, ext)[0]
        hydroshare.delete_resource_file(self.composite_resource.short_id,
                                        res_file.id, self.user)

        # test that we don't have any logical file of type TimeSeries
        self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)
        self.assertEqual(TimeSeriesFileMetaData.objects.count(), 0)

        # test that all file level metadata deleted
        # there should be no Site metadata objects
        self.assertTrue(Site.objects.count() == 0)
        # there should be no Variable metadata objects
        self.assertTrue(Variable.objects.count() == 0)
        # there should be no Method metadata objects
        self.assertTrue(Method.objects.count() == 0)
        # there should be no ProcessingLevel metadata objects
        self.assertTrue(ProcessingLevel.objects.count() == 0)
        # there should be no TimeSeriesResult metadata objects
        self.assertTrue(TimeSeriesResult.objects.count() == 0)

        # there should not be any CV type records
        self.assertEqual(CVVariableType.objects.all().count(), 0)
        self.assertEqual(CVVariableName.objects.all().count(), 0)
        self.assertEqual(CVSpeciation.objects.all().count(), 0)
        self.assertEqual(CVElevationDatum.objects.all().count(), 0)
        self.assertEqual(CVSiteType.objects.all().count(), 0)
        self.assertEqual(CVMethodType.objects.all().count(), 0)
        self.assertEqual(CVUnitsType.objects.all().count(), 0)
        self.assertEqual(CVStatus.objects.all().count(), 0)
        self.assertEqual(CVMedium.objects.all().count(), 0)
        self.assertEqual(CVAggregationStatistic.objects.all().count(), 0)

        self.composite_resource.delete()
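
A sketch of how this parameterized helper is presumably driven (the wrapper test name is an assumption; the '.sqlite' extension follows from the files the aggregation holds):

    def test_file_metadata_on_sqlite_file_delete(self):
        # hypothetical wrapper: deleting the .sqlite file should cascade-delete
        # the aggregation and all of its file-level metadata
        self._test_file_metadata_on_file_delete(ext='.sqlite')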
Example #6
    def test_bag_ingestion(self):
        from hs_core.views.utils import unzip_file

        def normalize_metadata(metadata_str):
            """Prepares metadata string to match resource id and hydroshare url of original"""
            return metadata_str\
                .replace(current_site_url(), "http://www.hydroshare.org")\
                .replace(res.short_id, "97523bdb7b174901b3fc2d89813458f1")

        # create empty resource
        res = resource.create_resource(
            'CompositeResource',
            self.user,
            'My Test Resource'
            )
        full_paths = {}

        files_to_upload = [UploadedFile(file=open('hs_core/tests/data/test_resource_metadata_files.zip', 'rb'),
                                        name="test_resource_metadata_files.zip")]
        add_resource_files(res.short_id, *files_to_upload, full_paths=full_paths)

        unzip_file(self.user, res.short_id, "data/contents/test_resource_metadata_files.zip", True,
                   overwrite=True, auto_aggregate=True, ingest_metadata=True)

        def compare_metadatas(new_metadata_str, original_metadata_file):
            original_graph = Graph()
            with open(os.path.join(self.extracted_directory, original_metadata_file), "r") as f:
                original_graph = original_graph.parse(data=f.read())
            new_graph = Graph()
            new_graph = new_graph.parse(data=normalize_metadata(new_metadata_str))

            # remove modified date, they'll never match
            subject = new_graph.value(predicate=RDF.type, object=DCTERMS.modified)
            new_graph.remove((subject, None, None))
            subject = original_graph.value(predicate=RDF.type, object=DCTERMS.modified)
            original_graph.remove((subject, None, None))

            for (new_triple, original_triple) in _squashed_graphs_triples(new_graph, original_graph):
                self.assertEqual(new_triple, original_triple, "Ingested resource metadata does not match original")

        res.refresh_from_db()
        compare_metadatas(res.metadata.get_xml(), "resourcemetadata.xml")

        compare_metadatas(res.get_logical_files(GenericLogicalFile.type_name())[0].metadata.get_xml(),
                          "test_meta.xml")
        compare_metadatas(res.get_logical_files(FileSetLogicalFile.type_name())[0].metadata.get_xml(),
                          "asdf/asdf_meta.xml")
        compare_metadatas(res.get_logical_files(GeoFeatureLogicalFile.type_name())[0].metadata.get_xml(),
                          "watersheds_meta.xml")
        compare_metadatas(res.get_logical_files(GeoRasterLogicalFile.type_name())[0].metadata.get_xml(),
                          "logan_meta.xml")
        compare_metadatas(res.get_logical_files(NetCDFLogicalFile.type_name())[0].metadata.get_xml(),
                          "SWE_time_meta.xml")
        compare_metadatas(res.get_logical_files(RefTimeseriesLogicalFile.type_name())[0].metadata.get_xml(),
                          "msf_version.refts_meta.xml")
        compare_metadatas(res.get_logical_files(TimeSeriesLogicalFile.type_name())[0].metadata.get_xml(),
                          "ODM2_Multi_Site_One_Variable_meta.xml")
Example #7
    def test_sqlite_set_file_type_to_timeseries_federated(self):
        # only do federation testing when REMOTE_USE_IRODS is True and irods docker containers
        # are set up properly
        super(TimeSeriesFileTypeTest, self).assert_federated_irods_available()

        # here we use a valid sqlite file on a federated zone to set the
        # TimeSeries file type, which includes metadata extraction
        fed_test_file_full_path = '/{zone}/home/{username}/{fname}'.format(
            zone=settings.HS_USER_IRODS_ZONE, username=self.user.username,
            fname=self.sqlite_file_name)
        res_upload_files = []
        fed_res_path = hydroshare.utils.get_federated_zone_home_path(fed_test_file_full_path)
        res_title = 'Untitled resource'
        self.composite_resource = hydroshare.create_resource(
            resource_type='CompositeResource',
            owner=self.user,
            title=res_title,
            files=res_upload_files,
            source_names=[fed_test_file_full_path],
            fed_res_path=fed_res_path,
            move=False,
            metadata=[],
            auto_aggregate=False
        )

        # test resource is created on federated zone
        self.assertNotEqual(self.composite_resource.resource_federation_path, '')

        self.assertEqual(self.composite_resource.files.all().count(), 1)
        res_file = self.composite_resource.files.first()

        # check that the resource file is not associated with any logical file
        self.assertEqual(res_file.has_logical_file, False)

        fed_file_path = "{}/{}".format(self.composite_resource.file_path, self.sqlite_file_name)
        self.assertEqual(res_file.storage_path, fed_file_path)

        # set the sqlite file to TimeSeries file type
        TimeSeriesLogicalFile.set_file_type(self.composite_resource, self.user, res_file.id)
        # test extracted metadata
        res_file = self.composite_resource.files.first()
        base_file_name, _ = os.path.splitext(res_file.file_name)
        expected_file_folder = base_file_name
        assert_time_series_file_type_metadata(self, expected_file_folder=expected_file_folder)
Example #8
    def _test_invalid_file(self):
        self.assertEqual(self.composite_resource.files.all().count(), 1)
        res_file = self.composite_resource.files.first()

        # check that the resource file is associated with the generic logical file
        self.assertEqual(res_file.has_logical_file, True)
        self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")

        # trying to set this invalid sqlite file to timeseries file type should raise
        # ValidationError
        with self.assertRaises(ValidationError):
            TimeSeriesLogicalFile.set_file_type(self.composite_resource,
                                                res_file.id, self.user)

        # test that the invalid file did not get deleted
        self.assertEqual(self.composite_resource.files.all().count(), 1)

        # check that the resource file is still associated with generic logical file
        self.assertEqual(res_file.has_logical_file, True)
        self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
Example #9
    def test_file_metadata_on_logical_file_delete(self):
        # test that when the TimeSeriesLogicalFile instance is deleted
        # all metadata associated with it also get deleted
        self.sqlite_file_obj = open(self.sqlite_file, 'rb')
        self._create_composite_resource(title='Untitled Resource')

        self.assertEqual(self.composite_resource.files.all().count(), 1)
        res_file = self.composite_resource.files.first()
        # set the sqlite file to TimeSeries file type
        TimeSeriesLogicalFile.set_file_type(self.composite_resource,
                                            res_file.id, self.user)
        res_file = self.composite_resource.files.first()
        logical_file = res_file.logical_file
        # file level metadata
        # there should be Site metadata objects
        self.assertTrue(Site.objects.count() > 0)
        # there should be Variable metadata objects
        self.assertTrue(Variable.objects.count() > 0)
        # there should be Method metadata objects
        self.assertTrue(Method.objects.count() > 0)
        # there should be ProcessingLevel metadata objects
        self.assertTrue(ProcessingLevel.objects.count() > 0)
        # there should be TimeSeriesResult metadata objects
        self.assertTrue(TimeSeriesResult.objects.count() > 0)

        # CV lookup data
        self.assertEqual(logical_file.metadata.cv_variable_types.all().count(),
                         23)
        self.assertEqual(CVVariableType.objects.all().count(), 23)
        self.assertEqual(logical_file.metadata.cv_variable_names.all().count(),
                         805)
        self.assertEqual(CVVariableName.objects.all().count(), 805)
        self.assertEqual(logical_file.metadata.cv_speciations.all().count(),
                         145)
        self.assertEqual(CVSpeciation.objects.all().count(), 145)
        self.assertEqual(
            logical_file.metadata.cv_elevation_datums.all().count(), 5)
        self.assertEqual(CVElevationDatum.objects.all().count(), 5)
        self.assertEqual(logical_file.metadata.cv_site_types.all().count(), 51)
        self.assertEqual(CVSiteType.objects.all().count(), 51)
        self.assertEqual(logical_file.metadata.cv_method_types.all().count(),
                         25)
        self.assertEqual(CVMethodType.objects.all().count(), 25)
        self.assertEqual(logical_file.metadata.cv_units_types.all().count(),
                         179)
        self.assertEqual(CVUnitsType.objects.all().count(), 179)
        self.assertEqual(logical_file.metadata.cv_statuses.all().count(), 4)
        self.assertEqual(CVStatus.objects.all().count(), 4)
        self.assertEqual(logical_file.metadata.cv_mediums.all().count(), 18)
        self.assertEqual(CVMedium.objects.all().count(), 18)
        self.assertEqual(
            logical_file.metadata.cv_aggregation_statistics.all().count(), 17)
        self.assertEqual(CVAggregationStatistic.objects.all().count(), 17)

        # delete the logical file
        logical_file.logical_delete(self.user)
        # test that we have no logical file of type TimeSeries
        self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)
        self.assertEqual(TimeSeriesFileMetaData.objects.count(), 0)

        # test that all file level metadata deleted
        # there should be no Site metadata objects
        self.assertTrue(Site.objects.count() == 0)
        # there should be no Variable metadata objects
        self.assertTrue(Variable.objects.count() == 0)
        # there should be no Method metadata objects
        self.assertTrue(Method.objects.count() == 0)
        # there should be no ProcessingLevel metadata objects
        self.assertTrue(ProcessingLevel.objects.count() == 0)
        # there should be no TimeSeriesResult metadata objects
        self.assertTrue(TimeSeriesResult.objects.count() == 0)

        # there should not be any CV type records
        self.assertEqual(CVVariableType.objects.all().count(), 0)
        self.assertEqual(CVVariableName.objects.all().count(), 0)
        self.assertEqual(CVSpeciation.objects.all().count(), 0)
        self.assertEqual(CVElevationDatum.objects.all().count(), 0)
        self.assertEqual(CVSiteType.objects.all().count(), 0)
        self.assertEqual(CVMethodType.objects.all().count(), 0)
        self.assertEqual(CVUnitsType.objects.all().count(), 0)
        self.assertEqual(CVStatus.objects.all().count(), 0)
        self.assertEqual(CVMedium.objects.all().count(), 0)
        self.assertEqual(CVAggregationStatistic.objects.all().count(), 0)

        self.composite_resource.delete()
Example #10
    def test_sqlite_metadata_update(self):
        # here we use a valid sqlite file to set the TimeSeries file type,
        # which includes metadata extraction;
        # then we test updating the file-level metadata elements

        self.sqlite_file_obj = open(self.sqlite_file, 'rb')
        self._create_composite_resource(title='Untitled Resource')

        self.assertEqual(self.composite_resource.files.all().count(), 1)
        res_file = self.composite_resource.files.first()

        # check that the resource file is associated with GenericLogicalFile
        self.assertEqual(res_file.has_logical_file, True)
        self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
        # check that there is one GenericLogicalFile object
        self.assertEqual(GenericLogicalFile.objects.count(), 1)

        # check that there is no TimeSeriesLogicalFile object
        self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)

        # set the sqlite file to TimeSeries file type
        TimeSeriesLogicalFile.set_file_type(self.composite_resource,
                                            res_file.id, self.user)
        res_file = self.composite_resource.files.first()
        logical_file = res_file.logical_file

        # test updating site element
        site = logical_file.metadata.sites.filter(
            site_code='USU-LBR-Paradise').first()
        self.assertNotEqual(site, None)
        site_name = 'Little Bear River at McMurdy Hollow near Paradise, Utah'
        self.assertEqual(site.site_name, site_name)
        self.assertEqual(site.elevation_m, 1445)
        self.assertEqual(site.elevation_datum, 'NGVD29')
        self.assertEqual(site.site_type, 'Stream')
        self.assertFalse(logical_file.metadata.is_dirty)

        site_name = 'Little Bear River at Logan, Utah'
        site_data = {
            'site_name': site_name,
            'elevation_m': site.elevation_m,
            'elevation_datum': site.elevation_datum,
            'site_type': site.site_type
        }
        logical_file.metadata.update_element('Site', site.id, **site_data)
        site = logical_file.metadata.sites.filter(
            site_code='USU-LBR-Paradise').first()
        self.assertEqual(site.site_name, site_name)
        self.assertTrue(logical_file.metadata.is_dirty)

        # updating site lat/long should update the resource coverage as well as file level coverage
        box_coverage = self.composite_resource.metadata.coverages.all().filter(
            type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(box_coverage.value['northlimit'], 41.718473)
        self.assertEqual(box_coverage.value['eastlimit'], -111.799324)
        self.assertEqual(box_coverage.value['southlimit'], 41.495409)
        self.assertEqual(box_coverage.value['westlimit'], -111.946402)

        box_coverage = logical_file.metadata.spatial_coverage
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(box_coverage.value['northlimit'], 41.718473)
        self.assertEqual(box_coverage.value['eastlimit'], -111.799324)
        self.assertEqual(box_coverage.value['southlimit'], 41.495409)
        self.assertEqual(box_coverage.value['westlimit'], -111.946402)

        site_data['latitude'] = 40.7896
        logical_file.metadata.update_element('Site', site.id, **site_data)
        site = logical_file.metadata.sites.filter(
            site_code='USU-LBR-Paradise').first()
        self.assertEqual(site.latitude, 40.7896)

        # test that resource level coverage got updated
        box_coverage = self.composite_resource.metadata.coverages.all().filter(
            type='box').first()
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(box_coverage.value['northlimit'], 41.718473)
        self.assertEqual(box_coverage.value['eastlimit'], -111.799324)
        # this is the changed value for the southlimit as a result of changing the site latitude
        self.assertEqual(box_coverage.value['southlimit'], 40.7896)
        self.assertEqual(box_coverage.value['westlimit'], -111.946402)

        # test that file level coverage got updated
        box_coverage = logical_file.metadata.spatial_coverage
        self.assertEqual(box_coverage.value['projection'], 'WGS 84 EPSG:4326')
        self.assertEqual(box_coverage.value['units'], 'Decimal degrees')
        self.assertEqual(box_coverage.value['northlimit'], 41.718473)
        self.assertEqual(box_coverage.value['eastlimit'], -111.799324)
        # this is the changed value for the southlimit as a result of changing the site latitude
        self.assertEqual(box_coverage.value['southlimit'], 40.7896)
        self.assertEqual(box_coverage.value['westlimit'], -111.946402)

        logical_file.metadata.is_dirty = False
        logical_file.metadata.save()
        # test updating variable element
        variable = logical_file.metadata.variables.filter(
            variable_code='USU36').first()
        self.assertNotEqual(variable, None)
        self.assertEqual(variable.variable_name, 'Temperature')
        self.assertEqual(variable.variable_type, 'Water Quality')
        self.assertEqual(variable.no_data_value, -9999)
        self.assertEqual(variable.speciation, 'Not Applicable')
        self.assertEqual(variable.variable_definition, None)

        var_def = 'Concentration of oxygen dissolved in water.'
        variable_data = {'variable_definition': var_def}
        logical_file.metadata.update_element('Variable', variable.id,
                                             **variable_data)
        variable = logical_file.metadata.variables.filter(
            variable_code='USU36').first()
        self.assertEqual(variable.variable_definition, var_def)
        self.assertEqual(variable.variable_name, 'Temperature')
        self.assertEqual(variable.variable_type, 'Water Quality')
        self.assertEqual(variable.no_data_value, -9999)
        self.assertEqual(variable.speciation, 'Not Applicable')

        self.assertTrue(logical_file.metadata.is_dirty)
        logical_file.metadata.is_dirty = False
        logical_file.metadata.save()

        # test updating method element
        method = logical_file.metadata.methods.filter(method_code=28).first()
        self.assertNotEqual(method, None)
        self.assertEqual(
            method.method_name,
            'Quality Control Level 1 Data Series created from raw '
            'QC Level 0 data using ODM Tools.')
        self.assertEqual(method.method_type, 'Instrument deployment')
        self.assertEqual(
            method.method_description,
            'Quality Control Level 1 Data Series created '
            'from raw QC Level 0 data using ODM Tools.')
        self.assertEqual(method.method_link, None)

        method_link = "http://somesite.com"
        method_data = {'method_link': method_link}
        logical_file.metadata.update_element('Method', method.id,
                                             **method_data)
        method = logical_file.metadata.methods.filter(method_code=28).first()
        self.assertNotEqual(method, None)
        self.assertEqual(
            method.method_name,
            'Quality Control Level 1 Data Series created from raw '
            'QC Level 0 data using ODM Tools.')
        self.assertEqual(method.method_type, 'Instrument deployment')
        self.assertEqual(
            method.method_description,
            'Quality Control Level 1 Data Series created '
            'from raw QC Level 0 data using ODM Tools.')
        self.assertEqual(method.method_link, method_link)

        self.assertTrue(logical_file.metadata.is_dirty)
        logical_file.metadata.is_dirty = False
        logical_file.metadata.save()

        # test updating processing level element
        pro_level = logical_file.metadata.processing_levels.filter(
            processing_level_code=1).first()
        self.assertNotEqual(pro_level, None)
        self.assertEqual(pro_level.definition, 'Quality controlled data')
        explanation = 'Quality controlled data that have passed quality assurance procedures ' \
                      'such as routine estimation of timing and sensor calibration or visual ' \
                      'inspection and removal of obvious errors. An example is USGS published ' \
                      'streamflow records following parsing through USGS quality ' \
                      'control procedures.'
        self.assertEqual(pro_level.explanation, explanation)

        definition = "Uncontrolled data"
        pro_level_data = {'definition': definition}
        logical_file.metadata.update_element('ProcessingLevel', pro_level.id,
                                             **pro_level_data)
        pro_level = logical_file.metadata.processing_levels.filter(
            processing_level_code=1).first()
        self.assertNotEqual(pro_level, None)
        self.assertEqual(pro_level.definition, definition)
        explanation = 'Quality controlled data that have passed quality assurance procedures ' \
                      'such as routine estimation of timing and sensor calibration or visual ' \
                      'inspection and removal of obvious errors. An example is USGS published ' \
                      'streamflow records following parsing through USGS quality ' \
                      'control procedures.'
        self.assertEqual(pro_level.explanation, explanation)

        self.assertTrue(logical_file.metadata.is_dirty)
        logical_file.metadata.is_dirty = False
        logical_file.metadata.save()

        # test updating time series result element
        ts_result = logical_file.metadata.time_series_results.all().first()
        self.assertNotEqual(ts_result, None)
        self.assertEqual(ts_result.units_type, 'Temperature')
        self.assertEqual(ts_result.units_name, 'degree celsius')
        self.assertEqual(ts_result.units_abbreviation, 'degC')
        self.assertEqual(ts_result.status, 'Unknown')
        self.assertEqual(ts_result.sample_medium, 'Surface Water')
        self.assertEqual(ts_result.value_count, 1441)
        self.assertEqual(ts_result.aggregation_statistics, 'Average')

        ts_data = {'status': 'Complete'}
        logical_file.metadata.update_element('timeseriesresult', ts_result.id,
                                             **ts_data)
        ts_result = logical_file.metadata.time_series_results.all().first()
        self.assertNotEqual(ts_result, None)
        self.assertEqual(ts_result.units_type, 'Temperature')
        self.assertEqual(ts_result.units_name, 'degree celsius')
        self.assertEqual(ts_result.units_abbreviation, 'degC')
        self.assertEqual(ts_result.status, 'Complete')
        self.assertEqual(ts_result.sample_medium, 'Surface Water')
        self.assertEqual(ts_result.value_count, 1441)
        self.assertEqual(ts_result.aggregation_statistics, 'Average')
        self.assertTrue(logical_file.metadata.is_dirty)

        self.composite_resource.delete()
Example #11
    def test_CSV_set_file_type_to_timeseries(self):
        # here we use a valid CSV file to set the TimeSeries
        # file type, which includes metadata extraction
        self.odm2_csv_file_obj = open(self.odm2_csv_file, 'r')
        file_to_upload = UploadedFile(file=self.odm2_csv_file_obj,
                                      name=os.path.basename(
                                          self.odm2_csv_file_obj.name))
        self._create_composite_resource(title='Untitled Resource',
                                        file_to_upload=file_to_upload)

        self.assertEqual(self.composite_resource.files.all().count(), 1)
        res_file = self.composite_resource.files.first()

        # check that the resource file is associated with GenericLogicalFile
        self.assertEqual(res_file.has_logical_file, True)
        self.assertEqual(res_file.logical_file_type_name, "GenericLogicalFile")
        # check that there is one GenericLogicalFile object
        self.assertEqual(GenericLogicalFile.objects.count(), 1)

        # check that there is no TimeSeriesLogicalFile object
        self.assertEqual(TimeSeriesLogicalFile.objects.count(), 0)

        # set the CSV file to TimeSeries file type
        TimeSeriesLogicalFile.set_file_type(self.composite_resource,
                                            res_file.id, self.user)

        # test that the blank ODM2 sqlite file got added to the resource
        self.assertEqual(self.composite_resource.files.all().count(), 2)
        csv_res_file = None
        sqlite_res_file = None
        for res_file in self.composite_resource.files.all():
            if res_file.extension == '.sqlite':
                sqlite_res_file = res_file
            elif res_file.extension == '.csv':
                csv_res_file = res_file
        self.assertNotEqual(csv_res_file, None)
        self.assertNotEqual(sqlite_res_file, None)

        self.assertEqual(csv_res_file.logical_file_type_name,
                         "TimeSeriesLogicalFile")
        self.assertEqual(sqlite_res_file.logical_file_type_name,
                         "TimeSeriesLogicalFile")
        self.assertEqual(TimeSeriesLogicalFile.objects.count(), 1)
        logical_file = csv_res_file.logical_file

        # test that both csv and sqlite files of the logical file are in a folder
        csv_file_name = os.path.basename(self.odm2_csv_file_obj.name)
        for res_file in logical_file.files.all():
            self.assertEqual(res_file.file_folder, csv_file_name[:-4])

        # since the uploaded csv file has 2 data columns, the metadata should have 2 series names
        self.assertEqual(len(logical_file.metadata.series_names), 2)
        csv_data_column_names = set(['Temp_DegC_Mendon', 'Temp_DegC_Paradise'])
        self.assertEqual(set(logical_file.metadata.series_names),
                         csv_data_column_names)

        # since the uploaded csv file has 2 data columns, the metadata
        # attribute value_counts (dict) should have 2 elements
        self.assertEqual(len(logical_file.metadata.value_counts), 2)
        self.assertEqual(set(logical_file.metadata.value_counts.keys()),
                         csv_data_column_names)

        # there should be 20 data values for each series
        self.assertEqual(
            logical_file.metadata.value_counts['Temp_DegC_Mendon'], '20')
        self.assertEqual(
            logical_file.metadata.value_counts['Temp_DegC_Paradise'], '20')

        # the dataset name (title) must be set to the name of the CSV file
        self.assertEqual(logical_file.dataset_name, csv_file_name[:-4])

        # there should not be any file level abstract
        self.assertEqual(logical_file.metadata.abstract, None)

        # there should not be any file level keywords
        self.assertEqual(logical_file.metadata.keywords, [])

        # there should be 1 coverage element of type period at the file level
        self.assertEqual(logical_file.metadata.coverages.all().count(), 1)
        self.assertEqual(
            logical_file.metadata.coverages.filter(type='period').count(), 1)
        self.assertEqual(logical_file.has_csv_file, True)

        # at file level there should not be any site element
        self.assertEqual(logical_file.metadata.sites.all().count(), 0)

        # at file level there should not be any method element
        self.assertEqual(logical_file.metadata.methods.all().count(), 0)

        # at file level there should not be any variable element
        self.assertEqual(logical_file.metadata.variables.all().count(), 0)

        # at file level there should not be any processing level element
        self.assertEqual(logical_file.metadata.processing_levels.all().count(),
                         0)

        # at file level there should not be any result element
        self.assertEqual(
            logical_file.metadata.time_series_results.all().count(), 0)

        # resource title does not get updated when csv is set to file type
        self.assertEqual(self.composite_resource.metadata.title.value,
                         'Untitled Resource')
        # self._test_no_change_in_metadata()
        # there should be 2 format elements - since the resource has a csv file and a sqlite file
        self.assertEqual(
            self.composite_resource.metadata.formats.all().count(), 2)

        # there should be 1 coverage element of type period
        self.assertEqual(
            self.composite_resource.metadata.coverages.all().count(), 1)
        self.assertEqual(
            self.composite_resource.metadata.coverages.filter(
                type='period').count(), 1)
        self.composite_resource.delete()
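
For reference, the assertions above imply a CSV shape like the following (hypothetical rows; the actual test file is not shown): a date/time column followed by one column per series, with 20 data rows.

# Hypothetical sketch of the uploaded CSV's shape (actual file not shown):
#   ValueDateTime, Temp_DegC_Mendon, Temp_DegC_Paradise
#   2008-01-01 00:00:00, 7.5, 8.1
#   ... (20 data rows in total, giving the value count of '20' per series)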
Example #12
    def get_folder_aggregation_type_to_set(self, dir_path):
        """Returns an aggregation (file type) type that the specified folder *dir_path* can
        possibly be set to.

        :param dir_path: Resource file directory path (full folder path starting with resource id)
        for which the possible aggregation type that can be set needs to be determined

        :return If the specified folder is already represents an aggregation or does
        not contain suitable file(s) then returns "" (empty string). If the specified folder
        contains only the files that meet the requirements of a supported aggregation, and
        does not contain other folders or does not have a parent folder then return the
        class name of that matching aggregation type.
        """
        aggregation_type_to_set = ""
        if self.get_folder_aggregation_object(dir_path) is not None:
            # target folder is already an aggregation
            return None

        istorage = self.get_irods_storage()
        irods_path = dir_path
        if self.is_federated:
            irods_path = os.path.join(self.resource_federation_path,
                                      irods_path)
        store = istorage.listdir(irods_path)
        if store[0]:
            # seems there are folders under dir_path - no aggregation type can be set if the target
            # folder contains other folders
            return None

        files_in_folder = [
            res_file for res_file in self.files.all()
            if res_file.dir_path == dir_path
        ]
        if not files_in_folder:
            # folder is empty
            return None
        if len(files_in_folder) > 1:
            # check for geo feature
            aggregation_type_to_set = GeoFeatureLogicalFile.check_files_for_aggregation_type(
                files_in_folder)
            if aggregation_type_to_set:
                return aggregation_type_to_set

            # check for raster
            aggregation_type_to_set = GeoRasterLogicalFile.check_files_for_aggregation_type(
                files_in_folder)
            if aggregation_type_to_set:
                return aggregation_type_to_set
        else:
            # check for raster
            aggregation_type_to_set = GeoRasterLogicalFile.check_files_for_aggregation_type(
                files_in_folder)
            if aggregation_type_to_set:
                return aggregation_type_to_set
            # check for NetCDF aggregation type
            aggregation_type_to_set = NetCDFLogicalFile.check_files_for_aggregation_type(
                files_in_folder)
            if aggregation_type_to_set:
                return aggregation_type_to_set
            # check for TimeSeries aggregation type
            aggregation_type_to_set = TimeSeriesLogicalFile.check_files_for_aggregation_type(
                files_in_folder)
            if aggregation_type_to_set:
                return aggregation_type_to_set

        return None
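
A hedged usage sketch of the method above (the folder path is illustrative): it returns the class name of the matching aggregation type, or None when the folder is empty, already an aggregation, or contains subfolders.

    # hypothetical usage; dir_path starts with the resource id per the docstring
    dir_path = "{}/data/contents/logan".format(resource.short_id)
    aggregation_class_name = resource.get_folder_aggregation_type_to_set(dir_path)
    if aggregation_class_name is not None:
        print("folder can be set to: {}".format(aggregation_class_name))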
Example #13
    def handle(self, *args, **options):
        logger = logging.getLogger(__name__)
        resource_counter = 0
        to_resource_type = 'CompositeResource'
        msg = "THERE ARE CURRENTLY {} TIMESERIES RESOURCES PRIOR TO CONVERSION.".format(
            TimeSeriesResource.objects.all().count())
        logger.info(msg)
        print(">> {}".format(msg))

        for ts_res in TimeSeriesResource.objects.all():
            # check resource exists on irods
            istorage = ts_res.get_irods_storage()
            if not istorage.exists(ts_res.root_path):
                err_msg = "Timeseries resource not found in irods (ID: {})".format(
                    ts_res.short_id)
                logger.error(err_msg)
                print("Error:>> {}".format(err_msg))
                # skip this timeseries resource for migration
                continue

            sqlite_file = None
            res_file_count = ts_res.files.count()
            if res_file_count == 1 or res_file_count == 2:
                for res_file in ts_res.files.all():
                    if res_file.extension.lower() == '.sqlite':
                        sqlite_file = res_file

            create_ts_aggregation = sqlite_file is not None
            if create_ts_aggregation:
                # check resource files exist on irods
                file_missing = False
                for res_file in ts_res.files.all():
                    file_path = res_file.public_path
                    if not istorage.exists(file_path):
                        err_msg = "File path not found in irods:{}".format(
                            file_path)
                        logger.error(err_msg)
                        err_msg = "Failed to convert timeseries resource (ID: {}). " \
                                  "Resource file is missing on irods".format(ts_res.short_id)
                        print("Error:>> {}".format(err_msg))
                        file_missing = True
                        break
                if file_missing:
                    # skip this corrupt timeseries resource for migration
                    continue

            # change the resource_type
            ts_metadata_obj = ts_res.metadata
            ts_res.resource_type = to_resource_type
            ts_res.content_model = to_resource_type.lower()
            ts_res.save()
            # get the converted resource object - CompositeResource
            comp_res = ts_res.get_content_model()

            # set CoreMetaData object for the composite resource
            core_meta_obj = CoreMetaData.objects.create()
            comp_res.content_object = core_meta_obj

            # migrate timeseries resource core metadata elements to composite resource
            migrate_core_meta_elements(ts_metadata_obj, comp_res)

            # update url attribute of the metadata 'type' element
            type_element = comp_res.metadata.type
            type_element.url = '{0}/terms/{1}'.format(current_site_url(),
                                                      to_resource_type)
            type_element.save()
            if create_ts_aggregation:
                # create a Timeseries aggregation
                ts_aggr = None
                try:
                    ts_aggr = TimeSeriesLogicalFile.create(resource=comp_res)
                except Exception as ex:
                    err_msg = 'Failed to create Timeseries aggregation for resource (ID: {})'
                    err_msg = err_msg.format(ts_res.short_id)
                    err_msg = err_msg + '\n' + str(ex)
                    logger.error(err_msg)
                    print("Error:>> {}".format(err_msg))

                if ts_aggr is not None:
                    # set aggregation dataset title
                    ts_aggr.dataset_name = comp_res.metadata.title.value
                    ts_aggr.save()
                    # make the res files part of the aggregation
                    for res_file in comp_res.files.all():
                        ts_aggr.add_resource_file(res_file)

                    # migrate timeseries specific metadata to aggregation
                    for site in ts_metadata_obj.sites:
                        site.content_object = ts_aggr.metadata
                        site.save()
                    for variable in ts_metadata_obj.variables:
                        variable.content_object = ts_aggr.metadata
                        variable.save()
                    for method in ts_metadata_obj.methods:
                        method.content_object = ts_aggr.metadata
                        method.save()
                    for proc_level in ts_metadata_obj.processing_levels:
                        proc_level.content_object = ts_aggr.metadata
                        proc_level.save()
                    for ts_result in ts_metadata_obj.time_series_results:
                        ts_result.content_object = ts_aggr.metadata
                        ts_result.save()

                    # create aggregation level coverage elements
                    for coverage in comp_res.metadata.coverages.all():
                        aggr_coverage = Coverage()
                        aggr_coverage.type = coverage.type
                        aggr_coverage._value = coverage._value
                        aggr_coverage.content_object = ts_aggr.metadata
                        aggr_coverage.save()

                    utc_offset = ts_metadata_obj.utc_offset
                    if utc_offset:
                        utc_offset.content_object = ts_aggr.metadata
                        utc_offset.save()

                    ts_aggr.metadata.value_counts = ts_metadata_obj.value_counts
                    ts_aggr.metadata.save()

                    # create aggregation level keywords
                    keywords = [
                        sub.value for sub in comp_res.metadata.subjects.all()
                    ]
                    ts_aggr.metadata.keywords = keywords
                    # set aggregation metadata dirty status to that of the timeseries resource
                    # metadata dirty status - this would trigger a sqlite file update for the
                    # new aggregation if metadata is dirty
                    ts_aggr.metadata.is_dirty = ts_metadata_obj.is_dirty
                    ts_aggr.metadata.save()
                    # create aggregation level xml files
                    ts_aggr.create_aggregation_xml_documents()
                    msg = 'One Timeseries aggregation was created in resource (ID: {})'
                    msg = msg.format(comp_res.short_id)
                    logger.info(msg)

            comp_res.save()
            # set resource to dirty so that resource level xml files (resource map and
            # metadata xml files) will be re-generated as part of next bag download
            try:
                set_dirty_bag_flag(comp_res)
            except Exception as ex:
                err_msg = 'Failed to set the dirty bag flag for the converted resource (ID: {})'
                err_msg = err_msg.format(ts_res.short_id)
                err_msg = err_msg + '\n' + str(ex)
                logger.error(err_msg)
                print("Error:>> {}".format(err_msg))

            resource_counter += 1
            # delete the instance of TimeSeriesMetaData that was part of the original
            # timeseries resource
            ts_metadata_obj.delete()
            msg = 'Timeseries resource (ID: {}) was converted to Composite Resource type'
            msg = msg.format(comp_res.short_id)
            logger.info(msg)

        msg = "{} TIMESERIES RESOURCES WERE CONVERTED TO COMPOSITE RESOURCE.".format(
            resource_counter)
        logger.info(msg)
        print(">> {}".format(msg))
        msg = "THERE ARE CURRENTLY {} TIMESERIES RESOURCES AFTER CONVERSION.".format(
            TimeSeriesResource.objects.all().count())
        logger.info(msg)
        if TimeSeriesResource.objects.all().count() > 0:
            msg = "NOT ALL TIMESERIES RESOURCES WERE CONVERTED TO COMPOSITE RESOURCE TYPE"
            logger.error(msg)
        print(">> {}".format(msg))