Example #1
    def test_read_mcf(self):
        """Test reading MCFs, strings or dict"""

        # test as file
        with self.assertRaises(IOError):
            mcf = read_mcf(get_abspath('../404.yml'))

        mcf = read_mcf(get_abspath('../sample.yml'))
        self.assertIsInstance(mcf, dict, 'Expected dict')

        # test MCF section
        self.assertTrue('version' in mcf['mcf'], 'Expected MCF version')
        self.assertTrue('metadata' in mcf, 'Expected metadata section')

        # test as string
        with open(get_abspath('../sample.yml')) as fh:
            mcf_string = fh.read()

        mcf = read_mcf(mcf_string)
        self.assertTrue('metadata' in mcf, 'Expected metadata section')

        # test as dict
        mcf_dict = yaml.safe_load(mcf_string)
        mcf = read_mcf(mcf_dict)
        self.assertTrue('metadata' in mcf, 'Expected metadata section')
Example #2
    def test_mcf_version(self):
        """Test MCF version validation"""

        with self.assertRaises(MCFReadError):
            read_mcf(get_abspath('missing-version.yml'))

        with self.assertRaises(MCFReadError):
            read_mcf(get_abspath('bad-version.yml'))
Example #3
    def test_json_output_schema(self):
        """test JSON as dict-based output schemas"""

        mcf = read_mcf(get_abspath('../sample.yml'))

        record = OGCAPIRecordOutputSchema().write(mcf)
        self.assertIsInstance(record, str)

        mcf = read_mcf(get_abspath('../sample.yml'))
        record = OGCAPIRecordOutputSchema().write(mcf, stringify=False)
        self.assertIsInstance(record, dict)
Example #4
    def test_wmo_wigos(self):
        """test WMO WIGOS Metadata support"""

        mcf = read_mcf(get_abspath('../sample-wmo-wigos.yml'))
        self.assertEqual(len(mcf['facility'].keys()), 1)
        self.assertEqual(
            len(mcf['facility']['first_station']['spatiotemporal']), 1)
Example #5
    def test_19139_2(self):
        """test ISO 19139-2 Metadata support"""

        mcf = read_mcf(get_abspath('../sample.yml'))
        self.assertIn('acquisition', mcf)
        self.assertIn('platforms', mcf['acquisition'])
        self.assertIn('instruments', mcf['acquisition']['platforms'][0])
Example #6
    def test_broken_yaml(self):
        """test against broken YAML"""

        iso_os = ISO19139OutputSchema()
        with self.assertRaises(MCFReadError):
            render_j2_template(read_mcf(get_abspath('broken-yaml.yml')),
                               iso_os.template_dir)
Example #7
    def test_pre1900_dates(self):
        """test datestrings that are pre-1900"""

        iso_os = ISO19139OutputSchema()

        xml = render_j2_template(read_mcf(get_abspath('dates-pre-1900.yml')),
                                 iso_os.template_dir)
        self.assertIsInstance(xml, str, 'Expected unicode string')
Example #8
    def load_collection_level_metadata(self):
        logger.debug('Loading collection level metadata')
        for clm in os.listdir(COLLECTION_LEVEL_METADATA):
            logger.debug(f'collection metadata file: {clm}')
            clm_ = os.path.join(COLLECTION_LEVEL_METADATA, clm)
            clm_mcf = read_mcf(clm_)
            clm_iso = ISO19139OutputSchema().write(clm_mcf)
            logger.debug(f'Upserting metadata: {clm_}')
            self._parse_and_upsert_metadata(clm_iso)
Example #9
    def test_validate_mcf(self):
        """test MCF validation"""

        mcf = read_mcf(get_abspath('../sample.yml'))

        instance = json.loads(json.dumps(mcf, default=json_serial))

        is_valid = validate_mcf(instance)
        assert is_valid

        # validate nested MCF
        mcf = read_mcf(get_abspath('./sample-child.yml'))

        instance = json.loads(json.dumps(mcf, default=json_serial))

        is_valid = validate_mcf(instance)
        assert is_valid

        with self.assertRaises(MCFValidationError):
            is_valid = validate_mcf({'foo': 'bar'})
Example #10
    def test_nested_mcf(self):
        """test nested mcf support"""

        mcf = read_mcf(get_abspath('child.yml'))

        self.assertEqual(mcf['metadata']['identifier'], 1234,
                         'Expected specific identifier')

        self.assertEqual(mcf['identification']['title_en'], 'title in English',
                         'Expected specific title')

        self.assertIsInstance(mcf, dict, 'Expected dict')
Example #11
    def test_pretty_print(self):
        """Test pretty-printing"""

        iso_os = ISO19139OutputSchema()

        xml = render_j2_template(read_mcf(get_abspath('../sample.yml')),
                                 iso_os.template_dir)
        xml2 = pretty_print(xml)

        self.assertIsInstance(xml2, str, 'Expected unicode string')
        self.assertEqual(xml2[-1], '>', 'Expected closing bracket')
        self.assertTrue(xml2.startswith('<?xml'), 'Expected XML declaration')
Example #12
    def test_mcf_model(self):
        """test mcf model and types"""

        mcf = read_mcf(get_abspath('../sample.yml'))
        self.assertIsInstance(mcf['identification']['dates'], dict,
                              'Expected dict')
        self.assertIsInstance(mcf['identification']['keywords'], dict,
                              'Expected dict')
        self.assertIsInstance(mcf['identification']['topiccategory'], list,
                              'Expected list')
        self.assertIsInstance(mcf['contact'], dict, 'Expected dict')
        self.assertIsInstance(mcf['distribution'], dict, 'Expected dict')
Example #13
    def test_deep_nested_mcf(self):
        """test deep nested mcf support"""

        mcf = read_mcf(get_abspath('deep-nest-child.yml'))

        self.assertEqual(mcf['metadata']['identifier'], 'MYID',
                         'Expected specific identifier')

        self.assertEqual(mcf['identification']['title_en'], 'child title',
                         'Expected specific title')
        self.assertEqual(mcf['distribution']['waf']['url'],
                         'http://dd.meteo.gc.ca', 'Expected specific URL')

        self.assertEqual(mcf['contact']['main']['positionname'],
                         'Senior Systems Scientist', 'Expected specific name')
Example #14
    def test_nested_mcf(self):
        """test nested mcf support"""

        mcf = read_mcf(get_abspath('child.yml'))

        self.assertEqual(mcf['metadata']['identifier'], 5678,
                         'Expected specific identifier')

        self.assertEqual(mcf['distribution']['waf']['type'], 'WWW:LINK',
                         'Expected specific distribution type')

        self.assertEqual(mcf['distribution']['waf']['url'],
                         'http://example.org/waf',
                         'Expected specific distribution url')

        self.assertEqual(mcf['metadata']['datestamp'],
                         datetime.date(2011, 11, 11),
                         'Expected specific metadata datestamp')

        self.assertIsInstance(mcf, dict, 'Expected dict')
Example #15
    def test_render_j2_template(self):
        """test template rendering"""

        test_mcf_paths = [
            '../sample.yml', 'unilingual.yml',
            'nil-identification-language.yml'
        ]

        for mcf_path in test_mcf_paths:

            iso_os = ISO19139OutputSchema()

            # working template directory
            xml = render_j2_template(read_mcf(get_abspath(mcf_path)),
                                     iso_os.template_dir)
            self.assertIsInstance(xml, str, 'Expected unicode string')

            # no template directory or local schema provided
            with self.assertRaises(RuntimeError):
                render_j2_template(read_mcf(get_abspath(mcf_path)))

            # bad template directory provided
            with self.assertRaises(RuntimeError):
                xml = render_j2_template(read_mcf(get_abspath(mcf_path)),
                                         'bad_dir')

            # bad j2 template_dir provided
            with self.assertRaises(RuntimeError):
                xml = render_j2_template(read_mcf(get_abspath(mcf_path)),
                                         template_dir='/bad_schema/path')

            # good j2 template_dir provided
            xml = render_j2_template(
                read_mcf(get_abspath(mcf_path)),
                template_dir=get_abspath('sample_schema_j2'))  # noqa

            # good sample output schema
            s_os = SampleOutputSchema()
            _ = s_os.write(read_mcf(get_abspath(mcf_path)))
Example #16
def _describe_file(filepath):
    """
    Helper function to describe a geospatial data file.
    First checks whether a sidecar MCF file is available; if so, it is used.
    If not, the file itself is parsed to retrieve basic information.

    :param filepath: path to file

    :returns: `dict` of GeoJSON item
    """

    content = {'bbox': None, 'geometry': None, 'properties': {}}

    mcf_file = '{}.yml'.format(os.path.splitext(filepath)[0])

    if os.path.isfile(mcf_file):
        try:
            from pygeometa.core import read_mcf, MCFReadError
            from pygeometa.schemas.stac import STACItemOutputSchema

            md = read_mcf(mcf_file)
            stacjson = STACItemOutputSchema().write(md)
            stacdata = loads(stacjson)
            for k, v in stacdata.items():
                content[k] = v
        except ImportError:
            LOGGER.debug('pygeometa not found')
        except MCFReadError as err:
            LOGGER.warning('MCF error: {}'.format(err))
    else:
        LOGGER.debug('No mcf found at: {}'.format(mcf_file))

    if content['geometry'] is None and content['bbox'] is None:
        try:
            import rasterio
            from rasterio.crs import CRS
            from rasterio.warp import transform_bounds
        except ImportError as err:
            LOGGER.warning('rasterio not found')
            LOGGER.warning(err)
            return content

        try:
            import fiona
        except ImportError as err:
            LOGGER.warning('fiona not found')
            LOGGER.warning(err)
            return content

        try:  # raster
            LOGGER.debug('Testing raster data detection')
            d = rasterio.open(filepath)
            content['bbox'] = [
                d.bounds.left, d.bounds.bottom, d.bounds.right, d.bounds.top
            ]
            content['geometry'] = {
                'type':
                'Polygon',
                'coordinates': [[[d.bounds.left, d.bounds.bottom],
                                 [d.bounds.left, d.bounds.top],
                                 [d.bounds.right, d.bounds.top],
                                 [d.bounds.right, d.bounds.bottom],
                                 [d.bounds.left, d.bounds.bottom]]]
            }
            for k, v in d.tags(d.count).items():
                content['properties'][k] = v
                if k in ['GRIB_REF_TIME']:
                    value = int(v.split()[0])
                    datetime_ = datetime.fromtimestamp(value)
                    content['properties']['datetime'] = datetime_.isoformat(
                    ) + 'Z'  # noqa
        except rasterio.errors.RasterioIOError:
            try:
                LOGGER.debug('Testing vector data detection')
                d = fiona.open(filepath)
                scrs = CRS(d.crs)
                if scrs.to_epsg() is not None and scrs.to_epsg() != 4326:
                    tcrs = CRS.from_epsg(4326)
                    bnds = transform_bounds(scrs, tcrs, d.bounds[0],
                                            d.bounds[1], d.bounds[2],
                                            d.bounds[3])
                    content['properties']['projection'] = scrs.to_epsg()
                else:
                    bnds = d.bounds

                if d.schema['geometry'] not in [None, 'None']:
                    content['bbox'] = [bnds[0], bnds[1], bnds[2], bnds[3]]
                    content['geometry'] = {
                        'type':
                        'Polygon',
                        'coordinates':
                        [[[bnds[0], bnds[1]], [bnds[0], bnds[3]],
                          [bnds[2], bnds[3]], [bnds[2], bnds[1]],
                          [bnds[0], bnds[1]]]]
                    }

                for k, v in d.schema['properties'].items():
                    content['properties'][k] = v

                if d.driver == 'ESRI Shapefile':
                    id_ = os.path.splitext(os.path.basename(filepath))[0]
                    content['assets'] = {}
                    for suffix in ['shx', 'dbf', 'prj']:
                        fullpath = '{}.{}'.format(
                            os.path.splitext(filepath)[0], suffix)

                        if os.path.exists(fullpath):
                            filectime = file_modified_iso8601(fullpath)
                            filesize = os.path.getsize(fullpath)

                            content['assets'][suffix] = {
                                'href': './{}.{}'.format(id_, suffix),
                                'created': filectime,
                                'file:size': filesize
                            }

            except fiona.errors.DriverError:
                LOGGER.debug('Could not detect raster or vector data')

    return content
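
A minimal usage sketch of the helper above; the file path is a hypothetical example:

# Hypothetical call to _describe_file(); 'data/elevation.tif' is an assumed path
item = _describe_file('data/elevation.tif')
print(item['bbox'])        # [west, south, east, north], or None if undetected
print(item['properties'])  # tags / schema fields collected from the file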
Example #17
def indexDir(dir, dir_out, dir_out_mode, mode, dbtype, profile, db):
    if not dir:
        dir = "."
    if not dir_out_mode or dir_out_mode not in ["flat", "nested"]:
        dir_out_mode = "flat"
    if not mode or mode not in ["init", "update", "export"]:
        mode = "init"
    if not dbtype or dbtype not in ["path", "sqlite", "postgres"]:
        dbtype = "path"
    if not db:
        db = dir
    if not profile or profile not in ["iso19139", "dcat"]:
        profile = "iso19139"
    print(mode + ' metadata in ' + dir + ' as ' + profile + ' in ' + db)

    if mode == "export":
        if dbtype == 'sqlite':
            dir_out = os.path.join(dir_out, db)
            createIndexIfDoesntExist(dir_out)
        elif dbtype == "path":
            if not os.path.exists(dir_out):
                print('creating out folder ' + dir_out)
                os.makedirs(dir_out)
        else:
            print("postgis not supported")

    # core metadata gets populated by merging the index.yaml content from parent folders
    coreMetadata = {}
    # identify if there is a path change
    prvPath = "dummy"

    for path, dirs, files in os.walk(dir):
        if mode == 'export':
            # if dir has index.yaml merge it to parent
            f = os.path.join(path, 'index.yaml')
            if os.path.exists(f):
                if prvPath != path:
                    print('Indexing path ' + path)
                    prvPath = path
                    with open(os.path.join(f), mode="r",
                              encoding="utf-8") as yf:
                        pathMetadata = yaml.load(yf, Loader=SafeLoader)
                        pathMetadata.pop('index')
                        pathMetadata.pop('mode')
                        dict_merge(pathMetadata, coreMetadata)
                        coreMetadata = pathMetadata
            else:
                print(f + ' does not exist')  # create it?
        for file in files:
            fname = os.path.join(path, file)
            if '.' in file:
                base, extension = file.rsplit('.', 1)
                if extension.lower() in SPATIAL_FILE_TYPES:
                    print('Indexing file ' + fname)
                    yf = os.path.join(path, base + '.yaml')
                    if (mode == 'update'
                            or (not os.path.exists(yf) and mode == 'init')):
                        # mode init for spatial files without metadata or update
                        cnt = indexSpatialFile(fname, extension)
                        if mode == 'update':
                            # keep manual changes on the original
                            try:
                                with open(os.path.join(yf),
                                          mode="r",
                                          encoding="utf-8") as f:
                                    orig = yaml.load(f, Loader=SafeLoader)
                                    dict_merge(
                                        orig, cnt
                                    )  # or should we overwrite some values from cnt explicitly?
                                    cnt = orig
                            except Exception as e:
                                print('Failed to merge original:', f, e)
                        md = asPGM(cnt)
                        # write yf
                        try:
                            with open(os.path.join(yf), 'w') as f:
                                yaml.dump(md, f, sort_keys=False)
                        except Exception as e:
                            print('Failed to dump yaml:', e)
                    elif mode == 'export':
                        try:
                            with open(os.path.join(yf),
                                      mode="r",
                                      encoding="utf-8") as f:
                                cnf = yaml.load(f, Loader=SafeLoader)
                                dict_merge(cnf, coreMetadata)
                                if dbtype == 'sqlite' or dbtype == 'postgres':
                                    insert_or_update(cnf, dir_out)
                                elif dbtype == "path":
                                    # load yml as mcf
                                    md = read_mcf(cnf)
                                    # yaml to iso/dcat
                                    if schemaPath and os.path.exists(
                                            schemaPath):
                                        print('Using schema', schemaPath)
                                        xml_string = render_j2_template(
                                            md,
                                            template_dir="{}/iso19139".format(
                                                schemaPath))
                                    else:
                                        print('Using default iso19139 schema')
                                        iso_os = ISO19139OutputSchema()
                                        xml_string = iso_os.write(md)
                                    if dir_out_mode == "flat":
                                        pth = os.path.join(
                                            dir_out,
                                            cnf['metadata']['identifier'] +
                                            '.xml')
                                    else:
                                        pth = os.path.join(path, base + '.xml')
                                    print("write to file: " + pth)
                                    with open(pth, 'w+') as ff:
                                        ff.write(xml_string)
                                        print('iso19139 xml generated at ' +
                                              pth)
                        except Exception as e:
                            print('Failed to create xml:', e)
                else:
                    pass
                    # print('Skipping {}, not approved file type: {}'.format(fname, extension))
            else:
                pass
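
Taken together, these examples exercise a small pygeometa surface: read_mcf accepts a file path, a YAML string, or a dict, and the output schema classes serialize the resulting dict. A minimal end-to-end sketch, assuming a hypothetical record.yml MCF file:

# Minimal sketch of the workflow shown above; 'record.yml' is a hypothetical MCF path
from pygeometa.core import read_mcf
from pygeometa.schemas.iso19139 import ISO19139OutputSchema

mcf = read_mcf('record.yml')    # also accepts a YAML string or a dict
iso_os = ISO19139OutputSchema()
xml_string = iso_os.write(mcf)  # serialize the MCF to ISO 19139 XML
print(xml_string[:72])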