def test_read_mcf(self):
    """Test reading MCFs as a file path, a string, and a dict."""
    # test as file: a missing file must raise IOError
    with self.assertRaises(IOError):
        mcf = read_mcf(get_abspath('../404.yml'))

    mcf = read_mcf(get_abspath('../sample.yml'))
    self.assertIsInstance(mcf, dict, 'Expected dict')

    # test MCF section
    self.assertTrue('version' in mcf['mcf'], 'Expected MCF version')
    self.assertTrue('metadata' in mcf, 'Expected metadata section')

    # test as string
    with open(get_abspath('../sample.yml')) as fh:
        mcf_string = fh.read()
    mcf = read_mcf(mcf_string)
    self.assertTrue('metadata' in mcf, 'Expected metadata section')

    # test as dict
    # fix: yaml.load() without an explicit Loader is deprecated since
    # PyYAML 5.1 and raises TypeError in PyYAML 6; safe_load is the
    # correct choice for trusted-but-plain MCF YAML
    mcf_dict = yaml.safe_load(mcf_string)
    mcf = read_mcf(mcf_dict)
    self.assertTrue('metadata' in mcf, 'Expected metadata section')
def test_mcf_version(self):
    """Test MCF version validation"""
    # both fixtures must be rejected with MCFReadError
    for bad_fixture in ('missing-version.yml', 'bad-version.yml'):
        with self.assertRaises(MCFReadError):
            read_mcf(get_abspath(bad_fixture))
def test_json_output_schema(self):
    """test JSON as dict-based output schemas"""
    sample = get_abspath('../sample.yml')

    # default write() serializes to a JSON string
    record = OGCAPIRecordOutputSchema().write(read_mcf(sample))
    self.assertIsInstance(record, str)

    # stringify=False yields the underlying dict
    record = OGCAPIRecordOutputSchema().write(read_mcf(sample),
                                              stringify=False)
    self.assertIsInstance(record, dict)
def test_wmo_wigos(self):
    """test WMO WIGOS Metadata support"""
    mcf = read_mcf(get_abspath('../sample-wmo-wigos.yml'))

    facility = mcf['facility']
    self.assertEqual(len(facility.keys()), 1)
    self.assertEqual(len(facility['first_station']['spatiotemporal']), 1)
def test_19139_2(self):
    """test ISO 19139-2 Metadata support"""
    mcf = read_mcf(get_abspath('../sample.yml'))
    self.assertIn('acquisition', mcf)

    acquisition = mcf['acquisition']
    self.assertIn('platforms', acquisition)
    self.assertIn('instruments', acquisition['platforms'][0])
def test_broken_yaml(self):
    """test against broken YAML"""
    template_dir = ISO19139OutputSchema().template_dir
    with self.assertRaises(MCFReadError):
        # read_mcf raises before rendering ever happens
        render_j2_template(read_mcf(get_abspath('broken-yaml.yml')),
                           template_dir)
def test_pre1900_dates(self):
    """test datestrings that are pre-1900"""
    mcf = read_mcf(get_abspath('dates-pre-1900.yml'))
    xml = render_j2_template(mcf, ISO19139OutputSchema().template_dir)
    self.assertIsInstance(xml, str, 'Expected unicode string')
def load_collection_level_metadata(self):
    """Render each collection-level MCF to ISO 19139 and upsert it."""
    logger.debug('Loading collection level metadata')

    for filename in os.listdir(COLLECTION_LEVEL_METADATA):
        logger.debug(f'collection metadata file: {filename}')
        filepath = os.path.join(COLLECTION_LEVEL_METADATA, filename)
        iso_xml = ISO19139OutputSchema().write(read_mcf(filepath))
        logger.debug(f'Upserting metadata: {filepath}')
        self._parse_and_upsert_metadata(iso_xml)
def test_validate_mcf(self):
    """test MCF validation"""
    # validate a plain MCF and a nested (child) MCF
    for sample in ('../sample.yml', './sample-child.yml'):
        mcf = read_mcf(get_abspath(sample))
        instance = json.loads(json.dumps(mcf, default=json_serial))
        assert validate_mcf(instance)

    with self.assertRaises(MCFValidationError):
        is_valid = validate_mcf({'foo': 'bar'})
def test_nested_mcf(self):
    """test nested mcf support"""
    mcf = read_mcf(get_abspath('child.yml'))

    identifier = mcf['metadata']['identifier']
    self.assertEqual(identifier, 1234, 'Expected specific identifier')

    title = mcf['identification']['title_en']
    self.assertEqual(title, 'title in English', 'Expected specific title')

    self.assertIsInstance(mcf, dict, 'Expected dict')
def test_pretty_print(self):
    """Test pretty-printing"""
    template_dir = ISO19139OutputSchema().template_dir
    raw_xml = render_j2_template(read_mcf(get_abspath('../sample.yml')),
                                 template_dir)

    pretty = pretty_print(raw_xml)
    self.assertIsInstance(pretty, str, 'Expected unicode string')
    self.assertEqual(pretty[-1], '>', 'Expected closing bracket')
    self.assertTrue(pretty.startswith('<?xml'), 'Expected XML declaration')
def test_mcf_model(self):
    """test mcf model and types"""
    mcf = read_mcf(get_abspath('../sample.yml'))

    # fix: failure message previously said 'Expected list' although the
    # assertion checks for dict
    self.assertIsInstance(mcf['identification']['dates'], dict,
                          'Expected dict')
    self.assertIsInstance(mcf['identification']['keywords'], dict,
                          'Expected dict')
    self.assertIsInstance(mcf['identification']['topiccategory'], list,
                          'Expected list')
    self.assertIsInstance(mcf['contact'], dict, 'Expected dict')
    self.assertIsInstance(mcf['distribution'], dict, 'Expected dict')
def test_deep_nested_mcf(self):
    """test deep nested mcf support"""
    mcf = read_mcf(get_abspath('deep-nest-child.yml'))

    # (actual value, expected value, failure message)
    checks = [
        (mcf['metadata']['identifier'], 'MYID',
         'Expected specific identifier'),
        (mcf['identification']['title_en'], 'child title',
         'Expected specific title'),
        (mcf['distribution']['waf']['url'], 'http://dd.meteo.gc.ca',
         'Expected specific URL'),
        (mcf['contact']['main']['positionname'], 'Senior Systems Scientist',
         'Expected specific name'),
    ]
    for actual, expected, message in checks:
        self.assertEqual(actual, expected, message)
def test_nested_mcf(self):
    """test nested mcf support"""
    mcf = read_mcf(get_abspath('child.yml'))

    metadata = mcf['metadata']
    waf = mcf['distribution']['waf']

    self.assertEqual(metadata['identifier'], 5678,
                     'Expected specific identifier')
    self.assertEqual(waf['type'], 'WWW:LINK',
                     'Expected specific distribution type')
    self.assertEqual(waf['url'], 'http://example.org/waf',
                     'Expected specific distribution url')
    self.assertEqual(metadata['datestamp'], datetime.date(2011, 11, 11),
                     'Expected specific metadata datestamp')
    self.assertIsInstance(mcf, dict, 'Expected dict')
def test_render_j2_template(self):
    """test template rendering"""
    test_mcf_paths = ('../sample.yml', 'unilingual.yml',
                      'nil-identification-language.yml')

    for mcf_path in test_mcf_paths:
        iso_os = ISO19139OutputSchema()

        # working template directory
        xml = render_j2_template(read_mcf(get_abspath(mcf_path)),
                                 iso_os.template_dir)
        self.assertIsInstance(xml, str, 'Expected unicode string')

        # no template directory or local schema provided
        with self.assertRaises(RuntimeError):
            render_j2_template(read_mcf(get_abspath(mcf_path)))

        # bad template directory provided
        with self.assertRaises(RuntimeError):
            render_j2_template(read_mcf(get_abspath(mcf_path)), 'bad_dir')

        # bad j2 template_dir provided
        with self.assertRaises(RuntimeError):
            render_j2_template(read_mcf(get_abspath(mcf_path)),
                               template_dir='/bad_schema/path')

        # good j2 template_dir provided
        xml = render_j2_template(
            read_mcf(get_abspath(mcf_path)),
            template_dir=get_abspath('sample_schema_j2'))  # noqa

        # good sample output schema
        _ = SampleOutputSchema().write(read_mcf(get_abspath(mcf_path)))
def _describe_file(filepath):
    """
    Helper function to describe a geospatial data
    First checks if a sidecar mcf file is available, if so uses that
    if not, script will parse the file to retrieve some info from the file

    :param filepath: path to file

    :returns: `dict` of GeoJSON item
    """

    # GeoJSON-item skeleton; filled from the sidecar MCF and/or the data file
    content = {'bbox': None, 'geometry': None, 'properties': {}}

    # sidecar MCF: same basename as the data file with a .yml extension
    mcf_file = '{}.yml'.format(os.path.splitext(filepath)[0])

    if os.path.isfile(mcf_file):
        try:
            # pygeometa is an optional dependency; import lazily so its
            # absence only disables sidecar handling
            from pygeometa.core import read_mcf, MCFReadError
            from pygeometa.schemas.stac import STACItemOutputSchema

            md = read_mcf(mcf_file)
            # NOTE(review): write() is called unbound with the class itself
            # as self — works only if write() never touches instance state;
            # confirm against the pygeometa STAC schema implementation
            stacjson = STACItemOutputSchema.write(STACItemOutputSchema, md)
            stacdata = loads(stacjson)
            # copy every STAC item key (bbox, geometry, properties, ...)
            for k, v in stacdata.items():
                content[k] = v
        except ImportError:
            LOGGER.debug('pygeometa not found')
        except MCFReadError as err:
            LOGGER.warning('MCF error: {}'.format(err))
    else:
        LOGGER.debug('No mcf found at: {}'.format(mcf_file))

    # fall back to inspecting the file itself only if the sidecar did not
    # provide any spatial extent
    if content['geometry'] is None and content['bbox'] is None:
        # rasterio and fiona are optional; without either, return whatever
        # metadata was collected so far (best-effort, not an error)
        try:
            import rasterio
            from rasterio.crs import CRS
            from rasterio.warp import transform_bounds
        except ImportError as err:
            LOGGER.warning('rasterio not found')
            LOGGER.warning(err)
            return content

        try:
            import fiona
        except ImportError as err:
            LOGGER.warning('fiona not found')
            LOGGER.warning(err)
            return content

        try:
            # raster
            LOGGER.debug('Testing raster data detection')
            d = rasterio.open(filepath)
            content['bbox'] = [
                d.bounds.left,
                d.bounds.bottom,
                d.bounds.right,
                d.bounds.top
            ]
            # closed counter-clockwise ring from the raster bounds
            content['geometry'] = {
                'type': 'Polygon',
                'coordinates': [[
                    [d.bounds.left, d.bounds.bottom],
                    [d.bounds.left, d.bounds.top],
                    [d.bounds.right, d.bounds.top],
                    [d.bounds.right, d.bounds.bottom],
                    [d.bounds.left, d.bounds.bottom]
                ]]
            }
            # copy band tags of the last band into properties
            for k, v in d.tags(d.count).items():
                content['properties'][k] = v
                if k in ['GRIB_REF_TIME']:
                    # GRIB reference time tag starts with a unix timestamp
                    # NOTE(review): fromtimestamp() uses local time — verify
                    # whether the trailing 'Z' (UTC) suffix is accurate here
                    value = int(v.split()[0])
                    datetime_ = datetime.fromtimestamp(value)
                    content['properties']['datetime'] = datetime_.isoformat() + 'Z'  # noqa
        except rasterio.errors.RasterioIOError:
            try:
                # not a raster; try vector detection via fiona
                LOGGER.debug('Testing vector data detection')
                d = fiona.open(filepath)
                scrs = CRS(d.crs)
                # reproject bounds to EPSG:4326 when the source CRS differs
                if scrs.to_epsg() is not None and scrs.to_epsg() != 4326:
                    tcrs = CRS.from_epsg(4326)
                    bnds = transform_bounds(scrs, tcrs,
                                            d.bounds[0], d.bounds[1],
                                            d.bounds[2], d.bounds[3])
                    content['properties']['projection'] = scrs.to_epsg()
                else:
                    bnds = d.bounds

                # only geometry-bearing layers get bbox/geometry
                if d.schema['geometry'] not in [None, 'None']:
                    content['bbox'] = [bnds[0], bnds[1], bnds[2], bnds[3]]
                    content['geometry'] = {
                        'type': 'Polygon',
                        'coordinates': [[
                            [bnds[0], bnds[1]],
                            [bnds[0], bnds[3]],
                            [bnds[2], bnds[3]],
                            [bnds[2], bnds[1]],
                            [bnds[0], bnds[1]]
                        ]]
                    }

                # expose the attribute schema (field name -> type string)
                for k, v in d.schema['properties'].items():
                    content['properties'][k] = v

                if d.driver == 'ESRI Shapefile':
                    # register shapefile sidecar files as STAC assets
                    id_ = os.path.splitext(os.path.basename(filepath))[0]
                    content['assets'] = {}
                    for suffix in ['shx', 'dbf', 'prj']:
                        fullpath = '{}.{}'.format(
                            os.path.splitext(filepath)[0], suffix)
                        if os.path.exists(fullpath):
                            filectime = file_modified_iso8601(fullpath)
                            filesize = os.path.getsize(fullpath)
                            content['assets'][suffix] = {
                                'href': './{}.{}'.format(id_, suffix),
                                'created': filectime,
                                'file:size': filesize
                            }
            except fiona.errors.DriverError:
                LOGGER.debug('Could not detect raster or vector data')

    return content
def indexDir(dir, dir_out, dir_out_mode, mode, dbtype, profile, db):
    """
    Walk a directory tree and index its spatial files as metadata.

    :param dir: root folder to crawl (defaults to '.')
    :param dir_out: output folder (or sqlite db location) for export mode
    :param dir_out_mode: 'flat' (all output in dir_out) or 'nested'
                         (output next to the source file)
    :param mode: 'init' (create missing .yaml), 'update' (refresh .yaml,
                 keeping manual edits), or 'export' (render .yaml to xml/db)
    :param dbtype: 'path', 'sqlite' or 'postgres' (postgres not supported)
    :param profile: output profile, 'iso19139' or 'dcat'
    :param db: database name/path; defaults to dir

    :returns: None (side effects: writes yaml/xml files or db rows)
    """
    # normalize arguments to safe defaults
    if not dir:
        dir = "."
    if not dir_out_mode or dir_out_mode not in ["flat", "nested"]:
        dir_out_mode = "flat"
    if not mode or mode not in ["init", "update", "export"]:
        mode = "init"
    if not dbtype or dbtype not in ["path", "sqlite", "postgres"]:
        dbtype = "path"
    if not db:
        db = dir
    if not profile or profile not in ["iso19139", "dcat"]:
        profile = "iso19139"

    print(mode + ' metadata in ' + dir + ' as ' + profile + ' in ' + db)

    if mode == "export":
        if dbtype == 'sqlite':
            dir_out = os.path.join(dir_out, db)
            createIndexIfDoesntExist(dir_out)
        elif dbtype == "path":
            if not os.path.exists(dir_out):
                print('creating out folder ' + dir_out)
                os.makedirs(dir_out)
        else:
            print("postgis not supported")

    # core metadata gets populated by merging the index.yaml content from
    # parent folders
    coreMetadata = {}
    # identify if there is a path change
    prvPath = "dummy"

    for path, dirs, files in os.walk(dir):
        if mode == 'export':
            # if dir has index.yaml merge it to parent
            f = os.path.join(path, 'index.yaml')
            if os.path.exists(f):
                if prvPath != path:
                    print('Indexing path ' + path)
                    prvPath = path
                with open(os.path.join(f), mode="r",
                          encoding="utf-8") as yf:
                    pathMetadata = yaml.load(yf, Loader=SafeLoader)
                    # strip crawler-control keys before merging
                    pathMetadata.pop('index')
                    pathMetadata.pop('mode')
                    dict_merge(pathMetadata, coreMetadata)
                    coreMetadata = pathMetadata
            else:
                print(f + ' does not exist')  # create it?

        for file in files:
            fname = os.path.join(path, file)
            if '.' in file:
                base, extension = file.rsplit('.', 1)
                if extension.lower() in SPATIAL_FILE_TYPES:
                    print('Indexing file ' + fname)
                    yf = os.path.join(path, base + '.yaml')
                    if (mode == 'update'
                            or (not os.path.exists(yf) and mode == 'init')):
                        # mode init for spatial files without metadata,
                        # or update
                        cnt = indexSpatialFile(fname, extension)
                        if (mode == 'update'):
                            # keep manual changes on the original
                            try:
                                with open(os.path.join(yf), mode="r",
                                          encoding="utf-8") as f:
                                    orig = yaml.load(f, Loader=SafeLoader)
                                    # or should we overwrite some values
                                    # from cnt explicitely?
                                    dict_merge(orig, cnt)
                                    cnt = orig
                            except Exception as e:
                                # fix: report the file path, not the
                                # file object
                                print('Failed to merge original:', yf, e)
                        md = asPGM(cnt)
                        # write yf
                        try:
                            with open(os.path.join(yf), 'w') as f:
                                yaml.dump(md, f, sort_keys=False)
                        except Exception as e:
                            print('Failed to dump yaml:', e)
                    elif mode == 'export':
                        try:
                            with open(os.path.join(yf), mode="r",
                                      encoding="utf-8") as f:
                                cnf = yaml.load(f, Loader=SafeLoader)
                                dict_merge(cnf, coreMetadata)
                                if dbtype == 'sqlite' or dbtype == 'postgres':
                                    # fix: was insert_or_update(cnt, ...),
                                    # which used a stale/undefined variable
                                    # from the init/update branch instead of
                                    # the merged config just loaded
                                    insert_or_update(cnf, dir_out)
                                elif dbtype == "path":
                                    # load yml as mcf
                                    md = read_mcf(cnf)
                                    # yaml to iso/dcat
                                    if schemaPath and os.path.exists(
                                            schemaPath):
                                        print('Using schema', schemaPath)
                                        xml_string = render_j2_template(
                                            md,
                                            template_dir="{}/iso19139".format(
                                                schemaPath))
                                    else:
                                        print('Using default iso19139 schema')
                                        iso_os = ISO19139OutputSchema()
                                        xml_string = iso_os.write(md)
                                    if dir_out_mode == "flat":
                                        pth = os.path.join(
                                            dir_out,
                                            cnf['metadata']['identifier']
                                            + '.xml')
                                    else:
                                        pth = os.path.join(
                                            path, base + '.xml')
                                    print("write to file: " + pth)
                                    with open(pth, 'w+') as ff:
                                        ff.write(xml_string)
                                    print('iso19139 xml generated at ' + pth)
                        except Exception as e:
                            print('Failed to create xml:', e)
                else:
                    # not an approved spatial file type; skip silently
                    pass
            else:
                # file without extension; skip
                pass