def test_group_array_geo(self):
    """Check the RDF graph generated for the ``group_array_geo`` fixture.

    The CDL fixture is compiled to netCDF with ``ncgen``, loaded with
    ``bald.load_netcdf`` and the resulting graph is handed, together with
    the graph parsed from the stored ``group_array_geo.ttl``, to
    ``self.check_result``.  When the ``bald_update_results`` environment
    variable is set, the stored file is rewritten from the fresh output
    before it is read back.
    """
    name = 'group_array_geo'
    with self.temp_filename('.nc') as tfile:
        source_cdl = os.path.join(self.cdl_path, '{}.cdl'.format(name))
        subprocess.check_call(['ncgen', '-o', tfile, source_cdl])

        # Only the standard-name alias is supplied for this fixture.
        aliases = {'cf_sname': 'http://vocab.nerc.ac.uk/standard_name/'}
        root_container = bald.load_netcdf(
            tfile,
            baseuri='file://CDL/{}.cdl'.format(name),
            alias_dict=aliases,
            cache=self.acache,
        )
        rdfgraph = root_container.rdfgraph()
        ttl = rdfgraph.serialize(format='n3').decode("utf-8")

        stored_path = os.path.join(self.ttl_path, '{}.ttl'.format(name))
        if os.environ.get('bald_update_results') is not None:
            with open(stored_path, 'w') as out:
                out.write(ttl)

        expected_rdfgraph = rdflib.Graph()
        with open(stored_path, 'r') as stored:
            expected_rdfgraph.parse(stored, format='n3')
        self.check_result(rdfgraph, expected_rdfgraph)
def test_votemper(self):
    """Check the RDF graph generated for the ``votemper`` fixture.

    Compiles ``votemper.cdl`` with ``ncgen``, loads the result through
    ``bald.load_netcdf`` with three aliases, and passes the produced graph
    and the graph parsed from ``votemper.ttl`` to ``self.check_result``.
    Setting ``bald_update_results`` in the environment rewrites the stored
    file first.
    """
    name = 'votemper'
    with self.temp_filename('.nc') as tfile:
        source_cdl = os.path.join(self.cdl_path, '{}.cdl'.format(name))
        subprocess.check_call(['ncgen', '-o', tfile, source_cdl])

        aliases = {
            'NetCDF': 'http://def.scitools.org.uk/NetCDF',
            'CFTerms': 'http://def.scitools.org.uk/CFTerms',
            'cf_sname': 'http://vocab.nerc.ac.uk/standard_name/',
        }
        root_container = bald.load_netcdf(
            tfile,
            baseuri='file://CDL/{}.cdl'.format(name),
            alias_dict=aliases,
            cache=self.acache,
        )
        rdfgraph = root_container.rdfgraph()
        ttl = rdfgraph.serialize(format='n3').decode("utf-8")

        stored_path = os.path.join(self.ttl_path, '{}.ttl'.format(name))
        if os.environ.get('bald_update_results') is not None:
            with open(stored_path, 'w') as out:
                out.write(ttl)

        expected_rdfgraph = rdflib.Graph()
        with open(stored_path, 'r') as stored:
            expected_rdfgraph.parse(stored, format='n3')
        self.check_result(rdfgraph, expected_rdfgraph)
def test_array_reference_external_prefix_context(self):
    """Check graph generation when prefixes come from an external context.

    A JSON-encoded ``@context`` mapping (``rdf``, ``rdfs``, ``bald``) is
    passed to ``bald.load_netcdf`` as ``prefix_contexts``.  The produced
    graph and the graph parsed from the stored
    ``array_reference_external_prefix_context.ttl`` are handed to
    ``self.check_result``.  Setting ``bald_update_results`` in the
    environment rewrites the stored file first.
    """
    cdlname = 'array_reference_external_prefix_context.cdl'
    with self.temp_filename('.nc') as tfile:
        subprocess.check_call(
            ['ncgen', '-o', tfile, os.path.join(self.cdl_path, cdlname)])

        namespaces = {
            'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
            'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
            'bald': 'https://www.opengis.net/def/binary-array-ld/',
        }
        prefix_context = json.dumps({'@context': namespaces})

        root_container = bald.load_netcdf(
            tfile,
            baseuri='file://CDL/{}'.format(cdlname),
            prefix_contexts=prefix_context,
            cache=self.acache,
        )
        rdfgraph = root_container.rdfgraph()
        ttl = rdfgraph.serialize(format='n3').decode("utf-8")

        stored_path = os.path.join(
            self.ttl_path, 'array_reference_external_prefix_context.ttl')
        if os.environ.get('bald_update_results') is not None:
            with open(stored_path, 'w') as out:
                out.write(ttl)

        expected_rdfgraph = rdflib.Graph()
        with open(stored_path, 'r') as stored:
            expected_rdfgraph.parse(stored, format='n3')
        self.check_result(rdfgraph, expected_rdfgraph)
def test_conformance_c(self):
    """Compare bald's n3 output for the downloaded ``ogcClassC`` example.

    The CDL source and the expected TTL are both fetched from
    ``OGCFiles`` with ``requests``.  The CDL is compiled with ``ncgen``,
    loaded through ``bald.load_netcdf`` and serialised as n3; the text
    must equal the downloaded TTL exactly.

    Both downloads are staged as scratch files inside ``self.cdl_path`` /
    ``self.ttl_path``.  They are removed in ``finally`` blocks so that a
    failed download, ``ncgen`` run or load does not leave stray files in
    the fixture directories.  Each response is validated *before* its
    scratch file is opened, so a failed download no longer creates an
    empty file.

    Raises:
        ValueError: if either download does not return HTTP 200.
    """
    with self.temp_filename('.nc') as tfile:
        cdlname = 'ogcClassC.cdl'
        cdl_file = os.path.join(self.cdl_path, cdlname)
        try:
            cdluri = '{}CDL/ogcClassC.cdl'.format(OGCFiles)
            r = requests.get(cdluri)
            if r.status_code != 200:
                raise ValueError(
                    'CDL download failed for {}'.format(cdluri))
            with open(cdl_file, 'w') as cdlf:
                cdlf.write(r.text)

            subprocess.check_call(['ncgen', '-o', tfile, cdl_file])
            cdl_file_uri = 'http://secret.binary-array-ld.net/alias.nc'
            alias_dict = {'NetCDF': 'http://def.scitools.org.uk/NetCDF'}
            root_container = bald.load_netcdf(tfile, baseuri=cdl_file_uri,
                                              alias_dict=alias_dict,
                                              cache=self.acache)
            ttl = root_container.rdfgraph().serialize(
                format='n3').decode("utf-8")

            # NOTE(review): the scratch name says "ClassA" although the
            # class C TTL is downloaded into it -- looks like a copy/paste
            # leftover; confirm before renaming.
            ttl_file = os.path.join(self.ttl_path, 'ogcClassA.ttl')
            try:
                ttluri = '{}TTL/ogcClassC.ttl'.format(OGCFiles)
                r = requests.get(ttluri)
                if r.status_code != 200:
                    raise ValueError(
                        'TTL download failed for {}'.format(ttluri))
                with open(ttl_file, 'w') as ttlf:
                    ttlf.write(r.text)
                with open(ttl_file, 'r') as sf:
                    expected_ttl = sf.read()
            finally:
                if os.path.exists(ttl_file):
                    os.remove(ttl_file)
        finally:
            if os.path.exists(cdl_file):
                os.remove(cdl_file)
        self.assertEqual(expected_ttl, ttl)
def nc2schemaorg(ncfilename, outformat, baseuri=None):
    """Print a schema.org description of a netCDF file.

    The file is loaded with ``bald.load_netcdf``, its RDF graph is passed
    to ``baldgraph2schemaorg`` and the resulting graph is serialised in
    ``outformat`` and printed to standard output.

    Args:
        ncfilename: netCDF file to load; also passed on as ``path``.
        outformat: serialisation format name.  ``'json-ld'`` is serialised
            with the ``http://schema.org/`` context and ``indent=4``.
        baseuri: optional base URI forwarded to both bald calls.
    """
    root_container = bald.load_netcdf(ncfilename, baseuri=baseuri)
    graph = root_container.rdfgraph()
    schema_g = baldgraph2schemaorg(graph, path=ncfilename, baseuri=baseuri)

    if outformat == 'json-ld':
        context = "http://schema.org/"
        s = schema_g.serialize(format=outformat, context=context, indent=4)
    else:
        s = schema_g.serialize(format=outformat)

    # serialize() returns bytes or str depending on the rdflib version, so
    # decode only when needed.  Previously one branch decoded and the
    # other did not, so one of them misbehaved whichever type came back.
    if isinstance(s, bytes):
        s = s.decode("utf-8")
    print(s)
def nc2schemaorg(ncfilename, outformat, baseuri=None):
    """Print a schema.org description of a netCDF file.

    The file is loaded with ``bald.load_netcdf``, its RDF graph is passed
    to ``baldgraph2schemaorg`` and the resulting graph is serialised in
    ``outformat`` and printed to standard output.

    Args:
        ncfilename: netCDF file to load; also passed on as ``path``.
        outformat: serialisation format name.  ``'json-ld'`` is serialised
            with the ``http://schema.org/`` context and ``indent=4``.
        baseuri: optional base URI forwarded to both bald calls.
    """
    root_container = bald.load_netcdf(ncfilename, baseuri=baseuri)
    graph = root_container.rdfgraph()
    schema_g = baldgraph2schemaorg(graph, path=ncfilename, baseuri=baseuri)

    if outformat == 'json-ld':
        context = "http://schema.org/"
        s = schema_g.serialize(format=outformat, context=context, indent=4)
    else:
        s = schema_g.serialize(format=outformat)

    # serialize() returns bytes on older rdflib releases and str on newer
    # ones; an unconditional .decode() fails on str.
    if isinstance(s, bytes):
        s = s.decode("utf-8")
    print(s)
def test_load(self):
    """Check the Python container types used for list and set collections.

    After loading the ``multi_array_reference`` fixture, the contained
    item identified as ``.../list_collection`` must expose
    ``bald__references`` as a ``list`` and ``.../set_collection`` as a
    ``set``.  Items with any other identity are not checked.
    """
    cdlfile = os.path.join(self.cdl_path, 'multi_array_reference.cdl')
    # (identity, expected container type) pairs, tested in this order.
    expectations = (
        ('file://CDL/multi_array_reference.nc/list_collection', list),
        ('file://CDL/multi_array_reference.nc/set_collection', set),
    )
    with self.temp_filename('.nc') as tfile:
        subprocess.check_call(['ncgen', '-o', tfile, cdlfile])
        inputs = bald.load_netcdf(
            tfile,
            baseuri='file://CDL/multi_array_reference.nc',
            cache=self.acache,
        )
        for contained in inputs.bald__contains:
            for identity, container_type in expectations:
                if contained.identity == identity:
                    self.assertTrue(
                        isinstance(contained.bald__references,
                                   container_type))
                    # First match wins, mirroring an if/elif chain.
                    break
def test_array_reference(self):
    """Compare the HTML view of ``array_reference`` with the stored page.

    The fixture is compiled with ``ncgen`` and loaded with
    ``bald.load_netcdf`` (no base URI, no cache); the output of
    ``viewgraph()`` is compared to ``array_reference.html`` using
    ``self.assertStringEqual``.
    """
    stored_path = os.path.join(self.html_path, 'array_reference.html')
    with self.temp_filename('.nc') as tfile:
        subprocess.check_call(
            ['ncgen', '-o', tfile,
             os.path.join(self.cdl_path, 'array_reference.cdl')])
        html = bald.load_netcdf(tfile).viewgraph()

        # To regenerate the stored page by hand, write ``html`` to
        # ``stored_path`` here.
        with open(stored_path, 'r') as stored:
            expected_html = stored.read()
        self.assertStringEqual(expected_html, html)
def test_array_reference_with_baseuri(self):
    """Compare n3 output for ``array_reference`` loaded with a base URI.

    The fixture is loaded with ``baseuri='http://example.org/base'`` and
    its n3 serialisation must equal the text stored in
    ``array_reference_withbase.ttl``.  Setting ``bald_update_results`` in
    the environment rewrites the stored file first.
    """
    with self.temp_filename('.nc') as tfile:
        source_cdl = os.path.join(self.cdl_path, 'array_reference.cdl')
        subprocess.check_call(['ncgen', '-o', tfile, source_cdl])

        root_container = bald.load_netcdf(
            tfile, baseuri='http://example.org/base', cache=self.acache)
        graph = root_container.rdfgraph()
        ttl = graph.serialize(format='n3').decode("utf-8")

        stored_path = os.path.join(
            self.ttl_path, 'array_reference_withbase.ttl')
        if os.environ.get('bald_update_results') is not None:
            with open(stored_path, 'w') as out:
                out.write(ttl)
        with open(stored_path, 'r') as stored:
            expected_ttl = stored.read()
        self.assertEqual(expected_ttl, ttl)
def test_multi_array_reference(self):
    """Compare n3 output for ``multi_array_reference`` with the stored text.

    The fixture is compiled with ``ncgen`` and loaded with
    ``bald.load_netcdf`` (no base URI, no cache); the n3 serialisation
    must equal the content of ``multi_array_reference.ttl``.
    """
    stored_path = os.path.join(self.ttl_path, 'multi_array_reference.ttl')
    with self.temp_filename('.nc') as tfile:
        subprocess.check_call(
            ['ncgen', '-o', tfile,
             os.path.join(self.cdl_path, 'multi_array_reference.cdl')])
        graph = bald.load_netcdf(tfile).rdfgraph()
        ttl = graph.serialize(format='n3').decode("utf-8")

        # To regenerate the stored text by hand, write ``ttl`` to
        # ``stored_path`` here.
        with open(stored_path, 'r') as stored:
            expected_ttl = stored.read()
        self.assertEqual(expected_ttl, ttl)
def test_turtle(self):
    """Compare n3 output for ``multi_array_reference`` loaded with a base URI.

    The fixture is loaded with ``file://CDL/multi_array_reference.cdl`` as
    base URI and its n3 serialisation must equal the text stored in
    ``multi_array_reference.ttl``.  Setting ``bald_update_results`` in the
    environment rewrites the stored file first.
    """
    cdlname = 'multi_array_reference.cdl'
    with self.temp_filename('.nc') as tfile:
        subprocess.check_call(
            ['ncgen', '-o', tfile, os.path.join(self.cdl_path, cdlname)])

        root_container = bald.load_netcdf(
            tfile,
            baseuri='file://CDL/{}'.format(cdlname),
            cache=self.acache,
        )
        graph = root_container.rdfgraph()
        ttl = graph.serialize(format='n3').decode("utf-8")

        stored_path = os.path.join(
            self.ttl_path, 'multi_array_reference.ttl')
        if os.environ.get('bald_update_results') is not None:
            with open(stored_path, 'w') as out:
                out.write(ttl)
        with open(stored_path, 'r') as stored:
            expected_ttl = stored.read()
        self.assertEqual(expected_ttl, ttl)
def test_multi_array_reference(self):
    """Compare the HTML view of ``multi_array_reference`` with the stored page.

    The fixture is loaded with ``file://CDL/multi_array_reference.cdl`` as
    base URI; the output of ``viewgraph()`` is compared to
    ``multi_array_reference.html`` using ``self.assertStringEqual``.
    """
    cdlname = 'multi_array_reference.cdl'
    stored_path = os.path.join(self.html_path, 'multi_array_reference.html')
    with self.temp_filename('.nc') as tfile:
        subprocess.check_call(
            ['ncgen', '-o', tfile, os.path.join(self.cdl_path, cdlname)])
        root_container = bald.load_netcdf(
            tfile,
            baseuri='file://CDL/{}'.format(cdlname),
            cache=self.acache,
        )
        html = root_container.viewgraph()

        # To regenerate the stored page by hand, write ``html`` to
        # ``stored_path`` here.
        with open(stored_path, 'r') as stored:
            expected_html = stored.read()
        self.assertStringEqual(expected_html, html)
def test_ereefs(self):
    """Compare n3 output for the ``ereefs_gbr4_ncld`` fixture with stored text.

    The fixture is compiled with ``ncgen``, loaded with its
    ``file://CDL/...cdl`` base URI, and the n3 serialisation must equal
    the content of ``ereefs_gbr4_ncld.ttl``.  Setting
    ``bald_update_results`` in the environment rewrites the stored file
    first.
    """
    name = 'ereefs_gbr4_ncld'
    with self.temp_filename('.nc') as tfile:
        source_cdl = os.path.join(self.cdl_path, '{}.cdl'.format(name))
        subprocess.check_call(['ncgen', '-o', tfile, source_cdl])

        root_container = bald.load_netcdf(
            tfile,
            baseuri='file://CDL/{}.cdl'.format(name),
            cache=self.acache,
        )
        graph = root_container.rdfgraph()
        ttl = graph.serialize(format='n3').decode("utf-8")

        stored_path = os.path.join(self.ttl_path, '{}.ttl'.format(name))
        if os.environ.get('bald_update_results') is not None:
            with open(stored_path, 'w') as out:
                out.write(ttl)
        with open(stored_path, 'r') as stored:
            expected_ttl = stored.read()
        self.assertEqual(expected_ttl, ttl)
def generateMetadata(targetUrl, targetRemote, targetBucket, stagingPrefix, metadataVocab, metadataFormat):
    """Write a serialised RDF metadata object for each staged S3 object.

    Every object in ``targetBucket`` whose key starts with
    ``stagingPrefix`` (except keys ending in ``/``) is loaded through
    ``bald.load_netcdf`` using an ``s3://<remote>/<bucket>/<key>`` address.
    Its RDF graph is serialised and stored back in the same bucket under
    the original key with the extension replaced by ``.json``.

    Args:
        targetUrl: endpoint URL handed to the S3 ``resource``.
        targetRemote: remote name placed after ``s3://`` in the address
            given to bald.
        targetBucket: bucket that is both read and written.
        stagingPrefix: key prefix selecting the objects to process.
        metadataVocab: value used for ``@vocab`` in the serialisation
            context.
        metadataFormat: rdflib serialisation format name.

    Credentials are read from the ``S3_ACCESS_KEY`` and ``S3_SECRET_KEY``
    environment variables.
    """
    access_key = os.getenv('S3_ACCESS_KEY')
    secret_key = os.getenv('S3_SECRET_KEY')
    metadataExt = 'json'

    s3 = resource('s3', aws_access_key_id=access_key,
                  aws_secret_access_key=secret_key,
                  endpoint_url=targetUrl)
    bucket = s3.Bucket(targetBucket)

    for s3object in bucket.objects.filter(Prefix=stagingPrefix):
        # Keys ending in '/' are skipped (presumably folder placeholders).
        if s3object.key.endswith('/'):
            continue

        s3endpoint = 's3://' + targetRemote + '/' + targetBucket + '/' + s3object.key
        ncdf = bald.load_netcdf(s3endpoint)

        context = {'@vocab': metadataVocab}
        metadata = ncdf.rdfgraph().serialize(format=metadataFormat,
                                             context=context, indent=4)
        # serialize() returns bytes on older rdflib releases and str on
        # newer ones; an unconditional .decode() fails on str.
        if isinstance(metadata, bytes):
            metadata = metadata.decode('utf-8')

        outputKey = os.path.splitext(s3object.key)[0] + '.' + metadataExt
        outputObject = s3.Object(targetBucket, outputKey)
        outputObject.put(Body=metadata)
def setUp(self):
    """Expose the merged graph of all CDL fixtures as ``self.graph``.

    The module-level ``loaded_boolean`` list acts as a "done" flag and
    ``thisGraph[0]`` holds the accumulated graph.  When the flag list is
    still empty, every ``*.cdl`` file next to this module (in ``CDL/``) is
    compiled with ``ncgen``, loaded with ``bald.load_netcdf`` and added to
    the accumulated graph; afterwards the flag is set so later calls skip
    the work.
    """
    self.cdl_path = os.path.join(os.path.dirname(__file__), 'CDL')

    # An earlier call may already have populated the shared graph.
    if not loaded_boolean:
        pattern = os.path.join(self.cdl_path, '*.cdl')
        for cdl_file in glob.glob(pattern):
            with self.temp_filename('.nc') as tfile:
                subprocess.check_call(['ncgen', '-o', tfile, cdl_file])
                container = bald.load_netcdf(tfile, cache=self.acache)
                curr_g = container.rdfgraph()

            # Fold this file's graph into the shared one.
            thisGraph[0] = thisGraph[0] + curr_g

        print('setting loaded_boolean')
        loaded_boolean.append(True)

    self.graph = thisGraph[0]
def test_ProcessChain0300(self):
    """Compare n3 output for the ``ProcessChain0300`` fixture with stored text.

    The fixture is compiled with ``ncgen`` and loaded with its
    ``file://CDL/...cdl`` base URI and the ``CFTerms`` / ``cf_sname``
    aliases; the n3 serialisation must equal the content of
    ``ProcessChain0300.ttl``.  Setting ``bald_update_results`` in the
    environment rewrites the stored file first.
    """
    name = 'ProcessChain0300'
    with self.temp_filename('.nc') as tfile:
        source_cdl = os.path.join(self.cdl_path, '{}.cdl'.format(name))
        subprocess.check_call(['ncgen', '-o', tfile, source_cdl])

        aliases = {
            'CFTerms': 'http://def.scitools.org.uk/CFTerms',
            'cf_sname': 'http://vocab.nerc.ac.uk/standard_name/',
        }
        root_container = bald.load_netcdf(
            tfile,
            baseuri='file://CDL/{}.cdl'.format(name),
            alias_dict=aliases,
            cache=self.acache,
        )
        graph = root_container.rdfgraph()
        ttl = graph.serialize(format='n3').decode("utf-8")

        stored_path = os.path.join(self.ttl_path, '{}.ttl'.format(name))
        if os.environ.get('bald_update_results') is not None:
            with open(stored_path, 'w') as out:
                out.write(ttl)
        with open(stored_path, 'r') as stored:
            expected_ttl = stored.read()
        self.assertEqual(expected_ttl, ttl)
def generateMetadata(targetRemote, targetBucket, stagingPrefix, metadataVocab, metadataFormat):
    """Write a serialised RDF metadata object for each staged S3 object.

    The S3 handle comes from ``getTargetS3(targetRemote)``.  Every object
    in ``targetBucket`` whose key starts with ``stagingPrefix`` and does
    not end in ``/`` is loaded through ``bald.load_netcdf`` using an
    ``s3://<remote>/<bucket>/<key>`` address.  Its graph is serialised
    with ``metadataVocab`` as ``@vocab`` and uploaded to the same bucket
    under the original key with the extension replaced by ``.json``, with
    content type ``application/json``.
    """
    metadataExt = 'json'
    s3 = getTargetS3(targetRemote)
    bucket = s3.Bucket(targetBucket)
    s3uri = "s3://" + targetRemote

    for s3object in bucket.objects.filter(Prefix=stagingPrefix):
        key = s3object.key
        if not key.endswith('/'):
            ncdf = bald.load_netcdf(s3uri + '/' + targetBucket + '/' + key)
            graph = ncdf.rdfgraph()
            metadata = graph.serialize(
                format=metadataFormat,
                context={'@vocab': metadataVocab},
                indent=4,
            )
            stem = os.path.splitext(s3object.key)[0]
            target = s3.Object(targetBucket, stem + '.' + metadataExt)
            target.put(Body=metadata, ContentType='application/json')
def test_array_reference(self):
    """Compare the HTML view of ``array_reference`` with the stored page.

    The fixture is loaded with ``file://CDL/array_reference.cdl`` as base
    URI; the output of ``viewgraph()`` is compared to
    ``array_reference.html`` using ``self.assertStringEqual``.  Setting
    ``bald_update_results`` in the environment rewrites the stored page
    first.
    """
    cdlname = 'array_reference.cdl'
    with self.temp_filename('.nc') as tfile:
        subprocess.check_call(
            ['ncgen', '-o', tfile, os.path.join(self.cdl_path, cdlname)])
        root_container = bald.load_netcdf(
            tfile,
            baseuri='file://CDL/{}'.format(cdlname),
            cache=self.acache,
        )
        html = root_container.viewgraph()

        stored_path = os.path.join(self.html_path, 'array_reference.html')
        if os.environ.get('bald_update_results') is not None:
            with open(stored_path, 'w') as out:
                out.write(html)
        with open(stored_path, 'r') as stored:
            expected_html = stored.read()
        self.assertStringEqual(expected_html, html)
def test_array_reference_with_baseuri(self):
    """Check the graph for ``array_reference`` loaded with a base URI.

    The fixture is loaded with ``baseuri='http://example.org/base'``.  The
    produced graph and the graph parsed from
    ``array_reference_withbase.ttl`` are handed to ``self.check_result``.
    Setting ``bald_update_results`` in the environment rewrites the stored
    file first.
    """
    with self.temp_filename('.nc') as tfile:
        source_cdl = os.path.join(self.cdl_path, 'array_reference.cdl')
        subprocess.check_call(['ncgen', '-o', tfile, source_cdl])

        root_container = bald.load_netcdf(
            tfile, baseuri='http://example.org/base', cache=self.acache)
        rdfgraph = root_container.rdfgraph()
        ttl = rdfgraph.serialize(format='n3').decode("utf-8")

        stored_path = os.path.join(
            self.ttl_path, 'array_reference_withbase.ttl')
        if os.environ.get('bald_update_results') is not None:
            with open(stored_path, 'w') as out:
                out.write(ttl)

        expected_rdfgraph = rdflib.Graph()
        with open(stored_path, 'r') as stored:
            expected_rdfgraph.parse(stored, format='n3')
        self.check_result(rdfgraph, expected_rdfgraph)
def test_hgroups_schema_dot_org(self):
    """Check the schema.org graph derived from the Unidata hgroups example.

    The CDL example is downloaded with ``requests``, written to a
    temporary file, compiled with ``ncgen`` and loaded through
    ``bald.load_netcdf`` with the download URL as ``file_locator`` and the
    URL plus ``/`` as base URI.  The graph returned by
    ``bald.schemaOrg(...).getSchemaOrgGraph()`` and the graph parsed from
    ``hgroups_schema.org.ttl`` are handed to ``self.check_result``.
    Setting ``bald_update_results`` in the environment rewrites the stored
    file first.

    Raises:
        ValueError: if the download does not return HTTP 200.
    """
    with self.temp_filename('.nc') as tfile:
        name = 'hgroups_schema.org'
        hgurl = 'https://www.unidata.ucar.edu/software/netcdf/examples/test_hgroups.cdl'
        baseuri = hgurl + '/'
        res = requests.get(hgurl)
        if res.status_code != 200:
            raise ValueError('{} failed to download: {}'.format(
                hgurl, res.status_code))

        # ncgen must run while the temporary CDL file is still open for
        # use, i.e. inside this ``with`` block.
        with self.temp_filename('.cdl.') as cdlfile:
            with open(cdlfile, 'w') as fh:
                fh.write(res.text)
            subprocess.check_call(['ncgen', '-o', tfile, cdlfile])

        alias_dict = {
            'NetCDF': 'http://def.scitools.org.uk/NetCDF',
            'CFTerms': 'http://def.scitools.org.uk/CFTerms',
            'cf_sname': 'http://vocab.nerc.ac.uk/standard_name/'
        }
        root_container = bald.load_netcdf(tfile, baseuri=baseuri,
                                          alias_dict=alias_dict,
                                          cache=self.acache,
                                          file_locator=hgurl)
        rdfgraph = root_container.rdfgraph()
        schema_org_inst = bald.schemaOrg(
            rdfgraph, hgurl, baseuri).getSchemaOrgGraph()
        ttl = schema_org_inst.serialize(format='n3').decode("utf-8")

        ttl_file = os.path.join(self.ttl_path, '{}.ttl'.format(name))
        if os.environ.get('bald_update_results') is not None:
            with open(ttl_file, 'w') as sf:
                sf.write(ttl)
        with open(ttl_file, 'r') as sf:
            expected_rdfgraph = rdflib.Graph()
            expected_rdfgraph.parse(sf, format='n3')
        self.check_result(schema_org_inst, expected_rdfgraph)
def nc2rdf(ncfilename, outformat, baseuri=None):
    """Print the RDF graph of a netCDF file in the requested format.

    Args:
        ncfilename: netCDF file handed to ``bald.load_netcdf``.
        outformat: rdflib serialisation format name.
        baseuri: optional base URI forwarded to ``bald.load_netcdf``.
    """
    root_container = bald.load_netcdf(ncfilename, baseuri=baseuri)
    ttl = root_container.rdfgraph().serialize(format=outformat)
    # serialize() returns bytes on older rdflib releases and str on newer
    # ones; an unconditional .decode() fails on str.
    if isinstance(ttl, bytes):
        ttl = ttl.decode("utf-8")
    print(ttl)
def nc2rdf(ncfilename, outformat):
    """Print the RDF graph of a netCDF file in the requested format.

    Args:
        ncfilename: netCDF file handed to ``bald.load_netcdf``.
        outformat: rdflib serialisation format name.
    """
    root_container = bald.load_netcdf(ncfilename)
    ttl = root_container.rdfgraph().serialize(format=outformat)
    # serialize() returns bytes on older rdflib releases and str on newer
    # ones; an unconditional .decode() fails on str.
    if isinstance(ttl, bytes):
        ttl = ttl.decode("utf-8")
    print(ttl)
def nc2rdf(ncfilename, outformat, baseuri=None):
    """Print the RDF graph of a netCDF file in the requested format.

    Args:
        ncfilename: netCDF file handed to ``bald.load_netcdf``.
        outformat: rdflib serialisation format name.
        baseuri: optional base URI forwarded to ``bald.load_netcdf``.
    """
    root_container = bald.load_netcdf(ncfilename, baseuri=baseuri)
    ttl = root_container.rdfgraph().serialize(format=outformat)
    # serialize() returns bytes on older rdflib releases and str on newer
    # ones; an unconditional .decode() fails on str.
    if isinstance(ttl, bytes):
        ttl = ttl.decode("utf-8")
    print(ttl)