def test_url_fixing(self):
    """All trailing-slash and '/search' suffix variants normalise to one URL."""
    base = self.test_service
    variants = [base, base + '/', base + '///', base + '/search',
                base + '/search///']
    urls = {SearchConnection(service).url for service in variants}
    # Every variant must have been fixed up to the identical URL.
    assert len(urls) == 1
def test_url_fixing():
    """All trailing-slash and '/search' suffix variants normalise to one URL."""
    variants = [TEST_SERVICE, TEST_SERVICE + '/', TEST_SERVICE + '///',
                TEST_SERVICE + '/search', TEST_SERVICE + '/search///']
    urls = {SearchConnection(service).url for service in variants}
    # Every variant must have been fixed up to the identical URL.
    assert len(urls) == 1
def test_distrib():
    """A distributed search sees strictly more hits than a local-only one."""
    local_conn = SearchConnection(TEST_SERVICE, distrib=False)
    local_hits = local_conn.new_context(project='CMIP5').hit_count
    dist_conn = SearchConnection(TEST_SERVICE, distrib=True)
    dist_hits = dist_conn.new_context(project='CMIP5').hit_count
    assert local_hits < dist_hits
def test_url_fixing(self):
    """URL-suffix variants normalise to one URL; expected warnings are silenced.

    Fix: the original called ``warnings.simplefilter`` /
    ``warnings.resetwarnings`` directly, which clobbers any warning filters
    installed by the test runner or other tests. ``warnings.catch_warnings()``
    restores the previous filter state automatically, even if an assertion
    or constructor raises.
    """
    # Switch off warnings for this case because we are testing that issue
    import warnings
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        conn1 = SearchConnection(self.test_service)
        conn2 = SearchConnection(self.test_service + '/')
        conn3 = SearchConnection(self.test_service + '///')
        conn4 = SearchConnection(self.test_service + '/search')
        conn5 = SearchConnection(self.test_service + '/search///')
    assert conn1.url == conn2.url == conn3.url == conn4.url == conn5.url
def test_shards_constrain(self):
    """A file-context constrains the shard list to the result's index node."""
    conn = SearchConnection(self.test_service, distrib=True)
    ctx = conn.new_context(project='CMIP5')
    first_result = ctx.search()[0]
    f_ctx = first_result.file_context()

    # !TODO: white-box test. Refactor.
    query_dict = f_ctx._build_query()
    full_query = f_ctx.connection._build_query(query_dict,
                                               shards=f_ctx.shards)

    # !TODO: Force fail to see whether shards is passed through.
    # NOTE: 'shards' is NOT even a key in this dictionary. Needs rewrite!!!
    q_shard = full_query['shards']
    # Check it isn't a ',' separated list
    assert ',' not in q_shard
    shard_host = q_shard.split(':')[0]
    assert shard_host == first_result.json['index_node']

    # Now make the query to make sure it returns data from
    # the right index_node
    first_file = f_ctx.search()[0]
    assert first_file.json['index_node'] == first_result.json['index_node']
def search_landsea_mask_by_esgf(resource):
    """
    Search ESGF for a land/sea mask (variable ``sftlf``) matching the NetCDF
    attributes of the NetCDF file ``resource``.

    :param resource: path to a NetCDF file whose global attributes provide
        the search facets (via ``ATTRIBUTE_TO_FACETS_MAP``).
    :raises Exception: if no matching mask is found.
    :return: OpenDAP URL of the first mask file found.

    Fixes over the original:
    - ``dict.iteritems()`` does not exist on Python 3; use ``items()``.
    - the dataset handle was never closed; close it in a ``finally`` block.
    - ``LOGGER.warn`` is a deprecated alias of ``LOGGER.warning``.
    """
    # fill search constraints from nc attributes
    ds = Dataset(resource)
    try:
        attributes = ds.ncattrs()
        constraints = dict(variable="sftlf")
        for attr, facet in ATTRIBUTE_TO_FACETS_MAP.items():
            if attr in attributes:
                constraints[facet] = ds.getncattr(attr)
    finally:
        # Release the file handle even if attribute access fails.
        ds.close()

    # run file search
    conn = SearchConnection(config.esgfsearch_url(),
                            distrib=config.esgfsearch_distrib())
    ctx = conn.new_context(search_type=TYPE_FILE, **constraints)
    if ctx.hit_count == 0:
        raise Exception("Could not find a mask in ESGF for dataset {0}".format(
            os.path.basename(resource)))
    if ctx.hit_count > 1:
        LOGGER.warning("Found more than one mask file.")
    results = ctx.search(batch_size=1)
    return results[0].opendap_url
def test_context_facets1():
    """constrain() keeps earlier facet constraints and adds the new one."""
    conn = SearchConnection(TEST_SERVICE)
    base_ctx = conn.new_context(project='CMIP5')
    narrowed = base_ctx.constrain(model="IPSL-CM5A-LR")
    expected = {'project': 'CMIP5', 'model': 'IPSL-CM5A-LR'}
    for facet, value in expected.items():
        assert narrowed.facet_constraints[facet] == value
def test_get_shard_list():
    """get_shard_list() exposes shards known to the distributed index."""
    conn = SearchConnection(TEST_SERVICE, distrib=True)
    shards = conn.get_shard_list()
    # !NOTE: the exact shard list will change depending on the shard
    # replication configuration on the test server.
    host = 'esgf-index2.ceda.ac.uk'
    assert host in shards
    # IPSL now replicates all non-local shards. Just check it has a few.
    assert len(shards[host]) > 3
def test_result1():
    """The first local CMIP5 hit is a known IPSL dataset version."""
    conn = SearchConnection(TEST_SERVICE, distrib=False)
    ctx = conn.new_context(project='CMIP5')
    first = ctx.search()[0]
    expected = ('cmip5.output1.IPSL.IPSL-CM5A-LR.1pctCO2.3hr.atmos.3hr.'
                'r1i1p1.v20110427|vesg.ipsl.fr')
    assert first.dataset_id == expected
def test_connection_instance(self):
    """SearchConnection works as a context manager with a cached session."""
    import requests_cache
    expiry = datetime.timedelta(hours=1)
    session = requests_cache.core.CachedSession(self.cache,
                                                expire_after=expiry)
    with SearchConnection(self.test_service, session=session) as conn:
        ctx = conn.new_context(project='cmip5')
        assert ctx.facet_constraints['project'] == 'cmip5'
def test_result1():
    """First CMIP5 result on the local index is an IPSL dataset at vesg.ipsl.fr.

    Fix: the original pattern was ``r'cmip5\.output1\.IPSL\..\|vesg.ipsl.fr'``
    where the lone ``.`` matches exactly ONE character before the ``|``
    separator, so it can never match a real dataset id such as
    ``cmip5.output1.IPSL.IPSL-CM5A-LR...|vesg.ipsl.fr``. ``.+`` matches the
    full model/experiment path, consistent with the sibling MOHC test.
    """
    conn = SearchConnection(TEST_SERVICE, distrib=False)
    ctx = conn.new_context(project='CMIP5')
    results = ctx.search()
    r1 = results[0]
    assert re.match(r'cmip5\.output1\.IPSL\..+\|vesg.ipsl.fr', r1.dataset_id)
def test_constrain_freetext():
    """The freetext query survives further facet constraining."""
    conn = SearchConnection(TEST_SERVICE)
    ctx = conn.new_context(project='CMIP5', query='humidity')
    assert ctx.freetext_constraint == 'humidity'
    ctx = ctx.constrain(experiment='historical')
    assert ctx.freetext_constraint == 'humidity'
def test_result1():
    """First local CMIP5 hit is a MOHC dataset hosted at CEDA."""
    conn = SearchConnection(TEST_SERVICE, distrib=False)
    ctx = conn.new_context(project='CMIP5')
    first = ctx.search()[0]
    pattern = r'cmip5\.output1\.MOHC\..+\|esgf-data1.ceda.ac.uk'
    assert re.match(pattern, first.dataset_id)
def test_get_manifest():
    """The manifest for a known GeoMIP dataset carries the expected checksum."""
    conn = SearchConnection(CEDA_SERVICE, distrib=False)
    drs_id = 'GeoMIP.output1.MOHC.HadGEM2-ES.G1.day.atmos.day.r1i1p1'
    manifest = get_manifest(drs_id, 20120223, conn)
    target = 'psl_day_HadGEM2-ES_G1_r1i1p1_19291201-19291230.nc'
    assert manifest[target]['checksum'] == 'd20bbba8e05d6689f44cf3f8eebb9e7b'
def test_facet_count():
    """Facet counts of a fully-constrained facet contain only that value.

    Fix: on Python 3 ``dict.keys()`` returns a view object which never
    compares equal to a list, so the original assertions always failed;
    convert to a list before comparing.
    """
    conn = SearchConnection(TEST_SERVICE)
    context = conn.new_context(project='CMIP5')
    context2 = context.constrain(model="IPSL-CM5A-LR")
    counts = context2.facet_counts
    assert list(counts['model'].keys()) == ['IPSL-CM5A-LR']
    assert list(counts['project'].keys()) == ['CMIP5']
def test_constrain():
    """Adding a facet constraint strictly reduces the hit count."""
    conn = SearchConnection(TEST_SERVICE)
    ctx = conn.new_context(project='CMIP5')
    unconstrained_hits = ctx.hit_count
    ctx = ctx.constrain(model="IPSL-CM5A-LR")
    assert unconstrained_hits > ctx.hit_count
def test_context_facet_multivalue2():
    """constrain() can widen a single-valued facet to several values."""
    conn = SearchConnection(TEST_SERVICE)
    ctx = conn.new_context(project='CMIP5', model='IPSL-CM5A-MR')
    assert ctx.facet_constraints.getall('model') == ['IPSL-CM5A-MR']
    widened = ctx.constrain(model=['IPSL-CM5A-MR', 'IPSL-CM5A-LR'])
    models = sorted(widened.facet_constraints.getall('model'))
    assert models == ['IPSL-CM5A-LR', 'IPSL-CM5A-MR']
def test_index_node(self):
    """Results from a local-only search come from the queried index node."""
    conn = SearchConnection(self.test_service, distrib=False)
    ctx = conn.new_context(project='CMIP5')
    first = ctx.search()[0]
    expected_host = urlparse(self.test_service).hostname
    assert first.index_node == expected_host
def test_file_context(self):
    """A result's file context is constrained to that result's dataset id."""
    conn = SearchConnection(self.test_service, distrib=False)
    ctx = conn.new_context(project='CMIP5')
    first = ctx.search()[0]
    f_ctx = first.file_context()
    assert f_ctx.facet_constraints['dataset_id'] == first.dataset_id
def test_get_manifest():
    """The manifest for a known GeoMIP dataset carries the expected checksum."""
    conn = SearchConnection(CEDA_SERVICE, distrib=False)
    drs_id = 'GeoMIP.output.MOHC.HadGEM2-ES.G1.day.atmos.day.r1i1p1'
    manifest = get_manifest(drs_id, 20120223, conn)
    target = 'psl_day_HadGEM2-ES_G1_r1i1p1_19291201-19291230.nc'
    expected = ('5c459a61cfb904ca235ad1f796227114'
                'df095d9162a2a3f044bc01f881b532ce')
    assert manifest[target]['checksum'] == expected
def test_download_url():
    """File results expose an HTTP download URL ending in '.nc'."""
    conn = SearchConnection(CEDA_SERVICE, distrib=False)
    ctx = conn.new_context()
    results = ctx.search(
        drs_id='GeoMIP.output.MOHC.HadGEM2-ES.G1.day.atmos.day.r1i1p1')
    first_file = results[0].file_context().search()[0]
    assert re.match(r'http://.*\.nc', first_file.download_url)
def test_context_facets3():
    """Search results carry the facet values the context was constrained to."""
    conn = SearchConnection(TEST_SERVICE)
    ctx = conn.new_context(project='CMIP5')
    narrowed = ctx.constrain(model="IPSL-CM5A-LR")
    first = narrowed.search()[0]
    # Facet values come back as single-element lists in the result JSON.
    assert first.json['project'] == ['CMIP5']
    assert first.json['model'] == ['IPSL-CM5A-LR']
def test_context_facets_multivalue():
    """A facet may be constrained to multiple values at once."""
    conn = SearchConnection(TEST_SERVICE)
    ctx = conn.new_context(project='CMIP5')
    multi = ctx.constrain(model=['IPSL-CM5A-LR', 'IPSL-CM5A-MR'])
    assert multi.hit_count > 0
    assert multi.facet_constraints['project'] == 'CMIP5'
    models = sorted(multi.facet_constraints.getall('model'))
    assert models == ['IPSL-CM5A-LR', 'IPSL-CM5A-MR']
def test_get_shard_list(self):
    """get_shard_list() exposes shards known to the distributed index."""
    conn = SearchConnection(self.test_service, cache=self.cache,
                            distrib=True)
    shards = conn.get_shard_list()
    # !NOTE: the exact shard list will change depending on the shard
    # replication configuration on the test server.
    host = 'esgf-index1.ceda.ac.uk'
    assert host in shards
    # IPSL now replicates all non-local shards.
    # Just check it has a few shards.
    assert len(shards[host]) > 1
def test_context_facet_options():
    """get_facet_options() lists the facets still open for constraining.

    Fix: on Python 3 ``dict.keys()`` returns a view that never compares
    equal to a list, so the original assertion always failed; additionally
    the key order of the returned dict is an implementation detail, so the
    comparison is done as sets.
    """
    conn = SearchConnection(TEST_SERVICE)
    context = conn.new_context(project='CMIP5', model='IPSL-CM5A-LR',
                               ensemble='r1i1p1', experiment='rcp60',
                               realm='seaIce')
    expected = {
        'product', 'cf_standard_name', 'variable_long_name',
        'cmor_table', 'time_frequency', 'variable'
    }
    assert set(context.get_facet_options().keys()) == expected
def test_other_index_node():
    """Distributed search for INM data returns results from a remote index.

    Fix: the original used the Python 2 ``print`` statement, which is a
    SyntaxError on Python 3; replaced with the ``print()`` function.
    """
    conn = SearchConnection(TEST_SERVICE, distrib=True)
    ctx = conn.new_context(project='CMIP5', institute='INM')
    results = ctx.search()
    r1 = results[0]
    service = urlparse(TEST_SERVICE)
    print('index_node = %s' % r1.index_node)
    assert r1.index_node is not None
    assert r1.index_node != service.hostname
def test_file_list2(self):
    """Every file result's download URL points under the 'ds/' path."""
    conn = SearchConnection(self.test_service, distrib=False)
    ctx = conn.new_context(project='CMIP5')
    first = ctx.search()[0]
    for file_result in first.file_context().search():
        assert re.search(r'ds/.*\.nc', file_result.download_url)
def test_other_index_node(self):
    """Distributed search for IPSL data returns results from a remote index."""
    conn = SearchConnection(self.test_service, distrib=True)
    ctx = conn.new_context(project='CMIP5', institute='IPSL')
    first = ctx.search()[0]
    local_host = urlparse(self.test_service).hostname
    print('index_node = %s' % first.index_node)
    assert first.index_node is not None
    assert first.index_node != local_host
def test_context_facet_multivalue3():
    """Widening a facet to a second value increases the hit count."""
    conn = SearchConnection(TEST_SERVICE)
    single = conn.new_context(project='CMIP5', query='humidity',
                              experiment='rcp45')
    hits_single = single.hit_count
    assert hits_single > 0
    multi = conn.new_context(project='CMIP5', query='humidity',
                             experiment=['rcp45', 'rcp85'])
    assert multi.hit_count > hits_single
def test_file_list2():
    """File URLs for IPSL-hosted data live under the vesg.ipsl.fr THREDDS server.

    Fix: the original used the Python 2 ``print`` statement, which is a
    SyntaxError on Python 3; replaced with the ``print()`` function.
    """
    conn = SearchConnection(TEST_SERVICE, distrib=False)
    ctx = conn.new_context(project='CMIP5')
    results = ctx.search()
    r1 = results[0]
    f_ctx = r1.file_context()
    file_results = f_ctx.search()
    for file_result in file_results:
        print(file_result.url)
        assert re.match(r'http://vesg.ipsl.fr/thredds/.*\.nc',
                        file_result.url)