def __init__(self, **kwargs):

    # list to hold all the parameter names; will be accessed in super to
    # construct dependency graph
    self.provided_params = self.__required_params + self.__optional_params

    # check that all required params have been provided
    for param in self.__required_params:
        if param not in kwargs:
            raise GeoEDFError('Required parameter %s for DamFilter not provided' % param)

    # set all required parameters
    for key in self.__required_params:
        setattr(self, key, kwargs.get(key))

    # set optional parameters
    for key in self.__optional_params:
        # if key is not provided in optional arguments, default its value to None
        setattr(self, key, kwargs.get(key, None))

    # check that at least one of the optional params has been provided
    # note that shapefile takes precedence
    if self.shapefile is None and self.extent is None:
        raise GeoEDFError('Either a shapefile path or extent needs to be provided for DamFilter')

    # initialize filter values array
    self.values = []

    # call super class init
    super().__init__()

def filter(self):

    # set lat-lon limits for CONUS to check whether a state falls within them
    latmin = 24
    latmax = 50
    lonmin = -125
    lonmax = -65

    # load the TIGER states shapefile
    driver = ogr.GetDriverByName('ESRI Shapefile')
    inDataset = driver.Open(self.__states_shapefile, 0)

    if inDataset is None:
        raise GeoEDFError('Error opening Tiger States shapefile in CONUSStateFilter')

    inLayer = inDataset.GetLayer()

    # for each state feature, check if its lat-lon falls within the CONUS limits
    try:
        # loop through features in the layer and retrieve the state USPS code and lat-lon
        for feature in inLayer:
            state_code = feature.GetField("STUSPS")
            state_lat = float(feature.GetField("INTPTLAT"))
            state_lon = float(feature.GetField("INTPTLON"))
            if latmin < state_lat < latmax:
                if lonmin < state_lon < lonmax:
                    self.values.append(state_code)
    except:
        raise GeoEDFError("Error processing Tiger states shapefile in CONUSStateFilter")

def filter(self):

    # convert the start and end dates from strings to Pandas DateTime
    try:
        # check if a time component is present
        if self.has_time:
            start_date = pd.to_datetime(self.start, format='%m/%d/%Y %H:%M:%S')
        else:
            start_date = pd.to_datetime(self.start, format='%m/%d/%Y')

        if self.end is not None:
            if self.has_time:
                end_date = pd.to_datetime(self.end, format='%m/%d/%Y %H:%M:%S')
            else:
                end_date = pd.to_datetime(self.end, format='%m/%d/%Y')
    except ValueError as e:
        raise GeoEDFError('Invalid values provided for start or end date to DateTimeFilter : %s' % e)
    except:
        raise GeoEDFError('Invalid values provided for start or end date to DateTimeFilter')

    # use the period to generate all intervening dates
    try:
        # if exact_dates is not requested and the period is n days, process differently:
        # essentially reset the start date so that it aligns with the period
        if (not self.exact_dates) and (self.period[-1:] == 'D'):
            start_year = start_date.strftime('%Y')
            start_day_of_year = int(start_date.strftime('%j'))
            period_num = int(self.period[:-1])
            # check if the start day aligns with the period
            if (start_day_of_year - 1) % period_num > 0:
                new_start_day_of_year = math.ceil((start_day_of_year - 1) / period_num) * period_num + 1
                start_date = pd.to_datetime('%d/%s' % (new_start_day_of_year, start_year), format='%j/%Y')

        if self.end is not None:
            all_dates = pd.date_range(start=start_date, end=end_date, freq=self.period)
        else:
            all_dates = [start_date]

        # convert back to strings using the pattern
        for dt in all_dates:
            self.values.append(dt.strftime(self.pattern))
    except ValueError as e:
        raise GeoEDFError('Error applying DateTimeFilter : %s' % e)
    except:
        raise GeoEDFError('Unknown error applying DateTimeFilter')

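# Hedged worked example (not part of the original module): the alignment above
# snaps the start date forward to the next day-of-year on the period grid
# 1, 1+n, 1+2n, ... For instance, with period '8D' (n = 8) and a start date on
# day-of-year 5:
#
#   (5 - 1) % 8 == 4  -> not aligned
#   new_start_day_of_year = math.ceil((5 - 1) / 8) * 8 + 1 == 9
#
# so the generated dates begin on day 9 of the start year instead of day 5,
# matching products (e.g. 8-day composites) that are published on that grid.
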
def getFileList(url, auth):

    if '*' in url:  # has wildcard
        # first get the base URL to get a listing of files
        partitioned = url.rpartition('/')
        base_url = partitioned[0]
        poss_filename = partitioned[2]

        # naive check whether poss_filename is indeed a file
        if '.' in poss_filename and '*' in poss_filename:
            filename_pattern = poss_filename
            try:
                # get a listing of files from the base_url
                session = SessionWithHeaderRedirection(auth['user'], auth['password'])
                res = session.get(base_url)
                res.raise_for_status()

                # parse the returned HTML to get a possible file listing
                parser = HTMLHelper()
                parser.feed(res.text)
                files = parser.pathList

                result = []
                for filename in files:
                    # some filenames may be an absolute or relative path
                    if '/' in filename:
                        actual_filename = os.path.basename(filename)
                    else:
                        actual_filename = filename
                    if fnmatch.fnmatch(actual_filename, filename_pattern):
                        # if the path leads with a /, we need to revise the URL, else we can just append
                        if filename.startswith('/'):
                            # get the URL prefix
                            if base_url.startswith('https://'):
                                skip = 8  # number of characters to skip in prefix
                            elif base_url.startswith('http://'):
                                skip = 7
                            else:
                                skip = 0
                            next_slash = base_url.find('/', skip)
                            if next_slash != -1:
                                url_prefix = base_url[:next_slash]
                            else:
                                url_prefix = base_url
                            result.append('%s%s' % (url_prefix, filename))
                        else:
                            result.append('%s/%s' % (base_url, filename))
                return result
            except requests.exceptions.HTTPError:
                raise GeoEDFError('Error accessing file listing at URL')
            except:
                raise
        else:
            raise GeoEDFError('URL does not point to a file or set of files')
    else:
        return [url]

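# Hedged illustration (not part of the original module): given a wildcard URL,
# getFileList lists the parent directory and returns the entries whose names
# match the filename pattern. The host and filenames below are hypothetical.
#
#   getFileList('https://host.example/data/MOD11A1*.hdf', auth)
#   # -> ['https://host.example/data/MOD11A1.A2020001.hdf',
#   #     'https://host.example/data/MOD11A1.A2020002.hdf', ...]
#
# A URL without a wildcard is returned unchanged as a one-element list.
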
def get(self):

    # semantic checking of parameters
    # process dates
    try:
        startdate = pd.to_datetime(self.start_date, format='%m/%d/%Y')
        enddate = pd.to_datetime(self.end_date, format='%m/%d/%Y')
    except:
        raise GeoEDFError("Error parsing dates provided to GHCNDInput, please ensure format is mm/dd/YYYY")

    # param checks complete
    try:
        # parse out the station_id
        station_id = self.station_id.split(':')[1]

        # use the new API
        # construct the URL using the provided date range
        station_data_url = "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=SNOW,PRCP,SNWD,TMIN,TMAX&stations=%s&startDate=%s&endDate=%s&format=json" % (station_id, startdate.strftime('%Y-%m-%d'), enddate.strftime('%Y-%m-%d'))

        res = requests.get(station_data_url)
        res.raise_for_status()

        station_data = pd.read_json(res.text)

        # first reindex the data by date
        station_data.set_index(pd.to_datetime(station_data['DATE']), inplace=True)
    except:
        print("Error fetching GHCND data for station %s in GHCNDInput" % self.station_id)
        return

    # for each of the five params, first check if we have sufficient data,
    # then write out to a CSV file
    for met_param in self.met_params:
        try:
            if met_param == 'PRCP' or met_param == 'TMAX' or met_param == 'TMIN':
                if met_param in station_data:
                    num_nan = station_data[met_param].isna().sum()
                    if num_nan < 365:
                        # then we are fine; write out the CSV file
                        param_csvfile = '%s/%s_%s.csv' % (self.target_path, self.station_id, met_param)
                        param_data = station_data.filter([met_param])
                        param_data.to_csv(param_csvfile)
            # check for snow params
            if met_param == 'SNOW' or met_param == 'SNWD':
                if met_param in station_data:
                    num_nan = station_data[met_param].isna().sum()
                    if num_nan < 3500:
                        # write out the CSV file
                        param_csvfile = '%s/%s_%s.csv' % (self.target_path, self.station_id, met_param)
                        param_data = station_data.filter([met_param])
                        param_data.to_csv(param_csvfile)
        except:
            raise GeoEDFError("Error occurred while writing out %s data to CSV for station %s in GHCNDInput" % (met_param, self.station_id))

def filter(self):

    # first load up the shapefile to determine its projection
    driver = ogr.GetDriverByName('ESRI Shapefile')
    inDataset = driver.Open(self.shapefile, 0)

    if inDataset is None:
        raise GeoEDFError('Error opening shapefile %s in ShpExtentFilter' % self.shapefile)

    inLayer = inDataset.GetLayer()

    try:
        inSpatialRef = inLayer.GetSpatialRef()
    except:
        raise GeoEDFError('Error determining projection of input shapefile, cannot fetch extents in lat-lon')

    # construct the desired output projection
    try:
        outSpatialRef = osr.SpatialReference()
        outSpatialRef.ImportFromEPSG(4326)
    except BaseException as e:
        raise GeoEDFError('Error occurred when constructing target projection: %s' % e)

    try:
        # create Coordinate Transformation
        coordTransform = osr.CoordinateTransformation(inSpatialRef, outSpatialRef)

        # get layer extent; extent is in the format: xmin, xmax, ymin, ymax
        inExtent = inLayer.GetExtent()

        # construct the point geometry for both bottom left and top right,
        # then reproject
        bottomLeft = ogr.Geometry(ogr.wkbPoint)
        bottomLeft.AddPoint(inExtent[0], inExtent[2])
        topRight = ogr.Geometry(ogr.wkbPoint)
        topRight.AddPoint(inExtent[1], inExtent[3])

        bottomLeft.Transform(coordTransform)
        topRight.Transform(coordTransform)

        self.values.append('%f,%f,%f,%f' % (bottomLeft.GetY(), topRight.GetY(), bottomLeft.GetX(), topRight.GetX()))
    except:
        raise GeoEDFError("Error occurred when trying to reproject extents")

def __init__(self, **kwargs):

    # list to hold all parameter names
    self.provided_params = self.__required_params + self.__optional_params

    # check that all required params have been provided
    for param in self.__required_params:
        if param not in kwargs:
            raise GeoEDFError('Required parameter %s for SimpleDataClean not provided' % param)

    # set all required parameters
    for key in self.__required_params:
        setattr(self, key, kwargs.get(key))

    # set optional parameters
    for key in self.__optional_params:
        # if key is not provided in optional arguments, default its value to None
        setattr(self, key, kwargs.get(key, None))

    # fetch the static region, crop, and livestock set CSVs that are packaged with the processor
    # if no overrides have been provided
    # look in setup.py data_files for the location where these have been placed
    if self.regsets_csv is None:
        self.regsets_csv = '/usr/local/data/reg_sets.csv'
    if self.cropsets_csv is None:
        self.cropsets_csv = '/usr/local/data/crop_sets.csv'
    if self.livestocksets_csv is None:
        self.livestocksets_csv = '/usr/local/data/livestock_sets.csv'

    # also fetch the static region maps CSV; this file is always packaged with the processor
    self.regmaps_csv = '/usr/local/data/reg_map.csv'

    # finally, the R script that needs to be executed
    # this is stored at /usr/local/bin
    self.data_clean_script = '/usr/local/bin/01_data_clean.r'

    # validate start and end years
    try:
        start_year = int(self.start_year)
        end_year = int(self.end_year)
    except (TypeError, ValueError):
        raise GeoEDFError('Error occurred when validating start_year and end_year for SimpleDataClean; make sure they are integers')

    if start_year > end_year:
        raise GeoEDFError('start_year must not be later than end_year in SimpleDataClean')

    # call super class init
    super().__init__()

def HDF_proj_WKT(hdf_filepath):
    # returns the projection of the HDF file in Well Known Text (WKT) format

    # first determine the HDF type
    hdf_type = HDF_type(hdf_filepath)

    if hdf_type == 'hdf4':
        # for HDF4, assume the projection is described in the StructMetadata.0 section
        hdf_file = SD(hdf_filepath, SDC.READ)
        try:
            # access grid metadata section of StructMetadata.0
            fattr = hdf_file.attributes(full=1)
            structmeta = fattr['StructMetadata.0']
            gridmeta = structmeta[0]

            # determine the projection GCTP code from the grid metadata
            proj_regex = re.compile(r'''Projection=(?P<projection>\w+)''', re.VERBOSE)
            match = proj_regex.search(gridmeta)
            proj = match.group('projection')

            # support MODIS sinusoidal projection for now, add others later
            if proj == 'GCTP_SNSOID':
                sinu_proj4 = "+proj=sinu +R=6371007.181 +nadgrids=@null +wktext"
                srs = osr.SpatialReference()
                srs.ImportFromProj4(sinu_proj4)
                return srs.ExportToWkt()
        except:
            #prjfile = open('/home/rkalyana/GeoEDF/GeoEDF/connector/filter/modis/6933.prj', 'r')
            #prj_txt = prjfile.read()
            #srs = osr.SpatialReference()
            #srs.ImportFromESRI([prj_txt])
            #prjfile.close()
            #return srs.ExportToWkt()
            raise GeoEDFError('Error determining the projection or unsupported projection')
    else:
        # HDF5 file; only SMAP files in EASE Grid 2.0 are supported at the moment
        hdf_file = h5py.File(hdf_filepath, mode='r')
        # check to see if this is an EASE Grid 2.0 file
        if 'EASE2_global_projection' in hdf_file.keys():
            ease_proj4 = "+proj=cea +lat_0=0 +lon_0=0 +lat_ts=30 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m"
            srs = osr.SpatialReference()
            srs.ImportFromProj4(ease_proj4)
            return srs.ExportToWkt()
        else:
            raise GeoEDFError('Error determining the projection or unsupported projection')

def __init__(self, **kwargs):

    # list to hold all the parameter names; will be accessed in super to
    # construct dependency graph
    self.provided_params = self.__required_params + self.__optional_params

    # check that all required params have been provided
    for param in self.__required_params:
        if param not in kwargs:
            raise GeoEDFError('Required parameter %s for OpenDAPFilter not provided' % param)

    # set all required parameters
    for key in self.__required_params:
        setattr(self, key, kwargs.get(key))

    # set optional parameters
    for key in self.__optional_params:
        # if key is not provided in optional arguments, default its value to None
        setattr(self, key, kwargs.get(key, None))

    # initialize filter values array
    self.values = []

    # call super class init
    super().__init__()

def __init__(self, **kwargs):

    # list to hold all the parameter names; will be accessed in super to
    # construct dependency graph
    self.provided_params = self.__required_params + self.__optional_params

    # check that all required params have been provided
    for param in self.__required_params:
        if param not in kwargs:
            raise GeoEDFError('Required parameter %s for GHCNDInput not provided' % param)

    # set all required parameters
    for key in self.__required_params:
        setattr(self, key, kwargs.get(key))

    # set optional parameters
    for key in self.__optional_params:
        # if key is not provided in optional arguments, default its value to None
        setattr(self, key, kwargs.get(key, None))

    # set the hardcoded set of meteorological params
    # can possibly generalize to fetch any list of params in the future
    self.met_params = ['SNOW', 'SNWD', 'TMAX', 'TMIN', 'PRCP']

    # call super class init
    super().__init__()

def process(self):

    # the R script is invoked with the following command line arguments:
    # 1. start year
    # 2. end year
    # 3. input directory where FAO files are stored
    # 4. output directory
    # 5. region map csv path
    # 6. region sets csv path
    # 7. crop sets csv path
    # 8. livestock sets csv path
    try:
        command = "Rscript"
        args = [str(self.start_year),
                str(self.end_year),
                self.fao_input_dir,
                self.target_path,
                self.regmaps_csv,
                self.regsets_csv,
                self.cropsets_csv,
                self.livestocksets_csv]
        cmd = [command, self.data_clean_script] + args
        stdout = subprocess.check_output(cmd, universal_newlines=True)
    except CalledProcessError as e:
        # use the output captured on the exception; stdout is never set when check_output fails
        raise GeoEDFError('Error occurred when running SimpleDataClean processor: %s' % e.output)

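# Hedged illustration (not part of the original module): with start_year=2000,
# end_year=2010 and hypothetical input/output directories, the subprocess call
# above is equivalent to running:
#
#   Rscript /usr/local/bin/01_data_clean.r 2000 2010 /data/fao /data/out \
#       /usr/local/data/reg_map.csv /usr/local/data/reg_sets.csv \
#       /usr/local/data/crop_sets.csv /usr/local/data/livestock_sets.csv
#
# where /data/fao and /data/out are placeholders for fao_input_dir and
# target_path.
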
def __init__(self, **kwargs):

    # list to hold all the parameter names; will be accessed in super to
    # construct dependency graph
    self.provided_params = self.__required_params + self.__optional_params

    # check that all required params have been provided
    for param in self.__required_params:
        if param not in kwargs:
            raise GeoEDFError('Required parameter %s for WQPInput not provided' % param)

    # specific check for conditionally required params

    # set all required parameters
    for key in self.__required_params:
        setattr(self, key, kwargs.get(key))

    # set optional parameters
    for key in self.__optional_params:
        # if key is not provided in optional arguments, default its value to None
        setattr(self, key, kwargs.get(key, None))

    # set defaults if none provided
    if self.start_date is None:
        self.start_date = ''
    if self.end_date is None:
        self.end_date = '05-01-2020'

    # call super class init
    super().__init__()

def __init__(self, **kwargs):

    # list to hold all param names
    self.provided_params = self.__required_params + self.__optional_params

    # check that all required params have been provided
    for param in self.__required_params:
        if param not in kwargs:
            raise GeoEDFError('Required parameter %s for ReprojectShapefile not provided' % param)

    # set all required parameters
    for key in self.__required_params:
        setattr(self, key, kwargs.get(key))

    proj_params = ['prjfile', 'prjepsg', 'prjwkt']

    # make sure exactly one of the projection params has been provided
    if len(set(kwargs.keys()).intersection(set(proj_params))) != 1:
        raise GeoEDFError('Exactly one among the target projection file, EPSG code, or Well Known Text (WKT) is required')

    # set optional parameters
    for key in self.__optional_params:
        # special error handling of the newname parameter; needs to be a filename
        if key == 'newname':
            val = kwargs.get(key, None)
            if val is not None:
                if os.path.basename(val) != val:
                    raise GeoEDFError('The value of the newname parameter needs to be a filename and not a path')
                else:
                    # make sure it has a .shp extension
                    if os.path.splitext(val)[1] != '.shp':
                        raise GeoEDFError('newname must have a .shp extension')
            # set the value
            setattr(self, key, val)
            continue
        # if key is not provided in optional arguments, default its value to None
        setattr(self, key, kwargs.get(key, None))

    super().__init__()

def HDF_type(hdf_filepath):
    # determine if HDF4 or HDF5 (for now based on file extension alone)
    try:
        (ignore, hdf_filename) = os.path.split(hdf_filepath)
        (ignore, extension) = os.path.splitext(hdf_filename)
        if extension == '.hdf':
            hdftype = 'hdf4'
        elif extension == '.h5':
            hdftype = 'hdf5'
        else:
            raise GeoEDFError('Could not determine HDF file type from file extension')
    except:
        raise GeoEDFError('Could not determine HDF file type')
    return hdftype

def get(self):

    # user provided scenarios to download
    user_scenarios = self.scenarios.split(',')

    # loop through scenarios available for this dam and download those that match the provided
    # scenario names
    r = requests.get("https://fim.sec.usace.army.mil/ci/fim/getEAPLayers?id=" + self.dam_id)
    dam_scenarios = json.loads(r.content)

    for scenario in dam_scenarios:
        for user_scenario in user_scenarios:
            if user_scenario in scenario['displayName']:
                # then download
                link = "https://fim.sec.usace.army.mil/ci/download/start?LAYERID="\
                       + str(scenario["layerId"])\
                       + "&type=s3&RASTER_INFO_ID=" + str(scenario["rasterInfoID"])\
                       + "&TABLE=FLOOD_DEPTH&TABLE_ID=" + str(scenario["floodDepthID"])

                # construct filename out of load and breach condition
                fileName = '%s/%s_%s_%s.tiff' % (self.target_path,
                                                 scenario['loadCondition'],
                                                 scenario['breachCondition'],
                                                 self.dam_id)

                # download file
                try:
                    file = urllib.request.urlretrieve(link, fileName)
                except urllib.error.HTTPError as err:
                    raise GeoEDFError("DamFIMInput for %s - HTTPError" % self.dam_id)
                except requests.exceptions.ConnectionError as err:
                    raise GeoEDFError("DamFIMInput for %s - ConnectionError" % self.dam_id)
                except requests.exceptions.Timeout:
                    raise GeoEDFError("DamFIMInput for %s - Timeout" % self.dam_id)
                except requests.exceptions.TooManyRedirects:
                    raise GeoEDFError("DamFIMInput for %s - TooManyRedirects" % self.dam_id)
                except requests.exceptions.RequestException as e:
                    raise GeoEDFError("DamFIMInput for %s - Error" % self.dam_id)

    return True

def process(self):

    # first read the CSV file to fetch the regions and their corresponding values
    regions = []
    vals = []

    # name of the data field
    data_key = None

    with open(self.csvfile, 'r') as csvFileObj:
        reader = csv.DictReader(csvFileObj)
        for row in reader:
            # pre-process step only required once
            # determine the name of the data field
            if data_key is None:
                if len(list(row.keys())) != 2:
                    raise GeoEDFError("Error in CSV2HAR when processing %s. Exactly two fields are required" % self.csvfile)
                else:
                    # REG is one, what is the other?
                    for key in row.keys():
                        if key != 'REG':
                            data_key = key
                            break
            regions.append(row['REG'])
            vals.append(row[data_key])

    # now build the HAR file header
    # first create the HAR file object
    (ignore, csvFilename) = os.path.split(self.csvfile)
    basename = os.path.splitext(csvFilename)[0]
    harFilename = '%s/%s.har' % (self.target_path, basename)
    harFile = HarFileObj(harFilename)

    # create the two header array objects and set them on the file

    # first the region header
    # in this header, region names are always padded to 12 characters long
    reg_arr = np.array([reg.ljust(12) for reg in regions], dtype='<U12')
    reg_setNames = ['REG']
    reg_setElements = [[reg.ljust(12) for reg in regions]]
    reg_coeff_name = ''.ljust(12)
    reg_long_name = 'Set REG inferred from CSV file'.ljust(70)
    reg_header = HeaderArrayObj.HeaderArrayFromData(reg_arr, reg_coeff_name, reg_long_name, reg_setNames, dict(zip(reg_setNames, reg_setElements)))

    # add header to HAR file
    harFile["SET1"] = reg_header

    # then the CSV data header
    csv_arr = np.array(vals, dtype='float32')
    csv_setNames = ['REG']
    csv_setElements = [regions]
    csv_coeff_name = 'CSVData'.ljust(12)
    csv_long_name = 'Array extracted from CSV'.ljust(70)
    csv_header = HeaderArrayObj.HeaderArrayFromData(csv_arr, csv_coeff_name, csv_long_name, csv_setNames, dict(zip(csv_setNames, csv_setElements)))

    harFile["CSV"] = csv_header

    # write out the HAR file
    harFile.writeToDisk()

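# Hedged illustration (not part of the original module): CSV2HAR expects a CSV
# with exactly two columns, one of which must be named REG; the other column
# name is discovered at runtime and used as the data field. A hypothetical
# input file might look like:
#
#   REG,POP
#   usa,331.0
#   chn,1412.0
#   ind,1380.0
#
# which would produce <target_path>/<basename>.har containing a SET1 header
# with the regions and a CSV header with the float values.
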
def __init__(self, **kwargs):

    # list to hold all the parameter names; will be accessed in super to
    # construct dependency graph
    self.provided_params = self.__required_params + self.__optional_params

    # check that all required params have been provided
    for param in self.__required_params:
        if param not in kwargs:
            raise GeoEDFError('Required parameter %s for DateTimeFilter not provided' % param)

    # specific check for conditionally required params
    # if end is provided, a period is also needed
    if 'end' in kwargs:
        if 'period' not in kwargs:
            raise GeoEDFError('Period is required for DateTimeFilter when both start and end are provided.')

    # set all required parameters
    for key in self.__required_params:
        setattr(self, key, kwargs.get(key))

    # set optional parameters
    for key in self.__optional_params:
        # if key is not provided in optional arguments, default its value to None
        setattr(self, key, kwargs.get(key, None))
        # if has_time is not provided, set it to False
        if key == 'has_time':
            if self.has_time is None:
                self.has_time = False
        # if exact_dates is not provided, set it to False
        if key == 'exact_dates':
            if self.exact_dates is None:
                self.exact_dates = False

    # initialize filter values array
    self.values = []

    # call super class init
    super().__init__()

def filter(self):

    # convert the start and end dates from strings to Pandas DateTime
    try:
        # check if time is present
        if self.has_time:
            start_date = pd.to_datetime(self.start, format='%m/%d/%Y %H:%M:%S')
        else:
            start_date = pd.to_datetime(self.start, format='%m/%d/%Y')

        if self.end is not None:
            if self.has_time:
                end_date = pd.to_datetime(self.end, format='%m/%d/%Y %H:%M:%S')
            else:
                end_date = pd.to_datetime(self.end, format='%m/%d/%Y')
    except ValueError as e:
        raise GeoEDFError('Invalid values provided for start or end date to DateTimeFilter : %s' % e)
    except:
        raise GeoEDFError('Invalid values provided for start or end date to DateTimeFilter')

    # use the period to generate all intervening dates
    try:
        if self.end is not None:
            all_dates = pd.date_range(start=start_date, end=end_date, freq=self.period)
        else:
            all_dates = [start_date]

        # convert back to string using the pattern
        for dt in all_dates:
            self.values.append(dt.strftime(self.pattern))
    except ValueError as e:
        raise GeoEDFError('Error applying DateTimeFilter : %s' % e)
    except:
        raise GeoEDFError('Unknown error applying DateTimeFilter')

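# Hedged usage sketch (not part of the original module): the GeoEDF framework
# normally instantiates filters from a workflow description, but driven
# directly the filter above would behave roughly as follows. Parameter names
# (start, end, period, pattern) are taken from the code; the dates are arbitrary.
#
#   dt = DateTimeFilter(start='01/01/2020', end='01/05/2020',
#                       period='1D', pattern='%Y%m%d')
#   dt.filter()
#   dt.values  # ['20200101', '20200102', '20200103', '20200104', '20200105']
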
def getFile(url, path=None):
    """ download file(s) at url and save to path
        if path is None, save to /tmp
        returns boolean result
    """

    # validate that URL is not null
    if url is None:
        raise GeoEDFError('Null URL provided for getFile')

    # default path to /tmp
    if path is None:
        path = '/tmp'

    try:
        # if there is a wildcard in the URL, we need to process a list of files instead
        if '*' in url:
            fileURLList = getFileList(url)
            for fileURL in fileURLList:
                res = requests.get(fileURL, stream=True)
                res.raise_for_status()

                # get the name of the file to save
                outFilename = getFilename(res, fileURL)
                outPath = '%s/%s' % (path, outFilename.strip('"'))
                with open(outPath, 'wb') as outFile:
                    for chunk in res.iter_content(chunk_size=1024 * 1024):
                        outFile.write(chunk)
            # return only after all matching files have been downloaded
            return True
        else:  # no wildcard
            res = requests.get(url, stream=True)
            res.raise_for_status()

            # get the name of the file to save
            outFilename = getFilename(res, url)
            outPath = '%s/%s' % (path, outFilename.strip('"'))
            with open(outPath, 'wb') as outFile:
                for chunk in res.iter_content(chunk_size=1024 * 1024):
                    outFile.write(chunk)
            return True
    except GeoEDFError:  # known error
        raise
    except requests.exceptions.HTTPError:
        raise

def constructSpatialRef(prj_file=None, prj_epsg_code=None, prj_wkt=None):
    try:
        outSpatialRef = osr.SpatialReference()
        # if a projection file is provided, read the WKT from it
        if prj_file is not None:
            prjfile = open(prj_file, 'r')
            prj_txt = prjfile.read()
            outSpatialRef.ImportFromESRI([prj_txt])
        elif prj_epsg_code is not None:
            if prj_epsg_code.isdigit():
                outSpatialRef.ImportFromEPSG(int(prj_epsg_code))
            else:
                # avoid silently returning an empty spatial reference
                raise GeoEDFError('EPSG code provided to constructSpatialRef must be numeric')
        elif prj_wkt is not None:
            outSpatialRef.ImportFromESRI([prj_wkt])
        else:
            raise GeoEDFError('Non-null target projection file, EPSG code, or WKT is required')
        return outSpatialRef
    except:
        raise

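# Hedged usage sketch (not part of the original module): constructing a WGS84
# spatial reference from an EPSG code string, as ReprojectShapefile does when
# its prjepsg parameter is supplied.
#
#   srs = constructSpatialRef(prj_epsg_code='4326')
#   srs.ExportToWkt()  # WKT for EPSG:4326 (WGS 84)
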
def __init__(self, **kwargs):

    # list to hold all parameter names
    self.provided_params = self.__required_params + self.__optional_params

    # check that all required params have been provided
    for param in self.__required_params:
        if param not in kwargs:
            raise GeoEDFError('Required parameter %s for CSV2HAR not provided' % param)

    # set all required parameters
    for key in self.__required_params:
        setattr(self, key, kwargs.get(key))

    # set optional parameters
    for key in self.__optional_params:
        # if key is not provided in optional arguments, default its value to None
        setattr(self, key, kwargs.get(key, None))

    super().__init__()

def filter(self):

    try:
        # construct the catalog URL and attempt to retrieve it using requests
        catalog_url = '%s/catalog.xml' % self.opendap_url

        res = requests.get(catalog_url, stream=True)

        # temporarily save the catalog file to the directory holding the eventual filter output
        outFilename = '%s/catalog.xml' % os.path.dirname(self.target_path)
        with open(outFilename, 'wb') as catalogFile:
            for chunk in res.iter_content(chunk_size=1024):
                catalogFile.write(chunk)

        # parse the catalog XML file
        tree = ET.parse(outFilename)
        root = tree.getroot()

        # assuming fixed format and namespaces
        # root > dataset > dataset array > access leaf
        # construct tag keys
        dataset_key = '%sdataset' % self.thredds_ns
        access_key = '%saccess' % self.thredds_ns

        for child in root.findall(dataset_key):
            for children in child.findall(dataset_key):
                for access_child in children.findall(access_key):
                    if access_child.attrib['serviceName'] == 'dap':
                        dataset_path = access_child.attrib['urlPath']
                        filename = os.path.split(dataset_path)[1]
                        # construct direct access URL for NetCDF4 format
                        self.values.append('%s/%s.nc4' % (self.opendap_url, filename))
    except:
        raise GeoEDFError('Unknown error applying OpenDAPFilter')

def getFile(url, auth=None, path=None):
    """ download file(s) at url and save to path
        if path is None, save to /tmp
        auth is an optional dictionary with user and password
        returns boolean result
    """

    # validate that URL is not null
    if url is None:
        raise GeoEDFError('Null URL provided for getFile')

    # default path to /tmp
    if path is None:
        path = '/tmp'

    # if no auth is provided, raise an error
    # if insufficient/incorrect auth is provided, return an error
    try:
        if auth is None:
            raise GeoEDFError('Authentication required for accessing NASA data')
        else:
            if validateAuth(auth):
                # auth validated for completeness
                session = SessionWithHeaderRedirection(auth['user'], auth['password'])

                # if there is a wildcard in the URL, we need to process a list of files instead
                if '*' in url:
                    fileURLList = getFileList(url, auth)
                    # recreate the session object since the file listing may not need auth
                    session = SessionWithHeaderRedirection(auth['user'], auth['password'])
                    for fileURL in fileURLList:
                        res = session.get(fileURL, stream=True)
                        res.raise_for_status()

                        # get the name of the file to save
                        outFilename = getFilename(res, fileURL)
                        outPath = '%s/%s' % (path, outFilename)
                        with open(outPath, 'wb') as outFile:
                            for chunk in res.iter_content(chunk_size=1024 * 1024):
                                outFile.write(chunk)
                    # return only after all matching files have been downloaded
                    return True
                else:  # no wildcard
                    res = session.get(url)
                    res.raise_for_status()

                    # get the name of the file to save
                    outFilename = getFilename(res, url)
                    outPath = '%s/%s' % (path, outFilename)
                    with open(outPath, 'wb') as outFile:
                        for chunk in res.iter_content(chunk_size=1024 * 1024):
                            outFile.write(chunk)
                    return True
            else:
                # auth could not be validated
                raise GeoEDFError('Invalid authentication provided!')
    except GeoEDFError:  # known error
        raise
    except requests.exceptions.HTTPError:
        raise

def filter(self):

    # first transform the comma separated gage IDs into a list of strings
    gage_ids = self.gages.rstrip().split(',')

    # since hydrofunctions cannot handle a large number of station IDs, split them into chunks of 100
    num_split = math.ceil(len(gage_ids) / 100)
    gage_id_chunks = np.array_split(gage_ids, num_split)

    # semantic checks on params

    # Check (1) start and end date are dates and in the right order
    try:
        start_date = pd.to_datetime(self.start, format='%m/%d/%Y')
        end_date = pd.to_datetime(self.end, format='%m/%d/%Y')
    except ValueError as e:
        raise GeoEDFError('Invalid values provided for start or end date to DischargeDataFilter : %s' % e)
    except:
        raise GeoEDFError('Invalid values provided for start or end date to DischargeDataFilter')

    if start_date > end_date:
        raise GeoEDFError('Start date cannot be later than end date in DischargeDataFilter')

    # make sure cutoff is an integer between 1 and 100
    try:
        self.cutoff = int(self.cutoff)
        if self.cutoff < 1 or self.cutoff > 100:
            raise GeoEDFError('Cutoff parameter in DischargeDataFilter must be an integer between 1 and 100')
    except:
        raise GeoEDFError('Cutoff parameter in DischargeDataFilter must be an integer between 1 and 100')

    # next query hydrofunctions for discharge data for the provided gages
    # 00060 is the discharge parameter
    try:
        # process each chunk separately and merge the resulting dataframes
        # discharges holds the merged DF
        discharges = None
        for gage_chunk in gage_id_chunks:
            chunk_data = hf.NWIS(list(gage_chunk),
                                 'dv',
                                 start_date=start_date.strftime('%Y-%m-%d'),
                                 end_date=end_date.strftime('%Y-%m-%d'),
                                 parameterCd='00060')
            if discharges is None:
                discharges = chunk_data.df()
            else:
                # simple merge
                discharges = discharges.merge(chunk_data.df(),
                                              how='outer',
                                              left_index=True,
                                              right_index=True)

        # get the statistics of the retrieved data; we are looking for the count
        # in order to filter by coverage %
        stn_data = discharges.describe()

        # maximum data available
        max_count = stn_data.loc['count'].max()

        # cutoff number of days
        count_cutoff = (max_count * self.cutoff) / 100

        # filter by availability
        keep_stn = (stn_data.loc['count'] >= count_cutoff)
        valid_stns = keep_stn[keep_stn].index.to_list()

        # clean up station IDs since the returned IDs have the USGS:####:param format
        filtered_ids = list(map(lambda stn_id: stn_id.split(':')[1], valid_stns))

        # if any remain, set the return value to a comma separated list of these IDs
        if len(filtered_ids) > 0:
            self.values.append(','.join(filtered_ids))
    except:
        raise GeoEDFError("Error retrieving discharge data for gages in DischargeDataFilter")

def filter(self):

    # semantic checks on params

    # Check (1) exactly four values need to be provided in extent
    extent_vals = list(map((lambda val: float(val)), self.extent.split(',')))
    if len(extent_vals) != 4:
        raise GeoEDFError('NOAAStationFilter requires a N,S,E,W string of floating point numbers as the extent')

    # Check (2) that the lat and lon pairs are in the right order
    north = extent_vals[0]
    south = extent_vals[1]
    east = extent_vals[2]
    west = extent_vals[3]

    if south > north:
        raise GeoEDFError('please check the ordering of the south and north extents')
    if west > east:
        raise GeoEDFError('please check the ordering of the east and west extents')

    # passed semantic checks, prepare a dict of extents for the API
    extent_dict = {"north": north, "south": south, "east": east, "west": west}

    # process dates
    try:
        startdate = pd.to_datetime(self.start_date, format='%m/%d/%Y')
        enddate = pd.to_datetime(self.end_date, format='%m/%d/%Y')
    except:
        raise GeoEDFError("Error parsing dates provided to NOAAStationFilter, please ensure format is mm/dd/YYYY")

    # param checks complete
    try:
        # get a client for NCDC API usage
        cdo_client = Client(self.token, default_units="None", default_limit=1000)

        # we are looking for stations with GHCND data
        # find_stations returns a dataframe containing info on the stations within the input extent
        stations = cdo_client.find_stations(
            datasetid="GHCND",
            extent=extent_dict,
            startdate=startdate,
            enddate=enddate,
            return_dataframe=True)

        # filter to only retain stations which have sufficient data for the date range
        stations_to_drop = []

        # drop stations without enough observations for the given date range
        for i in range(len(stations.maxdate)):
            # get the max and min date of each station
            station_maxdate = pd.to_datetime(stations.maxdate[i], format='%Y-%m-%d')
            station_mindate = pd.to_datetime(stations.mindate[i], format='%Y-%m-%d')
            # check if the station's maxdate is earlier than enddate
            if station_maxdate < enddate:
                stations_to_drop.append(i)
            # check if the station's mindate is later than startdate
            if station_mindate > startdate:
                stations_to_drop.append(i)

        # delete stations without enough time coverage
        valid_stations = stations.drop(stations.index[stations_to_drop])

        # add station IDs to the values array
        self.values += list(valid_stations.id)
    except:
        raise GeoEDFError('Error occurred when querying the NCDC API for stations in NOAAStationFilter')

def process(self):

    # first reproject the shapefile to WGS84; all processing will happen in lat-lon
    # use the ReprojectShapefile processor

    # set the name of this new shapefile based on the HDF filename
    (ignore, hdffilename) = os.path.split(self.hdffile)
    tmpfilename = '%s.shp' % hdffilename

    # reproject shapefile
    try:
        # first get the HDF file's native projection
        #hdf_proj_wkt = HDFEOSHelper.HDF_proj_WKT(self.hdffile)
        shapefileReprojector = ReprojectShapefile(shapefile=self.shapefile, prjepsg='4326', newname=tmpfilename)
        shapefileReprojector.target_path = self.target_path
        #shapefileReprojector = ReprojectShapefile(shapefile=self.shapefile,destdir=self.destdir,prjwkt=hdf_proj_wkt,newname=tmpfilename)
        shapefileReprojector.process()
        shapefile_wgs84 = '%s/%s' % (self.target_path, tmpfilename)
    except:
        raise GeoEDFError('Error reprojecting input shapefile, cannot proceed with masking HDF data')

    # now process the HDF file's subdatasets

    # get the data matrix for the selected subdatasets
    hdf_data = HDFEOSHelper.HDF_subdataset_data(self.hdffile, self.datasets)

    # get the lat-lon for the corner coordinates
    #(upperLeftX, upperLeftY, lowerRightX, lowerRightY) = HDFEOSHelper.HDF_corner_coords(self.hdffile)
    (upperLeftX, upperLeftY, lowerRightX, lowerRightY) = (-180, 90, 180, -90)

    # get the grid dimensions of the data
    hdf_sample_data = next(iter(hdf_data.values()))['data']
    num_rows = hdf_sample_data.shape[0]
    num_cols = hdf_sample_data.shape[1]
    #print(num_rows,num_cols)

    # determine the area of a single grid cell; assume equal size grid cells
    grid_cell_width = (lowerRightX - upperLeftX) / num_cols
    grid_cell_height = (upperLeftY - lowerRightY) / num_rows

    grid_cell_rect = ogr.Geometry(ogr.wkbLinearRing)
    grid_cell_rect.AddPoint(upperLeftX, upperLeftY)
    grid_cell_rect.AddPoint(upperLeftX + grid_cell_width, upperLeftY)
    grid_cell_rect.AddPoint(upperLeftX + grid_cell_width, upperLeftY - grid_cell_height)
    grid_cell_rect.AddPoint(upperLeftX, upperLeftY - grid_cell_height)
    grid_cell_rect.AddPoint(upperLeftX, upperLeftY)

    grid_cell_geom = ogr.Geometry(ogr.wkbPolygon)
    grid_cell_geom.AddGeometry(grid_cell_rect)
    grid_cell_area = grid_cell_geom.Area()

    shp_driver = ogr.GetDriverByName("ESRI Shapefile")
    mask_shp_data_source = shp_driver.Open(shapefile_wgs84, 1)
    mask_shp_layer = mask_shp_data_source.GetLayer()
    #print(mask_shp_layer.GetExtent())

    # add new fields to store the aggregate value for each subdataset
    for key in hdf_data.keys():
        # DBFs only allow field names up to 10 characters long
        key_10char = key[0:10]
        mask_shp_layer.CreateField(ogr.FieldDefn(key_10char, ogr.OFTReal))

    # loop through shapefile features, determining the aggregate value of each subdataset for each feature
    for ignore, mask_shp_feature in enumerate(mask_shp_layer):
        mask_shp_feature_geom = mask_shp_feature.GetGeometryRef()
        mask_shp_feature_area = mask_shp_feature_geom.Area()

        # initialize a dictionary of aggregate data for each HDF subdataset for the current feature
        feature_hdf_data = dict()
        for key in hdf_data.keys():
            feature_hdf_data[key] = 0.0

        # factor to weigh aggregate data by, based on intersection areas with each grid cell
        feature_weight = 0.0

        # get the bounds of the feature
        mask_shp_feature_geom.FlattenTo2D()
        x_min, x_max, y_min, y_max = mask_shp_feature_geom.GetEnvelope()

        # optimization to only process intersecting rows and columns rather than all grid cells
        j_low = max(0, int((x_min - upperLeftX) / grid_cell_width) - 1)
        j_high = min(num_cols, int((x_max - upperLeftX) / grid_cell_width) + 1)
        i_low = max(0, int((upperLeftY - y_max) / grid_cell_height) - 1)
        i_high = min(num_rows, int((upperLeftY - y_min) / grid_cell_height) + 1)

        num_cells = 0
        num_cells_0 = 0
        num_cells_1 = 0
        num_cells_partial = 0

        # loop through grid cells, checking for intersection with the feature and aggregating
        # a weighted value for each subdataset
        for i in range(i_low, i_high):
            # further optimize by determining the subset of columns that are relevant for this row
            row_rect = ogr.Geometry(ogr.wkbLinearRing)
            row_rect.AddPoint(upperLeftX + j_low * grid_cell_width, upperLeftY - i * grid_cell_height)
            row_rect.AddPoint(upperLeftX + (j_high + 1) * grid_cell_width, upperLeftY - i * grid_cell_height)
            row_rect.AddPoint(upperLeftX + (j_high + 1) * grid_cell_width, upperLeftY - (i + 1) * grid_cell_height)
            row_rect.AddPoint(upperLeftX + j_low * grid_cell_width, upperLeftY - (i + 1) * grid_cell_height)
            row_rect.AddPoint(upperLeftX + j_low * grid_cell_width, upperLeftY - i * grid_cell_height)

            row_geom = ogr.Geometry(ogr.wkbPolygon)
            row_geom.AddGeometry(row_rect)

            row_intersection_geom = row_geom.Intersection(mask_shp_feature_geom)
            # guard against a null intersection before computing its area
            if row_intersection_geom is not None:
                row_intersection_area = row_intersection_geom.Area()
            else:
                row_intersection_area = 0.0

            if row_intersection_geom is not None and row_intersection_area > 0.0:
                row_x_min, row_x_max, row_y_min, row_y_max = row_intersection_geom.GetEnvelope()
                new_j_low = max(0, int((row_x_min - upperLeftX) / grid_cell_width) - 1)
                new_j_high = min(num_cols, int((row_x_max - upperLeftX) / grid_cell_width) + 1)
            else:
                row_intersection_geom = mask_shp_feature_geom
                new_j_low = j_low
                new_j_high = j_high

            for j in range(new_j_low, new_j_high):
                num_cells = num_cells + 1

                # get the cell value
                cell_val = hdf_data[key]['data'][i][j]
                # TODO: move this out of the loop

                # if this cell doesn't contain a valid value, skip it
                if False and 'range' in hdf_data[key]:
                    val_range = hdf_data[key]['range']
                    if not (val_range[0] < cell_val < val_range[1]):
                        continue
                else:
                    # skip cells that contain the "nodata" value
                    if 'fillValue' in hdf_data[key]:
                        fillValue = hdf_data[key]['fillValue']
                        if cell_val == fillValue or cell_val == 0 - fillValue:
                            continue

                # construct a grid cell based on the lat-lon values for this grid row and column
                cell_rect = ogr.Geometry(ogr.wkbLinearRing)
                cell_rect.AddPoint(upperLeftX + j * grid_cell_width, upperLeftY - i * grid_cell_height)
                cell_rect.AddPoint(upperLeftX + (j + 1) * grid_cell_width, upperLeftY - i * grid_cell_height)
                cell_rect.AddPoint(upperLeftX + (j + 1) * grid_cell_width, upperLeftY - (i + 1) * grid_cell_height)
                cell_rect.AddPoint(upperLeftX + j * grid_cell_width, upperLeftY - (i + 1) * grid_cell_height)
                cell_rect.AddPoint(upperLeftX + j * grid_cell_width, upperLeftY - i * grid_cell_height)

                cell_geom = ogr.Geometry(ogr.wkbPolygon)
                cell_geom.AddGeometry(cell_rect)

                # check to see how the grid cell relates to the row intersection geometry
                # and get the overlap area to weight the aggregation calculation
                if cell_geom.Disjoint(row_intersection_geom):
                    # the geometries are disjoint
                    num_cells_0 = num_cells_0 + 1
                    cell_intersection_area = 0.0
                elif cell_geom.Within(row_intersection_geom):
                    # grid cell is fully contained
                    num_cells_1 = num_cells_1 + 1
                    cell_intersection_area = grid_cell_area
                else:
                    # grid cell partially intersects the feature
                    num_cells_partial = num_cells_partial + 1
                    cell_intersection_geom = cell_geom.Intersection(row_intersection_geom)
                    if cell_intersection_geom is not None:
                        cell_intersection_area = cell_intersection_geom.Area()
                    else:
                        cell_intersection_area = 0.0

                # grid cell does not intersect the feature
                if cell_intersection_area <= 0.0:
                    continue

                # add the weighted contribution of this grid cell to the feature value for each subdataset
                for key in hdf_data.keys():
                    feature_hdf_data[key] += hdf_data[key]['data'][i][j] * cell_intersection_area / grid_cell_area

                feature_weight += cell_intersection_area / grid_cell_area

        # done with the loop over grid cells; compute the actual weighted aggregate value for the feature
        for key in hdf_data.keys():
            key_10char = key[0:10]
            if feature_weight > 0.0:
                feature_hdf_data[key] = feature_hdf_data[key] / feature_weight
            else:
                feature_hdf_data[key] = 0.0
            # set the value for this subdataset field on the feature
            mask_shp_feature.SetField(key_10char, feature_hdf_data[key])

        mask_shp_layer.SetFeature(mask_shp_feature)
        mask_shp_data_source.SyncToDisk()

    # close the result shapefile
    mask_shp_layer = None
    mask_shp_data_source.SyncToDisk()
    mask_shp_data_source = None

def filter(self):

    # semantic checks on params

    # Check (1) exactly four values need to be provided in extent
    extent_vals = list(map((lambda val: int(float(val))), self.extent.split(',')))
    if len(extent_vals) != 4:
        raise GeoEDFError('GeoRangeFilter requires a latmin,latmax,lonmin,lonmax string as the extent')

    # Check (2) that the lat and lon pairs are in the right order
    self.latmin = extent_vals[0]
    self.latmax = extent_vals[1]
    self.lonmin = extent_vals[2]
    self.lonmax = extent_vals[3]

    if self.latmin > self.latmax:
        raise GeoEDFError('extent[0] and extent[1] need to be the latmin and latmax; please check the ordering')

    if self.lonmin > self.lonmax:
        raise GeoEDFError('extent[2] and extent[3] need to be the lonmin and lonmax; please check the ordering')

    try:
        # first produce all intermediate values for the lat and lon pairs
        # increment max by 1 since int(float()) returns the floor
        if self.latmax <= 0:
            # latmin is also < 0 since we've checked the ordering
            # take the absolute value, flip for the right range, and produce s#
            lat_range = list(range(abs(self.latmax), abs(self.latmin) + 2))
            lat_vals = list(map((lambda lat_val: 's%d' % lat_val), lat_range))
        else:  # latmax is > 0
            if self.latmin < 0:
                # need to split into two ranges; up to 0 and then > 0
                lat_range1 = list(range(0, abs(self.latmin) + 2))
                lat_vals = list(map((lambda lat_val: 's%d' % lat_val), lat_range1))
                lat_range2 = list(range(0, self.latmax + 2))
                lat_vals += list(map((lambda lat_val: 'n%d' % lat_val), lat_range2))
            else:  # latmin is >= 0
                lat_range = list(range(self.latmin, self.latmax + 2))
                lat_vals = list(map((lambda lat_val: 'n%d' % lat_val), lat_range))

        # process lon values
        if self.lonmax <= 0:
            # lonmin is also < 0 since we've checked the ordering
            # take the absolute value, flip for the right range, and produce w#
            lon_range = list(range(abs(self.lonmax), abs(self.lonmin) + 2))
            lon_vals = list(map((lambda lon_val: 'w%03d' % lon_val), lon_range))
        else:  # lonmax is > 0
            if self.lonmin < 0:
                # need to split into two ranges; up to 0 and then > 0
                lon_range1 = list(range(0, abs(self.lonmin) + 2))
                lon_vals = list(map((lambda lon_val: 'w%03d' % lon_val), lon_range1))
                lon_range2 = list(range(0, self.lonmax + 2))
                lon_vals += list(map((lambda lon_val: 'e%03d' % lon_val), lon_range2))
            else:  # lonmin is >= 0
                lon_range = list(range(self.lonmin, self.lonmax + 2))
                lon_vals = list(map((lambda lon_val: 'e%03d' % lon_val), lon_range))

        # concatenate the lat and lon vals to produce a single string
        for lat_val in lat_vals:
            for lon_val in lon_vals:
                self.values.append(lat_val + lon_val)
    except:
        raise GeoEDFError('Unknown error occurred when attempting to construct filter values')

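# Hedged worked example (not part of the original module): for a purely
# positive extent string '10,11,20,21' (latmin,latmax,lonmin,lonmax), the
# code above produces lat_vals ['n10', 'n11', 'n12'] and lon_vals
# ['e020', 'e021', 'e022'], and values becomes their cross product
# ('n10e020', 'n10e021', ..., 'n12e022'), i.e. one tile label per 1-degree
# cell covering the extent, with one extra row and column because the upper
# bound of each range call is padded by 2.
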
def process(self):

    # set the reprojected file output name and path
    # if a new name has not been provided, reuse the source filename
    # since the output directory is always new, there won't be a clash
    if self.newname is not None:
        outfilename = self.newname
    else:
        (ignore, outfilename) = os.path.split(self.shapefile)

    (outfileshortname, extension) = os.path.splitext(outfilename)

    outfilepath = '%s/%s' % (self.target_path, outfilename)

    driver = ogr.GetDriverByName('ESRI Shapefile')

    indataset = driver.Open(self.shapefile, 0)
    if indataset is None:
        raise GeoEDFError('Error opening shapefile %s in ReprojectShapefile processor' % self.shapefile)

    inlayer = indataset.GetLayer()

    try:
        inSpatialRef = inlayer.GetSpatialRef()
    except:
        raise GeoEDFError('Error determining projection of input shapefile, cannot reproject')

    # construct the desired output projection
    try:
        outSpatialRef = ProjectionHelper.constructSpatialRef(self.prjfile, self.prjepsg, self.prjwkt)
    except BaseException as e:
        raise GeoEDFError('Error occurred when constructing target projection: %s' % e)

    # create Coordinate Transformation
    coordTransform = osr.CoordinateTransformation(inSpatialRef, outSpatialRef)

    # create the output shapefile
    outdataset = driver.CreateDataSource(outfilepath)

    if outdataset is None:
        raise GeoEDFError('Error creating reprojected shapefile %s' % outfilepath)

    outlayer = outdataset.CreateLayer(outfileshortname, geom_type=inlayer.GetGeomType())

    # add fields
    inLayerDefn = inlayer.GetLayerDefn()
    for i in range(0, inLayerDefn.GetFieldCount()):
        fieldDefn = inLayerDefn.GetFieldDefn(i)
        outlayer.CreateField(fieldDefn)

    featureDefn = outlayer.GetLayerDefn()

    infeature = inlayer.GetNextFeature()
    while infeature:
        # get the input geometry
        geometry = infeature.GetGeometryRef()
        # reproject the geometry; each one has to be projected separately
        geometry.Transform(coordTransform)
        # create a new output feature
        outfeature = ogr.Feature(featureDefn)
        # set the geometry and attribute
        outfeature.SetGeometry(geometry)
        # set field values from input shapefile
        #for i in range(0, featureDefn.GetFieldCount()):
        #    outfeature.SetField(featureDefn.GetFieldDefn(i).GetNameRef(), infeature.GetField(i))
        # add the feature to the output shapefile
        outlayer.CreateFeature(outfeature)
        # destroy the features and get the next input feature
        outfeature.Destroy()
        infeature.Destroy()
        infeature = inlayer.GetNextFeature()

    # close the shapefiles
    indataset.Destroy()
    outdataset.Destroy()

    # create the new .prj projection file
    outSpatialRef.MorphToESRI()
    outPrjFileName = '%s/%s.prj' % (self.target_path, outfileshortname)
    outPrjFile = open(outPrjFileName, 'w')
    outPrjFile.write(outSpatialRef.ExportToWkt())
    outPrjFile.close()

def HDF_subdataset_data(hdf_filepath, subdataset_substrs):
    # process the names of the subdatasets, finding any that contain a member of
    # subdataset_substrs as a substring
    # subdataset_substrs is a list
    # the returned dictionary is indexed by subdataset name and
    # contains the data grid and value range

    hdf_data = dict()

    # first determine the HDF type
    hdf_type = HDF_type(hdf_filepath)

    if hdf_type == 'hdf4':
        hdf_file = SD(hdf_filepath, SDC.READ)
        try:
            dset_names = hdf_file.datasets().keys()
            # loop through input subdataset substrings
            for subdset_substr in subdataset_substrs:
                # loop through datasets in the HDF file
                for dset_name in dset_names:
                    # if substring found
                    if subdset_substr in dset_name:
                        # if this subdataset has not been processed before
                        if dset_name not in hdf_data:
                            try:
                                data2D = hdf_file.select(dset_name)
                                data = data2D[:, :].astype(np.float64)
                                hdf_data[dset_name] = dict()
                                hdf_data[dset_name]['data'] = data
                                #hdf_data[dset_name]['range'] = data2D.getrange()
                                hdf_data[dset_name]['fillValue'] = data2D.getfillvalue()
                            except:
                                raise GeoEDFError('Error retrieving subdataset %s data from HDF file' % dset_name)
        except:
            raise GeoEDFError('Error retrieving subdatasets from HDF4 file %s' % hdf_filepath)
    else:
        hdf_file = h5py.File(hdf_filepath, mode='r')
        # assume this follows the structure of HDF-EOS files where all subdatasets are in a "Geophysical_Data" group
        if 'Geophysical_Data' in hdf_file.keys():
            dset_names = hdf_file['Geophysical_Data'].keys()
            # loop through input subdataset substrings
            for subdset_substr in subdataset_substrs:
                # loop through subdatasets in the HDF file
                for dset_name in dset_names:
                    # if substring matches
                    if subdset_substr in dset_name:
                        # if subdataset not processed yet
                        if dset_name not in hdf_data:
                            try:
                                # construct the fully qualified subdataset name
                                fq_dset_name = '/Geophysical_Data/%s' % dset_name
                                data = hdf_file[fq_dset_name]
                                hdf_data[dset_name] = dict()
                                hdf_data[dset_name]['data'] = data[:]
                                hdf_data[dset_name]['fillValue'] = data.fillvalue
                            except:
                                raise GeoEDFError('Error retrieving subdataset %s data from HDF file' % dset_name)
        else:
            raise GeoEDFError('Cannot handle HDF5 files that do not follow the HDF-EOS standards')

    return hdf_data

def HDF_corner_coords(hdf_filepath):
    # return a tuple of upper left and lower right coordinates in lat-lon

    # first determine the HDF type
    hdf_type = HDF_type(hdf_filepath)

    if hdf_type == 'hdf4':
        # for HDF4 assume corner coordinates are stored in the StructMetadata.0 section
        hdf_file = SD(hdf_filepath, SDC.READ)
        try:
            # access grid metadata section of StructMetadata.0
            fattr = hdf_file.attributes(full=1)
            structmeta = fattr['StructMetadata.0']
            gridmeta = structmeta[0]

            # parse the text to retrieve corner coordinates in meters
            ul_regex = re.compile(r'''UpperLeftPointMtrs=\(
                                      (?P<upper_left_x>[+-]?\d+\.\d+)
                                      ,
                                      (?P<upper_left_y>[+-]?\d+\.\d+)
                                      \)''', re.VERBOSE)
            match = ul_regex.search(gridmeta)
            x0 = float(match.group('upper_left_x'))
            y0 = float(match.group('upper_left_y'))

            lr_regex = re.compile(r'''LowerRightMtrs=\(
                                      (?P<lower_right_x>[+-]?\d+\.\d+)
                                      ,
                                      (?P<lower_right_y>[+-]?\d+\.\d+)
                                      \)''', re.VERBOSE)
            match = lr_regex.search(gridmeta)
            x1 = float(match.group('lower_right_x'))
            y1 = float(match.group('lower_right_y'))

            # construct the projection transformer to convert from meters to lat-lon
            # determine the projection GCTP code from the grid metadata
            proj_regex = re.compile(r'''Projection=(?P<projection>\w+)''', re.VERBOSE)
            match = proj_regex.search(gridmeta)
            proj = match.group('projection')

            # support MODIS sinusoidal projection for now, add others later
            if proj == 'GCTP_SNSOID':
                #sinu = pyproj.Proj("+proj=sinu +R=6371007.181 +nadgrids=@null +wktext")
                #wgs84 = pyproj.Proj("+init=EPSG:4326")
                #lon0, lat0 = pyproj.transform(sinu, wgs84, x0, y0)
                #lon1, lat1 = pyproj.transform(sinu, wgs84, x1, y1)
                #return (lon0, lat0, lon1, lat1)
                return (x0, y0, x1, y1)
            else:
                raise GeoEDFError('Only MODIS sinusoidal grids are supported currently')
        except Exception as e:
            #x0, y0, x1, y1 = -17357881.81713629,7324184.56362408,17357881.81713629,-7324184.56362408
            #return (x0,y0,x1,y1)
            raise GeoEDFError('Error retrieving corner coordinates of HDF file')
    else:
        # HDF5 file; only SMAP files in EASE Grid 2.0 are supported at the moment
        hdf_file = h5py.File(hdf_filepath, mode='r')
        # check to see if this is an EASE Grid 2.0 file
        if 'EASE2_global_projection' in hdf_file.keys():
            # hardcoded corner coordinates, since these are not stored in the file metadata
            x0, y0, x1, y1 = -17357881.81713629, 7324184.56362408, 17357881.81713629, -7324184.56362408
            #ease = pyproj.Proj(("+proj=cea +lat_0=0 +lon_0=0 +lat_ts=30 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m"))
            #wgs84 = pyproj.Proj("+init=EPSG:4326")
            #lon0, lat0 = pyproj.transform(ease, wgs84, x0, y0)
            #lon1, lat1 = pyproj.transform(ease, wgs84, x1, y1)
            #return (lon0, lat0, lon1, lat1)
            return (x0, y0, x1, y1)
        else:
            raise GeoEDFError('Only EASE Grid 2.0 HDF5 files are supported currently')
