def download_filelist(ftptexts, filetypes = False, outdir = False):
    """
    Reads text file of download links, downloads them.

    This script reads a text file with urls such as those output from ECHO
    REVERB and saves the files to an output directory. It will retry failed
    links 19 more times (20 attempts total) before giving up and printing a
    warning to the user.

    Inputs:
        ftptexts    array of txt files ordered from reverb containing ftp links
        filetypes   file extension of the desired files, leave blank or False
                    to grab all types.
        outdir      folder where files are to be placed after download

    Outputs:
        failed      list of urls which still failed to download after all retries
    """

    # force inputs to take list format
    ftptexts = core.enf_list(ftptexts)
    if filetypes:
        filetypes = core.enf_list(filetypes)

    failed = []

    for ftptext in ftptexts:
        # verify that the url list file exists
        core.exists(ftptext)

        # default the output directory to the folder holding the url list
        if not outdir:
            outdir, _ = os.path.split(ftptext)

        # "with" guarantees the text file is closed even if a download raises
        with open(ftptext, 'r') as ftp:
            sites = ftp.readlines()

        print("Attempting to download {0} files!".format(len(sites)))
        print("Saving all files to {0}".format(outdir))

        # perform the first attempt
        failed = download_urls(sites, outdir, filetypes)

        # retry failed downloads: 19 retries + the first attempt = 20 tries
        # (was range(1, 19), which only made 19 total attempts)
        for i in range(1, 20):
            if len(failed) > 0:
                print("retry number {0} to grab {1} failed downloads!".format(i, len(failed)))
                time.sleep(60)

                # BUGFIX: argument order now matches the first attempt
                # (urls, outdir, filetypes); outdir/filetypes were swapped before
                failed = download_urls(failed, outdir, filetypes)

        # once all tries are complete, print a list of files which repeatedly failed
        if len(failed) > 0:
            print('Files at the following URLs have failed 20 download attempts')
            print('Manually verify that these files exist on the server:')
            for url in failed:
                print(url)
        else:
            print('Finished with no errors!')

    return failed
def download_filelist(ftp_texts, file_type=None, outdir=None):
    """
    Reads text file of download links, downloads them.

    This script reads a text file with urls such as those output from ECHO
    REVERB and saves the files to an output directory. It will retry failed
    links 19 more times (20 attempts total) before giving up and outputting
    a warning to the user.

    :param ftp_texts:   array of txt files ordered from reverb containing
                        ftp links
    :param file_type:   file extension of the desired files, leave blank or
                        None to grab all types.
    :param outdir:      folder where files are to be placed after download

    :return list failed: list of urls which still failed to download after
                         all retries.
    """

    failed = []

    # force inputs to take list format
    ftp_texts = core.enf_list(ftp_texts)
    if file_type is not None:
        file_type = core.enf_list(file_type)

    for ftptext in ftp_texts:
        # verify that the url list file exists
        core.exists(ftptext)

        # default the output directory to the folder holding the url list
        if not outdir:
            outdir, _ = os.path.split(ftptext)

        # "with" guarantees the text file is closed even if a download raises
        with open(ftptext, 'r') as ftp:
            sites = ftp.readlines()

        print("Attempting to download {0} files!".format(len(sites)))
        print("Saving all files to {0}".format(outdir))

        # perform the first attempt
        failed = download_urls(sites, outdir, file_type)

        # retry failed downloads: 19 retries + the first attempt = 20 tries
        # (was range(1, 19), which only made 19 total attempts)
        for i in range(1, 20):
            if len(failed) > 0:
                print("retry number {0} to grab {1} failed downloads!".format(
                    i, len(failed)))
                time.sleep(60)

                # BUGFIX: argument order now matches the first attempt
                # (urls, outdir, file_type); file_type/outdir were swapped before
                failed = download_urls(failed, outdir, file_type)

        # once all tries are complete, print a list of files which repeatedly failed
        if len(failed) > 0:
            print('Files at the following URLs have failed 20 download attempts')
            print('Manually verify that these files exist on the server:')
            for url in failed:
                print(url)
        else:
            print('Finished with no errors!')

    return failed
def project_resample(filelist, reference_file, outdir=False,
                     resampling_type=None, cell_size=None):
    """
    Wrapper for multiple arcpy projecting functions. Projects to reference file.

    Inputs a filelist and a reference file, then projects all rasters
    or feature classes in the filelist to match the projection of the
    reference file. Writes new files with a "_p" appended to the end of
    the input filenames. This also will perform resampling.

    Inputs:
        filelist        list of files to be projected
        reference_file  either a file with the desired projection, or a
                        ".prj" file.
        outdir          optional output directory. If none is specified,
                        output files are named with '_p' as a suffix.
        resampling_type exactly as the input for arcmap's
                        ProjectRaster_management function
        cell_size       exactly as the input for arcmap's
                        ProjectRaster_management function

    Output:
        output_filelist list of filepaths to the projected output files
    """

    output_filelist = []

    # sanitize inputs
    core.exists(reference_file)

    rasterlist  = enf_rastlist(filelist)
    featurelist = core.enf_featlist(filelist)
    cleanlist   = rasterlist + featurelist

    # ensure output directory exists (only when one was actually given;
    # os.path.exists(False) raises under Python 3)
    if outdir and not os.path.exists(outdir):
        os.makedirs(outdir)

    # grab data about the spatial reference of the reference file (prj or otherwise)
    if reference_file[-3:] == 'prj':
        Spatial_Reference = arcpy.SpatialReference(reference_file)
    else:
        Spatial_Reference = arcpy.Describe(reference_file).spatialReference

    # determine cell size from the reference file when not supplied
    if cell_size is None:
        cx = arcpy.GetRasterProperties_management(reference_file, "CELLSIZEX").getOutput(0)
        cy = arcpy.GetRasterProperties_management(reference_file, "CELLSIZEY").getOutput(0)
        cell_size = "{0} {1}".format(cx, cy)

    # determine whether coordinate system is projected or geographic and print info
    if Spatial_Reference.type == 'Projected':
        print('Found {0} projected coord system'.format(
            Spatial_Reference.PCSName))
    else:
        print('Found {0} geographic coord system'.format(
            Spatial_Reference.GCSName))

    for filename in cleanlist:

        # create the output filename
        outname = core.create_outname(outdir, filename, 'p')

        # BUGFIX: record the output file path; previously the
        # SpatialReference object was appended, making the return useless
        output_filelist.append(outname)

        # use ProjectRaster_management for raster files
        if is_rast(filename):
            arcpy.ProjectRaster_management(filename, outname, Spatial_Reference,
                                           resampling_type, cell_size)
            print('Wrote projected and resampled file to {0}'.format(outname))

        # otherwise, use Project_management for featureclasses and featurelayers
        else:
            arcpy.Project_management(filename, outname, Spatial_Reference)
            print('Wrote projected file to {0}'.format(outname))

    print("finished projecting!")
    return output_filelist
def spatially_match(snap_raster, rasterlist, outdir,
                    NoData_Value = False, resamp_type = False):
    """
    Prepares input rasters for further numerical processing.

    This function simply ensures all rasters in "rasterlist" are identically
    projected and have the same cell size, then calls the raster.clip_and_snap
    function to ensure that the cells are perfectly coincident and that the
    total spatial extents of the images are identical, even when NoData values
    are considered. This is useful because it allows the images to be passed on
    for numerical processing as nothing more than matrices of values, and the
    user can be sure that any index in any matrix is exactly coincident with
    the same index in any other matrix. This is especially important when
    comparing datasets from different sources outside arcmap, for example
    MODIS and Landsat data with an ASTER DEM.

    inputs:
        snap_raster     raster to which all other images will be snapped
        rasterlist      list of rasters, a single raster, or a directory full
                        of tiffs which will be clipped to the extent of
                        "snap_raster" and aligned such that the cells are
                        perfectly coincident.
        outdir          the output directory to save newly created spatially
                        matched tifs.
        resamp_type     The resampling type to use if images are not identical
                        cell sizes. "NEAREST", "BILINEAR", and "CUBIC" are the
                        most common.

    this function automatically invokes
        clip_and_snap
        project_resample
    """

    # sanitize inputs and build working directories
    tempdir = os.path.join(outdir, 'temp')

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    if not os.path.isdir(tempdir):
        os.makedirs(tempdir)

    rasterlist = enf_rastlist(rasterlist)
    core.exists(snap_raster)

    # set the snap raster environment in arcmap
    arcpy.env.snapRaster = snap_raster

    print('Loading snap raster {0}'.format(snap_raster))
    _, snap_meta = to_numpy(snap_raster)
    print('Bounds of rectangle to define boundaries: [{0}]'.format(snap_meta.rectangle))

    # for every raster in the raster list, snap rasters and clip
    for rastname in rasterlist:

        # BUGFIX: reset per-raster; previously usetemp stayed True after the
        # first reprojection, so later rasters were clipped from a stale tempname
        usetemp = False
        tempname = None

        _, meta = to_numpy(rastname)
        head, tail = os.path.split(rastname)

        # reproject this raster when its projection differs from the snap raster
        if snap_meta.projection.projectionName != meta.projection.projectionName:
            print('Projection discrepancy found. Reprojecting...')
            project_resample(rastname, snap_raster, tempdir, resamp_type)
            tempname = core.create_outname(tempdir, tail, "p")
            usetemp = True

        # define an output name and run the clip_and_snap function on formatted tifs
        outname = core.create_outname(outdir, rastname, "sm")

        # if a temporary file was created in previous steps, use that one for clip and snap
        if usetemp:
            clip_and_snap(snap_raster, tempname, outname, NoData_Value)
        else:
            clip_and_snap(snap_raster, rastname, outname, NoData_Value)

        print('Finished matching raster {0}'.format(rastname))

    return
def project_resample(filelist, reference_file, outdir = False,
                     resampling_type = None, cell_size = None):
    """
    Wrapper for multiple arcpy projecting functions. Projects to reference file.

    Inputs a filelist and a reference file, then projects all rasters
    or feature classes in the filelist to match the projection of the
    reference file. Writes new files with a "_p" appended to the end of
    the input filenames. This also will perform resampling.

    Inputs:
        filelist        list of files to be projected
        reference_file  either a file with the desired projection, or a
                        ".prj" file.
        outdir          optional output directory. If none is specified,
                        output files are named with '_p' as a suffix.
        resampling_type exactly as the input for arcmap's
                        ProjectRaster_management function
        cell_size       exactly as the input for arcmap's
                        ProjectRaster_management function

    Output:
        output_filelist list of filepaths to the projected output files
    """

    output_filelist = []

    # sanitize inputs
    core.exists(reference_file)

    rasterlist  = enf_rastlist(filelist)
    featurelist = core.enf_featlist(filelist)
    cleanlist   = rasterlist + featurelist

    # ensure output directory exists (only when one was actually given;
    # os.path.exists(False) raises under Python 3)
    if outdir and not os.path.exists(outdir):
        os.makedirs(outdir)

    # grab data about the spatial reference of the reference file (prj or otherwise)
    if reference_file[-3:] == 'prj':
        Spatial_Reference = arcpy.SpatialReference(reference_file)
    else:
        Spatial_Reference = arcpy.Describe(reference_file).spatialReference

    # determine cell size from the reference file when not supplied
    if cell_size is None:
        cx = arcpy.GetRasterProperties_management(reference_file, "CELLSIZEX").getOutput(0)
        cy = arcpy.GetRasterProperties_management(reference_file, "CELLSIZEY").getOutput(0)
        cell_size = "{0} {1}".format(cx, cy)

    # determine whether coordinate system is projected or geographic and print info
    if Spatial_Reference.type == 'Projected':
        print('Found {0} projected coord system'.format(Spatial_Reference.PCSName))
    else:
        print('Found {0} geographic coord system'.format(Spatial_Reference.GCSName))

    for filename in cleanlist:

        # create the output filename
        outname = core.create_outname(outdir, filename, 'p')

        # BUGFIX: record the output file path; previously the
        # SpatialReference object was appended, making the return useless
        output_filelist.append(outname)

        # use ProjectRaster_management for raster files
        if is_rast(filename):
            arcpy.ProjectRaster_management(filename, outname, Spatial_Reference,
                                           resampling_type, cell_size)
            print('Wrote projected and resampled file to {0}'.format(outname))

        # otherwise, use Project_management for featureclasses and featurelayers
        else:
            arcpy.Project_management(filename, outname, Spatial_Reference)
            print('Wrote projected file to {0}'.format(outname))

    print("finished projecting!")
    return output_filelist
def spatially_match(snap_raster, rasterlist, outdir, NoData_Value=False,
                    resamp_type=False):
    """
    Prepares input rasters for further numerical processing.

    This function simply ensures all rasters in "rasterlist" are identically
    projected and have the same cell size, then calls the raster.clip_and_snap
    function to ensure that the cells are perfectly coincident and that the
    total spatial extents of the images are identical, even when NoData values
    are considered. This is useful because it allows the images to be passed on
    for numerical processing as nothing more than matrices of values, and the
    user can be sure that any index in any matrix is exactly coincident with
    the same index in any other matrix. This is especially important when
    comparing datasets from different sources outside arcmap, for example
    MODIS and Landsat data with an ASTER DEM.

    inputs:
        snap_raster     raster to which all other images will be snapped
        rasterlist      list of rasters, a single raster, or a directory full
                        of tiffs which will be clipped to the extent of
                        "snap_raster" and aligned such that the cells are
                        perfectly coincident.
        outdir          the output directory to save newly created spatially
                        matched tifs.
        resamp_type     The resampling type to use if images are not identical
                        cell sizes. "NEAREST", "BILINEAR", and "CUBIC" are the
                        most common.

    this function automatically invokes
        clip_and_snap
        project_resample
    """

    # sanitize inputs and build working directories
    tempdir = os.path.join(outdir, 'temp')

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    if not os.path.isdir(tempdir):
        os.makedirs(tempdir)

    rasterlist = enf_rastlist(rasterlist)
    core.exists(snap_raster)

    # set the snap raster environment in arcmap
    arcpy.env.snapRaster = snap_raster

    print('Loading snap raster {0}'.format(snap_raster))
    _, snap_meta = to_numpy(snap_raster)
    print('Bounds of rectangle to define boundaries: [{0}]'.format(
        snap_meta.rectangle))

    # for every raster in the raster list, snap rasters and clip
    for rastname in rasterlist:

        # BUGFIX: reset per-raster; previously usetemp stayed True after the
        # first reprojection, so later rasters were clipped from a stale tempname
        usetemp = False
        tempname = None

        _, meta = to_numpy(rastname)
        head, tail = os.path.split(rastname)

        # reproject this raster when its projection differs from the snap raster
        if snap_meta.projection.projectionName != meta.projection.projectionName:
            print('Projection discrepancy found. Reprojecting...')
            project_resample(rastname, snap_raster, tempdir, resamp_type)
            tempname = core.create_outname(tempdir, tail, "p")
            usetemp = True

        # define an output name and run the clip_and_snap function on formatted tifs
        outname = core.create_outname(outdir, rastname, "sm")

        # if a temporary file was created in previous steps, use that one for clip and snap
        if usetemp:
            clip_and_snap(snap_raster, tempname, outname, NoData_Value)
        else:
            clip_and_snap(snap_raster, rastname, outname, NoData_Value)

        print('Finished matching raster {0}'.format(rastname))

    return
def grab_info(filepath, data_type = False, CustGroupings = None):
    """
    Extracts in-filename metadata from common NASA data products.

    This function simply extracts relevant sorting information from a MODIS
    or Landsat filepath of any type or product and returns object properties
    relevant to that data. It will be expanded to include additional data
    products in the future.

    Inputs:
        filepath        Full or partial filepath to any modis product tile
        data_type       Manually tell the software what the data is.
        CustGroupings   User defined sorting by julian days of specified bin
                        widths. input of 5 for example will group January
                        1,2,3,4,5 in the first bin and january 6,7,8,9,10 in
                        the second bin, etc.

    Outputs:
        info            object containing the attributes (product, year, day,
                        tile, etc). Retrieve values with "info.product",
                        "info.year", etc.

    Attributes by data type:
        All         type, year, j_day, month, day, season, CustGroupings, suffix
        MODIS       product, tile
        Landsat     sensor, satellite, WRSpath, WRSrow, groundstationID,
                    Version, band

    Attribute descriptions:
        type            NASA data type, for example 'MODIS' and 'Landsat'
        year            four digit year the data was taken
        j_day           julian day 1 to 365 or 366 for leap years
        month           three character month abbreviation
        day             day of the month
        season          'Winter', 'Spring', 'Summer', or 'Autumn'
        CustGroupings   bin number of data according to custom group value,
                        sorted by julian day
        suffix          any additional trailing information in the filename.
                        used to find details about special products, usually
                        a level 3 data product from a sensor such as MOD11A1
        tile            MODIS sinusoidal tile in h##v## format
        sensor          Landsat sensor
        satellite       usually 5, 7, or 8 for the landsat satellite
        WRSpath         Landsat path designator
        WRSrow          Landsat row designator
        groundstationID ground station which received the data download from
                        the satellite
        Version         version of landsat data product
        band            band of landsat data product, usually 1 through 10 or 11
    """

    # pull the filename and path apart
    path, name = os.path.split(filepath)

    # create an info object class instance
    class info_object(object):
        pass
    info = info_object()

    # figure out what kind of data these files are
    if not data_type:
        data_type = identify(name)

    if data_type == 'MODIS':
        params = ['product', 'year', 'j_day', 'tile', 'type',
                  'version', 'tag', 'suffix']
        n = name.split('.')
        end = n[4]
        string = [n[0], name[9:13], name[13:16], n[2], 'MODIS',
                  n[3], end[:13], end[13:]]

    elif data_type == 'Landsat':
        params = ['sensor', 'satellite', 'WRSpath', 'WRSrow', 'year',
                  'j_day', 'groundstationID', 'Version', 'band', 'type',
                  'suffix']
        n = name.split('.')[0]
        string = [n[1], n[2], n[3:6], n[6:9], n[9:13], n[13:16],
                  n[16:19], n[19:21], n[23:].split('_')[0],
                  'Landsat', '_'.join(n[23:].split('_')[1:])]

    elif data_type == 'WELD_CONUS' or data_type == 'WELD_AK':
        params = ['coverage', 'period', 'year', 'tile',
                  'start_day', 'end_day', 'type']
        n = name.split('.')
        string = [n[0], n[1], n[2], n[3], n[4][4:6], n[4][8:11], 'WELD']

        # take everything after the first underscore as a suffix if one exists
        if '_' in name:
            params.append('suffix')
            string.append('_'.join(name.split('_')[1:]))

    elif data_type == 'ASTER':
        params = ['product', 'N', 'W', 'type', 'period']
        n = name.split('_')
        string = [n[0], n[1][1:3], n[1][5:9], n[-1].split('.')[0], 'none']

    elif data_type == 'TRMM':
        # BUGFIX: converted Python-2 print statements to print() calls;
        # the rest of this file already uses the print function
        print('{Grab_Data_Info} no support for TRMM data yet! you could add it!')
        return False

    elif data_type == 'AMSR_E':
        print('{Grab_Data_Info} no support for AMSR_E data yet! you could add it!')
        return False

    elif data_type == 'AIRS':
        print('{Grab_Data_Info} no support for AIRS data yet! you could add it!')
        return False

    # if data doesn't look like anything!
    else:
        print('Data type for file [{0}] could not be identified as any supported type'.format(name))
        print('improve this function by adding info for this datatype!')
        return False

    # create attributes and assign parameter names and values
    for param, value in zip(params, string):
        setattr(info, param, value)

    # perform additional date gathering only if data has no info.period
    # attribute. Images with this attribute represent data produced from
    # many dates, not just one day.
    if not hasattr(info, 'period'):

        # many files are named according to julian day; derive the calendar
        # date info for those files
        try:
            tempinfo = datetime.datetime(int(info.year), 1, 1) + \
                       datetime.timedelta(int(int(info.j_day) - 1))
            info.month = tempinfo.strftime('%b')
            info.day = tempinfo.day
        # some files are named according to date; derive the julian day for
        # those files (narrowed from a bare except: these are the failures
        # the fallback path is designed to handle)
        except (AttributeError, ValueError):
            fmt = '%Y.%m.%d'
            tempinfo = datetime.datetime.strptime(
                '.'.join([info.year, info.month, info.day]), fmt)
            info.j_day = tempinfo.strftime('%j')

        # fill in the seasons by checking the value of julian day
        if int(info.j_day) <= 78 or int(info.j_day) >= 355:
            info.season = 'Winter'
        elif int(info.j_day) <= 171:
            info.season = 'Spring'
        elif int(info.j_day) <= 265:
            info.season = 'Summer'
        elif int(info.j_day) <= 354:
            info.season = 'Autumn'

        # bin by julian day if integer group width was input
        if CustGroupings is not None:
            CustGroupings = core.enf_list(CustGroupings)
            for grouping in CustGroupings:
                if isinstance(grouping, int):
                    groupname = 'custom' + str(grouping)
                    # BUGFIX: floor division keeps bin numbers integral under
                    # Python 3 (plain "/" became float division)
                    setattr(info, groupname,
                            1 + (int(info.j_day) - 1) // grouping)
                else:
                    print('invalid custom grouping entered!')
                    print('CustGrouping must be one or more integers in a list')

    # make sure the filepath input actually leads to a real file,
    # then give the user the info
    if core.exists(filepath):
        return info
    else:
        return False