def fetch_TRMM(start_dto, end_dto, outdir, product_string):
    """
    Fetches TRMM data from an FTP server.

    ftp://trmmopen.gsfc.nasa.gov/trmmdata/ByDate/V07/

    :param start_dto:       datetime object for start date of desired range
    :param end_dto:         datetime object for end date of desired range
    :param outdir:          output directory where files should be saved (str)
    :param product_string:  the string for the desired product, options include
                            1B11, 1B21, 1CTMI, 2A12, 2A21, 2A23, 2A25, 2B31,
                            3B42, 3G25, 3G31. The usual precip product of
                            interest is the well known 3B42 data product.

    :return output_files:   a list of new filepaths created by this function
    """

    output_files = []

    ftpsite = "ftp://pps.gsfc.nasa.gov"
    un = "*****@*****.**"

    # build every date between start and end, inclusive
    date_delta = end_dto - start_dto
    dates = [start_dto + timedelta(days=i) for i in range(date_delta.days + 1)]

    for date in dates:
        # navigate to path of desired year/month/day
        workdir = '/'.join(['trmmdata', 'ByDate', 'V07',
                            str(date.year),
                            str(date.month).zfill(2),
                            str(date.day).zfill(2)])

        filenames, filepaths = list_ftp(site=ftpsite, dir=workdir,
                                        username=un, password=un)

        for filename in filenames:
            if product_string not in filename:
                continue
            try:
                outname = os.path.join(outdir, os.path.basename(filename))
                download_url(ftpsite + filename, outname,
                             username=un, password=un)

                # now extract it out of its GZ format, then delete the archive
                extracted = outname.replace(".gz", "")
                with gzip.open(outname, 'rb') as gz:
                    with open(extracted, 'wb') as f:
                        f.write(gz.read())
                os.remove(outname)

                # BUG FIX: the original appended the .gz path, which this
                # function deletes right after extraction, so callers got a
                # list of nonexistent files. Record the extracted file.
                output_files.append(extracted)

                print("downloaded and extracted {0}".format(
                    os.path.basename(filename)))
            # was a bare "except:", which also swallows KeyboardInterrupt;
            # narrowed so user interrupts still propagate.
            except Exception:
                print("failed to download {0}".format(
                    os.path.basename(filename)))

    print("Finished downloading TRMM files!")
    return output_files
def fetch_GPM_IMERG(start_dto, end_dto, outdir, product="gis", time_res="1day"):
    """
    Fetches 30 minute resolution GPM IMERG data from an ftp server. Several
    restrictions exist for this relatively new dataset, please read the
    input section carefully.

    http://pps.gsfc.nasa.gov/Documents/GPM_Data_Info_140616.pdf

    :param start_dto:   datetime object for starting time of study boundary
    :param end_dto:     datetime object for ending time of study boundary
    :param outdir:      output directory to save the data
    :param product:     either "early", "late" or "final" for full HDF5 data
                        stacks of the respective runs, which are all at 30
                        minute resolutions. OR product can be set equal to
                        "gis" (default) to find only tif averages of the
                        precipitation estimates. This gis tif data is only
                        provided for data less than one year old.
    :param time_res:    if "product" is set to "gis", specify what time
                        average period you want. options are "30min", "3hr",
                        "1day", "3day", "7day". Defaults to "1day"

    :return:            a list of filepaths to freshly downloaded files

    learn more at [http://pmm.nasa.gov/data-access/downloads/gpm]
    """

    # set up empty list of downloaded filepaths on local dir
    download_list = []

    # username and password info, should eventually be some DEVELOP credential.
    # this information is not at all sensitive.
    login = "******"

    # special filtering for gis type tif data to minimize data representation overlap.
    if product == "gis":
        if time_res == "30min":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 30)]
        elif time_res == "3hr":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 180)]
        else:
            ok_minutes = "0000"

    # assemble address information
    pps_server = r"ftp://jsimpson.pps.eosdis.nasa.gov"

    # set product directory
    prod_server = "/".join(["NRTPUB/imerg", product])

    # log in and list available month folders.
    foldnames, foldpaths = list_ftp(site=pps_server, dir=prod_server,
                                    username=login, password=login)

    # BUG FIX: the original removed entries from foldnames while iterating
    # over it, which silently skips the element following each removal.
    # Build a new filtered list of purely numeric folder names instead.
    month_folders = []
    for foldname in foldnames:
        try:
            int(foldname)
            month_folders.append(foldname)
        except ValueError:
            pass

    for foldname in month_folders:
        print("exploring directory '{0}'".format(foldname))
        subdir = "/".join([prod_server, foldname])
        filenames, filepaths = list_ftp(site=pps_server, dir=subdir,
                                        username=login, password=login)

        for filepath in filepaths:
            # parse date, minute-of-day, and time-resolution fields out of
            # the dot-delimited server filename.
            filename = os.path.basename(filepath)
            finfo = filename.split(".")
            date_cords = finfo[4]
            minutes = finfo[5]
            time = finfo[7]

            date_str = date_cords.split("-")[0]
            date = datetime.strptime(date_str, "%Y%m%d") + \
                   timedelta(minutes=int(minutes))

            # see if this file meets criteria for download
            good_date = start_dto <= date <= end_dto

            if product == "gis":
                good_minutes = minutes in ok_minutes
                good_time = time_res == time
            else:
                good_minutes = True
                good_time = True

            # download the files
            if good_date and good_time and good_minutes:
                outname = os.path.join(outdir, date.strftime("%Y-%m-%d"), filename)
                download_url(filepath, outname, username=login, password=login)
                print("saved '{0}' in '{1}'".format(filename, outdir))
                download_list.append(outname)

    return download_list
def fetch_MODIS(product, version, tiles, outdir, years,
                j_days=False, force_overwrite=False):
    """
    Fetch MODIS Land products from one of two servers.

        http://e4ftl01.cr.usgs.gov
        ftp://n5eil01u.ecs.nsidc.org

    :param product:         MODIS product to download such as 'MOD10A1' or 'MYD11A1'
    :param version:         version number, usually '004' or '041' or '005'
    :param tiles:           list of tiles to grab such as ['h11v12','h11v11']
    :param outdir:          output directory to save downloaded files
    :param years:           list of years to grab such as range(2001,2014)
    :param j_days:          list of days to grab such as range(31,60).
                            Defaults to all days in year
    :param force_overwrite: will re-download files even if they already exist
    """

    def Find_MODIS_Product(product, version):
        """
        Subfunction to determine server properties for MODIS data product.
        returns http/ftp handles

        the two current servers where aqua/terra MODIS data can be downloaded are
            site1 = 'http://e4ftl01.cr.usgs.gov'
            site2 = 'n5eil01u.ecs.nsidc.org'

        :param product:  modis product such as 'MOD10A1'
        :param version:  modis version, usually '005', '004', or '041'

        :return site:   server address where data can be found
        :return isftp:  True when the data must be fetched over ftp
        :return Dir:    subdirectory of server to further search for files
                        of input product (False for http sites)
        """

        sat_designation = product[0:3]
        prod_ID = product[3:]

        site1 = 'http://e4ftl01.cr.usgs.gov/'
        site2 = 'n5eil01u.ecs.nsidc.org'

        isftp = False
        Dir = False

        # products whose ID contains '10' live on the NSIDC ftp server
        if '10' in prod_ID:
            isftp = True
            site = site2

        # refine the address of the desired data product
        if sat_designation == 'MOD':
            if isftp:
                Dir = 'MOST/' + product + '.' + version
            else:
                site = site1 + 'MOLT/' + product + '.' + version
        elif sat_designation == 'MYD':
            if isftp:
                Dir = 'DP1/MOSA/' + product + '.' + version
            else:
                site = site1 + 'MOLA/' + product + '.' + version
        elif sat_designation == 'MCD':
            site = site1 + 'MOTA/' + product + '.' + version
        else:
            print('No such MODIS product is available for download with this script!')
            site = "None"

        return site, isftp, Dir

    # check formats
    tiles = core.enf_list(tiles)
    years = [str(year) for year in core.enf_list(years)]

    if isinstance(j_days, list):
        js = [str(j_day).zfill(3) for j_day in j_days]
    elif isinstance(j_days, int) and j_days != False:
        # BUG FIX: zero-pad a single julian day, otherwise it can never
        # match the zero-padded output of strftime("%j") used below.
        js = [str(j_days).zfill(3)]
    else:
        js = [str(x).zfill(3) for x in range(367)]

    # do a quick input tile check for 6 characters.
    for tile in tiles:
        if not len(tile) == 6:
            print("Warning! your tiles appear to be invalid!")
            print("Warning! make sure they are in format 'h##v##")

    # create output directories
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print("Connecting to servers!")

    # obtain the web address, protocol information, and subdirectory where
    # this type of MODIS data can be found.
    site, isftp, Dir = Find_MODIS_Product(product, version)

    if Dir:
        print("Connected to {0}/{1}".format(site, Dir))
    else:
        print("Connected to {0}".format(site))

    # Depending on the type of connection (ftp vs http) populate the file list
    try:
        if isftp:
            dates, _ = list_ftp(site, False, False, Dir)
        else:
            dates = list_http(site)
    # was a bare "except:"; narrowed so KeyboardInterrupt propagates.
    except Exception:
        raise ValueError("Could not connect to {0}/{1}".format(site, Dir))

    # refine contents down to just addresses of valid year and j_day
    good_dates = []
    for date in dates:
        try:
            dto = datetime.strptime(date, "%Y.%m.%d")
            j_day = dto.strftime("%j")
            year = dto.strftime("%Y")
        except ValueError:
            print("skipping non date folder name {0}".format(date))
            continue

        # keep dates in the requested years; apply the julian-day filter
        # only when the caller supplied one.
        if year in years and (not j_days or j_day in js):
            good_dates.append(date)

    print('Found {0} days within range'.format(len(good_dates)))

    # for all folders within the desired date range, map the subfolder contents.
    for good_date in good_dates:
        if isftp:
            files, _ = list_ftp(site, False, False, Dir + '/' + good_date)
        else:
            files = list_http(site + '/' + good_date)

        for afile in files:
            # only list files with desired tile names and not preview jpgs
            if '.jpg' in afile:
                continue
            for tile in tiles:
                if tile in afile:
                    # assemble the address
                    if isftp:
                        address = '/'.join(['ftp://' + site, Dir, good_date, afile])
                    else:
                        address = '/'.join([site, good_date, afile])

                    # BUG FIX: original condition was
                    # "not os.path.isfile(outname) and not force_overwrite",
                    # which made force_overwrite=True skip every download.
                    # Download when missing, or always when forced.
                    outname = os.path.join(outdir, afile)
                    if not os.path.isfile(outname) or force_overwrite:
                        download_url(address, outname)
                        print('Downloaded {0}'.format(address))

    print('Finished retrieving MODIS - {0} data!'.format(product))
    return
def fetch_GPM_IMERG(start_dto, end_dto, outdir, product="gis", time_res="1day"):
    """
    Fetches 30 minute resolution GPM IMERG data from an ftp server. Several
    restrictions exist for this relatively new dataset, please read the
    input section carefully.

    http://pps.gsfc.nasa.gov/Documents/GPM_Data_Info_140616.pdf

    :param start_dto:   datetime object for starting time of study boundary
    :param end_dto:     datetime object for ending time of study boundary
    :param outdir:      output directory to save the data
    :param product:     either "early" , "late" or "final" for full HDF5 data
                        stacks of the respective runs, which are all at
                        30minute resolutions. OR product can be set equal to
                        "gis" (default) to find only tif averages of the
                        precipitation estimates. This gis tif data is ONLY
                        provided for data less than one year old.
    :param time_res:    if "product" is set to "gis", specify what time
                        average period you want. options are "30min", "3hr",
                        "1day", "3day", "7day". Defaults to "1day"

    :return: Returns a list of filepaths to freshly downloaded files

    learn more at [http://pmm.nasa.gov/data-access/downloads/gpm]
    """

    # set up empty list of downloaded filepaths on local dir
    download_list = []

    # username and password info, should eventually be some DEVELOP credential.
    # this information is not at all sensitive.
    login = "******"

    # special filtering for gis type tif data to minimize data representation overlap.
    if product == "gis":
        if time_res == "30min":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 30)]
        elif time_res == "3hr":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 180)]
        else:
            ok_minutes = "0000"

    # assemble address information
    pps_server = r"ftp://jsimpson.pps.eosdis.nasa.gov"

    # set product directory
    prod_server = "/".join(["NRTPUB/imerg", product])

    # log in and list available month folders.
    foldnames, foldpaths = list_ftp(site=pps_server, dir=prod_server,
                                    username=login, password=login)

    # BUG FIX: the original removed entries from foldnames while iterating
    # over it, which silently skips the element following each removal.
    # Build a new filtered list of purely numeric folder names instead.
    month_folders = []
    for foldname in foldnames:
        try:
            int(foldname)
            month_folders.append(foldname)
        except ValueError:
            pass

    for foldname in month_folders:
        print("exploring directory '{0}'".format(foldname))
        subdir = "/".join([prod_server, foldname])
        filenames, filepaths = list_ftp(site=pps_server, dir=subdir,
                                        username=login, password=login)

        for filepath in filepaths:
            # parse date, minute-of-day, and time-resolution fields out of
            # the dot-delimited server filename.
            filename = os.path.basename(filepath)
            finfo = filename.split(".")
            date_cords = finfo[4]
            minutes = finfo[5]
            time = finfo[7]

            date_str = date_cords.split("-")[0]
            date = datetime.strptime(date_str, "%Y%m%d") + \
                   timedelta(minutes=int(minutes))

            # see if this file meets criteria for download
            good_date = start_dto <= date <= end_dto

            if product == "gis":
                good_minutes = minutes in ok_minutes
                good_time = time_res == time
            else:
                good_minutes = True
                good_time = True

            # download the files
            if good_date and good_time and good_minutes:
                outname = os.path.join(outdir, date.strftime("%Y-%m-%d"), filename)
                download_url(filepath, outname, username=login, password=login)
                print("saved '{0}' in '{1}'".format(filename, outdir))
                download_list.append(outname)

    return download_list
def fetch_TRMM(start_dto, end_dto, outdir, product_string):
    """
    Fetches TRMM data from an FTP server.

    ftp://trmmopen.gsfc.nasa.gov/trmmdata/ByDate/V07/

    :param start_dto:       datetime object for start date of desired range
    :param end_dto:         datetime object for end date of desired range
    :param outdir:          output directory where files should be saved (str)
    :param product_string:  the string for the desired product, options include
                            1B11, 1B21, 1CTMI, 2A12, 2A21, 2A23, 2A25, 2B31,
                            3B42, 3G25, 3G31. The usual precip product of
                            interest is the famous 3B42 data product.

    :return output_files:   a list of new filepaths created by this function
    """

    output_files = []

    ftpsite = "ftp://pps.gsfc.nasa.gov"
    un = "*****@*****.**"

    # build every date between start and end, inclusive
    date_delta = end_dto - start_dto
    dates = [start_dto + timedelta(days=i) for i in range(date_delta.days + 1)]

    for date in dates:
        # navigate to path of desired year/month/day
        workdir = '/'.join(['trmmdata', 'ByDate', 'V07',
                            str(date.year),
                            str(date.month).zfill(2),
                            str(date.day).zfill(2)])

        filenames, filepaths = list_ftp(site=ftpsite, dir=workdir,
                                        username=un, password=un)

        for filename in filenames:
            if product_string not in filename:
                continue
            try:
                outname = os.path.join(outdir, os.path.basename(filename))
                download_url(ftpsite + filename, outname,
                             username=un, password=un)

                # now extract it out of its GZ format, then delete the archive
                extracted = outname.replace(".gz", "")
                with gzip.open(outname, 'rb') as gz:
                    with open(extracted, 'wb') as f:
                        f.write(gz.read())
                os.remove(outname)

                # BUG FIX: the original appended the .gz path, which this
                # function deletes right after extraction, so callers got a
                # list of nonexistent files. Record the extracted file.
                output_files.append(extracted)

                print("downloaded and extracted {0}".format(
                    os.path.basename(filename)))
            # was a bare "except:", which also swallows KeyboardInterrupt;
            # narrowed so user interrupts still propagate.
            except Exception:
                print("failed to download {0}".format(
                    os.path.basename(filename)))

    print("Finished downloading TRMM files!")
    return output_files
def fetch_MODIS(product, version, tiles, outdir, start_dto, end_dto,
                force_overwrite=False):
    """
    Fetch MODIS Land products from one of two servers. If this function
    runs and downloads 0 files, check that your inputs are consistent
    with the naming convention at the appropriate server address.

        http://e4ftl01.cr.usgs.gov
        ftp://n5eil01u.ecs.nsidc.org

    :param product:         MODIS product to download such as 'MOD10A1' or 'MYD11A1'
    :param version:         version number, usually '004' or '041' or '005'
    :param tiles:           list of tiles to grab such as ['h11v12','h11v11']
                            NOTE: for some MODIS products, the h and v are omitted.
    :param outdir:          output directory to save downloaded files
    :param start_dto:       datetime object, the starting date of the range of data to download
    :param end_dto:         datetime object, the ending date of the range of data to download
    :param force_overwrite: will re-download files even if they already exist

    :return out_filepaths:  a list of filepaths to all files created by this function
    """

    out_filepaths = []

    # check formats
    tiles = core.enf_list(tiles)

    # create output directories
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print("Connecting to servers!")

    # obtain the web address, protocol information, and subdirectory where
    # this type of MODIS data can be found.
    site, isftp, Dir = _find_modis_product(product, version)

    if Dir:
        print("Connected to {0}/{1}".format(site, Dir))
    else:
        print("Connected to {0}".format(site))

    # Depending on the type of connection (ftp vs http) populate the file list
    try:
        if isftp:
            dates, _ = list_ftp(site, False, False, Dir)
        else:
            dates = list_http_e4ftl01(site)
    # was a bare "except:"; narrowed so KeyboardInterrupt propagates.
    except Exception:
        raise ValueError("Could not connect to {0}/{1}".format(site, Dir))

    # refine contents down to just date folders inside the requested range
    good_dates = []
    for date in dates:
        try:
            date_dto = datetime.strptime(date, "%Y.%m.%d")
            if start_dto <= date_dto <= end_dto:
                good_dates.append(date)
        except ValueError:
            print("skipping non date folder name {0}".format(date))

    print('Found {0} days within range'.format(len(good_dates)))

    # for all folders within the desired date range, map the subfolder contents.
    for good_date in good_dates:
        if isftp:
            files, _ = list_ftp(site, False, False, Dir + '/' + good_date)
        else:
            files = list_http_e4ftl01(site + '/' + good_date)

        for afile in files:
            # only list files with desired tile names and not preview jpgs
            if '.jpg' in afile:
                continue
            for tile in tiles:
                if tile in afile:
                    # assemble the address
                    if isftp:
                        address = '/'.join(['ftp://' + site, Dir, good_date, afile])
                    else:
                        address = '/'.join([site, good_date, afile])

                    # BUG FIX: original condition was
                    # "not os.path.isfile(outname) and not force_overwrite",
                    # which made force_overwrite=True skip every download.
                    # Download when missing, or always when forced.
                    outname = os.path.join(outdir, afile)
                    out_filepaths.append(outname)
                    if not os.path.isfile(outname) or force_overwrite:
                        download_url(address, outname)
                        print('Downloaded {0}'.format(address))

    print("Finished retrieving MODIS - {0} data!".format(product))
    print("Downloaded {0} files".format(len(out_filepaths)))
    return out_filepaths
def fetch_MODIS(product, version, tiles, outdir, start_dto, end_dto,
                force_overwrite=False):
    """
    Fetch MODIS Land products from one of two servers. If this function
    runs and downloads 0 files, check that your inputs are consistent
    with the naming convention at the appropriate server address.

        http://e4ftl01.cr.usgs.gov
        ftp://n5eil01u.ecs.nsidc.org

    :param product:         MODIS product to download such as 'MOD10A1' or 'MYD11A1'
    :param version:         version number, usually '004' or '041' or '005'
    :param tiles:           list of tiles to grab such as ['h11v12','h11v11']
                            NOTE: for some MODIS products, the h and v are omitted.
    :param outdir:          output directory to save downloaded files
    :param start_dto:       datetime object, the starting date of the range of data to download
    :param end_dto:         datetime object, the ending date of the range of data to download
    :param force_overwrite: will re-download files even if they already exist

    :return out_filepaths:  a list of filepaths to all files created by this function
    """

    out_filepaths = []

    # check formats
    tiles = core.enf_list(tiles)

    # create output directories
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print("Connecting to servers!")

    # obtain the web address, protocol information, and subdirectory where
    # this type of MODIS data can be found.
    site, isftp, Dir = _find_modis_product(product, version)

    if Dir:
        print("Connected to {0}/{1}".format(site, Dir))
    else:
        print("Connected to {0}".format(site))

    # Depending on the type of connection (ftp vs http) populate the file list
    try:
        if isftp:
            dates, _ = list_ftp(site, False, False, Dir)
        else:
            dates = list_http_e4ftl01(site)
    # was a bare "except:"; narrowed so KeyboardInterrupt propagates.
    except Exception:
        raise ValueError("Could not connect to {0}/{1}".format(site, Dir))

    # refine contents down to just date folders inside the requested range
    good_dates = []
    for date in dates:
        try:
            date_dto = datetime.strptime(date, "%Y.%m.%d")
            if start_dto <= date_dto <= end_dto:
                good_dates.append(date)
        except ValueError:
            print("skipping non date folder name {0}".format(date))

    print('Found {0} days within range'.format(len(good_dates)))

    # for all folders within the desired date range, map the subfolder contents.
    for good_date in good_dates:
        if isftp:
            files, _ = list_ftp(site, False, False, Dir + '/' + good_date)
        else:
            files = list_http_e4ftl01(site + '/' + good_date)

        for afile in files:
            # only list files with desired tile names and not preview jpgs
            if '.jpg' in afile:
                continue
            for tile in tiles:
                if tile in afile:
                    # assemble the address
                    if isftp:
                        address = '/'.join(['ftp://' + site, Dir, good_date, afile])
                    else:
                        address = '/'.join([site, good_date, afile])

                    # BUG FIX: original condition was
                    # "not os.path.isfile(outname) and not force_overwrite",
                    # which made force_overwrite=True skip every download.
                    # Download when missing, or always when forced.
                    outname = os.path.join(outdir, afile)
                    out_filepaths.append(outname)
                    if not os.path.isfile(outname) or force_overwrite:
                        download_url(address, outname)
                        print('Downloaded {0}'.format(address))

    print("Finished retrieving MODIS - {0} data!".format(product))
    print("Downloaded {0} files".format(len(out_filepaths)))
    return out_filepaths