def ingest_IMERG(startYYYYMMDD, endYYYYMMDD):
    """Ingest daily IMERG TIF rasters for an inclusive date range.

    For every day between startYYYYMMDD and endYYYYMMDD (both 'YYYYMMDD'
    strings) the expected TIF is looked up in the configured input folder,
    stamped with the default IMERG projection/geotransform, and its first
    band is written into the HDF datastore.  After a successful run the
    API capabilities record in the local DB is updated.

    NOTE: every day in the range is indexed under the START year's HDF
    file (yearForHDF), so a range spanning a year boundary would mis-file
    the later days -- keep ranges within a single calendar year.
    """
    # Set the Datatype number
    current_DataTypeNumber = 34  # Hardcoded until there are more IMERG types in here..

    # Instance of Imerg Data Classes
    IMERG_DataClass = IDC.IMERG_Data()

    # Convert to dates
    dateFormat = "%Y%m%d"
    start_Date = datetime.datetime.strptime(startYYYYMMDD, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMMDD, dateFormat)

    # Build the per-day list of expected TIF file names.
    dataset_Obj_List = []
    delta = end_Date - start_Date
    for i in range(delta.days + 1):
        currentDate = start_Date + datetime.timedelta(days=i)
        tifFileName = IMERG_DataClass.get_Expected_Tif_FileName(
            currentDate.year, currentDate.month, currentDate.day)
        dataset_Obj_List.append({
            "Tif_File_Name": tifFileName,
            "year": currentDate.year,
            "month": currentDate.month,
            "day": currentDate.day
        })

    # Folder where TIF and TFW files end up.
    input_Dataset_Folder = params.dataTypes[current_DataTypeNumber]['inputDataLocation']

    # Other vars needed for the loop
    itemsCounter = 0
    ingest_Error_List = []
    capabilities_DateFormatString = "%Y_%m_%d"
    last_YYYY_MM_DD_Processed = None

    # All data for this run goes into the HDF file for the start year.
    yearForHDF = int(startYYYYMMDD[0:4])
    dataStore = dataS.datastorage(current_DataTypeNumber, yearForHDF, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']

    # Do the actual ingest.
    for currentObj in dataset_Obj_List:
        try:
            # Try to ingest the file; record an error entry on failure.
            fileName = currentObj['Tif_File_Name']
            fileToProcess = os.path.join(input_Dataset_Folder, fileName)

            # IMERG rasters ship in several product versions; fall back
            # from 03E to 04A to 04B when the expected file is missing.
            if not os.path.isfile(fileToProcess):
                fileToProcess = fileToProcess.replace("03E", "04A")
            if not os.path.isfile(fileToProcess):
                fileToProcess = fileToProcess.replace("04A", "04B")

            print("-Processing File: " + str(fileToProcess))

            theYear = yearForHDF  # intentionally the HDF year, not currentObj['year']
            theMonth = currentObj['month']
            theDay = currentObj['day']

            # Open with the update flag so the projection can be written;
            # IMERG TIFs do not come with projection information.
            ds = georead.openGeoTiff_WithUpdateFlag(fileToProcess)
            ds.SetProjection(IMERG_DataClass.get_DefaultProjection_String())
            ds.SetGeoTransform(IMERG_DataClass.get_DefaultGeoTransform_Obj())

            # Get the values to save (just like in all the other ingest procedures).
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()

            # Read band 1 and store it at the date-based index.
            img = georead.readBandFromFile(ds, 1)
            ds = None  # release the GDAL dataset handle
            index = indexer.getIndexBasedOnDate(theDay, theMonth, theYear)
            dataStore.putData(index, img)

            last_YYYY_MM_DD_Processed = str(theYear) + "_" + str("%02d" % theMonth) + "_" + str("%02d" % theDay)
            itemsCounter += 1
        except Exception:
            # Keep going with the remaining days; report failures at the end.
            e = sys.exc_info()[0]
            errorStr = "-ERROR Ingesting File: " + str(fileName) + " System Error Message: " + str(e)
            print(str(errorStr))
            ingest_Error_List.append(errorStr)

    # Close and save the data
    dataStore.close()

    if itemsCounter > 0:
        dataS.writeSpatialInformation(params.dataTypes[current_DataTypeNumber]['directory'], prj, grid, yearForHDF)
        print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(yearForHDF))

        # Update the capabilities
        try:
            # TODO: merge with the existing capabilities record instead of
            # overwriting it wholesale.  Right now the last item ingested
            # becomes 'endDateTime', which is wrong when re-ingesting a
            # subset in the middle of the data range.
            capabilities_Info = {
                "name": params.dataTypes[current_DataTypeNumber]['name'],
                "description": params.dataTypes[current_DataTypeNumber]['description'],
                "size": params.dataTypes[current_DataTypeNumber]['size'],
                "fillValue": params.dataTypes[current_DataTypeNumber]['fillValue'],
                "data_category": params.dataTypes[current_DataTypeNumber]['data_category'],
                "projection": prj,
                "grid": grid,
                # Hardcoded start of the IMERG dataset.
                "startDateTime": "2015_03_08",
                "endDateTime": last_YYYY_MM_DD_Processed,
                "date_FormatString_For_ForecastRange": capabilities_DateFormatString
            }

            # Write the capabilities info to the local DB.
            theJSONString = json.dumps(capabilities_Info)
            conn = bdp.BDDbConnector_Capabilities()
            conn.set_DataType_Capabilities_JSON(current_DataTypeNumber, theJSONString)
            conn.close()
            print("-API Datatype Capabilities for datatype number: " + str(current_DataTypeNumber) + " written to local DB as: " + str(theJSONString))
        except Exception:
            print("-WARNING: Data was ingested on this run AND there was an issue updating the API Capabilities local DB")
    else:
        print("No Items found for year: " + str(yearForHDF))

    print(str(len(ingest_Error_List)) + " errors associated with ingest items.")
    print("")
    print("Output of per-item Error Log: " + str(ingest_Error_List))
    print("")
def download_and_Extract_IMERG_2015_Dataset():
    """Download and extract the one-off 2015 IMERG dataset (2015-03-07 .. 2015-12-31).

    For each day in the hardcoded range, downloads the gzipped TIF and TFW
    files over FTP into the configured input folder, gunzips them, and
    removes the temporary .gz files.  Per-file errors are printed and
    collected, but do not stop the run.
    """
    # Set the Datatype number
    current_DataTypeNumber = 26  # Hardcoded until there are more IMERG types in here..

    # Instance of Imerg Data Classes
    IMERG_DataClass = IDC.IMERG_Data()

    # Hard coding because this is a one off dataset
    startYYYYMMDD = "20150307"
    endYYYYMMDD = "20151231"

    # Convert to dates
    dateFormat = "%Y%m%d"
    start_Date = datetime.datetime.strptime(startYYYYMMDD, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMMDD, dateFormat)

    # Build the expected FTP paths, one {tif, tfw} pair per day.
    expected_FTP_FilePaths = []
    delta = end_Date - start_Date
    for i in range(delta.days + 1):
        currentDate = start_Date + datetime.timedelta(days=i)
        tifPath = IMERG_DataClass.get_Expected_FTP_FilePath_To_Tif_2015Dataset(
            currentDate.year, currentDate.month, currentDate.day)
        tfwPath = IMERG_DataClass.get_Expected_FTP_FilePath_To_Tfw_2015Dataset(
            currentDate.year, currentDate.month, currentDate.day)
        expected_FTP_FilePaths.append({"ftpPathTo_tif": tifPath, "ftpPathTo_tfw": tfwPath})

    # Create the destination folder if it does not exist.
    dataDestinationFolder = params.dataTypes[current_DataTypeNumber]['inputDataLocation']
    print("-Data Destination Folder (Downloading To) : " + str(dataDestinationFolder))
    testFolderPath = os.path.dirname(dataDestinationFolder)
    if not os.path.exists(testFolderPath):
        os.makedirs(testFolderPath)
        print("-Created a new folder at path: " + str(testFolderPath))

    # Connect to the FTP Server; bail out if the connection fails.
    ftp_Connection = None
    try:
        ftp_Connection = ftplib.FTP(IMERG_DataClass.FTP_Host,
                                    IMERG_DataClass.FTP_UserName,
                                    IMERG_DataClass.FTP_UserPass)
        time.sleep(1)
    except Exception:
        e = sys.exc_info()[0]
        print("-ERROR Connecting to FTP.. bailing out..., System Error Message: " + str(e))
        return

    print("-Downloading, extracting and removing temp files... this may take a few minutes....")

    downloadCounter = 0
    # Iterate through all of our expected file paths
    for ftpFullFilePaths in expected_FTP_FilePaths:
        isError = False
        errorLog = []

        # print progress
        if downloadCounter % 10 == 0:
            print("-Downloaded (extracted and removed temp zipfiles): " + str(downloadCounter) + " rasters so far..")

        # Local .gz destinations (keep the FTP file names).
        filenameOnly_Tif = ftpFullFilePaths['ftpPathTo_tif'].split('/')[-1]
        filenameOnly_Tfw = ftpFullFilePaths['ftpPathTo_tfw'].split('/')[-1]
        local_FullFilePath_ToSave_Tif = os.path.join(dataDestinationFolder, filenameOnly_Tif)
        local_FullFilePath_ToSave_Tfw = os.path.join(dataDestinationFolder, filenameOnly_Tfw)

        # Extracted names: strip the trailing '.gz'.
        local_FullFilePath_ToExtract_Tif = local_FullFilePath_ToSave_Tif[:-3]
        local_FullFilePath_ToExtract_Tfw = local_FullFilePath_ToSave_Tfw[:-3]

        # Server-side paths to retrieve.
        # NOTE(review): the helper's name suggests it returns a FOLDER path;
        # confirm it yields the right argument for RETR below.
        ftp_PathTo_TIF = IMERG_DataClass._get_FTP_FolderPath_From_FullFilePath(
            ftpFullFilePaths['ftpPathTo_tif'])
        # BUGFIX: was built from 'ftpPathTo_tif', which fetched the TIF twice
        # and never downloaded the TFW.
        ftp_PathTo_TFW = IMERG_DataClass._get_FTP_FolderPath_From_FullFilePath(
            ftpFullFilePaths['ftpPathTo_tfw'])

        # Download the Tif
        try:
            with open(local_FullFilePath_ToSave_Tif, "wb") as f:
                ftp_Connection.retrbinary("RETR " + ftp_PathTo_TIF, f.write)
        except Exception:
            errorStr = "-ERROR Downloading TIF file: " + ftp_PathTo_TIF
            print(errorStr)
            errorLog.append(errorStr)
            isError = True
        # Give the FTP Connection a short break (Server spam protection mitigation)
        time.sleep(1)

        # Download the Tfw
        try:
            with open(local_FullFilePath_ToSave_Tfw, "wb") as f:
                ftp_Connection.retrbinary("RETR " + ftp_PathTo_TFW, f.write)
        except Exception:
            errorStr = "-ERROR Downloading TFW file: " + ftp_PathTo_TFW
            print(errorStr)
            errorLog.append(errorStr)
            isError = True
        # Give the FTP Connection a short break (Server spam protection mitigation)
        time.sleep(1)

        # Extract the Tif file ('with' guarantees the handles close on error).
        try:
            with gzip.open(local_FullFilePath_ToSave_Tif, 'rb') as inF:
                with open(local_FullFilePath_ToExtract_Tif, 'wb') as outF:
                    outF.write(inF.read())
        except Exception:
            errorStr = "-ERROR Extracting the TIF file: " + local_FullFilePath_ToSave_Tif
            print(errorStr)
            errorLog.append(errorStr)
            isError = True

        # Extract the Tfw file
        try:
            with gzip.open(local_FullFilePath_ToSave_Tfw, 'rb') as inF:
                with open(local_FullFilePath_ToExtract_Tfw, 'wb') as outF:
                    outF.write(inF.read())
        except Exception:
            errorStr = "-ERROR Extracting the TFW file: " + local_FullFilePath_ToSave_Tfw
            print(errorStr)
            errorLog.append(errorStr)
            isError = True

        # Remove the temporary TIF.gz file
        try:
            os.remove(local_FullFilePath_ToSave_Tif)
        except Exception:
            errorStr = "-ERROR Removing the tif.gz file: " + local_FullFilePath_ToSave_Tif
            print(errorStr)
            errorLog.append(errorStr)
            isError = True

        # Remove the temporary TFW.gz file
        try:
            os.remove(local_FullFilePath_ToSave_Tfw)
        except Exception:
            errorStr = "-ERROR Removing the tfw.gz file: " + local_FullFilePath_ToSave_Tfw
            print(errorStr)
            errorLog.append(errorStr)
            isError = True

        if isError == True:
            # TODO: clean up partially-downloaded files?
            pass

        downloadCounter += 1
def download_IMERG(startYYYYMMDD, endYYYYMMDD):
    """Download daily IMERG TIF/TFW file pairs for an inclusive date range.

    Args:
        startYYYYMMDD: inclusive start date as a 'YYYYMMDD' string.
        endYYYYMMDD:   inclusive end date as a 'YYYYMMDD' string.

    Files are fetched with urllib.urlretrieve into the configured input
    folder; when a download fails, the pre-created placeholder file is
    removed so no empty raster is left behind.
    """
    # Set the Datatype number
    current_DataTypeNumber = 26  # Hardcoded until there are more IMERG types in here..

    # Instance of Imerg Data Classes
    IMERG_DataClass = IDC.IMERG_Data()

    # Convert to dates
    dateFormat = "%Y%m%d"
    start_Date = datetime.datetime.strptime(startYYYYMMDD, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMMDD, dateFormat)

    # Build the list of expected remote paths, one {tif, tfw} pair per day.
    expected_FTP_FilePaths = []
    delta = end_Date - start_Date
    for i in range(delta.days + 1):
        currentDate = start_Date + datetime.timedelta(days=i)
        print(currentDate)
        tifPath = IMERG_DataClass.get_Expected_FTP_FilePath_To_Tif(
            currentDate.year, currentDate.month, currentDate.day)
        tfwPath = IMERG_DataClass.get_Expected_FTP_FilePath_To_Tfw(
            currentDate.year, currentDate.month, currentDate.day)
        # Skip days for which no remote TIF path could be built.
        if len(tifPath.strip()) > 0:
            expected_FTP_FilePaths.append({"ftpPathTo_tif": tifPath, "ftpPathTo_tfw": tfwPath})

    # Create the destination folder if it does not exist.
    dataDestinationFolder = params.dataTypes[current_DataTypeNumber]['inputDataLocation']
    print("-Data Destination Folder (Downloading To) : " + str(dataDestinationFolder))
    testFolderPath = os.path.dirname(dataDestinationFolder)
    if not os.path.exists(testFolderPath):
        os.makedirs(testFolderPath)
        print("-Created a new folder at path: " + str(testFolderPath))

    # Kept for parity with the FTP-based downloaders (unused here).
    ftp_Connection = None
    print("-Downloading files... this may take a few minutes....")

    downloadCounter = 0
    # Iterate through all of our expected file paths
    for ftpFullFilePaths in expected_FTP_FilePaths:
        isError = False
        errorLog = []

        if downloadCounter % 10 == 0:
            print("-Downloaded: " + str(downloadCounter) + " rasters so far..")

        # Local names: the TFW shares the TIF's name with the extension swapped
        # ('.tif'[:-2] + 'fw' -> '.tfw').
        filenameOnly_Tif = ftpFullFilePaths['ftpPathTo_tif'].split('/')[-1]
        filenameOnly_Tfw = filenameOnly_Tif[:-2] + "fw"
        local_FullFilePath_ToSave_Tif = os.path.join(dataDestinationFolder, filenameOnly_Tif)
        local_FullFilePath_ToSave_Twf = os.path.join(dataDestinationFolder, filenameOnly_Tfw)

        # Remote paths; the TFW URL is the TIF URL with the extension swapped.
        ftp_PathTo_TIF = ftpFullFilePaths['ftpPathTo_tif']
        ftp_PathTo_TWF = ftpFullFilePaths['ftpPathTo_tif'][:-2] + "fw"

        # Download the Tif (pre-create a world-writable placeholder file).
        fx = open(local_FullFilePath_ToSave_Tif, "wb")
        fx.close()
        os.chmod(local_FullFilePath_ToSave_Tif, 0o777)
        print("creating file: " + local_FullFilePath_ToSave_Tif)
        print("download path: " + ftp_PathTo_TIF)
        try:
            urllib.urlretrieve(ftp_PathTo_TIF, local_FullFilePath_ToSave_Tif)
        except Exception as e:
            os.remove(local_FullFilePath_ToSave_Tif)
            print("removing the tif file: " + str(e))
            isError = True  # BUGFIX: flag was checked below but never set

        # Download the Tfw
        fx = open(local_FullFilePath_ToSave_Twf, "wb")
        fx.close()
        os.chmod(local_FullFilePath_ToSave_Twf, 0o777)
        try:
            urllib.urlretrieve(ftp_PathTo_TWF, local_FullFilePath_ToSave_Twf)
        except Exception:
            os.remove(local_FullFilePath_ToSave_Twf)
            print("removing the twf file")
            isError = True  # BUGFIX: flag was checked below but never set

        if isError == True:
            # TODO: clean up / retry the failed pair?
            pass

        downloadCounter += 1