def pairLidarFiles(lidarFolder):
    '''For each pair of adjacent lidar files generate a double size point cloud.
    We can use these later since they do not have any gaps between adjacent files.

    lidarFolder: folder containing the converted .csv lidar files.  The merged
    pair files are written to a 'paired' subfolder of it.  Returns nothing.
    Raises Exception if a merged output file fails to appear on disk.'''
    logger = logging.getLogger(__name__)
    logger.info('Generating lidar pairs...')

    # Create the output folder
    pairFolder = os.path.join(lidarFolder, 'paired')
    os.system('mkdir -p ' + pairFolder)

    # All files in the folder
    lidarFiles = os.listdir(lidarFolder)

    # Get just the files we converted to csv format
    csvFiles = []
    for f in lidarFiles:
        extension = os.path.splitext(f)[1]
        if extension == '.csv':
            csvFiles.append(f)
    csvFiles.sort()
    numCsvFiles = len(csvFiles)

    # Loop through all pairs of adjacent csv files in the folder.
    # Bugfix: n sorted files form n - 1 adjacent pairs, so iterate over
    # range(numCsvFiles - 1); the original range(0, numCsvFiles - 2)
    # silently skipped the final pair.
    for i in range(numCsvFiles - 1):
        thisFile = csvFiles[i]
        nextFile = csvFiles[i + 1]

        date2, time2 = icebridge_common.parseTimeStamps(nextFile)

        # Record the name with the second file
        # - More useful because the time for the second file represents the middle of the file.
        outputName = 'LIDAR_PAIR_' + date2 + '_' + time2 + '.csv'

        # Handle paths
        path1 = os.path.join(lidarFolder, thisFile)
        path2 = os.path.join(lidarFolder, nextFile)
        outputPath = os.path.join(pairFolder, outputName)
        if os.path.exists(outputPath):
            continue

        # Concatenate the two files; 'tail -n +2' drops the header line
        # of the second file so it does not appear mid-data.
        cmd1 = 'cat ' + path1 + ' > ' + outputPath
        cmd2 = 'tail -n +2 -q ' + path2 + ' >> ' + outputPath
        logger.info(cmd1)
        p = subprocess.Popen(cmd1, stdout=subprocess.PIPE, shell=True)
        out, err = p.communicate()
        logger.info(cmd2)
        p = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True)
        out, err = p.communicate()

        if not os.path.exists(outputPath):
            raise Exception('Failed to generate merged LIDAR file: ' + outputPath)
def massRenameByGlob(self, startFrame, stopFrame, orthoFrameDict, globStr, logger):
    '''Auxiliary function used below.  Rename every file matching globStr whose
    frame number lies in [startFrame, stopFrame] so that its date/time stamps
    agree with the corresponding ortho image from orthoFrameDict.  If a file
    with the target name already exists, the source file is wiped instead.'''
    files = glob.glob(globStr)
    for fileName in files:
        # This is rather fragile, try to ignore certain types of files
        if ('_sub' in fileName) or ('pct.tif' in fileName) or ('_hillshade_' in fileName):
            continue

        [prefix, dateString, timeString, frameString, suffix] = \
            icebridge_common.parseParts(fileName)
        if frameString == "":
            logger.info("Could not parse frame and time stamps from: " + fileName)
            continue

        frame = int(frameString)
        if frame < startFrame or frame > stopFrame:
            continue
        if frame not in orthoFrameDict:
            # Bugfix: frame is an int; it must be converted before string
            # concatenation (the original raised a TypeError on this path).
            logger.info("Missing ortho for frame: " + str(frame))
            continue

        # Build the new name from the ortho image's date/time stamps
        [newDateString, newTimeString] = icebridge_common.parseTimeStamps(orthoFrameDict[frame])
        newFile = prefix + icebridge_common.formFilePrefix(newDateString, newTimeString,
                                                          frame) + suffix
        if not os.path.exists(fileName):
            continue
        if fileName == newFile:
            continue
        if os.path.exists(newFile):
            logger.info("File exists: " + newFile + ", will wipe " + fileName)
            os.system("rm -f " + fileName)
            continue

        logger.info("Renaming: " + fileName + " to " + newFile)
        os.system("mv -f " + fileName + " " + newFile)
def massRenameByGlob(self, startFrame, stopFrame, orthoFrameDict, globStr, logger):
    '''Auxiliary function used below.  Rename every file matching globStr whose
    frame number lies in [startFrame, stopFrame] so that its date/time stamps
    agree with the corresponding ortho image from orthoFrameDict.  If a file
    with the target name already exists, the source file is wiped instead.'''
    files = glob.glob(globStr)
    for fileName in files:
        # This is rather fragile, try to ignore certain types of files
        if ('_sub' in fileName) or ('pct.tif' in fileName) or ('_hillshade_' in fileName):
            continue

        [prefix, dateString, timeString, frameString, suffix] = \
            icebridge_common.parseParts(fileName)
        if frameString == "":
            logger.info("Could not parse frame and time stamps from: " + fileName)
            continue

        frame = int(frameString)
        if frame < startFrame or frame > stopFrame:
            continue
        if frame not in orthoFrameDict:
            # Bugfix: frame is an int; it must be converted before string
            # concatenation (the original raised a TypeError on this path).
            logger.info("Missing ortho for frame: " + str(frame))
            continue

        # Build the new name from the ortho image's date/time stamps
        [newDateString, newTimeString] = icebridge_common.parseTimeStamps(orthoFrameDict[frame])
        newFile = prefix + icebridge_common.formFilePrefix(newDateString, newTimeString,
                                                          frame) + suffix
        if not os.path.exists(fileName):
            continue
        if fileName == newFile:
            continue
        if os.path.exists(newFile):
            logger.info("File exists: " + newFile + ", will wipe " + fileName)
            os.system("rm -f " + fileName)
            continue

        logger.info("Renaming: " + fileName + " to " + newFile)
        os.system("mv -f " + fileName + " " + newFile)
def fetchAndParseIndexFileAux(isSouth, separateByLat, dayVal, baseCurlCmd, folderUrl, path, fileType):
    '''Retrieve the index file for a folder of data and create a parsed version of
    it that contains frame number / filename pairs.

    isSouth:       True for Antarctic flights (used when validating latitudes).
    separateByLat: if True, reject files whose latitude is on the wrong
                   hemisphere (some folders mix GR and AN data).
    dayVal:        0 for the current flight day, 1 for the next day (spillover).
    baseCurlCmd:   curl command prefix used for all downloads.
    folderUrl:     remote folder whose index.html is fetched.
    path:          local path where the html index is temporarily written.
    fileType:      one of 'jpeg', 'ortho', 'fireball', 'lvis', 'atm1', 'atm2'.

    Returns (frameDict, urlDict): frame number -> filename and
    frame number -> folder url.  Raises Exception if two non-lidar files
    share a frame number.

    NOTE(review): relies on module-level names logger, MAX_IN_ONE_CALL,
    LIDAR_TYPES, hasGoodLat and icebridge_common defined elsewhere in
    this file.'''

    # Download the html file
    curlCmd = baseCurlCmd + ' ' + folderUrl + ' > ' + path
    logger.info(curlCmd)
    p = subprocess.Popen(curlCmd, shell=True)
    os.waitpid(p.pid, 0)

    # Find all the file names in the index file and
    # dump them to a new index file
    logger.info('Extracting file name list from index.html file...')
    with open(path, 'r') as f:
        indexText = f.read()

    # Must wipe this html file. We fetch it too often in different
    # contexts. If not wiped, the code fails to work in some
    # very rare but real situations.
    if os.path.exists(path):
        os.remove(path)

    # Extract just the file names; each fileType has its own naming pattern.
    fileList = [] # ensure initialization
    if fileType == 'jpeg':
        fileList = re.findall(">[0-9_]*.JPG", indexText, re.IGNORECASE)
    if fileType == 'ortho':
        fileList = re.findall(">DMS\w*.tif<", indexText, re.IGNORECASE)
    if fileType == 'fireball':
        # Fireball DEMs
        fileList = re.findall(">IODMS\w*DEM.tif", indexText, re.IGNORECASE)
    if fileType == 'lvis':
        fileList = re.findall(">ILVIS\w+.TXT", indexText, re.IGNORECASE)
    if fileType == 'atm1':
        fileList = re.findall(">ILATM1B[0-9_]*.ATM4\w+.qi", indexText, re.IGNORECASE)
        # >ILATM1B_20111018_145455.ATM4BT4.qi
        # or >ILATM1B_20091016_165112.atm4cT3.qi
    if fileType == 'atm2':
        # Match ILATM1B_20160713_195419.ATM5BT5.h5
        fileList = re.findall(">ILATM1B[0-9_]*.ATM\w+.h5", indexText, re.IGNORECASE)

    # Get rid of '>' and '<'
    for fileIter in range(len(fileList)):
        fileList[fileIter] = fileList[fileIter].replace(">", "")
        fileList[fileIter] = fileList[fileIter].replace("<", "")

    # Some runs, eg, https://n5eil01u.ecs.nsidc.org/ICEBRIDGE/IODMS1B.001/2015.09.24
    # have files for both GR and AN, with same frame number. Those need to be separated
    # by latitude. This is a problem only with orthoimages.
    badXmls = set()
    outputFolder = os.path.dirname(path)
    if separateByLat:
        # Fetch the xml metadata file of every image in batches, then mark
        # the ones whose latitude is on the wrong hemisphere as bad.
        allFilesToFetch = []
        allUrlsToFetch = []
        for filename in fileList:
            xmlFile = icebridge_common.xmlFile(filename)
            url = os.path.join(folderUrl, xmlFile)
            outputPath = os.path.join(outputFolder, xmlFile)
            allFilesToFetch.append(outputPath)
            allUrlsToFetch.append(url)
        dryRun = False
        icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL, dryRun,
                                             outputFolder, allFilesToFetch,
                                             allUrlsToFetch, logger)
        # Mark the bad ones
        for xmlFile in allFilesToFetch:
            latitude = icebridge_common.parseLatitude(xmlFile)
            isGood = hasGoodLat(latitude, isSouth)
            if not isGood:
                badXmls.add(xmlFile)
    elif (fileType == 'ortho' or fileType == 'fireball'):
        # Sometimes there is a large gap in the timestamp. That means orthoimages
        # from previous day are spilling over. If dayVal is 0, we must ignore
        # the spillover images. If dayVal is 1, we must keep the spillover images
        # and ignore the others.
        list1 = []
        list2 = []
        isBigGap = False
        prevStamp = -1
        for filename in fileList:
            [imageDateString, imageTimeString] = icebridge_common.parseTimeStamps(filename)
            currStamp = float(imageTimeString)/1000000.0 # hours
            if prevStamp < 0:
                list1.append(filename)
                prevStamp = currStamp
                continue
            # Note that once isBigGap becomes true, it stays true
            # even when the gap gets small again
            if currStamp - prevStamp >= 6: # six hour gap is a lot
                isBigGap = True
            if not isBigGap:
                list1.append(filename)
            else:
                list2.append(filename)
            prevStamp = currStamp # for next iteration
        if isBigGap:
            if dayVal == 0:
                fileList = list2[:] # current day
            else:
                fileList = list1[:] # spillover from prev day

    # For each entry that matched the regex, record: the frame number and the file name.
    frameDict = {}
    urlDict = {}
    badFiles = []
    for filename in fileList:
        if len(badXmls) > 0:
            # Skip files rejected by the latitude check above
            xmlFile = os.path.join(outputFolder, icebridge_common.xmlFile(filename))
            if xmlFile in badXmls:
                continue
        frame = icebridge_common.getFrameNumberFromFilename(filename)
        if frame in frameDict.keys():
            # The same frame must not occur twice.
            if fileType not in LIDAR_TYPES:
                logger.error("Error: Found two file names with same frame number: " + \
                             frameDict[frame] + " and " + filename)
                badFiles.append(filename)
                badFiles.append(frameDict[frame])
        # note that folderUrl can vary among orthoimages, as sometimes
        # some of them are in a folder for the next day.
        frameDict[frame] = filename
        urlDict[frame] = folderUrl

    # Wipe them all, to be sorted later
    for badFile in badFiles:
        if os.path.exists(badFile):
            logger.info("Deleting: " + badFile)
            os.remove(badFile)
        xmlFile = icebridge_common.xmlFile(badFile)
        if os.path.exists(xmlFile):
            logger.info("Deleting: " + xmlFile)
            os.remove(xmlFile)
    if len(badFiles) > 0:
        raise Exception("Found files with same frame number")

    return (frameDict, urlDict)
def fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder):
    '''Create a list of all files that must be fetched unless done already.

    options:      parsed command-line options (type, yyyymmdd, site, year,
                  month, day, refetchIndex, etc.).  May be mutated: for lidar,
                  options.type is set to the lidar variant actually found and
                  options.allFrames is forced True.
    isSouth:      True for Antarctic flights.
    baseCurlCmd:  curl command prefix used for all downloads.
    outputFolder: folder where the html/csv index files live.

    Writes the combined csv index (frame -> filename, frame -> url) and
    returns its path.

    NOTE(review): relies on module-level names logger, LIDAR_TYPES,
    getFolderUrl, checkIfUrlExists, hasGoodLat, isInSeparateByLatTable,
    twoFlightsInOneDay and icebridge_common defined elsewhere in this file.'''

    # For AN 20091112, etc, some of the ortho images are stored at the
    # beginning of the next day's flight. Need to sort this out, and
    # it is tricky. More comments within the code.
    fetchNextDay = True

    separateByLat = (options.type == 'ortho' and isInSeparateByLatTable(options.yyyymmdd))
    if separateByLat:
        # Here we won't fetch the next day, we will just separate by latitude within
        # a given day
        fetchNextDay = False

    orthoOrFireball = ((options.type == 'ortho') or (options.type == 'fireball'))

    if fetchNextDay:
        # Normally we fetch for next day only for ortho or fireball. However,
        # for one single special flight, we do it for jpeg too, as then
        # the jpegs are also split.
        if orthoOrFireball or \
           ((options.type == 'jpeg') and twoFlightsInOneDay(options.site, options.yyyymmdd)):
            fetchNextDay = True
        else:
            fetchNextDay = False

    # If we need to parse the next flight day as well, as expected in some runs,
    # we will fetch two html files, but create a single index out of them.
    dayVals = [0]
    if fetchNextDay:
        dayVals.append(1)

    indexPath = icebridge_common.htmlIndexFile(outputFolder)
    currIndexPath = indexPath

    parsedIndexPath = icebridge_common.csvIndexFile(outputFolder)

    if options.refetchIndex:
        os.system('rm -f ' + indexPath)
        os.system('rm -f ' + parsedIndexPath)

    if icebridge_common.fileNonEmpty(parsedIndexPath):
        logger.info('Already have the index file ' + parsedIndexPath + ', keeping it.')
        return parsedIndexPath

    frameDict = {}
    urlDict = {}

    # We need the list of jpeg frames. Sometimes when fetching ortho images,
    # and we have to fetch from the next day, don't fetch unless
    # in the jpeg index.
    if len(dayVals) > 1 and options.type != 'jpeg':
        jpegFolder = icebridge_common.getJpegFolder(os.path.dirname(outputFolder))
        jpegIndexPath = icebridge_common.csvIndexFile(jpegFolder)
        (jpegFrameDict, jpegUrlDict) = icebridge_common.readIndexFile(jpegIndexPath)

    orthoStamp = {}
    if options.type == 'fireball':
        # This is a bugfix. Ensure that the fireball DEM has not just
        # the same frame number, but also same timestamp as the ortho.
        orthoFolder = icebridge_common.getOrthoFolder(os.path.dirname(outputFolder))
        orthoIndexPath = icebridge_common.csvIndexFile(orthoFolder)
        (orthoFrameDict, orthoUrlDict) = icebridge_common.readIndexFile(orthoIndexPath)
        for frame in sorted(orthoFrameDict.keys()):
            filename = orthoFrameDict[frame]
            [imageDateString, imageTimeString] = icebridge_common.parseTimeStamps(filename)
            orthoStamp[frame] = imageTimeString

    for dayVal in dayVals:

        if len(dayVals) > 1:
            currIndexPath = indexPath + '.day' + str(dayVal)
            if options.refetchIndex:
                os.system('rm -f ' + currIndexPath)

        # Find folderUrl which contains all of the files
        if options.type in LIDAR_TYPES:
            options.allFrames = True # For lidar, always get all the frames!

            # For lidar, the data can come from one of three sources.
            # Unfortunately sometimes there is more than one source, and then
            # we need to pick by latitude.
            folderUrls = []
            lidar_types = []
            for lidar in LIDAR_TYPES:
                folderUrl = getFolderUrl(options.yyyymmdd, options.year, options.month,
                                         options.day, dayVal, # note here the dayVal
                                         options.site, lidar)
                logger.info('Checking lidar URL: ' + folderUrl)
                if checkIfUrlExists(folderUrl):
                    logger.info('Found match with lidar type: ' + lidar)
                    folderUrls.append(folderUrl)
                    lidar_types.append(lidar)

            if len(folderUrls) == 0:
                # NOTE(review): on this path localFrameDict is never assigned,
                # so the append loop below would raise NameError -- presumably
                # unreachable in practice; confirm with callers.
                logger.info('WARNING: Could not find any lidar data for the given date!')
            elif len(folderUrls) == 1:
                # Unique solution
                folderUrl = folderUrls[0]
                options.type = lidar_types[0]
            elif len(folderUrls) >= 2:
                # Multiple solutions. Pick the good one by latitude.
                logger.info("Multiples URLs to search: " + " ".join(folderUrls))
                count = -1
                isGood = False
                for folderUrl in folderUrls:
                    count += 1
                    (localFrameDict, localUrlDict) = \
                        fetchAndParseIndexFileAux(isSouth, separateByLat, dayVal,
                                                  baseCurlCmd, folderUrl,
                                                  currIndexPath, lidar_types[count])
                    for frame in sorted(localFrameDict.keys()):
                        filename = localFrameDict[frame]
                        xmlFile = icebridge_common.xmlFile(filename)
                        url = os.path.join(folderUrl, xmlFile)

                        # Download the file
                        curlCmd = baseCurlCmd + ' ' + url + ' > ' + xmlFile
                        logger.info(curlCmd)
                        p = subprocess.Popen(curlCmd, shell=True)
                        os.waitpid(p.pid, 0)

                        latitude = icebridge_common.parseLatitude(xmlFile)
                        if os.path.exists(xmlFile):
                            os.remove(xmlFile)

                        if hasGoodLat(latitude, isSouth):
                            isGood = True
                            options.type = lidar_types[count]
                            logger.info("Good latitude " + str(latitude) + ", will use " +
                                        folderUrl + " of type " + lidar_types[count])
                        else:
                            logger.info("Bad latitude " + str(latitude) + ", will not use " +
                                        folderUrl + " of type " + lidar_types[count])

                        # Stop at first file no matter what
                        break

                    if isGood:
                        break

                if not isGood:
                    if options.type in LIDAR_TYPES and options.ignoreMissingLidar:
                        logger.info("No lidar. None of these URLs are good: " +
                                    " ".join(folderUrls))
                    else:
                        raise Exception("None of these URLs are good: " +
                                        " ".join(folderUrls))

        else: # Other cases are simpler
            folderUrl = getFolderUrl(options.yyyymmdd, options.year, options.month,
                                     options.day, dayVal, # note here the dayVal
                                     options.site, options.type)
            logger.info('Fetching from URL: ' + folderUrl)
            (localFrameDict, localUrlDict) = \
                fetchAndParseIndexFileAux(isSouth, separateByLat, dayVal,
                                          baseCurlCmd, folderUrl,
                                          currIndexPath, options.type)

        # Append to the main index
        for frame in sorted(localFrameDict.keys()):

            if options.type == 'fireball':
                # This is a bugfix. Ensure that the fireball DEM has not just
                # the same frame number, but also same timestamp as the ortho.
                # Otherwise we may accidentally getting one from next day.
                [imageDateString, imageTimeString] = \
                    icebridge_common.parseTimeStamps(localFrameDict[frame])
                if frame not in orthoStamp:
                    #logger.info("Missing ortho for fireball: " + localFrameDict[frame])
                    continue
                if abs(int(imageTimeString) - int(orthoStamp[frame])) > 1000:
                    # Apparently a tolerance is needed. Use 10 seconds, so the number 1000.
                    #logger.info("Will not use fireball DEM whose timestamp differs from ortho.")
                    #logger.info("Fireball is: " + localFrameDict[frame])
                    #logger.info("Ortho is: " + orthoFrameDict[frame])
                    continue

            # Fetch from next day, unless already have a value. And don't fetch
            # frames not in the jpeg index.
            if len(dayVals) > 1 and options.type != 'jpeg':
                if not frame in jpegFrameDict.keys():
                    continue
                if frame in frameDict.keys():
                    continue

            frameDict[frame] = localFrameDict[frame]
            urlDict[frame] = localUrlDict[frame]

    # Write the combined index file
    icebridge_common.writeIndexFile(parsedIndexPath, frameDict, urlDict)

    return parsedIndexPath
def pushByType(run, options, logger, dataType):
    '''Rename the processed files of type dataType ('DEM' or 'ORTHO') per the
    NSIDC naming convention, copy them into a local staging directory, and
    push that directory to NSIDC via lftp.

    NOTE(review): relies on module-level helpers (fetchIndices, start_time,
    stop_time, icebridge_common, archive_functions, asp_system_utils) defined
    elsewhere in this file.'''

    # Fetch the ortho index from NSIDC if missing
    outputFolder = run.getFolder()
    logger.info("Output folder is " + outputFolder)
    os.system("mkdir -p " + outputFolder)

    # Current directory. It is important to go from /u to the real dir which is /nobackup...
    unpackDir = os.path.realpath(os.getcwd())
    logger.info("Unpack directory is " + unpackDir)

    orthoFolder = icebridge_common.getOrthoFolder(outputFolder)
    orthoIndexPath = icebridge_common.csvIndexFile(orthoFolder)
    if not os.path.exists(orthoIndexPath):
        fetchIndices(options, logger)

    logger.info("Reading ortho index: " + orthoIndexPath)
    (orthoFrameDict, orthoUrlDict) = icebridge_common.readIndexFile(orthoIndexPath)

    # Fetch unarchived folder if missing
    if dataType == 'DEM':
        unarchivedFolder = run.getAssemblyFolder()
    elif dataType == 'ORTHO':
        unarchivedFolder = run.getProcessFolder()
    else:
        raise Exception("Unknown data type: " + dataType)
    logger.info("Unarchived data folder is " + unarchivedFolder)

    # Especially for ortho, force-fetch each time, as there is no good way
    # of checking if we fetched well before.
    start_time()
    if not archive_functions.fetchProcessedByType(run, unpackDir, logger, dataType):
        return
    stop_time("fetching archived data by type: " + dataType, logger)

    # Make the output directory at NSIDC
    m = re.match("(\d\d\d\d)(\d\d)(\d\d)", options.yyyymmdd)
    if m:
        outDir = options.site + "_" + m.group(1) + "." + m.group(2) + "." + m.group(3)
    else:
        raise Exception("Could not parse: " + options.yyyymmdd)

    # Keep the output directory locally here
    localDirPath = os.path.join(outputFolder, dataType, outDir)
    os.system("mkdir -p " + localDirPath)

    logger.info("Storing the renamed " + dataType + " files in " + localDirPath)
    logger.info("Directory name at NSIDC: " + outDir)

    # Read the DEMs and orthos, and copy them to outDir according to the final convention
    if dataType == 'DEM':
        dataFiles = icebridge_common.getTifs(unarchivedFolder, prependFolder=True)
    else:
        dataFiles = glob.glob(os.path.join(unarchivedFolder, 'batch_*', 'out-ortho.tif'))

    for dataFile in dataFiles:
        # Here we use the convention from archive_functions.py for DEMs and from how we store orthos.
        if dataType == 'DEM':
            m = re.match("^.*?" + unarchivedFolder + "/F_(\d+)_\d+_" + dataType + \
                         "\.tif$", dataFile)
            if not m:
                continue
            frameNumber = int(m.group(1))
        else:
            m = re.match("^.*?" + unarchivedFolder + "/batch_(\d+)_\d+_\d+/" + \
                         "out-ortho.tif$", dataFile)
            if not m:
                continue
            frameNumber = int(m.group(1))

        if frameNumber < options.startFrame or frameNumber > options.stopFrame:
            continue

        # For each data file, copy from the ortho its meta info
        if not frameNumber in orthoFrameDict.keys():
            # Bugfix: Ortho fetching failed, try again
            fetchIndices(options, logger)
            logger.info("Re-reading ortho index: " + orthoIndexPath)
            (orthoFrameDict, orthoUrlDict) = icebridge_common.readIndexFile(orthoIndexPath)
            if not frameNumber in orthoFrameDict.keys():
                # This time there is nothing we can do
                raise Exception("Cannot find ortho for frame: " + str(frameNumber))

        orthoFile = orthoFrameDict[frameNumber]
        [dateString, timeString] = icebridge_common.parseTimeStamps(orthoFile)

        # It is always possible that the ortho file date will be the next day
        # after the current flight date, if the flight goes after midnight.
        # So it is not unreasonable that options.yyyymmdd != dateString.
        if dataType == 'DEM':
            outFile = ('IODEM3_%s_%s_%05d_DEM.tif' % (dateString, timeString, frameNumber))
        else:
            # TODO: Need to think more of the naming convention.
            outFile = ('IODEM3_%s_%s_%05d_ORTHO.tif' % (dateString, timeString, frameNumber))

        cmd = "/bin/cp -fv " + dataFile + " " + os.path.join(localDirPath, outFile)
        logger.info(cmd)
        os.system(cmd)

    # Push the directory to NSIDC
    remoteDirPath = os.path.join(os.path.basename(os.path.dirname(localDirPath)),
                                 os.path.basename(localDirPath))
    remoteDirPath = os.path.join('/incoming', 'Ames', remoteDirPath)
    logger.info("Storing at NSIDC in: " + remoteDirPath)

    cmd = 'lftp -e "mirror -P 20 -c -R -vvv --delete --delete-first ' + localDirPath + \
          ' ' + remoteDirPath + ' -i \'\.(tif)$\'; bye\" -u ' + options.loginInfo
    logger.info(cmd)

    start_time()
    (output, err, status) = asp_system_utils.executeCommand(cmd, suppressOutput=True)
    #status = os.system(cmd)
    logger.info("LFTP output and error: " + output + ' ' + err)
    logger.info("LFTP status: " + str(status))
    #if status != 0:
    #    raise Exception("Problem pushing")
    stop_time("push to NSIDC", logger)
def pairLidarFiles(lidarFolder, skipValidate, logger):
    '''For each pair of lidar files generate a double size point cloud.
    We can use these later since they do not have any gaps between adjacent files.

    lidarFolder:  folder with the converted lidar files and their index.
    skipValidate: if True, skip an output merely if it exists; otherwise
                  validate each existing output csv before skipping it.
    logger:       logger for progress messages.

    Returns True if all pairs were created successfully, False otherwise.
    Also (re)writes the paired-lidar index file when its contents differ.'''

    logger.info('Generating lidar pairs...')

    # Create the output folder
    pairedFolder = icebridge_common.getPairedLidarFolder(lidarFolder)
    os.system('mkdir -p ' + pairedFolder)

    convLidarFile = icebridge_common.getConvertedLidarIndexFile(lidarFolder)
    if not os.path.exists(convLidarFile):
        raise Exception("Missing file: " + convLidarFile)

    (lidarDict, dummyUrlDict) = icebridge_common.readIndexFile(convLidarFile)
    # All converted lidar files share one extension; grab it from any entry.
    lidarExt = ''
    for frame in lidarDict:
        lidarExt = icebridge_common.fileExtension(lidarDict[frame])

    numLidarFiles = len(lidarDict.keys())

    pairedDict = {}

    # Loop through all pairs of csv files in the folder
    badFiles = False
    lidarKeys = sorted(lidarDict.keys())
    for i in range(len(lidarKeys) - 1):
        thisFile = lidarDict[lidarKeys[i]]
        nextFile = lidarDict[lidarKeys[i + 1]]

        date2, time2 = icebridge_common.parseTimeStamps(nextFile)

        # Record the name with the second file
        # - More useful because the time for the second file represents the middle of the file.
        outputName = icebridge_common.lidar_pair_prefix() + date2 + '_' + time2 + lidarExt

        pairedDict[lidarKeys[i]] = outputName

        # Handle paths
        path1 = os.path.join(lidarFolder, thisFile)
        path2 = os.path.join(lidarFolder, nextFile)
        outputPath = os.path.join(pairedFolder, outputName)

        if not os.path.exists(path1) or not os.path.exists(path2):
            logger.info("Cannot create " + outputPath + " as we are missing its inputs")
            # If the inputs are missing, but the output is there, most likely it is corrupt.
            # Wipe it. Hopefully a subsequent fetch and convert step will bring it back.
            if os.path.exists(outputPath):
                logger.info("Wiping: " + outputPath)
                os.system('rm -f ' + outputPath) # will not throw
            badFiles = True
            continue

        # Skip existing valid files
        if skipValidate:
            if os.path.exists(outputPath):
                logger.info("File exists, skipping: " + outputPath)
                continue
        else:
            if icebridge_common.isValidLidarCSV(outputPath):
                #logger.info("File exists and is valid, skipping: " + outputPath)
                continue

        # Concatenate the two files ('tail -n +2' drops the header of the second)
        cmd1 = 'cat ' + path1 + ' > ' + outputPath
        cmd2 = 'tail -n +2 -q ' + path2 + ' >> ' + outputPath
        logger.info(cmd1)
        p = subprocess.Popen(cmd1, stdout=subprocess.PIPE, shell=True)
        out, err = p.communicate()
        logger.info(cmd2)
        p = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True)
        out, err = p.communicate()

        if not icebridge_common.isValidLidarCSV(outputPath):
            logger.error('Failed to generate merged LIDAR file, will wipe: ' + outputPath)
            os.system('rm -f ' + outputPath) # will not throw
            badFiles = True

    pairedLidarFile = icebridge_common.getPairedIndexFile(pairedFolder)

    willWritePairedFile = False
    if not os.path.exists(pairedLidarFile):
        willWritePairedFile = True
    else:
        # Bugfix: Sometimes the written converted file has the wrong size, maybe
        # something got interrupted earlier.
        (lidarDictIn, dummyUrlDict) = icebridge_common.readIndexFile(pairedLidarFile)
        if lidarDictIn != pairedDict:
            willWritePairedFile = True

    if willWritePairedFile:
        logger.info("Writing: " + pairedLidarFile)
        icebridge_common.writeIndexFile(pairedLidarFile, pairedDict, {})

    return (not badFiles)
def pushByType(run, options, logger, dataType):
    '''Rename the processed files of type dataType ('DEM' or 'ORTHO') per the
    NSIDC naming convention, copy them into a local staging directory, and
    push that directory to NSIDC via lftp.

    NOTE(review): relies on module-level helpers (fetchIndices, start_time,
    stop_time, icebridge_common, archive_functions, asp_system_utils) defined
    elsewhere in this file.'''

    # Fetch the ortho index from NSIDC if missing
    outputFolder = run.getFolder()
    logger.info("Output folder is " + outputFolder)
    os.system("mkdir -p " + outputFolder)

    # Current directory. It is important to go from /u to the real dir which is /nobackup...
    unpackDir = os.path.realpath(os.getcwd())
    logger.info("Unpack directory is " + unpackDir)

    orthoFolder = icebridge_common.getOrthoFolder(outputFolder)
    orthoIndexPath = icebridge_common.csvIndexFile(orthoFolder)
    if not os.path.exists(orthoIndexPath):
        fetchIndices(options, logger)

    logger.info("Reading ortho index: " + orthoIndexPath)
    (orthoFrameDict, orthoUrlDict) = icebridge_common.readIndexFile(orthoIndexPath)

    # Fetch unarchived folder if missing
    if dataType == 'DEM':
        unarchivedFolder = run.getAssemblyFolder()
    elif dataType == 'ORTHO':
        unarchivedFolder = run.getProcessFolder()
    else:
        raise Exception("Unknown data type: " + dataType)
    logger.info("Unarchived data folder is " + unarchivedFolder)

    # Especially for ortho, force-fetch each time, as there is no good way
    # of checking if we fetched well before.
    start_time()
    if not archive_functions.fetchProcessedByType(run, unpackDir, logger, dataType):
        return
    stop_time("fetching archived data by type: " + dataType, logger)

    # Make the output directory at NSIDC
    m = re.match("(\d\d\d\d)(\d\d)(\d\d)", options.yyyymmdd)
    if m:
        outDir = options.site + "_" + m.group(1) + "." + m.group(2) + "." + m.group(3)
    else:
        raise Exception("Could not parse: " + options.yyyymmdd)

    # Keep the output directory locally here
    localDirPath = os.path.join(outputFolder, dataType, outDir)
    os.system("mkdir -p " + localDirPath)

    logger.info("Storing the renamed " + dataType + " files in " + localDirPath)
    logger.info("Directory name at NSIDC: " + outDir)

    # Read the DEMs and orthos, and copy them to outDir according to the final convention
    if dataType == 'DEM':
        dataFiles = icebridge_common.getTifs(unarchivedFolder, prependFolder=True)
    else:
        dataFiles = glob.glob(os.path.join(unarchivedFolder, 'batch_*', 'out-ortho.tif'))

    for dataFile in dataFiles:
        # Here we use the convention from archive_functions.py for DEMs and from how we store orthos.
        if dataType == 'DEM':
            m = re.match("^.*?" + unarchivedFolder + "/F_(\d+)_\d+_" + dataType + \
                         "\.tif$", dataFile)
            if not m:
                continue
            frameNumber = int(m.group(1))
        else:
            m = re.match("^.*?" + unarchivedFolder + "/batch_(\d+)_\d+_\d+/" + \
                         "out-ortho.tif$", dataFile)
            if not m:
                continue
            frameNumber = int(m.group(1))

        if frameNumber < options.startFrame or frameNumber > options.stopFrame:
            continue

        # For each data file, copy from the ortho its meta info
        if not frameNumber in orthoFrameDict.keys():
            # Bugfix: Ortho fetching failed, try again
            fetchIndices(options, logger)
            logger.info("Re-reading ortho index: " + orthoIndexPath)
            (orthoFrameDict, orthoUrlDict) = icebridge_common.readIndexFile(orthoIndexPath)
            if not frameNumber in orthoFrameDict.keys():
                # This time there is nothing we can do
                raise Exception("Cannot find ortho for frame: " + str(frameNumber))

        orthoFile = orthoFrameDict[frameNumber]
        [dateString, timeString] = icebridge_common.parseTimeStamps(orthoFile)

        # It is always possible that the ortho file date will be the next day
        # after the current flight date, if the flight goes after midnight.
        # So it is not unreasonable that options.yyyymmdd != dateString.
        if dataType == 'DEM':
            outFile = ('IODEM3_%s_%s_%05d_DEM.tif' % (dateString, timeString, frameNumber))
        else:
            # TODO: Need to think more of the naming convention.
            outFile = ('IODEM3_%s_%s_%05d_ORTHO.tif' % (dateString, timeString, frameNumber))

        cmd = "/bin/cp -fv " + dataFile + " " + os.path.join(localDirPath, outFile)
        logger.info(cmd)
        os.system(cmd)

    # Push the directory to NSIDC
    remoteDirPath = os.path.join(os.path.basename(os.path.dirname(localDirPath)),
                                 os.path.basename(localDirPath))
    remoteDirPath = os.path.join('/incoming', 'Ames', remoteDirPath)
    logger.info("Storing at NSIDC in: " + remoteDirPath)

    cmd = 'lftp -e "mirror -P 20 -c -R -vvv --delete --delete-first ' + localDirPath + \
          ' ' + remoteDirPath + ' -i \'\.(tif)$\'; bye\" -u ' + options.loginInfo
    logger.info(cmd)

    start_time()
    (output, err, status) = asp_system_utils.executeCommand(cmd, suppressOutput = True)
    #status = os.system(cmd)
    logger.info("LFTP output and error: " + output + ' ' + err)
    logger.info("LFTP status: " + str(status))
    #if status != 0:
    #    raise Exception("Problem pushing")
    stop_time("push to NSIDC", logger)
def fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder):
    '''Create a list of all files that must be fetched unless done already.

    options:      parsed command-line options (type, yyyymmdd, site, year,
                  month, day, refetchIndex, etc.).  May be mutated: for lidar,
                  options.type is set to the lidar variant actually found and
                  options.allFrames is forced True.
    isSouth:      True for Antarctic flights.
    baseCurlCmd:  curl command prefix used for all downloads.
    outputFolder: folder where the html/csv index files live.

    Writes the combined csv index (frame -> filename, frame -> url) and
    returns its path.

    NOTE(review): relies on module-level names logger, LIDAR_TYPES,
    getFolderUrl, checkIfUrlExists, hasGoodLat, isInSeparateByLatTable,
    twoFlightsInOneDay and icebridge_common defined elsewhere in this file.'''

    # For AN 20091112, etc, some of the ortho images are stored at the
    # beginning of the next day's flight. Need to sort this out, and
    # it is tricky. More comments within the code.
    fetchNextDay = True

    separateByLat = (options.type == 'ortho' and isInSeparateByLatTable(options.yyyymmdd))
    if separateByLat:
        # Here we won't fetch the next day, we will just separate by latitude within
        # a given day
        fetchNextDay = False

    orthoOrFireball = ((options.type == 'ortho') or (options.type == 'fireball'))

    if fetchNextDay:
        # Normally we fetch for next day only for ortho or fireball. However,
        # for one single special flight, we do it for jpeg too, as then
        # the jpegs are also split.
        if orthoOrFireball or \
           ((options.type == 'jpeg') and twoFlightsInOneDay(options.site, options.yyyymmdd)):
            fetchNextDay = True
        else:
            fetchNextDay = False

    # If we need to parse the next flight day as well, as expected in some runs,
    # we will fetch two html files, but create a single index out of them.
    dayVals = [0]
    if fetchNextDay:
        dayVals.append(1)

    indexPath = icebridge_common.htmlIndexFile(outputFolder)
    currIndexPath = indexPath

    parsedIndexPath = icebridge_common.csvIndexFile(outputFolder)

    if options.refetchIndex:
        os.system('rm -f ' + indexPath)
        os.system('rm -f ' + parsedIndexPath)

    if icebridge_common.fileNonEmpty(parsedIndexPath):
        logger.info('Already have the index file ' + parsedIndexPath + ', keeping it.')
        return parsedIndexPath

    frameDict = {}
    urlDict = {}

    # We need the list of jpeg frames. Sometimes when fetching ortho images,
    # and we have to fetch from the next day, don't fetch unless
    # in the jpeg index.
    if len(dayVals) > 1 and options.type != 'jpeg':
        jpegFolder = icebridge_common.getJpegFolder(os.path.dirname(outputFolder))
        jpegIndexPath = icebridge_common.csvIndexFile(jpegFolder)
        (jpegFrameDict, jpegUrlDict) = icebridge_common.readIndexFile(jpegIndexPath)

    orthoStamp = {}
    if options.type == 'fireball':
        # This is a bugfix. Ensure that the fireball DEM has not just
        # the same frame number, but also same timestamp as the ortho.
        orthoFolder = icebridge_common.getOrthoFolder(os.path.dirname(outputFolder))
        orthoIndexPath = icebridge_common.csvIndexFile(orthoFolder)
        (orthoFrameDict, orthoUrlDict) = icebridge_common.readIndexFile(orthoIndexPath)
        for frame in sorted(orthoFrameDict.keys()):
            filename = orthoFrameDict[frame]
            [imageDateString, imageTimeString] = icebridge_common.parseTimeStamps(filename)
            orthoStamp[frame] = imageTimeString

    for dayVal in dayVals:

        if len(dayVals) > 1:
            currIndexPath = indexPath + '.day' + str(dayVal)
            if options.refetchIndex:
                os.system('rm -f ' + currIndexPath)

        # Find folderUrl which contains all of the files
        if options.type in LIDAR_TYPES:
            options.allFrames = True # For lidar, always get all the frames!

            # For lidar, the data can come from one of three sources.
            # Unfortunately sometimes there is more than one source, and then
            # we need to pick by latitude.
            folderUrls = []
            lidar_types = []
            for lidar in LIDAR_TYPES:
                folderUrl = getFolderUrl(options.yyyymmdd, options.year, options.month,
                                         options.day, dayVal, # note here the dayVal
                                         options.site, lidar)
                logger.info('Checking lidar URL: ' + folderUrl)
                if checkIfUrlExists(folderUrl, baseCurlCmd):
                    logger.info('Found match with lidar type: ' + lidar)
                    folderUrls.append(folderUrl)
                    lidar_types.append(lidar)

            if len(folderUrls) == 0:
                # NOTE(review): on this path localFrameDict is never assigned,
                # so the append loop below would raise NameError -- presumably
                # unreachable in practice; confirm with callers.
                logger.info('WARNING: Could not find any lidar data for the given date!')
            elif len(folderUrls) == 1:
                # Unique solution
                folderUrl = folderUrls[0]
                options.type = lidar_types[0]
            elif len(folderUrls) >= 2:
                # Multiple solutions. Pick the good one by latitude.
                logger.info("Multiples URLs to search: " + " ".join(folderUrls))
                count = -1
                isGood = False
                for folderUrl in folderUrls:
                    count += 1
                    (localFrameDict, localUrlDict) = \
                        fetchAndParseIndexFileAux(isSouth, separateByLat, dayVal,
                                                  baseCurlCmd, folderUrl,
                                                  currIndexPath, lidar_types[count])
                    for frame in sorted(localFrameDict.keys()):
                        filename = localFrameDict[frame]
                        xmlFile = icebridge_common.xmlFile(filename)
                        url = os.path.join(folderUrl, xmlFile)

                        # Download the file
                        curlCmd = baseCurlCmd + ' ' + url + ' > ' + xmlFile
                        logger.info(curlCmd)
                        p = subprocess.Popen(curlCmd, shell=True, universal_newlines=True)
                        os.waitpid(p.pid, 0)

                        latitude = icebridge_common.parseLatitude(xmlFile)
                        if os.path.exists(xmlFile):
                            os.remove(xmlFile)

                        if hasGoodLat(latitude, isSouth):
                            isGood = True
                            options.type = lidar_types[count]
                            logger.info("Good latitude " + str(latitude) + ", will use " +
                                        folderUrl + " of type " + lidar_types[count])
                        else:
                            logger.info("Bad latitude " + str(latitude) + ", will not use " +
                                        folderUrl + " of type " + lidar_types[count])

                        # Stop at first file no matter what
                        break

                    if isGood:
                        break

                if not isGood:
                    if options.type in LIDAR_TYPES and options.ignoreMissingLidar:
                        logger.info("No lidar. None of these URLs are good: " +
                                    " ".join(folderUrls))
                    else:
                        raise Exception("None of these URLs are good: " +
                                        " ".join(folderUrls))

        else: # Other cases are simpler
            folderUrl = getFolderUrl(options.yyyymmdd, options.year, options.month,
                                     options.day, dayVal, # note here the dayVal
                                     options.site, options.type)
            logger.info('Fetching from URL: ' + folderUrl)
            (localFrameDict, localUrlDict) = \
                fetchAndParseIndexFileAux(isSouth, separateByLat, dayVal,
                                          baseCurlCmd, folderUrl,
                                          currIndexPath, options.type)

        # Append to the main index
        for frame in sorted(localFrameDict.keys()):

            if options.type == 'fireball':
                # This is a bugfix. Ensure that the fireball DEM has not just
                # the same frame number, but also same timestamp as the ortho.
                # Otherwise we may accidentally getting one from next day.
                [imageDateString, imageTimeString] = \
                    icebridge_common.parseTimeStamps(localFrameDict[frame])
                if frame not in orthoStamp:
                    #logger.info("Missing ortho for fireball: " + localFrameDict[frame])
                    continue
                if abs(int(imageTimeString) - int(orthoStamp[frame])) > 1000:
                    # Apparently a tolerance is needed. Use 10 seconds, so the number 1000.
                    #logger.info("Will not use fireball DEM whose timestamp differs from ortho.")
                    #logger.info("Fireball is: " + localFrameDict[frame])
                    #logger.info("Ortho is: " + orthoFrameDict[frame])
                    continue

            # Fetch from next day, unless already have a value. And don't fetch
            # frames not in the jpeg index.
            if len(dayVals) > 1 and options.type != 'jpeg':
                if not frame in jpegFrameDict.keys():
                    continue
                if frame in frameDict.keys():
                    continue

            frameDict[frame] = localFrameDict[frame]
            urlDict[frame] = localUrlDict[frame]

    # Write the combined index file
    icebridge_common.writeIndexFile(parsedIndexPath, frameDict, urlDict)

    return parsedIndexPath
def fetchAndParseIndexFileAux(isSouth, separateByLat, dayVal,
                              baseCurlCmd, folderUrl, path, fileType):
    '''Retrieve the index file for a folder of data and create a parsed
    version of it that contains frame number / filename pairs.

    Downloads folderUrl's index.html via curl to "path", extracts the data
    file names matching "fileType", optionally filters them by hemisphere
    (separateByLat) or by day spillover (dayVal), and returns the pair
    (frameDict, urlDict) mapping frame number -> filename and
    frame number -> folderUrl.  Raises Exception if two distinct files
    claim the same frame number (non-lidar types only).

    NOTE(review): relies on module-level globals: logger, MAX_IN_ONE_CALL,
    LIDAR_TYPES, hasGoodLat, and the icebridge_common module.'''

    # Download the html index file using curl through a shell.
    curlCmd = baseCurlCmd + ' ' + folderUrl + ' > ' + path
    logger.info(curlCmd)
    p = subprocess.Popen(curlCmd, shell=True, universal_newlines=True)
    os.waitpid(p.pid, 0)

    # Find all the file names in the index file and
    # dump them to a new index file
    logger.info('Extracting file name list from index.html file...')
    with open(path, 'r') as f:
        indexText = f.read()

    # Must wipe this html file. We fetch it too often in different
    # contexts. If not wiped, the code fails to work in some
    # very rare but real situations.
    if os.path.exists(path):
        os.remove(path)

    # Extract just the file names.  Each pattern keeps the leading '>'
    # (and sometimes trailing '<') from the html markup; those are
    # stripped below.  NOTE(review): the '.' before the extensions is an
    # unescaped regex dot, so it matches any character — harmless here
    # but worth knowing before editing the patterns.
    fileList = [] # ensure initialization
    if fileType == 'jpeg':
        fileList = re.findall(">[0-9_]*.JPG", indexText, re.IGNORECASE)
    if fileType == 'ortho':
        fileList = re.findall(">DMS\w*.tif<", indexText, re.IGNORECASE)
    if fileType == 'fireball':
        # Fireball DEMs
        fileList = re.findall(">IODMS\w*DEM.tif", indexText, re.IGNORECASE)
    if fileType == 'lvis':
        fileList = re.findall(">ILVIS\w+.TXT", indexText, re.IGNORECASE)
    if fileType == 'atm1':
        fileList = re.findall(">ILATM1B[0-9_]*.ATM4\w+.qi", indexText, re.IGNORECASE)
        # e.g. >ILATM1B_20111018_145455.ATM4BT4.qi
        #   or >ILATM1B_20091016_165112.atm4cT3.qi
    if fileType == 'atm2':
        # Match ILATM1B_20160713_195419.ATM5BT5.h5
        fileList = re.findall(">ILATM1B[0-9_]*.ATM\w+.h5", indexText, re.IGNORECASE)

    # Get rid of '>' and '<' left over from the html markup
    for fileIter in range(len(fileList)):
        fileList[fileIter] = fileList[fileIter].replace(">", "")
        fileList[fileIter] = fileList[fileIter].replace("<", "")

    # Some runs, eg, https://n5eil01u.ecs.nsidc.org/ICEBRIDGE/IODMS1B.001/2015.09.24
    # have files for both GR and AN, with same frame number. Those need to be separated
    # by latitude. This is a problem only with orthoimages.
    badXmls = set()
    outputFolder = os.path.dirname(path)
    if separateByLat:
        # Fetch the per-file XML metadata in batches, then parse each one's
        # latitude; files in the wrong hemisphere are marked bad.
        allFilesToFetch = []
        allUrlsToFetch  = []
        for filename in fileList:
            xmlFile  = icebridge_common.xmlFile(filename)
            url      = os.path.join(folderUrl, xmlFile)
            outputPath = os.path.join(outputFolder, xmlFile)
            allFilesToFetch.append(outputPath)
            allUrlsToFetch.append(url)

        dryRun = False
        icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL, dryRun,
                                             outputFolder,
                                             allFilesToFetch, allUrlsToFetch, logger)

        # Mark the bad ones (wrong hemisphere for this campaign)
        for xmlFile in allFilesToFetch:
            latitude = icebridge_common.parseLatitude(xmlFile)
            isGood = hasGoodLat(latitude, isSouth)
            if not isGood:
                badXmls.add(xmlFile)

    elif (fileType == 'ortho' or fileType == 'fireball'):
        # Sometimes there is a large gap in the timestamp. That means orthoimages
        # from previous day are spilling over. If dayVal is 0, we must ignore
        # the spillover images. If dayVal is 1, we must keep the spillover images
        # and igore the others.
        list1 = []     # files before the first big gap (spillover from prev day)
        list2 = []     # files after the first big gap (current day)
        isBigGap = False
        prevStamp = -1 # sentinel: no previous timestamp seen yet
        for filename in fileList:
            [imageDateString, imageTimeString] = icebridge_common.parseTimeStamps(filename)
            currStamp = float(imageTimeString)/1000000.0 # hours
            if prevStamp < 0:
                list1.append(filename)
                prevStamp = currStamp
                continue

            # Note that once isBigGap becomes true, it stays true
            # even when the gap gets small again
            if currStamp - prevStamp >= 6: # six hour gap is a lot
                isBigGap = True

            if not isBigGap:
                list1.append(filename)
            else:
                list2.append(filename)

            prevStamp = currStamp # for next iteration

        if isBigGap:
            if dayVal == 0:
                fileList = list2[:] # current day
            else:
                fileList = list1[:] # spillover from prev day

    # For each entry that matched the regex, record: the frame number and the file name.
    frameDict = {}
    urlDict   = {}
    badFiles  = []
    for filename in fileList:
        # Skip files whose XML metadata placed them in the wrong hemisphere
        if len(badXmls) > 0:
            xmlFile = os.path.join(outputFolder, icebridge_common.xmlFile(filename))
            if xmlFile in badXmls:
                continue

        frame = icebridge_common.getFrameNumberFromFilename(filename)
        if frame in frameDict.keys():
            # The same frame must not occur twice (lidar types legitimately
            # repeat frame numbers, so they are exempt).
            if fileType not in LIDAR_TYPES:
                logger.error("Error: Found two file names with same frame number: " + \
                             frameDict[frame] + " and " + filename)
                badFiles.append(filename)
                badFiles.append(frameDict[frame])

        # note that folderUrl can vary among orthoimages, as sometimes
        # some of them are in a folder for the next day.
        frameDict[frame] = filename
        urlDict[frame]   = folderUrl

    # Wipe them all, to be sorted later
    for badFile in badFiles:
        if os.path.exists(badFile):
            logger.info("Deleting: " + badFile)
            os.remove(badFile)
        xmlFile = icebridge_common.xmlFile(badFile)
        if os.path.exists(xmlFile):
            logger.info("Deleting: " + xmlFile)
            os.remove(xmlFile)

    if len(badFiles) > 0:
        raise Exception("Found files with same frame number")

    return (frameDict, urlDict)
def pairLidarFiles(lidarFolder, skipValidate, logger):
    '''For each pair of consecutive lidar files generate a double size point
    cloud.  We can use these later since they do not have any gaps between
    adjacent files.

    Inputs:
      lidarFolder  - folder containing the converted lidar csv files and
                     their index file.
      skipValidate - if True, trust any existing output file; otherwise
                     re-validate outputs with isValidLidarCSV.
      logger       - logger used for progress and error reporting.
    Returns True if every pair was produced (or already valid), False if any
    pair could not be generated.
    Raises Exception if the converted-lidar index file is missing.'''

    logger.info('Generating lidar pairs...')

    # Create the output folder.
    # (Fix: the old 'os.system("mkdir -p " + ...)' silently ignored failures;
    # os.makedirs raises on a real error.)
    pairedFolder = icebridge_common.getPairedLidarFolder(lidarFolder)
    if not os.path.exists(pairedFolder):
        os.makedirs(pairedFolder)

    convLidarFile = icebridge_common.getConvertedLidarIndexFile(lidarFolder)
    if not os.path.exists(convLidarFile):
        raise Exception("Missing file: " + convLidarFile)

    (lidarDict, dummyUrlDict) = icebridge_common.readIndexFile(convLidarFile)

    # Pick up the extension used by the lidar files (assumed uniform across
    # frames; if not, the last frame iterated wins, as before).
    lidarExt = ''
    for frame in lidarDict:
        lidarExt = icebridge_common.fileExtension(lidarDict[frame])

    pairedDict = {}

    # Loop through all consecutive pairs of csv files in the folder
    badFiles = False
    lidarKeys = sorted(lidarDict.keys())
    for i in range(len(lidarKeys)-1):

        thisFile = lidarDict[lidarKeys[i  ]]
        nextFile = lidarDict[lidarKeys[i+1]]

        date2, time2 = icebridge_common.parseTimeStamps(nextFile)

        # Record the name with the second file
        # - More useful because the time for the second file represents the middle of the file.
        outputName = icebridge_common.lidar_pair_prefix() + date2 +'_'+ time2 + lidarExt
        pairedDict[lidarKeys[i]] = outputName

        # Handle paths
        path1      = os.path.join(lidarFolder, thisFile)
        path2      = os.path.join(lidarFolder, nextFile)
        outputPath = os.path.join(pairedFolder, outputName)

        if not os.path.exists(path1) or not os.path.exists(path2):
            logger.info("Cannot create " + outputPath + " as we are missing its inputs")
            # If the inputs are missing, but the output is there, most likely it is corrupt.
            # Wipe it. Hopefully a subsequent fetch and convert step will bring it back.
            if os.path.exists(outputPath):
                logger.info("Wiping: " + outputPath)
                os.system('rm -f ' + outputPath) # will not throw
            badFiles = True
            continue

        # Skip existing valid files
        if skipValidate:
            if os.path.exists(outputPath):
                logger.info("File exists, skipping: " + outputPath)
                continue
        else:
            if icebridge_common.isValidLidarCSV(outputPath):
                #logger.info("File exists and is valid, skipping: " + outputPath)
                continue

        # Concatenate the two files: the first file whole, the second with
        # its header line (first row) dropped so headers are not duplicated.
        cmd1 = 'cat ' + path1 + ' > ' + outputPath
        cmd2 = 'tail -n +2 -q ' + path2 + ' >> ' + outputPath
        logger.info(cmd1)
        p = subprocess.Popen(cmd1, stdout=subprocess.PIPE, shell=True,
                             universal_newlines=True)
        out, err = p.communicate()
        logger.info(cmd2)
        p = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True,
                             universal_newlines=True)
        out, err = p.communicate()

        if not icebridge_common.isValidLidarCSV(outputPath):
            logger.error('Failed to generate merged LIDAR file, will wipe: ' + outputPath)
            os.system('rm -f ' + outputPath) # will not throw
            badFiles = True

    pairedLidarFile = icebridge_common.getPairedIndexFile(pairedFolder)

    # Decide whether the paired index file must be (re)written.
    willWritePairedFile = False
    if not os.path.exists(pairedLidarFile):
        willWritePairedFile = True
    else:
        # Bugfix: Sometimes the written converted file has the wrong size, maybe
        # something got interrupted earlier.
        (lidarDictIn, dummyUrlDict) = icebridge_common.readIndexFile(pairedLidarFile)
        if lidarDictIn != pairedDict:
            willWritePairedFile = True

    if willWritePairedFile:
        logger.info("Writing: " + pairedLidarFile)
        icebridge_common.writeIndexFile(pairedLidarFile, pairedDict, {})

    return (not badFiles)