# Imports assumed by the functions below (module names taken from how they
# are used). `logger`, LIDAR_TYPES, getFolderUrl, checkIfUrlExists,
# fetchAndParseIndexFileAux, hasGoodLat, isInSeparateByLatTable, and
# twoFlightsInOneDay are defined elsewhere in this module.
import os
import subprocess

import icebridge_common
import extract_icebridge_ATM_points

def fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder):
    '''Create a list of all files that must be fetched unless done already.'''

    # For AN 20091112, etc, some of the ortho images are stored at the
    # beginning of the next day's flight. Need to sort this out, and
    # it is tricky. More comments within the code.
    fetchNextDay = True

    separateByLat = (options.type == 'ortho' and
                     isInSeparateByLatTable(options.yyyymmdd))
    if separateByLat:
        # Here we won't fetch the next day; we will just separate by
        # latitude within a given day.
        fetchNextDay = False

    orthoOrFireball = (options.type == 'ortho') or (options.type == 'fireball')

    if fetchNextDay:
        # Normally we fetch the next day only for ortho or fireball. However,
        # for one single special flight, we do it for jpeg too, as then
        # the jpegs are also split.
        if orthoOrFireball or \
           ((options.type == 'jpeg') and
            twoFlightsInOneDay(options.site, options.yyyymmdd)):
            fetchNextDay = True
        else:
            fetchNextDay = False

    # If we need to parse the next flight day as well, as expected in some
    # runs, we will fetch two html files, but create a single index out of
    # them.
    dayVals = [0]
    if fetchNextDay:
        dayVals.append(1)

    indexPath       = icebridge_common.htmlIndexFile(outputFolder)
    currIndexPath   = indexPath
    parsedIndexPath = icebridge_common.csvIndexFile(outputFolder)

    if options.refetchIndex:
        os.system('rm -f ' + indexPath)
        os.system('rm -f ' + parsedIndexPath)

    if icebridge_common.fileNonEmpty(parsedIndexPath):
        logger.info('Already have the index file ' + parsedIndexPath +
                    ', keeping it.')
        return parsedIndexPath

    frameDict = {}
    urlDict   = {}

    # We need the list of jpeg frames. When fetching ortho images from the
    # next day, don't fetch a frame unless it is also in the jpeg index.
    if len(dayVals) > 1 and options.type != 'jpeg':
        jpegFolder    = icebridge_common.getJpegFolder(os.path.dirname(outputFolder))
        jpegIndexPath = icebridge_common.csvIndexFile(jpegFolder)
        (jpegFrameDict, jpegUrlDict) = icebridge_common.readIndexFile(jpegIndexPath)

    orthoStamp = {}
    if options.type == 'fireball':
        # This is a bugfix. Ensure that the fireball DEM has not just
        # the same frame number, but also the same timestamp as the ortho.
        orthoFolder    = icebridge_common.getOrthoFolder(os.path.dirname(outputFolder))
        orthoIndexPath = icebridge_common.csvIndexFile(orthoFolder)
        (orthoFrameDict, orthoUrlDict) = icebridge_common.readIndexFile(orthoIndexPath)
        for frame in sorted(orthoFrameDict.keys()):
            filename = orthoFrameDict[frame]
            [imageDateString, imageTimeString] = icebridge_common.parseTimeStamps(filename)
            orthoStamp[frame] = imageTimeString

    for dayVal in dayVals:

        if len(dayVals) > 1:
            currIndexPath = indexPath + '.day' + str(dayVal)
            if options.refetchIndex:
                os.system('rm -f ' + currIndexPath)

        # Find folderUrl which contains all of the files
        if options.type in LIDAR_TYPES:
            options.allFrames = True  # For lidar, always get all the frames!

            # For lidar, the data can come from one of three sources.
            # Unfortunately sometimes there is more than one source, and then
            # we need to pick by latitude.
            folderUrls  = []
            lidar_types = []
            for lidar in LIDAR_TYPES:
                folderUrl = getFolderUrl(options.yyyymmdd, options.year,
                                         options.month, options.day,
                                         dayVal,  # note here the dayVal
                                         options.site, lidar)
                logger.info('Checking lidar URL: ' + folderUrl)
                if checkIfUrlExists(folderUrl, baseCurlCmd):
                    logger.info('Found match with lidar type: ' + lidar)
                    folderUrls.append(folderUrl)
                    lidar_types.append(lidar)

            if len(folderUrls) == 0:
                logger.info('WARNING: Could not find any lidar data for the given date!')

            elif len(folderUrls) == 1:
                # Unique solution
                folderUrl    = folderUrls[0]
                options.type = lidar_types[0]

            elif len(folderUrls) >= 2:
                # Multiple solutions. Pick the good one by latitude.
                logger.info("Multiple URLs to search: " + " ".join(folderUrls))
                count  = -1
                isGood = False
                for folderUrl in folderUrls:
                    count += 1
                    (localFrameDict, localUrlDict) = \
                        fetchAndParseIndexFileAux(isSouth, separateByLat, dayVal,
                                                  baseCurlCmd, folderUrl,
                                                  currIndexPath, lidar_types[count])
                    for frame in sorted(localFrameDict.keys()):
                        filename = localFrameDict[frame]
                        xmlFile  = icebridge_common.xmlFile(filename)
                        url      = os.path.join(folderUrl, xmlFile)

                        # Download the file
                        curlCmd = baseCurlCmd + ' ' + url + ' > ' + xmlFile
                        logger.info(curlCmd)
                        p = subprocess.Popen(curlCmd, shell=True,
                                             universal_newlines=True)
                        os.waitpid(p.pid, 0)

                        latitude = icebridge_common.parseLatitude(xmlFile)
                        if os.path.exists(xmlFile):
                            os.remove(xmlFile)

                        if hasGoodLat(latitude, isSouth):
                            isGood       = True
                            options.type = lidar_types[count]
                            logger.info("Good latitude " + str(latitude) +
                                        ", will use " + folderUrl +
                                        " of type " + lidar_types[count])
                        else:
                            logger.info("Bad latitude " + str(latitude) +
                                        ", will not use " + folderUrl +
                                        " of type " + lidar_types[count])

                        # Stop at the first file no matter what
                        break

                    if isGood:
                        break

                if not isGood:
                    if options.type in LIDAR_TYPES and options.ignoreMissingLidar:
                        logger.info("No lidar. None of these URLs are good: " +
                                    " ".join(folderUrls))
                    else:
                        raise Exception("None of these URLs are good: " +
                                        " ".join(folderUrls))

        else:  # Other cases are simpler
            folderUrl = getFolderUrl(options.yyyymmdd, options.year,
                                     options.month, options.day,
                                     dayVal,  # note here the dayVal
                                     options.site, options.type)

        logger.info('Fetching from URL: ' + folderUrl)
        (localFrameDict, localUrlDict) = \
            fetchAndParseIndexFileAux(isSouth, separateByLat, dayVal,
                                      baseCurlCmd, folderUrl,
                                      currIndexPath, options.type)

        # Append to the main index
        for frame in sorted(localFrameDict.keys()):

            if options.type == 'fireball':
                # This is a bugfix. Ensure that the fireball DEM has not just
                # the same frame number, but also the same timestamp as the
                # ortho. Otherwise we may accidentally get one from the next
                # day.
                [imageDateString, imageTimeString] = \
                    icebridge_common.parseTimeStamps(localFrameDict[frame])
                if frame not in orthoStamp:
                    #logger.info("Missing ortho for fireball: " + localFrameDict[frame])
                    continue
                if abs(int(imageTimeString) - int(orthoStamp[frame])) > 1000:
                    # Apparently a tolerance is needed; in this timestamp
                    # encoding the value 1000 corresponds to about 10 seconds.
                    #logger.info("Will not use fireball DEM whose timestamp differs from ortho.")
                    #logger.info("Fireball is: " + localFrameDict[frame])
                    #logger.info("Ortho is: " + orthoFrameDict[frame])
                    continue

            # Fetch from the next day, unless we already have a value. And
            # don't fetch frames not in the jpeg index.
            if len(dayVals) > 1 and options.type != 'jpeg':
                if frame not in jpegFrameDict:
                    continue
                if frame in frameDict:
                    continue

            frameDict[frame] = localFrameDict[frame]
            urlDict[frame]   = localUrlDict[frame]

    # Write the combined index file
    icebridge_common.writeIndexFile(parsedIndexPath, frameDict, urlDict)

    return parsedIndexPath
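# ---------------------------------------------------------------------------
# Minimal usage sketch for fetchAndParseIndexFile (illustrative, not part of
# the original tool). The attribute names on `options` mirror the fields the
# function reads above; the concrete values, the curl command, and the
# site-to-hemisphere mapping are assumptions.

def demoFetchIndex(outputFolder):
    '''Hypothetical driver showing the inputs fetchAndParseIndexFile expects.
       Assumes `logger` has already been configured for this module.'''
    import argparse
    options = argparse.Namespace(
        type               = 'ortho',     # or 'jpeg', 'fireball', or a lidar type
        yyyymmdd           = '20091112',  # the AN 20091112 flight is cited above
        year = 2009, month = 11, day = 12,
        site               = 'AN',        # assumed: 'AN' Antarctica, 'GR' Greenland
        refetchIndex       = False,
        allFrames          = False,
        ignoreMissingLidar = True)
    isSouth     = (options.site == 'AN')  # assumed hemisphere convention
    baseCurlCmd = 'curl'                  # assumed; the real command adds more flags
    return fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder)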
def pairLidarFiles(lidarFolder, skipValidate, logger):
    '''For each pair of lidar files generate a double-size point cloud.
       We can use these later since they do not have any gaps between
       adjacent files.'''

    logger.info('Generating lidar pairs...')

    # Create the output folder
    pairedFolder = icebridge_common.getPairedLidarFolder(lidarFolder)
    os.system('mkdir -p ' + pairedFolder)

    convLidarFile = icebridge_common.getConvertedLidarIndexFile(lidarFolder)
    if not os.path.exists(convLidarFile):
        raise Exception("Missing file: " + convLidarFile)

    (lidarDict, dummyUrlDict) = icebridge_common.readIndexFile(convLidarFile)
    lidarExt = ''
    for frame in lidarDict:
        lidarExt = icebridge_common.fileExtension(lidarDict[frame])
    numLidarFiles = len(lidarDict.keys())

    pairedDict = {}

    # Loop through all pairs of csv files in the folder
    badFiles  = False
    lidarKeys = sorted(lidarDict.keys())
    for i in range(len(lidarKeys) - 1):
        thisFile = lidarDict[lidarKeys[i  ]]
        nextFile = lidarDict[lidarKeys[i+1]]

        date2, time2 = icebridge_common.parseTimeStamps(nextFile)

        # Record the name with the second file
        # - More useful because the time for the second file represents
        #   the middle of the merged file.
        outputName = icebridge_common.lidar_pair_prefix() + date2 + '_' + time2 + lidarExt
        pairedDict[lidarKeys[i]] = outputName

        # Handle paths
        path1      = os.path.join(lidarFolder,  thisFile)
        path2      = os.path.join(lidarFolder,  nextFile)
        outputPath = os.path.join(pairedFolder, outputName)

        if not os.path.exists(path1) or not os.path.exists(path2):
            logger.info("Cannot create " + outputPath + " as we are missing its inputs.")
            # If the inputs are missing but the output is there, most likely
            # it is corrupt. Wipe it. Hopefully a subsequent fetch and
            # convert step will bring it back.
            if os.path.exists(outputPath):
                logger.info("Wiping: " + outputPath)
                os.system('rm -f ' + outputPath)  # will not throw
            badFiles = True
            continue

        # Skip existing valid files
        if skipValidate:
            if os.path.exists(outputPath):
                logger.info("File exists, skipping: " + outputPath)
                continue
        else:
            if icebridge_common.isValidLidarCSV(outputPath):
                #logger.info("File exists and is valid, skipping: " + outputPath)
                continue

        # Concatenate the two files, stripping the header row from the
        # second one so it appears only once in the output.
        cmd1 = 'cat ' + path1 + ' > ' + outputPath
        cmd2 = 'tail -n +2 -q ' + path2 + ' >> ' + outputPath
        logger.info(cmd1)
        p = subprocess.Popen(cmd1, stdout=subprocess.PIPE, shell=True,
                             universal_newlines=True)
        out, err = p.communicate()
        logger.info(cmd2)
        p = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True,
                             universal_newlines=True)
        out, err = p.communicate()

        if not icebridge_common.isValidLidarCSV(outputPath):
            logger.error('Failed to generate merged LIDAR file, will wipe: ' + outputPath)
            os.system('rm -f ' + outputPath)  # will not throw
            badFiles = True

    pairedLidarFile = icebridge_common.getPairedIndexFile(pairedFolder)

    willWritePairedFile = False
    if not os.path.exists(pairedLidarFile):
        willWritePairedFile = True
    else:
        # Bugfix: Sometimes the written index file has the wrong size, maybe
        # something got interrupted earlier.
        (lidarDictIn, dummyUrlDict) = icebridge_common.readIndexFile(pairedLidarFile)
        if lidarDictIn != pairedDict:
            willWritePairedFile = True

    if willWritePairedFile:
        logger.info("Writing: " + pairedLidarFile)
        icebridge_common.writeIndexFile(pairedLidarFile, pairedDict, {})

    return (not badFiles)
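# ---------------------------------------------------------------------------
# For reference, a pure-Python equivalent of the 'cat'/'tail -n +2' pairing
# above (a sketch, assuming each CSV carries a single header row; the
# original code shells out instead, which requires Unix tools on PATH):

def concatenateLidarPair(path1, path2, outputPath):
    '''Write path1 in full, then path2 minus its header row, to outputPath.'''
    with open(outputPath, 'w') as out:
        with open(path1, 'r') as f1:
            for line in f1:
                out.write(line)
        with open(path2, 'r') as f2:
            next(f2, None)  # skip the header line, as 'tail -n +2' does
            for line in f2:
                out.write(line)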
def convertLidarDataToCsv(lidarFolder, startFrame, stopFrame, skipValidate, logger):
    '''Make sure all lidar data is available in a readable text format.
       Returns False if any files failed to convert.'''

    logger.info('Converting LIDAR files...')

    lidarIndexPath = icebridge_common.csvIndexFile(lidarFolder)
    (frameDict, urlDict) = icebridge_common.readIndexFile(lidarIndexPath)

    if not skipValidate:
        validFilesList = icebridge_common.validFilesList(os.path.dirname(lidarFolder),
                                                         startFrame, stopFrame)
        validFilesSet = set()
        validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList,
                                                                      validFilesSet)
        numInitialValidFiles = len(validFilesSet)

    convDict = {}

    # Loop through all files in the folder
    badFiles = False
    for frame in sorted(frameDict.keys()):

        f = frameDict[frame]
        extension = icebridge_common.fileExtension(f)

        # Only interested in a few file types
        if (extension != '.qi') and (extension != '.hdf5') and (extension != '.h5'):
            convDict[frame] = f  # these are already in plain text
            continue

        convDict[frame] = os.path.splitext(f)[0] + '.csv'
        outputPath = os.path.join(lidarFolder, convDict[frame])

        # Handle paths
        fullPath = os.path.join(lidarFolder, f)
        if not os.path.exists(fullPath):
            logger.info("Cannot convert missing file: " + fullPath)
            continue

        # If the input is invalid, wipe it, its xml, and the output.
        # Hopefully there will be a subsequent fetch step where it will
        # get refetched.
        if not icebridge_common.hasValidChkSum(fullPath, logger):
            logger.info("Will wipe invalid file: " + fullPath)
            xmlFile = icebridge_common.xmlFile(fullPath)
            os.system('rm -f ' + fullPath)    # will not throw
            os.system('rm -f ' + xmlFile)     # will not throw
            os.system('rm -f ' + outputPath)  # will not throw
            badFiles = True
            continue

        # Skip existing valid files
        if skipValidate:
            if os.path.exists(outputPath):
                logger.info("File exists, skipping: " + outputPath)
                continue
        else:
            if outputPath in validFilesSet and os.path.exists(outputPath):
                #logger.info('Previously validated: ' + outputPath) # verbose
                continue
            if icebridge_common.isValidLidarCSV(outputPath):
                #logger.info("File exists and is valid, skipping: " + outputPath)
                continue

        # Call the conversion
        logger.info("Process " + fullPath)
        extract_icebridge_ATM_points.main([fullPath])

        # Check the result
        if not icebridge_common.isValidLidarCSV(outputPath):
            logger.error('Failed to parse LIDAR file, will wipe: ' + outputPath)
            os.system('rm -f ' + outputPath)  # will not throw
            badFiles = True
        else:
            if not skipValidate:
                validFilesSet.add(outputPath)  # mark it as validated

    convLidarFile = icebridge_common.getConvertedLidarIndexFile(lidarFolder)

    willWriteConvFile = False
    if not os.path.exists(convLidarFile):
        willWriteConvFile = True
    else:
        # Bugfix: Sometimes the written converted file has the wrong size,
        # maybe something got interrupted earlier.
        (lidarDictIn, dummyUrlDict) = icebridge_common.readIndexFile(convLidarFile)
        if lidarDictIn != convDict:
            willWriteConvFile = True

    if willWriteConvFile:
        logger.info("Writing: " + convLidarFile)
        icebridge_common.writeIndexFile(convLidarFile, convDict, {})

    if not skipValidate:
        # Write to disk the list of validated files, but only if new
        # validations happened. First re-read that list, in case a
        # different process modified it in the meantime, such as when two
        # managers are running at the same time.
        numFinalValidFiles = len(validFilesSet)
        if numInitialValidFiles != numFinalValidFiles:
            validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList,
                                                                          validFilesSet)
            icebridge_common.writeValidFilesList(validFilesList, validFilesSet)

    return (not badFiles)
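# ---------------------------------------------------------------------------
# Sketch of how the two steps above fit together (illustrative only; the real
# orchestration lives in the caller, not in this module). The default frame
# bounds are placeholder assumptions.

def demoProcessLidar(lidarFolder, logger, startFrame=0, stopFrame=10**6,
                     skipValidate=False):
    '''Convert raw lidar files to CSV, then pair adjacent files.
       Returns True only if both steps fully succeeded.'''
    converted = convertLidarDataToCsv(lidarFolder, startFrame, stopFrame,
                                      skipValidate, logger)
    paired = pairLidarFiles(lidarFolder, skipValidate, logger)
    return converted and paired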