def validateOrthosAndFireball(options, fileType, logger):
    '''Validate ortho and fireball files within the current frame range.

    This is expected to be called in parallel for smaller chunks. Lidar files
    will be validated serially. Jpegs get validated when converted to tif.

    Arguments:
      options  - must provide outputFolder, startFrame and stopFrame.
      fileType - 'ortho' or 'fireball'; any other value raises an Exception.
      logger   - logger receiving the progress messages.

    Each data file in the frame range is checked with hasValidChkSum(). For
    fireball, the matching tfw file is checked as well with isValidTfw().
    A file which fails its check is deleted together with its xml file.
    Files found good are added to the list of validated files kept on disk,
    so that they are not checked again next time.

    Return True if all is good, which includes the case when the index
    file does not exist.'''

    logger.info("Validating files of type: " + fileType)

    if fileType == 'ortho':
        dataFolder = icebridge_common.getOrthoFolder(options.outputFolder)
    elif fileType == 'fireball':
        dataFolder = icebridge_common.getFireballFolder(options.outputFolder)
    else:
        raise Exception("Unknown file type: " + fileType)

    indexPath = icebridge_common.csvIndexFile(dataFolder)
    if not os.path.exists(indexPath):
        # The issue of what to do when the index does not exist should
        # have been settled by now.
        return True

    # Fetch from disk the set of already validated files, if any
    validFilesList = icebridge_common.validFilesList(options.outputFolder,
                                                     options.startFrame, options.stopFrame)
    validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, set())
    numInitialValidFiles = len(validFilesSet)

    def wipe(path):
        '''Delete path if it exists. Like "rm -f", this will not throw,
        but no shell is involved, so any characters in path are fine.'''
        try:
            if os.path.exists(path):
                os.remove(path)
        except OSError as e:
            logger.info('Could not wipe ' + path + ': ' + str(e))

    def validateWithXml(path, checker, invalidMsg, validMsg):
        '''Validate path using checker(path, logger), unless path and its
        xml file were validated before and still exist. Return False if
        the check failed and the two files were wiped.'''
        xmlPath = icebridge_common.xmlFile(path)

        if path in validFilesSet and os.path.exists(path) and \
           xmlPath in validFilesSet and os.path.exists(xmlPath):
            return True # previously validated

        if checker(path, logger):
            logger.info(validMsg + path)
            validFilesSet.add(path)
            validFilesSet.add(xmlPath)
            return True

        logger.info(invalidMsg + path + ' ' + xmlPath)
        wipe(path)
        wipe(xmlPath)
        return False

    badFiles = False
    (frameDict, _) = icebridge_common.readIndexFile(indexPath, prependFolder = True)
    for frame in frameDict:

        if frame < options.startFrame or frame > options.stopFrame:
            continue

        outputPath = frameDict[frame]
        if not validateWithXml(outputPath, icebridge_common.hasValidChkSum,
                               'Found invalid data. Will wipe: ', 'Valid file: '):
            badFiles = True

        if fileType != 'fireball':
            continue

        # Also validate tfw. This must happen even when the data file
        # itself was validated in a previous run, as the tfw file may
        # not have been validated back then.
        tfwFile = icebridge_common.tfwFile(outputPath)
        if not validateWithXml(tfwFile, icebridge_common.isValidTfw,
                               'Found invalid tfw. Will wipe: ', 'Valid tfw file: '):
            badFiles = True

    # Write to disk the list of validated files, but only if new
    # validations happened.  First re-read that list, in case a
    # different process modified it in the meantime, such as if two
    # managers are running at the same time.
    if numInitialValidFiles != len(validFilesSet):
        validFilesSet = \
                      icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet)
        icebridge_common.writeValidFilesList(validFilesList, validFilesSet)

    return (not badFiles)
# Example #2
# 0
def validateOrthosAndFireball(options, fileType, logger):
    '''Validate ortho and fireball files within the current frame range.

    This is expected to be called in parallel for smaller chunks. Lidar files
    will be validated serially. Jpegs get validated when converted to tif.

    Arguments:
      options  - must provide outputFolder, startFrame and stopFrame.
      fileType - 'ortho' or 'fireball'; any other value raises an Exception.
      logger   - logger receiving the progress messages.

    Each data file in the frame range is checked with hasValidChkSum(). For
    fireball, the matching tfw file is checked as well with isValidTfw().
    A file which fails its check is deleted together with its xml file.
    Files found good are added to the list of validated files kept on disk,
    so that they are not checked again next time.

    Return True if all is good, which includes the case when the index
    file does not exist.'''

    logger.info("Validating files of type: " + fileType)

    if fileType == 'ortho':
        dataFolder = icebridge_common.getOrthoFolder(options.outputFolder)
    elif fileType == 'fireball':
        dataFolder = icebridge_common.getFireballFolder(options.outputFolder)
    else:
        raise Exception("Unknown file type: " + fileType)

    indexPath = icebridge_common.csvIndexFile(dataFolder)
    if not os.path.exists(indexPath):
        # The issue of what to do when the index does not exist should
        # have been settled by now.
        return True

    # Fetch from disk the set of already validated files, if any
    validFilesList = icebridge_common.validFilesList(options.outputFolder,
                                                     options.startFrame,
                                                     options.stopFrame)
    validFilesSet = icebridge_common.updateValidFilesListFromDisk(
        validFilesList, set())
    numInitialValidFiles = len(validFilesSet)

    def wipe(path):
        '''Delete path if it exists. Like "rm -f", this will not throw,
        but no shell is involved, so any characters in path are fine.'''
        try:
            if os.path.exists(path):
                os.remove(path)
        except OSError as e:
            logger.info('Could not wipe ' + path + ': ' + str(e))

    def validateWithXml(path, checker, invalidMsg, validMsg):
        '''Validate path using checker(path, logger), unless path and its
        xml file were validated before and still exist. Return False if
        the check failed and the two files were wiped.'''
        xmlPath = icebridge_common.xmlFile(path)

        if path in validFilesSet and os.path.exists(path) and \
           xmlPath in validFilesSet and os.path.exists(xmlPath):
            return True  # previously validated

        if checker(path, logger):
            logger.info(validMsg + path)
            validFilesSet.add(path)
            validFilesSet.add(xmlPath)
            return True

        logger.info(invalidMsg + path + ' ' + xmlPath)
        wipe(path)
        wipe(xmlPath)
        return False

    badFiles = False
    (frameDict, _) = icebridge_common.readIndexFile(indexPath,
                                                    prependFolder=True)
    for frame in frameDict:

        if frame < options.startFrame or frame > options.stopFrame:
            continue

        outputPath = frameDict[frame]
        if not validateWithXml(outputPath, icebridge_common.hasValidChkSum,
                               'Found invalid data. Will wipe: ',
                               'Valid file: '):
            badFiles = True

        if fileType != 'fireball':
            continue

        # Also validate tfw. This must happen even when the data file
        # itself was validated in a previous run, as the tfw file may
        # not have been validated back then.
        tfwFile = icebridge_common.tfwFile(outputPath)
        if not validateWithXml(tfwFile, icebridge_common.isValidTfw,
                               'Found invalid tfw. Will wipe: ',
                               'Valid tfw file: '):
            badFiles = True

    # Write to disk the list of validated files, but only if new
    # validations happened.  First re-read that list, in case a
    # different process modified it in the meantime, such as if two
    # managers are running at the same time.
    if numInitialValidFiles != len(validFilesSet):
        validFilesSet = icebridge_common.updateValidFilesListFromDisk(
            validFilesList, validFilesSet)
        icebridge_common.writeValidFilesList(validFilesList, validFilesSet)

    return (not badFiles)
def doFetch(options, outputFolder):
    '''The main fetch function.

    Fetch the index of the requested data type, download into
    outputFolder the files in the requested frame range, and validate
    them unless options.skipValidate is set. Messages go to the
    module-level logger.

    Returns the number of failures: -1 if an authentication file is
    missing, the result of fetchNavData() for the 'nav' type, 0 if
    options.stopAfterIndexFetch is set, and otherwise the number of
    distinct files which are missing or had to be wiped.'''

    # Verify that required files exist
    home = os.path.expanduser("~")
    if not (os.path.exists(home+'/.netrc') and os.path.exists(home+'/.urs_cookies')):
        logger.error('Missing a required authentication file!  See instructions here:\n' +
                     '    https://nsidc.org/support/faq/what-options-are-available-bulk-' +
                     'downloading-data-https-earthdata-login-enabled')
        return -1

    curlPath = asp_system_utils.which("curl")
    curlOpts    = ' -n -L '
    cookiePaths = ' -b ~/.urs_cookies -c ~/.urs_cookies '
    baseCurlCmd = curlPath + curlOpts + cookiePaths

    logger.info('Creating output folder: ' + outputFolder)
    os.system('mkdir -p ' + outputFolder)

    isSouth = (options.site == 'AN')

    if options.type == 'nav': # Nav fetching is much less complicated
        return fetchNavData(options, outputFolder)

    parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder)
    if not icebridge_common.fileNonEmpty(parsedIndexPath):
        # Some dirs are weird, both images, fireball dems, and ortho.
        # Just accept whatever there is, but with a warning.
        logger.info('Warning: Missing index file: ' + parsedIndexPath)

    # Store file information in a dictionary
    logger.info('Reading file list from ' + parsedIndexPath)
    try:
        (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath)
    except Exception:
        # We probably ran into old format index file. Must refetch.
        # Do not use a bare except here, as that would also swallow
        # KeyboardInterrupt and SystemExit.
        logger.info('Could not read index file. Try again.')
        options.refetchIndex = True
        parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder)
        (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath)

    if options.stopAfterIndexFetch:
        return 0

    isLidar = (options.type in LIDAR_TYPES)

    allFrames = sorted(frameDict.keys())

    if not isLidar:
        # The lidar frames use a totally different numbering than the image/ortho/dem frames.
        # Find the earliest and latest frame. As allFrames is sorted,
        # it is enough to look at its two ends.
        firstFrame = icebridge_common.getLargestFrame()    # start big
        lastFrame  = icebridge_common.getSmallestFrame()   # start small
        if len(allFrames) > 0:
            firstFrame = min(firstFrame, allFrames[0])
            lastFrame  = max(lastFrame,  allFrames[-1])

        if options.allFrames:
            options.startFrame = firstFrame
            options.stopFrame  = lastFrame

    if isLidar:
        # Based on image frames, determine which lidar frames to fetch.
        if options.ignoreMissingLidar and len(frameDict) == 0:
            # Nothing we can do if this run has no lidar and we are told to continue
            logger.info("Warning: missing lidar, but continuing.")
            lidarsToFetch = set()
        else:
            lidarsToFetch = lidarFilesInRange(frameDict, outputFolder,
                                              options.startFrame, options.stopFrame)

    # There is always a chance that not all requested frames are available.
    # That is particularly true for Fireball DEMs. Instead of failing,
    # just download what is present and give a warning.
    if options.startFrame not in frameDict and not isLidar:
        logger.info("Warning: Frame " + str(options.startFrame) +
                    " is not found in this flight.")

    if options.stopFrame and (options.stopFrame not in frameDict) and not isLidar:
        logger.info("Warning: Frame " + str(options.stopFrame) +
                    " is not found in this flight.")

    allFilesToFetch = [] # Files that we will fetch, relative to the current dir.
    allUrlsToFetch  = [] # Full url of each file.

    # Loop through all found frames within the provided range
    hasTfw = (options.type == 'fireball')
    hasXml = ( isLidar or (options.type == 'ortho') or hasTfw )
    skipCount = 0
    for frame in allFrames:

        # Skip frame outside of range
        if isLidar:
            if frameDict[frame] not in lidarsToFetch:
                continue
        else:
            if ((frame < options.startFrame) or (frame > options.stopFrame) ):
                continue

        # Handle the frame skip option
        if options.frameSkip > 0:
            if skipCount < options.frameSkip:
                skipCount += 1
                continue
            skipCount = 0

        filename = frameDict[frame]

        # Some files have an associated xml file. Fireball DEMs also have a tfw file.
        currFilesToFetch = [filename]
        if hasXml:
            currFilesToFetch.append(icebridge_common.xmlFile(filename))
        if hasTfw:
            currFilesToFetch.append(icebridge_common.tfwFile(filename))

        for currFile in currFilesToFetch:
            allUrlsToFetch.append(os.path.join(urlDict[frame], currFile))
            allFilesToFetch.append(os.path.join(outputFolder, currFile))

    # Restrict lidar fetch amount according to the parameter
    if (isLidar and options.maxNumLidarToFetch > 0 and
           len(allFilesToFetch) > options.maxNumLidarToFetch):

        # Ensure an even number, to fetch both the lidar file and its xml
        if options.maxNumLidarToFetch % 2 == 1:
            options.maxNumLidarToFetch += 1

        allFilesToFetch = allFilesToFetch[0:options.maxNumLidarToFetch]
        allUrlsToFetch  = allUrlsToFetch [0:options.maxNumLidarToFetch]

    icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL, options.dryRun,
                                         outputFolder,
                                         allFilesToFetch, allUrlsToFetch, logger)

    # Fetch from disk the set of already validated files, if any
    validFilesList = icebridge_common.validFilesList(os.path.dirname(outputFolder),
                                                     options.startFrame, options.stopFrame)
    validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, set())
    numInitialValidFiles = len(validFilesSet)

    # Verify that all files were fetched and are in good shape,
    # unless told to skip the validation.
    filesToValidate = allFilesToFetch
    if options.skipValidate:
        filesToValidate = []

    failedFiles = []
    for outputPath in filesToValidate:

        if not icebridge_common.fileNonEmpty(outputPath):
            logger.info('Missing file: ' + outputPath)
            failedFiles.append(outputPath)
            continue

        # Images are not checked with isValidImage() here, as that is
        # just so slow. This impacts only the validation of jpegs, as
        # the other files can be validated via the checksum. Jpegs
        # will be validated when converting them to 1 band images.

        # Sanity check: XML files must be parseable for the latitude.
        # Don't wipe files with wrong latitude, as next time we run
        # fetch we will have to fetch them again. Hopefully they will
        # be ignored.
        if icebridge_common.fileExtension(outputPath) == '.xml':
            if outputPath in validFilesSet and os.path.exists(outputPath):
                continue # previously validated
            if os.path.exists(outputPath):
                try:
                    icebridge_common.parseLatitude(outputPath)
                except Exception:
                    # Corrupted file
                    logger.info("Failed to parse latitude, will wipe: " + outputPath)
                    if os.path.exists(outputPath): os.remove(outputPath)
                    failedFiles.append(outputPath)
                else:
                    logger.info('Valid file: ' + outputPath)
                    validFilesSet.add(outputPath) # mark it as validated

        # Verify the checksum
        if hasXml and len(outputPath) >= 4 and \
               not outputPath.endswith(('.xml', '.tfw')):
            if outputPath in validFilesSet and os.path.exists(outputPath):
                continue # previously validated
            if not icebridge_common.hasValidChkSum(outputPath, logger):
                xmlFile = icebridge_common.xmlFile(outputPath)
                logger.info('Found invalid data. Will wipe: ' + outputPath + ' ' + xmlFile)
                if os.path.exists(outputPath): os.remove(outputPath)
                if os.path.exists(xmlFile):    os.remove(xmlFile)
                failedFiles.append(outputPath)
                failedFiles.append(xmlFile)
                continue
            logger.info('Valid file: ' + outputPath)
            validFilesSet.add(outputPath)

        if hasTfw and icebridge_common.fileExtension(outputPath) == '.tfw':
            if outputPath in validFilesSet and os.path.exists(outputPath):
                continue # previously validated
            if not icebridge_common.isValidTfw(outputPath, logger):
                xmlFile = icebridge_common.xmlFile(outputPath)
                logger.info('Found invalid tfw. Will wipe: ' + outputPath + ' ' + xmlFile)
                if os.path.exists(outputPath): os.remove(outputPath)
                if os.path.exists(xmlFile):    os.remove(xmlFile)
                failedFiles.append(outputPath)
                failedFiles.append(xmlFile)
                continue
            logger.info('Valid tfw file: ' + outputPath)
            validFilesSet.add(outputPath)

    # Write to disk the list of validated files, but only if new
    # validations happened.  First re-read that list, in case a
    # different process modified it in the meantime, such as if two
    # managers are running at the same time.
    if numInitialValidFiles != len(validFilesSet):
        validFilesSet = \
                      icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet)
        icebridge_common.writeValidFilesList(validFilesList, validFilesSet)

    # An xml file wiped together with its data file shows up a second
    # time as a missing file when the loop above gets to it, so count
    # each failed file only once.
    numFailed = len(set(failedFiles))
    if numFailed > 0:
        logger.info("Number of files that could not be processed: " + str(numFailed))

    return numFailed
# Example #4
# 0
def doFetch(options, outputFolder):
    '''The main fetch function.

    Fetch the index of the requested data type, download into
    outputFolder the files in the requested frame range, and validate
    them unless options.skipValidate is set. Messages go to the
    module-level logger.

    Returns the number of failures: -1 if an authentication file is
    missing, the result of fetchNavData() for the 'nav' type, 0 if
    options.stopAfterIndexFetch is set, and otherwise the number of
    distinct files which are missing or had to be wiped.'''

    # Verify that required files exist
    home = os.path.expanduser("~")
    if not (os.path.exists(home + '/.netrc')
            and os.path.exists(home + '/.urs_cookies')):
        logger.error(
            'Missing a required authentication file!  See instructions here:\n'
            +
            '    https://nsidc.org/support/faq/what-options-are-available-bulk-'
            + 'downloading-data-https-earthdata-login-enabled')
        return -1

    curlPath = asp_system_utils.which("curl")
    curlOpts = ' -n -L '
    cookiePaths = ' -b ~/.urs_cookies -c ~/.urs_cookies '
    baseCurlCmd = curlPath + curlOpts + cookiePaths

    logger.info('Creating output folder: ' + outputFolder)
    os.system('mkdir -p ' + outputFolder)

    isSouth = (options.site == 'AN')

    if options.type == 'nav':  # Nav fetching is much less complicated
        return fetchNavData(options, outputFolder)

    parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd,
                                             outputFolder)
    if not icebridge_common.fileNonEmpty(parsedIndexPath):
        # Some dirs are weird, both images, fireball dems, and ortho.
        # Just accept whatever there is, but with a warning.
        logger.info('Warning: Missing index file: ' + parsedIndexPath)

    # Store file information in a dictionary
    logger.info('Reading file list from ' + parsedIndexPath)
    try:
        (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath)
    except Exception:
        # We probably ran into old format index file. Must refetch.
        # Do not use a bare except here, as that would also swallow
        # KeyboardInterrupt and SystemExit.
        logger.info('Could not read index file. Try again.')
        options.refetchIndex = True
        parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd,
                                                 outputFolder)
        (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath)

    if options.stopAfterIndexFetch:
        return 0

    isLidar = (options.type in LIDAR_TYPES)

    allFrames = sorted(frameDict.keys())

    if not isLidar:
        # The lidar frames use a totally different numbering than the image/ortho/dem frames.
        # Find the earliest and latest frame. As allFrames is sorted,
        # it is enough to look at its two ends.
        firstFrame = icebridge_common.getLargestFrame()  # start big
        lastFrame = icebridge_common.getSmallestFrame()  # start small
        if len(allFrames) > 0:
            firstFrame = min(firstFrame, allFrames[0])
            lastFrame = max(lastFrame, allFrames[-1])

        if options.allFrames:
            options.startFrame = firstFrame
            options.stopFrame = lastFrame

    if isLidar:
        # Based on image frames, determine which lidar frames to fetch.
        if options.ignoreMissingLidar and len(frameDict) == 0:
            # Nothing we can do if this run has no lidar and we are told to continue
            logger.info("Warning: missing lidar, but continuing.")
            lidarsToFetch = set()
        else:
            lidarsToFetch = lidarFilesInRange(frameDict, outputFolder,
                                              options.startFrame,
                                              options.stopFrame)

    # There is always a chance that not all requested frames are available.
    # That is particularly true for Fireball DEMs. Instead of failing,
    # just download what is present and give a warning.
    if options.startFrame not in frameDict and not isLidar:
        logger.info("Warning: Frame " + str(options.startFrame) +
                    " is not found in this flight.")

    if options.stopFrame and (options.stopFrame
                              not in frameDict) and not isLidar:
        logger.info("Warning: Frame " + str(options.stopFrame) +
                    " is not found in this flight.")

    allFilesToFetch = []  # Files that we will fetch, relative to the current dir.
    allUrlsToFetch = []  # Full url of each file.

    # Loop through all found frames within the provided range
    hasTfw = (options.type == 'fireball')
    hasXml = (isLidar or (options.type == 'ortho') or hasTfw)
    skipCount = 0
    for frame in allFrames:

        # Skip frame outside of range
        if isLidar:
            if frameDict[frame] not in lidarsToFetch:
                continue
        else:
            if ((frame < options.startFrame) or (frame > options.stopFrame)):
                continue

        # Handle the frame skip option
        if options.frameSkip > 0:
            if skipCount < options.frameSkip:
                skipCount += 1
                continue
            skipCount = 0

        filename = frameDict[frame]

        # Some files have an associated xml file. Fireball DEMs also have a tfw file.
        currFilesToFetch = [filename]
        if hasXml:
            currFilesToFetch.append(icebridge_common.xmlFile(filename))
        if hasTfw:
            currFilesToFetch.append(icebridge_common.tfwFile(filename))

        for currFile in currFilesToFetch:
            allUrlsToFetch.append(os.path.join(urlDict[frame], currFile))
            allFilesToFetch.append(os.path.join(outputFolder, currFile))

    # Restrict lidar fetch amount according to the parameter
    if (isLidar and options.maxNumLidarToFetch > 0
            and len(allFilesToFetch) > options.maxNumLidarToFetch):

        # Ensure an even number, to fetch both the lidar file and its xml
        if options.maxNumLidarToFetch % 2 == 1:
            options.maxNumLidarToFetch += 1

        allFilesToFetch = allFilesToFetch[0:options.maxNumLidarToFetch]
        allUrlsToFetch = allUrlsToFetch[0:options.maxNumLidarToFetch]

    icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL,
                                         options.dryRun, outputFolder,
                                         allFilesToFetch, allUrlsToFetch,
                                         logger)

    # Fetch from disk the set of already validated files, if any
    validFilesList = icebridge_common.validFilesList(
        os.path.dirname(outputFolder), options.startFrame, options.stopFrame)
    validFilesSet = icebridge_common.updateValidFilesListFromDisk(
        validFilesList, set())
    numInitialValidFiles = len(validFilesSet)

    # Verify that all files were fetched and are in good shape,
    # unless told to skip the validation.
    filesToValidate = allFilesToFetch
    if options.skipValidate:
        filesToValidate = []

    failedFiles = []
    for outputPath in filesToValidate:

        if not icebridge_common.fileNonEmpty(outputPath):
            logger.info('Missing file: ' + outputPath)
            failedFiles.append(outputPath)
            continue

        # Images are not checked with isValidImage() here, as that is
        # just so slow. This impacts only the validation of jpegs, as
        # the other files can be validated via the checksum. Jpegs
        # will be validated when converting them to 1 band images.

        # Sanity check: XML files must be parseable for the latitude.
        # Don't wipe files with wrong latitude, as next time we run
        # fetch we will have to fetch them again. Hopefully they will
        # be ignored.
        if icebridge_common.fileExtension(outputPath) == '.xml':
            if outputPath in validFilesSet and os.path.exists(outputPath):
                continue  # previously validated
            if os.path.exists(outputPath):
                try:
                    icebridge_common.parseLatitude(outputPath)
                except Exception:
                    # Corrupted file
                    logger.info("Failed to parse latitude, will wipe: " +
                                outputPath)
                    if os.path.exists(outputPath): os.remove(outputPath)
                    failedFiles.append(outputPath)
                else:
                    logger.info('Valid file: ' + outputPath)
                    validFilesSet.add(outputPath)  # mark it as validated

        # Verify the checksum
        if hasXml and len(outputPath) >= 4 and \
               not outputPath.endswith(('.xml', '.tfw')):
            if outputPath in validFilesSet and os.path.exists(outputPath):
                continue  # previously validated
            if not icebridge_common.hasValidChkSum(outputPath, logger):
                xmlFile = icebridge_common.xmlFile(outputPath)
                logger.info('Found invalid data. Will wipe: ' +
                            outputPath + ' ' + xmlFile)
                if os.path.exists(outputPath): os.remove(outputPath)
                if os.path.exists(xmlFile): os.remove(xmlFile)
                failedFiles.append(outputPath)
                failedFiles.append(xmlFile)
                continue
            logger.info('Valid file: ' + outputPath)
            validFilesSet.add(outputPath)

        if hasTfw and icebridge_common.fileExtension(outputPath) == '.tfw':
            if outputPath in validFilesSet and os.path.exists(outputPath):
                continue  # previously validated
            if not icebridge_common.isValidTfw(outputPath, logger):
                xmlFile = icebridge_common.xmlFile(outputPath)
                logger.info('Found invalid tfw. Will wipe: ' + outputPath +
                            ' ' + xmlFile)
                if os.path.exists(outputPath): os.remove(outputPath)
                if os.path.exists(xmlFile): os.remove(xmlFile)
                failedFiles.append(outputPath)
                failedFiles.append(xmlFile)
                continue
            logger.info('Valid tfw file: ' + outputPath)
            validFilesSet.add(outputPath)

    # Write to disk the list of validated files, but only if new
    # validations happened.  First re-read that list, in case a
    # different process modified it in the meantime, such as if two
    # managers are running at the same time.
    if numInitialValidFiles != len(validFilesSet):
        validFilesSet = icebridge_common.updateValidFilesListFromDisk(
            validFilesList, validFilesSet)
        icebridge_common.writeValidFilesList(validFilesList, validFilesSet)

    # An xml file wiped together with its data file shows up a second
    # time as a missing file when the loop above gets to it, so count
    # each failed file only once.
    numFailed = len(set(failedFiles))
    if numFailed > 0:
        logger.info("Number of files that could not be processed: " +
                    str(numFailed))

    return numFailed
# Example #5
# 0
def doFetch(options, outputFolder):
    '''Fetch the files of type options.type for the requested frame range
    into outputFolder, then validate what was fetched.

    The flight index is fetched and parsed first; if it cannot be read it
    is refetched once. For every frame in [options.startFrame,
    options.stopFrame] the frame's file is queued, together with its xml
    file (lidar types, ortho, dem) and its tfw file (dem only). The queue
    is handed to icebridge_common.fetchFilesInBatches. Unless
    options.skipValidate is set, each queued file is then checked and
    invalid files are wiped from disk.

    Side effects on options: when options.allFrames is set,
    options.startFrame and options.stopFrame are overwritten with the
    frame range found in the index; options.refetchIndex is set to True
    if the index had to be refetched.

    Return -1 if a required authentication file is missing. Otherwise
    return the number of entries recorded as failed (a file wiped together
    with its xml file counts as two entries); 0 means nothing failed.'''

    # Verify that required files exist
    home = os.path.expanduser("~")
    if not (os.path.exists(home + '/.netrc')
            and os.path.exists(home + '/.urs_cookies')):
        logger.error(
            'Missing a required authentication file!  See instructions here:\n'
            +
            '    https://nsidc.org/support/faq/what-options-are-available-bulk-downloading-data-https-earthdata-login-enabled'
        )
        return -1

    curlPath = asp_system_utils.which("curl")
    curlOpts = ' -n -L '
    cookiePaths = ' -b ~/.urs_cookies -c ~/.urs_cookies '
    baseCurlCmd = curlPath + curlOpts + cookiePaths

    logger.info('Creating output folder: ' + outputFolder)
    os.system('mkdir -p ' + outputFolder)

    isSouth = (options.site == 'AN')
    parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd,
                                             outputFolder)
    if not icebridge_common.fileNonEmpty(parsedIndexPath):
        # Some dirs are weird, both images, dems, and ortho.
        # Just accept whatever there is, but with a warning.
        logger.info('Warning: Missing index file: ' + parsedIndexPath)

    # Store file information in a dictionary
    logger.info('Reading file list from ' + parsedIndexPath)
    try:
        (frameDict, urlDict) = readIndexFile(parsedIndexPath)
    except Exception:
        # We probably ran into old format index file. Must refetch.
        # Catch Exception rather than everything, so that Ctrl-C and
        # sys.exit() are not mistaken for a bad index file.
        logger.info('Could not read index file. Try again.')
        options.refetchIndex = True
        parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd,
                                                 outputFolder)
        (frameDict, urlDict) = readIndexFile(parsedIndexPath)

    allFrames = sorted(frameDict.keys())

    if options.allFrames:
        # Use the earliest and latest frame present in the index. The
        # sentinel values are what gets used when the index has no frames.
        options.startFrame = min([icebridge_common.getLargestFrame()] + allFrames)
        options.stopFrame = max([icebridge_common.getSmallestFrame()] + allFrames)

    # There is always a chance that not all requested frames are available.
    # That is particularly true for Fireball DEMs. Instead of failing,
    # just download what is present and give a warning.
    if options.startFrame not in frameDict:
        logger.info("Warning: Frame " + str(options.startFrame) + \
                    " is not found in this flight.")

    if options.stopFrame and (options.stopFrame not in frameDict):
        logger.info("Warning: Frame " + str(options.stopFrame) + \
                    " is not found in this flight.")

    allFilesToFetch = []  # Files that we will fetch, relative to the current dir.
    allUrlsToFetch = []  # Full url of each file.

    hasTfw = (options.type == 'dem')
    hasXml = ((options.type in LIDAR_TYPES) or (options.type == 'ortho')
              or hasTfw)

    # Loop through all found frames within the provided range
    for frame in allFrames:
        if not (options.startFrame <= frame <= options.stopFrame):
            continue

        frameFile = frameDict[frame]

        # Some files have an associated xml file. DEMs also have a tfw file.
        currFilesToFetch = [frameFile]
        if hasXml:
            currFilesToFetch.append(icebridge_common.xmlFile(frameFile))
        if hasTfw:
            currFilesToFetch.append(icebridge_common.tfwFile(frameFile))

        for fileToFetch in currFilesToFetch:
            allFilesToFetch.append(os.path.join(outputFolder, fileToFetch))
            allUrlsToFetch.append(os.path.join(urlDict[frame], fileToFetch))

    if options.maxNumToFetch > 0 and len(
            allFilesToFetch) > options.maxNumToFetch:
        allFilesToFetch = allFilesToFetch[0:options.maxNumToFetch]
        allUrlsToFetch = allUrlsToFetch[0:options.maxNumToFetch]

    icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL,
                                         options.dryRun, outputFolder,
                                         allFilesToFetch, allUrlsToFetch,
                                         logger)

    # Verify that all files were fetched and are in good shape
    failedFiles = []

    def wipeWithXml(path):
        '''Remove path and its xml file from disk, if present, and record
        both as failed.'''
        xmlFile = icebridge_common.xmlFile(path)
        logger.info('Found invalid data. Will wipe it: ' + path +
                    ' ' + xmlFile)
        if os.path.exists(path): os.remove(path)
        if os.path.exists(xmlFile): os.remove(xmlFile)
        failedFiles.append(path)
        failedFiles.append(xmlFile)

    # With skipValidate nothing is checked, so nothing can be marked failed.
    filesToValidate = [] if options.skipValidate else allFilesToFetch

    for outputPath in filesToValidate:

        if not icebridge_common.fileNonEmpty(outputPath):
            logger.info('Missing file: ' + outputPath)
            failedFiles.append(outputPath)
            continue

        if icebridge_common.hasImageExtension(outputPath):
            if not icebridge_common.isValidImage(outputPath):
                logger.info('Found an invalid image. Will wipe it: ' +
                            outputPath)
                if os.path.exists(outputPath): os.remove(outputPath)
                failedFiles.append(outputPath)
                continue
            logger.info('Valid image: ' + outputPath)

        # Sanity check: XML files must have the right latitude.
        # Note: files wiped here are not added to failedFiles.
        if icebridge_common.fileExtension(outputPath) == '.xml' \
               and os.path.exists(outputPath):
            latitude = icebridge_common.parseLatitude(outputPath)
            if not hasGoodLat(latitude, isSouth):
                logger.info("Wiping XML file " + outputPath + " with bad latitude " + \
                            str(latitude))
                os.remove(outputPath)
                imageFile = icebridge_common.xmlToImage(outputPath)
                if os.path.exists(imageFile):
                    logger.info("Wiping TIF file " + imageFile + " with bad latitude " + \
                                str(latitude))
                    os.remove(imageFile)

        # Verify the checksum. The xml and tfw files themselves are
        # excluded from this check.
        if hasXml and len(outputPath) >= 4 \
               and not outputPath.endswith(('.xml', '.tfw')):
            if not icebridge_common.hasValidChkSum(outputPath):
                wipeWithXml(outputPath)
                continue
            logger.info('Valid chksum: ' + outputPath)

        if hasTfw and icebridge_common.fileExtension(outputPath) == '.tfw':
            if not icebridge_common.isValidTfw(outputPath):
                wipeWithXml(outputPath)
                continue
            logger.info('Valid tfw file: ' + outputPath)

    numFailed = len(failedFiles)
    if numFailed > 0:
        logger.info("Number of files that could not be processed: " +
                    str(numFailed))

    return numFailed