def grd2ArdBatch(inputDf, dwnDir, prcDir, tmpDir, procParam):

    # get the processing parameters
    outResolution = procParam['resolution']
    prdType = procParam['prdType']
    lsMask = procParam['lsMask']
    spkFlt = procParam['spkFlt']
    subset = procParam['subset']
    polarisation = procParam['polarisation']

    # create a processing dictionary,
    # where all frames are grouped into acquisitions
    procDict = refine.createProcDict(inputDf)

    for track, allScenes in procDict.items():
        for sceneList in procDict[track]:

            # get acquisition date
            acqDate = s1Metadata(sceneList[0]).start_date
            # create a subdirectory based on acq. date
            outDir = opj(prcDir, track, acqDate)
            os.makedirs(outDir, exist_ok=True)

            # get the paths to the files
            scenePaths = [s1Metadata(i).s1DwnPath(dwnDir) for i in sceneList]

            # apply the grd2Ard function
            grd2Ard.grd2Ard(scenePaths, outDir, acqDate, tmpDir,
                            outResolution, prdType, lsMask, spkFlt,
                            subset, polarisation)
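# --- hypothetical usage sketch for grd2ArdBatch (not part of the original
# module). The dictionary keys mirror the ones read at the top of the
# function; the concrete values and the inventory GeoDataFrame 'searchGdf'
# are illustrative assumptions only. ---
procParam = {
    'resolution': 20,          # output pixel spacing in metres (assumed)
    'prdType': 'GTCgamma',     # product type label (assumed)
    'lsMask': False,           # create a layover/shadow mask
    'spkFlt': False,           # apply a speckle filter
    'subset': None,            # optional AOI subset (assumed optional)
    'polarisation': 'VV,VH',   # polarisation channels to process (assumed)
}

grd2ArdBatch(inputDf=searchGdf,          # inventory GeoDataFrame from a search
             dwnDir='/data/download',    # where the downloaded GRD scenes live
             prcDir='/data/processing',  # where the ARD products are written
             tmpDir='/tmp/ost',          # scratch space for intermediate files
             procParam=procParam)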
def createBurstGdf(footprintGdf, uname=None, pword=None):

    # create column names for empty data frame
    colNames = ['SceneID', 'Date', 'SwathID', 'BurstID', 'BurstNr',
                'geometry']

    # crs for empty dataframe
    crs = {'init': 'epsg:4326'}
    # create empty dataframe
    gdfFull = gpd.GeoDataFrame(columns=colNames, crs=crs)

    for sceneId in footprintGdf.identifier:

        s = metadata.s1Metadata(sceneId)

        if os.path.exists(s.s1IPTpath()):
            # read the burst annotation from the locally mounted IPT storage
            gdfFull = gdfFull.append(s.s1IPTAnno())
        else:
            # NOTE: the zero-argument call is assumed here; the original
            # compared the method object itself against False
            if s.checkOnlineStatus() is False:
                print(' INFO: Product needs to be online to create'
                      ' a burst database.')
                print(' INFO: Download the product first and create the'
                      ' burst list from the local data.')
            else:
                # read the burst annotation via the ESA scihub API
                gdfFull = gdfFull.append(s.s1EsaAnno(uname, pword))

    return gdfFull
def createBurstGdfOffline(footprintGdf, dwnDir):

    # create column names for empty data frame
    colNames = ['SceneID', 'Date', 'SwathID', 'BurstID', 'BurstNr',
                'geometry']

    # crs for empty dataframe
    crs = {'init': 'epsg:4326'}
    # create empty dataframe
    gdfFull = gpd.GeoDataFrame(columns=colNames, crs=crs)

    for sceneId in footprintGdf.identifier:
        s = metadata.s1Metadata(sceneId)
        gdfFull = gdfFull.append(s.s1DwnAnno(dwnDir))

    return gdfFull
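# --- hypothetical usage of the two burst-database helpers (not part of the
# original module). 'footprintGdf' is assumed to be an inventory GeoDataFrame
# with an 'identifier' column, since both functions iterate over
# footprintGdf.identifier; the credentials and paths are placeholders. ---

# online/cloud case: annotation comes from the IPT mount or the scihub API
burstGdf = createBurstGdf(footprintGdf, uname='scihub_user', pword='********')

# offline case: annotation is read from products already downloaded to dwnDir
burstGdfLocal = createBurstGdfOffline(footprintGdf, dwnDir='/data/download')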
def downloadS1(inputGDF, dwnDir, concurrent=4):

    print(' INFO: One or more of your scenes need to be downloaded.')
    print(' Select the server from where you want to download:')
    print(' (1) Copernicus Apihub (ESA, rolling archive)')
    print(' (2) Alaska Satellite Facility (NASA, full archive)')
    print(' (3) PEPS (CNES, 1 year rolling archive)')
    mirror = input(' Type 1, 2 or 3: ')

    print(' Please provide username and password for the selected server')
    uname = input(' Username:')
    # NOTE: the password prompt was redacted in the source;
    # a getpass prompt is assumed here
    pword = getpass.getpass(' Password:')

    # check the connection to the selected server
    if mirror == '1':
        errCode = checkApihubConn(uname, pword)
    elif mirror == '2':
        errCode = checkASFConn(uname, pword)
    elif mirror == '3':
        errCode = checkPepsConn(uname, pword)

    if mirror != '3':
        # check response
        if errCode == 401:
            raise ValueError(' ERROR: Username/Password are incorrect.')
        elif errCode != 200:
            raise ValueError(' ERROR: Some connection error.')

    # check if all scenes exist
    scenes = inputGDF['identifier'].tolist()

    dowList = []
    asfList = []

    for sceneID in scenes:

        scene = s1Metadata(sceneID)
        dlPath = '{}/SAR/{}/{}/{}/{}'.format(dwnDir, scene.product_type,
                                             scene.year, scene.month,
                                             scene.day)
        fileName = '{}.zip'.format(scene.scene_id)
        uuid = inputGDF['uuid'][inputGDF['identifier'] == sceneID].tolist()

        if not os.path.isdir(dlPath):
            os.makedirs(dlPath)

        # in case the data has been downloaded before
        # if os.path.exists('{}/{}'.format(dlPath, fileName)) is False:

        # create list objects for download
        dowList.append([uuid[0], '{}/{}'.format(dlPath, fileName),
                        uname, pword])
        asfList.append([scene.s1ASFURL(), '{}/{}'.format(dlPath, fileName),
                        uname, pword])

    # download in parallel
    if mirror == '1':
        # scihub
        pool = multiprocessing.Pool(processes=2)
        pool.map(s1ApihubDownload, dowList)
    elif mirror == '2':
        # ASF
        pool = multiprocessing.Pool(processes=concurrent)
        pool.map(s1ASFDownload, asfList)
    elif mirror == '3':
        # PEPS
        batchDownloadPeps(inputGDF, dwnDir, uname, pword, concurrent)
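# --- hypothetical usage note for downloadS1 (not part of the original
# module). The function is interactive: it prompts for the download mirror
# and the credentials on stdin. The GeoDataFrame and directory below are
# illustrative assumptions. ---
downloadS1(inputGDF=scenesToGet, dwnDir='/data/download', concurrent=4)

# scenes end up in the date-based tree constructed inside the function:
#   {dwnDir}/SAR/{product_type}/{year}/{month}/{day}/{scene_id}.zip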
def batchDownloadPeps(fpDataFrame, dwnDir, uname, pword, concurrent=10):

    print(' INFO: Getting the storage status (online/onTape) of each scene'
          ' on the Peps server.')
    print(' INFO: This may take a while.')

    # this call does not just check the status,
    # it already triggers the production of the S1 scene
    fpDataFrame['pepsStatus'], fpDataFrame['pepsUrl'] = (zip(*[
        s1Metadata(x).s1PepsStatus(uname, pword)
        for x in fpDataFrame.identifier.tolist()]))

    # as long as there are any scenes left for downloading, loop
    while len(fpDataFrame[fpDataFrame['pepsStatus'] != 'downloaded']) > 0:

        # exclude already downloaded scenes
        fpDataFrame = fpDataFrame[fpDataFrame['pepsStatus'] != 'downloaded']

        # recheck the status
        fpDataFrame['pepsStatus'], fpDataFrame['pepsUrl'] = (zip(*[
            s1Metadata(x).s1PepsStatus(uname, pword)
            for x in fpDataFrame.identifier.tolist()]))

        # if all scenes to download are on tape, we wait for a minute
        if len(fpDataFrame[fpDataFrame['pepsStatus'] == 'online']) == 0:
            print(' INFO: Imagery still on tape, we will wait for 1 minute'
                  ' and try again.')
            time.sleep(60)

        # else we start downloading
        else:
            # create the pepsList for parallel download
            pepsList = []
            for index, row in fpDataFrame[
                    fpDataFrame['pepsStatus'] == 'online'].iterrows():

                # get scene identifier
                sceneID = row.identifier
                # construct download path
                scene = s1Metadata(sceneID)
                dwnFile = scene.s1DwnPath(dwnDir)
                # put all info into the pepsList for parallelised download
                pepsList.append([
                    fpDataFrame.pepsUrl[
                        fpDataFrame.identifier == sceneID].tolist()[0],
                    dwnFile, uname, pword])

            # parallelised download
            pool = multiprocessing.Pool(processes=concurrent)
            pool.map(s1PepsDownload, pepsList)

            # check if the files have actually been downloaded
            for index, row in fpDataFrame[
                    fpDataFrame['pepsStatus'] == 'online'].iterrows():

                # get scene identifier
                sceneID = row.identifier
                # construct download path
                scene = s1Metadata(sceneID)
                dwnFile = scene.s1DwnPath(dwnDir)

                if os.path.exists(dwnFile):
                    fpDataFrame.at[index, 'pepsStatus'] = 'downloaded'
def checkSceneAvailability(inputGDF, dwnDir, cloudProvider=None):
    '''
    This function checks the availability of the scenes inside a
    GeoDataFrame on different cloud providers and flags the ones that
    need to be downloaded.

    Note: Should be applied after readInventory and before download.

    param: inputGDF is a GeoDataFrame coming from a search and possible
           pre-sorting
    param: dwnDir is the directory where scenes should be downloaded
    param: cloudProvider defines on which cloud we operate (IPT, AWS, OTC)

    returns: a GeoDataFrame with all scenes and a flag of which scenes
             need to be downloaded
    '''

    print(' INFO: Checking if scenes need to be downloaded.')

    # create an empty DataFrame
    df = pd.DataFrame(columns=['identifier', 'filepath', 'toDownload'])

    # loop through each scene
    scenes = inputGDF['identifier'].tolist()
    for sceneID in scenes:

        scene = s1Metadata(sceneID)

        # check if we can read the scene directly from the cloud provider
        if cloudProvider == 'IPT':
            testPath = scene.s1IPTpath()
        elif cloudProvider == 'Amazon':
            testPath = scene.s1AmazonPath()   # function needs to be added
        elif cloudProvider == 'T-Cloud':
            testPath = scene.s1TCloudPath()   # function needs to be added
        else:
            # construct download path
            testPath = '{}/SAR/{}/{}/{}/{}/{}.zip'.format(
                dwnDir, scene.product_type, scene.year, scene.month,
                scene.day, sceneID)

        # if the file exists
        # NOTE: at the moment we assume the IPT structure, where files are
        # stored in SAFE format, i.e. they are directories, not zips
        if os.path.isdir(testPath) or os.path.exists(testPath):

            if dwnDir in testPath:
                # file has already been successfully downloaded
                df = df.append({'identifier': sceneID,
                                'filepath': testPath,
                                'toDownload': False},
                               ignore_index=True)
            else:
                # file is on cloud storage
                df = df.append({'identifier': sceneID,
                                'filepath': testPath,
                                'toDownload': False},
                               ignore_index=True)

        else:
            # construct download path to check if we already downloaded
            testPath = '{}/SAR/{}/{}/{}/{}/{}.zip'.format(
                dwnDir, scene.product_type, scene.year, scene.month,
                scene.day, sceneID)

            # if we are on cloud, check if we already downloaded
            if os.path.exists(testPath):
                # file has already been successfully downloaded
                df = df.append({'identifier': sceneID,
                                'filepath': testPath,
                                'toDownload': False},
                               ignore_index=True)
            else:
                # file needs to be downloaded
                df = df.append({'identifier': sceneID,
                                'filepath': testPath,
                                'toDownload': True},
                               ignore_index=True)

    # merge the flags back into the GeoDataFrame and return it
    inputGDF = inputGDF.merge(df, on='identifier')
    return inputGDF
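# --- hypothetical end-to-end sketch (not part of the original module).
# 'inventoryGdf' is assumed to come from a prior search/readInventory step;
# the cloud provider setting and the paths are illustrative only. ---
flaggedGdf = checkSceneAvailability(inventoryGdf,
                                    dwnDir='/data/download',
                                    cloudProvider=None)

# hand only the scenes flagged as missing over to the interactive downloader
missingGdf = flaggedGdf[flaggedGdf['toDownload'] == True]
if len(missingGdf) > 0:
    downloadS1(missingGdf, dwnDir='/data/download')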