def _optionallyUploadLocalFile(self):
    """
    Offer to upload a purely local file to the grid when an LFN is needed.

    Nothing happens if this file already has an LFN. If it only has a
    namePattern the user is asked interactively whether the file should be
    uploaded first; an empty reply counts as 'yes' and triggers ``put()``.
    Declining returns without uploading and without raising.

    Raises:
        GangaFileError: if the file has neither an LFN nor a namePattern, or
            if an upload was attempted but no LFN was assigned by it.
    """
    if self.lfn != "":
        return

    if self.namePattern == "":
        raise GangaFileError('Cannot do anything if I don\'t have an lfn or a namePattern!')

    logger.info("I have a local DiracFile, however you're requesting it's location on the grid")
    logger.info("Shall I upload it to the grid before I continue?")

    # Normalise the reply once: the prompt loop accepts any case, so the
    # branches below must as well ('N' used to be reported as a failed upload).
    decision = raw_input('[y] / n:').lower()
    while decision not in ('y', 'n', ''):
        decision = raw_input('[y] / n:').lower()

    if decision == 'n':
        logger.debug("Not uploading now")
        return

    # upload namePattern to grid
    logger.debug("Uploading the file first")
    self.put()

    if self.lfn == "":
        raise GangaFileError('Uploading of namePattern: %s failed' % self.namePattern)
    return
def get(self):
    """
    Fetch this file into a local directory unless it is already present there.

    The destination directory is resolved in this order:
        1) ``localDir`` when it is set (it has to be an existing directory).
        2) The ``outputdir`` of the job this file is attached to.
        3) Otherwise a GangaFileError is raised.

    Returns:
        The value returned by ``copyTo`` when a copy was performed, or True
        when a file named ``namePattern`` already exists in the destination.

    Raises:
        GangaFileError: if ``localDir`` is set but is not a directory, or if
            no destination can be determined.
    """
    destination = self.localDir
    if destination:
        if not os.path.isdir(destination):
            raise GangaFileError(
                "Folder '%s' doesn't exist. Please construct this before 'get'-ing a file." % destination)
    else:
        try:
            destination = self.getJobObject().outputdir
        except AssertionError:
            msg = "%s: Failed to get file object. Please set the `localDir` parameter and try again. e.g. file.localDir=os.getcwd();file.get()" % getName(self)
            logger.debug("localDir value: %s" % self.localDir)
            logger.debug("parent: %s" % self._getParent())
            raise GangaFileError(msg)

    local_copy = os.path.join(destination, self.namePattern)
    if os.path.isfile(local_copy):
        logger.debug("File: %s already exists, not performing copy" % (local_copy, ))
        return True

    # FIXME CANNOT perform a remote globbing here in a nice way so have to
    # just perform a copy when dealing with wildcards
    copy_result = self.copyTo(destination)
    if not self.localDir:
        # Remember where the file ended up when no localDir was configured
        self.localDir = destination
    return copy_result
def internalCopyTo(self, targetPath):
    """
    Download the file identified by this object's LFN into a local directory.

    When ``namePattern`` is empty it is set to the basename of the LFN (with
    a trailing '.gz' removed for compressed files). When ``guid`` is empty or
    ``locations`` is empty, ``getMetadata()`` is called afterwards.

    Args:
        targetPath (str): The local directory the file should be placed in

    Returns:
        bool: True once the download command has been executed

    Raises:
        GangaFileError: if this object has no LFN
    """
    if self.lfn == "":
        raise GangaFileError('Can\'t download a file without an LFN.')

    logger.info("Getting file %s" % self.lfn)
    execute('getFile("%s", destDir="%s")' % (self.lfn, targetPath),
            cred_req=self.credential_requirements)

    if self.namePattern == "":
        name = os.path.basename(self.lfn)
        # Only drop the suffix when it is really there; slicing three
        # characters unconditionally mangles names that do not end in '.gz'.
        if self.compressed and name.endswith('.gz'):
            name = name[:-3]
        self.namePattern = name

    if self.guid == "" or not self.locations:
        self.getMetadata()
    return True
def getMetadata(self):
    """
    Fetch the metadata associated with this file's LFN.

    As a side effect the ``guid`` attribute is set to the GUID found in the
    result when it differs from the current value, and the result entry for
    this LFN gains a 'replicas' key holding ``self.locations``.

    Returns:
        The result of the 'getMetadata' command, with the 'replicas' entry
        added.

    Raises:
        GangaFileError: if ``getReplicas()`` reports no replica for this file
    """
    if self.lfn == "":
        self._optionallyUploadLocalFile()

    # check that it has a replica
    found_replicas = self.getReplicas()
    if not found_replicas:
        raise GangaFileError("No replica found for this file!")

    # eval again here as datatime not included in dirac_ganga_server
    metadata = execute('getMetadata("%s")' % self.lfn,
                       cred_req=self.credential_requirements)

    reported_guid = metadata.get('Successful', {}).get(self.lfn, {}).get('GUID', False)
    if self.guid != reported_guid:
        self.guid = metadata['Successful'][self.lfn]['GUID']

    # Second replica lookup kept from the original flow; its result is not used
    self.getReplicas()
    metadata['Successful'][self.lfn].update({'replicas': self.locations})
    return metadata
def copyTo(self, targetPath):
    """
    Copy the file to local storage using the appropriate transfer mechanism.

    If ``namePattern`` contains no wildcard and the file already exists in
    ``localDir`` it is copied from there with ``shutil.copy`` (unless the
    target already exists); otherwise the file is fetched through
    ``internalCopyTo``.

    Args:
        targetPath (str): Target path where the file is to copied to

    Returns:
        True after a local copy (or when the target already existed),
        otherwise the return value of ``internalCopyTo``.

    Raises:
        GangaFileError: if targetPath is not a non-empty string
    """
    # Reject None, '' and non-string values alike. The previous condition
    # ("not a str AND truthy") let a missing targetPath slip through.
    if not isinstance(targetPath, str) or not targetPath:
        raise GangaFileError(
            "Cannot perform a copyTo with no given targetPath!")

    has_wildcard = regex.search(self.namePattern) is not None
    # localDir may be None (put() caters for that), in which case there is
    # no local source to look at.
    if not has_wildcard and self.localDir is not None:
        local_source = os.path.join(self.localDir, self.namePattern)
        if os.path.isfile(local_source):
            local_target = os.path.join(targetPath, self.namePattern)
            if not os.path.isfile(local_target):
                shutil.copy(local_source, local_target)
            else:
                logger.debug("Already found file: %s" % local_target)
            return True

    # Again, cannot perform a remote glob here so have to ignore wildcards
    return self.internalCopyTo(targetPath)
def getReplicas(self, forceRefresh=False):
    """
    Get the replica information for this file.

    The result of the last lookup is kept in ``self._storedReplicas`` and is
    reused unless it is empty, does not contain the current LFN, or
    ``forceRefresh`` is True.

    Args:
        forceRefresh (bool): query the replicas again even when a stored
            result is available; also passed on to subfiles

    Returns:
        - for a file with subfiles: a list with the ``getReplicas`` result of
          every subfile;
        - otherwise a one-element list holding the stored entry for this LFN;
        - an empty dict when a fresh lookup did not contain this LFN.

    Raises:
        GangaFileError: if the file has no LFN (after optionally uploading)
        GangaDiracError: re-raised when the replica query fails
    """
    if self.lfn == '':
        self._optionallyUploadLocalFile()
    if self.lfn == '':
        raise GangaFileError(
            "Can't find replicas for file which has no LFN!")

    if len(self.subfiles) != 0:
        # Honour forceRefresh for the subfiles too; it used to be dropped here
        return [subfile.getReplicas(forceRefresh=forceRefresh) for subfile in self.subfiles]

    # deep copy just before we change it in case we're pointing to the
    # data stored in original from a copy
    if self._have_copied:
        self._storedReplicas = copy.deepcopy(self._storedReplicas)

    # A stored result that lacks the current LFN is useless: look it up again
    # instead of failing with a KeyError on the cached branch below.
    cache_miss = self._storedReplicas == {} or self.lfn not in self._storedReplicas

    if cache_miss or forceRefresh:
        try:
            self._storedReplicas = execute(
                'getReplicas("%s")' % self.lfn,
                cred_req=self.credential_requirements)
        except GangaDiracError:
            logger.error("Couldn't find replicas for: %s" % str(self.lfn))
            self._storedReplicas = {}
            raise

        try:
            self._storedReplicas = self._storedReplicas['Successful']
        except Exception as err:
            logger.error("Unknown Error: %s from %s" % (str(err), self._storedReplicas))
            raise

        logger.debug("getReplicas: %s" % str(self._storedReplicas))

        if self.lfn in self._storedReplicas:
            self._updateRemoteURLs(self._storedReplicas)
            return [self._storedReplicas[self.lfn]]
        # Kept as an (empty) dict for backward compatibility with callers
        return {}

    return [self._storedReplicas[self.lfn]]
def remove(self):
    """
    Remove this LFN and all of its replicas from DIRAC LFC/SEs.

    After the remove command has been executed the ``lfn``, ``locations``
    and ``guid`` attributes of this object are reset.

    Returns:
        bool: True

    Raises:
        GangaFileError: if this object has no LFN
    """
    if self.lfn == "":
        raise GangaFileError("Can't remove a file from DIRAC SE without an LFN.")

    doomed_lfn = self.lfn
    logger.info('Removing file %s' % doomed_lfn)
    execute('removeFile("%s")' % doomed_lfn, cred_req=self.credential_requirements)

    # The file is gone from the grid: forget everything that referred to it
    self.lfn, self.locations, self.guid = "", [], ''
    return True
def removeReplica(self, SE):
    """
    Remove the replica of this file held at the given SE.

    Args:
        SE (str): the storage element whose replica is to be removed

    Returns:
        bool: True

    Raises:
        GangaFileError: if ``SE`` is not listed in ``self.locations``
        GangaDiracError: re-raised when the remove command fails
    """
    self.getReplicas()
    known_locations = self.locations
    if SE not in known_locations:
        raise GangaFileError("No replica at supplied SE: %s" % SE)

    logger.info("Removing replica at %s for LFN %s" % (SE, self.lfn))
    command = 'removeReplica("%s", "%s")' % (self.lfn, SE)
    try:
        execute(command, cred_req=self.credential_requirements)
    except GangaDiracError:
        raise
    else:
        # Only forget the location once the command has gone through
        self.locations.remove(SE)
    return True
def replicate(self, destSE, sourceSE=''):
    """
    Replicate this file's LFN to another storage element.

    ``destSE`` is added to ``self.locations`` afterwards if it is not
    already listed there.

    Args:
        destSE (str): the SE to replicate the file to
        sourceSE (str): the SE to use as a source for the file

    Raises:
        GangaFileError: if this object has no LFN
    """
    if not self.lfn:
        raise GangaFileError('Must supply an lfn to replicate')

    logger.info("Replicating file %s to %s" % (self.lfn, destSE))
    command = 'replicateFile("%s", "%s", "%s")' % (self.lfn, destSE, sourceSE)
    execute(command, cred_req=self.credential_requirements)

    if destSE in self.locations:
        return
    self.locations.append(destSE)
def put(self, lfn='', force=False, uploadSE="", replicate=False):
    """
    Upload the local file(s) matching namePattern to a DIRAC storage element.

    Every matching file is uploaded to exactly one storage element (SE):
        - the first entry of ``uploadSE`` when it is given,
        - otherwise ``self.defaultSE`` when that is set,
        - otherwise a randomly chosen entry of configDirac['allDiracSE'].
    With ``replicate=True`` the uploaded file(s) are afterwards replicated to
    the remaining entries of ``uploadSE``.

    If this object already has an LFN and ``force`` is not set, the user is
    asked interactively to confirm before anything is uploaded.

    Args:
        lfn (str): LFN to assign to ``self.lfn`` before the upload starts.
            The LFN a file is uploaded as is built from the remote base
            directory and the name of the local file.
        force (bool): skip the interactive confirmation
        uploadSE (str or list): SE name, or list of SE names, to use. The
            list is not modified.
        replicate (bool): replicate to the SEs following the first one

    Returns:
        None when the user declines the confirmation. Otherwise a GangaList:
        for a wildcard namePattern it holds the newly created DiracFile
        objects (uploaded, or with ``failureReason`` populated); for a plain
        namePattern it holds only this object.

    Raises:
        GangaFileError: if there is no namePattern, the source directory is
            not valid, no SE can be determined, or a file to upload is
            missing.
    """
    if self.lfn != "" and not force:
        if lfn == '':
            logger.warning(
                "Warning you're about to 'put' this DiracFile: %s on the grid as it already has an lfn: %s"
                % (self.namePattern, self.lfn))
        else:
            logger.warning(
                "Warning you're attempting to put this DiracFile: %s" % self.namePattern)
            logger.warning("It currently has an LFN associated with it: %s" % self.lfn)
            logger.warning(
                "Do you want to continue and attempt to upload to: %s" % lfn)
        decision = raw_input('y / [n]:')
        while decision.lower() not in ('y', 'n', ''):
            decision = raw_input('y / [n]:')
        # Anything but an explicit 'y' (including the empty default) aborts
        if decision.lower() != 'y':
            return

    if lfn and os.path.basename(lfn) != self.namePattern:
        logger.warning(
            "Changing namePattern from: '%s' to '%s' during put operation"
            % (self.namePattern, os.path.basename(lfn)))

    if lfn:
        self.lfn = lfn

    # looks like will only need this for the interactive uploading of jobs.
    # Also if any backend need dirac upload on client then when downloaded
    # this will upload then delete the file.

    if self.namePattern == "":
        if self.lfn != '':
            logger.warning(
                "'Put'-ing a file with ONLY an existing LFN makes no sense!")
        raise GangaFileError(
            'Can\'t upload a file without a local file name.')

    sourceDir = self.localDir
    if sourceDir is None:
        sourceDir = os.getcwd()
        # attached to a job, use the joboutputdir
        if self._parent is not None:
            job_outputdir = self.getJobObject().outputdir
            if os.path.isdir(job_outputdir):
                sourceDir = job_outputdir

    if not os.path.isdir(sourceDir):
        raise GangaFileError(
            'localDir attribute is not a valid dir, don\'t know from which dir to take the file')

    is_wildcard = regex.search(self.namePattern) is not None
    if is_wildcard and self.lfn != "":
        logger.warning(
            "Cannot specify a single lfn for a wildcard namePattern")
        logger.warning("LFN will be generated automatically")
        self.lfn = ""

    if not self.remoteDir:
        try:
            job = self.getJobObject()
            lfn_folder = os.path.join("GangaJob_%s" % job.getFQID('/'), "OutputFiles")
        except AssertionError:
            # Not attached to a job: use a time-stamped folder instead
            t = datetime.datetime.now()
            this_date = t.strftime("%H.%M_%A_%d_%B_%Y")
            lfn_folder = os.path.join('GangaFiles_%s' % this_date)
        lfn_base = os.path.join(
            DiracFile.diracLFNBase(self.credential_requirements), lfn_folder)
    else:
        lfn_base = os.path.join(
            DiracFile.diracLFNBase(self.credential_requirements), self.remoteDir)

    if uploadSE == "":
        if self.defaultSE != "":
            storage_elements = [self.defaultSE]
        elif configDirac['allDiracSE']:
            storage_elements = [random.choice(configDirac['allDiracSE'])]
        else:
            raise GangaFileError(
                "Can't upload a file without a valid defaultSE or storageSE, please provide one")
    elif isinstance(uploadSE, list):
        # Work on a copy so the caller's list is never modified
        storage_elements = list(uploadSE)
    else:
        storage_elements = [uploadSE]

    outputFiles = GangaList()
    for this_file in glob.glob(os.path.join(sourceDir, self.namePattern)):
        name = this_file

        if not os.path.exists(name):
            if not self.compressed:
                raise GangaFileError(
                    'Cannot upload file. File "%s" must exist!' % name)
            name += '.gz'
            if not os.path.exists(name):
                raise GangaFileError('File "%s" must exist!' % name)
        elif self.compressed:
            # NOTE(review): the file name is interpolated unquoted into a
            # shell command, so names with spaces or shell metacharacters
            # will break this step.
            os.system('gzip -c %s > %s.gz' % (name, name))
            name += '.gz'
            if not os.path.exists(name):
                raise GangaFileError('File "%s" must exist!' % name)

        lfn = os.path.join(lfn_base, os.path.basename(this_file))

        d = DiracFile()
        d.namePattern = os.path.basename(name)
        d.compressed = self.compressed
        d.localDir = sourceDir

        upload_se = storage_elements[0]
        # NOTE(review): 'name' already starts with sourceDir (it comes from
        # the glob above), so this join only leaves it untouched when
        # sourceDir is absolute - confirm relative localDir values are not
        # expected here.
        upload_command = 'uploadFile("%s", "%s", %s)' % (
            lfn, os.path.join(sourceDir, name), str([upload_se]))
        logger.info('Uploading file \'%s\' to \'%s\' as \'%s\'' % (name, upload_se, lfn))
        logger.debug('execute: %s' % upload_command)
        try:
            stdout = execute(upload_command, cred_req=self.credential_requirements)
        except GangaDiracError as err:
            logger.warning("Couldn't upload file '%s': \'%s\'" % (os.path.basename(name), err))
            failureReason = "Error in uploading file '%s' : '%s'" % (
                os.path.basename(name), err)
            if is_wildcard:
                d.failureReason = failureReason
                outputFiles.append(d)
            else:
                self.failureReason += '\n' + failureReason
            continue

        stdout_temp = stdout.get('Successful')
        if not stdout_temp:
            msg = "Couldn't upload file '%s': \'%s\'" % (
                os.path.basename(name), stdout)
            logger.warning(msg)
            if is_wildcard:
                d.failureReason = msg
                outputFiles.append(d)
            else:
                self.failureReason = msg
            continue

        lfn_out = stdout_temp[lfn]

        # when doing the two step upload delete the temp file
        if self.compressed or self._parent is not None:
            os.remove(name)

        # need another eval as datetime needs to be included.
        guid = lfn_out.get('GUID', '')
        # Wildcard uploads are recorded on the per-file object, plain uploads
        # on this object itself.
        uploaded = d if is_wildcard else self
        uploaded.lfn = lfn
        uploaded.remoteDir = os.path.dirname(lfn)
        uploaded.locations = lfn_out.get('allDiracSE', '')
        uploaded.guid = guid
        if is_wildcard:
            outputFiles.append(d)

    if replicate:
        # The first SE already holds the upload; replicate to the others.
        replica_targets = storage_elements[1:]
        # Replicate the per-file objects whenever there are any. A wildcard
        # matching a single file used to replicate this object instead, whose
        # LFN has been cleared above.
        to_replicate = outputFiles if len(outputFiles) > 0 else [self]
        for this_file in to_replicate:
            for se in replica_targets:
                this_file.replicate(se)

    if len(outputFiles) > 0:
        return outputFiles
    outputFiles.append(self)
    return outputFiles
def processWildcardMatches(self):
    """
    Refuse namePatterns that contain wildcards.

    Raises:
        GangaFileError: if ``namePattern`` matches the wildcard regex
    """
    if regex.search(self.namePattern) is None:
        return
    raise GangaFileError(
        "No wildcards in inputfiles for DiracFile just yet. Dirac are exposing this in API soon.")