def generateOperation(self, opType, nbFiles, dests, sources=None):
    """Generate one FTS3Operation object with FTS3Files in it.

    :param str opType: 'Transfer' or 'Staging'
    :param int nbFiles: multiplier for the number of LFNs: ``nbFiles * len(dests)``
                        distinct LFNs are generated
    :param list dests: target SEs; each generated LFN gets one FTS3File per entry
    :param sources: stored unchanged in ``op.sourceSEs``

    :returns: the operation, with its ``ftsFiles`` list filled
    :raises ValueError: if ``opType`` is neither 'Transfer' nor 'Staging'
    """
    if opType == 'Transfer':
        op = FTS3TransferOperation()
    elif opType == 'Staging':
        op = FTS3StagingOperation()
    else:
        # Fail here with a clear message rather than with an AttributeError
        # on None a few lines below
        raise ValueError("Unknown FTS3 operation type %r" % (opType,))

    op.username = "******"
    op.userGroup = "Floyd"
    op.sourceSEs = sources

    # range (not xrange) so that the helper runs on both Python 2 and Python 3
    for _i in range(nbFiles * len(dests)):
        # self.fileCounter makes the LFN unique across calls on the same instance
        self.fileCounter += 1
        for dest in dests:
            ftsFile = FTS3File()
            ftsFile.lfn = 'lfn%s' % self.fileCounter
            ftsFile.targetSE = dest
            op.ftsFiles.append(ftsFile)

    return op
def generateOperation(self, opType, nbFiles, dests, sources=None):
    """Generate one FTS3Operation object with FTS3Files in it.

    The username and group of the operation are read from the result of
    ``getProxyInfo()``.

    :param str opType: "Transfer" or "Staging"
    :param int nbFiles: multiplier for the number of LFNs: ``nbFiles * len(dests)``
                        distinct LFNs are generated
    :param list dests: target SEs; each generated LFN gets one FTS3File per entry
    :param sources: stored unchanged in ``op.sourceSEs``

    :returns: the operation, with its ``ftsFiles`` list filled
    :raises ValueError: if ``opType`` is neither "Transfer" nor "Staging"
    """
    if opType == "Transfer":
        op = FTS3TransferOperation()
    elif opType == "Staging":
        op = FTS3StagingOperation()
    else:
        # Fail here with a clear message rather than with an AttributeError
        # on None a few lines below
        raise ValueError("Unknown FTS3 operation type %r" % (opType,))

    proxyInfo = getProxyInfo()["Value"]
    op.username = proxyInfo["username"]
    op.userGroup = proxyInfo["group"]
    op.sourceSEs = sources

    for _i in range(nbFiles * len(dests)):
        # self.fileCounter makes the LFN unique across calls on the same instance
        self.fileCounter += 1
        for dest in dests:
            ftsFile = FTS3File()
            ftsFile.lfn = "lfn%s" % self.fileCounter
            ftsFile.targetSE = dest
            op.ftsFiles.append(ftsFile)

    return op
def test_raceCondition(fts3db):
    """Reproduce a race condition seen when several agents run in parallel.

    Some nonFinishedOperations were picked up for further processing while jobs
    belonging to the same operation were being monitored. The test builds every
    combination of assigned / non assigned operation and job:

    | OpID | OpAssigned | JobID | JobAssigned | Comment                                         |
    | ---- | ---------- | ----- | ----------- | ----------------------------------------------- |
    | 1    |            |       |             | No job                                          |
    | 2    | yes        |       |             | No job                                          |
    | 3    |            | 1     |             | Nothing is assigned                             |
    | 4    |            | 2     | yes         | Job is assigned, so can't use the operation     |
    | 5    | yes        | 3     |             | Op is assigned, so can't use it                 |
    | 6    | yes        | 4     | yes         | That would be a problematic situation !!        |
    | 7    |            | 5     | yes         | Job 5 is assigned, so Op 7 cannot be used, even |
    |      |            | 6     |             | if Job 6 is unassigned (this was the bug)       |
    | 8    | yes        | 7     | yes         | Op 8 is assigned, so can't be used (and is      |
    |      | yes        | 8     |             | problematic like op 6)                          |

    Under these circumstances, we want:

    * getNonFinishedOperation to return operations 1 and 3
    * getActiveJobs to return jobs 1 and 6
    """

    def _buildOperation(opID, jobIDs=(), nbFiles=1):
        """Create a transfer operation with the given ID, files and jobs.

        Every operation needs at least one file for the queries to make sense.
        """
        operation = FTS3TransferOperation()
        operation.operationID = opID
        for _ in range(nbFiles):
            ftsFile = FTS3File()
            ftsFile.targetSE = "targetSE"
            operation.ftsFiles.append(ftsFile)
        for jobID in jobIDs:
            job = FTS3Job()
            job.jobID = jobID
            operation.ftsJobs.append(job)
        return operation

    # Assignment flags are set further down, directly in the DB
    allOps = [
        _buildOperation(1),  # non assigned op, no job
        _buildOperation(2),  # assigned op, no job
        _buildOperation(3, jobIDs=(1,)),  # non assigned op, non assigned job
        _buildOperation(4, jobIDs=(2,)),  # non assigned op, assigned job
        _buildOperation(5, jobIDs=(3,)),  # assigned op, non assigned job
        _buildOperation(6, jobIDs=(4,)),  # assigned op, assigned job: to be avoided
        # non assigned op, one assigned job and one non assigned job
        _buildOperation(7, jobIDs=(5, 6), nbFiles=2),
        # assigned op, one assigned job and one non assigned job: problematic like op6
        _buildOperation(8, jobIDs=(7, 8)),
    ]

    for operation in allOps:
        persisted = fts3db.persistOperation(operation)
        assert persisted["OK"]

    # Mark jobs 2, 4, 5 and 7 as assigned
    assignJobs = (
        update(FTS3DB.fts3JobTable)
        .values(assignment="Yes")
        .where(FTS3DB.fts3JobTable.c.jobID.in_([2, 4, 5, 7]))
    )
    with fts3db.engine.begin() as conn:
        conn.execute(assignJobs)

    # Mark operations 2, 5, 6 and 8 as assigned
    assignOps = (
        update(FTS3DB.fts3OperationTable)
        .values(assignment="Yes")
        .where(FTS3DB.fts3OperationTable.c.operationID.in_([2, 5, 6, 8]))
    )
    with fts3db.engine.begin() as conn:
        conn.execute(assignOps)

    res = fts3db.getNonFinishedOperations(operationAssignmentTag=None)
    assert res["OK"]
    assert [operation.operationID for operation in res["Value"]] == [1, 3]

    res = fts3db.getActiveJobs(jobAssignmentTag=None)
    assert res["OK"]
    assert [job.jobID for job in res["Value"]] == [1, 6]
def fts3Transfer(self):
    """Replicate and register the waiting files using FTS3.

    Each waiting file that has valid replicas and is still missing at one or
    more target SEs is turned into FTS3Files, which are persisted as a single
    FTS3TransferOperation. Files without any usable replica get their Error
    (and, where applicable, Status) updated.

    :returns: the failing result of an intermediate step, S_OK() when
              _checkExistingFTS3Operations reports ongoing transfers, otherwise
              the result of ``self.dmTransfer(fromFTS=True)``
    """
    self.log.info("scheduling files in FTS3...")

    # Do not schedule anything while previous transfers are still ongoing
    ongoing = self._checkExistingFTS3Operations()
    if not ongoing['OK']:
        return ongoing
    if ongoing['Value'] is False:
        # False means there are ongoing transfers:
        # return S_OK such that the request is put back
        return S_OK()

    # FileID -> RMS file object
    rmsFilesIds = {}
    # LFN -> [RMS file object, target SEs where the file is still missing]
    toSchedule = {}

    for opFile in self.getWaitingFilesList():
        rmsFilesIds[opFile.FileID] = opFile
        opFile.Error = ''
        gMonitor.addMark("FTSScheduleAtt")

        # check replicas
        filtered = self._filterReplicas(opFile)
        if not filtered["OK"]:
            continue
        byState = filtered["Value"]

        validReplicas = byState["Valid"]
        noMetaReplicas = byState["NoMetadata"]
        noReplicas = byState['NoReplicas']
        badReplicas = byState['Bad']
        noPFN = byState['NoPFN']

        if validReplicas:
            missingAt = list(set(self.operation.targetSEList) - set(validReplicas))
            if missingAt:
                toSchedule[opFile.LFN] = [opFile, missingAt]
            else:
                self.log.info("file %s is already present at all targets" % opFile.LFN)
                opFile.Status = "Done"
            continue

        # From here on: no valid replica for this file
        gMonitor.addMark("FTSScheduleFail")
        if noMetaReplicas:
            self.log.warn("unable to schedule '%s', couldn't get metadata at %s" %
                          (opFile.LFN, ','.join(noMetaReplicas)))
            opFile.Error = "Couldn't get metadata"
        elif noReplicas:
            self.log.error("Unable to schedule transfer",
                           "File %s doesn't exist at %s" % (opFile.LFN, ','.join(noReplicas)))
            opFile.Error = 'No replicas found'
            opFile.Status = 'Failed'
        elif badReplicas:
            self.log.error("Unable to schedule transfer",
                           "File %s, all replicas have a bad checksum at %s" %
                           (opFile.LFN, ','.join(badReplicas)))
            opFile.Error = 'All replicas have a bad checksum'
            opFile.Status = 'Failed'
        elif noPFN:
            self.log.warn("unable to schedule %s, could not get a PFN at %s" %
                          (opFile.LFN, ','.join(noPFN)))

    withMetadata = self._addMetadataToFiles(toSchedule)
    if not withMetadata['OK']:
        return withMetadata

    # One FTS3File per (file, missing target SE) pair
    fts3Files = []
    for lfn, opFile in withMetadata['Value'].items():
        for targetSE in toSchedule[lfn][1]:
            fts3Files.append(FTS3File.fromRMSFile(opFile, targetSE))

    if not fts3Files:
        self.log.info("No files to schedule after metadata checks")
    else:
        owner = Registry.getUsernameForDN(self.request.OwnerDN)
        if not owner['OK']:
            self.log.error("Cannot get username for DN",
                           "%s %s" % (self.request.OwnerDN, owner['Message']))
            return owner

        fts3Operation = FTS3TransferOperation.fromRMSObjects(
            self.request, self.operation, owner['Value'])
        fts3Operation.ftsFiles = fts3Files

        persisted = FTS3Client().persistOperation(fts3Operation)
        if not persisted["OK"]:
            self.log.error("Completely failed to schedule to FTS3:", persisted["Message"])
            return persisted

        self.log.info("Scheduled with FTS3Operation id %s" % persisted["Value"])
        self.log.info("%d files have been scheduled to FTS3" % len(fts3Files))

        for ftsFile in fts3Files:
            scheduled = rmsFilesIds[ftsFile.rmsFileID]
            gMonitor.addMark("FTSScheduleOK", 1)
            scheduled.Status = "Scheduled"
            self.log.debug("%s has been scheduled for FTS" % scheduled.LFN)

    # Just in case some transfers could not be scheduled, try them with RM
    return self.dmTransfer(fromFTS=True)
def fts3Transfer(self):
    """Schedule the replication and registration of waiting files in FTS3.

    Steps, in order: check that no FTS3 operation is still ongoing, sort the
    waiting files according to the state of their replicas, add metadata to
    the schedulable ones, persist them as one FTS3TransferOperation, and
    finally hand over to ``dmTransfer``.

    :returns: the failing result of an intermediate step, S_OK() when
              _checkExistingFTS3Operations reports ongoing transfers, otherwise
              the result of ``self.dmTransfer(fromFTS=True)``
    """
    self.log.info("scheduling files in FTS3...")

    # Check first if we do not have ongoing transfers
    existingOps = self._checkExistingFTS3Operations()
    if not existingOps['OK']:
        return existingOps
    # A Value of False means that there are ongoing transfers: stop here,
    # returning S_OK such that the request is put back
    if existingOps['Value'] is False:
        return S_OK()

    # LFN -> [RMS file, list of target SEs to transfer to]
    pendingByLFN = {}
    # FileID -> RMS file
    filesByID = {}

    for rmsFile in self.getWaitingFilesList():
        filesByID[rmsFile.FileID] = rmsFile
        rmsFile.Error = ''
        gMonitor.addMark("FTSScheduleAtt")

        replicaRes = self._filterReplicas(rmsFile)
        if not replicaRes["OK"]:
            continue
        replicaInfo = replicaRes["Value"]

        valid = replicaInfo["Valid"]
        noMeta = replicaInfo["NoMetadata"]
        missing = replicaInfo['NoReplicas']
        bad = replicaInfo['Bad']
        noPFN = replicaInfo['NoPFN']

        if not valid:
            # Nothing usable as a source: record why
            gMonitor.addMark("FTSScheduleFail")
            if noMeta:
                message = "unable to schedule '%s', couldn't get metadata at %s" % (
                    rmsFile.LFN, ','.join(noMeta))
                self.log.warn(message)
                rmsFile.Error = "Couldn't get metadata"
            elif missing:
                detail = "File %s doesn't exist at %s" % (rmsFile.LFN, ','.join(missing))
                self.log.error("Unable to schedule transfer", detail)
                rmsFile.Error = 'No replicas found'
                rmsFile.Status = 'Failed'
            elif bad:
                detail = "File %s, all replicas have a bad checksum at %s" % (
                    rmsFile.LFN, ','.join(bad))
                self.log.error("Unable to schedule transfer", detail)
                rmsFile.Error = 'All replicas have a bad checksum'
                rmsFile.Status = 'Failed'
            elif noPFN:
                message = "unable to schedule %s, could not get a PFN at %s" % (
                    rmsFile.LFN, ','.join(noPFN))
                self.log.warn(message)
        else:
            targets = list(set(self.operation.targetSEList) - set(valid))
            if targets:
                pendingByLFN[rmsFile.LFN] = [rmsFile, targets]
            else:
                self.log.info("file %s is already present at all targets" % rmsFile.LFN)
                rmsFile.Status = "Done"

    metadataRes = self._addMetadataToFiles(pendingByLFN)
    if not metadataRes['OK']:
        return metadataRes
    withMetadata = metadataRes['Value']

    ftsFiles = []
    for lfn in withMetadata:
        rmsFile = withMetadata[lfn]
        ftsFiles.extend(
            FTS3File.fromRMSFile(rmsFile, targetSE) for targetSE in pendingByLFN[lfn][1])

    if ftsFiles:
        userRes = Registry.getUsernameForDN(self.request.OwnerDN)
        if not userRes['OK']:
            self.log.error(
                "Cannot get username for DN", "%s %s" % (self.request.OwnerDN, userRes['Message']))
            return userRes

        transferOp = FTS3TransferOperation.fromRMSObjects(
            self.request, self.operation, userRes['Value'])
        transferOp.ftsFiles = ftsFiles

        persistRes = FTS3Client().persistOperation(transferOp)
        if not persistRes["OK"]:
            self.log.error("Completely failed to schedule to FTS3:", persistRes["Message"])
            return persistRes

        operationID = persistRes["Value"]
        self.log.info("Scheduled with FTS3Operation id %s" % operationID)
        self.log.info("%d files have been scheduled to FTS3" % len(ftsFiles))

        for ftsFile in ftsFiles:
            rmsFile = filesByID[ftsFile.rmsFileID]
            gMonitor.addMark("FTSScheduleOK", 1)
            rmsFile.Status = "Scheduled"
            self.log.debug("%s has been scheduled for FTS" % rmsFile.LFN)
    else:
        self.log.info("No files to schedule after metadata checks")

    # Just in case some transfers could not be scheduled, try them with RM
    return self.dmTransfer(fromFTS=True)
def fts3Transfer(self):
    """Replicate and register using FTS3.

    Waiting files that have valid replicas and are still missing at one or
    more target SEs are converted to FTS3Files and persisted as a single
    FTS3TransferOperation. When ``self.rmsMonitoring`` is set, "Attempted",
    "Failed" and "Successful" records are sent through
    ``self.rmsMonitoringReporter``.

    :returns: the failing result of an intermediate step, S_OK() when
              _checkExistingFTS3Operations reports ongoing transfers, otherwise
              the result of ``self.dmTransfer(fromFTS=True)``
    """

    self.log.info("scheduling files in FTS3...")

    # Check first if we do not have ongoing transfers
    res = self._checkExistingFTS3Operations()
    if not res["OK"]:
        return res

    # if res['Value'] is False
    # it means that there are ongoing transfers
    # and we should stop here
    if res["Value"] is False:
        # return S_OK such that the request is put back
        return S_OK()

    fts3Files = []
    # LFN -> [RMS file object, target SEs where the file is still missing]
    toSchedule = {}

    # Dict which maps the FileID to the object
    rmsFilesIds = {}

    # Fetch the list once: the same list feeds both the monitoring record
    # and the scheduling loop
    waitingFiles = self.getWaitingFilesList()

    if self.rmsMonitoring:
        self.rmsMonitoringReporter.addRecord(self.createRMSRecord("Attempted", len(waitingFiles)))

    for opFile in waitingFiles:
        rmsFilesIds[opFile.FileID] = opFile

        opFile.Error = ""

        # check replicas
        replicas = self._filterReplicas(opFile)
        if not replicas["OK"]:
            continue
        replicas = replicas["Value"]

        validReplicas = replicas["Valid"]
        noMetaReplicas = replicas["NoMetadata"]
        noReplicas = replicas["NoReplicas"]
        badReplicas = replicas["Bad"]
        noPFN = replicas["NoPFN"]

        if validReplicas:
            validTargets = list(set(self.operation.targetSEList) - set(validReplicas))
            if not validTargets:
                self.log.info("file %s is already present at all targets" % opFile.LFN)
                opFile.Status = "Done"
            else:
                toSchedule[opFile.LFN] = [opFile, validTargets]

        else:
            if self.rmsMonitoring:
                self.rmsMonitoringReporter.addRecord(self.createRMSRecord("Failed", 1))
            if noMetaReplicas:
                self.log.warn(
                    "unable to schedule file",
                    "'%s': couldn't get metadata at %s" % (opFile.LFN, ",".join(noMetaReplicas)),
                )
                opFile.Error = "Couldn't get metadata"
            elif noReplicas:
                self.log.error(
                    "Unable to schedule transfer",
                    "File %s doesn't exist at %s" % (opFile.LFN, ",".join(noReplicas)),
                )
                opFile.Error = "No replicas found"
                opFile.Status = "Failed"
            elif badReplicas:
                self.log.error(
                    "Unable to schedule transfer",
                    "File %s, all replicas have a bad checksum at %s" % (opFile.LFN, ",".join(badReplicas)),
                )
                opFile.Error = "All replicas have a bad checksum"
                opFile.Status = "Failed"
            elif noPFN:
                self.log.warn(
                    "unable to schedule %s, could not get a PFN at %s" % (opFile.LFN, ",".join(noPFN))
                )

    if self.rmsMonitoring:
        self.rmsMonitoringReporter.commit()

    res = self._addMetadataToFiles(toSchedule)
    if not res["OK"]:
        return res
    filesToSchedule = res["Value"]

    # One FTS3File per (file, missing target SE) pair
    for lfn in filesToSchedule:
        opFile = filesToSchedule[lfn]
        validTargets = toSchedule[lfn][1]
        for targetSE in validTargets:
            ftsFile = FTS3File.fromRMSFile(opFile, targetSE)
            fts3Files.append(ftsFile)

    if fts3Files:
        res = Registry.getUsernameForDN(self.request.OwnerDN)
        if not res["OK"]:
            self.log.error("Cannot get username for DN", "%s %s" % (self.request.OwnerDN, res["Message"]))
            return res

        username = res["Value"]
        fts3Operation = FTS3TransferOperation.fromRMSObjects(self.request, self.operation, username)
        fts3Operation.ftsFiles = fts3Files

        # Inferring the activity is best effort: a failure must not prevent
        # the scheduling, but it is reported instead of being silently dropped
        try:
            if not fts3Operation.activity:
                vo = getVOfromProxyGroup().get("Value")
                fts3Plugin = getFTS3Plugin(vo=vo)
                fts3Operation.activity = fts3Plugin.inferFTSActivity(
                    fts3Operation, self.request, self.operation
                )
        except Exception as e:
            self.log.warn("Could not infer the FTS activity", repr(e))

        ftsSchedule = FTS3Client().persistOperation(fts3Operation)
        if not ftsSchedule["OK"]:
            self.log.error("Completely failed to schedule to FTS3:", ftsSchedule["Message"])
            return ftsSchedule

        # might have nothing to schedule
        ftsSchedule = ftsSchedule["Value"]
        self.log.info("Scheduled with FTS3Operation id %s" % ftsSchedule)

        self.log.info("%d files have been scheduled to FTS3" % len(fts3Files))

        if self.rmsMonitoring:
            self.rmsMonitoringReporter.addRecord(self.createRMSRecord("Successful", len(fts3Files)))

        for ftsFile in fts3Files:
            opFile = rmsFilesIds[ftsFile.rmsFileID]
            opFile.Status = "Scheduled"
            self.log.debug("%s has been scheduled for FTS" % opFile.LFN)
    else:
        self.log.info("No files to schedule after metadata checks")

    if self.rmsMonitoring:
        self.rmsMonitoringReporter.commit()

    # Just in case some transfers could not be scheduled, try them with RM
    return self.dmTransfer(fromFTS=True)