def create_file_spec(self, pfn):
    """
    Build an output FileSpec describing the local file at the given path.

    The checksum is obtained from calculate_checksum(), the size from the
    file system, and the LFN is the base name of the path. The scope is
    always set to 'transient'.

    :param pfn: path to the local file (string).
    :return: FileSpec object with filetype 'output'.
    """

    # keep the original evaluation order: checksum, then size, then base name
    adler = calculate_checksum(pfn)
    size_in_bytes = os.path.getsize(pfn)
    logical_name = os.path.basename(pfn)

    return FileSpec(
        filetype='output',
        scope='transient',
        lfn=logical_name,
        checksum=adler,
        filesize=size_in_bytes,
    )
def verify_catalog_checksum(fspec, path):
    """
    Verify that the local and remote (fspec) checksum values are the same.

    The function will update the fspec object: on an unknown checksum type or
    on a checksum mismatch, fspec.status is set to 'failed' and
    fspec.status_code to the corresponding error code.

    :param fspec: FileSpec object for a given file.
    :param path: path to local file (string).
    :return: state (string), diagnostics (string). Both are empty strings when
             no problem was detected; otherwise state is one of
             'UNKNOWN_CHECKSUM_TYPE', 'AD_MISMATCH' or 'MD_MISMATCH'.
    """

    diagnostics = ""
    state = ""

    checksum_type = get_checksum_type(fspec.checksum)
    checksum_catalog = get_checksum_value(fspec.checksum)

    if checksum_type == 'unknown':
        diagnostics = 'unknown checksum type for checksum(catalog): %s' % fspec.checksum
        logger.warning(diagnostics)
        fspec.status_code = ErrorCodes.UNKNOWNCHECKSUMTYPE
        fspec.status = 'failed'
        state = 'UNKNOWN_CHECKSUM_TYPE'
        return state, diagnostics

    # the local checksum is calculated with the type exactly as reported for the catalog value
    checksum_local = calculate_checksum(path, algorithm=checksum_type)

    # from here on, only the long name is used for the adler32 type
    if checksum_type == 'ad32':
        checksum_type = 'adler32'

    logger.info('checksum (catalog): %s (type: %s)' % (checksum_catalog, checksum_type))
    logger.info('checksum (local): %s' % checksum_local)

    # an empty local checksum is not treated as a mismatch
    if checksum_local and checksum_local != checksum_catalog:
        diagnostics = 'checksum verification failed for LFN=%s: checksum (catalog)=%s != checksum (local)=%s' % \
                      (fspec.lfn, checksum_catalog, checksum_local)
        logger.warning(diagnostics)

        # compare against the normalised name; testing for 'ad32' here could never match
        is_adler32 = checksum_type == 'adler32'
        fspec.status_code = ErrorCodes.GETADMISMATCH if is_adler32 else ErrorCodes.GETMD5MISMATCH
        fspec.status = 'failed'
        state = 'AD_MISMATCH' if is_adler32 else 'MD_MISMATCH'
    else:
        logger.info('catalog and local checksum values are the same')

    return state, diagnostics
def transfer_files(self, copytool, files, activity, **kwargs):  # noqa: C901
    """
    Automatically stage out files using the selected copy tool module.

    Each file is first checked locally (must exist, be readable and have a
    non-zero size); missing file sizes and adler32 checksums are filled in.
    Unless the copytool sets `require_protocols` to a false value, the
    protocol and transfer URL are then resolved for every file before the
    list is handed over to `copytool.copy_out()`.

    :param copytool: copytool module
    :param files: list of `FileSpec` objects
    :param activity: ordered list of preferred activity names to resolve SE protocols
    :param kwargs: extra kwargs to be passed to copytool transfer handler
    :return: the output of the copytool transfer operation
    :raise: PilotException in case of controlled error
    """

    # check if files exist before actual processing
    # populate filesize if need, calc checksum
    for fspec in files:
        # local path: explicit surl first, then an optional pfn attribute, finally workdir/lfn
        pfn = fspec.surl or getattr(fspec, 'pfn', None) or os.path.join(kwargs.get('workdir', ''), fspec.lfn)

        if not os.path.isfile(pfn) or not os.access(pfn, os.R_OK):
            msg = "Error: output pfn file does not exist: %s" % pfn
            self.logger.error(msg)
            self.trace_report.update(clientState='NO_REPLICA', stateReason=msg)
            self.trace_report.send()
            raise PilotException(msg, code=ErrorCodes.MISSINGOUTPUTFILE, state="FILE_INFO_FAIL")

        if not fspec.filesize:
            fspec.filesize = os.path.getsize(pfn)
        if not fspec.filesize:
            msg = 'output file has size zero: %s' % fspec.lfn
            self.logger.fatal(msg)
            raise PilotException(msg, code=ErrorCodes.ZEROFILESIZE, state="ZERO_FILE_SIZE")

        fspec.surl = pfn
        fspec.activity = activity

        if not fspec.checksum.get('adler32'):
            fspec.checksum['adler32'] = calculate_checksum(pfn)

    # prepare files (resolve protocol/transfer url)
    if getattr(copytool, 'require_protocols', True) and files:

        # NOTE(review): self.infosys is dereferenced here before the None check below - confirm it is always set
        ddmconf = self.infosys.resolve_storage_data()
        allowed_schemas = getattr(copytool, 'allowed_schemas', None)

        if self.infosys and self.infosys.queuedata:
            copytool_name = copytool.__name__.rsplit('.', 1)[-1]
            allowed_schemas = self.infosys.queuedata.resolve_allowed_schemas(activity, copytool_name) or allowed_schemas

        files = self.resolve_protocols(files, activity)

        # a copytool may provide its own surl resolver; otherwise fall back to the default one
        # (the lookup does not depend on the file, so it is done once before the loop)
        resolve_surl = getattr(copytool, 'resolve_surl', None)
        if not callable(resolve_surl):
            resolve_surl = self.resolve_surl

        for fspec in files:
            protocols = self.resolve_protocol(fspec, allowed_schemas)
            if not protocols:  # no protocols found
                error = 'Failed to resolve protocol for file=%s, allowed_schemas=%s, fspec=%s' % (
                    fspec.lfn, allowed_schemas, fspec)
                self.logger.error("resolve_protocol: %s" % error)
                raise PilotException(error, code=ErrorCodes.NOSTORAGEPROTOCOL)

            # take first available protocol for copytool: FIX ME LATER if need (do iterate over all allowed protocols?)
            protocol = protocols[0]

            self.logger.info("resolved protocol to be used for transfer: data=%s" % protocol)

            # pass ddmconf & activity for possible custom look up at the level of copytool
            r = resolve_surl(fspec, protocol, ddmconf, activity=activity)
            if r.get('surl'):
                fspec.turl = r['surl']
            if r.get('ddmendpoint'):
                fspec.ddmendpoint = r['ddmendpoint']

    if not copytool.is_valid_for_copy_out(files):
        self.logger.warning('Input is not valid for transfers using copytool=%s' % copytool)
        self.logger.debug('Input: %s' % files)
        raise PilotException('Invalid input for transfer operation')

    self.logger.info('ready to transfer (stage-out) files: %s' % files)

    if self.infosys:
        kwargs['copytools'] = self.infosys.queuedata.copytools

        # some copytools will need to know endpoint specifics (e.g. the space token) stored in ddmconf, add it
        kwargs['ddmconf'] = self.infosys.resolve_storage_data()

    if not files:
        msg = 'nothing to stage-out - an internal Pilot error has occurred'
        self.logger.fatal(msg)
        # use the ErrorCodes namespace like every other raise in this function
        raise PilotException(msg, code=ErrorCodes.INTERNALPILOTPROBLEM)

    # add the trace report
    kwargs['trace_report'] = self.trace_report

    return copytool.copy_out(files, **kwargs)
def transfer_files(self, copytool, files, activity, **kwargs):
    """
    Stage out the given files with the selected copy tool module.

    Every file must have an output RSE (ddmendpoint) defined and must exist
    locally as a readable, non-empty file. Missing file sizes and adler32
    checksums are filled in on the FileSpec objects, protocols are resolved
    when the copytool asks for them, and the list is finally passed to
    `copytool.copy_out()`.

    :param copytool: copytool module
    :param files: list of `FileSpec` objects
    :param activity: ordered list of preferred activity names to resolve SE protocols
    :param kwargs: extra kwargs to be passed to copytool transfer handler
    :return: the output of the copytool transfer operation
    :raise: PilotException in case of controlled error
    """

    # validate every file up front and complete its size / checksum information
    for spec in files:
        # the output destination has to be known before anything else is done
        if not spec.ddmendpoint:
            message = 'No output RSE defined for file=%s' % spec.lfn
            self.logger.error(message)
            raise PilotException(message, code=ErrorCodes.NOSTORAGE, state='NO_OUTPUTSTORAGE_DEFINED')

        # local path: surl first, then an optional pfn attribute, finally workdir/lfn
        local_path = spec.surl
        if not local_path:
            local_path = getattr(spec, 'pfn', None)
        if not local_path:
            local_path = os.path.join(kwargs.get('workdir', ''), spec.lfn)

        is_readable_file = os.path.isfile(local_path) and os.access(local_path, os.R_OK)
        if not is_readable_file:
            message = "Error: output pfn file does not exist: %s" % local_path
            self.logger.error(message)
            self.trace_report.update(clientState='MISSINGOUTPUTFILE', stateReason=message)
            self.trace_report.send()
            raise PilotException(message, code=ErrorCodes.MISSINGOUTPUTFILE, state="FILE_INFO_FAIL")

        # take the size from disk when the spec does not carry one; zero size is an error
        if not spec.filesize:
            spec.filesize = os.path.getsize(local_path)
        if not spec.filesize:
            message = 'output file has size zero: %s' % spec.lfn
            self.logger.fatal(message)
            raise PilotException(message, code=ErrorCodes.ZEROFILESIZE, state="ZERO_FILE_SIZE")

        spec.surl = local_path
        spec.activity = activity

        if not spec.checksum.get('adler32'):
            spec.checksum['adler32'] = calculate_checksum(local_path)

    # resolve protocol/transfer url unless the copytool opts out (or there is nothing to resolve)
    wants_protocols = getattr(copytool, 'require_protocols', True)
    if wants_protocols and files:
        self.require_protocols(files, copytool, activity)

    input_is_valid = copytool.is_valid_for_copy_out(files)
    if not input_is_valid:
        self.logger.warning('Input is not valid for transfers using copytool=%s' % copytool)
        self.logger.debug('Input: %s' % files)
        raise PilotException('Invalid input for transfer operation')

    self.logger.info('ready to transfer (stage-out) files: %s' % files)

    infosys = self.infosys
    if infosys:
        kwargs['copytools'] = infosys.queuedata.copytools
        # endpoint specifics (e.g. the space token) live in ddmconf; some copytools need them
        kwargs['ddmconf'] = infosys.resolve_storage_data()

    if not files:
        message = 'nothing to stage-out - an internal Pilot error has occurred'
        self.logger.fatal(message)
        raise PilotException(message, code=ErrorCodes.INTERNALPILOTPROBLEM)

    # the copytool receives the trace report together with the other kwargs
    kwargs['trace_report'] = self.trace_report

    return copytool.copy_out(files, **kwargs)