def build_infile():
    """Return the PFN of the last LFN listed in ``pool_xml_catalog.xml``.

    For every LFN of the catalogue the replica stored under the ``'Uknown'``
    key (spelling as used by the lookup) is read; the value found for the
    last LFN is the one returned.

    :return: the PFN of the last LFN, or ``None`` when the catalogue lists no
             LFN at all
    """
    from DIRAC.Resources.Catalog.PoolXMLCatalog import PoolXMLCatalog

    pm = PoolXMLCatalog('pool_xml_catalog.xml')
    # Pre-set the result so that an empty catalogue does not end in an
    # UnboundLocalError at the return statement.
    pfn = None
    for Lfn in pm.getLfnsList():
        pfn = pm.getPfnsByLfn(Lfn)['Replicas']['Uknown']
    return pfn
def build_infile(runlist):
    """Append one line per catalogue LFN to the ``<runlist>.list`` file.

    Each written line has the form ``<RunNum> -1 <pfn>``, where ``RunNum`` is
    the text found between ``dst_CTA_`` and ``.root`` in the PFN and the PFN
    is the replica stored under the ``'Uknown'`` key of the catalogue entry.

    :param str runlist: base name of the list file, ``.list`` is appended
    :return: ``DIRAC.S_OK()``
    """
    from DIRAC.Resources.Catalog.PoolXMLCatalog import PoolXMLCatalog

    pm = PoolXMLCatalog('pool_xml_catalog.xml')
    runlist = runlist + '.list'
    # The context manager closes the file even when a catalogue lookup or the
    # run number parsing raises half way through.
    with open(runlist, 'a') as f:
        for Lfn in pm.getLfnsList():
            pfn = pm.getPfnsByLfn(Lfn)['Replicas']['Uknown']
            RunNum = pfn.split('dst_CTA_')[1].split('.root')[0]
            pfn = RunNum + ' ' + '-1 ' + pfn
            f.write(pfn)
            f.write('\n')
    return DIRAC.S_OK()
def _resolveInputVariables(self):
    """Resolve the workflow parameters and build the helper objects.

    The parent class resolves the input variables and the input step; the
    XML summary name and the POOL XML catalogue name held by the instance are
    then wrapped into ``XMLSummary`` and ``PoolXMLCatalog`` objects.
    """
    parent = super(AnalyseFileAccess, self)
    parent._resolveInputVariables()
    parent._resolveInputStep()

    summary = XMLSummary(self.XMLSummary, log=self.log)
    self.XMLSummary_o = summary

    catalogue = PoolXMLCatalog(xmlfile=self.poolXMLCatName)
    self.poolXMLCatName_o = catalogue
def getGUID(fileNames, directory=''):
    """Extract the GUIDs of files from the POOL XML catalogs of a directory.

    GUIDs that the catalogs do not provide are generated with ``makeGuid``.

    :param fileNames: a single PFN or a list of PFNs
    :param str directory: directory searched for catalog files, defaults to
                          the current working directory
    :return: S_ERROR if ``directory`` is not a directory, otherwise S_OK with
             a ``{pfn: guid}`` dictionary plus the extra keys ``directory``
             and ``generated`` (PFNs whose GUID had to be generated)
    """
    if not directory:
        directory = os.getcwd()
    if not os.path.isdir(directory):
        return S_ERROR('%s is not a directory' % directory)
    # isinstance also accepts list subclasses, unlike an exact type comparison
    if not isinstance(fileNames, list):
        fileNames = [fileNames]
    gLogger.verbose('Will look for POOL XML Catalog GUIDs in %s for %s' % (directory, ', '.join(fileNames)))

    finalCatList = _getPoolCatalogs(directory)

    # Create POOL catalog with final list of catalog files and extract GUIDs
    generated = []
    pfnGUIDs = {}
    catalog = PoolXMLCatalog(finalCatList)
    for fname in fileNames:
        guid = str(catalog.getGuidByPfn(fname))
        if not guid:
            guid = makeGuid(fname)
            generated.append(fname)
        pfnGUIDs[fname] = guid

    if not generated:
        gLogger.info('Found GUIDs from POOL XML Catalogue for all files: %s' % ', '.join(fileNames))
    else:
        gLogger.info('GUIDs not found from POOL XML Catalogue (and were generated) for: %s' % ', '.join(generated))

    result = S_OK(pfnGUIDs)
    result['directory'] = directory
    result['generated'] = generated
    return result
def getType(fileNames, directory=""):
    """Extract the file types of PFNs from the POOL XML catalogs of a directory.

    PFNs for which the catalogs give no type are assigned ``ROOT_All``.

    :param fileNames: a single PFN or a list of PFNs
    :param str directory: directory searched for catalog files, defaults to
                          the current working directory
    :return: S_ERROR if ``directory`` is not a directory, otherwise S_OK with
             a ``{pfn: type}`` dictionary plus the extra keys ``directory``
             and ``generated`` (PFNs that received the default type)
    """
    if not directory:
        directory = os.getcwd()
    if not os.path.isdir(directory):
        return S_ERROR("%s is not a directory" % directory)
    if not isinstance(fileNames, list):
        fileNames = [fileNames]
    gLogger.verbose("Will look for POOL XML Catalog file types in %s for %s" % (directory, ", ".join(fileNames)))

    finalCatList = _getPoolCatalogs(directory)

    # Create POOL catalog with final list of catalog files and extract the types
    generated = []
    pfnTypes = {}
    catalog = PoolXMLCatalog(finalCatList)
    for fname in fileNames:
        typeFile = str(catalog.getTypeByPfn(fname))
        if not typeFile:
            typeFile = "ROOT_All"
            generated.append(fname)
        pfnTypes[fname] = typeFile

    if not generated:
        gLogger.info("Found Types from POOL XML Catalogue for all files: %s" % ", ".join(fileNames))
    else:
        # This function deals with file types, not GUIDs: say so in the log
        gLogger.info(
            "Types not found from POOL XML Catalogue (and were defaulted to ROOT_All) for: %s" % ", ".join(generated)
        )

    result = S_OK(pfnTypes)
    result["directory"] = directory
    result["generated"] = generated
    return result
def execute(self, dataDict):
    """Create a POOL XML slice from a dictionary of resolved input data.

    The slice is written to ``self.fileName``; a second copy produced with
    ``toXML(True)`` is written, on a best-effort basis, to
    ``<self.fileName>.temp``.

    :param dict dataDict: ``{lfn: metadata}``, where each metadata dictionary
                          provides ``se``, ``guid``, ``pfntype`` and either
                          ``path`` or ``pfn`` (plus ``turl`` when the file is
                          not found locally)
    :return: S_OK on success, S_ERROR if anything fails while building or
             writing the main slice
    """
    poolXMLCatName = self.fileName
    try:
        poolXMLCat = PoolXMLCatalog()
        self.log.verbose('Creating POOL XML slice')

        for lfn, mdata in dataDict.items():
            # Pick the physical location: explicit path first, then a local
            # copy named like the PFN, finally the TURL
            if 'path' in mdata:
                pfn = mdata['path']
            elif os.path.exists(os.path.basename(mdata['pfn'])):
                pfn = os.path.abspath(os.path.basename(mdata['pfn']))
            else:
                pfn = mdata['turl']
            # lfn,pfn,se,guid,pfntype tuple taken by POOL XML Catalogue
            poolXMLCat.addFile((lfn, pfn, mdata['se'], mdata['guid'], mdata['pfntype']))

        xmlSlice = poolXMLCat.toXML()
        self.log.verbose('POOL XML Slice is: ')
        self.log.verbose(xmlSlice)
        # Context managers close the files even if the write fails
        with open(poolXMLCatName, 'w') as poolSlice:
            poolSlice.write(xmlSlice)
        self.log.info('POOL XML Catalogue slice written to %s' % (poolXMLCatName))
        try:
            # Temporary solution to the problem of storing the SE in the Pool XML slice
            with open('%s.temp' % (poolXMLCatName), 'w') as poolSlice_temp:
                xmlSlice = poolXMLCat.toXML(True)
                poolSlice_temp.write(xmlSlice)
        except Exception:
            # Best effort only: a failure here must not fail the whole slice
            self.log.warn('Attempted to write catalog also to %s.temp but this failed' % (poolXMLCatName))
    except Exception as x:
        self.log.error(str(x))
        return S_ERROR('Exception during construction of POOL XML slice')

    # Report success explicitly instead of falling through and returning None
    return S_OK('POOL XML Slice created')
def getOutputType(outputs, inputs, directory=''):
    """Determine the file type of output PFNs from the POOL XML catalogs.

    The type of every input (given by LFN) is looked up first. An output
    (given by PFN) that has no type in the catalogs inherits the type of the
    first input, or ``'ROOT'`` when there are no inputs.

    :param outputs: a single output PFN or a list of output PFNs
    :param inputs: iterable of input LFNs
    :param str directory: directory handed to ``_getPoolCatalogs``
    :return: ``{pfn: type}`` dictionary for the outputs
    :raises ValueError: if the type of at least one input cannot be found
    """
    if not isinstance(outputs, list):
        outputs = [outputs]
    catalog = PoolXMLCatalog(_getPoolCatalogs(directory))

    # inputs - by lfn
    generatedIn = False
    typeFileIn = []
    for fname in inputs:
        try:
            replicas = catalog.getPfnsByLfn(fname)['Replicas']
            # list() makes the indexing work whether values() returns a list
            # or a dictionary view
            firstPfn = str(list(replicas.values())[0])
            tFileIn = str(catalog.getTypeByPfn(firstPfn))
        except (KeyError, IndexError):
            # Unknown LFN or LFN without any replica: no type available
            tFileIn = None
        if not tFileIn:
            generatedIn = True
        else:
            typeFileIn.append(tFileIn)

    if generatedIn and inputs:
        raise ValueError('Could not find Type for inputs')

    # outputs - by pfn
    pfnTypesOut = {}
    for fname in outputs:
        tFileOut = str(catalog.getTypeByPfn(fname))
        if not tFileOut:
            if typeFileIn:
                tFileOut = typeFileIn[0]
            else:
                tFileOut = 'ROOT'
        pfnTypesOut[fname] = tFileOut

    return pfnTypesOut
def getGUID(fileNames, directory=""):
    """Look up the GUIDs of PFNs in the POOL XML catalogs found in a directory.

    ``fileNames`` may be a single name or a list of names; ``directory``
    falls back to the current working directory. A GUID is generated with
    ``makeGuid`` for every name the catalogs do not know.

    :return: S_ERROR when ``directory`` is not a directory, otherwise S_OK
             holding ``{pfn: guid}`` with the additional keys ``directory``
             and ``generated``
    """
    directory = directory or os.getcwd()
    if not os.path.isdir(directory):
        return S_ERROR("%s is not a directory" % directory)

    if not isinstance(fileNames, list):
        fileNames = [fileNames]
    gLogger.verbose("Will look for POOL XML Catalog GUIDs in %s for %s" % (directory, ", ".join(fileNames)))

    # One catalogue object built from all the valid catalog files
    catalog = PoolXMLCatalog(_getPoolCatalogs(directory))

    pfnGUIDs = {}
    generated = []
    for pfn in fileNames:
        known = str(catalog.getGuidByPfn(pfn))
        if known:
            pfnGUIDs[pfn] = known
            continue
        # Not in the catalogue: fall back to a generated GUID
        pfnGUIDs[pfn] = makeGuid(pfn)
        generated.append(pfn)

    if generated:
        gLogger.info("GUIDs not found from POOL XML Catalogue (and were generated) for: %s" % ", ".join(generated))
    else:
        gLogger.info("Found GUIDs from POOL XML Catalogue for all files: %s" % ", ".join(fileNames))

    result = S_OK(pfnGUIDs)
    result["directory"] = directory
    result["generated"] = generated
    return result
def getType(fileNames, directory=''):
    """Extract the file types of PFNs from the POOL XML catalogs of a directory.

    PFNs for which the catalogs give no type are assigned ``ROOT_All``.

    :param fileNames: a single PFN or a list of PFNs
    :param str directory: directory searched for catalog files, defaults to
                          the current working directory
    :return: S_ERROR if ``directory`` is not a directory, otherwise S_OK with
             a ``{pfn: type}`` dictionary plus the extra keys ``directory``
             and ``generated`` (PFNs that received the default type)
    """
    if not directory:
        directory = os.getcwd()
    if not os.path.isdir(directory):
        return S_ERROR('%s is not a directory' % directory)
    # isinstance also accepts list subclasses, unlike an exact type comparison
    if not isinstance(fileNames, list):
        fileNames = [fileNames]
    gLogger.verbose('Will look for POOL XML Catalog file types in %s for %s' % (directory, ', '.join(fileNames)))

    finalCatList = _getPoolCatalogs(directory)

    # Create POOL catalog with final list of catalog files and extract the types
    generated = []
    pfnTypes = {}
    catalog = PoolXMLCatalog(finalCatList)
    for fname in fileNames:
        typeFile = str(catalog.getTypeByPfn(fname))
        if not typeFile:
            typeFile = 'ROOT_All'
            generated.append(fname)
        pfnTypes[fname] = typeFile

    if not generated:
        gLogger.info('Found Types from POOL XML Catalogue for all files: %s' % ', '.join(fileNames))
    else:
        # This function deals with file types, not GUIDs: say so in the log
        gLogger.info(
            'Types not found from POOL XML Catalogue (and were defaulted to ROOT_All) for: %s' % ', '.join(generated))

    result = S_OK(pfnTypes)
    result['directory'] = directory
    result['generated'] = generated
    return result
def execute(self, dataDict):
    """Create a POOL XML slice from a dictionary of resolved input data.

    Each LFN is registered once (with the GUID of its first metadata entry)
    and one replica is added per metadata entry. The slice is written to
    ``self.fileName``; a second copy produced with ``toXML(True)`` is
    written, on a best-effort basis, to ``<self.fileName>.temp``.

    :param dict dataDict: ``{lfn: metadata}`` where metadata is a dictionary
                          or a list of dictionaries providing ``guid``, ``se``
                          and either ``path`` or ``pfn`` (plus ``turl`` when
                          the file is not found locally)
    :return: S_OK on success, S_ERROR if anything fails while building or
             writing the main slice
    """
    poolXMLCatName = self.fileName
    try:
        poolXMLCat = PoolXMLCatalog()
        self.log.verbose('Creating POOL XML slice')

        for lfn, mdataList in dataDict.items():
            # lfn,pfn,se,guid tuple taken by POOL XML Catalogue
            if not isinstance(mdataList, list):
                mdataList = [mdataList]
            # As a file may have several replicas, set first the file, then the replicas
            poolXMLCat.addFile((lfn, None, None, mdataList[0]['guid'], None))
            for mdata in mdataList:
                if 'path' in mdata:
                    path = mdata['path']
                elif os.path.exists(os.path.basename(mdata['pfn'])):
                    path = os.path.abspath(os.path.basename(mdata['pfn']))
                else:
                    path = mdata['turl']
                poolXMLCat.addReplica((lfn, path, mdata['se'], False))

        xmlSlice = poolXMLCat.toXML()
        self.log.verbose('POOL XML Slice is: ')
        self.log.verbose(xmlSlice)
        with open(poolXMLCatName, 'w') as poolSlice:
            poolSlice.write(xmlSlice)
        self.log.info('POOL XML Catalogue slice written to %s' % (poolXMLCatName))
        try:
            # Temporary solution to the problem of storing the SE in the Pool XML slice
            # The context manager closes the file even if toXML or the write fails
            with open('%s.temp' % (poolXMLCatName), 'w') as poolSlice_temp:
                xmlSlice = poolXMLCat.toXML(True)
                poolSlice_temp.write(xmlSlice)
        except Exception:
            # Best effort only: a failure here must not fail the whole slice
            self.log.warn('Attempted to write catalog also to %s.temp but this failed' % (poolXMLCatName))
    except Exception as x:
        self.log.error(str(x))
        return S_ERROR('Exception during construction of POOL XML slice')

    return S_OK('POOL XML Slice created')
def execute(self, dataDict):
    """Build a POOL XML slice out of the resolved input data and write it out.

    Every LFN is declared once, using the GUID of its first entry, and gets
    one replica per entry. The XML goes to ``self.fileName``; the variant
    returned by ``toXML(True)`` is additionally written to
    ``<self.fileName>.temp``, where a failure only produces a warning.

    :param dict dataDict: LFN -> metadata dictionary, or list of them
    :return: S_OK when the slice was written, S_ERROR otherwise
    """
    sliceName = self.fileName
    try:
        catalogue = PoolXMLCatalog()
        self.log.verbose("Creating POOL XML slice")

        for lfn, entries in dataDict.items():
            # A single metadata dictionary is handled as a one-element list
            if not isinstance(entries, list):
                entries = [entries]
            # File first (several replicas may follow), then its replicas
            catalogue.addFile((lfn, None, None, entries[0]["guid"], None))
            for entry in entries:
                if "path" in entry:
                    location = entry["path"]
                else:
                    localName = os.path.basename(entry["pfn"])
                    if os.path.exists(localName):
                        location = os.path.abspath(localName)
                    else:
                        location = entry["turl"]
                catalogue.addReplica((lfn, location, entry["se"], False))

        content = catalogue.toXML()
        self.log.verbose("POOL XML Slice is: ")
        self.log.verbose(content)
        with open(sliceName, "w") as sliceFile:
            sliceFile.write(content)
        self.log.info("POOL XML Catalogue slice written to %s" % (sliceName))

        try:
            # Temporary solution to the problem of storing the SE in the Pool XML slice
            with open("%s.temp" % (sliceName), "w") as tempFile:
                tempFile.write(catalogue.toXML(True))
        except Exception:
            self.log.warn("Attempted to write catalog also to %s.temp but this failed" % (sliceName))
    except Exception as error:
        self.log.error(str(error))
        return S_ERROR("Exception during construction of POOL XML slice")

    return S_OK("POOL XML Slice created")
def execute(self, dataDict):
    """Create a POOL XML slice from a dictionary of resolved input data.

    The slice is written to ``self.fileName``; a second copy produced with
    ``toXML(True)`` is written, on a best-effort basis, to
    ``<self.fileName>.temp``.

    :param dict dataDict: ``{lfn: metadata}``, where each metadata dictionary
                          provides ``se``, ``guid``, ``pfntype`` and either
                          ``path`` or ``pfn`` (plus ``turl`` when the file is
                          not found locally)
    :return: S_OK on success, S_ERROR if anything fails while building or
             writing the main slice
    """
    poolXMLCatName = self.fileName
    try:
        poolXMLCat = PoolXMLCatalog()
        self.log.verbose('Creating POOL XML slice')

        for lfn, mdata in dataDict.items():
            # Pick the physical location: explicit path first, then a local
            # copy named like the PFN, finally the TURL
            if 'path' in mdata:
                pfn = mdata['path']
            elif os.path.exists(os.path.basename(mdata['pfn'])):
                pfn = os.path.abspath(os.path.basename(mdata['pfn']))
            else:
                pfn = mdata['turl']
            # lfn,pfn,size,se,guid,pfntype tuple taken by POOL XML Catalogue;
            # the size is always passed as 0
            poolXMLCat.addFile((lfn, pfn, 0, mdata['se'], mdata['guid'], mdata['pfntype']))

        xmlSlice = poolXMLCat.toXML()
        self.log.verbose('POOL XML Slice is: ')
        self.log.verbose(xmlSlice)
        # Context managers close the files even if the write fails
        with open(poolXMLCatName, 'w') as poolSlice:
            poolSlice.write(xmlSlice)
        self.log.info('POOL XML Catalogue slice written to %s' % (poolXMLCatName))
        try:
            # Temporary solution to the problem of storing the SE in the Pool XML slice
            with open('%s.temp' % (poolXMLCatName), 'w') as poolSlice_temp:
                xmlSlice = poolXMLCat.toXML(True)
                poolSlice_temp.write(xmlSlice)
        except Exception:
            # Best effort only: a failure here must not fail the whole slice
            self.log.warn('Attempted to write catalog also to %s.temp but this failed' % (poolXMLCatName))
    except Exception as x:
        self.log.error(str(x))
        return S_ERROR('Exception during construction of POOL XML slice')

    # Report success explicitly instead of falling through and returning None
    return S_OK('POOL XML Slice created')
def execute(self, dataDict):
    """Create a POOL XML slice from a dictionary of resolved input data.

    Each LFN is registered once (with the GUID of its first metadata entry)
    and one replica is added per metadata entry. The slice is written to
    ``self.fileName``; a second copy produced with ``toXML(True)`` is
    written, on a best-effort basis, to ``<self.fileName>.temp``.

    :param dict dataDict: ``{lfn: metadata}`` where metadata is a dictionary
                          or a list of dictionaries providing ``guid``, ``se``
                          and either ``path`` or ``pfn`` (plus ``turl`` when
                          the file is not found locally)
    :return: S_OK on success, S_ERROR if anything fails while building or
             writing the main slice
    """
    poolXMLCatName = self.fileName
    try:
        poolXMLCat = PoolXMLCatalog()
        self.log.verbose('Creating POOL XML slice')

        for lfn, mdataList in dataDict.items():
            # lfn,pfn,se,guid tuple taken by POOL XML Catalogue
            # isinstance replaces the comparison against types.ListType, which
            # only exists in Python 2 and rejects list subclasses
            if not isinstance(mdataList, list):
                mdataList = [mdataList]
            # As a file may have several replicas, set first the file, then the replicas
            poolXMLCat.addFile((lfn, None, None, mdataList[0]['guid'], None))
            for mdata in mdataList:
                if 'path' in mdata:
                    path = mdata['path']
                elif os.path.exists(os.path.basename(mdata['pfn'])):
                    path = os.path.abspath(os.path.basename(mdata['pfn']))
                else:
                    path = mdata['turl']
                poolXMLCat.addReplica((lfn, path, mdata['se'], False))

        xmlSlice = poolXMLCat.toXML()
        self.log.verbose('POOL XML Slice is: ')
        self.log.verbose(xmlSlice)
        with open(poolXMLCatName, 'w') as poolSlice:
            poolSlice.write(xmlSlice)
        self.log.info('POOL XML Catalogue slice written to %s' % (poolXMLCatName))
        try:
            # Temporary solution to the problem of storing the SE in the Pool XML slice
            # The context manager closes the file even if toXML or the write fails
            with open('%s.temp' % (poolXMLCatName), 'w') as poolSlice_temp:
                xmlSlice = poolXMLCat.toXML(True)
                poolSlice_temp.write(xmlSlice)
        except Exception:
            # Best effort only: a failure here must not fail the whole slice
            self.log.warn('Attempted to write catalog also to %s.temp but this failed' % (poolXMLCatName))
    except Exception as x:
        self.log.error(str(x))
        return S_ERROR('Exception during construction of POOL XML slice')

    return S_OK('POOL XML Slice created')
def _getPoolCatalogs(directory=""):
    """Return the POOL XML catalog files found in ``directory``.

    Files matching ``*.xml`` or ``*.xml*gz`` are collected; those recognised
    as tar archives are unpacked into ``directory`` and their members are
    collected instead. Only the candidates that ``PoolXMLCatalog`` manages to
    load are kept.

    :param str directory: directory to search, an empty string makes the glob
                          patterns relative to the current working directory
    :return: list of paths of the usable catalog files
    """
    patterns = ["*.xml", "*.xml*gz"]

    # First obtain valid list of unpacked catalog files in directory
    poolCatalogList = []
    for pattern in patterns:
        for fname in glob.glob(os.path.join(directory, pattern)):
            if fname.endswith(".bak"):
                gLogger.verbose("Ignoring BAK file: %s" % fname)
            elif tarfile.is_tarfile(fname):
                # glob already returns the path including the directory part:
                # joining it with directory again would duplicate a relative
                # directory and point to a file that does not exist
                gLogger.debug("Unpacking catalog XML file %s" % fname)
                # NOTE(review): members are extracted as they are named in the
                # archive; confirm the archives always come from a trusted source
                with tarfile.open(fname, "r") as tf:
                    for member in tf.getmembers():
                        tf.extract(member, directory)
                        poolCatalogList.append(os.path.join(directory, member.name))
            else:
                poolCatalogList.append(fname)

    poolCatalogList = uniqueElements(poolCatalogList)

    # Now have list of all XML files but some may not be Pool XML catalogs...
    finalCatList = []
    for possibleCat in poolCatalogList:
        try:
            # Only used as a validity check, the object itself is not needed
            PoolXMLCatalog(possibleCat)
            finalCatList.append(possibleCat)
        except Exception:
            gLogger.debug("Ignoring non-POOL catalogue file %s" % possibleCat)

    gLogger.debug("Final list of catalog files are: %s" % ", ".join(finalCatList))

    return finalCatList
tarFile.extract(member, directory) poolCatalogList.append( os.path.join(directory, member.name)) except Exception, x: gLogger.error('Could not untar %s with exception %s' % (fname, str(x))) else: poolCatalogList.append(fname) poolCatalogList = uniqueElements(poolCatalogList) #Now have list of all XML files but some may not be Pool XML catalogs... finalCatList = [] for possibleCat in poolCatalogList: try: cat = PoolXMLCatalog(possibleCat) finalCatList.append(possibleCat) except Exception, x: gLogger.debug('Ignoring non-POOL catalogue file %s' % possibleCat) #Create POOL catalog with final list of catalog files and extract GUIDs generated = [] pfnGUIDs = {} gLogger.debug('Final list of catalog files are: %s' % string.join(finalCatList, ', ')) catalog = PoolXMLCatalog(finalCatList) for fname in fileNames: guid = str(catalog.getGuidByPfn(fname)) if not guid: guid = makeGuid(fname) generated.append(fname)
(os.path.join(directory, fname))) tarFile = tarfile.open(os.path.join(directory, fname), 'r') for member in tarFile.getmembers(): tarFile.extract(member, directory) poolCatalogList.append( os.path.join(directory, member.name)) except Exception, x: gLogger.error('Could not untar %s with exception %s' % (fname, str(x))) else: poolCatalogList.append(fname) poolCatalogList = uniqueElements(poolCatalogList) #Now have list of all XML files but some may not be Pool XML catalogs... finalCatList = [] for possibleCat in poolCatalogList: try: cat = PoolXMLCatalog(possibleCat) finalCatList.append(possibleCat) except Exception, x: gLogger.debug('Ignoring non-POOL catalogue file %s' % possibleCat) gLogger.debug('Final list of catalog files are: %s' % string.join(finalCatList, ', ')) return finalCatList #############################################################################
def execute(self, production_id=None, prod_job_id=None, wms_job_id=None, workflowStatus=None, stepStatus=None,
            wf_commons=None, step_commons=None, step_number=None, step_id=None):
    """Main execution function: merge the step input files into one MDF file.

    The local replica of every LFN in ``self.stepInputData`` is taken from
    the POOL XML catalogue, a ``cat`` command over those replicas is run via
    ``systemCall``, the application output is managed and a log file is
    written to ``self.applicationLog``.

    All arguments are forwarded unchanged to the parent ``execute``.

    :return: S_OK on success, S_ERROR if the command fails, exits with a
             non-zero status, or any exception is raised
    """
    try:
        super(MergeMDF, self).execute(self.version, production_id, prod_job_id, wms_job_id, workflowStatus,
                                      stepStatus, wf_commons, step_commons, step_number, step_id)

        poolCat = PoolXMLCatalog(self.poolXMLCatName)

        self._resolveInputVariables()

        stepOutputs, stepOutputTypes, _histogram = self._determineOutputs()

        logLines = ['#' * len(self.version), self.version, '#' * len(self.version)]

        # list() makes the indexing work whether values() returns a list or a
        # dictionary view
        localInputs = [str(list(poolCat.getPfnsByLfn(x)['Replicas'].values())[0]) for x in self.stepInputData]
        inputs = ' '.join(localInputs)
        cmd = 'cat %s > %s' % (inputs, self.outputFilePrefix + '.' + stepOutputTypes[0])
        logLines.append('\nExecuting merge operation...')
        self.log.info('Executing "%s"' % cmd)
        # NOTE(review): shlex.split turns '>' into a plain argument; unless
        # systemCall runs the sequence through a shell the redirection will
        # not take place - to be confirmed against systemCall
        result = systemCall(timeout=600, cmdSeq=shlex.split(cmd))
        if not result['OK']:
            self.log.error(result)
            logLines.append('Merge operation failed with result:\n%s' % result)
            return S_ERROR('Problem Executing Application')

        status = result['Value'][0]
        stdout = result['Value'][1]
        stderr = result['Value'][2]
        self.log.info(stdout)
        if stderr:
            self.log.error(stderr)

        if status:
            msg = 'Non-zero status %s while executing "%s"' % (status, cmd)
            self.log.info(msg)
            logLines.append(msg)
            return S_ERROR('Problem Executing Application')

        self.log.info("Going to manage %s output" % self.applicationName)
        self._manageAppOutput(stepOutputs)

        # Still have to set the application status e.g. user job case.
        self.setApplicationStatus('%s %s Successful' % (self.applicationName, self.applicationVersion))

        # Write to log file
        msg = 'Produced merged MDF file'
        self.log.info(msg)
        logLines.append(msg)
        logLines = [str(i) for i in logLines]
        logLines.append('#EOF')
        # The context manager closes the log file even if the write fails
        with open(self.applicationLog, 'w') as fopen:
            fopen.write('\n'.join(logLines) + '\n')

        return S_OK('%s %s Successful' % (self.applicationName, self.applicationVersion))

    except Exception as e:  # pylint:disable=broad-except
        self.log.exception("Failure in MergeMDF execute module", lException=e)
        return S_ERROR(str(e))

    finally:
        super(MergeMDF, self).finalize(self.version)
poolCatalogList = uniqueElements(poolCatalogList) #Now have list of all XML files but some may not be Pool XML catalogs... finalCatList = [] for possibleCat in poolCatalogList: try: cat = PoolXMLCatalog(possibleCat) finalCatList.append(possibleCat) except Exception,x: gLogger.debug('Ignoring non-POOL catalogue file %s' %possibleCat) #Create POOL catalog with final list of catalog files and extract GUIDs generated = [] pfnGUIDs = {} gLogger.debug('Final list of catalog files are: %s' %string.join(finalCatList,', ')) catalog = PoolXMLCatalog(finalCatList) for fname in fileNames: guid = str(catalog.getGuidByPfn(fname)) if not guid: guid = makeGuid(fname) generated.append(fname) pfnGUIDs[fname]=guid if not generated: gLogger.info('Found GUIDs from POOL XML Catalogue for all files: %s' %string.join(fileNames,', ')) else: gLogger.info('GUIDs not found from POOL XML Catalogue (and were generated) for: %s' %string.join(generated,', ')) result = S_OK(pfnGUIDs) result['directory']=directory