def _lfn2pfn(node, lfn, prot='srmv2'):
    return JSONRestClient().get(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/lfn2pfn',
        params={'node': node, 'protocol': prot, 'lfn': lfn})['phedex']['mapping']
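
# A hedged usage sketch for _lfn2pfn, not part of the original module. The site name
# and LFN below are hypothetical examples; the assumption that each mapping entry
# carries a 'pfn' key follows the documented PhEDEx datasvc lfn2pfn response, but
# should be verified against the live service.
def _example_lfn2pfn_usage():
    mapping = _lfn2pfn(node='T2_DE_DESY',  # hypothetical site name
        lfn='/store/data/Run2012A/DoubleMu/AOD/example.root')  # hypothetical LFN
    for entry in mapping:
        print(entry.get('pfn'))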
class DBS3LiteClient(object):
    def __init__(self, url):
        self._reader_url = '%s/%s' % (url, 'DBSReader')
        self._writer_url = '%s/%s' % (url, 'DBSWriter')
        self._migrate_url = '%s/%s' % (url, 'DBSMigrate')
        self._proxy_path = os.environ.get('X509_USER_PROXY', '')
        if not os.path.exists(self._proxy_path):
            raise UserError('VOMS proxy needed to query DBS3! ' +
                'Environment variable X509_USER_PROXY is "%s"' % self._proxy_path)
        self._jrc = JSONRestClient(cert=self._proxy_path)

    def listBlocks(self, **kwargs):
        return self._jrc.get(url=self._reader_url, api='blocks', params=kwargs)

    def listFiles(self, **kwargs):
        return self._jrc.get(url=self._reader_url, api='files', params=kwargs)

    def listFileParents(self, **kwargs):
        return self._jrc.get(url=self._reader_url, api='fileparents', params=kwargs)

    def insertBulkBlock(self, data):
        return self._jrc.post(url=self._writer_url, api='bulkblocks', data=data)

    def migrateSubmit(self, data):
        return self._jrc.post(url=self._migrate_url, api='submit', data=data)

    def migrateStatus(self, **kwargs):
        return self._jrc.get(url=self._migrate_url, api='status', params=kwargs)
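
# A minimal usage sketch for DBS3LiteClient, assuming a valid VOMS proxy is available
# via X509_USER_PROXY. The instance URL and dataset name are illustrative; the DBS3
# 'blocks' API accepts a 'dataset' parameter and returns entries with 'block_name',
# but treat the exact response shape as an assumption.
def _example_dbs3_usage():
    client = DBS3LiteClient('https://cmsweb.cern.ch/dbs/prod/global')
    for block in client.listBlocks(dataset='/SingleMu/Run2012A-v1/RAW'):  # hypothetical dataset
        print(block.get('block_name'))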
class CMSBaseProvider(DataProvider):
    def __init__(self, config, datasetExpr, datasetNick=None):
        self._changeTrigger = triggerResync(['datasets', 'parameters'])
        self._lumi_filter = config.getLookup('lumi filter', {}, parser=parseLumiFilter,
            strfun=strLumi, onChange=self._changeTrigger)
        if not self._lumi_filter.empty():
            config.set('dataset processor', 'LumiDataProcessor', '+=')
        DataProvider.__init__(self, config, datasetExpr, datasetNick)
        # LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
        self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(),
            onChange=self._changeTrigger)
        config.set('phedex sites matcher mode', 'shell', '?=')
        # PhEDex blacklist: T1_*_Disk nodes allow user jobs - other T1s don't!
        self._phedexFilter = config.getFilter('phedex sites', '-* T1_*_Disk T2_* T3_*',
            defaultMatcher='blackwhite', defaultFilter='strict', onChange=self._changeTrigger)
        self._onlyComplete = config.getBool('only complete sites', True, onChange=self._changeTrigger)
        self._locationFormat = config.getEnum('location format', CMSLocationFormat,
            CMSLocationFormat.hostname, onChange=self._changeTrigger)
        self._pjrc = JSONRestClient(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
        self._sitedb = SiteDB()
        (self._datasetPath, self._datasetInstance, self._datasetBlock) = optSplit(datasetExpr, '@#')
        instance_default = config.get('dbs instance', '', onChange=self._changeTrigger)
        self._datasetInstance = self._datasetInstance or instance_default
        if not self._datasetInstance:
            self._datasetInstance = 'prod/global'
        elif '/' not in self._datasetInstance:
            self._datasetInstance = 'prod/%s' % self._datasetInstance
        self._datasetBlock = self._datasetBlock or 'all'
        self.onlyValid = config.getBool('only valid', True, onChange=self._changeTrigger)

    # Define how often the dataprovider can be queried automatically
    def queryLimit(self):
        return 2 * 60 * 60  # 2 hour delay minimum

    # Check if splitterClass is valid
    def checkSplitter(self, splitterClass):
        if (DataSplitter.Skipped in splitterClass.neededEnums()) and not self._lumi_filter.empty():
            self._log.debug('Selected splitter %s is not compatible with active lumi filter!',
                splitterClass.__name__)
            self._log.warning('Active lumi section filter forced selection of HybridSplitter')
            return HybridSplitter
        return splitterClass

    def _replicaLocation(self, replica_info):
        (name_node, name_hostname, _) = replica_info
        if self._locationFormat == CMSLocationFormat.siteDB:
            yield name_node
        else:
            if name_hostname is not None:
                name_hostnames = [name_hostname]
            else:
                name_hostnames = self._sitedb.cms_name_to_se(name_node)
            for name_hostname in name_hostnames:
                if self._locationFormat == CMSLocationFormat.hostname:
                    yield name_hostname
                else:
                    yield '%s/%s' % (name_node, name_hostname)

    def _fmtLocations(self, replica_infos):
        for replica_info in replica_infos:
            (_, _, completed) = replica_info
            if completed:
                for entry in self._replicaLocation(replica_info):
                    yield entry
            else:
                for entry in self._replicaLocation(replica_info):
                    yield '(%s)' % entry

    def _processReplicas(self, blockPath, replica_infos):
        def empty_with_warning(*args):
            self._log.warning(*args)
            return []

        def expanded_replica_locations(replica_infos):
            for replica_info in replica_infos:
                for entry in self._replicaLocation(replica_info):
                    yield entry

        if not replica_infos:
            return empty_with_warning('Dataset block %r has no replica information!', blockPath)
        replica_infos_selected = self._phedexFilter.filterList(replica_infos, key=itemgetter(0))
        if not replica_infos_selected:
            return empty_with_warning('Dataset block %r is not available at the selected locations!' +
                '\nAvailable locations: %s', blockPath, str.join(', ', self._fmtLocations(replica_infos)))
        if not self._onlyComplete:
            return list(expanded_replica_locations(replica_infos_selected))
        replica_infos_complete = lfilter(lambda nn_nh_c: nn_nh_c[2], replica_infos_selected)
        if not replica_infos_complete:
            return empty_with_warning('Dataset block %r is not completely available at the selected locations!' +
                '\nAvailable locations: %s', blockPath, str.join(', ', self._fmtLocations(replica_infos)))
        return list(expanded_replica_locations(replica_infos_complete))

    # Get dataset se list from PhEDex (perhaps concurrent with listFiles)
    def _getPhedexReplicas(self, blockPath, dictReplicas):
        dictReplicas[blockPath] = []
        for phedexBlock in self._pjrc.get(params={'block': blockPath})['phedex']['block']:
            for replica in phedexBlock['replica']:
                dictReplicas[blockPath].append((replica['node'], replica.get('se'), replica['complete'] == 'y'))

    def getDatasets(self):
        if self._cache_dataset is None:
            self._cache_dataset = [self._datasetPath]
            if '*' in self._datasetPath:
                self._cache_dataset = list(self._getCMSDatasets(self._datasetPath))
                if not self._cache_dataset:
                    raise DatasetError('No datasets selected by DBS wildcard %s !' % self._datasetPath)
        return self._cache_dataset

    def _getCMSBlocks(self, datasetPath, getSites):
        iter_blockname_selist = self._getCMSBlocksImpl(datasetPath, getSites)
        n_blocks = 0
        selected_blocks = False
        for (blockname, selist) in iter_blockname_selist:
            n_blocks += 1
            if (self._datasetBlock != 'all') and (str.split(blockname, '#')[1] != self._datasetBlock):
                continue
            selected_blocks = True
            yield (blockname, selist)
        if (n_blocks > 0) and not selected_blocks:
            raise DatasetError('Dataset %r contains %d blocks, but none were selected by %r' %
                (datasetPath, n_blocks, self._datasetBlock))

    def _fillCMSFiles(self, block, blockPath):
        lumi_used = False
        lumiDict = {}
        if self._lumi_query:  # central lumi query
            lumiDict = self._getCMSLumisImpl(blockPath)
        fileList = []
        for (fileInfo, listLumi) in self._getCMSFilesImpl(blockPath, self.onlyValid, self._lumi_query):
            if lumiDict and not listLumi:
                listLumi = lumiDict.get(fileInfo[DataProvider.URL], [])
            if listLumi:
                (listLumiExt_Run, listLumiExt_Lumi) = ([], [])
                for (run, lumi_list) in sorted(listLumi):
                    listLumiExt_Run.extend([run] * len(lumi_list))
                    listLumiExt_Lumi.extend(lumi_list)
                fileInfo[DataProvider.Metadata] = [listLumiExt_Run, listLumiExt_Lumi]
                lumi_used = True
            fileList.append(fileInfo)
        if lumi_used:
            block.setdefault(DataProvider.Metadata, []).extend(['Runs', 'Lumi'])
        block[DataProvider.FileList] = fileList

    def _getCMSLumisImpl(self, blockPath):
        return None

    def _getGCBlocks(self, usePhedex):
        for datasetPath in self.getDatasets():
            counter = 0
            for (blockPath, replica_infos) in self._getCMSBlocks(datasetPath, getSites=not usePhedex):
                result = {}
                result[DataProvider.Dataset] = blockPath.split('#')[0]
                result[DataProvider.BlockName] = blockPath.split('#')[1]
                if usePhedex:  # Start parallel phedex query
                    dictReplicas = {}
                    tPhedex = start_thread('Query phedex site info for %s' % blockPath,
                        self._getPhedexReplicas, blockPath, dictReplicas)
                    self._fillCMSFiles(result, blockPath)
                    tPhedex.join()
                    replica_infos = dictReplicas.get(blockPath)
                else:
                    self._fillCMSFiles(result, blockPath)
                result[DataProvider.Locations] = self._processReplicas(blockPath, replica_infos)
                if len(result[DataProvider.FileList]):
                    counter += 1
                    yield result
            if counter == 0:
                raise DatasetError('Dataset %s does not contain any valid blocks!' % datasetPath)
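
# The dataset expression handled above has the form <dataset path>[@<instance>][#<block>],
# split by the project's optSplit helper. The standalone function below is only a sketch
# of the same splitting rule, added here to make the parsing explicit; it is not the
# grid-control implementation.
def _split_dataset_expr(expr):
    (path, instance, block) = (expr, '', '')
    if '#' in path:  # block selector comes last
        (path, block) = path.split('#', 1)
    if '@' in path:  # DBS instance may itself contain '/'
        (path, instance) = path.split('@', 1)
    return (path, instance, block)

# _split_dataset_expr('/A/B/C@prod/global#1234') == ('/A/B/C', 'prod/global', '1234')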
class CMSBaseProvider(DataProvider):
    # required format: <dataset path>[@<instance>][#<block>]
    def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
        dataset_config = config.change_view(default_on_change=TriggerResync(['datasets', 'parameters']))
        self._lumi_filter = dataset_config.get_lookup(['lumi filter', '%s lumi filter' % datasource_name],
            default={}, parser=parse_lumi_filter, strfun=str_lumi)
        if not self._lumi_filter.empty():
            config.set('%s processor' % datasource_name, 'LumiDataProcessor', '+=')
        DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
        # LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
        self._lumi_query = dataset_config.get_bool(['lumi metadata', '%s lumi metadata' % datasource_name],
            default=not self._lumi_filter.empty())
        config.set('phedex sites matcher mode', 'ShellStyleMatcher', '?=')
        # PhEDex blacklist: T1_*_Disk nodes allow user jobs - other T1s don't!
        self._phedex_filter = dataset_config.get_filter('phedex sites', '-* T1_*_Disk T2_* T3_*',
            default_matcher='BlackWhiteMatcher', default_filter='StrictListFilter')
        self._only_complete = dataset_config.get_bool('only complete sites', True)
        self._only_valid = dataset_config.get_bool('only valid', True)
        self._allow_phedex = dataset_config.get_bool('allow phedex', True)
        self._location_format = dataset_config.get_enum('location format',
            CMSLocationFormat, CMSLocationFormat.hostname)
        self._pjrc = JSONRestClient(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
        self._sitedb = SiteDB()
        dataset_expr_parts = split_opt(dataset_expr, '@#')
        (self._dataset_path, self._dataset_instance, self._dataset_block_selector) = dataset_expr_parts
        instance_default = dataset_config.get('dbs instance', '')
        self._dataset_instance = self._dataset_instance or instance_default
        if not self._dataset_instance:
            self._dataset_instance = 'prod/global'
        elif '/' not in self._dataset_instance:
            self._dataset_instance = 'prod/%s' % self._dataset_instance
        self._dataset_block_selector = self._dataset_block_selector or 'all'

    def check_splitter(self, splitter):
        # Check if splitter is valid
        if (DataSplitter.Skipped in splitter.get_needed_enums()) and not self._lumi_filter.empty():
            self._log.debug('Selected splitter %s is not compatible with active lumi filter!',
                splitter.__name__)
            self._log.warning('Active lumi section filter forced selection of HybridSplitter')
            return HybridSplitter
        return splitter

    def get_dataset_name_list(self):
        if self._cache_dataset is None:
            self._cache_dataset = [self._dataset_path]
            if '*' in self._dataset_path:
                activity = Activity('Getting dataset list for %s' % self._dataset_path)
                self._cache_dataset = list(self._get_cms_dataset_list(self._dataset_path))
                if not self._cache_dataset:
                    raise DatasetError('No datasets selected by DBS wildcard %s !' % self._dataset_path)
                activity.finish()
        return self._cache_dataset

    def get_query_interval(self):
        # Define how often the dataprovider can be queried automatically
        return 2 * 60 * 60  # 2 hour delay minimum

    def _fill_cms_fi_list(self, block, block_path):
        activity_fi = Activity('Getting file information')
        lumi_used = False
        lumi_info_dict = {}
        if self._lumi_query:  # central lumi query
            lumi_info_dict = self._get_cms_lumi_dict(block_path)
        fi_list = []
        for (fi, lumi_info_list) in self._iter_cms_files(block_path, self._only_valid, self._lumi_query):
            self._raise_on_abort()
            if lumi_info_dict and not lumi_info_list:
                lumi_info_list = lumi_info_dict.get(fi[DataProvider.URL], [])
            if lumi_info_list:
                (run_list_result, lumi_list_result) = ([], [])
                for (run, lumi_list) in sorted(lumi_info_list):
                    run_list_result.extend([run] * len(lumi_list))
                    lumi_list_result.extend(lumi_list)
                assert len(run_list_result) == len(lumi_list_result)
                fi[DataProvider.Metadata] = [run_list_result, lumi_list_result]
                lumi_used = True
            fi_list.append(fi)
        if lumi_used:
            block.setdefault(DataProvider.Metadata, []).extend(['Runs', 'Lumi'])
        block[DataProvider.FileList] = fi_list
        activity_fi.finish()

    def _filter_cms_blockinfo_list(self, dataset_path, do_query_sites):
        iter_dataset_block_name_selist = self._iter_cms_blocks(dataset_path, do_query_sites)
        n_blocks = 0
        selected_blocks = False
        for (dataset_block_name, selist) in iter_dataset_block_name_selist:
            n_blocks += 1
            block_name = str.split(dataset_block_name, '#')[1]
            if (self._dataset_block_selector != 'all') and (block_name != self._dataset_block_selector):
                continue
            selected_blocks = True
            yield (dataset_block_name, selist)
        if (n_blocks > 0) and not selected_blocks:
            raise DatasetError('Dataset %r contains %d blocks, but none were selected by %r' % (
                dataset_path, n_blocks, self._dataset_block_selector))

    def _get_cms_dataset_list(self, dataset_path):
        raise AbstractError

    def _get_cms_lumi_dict(self, block_path):
        return None

    def _get_gc_block_list(self, use_phedex):
        dataset_name_list = self.get_dataset_name_list()
        progress_ds = ProgressActivity('Getting dataset', len(dataset_name_list))
        for dataset_idx, dataset_path in enumerate(dataset_name_list):
            progress_ds.update_progress(dataset_idx, msg='Getting dataset %s' % dataset_path)
            counter = 0
            blockinfo_list = list(self._filter_cms_blockinfo_list(dataset_path, not use_phedex))
            progress_block = ProgressActivity('Getting block information', len(blockinfo_list))
            for (block_path, replica_infos) in blockinfo_list:
                result = {}
                result[DataProvider.Dataset] = block_path.split('#')[0]
                result[DataProvider.BlockName] = block_path.split('#')[1]
                progress_block.update_progress(counter,
                    msg='Getting block information for ' + result[DataProvider.BlockName])
                if use_phedex and self._allow_phedex:  # Start parallel phedex query
                    replicas_dict = {}
                    phedex_thread = start_thread('Query phedex site info for %s' % block_path,
                        self._get_phedex_replica_list, block_path, replicas_dict)
                    self._fill_cms_fi_list(result, block_path)
                    phedex_thread.join()
                    replica_infos = replicas_dict.get(block_path)
                else:
                    self._fill_cms_fi_list(result, block_path)
                result[DataProvider.Locations] = self._process_replica_list(block_path, replica_infos)
                if len(result[DataProvider.FileList]):
                    counter += 1
                    yield result
            progress_block.finish()
            if counter == 0:
                raise DatasetError('Dataset %s does not contain any valid blocks!' % dataset_path)
        progress_ds.finish()

    def _get_phedex_replica_list(self, block_path, replicas_dict):
        activity_fi = Activity('Getting file replica information from PhEDex')
        # Get dataset se list from PhEDex (perhaps concurrent with get_dbs_file_list)
        replicas_dict[block_path] = []
        for phedex_block in self._pjrc.get(params={'block': block_path})['phedex']['block']:
            for replica in phedex_block['replica']:
                replica_info = (replica['node'], replica.get('se'), replica['complete'] == 'y')
                replicas_dict[block_path].append(replica_info)
        activity_fi.finish()

    def _iter_cms_blocks(self, dataset_path, do_query_sites):
        raise AbstractError

    def _iter_cms_files(self, block_path, query_only_valid, query_lumi):
        raise AbstractError

    def _iter_formatted_locations(self, replica_infos):
        for replica_info in replica_infos:
            (_, _, completed) = replica_info
            if completed:
                for entry in self._iter_replica_locations(replica_info):
                    yield entry
            else:
                for entry in self._iter_replica_locations(replica_info):
                    yield '(%s)' % entry

    def _iter_replica_locations(self, replica_info):
        (name_node, name_hostname, _) = replica_info
        if self._location_format == CMSLocationFormat.siteDB:
            yield name_node
        else:
            if name_hostname is not None:
                name_hostnames = [name_hostname]
            else:
                name_hostnames = self._sitedb.cms_name_to_se(name_node)
            for name_hostname in name_hostnames:
                if self._location_format == CMSLocationFormat.hostname:
                    yield name_hostname
                else:
                    yield '%s/%s' % (name_node, name_hostname)

    def _process_replica_list(self, block_path, replica_infos):
        def _empty_with_warning(error_msg, *args):
            self._log.warning('Dataset block %r ' + error_msg, block_path, *args)
            return []

        def _expanded_replica_locations(replica_infos):
            for replica_info in replica_infos:
                for entry in self._iter_replica_locations(replica_info):
                    yield entry

        if not replica_infos:
            return _empty_with_warning('has no replica information!')
        replica_infos_selected = self._phedex_filter.filter_list(replica_infos, key=itemgetter(0))
        if not replica_infos_selected:
            return _empty_with_warning('is not available at the selected locations!\n' +
                'Available locations: %s', str.join(', ', self._iter_formatted_locations(replica_infos)))
        if not self._only_complete:
            return list(_expanded_replica_locations(replica_infos_selected))
        replica_infos_complete = lfilter(lambda nn_nh_c: nn_nh_c[2], replica_infos_selected)
        if not replica_infos_complete:
            return _empty_with_warning('is not completely available at the selected locations!\n' +
                'Available locations: %s', str.join(', ', self._iter_formatted_locations(replica_infos)))
        return list(_expanded_replica_locations(replica_infos_complete))
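
# Standalone sketch of the location formatting rule implemented by _iter_replica_locations
# above, for a single replica tuple (node name, hostname, complete flag). The SiteDB
# lookup for missing hostnames is omitted and the sample values are hypothetical.
def _format_location(replica_info, location_format):
    (name_node, name_hostname, _) = replica_info
    if location_format == 'siteDB':
        return name_node  # e.g. 'T2_DE_DESY'
    elif location_format == 'hostname':
        return name_hostname  # e.g. 'srm.desy.de'
    return '%s/%s' % (name_node, name_hostname)  # 'both': node/hostname

# _format_location(('T2_DE_DESY', 'srm.desy.de', True), 'both') == 'T2_DE_DESY/srm.desy.de'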
class GeoResolver(object):
    def __init__(self):
        from grid_control.utils.webservice import JSONRestClient
        self._jrc = JSONRestClient(url='http://maps.googleapis.com/maps/api/geocode/json')

    def run(self):
        # output of lcg-infosites ce | while read X X X X X CE; do echo $CE; done
        #   | cut -d "/" -f 1 | cut -d ":" -f 1 | sort | uniq
        # (one whitespace-separated token per CE hostname)
        ce_list_str = """
            alcyone-cms.grid.helsinki.fi alice23.spbu.ru arc-ce01.gridpp.rl.ac.uk arc-ce02.gridpp.rl.ac.uk arc-ce03.gridpp.rl.ac.uk
            argoce01.na.infn.it atlasce1.lnf.infn.it atlasce2.lnf.infn.it atlasce3.lnf.infn.it atlas-cream01.na.infn.it
            atlas-cream02.na.infn.it boce.bo.infn.it bonner-grid.rice.edu brux3.hep.brown.edu cale.uniandes.edu.co
            carter-osg.rcac.purdue.edu cccreamceli07.in2p3.fr cccreamceli08.in2p3.fr cce.ihep.ac.cn ce0002.m45.ihep.su
            ce0004.m45.ihep.su ce01.cmsaf.mit.edu ce01.jinr-t1.ru ce01-lcg.cr.cnaf.infn.it ce-01.roma3.infn.it
            ce01.tier2.hep.manchester.ac.uk ce02.cmsaf.mit.edu ce02.jinr-t1.ru ce02.ngcc.acad.bg ce02.tier2.hep.manchester.ac.uk
            ce04-lcg.cr.cnaf.infn.it ce05.esc.qmul.ac.uk ce05-lcg.cr.cnaf.infn.it ce05.ncg.ingrid.pt ce06.esc.qmul.ac.uk
            ce06-lcg.cr.cnaf.infn.it ce07.esc.qmul.ac.uk ce07-lcg.cr.cnaf.infn.it ce08-lcg.cr.cnaf.infn.it ce101.grid.ucy.ac.cy
            ce1.accre.vanderbilt.edu ce1.dur.scotgrid.ac.uk ce1.grid.lebedev.ru ce1.ts.infn.it ce201.cern.ch
            ce202.cern.ch ce203.cern.ch ce204.cern.ch ce205.cern.ch ce206.cern.ch ce207.cern.ch ce208.cern.ch
            ce2.accre.vanderbilt.edu ce2.particles.ipm.ac.ir ce301.cern.ch ce302.cern.ch ce3.ppgrid1.rhul.ac.uk
            ce401.cern.ch ce402.cern.ch ce403.cern.ch ce404.cern.ch ce405.cern.ch ce406.cern.ch ce407.cern.ch ce408.cern.ch
            ce64.ipb.ac.rs ce6.grid.icm.edu.pl ce7.glite.ecdf.ed.ac.uk ce9.grid.icm.edu.pl cebo-t3-01.cr.cnaf.infn.it
            cebo-t3-02.cr.cnaf.infn.it ce.cis.gov.pl cecream.ca.infn.it ce.fesb.egi.cro-ngi.hr ce.grid.unesp.br
            ce.irb.egi.cro-ngi.hr ceprod05.grid.hep.ph.ic.ac.uk ceprod06.grid.hep.ph.ic.ac.uk ceprod07.grid.hep.ph.ic.ac.uk
            ceprod08.grid.hep.ph.ic.ac.uk cert-37.pd.infn.it ce.scope.unina.it ce.srce.egi.cro-ngi.hr cetest01.grid.hep.ph.ic.ac.uk
            cetest02.grid.hep.ph.ic.ac.uk ce.ulakbim.gov.tr cit-gatekeeper2.ultralight.org cit-gatekeeper.ultralight.org
            cluster118.knu.ac.kr cluster50.knu.ac.kr cms-0.mps.ohio-state.edu cmsce01.na.infn.it cmsgrid01.hep.wisc.edu
            cmsgrid02.hep.wisc.edu cms-grid0.hep.uprm.edu cmsosgce2.fnal.gov cmsosgce4.fnal.gov cmsosgce.fnal.gov
            cmsrm-cream01.roma1.infn.it cmsrm-cream02.roma1.infn.it cmsrm-cream03.roma1.infn.it cmstest1.rcac.purdue.edu
            cms.tier3.ucdavis.edu conte-osg.rcac.purdue.edu cox01.grid.metu.edu.tr cr1.ipp.acad.bg cream01.grid.auth.gr
            cream01.grid.sinica.edu.tw cream01.grid.uoi.gr cream01.kallisto.hellasgrid.gr cream01.lcg.cscs.ch
            cream02.grid.cyf-kr.edu.pl cream02.iihe.ac.be cream02.lcg.cscs.ch cream03.lcg.cscs.ch cream04.grid.sinica.edu.tw
            cream04.lcg.cscs.ch cream05.grid.sinica.edu.tw cream2.ppgrid1.rhul.ac.uk cream3.hep.kbfi.ee cream4.hep.kbfi.ee
            cream.afroditi.hellasgrid.gr cream-ce01.ariagni.hellasgrid.gr cream-ce01.indiacms.res.in cream-ce01.marie.hellasgrid.gr
            cream-ce02.cat.cbpf.br creamce02.ciemat.es cream-ce02.marie.hellasgrid.gr creamce03.ciemat.es creamce1.itep.ru
            cream-ce-2.ba.infn.it cream-ce-4.ba.infn.it cream-ce.cat.cbpf.br cream-ce.grid.atomki.hu creamce.hephy.oeaw.ac.at
            creamce.inula.man.poznan.pl cream-ce.kipt.kharkov.ua cream-ce.pg.infn.it creamce.reef.man.poznan.pl
            cream.grid.cyf-kr.edu.pl cream.ipb.ac.rs dc2-grid-66.brunel.ac.uk dc2-grid-68.brunel.ac.uk dc2-grid-70.brunel.ac.uk
            dwarf.wcss.wroc.pl earth.crc.nd.edu epgr02.ph.bham.ac.uk
            erbium.lsr.nectec.or.th f-cream01.grid.sinica.edu.tw f-cream04.grid.sinica.edu.tw fiupg.hep.fiu.edu foam.grid.kiae.ru
            fornax-ce2.itwm.fhg.de fornax-ce.itwm.fhg.de grcreamce01.inr.troitsk.ru grid001.ics.forth.gr grid002.jet.efda.org
            grid012.ct.infn.it grid01.physics.uoi.gr grid0.fe.infn.it grid106.kfki.hu grid107.kfki.hu grid109.kfki.hu
            grid129.sinp.msu.ru grid36.lal.in2p3.fr grid72.phy.ncu.edu.tw gridce01.ifca.es gridce03.ifca.es
            gridce0.pi.infn.it gridce1.pi.infn.it grid-ce2.physik.rwth-aachen.de gridce2.pi.infn.it gridce3.pi.infn.it
            gridce4.pi.infn.it gridce.ilc.cnr.it grid-ce.physik.rwth-aachen.de grid-cr0.desy.de grid-cr1.desy.de
            grid-cr2.desy.de grid-cr3.desy.de grid-cr4.desy.de gridgk01.racf.bnl.gov gridgk02.racf.bnl.gov gridgk03.racf.bnl.gov
            gridgk04.racf.bnl.gov gridgk05.racf.bnl.gov gridgk06.racf.bnl.gov gridgk08.racf.bnl.gov gridtest02.racf.bnl.gov
            gridvm03.roma2.infn.it grisuce.scope.unina.it gt3.pnpi.nw.ru hansen-osg.rcac.purdue.edu hepcms-0.umd.edu
            hepgrid10.ph.liv.ac.uk hepgrid5.ph.liv.ac.uk hepgrid6.ph.liv.ac.uk hepgrid97.ph.liv.ac.uk hephygr.oeaw.ac.at
            heposg01.colorado.edu hurr.tamu.edu ingrid.cism.ucl.ac.be jade-cms.hip.fi juk.nikhef.nl kalkan1.ulakbim.gov.tr
            khaldun.biruni.upm.my klomp.nikhef.nl kodiak-ce.baylor.edu lcg18.sinp.msu.ru lcg52.sinp.msu.ru
            lcgce01.phy.bris.ac.uk lcgce03.phy.bris.ac.uk lcgce04.phy.bris.ac.uk lcgce12.jinr.ru lcgce1.shef.ac.uk
            lcgce21.jinr.ru lcgce2.shef.ac.uk lcg-cream.ifh.de llrcream.in2p3.fr lpnhe-cream.in2p3.fr lyogrid07.in2p3.fr
            magic.cse.buffalo.edu mwt2-gk.campuscluster.illinois.edu ndcms.crc.nd.edu node01-03.usm.renam.md
            node01-04.grid.renam.md node05-02.imi.renam.md node74.datagrid.cea.fr nodeslab-0002.nlab.tb.hiit.fi
            ntugrid2.phys.ntu.edu.tw ntugrid5.phys.ntu.edu.tw nys1.cac.cornell.edu osgce.hepgrid.uerj.br osg-ce.sprace.org.br
            osg-gk.mwt2.org osg-gw-6.t2.ucsd.edu osg-gw-7.t2.ucsd.edu osg.hpc.ufl.edu osg-nemo-ce.phys.uwm.edu
            osg.rcac.purdue.edu osgserv01.slac.stanford.edu osgserv02.slac.stanford.edu ouhep0.nhn.ou.edu pamelace01.na.infn.it
            pcncp04.ncp.edu.pk pcncp05.ncp.edu.pk pre7230.datagrid.cea.fr prod-ce-01.pd.infn.it razi.biruni.upm.my
            recasce01.na.infn.it red-gw1.unl.edu red-gw2.unl.edu red.unl.edu rossmann-osg.rcac.purdue.edu sbgce2.in2p3.fr
            snf-189278.vm.okeanos.grnet.gr snf-458754.vm.okeanos.grnet.gr spacina-ce.scope.unina.it svr009.gla.scotgrid.ac.uk
            svr010.gla.scotgrid.ac.uk svr011.gla.scotgrid.ac.uk svr014.gla.scotgrid.ac.uk t2arc01.physics.ox.ac.uk
            t2-ce-01.lnl.infn.it t2-ce-01.to.infn.it t2-ce-02.lnl.infn.it t2ce02.physics.ox.ac.uk t2-ce-03.lnl.infn.it
            t2-ce-04.lnl.infn.it t2-ce-04.mi.infn.it t2ce04.physics.ox.ac.uk t2-ce-05.mi.infn.it t2-ce-06.lnl.infn.it
            t2ce06.physics.ox.ac.uk t3serv007.mit.edu tau-cream.hep.tau.ac.il tech-crm.hep.technion.ac.il top.ucr.edu
            umiss001.hep.olemiss.edu uosaf0008.sscc.uos.ac.kr uscms1.fltech-grid3.fit.edu v6ce00.grid.hep.ph.ic.ac.uk
            vserv13.hep.phy.cam.ac.uk wipp-crm.weizmann.ac.il
        """
        import sys, time
        from python_compat import set, imap, lfilter, sorted
        counter = 0
        used = set()
        for line in imap(str.strip, ce_list_str.split()):
            time.sleep(0.2)
            match = get_geo_match(line)
            if not match:
                counter += 1
                sys.stderr.write('\t%r: %r\n' % (line, self._geocode(line)))
            else:
                used.add(match)
        sys.stderr.write('%s unmatched entries\n' % counter)
        sys.stderr.write('unused entries:\n%s\n' % repr(lfilter(lambda x: x not in used, _GEO_DICT)))
        sys.stdout.write('_GEO_DICT = {\n')
        geo_dict_key_list = sorted(_GEO_DICT.keys(), key=lambda x: str.join('.', reversed(x.split('.'))))
        for entry in geo_dict_key_list:
            sys.stdout.write('\t%r: (%.6f, %.6f),\n' % (entry, _GEO_DICT[entry][0], _GEO_DICT[entry][1]))
        sys.stdout.write('}\n')

    def _geocode(self, loc):
        result = self._jrc.get(params={'address': str.join('.', loc.split('.')[2:]), 'sensor': 'false'})
        if 'Placemark' in result:  # unfold placemark entries
            place_list = []
            for entry in result['Placemark']:
                place_list.append((entry['address'], tuple(reversed(entry['Point']['coordinates'][:2]))))
            return place_list
        return result
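
# A standalone sketch of the HTTP request that _geocode issues above, using only the
# Python standard library instead of JSONRestClient. Endpoint and parameters are taken
# from this file; the 'Placemark' layout parsed above stems from an older revision of
# the geocode API (newer responses carry a 'results' list), so treat any parsing of
# the returned dict as an assumption.
import json
try:  # Python 3
    from urllib.parse import urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlencode
    from urllib2 import urlopen

def _geocode_stdlib(domain):
    # Query the geocode service for a domain fragment, e.g. 'helsinki.fi'
    query = urlencode({'address': domain, 'sensor': 'false'})
    url = 'http://maps.googleapis.com/maps/api/geocode/json?' + query
    return json.loads(urlopen(url).read().decode('utf-8'))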
class CMSBaseProvider(DataProvider):
	def __init__(self, config, datasetExpr, datasetNick=None, datasetID=0):
		changeTrigger = triggerResync(['datasets', 'parameters'])
		self._lumi_filter = config.getLookup('lumi filter', {},
			parser=parseLumiFilter, strfun=strLumi, onChange=changeTrigger)
		if not self._lumi_filter.empty():
			config.set('dataset processor', 'LumiDataProcessor', '+=')
		DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
		# LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
		self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(), onChange=changeTrigger)
		# PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1s don't!
		self._phedexFilter = config.getFilter('phedex sites', '-T3_US_FNALLPC',
			defaultMatcher='blackwhite', defaultFilter='weak', onChange=changeTrigger)
		self._phedexT1Filter = config.getFilter('phedex t1 accept', 'T1_DE_KIT T1_US_FNAL',
			defaultMatcher='blackwhite', defaultFilter='weak', onChange=changeTrigger)
		self._phedexT1Mode = config.getEnum('phedex t1 mode', PhedexT1Mode, PhedexT1Mode.disk, onChange=changeTrigger)
		self.onlyComplete = config.getBool('only complete sites', True, onChange=changeTrigger)
		self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname, onChange=changeTrigger)
		self._pjrc = JSONRestClient(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
		(self._datasetPath, self._url, self._datasetBlock) = optSplit(datasetExpr, '@#')
		self._url = self._url or config.get('dbs instance', '')
		self._datasetBlock = self._datasetBlock or 'all'
		self.onlyValid = config.getBool('only valid', True, onChange=changeTrigger)

	# Define how often the dataprovider can be queried automatically
	def queryLimit(self):
		return 2 * 60 * 60  # 2 hour delay minimum

	# Check if splitterClass is valid
	def checkSplitter(self, splitterClass):
		if (DataSplitter.Skipped in splitterClass.neededEnums()) and not self._lumi_filter.empty():
			self._log.debug('Selected splitter %s is not compatible with active lumi filter!', splitterClass.__name__)
			self._log.warning('Active lumi section filter forced selection of HybridSplitter')
			return HybridSplitter
		return splitterClass

	def _nodeFilter(self, nameSiteDB, complete):
		# Remove T0 and T1 by default
		result = not (nameSiteDB.startswith('T0_') or nameSiteDB.startswith('T1_'))
		# check if listed on the accepted list
		if self._phedexT1Mode in [PhedexT1Mode.disk, PhedexT1Mode.accept]:
			result = result or (self._phedexT1Filter.filterList([nameSiteDB]) == [nameSiteDB])
		if self._phedexT1Mode == PhedexT1Mode.disk:
			result = result or nameSiteDB.lower().endswith('_disk')
		# apply phedex blacklist
		result = result and (self._phedexFilter.filterList([nameSiteDB]) == [nameSiteDB])
		# check for completeness at the site
		result = result and (complete or not self.onlyComplete)
		return result

	# Get dataset se list from PhEDex (perhaps concurrent with listFiles)
	def _getPhedexSEList(self, blockPath, dictSE):
		dictSE[blockPath] = []
		for phedexBlock in self._pjrc.get(params={'block': blockPath})['phedex']['block']:
			for replica in phedexBlock['replica']:
				if self._nodeFilter(replica['node'], replica['complete'] == 'y'):
					location = None
					if self._locationFormat == CMSLocationFormat.hostname:
						location = replica.get('se')
					elif self._locationFormat == CMSLocationFormat.siteDB:
						location = replica.get('node')
					elif (self._locationFormat == CMSLocationFormat.both) and (replica.get('node') or replica.get('se')):
						location = '%s/%s' % (replica.get('node'), replica.get('se'))
					if location:
						dictSE[blockPath].append(location)
					else:
						self._log.warning('Dataset block %s replica at %s / %s is skipped!',
							blockPath, replica.get('node'), replica.get('se'))

	def getDatasets(self):
		if self._cache_dataset is None:
			self._cache_dataset = [self._datasetPath]
			if '*' in self._datasetPath:
				self._cache_dataset = list(self.getCMSDatasets(self._datasetPath))
				if not self._cache_dataset:
					raise DatasetError('No datasets selected by DBS wildcard %s !' % self._datasetPath)
		return self._cache_dataset

	def getCMSBlocks(self, datasetPath, getSites):
		iter_blockname_selist = self.getCMSBlocksImpl(datasetPath, getSites)
		n_blocks = 0
		selected_blocks = False
		for (blockname, selist) in iter_blockname_selist:
			n_blocks += 1
			if (self._datasetBlock != 'all') and (str.split(blockname, '#')[1] != self._datasetBlock):
				continue
			selected_blocks = True
			yield (blockname, selist)
		if (n_blocks > 0) and not selected_blocks:
			raise DatasetError('Dataset %r contains %d blocks, but none were selected by %r' % (datasetPath, n_blocks, self._datasetBlock))

	def fillCMSFiles(self, block, blockPath):
		lumi_used = False
		lumiDict = {}
		if self._lumi_query:  # central lumi query
			lumiDict = self.getCMSLumisImpl(blockPath)
		fileList = []
		for (fileInfo, listLumi) in self.getCMSFilesImpl(blockPath, self.onlyValid, self._lumi_query):
			if lumiDict and not listLumi:
				listLumi = lumiDict.get(fileInfo[DataProvider.URL], [])
			if listLumi:
				(listLumiExt_Run, listLumiExt_Lumi) = ([], [])
				for (run, lumi_list) in sorted(listLumi):
					listLumiExt_Run.extend([run] * len(lumi_list))
					listLumiExt_Lumi.extend(lumi_list)
				fileInfo[DataProvider.Metadata] = [listLumiExt_Run, listLumiExt_Lumi]
				lumi_used = True
			fileList.append(fileInfo)
		if lumi_used:
			block.setdefault(DataProvider.Metadata, []).extend(['Runs', 'Lumi'])
		block[DataProvider.FileList] = fileList

	def getCMSLumisImpl(self, blockPath):
		return None

	def getGCBlocks(self, usePhedex):
		for datasetPath in self.getDatasets():
			counter = 0
			for (blockPath, listSE) in self.getCMSBlocks(datasetPath, getSites=not usePhedex):
				result = {}
				result[DataProvider.Dataset] = blockPath.split('#')[0]
				result[DataProvider.BlockName] = blockPath.split('#')[1]
				if usePhedex:  # Start parallel phedex query
					dictSE = {}
					tPhedex = start_thread('Query phedex site info for %s' % blockPath, self._getPhedexSEList, blockPath, dictSE)
					self.fillCMSFiles(result, blockPath)
					tPhedex.join()
					listSE = dictSE.get(blockPath)
				else:
					self.fillCMSFiles(result, blockPath)
				result[DataProvider.Locations] = listSE
				if len(result[DataProvider.FileList]):
					counter += 1
					yield result
			if counter == 0:
				raise DatasetError('Dataset %s does not contain any valid blocks!' % datasetPath)
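# The run/lumi expansion inside fillCMSFiles above flattens a per-file list of
# (run, [lumi, ...]) pairs into two parallel metadata columns ('Runs', 'Lumi').
# A standalone sketch of that transformation; the function name is illustrative
# and not part of the class:
def expand_lumis(list_lumi):
	(runs, lumis) = ([], [])
	for (run, lumi_list) in sorted(list_lumi):
		runs.extend([run] * len(lumi_list))  # repeat run number once per lumi section
		lumis.extend(lumi_list)
	return (runs, lumis)

# e.g. expand_lumis([(2, [7]), (1, [4, 5])]) == ([1, 1, 2], [4, 5, 7])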
class CMSBaseProvider(DataProvider):
	def __init__(self, config, datasetExpr, datasetNick=None):
		self._changeTrigger = triggerResync(['datasets', 'parameters'])
		self._lumi_filter = config.getLookup('lumi filter', {},
			parser=parseLumiFilter, strfun=strLumi, onChange=self._changeTrigger)
		if not self._lumi_filter.empty():
			config.set('dataset processor', 'LumiDataProcessor', '+=')
		DataProvider.__init__(self, config, datasetExpr, datasetNick)
		# LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
		self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(), onChange=self._changeTrigger)
		config.set('phedex sites matcher mode', 'shell', '?=')
		# PhEDex blacklist: T1_*_Disk nodes allow user jobs - other T1s don't!
		self._phedexFilter = config.getFilter('phedex sites', '-* T1_*_Disk T2_* T3_*',
			defaultMatcher='blackwhite', defaultFilter='strict', onChange=self._changeTrigger)
		self._onlyComplete = config.getBool('only complete sites', True, onChange=self._changeTrigger)
		self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname, onChange=self._changeTrigger)
		self._pjrc = JSONRestClient(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
		self._sitedb = SiteDB()
		(self._datasetPath, self._datasetInstance, self._datasetBlock) = optSplit(datasetExpr, '@#')
		instance_default = config.get('dbs instance', '', onChange=self._changeTrigger)
		self._datasetInstance = self._datasetInstance or instance_default
		if not self._datasetInstance:
			self._datasetInstance = 'prod/global'
		elif '/' not in self._datasetInstance:
			self._datasetInstance = 'prod/%s' % self._datasetInstance
		self._datasetBlock = self._datasetBlock or 'all'
		self.onlyValid = config.getBool('only valid', True, onChange=self._changeTrigger)

	# Define how often the dataprovider can be queried automatically
	def queryLimit(self):
		return 2 * 60 * 60  # 2 hour delay minimum

	# Check if splitterClass is valid
	def checkSplitter(self, splitterClass):
		if (DataSplitter.Skipped in splitterClass.neededEnums()) and not self._lumi_filter.empty():
			self._log.debug('Selected splitter %s is not compatible with active lumi filter!', splitterClass.__name__)
			self._log.warning('Active lumi section filter forced selection of HybridSplitter')
			return HybridSplitter
		return splitterClass

	def _replicaLocation(self, replica_info):
		(name_node, name_hostname, _) = replica_info
		if self._locationFormat == CMSLocationFormat.siteDB:
			yield name_node
		else:
			if name_hostname is not None:
				name_hostnames = [name_hostname]
			else:
				name_hostnames = self._sitedb.cms_name_to_se(name_node)
			for name_hostname in name_hostnames:
				if self._locationFormat == CMSLocationFormat.hostname:
					yield name_hostname
				else:
					yield '%s/%s' % (name_node, name_hostname)

	def _fmtLocations(self, replica_infos):
		for replica_info in replica_infos:
			(_, _, completed) = replica_info
			if completed:
				for entry in self._replicaLocation(replica_info):
					yield entry
			else:
				for entry in self._replicaLocation(replica_info):
					yield '(%s)' % entry

	def _processReplicas(self, blockPath, replica_infos):
		def empty_with_warning(*args):
			self._log.warning(*args)
			return []

		def expanded_replica_locations(replica_infos):
			for replica_info in replica_infos:
				for entry in self._replicaLocation(replica_info):
					yield entry

		if not replica_infos:
			return empty_with_warning('Dataset block %r has no replica information!', blockPath)
		replica_infos_selected = self._phedexFilter.filterList(replica_infos, key=itemgetter(0))
		if not replica_infos_selected:
			return empty_with_warning('Dataset block %r is not available at the selected locations!\nAvailable locations: %s',
				blockPath, str.join(', ', self._fmtLocations(replica_infos)))
		if not self._onlyComplete:
			return list(expanded_replica_locations(replica_infos_selected))
		replica_infos_complete = lfilter(lambda nn_nh_c: nn_nh_c[2], replica_infos_selected)
		if not replica_infos_complete:
			return empty_with_warning('Dataset block %r is not completely available at the selected locations!\nAvailable locations: %s',
				blockPath, str.join(', ', self._fmtLocations(replica_infos)))
		return list(expanded_replica_locations(replica_infos_complete))

	# Get dataset se list from PhEDex (perhaps concurrent with listFiles)
	def _getPhedexReplicas(self, blockPath, dictReplicas):
		dictReplicas[blockPath] = []
		for phedexBlock in self._pjrc.get(params={'block': blockPath})['phedex']['block']:
			for replica in phedexBlock['replica']:
				dictReplicas[blockPath].append((replica['node'], replica.get('se'), replica['complete'] == 'y'))

	def getDatasets(self):
		if self._cache_dataset is None:
			self._cache_dataset = [self._datasetPath]
			if '*' in self._datasetPath:
				self._cache_dataset = list(self._getCMSDatasets(self._datasetPath))
				if not self._cache_dataset:
					raise DatasetError('No datasets selected by DBS wildcard %s !' % self._datasetPath)
		return self._cache_dataset

	def _getCMSBlocks(self, datasetPath, getSites):
		iter_blockname_selist = self._getCMSBlocksImpl(datasetPath, getSites)
		n_blocks = 0
		selected_blocks = False
		for (blockname, selist) in iter_blockname_selist:
			n_blocks += 1
			if (self._datasetBlock != 'all') and (str.split(blockname, '#')[1] != self._datasetBlock):
				continue
			selected_blocks = True
			yield (blockname, selist)
		if (n_blocks > 0) and not selected_blocks:
			raise DatasetError('Dataset %r contains %d blocks, but none were selected by %r' % (datasetPath, n_blocks, self._datasetBlock))

	def _fillCMSFiles(self, block, blockPath):
		lumi_used = False
		lumiDict = {}
		if self._lumi_query:  # central lumi query
			lumiDict = self._getCMSLumisImpl(blockPath)
		fileList = []
		for (fileInfo, listLumi) in self._getCMSFilesImpl(blockPath, self.onlyValid, self._lumi_query):
			if lumiDict and not listLumi:
				listLumi = lumiDict.get(fileInfo[DataProvider.URL], [])
			if listLumi:
				(listLumiExt_Run, listLumiExt_Lumi) = ([], [])
				for (run, lumi_list) in sorted(listLumi):
					listLumiExt_Run.extend([run] * len(lumi_list))
					listLumiExt_Lumi.extend(lumi_list)
				fileInfo[DataProvider.Metadata] = [listLumiExt_Run, listLumiExt_Lumi]
				lumi_used = True
			fileList.append(fileInfo)
		if lumi_used:
			block.setdefault(DataProvider.Metadata, []).extend(['Runs', 'Lumi'])
		block[DataProvider.FileList] = fileList

	def _getCMSLumisImpl(self, blockPath):
		return None

	def _getGCBlocks(self, usePhedex):
		for datasetPath in self.getDatasets():
			counter = 0
			for (blockPath, replica_infos) in self._getCMSBlocks(datasetPath, getSites=not usePhedex):
				result = {}
				result[DataProvider.Dataset] = blockPath.split('#')[0]
				result[DataProvider.BlockName] = blockPath.split('#')[1]
				if usePhedex:  # Start parallel phedex query
					dictReplicas = {}
					tPhedex = start_thread('Query phedex site info for %s' % blockPath, self._getPhedexReplicas, blockPath, dictReplicas)
					self._fillCMSFiles(result, blockPath)
					tPhedex.join()
					replica_infos = dictReplicas.get(blockPath)
				else:
					self._fillCMSFiles(result, blockPath)
				result[DataProvider.Locations] = self._processReplicas(blockPath, replica_infos)
				if len(result[DataProvider.FileList]):
					counter += 1
					yield result
			if counter == 0:
				raise DatasetError('Dataset %s does not contain any valid blocks!' % datasetPath)
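# The replica tuples handled by _processReplicas above have the shape
# (cms_node_name, hostname, complete). A minimal standalone rewrite of the
# _fmtLocations behaviour for the CMSLocationFormat.hostname case, to make the
# parenthesised marking of incomplete replicas concrete; the site and hostname
# values in the usage example are purely illustrative:
def fmt_locations_hostname(replica_infos):
	for (name_node, name_hostname, completed) in replica_infos:
		# complete replicas are listed plainly, incomplete ones in '(...)'
		yield name_hostname if completed else '(%s)' % name_hostname

# list(fmt_locations_hostname([('T2_AA_X', 'se.x.example', True),
# 	('T2_BB_Y', 'se.y.example', False)])) == ['se.x.example', '(se.y.example)']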