def resolve_file_locations(dataset, sites=None, cloud=None, token='ATLASDATADISK', debug=False):
    '''
    Summarize the locations of files (in terms of sitename) of a dataset.

    If the sites argument is given, the cloud and token arguments are
    ignored; otherwise cloud and token are used to retrieve the sites via
    dm_util.get_srmv2_sites.

    Each site is queried with DQ2.listFileReplicas from a pool of at most
    10 worker threads. Sites whose query raises DQException are logged and
    excluded from the result.

    Returns a dictionary mapping each entry of the reply's 'content'
    (treated as a file GUID) to the list of sites that reported it.
    '''

    if not sites:
        logger.debug('resolving sites with token: %s' % token)
        sites = dm_util.get_srmv2_sites(cloud, token=token, debug=debug)

    logger.debug('checking replicas at sites: %s' % str(sites))

    replicas = {}

    # preparing the queue for querying lfn
    wq = Queue(len(sites))
    for site in sites:
        wq.put(site)

    mylock = Lock()

    def worker(id):
        dq2 = DQ2()
        while not wq.empty():
            try:
                site = wq.get(block=True, timeout=1)
            except Empty:
                # another worker drained the queue between empty() and get()
                continue

            try:
                replicaInfo = dq2.listFileReplicas(site, dataset)
            except DQException as err:
                logger.warning(str(err))
                logger.warning('site %s excluded' % site)
                continue

            # An empty reply must not be indexed: treat it as "no files here"
            # instead of letting the worker thread die on replicaInfo[0].
            guids = replicaInfo[0]['content'] if replicaInfo else []
            logger.debug('resolving dataset files at %s, no files: %d' % (site, len(guids)))

            # 'with' guarantees the lock is released even if the update fails,
            # so the remaining workers cannot be blocked forever.
            with mylock:
                for guid in guids:
                    replicas.setdefault(guid, []).append(site)

    # never start more workers than there are sites, and cap the pool at 10
    nthread = min(len(sites), 10)

    threads = []
    for i in range(nthread):
        t = GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'id': i})
        threads.append(t)

    for t in threads:
        t.start()

    for t in threads:
        t.join()

    return replicas
def __resolve_containers(self, containers, nthreads=10):
    '''Resolve dataset containers into the datasets they hold.

    Every container name is pushed onto a work queue that is drained by
    `nthreads` worker threads, each calling DQ2.listDatasetsInContainer.

    Returns a dictionary keyed by container name. A container whose lookup
    raised DQException keeps the empty list it was initialised with.
    '''

    resolved = {}

    work_queue = Queue(len(containers))
    for container in containers:
        work_queue.put(container)

    guard = Lock()

    def worker(worker_id):
        client = DQ2()
        while not work_queue.empty():
            try:
                name = work_queue.get(block=True, timeout=1)
                logger.debug('worker id: %d on dataset container: %s' % (worker_id, name))
                # placeholder entry, kept if the lookup below fails
                resolved[name] = []
                members = client.listDatasetsInContainer(name)
                with guard:
                    resolved[name] = members
            except DQException as err:
                logger.warning(str(err))
            except Empty:
                # queue was drained by another worker in the meantime
                pass

    profiler = ElapsedTimeProfiler(logger=logger)
    profiler.start()

    pool = [
        GangaThread(name='stager_ds_w_%d' % idx, target=worker, kwargs={'worker_id': idx})
        for idx in range(nthreads)
    ]

    for thread in pool:
        thread.start()

    for thread in pool:
        thread.join()

    profiler.check('resolving %d containers' % len(containers))

    return resolved
class TaskRegistry(Registry):
    """Registry of tasks that updates them from a background thread.

    On startup() a GangaThread running _thread_main() and a RegistryFlusher
    are started; stop() and shutdown() end them again.
    """

    def __init__(self, name, doc, dirty_flush_counter=10, update_index_time=30):
        """Initialise the base registry and build the stored slice and its proxy."""
        super(TaskRegistry, self).__init__(
            name,
            doc,
            dirty_flush_counter=dirty_flush_counter,
            update_index_time=update_index_time
        )

        # background monitoring thread; created in startup()
        self._main_thread = None

        self.stored_slice = TaskRegistrySlice(self.name)
        self.stored_slice.objects = self
        self.stored_proxy = TaskRegistrySliceProxy(self.stored_slice)

    def getSlice(self):
        """Return the slice that wraps this registry."""
        return self.stored_slice

    def getProxy(self):
        """Return the proxy of the stored slice."""
        return self.stored_proxy

    def getIndexCache(self, obj):
        """Build the index-cache dictionary for obj.

        The result holds 'status', 'id' and 'name' (each only when present
        in obj._data) plus one "display:<column>" entry per display column
        of a TaskRegistrySlice.

        Raises Exception when obj._data is None.
        """
        if obj._data is None:
            raise Exception("Currently don't support Index Caching")

        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if cv in obj._data:
                c[cv] = getattr(obj, cv)

        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _thread_main(self):
        """ This is an internal function; the main loop of the background thread """
        from Ganga.Core.GangaRepository import getRegistry

        # Wait for the jobs registry, bailing out if asked to stop meanwhile
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # Wait until monitoring is enabled or task monitoring is forced
        while True:
            # re-imported on every pass so a later assignment is picked up
            from Ganga.Core import monitoring_component
            if (monitoring_component is not None and monitoring_component.enabled) or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid].startup()
            except Exception as err:
                logger.error("Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        # Main loop
        while self._main_thread is not None and not self._main_thread.should_stop():

            # If monitoring is enabled (or forced for Tasks) loop over each one and update.
            # monitoring_component may still be None when the wait above ended
            # because of ForceTaskMonitoring, so guard the attribute access.
            monitoring_on = config['ForceTaskMonitoring'] or (
                monitoring_component is not None and monitoring_component.enabled)

            if monitoring_on and not config['disableTaskMon']:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    # Reset per iteration: if self[tid] itself raises, the handler
                    # must not pause the task left over from the previous pass
                    # (or hit an unbound name on the first pass).
                    p = None
                    try:
                        p = self[tid]
                        p.update()

                    except Exception as x:
                        logger.error(
                            "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused." % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_, traceback_)))
                        if p is not None:
                            p.pause()

                    if self._main_thread.should_stop():
                        break

                if self._main_thread.should_stop():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds" % str(config['TaskLoopFrequency']))

            # Sleep for TaskLoopFrequency seconds in 10 ms slices so that a
            # stop request interrupts the wait promptly
            for i in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._main_thread.should_stop():
                    break
                time.sleep(0.01)

    def startup(self):
        """ Start a background thread that periodically run()s"""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks", target=self._thread_main)
        self._main_thread.start()

        # create a registry flusher
        self.flush_thread = RegistryFlusher(self)
        self.flush_thread.start()

    def shutdown(self):
        """Join the flusher thread, then shut down the base registry."""
        self.flush_thread.join()
        super(TaskRegistry, self).shutdown()

    def stop(self):
        """Stop the background thread, if one was started, and wait for it."""
        if self._main_thread is not None:
            self._main_thread.stop()
            self._main_thread.join()
class TaskRegistry(Registry):
    """Registry of tasks that updates them from a background thread.

    On startup() a GangaThread running _thread_main() and a RegistryFlusher
    are started; stop() and shutdown() end them again.
    """

    def __init__(self, name, doc):
        """Initialise the base registry and build the stored slice and its proxy."""
        super(TaskRegistry, self).__init__(name, doc)

        # background monitoring thread; created in startup()
        self._main_thread = None

        self.stored_slice = TaskRegistrySlice(self.name)
        self.stored_slice.objects = self
        self.stored_proxy = TaskRegistrySliceProxy(self.stored_slice)

    def getSlice(self):
        """Return the slice that wraps this registry."""
        return self.stored_slice

    def getProxy(self):
        """Return the proxy of the stored slice."""
        return self.stored_proxy

    def getIndexCache(self, obj):
        """Build the index-cache dictionary for obj.

        The result holds 'status', 'id' and 'name' (each only when obj has
        that attribute) plus one "display:<column>" entry per display
        column of a TaskRegistrySlice.
        """
        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if hasattr(obj, cv):
                c[cv] = getattr(obj, cv)

        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _thread_main(self):
        """ This is an internal function; the main loop of the background thread """
        from Ganga.Core.GangaRepository import getRegistry

        # Wait for the jobs registry, bailing out if asked to stop meanwhile
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # Wait until monitoring is enabled or task monitoring is forced
        while True:
            # re-imported on every pass so a later assignment is picked up
            from Ganga.Core import monitoring_component
            if (monitoring_component is not None and monitoring_component.enabled) or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid].startup()
            except Exception as err:
                logger.error(
                    "Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        # Main loop
        while self._main_thread is not None and not self._main_thread.should_stop():

            # If monitoring is enabled (or forced for Tasks) loop over each one and update.
            # monitoring_component may still be None when the wait above ended
            # because of ForceTaskMonitoring, so guard the attribute access.
            monitoring_on = config['ForceTaskMonitoring'] or (
                monitoring_component is not None and monitoring_component.enabled)

            if monitoring_on and not config['disableTaskMon']:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    # Reset per iteration: if self[tid] itself raises, the handler
                    # must not pause the task left over from the previous pass
                    # (or hit an unbound name on the first pass).
                    p = None
                    try:
                        p = self[tid]
                        p.update()

                    except Exception as x:
                        logger.error(
                            "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused." % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_, traceback_)))
                        if p is not None:
                            p.pause()

                    if self._main_thread.should_stop():
                        break

                if self._main_thread.should_stop():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds" % str(config['TaskLoopFrequency']))

            # Sleep for TaskLoopFrequency seconds in 10 ms slices so that a
            # stop request interrupts the wait promptly
            for i in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._main_thread.should_stop():
                    break
                time.sleep(0.01)

    def startup(self):
        """ Start a background thread that periodically run()s"""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks", target=self._thread_main)
        self._main_thread.start()

        # create a registry flusher
        self.flush_thread = RegistryFlusher(self, 'TaskRegistryFlusher')
        self.flush_thread.start()

    def shutdown(self):
        """Join the flusher thread, then shut down the base registry."""
        self.flush_thread.join()
        super(TaskRegistry, self).shutdown()

    def stop(self):
        """Stop the background thread, if one was started, and wait for it."""
        if self._main_thread is not None:
            self._main_thread.stop()
            self._main_thread.join()
def get_complete_files_replicas(self, nthread=10, diskOnly=True):
    '''Gets a comprehensive dataset information about the contents and the location of COMPLETE replicas

    The result is computed once and cached in self.complete_files_replicas;
    later calls return the cached value as long as it is non-empty.

    nthread  -- number of worker threads querying DQ2 in parallel
    diskOnly -- when True, sites whose name ends in "TAPE" are dropped

    Returns a dictionary mapping each dataset name to a two-element list:
    the first element of the listFilesInDataset reply, and the list of
    sites holding a complete replica. Datasets with no complete replica,
    or whose query raised DQException, are absent from the result.
    '''

    if not self.complete_files_replicas:

        re_tapeSite = re.compile('.*TAPE$')

        ds_info = {}

        self.__expand_datasets()

        wq = Queue(len(self.dataset))
        for ds in self.dataset:
            wq.put(ds)

        mylock = Lock()

        def worker(id):
            dq2 = DQ2()
            while not wq.empty():
                try:
                    ds = wq.get(block=True, timeout=1)
                    logger.debug('worker id: %d on dataset: %s' % (id, ds))

                    # get contents (guids) of the complete dataset
                    contents = dq2.listFilesInDataset(ds)

                    # get locations of the complete dataset replicas
                    locations = dq2.listDatasetReplicas(ds, complete=1)

                    # First key of the reply, or None when it is empty.
                    # keys()[0] is avoided because dict views are not
                    # subscriptable on Python 3.
                    vuid = next(iter(locations), None)

                    # updating ds_info hashtable; 'with' releases the lock
                    # even if the reply has an unexpected shape, so the
                    # other workers cannot be blocked forever
                    with mylock:
                        if vuid:
                            if diskOnly:
                                ds_sites = [site for site in locations[vuid][1]
                                            if not re_tapeSite.match(site)]
                            else:
                                ds_sites = locations[vuid][1]

                            ds_info[ds] = [contents[0], ds_sites]
                        else:
                            logger.warning('dataset not available: %s' % ds)

                except DQException as err:
                    logger.warning(str(err))

                except Empty:
                    # another worker drained the queue between empty() and get()
                    pass

        # prepare and run the query threads
        profiler = ElapsedTimeProfiler(logger=logger)
        profiler.start()
        threads = []
        for i in range(nthread):
            t = GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'id': i})
            threads.append(t)

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        self.complete_files_replicas = ds_info

        profiler.check('information collected: %d datasets' % len(self.complete_files_replicas))
    else:
        logger.debug('using cached complete_files_replicas')

    return self.complete_files_replicas
def get_complete_files_replicas(self, nthread=10, diskOnly=True):
    '''Gets a comprehensive dataset information about the contents and the location of COMPLETE replicas

    The result is computed once and cached in self.complete_files_replicas;
    later calls return the cached value as long as it is non-empty.

    nthread  -- number of worker threads querying DQ2 in parallel
    diskOnly -- when True, sites whose name ends in "TAPE" are dropped

    Returns a dictionary mapping each dataset name to a two-element list:
    the first element of the listFilesInDataset reply, and the list of
    sites holding a complete replica. Datasets with no complete replica,
    or whose query raised DQException, are absent from the result.
    '''

    if not self.complete_files_replicas:

        re_tapeSite = re.compile('.*TAPE$')

        ds_info = {}

        self.__expand_datasets()

        wq = Queue(len(self.dataset))
        for ds in self.dataset:
            wq.put(ds)

        mylock = Lock()

        def worker(id):
            dq2 = DQ2()
            while not wq.empty():
                try:
                    ds = wq.get(block=True, timeout=1)
                    logger.debug('worker id: %d on dataset: %s' % (id, ds))

                    # get contents (guids) of the complete dataset
                    contents = dq2.listFilesInDataset(ds)

                    # get locations of the complete dataset replicas
                    locations = dq2.listDatasetReplicas(ds, complete=1)

                    # First key of the reply, or None when it is empty.
                    # keys()[0] is avoided because dict views are not
                    # subscriptable on Python 3.
                    vuid = next(iter(locations), None)

                    # updating ds_info hashtable; 'with' releases the lock
                    # even if the reply has an unexpected shape, so the
                    # other workers cannot be blocked forever
                    with mylock:
                        if vuid:
                            if diskOnly:
                                ds_sites = [site for site in locations[vuid][1]
                                            if not re_tapeSite.match(site)]
                            else:
                                ds_sites = locations[vuid][1]

                            ds_info[ds] = [contents[0], ds_sites]
                        else:
                            logger.warning('dataset not available: %s' % ds)

                except DQException as err:
                    logger.warning(str(err))

                except Empty:
                    # another worker drained the queue between empty() and get()
                    pass

        # prepare and run the query threads
        profiler = ElapsedTimeProfiler(logger=logger)
        profiler.start()
        threads = []
        for i in range(nthread):
            t = GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'id': i})
            threads.append(t)

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        self.complete_files_replicas = ds_info

        profiler.check('information collected: %d datasets' % len(self.complete_files_replicas))
    else:
        logger.debug('using cached complete_files_replicas')

    return self.complete_files_replicas
def resolve_file_locations(dataset, sites=None, cloud=None, token='ATLASDATADISK', debug=False):
    '''
    Summarize the locations of files (in terms of sitename) of a dataset.

    If the sites argument is given, the cloud and token arguments are
    ignored; otherwise cloud and token are used to retrieve the sites via
    dm_util.get_srmv2_sites.

    Each site is queried with DQ2.listFileReplicas from a pool of at most
    10 worker threads. Sites whose query raises DQException are logged and
    excluded from the result.

    Returns a dictionary mapping each entry of the reply's 'content'
    (treated as a file GUID) to the list of sites that reported it.
    '''

    if not sites:
        logger.debug('resolving sites with token: %s' % token)
        sites = dm_util.get_srmv2_sites(cloud, token=token, debug=debug)

    logger.debug('checking replicas at sites: %s' % str(sites))

    replicas = {}

    # preparing the queue for querying lfn
    wq = Queue(len(sites))
    for site in sites:
        wq.put(site)

    mylock = Lock()

    def worker(id):
        dq2 = DQ2()
        while not wq.empty():
            try:
                site = wq.get(block=True, timeout=1)
            except Empty:
                # another worker drained the queue between empty() and get()
                continue

            try:
                replicaInfo = dq2.listFileReplicas(site, dataset)
            except DQException as err:
                logger.warning(str(err))
                logger.warning('site %s excluded' % site)
                continue

            # An empty reply must not be indexed: treat it as "no files here"
            # instead of letting the worker thread die on replicaInfo[0].
            guids = replicaInfo[0]['content'] if replicaInfo else []
            logger.debug('resolving dataset files at %s, no files: %d' % (site, len(guids)))

            # 'with' guarantees the lock is released even if the update fails,
            # so the remaining workers cannot be blocked forever.
            with mylock:
                for guid in guids:
                    replicas.setdefault(guid, []).append(site)

    # never start more workers than there are sites, and cap the pool at 10
    nthread = min(len(sites), 10)

    threads = []
    for i in range(nthread):
        t = GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'id': i})
        threads.append(t)

    for t in threads:
        t.start()

    for t in threads:
        t.join()

    return replicas