Example #1
    def get_file_locations(self, complete=1, sites=None, cloud=None):
        '''Helper function to resolve replica locations of files of the given datasets.

           NB: by default, it checks only sites with complete datasets.
        '''

        profiler = ElapsedTimeProfiler(logger=logger)

        # get the dataset locations 
        ds_sites = self.get_locations(complete=complete, overlap=False)

        logger.info('resolving dataset file locations')
        profiler.start()
        replicas = {}

        # resolve file replicas of each dataset at its hosting sites
        for ds in ds_sites:
            logger.debug('dataset: %s' % ds)
            logger.debug('sites: %s' % repr(ds_sites[ds]))
            replicas.update(resolve_file_locations(ds, sites=ds_sites[ds]))

        profiler.check('%d datasets %d files' % (len(ds_sites), len(replicas)))

        return replicas
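
The loop above simply merges the per-dataset results into one dict. A minimal, self-contained sketch of that merge pattern, with a stubbed resolve_file_locations (its return shape here, file guid to site list, is an assumption for illustration, not the library's contract):

# Stub standing in for the DQ2 helper; the return shape is made up.
def resolve_file_locations(ds, sites=None):
    return {'%s-guid-%d' % (ds, i): list(sites or []) for i in range(2)}

ds_sites = {'data.A': ['SITEA_DATADISK'], 'data.B': ['SITEB_DATADISK']}
replicas = {}
for ds in ds_sites:
    replicas.update(resolve_file_locations(ds, sites=ds_sites[ds]))
print(len(replicas))  # 4: one entry per file guid across all datasets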
Example #2
    def get_datasets(self, name, filter=True):
        '''Get dataset names, especially when the name is given as a wildcard pattern'''
        profiler = ElapsedTimeProfiler(logger=logger)
        profiler.start()
        datasets = listDatasets(name, filter)
        profiler.check('listing %d datasets/containers' % len(datasets))

        return datasets
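
The wildcard expansion itself is presumably handled inside listDatasets on the DQ2 side; purely as a local illustration of the matching semantics, the same idea can be approximated with fnmatch (the dataset names and pattern below are made up, and the real server-side matching may differ):

import fnmatch

known = ['user.alice.test.001', 'user.alice.prod.002', 'user.bob.test.003']
pattern = 'user.alice.*'
print(fnmatch.filter(known, pattern))
# ['user.alice.test.001', 'user.alice.prod.002']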
Example #3
    def __resolve_containers(self, containers, nthreads=10):
        '''Resolve dataset containers into their constituent datasets'''

        datasets = {}
        
        wq = Queue(len(containers))
        for ds in containers:
            wq.put(ds)

        mylock = Lock()

        def worker(id):
            dq2 = DQ2()
            while not wq.empty():
                try:
                    ds = wq.get(block=True, timeout=1)
                    logger.debug('worker id: %d on dataset container: %s' % (id, ds))

                    datasets[ds] = []

                    ds_tmp = dq2.listDatasetsInContainer(ds)

                    # update the shared result table under the lock
                    mylock.acquire()
                    datasets[ds] = ds_tmp
                    mylock.release()
                except DQException as err:
                    logger.warning(str(err))
                except Empty:
                    # another worker drained the queue between the
                    # empty() check and the get(); just retry the loop
                    pass

        profiler = ElapsedTimeProfiler(logger=logger)
        profiler.start()
        threads = []
        for i in range(nthreads):
            t = GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'id': i})
            threads.append(t)
        
        for t in threads:
            t.start()
        
        for t in threads:
            t.join()
        profiler.check('resolving %d containers' % len(containers))

        return datasets
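
The same queue/worker fan-out pattern, reduced to a self-contained sketch. Plain threading.Thread stands in for the Ganga-specific GangaThread (an assumption for portability), and a trivial squaring function is a placeholder for listDatasetsInContainer:

import threading
from queue import Queue, Empty  # Python 3; the original is Python 2 era

def fan_out(items, work_fn, nthreads=10):
    results = {}
    lock = threading.Lock()
    wq = Queue()
    for item in items:
        wq.put(item)

    def worker():
        while True:
            try:
                item = wq.get(block=True, timeout=1)
            except Empty:
                return  # queue drained, worker exits
            res = work_fn(item)
            with lock:  # guard the shared results dict
                results[item] = res

    threads = [threading.Thread(target=worker) for _ in range(nthreads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return results

print(sorted(fan_out(range(5), lambda x: x * x, nthreads=3).items()))
# [(0, 0), (1, 1), (2, 4), (3, 9), (4, 16)]

Getting with a timeout and catching Empty, rather than trusting a prior empty() check, sidesteps the race where two workers both pass the check for the last remaining item.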
Example #4
    def get_complete_files_replicas(self, nthread=10, diskOnly=True):
        '''Get comprehensive information about the contents and the
           locations of COMPLETE dataset replicas'''

        if not self.complete_files_replicas:

            # sites whose names end in TAPE are tape storage
            re_tapeSite = re.compile('.*TAPE$')

            ds_info = {}
            self.__expand_datasets()
         
            wq = Queue(len(self.dataset))
            for ds in self.dataset:
                wq.put(ds)
         
            mylock = Lock()

            def worker(id):
                dq2 = DQ2()
                while not wq.empty():
                    try:
         
                        ds = wq.get(block=True, timeout=1)
                        logger.debug('worker id: %d on dataset: %s' % (id, ds))
         
                        # get contents (guids) of the complete dataset
                        contents = dq2.listFilesInDataset(ds)
         
                        # get locations of the complete dataset replicas
                        locations = dq2.listDatasetReplicas(ds,complete=1)
         
                        # locations is keyed by the dataset vuid; an empty
                        # dict means no complete replica exists anywhere
                        vuid = None
                        try:
                            vuid = locations.keys()[0]
                        except IndexError:
                            pass
         
                        mylock.acquire()
         
                        # update the ds_info hash table
                        if vuid:
                            ds_info[ds] = []
                            ds_sites = []
 
                            if diskOnly:
                                # keep only disk-resident replicas
                                for site in locations[vuid][1]:
                                    if not re_tapeSite.match(site):
                                        ds_sites.append(site)
                            else:
                                ds_sites = locations[vuid][1]

                            ds_info[ds] += [contents[0], ds_sites]
                        else:
                            logger.warning('dataset not available: %s' % ds)
         
                        mylock.release()
         
                    except DQException as err:
                        logger.warning(str(err))
         
                    except Empty:
                        pass
         
            # prepare and run the query threads
            profiler = ElapsedTimeProfiler(logger=logger)
            profiler.start()
            threads = []
            for i in range(nthread):
                t = GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'id': i})
                threads.append(t)
         
            for t in threads:
                t.start()
         
            for t in threads:
                t.join()

            self.complete_files_replicas = ds_info

            profiler.check('information collected: %d datasets' % len(self.complete_files_replicas))
        else:
            logger.debug('using cached complete_files_replicas')
 
        return self.complete_files_replicas 
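
The diskOnly filter above hinges on the '.*TAPE$' naming convention for tape endpoints. In isolation, with made-up site names:

import re

re_tapeSite = re.compile('.*TAPE$')
sites = ['SITEA_DATADISK', 'SITEB_DATATAPE', 'SITEC_MCTAPE']
disk_sites = [s for s in sites if not re_tapeSite.match(s)]
print(disk_sites)  # ['SITEA_DATADISK']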