Python SiteManager.get_over_soft_limit示例

编程语言: Python

命名空间/包名称: cuadrnt.data_management.tools.sites

类/类型: SiteManager

方法/功能: get_over_soft_limit

hotexamples.com的示例: 2

Python SiteManager.get_over_soft_limit - 已找到2个示例。这些是从开源项目中提取的最受好评的cuadrnt.data_management.tools.sites.SiteManager.get_over_soft_limit现实Python示例。您可以评价示例，以帮助我们提高示例质量。

常用方法

显示隐藏

SiteManager(6)

get_available_storage(2)

get_active_sites(1)

get_all_available_storage(1)

get_available_sites(1)

get_over_soft_limit(1)

get_performance(1)

示例#1

显示文件

class Ranker(object):
    """
    Generic Ranking class
    """
    def __init__(self, config=dict()):
        self.logger = logging.getLogger(__name__)
        self.config = config
        self.sites = SiteManager(self.config)
        self.datasets = DatasetManager(self.config)
        self.popularity = PopularityManager(self.config)
        self.storage = StorageManager(self.config)
        self.max_replicas = int(config['rocker_board']['max_replicas'])
        self.MAX_THREADS = int(config['threading']['max_threads'])
        self.dataset_popularity = dict()

    def get_dataset_rankings(self,
                             date=datetime_day(datetime.datetime.utcnow())):
        """
        Generate dataset rankings
        """
        self.dataset_popularity = dict()
        dataset_names = self.datasets.get_db_datasets()
        q = Queue.Queue()
        for i in range(self.MAX_THREADS):
            worker = threading.Thread(target=self.get_dataset_popularity,
                                      args=(q, ))
            worker.daemon = True
            worker.start()
        # self.dataset_features = self.popularity.get_features(dataset_names, date)
        # self.dataset_tiers = self.datasets.get_data_tiers(dataset_names)
        for dataset_name in dataset_names:
            q.put((dataset_name, date))
        q.join()
        dataset_rankings = self.normalize_popularity(date)
        return dataset_rankings

    def get_site_rankings(self, date=datetime_day(datetime.datetime.utcnow())):
        """
        Generate site rankings
        """
        # get all sites which can be replicated to
        site_names = self.sites.get_available_sites()
        site_rankings = dict()
        for site_name in site_names:
            # get popularity
            popularity = self.get_site_popularity(site_name, date)
            # get cpu and storage (performance)
            performance = self.sites.get_performance(site_name)
            # get available storage
            available_storage_tb = self.sites.get_available_storage(
                site_name) / 10**3
            if available_storage_tb <= 0:
                available_storage_tb = 0
            else:
                available_storage_tb = 1
            #calculate rank
            try:
                rank = (performance * available_storage_tb) / popularity
            except:
                rank = 0.0
            # store into dict
            site_rankings[site_name] = rank
            # insert into database
            coll = 'site_rankings'
            query = {'name': site_name, 'date': date}
            data = {
                '$set': {
                    'name': site_name,
                    'date': date,
                    'rank': rank,
                    'popularity': popularity
                }
            }
            self.storage.update_data(coll=coll,
                                     query=query,
                                     data=data,
                                     upsert=True)
        return site_rankings

    def get_dataset_popularity(self, q):
        """
        Get the estimated popularity for dataset
        """
        while True:
            # collect features
            data = q.get()
            dataset_name = data[0]
            date = data[1]
            popularity = 0.0
            # get average
            popularity = self.popularity.get_average_popularity(
                dataset_name, date)
            self.dataset_popularity[dataset_name] = popularity
            q.task_done()

    def get_site_popularity(self,
                            site_name,
                            date=datetime_day(datetime.datetime.utcnow())):
        """
        Get popularity for site
        """
        # get all datasets with a replica at the site and how many replicas it has
        coll = 'dataset_data'
        pipeline = list()
        match = {'$match': {'replicas': site_name}}
        pipeline.append(match)
        project = {'$project': {'name': 1, '_id': 0}}
        pipeline.append(project)
        data = self.storage.get_data(coll=coll, pipeline=pipeline)
        popularity = 0.0
        dataset_names = [dataset_data['name'] for dataset_data in data]
        # get the popularity of the dataset and decide by number of replicas
        coll = 'dataset_rankings'
        pipeline = list()
        match = {'$match': {'date': date}}
        pipeline.append(match)
        match = {'$match': {'name': {'$in': dataset_names}}}
        pipeline.append(match)
        group = {
            '$group': {
                '_id': '$date',
                'total_popularity': {
                    '$sum': '$popularity'
                }
            }
        }
        pipeline.append(group)
        project = {'$project': {'total_popularity': 1, '_id': 0}}
        pipeline.append(project)
        data = self.storage.get_data(coll=coll, pipeline=pipeline)
        try:
            popularity = data[0]['total_popularity']
        except:
            popularity = 0.0
        return popularity

    def get_site_storage_rankings(self, subscriptions):
        """
        Return the amount over the soft limit sites are including new subscriptions
        If site is not over just set to 0
        """
        site_rankings = dict()
        available_sites = self.sites.get_available_sites()
        for site_name in available_sites:
            site_rankings[site_name] = self.sites.get_over_soft_limit(
                site_name)
        for subscription in subscriptions:
            site_rankings[subscription[1]] += self.datasets.get_size(
                subscription[0])
        for site_name in available_sites:
            if site_rankings[site_name] < 0:
                del site_rankings[site_name]
        return site_rankings

    def normalize_popularity(self, date):
        """
        Normalize popularity values to be between 1 and max_replicas
        """
        dataset_rankings = dict()
        max_pop = max(self.dataset_popularity.iteritems(),
                      key=operator.itemgetter(1))[1]
        min_pop = min(self.dataset_popularity.iteritems(),
                      key=operator.itemgetter(1))[1]
        n = float(min_pop + (self.max_replicas - 1)) / max_pop
        m = 1 - n * min_pop
        for dataset_name, popularity in self.dataset_popularity.items():
            # store into dict
            rank = int(n * self.dataset_popularity[dataset_name] + m)
            dataset_rankings[dataset_name] = rank
            coll = 'dataset_rankings'
            query = data = {'name': dataset_name, 'date': date}
            data = {
                '$set': {
                    'name': dataset_name,
                    'date': date,
                    'rank': rank,
                    'popularity': popularity
                }
            }
            self.storage.update_data(coll=coll,
                                     query=query,
                                     data=data,
                                     upsert=True)
        return dataset_rankings

示例#2

显示文件

文件： ranker.py 项目： bbarrefors/CUADRnT

class Ranker(object):
    """
    Generic Ranking class
    """
    def __init__(self, config=dict()):
        self.logger = logging.getLogger(__name__)
        self.config = config
        self.sites = SiteManager(self.config)
        self.datasets = DatasetManager(self.config)
        self.popularity = PopularityManager(self.config)
        self.storage = StorageManager(self.config)
        self.max_replicas = int(config['rocker_board']['max_replicas'])
        self.MAX_THREADS = int(config['threading']['max_threads'])
        self.dataset_popularity = dict()

    def get_dataset_rankings(self, date=datetime_day(datetime.datetime.utcnow())):
        """
        Generate dataset rankings
        """
        self.dataset_popularity = dict()
        dataset_names = self.datasets.get_db_datasets()
        q = Queue.Queue()
        for i in range(self.MAX_THREADS):
            worker = threading.Thread(target=self.get_dataset_popularity, args=(q,))
            worker.daemon = True
            worker.start()
        # self.dataset_features = self.popularity.get_features(dataset_names, date)
        # self.dataset_tiers = self.datasets.get_data_tiers(dataset_names)
        for dataset_name in dataset_names:
            q.put((dataset_name, date))
        q.join()
        dataset_rankings = self.normalize_popularity(date)
        return dataset_rankings

    def get_site_rankings(self, date=datetime_day(datetime.datetime.utcnow())):
        """
        Generate site rankings
        """
        # get all sites which can be replicated to
        site_names = self.sites.get_available_sites()
        site_rankings = dict()
        for site_name in site_names:
            # get popularity
            popularity = self.get_site_popularity(site_name, date)
            # get cpu and storage (performance)
            performance = self.sites.get_performance(site_name)
            # get available storage
            available_storage_tb = self.sites.get_available_storage(site_name)/10**3
            if available_storage_tb <= 0:
                available_storage_tb = 0
            else:
                available_storage_tb = 1
            #calculate rank
            try:
                rank = (performance*available_storage_tb)/popularity
            except:
                rank = 0.0
            # store into dict
            site_rankings[site_name] = rank
            # insert into database
            coll = 'site_rankings'
            query = {'name':site_name, 'date':date}
            data = {'$set':{'name':site_name, 'date':date, 'rank':rank, 'popularity':popularity}}
            self.storage.update_data(coll=coll, query=query, data=data, upsert=True)
        return site_rankings

    def get_dataset_popularity(self, q):
        """
        Get the estimated popularity for dataset
        """
        while True:
            # collect features
            data = q.get()
            dataset_name = data[0]
            date = data[1]
            popularity = 0.0
            # get average
            popularity = self.popularity.get_average_popularity(dataset_name, date)
            self.dataset_popularity[dataset_name] = popularity
            q.task_done()

    def get_site_popularity(self, site_name, date=datetime_day(datetime.datetime.utcnow())):
        """
        Get popularity for site
        """
        # get all datasets with a replica at the site and how many replicas it has
        coll = 'dataset_data'
        pipeline = list()
        match = {'$match':{'replicas':site_name}}
        pipeline.append(match)
        project = {'$project':{'name':1, '_id':0}}
        pipeline.append(project)
        data = self.storage.get_data(coll=coll, pipeline=pipeline)
        popularity = 0.0
        dataset_names = [dataset_data['name'] for dataset_data in data]
        # get the popularity of the dataset and decide by number of replicas
        coll = 'dataset_rankings'
        pipeline = list()
        match = {'$match':{'date':date}}
        pipeline.append(match)
        match = {'$match':{'name':{'$in':dataset_names}}}
        pipeline.append(match)
        group = {'$group':{'_id':'$date', 'total_popularity':{'$sum':'$popularity'}}}
        pipeline.append(group)
        project = {'$project':{'total_popularity':1, '_id':0}}
        pipeline.append(project)
        data = self.storage.get_data(coll=coll, pipeline=pipeline)
        try:
            popularity = data[0]['total_popularity']
        except:
            popularity = 0.0
        return popularity

    def get_site_storage_rankings(self, subscriptions):
        """
        Return the amount over the soft limit sites are including new subscriptions
        If site is not over just set to 0
        """
        site_rankings = dict()
        available_sites = self.sites.get_available_sites()
        for site_name in available_sites:
            site_rankings[site_name] = self.sites.get_over_soft_limit(site_name)
        for subscription in subscriptions:
            site_rankings[subscription[1]] += self.datasets.get_size(subscription[0])
        for site_name in available_sites:
            if site_rankings[site_name] < 0:
                del site_rankings[site_name]
        return site_rankings

    def normalize_popularity(self, date):
        """
        Normalize popularity values to be between 1 and max_replicas
        """
        dataset_rankings = dict()
        max_pop = max(self.dataset_popularity.iteritems(), key=operator.itemgetter(1))[1]
        min_pop = min(self.dataset_popularity.iteritems(), key=operator.itemgetter(1))[1]
        n = float(min_pop + (self.max_replicas - 1))/max_pop
        m = 1 - n*min_pop
        for dataset_name, popularity in self.dataset_popularity.items():
            # store into dict
            rank = int(n*self.dataset_popularity[dataset_name] + m)
            dataset_rankings[dataset_name] = rank
            coll = 'dataset_rankings'
            query = data = {'name':dataset_name, 'date':date}
            data = {'$set':{'name':dataset_name, 'date':date, 'rank':rank, 'popularity':popularity}}
            self.storage.update_data(coll=coll, query=query, data=data, upsert=True)
        return dataset_rankings