def clean(self, dataset_rankings, site_rankings):
    """
    Suggest deletions based on dataset and site rankings

    On every pass the dataset with the lowest rank is selected and one of
    the ranked sites holding it is drawn with weighted_choice. Both
    mappings are modified in place.

    :param dataset_rankings: dict of dataset name -> rank. The rank of a
        dataset is increased by 1 each time one of its replicas is
        suggested; datasets with no eligible site left are removed.
    :param site_rankings: dict of site name -> rank. The rank of a site is
        reduced by the size of each dataset suggested for deletion there
        and the site is removed once its rank is zero or below
        (presumably the rank is the amount of GB to free - confirm
        against the caller).
    :return: list of (dataset_name, site_name) tuples
    """
    deletions = list()
    deleted_gb = 0
    # get_sites() cannot know about suggestions made in this call, so
    # remember them here to never suggest the same replica twice.
    suggested_sites = dict()
    # Stop when either mapping runs dry; min() on an empty mapping raises
    # ValueError.
    while site_rankings and dataset_rankings:
        dataset_name = min(dataset_rankings, key=dataset_rankings.get)
        size_gb = self.datasets.get_size(dataset_name)
        already_suggested = suggested_sites.setdefault(dataset_name, set())
        candidates = dict()
        for site_name in set(self.datasets.get_sites(dataset_name)):
            if site_name in already_suggested:
                continue
            # Only sites which still have a ranking are eligible
            if site_name in site_rankings:
                candidates[site_name] = site_rankings[site_name]
        if not candidates:
            # Nothing more can be deleted for this dataset
            del dataset_rankings[dataset_name]
            continue
        site_name = weighted_choice(candidates)
        deletions.append((dataset_name, site_name))
        already_suggested.add(site_name)
        deleted_gb += size_gb
        site_rankings[site_name] -= size_gb
        dataset_rankings[dataset_name] += 1
        if site_rankings[site_name] <= 0:
            del site_rankings[site_name]
    self.logger.info("Deleted %dGB", deleted_gb)
    return deletions
def balance(self):
    """
    Balance system by creating new replicas based on popularity

    Datasets are drawn with weighted_choice from the dataset rankings and
    each one is assigned to a site drawn with weighted_choice from the
    sites that do not already hold it, still have a positive rank and have
    enough storage left. The loop stops once self.max_gb has been
    subscribed, when the drawn dataset ranks below self.min_rank, or when
    no site can take the drawn dataset.

    :return: list of (dataset_name, site_name) tuples
    """
    subscriptions = list()
    dataset_rankings = self.rankings.dataset_rankings()
    site_rankings = self.rankings.site_rankings()
    subscribed_gb = 0
    # GB subscribed per site during this call; get_available_storage()
    # cannot know about subscriptions that are only suggested here.
    pending_gb = dict()
    while subscribed_gb < self.max_gb:
        dataset_name = weighted_choice(dataset_rankings)
        if (not dataset_name) or (dataset_rankings[dataset_name] < self.min_rank):
            break
        size_gb = self.datasets.get_size(dataset_name)
        hosting_sites = set(self.datasets.get_sites(dataset_name))
        # Build a fresh candidate mapping for every dataset so that
        # filtering for one dataset never removes sites from site_rankings.
        candidates = dict()
        for site_name, site_rank in site_rankings.items():
            if (site_name in hosting_sites) or (site_rank <= 0):
                continue
            free_gb = self.sites.get_available_storage(site_name) - pending_gb.get(site_name, 0)
            if free_gb >= size_gb:
                candidates[site_name] = site_rank
        if not candidates:
            break
        site_name = weighted_choice(candidates)
        subscriptions.append((dataset_name, site_name))
        subscribed_gb += size_gb
        avail_storage = self.sites.get_available_storage(site_name) - pending_gb.get(site_name, 0)
        self.logger.info('rank: %s\tsize: %.2f\tdataset: %s', dataset_rankings[dataset_name], size_gb, dataset_name)
        self.logger.info('rank: %s\tstorage: %d\tsite: %s', site_rankings[site_name], avail_storage, site_name)
        new_avail_storage = avail_storage - size_gb
        pending_gb[site_name] = pending_gb.get(site_name, 0) + size_gb
        if new_avail_storage > 0:
            # Scale the rank by the fraction of storage that remains.
            # float() avoids integer division on Python 2.
            new_rank = site_rankings[site_name] * (float(new_avail_storage) / avail_storage)
        else:
            # Site is full, make sure it is not selected again
            new_rank = 0.0
        site_rankings[site_name] = new_rank
        # Each dataset gets at most one new replica per call
        del dataset_rankings[dataset_name]
    self.logger.info('Subscribed %dGB', subscribed_gb)
    return subscriptions
def replicate(self, dataset_rankings, site_rankings):
    """
    Balance system by creating new replicas based on popularity

    On every pass the dataset with the highest rank is selected and a site
    is drawn with weighted_choice from the ranked sites that do not hold
    the dataset, have a positive rank and have enough storage left. Both
    mappings are modified in place.

    :param dataset_rankings: dict of dataset name -> rank. The rank is
        reduced by 1 for every new replica; a dataset that no site can
        take is removed. Replication stops once the highest rank is
        below 1.
    :param site_rankings: dict of site name -> rank. A site is removed
        once its tracked available storage is zero or below.
    :return: list of (dataset_name, site_name) tuples
    """
    subscriptions = list()
    subscribed_gb = 0
    sites_available_storage_gb = self.sites.get_all_available_storage()
    # Bookkeeping for this call only. get_sites() and
    # get_available_storage() cannot know about subscriptions that are
    # merely suggested here.
    new_replica_sites = dict()  # dataset name -> sites already picked
    pending_gb = dict()  # site name -> GB already subscribed
    # max() on an empty mapping raises ValueError, so stop when no
    # datasets are left.
    while (subscribed_gb < self.max_gb) and site_rankings and dataset_rankings:
        dataset_name = max(dataset_rankings, key=dataset_rankings.get)
        dataset_rank = dataset_rankings[dataset_name]
        if (not dataset_name) or (dataset_rank < 1):
            break
        size_gb = self.datasets.get_size(dataset_name)
        excluded_sites = set(self.datasets.get_sites(dataset_name))
        excluded_sites.update(new_replica_sites.get(dataset_name, ()))
        candidates = dict()
        for site_name, site_rank in site_rankings.items():
            if (site_name in excluded_sites) or (site_rank <= 0):
                continue
            free_gb = self.sites.get_available_storage(site_name) - pending_gb.get(site_name, 0)
            if free_gb >= size_gb:
                candidates[site_name] = site_rank
        if not candidates:
            # No site can take this dataset, do not consider it again
            del dataset_rankings[dataset_name]
            continue
        site_name = weighted_choice(candidates)
        subscriptions.append((dataset_name, site_name))
        subscribed_gb += size_gb
        new_replica_sites.setdefault(dataset_name, set()).add(site_name)
        pending_gb[site_name] = pending_gb.get(site_name, 0) + size_gb
        sites_available_storage_gb[site_name] -= size_gb
        self.logger.info("%s : added", dataset_name)
        if sites_available_storage_gb[site_name] <= 0:
            del site_rankings[site_name]
        dataset_rankings[dataset_name] -= 1
    self.logger.info("Subscribed %dGB", subscribed_gb)
    return subscriptions
def replicate(self, dataset_rankings, site_rankings):
    """
    Balance system by creating new replicas based on popularity

    The highest ranked dataset is handled first. Its new replica goes to a
    site drawn with weighted_choice among the ranked sites which do not
    hold the dataset, have a positive rank and can still fit it. Both
    mappings are modified in place.

    :param dataset_rankings: dict of dataset name -> rank. Every new
        replica lowers the rank by 1; datasets which fit nowhere are
        removed. The loop ends when the highest rank drops below 1.
    :param site_rankings: dict of site name -> rank. Sites whose tracked
        available storage reaches zero or below are removed.
    :return: list of (dataset_name, site_name) tuples
    """
    subscriptions = list()
    subscribed_gb = 0
    sites_available_storage_gb = self.sites.get_all_available_storage()
    # Subscriptions are only suggested here, so get_sites() and
    # get_available_storage() do not reflect them. Track them locally.
    picked_sites = dict()  # dataset name -> sites chosen in this call
    reserved_gb = dict()  # site name -> GB subscribed in this call
    while subscribed_gb < self.max_gb:
        # max() raises ValueError on an empty mapping
        if not site_rankings or not dataset_rankings:
            break
        dataset_name = max(dataset_rankings, key=dataset_rankings.get)
        dataset_rank = dataset_rankings[dataset_name]
        if (not dataset_name) or (dataset_rank < 1):
            break
        size_gb = self.datasets.get_size(dataset_name)
        taken_sites = set(self.datasets.get_sites(dataset_name))
        taken_sites.update(picked_sites.get(dataset_name, ()))
        eligible_sites = dict()
        for site_name, site_rank in site_rankings.items():
            if (site_name in taken_sites) or (site_rank <= 0):
                continue
            remaining_gb = self.sites.get_available_storage(site_name) - reserved_gb.get(site_name, 0)
            if remaining_gb < size_gb:
                continue
            eligible_sites[site_name] = site_rank
        if not eligible_sites:
            # Nowhere to put this dataset, drop it from consideration
            del dataset_rankings[dataset_name]
            continue
        site_name = weighted_choice(eligible_sites)
        subscriptions.append((dataset_name, site_name))
        subscribed_gb += size_gb
        picked_sites.setdefault(dataset_name, set()).add(site_name)
        reserved_gb[site_name] = reserved_gb.get(site_name, 0) + size_gb
        sites_available_storage_gb[site_name] -= size_gb
        self.logger.info('%s : added', dataset_name)
        if sites_available_storage_gb[site_name] <= 0:
            del site_rankings[site_name]
        dataset_rankings[dataset_name] -= 1
    self.logger.info('Subscribed %dGB', subscribed_gb)
    return subscriptions
def test_weighted_choice(self):
    """Test weighted_choice function"""
    choices = {'foo': 1.5, 'bar': 5.9}
    # A single draw says little about a weighted pick, so sample several
    # times. assertIn reports the offending value on failure, unlike
    # assertTrue(x in y).
    for _ in range(50):
        self.assertIn(weighted_choice(choices), choices)
def test_weighted_choice(self):
    """Test weighted_choice function"""
    choices = {'foo': 1.5, 'bar': 5.9}
    # Draw repeatedly: one sample of a weighted pick is a weak check.
    # assertIn shows the unexpected value when the check fails.
    for _ in range(50):
        result = weighted_choice(choices)
        self.assertIn(result, choices)