def analyze(self):
    """Simulate the configured rule and analyze failures for it.

    The crushmap named by ``self.args.crushmap`` is converted with
    ``self.main.convert_to_crushmap`` and parsed into a ``Crush``
    instance; argument sanity checks run before and after that step.

    Returns:
        A tuple ``(d, worst, failure_domain)`` where ``d`` is the value
        returned by ``self.run_simulation``, ``worst`` the value returned
        by ``self.analyze_failures`` and ``failure_domain`` the second
        element returned by ``rule_get_take_failure_domain`` for
        ``self.args.rule``.
    """
    self.pre_sanity_check_args()

    crush = Crush(backward_compatibility=self.args.backward_compatibility)
    crush.parse(self.main.convert_to_crushmap(self.args.crushmap))

    # This check runs only once the crushmap has been parsed.
    self.post_sanity_check_args()

    take, failure_domain = crush.rule_get_take_failure_domain(self.args.rule)
    distribution = self.run_simulation(crush, take, failure_domain)
    worst = self.analyze_failures(crush, take, failure_domain)
    return (distribution, worst, failure_domain)
def run_optimize(self, p, rule_name, crushmap, with_positions=True):
    """Optimize ``crushmap`` for ``rule_name`` and print before/after reports.

    The bucket hierarchy is walked level by level, starting at the bucket
    the rule takes. Every bucket of a level is handed to ``o`` through a
    process pool and the non-empty results are appended to
    ``crushmap['choose_args']['optimize']``.

    Args:
        p: list of command line arguments for the ``analyze`` sub-command.
            It is extended IN PLACE with ``--rule <rule_name>`` and, later,
            ``--choose-args optimize``.
        rule_name: name of the rule to optimize.
        crushmap: crushmap accepted by ``Crush.parse``.
        with_positions: forwarded unchanged to ``o`` with every job.

    Raises:
        ValueError: if the initial simulation reports any overweight item.

    Side effects: changes the pandas ``display.max_rows`` and
    ``display.width`` options and prints the reports to stdout.
    """
    pd.set_option('display.max_rows', None)
    pd.set_option('display.width', 160)

    p.extend(['--rule', rule_name])
    analyzer = Ceph().constructor(['analyze'] + p)

    c = Crush(backward_compatibility=True)
    c.parse(crushmap)
    (take, failure_domain) = c.rule_get_take_failure_domain(rule_name)
    # Work on the crushmap as returned by Crush, with a fresh, empty
    # "optimize" choose_args entry.
    crushmap = c.get_crushmap()
    crushmap['choose_args'] = {"optimize": []}

    d = analyzer.run_simulation(c, take, failure_domain)
    if d['~overweight~'].any():
        raise ValueError(
            'no way to optimize when there is an overweight item')
    print(str(d))
    print(analyzer._format_report(d, 'device'))
    print(analyzer._format_report(d, failure_domain))
    print(analyzer.analyze_failures(c, take, failure_domain))

    p.extend(['--choose-args', 'optimize'])

    pool = Pool()
    try:
        children = [c.find_bucket(take)]
        while children:
            jobs = [(p, crushmap, item, with_positions) for item in children]
            results = pool.map(o, jobs)
            # Falsy results are dropped.
            crushmap['choose_args']['optimize'].extend(filter(None, results))
            next_level = []
            for item in children:
                next_level.extend(item.get('children', []))
            # Original note: "fail if all children are not of the same
            # type" -- no such check is implemented here.
            children = next_level
    finally:
        # Always release the worker processes, even if a job raised.
        pool.close()
        pool.join()

    pprint.pprint(crushmap)
    c.parse(crushmap)
    # A new analyzer is built because p now carries --choose-args.
    analyzer = Ceph().constructor(['analyze'] + p)
    d = analyzer.run_simulation(c, take, failure_domain)
    print(analyzer._format_report(d, 'device'))
    print(analyzer._format_report(d, failure_domain))
    print(analyzer.analyze_failures(c, take, failure_domain))
def optimize(self, crushmap):
    """Optimize the weights of every bucket reachable from the rule's take.

    The hierarchy is walked level by level, starting at the bucket taken
    by ``self.args.rule``. Each bucket of a level is passed to
    ``top_optimize`` (through a process pool when
    ``self.args.multithread`` is set) and every non-``None`` result
    ``(count, choose_arg)`` is applied with ``update_choose_args`` under
    the name ``self.args.choose_args``.

    When ``self.args.step`` is set, processing stops as soon as the
    accumulated count becomes greater than it.

    Args:
        crushmap: crushmap accepted by ``Crush.parse``.

    Returns:
        A tuple ``(total_count, crushmap)``: the sum of the counts
        returned by ``top_optimize`` and the crushmap as returned by
        ``Crush.get_crushmap`` after the updates.
    """
    c = Crush(backward_compatibility=self.args.backward_compatibility)
    c.parse(crushmap)
    crushmap = c.get_crushmap()
    # Make sure the choose_args entry to update exists, re-parsing each
    # time the crushmap had to be amended.
    if 'choose_args' not in crushmap:
        crushmap['choose_args'] = {}
        c.parse(crushmap)
    if self.args.choose_args not in crushmap['choose_args']:
        crushmap['choose_args'][self.args.choose_args] = []
        c.parse(crushmap)
    (take, _failure_domain) = c.rule_get_take_failure_domain(self.args.rule)

    parser = analyze.Analyze.get_parser()
    self.main.hook_analyze_args(parser)
    p = self.main.get_trimmed_argv(parser, self.args)
    # NOTE(review): the resulting instance is not used in this method; the
    # call is kept in case constructing it has side effects (for example
    # argument validation) -- TODO confirm and drop if it has none.
    self.main.clone().constructor(['analyze'] + p)

    pool = None
    if self.args.multithread:
        from multiprocessing import Pool
        pool = Pool()
    try:
        children = [c.find_bucket(take)]
        total_count = 0
        over_step = False
        n = self.main.value_name()
        while not over_step and children:
            jobs = [(self, p, c.get_crushmap(), item) for item in children]
            if pool is not None:
                results = pool.map(top_optimize, jobs)
            else:
                results = list(map(top_optimize, jobs))
            for child, result in zip(children, results):
                if result is None:
                    continue
                (count, choose_arg) = result
                total_count += count
                c.update_choose_args(self.args.choose_args, [choose_arg])
                log.info(child['name'] + " weights updated with " + str(choose_arg))
                if self.args.step and count > 0:
                    log.warning(child['name'] + " will swap " +
                                str(count) + " " + n)
                # Stop applying results as soon as the budget is exceeded;
                # the while condition then ends the walk.
                over_step = self.args.step and total_count > self.args.step
                if over_step:
                    break
            next_level = []
            for item in children:
                next_level.extend(item.get('children', []))
            # Original note: "fail if all children are not of the same
            # type" -- no such check is implemented here.
            children = next_level
    finally:
        # Always release the worker processes, even if a job raised.
        if pool is not None:
            pool.close()
            pool.join()
    return (total_count, c.get_crushmap())
def analyze_crushmap(self, crushmap):
    """Parse ``crushmap`` and simulate ``self.args.rule`` on it.

    Args:
        crushmap: crushmap accepted by ``Crush.parse``.

    Returns:
        Whatever ``self.run_simulation`` returns for the take and failure
        domain of ``self.args.rule``.
    """
    crush = Crush(backward_compatibility=self.args.backward_compatibility)
    crush.parse(crushmap)
    rule_info = crush.rule_get_take_failure_domain(self.args.rule)
    take, failure_domain = rule_info
    return self.run_simulation(crush, take, failure_domain)
def optimize_replica(self, p, origin_crushmap,
                     crushmap, bucket,
                     replication_count, choose_arg_position):
    """Iteratively adjust one weight_set position of ``bucket``.

    On each iteration the simulation is run with the candidate weights,
    the per-item delta between the simulated value and ``~expected~`` is
    computed for the children of ``bucket`` and some weight is shifted
    from the item with the largest delta to the item with the smallest.
    The weights giving the smallest sum of absolute deltas are stored in
    ``choose_arg['weight_set'][choose_arg_position]`` at the end.

    The loop stops when any of the following holds:
      * the total delta did not improve for 10 iterations,
      * the total delta is zero,
      * ``self.args.step`` is set and more than that many values moved,
      * no item is above, or no item is below, its expected value,
      * the integer shift rounds down to zero,
      * 1000 iterations were run.

    Args:
        p: list of arguments for the ``analyze`` sub-command.
        origin_crushmap: crushmap given to ``set_origin_crushmap`` of the
            compare instance, used as the reference for counting moves.
        crushmap: crushmap being optimized; its choose_arg for ``bucket``
            is modified in place.
        bucket: bucket dict with at least ``name`` and ``children``.
        replication_count: replication count set on both the analyze and
            the compare instances.
        choose_arg_position: index in ``weight_set`` to optimize.

    Returns:
        The sum of all values of the nested ``from_to`` mapping returned
        by ``compare_bucket`` for the final weights.
    """
    a = self.main.clone().constructor(['analyze'] + p)
    a.args.replication_count = replication_count

    parser = compare.Compare.get_parser()
    self.main.hook_compare_args(parser)
    cp = self.main.get_trimmed_argv(parser, self.args)
    compare_instance = self.main.clone().constructor(['compare'] + cp)
    compare_instance.args.replication_count = replication_count
    compare_instance.set_origin_crushmap(origin_crushmap)

    choose_arg = self.get_choose_arg(crushmap, bucket)
    self.set_choose_arg_position(choose_arg, bucket, choose_arg_position)
    # Map child id -> current weight, in the order of the children.
    id2weight = collections.OrderedDict()
    for pos, child in enumerate(bucket['children']):
        id2weight[child['id']] = choose_arg['weight_set'][choose_arg_position][pos]

    log.info(bucket['name'] + " optimizing replica " + str(replication_count) + " " +
             str(list(id2weight.values())))
    log.debug(bucket['name'] + " optimizing replica " + str(replication_count) + " " +
              str(dict(id2weight)))
    c = Crush(backward_compatibility=self.args.backward_compatibility)
    c.parse(crushmap)

    (take, failure_domain) = c.rule_get_take_failure_domain(a.args.rule)
    #
    # initial simulation, only used to build the row mask selecting the
    # children of the bucket; the mask is reused on every later simulation
    #
    i = a.run_simulation(c, take, failure_domain)
    i = i.reset_index()
    s = i['~name~'].isin([item['name'] for item in bucket['children']])

    previous_delta = None
    improve_tolerance = 10
    no_improvement = 0
    max_iterations = 1000
    from_to_count = 0
    best_weights = list(id2weight.values())
    n = self.main.value_name()
    for iterations in range(max_iterations):
        choose_arg['weight_set'][choose_arg_position] = list(id2weight.values())
        c.parse(crushmap)
        z = a.run_simulation(c, take, failure_domain)
        z = z.reset_index()
        d = z[s].copy()
        d['~delta~'] = d['~' + n + '~'] - d['~expected~']
        d['~delta%~'] = d['~delta~'] / d['~expected~']
        delta = d['~delta~'].abs().sum()
        if previous_delta is None:
            # first iteration: the current weights are the reference
            best_weights = list(id2weight.values())
            previous_delta = delta
        else:
            if previous_delta < delta:
                no_improvement += 1
            else:
                previous_delta = delta
                best_weights = list(id2weight.values())
                no_improvement = 0
            if no_improvement >= improve_tolerance:
                log.info("stop because " + str(no_improvement) + " tries")
                break
        if delta == 0:
            log.info("stop because the distribution is perfect")
            break
        log.info(bucket['name'] + " delta " + str(delta))
        if self.args.step and no_improvement == 0:
            compare_instance.set_destination(c)
            (from_to, in_out) = compare_instance.compare_bucket(bucket)
            from_to_count = sum(sum(x.values()) for x in from_to.values())
            in_out_count = sum(sum(x.values()) for x in in_out.values())
            log.debug("moved from_to " + str(from_to_count) +
                      " in_out " + str(in_out_count))
            if from_to_count > self.args.step:
                log.info("stopped because moved " + str(from_to_count) +
                         " --step " + str(self.args.step))
                break
        d = d.sort_values('~delta~', ascending=False)
        most_over = d.iloc[0]
        most_under = d.iloc[-1]
        if most_over['~delta~'] <= 0 or most_under['~delta~'] >= 0:
            log.info("stop because [" + str(most_over['~delta~']) + "," +
                     str(most_under['~delta~']) + "]")
            break
        # there should not be a need to keep the sum of the weights to the
        # same value, they are only used locally for placement and have no
        # impact on the upper weights nor are they derived from the weights
        # from below *HOWEVER* in case of a failure the weights need to be
        # as close as possible from the target weight to limit the negative
        # impact
        shift = int(id2weight[most_over['~id~']] *
                    min(0.01, abs(most_over['~delta%~'])))
        if shift <= 0:
            log.info("stop because shift is zero")
            break
        log.debug("shift from " + str(most_over['~id~']) +
                  " to " + str(most_under['~id~']))
        id2weight[most_over['~id~']] -= shift
        id2weight[most_under['~id~']] += shift

    # Whatever ended the loop, keep the best weights seen and count the
    # moves they imply relative to the origin crushmap.
    choose_arg['weight_set'][choose_arg_position] = best_weights
    c.parse(crushmap)
    compare_instance.set_destination(c)
    (from_to, in_out) = compare_instance.compare_bucket(bucket)
    from_to_count = sum(sum(x.values()) for x in from_to.values())

    if iterations >= max_iterations - 1:
        log.info("stopped after " + str(iterations))
    log.info(bucket['name'] + " replica " + str(replication_count) + " optimized")
    log.info(bucket['name'] + " weights " + str(choose_arg['weight_set'][choose_arg_position]))
    return from_to_count
def optimize_replica(a, crushmap, bucket, replication_count, choose_arg, choose_arg_position):
    """Iteratively adjust ``choose_arg['weight_set'][choose_arg_position]``.

    Starting from the ``weight`` of each child of ``bucket``, every
    iteration runs the simulation, computes for the children the delta
    between ``~objects~`` and ``~expected~`` and shifts some weight from
    the item with the largest delta to the item with the smallest. The
    weights giving the smallest sum of absolute deltas are left in
    ``choose_arg`` when the function returns.

    The loop stops when the total delta did not improve for 10
    iterations, when no item is above (or none is below) its expected
    value, when the receiving item's weight is smaller than the shift,
    or after 1000 iterations.

    Args:
        a: analyze instance providing ``args`` and ``run_simulation``;
            ``a.args.replication_count`` is overwritten.
        crushmap: crushmap accepted by ``Crush.parse``. Presumably
            ``choose_arg`` is part of it, so that re-parsing picks up the
            new weights -- verify against the caller.
        bucket: bucket dict with ``name`` and ``children`` (each child
            having ``id``, ``name`` and ``weight``).
        replication_count: replication count to simulate.
        choose_arg: dict with a ``weight_set`` list, modified in place.
        choose_arg_position: index in ``weight_set`` to optimize.

    Returns:
        None. Progress is printed to stdout and the pandas
        ``display.precision`` option is set to 2.
    """
    a.args.replication_count = replication_count
    id2weight = collections.OrderedDict(
        (child['id'], child['weight']) for child in bucket['children'])

    c = Crush(backward_compatibility=True)
    c.parse(crushmap)
    (take, failure_domain) = c.rule_get_take_failure_domain(a.args.rule)

    # The full option name is used: the bare 'precision' pattern is not
    # accepted by every pandas version.
    pd.set_option('display.precision', 2)

    c.parse(crushmap)
    #
    # initial simulation, only used to build the row mask selecting the
    # children of the bucket; the mask is reused on every later simulation
    #
    i = a.run_simulation(c, take, failure_domain)
    i = i.reset_index()
    s = i['~name~'].isin([item['name'] for item in bucket['children']])

    previous_kl = None
    improve_tolerance = 10
    no_improvement = 0
    max_iterations = 1000
    # list(...) takes a snapshot: on Python 3 values() is a live view that
    # would otherwise follow every later change made to id2weight.
    best_weights = list(id2weight.values())
    for iterations in range(max_iterations):
        choose_arg['weight_set'][choose_arg_position] = list(id2weight.values())
        c.parse(crushmap)
        z = a.run_simulation(c, take, failure_domain)
        z = z.reset_index()
        d = z[s].copy()
        d['~delta~'] = d['~objects~'] - d['~expected~']
        d['~delta%~'] = d['~delta~'] / d['~expected~']
        # Despite its name, kl is the sum of the absolute deltas.
        kl = d['~delta~'].abs().sum()
        # stop when kl stops decreasing for improve_tolerance iterations,
        # meaning what we're doing is no longer reducing kl
        if previous_kl is None:
            best_weights = list(id2weight.values())
            previous_kl = kl
        else:
            if previous_kl < kl:
                no_improvement += 1
            else:
                previous_kl = kl
                best_weights = list(id2weight.values())
                no_improvement = 0
            if no_improvement >= improve_tolerance:
                break
        print(bucket['name'] + " kl " + str(kl) + " no_improvement " + str(no_improvement))
        d = d.sort_values('~delta~', ascending=False)
        most_over = d.iloc[0]
        most_under = d.iloc[-1]
        if most_over['~delta~'] <= 0 or most_under['~delta~'] >= 0:
            break
        # there should not be a need to keep the sum of the weights to the
        # same value, they are only used locally for placement and have no
        # impact on the upper weights nor are they derived from the weights
        # from below *HOWEVER* in case of a failure the weights need to be
        # as close as possible from the target weight to limit the negative
        # impact
        shift = id2weight[most_over['~id~']] * min(0.01, most_over['~delta%~'])
        if id2weight[most_under['~id~']] < shift:
            break
        id2weight[most_over['~id~']] -= shift
        id2weight[most_under['~id~']] += shift

    # Whatever ended the loop, leave the best weights seen in place
    # instead of the last, possibly worse, candidate.
    choose_arg['weight_set'][choose_arg_position] = best_weights

    if iterations >= max_iterations - 1:
        print("!!!!!!!! stoped after " + str(iterations))
    print("Done " + str(no_improvement))