def ddpg(env, config, n_loops=100):
    results = []
    x_axis = []
    num_collections = config['num_collections']
    gamma = config['gamma']
    a_lr = config['a_lr']
    c_lr = config['c_lr']
    n_epochs = config['n_epochs']
    ahs = config['a_hidden_sizes']
    chs = config['c_hidden_sizes']
    model_ddpg = DDPG(n_actions=env.knob_dim, n_states=env.metric_dim, gamma=gamma,
                      clr=c_lr, alr=a_lr, shift=0, a_hidden_sizes=ahs, c_hidden_sizes=chs)
    knob_data = np.random.rand(env.knob_dim)
    prev_metric_data = np.zeros(env.metric_dim)

    # Warm-up phase: collect transitions from random actions to seed the replay memory.
    for i in range(num_collections):
        action = np.random.rand(env.knob_dim)
        reward, metric_data = env.simulate(action)
        if i > 0:
            model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, metric_data)
        prev_metric_data = metric_data
        # Store the action that was actually executed, not the stale initial knob_data.
        prev_knob_data = action
        prev_reward = reward

    # Main tuning loop: act, store the transition, train, and choose the next action.
    for i in range(n_loops):
        reward, metric_data = env.simulate(knob_data)
        # The state is reused as its own next state, mirroring train_ddpg() below.
        model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, prev_metric_data)
        prev_metric_data = metric_data
        prev_knob_data = knob_data
        prev_reward = reward
        for _ in range(n_epochs):
            model_ddpg.update()
        results.append(reward)
        x_axis.append(i + 1)
        LOG.info('loop: %d reward: %f', i, reward[0])
        knob_data = model_ddpg.choose_action(metric_data)
    return np.array(results), np.array(x_axis)
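# --- Illustrative usage sketch (not part of the original code) ---
# ddpg() only assumes that `env` exposes `knob_dim`, `metric_dim`, and
# `simulate(action) -> (reward, metric_data)`, and that `config` carries the
# keys read at the top of the function. The environment class and the config
# values below are hypothetical placeholders, not project defaults.

class RandomEnv(object):
    """Toy stand-in for the tuning environment expected by ddpg()."""

    def __init__(self, knob_dim=8, metric_dim=60):
        self.knob_dim = knob_dim
        self.metric_dim = metric_dim

    def simulate(self, action):
        # Reward is a 1-element array, matching the reward[0] used in the LOG call above.
        reward = np.array([-np.sum(np.square(action - 0.5))])
        metric_data = np.random.rand(self.metric_dim)
        return reward, metric_data


example_config = {
    'num_collections': 10,            # random warm-up samples
    'gamma': 0.9,                     # discount factor (illustrative value)
    'a_lr': 0.02,                     # actor learning rate (illustrative value)
    'c_lr': 0.001,                    # critic learning rate (illustrative value)
    'n_epochs': 30,                   # gradient updates per collected sample
    'a_hidden_sizes': [128, 128, 64], # illustrative actor layer sizes
    'c_hidden_sizes': [64, 128, 64],  # illustrative critic layer sizes
}

# results, x_axis = ddpg(RandomEnv(), example_config, n_loops=100)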
def train_ddpg(result_id):
    LOG.info('Add training data to ddpg and train ddpg')
    result = Result.objects.get(pk=result_id)
    session = Result.objects.get(pk=result_id).session
    session_results = Result.objects.filter(session=session,
                                            creation_time__lt=result.creation_time)
    result_info = {}
    result_info['newest_result_id'] = result_id
    if len(session_results) == 0:
        LOG.info('No previous result. Abort.')
        return result_info

    # Extract data from result
    result = Result.objects.filter(pk=result_id)
    base_result_id = session_results[0].pk
    base_result = Result.objects.filter(pk=base_result_id)

    agg_data = DataUtil.aggregate_data(result)
    metric_data = agg_data['y_matrix'].flatten()
    base_metric_data = (DataUtil.aggregate_data(base_result))['y_matrix'].flatten()
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]

    # Clean knob data
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'], session)
    knob_data = np.array(cleaned_knob_data[0])
    knob_labels = np.array(cleaned_knob_data[1])
    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels.flatten(), session))
    knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
    knob_num = len(knob_data)
    metric_num = len(metric_data)
    LOG.info('knob_num: %d, metric_num: %d', knob_num, metric_num)

    # Filter ys by current target objective metric
    result = Result.objects.get(pk=result_id)
    target_objective = result.session.target_objective
    target_obj_idx = [i for i, n in enumerate(agg_data['y_columnlabels'])
                      if n == target_objective]
    if len(target_obj_idx) == 0:
        raise Exception(('Could not find target objective in metrics '
                         '(target_obj={})').format(target_objective))
    elif len(target_obj_idx) > 1:
        raise Exception(('Found {} instances of target objective in '
                         'metrics (target_obj={})').format(len(target_obj_idx),
                                                           target_objective))
    objective = metric_data[target_obj_idx]
    base_objective = base_metric_data[target_obj_idx]
    metric_meta = db.target_objectives.get_metric_metadata(
        result.session.dbms.pk, result.session.target_objective)

    # Calculate the reward
    objective = objective / base_objective
    if metric_meta[target_objective].improvement == '(less is better)':
        reward = -objective
    else:
        reward = objective
    LOG.info('reward: %f', reward)

    # Update ddpg
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
                clr=CRITIC_LEARNING_RATE, gamma=0, batch_size=DDPG_BATCH_SIZE)
    if session.ddpg_actor_model and session.ddpg_critic_model:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    ddpg.add_sample(normalized_metric_data, knob_data, reward, normalized_metric_data)
    for _ in range(25):
        ddpg.update()
    session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
    session.ddpg_reply_memory = ddpg.replay_memory.get()
    session.save()
    return result_info
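# --- Illustrative helper (not part of the original code) ---
# The reward in the version of train_ddpg() above scales the target objective
# by the session's first (base) result and flips the sign for less-is-better
# metrics such as latency. `simple_reward` is a hypothetical name used only to
# restate that rule outside the Django task.

def simple_reward(objective, base_objective, less_is_better):
    scaled = objective / base_objective
    return -scaled if less_is_better else scaled

# e.g. a latency of 80 ms against a 100 ms baseline gives reward -0.8,
# while a throughput of 1200 tps against a 1000 tps baseline gives reward 1.2.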
def train_ddpg(result_id):
    LOG.info('Add training data to ddpg and train ddpg')
    result = Result.objects.get(pk=result_id)
    session = Result.objects.get(pk=result_id).session
    params = JSONUtil.loads(session.hyperparameters)
    session_results = Result.objects.filter(session=session,
                                            creation_time__lt=result.creation_time)
    result_info = {}
    result_info['newest_result_id'] = result_id

    # Extract data from result and previous results
    result = Result.objects.filter(pk=result_id)
    if len(session_results) == 0:
        base_result_id = result_id
        prev_result_id = result_id
    else:
        base_result_id = session_results[0].pk
        prev_result_id = session_results[len(session_results) - 1].pk
    base_result = Result.objects.filter(pk=base_result_id)
    prev_result = Result.objects.filter(pk=prev_result_id)

    agg_data = DataUtil.aggregate_data(result)
    base_metric_data = (DataUtil.aggregate_data(base_result))['y_matrix'].flatten()
    prev_metric_data = (DataUtil.aggregate_data(prev_result))['y_matrix'].flatten()

    result = Result.objects.get(pk=result_id)
    target_objective = result.session.target_objective
    prev_obj_idx = [i for i, n in enumerate(agg_data['y_columnlabels'])
                    if n == target_objective]

    # Clean metric data
    metric_data, metric_labels = clean_metric_data(agg_data['y_matrix'],
                                                   agg_data['y_columnlabels'], session)
    metric_data = metric_data.flatten()
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]

    # Clean knob data
    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'],
                                        agg_data['X_columnlabels'], session)
    knob_data = np.array(cleaned_knob_data[0])
    knob_labels = np.array(cleaned_knob_data[1])
    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels.flatten(), session))
    knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
    knob_num = len(knob_data)
    metric_num = len(metric_data)
    LOG.info('knob_num: %d, metric_num: %d', knob_num, metric_num)

    # Filter ys by current target objective metric
    target_obj_idx = [i for i, n in enumerate(metric_labels) if n == target_objective]
    if len(target_obj_idx) == 0:
        raise Exception(('Could not find target objective in metrics '
                         '(target_obj={})').format(target_objective))
    elif len(target_obj_idx) > 1:
        raise Exception(('Found {} instances of target objective in '
                         'metrics (target_obj={})').format(len(target_obj_idx),
                                                           target_objective))
    objective = metric_data[target_obj_idx]
    base_objective = base_metric_data[prev_obj_idx]
    prev_objective = prev_metric_data[prev_obj_idx]
    metric_meta = db.target_objectives.get_metric_metadata(
        result.session.dbms.pk, result.session.target_objective)

    # Calculate the reward
    if params['DDPG_SIMPLE_REWARD']:
        objective = objective / base_objective
        if metric_meta[target_objective].improvement == '(less is better)':
            reward = -objective
        else:
            reward = objective
    else:
        if metric_meta[target_objective].improvement == '(less is better)':
            if objective - base_objective <= 0:  # positive reward
                reward = (np.square((2 * base_objective - objective) / base_objective) - 1)\
                    * abs(2 * prev_objective - objective) / prev_objective
            else:  # negative reward
                reward = -(np.square(objective / base_objective) - 1) * objective / prev_objective
        else:
            if objective - base_objective > 0:  # positive reward
                reward = (np.square(objective / base_objective) - 1) * objective / prev_objective
            else:  # negative reward
                reward = -(np.square((2 * base_objective - objective) / base_objective) - 1)\
                    * abs(2 * prev_objective - objective) / prev_objective
    LOG.info('reward: %f', reward)

    # Update ddpg
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num,
                alr=params['DDPG_ACTOR_LEARNING_RATE'],
                clr=params['DDPG_CRITIC_LEARNING_RATE'],
                gamma=params['DDPG_GAMMA'], batch_size=params['DDPG_BATCH_SIZE'],
                a_hidden_sizes=params['DDPG_ACTOR_HIDDEN_SIZES'],
                c_hidden_sizes=params['DDPG_CRITIC_HIDDEN_SIZES'],
                use_default=params['DDPG_USE_DEFAULT'])
    if session.ddpg_actor_model and session.ddpg_critic_model:
        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    if session.ddpg_reply_memory:
        ddpg.replay_memory.set(session.ddpg_reply_memory)
    ddpg.add_sample(normalized_metric_data, knob_data, reward, normalized_metric_data)
    for _ in range(params['DDPG_UPDATE_EPOCHS']):
        ddpg.update()
    session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
    session.ddpg_reply_memory = ddpg.replay_memory.get()
    session.save()
    return result_info
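# --- Illustrative hyperparameter blob (not part of the original code) ---
# The later version of train_ddpg() reads every DDPG setting from
# session.hyperparameters, a JSON string parsed by JSONUtil.loads() at the top
# of the function. The keys below are exactly the ones accessed via params[...];
# the values are placeholders for illustration, not the project's defaults.

example_hyperparameters = """{
    "DDPG_SIMPLE_REWARD": false,
    "DDPG_ACTOR_LEARNING_RATE": 0.02,
    "DDPG_CRITIC_LEARNING_RATE": 0.001,
    "DDPG_GAMMA": 0.0,
    "DDPG_BATCH_SIZE": 32,
    "DDPG_ACTOR_HIDDEN_SIZES": [128, 128, 64],
    "DDPG_CRITIC_HIDDEN_SIZES": [64, 128, 64],
    "DDPG_USE_DEFAULT": false,
    "DDPG_UPDATE_EPOCHS": 30
}"""

# params = JSONUtil.loads(example_hyperparameters)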