Exemplo n.º 1
0
def _ensure_dir(path):
    """Create directory *path* (and missing parents) unless it already exists.

    Attempting the creation and then checking the outcome avoids the race in
    "check that it is missing, then mkdir": another process may create the
    directory between the two steps.
    """
    try:
        os.makedirs(path)
    except OSError:
        # Only swallow the error when the directory is really there now.
        if not os.path.isdir(path):
            raise


def prepare():
    """Parse command-line options, load the task records and build env + model.

    Steps, in order:
      1. Make sure ``CONST.LOG_PATH`` and ``CONST.LOG_SYSBENCH_PATH`` exist.
      2. Parse ``--batch_size``, ``--memory``, ``--task_id``, ``--inst_id``,
         ``--model_id`` and ``--host`` from the command line.
      3. Initialise the logger and store the task id in ``CONST.TASK_ID``.
      4. Fetch the task, instance and model rows from the task database and
         call ``os_quit(Err.INPUT_ERROR, ...)`` if any of them is missing.
      5. Build the ``environment.TencentServer`` and the model
         (``models.DDPG`` when the model's method is ``'ddpg'``, otherwise
         ``models.DQN``), optionally loading replay memory from ``--memory``.

    Side effects:
        Sets the module globals ``opt``, ``task_detail``, ``instance_detail``
        and ``model_detail``.

    Returns:
        tuple: ``(env, model)``.
    """
    global opt, task_detail, instance_detail, model_detail

    _ensure_dir(CONST.LOG_PATH)
    _ensure_dir(CONST.LOG_SYSBENCH_PATH)

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size',
                        type=int,
                        default=2,
                        help='Training Batch Size')
    parser.add_argument('--memory',
                        type=str,
                        default='',
                        help='add replay memory')
    parser.add_argument('--task_id',
                        type=int,
                        required=True,
                        help='get task info')
    parser.add_argument('--inst_id',
                        type=int,
                        required=True,
                        help='get inst info')
    parser.add_argument('--model_id',
                        type=int,
                        required=True,
                        help='get model info')
    parser.add_argument('--host',
                        type=str,
                        required=True,
                        help='cluster host for set mysql param')

    opt = parser.parse_args()

    task_id = opt.task_id
    inst_id = opt.inst_id
    model_id = opt.model_id

    init_logger(task_id, False, True)
    CONST.TASK_ID = task_id

    logger.info("start cdbtune")
    logger.info(opt)
    # NOTE(review): the connection parameters and credentials are hard-coded
    # here; consider moving them to configuration.
    # taskdb = database("127.0.0.1",3306,"root","123456","cdbtune")
    taskdb = database("10.249.50.200", 4839, "cdbtune", "123456", "cdbtune")

    # The ids are ints (enforced by argparse ``type=int``), so formatting them
    # with %d cannot smuggle arbitrary text into the statements.
    rsp_task = taskdb.fetch_all("select * from tb_task where task_id = %d" %
                                task_id)
    rsp_inst = taskdb.fetch_all(
        "select * from tb_mysql_inst where inst_id = %d" % inst_id)
    rsp_model = taskdb.fetch_all(
        "select * from tb_models where model_id = %d" % model_id)

    if len(rsp_task) == 0 or len(rsp_inst) == 0 or len(rsp_model) == 0:
        os_quit(Err.INPUT_ERROR, "task_id or inst_id or model_id doesn`t exit")

    # Only the first matching row of each query is used.
    task_detail = rsp_task[0]
    instance_detail = rsp_inst[0]
    model_detail = rsp_model[0]

    method = model_detail["method"]
    model_path = model_detail["position"]
    num_knobs = model_detail["knobs"]
    num_metrics = model_detail["dimension"]

    env = environment.TencentServer(instance=instance_detail,
                                    task_detail=task_detail,
                                    model_detail=model_detail,
                                    host=opt.host)

    # Build models
    if method == 'ddpg':
        ddpg_opt = {
            'tau': 0.001,
            'alr': 0.00001,
            'clr': 0.00001,
            'model': model_path,
            'gamma': 0.99,
            'batch_size': opt.batch_size,
            'memory_size': 100000,
        }

        model = models.DDPG(n_states=num_metrics,
                            n_actions=num_knobs,
                            opt=ddpg_opt,
                            ouprocess=True)
    else:
        model = models.DQN()

    # --memory defaults to '', in which case no replay memory is loaded.
    if opt.memory:
        model.replay_memory.load_memory(opt.memory)
        logger.info("Load Memory: {}".format(len(model.replay_memory)))

    # The result is not used in this function; the call is kept in case
    # get_init_knobs() has side effects.
    # NOTE(review): confirm whether a module-level ``current_knob`` was intended.
    environment.get_init_knobs()

    return env, model
Exemplo n.º 2
0
# Create the 'test_knob' directory if it does not exist yet.
if not os.path.exists('test_knob'):
    os.mkdir('test_knob')

# Name of this run: 'eval_<method>_<timestamp>'.
expr_name = 'eval_{}_{}'.format(opt.method, str(utils.get_timestamp()))

# Logger named after the method; its log_file is log/<expr_name>.log.
logger = utils.Logger(
    name=opt.method,
    log_file='log/{}.log'.format(expr_name)
)

# Emit a warning whenever opt.other_knob is non-zero.
if opt.other_knob != 0:
    logger.warn('USE Other Knobs')

# Load mean value and variance
# NOTE(review): no mean/variance loading code follows this comment here.

# Knob settings returned by environment.get_init_knobs().
current_knob = environment.get_init_knobs()


def compute_percentage(default, current):
    """Return the percentage change of the current metrics versus the default.

    Both arguments are indexed positionally: item ``0`` is used for the first
    result and item ``1`` for the second.

    Args:
        default: metrics from default settings, indexable by 0 and 1
        current: metrics from current settings, indexable by 0 and 1

    Returns:
        tuple: ``(delta_tps, delta_latency)`` where
            delta_tps     = 100 * (current[0] - default[0]) / default[0]
            delta_latency = 100 * (default[1] - current[1]) / default[1]
        so a drop in item 1 yields a positive second value.
    """
    now_tps = current[0]
    base_tps = default[0]
    tps_change = 100 * (now_tps - base_tps) / base_tps

    now_latency = current[1]
    base_latency = default[1]
    latency_change = 100 * (base_latency - now_latency) / base_latency

    return tps_change, latency_change


def generate_knob(action, method):
    if method == 'ddpg':
            batch_actions = [x[1].tolist() for x in batch_data]

            test_loss += model.train_actor((batch_states, batch_actions), is_train=False)

        print("[Epoch {}] Test Loss: {}".format(epoch, test_loss))
        model.save_actor('sl_model_params/sl_train_actor_{}.pth'.format(epoch))

# NOTE(review): the `if` this `else` belongs to is not visible in this snippet.
else:
    # Create Environment
    # environment.TencentServer is used when opt.tencent is truthy,
    # environment.DockerServer otherwise; both receive the workload type and
    # the instance name from the parsed options.
    if opt.tencent:
        env = environment.TencentServer(wk_type=opt.workload, instance_name=opt.instance,
                                        request_url=tuner_configs.TENCENT_URL)
    else:
        env = environment.DockerServer(wk_type=opt.workload, instance_name=opt.instance)

    # Knob settings returned by environment.get_init_knobs().
    current_knob = environment.get_init_knobs()

    # Name of this run: 'sl_test_ddpg_<timestamp>'.
    expr_name = 'sl_test_ddpg_{}'.format(str(utils.get_timestamp()))

    # Logger named 'train_supervised'; its log_file is log/<expr_name>.log.
    logger = utils.Logger(
        name='train_supervised',
        log_file='log/{}.log'.format(expr_name)
    )

    # A non-empty opt.params is required before the actor is loaded below.
    # NOTE(review): assert statements are stripped when Python runs with -O,
    # so this check disappears in optimized mode.
    assert len(opt.params) != 0, "Please add params' path"

    def generate_knob(action):
        """Return environment.gen_continuous(action)."""
        return environment.gen_continuous(action)

    # Load the actor from the location given by opt.params.
    model.load_actor(opt.params)