Example #1
import json
import os
import time

import torch as th

# Pack, set_seed, NormMultiWozCorpus, SysPerfectBD2Cat, OfflineLatentRlAgent,
# OfflineTaskReinforce and task_generate come from the surrounding project.


def main():
    start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                               time.localtime(time.time()))
    print('[START]', start_time, '=' * 30)
    # RL configuration
    env = 'gpu'
    pretrained_folder = '2019-06-20-22-49-55-sl_cat'
    pretrained_model_id = 41

    exp_dir = os.path.join('sys_config_log_model', pretrained_folder,
                           'rl-' + start_time)
    # create exp folder
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)

    rl_config = Pack(
        train_path='../data/norm-multi-woz/train_dials.json',
        valid_path='../data/norm-multi-woz/val_dials.json',
        test_path='../data/norm-multi-woz/test_dials.json',
        sv_config_path=os.path.join('sys_config_log_model', pretrained_folder,
                                    'config.json'),
        sv_model_path=os.path.join('sys_config_log_model', pretrained_folder,
                                   '{}-model'.format(pretrained_model_id)),
        rl_config_path=os.path.join(exp_dir, 'rl_config.json'),
        rl_model_path=os.path.join(exp_dir, 'rl_model'),
        ppl_best_model_path=os.path.join(exp_dir, 'ppl_best.model'),
        reward_best_model_path=os.path.join(exp_dir, 'reward_best.model'),
        record_path=exp_dir,
        record_freq=200,
        sv_train_freq=0,  # TODO: this is also controlled in main.py; keep the two in sync
        use_gpu=env == 'gpu',
        nepoch=10,
        nepisode=0,
        tune_pi_only=False,
        max_words=100,
        temperature=1.0,
        episode_repeat=1.0,
        rl_lr=0.01,
        momentum=0.0,
        nesterov=False,
        gamma=0.99,
        rl_clip=5.0,
        random_seed=100,
    )

    # save configuration
    with open(rl_config.rl_config_path, 'w') as f:
        json.dump(rl_config, f, indent=4)

    # set random seed
    set_seed(rl_config.random_seed)

    # load previous supervised learning configuration and corpus
    sv_config = Pack(json.load(open(rl_config.sv_config_path)))
    sv_config['dropout'] = 0.0
    sv_config['use_gpu'] = rl_config.use_gpu
    corpus = NormMultiWozCorpus(sv_config)

    # TARGET AGENT
    sys_model = SysPerfectBD2Cat(corpus, sv_config)
    if sv_config.use_gpu:
        sys_model.cuda()
    sys_model.load_state_dict(
        th.load(rl_config.sv_model_path,
                map_location=lambda storage, location: storage))
    sys_model.eval()
    sys = OfflineLatentRlAgent(sys_model,
                               corpus,
                               rl_config,
                               name='System',
                               tune_pi_only=rl_config.tune_pi_only)

    # start RL
    reinforce = OfflineTaskReinforce(sys, corpus, sv_config, sys_model,
                                     rl_config, task_generate)
    reinforce.run()

    end_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    print('[END]', end_time, '=' * 30)
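
All of these examples build their configuration as a Pack. Pack itself is not shown on this page; in projects that use this pattern it is typically a dict subclass with attribute-style access, which is why rl_config.rl_config_path works and why the object can be handed directly to json.dump. A minimal sketch of such a helper, which may differ from the project's actual implementation:

class Pack(dict):
    """A dict with attribute-style access, convenient for configuration."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def copy(self):
        # return a Pack rather than a plain dict so attribute access survives
        return Pack(**self)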
Example #2
    improve_threshold=0.996,
    patient_increase=2.0,
    save_model=True,
    early_stop=False,
    gen_type='greedy',
    preview_batch_num=1,
    max_dec_len=40,
    k=domain_info.input_length(),
    goal_embed_size=64,
    goal_nhid=64,
    init_range=0.1,
    pretrain_folder='2018-11-19-21-28-29-sl_latent',
    forward_only=False
)

set_seed(10)

if config.forward_only:
    saved_path = os.path.join(stats_path, config.pretrain_folder)
    config = Pack(json.load(open(os.path.join(saved_path, 'config.json'))))
    config['forward_only'] = True
else:
    saved_path = os.path.join(
        stats_path,
        start_time + '-' + os.path.basename(__file__).split('.')[0])
    if not os.path.exists(saved_path):
        os.mkdir(saved_path)

config.saved_path = saved_path

prepare_dirs_loggers(config)
logger = logging.getLogger()
logger.info('[START]\n{}\n{}'.format(start_time, '='*30))
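
set_seed is likewise project code. For a reproducible run it has to seed every random-number source the training loop touches; a plausible sketch, assuming the project relies on random, numpy, and torch:

import random

import numpy as np
import torch as th


def set_seed(seed):
    """Seed all RNGs used during training so runs are reproducible."""
    random.seed(seed)
    np.random.seed(seed)
    th.manual_seed(seed)
    if th.cuda.is_available():
        th.cuda.manual_seed_all(seed)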
Example #3
import json
import os
import time

import torch as th

# Pack, set_seed, DealCorpus, models_deal, the agent classes, the FB judger
# classes, and the Dialog/Reinforce helpers come from the surrounding project.


def main():
    start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                               time.localtime(time.time()))
    print('[START]', start_time, '=' * 30)

    # RL configuration
    folder = '2019-06-20-10-24-23-sl_gauss'
    epoch_id = '28'

    env = 'gpu'
    sim_epoch_id = '23'
    simulator_folder = '2019-06-20-09-19-39-sl_word'
    exp_dir = os.path.join('config_log_model', folder, 'rl-' + start_time)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)

    rl_config = Pack(
        train_path='../data/negotiate/train.txt',
        val_path='../data/negotiate/val.txt',
        test_path='../data/negotiate/test.txt',
        selfplay_path='../data/negotiate/selfplay.txt',
        selfplay_eval_path='../data/negotiate/selfplay_eval.txt',
        sim_config_path=os.path.join('config_log_model', simulator_folder,
                                     'config.json'),
        sim_model_path=os.path.join('config_log_model', simulator_folder,
                                    '{}-model'.format(sim_epoch_id)),
        sv_config_path=os.path.join('config_log_model', folder, 'config.json'),
        sv_model_path=os.path.join('config_log_model', folder,
                                   '{}-model'.format(epoch_id)),
        rl_config_path=os.path.join(exp_dir, 'rl_config.json'),
        rl_model_path=os.path.join(exp_dir, 'rl_model'),
        ppl_best_model_path=os.path.join(exp_dir, 'ppl_best_model'),
        reward_best_model_path=os.path.join(exp_dir, 'reward_best_model'),
        judger_model_path=os.path.join('../FB', 'sv_model.th'),
        judger_config_path=os.path.join('../FB', 'judger_config.json'),
        record_path=exp_dir,
        record_freq=50,
        use_gpu=env == 'gpu',
        nepoch=4,
        nepisode=0,
        sv_train_freq=0,  # TODO: this is also controlled in main.py; keep the two in sync
        eval_freq=0,
        max_words=100,
        rl_lr=0.2,
        momentum=0.1,
        nesterov=True,
        gamma=0.95,
        rl_clip=1.0,
        ref_text='../data/negotiate/train.txt',
        domain='object_division',
        max_nego_turn=50,
        random_seed=0,
        use_latent_rl=True)

    # save configuration
    with open(rl_config.rl_config_path, 'w') as f:
        json.dump(rl_config, f, indent=4)

    # set random seed
    set_seed(rl_config.random_seed)

    # load previous supervised learning configuration and corpus
    sv_config = Pack(json.load(open(rl_config.sv_config_path)))
    sim_config = Pack(json.load(open(rl_config.sim_config_path)))

    # TODO: override use_gpu in both loaded configs
    sv_config['use_gpu'] = rl_config.use_gpu
    sim_config['use_gpu'] = rl_config.use_gpu
    corpus = DealCorpus(sv_config)

    # load models for two agents
    # TARGET AGENT
    sys_model = models_deal.GaussHRED(corpus, sv_config)
    if sv_config.use_gpu:  # TODO gpu -> cpu transfer
        sys_model.cuda()
    sys_model.load_state_dict(
        th.load(rl_config.sv_model_path,
                map_location=lambda storage, location: storage))
    # we don't want to use Dropout during RL
    sys_model.eval()
    sys = LatentRlAgent(sys_model,
                        corpus,
                        rl_config,
                        name='System',
                        use_latent_rl=rl_config.use_latent_rl)

    # SIMULATOR: keep the user model frozen, i.e. do not update its parameters
    usr_model = models_deal.HRED(corpus, sim_config)
    if sim_config.use_gpu:  # TODO gpu -> cpu transfer
        usr_model.cuda()
    usr_model.load_state_dict(
        th.load(rl_config.sim_model_path,
                map_location=lambda storage, location: storage))
    usr_model.eval()
    usr_type = LstmAgent
    usr = usr_type(usr_model, corpus, rl_config, name='User')

    # load FB judger model
    judger_config = Pack(json.load(open(rl_config.judger_config_path)))
    judger_config['cuda'] = rl_config.use_gpu
    judger_config['data'] = '../data/negotiate'
    judger_device_id = FB_use_cuda(judger_config.cuda)
    judger_word_corpus = FbWordCorpus(judger_config.data,
                                      freq_cutoff=judger_config.unk_threshold,
                                      verbose=True)
    judger_model = FbDialogModel(judger_word_corpus.word_dict,
                                 judger_word_corpus.item_dict,
                                 judger_word_corpus.context_dict,
                                 judger_word_corpus.output_length,
                                 judger_config, judger_device_id)
    if judger_device_id is not None:
        judger_model.cuda(judger_device_id)
    judger_model.load_state_dict(
        th.load(rl_config.judger_model_path,
                map_location=lambda storage, location: storage))
    judger_model.eval()
    judger = Judger(judger_model, judger_device_id)

    # initialize communication dialogue between two agents
    dialog = Dialog([sys, usr], judger, rl_config)
    ctx_gen = ContextGenerator(rl_config.selfplay_path)

    # simulation module
    dialog_eval = DialogEval([sys, usr], judger, rl_config)
    ctx_gen_eval = ContextGeneratorEval(rl_config.selfplay_eval_path)

    # start RL
    reinforce = Reinforce(dialog, ctx_gen, corpus, sv_config, sys_model,
                          usr_model, rl_config, dialog_eval, ctx_gen_eval)
    reinforce.run()

    # save sys model
    th.save(sys_model.state_dict(), rl_config.rl_model_path)

    end_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    print('[END]', end_time, '=' * 30)
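
Reinforce.run() (like OfflineTaskReinforce.run() in Example #1) is project code, but the rl_lr, momentum, nesterov, gamma, and rl_clip entries in rl_config map directly onto the textbook REINFORCE update: SGD over the negative log-likelihood weighted by discounted returns, with gradient-norm clipping. A schematic sketch of that update, not the project's implementation:

import torch as th
import torch.nn as nn

policy = nn.Linear(8, 4)  # stand-in for the dialogue policy
opt = th.optim.SGD(policy.parameters(), lr=0.2, momentum=0.1, nesterov=True)


def reinforce_step(log_probs, rewards, gamma=0.95, rl_clip=1.0):
    """One policy-gradient step over a finished episode.

    log_probs: per-turn log-probability tensors produced by the policy.
    rewards: per-turn scalar rewards.
    """
    # discounted return G_t, accumulated backwards through the episode
    returns, g = [], 0.0
    for r in reversed(rewards):
        g = r + gamma * g
        returns.insert(0, g)
    # REINFORCE loss: -sum_t log pi(a_t | s_t) * G_t
    loss = -th.stack([lp * g for lp, g in zip(log_probs, returns)]).sum()
    opt.zero_grad()
    loss.backward()
    # rl_clip bounds the global gradient norm before the optimizer step
    th.nn.utils.clip_grad_norm_(policy.parameters(), rl_clip)
    opt.step()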
Example #4
    fix_batch=True,
    fix_train_batch=False,
    avg_type='word',
    print_step=300,
    ckpt_step=1416,
    improve_threshold=0.996,
    patient_increase=2.0,
    save_model=True,
    early_stop=False,
    gen_type='greedy',
    preview_batch_num=None,
    k=domain_info.input_length(),
    init_range=0.1,
    pretrain_folder='2019-06-20-21-43-06-sl_cat',
    forward_only=False)
set_seed(config.seed)
start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
stats_path = 'sys_config_log_model'
if config.forward_only:
    saved_path = os.path.join(stats_path, config.pretrain_folder)
    config = Pack(json.load(open(os.path.join(saved_path, 'config.json'))))
    config['forward_only'] = True
else:
    saved_path = os.path.join(
        stats_path,
        start_time + '-' + os.path.basename(__file__).split('.')[0])
    if not os.path.exists(saved_path):
        os.makedirs(saved_path)
config.saved_path = saved_path

prepare_dirs_loggers(config)
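
prepare_dirs_loggers is also project code. Judging from how it is called here and in Example #2, it presumably creates config.saved_path and attaches both console and file handlers to the root logger; a minimal sketch under that assumption (the 'session.log' filename is hypothetical):

import logging
import os


def prepare_dirs_loggers(config):
    """Create the experiment directory and log to console and file."""
    os.makedirs(config.saved_path, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s %(message)s',
        handlers=[
            logging.StreamHandler(),
            logging.FileHandler(os.path.join(config.saved_path, 'session.log')),
        ])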
Example #5
    # forward_only = True,
    # different batching style
    seq=True,
    # use oracle context and proposal parse
    oracle_context=True,
    # oracle_context = False,
    # oracle_parse = False,
    oracle_parse=True,
    semisupervised=False,
    # prop_weight = 0.1,
    prop_weight=1,
    # prop_weight = 0,
    tie_prop_utt_enc=False,
)

set_seed(config.random_seed)

if config.forward_only:
    saved_path = os.path.join(stats_path, config.pretrain_folder)
    config = Pack(json.load(open(os.path.join(saved_path, 'config.json'))))
    config['forward_only'] = True
else:
    saved_path = os.path.join(
        stats_path,
        start_time + '-' + os.path.basename(__file__).split('.')[0])
    if not os.path.exists(saved_path):
        os.mkdir(saved_path)

config.saved_path = saved_path

prepare_dirs_loggers(config)
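
Examples #2, #4, and #5 share the same forward_only branch: a training run writes its config next to its checkpoints, and an evaluation-only run reloads that config from the pretrained folder, overriding just the forward_only flag. One small difference between them: os.mkdir (Examples #2 and #5) fails when the parent directory is missing, while os.makedirs (Example #4) creates intermediate directories, and with exist_ok=True it is also safe to call twice. A sketch with a hypothetical path:

import os

saved_path = 'sys_config_log_model/demo-run'  # hypothetical
# race-free replacement for the "if not os.path.exists(...): os.mkdir(...)" pattern
os.makedirs(saved_path, exist_ok=True)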