else:
    saved_path = os.path.join(stats_path, start_time+'-'+os.path.basename(__file__).split('.')[0])
    if not os.path.exists(saved_path):
        os.mkdir(saved_path)

config.saved_path = saved_path

prepare_dirs_loggers(config)
logger = logging.getLogger()
logger.info('[START]\n{}\n{}'.format(start_time, '='*30))

# save configuration
with open(os.path.join(saved_path, 'config.json'), 'w') as f:
    json.dump(config, f, indent=4) # sort_keys=True

corpus = DealCorpus(config)
train_dial, val_dial, test_dial = corpus.get_corpus()

train_data = DealDataLoaders('Train', train_dial, config)
val_data = DealDataLoaders('Val', val_dial, config)
test_data = DealDataLoaders('Test', test_dial, config)

evaluator = BleuEvaluator('Deal')

# GaussHRED: hierarchical encoder-decoder with a Gaussian latent variable
# (inferred from the model name; see the model class for the exact architecture)
model = GaussHRED(corpus, config)

if config.use_gpu:
    model.cuda()

best_epoch = None
if not config.forward_only:
Example #2
def main():
    start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                               time.localtime(time.time()))
    print('[START]', start_time, '=' * 30)

    # RL configuration
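    # `folder`/`epoch_id` pick the supervised GaussHRED checkpoint used to
    # warm-start the system agent; `simulator_folder`/`sim_epoch_id` pick the
    # word-level checkpoint for the frozen user simulator (see the paths below)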
    folder = '2019-06-20-10-24-23-sl_gauss'
    epoch_id = '28'

    env = 'gpu'
    sim_epoch_id = '23'
    simulator_folder = '2019-06-20-09-19-39-sl_word'
    exp_dir = os.path.join('config_log_model', folder, 'rl-' + start_time)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)

    rl_config = Pack(
        train_path='../data/negotiate/train.txt',
        val_path='../data/negotiate/val.txt',
        test_path='../data/negotiate/test.txt',
        selfplay_path='../data/negotiate/selfplay.txt',
        selfplay_eval_path='../data/negotiate/selfplay_eval.txt',
        sim_config_path=os.path.join('config_log_model', simulator_folder,
                                     'config.json'),
        sim_model_path=os.path.join('config_log_model', simulator_folder,
                                    '{}-model'.format(sim_epoch_id)),
        sv_config_path=os.path.join('config_log_model', folder, 'config.json'),
        sv_model_path=os.path.join('config_log_model', folder,
                                   '{}-model'.format(epoch_id)),
        rl_config_path=os.path.join(exp_dir, 'rl_config.json'),
        rl_model_path=os.path.join(exp_dir, 'rl_model'),
        ppl_best_model_path=os.path.join(exp_dir, 'ppl_best_model'),
        reward_best_model_path=os.path.join(exp_dir, 'reward_best_model'),
        judger_model_path=os.path.join('../FB', 'sv_model.th'),
        judger_config_path=os.path.join('../FB', 'judger_config.json'),
        record_path=exp_dir,
        record_freq=50,
        use_gpu=(env == 'gpu'),
        nepoch=4,
        nepisode=0,
        sv_train_freq=0,  # TODO pay attention to main.py, since it is also controlled there
        eval_freq=0,
        max_words=100,
        rl_lr=0.2,  # learning rate for the policy-gradient (SGD) updates
        momentum=0.1,
        nesterov=True,
        gamma=0.95,  # discount factor applied to negotiation rewards
        rl_clip=1.0,  # gradient-norm clipping for the RL updates
        ref_text='../data/negotiate/train.txt',
        domain='object_division',
        max_nego_turn=50,  # maximum number of turns per negotiation dialogue
        random_seed=0,
        use_latent_rl=True)

    # save configuration
    with open(rl_config.rl_config_path, 'w') as f:
        json.dump(rl_config, f, indent=4)

    # set random seed
    set_seed(rl_config.random_seed)

    # load previous supervised learning configuration and corpus
    sv_config = Pack(json.load(open(rl_config.sv_config_path)))
    sim_config = Pack(json.load(open(rl_config.sim_config_path)))

    # TODO revise the use_gpu in the config
    sv_config['use_gpu'] = rl_config.use_gpu
    sim_config['use_gpu'] = rl_config.use_gpu
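    # rebuild the corpus with the supervised-learning config so that the
    # vocabulary and indexing match the pretrained checkpoints loaded below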
    corpus = DealCorpus(sv_config)

    # load models for two agents
    # TARGET AGENT
    sys_model = models_deal.GaussHRED(corpus, sv_config)
    if sv_config.use_gpu:  # TODO gpu -> cpu transfer
        sys_model.cuda()
    sys_model.load_state_dict(
        th.load(rl_config.sv_model_path,
                map_location=lambda storage, location: storage))
    # we don't want to use Dropout during RL
    sys_model.eval()
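    # LatentRlAgent is assumed to apply policy-gradient updates in the latent
    # action space when use_latent_rl is set, rather than over individual words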
    sys = LatentRlAgent(sys_model,
                        corpus,
                        rl_config,
                        name='System',
                        use_latent_rl=rl_config.use_latent_rl)

    # SIMULATOR: we keep usr frozen, i.e. we don't update its parameters
    usr_model = models_deal.HRED(corpus, sim_config)
    if sim_config.use_gpu:  # TODO gpu -> cpu transfer
        usr_model.cuda()
    usr_model.load_state_dict(
        th.load(rl_config.sim_model_path,
                map_location=lambda storage, location: storage))
    usr_model.eval()
    usr_type = LstmAgent
    usr = usr_type(usr_model, corpus, rl_config, name='User')

    # load FB judger model
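    # the judger scores the deal reached at the end of each negotiation and is
    # used to compute the reward signal (assumption based on its use in Dialog)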
    judger_config = Pack(json.load(open(rl_config.judger_config_path)))
    judger_config['cuda'] = rl_config.use_gpu
    judger_config['data'] = '../data/negotiate'
    judger_device_id = FB_use_cuda(judger_config.cuda)
    judger_word_corpus = FbWordCorpus(judger_config.data,
                                      freq_cutoff=judger_config.unk_threshold,
                                      verbose=True)
    judger_model = FbDialogModel(judger_word_corpus.word_dict,
                                 judger_word_corpus.item_dict,
                                 judger_word_corpus.context_dict,
                                 judger_word_corpus.output_length,
                                 judger_config, judger_device_id)
    if judger_device_id is not None:
        judger_model.cuda(judger_device_id)
    judger_model.load_state_dict(
        th.load(rl_config.judger_model_path,
                map_location=lambda storage, location: storage))
    judger_model.eval()
    judger = Judger(judger_model, judger_device_id)

    # initialize communication dialogue between two agents
    dialog = Dialog([sys, usr], judger, rl_config)
    ctx_gen = ContextGenerator(rl_config.selfplay_path)
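    # ContextGenerator is assumed to yield the item counts/values that seed each
    # self-play negotiation, read from selfplay.txt in the FB negotiate format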

    # simulation module
    dialog_eval = DialogEval([sys, usr], judger, rl_config)
    ctx_gen_eval = ContextGeneratorEval(rl_config.selfplay_eval_path)

    # start RL
    reinforce = Reinforce(dialog, ctx_gen, corpus, sv_config, sys_model,
                          usr_model, rl_config, dialog_eval, ctx_gen_eval)
    reinforce.run()

    # save sys model
    th.save(sys_model.state_dict(), rl_config.rl_model_path)

    end_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    print('[END]', end_time, '=' * 30)