"WARNING: You have a CUDA device, so you should probably run with --deviceId [1|2|3]"
        )
    else:
        torch.cuda.manual_seed(opt.random_seed)
np.random.seed(opt.random_seed)

dataroot = opt.dataroot

tag_vocab_dir = dataroot + '/vocab.slot'
class_vocab_dir = dataroot + '/vocab.intent'
train_data_dir = dataroot + '/train'
valid_data_dir = dataroot + '/valid'
test_data_dir = dataroot + '/test'
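# Expected layout under opt.dataroot, read off the paths above (the per-file
# format is an assumption):
#   vocab.slot     one slot tag per line
#   vocab.intent   one intent label per line
#   train / valid / test   sequence-tagging data with an intent label per line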

if not opt.testing:
    tag_to_idx, idx_to_tag = vocab_reader.read_vocab_file(tag_vocab_dir,
                                                          bos_eos=opt.enc_dec)
    class_to_idx, idx_to_class = vocab_reader.read_vocab_file(class_vocab_dir,
                                                              bos_eos=False)
else:
    tag_to_idx, idx_to_tag = vocab_reader.read_vocab_file(opt.read_vocab +
                                                          '.tag',
                                                          bos_eos=False,
                                                          no_pad=True,
                                                          no_unk=True)
    class_to_idx, idx_to_class = vocab_reader.read_vocab_file(opt.read_vocab +
                                                              '.class',
                                                              bos_eos=False,
                                                              no_pad=True,
                                                              no_unk=True)
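# The branch above: training builds the tag/class vocabularies from the
# dataset (optionally adding BOS/EOS tokens for an encoder-decoder tagger),
# while testing reloads the vocab files saved at training time, with no
# <pad>/<unk> entries re-added.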

logger.info("Vocab size: %s %s" % (len(tag_to_idx), len(class_to_idx)))
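# `read_vocab_file` itself is not shown on this page. Below is a minimal
# sketch of the interface this snippet relies on; the special-token names,
# their ordering, and the one-entry-per-line file format are assumptions,
# not the module's actual implementation:
def read_vocab_file(path, bos_eos=False, no_pad=False, no_unk=False):
    tokens = []
    if not no_pad:
        tokens.append('<pad>')            # reserve an index for padding
    if not no_unk:
        tokens.append('<unk>')            # fallback for unseen tokens
    if bos_eos:
        tokens.extend(['<s>', '</s>'])    # boundary markers for enc-dec models
    with open(path, 'r', encoding='utf-8') as f:
        tokens.extend(line.strip() for line in f if line.strip())
    token_to_idx = {tok: i for i, tok in enumerate(tokens)}
    idx_to_token = {i: tok for i, tok in enumerate(tokens)}
    return token_to_idx, idx_to_token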
Example #2
if torch.cuda.is_available():
    if opt.device.type != 'cuda':
        logger.info("WARNING: You have a CUDA device, so you should probably run with --deviceId [1|2|3]")
    else:
        torch.cuda.manual_seed(opt.random_seed)
np.random.seed(opt.random_seed)

dataroot = opt.dataroot

tag_vocab_dir = dataroot + '/vocab.slot' 
class_vocab_dir = dataroot + '/vocab.intent'
train_data_dir = dataroot + '/train'
valid_data_dir = dataroot + '/valid'
test_data_dir = dataroot + '/test'

#if not opt.testing:
tag_to_idx, idx_to_tag = vocab_reader.read_vocab_file(tag_vocab_dir, bos_eos=opt.enc_dec)
class_to_idx, idx_to_class = vocab_reader.read_vocab_file(class_vocab_dir, bos_eos=False)
#else:
#    tag_to_idx, idx_to_tag = vocab_reader.read_vocab_file(opt.read_vocab+'.tag', bos_eos=False, no_pad=True, no_unk=True)
#    class_to_idx, idx_to_class = vocab_reader.read_vocab_file(opt.read_vocab+'.class', bos_eos=False, no_pad=True, no_unk=True)

logger.info("Vocab size: %s %s" % (len(tag_to_idx), len(class_to_idx)))
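# Persist the vocabularies so a later --testing run can reload exactly the
# same tag/class indices (the commented-out branch above reads them back
# from opt.read_vocab).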
if not opt.testing:
    vocab_reader.save_vocab(idx_to_tag, os.path.join(exp_path, opt.save_vocab+'.tag'))
    vocab_reader.save_vocab(idx_to_class, os.path.join(exp_path, opt.save_vocab+'.class'))
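# Output sizes the model will need: one per slot tag and one per intent class.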
opt.out_label = len(tag_to_idx)
opt.out_int = len(class_to_idx)
opt.word_lowercase = False
if not opt.testing:
    train_feats, train_tags, train_class = data_reader.read_seqtag_data_with_class(
        train_data_dir, tag_to_idx, class_to_idx,
        multiClass=opt.multiClass, lowercase=opt.word_lowercase)
    valid_feats, valid_tags, valid_class = data_reader.read_seqtag_data_with_class(
        valid_data_dir, tag_to_idx, class_to_idx,
        multiClass=opt.multiClass, keep_order=opt.testing,
        lowercase=opt.word_lowercase)
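# For context, a minimal argparse setup covering only the options these
# snippets actually touch; the option names match the usage above, but every
# default value and the device-selection logic here are assumptions:
import argparse

import torch

parser = argparse.ArgumentParser()
parser.add_argument('--dataroot', required=True,
                    help='directory containing vocab.slot, vocab.intent, train, valid, test')
parser.add_argument('--random_seed', type=int, default=999)
parser.add_argument('--testing', action='store_true')
parser.add_argument('--enc_dec', action='store_true',
                    help='encoder-decoder tagger; adds BOS/EOS to the tag vocab')
parser.add_argument('--multiClass', action='store_true')
parser.add_argument('--save_vocab', default='vocab')
parser.add_argument('--read_vocab', default='vocab')
parser.add_argument('--deviceId', type=int, default=0,
                    help='0 for CPU, 1..N to pick a GPU')
opt = parser.parse_args()
opt.device = torch.device('cuda' if opt.deviceId > 0 else 'cpu')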