Example #1
                                              args=args)
     print('\nAccepted performance: ' + str(result) + " at test dataset;\n")
     if not os.path.exists(args.log_dir):
         os.makedirs(args.log_dir)
     with open(os.path.join(args.log_dir, args.log_name), 'w') as fw:
         fw.write(str(best_epoch) + ',' + str(result))
 elif args.method == "borderWithoutbert":
     dataset = DatasetManager(args)
     dataset.quick_build()
     dataset.show_summary()
     model = CPosModel(args, len(dataset.word_alphabet),
                       len(dataset.slot_alphabet),
                       len(dataset.intent_alphabet))
     print(model)
     model.show_summary()
     processor = Processor(dataset, model, args)
     if args.do_train:
         best_epoch = processor.train()
     result = Processor.validate(os.path.join(args.save_dir,
                                              "model/model.pkl"),
                                 dataset,
                                 args.batch_size,
                                 len(dataset.intent_alphabet),
                                 use_mask=args.use_mask,
                                 args=args)
     print('\nAccepted performance: ' + str(result) + " at test dataset;\n")
     if not os.path.exists(args.log_dir):
         os.makedirs(args.log_dir)
     with open(os.path.join(args.log_dir, args.log_name), 'w') as fw:
         fw.write(str(best_epoch) + ',' + str(result))
 elif args.method == "borderWithoutNoSlot":
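
Both method branches shown in Example #1 end with the same validate-then-log tail. As a minimal sketch, that duplicated tail could be factored into one helper; the name write_result_log is hypothetical, and it assumes only the args.log_dir and args.log_name attributes already used above:

import os

def write_result_log(args, best_epoch, result):
    # Report the accepted test performance and persist "<best_epoch>,<result>".
    print('\nAccepted performance: ' + str(result) + " at test dataset;\n")
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    with open(os.path.join(args.log_dir, args.log_name), 'w') as fw:
        fw.write(str(best_epoch) + ',' + str(result))
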
Example #2
    # Instantiate a dataset object; the dataset is fed in here.
    dataset = DatasetManager(args)
    dataset.quick_build()  # printed three times here

    mem_sentence_size = dataset.get_mem_sentence_size()


    # Instantiate a network model object; the inputs are the word/slot alphabet lengths from the built dataset.
    model = ModelManager(args,
                         len(dataset.word_alphabet),
                         len(dataset.slot_alphabet_list[0]),
                         len(dataset.slot_alphabet_list[1]),
                         len(dataset.slot_alphabet_list[2]),
                         len(dataset.intent_alphabet),
                         len(dataset.kb_alphabet),
                         len(dataset.history_alphabet),
                         mem_sentence_size=mem_sentence_size)

    if not if_exk:
        model.show_summary()  # originally shown; hidden for now
        dataset.show_summary()  # originally shown; hidden for now

    # To train and evaluate the models; this is where the dataset is actually fed in.
    process = Processor(dataset, model, args)
    process.train(exp_index)

    if not if_exk:
        model.show_summary()  # originally shown; hidden for now
        dataset.show_summary()  # originally shown; hidden for now

    print("-------------------------VALIDATE----------------------------------")
    print('\nAccepted performance: ' +
        str(
            Processor.validate(  #slot_f1, intent_acc, sent_acc
                os.path.join(args.save_dir, "model/model_"+str(exp_index)+".pkl"),
                os.path.join(args.save_dir, "model/dataset_"+str(exp_index)+".pkl"),
                args.batch_size
            )
        ) + " at test dataset;\n")
Example #3
    random.seed(args.random_state)
    np.random.seed(args.random_state)

    # Fix the random seed of Pytorch when using GPU.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.random_state)
        torch.cuda.manual_seed(args.random_state)

    # Fix the random seed of Pytorch when using CPU.
    torch.manual_seed(args.random_state)
    torch.random.manual_seed(args.random_state)

    # Instantiate a dataset object.
    dataset = DatasetManager(args)
    dataset.quick_build()
    dataset.show_summary()

    # Instantiate a network model object.
    model = ModelManager(args, len(dataset.word_alphabet),
                         len(dataset.slot_alphabet))
    model.show_summary()

    # To train and evaluate the models.
    process = Processor(dataset, model, args.batch_size)
    process.train()

    print('\nAccepted performance: ' + str(
        Processor.validate(os.path.join(args.save_dir, "model/model.pkl"),
                           os.path.join(args.save_dir, "model/dataset.pkl"),
                           args.batch_size)) + " at test dataset;\n")
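
Examples #3 and #6 both open with the same PyTorch seeding boilerplate (Example #3 also seeds random and NumPy). A minimal, self-contained sketch that bundles those calls (the helper name seed_everything is hypothetical):

import random

import numpy as np
import torch

def seed_everything(random_state):
    # Fix the Python, NumPy, and PyTorch seeds; also seed all GPUs when available.
    random.seed(random_state)
    np.random.seed(random_state)
    torch.manual_seed(random_state)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(random_state)
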
Example #4
    torch.random.manual_seed(args.random_state)

    # Instantiate a dataset object.
    dataset = DatasetManager(args)
    dataset.quick_build()
    dataset.show_summary()
    #  dataset.ids["test"]

    # Instantiate a network model object.
    model = ModelManager(args, len(dataset.word_alphabet),
                         len(dataset.slot_alphabet),
                         len(dataset.intent_alphabet))
    model.show_summary()

    # To train and evaluate the models.
    process = Processor(dataset, model, args.batch_size)
    process.train()

    res, _ = Processor.validate(
        os.path.join(args.save_dir, "model/model.pkl"),
        os.path.join(args.save_dir, "model/dataset.pkl"), args.batch_size)

    print('\nAccepted performance: ' + str(res) + " at test dataset;\n")
    #  pred_res_dir = os.path.join(args.save_dir, "results")
    #  if not os.path.exists(pred_res_dir):
    #  os.mkdir(pred_res_dir)
    #  torch.save(pred, os.path.join(pred_res_dir, "test.pkl"))

    #  if not sorted_ids[i].endswith("full"):
    #  length = int(sorted_ids[10:])
    #  test_set[sent_id].length.append(length)
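
The commented-out lines at the end of Example #4 sketch saving the test predictions. A minimal version of that step, assuming (hypothetically) that the second return value of Processor.validate, bound to _ above, is the prediction object, and reusing the os/torch imports of the example:

    res, pred = Processor.validate(
        os.path.join(args.save_dir, "model/model.pkl"),
        os.path.join(args.save_dir, "model/dataset.pkl"), args.batch_size)

    # Save the raw predictions next to the model checkpoint.
    pred_res_dir = os.path.join(args.save_dir, "results")
    if not os.path.exists(pred_res_dir):
        os.mkdir(pred_res_dir)
    torch.save(pred, os.path.join(pred_res_dir, "test.pkl"))
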
Example #5
parser.add_argument('--intent_embedding_dim', '-ied', type=int, default=8)
parser.add_argument('--slot_embedding_dim', '-sed', type=int, default=32)
parser.add_argument('--slot_decoder_hidden_dim', '-sdhd', type=int, default=64)
parser.add_argument('--intent_decoder_hidden_dim', '-idhd', type=int, default=64)
parser.add_argument('--attention_hidden_dim', '-ahd', type=int, default=1024)
parser.add_argument('--attention_output_dim', '-aod', type=int, default=128)

if __name__ == "__main__":
    args = parser.parse_args()

    dataset = DatasetManager(args)
    dataset.quick_build()
    dataset.show_summary()


    result_per_intent = Processor.validate_per_intent(
        os.path.join(args.save_dir, "model/model.pkl"),
        os.path.join(args.save_dir, "model/dataset.pkl"),
        args.batch_size)

    print(f'Result in directory {args.save_dir}')
    print('*'*50)
    print('intent slot_f1 intent_acc sen_acc num_utt')
    print('overall' + ' ' + str(result_per_intent['overall'][0])+ ' ' + str(result_per_intent['overall'][1])
          + ' ' + str(result_per_intent['overall'][2])
          + ' ' + str(result_per_intent['overall'][3]))

    for intent in result_per_intent:
        if intent != 'overall':
            print(str(intent) + ' ' + str(result_per_intent[intent][0])
                  + ' ' + str(result_per_intent[intent][1])
                  + ' ' + str(result_per_intent[intent][2])
                  + ' ' + str(result_per_intent[intent][3]))
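
The report in Example #5 prints space-separated columns. A minimal alternative sketch that prints the same data with aligned columns; it assumes only that each value of result_per_intent is an indexable sequence of slot_f1, intent_acc, sen_acc, num_utt, as the header row above suggests:

def print_report(result_per_intent):
    # Header, the overall row, then one aligned row per intent.
    row_fmt = '{:<20} {:>10} {:>12} {:>10} {:>8}'
    print(row_fmt.format('intent', 'slot_f1', 'intent_acc', 'sen_acc', 'num_utt'))
    for intent in ['overall'] + [k for k in result_per_intent if k != 'overall']:
        res = result_per_intent[intent]
        print(row_fmt.format(str(intent), str(res[0]), str(res[1]),
                             str(res[2]), str(res[3])))
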
Example #6
    # Fix the random seed of Pytorch when using GPU.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.random_state)
        torch.cuda.manual_seed(args.random_state)

    # Fix the random seed of Pytorch when using CPU.
    torch.manual_seed(args.random_state)
    torch.random.manual_seed(args.random_state)

    # Instantiate a dataset object.
    dataset = DatasetManager(args)
    dataset.quick_build()
    dataset.show_summary()

    # Instantiate a network model object.
    model = ModelManager(
        args, len(dataset.word_alphabet),
        len(dataset.slot_alphabet),
        len(dataset.intent_alphabet))
    model.show_summary()

    # To train and evaluate the models.
    process = Processor(dataset, model, args.batch_size)
    process.train()

    print('\nAccepted performance: ' + str(Processor.validate(
        os.path.join(args.save_dir, "model/model.pkl"),
        os.path.join(args.save_dir, "model/dataset.pkl"),
        args.batch_size)) + " at test dataset;\n")
Example #7
from utils.process import Processor
from utils.loader import DatasetManager

import argparse
import os

parser = argparse.ArgumentParser()

parser.add_argument('--data_dir', '-dd', type=str, default='data')
parser.add_argument('--save_dir', '-sd', type=str, default='save')
parser.add_argument('--epoch', '-e', type=int, default=100)
parser.add_argument('--random_state', '-rs', type=int, default=2020)
parser.add_argument('--embedding_dim', '-ed', type=int, default=85)
parser.add_argument('--hidden_dim', '-hd', type=int, default=256)
parser.add_argument('--dropout_rate', '-dr', type=float, default=0.1)
parser.add_argument('--learning_rate', '-lr', type=float, default=0.001)
parser.add_argument('--batch_size', '-bs', type=int, default=32)
parser.add_argument('--l2_penalty', '-lp', type=float, default=1e-5)

if __name__ == '__main__':
    args = parser.parse_args()

    dataset = DatasetManager(args)
    dataset.build_exam_dataset()

    Processor.test(os.path.join(args.save_dir, 'model.pkl'), dataset, args)
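
Example #7 reads its configuration from the command line. For a quick check without a shell, argparse can also parse an explicit argument list; a minimal sketch reusing the parser defined above (every flag is one of the options it declares):

# Parse a hand-written argument list instead of sys.argv.
args = parser.parse_args(['--data_dir', 'data',
                          '--save_dir', 'save',
                          '--batch_size', '32'])
print(args.data_dir, args.save_dir, args.batch_size)
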
Example #8
def main(args):
    if args.task_name == "multiwoz21":
        cfg = Multiwoz21Config()
    elif args.task_name == "dstc8":
        cfg = Dstc8Config()
    else:
        raise AssertionError(
            "Task name should be included in [multiwoz21, dstc8].")

    if cfg.batch_size == 1:
        raise SystemExit(
            "Exit!\nBatch size can not be set to 1 for BatchNorm1d used in pytorch!"
        )

    cfg.model = args.model

    dataset_config = DATASET_CONFIG[args.task_name]
    dialog_paths = {}
    for dataset_split in ["train", "dev", "test"]:
        dialog_paths[dataset_split] = [
            Path(cfg.input_data_dir) / dataset_split /
            f"dialogues_{i:03d}.annotation"
            for i in dataset_config.file_ranges[dataset_split]
        ]

    vocab = Vocabulary()
    vocab.load(cfg.vocab_path)
    print("Vocab length is %d" % (len(vocab.stoi)), end='\n\n')

    train_iter = DataIterator(dialog_paths["train"], vocab)
    dev_iter = DataIterator(dialog_paths["dev"], vocab)
    test_iter = DataIterator(dialog_paths["test"], vocab)
    dataset = {"train": train_iter, "dev": dev_iter, "test": test_iter}

    processor = Processor(cfg, dataset, vocab, dialog_paths)

    if args.run_mode == 'train':
        if cfg.model == 'dsi-gm':
            processor.pre_train()
        processor.train()
        predictions = processor.predict()
        turn_predictions, joint_predictions = pred_utils.get_predicted_dialogs(
            predictions, dialog_paths['test'], cfg.threshold)
        turn_metric, joint_metric = processor.evaluate(turn_predictions,
                                                       joint_predictions)
    else:
        processor.model.load_cpu_model(cfg.model_path)
        predictions = processor.predict()
        turn_predictions, joint_predictions = pred_utils.get_predicted_dialogs(
            predictions, dialog_paths['test'], cfg.threshold)
        turn_metric, joint_metric = processor.evaluate(turn_predictions,
                                                       joint_predictions)

    print("Turn level metrics:")
    print(f"ACC: {turn_metric.acc_score:.1%}, F1: {turn_metric.f1_score:.1%}, \
            P: {turn_metric.precision_score:.1%}, R: {turn_metric.recall_score:.1%}"
          )
    print("Joint level metrics:")
    print(
        f"ACC: {joint_metric.acc_score:.1%}, F1: {joint_metric.f1_score:.1%}, \
            P: {joint_metric.precision_score:.1%}, R: {joint_metric.recall_score:.1%}"
    )
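
The two metric printouts at the end of Example #8 differ only in their label. A minimal sketch of a shared helper (print_metrics is a hypothetical name; it assumes only the acc_score, f1_score, precision_score, and recall_score attributes used above):

def print_metrics(label, metric):
    # One line of accuracy / F1 / precision / recall, formatted as percentages.
    print(f"{label} level metrics:")
    print(f"ACC: {metric.acc_score:.1%}, F1: {metric.f1_score:.1%}, "
          f"P: {metric.precision_score:.1%}, R: {metric.recall_score:.1%}")

# Usage with the objects computed above:
# print_metrics("Turn", turn_metric)
# print_metrics("Joint", joint_metric)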