Example #1
    def __init__(self, model_name, model_step=None, use_gpu=False):
        # if model_name not in ["HIER", "HIERDIV", "AMI_MT_DIV", "SPOTIFY_short", "SPOTIFY_long"]:
        #     raise ValueError("unknown model name")

        if model_name == "HIER":
            self.model_path = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_CNNDM_FEB26A-ep17.pt"
            self.load_option = 1
        elif model_name == "HIERDIV":
            self.model_path = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_CNNDMDIV_APR14A-ep02.pt"
            self.load_option = 2
        elif model_name == "AMI_MT_DIV":
            self.model_path = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_APR12ALL100-ep20.pt"
            self.load_option = 1

        elif model_name == "SPOTIFY_short":
            self.model_path = "/home/alta/summary/pm574/summariser1/lib/trained_models_spotify/HGRUV5DIV_SPOTIFY_JUNE18_v4-step{}.pt".format(
                model_step)
            self.load_option = 2

        elif model_name == "SPOTIFY_long":
            self.model_path = "/home/alta/summary/pm574/summariser1/lib/trained_models_spotify/HGRUV5DIV_SPOTIFY_JUNE18_v3-step{}.pt".format(
                model_step)
            self.load_option = 2

        else:
            self.model_path = model_name
            self.load_option = 2  # new version

        args = {}
        args['vocab_size'] = 30522  # BERT tokenizer
        args['embedding_dim'] = 256  # word embedding dimension
        args['rnn_hidden_size'] = 512  # RNN hidden size
        args['dropout'] = 0.0
        args['num_layers_enc'] = 2
        args['num_layers_dec'] = 1
        args['memory_utt'] = False

        self.device = 'cuda' if use_gpu else 'cpu'
        self.model = EncoderDecoder(args, self.device)
        self.load_model()
        self.model.eval()
Example #2
def train_v5(start_idx, end_idx):
    print("Start training hierarchical RNN model")
    # ---------------------------------------------------------------------------------- #
    args = {}
    args['use_gpu'] = True
    args['num_utterances'] = 1500  # max no. utterance in a meeting
    args['num_words'] = 64  # max no. words in an utterance
    args['summary_length'] = 300  # max no. words in a summary
    args['summary_type'] = 'short'  # long or short summary
    args['vocab_size'] = 30522  # BERT tokenizer
    args['embedding_dim'] = 256  # word embedding dimension
    args['rnn_hidden_size'] = 512  # RNN hidden size

    args['dropout'] = 0.1
    args['num_layers_enc'] = 2  # in total it's num_layers_enc*2 (word/utt)
    args['num_layers_dec'] = 1

    args['batch_size'] = 1

    args['memory_utt'] = False

    args['model_save_dir'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/"
    args['load_model'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_APR16C2-ep0.pt"
    load_option = 2  # 1=old | 2=new
    # ---------------------------------------------------------------------------------- #
    # print_config(args)

    if args['use_gpu']:
        if 'X_SGE_CUDA_DEVICE' in os.environ:  # to run on CUED stack machine
            print('running on the stack... 1 GPU')
            cuda_device = os.environ['X_SGE_CUDA_DEVICE']
            print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device))
            os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device
        else:
            print('running locally...')
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # choose the device (GPU) here
        device = 'cuda'
    else:
        device = 'cpu'
    print("device = {}".format(device))

    train_data = load_ami_data('test')  # NB: despite the name, this loads the AMI test split for decoding

    model = EncoderDecoder(args, device=device)
    # print(model)

    # Load model if specified (path to pytorch .pt)
    if args['load_model'] is not None:
        trained_model = args['load_model']
        if device == 'cuda':
            try:
                state = torch.load(trained_model)
                if load_option == 1:
                    model.load_state_dict(state)
                elif load_option == 2:
                    model_state_dict = state['model']
                    model.load_state_dict(model_state_dict)
            except RuntimeError:  # need to remove module
                # Main model
                model_state_dict = torch.load(trained_model)
                new_model_state_dict = OrderedDict()
                for key in model_state_dict.keys():
                    new_model_state_dict[key.replace(
                        "module.", "")] = model_state_dict[key]
                model.load_state_dict(new_model_state_dict)
        else:
            try:
                state = torch.load(trained_model,
                                   map_location=torch.device('cpu'))
                if load_option == 1:
                    model.load_state_dict(state)
                elif load_option == 2:
                    model_state_dict = state['model']
                    model.load_state_dict(model_state_dict)
            except RuntimeError:  # need to remove the "module." prefix
                model_state_dict = torch.load(trained_model,
                                              map_location=torch.device('cpu'))
                new_model_state_dict = OrderedDict()
                for key in model_state_dict.keys():
                    new_model_state_dict[key.replace(
                        "module.", "")] = model_state_dict[key]
                model.load_state_dict(new_model_state_dict)
        model.eval()
        print("Loaded model from {}".format(args['load_model']))
    else:
        print("Train a new model")

    print("Train a new model")

    # Hyperparameters
    BATCH_SIZE = args['batch_size']
    if BATCH_SIZE != 1: raise ValueError("Batch Size must be 1")

    num_train_data = len(train_data)
    num_batches = int(num_train_data / BATCH_SIZE)
    print("num_batches = {}".format(num_batches))

    idx = 0

    decode_dict = {
        'batch_size': BATCH_SIZE,
        'k': 10,
        'search_method': 'argmax',
        'time_step': args['summary_length'],
        'vocab_size': 30522,
        'device': device,
        'start_token_id': START_TOKEN_ID,
        'stop_token_id': STOP_TOKEN_ID,
        'alpha': 2.0,
        'length_offset': 5,
        'penalty_ug': 0.0,
        'keypadmask_dtype': KEYPADMASK_DTYPE,
        'memory_utt': args['memory_utt']
    }

    print("DECODING: {}".format(decoding_method))

    with torch.no_grad():
        for bn in range(start_idx, end_idx):

            if check_if_id_exists(bn): continue

            input, u_len, w_len, target, tgt_len = get_a_batch(
                train_data, bn, BATCH_SIZE, args['num_utterances'],
                args['num_words'], args['summary_length'],
                args['summary_type'], device)

            # decoder target
            decoder_target, decoder_mask = shift_decoder_target(
                target, tgt_len, device, mask_offset=True)
            decoder_target = decoder_target.view(-1)
            decoder_mask = decoder_mask.view(-1)

            if teacherforcing:
                try:
                    # decoder_output = model(input, u_len, w_len, target)
                    decoder_output, _, attn_scores, _, u_attn_scores = model(
                        input, u_len, w_len, target)
                except IndexError:
                    print(
                        "there is an IndexError --- likely from if segment_indices[bn][-1] == u_len[bn]-1:"
                    )
                    print("for now just skip this batch!")
                    idx += BATCH_SIZE  # previously I forgot to add this line!
                    continue

                output = torch.argmax(decoder_output,
                                      dim=-1).cpu().numpy().tolist()[0]
                max_l = decoder_output.size(1)

            else:
                output, attn_score, attn_score_u = model.decode_beamsearch(
                    input, u_len, w_len, decode_dict)

                # shift decoder_output by one
                output = output.tolist()
                max_l = len(output)
                u_attn_scores = attn_score_u.unsqueeze(0)

            try:
                dec_len = output.index(103)  # 103 = stop token id (the [MASK] id in the BERT vocab, reused as STOP)
            except ValueError:
                dec_len = max_l
            dec_sep_pos = []
            for i, v in enumerate(output):
                if i == dec_len: break
                if v == 102: dec_sep_pos.append(i)  # 102 = [SEP], marks sentence boundaries

            if len(dec_sep_pos) == 0:
                dec_sep_pos.append(max_l)

            enc_len = u_len[0]
            dec_start_pos = [0] + [x + 1 for x in dec_sep_pos[:-1]]
            this_attn = u_attn_scores[0, :dec_len, :enc_len].cpu()

            mean_div_within_sentence, mean_div_between_sentences = diversity1_sent(
                this_attn, dec_start_pos, dec_sep_pos)
            write_attn_scores(bn, mean_div_within_sentence,
                              mean_div_between_sentences)
Example #3
def train_v5():
    print("Start training hierarchical RNN model")
    # ---------------------------------------------------------------------------------- #
    args = {}
    args['use_gpu']        = True
    args['num_utterances'] = 1500  # max no. utterance in a meeting
    args['num_words']      = 64    # max no. words in an utterance
    args['summary_length'] = 300   # max no. words in a summary
    args['summary_type']   = 'short'   # long or short summary
    args['vocab_size']     = 30522 # BERT tokenizer
    args['embedding_dim']   = 256   # word embedding dimension
    args['rnn_hidden_size'] = 512 # RNN hidden size

    args['dropout']        = 0.1
    args['num_layers_enc'] = 2    # in total it's num_layers_enc*2 (word/utt)
    args['num_layers_dec'] = 1

    args['batch_size']      = 1
    args['update_nbatches'] = 2
    args['num_epochs']      = 20
    args['random_seed']     = 777
    args['best_val_loss']     = 1e+10
    args['val_batch_size']    = 1 # 1 for now --- evaluate ROUGE
    args['val_stop_training'] = 5

    args['lr']         = 1.0
    args['adjust_lr']  = True     # if True overwrite the learning rate above
    args['initial_lr'] = 0.01       # lr = lr_0*step^(-decay_rate)
    args['decay_rate'] = 0.5
    args['label_smoothing'] = 0.1

    args['a_da']  = 0.2
    args['a_ext'] = 0.2
    args['a_cov'] = 0.0
    args['a_div'] = 1.0

    args['memory_utt'] = False

    args['model_save_dir'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/"
    args['load_model'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_CNNDM_FEB26A-ep12-bn0" # add .pt later
    # args['load_model'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_FEB28A-ep6"
    # args['load_model'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5MEM_APR8A-ep1"
    # args['load_model'] = None
    args['model_name'] = 'HGRUV5_APR16H5'
    # ---------------------------------------------------------------------------------- #
    print_config(args)

    if args['use_gpu']:
        if 'X_SGE_CUDA_DEVICE' in os.environ: # to run on CUED stack machine
            print('running on the stack... 1 GPU')
            cuda_device = os.environ['X_SGE_CUDA_DEVICE']
            print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device))
            os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device
        else:
            print('running locally...')
            os.environ["CUDA_VISIBLE_DEVICES"] = '0' # choose the device (GPU) here
        device = 'cuda'
    else:
        device = 'cpu'
    print("device = {}".format(device))

    # random seed
    random.seed(args['random_seed'])
    torch.manual_seed(args['random_seed'])
    np.random.seed(args['random_seed'])

    train_data = load_ami_data('train')
    valid_data = load_ami_data('valid')
    # move 6 meetings from the validation set into the training set
    random.shuffle(valid_data)
    train_data.extend(valid_data[:6])
    valid_data = valid_data[6:]

    model = EncoderDecoder(args, device=device)
    print(model)
    NUM_DA_TYPES = len(DA_MAPPING)
    da_labeller = DALabeller(args['rnn_hidden_size'], NUM_DA_TYPES, device)
    print(da_labeller)
    ext_labeller = EXTLabeller(args['rnn_hidden_size'], device)
    print(ext_labeller)

    # Load model if specified (path to pytorch .pt)
    if args['load_model'] is not None:
        model_path = args['load_model'] + '.pt'
        try:
            model.load_state_dict(torch.load(model_path))
        except RuntimeError: # need to remove module
            # Main model
            model_state_dict = torch.load(model_path)
            new_model_state_dict = OrderedDict()
            for key in model_state_dict.keys():
                new_model_state_dict[key.replace("module.","")] = model_state_dict[key]

            if args['memory_utt']:
                model.load_state_dict(new_model_state_dict, strict=False)
            else:
                model.load_state_dict(new_model_state_dict)

        model.train()
        print("Loaded model from {}".format(args['load_model']))
    else:
        print("Train a new model")


    # Hyperparameters
    BATCH_SIZE = args['batch_size']
    NUM_EPOCHS = args['num_epochs']
    VAL_BATCH_SIZE = args['val_batch_size']
    VAL_STOP_TRAINING = args['val_stop_training']

    if args['label_smoothing'] > 0.0:
        criterion = LabelSmoothingLoss(num_classes=args['vocab_size'],
                        smoothing=args['label_smoothing'], reduction='none')
    else:
        criterion = nn.NLLLoss(reduction='none')

    da_criterion = nn.NLLLoss(reduction='none')
    ext_criterion = nn.BCELoss(reduction='none')

    # ONLY train the memory part #
    # for name, param in model.named_parameters():
    #     if "utt" in name:
    #         pass
    #     else:
    #         param.requires_grad = False
    # optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),lr=args['lr'],betas=(0.9,0.999),eps=1e-08,weight_decay=0)
    # -------------------------- #

    optimizer = optim.Adam(model.parameters(),lr=args['lr'],betas=(0.9,0.999),eps=1e-08,weight_decay=0)
    optimizer.zero_grad()

    # DA labeller optimiser
    da_optimizer = optim.Adam(da_labeller.parameters(),lr=args['lr'],betas=(0.9,0.999),eps=1e-08,weight_decay=0)
    da_optimizer.zero_grad()

    # extractive labeller optimiser
    ext_optimizer = optim.Adam(ext_labeller.parameters(),lr=args['lr'],betas=(0.9,0.999),eps=1e-08,weight_decay=0)
    ext_optimizer.zero_grad()

    # validation losses
    best_val_loss = args['best_val_loss']
    best_epoch    = 0
    stop_counter  = 0

    training_step = 0

    for epoch in range(NUM_EPOCHS):
        print("======================= Training epoch {} =======================".format(epoch))
        num_train_data = len(train_data)
        # num_batches = int(num_train_data/BATCH_SIZE) + 1
        num_batches = int(num_train_data/BATCH_SIZE)
        print("num_batches = {}".format(num_batches))

        print("shuffle train data")
        random.shuffle(train_data)

        idx = 0

        for bn in range(num_batches):

            input, u_len, w_len, target, tgt_len, _, dialogue_acts, extractive_label = get_a_batch(
                    train_data, idx, BATCH_SIZE,
                    args['num_utterances'], args['num_words'],
                    args['summary_length'], args['summary_type'], device)

            # decoder target
            decoder_target, decoder_mask = shift_decoder_target(target, tgt_len, device, mask_offset=True)
            decoder_target = decoder_target.view(-1)
            decoder_mask = decoder_mask.view(-1)

            decoder_output, u_output, attn_scores, cov_scores, u_attn_scores = model(input, u_len, w_len, target)

            loss = criterion(decoder_output.view(-1, args['vocab_size']), decoder_target)
            loss = (loss * decoder_mask).sum() / decoder_mask.sum()

            # COVLOSS:
            # loss_cov = compute_covloss(attn_scores, cov_scores)
            # loss_cov = (loss_cov.view(-1) * decoder_mask).sum() / decoder_mask.sum()

            # Diversity Loss (4):
            intra_div, inter_div = diverisity_loss(u_attn_scores, decoder_target, u_len, tgt_len)
            if inter_div == 0:
                loss_div = 0
            else:
                loss_div = intra_div/inter_div


            # multitask(2): dialogue act prediction
            da_output = da_labeller(u_output)
            loss_utt_mask = length2mask(u_len, BATCH_SIZE, args['num_utterances'], device)
            loss_da = da_criterion(da_output.view(-1, NUM_DA_TYPES), dialogue_acts.view(-1)).view(BATCH_SIZE, -1)
            loss_da = (loss_da * loss_utt_mask).sum() / loss_utt_mask.sum()

            # multitask(3): extractive label prediction
            ext_output = ext_labeller(u_output).squeeze(-1)
            loss_ext = ext_criterion(ext_output, extractive_label)
            loss_ext = (loss_ext * loss_utt_mask).sum() / loss_utt_mask.sum()

            # total_loss = loss + args['a_da']*loss_da + args['a_ext']*loss_ext + args['a_cov']*loss_cov
            total_loss = loss + args['a_da']*loss_da + args['a_ext']*loss_ext + args['a_div']*loss_div
            # total_loss = loss + args['a_da']*loss_da + args['a_ext']*loss_ext
            # total_loss = loss + args['a_div']*loss_div

            total_loss.backward()
            # loss.backward()

            idx += BATCH_SIZE

            if bn % args['update_nbatches'] == 0:
                # gradient_clipping
                max_norm = 0.5
                nn.utils.clip_grad_norm_(model.parameters(), max_norm)
                nn.utils.clip_grad_norm_(da_labeller.parameters(), max_norm)
                nn.utils.clip_grad_norm_(ext_labeller.parameters(), max_norm)
                # update the gradients
                if args['adjust_lr']:
                    adjust_lr(optimizer, args['initial_lr'], args['decay_rate'], training_step)
                    adjust_lr(da_optimizer, args['initial_lr'], args['decay_rate'], training_step)
                    adjust_lr(ext_optimizer, args['initial_lr'], args['decay_rate'], training_step)
                optimizer.step()
                optimizer.zero_grad()
                da_optimizer.step()
                da_optimizer.zero_grad()
                ext_optimizer.step()
                ext_optimizer.zero_grad()
                training_step += args['batch_size']*args['update_nbatches']

            if bn % 1 == 0:
                print("[{}] batch {}/{}: loss = {:.5f} | loss_div = {:.5f} | loss_da = {:.5f} | loss_ext = {:.5f}".
                    format(str(datetime.now()), bn, num_batches, loss, loss_div, loss_da, loss_ext))
                # print("[{}] batch {}/{}: loss = {:.5f} | loss_da = {:.5f} | loss_ext = {:.5f}".
                #     format(str(datetime.now()), bn, num_batches, loss, loss_da, loss_ext))
                # print("[{}] batch {}/{}: loss = {:.5f} | loss_div = {:.5f}".
                    # format(str(datetime.now()), bn, num_batches, loss, loss_div))
                # print("[{}] batch {}/{}: loss = {:.5f}".format(str(datetime.now()), bn, num_batches, loss))
                sys.stdout.flush()

            if bn % 10 == 0:

                print("======================== GENERATED SUMMARY ========================")
                print(bert_tokenizer.decode(torch.argmax(decoder_output[0], dim=-1).cpu().numpy()[:tgt_len[0]]))
                print("======================== REFERENCE SUMMARY ========================")
                print(bert_tokenizer.decode(decoder_target.view(BATCH_SIZE,args['summary_length'])[0,:tgt_len[0]].cpu().numpy()))

            if bn == 0: # e.g. eval every epoch
                # ---------------- Evaluate the model on validation data ---------------- #
                print("Evaluating the model at epoch {} step {}".format(epoch, bn))
                print("learning_rate = {}".format(optimizer.param_groups[0]['lr']))

                # switch to evaluation mode
                model.eval()
                da_labeller.eval()
                ext_labeller.eval()

                with torch.no_grad():
                    avg_val_loss = evaluate(model, valid_data, VAL_BATCH_SIZE, args, device, use_rouge=True)
                    # avg_val_loss = evaluate_greedy(model, valid_data, VAL_BATCH_SIZE, args, device)

                print("avg_val_loss_per_token = {}".format(avg_val_loss))

                # switch to training mode
                model.train()
                da_labeller.train()
                ext_labeller.train()
                # ------------------- Save the model OR Stop training ------------------- #
                state = {
                    'epoch': epoch, 'bn': bn,
                    'training_step': training_step,
                    'model': model.state_dict(),
                    'da_labeller': da_labeller.state_dict(),
                    'ext_labeller': ext_labeller.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_val_loss': best_val_loss
                }
                if avg_val_loss < best_val_loss:
                    stop_counter = 0
                    best_val_loss = avg_val_loss
                    best_epoch = epoch

                    savepath = args['model_save_dir']+"model-{}-ep{}.pt".format(args['model_name'], 999) # 999 = best
                    torch.save(state, savepath)
                    print("Model improved & saved at {}".format(savepath))
                else:
                    print("Model not improved #{}".format(stop_counter))
                    savepath = args['model_save_dir']+"model-{}-ep{}.pt".format(args['model_name'], 000) # 000 = current
                    torch.save(state, savepath)
                    print("Model NOT improved & saved at {}".format(savepath))
                    if stop_counter < VAL_STOP_TRAINING:
                        print("Just continue training ---- no loading old weights")
                        stop_counter += 1
                    else:
                        print("Model has not improved for {} times! Stop training.".format(VAL_STOP_TRAINING))
                        return

    print("End of training hierarchical RNN model")
Example #4
def train_v5_cnndm():
    print("Start training hierarchical RNN model")
    # ---------------------------------------------------------------------------------- #
    args = {}
    args['use_gpu'] = True
    args['num_utterances'] = 640  # max no. utterance in a meeting
    args['num_words'] = 50  # max no. words in an utterance
    args['summary_length'] = 144  # max no. words in a summary
    args['summary_type'] = 'long'  # long or short summary
    args['vocab_size'] = 30522  # BERT tokenizer
    args['embedding_dim'] = 256  # word embedding dimension
    args['rnn_hidden_size'] = 512  # RNN hidden size

    args['dropout'] = 0.1
    args['num_layers_enc'] = 2  # in total it's num_layers_enc*2 (word/utt)
    args['num_layers_dec'] = 1

    args['random_seed'] = 78
    # args['a_div']       = 1.0
    args['memory_utt'] = False

    args['model_save_dir'] = "/home/alta/summary/pm574/summariser1/lib/trained_models_spotify/"
    args['load_model'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_CNNDMDIV_APR14A-ep02.pt"
    args['model_name'] = 'HGRUV5DIV_SPOTIFY_JUNE18_v2'
    # ---------------------------------------------------------------------------------- #
    print_config(args)

    if args['use_gpu']:
        if 'X_SGE_CUDA_DEVICE' in os.environ:  # to run on CUED stack machine
            print('running on the stack... 1 GPU')
            cuda_device = os.environ['X_SGE_CUDA_DEVICE']
            print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device))
            os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device
        else:
            print('running locally...')
            os.environ["CUDA_VISIBLE_DEVICES"] = '0,1,2,3'  # choose the device (GPU) here
        device = 'cuda'
    else:
        device = 'cpu'
    print("device = {}".format(device))

    # random seed
    random.seed(args['random_seed'])
    torch.manual_seed(args['random_seed'])
    np.random.seed(args['random_seed'])

    # Data
    podcasts = load_podcast_data(sets=-1)
    batcher = HierBatcher(bert_tokenizer, args, podcasts, device)
    val_podcasts = load_podcast_data(sets=[10])
    val_batcher = HierBatcher(bert_tokenizer, args, val_podcasts, device)

    model = EncoderDecoder(args, device=device)
    # print(model)

    # Load model if specified (path to pytorch .pt)
    state = torch.load(args['load_model'])
    model_state_dict = state['model']
    model.load_state_dict(model_state_dict)
    print("load succesful #1")

    criterion = nn.NLLLoss(reduction='none')

    # a single optimiser for the whole model (adjust_lr below re-sets the learning rate)
    optimizer = optim.Adam(model.parameters(),
                           lr=2e-20,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0)
    optimizer.zero_grad()

    # validation losses
    training_step = 0
    batch_size = 4 * 4
    gradient_accum = 1
    total_step = 1000000
    valid_step = 2000
    best_val_loss = 99999999
    stop_counter = 0  # consecutive validations without improvement

    # to use multiple GPUs
    if torch.cuda.device_count() > 1:
        print("Multiple GPUs: {}".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)

    while training_step < total_step:

        # get a batch
        input, u_len, w_len, target, tgt_len = batcher.get_a_batch(batch_size)

        # decoder target
        decoder_target, decoder_mask = shift_decoder_target(target,
                                                            tgt_len,
                                                            device,
                                                            mask_offset=True)
        decoder_target = decoder_target.view(-1)
        decoder_mask = decoder_mask.view(-1)

        # decoder_output, _, _, _, u_attn_scores = model(input, u_len, w_len, target)
        decoder_output = model(input, u_len, w_len, target)

        loss = criterion(decoder_output.view(-1, args['vocab_size']),
                         decoder_target)
        loss = (loss * decoder_mask).sum() / decoder_mask.sum()
        loss.backward()

        # Diversity Loss:
        # if batch_size == 1:
        #     intra_div, inter_div = diverisity_loss(u_attn_scores, decoder_target, u_len, tgt_len)
        #     if inter_div == 0:
        #         loss_div = 0
        #     else:
        #         loss_div = intra_div/inter_div
        # else:
        #     dec_target_i = 0
        #     loss_div     = 0
        #     for bi in range(batch_size):
        #         one_u_attn_scores  = u_attn_scores[bi:bi+1,:,:]
        #         one_decoder_target = decoder_target[dec_target_i:dec_target_i+args['summary_length']]
        #         one_u_len   = u_len[bi:bi+1]
        #         one_tgt_len = tgt_len[bi:bi+1]
        #         intra_div, inter_div = diverisity_loss(one_u_attn_scores, one_decoder_target, one_u_len, one_tgt_len)
        #         if inter_div == 0:
        #             loss_div += 0
        #         else:
        #             loss_div += intra_div/inter_div
        #         dec_target_i += args['summary_length']
        #     loss_div /= batch_size
        #
        # total_loss = loss + args['a_div']*loss_div
        # total_loss.backward()

        if training_step % gradient_accum == 0:
            adjust_lr(optimizer, training_step)
            optimizer.step()
            optimizer.zero_grad()

        if training_step % 1 == 0:
            # print("[{}] step {}/{}: loss = {:.5f} | loss_div = {:.5f}".format(
            #     str(datetime.now()), training_step, total_step, loss, loss_div))
            print("[{}] step {}/{}: loss = {:.5f}".format(
                str(datetime.now()), training_step, total_step, loss))
            sys.stdout.flush()

        if training_step % 10 == 0:
            print(
                "======================== GENERATED SUMMARY ========================"
            )
            print(
                bert_tokenizer.decode(
                    torch.argmax(decoder_output[0],
                                 dim=-1).cpu().numpy()[:tgt_len[0]]))
            print(
                "======================== REFERENCE SUMMARY ========================"
            )
            print(
                bert_tokenizer.decode(
                    decoder_target.view(
                        batch_size,
                        args['summary_length'])[0, :tgt_len[0]].cpu().numpy()))

        if training_step % valid_step == 0:
            # ---------------- Evaluate the model on validation data ---------------- #
            print("Evaluating the model at training step {}".format(
                training_step))
            print("learning_rate = {}".format(optimizer.param_groups[0]['lr']))
            # switch to evaluation mode
            model.eval()
            with torch.no_grad():
                valid_loss = evaluate(model, val_batcher, batch_size, args,
                                      device)
            print("valid_loss = {}".format(valid_loss))
            # switch to training mode
            model.train()
            if valid_loss < best_val_loss:
                stop_counter = 0
                best_val_loss = valid_loss
                print("Model improved".format(stop_counter))
            else:
                stop_counter += 1
                print("Model not improved #{}".format(stop_counter))
                if stop_counter == 3:
                    print("Stop training!")
                    return
            state = {
                'training_step': training_step,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'best_val_loss': best_val_loss
            }
            savepath = args['model_save_dir'] + "{}-step{}.pt".format(
                args['model_name'], training_step)
            torch.save(state, savepath)
            print("Saved at {}".format(savepath))

        training_step += 1

    print("End of training hierarchical RNN model")
Example #5
def train_mbr():
    print("Start training hierarchical RNN model")
    # ---------------------------------------------------------------------------------- #
    args = {}
    args['use_gpu'] = True
    args['num_utterances'] = 1200  # max no. utterance in a meeting
    args['num_words'] = 50  # max no. words in an utterance
    args['summary_length'] = 300  # max no. words in a summary
    args['summary_type'] = 'short'  # long or short summary
    args['vocab_size'] = 30522  # BERT tokenizer
    args['embedding_dim'] = 256  # word embedding dimension
    args['rnn_hidden_size'] = 512  # RNN hidden size

    args['dropout'] = 0.1
    args['num_layers_enc'] = 2  # in total it's num_layers_enc*3 (word/utt/seg)
    args['num_layers_dec'] = 1

    args['batch_size'] = 1
    args['update_nbatches'] = 1
    args['num_epochs'] = 10
    args['random_seed'] = 88
    args['best_val_loss'] = 1e+10
    args['val_batch_size'] = 1  # 1 for now --- evaluate ROUGE
    args['val_stop_training'] = 50

    args['lr'] = 1e-3
    args['adjust_lr'] = False  # if True overwrite the learning rate above
    args['initial_lr'] = 0.2  # lr = lr_0*step^(-decay_rate)
    args['decay_rate'] = 0.5

    args['a_ts'] = 0.0
    args['a_da'] = 0.0
    args['a_ext'] = 0.0
    args['a_cov'] = 0.0

    args['N'] = 10
    args['lambda'] = 0.0

    args['model_save_dir'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/"
    args['load_model'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_FEB25A-ep8"  # add .pt later
    # args['load_model'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_CNNDM_FEB26A-ep8-bn2000" # add .pt later
    # args['load_model'] = None
    args['model_name'] = 'MBR_GS_FEB27C'
    # ---------------------------------------------------------------------------------- #
    print_config(args)

    if args['use_gpu']:
        if 'X_SGE_CUDA_DEVICE' in os.environ:  # to run on CUED stack machine
            print('running on the stack... 1 GPU')
            cuda_device = os.environ['X_SGE_CUDA_DEVICE']
            print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device))
            os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device
        else:
            print('running locally...')
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # choose the device (GPU) here
        device = 'cuda'
    else:
        device = 'cpu'
    print("device = {}".format(device))

    # random seed
    random.seed(args['random_seed'])
    torch.manual_seed(args['random_seed'])
    np.random.seed(args['random_seed'])

    train_data = load_ami_data('train')
    valid_data = load_ami_data('valid')
    # make the training data 100
    # random.shuffle(valid_data)
    # train_data.extend(valid_data[:6])
    # valid_data = valid_data[6:]

    model = EncoderDecoder(args, device=device)
    print(model)

    # to use multiple GPUs
    if torch.cuda.device_count() > 1:
        print("Multiple GPUs: {}".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)

    # Load model if specified (path to pytorch .pt)
    if args['load_model'] is not None:
        model_path = args['load_model'] + '.pt'
        if device == 'cuda':
            try:
                model.load_state_dict(torch.load(model_path))
            except RuntimeError:  # need to remove module
                # Main model
                model_state_dict = torch.load(model_path)
                new_model_state_dict = OrderedDict()
                for key in model_state_dict.keys():
                    new_model_state_dict[key.replace(
                        "module.", "")] = model_state_dict[key]
                model.load_state_dict(new_model_state_dict)
        else:
            model.load_state_dict(
                torch.load(model_path, map_location=torch.device('cpu')))
        model.train()
        print("Loaded model from {}".format(args['load_model']))
    else:
        print("Train a new model")

    # Hyperparameters
    BATCH_SIZE = args['batch_size']
    NUM_EPOCHS = args['num_epochs']
    VAL_BATCH_SIZE = args['val_batch_size']
    VAL_STOP_TRAINING = args['val_stop_training']

    # Cross-Entropy
    ce_criterion = nn.NLLLoss(reduction='none')

    # a single optimiser for the whole model
    optimizer = optim.Adam(model.parameters(),
                           lr=args['lr'],
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0)
    optimizer.zero_grad()

    # validation losses
    best_val_loss = args['best_val_loss']
    best_epoch = 0
    stop_counter = 0

    training_step = 0

    for epoch in range(NUM_EPOCHS):
        print(
            "======================= Training epoch {} ======================="
            .format(epoch))
        num_train_data = len(train_data)
        num_batches = int(num_train_data / BATCH_SIZE)
        print("num_batches = {}".format(num_batches))

        print("shuffle train data")
        random.shuffle(train_data)

        idx = 0

        for bn in range(num_batches):

            input, u_len, w_len, target, tgt_len, _, _, _ = get_a_batch(
                train_data, idx, BATCH_SIZE, args['num_utterances'],
                args['num_words'], args['summary_length'],
                args['summary_type'], device)

            # decoder target
            decoder_target, decoder_mask = shift_decoder_target(
                target, tgt_len, device, mask_offset=True)
            # LOSS - Minimum Bayes Risk
            printout = (bn % 1 == 0)  # True for every batch; raise the modulus to print less often
            R1, R2, RL = grad_sampling(model,
                                       input,
                                       u_len,
                                       w_len,
                                       decoder_target,
                                       num_samples=args['N'],
                                       lambda1=args['lambda'],
                                       device=device,
                                       printout=printout)

            # LOSS - Cross Entropy
            decoder_target = decoder_target.view(-1)
            decoder_mask = decoder_mask.view(-1)
            decoder_output, _, _, _ = model(input, u_len, w_len, target)
            loss = ce_criterion(decoder_output.view(-1, args['vocab_size']),
                                decoder_target)
            loss = (loss * decoder_mask).sum() / decoder_mask.sum()
            loss.backward()

            # R1, R2, RL = beamsearch_approx(model,
            #                     input, u_len, w_len, decoder_target,
            #                     beam_width=args['N'],
            #                     device=device, printout=printout)

            idx += BATCH_SIZE

            if bn % args['update_nbatches'] == 0:
                # update the gradients
                if args['adjust_lr']:
                    adjust_lr(optimizer, args['initial_lr'],
                              args['decay_rate'], training_step)
                optimizer.step()
                optimizer.zero_grad()
                training_step += args['batch_size'] * args['update_nbatches']

            if bn % 1 == 0:
                print(
                    "[{}] batch {}/{}: loss = {:.5f} | R1={:.2f} | R2={:.2f} | RL = {:.2f}"
                    .format(str(datetime.now()), bn, num_batches, loss,
                            R1 * 100, R2 * 100, RL * 100))
                print(
                    "-----------------------------------------------------------------------------------------"
                )
                sys.stdout.flush()

            if bn % 19 == 0:  # e.g. eval every epoch
                # ---------------- Evaluate the model on validation data ---------------- #
                print("Evaluating the model at epoch {} step {}".format(
                    epoch, bn))
                print("learning_rate = {}".format(
                    optimizer.param_groups[0]['lr']))

                # switch to evaluation mode
                model.eval()

                with torch.no_grad():
                    avg_val_loss = evaluate_greedy(model, valid_data,
                                                   VAL_BATCH_SIZE, args,
                                                   device)
                    # avg_val_loss = evaluate(model, valid_data, VAL_BATCH_SIZE, args, device, use_rouge=True)

                print("avg_val_loss_per_token = {}".format(avg_val_loss))

                # switch to training mode
                model.train()
                # ------------------- Save the model OR Stop training ------------------- #
                if avg_val_loss < best_val_loss:
                    stop_counter = 0
                    best_val_loss = avg_val_loss
                    best_epoch = epoch

                    savepath = args['model_save_dir'] + "model-{}-ep{}-bn{}.pt".format(
                        args['model_name'], epoch, bn)

                    torch.save(model.state_dict(), savepath)
                    print("Model improved & saved at {}".format(savepath))
                else:
                    print("Model not improved #{}".format(stop_counter))
                    if stop_counter < VAL_STOP_TRAINING:
                        print(
                            "Just continue training ---- no loading old weights"
                        )
                        stop_counter += 1
                    else:
                        print(
                            "Model has not improved for {} times! Stop training."
                            .format(VAL_STOP_TRAINING))
                        return

    print("End of training hierarchical RNN model")
Example #6
def train_v5():
    print("Start training hierarchical RNN model")
    # ---------------------------------------------------------------------------------- #
    args = {}
    args['use_gpu'] = True
    args['num_utterances'] = 50  # max no. utterance in a meeting
    args['num_words'] = 32  # max no. words in an utterance
    args['summary_length'] = 144  # max no. words in a summary
    args['summary_type'] = 'long'  # long or short summary
    args['vocab_size'] = 30522  # BERT tokenizer
    args['embedding_dim'] = 256  # word embedding dimension
    args['rnn_hidden_size'] = 512  # RNN hidden size

    args['dropout'] = 0.1
    args['num_layers_enc'] = 2  # in total it's num_layers_enc*2 (word/utt)
    args['num_layers_dec'] = 1

    args['batch_size'] = 8
    args['update_nbatches'] = 1
    args['num_epochs'] = 20
    args['random_seed'] = 78
    args['best_val_loss'] = 1e+10
    args['val_batch_size'] = 64  # 1 for now --- evaluate ROUGE
    args['val_stop_training'] = 10

    args['adjust_lr'] = True  # if True overwrite the learning rate above
    args['initial_lr'] = 1e-2  # lr = lr_0*step^(-decay_rate)
    args['decay_rate'] = 0.25
    args['label_smoothing'] = 0.1

    args['model_save_dir'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/"
    args['load_model'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_CNNDM_FEB26A-ep17-bn0.pt"
    # args['load_model'] = None
    args['model_name'] = 'HGRUV5_CNNDM_APR1'
    # ---------------------------------------------------------------------------------- #
    print_config(args)

    if args['use_gpu']:
        if 'X_SGE_CUDA_DEVICE' in os.environ:  # to run on CUED stack machine
            print('running on the stack... 1 GPU')
            cuda_device = os.environ['X_SGE_CUDA_DEVICE']
            print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device))
            os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device
        else:
            print('running locally...')
            os.environ["CUDA_VISIBLE_DEVICES"] = '1'  # choose the device (GPU) here
        device = 'cuda'
    else:
        device = 'cpu'
    print("device = {}".format(device))

    # random seed
    random.seed(args['random_seed'])
    torch.manual_seed(args['random_seed'])
    np.random.seed(args['random_seed'])

    args['model_data_dir'] = "/home/alta/summary/pm574/summariser0/lib/model_data/"
    args['max_pos_embed'] = 512
    args['max_num_sentences'] = 32
    args['max_summary_length'] = args['summary_length']
    train_data = load_cnndm_data(args, 'trainx', dump=False)
    # train_data = load_cnndm_data(args, 'test', dump=False)
    # print("loaded TEST data")
    valid_data = load_cnndm_data(args, 'valid', dump=False)

    model = EncoderDecoder(args, device=device)
    print(model)

    # Load model if specified (path to pytorch .pt)
    if args['load_model'] is not None:
        model_path = args['load_model']
        try:
            model.load_state_dict(torch.load(model_path))
        except RuntimeError:  # need to remove module
            # Main model
            model_state_dict = torch.load(model_path)
            new_model_state_dict = OrderedDict()
            for key in model_state_dict.keys():
                new_model_state_dict[key.replace("module.",
                                                 "")] = model_state_dict[key]
            model.load_state_dict(new_model_state_dict)
        model.train()
        print("Loaded model from {}".format(args['load_model']))
    else:
        print("Train a new model")

    # to use multiple GPUs
    if torch.cuda.device_count() > 1:
        print("Multiple GPUs: {}".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)

    print("Train a new model")

    # Hyperparameters
    BATCH_SIZE = args['batch_size']
    NUM_EPOCHS = args['num_epochs']
    VAL_BATCH_SIZE = args['val_batch_size']
    VAL_STOP_TRAINING = args['val_stop_training']

    if args['label_smoothing'] > 0.0:
        criterion = LabelSmoothingLoss(num_classes=args['vocab_size'],
                                       smoothing=args['label_smoothing'],
                                       reduction='none')
    else:
        criterion = nn.NLLLoss(reduction='none')

    # a single optimiser for the whole model (adjust_lr overwrites this learning rate during training)
    optimizer = optim.Adam(model.parameters(),
                           lr=0.77,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0)
    optimizer.zero_grad()

    # validation losses
    best_val_loss = args['best_val_loss']
    best_epoch = 0
    stop_counter = 0

    training_step = 0

    for epoch in range(NUM_EPOCHS):
        print(
            "======================= Training epoch {} ======================="
            .format(epoch))
        num_train_data = len(train_data)
        # num_batches = int(num_train_data/BATCH_SIZE) + 1
        num_batches = int(num_train_data / BATCH_SIZE)
        print("num_batches = {}".format(num_batches))

        print("shuffle train data")
        random.shuffle(train_data)

        idx = 0

        for bn in range(num_batches):

            input, u_len, w_len, target, tgt_len = get_a_batch(
                train_data, idx, BATCH_SIZE, args['num_utterances'],
                args['num_words'], args['summary_length'],
                args['summary_type'], device)

            # decoder target
            decoder_target, decoder_mask = shift_decoder_target(
                target, tgt_len, device, mask_offset=True)
            decoder_target = decoder_target.view(-1)
            decoder_mask = decoder_mask.view(-1)

            try:
                # decoder_output = model(input, u_len, w_len, target)
                decoder_output, _, attn_scores, _, u_attn_scores = model(
                    input, u_len, w_len, target)
            except IndexError:
                print(
                    "there is an IndexError --- likely from if segment_indices[bn][-1] == u_len[bn]-1:"
                )
                print("for now just skip this batch!")
                idx += BATCH_SIZE  # previously I forgot to add this line!
                continue

            loss = criterion(decoder_output.view(-1, args['vocab_size']),
                             decoder_target)
            loss = (loss * decoder_mask).sum() / decoder_mask.sum()

            loss.backward()

            idx += BATCH_SIZE

            if bn % args['update_nbatches'] == 0:
                # gradient_clipping
                max_norm = 0.5
                nn.utils.clip_grad_norm_(model.parameters(), max_norm)
                # update the gradients
                if args['adjust_lr']:
                    adjust_lr(optimizer, args['initial_lr'],
                              args['decay_rate'], training_step)
                optimizer.step()
                optimizer.zero_grad()
                training_step += args['batch_size'] * args['update_nbatches']

            if bn % 1 == 0:
                print("[{}] batch {}/{}: loss = {:5f}".format(
                    str(datetime.now()), bn, num_batches, loss))
                sys.stdout.flush()

            if bn % 50 == 0:

                print(
                    "======================== GENERATED SUMMARY ========================"
                )
                print(
                    bert_tokenizer.decode(
                        torch.argmax(decoder_output[0],
                                     dim=-1).cpu().numpy()[:tgt_len[0]]))
                print(
                    "======================== REFERENCE SUMMARY ========================"
                )
                print(
                    bert_tokenizer.decode(
                        decoder_target.view(BATCH_SIZE, args['summary_length'])
                        [0, :tgt_len[0]].cpu().numpy()))

            if bn % 500 == 0:
                # ---------------- Evaluate the model on validation data ---------------- #
                print("Evaluating the model at epoch {} step {}".format(
                    epoch, bn))
                print("learning_rate = {}".format(
                    optimizer.param_groups[0]['lr']))
                # switch to evaluation mode
                model.eval()

                with torch.no_grad():
                    avg_val_loss = evaluate(model, valid_data, VAL_BATCH_SIZE,
                                            args, device)

                print("avg_val_loss_per_token = {}".format(avg_val_loss))

                # switch to training mode
                model.train()
                # ------------------- Save the model OR Stop training ------------------- #
                if avg_val_loss < best_val_loss:
                    stop_counter = 0
                    best_val_loss = avg_val_loss
                    best_epoch = epoch

                    savepath = args[
                        'model_save_dir'] + "model-{}-ep{}.pt".format(
                            args['model_name'], epoch)
                    torch.save(model.state_dict(), savepath)
                    print("Model improved & saved at {}".format(savepath))
                else:
                    print("Model not improved #{}".format(stop_counter))
                    if stop_counter < VAL_STOP_TRAINING:
                        print(
                            "Just continue training ---- no loading old weights"
                        )
                        stop_counter += 1
                    else:
                        print(
                            "Model has not improved for {} times! Stop training."
                            .format(VAL_STOP_TRAINING))
                        return

    print("End of training hierarchical RNN model")
Example #7
class HierarchicalModel(object):
    def __init__(self, model_name, model_step=None, use_gpu=False):
        # if model_name not in ["HIER", "HIERDIV", "AMI_MT_DIV", "SPOTIFY_short", "SPOTIFY_long"]:
        #     raise ValueError("unknown model name")

        if model_name == "HIER":
            self.model_path = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_CNNDM_FEB26A-ep17.pt"
            self.load_option = 1
        elif model_name == "HIERDIV":
            self.model_path = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_CNNDMDIV_APR14A-ep02.pt"
            self.load_option = 2
        elif model_name == "AMI_MT_DIV":
            self.model_path = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-HGRUV5_APR12ALL100-ep20.pt"
            self.load_option = 1

        elif model_name == "SPOTIFY_short":
            self.model_path = "/home/alta/summary/pm574/summariser1/lib/trained_models_spotify/HGRUV5DIV_SPOTIFY_JUNE18_v4-step{}.pt".format(
                model_step)
            self.load_option = 2

        elif model_name == "SPOTIFY_long":
            self.model_path = "/home/alta/summary/pm574/summariser1/lib/trained_models_spotify/HGRUV5DIV_SPOTIFY_JUNE18_v3-step{}.pt".format(
                model_step)
            self.load_option = 2

        else:
            self.model_path = model_name
            self.load_option = 2  # new version

        args = {}
        args['vocab_size'] = 30522  # BERT tokenizer
        args['embedding_dim'] = 256  # word embedding dimension
        args['rnn_hidden_size'] = 512  # RNN hidden size
        args['dropout'] = 0.0
        args['num_layers_enc'] = 2
        args['num_layers_dec'] = 1
        args['memory_utt'] = False

        self.device = 'cuda' if use_gpu else 'cpu'
        self.model = EncoderDecoder(args, self.device)
        self.load_model()
        self.model.eval()

    def load_model(self):
        if self.device == 'cuda':
            try:
                state = torch.load(self.model_path)
                if self.load_option == 1:
                    self.model.load_state_dict(state)
                elif self.load_option == 2:
                    model_state_dict = state['model']
                    self.model.load_state_dict(model_state_dict)
                print("load succesful #1")
            except:  # fall back to stripping the "module." prefix added by nn.DataParallel
                if self.load_option == 1:
                    model_state_dict = torch.load(self.model_path)
                # (for load_option 2, model_state_dict survives from the try block above)
                new_model_state_dict = OrderedDict()
                for key in model_state_dict.keys():
                    new_model_state_dict[key.replace(
                        "module.", "")] = model_state_dict[key]
                self.model.load_state_dict(new_model_state_dict)
                print("load succesful #2")
        elif self.device == 'cpu':
            try:
                state = torch.load(self.model_path,
                                   map_location=torch.device('cpu'))
                if self.load_option == 1:
                    self.model.load_state_dict(state)
                elif self.load_option == 2:
                    model_state_dict = state['model']
                    self.model.load_state_dict(model_state_dict)
                print("load succesful #3")
            except:  # same "module." prefix fallback on CPU
                if self.load_option == 1:
                    model_state_dict = torch.load(
                        self.model_path, map_location=torch.device('cpu'))

                new_model_state_dict = OrderedDict()
                for key in model_state_dict.keys():
                    new_model_state_dict[key.replace(
                        "module.", "")] = model_state_dict[key]
                self.model.load_state_dict(new_model_state_dict)
                print("load succesful #4")

    def decode(self,
               tokenizer,
               batches,
               beam_width=4,
               time_step=144,
               penalty_ug=0.0,
               alpha=1.25,
               length_offset=5):
        decode_dict = {
            'k': beam_width,
            'time_step': time_step,
            'vocab_size': 30522,
            'device': self.device,
            'start_token_id': 101,
            'stop_token_id': 103,
            'alpha': alpha,
            'length_offset': length_offset,
            'penalty_ug': penalty_ug,
            'keypadmask_dtype': KEYPADMASK_DTYPE,
            'memory_utt': False,
            'batch_size': 1
        }
        summaries = [None for _ in range(len(batches))]
        for i, batch in enumerate(batches):
            summary_id = self.beam_search(batch, decode_dict)
            sentences = tokenizer.tgtids2summary(summary_id)
            summaries[i] = " ".join(sentences)
        return summaries

    def beam_search(self, batch, decode_dict):
        input = batch.input
        u_len = batch.u_len
        w_len = batch.w_len
        with torch.no_grad():
            summary_id, _, _ = self.model.decode_beamsearch(
                input, u_len, w_len, decode_dict)
        return summary_id

    def get_utt_attn_with_ref(self, enc_batch, target, tgt_len):
        # batch_size should be 1
        with torch.no_grad():
            # Teacher Forcing
            _, _, _, _, u_attn_scores = self.model(enc_batch.input,
                                                   enc_batch.u_len,
                                                   enc_batch.w_len, target)
        N = enc_batch.u_len[0].item()
        T = tgt_len[0].item()
        attention = u_attn_scores[0, :T, :N].sum(
            dim=0) / u_attn_scores[0, :T, :N].sum()
        attention = attention.cpu().numpy()
        return attention

    def get_utt_attn_without_ref(self,
                                 enc_batch,
                                 beam_width=4,
                                 time_step=144,
                                 penalty_ug=0.0,
                                 alpha=1.25,
                                 length_offset=5):
        decode_dict = {
            'k': beam_width,
            'time_step': time_step,
            'vocab_size': 30522,
            'device': self.device,
            'start_token_id': 101,
            'stop_token_id': 103,
            'alpha': alpha,
            'length_offset': length_offset,
            'penalty_ug': penalty_ug,
            'keypadmask_dtype': KEYPADMASK_DTYPE,
            'memory_utt': False,
            'batch_size': 1
        }
        # batch_size should be 1
        with torch.no_grad():

            summary_ids, attn_scores, u_attn_scores = self.model.decode_beamsearch(
                enc_batch.input, enc_batch.u_len, enc_batch.w_len, decode_dict)

        N = enc_batch.u_len[0].item()
        attention = u_attn_scores[:, :N].sum(
            dim=0) / u_attn_scores[:, :N].sum()
        attention = attention.cpu().numpy()
        return attention
Example #8
def evaluate_label_task(model_name, epoch, multitask):
    # ---------------------------------------------------------------------------------- #
    args = {}
    args['use_gpu'] = True
    args['num_utterances'] = 1500  # max no. utterance in a meeting
    args['num_words'] = 64  # max no. words in an utterance
    args['summary_length'] = 300  # max no. words in a summary
    args['summary_type'] = 'short'  # long or short summary
    args['vocab_size'] = 30522  # BERT tokenizer
    args['embedding_dim'] = 256  # word embeeding dimension
    args['rnn_hidden_size'] = 512  # RNN hidden size

    args['dropout'] = 0.5
    args['num_layers_enc'] = 2  # in total it's num_layers_enc*3 (word/utt/seg)
    args['num_layers_dec'] = 1
    args['memory_utt'] = False

    args['batch_size'] = 2
    args['random_seed'] = 444

    args['model_save_dir'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/"
    args['load_model'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/model-{}-ep{}.pt".format(
        model_name, epoch)
    # ---------------------------------------------------------------------------------- #

    if args['use_gpu']:
        if 'X_SGE_CUDA_DEVICE' in os.environ:  # to run on CUED stack machine
            print('running on the stack... 1 GPU')
            cuda_device = os.environ['X_SGE_CUDA_DEVICE']
            print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device))
            os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device
        else:
            print('running locally...')
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # choose the device (GPU) here
        device = 'cuda'
    else:
        device = 'cpu'

    test_data = load_ami_data('test')
    valid_data = load_ami_data('valid')

    # random seed
    random.seed(args['random_seed'])
    torch.manual_seed(args['random_seed'])
    np.random.seed(args['random_seed'])

    model = EncoderDecoder(args, device=device)

    NUM_DA_TYPES = len(DA_MAPPING)
    da_labeller = DALabeller(args['rnn_hidden_size'], NUM_DA_TYPES, device)
    ext_labeller = EXTLabeller(args['rnn_hidden_size'], device)

    if device == 'cuda':
        state = torch.load(args['load_model'])
    else:
        state = torch.load(args['load_model'],
                           map_location=torch.device('cpu'))
    model_state_dict = state['model']
    model.load_state_dict(model_state_dict)
    if multitask:
        da_labeller.load_state_dict(state['da_labeller'])
        ext_labeller.load_state_dict(state['ext_labeller'])

    BATCH_SIZE = args['batch_size']
    num_test_data = len(test_data)

    num_batches = num_test_data // BATCH_SIZE  # drop any incomplete final batch

    idx = 0

    model = model.eval()
    da_labeller = da_labeller.eval()
    ext_labeller = ext_labeller.eval()

    da_true = 0
    da_total = 0

    ext_tp = 0
    ext_tn = 0
    ext_fp = 0
    ext_fn = 0

    for bn in range(num_batches):

        input, u_len, w_len, target, tgt_len, topic_boundary_label, dialogue_acts, extractive_label = get_a_batch(
            test_data, idx, BATCH_SIZE, args['num_utterances'],
            args['num_words'], args['summary_length'], args['summary_type'],
            device)

        # decoder target (built as in training, but the decoder output is not
        # scored below; only the two labelling heads are evaluated)
        decoder_target, decoder_mask = shift_decoder_target(target,
                                                            tgt_len,
                                                            device,
                                                            mask_offset=True)
        decoder_target = decoder_target.view(-1)
        decoder_mask = decoder_mask.view(-1)

        decoder_output, u_output, _, _, u_attn_scores = model(
            input, u_len, w_len, target)

        # multitask(1): dialogue act prediction
        da_output = da_labeller(u_output)

        # multitask(2): extractive label prediction
        ext_output = ext_labeller(u_output).squeeze(-1)

        multitask_mask = length2mask(u_len, BATCH_SIZE, args['num_utterances'],
                                     device)

        t, total = multiclass_eval(torch.argmax(da_output, dim=-1),
                                   dialogue_acts, multitask_mask)
        da_true += t
        da_total += total

        tp, tn, fp, fn = labelling_eval(ext_output,
                                        extractive_label,
                                        multitask_mask,
                                        threshold=0.5)
        ext_tp += tp
        ext_tn += tn
        ext_fp += fp
        ext_fn += fn
        idx += BATCH_SIZE
        print("#", end='')
        sys.stdout.flush()

    print()

    print("Model:", args['load_model'])
    print("[2] ======= Dialogue Act Prediction Task =======")
    try:
        accuracy = da_true / da_total
        print("Acc: {:.4f}".format(accuracy))
    except ZeroDivisionError:
        print("Acc: n/a (no dialogue-act labels counted)")
    print("[3] ============== Extractive Task ==============")
    accuracy = (ext_tp + ext_tn) / (ext_tp + ext_tn + ext_fp + ext_fn)
    if ext_tp + ext_fp > 0:
        precision = ext_tp / (ext_tp + ext_fp)
    else:
        precision = 0
    if ext_tp + ext_fn > 0:  # guard recall the same way as precision
        recall = ext_tp / (ext_tp + ext_fn)
    else:
        recall = 0
    if precision != 0 and recall != 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0
    print("Acc: {:.4f} | Pre: {:.4f} | Rec: {:.4f} | F-1: {:.4f}".format(
        accuracy, precision, recall, f1))
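
# A minimal, self-contained sketch of the extractive metric computation above
# (the helper name `prf1` is ours, not from the source), handy for checking
# the counting logic in isolation:
def prf1(tp, tn, fp, fn):
    """Accuracy / precision / recall / F-1 from raw confusion counts."""
    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total else 0.0
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return accuracy, precision, recall, f1

# e.g. prf1(8, 80, 2, 10) -> (0.88, 0.8, 0.444..., 0.571...)
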
Example #9
def decode(start_idx, end_idx):
    # ---------------------------------------------------------------------------------- #
    args = {}
    args['use_gpu'] = False
    args['num_utterances'] = 50  # max no. utterance in a meeting
    args['num_words'] = 32  # max no. words in an utterance
    args['summary_length'] = 144  # max no. words in a summary
    args['summary_type'] = 'long'  # long or short summary
    args['vocab_size'] = 30522  # BERT tokenizer
    args['embedding_dim'] = 256  # word embeeding dimension
    args['rnn_hidden_size'] = 512  # RNN hidden size

    args['dropout'] = 0.0
    args['num_layers_enc'] = 2
    args['num_layers_dec'] = 1

    args['model_save_dir'] = "/home/alta/summary/pm574/summariser1/lib/trained_models2/"
    args['model_data_dir'] = "/home/alta/summary/pm574/summariser1/lib/model_data/"

    args['memory_utt'] = False

    args['model_name'] = "HGRUV5_CNNDM_FEB26A"
    args['model_epoch'] = 17
    # ---------------------------------------------------------------------------------- #
    batch_size = 1
    num_batches = end_idx - start_idx

    search_method = 'argmax'
    beam_width = 4
    alpha = 0.75
    penalty_ug = 0.0
    random_seed = 28
    # ---------------------------------------------------------------------------------- #
    args['summary_out_dir'] = (
        '/home/alta/summary/pm574/summariser1/out_summary_cnndm/'
        '{}-ep{}-len{}/width{}-{}-alpha{}-penalty{}/'.format(
            args['model_name'], args['model_epoch'], args['summary_length'],
            beam_width, search_method, alpha, penalty_ug))
    # ---------------------------------------------------------------------------------- #

    if args['use_gpu']:
        if 'X_SGE_CUDA_DEVICE' in os.environ:  # to run on CUED stack machine
            print('running on the stack...')
            cuda_device = os.environ['X_SGE_CUDA_DEVICE']
            print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device))
            os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device
        else:
            print('running locally...')
            os.environ["CUDA_VISIBLE_DEVICES"] = '1'  # choose the device (GPU) here
        device = 'cuda'
    else:
        device = 'cpu'
    args['device'] = device
    print("device = {}".format(device))

    # random seed
    random.seed(random_seed)
    torch.manual_seed(random_seed)
    np.random.seed(random_seed)

    # Define and Load the model
    model = EncoderDecoder(args, device)

    trained_model = args['model_save_dir'] + "model-{}-ep{}.pt".format(
        args['model_name'], args['model_epoch'])
    # The cuda/cpu branches differed only in map_location, so load once and
    # fall back to stripping the "module." prefix left by nn.DataParallel.
    map_location = None if device == 'cuda' else torch.device('cpu')
    model_state_dict = torch.load(trained_model, map_location=map_location)
    try:
        model.load_state_dict(model_state_dict)
    except RuntimeError:  # key mismatch: checkpoint saved from a DataParallel model
        new_model_state_dict = OrderedDict()
        for key, value in model_state_dict.items():
            new_model_state_dict[key.replace("module.", "")] = value
        model.load_state_dict(new_model_state_dict)

    model.eval()  # switch it to eval mode
    print("Restored model from {}".format(trained_model))

    # Load and prepare data
    test_data = load_cnndm_data(args, 'test', dump=False)

    print("========================================================")
    print("start decoding: idx [{},{})".format(
        start_idx, start_idx + batch_size * num_batches))
    print("========================================================")

    with torch.no_grad():
        print("beam_width = {}".format(beam_width))
        decoding(model,
                 test_data,
                 args,
                 start_idx,
                 batch_size,
                 num_batches,
                 k=beam_width,
                 search_method=search_method,
                 alpha=alpha,
                 penalty_ug=penalty_ug)
    print("finish decoding: idx [{},{})".format(
        start_idx, start_idx + batch_size * num_batches))
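
# The "module."-stripping fallback recurs in every loader above; a minimal
# reusable sketch of the same idea (the helper name is ours, not from the
# source):
from collections import OrderedDict

def strip_dataparallel_prefix(state_dict):
    """Return a copy of `state_dict` with nn.DataParallel's "module." prefix
    removed from the start of every key, so it loads into an unwrapped model."""
    return OrderedDict(
        (key[len("module."):] if key.startswith("module.") else key, value)
        for key, value in state_dict.items())

# e.g. model.load_state_dict(strip_dataparallel_prefix(torch.load(path)))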