示例#1
0
def load_model(save_dir,
               model_file='model.pt',
               text_encoder_file='text_encoder.pkl',
               label_encoder_file='label_encoder.pkl'):
    """Restore a saved model together with its text and label encoders.

    Loads the serialized DoubleHeadModel from ``model_file`` and unpickles
    the two encoder objects, all resolved relative to ``save_dir``.

    Returns:
        Tuple of ``(model, text_encoder, label_encoder)``.
    """
    def _unpickle(filename):
        # NOTE: pickle is only safe on artifacts this project wrote itself.
        with open(join(save_dir, filename), 'rb') as fh:
            return pickle.load(fh)

    model = DoubleHeadModel.load_from_file(join(save_dir, model_file))
    return model, _unpickle(text_encoder_file), _unpickle(label_encoder_file)
                              for x in vaX] + [len(x[:max_len])
                                               for x in teX]) + 3, n_ctx)

    vocab = n_vocab + n_special + n_ctx
    training_engine = TrainingEngine()
    trX, trM = training_engine.transform_veracity(trX)
    vaX, vaM = training_engine.transform_veracity(vaX)
    if submit:
        teX, teM = training_engine.transform_veracity(teX)

    n_train = len(trY)
    n_valid = len(vaY)
    n_batch_train = args.n_batch * max(n_gpu, 1)
    n_updates_total = (n_train // n_batch_train) * args.n_iter

    dh_model = DoubleHeadModel(args, clf_token, ('classification', 3), vocab,
                               n_ctx)

    criterion = nn.CrossEntropyLoss(reduction='none')
    model_opt = OpenAIAdam(dh_model.parameters(),
                           lr=args.lr,
                           schedule=args.lr_schedule,
                           warmup=args.lr_warmup,
                           t_total=n_updates_total,
                           b1=args.b1,
                           b2=args.b2,
                           e=args.e,
                           l2=args.l2,
                           vector_l2=args.vector_l2,
                           max_grad_norm=args.max_grad_norm)
    compute_loss_fct = MultipleChoiceLossCompute(criterion, criterion,
                                                 args.lm_coef, model_opt)
示例#3
0
        ] + [
            len(x1[:max_len]) + max(len(x2[:max_len]), len(x3[:max_len]))
            for x1, x2, x3 in zip(teX1, teX2, teX3)
        ]) + 3, n_ctx)
    vocab = n_vocab + n_special + n_ctx
    trX, trM = transform_roc(trX1, trX2, trX3)
    vaX, vaM = transform_roc(vaX1, vaX2, vaX3)
    if submit:
        teX, teM = transform_roc(teX1, teX2, teX3)

    n_train = len(trY)
    n_valid = len(vaY)
    n_batch_train = args.n_batch * max(n_gpu, 1)
    n_updates_total = (n_train // n_batch_train) * args.n_iter

    dh_model = DoubleHeadModel(args, clf_token, 'multiple_choice', vocab,
                               n_ctx)

    criterion = nn.CrossEntropyLoss(reduce=False)
    model_opt = OpenAIAdam(dh_model.parameters(),
                           lr=args.lr,
                           schedule=args.lr_schedule,
                           warmup=args.lr_warmup,
                           t_total=n_updates_total,
                           b1=args.b1,
                           b2=args.b2,
                           e=args.e,
                           l2=args.l2,
                           vector_l2=args.vector_l2,
                           max_grad_norm=args.max_grad_norm)
    compute_loss_fct = MultipleChoiceLossCompute(criterion, criterion,
                                                 args.lm_coef, model_opt)
        start_token = n_vocab
        clf_token = n_vocab + 1
        n_special = 2
        max_len = 140
        n_ctx = max_len + 2

        vocab = n_vocab + n_special + n_ctx
        trX, trM = transform_tweet(trX1)
        n_train = len(trY)

        n_batch_train = args.n_batch * max(n_gpu, 1)
        n_updates_total = (n_train // n_batch_train) * args.n_iter

        print("updates total", n_updates_total)

        dh_model = DoubleHeadModel(args, clf_token, ('classification', 2), vocab, n_ctx)

        criterion = nn.CrossEntropyLoss(reduce=False)
        model_opt = OpenAIAdam(dh_model.parameters(),
                               lr=args.lr,
                               schedule=args.lr_schedule,
                               warmup=args.lr_warmup,
                               t_total=n_updates_total,
                               b1=args.b1,
                               b2=args.b2,
                               e=args.e,
                               l2=args.l2,
                               vector_l2=args.vector_l2,
                               max_grad_norm=args.max_grad_norm)
        compute_loss_fct = MultipleChoiceLossCompute(criterion,
                                                     criterion,
示例#5
0
                    attn_pdrop=args.attn_pdrop,
                    resid_pdrop=args.resid_pdrop,
                    afn=args.afn,
                    clf_pdrop=args.clf_pdrop,
                    skip_connections=args.skip_connections,
                )),
            clf_token=clf_token,
            task_head_type=['classification', n_class],
            vocab=vocab,
            n_ctx=n_ctx,
        ),
        encoder=dict(max_len=max_len, ),
    )
    print(meta)

    dh_model = DoubleHeadModel(**meta['dh_model'])
    if args.snapshot_dir is not None:
        dh_model.to(device)
        dh_model = nn.DataParallel(dh_model)
        print("Loading snapshot...")
        snapshot_dict = torch.load(
            os.path.join(args.snapshot_dir, 'best_params'))
        if args.snapshot_mode == 'transformer_only':
            model_dict = dh_model.state_dict()
            model_dict.update({
                k: v
                for k, v in snapshot_dict.items() if 'task_head' not in k
            })
            snapshot_dict = model_dict
        dh_model.load_state_dict(snapshot_dict)
    else:
示例#6
0
    seq = sorted(seq, key=lambda x: len(x))

    #Setup Model
    encoder['_start_'] = len(encoder)
    encoder['_delimiter_'] = len(encoder)
    encoder['_classify_'] = len(encoder)
    clf_token = encoder['_classify_']

    n_special = 3
    n_ctx = np.array([len(t) for t in seq]).max() + 2
    n_ctx = int(n_ctx)

    print(n_ctx)

    vocab = int(n_vocab + n_special + n_ctx)
    dh_model = DoubleHeadModel(args, clf_token, ('classification', 1), vocab,
                               n_ctx)
    load_openai_pretrained_model(dh_model.transformer,
                                 n_ctx=n_ctx,
                                 n_special=n_special)

    if GPU:
        dh_model = dh_model.cuda()  #.half()

    if half:
        dh_model = dh_model.half()

    #dh_model = dh_model.type(torch.cuda.HalfTensor)

    #for layer in dh_model.modules():
    #    if isinstance(layer, LayerNorm):
    #        layer.float()
示例#7
0
        x1 = dev_queries[i]
        for j in range(10):
            x2 = dev_passages[j][i]
            tmp_length.append(len(x1[:q_max_len]) + len(x2[:p_max_len]))
        t.append(max(tmp_length))

    n_ctx = min(max(t) + 3, n_ctx)
    print('n_ctx is: ', n_ctx)
    vocab = n_vocab + n_special + n_ctx

    n_train = len(train_queries)
    n_valid = len(dev_queries)
    n_batch_train = args.n_batch * max(n_gpu, 1)
    n_updates_total = (n_train // n_batch_train) * args.n_iter

    dh_model = DoubleHeadModel(args, clf_token, 'msmarco_para_select', vocab,
                               n_ctx)

    criterion_lm = nn.CrossEntropyLoss(reduce=False)
    criterion_clf = nn.KLDivLoss()
    model_opt = OpenAIAdam(dh_model.parameters(),
                           lr=args.lr,
                           schedule=args.lr_schedule,
                           warmup=args.lr_warmup,
                           t_total=n_updates_total,
                           b1=args.b1,
                           b2=args.b2,
                           e=args.e,
                           l2=args.l2,
                           vector_l2=args.vector_l2,
                           max_grad_norm=args.max_grad_norm)
    load_openai_pretrained_model(dh_model.transformer,
def main():
    """CLI entry point: load a fine-tuned DoubleHeadModel classifier, run it
    on a headerless TSV file, write per-row predictions to a TSV, and
    optionally collect MC-dropout output/probability matrices.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_file', required=True)
    parser.add_argument('-o', '--output_file', required=True)
    parser.add_argument('--n_batch', type=int, default=8)
    parser.add_argument('--skip_preprocess', action='store_true')
    parser.add_argument('--sentence_pair', action='store_true')
    parser.add_argument('--force_delimiter', action='store_true')
    parser.add_argument('--encoder_path', type=str, default='model/encoder_bpe_40000.json')
    parser.add_argument('--bpe_path', type=str, default='model/vocab_40000.bpe')
    parser.add_argument('--model_dir', required=True)
    # Number of extra stochastic forward passes (MC dropout); 0 disables.
    parser.add_argument('--mc_dropout_iter', type=int, default=0)
    args = parser.parse_args()

    # Model hyperparameters and encoder settings saved alongside the weights.
    meta = json.load(open(os.path.join(args.model_dir, 'meta.json'), 'r', encoding='utf8'))

    text_encoder = TextEncoder(args.encoder_path, args.bpe_path)
    encoder = text_encoder.encoder
    n_vocab = len(text_encoder.encoder)

    # Append special tokens after the BPE vocabulary; order matters because
    # the ids must match those used at training time.
    encoder['_start_'] = len(encoder)
    if args.sentence_pair or args.force_delimiter:
        encoder['_delimiter_'] = len(encoder)
    encoder['_classify_'] = len(encoder)
    clf_token = encoder['_classify_']
    # 2 specials (_start_, _classify_) plus the optional _delimiter_.
    n_special = 2 + int('_delimiter_' in encoder)
    n_ctx = meta['dh_model']['n_ctx']
    max_len = meta['encoder']['max_len']
    if args.sentence_pair:
        # Each sentence gets at most half the context, minus special tokens.
        max_len = min(max_len, n_ctx // 2 - 2)

    texts, labels = load_headerless_tsv(args.input_file, sentence_pair=args.sentence_pair)
    ((X, Y),) = encode_dataset(*[(texts, labels)],
                               encoder=text_encoder,
                               skip_preprocess=args.skip_preprocess)

    X, M = transform_classification(X, max_len, encoder['_start_'], clf_token,
                                    n_vocab, n_special, n_ctx, encoder.get('_delimiter_'))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    n_batch_train = args.n_batch * max(n_gpu, 1)

    # cfg was serialized as a plain dict; DoubleHeadModel expects attribute access.
    meta['dh_model']['cfg'] = dotdict(meta['dh_model']['cfg'])
    dh_model = DoubleHeadModel(**meta['dh_model'])
    dh_model.to(device)
    dh_model = torch.nn.DataParallel(dh_model)
    path = os.path.join(args.model_dir, 'best_params')
    if device == torch.device('cpu'):
        # Remap CUDA-saved tensors onto CPU storage when no GPU is available.
        map_location = lambda storage, loc: storage
    else:
        map_location = None

    dh_model.load_state_dict(torch.load(path, map_location=map_location))
    prediction_output = predict(X=X,
                                submission_dir=None,
                                filename=None,
                                pred_fn=lambda x: x,  # keep raw logits/scores
                                label_decoder=None,
                                dh_model=dh_model,
                                n_batch_train=n_batch_train,
                                device=device)

    # prediction_output is (n_samples, n_classes): argmax picks the class id.
    predictions = np.argmax(prediction_output, axis=1)
    if type(texts) is tuple:
        df = pd.DataFrame({'question': texts[0], 'text': texts[1], 'label': labels, 'prediction': predictions})
    else:
        df = pd.DataFrame({'text': texts, 'label': labels, 'prediction': predictions})
    # NOTE(review): 'question' is dropped from the output even in the
    # sentence-pair case because `columns` is fixed — confirm this is intended.
    df.to_csv(args.output_file,
              index=False,
              sep='\t',
              header=False,
              columns=['text', 'label', 'prediction'],
              float_format='%.0f')

    accuracy = accuracy_score(Y, predictions) * 100.
    print('Accuracy: {}%'.format(accuracy))

    basename = os.path.splitext(args.output_file)[0]

    # NOTE(review): np.savetxt writes plain text despite the .npy extension.
    prediction_output_file = basename + '_output.npy'
    np.savetxt(prediction_output_file, prediction_output)
    prediction_probs = np_softmax(prediction_output)
    prediction_probs_file = basename + '_probs.npy'
    np.savetxt(prediction_probs_file, prediction_probs)

    # Optional MC dropout: repeat inference with dropout enabled to sample
    # the model's predictive distribution.
    mc_dropout_prediction_output = []
    for _ in tqdm(range(args.mc_dropout_iter)):
        prediction_output = predict(X=X,
                                    submission_dir=None,
                                    filename=None,
                                    pred_fn=lambda x: x,
                                    label_decoder=None,
                                    dh_model=dh_model,
                                    n_batch_train=n_batch_train,
                                    device=device,
                                    enable_dropout=True)
        mc_dropout_prediction_output.append(prediction_output)

    if mc_dropout_prediction_output:
        # Stacked shape: (n_iter, n_samples, n_classes).
        mc_dropout_prediction_output = np.asarray(mc_dropout_prediction_output)
        mc_dropout_prediction_probs = np.zeros(mc_dropout_prediction_output.shape)
        for i in range(mc_dropout_prediction_output.shape[0]):
            mc_dropout_prediction_probs[i, ...] = np_softmax(mc_dropout_prediction_output[i, ...])

        # Reorder to (n_classes, n_samples, n_iter) so each class gets its
        # own 2-D (sample x iteration) matrix below.
        transpose_dims = (2, 1, 0)
        mc_dropout_prediction_output = mc_dropout_prediction_output.transpose(transpose_dims)
        mc_dropout_prediction_probs = mc_dropout_prediction_probs.transpose(transpose_dims)
        for i in range(mc_dropout_prediction_output.shape[0]):
            prediction_output_file = '{}_class{}_{}'.format(basename, i, 'output.npy')
            np.savetxt(prediction_output_file, mc_dropout_prediction_output[i, ...])
            prediction_probs_file = '{}_class{}_{}'.format(basename, i, 'probs.npy')
            np.savetxt(prediction_probs_file, mc_dropout_prediction_probs[i, ...])