Exemplo n.º 1
0
        os.makedirs(output_path)

    # Results are written to <output_path>/<suffix>.
    args.output_file = os.path.join(output_path, suffix)
    # When generation starts at sentence id 1, remove any output file that
    # already exists at that path.
    if args.started_sentence_id==1 and os.path.exists(args.output_file):
        os.remove(args.output_file)
    print('The output file is ', args.output_file)

    # args.input_file is overwritten in place: the incoming value is used as
    # a directory and extended with "<dataset>/<keywords>keywords.txt".
    args.input_file = os.path.join(args.input_file, f'''{args.dataset}/{args.keywords}keywords.txt''')

    # Use the first CUDA device when one is available, otherwise the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("device:", device)

    # Load the XLNet token classifier (4 labels) and its tokenizer only when
    # args.random is 0; for any other value both handles are set to None.
    if args.random != 0:
        classifier_model = None
        classifier_model_tokenizer = None
    else:
        # Checkpoints are stored per dataset; the path is also kept on args.
        classifier_model_path = '../checkpoints/xlnet_classifier/{}'.format(
            args.dataset)
        args.classifier_model_path = classifier_model_path
        classifier_model = XLNetForTokenClassification.from_pretrained(
            classifier_model_path, num_labels=4)
        classifier_model_tokenizer = XLNetTokenizer.from_pretrained(
            classifier_model_path)

        logger.logger.info(
            'Initialize backward XLNetForTokenClassification from checkpoint {}.'.format(
                classifier_model_path))
        # Move the classifier to the selected device and switch it to
        # evaluation mode.
        classifier_model = classifier_model.to(device)
        classifier_model.eval()


    # For the 'LSTMLMGenerate' model, build the per-dataset forward and
    # backward LSTM language-model checkpoint paths and record both on args.
    if args.model_name == 'LSTMLMGenerate':
        forward_lm_path = '../checkpoints/forward_lstm_lm/{}/best.pt'.format(args.dataset)
        backward_lm_path = '../checkpoints/backward_lstm_lm/{}/best.pt'.format(args.dataset)
        args.forward_lm_path = forward_lm_path
        args.backward_lm_path = backward_lm_path
Exemplo n.º 2
0
    # Walk the sentence pieces, skipping the first piece and the last two
    # (slice [1:-2]), and give each piece the largest character-level label
    # found in the character span starting at the current cursor.
    for i, j in enumerate(sep_sentence[1:-2]):
        # The output slot is offset by pad_num + 1 relative to the piece index.
        out[i + pad_num + 1] = max(char_label[current_idx:current_idx +
                                              len(j)])

        # An "<unk>" piece advances the character cursor by one position
        # only; every other piece advances it by its own length.
        # NOTE(review): the max() above still spans len("<unk>") == 5
        # characters for an unknown piece — confirm that this is intended.
        if j == "<unk>":
            current_idx = current_idx + 1
        else:
            current_idx = current_idx + len(j)

    # `out` is converted with .tolist() before being handed back.
    return out.tolist()


# Load the configuration, the tokenizer (with a custom unknown-token string)
# and a 13-label token-classification model from the same `model_path`.
config = AutoConfig.from_pretrained(model_path)
tokenizer = XLNetTokenizer.from_pretrained(model_path, unk_token=unk_token)
model = XLNetForTokenClassification.from_pretrained(model_path, num_labels=13)

# Pick the compute device: the GPU when CUDA is usable, the CPU otherwise.
# Both branches must bind `device`, because it is used right after this
# block regardless of which branch ran; leaving it unset on the CPU path
# would raise NameError on machines without CUDA.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print('No GPU available, using the CPU instead.')

# Move the model's parameters onto the selected device.
model.to(device)

# Empty lists, presumably filled later with the training examples' input
# ids, labels and attention masks — TODO confirm against the code below.
train_input_ids = []
train_labels = []
train_masks = []
Exemplo n.º 3
0
    # Keep the model and log locations on args so they are included when
    # args is logged below.
    args.model_path = model_path
    args.log_path = log_path

    # Create the log directory if it is missing; the log file is named after
    # the dataset.
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_file = '{}/{}.log'.format(log_path, args.dataset)
    print('The log file is ', log_file)
    logger = Logger(log_file)
    # Write the arguments object to the log.
    logger.logger.info(args)

    # Make sure the checkpoint directory exists before trying to load from it.
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    try:
        # Prefer a previously saved tokenizer/model stored under model_path.
        tokenizer = XLNetTokenizer.from_pretrained(args.model_path)
        model = XLNetForTokenClassification.from_pretrained(
            args.model_path, num_labels=args.num_labels)
        logger.logger.info('Initialize XLNet from checkpoint {}.'.format(
            args.model_path))
    except Exception as err:
        # Best-effort fallback: any ordinary loading failure (for example an
        # empty, freshly created directory) switches to the stock
        # 'xlnet-base-cased' weights. Catching Exception instead of using a
        # bare `except:` lets KeyboardInterrupt and SystemExit propagate, and
        # the reason for the fallback is logged instead of being discarded.
        logger.logger.info('Could not load XLNet from {}: {}'.format(
            args.model_path, err))
        tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
        model = XLNetForTokenClassification.from_pretrained(
            'xlnet-base-cased', num_labels=args.num_labels)
        logger.logger.info('Initialize XLNet with default parameters.')

    # Wrap the loaded token-classification model in XLNetClassifier.
    model = XLNetClassifier(model)
    # The bare string below is an expression statement with no runtime
    # effect; it lists the label id assigned to each operation name.
    """
    copy: 0
    replace: 1
    insert: 2
    delete: 3
    """