Example #1
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.static_embedding = nn.Embedding(25419, config.hidden_size) # (num_entities, hidden_size)
        self.scorer = nn.CosineSimilarity()
        self.init_weights()
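
A minimal usage sketch, offered as an assumption rather than code from the original model: the static entity-embedding table and cosine scorer above can rank candidate entities against a pooled mention representation roughly as follows.

import torch
import torch.nn as nn

hidden_size, num_entities = 768, 25419                        # mirrors Example #1's sizes
static_embedding = nn.Embedding(num_entities, hidden_size)
scorer = nn.CosineSimilarity(dim=-1)                          # cosine over the hidden dimension

mention_repr = torch.randn(2, hidden_size)                    # stand-in for pooled BERT output, batch of 2
candidate_ids = torch.randint(0, num_entities, (2, 5))        # 5 hypothetical candidate entities per mention
candidate_embs = static_embedding(candidate_ids)              # (2, 5, hidden_size)
scores = scorer(mention_repr.unsqueeze(1).expand_as(candidate_embs), candidate_embs)  # (2, 5)
best_candidate = scores.argmax(dim=-1)                        # highest-scoring candidate per mention
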
Example #2
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        # self.scorer = nn.CosineSimilarity()
        self.el_criterion = CrossEntropyLoss()
        self.init_weights()
Example #3
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = 10  # note: hard-coded rather than taken from config.num_labels
        self.mention_boundary_embeddings = nn.Embedding(
            config.type_vocab_size, config.hidden_size)
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, 1)

        self.init_weights()
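
A sketch under stated assumptions (the original forward pass is not shown): one plausible way mention-boundary embeddings like those in Example #3 are combined with the encoder output is to add an embedding looked up from a 0/1 in-mention mask before the single-score classifier.

import torch
import torch.nn as nn

hidden_size, type_vocab_size = 768, 2
boundary_embeddings = nn.Embedding(type_vocab_size, hidden_size)
classifier = nn.Linear(hidden_size, 1)

sequence_output = torch.randn(2, 16, hidden_size)        # stand-in for BERT's last hidden states
mention_mask = torch.zeros(2, 16, dtype=torch.long)      # 1 marks tokens inside the mention span
mention_mask[:, 3:6] = 1
augmented = sequence_output + boundary_embeddings(mention_mask)
score = classifier(augmented[:, 0])                      # (2, 1): one score per sequence from position 0
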
Example #4
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.start_tag_idx = config.num_labels
        self.stop_tag_idx = config.num_labels + 1
        self.tagset_size = config.num_labels + 2

        self.bert = BertModel(config)
        # self.bert.init_weights()
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.tagset_size)

        # Matrix of transition parameters.  Entry i,j is the score of
        # transitioning *to* i *from* j.
        self.transitions = nn.Parameter(
            torch.randn(self.tagset_size, self.tagset_size))

        # These two assignments enforce the constraint that we never transition
        # to the start tag and never transition from the stop tag.
        self.transitions.data[self.start_tag_idx, :] = -10000
        self.transitions.data[:, self.stop_tag_idx] = -10000
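
An illustrative sketch, not the original model's scoring code: transitions[i, j] adds the score of moving to tag i from tag j, and the -10000 entries keep any path from entering the start tag or leaving the stop tag. A gold tag sequence is then scored by summing emissions and transitions along the path.

import torch

num_labels = 5
start_tag_idx, stop_tag_idx = num_labels, num_labels + 1
tagset_size = num_labels + 2
transitions = torch.randn(tagset_size, tagset_size)
transitions[start_tag_idx, :] = -10000   # never transition *to* the start tag
transitions[:, stop_tag_idx] = -10000    # never transition *from* the stop tag

def sequence_score(emissions, tags):
    """Score one tag sequence; emissions: (seq_len, tagset_size), tags: (seq_len,) label ids."""
    score = transitions[tags[0], start_tag_idx] + emissions[0, tags[0]]
    for t in range(1, tags.size(0)):
        score = score + transitions[tags[t], tags[t - 1]] + emissions[t, tags[t]]
    return score + transitions[stop_tag_idx, tags[-1]]

emissions = torch.randn(4, tagset_size)   # e.g. classifier outputs for a 4-token sentence
tags = torch.tensor([0, 2, 2, 1])
print(sequence_score(emissions, tags))
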
Example #5
    def __init__(self, config, dec_config, n_op, n_domain,
                 update_id, mask_word_id, eos_id, pad_id, val_sep_id,
                 type_vocab_size, exclude_domain=False):
        super(TransformerDST, self).__init__(config)

        self.val_sep_id = val_sep_id  # TODO: v2 special
        print("### word index of '-', ", self.val_sep_id)

        self.hidden_size = config.hidden_size
        self.n_op = n_op
        self.update_id = update_id
        self.mask_word_id = mask_word_id

        self.bert = BertModel(config, type_vocab_size)

        # predictor
        self.encoder = Encoder(config, self.bert, n_op, n_domain, update_id, exclude_domain)

        self.decoder = BertForSeq2SeqDecoder(
            config, dec_config, self.bert, self.bert.embeddings.word_embeddings.weight,
            mask_word_id, eos_id, pad_id)

        self.apply(self.init_weights)
Example #6
    def __init__(self, opt):
        self.opt = opt
        out_file = './stat/{}_{}_domain{}_adv{}_aux{}_resplit{}_epoch{}'.format(
            self.opt.model_name, self.opt.dataset, self.opt.domain,
            str(self.opt.adv), str(self.opt.aux), str(self.opt.resplit),
            (self.opt.num_epoch))
        print(out_file)
        if 'bert' in opt.model_name:
            # if opt.model_name == 'bert_kg':
            #     tokenizer = Tokenizer4Bert(opt.max_seq_len, opt.pretrained_bert_name)
            #     bert = BertForTokenClassification.from_pretrained('ernie_base')
            #     self.model = opt.model_class(bert, opt).to(opt.device)
            #     self.model.to(opt.device)
            if opt.model_name == 'lcf_bert':
                from pytorch_transformers import BertModel, BertForTokenClassification, BertConfig
                tokenizer = Tokenizer4Bert(opt.max_seq_len,
                                           opt.pretrained_bert_name)
                config = BertConfig.from_pretrained(opt.pretrained_bert_name,
                                                    output_attentions=False)
                bert = BertModel.from_pretrained(opt.pretrained_bert_name,
                                                 config=config)
                self.model = opt.model_class(bert, opt).to(opt.device)
            elif opt.model_name == 'bert':
                from pytorch_transformers import BertModel, BertForTokenClassification, BertConfig

                tokenizer = Tokenizer4Bert(opt.max_seq_len,
                                           opt.pretrained_bert_name)
                config = BertConfig.from_pretrained(opt.pretrained_bert_name,
                                                    output_attentions=True)
                bert = BertModel.from_pretrained(opt.pretrained_bert_name,
                                                 config=config)
                self.model = opt.model_class(bert, opt).to(opt.device)
            elif opt.model_name in ['bert_spc', 'td_bert']:
                from pytorch_transformers import BertModel, BertForTokenClassification, BertConfig

                tokenizer = Tokenizer4Bert(opt.max_seq_len,
                                           opt.pretrained_bert_name)
                config = BertConfig.from_pretrained(opt.pretrained_bert_name,
                                                    output_attentions=True)
                bert = BertModel.from_pretrained(opt.pretrained_bert_name,
                                                 config=config)
                self.model = opt.model_class(bert, opt).to(opt.device)
                # self.model.load_state_dict(torch.load('./state_dict/bert_multi_target_val_acc0.7714'))
            elif opt.model_name == 'bert_label':
                tokenizer = Tokenizer4Bert(opt.max_seq_len,
                                           opt.pretrained_bert_name)
                config = BertConfig.from_pretrained(opt.pretrained_bert_name,
                                                    output_attentions=True)
                bert = BertModel.from_pretrained(opt.pretrained_bert_name,
                                                 config=config)
                self.model = opt.model_class(bert, opt).to(opt.device)
            elif opt.model_name == 'bert_compete':
                tokenizer = Tokenizer4Bert(opt.max_seq_len,
                                           opt.pretrained_bert_name)
                config = BertConfig.from_pretrained(opt.pretrained_bert_name,
                                                    output_attentions=True)
                bert = BertModel.from_pretrained(opt.pretrained_bert_name,
                                                 config=config)

                num_added_tokens = tokenizer.add_tokens(
                    ['[aspect_b]', '[aspect_e]'])
                bert.resize_token_embeddings(len(tokenizer.tokenizer))
                self.model = opt.model_class(bert, opt).to(opt.device)
            else:
                from modeling_bert import BertModel, BertForTokenClassification, BertConfig
                # bert_multi_target
                tokenizer = Tokenizer4Bert(opt.max_seq_len,
                                           opt.pretrained_bert_name)
                config = BertConfig.from_pretrained(opt.pretrained_bert_name,
                                                    output_attentions=True)
                bert = BertModel.from_pretrained(opt.pretrained_bert_name,
                                                 config=config)
            if opt.domain == 'pt':
                bert = BertModel.from_pretrained(
                    './bert_models/pt_bert-base-uncased_amazon_yelp')
            if opt.domain == 'joint':
                bert = BertModel.from_pretrained(
                    './bert_models/laptops_and_restaurants_2mio_ep15')
            if opt.domain == 'res':
                bert = BertModel.from_pretrained(
                    './bert_models/restaurants_10mio_ep3')
            if opt.domain == 'laptop':
                bert = BertModel.from_pretrained(
                    './bert_models/laptops_1mio_ep30')
            if opt.domain == 'ernie':
                bert = BertModel.from_pretrained(
                    './bert_models/ERNIE_Base_en_stable-2.0.0_pytorch')

            # num_added_tokens = tokenizer.add_tokens(['[target_b]','[target_e]'])
            # num_added_tokens = tokenizer.add_tokens(['[aspect_b]','[aspect_e]'])
            for i in range(20):
                b = '[' + str(i) + 'b]'
                e = '[' + str(i) + 'e]'
                num_added_tokens = tokenizer.add_tokens([b, e])
            bert.resize_token_embeddings(len(tokenizer.tokenizer))
            self.model = opt.model_class(bert, opt).to(opt.device)
            # self.model.load_state_dict(torch.load('./state_dict/state_dict/bert_multi_target_restaurant_doamin-res_can0_adv0_aux1.0_val_acc0.8688'))

        else:
            tokenizer = build_tokenizer(
                fnames=[opt.dataset_file['train'], opt.dataset_file['test']],
                max_seq_len=opt.max_seq_len,
                dat_fname='{0}_tokenizer.dat'.format(opt.dataset))
            embedding_matrix = build_embedding_matrix(
                word2idx=tokenizer.word2idx,
                embed_dim=opt.embed_dim,
                dat_fname='{0}_{1}_embedding_matrix.dat'.format(
                    str(opt.embed_dim), opt.dataset))
            self.model = opt.model_class(embedding_matrix, opt).to(opt.device)

        self.trainset = ABSADataset(opt.dataset_file['train'], tokenizer,
                                    'train', opt)
        self.testset = ABSADataset(opt.dataset_file['test'], tokenizer, 'test',
                                   opt)
        if int(opt.resplit) == 0:
            opt.valset_ratio = 0.05  # default to a 5% validation split when not resplitting
        assert 0 <= opt.valset_ratio < 1
        if opt.valset_ratio > 0:
            valset_len = int(len(self.trainset) * opt.valset_ratio)
            self.trainset, self.valset = random_split(
                self.trainset, (len(self.trainset) - valset_len, valset_len))
        else:
            if int(self.opt.resplit) == 1 or int(self.opt.resplit) == 2:
                self.valset = ABSADataset('valid', tokenizer, 'valid', opt)
            else:
                self.valset = self.testset

        if opt.device.type == 'cuda':
            logger.info('cuda memory allocated: {}'.format(
                torch.cuda.memory_allocated(device=opt.device.index)))

        # if opt.load_mode == 1:
        # self.model.load_state_dict(torch.load('/home/nus/temp/ABSA-PyTorch/state_dict/bert_spc_twitter_val_acc0.7384'))
        # find the highest
        # model.load_state_dict(torch.load(PATH))
        self._print_args()
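
A possible consolidation, offered as a sketch rather than the repository's actual code: most branches in Example #6 repeat the same config/model construction and differ only in the pretrained name and the output_attentions flag, so they could share one helper.

from pytorch_transformers import BertConfig, BertModel

def load_bert(pretrained_name, output_attentions=True):
    """Build a BertModel whose config mirrors the repeated from_pretrained calls above."""
    config = BertConfig.from_pretrained(pretrained_name, output_attentions=output_attentions)
    return BertModel.from_pretrained(pretrained_name, config=config)

# e.g. bert = load_bert(opt.pretrained_bert_name)
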
Example #7
    def __init__(self, config):
        super().__init__(config)
        self.bert = BertModel(config)
Example #8
def main():
    # Required parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', dest='data_dir', required=True,
                        help='Which directory contains a {train,val,test}.jsonl file?')
    parser.add_argument('--output_dir', dest='output_dir', required=True,
                        help='Where shall we write intermediate models + final data to?')
    parser.add_argument('--model_params', dest='model_params', required=True,
                        help='JSON file for loading arbitrary model parameters (e.g. optimizers, pre-saved files, etc.)')
    parser.add_argument("--model_type", default=None, type=str, required=True,
                        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument("--cache_dir", default="", type=str,
                        help="Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument("--do_train", action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--eval_split", type=str, default="val")
    parser.add_argument("--evaluate_during_training", action='store_true',
                        help="Rul evaluation during training at each logging step.")
    parser.add_argument("--debug", action="store_true", default=False)
    parser.add_argument("--tf_summary", action="store_true", default=False)
    parser.add_argument("--out_domain", action="store_true", default=False)
    parser.add_argument("--random_evidence", action="store_true", default=False)
    parser.add_argument("--low_resource", action="store_true", default=False)

    # Input parameters
    parser.add_argument("--max_seq_length", default=384, type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. Sequences "
                             "longer than this will be truncated, and sequences shorter than this will be padded.")
    parser.add_argument("--max_query_length", default=64, type=int)
    # Variants of baselines that changes what input is loaded
    parser.add_argument("--full_doc", action="store_true", default=False)
    parser.add_argument("--gold_evidence", action="store_true", default=False)
    parser.add_argument("--focus_attention", action="store_true", default=False)
    parser.add_argument("--pal_attention", action="store_true", default=False)
    parser.add_argument("--multitask", action="store_true", default=False)
    parser.add_argument("--predicted_train_evidence_file", type=str, default=None)
    parser.add_argument("--predicted_eval_evidence_file", type=str, default=None)
    parser.add_argument("--gamma", type=float, default=1.0, help="How much gold to feed to the supervision branch")

    # Training parameters
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--wait_step", default=5, type=int)
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight deay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=50.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
    parser.add_argument("--warmup_proportion", default=0.0, type=float,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")

    # Logging
    parser.add_argument('--logging_steps', type=int, default=50,
                        help="Log every X updates steps.")
    parser.add_argument('--save_steps', type=int, default=200,
                        help="Save checkpoint every X updates steps.")
    parser.add_argument("--eval_all_checkpoints", action='store_true',
                        help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--overwrite_output_dir', action='store_true',
                        help="Overwrite the content of the output directory")
    parser.add_argument('--overwrite_cache', action='store_true',
                        help="Overwrite the cached training and evaluation sets")
    parser.add_argument('--seed', type=int, default=42,
                        help="random seed for initialization")

    # Multi-GPU
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--fp16', action='store_true',
                        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit")
    parser.add_argument('--fp16_opt_level', type=str, default='O1',
                        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
                             "See details at https://nvidia.github.io/apex/amp.html")
    parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.")
    parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.")


    args = parser.parse_args()

    # Parse model args json
    with open(args.model_params, 'r') as fp:
        logging.debug(f'Loading model parameters from {args.model_params}')
        model_params = json.load(fp)

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir:
        raise ValueError("Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(args.output_dir))

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl')
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
    logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
                    args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16)

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(model_params["config_name"] if model_params["config_name"] else model_params["model_name_or_path"])
    config.num_labels = len(model_params['classes'])
    tokenizer = tokenizer_class.from_pretrained(model_params["tokenizer_name"] if model_params["tokenizer_name"] else model_params["model_name_or_path"],
                                                do_lower_case=model_params["do_lower_case"])
    model = model_class.from_pretrained(model_params["model_name_or_path"], from_tf=bool('.ckpt' in model_params["model_name_or_path"]),
                                        config=config)
    if args.pal_attention:
        model.bert_pal = BertModel.from_pretrained(model_params["model_name_or_path"], config=config)

    if args.local_rank == 0:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    if args.do_train:
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)
        model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        train_dataset, _ = load_and_cache_examples(args, model_params, tokenizer, evaluate=False, split="train", output_examples=False)
        global_step, tr_loss = train(args, model_params, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))
        # model = model_class.from_pretrained(args.output_dir)
        # tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=model_params["do_lower_case"])
        # model.to(args.device)

    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        checkpoints = [args.output_dir + "/best_model"]
        if args.eval_all_checkpoints:
            checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
            logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN)  # Reduce model loading logs

        logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
            model = model_class.from_pretrained(checkpoint, force_download=True)
            #if args.pal_attention:
            #    # TODO: Wrong since we need to initialize using exact params?
            #    model.bert_pal = BertModel.from_pretrained(model_params["model_name_or_path"], config=config)
            model.to(args.device)

            # Evaluate
            result = evaluate(args, model_params, model, tokenizer, prefix=global_step, output_examples=True, split=args.eval_split)
            results = {"Best F1":result[0], "Best Accuracy":result[1]}

    logger.info("Results on the split {} : {}".format(args.eval_split, results))
    return results
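
The script reads its model parameters from the JSON file passed via --model_params; the keys used above are config_name, tokenizer_name, model_name_or_path, do_lower_case, and classes. A minimal sketch of producing such a file, with illustrative values (the checkpoint name and label set are assumptions, not taken from the original):

import json

model_params = {
    "model_name_or_path": "bert-base-uncased",  # assumed checkpoint; any compatible name works
    "config_name": "",                          # falls back to model_name_or_path when empty
    "tokenizer_name": "",                       # falls back to model_name_or_path when empty
    "do_lower_case": True,
    "classes": ["NEG", "POS"],                  # illustrative label set; num_labels is derived from len(classes)
}

with open("model_params.json", "w") as fp:
    json.dump(model_params, fp, indent=2)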