Example No. 1
def load_bert(model_path="bert/model/pytorch_model.bin",
              config_file="bert/config_parameters/config.json"):
    print("Loading BERT-model...")
    config = BertConfig(config_file)
    model = BertForQuestionAnswering(config)
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device("cpu")))
    print("Model loaded.\n\n")
    return model


def test_BertForQuestionAnswering():
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000,
                        hidden_size=768,
                        num_hidden_layers=12,
                        num_attention_heads=12,
                        intermediate_size=3072)
    model = BertForQuestionAnswering(config)
    print(model(input_ids, token_type_ids, input_mask))
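A usage note: below is a minimal single-question inference sketch with the model returned by load_bert, assuming the legacy pytorch_pretrained_bert API these snippets use; answer_question and its greedy argmax decoding are illustrative additions, not part of the original scripts.

import torch
from pytorch_pretrained_bert import BertTokenizer

def answer_question(model, tokenizer, question, context):
    # Build the standard BERT QA input: [CLS] question [SEP] context [SEP]
    q_tokens = tokenizer.tokenize(question)
    c_tokens = tokenizer.tokenize(context)
    tokens = ["[CLS]"] + q_tokens + ["[SEP]"] + c_tokens + ["[SEP]"]
    input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
    segment_ids = torch.tensor([[0] * (len(q_tokens) + 2) + [1] * (len(c_tokens) + 1)])
    model.eval()
    with torch.no_grad():
        start_logits, end_logits = model(input_ids, segment_ids)
    start, end = int(start_logits.argmax()), int(end_logits.argmax())
    # Greedy span decode; the later examples use n-best decoding instead
    return " ".join(tokens[start:end + 1]).replace(" ##", "")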
Example No. 3
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--bert_model", default=None, type=str, required=True,
                        help="Bert pre-trained model selected in the list: bert-base-uncased, "
                        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
                        "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where the model checkpoints and predictions will be written.")

    ## Other parameters
    parser.add_argument("--train_file", default=None, type=str, help="SQuAD json for training. E.g., train-v1.1.json")
    parser.add_argument("--predict_file", default=None, type=str,
                        help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json")
    parser.add_argument("--max_seq_length", default=384, type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. Sequences "
                             "longer than this will be truncated, and sequences shorter than this will be padded.")
    parser.add_argument("--doc_stride", default=128, type=int,
                        help="When splitting up a long document into chunks, how much stride to take between chunks.")
    parser.add_argument("--max_query_length", default=64, type=int,
                        help="The maximum number of tokens for the question. Questions longer than this will "
                             "be truncated to this length.")
    parser.add_argument("--do_train", action='store_true', help="Whether to run training.")
    parser.add_argument("--do_predict", action='store_true', help="Whether to run eval on the dev set.")
    parser.add_argument("--train_batch_size", default=32, type=int, help="Total batch size for training.")
    parser.add_argument("--predict_batch_size", default=8, type=int, help="Total batch size for predictions.")
    parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs", default=3.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--warmup_proportion", default=0.1, type=float,
                        help="Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10%% "
                             "of training.")
    parser.add_argument("--n_best_size", default=20, type=int,
                        help="The total number of n-best predictions to generate in the nbest_predictions.json "
                             "output file.")
    parser.add_argument("--max_answer_length", default=30, type=int,
                        help="The maximum length of an answer that can be generated. This is needed because the start "
                             "and end predictions are not conditioned on one another.")
    parser.add_argument("--verbose_logging", action='store_true',
                        help="If true, all of the warnings related to data processing will be printed. "
                             "A number of warnings are expected for a normal SQuAD evaluation.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument('--gradient_accumulation_steps',
                        type=int,
                        default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--do_lower_case",
                        action='store_true',
                        help="Whether to lower case the input text. True for uncased models, False for cased models.")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--fp16',
                        action='store_true',
                        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument('--loss_scale',
                        type=float, default=0,
                        help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
                             "0 (default value): dynamic loss scaling.\n"
                             "Positive power of 2: static loss scaling value.\n")
    parser.add_argument('--version_2_with_negative',
                        action='store_true',
                        help='If true, the SQuAD examples contain some that do not have an answer.')
    parser.add_argument('--null_score_diff_threshold',
                        type=float, default=0.0,
                        help="If null_score - best_non_null is greater than the threshold predict null.")
    args = parser.parse_args()

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
        device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
                            args.gradient_accumulation_steps))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps
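    # e.g. --train_batch_size 32 with --gradient_accumulation_steps 4 runs
    # micro-batches of 8 and steps the optimizer once every 4 backward passes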

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_predict:
        raise ValueError("At least one of `do_train` or `do_predict` must be True.")

    if args.do_train:
        if not args.train_file:
            raise ValueError(
                "If `do_train` is True, then `train_file` must be specified.")
    if args.do_predict:
        if not args.predict_file:
            raise ValueError(
                "If `do_predict` is True, then `predict_file` must be specified.")

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train:
        raise ValueError("Output directory () already exists and is not empty.")
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)

    train_examples = None
    num_train_optimization_steps = None
    if args.do_train:
        train_examples = read_squad_examples(
            input_file=args.train_file, is_training=True, version_2_with_negative=args.version_2_with_negative)
        num_train_optimization_steps = int(
            len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs
        if args.local_rank != -1:
            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size()

    # Prepare model
    model = BertForQuestionAnswering.from_pretrained(args.bert_model,
                cache_dir=os.path.join(PYTORCH_PRETRAINED_BERT_CACHE, 'distributed_{}'.format(args.local_rank)))

    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

        model = DDP(model)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())

    # hack to remove the pooler, which is not used
    # and would otherwise produce None grads that break apex
    param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]

    if args.fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if args.loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
    else:
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.learning_rate,
                             warmup=args.warmup_proportion,
                             t_total=num_train_optimization_steps)

    global_step = 0
    if args.do_train:
        cached_train_features_file = args.train_file+'_{0}_{1}_{2}_{3}'.format(
            list(filter(None, args.bert_model.split('/'))).pop(), str(args.max_seq_length), str(args.doc_stride), str(args.max_query_length))
        train_features = None
        try:
            with open(cached_train_features_file, "rb") as reader:
                train_features = pickle.load(reader)
        except Exception:
            # cache missing or unreadable; build the features from scratch
            train_features = convert_examples_to_features(
                examples=train_examples,
                tokenizer=tokenizer,
                max_seq_length=args.max_seq_length,
                doc_stride=args.doc_stride,
                max_query_length=args.max_query_length,
                is_training=True)
            if args.local_rank == -1 or torch.distributed.get_rank() == 0:
                logger.info("  Saving train features into cached file %s", cached_train_features_file)
                with open(cached_train_features_file, "wb") as writer:
                    pickle.dump(train_features, writer)
        logger.info("***** Running training *****")
        logger.info("  Num orig examples = %d", len(train_examples))
        logger.info("  Num split examples = %d", len(train_features))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_optimization_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
        all_start_positions = torch.tensor([f.start_position for f in train_features], dtype=torch.long)
        all_end_positions = torch.tensor([f.end_position for f in train_features], dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
                                   all_start_positions, all_end_positions)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)

        model.train()
        for _ in trange(int(args.num_train_epochs), desc="Epoch"):
            for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
                if n_gpu == 1:
                    batch = tuple(t.to(device) for t in batch) # multi-GPU DataParallel does the scattering itself
                input_ids, input_mask, segment_ids, start_positions, end_positions = batch
                loss = model(input_ids, segment_ids, input_mask, start_positions, end_positions)
                if n_gpu > 1:
                    loss = loss.mean() # average the loss across GPUs
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16:
                        # modify learning rate with special warm up BERT uses
                        # if args.fp16 is False, BertAdam is used and handles this automatically
                        lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr_this_step
                    optimizer.step()
                    optimizer.zero_grad()
                    global_step += 1

    if args.do_train:
        # Save a trained model and the associated configuration
        model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model itself
        output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
        torch.save(model_to_save.state_dict(), output_model_file)
        output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
        with open(output_config_file, 'w') as f:
            f.write(model_to_save.config.to_json_string())

        # Load a trained model and config that you have fine-tuned
        config = BertConfig(output_config_file)
        model = BertForQuestionAnswering(config)
        model.load_state_dict(torch.load(output_model_file))
    else:
        model = BertForQuestionAnswering.from_pretrained(args.bert_model)

    model.to(device)

    if args.do_predict and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        eval_examples = read_squad_examples(
            input_file=args.predict_file, is_training=False, version_2_with_negative=args.version_2_with_negative)
        eval_features = convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            doc_stride=args.doc_stride,
            max_query_length=args.max_query_length,
            is_training=False)

        logger.info("***** Running predictions *****")
        logger.info("  Num orig examples = %d", len(eval_examples))
        logger.info("  Num split examples = %d", len(eval_features))
        logger.info("  Batch size = %d", args.predict_batch_size)

        all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
        all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index)
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.predict_batch_size)

        model.eval()
        all_results = []
        logger.info("Start evaluating")
        for input_ids, input_mask, segment_ids, example_indices in tqdm(eval_dataloader, desc="Evaluating"):
            if len(all_results) % 1000 == 0:
                logger.info("Processing example: %d" % (len(all_results)))
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            with torch.no_grad():
                batch_start_logits, batch_end_logits = model(input_ids, segment_ids, input_mask)
            for i, example_index in enumerate(example_indices):
                start_logits = batch_start_logits[i].detach().cpu().tolist()
                end_logits = batch_end_logits[i].detach().cpu().tolist()
                eval_feature = eval_features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                all_results.append(RawResult(unique_id=unique_id,
                                             start_logits=start_logits,
                                             end_logits=end_logits))
        output_prediction_file = os.path.join(args.output_dir, "predictions.json")
        output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json")
        output_null_log_odds_file = os.path.join(args.output_dir, "null_odds.json")
        write_predictions(eval_examples, eval_features, all_results,
                          args.n_best_size, args.max_answer_length,
                          args.do_lower_case, output_prediction_file,
                          output_nbest_file, output_null_log_odds_file, args.verbose_logging,
                          args.version_2_with_negative, args.null_score_diff_threshold)
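The fp16 branch above multiplies the base learning rate by warmup_linear; as a reference, here is a sketch consistent with the helper shipped in old pytorch_pretrained_bert releases (treat it as an approximation, not this project's exact code):

def warmup_linear(x, warmup=0.002):
    # x is the fraction of training completed (global_step / t_total)
    if x < warmup:
        return x / warmup   # linear ramp up to the peak learning rate
    return 1.0 - x          # then linear decay towards zero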
Example No. 4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--paragraph", default=None, type=str)
    parser.add_argument("--model", default=None, type=str)
    parser.add_argument("--max_seq_length", default=384, type=int)
    parser.add_argument("--doc_stride", default=128, type=int)
    parser.add_argument("--max_query_length", default=64, type=int)
    parser.add_argument("--config_file", default=None, type=str)
    parser.add_argument("--max_answer_length", default=30, type=int)

    args = parser.parse_args()
    para_file = args.paragraph
    model_path = args.model

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()

    ### Loading Pretrained model for QnA
    print("Loading BERT-model...\n\n")
    config = BertConfig(args.config_file)
    model = BertForQuestionAnswering(config)
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device("cpu")))
    model.to(device)

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased",
                                              do_lower_case=True)

    while True:
        print("Please specify paragraph: \n "
              "1: Assisted Time Holdover \n "
              "2: Semcon short version \n "
              "3: Semcon long version")
        choice = input()
        if choice == "1":
            break
        elif choice == "2":
            para_file = "bert/input/semcon_short.txt"
            break
        elif choice == "3":
            para_file = "bert/input/semcon.txt"
            break
        else:
            print("I did not understand that, please type in 1, 2 or 3. \n")

    ### Reading paragraph
    f = open(para_file, "r")
    para = f.read()
    f.close()
    print("\nParagraph:\n", para)

    while True:
        input_data = []
        paragraphs = {}
        paragraphs["id"] = 1
        paragraphs["text"] = para
        paragraphs["ques"] = [input("\n What is your question?\n")]
        if paragraphs["ques"] == ["exit"]:
            exit()
        start = time.time()
        input_data.append(paragraphs)
        ## input_data is a list of dictionaries, each holding a paragraph and its questions
        examples = read_squad_examples(input_data)

        eval_features = convert_examples_to_features(
            examples=examples,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            doc_stride=args.doc_stride,
            max_query_length=args.max_query_length,
        )

        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_example_index = torch.arange(all_input_ids.size(0),
                                         dtype=torch.long)

        pred_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_example_index)
        # Run prediction for full data
        pred_sampler = SequentialSampler(pred_data)
        pred_dataloader = DataLoader(pred_data,
                                     sampler=pred_sampler,
                                     batch_size=9)

        predictions = []
        for input_ids, input_mask, segment_ids, example_indices in pred_dataloader:
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)

            with torch.no_grad():
                batch_start_logits, batch_end_logits = model(
                    input_ids, segment_ids, input_mask)

            features = []
            example = []
            all_results = []

            for i, example_index in enumerate(example_indices):
                start_logits = batch_start_logits[i].detach().cpu().tolist()
                end_logits = batch_end_logits[i].detach().cpu().tolist()
                feature = eval_features[example_index.item()]
                unique_id = int(feature.unique_id)
                features.append(feature)
                all_results.append(
                    RawResult(
                        unique_id=unique_id,
                        start_logits=start_logits,
                        end_logits=end_logits,
                    ))

            output = predict(examples, features, all_results,
                             args.max_answer_length)
            predictions.append(output)

        prediction = colored(
            predictions[math.floor(examples[0].unique_id / 12)][examples[0]],
            "green",
            attrs=["reverse"],
        )
        print(prediction, "\n")
        print("Time: ", time.time() - start)
    """
Example No. 5
def answer_prediction(paras, question, model, config_file, max_seq_length=384,
                      doc_stride=128, max_query_length=64, max_answer_length=60):
    model_path = model

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    
    ## input_data is a list of dictionaries, each holding a paragraph and its questions
    input_data = []
    for i, para in enumerate(paras):
        paragraphs = {}
        paragraphs['id'] = i
        paragraphs['text'] = para
        paragraphs['ques'] = question
        input_data.append(paragraphs)

    examples = read_paragraphs(input_data, question)
    tokenizer = AutoTokenizer.from_pretrained(
        'bert-large-uncased-whole-word-masking-finetuned-squad', do_lower_case=True)

    eval_features = convert_examples_to_features(
        examples=examples,
        tokenizer=tokenizer,
        max_seq_length=max_seq_length,
        doc_stride=doc_stride,
        max_query_length=max_query_length)

    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
    
    ### Loading pretrained model for QnA
    config = BertConfig(config_file)
    model = BertForQuestionAnswering(config)
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.to(device)

    pred_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index)
    # Run prediction for full data
    pred_sampler = SequentialSampler(pred_data)
    pred_dataloader = DataLoader(pred_data, sampler=pred_sampler, batch_size=10)
    
    predictions = []

    model.eval()
    for input_ids, input_mask, segment_ids, example_indices in tqdm(pred_dataloader):
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        segment_ids = segment_ids.to(device)
        
        with torch.no_grad():
            batch_start_logits, batch_end_logits = model(input_ids, segment_ids, input_mask)

        features = []
        all_results = []

        for i, example_index in enumerate(example_indices):
            start_logits = batch_start_logits[i].detach().cpu().tolist()
            end_logits = batch_end_logits[i].detach().cpu().tolist()
            feature = eval_features[example_index.item()]
            unique_id = int(feature.unique_id)
            features.append(feature)
            all_results.append(RawResult(unique_id=unique_id,
                                         start_logits=start_logits,
                                         end_logits=end_logits))

        output = predict(examples, features, all_results, max_answer_length)
        predictions.append(output)

    ### For printing the results ####
    final_preds = []
    final_paras = []
    final_probs = []
    final_ques = []
    index = None
    for i, example in enumerate(examples):
        if index != example.example_id:
            index = example.example_id

        ques_text = colored(example.question_text, 'blue')
        prediction = predictions[math.floor(example.unique_id / 12)][example]
        prob = predictions[math.floor(example.unique_id / 12)]['prob' + str(example)]

        final_ques.append(ques_text)
        final_preds.append(prediction)
        final_paras.append(example.para_text)
        final_probs.append(prob)

    return final_ques, final_preds, final_paras, final_probs
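A hypothetical call mirroring the signature above; the paths and texts are placeholders, not from the original:

ques, preds, paras, probs = answer_prediction(
    paras=["First paragraph ...", "Second paragraph ..."],
    question=["What is the holdover time?"],
    model="bert/model/pytorch_model.bin",
    config_file="bert/config_parameters/config.json")
for q, a, p in zip(ques, preds, probs):
    print(q, "->", a, "(prob: {})".format(p))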
Example No. 6
def start():
    app = Flask(__name__)
    host = "0.0.0.0"
    port = 8000
    debug = True

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--max_query_length",
        default=64,
        type=int,
        help=
        "The maximum number of tokens for the question. Questions longer than this will "
        "be truncated to this length.")
    parser.add_argument("--predict_batch_size",
                        default=8,
                        type=int,
                        help="Total batch size for predictions.")
    parser.add_argument(
        "--verbose_logging",
        action='store_true',
        help=
        "If true, all of the warnings related to data processing will be printed. "
        "A number of warnings are expected for a normal SQuAD evaluation.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )

    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument(
        '--fp16',
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=0,
        help=
        "Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n")

    parser.add_argument(
        '--null_score_diff_threshold',
        type=float,
        default=0.0,
        help=
        "If null_score - best_non_null is greater than the threshold predict null."
    )
    args = parser.parse_args()
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased",
                                              do_lower_case=True)

    config = BertConfig("./output/config.json")
    model = BertForQuestionAnswering(config)
    model.load_state_dict(
        torch.load("./output/pytorch_model.bin", map_location='cpu'))
    model.to(device)

    @app.route('/', methods=['POST'])
    def filter():
        dat_in = {
            "index":
            2,
            "original_sentence":
            "existing image captioning models do not generalize well to out-of-domain images containing novel scenes or objects . this limitation severely hinders the use of these models in real world applications dealing with images in the wild . we address this problem using a flexible approach that enables existing deep captioning architectures to take advantage of image taggers at test time , without re-training . our method uses constrained beam search to force the inclusion of selected tag words in the output , and fixed , pretrained word embeddings to facilitate vocabulary expansion to previously unseen tag words . using this approach we achieve state of the art results for out-of-domain captioning on mscoco -LRB- and improved results for in-domain captioning -RRB- . perhaps surprisingly , our results significantly outperform approaches that incorporate the same tag predictions into the learning algorithm . we also show that we can significantly improve the quality of generated imagenet captions by leveraging ground-truth labels . ",
            "tagged_sentence":
            "existing│O_ANS image│O_ANS captioning│O_ANS models│O_ANS do│O_ANS not│O_ANS generalize│O_ANS well│O_ANS to│O_ANS out-of-domain│O_ANS images│O_ANS containing│O_ANS novel│O_ANS scenes│O_ANS or│O_ANS objects│O_ANS .│O_ANS this│O_ANS limitation│O_ANS severely│O_ANS hinders│O_ANS the│O_ANS use│O_ANS of│O_ANS these│O_ANS models│O_ANS in│O_ANS real│O_ANS world│O_ANS applications│O_ANS dealing│O_ANS with│O_ANS images│O_ANS in│O_ANS the│O_ANS wild│O_ANS .│O_ANS we│O_ANS address│O_ANS this│O_ANS problem│O_ANS using│O_ANS a│O_ANS flexible│O_ANS approach│O_ANS that│O_ANS enables│O_ANS existing│O_ANS deep│O_ANS captioning│O_ANS architectures│O_ANS to│O_ANS take│O_ANS advantage│O_ANS of│O_ANS image│O_ANS taggers│O_ANS at│O_ANS test│O_ANS time│O_ANS ,│O_ANS without│O_ANS re-training│O_ANS .│O_ANS our│O_ANS method│O_ANS uses│O_ANS constrained│O_ANS beam│O_ANS search│O_ANS to│O_ANS force│O_ANS the│O_ANS inclusion│O_ANS of│O_ANS selected│O_ANS tag│O_ANS words│O_ANS in│O_ANS the│O_ANS output│O_ANS ,│O_ANS and│O_ANS fixed│O_ANS ,│O_ANS pretrained│O_ANS word│B_ANS embeddings│I_ANS to│O_ANS facilitate│O_ANS vocabulary│O_ANS expansion│O_ANS to│O_ANS previously│O_ANS unseen│O_ANS tag│O_ANS words│O_ANS .│O_ANS using│O_ANS this│O_ANS approach│O_ANS we│O_ANS achieve│O_ANS state│O_ANS of│O_ANS the│O_ANS art│O_ANS results│O_ANS for│O_ANS out-of-domain│O_ANS captioning│O_ANS on│O_ANS mscoco│O_ANS -LRB-│O_ANS and│O_ANS improved│O_ANS results│O_ANS for│O_ANS in-domain│O_ANS captioning│O_ANS -RRB-│O_ANS .│O_ANS perhaps│O_ANS surprisingly│O_ANS ,│O_ANS our│O_ANS results│O_ANS significantly│O_ANS outperform│O_ANS approaches│O_ANS that│O_ANS incorporate│O_ANS the│O_ANS same│O_ANS tag│O_ANS predictions│O_ANS into│O_ANS the│O_ANS learning│O_ANS algorithm│O_ANS .│O_ANS we│O_ANS also│O_ANS show│O_ANS that│O_ANS we│O_ANS can│O_ANS significantly│O_ANS improve│O_ANS the│O_ANS quality│O_ANS of│O_ANS generated│O_ANS imagenet│O_ANS captions│O_ANS by│O_ANS leveraging│O_ANS ground-truth│O_ANS labels│O_ANS .│O_ANS ",
            "answer":
            "word embeddings",
            "question": [
                "What does pretrained stand for ?", "What is pretrained ?",
                "What does re-training stand for ?",
                "What is the pretrained ?", "What is the term for pretrained ?"
            ],
            "score": [
                -2.3564553260803223, -3.8269970417022705, -4.229936122894287,
                -5.298074722290039, -5.689377307891846
            ]
        }

        eval_examples = read_squad_examples(input_data=dat_in,
                                            is_training=False,
                                            version_2_with_negative=True)
        eval_features = convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=384,
            doc_stride=128,
            max_query_length=args.max_query_length,
            is_training=False)

        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_example_index = torch.arange(all_input_ids.size(0),
                                         dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_example_index)
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=args.predict_batch_size)

        model.eval()
        all_results = []
        logger.info("Start evaluating")
        for input_ids, input_mask, segment_ids, example_indices in tqdm(
                eval_dataloader,
                desc="Evaluating",
                disable=args.local_rank not in [-1, 0]):
            if len(all_results) % 1000 == 0:
                logger.info("Processing example: %d" % (len(all_results)))
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            with torch.no_grad():
                batch_start_logits, batch_end_logits = model(
                    input_ids, segment_ids, input_mask)
            for i, example_index in enumerate(example_indices):
                start_logits = batch_start_logits[i].detach().cpu().tolist()
                end_logits = batch_end_logits[i].detach().cpu().tolist()
                eval_feature = eval_features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                all_results.append(
                    RawResult(unique_id=unique_id,
                              start_logits=start_logits,
                              end_logits=end_logits))
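        # NOTE: unlike Example No. 3, this write_predictions variant is called
        # without the output-file arguments and its return value is served as the
        # HTTP response, so it presumably returns predictions instead of writing JSON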
        result = write_predictions(eval_examples, eval_features, all_results,
                                   20, 30, True, args.verbose_logging, True,
                                   args.null_score_diff_threshold)

        # inputs = request.get_json(force=True)
        return result

    app.run(debug=debug,
            host=host,
            port=port,
            use_reloader=False,
            threaded=True)
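A hedged client sketch for exercising the endpoint above; note the route currently ignores the request body and always scores the hard-coded dat_in:

import requests

resp = requests.post("http://0.0.0.0:8000/", json={})
print(resp.status_code, resp.text)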
Example No. 7
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument(
        "--bert_model",
        default=None,
        type=str,
        required=True,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
        "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model checkpoints and predictions will be written."
    )

    ## Other parameters
    parser.add_argument("--model", default=None, type=str)
    parser.add_argument("--train_file",
                        default=None,
                        type=str,
                        help="SQuAD json for training. E.g., train-v1.1.json")

    parser.add_argument(
        "--max_seq_length",
        default=384,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. Sequences "
        "longer than this will be truncated, and sequences shorter than this will be padded."
    )
    parser.add_argument(
        "--doc_stride",
        default=128,
        type=int,
        help=
        "When splitting up a long document into chunks, how much stride to take between chunks."
    )
    parser.add_argument(
        "--max_query_length",
        default=64,
        type=int,
        help=
        "The maximum number of tokens for the question. Questions longer than this will "
        "be truncated to this length.")
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_predict",
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--train_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--predict_batch_size",
                        default=8,
                        type=int,
                        help="Total batch size for predictions.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        help=
        "Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10%% "
        "of training.")
    parser.add_argument(
        "--n_best_size",
        default=20,
        type=int,
        help=
        "The total number of n-best predictions to generate in the nbest_predictions.json "
        "output file.")
    parser.add_argument(
        "--max_answer_length",
        default=30,
        type=int,
        help=
        "The maximum length of an answer that can be generated. This is needed because the start "
        "and end predictions are not conditioned on one another.")
    parser.add_argument(
        "--verbose_logging",
        action='store_true',
        help=
        "If true, all of the warnings related to data processing will be printed. "
        "A number of warnings are expected for a normal SQuAD evaluation.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help=
        "Whether to lower case the input text. True for uncased models, False for cased models."
    )
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument(
        '--fp16',
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=0,
        help=
        "Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n")
    parser.add_argument(
        '--version_2_with_negative',
        action='store_true',
        help=
        'If true, the SQuAD examples contain some that do not have an answer.')
    parser.add_argument(
        '--null_score_diff_threshold',
        type=float,
        default=0.0,
        help=
        "If null_score - best_non_null is greater than the threshold predict null."
    )
    parser.add_argument("--config_file", default=None, type=str)
    args = parser.parse_args()

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir) and args.do_train:
        raise ValueError(
            "Output directory ({}) already exists and is not empty.".format(args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    config = BertConfig(args.config_file)
    model = BertForQuestionAnswering(config)
    model.load_state_dict(torch.load(args.model, map_location='cpu'))
    model.to(device)

    if args.do_predict and (args.local_rank == -1
                            or torch.distributed.get_rank() == 0):
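        # NOTE: unlike Example No. 3, this read_squad_examples variant takes no
        # input_file argument, so it presumably locates the prediction data itself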
        eval_examples = read_squad_examples(
            is_training=False,
            version_2_with_negative=args.version_2_with_negative)
        eval_features = convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            doc_stride=args.doc_stride,
            max_query_length=args.max_query_length,
            is_training=False)

        logger.info("***** Running predictions *****")
        logger.info("  Num orig examples = %d", len(eval_examples))
        logger.info("  Num split examples = %d", len(eval_features))
        logger.info("  Batch size = %d", args.predict_batch_size)

        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_example_index = torch.arange(all_input_ids.size(0),
                                         dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_example_index)
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=args.predict_batch_size)

        model.eval()
        all_results = []
        logger.info("Start evaluating")
        for input_ids, input_mask, segment_ids, example_indices in tqdm(
                eval_dataloader,
                desc="Evaluating",
                disable=args.local_rank not in [-1, 0]):
            if len(all_results) % 1000 == 0:
                logger.info("Processing example: %d" % (len(all_results)))
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            with torch.no_grad():
                batch_start_logits, batch_end_logits = model(
                    input_ids, segment_ids, input_mask)
            for i, example_index in enumerate(example_indices):
                start_logits = batch_start_logits[i].detach().cpu().tolist()
                end_logits = batch_end_logits[i].detach().cpu().tolist()
                eval_feature = eval_features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                all_results.append(
                    RawResult(unique_id=unique_id,
                              start_logits=start_logits,
                              end_logits=end_logits))
        output_prediction_file = os.path.join(args.output_dir,
                                              "predictions.json")
        output_nbest_file = os.path.join(args.output_dir,
                                         "nbest_predictions.json")
        output_null_log_odds_file = os.path.join(args.output_dir,
                                                 "null_odds.json")
        write_predictions(eval_examples, eval_features, all_results,
                          args.n_best_size, args.max_answer_length,
                          args.do_lower_case, output_prediction_file,
                          output_nbest_file, output_null_log_odds_file,
                          args.verbose_logging, args.version_2_with_negative,
                          args.null_score_diff_threshold)
Example No. 8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--paragraph", default=None, type=str)
    parser.add_argument("--model", default=None, type=str)
    parser.add_argument("--max_seq_length", default=384, type=int)
    parser.add_argument("--doc_stride", default=128, type=int)
    parser.add_argument("--max_query_length", default=64, type=int)
    parser.add_argument("--config_file", default=None, type=str)
    parser.add_argument("--max_answer_length", default=30, type=int)

    args = parser.parse_args()
    para_file = args.paragraph
    model_path = args.model

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    n_gpu = torch.cuda.device_count()

    ### Reading paragraph
    with open(para_file, 'r') as f:
        para = f.read()

    ## Reading question
    #     f = open(ques_file, 'r')
    #     ques = f.read()
    #     f.close()

    para_list = para.split('\n\n')

    input_data = []
    i = 1
    for para in para_list:
        paragraphs = {}
        splits = para.split('\nQuestions:')
        paragraphs['id'] = i
        paragraphs['text'] = splits[0].replace('Paragraph:', '').strip('\n')
        paragraphs['ques'] = splits[1].lstrip('\n').split('\n')
        input_data.append(paragraphs)
        i += 1

    ## input_data is a list of dictionaries, each holding a paragraph and its questions

    examples = read_squad_examples(input_data)
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=True)

    eval_features = convert_examples_to_features(
        examples=examples,
        tokenizer=tokenizer,
        max_seq_length=args.max_seq_length,
        doc_stride=args.doc_stride,
        max_query_length=args.max_query_length)

    all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                 dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                  dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                   dtype=torch.long)
    all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)

    ### Loading Pretrained model for QnA
    config = BertConfig(args.config_file)
    model = BertForQuestionAnswering(config)
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device('cpu')))
    model.to(device)

    pred_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
                              all_example_index)
    # Run prediction for full data
    pred_sampler = SequentialSampler(pred_data)
    pred_dataloader = DataLoader(pred_data, sampler=pred_sampler, batch_size=9)

    predictions = []
    for input_ids, input_mask, segment_ids, example_indices in tqdm(
            pred_dataloader):
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        segment_ids = segment_ids.to(device)

        with torch.no_grad():
            batch_start_logits, batch_end_logits = model(
                input_ids, segment_ids, input_mask)

        features = []
        example = []
        all_results = []

        for i, example_index in enumerate(example_indices):
            start_logits = batch_start_logits[i].detach().cpu().tolist()
            end_logits = batch_end_logits[i].detach().cpu().tolist()
            feature = eval_features[example_index.item()]
            unique_id = int(feature.unique_id)
            features.append(feature)
            all_results.append(
                RawResult(unique_id=unique_id,
                          start_logits=start_logits,
                          end_logits=end_logits))

        output = predict(examples, features, all_results,
                         args.max_answer_length)
        predictions.append(output)

    ### For printing the results ####
    index = None
    for example in examples:
        if index != example.example_id:
            print(example.para_text)
            index = example.example_id
            print('\n')
            print(
                colored('***********Question and Answers *************',
                        'red'))

        ques_text = colored(example.question_text, 'blue')
        print(ques_text)
        prediction = colored(predictions[math.floor(example.unique_id /
                                                    12)][example],
                             'green',
                             attrs=['reverse', 'blink'])
        print(prediction)
        print('\n')
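Given the split logic above, the --paragraph file is expected to hold blocks separated by blank lines, each pairing a paragraph with its questions; the content below is illustrative:

sample = """Paragraph:
BERT is a transformer-based language model.
Questions:
What kind of model is BERT?
How is BERT trained?"""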
Example No. 9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--paragraph", default=None, type=str)
    parser.add_argument("--question", default=None, type=str)
    parser.add_argument("--model", default=None, type=str)
    parser.add_argument("--max_seq_length", default=384, type=int)
    parser.add_argument("--doc_stride", default=128, type=int)
    parser.add_argument("--max_query_length", default=64, type=int)
    parser.add_argument("--config_file", default=None, type=str)
    parser.add_argument("--max_answer_length", default=30, type=int)

    args = parser.parse_args()
    para_file = args.paragraph
    question_file = args.question
    model_path = args.model
    device = torch.device("cpu")

    ### Reading paragraph
    # f = open(para_file, 'r')
    # para = f.read()
    # f.close()

    ## Reading question
    #     f = open(ques_file, 'r')
    #     ques = f.read()
    #     f.close()

    # para_list = para.split('\n\n')
    f = open(para_file, "rb")
    para = f.read()
    para = para.decode('windows-1252')
    para = para.strip("\n").replace("\r", " ").replace("\n", "")
    #print(para)

    # print(para)
    f.close()

    f_ = open(question_file, "r")
    question = f_.read()
    question = question.split("\n")
    while "" in question:
        question.remove("")
    for q in question:
        q = q.strip("\n")
    f_.close()
    input_data = []
    pfinder = ParaFinder(para)
    i = 0
    for q in question:
        closest_para = pfinder.closestParagraph(q)
        paragraphs = {}
        paragraphs["id"] = i
        paragraphs["text"] = closest_para
        paragraphs["ques"] = [q]
        i += 1
        input_data.append(paragraphs)

    # print(input_data)
    ## input_data is a list of dictionaries, each holding a paragraph and its questions

    examples = read_squad_examples(input_data)
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=True)

    eval_features = convert_examples_to_features(
        examples=examples,
        tokenizer=tokenizer,
        max_seq_length=args.max_seq_length,
        doc_stride=args.doc_stride,
        max_query_length=args.max_query_length)

    all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                 dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                  dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                   dtype=torch.long)
    all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)

    ### Loading Pretrained model for QnA
    config = BertConfig(args.config_file)
    model = BertForQuestionAnswering(config)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)

    pred_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
                              all_example_index)
    # Run prediction for full data
    pred_sampler = SequentialSampler(pred_data)
    pred_dataloader = DataLoader(pred_data, sampler=pred_sampler, batch_size=9)

    predictions = []
    for input_ids, input_mask, segment_ids, example_indices in pred_dataloader:
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        segment_ids = segment_ids.to(device)

        with torch.no_grad():
            batch_start_logits, batch_end_logits = model(
                input_ids, segment_ids, input_mask)

        features = []
        example = []
        all_results = []

        for i, example_index in enumerate(example_indices):
            start_logits = batch_start_logits[i].detach().cpu().tolist()
            end_logits = batch_end_logits[i].detach().cpu().tolist()
            feature = eval_features[example_index.item()]
            unique_id = int(feature.unique_id)
            features.append(feature)
            all_results.append(
                RawResult(unique_id=unique_id,
                          start_logits=start_logits,
                          end_logits=end_logits))

        output = predict(examples, features, all_results,
                         args.max_answer_length)
        predictions.append(output)

    ### Printing the results ###
    for example in examples:
        print(example.question_text)
        # Map the feature's unique_id back to the batch that produced it;
        # the divisor must equal the DataLoader batch size used above.
        prediction, prob = predictions[example.unique_id //
                                       pred_batch_size][example]
        if prob > 0.35:
            print(prediction)
        else:
            print("No result found")
Exemplo n.º 10
0
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--bert_token_model", default=None, type=str, required=True,
                        help="Bert pre-trained model selected in the list: bert-base-uncased, "
                             "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
                             "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--model_dir", default=None, type=str, required=True,
                        help="학습된 모델이 저장되어 있는 path")
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where the model checkpoints and predictions will be written.")

    ## Other parameters
    parser.add_argument("--predict_file", default=None, type=str,
                        help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json")
    parser.add_argument("--max_seq_length", default=384, type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. Sequences "
                             "longer than this will be truncated, and sequences shorter than this will be padded.")
    parser.add_argument("--doc_stride", default=128, type=int,
                        help="When splitting up a long document into chunks, how much stride to take between chunks.")
    parser.add_argument("--max_query_length", default=64, type=int,
                        help="The maximum number of tokens for the question. Questions longer than this will "
                             "be truncated to this length.")
    parser.add_argument("--do_predict", action='store_true', help="Whether to run eval on the dev set.")
    parser.add_argument("--predict_batch_size", default=8, type=int, help="Total batch size for predictions.")
    parser.add_argument("--n_best_size", default=20, type=int,
                        help="The total number of n-best predictions to generate in the nbest_predictions.json "
                             "output file.")
    parser.add_argument("--max_answer_length", default=30, type=int,
                        help="The maximum length of an answer that can be generated. This is needed because the start "
                             "and end predictions are not conditioned on one another.")
    parser.add_argument("--verbose_logging", action='store_true',
                        help="If true, all of the warnings related to data processing will be printed. "
                             "A number of warnings are expected for a normal SQuAD evaluation.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument("--do_lower_case",
                        action='store_true',
                        help="Whether to lower case the input text. True for uncased models, False for cased models.")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--fp16',
                        action='store_true',
                        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument('--version_2_with_negative',
                        action='store_true',
                        help='If true, the SQuAD examples contain some that do not have an answer.')
    parser.add_argument('--null_score_diff_threshold',
                        type=float, default=0.0,
                        help="If null_score - best_non_null is greater than the threshold predict null.")
    parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.")
    parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.")
    args = parser.parse_args()
    #python run_triviaqa.py --bert_token_model bert-base-uncased --model_dir bert_triviaQA/ --output_dir result/ --predict_file dev-wiki-triviaqa_m.json --no_cuda --do_lower_case --predict_batch_size 40

    print(args)

    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')

    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)

    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
        device, n_gpu, bool(args.local_rank != -1), args.fp16))

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if args.do_predict:
        if not args.predict_file:
            raise ValueError(
                "If `do_predict` is True, then `predict_file` must be specified.")

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    tokenizer = BertTokenizer.from_pretrained(args.bert_token_model, do_lower_case=args.do_lower_case)


    # Load Pretrained Model
    config_path = os.path.join(args.model_dir, CONFIG_NAME)
    model_path = os.path.join(args.model_dir, WEIGHTS_NAME)
    config = BertConfig(config_path)
    model = BertForQuestionAnswering(config)
    model.load_state_dict(torch.load(model_path, map_location='cpu'))

    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

        model = DDP(model)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    if args.do_predict and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        eval_examples = read_squad_examples(
            input_file=args.predict_file, is_training=False, version_2_with_negative=args.version_2_with_negative)
        eval_features = convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            doc_stride=args.doc_stride,
            max_query_length=args.max_query_length,
            is_training=False)

        logger.info("***** Running predictions *****")
        logger.info("  Num orig examples = %d", len(eval_examples))
        logger.info("  Num split examples = %d", len(eval_features))
        logger.info("  Batch size = %d", args.predict_batch_size)

        all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
        all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index)
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.predict_batch_size)

        model.eval()
        all_results = []
        logger.info("Start evaluating")
        for input_ids, input_mask, segment_ids, example_indices in tqdm(eval_dataloader, desc="Evaluating",
                                                                        disable=args.local_rank not in [-1, 0]):
            if len(all_results) % 1000 == 0:
                logger.info("Processing example: %d" % (len(all_results)))
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            with torch.no_grad():
                batch_start_logits, batch_end_logits = model(input_ids, segment_ids, input_mask)
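            # The model emits per-token start/end logits for each chunk;
            # write_predictions() later combines them into answer spans.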

            for i, example_index in enumerate(example_indices):
                start_logits = batch_start_logits[i].detach().cpu().tolist()
                end_logits = batch_end_logits[i].detach().cpu().tolist()
                eval_feature = eval_features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                all_results.append(RawResult(unique_id=unique_id,
                                             start_logits=start_logits,
                                             end_logits=end_logits))

        output_prediction_file = os.path.join(args.output_dir, "predictions.json")
        output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json")
        output_null_log_odds_file = os.path.join(args.output_dir, "null_odds.json")
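        # null_odds.json is only meaningful when --version_2_with_negative is
        # set, i.e. when the model may legitimately predict "no answer".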
        write_predictions(eval_examples, eval_features, all_results,
                          args.n_best_size, args.max_answer_length,
                          args.do_lower_case, output_prediction_file,
                          output_nbest_file, output_null_log_odds_file, args.verbose_logging,
                          args.version_2_with_negative, args.null_score_diff_threshold)
Exemplo n.º 11
0
    def __init__(self):
        # Hyperparameters
        self.BERT_MODEL = "bert-base-uncased"
        self.OUTPUT_DIR = "bert-model"
        self.TRAIN_FILE = ""
        self.PREDICT_FILE = "squad/test-pred.json"
        self.MAX_SEQ_LENGTH = 384
        self.DOC_STRIDE = 128
        self.MAX_QUERY_LENGTH = 64
        self.DO_TRAIN = False
        self.DO_PREDICT = True
        self.TRAIN_BATCH_SIZE = 12
        self.PREDICT_BATCH_SIZE = 8
        self.LEARNING_RATE = 3e-5
        self.NUM_TRAIN_EPOCHS = 2.0
        self.WARMUP_PROPORTION = 0.1
        self.N_BEST_SIZE = 20
        self.MAX_ANSWER_LENGTH = 30
        self.VERBOSE_LOGGING = False
        self.NO_CUDA = False
        self.SEED = 42
        self.GRADIENT_ACCUMULATION_STEPS = 1
        self.DO_LOWER_CASE = True
        self.LOCAL_RANK = -1
        self.FP16 = False
        self.LOSS_SCALE = 0
        self.VERSION_2_WITH_NEGATIVE = True
        self.NULL_SCORE_DIFF_THRESHOLD = 0.0
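        # With DO_TRAIN False and DO_PREDICT True, this configuration skips
        # training and sets the module up for prediction only.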

        if self.LOCAL_RANK == -1 or self.NO_CUDA:
            self.device = torch.device("cuda" if torch.cuda.is_available()
                                       and not self.NO_CUDA else "cpu")
            n_gpu = torch.cuda.device_count()
        else:
            torch.cuda.set_device(self.LOCAL_RANK)
            self.device = torch.device("cuda", self.LOCAL_RANK)
            n_gpu = 1
            # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
            torch.distributed.init_process_group(backend='nccl')
        logger.info(
            "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}"
            .format(self.device, n_gpu, bool(self.LOCAL_RANK != -1),
                    self.FP16))

        if self.GRADIENT_ACCUMULATION_STEPS < 1:
            raise ValueError(
                "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
                .format(self.GRADIENT_ACCUMULATION_STEPS))

        self.TRAIN_BATCH_SIZE = self.TRAIN_BATCH_SIZE // self.GRADIENT_ACCUMULATION_STEPS

        random.seed(self.SEED)
        np.random.seed(self.SEED)
        torch.manual_seed(self.SEED)
        if n_gpu > 0:
            torch.cuda.manual_seed_all(self.SEED)

        if not self.DO_TRAIN and not self.DO_PREDICT:
            raise ValueError(
                "At least one of `do_train` or `do_predict` must be True.")

        if self.DO_TRAIN:
            if not self.TRAIN_FILE:
                raise ValueError(
                    "If `do_train` is True, then `train_file` must be specified."
                )
        if self.DO_PREDICT:
            if not self.PREDICT_FILE:
                raise ValueError(
                    "If `do_predict` is True, then `predict_file` must be specified."
                )

        if os.path.exists(self.OUTPUT_DIR) and os.listdir(
                self.OUTPUT_DIR) and self.DO_TRAIN:
            raise ValueError(
                "Output directory ({}) already exists and is not empty.".format(
                    self.OUTPUT_DIR))
        if not os.path.exists(self.OUTPUT_DIR):
            os.makedirs(self.OUTPUT_DIR)

        self.tokenizer = BertTokenizer.from_pretrained(
            self.BERT_MODEL, do_lower_case=self.DO_LOWER_CASE)

        train_examples = None
        num_train_optimization_steps = None

        # Prepare model
        self.model = BertForQuestionAnswering.from_pretrained(
            self.BERT_MODEL,
            cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE),
                                   'distributed_{}'.format(self.LOCAL_RANK)))
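        # The per-rank cache_dir keeps distributed workers from clobbering
        # each other's downloaded pretrained weights.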

        if self.FP16:
            self.model.half()
        self.model.to(self.device)
        if self.LOCAL_RANK != -1:
            try:
                from apex.parallel import DistributedDataParallel as DDP
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
                )

            self.model = DDP(self.model)
        elif n_gpu > 1:
            self.model = torch.nn.DataParallel(self.model)

        # Prepare optimizer
        param_optimizer = list(self.model.named_parameters())

        # Hack to remove the pooler, which is not used here and would
        # otherwise produce None grads that break apex.
        param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]

        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]

        if self.FP16:
            try:
                from apex.optimizers import FP16_Optimizer
                from apex.optimizers import FusedAdam
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
                )

            optimizer = FusedAdam(optimizer_grouped_parameters,
                                  lr=self.LEARNING_RATE,
                                  bias_correction=False,
                                  max_grad_norm=1.0)
            if self.LOSS_SCALE == 0:
                optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
            else:
                optimizer = FP16_Optimizer(optimizer,
                                           static_loss_scale=self.LOSS_SCALE)
        else:
            optimizer = BertAdam(optimizer_grouped_parameters,
                                 lr=self.LEARNING_RATE,
                                 warmup=self.WARMUP_PROPORTION,
                                 t_total=num_train_optimization_steps)
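            # Note: num_train_optimization_steps is still None at this point
            # because this snippet never prepares training data, so this
            # optimizer cannot actually be stepped as-is.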

        # self.model = BertForQuestionAnswering.from_pretrained(self.BERT_MODEL)

        output_model_file = os.path.join(self.OUTPUT_DIR, WEIGHTS_NAME)
        output_config_file = os.path.join(self.OUTPUT_DIR, CONFIG_NAME)

        # Load a trained model and config that you have fine-tuned
        config = BertConfig(output_config_file)
        self.model = BertForQuestionAnswering(config)
        if torch.cuda.is_available():
            self.model.load_state_dict(torch.load(output_model_file))
        else:
            self.model.load_state_dict(
                torch.load(output_model_file, map_location='cpu'))

        self.model.to(self.device)
        print('\n*** QA MODULE READY [1/3] ***\n')
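Exemplo n.º 12
0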
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--input_file", default=None, type=str, required=True)
    parser.add_argument("--output_file", default=None, type=str, required=True)
    parser.add_argument("--bert_model", default=None, type=str, required=True,
                        help="Bert pre-trained model selected in the list: bert-base-uncased, "
                             "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.")

    ## Other parameters
    parser.add_argument("--pretrained_squad_model", default=None, type=str)
    parser.add_argument("--do_lower_case", action='store_true', help="Set this flag if you are using an uncased model.")
    parser.add_argument("--layers", default="-1,-2,-3,-4", type=str)
    parser.add_argument("--max_seq_length", default=128, type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. Sequences longer "
                            "than this will be truncated, and sequences shorter than this will be padded.")
    parser.add_argument("--batch_size", default=32, type=int, help="Batch size for predictions.")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help = "local_rank for distributed training on gpus")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")

    args = parser.parse_args()

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
    logger.info("device: {} n_gpu: {} distributed training: {}".format(device, n_gpu, bool(args.local_rank != -1)))

    layer_indexes = [int(x) for x in args.layers.split(",")]
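    # Negative layer indices count from the top of the encoder stack, so the
    # default "-1,-2,-3,-4" selects the four final hidden layers.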

    tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)

    examples = read_examples(args.input_file)

    features = convert_examples_to_features(
        examples=examples, seq_length=args.max_seq_length, tokenizer=tokenizer)

    unique_id_to_feature = {}
    for feature in features:
        unique_id_to_feature[feature.unique_id] = feature


    if args.pretrained_squad_model:
        input_config_file = os.path.join(args.pretrained_squad_model, CONFIG_NAME)
        input_model_file = os.path.join(args.pretrained_squad_model, WEIGHTS_NAME)
        config = BertConfig(input_config_file)
        qa_model = BertForQuestionAnswering(config)
        qa_model.load_state_dict(torch.load(input_model_file, map_location=device))
        model = qa_model.bert  # The model we will use for extracting
    else:
        model = BertModel.from_pretrained(args.bert_model)
    model.to(device)

    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
                                                          output_device=args.local_rank)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)

    eval_data = TensorDataset(all_input_ids, all_input_mask, all_example_index)
    if args.local_rank == -1:
        eval_sampler = SequentialSampler(eval_data)
    else:
        eval_sampler = DistributedSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.batch_size)

    model.eval()
    with open(args.output_file, "w", encoding='utf-8') as writer:
        for input_ids, input_mask, example_indices in eval_dataloader:
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
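            # token_type_ids is left as None here, so every token is treated
            # as belonging to segment A.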

            all_encoder_layers, _ = model(input_ids, token_type_ids=None, attention_mask=input_mask)

            for b, example_index in enumerate(example_indices):
                feature = features[example_index.item()]
                unique_id = int(feature.unique_id)
                # feature = unique_id_to_feature[unique_id]
                output_json = collections.OrderedDict()
                output_json["linex_index"] = unique_id
                all_out_features = []
                for (i, token) in enumerate(feature.tokens):
                    all_layers = []
                    for (j, layer_index) in enumerate(layer_indexes):
                        layer_output = all_encoder_layers[int(layer_index)].detach().cpu().numpy()
                        layer_output = layer_output[b]
                        layers = collections.OrderedDict()
                        layers["index"] = layer_index
                        layers["values"] = [
                            round(x.item(), 6) for x in layer_output[i]
                        ]
                        all_layers.append(layers)
                    out_features = collections.OrderedDict()
                    out_features["token"] = token
                    out_features["layers"] = all_layers
                    all_out_features.append(out_features)
                output_json["features"] = all_out_features
                writer.write(json.dumps(output_json) + "\n")
Exemplo n.º 13
0
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()

RawResult = collections.namedtuple("RawResult",
                                   ["unique_id", "start_logits", "end_logits"])

# para_file = "../Input_file.txt"
para_file = "/content/drive/My Drive/train-v2.0.json"  # TODO: use proper file path
model_path = "/content/drive/My Drive/pytorch_model.bin"  # TODO: use proper file path

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                          do_lower_case=True)

### Loading Pretrained model for QnA

config = BertConfig("../Results/bert_config.json")
model = BertForQuestionAnswering(config)
model.load_state_dict(torch.load(model_path, map_location='cpu'))
# model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
model.to(device)
print()

### initializing the autoencoder
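# EncoderRNN and DecoderRNN are project-specific modules assumed to be defined
# elsewhere; the constructor arguments below follow this snippet's own calls
# (sequence length 384, the BERT hidden size, and a 384-unit bottleneck).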

hidden_size = 384
encoder1 = EncoderRNN(384, config.hidden_size, hidden_size).to(device)
decoder1 = DecoderRNN(384, config.hidden_size, hidden_size).to(device)
encoder_optimizer = optim.Adam(encoder1.parameters())
decoder_optimizer = optim.Adam(decoder1.parameters())
criterion = nn.MSELoss()

pp = pprint.PrettyPrinter(indent=4)