Example #1
def evaluate_treccar(model_path, test_art_qrels, test_top_qrels,
                     test_hier_qrels, test_paratext, level):
    test_page_paras, test_rev_para_top, test_rev_para_hier = get_trec_dat(
        test_art_qrels, test_top_qrels, test_hier_qrels)
    test_len_paras = np.array(
        [len(test_page_paras[page]) for page in test_page_paras.keys()])
    print('test mean paras: %.2f, std: %.2f, max paras: %.2f' %
          (np.mean(test_len_paras), np.std(test_len_paras),
           np.max(test_len_paras)))
    test_ptext_dict = get_paratext_dict(test_paratext)
    test_top_cluster_data = []
    test_hier_cluster_data = []
    max_num_doc_test = max(
        [len(test_page_paras[p]) for p in test_page_paras.keys()])
    test_pages = list(test_page_paras.keys())
    for i in trange(len(test_pages)):
        page = test_pages[i]
        paras = test_page_paras[page]
        paratexts = [test_ptext_dict[p] for p in paras]
        top_sections = list(set([test_rev_para_top[p] for p in paras]))
        top_labels = [top_sections.index(test_rev_para_top[p]) for p in paras]
        hier_sections = list(set([test_rev_para_hier[p] for p in paras]))
        hier_labels = [
            hier_sections.index(test_rev_para_hier[p]) for p in paras
        ]
        query_text = ' '.join(page.split('enwiki:')[1].split('%20'))
        n = len(paras)
        paras = paras + ['dummy'] * (max_num_doc_test - n)
        paratexts = paratexts + [''] * (max_num_doc_test - n)
        top_labels = top_labels + [-1] * (max_num_doc_test - n)
        hier_labels = hier_labels + [-1] * (max_num_doc_test - n)
        test_top_cluster_data.append(
            InputTRECCARExample(qid=page,
                                q_context=query_text,
                                pids=paras,
                                texts=paratexts,
                                label=np.array(top_labels)))
        test_hier_cluster_data.append(
            InputTRECCARExample(qid=page,
                                q_context=query_text,
                                pids=paras,
                                texts=paratexts,
                                label=np.array(hier_labels)))
    print("Top-level datasets")
    print("Test instances: %5d" % len(test_top_cluster_data))

    model = SentenceTransformer(model_path)
    if level == 'h':
        print('Evaluating hierarchical clusters')
        test_evaluator = ClusterEvaluator.from_input_examples(
            test_hier_cluster_data)
        model.evaluate(test_evaluator)
    else:
        print('Evaluating toplevel clusters')
        test_evaluator = ClusterEvaluator.from_input_examples(
            test_top_cluster_data)
        model.evaluate(test_evaluator)
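
A minimal sketch of the padding step above (values invented): every page's lists are padded to the corpus-wide maximum so the resulting arrays are rectangular, with -1 marking padded label slots.

paras = ['para1', 'para2']
top_labels = [0, 1]
max_num_doc_test = 4
n = len(paras)
paras = paras + ['dummy'] * (max_num_doc_test - n)
top_labels = top_labels + [-1] * (max_num_doc_test - n)
assert len(paras) == len(top_labels) == max_num_doc_test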
Example #2
def evaluate_ng20(model_path, test_cluster_data, gpu_eval):
    if torch.cuda.is_available():
        print('CUDA is available')
        device = torch.device('cuda')
    else:
        print('Using CPU')
        device = torch.device('cpu')
    model = SentenceTransformer(model_path)
    model.to(device)
    test_evaluator = ClusterEvaluator.from_input_examples(
        test_cluster_data, gpu_eval)
    model.evaluate(test_evaluator)
Example #3
def main():
    model = SentenceTransformer('bert-base-nli-mean-tokens')

    sts_reader = STSDataReader('datasets/stsbenchmark')

    test_data = SentencesDataset(
        examples=sts_reader.get_examples('sts-test.csv'),
        model=model,
        dataset_cache_id='sts-eval')
    test_dataloader = DataLoader(test_data, shuffle=False, batch_size=16)
    evaluator = EmbeddingSimilarityEvaluator(test_dataloader)

    model.evaluate(evaluator)
Example #4
    def pretrained_model_score(self, model_name, expected_score):
        model = SentenceTransformer(model_name)
        sts_dataset_path = 'datasets/stsbenchmark.tsv.gz'

        if not os.path.exists(sts_dataset_path):
            util.http_get('https://sbert.net/datasets/stsbenchmark.tsv.gz',
                          sts_dataset_path)

        train_samples = []
        dev_samples = []
        test_samples = []
        with gzip.open(sts_dataset_path, 'rt', encoding='utf8') as fIn:
            reader = csv.DictReader(fIn,
                                    delimiter='\t',
                                    quoting=csv.QUOTE_NONE)
            for row in reader:
                score = float(
                    row['score']) / 5.0  # Normalize score to range 0 ... 1
                inp_example = InputExample(
                    texts=[row['sentence1'], row['sentence2']], label=score)

                if row['split'] == 'dev':
                    dev_samples.append(inp_example)
                elif row['split'] == 'test':
                    test_samples.append(inp_example)
                else:
                    train_samples.append(inp_example)

        evaluator = EmbeddingSimilarityEvaluator.from_input_examples(
            test_samples, name='sts-test')

        score = model.evaluate(evaluator) * 100
        print(model_name,
              "{:.2f} vs. exp: {:.2f}".format(score, expected_score))
        assert score > expected_score or abs(score - expected_score) < 0.1
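
For reference, the normalization above maps STSb gold scores from the 0..5 scale to the 0..1 range the evaluator expects; a tiny sketch with an invented pair:

from sentence_transformers import InputExample

raw_score = 4.2  # invented gold score on the 0..5 STSb scale
example = InputExample(texts=['A man is eating.', 'Someone is eating.'],
                       label=raw_score / 5.0)  # label now in 0..1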
Example #5
def train_sbert(model_name, model_save_path):
    batch_size = 16
    nli_reader, sts_reader = load_dataset()
    train_num_labels = nli_reader.get_num_labels()
    # Use BERT for mapping tokens to embeddings
    word_embedding_model = models.BERT(model_name)

    # Apply mean pooling to get one fixed sized sentence vector
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
                                pooling_mode_mean_tokens=True,
                                pooling_mode_cls_token=False,
                                pooling_mode_max_tokens=False)

    model = SentenceTransformer(modules=[word_embedding_model, pooling_model])


    # Convert the dataset to a DataLoader ready for training
    logging.info("Read AllNLI train dataset")
    train_data = SentencesDataset(nli_reader.get_examples('train.gz'), model=model)
    train_dataloader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    train_loss = losses.SoftmaxLoss(model=model, sentence_embedding_dimension=model.get_sentence_embedding_dimension(), num_labels=train_num_labels)

    logging.info("Read STSbenchmark dev dataset")
    dev_data = SentencesDataset(examples=sts_reader.get_examples('sts-dev.csv'), model=model)
    dev_dataloader = DataLoader(dev_data, shuffle=False, batch_size=batch_size)
    evaluator = EmbeddingSimilarityEvaluator(dev_dataloader)

    # Configure the training
    num_epochs = 1

    warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1) #10% of train data for warm-up
    logging.info("Warmup-steps: {}".format(warmup_steps))
    # Train the model
    model.fit(train_objectives=[(train_dataloader, train_loss)],
            evaluator=evaluator,
            epochs=num_epochs,
            evaluation_steps=1000,
            warmup_steps=warmup_steps,
            output_path=model_save_path
            )

    model = SentenceTransformer(model_save_path)
    test_data = SentencesDataset(examples=sts_reader.get_examples("sts-test.csv"), model=model)
    test_dataloader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    evaluator = EmbeddingSimilarityEvaluator(test_dataloader)

    model.evaluate(evaluator)
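
A hypothetical invocation of train_sbert (the model name and save path are assumptions; load_dataset() must return the NLI and STS readers used above):

train_sbert(model_name='bert-base-uncased',
            model_save_path='output/training_nli_bert-base-uncased')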
Example #6
    def pretrained_model_score(self, model_name, expected_score):
        model = SentenceTransformer(model_name)
        sts_reader = STSDataReader('../examples/datasets/stsbenchmark')

        test_data = SentencesDataset(
            examples=sts_reader.get_examples("sts-test.csv"), model=model)
        test_dataloader = DataLoader(test_data, shuffle=False, batch_size=8)
        evaluator = EmbeddingSimilarityEvaluator(test_dataloader)

        score = model.evaluate(evaluator) * 100
        print(model_name,
              "{:.2f} vs. exp: {:.2f}".format(score, expected_score))
        assert abs(score - expected_score) < 0.1
Example #7
def run():
    test_file = config.TEST_FILE
    test_batch = config.TEST_BATCH_SIZE
    model_save_path = config.MODEL_SAVE_PATH

    dfs = pd.read_csv(test_file,
                      sep='\t',
                      names=['idx', 'sent1', 'sent2', 'label'])
    dfs['label'] = pd.to_numeric(dfs['label'], downcast='float')

    dataset_reader = dataset.Dataset()
    test_sent1, test_sent2, test_labels = dataset_reader.read(dfs)

    evaluator = evaluation.BinaryClassificationEvaluator(
        test_sent1,
        test_sent2,
        test_labels,
        batch_size=test_batch,
        show_progress_bar=True)

    model = SentenceTransformer(model_save_path)
    model.evaluate(evaluator)
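
BinaryClassificationEvaluator matches the two sentence lists positionally; a minimal sketch with invented data:

from sentence_transformers import evaluation

sent1 = ['How do I learn Python?', 'What is AI?']
sent2 = ['How can I learn Python?', 'Where is Paris?']
labels = [1, 0]  # 1 = duplicate pair, 0 = non-duplicate
evaluator = evaluation.BinaryClassificationEvaluator(sent1, sent2, labels,
                                                     batch_size=2)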
Example #8
File: eval.py  Project: BenfenYU/Gun
def test_self():
    sts_reader = Self_csv_DataReader('./self_dataset/')
    model_save_path = './output'
    dir_list = os.listdir(model_save_path)
    dir_list.sort(key=lambda fn: os.path.getmtime(model_save_path + '/' + fn))
    model_save_path = os.path.join(model_save_path, dir_list[-1])
    # NOTE: this hard-coded path overrides the newest-checkpoint selection above.
    model_save_path = './output/training_nli_.-pretrained_model-bert-base-chinese-2020-07-30_15-59-13'

    model = SentenceTransformer(model_save_path)
    examples, label_text = sts_reader.get_examples("test.csv", _eval=True)
    test_data = SentencesDataset(examples=examples, model=model)
    test_dataloader = DataLoader(test_data,
                                 shuffle=False,
                                 batch_size=config.train_batch_size)
    evaluator = LabelAccuracyEvaluator(
        test_dataloader,
        softmax_model=Softmax_label(model=model,
                                    sentence_embedding_dimension=model.
                                    get_sentence_embedding_dimension(),
                                    num_labels=config.train_num_labels),
        label_text=label_text)

    model.evaluate(evaluator, output_path=model_save_path)
Example #9
    os.path.join(script_folder_path, args.sts_corpus))
for idx, target in enumerate(target_eval_files):
    output_filename_eval = os.path.join(script_folder_path,
                                        args.sts_corpus + target + "-test.csv")
    if args.whitening:
        evaluators[target[:5]].append(
            WhiteningEmbeddingSimilarityEvaluator.from_input_examples(
                sts_reader.get_examples(output_filename_eval),
                measure_data_num=target_eval_data_num[idx],
                embed_dim=args.embed_dim,
                name=target,
                main_similarity=SimilarityFunction.COSINE))
    else:
        evaluators[target[:5]].append(
            EmbeddingSimilarityEvaluator.from_input_examples(
                sts_reader.get_examples(output_filename_eval),
                name=target,
                main_similarity=SimilarityFunction.COSINE))

all_results = []
logger_text = ""
for task, sequential_evaluator in evaluators.items():
    result = model.evaluate(
        SequentialEvaluator(
            sequential_evaluator,
            main_score_function=lambda scores: np.mean(scores)))
    logger_text += "%.2f \t" % (result * 100)
    all_results.append(result * 100)
logger.info(" \t".join(target_eval_tasks) + " \tOverall.")
logger.info(logger_text + "%.2f" % np.mean(all_results))
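
SequentialEvaluator runs each sub-evaluator in turn and reduces their scores with main_score_function, here a plain mean; a runnable sketch (model name and pairs are invented):

import numpy as np
from sentence_transformers import SentenceTransformer, InputExample
from sentence_transformers.evaluation import (EmbeddingSimilarityEvaluator,
                                              SequentialEvaluator)

model = SentenceTransformer('paraphrase-MiniLM-L6-v2')  # assumed model
pairs = [InputExample(texts=['a cat sits', 'a cat is sitting'], label=0.9),
         InputExample(texts=['a cat sits', 'stocks fell today'], label=0.1),
         InputExample(texts=['he plays guitar', 'he makes music'], label=0.7)]
ev_a = EmbeddingSimilarityEvaluator.from_input_examples(pairs, name='task_a')
ev_b = EmbeddingSimilarityEvaluator.from_input_examples(pairs, name='task_b')
result = model.evaluate(SequentialEvaluator(
    [ev_a, ev_b], main_score_function=lambda scores: np.mean(scores)))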
Example #10
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task.",
    )

    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="Path to pre-trained model or shortcut name.",
    )

    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model predictions and checkpoints will be written.",
    )

    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--max_seq_length",
        default=510,
        type=int,
        help=
        "The maximum total input sequence length after tokenization. Sequences longer "
        "than this will be truncated, sequences shorter will be padded.",
    )

    parser.add_argument(
        "--per_gpu_train_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for training.",
    )
    parser.add_argument(
        "--per_gpu_eval_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for evaluation.",
    )
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="For distributed training: local_rank")
    parser.add_argument("--do_train",
                        action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action="store_true",
                        help="Whether to run eval on the dev set.")

    args = parser.parse_args()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )

    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
    )

    #TODO: Prepare Dataloader
    patent_reader = PatentDataReader(args.data_dir, normalize_scores=True)
    model = SentenceTransformer(args.model_name_or_path)
    test_data = SentencesDataset(examples=patent_reader.get_examples(
        "dev.tsv", max_examples=40),
                                 model=model)
    test_dataloader = DataLoader(test_data,
                                 shuffle=False,
                                 batch_size=args.per_gpu_train_batch_size)
    evaluator = EmbeddingSimilarityEvaluator(test_dataloader)
    model.evaluate(evaluator)

    # Convert the dataset to a DataLoader ready for training
    print("Read STSbenchmark train dataset")
    train_data = SentencesDataset(
        patent_reader.get_examples('train.tsv', max_examples=17714), model)
    train_dataloader = DataLoader(train_data,
                                  shuffle=True,
                                  batch_size=args.per_gpu_train_batch_size)
    train_loss = losses.CosineSimilarityLoss(model=model)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training downloads model & vocab
        torch.distributed.barrier()

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        train_data = SentencesDataset(
            patent_reader.get_examples('train.tsv', max_examples=17714), model)
        global_step, tr_loss = train(args, train_data, model, tokenizer)  # train() and tokenizer are defined elsewhere in this script
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)
Example #11
    parser.add_argument('--trained_model_path',
                        type=str,
                        default='./model_save')
    parser.add_argument('--output_dir', type=str, default='./performance/')
    parser.add_argument('--dataset',
                        type=str,
                        default='msrp',
                        choices=['msrp', 'sts', 'atec', 'ccks', 'chsts'])
    parser.add_argument('--task_type',
                        type=str,
                        default='',
                        choices=['classification', 'regression'])
    args = parser.parse_args()

    trained_model_path = args.trained_model_path
    output_dir = args.output_dir
    dataset = args.dataset
    task_type = args.task_type

    test_examples = ld.load_dataset(dataset_name=dataset, dataset_type='test')

    if task_type == "classification":
        evaluator = evaluation.BinaryClassificationEvaluator.from_input_examples(
            test_examples)
    else:
        evaluator = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(
            test_examples)

    model = SentenceTransformer(trained_model_path)
    model.evaluate(evaluator, output_dir)
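
The branch above only decides which evaluator consumes the examples; note that the label conventions differ (a sketch, values invented):

from sentence_transformers import InputExample

cls_example = InputExample(texts=['question 1', 'question 2'], label=1)  # classification: int 0/1
reg_example = InputExample(texts=['sentence 1', 'sentence 2'], label=0.75)  # regression: float in [0, 1]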
Example #12
    def fit(self,
            train_objectives: Iterable[Tuple[DataLoader, nn.Module]],
            evaluator: SentenceEvaluator = None,
            test_evaluator: SentenceEvaluator = None,
            epochs: int = 1,
            steps_per_epoch=None,
            scheduler: str = 'WarmupLinear',
            warmup_steps: int = 10000,
            optimizer_class: Type[Optimizer] = transformers.AdamW,
            optimizer_params: Dict[str, object] = {'lr': 2e-5},
            weight_decay: float = 0.01,
            evaluation_steps: int = 0,
            output_path: str = None,
            save_best_model: bool = True,
            max_grad_norm: float = 1,
            use_amp: bool = False,
            callback: Callable[[float, int, int], None] = None,
            show_progress_bar: bool = True):
        tensorboard_writer = SummaryWriter('./tensorboard_logs')
        if use_amp:
            from torch.cuda.amp import autocast
            scaler = torch.cuda.amp.GradScaler()

        self.to(self._target_device)
        GPUtil.showUtilization()

        if output_path is not None:
            os.makedirs(output_path, exist_ok=True)

        dataloaders = [dataloader for dataloader, _ in train_objectives]

        # Use smart batching
        for dataloader in dataloaders:
            dataloader.collate_fn = self.smart_batching_collate

        loss_models = [loss for _, loss in train_objectives]
        for loss_model in loss_models:
            loss_model.to(self._target_device)

        self.best_score = -9999999

        if steps_per_epoch is None or steps_per_epoch == 0:
            steps_per_epoch = min(
                [len(dataloader) for dataloader in dataloaders])

        num_train_steps = int(steps_per_epoch * epochs)

        # Prepare optimizers
        optimizers = []
        schedulers = []
        for loss_model in loss_models:
            param_optimizer = list(loss_model.named_parameters())

            no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
            optimizer_grouped_parameters = [{
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay':
                weight_decay
            }, {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay':
                0.0
            }]

            optimizer = optimizer_class(optimizer_grouped_parameters,
                                        **optimizer_params)
            scheduler_obj = self._get_scheduler(optimizer,
                                                scheduler=scheduler,
                                                warmup_steps=warmup_steps,
                                                t_total=num_train_steps)

            optimizers.append(optimizer)
            schedulers.append(scheduler_obj)

        global_step = 0
        data_iterators = [iter(dataloader) for dataloader in dataloaders]

        num_train_objectives = len(train_objectives)

        skip_scheduler = False

        config = {'epochs': epochs, 'steps_per_epoch': steps_per_epoch}
        for epoch in trange(config.get('epochs'),
                            desc="Epoch",
                            disable=not show_progress_bar):
            training_steps = 0
            running_loss_0 = 0.0

            for loss_model in loss_models:
                loss_model.zero_grad()
                loss_model.train()

            for _ in trange(config.get('steps_per_epoch'),
                            desc="Iteration",
                            smoothing=0.05,
                            disable=not show_progress_bar):
                for train_idx in range(num_train_objectives):
                    loss_model = loss_models[train_idx]
                    optimizer = optimizers[train_idx]
                    scheduler = schedulers[train_idx]
                    data_iterator = data_iterators[train_idx]

                    try:
                        data = next(data_iterator)
                    except StopIteration:
                        data_iterator = iter(dataloaders[train_idx])
                        data_iterators[train_idx] = data_iterator
                        data = next(data_iterator)

                    features, labels = data

                    if use_amp:
                        with autocast():
                            loss_value = loss_model(features, labels)

                        scale_before_step = scaler.get_scale()
                        scaler.scale(loss_value).backward()
                        scaler.unscale_(optimizer)
                        torch.nn.utils.clip_grad_norm_(loss_model.parameters(),
                                                       max_grad_norm)
                        scaler.step(optimizer)
                        scaler.update()
                        if train_idx == 0:
                            running_loss_0 += loss_value.item()

                        skip_scheduler = scaler.get_scale() != scale_before_step
                    else:
                        loss_value = loss_model(features, labels)
                        if train_idx == 0:
                            running_loss_0 += loss_value.item()
                        loss_value.backward()
                        torch.nn.utils.clip_grad_norm_(loss_model.parameters(),
                                                       max_grad_norm)
                        optimizer.step()

                    optimizer.zero_grad()

                    if not skip_scheduler:
                        scheduler.step()

                training_steps += 1
                global_step += 1

                if evaluation_steps > 0 and training_steps % evaluation_steps == 0:
                    tensorboard_writer.add_scalar(
                        'training_loss', running_loss_0 / evaluation_steps,
                        global_step)
                    #logger.report_scalar('Loss', 'training_loss', iteration=global_step, value=running_loss_0/evaluation_steps)
                    running_loss_0 = 0.0
                    #self._eval_during_training(evaluator, output_path, save_best_model, epoch, training_steps, callback)
                    if evaluator is not None:
                        score = evaluator(self,
                                          output_path=output_path,
                                          epoch=epoch,
                                          steps=training_steps)
                        tensorboard_writer.add_scalar('val_ARI', score,
                                                      global_step)
                        #logger.report_scalar('Training progress', 'val_ARI', iteration=global_step, value=score)
                        if callback is not None:
                            callback(score, epoch, training_steps)
                        if score > self.best_score:
                            self.best_score = score
                            if save_best_model:
                                print('Saving model at: ' + output_path)
                                self.save(output_path)
                    for loss_model in loss_models:
                        loss_model.zero_grad()
                        loss_model.train()

            #self._eval_during_training(evaluator, output_path, save_best_model, epoch, -1, callback)
            #tensorboard_writer.add_scalar('training_loss', running_loss_0 / evaluation_steps, global_step)
            #logger.report_scalar('Loss', 'training_loss', iteration=global_step, value=running_loss_0 / evaluation_steps)
            if evaluator is not None:
                score = evaluator(self,
                                  output_path=output_path,
                                  epoch=epoch,
                                  steps=training_steps)
                tensorboard_writer.add_scalar('val_ARI', score, global_step)
                #logger.report_scalar('Training progress', 'val_ARI', iteration=global_step, value=score)
                if callback is not None:
                    callback(score, epoch, training_steps)
                if score > self.best_score:
                    self.best_score = score
                    if save_best_model:
                        self.save(output_path)
            if test_evaluator is not None:
                best_model = SentenceTransformer(output_path)
                device = self._target_device
                if torch.cuda.is_available():
                    self.to(torch.device('cpu'))
                    best_model.to(device)
                    test_ari = best_model.evaluate(test_evaluator)
                    best_model.to(torch.device('cpu'))
                    self.to(device)
                else:
                    test_ari = best_model.evaluate(test_evaluator)
                tensorboard_writer.add_scalar('test_ARI', test_ari,
                                              global_step)
                #logger.report_scalar('Training progress', 'test_ARI', iteration=global_step, value=test_ari)

        if evaluator is None and output_path is not None:  # No evaluator, but output path: save final model version
            self.save(output_path)
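
The optimizer setup above uses the common no-decay grouping from BERT fine-tuning: biases and LayerNorm parameters get weight_decay=0.0. A self-contained sketch of the same pattern on a toy module:

import torch
from torch import nn

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 8)
        self.LayerNorm = nn.LayerNorm(8)  # named like the HF BERT modules

toy = Toy()
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
named = list(toy.named_parameters())
groups = [
    {'params': [p for n, p in named if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    {'params': [p for n, p in named if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = torch.optim.AdamW(groups, lr=2e-5)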
Example #13
train_loss = losses.BatchSemiHardTripletLoss(sentence_embedder=model)

logging.info("Read TREC val dataset")
dev_evaluator = TripletEvaluator.from_input_examples(dev_set, name='dev')

logging.info("Performance before fine-tuning:")
dev_evaluator(model)

warmup_steps = int(len(train_dataset) * num_epochs / train_batch_size *
                   0.1)  # 10% of train data

# Train the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    evaluator=dev_evaluator,
    epochs=num_epochs,
    evaluation_steps=1000,
    warmup_steps=warmup_steps,
    output_path=output_path,
)

##############################################################################
#
# Load the stored model and evaluate its performance on TREC dataset
#
##############################################################################

logging.info("Evaluating model on test set")
test_evaluator = TripletEvaluator.from_input_examples(test_set, name='test')
model.evaluate(test_evaluator)
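
TripletEvaluator expects three texts per InputExample; a minimal sketch with an invented triplet:

from sentence_transformers import InputExample
from sentence_transformers.evaluation import TripletEvaluator

triplets = [InputExample(texts=['How old are you?',      # anchor
                                'What is your age?',     # positive
                                'What is your name?'])]  # negative
demo_evaluator = TripletEvaluator.from_input_examples(triplets, name='demo')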
Example #14
evaluator = EmbeddingSimilarityEvaluator(dev_dataloader)

# Configure the training. We skip evaluation in this example
warmup_steps = math.ceil(
    len(train_data) * args.num_epochs / args.batch_size *
    0.1)  #10% of train data for warm-up
logging.info("Warmup-steps: {}".format(warmup_steps))

# Train the model
model.fit(train_objectives=[(train_dataloader, train_loss)],
          evaluator=evaluator,
          epochs=args.num_epochs,
          evaluation_steps=1000,
          warmup_steps=warmup_steps,
          output_path=args.ckpt_path)

##############################################################################
#
# Load the stored model and evaluate its performance on STS benchmark dataset
#
##############################################################################

model = SentenceTransformer(args.ckpt_path)
test_data = SentencesDataset(
    examples=sts_reader.get_examples("sts-test_vi.csv"), model=model)
test_dataloader = DataLoader(test_data,
                             shuffle=False,
                             batch_size=args.batch_size)
evaluator = EmbeddingSimilarityEvaluator(test_dataloader)
model.evaluate(evaluator, args.ckpt_path)
Example #15
model.add(tf.keras.layers.Dense(units, activation='relu'))
# model.add(tf.keras.layers.Embedding(len(test_encoding), 64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
# model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
# One or more dense layers.
# Edit the list in the `for` line to experiment with layer sizes.
for units in [64, 64]:
  model.add(tf.keras.layers.Dense(units, activation='relu'))

# Output layer. The first argument is the number of labels.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy', tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn')])

model.fit(train_data, epochs=100, validation_data=test_data)

loss, accuracy, tp, fp, tn, fn = model.evaluate(test_data)
print('Test Loss: {}'.format(loss))
print('Test Accuracy: {}'.format(accuracy))
print('Test TP: {}'.format(tp))
print('Test FP: {}'.format(fp))
print('Test TN: {}'.format(tn))
print('Test FN: {}'.format(fn))

# print('\nEval loss: {:.3f}, Eval accuracy: {:.3f}'.format(eval_loss, eval_acc))
Example #16
# Apply mean pooling to get one fixed sized sentence vector
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension())
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

# sts_reader = STSBenchmarkDataReader(os.path.join(script_folder_path, '../datasets/stsbenchmark'))
sts_reader = STSBenchmarkDataReader(data_folder,
                                    s1_col_idx=0,
                                    s2_col_idx=1,
                                    score_col_idx=2,
                                    delimiter="\t",
                                    min_score=0,
                                    max_score=1)

test_data = SentencesDataset(
    examples=sts_reader.get_examples("test_sts.tsv"),
    model=model,
)
print("DataLoader")
test_dataloader = DataLoader(test_data, shuffle=False, batch_size=8)
print("EmbeddingSimilarityEvaluator")
evaluator = EmbeddingSimilarityEvaluator(test_dataloader,
                                         show_progress_bar=False)

print(evaluator)
# print(model)
# print(model.evaluate)
# exit(1)

model.evaluate(evaluator, output_path)
Example #17
class BertTrainer:
    """
    Class to train NLI model
    :param logger: logger to use in model
    """
    def __init__(self, logger: Logger, train_path: str, dev_path: str,
                 test_path: str, base_model: str, batch_size: int,
                 path_to_save: str, **kwargs):
        self.logger = logger
        self.logger.info("Models are loaded and ready to use.")

        self.train_path = train_path
        self.dev_path = dev_path
        self.test_path = test_path

        self.base_model = base_model
        self.batch_size = batch_size

        dataset = 'snli'
        if dataset == 'snli':
            self.label2int = {
                "contradiction": 0,
                "entailment": 1,
                "neutral": 2
            }
        else:
            self.label2int = {"SUPPORTS": 1, "REFUTES": 0}

        self.path_to_save = path_to_save

    def initialize_model(self):
        # Read the dataset
        # Use BERT for mapping tokens to embeddings
        word_embedding_model = models.Transformer(self.base_model,
                                                  max_seq_length=128)
        # Apply mean pooling to get one fixed sized sentence vector
        pooling_model = models.Pooling(
            word_embedding_model.get_word_embedding_dimension(),
            pooling_mode_mean_tokens=True,
            pooling_mode_cls_token=False,
            pooling_mode_max_tokens=False)
        self.model = SentenceTransformer(
            modules=[word_embedding_model, pooling_model])
        self.train_loss_nli = losses.SoftmaxLoss(
            model=self.model,
            sentence_embedding_dimension=self.model.
            get_sentence_embedding_dimension(),
            num_labels=len(self.label2int))

    def preparing_data(self):
        """
        Method used for data preparation before training
        it reads data from files predefined in config and process them
        Uses for SNLI data format
        """
        train_snli = _create_examples_snli(_read_tsv(self.train_path),
                                           'train_s')
        dev_snli = _create_examples_snli(_read_tsv(self.dev_path), 'dev_s')
        test_snli = _create_examples_snli(_read_tsv(self.test_path), 'test_s')
        # Convert the dataset to a DataLoader ready for training
        self.logger.info("Read train dataset")

        train_nli_samples = []
        dev_nli_samples = []
        test_nli_samples = []

        for row in tqdm(train_snli):
            label_id = self.label2int[row[3]]
            train_nli_samples.append(
                InputExample(guid=row[0],
                             texts=[row[1], row[2]],
                             label=label_id))
        for row in tqdm(dev_snli):
            label_id = self.label2int[row[3]]
            dev_nli_samples.append(
                InputExample(guid=row[0],
                             texts=[row[1], row[2]],
                             label=label_id))
        for row in tqdm(test_snli):
            label_id = self.label2int[row[3]]
            test_nli_samples.append(
                InputExample(guid=row[0],
                             texts=[row[1], row[2]],
                             label=label_id))

        train_data_nli = SentencesDataset(train_nli_samples, model=self.model)
        self.train_dataloader_nli = DataLoader(train_data_nli,
                                               shuffle=True,
                                               batch_size=self.batch_size)
        dev_data_nli = SentencesDataset(dev_nli_samples, model=self.model)
        self.dev_dataloader_nli = DataLoader(dev_data_nli,
                                             shuffle=True,
                                             batch_size=self.batch_size)
        test_data_nli = SentencesDataset(test_nli_samples, model=self.model)
        self.test_dataloader_nli = DataLoader(test_data_nli,
                                              shuffle=True,
                                              batch_size=self.batch_size)

    def preparing_data_fever(self):
        """
        Method used for data preparation before training
        it reads data from files predefined in config and process them
        Uses for FEVER SNLI-style data format
        """
        def read_fever(path):
            df = pd.read_csv(path)
            df.dropna(inplace=True)
            df.reset_index(drop=True, inplace=True)
            return df

        train_snli = _create_examples_fever(read_fever(self.train_path),
                                            'train_s')
        dev_snli = _create_examples_fever(read_fever(self.dev_path), 'dev_s')
        test_snli = _create_examples_fever(read_fever(self.test_path),
                                           'test_s')

        # Convert the dataset to a DataLoader ready for training
        self.logger.info("Read train dataset")

        train_nli_samples = []
        dev_nli_samples = []
        test_nli_samples = []

        for row in tqdm(train_snli):
            label_id = self.label2int[row[3]]
            train_nli_samples.append(
                InputExample(guid=row[0],
                             texts=[row[1], row[2]],
                             label=label_id))
        for row in tqdm(dev_snli):
            label_id = self.label2int[row[3]]
            dev_nli_samples.append(
                InputExample(guid=row[0],
                             texts=[row[1], row[2]],
                             label=label_id))
        for row in tqdm(test_snli):
            label_id = self.label2int[row[3]]
            test_nli_samples.append(
                InputExample(guid=row[0],
                             texts=[row[1], row[2]],
                             label=label_id))

        train_data_nli = SentencesDataset(train_nli_samples, model=self.model)
        self.train_dataloader_nli = DataLoader(train_data_nli,
                                               shuffle=True,
                                               batch_size=self.batch_size)
        dev_data_nli = SentencesDataset(dev_nli_samples, model=self.model)
        self.dev_dataloader_nli = DataLoader(dev_data_nli,
                                             shuffle=True,
                                             batch_size=self.batch_size)
        test_data_nli = SentencesDataset(test_nli_samples, model=self.model)
        self.test_dataloader_nli = DataLoader(test_data_nli,
                                              shuffle=True,
                                              batch_size=self.batch_size)

    def preparing_data_mnli(self):
        """
         Method used for data preparation before training
         it reads data from files predefined in config and process them
         Uses for MNLI data format
        """
        def read_mnli(path):
            df = pd.read_table(path, error_bad_lines=False)
            df.sentence1 = df.sentence1.astype(str)
            df.sentence2 = df.sentence2.astype(str)
            df.gold_label = df.gold_label.astype(str)
            df = df[df.gold_label != '-']
            df.dropna(inplace=True)
            return df

        train_snli = _create_examples_mnli(read_mnli(self.train_path),
                                           'train_s')
        dev_snli = _create_examples_mnli(read_mnli(self.dev_path), 'dev_s')
        test_snli = _create_examples_mnli(read_mnli(self.test_path), 'test_s')

        # Convert the dataset to a DataLoader ready for training
        self.logger.info("Read train dataset")

        train_nli_samples = []
        dev_nli_samples = []
        test_nli_samples = []

        print(len(train_snli))
        for row in tqdm(train_snli):
            label_id = self.label2int[row[3]]
            train_nli_samples.append(
                InputExample(guid=row[0],
                             texts=[row[1], row[2]],
                             label=label_id))
        for row in tqdm(dev_snli):
            label_id = self.label2int[row[3]]
            dev_nli_samples.append(
                InputExample(guid=row[0],
                             texts=[row[1], row[2]],
                             label=label_id))
        for row in tqdm(test_snli):
            label_id = self.label2int[row[3]]
            test_nli_samples.append(
                InputExample(guid=row[0],
                             texts=[row[1], row[2]],
                             label=label_id))

        print(len(train_nli_samples))
        train_data_nli = SentencesDataset(train_nli_samples, model=self.model)
        self.train_dataloader_nli = DataLoader(train_data_nli,
                                               shuffle=True,
                                               batch_size=self.batch_size)
        dev_data_nli = SentencesDataset(dev_nli_samples, model=self.model)
        self.dev_dataloader_nli = DataLoader(dev_data_nli,
                                             shuffle=True,
                                             batch_size=self.batch_size)
        test_data_nli = SentencesDataset(test_nli_samples, model=self.model)
        self.test_dataloader_nli = DataLoader(test_data_nli,
                                              shuffle=True,
                                              batch_size=self.batch_size)

    def save_model(self):
        """
        Method used for model saving
        """
        torch.save(self.train_loss_nli.classifier.cpu(),
                   self.path_to_save + 'classifier_model')
        self.model.save(self.path_to_save + "bert_model_trained")

    def load_model(self, text_model_path, classifier_path):
        """
        Method used for pretrained model loading
        """
        self.model = SentenceTransformer(text_model_path)
        self.classification_model = torch.load(classifier_path)
        self.train_loss_nli = losses.SoftmaxLoss(
            model=self.model,
            sentence_embedding_dimension=self.model.
            get_sentence_embedding_dimension(),
            num_labels=len(self.label2int))
        self.train_loss_nli.classifier = self.classification_model

    def train_model(self, number_of_epochs=1):
        """
        Method implements model training process
        """
        warmup_steps = 10000
        self.logger.info("Warmup-steps: {}".format(warmup_steps))
        train_objectives = [(self.train_dataloader_nli, self.train_loss_nli)]

        validation_performance = []
        test_performance = []

        test_evaluator = LabelAccuracyEvaluator(
            self.test_dataloader_nli,
            name='nli_test',
            softmax_model=self.train_loss_nli)
        dev_evaluator = LabelAccuracyEvaluator(
            self.dev_dataloader_nli,
            name='nli_dev',
            softmax_model=self.train_loss_nli)

        for i in range(number_of_epochs):
            self.model.fit(train_objectives=train_objectives)
            validation_performance.append(self.model.evaluate(dev_evaluator))
            test_performance.append(self.model.evaluate(test_evaluator))
            print(f'Iteration - {i + 1} ...')
            print(f'Validation performance - {validation_performance[-1]} ...')
            print(f'Test performance - {test_performance[-1]} ...')
        return validation_performance, test_performance
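
A hypothetical end-to-end use of BertTrainer, calling only methods defined above (all paths are assumptions):

import logging

trainer = BertTrainer(logger=logging.getLogger('nli'),
                      train_path='data/train.tsv',
                      dev_path='data/dev.tsv',
                      test_path='data/test.tsv',
                      base_model='bert-base-uncased',
                      batch_size=16,
                      path_to_save='output/')
trainer.initialize_model()
trainer.preparing_data()  # SNLI-style TSV input
val_scores, test_scores = trainer.train_model(number_of_epochs=1)
trainer.save_model()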
Example #18
import numpy as np

device = "cuda"

model = SentenceTransformer(
    '/home/xstefan3/arqmath/compubert/out_whole_sampled_eval')

clef_home_directory_file_path = '/home/xstefan3/arqmath/data/Collection'
dr = DataReaderRecord(clef_home_directory_file_path)

all_examples = list(examples_from_questions_tup(dr.post_parser.map_questions))
examples_len = len(all_examples)

train_dev_test_split = (int(0.8 * examples_len), int(0.9 * examples_len))

# model = SentenceTransformer('/home/xstefan3/arqmath/compubert/out_whole', logfile="train_whole_sampled_eval.log")

test_data = SentencesDataset(all_examples[train_dev_test_split[1]:],
                             model,
                             show_progress_bar=True)
# test_sampler = RandomSampler(dev_data, replacement=True, num_samples=250)

test_loader = DataLoader(test_data, batch_size=16)

evaluator = EmbeddingSimilarityEvaluator(test_loader,
                                         show_progress_bar=True,
                                         device=device)

test_val = model.evaluate(evaluator)
print(test_val)
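
The tuple above encodes a positional 80/10/10 train/dev/test split; spelled out on a toy list:

items = list(range(100))  # invented stand-in for all_examples
split = (int(0.8 * len(items)), int(0.9 * len(items)))
train, dev, test = items[:split[0]], items[split[0]:split[1]], items[split[1]:]
assert (len(train), len(dev), len(test)) == (80, 10, 10)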
output_path = "output/bert-base-wikipedia-sections-mean-tokens"

num_epochs = 1
warmup_steps = int(len(train_data) * num_epochs / train_batch_size / 10)  # 10% of train data
train_config = TrainConfig(learning_rate=2e-5,
                           weight_decay=0.01,
                           epochs=num_epochs,
                           evaluation_steps=1000,
                           output_path=output_path,
                           save_best_model=True,
                           evaluator=evaluator,
                           warmup_steps=warmup_steps)


embedder.train(dataloader=train_dataloader, train_config=train_config)

##############################################################################
#
# Load the stored model and evaluate its performance on STS benchmark dataset
#
##############################################################################

embedder = SentenceTransformer(output_path)
test_data = SentencesDataset(examples=triplet_reader.get_examples('test.csv'),
                             model=embedder)
test_dataloader = DataLoader(test_data,
                             shuffle=False,
                             batch_size=train_batch_size,
                             collate_fn=embedder.encoder.smart_batching_collate)
evaluator = TripletEvaluator(test_dataloader)

embedder.evaluate(evaluator)

Example #20
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

train_examples = []
test_examples = []
for index, row in corpus.iterrows():
    train_examples.append(InputExample(texts=[row['sentence_A'], row['sentence_B']],
                                       label=row['relatedness_score']))
    s3.append(row['sentence_A'])
    s3.append(row['sentence_B'])

train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=8)
train_loss = losses.CosineSimilarityLoss(model)
model.fit(train_objectives=[(train_dataloader, train_loss)],
          epochs=5, warmup_steps=100, evaluator=evaluator, evaluation_steps=500)

model.best_score #1epochs 0.9232 #5epochs 0.932
model.evaluate(evaluator) #0.9232
# model.save("roberta_base_CDS_train_biencoder")

# -----------------------------------------------
# from sentence_transformers import CrossEncoder
# model = CrossEncoder('roberta_base', max_length=256)
# model.fit(train_dataloader,
#           epochs=1, warmup_steps=100)

# scores = model.predict([[sentences1,sentences2 ],[sentences3,sentences2],[sentences1,sentences3 ]])
# #pretrained model 0.48104742, 0.48180264, 0.47577295
# #after training 0.26556703, 0.03470451, 0.03307376
# --------------------------------------------

sentences1 = 'Piłka nożna z wieloma grającymi facetami'
sentences2 = 'Jacyś mężczyźni grają w futbol'
Example #21
               evaluation_steps=1000,
               warmup_steps=warmup_steps,
               output_path=bi_encoder_path,
               output_path_ignore_not_empty=True)

###############################################################
#
# Evaluate Augmented SBERT performance on QQP benchmark dataset
#
###############################################################

# Loading the augmented sbert model
bi_encoder = SentenceTransformer(bi_encoder_path)

logging.info("Read QQP test dataset")
test_sentences1 = []
test_sentences2 = []
test_labels = []

with open(os.path.join(qqp_dataset_path, "classification/test_pairs.tsv"),
          encoding='utf8') as fIn:
    reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)
    for row in reader:
        test_sentences1.append(row['question1'])
        test_sentences2.append(row['question2'])
        test_labels.append(int(row['is_duplicate']))

evaluator = BinaryClassificationEvaluator(test_sentences1, test_sentences2,
                                          test_labels)
bi_encoder.evaluate(evaluator)
Example #22
            os.remove(os.path.join(curr_dir, "prediction_results.csv"))

        # Model path
        model_save_path = curr_dir
        batch_size = 24
        agb_reader = TestAGBReader('datasets/og-test')
        train_num_labels = agb_reader.get_num_labels()

        model = SentenceTransformer(model_save_path, device="cpu")

        train_loss = losses.SoftmaxLoss(model=model,
                                        sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
                                        num_labels=train_num_labels)
        train_loss.classifier = torch.load(os.path.join(model_save_path, "2_Softmax/pytorch_model.bin"))

        print("test")
        test_dir = "/data/daumiller/sentence-transformers/examples/datasets/og-test"
        for fn in sorted(os.listdir(test_dir)):
            examples = agb_reader.get_examples(fn)
            if not examples:
                continue
            # Hack to avoid problems with docs almost as long as batch size
            if len(examples) == batch_size + 1:
                batch_size_used = batch_size - 3
            else:
                batch_size_used = batch_size
            test_data = SentencesDataset(examples=examples, model=model, shorten=True)
            test_dataloader = DataLoader(test_data, shuffle=False, batch_size=batch_size_used)
            evaluator = LabelGenerationEvaluator(test_dataloader, softmax_model=train_loss)
            model.evaluate(evaluator, model_save_path)
Example #23
def train(triplet_data_dir, output):
    logging.basicConfig(format='%(asctime)s - %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        level=logging.INFO,
                        handlers=[LoggingHandler()])

    ### Create a torch.DataLoader that passes training batch instances to our model
    train_batch_size = 16
    triplet_reader = TripletReader(triplet_data_dir,
                                   s1_col_idx=1,
                                   s2_col_idx=2,
                                   s3_col_idx=3,
                                   delimiter=',',
                                   quoting=csv.QUOTE_MINIMAL,
                                   has_header=True)
    # output_path = "output/bert-base-wikipedia-sections-mean-tokens-"+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_path = output + datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    num_epochs = 1

    ### Configure sentence transformers for training and train on the provided dataset
    # Use BERT for mapping tokens to embeddings
    word_embedding_model = models.BERT('bert-base-uncased')

    # Apply mean pooling to get one fixed sized sentence vector
    pooling_model = models.Pooling(
        word_embedding_model.get_word_embedding_dimension(),
        pooling_mode_mean_tokens=True,
        pooling_mode_cls_token=False,
        pooling_mode_max_tokens=False)

    model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

    logging.info("Read Triplet train dataset")
    train_data = SentencesDataset(examples=triplet_reader.get_examples(
        'train.csv', 2000000),
                                  model=model)
    train_dataloader = DataLoader(train_data,
                                  shuffle=True,
                                  batch_size=train_batch_size)
    train_loss = losses.TripletLoss(model=model)

    logging.info("Read Wikipedia Triplet dev dataset")
    dev_data = SentencesDataset(examples=triplet_reader.get_examples(
        'validation.csv', 10000),
                                model=model)
    dev_dataloader = DataLoader(dev_data,
                                shuffle=False,
                                batch_size=train_batch_size)
    evaluator = TripletEvaluator(dev_dataloader)

    warmup_steps = int(len(train_data) * num_epochs / train_batch_size *
                       0.1)  #10% of train data

    # Train the model
    model.fit(train_objectives=[(train_dataloader, train_loss)],
              evaluator=evaluator,
              epochs=num_epochs,
              evaluation_steps=1000,
              warmup_steps=warmup_steps,
              output_path=output_path)

    ##############################################################################
    #
    # Load the stored model and evaluate its performance on STS benchmark dataset
    #
    ##############################################################################

    model = SentenceTransformer(output_path)
    test_data = SentencesDataset(
        examples=triplet_reader.get_examples('test.csv'), model=model)
    test_dataloader = DataLoader(test_data,
                                 shuffle=False,
                                 batch_size=train_batch_size)
    evaluator = TripletEvaluator(test_dataloader)

    model.evaluate(evaluator)
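
Worked instance of the 10% warmup heuristic above, using the 2,000,000-example training read:

examples, epochs, batch = 2_000_000, 1, 16
warmup_steps = int(examples * epochs / batch * 0.1)
print(warmup_steps)  # 12500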
Example #24
                            model=model)
dev_dataloader = DataLoader(dev_data, shuffle=False, batch_size=batch_size)
evaluator = EmbeddingSimilarityEvaluator(dev_dataloader)

# Configure the training
num_epochs = 10
warmup_steps = math.ceil(len(train_data) * num_epochs / batch_size *
                         0.1)  #10% of train data for warm-up
logging.info("Warmup-steps: {}".format(warmup_steps))

# Train the model
model.fit(train_objectives=[(train_dataloader, train_loss)],
          evaluator=evaluator,
          epochs=num_epochs,
          warmup_steps=warmup_steps,
          output_path=model_save_path)

##############################################################################
#
# Load the stored model and evaluate its performance on STS benchmark dataset
#
##############################################################################

model = SentenceTransformer(model_save_path)
test_data = SentencesDataset(examples=sts_reader.get_examples("sts-test.csv"),
                             model=model)
test_dataloader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
evaluator = EmbeddingSimilarityEvaluator(test_dataloader)

model.evaluate(evaluator)
Example #25
def train_nli():

    #### Just some code to print debug information to stdout
    logging.basicConfig(format='%(asctime)s - %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        level=logging.INFO,
                        handlers=[LoggingHandler()])
    #### /print debug information to stdout

    #You can specify any huggingface/transformers pre-trained model here, for example, bert-base-uncased, roberta-base, xlm-roberta-base
    #model_name = sys.argv[1] if len(sys.argv) > 1 else 'bert-base-uncased'
    model_name = 'pretrained_model/bert-base-uncased'

    # Read the dataset
    train_batch_size = 6
    nli_reader = NLIDataReader('./examples/datasets/AllNLI')
    sts_reader = STSBenchmarkDataReader('./examples/datasets/stsbenchmark')
    train_num_labels = nli_reader.get_num_labels()
    model_save_path = 'output/training_nli_'+model_name.replace("/", "-")+'-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


    # Use Huggingface/transformers model (like BERT, RoBERTa, XLNet, XLM-R) for mapping tokens to embeddings
    word_embedding_model = models.Transformer(model_name)

    # Apply mean pooling to get one fixed sized sentence vector
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
                                pooling_mode_mean_tokens=True,
                                pooling_mode_cls_token=False,
                                pooling_mode_max_tokens=False)

    model = SentenceTransformer(modules=[word_embedding_model, pooling_model])


    # Convert the dataset to a DataLoader ready for training
    logging.info("Read AllNLI train dataset")
    train_dataset = SentencesDataset(nli_reader.get_examples('train.gz'), model=model)
    train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=train_batch_size)
    train_loss = losses.SoftmaxLoss(model=model, sentence_embedding_dimension=model.get_sentence_embedding_dimension(), num_labels=train_num_labels)



    logging.info("Read STSbenchmark dev dataset")
    dev_data = SentencesDataset(examples=sts_reader.get_examples('sts-dev.csv'), model=model)
    dev_dataloader = DataLoader(dev_data, shuffle=False, batch_size=train_batch_size)
    evaluator = LabelAccuracyEvaluator(
        dev_dataloader,
        softmax_model=Softmax_label(
            model=model,
            sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
            num_labels=train_num_labels))


    # Configure the training
    num_epochs = 1

    warmup_steps = math.ceil(len(train_dataset) * num_epochs / train_batch_size * 0.1) #10% of train data for warm-up
    logging.info("Warmup-steps: {}".format(warmup_steps))



    # Train the model
    model.fit(train_objectives=[(train_dataloader, train_loss)],
            evaluator=evaluator,
            epochs=num_epochs,
            evaluation_steps=100,
            warmup_steps=warmup_steps,
            output_path=model_save_path
            )



    ##############################################################################
    #
    # Load the stored model and evaluate its performance on STS benchmark dataset
    #
    ##############################################################################

    #model = SentenceTransformer(model_save_path)
    test_data = SentencesDataset(examples=sts_reader.get_examples("sts-test.csv"), model=model)
    test_dataloader = DataLoader(test_data, shuffle=False, batch_size=train_batch_size)
    test_evaluator = LabelAccuracyEvaluator(
        test_dataloader,
        softmax_model=Softmax_label(
            model=model,
            sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
            num_labels=train_num_labels))

    model.evaluate(test_evaluator)