Example #1
    async def accuracy(self, sources: Sources):
        if not os.path.isfile(
                os.path.join(self.parent.config.output_dir, "tf_model.h5")):
            raise ModelNotTrained("Train model before assessing for accuracy.")

        config = self.parent.config._asdict()
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.parent.config.output_dir)
        eval_features = await self._preprocess_data(sources)
        eval_dataset = await self.example_features_to_dataset(eval_features)

        def compute_metrics(p: EvalPrediction) -> Dict:
            preds = self.np.argmax(p.predictions, axis=1)
            return classification_compute_metrics(preds, p.label_ids)

        with self.parent.config.strategy.scope():
            self.model = TFAutoModelForSequenceClassification.from_pretrained(
                config["directory"])
        trainer = TFTrainer(
            model=self.model,
            args=self.parent.config,
            eval_dataset=eval_dataset,
            compute_metrics=compute_metrics,
        )
        result = trainer.evaluate()
        return Accuracy(result["eval_acc"])
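The classification_compute_metrics helper is not shown in this snippet. A minimal sketch of what it plausibly returns, assuming a simple scikit-learn accuracy metric (the "acc" key matches the result["eval_acc"] lookup, since TFTrainer prefixes evaluation metrics with "eval_"):

    # Hypothetical sketch (an assumption, not the verbatim helper).
    from sklearn.metrics import accuracy_score

    def classification_compute_metrics(preds, labels) -> dict:
        # "acc" becomes "eval_acc" once TFTrainer prefixes the metric names.
        return {"acc": accuracy_score(labels, preds)}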
Example #2
    async def accuracy(self, sources: Sources):
        if not os.path.isfile(
            os.path.join(self.parent.config.output_dir, "tf_model.h5")
        ):
            raise ModelNotTrained("Train model before assessing for accuracy.")

        data_df = await self._preprocess_data(sources)
        eval_dataset = self.get_dataset(data_df, self.tokenizer, mode="eval")
        with self.parent.config.strategy.scope():
            self.model = TFAutoModelForTokenClassification.from_pretrained(
                self.parent.config.output_dir,
                config=self.config,
                cache_dir=self.parent.config.cache_dir,
            )

        trainer = TFTrainer(
            model=self.model,
            args=self.parent.config,
            train_dataset=None,
            eval_dataset=eval_dataset.get_dataset(),
            compute_metrics=self.compute_metrics,
        )

        result = trainer.evaluate()
        return Accuracy(result["eval_f1"])
Example #3
 async def accuracy(self, sources: Sources) -> Accuracy:
     try:
         with open("acc.txt", "r") as f:
             self.accuracy_value = float(f.read())
     except (OSError, ValueError):
         raise ModelNotTrained("Train model before assessing for accuracy.")
     return Accuracy(self.accuracy_value)
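This snippet reads back a score that an earlier step persisted to disk. A minimal sketch of that counterpart, assuming a hypothetical train() that writes self.accuracy_value to the same file:

    # Hypothetical write side (an assumption): persist the score that
    # accuracy() later reads back from acc.txt.
    with open("acc.txt", "w") as f:
        f.write(str(self.accuracy_value))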
Example #4
    async def accuracy(self, sources: Sources) -> Accuracy:
        if not os.path.isfile(self.model_path):
            raise ModelNotTrained("Train model before assessing for accuracy.")

        dataset, size = await self.dataset_generator(sources)
        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=self.parent.config.batch_size,
            shuffle=True,
            num_workers=4,
        )

        self._model.eval()
        running_corrects = 0

        for inputs, labels in dataloader:
            inputs = inputs.to(self.device)
            labels = labels.to(self.device)

            with torch.set_grad_enabled(False):
                outputs = self._model(inputs)
                _, preds = torch.max(outputs, 1)

            running_corrects += torch.sum(preds == labels.data)

        acc = running_corrects.double() / size

        return Accuracy(acc)
Example #5
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Evaluates the accuracy of our model after training using the input records
     as test data.
     """
     if not os.path.isdir(self.model_dir_path):
         raise ModelNotTrained("Train model before assessing for accuracy.")
     input_fn = await self.accuracy_input_fn(sources)
     accuracy_score = self.model.evaluate(input_fn=input_fn)
     return Accuracy(accuracy_score["accuracy"])
Example #6
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Evaluates the accuracy of our model after training using the input records
     as test data.
     """
     if not os.path.isfile(
             os.path.join(self.model_dir_path, "saved_model.pb")):
         raise ModelNotTrained("Train model before assessing for accuracy.")
     x, y = await self.train_data_generator(sources)
     accuracy_score = self._model.evaluate(x, y)
     return Accuracy(accuracy_score[1])
Example #7
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Evaluates the accuracy of our model after training using the input repos
     as test data.
     """
     if not os.path.isdir(self.model_dir_path):
         raise NotADirectoryError("Model not trained")
     input_fn = await self.evaluate_input_fn(sources,
                                             batch_size=20,
                                             shuffle=False,
                                             epochs=1)
     metrics = self.model.evaluate(input_fn=input_fn)
     return Accuracy(1 - metrics["loss"])  # 1 - mse
Example #8
 async def accuracy(self, sources: Sources) -> Accuracy:
     if not self.model:
         raise ModelNotTrained("Train the model before assessing accuracy")
     test_data = []
     async for record in sources.with_features(
             self.features + [self.parent.config.predict.name]):
         test_data.append(record.features())
     df = pd.DataFrame(test_data)
     y_test = df[[self.parent.config.predict.name]]
     x_test = df.drop(columns=[self.parent.config.predict.name])
     predictions = await self.get_predictions(x_test)
     accuracy = await self.accuracy_score(y_test, predictions)
     return Accuracy(accuracy)
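The self.accuracy_score helper is not shown. A minimal sketch under the assumption that it simply wraps scikit-learn:

    # Hypothetical helper (an assumption, not the verbatim implementation).
    from sklearn.metrics import accuracy_score as sklearn_accuracy_score

    async def accuracy_score(self, y_test, predictions):
        # Fraction of rows where the prediction matches the label.
        return sklearn_accuracy_score(y_test, predictions)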
Example #9
    async def accuracy(self, sources: Sources) -> Accuracy:
        """
        Assess the accuracy of the network on the test data after training on records
        """
        if not os.path.isfile(self.model_path):
            raise ModelNotTrained("Train model before assessing for accuracy.")

        dataset, size = await self.dataset_generator(sources)
        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=self.parent.config.batch_size,
            shuffle=True,
            num_workers=4,
        )

        self._model.eval()

        if self.classifications:
            running_corrects = 0

            for inputs, labels in dataloader:
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)

                with torch.set_grad_enabled(False):
                    outputs = self._model(inputs)
                    _, preds = torch.max(outputs, 1)

                running_corrects += torch.sum(preds == labels.data)

            acc = running_corrects.double() / size
        else:
            running_loss = 0.0

            for inputs, labels in dataloader:
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)

                with torch.set_grad_enabled(False):
                    outputs = self._model(inputs)
                    loss = self.criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)

            total_loss = running_loss / size
            acc = 1.0 - total_loss

        return Accuracy(acc)
Example #10
    async def accuracy(self, sources: Sources):
        if not os.path.isfile(
                os.path.join(self.parent.config.output_dir,
                             "pytorch_model.bin")):
            raise ModelNotTrained("Train model before assessing for accuracy.")
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.parent.config.output_dir,
            do_lower_case=self.parent.config.do_lower_case,
        )
        eval_examples = await self._preprocess_data(sources)
        features, dataset = squad_convert_examples_to_features(
            examples=eval_examples,
            tokenizer=self.tokenizer,
            max_seq_length=self.parent.config.max_seq_length,
            doc_stride=self.parent.config.doc_stride,
            max_query_length=self.parent.config.max_query_length,
            is_training=False,
            return_dataset="pt",
        )

        results = {}
        if self.parent.config.local_rank in [-1, 0]:
            logger.info(
                "Loading checkpoints saved during training for evaluation")
            self.model = AutoModelForQuestionAnswering.from_pretrained(
                self.parent.config.output_dir)
            self.model.to(self.parent.config.device)

            # Evaluate
            predictions = await self._custom_accuracy(eval_examples, features,
                                                      dataset)
            results = squad_evaluate(eval_examples, predictions)

        logger.info("Results: {}".format(results))

        return Accuracy(results["f1"])
Example #11
    async def accuracy(self, sources: Sources):
        if not os.path.isfile(
            os.path.join(self.parent.config.output_dir, "tf_model.h5")
        ):
            raise ModelNotTrained("Train model before assessing for accuracy.")
        config = self.parent.config._asdict()
        config["strategy"] = self.parent.config.strategy
        config["n_device"] = self.parent.config.n_device
        self.tokenizer = self.tokenizer_class.from_pretrained(
            config["output_dir"], do_lower_case=config["do_lower_case"]
        )
        eval_batch_size = (
            config["per_device_eval_batch_size"] * config["n_device"]
        )
        data_df = await self._preprocess_data(sources)
        eval_dataset, num_eval_examples = self.get_dataset(
            data_df,
            self.tokenizer,
            self.pad_token_label_id,
            eval_batch_size,
            mode="accuracy",
        )
        eval_dataset = self.parent.config.strategy.experimental_distribute_dataset(
            eval_dataset
        )

        checkpoints = []
        results = []

        if config["eval_all_checkpoints"]:
            # Requires the standard library glob module.
            checkpoints = list(
                os.path.dirname(c)
                for c in sorted(
                    glob.glob(
                        config["output_dir"] + "/**/" + TF2_WEIGHTS_NAME,
                        recursive=True,
                    ),
                    key=lambda f: int("".join(filter(str.isdigit, f)) or -1),
                )
            )

        if len(checkpoints) == 0:
            checkpoints.append(config["output_dir"])

        self.logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            global_step = (
                checkpoint.split("-")[-1]
                if re.match(".*checkpoint-[0-9]", checkpoint)
                else "final"
            )

            with self.parent.config.strategy.scope():
                self.model = self.model_class.from_pretrained(checkpoint)

            y_true, y_pred, eval_loss = self._custom_accuracy(
                eval_dataset,
                self.tokenizer,
                self.model,
                num_eval_examples,
                eval_batch_size,
            )
            report = classification_report(y_true, y_pred, digits=4)

            if global_step:
                results.append(
                    {
                        global_step + "_report": report,
                        global_step + "_loss": eval_loss,
                    }
                )

        output_eval_file = os.path.join(
            config["output_dir"], "accuracy_results.txt"
        )
        # create the report and save in output_dir
        with self.tf.io.gfile.GFile(output_eval_file, "w") as writer:
            for res in results:
                for key, val in res.items():
                    if "loss" in key:
                        self.logger.debug(key + " = " + str(val))
                        writer.write(key + " = " + str(val))
                        writer.write("\n")
                    else:
                        self.logger.debug(key)
                        self.logger.debug("\n" + report)
                        writer.write(key + "\n")
                        writer.write(report)
                        writer.write("\n")
        # Return accuracy for the last checkpoint
        return Accuracy(f1_score(y_true, y_pred))
Example #12
 async def accuracy(self, sources: Sources) -> Accuracy:
     accuracy: int = 0
     async for repo in sources.repos():
         accuracy += int(repo.key)
     return Accuracy(accuracy)
Example #13
 async def accuracy(self, sources: Sources) -> Accuracy:
     if self.regression_line is None:
         raise ModelNotTrained("Train model before assessing for accuracy.")
     accuracy_value = self.regression_line[2]
     return Accuracy(accuracy_value)
Example #14
 def test_str(self):
     self.assertEqual(str(Accuracy(0.04242)), "4.24")
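The test pins down the string form of Accuracy. A minimal sketch consistent with it (an assumption, not the actual class): the stored fraction is rendered as a percentage rounded to two decimal places.

    # Hypothetical Accuracy type matching the assertion above
    # (an assumption): 0.04242 -> 4.242 -> "4.24".
    class Accuracy(float):
        def __str__(self):
            return str(round(self * 100, 2))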