示例#1
0
    def _log_tensors(self, tensor_values):
        """Update the trigger timer, log metric values, and forward them to MLFlow/Graphite."""
        self._timer.update_last_triggered_step(self._iter_count)
        global_step = tensor_values["global_step"]
        # Past the cutoff step, stop emitting metrics entirely.
        if self.skip_after_step is not None and global_step >= self.skip_after_step:
            return

        # Collect (tag, value) pairs: tensors first, then function outputs.
        # Each function in self.functions is called exactly once.
        pairs = [(tag, tensor_values[tag]) for tag in self._tag_order]
        if self.functions:
            pairs.extend((tag, self.functions[tag]()) for tag in self._fn_order)

        # One log line with all formatted pairs.
        LOGGER.info(", ".join(self.formatter(tag, value) for tag, value in pairs))

        # Forward every metric to the configured backends.
        for tag, value in pairs:
            if self.use_graphite:
                graphite.log_metric(tag, value, postfix=self.name)
            if self.use_mlflow:
                # Prefix the tag with the hook name when one is set.
                mlflow_tag = tag if self.name is None else f"{self.name}_{tag}"
                mlflow.log_metric(sanitize_metric_name(mlflow_tag),
                                  value,
                                  step=global_step)
示例#2
0
    def run(self):
        """Run a faiss inner-product retrieval over predictions and log precision/recall/f1/NDCG."""
        # Load user vectors and append a constant 1.0 column so that the inner
        # product with [embedding, bias] rows also adds the item bias term.
        with dpr.io.ParquetDataset(self.path_predictions).open() as ds:
            predictions = ds.read_pandas().to_pandas()
            user_vectors = np.stack(predictions["user"])
            ones_column = np.ones([user_vectors.shape[0], 1], np.float32)
            queries = np.concatenate([user_vectors, ones_column], axis=-1)

        with dpr.io.ParquetDataset(self.path_embeddings).open() as ds:
            item_embeddings = ds.read_pandas().to_pandas().to_numpy()

        with dpr.io.ParquetDataset(self.path_biases).open() as ds:
            item_biases = ds.read_pandas().to_pandas().to_numpy()

        keys = np.concatenate([item_embeddings, item_biases], axis=-1)

        # Brute-force maximum-inner-product search over all items.
        index = faiss.IndexFlatIP(keys.shape[-1])
        index.add(np.ascontiguousarray(keys))
        _, indices = index.search(queries, k=self.num_queries)

        # self.k may be a single int or an iterable of cutoffs.
        for k in ([self.k] if isinstance(self.k, int) else self.k):
            precision, recall, f1, ndcg = compute_metrics(
                predictions["input"], predictions["target"], indices, k=k)
            LOGGER.info(
                f"precision@{k} = {precision}\n" f"recall@{k} = {recall}\n" f"f1@{k} = {f1}\n" f"NDCG@{k} = {ndcg}"
            )
            if self.use_mlflow:
                mlflow.log_metric(key=f"precision_at_{k}", value=precision)
                mlflow.log_metric(key=f"recall_at_{k}", value=recall)
                mlflow.log_metric(key=f"f1_at_{k}", value=f1)
                mlflow.log_metric(key=f"ndcg_at_{k}", value=ndcg)
示例#3
0
    def export(self, estimator: tf.estimator.Estimator):
        """Rewrite the estimator's checkpoint file to point at the best eval step.

        Reads all eval summaries, picks the best step according to
        ``self.metric`` and ``self.mode``, selects the on-disk checkpoint
        closest to that step, and edits the ``checkpoint`` index file so the
        estimator reloads from it.

        Raises:
            ValueError: if ``self.mode`` is unknown, if no checkpoint files are
                found, or if the estimator does not pick up the selected step.
        """
        # Reload summaries and select best step
        LOGGER.info(f"Reloading summaries from {estimator.model_dir}")
        summaries = read_eval_metrics(estimator.eval_dir()).items()
        for step, metrics in sorted(summaries):
            LOGGER.info(f"- {step}: {metrics}")
        sorted_summaries = sorted(summaries, key=lambda t: t[1][self.metric])
        if self.mode == BestMode.INCREASE:
            best_step, best_metrics = sorted_summaries[-1]
        elif self.mode == BestMode.DECREASE:
            best_step, best_metrics = sorted_summaries[0]
        else:
            raise ValueError(f"Mode {self.mode} not recognized.")
        LOGGER.info(f"Best summary at step {best_step}: {best_metrics}")

        # List available checkpoints and select the one closest to best_step.
        # Checkpoints are periodic, so best_step may not have an exact match.
        checkpoints = Path(estimator.model_dir).glob(_CHEKPOINT_PATTERN)
        checkpoint_steps = [
            # Escape the dot: only match a literal ".index" suffix.
            int(re.findall(r"-(\d+)\.index", str(path))[0])
            for path in checkpoints
        ]
        if not checkpoint_steps:
            raise ValueError(f"No checkpoints found in {estimator.model_dir}")
        selected_step = min(checkpoint_steps,
                            key=lambda step: abs(step - best_step))
        LOGGER.info(f"Selected checkpoint {selected_step}")

        # Change checkpoint information: the first line of the `checkpoint`
        # file names the checkpoint the estimator restores from.
        with Path(estimator.model_dir, "checkpoint").open("r") as file:
            lines = file.read().split("\n")
            lines[0] = f'model_checkpoint_path: "model.ckpt-{selected_step}"'

        with Path(estimator.model_dir, "checkpoint").open("w") as file:
            file.write("\n".join(lines))

        # Check that change is effective
        global_step = estimator.get_variable_value("global_step")
        if global_step != selected_step:
            msg = f"Changed checkpoint file to use step {selected_step}, but estimator uses {global_step}"
            raise ValueError(msg)

        # Log to MLFlow
        if self.use_mlflow:
            mlflow.log_metric(key=self.tag, value=global_step)
示例#4
0
    def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
        """Compute steps/examples per second, log them, and push to MLFlow/Graphite."""
        # Past the cutoff step, stop emitting metrics entirely.
        if self.skip_after_step is not None and global_step >= self.skip_after_step:
            return

        # Throughput metrics for the elapsed window.
        throughput = {
            "steps_per_sec": elapsed_steps / elapsed_time,
            "examples_per_sec": self.batch_size * elapsed_steps / elapsed_time,
        }

        # One log line, two decimals per value.
        LOGGER.info(", ".join(f"{tag} = {value:.2f}"
                              for tag, value in throughput.items()))

        # Forward every metric to the configured backends.
        for tag, value in throughput.items():
            if self.use_graphite:
                graphite.log_metric(tag, value, postfix=self.name)
            if self.use_mlflow:
                # Prefix the tag with the hook name when one is set.
                mlflow_tag = tag if self.name is None else f"{self.name}_{tag}"
                mlflow.log_metric(mlflow_tag, value, step=global_step)
示例#5
0
 def run(self):
     """Log the configured key/value pair and optionally record it in MLFlow."""
     LOGGER.info(f"{self.key}: {self.value}")
     if not self.use_mlflow:
         return
     mlflow.log_metric(key=self.key, value=self.value)