示例#1
0
 def _build_and_restore_models(self, dataset: tf.data.Dataset):
     """Call the classifier on one sample batch, then restore its weights.

     Args:
         dataset: Dataset whose first element unpacks as a 2-tuple; only the
             first component of that element is fed to the model.
     """
     checkpoint_restorer = ashpy.restorers.ClassifierRestorer(self._logdir)
     first_element = next(iter(dataset.take(1)))
     sample_inputs, _ = first_element
     # The model is called once on real input before restoring
     # (presumably so that its variables exist -- TODO confirm).
     self._model(sample_inputs)
     checkpoint_restorer.restore_model(self._model)
     self._deferred_restoration = False
def train(
    epoch: int,
    dataset: tf.data.Dataset,
    model: tf.keras.models.Model,
    loss_fn: LossFunction,
    optimizer: tf.optimizers.Optimizer,
    args: dict,
    metrics: dict,
):
    """Run the training loop for one epoch and log progress periodically.

    Args:
        epoch: Index of the current epoch; combined with the batch index to
            flag the very first training step.
        dataset: Training data; ``10000 // hvd.size()`` elements are taken.
        model: Model forwarded to ``train_step``.
        loss_fn: Loss function forwarded to ``train_step``.
        optimizer: Optimizer forwarded to ``train_step``.
        args: Options; reads ``'fp16_allreduce'`` (default ``True``) and
            ``'log_interval'`` (default ``10``).
        metrics: Must hold a ``'train_accuracy'`` entry exposing
            ``update_state`` and ``result``.
    """
    steps_this_rank = 10000 // hvd.size()
    use_compression = args.get('fp16_allreduce', True)
    every_n_steps = args.get('log_interval', 10)

    for step, batch in enumerate(dataset.take(steps_this_rank)):
        is_first_step = (epoch == 0 and step == 0)
        loss, output = train_step(
            batch,
            model,
            loss_fn,
            optimizer,
            is_first_step,
            compress=use_compression,
        )
        # batch[1] is passed as the reference value for the accuracy metric.
        metrics['train_accuracy'].update_state(batch[1], output)

        if step % every_n_steps != 0:
            continue

        summary = dict(
            epoch=epoch,
            loss=loss,
            accuracy=metrics['train_accuracy'].result(),
        )
        io.print_metrics(summary, pre=f'[{hvd.rank()}] ', logger=logger)
示例#3
0
 def analyse(self,
             dataset: tf.data.Dataset,
             steps: Optional[int] = None,
             verbose: int = 0):
     """Build a per-sample table of labels, predictions and probabilities.

     The dataset is traversed twice: once to collect images and label codes,
     once by ``self.learner.model.predict``.
     NOTE(review): both passes must yield samples in the same order for the
     rows to line up -- confirm the dataset is not reshuffled per iteration.

     Args:
         dataset: Batched dataset whose unbatched elements unpack as
             ``(image, label_code)``.
         steps: Number of batches to use; ``None`` uses the whole dataset.
         verbose: Accepted for interface compatibility; not used here.

     Returns:
         A ``pandas.DataFrame`` with the columns ``image``, ``label``,
         ``label_code``, ``pred``, ``pred_code``, ``label_probs`` and
         ``pred_probs``.
     """
     reverse_label_map = {
         code: name
         for name, code in self.data_container.label_map.items()
     }
     # ``Dataset.take`` needs an integer count and cannot convert ``None``;
     # -1 selects every element, matching ``predict(steps=None)``.
     take_count = -1 if steps is None else steps
     images = []
     label_codes = []
     for image, label_code in dataset.take(take_count).unbatch():
         label_codes.append(label_code.numpy())
         images.append(image.numpy())
     labels = [reverse_label_map[label_code] for label_code in label_codes]
     probs = self.learner.model.predict(dataset, steps=steps)
     pred_codes = probs.argmax(axis=1)
     preds = [reverse_label_map[pred_code] for pred_code in pred_codes]
     # Pick probs[i, code[i]] directly instead of building an N x N matrix
     # and masking out its diagonal.
     label_rows = np.arange(len(labels))
     pred_rows = np.arange(len(pred_codes))
     return pd.DataFrame.from_dict({
         "image": images,
         "label": labels,
         "label_code": label_codes,
         "pred": preds,
         "pred_code": pred_codes,
         "label_probs": probs[label_rows, label_codes],
         "pred_probs": probs[pred_rows, pred_codes],
     })
示例#4
0
    def _valid_step(self, dataset: tf.data.Dataset, steps_per_epoch: int,
                    progress_bar: ProgressBar, *args, **kwargs) -> Dict:
        """Run one validation pass and return the accumulated metrics.

        :param dataset: validation dataset; each element unpacks as
            ``(inputs, targets, _)``
        :param steps_per_epoch: number of validation steps to take
        :param progress_bar: progress reporter; reset at the start and
            finished with ``done`` at the end
        :return: dict with the keys ``valid_loss`` and ``valid_accuracy``
        """
        print("验证轮次")
        began_at = time.time()
        for metric in (self.loss_metric, self.accuracy_metric):
            metric.reset_states()
        progress_bar.reset(total=steps_per_epoch, num=self.batch_size)

        batches = dataset.take(steps_per_epoch)
        for step, (inputs, targets, _) in enumerate(batches, start=1):
            step_metrics = self._valid_one_step(inputs=inputs,
                                                targets=targets)
            progress_bar(current=step,
                         metrics=get_dict_string(data=step_metrics))

        elapsed = time.time() - began_at
        progress_bar.done(step_time=elapsed)

        return dict(
            valid_loss=self.loss_metric.result(),
            valid_accuracy=self.accuracy_metric.result(),
        )
示例#5
0
文件: gan.py 项目: qooglewb/QBuilder
    def call(self, dataset: tf.data.Dataset):
        r"""
        Perform the adversarial training.

        The dataset is re-batched to ``self._global_batch_size``, one batch is
        kept aside for logging, and the remaining epochs (from
        ``self._current_epoch()`` up to ``self._epochs``) are run through
        ``self._train_step``.

        Args:
            dataset (:py:class:`tf.data.Dataset`): The adversarial training dataset.
                Its elements are indexed as ``element[0][0]``, ``element[0][1]``
                and ``element[1]`` below.
        """
        current_epoch = self._current_epoch()

        self._update_global_batch_size(
            dataset, [self._d_loss, self._g_loss, self._e_loss]
        )

        # Re-batch to the global batch size; incomplete trailing batches are dropped.
        dataset = wrap(
            dataset.unbatch().batch(self._global_batch_size, drop_remainder=True)
        )

        # Keep one batch around: it is logged once below and reused for the
        # per-epoch TEST-mode logging.
        samples = next(iter(dataset.take(1)))
        gen_inputs = samples[1]

        with self._train_summary_writer.as_default():
            self._log("real_x", samples[0][0])
            self._log("real_y", samples[0][1])

            for epoch in tf.range(current_epoch, self._epochs):
                distribute_dataset = self._distribute_strategy.experimental_distribute_dataset(
                    dataset
                )

                for example in distribute_dataset:
                    d_loss, g_loss, e_loss, fake, generator_of_encoder = self._train_step(
                        example
                    )
                    self._global_step.assign_add(1)

                    # Every 10th global step: print the losses and measure performance
                    # on the batch that was just used for training.
                    if tf.equal(tf.math.mod(self._global_step, 10), 0):
                        # The message is an f-string, so it is formatted by Python
                        # before being handed to tf.print.
                        tf.print(
                            f"[{self._global_step.numpy()}] g_loss: {g_loss} - "
                            f"d_loss: {d_loss} - e_loss: {e_loss}"
                        )
                        self._measure_performance(
                            tf.data.Dataset.from_tensor_slices(example).batch(
                                self._global_batch_size
                            )
                        )

                self._epoch_completed(epoch + 1)
                if self._log_eval_mode == LogEvalMode.TEST:
                    # TEST mode: log fresh outputs computed with training=False
                    # on the batch kept aside above.
                    self._log("generator", self._generator(gen_inputs, training=False))

                    self._log(
                        "generator_of_encoder",
                        self._generator(
                            self._encoder(samples[0][0], training=False), training=False
                        ),
                    )
                elif self._log_eval_mode == LogEvalMode.TRAIN:
                    # TRAIN mode: log the outputs of the last train step of this epoch.
                    # NOTE(review): `fake` and `generator_of_encoder` are only bound
                    # inside the inner loop; if the dataset yields no batch they are
                    # undefined here.
                    self._log("generator", fake)
                    self._log("generator_of_encoder", generator_of_encoder)
示例#6
0
def get_real_samples(
        dataset: tf.data.Dataset,
        batch_size: int) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """Fetch the first dataset element and pair it with all-ones targets.

    Args:
        dataset: Source dataset; only its first element is read, and that
            element is indexed at positions 0 and 1.
        batch_size: Number of rows of the returned ones tensor.

    Returns:
        ``(element[0], element[1], ones)`` where ``ones`` has shape
        ``(batch_size, 1)``.
    """
    first_elements = list(dataset.take(1))
    real_element = first_elements[0]
    first_part, second_part = real_element[0], real_element[1]
    return first_part, second_part, tf.ones((batch_size, 1))
示例#7
0
 def report(self,
            dataset: tf.data.Dataset,
            steps: Optional[int] = None,
            verbose: int = 0):
     """Return ``classification_report`` for the model on ``dataset``.

     Args:
         dataset: Batched dataset whose unbatched elements unpack as
             ``(_, label)``.
         steps: Number of batches to use; ``None`` uses the whole dataset.
         verbose: Accepted for interface compatibility; not used here.

     Returns:
         Whatever ``classification_report`` returns for the collected labels
         and the arg-max of the model predictions.
     """
     # ``Dataset.take`` needs an integer count and cannot convert ``None``;
     # -1 selects every element, matching ``predict(steps=None)``.
     take_count = -1 if steps is None else steps
     true_codes = [
         label.numpy() for _, label in dataset.take(take_count).unbatch()
     ]
     pred_codes = self.learner.model.predict(dataset,
                                             steps=steps).argmax(axis=1)
     return classification_report(true_codes, pred_codes)
示例#8
0
文件: module.py 项目: DengBoCong/hlp
def _valid_step(model: tf.keras.Model, dataset: tf.data.Dataset,
                steps_per_epoch: int,
                tokenizer: tf.keras.preprocessing.text.Tokenizer):
    """Run one validation pass with CTC loss and greedy CTC decoding.

    :param model: model called on the audio features
    :param dataset: validation dataset; each element unpacks as
        ``(audio_feature, sentence, length)``
    :param steps_per_epoch: number of validation steps; also the divisor used
        for every reported average
    :param tokenizer: tokenizer used to turn id sequences back into text
    :return: average loss, average WER, average normalised LER
    """
    print("验证轮次")
    began_at = time.time()
    loss_sum = 0
    wer_sum = 0
    ler_sum = 0

    batches = dataset.take(steps_per_epoch)
    for step, (audio_feature, sentence, length) in enumerate(batches, start=1):
        step_began_at = time.time()

        predictions = model(audio_feature)
        # Column 0 of `length` is used as the label length, the remaining
        # columns feed the CTC input-length computation.
        input_length = compute_ctc_input_length(audio_feature.shape[1],
                                                predictions.shape[1],
                                                length[:, 1:])
        ctc_loss = tf.keras.backend.ctc_batch_cost(
            y_true=sentence,
            y_pred=predictions,
            input_length=input_length,
            label_length=length[:, 0:1])
        flat_input_length = tf.reshape(input_length, [input_length.shape[0]])
        decoded = tf.keras.backend.ctc_decode(y_pred=predictions,
                                              greedy=True,
                                              input_length=flat_input_length)

        hypotheses = tokenizer.sequences_to_texts(decoded[0][0].numpy())
        references = tokenizer.sequences_to_texts(sentence.numpy())

        _, batch_wer = wers(references, hypotheses)
        _, batch_ler = lers(references, hypotheses)
        wer_sum += batch_wer
        ler_sum += batch_ler

        batch_loss = tf.reduce_mean(ctc_loss)
        loss_sum += batch_loss
        print('\r{}/{} [Batch {} Loss {:.4f} {:.1f}s]'.format(
            step, steps_per_epoch, step, batch_loss.numpy(),
            time.time() - step_began_at),
              end='')

    seconds_per_step = (time.time() - began_at) / steps_per_epoch
    mean_loss = loss_sum / steps_per_epoch
    mean_wer = wer_sum / steps_per_epoch
    mean_ler = ler_sum / steps_per_epoch
    print(' - {:.0f}s/step - loss: {:.4f} - average_wer:{:.4f} - '
          'average_norm_ler:{:.4f}'.format(seconds_per_step, mean_loss,
                                           mean_wer, mean_ler))

    return mean_loss, mean_wer, mean_ler
示例#9
0
def split_dataset(
    dataset: tf.data.Dataset,
) -> Tuple[tf.data.Dataset, tf.data.Dataset, tf.data.Dataset]:
    """Split ``dataset`` into consecutive train, validation and test parts.

    The sizes come from ``get_split_sizes(TOTAL_SAMPLES)``. The parts are
    taken in order and do not overlap: the first ``train_size`` elements,
    then the next ``val_size`` elements, then the next ``test_size`` elements.

    Args:
        dataset: Dataset to split.

    Returns:
        ``(train, val, test)`` datasets.
    """
    train_size, val_size, test_size = get_split_sizes(TOTAL_SAMPLES)
    train = dataset.take(train_size)
    remainder = dataset.skip(train_size)
    # Validation is the head of the remainder and test follows it. Skipping
    # for one and taking for the other from the same start would overlap
    # (or leave a gap) whenever val_size != test_size.
    val = remainder.take(val_size)
    test = remainder.skip(val_size).take(test_size)
    return train, val, test
示例#10
0
 def _build_and_restore_models(self, dataset: tf.data.Dataset):
     """Call generator and discriminator on one sample, then restore both.

     Args:
         dataset: Dataset whose first element unpacks as ``((x, _), z)``;
             ``z`` is fed to the generator and ``x`` to the discriminator.
     """
     checkpoint_restorer = ashpy.restorers.AdversarialRestorer(self._logdir)
     first_element = next(iter(dataset.take(1)))
     (real_inputs, _), generator_inputs = first_element
     # Both models are called once on real input before restoring
     # (presumably so that their variables exist -- TODO confirm).
     self._generator(generator_inputs)
     self._discriminator(real_inputs)
     checkpoint_restorer.restore_generator(self._generator)
     checkpoint_restorer.restore_discriminator(self._discriminator)
     self._deferred_restoration = False
示例#11
0
文件: module.py 项目: DengBoCong/hlp
def _valid_step(model: tf.keras.Model, dataset: tf.data.Dataset,
                steps_per_epoch: int,
                tokenizer: tf.keras.preprocessing.text.Tokenizer,
                enc_hidden: tf.Tensor, dec_input: tf.Tensor):
    """Run one validation pass with step-by-step greedy decoding.

    :param model: model called as ``model(audio_feature, enc_hidden, step_input)``
    :param dataset: validation dataset; each element unpacks as
        ``(audio_feature, sentence, length)``
    :param steps_per_epoch: number of validation steps; also the divisor used
        for every reported average
    :param tokenizer: tokenizer used to turn id sequences back into text
    :param enc_hidden: initial encoder hidden state
    :param dec_input: initial decoder input; every batch starts decoding
        from this value
    :return: average loss, average WER, average normalised LER
    """
    print("验证轮次")
    start_time = time.time()
    total_loss = 0
    aver_wers = 0
    aver_norm_lers = 0

    for (batch, (audio_feature, sentence,
                 length)) in enumerate(dataset.take(steps_per_epoch)):
        loss = 0
        batch_start = time.time()
        # Decode with a per-batch copy of the start input. Overwriting
        # `dec_input` itself would make every later batch start from the
        # previous batch's last predictions.
        step_input = dec_input
        result = dec_input

        for t in range(1, sentence.shape[1]):
            # Only the most recent token is fed back into the decoder.
            step_input = step_input[:, -1:]
            predictions, _ = model(audio_feature, enc_hidden, step_input)
            loss += loss_func_mask(sentence[:, t], predictions)
            predictions = tf.argmax(predictions, axis=-1)

            step_input = tf.expand_dims(predictions, axis=-1)
            result = tf.concat([result, step_input], axis=-1)

        batch_loss = (loss / int(sentence.shape[0]))
        results = tokenizer.sequences_to_texts(result.numpy())
        sentence = tokenizer.sequences_to_texts(sentence.numpy())

        _, aver_wer = wers(sentence, results)
        _, norm_aver_ler = lers(sentence, results)

        aver_wers += aver_wer
        aver_norm_lers += norm_aver_ler

        total_loss += batch_loss
        # float() also covers the case where no decoding step ran and
        # `batch_loss` is a plain Python number without `.numpy()`.
        print('\r{}/{} [Batch {} Loss {:.4f} {:.1f}s]'.format(
            (batch + 1), steps_per_epoch, batch + 1, float(batch_loss),
            (time.time() - batch_start)),
              end='')
    print(' - {:.0f}s/step - loss: {:.4f} - average_wer:{:.4f} - '
          'average_norm_ler:{:.4f}'.format(
              (time.time() - start_time) / steps_per_epoch,
              total_loss / steps_per_epoch, aver_wers / steps_per_epoch,
              aver_norm_lers / steps_per_epoch))

    return total_loss / steps_per_epoch, aver_wers / steps_per_epoch, aver_norm_lers / steps_per_epoch
示例#12
0
    def evaluate_tflite(self,
                        tflite_filepath: str,
                        dataset: tf.data.Dataset,
                        steps: int,
                        json_file: Optional[str] = None) -> Dict[str, float]:
        """Evaluate the EfficientDet TFLite model.

        Args:
          tflite_filepath: File path to the TFLite model.
          dataset: tf.data.Dataset used for evaluation.
          steps: Number of steps to evaluate the model.
          json_file: JSON with COCO data format containing golden bounding
            boxes. Used for validation. If None, use the ground truth from
            the dataloader. Refer to
            https://towardsdatascience.com/coco-data-format-for-object-detection-a4c5eaf518c5
            for the description of COCO data format.

        Returns:
          A dict contains AP metrics.
        """
        # TODO(b/182441458): Use the task library for evaluation instead once it
        # supports python interface.
        evaluator, label_map = self._get_evaluator_and_label_map(json_file)
        dataset = dataset.take(steps)

        lite_runner = eval_tflite.LiteRunner(tflite_filepath,
                                             only_network=False)
        progbar = tf.keras.utils.Progbar(steps)

        # The scaling factor depends only on the configured image size, so it
        # is computed once instead of on every step.
        height, width = utils.parse_image_size(self.config.image_size)
        normalize_factor = tf.constant([height, width, height, width],
                                       dtype=tf.float32)

        for i, (images, labels) in enumerate(dataset):
            # Get the output result after post-processing NMS op.
            nms_boxes, nms_classes, nms_scores, _ = lite_runner.run(images)

            # CLASS_OFFSET is used since label_id for `background` is 0 in label_map
            # while it's not actually included the model. We don't need to add the
            # offset in the Android application.
            nms_classes += postprocess.CLASS_OFFSET

            # Scale the boxes by the image height/width.
            nms_boxes *= normalize_factor
            if labels['image_scales'] is not None:
                # Two trailing axes are added so the per-image scale
                # broadcasts against the boxes.
                scales = tf.expand_dims(
                    tf.expand_dims(labels['image_scales'], -1), -1)
                nms_boxes = nms_boxes * tf.cast(scales, nms_boxes.dtype)
            detections = postprocess.generate_detections_from_nms_output(
                nms_boxes, nms_classes, nms_scores, labels['source_ids'])

            detections = postprocess.transform_detections(detections)
            evaluator.update_state(labels['groundtruth_data'].numpy(),
                                   detections.numpy())
            progbar.update(i + 1)
        print()

        metric_dict = self._get_metric_dict(evaluator, label_map)
        return metric_dict
示例#13
0
def _train(dataset: tf.data.Dataset, image_shape: Tuple[int, int, int],
           epochs: int) -> tf.keras.Model:
    """Train the autoencoder, saving a checkpoint and plots after each epoch.

    Args:
        dataset (tf.data.Dataset): training dataset
        image_shape (Tuple[int, int, int]): size of a single training image
        epochs (int): upper bound of the epoch range; iteration starts at
            ``checkpoint.save_counter()``

    Returns:
        tf.keras.Model: the trained model
    """
    artifacts_dir = pathlib.Path("data/dense_ae")
    history_filepath = artifacts_dir / "history.pkl"
    history_imagepath = artifacts_dir / "history.png"
    reconstruct_filepath = artifacts_dir / "reconstruct.png"
    flat_input_size = image_shape[0] * image_shape[1] * image_shape[2]

    model = network.Autoencoder(flat_input_size)
    optimizer = tf.keras.optimizers.Adam(1e-4)
    loss = tf.keras.losses.mean_squared_error

    checkpoint = Checkpoint(
        save_dir=str(artifacts_dir / "ckpts"),
        max_to_keep=3,
        restore=True,
        model=model,
        optimizer=optimizer,
    )
    epoch_history = history.restore(history_filepath)
    # One dataset element is kept for the per-epoch reconstruction plot.
    input_example = list(dataset.take(1))[-1]

    first_epoch = checkpoint.save_counter()
    progress_bar = tqdm(range(first_epoch, epochs))
    for epoch in progress_bar:
        # Learning: one pass over the whole dataset.
        batch_history = history.Batch()
        for batch in dataset:
            model.train_step(batch, loss, optimizer, batch_history)

        # Persist checkpoint and history.
        checkpoint.save()
        batch_history.result()
        epoch_history.result(batch_history)
        history.save(epoch_history, history_filepath)

        # Report progress and refresh the plots.
        status = f"epoch: {epoch}, {epoch_history.get_latest()}"
        progress_bar.set_description(status)
        history.show_image(epoch_history, filepath=history_imagepath)
        reconstruction = network.reconstruct(model, input_example)
        visualize.show_images(
            input_example,
            reconstruction,
            image_shape,
            reconstruct_filepath,
        )

    return model
def plot_predicted_images(y_pred: np.ndarray, test_dataset: tf.data.Dataset) -> None:
    """Print the predicted class and show the image for up to 10 samples.

    Args:
        y_pred: Predictions indexed as ``y_pred[i][0]``; values above 0.5 are
            reported as "Cat", all others as "Dog".
        test_dataset: Dataset whose first element unpacks as ``(images, _)``.
    """
    images, _ = next(test_dataset.take(1).as_numpy_iterator())
    # Never index past the end of a short batch or a short prediction array.
    shown = min(10, len(images), len(y_pred))
    for i in range(shown):
        score = y_pred[i][0]
        if score > 0.5:
            print("I am {a:.2%} sure I am Cat".format(a=score))
        else:
            print("I am {a:.2%} sure I am Dog".format(a=(1 - score)))
        plt.imshow(images[i])
        plt.show()
示例#15
0
    def _build_and_restore_models(self, dataset: tf.data.Dataset):
        """Call the encoder on one sample, restore it, then defer to the parent.

        Args:
            dataset: Dataset whose first element unpacks as ``((x, _), _)``;
                ``x`` is fed to the encoder. The same dataset is passed on to
                the parent implementation.
        """
        checkpoint_restorer = ashpy.restorers.AdversarialEncoderRestorer(
            self._logdir)
        first_element = next(iter(dataset.take(1)))
        (real_inputs, _), _ = first_element

        # The encoder is called once on real input before restoring
        # (presumably so that its variables exist -- TODO confirm).
        self._encoder(real_inputs)
        checkpoint_restorer.restore_encoder(self._encoder)

        super()._build_and_restore_models(dataset)
        self._deferred_restoration = False
示例#16
0
    def _valid_step(self, dataset: tf.data.Dataset, steps_per_epoch: int,
                    progress_bar: ProgressBar, *args, **kwargs) -> Dict:
        """Run one validation pass and compute recall at positions 1, 2 and 5.

        :param dataset: validation dataset; each element unpacks as
            ``(utterances, responses, label)``
        :param steps_per_epoch: number of validation steps to take
        :param progress_bar: progress reporter argument (replaced by a new
            ``ProgressBar`` inside this method)
        :return: dict with ``train_loss``, ``train_accuracy``,
            ``valid_R10@1``, ``valid_R10@2`` and ``valid_R10@5``
        """
        print("验证轮次")
        began_at = time.time()
        self.loss_metric.reset_states()
        self.accuracy_metric.reset_states()
        # NOTE(review): the caller's `progress_bar` is discarded here and a
        # fresh instance is used instead -- confirm this is intended.
        progress_bar = ProgressBar(total=steps_per_epoch, num=self.batch_size)

        all_scores = tf.constant([], dtype=self.model.dtype)
        all_labels = tf.constant([], dtype=self.model.dtype)
        batches = dataset.take(steps_per_epoch)
        for step, (utterances, responses, label) in enumerate(batches,
                                                              start=1):
            batch_scores = self._valid_ont_step(utterances=utterances,
                                                responses=responses,
                                                label=label)
            # Column 1 of the step output is accumulated as the score.
            all_scores = tf.concat(values=[all_scores, batch_scores[:, 1]],
                                   axis=0)
            label_as_float = tf.cast(x=label, dtype=self.model.dtype)
            all_labels = tf.concat(values=[all_labels, label_as_float],
                                   axis=0)

            running = "- train_loss: {:.4f} - train_accuracy: {:.4f}".format(
                self.loss_metric.result(), self.accuracy_metric.result())
            progress_bar(current=step, metrics=running)

        recalls = recall_at_position_k_in_n(
            labels=[all_scores.numpy(), all_labels.numpy()],
            k=[1, 2, 5],
            n=10,
            tar=1.0)
        message = {
            "train_loss": self.loss_metric.result(),
            "train_accuracy": self.accuracy_metric.result(),
            "valid_R10@1": recalls[0],
            "valid_R10@2": recalls[1],
            "valid_R10@5": recalls[2]
        }

        progress_bar(current=steps_per_epoch,
                     metrics=get_dict_string(data=message))
        progress_bar.done(step_time=time.time() - began_at)

        return message
示例#17
0
def make_submission(model,
                    image_ds: tf.data.Dataset,
                    filename: str = "submission.csv"):
    """Write one ``image_id,label`` CSV row per dataset element.

    Args:
        model: Callable applied to each image; the arg-max over the last axis
            of its output is the predicted label.
        image_ds: Dataset whose elements unpack as ``(image, image_id)``;
            ``image_id`` is decoded from UTF-8 bytes.
        filename: Path of the CSV file to (over)write.
    """
    # The context manager closes the file even if prediction or decoding
    # raises part-way through.
    with open(filename, "w") as f:
        f.write("image_id,label\n")

        # take(-1) iterates over every element of the dataset.
        for image, image_id in image_ds.take(-1):
            pred = tf.argmax(model(image), axis=-1)
            f.write(f"{image_id.numpy().decode('utf-8')},{pred[0]}\n")
示例#18
0
def _valid_step(model: tf.keras.Model,
                dataset: tf.data.Dataset,
                progress_bar: ProgressBar,
                batch_size: Any,
                loss_metric: tf.keras.metrics.Mean,
                max_train_steps: Any = -1) -> Dict:
    """Run one validation pass, tracking the masked loss and the ROC AUC.

    :param model: model under validation, called as ``model(inputs=queries)``
    :param dataset: validation dataset; each element unpacks as
        ``(queries, _, true_outputs, labels)``
    :param progress_bar: progress reporter
    :param batch_size: divisor applied to the summed masked loss
    :param loss_metric: running loss metric; reset at the start
    :param max_train_steps: number of validation steps to take
    :return: dict with ``valid_loss`` and ``valid_auc``
    """
    print("验证轮次")
    began_at = time.time()
    loss_metric.reset_states()
    all_scores = tf.convert_to_tensor([], dtype=tf.float32)
    all_targets = tf.convert_to_tensor([], dtype=tf.int32)

    batches = dataset.take(max_train_steps)
    for step, (queries, _, true_outputs, labels) in enumerate(batches,
                                                              start=1):
        outputs = model(inputs=queries)
        criterion = tf.keras.losses.SparseCategoricalCrossentropy(
            reduction=tf.keras.losses.Reduction.NONE)
        unreduced_loss = criterion(true_outputs, outputs)
        # Positions whose target id is 0 do not contribute to the loss.
        keep = tf.cast(x=tf.math.not_equal(true_outputs, 0), dtype=tf.float32)
        batch_loss = tf.reduce_sum(keep * unreduced_loss) / batch_size

        loss_metric(batch_loss)

        # Score = softmax over output ids 5 and 6 at position 0, second entry.
        step_scores = tf.nn.softmax(logits=outputs[:, 0, 5:7], axis=-1)[:, 1]
        all_scores = tf.concat([all_scores, step_scores], axis=0)
        all_targets = tf.concat([all_targets, labels], axis=0)

        running = get_dict_string(data={"valid_loss": loss_metric.result()})
        progress_bar(current=step, metrics=running)

    auc_score = roc_auc_score(y_true=all_targets, y_score=all_scores)
    progress_bar(current=progress_bar.total,
                 metrics=get_dict_string(data={
                     "valid_loss": loss_metric.result(),
                     "valid_auc": auc_score
                 }))

    elapsed = time.time() - began_at
    progress_bar.done(step_time=elapsed)

    return {"valid_loss": loss_metric.result(), "valid_auc": auc_score}
    def adapt(self, ds: tf.data.Dataset) -> None:
        """
        Compute cross-sample statistics. Assumes that ``ds`` is a batched dataset.

        The mean and the mean of squares are accumulated over all elements of
        ``ds`` as count-weighted running averages, then stored as the
        non-trainable weights ``mean`` and ``stddev``.

        Args:
            ds: Instance of ``tf.data.Dataset`` containing train data to compute statistics from.

        Raises:
            RuntimeError: Raised when axes are set to SAMPLE_WISE, in which case there is no point in
                calling adapt.
        """
        if self.axes == NormalizeAxes.SAMPLE_WISE:
            raise RuntimeError("No point in adapting when axes are SAMPLE_WISE")

        axis: Optional[int]
        if self.axes == NormalizeAxes.VARIABLE_WISE:
            axis = 0
            # Statistics take the shape of a single sample: the first entry
            # of the first batch.
            shape = tf.shape(next(ds.take(1).as_numpy_iterator())[0])
        else:
            axis = None
            shape = ()
        mean_x = tf.zeros(shape)
        mean_x2 = tf.zeros(shape)
        count = tf.constant(0.0)

        for x in ds:
            x_float = tf.cast(x, tf.float32)
            elem_mean_x = tf.reduce_mean(x_float, axis=axis)
            elem_mean_x2 = tf.reduce_mean(tf.square(x_float), axis=axis)
            # Number of values that were averaged into each statistic entry.
            elem_count = tf.cast(tf.size(x) / tf.size(elem_mean_x), tf.float32)
            new_count = count + elem_count
            mean_x = mean_x * (count / new_count) + elem_mean_x * (
                elem_count / new_count
            )
            mean_x2 = mean_x2 * (count / new_count) + elem_mean_x2 * (
                elem_count / new_count
            )
            count = new_count

        # E[x^2] - E[x]^2 can come out slightly negative through floating-point
        # rounding; clamp at zero so the square root never yields NaN.
        variance = tf.maximum(mean_x2 - tf.square(mean_x), 0.0)

        self.mean = self.add_weight(
            "mean",
            initializer=tf.keras.initializers.Constant(mean_x),
            trainable=False,
            shape=shape,
        )
        self.stddev = self.add_weight(
            "stddev",
            initializer=tf.keras.initializers.Constant(tf.sqrt(variance)),
            shape=shape,
            trainable=False,
        )
示例#20
0
def split_dataset(ds: tf.data.Dataset):
    """Split ``ds`` into train, test and validation parts.

    The first 500 elements form the test part, the next 500 the validation
    part, and everything after the first 1000 the training part.

    Args:
        ds: Dataset to split.

    Returns:
        ``(train_dataset, test_dataset, val_dataset)``.
    """
    val_num, test_num = 500, 500
    holdout_size = val_num + test_num

    train_dataset = ds.skip(holdout_size)
    holdout = ds.take(holdout_size)

    test_dataset, val_dataset = holdout.take(test_num), holdout.skip(test_num)

    print("dataset splitted")
    return train_dataset, test_dataset, val_dataset