示例#1
0
    def evaluate_tflite(self,
                        tflite_filepath: str,
                        dataset: tf.data.Dataset,
                        steps: int,
                        json_file: str = None) -> Dict[str, float]:
        """Evaluate the EfficientDet TFLite model.

        Args:
          tflite_filepath: File path to the TFLite model.
          dataset: tf.data.Dataset used for evaluation.
          steps: Number of steps to evaluate the model.
          json_file: JSON with COCO data format containing golden bounding
            boxes. Used for validation. If None, use the ground truth from the
            dataloader. Refer to
            https://towardsdatascience.com/coco-data-format-for-object-detection-a4c5eaf518c5
            for the description of COCO data format.

        Returns:
          A dict contains AP metrics.
        """
        # TODO(b/182441458): Use the task library for evaluation instead once it
        # supports python interface.
        evaluator, label_map = self._get_evaluator_and_label_map(json_file)
        dataset = dataset.take(steps)

        lite_runner = eval_tflite.LiteRunner(tflite_filepath,
                                             only_network=False)

        # The image size comes from the config and does not change between
        # batches, so the box scaling factor is built once.
        height, width = utils.parse_image_size(self.config.image_size)
        normalize_factor = tf.constant([height, width, height, width],
                                       dtype=tf.float32)

        progbar = tf.keras.utils.Progbar(steps)
        for i, (images, labels) in enumerate(dataset):
            # Get the output result after post-processing NMS op.
            nms_boxes, nms_classes, nms_scores, _ = lite_runner.run(images)

            # CLASS_OFFSET is used since label_id for `background` is 0 in label_map
            # while it's not actually included the model. We don't need to add the
            # offset in the Android application.
            nms_classes += postprocess.CLASS_OFFSET

            nms_boxes *= normalize_factor
            if labels['image_scales'] is not None:
                # Add two trailing axes so the per-image scales broadcast
                # against the boxes.
                scales = tf.expand_dims(
                    tf.expand_dims(labels['image_scales'], -1), -1)
                nms_boxes = nms_boxes * tf.cast(scales, nms_boxes.dtype)
            detections = postprocess.generate_detections_from_nms_output(
                nms_boxes, nms_classes, nms_scores, labels['source_ids'])

            detections = postprocess.transform_detections(detections)
            evaluator.update_state(labels['groundtruth_data'].numpy(),
                                   detections.numpy())
            # Report the number of completed steps (1-based) so the bar
            # reaches `steps` after the last batch.
            progbar.update(i + 1)

        metric_dict = self._get_metric_dict(evaluator, label_map)
        return metric_dict
示例#2
0
def _train(dataset: tf.data.Dataset, image_shape: Tuple[int, int, int],
           epochs: int) -> tf.keras.Model:
    """Run the training loop up to the requested number of epochs.

    The epoch range starts at the checkpoint's save counter, so a run that was
    restored from a checkpoint continues from where it stopped. After every
    epoch the checkpoint, the history file, the history plot and a
    reconstruction image are written under ``data/simple_cae``.

    Args:
        dataset (tf.data.Dataset): Training dataset; iterated once per epoch.
        image_shape (Tuple[int, int, int]): Size of a single training image.
        epochs (int): Number of epochs to train.

    Returns:
        tf.keras.Model: The trained model.
    """
    out_dir = pathlib.Path("data/simple_cae")
    history_filepath = out_dir / "history.pkl"
    history_imagepath = out_dir / "history.png"
    reconstruct_filepath = out_dir / "reconstruct.png"

    model = network.Autoencoder(image_shape)
    optimizer = tf.keras.optimizers.Adam(1e-4)
    loss = tf.keras.losses.mean_squared_error

    checkpoint = Checkpoint(
        save_dir=str(out_dir / "ckpts"),
        max_to_keep=3,
        restore=True,
        model=model,
        optimizer=optimizer,
    )
    epoch_history = history.restore(history_filepath)

    # One fixed element of the dataset, reused every epoch so that the
    # reconstruction images are comparable across epochs.
    input_example = list(dataset.take(1))[-1]

    first_epoch = checkpoint.save_counter()
    progress_bar = tqdm(range(first_epoch, epochs))
    for epoch in progress_bar:
        # Train on every batch of the dataset.
        batch_history = history.Batch()
        for batch in dataset:
            model.train_step(batch, loss, optimizer, batch_history)

        # Persist the checkpoint and the updated history.
        checkpoint.save()
        batch_history.result()
        epoch_history.result(batch_history)
        history.save(epoch_history, history_filepath)

        # Report progress and refresh the diagnostic images.
        progress_bar.set_description(
            f"epoch: {epoch}, {epoch_history.get_latest()}")
        history.show_image(epoch_history, filepath=history_imagepath)
        reconstruction = network.reconstruct(model, input_example)
        visualize.show_images(
            input_example,
            reconstruction,
            image_shape,
            reconstruct_filepath,
        )

    return model
def convert_snippets_to_character_sequence_examples(
    dataset: tf.data.Dataset,
    batch_size: int,
    epochs: int,
    shuffle_buffer_size: int = 50,
    sequence_length: int = SEQUENCE_LENGTH,
    max_batches_per_client: int = -1) -> tf.data.Dataset:
  """Turn string snippets into batches of input/target character ID sequences.

  Args:
    dataset: the `tf.data.Dataset` to apply preprocessing to.
    batch_size: the number of examples per yielded batch.
    epochs: the number of times the dataset is repeated.
    shuffle_buffer_size: Buffer size for shuffling the dataset. If nonpositive,
      no shuffling occurs.
    sequence_length: the length of each example in the batch.
    max_batches_per_client: If set to a positive integer, the maximum number of
      batches in each client's dataset. The value is passed unchanged to
      `Dataset.take`.

  Returns:
    A `tf.data.Dataset` yielding `(sequence of character IDs, sequence of
    character IDs)` where each sequence has `sequence_length` values.
  """
  autotune = tf.data.experimental.AUTOTUNE
  # Each window holds one extra token so it can be split into input and target.
  window_length = sequence_length + 1
  tokenize = _build_tokenize_fn(split_length=window_length)

  snippets = dataset.repeat(epochs)
  if shuffle_buffer_size > 0:
    snippets = snippets.shuffle(shuffle_buffer_size)

  # Convert snippets to tokens, then flatten into a stream of single tokens.
  token_stream = snippets.map(tokenize, num_parallel_calls=autotune).unbatch()
  # Regroup the stream into windows of the desired length. The tokenizer is
  # built with split_length=window_length, presumably so that no tokens are
  # dropped here -- verify against _build_tokenize_fn.
  windows = token_stream.batch(window_length, drop_remainder=True)
  # Batch windows together for mini-batching purposes.
  mini_batches = windows.batch(batch_size)
  # Convert batches into training examples.
  examples = mini_batches.map(_split_target, num_parallel_calls=autotune)
  # Take a maximum number of batches.
  return examples.take(max_batches_per_client)
示例#4
0
def processing(dataset: tf.data.Dataset, window_size, batch_size):
    """Build shuffled (window, next-item) training batches from `dataset`.

    Each element is passed through the module-level `table` lookup and the
    result is unbatched into a flat stream. Sliding windows of
    `window_size + 1` items (shift 1) are then split into the first
    `window_size` items as input and the last item minus one as target.

    Args:
        dataset: Source dataset whose elements are looked up in `table`.
        window_size: Number of items in each input window.
        batch_size: Number of (input, target) pairs per batch.

    Returns:
        The shuffled, batched and prefetched dataset.
    """
    span = window_size + 1

    ids = dataset.map(lambda x: table.lookup(x)).unbatch()
    windows = ids.window(span, shift=1, drop_remainder=True)
    # window() yields nested datasets; batching each one turns it into a tensor.
    sequences = windows.flat_map(lambda w: w.batch(span))
    pairs = sequences.map(lambda seq: (seq[:-1], seq[-1] - 1))
    return pairs.shuffle(10000).batch(batch_size).prefetch(1)
def compute_predictions(
    model: PredictionModel, dataset: tf.data.Dataset,
    strategy: tf.distribute.Strategy, batch_size: int
) -> Iterator[Tuple[types.ModelPredictions, types.Features]]:
  """Yield per-example model predictions together with their metadata.

  The dataset is batched and distributed with `strategy`. Each batch is timed
  as a whole and the elapsed time is divided evenly among its examples.

  Args:
    model: A function that takes tensor-valued features and returns a vector of
      predictions.
    dataset: The dataset that the function consumes to produce the predictions.
    strategy: The distribution strategy to use when computing.
    batch_size: The batch size that should be used.

  Yields:
    Pairs of model predictions and the corresponding metadata.
  """
  with strategy.scope():
    shard_options = tf.data.Options()
    shard_options.experimental_distribute.auto_shard_policy = (
        tf.data.experimental.AutoShardPolicy.DATA)
    batched = dataset.batch(batch_size)
    batched = batched.prefetch(tf.data.experimental.AUTOTUNE)
    dataset = batched.with_options(shard_options)

  for features in strategy.experimental_distribute_dataset(dataset):
    time_start = time.time()
    # TODO(josipd): Figure this out better. We can't easily filter,
    #               as they are PerReplica values, not tensors.
    # On TPU only the "image" feature is handed to the model.
    on_tpu = isinstance(strategy, tf.distribute.experimental.TPUStrategy)
    features_model = {"image": features["image"]} if on_tpu else features
    per_replica_output = strategy.run(model, args=(features_model,))
    predictions = materialize(strategy, per_replica_output)
    time_end = time.time()

    num_examples = predictions.shape[0]
    time_delta_per_example = (time_end - time_start) / num_examples
    metadatas = materialize(strategy, features["metadata"])
    for example_idx in range(num_examples):
      yield (
          types.ModelPredictions(
              predictions=[predictions[example_idx]],
              time_in_s=time_delta_per_example),
          _slice_dictionary(metadatas, example_idx),
      )
    def __init__(self, factory: TFToxicDataSetsFactory,
                 dataset: tf.data.Dataset, size: int):
        """Store the factory and wrap `dataset` in a shuffled, batched pipeline.

        Args:
            factory: The `TFToxicDataSetsFactory` this object belongs to.
            dataset: Source dataset; shuffled with a buffer of 1000, batched
                with `self.batch_size` and prefetched by one batch.
            size: Positive size value stored as-is.

        Raises:
            AssertionError: If an argument has the wrong type or `size <= 0`.
        """
        assert isinstance(factory, TFToxicDataSetsFactory)
        assert isinstance(dataset, tf.data.Dataset)
        assert size > 0

        self._factory = factory
        # NOTE(review): `self.batch_size` is read before anything is assigned
        # here -- presumably a class-level attribute or property; confirm.
        pipeline = dataset.shuffle(1000)
        pipeline = pipeline.batch(self.batch_size)
        self._dataset = pipeline.prefetch(1)
        self._size = size
        self._batch_index = 0
    def preprocess(self, dataset: tf.data.Dataset) -> tf.data.Dataset:
        """Normalize the image, metadata and target of every dataset element.

        The first component (`target_ghi_dummy`) is passed through untouched.
        """
        def _normalize_element(target_ghi_dummy, metadata, image, target_ghi):
            scaled_image = self.scaling_image.normalize(image)
            # NOTE(review): metadata goes through the GHI scaler, not a
            # dedicated one -- confirm this is intended.
            scaled_metadata = self.scaling_ghi.normalize(metadata)
            scaled_target = self.scaling_ghi.normalize(target_ghi)
            return (target_ghi_dummy, scaled_metadata, scaled_image,
                    scaled_target)

        autotune = tf.data.experimental.AUTOTUNE
        return dataset.map(_normalize_element, num_parallel_calls=autotune)
示例#8
0
文件: general.py 项目: rlinus/sleap
    def transform_dataset(self, input_ds: tf.data.Dataset) -> tf.data.Dataset:
        """Return a dataset whose examples hold only the keys in `self.keep_keys`."""
        def _select_keys(example):
            """Copy the kept keys of one example into a new dict."""
            kept = {}
            for key in self.keep_keys:
                kept[key] = example[key]
            return kept

        autotune = tf.data.experimental.AUTOTUNE
        return input_ds.map(_select_keys, num_parallel_calls=autotune)
示例#9
0
def pretext_dataset(dataset:tf.data.Dataset, start_label:int)->tf.data.Dataset:
    """Filter `dataset` down to the examples with label >= `start_label`.

    NOTE(review): this function looks unfinished. It has no return statement,
    so it returns None despite the annotated return type, and neither
    `filtered` nor the two nested helpers are used.

    Args:
        dataset: Dataset of dict examples; the 'label' key is read by the
            filter and the 'image' key by `supervised_transform`.
        start_label: Smallest label value that passes the filter.
    """
    # Keep only the examples whose label is at least `start_label`.
    filtered = dataset.filter(lambda data:data['label'] >= start_label)

    def supervised_transform(data):
        # Casts the image to float32 and divides it by 255.
        # NOTE(review): the scaled image is never returned, so this helper
        # yields None.
        image = data['image']
        image = tf.cast(image, tf.float32)
        image = image / 255.0


    def random_transform(image):
        # Placeholder without an implementation.
        pass
示例#10
0
def _add_parsing(dataset: tf.data.Dataset) -> tf.data.Dataset:
    """Parse serialized example protos into an OrderedDict with 'snippets'."""
    feature_spec = {
        'snippets': tf.io.FixedLenFeature(shape=(), dtype=tf.string),
    }

    def _to_snippets(serialized_proto_tensor):
        parsed = tf.io.parse_example(serialized_proto_tensor, feature_spec)
        return collections.OrderedDict([('snippets', parsed['snippets'])])

    return dataset.map(_to_snippets, num_parallel_calls=tf.data.AUTOTUNE)
示例#11
0
def wrap_detection_dataset(ds: tf.data.Dataset, im_size: Tuple[int, int],
                           num_classes: int) -> tf.data.Dataset:
    """Map `_compute_gt` over `ds` with the anchors and class count bound.

    Args:
        ds: Detection dataset to wrap.
        im_size: Image size; only the first entry is used to generate anchors.
        num_classes: Number of classes forwarded to `_compute_gt`.

    Returns:
        The dataset mapped through `_compute_gt`.
    """
    anchor_config = config.AnchorsConfig()
    anchors = _generate_anchors(anchor_config, im_size[0])

    # Bind the fixed arguments so the mapped function only receives the
    # dataset elements.
    add_anchor_labels = functools.partial(
        _compute_gt, anchors=anchors, num_classes=num_classes)
    return ds.map(add_anchor_labels)
def plot_predicted_images(y_pred: np.ndarray,
                          test_dataset: tf.data.Dataset) -> None:
    """Print the predicted class and show the image for up to 10 test samples.

    Only the first batch of `test_dataset` is used. A score above 0.5 in
    column 0 of `y_pred` is reported as "Cat", anything else as "Dog".

    Args:
        y_pred: Predictions indexed as `y_pred[i][0]`, one row per image.
        test_dataset: Dataset yielding `(images, labels)` batches.
    """
    images, _ = next(test_dataset.take(1).as_numpy_iterator())

    # Never index past the batch or the predictions when fewer than 10 exist.
    n_shown = min(10, len(images), len(y_pred))
    for i in range(n_shown):
        cat_score = y_pred[i][0]
        if cat_score > 0.5:
            print("I am {a:.2%} sure I am Cat".format(a=cat_score))
        else:
            print("I am {a:.2%} sure I am Dog".format(a=(1 - cat_score)))
        plt.imshow(images[i])
        plt.show()
示例#13
0
def to_batch_dataset(dataset: tf.data.Dataset, batchsize: int = 100, drop_remainder: bool = False):
    """
    Batch a dataset, e.g. one produced by the Tensorflow `from_generator` function.

    :param dataset: Tensorflow dataset to batch
    :param batchsize: The number of data records to be included in each batch
    :param drop_remainder: Whether samples that do not fill a complete batch are dropped
    :return: The result of `dataset.batch(...)`
    """
    batched = dataset.batch(batchsize, drop_remainder=drop_remainder)
    return batched
def split_dataset(dataset: tf.data.Dataset, val_split: float,
                  test_split: float):
    """Split a dataset into training, validation and test subsets.

    Elements are assigned by position: within every run of 100 consecutive
    elements the first `round(split * 100)` go to the held-out subset. The
    test subset is removed first; the validation subset is then taken from
    what remains, so `val_split` is a fraction of the non-test data.

    Source: https://stackoverflow.com/questions/59669413/what-is-the-canonical-way-to-split-tf-dataset-into-test-and-validation-subsets

    Args:
        dataset: The input dataset to split.
        val_split: Fraction of the non-test data used for validation, as a
            float between 0 and 1. Rounded to a whole percent.
        test_split: Fraction of the data used for testing, as a float between
            0 and 1. Rounded to a whole percent.

    Returns:
        A tuple `(train_dataset, val_dataset, test_dataset)`.

    Raises:
        ValueError: If a fraction rounds to a percentage outside [0, 100].
    """
    test_data_percent = round(test_split * 100)
    if not (0 <= test_data_percent <= 100):
        raise ValueError("test data fraction must be ∈ [0,1]")

    val_data_percent = round(val_split * 100)
    if not (0 <= val_data_percent <= 100):
        raise ValueError("val data fraction must be ∈ [0,1]")

    def _hold_out(source, percent):
        """Return (kept, held_out) with `percent` of every 100 held out."""
        indexed = source.enumerate()
        # Positions 0..percent-1 of each block of 100 are held out. A strict
        # `<` gives exactly `percent` elements per block and none at 0.
        kept = indexed.filter(lambda f, data: f % 100 >= percent)
        held_out = indexed.filter(lambda f, data: f % 100 < percent)
        # Drop the enumeration index again.
        kept = kept.map(lambda f, data: data)
        held_out = held_out.map(lambda f, data: data)
        return kept, held_out

    train_val_dataset, test_dataset = _hold_out(dataset, test_data_percent)
    train_dataset, val_dataset = _hold_out(train_val_dataset,
                                           val_data_percent)

    return train_dataset, val_dataset, test_dataset
示例#15
0
def get_top_tokens(corpus: tf.data.Dataset,
                   n_top: int = 1000) -> Tuple[dict, int, int]:
    """
    Build the token mapping used to initialize the word embeddings in the model.

    Every element of the corpus is split with `tf.strings.split(x, sep='')`
    and the most frequent resulting tokens are kept. Ids 0 and 1 are reserved
    for the `<s>` and `</s>` markers; the kept tokens are numbered from 2 in
    order of decreasing frequency.

    Parameters
    ----------
    corpus : tf.data.Dataset
        Entire dataset object
    n_top : int, optional
        Number of most frequent vocab terms to keep for training, by default 1000

    Returns
    -------
    (dict, int, int)
        (token->integer lookup, maximum sequence length, size of data set)
    """
    autotune = tf.data.experimental.AUTOTUNE
    token_counts = Counter()
    longest_sequence = 0
    n_examples = 0

    tokenized = corpus.map(lambda x: tf.strings.split(x, sep=''),
                           num_parallel_calls=autotune)
    ragged_batches = tokenized.apply(
        tf.data.experimental.dense_to_ragged_batch(32)).prefetch(5)

    for ragged in ragged_batches:
        token_counts.update(ragged.flat_values.numpy())
        batch_longest = int(ragged.row_lengths().numpy().max())
        longest_sequence = max(longest_sequence, batch_longest)
        n_examples += int(ragged.nrows())

    # tensorflow converts strings to bytes, let's maintain that (no decoding)
    vocab = {}
    most_common = token_counts.most_common(n_top)
    for token_id, (token, _) in enumerate(most_common, start=2):
        vocab[token] = token_id
    vocab["<s>".encode('utf8')] = 0
    vocab["</s>".encode('utf8')] = 1
    return vocab, longest_sequence, n_examples
示例#16
0
    def get_test_tfdataset(self,
                           test_dataset: tf.data.Dataset) -> tf.data.Dataset:
        """
        Returns a batched, distributed test :class:`~tf.data.Dataset`.

        The batch size, the drop-remainder flag and the distribution strategy
        are all read from ``self.args``.

        Args:
            test_dataset (:class:`~tf.data.Dataset`): The dataset to use.
        """
        args = self.args
        batched = test_dataset.batch(
            args.eval_batch_size, drop_remainder=args.dataloader_drop_last)
        return args.strategy.experimental_distribute_dataset(batched)
示例#17
0
  def _ApplyDecoderToDataset(
      self, dataset: tf.data.Dataset) -> tf.data.Dataset:
    """Decodes every record, keeping only tensors with a TensorRepresentation.

    The decoder is loaded from `self._saved_decoder_path`.
    """
    decoder = tf_graph_record_decoder.load_decoder(self._saved_decoder_path)

    def _DecodeAndFilter(record):
      kept = {}
      for name, tensor in decoder.decode_record(record).items():
        if name in self.TensorRepresentations():
          kept[name] = tensor
      return kept

    return dataset.map(_DecodeAndFilter)
示例#18
0
 def get_label(max_k: tf.data.Dataset, labels: List[str]) -> str:
     """Return the candidate label with the largest summed weight.

     The first element of `max_k` is unpacked as `(values, labels)`. Each
     value is added to the running total of the candidate equal to the label
     at the same position. According to the original author's note the values
     are inverted distances, so closer entries weigh more.

     Args:
         max_k: Dataset whose first element is a `(values, labels)` pair.
         labels: Candidate category names; compared as UTF-8 encoded bytes.

     Returns:
         The winning candidate, taken from a numpy array of bytes (not
         decoded back to `str`).
     """
     # to bytes: https://stackoverflow.com/questions/6269765/what-does-the-b-character-do-in-front-of-a-string-literal
     encoded: List[bytes] = [name.encode("UTF-8") for name in labels]
     weights, neighbour_labels = max_k.as_numpy_iterator().next()
     totals = np.zeros(len(encoded))
     candidates = np.array(encoded)
     # Accumulate each weight onto the candidate matching its label.
     for pos, weight in enumerate(weights):
         matches = np.argwhere(candidates == neighbour_labels[pos])
         totals[matches] += weight
     # The label with the highest aggregate weight wins.
     return candidates[np.argmax(totals)]
示例#19
0
def pack_as_supervised_ds(
    ds: tf.data.Dataset,
    ds_info: DatasetInfo,
) -> tf.data.Dataset:
    """Pack `(input, label)` dataset as `{'key0': input, 'key1': label}`.

    The keys come from `ds_info.supervised_keys`. A dataset whose element
    spec is not a 2-tuple, or whose info has no supervised keys, is returned
    as-is.
    """
    if not ds_info.supervised_keys:
        return ds

    spec = ds.element_spec
    if not isinstance(spec, tuple) or len(spec) != 2:
        return ds

    x_key, y_key = ds_info.supervised_keys
    return ds.map(lambda x, y: {x_key: x, y_key: y})
示例#20
0
def _prepare_ds(
    ds: tf.data.Dataset,
    img_shape: Tuple[Optional[int], Optional[int], Optional[int]],
    batch_size: int = 8,
):
    """Resize every image to the first two entries of `img_shape`, then batch.

    Args:
        ds: Dataset of `(image, label)` pairs.
        img_shape: Target shape; only the first two entries are used.
        batch_size: Number of examples per batch.

    Returns:
        The resized, batched and prefetched dataset.
    """
    def _resize(image, label):
        target_size = list(img_shape)[:2]
        return tf.image.resize(image, target_size), label

    resized = ds.map(_resize, num_parallel_calls=AUTOTUNE)
    return resized.batch(batch_size).prefetch(AUTOTUNE)
def get_normalization_layer(name: str, ds: tf.data.Dataset, weighted=False):
    """Create a normalization layer adapted to one numeric feature.

    :param name: Name of the numeric column (feature)
    :param ds: Tensorflow Dataset object containing x and y values
    :param weighted: Boolean argument specifying if the dataset contains sample weights
    :return: Normalization layer adapted to the feature scale
    """
    # Pick the extractor matching the element arity: (x, y, w) or (x, y).
    if weighted:
        def _feature(x, y, w):
            return x[name]
    else:
        def _feature(x, y):
            return x[name]

    normalizer = tf.keras.layers.experimental.preprocessing.Normalization(
        axis=None)
    # Adapt the layer on a dataset that yields only the requested feature.
    normalizer.adapt(ds.map(_feature))
    return normalizer
示例#22
0
def make_submission(model,
                    image_ds: tf.data.Dataset,
                    filename: str = "submission.csv"):
    """Write a CSV with one `image_id,label` row per element of `image_ds`.

    Args:
        model: Callable applied to each image; the argmax over the last axis
            of its output is written as the label.
        image_ds: Dataset yielding `(image, image_id)` pairs, where
            `image_id` is a UTF-8 encoded byte string.
        filename: Path of the CSV file to create or overwrite.
    """
    # The context manager closes the file even if prediction fails part-way.
    with open(filename, "w") as f:
        f.write("image_id,label\n")

        for image, image_id in image_ds.take(-1):
            pred = tf.argmax(model(image), axis=-1)
            f.write(f"{image_id.numpy().decode('utf-8')},{pred[0]}\n")
示例#23
0
def _prepare_train_dataset(dataset: tf.data.Dataset,
                           batch_size,
                           cache_path='',
                           shuffle_buffer_size=1000):
    """Cache (optionally), shuffle, repeat forever, batch and prefetch.

    Args:
        dataset: Training dataset to prepare.
        batch_size: Number of examples per batch.
        cache_path: Sub-directory of `opt.data_path` that receives the cache
            file; an empty string disables caching.
        shuffle_buffer_size: Buffer size handed to `shuffle`.

    Returns:
        The prepared, infinitely repeating dataset.
    """
    if cache_path != '':
        cache_file = os.path.join(opt.data_path, cache_path,
                                  'dataset_train.tfcache')
        dataset = dataset.cache(cache_file)

    # `prefetch` lets the dataset fetch batches in the background
    # while the model is training.
    return (dataset
            .shuffle(buffer_size=shuffle_buffer_size)
            .repeat()
            .batch(batch_size=batch_size)
            .prefetch(buffer_size=AUTOTUNE))
示例#24
0
    def preprocess(self, dataset: tf.data.Dataset) -> tf.data.Dataset:
        """Encode images and return the features as (input, target) pairs.

        The input is every encoded feature but the last, and the target is
        every one but the first.
        """

        def _run_encoder(images):
            return self.encoder(images, training=False)

        def _to_input_and_target(images):
            scaled = self.scaling_image.normalize(images)
            # The encoder is called through py_function.
            features = tf.py_function(
                func=_run_encoder, inp=[scaled], Tout=tf.float32)
            return (features[:-1], features[1:])

        return dataset.map(_to_input_and_target)
示例#25
0
def prepare_dataset(
    ds: tf.data.Dataset,
    batch_size: int,
    shuffle: bool = False,
    drop_remainder: bool = False,
):
    """Shuffle (optionally), batch and flatten an image dataset.

    Args:
        ds: Dataset of feature dicts with "image" and "label" entries.
        batch_size: Number of examples per batch.
        shuffle: If True, shuffle with a buffer as large as the dataset,
            seeded with the module-level `SEED`.
        drop_remainder: Whether a final partial batch is dropped.

    Returns:
        A dataset of `(image, label)` batches, where each image is cast to
        float32, divided by 255 and flattened to one row per example.
    """
    if shuffle:
        # Counting needs a full pass over the data, so do it only when the
        # count is actually used as the shuffle buffer size.
        size_of_dataset = ds.reduce(0, lambda x, _: x + 1).numpy()
        # Keep the buffer size positive even for an empty dataset.
        ds = ds.shuffle(buffer_size=max(int(size_of_dataset), 1), seed=SEED)
    ds = ds.batch(batch_size, drop_remainder=drop_remainder)

    @tf.function
    def prepare_data(features):
        image = tf.cast(features["image"], tf.float32)
        bs = tf.shape(image)[0]
        # Scale by 1/255 and flatten everything except the batch axis.
        image = tf.reshape(image / 255.0, (bs, -1))
        return image, features["label"]

    autotune = tf.data.experimental.AUTOTUNE
    ds = ds.map(prepare_data, num_parallel_calls=autotune).prefetch(autotune)
    return ds
示例#26
0
    def batch(self, dataset: tf.data.Dataset) -> tf.data.Dataset:
        """Batch `dataset` by quantile-bucketing on the premise length.

        The histogram bounds are `range(self.hist_min, self.hist_max,
        self.hist_step)`; the length of an example is the first dimension of
        its `PREMISE_KEY` entry.
        """
        bounds = list(range(self.hist_min, self.hist_max, self.hist_step))

        lowest, highest = bounds[0], bounds[-1]
        message = ("Quantile bucketing from %d-%d with %d buckets" %
                   (lowest, highest, len(bounds)))
        logging.info(message)

        def _premise_length(x):
            return tf.shape(x[PREMISE_KEY])[0]

        bucketing = ops.bucket_by_quantiles(
            len_fn=_premise_length,
            batch_size=self.batch_size,
            n_buckets=self.n_buckets,
            hist_bounds=bounds)
        return dataset.apply(bucketing)
示例#27
0
    def _valid_step(self, dataset: tf.data.Dataset, steps_per_epoch: int,
                    progress_bar: ProgressBar, *args, **kwargs) -> Dict:
        """Run one validation pass and return the collected metrics.

        NOTE(review): the `progress_bar` argument is ignored -- a fresh
        `ProgressBar` replaces it below. The metric labels use a "train_"
        prefix although this is the validation pass; both are kept as-is.

        :param dataset: dataset used for validation
        :param steps_per_epoch: total number of validation steps
        :param progress_bar: progress bar (currently replaced, see note)
        :return: dict of the resulting metrics
        """
        print("验证轮次")
        start_time = time.time()
        self.loss_metric.reset_states()
        self.accuracy_metric.reset_states()
        progress_bar = ProgressBar(total=steps_per_epoch, num=self.batch_size)

        # Scores and labels of all batches are accumulated into flat tensors.
        all_scores = tf.constant([], dtype=self.model.dtype)
        all_labels = tf.constant([], dtype=self.model.dtype)
        batches = dataset.take(steps_per_epoch)
        for step, (utterances, responses, label) in enumerate(batches,
                                                              start=1):
            score = self._valid_ont_step(utterances=utterances,
                                         responses=responses,
                                         label=label)
            # Only column 1 of the score is kept for the recall computation.
            all_scores = tf.concat(values=[all_scores, score[:, 1]], axis=0)
            label_as_float = tf.cast(x=label, dtype=self.model.dtype)
            all_labels = tf.concat(values=[all_labels, label_as_float],
                                   axis=0)

            step_metrics = (
                "- train_loss: {:.4f} - train_accuracy: {:.4f}".format(
                    self.loss_metric.result(),
                    self.accuracy_metric.result()))
            progress_bar(current=step, metrics=step_metrics)

        recalls = recall_at_position_k_in_n(
            labels=[all_scores.numpy(), all_labels.numpy()],
            k=[1, 2, 5],
            n=10,
            tar=1.0)
        message = {
            "train_loss": self.loss_metric.result(),
            "train_accuracy": self.accuracy_metric.result(),
            "valid_R10@1": recalls[0],
            "valid_R10@2": recalls[1],
            "valid_R10@5": recalls[2]
        }

        progress_bar(current=steps_per_epoch,
                     metrics=get_dict_string(data=message))
        elapsed = time.time() - start_time
        progress_bar.done(step_time=elapsed)

        return message
示例#28
0
def supervised_dataset(dataset: tf.data.Dataset,
                       max_label: int) -> tf.data.Dataset:
    """Build (image, one-hot label) pairs from examples with label < max_label.

    Images are cast to float32 and divided by 255; labels are one-hot encoded
    with depth `max_label`.
    """
    def _label_below_max(example):
        return example['label'] < max_label

    def _to_image_and_one_hot(example):
        pixels = tf.cast(example['image'], tf.float32) / 255.0
        one_hot = tf.one_hot(example['label'], max_label)
        return pixels, one_hot

    autotune = tf.data.experimental.AUTOTUNE
    kept = dataset.filter(_label_below_max)
    return kept.map(_to_image_and_one_hot, num_parallel_calls=autotune)
示例#29
0
    def transform_dataset(self, input_ds: tf.data.Dataset) -> tf.data.Dataset:
        """Create a dataset of per-instance crops from frame-level examples.

        Every frame is mapped to one entry per centroid and the result is
        unbatched, so the output holds one example per instance.
        """
        # Frame-level keys that are copied onto every instance of the frame.
        repeated_keys = ["scale", "video_ind", "frame_ind"]
        if self.other_keys_to_keep:
            repeated_keys += self.other_keys_to_keep
        if self.keep_instances_gt:
            repeated_keys.append("instances")

        def _crop_frame(frame_data):
            """Build the multi-instance example for a single frame."""
            full_image = frame_data[self.full_image_key]
            full_scale = frame_data[self.full_image_scale_key]

            # Undo the current scale, then apply the full-image scale, before
            # building the bounding boxes around the centroids.
            centroids = frame_data[self.centroids_key] / frame_data["scale"]
            centroids = centroids * full_scale
            bboxes = make_centered_bboxes(
                centroids,
                box_height=self.crop_height,
                box_width=self.crop_width,
            )

            # From here on "scale" refers to the full image.
            frame_data["scale"] = full_scale

            # Crop images from bounding boxes.
            crops = crop_bboxes(full_image, bboxes)
            n_instances = tf.shape(bboxes)[0]
            image_shape = tf.shape(full_image)

            instances_data = {
                "instance_image": crops,
                "bbox": bboxes,
                "center_instance_ind": tf.range(n_instances, dtype=tf.int32),
                "centroid": centroids,
                "centroid_confidence": frame_data[
                    self.centroid_confidences_key],
                "full_image_height": tf.repeat(image_shape[0], n_instances),
                "full_image_width": tf.repeat(image_shape[1], n_instances),
            }
            # Repeat each frame-level value once per instance.
            for key in repeated_keys:
                with_leading_axis = tf.expand_dims(frame_data[key], axis=0)
                instances_data[key] = tf.repeat(
                    with_leading_axis, n_instances, axis=0)
            return instances_data

        per_frame = input_ds.map(
            _crop_frame, num_parallel_calls=tf.data.experimental.AUTOTUNE
        )

        # Unbatch to split frame-level examples into instance-level examples.
        return per_frame.unbatch()
示例#30
0
def separate_by_target(ds: tf.data.Dataset, idx: int = 1, thr: float = 0.5
                       ) -> typing.Tuple[tf.data.Dataset, tf.data.Dataset]:
    """Split `ds` into elements below and at-or-above a target threshold.

    Args:
        ds: Dataset whose elements are tuples of components.
        idx: Position of the component compared against `thr`.
        thr: Threshold; the component is cast to float32 before comparing.

    Returns:
        `(below, at_or_above)`: the elements with component < thr and the
        elements with component >= thr.
    """
    def _target(components):
        return tf.cast(components[idx], tf.float32)

    def _is_below(*args):
        return _target(args) < thr

    def _is_at_or_above(*args):
        return _target(args) >= thr

    return ds.filter(_is_below), ds.filter(_is_at_or_above)
示例#31
0
文件: dataset.py 项目: bcho/homework
    def draw_images(self, ds: tf.data.Dataset, n=9):
        """Draw images from dataset on a grid three columns wide.

        When `n` is at least 3 it is rounded down to a multiple of 3; smaller
        positive values are drawn in a single row. Drawing stops early if the
        dataset runs out of elements.

        Args:
            ds: dataset yielding `(image, label)` pairs; `label[0]` is
                searched for the first entry equal to 1 to pick the class.
            n: first most n images to draw
        """
        import matplotlib.pyplot as plt

        if n <= 0:
            # Nothing requested; a grid with zero rows cannot be created.
            return

        cols = 3
        rows = max(n // cols, 1)
        limit = min(n, rows * cols)
        # squeeze=False keeps `ax` two-dimensional even for a single row, so
        # the [row, column] indexing below is always valid.
        fig, ax = plt.subplots(ncols=cols, nrows=rows, squeeze=False)

        it = ds.make_one_shot_iterator()
        next_element = it.get_next()
        drawn = 0
        with tf.Session() as s:
            while drawn < limit:
                try:
                    image, label = s.run(next_element)
                except tf.errors.OutOfRangeError:
                    # Dataset exhausted before `limit` images were drawn.
                    break
                class_idx = next(
                    idx for idx, flag in enumerate(label[0]) if flag == 1)
                class_name = self.image_classes[class_idx]

                image_data = np.asarray(image).astype(np.uint8)
                image_data = np.reshape(image_data, (224, 224, 3))
                image_fig = ax[drawn // cols, drawn % cols]
                image_fig.imshow(image_data)
                image_fig.set_title(class_name)
                drawn += 1
        fig.tight_layout()