Example #1
    def _prepare_expected_chunked_dataset(self):
        # Expected result of prep_data with p_w = 3, predictions_in_chunk = 2
        builder = tc.SFrameBuilder(
            [array.array, int, str, array.array, array.array],
            ['features', 'chunk_len', 'session_id', 'target', 'weights'])
        builder.append([[0, 0, 1, 10, 2, 20, 3, 30] + [0] * 4, 4, 's1', [1, 2],
                        [1, 1]])
        builder.append([[4, 40, 5, 50, 6, 60, 7, 70, 8, 80, 9, 90], 6, 's2',
                        [1, 3], [1, 1]])
        builder.append([[10, 100, 11, 110, 12, 120, 13, 130, 14, 140, 15, 150],
                        6, 's3', [1, 3], [1, 1]])
        builder.append([[16, 160, 17, 170] + [0] * 8, 2, 's3', [2, 0], [1, 0]])
        self.expected_chunked_3_2 = builder.close()

        # Expected result of prep_data with p_w = 2, predictions_in_chunk = 3
        builder = tc.SFrameBuilder(
            [array.array, int, str, array.array, array.array],
            ['features', 'chunk_len', 'session_id', 'target', 'weights'])
        builder.append([[0, 0, 1, 10, 2, 20, 3, 30] + [0] * 4, 4, 's1',
                        [1, 2, 0], [1, 1, 0]])
        builder.append([[4, 40, 5, 50, 6, 60, 7, 70, 8, 80, 9, 90], 6, 's2',
                        [1, 1, 3], [1, 1, 1]])
        builder.append([[10, 100, 11, 110, 12, 120, 13, 130, 14, 140, 15, 150],
                        6, 's3', [1, 2, 2], [1, 1, 1]])
        builder.append([[16, 160, 17, 170] + [0] * 8, 2, 's3', [2, 0, 0],
                        [1, 0, 0]])
        self.expected_chunked_2_3 = builder.close()

        # Expected result of prep_data with p_w = 4, predictions_in_chunk = 2
        builder = tc.SFrameBuilder(
            [array.array, int, str, array.array, array.array],
            ['features', 'chunk_len', 'session_id', 'target', 'weights'])
        builder.append([[0, 0, 1, 10, 2, 20, 3, 30] + [0] * 8, 4, 's1', [1, 0],
                        [1, 0]])
        builder.append([[4, 40, 5, 50, 6, 60, 7, 70, 8, 80, 9, 90] + [0] * 4,
                        6, 's2', [1, 3], [1, 1]])
        builder.append([[
            10, 100, 11, 110, 12, 120, 13, 130, 14, 140, 15, 150, 16, 160, 17,
            170
        ], 8, 's3', [1, 2], [1, 1]])
        self.expected_chunked_4_2 = builder.close()
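
A hedged sketch of how these expected SFrames might be consumed inside the same test class. The `prep_data` call (its signature and return value) plus the `self.data` and `self.features` fixtures are assumptions for illustration; `_assert_sframe_equal` is turicreate's SFrame comparison helper.

    def test_prep_data_chunked_3_2(self):
        # Hypothetical test usage; prep_data's signature and return value
        # are assumed here, not taken from the code above.
        from turicreate.util import _assert_sframe_equal
        self._prepare_expected_chunked_dataset()
        chunked, _ = prep_data(self.data, features=self.features,
                               session_id='session_id', prediction_window=3,
                               predictions_in_chunk=2, target='target')
        _assert_sframe_equal(chunked, self.expected_chunked_3_2)
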
Example #2
    def _predict_with_options(self, dataset, with_ground_truth,
                              postprocess=True, confidence_threshold=0.001,
                              verbose=True):
        """
        Predict with options for what kind of SFrame should be returned.

        If postprocess is False, a single numpy array with raw unprocessed
        results will be returned.
        """
        _raise_error_if_not_detection_sframe(dataset, self.feature, self.annotations,
                                             require_annotations=with_ground_truth)
        from ._sframe_loader import SFrameDetectionIter as _SFrameDetectionIter
        from ._detection import (yolo_map_to_bounding_boxes as _yolo_map_to_bounding_boxes,
                                 non_maximum_suppression as _non_maximum_suppression,
                                 bbox_to_ybox as _bbox_to_ybox)
        import mxnet as _mx
        loader = _SFrameDetectionIter(dataset,
                                      batch_size=self.batch_size,
                                      input_shape=self.input_image_shape[1:],
                                      output_shape=self._grid_shape,
                                      anchors=self.anchors,
                                      class_to_index=self._class_to_index,
                                      loader_type='stretched',
                                      load_labels=with_ground_truth,
                                      shuffle=False,
                                      epochs=1,
                                      feature_column=self.feature,
                                      annotations_column=self.annotations)

        num_anchors = len(self.anchors)

        # If prediction is done with ground truth, two SFrames of the same
        # structure are returned, the second one containing the ground truth
        # labels.
        num_returns = 2 if with_ground_truth else 1

        sf_builders = [
            _tc.SFrameBuilder([int, str, float, float, float, float, float],
                              column_names=['row_id', 'label', 'confidence',
                                            'x', 'y', 'width', 'height'])
            for _ in range(num_returns)
        ]

        dataset_size = len(dataset)
        ctx = _mxnet_utils.get_mxnet_context()
        done = False
        last_time = 0
        raw_results = []
        for batch in loader:
            if batch.pad is not None:
                size = self.batch_size - batch.pad
                b_data = _mx.nd.slice_axis(batch.data[0], axis=0, begin=0, end=size)
                b_indices = _mx.nd.slice_axis(batch.label[1], axis=0, begin=0, end=size)
                b_oshapes = _mx.nd.slice_axis(batch.label[2], axis=0, begin=0, end=size)
            else:
                b_data = batch.data[0]
                b_indices = batch.label[1]
                b_oshapes = batch.label[2]
                size = self.batch_size

            # Use no more contexts than there are samples, so every shard
            # produced by split_and_load below gets at least one sample.
            if b_data.shape[0] < len(ctx):
                ctx0 = ctx[:b_data.shape[0]]
            else:
                ctx0 = ctx

            split_data = _mx.gluon.utils.split_and_load(b_data, ctx_list=ctx0, even_split=False)
            split_indices = _mx.gluon.utils.split_data(b_indices, num_slice=len(ctx0), even_split=False)
            split_oshapes = _mx.gluon.utils.split_data(b_oshapes, num_slice=len(ctx0), even_split=False)

            for data, indices, oshapes in zip(split_data, split_indices, split_oshapes):
                z = self._model(data).asnumpy()
                if not postprocess:
                    raw_results.append(z)
                    continue

                ypred = z.transpose(0, 2, 3, 1)
                ypred = ypred.reshape(ypred.shape[:-1] + (num_anchors, -1))

                zipped = zip(indices.asnumpy(), ypred, oshapes.asnumpy())
                for index0, output0, oshape0 in zipped:
                    index0 = int(index0)
                    x_boxes, x_classes, x_scores = _yolo_map_to_bounding_boxes(
                            output0[_np.newaxis], anchors=self.anchors,
                            confidence_threshold=confidence_threshold,
                            nms_thresh=None)

                    x_boxes0 = _np.array(x_boxes).reshape(-1, 4)

                    # Normalize
                    x_boxes0[:, 0::2] /= self.input_image_shape[1]
                    x_boxes0[:, 1::2] /= self.input_image_shape[2]

                    # Re-shape to original input size
                    x_boxes0[:, 0::2] *= oshape0[0]
                    x_boxes0[:, 1::2] *= oshape0[1]

                    # Clip the boxes to the original sizes
                    x_boxes0[:, 0::2] = _np.clip(x_boxes0[:, 0::2], 0, oshape0[0])
                    x_boxes0[:, 1::2] = _np.clip(x_boxes0[:, 1::2], 0, oshape0[1])

                    # Non-maximum suppression (also limit to 100 detections per
                    # image, inspired by the COCO evaluation)
                    x_boxes0, x_classes, x_scores = _non_maximum_suppression(
                            x_boxes0, x_classes, x_scores,
                            num_classes=self.num_classes, threshold=self.non_maximum_suppression_threshold,
                            limit=100)

                    for bbox, cls, s in zip(x_boxes0, x_classes, x_scores):
                        cls = int(cls)
                        values = [index0, self.classes[cls], s] + list(_bbox_to_ybox(bbox))
                        sf_builders[0].append(values)

                    if index0 == len(dataset) - 1:
                        done = True

                    cur_time = _time.time()
                    # Do not print progress if only a few samples are predicted
                    if verbose and ((dataset_size >= 5 and cur_time > last_time + 10) or done):
                        print('Predicting {cur_n:{width}d}/{max_n:{width}d}'.format(
                            cur_n=index0 + 1, max_n=dataset_size, width=len(str(dataset_size))))
                        last_time = cur_time

                    if done:
                        break

            # Ground truth
            if with_ground_truth:
                zipped = _itertools.islice(zip(batch.label[1].asnumpy(), batch.raw_bboxes, batch.raw_classes), size)
                for index0, bbox0, cls0 in zipped:
                    index0 = int(index0)
                    for bbox, cls in zip(bbox0, cls0):
                        cls = int(cls)
                        if cls == -1:
                            break
                        values = [index0, self.classes[cls], 1.0] + list(bbox)
                        sf_builders[1].append(values)

                    if index0 == len(dataset) - 1:
                        break

        if postprocess:
            ret = tuple([sb.close() for sb in sf_builders])
            if len(ret) == 1:
                return ret[0]
            else:
                return ret
        else:
            return _np.concatenate(raw_results, axis=0)
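
The coordinate handling above (normalize by the network input shape, rescale to the original image shape, then clip) can be exercised in isolation. A minimal numpy sketch; the shapes and the single box value are made up:

import numpy as np

input_shape = (416, 416)   # network input (height, width); made up
oshape = (480, 640)        # original image (height, width); made up
boxes = np.array([[104.0, 208.0, 312.0, 416.0]])  # one box in network coordinates

boxes[:, 0::2] /= input_shape[0]   # normalize height-axis coordinates to [0, 1]
boxes[:, 1::2] /= input_shape[1]   # normalize width-axis coordinates to [0, 1]
boxes[:, 0::2] *= oshape[0]        # rescale to the original height
boxes[:, 1::2] *= oshape[1]        # rescale to the original width
boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, oshape[0])
boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, oshape[1])
print(boxes)                       # [[120. 320. 360. 640.]]
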
Example #3
def load_audio(path,
               with_path=True,
               recursive=True,
               ignore_failure=True,
               random_order=False):
    """
    Loads WAV file(s) from a path.

    Parameters
    ----------
    path : str
        Path to WAV files to be loaded.

    with_path : bool, optional
        Indicates whether a path column is added to the returned SFrame.

    recursive : bool, optional
        Indicates whether ``load_audio`` should do a recursive directory traversal,
        or only load audio files directly under ``path``.

    ignore_failure : bool, optional
        If True, only print warnings for failed files and keep loading the remaining
        audio files.

    random_order : bool, optional
        Load audio files in random order.

    Returns
    -------
    out : SFrame
        Returns an SFrame with either an 'audio' column or both an 'audio' and
        a 'path' column. The 'audio' column is a column of dictionaries.

        Each dictionary contains two items: 'sample_rate', the sample rate in
        samples per second (int), and 'data', the sample data in a numpy
        array. If the WAV file has a single channel, the array has a single
        dimension. If there are multiple channels, the array has shape (L, C),
        where L is the number of samples and C is the number of channels.

    Examples
    --------
    >>> audio_path = "~/Documents/myAudioFiles/"
    >>> audio_sframe = tc.audio_analysis.load_audio(audio_path, recursive=True)
    """
    from scipy.io import wavfile as _wavfile

    all_wav_files = []

    if _fnmatch(path, '*.wav'):  # single file
        all_wav_files.append(path)
    elif recursive:
        for (dir_path, _, file_names) in _os.walk(path):
            for cur_file in file_names:
                if _fnmatch(cur_file, '*.wav'):
                    all_wav_files.append(_os.path.join(dir_path, cur_file))
    else:
        all_wav_files = _glob(path + '/*.wav')

    if random_order:
        _shuffle(all_wav_files)

    result_builder = _tc.SFrameBuilder(column_types=[dict, str],
                                       column_names=['audio', 'path'])
    for cur_file_path in all_wav_files:
        try:
            sample_rate, data = _wavfile.read(cur_file_path)
        except Exception as e:
            error_string = "Could not read {}: {}".format(cur_file_path, e)
            if not ignore_failure:
                raise _ToolkitError(error_string)
            else:
                print(error_string)
                continue

        result_builder.append([{
            'sample_rate': sample_rate,
            'data': data
        }, cur_file_path])

    result = result_builder.close()
    if not with_path:
        del result['path']
    return result
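
A short usage sketch based on the docstring above. The directory path is hypothetical, and the printed values depend on whatever WAV files it contains:

import turicreate as tc

audio_sframe = tc.audio_analysis.load_audio('~/Documents/myAudioFiles/',
                                            with_path=True, recursive=True)
first = audio_sframe['audio'][0]
print(first['sample_rate'])   # samples per second, e.g. 44100
print(first['data'].shape)    # (L,) for mono, (L, C) for multi-channel
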
Example #4
    def stylize(self, images, style=None, verbose=True, max_size=800, batch_size=4):
        """
        Stylize an SFrame of Images given a style index or a list of
        styles.

        Parameters
        ----------
        images : SFrame | SArray | Image
            A dataset with the same content image column that was used during
            training.

        style : int or list, optional
            The selected style or list of styles to use on the ``images``. If
            `None`, all styles will be applied to each image in ``images``.

        verbose : bool, optional
            If True, print progress updates.

        max_size : int or tuple, optional
            Maximum input image size that will not be resized during
            stylization.

            Images with a side larger than this value will be scaled down,
            due to time and memory constraints. If a tuple, it is interpreted
            as (max width, max height). Without resizing, larger input images
            take more time to stylize. Resizing can affect the quality of the
            final stylized image.

        batch_size : int, optional
            If you are getting memory errors, try decreasing this value. If you
            have a powerful computer, increasing this value may improve
            performance.

        Returns
        -------
        out : SFrame or SArray or turicreate.Image
            If ``style`` is a list, an SFrame is always returned. If ``style``
            is a single integer, the output type will match the input type
            (Image, SArray, or SFrame).

        See Also
        --------
        create

        Examples
        --------
        >>> image = tc.Image("/path/to/image.jpg")
        >>> stylized_images = model.stylize(image, style=[0, 1])
        Data:
        +--------+-------+------------------------+
        | row_id | style |     stylized_image     |
        +--------+-------+------------------------+
        |   0    |   0   | Height: 256 Width: 256 |
        |   0    |   1   | Height: 256 Width: 256 |
        +--------+-------+------------------------+
        [2 rows x 3 columns]

        >>> images = tc.image_analysis.load_images('/path/to/images')
        >>> stylized_images = model.stylize(images)
        Data:
        +--------+-------+------------------------+
        | row_id | style |     stylized_image     |
        +--------+-------+------------------------+
        |   0    |   0   | Height: 256 Width: 256 |
        |   0    |   1   | Height: 256 Width: 256 |
        |   0    |   2   | Height: 256 Width: 256 |
        |   0    |   3   | Height: 256 Width: 256 |
        |   1    |   0   | Height: 640 Width: 648 |
        |   1    |   1   | Height: 640 Width: 648 |
        |   1    |   2   | Height: 640 Width: 648 |
        |   1    |   3   | Height: 640 Width: 648 |
        +--------+-------+------------------------+
        [8 rows x 3 columns]
        """
        if batch_size < 1:
            raise _ToolkitError("'batch_size' must be greater than or equal to 1")

        from ._sframe_loader import SFrameSTIter as _SFrameSTIter
        import mxnet as _mx
        from mxnet import gluon as _gluon
        set_of_all_idx = self._style_indices()
        style, single_style = self._style_input_check(style)

        if isinstance(max_size, _six.integer_types):
            input_shape = (max_size, max_size)
        else:
            # Outward-facing, we use (width, height), but internally we use
            # (height, width)
            input_shape = max_size[::-1]

        images, unpack = self._canonize_content_input(images, single_style=single_style)

        dataset_size = len(images)
        output_size = dataset_size * len(style)
        batch_size_each = min(batch_size, output_size)
        num_mxnet_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=batch_size_each)

        if num_mxnet_gpus == 0:
            # CPU processing prefers native size to prevent stylizing
            # unnecessary regions
            batch_size_each = 1
            loader_type = 'favor-native-size'
        else:
            # GPU processing prefers batches of same size, using padding
            # for smaller images
            loader_type = 'pad'

        self._model.batch_size = batch_size_each
        self._model.hybridize()

        ctx = _mxnet_utils.get_mxnet_context(max_devices=batch_size_each)
        batch_size = max(num_mxnet_gpus, 1) * batch_size_each
        last_time = 0
        if dataset_size == 0:
            raise _ToolkitError("SFrame cannot be empty")
        content_feature = _tkutl._find_only_image_column(images)
        _raise_error_if_not_training_sframe(images, content_feature)

        max_h = 0
        max_w = 0
        oversized_count = 0
        for img in images[content_feature]:
            if img.height > input_shape[0] or img.width > input_shape[1]:
                oversized_count += 1
            max_h = max(img.height, max_h)
            max_w = max(img.width, max_w)

        if input_shape[0] > max_h:
            input_shape = (max_h, input_shape[1])
        if input_shape[1] > max_w:
            input_shape = (input_shape[0], max_w)

        # If we find large images, let's switch to sequential iterator
        # pre-processing, to prevent memory issues.
        sequential = max(max_h, max_w) > 2000

        if verbose and output_size != 1:
            print('Stylizing {} image(s) using {} style(s)'.format(dataset_size, len(style)))
            if oversized_count > 0:
                print('Scaling down {} image(s) exceeding {}x{}'.format(oversized_count, input_shape[1], input_shape[0]))

        content_images_loader = _SFrameSTIter(images, batch_size,
                                              shuffle=False,
                                              feature_column=content_feature,
                                              input_shape=input_shape,
                                              num_epochs=1,
                                              loader_type=loader_type,
                                              repeat_each_image=len(style),
                                              sequential=sequential)

        sb = _tc.SFrameBuilder([int, int, _tc.Image],
                               column_names=['row_id', 'style', 'stylized_{}'.format(self.content_feature)])

        count = 0
        for i, batch in enumerate(content_images_loader):
            if loader_type == 'favor-native-size':
                c_data = [batch.data[0][0].expand_dims(0)]
            else:
                c_data = _gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            indices_data = _gluon.utils.split_and_load(_mx.nd.array(batch.repeat_indices, dtype=_np.int64),
                                                       ctx_list=ctx, batch_axis=0)
            outputs = []
            for b_img, b_indices in zip(c_data, indices_data):
                mx_style = _mx.nd.array(style, dtype=_np.int64, ctx=b_indices.context)
                b_batch_styles = mx_style[b_indices]
                output = self._model(b_img, b_batch_styles)
                outputs.append(output)

            image_data = _np.concatenate([
                (output.asnumpy().transpose(0, 2, 3, 1) * 255).astype(_np.uint8)
                for output in outputs], axis=0)

            batch_styles = [style[idx] for idx in batch.repeat_indices]

            for b in range(batch_size - (batch.pad or 0)):
                image = image_data[b]
                # Crop to remove added padding
                crop = batch.crop[b]
                cropped_image = image[crop[0]:crop[1], crop[2]:crop[3]]
                tc_img = _tc.Image(_image_data=cropped_image.tobytes(),
                                   _width=cropped_image.shape[1],
                                   _height=cropped_image.shape[0],
                                   _channels=cropped_image.shape[2],
                                   _format_enum=2,
                                   _image_data_size=cropped_image.size)
                sb.append([batch.indices[b], batch_styles[b], tc_img])
                count += 1

            cur_time = _time.time()
            if verbose and output_size != 1 and (cur_time > last_time + 10 or count == output_size):
                print('Stylizing {curr_image:{width}d}/{max_n:{width}d}'.
                      format(curr_image=count, max_n=output_size, width=len(str(output_size))))
                last_time = cur_time

        return unpack(sb.close())
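
The input-shape clamping above (the working shape never exceeds the largest image actually present) can be shown standalone. A minimal sketch with made-up image sizes, using the internal (height, width) order:

def clamp_input_shape(input_shape, image_sizes):
    """Both arguments use (height, width) pairs."""
    max_h = max(h for h, _ in image_sizes)
    max_w = max(w for _, w in image_sizes)
    if input_shape[0] > max_h:
        input_shape = (max_h, input_shape[1])
    if input_shape[1] > max_w:
        input_shape = (input_shape[0], max_w)
    return input_shape

# No image reaches 800 on either side, so the shape shrinks to the largest
# sides actually observed.
print(clamp_input_shape((800, 800), [(640, 648), (256, 256)]))  # (640, 648)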