Example No. 1
    def _extract_features(self, preprocessed_data, verbose=True):
        """
        Parameters
        ----------
        preprocessed_data : SArray

        Returns
        -------
        SArray of numpy arrays containing the deep features
        """
        last_progress_update = _time.time()
        progress_header_printed = False

        deep_features = _tc.SArrayBuilder(_np.ndarray)

        if _mac_ver() < (10, 14):
            # Use TensorFlow/Keras

            # Transpose data from channel first to channel last
            preprocessed_data = _np.transpose(preprocessed_data, (0, 2, 3, 1))

            for i, cur_example in enumerate(preprocessed_data):
                y = self.vggish_model.predict([[cur_example]])
                deep_features.append(y[0])

                # If `verbose` is set, print a progress update about every 20s
                if verbose and _time.time() - last_progress_update >= 20:
                    if not progress_header_printed:
                        print("Extracting deep features -")
                        progress_header_printed = True
                    print("Extracted {} of {}".format(i + 1,
                                                      len(preprocessed_data)))
                    last_progress_update = _time.time()
            if progress_header_printed:
                print("Extracted {} of {}\n".format(len(preprocessed_data),
                                                    len(preprocessed_data)))

        else:
            # Use Core ML

            for i, cur_example in enumerate(preprocessed_data):
                for cur_frame in cur_example:
                    x = {'input1': _np.asarray([cur_frame])}
                    y = self.vggish_model.predict(x)
                    deep_features.append(y['output1'])

                # If `verbose` is set, print a progress update about every 20s
                if verbose and _time.time() - last_progress_update >= 20:
                    if not progress_header_printed:
                        print("Extracting deep features -")
                        progress_header_printed = True
                    print("Extracted {} of {}".format(i + 1,
                                                      len(preprocessed_data)))
                    last_progress_update = _time.time()
            if progress_header_printed:
                print("Extracted {} of {}\n".format(len(preprocessed_data),
                                                    len(preprocessed_data)))

        return deep_features.close()
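
A minimal usage sketch for the method above, assuming it lives on a VGGish-style audio feature extractor; the `extractor` instance, the `audio_data` SArray, and the `_preprocess_data` helper are assumptions, not shown in the snippet:

    # Hypothetical driver; names are illustrative only.
    preprocessed = extractor._preprocess_data(audio_data)  # assumed helper
    deep_features = extractor._extract_features(preprocessed, verbose=True)
    print(len(deep_features))  # one deep-feature array per preprocessed example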
Example No. 2
    def extract_features(self, dataset, feature, batch_size=512, verbose=False):
        """
        Parameters
        ----------
        dataset: SFrame
            SFrame of images
        """
        from ..mx import SFrameImageIter as _SFrameImageIter
        import turicreate as _tc
        import array

        if len(dataset) == 0:
            return _tc.SArray([], array.array)

        # Resize images if needed
        preprocessed_dataset = _tc.SFrame()
        if verbose:
            print("Resizing images...")
        preprocessed_dataset[feature] = _tc.image_analysis.resize(
                dataset[feature],  *tuple(reversed(self.image_shape)))

        batch_size = min(len(dataset), batch_size)
        # Make a data iterator
        dataIter = _SFrameImageIter(sframe=preprocessed_dataset,
                                    data_field=[feature],
                                    batch_size=batch_size)

        # Setup the MXNet model
        model = MXFeatureExtractor._get_mx_module(self.ptModel.mxmodel,
                self.data_layer, self.feature_layer, self.context, self.image_shape, batch_size)

        out = _tc.SArrayBuilder(dtype=array.array)
        num_processed = 0
        if verbose:
            print("Performing feature extraction on resized images...")
        while dataIter.has_next:
            if dataIter.data_shape[1:] != self.image_shape:
                raise RuntimeError("Expected image of size %s. Got %s instead." % (
                                               self.image_shape, dataIter.data_shape[1:]))
            model.forward(next(dataIter))
            mx_out = [array.array('d',m) for m in model.get_outputs()[0].asnumpy()]
            if dataIter.getpad() != 0:
                # If batch size is not evenly divisible by the length, it will loop back around.
                # We don't want that.
                mx_out = mx_out[:-dataIter.getpad()]
            out.append_multiple(mx_out)

            num_processed += batch_size
            num_processed = min(len(dataset), num_processed)
            if verbose:
                print('Completed {num_processed:{width}d}/{total:{width}d}'.format(
                    num_processed=num_processed, total=len(dataset),
                    width=len(str(len(dataset)))))

        return out.close()
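
A hedged usage sketch for extract_features, assuming `extractor` is an instance of the class that defines it (an MXFeatureExtractor, per the body above) and that the images sit in an SFrame column named 'image':

    import turicreate as tc

    # Hypothetical call site; how `extractor` is constructed is not shown above.
    sf = tc.image_analysis.load_images('./images')  # SFrame with an 'image' column
    sf['deep_features'] = extractor.extract_features(sf, 'image',
                                                     batch_size=256,
                                                     verbose=True)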
Example No. 3
    def _extract_features(self, preprocessed_data, verbose=True):
        """
        Parameters
        ----------
        preprocessed_data : SArray

        Returns
        -------
        SArray of numpy arrays containing the deep features
        """
        last_progress_update = _time.time()
        progress_header_printed = False

        deep_features = _tc.SArrayBuilder(_np.ndarray)

        if _mac_ver() < (10, 14):
            # Use MXNet
            preprocessed_data = mx.nd.array(preprocessed_data)

            ctx_list = self.ctx
            if len(preprocessed_data) < len(ctx_list):
                ctx_list = ctx_list[:len(preprocessed_data)]
            batches = utils.split_and_load(preprocessed_data,
                                           ctx_list=ctx_list,
                                           even_split=False)

            for i, cur_batch in enumerate(batches):
                y = self.vggish_model.forward(cur_batch).asnumpy()
                for j in y:
                    deep_features.append(j)

                # If `verbose` is set, print a progress update about every 20s
                if verbose and _time.time() - last_progress_update >= 20:
                    if not progress_header_printed:
                        print("Extracting deep features -")
                        progress_header_printed = True
                    print("Extracted {} of {} batches".format(i + 1, len(batches)))
                    last_progress_update = _time.time()
            if progress_header_printed:
                print("Extracted {} of {} batches\n".format(
                    len(batches), len(batches)))

        else:
            # Use Core ML
            for i, cur_example in enumerate(preprocessed_data):
                for cur_frame in cur_example:
                    x = {'input1': _np.asarray([cur_frame])}
                    y = self.vggish_model.predict(x)
                    deep_features.append(y['output1'])

                # If `verbose` is set, print a progress update about every 20s
                if verbose and _time.time() - last_progress_update >= 20:
                    if not progress_header_printed:
                        print("Extracting deep features -")
                        progress_header_printed = True
                    print("Extracted {} of {}".format(i + 1,
                                                      len(preprocessed_data)))
                    last_progress_update = _time.time()
            if progress_header_printed:
                print("Extracted {} of {}\n".format(len(preprocessed_data),
                                                    len(preprocessed_data)))

        return deep_features.close()
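
For reference on the MXNet branch above: `mxnet.gluon.utils.split_and_load` slices an NDArray along the batch axis and copies one slice to each context, and `even_split=False` allows a batch size that does not divide evenly. A small self-contained sketch:

    import mxnet as mx
    from mxnet.gluon import utils

    data = mx.nd.arange(10).reshape((5, 2))
    parts = utils.split_and_load(data,
                                 ctx_list=[mx.cpu(0), mx.cpu(1)],
                                 even_split=False)
    # `parts` holds one NDArray per context; the last slice absorbs the
    # remainder when the split is uneven.
    print([p.shape for p in parts])  # [(2, 2), (3, 2)]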
Example No. 4
    def extract_features(self, dataset, feature, batch_size=64, verbose=False):
        """
        Parameters
        ----------
        dataset: SFrame
            SFrame of images
        """
        from ._mxnet._mx_sframe_iter import SFrameImageIter as _SFrameImageIter
        from six.moves.queue import Queue as _Queue
        from threading import Thread as _Thread
        import turicreate as _tc
        import array

        if len(dataset) == 0:
            return _tc.SArray([], array.array)

        batch_size = min(len(dataset), batch_size)
        # Make a data iterator
        dataIter = _SFrameImageIter(sframe=dataset,
                                    data_field=[feature],
                                    batch_size=batch_size,
                                    image_shape=self.image_shape)

        # Setup the MXNet model
        model = MXFeatureExtractor._get_mx_module(self.ptModel.mxmodel,
                                                  self.data_layer,
                                                  self.feature_layer,
                                                  self.context,
                                                  self.image_shape, batch_size)

        out = _tc.SArrayBuilder(dtype=array.array)
        progress = {'num_processed': 0, 'total': len(dataset)}
        if verbose:
            print("Performing feature extraction on resized images...")

        # Encapsulates the work done by the MXNet model for a single batch
        def handle_request(batch):
            model.forward(batch)
            mx_out = [
                array.array('d', m) for m in model.get_outputs()[0].asnumpy()
            ]
            if batch.pad != 0:
                # If batch size is not evenly divisible by the length, it will loop back around.
                # We don't want that.
                mx_out = mx_out[:-batch.pad]
            return mx_out

        # Copies the output from MXNet into the SArrayBuilder and emits progress
        def consume_response(mx_out):
            out.append_multiple(mx_out)

            progress['num_processed'] += len(mx_out)
            if verbose:
                print('Completed {num_processed:{width}d}/{total:{width}d}'.
                      format(width=len(str(progress['total'])), **progress))

        # Create a dedicated thread for performing MXNet work, using two FIFO
        # queues for communication back and forth with this thread, with the
        # goal of keeping MXNet busy throughout.
        request_queue = _Queue()
        response_queue = _Queue()

        def mx_worker():
            while True:
                batch = request_queue.get()  # Consume request
                if batch is None:
                    # No more work remains. Allow the thread to finish.
                    return
                response_queue.put(handle_request(batch))  # Produce response

        mx_worker_thread = _Thread(target=mx_worker)
        mx_worker_thread.start()

        try:
            # Attempt to have two requests in progress at any one time (double
            # buffering), so that the iterator is creating one batch while MXNet
            # performs inference on the other.
            if dataIter.has_next:
                request_queue.put(next(dataIter))  # Produce request
                while dataIter.has_next:
                    request_queue.put(next(dataIter))  # Produce request
                    consume_response(response_queue.get())
                consume_response(response_queue.get())
        finally:
            # Tell the worker thread to shut down.
            request_queue.put(None)

        return out.close()
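
The request/response idiom above is independent of MXNet. A minimal self-contained sketch of the same double-buffering pattern, using the Python 3 `queue` module and squaring as a stand-in for inference:

    from queue import Queue
    from threading import Thread

    def worker(requests, responses, fn):
        while True:
            item = requests.get()
            if item is None:  # sentinel: no more work remains
                return
            responses.put(fn(item))

    requests, responses = Queue(), Queue()
    t = Thread(target=worker, args=(requests, responses, lambda x: x * x))
    t.start()
    try:
        items = iter(range(5))
        first = next(items, None)
        if first is not None:
            requests.put(first)         # prime the pipeline
            for nxt in items:
                requests.put(nxt)       # keep two requests in flight
                print(responses.get())
            print(responses.get())      # drain the final response
    finally:
        requests.put(None)              # tell the worker to shut down
        t.join()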
Example No. 5
    def __init__(self,
                 sframe,
                 batch_size,
                 shuffle,
                 feature_column,
                 input_shape,
                 num_epochs=None,
                 repeat_each_image=1,
                 loader_type='stretch',
                 aug_params=None,
                 sequential=True):

        # Guard against the mutable-default-argument pitfall
        aug_params = aug_params if aug_params is not None else {}

        if sframe[feature_column].dtype != _tc.Image:
            raise _ToolkitError('Feature column must be of type Image')

        if loader_type in {'stretch', 'stretch-with-augmentation'}:
            img_prep_fn = lambda img: _stretch_resize(img, input_shape)
        elif loader_type in {
                'pad', 'pad-with-augmentation', 'favor-native-size'
        }:
            img_prep_fn = lambda img: _resize_if_too_large(img, input_shape)
        else:
            raise ValueError('Unknown loader-type')

        if loader_type.endswith('-with-augmentation'):
            augs = _mx.image.CreateDetAugmenter(
                data_shape=(3, ) + tuple(input_shape),
                resize=aug_params['aug_resize'],
                rand_crop=aug_params['aug_rand_crop'],
                rand_pad=aug_params['aug_rand_pad'],
                rand_mirror=aug_params['aug_horizontal_flip'],
                rand_gray=aug_params['aug_rand_gray'],
                mean=_np.zeros(3),
                std=_np.ones(3),
                brightness=aug_params['aug_brightness'],
                contrast=aug_params['aug_contrast'],
                saturation=aug_params['aug_saturation'],
                hue=aug_params['aug_hue'],
                pca_noise=aug_params['aug_pca_noise'],
                inter_method=aug_params['aug_inter_method'],
                min_object_covered=aug_params['aug_min_object_covered'],
                pad_val=(128, 128, 128),
                area_range=aug_params['aug_area_range'])
        else:
            augs = []

        self.augmentations = augs
        self.cur_batch = 0
        self.batch_size = batch_size
        self.input_shape = input_shape
        self.shuffle = shuffle
        self.feature_column = feature_column
        self.cur_epoch = 0
        self.cur_sample = 0
        self.cur_repeat = 0
        self.num_epochs = num_epochs
        self.repeat_each_image = repeat_each_image
        self.loader_type = loader_type

        # Make shallow copy, so that temporary columns do not change input
        self.sframe = sframe.copy()

        # Convert images to raw to eliminate overhead of decoding
        if sequential:
            builder = _tc.SArrayBuilder(_tc.Image)
            for img in self.sframe[self.feature_column]:
                builder.append(img_prep_fn(img))
            self.sframe[_TMP_COL_PREP_IMAGE] = builder.close()
        else:
            self.sframe[_TMP_COL_PREP_IMAGE] = self.sframe[
                self.feature_column].apply(img_prep_fn)

        self._provide_data = [
            _mx.io.DataDesc(name='image',
                            shape=(batch_size, 3) + tuple(input_shape),
                            layout='NCHW')
        ]
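
A hedged construction sketch for this iterator; the class name `ImageSFrameIter` and the 416x416 input shape are hypothetical stand-ins, since the snippet shows only the `__init__` body:

    import turicreate as tc

    sf = tc.image_analysis.load_images('./images')  # SFrame with an 'image' column
    it = ImageSFrameIter(sframe=sf,                 # hypothetical class name
                         batch_size=32,
                         shuffle=True,
                         feature_column='image',
                         input_shape=(416, 416),
                         num_epochs=1,
                         loader_type='stretch')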