def _prepare_expected_chunked_dataset(self):
    # Expected result of prep_data with p_w = 3, predictions_in_chunk = 2
    builder = tc.SFrameBuilder(
        [array.array, int, str, array.array, array.array],
        ['features', 'chunk_len', 'session_id', 'target', 'weights'])
    builder.append([[0, 0, 1, 10, 2, 20, 3, 30] + [0] * 4, 4, 's1', [1, 2], [1, 1]])
    builder.append([[4, 40, 5, 50, 6, 60, 7, 70, 8, 80, 9, 90], 6, 's2', [1, 3], [1, 1]])
    builder.append([[10, 100, 11, 110, 12, 120, 13, 130, 14, 140, 15, 150], 6, 's3', [1, 3], [1, 1]])
    builder.append([[16, 160, 17, 170] + [0] * 8, 2, 's3', [2, 0], [1, 0]])
    self.expected_chunked_3_2 = builder.close()

    # Expected result of prep_data with p_w = 2, predictions_in_chunk = 3
    builder = tc.SFrameBuilder(
        [array.array, int, str, array.array, array.array],
        ['features', 'chunk_len', 'session_id', 'target', 'weights'])
    builder.append([[0, 0, 1, 10, 2, 20, 3, 30] + [0] * 4, 4, 's1', [1, 2, 0], [1, 1, 0]])
    builder.append([[4, 40, 5, 50, 6, 60, 7, 70, 8, 80, 9, 90], 6, 's2', [1, 1, 3], [1, 1, 1]])
    builder.append([[10, 100, 11, 110, 12, 120, 13, 130, 14, 140, 15, 150], 6, 's3', [1, 2, 2], [1, 1, 1]])
    builder.append([[16, 160, 17, 170] + [0] * 8, 2, 's3', [2, 0, 0], [1, 0, 0]])
    self.expected_chunked_2_3 = builder.close()

    # Expected result of prep_data with p_w = 4, predictions_in_chunk = 2
    builder = tc.SFrameBuilder(
        [array.array, int, str, array.array, array.array],
        ['features', 'chunk_len', 'session_id', 'target', 'weights'])
    builder.append([[0, 0, 1, 10, 2, 20, 3, 30] + [0] * 8, 4, 's1', [1, 0], [1, 0]])
    builder.append([[4, 40, 5, 50, 6, 60, 7, 70, 8, 80, 9, 90] + [0] * 4, 6, 's2', [1, 3], [1, 1]])
    builder.append([[10, 100, 11, 110, 12, 120, 13, 130, 14, 140, 15, 150,
                     16, 160, 17, 170], 8, 's3', [1, 2], [1, 1]])
    self.expected_chunked_4_2 = builder.close()
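
# Hedged sketch (not part of the test fixture): illustrates the chunking
# scheme that the expected SFrames above encode, assuming prep_data flattens
# each chunk's rows and zero-pads the features up to a full chunk of
# p_w * predictions_in_chunk rows (targets/weights are padded analogously
# with zeros). The helper name `chunk_session` is hypothetical.
def chunk_session(features, p_w, predictions_in_chunk):
    chunk_size = p_w * predictions_in_chunk
    num_features = len(features[0])
    for start in range(0, len(features), chunk_size):
        rows = features[start:start + chunk_size]
        flat = [v for row in rows for v in row]
        # Zero-pad the flattened features up to a full chunk
        flat += [0] * (chunk_size * num_features - len(flat))
        yield flat, len(rows)

# e.g. session 's1' above, with p_w=3 and predictions_in_chunk=2, yields
# ([0, 0, 1, 10, 2, 20, 3, 30, 0, 0, 0, 0], 4):
# list(chunk_session([[0, 0], [1, 10], [2, 20], [3, 30]], 3, 2))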
def _predict_with_options(self, dataset, with_ground_truth,
                          postprocess=True, confidence_threshold=0.001,
                          verbose=True):
    """
    Predict with options for what kind of SFrame should be returned.

    If postprocess is False, a single numpy array with raw unprocessed
    results will be returned.
    """
    _raise_error_if_not_detection_sframe(dataset, self.feature, self.annotations,
                                         require_annotations=with_ground_truth)
    from ._sframe_loader import SFrameDetectionIter as _SFrameDetectionIter
    from ._detection import (yolo_map_to_bounding_boxes as _yolo_map_to_bounding_boxes,
                             non_maximum_suppression as _non_maximum_suppression,
                             bbox_to_ybox as _bbox_to_ybox)
    import mxnet as _mx
    loader = _SFrameDetectionIter(dataset,
                                  batch_size=self.batch_size,
                                  input_shape=self.input_image_shape[1:],
                                  output_shape=self._grid_shape,
                                  anchors=self.anchors,
                                  class_to_index=self._class_to_index,
                                  loader_type='stretched',
                                  load_labels=with_ground_truth,
                                  shuffle=False,
                                  epochs=1,
                                  feature_column=self.feature,
                                  annotations_column=self.annotations)

    num_anchors = len(self.anchors)

    # If prediction is done with ground truth, two SFrames of the same
    # structure are returned, the second one containing ground truth labels
    num_returns = 2 if with_ground_truth else 1

    sf_builders = [
        _tc.SFrameBuilder([int, str, float, float, float, float, float],
                          column_names=['row_id', 'label', 'confidence',
                                        'x', 'y', 'width', 'height'])
        for _ in range(num_returns)
    ]

    dataset_size = len(dataset)
    ctx = _mxnet_utils.get_mxnet_context()
    done = False
    last_time = 0
    raw_results = []
    for batch in loader:
        if batch.pad is not None:
            size = self.batch_size - batch.pad
            b_data = _mx.nd.slice_axis(batch.data[0], axis=0, begin=0, end=size)
            b_indices = _mx.nd.slice_axis(batch.label[1], axis=0, begin=0, end=size)
            b_oshapes = _mx.nd.slice_axis(batch.label[2], axis=0, begin=0, end=size)
        else:
            b_data = batch.data[0]
            b_indices = batch.label[1]
            b_oshapes = batch.label[2]
            size = self.batch_size

        if b_data.shape[0] < len(ctx):
            ctx0 = ctx[:b_data.shape[0]]
        else:
            ctx0 = ctx

        split_data = _mx.gluon.utils.split_and_load(b_data, ctx_list=ctx0, even_split=False)
        split_indices = _mx.gluon.utils.split_data(b_indices, num_slice=len(ctx0), even_split=False)
        split_oshapes = _mx.gluon.utils.split_data(b_oshapes, num_slice=len(ctx0), even_split=False)

        for data, indices, oshapes in zip(split_data, split_indices, split_oshapes):
            z = self._model(data).asnumpy()
            if not postprocess:
                raw_results.append(z)
                continue

            ypred = z.transpose(0, 2, 3, 1)
            ypred = ypred.reshape(ypred.shape[:-1] + (num_anchors, -1))

            zipped = zip(indices.asnumpy(), ypred, oshapes.asnumpy())
            for index0, output0, oshape0 in zipped:
                index0 = int(index0)
                x_boxes, x_classes, x_scores = _yolo_map_to_bounding_boxes(
                        output0[_np.newaxis], anchors=self.anchors,
                        confidence_threshold=confidence_threshold,
                        nms_thresh=None)

                x_boxes0 = _np.array(x_boxes).reshape(-1, 4)

                # Normalize
                x_boxes0[:, 0::2] /= self.input_image_shape[1]
                x_boxes0[:, 1::2] /= self.input_image_shape[2]

                # Re-shape to original input size
                x_boxes0[:, 0::2] *= oshape0[0]
                x_boxes0[:, 1::2] *= oshape0[1]

                # Clip the boxes to the original sizes
                x_boxes0[:, 0::2] = _np.clip(x_boxes0[:, 0::2], 0, oshape0[0])
                x_boxes0[:, 1::2] = _np.clip(x_boxes0[:, 1::2], 0, oshape0[1])

                # Non-maximum suppression (also limit to 100 detections per
                # image, inspired by the evaluation in COCO)
                x_boxes0, x_classes, x_scores = _non_maximum_suppression(
                        x_boxes0, x_classes, x_scores,
                        num_classes=self.num_classes,
                        threshold=self.non_maximum_suppression_threshold,
                        limit=100)

                for bbox, cls, s in zip(x_boxes0, x_classes, x_scores):
                    cls = int(cls)
                    values = [index0, self.classes[cls], s] + list(_bbox_to_ybox(bbox))
                    sf_builders[0].append(values)

                if index0 == len(dataset) - 1:
                    done = True

                cur_time = _time.time()
                # Do not print progress if only a few samples are predicted
                if verbose and (dataset_size >= 5 and
                                cur_time > last_time + 10 or done):
                    print('Predicting {cur_n:{width}d}/{max_n:{width}d}'.format(
                        cur_n=index0 + 1, max_n=dataset_size,
                        width=len(str(dataset_size))))
                    last_time = cur_time

                if done:
                    break

        # Ground truth
        if with_ground_truth:
            zipped = _itertools.islice(zip(batch.label[1].asnumpy(),
                                           batch.raw_bboxes, batch.raw_classes), size)
            for index0, bbox0, cls0 in zipped:
                index0 = int(index0)
                for bbox, cls in zip(bbox0, cls0):
                    cls = int(cls)
                    if cls == -1:
                        break
                    values = [index0, self.classes[cls], 1.0] + list(bbox)
                    sf_builders[1].append(values)

            if index0 == len(dataset) - 1:
                break

    if postprocess:
        ret = tuple([sb.close() for sb in sf_builders])
        if len(ret) == 1:
            return ret[0]
        else:
            return ret
    else:
        return _np.concatenate(raw_results, axis=0)
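
# Hedged sketch (standalone numpy, not the toolkit's API): shows the
# coordinate transform applied above in isolation. Boxes predicted in
# network-input pixel space are normalized by the input height/width,
# rescaled to the original image size, then clipped to the image bounds.
# `input_hw` and `orig_hw` are hypothetical example shapes; boxes are
# assumed to be (y1, x1, y2, x2) so even indices are y coordinates.
import numpy as np

def rescale_boxes(boxes, input_hw, orig_hw):
    boxes = np.asarray(boxes, dtype=np.float64).reshape(-1, 4)
    boxes[:, 0::2] /= input_hw[0]          # normalize y coordinates
    boxes[:, 1::2] /= input_hw[1]          # normalize x coordinates
    boxes[:, 0::2] *= orig_hw[0]           # rescale to original height
    boxes[:, 1::2] *= orig_hw[1]           # rescale to original width
    boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, orig_hw[0])
    boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, orig_hw[1])
    return boxes

# e.g. a box from a 416x416 network input mapped onto a 1080x1920 photo:
# rescale_boxes([[10, 20, 200, 400]], (416, 416), (1080, 1920))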
def load_audio(path, with_path=True, recursive=True, ignore_failure=True,
               random_order=False):
    """
    Loads WAV file(s) from a path.

    Parameters
    ----------
    path : str
        Path to WAV files to be loaded.

    with_path : bool, optional
        Indicates whether a path column is added to the returned SFrame.

    recursive : bool, optional
        Indicates whether ``load_audio`` should do a recursive directory
        traversal, or only load audio files directly under ``path``.

    ignore_failure : bool, optional
        If True, only print warnings for failed files and keep loading the
        remaining audio files.

    random_order : bool, optional
        Load audio files in random order.

    Returns
    -------
    out : SFrame
        Returns an SFrame with either an 'audio' column or both an 'audio'
        and a 'path' column. The 'audio' column is a column of dictionaries.

        Each dictionary contains two items. One item is the sample rate, in
        samples per second (int type). The other item will be the data in a
        numpy array. If the WAV file has a single channel, the array will
        have a single dimension. If there are multiple channels, the array
        will have shape (L, C) where L is the number of samples and C is the
        number of channels.

    Examples
    --------
    >>> audio_path = "~/Documents/myAudioFiles/"
    >>> audio_sframe = tc.audio_analysis.load_audio(audio_path, recursive=True)
    """
    from scipy.io import wavfile as _wavfile

    all_wav_files = []

    if _fnmatch(path, '*.wav'):    # single file
        all_wav_files.append(path)
    elif recursive:
        for (dir_path, _, file_names) in _os.walk(path):
            for cur_file in file_names:
                if _fnmatch(cur_file, '*.wav'):
                    all_wav_files.append(dir_path + '/' + cur_file)
    else:
        all_wav_files = _glob(path + '/*.wav')

    if random_order:
        _shuffle(all_wav_files)

    result_builder = _tc.SFrameBuilder(column_types=[dict, str],
                                       column_names=['audio', 'path'])
    for cur_file_path in all_wav_files:
        try:
            sample_rate, data = _wavfile.read(cur_file_path)
        except Exception as e:
            error_string = "Could not read {}: {}".format(cur_file_path, e)
            if not ignore_failure:
                raise _ToolkitError(error_string)
            else:
                print(error_string)
                continue

        result_builder.append([{'sample_rate': sample_rate,
                                'data': data},
                               cur_file_path])

    result = result_builder.close()
    if not with_path:
        del result['path']
    return result
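
# Hedged usage sketch (directory path is hypothetical): load a folder of
# WAV files and inspect one entry of the resulting 'audio' column.
#
# import turicreate as tc
#
# audio = tc.audio_analysis.load_audio('~/Documents/myAudioFiles/',
#                                      recursive=True, with_path=True)
# first = audio['audio'][0]
# print(first['sample_rate'])   # e.g. 44100 samples per second
# print(first['data'].shape)    # (L,) for mono, (L, C) for multi-channel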
def stylize(self, images, style=None, verbose=True, max_size=800, batch_size=4):
    """
    Stylize an SFrame of Images given a style index or a list of styles.

    Parameters
    ----------
    images : SFrame | Image
        A dataset that has the same content image column that was used
        during training.

    style : int or list, optional
        The selected style or list of styles to use on the ``images``. If
        `None`, all styles will be applied to each image in ``images``.

    verbose : bool, optional
        If True, print progress updates.

    max_size : int or tuple
        Max input image size that will not get resized during stylization.

        Images with a side larger than this value will be scaled down, due
        to time and memory constraints. If tuple, interpreted as
        (max width, max height). Without resizing, larger input images take
        more time to stylize. Resizing can affect the quality of the final
        stylized image.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve
        performance.

    Returns
    -------
    out : SFrame or SArray or turicreate.Image
        If ``style`` is a list, an SFrame is always returned. If ``style``
        is a single integer, the output type will match the input type
        (Image, SArray, or SFrame).

    See Also
    --------
    create

    Examples
    --------
    >>> image = tc.Image("/path/to/image.jpg")
    >>> stylized_images = model.stylize(image, style=[0, 1])
    Data:
    +--------+-------+------------------------+
    | row_id | style |     stylized_image     |
    +--------+-------+------------------------+
    |   0    |   0   | Height: 256 Width: 256 |
    |   0    |   1   | Height: 256 Width: 256 |
    +--------+-------+------------------------+
    [2 rows x 3 columns]

    >>> images = tc.image_analysis.load_images('/path/to/images')
    >>> stylized_images = model.stylize(images)
    Data:
    +--------+-------+------------------------+
    | row_id | style |     stylized_image     |
    +--------+-------+------------------------+
    |   0    |   0   | Height: 256 Width: 256 |
    |   0    |   1   | Height: 256 Width: 256 |
    |   0    |   2   | Height: 256 Width: 256 |
    |   0    |   3   | Height: 256 Width: 256 |
    |   1    |   0   | Height: 640 Width: 648 |
    |   1    |   1   | Height: 640 Width: 648 |
    |   1    |   2   | Height: 640 Width: 648 |
    |   1    |   3   | Height: 640 Width: 648 |
    +--------+-------+------------------------+
    [8 rows x 3 columns]
    """
    if batch_size < 1:
        raise _ToolkitError("'batch_size' must be greater than or equal to 1")

    from ._sframe_loader import SFrameSTIter as _SFrameSTIter
    import mxnet as _mx
    from mxnet import gluon as _gluon

    set_of_all_idx = self._style_indices()
    style, single_style = self._style_input_check(style)

    if isinstance(max_size, _six.integer_types):
        input_shape = (max_size, max_size)
    else:
        # Outward-facing, we use (width, height), but internally we use
        # (height, width)
        input_shape = max_size[::-1]

    images, unpack = self._canonize_content_input(images, single_style=single_style)

    dataset_size = len(images)
    output_size = dataset_size * len(style)
    batch_size_each = min(batch_size, output_size)
    num_mxnet_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=batch_size_each)

    if num_mxnet_gpus == 0:
        # CPU processing prefers native size to prevent stylizing
        # unnecessary regions
        batch_size_each = 1
        loader_type = 'favor-native-size'
    else:
        # GPU processing prefers batches of same size, using padding
        # for smaller images
        loader_type = 'pad'

    self._model.batch_size = batch_size_each
    self._model.hybridize()

    ctx = _mxnet_utils.get_mxnet_context(max_devices=batch_size_each)
    batch_size = max(num_mxnet_gpus, 1) * batch_size_each
    last_time = 0
    if dataset_size == 0:
        raise _ToolkitError("SFrame cannot be empty")

    content_feature = _tkutl._find_only_image_column(images)
    _raise_error_if_not_training_sframe(images, content_feature)

    max_h = 0
    max_w = 0
    oversized_count = 0
    for img in images[content_feature]:
        if img.height > input_shape[0] or img.width > input_shape[1]:
            oversized_count += 1
        max_h = max(img.height, max_h)
        max_w = max(img.width, max_w)

    if input_shape[0] > max_h:
        input_shape = (max_h, input_shape[1])
    if input_shape[1] > max_w:
        input_shape = (input_shape[0], max_w)

    # If we find large images, let's switch to sequential iterator
    # pre-processing, to prevent memory issues.
    sequential = max(max_h, max_w) > 2000

    if verbose and output_size != 1:
        print('Stylizing {} image(s) using {} style(s)'.format(dataset_size, len(style)))
        if oversized_count > 0:
            print('Scaling down {} image(s) exceeding {}x{}'.format(oversized_count,
                                                                    input_shape[1],
                                                                    input_shape[0]))

    content_images_loader = _SFrameSTIter(images, batch_size,
                                          shuffle=False,
                                          feature_column=content_feature,
                                          input_shape=input_shape,
                                          num_epochs=1,
                                          loader_type=loader_type,
                                          repeat_each_image=len(style),
                                          sequential=sequential)

    sb = _tc.SFrameBuilder([int, int, _tc.Image],
                           column_names=['row_id', 'style',
                                         'stylized_{}'.format(self.content_feature)])

    count = 0
    for i, batch in enumerate(content_images_loader):
        if loader_type == 'favor-native-size':
            c_data = [batch.data[0][0].expand_dims(0)]
        else:
            c_data = _gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
        indices_data = _gluon.utils.split_and_load(_mx.nd.array(batch.repeat_indices, dtype=_np.int64),
                                                   ctx_list=ctx, batch_axis=0)
        outputs = []
        for b_img, b_indices in zip(c_data, indices_data):
            mx_style = _mx.nd.array(style, dtype=_np.int64, ctx=b_indices.context)
            b_batch_styles = mx_style[b_indices]
            output = self._model(b_img, b_batch_styles)
            outputs.append(output)

        image_data = _np.concatenate([
            (output.asnumpy().transpose(0, 2, 3, 1) * 255).astype(_np.uint8)
            for output in outputs], axis=0)

        batch_styles = [style[idx] for idx in batch.repeat_indices]

        for b in range(batch_size - (batch.pad or 0)):
            image = image_data[b]
            # Crop to remove added padding
            crop = batch.crop[b]
            cropped_image = image[crop[0]:crop[1], crop[2]:crop[3]]
            tc_img = _tc.Image(_image_data=cropped_image.tobytes(),
                               _width=cropped_image.shape[1],
                               _height=cropped_image.shape[0],
                               _channels=cropped_image.shape[2],
                               _format_enum=2,
                               _image_data_size=cropped_image.size)
            sb.append([batch.indices[b], batch_styles[b], tc_img])
            count += 1

        cur_time = _time.time()
        if verbose and output_size != 1 and (cur_time > last_time + 10 or
                                             count == output_size):
            print('Stylizing {curr_image:{width}d}/{max_n:{width}d}'.format(
                curr_image=count, max_n=output_size,
                width=len(str(output_size))))
            last_time = cur_time

    return unpack(sb.close())
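
# Hedged sketch (standalone numpy, hypothetical values): mirrors the
# post-processing step above in isolation. A float NCHW network output in
# [0, 1] is converted to a uint8 HWC image, then cropped to undo the
# padding the 'pad' loader added around smaller images.
import numpy as np

output = np.random.rand(1, 3, 256, 256)            # stand-in for model output
image = (output.transpose(0, 2, 3, 1)[0] * 255).astype(np.uint8)
crop = (0, 240, 0, 250)                            # (top, bottom, left, right) bounds
cropped = image[crop[0]:crop[1], crop[2]:crop[3]]  # remove padded border
print(cropped.shape)                               # (240, 250, 3)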