def _load_data(dataset_path, reshape_to=None, subtract_mean=False, flatten=False, labels_to_keep=()):
    """Load a pickled dataset and apply optional preprocessing steps.

    Args:
        dataset_path: path to a pickle holding (dataset, label_to_index),
            where dataset maps subset names (e.g. 'training') to
            (data, labels) numpy pairs.
        reshape_to: optional string, e.g. "(3, 32, 32)", parsed with
            ast.literal_eval; each instance is reshaped to this shape.
        subtract_mean: if true, subtract the training subset's mean from
            every subset.
        flatten: if true, flatten each instance to one dimension (no-op on
            already-flat data).
        labels_to_keep: optional comma-separated string of label names to
            retain; instances with other labels are dropped and the kept
            labels are remapped to a contiguous 0..k-1 index range.

    Returns:
        (dataset, label_to_index) after preprocessing; every subset is
        shuffled via skutils.shuffle.

    Raises:
        ValueError: if labels_to_keep names labels absent from the dataset.
    """
    with open(dataset_path, 'rb') as dataset_file:
        dataset, label_to_index = pkl_utils.load(dataset_file)
    if labels_to_keep:
        labels_to_keep = set(labels_to_keep.split(','))
        unknown_labels = labels_to_keep.difference(label_to_index)
        if unknown_labels:
            raise ValueError('Unknown labels passed %s' % unknown_labels)
        # Remap the surviving labels' old indexes onto a contiguous range.
        # range() (not Python-2-only xrange) is fully consumed by zip(), so
        # this behaves identically on Python 2 and 3.
        old_label_index_to_new = dict(zip((label_to_index[l] for l in labels_to_keep),
                                          range(len(labels_to_keep))))
        old_label_indexes_to_keep = [label_to_index[l] for l in labels_to_keep]
        map_labels = np.vectorize(lambda li: old_label_index_to_new[li], otypes=['int32'])

        def drop_labels(data, labels):
            # Keep only instances whose label survives, then remap labels.
            ind = np.in1d(labels, old_label_indexes_to_keep)
            return data[ind], map_labels(labels[ind])

        _transform_dataset(dataset, drop_labels)
        label_to_index = {l: old_label_index_to_new[label_to_index[l]] for l in labels_to_keep}
    if reshape_to:
        reshape_to = literal_eval(reshape_to)
        _transform_dataset(dataset,
                           lambda data, labels: (data.reshape((data.shape[0], ) + reshape_to), labels))
    if subtract_mean:
        # The mean is computed on the training subset only, then applied to
        # every subset so they all share the same centering.
        training_mean = np.mean(dataset['training'][0], axis=0, dtype='float32')
        _transform_dataset(dataset, lambda data, labels: (data - training_mean, labels))
    if flatten:
        _transform_dataset(dataset,
                           lambda data, labels: ((data.reshape((data.shape[0], np.prod(data.shape[1:]))), labels)
                                                 if len(data.shape) > 2 else (data, labels)))
    _transform_dataset(dataset, skutils.shuffle)
    return dataset, label_to_index
def resize_rgb_dataset(in_filename, out_filename, x_size=0, y_size=0):
    """Resize an RGB dataset so that every image is of the given shape.

    Args:
        in_filename: path to a pickle holding (dataset, label_to_index),
            where each subset maps to (instances, labels) and instances are
            stored channels-first as (N, 3, height, width).
        out_filename: path to write the resized pickle to.
        x_size: first spatial dimension of the target shape.
        y_size: second spatial dimension of the target shape.
    """
    with open(in_filename, 'rb') as dataset_file:
        in_dataset, label_to_index = pkl_utils.load(dataset_file)
    shape = (x_size, y_size)
    out_dataset = {}
    # .items() instead of Python-2-only .iteritems(): same iteration on
    # both Python 2 and 3.
    for subset, (instances, labels) in in_dataset.items():
        resized_instances = np.zeros(shape=(len(instances), 3) + shape, dtype=instances.dtype)
        for i, instance in enumerate(instances):
            # resize() expects (height, width, channels) and returns floats
            # scaled to [0, 1], so transpose in, transpose back, and multiply
            # by 255 to restore the original intensity scale (assumes the
            # stored dtype is uint8 — TODO confirm).
            resized_instances[i] = resize(instance.transpose(1, 2, 0), shape).transpose(2, 0, 1) * 255
        out_dataset[subset] = (resized_instances, labels)
    with open(out_filename, 'wb') as out:
        pkl_utils.dump((out_dataset, label_to_index), out)
def _load_data(dataset_path, reshape_to=None, subtract_mean=False, flatten=False, labels_to_keep=()):
    """Unpickle (dataset, label_to_index) from dataset_path and preprocess it.

    Optionally restricts the data to a comma-separated subset of label names
    (remapping the kept labels to a contiguous index range), reshapes each
    instance to a literal-eval'd shape, subtracts the training-set mean, and
    flattens instances; every subset is shuffled before being returned.
    """
    with open(dataset_path, 'rb') as f:
        dataset, label_to_index = pkl_utils.load(f)

    if labels_to_keep:
        kept = set(labels_to_keep.split(','))
        missing = kept.difference(label_to_index)
        if missing:
            raise ValueError('Unknown labels passed %s' % missing)
        # New contiguous index for each kept label's old index.
        remap = dict(zip((label_to_index[l] for l in kept), xrange(len(kept))))
        kept_old_indexes = [label_to_index[l] for l in kept]
        relabel = np.vectorize(lambda old: remap[old], otypes=['int32'])

        def _keep_only(data, labels):
            # Drop instances whose label was filtered out, remap the rest.
            mask = np.in1d(labels, kept_old_indexes)
            return data[mask], relabel(labels[mask])

        _transform_dataset(dataset, _keep_only)
        label_to_index = {l: remap[label_to_index[l]] for l in kept}

    if reshape_to:
        target_shape = literal_eval(reshape_to)

        def _reshape(data, labels):
            return data.reshape((data.shape[0], ) + target_shape), labels

        _transform_dataset(dataset, _reshape)

    if subtract_mean:
        # Center every subset using the training subset's mean.
        mean = np.mean(dataset['training'][0], axis=0, dtype='float32')

        def _center(data, labels):
            return data - mean, labels

        _transform_dataset(dataset, _center)

    if flatten:
        def _flatten(data, labels):
            if len(data.shape) > 2:
                data = data.reshape((data.shape[0], np.prod(data.shape[1:])))
            return data, labels

        _transform_dataset(dataset, _flatten)

    _transform_dataset(dataset, skutils.shuffle)
    return dataset, label_to_index
def _load_model_snapshot(snapshot_path):
    """Read a pickled model snapshot from snapshot_path and return it."""
    print('Loading pickled model from %s' % snapshot_path)
    with open(snapshot_path, 'rb') as f:
        return pkl_utils.load(f)