def check_one_shot_input(data, target):
    """
    Validate one-shot inputs and normalize them into an SFrame to augment.

    Parameters
    ----------
    data : SFrame | Image
        Either an SFrame of starter images or a single starter image.

    target : string
        For SFrame input, the name of the target column in ``data``. For
        Image input, the value stored in the generated ``"target"`` column.

    Returns
    -------
    out : tuple
        ``(dataset_to_augment, image_column_name, target_column_name)``.
        For Image input the SFrame is built here with the fixed column names
        ``"image"`` and ``"target"``.

    Raises
    ------
    TypeError
        If ``target`` is not a string, or if ``data`` is neither an SFrame
        nor an Image.
    """
    if not isinstance(target, str):
        raise TypeError("'target' must be of type string.")

    if isinstance(data, _tc.SFrame):
        # Column validation only applies to SFrame input; a single Image has
        # no columns to inspect, so the check must not run for that case.
        _tkutl._raise_error_if_column_exists(data, target, "data", target)
        image_column_name = _tkutl._find_only_image_column(data)
        target_column_name = target
        dataset_to_augment = data
    elif isinstance(data, _tc.Image):
        image_column_name = "image"
        target_column_name = "target"
        # Wrap the single image and its label in a one-row SFrame.
        dataset_to_augment = _tc.SFrame({
            image_column_name: [data],
            target_column_name: [target],
        })
    else:
        raise TypeError("'data' must be of type SFrame or Image.")

    return dataset_to_augment, image_column_name, target_column_name
def check_one_shot_input(data, target, backgrounds):
    """
    Validate one-shot inputs and return the SFrame to augment with its
    image and target column names.

    Parameters
    ----------
    data : SFrame | Image
        An SFrame of starter images, or a single starter image.

    target : string
        Target column name (SFrame input) or the label stored in the
        generated ``"target"`` column (Image input).

    backgrounds : SArray | None
        Optional background images; when given it must be a non-empty
        SArray.

    Returns
    -------
    out : tuple
        ``(dataset_to_augment, image_column_name, target_column_name)``.

    Raises
    ------
    TypeError
        If ``backgrounds``, ``target`` or ``data`` has an unsupported type.
    ToolkitError
        If ``backgrounds`` is an empty SArray.
    """
    # Backgrounds are optional, but when supplied they must be usable.
    if backgrounds is not None:
        if not isinstance(backgrounds, _tc.SArray):
            raise TypeError("'backgrounds' must be None or an SArray.")
        if len(backgrounds) == 0:
            raise _ToolkitError('Unable to train with no background images')

    if not isinstance(target, str):
        raise TypeError("'target' must be of type string.")

    if not isinstance(data, (_tc.SFrame, _tc.Image)):
        raise TypeError("'data' must be of type SFrame or Image.")

    if isinstance(data, _tc.SFrame):
        _tkutl._raise_error_if_column_exists(data, target, "data", target)
        image_col = _tkutl._find_only_image_column(data)
        target_col = target
        augmentable = data
    else:
        # Single image: build a one-row SFrame with fixed column names.
        image_col, target_col = "image", "target"
        augmentable = _tc.SFrame({image_col: [data], target_col: [target]})

    return augmentable, image_col, target_col
def create(dataset, annotations=None, feature=None, model='darknet-yolo',
           classes=None, max_iterations=0, verbose=True, **kwargs):
    """
    Create a :class:`ObjectDetector` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The columns named by the ``feature`` and ``annotations``
        parameters will be extracted for training the detector.

    annotations : string
        Name of the column containing the object detection annotations.
        This column should be a list of dictionaries, with each dictionary
        representing a bounding box of an object instance. Here is an example
        of the annotations for a single image with two object instances::

            [{'label': 'dog',
              'type': 'rectangle',
              'coordinates': {'x': 223, 'y': 198,
                              'width': 130, 'height': 230}},
             {'label': 'cat',
              'type': 'rectangle',
              'coordinates': {'x': 40, 'y': 73,
                              'width': 80, 'height': 123}}]

        The value for `x` is the horizontal center of the box paired with
        `width` and `y` is the vertical center of the box paired with
        `height`. 'None' (the default) indicates the only list column in
        `dataset` should be used for the annotations.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string optional
        Object detection model to use:

           - "darknet-yolo" : Fast and medium-sized model

    classes : list optional
        List of strings containing the names of the classes of objects.
        Inferred from the data if not provided.

    max_iterations : int
        The number of training iterations. If 0, then it will be
        automatically determined based on the amount of data you provide.

    verbose : bool, optional
        If True, print progress updates and model details.

    **kwargs
        Only the key ``_advanced_parameters`` is read: a dict whose entries
        override the internal training parameters. Keys that are not among
        the internal parameters raise a ToolkitError.

    Returns
    -------
    out : ObjectDetector
        A trained :class:`ObjectDetector` model.

    See Also
    --------
    ObjectDetector

    Examples
    --------
    .. sourcecode:: python

        # Train an object detector model
        >>> model = turicreate.object_detector.create(data)

        # Make predictions on the training set and as column to the SFrame
        >>> data['predictions'] = model.predict(data)

        # Visualize predictions by generating a new column of marked up images
        >>> data['image_pred'] = turicreate.object_detector.util.draw_bounding_boxes(data['image'], data['predictions'])
    """
    _raise_error_if_not_sframe(dataset, "dataset")
    # Function-scope imports of the MXNet-backed training helpers.
    from ._mx_detector import YOLOLoss as _YOLOLoss
    from ._model import tiny_darknet as _tiny_darknet
    from ._sframe_loader import SFrameDetectionIter as _SFrameDetectionIter
    from ._manual_scheduler import ManualScheduler as _ManualScheduler
    import mxnet as _mx
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')

    _numeric_param_check_range('max_iterations', max_iterations, 0, _six.MAXSIZE)
    start_time = _time.time()

    supported_detectors = ['darknet-yolo']

    # Infer the image and annotation columns when they are not named.
    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)
        if verbose:
            print("Using '%s' as feature column" % feature)
    if annotations is None:
        annotations = _tkutl._find_only_column_of_type(dataset,
                                                       target_type=list,
                                                       type_name='list',
                                                       col_name='annotations')
        if verbose:
            print("Using '%s' as annotations column" % annotations)

    _raise_error_if_not_detection_sframe(dataset, feature, annotations,
                                         require_annotations=True)

    _tkutl._check_categorical_option_type('model', model,
                                          supported_detectors)

    # The part of the model name before the first '-' selects the
    # pre-trained base network (e.g. 'darknet' for 'darknet-yolo').
    base_model = model.split('-', 1)[0]
    ref_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[base_model]()

    # Default training parameters; '_advanced_parameters' may override them.
    params = {
        'anchors': [
            (1.0, 2.0), (1.0, 1.0), (2.0, 1.0),
            (2.0, 4.0), (2.0, 2.0), (4.0, 2.0),
            (4.0, 8.0), (4.0, 4.0), (8.0, 4.0),
            (8.0, 16.0), (8.0, 8.0), (16.0, 8.0),
            (16.0, 32.0), (16.0, 16.0), (32.0, 16.0),
        ],
        'grid_shape': [13, 13],
        'batch_size': 32,
        'aug_resize': 0,
        'aug_rand_crop': 0.9,
        'aug_rand_pad': 0.9,
        'aug_rand_gray': 0.0,
        'aug_aspect_ratio': 1.25,
        'aug_hue': 0.05,
        'aug_brightness': 0.05,
        'aug_saturation': 0.05,
        'aug_contrast': 0.05,
        'aug_horizontal_flip': True,
        'aug_min_object_covered': 0,
        'aug_min_eject_coverage': 0.5,
        'aug_area_range': (.15, 2),
        'aug_pca_noise': 0.0,
        'aug_max_attempts': 20,
        'aug_inter_method': 2,
        'lmb_coord_xy': 10.0,
        'lmb_coord_wh': 10.0,
        'lmb_obj': 100.0,
        'lmb_noobj': 5.0,
        'lmb_class': 2.0,
        'non_maximum_suppression_threshold': 0.45,
        'rescore': True,
        'clip_gradients': 0.025,
        'learning_rate': 1.0e-3,
        'shuffle': True,
    }

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError('Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    anchors = params['anchors']
    num_anchors = len(anchors)

    num_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=params['batch_size'])
    batch_size_each = params['batch_size'] // max(num_gpus, 1)
    # Note, this may slightly alter the batch size to fit evenly on the GPUs
    batch_size = max(num_gpus, 1) * batch_size_each

    # Input resolution is the output grid scaled by the base network's
    # spatial reduction factor, with 3 as the leading dimension.
    grid_shape = params['grid_shape']
    input_image_shape = (3,
                         grid_shape[0] * ref_model.spatial_reduction,
                         grid_shape[1] * ref_model.spatial_reduction)

    try:
        # One row per annotated instance, keeping only its label.
        instances = (dataset.stack(annotations, new_column_name='_bbox', drop_na=True)
                            .unpack('_bbox', limit=['label']))
    except (TypeError, RuntimeError):
        # If this fails, the annotation format is invalid at the coarsest level
        raise _ToolkitError("Annotations format is invalid. Must be a list of "
                            "dictionaries containing 'label' and 'coordinates'.")
    num_images = len(dataset)
    num_instances = len(instances)
    if classes is None:
        classes = instances['_bbox.label'].unique()
    # Sorted whether inferred or supplied, so class indices are deterministic.
    classes = sorted(classes)

    # Make a class-to-index look-up table
    class_to_index = {name: index for index, name in enumerate(classes)}
    num_classes = len(classes)

    # Create data loader
    loader = _SFrameDetectionIter(dataset,
                                  batch_size=batch_size,
                                  input_shape=input_image_shape[1:],
                                  output_shape=grid_shape,
                                  anchors=anchors,
                                  class_to_index=class_to_index,
                                  aug_params=params,
                                  shuffle=params['shuffle'],
                                  loader_type='augmented',
                                  feature_column=feature,
                                  annotations_column=annotations)

    # Predictions per anchor box: x/y + w/h + object confidence + class probs
    preds_per_box = 5 + num_classes
    output_size = preds_per_box * num_anchors
    # Per-device target shape: (batch, grid rows, grid cols, anchors, preds)
    ymap_shape = (batch_size_each,) + tuple(grid_shape) + (num_anchors, preds_per_box)

    net = _tiny_darknet(output_size=output_size)

    loss = _YOLOLoss(input_shape=input_image_shape[1:],
                     output_shape=grid_shape,
                     batch_size=batch_size_each,
                     num_classes=num_classes,
                     anchors=anchors,
                     parameters=params)

    base_lr = params['learning_rate']
    if max_iterations == 0:
        # Set number of iterations through a heuristic
        # (scaled by sqrt of the instance count, rounded to a multiple of
        # 1000 with a floor of 1000).
        num_iterations_raw = 5000 * _np.sqrt(num_instances) / batch_size
        num_iterations = 1000 * max(1, int(round(num_iterations_raw / 1000)))
    else:
        num_iterations = max_iterations

    # Schedule breakpoints at 1/2, 3/4 and the end of training, paired with
    # factors 1, 0.1 and 0.01 respectively.
    steps = [num_iterations // 2, 3 * num_iterations // 4, num_iterations]
    steps_and_factors = [(step, 10**(-i)) for i, step in enumerate(steps)]

    steps, factors = zip(*steps_and_factors)
    lr_scheduler = _ManualScheduler(step=steps, factor=factors)

    ctx = _mxnet_utils.get_mxnet_context(max_devices=batch_size)

    # Initialize everything with Xavier, then re-initialize the last two
    # convolution layers with their own schemes.
    net_params = net.collect_params()
    net_params.initialize(_mx.init.Xavier(), ctx=ctx)
    net_params['conv7_weight'].initialize(_mx.init.Xavier(factor_type='avg'), ctx=ctx, force_reinit=True)
    net_params['conv8_weight'].initialize(_mx.init.Uniform(0.00005), ctx=ctx, force_reinit=True)
    # Initialize object confidence low, preventing an unnecessary adjustment
    # period toward conservative estimates
    bias = _np.zeros(output_size, dtype=_np.float32)
    # Offset 4 within each box's predictions follows x, y, w, h in the layout
    # described above, i.e. the object confidence entry.
    bias[4::preds_per_box] -= 6
    from ._mx_detector import ConstantArray
    net_params['conv8_bias'].initialize(ConstantArray(bias), ctx, force_reinit=True)

    # Take a subset and then load the rest of the parameters. It is possible to
    # do allow_missing=True directly on net_params. However, this will more
    # easily hide bugs caused by names getting out of sync.
    ref_model.available_parameters_subset(net_params).load(ref_model.model_path, ctx)

    options = {'learning_rate': base_lr, 'lr_scheduler': lr_scheduler,
               'momentum': 0.9, 'wd': 0.00005, 'rescale_grad': 1.0}
    # Gradient clipping is only enabled for a truthy 'clip_gradients' value.
    clip_grad = params.get('clip_gradients')
    if clip_grad:
        options['clip_gradient'] = clip_grad

    trainer = _mx.gluon.Trainer(net.collect_params(), 'sgd', options)

    iteration = 0
    smoothed_loss = None
    last_time = 0
    # Outer loop restarts the loader each epoch; the inner loop stops as soon
    # as the requested number of iterations has been reached.
    while iteration < num_iterations:
        loader.reset()
        for batch in loader:
            # Split the batch across the available contexts.
            data = _mx.gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = _mx.gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)

            Ls = []
            with _mx.autograd.record():
                for x, y in zip(data, label):
                    z = net(x)
                    # Move axis 1 of the network output to the end, then
                    # reshape to the per-device target shape.
                    z0 = _mx.nd.transpose(z, [0, 2, 3, 1]).reshape(ymap_shape)
                    L = loss(z0, y)
                    Ls.append(L)
                for L in Ls:
                    L.backward()

            cur_loss = _np.mean([L.asnumpy()[0] for L in Ls])
            # Exponential moving average of the loss (0.9 old / 0.1 new).
            if smoothed_loss is None:
                smoothed_loss = cur_loss
            else:
                smoothed_loss = 0.9 * smoothed_loss + 0.1 * cur_loss
            trainer.step(1)
            iteration += 1
            cur_time = _time.time()
            # Report progress at most once every 10 seconds.
            if verbose and cur_time > last_time + 10:
                print('{now:%Y-%m-%d %H:%M:%S} Training {cur_iter:{width}d}/{num_iterations:{width}d} Loss {loss:6.3f}'.format(
                    now=_datetime.now(), cur_iter=iteration, num_iterations=num_iterations,
                    loss=smoothed_loss, width=len(str(num_iterations))))
                last_time = cur_time
            if iteration == num_iterations:
                break

    training_time = _time.time() - start_time

    # Save the model
    state = {
        '_model': net,
        '_class_to_index': class_to_index,
        '_training_time_as_string': _seconds_as_string(training_time),
        '_grid_shape': grid_shape,
        'anchors': anchors,
        'model': model,
        'classes': classes,
        'batch_size': batch_size,
        'input_image_shape': input_image_shape,
        'feature': feature,
        'non_maximum_suppression_threshold': params['non_maximum_suppression_threshold'],
        'annotations': annotations,
        'num_classes': num_classes,
        'num_examples': num_images,
        'num_bounding_boxes': num_instances,
        'training_time': training_time,
        'training_epochs': loader.cur_epoch,
        'training_iterations': iteration,
        'max_iterations': max_iterations,
        'training_loss': smoothed_loss,
    }
    return ObjectDetector(state)
def create(
    dataset,
    target,
    feature=None,
    model='resnet-50',
    l2_penalty=0.01,
    l1_penalty=0.0,
    solver='auto',
    feature_rescaling=True,
    convergence_threshold=_DEFAULT_SOLVER_OPTIONS['convergence_threshold'],
    step_size=_DEFAULT_SOLVER_OPTIONS['step_size'],
    lbfgs_memory_level=_DEFAULT_SOLVER_OPTIONS['lbfgs_memory_level'],
    max_iterations=_DEFAULT_SOLVER_OPTIONS['max_iterations'],
    class_weights=None,
    validation_set='auto',
    verbose=True,
    seed=None,
    batch_size=64):
    """
    Create a :class:`ImageClassifier` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    target : string, or int
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. String target variables are
        automatically mapped to integers in the order in which they are
        provided. For example, a target variable with 'cat' and 'dog' as
        possible values is mapped to 0 and 1 respectively with 0 being the
        base class and 1 being the reference class. Use `model.classes` to
        retrieve the order in which the classes are mapped.

    feature : string, optional
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    l2_penalty : float, optional
        Weight on l2 regularization of the model. The larger this weight, the
        more the model coefficients shrink toward 0. This introduces bias into
        the model but decreases variance, potentially leading to better
        predictions. The default value is 0.01; setting this parameter to 0
        corresponds to unregularized logistic regression. See the ridge
        regression reference for more detail.

    l1_penalty : float, optional
        Weight on l1 regularization of the model. Like the l2 penalty, the
        higher the l1 penalty, the more the estimated coefficients shrink
        toward 0. The l1 penalty, however, completely zeros out sufficiently
        small coefficients, automatically indicating features that are not
        useful for the model. The default weight of 0 prevents any features
        from being discarded. See the LASSO regression reference for more
        detail.

    solver : string, optional
        Name of the solver to be used to solve the regression. See the
        references for more detail on each solver. Available solvers are:

        - *auto (default)*: automatically chooses the best solver for the data
          and model parameters.
        - *newton*: Newton-Raphson
        - *lbfgs*: limited memory BFGS
        - *fista*: accelerated gradient descent

        For this model, the Newton-Raphson method is equivalent to the
        iteratively re-weighted least squares algorithm. If the l1_penalty is
        greater than 0, use the 'fista' solver.

        The model is trained using a carefully engineered collection of
        methods that are automatically picked based on the input data. The
        ``newton`` method works best for datasets with plenty of examples and
        few features (long datasets). Limited memory BFGS (``lbfgs``) is a
        robust solver for wide datasets (i.e datasets with many coefficients).
        ``fista`` is the default solver for l1-regularized linear regression.
        The solvers are all automatically tuned and the default options should
        function well. See the solver options guide for setting additional
        parameters for each of the solvers.

        See the user guide for additional details on how the solver is chosen.
        (see `here
        <https://apple.github.io/turicreate/docs/userguide/supervised-learning/linear-regression.html>`_)

    feature_rescaling : boolean, optional
        Feature rescaling is an important pre-processing step that ensures
        that all features are on the same scale. An l2-norm rescaling is
        performed to make sure that all features are of the same norm.
        Categorical features are also rescaled by rescaling the dummy
        variables that are used to represent them. The coefficients are
        returned in original scale of the problem. This process is
        particularly useful when features vary widely in their ranges.

    convergence_threshold : float, optional
        Convergence is tested using variation in the training objective. The
        variation in the training objective is calculated using the difference
        between the objective values between two steps. Consider reducing this
        below the default value (0.01) for a more accurately trained model.
        Beware of overfitting (i.e a model that works well only on the
        training data) if this parameter is set to a very low value.

    lbfgs_memory_level : float, optional
        The L-BFGS algorithm keeps track of gradient information from the
        previous ``lbfgs_memory_level`` iterations. The storage requirement
        for each of these gradients is the ``num_coefficients`` in the
        problem. Increasing the ``lbfgs_memory_level`` can help improve the
        quality of the model trained. Setting this to more than
        ``max_iterations`` has the same effect as setting it to
        ``max_iterations``.

    model : string optional
        Uses a pretrained model to bootstrap an image classifier:

           - "resnet-50" : Uses a pretrained resnet model.
                           Exported Core ML model will be ~90M.

           - "squeezenet_v1.1" : Uses a pretrained squeezenet model.
                                 Exported Core ML model will be ~4.7M.

           - "VisionFeaturePrint_Scene": Uses an OS internal feature extractor.
                                         Only available on iOS 12.0+,
                                         macOS 10.14+ and tvOS 12.0+.
                                         Exported Core ML model will be ~41K.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    step_size : float, optional
        The starting step size to use for the ``fista`` solver. The default is
        set to 1.0, this is an aggressive setting. If the first iteration
        takes a considerable amount of time, reducing this parameter may speed
        up model training.

    class_weights : {dict, `auto`}, optional
        Weights the examples in the training data according to the given class
        weights. If set to `None`, all classes are supposed to have weight
        one. The `auto` mode sets the class weight to be inversely
        proportional to number of examples in the training data with the given
        class.

    validation_set : SFrame, optional
        A dataset for monitoring the model's generalization performance.
        The format of this SFrame must be the same as the training set.
        By default this argument is set to 'auto' and a validation set is
        automatically sampled and used for progress printing. If
        validation_set is set to None, then no additional metrics
        are computed. The default value is 'auto'.

    max_iterations : int, optional
        The maximum number of allowed passes through the data. More passes
        over the data can result in a more accurately trained model. Consider
        increasing this (the default value is 10) if the training accuracy is
        low and the *Grad-Norm* in the display is large.

    verbose : bool, optional
        If True, prints progress updates and model details.

    seed : int, optional
        Seed for random number generation. Set this value to ensure that the
        same model is created every time.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve
        performance.

    Returns
    -------
    out : ImageClassifier
        A trained :class:`ImageClassifier` model.

    Raises
    ------
    TypeError
        If ``dataset`` is not an SFrame, or ``validation_set`` is not an
        SFrame, 'auto' or None.
    ValueError
        If ``batch_size`` is less than 1.
    ToolkitError
        If ``dataset`` is empty, or the ``feature`` or ``target`` column does
        not exist in ``dataset``.

    Examples
    --------
    .. sourcecode:: python

        >>> model = turicreate.image_classifier.create(data, target='is_expensive')

        # Make predictions (in various forms)
        >>> predictions = model.predict(data)      # predictions
        >>> predictions = model.classify(data)     # predictions with confidence
        >>> predictions = model.predict_topk(data) # Top-5 predictions (multiclass)

        # Evaluate the model with ground truth data
        >>> results = model.evaluate(data)

    See Also
    --------
    ImageClassifier
    """
    start_time = _time.time()

    # Check model parameter
    allowed_models = list(_pre_trained_models.MODELS.keys())
    if _mac_ver() >= (10, 14):
        allowed_models.append('VisionFeaturePrint_Scene')

        # Also, to make sure existing code doesn't break, replace incorrect name
        # with the correct name version
        if model == "VisionFeaturePrint_Screen":
            print(
                "WARNING: Correct spelling of model name is VisionFeaturePrint_Scene; VisionFeaturePrint_Screen will be removed in subsequent versions."
            )
            model = "VisionFeaturePrint_Scene"

    _tkutl._check_categorical_option_type('model', model, allowed_models)

    # Check dataset parameter
    # Fail with a clear message before any SFrame-specific method is called.
    if not isinstance(dataset, _tc.SFrame):
        raise TypeError("'dataset' must be of type SFrame.")
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')
    if (feature is not None) and (feature not in dataset.column_names()):
        raise _ToolkitError("Image feature column '%s' does not exist" % feature)
    if target not in dataset.column_names():
        raise _ToolkitError("Target column '%s' does not exist" % target)

    if batch_size < 1:
        raise ValueError("'batch_size' must be greater than or equal to 1")

    if not (isinstance(validation_set, _tc.SFrame)
            or validation_set == 'auto'
            or validation_set is None):
        raise TypeError("Unrecognized value for 'validation_set'.")

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)

    feature_extractor = _image_feature_extractor._create_feature_extractor(
        model)

    # Extract features
    extracted_features = _tc.SFrame({
        target: dataset[target],
        '__image_features__': feature_extractor.extract_features(
            dataset, feature, verbose=verbose, batch_size=batch_size),
    })
    if isinstance(validation_set, _tc.SFrame):
        extracted_features_validation = _tc.SFrame({
            target: validation_set[target],
            '__image_features__': feature_extractor.extract_features(
                validation_set, feature, verbose=verbose,
                batch_size=batch_size),
        })
    else:
        # 'auto' and None are passed through to the classifier unchanged.
        extracted_features_validation = validation_set

    # Train a classifier using the extracted features. The target column is
    # already part of `extracted_features`, so it is not assigned again.
    lr_model = _tc.logistic_classifier.create(
        extracted_features,
        features=['__image_features__'],
        target=target,
        max_iterations=max_iterations,
        validation_set=extracted_features_validation,
        seed=seed,
        verbose=verbose,
        l2_penalty=l2_penalty,
        l1_penalty=l1_penalty,
        solver=solver,
        feature_rescaling=feature_rescaling,
        convergence_threshold=convergence_threshold,
        step_size=step_size,
        lbfgs_memory_level=lbfgs_memory_level,
        class_weights=class_weights)

    # set input image shape
    if model in _pre_trained_models.MODELS:
        input_image_shape = _pre_trained_models.MODELS[model].input_image_shape
    else:    # model == VisionFeaturePrint_Scene
        input_image_shape = (3, 299, 299)

    # Save the model
    state = {
        'classifier': lr_model,
        'model': model,
        'max_iterations': max_iterations,
        'feature_extractor': feature_extractor,
        'input_image_shape': input_image_shape,
        'target': target,
        'feature': feature,
        'num_features': 1,
        'num_classes': lr_model.num_classes,
        'classes': lr_model.classes,
        'num_examples': lr_model.num_examples,
        'training_time': _time.time() - start_time,
        'training_loss': lr_model.training_loss,
    }
    return ImageClassifier(state)
def create(dataset, label=None, feature=None, model="resnet-50", verbose=True,
           batch_size=64):
    """
    Create a :class:`ImageSimilarityModel` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    label : string
        Name of the SFrame column with row labels to be used as uuid's to
        identify the data. If 'label' is set to None, row numbers are used to
        identify reference dataset rows when the model is queried.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates that the SFrame has only one column of Image type and that
        will be used for similarity.

    model: string, optional
        Uses a pretrained model to bootstrap an image similarity model

           - "resnet-50" : Uses a pretrained resnet model.

           - "squeezenet_v1.1" : Uses a pretrained squeezenet model.

           - "VisionFeaturePrint_Scene": Uses an OS internal feature extractor.
                                         Only on available on iOS 12.0+,
                                         macOS 10.14+ and tvOS 12.0+.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    verbose : bool, optional
        If True, print progress updates and model details.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve
        performance.

    Returns
    -------
    out : ImageSimilarityModel
        A trained :class:`ImageSimilarityModel` model.

    See Also
    --------
    ImageSimilarityModel

    Examples
    --------
    .. sourcecode:: python

        # Train an image similarity model
        >>> model = turicreate.image_similarity.create(data)

        # Query the model for similar images
        >>> similar_images = model.query(data)
        +-------------+-----------------+-------------------+------+
        | query_label | reference_label |      distance     | rank |
        +-------------+-----------------+-------------------+------+
        |      0      |        0        |        0.0        |  1   |
        |      0      |       519       |   12.5319706301   |  2   |
        |      0      |       1619      |   12.5563764596   |  3   |
        |      0      |       186       |   12.6132604915   |  4   |
        |      0      |       1809      |   12.9180964745   |  5   |
        |      1      |        1        | 2.02304872852e-06 |  1   |
        |      1      |       1579      |   11.4288186151   |  2   |
        |      1      |       1237      |   12.3764325949   |  3   |
        |      1      |        80       |   12.7264363676   |  4   |
        |      1      |        58       |   12.7675058558   |  5   |
        +-------------+-----------------+-------------------+------+
        [500 rows x 4 columns]
    """
    started_at = _time.time()

    if not isinstance(dataset, _tc.SFrame):
        raise TypeError("'dataset' must be of type SFrame.")

    # Validate the requested model name.
    known_models = _pre_trained_models.IMAGE_MODELS
    model_choices = list(known_models.keys())
    if _mac_ver() >= (10, 14):
        model_choices.append("VisionFeaturePrint_Scene")

        # Keep older code working: map the misspelled name to the right one.
        if model == "VisionFeaturePrint_Screen":
            print(
                "WARNING: Correct spelling of model name is VisionFeaturePrint_Scene. VisionFeaturePrint_Screen will be removed in future releases."
            )
            model = "VisionFeaturePrint_Scene"

    _tkutl._check_categorical_option_type("model", model, model_choices)

    # Validate the dataset and the remaining arguments.
    if len(dataset) == 0:
        raise _ToolkitError("Unable to train on empty dataset")
    if label is not None and label not in dataset.column_names():
        raise _ToolkitError("Row label column '%s' does not exist" % label)
    if feature is not None and feature not in dataset.column_names():
        raise _ToolkitError("Image feature column '%s' does not exist" % feature)
    if batch_size < 1:
        raise ValueError("'batch_size' must be greater than or equal to 1")

    # Fall back to the only image column when none was named.
    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)

    extractor = _image_feature_extractor._create_feature_extractor(model)

    # Run the images through the feature extractor.
    deep_features = extractor.extract_features(
        dataset, feature, verbose=verbose, batch_size=batch_size
    )
    reference = _tc.SFrame({"__image_features__": deep_features})

    # Carry the row labels over, then fit nearest neighbors on the features.
    if label is not None:
        reference[label] = dataset[label]

    neighbors = _tc.nearest_neighbors.create(
        reference,
        label=label,
        features=["__image_features__"],
        verbose=verbose,
    )

    # Pre-trained models report their own input shape; otherwise the model is
    # VisionFeaturePrint_Scene, which uses a fixed one.
    image_shape = (
        known_models[model].input_image_shape
        if model in known_models
        else (3, 299, 299)
    )

    # Assemble the model state.
    state = {}
    state["similarity_model"] = neighbors
    state["model"] = model
    state["feature_extractor"] = extractor
    state["input_image_shape"] = image_shape
    state["label"] = label
    state["feature"] = feature
    state["num_features"] = 1
    state["num_examples"] = neighbors.num_examples
    state["training_time"] = _time.time() - started_at

    return ImageSimilarityModel(state)
def annotate(data, image_column=None, annotation_column="annotations"):
    """
    Annotate images using a GUI assisted application. When the GUI is
    terminated an SFrame with the representative images and annotations is
    returned.

    Parameters
    ----------
    data : SFrame
        The data containing the input images. Must be a non-empty SFrame.

    image_column: string, optional
        The name of the input column in the SFrame that contains the image
        that needs to be annotated. 'None' (the default) selects the only
        image column in `data`.

    annotation_column : string, optional
        The column containing the annotations in the output SFrame. `None`
        is treated as an empty string.

    Returns
    -------
    out : SFrame
        A new SFrame that contains the newly annotated data.

    Raises
    ------
    TypeError
        If `data` is not an SFrame, if either column name is not a string,
        or if the image column does not have Image dtype.
    ValueError
        If `data` has no rows, or no image column could be determined.

    Examples
    --------
    >>> import turicreate as tc
    >>> images = tc.image_analysis.load_images("path/to/images")
    >>> print(images)
        +------------------------+--------------------------+
        |          path          |          image           |
        +------------------------+--------------------------+
        | /Users/username/Doc... | Height: 1712 Width: 1952 |
        | /Users/username/Doc... | Height: 1386 Width: 1000 |
        | /Users/username/Doc... |  Height: 536 Width: 858  |
        | /Users/username/Doc... | Height: 1512 Width: 2680 |
        +------------------------+--------------------------+
        [4 rows x 2 columns]

    >>> images = tc.image_classifier.annotate(images)
    >>> print(images)
        +------------------------+--------------------------+-------------------+
        |          path          |          image           |    annotations    |
        +------------------------+--------------------------+-------------------+
        | /Users/username/Doc... | Height: 1712 Width: 1952 |        dog        |
        | /Users/username/Doc... | Height: 1386 Width: 1000 |        dog        |
        | /Users/username/Doc... |  Height: 536 Width: 858  |        cat        |
        | /Users/username/Doc... | Height: 1512 Width: 2680 |       mouse       |
        +------------------------+--------------------------+-------------------+
        [4 rows x 3 columns]
    """
    # Only SFrame input is accepted. This check was always first, so the
    # former Image/SArray conversion branches could never run and are gone.
    if not isinstance(data, __tc.SFrame):
        raise TypeError('"data" must be of type SFrame.')

    # ValueError is a subclass of the Exception raised previously, so
    # existing handlers keep working.
    if data.num_rows() == 0:
        raise ValueError("input data cannot be empty")

    # Resolve and validate the column names.
    if image_column is None:
        image_column = _tkutl._find_only_image_column(data)

    if image_column is None:
        raise ValueError("'image_column' cannot be 'None'")

    if not isinstance(image_column, str):
        raise TypeError("'image_column' has to be of type 'str'")

    if annotation_column is None:
        annotation_column = ""

    if not isinstance(annotation_column, str):
        raise TypeError("'annotation_column' has to be of type 'str'")

    # The selected column must actually hold images.
    if data[image_column].dtype != __tc.data_structures.image.Image:
        raise TypeError("'data[image_column]' must be a column of Image type")

    annotation_window = __tc.extensions.create_image_classification_annotation(
        data, [image_column], annotation_column)

    with _QuietProgress(False):
        annotation_window.annotate(_get_client_app_path())
        return annotation_window.returnAnnotations()
def create(dataset, target, feature=None, model='resnet-50',
           validation_set='auto', max_iterations=10, verbose=True,
           seed=None, batch_size=64):
    """
    Create a :class:`ImageClassifier` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    target : string, or int
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. String target variables are
        automatically mapped to integers in the order in which they are
        provided. For example, a target variable with 'cat' and 'dog' as
        possible values is mapped to 0 and 1 respectively with 0 being the
        base class and 1 being the reference class. Use `model.classes` to
        retrieve the order in which the classes are mapped.

    feature : string, optional
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string optional
        Uses a pretrained model to bootstrap an image classifier:

           - "resnet-50" : Uses a pretrained resnet model.
                           Exported Core ML model will be ~90M.

           - "squeezenet_v1.1" : Uses a pretrained squeezenet model.
                                 Exported Core ML model will be ~4.7M.

           - "VisionFeaturePrint_Screen": Uses an OS internal feature extractor.
                                          Only available on iOS 12.0+,
                                          macOS 10.14+ and tvOS 12.0+.
                                          Exported Core ML model will be ~41K.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    validation_set : SFrame, optional
        A dataset for monitoring the model's generalization performance.
        The format of this SFrame must be the same as the training set.
        By default this argument is set to 'auto' and a validation set is
        automatically sampled and used for progress printing. If
        validation_set is set to None, then no additional metrics
        are computed. The default value is 'auto'.

    max_iterations : int, optional
        The maximum number of allowed passes through the data. More passes
        over the data can result in a more accurately trained model. Consider
        increasing this (the default value is 10) if the training accuracy is
        low and the *Grad-Norm* in the display is large.

    verbose : bool, optional
        If True, prints progress updates and model details.

    seed : int, optional
        Seed for random number generation. Set this value to ensure that the
        same model is created every time.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve
        performance.

    Returns
    -------
    out : ImageClassifier
        A trained :class:`ImageClassifier` model.

    Raises
    ------
    TypeError
        If ``dataset`` is not an SFrame, or ``validation_set`` is not an
        SFrame, 'auto' or None.
    ValueError
        If ``batch_size`` is less than 1.
    ToolkitError
        If ``dataset`` is empty, or the ``feature`` or ``target`` column does
        not exist in ``dataset``.

    Examples
    --------
    .. sourcecode:: python

        >>> model = turicreate.image_classifier.create(data, target='is_expensive')

        # Make predictions (in various forms)
        >>> predictions = model.predict(data)      # predictions
        >>> predictions = model.classify(data)     # predictions with confidence
        >>> predictions = model.predict_topk(data) # Top-5 predictions (multiclass)

        # Evaluate the model with ground truth data
        >>> results = model.evaluate(data)

    See Also
    --------
    ImageClassifier
    """
    start_time = _time.time()

    # Check model parameter
    allowed_models = list(_pre_trained_models.MODELS.keys())
    if _mac_ver() >= (10, 14):
        allowed_models.append('VisionFeaturePrint_Screen')
    _tkutl._check_categorical_option_type('model', model, allowed_models)

    # Check dataset parameter
    # Fail with a clear message before any SFrame-specific method is called.
    if not isinstance(dataset, _tc.SFrame):
        raise TypeError("'dataset' must be of type SFrame.")
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')
    if (feature is not None) and (feature not in dataset.column_names()):
        raise _ToolkitError("Image feature column '%s' does not exist" % feature)
    if target not in dataset.column_names():
        raise _ToolkitError("Target column '%s' does not exist" % target)

    if batch_size < 1:
        raise ValueError("'batch_size' must be greater than or equal to 1")

    if not (isinstance(validation_set, _tc.SFrame)
            or validation_set == 'auto'
            or validation_set is None):
        raise TypeError("Unrecognized value for 'validation_set'.")

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)

    feature_extractor = _image_feature_extractor._create_feature_extractor(model)

    # Extract features
    extracted_features = _tc.SFrame({
        target: dataset[target],
        '__image_features__': feature_extractor.extract_features(
            dataset, feature, verbose=verbose, batch_size=batch_size),
    })
    if isinstance(validation_set, _tc.SFrame):
        extracted_features_validation = _tc.SFrame({
            target: validation_set[target],
            '__image_features__': feature_extractor.extract_features(
                validation_set, feature, verbose=verbose,
                batch_size=batch_size),
        })
    else:
        # 'auto' and None are passed through to the classifier unchanged.
        extracted_features_validation = validation_set

    # Train a classifier using the extracted features. The target column is
    # already part of `extracted_features`, so it is not assigned again.
    lr_model = _tc.logistic_classifier.create(
        extracted_features,
        features=['__image_features__'],
        target=target,
        max_iterations=max_iterations,
        validation_set=extracted_features_validation,
        seed=seed,
        verbose=verbose)

    # set input image shape
    if model in _pre_trained_models.MODELS:
        input_image_shape = _pre_trained_models.MODELS[model].input_image_shape
    else:    # model == VisionFeaturePrint_Screen
        input_image_shape = (3, 299, 299)

    # Save the model
    state = {
        'classifier': lr_model,
        'model': model,
        'max_iterations': max_iterations,
        'feature_extractor': feature_extractor,
        'input_image_shape': input_image_shape,
        'target': target,
        'feature': feature,
        'num_features': 1,
        'num_classes': lr_model.num_classes,
        'classes': lr_model.classes,
        'num_examples': lr_model.num_examples,
        'training_time': _time.time() - start_time,
        'training_loss': lr_model.training_loss,
    }
    return ImageClassifier(state)
def create(dataset, target, feature=None, model='resnet-50',
           max_iterations=10, verbose=True, seed=None):
    """
    Create a :class:`ImageClassifier` model.

    Deep features are extracted from the images with a pre-trained network
    and a logistic classifier is trained on top of those features.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    target : string, or int
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. String target variables are
        automatically mapped to integers in the order in which they are
        provided. For example, a target variable with 'cat' and 'dog' as
        possible values is mapped to 0 and 1 respectively with 0 being the
        base class and 1 being the reference class. Use `model.classes` to
        retrieve the order in which the classes are mapped.

    feature : string, optional
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string optional
        Uses a pretrained model to bootstrap an image classifier

           - "resnet-50" : Uses a pretrained resnet model.

           - "squeezenet_v1.1" : Uses a pretrained squeezenet model.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    max_iterations : float, optional
        The maximum number of allowed passes through the data. More passes over
        the data can result in a more accurately trained model. Consider
        increasing this (the default value is 10) if the training accuracy is
        low and the *Grad-Norm* in the display is large.

    verbose : bool, optional
        If True, prints progress updates and model details.

    seed : int, optional
        Seed for random number generation. Set this value to ensure that the
        same model is created every time.

    Returns
    -------
    out : ImageClassifier
        A trained :class:`ImageClassifier` model.

    Raises
    ------
    _ToolkitError
        If `dataset` is empty, or if the `feature` or `target` column does
        not exist in `dataset`.

    Examples
    --------
    .. sourcecode:: python

        >>> model = turicreate.image_classifier.create(data, target='is_expensive')

        # Make predictions (in various forms)
        >>> predictions = model.predict(data)      # predictions
        >>> predictions = model.classify(data)     # predictions with confidence
        >>> predictions = model.predict_topk(data) # Top-5 predictions (multiclass)

        # Evaluate the model with ground truth data
        >>> results = model.evaluate(data)

    See Also
    --------
    ImageClassifier
    """
    start_time = _time.time()

    # Check parameters
    _tkutl._check_categorical_option_type('model', model,
                                          _pre_trained_models.MODELS.keys())
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')

    # Fetch the column names once; both existence checks use them.
    column_names = dataset.column_names()
    if (feature is not None) and (feature not in column_names):
        raise _ToolkitError("Image feature column '%s' does not exist" % feature)
    if target not in column_names:
        raise _ToolkitError("Target column '%s' does not exist" % target)

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)

    # Load pre-trained model & feature extractor
    pretrained_model = _pre_trained_models.MODELS[model]()
    feature_extractor = _image_feature_extractor.MXFeatureExtractor(pretrained_model)

    # Extract features. The target column is carried along here, so no
    # separate re-assignment of it is needed before training.
    extracted_features = _tc.SFrame({
        target: dataset[target],
        '__image_features__': feature_extractor.extract_features(
            dataset, feature, verbose=verbose),
    })

    # Train a classifier using the extracted features
    lr_model = _tc.logistic_classifier.create(extracted_features,
                                              features=['__image_features__'],
                                              target=target,
                                              max_iterations=max_iterations,
                                              seed=seed,
                                              verbose=verbose)

    # Save the model
    state = {
        'classifier': lr_model,
        'model': model,
        'max_iterations': max_iterations,
        'feature_extractor': feature_extractor,
        'input_image_shape': pretrained_model.input_image_shape,
        'target': target,
        'feature': feature,
        'num_features': 1,
        'num_classes': lr_model.num_classes,
        'classes': lr_model.classes,
        'num_examples': lr_model.num_examples,
        'training_time': _time.time() - start_time,
        'training_loss': lr_model.training_loss,
    }
    return ImageClassifier(state)
def create(dataset, label=None, feature=None, model='resnet-50', verbose=True):
    """
    Create a :class:`ImageSimilarityModel` model.

    Deep features are extracted from every image with a pre-trained network
    and a nearest-neighbors model is built over those features.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    label : string
        Name of the SFrame column with row labels to be used as uuid's to
        identify the data. If 'label' is set to None, row numbers are used to
        identify reference dataset rows when the model is queried.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates that the SFrame has only column of Image type and that will
        be used for similarity.

    model: string, optional
        Uses a pretrained model to bootstrap an image similarity model

           - "resnet-50" : Uses a pretrained resnet model.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : ImageSimilarityModel
        A trained :class:`ImageSimilarityModel` model.

    See Also
    --------
    ImageSimilarityModel

    Examples
    --------
    .. sourcecode:: python

        # Train an image similarity model
        >>> model = turicreate.image_similarity.create(data)

        # Query the model for similar images
        >>> similar_images = model.query(data)
        +-------------+-----------------+-------------------+------+
        | query_label | reference_label |      distance     | rank |
        +-------------+-----------------+-------------------+------+
        |      0      |        0        |        0.0        |  1   |
        |      0      |       519       |   12.5319706301   |  2   |
        |      0      |       1619      |   12.5563764596   |  3   |
        |      0      |       186       |   12.6132604915   |  4   |
        |      0      |       1809      |   12.9180964745   |  5   |
        |      1      |        1        | 2.02304872852e-06 |  1   |
        |      1      |       1579      |   11.4288186151   |  2   |
        |      1      |       1237      |   12.3764325949   |  3   |
        |      1      |        80       |   12.7264363676   |  4   |
        |      1      |        58       |   12.7675058558   |  5   |
        +-------------+-----------------+-------------------+------+
        [500 rows x 4 columns]
    """
    start_time = _time.time()

    # Argument validation
    supported_models = _pre_trained_models.MODELS.keys()
    _tkutl._check_categorical_option_type('model', model, supported_models)

    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')

    if label is not None and label not in dataset.column_names():
        raise _ToolkitError("Row label column '%s' does not exist" % label)

    if feature is None:
        # No column named: fall back to the single image column.
        feature = _tkutl._find_only_image_column(dataset)
    elif feature not in dataset.column_names():
        raise _ToolkitError("Image feature column '%s' does not exist" % feature)

    # Pre-trained network and the extractor wrapping it
    backbone = _pre_trained_models.MODELS[model]()
    feature_extractor = _image_feature_extractor.MXFeatureExtractor(backbone)

    # Deep features for every row of the reference dataset
    deep_features = feature_extractor.extract_features(dataset, feature,
                                                       verbose=verbose)
    reference = _tc.SFrame({'__image_features__': deep_features})
    if label is not None:
        reference[label] = dataset[label]

    # Nearest-neighbors model over the deep features
    nn_model = _tc.nearest_neighbors.create(reference,
                                            label=label,
                                            features=['__image_features__'],
                                            verbose=verbose)

    # Model state handed to the wrapper class
    state = {
        'similarity_model': nn_model,
        'model': model,
        'feature_extractor': feature_extractor,
        'input_image_shape': backbone.input_image_shape,
        'label': label,
        'feature': feature,
        'num_features': 1,
        'num_examples': nn_model.num_examples,
        'training_time': _time.time() - start_time,
    }
    return ImageSimilarityModel(state)
def stylize(self, images, style=None, verbose=True, max_size=800, batch_size = 4):
    """
    Stylize an SFrame of Images given a style index or a list of
    styles.

    Parameters
    ----------
    images : SFrame | Image
        A dataset that has the same content image column that was used
        during training.

    style : int or list, optional
        The selected style or list of styles to use on the ``images``. If
        `None`, all styles will be applied to each image in ``images``.

    verbose : bool, optional
        If True, print progress updates.

    max_size : int or tuple
        Max input image size that will not get resized during stylization.

        Images with a side larger than this value, will be scaled down, due
        to time and memory constraints. If tuple, interpreted as (max
        width, max height). Without resizing, larger input images take more
        time to stylize.  Resizing can affect the quality of the final
        stylized image.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve
        performance.

    Returns
    -------
    out : SFrame or SArray or turicreate.Image
        If ``style`` is a list, an SFrame is always returned. If ``style``
        is a single integer, the output type will match the input type
        (Image, SArray, or SFrame).

    Raises
    ------
    _ToolkitError
        If ``batch_size`` is less than 1 or if ``images`` is empty.

    See Also
    --------
    create

    Examples
    --------
    >>> image = tc.Image("/path/to/image.jpg")
    >>> stylized_images = model.stylize(image, style=[0, 1])
    Data:
    +--------+-------+------------------------+
    | row_id | style |     stylized_image     |
    +--------+-------+------------------------+
    |   0    |   0   | Height: 256 Width: 256 |
    |   0    |   1   | Height: 256 Width: 256 |
    +--------+-------+------------------------+
    [2 rows x 3 columns]

    >>> images = tc.image_analysis.load_images('/path/to/images')
    >>> stylized_images = model.stylize(images)
    Data:
    +--------+-------+------------------------+
    | row_id | style |     stylized_image     |
    +--------+-------+------------------------+
    |   0    |   0   | Height: 256 Width: 256 |
    |   0    |   1   | Height: 256 Width: 256 |
    |   0    |   2   | Height: 256 Width: 256 |
    |   0    |   3   | Height: 256 Width: 256 |
    |   1    |   0   | Height: 640 Width: 648 |
    |   1    |   1   | Height: 640 Width: 648 |
    |   1    |   2   | Height: 640 Width: 648 |
    |   1    |   3   | Height: 640 Width: 648 |
    +--------+-------+------------------------+
    [8 rows x 3 columns]
    """
    if batch_size < 1:
        raise _ToolkitError("'batch_size' must be greater than or equal to 1")

    from ._sframe_loader import SFrameSTIter as _SFrameSTIter
    import mxnet as _mx
    from mxnet import gluon as _gluon

    # NOTE(review): the result is never read below; the call is kept only
    # because `_style_indices` is not visible here -- confirm it has no side
    # effects and remove.
    set_of_all_idx = self._style_indices()
    style, single_style = self._style_input_check(style)

    if isinstance(max_size, _six.integer_types):
        input_shape = (max_size, max_size)
    else:
        # Outward-facing, we use (width, height), but internally we use
        # (height, width)
        input_shape = max_size[::-1]

    images, unpack = self._canonize_content_input(images, single_style=single_style)

    # Validate the input completely before querying devices or touching
    # `self._model`, so that a rejected call leaves the model unchanged.
    dataset_size = len(images)
    if dataset_size == 0:
        raise _ToolkitError("SFrame cannot be empty")
    content_feature = _tkutl._find_only_image_column(images)
    _raise_error_if_not_training_sframe(images, content_feature)

    output_size = dataset_size * len(style)
    batch_size_each = min(batch_size, output_size)
    num_mxnet_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=batch_size_each)

    if num_mxnet_gpus == 0:
        # CPU processing prefers native size to prevent stylizing
        # unnecessary regions
        batch_size_each = 1
        loader_type = 'favor-native-size'
    else:
        # GPU processing prefers batches of same size, using padding
        # for smaller images
        loader_type = 'pad'

    self._model.batch_size = batch_size_each
    self._model.hybridize()

    ctx = _mxnet_utils.get_mxnet_context(max_devices=batch_size_each)
    # Total number of images consumed per loader batch across all devices.
    effective_batch_size = max(num_mxnet_gpus, 1) * batch_size_each
    last_time = 0

    # Measure the images: count those exceeding the limit and shrink the
    # working shape when every image is smaller than `max_size`.
    max_h = 0
    max_w = 0
    oversized_count = 0
    for img in images[content_feature]:
        if img.height > input_shape[0] or img.width > input_shape[1]:
            oversized_count += 1
        max_h = max(img.height, max_h)
        max_w = max(img.width, max_w)

    if input_shape[0] > max_h:
        input_shape = (max_h, input_shape[1])
    if input_shape[1] > max_w:
        input_shape = (input_shape[0], max_w)

    # If we find large images, let's switch to sequential iterator
    # pre-processing, to prevent memory issues.
    sequential = max(max_h, max_w) > 2000

    if verbose and output_size != 1:
        print('Stylizing {} image(s) using {} style(s)'.format(dataset_size, len(style)))
        if oversized_count > 0:
            print('Scaling down {} image(s) exceeding {}x{}'.format(
                oversized_count, input_shape[1], input_shape[0]))

    content_images_loader = _SFrameSTIter(images, effective_batch_size,
                                          shuffle=False,
                                          feature_column=content_feature,
                                          input_shape=input_shape,
                                          num_epochs=1,
                                          loader_type=loader_type,
                                          repeat_each_image=len(style),
                                          sequential=sequential)

    sb = _tc.SFrameBuilder([int, int, _tc.Image],
                           column_names=['row_id', 'style',
                                         'stylized_{}'.format(self.content_feature)])

    count = 0
    for batch in content_images_loader:
        if loader_type == 'favor-native-size':
            c_data = [batch.data[0][0].expand_dims(0)]
        else:
            c_data = _gluon.utils.split_and_load(batch.data[0], ctx_list=ctx,
                                                 batch_axis=0)
        indices_data = _gluon.utils.split_and_load(
            _mx.nd.array(batch.repeat_indices, dtype=_np.int64),
            ctx_list=ctx, batch_axis=0)

        outputs = []
        for b_img, b_indices in zip(c_data, indices_data):
            # Map each repeat index to the requested style id on the same
            # device as the image slice.
            mx_style = _mx.nd.array(style, dtype=_np.int64, ctx=b_indices.context)
            b_batch_styles = mx_style[b_indices]
            output = self._model(b_img, b_batch_styles)
            outputs.append(output)

        # Network output is channel-first; transpose to channel-last and
        # scale by 255 into uint8 pixels.
        image_data = _np.concatenate([
            (output.asnumpy().transpose(0, 2, 3, 1) * 255).astype(_np.uint8)
            for output in outputs], axis=0)

        batch_styles = [style[idx] for idx in batch.repeat_indices]

        # Skip the padded tail of the batch reported by the loader.
        for b in range(effective_batch_size - (batch.pad or 0)):
            image = image_data[b]
            # Crop to remove added padding
            crop = batch.crop[b]
            cropped_image = image[crop[0]:crop[1], crop[2]:crop[3]]
            tc_img = _tc.Image(_image_data=cropped_image.tobytes(),
                               _width=cropped_image.shape[1],
                               _height=cropped_image.shape[0],
                               _channels=cropped_image.shape[2],
                               _format_enum=2,
                               _image_data_size=cropped_image.size)
            sb.append([batch.indices[b], batch_styles[b], tc_img])
            count += 1

        # Throttle progress output to one line every 10 seconds, plus a
        # final line when everything is done.
        cur_time = _time.time()
        if verbose and output_size != 1 and (cur_time > last_time + 10 or
                                             count == output_size):
            print('Stylizing {curr_image:{width}d}/{max_n:{width}d}'.
                  format(curr_image=count, max_n=output_size,
                         width=len(str(output_size))))
            last_time = cur_time

    return unpack(sb.close())
def create(style_dataset, content_dataset, style_feature=None,
           content_feature=None, max_iterations=None, model='resnet-16',
           verbose=True, batch_size = 6, **kwargs):
    """
    Create a :class:`StyleTransfer` model.

    Parameters
    ----------
    style_dataset: SFrame
        Input style images. The columns named by the ``style_feature`` parameters will
        be extracted for training the model.

    content_dataset : SFrame
        Input content images. The columns named by the ``content_feature`` parameters will
        be extracted for training the model.

    style_feature: string
        Name of the column containing the input images in style SFrame.
        'None' (the default) indicates the only image column in the style SFrame
        should be used as the feature.

    content_feature: string
        Name of the column containing the input images in content SFrame.
        'None' (the default) indicates the only image column in the content
        SFrame should be used as the feature.

    max_iterations : int
        The number of training iterations. If 'None' (the default), then it will
        be automatically determined based on the amount of data you provide.

    model : string optional
        Style transfer model to use:

            - "resnet-16" : Fast and small-sized residual network that uses
                            VGG-16 as reference network during training.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve training
        throughput.

    verbose : bool, optional
        If True, print progress updates and model details.

    **kwargs
        Only the ``_advanced_parameters`` key is read: a dict that overrides
        entries of the internal training parameters. Unknown keys in that
        dict are rejected.

    Returns
    -------
    out : StyleTransfer
        A trained :class:`StyleTransfer` model.

    Raises
    ------
    _ToolkitError
        If either dataset is empty, if ``batch_size`` is less than 1, or if
        ``_advanced_parameters`` contains an unknown key.

    See Also
    --------
    StyleTransfer

    Examples
    --------
    .. sourcecode:: python

        # Create datasets
        >>> content_dataset = turicreate.image_analysis.load_images('content_images/')
        >>> style_dataset = turicreate.image_analysis.load_images('style_images/')

        # Train a style transfer model
        >>> model = turicreate.style_transfer.create(style_dataset, content_dataset)

        # Stylize an image on all styles
        >>> stylized_images = model.stylize(data)

        # Visualize the stylized images
        >>> stylized_images.explore()

    """
    if len(style_dataset) == 0:
        raise _ToolkitError("style_dataset SFrame cannot be empty")
    if len(content_dataset) == 0:
        raise _ToolkitError("content_dataset SFrame cannot be empty")
    if batch_size < 1:
        raise _ToolkitError("'batch_size' must be greater than or equal to 1")

    from ._sframe_loader import SFrameSTIter as _SFrameSTIter
    import mxnet as _mx

    if style_feature is None:
        style_feature = _tkutl._find_only_image_column(style_dataset)
    if content_feature is None:
        content_feature = _tkutl._find_only_image_column(content_dataset)
    if verbose:
        print("Using '{}' in style_dataset as feature column and using "
              "'{}' in content_dataset as feature column".format(style_feature, content_feature))

    _raise_error_if_not_training_sframe(style_dataset, style_feature)
    _raise_error_if_not_training_sframe(content_dataset, content_feature)

    params = {
        'batch_size': batch_size,
        'vgg16_content_loss_layer': 2,  # conv3_3 layer
        'lr': 0.001,
        'content_loss_mult': 1.0,
        'style_loss_mult': [1e-4, 1e-4, 1e-4, 1e-4],  # conv 1-4 layers
        'finetune_all_params': False,
        'print_loss_breakdown': False,
        'input_shape': (256, 256),
        'training_content_loader_type': 'stretch',
        'use_augmentation': False,
        'sequential_image_processing': False,
        # Only used if use_augmentation is True
        'aug_resize': 0,
        'aug_min_object_covered': 0,
        'aug_rand_crop': 0.9,
        'aug_rand_pad': 0.9,
        'aug_rand_gray': 0.0,
        'aug_aspect_ratio': 1.25,
        'aug_hue': 0.05,
        'aug_brightness': 0.05,
        'aug_saturation': 0.05,
        'aug_contrast': 0.05,
        'aug_horizontal_flip': True,
        'aug_area_range': (.05, 1.5),
        'aug_pca_noise': 0.0,
        'aug_max_attempts': 20,
        'aug_inter_method': 2,
    }

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError('Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    _content_loss_mult = params['content_loss_mult']
    _style_loss_mult = params['style_loss_mult']

    # Split the requested batch evenly over the devices in use; the
    # effective batch size is rounded down to a multiple of the device count.
    num_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=params['batch_size'])
    batch_size_each = params['batch_size'] // max(num_gpus, 1)
    batch_size = max(num_gpus, 1) * batch_size_each
    input_shape = params['input_shape']

    iterations = 0
    if max_iterations is None:
        max_iterations = len(style_dataset) * 500 + 2000
        if verbose:
            print('Setting max_iterations to be {}'.format(max_iterations))

    # data loader
    if params['use_augmentation']:
        content_loader_type = '%s-with-augmentation' % params['training_content_loader_type']
    else:
        content_loader_type = params['training_content_loader_type']

    content_images_loader = _SFrameSTIter(content_dataset, batch_size, shuffle=True,
                                          feature_column=content_feature,
                                          input_shape=input_shape,
                                          loader_type=content_loader_type,
                                          aug_params=params,
                                          sequential=params['sequential_image_processing'])
    ctx = _mxnet_utils.get_mxnet_context(max_devices=params['batch_size'])

    num_styles = len(style_dataset)

    # TRANSFORMER MODEL
    from ._model import Transformer as _Transformer
    transformer_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[model]().get_model_path()
    transformer = _Transformer(num_styles, batch_size_each)
    transformer.collect_params().initialize(ctx=ctx)
    transformer.load_params(transformer_model_path, ctx, allow_missing=True)
    # For some reason, the transformer fails to hybridize for training, so we
    # avoid this until resolved
    # transformer.hybridize()

    # VGG MODEL
    from ._model import Vgg16 as _Vgg16
    vgg_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS['Vgg16']().get_model_path()
    vgg_model = _Vgg16()
    vgg_model.collect_params().initialize(ctx=ctx)
    vgg_model.load_params(vgg_model_path, ctx=ctx, ignore_extra=True)
    vgg_model.hybridize()

    # TRAINER
    from mxnet import gluon as _gluon
    from ._model import gram_matrix as _gram_matrix

    if params['finetune_all_params']:
        trainable_params = transformer.collect_params()
    else:
        # Restrict training to parameters whose names end in gamma / beta.
        trainable_params = transformer.collect_params('.*gamma|.*beta')

    trainer = _gluon.Trainer(trainable_params, 'adam', {'learning_rate': params['lr']})
    mse_loss = _gluon.loss.L2Loss()
    start_time = _time.time()
    smoothed_loss = None
    last_time = 0

    cuda_gpus = _mxnet_utils.get_gpus_in_use(max_devices=params['batch_size'])
    if verbose:
        # Estimate memory usage (based on experiments)
        cuda_mem_req = 260 + batch_size_each * 880 + num_styles * 1.4

        _tkutl._print_neural_compute_device(cuda_gpus=cuda_gpus, use_mps=False,
                                            cuda_mem_req=cuda_mem_req, has_mps_impl=False)
    #
    # Pre-compute gram matrices for style images
    #
    if verbose:
        print('Analyzing visual features of the style images')

    style_images_loader = _SFrameSTIter(style_dataset, batch_size, shuffle=False, num_epochs=1,
                                        feature_column=style_feature, input_shape=input_shape,
                                        loader_type='stretch',
                                        sequential=params['sequential_image_processing'])
    num_layers = len(params['style_loss_mult'])
    gram_chunks = [[] for _ in range(num_layers)]
    for s_batch in style_images_loader:
        s_data = _gluon.utils.split_and_load(s_batch.data[0], ctx_list=ctx, batch_axis=0)
        for s in s_data:
            vgg16_s = _vgg16_data_prep(s)
            ret = vgg_model(vgg16_s)
            grams = [_gram_matrix(x) for x in ret]
            for i, gram in enumerate(grams):
                # Gather every chunk on the CPU before concatenating.
                if gram.context != _mx.cpu(0):
                    gram = gram.as_in_context(_mx.cpu(0))
                gram_chunks[i].append(gram)
    del style_images_loader

    grams = [
        # The concatenated styles may be padded, so we slice overflow
        _mx.nd.concat(*chunks, dim=0)[:num_styles]
        for chunks in gram_chunks
    ]

    # A context->grams look-up table, where all the gram matrices have been
    # distributed
    ctx_grams = {}
    if ctx[0] == _mx.cpu(0):
        ctx_grams[_mx.cpu(0)] = grams
    else:
        for ctx0 in ctx:
            ctx_grams[ctx0] = [gram.as_in_context(ctx0) for gram in grams]

    #
    # Training loop
    #

    # Progress-table layout. Computed unconditionally so the names are always
    # bound; they are only printed when `verbose` is set.
    column_names = ['Iteration', 'Loss', 'Elapsed Time']
    num_columns = len(column_names)
    column_width = max(map(len, column_names)) + 2
    hr = '+' + '+'.join(['-' * column_width] * num_columns) + '+'

    vgg_content_loss_layer = params['vgg16_content_loss_layer']
    rs = _np.random.RandomState(1234)
    while iterations < max_iterations:
        content_images_loader.reset()
        for c_batch in content_images_loader:
            c_data = _gluon.utils.split_and_load(c_batch.data[0], ctx_list=ctx, batch_axis=0)

            Ls = []
            curr_content_loss = []
            curr_style_loss = []
            with _mx.autograd.record():
                for c in c_data:
                    # Randomize styles to train
                    indices = _mx.nd.array(rs.randint(num_styles, size=batch_size_each),
                                           dtype=_np.int64, ctx=c.context)

                    # Generate pastiche
                    p = transformer(c, indices)

                    # mean subtraction
                    vgg16_p = _vgg16_data_prep(p)
                    vgg16_c = _vgg16_data_prep(c)

                    # vgg forward
                    p_vgg_outputs = vgg_model(vgg16_p)

                    c_vgg_outputs = vgg_model(vgg16_c)
                    c_content_layer = c_vgg_outputs[vgg_content_loss_layer]
                    p_content_layer = p_vgg_outputs[vgg_content_loss_layer]

                    # Calculate Loss
                    # Style Loss between style image and stylized image
                    # Ls = sum of L2 norm of gram matrix of vgg16's conv layers
                    style_losses = []
                    for gram, p_vgg_output, style_loss_mult in zip(ctx_grams[c.context],
                                                                   p_vgg_outputs,
                                                                   _style_loss_mult):
                        gram_s_vgg = gram[indices]
                        gram_p_vgg = _gram_matrix(p_vgg_output)

                        style_losses.append(style_loss_mult * mse_loss(gram_s_vgg, gram_p_vgg))

                    style_loss = _mx.nd.add_n(*style_losses)

                    # Content Loss between content image and stylized image
                    # Lc = L2 norm at a single layer in vgg16
                    content_loss = _content_loss_mult * mse_loss(c_content_layer,
                                                                 p_content_layer)

                    curr_content_loss.append(content_loss)
                    curr_style_loss.append(style_loss)
                    # Divide loss by large number to get into a more legible
                    # range
                    total_loss = (content_loss + style_loss) / 10000.0
                    Ls.append(total_loss)
                for L in Ls:
                    L.backward()

            cur_loss = _np.mean([L.asnumpy()[0] for L in Ls])

            # Exponential moving average of the per-iteration loss.
            if smoothed_loss is None:
                smoothed_loss = cur_loss
            else:
                smoothed_loss = 0.9 * smoothed_loss + 0.1 * cur_loss
            iterations += 1
            trainer.step(batch_size)

            if verbose and iterations == 1:
                # Print progress table header
                print(hr)
                print(('| {:<{width}}' * num_columns + '|').format(*column_names,
                                                                   width=column_width-1))
                print(hr)

            cur_time = _time.time()
            if verbose and (cur_time > last_time + 10 or iterations == max_iterations):
                # Print progress table row
                elapsed_time = cur_time - start_time
                print("| {cur_iter:<{width}}| {loss:<{width}.3f}| {time:<{width}.1f}|".format(
                    cur_iter = iterations, loss = smoothed_loss,
                    time = elapsed_time , width = column_width-1))
                if params['print_loss_breakdown']:
                    print_content_loss = _np.mean([L.asnumpy()[0] for L in curr_content_loss])
                    print_style_loss = _np.mean([L.asnumpy()[0] for L in curr_style_loss])
                    print('Total Loss: {:6.3f} | Content Loss: {:6.3f} | Style Loss: {:6.3f}'.format(cur_loss, print_content_loss, print_style_loss))
                last_time = cur_time

            if iterations == max_iterations:
                # Close the progress table only if one was printed; always
                # stop, regardless of verbosity.
                if verbose:
                    print(hr)
                break

    training_time = _time.time() - start_time
    style_sa = style_dataset[style_feature]
    idx_column = _tc.SArray(range(0, style_sa.shape[0]))
    style_sframe = _tc.SFrame({"style": idx_column, style_feature: style_sa})

    # Save the model state
    state = {
        '_model': transformer,
        '_training_time_as_string': _seconds_as_string(training_time),
        'batch_size': batch_size,
        'num_styles': num_styles,
        'model': model,
        'input_image_shape': input_shape,
        'styles': style_sframe,
        'num_content_images': len(content_dataset),
        'training_time': training_time,
        'max_iterations': max_iterations,
        'training_iterations': iterations,
        'training_epochs': content_images_loader.cur_epoch,
        'style_feature': style_feature,
        'content_feature': content_feature,
        "_index_column": "style",
        'training_loss': smoothed_loss,
    }

    return StyleTransfer(state)
def create(dataset, annotations=None, feature=None, model="darknet-yolo",
           classes=None, batch_size=0, max_iterations=0, verbose=True,
           grid_shape=[13, 13], **kwargs):
    """
    Create a :class:`ObjectDetector` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The columns named by the ``feature`` and ``annotations``
        parameters will be extracted for training the detector.

    annotations : string
        Name of the column containing the object detection annotations.  This
        column should be a list of dictionaries (or a single dictionary), with
        each dictionary representing a bounding box of an object instance. Here
        is an example of the annotations for a single image with two object
        instances::

            [{'label': 'dog',
              'type': 'rectangle',
              'coordinates': {'x': 223, 'y': 198,
                              'width': 130, 'height': 230}},
             {'label': 'cat',
              'type': 'rectangle',
              'coordinates': {'x': 40, 'y': 73,
                              'width': 80, 'height': 123}}]

        The value for `x` is the horizontal center of the box paired with
        `width` and `y` is the vertical center of the box paired with `height`.
        'None' (the default) indicates the only list column in `dataset` should
        be used for the annotations.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string optional
        Object detection model to use:

           - "darknet-yolo" : Fast and medium-sized model

    grid_shape : array optional
        Shape of the grid used for object detection, as [height, width].
        Higher values increase precision for small objects, but at a higher
        computational cost

           - [13, 13] : Default grid value for a Fast and medium-sized model

    classes : list optional
        List of strings containing the names of the classes of objects.
        Inferred from the data if not provided.

    batch_size: int
        The number of images per training iteration. If 0, then it will be
        automatically determined based on resource availability.

    max_iterations : int
        The number of training iterations. If 0, then it will be automatically
        determined based on the amount of data you provide.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : ObjectDetector
        A trained :class:`ObjectDetector` model.

    Raises
    ------
    _ToolkitError
        If `dataset` is empty or `grid_shape` does not contain exactly two
        values.

    See Also
    --------
    ObjectDetector

    Examples
    --------
    .. sourcecode:: python

        # Train an object detector model
        >>> model = turicreate.object_detector.create(data)

        # Make predictions on the training set and as column to the SFrame
        >>> data['predictions'] = model.predict(data)

        # Visualize predictions by generating a new column of marked up images
        >>> data['image_pred'] = turicreate.object_detector.util.draw_bounding_boxes(data['image'], data['predictions'])
    """
    _raise_error_if_not_sframe(dataset, "dataset")

    if len(dataset) == 0:
        raise _ToolkitError("Unable to train on empty dataset")

    _numeric_param_check_range("max_iterations", max_iterations, 0, _six.MAXSIZE)
    supported_detectors = ["darknet-yolo"]

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)
        if verbose:
            print("Using '%s' as feature column" % feature)
    if annotations is None:
        annotations = _tkutl._find_only_column_of_type(
            dataset, target_type=[list, dict], type_name="list",
            col_name="annotations")
        if verbose:
            print("Using '%s' as annotations column" % annotations)

    _raise_error_if_not_detection_sframe(dataset, feature, annotations,
                                         require_annotations=True)
    _tkutl._handle_missing_values(dataset, feature, "dataset")
    _tkutl._check_categorical_option_type("model", model, supported_detectors)

    # Validate and normalise the remaining arguments before fetching the
    # pre-trained model, so bad input is rejected early.
    if classes is None:
        classes = []
    _raise_error_if_not_iterable(classes)
    _raise_error_if_not_iterable(grid_shape)
    # A fresh list of ints; the (shared, mutable) default is never modified.
    grid_shape = [int(x) for x in grid_shape]
    if len(grid_shape) != 2:
        raise _ToolkitError(
            "'grid_shape' must contain exactly two values: [height, width]")

    base_model = model.split("-", 1)[0]
    # NOTE(review): `ref_model` is never read below; the instantiation is
    # kept in case it has side effects -- confirm and remove.
    ref_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[base_model]()

    pretrained_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[
        "darknet_mlmodel"]()
    pretrained_model_path = pretrained_model.get_model_path()

    # NOTE(review): only "grid_shape" and "mlmodel_path" are read from this
    # dict inside this function.
    params = {
        "anchors": [
            (1.0, 2.0), (1.0, 1.0), (2.0, 1.0),
            (2.0, 4.0), (2.0, 2.0), (4.0, 2.0),
            (4.0, 8.0), (4.0, 4.0), (8.0, 4.0),
            (8.0, 16.0), (8.0, 8.0), (16.0, 8.0),
            (16.0, 32.0), (16.0, 16.0), (32.0, 16.0),
        ],
        "grid_shape": grid_shape,
        "aug_resize": 0,
        "aug_rand_crop": 0.9,
        "aug_rand_pad": 0.9,
        "aug_rand_gray": 0.0,
        "aug_aspect_ratio": 1.25,
        "aug_hue": 0.05,
        "aug_brightness": 0.05,
        "aug_saturation": 0.05,
        "aug_contrast": 0.05,
        "aug_horizontal_flip": True,
        "aug_min_object_covered": 0,
        "aug_min_eject_coverage": 0.5,
        "aug_area_range": (0.15, 2),
        "aug_pca_noise": 0.0,
        "aug_max_attempts": 20,
        "aug_inter_method": 2,
        "lmb_coord_xy": 10.0,
        "lmb_coord_wh": 10.0,
        "lmb_obj": 100.0,
        "lmb_noobj": 5.0,
        "lmb_class": 2.0,
        "non_maximum_suppression_threshold": 0.45,
        "rescore": True,
        "clip_gradients": 0.025,
        "weight_decay": 0.0005,
        "sgd_momentum": 0.9,
        "learning_rate": 1.0e-3,
        "shuffle": True,
        "mps_loss_mult": 8,
        # This large buffer size (8 batches) is an attempt to mitigate against
        # the SFrame shuffle operation that can occur after each epoch.
        "io_thread_buffer_size": 8,
        "mlmodel_path": pretrained_model_path,
    }

    # create tensorflow model here
    import turicreate.toolkits.libtctensorflow

    tf_config = {
        "grid_height": params["grid_shape"][0],
        "grid_width": params["grid_shape"][1],
        "mlmodel_path": params["mlmodel_path"],
        "classes": classes,
        "compute_final_metrics": False,
        "verbose": verbose,
        "model": "darknet-yolo",
    }

    # If batch_size or max_iterations = 0, they will be automatically
    # generated in C++.
    if batch_size > 0:
        tf_config["batch_size"] = batch_size

    if max_iterations > 0:
        tf_config["max_iterations"] = max_iterations

    # Named `detector` so the `model` argument is not shadowed.
    detector = _tc.extensions.object_detector()
    detector.train(
        data=dataset,
        annotations_column_name=annotations,
        image_column_name=feature,
        options=tf_config,
    )
    return ObjectDetector(model_proxy=detector, name="object_detector")
def create(style_dataset, content_dataset, style_feature=None,
           content_feature=None, max_iterations=None, model='resnet-16',
           verbose=True, batch_size=1, **kwargs):
    """
    Create a :class:`StyleTransfer` model.

    Parameters
    ----------
    style_dataset : SFrame
        Input style images. The column named by the ``style_feature``
        parameter will be extracted for training the model.

    content_dataset : SFrame
        Input content images. The column named by the ``content_feature``
        parameter will be extracted for training the model.

    style_feature : string
        Name of the column containing the input images in the style SFrame.
        'None' (the default) indicates the only image column in the style
        SFrame should be used as the feature.

    content_feature : string
        Name of the column containing the input images in the content SFrame.
        'None' (the default) indicates the only image column in the content
        SFrame should be used as the feature.

    max_iterations : int
        The number of training iterations. Must be an integer greater than
        or equal to 0. If 'None' (the default), the option is not passed on
        and the number of iterations is determined automatically.

    model : string optional
        Style transfer model to use:

           - "resnet-16" : Fast and small-sized residual network that uses
                           VGG-16 as reference network during training.

        Note: this function always loads the "resnet-16" base model; the
        value of this argument is currently not consulted.

    batch_size : int, optional
        Must be greater than or equal to 1. If you are getting memory
        errors, try decreasing this value. If you have a powerful computer,
        increasing this value may improve training throughput.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : StyleTransfer
        A trained :class:`StyleTransfer` model.

    Raises
    ------
    TypeError
        If ``style_dataset`` or ``content_dataset`` is not an SFrame.
    ToolkitError
        If either dataset is empty, ``batch_size`` is smaller than 1,
        ``max_iterations`` is not a non-negative integer, or
        ``_advanced_parameters`` contains an unknown key.

    See Also
    --------
    StyleTransfer

    Examples
    --------
    .. sourcecode:: python

        # Create datasets
        >>> content_dataset = turicreate.image_analysis.load_images('content_images/')
        >>> style_dataset = turicreate.image_analysis.load_images('style_images/')

        # Train a style transfer model (style images first, then content)
        >>> model = turicreate.style_transfer.create(style_dataset, content_dataset)

        # Stylize the content images on all styles
        >>> stylized_images = model.stylize(content_dataset)

        # Visualize the stylized images
        >>> stylized_images.explore()

    """
    if not isinstance(style_dataset, _tc.SFrame):
        raise TypeError('"style_dataset" must be of type SFrame.')
    if not isinstance(content_dataset, _tc.SFrame):
        raise TypeError('"content_dataset" must be of type SFrame.')
    if len(style_dataset) == 0:
        raise _ToolkitError("style_dataset SFrame cannot be empty")
    if len(content_dataset) == 0:
        raise _ToolkitError("content_dataset SFrame cannot be empty")
    if (batch_size < 1):
        raise _ToolkitError("'batch_size' must be greater than or equal to 1")
    if max_iterations is not None and (not isinstance(max_iterations, int)
                                       or max_iterations < 0):
        raise _ToolkitError(
            "'max_iterations' must be an integer greater than or equal to 0")

    # Fall back to the single image column of each SFrame when no explicit
    # feature name was given.
    if style_feature is None:
        style_feature = _tkutl._find_only_image_column(style_dataset)
    if content_feature is None:
        content_feature = _tkutl._find_only_image_column(content_dataset)
    if verbose:
        print("Using '{}' in style_dataset as feature column and using "
              "'{}' in content_dataset as feature column".format(
                  style_feature, content_feature))

    _raise_error_if_not_training_sframe(style_dataset, style_feature)
    _raise_error_if_not_training_sframe(content_dataset, content_feature)
    _tkutl._handle_missing_values(style_dataset, style_feature,
                                  'style_dataset')
    _tkutl._handle_missing_values(content_dataset, content_feature,
                                  'content_dataset')

    # Known (advanced) parameters. Of these, only 'input_shape' is forwarded
    # to the backend below; the full set is what `_advanced_parameters` keys
    # are validated against.
    params = {
        'batch_size': batch_size,
        'vgg16_content_loss_layer': 2,  # conv3_3 layer
        'lr': 0.001,
        'content_loss_mult': 1.0,
        'style_loss_mult': [1e-4, 1e-4, 1e-4, 1e-4],  # conv 1-4 layers
        'finetune_all_params': True,
        'pretrained_weights': False,
        'print_loss_breakdown': False,
        'input_shape': (256, 256),
        'training_content_loader_type': 'stretch',
        'use_augmentation': False,
        'sequential_image_processing': False,
        # Only used if use_augmentaion is True
        'aug_resize': 0,
        'aug_min_object_covered': 0,
        'aug_rand_crop': 0.9,
        'aug_rand_pad': 0.9,
        'aug_rand_gray': 0.0,
        'aug_aspect_ratio': 1.25,
        'aug_hue': 0.05,
        'aug_brightness': 0.05,
        'aug_saturation': 0.05,
        'aug_contrast': 0.05,
        'aug_horizontal_flip': True,
        'aug_area_range': (.05, 1.5),
        'aug_pca_noise': 0.0,
        'aug_max_attempts': 20,
        'aug_inter_method': 2,
        'checkpoint': False,
        'checkpoint_prefix': 'style_transfer',
        'checkpoint_increment': 1000
    }

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError(
                'Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    name = 'style_transfer'

    import turicreate as _turicreate

    # Imports tensorflow
    import turicreate.toolkits.libtctensorflow

    # Use a dedicated local name for the backend object so the `model`
    # argument is not silently overwritten.
    proxy = _turicreate.extensions.style_transfer()
    pretrained_resnet_model = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[
        'resnet-16']()
    pretrained_vgg16_model = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[
        'Vgg16']()

    options = {}
    options['image_height'] = params['input_shape'][0]
    options['image_width'] = params['input_shape'][1]
    options['content_feature'] = content_feature
    options['style_feature'] = style_feature
    if verbose is not None:
        options['verbose'] = verbose
    else:
        options['verbose'] = False
    if batch_size is not None:
        options['batch_size'] = batch_size
    # Omitting 'max_iterations' leaves the choice to the backend.
    if max_iterations is not None:
        options['max_iterations'] = max_iterations
    options['num_styles'] = len(style_dataset)
    options['resnet_mlmodel_path'] = pretrained_resnet_model.get_model_path(
        'coreml')
    options['vgg_mlmodel_path'] = pretrained_vgg16_model.get_model_path(
        'coreml')

    proxy.train(style_dataset[style_feature],
                content_dataset[content_feature], options)
    return StyleTransfer(model_proxy=proxy, name=name)
def stylize(self, images, style=None, verbose=True, max_size=800, batch_size=4):
    """
    Stylize an SFrame of Images given a style index or a list of
    styles.

    Parameters
    ----------
    images : SFrame | SArray | turicreate.Image
        A dataset that has the same content image column that was used
        during training. An SFrame must contain exactly one image column.

    style : None | int | list
        The selected style or list of styles to use on the ``images``. If
        `None`, all styles will be applied to each image in ``images``.

    verbose : bool, optional
        If True, print progress updates.

    max_size : int
        Max input image size that will not get resized during stylization.
        Must be an integer greater than or equal to 1.

        Images with a side larger than this value, will be scaled down, due
        to time and memory constraints. Without resizing, larger input
        images take more time to stylize. Resizing can effect the quality
        of the final stylized image.

    batch_size : int, optional
        Must be an integer greater than or equal to 1. If you are getting
        memory errors, try decreasing this value. If you have a powerful
        computer, increasing this value may improve performance.

    Returns
    -------
    out : SFrame or SArray or turicreate.Image
        If ``style`` is a list (or `None`), an SFrame is always returned.
        If ``style`` is a single integer, the output type will match the
        input type (Image, SArray, or SFrame). For SFrame input, the
        ``stylized_image`` column of the result is renamed to
        ``stylized_<name of the image column>``.

    Raises
    ------
    TypeError
        If ``images``, ``style``, ``max_size`` or ``batch_size`` has an
        unsupported type.
    ToolkitError
        If ``images`` is an empty SFrame/SArray, or ``max_size`` or
        ``batch_size`` is smaller than 1.

    See Also
    --------
    create

    Examples
    --------
    >>> image = tc.Image("/path/to/image.jpg")
    >>> stylized_images = model.stylize(image, style=[0, 1])
    Data:
    +--------+-------+------------------------+
    | row_id | style |     stylized_image     |
    +--------+-------+------------------------+
    |   0    |   0   | Height: 256 Width: 256 |
    |   0    |   1   | Height: 256 Width: 256 |
    +--------+-------+------------------------+
    [2 rows x 3 columns]

    >>> images = tc.image_analysis.load_images('/path/to/images')
    >>> stylized_images = model.stylize(images)
    Data:
    +--------+-------+------------------------+
    | row_id | style |     stylized_image     |
    +--------+-------+------------------------+
    |   0    |   0   | Height: 256 Width: 256 |
    |   0    |   1   | Height: 256 Width: 256 |
    |   0    |   2   | Height: 256 Width: 256 |
    |   0    |   3   | Height: 256 Width: 256 |
    |   1    |   0   | Height: 640 Width: 648 |
    |   1    |   1   | Height: 640 Width: 648 |
    |   1    |   2   | Height: 640 Width: 648 |
    |   1    |   3   | Height: 640 Width: 648 |
    +--------+-------+------------------------+
    [8 rows x 3 columns]
    """
    # ---- argument validation -------------------------------------------
    if not isinstance(images, (_tc.SFrame, _tc.SArray, _tc.Image)):
        raise TypeError(
            '"image" parameter must be of type SFrame, SArray or turicreate.Image.'
        )
    if isinstance(images, (_tc.SFrame, _tc.SArray)) and len(images) == 0:
        raise _ToolkitError('"image" parameter cannot be empty')
    if style is not None and not isinstance(style, (int, list)):
        raise TypeError(
            '"style" must parameter must be a None, int or a list')
    if not isinstance(max_size, int):
        raise TypeError('"max_size" must parameter must be an int')
    if (max_size < 1):
        raise _ToolkitError(
            "'max_size' must be greater than or equal to 1")
    if not isinstance(batch_size, int):
        raise TypeError('"batch_size" must parameter must be an int')
    if (batch_size < 1):
        raise _ToolkitError(
            "'batch_size' must be greater than or equal to 1")

    options = {
        'style_idx': style,
        'verbose': verbose,
        'max_size': max_size,
        'batch_size': batch_size,
    }

    # SFrame input is handled the same way whatever `style` is: stylize the
    # single image column and name the output column after it. (Emptiness
    # was already rejected above, so no second check is needed here.)
    if isinstance(images, _tc.SFrame):
        image_feature = _tkutl._find_only_image_column(images)
        stylized_images = self.__proxy__.predict(
            images[image_feature], options)
        return stylized_images.rename(
            {'stylized_image': 'stylized_' + str(image_feature)})

    # Only SArray and Image inputs reach this point.
    stylized_images = self.__proxy__.predict(images, options)

    # Several styles (a list, or None meaning "all"): return the backend
    # result as-is.
    if isinstance(style, list) or style is None:
        return stylized_images

    # Single integer style: mirror the input type in the output.
    if isinstance(images, _tc.Image):
        return stylized_images["stylized_image"][0]
    return stylized_images["stylized_image"]
def annotate(data, image_column=None, annotation_column='annotations'):
    """
    Annotate your images loaded in either an SFrame or SArray Format

    The annotate util is a GUI assisted application used to create labels in
    SArray Image data. Specifying a column, with dtype Image, in an SFrame
    works as well since SFrames are composed of multiple SArrays.

    When the GUI is terminated an SFrame is returned with the representative,
    images and annotations.

    The returned SFrame includes the newly created annotations.

    Parameters
    --------------
    data : SArray | SFrame | turicreate.Image
        The data containing the images. An SArray or a single Image is
        wrapped in a new one-column SFrame whose column is named by
        ``image_column``. An SFrame is used as given; its ``image_column``
        column must have dtype Image.

    image_column: string, optional
        Name of the image column. If left as 'None' and ``data`` is an
        SFrame, the only image column of the SFrame is used. If left as
        'None' and ``data`` is an SArray or Image, the column name 'image'
        is used for the SFrame constructed around it.

    annotation_column : string, optional
        Name of the column that receives the annotations. Defaults to
        'annotations'. 'None' is replaced by an empty string before being
        handed to the annotation backend.

    Returns
    -------
    out : SFrame
        A new SFrame that contains the newly annotated data. An empty
        SFrame or SArray is returned unchanged without opening the GUI.

    Raises
    ------
    ValueError
        If no image column name could be determined.
    TypeError
        If ``image_column`` or ``annotation_column`` is not a string, if
        ``data`` is not an SFrame, SArray or Image, or if the selected
        SFrame column does not have dtype Image.

    Examples
    --------

    >> import turicreate as tc
    >> images = tc.image_analysis.load_images("path/to/images")
    >> print(images)

        Columns:

            path    str
            image   Image

        Rows: 4

        Data:
        +------------------------+--------------------------+
        |          path          |          image           |
        +------------------------+--------------------------+
        | /Users/username/Doc... | Height: 1712 Width: 1952 |
        | /Users/username/Doc... | Height: 1386 Width: 1000 |
        | /Users/username/Doc... |  Height: 536 Width: 858  |
        | /Users/username/Doc... | Height: 1512 Width: 2680 |
        +------------------------+--------------------------+
        [4 rows x 2 columns]

    >> images = tc.image_classifier.annotate(images)
    >> print(images)

        Columns:
            path    str
            image   Image
            annotation  str

        Rows: 4

        Data:
        +------------------------+--------------------------+-------------------+
        |          path          |          image           |    annotation     |
        +------------------------+--------------------------+-------------------+
        | /Users/username/Doc... | Height: 1712 Width: 1952 |        dog        |
        | /Users/username/Doc... | Height: 1386 Width: 1000 |        dog        |
        | /Users/username/Doc... |  Height: 536 Width: 858  |        cat        |
        | /Users/username/Doc... | Height: 1512 Width: 2680 |       mouse       |
        +------------------------+--------------------------+-------------------+
        [4 rows x 3 columns]

    """
    # Check Value of Column Variables
    if image_column is None:
        if isinstance(data, __tc.data_structures.sframe.SFrame):
            image_column = _tkutl._find_only_image_column(data)
        else:
            # The column lookup helper is only meaningful for SFrames; for
            # SArray / Image input use the documented default name.
            # Unsupported `data` types are still rejected further below.
            image_column = "image"

    if image_column is None:
        raise ValueError("'image_column' cannot be 'None'")

    if not isinstance(image_column, str):
        raise TypeError("'image_column' has to be of type 'str'")

    if annotation_column is None:
        annotation_column = ""

    if not isinstance(annotation_column, str):
        raise TypeError("'annotation_column' has to be of type 'str'")

    # Check Data Structure
    if isinstance(data, __tc.data_structures.image.Image):
        data = __tc.SFrame({image_column: __tc.SArray([data])})

    elif isinstance(data, __tc.data_structures.sframe.SFrame):
        # Nothing to annotate: hand the empty SFrame straight back.
        if data.shape[0] == 0:
            return data

        if data[image_column].dtype != __tc.data_structures.image.Image:
            raise TypeError("'data[image_column]' must be of dtype Image")

    elif isinstance(data, __tc.data_structures.sarray.SArray):
        # Nothing to annotate: hand the empty SArray straight back.
        if data.shape[0] == 0:
            return data

        data = __tc.SFrame({image_column: data})

    else:
        raise TypeError("'data' must be an SFrame or SArray")

    _warning_annotations()

    annotation_window = __tc.extensions.create_image_classification_annotation(
        data, [image_column], annotation_column)

    annotation_window.annotate(_get_client_app_path())

    return annotation_window.returnAnnotations()