import os

from imgaug import augmenters

# cvtk objects used in the snippets below (ClassificationDataset, CNTKTLModel,
# Context, Splitter, ClassificationEvaluation, ConfusionMatrixUI, ImagePairs,
# AMLDeployment, augment_dataset, extract_contour, download_images, show) are
# assumed to be imported already; their module paths vary between cvtk releases


def extract_contour_dataset(dataset_location='classification/sample_data/imgs_recycling/',
                            dataset_name='recycling',
                            enable_logging=True):
    # if we're not running inside AML WB, set up the share directory
    if 'AZUREML_NATIVE_SHARE_DIRECTORY' not in os.environ:
        os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'] = './share'

    # create a dataset from a directory with folders representing different classes
    dataset = ClassificationDataset.create_from_dir(dataset_name, dataset_location,
                                                    enable_logging=enable_logging)
    extract_contour(dataset)
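# a minimal usage sketch for the helper above, passing its default sample data;
# extract_contour() itself is assumed to be defined elsewhere in the project
extract_contour_dataset(dataset_location='classification/sample_data/imgs_recycling/',
                        dataset_name='recycling')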
def classify(dataset_location='classification/sample_data/imgs_recycling/',
             dataset_name='recycling',
             do_augmentations=False):
    """a sample pipeline for classification.

    loads a dataset, optionally does some augmentations, creates and trains a
    model using transfer learning based on ResNet18, and returns the accuracy
    on a test set.

    Args:
        dataset_location: path to a dataset. there should be a top-level folder
            containing one folder per class. see the sample recycling dataset
            for an example of the format
        dataset_name: the name of the dataset. will be used in the dataset
            management functionality
        do_augmentations: boolean. specifies whether augmentations should be
            applied to the training set

    Returns:
        the accuracy on the test set
    """
    # if we're not running inside AML WB, set up the share directory
    if 'AZUREML_NATIVE_SHARE_DIRECTORY' not in os.environ:
        os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'] = './share'

    # create a dataset from a directory with folders representing different classes
    dataset = ClassificationDataset.create_from_dir(dataset_name, dataset_location)

    # print out some info about the dataset
    dataset.print_info()

    # split the full dataset into a train and test set. the stratify option
    # ensures that the different labels are balanced across the two sets
    splitter = Splitter(dataset)
    train_set_orig, test_set = splitter.split(train_size=.8, stratify='label')

    # optionally augment images by cropping and rotating
    if do_augmentations:
        # two augmentation pipelines are created here. the first rotates each
        # image by a random angle between -45 and 45 degrees, then flips the
        # rotated image left-to-right with probability .5. the second randomly
        # crops images by between 0 and 10 percent. each pipeline is applied to
        # the original dataset, so the result has three times as many images:
        # the originals, the rotate_and_flip outputs, and the crop outputs
        rotate_and_flip = augmenters.Sequential(
            [augmenters.Affine(rotate=(-45, 45)), augmenters.Fliplr(.5)])
        crop = augmenters.Sequential([augmenters.Crop(percent=(0, .1))])
        train_set = augment_dataset(train_set_orig, [rotate_and_flip, crop])
    else:
        train_set = train_set_orig

    # now create the model
    base_model_name = 'ResNet18_ImageNet_CNTK'
    model = CNTKTLModel(train_set.labels,
                        base_model_name=base_model_name,
                        output_path='.')

    # train the model using cntk
    model.train(train_set)

    # evaluate on the test set and return the accuracy
    ce = ClassificationEvaluation(model, test_set, minibatch_size=16)
    acc = ce.compute_accuracy()
    return acc
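# example invocation of the classification pipeline above (a sketch; the
# default dataset path points at the bundled recycling sample)
accuracy = classify(do_augmentations=True)
print("Test set accuracy = {:.2%}".format(accuracy))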
def train_deploy(dataset_location='classification/sample_data/imgs_recycling',
                 dataset_name='recycling',
                 do_augmentations=True,
                 deployment_name="testdeployment",
                 azureml_rscgroup=None,
                 azureml_cluster_name=None):
    """a sample pipeline that trains a model on a dataset and deploys it.

    loads a dataset, optionally does some augmentations, creates and trains a
    model using transfer learning based on ResNet18, deploys the trained model
    on the specified Azure ML cluster (or the one configured via the CLI), and
    returns the scoring URL.

    Args:
        dataset_location: path to a dataset. there should be a top-level folder
            containing one folder per class. see the sample recycling dataset
            for an example of the format
        dataset_name: the name of the dataset. will be used in the dataset
            management functionality
        do_augmentations: boolean. specifies whether augmentations should be
            applied to the dataset before training
        deployment_name: the name of the deployment. will be used in the
            deployment management facility
        azureml_rscgroup: Azure ML resource group name of the model management
            account. if not set, the default value set via the CLI is used
        azureml_cluster_name: Azure ML cluster name where the model is deployed.
            if not set, the default value set via the CLI is used

    Returns:
        the scoring API URL of the deployment
    """
    # if we're not running inside AML WB, set up the share directory
    if 'AZUREML_NATIVE_SHARE_DIRECTORY' not in os.environ:
        os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'] = './share'

    context = Context.get_global_context()

    # create a dataset from a directory with folders representing different classes
    dataset = ClassificationDataset.create_from_dir(dataset_name, dataset_location)

    # print out some info about the dataset
    dataset.print_info()

    # optionally augment images by cropping and rotating
    if do_augmentations:
        # two augmentation pipelines are created here. the first rotates each
        # image by a random angle between -45 and 45 degrees, then flips the
        # rotated image left-to-right with probability .5. the second randomly
        # crops images by between 0 and 10 percent. each pipeline is applied to
        # the original dataset, so the result has three times as many images:
        # the originals, the rotate_and_flip outputs, and the crop outputs
        rotate_and_flip = augmenters.Sequential([
            augmenters.Affine(rotate=(-45, 45)), augmenters.Fliplr(.5)])
        crop = augmenters.Sequential([augmenters.Crop(percent=(0, .1))])
        train_set = augment_dataset(dataset, [rotate_and_flip, crop])
    else:
        train_set = dataset

    # now create the model
    base_model_name = 'ResNet18_ImageNet_CNTK'
    model = CNTKTLModel(train_set.labels,
                        base_model_name=base_model_name,
                        output_path='.')

    # train the model using cntk
    num_epochs = 5
    mb_size = 32
    model.train(train_set,
                lr_per_mb=[.01] * 20 + [.001] * 20 + [.0001],
                num_epochs=num_epochs,
                mb_size=mb_size)
    print("Model state:", model.model_state)

    # if a deployment with this name already exists, remove it first
    AMLDeployment.delete_if_service_exist(deployment_name)

    # deploy the trained model
    deploy_obj = AMLDeployment(deployment_name=deployment_name,
                               associated_DNNModel=model,
                               aml_env="cluster",
                               replicas=1)
    deploy_obj.deploy()
    return deploy_obj.service_url
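# example invocation of the train-and-deploy pipeline above (a sketch; it
# assumes the Azure ML model management account and cluster have already been
# configured via the CLI, as the docstring describes)
scoring_url = train_deploy(deployment_name="testdeployment")
print("Scoring API URL:", scoring_url)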
# Dataset Creation
if 'AZUREML_NATIVE_SHARE_DIRECTORY' not in os.environ:
    os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'] = './share'

context = Context.get_global_context()
dataset_name = "fashion"
dataset_location = os.path.join(context.storage.outputs_path, "data", dataset_name)
print("Dataset Location:", dataset_location)

print("Downloading images to: " + dataset_location)
download_images.download_all(dataset_location)

dataset = ClassificationDataset.create_from_dir(dataset_name, dataset_location)
print("Dataset consists of {} images with {} labels.".format(
    len(dataset.images), len(dataset.labels)))

# Split the data into train and test
splitter = Splitter(dataset)
train_set, test_set = splitter.split(train_size=.5, random_state=1,
                                     stratify="label")
print("Number of original training images = {}.".format(train_set.size()))

num_train_sets = 20
num_test_sets = 20
num_different_label = 50
trainPairs = ImagePairs(train_set, num_train_sets, num_different_label)
print('There are {} sets of image pairs generated for all labels from training '
      'data.'.format(num_train_sets))
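# the snippet above defines num_test_sets but only pairs the training images;
# a matching sketch for the test split, assuming the same ImagePairs signature
testPairs = ImagePairs(test_set, num_test_sets, num_different_label)
print('There are {} sets of image pairs generated for all labels from test '
      'data.'.format(num_test_sets))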
def classify(dataset_location='classification/sample_data/imgs_recycling/',
             dataset_name='recycling',
             do_augmentations=True,
             enable_logging=True):
    """a sample pipeline for classification.

    loads a dataset, optionally does some augmentations, creates and trains a
    model using transfer learning based on ResNet18, and returns the accuracy
    on a test set.

    Args:
        dataset_location: path to a dataset. there should be a top-level folder
            containing one folder per class. see the sample recycling dataset
            for an example of the format
        dataset_name: the name of the dataset. will be used in the dataset
            management functionality
        do_augmentations: boolean. specifies whether augmentations should be
            applied to the training set

    Returns:
        the accuracy on the test set
    """
    # if we're not running inside AML WB, set up the share directory
    if 'AZUREML_NATIVE_SHARE_DIRECTORY' not in os.environ:
        os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'] = './share'

    # create a dataset from a directory with folders representing different classes
    dataset = ClassificationDataset.create_from_dir(dataset_name, dataset_location)

    # print out some info about the dataset
    print("DATASET INFO:")
    dataset.print_info()

    # split the full dataset into a train and test set. the stratify option
    # ensures that the different labels are balanced across the two sets
    train_set_orig, test_set = dataset.split(train_size=0.66, stratify="label")

    # optionally augment images by cropping and rotating
    if do_augmentations:
        aug_sequence = augmenters.Sequential([
            augmenters.Fliplr(0.5),            # horizontally flip 50% of all images
            augmenters.Crop(percent=(0, 0.1))  # crop images by 0-10% of their height/width
        ])
        train_set = augment_dataset(train_set_orig, [aug_sequence])
        print("Number of original training images = {}, with augmented images "
              "included = {}.".format(train_set_orig.size(), train_set.size()))
    else:
        train_set = train_set_orig

    # model creation
    lr_per_mb = [0.05] * 7 + [0.005] * 7 + [0.0005]
    mb_size = 32
    input_resolution = 224
    base_model_name = 'ResNet18_ImageNet_CNTK'
    model = CNTKTLModel(train_set.labels,
                        base_model_name=base_model_name,
                        image_dims=(3, input_resolution, input_resolution))

    # train the model using cntk (call sketched in under the assumption that
    # train() takes the same keywords as above, since lr_per_mb and mb_size
    # are defined here for it)
    model.train(train_set, lr_per_mb=lr_per_mb, mb_size=mb_size)

    # evaluate the model on the test set
    ce = ClassificationEvaluation(model, test_set, minibatch_size=mb_size)
    acc = ce.compute_accuracy()
    print("Accuracy = {:2.2f}%".format(100 * acc))
    cm = ce.compute_confusion_matrix()
    print("Confusion matrix = \n{}".format(cm))
    cm_ui = ConfusionMatrixUI(cm, [l.name for l in test_set.labels])
    show(cm_ui.ui)
    return acc
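# a minimal sketch of deriving per-class accuracy from the confusion matrix
# computed above; it assumes rows of cm are true labels and columns are
# predictions (verify against your cvtk version)
import numpy as np

def per_class_accuracy(cm):
    cm = np.asarray(cm, dtype=float)
    # diagonal = correctly classified counts; row sums = totals per true label
    return np.diag(cm) / cm.sum(axis=1)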
def create_dataset_from_json():
    file_labels = ("C:\\Users\\miprasad\\Downloads\\cvp-1.0.0b2-release5\\"
                   "cvp-1.0.0b2-release\\cvp_project\\classification\\scripts\\"
                   "file_labels.json")
    dataset = ClassificationDataset.create_from_json("recycling", file_labels,
                                                     context=None)
    dataset.print_info()