def main():

    args = doParsing()
    print(args)

    # Read training configuration (the same config file covers dataset creation and training hyperparameters)
    configParams = ConfigParams(args.configFile)

    # Load data
    data = tc.SFrame(args.datasetFile)

    # Create and train model
    model = tc.image_classifier.create(data,
                                       model=configParams.architecture,
                                       max_iterations=configParams.iterations,
                                       target=const.DatasetFeatures.targets,
                                       verbose=True)

    # Save model
    model.save(os.path.join(args.modelOutputDir, configParams.architecture))
    print("Model saved")

    # Export to Core ML
    model.export_coreml(
        os.path.join(args.modelOutputDir,
                     configParams.architecture + '.mlmodel'))
    print("CoreML model exported")
def main():
    """
    Script to export TF profiling results,
    following the tutorial at https://towardsdatascience.com/howto-profile-tensorflow-1a49fb18073d
    You will need Google Chrome to read the exported tracing.
    """
    args = doParsing()
    print(args)

    # Load config (it includes preprocessing type)
    config = ConfigParams(args.configFile)

    # Load model
    model = TensorflowModel(args.modelPath)

    print("Loaded model from " + args.modelPath)

    inputPlaceholder = model.getGraph().get_tensor_by_name(config.inputName + ":0")
    outputTensor = model.getGraph().get_tensor_by_name(config.outputName + ":0")

    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()

    # One by one image prediction forcing GPU usage
    with model.getSession() as sess:

        with tf.device("/gpu:0"):

            for ix, file in tqdm(enumerate(sorted(glob.glob(args.datasetTestDir + "/*.jpg"))[:args.numImages])):

                image = ImageUtils.loadImage(file)
                # Resize image and preprocess (inception or vgg preprocessing based on config)
                processedImage = ImageUtils.preprocessing(image=image, width=config.inputSize, height=config.inputSize,
                                                          preprocessingType=config.preprocessType,
                                                          meanRGB=config.meanRGB)

                # Convert colorspace
                processedImage = ImageUtils.convertImageFormat(processedImage, format=config.inputFormat)

                # Add 1st dimension for image index in batch
                processedImage = np.expand_dims(processedImage, axis=0)

                # Get and print TOP1 class
                result = sess.run(outputTensor, feed_dict={inputPlaceholder: processedImage},
                                  options=options, run_metadata=run_metadata)
                print(os.path.basename(file) + " -> " + str(np.argmax(result[0])))

                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format()
                # Create chrome tracing which you can load with chrome://tracing inside Chrome browser
                os.makedirs(os.path.join('profiling', 'chrome_tracing', config.architecture), exist_ok=True)
                with open(os.path.join('profiling', 'chrome_tracing', config.architecture, 'timeline_step_%d.json' % ix), 'w') as f:
                    f.write(chrome_trace)

    print("Test finished")
def main():

    args = doParsing()
    print(args)

    # Read training configuration (the same config file covers dataset creation and training hyperparameters)
    configParams = ConfigParams(args.configFile)

    # Select train device
    trainDevice = selectTrainDevice(args.useGpu)

    # Load DataProvider
    dataProvider = DatasetTFReader(
        datasetDir=args.datasetDir,
        datasetMetadata=DatasetMetadata().initFromJson(os.path.join(args.datasetDir, "metadata.json")),
        configParams=configParams)

    # Load base model graph (fine-tuning training)
    try:
        baseTFModel = TensorflowModel(os.path.join(args.baseModelDir, "graph.pb"))
    except Exception:
        print("Warning: no base model provided or impossible to read,"
              " this fallback works only for a custom model created from scratch")
        baseTFModel = TensorflowModel()

    # Append a classifier for fine-tuning training
    trainingModel = ModelFactory.create(config=configParams, tfmodel=baseTFModel,
                                        dataProvider=dataProvider, trainDevice=trainDevice)

    # Run training
    trainProcess = TrainProcess(config=configParams, trainingModel=trainingModel,
                                dataProvider=dataProvider, outputDir=args.checkpointOutputDir,
                                tensorboardDir=args.tensorboardDir)
    trainProcess.runTrain()

    # Freeze the graph (graphdef plus parameters);
    # only the layers needed to produce output_node_names are kept in the frozen graph
    freeze_graph(input_graph=args.checkpointOutputDir + "/model_graph.pb", input_saver="", input_binary=True,
                 input_checkpoint=args.checkpointOutputDir + "/model", output_node_names=configParams.outputName,
                 restore_op_name="save/restore_all", filename_tensor_name="save/Const:0",
                 output_graph=args.modelOutputDir + "/graph.pb", clear_devices=True, initializer_nodes="")
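# selectTrainDevice is another repository helper; a plausible sketch, assuming
# it simply maps the --useGpu flag to a TF device string:
def selectTrainDevice(useGpu):
    return "/gpu:0" if useGpu else "/cpu:0"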
def main():

    args = do_parsing()
    print(args)

    # Read dataset configuration (the same config file covers dataset creation and training hyperparameters)
    datasetParams = ConfigParams(args.configFile)

    # Get dataset writer with training and validation splits
    dataset = DatasetWriterFactory.createDatasetWriter(datasetParams=datasetParams, scriptArgs=args)

    if not os.path.exists(args.outputDir):
        os.makedirs(args.outputDir)

    trainingOutputFile = os.path.join(args.outputDir, "data_train.tfrecords")
    validationOutputFile = os.path.join(args.outputDir, "data_val.tfrecords")
    jsonFilePath = os.path.join(args.outputDir, "metadata.json")

    # Export Train Samples
    with tf.python_io.TFRecordWriter(trainingOutputFile) as tfrecWriter:
        print("TRAINING")
        dataset.saveTFExamplesTraining(datasetParams=datasetParams, writer=tfrecWriter)
        print("Saving file...")

    # Export Validation Samples
    with tf.python_io.TFRecordWriter(validationOutputFile) as tfrecWriter:
        print("VALIDATION")
        dataset.saveTFExamplesValidation(datasetParams=datasetParams, writer=tfrecWriter)
        print("Saving file...")

    # Export metadata to JSON
    trainingSamplesNumber = dataset.getTrainingSamplesNumber()
    validationSamplesNumber = dataset.getValidationSamplesNumber()
    datasetMetadata = DatasetMetadata(trainingSamplesNumber, validationSamplesNumber, dataset.numClasses)

    with open(jsonFilePath, 'w') as jsonOutFile:
        json.dump(datasetMetadata, jsonOutFile, default=lambda o: o.__dict__, indent=4)

    print("Dataset successfully created in " + args.outputDir)
def main():
    """
    Script to export bottleneck features. Images are read one by one.
    """
    args = doParsing()
    print(args)

    # Load config (it includes preprocessing type)
    config = ConfigParams(args.configFile)

    # Load model
    model = TensorflowModel(args.modelPath)

    print("Loaded model from " + args.modelPath)

    inputPlaceholder = model.getGraph().get_tensor_by_name(config.inputName + ":0")
    outputTensor = model.getGraph().get_tensor_by_name(config.lastFrozenLayerName + ":0")

    # Check whether the dataset directory contains class subdirectories or holds all files directly
    dirs = [os.path.join(args.datasetDir, d) for d in os.listdir(args.datasetDir)
               if os.path.isdir(os.path.join(args.datasetDir, d))]

    # Create output directories (one per class)
    for classDir in dirs:
        outputDir = os.path.join(args.outputDir, os.path.basename(classDir))
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

    # Case with one directory with all files
    singleDir = False
    if not dirs:
        singleDir = True
        dirs = [args.datasetDir]
        if not os.path.exists(args.outputDir):
            os.makedirs(args.outputDir)

    # One by one image prediction forcing CPU usage
    with model.getSession() as sess:

        with tf.device("/cpu:0"):

            for srcDir in tqdm(sorted(dirs), unit="directory"):

                for file in tqdm(sorted(glob.glob(srcDir + "/*.jpg")), unit="image"):

                    if singleDir:
                        outputDir = args.outputDir
                    else:
                        outputDir = os.path.join(args.outputDir, os.path.basename(srcDir))

                    image = ImageUtils.loadImage(file)
                    # Resize image and preprocess (inception or vgg preprocessing based on config)
                    processedImage = ImageUtils.preprocessing(image=image, width=config.inputSize, height=config.inputSize,
                                                              preprocessingType=config.preprocessType,
                                                              meanRGB=config.meanRGB)

                    # Convert colorspace
                    processedImage = ImageUtils.convertImageFormat(processedImage, format=config.inputFormat)

                    # Add 1st dimension for image index in batch
                    processedImage = np.expand_dims(processedImage, axis=0)

                    # Get and print TOP1 class
                    result = sess.run(outputTensor, feed_dict={inputPlaceholder: processedImage})

                    # Force a global average in case the layer is too big (e.g. SqueezeNet is 14x14x512 at
                    # this stage, because global average pooling is applied in the top classifier,
                    # not to the bottlenecks)
                    if len(result.shape) == 4 and (result.shape[1] != 1 or result.shape[2] != 1):
                        result = np.mean(result, axis=(1, 2), keepdims=True)

                    # Save npy file (features have 1D shape, e.g. MobileNet has 1024 elements, NASNet Mobile 1056)
                    outputFileName = os.path.basename(file)
                    outputFileName = outputFileName[:outputFileName.rfind(".")]
                    np.save(os.path.join(outputDir, outputFileName + ".npy"), result.reshape(-1))

    print("Export features finished")
def main():

    # Load params
    args = do_parsing()
    print(args)

    # Load config file with model, hyperparameters and preprocessing
    config = ConfigParams(args.config_file)

    # Prepare preprocessing transform pipeline
    preprocessing_transforms = Preprocessing(config)
    preprocessing_transforms_train = preprocessing_transforms.get_transforms_train()
    preprocessing_transforms_val = preprocessing_transforms.get_transforms_val()

    # Read Dataset
    dataset_train = StandardDataset(args.dataset_train_dir,
                                    preprocessing_transforms_train)
    print("Train - Classes: {0}, Samples: {1}".format(
        str(len(dataset_train.get_classes())), str(len(dataset_train))))
    dataset_val = StandardDataset(args.dataset_val_dir,
                                  preprocessing_transforms_val)
    print("Validation - Classes: {0}, Samples: {1}".format(
        str(len(dataset_val.get_classes())), str(len(dataset_val))))
    print("Classes " + str(dataset_train.get_classes()))

    # Load model and apply .train() and .cuda()
    model = ModelsFactory.create(config, len(dataset_train.get_classes()))
    print(model)
    model.cuda()
    model.train()

    # Create two PyTorch DataLoaders from StandardDataset (train + val)
    train_loader = DataLoader(dataset_train,
                              batch_size=config.batch_size,
                              shuffle=True,
                              num_workers=8)
    val_loader = DataLoader(dataset_val,
                            batch_size=config.batch_size,
                            shuffle=False,
                            num_workers=8)

    # Set Optimizer and Loss
    # CrossEntropyLoss applies LogSoftmax internally, while NLLLoss expects log-probabilities as input
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=config.learning_rate,
                          momentum=config.momentum)

    for epoch in range(config.epochs):

        running_loss = 0.0

        # Iterate on train batches and update weights using loss
        for batch_i, data in enumerate(train_loader):
            # get the input images and their corresponding labels
            images = data['image']
            gts = data['gt']

            # Move to GPU
            gts = gts.type(torch.cuda.LongTensor)
            images = images.type(torch.cuda.FloatTensor)

            # forward pass to get outputs
            output = model(images)

            # calculate the loss between predicted and target class
            loss = criterion(output, gts)

            # zero the parameter (weight) gradients
            optimizer.zero_grad()

            # backward pass to calculate the weight gradients
            loss.backward()

            # update the weights
            optimizer.step()

            # print loss statistics
            running_loss += loss.item()
            if batch_i % 10 == 9:  # print every 10 batches
                print('Epoch: {}, Batch: {}, Avg. Loss: {}'.format(
                    epoch + 1, batch_i + 1, running_loss / 10))
                running_loss = 0.0

        # Iterate on validation batches
        print("Calculating validation accuracy...")
        exact_match = 0
        for batch_i, data in enumerate(val_loader):
            # get the input images and their corresponding labels
            images = data['image']
            gts = data['gt']

            # Move to GPU
            gts = gts.type(torch.cuda.LongTensor)
            images = images.type(torch.cuda.FloatTensor)

            # forward pass to get outputs
            output = model(images)

            # Calculate Accuracy
            output = output.cpu().data.numpy()
            predictions = np.argmax(output, axis=1)
            gts_np = gts.cpu().data.numpy()
            correct = np.count_nonzero(predictions == gts_np)
            exact_match += correct

        print('Epoch: {}, Validation Accuracy: {}'.format(
            epoch + 1, exact_match / len(dataset_val)))

    # Save model
    torch.save(model.state_dict(), args.model_output_path)

    print("End")
def main():

    args = doParsing()
    print(args)

    config = ConfigParams(args.configFile)

    preprocessFunction = getPreprocessFunction(config.preprocessType)

    # Image generator using the preprocess function, e.g. MobileNet needs the [-1.0, 1.0] range (Inception-like preprocessing)
    trainImageGenerator = ImageDataGenerator(preprocessing_function=preprocessFunction,
                                             rotation_range=10,
                                             width_shift_range=0.1,
                                             height_shift_range=0.1,
                                             zoom_range=.1,
                                             horizontal_flip=True)
    valImageGenerator = ImageDataGenerator(preprocessing_function=preprocessFunction)

    trainGenerator = trainImageGenerator.flow_from_directory(
        args.datasetTrainDir,
        # height, width
        target_size=(config.inputSize, config.inputSize),
        batch_size=config.batchSize,
        class_mode='categorical',
        shuffle=True)

    valGenerator = valImageGenerator.flow_from_directory(
        args.datasetValDir,
        # height, width
        target_size=(config.inputSize, config.inputSize),
        batch_size=config.batchSize,
        class_mode='categorical',
        shuffle=False)

    # Load model using config file
    model = ModelsFactory.create(config, trainGenerator.num_classes)

    print(model.summary())

    # Train with categorical cross-entropy (also works with more than two classes)
    model.compile(loss='categorical_crossentropy',
                  optimizer=config.optimizer,
                  metrics=['categorical_accuracy'])

    # Callbacks for early stopping and best model save
    earlyStoppingCB = EarlyStopping(monitor='val_categorical_accuracy', min_delta=0, patience=config.patience, verbose=1, mode='auto')
    modelChkptCB = ModelCheckpoint(args.modelOutputPath, monitor='val_categorical_accuracy', verbose=1, save_best_only=True,
                                   save_weights_only=False, mode='auto', period=1)

    # fine-tune the model
    model.fit_generator(
        trainGenerator,
        steps_per_epoch=trainGenerator.samples//trainGenerator.batch_size,
        epochs=config.epochs,
        validation_data=valGenerator,
        validation_steps=valGenerator.samples//valGenerator.batch_size,
        callbacks=[earlyStoppingCB, modelChkptCB])

    print("Training finished")

    print("Model saved to " + args.modelOutputPath)

    # Export model to TF format
    if args.tfModelOutputDir is not None:
        exportModelToTF(args.tfModelOutputDir)
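# getPreprocessFunction presumably dispatches on the configured preprocessing
# type to the matching keras.applications helper; a hedged sketch (the type
# names and mapping are assumptions):
from keras.applications.imagenet_utils import preprocess_input as vggPreprocess
from keras.applications.inception_v3 import preprocess_input as inceptionPreprocess

def getPreprocessFunction(preprocessType):
    # "inception" scales to [-1.0, 1.0], "vgg" subtracts the ImageNet mean.
    if preprocessType == "inception":
        return inceptionPreprocess
    elif preprocessType == "vgg":
        return vggPreprocess
    raise ValueError("Unknown preprocess type: " + preprocessType)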
def main():
    """
    Script to export results for Kaggle. Images are read one by one.
    """
    args = doParsing()
    print(args)

    # Load config (it includes preprocessing type)
    config = ConfigParams(args.configFile)

    # Load model
    model = TensorflowModel(args.modelPath)

    print("Loaded model from " + args.modelPath)

    # Dogs and cats test dataset has 12500 samples

    results = []

    inputPlaceholder = model.getGraph().get_tensor_by_name(config.inputName +
                                                           ":0")
    outputTensor = model.getGraph().get_tensor_by_name(config.outputName +
                                                       ":0")

    # One by one image prediction forcing CPU usage
    with model.getSession() as sess:

        with tf.device("/cpu:0"):

            for file in sorted(glob.glob(args.datasetTestDir + "/*.jpg")):

                image = ImageUtils.loadImage(file)
                # Resize image and preprocess (inception or vgg preprocessing based on config)
                processedImage = ImageUtils.preprocessing(
                    image=image,
                    width=config.inputSize,
                    height=config.inputSize,
                    preprocessingType=config.preprocessType,
                    meanRGB=config.meanRGB)

                # Convert colorspace
                processedImage = ImageUtils.convertImageFormat(
                    processedImage, format=config.inputFormat)

                # Add 1st dimension for image index in batch
                processedImage = np.expand_dims(processedImage, axis=0)

                # Get and print TOP1 class
                result = sess.run(outputTensor,
                                  feed_dict={inputPlaceholder: processedImage})
                print(
                    os.path.basename(file) + " -> " +
                    classes[int(np.argmax(result[0]))])

                # Get and save dog probability
                results.append((
                    os.path.basename(file)[:os.path.basename(file).rfind('.')],
                    result[0][classes.index("dog")]))

    print("Test finished")

    if args.kaggleExportFile is not None:
        exportResults(results, args.kaggleExportFile)
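# exportResults writes the Kaggle submission file; a minimal sketch, assuming
# the Dogs vs. Cats "id,label" CSV format where label is the dog probability:
import csv

def exportResults(results, exportFile):
    # results: list of (image id, dog probability) tuples, as built above.
    with open(exportFile, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["id", "label"])
        for imageId, dogProbability in results:
            writer.writerow([imageId, dogProbability])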
def main():
    """
    Script to export TF profiling results,
    following the up-to-date TF 1.13.1 documentation: https://www.tensorflow.org/api_docs/python/tf/profiler/Profiler
    You can use https://github.com/tensorflow/profiler-ui to view the results.
    """
    args = doParsing()
    print(args)

    # Load config (it includes preprocessing type)
    config = ConfigParams(args.configFile)

    # Load model
    model = TensorflowModel(args.modelPath)

    print("Loaded model from " + args.modelPath)

    inputPlaceholder = model.getGraph().get_tensor_by_name(config.inputName +
                                                           ":0")
    outputTensor = model.getGraph().get_tensor_by_name(config.outputName +
                                                       ":0")

    profiler = Profiler(model.getGraph())

    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()

    # One by one image prediction forcing GPU usage
    with model.getSession() as sess:

        with tf.device("/gpu:0"):

            for ix, file in tqdm(
                    enumerate(
                        sorted(glob.glob(args.datasetTestDir +
                                         "/*.jpg"))[:args.numImages])):

                image = ImageUtils.loadImage(file)
                # Resize image and preprocess (inception or vgg preprocessing based on config)
                processedImage = ImageUtils.preprocessing(
                    image=image,
                    width=config.inputSize,
                    height=config.inputSize,
                    preprocessingType=config.preprocessType,
                    meanRGB=config.meanRGB)

                # Convert colorspace
                processedImage = ImageUtils.convertImageFormat(
                    processedImage, format=config.inputFormat)

                # Add 1st dimension for image index in batch
                processedImage = np.expand_dims(processedImage, axis=0)

                # Get and print TOP1 class
                result = sess.run(outputTensor,
                                  feed_dict={inputPlaceholder: processedImage},
                                  options=options,
                                  run_metadata=run_metadata)
                print(
                    os.path.basename(file) + " -> " +
                    str(np.argmax(result[0])))

                profiler.add_step(ix, run_metadata)

                # Profile the parameters of your model.
                #profiler.profile_name_scope(options=(tf.profiler.ProfileOptionBuilder
                #                                     .trainable_variables_parameter()))

                # Or profile the timing of your model operations.
                opts = tf.profiler.ProfileOptionBuilder.time_and_memory()
                profiler.profile_operations(options=opts)

                # Or you can generate a timeline:
                # opts = (tf.profiler.ProfileOptionBuilder(
                #     tf.profiler.ProfileOptionBuilder.time_and_memory())
                #         .with_step(ix)
                #         .with_timeline_output("timeline_step.json").build())
                #profiler.profile_graph(options=opts)

    os.makedirs(os.path.join('profiling', 'tfprof', config.architecture),
                exist_ok=True)
    # Auto detect problems and generate advice.
    profiler.advise(ALL_ADVICE)

    with open(
            os.path.join('profiling', 'tfprof', config.architecture,
                         'profiler.context'), 'wb') as f:
        f.write(profiler.serialize_to_string())

    print("Test finished")
def main():

    # See this example https://gist.github.com/fchollet/7eb39b44eb9e16e59632d25fb3119975 for reference
    # and this blog post https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

    args = doParsing()
    print(args)

    config = ConfigParams(args.configFile)

    preprocessFunction = getPreprocessFunction(config.preprocessType)

    # Image generator using the preprocess function, e.g. MobileNet needs the [-1.0, 1.0] range (Inception-like preprocessing)
    trainImageGenerator = ImageDataGenerator(
        preprocessing_function=preprocessFunction, horizontal_flip=True)
    valImageGenerator = ImageDataGenerator(
        preprocessing_function=preprocessFunction)

    trainGenerator = trainImageGenerator.flow_from_directory(
        args.datasetTrainDir,
        # height, width
        target_size=(config.inputSize, config.inputSize),
        batch_size=config.batchSize,
        class_mode='categorical',
        shuffle=True)

    valGenerator = valImageGenerator.flow_from_directory(
        args.datasetValDir,
        # height, width
        target_size=(config.inputSize, config.inputSize),
        batch_size=config.batchSize,
        class_mode='categorical',
        shuffle=False)

    # Load model using config file
    model = ModelsFactory.create(config, trainGenerator.num_classes)

    print(model.summary())

    # Train with categorical cross-entropy (also works with more than two classes)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.SGD(lr=config.learningRate,
                                           momentum=config.momentum),
                  metrics=['categorical_accuracy'])

    # TODO: Explain monitors in README

    # Callbacks for early stopping, LR step reducing and best model save
    earlyStoppingCB = EarlyStopping(monitor='val_categorical_accuracy',
                                    min_delta=0,
                                    patience=config.patience,
                                    verbose=1,
                                    mode='auto')

    modelChkptCB = ModelCheckpoint(args.modelOutputPath,
                                   monitor='val_categorical_accuracy',
                                   verbose=1,
                                   save_best_only=True,
                                   save_weights_only=False,
                                   mode='auto',
                                   period=1)

    reduceLROnPlateau = ReduceLROnPlateau(monitor='val_categorical_accuracy',
                                          factor=0.1,
                                          patience=config.patience / 2,
                                          verbose=1)

    # fine-tune the model
    model.fit_generator(
        trainGenerator,
        steps_per_epoch=trainGenerator.samples // trainGenerator.batch_size,
        epochs=config.epochs,
        validation_data=valGenerator,
        validation_steps=valGenerator.samples // valGenerator.batch_size,
        callbacks=[earlyStoppingCB, modelChkptCB, reduceLROnPlateau])

    print("Training finished")

    print("Model saved to " + args.modelOutputPath)

    # Export model to TF format
    if args.tfModelOutputDir is not None:
        exportModelToTF(args.tfModelOutputDir)
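# exportModelToTF converts the trained Keras model into a frozen TF graph that
# the other scripts can load; a minimal sketch using the standard TF 1.x
# freezing pattern with the Keras TensorFlow backend. The repository version
# takes only the output directory, so it presumably reaches the model another
# way; the extra model parameter here is an assumption:
import tensorflow as tf
from keras import backend as K

def exportModelToTF(outputDir, model):
    # Convert variables to constants, keeping only the nodes needed to
    # compute the model output, then write a binary graph.pb.
    sess = K.get_session()
    frozenGraphDef = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), [model.output.op.name])
    tf.train.write_graph(frozenGraphDef, outputDir, "graph.pb", as_text=False)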
def main():

    # Load params
    args = do_parsing()
    print(args)

    # Load config file with model and preprocessing (must be the same one used in training to be consistent)
    config = ConfigParams(args.config_file)

    # Prepare preprocessing transform pipeline (same processing of validation dataset)
    preprocessing_transforms = Preprocessing(config)
    preprocessing_transforms_test = preprocessing_transforms.get_transforms_val()

    # Read test Dataset
    dataset_test = SingleDirDataset(args.dataset_test_dir,
                                    preprocessing_transforms_test)
    print("Test - Samples: {0}".format(str(len(dataset_test))))

    # Load model and apply .eval() and .cuda()
    model = ModelsFactory.create(config, len(classes))
    print(model)
    model.cuda()
    model.eval()

    # Load trained weights
    model.load_state_dict(torch.load(args.model_path))

    # Create a PyTorch DataLoader from SingleDirDataset
    test_loader = DataLoader(dataset_test,
                             batch_size=config.batch_size,
                             shuffle=False,
                             num_workers=8)

    # Results for each image in Kaggle-compatible format (we need to export the dog probability)
    results = []

    print("Evaluating test dataset...")

    for batch_i, data in enumerate(test_loader):

        # Retrieve images
        images = data["image"]

        # Move to GPU
        images = images.type(torch.cuda.FloatTensor)

        # forward pass to get outputs
        output = model(images)
        probs = nn.Softmax(dim=1)(output)
        probs_np = probs.cpu().data.numpy()

        files = data["file"]
        for index, file in enumerate(files):
            results.append(
                (os.path.basename(file)[:os.path.basename(file).rfind('.')],
                 probs_np[index][classes.index("dog")]))

    print("Test finished")

    if args.kaggle_export_file is not None:
        export_results(results, args.kaggle_export_file)
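# SingleDirDataset mirrors StandardDataset but reads a flat, unlabeled test
# directory and returns the "image" and "file" keys used above; a hypothetical
# sketch:
import glob
import os
from PIL import Image
from torch.utils.data import Dataset

class SingleDirDataset(Dataset):

    def __init__(self, root_dir, transform=None):
        self.files = sorted(glob.glob(os.path.join(root_dir, "*.jpg")))
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        path = self.files[idx]
        image = Image.open(path).convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return {"image": image, "file": path}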