示例#1
0
    def test_classify_image(self):
        """Classify a base64-encoded image and expect exactly one label.

        The test bot's model directory is replaced with a copy of the
        ``protobuf/bot_test`` fixture directory, the base64-encoded content of
        ``daisy.jpg`` is passed to ``service.classify_image`` and the result
        must contain one label and one probability.
        """
        model_dir = dirs.get_model_data_dir(TEST_BOT_ID)
        if os.path.exists(model_dir):
            shutil.rmtree(model_dir)
        shutil.copytree(os.path.join(FILES_DIR, 'protobuf/bot_test'),
                        model_dir)

        try:
            # Context managers close both handles even if classification
            # raises.
            with open(os.path.join(FILES_DIR, 'daisy.jpg'), 'rb') as image_file, \
                    tempfile.NamedTemporaryFile() as temp_file:
                temp_file.write(base64.b64encode(image_file.read()))
                temp_file.seek(0)

                labels, probabilities = service.classify_image(
                    TEST_BOT_ID, temp_file.read())

            self.assertEqual(1, len(labels))
            self.assertEqual(1, len(probabilities))
        finally:
            # Clean the bot_model directory for the next test run. Doing this
            # in ``finally`` keeps a failed assertion from leaving files
            # behind.
            for entry in os.listdir(model_dir):
                entry_path = os.path.join(model_dir, entry)
                try:
                    if os.path.isfile(entry_path):
                        os.unlink(entry_path)
                except OSError as e:
                    # Best-effort cleanup: report and keep going.
                    print(e)
    def test_inference_on_image(self):
        """Run inference on an image file on disk and expect one label.

        The test bot's model directory is replaced with a copy of the
        ``protobuf/bot_test`` fixture directory, ``daisy.jpg`` is copied into
        a named temporary file and the path of that file is passed to
        ``classifier.inference_on_image`` with ``return_labels=1``.
        """
        model_dir = dirs.get_model_data_dir(TEST_BOT_ID)
        if os.path.exists(model_dir):
            shutil.rmtree(model_dir)
        shutil.copytree(os.path.join(FILES_DIR, 'protobuf/bot_test'),
                        model_dir)

        try:
            with open(os.path.join(FILES_DIR, 'daisy.jpg'), 'rb') as image_file, \
                    tempfile.NamedTemporaryFile() as temp_file:
                temp_file.write(image_file.read())
                # Only the path is handed over, so the buffered bytes must be
                # written out before anything else opens the file.
                temp_file.flush()

                # ``temp_file.name`` is already the full path of the file, so
                # joining it with the temp directory is unnecessary.
                labels, probabilities = classifier.inference_on_image(
                    TEST_BOT_ID,
                    temp_file.name,
                    return_labels=1)

            self.assertEqual(len(labels), len(probabilities))
            self.assertEqual(1, len(labels))
            self.assertEqual(1, len(probabilities))
        finally:
            # Clean the bot_model directory for the next test run, even when
            # an assertion above failed.
            for entry in os.listdir(model_dir):
                entry_path = os.path.join(model_dir, entry)
                try:
                    if os.path.isfile(entry_path):
                        os.unlink(entry_path)
                except OSError as e:
                    # Best-effort cleanup: report and keep going.
                    print(e)
def train():
    """Train the 'root' model from the 'root' protobuf dataset.

    Validates that ``labels.txt`` exists in the root protobuf directory and
    contains exactly 12 lines, creates the root model directory if it is
    missing and then calls ``trainer.train`` with a time budget of one week.

    :return: always None; validation failures are reported on stdout
    """
    protobuf_dir = dirs.get_protobuf_dir(
        'root')  # Read the protobuffer files for the initial car dataset

    # Make sure the labels file is there and lists the expected classes
    labels_file = os.path.join(protobuf_dir, 'labels.txt')
    if not os.path.isfile(labels_file):
        print('Missing labels in %s' % labels_file)
        return None

    # One label per line; the context manager closes the file handle.
    with open(labels_file) as labels_handle:
        labels = sum(1 for _ in labels_handle)

    if labels != 12:
        print('Wrong number of labels: %s in: %s' % (labels, protobuf_dir))
        return None

    # e.g. /home/markus/projects/cnn_server/model/bot_root
    bot_model_dir = dirs.get_model_data_dir('root')
    # Make sure bot_model_dir exists (its content is not checked here)
    if not os.path.isdir(bot_model_dir):
        os.mkdir(bot_model_dir)

    trainer.train(
        bot_model_dir=bot_model_dir,
        protobuf_dir=protobuf_dir,
        max_train_time_sec=(60 * 60 * 24 * 7),  # Adjust training time here.
        optimization_params=None,
        log_every_n_steps=10)
def train(bot_id, test=False, max_train_time=None):
    """Transfer-learn a bot model from the root model.

    Validates the root model, bot model and bot protobuf directories, runs
    ``transfer_learning.transfer_learning`` and then verifies that the bot
    model directory contains a ``checkpoint`` file.

    :param bot_id: id used to resolve the bot's model and protobuf directories
    :param test: when truthy, the training time is forced to 60 seconds and
        ``max_train_time`` is ignored
    :param max_train_time: maximum training time in seconds, passed on as
        ``max_train_time_sec``
    :return: True when training ran and left a checkpoint file behind, False
        when any pre- or post-condition failed (the reason is printed)
    """
    if test:
        max_train_time = 60  # If we run a test, train for one minute only

    root_model_dir = dirs.get_root_model_dir()
    bot_model_dir = dirs.get_model_data_dir(bot_id)
    bot_protobuf_dir = dirs.get_protobuf_dir(bot_id)

    # root_model_dir must exist, not be empty and contain a checkpoints file
    if not os.path.exists(root_model_dir):
        print('root_model_dir %s does not exist' % root_model_dir)
        return False
    if not os.listdir(root_model_dir):
        print('root_model_dir %s is empty' % root_model_dir)
        return False
    if not os.path.isfile(os.path.join(root_model_dir, 'checkpoint')):
        print('no checkpoint files in root_model_dir %s' % root_model_dir)
        return False

    # bot_model_dir must exist and be empty
    if not os.path.exists(bot_model_dir):
        print('bot_model_dir %s does not exist' % bot_model_dir)
        return False
    if os.listdir(bot_model_dir):
        print('bot_model_dir %s is not empty' % bot_model_dir)
        return False

    # bot_protobuf_dir must exist and not be empty
    if not os.path.exists(bot_protobuf_dir):
        print('bot_protobuf_dir %s does not exist' % bot_protobuf_dir)
        return False
    if not os.listdir(bot_protobuf_dir):
        print('bot_protobuf_dir %s does not contain training data' % bot_protobuf_dir)
        return False

    transfer_learning.transfer_learning(
        root_model_dir=root_model_dir,
        bot_model_dir=bot_model_dir,
        protobuf_dir=bot_protobuf_dir,
        dataset_name='bot',
        dataset_split_name='train',
        model_name='inception_v4',
        checkpoint_exclude_scopes=['InceptionV4/Logits', 'InceptionV4/AuxLogits'],
        trainable_scopes=['InceptionV4/Logits', 'InceptionV4/AuxLogits'],
        max_train_time_sec=max_train_time
    )

    # After Transfer Learning bot_model_dir must exist, not be empty and contain a checkpoint file
    if not os.path.exists(bot_model_dir):
        print('bot_model_dir %s does not exist after transfer learning' % bot_model_dir)
        return False
    if not os.listdir(bot_model_dir):
        print('bot_model_dir %s is empty after transfer learning' % bot_model_dir)
        return False
    if not os.path.isfile(os.path.join(bot_model_dir, 'checkpoint')):
        print('no checkpoint file in bot_model_dir %s after transfer learning' % bot_model_dir)
        # A missing checkpoint is a failure like the two checks above; without
        # this return the function would report success.
        return False

    # TODO: Implement proper validation of the created model file: read ckpt path from first line and lookup in folder
    return True
示例#5
0
 def test_get_bot_id_from_dir(self):
     """Each per-bot directory must resolve back to the bot id it was built from."""
     expected_bot_id = 'bmw_models'
     # Resolve all three directories first, then check them in that order.
     bot_dirs = (
         dirs.get_training_data_dir(expected_bot_id),
         dirs.get_protobuf_dir(expected_bot_id),
         dirs.get_model_data_dir(expected_bot_id),
     )
     for bot_dir in bot_dirs:
         resolved_bot_id = dirs.get_bot_id_from_dir(bot_dir)
         self.assertEqual(expected_bot_id, resolved_bot_id,
                          'bot ids do not match')
示例#6
0
    def test_handle_post(self):
        """Post a base64-encoded image and expect the requested number of labels.

        Seeds the test bot's protobuf directory from the ``flower_protobuf``
        fixture when it is missing or empty, replaces the model directory with
        the ``protobuf/bot_test`` fixture and sends ``tulip.jpg`` through
        ``handler.handle_post`` asking for three labels.
        """
        protobuf_dir = dirs.get_protobuf_dir(TEST_BOT_ID)
        if not os.path.isdir(protobuf_dir) or not os.listdir(protobuf_dir):
            # shutil.copytree refuses to copy into an existing directory, so
            # an empty leftover directory has to go before seeding.
            if os.path.isdir(protobuf_dir):
                os.rmdir(protobuf_dir)
            shutil.copytree(os.path.join(FILES_DIR, 'flower_protobuf'), protobuf_dir)

        model_dir = dirs.get_model_data_dir(TEST_BOT_ID)
        if os.path.exists(model_dir):
            shutil.rmtree(model_dir)
        shutil.copytree(os.path.join(FILES_DIR, 'protobuf/bot_test'), model_dir)

        expected_return_labels = 3

        try:
            # Context managers close both handles even if the handler raises.
            with open(os.path.join(FILES_DIR, 'tulip.jpg'), 'rb') as image_file, \
                    tempfile.NamedTemporaryFile() as temp_file:
                temp_file.write(base64.b64encode(image_file.read()))
                temp_file.seek(0)

                json_result, status = handler.handle_post(
                    TEST_BOT_ID, temp_file.read(), return_labels=expected_return_labels)

            json_result = json.loads(json_result)
            labels = json_result['labels']
            probs = json_result['probabilities']
            self.assertTrue(labels)
            self.assertTrue(probs)
            self.assertEqual(expected_return_labels, len(labels))
            self.assertEqual(expected_return_labels, len(probs))
        finally:
            # Clean the bot_model directory for the next test run, even when
            # an assertion above failed.
            for entry in os.listdir(model_dir):
                entry_path = os.path.join(model_dir, entry)
                try:
                    if os.path.isfile(entry_path):
                        os.unlink(entry_path)
                except OSError as e:
                    # Best-effort cleanup: report and keep going.
                    print(e)
    def test_train(self):
        """Train the test bot through the PUT handler, then classify an image.

        Calls ``tl_handler.handle_put`` in test mode, checks that the bot
        model directory now holds a ``checkpoint`` file and finally sends
        ``tulip.jpg`` through ``handler.handle_post`` to make sure the result
        contains labels and probabilities.
        """
        bot_model_dir = dirs.get_model_data_dir(TEST_BOT_ID)

        try:
            _, status = tl_handler.handle_put(TEST_BOT_ID,
                                              test=True,
                                              max_train_time=100)

            self.assertEqual(200, status, 'http status code is %s' % status)

            # Check if the bot model dir contains a model now
            self.assertTrue(
                os.listdir(bot_model_dir),
                'bot_model_dir %s is empty after transfer learning' %
                bot_model_dir)
            self.assertTrue(
                os.path.isfile(os.path.join(bot_model_dir, 'checkpoint')),
                'not checkpoints file in bot_model_dir %s after transfer learning'
                % bot_model_dir)

            # Mock a file for classification; the context managers close both
            # handles even if the handler raises.
            with open(os.path.join(FILES_DIR, 'tulip.jpg'), 'rb') as image_file, \
                    tempfile.NamedTemporaryFile() as temp_file:
                temp_file.write(base64.b64encode(image_file.read()))
                temp_file.seek(0)

                json_result, status = handler.handle_post(TEST_BOT_ID,
                                                          temp_file.read(),
                                                          return_labels=5)
                print(json_result)

            self.assertTrue(json_result, 'Classification result is empty')

            json_result = json.loads(json_result)

            self.assertTrue(json_result['labels'],
                            'No labels in json result %s' % json_result)
            self.assertTrue(json_result['probabilities'],
                            'No predictions in json result %s' % json_result)

            print(json_result)
        finally:
            # Clean the bot_model directory for the next test run. This runs
            # on failure too, so leftover files do not block the next run.
            if os.path.isdir(bot_model_dir):
                for entry in os.listdir(bot_model_dir):
                    entry_path = os.path.join(bot_model_dir, entry)
                    try:
                        if os.path.isfile(entry_path):
                            os.unlink(entry_path)
                    except OSError as e:
                        # Best-effort cleanup: report and keep going.
                        print(e)
def delete_bot_data(bot_id):
    """
    Remove the training data, protobuf and model directories of a bot.

    Directories that do not exist are skipped silently.

    :param bot_id: id used to resolve the three directories
    :return: tuple of a confirmation message and the status code 200
    """
    # Resolve every path before touching the filesystem, then delete in the
    # order training data -> protobuf -> model.
    bot_dirs = (
        dirs.get_training_data_dir(bot_id),
        dirs.get_protobuf_dir(bot_id),
        dirs.get_model_data_dir(bot_id),
    )

    for bot_dir in bot_dirs:
        if not os.path.isdir(bot_dir):
            continue
        print('[Training Data Service] - Deleteting %s' % bot_dir)
        rmtree(bot_dir)

    return 'Successfully Deleted Data for Bot %s' % bot_id, 200
示例#9
0
def infere(bot_id,
           image_file,
           network_name='inception_v4',
           return_labels=None,
           prediction_dict=None):
    """
    Loads the corresponding model checkpoint, network function and preprocessing routine based on bot_id and network_name,
    restores the graph and runs it to the prediction endpoint with the image as input
    :param bot_id: bot_id, used to reference to correct model directory
    :param image_file: reference to the temporary image file to be classified
    :param network_name: name of the network type to be used
    :param return_labels: number of labels to return; when falsy, all classes are returned
    :param prediction_dict: mapping that receives the result under the key 'predictions';
        a new dict is created when omitted
    :return: prediction_dict, holding the top n labels with probabilities under 'predictions',
        where n = return_labels
    """
    # The previous default was a shared list, which cannot take a string key;
    # fall back to a fresh dict per call instead.
    if prediction_dict is None:
        prediction_dict = {}

    # Get the model path
    model_path = dirs.get_model_data_dir(bot_id)

    # Get number of classes to predict
    protobuf_dir = dirs.get_protobuf_dir(bot_id)
    number_of_classes = dataset_utils.get_number_of_classes_by_labels(
        protobuf_dir)

    if not return_labels:
        return_labels = number_of_classes

    # Get the preprocessing and network construction functions
    preprocessing_fn = preprocessing_factory.get_preprocessing(
        network_name, is_training=False)
    network_fn = network_factory.get_network_fn(network_name,
                                                number_of_classes)

    # Read the temporary image file and decode it into an image Tensor;
    # the context manager closes the file handle again.
    with tf.gfile.FastGFile(image_file, 'rb') as image_reader:
        image_bytes = image_reader.read()
    image_tensor = tf.image.decode_image(image_bytes, channels=0)

    # Perform preprocessing and resize to the network's default image size
    network_default_size = network_fn.default_image_size
    image_tensor = preprocessing_fn(image_tensor, network_default_size,
                                    network_default_size)

    # Create an input batch of size one from the preprocessed image. The
    # network's own default size is used instead of a hard-coded 299.
    input_batch = tf.reshape(
        image_tensor, [1, network_default_size, network_default_size, 3])

    # Create the network up to the Predictions Endpoint
    logits, endpoints = network_fn(input_batch)

    restorer = tf.train.Saver()

    # The session is closed by the context manager on exit.
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        # Restore the variables of the network from the last checkpoint
        restorer.restore(sess, tf.train.latest_checkpoint(model_path))

        # Run the graph once and take the predictions of the single image in
        # the batch as a numpy array.
        predictions = sess.run(endpoints['Predictions'])[0]

    prediction_dict['predictions'] = map_predictions_to_labels(
        protobuf_dir, predictions, return_labels)
    return prediction_dict
示例#10
0
def eval(bot_id,
         bot_suffix='',
         setting_id=None,
         validation_setting=2,
         dataset_split='validation',
         dataset_name='bot',
         model_name='inception_v4',
         preprocessing=None,
         moving_average_decay=None,
         tf_master=''):
    """
    Evaluate a bot's model checkpoint on a dataset split with TF-Slim.

    Builds a new graph that reads the dataset from the protobuf directory,
    runs the network with is_training=False and computes the streaming
    metrics 'Accuracy' and 'Recall_5' via slim.evaluation.evaluate_once,
    using the bot's performance data directory as logdir.

    :param bot_id: id used to resolve the protobuf, model and performance data directories
    :param bot_suffix: string appended to bot_id when resolving the model directory
    :param setting_id: when truthy, the transfer-learning protobuf and model directories are used
    :param validation_setting: passed to dirs.get_transfer_proto_dir; only used when setting_id is truthy
    :param dataset_split: split name passed to dataset_factory.get_dataset
    :param dataset_name: dataset name passed to dataset_factory.get_dataset
    :param model_name: network name; also used as preprocessing name when preprocessing is not given
    :param preprocessing: optional name of the preprocessing to use instead of model_name
    :param moving_average_decay: when truthy, variables are restored through an ExponentialMovingAverage name map
    :param tf_master: passed as master to slim.evaluation.evaluate_once
    :return: None; the result of evaluate_once is not returned
    """
    # The model directory is looked up with the suffixed id, the protobuf and
    # performance data directories with the plain bot_id.
    full_id = bot_id + bot_suffix
    if setting_id:
        protobuf_dir = dirs.get_transfer_proto_dir(bot_id, validation_setting)
        model_dir = dirs.get_transfer_model_dir(full_id, setting_id)
    else:
        protobuf_dir = dirs.get_protobuf_dir(bot_id)
        model_dir = dirs.get_model_data_dir(full_id)

    # NOTE(review): _check_dir is defined elsewhere; presumably it validates
    # that the directory is usable - confirm.
    _check_dir(protobuf_dir)
    _check_dir(model_dir)

    print("READIND FROM %s AND %s" % (protobuf_dir, model_dir))

    performance_data_dir = dirs.get_performance_data_dir(bot_id)
    #    if os.listdir(performance_data_dir):
    #        raise ValueError('%s is not empty' % performance_data_dir)

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(dataset_name, dataset_split,
                                              protobuf_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            model_name,
            num_classes=(dataset.num_classes - LABELS_OFFSET),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * BATCH_SIZE,
            common_queue_min=BATCH_SIZE)
        [image, label] = provider.get(['image', 'label'])
        # Shift the labels by the same offset that was subtracted from
        # num_classes above.
        label -= LABELS_OFFSET

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = preprocessing or model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        # A truthy EVAL_IMAGE_SIZE overrides the network's default image size.
        eval_image_size = EVAL_IMAGE_SIZE or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch([image, label],
                                        batch_size=BATCH_SIZE,
                                        num_threads=NUM_THREADS,
                                        capacity=5 * BATCH_SIZE)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        if moving_average_decay:
            # Restore through the name map built by ExponentialMovingAverage
            # and add the global step to it explicitly.
            variable_averages = tf.train.ExponentialMovingAverage(
                moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if MAX_NUM_BATCHES:
            num_batches = MAX_NUM_BATCHES
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples / float(BATCH_SIZE))

        # model_dir may be a directory (use its latest checkpoint) or a path
        # that is used as the checkpoint path directly.
        if tf.gfile.IsDirectory(model_dir):
            checkpoint_path = tf.train.latest_checkpoint(model_dir)
        else:
            checkpoint_path = model_dir

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=tf_master,
            checkpoint_path=checkpoint_path,
            logdir=performance_data_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
    def test_transfer_learning(self):
        """Transfer-learn the test bot from the test root model and classify an image.

        The test returns early (without failing) when a precondition on the
        root model, protobuf or bot model directory is not met. Otherwise it
        runs ``transfer_learning.transfer_learning`` for at most 100 seconds,
        checks that the root model is intact and the bot model directory holds
        a ``checkpoint`` file, and sends ``tulip.jpg`` through
        ``handler.handle_post``.
        """
        # Root model to initialize from
        root_model_dir = dirs.get_test_root_model_dir()
        if not os.listdir(root_model_dir):
            print('root_model_dir %s empty. Cannot start test' %
                  root_model_dir)
            return None
        if not os.path.isfile(os.path.join(root_model_dir, 'checkpoint')):
            print('No Checkpoint File in %s. Cannot start test.' %
                  root_model_dir)
            return None

        # Folder to load the additional training data from
        bot_protobuf_dir = dirs.get_protobuf_dir(TEST_BOT_ID)
        if not os.path.isdir(bot_protobuf_dir):
            print('bot_protobuf_dir %s does not exist. Cannot start test' %
                  bot_protobuf_dir)
            return None
        if not os.listdir(bot_protobuf_dir):
            print("bot_protobuf_dir %s is empty. Cannot start test." %
                  bot_protobuf_dir)
            # Like every other precondition: stop here instead of training
            # without data.
            return None

        # Bot model folder to write the transfer learned model back to
        bot_model_dir = dirs.get_model_data_dir(TEST_BOT_ID)
        if not os.path.isdir(bot_model_dir):
            print('bot_model_dir %s does not exist. Cannot start test' %
                  bot_model_dir)
            return None
        if os.listdir(bot_model_dir):
            print('bot_model_dir %s is not emtpy. Cannot start test.' %
                  bot_model_dir)
            return None

        try:
            # Train briefly (at most 100 seconds) to make sure checkpoint
            # files are written appropriately
            transfer_learning.transfer_learning(root_model_dir=root_model_dir,
                                                bot_model_dir=bot_model_dir,
                                                protobuf_dir=bot_protobuf_dir,
                                                max_train_time_sec=100,
                                                log_every_n_steps=2)

            # Check if the root model dir is still intact
            self.assertTrue(
                os.listdir(root_model_dir),
                'root_model_dir %s is empty after transfer learning.' %
                root_model_dir)
            self.assertTrue(
                os.path.isfile(os.path.join(root_model_dir, 'checkpoint')),
                'checkpoints file in root_model_dir %s is gone after transfer learning.'
                % root_model_dir)

            # Check if the bot model dir contains a model now
            self.assertTrue(
                os.listdir(bot_model_dir),
                'bot_model_dir %s is empty after transfer learning' %
                bot_model_dir)
            self.assertTrue(
                os.path.isfile(os.path.join(bot_model_dir, 'checkpoint')),
                'not checkpoints file in bot_model_dir %s after transfer learning'
                % bot_model_dir)

            # Mock a file for classification; the context managers close both
            # handles even if the handler raises.
            with open(os.path.join(FILES_DIR, 'tulip.jpg'), 'rb') as image_file, \
                    tempfile.NamedTemporaryFile() as temp_file:
                temp_file.write(base64.b64encode(image_file.read()))
                temp_file.seek(0)

                json_result, status = handler.handle_post(TEST_BOT_ID,
                                                          temp_file.read(),
                                                          return_labels=5)
                print(json_result)

            self.assertTrue(json_result, 'Classification result is empty')

            json_result = json.loads(json_result)

            self.assertTrue(json_result['labels'],
                            'No labels in json result %s' % json_result)
            self.assertTrue(json_result['probabilities'],
                            'No predictions in json result %s' % json_result)

            print(json_result)
        finally:
            # Clean the bot_model directory for the next test run. Without
            # this, a failed run would leave files behind and the "not empty"
            # precondition above would stop every later run.
            for entry in os.listdir(bot_model_dir):
                entry_path = os.path.join(bot_model_dir, entry)
                try:
                    if os.path.isfile(entry_path):
                        os.unlink(entry_path)
                except OSError as e:
                    # Best-effort cleanup: report and keep going.
                    print(e)