def convert(bot_id):
    training_data_dir = dirs.get_training_data_dir(bot_id)
    protobuf_dir = dirs.get_protobuf_dir(bot_id)
    print('Converting training data for %s' % bot_id)
    start_time = time.time()
    if _check_training_dir(training_data_dir) and _check_proto_dir(
            protobuf_dir):
        converter.run(training_data_dir, protobuf_dir, fract_validation=0.2)
    print('Converted training data for %s in %s sec' %
          (bot_id, (time.time() - start_time)))
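The guards _check_training_dir and _check_proto_dir are referenced here but not shown in these snippets; below is a minimal, hypothetical sketch of what such checks plausibly verify, not the original implementation.

# Hypothetical sketch only -- the real _check_training_dir / _check_proto_dir are not
# part of these snippets; this assumes they simply guard against missing or dirty dirs.
import os


def _check_training_dir(training_data_dir):
    # Training data must exist and contain at least one label folder.
    return os.path.isdir(training_data_dir) and bool(os.listdir(training_data_dir))


def _check_proto_dir(protobuf_dir):
    # Protobuf output dir must exist and be empty so old records are not mixed in.
    return os.path.isdir(protobuf_dir) and not os.listdir(protobuf_dir)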
    def test_get_bot_id_from_dir(self):
        bmw_models_bot_id = 'bmw_models'
        training_data_dir = dirs.get_training_data_dir(bmw_models_bot_id)
        protobuf_dir = dirs.get_protobuf_dir(bmw_models_bot_id)
        model_dir = dirs.get_model_data_dir(bmw_models_bot_id)
        bot_id = dirs.get_bot_id_from_dir(training_data_dir)
        self.assertEqual(bmw_models_bot_id, bot_id, 'bot ids do not match')
        bot_id = dirs.get_bot_id_from_dir(protobuf_dir)
        self.assertEqual(bmw_models_bot_id, bot_id, 'bot ids do not match')
        bot_id = dirs.get_bot_id_from_dir(model_dir)
        self.assertEqual(bmw_models_bot_id, bot_id, 'bot ids do not match')
def _create_label_folders(bot_id, labels, resume_from):
    training_data_dir = dirs.get_training_data_dir(bot_id)
    if os.listdir(training_data_dir) and not resume_from:
        print('Overwriting current training data in %s' % training_data_dir)
        shutil.rmtree(training_data_dir)
        os.mkdir(training_data_dir)
        for label in labels:
            os.mkdir(os.path.join(training_data_dir, label))
    if os.listdir(training_data_dir) and resume_from:
        print('Resuming from %s. Label Folders exist.' % resume_from)
    if not os.listdir(training_data_dir):
        print('Creating file structure for training data in %s' % training_data_dir)
        for label in labels:
            os.mkdir(os.path.join(training_data_dir, label))
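A hedged usage sketch for _create_label_folders: seeding the label folder structure for a fresh bot. The bot id and label names below are placeholders chosen to match other snippets, not taken from the original source.

# Illustrative call only -- bot id and labels are placeholders.
labels = ['bmw3', 'bmw5', 'bmw6', 'bmw7']
_create_label_folders('bmw_models', labels, resume_from=None)
# Afterwards the training data dir should contain one sub-folder per label:
#   <training_data_dir>/bmw3, .../bmw5, .../bmw6, .../bmw7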
    def test_get_split_size(self):
        bmw_model_bot_id = 'bmw_models'
        bmw_model_bot_dir = dirs.get_training_data_dir(bmw_model_bot_id)
        if not bmw_model_bot_dir:
            print(
                'Bot Training Data Dir %s is not available. Test cannot run.' %
                bmw_model_bot_dir)
            return None
        expected_train_set_size = 4099
        expected_val_set_size = 455
        train_split = utils.get_split_size(bmw_model_bot_dir, 'train')
        val_split = utils.get_split_size(bmw_model_bot_dir, 'validation')
        self.assertIn(expected_train_set_size - train_split, range(-1, 2))
        self.assertIn(expected_val_set_size - val_split, range(-1, 2))
def create_training_data_dir(bot_id: int, training_data_file):
    """
    Verify that the files training data directory is there and empty and write the zipped training data to it.
    """
    bot_training_dir = dirs.get_training_data_dir(bot_id)

    # If the training data directory is not there, create it
    if not os.path.exists(bot_training_dir):
        os.mkdir(bot_training_dir)

    # If the training data directory already contains data, don't do anything
    if os.listdir(bot_training_dir):
        return False

    # Extract the contents of the zip file to the training data directory
    ZipFile(training_data_file).extractall(bot_training_dir)
    return True
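A sketch of how create_training_data_dir might be exercised with an uploaded archive, assuming a zip whose top level holds one folder per label; the bot id and file name are hypothetical.

# Hypothetical usage -- 'bmw_models.zip' is a placeholder archive, bot id 1 matches BOT_ID elsewhere.
if create_training_data_dir(1, 'bmw_models.zip'):
    print('Training data extracted')
else:
    print('Training data directory already populated, nothing written')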
    def test_run(self):
        protobuf_dir = dirs.get_protobuf_dir(BOT_ID)
        training_data_dir = dirs.get_training_data_dir(BOT_ID)
        if not os.listdir(training_data_dir):
            print("Cannot start test. No data in %s" & training_data_dir)
            return
        if not os.path.exists(protobuf_dir):
            os.mkdir(protobuf_dir)
        if os.listdir(protobuf_dir):
            shutil.rmtree(protobuf_dir)
            os.mkdir(protobuf_dir)

        converter.run(training_data_dir, protobuf_dir, 0.1)

        # Check if the labels.txt has been created
        self.assertTrue(
            os.path.isfile(os.path.join(protobuf_dir, 'labels.txt')))

        # Make sure the labels file contains as many lines as the training data folder has subfolders
        with open(os.path.join(protobuf_dir, 'labels.txt')) as f:
            for lndx, dir in enumerate(os.listdir(training_data_dir)):
                pass
            for fndx, ln in enumerate(f):
                pass
            self.assertEqual(lndx, fndx)

        # Make sure there are 10 tfrecord files in total: 5 for training and 5 for validation
        protofiles = 0
        training_files = 0
        validation_files = 0
        for file in os.listdir(protobuf_dir):
            if file.endswith('.tfrecord'):
                protofiles += 1
            if 'train' in file:
                training_files += 1
            if 'validation' in file:
                validation_files += 1
        self.assertEqual(10, protofiles)
        self.assertEqual(5, training_files)
        self.assertEqual(5, validation_files)

        if os.listdir(protobuf_dir):
            shutil.rmtree(protobuf_dir)
            os.mkdir(protobuf_dir)
def write_to_protobuffer(bot_id: int):
    """
    Read the data from the training data directory, convert them to protobuffer format and write them to the protobuffer
    directory
    """

    bot_training_data_dir = dirs.get_training_data_dir(bot_id)

    if not os.path.exists(bot_training_data_dir):
        return False

    bot_protobuf_dir = dirs.get_protobuf_dir(bot_id)

    if not os.path.exists(bot_protobuf_dir):
        os.mkdir(bot_protobuf_dir)

    converter.run(bot_training_data_dir, bot_protobuf_dir)

    return True
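Taken together, the two helpers above suggest an upload-then-convert flow; a hedged sketch follows, with the bot id and zip path as placeholders.

# Hedged sketch of the presumed flow -- not from the original source.
bot_id = 1
if create_training_data_dir(bot_id, 'uploads/bmw_models.zip'):
    # Only convert to TFRecords if fresh data was actually extracted.
    write_to_protobuffer(bot_id)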
def delete_bot_data(bot_id):
    """
    Delete all data of a bot in the filesystem if it exists
    """
    training_data_dir = dirs.get_training_data_dir(bot_id)
    protobuf_dir = dirs.get_protobuf_dir(bot_id)
    model_dir = dirs.get_model_data_dir(bot_id)

    if os.path.isdir(training_data_dir):
        print('[Training Data Service] - Deleting %s' % training_data_dir)
        rmtree(training_data_dir)

    if os.path.isdir(protobuf_dir):
        print('[Training Data Service] - Deleting %s' % protobuf_dir)
        rmtree(protobuf_dir)

    if os.path.isdir(model_dir):
        print('[Training Data Service] - Deleting %s' % model_dir)
        rmtree(model_dir)

    return 'Successfully Deleted Data for Bot %s' % bot_id, 200
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """
    
    :param split_name: 
    :param dataset_dir: 
    :param file_pattern: 
    :param reader: 
    :return: 
    """

    if split_name not in ['train', 'validation']:
        raise ValueError('illegal split name %s' % split_name)

    num_classes = dataset_utils.get_number_of_classes_by_labels(dataset_dir)

    if not num_classes:
        raise FileNotFoundError('Dataset in %s not Found' % dataset_dir)

    if not file_pattern:
        file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    print("FILE PATTERN: %s" % file_pattern)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
        tf.FixedLenFeature((), tf.string, default_value='png'),
        'image/class/label':
        tf.FixedLenFeature([],
                           tf.int64,
                           default_value=tf.zeros([], dtype=tf.int64)),
    }

    items_to_handlers = {
        'image': slim.tfexample_decoder.Image(),
        'label': slim.tfexample_decoder.Tensor('image/class/label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)

    bot_id = dirs.get_bot_id_from_dir(dataset_dir)

    if not bot_id:
        raise ValueError('bot id not recognized from dataset_dir %s' %
                         dataset_dir)

    training_data_dir = dirs.get_training_data_dir(bot_id)
    print("READING TRAINING DATA FROM: %s" % training_data_dir)

    split_size = dataset_utils.get_split_size(training_data_dir, split_name,
                                              _SPLIT_FRAC)

    print("SPLIT SIZE: %s" % split_size)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=split_size,
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=num_classes,
                                labels_to_names=labels_to_names)
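A sketch of consuming the returned slim Dataset with a DatasetDataProvider, assuming the TF 1.x tf.contrib.slim API used throughout these snippets; the bot id is a placeholder.

# Illustrative consumption of the returned dataset (TF 1.x tf.contrib.slim API assumed).
dataset_dir = dirs.get_protobuf_dir(1)  # placeholder bot id
dataset = get_split('train', dataset_dir)
provider = slim.dataset_data_provider.DatasetDataProvider(dataset, shuffle=True)
image, label = provider.get(['image', 'label'])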
def sample(lst, n):
    sample = []
    ctr = 0
    N = len(lst)
    while ctr < n:
        index = randint(0, N - 1)
        sample.append(lst.pop(index))
        N = len(lst)
        print('Length of list: %s\nLength of sample: %s' % (N, len(sample)))
        ctr += 1
    return sample
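Note that sample() pops each drawn element, so it samples without replacement and empties the input list as a side effect. The standard library offers the same draw without mutating the input; a possible alternative, where lst and n mirror the parameters above:

# Non-mutating alternative: random.sample draws n items without replacement
# and leaves the input list untouched.
import random

sampled = random.sample(lst, n)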


for car_class in car_classes:
    training_dir = os.path.join(dirs.get_training_data_dir('cars'), car_class)
    transfer_dir = os.path.join(dirs.get_transfer_data_dir('cars', _SETTING),
                                car_class)

    images = os.listdir(training_dir)
    image_sample = sample(images, _SAMPLE_SIZE)

    for image in image_sample:
        copyfile(os.path.join(training_dir, image),
                 os.path.join(transfer_dir, image))

for bmw_class in bmw_classes:
    training_dir = os.path.join(dirs.get_training_data_dir('bmw_models'),
                                bmw_class)
    transfer_dir = os.path.join(
        dirs.get_transfer_data_dir('bmw_models', _SETTING), bmw_class)
import unittest
from unittest import TestCase

import os
import shutil

from cnn_server.server import file_service as dirs
from cnn_server.training_data import training_data_service as service

FILES_DIR = 'files'
TRAINING_DATA_DIR = '/home/markus/projects/cnn_server/training_data/'
BOT_ID = 1
BOT_TRAINING_DATA_DIR = dirs.get_training_data_dir(BOT_ID)
BOT_PROTOBUF_DIR = dirs.get_protobuf_dir(BOT_ID)


class TestTrainingDatService(TestCase):
    def test_validate_training_data(self):

        # Read the ZIP Files
        valid_zip = os.path.join(FILES_DIR, 'valid_trainingdata.zip')
        invalid_zip_subfolder = os.path.join(
            FILES_DIR, 'invalid_training_data_subfolder.zip')
        invalid_zip_file = os.path.join(FILES_DIR,
                                        'invalid_training_data_file.zip')
        invalid_zip_emptysub = os.path.join(
            FILES_DIR, 'invalid_training_data_emptysub.zip')
        invalid_zip_emptysubend = os.path.join(
            FILES_DIR, 'invalid_training_data_emptysubend.zip')
        invalid_flowers = os.path.join(FILES_DIR, 'invalid_flower_photos.zip')
        some_file_path = os.path.join(FILES_DIR, 'some_file.txt')
def _get_image_path(bot_id, label, image_name, url):
    image_path = os.path.join(dirs.get_training_data_dir(bot_id), label)
    url_ending = re.sub(r"jpg.*", 'jpg', url.split('/')[-1])
    file_name = '%s_%s' % (image_name, url_ending)
    return os.path.join(image_path, file_name)
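A small sketch of what _get_image_path produces: the regex strips anything after 'jpg' in the URL tail (for example a query string). The bot id, label, image name and URL below are placeholders.

# Hypothetical inputs -- illustrates how the URL tail is cleaned up.
path = _get_image_path('bmw_models', 'bmw3', 'img_0001',
                       'http://example.com/photos/front.jpg?size=large')
# -> <training_data_dir>/bmw3/img_0001_front.jpg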
def _convert(bot_id):
    training_data_dir = dirs.get_training_data_dir(bot_id)
    protobuf_dir = dirs.get_protobuf_dir(bot_id)
    if _check_training_dir(training_data_dir) and _check_proto_dir(
            protobuf_dir):
        converter.run(training_data_dir, protobuf_dir)
    def test_get_filenames_and_classes(self):

        bmw3_exp = {'train': 2048, 'validation': 228}
        bmw5_exp = {'train': 515, 'validation': 57}
        bmw6_exp = {'train': 487, 'validation': 54}
        bmw7_exp = {'train': 1049, 'validation': 117}

        protobuf_dir = dirs.get_protobuf_dir(BOT_ID)
        training_data_dir = dirs.get_training_data_dir(BOT_ID)
        if not os.listdir(training_data_dir):
            print("Cannot start test. No data in %s" & training_data_dir)
            return
        if not os.path.exists(protobuf_dir):
            os.mkdir(protobuf_dir)
        if os.listdir(protobuf_dir):
            shutil.rmtree(protobuf_dir)
            os.mkdir(protobuf_dir)

        train, val, classes = converter._get_filenames_and_classes(
            training_data_dir, 0.1)
        bmw3_ctr_tr = 0
        bmw5_ctr_tr = 0
        bmw6_ctr_tr = 0
        bmw7_ctr_tr = 0
        for file in train:
            cl = os.path.basename(os.path.dirname(file))
            if cl == 'bmw3':
                bmw3_ctr_tr += 1
            elif cl == 'bmw5':
                bmw5_ctr_tr += 1
            elif cl == 'bmw6':
                bmw6_ctr_tr += 1
            elif cl == 'bmw7':
                bmw7_ctr_tr += 1

        bmw3_ctr_vl = 0
        bmw5_ctr_vl = 0
        bmw6_ctr_vl = 0
        bmw7_ctr_vl = 0
        for file in val:
            cl = os.path.basename(os.path.dirname(file))
            if cl == 'bmw3':
                bmw3_ctr_vl += 1
            elif cl == 'bmw5':
                bmw5_ctr_vl += 1
            elif cl == 'bmw6':
                bmw6_ctr_vl += 1
            elif cl == 'bmw7':
                bmw7_ctr_vl += 1
        self.assertIn(bmw3_exp['train'] - bmw3_ctr_tr, range(-2, 3))
        self.assertIn(bmw5_exp['train'] - bmw5_ctr_tr, range(-2, 3))
        self.assertIn(bmw6_exp['train'] - bmw6_ctr_tr, range(-2, 3))
        self.assertIn(bmw7_exp['train'] - bmw7_ctr_tr, range(-2, 3))
        self.assertIn(bmw3_exp['validation'] - bmw3_ctr_vl, range(-2, 3))
        self.assertIn(bmw5_exp['validation'] - bmw5_ctr_vl, range(-2, 3))
        self.assertIn(bmw6_exp['validation'] - bmw6_ctr_vl, range(-2, 3))
        self.assertIn(bmw7_exp['validation'] - bmw7_ctr_vl, range(-2, 3))

        if os.listdir(protobuf_dir):
            shutil.rmtree(protobuf_dir)
            os.mkdir(protobuf_dir)