def convert(bot_id):
    """Convert a bot's training data into protobuf records, reporting elapsed time."""
    src_dir = dirs.get_training_data_dir(bot_id)
    dst_dir = dirs.get_protobuf_dir(bot_id)
    print('Converting training data for %s' % bot_id)
    start_time = time.time()
    if _check_training_dir(src_dir) and _check_proto_dir(dst_dir):
        # 20% of the data goes to the validation split.
        converter.run(src_dir, dst_dir, fract_validation=0.2)
        print('Converted training data for %s in %s sec' %
              (bot_id, (time.time() - start_time)))
def test_get_bot_id_from_dir(self):
    """get_bot_id_from_dir should recover the bot id from every per-bot directory."""
    expected = 'bmw_models'
    per_bot_dirs = (dirs.get_training_data_dir(expected),
                    dirs.get_protobuf_dir(expected),
                    dirs.get_model_data_dir(expected))
    for directory in per_bot_dirs:
        self.assertEqual(expected, dirs.get_bot_id_from_dir(directory),
                         'bot ids do not match')
def _create_label_folders(bot_id, labels, resume_from):
    """Ensure one sub-folder per label exists under the bot's training data dir.

    Existing data is wiped and recreated unless resume_from is set, in which
    case the existing label folders are kept as-is.
    """
    training_data_dir = dirs.get_training_data_dir(bot_id)
    if os.listdir(training_data_dir):
        if resume_from:
            # Keep whatever is already there and continue from the checkpoint.
            print('Resuming from %s. Label Folders exist.' % resume_from)
        else:
            print('Overwriting current training data in %s' % training_data_dir)
            shutil.rmtree(training_data_dir)
            os.mkdir(training_data_dir)
            for label in labels:
                os.mkdir(os.path.join(training_data_dir, label))
    else:
        print('Creating file structure for training data in %s' %
              training_data_dir)
        for label in labels:
            os.mkdir(os.path.join(training_data_dir, label))
def test_get_split_size(self):
    """Computed train/validation split sizes should match the known dataset counts."""
    bot_id = 'bmw_models'
    data_dir = dirs.get_training_data_dir(bot_id)
    if not data_dir:
        print(
            'Bot Training Data Dir %s is not available. Test cannot run.' %
            data_dir)
        return None
    expected = {'train': 4099, 'validation': 455}
    for split_name, want in expected.items():
        got = utils.get_split_size(data_dir, split_name)
        # Allow an off-by-one from rounding in the split computation.
        self.assertIn(want - got, range(-1, 2))
def create_training_data_dir(bot_id: int, training_data_file):
    """Unpack zipped training data into the bot's (empty) training directory.

    Creates the directory if missing. Returns False without touching anything
    when the directory already contains data, True after a successful extract.
    """
    target_dir = dirs.get_training_data_dir(bot_id)
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)
    if os.listdir(target_dir):
        # Never overwrite training data that is already present.
        return False
    ZipFile(training_data_file).extractall(target_dir)
    return True
def test_run(self):
    """converter.run should emit labels.txt plus 5 train / 5 validation tfrecords."""
    protobuf_dir = dirs.get_protobuf_dir(BOT_ID)
    training_data_dir = dirs.get_training_data_dir(BOT_ID)
    if not os.listdir(training_data_dir):
        # BUG FIX: original used '&' (bitwise and) instead of '%' string
        # formatting, which raised TypeError whenever this guard fired.
        print("Cannot start test. No data in %s" % training_data_dir)
        return
    if not os.path.exists(protobuf_dir):
        os.mkdir(protobuf_dir)
    if os.listdir(protobuf_dir):
        shutil.rmtree(protobuf_dir)
        os.mkdir(protobuf_dir)

    converter.run(training_data_dir, protobuf_dir, 0.1)

    # Check if the labels.txt has been created
    self.assertTrue(
        os.path.isfile(os.path.join(protobuf_dir, 'labels.txt')))

    # Make sure the labels file contains as many lines as the training data
    # folder has subfolders (explicit counts instead of the original
    # pass-loops comparing last enumerate indices, which raised NameError
    # on an empty labels file).
    with open(os.path.join(protobuf_dir, 'labels.txt')) as f:
        label_line_count = sum(1 for _ in f)
    self.assertEqual(len(os.listdir(training_data_dir)), label_line_count)

    # Count the generated tfrecord shards per split.
    protofiles = 0
    training_files = 0
    validation_files = 0
    for file in os.listdir(protobuf_dir):
        if file.endswith('.tfrecord'):
            protofiles += 1
            if 'train' in file:
                training_files += 1
            if 'validation' in file:
                validation_files += 1
    self.assertEqual(10, protofiles)
    self.assertEqual(5, training_files)
    self.assertEqual(5, validation_files)

    # Clean up the generated protobuf data so the test is repeatable.
    if os.listdir(protobuf_dir):
        shutil.rmtree(protobuf_dir)
        os.mkdir(protobuf_dir)
def write_to_protobuffer(bot_id: int):
    """Convert the bot's training data into protobuffer records.

    Returns False when the bot has no training data directory; otherwise
    ensures the protobuf directory exists, runs the converter, and
    returns True.
    """
    source_dir = dirs.get_training_data_dir(bot_id)
    if not os.path.exists(source_dir):
        return False
    target_dir = dirs.get_protobuf_dir(bot_id)
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)
    converter.run(source_dir, target_dir)
    return True
def delete_bot_data(bot_id):
    """Delete all filesystem data of a bot (training, protobuf, model dirs).

    Missing directories are skipped silently. Returns a (message, 200) tuple.
    """
    # BUG FIX: log message typo 'Deleteting' -> 'Deleting'; the three
    # copy-pasted delete stanzas are folded into one loop.
    data_dirs = (dirs.get_training_data_dir(bot_id),
                 dirs.get_protobuf_dir(bot_id),
                 dirs.get_model_data_dir(bot_id))
    for data_dir in data_dirs:
        if os.path.isdir(data_dir):
            print('[Training Data Service] - Deleting %s' % data_dir)
            rmtree(data_dir)
    return 'Successfully Deleted Data for Bot %s' % bot_id, 200
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Build a slim Dataset description for one split of a bot's tfrecord data.

    :param split_name: either 'train' or 'validation'
    :param dataset_dir: directory holding the tfrecord shards and labels file
    :param file_pattern: optional override for the shard filename pattern
    :param reader: optional TF reader class (defaults to tf.TFRecordReader)
    :return: a slim.dataset.Dataset for the requested split
    """
    if split_name not in ['train', 'validation']:
        raise ValueError('illegal split name %s ' % split_name)

    num_classes = dataset_utils.get_number_of_classes_by_labels(dataset_dir)
    if not num_classes:
        raise FileNotFoundError('Dataset in %s not Found' % dataset_dir)

    pattern = file_pattern if file_pattern else _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, pattern % split_name)
    print("FILE PATTERN: %s" % file_pattern)

    # Allowing None in the signature so that dataset_factory can use the default.
    reader = tf.TFRecordReader if reader is None else reader

    # How each serialized tf.Example is decoded back into image/label pairs.
    keys_to_features = {
        'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
            tf.FixedLenFeature((), tf.string, default_value='png'),
        'image/class/label':
            tf.FixedLenFeature([], tf.int64,
                               default_value=tf.zeros([], dtype=tf.int64)),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image(),
        'label': slim.tfexample_decoder.Tensor('image/class/label'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = (dataset_utils.read_label_file(dataset_dir)
                       if dataset_utils.has_labels(dataset_dir) else None)

    bot_id = dirs.get_bot_id_from_dir(dataset_dir)
    training_data_dir = dirs.get_training_data_dir(bot_id)
    print("READING TRAINING DATA FROM: %s" % training_data_dir)
    if not bot_id:
        raise ValueError('bot id not recognized from dataset_dir %s' %
                         dataset_dir)

    # Split size is derived from the raw training data, not the tfrecords.
    split_size = dataset_utils.get_split_size(training_data_dir, split_name,
                                              _SPLIT_FRAC)
    print("SPLIT SIZE: %s" % split_size)

    return slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=reader,
        decoder=decoder,
        num_samples=split_size,
        items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
        num_classes=num_classes,
        labels_to_names=labels_to_names)
def sample(lst, n):
    """Draw n random items from lst, removing each drawn item from lst.

    NOTE(review): mutates the caller's list in place (items are pop()ed out)
    and shadows the function's own name with the local accumulator; the
    'Lentgh' typo appears verbatim in the printed progress output.
    """
    sample = []
    ctr = 0
    N = len(lst)
    while ctr < n:
        # Pick a random remaining index and move that item into the sample.
        index = randint(0, N - 1)
        sample.append(lst.pop(index))
        N = len(lst)
        print('Lentgh of list: %s \n Length of sample: %s' % (N, len(sample)))
        ctr += 1
    return sample


# Copy a random sample of every car class into the transfer-learning dirs.
for car_class in car_classes:
    training_dir = os.path.join(dirs.get_training_data_dir('cars'), car_class)
    transfer_dir = os.path.join(dirs.get_transfer_data_dir('cars', _SETTING),
                                car_class)
    images = os.listdir(training_dir)
    image_sample = sample(images, _SAMPLE_SIZE)
    for image in image_sample:
        copyfile(os.path.join(training_dir, image),
                 os.path.join(transfer_dir, image))

# Same procedure for the BMW model classes.
# NOTE(review): this loop appears truncated in this chunk — it only builds
# the source/target paths; the sampling/copy step presumably follows below.
for bmw_class in bmw_classes:
    training_dir = os.path.join(dirs.get_training_data_dir('bmw_models'),
                                bmw_class)
    transfer_dir = os.path.join(
        dirs.get_transfer_data_dir('bmw_models', _SETTING), bmw_class)
import unittest
from unittest import TestCase
import os
import shutil
from cnn_server.server import file_service as dirs
from cnn_server.training_data import training_data_service as service

# Local folder holding the zip fixtures used by these tests.
FILES_DIR = 'files'
# NOTE(review): hard-coded absolute path to one developer's machine — tests
# relying on it will not find data elsewhere; confirm before depending on it.
TRAINING_DATA_DIR = '/home/markus/projects/cnn_server/training_data/'
BOT_ID = 1
# Per-bot directories resolved once at import time.
BOT_TRAINING_DATA_DIR = dirs.get_training_data_dir(BOT_ID)
BOT_PROTOBUF_DIR = dirs.get_protobuf_dir(BOT_ID)


# NOTE(review): class name has a typo ('Dat' for 'Data'); left unchanged
# since external tooling may reference it.
class TestTrainingDatService(TestCase):
    def test_validate_training_data(self):
        """Resolve paths to the valid and invalid training-data zip fixtures.

        NOTE(review): the method body appears truncated in this chunk — it
        only builds fixture paths; the validation assertions presumably
        follow past the visible end.
        """
        # Read the ZIP Files
        valid_zip = os.path.join(FILES_DIR, 'valid_trainingdata.zip')
        invalid_zip_subfolder = os.path.join(
            FILES_DIR, 'invalid_training_data_subfolder.zip')
        invalid_zip_file = os.path.join(FILES_DIR,
                                        'invalid_training_data_file.zip')
        invalid_zip_emptysub = os.path.join(
            FILES_DIR, 'invalid_training_data_emptysub.zip')
        invalid_zip_emptysubend = os.path.join(
            FILES_DIR, 'invalid_training_data_emptysubend.zip')
        invalid_flowers = os.path.join(FILES_DIR, 'invalid_flower_photos.zip')
        some_file_path = os.path.join(FILES_DIR, 'some_file.txt')
def _get_image_path(bot_id, label, image_name, url):
    """Build the destination path for a downloaded image inside its label folder.

    The filename joins image_name with the url's basename, truncated right
    after its 'jpg' extension (anything following, e.g. query strings, is
    dropped).
    """
    label_dir = os.path.join(dirs.get_training_data_dir(bot_id), label)
    basename = url.split('/')[-1]
    url_ending = re.sub(r"jpg.*", 'jpg', basename)
    return os.path.join(label_dir, '%s_%s' % (image_name, url_ending))
def _convert(bot_id):
    """Run the protobuf converter when both bot directories pass their checks."""
    src_dir = dirs.get_training_data_dir(bot_id)
    dst_dir = dirs.get_protobuf_dir(bot_id)
    ready = _check_training_dir(src_dir) and _check_proto_dir(dst_dir)
    if ready:
        converter.run(src_dir, dst_dir)
def test_get_filenames_and_classes(self):
    """Per-class file counts of the 0.1 split should match the known dataset."""
    expected = {
        'bmw3': {'train': 2048, 'validation': 228},
        'bmw5': {'train': 515, 'validation': 57},
        'bmw6': {'train': 487, 'validation': 54},
        'bmw7': {'train': 1049, 'validation': 117},
    }

    protobuf_dir = dirs.get_protobuf_dir(BOT_ID)
    training_data_dir = dirs.get_training_data_dir(BOT_ID)
    if not os.listdir(training_data_dir):
        # BUG FIX: original used '&' (bitwise and) instead of '%' string
        # formatting, which raised TypeError whenever this guard fired.
        print("Cannot start test. No data in %s" % training_data_dir)
        return
    if not os.path.exists(protobuf_dir):
        os.mkdir(protobuf_dir)
    if os.listdir(protobuf_dir):
        shutil.rmtree(protobuf_dir)
        os.mkdir(protobuf_dir)

    train, val, classes = converter._get_filenames_and_classes(
        training_data_dir, 0.1)

    def count_by_class(files):
        # A file's class label is the name of its parent directory.
        counts = {}
        for file in files:
            cl = os.path.basename(os.path.dirname(file))
            counts[cl] = counts.get(cl, 0) + 1
        return counts

    train_counts = count_by_class(train)
    val_counts = count_by_class(val)
    for cl, exp in expected.items():
        # The split is randomized, so allow a tolerance of +/- 2 files.
        self.assertIn(exp['train'] - train_counts.get(cl, 0), range(-2, 3))
        self.assertIn(exp['validation'] - val_counts.get(cl, 0), range(-2, 3))

    # Clean up the generated protobuf data so the test is repeatable.
    if os.listdir(protobuf_dir):
        shutil.rmtree(protobuf_dir)
        os.mkdir(protobuf_dir)