Example #1
class Data():
    def __init__(self):
        """
        Sets up all default requirements and placeholders 
        needed for the Caffe Acute Lymphoblastic Leukemia CNN data script.
        """

        self.Helpers = Helpers()
        self.confs = self.Helpers.loadConfs()
        self.logFile = self.Helpers.setLogFile(
            self.confs["Settings"]["Logs"]["allCNN"])

        self.CaffeHelpers = CaffeHelpers(self.confs, self.Helpers,
                                         self.logFile)

        self.Helpers.logMessage(self.logFile, "allCNN", "Status",
                                "Data init complete")

    def sortData(self):
        """
        Prepares the data ready for training.
        """

        self.CaffeHelpers.deleteLMDB()
        self.CaffeHelpers.sortLabels()
        self.CaffeHelpers.sortTrainingData()
        self.CaffeHelpers.recreatePaperData()
        self.CaffeHelpers.createTrainingLMDB()
        self.CaffeHelpers.createValidationLMDB()
        self.CaffeHelpers.computeMean()

        self.Helpers.logMessage(self.logFile, "allCNN", "Status",
                                "Data sorting complete")
Example #2
class Data():
        
    ###############################################################
    #
    # Core Data class wrapper.
    #
    ###############################################################

    def __init__(self):
        
        ###############################################################
        #
        # Sets up all default requirements and placeholders 
        # needed for this class. 
        #
        ###############################################################
        
        self.Helpers = Helpers()
        self.confs = self.Helpers.loadConfs()
        self.logFile = self.Helpers.setLogFile(self.confs["Settings"]["Logs"]["DataLogDir"])
        
        self.DataProcess = DataProcess()
        self.labelsToName = {}
        
        self.Helpers.logMessage(self.logFile, "init", "INFO", "Init complete")

    def sortData(self):
        
        ###############################################################
        #
        # Sorts the data into training/validation sets and writes
        # them out as TFRecords.
        #
        ###############################################################

        humanStart, clockStart = self.Helpers.timerStart()
        self.Helpers.logMessage(self.logFile, "sortData", "INFO", "Loading & preparing training data")
        
        dataPaths, classes = self.DataProcess.processFilesAndClasses()

        # Class directories are named numerically in this dataset, so the
        # sorted class names convert directly to integer ids.
        classId = [int(i) for i in classes]
        classNamesToIds = dict(zip(classes, classId))

        # Divide the training datasets into train and test
        numValidation = int(self.confs["Classifier"]["ValidationSize"] * len(dataPaths))
        self.Helpers.logMessage(self.logFile, "sortData", "Validation Size", str(numValidation))
        self.Helpers.logMessage(self.logFile, "sortData", "Class Size", str(len(classes)))
        random.seed(self.confs["Classifier"]["RandomSeed"])
        random.shuffle(dataPaths)
        trainingFiles = dataPaths[numValidation:]
        validationFiles = dataPaths[:numValidation]

        # Convert the training and validation sets
        self.DataProcess.convertToTFRecord('train', trainingFiles, classNamesToIds)
        self.DataProcess.convertToTFRecord('validation', validationFiles, classNamesToIds)

        # Write the labels to file
        labelsToClassNames = dict(zip(classId, classes))
        self.DataProcess.writeLabels(labelsToClassNames)
        self.Helpers.logMessage(self.logFile, "sortData", "COMPLETE", "Completed sorting data!")
Example #3
class Data():
        
    ###############################################################
    #
    # Core Data class.
    #
    ###############################################################

    def __init__(self):
        
        ###############################################################
        #
        # Sets up all default requirements and placeholders 
        # needed for this class. 
        #
        ###############################################################
        
        self.Helpers = Helpers()
        self.confs = self.Helpers.loadConfs()
        self.logFile = self.Helpers.setLogFile(self.confs["Settings"]["Logs"]["DataLogDir"])

    def getLabelsAndDirectories(self):
        
        ###############################################################
        #
        # Returns a list of classes/labels and directories. 
        #
        ###############################################################

        datasetDir = self.confs["Classifier"]["DatasetDir"]

        labels = [name for name in os.listdir(datasetDir)
                  if os.path.isdir(os.path.join(datasetDir, name))
                  and name != '.ipynb_checkpoints']

        directories = []
        for dirName in os.listdir(datasetDir):
            if dirName != '.ipynb_checkpoints':
                path = os.path.join(datasetDir, dirName)
                if os.path.isdir(path):
                    directories.append(path)
        return labels, directories

    def processFilesAndClasses(self):
        
        ###############################################################
        #
        # Returns a list of filenames and classes/labels. 
        #
        ###############################################################

        labels, directories = self.getLabelsAndDirectories()
        
        data = []
        for directory in directories:
            for filename in os.listdir(directory):
                if os.path.splitext(filename)[1] in self.confs["Classifier"]["ValidIType"]:
                    data.append(os.path.join(directory, filename))
        return data, sorted(labels)

    def convertToTFRecord(self, split_name, filenames, labels_to_ids):
        
        ###############################################################
        #
        # Converts the given filenames to a TFRecord dataset. 
        #
        ###############################################################
        
        assert split_name in ['train', 'validation']

        num_per_shard = int(math.ceil(len(filenames) / float(self.confs["Classifier"]["Shards"])))
        self.Helpers.logMessage(self.logFile, "convertToTFRecord", "INFO", "Number of files: " + str(len(filenames)))
        self.Helpers.logMessage(self.logFile, "convertToTFRecord", "INFO", "Number per shard: " + str(num_per_shard))

        with tf.Graph().as_default():
            image_reader = ImageReader()
            with tf.Session('') as sess:
                for shard_id in range(self.confs["Classifier"]["Shards"]):
                    output_filename = self.getDatasetFilename(split_name, shard_id)
                    self.Helpers.logMessage(self.logFile, "convertToTFRecord", "STATUS", "Saving: " + str(output_filename))
                    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
                        start_ndx = shard_id * num_per_shard
                        end_ndx = min((shard_id+1) * num_per_shard, len(filenames))
                        for i in range(start_ndx, end_ndx):
                            # Progress indicator; '\r' keeps it on one line.
                            sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                                i+1, len(filenames), shard_id))
                            sys.stdout.flush()
                            # Read the filename:
                            image_data = tf.gfile.FastGFile(filenames[i], 'rb').read()
                            height, width = image_reader.read_image_dims(sess, image_data)
                            class_name = os.path.basename(os.path.dirname(filenames[i]))
                            class_id = labels_to_ids[class_name]
                            self.Helpers.logMessage(self.logFile, "convertToTFRecord", "INFO", "class_name: " + str(class_name))
                            self.Helpers.logMessage(self.logFile, "convertToTFRecord", "INFO", "class_id: " + str(class_id))
                            example = self.imageToTFExample(
                                image_data, b'jpg', height, width, class_id)
                            tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

    def getDatasetFilename(self, split_name, shard_id):
        
        ###############################################################
        #
        # Gets the dataset filename
        #
        ###############################################################

        output_filename = '%s_%s_%05d-of-%05d.tfrecord' % (
            self.confs["Classifier"]["TFRecordFile"], split_name, shard_id, self.confs["Classifier"]["Shards"])
        return os.path.join(self.confs["Classifier"]["DatasetDir"], output_filename)

    def int64Feature(self, values):
        
        ###############################################################
        #
        # Returns a TF-Feature of int64s
        #
        ###############################################################
        
        if not isinstance(values, (tuple, list)):
            values = [values]
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

    def bytesFeature(self, values):
        
        ###############################################################
        #
        # Returns a TF-Feature of bytes
        #
        ###############################################################
        
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))

    def imageToTFExample(self, image_data, image_format, height, width, class_id):
        
        ###############################################################
        #
        # Converts an image to a TF Example
        #
        ###############################################################

        return tf.train.Example(features=tf.train.Features(feature={
            'image/encoded': self.bytesFeature(image_data),
            'image/format': self.bytesFeature(image_format),
            'image/class/label': self.int64Feature(class_id),
            'image/height': self.int64Feature(height),
            'image/width': self.int64Feature(width)
        }))

    def writeLabels(self, labels_to_class_names):
        
        ###############################################################
        #
        # Writes the class list and the id:name mapping to file
        #
        ###############################################################

        labels_filename = os.path.join(self.confs["Classifier"]["DatasetDir"], self.confs["Classifier"]["Labels"])

        with tf.gfile.Open(self.confs["Classifier"]["Classes"], 'w') as f:
            for label in labels_to_class_names:
                f.write('%s\n' % (label))
                
        with tf.gfile.Open(labels_filename, 'w') as f:
            for label in labels_to_class_names:
                class_name = labels_to_class_names[label]
                f.write('%d:%s\n' % (label, class_name))
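
Shards written by convertToTFRecord() can be inspected without the class; a sketch using the TF 1.x record iterator, with a hypothetical shard path following getDatasetFilename()'s naming scheme:

import tensorflow as tf

# Hypothetical shard path; the real prefix comes from confs["Classifier"]["TFRecordFile"].
path = "dataset/data_train_00000-of-00002.tfrecord"
for raw in tf.python_io.tf_record_iterator(path):
    example = tf.train.Example.FromString(raw)
    label = example.features.feature['image/class/label'].int64_list.value[0]
    fmt = example.features.feature['image/format'].bytes_list.value[0]
    print("label:", label, "format:", fmt)
    break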
Example #4
class Data():
    
    def __init__(self):

        """
        Sets up all default requirements and placeholders 
        needed for this class. 
        """
        
        self.Helpers = Helpers()
        self.confs = self.Helpers.loadConfs()
        self.fixed = tuple((self.confs["Settings"]["ImgDims"], self.confs["Settings"]["ImgDims"]))
        
        self.filesMade = 0
        self.trainingDir = self.confs["Settings"]["TrainDir"]
        
    def writeImage(self, filename, image):

        """
        Writes an image based on the filepath and the image provided.
        """

        if filename is None:
            print("Filename does not exist, file cannot be written.")
            return
            
        if image is None:
            print("Image does not exist, file cannot be written.")
            return
            
        try:
            cv2.imwrite(filename, image)
        except Exception:
            print("File was not written! " + filename)
        
    def resize(self, filePath, savePath, show = False):

        """
        Writes an image based on the filepath and the image provided. 
        """

        image = cv2.resize(cv2.imread(filePath), self.fixed)
        self.writeImage(savePath, image)
        self.filesMade += 1
        print("Resized image written to: " + savePath)
        
        if show is True:
            plt.imshow(image)
            plt.show()
            
        return image

    def grayScale(self, image, grayPath, show = False):

        """
        Writes a grayscale copy of the image to the filepath provided. 
        """
        
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        self.writeImage(grayPath, gray)
        self.filesMade += 1
        print("Grayscaled image written to: " + grayPath)
        
        if show is True:
            plt.imshow(gray)
            plt.show()
            
        return image, gray

    def equalizeHist(self, gray, histPath, show = False):

        """
        Writes histogram equalized copy of the image to the filepath provided. 
        """
        
        hist = cv2.equalizeHist(gray)
        self.writeImage(histPath, hist)
        self.filesMade += 1
        print("Histogram equalized image written to: " + histPath)
        
        if show is True:
            plt.imshow(hist)
            plt.show()
            
        return hist

    def reflection(self, image, horPath, verPath, show = False):

        """
        Writes reflected copies of the image to the filepath provided. 
        """
        
        # flipCode 0 reflects across the horizontal axis (top/bottom)
        horImg = cv2.flip(image, 0)
        self.writeImage(horPath, horImg)
        self.filesMade += 1
        print("Horizontally reflected image written to: " + horPath)
        
        if show is True:
            plt.imshow(horImg)
            plt.show()
            
        # flipCode 1 reflects across the vertical axis (left/right)
        verImg = cv2.flip(image, 1)
        self.writeImage(verPath, verImg)
        self.filesMade += 1
        print("Vertically reflected image written to: " + verPath)
        
        if show is True:
            plt.imshow(verImg)
            plt.show()
            
        return horImg, verImg

    def gaussian(self, filePath, gaussianPath, show = False):

        """
        Writes gaussian blurred copy of the image to the filepath provided. 
        """
        
        # cv2.imwrite expects BGR ordering, so read with cv2 rather than plt
        # to avoid swapping red/blue in the saved file. Note that a scalar
        # sigma also blurs across the colour channels, not just spatially.
        gaussianBlur = ndimage.gaussian_filter(cv2.imread(filePath), sigma=5.11)
        self.writeImage(gaussianPath, gaussianBlur)
        self.filesMade += 1
        print("Gaussian image written to: " + gaussianPath)

        if show is True:
            plt.imshow(gaussianBlur)
            plt.show()
            
        return gaussianBlur
            
    def translate(self, image, filePath, show = False):

        """
        Writes transformed copy of the image to the filepath provided. 
        """
        
        cols, rows, chs = image.shape

        translated = cv2.warpAffine(image, np.float32([[1, 0, 84], [0, 1, 56]]), (rows, cols), 
                                    borderMode=cv2.BORDER_CONSTANT, borderValue=(144, 159, 162))
        
        self.writeImage(filePath, translated)
        self.filesMade += 1
        print("Translated image written to: " + filePath)

        if show is True:
            plt.imshow(translated)
            plt.show()
            
        return translated
        
    def rotation(self, path, filePath, filename, show = False): 

        """
        Writes rotated copies of the image to the filepath provided. 
        """
        
        image = cv2.imread(filePath)
        rows, cols, chs = image.shape

        for i in range(0, 20):
            randDeg = random.randint(-180, 180)
            # Rotate about the image centre; scale 0.70 keeps the rotated
            # content inside the frame.
            matrix = cv2.getRotationMatrix2D((cols/2, rows/2), randDeg, 0.70)
            rotated = cv2.warpAffine(image, matrix, (cols, rows), borderMode=cv2.BORDER_CONSTANT, 
                                         borderValue=(144, 159, 162))
            fullPath = os.path.join(path, str(randDeg) + '-' + str(i) + '-' + filename)
        
            self.writeImage(fullPath, rotated)
            self.filesMade += 1
            print("Rotated image written to: " + fullPath)

            if show is True:
                plt.imshow(rotated)
                plt.show()

    def processDataset(self):

        """
        Runs all of the above functions saving the new dataset to the 
        Augmented directory. 
        """
        
        for directory in os.listdir(self.trainingDir):
            
            # Skip non-data directories
            if(directory==".ipynb_checkpoints" or directory=="__pycache__"):
                continue
                
            self.filesMade = 0
            
            path = os.path.join(self.confs["Settings"]["TrainDir"], directory)
            sortedPath = os.path.join(self.confs["Settings"]["AugDir"], directory)
            
            # Stops program from crashing if augmented folders do not exist
            if not os.path.exists(sortedPath):
                os.makedirs(sortedPath)
            
            if os.path.isdir(path):
                fCount = 0
                for filename in os.listdir(path):
                    if filename.endswith('.jpg'):
                        
                        filePath = os.path.join(path, filename)
                        fileSortedPath = os.path.join(sortedPath, filename)
                        
                        image = self.resize(filePath, fileSortedPath, False)
                        image, gray = self.grayScale(image, os.path.join(sortedPath, "Gray-"+filename), False)
                        
                        hist = self.equalizeHist(gray, os.path.join(sortedPath, "Hist-"+filename), False)
                        
                        horImg, verImg = self.reflection(image, os.path.join(sortedPath, "Hor-"+filename), 
                                                         os.path.join(sortedPath, "Ver-"+filename), False)
                        
                        gaussianBlur = self.gaussian(fileSortedPath, os.path.join(sortedPath, "Gaus-"+filename), False)
                        
                        translated = self.translate(image, os.path.join(sortedPath, "Trans-"+filename), False)
                        
                        self.rotation(sortedPath, fileSortedPath, filename)
                        fCount += 1
                        print("Total augmented files created so far " + str(self.filesMade))
                        print("")
                    else:
                        print("File was not jpg! "+filename)
                        continue
                        
                print("AML/ALL Augmentation: " + self.Helpers.currentDateTime())
                print("Added filters to " + str(fCount) + " files in the " + str(directory) + " directory")
                print("Total of " + str(self.filesMade) + " augmented files created.")
                print("")
Example #5
class Classifier():
    def __init__(self):

        self.Helpers = Helpers()
        self.confs = self.Helpers.loadConfs()
        self.logFile = self.Helpers.setLogFile(
            self.confs["Settings"]["Logs"]["DataLogDir"])
        self.Helpers.logMessage(self.logFile, "init", "INFO", "Init complete")

        self.movidius = None

        self.mean = 128
        self.std = 1 / 128

        self.categories = []
        self.graphfile = None
        self.graph = None
        self.reqsize = None

        self.extensions = [".jpg", ".png"]

        self.CheckDevices()

    def CheckDevices(self):

        #mvnc.SetGlobalOption(mvnc.GlobalOption.LOGLEVEL, 2)
        devices = mvnc.EnumerateDevices()
        if len(devices) == 0:
            self.Helpers.logMessage(self.logFile, "CheckDevices", "WARNING",
                                    "No Movidius Devices Found")
            quit()
        self.movidius = mvnc.Device(devices[0])
        self.movidius.OpenDevice()
        self.Helpers.logMessage(self.logFile, "CheckDevices", "STATUS",
                                "Movidius Connected")

    def AllocateGraph(self, graphfile):

        self.graph = self.movidius.AllocateGraph(graphfile)

    def LoadInception(self):

        self.reqsize = self.confs["Classifier"]["ImageSize"]
        with open(self.confs["Classifier"]["NetworkPath"] +
                  self.confs["Classifier"]["InceptionGraph"],
                  mode='rb') as f:
            self.graphfile = f.read()
        self.AllocateGraph(self.graphfile)
        self.Helpers.logMessage(self.logFile, "LoadInception", "STATUS",
                                "Graph Allocated")

        with open(
                self.confs["Classifier"]["NetworkPath"] + 'Model/classes.txt',
                'r') as f:
            for line in f:
                cat = line.split('\n')[0]
                if cat != 'classes':
                    self.categories.append(cat)
        self.Helpers.logMessage(self.logFile, "LoadInception", "STATUS",
                                "Categories Loaded")
Example #6
class Trainer():

    ###############################################################
    #
    # Trainer class
    #
    ###############################################################

    def __init__(self):

        ###############################################################
        #
        # Sets up all default requirements and placeholders
        # needed for this class.
        #
        ###############################################################

        self.Helpers = Helpers()
        self.confs = self.Helpers.loadConfs()
        self.logFile = self.Helpers.setLogFile(
            self.confs["Settings"]["Logs"]["DataLogDir"])

        self.labelsToName = {}
        # getSplit() passes this to slim.dataset.Dataset; without it the
        # class raises AttributeError. Generic descriptions assumed here.
        self.items_to_descriptions = {
            'image': 'A 3-channel RGB image.',
            'label': 'A label from the dataset classes.'}

        self.Helpers.logMessage(self.logFile, "init", "INFO", "Init complete")

    def getSplit(self, split_name):

        ###############################################################
        #
        # Obtains the training/validation split
        #
        ###############################################################

        #Check whether the split_name is train or validation
        if split_name not in ['train', 'validation']:
            raise ValueError(
                'The split_name %s is not recognized. Please input either train or validation as the split_name'
                % (split_name))

        #Create the full path for a general FilePattern to locate the tfrecord_files
        FilePattern_path = os.path.join(
            self.confs["Classifier"]["DatasetDir"],
            self.confs["Classifier"]["FilePattern"] % (split_name))

        #Count the total number of examples in all of these shards
        num_samples = 0
        FilePattern_for_counting = '200label_' + split_name
        tfrecords_to_count = [
            os.path.join(self.confs["Classifier"]["DatasetDir"], file)
            for file in os.listdir(self.confs["Classifier"]["DatasetDir"])
            if file.startswith(FilePattern_for_counting)
        ]

        for tfrecord_file in tfrecords_to_count:
            for _ in tf.python_io.tf_record_iterator(tfrecord_file):
                num_samples += 1

        #Create a reader, which must be a TFRecord reader in this case
        reader = tf.TFRecordReader

        #Create the keys_to_features dictionary for the decoder
        keys_to_features = {
            'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
            'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
            'image/class/label': tf.FixedLenFeature(
                [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
        }

        #Create the items_to_handlers dictionary for the decoder.
        items_to_handlers = {
            'image': slim.tfexample_decoder.Image(),
            'label': slim.tfexample_decoder.Tensor('image/class/label'),
        }

        #Start to create the decoder
        decoder = slim.tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        #Create the labels_to_name file
        labels_to_name_dict = self.labelsToName

        #Actually create the dataset
        dataset = slim.dataset.Dataset(
            data_sources=FilePattern_path,
            decoder=decoder,
            reader=reader,
            num_readers=4,
            num_samples=num_samples,
            num_classes=self.confs["Classifier"]["NumClasses"],
            labels_to_name=labels_to_name_dict,
            items_to_descriptions=self.items_to_descriptions)

        return dataset

    def loadBatch(self, dataset, is_training=True):

        ###############################################################
        #
        # Loads a batch for training
        #
        ###############################################################

        #First create the data_provider object
        data_provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            common_queue_capacity=24 + 3 * self.confs["Classifier"]["BatchSize"],
            common_queue_min=24)

        #Obtain the raw image using the get method
        raw_image, label = data_provider.get(['image', 'label'])

        #Perform the correct preprocessing for this image depending if it is training or evaluating
        image = Classes.inception_preprocessing.preprocess_image(
            raw_image, self.confs["Classifier"]["ImageSize"],
            self.confs["Classifier"]["ImageSize"], is_training)

        #As for the raw images, we just do a simple reshape to batch it up
        raw_image = tf.image.resize_image_with_crop_or_pad(
            raw_image, self.confs["Classifier"]["ImageSize"],
            self.confs["Classifier"]["ImageSize"])

        #Batch up the image by enqueing the tensors internally in a FIFO queue and dequeueing many elements with tf.train.batch.
        images, raw_images, labels = tf.train.batch(
            [image, raw_image, label],
            batch_size=self.confs["Classifier"]["BatchSize"],
            num_threads=4,
            capacity=4 * self.confs["Classifier"]["BatchSize"],
            allow_smaller_final_batch=True)

        return images, raw_images, labels
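
A sketch of wiring these two methods into a TF 1.x input pipeline; the session and queue-runner plumbing is assumed, since the source does not show it:

import tensorflow as tf

trainer = Trainer()
dataset = trainer.getSplit('train')
images, raw_images, labels = trainer.loadBatch(dataset, is_training=True)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    batchImages, batchLabels = sess.run([images, labels])
    print(batchImages.shape, batchLabels.shape)
    coord.request_stop()
    coord.join(threads)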