Python DataLoader примеры, get_data.DataLoader Python примеры использования

Пример #1

0

Показать файл

Файл: reporting.py Проект: asantinc/selective_search_roofs

    def __init__(self, report_name=None, method=None,
                 folder_name=None, save_imgs=True, out_path=None,
                 detections=None, in_path=None,
                 detector_names=None,
                 mergeFalsePos=False,
                 separateDetections=True,
                 vocGood=0.1):
        '''
        Prepare the scorer: store the settings, load the ground truth roofs of
        every .jpg image found in in_path and optionally start the report.

        Parameters:
        --------------------
        report_name: string
            Passed on to init_report (only called when detector_names is given)
        method: string
            Must not be None (asserted at the end of the constructor)
        folder_name, save_imgs, out_path:
            Stored on the instance unchanged
        detections: detection class
            Its update_roof_num is called once per image and roof type
        in_path: string
            Folder holding the .jpg images and their .xml annotations
        detector_names:
            When not None, init_report is called with it
        separateDetections: bool
            Whether the metal detections can count as true positives for the
            thatch detections and viceversa
        mergeFalsePos: bool
            Whether we need to keep track of the good and bad detections of
            metal and thatch separately or not. We cannot separate them if we
            want to train a single neural network to distinguish between both
            types of roofs since the bad detections of either roof must not
            contain a positive detection of the other type of roof (it should
            be background)
        vocGood: float
            Not read anywhere in this constructor
        '''
        # settings related to saving the FP and TP for neural training
        self.save_imgs = save_imgs
        self.mergeFalsePos = mergeFalsePos
        self.keep_detections_separate = separateDetections

        # thresholds to classify detections as False/True positives and
        # Good/Bad detections (the latter are used to train the neural network)
        self.VOC_threshold = utils.VOC_threshold
        self.VOC_good_detection_threshold = {
            'metal': utils.VOC_threshold,
            'thatch': utils.VOC_threshold,
        }
        self.detection_portion_threshold = 0.50

        self.detections = detections
        self.in_path = in_path
        self.img_names = [name for name in listdir(self.in_path)
                          if name.endswith('.jpg')]

        # ground truth roofs, indexed as correct_roofs[roof_type][img_name]
        self.correct_roofs = dict((kind, dict()) for kind in utils.ROOF_TYPES)
        for img_name in self.img_names:
            # the annotation file shares the image name, with an xml extension
            xml_name = img_name[:-3] + 'xml'
            for roof_type in utils.ROOF_TYPES:
                truth = DataLoader.get_polygons(roof_type=roof_type,
                                                xml_name=xml_name,
                                                xml_path=self.in_path)
                self.correct_roofs[roof_type][img_name] = truth
                self.detections.update_roof_num(truth, roof_type)

        # the report file is only started when detector names were supplied
        self.out_path = out_path
        if detector_names is not None:
            self.init_report(detector_names, report_name=report_name)

        # needed to pickle the FP and TP to a file that makes sense
        assert method is not None
        self.method = method
        self.folder_name = folder_name

Пример #2

0

Показать файл

Файл: neural_data_setup.py Проект: asantinc/selective_search_roofs

    def load_data(self, non_roofs=np.inf, roof_type=None):
        '''
        Fill self.X / self.y with roof and background patches and return them
        shuffled.

        Parameters:
        ----------
        non_roofs: float
            Determines what proportion of non_roofs should be added to the
            dataset; the background limit is (non_roofs * roofs) + roofs
        roof_type: string
            'metal' or 'thatch' loads patches for that type of roof only,
            'Both' loads both types. Any other value fails the assertion.

        Returns:
        ----------
        The tuple (self.X, self.y) after shuffling with a fixed random_state
        '''
        assert roof_type in ('metal', 'thatch', 'Both')

        self.ground_truth_metal_thatch = DataLoader.get_all_patches_folder(merge_imgs=True)
        self.viola_metal_thatch = self.get_viola_positive_patches(self.thatch_metal_TP_viola_path)
        if roof_type == 'Both':
            self.viola_background = self.get_viola_background_patches(self.background_FP_viola_path)
        else:
            print('Will load {0} data only'.format(roof_type))
            self.viola_background = self.get_viola_background_patches(self.background_FP_viola_path, roof_type=roof_type)

        # allocate room for every patch; the unused tail is cut off below
        capacity = self.count_patches_helper()
        self.X = np.empty((capacity, 3, utils.PATCH_W, utils.PATCH_H), dtype='float32')
        self.y = np.empty((capacity), dtype='int32')
        self.failed_patches = 0

        # positive patches: ground truth first, then the viola detections
        if roof_type == 'Both':
            self.roof_types = utils.ROOF_TYPES
        else:
            self.roof_types = [roof_type]
        filled = 0
        for kind in self.roof_types:
            # a single roof type is always labelled 1
            label = utils.ROOF_LABEL[kind] if len(self.roof_types) > 1 else 1
            sources = [self.ground_truth_metal_thatch[kind], self.viola_metal_thatch[kind]]
            for source in sources:
                for patch in source:
                    filled = self.process_patch(patch, label, filled)

        # background patches, up to the limit derived from the roofs loaded
        self.non_roof_limit = (non_roofs * filled) + filled
        background_label = utils.ROOF_LABEL['background']
        for patch in self.viola_background:
            # NOTE(review): '>' lets one patch through once the limit is
            # reached exactly -- confirm whether '>=' was intended
            if filled > self.non_roof_limit:
                break
            filled = self.process_patch(patch, background_label, filled)

        # more random negative patches could be added here if needed

        # keep only the slots that were actually filled
        self.X = self.X[:filled, :, :, :].astype(np.float32)
        self.y = self.y[:filled].astype(np.int32)

        print(np.bincount(self.y))
        self.X, self.y = sklearn.utils.shuffle(self.X, self.y, random_state=42)
        return self.X, self.y

Пример #3

0

Показать файл

    def setup_augmented_patches():
        '''
        Write the flipped variants of every training roof patch to disk.

        No division between different roof sizes is made. For each roof type
        and each .jpg training image the roof patches are extracted in
        grayscale, patches that are taller than wide are rotated so that they
        lie down, and four variants (unchanged plus three flips) are written as
        '<out_path><roof_type>_<image>_<roof_id>_flip<i>.jpg'.
        '''
        in_path = utils.get_path(in_or_out=utils.IN, data_fold=utils.TRAINING)
        out_path = utils.get_path(viola=True, in_or_out=utils.IN, data_fold=utils.TRAINING)

        img_names_list = [img_name for img_name in os.listdir(in_path) if img_name.endswith('.jpg')]

        # variant i uses flip_codes[i] as the cv2.flip flipCode;
        # None leaves the patch untouched
        flip_codes = (None, 0, 1, -1)

        for roof_type in ['metal', 'thatch']:
            for img_id, img_name in enumerate(img_names_list):

                print('Processing image: {0}'.format(img_name))
                img_path = in_path+img_name

                # NOTE(review): this call used to end in a dangling 'padding='
                # with no value, which does not parse. The keyword is left out
                # so get_polygons falls back to its own default -- confirm the
                # padding that was intended here.
                polygon_list = DataLoader.get_polygons(roof_type=roof_type, xml_name=img_name[:-3]+'xml', xml_path=in_path)
                roof_patches = DataLoader.extract_patches(polygon_list, img_path=img_path, grayscale=True)

                for roof_id, roof_img in enumerate(roof_patches):
                    print('Processing image {0}: roof {1}'.format(img_id, roof_id))

                    #if it's vertical, make it lie down
                    if roof_img.shape[0] > roof_img.shape[1]:
                        roof_img = DataAugmentation.rotateImage(roof_img, clockwise=True)

                    #common prefix of every file written for this roof
                    general_path = '{0}{1}_{2}_{3}'.format(out_path, roof_type, img_name[:-4], roof_id)

                    #calculate and write the augmented images
                    for i, flip_code in enumerate(flip_codes):
                        roof_img_cp = np.copy(roof_img)
                        if flip_code is not None:
                            roof_img_cp = cv2.flip(roof_img_cp, flipCode=flip_code)

                        write_to_path = '{0}_flip{1}.jpg'.format(general_path, i)
                        cv2.imwrite(write_to_path, roof_img_cp)

Пример #4

0

Показать файл

 def get_thatch_template(self):
     '''
     Crop the first thatch roof found in self.in_path and write it to
     'thatch_template.jpg'.

     The .jpg files of self.in_path are visited in listdir order; the roofs
     of each one are read from the .xml file with the same base name.

     Raises:
     ----------
     ValueError
         If none of the images has a thatch roof (this used to surface as an
         AttributeError or NameError further down)
     '''
     #get some thatch roof, remembering the image it belongs to
     roof = None
     roof_img_name = None
     for img_name in listdir(self.in_path):
         if not img_name.endswith('.jpg'):
             continue
         roofs = DataLoader().get_roofs(
             self.in_path + img_name[:-3] + 'xml', '')
         for r in roofs:
             if r.roof_type == 'thatch':
                 roof = r
                 roof_img_name = img_name
                 break
         if roof is not None:
             break

     if roof is None:
         raise ValueError(
             'no thatch roof found in {0}'.format(self.in_path))

     #extract patch
     img = cv2.imread(self.in_path + roof_img_name)
     template = img[roof.ymin:roof.ymin + roof.height,
                    roof.xmin:roof.xmin + roof.width]
     cv2.imwrite('thatch_template.jpg', template)

Пример #5

0

Показать файл

import numpy as np
import pandas as pd
from get_data import DataLoader
from LR import LR
if __name__ == "__main__":
    # Train the linear model on data/train.csv, then build the normalised
    # test matrix from data/test.csv and write the predictions.
    train_csv = 'data/train.csv'
    test_csv = 'data/test.csv'
    n_samples = 240        # rows of the test feature matrix
    rows_per_sample = 18   # consecutive CSV rows flattened into one sample
    n_features = rows_per_sample * 9

    loader = DataLoader(train_csv)
    frame = loader.get_data_as_df()
    by_month = loader.get_data_by_month(frame)
    train_data, labels, mean_x, std_x = loader.get_final_data(by_month)

    # drop the first two columns and replace the 'NR' marker by 0
    test_frame = pd.read_csv(test_csv, header=None, encoding='big5')
    test_frame = test_frame.iloc[:, 2:]
    test_frame[test_frame == 'NR'] = 0
    raw = test_frame.to_numpy()

    # flatten each group of rows_per_sample rows into one feature vector
    test_x = np.empty([n_samples, n_features], dtype=float)
    for sample in range(n_samples):
        first_row = rows_per_sample * sample
        test_x[sample] = raw[first_row:first_row + rows_per_sample, :].reshape(1, -1)

    # standardise with the training statistics, one feature column at a time;
    # columns whose standard deviation is zero are left untouched
    for col in range(len(test_x[0])):
        if std_x[col] != 0:
            test_x[:, col] = (test_x[:, col] - mean_x[col]) / std_x[col]

    # prepend a column of ones
    ones_column = np.ones([n_samples, 1])
    test_x = np.concatenate((ones_column, test_x), axis=1).astype(float)

    model = LR(train_data, labels)
    model.train()
    model.get_predict_csv(test_x)

Пример #6

0

Показать файл

Файл: data_augment.py Проект: asantinc/selective_search_roofs

        margin_0 = img.shape[0] - dst_shape[0]
        margin_1 = img.shape[1] - dst_shape[1]

        margin_0 = np.random.randint(0, margin_0)
        margin_1 = np.random.randint(0, margin_1)

        min_0 = np.random.randint(0, margin_0) if margin_0 > 0 else 0
        min_1 = np.random.randint(0, margin_1) if margin_1 > 0 else 0

        patch = img[min_0 : (min_0 + dst_shape[0]), min_1 : (min_1 + dst_shape[1]), :]
        return patch


if __name__ == "__main__":
    # Debug run: for every roof patch that is not metal, save the patch as
    # loaded and again after resizing and a random flip.
    path = utils.get_path(data_fold=utils.TRAINING, in_or_out=utils.IN)
    roofs = DataLoader.get_all_patches_folder(folder_path=path, grayscale=False, merge_imgs=False)
    for img_name, patches_by_type in roofs.iteritems():
        for roof_type, patches in patches_by_type.iteritems():
            print(roof_type)
            if roof_type != "metal":
                for idx, patch in enumerate(patches):
                    before_path = "debug/{}_{}_1_{}.jpg".format(img_name, idx, "normal")
                    cv2.imwrite(before_path, patch)
                    # NOTE(review): w receives PATCH_H and h receives PATCH_W;
                    # harmless if both are equal -- confirm
                    patch = utils.resize_rgb(patch, w=utils.PATCH_H, h=utils.PATCH_W)
                    # the full-rotation step is disabled; only the flip runs
                    patch = Augmenter().random_flip(patch)
                    after_path = "debug/{}_{}_3_{}.jpg".format(img_name, idx, "flip")
                    cv2.imwrite(after_path, patch)
                    # crop it

Пример #7

0

Показать файл

import os
from matplotlib import pyplot as plt

if __name__ == '__main__':
    # Script entry point: goes through the first ten XML annotation files of
    # the full training dataset, loads the roofs of each one together with the
    # matching image and picks a colour per roof type.
    # NOTE(review): utils, DataLoader, cv2 and sys are used below but are not
    # imported in the visible part of this file.
    full_path = utils.get_path(in_or_out=utils.IN,
                               data_fold=utils.TRAINING,
                               full_dataset=True)
    xml_files = [f for f in os.listdir(full_path) if f.endswith('.xml')]
    roof_types = set()       # every roof type seen so far
    roof_polygons = dict()   # image name (without extension) -> roofs by type
    total_metal = 0
    total_thatch = 0
    total_tiled = 0
    # only the first ten annotation files are processed
    for xml_file in xml_files[:10]:
        # strip the '.xml' extension
        img_name = xml_file[:-4]
        roof_polygons[img_name] = DataLoader.get_all_roofs_full_dataset(
            merge_tiled=True, xml_name=xml_file, xml_path=full_path)
        roof_types.update(roof_polygons[img_name].keys())

        # NOTE(review): cv2.imread is documented to return an empty result
        # (None in Python) for an unreadable file rather than raise, so this
        # handler presumably never runs -- confirm and check for None instead.
        try:
            print full_path + img_name + '.jpg'
            image = cv2.imread(full_path + img_name + '.jpg')
        except IOError as e:
            print e
            sys.exit()

        # r is not used in the lines visible here
        for r, roof_type in enumerate(roof_polygons[img_name].keys()):
            if roof_type == 'thatch':
                color = (255, 0, 0)
                total_thatch += len(roof_polygons[img_name][roof_type])
            elif roof_type == 'metal':
                # unlike the thatch branch, total_metal is not updated in the
                # lines visible here
                color = (0, 255, 0)

Пример #8

0

Показать файл

Файл: neural_data_setup.py Проект: asantinc/roof-detect-network

    def load_data(self, non_roofs=None, roof_type=None, starting_batch=0):
        '''
        Fill self.X / self.y with roof and background patches and return them
        shuffled.

        Parameters:
        ----------
        non_roofs: float
            Determines what proportion of non_roofs should be added to the
            dataset. A number is required: the background limit is computed as
            (non_roofs * roofs) + roofs, which the default None cannot do.
        roof_type: string
            If roof_type equals 'metal' or 'thatch' we only load patches for
            that type of roof. With 'Both' we load both types. Any other value
            fails the assertion.
        starting_batch: int
            when doing an ensemble, we specify which batch we want to start
            picking up data from

        Returns:
        ----------
        The tuple (self.X, self.y) after shuffling with a fixed random_state
        '''
        assert roof_type=='metal' or roof_type=='thatch' or roof_type=='Both'
        #First get the positive patches
        self.ground_truth_metal_thatch = DataLoader.get_all_patches_folder(merge_imgs=True, full_dataset=self.full_dataset)
        self.viola_metal_thatch = self.get_viola_positive_patches(self.thatch_metal_TP_viola_path)
        total_length = self.count_patches_helper(non_roofs)

        self.X = np.empty((total_length, 3, utils.PATCH_W, utils.PATCH_H), dtype='float32')
        self.y = np.empty((total_length), dtype='int32')
        self.failed_patches = 0

        #process the metal and thatch
        self.roof_types = [roof_type] if roof_type!='Both' else utils.ROOF_TYPES
        index = 0
        # The loop variable has its own name on purpose: reusing roof_type
        # overwrote the argument, so a 'Both' request reached the background
        # section below as the last entry of utils.ROOF_TYPES.
        for current_type in self.roof_types:
            if len(self.roof_types) > 1:
                label = utils.ROOF_LABEL[current_type]
            else:
                label = 1
            for data_source in [self.ground_truth_metal_thatch[current_type], self.viola_metal_thatch[current_type]]:
                for patch in data_source:
                    index = self.process_patch(patch, label, index)

        #limit the number of background patches
        self.non_roof_limit = (non_roofs*index) + index
        #BACKGROUND
        if self.method == 'slide':
            #sliding window data is accessed in batches
            #NOTE(review): with the shadowing fixed this helper now receives
            #'Both' unchanged -- confirm that it supports that value
            self.viola_background = self.get_background_patches_from_batches(self.background_FP_viola_path, roof_type, starting_batch=starting_batch)
        else:
            if roof_type != 'Both':
                print('Will load {0} data only'.format(roof_type))
                self.viola_background = self.get_viola_background_patches(self.background_FP_viola_path, roof_type=roof_type)
            else:
                self.viola_background = self.get_viola_background_patches(self.background_FP_viola_path)
        label = utils.ROOF_LABEL['background']

        for patch in self.viola_background:
            if index > self.non_roof_limit: #if we have obtained enough non_roofs, break
                break
            index = self.process_patch(patch, label, index)
        #here we can add more random negative patches if needed

        #remove the end if index<len(X) -- some patches failed to load
        self.X = self.X[:index, :,:,:]
        self.y = self.y[:index]
        self.y = self.y.astype(np.int32)

        print(np.bincount(self.y))
        self.X, self.y = sklearn.utils.shuffle(self.X, self.y, random_state=42)  # shuffle train data

        return self.X, self.y

Пример #9

0

Показать файл

Файл: data_augment.py Проект: younia/roof-detect-network

        margin_0 = np.random.randint(0, margin_0)
        margin_1 = np.random.randint(0, margin_1)

        min_0 = np.random.randint(0, margin_0) if margin_0 > 0 else 0
        min_1 = np.random.randint(0, margin_1) if margin_1 > 0 else 0

        patch = img[min_0:(min_0 + dst_shape[0]),
                    min_1:(min_1 + dst_shape[1]), :]
        return patch


if __name__ == '__main__':
    # Debug run: every roof patch that is not metal is saved as loaded, then
    # resized and randomly flipped.
    path = utils.get_path(data_fold=utils.TRAINING, in_or_out=utils.IN)
    roofs = DataLoader.get_all_patches_folder(
        folder_path=path, grayscale=False, merge_imgs=False)
    for img_name, patches_by_type in roofs.iteritems():
        for roof_type, patches in patches_by_type.iteritems():
            print(roof_type)
            if roof_type != 'metal':
                for idx, patch in enumerate(patches):
                    debug_name = 'debug/{}_{}_1_{}.jpg'.format(
                        img_name, idx, 'normal')
                    cv2.imwrite(debug_name, patch)
                    # NOTE(review): w receives PATCH_H and h receives PATCH_W;
                    # harmless if both are equal -- confirm
                    patch = utils.resize_rgb(
                        patch, w=utils.PATCH_H, h=utils.PATCH_W)
                    # the full-rotation step is disabled; only the flip runs
                    patch = Augmenter().random_flip(patch)

Пример #10

0

Показать файл

    def load_data(self, non_roofs=None, roof_type=None, starting_batch=0):
        '''
        Fill self.X / self.y with roof and background patches and return them
        shuffled.

        Parameters:
        ----------
        non_roofs: float
            Determines what proportion of non_roofs should be added to the
            dataset. A number is required: the background limit is computed as
            (non_roofs * roofs) + roofs, which the default None cannot do.
        roof_type: string
            If roof_type equals 'metal' or 'thatch' we only load patches for
            that type of roof. With 'Both' we load both types. Any other value
            fails the assertion.
        starting_batch: int
            when doing an ensemble, we specify which batch we want to start
            picking up data from

        Returns:
        ----------
        The tuple (self.X, self.y) after shuffling with a fixed random_state
        '''
        assert roof_type == 'metal' or roof_type == 'thatch' or roof_type == 'Both'
        #First get the positive patches
        self.ground_truth_metal_thatch = DataLoader.get_all_patches_folder(
            merge_imgs=True, full_dataset=self.full_dataset)
        self.viola_metal_thatch = self.get_viola_positive_patches(
            self.thatch_metal_TP_viola_path)
        total_length = self.count_patches_helper(non_roofs)

        self.X = np.empty((total_length, 3, utils.PATCH_W, utils.PATCH_H),
                          dtype='float32')
        self.y = np.empty((total_length), dtype='int32')
        self.failed_patches = 0

        #process the metal and thatch
        if roof_type != 'Both':
            self.roof_types = [roof_type]
        else:
            self.roof_types = utils.ROOF_TYPES
        index = 0
        # positive_type is deliberately not called roof_type: rebinding the
        # argument here made a 'Both' request look like the last entry of
        # utils.ROOF_TYPES when the background patches are selected below.
        for positive_type in self.roof_types:
            if len(self.roof_types) > 1:
                label = utils.ROOF_LABEL[positive_type]
            else:
                label = 1
            for data_source in [
                    self.ground_truth_metal_thatch[positive_type],
                    self.viola_metal_thatch[positive_type]
            ]:
                for patch in data_source:
                    index = self.process_patch(patch, label, index)

        #limit the number of background patches
        self.non_roof_limit = (non_roofs * index) + index
        #BACKGROUND
        if self.method == 'slide':
            #sliding window data is accessed in batches
            #NOTE(review): with the shadowing fixed this helper now receives
            #'Both' unchanged -- confirm that it supports that value
            self.viola_background = self.get_background_patches_from_batches(
                self.background_FP_viola_path,
                roof_type,
                starting_batch=starting_batch)
        elif roof_type != 'Both':
            print('Will load {0} data only'.format(roof_type))
            self.viola_background = self.get_viola_background_patches(
                self.background_FP_viola_path, roof_type=roof_type)
        else:
            self.viola_background = self.get_viola_background_patches(
                self.background_FP_viola_path)
        label = utils.ROOF_LABEL['background']

        for patch in self.viola_background:
            if index > self.non_roof_limit:  #if we have obtained enough non_roofs, break
                break
            index = self.process_patch(patch, label, index)
        #here we can add more random negative patches if needed

        #remove the end if index<len(X) -- some patches failed to load
        self.X = self.X[:index, :, :, :]
        self.y = self.y[:index]
        self.y = self.y.astype(np.int32)

        print(np.bincount(self.y))
        self.X, self.y = sklearn.utils.shuffle(
            self.X, self.y, random_state=42)  # shuffle train data

        return self.X, self.y

Пример #11

0

Показать файл

Файл: reporting.py Проект: younia/roof-detect-network

    def __init__(self,
                 report_name=None,
                 method=None,
                 full_dataset=True,
                 folder_name=None,
                 save_imgs=True,
                 out_path=None,
                 detections=None,
                 in_path=None,
                 detector_names=None,
                 mergeFalsePos=False,
                 separateDetections=True,
                 vocGood=0.1,
                 negThres=0.3,
                 auc_threshold=0.5,
                 correct_roofs=None,
                 img_names=None):
        '''
        Will score the detections class it contains.

        Parameters:
        --------------------
        report_name: string
            The report is only started when both report_name and
            detector_names are given
        method: string
            Must not be None (asserted at the end of the constructor)
        full_dataset: bool
            Selects how the ground truth is loaded when correct_roofs is not
            supplied: False reads polygons and converts them to boxes, any
            other value reads the roofs of the full dataset
        folder_name, save_imgs, out_path, negThres, auc_threshold:
            Stored on the instance unchanged
        detections: detection class
            Its update_roof_num is called for every image and roof type when
            the ground truth is loaded here
        in_path: string
            The path from which the images and xml files are taken
        detector_names:
            Passed on to init_report
        separateDetections: bool
            Whether the metal detections can count as true positives for the thatch 
            detections and viceversa
        mergeFalsePos: bool
            Whether we need to keep track of the good and bad detections of metal
            and thatch separately or not. We cannot separate them if we want to 
            train a single neural network to distinguish between both types of roofs 
            since the bad detections of either roof must not contain a positive detection
            of the other type of roof (it should be background)
        vocGood: float
            Not read anywhere in this constructor
        correct_roofs: dict
            Ground truth to use as is; when None it is loaded from in_path
        img_names: list
            Image names to score; when None every .jpg in in_path is used
        '''
        self.TOTAL = 0
        self.save_imgs = save_imgs
        #these two are related to saving the FP and TP for neural training
        self.mergeFalsePos = mergeFalsePos
        self.auc_threshold = auc_threshold

        self.keep_detections_separate = separateDetections

        #threholds to classify detections as False/True positives and Good/Bad detections(these are to train the neural network on it)
        self.VOC_threshold = utils.VOC_threshold  #threshold to assign a detection as a true positive
        self.VOC_good_detection_threshold = dict()
        self.VOC_good_detection_threshold['metal'] = utils.VOC_threshold
        self.VOC_good_detection_threshold['thatch'] = utils.VOC_threshold
        self.detection_portion_threshold = 0.50
        self.negThres = negThres

        self.detections = detections
        self.in_path = in_path  #the path from which the images are taken
        #set unconditionally: it used to be assigned only when the ground
        #truth was loaded here, so it was missing whenever correct_roofs was
        #passed in
        self.full_dataset = full_dataset
        if img_names is None:
            self.img_names = [
                f for f in listdir(self.in_path) if f.endswith('.jpg')
            ]
        else:
            self.img_names = img_names

        #the ground truth roofs for every image and roof type
        if correct_roofs is None:
            self.correct_roofs = dict()
            for roof_type in utils.ROOF_TYPES:
                self.correct_roofs[roof_type] = dict()
            #'== False' is kept as it was, so that a value such as None still
            #selects the full dataset branch
            if full_dataset == False:
                for img_name in self.img_names:
                    xml_name = img_name[:-3] + 'xml'
                    for roof_type in utils.ROOF_TYPES:
                        #we receive polygons, so we convert them into boxes so we can use the fast scoring
                        temp_roofs = DataLoader.get_polygons(
                            roof_type=roof_type,
                            xml_name=xml_name,
                            xml_path=self.in_path)
                        if len(temp_roofs) > 0:
                            roofs = utils.polygons2boxes(temp_roofs)
                        else:
                            roofs = []
                        self.correct_roofs[roof_type][img_name] = roofs
                        self.detections.update_roof_num(roofs, roof_type)
            else:
                for img_name in self.img_names:
                    current_roofs = DataLoader.get_all_roofs_full_dataset(
                        xml_name=img_name[:-3] + 'xml', xml_path=self.in_path)
                    for roof_type in utils.ROOF_TYPES:
                        #roof types without any roof in this image get an empty list
                        if roof_type not in current_roofs:
                            roofs = []
                        else:
                            roofs = current_roofs[roof_type]
                        self.correct_roofs[roof_type][img_name] = roofs
                        self.detections.update_roof_num(roofs, roof_type)
        else:
            self.correct_roofs = correct_roofs

        #init the report file
        self.out_path = out_path
        if report_name is not None and detector_names is not None:
            self.init_report(detector_names, report_name=report_name)

        #variables needed to pickle the FP and TP to a file that makes sense
        assert method is not None
        self.method = method
        self.folder_name = folder_name

Python DataLoader примеры использования