def load_data(self, non_roofs=np.inf, roof_type=None):
        '''
        Assemble the shuffled patch array X and label array y.

        Positive patches are taken from the ground truth folder and from the
        viola true positives; background patches are taken from the viola
        false positives.

        Parameters:
        ----------
        non_roofs: float
            Proportion of background patches to add, relative to the number of
            positive patches that were loaded. 0 adds no background at all;
            the default (np.inf) puts no limit on the background patches
        roof_type: string
            Must be 'metal', 'thatch' or 'Both' (the None default fails the
            assertion below, so callers always have to pass it).
            With 'metal' or 'thatch' only that type of roof is loaded and it
            is labelled 1. With 'Both' every type in utils.ROOF_TYPES is
            loaded and labelled with utils.ROOF_LABEL

        Returns:
        ----------
        X: float32 array, (n_patches, 3, utils.PATCH_W, utils.PATCH_H)
        y: int32 array, (n_patches,)
            X and y are shuffled together with a fixed seed
        '''
        assert roof_type in ('metal', 'thatch', 'Both'), (
            "roof_type must be 'metal', 'thatch' or 'Both', got {0!r}".format(roof_type))

        self.ground_truth_metal_thatch = DataLoader.get_all_patches_folder(merge_imgs=True)
        self.viola_metal_thatch = self.get_viola_positive_patches(self.thatch_metal_TP_viola_path)
        if roof_type != 'Both':
            print('Will load {0} data only'.format(roof_type))
            self.viola_background = self.get_viola_background_patches(self.background_FP_viola_path, roof_type=roof_type)
        else:
            self.viola_background = self.get_viola_background_patches(self.background_FP_viola_path)

        #the arrays are allocated up front and trimmed at the end
        total_length = self.count_patches_helper()

        self.X = np.empty((total_length, 3, utils.PATCH_W, utils.PATCH_H), dtype='float32')
        self.y = np.empty((total_length), dtype='int32')
        self.failed_patches = 0

        #process the metal and thatch
        self.roof_types = [roof_type] if roof_type != 'Both' else utils.ROOF_TYPES
        #a single roof type is a binary problem: that type is simply label 1
        use_type_labels = len(self.roof_types) > 1
        index = 0
        #the loop variable is deliberately not called roof_type, so that the
        #parameter keeps its value for the rest of the method
        for current_type in self.roof_types:
            label = utils.ROOF_LABEL[current_type] if use_type_labels else 1
            for data_source in [self.ground_truth_metal_thatch[current_type], self.viola_metal_thatch[current_type]]:
                for patch in data_source:
                    #process_patch hands back the index to use for the next patch
                    index = self.process_patch(patch, label, index)

        #limit the number of background patches: index is the number of
        #positives at this point, so the limit is positives*(1+non_roofs)
        self.non_roof_limit = (non_roofs*index) + index
        #process the background
        label = utils.ROOF_LABEL['background']
        for patch in self.viola_background:
            #>= (not >) so that exactly non_roofs*positives patches are added
            if index >= self.non_roof_limit: #if we have obtained enough non_roofs, break
                break
            index = self.process_patch(patch, label, index)

        #here we can add more random negative patches if needed

        #remove the end if index<len(X) -- some patches failed to load
        self.X = self.X[:index, :, :, :]
        self.X = self.X.astype(np.float32)
        self.y = self.y[:index]
        self.y = self.y.astype(np.int32)

        print(np.bincount(self.y))
        self.X, self.y = sklearn.utils.shuffle(self.X, self.y, random_state=42)  # shuffle train data
        return self.X, self.y
        margin_0 = img.shape[0] - dst_shape[0]
        margin_1 = img.shape[1] - dst_shape[1]

        margin_0 = np.random.randint(0, margin_0)
        margin_1 = np.random.randint(0, margin_1)

        min_0 = np.random.randint(0, margin_0) if margin_0 > 0 else 0
        min_1 = np.random.randint(0, margin_1) if margin_1 > 0 else 0

        patch = img[min_0 : (min_0 + dst_shape[0]), min_1 : (min_1 + dst_shape[1]), :]
        return patch


if __name__ == "__main__":
    # Debug driver: for every non-metal roof patch of the training set, write
    # the raw patch and its resized + randomly flipped version into debug/
    # so the augmentation can be checked by eye.
    training_dir = utils.get_path(data_fold=utils.TRAINING, in_or_out=utils.IN)
    patches_by_image = DataLoader.get_all_patches_folder(
        folder_path=training_dir, grayscale=False, merge_imgs=False
    )
    for image_name, patches_by_type in patches_by_image.iteritems():
        for kind, patches in patches_by_type.iteritems():
            print(kind)
            if kind != "metal":
                for n, patch in enumerate(patches):
                    raw_file = "debug/{}_{}_1_{}.jpg".format(image_name, n, "normal")
                    cv2.imwrite(raw_file, patch)
                    # NOTE(review): w receives PATCH_H and h receives PATCH_W;
                    # confirm whether the swap is intended
                    patch = utils.resize_rgb(patch, w=utils.PATCH_H, h=utils.PATCH_W)
                    # the rotation step is currently disabled:
                    # patch = Augmenter().random_full_rotation(patch)
                    # cv2.imwrite('debug/{}_{}_2_{}.jpg'.format(image_name, n, 'rotated'), patch)
                    flipped = Augmenter().random_flip(patch)
                    flip_file = "debug/{}_{}_3_{}.jpg".format(image_name, n, "flip")
                    cv2.imwrite(flip_file, flipped)
                    # crop it
        margin_0 = np.random.randint(0, margin_0)
        margin_1 = np.random.randint(0, margin_1)

        min_0 = np.random.randint(0, margin_0) if margin_0 > 0 else 0
        min_1 = np.random.randint(0, margin_1) if margin_1 > 0 else 0

        patch = img[min_0:(min_0 + dst_shape[0]),
                    min_1:(min_1 + dst_shape[1]), :]
        return patch


if __name__ == '__main__':
    # Debug driver: write every non-metal roof patch of the training set into
    # debug/, then resize and randomly flip it.
    input_folder = utils.get_path(data_fold=utils.TRAINING, in_or_out=utils.IN)
    all_roofs = DataLoader.get_all_patches_folder(
        folder_path=input_folder, grayscale=False, merge_imgs=False)
    for name, typed_roofs in all_roofs.iteritems():
        for roof_kind, roof_patches in typed_roofs.iteritems():
            print(roof_kind)
            # metal patches are not dumped
            if roof_kind == 'metal':
                continue
            for position, current in enumerate(roof_patches):
                target = 'debug/{}_{}_1_{}.jpg'.format(name, position, 'normal')
                cv2.imwrite(target, current)
                # NOTE(review): w receives PATCH_H and h receives PATCH_W;
                # confirm whether the swap is intended
                resized = utils.resize_rgb(
                    current, w=utils.PATCH_H, h=utils.PATCH_W)
                # the rotation step is currently disabled:
                # resized = Augmenter().random_full_rotation(resized)
                # cv2.imwrite('debug/{}_{}_2_{}.jpg'.format(name, position, 'rotated'), resized)
                # flip it
                flipped = Augmenter().random_flip(resized)
    def load_data(self, non_roofs=None, roof_type=None, starting_batch=0):
        '''
        Assemble the shuffled patch array X and label array y.

        Positive patches are taken from the ground truth folder and from the
        viola true positives. Background patches are read in batches when
        self.method is 'slide', and from the viola false positives otherwise.

        Parameters:
        ----------
        non_roofs: float
            Proportion of background patches to add, relative to the number of
            positive patches that were loaded. A number has to be passed: the
            None default cannot be multiplied when the limit is computed
        roof_type: string
            Must be 'metal', 'thatch' or 'Both' (the None default fails the
            assertion below, so callers always have to pass it).
            With 'metal' or 'thatch' only that type of roof is loaded and it
            is labelled 1. With 'Both' every type in utils.ROOF_TYPES is
            loaded and labelled with utils.ROOF_LABEL
        starting_batch: int
            when doing an ensemble, we specify which batch we want to start picking up data from

        Returns:
        ----------
        X: float32 array, (n_patches, 3, utils.PATCH_W, utils.PATCH_H)
        y: int32 array, (n_patches,)
            X and y are shuffled together with a fixed seed
        '''
        assert roof_type in ('metal', 'thatch', 'Both'), (
            "roof_type must be 'metal', 'thatch' or 'Both', got {0!r}".format(roof_type))
        #First get the positive patches
        self.ground_truth_metal_thatch = DataLoader.get_all_patches_folder(merge_imgs=True, full_dataset=self.full_dataset)
        self.viola_metal_thatch = self.get_viola_positive_patches(self.thatch_metal_TP_viola_path)
        #the arrays are allocated up front and trimmed at the end
        total_length = self.count_patches_helper(non_roofs)

        self.X = np.empty((total_length, 3, utils.PATCH_W, utils.PATCH_H), dtype='float32')
        self.y = np.empty((total_length), dtype='int32')
        self.failed_patches = 0

        #process the metal and thatch
        self.roof_types = [roof_type] if roof_type != 'Both' else utils.ROOF_TYPES
        #a single roof type is a binary problem: that type is simply label 1
        use_type_labels = len(self.roof_types) > 1
        index = 0
        #the loop variable must not be called roof_type: the parameter is
        #needed again below to pick the background patches, and reusing the
        #name turned every 'Both' run into a run on the last roof type
        for current_type in self.roof_types:
            label = utils.ROOF_LABEL[current_type] if use_type_labels else 1
            for data_source in [self.ground_truth_metal_thatch[current_type], self.viola_metal_thatch[current_type]]:
                for patch in data_source:
                    #process_patch hands back the index to use for the next patch
                    index = self.process_patch(patch, label, index)

        #limit the number of background patches: index is the number of
        #positives at this point, so the limit is positives*(1+non_roofs)
        self.non_roof_limit = (non_roofs*index) + index
        #BACKGROUND
        if self.method == 'slide': #sliding window only: the patches have to be accessed in batches
            #NOTE(review): roof_type can now be 'Both' here -- confirm that
            #get_background_patches_from_batches accepts it
            self.viola_background = self.get_background_patches_from_batches(self.background_FP_viola_path, roof_type, starting_batch=starting_batch)
        elif roof_type != 'Both':
            print('Will load {0} data only'.format(roof_type))
            self.viola_background = self.get_viola_background_patches(self.background_FP_viola_path, roof_type=roof_type)
        else:
            self.viola_background = self.get_viola_background_patches(self.background_FP_viola_path)
        label = utils.ROOF_LABEL['background']

        for patch in self.viola_background:
            #>= (not >) so that exactly non_roofs*positives patches are added
            if index >= self.non_roof_limit: #if we have obtained enough non_roofs, break
                break
            index = self.process_patch(patch, label, index)
        #here we can add more random negative patches if needed

        #remove the end if index<len(X) -- some patches failed to load
        self.X = self.X[:index, :, :, :]
        self.y = self.y[:index]
        self.y = self.y.astype(np.int32)

        print(np.bincount(self.y))
        self.X, self.y = sklearn.utils.shuffle(self.X, self.y, random_state=42)  # shuffle train data

        return self.X, self.y
# Example #5
# 0
    def load_data(self, non_roofs=None, roof_type=None, starting_batch=0):
        '''
        Build the training set: patches in X, labels in y, shuffled together.

        The roof patches (ground truth folder plus viola true positives) are
        stored first, then background patches are appended until the
        non_roofs limit is reached. The background comes from batches when
        self.method is 'slide' and from the viola false positives otherwise.

        Parameters:
        ----------
        non_roofs: float
            Proportion of background patches to add, relative to the number of
            positive patches that were loaded. A number has to be passed: the
            None default cannot be multiplied when the limit is computed
        roof_type: string
            Must be 'metal', 'thatch' or 'Both' (the None default fails the
            assertion below, so callers always have to pass it).
            With 'metal' or 'thatch' only that type of roof is loaded and it
            is labelled 1. With 'Both' every type in utils.ROOF_TYPES is
            loaded and labelled with utils.ROOF_LABEL
        starting_batch: int
            when doing an ensemble, we specify which batch we want to start picking up data from

        Returns:
        ----------
        X: float32 array, (n_patches, 3, utils.PATCH_W, utils.PATCH_H)
        y: int32 array, (n_patches,)
            X and y are shuffled together with a fixed seed
        '''
        assert roof_type in ('metal', 'thatch', 'Both'), (
            "roof_type must be 'metal', 'thatch' or 'Both', "
            "got {0!r}".format(roof_type))
        #First get the positive patches
        self.ground_truth_metal_thatch = DataLoader.get_all_patches_folder(
            merge_imgs=True, full_dataset=self.full_dataset)
        self.viola_metal_thatch = self.get_viola_positive_patches(
            self.thatch_metal_TP_viola_path)
        #the arrays are allocated up front and trimmed at the end
        total_length = self.count_patches_helper(non_roofs)

        self.X = np.empty((total_length, 3, utils.PATCH_W, utils.PATCH_H),
                          dtype='float32')
        self.y = np.empty((total_length), dtype='int32')
        self.failed_patches = 0

        #process the metal and thatch
        if roof_type != 'Both':
            self.roof_types = [roof_type]
        else:
            self.roof_types = utils.ROOF_TYPES
        index = 0
        #the loop variable must not be called roof_type: the parameter is
        #needed again below to pick the background patches, and reusing the
        #name turned every 'Both' run into a run on the last roof type
        for positive_type in self.roof_types:
            if len(self.roof_types) > 1:
                label = utils.ROOF_LABEL[positive_type]
            else:
                #a single roof type is a binary problem
                label = 1
            data_sources = [
                self.ground_truth_metal_thatch[positive_type],
                self.viola_metal_thatch[positive_type],
            ]
            for data_source in data_sources:
                for patch in data_source:
                    #process_patch hands back the index for the next patch
                    index = self.process_patch(patch, label, index)

        #limit the number of background patches: index is the number of
        #positives at this point, so the limit is positives*(1+non_roofs)
        self.non_roof_limit = (non_roofs * index) + index
        #BACKGROUND
        if self.method == 'slide':
            #sliding window only: the patches have to be accessed in batches
            #NOTE(review): roof_type can now be 'Both' here -- confirm that
            #get_background_patches_from_batches accepts it
            self.viola_background = self.get_background_patches_from_batches(
                self.background_FP_viola_path,
                roof_type,
                starting_batch=starting_batch)
        elif roof_type != 'Both':
            print('Will load {0} data only'.format(roof_type))
            self.viola_background = self.get_viola_background_patches(
                self.background_FP_viola_path, roof_type=roof_type)
        else:
            self.viola_background = self.get_viola_background_patches(
                self.background_FP_viola_path)
        label = utils.ROOF_LABEL['background']

        for patch in self.viola_background:
            #>= (not >) so that exactly non_roofs*positives patches are added
            if index >= self.non_roof_limit:
                break
            index = self.process_patch(patch, label, index)
        #here we can add more random negative patches if needed

        #remove the end if index<len(X) -- some patches failed to load
        self.X = self.X[:index, :, :, :]
        self.y = self.y[:index]
        self.y = self.y.astype(np.int32)

        print(np.bincount(self.y))
        self.X, self.y = sklearn.utils.shuffle(
            self.X, self.y, random_state=42)  # shuffle train data

        return self.X, self.y