def loadAllData(self, path_name):
    # load the images stored under the 'traindata' and 'testdata'
    # subdirectories and merge them into a single data set
    train_images, train_labels = load_dataset(path_name, 'traindata')
    test_images, test_labels = load_dataset(path_name, 'testdata')
    images = np.concatenate((train_images, test_images), axis=0)
    labels = np.concatenate((train_labels, test_labels), axis=0)
    return images, labels
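A minimal usage sketch for the method above, assuming loadAllData belongs to a Dataset-like container class and that load_dataset returns NumPy arrays; the class name and path here are illustrative, not from the snippet:

dataset = Dataset()                        # hypothetical container class
images, labels = dataset.loadAllData('./data')
print(images.shape, labels.shape)          # combined train + test arrays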
Example #2
    def load(self,
             img_rows=IMAGE_SIZE,
             img_cols=IMAGE_SIZE,
             img_channels=3,
             nb_classes=5):  # load the data and preprocess it
        images, labels = load_dataset(self.path_name)

        train_images, valid_images, train_labels, valid_labels = train_test_split(
            images, labels, test_size=0.3, random_state=random.randint(0, 100))
        # note: this second split draws the test set from the full data,
        # so test samples can overlap with the training set
        _, test_images, _, test_labels = train_test_split(
            images, labels, test_size=0.5, random_state=random.randint(0, 100))

        # 'th' ordering expects (channels, rows, cols);
        # 'tf' ordering expects (rows, cols, channels)
        if K.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            test_images = test_images.reshape(test_images.shape[0],
                                              img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows,
                                              img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        # one-hot encode the labels to vectorize them for
        # categorical_crossentropy
        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)

        # convert the images to float so they can be normalized
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # normalize the pixel values into [0, 1]
        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
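The one-hot step above turns integer class ids into indicator vectors, which is what categorical_crossentropy expects. A small standalone sketch of the same transformation; note that np_utils is the Keras 1.x location, while in Keras 2.x the same helper is keras.utils.to_categorical:

import numpy as np
from keras.utils import to_categorical  # np_utils.to_categorical in Keras 1.x

labels = np.array([0, 2, 1, 4])
print(to_categorical(labels, num_classes=5))
# [[1. 0. 0. 0. 0.]
#  [0. 0. 1. 0. 0.]
#  [0. 1. 0. 0. 0.]
#  [0. 0. 0. 0. 1.]]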
Example #3
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE,
         img_channels=3, nb_classes=5):
    # load the data set into memory
    images, labels = load_dataset(self.path_name)

    train_images, valid_images, train_labels, valid_labels = train_test_split(images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(images, labels, test_size=0.5, random_state=random.randint(0, 100))

    # if the current dim ordering is 'th', images are fed as
    # (channels, rows, cols); otherwise as (rows, cols, channels).
    # This block reshapes the data into the ordering Keras expects.
    if K.image_dim_ordering() == 'th':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)

    # report the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')

    # the model uses categorical_crossentropy as its loss function, so the
    # class labels must be one-hot encoded into nb_classes-dimensional
    # vectors
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)

    # convert the pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')

    # normalize each pixel value into the [0, 1] range
    train_images /= 255
    valid_images /= 255
    test_images /= 255

    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
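The K.image_dim_ordering() call used throughout these examples is Keras 1.x API; on Keras 2.x the equivalent check is K.image_data_format(), which returns 'channels_first' or 'channels_last'. A sketch of the same branch on the newer API, with img_rows, img_cols and img_channels as above:

from keras import backend as K

if K.image_data_format() == 'channels_first':    # was 'th' in Keras 1.x
    input_shape = (img_channels, img_rows, img_cols)
else:                                            # 'channels_last', was 'tf'
    input_shape = (img_rows, img_cols, img_channels)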
Example #4
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE,
         img_channels=3, nb_classes=2):
    # load the data set into memory
    images, labels = load_dataset(self.path_name, self.username)

    train_images, valid_images, train_labels, valid_labels = train_test_split(images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(images, labels, test_size=0.5, random_state=random.randint(0, 100))

    # with 'th' ordering images are fed as (channels, rows, cols),
    # otherwise as (rows, cols, channels)
    if K.image_dim_ordering() == 'th':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)

    # report the sample counts
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')

    # the model's loss function is categorical_crossentropy, so one-hot
    # encode the labels into nb_classes-dimensional vectors
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)

    # convert the pixel data to float for normalization
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')

    # scale the pixel values into [0, 1]
    train_images /= 255
    valid_images /= 255
    test_images /= 255

    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
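One caveat that applies to examples #2 through #4: the test split is drawn from the full data set after the train/validation split, so test samples can also appear in the training set. A leak-free alternative, sketched with two chained splits that keep the three subsets disjoint (the fractions are illustrative):

from sklearn.model_selection import train_test_split

# 70% train, then split the remaining 30% evenly into validation and test
train_x, rest_x, train_y, rest_y = train_test_split(
    images, labels, test_size=0.3, random_state=42)
valid_x, test_x, valid_y, test_y = train_test_split(
    rest_x, rest_y, test_size=0.5, random_state=42)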
Example #5
def mainProgram(dataset):
    # load data
    X, Y = loadData.load_dataset(dataset)
    fold = 10
    kf = KFold(n_splits=fold)
    current_fold = 0
    acc = 0
    optimized_acc = 0

    for train, test in kf.split(X):
        # per-fold cluster buffers, reset each fold so clusters from
        # earlier folds do not leak into the current ensemble
        X_train_clusters = []
        Y_train_clusters = []
        X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
        X_train = preprocessing.normalize(X_train)
        X_test = preprocessing.normalize(X_test)
        sil = []
        # dissimilarity is not defined for a single cluster, so the minimum
        # number of clusters is 2
        K = range(2, 20)
        for k in K:
            kmeans = KMeans(n_clusters=k).fit(X_train)
            labels = kmeans.labels_
            sil.append(silhouette_score(X_train, labels, metric='euclidean'))
        optimum_K = K[sil.index(max(sil))]
        plt.plot(K, sil, 'bx-')
        plt.xlabel('k')
        plt.ylabel('Silhouette Score')
        plt.title('Silhouette Dissimilarity Scores for various k')
        plt.axvline(x=optimum_K, color='r', linestyle='--')
        fig1 = plt.gcf()
        plt.show()
        plt.draw()
        fig1.savefig(dataset + '.png',
                     format='png',
                     bbox_inches='tight',
                     dpi=300)
        plt.close()
        kmeans = KMeans(n_clusters=optimum_K,
                        init='k-means++',
                        max_iter=300,
                        n_init=10,
                        random_state=0)
        kmeans.fit(X_train)
        # keep only clusters containing more than one class; a single-class
        # cluster cannot be used to train a discriminative classifier
        for j in range(optimum_K):
            X_train_temp = X_train[kmeans.labels_ == j]
            Y_train_temp = Y_train[kmeans.labels_ == j]
            if len(np.unique(Y_train_temp)) > 1:
                X_train_clusters.append(X_train_temp)
                Y_train_clusters.append(Y_train_temp)
        # hold out the first fifth of the test fold as a validation set
        val_count = int(len(X_test) / 5)
        valX = X_test[0:val_count]
        valy = Y_test[0:val_count]
        X_test = X_test[val_count:]
        Y_test = Y_test[val_count:]

        ensemble = trainClassifiers(X_train_clusters, Y_train_clusters)
        acc += decisionFusion(ensemble, X_test, Y_test)

        optimized_ensemble = optimizeEnsemble(ensemble, valX, valy)
        optimized_acc += decisionFusion(optimized_ensemble, X_test, Y_test)
        current_fold += 1
        print("Non_optimized and Optimized Accuracy for " + dataset + " is: " +
              str(acc / current_fold) + " and " +
              str(optimized_acc / current_fold))
    return ((acc / current_fold), (acc / current_fold))
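trainClassifiers, optimizeEnsemble and decisionFusion are not shown in this snippet. As an illustration of the fusion step, here is a minimal majority-vote sketch under the assumption that the ensemble is a list of fitted scikit-learn classifiers and the labels are non-negative integers; the function name is hypothetical:

import numpy as np

def majority_vote_accuracy(ensemble, X_test, Y_test):
    # each row holds one member's predictions for every test sample
    preds = np.stack([clf.predict(X_test) for clf in ensemble])
    # per-sample majority vote via bincount over the member predictions
    fused = np.apply_along_axis(
        lambda col: np.bincount(col.astype(int)).argmax(), 0, preds)
    return np.mean(fused == Y_test)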