示例#1
0
def create_pickle_train(image_path,
                        mask_path,
                        pkl_path,
                        img_pixel=10,
                        channels=3):
    """Slide a half-overlapping window over an image/mask pair and pickle
    the resulting (pixels + label) rows to gzip-compressed files.

    Each (img_pixel, img_pixel, channels) window of the image is flattened
    to one row; its label is the max of the matching mask window (mask is
    integer-divided by 255, so labels come out 0/1).  After 10,000,000
    windows one numbered chunk ``pkl_path + '_<n>.pkl'`` is dumped and
    iteration stops; any remaining rows go to ``pkl_path + '.pkl'``.

    Args:
        image_path: source image path, read via m1.Multiband2Array.
        mask_path: label mask path, read the same way.
        pkl_path: output path prefix for the pickle file(s).
        img_pixel: window side length in pixels.
        channels: number of bands per pixel.
    """
    # One flattened window plus one label column makes a training row.
    # BUG FIX: this width was hard-coded to 301, which is only correct for
    # the defaults (10*10*3 + 1); derive it from the parameters instead.
    row_width = img_pixel * img_pixel * channels + 1

    m = 0  # rows accumulated in the current chunk
    n = 0  # index of the next numbered chunk file
    image_data = m1.Multiband2Array(image_path)
    # Mask pixels are assumed 0/255 — TODO confirm; // 255 maps them to 0/1.
    mask_data = m1.Multiband2Array(mask_path) // 255

    x_size, y_size = image_data.shape[:2]

    data_list = []
    flag = True  # cleared once the first full chunk has been dumped
    # Step by half a window so successive windows overlap by 50%.
    for i in range(0, x_size - img_pixel + 1, img_pixel // 2):
        if not flag:
            break
        if i + img_pixel > x_size:  # defensive clamp (range bound already prevents this)
            i = x_size - img_pixel - 1
        for j in range(0, y_size - img_pixel + 1, img_pixel // 2):
            if j + img_pixel > y_size:  # defensive clamp, see above
                j = y_size - img_pixel - 1
            cropped_data = image_data[i:i + img_pixel, j:j + img_pixel]
            # Flatten the window to a single row.
            data1 = cropped_data.reshape((-1, img_pixel * img_pixel * channels))
            # A window is positive if any mask pixel inside it is positive.
            train_label = mask_data[i:i + img_pixel, j:j + img_pixel].max()
            data2 = np.append(data1, train_label)[np.newaxis, :]  # pixels + label
            data_list.append(data2)

            m += 1
            if m >= 10000000:
                data_matrix = np.array(data_list, dtype=np.float32)
                data_matrix = data_matrix.reshape((-1, row_width))
                # gzip the pickle to shrink the file on disk.
                with gzip.open(pkl_path + '_' + str(n) + '.pkl',
                               'wb') as writer:
                    pickle.dump(data_matrix, writer)
                data_list = []
                m = 0
                n += 1
                flag = False
                break
    if data_list:
        data_matrix = np.array(data_list, dtype=np.float32)
        data_matrix = data_matrix.reshape((-1, row_width))
        with gzip.open(pkl_path + '.pkl',
                       'wb') as writer:  # gzip the pickle to shrink the file
            pickle.dump(data_matrix, writer)
示例#2
0
def create_pickle_test(dir_name, img_pixel=60, channels=4, img_names=None):
    """Build a gzip-pickled test set from class-labeled image folders.

    Walks the subdirectories of ``dir_name``; each subdirectory name is the
    integer class label for the images inside it.  Every image is read with
    m1.Multiband2Array, flattened, the label appended, and the row serialized
    and zlib-compressed.  All rows are stacked and pickled to
    ``dir_name + 'test_data.pkl'``.

    Args:
        dir_name: root directory, one subdirectory per class.
        img_pixel: expected image side length in pixels.
        channels: expected number of bands per image.
        img_names: optional list to collect image names into (kept for
            interface compatibility; currently unused by the body).

    Raises:
        ValueError: if no images are found (previously an obscure NameError).
    """
    # BUG FIX: img_names had a mutable default ([]), which is shared across
    # calls; use the None sentinel instead.
    if img_names is None:
        img_names = []

    rows = []
    for _, dirs, _ in os.walk(dir_name):
        for label_dir in dirs:  # folder name is the class label
            file_path = os.path.join(dir_name, label_dir)
            for img_name in os.listdir(file_path):
                image_path = os.path.join(file_path, img_name)
                # GDAL-based reader; handles images with more than 3 bands.
                img = m1.Multiband2Array(image_path)
                data1 = img.reshape((-1, img_pixel * img_pixel * channels))  # flatten
                label = np.array([int(label_dir)])
                row = np.append(data1, label)[np.newaxis, :]  # pixels + label
                # BUG FIX: ndarray.tostring() was removed in NumPy 1.23;
                # tobytes() is the byte-identical replacement.  zlib shrinks
                # the serialized row further.
                rows.append(zlib.compress(row.tobytes()))

    if not rows:
        raise ValueError("no images found under %r" % dir_name)

    # Stack once at the end instead of np.vstack inside the loop
    # (O(n) instead of O(n^2)); a single row stays a plain bytes object,
    # matching the original first-iteration behaviour.
    data = rows[0] if len(rows) == 1 else np.vstack(rows)

    with gzip.open(dir_name + 'test_data.pkl',
                   'wb') as writer:  # gzip the pickle to shrink the file
        pickle.dump(data, writer)
示例#3
0
def create_pickle_train(dir_name, img_pixel=60, channels=4):
    """Collect a labeled training matrix from class-labeled image folders.

    Walks the subdirectories of ``dir_name``; each subdirectory name is the
    integer class label for the images it contains.  Each image is read with
    m1.Multiband2Array, flattened to one row, and the label is appended as
    the final column.

    Args:
        dir_name: root directory, one subdirectory per class.
        img_pixel: expected image side length in pixels.
        channels: expected number of bands per image.

    Returns:
        np.ndarray of shape (n_images, img_pixel*img_pixel*channels + 1).

    Raises:
        ValueError: if no images are found (previously an obscure NameError
            from the undefined accumulator).
    """
    rows = []
    for _, dirs, _ in os.walk(dir_name):
        for label_dir in dirs:  # folder name is the class label
            folder = os.path.join(dir_name, label_dir)
            for img_name in os.listdir(folder):
                image_path = os.path.join(folder, img_name)
                # GDAL-based reader; handles images with more than 3 bands.
                img = m1.Multiband2Array(image_path)
                data1 = img.reshape((-1, img_pixel * img_pixel * channels))  # flatten
                label = np.array([int(label_dir)])
                rows.append(np.append(data1, label)[np.newaxis, :])  # pixels + label

    if not rows:
        raise ValueError("no images found under %r" % dir_name)

    # PERF FIX: one vstack at the end instead of np.vstack inside the loop
    # (the incremental version recopies the whole matrix every iteration).
    return np.vstack(rows)
示例#4
0
def create_gzip_train(dir_name, img_pixel=60, channels=4):
    """Stream labeled training rows into one gzip file, one record per image.

    Walks the subdirectories of ``dir_name``; each subdirectory name is the
    integer class label for the images inside it.  Each image is flattened,
    the label appended, and the raw bytes written to
    ``dir_name + 'train_data.txt.gz'`` followed by a newline.

    Args:
        dir_name: root directory, one subdirectory per class.
        img_pixel: expected image side length in pixels.
        channels: expected number of bands per image.
    """
    with gzip.open(dir_name + 'train_data.txt.gz', 'wb') as writer:
        for _, dirs, _ in os.walk(dir_name):
            for label_dir in dirs:  # folder name is the class label
                folder = os.path.join(dir_name, label_dir)
                for img_name in os.listdir(folder):
                    image_path = os.path.join(folder, img_name)
                    # GDAL-based reader; handles images with more than 3 bands.
                    img = m1.Multiband2Array(image_path)
                    data1 = img.reshape((-1, img_pixel * img_pixel * channels))  # flatten
                    label = np.array([int(label_dir)])
                    row = np.append(data1, label)[np.newaxis, :]  # pixels + label
                    # BUG FIX: ndarray.tostring() was removed in NumPy 1.23;
                    # tobytes() is the byte-identical replacement.
                    # NOTE(review): raw array bytes can themselves contain
                    # b'\n', so this delimiter is not safe to split on —
                    # kept unchanged for compatibility with existing readers.
                    writer.write(row.tobytes() + b'\n')
示例#5
0
def create_pickle_train(dir_name, img_pixel=60, channels=4):
    """Dump a fixed number of samples per class to gzip-compressed pickles.

    Class '0': the first 636 images -> ``dir_name + 'train_data.pkl'``.
    Class '1': the first 681 images -> ``dir_name + 'train_data_1.pkl'``.
    Each image is flattened, labeled, serialized and zlib-compressed; the
    compressed rows are stacked with np.vstack and pickled.  As in the
    original code, nothing is written for a class whose folder holds fewer
    images than its cap (the dump only happened once the cap was reached).

    Args:
        dir_name: root directory containing class folders '0' and '1'.
        img_pixel: expected image side length in pixels.
        channels: expected number of bands per image.
    """
    # Per-class sample cap and output file.  The original duplicated the
    # whole loop body per class (with inconsistent cap conditions, <= vs <);
    # a single table + helper keeps both classes on identical logic.
    class_caps = {
        '0': (636, dir_name + 'train_data.pkl'),
        '1': (681, dir_name + 'train_data_1.pkl'),
    }
    for _, dirs, _ in os.walk(dir_name):
        for label_dir in dirs:  # folder name is the class label
            if label_dir in class_caps:
                cap, out_path = class_caps[label_dir]
                _dump_class_samples(os.path.join(dir_name, label_dir),
                                    int(label_dir), cap, out_path,
                                    img_pixel, channels)


def _dump_class_samples(folder, label_value, cap, out_path, img_pixel, channels):
    """Read up to ``cap`` images from ``folder`` and gzip-pickle the stacked
    compressed rows to ``out_path``; write nothing if the cap is not reached."""
    rows = []
    label = np.array([label_value])
    for img_name in os.listdir(folder):
        # GDAL-based reader; handles images with more than 3 bands.
        img = m1.Multiband2Array(os.path.join(folder, img_name))
        data1 = img.reshape((-1, img_pixel * img_pixel * channels))  # flatten
        row = np.append(data1, label)[np.newaxis, :]  # pixels + label
        # tobytes() replaces ndarray.tostring(), removed in NumPy 1.23;
        # zlib shrinks each serialized row before stacking.
        rows.append(zlib.compress(row.tobytes()))
        if len(rows) >= cap:
            # Stack once (O(n)) instead of np.vstack per iteration (O(n^2)).
            data = rows[0] if len(rows) == 1 else np.vstack(rows)
            with gzip.open(out_path, 'wb') as writer:  # gzip the pickle
                pickle.dump(data, writer)
            break