# Example #1 (scrape artifact: "Пример #1" separator)
    # NOTE(review): this `elif` continues an `if usage == ...` statement whose
    # head lies above this chunk — presumably routing "Training" rows; here
    # the "PrivateTest" rows become the validation split. Confirm in full file.
    elif usage == "PrivateTest":
        valImages.append(image)
        valLabels.append(label)
    # otherwise, this must be a testing image
    else:
        testImages.append(image)
        testLabels.append(label)

# Pair each split's images and labels with its destination HDF5 path so a
# single loop below can serialize all three splits uniformly.
datasets = [
    (trainImages, trainLabels, config.TRAIN_HDF5),
    (valImages, valLabels, config.VAL_HDF5),
    (testImages, testLabels, config.TEST_HDF5),
]

# Serialize each split into its own HDF5 file.
for images, labels, outputPath in datasets:
    print("[INFO] building {}...".format(outputPath))

    # one (len, 48, 48) dataset per split — each sample is a 48x48 array
    writer = hdf5datasetwriter.HDF5DatasetWriter(
        (len(images), 48, 48), outputPath)

    # write the samples one pair at a time
    for image, label in zip(images, labels):
        writer.add([image], [label])

    writer.close()

# release the input file handle opened above this chunk
f.close()
# Unpack the train/validation split produced above.
trainPaths, valPaths, trainLabels, valLabels = split

# Group each split with its output HDF5 path so one loop can build all three.
datasets = [
    ("train", trainPaths, trainLabels, config.TRAIN_HDF5),
    ("val", valPaths, valLabels, config.VAL_HDF5),
    ("test", testPaths, testLabels, config.TEST_HDF5),
]

# Aspect-preserving 256x256 resizer; R/G/B lists presumably accumulate
# per-channel statistics later in the script — confirm in full file.
aap = AAP.AspectAwarePreprocessor(256, 256)
R, G, B = [], [], []

# Loop over the dataset splits.
for (dType, paths, labels, outputPath) in datasets:
    # Create the HDF5 writer for this split: one 256x256x3 image per sample.
    print("[INFO] building {}...".format(outputPath))
    writer = HDF.HDF5DatasetWriter((len(paths), 256, 256, 3), outputPath)

    # Initialize a progress bar sized to this split.
    widgets = [
        "Building Dataset: ",
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
        progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(paths), widgets=widgets).start()

    # Walk this split's (path, label) pairs.
    for (i, (path, label)) in enumerate(zip(paths, labels)):
        # Read the image from disk and resize it (aspect-aware) to 256x256.
        image = cv2.imread(path)
        image = aap.preprocess(image)
        # NOTE(review): the loop body continues beyond this chunk
        # (writer.add / mean accumulation expected — confirm in full file).
bs = args['batch_size']

# Gather the dataset's image paths and shuffle them so each batch mixes
# classes rather than following directory order.
print("[INFO] loading image....")
imagePaths = list(paths.list_images(args['dataset']))
random.shuffle(imagePaths)

# Labels come from the parent directory name (.../<label>/<file>), then are
# integer-encoded for storage alongside the features.
labels = [p.split(os.path.sep)[-2] for p in imagePaths]
le = LabelEncoder()
labels = le.fit_transform(labels)

# Load VGG16 pretrained on ImageNet as a convolutional feature extractor
# (no fully-connected head).
print("[INFO] loading network.....")
model = VGG16(weights='imagenet', include_top=False)

# HDF5 writer sized for VGG16's flattened final conv output (512 * 7 * 7).
dataset = hdf5DW.HDF5DatasetWriter((len(imagePaths), 512 * 7 * 7),
                                   args['output'],
                                   dataKey='features',
                                   bufSize=args['buffer_size'])
dataset.storeClassLabels(le.classes_)

# Progress bar over all image paths.
# Fix: label read "EXtracting Features: "; corrected to match the
# "Extracting Features: " label used by the sibling extraction script.
widgets = [
    'Extracting Features: ',
    progressbar.Percentage(), " ",
    progressbar.Bar(), ' ',
    progressbar.ETA()
]
pbar = progressbar.ProgressBar(maxval=len(imagePaths), widgets=widgets).start()
# Walk the full image list in chunks of `bs` (batch_size).
for i in np.arange(0, len(imagePaths), bs):
    # Slice out this batch's paths and encoded labels.
    batchPaths = imagePaths[i:i + bs]
    batchLabels = labels[i:i + bs]
    # NOTE(review): loop body continues beyond this chunk (image loading,
    # model.predict, dataset.add expected — confirm in full file).
# Example #4 (scrape artifact: "Пример #4" separator)
# Collect the dataset's image paths and shuffle for class-mixed batches.
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args['dataset']))
random.shuffle(imagePaths)

# The label is the filename prefix ("<label>.<id>.<ext>" style naming),
# then integer-encoded.
labels = [p.split(os.path.sep)[-1].split(".")[0] for p in imagePaths]
le = LabelEncoder()
labels = le.fit_transform(labels)

# ResNet50 pretrained on ImageNet, without the classification head.
print("[INFO] loading network...")
model = ResNet50(weights='imagenet', include_top=False)

# HDF5 writer storing one 2048-d feature vector per image.
# NOTE(review): keyword is spelled `buffSize` here but `bufSize` in the
# VGG16 script above; also `dataKey='feature'` vs 'features' there —
# confirm which spellings HDF5DatasetWriter actually accepts.
dataset = HDF.HDF5DatasetWriter((len(imagePaths), 2048),
                                args['output'],
                                dataKey='feature',
                                buffSize=args['buffer_size'])
dataset.storeClassLabels(le.classes_)

# Progress bar over the whole dataset.
widgets = ['Extracting Features: ', progressbar.Percentage(), ' ',
           progressbar.Bar(), ' ', progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(imagePaths),
                               widgets=widgets).start()

for i in np.arange(0, len(imagePaths), bs):
    #提取图像和标签
    batchPaths = imagePaths[i:i + bs]
# Build validation image paths and labels from the rows of M
# (column 0 = filename, column 1 = raw label, per the expressions here).
valPaths = []
rawValLabels = []
for m in M:
    valPaths.append(os.path.sep.join([config.VAL_IMAGES, m[0]]))
    rawValLabels.append(m[1])
valLabels = le.transform(rawValLabels)

# Group each split with its output HDF5 path for the build loop below.
datasets = [
    ("train", trainPaths, trainLabels, config.TRAIN_HDF5),
    ("val", valPaths, valLabels, config.VAL_HDF5),
    ("test", testPaths, testLabels, config.TEST_HDF5),
]

# Per-channel accumulators — presumably for the dataset RGB mean; confirm.
R, G, B = [], [], []
# Loop over the dataset split tuples.
for (dType, paths, labels, outputPath) in datasets:
    # Create the HDF5 writer for this split: one 64x64x3 image per sample.
    print("[INFO] building {} ....".format(outputPath))
    writer = HDFW.HDF5DatasetWriter((len(paths), 64, 64, 3), outputPath)

    # Initialize a progress bar sized to this split.
    widgets = [
        'Building Dataset: ',
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
        progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(paths), widgets=widgets).start()
    # Walk this split's (path, label) pairs.
    for (i, (path, label)) in enumerate(zip(paths, labels)):
        # Read the image from disk.
        image = cv2.imread(path)

        # Compute the (channel) means — NOTE(review): loop body continues
        # beyond this chunk; confirm in full file.