Example #1
def row_to_sample(row, schema, feature_cols, label_cols):
    from bigdl.util.common import Sample
    import numpy as np
    if label_cols:
        feature, label = convert_row_to_numpy(row, schema, feature_cols,
                                              label_cols)
        sample = Sample.from_ndarray(feature, label)
    else:
        # No label columns: build an inference-only Sample with a dummy label.
        feature, = convert_row_to_numpy(row, schema, feature_cols, label_cols)
        sample = Sample.from_ndarray(feature, np.array([0.0]))
    return sample
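# A hedged usage sketch, not from the original listing: `df`, the column
# names, and the companion convert_row_to_numpy helper are assumptions.
feature_cols = ["pixels"]   # assumed feature column
label_cols = ["label"]      # assumed label column
sample_rdd = df.rdd.map(
    lambda row: row_to_sample(row, df.schema, feature_cols, label_cols))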
Example #2
def xshard_to_sample(data):
    import numpy as np
    from zoo.common.utils import Sample
    data = check_type_and_convert(data, allow_list=True, allow_tuple=False)
    features = data["x"]
    length = features[0].shape[0]
    if "y" in data:
        labels = data["y"]
    else:
        # No labels provided: fall back to a dummy -1 label per record.
        labels = np.array([[-1] * length])

    # Emit one Sample per record, pairing the i-th slice of every feature
    # and label array; unwrap single-element lists to bare ndarrays.
    for i in range(length):
        fs = [feat[i] for feat in features]
        ls = [l[i] for l in labels]
        if len(fs) == 1:
            fs = fs[0]
        if len(ls) == 1:
            ls = ls[0]
        yield Sample.from_ndarray(fs, ls)
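# A hedged sketch of the input this generator expects: a shard dict whose
# "x" (and optional "y") entries are lists of equal-length numpy arrays.
# The shapes below are illustrative assumptions.
import numpy as np
shard = {"x": [np.random.rand(4, 3, 32, 32).astype("float32")],  # 4 records
         "y": [np.arange(4, dtype="float32").reshape(4, 1)]}     # 4 labels
samples = list(xshard_to_sample(shard))  # yields 4 BigDL Samples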
Example #3
train_img = pickle.load(
    open(os.path.join(args.data_dir, "train_image.pkl"), "rb"))
train_lbl = pickle.load(
    open(os.path.join(args.data_dir, "train_label.pkl"), "rb"))
test_img = pickle.load(open(os.path.join(args.data_dir, "test_image.pkl"),
                            "rb"))
test_lbl = pickle.load(open(os.path.join(args.data_dir, "test_label.pkl"),
                            "rb"))

# Transpose the image dimensions to fit Keras and normalize the pixel values.
t_train_img = train_img.transpose((0, 1, 4, 2, 3)) / 255.0
t_test_img = test_img.transpose((0, 1, 4, 2, 3)) / 255.0

NUM_TRAIN_SMP, _, IMAGE_SIZE, _, NUM_IMAGE_CHANNEL = train_img.shape
NUM_TEST_SMP, NUM_CLASS_LABEL, _, _, _ = test_img.shape

# Convert the data into RDDs of Samples.
train_rdd = sc.parallelize(t_train_img).zip(sc.parallelize(
    train_lbl)).map(lambda feature_label: Sample.from_ndarray(
        feature_label[0], feature_label[1] + 1)  # needs -1 instead if using keras.fit.
                    )
test_rdd = sc.parallelize(t_test_img).zip(sc.parallelize(test_lbl)).map(
    lambda feature_label: Sample.from_ndarray(
        feature_label[0], feature_label[1] + 1))

# Define the model's network structure with Zoo-Keras.
input_shape = (NUM_CLASS_LABEL, NUM_IMAGE_CHANNEL, IMAGE_SIZE, IMAGE_SIZE)
both_input = Input(shape=input_shape)

convolve_net = Sequential()
convolve_net.add(
    Convolution2D(
        nb_filter=LAYER_1_NUM_CHANNEL,  # Channels: 4 -> 8.
        nb_row=CONVOLVE_1_KERNEL_SIZE,  # Size: 32 - 9 + 1 = 24.
        nb_col=CONVOLVE_1_KERNEL_SIZE,
        activation="relu",
Example #4
                            "rb"),
                       fix_imports=True)
test_lbl = pickle.load(open(os.path.join(args.data_dir, "test_label.pkl"),
                            "rb"),
                       fix_imports=True)

# Transpose the image dimensions to fit Keras and normalize the pixel values.
t_train_img = train_img.transpose((0, 1, 4, 2, 3)) / 255.0
t_test_img = test_img.transpose((0, 1, 4, 2, 3)) / 255.0

NUM_TRAIN_SMP, _, IMAGE_SIZE, _, NUM_IMAGE_CHANNEL = train_img.shape
NUM_TEST_SMP, NUM_CLASS_LABEL, _, _, _ = test_img.shape

# Build RDDs (Resilient Distributed Datasets, Spark's distributed collections) of Samples.
train_rdd = sc.parallelize(t_train_img).zip(
    sc.parallelize(train_lbl)).map(lambda featurelabel: Sample.from_ndarray(
        featurelabel[0], featurelabel[1] + 1))
test_rdd = sc.parallelize(t_test_img).zip(
    sc.parallelize(test_lbl)).map(lambda featurelabel: Sample.from_ndarray(
        featurelabel[0], featurelabel[1] + 1))

# Define the CNN model with the Zoo-Keras pipeline API.
input_shape = (NUM_CLASS_LABEL, NUM_IMAGE_CHANNEL, IMAGE_SIZE, IMAGE_SIZE)
both_input = Input(shape=input_shape)

convolve_net = Sequential()
convolve_net.add(
    Convolution2D(
        nb_filter=LAYER_1_NUM_CHANNEL,  # 4 -> 8.
        nb_row=CONVOLVE_1_KERNEL_SIZE,  # Size: 32 - 9 + 1 = 24
        nb_col=CONVOLVE_1_KERNEL_SIZE,
        activation="relu",