def row_to_sample(row, schema, feature_cols, label_cols):
    import numpy as np
    from bigdl.util.common import Sample
    if label_cols:
        feature, label = convert_row_to_numpy(row, schema, feature_cols, label_cols)
        sample = Sample.from_ndarray(feature, label)
    else:
        # No label columns: unpack the single feature array and attach a dummy label.
        feature, = convert_row_to_numpy(row, schema, feature_cols, label_cols)
        sample = Sample.from_ndarray(feature, np.array([0.0]))
    return sample
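# A minimal usage sketch (an assumption, not from the source): map row_to_sample
# over a Spark DataFrame's underlying RDD. The DataFrame and column names below
# are hypothetical, and convert_row_to_numpy must already be in scope.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(0.1, 0.2, 1.0)], ["f1", "f2", "label"])
sample_rdd = df.rdd.map(
    lambda row: row_to_sample(row, df.schema, ["f1", "f2"], ["label"]))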
def xshard_to_sample(data):
    import numpy as np
    from zoo.common.utils import Sample
    data = check_type_and_convert(data, allow_list=True, allow_tuple=False)
    features = data["x"]
    length = features[0].shape[0]
    if "y" in data:
        labels = data["y"]
    else:
        # No labels provided: fall back to a dummy -1 label per sample.
        labels = np.array([[-1] * length])
    for i in range(length):
        fs = [feat[i] for feat in features]
        ls = [l[i] for l in labels]
        # Unwrap single-element feature/label lists so Sample gets a bare ndarray.
        if len(fs) == 1:
            fs = fs[0]
        if len(ls) == 1:
            ls = ls[0]
        yield Sample.from_ndarray(fs, ls)
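# A minimal sketch (an assumption, not from the source) of what the generator
# yields for one in-memory shard dict, assuming check_type_and_convert passes a
# dict of {"x": [ndarray], "y": [ndarray]} through unchanged.
import numpy as np

shard = {"x": [np.random.rand(4, 3).astype("float32")],
         "y": [np.arange(4, dtype="float32")]}
samples = list(xshard_to_sample(shard))  # One Sample per row, 4 in total.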
train_lbl = pickle.load(
    open(os.path.join(args.data_dir, "train_label.pkl"), "rb"))
test_img = pickle.load(
    open(os.path.join(args.data_dir, "test_image.pkl"), "rb"))
test_lbl = pickle.load(
    open(os.path.join(args.data_dir, "test_label.pkl"), "rb"))

# Transpose the image dimensions for Keras and normalize pixel values.
t_train_img = train_img.transpose((0, 1, 4, 2, 3)) / 255.0
t_test_img = test_img.transpose((0, 1, 4, 2, 3)) / 255.0
NUM_TRAIN_SMP, _, IMAGE_SIZE, _, NUM_IMAGE_CHANNEL = train_img.shape
NUM_TEST_SMP, NUM_CLASS_LABEL, _, _, _ = test_img.shape

# Convert the data into RDDs.
train_rdd = sc.parallelize(t_train_img).zip(sc.parallelize(train_lbl)).map(
    lambda feature_label: Sample.from_ndarray(
        feature_label[0], feature_label[1] + 1)  # Use -1 instead if training with keras.fit.
)
test_rdd = sc.parallelize(t_test_img).zip(sc.parallelize(test_lbl)).map(
    lambda feature_label: Sample.from_ndarray(feature_label[0], feature_label[1] + 1))

# Define the network structure with the Zoo-Keras API.
input_shape = (NUM_CLASS_LABEL, NUM_IMAGE_CHANNEL, IMAGE_SIZE, IMAGE_SIZE)
both_input = Input(shape=input_shape)

convolve_net = Sequential()
convolve_net.add(
    Convolution2D(
        nb_filter=LAYER_1_NUM_CHANNEL,  # Channels: 4 -> 8.
        nb_row=CONVOLVE_1_KERNEL_SIZE,  # Size: 32 - 9 + 1 = 24.
        nb_col=CONVOLVE_1_KERNEL_SIZE,
        activation="relu",
"rb"), fix_imports=True) test_lbl = pickle.load(open(os.path.join(args.data_dir, "test_label.pkl"), "rb"), fix_imports=True) # Modelling structuring starts. t_train_img = train_img.transpose((0, 1, 4, 2, 3)) / 225.0 t_test_img = test_img.transpose((0, 1, 4, 2, 3)) / 225.0 NUM_TRAIN_SMP, _, IMAGE_SIZE, _, NUM_IMAGE_CHANNEL = train_img.shape NUM_TEST_SMP, NUM_CLASS_LABEL, _, _, _ = test_img.shape # Making the RDD. (Resilient Distributed Datasets - DS for Apache Spark) train_rdd = sc.parallelize(t_train_img).zip( sc.parallelize(train_lbl)).map(lambda featurelabel: Sample.from_ndarray( featurelabel[0], featurelabel[1] + 1)) test_rdd = sc.parallelize(t_test_img).zip( sc.parallelize(test_lbl)).map(lambda featurelabel: Sample.from_ndarray( featurelabel[0], featurelabel[1] + 1)) # Making a Zoo-Keras Pipeline with a CNN model. input_shape = (NUM_CLASS_LABEL, NUM_IMAGE_CHANNEL, IMAGE_SIZE, IMAGE_SIZE) both_input = Input(shape=input_shape) convolve_net = Sequential() convolve_net.add( Convolution2D( nb_filter=LAYER_1_NUM_CHANNEL, # 4 -> 8. nb_row=CONVOLVE_1_KERNEL_SIZE, # Size: 32 - 9 + 1 = 24 nb_col=CONVOLVE_1_KERNEL_SIZE, activation="relu",