def top_level_task():
    """Train a two-branch CIFAR-10 CNN whose branches are merged by channel concat.

    Each branch is built by ``cifar_cnn_sub`` as its own Model; the joint model
    feeds the same images to both inputs and verifies accuracy via callbacks.
    """
    num_classes = 10
    num_samples = 10000

    # Load and normalize images; labels become int32 class ids.
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    input_tensor1 = Input(shape=(3, 32, 32), dtype="float32", name="input1")
    input_tensor2 = Input(shape=(3, 32, 32), dtype="float32", name="input2")

    # Build the two sub-network branches as standalone models.
    ot1 = cifar_cnn_sub(input_tensor1, 1)
    model1 = Model(input_tensor1, ot1)
    print(model1.summary())
    ot2 = cifar_cnn_sub(input_tensor2, 2)
    model2 = Model(input_tensor2, ot2)
    print(model2.summary())

    # Join the branch outputs on the channel axis, then the classifier head.
    output_tensor = Concatenate(axis=1)([model1.output, model2.output])
    output_tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                 padding="valid")(output_tensor)
    output_tensor = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                           padding=(1, 1), activation="relu",
                           name="conv2d_0_4")(output_tensor)
    output_tensor = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                           padding=(1, 1), activation="relu")(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                 padding="valid")(output_tensor)
    output_tensor = Flatten()(output_tensor)
    output_tensor = Dense(512, activation="relu")(output_tensor)
    output_tensor = Dense(num_classes)(output_tensor)
    output_tensor = Activation("softmax")(output_tensor)

    model = Model([input_tensor1, input_tensor2], output_tensor)

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', 'sparse_categorical_crossentropy'])
    print(model.summary())

    # Both inputs receive the same batch of images.
    model.fit([x_train, x_train], y_train, epochs=160,
              callbacks=[VerifyMetrics(ModelAccuracy.CIFAR10_CNN),
                         EpochVerifyMetrics(ModelAccuracy.CIFAR10_CNN)])
def top_level_task():
    """Train a CIFAR-10 CNN with two parallel first convolutions merged by concat.

    Uses channels-first image layout and a single-epoch fit; the model is built
    with the dict-keyed Model constructor (one input).
    """
    backend.set_image_data_format('channels_first')
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    input_tensor1 = Input(shape=(3, 32, 32), dtype="float32")

    # Two parallel 32-filter convolutions over the same input.
    o1 = Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                strides=(1, 1), padding="valid", activation="relu")(input_tensor1)
    o2 = Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                strides=(1, 1), padding="valid", activation="relu")(input_tensor1)
    output_tensor = Concatenate(axis=1)([o1, o2])

    output_tensor = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                           padding="valid", activation="relu")(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                 padding="valid")(output_tensor)
    output_tensor = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                           padding="valid", activation="relu")(output_tensor)
    output_tensor = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                           padding="valid", activation="relu")(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                 padding="valid")(output_tensor)
    output_tensor = Flatten()(output_tensor)
    output_tensor = Dense(512, activation="relu")(output_tensor)
    output_tensor = Dense(num_classes)(output_tensor)
    output_tensor = Activation("softmax")(output_tensor)

    model = Model({1: input_tensor1}, output_tensor)

    opt = optimizers.SGD(learning_rate=0.01)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', 'sparse_categorical_crossentropy'])
    print(model.summary())
    model.fit(x_train, y_train, epochs=1)
def top_level_task():
    """Train a small sequential CIFAR-10 CNN for a single epoch (smoke test)."""
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    # Two conv blocks (32 then 64 filters), each followed by 2x2 max pooling.
    model = Sequential()
    model.add(Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                     strides=(1, 1), padding=(1, 1), activation="relu"))
    model.add(Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                     padding=(1, 1), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid"))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                     padding=(1, 1), activation="relu"))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                     padding=(1, 1), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid"))
    model.add(Flatten())
    model.add(Dense(512, activation="relu"))
    model.add(Dense(num_classes))
    model.add(Activation("softmax"))
    print(model.summary())

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    # NOTE(review): compiled without an explicit loss/metrics — presumably the
    # framework default applies here; confirm against the FlexFlow keras API.
    model.compile(optimizer=opt)
    model.fit(x_train, y_train, epochs=1)
def top_level_task():
    """Train a sequential CIFAR-10 CNN for 80 epochs and verify accuracy."""
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    model = Sequential()
    model.add(Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                     strides=(1, 1), padding=(1, 1), activation="relu"))
    model.add(Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                     padding=(1, 1), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid"))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                     padding=(1, 1), activation="relu"))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                     padding=(1, 1), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid"))
    model.add(Flatten())
    model.add(Dense(512, activation="relu"))
    model.add(Dense(num_classes))
    model.add(Activation("softmax"))

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.02)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', 'sparse_categorical_crossentropy'])
    print(model.summary())

    # Callbacks check the final and per-epoch accuracy against the expected target.
    model.fit(x_train, y_train, epochs=80,
              callbacks=[VerifyMetrics(ModelAccuracy.CIFAR10_CNN),
                         EpochVerifyMetrics(ModelAccuracy.CIFAR10_CNN)])
def top_level_task():
    """Train a model imported from a serialized PyTorch graph ("alexnet.ff").

    CIFAR-10 images are upscaled to 229x229 (nearest neighbour) to match the
    network's expected input resolution.
    """
    ffconfig = FFConfig()
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))

    ffmodel = FFModel(ffconfig)
    dims_input = [ffconfig.get_batch_size(), 3, 229, 229]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    torch_model = PyTorchModel("alexnet.ff")
    output_tensors = torch_model.apply(ffmodel, [input_tensor])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label_tensor = ffmodel.get_label_tensor()

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Resize each channels-first image via PIL (HWC for PIL, back to CHW after).
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for i in range(num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image
    full_input_np /= 255

    y_train = y_train.astype('int32')
    full_label_np = y_train

    dataloader_input = ffmodel.create_data_loader(input_tensor, full_input_np)
    dataloader_label = ffmodel.create_data_loader(label_tensor, full_label_np)
    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
def top_level_task():
    """Train a ResNet-18 imported from ONNX on upscaled CIFAR-10 and check accuracy."""
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))

    ffmodel = FFModel(ffconfig)
    dims_input = [ffconfig.batch_size, 3, 229, 229]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # Map the ONNX graph input ("input.1") onto our tensor, then add softmax.
    onnx_model = ONNXModel("resnet18.onnx")
    t = onnx_model.apply(ffmodel, {"input.1": input_tensor})
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label = ffmodel.label_tensor

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Upscale each image to 229x229 with PIL; keep channels-first layout.
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for i in range(num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image
    full_input_np /= 255

    y_train = y_train.astype('int32')
    full_label_np = y_train

    dataloader_input = ffmodel.create_data_loader(input_tensor, full_input_np)
    dataloader_label = ffmodel.create_data_loader(label, full_label_np)
    num_samples = dataloader_input.num_samples
    assert dataloader_input.num_samples == dataloader_label.num_samples

    ffmodel.init_layers()

    epochs = ffconfig.epochs
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    # Fail loudly if training did not reach the expected accuracy target.
    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_ALEXNET.value:
        assert 0, 'Check Accuracy'
def top_level_task():
    """Train a CNN loaded from a serialized graph file ("cnn.ff") on CIFAR-10."""
    ffconfig = FFConfig()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))

    ffmodel = FFModel(ffconfig)
    dims_input = [ffconfig.batch_size, 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # The serialized graph takes two inputs; feed the same tensor to both.
    output_tensors = file_to_ff("cnn.ff", ffmodel, [input_tensor, input_tensor])
    t = ffmodel.softmax(output_tensors[0])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label_tensor = ffmodel.label_tensor

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train
    y_train = y_train.astype('int32')
    full_label_array = y_train

    dataloader_input = ffmodel.create_data_loader(input_tensor, full_input_array)
    dataloader_label = ffmodel.create_data_loader(label_tensor, full_label_array)
    num_samples = dataloader_input.num_samples

    ffmodel.init_layers()

    # Dump the layer names, then look one up by name as a sanity check.
    layers = ffmodel.get_layers()
    for layer_id in layers:
        print(layers[layer_id].name)
    layer = ffmodel.get_layer_by_name("relu_1")
    print(layer)

    epochs = ffconfig.epochs
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
def cifar_cnn_concat():
    """Build and train a three-branch CIFAR-10 CNN joined by two concatenations.

    Branches 2 and 3 share the second input tensor; both model inputs are fed
    the same images at fit time.
    """
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    input_tensor1 = Input(batch_shape=[0, 3, 32, 32], dtype="float32")
    input_tensor2 = Input(batch_shape=[0, 3, 32, 32], dtype="float32")

    # Three sub-network branches, merged on the channel axis.
    ot1 = cifar_cnn_sub(input_tensor1, 1)
    ot2 = cifar_cnn_sub(input_tensor2, 2)
    ot3 = cifar_cnn_sub(input_tensor2, 3)
    output_tensor = Concatenate(axis=1)([ot1, ot2, ot3])
    output_tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                 padding="valid")(output_tensor)

    # A second pair of parallel convolutions, also concatenated.
    o1 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1),
                activation="relu", name="conv2d_0_4")(output_tensor)
    o2 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1),
                activation="relu", name="conv2d_1_4")(output_tensor)
    output_tensor = Concatenate(axis=1)([o1, o2])

    output_tensor = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                           padding=(1, 1), activation="relu")(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                 padding="valid")(output_tensor)
    output_tensor = Flatten()(output_tensor)
    output_tensor = Dense(512, activation="relu")(output_tensor)
    output_tensor = Dense(num_classes)(output_tensor)
    output_tensor = Activation("softmax")(output_tensor)

    model = Model([input_tensor1, input_tensor2], output_tensor)
    print(model.summary())

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    model.compile(optimizer=opt)
    model.fit([x_train, x_train], y_train, epochs=1)
def top_level_task():
    """Train a CIFAR-10 CNN built by composing two sub-models end to end.

    ``model1`` (conv front-end) and ``model2`` (classifier head) are chained by
    calling them on a fresh input tensor, exercising model-as-layer composition.
    """
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    # Front-end: two 32-filter convolutions plus pooling.
    input_tensor1 = Input(shape=(3, 32, 32), dtype="float32")
    output_tensor1 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                            padding=(1, 1), activation="relu")(input_tensor1)
    output_tensor1 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                            padding=(1, 1), activation="relu")(output_tensor1)
    output_tensor1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                  padding="valid")(output_tensor1)
    model1 = Model(input_tensor1, output_tensor1)

    # Head: two 64-filter convolutions, pooling, and dense classifier.
    input_tensor2 = Input(shape=(3, 32, 32), dtype="float32")
    output_tensor2 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                            padding=(1, 1), activation="relu")(input_tensor2)
    output_tensor2 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                            padding=(1, 1), activation="relu")(output_tensor2)
    output_tensor2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                  padding="valid")(output_tensor2)
    output_tensor2 = Flatten()(output_tensor2)
    output_tensor2 = Dense(512, activation="relu")(output_tensor2)
    output_tensor2 = Dense(num_classes)(output_tensor2)
    output_tensor2 = Activation("softmax")(output_tensor2)
    model2 = Model(input_tensor2, output_tensor2)

    # Chain the two sub-models on a new input.
    input_tensor3 = Input(shape=(3, 32, 32), dtype="float32")
    output_tensor3 = model1(input_tensor3)
    output_tensor3 = model2(output_tensor3)
    model = Model(input_tensor3, output_tensor3)

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', 'sparse_categorical_crossentropy'])
    print(model.summary())

    model.fit(x_train, y_train, epochs=40,
              callbacks=[VerifyMetrics(ModelAccuracy.CIFAR10_CNN),
                         EpochVerifyMetrics(ModelAccuracy.CIFAR10_CNN)])
def top_level_task():
    """Train a CNN imported from "cnn.ff" using explicitly managed data loaders.

    Demonstrates the manual path: attach numpy arrays to full-size tensors,
    build ``SingleDataLoader``s from them, then detach and train.
    """
    ffconfig = FFConfig()
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))

    ffmodel = FFModel(ffconfig)
    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # The serialized graph expects two inputs; reuse the same tensor for both.
    torch_model = PyTorchModel("cnn.ff")
    output_tensors = torch_model.apply(ffmodel, [input_tensor, input_tensor])
    t = ffmodel.softmax(output_tensors[0])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label = ffmodel.get_label_tensor()

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train
    y_train = y_train.astype('int32')
    full_label_array = y_train

    # Stage the full dataset in framework tensors backed by the numpy arrays.
    dims_full_input = [num_samples, 3, 32, 32]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)
    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)
    full_input.attach_numpy_array(ffconfig, full_input_array)
    full_label.attach_numpy_array(ffconfig, full_label_array)

    dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)

    # The loaders own copies now; release the numpy backing.
    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)
    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()

    # Dump the layer names, then look one up by name as a sanity check.
    layers = ffmodel.get_layers()
    for layer_id in layers:
        print(layers[layer_id].name)
    layer = ffmodel.get_layer_by_name("relu_1")
    print(layer)

    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    ffmodel.train((dataloader_input, dataloader_label), epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
def top_level_task():
    """Exercise concat/split on a CIFAR-10 CNN with a hand-written training loop.

    Three parallel convolutions are concatenated and split back into three
    tensors; the middle split (``ts[1]``) feeds the rest of the network. The
    ``ts[0].handle.impl`` prints trace the split tensor's lifetime across
    model build, init, and training.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))

    ffmodel = FFModel(ffconfig)
    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)

    # Three parallel convolutions -> concat on channels -> split back in three.
    t1 = ffmodel.conv2d(input, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t2 = ffmodel.conv2d(input, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t3 = ffmodel.conv2d(input, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.concat([t1, t2, t3], 1)
    ts = ffmodel.split(t, 3, 1)
    print("new", ts[0].handle.impl)

    # Continue the network from the middle split.
    t = ffmodel.conv2d(ts[1], 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 512, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)
    print("end model", ts[0].handle.impl)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label = ffmodel.get_label_tensor()

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])
    y_train = y_train.astype('int32')
    full_label_array = y_train
    print(full_input_array.__array_interface__["strides"])
    print(full_input_array.shape, full_label_array.shape)
    print(full_label_array.__array_interface__["strides"])

    # Stage the full dataset in framework tensors and build the data loaders.
    dims_full_input = [num_samples, 3, 32, 32]
    full_input = ffmodel.create_tensor(dims_full_input, "", DataType.DT_FLOAT)
    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, "", DataType.DT_INT32)
    full_input.attach_numpy_array(ffconfig, full_input_array)
    full_label.attach_numpy_array(ffconfig, full_label_array)
    dataloader_input = SingleDataLoader(ffmodel, input, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)
    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)
    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()
    print("end init model", ts[0].handle.impl)

    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    for epoch in range(epochs):
        dataloader_input.reset()
        dataloader_label.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)
        for iter in range(int(iterations)):
            dataloader_input.next_batch(ffmodel)
            dataloader_label.next_batch(ffmodel)
            # Trace only after the warm-up epoch so the first run stays untraced.
            if epoch > 0:
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if epoch > 0:
                ffconfig.end_trace(111)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
    print("end", ts[0].handle.impl)
def top_level_task():
    """Train a native-API AlexNet on CIFAR-10 and inspect tensors afterwards.

    ``use_external`` selects between manually staged numpy data (True) and a
    ``DataLoader4D`` driven by the net config (False). After training, the
    first conv layer's input tensor and the label tensor are inline-mapped and
    printed for inspection, and final accuracy is asserted.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))

    ffmodel = FFModel(ffconfig)
    dims_input = [ffconfig.get_batch_size(), 3, 229, 229]
    input = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)

    # Explicit initializers for the first convolution only.
    kernel_init = GlorotUniformInitializer(123)
    bias_init = ZeroInitializer()

    # AlexNet topology: conv/pool stack, then three dense layers and softmax.
    t = ffmodel.conv2d(input, 64, 11, 11, 4, 4, 2, 2,
                       ActiMode.AC_MODE_RELU, True, kernel_init, bias_init)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 192, 5, 5, 1, 1, 2, 2, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 384, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 256, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 256, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 4096, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 4096, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label = ffmodel.get_label_tensor()

    use_external = True
    if (use_external == True):
        num_samples = 10000
        (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

        # Upscale each CIFAR image to 229x229 via PIL, keeping channels first.
        full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
        for i in range(num_samples):
            image = x_train[i, :, :, :]
            image = image.transpose(1, 2, 0)
            pil_image = Image.fromarray(image)
            pil_image = pil_image.resize((229, 229), Image.NEAREST)
            image = np.array(pil_image, dtype=np.float32)
            image = image.transpose(2, 0, 1)
            full_input_np[i, :, :, :] = image
            if (i == 0):
                print(image)
        full_input_np /= 255
        print(full_input_np.shape)
        print(full_input_np.__array_interface__["strides"])
        print(full_input_np[0, :, :, :])

        y_train = y_train.astype('int32')
        full_label_np = y_train

        # Stage the full dataset in framework tensors and build the loaders.
        dims_full_input = [num_samples, 3, 229, 229]
        full_input = ffmodel.create_tensor(dims_full_input, "", DataType.DT_FLOAT)
        dims_full_label = [num_samples, 1]
        full_label = ffmodel.create_tensor(dims_full_label, "", DataType.DT_INT32)
        full_input.attach_numpy_array(ffconfig, full_input_np)
        full_label.attach_numpy_array(ffconfig, full_label_np)
        dataloader_input = SingleDataLoader(ffmodel, input, full_input,
                                            num_samples, DataType.DT_FLOAT)
        dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                            num_samples, DataType.DT_INT32)
        full_input.detach_numpy_array(ffconfig)
        full_label.detach_numpy_array(ffconfig)
        num_samples = dataloader_input.get_num_samples()
        assert dataloader_input.get_num_samples() == dataloader_label.get_num_samples()
    else:
        # Built-in loader driven by the net config's dataset path.
        dataloader = DataLoader4D(ffmodel, input, label,
                                  ffnetconfig=alexnetconfig)
        num_samples = dataloader.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    for epoch in range(epochs):
        if (use_external == True):
            dataloader_input.reset()
            dataloader_label.reset()
        else:
            dataloader.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)
        for iter in range(int(iterations)):
            if (use_external == True):
                dataloader_input.next_batch(ffmodel)
                dataloader_label.next_batch(ffmodel)
            else:
                dataloader.next_batch(ffmodel)
            # Trace only after the warm-up epoch.
            if epoch > 0:
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if epoch > 0:
                ffconfig.end_trace(111)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    # Fail loudly if training missed the expected accuracy target.
    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_ALEXNET.value:
        assert 0, 'Check Accuracy'

    # Inspect the first layer's input tensor contents after training.
    conv_2d1 = ffmodel.get_layer_by_id(0)
    cbias_tensor = conv_2d1.get_input_tensor()
    cbias_tensor.inline_map(ffconfig)
    cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
    print(cbias.shape)
    print(cbias)
    cbias_tensor.inline_unmap(ffconfig)

    # Inspect the label tensor contents as well.
    label.inline_map(ffconfig)
    label_array = label.get_flat_array(ffconfig, DataType.DT_INT32)
    print(label_array.shape)
    print(label_array)
    label.inline_unmap(ffconfig)
def top_level_task():
    """Train a keras-style AlexNet on CIFAR-10 images upscaled to 229x229."""
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)

    # Upscale each channels-first image via PIL (nearest neighbour).
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for i in range(num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image
        if (i == 0):
            print(image)
    full_input_np /= 255

    y_train = y_train.astype('int32')
    full_label_np = y_train

    # AlexNet: five convolutions with interleaved pooling, then dense layers.
    input_tensor = Input(shape=(3, 229, 229), dtype="float32")
    output = Conv2D(filters=64, input_shape=(3, 229, 229), kernel_size=(11, 11),
                    strides=(4, 4), padding=(2, 2),
                    activation="relu")(input_tensor)
    output = MaxPooling2D(pool_size=(3, 3), strides=(2, 2),
                          padding="valid")(output)
    output = Conv2D(filters=192, kernel_size=(5, 5), strides=(1, 1),
                    padding=(2, 2), activation="relu")(output)
    output = MaxPooling2D(pool_size=(3, 3), strides=(2, 2),
                          padding="valid")(output)
    output = Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1),
                    padding=(1, 1), activation="relu")(output)
    output = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1),
                    padding=(1, 1), activation="relu")(output)
    output = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1),
                    padding=(1, 1), activation="relu")(output)
    output = MaxPooling2D(pool_size=(3, 3), strides=(2, 2),
                          padding="valid")(output)
    output = Flatten()(output)
    output = Dense(4096, activation="relu")(output)
    output = Dense(4096, activation="relu")(output)
    output = Dense(10)(output)
    output = Activation("softmax")(output)

    model = Model(input_tensor, output)

    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', 'sparse_categorical_crossentropy'])
    print(model.summary())

    model.fit(full_input_np, full_label_np, epochs=160,
              callbacks=[VerifyMetrics(ModelAccuracy.CIFAR10_ALEXNET),
                         EpochVerifyMetrics(ModelAccuracy.CIFAR10_ALEXNET)])
def top_level_task():
    """Train a native-API CIFAR-10 CNN via ``fit`` and assert final accuracy."""
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))

    ffmodel = FFModel(ffconfig)
    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # Two conv blocks (32 then 64 filters), pooling, and a dense classifier.
    t = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 512, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label_tensor = ffmodel.get_label_tensor()

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])
    y_train = y_train.astype('int32')
    full_label_array = y_train
    print(full_input_array.__array_interface__["strides"])
    print(full_input_array.shape, full_label_array.shape)
    print(full_label_array.__array_interface__["strides"])

    dataloader_input = ffmodel.create_data_loader(input_tensor, full_input_array)
    dataloader_label = ffmodel.create_data_loader(label_tensor, full_label_array)
    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    # Fail loudly if training missed the expected accuracy target.
    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_CNN.value:
        assert 0, 'Check Accuracy'
def top_level_task():
    """Train a CIFAR-10 CNN with the legacy named-layer FlexFlow API.

    Uses an explicit per-iteration training loop (next_batch + forward /
    backward / update) instead of fit(), then inline-maps the first layer's
    input tensor and the label tensor for inspection.

    Fixes vs. previous revision:
      * the two dense layers were both registered under the same (typo'd)
        name "lienar1"; they now get unique names "linear1" / "linear2".
      * local `input` renamed to `input_tensor` so the builtin is not shadowed.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))

    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)
    dims_label = [ffconfig.get_batch_size(), 1]
    label = ffmodel.create_tensor(dims_label, "", DataType.DT_INT32)

    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])
    y_train = y_train.astype('int32')
    full_label_array = y_train
    print(full_input_array.__array_interface__["strides"])
    print(full_input_array.shape, full_label_array.shape)
    print(full_label_array.__array_interface__["strides"])

    # Prime the input/label tensors with the first batch before building
    # the graph (mirrors the original example's ordering).
    next_batch(0, x_train, input_tensor, ffconfig)
    next_batch_label(0, y_train, label, ffconfig)

    t = ffmodel.conv2d("conv1", input_tensor, 32, 3, 3, 1, 1, 1, 1,
                       ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d("conv2", t, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d("pool1", t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.conv2d("conv3", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d("conv4", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d("pool2", t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.flat("flat", t)
    # Bug fix: both dense layers used the duplicate name "lienar1".
    t = ffmodel.dense("linear1", t, 512, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense("linear2", t, 10)
    t = ffmodel.softmax("softmax", t, label)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    for epoch in range(0, epochs):
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)
        ct = 0
        for iter in range(0, int(iterations)):
            next_batch(ct, x_train, input_tensor, ffconfig)
            next_batch_label(ct, y_train, label, ffconfig)
            ct += 1
            # Trace only from the second epoch on, so the first (warm-up)
            # epoch is excluded from the captured trace.
            if (epoch > 0):
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if (epoch > 0):
                ffconfig.end_trace(111)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    # Inspect the first conv layer's input tensor on the host.
    conv_2d1 = ffmodel.get_layer_by_id(0)
    cbias_tensor = conv_2d1.get_input_tensor()
    cbias_tensor.inline_map(ffconfig)
    cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
    print(cbias.shape)
    print(cbias)
    cbias_tensor.inline_unmap(ffconfig)

    # And the label tensor.
    label.inline_map(ffconfig)
    label_array = label.get_flat_array(ffconfig, DataType.DT_INT32)
    print(label_array.shape)
    print(label_array)
    label.inline_unmap(ffconfig)
def top_level_task():
    """CIFAR-10 CNN with three parallel conv towers joined by concat.

    Exercises FlexFlow's concat operator: three conv1->conv2 towers over the
    same input are concatenated on the channel axis, and later two conv3
    branches are concatenated again.

    Fixes vs. previous revision:
      * duplicate layer names ("conv1"/"conv2" reused by all three towers,
        "concat" twice, "conv3" twice, "lienar1" twice) replaced with unique
        names so every layer is individually identifiable.
      * local `input` renamed to `input_tensor` (no builtin shadowing).
      * dead commented-out code removed.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))

    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)
    dims_label = [ffconfig.get_batch_size(), 1]
    label = ffmodel.create_tensor(dims_label, "", DataType.DT_INT32)

    use_external = True
    if (use_external == True):
        # Feed the model from numpy arrays through SingleDataLoader.
        num_samples = 10000
        (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
        x_train = x_train.astype('float32')
        x_train /= 255
        full_input_array = x_train
        print(full_input_array.__array_interface__["strides"])
        y_train = y_train.astype('int32')
        full_label_array = y_train
        print(full_input_array.__array_interface__["strides"])
        print(full_input_array.shape, full_label_array.shape)
        print(full_label_array.__array_interface__["strides"])

        dims_full_input = [num_samples, 3, 32, 32]
        full_input = ffmodel.create_tensor(dims_full_input, "", DataType.DT_FLOAT)
        dims_full_label = [num_samples, 1]
        full_label = ffmodel.create_tensor(dims_full_label, "", DataType.DT_INT32)
        # Attach only for the duration of data-loader construction.
        full_input.attach_numpy_array(ffconfig, full_input_array)
        full_label.attach_numpy_array(ffconfig, full_label_array)
        dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                            num_samples, DataType.DT_FLOAT)
        dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                            num_samples, DataType.DT_INT32)
        full_input.detach_numpy_array(ffconfig)
        full_label.detach_numpy_array(ffconfig)
        num_samples = dataloader_input.get_num_samples()
    else:
        # Data Loader driven by the dataset path in the net config.
        dataloader = DataLoader4D(ffmodel, input_tensor, label,
                                  ffnetconfig=alexnetconfig)
        num_samples = dataloader.get_num_samples()

    # Three parallel towers over the same input, concatenated on channels.
    t1 = ffmodel.conv2d("conv1a", input_tensor, 32, 3, 3, 1, 1, 1, 1,
                        ActiMode.AC_MODE_RELU)
    t1 = ffmodel.conv2d("conv2a", t1, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t2 = ffmodel.conv2d("conv1b", input_tensor, 32, 3, 3, 1, 1, 1, 1,
                        ActiMode.AC_MODE_RELU)
    t2 = ffmodel.conv2d("conv2b", t2, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t3 = ffmodel.conv2d("conv1c", input_tensor, 32, 3, 3, 1, 1, 1, 1,
                        ActiMode.AC_MODE_RELU)
    t3 = ffmodel.conv2d("conv2c", t3, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.concat("concat1", [t1, t2, t3], 1)
    t = ffmodel.pool2d("pool1", t, 2, 2, 2, 2, 0, 0)

    # Two more parallel branches, concatenated again.
    t1 = ffmodel.conv2d("conv3a", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t2 = ffmodel.conv2d("conv3b", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.concat("concat2", [t1, t2], 1)
    t = ffmodel.conv2d("conv4", t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d("pool2", t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.flat("flat", t)
    t = ffmodel.dense("linear1", t, 512, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense("linear2", t, 10)
    t = ffmodel.softmax("softmax", t, label)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    for epoch in range(0, epochs):
        if (use_external == True):
            dataloader_input.reset()
            dataloader_label.reset()
        else:
            dataloader.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)
        for iter in range(0, int(iterations)):
            if (use_external == True):
                dataloader_input.next_batch(ffmodel)
                dataloader_label.next_batch(ffmodel)
            else:
                dataloader.next_batch(ffmodel)
            # First epoch is a warm-up; trace capture starts at epoch 1.
            if (epoch > 0):
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if (epoch > 0):
                ffconfig.end_trace(111)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    # Host-side inspection of the first layer's input and the labels.
    conv_2d1 = ffmodel.get_layer_by_id(0)
    cbias_tensor = conv_2d1.get_input_tensor()
    cbias_tensor.inline_map(ffconfig)
    cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
    print(cbias.shape)
    print(cbias)
    cbias_tensor.inline_unmap(ffconfig)

    label.inline_map(ffconfig)
    label_array = label.get_flat_array(ffconfig, DataType.DT_INT32)
    print(label_array.shape)
    print(label_array)
    label.inline_unmap(ffconfig)
def inception():
    """Build and train Inception-v3 on upscaled CIFAR-10 images.

    Images are nearest-neighbor resized from 32x32 to the 299x299 input the
    Inception stem expects.

    Fix vs. previous revision: the throughput report used a hard-coded 8192
    instead of the actual sample count, inconsistent with the sibling tasks
    in this file; it now uses num_samples.
    """
    ffconfig = FFConfig()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))

    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.batch_size, 3, 299, 299]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # Inception-v3 stem.
    t = ffmodel.conv2d(input_tensor, 32, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 32, 3, 3, 1, 1, 0, 0)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 80, 1, 1, 1, 1, 0, 0)
    t = ffmodel.conv2d(t, 192, 3, 3, 1, 1, 1, 1)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)

    # Inception blocks A-E (helpers defined elsewhere in this file).
    t = InceptionA(ffmodel, t, 32)
    t = InceptionA(ffmodel, t, 64)
    t = InceptionA(ffmodel, t, 64)
    t = InceptionB(ffmodel, t)
    t = InceptionC(ffmodel, t, 128)
    t = InceptionC(ffmodel, t, 160)
    t = InceptionC(ffmodel, t, 160)
    t = InceptionC(ffmodel, t, 192)
    t = InceptionD(ffmodel, t)
    t = InceptionE(ffmodel, t)
    t = InceptionE(ffmodel, t)

    # Global average pool + classifier head.
    t = ffmodel.pool2d(t, 8, 8, 1, 1, 0, 0, PoolType.POOL_AVG)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label = ffmodel.label_tensor

    # Upscale each CIFAR image to 299x299 (NCHW -> HWC for PIL, then back).
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    full_input_np = np.zeros((num_samples, 3, 299, 299), dtype=np.float32)
    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((299, 299), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image
    full_input_np /= 255
    print(full_input_np.shape)
    print(full_input_np.__array_interface__["strides"])
    print(full_input_np[0, :, :, :])
    y_train = y_train.astype('int32')
    full_label_np = y_train

    dataloader_input = ffmodel.create_data_loader(input_tensor, full_input_np)
    dataloader_label = ffmodel.create_data_loader(label, full_label_np)
    num_samples = dataloader_input.num_samples
    assert dataloader_input.num_samples == dataloader_label.num_samples

    ffmodel.init_layers()

    epochs = ffconfig.epochs
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    # Bug fix: was hard-coded `8192 * epochs / run_time`.
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
def top_level_task():
    """Run a ResNet loaded from a serialized FlexFlow graph ("resnet.ff").

    The graph file supplies everything up to the logits; only the softmax is
    appended here. CIFAR-10 images are upscaled to 229x229 to match the
    graph's expected input size.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))

    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.batch_size, 3, 229, 229]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # Rebuild the network from the serialized PyTorch->FlexFlow graph and
    # cap it with a softmax.
    torch_model = PyTorchModel("resnet.ff")
    output_tensors = torch_model.apply(ffmodel, [input_tensor])
    t = ffmodel.softmax(output_tensors[0])

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label = ffmodel.label_tensor

    # Resize every CIFAR image to 229x229 (CHW -> HWC for PIL, then back).
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image
    full_input_np /= 255
    y_train = y_train.astype('int32')
    full_label_np = y_train

    # Stage the full arrays in FlexFlow tensors just long enough to build
    # the data loaders, then detach the numpy backing.
    dims_full_input = [num_samples, 3, 229, 229]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)
    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)
    full_input.attach_numpy_array(ffconfig, full_input_np)
    full_label.attach_numpy_array(ffconfig, full_label_np)
    dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)
    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)
    num_samples = dataloader_input.get_num_samples()
    assert dataloader_input.get_num_samples(
    ) == dataloader_label.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.epochs
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
def top_level_task():
    """Exercise FlexFlow's concat and split operators on a CIFAR-10 CNN.

    Three parallel convs are concatenated on the channel axis, split back
    into three pieces, and only the middle piece feeds the rest of the net.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))

    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.batch_size, 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # concat([t1, t2, t3]) then split back into 3 along the channel axis;
    # only the middle branch continues through the network.
    t1 = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t2 = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t3 = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.concat([t1, t2, t3], 1)
    ts = ffmodel.split(t, 3, 1)
    t = ffmodel.conv2d(ts[1], 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 512, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label_tensor = ffmodel.label_tensor

    # Normalized float inputs, int32 class labels.
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])
    y_train = y_train.astype('int32')
    full_label_array = y_train
    print(full_input_array.__array_interface__["strides"])
    print(full_input_array.shape, full_label_array.shape)
    print(full_label_array.__array_interface__["strides"])

    # Stage the full dataset, build the loaders, then drop the numpy backing.
    dims_full_input = [num_samples, 3, 32, 32]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)
    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)
    full_input.attach_numpy_array(ffconfig, full_input_array)
    full_label.attach_numpy_array(ffconfig, full_label_array)
    dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label_tensor, full_label,
                                        num_samples, DataType.DT_INT32)
    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)
    num_samples = dataloader_input.get_num_samples()

    ffmodel.init_layers()
    print("end init model", ts[0].handle.impl)

    epochs = ffconfig.epochs
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
def top_level_task():
    """Train a RegNetX-3.2GF imported via ONNX on upscaled CIFAR-10.

    The torch model is traced through ONNX into FlexFlow, capped with a
    softmax, and trained; accuracy is asserted at the end.

    Fix vs. previous revision: removed the interactive
    input("ENTER to continue.") debug pauses, which blocked any unattended
    (CI / batch) run of this task. The diagnostic prints are kept.
    """
    ffconfig = FFConfig()
    resnetconfig = NetConfig()
    print(resnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))

    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.get_batch_size(), 3, 229, 229]
    inputi = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    # Torch RegNet + flatten + linear head, exported through ONNX and
    # replayed onto the FlexFlow graph.
    model = rgn.RegNetX32gf()
    model = nn.Sequential(model, nn.Flatten(), nn.Linear(2520 * 7 * 7, 1000))
    onnx_input = torch.randn(64, 3, 229, 229)
    onnx_model = ONNXModel(model, onnx_input)
    t = onnx_model.apply(ffmodel, {"input.1": inputi})
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label = ffmodel.get_label_tensor()

    # Upscale CIFAR images to the 229x229 input the network expects.
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image
    full_input_np /= 255
    print("$$$$$$$$$$$$$$$$$$$FULL_INPUT_NP$$$$$$$$$$$$$$$$$$$$")
    print(full_input_np)
    y_train = y_train.astype('int32')
    full_label_np = y_train

    # Stage full arrays, construct loaders, then detach the numpy backing.
    dims_full_input = [num_samples, 3, 229, 229]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)
    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)
    full_input.attach_numpy_array(ffconfig, full_input_np)
    full_label.attach_numpy_array(ffconfig, full_label_np)
    print(full_input)
    dataloader_input = SingleDataLoader(ffmodel, inputi, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)
    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)
    num_samples = dataloader_input.get_num_samples()
    assert dataloader_input.get_num_samples(
    ) == dataloader_label.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_ALEXNET.value:
        assert 0, 'Check Accuracy'
def top_level_task():
    """Train a bottleneck-block ResNet on upscaled CIFAR-10 with a manual loop.

    Builds a ResNet-50-shaped network from BottleneckBlock helpers (defined
    elsewhere in this file), feeds it either from numpy arrays via
    SingleDataLoader (use_external=True) or from a DataLoader4D driven by the
    net-config dataset path, runs an explicit forward/backward/update loop,
    and finally inline-maps tensors for host-side inspection.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)
    # NCHW input (229x229, not the torchvision-standard 224) and int32 labels.
    dims_input = [ffconfig.get_batch_size(), 3, 229, 229]
    input = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)
    dims_label = [ffconfig.get_batch_size(), 1]
    label = ffmodel.create_tensor(dims_label, "", DataType.DT_INT32)
    # Toggle between numpy-backed loaders and the config-driven DataLoader4D.
    use_external = True
    if (use_external == True):
        num_samples = 10000
        (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
        # Upscale each 32x32 CHW image to 229x229 via PIL (CHW -> HWC -> CHW).
        full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
        for i in range(0, num_samples):
            image = x_train[i, :, :, :]
            image = image.transpose(1, 2, 0)
            pil_image = Image.fromarray(image)
            pil_image = pil_image.resize((229, 229), Image.NEAREST)
            image = np.array(pil_image, dtype=np.float32)
            image = image.transpose(2, 0, 1)
            full_input_np[i, :, :, :] = image
            if (i == 0):
                print(image)
        full_input_np /= 255
        print(full_input_np.shape)
        print(full_input_np.__array_interface__["strides"])
        print(full_input_np[0, :, :, :])
        y_train = y_train.astype('int32')
        full_label_np = y_train
        # Staging tensors hold the whole dataset only while the data loaders
        # are constructed; the numpy backing is detached right after.
        dims_full_input = [num_samples, 3, 229, 229]
        full_input = ffmodel.create_tensor(dims_full_input, "", DataType.DT_FLOAT)
        dims_full_label = [num_samples, 1]
        full_label = ffmodel.create_tensor(dims_full_label, "", DataType.DT_INT32)
        full_input.attach_numpy_array(ffconfig, full_input_np)
        full_label.attach_numpy_array(ffconfig, full_label_np)
        dataloader_input = SingleDataLoader(ffmodel, input, full_input,
                                            num_samples, DataType.DT_FLOAT)
        dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                            num_samples, DataType.DT_INT32)
        full_input.detach_numpy_array(ffconfig)
        full_label.detach_numpy_array(ffconfig)
        num_samples = dataloader_input.get_num_samples()
        assert dataloader_input.get_num_samples(
        ) == dataloader_label.get_num_samples()
    else:
        # Data Loader driven by alexnetconfig's dataset path.
        dataloader = DataLoader4D(ffmodel, input, label,
                                  ffnetconfig=alexnetconfig)
        num_samples = dataloader.get_num_samples()
    # NOTE(review): these initializers are created but never passed to any
    # layer below — presumably leftovers; confirm before removing.
    kernel_init = GlorotUniformInitializer(123)
    bias_init = ZeroInitializer()
    # ResNet-50 layout: stem, then 3/4/6/3 bottleneck blocks with the first
    # block of each later stage downsampling (stride 2).
    t = ffmodel.conv2d("conv1", input, 64, 7, 7, 2, 2, 3, 3)
    t = ffmodel.pool2d("pool1", t, 3, 3, 2, 2, 1, 1)
    for i in range(0, 3):
        t = BottleneckBlock(ffmodel, t, 64, 1)
    for i in range(0, 4):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 128, stride)
    for i in range(0, 6):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 256, stride)
    for i in range(0, 3):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 512, stride)
    # Global average pool + classifier head.
    t = ffmodel.pool2d("pool2", t, 7, 7, 1, 1, 0, 0, PoolType.POOL_AVG)
    t = ffmodel.flat("flat", t)
    t = ffmodel.dense("linear1", t, 10)
    t = ffmodel.softmax("softmax", t, label)
    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.init_layers()
    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    # Manual training loop; drops the last partial batch (integer division).
    for epoch in range(0, epochs):
        if (use_external == True):
            dataloader_input.reset()
            dataloader_label.reset()
        else:
            dataloader.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)
        for iter in range(0, int(iterations)):
            if (use_external == True):
                dataloader_input.next_batch(ffmodel)
                dataloader_label.next_batch(ffmodel)
            else:
                dataloader.next_batch(ffmodel)
            # Epoch 0 is a warm-up; tracing starts from epoch 1.
            if (epoch > 0):
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if (epoch > 0):
                ffconfig.end_trace(111)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
    # Host-side inspection: map the first conv layer's input tensor and the
    # label tensor into host memory and print them.
    conv_2d1 = ffmodel.get_layer_by_id(0)
    cbias_tensor = conv_2d1.get_input_tensor()
    cbias_tensor.inline_map(ffconfig)
    cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
    print(cbias.shape)
    print(cbias)
    cbias_tensor.inline_unmap(ffconfig)
    label.inline_map(ffconfig)
    label_array = label.get_flat_array(ffconfig, DataType.DT_INT32)
    print(label_array.shape)
    print(label_array)
    label.inline_unmap(ffconfig)
def top_level_task():
    """Train AlexNet on upscaled CIFAR-10 and return its PerfMetrics.

    Fix vs. previous revision: the accuracy check sat AFTER
    `return perf_metrics` and was therefore unreachable dead code. The check
    now runs before the return, so a low accuracy actually fails the task
    while callers still receive the metrics object.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))

    ffmodel = FFModel(ffconfig)

    dims_input = [ffconfig.batch_size, 3, 229, 229]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)

    kernel_init = GlorotUniformInitializer(123)
    bias_init = ZeroInitializer()

    # Classic AlexNet stack; the first conv takes explicit initializers.
    t = ffmodel.conv2d(input_tensor, 64, 11, 11, 4, 4, 2, 2,
                       ActiMode.AC_MODE_RELU, 1, True, None,
                       kernel_init, bias_init)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 192, 5, 5, 1, 1, 2, 2, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 384, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 256, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 256, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 0, 0)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 4096, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 4096, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)

    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label_tensor = ffmodel.label_tensor

    # Upscale each CIFAR image to 229x229 (CHW -> HWC for PIL, then back).
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image
        if (i == 0):
            print(image)
    full_input_np /= 255
    print(full_input_np.shape)
    print(full_input_np.__array_interface__["strides"])
    print(full_input_np[0, :, :, :])
    y_train = y_train.astype('int32')
    full_label_np = y_train

    # Stage the arrays, build the loaders, then detach the numpy backing.
    dims_full_input = [num_samples, 3, 229, 229]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)
    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)
    full_input.attach_numpy_array(ffconfig, full_input_np)
    full_label.attach_numpy_array(ffconfig, full_label_np)
    dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label_tensor, full_label,
                                        num_samples, DataType.DT_INT32)
    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)
    num_samples = dataloader_input.get_num_samples()
    assert dataloader_input.get_num_samples(
    ) == dataloader_label.get_num_samples()

    ffmodel.init_layers()

    epochs = ffconfig.epochs
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))

    perf_metrics = ffmodel.get_perf_metrics()
    # Bug fix: this check was unreachable (it followed the return statement).
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_ALEXNET.value:
        assert 0, 'Check Accuracy'
    return perf_metrics
def top_level_task():
    """Train a CIFAR-10 CNN imported from an ONNX file with a manual loop.

    Loads "cifar10_cnn.onnx", replays it onto a FlexFlow model, trains with
    an explicit forward/backward/update loop, checks accuracy against the
    CIFAR10_CNN reference, and inline-maps tensors for inspection.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)
    # NCHW CIFAR-10 input; the label tensor comes from the compiled model.
    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)
    # Replay the ONNX graph onto ffmodel; "input.1" is the ONNX graph's
    # input name. The returned tensor is not used directly — compile()
    # picks up the constructed graph.
    onnx_model = ONNXModel("cifar10_cnn.onnx")
    t = onnx_model.apply(ffmodel, {"input.1": input})
    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    label = ffmodel.get_label_tensor()
    # Toggle between numpy-backed loaders and the config-driven DataLoader4D.
    use_external = True
    if (use_external == True):
        num_samples = 10000
        (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
        x_train = x_train.astype('float32')
        x_train /= 255
        full_input_array = x_train
        print(full_input_array.__array_interface__["strides"])
        y_train = y_train.astype('int32')
        full_label_array = y_train
        print(full_input_array.__array_interface__["strides"])
        print(full_input_array.shape, full_label_array.shape)
        print(full_label_array.__array_interface__["strides"])
        # Staging tensors hold the whole dataset only while the loaders are
        # constructed; the numpy backing is detached right after.
        dims_full_input = [num_samples, 3, 32, 32]
        full_input = ffmodel.create_tensor(dims_full_input, "", DataType.DT_FLOAT)
        dims_full_label = [num_samples, 1]
        full_label = ffmodel.create_tensor(dims_full_label, "", DataType.DT_INT32)
        full_input.attach_numpy_array(ffconfig, full_input_array)
        full_label.attach_numpy_array(ffconfig, full_label_array)
        dataloader_input = SingleDataLoader(ffmodel, input, full_input,
                                            num_samples, DataType.DT_FLOAT)
        dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                            num_samples, DataType.DT_INT32)
        full_input.detach_numpy_array(ffconfig)
        full_label.detach_numpy_array(ffconfig)
        num_samples = dataloader_input.get_num_samples()
    else:
        # Data Loader driven by alexnetconfig's dataset path.
        dataloader = DataLoader4D(ffmodel, input, label,
                                  ffnetconfig=alexnetconfig)
        num_samples = dataloader.get_num_samples()
    ffmodel.init_layers()
    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    # Manual training loop; drops the last partial batch (integer division).
    for epoch in range(0, epochs):
        if (use_external == True):
            dataloader_input.reset()
            dataloader_label.reset()
        else:
            dataloader.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)
        for iter in range(0, int(iterations)):
            if (use_external == True):
                dataloader_input.next_batch(ffmodel)
                dataloader_label.next_batch(ffmodel)
            else:
                dataloader.next_batch(ffmodel)
            # Epoch 0 is a warm-up; tracing starts from epoch 1.
            if (epoch > 0):
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if (epoch > 0):
                ffconfig.end_trace(111)
    ts_end = ffconfig.get_current_time()
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
    # Fail the task if accuracy falls below the reference value.
    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_CNN.value:
        assert 0, 'Check Accuracy'
    # Host-side inspection: first layer's input tensor and the labels.
    conv_2d1 = ffmodel.get_layer_by_id(0)
    cbias_tensor = conv_2d1.get_input_tensor()
    cbias_tensor.inline_map(ffconfig)
    cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
    print(cbias.shape)
    print(cbias)
    cbias_tensor.inline_unmap(ffconfig)
    label.inline_map(ffconfig)
    label_array = label.get_flat_array(ffconfig, DataType.DT_INT32)
    print(label_array.shape)
    print(label_array)
    label.inline_unmap(ffconfig)
def top_level_task():
    """Train a bottleneck (ResNet-style, 3/4/6/3 stages) network on CIFAR-10
    images upscaled to 229x229, using the property-based FlexFlow API and
    ffmodel.fit()."""
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)
    # NCHW input; 229x229 matches the PIL resize below.
    dims_input = [ffconfig.batch_size, 3, 229, 229]
    input = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)
    # Stem: 7x7 stride-2 conv, batch norm, 3x3 stride-2 pool.
    t = ffmodel.conv2d(input, 64, 7, 7, 2, 2, 3, 3)
    t = ffmodel.batch_norm(t);
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 1, 1)
    # Four bottleneck stages; the first block of stages 2-4 downsamples
    # with stride 2.
    for i in range(0, 3):
        t = BottleneckBlock(ffmodel, t, 64, 1)
    for i in range(0, 4):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 128, stride)
    for i in range(0, 6):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 256, stride)
    for i in range(0, 3):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 512, stride);
    # Head: 7x7 average pool, flatten, 10-way dense + softmax.
    t = ffmodel.pool2d(t, 7, 7, 1, 1, 0, 0, PoolType.POOL_AVG)
    t = ffmodel.flat(t);
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)
    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    # Label tensor is available after compile().
    label = ffmodel.label_tensor
    # load data
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    # Upscale each 32x32 CHW image to 229x229 with PIL nearest-neighbour:
    # transpose to HWC for PIL, resize, transpose back to CHW.
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229,229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image
    # Normalize pixel values to [0, 1].
    full_input_np /= 255
    print(full_input_np.shape)
    print(full_input_np.__array_interface__["strides"])
    print(full_input_np[0,:, :, :])
    y_train = y_train.astype('int32')
    full_label_np = y_train
    dataloader_input = ffmodel.create_data_loader(input, full_input_np)
    dataloader_label = ffmodel.create_data_loader(label, full_label_np)
    num_samples = dataloader_input.num_samples
    assert dataloader_input.num_samples == dataloader_label.num_samples
    ffmodel.init_layers()
    epochs = ffconfig.epochs
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    # Timestamps are converted to seconds via the 1e-6 factor.
    run_time = 1e-6 * (ts_end - ts_start);
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time));
def top_level_task():
    """Teacher/student experiment on CIFAR-10 using the FlexFlow Keras-like
    API: train a plain CNN (teacher), then initialize a wider student (two
    parallel first convs, concatenated) from the teacher's weights — the
    second-conv kernel is duplicated along the input-channel axis to match
    the doubled channel count — and train the student."""
    num_classes = 10
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)
    #teacher
    input_tensor1 = Input(shape=(3, 32, 32), dtype="float32")
    # Layers are kept in named variables so their weights can be read back
    # with get_weights() after training.
    c1 = Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                strides=(1, 1), padding="same", activation="relu")
    c2 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")
    c3 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")
    c4 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")
    d1 = Dense(512, activation="relu")
    d2 = Dense(num_classes)
    output_tensor = c1(input_tensor1)
    output_tensor = c2(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                 padding="same")(output_tensor)
    output_tensor = c3(output_tensor)
    output_tensor = c4(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                 padding="valid")(output_tensor)
    output_tensor = Flatten()(output_tensor)
    output_tensor = d1(output_tensor)
    output_tensor = d2(output_tensor)
    output_tensor = Activation("softmax")(output_tensor)
    teacher_model = Model(input_tensor1, output_tensor)
    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    teacher_model.compile(
        optimizer=opt,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', 'sparse_categorical_crossentropy'])
    # Short teacher training run just to obtain usable weights.
    teacher_model.fit(x_train, y_train, epochs=10)
    c1_kernel, c1_bias = c1.get_weights(teacher_model.ffmodel)
    c2_kernel, c2_bias = c2.get_weights(teacher_model.ffmodel)
    c3_kernel, c3_bias = c3.get_weights(teacher_model.ffmodel)
    c4_kernel, c4_bias = c4.get_weights(teacher_model.ffmodel)
    d1_kernel, d1_bias = d1.get_weights(teacher_model.ffmodel)
    d2_kernel, d2_bias = d2.get_weights(teacher_model.ffmodel)
    #d2_kernel *= 0
    # Student's sc2 sees 64 input channels (two concatenated 32-channel
    # branches), so duplicate the teacher kernel along axis 1.
    # NOTE(review): axis 1 is presumably the input-channel axis of the
    # kernel layout here — confirm against get_weights()'s layout.
    c2_kernel_new = np.concatenate((c2_kernel, c2_kernel), axis=1)
    print(c2_kernel.shape, c2_kernel_new.shape, c2_bias.shape)
    #student model
    input_tensor2 = Input(shape=(3, 32, 32), dtype="float32")
    sc1_1 = Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                   strides=(1, 1), padding="same", activation="relu")
    sc1_2 = Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                   strides=(1, 1), padding="same", activation="relu")
    sc2 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                 padding=(1, 1), activation="relu")
    sc3 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                 padding=(1, 1), activation="relu")
    sc4 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                 padding=(1, 1), activation="relu")
    sd1 = Dense(512, activation="relu")
    sd2 = Dense(num_classes)
    # Two parallel first convs on the same input, concatenated on the
    # channel axis.
    t1 = sc1_1(input_tensor2)
    t2 = sc1_2(input_tensor2)
    output_tensor = Concatenate(axis=1)([t1, t2])
    output_tensor = sc2(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                 padding="same")(output_tensor)
    output_tensor = sc3(output_tensor)
    output_tensor = sc4(output_tensor)
    output_tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                 padding="valid")(output_tensor)
    output_tensor = Flatten()(output_tensor)
    output_tensor = sd1(output_tensor)
    output_tensor = sd2(output_tensor)
    output_tensor = Activation("softmax")(output_tensor)
    student_model = Model(input_tensor2, output_tensor)
    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    student_model.compile(
        optimizer=opt,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', 'sparse_categorical_crossentropy'])
    # Copy teacher weights into the student; both first-conv branches get
    # the same teacher c1 weights.
    sc1_1.set_weights(student_model.ffmodel, c1_kernel, c1_bias)
    sc1_2.set_weights(student_model.ffmodel, c1_kernel, c1_bias)
    sc2.set_weights(student_model.ffmodel, c2_kernel_new, c2_bias)
    sc3.set_weights(student_model.ffmodel, c3_kernel, c3_bias)
    sc4.set_weights(student_model.ffmodel, c4_kernel, c4_bias)
    sd1.set_weights(student_model.ffmodel, d1_kernel, d1_bias)
    sd2.set_weights(student_model.ffmodel, d2_kernel, d2_bias)
    student_model.fit(x_train, y_train, epochs=160,
                      callbacks=[
                          VerifyMetrics(ModelAccuracy.CIFAR10_CNN),
                          EpochVerifyMetrics(ModelAccuracy.CIFAR10_CNN)
                      ])
def top_level_task():
    """Train a bottleneck (ResNet-style, 3/4/6/3 stages) network on CIFAR-10
    upscaled to 229x229 using the getter-based FlexFlow API and a manual
    forward/backward/update training loop."""
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)
    # NCHW input; 229x229 matches the PIL resize below.
    dims_input = [ffconfig.get_batch_size(), 3, 229, 229]
    #print(dims)
    input = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)
    # Stem: 7x7 stride-2 conv, batch norm, 3x3 stride-2 pool.
    t = ffmodel.conv2d(input, 64, 7, 7, 2, 2, 3, 3)
    t = ffmodel.batch_norm(t)
    t = ffmodel.pool2d(t, 3, 3, 2, 2, 1, 1)
    # Four bottleneck stages; first block of stages 2-4 downsamples
    # with stride 2.
    for i in range(0, 3):
        t = BottleneckBlock(ffmodel, t, 64, 1)
    for i in range(0, 4):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 128, stride)
    for i in range(0, 6):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 256, stride)
    for i in range(0, 3):
        if (i == 0):
            stride = 2
        else:
            stride = 1
        t = BottleneckBlock(ffmodel, t, 512, stride)
    # Head: 7x7 average pool, flatten, 10-way dense + softmax.
    t = ffmodel.pool2d(t, 7, 7, 1, 1, 0, 0, PoolType.POOL_AVG)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)
    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    # Label tensor only exists after compile().
    label = ffmodel.get_label_tensor()
    # load data
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    # Upscale each 32x32 CHW image to 229x229 via PIL nearest-neighbour
    # (HWC for PIL, back to CHW for FlexFlow).
    full_input_np = np.zeros((num_samples, 3, 229, 229), dtype=np.float32)
    for i in range(0, num_samples):
        image = x_train[i, :, :, :]
        image = image.transpose(1, 2, 0)
        pil_image = Image.fromarray(image)
        pil_image = pil_image.resize((229, 229), Image.NEAREST)
        image = np.array(pil_image, dtype=np.float32)
        image = image.transpose(2, 0, 1)
        full_input_np[i, :, :, :] = image
    # Normalize to [0, 1].
    full_input_np /= 255
    print(full_input_np.shape)
    print(full_input_np.__array_interface__["strides"])
    print(full_input_np[0, :, :, :])
    y_train = y_train.astype('int32')
    full_label_np = y_train
    dims_full_input = [num_samples, 3, 229, 229]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)
    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)
    # Attach numpy buffers only long enough for the dataloaders to capture
    # the data, then detach.
    full_input.attach_numpy_array(ffconfig, full_input_np)
    full_label.attach_numpy_array(ffconfig, full_label_np)
    dataloader_input = SingleDataLoader(ffmodel, input, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)
    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)
    num_samples = dataloader_input.get_num_samples()
    assert dataloader_input.get_num_samples(
    ) == dataloader_label.get_num_samples()
    ffmodel.init_layers()
    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    for epoch in range(0, epochs):
        dataloader_input.reset()
        dataloader_label.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)
        for iter in range(0, int(iterations)):
            dataloader_input.next_batch(ffmodel)
            dataloader_label.next_batch(ffmodel)
            # Trace only after the warm-up epoch.
            if (epoch > 0):
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if (epoch > 0):
                ffconfig.end_trace(111)
    ts_end = ffconfig.get_current_time()
    # Timestamps are converted to seconds via the 1e-6 factor.
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
def top_level_task():
    """Load a CNN from a serialized FlexFlow model file ("cnn.ff"), attach a
    softmax head, and train it on CIFAR-10 with the manual training loop."""
    ffconfig = FFConfig()
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)
    # NCHW batch input tensor: [batch, 3, 32, 32].
    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, "", DataType.DT_FLOAT)
    # The model file expects two inputs; the same tensor is bound to both.
    # NOTE(review): presumably the file describes a two-branch network —
    # confirm against cnn.ff.
    output_tensors = ffmodel.construct_model_from_file(
        [input_tensor, input_tensor], "cnn.ff")
    t = ffmodel.softmax(output_tensors[0])
    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[MetricsType.METRICS_ACCURACY,
                             MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
    # Label tensor only exists after compile().
    label = ffmodel.get_label_tensor()
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    # Normalize to [0, 1].
    x_train /= 255
    full_input_array = x_train
    y_train = y_train.astype('int32')
    full_label_array = y_train
    dims_full_input = [num_samples, 3, 32, 32]
    full_input = ffmodel.create_tensor(dims_full_input, "", DataType.DT_FLOAT)
    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, "", DataType.DT_INT32)
    # Attach numpy buffers just long enough for the dataloaders to capture
    # the data, then detach.
    full_input.attach_numpy_array(ffconfig, full_input_array)
    full_label.attach_numpy_array(ffconfig, full_label_array)
    dataloader_input = SingleDataLoader(ffmodel, input_tensor, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)
    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)
    num_samples = dataloader_input.get_num_samples()
    ffmodel.init_layers()
    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    for epoch in range(0,epochs):
        dataloader_input.reset()
        dataloader_label.reset()
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)
        for iter in range(0, int(iterations)):
            dataloader_input.next_batch(ffmodel)
            dataloader_label.next_batch(ffmodel)
            # Trace only after the warm-up epoch.
            if (epoch > 0):
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if (epoch > 0):
                ffconfig.end_trace(111)
    ts_end = ffconfig.get_current_time()
    # Timestamps are converted to seconds via the 1e-6 factor.
    run_time = 1e-6 * (ts_end - ts_start);
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time));
def top_level_task():
    """Build a CIFAR-10 CNN from an ONNX file, train with ffmodel.fit(), and
    assert the final accuracy reaches the CIFAR10_CNN reference value."""
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)
    # NCHW batch input tensor: [batch, 3, 32, 32].
    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)
    # Bind our tensor to the ONNX graph input named "input.1".
    onnx_model = ONNXModel("cifar10_cnn.onnx")
    t = onnx_model.apply(ffmodel, {"input.1": input})
    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    # Label tensor only exists after compile().
    label = ffmodel.get_label_tensor()
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    # Normalize to [0, 1].
    x_train /= 255
    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])
    y_train = y_train.astype('int32')
    full_label_array = y_train
    dims_full_input = [num_samples, 3, 32, 32]
    full_input = ffmodel.create_tensor(dims_full_input, DataType.DT_FLOAT)
    dims_full_label = [num_samples, 1]
    full_label = ffmodel.create_tensor(dims_full_label, DataType.DT_INT32)
    # Attach numpy buffers just long enough for the dataloaders to capture
    # the data, then detach.
    full_input.attach_numpy_array(ffconfig, full_input_array)
    full_label.attach_numpy_array(ffconfig, full_label_array)
    dataloader_input = SingleDataLoader(ffmodel, input, full_input,
                                        num_samples, DataType.DT_FLOAT)
    dataloader_label = SingleDataLoader(ffmodel, label, full_label,
                                        num_samples, DataType.DT_INT32)
    full_input.detach_numpy_array(ffconfig)
    full_label.detach_numpy_array(ffconfig)
    num_samples = dataloader_input.get_num_samples()
    ffmodel.init_layers()
    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    # Timestamps are converted to seconds via the 1e-6 factor.
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
    # Fail loudly if the model did not reach the reference accuracy.
    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_CNN.value:
        assert 0, 'Check Accuracy'
def top_level_task():
    """Build a small CIFAR-10 CNN with the low-level FlexFlow op API, feed
    batches via next_batch()/next_batch_label(), train with a manual
    forward/backward/update loop, check accuracy, and dump the first layer's
    input tensor and the label tensor for inspection.

    Bug fix: the training loop previously called next_batch() with the
    `input` builtin and next_batch_label() / inline_map() with an undefined
    name `label`; the tensors bound in this function are `input_tensor` and
    `label_tensor`, which are now used consistently.
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    ffconfig.parse_args()
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.get_batch_size(), ffconfig.get_workers_per_node(),
           ffconfig.get_num_nodes()))
    ffmodel = FFModel(ffconfig)
    # NCHW batch input tensor: [batch, 3, 32, 32].
    dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
    input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    # Normalize to [0, 1].
    x_train /= 255
    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])
    y_train = y_train.astype('int32')
    full_label_array = y_train
    print(full_input_array.__array_interface__["strides"])
    print(full_input_array.shape, full_label_array.shape)
    print(full_label_array.__array_interface__["strides"])
    # Network: two conv blocks (32 and 64 filters, 2x2 max pool each),
    # then dense 512 -> dense 10 -> softmax.
    t = ffmodel.conv2d(input_tensor, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 32, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.conv2d(t, 64, 3, 3, 1, 1, 1, 1, ActiMode.AC_MODE_RELU)
    t = ffmodel.pool2d(t, 2, 2, 2, 2, 0, 0)
    t = ffmodel.flat(t)
    t = ffmodel.dense(t, 512, ActiMode.AC_MODE_RELU)
    t = ffmodel.dense(t, 10)
    t = ffmodel.softmax(t)
    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.set_sgd_optimizer(ffoptimizer)
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    # Label tensor only exists after compile().
    label_tensor = ffmodel.get_label_tensor()
    # Prime the input/label tensors with the first batch before init.
    next_batch(0, x_train, input_tensor, ffconfig)
    next_batch_label(0, y_train, label_tensor, ffconfig)
    ffmodel.init_layers()
    epochs = ffconfig.get_epochs()
    ts_start = ffconfig.get_current_time()
    for epoch in range(0, epochs):
        ffmodel.reset_metrics()
        iterations = int(num_samples / ffconfig.get_batch_size())
        print(iterations, num_samples)
        ct = 0
        for iter in range(0, int(iterations)):
            # FIX: use the actual tensors (input_tensor / label_tensor);
            # the original passed the `input` builtin and an undefined
            # `label` here.
            next_batch(ct, x_train, input_tensor, ffconfig)
            next_batch_label(ct, y_train, label_tensor, ffconfig)
            ct += 1
            # Trace only after the warm-up epoch.
            if (epoch > 0):
                ffconfig.begin_trace(111)
            ffmodel.forward()
            ffmodel.zero_gradients()
            ffmodel.backward()
            ffmodel.update()
            if (epoch > 0):
                ffconfig.end_trace(111)
    ts_end = ffconfig.get_current_time()
    # Timestamps are converted to seconds via the 1e-6 factor.
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < 0.3:
        assert 0, 'Check Accuracy'
    # Debug dump: first layer's input tensor and the label tensor.
    conv_2d1 = ffmodel.get_layer_by_id(0)
    cbias_tensor = conv_2d1.get_input_tensor()
    #cbias_tensor = conv_2d1.get_output_tensor()
    cbias_tensor.inline_map(ffconfig)
    cbias = cbias_tensor.get_flat_array(ffconfig, DataType.DT_FLOAT)
    print(cbias.shape)
    print(cbias)
    cbias_tensor.inline_unmap(ffconfig)
    label_tensor.inline_map(ffconfig)
    label_array = label_tensor.get_flat_array(ffconfig, DataType.DT_INT32)
    print(label_array.shape)
    # print(cbias)
    print(label_array)
    label_tensor.inline_unmap(ffconfig)
def top_level_task(test_type=1):
    """Build a CIFAR-10 CNN from an ONNX file exported by PyTorch
    (test_type == 1) or Keras (otherwise), train with ffmodel.fit(), and
    assert the final accuracy reaches the CIFAR10_CNN reference value.

    Args:
        test_type: 1 selects the PyTorch-exported ONNX model
            ("cifar10_cnn_pt.onnx", graph input "input.1"); any other value
            selects the Keras export ("cifar10_cnn_keras.onnx", graph input
            "input_1").
    """
    ffconfig = FFConfig()
    alexnetconfig = NetConfig()
    print(alexnetconfig.dataset_path)
    print("Python API batchSize(%d) workersPerNodes(%d) numNodes(%d)" %
          (ffconfig.batch_size, ffconfig.workers_per_node, ffconfig.num_nodes))
    ffmodel = FFModel(ffconfig)
    # NCHW batch input tensor: [batch, 3, 32, 32].
    dims_input = [ffconfig.batch_size, 3, 32, 32]
    input = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)
    if test_type == 1:
        onnx_model = ONNXModel("cifar10_cnn_pt.onnx")
        t = onnx_model.apply(ffmodel, {"input.1": input})
    else:
        onnx_model = ONNXModelKeras("cifar10_cnn_keras.onnx", ffconfig, ffmodel)
        t = onnx_model.apply(ffmodel, {"input_1": input})
    ffoptimizer = SGDOptimizer(ffmodel, 0.01)
    ffmodel.optimizer = ffoptimizer
    ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
                    metrics=[
                        MetricsType.METRICS_ACCURACY,
                        MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY
                    ])
    # Label tensor only exists after compile().
    label = ffmodel.label_tensor
    num_samples = 10000
    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    # Normalize to [0, 1].
    x_train /= 255
    full_input_array = x_train
    print(full_input_array.__array_interface__["strides"])
    y_train = y_train.astype('int32')
    full_label_array = y_train
    dataloader_input = ffmodel.create_data_loader(input, full_input_array)
    dataloader_label = ffmodel.create_data_loader(label, full_label_array)
    num_samples = dataloader_input.num_samples
    ffmodel.init_layers()
    epochs = ffconfig.epochs
    ts_start = ffconfig.get_current_time()
    ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
    ts_end = ffconfig.get_current_time()
    # Timestamps are converted to seconds via the 1e-6 factor.
    run_time = 1e-6 * (ts_end - ts_start)
    print("epochs %d, ELAPSED TIME = %.4fs, THROUGHPUT = %.2f samples/s\n" %
          (epochs, run_time, num_samples * epochs / run_time))
    # Fail loudly if the model did not reach the reference accuracy.
    perf_metrics = ffmodel.get_perf_metrics()
    accuracy = perf_metrics.get_accuracy()
    if accuracy < ModelAccuracy.CIFAR10_CNN.value:
        assert 0, 'Check Accuracy'