def test_bigdl_pytorch_estimator_shard(self):
    class SimpleModel(nn.Module):
        def __init__(self):
            super(SimpleModel, self).__init__()
            self.fc = nn.Linear(2, 2)

        def forward(self, x):
            x = self.fc(x)
            return F.log_softmax(x, dim=1)

    model = SimpleModel()

    def loss_func(input, target):
        return nn.CrossEntropyLoss().forward(input, target.flatten().long())

    def transform(df):
        result = {
            "x": [df['user'].to_numpy(), df['item'].to_numpy()],
            "y": df['label'].to_numpy()
        }
        return result

    OrcaContext.pandas_read_backend = "pandas"
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = read_csv(file_path)
    data_shard = data_shard.transform_shard(transform)

    estimator = Estimator.from_torch(model=model, loss=loss_func, optimizer=SGD(),
                                     backend="bigdl")
    estimator.fit(data=data_shard, epochs=4, batch_size=2,
                  validation_data=data_shard, validation_methods=[Accuracy()],
                  checkpoint_trigger=EveryEpoch())
    estimator.evaluate(data_shard, validation_methods=[Accuracy()], batch_size=2)
def test_bigdl_pytorch_estimator_dataloader_creator(self):
    class SimpleModel(nn.Module):
        def __init__(self):
            super(SimpleModel, self).__init__()
            self.dense1 = nn.Linear(2, 4)
            self.bn1 = torch.nn.BatchNorm1d(4)
            self.dense2 = nn.Linear(4, 1)

        def forward(self, x):
            x = self.dense1(x)
            x = self.bn1(x)
            x = torch.sigmoid(self.dense2(x))
            return x

    model = SimpleModel()
    estimator = Estimator.from_torch(model=model, loss=nn.BCELoss(), optimizer=Adam())

    def get_dataloader():
        inputs = torch.Tensor([[1, 2], [1, 3], [3, 2], [5, 6], [8, 9], [1, 9]])
        targets = torch.Tensor([[0], [0], [0], [1], [1], [1]])
        return torch.utils.data.DataLoader(TensorDataset(inputs, targets),
                                           batch_size=2)

    estimator.fit(data=get_dataloader, epochs=2, validation_data=get_dataloader,
                  validation_metrics=[Accuracy()], checkpoint_trigger=EveryEpoch())
    estimator.evaluate(data=get_dataloader, validation_metrics=[Accuracy()])
    model = estimator.get_model()
    assert isinstance(model, nn.Module)
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--dir', default='/tmp/data', metavar='N',
                        help='the folder to store the MNIST data')
    parser.add_argument('--batch-size', type=int, default=256, metavar='N',
                        help='input batch size for training per executor (default: 256)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing per executor (default: 1000)')
    parser.add_argument('--epochs', type=int, default=2, metavar='N',
                        help='number of epochs to train (default: 2)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='for saving the current model')
    parser.add_argument('--cluster_mode', type=str, default="local",
                        help='the mode for the Spark cluster: local or yarn')
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(args.dir, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(args.dir, train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.test_batch_size, shuffle=False)

    if args.cluster_mode == "local":
        init_orca_context(cores=1, memory="2g")
    elif args.cluster_mode == "yarn":
        init_orca_context(
            cluster_mode="yarn-client", cores=4, num_nodes=2, memory="2g",
            driver_memory="10g", driver_cores=1,
            conf={"spark.rpc.message.maxSize": "1024",
                  "spark.task.maxFailures": "1",
                  "spark.driver.extraJavaOptions": "-Dbigdl.failure.retryTimes=1"})

    model = LeNet()
    model.train()
    criterion = nn.NLLLoss()
    adam = torch.optim.Adam(model.parameters(), args.lr)

    est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion)
    est.fit(data=train_loader, epochs=args.epochs, validation_data=test_loader,
            validation_metrics=[Accuracy()], checkpoint_trigger=EveryEpoch())
    result = est.evaluate(data=test_loader, validation_metrics=[Accuracy()])
    for r in result:
        print(str(r))
    stop_orca_context()
def test_bigdl_pytorch_estimator_shard(self):
    class SimpleModel(nn.Module):
        def __init__(self):
            super(SimpleModel, self).__init__()
            self.fc = nn.Linear(2, 2)

        def forward(self, x):
            x = self.fc(x)
            return F.log_softmax(x, dim=1)

    model = SimpleModel()

    def loss_func(input, target):
        return nn.CrossEntropyLoss().forward(input, target.flatten().long())

    def transform(df):
        result = {
            "x": np.stack([df['user'].to_numpy(), df['item'].to_numpy()], axis=1),
            "y": df['label'].to_numpy()
        }
        return result

    def transform_del_y(d):
        result = {"x": d["x"]}
        return result

    OrcaContext.pandas_read_backend = "pandas"
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = read_csv(file_path)
    data_shard = data_shard.transform_shard(transform)

    with tempfile.TemporaryDirectory() as temp_dir_name:
        estimator = Estimator.from_torch(model=model, loss=loss_func,
                                         metrics=[Accuracy()],
                                         optimizer=SGD(learningrate_schedule=Default()),
                                         model_dir=temp_dir_name)
        estimator.fit(data=data_shard, epochs=4, batch_size=2,
                      validation_data=data_shard, checkpoint_trigger=EveryEpoch())
        estimator.evaluate(data_shard, batch_size=2)

        # reload the checkpoint into a fresh estimator and resume training
        est2 = Estimator.from_torch(model=model, loss=loss_func,
                                    metrics=[Accuracy()], optimizer=None)
        est2.load(temp_dir_name, loss=loss_func)
        est2.fit(data=data_shard, epochs=8, batch_size=2,
                 validation_data=data_shard, checkpoint_trigger=EveryEpoch())
        est2.evaluate(data_shard, batch_size=2)

        pred_result = est2.predict(data_shard)
        pred_c = pred_result.collect()
        assert isinstance(pred_result, SparkXShards)

        # predicting on shards without labels should give the same result
        pred_shard = data_shard.transform_shard(transform_del_y)
        pred_result2 = est2.predict(pred_shard)
        pred_c_2 = pred_result2.collect()
        assert (pred_c[0]["prediction"] == pred_c_2[0]["prediction"]).all()
def test_bigdl_pytorch_estimator_pandas_dataframe(self):
    class SimpleModel(nn.Module):
        def __init__(self):
            super(SimpleModel, self).__init__()
            self.fc = nn.Linear(1, 10)

        def forward(self, x):
            x = torch.unsqueeze(x, dim=1)
            x = self.fc(x)
            return F.log_softmax(x, dim=1)

    def loss_func(input, target):
        return nn.CrossEntropyLoss().forward(input, target.flatten().long())

    model = SimpleModel()

    OrcaContext.pandas_read_backend = "pandas"
    file_path = os.path.join(resource_path, "orca/learn/simple_feature_label.csv")
    data_shard = read_csv(file_path)

    with tempfile.TemporaryDirectory() as temp_dir_name:
        estimator = Estimator.from_torch(
            model=model, loss=loss_func, metrics=[Accuracy()],
            optimizer=SGD(learningrate_schedule=Default()),
            model_dir=temp_dir_name)
        estimator.fit(data=data_shard, epochs=1, batch_size=4,
                      feature_cols=['feature'], label_cols=['label'],
                      validation_data=data_shard, checkpoint_trigger=EveryEpoch())
        estimator.evaluate(data_shard, batch_size=4,
                           feature_cols=['feature'], label_cols=['label'])

        est2 = Estimator.from_torch(model=model, loss=loss_func,
                                    metrics=[Accuracy()], optimizer=None)
        est2.load_orca_checkpoint(temp_dir_name)
        est2.predict(data_shard, batch_size=4, feature_cols=['feature'])
def get_estimator(workers_per_node=1, model_fn=get_model):
    estimator = Estimator.from_torch(model=model_fn,
                                     optimizer=get_optimizer,
                                     loss=nn.BCELoss(),
                                     metrics=Accuracy(),
                                     config={"lr": 1e-2},
                                     workers_per_node=workers_per_node,
                                     backend="torch_distributed")
    return estimator
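# A minimal usage sketch for the helper above, not part of the original sources.
# The data_creator below is hypothetical; the fit/evaluate/shutdown calls mirror
# the torch_distributed Estimator patterns used elsewhere in these examples.
def train_with_estimator():
    def data_creator(config, batch_size):
        # tiny in-memory dataset, matching the (config, batch_size) creator signature
        inputs = torch.Tensor([[1, 2], [1, 3], [3, 2], [5, 6]])
        targets = torch.Tensor([[0], [0], [1], [1]])
        return torch.utils.data.DataLoader(TensorDataset(inputs, targets),
                                           batch_size=batch_size)

    estimator = get_estimator(workers_per_node=2)
    stats = estimator.fit(data_creator, epochs=2, batch_size=4)
    val_stats = estimator.evaluate(data_creator, batch_size=4)
    estimator.shutdown()
    return stats, val_stats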
def test_bigdl_pytorch_estimator_dataframe_fit_evaluate(self):
    class SimpleModel(nn.Module):
        def __init__(self):
            super(SimpleModel, self).__init__()
            self.fc = nn.Linear(5, 5)

        def forward(self, x):
            x = self.fc(x)
            return F.log_softmax(x, dim=1)

    model = SimpleModel()

    def loss_func(input, target):
        return nn.CrossEntropyLoss().forward(input, target.flatten().long())

    rdd = self.sc.range(0, 100)
    df = rdd.map(lambda x: ([float(x)] * 5,
                            [int(np.random.randint(0, 2, size=()))])).toDF(
        ["feature", "label"])

    with tempfile.TemporaryDirectory() as temp_dir_name:
        estimator = Estimator.from_torch(
            model=model, loss=loss_func,
            optimizer=SGD(learningrate_schedule=Default()),
            model_dir=temp_dir_name)
        estimator.fit(data=df, epochs=4, batch_size=2, validation_data=df,
                      validation_metrics=[Accuracy()],
                      checkpoint_trigger=EveryEpoch(),
                      feature_cols=["feature"], label_cols=["label"])
        estimator.evaluate(df, validation_metrics=[Accuracy()], batch_size=2,
                           feature_cols=["feature"], label_cols=["label"])
def test_bigdl_pytorch_estimator_dataloader_creator(self):
    class SimpleModel(nn.Module):
        def __init__(self):
            super(SimpleModel, self).__init__()
            self.dense1 = nn.Linear(2, 4)
            self.bn1 = torch.nn.BatchNorm1d(4)
            self.dense2 = nn.Linear(4, 1)

        def forward(self, x):
            x = self.dense1(x)
            x = self.bn1(x)
            x = torch.sigmoid(self.dense2(x))
            return x

    def model_creator(config):
        model = SimpleModel()
        return model

    def optim_creator(model, config):
        return optim.Adam(model.parameters(), lr=config.get("lr", 0.01))

    estimator = Estimator.from_torch(model=model_creator,
                                     loss=nn.BCELoss(),
                                     metrics=[Accuracy()],
                                     optimizer=optim_creator,
                                     config={"lr": 0.001})

    def get_dataloader(config, batch_size):
        inputs = torch.Tensor([[1, 2], [1, 3], [3, 2], [5, 6], [8, 9], [1, 9]])
        targets = torch.Tensor([[0], [0], [0], [1], [1], [1]])
        data_loader = torch.utils.data.DataLoader(
            TensorDataset(inputs, targets), batch_size=batch_size,
            num_workers=config.get("threads", 1))
        return data_loader

    estimator.fit(data=get_dataloader, epochs=2, batch_size=2,
                  validation_data=get_dataloader, checkpoint_trigger=EveryEpoch())
    estimator.evaluate(data=get_dataloader, batch_size=2)
    model = estimator.get_model()
    assert isinstance(model, nn.Module)
def test_xshards_spark_estimator_multi_inputs(self):
    resource_path = os.path.join(os.path.split(__file__)[0], "../../../resources")

    def transform(df):
        result = {
            "x": [np.expand_dims(df['user'].to_numpy(), axis=1),
                  np.expand_dims(df['item'].to_numpy(), axis=1)],
            "y": df['label'].to_numpy()
        }
        return result

    file_path = os.path.join(resource_path, "orca/learn/ncf2.csv")
    data_shard = read_csv(file_path)
    data_shard = data_shard.transform_shard(transform)

    zx1 = ZLayer.Input(shape=(1,))
    zx2 = ZLayer.Input(shape=(1,))
    zz = ZLayer.merge([zx1, zx2], mode="concat")
    zy = ZLayer.Dense(2)(zz)
    model = ZModel([zx1, zx2], zy)
    optim_method = SGD(learningrate=0.01)

    with tempfile.TemporaryDirectory() as temp_dir_name:
        estimator = Estimator.from_bigdl(model=model,
                                         optimizer=optim_method,
                                         loss=ClassNLLCriterion(),
                                         metrics=[Accuracy()],
                                         model_dir=temp_dir_name)
        estimator.set_constant_gradient_clipping(0.1, 1.2)
        r1 = estimator.predict(data=data_shard)
        r_c = r1.collect()
        estimator.set_tensorboard(log_dir=temp_dir_name, app_name="test")
        estimator.fit(data=data_shard, epochs=5, batch_size=8,
                      validation_data=data_shard, checkpoint_trigger=EveryEpoch())
        summary = estimator.get_train_summary(tag="Loss")
        temp_path = os.path.join(temp_dir_name, "save_model")
        estimator.save(temp_path)
        eval_result = estimator.evaluate(data=data_shard, batch_size=8)
def test_nnEstimator_evaluation(self):
    df = self.get_estimator_df2()
    linear_model = Sequential().add(Linear(2, 2)).add(LogSoftMax())
    est = Estimator.from_bigdl(model=linear_model,
                               loss=ClassNLLCriterion(),
                               optimizer=Adam(),
                               feature_preprocessing=SeqToTensor([2]),
                               label_preprocessing=SeqToTensor([1]),
                               metrics=Accuracy())
    est.fit(data=df, epochs=10, batch_size=8)
    result = est.evaluate(df, batch_size=8)

    # cross-check the reported Top1Accuracy against an accuracy computed
    # directly from the predictions (argmax of the probability vector)
    shift = udf(lambda p: float(p.index(max(p))), DoubleType())
    pred = est.predict(df).withColumn("prediction", shift(col('prediction'))).cache()
    correct = pred.filter("label=prediction").count()
    overall = pred.count()
    accuracy = correct * 1.0 / overall
    assert accuracy == round(result['Top1Accuracy'], 2)
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
net.train()

orca_estimator = Estimator.from_torch(model=net, optimizer=optimizer, loss=criterion,
                                      metrics=[Accuracy()], backend="bigdl")
orca_estimator.fit(data=trainloader, epochs=2, validation_data=testloader,
                   checkpoint_trigger=EveryEpoch())
print('Finished Training')

dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

res = orca_estimator.evaluate(data=testloader)
print("Accuracy of the network on the test images: %s" % res)
def bigdl_estimator():
    from zoo.orca.learn.bigdl.estimator import Estimator
    from tensorflow.python.keras.datasets import imdb
    from tensorflow.python.keras.preprocessing import sequence
    from zoo.pipeline.api.keras.models import Model
    from zoo.pipeline.api.keras.objectives import SparseCategoricalCrossEntropy
    from zoo.orca.data import XShards
    from zoo.orca.learn.metrics import Accuracy
    import numpy as np

    # conf = {"spark.executor.extraJavaOptions": "-Xss512m",
    #         "spark.driver.extraJavaOptions": "-Xss512m"}
    # init_orca_context(cluster_mode="local", cores=8, memory="16g")
    init_orca_context(cluster_mode="local", cores=4, memory="16g")

    max_features = 200
    max_len = 20

    print("running bigdl estimator")

    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    x_test = x_test[-1000:]
    y_test = y_test[-1000:]

    print(len(x_train), 'train sequences')
    print(len(x_test), 'test sequences')

    print('Pad sequences (samples x time)')
    x_train = sequence.pad_sequences(x_train, maxlen=max_len)
    x_test = sequence.pad_sequences(x_test, maxlen=max_len)
    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    train_pos = np.zeros((len(x_train), max_len), dtype=np.int32)
    val_pos = np.zeros((len(x_test), max_len), dtype=np.int32)
    for i in range(0, len(x_train)):
        train_pos[i, :] = np.arange(max_len)
        val_pos[i, :] = np.arange(max_len)

    train_dataset = XShards.partition({"x": (x_train, train_pos),
                                       "y": np.array(y_train)})
    val_dataset = XShards.partition({"x": (x_test, val_pos),
                                     "y": np.array(y_test)})

    token_shape = (max_len,)
    position_shape = (max_len,)
    token_input = Input(shape=token_shape)
    position_input = Input(shape=position_shape)
    O_seq = TransformerLayer.init(vocab=max_features, hidden_size=128, n_head=8,
                                  seq_len=max_len)([token_input, position_input])
    # Select the first output of the Transformer. The second is the pooled output.
    O_seq = SelectTable(0)(O_seq)
    O_seq = GlobalAveragePooling1D()(O_seq)
    O_seq = Dropout(0.2)(O_seq)
    outputs = Dense(2, activation='softmax')(O_seq)

    model = Model([token_input, position_input], outputs)
    model.summary()
    batch_size = 64

    print("Train started")
    est = Estimator.from_bigdl(model=model, loss=SparseCategoricalCrossEntropy(),
                               optimizer=Adam(), metrics=[Accuracy()])
    est.set_constant_gradient_clipping(0.1, 0.2)
    est.fit(data=train_dataset, batch_size=batch_size, epochs=1)
    result = est.evaluate(val_dataset)
    print(result)
    est.clear_gradient_clipping()
    est.set_l2_norm_gradient_clipping(0.5)
    est.fit(data=train_dataset, batch_size=batch_size, epochs=1)
    print("Train finished")

    print("Evaluating started")
    result = est.evaluate(val_dataset)
    print(result)
    print("Evaluating finished")

    est.save('work/saved_model')
    # est.load('work/saved_model')
    print("load and save API finished")

    est.get_train_summary(tag='Loss')
    est.get_validation_summary(tag='Top1Accuracy')
    print("get summary API finished")

    stop_orca_context()
def main():
    parser = argparse.ArgumentParser(description='PyTorch Tensorboard Example')
    parser.add_argument('--cluster_mode', type=str, default="local",
                        help='The cluster mode, such as local, yarn or k8s.')
    parser.add_argument('--backend', type=str, default="bigdl",
                        help='The backend of PyTorch Estimator; '
                             'bigdl and torch_distributed are supported.')
    args = parser.parse_args()
    if args.cluster_mode == "local":
        init_orca_context()
    elif args.cluster_mode == "yarn":
        init_orca_context(cluster_mode=args.cluster_mode, cores=4, num_nodes=2)

    tensorboard_dir = "runs"
    writer = SummaryWriter(tensorboard_dir + '/fashion_mnist_experiment_1')
    # constant for classes
    classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')

    # plot some random training images
    dataiter = iter(train_data_creator(config={}, batch_size=4))
    images, labels = next(dataiter)

    # create grid of images
    img_grid = torchvision.utils.make_grid(images)

    # show images
    matplotlib_imshow(img_grid, one_channel=True)

    # write to tensorboard
    writer.add_image('four_fashion_mnist_images', img_grid)

    # inspect the model using tensorboard
    writer.add_graph(model_creator(config={}), images)
    writer.close()

    # training loss vs. epochs
    criterion = nn.CrossEntropyLoss()
    batch_size = 4
    epochs = 5

    if args.backend == "bigdl":
        train_loader = train_data_creator(config={}, batch_size=batch_size)
        test_loader = validation_data_creator(config={}, batch_size=batch_size)

        net = model_creator(config={})
        optimizer = optimizer_creator(model=net, config={"lr": 0.001})
        orca_estimator = Estimator.from_torch(model=net,
                                              optimizer=optimizer,
                                              loss=criterion,
                                              metrics=[Accuracy()],
                                              backend="bigdl")
        orca_estimator.set_tensorboard(tensorboard_dir, "bigdl")
        orca_estimator.fit(data=train_loader, epochs=epochs,
                           validation_data=test_loader,
                           checkpoint_trigger=EveryEpoch())
        res = orca_estimator.evaluate(data=test_loader)
        print("Accuracy of the network on the test images: %s" % res)
    elif args.backend == "torch_distributed":
        orca_estimator = Estimator.from_torch(model=model_creator,
                                              optimizer=optimizer_creator,
                                              loss=criterion,
                                              metrics=[Accuracy()],
                                              backend="torch_distributed")
        stats = orca_estimator.fit(train_data_creator, epochs=epochs,
                                   batch_size=batch_size)
        for stat in stats:
            writer.add_scalar("training_loss", stat['train_loss'], stat['epoch'])
        print("Train stats: {}".format(stats))
        val_stats = orca_estimator.evaluate(validation_data_creator,
                                            batch_size=batch_size)
        print("Validation stats: {}".format(val_stats))
        orca_estimator.shutdown()
    else:
        raise NotImplementedError("Only bigdl and torch_distributed are supported "
                                  "as the backend, but got {}".format(args.backend))

    stop_orca_context()
        return x

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
net.train()

orca_estimator = Estimator.from_torch(model=net, optimizer=optimizer, loss=criterion,
                                      backend="bigdl")
orca_estimator.fit(data=trainloader, epochs=2, validation_data=testloader,
                   validation_methods=[Accuracy()], checkpoint_trigger=EveryEpoch())
print('Finished Training')

dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

res = orca_estimator.evaluate(data=testloader, validation_methods=[Accuracy()])[0]
total_num = res.total_num
result = res.result
print("Accuracy of the network on the %s test images: %s" % (total_num, result))
(trainingDF, validationDF) = labelDF.randomSplit([0.9, 0.1])

# run training and evaluation
featureTransformer = ChainedPreprocessing([
    RowToImageFeature(),
    ImageCenterCrop(224, 224),
    ImageChannelNormalize(123.0, 117.0, 104.0, 255.0, 255.0, 255.0),
    ImageMatToTensor(),
    ImageFeatureToTensor()
])

est = Estimator.from_bigdl(model=zoo_model,
                           loss=zoo_loss,
                           optimizer=SGD(learningrate=0.001),
                           feature_preprocessing=featureTransformer,
                           metrics=Accuracy())
est.fit(data=trainingDF, batch_size=16, epochs=1, feature_cols="image",
        caching_sample=False, validation_data=validationDF,
        validation_trigger=EveryEpoch())

shift = udf(lambda p: float(p.index(max(p))), DoubleType())
predictionDF = est.predict(data=validationDF, feature_cols="image") \
    .withColumn("prediction", shift(col('prediction'))).cache()

correct = predictionDF.filter("label=prediction").count()
overall = predictionDF.count()
accuracy = correct * 1.0 / overall
def test_xshards_spark_estimator(self):
    resource_path = os.path.join(os.path.split(__file__)[0], "../../../resources")

    def transform(df):
        result = {
            "x": [df['user'].to_numpy(), df['item'].to_numpy()],
            "y": df['label'].to_numpy()
        }
        return result

    file_path = os.path.join(resource_path, "orca/learn/ncf2.csv")
    data_shard = read_csv(file_path)
    data_shard = data_shard.transform_shard(transform)

    model = Sequential()
    model.add(Linear(2, 2))
    model.add(LogSoftMax())
    optim_method = SGD(learningrate=0.01)

    with tempfile.TemporaryDirectory() as temp_dir_name:
        estimator = Estimator.from_bigdl(model=model,
                                         optimizer=optim_method,
                                         loss=ClassNLLCriterion(),
                                         model_dir=temp_dir_name,
                                         feature_preprocessing=SeqToTensor([2]),
                                         label_preprocessing=SeqToTensor([1]))
        estimator.set_constant_gradient_clipping(0.1, 1.2)
        r1 = estimator.predict(data=data_shard)
        r_c = r1.collect()
        estimator.set_tensorboard(log_dir=temp_dir_name, app_name="test")
        estimator.fit(data=data_shard, epochs=5, batch_size=8,
                      validation_data=data_shard,
                      validation_metrics=[Accuracy()],
                      checkpoint_trigger=EveryEpoch())
        summary = estimator.get_train_summary(tag="Loss")
        temp_path = os.path.join(temp_dir_name, "save_model")
        estimator.save(temp_path)
        estimator.evaluate(data=data_shard, validation_metrics=[Accuracy()],
                           batch_size=8)
        result = estimator.predict(data=data_shard)
        assert type(result).__name__ == 'SparkXShards'
        result_c = result.collect()

        # predictions on a DataFrame should match those on the XShards
        df = self.get_estimator_df2()
        r0 = estimator.predict(df)
        r0_c = r0.collect()
        assert type(r0).__name__ == 'DataFrame'
        for idx in range(len(r0_c)):
            assert abs(r0_c[idx]["prediction"][0]
                       - result_c[0]["prediction"][idx][0]) == 0
            assert abs(r0_c[idx]["prediction"][1]
                       - result_c[0]["prediction"][idx][1]) == 0

        estimator.fit(data=df, epochs=6, batch_size=8, validation_data=df,
                      validation_metrics=[Accuracy()],
                      validation_trigger=EveryEpoch())
        summary = estimator.get_train_summary()

        # test load from checkpoint
        est2 = Estimator.from_bigdl(model=Sequential(), optimizer=None, loss=None,
                                    model_dir=None)
        est2.load(temp_dir_name, loss=ClassNLLCriterion(), is_checkpoint=True)
        r2 = est2.predict(data=data_shard)
        r2_c = r2.collect()
        assert (result_c[0]["prediction"] == r2_c[0]["prediction"]).all()

        # resume training
        est2.fit(data=data_shard, epochs=10, batch_size=8,
                 validation_data=data_shard,
                 validation_metrics=[Accuracy()],
                 checkpoint_trigger=EveryEpoch())
        est2.evaluate(data=data_shard, validation_metrics=[Accuracy()], batch_size=8)

        # test load from saved model
        est3 = Estimator.from_bigdl(model=Sequential(), optimizer=None, loss=None,
                                    model_dir=None)
        est3.load(temp_path, optimizer=optim_method, loss=ClassNLLCriterion())
        r3 = est3.predict(data=data_shard)
        r3_c = r3.collect()
        assert (r3_c[0]["prediction"] == r2_c[0]["prediction"]).all()
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
net.train()

orca_estimator = Estimator.from_torch(model=net, optimizer=optimizer, loss=criterion,
                                      backend="bigdl")
orca_estimator.fit(data=trainloader, epochs=2, validation_data=testloader,
                   validation_methods=[Accuracy()], checkpoint_trigger=EveryEpoch())
print('Finished Training')

dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

res = orca_estimator.evaluate(data=testloader, validation_methods=[Accuracy()])[0]
total_num = res.total_num
result = res.result
print("Accuracy of the network on the %s test images: %s" % (total_num, result))

stop_orca_context()
        return x

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
net.train()

orca_estimator = Estimator.from_torch(model=net, optimizer=optimizer, loss=criterion,
                                      backend="bigdl")
orca_estimator.fit(data=trainloader, epochs=2, validation_data=testloader,
                   validation_metrics=[Accuracy()], checkpoint_trigger=EveryEpoch())
print('Finished Training')

dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

res = orca_estimator.evaluate(data=testloader, validation_metrics=[Accuracy()])[0]
total_num = res.total_num
result = res.result
print("Accuracy of the network on the %s test images: %s" % (total_num, result))
def test_bigdl_pytorch_estimator_save_and_load(self):
    class Network(nn.Module):
        def __init__(self):
            super(Network, self).__init__()
            self.fc1 = nn.Linear(28 * 28, 500)
            self.fc2 = nn.Linear(500, 10)

        def forward(self, x):
            x = x.view(-1, 28 * 28)
            x = F.relu(self.fc1(x))
            x = self.fc2(x)
            return F.log_softmax(x, dim=1)

    model = Network()
    model.train()
    criterion = nn.NLLLoss()
    adam = torch.optim.Adam(model.parameters(), 0.001)

    dir = "./dataset"
    batch_size = 320
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(dir, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(dir, train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=False)

    # epoch 1
    est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion,
                               metrics=[Accuracy()])
    est.fit(data=train_loader, epochs=1, validation_data=test_loader,
            batch_size=batch_size, checkpoint_trigger=EveryEpoch())
    paras1 = list(est.get_model().named_parameters())
    est.save("model_epoch_1")

    # epoch 2
    est.fit(data=train_loader, epochs=2, validation_data=test_loader,
            batch_size=batch_size, checkpoint_trigger=EveryEpoch())
    paras2 = list(est.get_model().named_parameters())
    est.load("model_epoch_1")
    paras3 = list(est.get_model().named_parameters())

    # the loaded epoch-1 weights should differ from the epoch-2 weights
    load_success = 0
    for i in range(len(paras2)):
        name2, para2 = paras2[i]
        name3, para3 = paras3[i]
        if not torch.all(torch.eq(para2, para3)):
            load_success = 1
            break
    if not load_success:
        raise Exception("Load failed. Parameters did not change after loading.")

    # the loaded weights should match the epoch-1 snapshot exactly
    for i in range(len(paras1)):
        name1, para1 = paras1[i]
        name3, para3 = paras3[i]
        if not torch.all(torch.eq(para1, para3)):
            raise Exception("After reloading the model, " + name1 + " does not match.")
    print("pass")
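# A small hedged helper, not part of the original test: it factors out the
# parameter-comparison loops used above. Returns True when every named parameter
# in the two lists is element-wise equal.
def params_equal(paras_a, paras_b):
    for (name_a, para_a), (name_b, para_b) in zip(paras_a, paras_b):
        if not torch.all(torch.eq(para_a, para_b)):
            return False
    return True

# With this helper, the two checks above could read:
#     assert not params_equal(paras2, paras3)  # load actually changed the weights
#     assert params_equal(paras1, paras3)      # loaded weights match the snapshot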