def test_torchcriterion_constructor(self):
    criterion = nn.MSELoss()

    def lossFunc(input, label):
        loss1 = criterion(input[0], label[0])
        loss2 = criterion(input[1], label[1])
        loss = loss1 + 0.4 * loss2
        return loss

    az_criterion = TorchCriterion.from_pytorch(
        lossFunc,
        sample_input=(torch.ones(2, 2), torch.ones(2, 3)),
        sample_label=(torch.ones(2, 2), torch.ones(2, 3)))
    az_criterion = TorchCriterion.from_pytorch(lossFunc,
                                               ([2, 2], [2, 3]),
                                               ([2, 2], [2, 3]))
def test_cross_entropy_match(self):
    input = [[0.5, 1.], [-0.3, 1.2]]
    label = [3, 6]
    torch_input = torch.tensor(input)
    torch_label = torch.tensor(label).long()
    model = nn.Linear(2, 10)
    criterion = nn.CrossEntropyLoss()

    def lossFunc(input, target):
        return criterion.forward(input, target.flatten().long())

    torch_output = model.forward(torch_input)
    torch_loss = criterion.forward(torch_output, torch_label)
    torch_loss.backward()
    torch_grad = model.weight.grad.flatten().tolist() + \
        model.bias.grad.tolist()

    # AZ part
    az_net = TorchNet.from_pytorch(model, [1, 2])
    az_criterion = TorchCriterion.from_pytorch(lossFunc, [1, 10], [1, 1])

    az_input = np.array(input)
    az_label = np.array(label)
    az_output = az_net.forward(az_input)
    az_loss_output = az_criterion.forward(az_output, az_label)
    az_loss_backward = az_criterion.backward(az_output, az_label)
    az_model_backward = az_net.backward(az_input, az_loss_backward)
    az_grad = list(az_net.parameters().values())[0]['gradWeight']

    assert np.allclose(torch_loss.tolist(), az_loss_output)
    assert np.allclose(torch_grad, az_grad.tolist())
def test_linear_gradient_match(self):
    input = [[0.5, 1.], [-0.3, 1.2]]
    label = [[0.6], [-0.9]]
    torch_input = torch.tensor(input)
    torch_label = torch.tensor(label)
    model = nn.Linear(2, 1)
    criterion = nn.MSELoss()

    torch_output = model.forward(torch_input)
    torch_loss = criterion.forward(torch_output, torch_label)
    torch_loss.backward()
    torch_grad = model.weight.grad.tolist()[0] + model.bias.grad.tolist()

    # AZ part
    az_net = TorchNet.from_pytorch(model, [1, 2])
    az_criterion = TorchCriterion.from_pytorch(criterion, [1, 1], [1, 1])

    az_input = np.array(input)
    az_label = np.array(label)
    az_output = az_net.forward(az_input)
    az_loss_output = az_criterion.forward(az_output, az_label)
    az_loss_backward = az_criterion.backward(az_output, az_label)
    az_model_backward = az_net.backward(az_input, az_loss_backward)
    az_grad = list(az_net.parameters().values())[0]['gradWeight']

    assert np.allclose(torch_loss.tolist(), az_loss_output)
    assert np.allclose(torch_grad, az_grad.tolist())
def test_Lenet_gradient_match(self):
    class LeNet(nn.Module):
        def __init__(self):
            super(LeNet, self).__init__()
            self.conv1 = nn.Conv2d(1, 20, 5, 1)
            self.conv2 = nn.Conv2d(20, 50, 5, 1)
            self.fc1 = nn.Linear(4 * 4 * 50, 500)
            self.fc2 = nn.Linear(500, 10)

        def forward(self, x):
            x = F.relu(self.conv1(x))
            x = F.max_pool2d(x, 2, 2)
            x = F.relu(self.conv2(x))
            x = F.max_pool2d(x, 2, 2)
            x = x.view(-1, 4 * 4 * 50)
            x = F.relu(self.fc1(x))
            x = self.fc2(x)
            return F.log_softmax(x, dim=1)

    input = np.random.rand(2, 1, 28, 28)
    label = [7, 3]
    torch_input = torch.tensor(input).float()
    torch_label = torch.tensor(label).long()

    torch_model = LeNet()
    torch_criterion = nn.CrossEntropyLoss()

    torch_output = torch_model.forward(torch_input)
    torch_loss = torch_criterion.forward(torch_output, torch_label)
    torch_loss.backward()
    torch_grad = torch_model.conv1.weight.grad.flatten().tolist() + \
        torch_model.conv1.bias.grad.flatten().tolist() + \
        torch_model.conv2.weight.grad.flatten().tolist() + \
        torch_model.conv2.bias.grad.flatten().tolist() + \
        torch_model.fc1.weight.grad.flatten().tolist() + \
        torch_model.fc1.bias.grad.flatten().tolist() + \
        torch_model.fc2.weight.grad.flatten().tolist() + \
        torch_model.fc2.bias.grad.flatten().tolist()

    # AZ part
    az_net = TorchNet.from_pytorch(torch_model, [1, 1, 28, 28])

    def lossFunc(input, target):
        return torch_criterion.forward(input, target.flatten().long())

    az_criterion = TorchCriterion.from_pytorch(lossFunc, [1, 10], [1, 1])

    az_input = np.array(input)
    az_label = np.array(label)
    az_output = az_net.forward(az_input)
    az_loss_output = az_criterion.forward(az_output, az_label)
    az_loss_backward = az_criterion.backward(az_output, az_label)
    az_model_backward = az_net.backward(az_input, az_loss_backward)
    az_grad = list(az_net.parameters().values())[0]['gradWeight']

    assert np.allclose(torch_loss.tolist(), az_loss_output)
    assert np.allclose(torch_grad, az_grad.tolist(), atol=1.e-5, rtol=1.e-3)
def test_model_train_with_multiple_input(self):
    class TwoInputModel(nn.Module):
        def __init__(self):
            super(TwoInputModel, self).__init__()
            self.dense1 = nn.Linear(2, 2)
            self.dense2 = nn.Linear(2, 1)

        def forward(self, x1, x2):
            x1 = self.dense1(x1)
            x2 = self.dense2(x2)
            return x1, x2

    input = [[0.5, 1.], [-0.3, 1.2]]
    torch_input1 = torch.tensor(input, requires_grad=True)
    torch_input2 = torch.tensor(input, requires_grad=True)
    torch_label = (torch.ones(2, 2), torch.ones(2, 1))
    model = TwoInputModel()
    criterion = nn.MSELoss()

    def lossFunc(input, label):
        loss1 = criterion(input[0], label[0])
        loss2 = criterion(input[1], label[1])
        loss = loss1 + 0.4 * loss2
        return loss

    torch_output = model.forward(torch_input1, torch_input2)
    torch_loss = lossFunc(torch_output, torch_label)
    torch_loss.backward()
    torch_grad = model.dense1.weight.grad.tolist()[0] + \
        model.dense1.weight.grad.tolist()[1] + \
        model.dense1.bias.grad.tolist() + \
        model.dense2.weight.grad.tolist()[0] + \
        model.dense2.bias.grad.tolist()

    az_net = TorchNet.from_pytorch(
        model, sample_input=(torch.ones(2, 2), torch.ones(2, 2)))
    az_criterion = TorchCriterion.from_pytorch(
        loss=lossFunc,
        sample_input=(torch.ones(2, 2), torch.ones(2, 1)),
        sample_label=(torch.ones(2, 2), torch.ones(2, 1)))

    az_input = [np.array(input), np.array(input)]
    az_label = [np.ones([2, 2]), np.ones([2, 1])]
    az_output = az_net.forward(az_input)
    az_loss_output = az_criterion.forward(az_output, az_label)
    az_loss_backward = az_criterion.backward(az_output, az_label)
    az_model_backward = az_net.backward(az_input, az_loss_backward)
    az_grad = list(az_net.parameters().values())[0]['gradWeight']

    assert np.allclose(torch_loss.tolist(), az_loss_output)
    assert np.allclose(torch_grad, az_grad.tolist())
    assert np.allclose(az_model_backward[0], torch_input1.grad)
    assert np.allclose(az_model_backward[1], torch_input2.grad)
def test_conv2D_gradient_match(self):
    class SimpleTorchModel(nn.Module):
        def __init__(self):
            super(SimpleTorchModel, self).__init__()
            self.dense1 = nn.Linear(2, 48)
            self.conv1 = nn.Conv2d(3, 2, 2)
            self.dense2 = nn.Linear(2, 1)

        def forward(self, x):
            x = self.dense1(x)
            x = x.view(-1, 3, 4, 4)
            x = torch.relu(self.conv1(x))
            x = F.max_pool2d(x, 2)
            x = x.view(x.size(0), -1)
            x = torch.sigmoid(self.dense2(x))
            return x

    input = [[1., -0.5], [0.5, -1.]]
    label = [[1., -0.5]]
    torch_input = torch.tensor(input)
    torch_label = torch.tensor(label)

    torch_model = SimpleTorchModel()
    torch_criterion = nn.MSELoss()

    torch_output = torch_model.forward(torch_input)
    torch_loss = torch_criterion.forward(torch_output, torch_label)
    torch_loss.backward()
    torch_grad = torch_model.dense1.weight.grad.flatten().tolist() + \
        torch_model.dense1.bias.grad.flatten().tolist() + \
        torch_model.conv1.weight.grad.flatten().tolist() + \
        torch_model.conv1.bias.grad.flatten().tolist() + \
        torch_model.dense2.weight.grad.flatten().tolist() + \
        torch_model.dense2.bias.grad.flatten().tolist()

    # AZ part
    az_net = TorchNet.from_pytorch(torch_model, [1, 2])
    az_criterion = TorchCriterion.from_pytorch(
        loss=torch_criterion.forward, input_shape=[1, 1], label_shape=[1, 1])

    az_input = np.array(input)
    az_label = np.array(label)
    az_output = az_net.forward(az_input)
    az_loss_output = az_criterion.forward(az_output, az_label)
    az_loss_backward = az_criterion.backward(az_output, az_label)
    az_model_backward = az_net.backward(az_input, az_loss_backward)
    az_grad = list(az_net.parameters().values())[0]['gradWeight']

    assert np.allclose(torch_loss.tolist(), az_loss_output)
    assert np.allclose(torch_grad, az_grad.tolist())
def test_model_save_load(self):
    class SimpleTorchModel(nn.Module):
        def __init__(self):
            super(SimpleTorchModel, self).__init__()
            self.dense1 = nn.Linear(2, 4)
            self.dense2 = nn.Linear(4, 1)

        def forward(self, x):
            x = self.dense1(x)
            x = torch.sigmoid(self.dense2(x))
            return x

    df = self.sqlContext.createDataFrame(
        [(Vectors.dense([2.0, 1.0]), 1.0),
         (Vectors.dense([1.0, 2.0]), 0.0),
         (Vectors.dense([2.0, 1.0]), 1.0),
         (Vectors.dense([1.0, 2.0]), 0.0)],
        ["features", "label"])

    torch_model = SimpleTorchModel()
    torch_criterion = nn.MSELoss()

    az_model = TorchNet.from_pytorch(torch_model, [1, 2])
    az_criterion = TorchCriterion.from_pytorch(torch_criterion, [1, 1], [1, 1])
    estimator = NNEstimator(az_model, az_criterion) \
        .setBatchSize(4) \
        .setLearningRate(0.01) \
        .setMaxEpoch(10)

    nnModel = estimator.fit(df)
    res = nnModel.transform(df)

    try:
        tmp_dir = tempfile.mkdtemp()
        modelPath = os.path.join(tmp_dir, "model")
        az_model.savePytorch(modelPath)
        loaded = TorchNet(modelPath)
        resDF = NNModel(loaded).setPredictionCol("loaded").transform(res)
        assert resDF.filter("prediction==loaded").count() == resDF.count()
    finally:
        try:
            shutil.rmtree(tmp_dir)  # delete directory
        except OSError as exc:
            if exc.errno != errno.ENOENT:  # ENOENT - no such file or directory
                raise  # re-raise exception
Y_train = mnist.train_labels.float().numpy()
pd_df = pd.DataFrame()
pd_df['features'] = X_train.reshape((X_train.shape[0], 784)).tolist()
pd_df['label'] = Y_train.reshape((Y_train.shape[0])).tolist()

mnistDF = spark.createDataFrame(pd_df)
(trainingDF, validationDF) = mnistDF.randomSplit([0.8, 0.2])
trainingDF.show()

# define loss with Pytorch API
def lossFunc(input, target):
    return nn.CrossEntropyLoss().forward(input, target.flatten().long())

torch_model = LeNet()
model = TorchNet.from_pytorch(torch_model, [1, 1, 28, 28])
criterion = TorchCriterion.from_pytorch(lossFunc, [1, 10],
                                        torch.LongTensor([5]))
classifier = NNClassifier(model, criterion, SeqToTensor([1, 28, 28])) \
    .setBatchSize(64) \
    .setOptimMethod(Adam()) \
    .setLearningRate(0.001) \
    .setMaxEpoch(2)

nnClassifierModel = classifier.fit(trainingDF)

print("After training: ")
# NNClassifier predictions are 1-based; shift them to match the 0-based MNIST labels
shift = udf(lambda p: p - 1, DoubleType())
res = nnClassifierModel.transform(validationDF) \
    .withColumn("prediction", shift(col('prediction')))
res.show(100)

correct = res.filter("label=prediction").count()
pd_df['features'] = X_train.reshape((X_train.shape[0], 784)).tolist()
pd_df['label'] = Y_train.reshape((Y_train.shape[0])).tolist()

mnistDF = spark.createDataFrame(pd_df)
(trainingDF, validationDF) = mnistDF.randomSplit([0.8, 0.2])
trainingDF.show()

# define loss with Pytorch API
def lossFunc(input, target):
    return nn.CrossEntropyLoss().forward(input, target.flatten().long())

torch_model = LeNet()
model = TorchNet.from_pytorch(module=torch_model, input_shape=[1, 1, 28, 28])
criterion = TorchCriterion.from_pytorch(loss=lossFunc, input_shape=[1, 10],
                                        sample_label=torch.LongTensor([5]))
classifier = NNClassifier(model, criterion, SeqToTensor([1, 28, 28])) \
    .setBatchSize(64) \
    .setOptimMethod(Adam()) \
    .setLearningRate(0.001) \
    .setMaxEpoch(2)

nnClassifierModel = classifier.fit(trainingDF)

print("After training: ")
shift = udf(lambda p: p - 1, DoubleType())
res = nnClassifierModel.transform(validationDF) \
    .withColumn("prediction", shift(col('prediction')))
res.show(100)
if len(sys.argv) != 2:
    print(sys.argv)
    print("Need parameters: <imagePath>")
    exit(-1)

sparkConf = init_spark_conf().setAppName("resnet").setMaster("local[2]") \
    .set('spark.driver.memory', '10g')
sc = init_nncontext(sparkConf)
spark = SparkSession.builder.config(conf=sparkConf).getOrCreate()

torchnet = TorchNet.from_pytorch(CatDogModel(), [4, 3, 224, 224])

def lossFunc(input, target):
    return nn.CrossEntropyLoss().forward(input, target.flatten().long())

torchcriterion = TorchCriterion.from_pytorch(lossFunc, [1, 2],
                                             torch.LongTensor([1]))

# prepare training data as Spark DataFrame
image_path = sys.argv[1]
imageDF = NNImageReader.readImages(image_path, sc, resizeH=256, resizeW=256,
                                   image_codec=1)
getName = udf(lambda row: os.path.basename(row[0]), StringType())
getLabel = udf(lambda name: 1.0 if name.startswith('cat') else 0.0,
               DoubleType())
labelDF = imageDF.withColumn("name", getName(col("image"))) \
    .withColumn("label", getLabel(col('name'))).cache()
(trainingDF, validationDF) = labelDF.randomSplit([0.9, 0.1])

# run training and evaluation
featureTransformer = ChainedPreprocessing(
    [RowToImageFeature(), ImageCenterCrop(224, 224),
     ImageChannelNormalize(123.0, 117.0, 104.0, 255.0, 255.0, 255.0),
     ImageMatToTensor(), ImageFeatureToTensor()])
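# A minimal continuation sketch (not from the original script): the model,
# criterion, and feature transformer above are typically wired together with
# NNClassifier, following the same pattern as the MNIST examples in this file.
# The hyper-parameter values here are illustrative assumptions.
classifier = NNClassifier(torchnet, torchcriterion, featureTransformer) \
    .setBatchSize(16) \
    .setOptimMethod(Adam()) \
    .setLearningRate(0.001) \
    .setMaxEpoch(1) \
    .setFeaturesCol("image")
catdogModel = classifier.fit(trainingDF)
predictionDF = catdogModel.transform(validationDF)
predictionDF.show(10)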
sparkConf = init_spark_conf().setAppName("testNNClassifer").setMaster('local[1]')
sc = init_nncontext(sparkConf)
spark = SparkSession \
    .builder \
    .getOrCreate()

df = spark.createDataFrame([(Vectors.dense([2.0, 1.0]), 1.0),
                            (Vectors.dense([1.0, 2.0]), 0.0),
                            (Vectors.dense([2.0, 1.0]), 1.0),
                            (Vectors.dense([1.0, 2.0]), 0.0)],
                           ["features", "label"])

torch_model = SimpleTorchModel()
torch_criterion = nn.MSELoss()

az_model = TorchNet.from_pytorch(torch_model, [1, 2])
az_criterion = TorchCriterion.from_pytorch(torch_criterion, [1, 1], [1, 1])

classifier = NNClassifier(az_model, az_criterion) \
    .setBatchSize(4) \
    .setOptimMethod(Adam()) \
    .setLearningRate(0.01) \
    .setMaxEpoch(10)

nnClassifierModel = classifier.fit(df)

print("After training: ")
res = nnClassifierModel.transform(df)
res.show(10, False)
def test_torchcriterion_constructor(self):
    # two-input test
    criterion = nn.MSELoss()

    def lossFunc(input, label):
        loss1 = criterion(input[0], label[0])
        loss2 = criterion(input[1], label[1])
        loss = loss1 + 0.4 * loss2
        return loss

    TorchCriterion.from_pytorch(lossFunc,
                                (torch.ones(2, 2), torch.ones(2, 3)),
                                (torch.ones(2, 2), torch.ones(2, 3)))
    TorchCriterion.from_pytorch(lossFunc,
                                ([2, 2], [2, 3]),
                                ([2, 2], [2, 3]))
    TorchCriterion.from_pytorch(
        lossFunc,
        [torch.ones(2, 2), torch.ones(2, 3)],
        [torch.ones(2, 2), torch.ones(2, 3)])
    TorchCriterion.from_pytorch(lossFunc,
                                [[2, 2], [2, 3]],
                                [[2, 2], [2, 3]])

    # one-input test
    TorchCriterion.from_pytorch(criterion, [2, 1], [2, 1])
    TorchCriterion.from_pytorch(criterion, torch.ones(2, 2), torch.ones(2, 2))
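# The constructor calls above exercise every accepted form of
# (sample_input, sample_label): tuples or lists of sample tensors and tuples
# or lists of shape lists for multi-input losses, plus a plain shape list or
# a single tensor for single-input losses.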
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum)

    num_executors = 4
    num_cores_per_executor = 1
    hadoop_conf_dir = os.environ.get('HADOOP_CONF_DIR')
    sc = init_spark_on_yarn(
        hadoop_conf=hadoop_conf_dir,
        conda_name=os.environ["ZOO_CONDA_NAME"],  # The name of the created conda-env
        num_executor=num_executors,
        executor_cores=num_cores_per_executor,
        executor_memory="10g",
        driver_memory="10g",
        driver_cores=1,
        spark_conf={"spark.rpc.message.maxSize": "1024"})

    model.train()
    sgd = Adam()
    zooModel = TorchNet.from_pytorch(model, [64, 1, 28, 28])

    def lossFunc(input, target):
        return nn.NLLLoss().forward(input, target.flatten().long())

    zooCriterion = TorchCriterion.from_pytorch(lossFunc, [1, 2],
                                               torch.LongTensor([1]))
    # zooCriterion = SparseCategoricalCrossEntropy(zero_based_label=True)
    estimator = Estimator(zooModel, optim_methods=sgd)

    v_input = []
    v_target = []
    for data, target in test_loader:
        v_input.append([data.numpy()])
        v_target.append([target.numpy()])
    test_featureset = FeatureSet.minibatch(v_input, v_target)

    for epoch in range(1, args.epochs + 1):
        train(args, estimator, zooCriterion, train_loader, epoch)
        # test(args, estimator, zooCriterion, test_featureset)
        estimator.evaluate_minibatch(
            test_featureset, [Loss(zooCriterion), Accuracy()])

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
def train(train_loader, model, criterion, optimizer, epoch, args):
    num_executors = 2
    num_cores_per_executor = 4
    hadoop_conf_dir = os.environ.get('HADOOP_CONF_DIR')
    sc = init_spark_on_yarn(
        hadoop_conf=hadoop_conf_dir,
        conda_name=os.environ["ZOO_CONDA_NAME"],  # The name of the created conda-env
        num_executor=num_executors,
        executor_cores=num_cores_per_executor,
        executor_memory="20g",
        driver_memory="10g",
        driver_cores=1,
        spark_conf={"spark.rpc.message.maxSize": "1024"})

    model.train()
    sgd = Adam()
    zooModel = TorchNet.from_pytorch(model, [4, 3, 224, 224])

    def lossFunc(input, target):
        return nn.NLLLoss().forward(input, target.flatten().long())

    zooCriterion = TorchCriterion.from_pytorch(lossFunc, [1, 2],
                                               torch.LongTensor([1]))
    # zooCriterion = SparseCategoricalCrossEntropy(zero_based_label=True)
    estimator = Estimator(zooModel, optim_methods=sgd)

    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # if args.gpu is not None:
        #     images = images.cuda(args.gpu, non_blocking=True)
        #     target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        estimator.train_minibatch(images.numpy(),
                                  target.numpy().astype(np.int32),
                                  zooCriterion)
        # output = model(images)
        # loss = criterion(output, target)
        #
        # # measure accuracy and record loss
        # acc1, acc5 = accuracy(output, target, topk=(1, 5))
        # losses.update(loss.item(), images.size(0))
        # top1.update(acc1[0], images.size(0))
        # top5.update(acc5[0], images.size(0))
        #
        # # compute gradient and do SGD step
        # optimizer.zero_grad()
        # loss.backward()
        # optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)