def test_caffe2_simple_model(self):
    model = ModelHelper(name="mnist")
    # Note: these blob shapes do not match what the net expects. That is fine
    # here because the test only converts the model to a graph_def and never
    # runs the forward pass.
    workspace.FeedBlob("data", np.random.randn(1, 3, 64, 64).astype(np.float32))
    workspace.FeedBlob("label", np.random.randn(1, 1000).astype(np.int64))
    with core.NameScope("conv1"):
        # Image size (for the 28 x 28 MNIST input this net assumes): 28 x 28 -> 24 x 24
        conv1 = brew.conv(model, "data", 'conv1', dim_in=1, dim_out=20, kernel=5)
        # Image size: 24 x 24 -> 12 x 12
        pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)
        # Image size: 12 x 12 -> 8 x 8
        conv2 = brew.conv(model, pool1, 'conv2', dim_in=20, dim_out=100, kernel=5)
        # Image size: 8 x 8 -> 4 x 4
        pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)
    with core.NameScope("classifier"):
        # 100 * 4 * 4 is dim_out of the previous layer times the 4 x 4 image size
        fc3 = brew.fc(model, pool2, 'fc3', dim_in=100 * 4 * 4, dim_out=500)
        relu = brew.relu(model, fc3, fc3)
        pred = brew.fc(model, relu, 'pred', 500, 10)
        softmax = brew.softmax(model, pred, 'softmax')
        xent = model.LabelCrossEntropy([softmax, "label"], 'xent')
        # compute the expected loss
        loss = model.AveragedLoss(xent, "loss")
    model.net.RunAllOnMKL()
    model.param_init_net.RunAllOnMKL()
    model.AddGradientOperators([loss], skip=1)
    blob_name_tracker = {}
    graph = c2_graph.model_to_graph_def(
        model,
        blob_name_tracker=blob_name_tracker,
        shapes={},
        show_simplified=False,
    )
    compare_proto(graph, self)
def MLP(order, cudnn_ws, ideep):
    model = ModelHelper(name="benchmark")
    d = 256
    depth = 20
    width = 3
    for i in range(depth):
        for j in range(width):
            current = "fc_{}_{}".format(i, j) if i > 0 else "data"
            next_ = "fc_{}_{}".format(i + 1, j)
            brew.fc(
                model,
                current,
                next_,
                dim_in=d,
                dim_out=d,
                weight_init=('XavierFill', {}),
                bias_init=('XavierFill', {}),
            )
    brew.sum(model, ["fc_{}_{}".format(depth, j) for j in range(width)], ["sum"])
    brew.fc(
        model,
        "sum",
        "last",
        dim_in=d,
        dim_out=1000,
        weight_init=('XavierFill', {}),
        bias_init=('XavierFill', {}),
    )
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, d
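# A minimal sketch (not part of the benchmark above) of how MLP might be
# exercised. The batch size, the False value for ideep, and the
# FeedBlob/CreateNet driver code are illustrative assumptions.
model, d = MLP(order="NCHW", cudnn_ws=0, ideep=False)
workspace.FeedBlob("data", np.random.randn(128, d).astype(np.float32))
workspace.FeedBlob("label", np.random.randint(1000, size=128).astype(np.int32))
workspace.RunNetOnce(model.param_init_net)
workspace.CreateNet(model.net)
workspace.RunNet(model.net)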
def _createDense(self, dtype=core.DataType.FLOAT):
    perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
    np.random.seed(123)  # make test deterministic
    numpy_dtype = np.float32 if dtype == core.DataType.FLOAT else np.float16
    initializer = Initializer if dtype == core.DataType.FLOAT else pFP16Initializer
    data = np.random.randint(2, size=(20, perfect_model.size)).astype(numpy_dtype)
    label = np.dot(data, perfect_model)[:, np.newaxis]
    model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
    out = brew.fc(
        model, 'data', 'fc', perfect_model.size, 1,
        ('ConstantFill', {}), ('ConstantFill', {}), axis=0,
        WeightInitializer=initializer, BiasInitializer=initializer,
    )
    if dtype == core.DataType.FLOAT16:
        out = model.HalfToFloat(out, out + "_fp32")
    sq = model.SquaredL2Distance([out, 'label'])
    loss = model.AveragedLoss(sq, "avg_loss")
    grad_map = model.AddGradientOperators([loss])
    self.assertIsInstance(grad_map['fc_w'], core.BlobReference)
    return (model, perfect_model, data, label)
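# A hypothetical consumer of _createDense, sketched for illustration. It would
# live in the same test class; build_sgd, the learning rate, and the iteration
# count are assumptions, not the original test's settings. Because the fc uses
# axis=0, one (data, label) row is fed per step.
def testDenseSketch(self):
    model, perfect_model, data, label = self._createDense()
    optimizer.build_sgd(model, base_learning_rate=0.1)
    workspace.FeedBlob('data', data[0])
    workspace.FeedBlob('label', label[0])
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, True)
    for _ in range(200):
        idx = np.random.randint(data.shape[0])
        workspace.FeedBlob('data', data[idx])
        workspace.FeedBlob('label', label[idx])
        workspace.RunNet(model.net.Proto().name)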
def OverFeat(order, cudnn_ws, mkl):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
        'ws_nbytes_limit': str(cudnn_ws),
    }
    model = ModelHelper(name='overfeat', arg_scope=my_arg_scope)
    conv1 = brew.conv(model, "data", "conv1", 3, 96, 11,
                      ('XavierFill', {}), ('ConstantFill', {}), stride=4)
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)
    conv2 = brew.conv(model, pool1, "conv2", 96, 256, 5,
                      ('XavierFill', {}), ('ConstantFill', {}))
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)
    conv3 = brew.conv(model, pool2, "conv3", 256, 512, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(model, relu3, "conv4", 512, 1024, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu4 = brew.relu(model, conv4, "conv4")
    conv5 = brew.conv(model, relu4, "conv5", 1024, 1024, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu5 = brew.relu(model, conv5, "conv5")
    pool5 = brew.max_pool(model, relu5, "pool5", kernel=2, stride=2)
    fc6 = brew.fc(model, pool5, "fc6", 1024 * 6 * 6, 3072,
                  ('XavierFill', {}), ('ConstantFill', {}))
    relu6 = brew.relu(model, fc6, "fc6")
    fc7 = brew.fc(model, relu6, "fc7", 3072, 4096,
                  ('XavierFill', {}), ('ConstantFill', {}))
    relu7 = brew.relu(model, fc7, "fc7")
    fc8 = brew.fc(model, relu7, "fc8", 4096, 1000,
                  ('XavierFill', {}), ('ConstantFill', {}))
    pred = brew.softmax(model, fc8, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    if not mkl:
        loss = model.AveragedLoss(xent, "loss")
    return model, 231
def GenerateLossOps(
    model: ModelHelper,
    model_id: str,
    output_blob: str,
    label_blob: str,
    loss_blob: str,
) -> None:
    """
    Adds loss operators to the net. The loss is computed as a squared L2
    distance between output and label, then averaged over all items in the
    minibatch.

    :param model: ModelHelper object to add loss operators to.
    :param model_id: String identifier.
    :param output_blob: Blob containing the output of the net.
    :param label_blob: Blob containing the labels.
    :param loss_blob: Blob in which to store the loss.
    """
    dist = model.SquaredL2Distance([label_blob, output_blob], model_id + "dist")
    model.AveragedLoss(dist, loss_blob)
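# A hypothetical call site for GenerateLossOps; the model, blob names, and
# dimensions below are made up for illustration.
example = ModelHelper(name="loss_example")
out = brew.fc(example, "features", "prediction", dim_in=4, dim_out=1)
GenerateLossOps(example, "example_", out, "labels", "loss")
example.AddGradientOperators(["loss"])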
def main(opt_name):
    workspace.FeedBlob('input', np.random.randn(2, 16).astype(np.float32))
    workspace.FeedBlob('label', np.array([0, 1]).astype(np.float32))
    helper = ModelHelper("sample_model")
    fc = brew.fc(helper, "input", "fc", dim_in=16, dim_out=8)
    relu = helper.Relu(fc, 'relu')
    fc2 = brew.fc(helper, relu, "fc2", dim_in=8, dim_out=1)
    label_ex = helper.ExpandDims("label", "label_ex", dims=[1])
    xent = helper.SigmoidCrossEntropyWithLogits([fc2, label_ex], 'xent')
    loss = helper.AveragedLoss(xent, 'loss')
    helper.AddGradientOperators([loss])

    if opt_name == "manual":
        ONE = helper.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
        LR = helper.param_init_net.ConstantFill([], "LR", shape=[1], value=-0.03)
        for param in helper.params:
            param_grad = helper.param_to_grad[param]
            helper.WeightedSum([param, ONE, param_grad, LR], param)
    elif opt_name == "sgd":
        optimizer.build_sgd(helper, 0.03)
    elif opt_name == "adagrad":
        # Caffe2 does not support rowwise Adagrad for dense parameters,
        # and it does not appear to have LAMB support yet.
        optimizer.build_adagrad(helper, 0.03)
    elif opt_name == "adam":
        optimizer.build_adam(helper, 0.03)
    else:
        assert False, f"Unsupported optimizer {opt_name}"

    workspace.RunNetOnce(helper.param_init_net)
    workspace.RunNetOnce(helper.net)

    import pdb
    pdb.set_trace()
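# Hypothetical entry point: main() ends in pdb.set_trace(), so the workspace
# can be inspected interactively after one forward/backward pass. The choice
# of "sgd" here is arbitrary.
if __name__ == "__main__":
    main("sgd")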
def Inception(order, cudnn_ws, ideep):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
        'ws_nbytes_limit': str(cudnn_ws),
    }
    model = ModelHelper(name="inception", arg_scope=my_arg_scope)
    conv1 = brew.conv(model, "data", "conv1", 3, 64, 7,
                      ('XavierFill', {}), ('ConstantFill', {}), stride=2, pad=3)
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=3, stride=2, pad=1)
    conv2a = brew.conv(model, pool1, "conv2a", 64, 64, 1,
                       ('XavierFill', {}), ('ConstantFill', {}))
    conv2a = brew.relu(model, conv2a, conv2a)
    conv2 = brew.conv(model, conv2a, "conv2", 64, 192, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=3, stride=2, pad=1)
    # Inception modules
    inc3 = _InceptionModule(model, pool2, 192, "inc3", 64, [96, 128], [16, 32], 32)
    inc4 = _InceptionModule(model, inc3, 256, "inc4", 128, [128, 192], [32, 96], 64)
    pool5 = brew.max_pool(model, inc4, "pool5", kernel=3, stride=2, pad=1)
    inc5 = _InceptionModule(model, pool5, 480, "inc5", 192, [96, 208], [16, 48], 64)
    inc6 = _InceptionModule(model, inc5, 512, "inc6", 160, [112, 224], [24, 64], 64)
    inc7 = _InceptionModule(model, inc6, 512, "inc7", 128, [128, 256], [24, 64], 64)
    inc8 = _InceptionModule(model, inc7, 512, "inc8", 112, [144, 288], [32, 64], 64)
    inc9 = _InceptionModule(model, inc8, 528, "inc9", 256, [160, 320], [32, 128], 128)
    pool9 = brew.max_pool(model, inc9, "pool9", kernel=3, stride=2, pad=1)
    inc10 = _InceptionModule(model, pool9, 832, "inc10", 256, [160, 320], [32, 128], 128)
    inc11 = _InceptionModule(model, inc10, 832, "inc11", 384, [192, 384], [48, 128], 128)
    pool11 = brew.average_pool(model, inc11, "pool11", kernel=7, stride=1)
    fc = brew.fc(model, pool11, "fc", 1024, 1000,
                 ('XavierFill', {}), ('ConstantFill', {}))
    # It seems that Soumith's benchmark does not have softmax on top for
    # Inception. We will add it anyway so we can have a proper backward pass.
    pred = brew.softmax(model, fc, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    loss = model.AveragedLoss(xent, "loss")
    return model, 224
def VGGA(order, cudnn_ws, ideep):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
        'ws_nbytes_limit': str(cudnn_ws),
    }
    model = ModelHelper(name='vgg-a', arg_scope=my_arg_scope)
    conv1 = brew.conv(model, "data", "conv1", 3, 64, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)
    conv2 = brew.conv(model, pool1, "conv2", 64, 128, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)
    conv3 = brew.conv(model, pool2, "conv3", 128, 256, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(model, relu3, "conv4", 256, 256, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu4 = brew.relu(model, conv4, "conv4")
    pool4 = brew.max_pool(model, relu4, "pool4", kernel=2, stride=2)
    conv5 = brew.conv(model, pool4, "conv5", 256, 512, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu5 = brew.relu(model, conv5, "conv5")
    conv6 = brew.conv(model, relu5, "conv6", 512, 512, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu6 = brew.relu(model, conv6, "conv6")
    pool6 = brew.max_pool(model, relu6, "pool6", kernel=2, stride=2)
    conv7 = brew.conv(model, pool6, "conv7", 512, 512, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu7 = brew.relu(model, conv7, "conv7")
    conv8 = brew.conv(model, relu7, "conv8", 512, 512, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu8 = brew.relu(model, conv8, "conv8")
    pool8 = brew.max_pool(model, relu8, "pool8", kernel=2, stride=2)
    fcix = brew.fc(model, pool8, "fcix", 512 * 7 * 7, 4096,
                   ('XavierFill', {}), ('ConstantFill', {}))
    reluix = brew.relu(model, fcix, "fcix")
    fcx = brew.fc(model, reluix, "fcx", 4096, 4096,
                  ('XavierFill', {}), ('ConstantFill', {}))
    relux = brew.relu(model, fcx, "fcx")
    fcxi = brew.fc(model, relux, "fcxi", 4096, 1000,
                   ('XavierFill', {}), ('ConstantFill', {}))
    pred = brew.softmax(model, fcxi, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    loss = model.AveragedLoss(xent, "loss")
    return model, 231
def AlexNet(order, cudnn_ws, device):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
        'ws_nbytes_limit': str(cudnn_ws),
    }
    model = ModelHelper(name="alexnet", arg_scope=my_arg_scope)
    conv1 = brew.conv(model, "data", "conv1", 3, 64, 11,
                      ('XavierFill', {}), ('ConstantFill', {}), stride=4, pad=2)
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=3, stride=2)
    conv2 = brew.conv(model, pool1, "conv2", 64, 192, 5,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=2)
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=3, stride=2)
    conv3 = brew.conv(model, pool2, "conv3", 192, 384, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(model, relu3, "conv4", 384, 256, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu4 = brew.relu(model, conv4, "conv4")
    conv5 = brew.conv(model, relu4, "conv5", 256, 256, 3,
                      ('XavierFill', {}), ('ConstantFill', {}), pad=1)
    relu5 = brew.relu(model, conv5, "conv5")
    pool5 = brew.max_pool(model, relu5, "pool5", kernel=3, stride=2)
    fc6 = brew.fc(model, pool5, "fc6", 256 * 6 * 6, 4096,
                  ('XavierFill', {}), ('ConstantFill', {}))
    relu6 = brew.relu(model, fc6, "fc6")
    fc7 = brew.fc(model, relu6, "fc7", 4096, 4096,
                  ('XavierFill', {}), ('ConstantFill', {}))
    relu7 = brew.relu(model, fc7, "fc7")
    fc8 = brew.fc(model, relu7, "fc8", 4096, 1000,
                  ('XavierFill', {}), ('ConstantFill', {}))
    pred = brew.softmax(model, fc8, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    if device != 'MKL':
        loss = model.AveragedLoss(xent, "loss")
    return model, 224
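# The builders above (OverFeat, Inception, VGGA, AlexNet) share one
# convention: they take the tensor order and a cuDNN workspace limit and
# return (model, input_image_size). A minimal driver sketch under that
# assumption; the batch size, blob names, and the "CPU" third argument are
# illustrative (note the third parameter's meaning varies per builder:
# an ideep/mkl flag for some, a device string for AlexNet).
def build_and_init(builder, order="NCHW", cudnn_ws=-1, batch=32):
    model, size = builder(order, cudnn_ws, "CPU")
    workspace.FeedBlob("data", np.random.randn(batch, 3, size, size).astype(np.float32))
    workspace.FeedBlob("label", np.random.randint(1000, size=batch).astype(np.int32))
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    return model

alexnet_model = build_and_init(AlexNet)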
# Print the predict and init net to see what protobuf was created for this model
print("************* Predict Net *************")
print(regression_model.net.Proto())
print("\n************* Init Net *************")
print(regression_model.param_init_net.Proto())


# #### Add the training operators and prime the workspace
#
# In this **very important** step, we specify the loss function, set up the
# SGD training algorithm, prime and initialize the workspace, and initialize
# our model's weights and biases.

# In[5]:


# The loss function is computed by a squared L2 distance,
# and then averaged over all items.
dist = regression_model.SquaredL2Distance(['Y_gt', y_pred], "dist")
loss = regression_model.AveragedLoss(dist, "loss")

# Add the gradient operators and set up the SGD algorithm
regression_model.AddGradientOperators([loss])
optimizer.build_sgd(regression_model, base_learning_rate=learning_rate)

# Prime the workspace with some data
workspace.FeedBlob("Y_gt", Y_gt.astype(np.float32))
workspace.FeedBlob("X", X.astype(np.float32))

# Run the init net to prepare the workspace, then create the net
workspace.RunNetOnce(regression_model.param_init_net)
workspace.CreateNet(regression_model.net)

# Inject our desired initial weights and bias
workspace.FeedBlob("y_pred_w", np.array([initial_weights]).astype(np.float32))
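# A plausible continuation (not from the excerpt above): feed the bias the
# same way as the weights, run the training net for some iterations, and
# fetch the learned parameters back. The blob name "y_pred_b", the
# initial_bias variable, and the iteration count are assumptions.
workspace.FeedBlob("y_pred_b", np.array([initial_bias]).astype(np.float32))
for _ in range(100):
    workspace.RunNet(regression_model.net)
print("Learned weights:", workspace.FetchBlob("y_pred_w"))
print("Learned bias:", workspace.FetchBlob("y_pred_b"))
print("Final loss:", workspace.FetchBlob("loss"))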