Пример #1
0
 def architecture_children(self):
     """
     Build the children of this node: an initial convolution stage
     followed by ``self.steps`` further convolution stages, all placed in
     a single sequential node.

     Each further stage creates a convolution (stride 1, "same" padding)
     and an elementwise sum whose two inputs are reference nodes pointing
     at that stage's convolution and at the ``_z0`` node of the initial
     stage; the sum is followed by identity, ReLU, local response
     normalization and identity nodes.

     Returns a one-element list holding the sequential node.
     """
     prefix = self.name

     def named(suffix):
         # every node name is this node's name plus a fixed suffix
         return prefix + suffix

     # TODO set LRN n = num_filters / 8 + 1
     layers = [
         # NOTE: not explicitly giving the first conv a pad of "same",
         # since the first conv can have any output shape
         tn.DnnConv2DWithBiasNode(named("_conv0")),
         tn.IdentityNode(named("_z0")),
         tn.ReLUNode(named("_z0_relu")),
         lrn.LocalResponseNormalizationNode(named("_z0_lrn")),
         tn.IdentityNode(named("_x0")),
     ]

     for step in range(1, self.steps + 1):
         conv_name = named("_conv%d" % step)
         step_conv = tn.DnnConv2DWithBiasNode(conv_name,
                                              stride=(1, 1),
                                              pad="same")
         # sum of this step's conv and the initial stage's "_z0" node,
         # both pulled in by name through reference nodes
         sum_inputs = [
             tn.ReferenceNode(named("_sum%d_curr" % step),
                              reference=conv_name),
             tn.ReferenceNode(named("_sum%d_prev" % step),
                              reference=named("_z0")),
         ]
         step_sum = tn.ElementwiseSumNode(named("_sum%d" % step), sum_inputs)
         layers.extend([
             step_conv,
             step_sum,
             tn.IdentityNode(named("_z%d" % step)),
             tn.ReLUNode(named("_z%d_relu" % step)),
             lrn.LocalResponseNormalizationNode(named("_z%d_lrn" % step)),
             tn.IdentityNode(named("_x%d" % step)),
         ])

     return [tn.SequentialNode(named("_sequential"), layers)]
Пример #2
0
    def architecture_children(self):
        """
        Build a gated combination of mean pooling and max pooling.

        A gate subtree (add axis -> batch-fold wrapper around a 1-filter
        dnn conv -> remove axis -> sigmoid) is multiplied elementwise with
        a mean-pool node. A second subtree references the gate by name,
        multiplies it by -1 and adds 1, and is multiplied elementwise with
        a max-pool node. The two products are summed.

        Returns a one-element list holding the sum node.
        """
        prefix = self.name

        # gate subtree
        gate_layers = [
            batch_fold.AddAxisNode(prefix + "_add_axis", axis=2),
            batch_fold.FoldUnfoldAxisIntoBatchNode(
                prefix + "_batch_fold",
                # NOTE: using dnn conv, since pooling is normally strided
                # and the normal conv is slow with strides
                tn.DnnConv2DWithBiasNode(prefix + "_conv", num_filters=1),
                axis=1),
            batch_fold.RemoveAxisNode(prefix + "_remove_axis", axis=2),
            tn.SigmoidNode(prefix + "_gate_sigmoid"),
        ]
        gate_node = tn.SequentialNode(prefix + "_gate_seq", gate_layers)

        # (gate * -1) + 1, computed from a reference to the gate subtree
        inverse_gate_layers = [
            tn.ReferenceNode(prefix + "_gate_ref", reference=gate_node.name),
            # NOTE(review): this node's name is only the prefix plus "_";
            # it looks like a suffix may be missing, but it is kept as-is
            # because changing it would change the node's name
            tn.MultiplyConstantNode(prefix + "_", value=-1),
            tn.AddConstantNode(prefix + "_add1", value=1),
        ]
        inverse_gate_node = tn.SequentialNode(prefix + "_max_gate",
                                              inverse_gate_layers)

        # mean pool weighted by the gate
        mean_pool = tn.MeanPool2DNode(prefix + "_mean_pool")
        mean_node = tn.ElementwiseProductNode(prefix + "_mean_product",
                                              [mean_pool, gate_node])

        # max pool weighted by the inverse gate
        max_pool = tn.MaxPool2DNode(prefix + "_max_pool")
        max_node = tn.ElementwiseProductNode(prefix + "_max_product",
                                             [max_pool, inverse_gate_node])

        combined = tn.ElementwiseSumNode(prefix + "_sum",
                                         [mean_node, max_node])
        return [combined]
Пример #3
0
    def architecture_children(self):
        """
        Build four parallel paths (1x1, 3x3, 5x5 and pool projection) and
        concatenate them.

        The activation placed after each convolution is the "activation"
        entry of ``self.raw_children()`` when there is one, otherwise a
        new ReLU node. It is passed through
        ``canopy.node_utils.format_node_name`` together with a name
        template containing ``%s`` for each position where it is used.

        Returns a one-element list holding the concatenate node.
        """
        children = self.raw_children()
        prefix = self.name
        activation = (children["activation"]
                      if "activation" in children
                      else tn.ReLUNode(prefix + "_relu"))

        def conv(suffix, filter_size):
            # "same"-padded dnn conv named after this node
            return tn.DnnConv2DWithBiasNode(
                prefix + suffix, filter_size=filter_size, pad="same")

        def act(name_template_suffix):
            # the "%s" in the template is left for format_node_name
            return canopy.node_utils.format_node_name(
                activation, prefix + name_template_suffix)

        path_1x1 = tn.SequentialNode(prefix + "_1x1", [
            conv("_1x1conv", (1, 1)),
            act("_%s_1x1"),
        ])

        path_3x3 = tn.SequentialNode(prefix + "_3x3", [
            conv("_3x3reduce", (1, 1)),
            act("_%s_3x3reduce"),
            conv("_3x3conv", (3, 3)),
            act("_%s_3x3"),
        ])

        path_5x5 = tn.SequentialNode(prefix + "_5x5", [
            conv("_5x5reduce", (1, 1)),
            act("_%s_5x5reduce"),
            conv("_5x5conv", (5, 5)),
            act("_%s_5x5"),
        ])

        pool_layers = [
            tn.DnnMaxPoolNode(
                prefix + "_poolprojmax",
                pool_stride=(1, 1),
                # TODO parameterize
                # also need to make padding be dependent on pool size
                pool_size=(3, 3),
                pad=(1, 1)),
            conv("_poolproj1x1", (1, 1)),
            act("_%s_poolproj1x1"),
        ]
        path_pool = tn.SequentialNode(prefix + "_poolproj", pool_layers)

        paths = [path_1x1, path_3x3, path_5x5, path_pool]
        return [tn.ConcatenateNode(prefix + "_concat", paths)]
Пример #4
0
import treeano
import treeano.nodes as tn
import canopy
import canopy.sandbox.datasets

# number of examples per batch; also the leading entry of the input
# node's shape in the model definition
BATCH_SIZE = 256
# float dtype configured in theano
# NOTE(review): `theano` is not imported in the lines visible here -
# confirm it is imported elsewhere in the file
fX = theano.config.floatX
# CIFAR-10 data from canopy's sandbox datasets, unpacked into three splits
train, valid, test = canopy.sandbox.datasets.cifar10()

# based off of architecture from "Scalable Bayesian Optimization Using
# Deep Neural Networks" http://arxiv.org/abs/1502.05700
model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(BATCH_SIZE, 3, 32, 32)),
        tn.DnnConv2DWithBiasNode("conv1", num_filters=96),
        tn.ReLUNode("relu1"),
        tn.DnnConv2DWithBiasNode("conv2", num_filters=96),
        tn.ReLUNode("relu2"),
        tn.MaxPool2DNode("mp1"),
        tn.DropoutNode("do1", dropout_probability=0.1),
        tn.DnnConv2DWithBiasNode("conv3", num_filters=192),
        tn.ReLUNode("relu3"),
        tn.DnnConv2DWithBiasNode("conv4", num_filters=192),
        tn.ReLUNode("relu4"),
        tn.DnnConv2DWithBiasNode("conv5", num_filters=192),
        tn.ReLUNode("relu5"),
        tn.MaxPool2DNode("mp2"),
        tn.DropoutNode("do2", dropout_probability=0.5),
        tn.DnnConv2DWithBiasNode("conv6", num_filters=192),
        tn.ReLUNode("relu6"),
Пример #5
0
def vgg_16_nodes(conv_only):
    """
    Build the convolutional part of a VGG-16 network as a tree of nodes:
    five groups of conv + ReLU pairs, each group followed by a max pool.

    conv_only:
    whether or not to only return conv layers (before FC layers);
    asserted to be truthy, since no dense nodes are created here
    """
    assert conv_only

    # (group number, conv + ReLU pairs in the group, num_filters)
    group_specs = (
        (1, 2, 64),
        (2, 2, 128),
        (3, 3, 256),
        (4, 3, 512),
        (5, 3, 512),
    )

    layers = []
    for group, num_convs, num_filters in group_specs:
        conv_relu_pairs = []
        for idx in range(1, num_convs + 1):
            conv_relu_pairs.append(
                tn.DnnConv2DWithBiasNode("conv%d_%d" % (group, idx)))
            conv_relu_pairs.append(
                tn.ReLUNode("relu%d_%d" % (group, idx)))
        # num_filters is set once per group through a hyperparameter node
        layers.append(
            tn.HyperparameterNode(
                "conv_group_%d" % group,
                tn.SequentialNode("conv_group_%d_seq" % group,
                                  conv_relu_pairs),
                num_filters=num_filters))
        layers.append(tn.MaxPool2DNode("pool%d" % group))
    # TODO add dense nodes

    return tn.HyperparameterNode(
        "vgg16",
        tn.SequentialNode("vgg16_seq", layers),
        pad="same",
        filter_size=(3, 3),
        pool_size=(2, 2),
        # VGG net uses cross-correlation by default
        conv_mode="cross",
    )
Пример #6
0
# - 2x2 maxpool
# - 5x5 conv, 32 filters
# - ReLU
# - 2x2 maxpool
# - fully connected layer - 256 units
# - 50% dropout
# - fully connected layer - 10 units
# - softmax

# - the batch size can be provided as `None` to make the network
#   work for multiple different batch sizes
model = tn.HyperparameterNode(
    "model",
    tn.SequentialNode("seq", [
        tn.InputNode("x", shape=(None, 1, 28, 28)),
        tn.DnnConv2DWithBiasNode("conv1"),
        tn.ReLUNode("relu1"),
        tn.DnnMaxPoolNode("mp1"),
        tn.DnnConv2DWithBiasNode("conv2"),
        tn.ReLUNode("relu2"),
        tn.DnnMaxPoolNode("mp2"),
        tn.DenseNode("fc1"),
        tn.ReLUNode("relu3"),
        tn.DropoutNode("do1"),
        tn.DenseNode("fc2", num_units=10),
        tn.SoftmaxNode("pred"),
    ]),
    num_filters=32,
    filter_size=(5, 5),
    pool_size=(2, 2),
    num_units=256,
Пример #7
0
def load_network(update_scale_factor):
    """
    Assemble a classifier that starts with an affine spatial transformer,
    wrap it with an Adam update node and a total-cost node, and return
    the network after building it.

    update_scale_factor:
    passed as ``update_scale_factor`` to the UpdateScaleNode that wraps
    the spatial transformer
    """
    # subtree handed to the spatial transformer node; its last dense
    # layer has 6 units and a small-std normal weight init
    loc_layers = [
        tn.DnnMaxPoolNode("loc_pool1"),
        tn.DnnConv2DWithBiasNode("loc_conv1"),
        tn.DnnMaxPoolNode("loc_pool2"),
        bn.NoScaleBatchNormalizationNode("loc_bn1"),
        tn.ReLUNode("loc_relu1"),
        tn.DnnConv2DWithBiasNode("loc_conv2"),
        bn.NoScaleBatchNormalizationNode("loc_bn2"),
        tn.ReLUNode("loc_relu2"),
        tn.DenseNode("loc_fc1", num_units=50),
        bn.NoScaleBatchNormalizationNode("loc_bn3"),
        tn.ReLUNode("loc_relu3"),
        tn.DenseNode("loc_fc2",
                     num_units=6,
                     inits=[treeano.inits.NormalWeightInit(std=0.001)]),
    ]
    localization_network = tn.HyperparameterNode(
        "loc",
        tn.SequentialNode("loc_seq", loc_layers),
        num_filters=20,
        filter_size=(5, 5),
        pool_size=(2, 2),
    )

    st_node = st.AffineSpatialTransformerNode(
        "st",
        localization_network,
        output_shape=(20, 20))

    classifier_layers = [
        tn.InputNode("x", shape=(None, 1, 60, 60)),
        # scaling the updates of the spatial transformer
        # seems to be very helpful, to allow the classification
        # net to learn what to look for, before prematurely
        # looking
        tn.UpdateScaleNode(
            "st_update_scale",
            st_node,
            update_scale_factor=update_scale_factor),
        tn.Conv2DWithBiasNode("conv1"),
        tn.MaxPool2DNode("mp1"),
        bn.NoScaleBatchNormalizationNode("bn1"),
        tn.ReLUNode("relu1"),
        tn.Conv2DWithBiasNode("conv2"),
        tn.MaxPool2DNode("mp2"),
        bn.NoScaleBatchNormalizationNode("bn2"),
        tn.ReLUNode("relu2"),
        tn.GaussianDropoutNode("do1"),
        tn.DenseNode("fc1"),
        bn.NoScaleBatchNormalizationNode("bn3"),
        tn.ReLUNode("relu3"),
        tn.DenseNode("fc2", num_units=10),
        tn.SoftmaxNode("pred"),
    ]
    model = tn.HyperparameterNode(
        "model",
        tn.SequentialNode("seq", classifier_layers),
        num_filters=32,
        filter_size=(3, 3),
        pool_size=(2, 2),
        num_units=256,
        dropout_probability=0.5,
        inits=[treeano.inits.HeUniformInit()],
        bn_update_moving_stats=True,
    )

    # cost between a reference to the "model" node and an int32 input "y"
    cost_node = tn.TotalCostNode("cost", {
        "pred": tn.ReferenceNode("pred_ref", reference="model"),
        "target": tn.InputNode("y", shape=(None,), dtype="int32"),
    })
    adam_node = tn.AdamNode("adam", {"subtree": model, "cost": cost_node})
    with_updates = tn.HyperparameterNode(
        "with_updates",
        adam_node,
        cost_function=treeano.utils.categorical_crossentropy_i32,
        learning_rate=2e-3,
    )

    network = with_updates.network()
    network.build()  # build eagerly to share weights
    return network