Example #1
    def __init__(self, blocks=None, net_dim=None):
        super(SeqNet, self).__init__()
        self.is_double = False
        self.dims = []
        self.net_dim = net_dim
        self.transform = None if net_dim is None else Upsample(size=self.net_dim, mode="bilinear", consolidate_errors=True)
        self.blocks = [] if self.transform is None else [self.transform]
        if blocks is not None:
            self.blocks += [*blocks]
            self.blocks = Sequential(*self.blocks)
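
A hedged instantiation sketch for the constructor above (SeqNet and the layer classes are assumed to come from the same abstract-layer library used throughout these examples; the shapes are illustrative):

# Illustrative: passing net_dim prepends the bilinear Upsample; with net_dim=None only the given blocks are wrapped.
blocks = [Conv2d(3, 16, 3, stride=1, padding=1), ReLU((16, 32, 32)),
          Flatten(), Linear(16 * 32 * 32, 10)]
net = SeqNet(blocks=blocks, net_dim=32)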
Example #2
    def _init_layers(self):  # FIXME: multi-layer LSTM does not actually work
        if self.n_layers == 1:
            self.lstm_encoder = LstmLayer(self.input_size, self.hidden_size)
            self.lstm_decoder = LstmLayer(self.hidden_size, self.input_size)
        else:
            lstm_encoders = OrderedDict()
            lstm_encoders['lstm_0'] = LstmLayer(self.input_size,
                                                self.hidden_size)
            for i in range(1, self.n_layers):
                lstm_encoders['lstm_' + str(i)] = LstmLayer(
                    self.hidden_size, self.hidden_size)
            self.lstm_encoder = Sequential(lstm_encoders)

            lstm_decoders = OrderedDict()
            lstm_decoders['lstm_0'] = LstmLayer(self.hidden_size,
                                                self.hidden_size)
            for i in range(1, self.n_layers):
                lstm_decoders['lstm_' + str(i)] = LstmLayer(
                    self.hidden_size, self.hidden_size)
            self.lstm_decoder = Sequential(lstm_decoders)
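
For reference, the OrderedDict construction above follows the standard torch.nn.Sequential pattern, where each key becomes the submodule's name; a minimal plain-PyTorch illustration (nn.Linear stands in for LstmLayer):

from collections import OrderedDict
import torch.nn as nn

# Keys become attribute names: encoder.lstm_0, encoder.lstm_1.
encoder = nn.Sequential(OrderedDict([
    ('lstm_0', nn.Linear(8, 16)),
    ('lstm_1', nn.Linear(16, 16)),
]))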
Example #3
    def train_model(self, layers, loss_metrics, x_train, y_train):
        # build model
        self.model = Sequential(layers, loss_metrics)
        # train the model
        loss = self.model.fit(x_train,
                              y_train,
                              self.epochs,
                              self.lr,
                              self.batch_size,
                              print_output=True)
        avg_loss = np.mean(np.reshape(loss, (self.epochs, -1)), axis=1)
        return avg_loss
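
The per-epoch averaging in the last two lines can be checked in isolation; a self-contained numpy sketch (the loss values are made up):

import numpy as np

loss = np.arange(12, dtype=float)                      # e.g. 3 epochs x 4 batches of per-batch losses
avg_loss = np.mean(np.reshape(loss, (3, -1)), axis=1)  # one averaged loss per epoch
print(avg_loss)                                        # [1.5 5.5 9.5]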
Example #4
    def converter(self, net):
        if isinstance(net, nn.Sequential):
            seq_model = net
        else:
            seq_model = net.module
        for idx, layer in enumerate(seq_model):
            if isinstance(layer, nn.Linear):
                self.layers[idx + 1].linear.weight.data.copy_(layer.weight.data)
                self.layers[idx + 1].linear.bias.data.copy_(layer.bias.data)
            elif isinstance(layer, nn.Conv2d):
                self.layers[idx + 1].conv.weight.data.copy_(layer.weight.data)
                self.layers[idx + 1].conv.bias.data.copy_(layer.bias.data)
        self.blocks = Sequential(*self.layers)
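
The copy pattern used above (iterate an nn.Sequential, match layer types, copy weights in place) can be shown standalone in plain PyTorch; a minimal runnable sketch:

import torch.nn as nn

src = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
dst = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
for idx, layer in enumerate(src):
    if isinstance(layer, nn.Linear):
        # Copy trained parameters layer by layer; non-parametric layers (ReLU) are skipped.
        dst[idx].weight.data.copy_(layer.weight.data)
        dst[idx].bias.data.copy_(layer.bias.data)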
Example #5
    def get_block(self, in_planes, out_planes, n_blocks, stride, dim):
        strides = [stride] + [1] * (n_blocks - 1)
        layers = []
        downsample = None  # stays None when no projection is needed (in_planes == out_planes)

        if in_planes != out_planes:
            if self.res_block == FixupBasicBlock:
                downsample = Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
                # downsample = AvgPool2d(1, stride=stride)
            elif self.res_block == WideBlock:
                downsample = Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=True)
            else:
                downsample = [Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)]
                # downsample = [AvgPool2d(1, stride=stride)]
                downsample += [BatchNorm2d(out_planes)]
                downsample = Sequential(*downsample)

        for stride in strides:
            layers += [self.res_block(dim, in_planes, out_planes, stride, downsample)]
            downsample = None
            in_planes = out_planes
            dim = dim // stride
        return dim, Sequential(*layers)
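
In Example #13 below this helper is driven as dim, block_layers = self.get_block(in_planes, n_filters*k, n_block, n_stride, dim=dim), once per stage; note that downsample is reset to None after the first block of a stage, so only that block projects the channels.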
Example #6
    def __init__(self,
                 device,
                 dataset,
                 n_class=10,
                 input_size=32,
                 input_channel=3,
                 width1=1,
                 width2=1,
                 width3=1,
                 linear_size=100):
        super(ConvMedBig, self).__init__()

        mean, sigma = get_mean_sigma(device, dataset)
        self.normalizer = Normalization(mean, sigma)

        layers = [
            Normalization(mean, sigma),
            Conv2d(input_channel,
                   16 * width1,
                   3,
                   stride=1,
                   padding=1,
                   dim=input_size),
            ReLU((16 * width1, input_size, input_size)),
            Conv2d(16 * width1,
                   16 * width2,
                   4,
                   stride=2,
                   padding=1,
                   dim=input_size // 2),
            ReLU((16 * width2, input_size // 2, input_size // 2)),
            Conv2d(16 * width2,
                   32 * width3,
                   4,
                   stride=2,
                   padding=1,
                   dim=input_size // 2),
            ReLU((32 * width3, input_size // 4, input_size // 4)),
            Flatten(),
            Linear(32 * width3 * (input_size // 4) * (input_size // 4),
                   linear_size),
            ReLU(linear_size),
            Linear(linear_size, n_class),
        ]
        self.blocks = Sequential(*layers)
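
A hedged instantiation sketch (device is a torch.device; the dataset name is illustrative, and the forward pass through self.blocks is assumed to behave like a standard nn.Sequential):

# Illustrative: default CIFAR-sized variant; logits expected with shape (2, 10).
net = ConvMedBig(device, "cifar10", n_class=10, input_size=32, input_channel=3, linear_size=100)
out = net.blocks(torch.randn(2, 3, 32, 32))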
Example #7
    def __init__(self, device, dataset, adv_pre, input_size, net, net_dim):
        super(UpscaleNet, self).__init__()
        self.net = net
        self.net_dim = net_dim
        self.blocks = []

        if input_size == net_dim:
            self.transform = None
        else:
            self.transform = Upsample(size=self.net_dim, mode="nearest", align_corners=False,consolidate_errors=False)
            self.blocks += [self.transform]

        if adv_pre:
            self.blocks += [Scale(2, fixed=True), Bias(-1, fixed=True)]
            self.normalization = Sequential(*self.blocks)
        else:
            mean, sigma = get_mean_sigma(device, dataset)
            self.normalization = Normalization(mean, sigma)
            self.blocks += [self.normalization]

        self.blocks += [self.net]
Example #8
    def __init__(self,
                 device,
                 dataset,
                 sizes,
                 n_class=10,
                 input_size=32,
                 input_channel=3):
        super(FFNN, self).__init__()

        mean, sigma = get_mean_sigma(device, dataset)
        self.normalizer = Normalization(mean, sigma)

        layers = [
            Flatten(),
            Linear(input_size * input_size * input_channel, sizes[0]),
            ReLU(sizes[0])
        ]
        for i in range(1, len(sizes)):
            layers += [
                Linear(sizes[i - 1], sizes[i]),
                ReLU(sizes[i]),
            ]
        layers += [Linear(sizes[-1], n_class)]
        self.blocks = Sequential(*layers)
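
A hedged instantiation sketch (device and the dataset name are illustrative; sizes gives the widths of the hidden layers in order):

# Illustrative: a 3-hidden-layer fully connected net for flattened 32x32x3 inputs.
net = FFNN(device, "cifar10", sizes=[200, 200, 200], n_class=10, input_size=32, input_channel=3)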
Example #9
from layers import Sequential, Dense
import numpy as np
from utils import load_ionosphere

(X_train, y_train), (X_test, y_test) = load_ionosphere(0.7,
                                                       normalize=True,
                                                       shuffled=True)

model = Sequential()
model.add(Dense(15, input_shape=(X_train.shape[1], )))
model.add(Dense(15))
model.add(Dense(y_train.shape[1]))

model.fit(X_train,
          y_train,
          reg_factor=0.0,
          epochs=1000,
          learning_rate=0.1,
          batch_size=32,
          validation_data=(X_test, y_test),
          verbose=True)

model.plot_error()

# model.plot_train_error()
Example #10
"""
Created on Sun Mar 25 19:52:43 2018

@author: kaushik
"""
import time
import numpy as np
import matplotlib.pyplot as plt
from layers.dataset import cifar100
from layers import (ConvLayer, FullLayer, FlattenLayer, MaxPoolLayer,
                    ReluLayer, SoftMaxLayer, CrossEntropyLayer, Sequential)

(x_train, y_train), (x_test, y_test) = cifar100(1337)
model = Sequential(layers=(ConvLayer(3, 16, 3), ReluLayer(), MaxPoolLayer(),
                           ConvLayer(16, 32, 3), ReluLayer(), MaxPoolLayer(),
                           FlattenLayer(), FullLayer(8 * 8 * 32, 4),
                           SoftMaxLayer()),
                   loss=CrossEntropyLayer())
start_time = time.perf_counter()
lr_vals = [0.1]
losses_train = list()
losses_test = list()
test_acc = np.zeros(len(lr_vals))
for j in range(len(lr_vals)):
    train_loss, test_loss = model.fit(x_train,
                                      y_train,
                                      x_test,
                                      y_test,
                                      epochs=8,
                                      lr=lr_vals[j],
                                      batch_size=128)
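
A hypothetical continuation of the loop above that plots the returned curves with the matplotlib import from this snippet (illustrative only, not the original script):

    # Hypothetical continuation: collect and visualize the loss curves returned by model.fit.
    losses_train.append(train_loss)
    losses_test.append(test_loss)

plt.plot(losses_train[0], label='train loss')
plt.plot(losses_test[0], label='test loss')
plt.xlabel('epoch')
plt.ylabel('cross-entropy loss')
plt.legend()
plt.show()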
Example #11
import numpy as np
from layers.dataset import cifar100
import matplotlib.pyplot as plt

# Please make sure that cifar-100-python is present in the same folder as dataset.py

(x_train, y_train), (x_test, y_test) = cifar100(1212356299)

from layers import (FullLayer, ReluLayer, SoftMaxLayer, CrossEntropyLayer,
                    Sequential)

model = Sequential(layers=(FullLayer(3072, 500), ReluLayer(),
                           FullLayer(500, 4), SoftMaxLayer()),
                   loss=CrossEntropyLayer())

lr_accuracies = np.zeros((3, ))

loss1 = model.fit(x_train, y_train, lr=0.01, epochs=15)
y_predict = model.predict(x_test)

count = 0
for i in range(np.size(y_test)):
    if y_predict[i] == y_test[i]:
        count += 1

lr_accuracies[0] = (100.0 * count) / np.shape(y_predict)[0]

loss2 = model.fit(x_train, y_train, lr=0.1, epochs=15)

y_predict = model.predict(x_test)
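
For reference, the element-by-element counting loop above is equivalent to a one-line numpy expression (assuming y_predict and y_test are 1-D arrays of equal length):

# Vectorized equivalent of the accuracy loop above.
accuracy = 100.0 * np.mean(np.asarray(y_predict) == np.asarray(y_test))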
Example #12
    def __init__(self, device, args, dataset, trunk_net, input_size, input_channel, n_class, n_branches, gate_type,
                 branch_net_names, gate_net_names, evalFn, lossFn):
        super(MyDeepTrunkNet, self).__init__()
        self.dataset = dataset
        self.input_size = input_size
        self.input_channel = input_channel
        self.n_class = n_class
        self.gate_type = gate_type
        self.n_branches = n_branches
        self.trunk_net = trunk_net
        self.evalFn = evalFn
        self.lossFn = lossFn

        assert gate_type in ["entropy", "net"], f"Unknown gate mode: {gate_type:s}"

        self.exit_ids = [-1] + list(range(n_branches))

        self.threshold = {exit_idx: args.gate_threshold for exit_idx in self.exit_ids[1:]}
        self.gate_nets = {}
        self.branch_nets = {}

        if len(branch_net_names) != n_branches:
            print("Number of branches does not match branch net names")
            branch_net_names = n_branches * branch_net_names[0:1]

        if gate_net_names is None:
            gate_net_names = branch_net_names
        elif len(gate_net_names) != n_branches:
            print("Number of branches does not match gate net names")
            gate_net_names = n_branches * gate_net_names[0:1]

        if args.load_branch_model is not None and len(args.load_branch_model) != n_branches:
            args.load_branch_model = n_branches * args.load_branch_model[0:1]
        if args.load_gate_model is not None and len(args.load_gate_model) != n_branches:
            args.load_gate_model = n_branches * args.load_gate_model[0:1]

        for i, branch_net_name in zip(range(n_branches), branch_net_names):
            exit_idx = self.exit_ids[i+1]
            self.branch_nets[exit_idx] = get_net(device, dataset, branch_net_name, input_size, input_channel, n_class,
                                                 load_model=None if args.load_branch_model is None else args.load_branch_model[i],
                                                 net_dim=args.cert_net_dim)

            if gate_type == "net":
                self.gate_nets[exit_idx] = get_net(device, dataset, gate_net_names[i], input_size, input_channel, 1,
                                                   load_model=None if args.load_gate_model is None else args.load_gate_model[i],
                                                   net_dim=args.cert_net_dim)
            else:
                self.gate_nets[exit_idx] = SeqNet(Sequential(*[*self.branch_nets[exit_idx].blocks, Entropy(n_class, low_mem=True, neg=True)]))
                self.gate_nets[exit_idx].determine_dims(torch.randn((2, input_channel, input_size, input_size), dtype=torch.float).to(device))
                init_slopes(self.gate_nets[exit_idx], device, trainable=False)

            self.add_module("gateNet_{}".format(exit_idx), self.gate_nets[exit_idx])
            self.add_module("branchNet_{}".format(exit_idx), self.branch_nets[exit_idx])

        if args.load_model is not None:
            old_state = self.state_dict()
            load_state = torch.load(args.load_model)
            if args.cert_net_dim is not None and not ("gateNet_0.blocks.layers.1.mean" in load_state.keys()): # Only change keys if loading from a non mixed resolution to mixed resolution
                new_dict = {}
                for k in load_state.keys():
                    if k.startswith("trunk"):
                        new_k = k
                    else:
                        k_match = re.match(r"(^.*\.layers\.)([0-9]+)(\..*$)", k)
                        new_k = "%s%d%s" % (k_match.group(1), int(k_match.group(2)) + 1, k_match.group(3))
                    new_dict[new_k] = load_state[k]
                load_state.update(new_dict)

            # LiRPA requires parameters to have zero batch dimension. This makes old models compatible
            for k, v in load_state.items():
                if k.endswith("mean") or k.endswith("sigma"):
                    if k in old_state:
                        load_state.update({k: v.reshape(old_state[k].shape)})

            old_state.update({k:v.view(old_state[k].shape) for k,v in load_state.items() if
                              k in old_state and (
                              (k.startswith("trunk") and args.load_trunk_model is None)
                              or (k.startswith("gate") and args.load_gate_model is None)
                              or (k.startswith("branch") and args.load_branch_model is None))})
            missing_keys, extra_keys = self.load_state_dict(old_state, strict=False)
            assert len([x for x in missing_keys if "gateNet" in x or "branchNet" in x]) == 0
            print("Whole model loaded from %s" % args.load_model)

            ## Trunk and branch nets have to be loaded after the whole model
            if args.load_trunk_model is not None:
                load_net_state(self.trunk_net, args.load_trunk_model)

        if (args.load_model is not None or args.load_gate_model is not None) and args.gate_feature_extraction is not None:
            for i, net in enumerate(self.gate_nets.values()):
                extraction_layer = [ii for ii in range(len(net.blocks)) if isinstance(net.blocks[ii], Linear)]
                extraction_layer = extraction_layer[-min(len(extraction_layer), args.gate_feature_extraction)]
                net.freeze(extraction_layer - 1)

        self.trunk_cnet = None
        self.gate_cnets = {k: None for k in self.gate_nets.keys()}
        self.branch_cnets = {k: None for k in self.branch_nets.keys()}
Example #13
    def __init__(self, device, dataset, n_blocks, n_class=10, input_size=32, input_channel=3, block='basic',
                 in_planes=32, net_dim=None, widen_factor=1, pooling="global"):
        super(MyResnet, self).__init__(net_dim=None if net_dim == input_size else net_dim)
        if block == 'basic':
            self.res_block = BasicBlock
        elif block == 'preact':
            self.res_block = PreActBlock
        elif block == 'wide':
            self.res_block = WideBlock
        elif block == 'fixup':
            self.res_block = FixupBasicBlock
        else:
            assert False, f"Unknown block type: {block}"
        self.n_layers = sum(n_blocks)
        mean, sigma = get_mean_sigma(device, dataset)
        dim = input_size

        k = widen_factor

        layers = [Normalization(mean, sigma),
                  Conv2d(input_channel, in_planes, kernel_size=3, stride=1, padding=1, bias=(block == "wide"), dim=dim)]

        if not block == "wide":
            layers += [Bias() if block == 'fixup' else BatchNorm2d(in_planes),
                       ReLU((in_planes, input_size, input_size))]

        strides = [1, 2] + ([2] if len(n_blocks) > 2 else []) + [1] * max(0,(len(n_blocks)-3))

        n_filters = in_planes
        for n_block, n_stride in zip(n_blocks, strides):
            if n_stride > 1:
                n_filters *= 2
            dim, block_layers = self.get_block(in_planes, n_filters*k, n_block, n_stride, dim=dim)
            in_planes = n_filters*k
            layers += [block_layers]

        if block == 'fixup':
            layers += [Bias()]
        else:
            layers += [BatchNorm2d(n_filters*k)]

        if block == "wide":
            layers += [ReLU((n_filters*k, dim, dim))]

        if pooling == "global":
            layers += [GlobalAvgPool2d()]
            N = n_filters * k
        elif pooling == "None":      # old networks miss pooling layer and wont load
            N = n_filters * dim * dim * k
        elif isinstance(pooling, int):
            layers += [AvgPool2d(pooling)]
            dim = dim//pooling
            N = n_filters * dim * dim * k

        layers += [Flatten(), ReLU(N)]

        if block == 'fixup':
            layers += [Bias()]

        layers += [Linear(N, n_class)]

        self.blocks = Sequential(*layers)

        # Fixup initialization
        if block == 'fixup':
            for m in self.modules():
                if isinstance(m, FixupBasicBlock):
                    conv1, conv2 = m.residual[1].conv, m.residual[5].conv
                    nn.init.normal_(conv1.weight,
                                    mean=0,
                                    std=np.sqrt(2 / (conv1.weight.shape[0] * np.prod(conv1.weight.shape[2:]))) * self.n_layers ** (-0.5))
                    nn.init.constant_(conv2.weight, 0)
                elif isinstance(m, nn.Linear):
                    nn.init.constant_(m.weight, 0)
                    nn.init.constant_(m.bias, 0)
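
A hedged instantiation sketch (device is a torch.device; the dataset name and block counts are illustrative, not from the original):

# Illustrative: a CIFAR-sized wide ResNet, two residual blocks per stage.
net = MyResnet(device, "cifar10", n_blocks=[2, 2, 2], block='wide', in_planes=16, widen_factor=4)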
Example #14
    def __init__(self, device, dataset, n_class=10, input_size=32, input_channel=3, conv_widths=None,
                 kernel_sizes=None, linear_sizes=None, depth_conv=None, paddings=None, strides=None,
                 dilations=None, pool=False, net_dim=None, bn=False, max=False, scale_width=True):
        super(myNet, self).__init__(net_dim=None if net_dim == input_size else net_dim)
        if kernel_sizes is None:
            kernel_sizes = [3]
        if conv_widths is None:
            conv_widths = [2]
        if linear_sizes is None:
            linear_sizes = [200]
        if paddings is None:
            paddings = [1]
        if strides is None:
            strides = [2]
        if dilations is None:
            dilations = [1]
        if net_dim is None:
            net_dim = input_size

        if len(conv_widths) != len(kernel_sizes):
            kernel_sizes = len(conv_widths) * [kernel_sizes[0]]
        if len(conv_widths) != len(paddings):
            paddings = len(conv_widths) * [paddings[0]]
        if len(conv_widths) != len(strides):
            strides = len(conv_widths) * [strides[0]]
        if len(conv_widths) != len(dilations):
            dilations = len(conv_widths) * [dilations[0]]

        self.n_class = n_class
        self.input_size = input_size
        self.input_channel = input_channel
        self.conv_widths = conv_widths
        self.kernel_sizes = kernel_sizes
        self.paddings = paddings
        self.strides = strides
        self.dilations = dilations
        self.linear_sizes = linear_sizes
        self.depth_conv = depth_conv
        self.net_dim = net_dim
        self.bn = bn
        self.max = max

        mean, sigma = get_mean_sigma(device, dataset)
        layers = self.blocks
        layers += [Normalization(mean, sigma)]

        N = net_dim
        n_channels = input_channel
        self.dims += [(n_channels, N, N)]

        for width, kernel_size, padding, stride, dilation in zip(conv_widths, kernel_sizes, paddings, strides, dilations):
            if scale_width:
                width *= 16
            N = int(np.floor((N + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1))
            layers += [Conv2d(n_channels, int(width), kernel_size, stride=stride, padding=padding, dilation=dilation)]
            if self.bn:
                layers += [BatchNorm2d(int(width))]
            if self.max:
                layers += [MaxPool2d(int(width))]
            layers += [ReLU((int(width), N, N))]
            n_channels = int(width)
            self.dims += 2 * [(n_channels, N, N)]

        if depth_conv is not None:
            layers += [Conv2d(n_channels, depth_conv, 1, stride=1, padding=0),
                       ReLU((n_channels, N, N))]
            n_channels = depth_conv
            self.dims += 2 * [(n_channels, N, N)]

        if pool:
            layers += [GlobalAvgPool2d()]
            self.dims += 2 * [(n_channels, 1, 1)]
            N = 1

        layers += [Flatten()]
        N = n_channels * N ** 2
        self.dims += [(N,)]

        for width in linear_sizes:
            if width == 0:
                continue
            layers += [Linear(int(N), int(width)),
                       ReLU(width)]
            N = width
            self.dims += 2 * [(N,)]

        layers += [Linear(N, n_class)]
        self.dims += [(n_class,)]

        self.blocks = Sequential(*layers)
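
A hedged instantiation sketch (device and the dataset name are illustrative; conv widths are multiplied by 16 because scale_width defaults to True):

# Illustrative: two conv stages (32 and 64 channels after scaling) followed by one hidden linear layer.
net = myNet(device, "cifar10", n_class=10, input_size=32, input_channel=3,
            conv_widths=[2, 4], kernel_sizes=[3], strides=[2], paddings=[1], linear_sizes=[200])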
Example #15
import numpy as np
from tensor import Tensor
from layers import Sequential, Linear
from activations import Tanh, Sigmoid
from optimizers import SGD
from losses import MSELoss

np.random.seed(0)

data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), autograd=True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd=True)

model = Sequential([Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)

    loss.backward()
    optim.step()
    print(loss)