def __init__(self, blocks=None, net_dim=None):
    """Set up a sequential network, optionally prefixed by an upsampling layer.

    Args:
        blocks: Optional iterable of layers. They are placed, in order,
            after the upsampling transform (if one is created).
        net_dim: Optional size handed to ``Upsample``. When ``None`` no
            transform is created and ``self.transform`` is ``None``.
    """
    super(SeqNet, self).__init__()
    self.is_double = False
    self.dims = []
    self.net_dim = net_dim

    if net_dim is None:
        self.transform = None
    else:
        self.transform = Upsample(size=self.net_dim, mode="bilinear", consolidate_errors=True)

    # Gather the layers in a plain list first and wrap them exactly once.
    layers = []
    if self.transform is not None:
        layers.append(self.transform)
    if blocks is not None:
        layers.extend(blocks)
    self.blocks = Sequential(*layers)
def _init_layers(self):
    """Create ``self.lstm_encoder`` and ``self.lstm_decoder``.

    With ``self.n_layers == 1`` both are single ``LstmLayer`` objects
    (input -> hidden and hidden -> input). Otherwise each is a
    ``Sequential`` built from an ``OrderedDict`` keyed ``lstm_0``,
    ``lstm_1``, ...
    """
    # FIXME multi-layer lstm actually not work
    if self.n_layers == 1:
        self.lstm_encoder = LstmLayer(self.input_size, self.hidden_size)
        self.lstm_decoder = LstmLayer(self.hidden_size, self.input_size)
        return

    # Encoder: the first layer maps input -> hidden, the rest hidden -> hidden.
    encoder_stack = OrderedDict()
    encoder_stack['lstm_0'] = LstmLayer(self.input_size, self.hidden_size)
    for depth in range(1, self.n_layers):
        encoder_stack['lstm_{}'.format(depth)] = LstmLayer(self.hidden_size, self.hidden_size)
    self.lstm_encoder = Sequential(encoder_stack)

    # Decoder: every layer maps hidden -> hidden.
    # NOTE(review): unlike the single-layer decoder, nothing here maps back
    # to input_size - possibly related to the FIXME above; confirm.
    decoder_stack = OrderedDict()
    decoder_stack['lstm_0'] = LstmLayer(self.hidden_size, self.hidden_size)
    for depth in range(1, self.n_layers):
        decoder_stack['lstm_{}'.format(depth)] = LstmLayer(self.hidden_size, self.hidden_size)
    self.lstm_decoder = Sequential(decoder_stack)
def train_model(self, layers, loss_metrics, x_train, y_train):
    """Build a ``Sequential`` model, fit it, and return per-epoch mean losses.

    Args:
        layers: Forwarded as the first argument of ``Sequential``.
        loss_metrics: Forwarded as the second argument of ``Sequential``.
        x_train: Training inputs handed to ``fit``.
        y_train: Training targets handed to ``fit``.

    Returns:
        Array with ``self.epochs`` entries: the losses returned by ``fit``
        reshaped to ``(self.epochs, -1)`` and averaged along axis 1.
    """
    # build model
    self.model = Sequential(layers, loss_metrics)

    # train the model
    history = self.model.fit(
        x_train,
        y_train,
        self.epochs,
        self.lr,
        self.batch_size,
        print_output=True,
    )

    # One row per epoch (assumes ``fit`` reports losses epoch by epoch -
    # TODO confirm), then average each row.
    per_epoch = np.reshape(history, (self.epochs, -1))
    return np.mean(per_epoch, axis=1)
def converter(self, net):
    """Copy Linear/Conv2d parameters from ``net`` into ``self.layers``.

    Args:
        net: Either an ``nn.Sequential`` or an object whose ``module``
            attribute is iterable in the same way.

    After copying, ``self.blocks`` is rebuilt from ``self.layers``.
    """
    source = net if isinstance(net, nn.Sequential) else net.module

    # (torch layer type, attribute on our layer that holds the torch module)
    copy_rules = ((nn.Linear, "linear"), (nn.Conv2d, "conv"))

    for idx, torch_layer in enumerate(source):
        for layer_type, attr_name in copy_rules:
            if not isinstance(torch_layer, layer_type):
                continue
            # Our layer list is offset by one relative to the source index.
            target = getattr(self.layers[idx + 1], attr_name)
            target.weight.data.copy_(torch_layer.weight.data)
            target.bias.data.copy_(torch_layer.bias.data)

    self.blocks = Sequential(*self.layers)
def get_block(self, in_planes, out_planes, n_blocks, stride, dim):
    """Build one stage of residual blocks.

    Args:
        in_planes: Channel count entering the first block.
        out_planes: Channel count produced by every block of the stage.
        n_blocks: Number of blocks; the first uses ``stride``, the
            remaining ones use stride 1.
        stride: Stride of the first block (and of the shortcut projection).
        dim: Value passed as the first argument of ``self.res_block``; it is
            integer-divided by each block's stride as the stage is built.

    Returns:
        tuple: ``(dim, stage)`` where ``dim`` is the value after all blocks
        and ``stage`` is a ``Sequential`` of the created blocks.
    """
    strides = [stride] + [1] * (n_blocks - 1)
    layers = []

    # No shortcut projection unless the channel count changes. Without this
    # default, equal in/out planes left ``downsample`` unbound and the loop
    # below raised UnboundLocalError.
    downsample = None
    if in_planes != out_planes:
        if self.res_block == FixupBasicBlock:
            downsample = Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
        elif self.res_block == WideBlock:
            downsample = Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=True)
        else:
            downsample = Sequential(
                Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(out_planes),
            )

    for block_stride in strides:
        layers.append(self.res_block(dim, in_planes, out_planes, block_stride, downsample))
        # Only the first block of the stage receives the projection.
        downsample = None
        in_planes = out_planes
        dim = dim // block_stride

    return dim, Sequential(*layers)
def __init__(self, device, dataset, n_class=10, input_size=32, input_channel=3,
             width1=1, width2=1, width3=1, linear_size=100):
    """Build a three-convolution network followed by two linear layers.

    Args:
        device: Forwarded to ``get_mean_sigma``.
        dataset: Forwarded to ``get_mean_sigma``.
        n_class: Output size of the final ``Linear`` layer.
        input_size: Value used for the first layer's ``dim`` and for the
            activation shapes.
        input_channel: Input channel count of the first convolution.
        width1: Multiplier for the first convolution's 16 channels.
        width2: Multiplier for the second convolution's 16 channels.
        width3: Multiplier for the third convolution's 32 channels.
        linear_size: Width of the hidden ``Linear`` layer.
    """
    super(ConvMedBig, self).__init__()

    mean, sigma = get_mean_sigma(device, dataset)
    self.normalizer = Normalization(mean, sigma)

    channels1 = 16 * width1
    channels2 = 16 * width2
    channels3 = 32 * width3
    half = input_size // 2
    quarter = input_size // 4

    # NOTE(review): the second and third convolutions both receive
    # dim=input_size // 2 - confirm what ``dim`` is meant to describe.
    layers = [
        Normalization(mean, sigma),
        Conv2d(input_channel, channels1, 3, stride=1, padding=1, dim=input_size),
        ReLU((channels1, input_size, input_size)),
        Conv2d(channels1, channels2, 4, stride=2, padding=1, dim=half),
        ReLU((channels2, half, half)),
        Conv2d(channels2, channels3, 4, stride=2, padding=1, dim=half),
        ReLU((channels3, quarter, quarter)),
        Flatten(),
        Linear(channels3 * quarter * quarter, linear_size),
        ReLU(linear_size),
        Linear(linear_size, n_class),
    ]
    self.blocks = Sequential(*layers)
def __init__(self, device, dataset, adv_pre, input_size, net, net_dim):
    """Wrap ``net`` with optional upsampling and an input normalisation step.

    Args:
        device: Forwarded to ``get_mean_sigma`` (only when ``adv_pre`` is falsy).
        dataset: Forwarded to ``get_mean_sigma`` (only when ``adv_pre`` is falsy).
        adv_pre: When truthy, a fixed ``Scale(2)`` and ``Bias(-1)`` are used
            instead of mean/sigma ``Normalization``.
        input_size: Compared with ``net_dim`` to decide whether to upsample.
        net: The wrapped network; appended last to ``self.blocks``.
        net_dim: Size handed to ``Upsample`` when it differs from ``input_size``.

    ``self.blocks`` is left as a plain Python list.
    """
    super(UpscaleNet, self).__init__()
    self.net = net
    self.net_dim = net_dim
    self.blocks = []

    if input_size != net_dim:
        self.transform = Upsample(size=self.net_dim, mode="nearest", align_corners=False,
                                  consolidate_errors=False)
        self.blocks.append(self.transform)
    else:
        self.transform = None

    if not adv_pre:
        mean, sigma = get_mean_sigma(device, dataset)
        self.normalization = Normalization(mean, sigma)
        self.blocks.append(self.normalization)
    else:
        self.blocks.extend([Scale(2, fixed=True), Bias(-1, fixed=True)])
        # In this branch the normalisation module also contains the
        # upsampling transform, if one was created above.
        self.normalization = Sequential(*self.blocks)

    self.blocks.append(self.net)
def __init__(self, device, dataset, sizes, n_class=10, input_size=32, input_channel=3):
    """Build a fully connected network with ReLU activations.

    Args:
        device: Forwarded to ``get_mean_sigma``.
        dataset: Forwarded to ``get_mean_sigma``.
        sizes: Non-empty sequence of hidden layer widths.
        n_class: Output size of the final ``Linear`` layer.
        input_size: Side length used to compute the flattened input size.
        input_channel: Channel count used to compute the flattened input size.

    ``self.normalizer`` is created here but is not part of ``self.blocks``.
    """
    super(FFNN, self).__init__()

    mean, sigma = get_mean_sigma(device, dataset)
    self.normalizer = Normalization(mean, sigma)

    flat_features = input_size * input_size * input_channel
    layers = [Flatten(), Linear(flat_features, sizes[0]), ReLU(sizes[0])]

    # Chain each hidden width to the next one.
    for fan_in, fan_out in zip(sizes, sizes[1:]):
        layers.extend([Linear(fan_in, fan_out), ReLU(fan_out)])

    layers.append(Linear(sizes[-1], n_class))
    self.blocks = Sequential(*layers)
import numpy as np

from layers import Sequential, Dense
from utils import load_ionosphere

# Load the train/test split (0.7 is presumably the training fraction -
# TODO confirm against ``load_ionosphere``).
(X_train, y_train), (X_test, y_test) = load_ionosphere(0.7, normalize=True, shuffled=True)

n_features = X_train.shape[1]
n_outputs = y_train.shape[1]

# Two 15-unit layers followed by an output layer sized to the targets.
model = Sequential()
model.add(Dense(15, input_shape=(n_features, )))
model.add(Dense(15))
model.add(Dense(n_outputs))

model.fit(
    X_train,
    y_train,
    reg_factor=0.0,
    epochs=1000,
    learning_rate=0.1,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=True,
)

model.plot_error()
# model.plot_train_error()
"""
Train a small convolutional network on the data returned by ``cifar100``.

Created on Sun Mar 25 19:52:43 2018

@author: kaushik
"""

import time

import numpy as np
import matplotlib.pyplot as plt

from layers.dataset import cifar100
from layers import (ConvLayer, FullLayer, FlattenLayer, MaxPoolLayer,
                    ReluLayer, SoftMaxLayer, CrossEntropyLayer, Sequential)

(x_train, y_train), (x_test, y_test) = cifar100(1337)

model = Sequential(
    layers=(
        ConvLayer(3, 16, 3),
        ReluLayer(),
        MaxPoolLayer(),
        ConvLayer(16, 32, 3),
        ReluLayer(),
        MaxPoolLayer(),
        FlattenLayer(),
        FullLayer(8 * 8 * 32, 4),
        SoftMaxLayer(),
    ),
    loss=CrossEntropyLayer(),
)

# time.clock() was removed in Python 3.8. Keep using it where it still exists
# (so any later reading of the same clock stays comparable) and fall back to
# time.perf_counter() on newer interpreters instead of crashing.
_clock = getattr(time, "clock", None) or time.perf_counter
start_time = _clock()

lr_vals = [0.1]
losses_train = []
losses_test = []
test_acc = np.zeros(len(lr_vals))

# One training run per learning rate.
for j, lr_val in enumerate(lr_vals):
    train_loss, test_loss = model.fit(x_train, y_train, x_test, y_test,
                                      epochs=8, lr=lr_val, batch_size=128)
import numpy as np
import matplotlib.pyplot as plt

from layers.dataset import cifar100
from layers import (FullLayer, ReluLayer, SoftMaxLayer, CrossEntropyLayer,
                    Sequential)

# Please make sure that cifar-100-python is present in the same folder as dataset.py
(x_train, y_train), (x_test, y_test) = cifar100(1212356299)

model = Sequential(
    layers=(
        FullLayer(3072, 500),
        ReluLayer(),
        FullLayer(500, 4),
        SoftMaxLayer(),
    ),
    loss=CrossEntropyLayer(),
)

lr_accuracies = np.zeros((3, ))

# First run: lr = 0.01.
loss1 = model.fit(x_train, y_train, lr=0.01, epochs=15)
y_predict = model.predict(x_test)

# Count matching predictions and store the accuracy as a percentage.
count = 0
for i in range(np.size(y_test)):
    count += bool(y_predict[i] == y_test[i])
lr_accuracies[0] = (100.0 * count) / np.shape(y_predict)[0]

# Second run: lr = 0.1.
# NOTE(review): the same model instance is fitted again, so this run
# presumably continues from the weights of the first run - confirm intended.
loss2 = model.fit(x_train, y_train, lr=0.1, epochs=15)
y_predict = model.predict(x_test)
def __init__(self, device, args, dataset, trunk_net, input_size, input_channel, n_class, n_branches, gate_type,
             branch_net_names, gate_net_names, evalFn, lossFn):
    """Assemble a trunk network with ``n_branches`` gated branch networks.

    For every branch a branch network is created through ``get_net``. Its
    gate is either a separate single-output network (``gate_type == "net"``)
    or the branch's layers followed by an ``Entropy`` layer
    (``gate_type == "entropy"``). Optionally restores weights from
    ``args.load_model`` and freezes the leading layers of each gate.

    Args:
        device: Forwarded to ``get_net`` / ``init_slopes`` and used to place
            the dummy input for ``determine_dims``.
        args: Namespace read for ``gate_threshold``, ``cert_net_dim``,
            ``load_model``, ``load_trunk_model``, ``load_branch_model``,
            ``load_gate_model`` and ``gate_feature_extraction``. The two
            per-branch ``load_*`` lists are overwritten in place when their
            length differs from ``n_branches``.
        dataset: Forwarded to ``get_net``.
        trunk_net: Already constructed trunk network.
        input_size: Input side length.
        input_channel: Input channel count.
        n_class: Number of classes of the branch networks.
        n_branches: Number of branch/gate pairs.
        gate_type: ``"entropy"`` or ``"net"``.
        branch_net_names: Names of the branch architectures; if the length
            does not match, the first name is repeated ``n_branches`` times.
        gate_net_names: Names of the gate architectures, or ``None`` to reuse
            ``branch_net_names``; broadcast like ``branch_net_names``.
        evalFn: Stored on the instance.
        lossFn: Stored on the instance.

    Raises:
        AssertionError: If ``gate_type`` is unknown, or if gate/branch keys
            are missing after loading ``args.load_model``.
    """
    super(MyDeepTrunkNet, self).__init__()
    self.dataset = dataset
    self.input_size = input_size
    self.input_channel = input_channel
    self.n_class = n_class
    self.gate_type = gate_type
    self.n_branches = n_branches
    self.trunk_net = trunk_net
    self.evalFn = evalFn
    self.lossFn = lossFn

    assert gate_type in ["entropy", "net"], f"Unknown gate mode: {gate_type:s}"

    # Exit ids are -1 followed by 0..n_branches-1; thresholds, gates and
    # branches only exist for the non-negative ids.
    self.exit_ids = [-1] + list(range(n_branches))

    self.threshold = {exit_idx: args.gate_threshold for exit_idx in self.exit_ids[1:]}
    self.gate_nets = {}
    self.branch_nets = {}

    # Broadcast single-entry configuration lists to one entry per branch.
    if len(branch_net_names) != n_branches:
        print("Number of branches does not match branch net names")
        branch_net_names = n_branches * branch_net_names[0:1]

    if gate_net_names is None:
        gate_net_names = branch_net_names
    elif len(gate_net_names) != n_branches:
        print("Number of branches does not match gate net names")
        gate_net_names = n_branches * gate_net_names[0:1]

    if args.load_branch_model is not None and len(args.load_branch_model) != n_branches:
        args.load_branch_model = n_branches * args.load_branch_model[0:1]
    if args.load_gate_model is not None and len(args.load_gate_model) != n_branches:
        args.load_gate_model = n_branches * args.load_gate_model[0:1]

    for i, branch_net_name in zip(range(n_branches), branch_net_names):
        exit_idx = self.exit_ids[i+1]
        self.branch_nets[exit_idx] = get_net(
            device, dataset, branch_net_name, input_size, input_channel, n_class,
            load_model=None if args.load_branch_model is None else args.load_branch_model[i],
            net_dim=args.cert_net_dim)

        if gate_type == "net":
            self.gate_nets[exit_idx] = get_net(
                device, dataset, gate_net_names[i], input_size, input_channel, 1,
                load_model=None if args.load_gate_model is None else args.load_gate_model[i],
                net_dim=args.cert_net_dim)
        else:
            # Entropy gate: reuse the branch's layers and append an Entropy layer.
            self.gate_nets[exit_idx] = SeqNet(Sequential(*[*self.branch_nets[exit_idx].blocks,
                                                           Entropy(n_class, low_mem=True, neg=True)]))
            self.gate_nets[exit_idx].determine_dims(
                torch.randn((2, input_channel, input_size, input_size), dtype=torch.float).to(device))
            init_slopes(self.gate_nets[exit_idx], device, trainable=False)

        self.add_module("gateNet_{}".format(exit_idx), self.gate_nets[exit_idx])
        self.add_module("branchNet_{}".format(exit_idx), self.branch_nets[exit_idx])

    if args.load_model is not None:
        old_state = self.state_dict()
        load_state = torch.load(args.load_model)
        # Only change keys if loading from a non mixed resolution to mixed resolution
        if args.cert_net_dim is not None and "gateNet_0.blocks.layers.1.mean" not in load_state:
            new_dict = {}
            for k in load_state.keys():
                if k.startswith("trunk"):
                    new_k = k
                else:
                    # Shift the ".layers.<n>." index of every non-trunk key up
                    # by one. Raw string: same pattern as before, without the
                    # invalid-escape-sequence warning.
                    k_match = re.match(r"(^.*\.layers\.)([0-9]+)(\..*$)", k)
                    new_k = "%s%d%s" % (k_match.group(1), int(k_match.group(2)) + 1, k_match.group(3))
                new_dict[new_k] = load_state[k]
            load_state.update(new_dict)

        # LiRPA requires parameters to have zero batch dimension. This makes old models compatible
        for k, v in load_state.items():
            if k.endswith("mean") or k.endswith("sigma"):
                if k in old_state:
                    load_state.update({k: v.reshape(old_state[k].shape)})

        # Take a loaded tensor only for parts that have no dedicated
        # checkpoint of their own (trunk / gate / branch).
        old_state.update({k: v.view(old_state[k].shape) for k, v in load_state.items()
                          if k in old_state and (
                              (k.startswith("trunk") and args.load_trunk_model is None)
                              or (k.startswith("gate") and args.load_gate_model is None)
                              or (k.startswith("branch") and args.load_branch_model is None))})
        missing_keys, extra_keys = self.load_state_dict(old_state, strict=False)
        assert len([x for x in missing_keys if "gateNet" in x or "branchNet" in x]) == 0
        print("Whole model loaded from %s" % args.load_model)

        ## Trunk and branch nets have to be loaded after the whole model
        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; confirm this reload belongs inside the load_model branch.
        if args.load_trunk_model is not None:
            load_net_state(self.trunk_net, args.load_trunk_model)

    if (args.load_model is not None or args.load_gate_model is not None) and args.gate_feature_extraction is not None:
        for net in self.gate_nets.values():
            # Indices of all Linear layers; freeze everything before the
            # gate_feature_extraction-th Linear layer counted from the end.
            extraction_layer = [ii for ii in range(len(net.blocks)) if isinstance(net.blocks[ii], Linear)]
            extraction_layer = extraction_layer[-min(len(extraction_layer), args.gate_feature_extraction)]
            net.freeze(extraction_layer-1)

    self.trunk_cnet = None
    self.gate_cnets = {k: None for k in self.gate_nets.keys()}
    self.branch_cnets = {k: None for k in self.branch_nets.keys()}
def __init__(self, device, dataset, n_blocks, n_class=10, input_size=32, input_channel=3, block='basic',
             in_planes=32, net_dim=None, widen_factor=1, pooling="global"):
    """Build a residual network from stages of residual blocks.

    Args:
        device: Forwarded to ``get_mean_sigma``.
        dataset: Forwarded to ``get_mean_sigma``.
        n_blocks: Sequence with the number of blocks per stage.
        n_class: Output size of the final ``Linear`` layer.
        input_size: Input side length; initial value of ``dim``.
        input_channel: Input channel count of the first convolution.
        block: One of ``'basic'``, ``'preact'``, ``'wide'``, ``'fixup'``.
        in_planes: Channel count produced by the first convolution.
        net_dim: Forwarded to the parent constructor unless it equals
            ``input_size`` (then ``None`` is forwarded).
        widen_factor: Multiplier applied to every stage's channel count.
        pooling: ``"global"``, the string ``"None"``, or an ``int`` kernel
            size for ``AvgPool2d``.

    Raises:
        AssertionError: If ``block`` is not a known block type.
        ValueError: If ``pooling`` is not one of the supported values.
    """
    super(MyResnet, self).__init__(net_dim=None if net_dim == input_size else net_dim)
    if block == 'basic':
        self.res_block = BasicBlock
    elif block == 'preact':
        self.res_block = PreActBlock
    elif block == 'wide':
        self.res_block = WideBlock
    elif block == 'fixup':
        self.res_block = FixupBasicBlock
    else:
        assert False, "Unknown block type: {!r}".format(block)

    self.n_layers = sum(n_blocks)

    mean, sigma = get_mean_sigma(device, dataset)
    dim = input_size

    k = widen_factor

    layers = [Normalization(mean, sigma),
              Conv2d(input_channel, in_planes, kernel_size=3, stride=1, padding=1, bias=(block == "wide"), dim=dim)]

    if block != "wide":
        layers += [Bias() if block == 'fixup' else BatchNorm2d(in_planes),
                   ReLU((in_planes, input_size, input_size))]

    # Stage strides: 1, 2, (2 if there is a third stage), then 1 for any further stage.
    strides = [1, 2] + ([2] if len(n_blocks) > 2 else []) + [1] * max(0, (len(n_blocks)-3))

    n_filters = in_planes

    for n_block, n_stride in zip(n_blocks, strides):
        # Channel count doubles whenever a stage downsamples.
        if n_stride > 1:
            n_filters *= 2
        dim, block_layers = self.get_block(in_planes, n_filters*k, n_block, n_stride, dim=dim)
        in_planes = n_filters*k
        layers += [block_layers]

    if block == 'fixup':
        layers += [Bias()]
    else:
        layers += [BatchNorm2d(n_filters*k)]

    if block == "wide":
        layers += [ReLU((n_filters*k, dim, dim))]

    if pooling == "global":
        layers += [GlobalAvgPool2d()]
        N = n_filters * k
    elif pooling == "None":  # old networks miss pooling layer and wont load
        N = n_filters * dim * dim * k
    elif isinstance(pooling, int):
        layers += [AvgPool2d(pooling)]
        dim = dim//pooling
        N = n_filters * dim * dim * k
    else:
        # Previously fell through and failed later with an unbound ``N``.
        raise ValueError(
            "Unsupported pooling {!r}; expected 'global', 'None' or an int".format(pooling))

    layers += [Flatten(), ReLU(N)]

    if block == 'fixup':
        layers += [Bias()]

    layers += [Linear(N, n_class)]

    # NOTE(review): this replaces whatever ``self.blocks`` the parent
    # constructor created - confirm that is intended when net_dim is set.
    self.blocks = Sequential(*layers)

    # Fixup initialization
    if block == 'fixup':
        for m in self.modules():
            if isinstance(m, FixupBasicBlock):
                conv1, conv2 = m.residual[1].conv, m.residual[5].conv
                nn.init.normal_(
                    conv1.weight,
                    mean=0,
                    std=np.sqrt(2 / (conv1.weight.shape[0] * np.prod(conv1.weight.shape[2:])))
                    * self.n_layers ** (-0.5))
                nn.init.constant_(conv2.weight, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.weight, 0)
                nn.init.constant_(m.bias, 0)
def __init__(self, device, dataset, n_class=10, input_size=32, input_channel=3, conv_widths=None,
             kernel_sizes=None, linear_sizes=None, depth_conv=None, paddings=None, strides=None,
             dilations=None, pool=False, net_dim=None, bn=False, max=False, scale_width=True):
    """Build a configurable convolutional network followed by linear layers.

    Layers are appended onto the ``self.blocks`` left by the parent
    constructor, and the shape recorded for each step is appended to
    ``self.dims``.

    Args:
        device: Forwarded to ``get_mean_sigma``.
        dataset: Forwarded to ``get_mean_sigma``.
        n_class: Output size of the final ``Linear`` layer.
        input_size: Input side length; used as ``net_dim`` when that is ``None``.
        input_channel: Input channel count.
        conv_widths: Channel width per convolution (default ``[2]``);
            multiplied by 16 when ``scale_width`` is true.
        kernel_sizes: Kernel size per convolution (default ``[3]``).
        linear_sizes: Hidden linear widths (default ``[200]``); entries equal
            to 0 are skipped.
        depth_conv: Optional channel count of an extra 1x1 convolution.
        paddings: Padding per convolution (default ``[1]``).
        strides: Stride per convolution (default ``[2]``).
        dilations: Dilation per convolution (default ``[1]``).
        pool: When true, append ``GlobalAvgPool2d`` before flattening.
        net_dim: Side length used for the shape bookkeeping; forwarded to the
            parent constructor unless it equals ``input_size``.
        bn: When true, add ``BatchNorm2d`` after each convolution.
        max: When true, add ``MaxPool2d`` after each convolution.
        scale_width: Whether ``conv_widths`` entries are multiplied by 16.
    """
    super(myNet, self).__init__(net_dim=None if net_dim == input_size else net_dim)

    # Fill in defaults for anything not supplied.
    if kernel_sizes is None:
        kernel_sizes = [3]
    if conv_widths is None:
        conv_widths = [2]
    if linear_sizes is None:
        linear_sizes = [200]
    if paddings is None:
        paddings = [1]
    if strides is None:
        strides = [2]
    if dilations is None:
        dilations = [1]
    if net_dim is None:
        net_dim = input_size

    # Any per-convolution list whose length differs from conv_widths is
    # replaced by its first entry repeated once per convolution.
    if len(conv_widths) != len(kernel_sizes):
        kernel_sizes = len(conv_widths) * [kernel_sizes[0]]
    if len(conv_widths) != len(paddings):
        paddings = len(conv_widths) * [paddings[0]]
    if len(conv_widths) != len(strides):
        strides = len(conv_widths) * [strides[0]]
    if len(conv_widths) != len(dilations):
        dilations = len(conv_widths) * [dilations[0]]

    self.n_class=n_class
    self.input_size=input_size
    self.input_channel=input_channel
    self.conv_widths=conv_widths
    self.kernel_sizes=kernel_sizes
    self.paddings=paddings
    self.strides=strides
    self.dilations = dilations
    self.linear_sizes=linear_sizes
    self.depth_conv=depth_conv
    self.net_dim = net_dim
    self.bn=bn
    self.max=max

    mean, sigma = get_mean_sigma(device, dataset)
    # Continue from the container the parent constructor stored in
    # self.blocks (presumably already holding the optional upsampling
    # layer - confirm against the parent class).
    layers = self.blocks
    layers += [Normalization(mean, sigma)]

    N = net_dim
    n_channels = input_channel
    self.dims += [(n_channels,N,N)]

    for width, kernel_size, padding, stride, dilation in zip(conv_widths, kernel_sizes, paddings, strides, dilations):
        if scale_width:
            width *= 16
        # Side length after this convolution.
        N = int(np.floor((N + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1))
        layers += [Conv2d(n_channels, int(width), kernel_size, stride=stride, padding=padding, dilation=dilation)]
        if self.bn:
            layers += [BatchNorm2d(int(width))]
        if self.max:
            # NOTE(review): the pooling argument is the channel width and N is
            # not adjusted afterwards - confirm this is intended.
            layers += [MaxPool2d(int(width))]
        layers += [ReLU((int(width), N, N))]
        n_channels = int(width)
        # Two entries are recorded per convolution regardless of bn/max.
        self.dims += 2*[(n_channels,N,N)]

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # this optional 1x1 convolution is assumed to follow the loop.
    if depth_conv is not None:
        # NOTE(review): the ReLU shape uses the channel count from before the
        # 1x1 convolution, while self.dims records depth_conv - confirm.
        layers += [Conv2d(n_channels, depth_conv, 1, stride=1, padding=0), ReLU((n_channels, N, N))]
        n_channels = depth_conv
        self.dims += 2*[(n_channels,N,N)]

    if pool:
        layers += [GlobalAvgPool2d()]
        self.dims += 2 * [(n_channels, 1, 1)]
        N=1

    layers += [Flatten()]
    # From here on N is the flattened feature count.
    N = n_channels * N ** 2
    self.dims += [(N,)]

    for width in linear_sizes:
        if width == 0:
            continue
        layers += [Linear(int(N), int(width)), ReLU(width)]
        N = width
        self.dims+=2*[(N,)]

    layers += [Linear(N, n_class)]
    self.dims+=[(n_class,)]

    self.blocks = Sequential(*layers)
import numpy as np

from tensor import Tensor
from layers import Sequential, Linear
from activations import Tanh, Sigmoid
from optimizers import SGD
from losses import MSELoss

# Fixed seed so runs are repeatable.
np.random.seed(0)

# Four two-feature samples and their single-column targets.
inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
labels = np.array([[0], [1], [0], [1]])
data = Tensor(inputs, autograd=True)
target = Tensor(labels, autograd=True)

network_layers = [Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()]
model = Sequential(network_layers)
criterion = MSELoss()
optim = SGD(parameters=model.get_parameters(), alpha=1)

# NOTE(review): nesting reconstructed from a whitespace-collapsed source; the
# print is assumed to sit inside the loop, reporting the loss each step.
for step in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backward()
    optim.step()
    print(loss)