def __init__(self, ncha, size, taskcla, nhid):
    """Create the per-task embeddings, conv stack, dense layers and heads.

    Args:
        ncha: Number of input channels fed to the first convolution.
        size: Input side length; it sets the first two kernel sizes
            (``size // 8`` and ``size // 10``).
        taskcla: Sequence of ``(t, n)`` pairs. Its length sizes every
            embedding table and each ``n`` is the output width of one head.
        nhid: Width of both fully connected layers.
    """
    super(Acessibility, self).__init__()
    nn = torch.nn
    num_tasks = len(taskcla)

    # One embedding row per task; one table per layer width
    # (64 / 128 / 256 / nhid / nhid).
    self.ec1 = nn.Embedding(num_tasks, 64)
    self.ec2 = nn.Embedding(num_tasks, 128)
    self.ec3 = nn.Embedding(num_tasks, 256)
    self.efc1 = nn.Embedding(num_tasks, nhid)
    self.efc2 = nn.Embedding(num_tasks, nhid)

    # Conv stack. The side length is halved after every conv, presumably to
    # mirror a 2x2 max-pool applied in forward() -- TODO confirm.
    first_kernel = size // 8
    self.c1 = nn.Conv2d(ncha, 64, kernel_size=first_kernel)
    side = utils.compute_conv_output_size(size, first_kernel) // 2

    second_kernel = size // 10
    self.c2 = nn.Conv2d(64, 128, kernel_size=second_kernel)
    side = utils.compute_conv_output_size(side, second_kernel) // 2

    self.c3 = nn.Conv2d(128, 256, kernel_size=2)
    side = utils.compute_conv_output_size(side, 2) // 2
    self.smid = side

    # Dense layers operate on the flattened 256-channel feature map.
    self.fc1 = nn.Linear(256 * side * side, nhid)
    self.fc2 = nn.Linear(nhid, nhid)

    # One output head per entry of taskcla, in order.
    self.last = nn.ModuleList([nn.Linear(nhid, n) for _, n in taskcla])
def __init__(self, inputsize, taskcla):
    """Build a three-conv / two-dense network with one head per task.

    Args:
        inputsize: Triple whose first two items are the channel count and
            the input side length; the third item is ignored.
        taskcla: Sequence of ``(t, n)`` pairs; a ``Linear(2048, n)`` head is
            created for each pair. Stored as ``self.taskcla``.
    """
    super(Net, self).__init__()
    ncha, size, _ = inputsize
    self.taskcla = taskcla
    nn = torch.nn

    # Conv stack; the tracked side length is halved after each conv,
    # presumably matching self.maxpool being applied in forward().
    kernel1 = size // 8
    self.conv1 = nn.Conv2d(ncha, 64, kernel_size=kernel1)
    side = utils.compute_conv_output_size(size, kernel1) // 2

    kernel2 = size // 10
    self.conv2 = nn.Conv2d(64, 128, kernel_size=kernel2)
    side = utils.compute_conv_output_size(side, kernel2) // 2

    self.conv3 = nn.Conv2d(128, 256, kernel_size=2)
    side = utils.compute_conv_output_size(side, 2) // 2

    # Parameter-free helper layers.
    self.maxpool = nn.MaxPool2d(2)
    self.relu = nn.ReLU()
    self.drop1 = nn.Dropout(0.2)
    self.drop2 = nn.Dropout(0.5)

    # Dense trunk on the flattened 256-channel feature map.
    self.fc1 = nn.Linear(256 * side * side, 2048)
    self.fc2 = nn.Linear(2048, 2048)

    # One output head per task, in taskcla order.
    self.last = nn.ModuleList(
        [nn.Linear(2048, n) for _, n in self.taskcla])
def __init__(self, inputsize, taskcla):
    """Build a four-conv (3x3, padding 1) network with one head per task.

    Args:
        inputsize: Triple whose first two items are the channel count and
            the input side length; the third item is ignored.
        taskcla: Sequence of ``(t, n)`` pairs; a ``Linear(512, n)`` head is
            created for each pair. Stored as ``self.taskcla``.
    """
    super(Net, self).__init__()
    ncha, size, _ = inputsize
    self.taskcla = taskcla
    nn = torch.nn

    # The side length is only halved after the 2nd and 4th conv; c1 and c3
    # keep it unchanged (their halving step is disabled).
    self.c1 = nn.Conv2d(ncha, 32, kernel_size=3, padding=1)
    side = utils.compute_conv_output_size(size, 3, padding=1)

    self.c2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
    side = utils.compute_conv_output_size(side, 3, padding=1) // 2

    self.c3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
    side = utils.compute_conv_output_size(side, 3, padding=1)

    self.c4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
    side = utils.compute_conv_output_size(side, 3, padding=1) // 2
    self.smid = side

    # Parameter-free helper layers.
    self.maxpool = nn.MaxPool2d(2)
    self.relu = nn.ReLU()
    self.drop1 = nn.Dropout(0.2)
    self.drop2 = nn.Dropout(0.5)

    # Single dense layer on the flattened 64-channel feature map.
    self.fc1 = nn.Linear(64 * side * side, 512)

    # One output head per task, in taskcla order.
    self.last = nn.ModuleList(
        [nn.Linear(512, n) for _, n in self.taskcla])
def __init__(self, inputsize, taskcla):
    """Build a two-conv network with per-task scale/shift embeddings.

    Args:
        inputsize: Triple whose first two items are the channel count and
            the input side length; the third item is ignored.
        taskcla: Sequence of ``(t, n)`` pairs; its length sizes the
            scale/shift embedding tables and a ``Linear(100, n)`` head is
            created for each pair. Stored as ``self.taskcla``.

    ``self.init_film()`` (defined elsewhere in the class) is called last.
    """
    super(Net, self).__init__()
    ncha, size, _ = inputsize
    self.taskcla = taskcla
    nn = torch.nn

    # Two 3x3 convs (padding 1); side length halved after each one,
    # presumably matching self.maxpool in forward().
    self.c1 = nn.Conv2d(ncha, 16, kernel_size=3, padding=1)
    side = utils.compute_conv_output_size(size, 3, padding=1) // 2
    self.c2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
    side = utils.compute_conv_output_size(side, 3, padding=1) // 2
    self.smid = side

    self.maxpool = nn.MaxPool2d(2)
    self.relu = nn.ReLU()
    # Dropout probability is 0.0 for both layers in this variant.
    self.drop1 = nn.Dropout(0.0)
    self.drop2 = nn.Dropout(0.0)
    self.fc1 = nn.Linear(32 * side * side, 100)

    # Registered here (before the embeddings) and filled further down.
    self.last = nn.ModuleList()

    # Per-task scale and shift tables, one pair per layer width
    # (16 / 32 / 100).
    num_tasks = len(self.taskcla)
    self.scale1 = nn.Embedding(num_tasks, 16)
    self.scale2 = nn.Embedding(num_tasks, 32)
    self.scale3 = nn.Embedding(num_tasks, 100)
    self.shift1 = nn.Embedding(num_tasks, 16)
    self.shift2 = nn.Embedding(num_tasks, 32)
    self.shift3 = nn.Embedding(num_tasks, 100)

    # One output head per task, in taskcla order.
    self.last.extend([nn.Linear(100, n) for _, n in self.taskcla])

    self.init_film()
def __init__(self, inputsize, taskcla):
    """Build a three-conv / two-dense network with embeddings in a ModuleDict.

    Args:
        inputsize: Triple whose first two items are the channel count and
            the input side length; the third item is ignored here, but
            ``inputsize[1:]`` is passed to ``utils.get_conv_out_size``.
        taskcla: Sequence of ``(t, n)`` pairs; its length sizes the
            embedding tables and a ``Linear(2048, n)`` head is created for
            each pair. Stored as ``self.taskcla``.
    """
    super(Net, self).__init__()
    ncha, size, _ = inputsize
    self.taskcla = taskcla
    nn = torch.nn

    # NOTE(review): s2 is recomputed after every conv via
    # utils.get_conv_out_size but never read afterwards -- it looks like a
    # leftover cross-check of the two size helpers; confirm and remove.
    kernel1 = size // 8
    self.c1 = nn.Conv2d(ncha, 64, kernel_size=kernel1)
    s = utils.compute_conv_output_size(size, kernel1)
    s2 = utils.get_conv_out_size(inputsize[1:], kernel1, 0, 1)
    s = s // 2

    kernel2 = size // 10
    self.c2 = nn.Conv2d(64, 128, kernel_size=kernel2)
    side_in = s
    s = utils.compute_conv_output_size(side_in, kernel2)
    s2 = utils.get_conv_out_size([side_in, side_in], kernel2, 0, 1)
    s = s // 2

    self.c3 = nn.Conv2d(128, 256, kernel_size=2)
    side_in = s
    s = utils.compute_conv_output_size(side_in, 2)
    s2 = utils.get_conv_out_size([side_in, side_in], 2, 0, 1)
    s = s // 2
    self.smid = s

    # Parameter-free helper layers.
    self.maxpool = nn.MaxPool2d(2)
    self.relu = nn.ReLU()
    self.drop1 = nn.Dropout(0.2)
    self.drop2 = nn.Dropout(0.5)

    # Dense trunk on the flattened 256-channel feature map.
    self.fc1 = nn.Linear(256 * s * s, 2048)
    self.fc2 = nn.Linear(2048, 2048)

    # One output head per task, in taskcla order.
    self.last = nn.ModuleList(
        [nn.Linear(2048, n) for _, n in self.taskcla])

    self.gate = nn.Sigmoid()

    # All embedding stuff should start with 'e'. The tables live in a
    # ModuleDict under these keys rather than as direct attributes.
    num_tasks = len(self.taskcla)
    self.embeddings = nn.ModuleDict({
        'ec1': nn.Embedding(num_tasks, 64),
        'ec2': nn.Embedding(num_tasks, 128),
        'ec3': nn.Embedding(num_tasks, 256),
        'efc1': nn.Embedding(num_tasks, 2048),
        'efc2': nn.Embedding(num_tasks, 2048),
    })

    # Disabled alternative initialisation (e.g., used in the compression
    # experiments). It is written against self.ec1 ... self.efc2, which this
    # variant does not define:
    #   lo,hi=0,2
    #   self.ec1.weight.data.uniform_(lo,hi)
    #   self.ec2.weight.data.uniform_(lo,hi)
    #   self.ec3.weight.data.uniform_(lo,hi)
    #   self.efc1.weight.data.uniform_(lo,hi)
    #   self.efc2.weight.data.uniform_(lo,hi)
def __init__(self, inputsize, taskcla, nhid=2000, args=0):
    """Build a one-conv / two-dense network with one head per task.

    Args:
        inputsize: Triple whose first two items are the channel count and
            the input side length; the third item is ignored.
        taskcla: Sequence of ``(t, n)`` pairs; a ``Linear(nhid, n)`` head is
            created for each pair. Stored as ``self.taskcla``.
        nhid: Width of both fully connected layers. Stored as ``self.nhid``.
        args: Object exposing ``pdrop1`` and ``pdrop2`` (the two dropout
            probabilities). When left at the placeholder default (``0``) or
            passed as ``None``, 0.2 and 0.5 are used.

    Raises:
        AttributeError: If a real ``args`` object lacks ``pdrop1`` or
            ``pdrop2``.
    """
    super(Net, self).__init__()
    ncha, size, _ = inputsize
    self.taskcla = taskcla
    self.nhid = nhid

    self.conv1 = torch.nn.Conv2d(ncha, 64, kernel_size=size // 8)
    s = utils.compute_conv_output_size(size, size // 8)
    # Halved presumably to mirror self.maxpool applied in forward().
    s = s // 2
    self.maxpool = torch.nn.MaxPool2d(2)
    self.relu = torch.nn.ReLU()

    # The signature's default ``args=0`` has no ``pdrop*`` attributes, so
    # reading them unconditionally made the default unusable. Fall back to
    # the rates the fixed-dropout variants of this network use, but only
    # for the placeholder: a real args object that is missing a rate still
    # fails loudly.
    if args is None or isinstance(args, int):
        pdrop1, pdrop2 = 0.2, 0.5
    else:
        pdrop1 = args.pdrop1
        pdrop2 = args.pdrop2
    self.drop1 = torch.nn.Dropout(pdrop1)
    self.drop2 = torch.nn.Dropout(pdrop2)

    # Dense trunk on the flattened 64-channel feature map.
    self.fc1 = torch.nn.Linear(64 * s * s, nhid)
    self.fc2 = torch.nn.Linear(nhid, nhid)

    # One output head per task, in taskcla order.
    self.last = torch.nn.ModuleList()
    for _, n in self.taskcla:
        self.last.append(torch.nn.Linear(nhid, n))

    print('CNN')
    return
def __init__(self, taskcla, nhid, ncha, size):
    """Create the conv/dense trunk, per-task heads and task-to-task maps.

    Args:
        taskcla: Sequence of ``(t, n)`` pairs; each ``n`` is used as a head
            output width and as the input/output width of the transfer
            layers.
        nhid: Width of the fully connected layers.
        ncha: Number of input channels for the convolution.
        size: Input side length; the conv kernel size is ``size // 8``.
    """
    super(TransferLayer, self).__init__()
    nn = torch.nn

    kernel = size // 8
    self.c1 = nn.Conv2d(ncha, 64, kernel_size=kernel)
    # Halved presumably to mirror a 2x2 max-pool in forward() -- TODO confirm.
    self.smid = utils.compute_conv_output_size(size, kernel) // 2

    self.fc1 = nn.Linear(64 * self.smid * self.smid, nhid)
    self.fc2 = nn.Linear(nhid, nhid)
    self.fusion = nn.Linear(nhid * 2, nhid)

    # Two heads per task, created pairwise: a plain one on nhid features
    # and a fusion one on 2 * nhid features.
    self.last = nn.ModuleList()
    self.last_fusion = nn.ModuleList()
    for _, n_out in taskcla:
        self.last.append(nn.Linear(nhid, n_out))
        self.last_fusion.append(nn.Linear(nhid * 2, n_out))

    # transfer[i][j] maps the i-th task's output width to the j-th task's.
    self.transfer = nn.ModuleList()
    for _, from_n in taskcla:
        # NOTE(review): the row is stored on self, so after the loop
        # ``self.transfer_to_n`` stays registered as a submodule aliasing
        # the last row of ``self.transfer``. Looks unintended, but removing
        # it would change state_dict keys -- confirm before touching.
        self.transfer_to_n = nn.ModuleList(
            [nn.Linear(from_n, to_n) for _, to_n in taskcla])
        self.transfer.append(self.transfer_to_n)
def __init__(self, inputsize, taskcla):
    """Build a four-conv network with per-task embeddings and heads.

    Args:
        inputsize: Triple whose first two items are the channel count and
            the input side length; the third item is ignored.
        taskcla: Sequence of ``(t, n)`` pairs; its length sizes the
            embedding tables and a ``Linear(512, n)`` head is created for
            each pair. Stored as ``self.taskcla``.

    Prints the side length computed after the first convolution.
    """
    super(Net, self).__init__()
    ncha, size, _ = inputsize
    self.taskcla = taskcla
    nn = torch.nn

    # The side length is only halved after the 2nd and 4th conv; c1 and c3
    # keep it unchanged (their halving step is disabled).
    self.c1 = nn.Conv2d(ncha, 32, kernel_size=3, padding=1)
    side = utils.compute_conv_output_size(size, 3, padding=1)
    # NOTE(review): looks like leftover debug output -- confirm it is wanted.
    print(side)

    self.c2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
    side = utils.compute_conv_output_size(side, 3, padding=1) // 2

    self.c3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
    side = utils.compute_conv_output_size(side, 3, padding=1)

    self.c4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
    side = utils.compute_conv_output_size(side, 3, padding=1) // 2
    self.smid = side

    # Parameter-free helper layers.
    self.maxpool = nn.MaxPool2d(2)
    self.relu = nn.ReLU()
    self.drop1 = nn.Dropout(0.2)
    self.drop2 = nn.Dropout(0.5)

    # Single dense layer on the flattened 64-channel feature map.
    self.fc1 = nn.Linear(64 * side * side, 512)

    # One output head per task, in taskcla order.
    self.last = nn.ModuleList(
        [nn.Linear(512, n) for _, n in self.taskcla])

    self.gate = nn.Sigmoid()

    # All embedding stuff should start with 'e'. One table per conv layer
    # plus one for fc1, each with one row per task.
    num_tasks = len(self.taskcla)
    self.ec1 = nn.Embedding(num_tasks, 32)
    self.ec2 = nn.Embedding(num_tasks, 32)
    self.ec3 = nn.Embedding(num_tasks, 64)
    self.ec4 = nn.Embedding(num_tasks, 64)
    self.efc1 = nn.Embedding(num_tasks, 512)

    # Disabled alternative initialisation (e.g., used in the compression
    # experiments). It mentions self.efc2, which this variant does not
    # define, and omits self.ec4:
    #   lo,hi=0,2
    #   self.ec1.weight.data.uniform_(lo,hi)
    #   self.ec2.weight.data.uniform_(lo,hi)
    #   self.ec3.weight.data.uniform_(lo,hi)
    #   self.efc1.weight.data.uniform_(lo,hi)
    #   self.efc2.weight.data.uniform_(lo,hi)
def __init__(self, args):
    """Build the shared three-conv / four-dense encoder.

    Args:
        args: Configuration object. Reads ``inputsize`` (triple: channels,
            side length, ignored third item), ``taskcla``, ``latent_dim``
            (output width of the final dense layer) and ``experiment``
            (selects the layer widths).

    Raises:
        NotImplementedError: If ``args.experiment`` is not one of
            ``'cifar100'``, ``'miniimagenet'`` or ``'multidatasets'``.
    """
    super(Shared, self).__init__()
    self.ncha, size, _ = args.inputsize
    self.taskcla = args.taskcla
    self.latent_dim = args.latent_dim

    # Layer widths: three conv channel counts, then three dense widths.
    # 'cifar100' and 'multidatasets' share the same configuration.
    experiment = args.experiment
    if experiment in ('cifar100', 'multidatasets'):
        hiddens = [64, 128, 256, 1024, 1024, 512]
    elif experiment == 'miniimagenet':
        hiddens = [64, 128, 256, 512, 512, 512]
    else:
        raise NotImplementedError
    conv1_out, conv2_out, conv3_out, fc1_out, fc2_out, fc3_out = hiddens

    nn = torch.nn

    # Conv stack; the tracked side length is halved after each conv,
    # presumably matching self.maxpool being applied in forward().
    kernel1 = size // 8
    self.conv1 = nn.Conv2d(self.ncha, conv1_out, kernel_size=kernel1)
    side = utils.compute_conv_output_size(size, kernel1) // 2

    kernel2 = size // 10
    self.conv2 = nn.Conv2d(conv1_out, conv2_out, kernel_size=kernel2)
    side = utils.compute_conv_output_size(side, kernel2) // 2

    self.conv3 = nn.Conv2d(conv2_out, conv3_out, kernel_size=2)
    side = utils.compute_conv_output_size(side, 2) // 2

    # Parameter-free helper layers.
    self.maxpool = nn.MaxPool2d(2)
    self.relu = nn.ReLU()
    self.drop1 = nn.Dropout(0.2)
    self.drop2 = nn.Dropout(0.5)

    # Dense stack ending in the latent projection.
    self.fc1 = nn.Linear(conv3_out * side * side, fc1_out)
    self.fc2 = nn.Linear(fc1_out, fc2_out)
    self.fc3 = nn.Linear(fc2_out, fc3_out)
    self.fc4 = nn.Linear(fc3_out, self.latent_dim)
def __init__(self, ncha, size, taskcla, nhid):
    """Create the per-task embeddings, conv stack, dense layers and heads.

    Args:
        ncha: Number of input channels fed to the first convolution.
        size: Input side length; it sets the first two kernel sizes
            (``size // 8`` and ``size // 10``).
        taskcla: Sequence of ``(t, n)`` pairs. Its length sizes every
            embedding table and each ``n`` is the output width of one head.
        nhid: Width of both fully connected layers.
    """
    super(MainContinualLearning, self).__init__()
    nn = torch.nn
    num_tasks = len(taskcla)

    # One embedding row per task; one table per layer width
    # (64 / 128 / 256 / nhid / nhid).
    self.ec1 = nn.Embedding(num_tasks, 64)
    self.ec2 = nn.Embedding(num_tasks, 128)
    self.ec3 = nn.Embedding(num_tasks, 256)
    self.efc1 = nn.Embedding(num_tasks, nhid)
    self.efc2 = nn.Embedding(num_tasks, nhid)

    # Conv stack. The side length is halved after every conv, presumably to
    # mirror a 2x2 max-pool applied in forward() -- TODO confirm.
    first_kernel = size // 8
    self.c1 = nn.Conv2d(ncha, 64, kernel_size=first_kernel)
    side = utils.compute_conv_output_size(size, first_kernel) // 2

    second_kernel = size // 10
    self.c2 = nn.Conv2d(64, 128, kernel_size=second_kernel)
    side = utils.compute_conv_output_size(side, second_kernel) // 2

    self.c3 = nn.Conv2d(128, 256, kernel_size=2)
    side = utils.compute_conv_output_size(side, 2) // 2
    self.smid = side

    # Dense layers operate on the flattened 256-channel feature map.
    self.fc1 = nn.Linear(256 * side * side, nhid)
    self.fc2 = nn.Linear(nhid, nhid)

    # One output head per entry of taskcla, in order.
    self.last = nn.ModuleList([nn.Linear(nhid, n) for _, n in taskcla])

# Disabled standalone head container, kept for reference:
# class LastLayer(torch.nn.Module):
#
#     def __init__(self,taskcla):
#
#         super(LastLayer, self).__init__()
#
#         self.last=torch.nn.ModuleList()
#         for t,n in taskcla:
#             self.last.append(torch.nn.Linear(2048,n))
def __init__(self, inputsize, taskcla, nhid=2000, args=0):
    """Build a one-conv / two-dense network with per-task embeddings.

    Args:
        inputsize: Triple whose first two items are the channel count and
            the input side length; the third item is ignored.
        taskcla: Sequence of ``(t, n)`` pairs; its length sizes the
            embedding tables and a ``Linear(nhid, n)`` head is created for
            each pair. Stored as ``self.taskcla``.
        nhid: Width of both fully connected layers. Stored as ``self.nhid``.
        args: Object exposing ``pdrop1`` and ``pdrop2`` (the two dropout
            probabilities). When left at the placeholder default (``0``) or
            passed as ``None``, 0.2 and 0.5 are used.

    Raises:
        AttributeError: If a real ``args`` object lacks ``pdrop1`` or
            ``pdrop2``.

    Prints a banner and the two dropout probabilities.
    """
    super(Net, self).__init__()
    ncha, size, _ = inputsize
    self.taskcla = taskcla
    self.nhid = nhid

    self.c1 = torch.nn.Conv2d(ncha, 64, kernel_size=size // 8)
    s = utils.compute_conv_output_size(size, size // 8)
    # Halved presumably to mirror self.maxpool applied in forward().
    s = s // 2
    self.smid = s
    self.maxpool = torch.nn.MaxPool2d(2)

    # The signature's default ``args=0`` has no ``pdrop*`` attributes, so
    # reading them unconditionally made the default unusable. Fall back to
    # the rates the fixed-dropout variants of this network use, but only
    # for the placeholder: a real args object that is missing a rate still
    # fails loudly.
    if args is None or isinstance(args, int):
        pdrop1, pdrop2 = 0.2, 0.5
    else:
        pdrop1 = args.pdrop1
        pdrop2 = args.pdrop2

    self.relu = torch.nn.ReLU()
    self.drop1 = torch.nn.Dropout(pdrop1)
    self.drop2 = torch.nn.Dropout(pdrop2)

    # Dense trunk on the flattened 64-channel feature map.
    self.fc1 = torch.nn.Linear(64 * self.smid * self.smid, nhid)
    self.fc2 = torch.nn.Linear(nhid, nhid)

    # One output head per task, in taskcla order.
    self.last = torch.nn.ModuleList()
    for _, n in self.taskcla:
        self.last.append(torch.nn.Linear(nhid, n))

    self.gate = torch.nn.Sigmoid()

    # All embedding stuff should start with 'e'
    self.ec1 = torch.nn.Embedding(len(self.taskcla), 64)
    self.efc1 = torch.nn.Embedding(len(self.taskcla), nhid)
    self.efc2 = torch.nn.Embedding(len(self.taskcla), nhid)

    # Disabled alternative initialisation (e.g., used in the compression
    # experiments). It mentions self.ec2 / self.ec3, which this variant
    # does not define:
    #   lo,hi=0,2
    #   self.ec1.weight.data.uniform_(lo,hi)
    #   self.ec2.weight.data.uniform_(lo,hi)
    #   self.ec3.weight.data.uniform_(lo,hi)
    #   self.efc1.weight.data.uniform_(lo,hi)
    #   self.efc2.weight.data.uniform_(lo,hi)

    print('CNN HAT')
    print('pdrop1: ', pdrop1)
    print('pdrop2: ', pdrop2)
    return
def _create_conv(self, fin, fout, ksize, s, pos, stem, psize):
    '''Build one conv layer: plain inside the stem, weight-masked otherwise.

    Args:
        fin: Number of input channels.
        fout: Number of output channels.
        ksize: Square kernel size.
        s: Side length of the input to this layer.
        pos: Position of this layer; compared against ``stem``.
        stem: Last position that still gets a plain ``torch.nn.Conv2d``, or
            ``None`` when no layer does.
        psize: Size triple; only its first entry is read, and only when
            ``self.use_concat`` is ``True``.

    Returns:
        ``(conv, s, psize)``: the created layer, the side length after the
        layer (already halved) and ``psize``, which is rebuilt for stem
        layers and passed through unchanged for masked ones.
    '''
    # Side length after the conv, halved presumably for a following 2x2
    # pooling step -- TODO confirm against forward().
    out_side = utils.compute_conv_output_size(s, ksize) // 2

    inside_stem = stem is not None and pos <= stem
    if not inside_stem:
        # The mask is created separately; combination is deliberately not
        # used for conv layers (hence the False flag).
        self._create_mask((fout, fin, ksize, ksize), False)
        return Conv2d_dwa(fin, fout, kernel_size=ksize), out_side, psize

    conv = torch.nn.Conv2d(fin, fout, kernel_size=ksize)
    # With concatenation the channel count accumulates; otherwise it is
    # simply this layer's output width.
    channels = psize[0] + fout if self.use_concat is True else fout
    return conv, out_side, (channels, out_side, out_side)
def __init__(self, nhid, ncha, size, taskcla):
    """Create the conv/dense trunk, per-task embeddings and two head sets.

    Args:
        nhid: Width of both fully connected layers.
        ncha: Number of input channels for the convolution.
        size: Input side length; the conv kernel size is ``size // 8``.
        taskcla: Sequence of ``(t, n)`` pairs. Its length sizes every
            embedding table and each ``n`` is the output width of that
            task's two heads.
    """
    super(MainContinualLearning, self).__init__()
    nn = torch.nn

    kernel = size // 8
    self.c1 = nn.Conv2d(ncha, 64, kernel_size=kernel)
    # Halved presumably to mirror a 2x2 max-pool in forward() -- TODO confirm.
    self.smid = utils.compute_conv_output_size(size, kernel) // 2

    # One embedding row per task for the conv layer and each dense layer.
    num_tasks = len(taskcla)
    self.ec1 = nn.Embedding(num_tasks, 64)
    self.efc1 = nn.Embedding(num_tasks, nhid)
    self.efc2 = nn.Embedding(num_tasks, nhid)

    # Dense trunk on the flattened 64-channel feature map.
    flat_features = 64 * self.smid * self.smid
    self.fc1 = nn.Linear(flat_features, nhid)
    self.fc2 = nn.Linear(nhid, nhid)

    # Two heads per task, created pairwise in taskcla order.
    self.mask_last = nn.ModuleList()
    self.att_last = nn.ModuleList()
    for _, n_out in taskcla:
        self.mask_last.append(nn.Linear(nhid, n_out))
        self.att_last.append(nn.Linear(nhid, n_out))
def __init__(self, inputsize, taskcla, nhid, args=0):
    """Build one sub-network ("column") per task plus lateral adapters.

    Args:
        inputsize: Triple whose first two items are the channel count and
            the input side length; the third item is ignored.
        taskcla: Sequence of ``(t, n)`` pairs. ``t`` is used as an integer
            (columns with ``t > 0`` get lateral adapters sized by ``t``) and
            ``n`` is the output width of that column's head. Stored as
            ``self.taskcla``.
        nhid: Not read by this constructor.
        args: Not read by this constructor.

    Raises:
        ZeroDivisionError: If ``taskcla`` is empty, because the layer widths
            are divided by the number of tasks.
    """
    super(Net, self).__init__()
    ncha, size, _ = inputsize
    self.taskcla = taskcla
    self.ntasks = len(self.taskcla)
    nn = torch.nn

    expand_factor = 1.117  # increases the number of parameters

    def column_width(base):
        # Each column receives an equal share of the expanded base width.
        return int(expand_factor * base / self.ntasks)

    # Init task columns subnets. Containers are registered in this exact
    # order; the layers themselves are appended in the loop below.
    self.conv1 = nn.ModuleList()
    self.sizec1 = column_width(64)

    self.conv2 = nn.ModuleList()
    self.V2scale = nn.ModuleList()
    # For conv layers the dimensionality reduction in the adapters is
    # performed by 1x1 convolutions.
    self.V2x1 = nn.ModuleList()
    self.U2 = nn.ModuleList()
    self.sizec2 = column_width(128)

    self.conv3 = nn.ModuleList()
    self.V3scale = nn.ModuleList()
    self.V3x1 = nn.ModuleList()
    self.U3 = nn.ModuleList()
    self.sizec3 = column_width(256)

    self.fc1 = nn.ModuleList()
    self.Vf1scale = nn.ModuleList()
    self.Vf1 = nn.ModuleList()
    self.Uf1 = nn.ModuleList()
    self.sizefc1 = column_width(2048)

    self.fc2 = nn.ModuleList()
    self.Vf2scale = nn.ModuleList()
    self.Vf2 = nn.ModuleList()
    self.Uf2 = nn.ModuleList()
    self.sizefc2 = column_width(2048)

    self.last = nn.ModuleList()
    self.Vflscale = nn.ModuleList()
    self.Vfl = nn.ModuleList()
    self.Ufl = nn.ModuleList()

    self.maxpool = nn.MaxPool2d(2)
    self.relu = nn.ReLU()
    self.drop1 = nn.Dropout(0.2)
    self.drop2 = nn.Dropout(0.5)

    kernel1 = size // 8
    kernel2 = size // 10

    # Declare task columns subnets.
    for t, n in self.taskcla:
        # The side length is halved after each conv, presumably matching
        # self.maxpool being applied in forward().
        self.conv1.append(nn.Conv2d(ncha, self.sizec1, kernel_size=kernel1))
        side = utils.compute_conv_output_size(size, kernel1) // 2

        self.conv2.append(
            nn.Conv2d(self.sizec1, self.sizec2, kernel_size=kernel2))
        side = utils.compute_conv_output_size(side, kernel2) // 2

        self.conv3.append(nn.Conv2d(self.sizec2, self.sizec3, kernel_size=2))
        side = utils.compute_conv_output_size(side, 2) // 2

        flat = self.sizec3 * side * side
        self.fc1.append(nn.Linear(flat, self.sizefc1))
        self.fc2.append(nn.Linear(self.sizefc1, self.sizefc2))
        self.last.append(nn.Linear(self.sizefc2, n))

        if not t > 0:
            # The first column has no previous columns to connect to.
            continue

        # Lateral connections with previous columns. For every layer: a
        # length-t scale vector, a reduction from t stacked inputs back to
        # one column's width, and a projection to the layer's output width.
        self.V2scale.append(nn.Embedding(1, t))
        self.V2x1.append(
            nn.Conv2d(t * self.sizec1, self.sizec1, kernel_size=1, stride=1))
        self.U2.append(
            nn.Conv2d(self.sizec1, self.sizec2, kernel_size=kernel2))

        self.V3scale.append(nn.Embedding(1, t))
        self.V3x1.append(
            nn.Conv2d(t * self.sizec2, self.sizec2, kernel_size=1, stride=1))
        self.U3.append(nn.Conv2d(self.sizec2, self.sizec3, kernel_size=2))

        self.Vf1scale.append(nn.Embedding(1, t))
        self.Vf1.append(nn.Linear(t * flat, flat))
        self.Uf1.append(nn.Linear(flat, self.sizefc1))

        self.Vf2scale.append(nn.Embedding(1, t))
        self.Vf2.append(nn.Linear(t * self.sizefc1, self.sizefc1))
        self.Uf2.append(nn.Linear(self.sizefc1, self.sizefc2))

        self.Vflscale.append(nn.Embedding(1, t))
        self.Vfl.append(nn.Linear(t * self.sizefc2, self.sizefc2))
        self.Ufl.append(nn.Linear(self.sizefc2, n))
def __init__(self, inputsize, taskcla):
    """Build ``M`` parallel modules per layer plus one head per task.

    Args:
        inputsize: Triple whose first two items are the channel count and
            the input side length; the third item is ignored.
        taskcla: Sequence of ``(t, n)`` pairs; a ``Linear(sizefc2, n)`` head
            is created for each pair. Stored as ``self.taskcla``.

    Attributes set here include ``N``, ``M`` and ``L`` (configuration
    constants) and ``bestPath``, an integer array of shape
    ``(ntasks, L, N)`` initialised to -1.
    """
    super(Net, self).__init__()
    ncha, size, _ = inputsize
    self.taskcla = taskcla
    self.ntasks = len(self.taskcla)

    # Alternative configurations, kept for reference (disabled):
    #   Config of Sec 2.5 in the paper:
    #     expand_factor = 0.231  # to match num params
    #     self.N = 5
    #     self.M = 20  # Large M numbers like this, given our architecture,
    #                  # produce no training
    #   Config of Sec 2.4 in the paper:
    #     expand_factor = 0.325  # match num params
    #     self.N = 3
    #     self.M = 10
    # Active: better config found by us.
    expand_factor = 0.258  # match num params
    self.N = 3
    self.M = 16
    self.L = 5  # our architecture has 5 layers

    # We need to remember this between the tasks.
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24; the builtin gives the same dtype on every NumPy version.
    self.bestPath = -1 * np.ones(
        (self.ntasks, self.L, self.N), dtype=int)

    # Init modules subnets: one container per layer, with each layer's
    # width scaled by expand_factor.
    self.conv1 = torch.nn.ModuleList()
    self.sizec1 = int(expand_factor * 64)

    self.conv2 = torch.nn.ModuleList()
    self.sizec2 = int(expand_factor * 128)

    self.conv3 = torch.nn.ModuleList()
    self.sizec3 = int(expand_factor * 256)

    self.fc1 = torch.nn.ModuleList()
    self.sizefc1 = int(expand_factor * 2048)

    self.fc2 = torch.nn.ModuleList()
    self.sizefc2 = int(expand_factor * 2048)

    self.last = torch.nn.ModuleList()

    self.maxpool = torch.nn.MaxPool2d(2)
    self.relu = torch.nn.ReLU()
    self.drop1 = torch.nn.Dropout(0.2)
    self.drop2 = torch.nn.Dropout(0.5)

    # Declare the M modules of every layer. The side length is halved after
    # each conv, presumably matching self.maxpool applied in forward().
    for j in range(self.M):
        self.conv1.append(
            torch.nn.Conv2d(ncha, self.sizec1, kernel_size=size // 8))
        s = utils.compute_conv_output_size(size, size // 8)
        s = s // 2
        self.conv2.append(
            torch.nn.Conv2d(self.sizec1, self.sizec2,
                            kernel_size=size // 10))
        s = utils.compute_conv_output_size(s, size // 10)
        s = s // 2
        self.conv3.append(
            torch.nn.Conv2d(self.sizec2, self.sizec3, kernel_size=2))
        s = utils.compute_conv_output_size(s, 2)
        s = s // 2
        self.fc1.append(torch.nn.Linear(self.sizec3 * s * s, self.sizefc1))
        self.fc2.append(torch.nn.Linear(self.sizefc1, self.sizefc2))

    # One output head per task, in taskcla order.
    for t, n in self.taskcla:
        self.last.append(torch.nn.Linear(self.sizefc2, n))

    return