def __init__(self, dimension, add_batch_norm=True):
    super(ContextGating, self).__init__()
    self.fc = nn.Linear(dimension, dimension)
    self.add_batch_norm = add_batch_norm
    self.batch_norm = nn.BatchNorm1d(dimension)
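# A minimal sketch of the forward pass this constructor implies, assuming the
# standard Context Gating formulation gate = sigmoid(BN(W x)), output = gate * x
# (and the usual module-level `import torch`). Illustrative only, not the
# original author's forward().
def forward(self, x):
    gates = self.fc(x)
    if self.add_batch_norm:
        gates = self.batch_norm(gates)
    gates = torch.sigmoid(gates)
    return x * gates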
def __init__(self, PMT_grid): super(S2ConvNet_deep, self).__init__() grid_s2 = PMT_grid grid_so3_1 = so3_near_identity_grid(n_alpha=6, max_beta=np.pi/16, n_beta=1, max_gamma=2*np.pi, n_gamma=6) grid_so3_2 = so3_near_identity_grid(n_alpha=6, max_beta=np.pi/ 8, n_beta=1, max_gamma=2*np.pi, n_gamma=6) grid_so3_3 = so3_near_identity_grid(n_alpha=6, max_beta=np.pi/ 4, n_beta=1, max_gamma=2*np.pi, n_gamma=6) grid_so3_4 = so3_near_identity_grid(n_alpha=6, max_beta=np.pi/ 2, n_beta=1, max_gamma=2*np.pi, n_gamma=6) self.convolutional = nn.Sequential( S2Convolution( nfeature_in = 1029, nfeature_out = 20, b_in = 15, b_out = 15, grid=grid_s2), nn.ReLU(inplace=False), SO3Convolution( nfeature_in = 20, nfeature_out = 16, b_in = 15, b_out = 8, grid=grid_so3_1), nn.ReLU(inplace=False), SO3Convolution( nfeature_in = 16, nfeature_out = 16, b_in = 8, b_out = 8, grid=grid_so3_2), nn.ReLU(inplace=False), SO3Convolution( nfeature_in = 16, nfeature_out = 24, b_in = 8, b_out = 4, grid=grid_so3_2), nn.ReLU(inplace=False), SO3Convolution( nfeature_in = 24, nfeature_out = 24, b_in = 4, b_out = 4, grid=grid_so3_3), nn.ReLU(inplace=False), SO3Convolution( nfeature_in = 24, nfeature_out = 32, b_in = 4, b_out = 2, grid=grid_so3_3), nn.ReLU(inplace=False), SO3Convolution( nfeature_in = 32, nfeature_out = 64, b_in = 2, b_out = 2, grid=grid_so3_4), nn.ReLU(inplace=False) ) self.linear = nn.Sequential( # linear 1 nn.BatchNorm1d(64), nn.Linear(in_features=64,out_features=64), nn.ReLU(inplace=False), # linear 2 nn.BatchNorm1d(64), nn.Linear(in_features=64, out_features=32), nn.ReLU(inplace=False), # linear 3 nn.BatchNorm1d(32), nn.Linear(in_features=32, out_features=2) )
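# Hedged sketch of the forward pass that the convolutional/linear split above
# suggests: the SO(3) feature maps are integrated down to one scalar per channel
# before the fully connected head. `so3_integrate` is the s2cnn helper normally
# used for this step; treat the whole block as an assumption, not verbatim code
# from this repository.
def forward(self, x):
    x = self.convolutional(x)   # (batch, 64, 2*b_out, 2*b_out, 2*b_out) on SO(3)
    x = so3_integrate(x)        # (batch, 64)
    return self.linear(x)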
def __init__(self, num_classes=1000, block=BasicBlock, layers=[2, 2, 2, 2],depth=18): super(ResNet_imagenet, self).__init__() self.inflate = 1 self.inplanes = 16*self.inflate n = int((depth) / 6)+2 # The layers with binary activations are defined as BinConv2d whereas layers with multi-bit activations are defined as BinConv2d2 self.conv1=nn.Conv2d(3,int(64*self.inflate), kernel_size=7, stride=2, padding=3,bias=False) self.bn1= nn.BatchNorm2d(int(64*self.inflate)) self.relu1=nn.ReLU(inplace=True) self.maxpool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.conv2=BinConv2d2(int(64*self.inflate), int(64*self.inflate), kernel_size=3, stride=1, padding=1) self.bn2= nn.BatchNorm2d(int(64*self.inflate)) self.relu2=nn.ReLU(inplace=True) ####################################################### self.conv3=BinConv2d2(int(64*self.inflate), int(64*self.inflate), kernel_size=3, stride=1, padding=1) self.bn3= nn.BatchNorm2d(int(64*self.inflate)) self.relu3=nn.ReLU(inplace=True) ####################################################### self.conv4=BinConv2d2(int(64*self.inflate), int(64*self.inflate), kernel_size=3, stride=1, padding=1) self.bn4= nn.BatchNorm2d(int(64*self.inflate)) self.relu4=nn.ReLU(inplace=True) ####################################################### self.conv5=BinConv2d2(int(64*self.inflate), int(64*self.inflate), kernel_size=3, stride=1, padding=1) self.bn5= nn.BatchNorm2d(int(64*self.inflate)) self.relu5=nn.ReLU(inplace=True) ####################################################### self.conv6=BinConv2d2(int(64*self.inflate), int(64*self.inflate), kernel_size=3, stride=1, padding=1) self.bn6= nn.BatchNorm2d(int(64*self.inflate)) self.relu6=nn.ReLU(inplace=True) ####################################################### self.conv7=BinConv2d2(int(64*self.inflate), int(64*self.inflate), kernel_size=3, stride=1, padding=1) self.bn7= nn.BatchNorm2d(int(64*self.inflate)) self.relu7=nn.ReLU(inplace=True) ####################################################### #########Layer################ self.conv8=BinConv2d2(int(64*self.inflate), int(128*self.inflate), kernel_size=3, stride=2, padding=1) self.bn8= nn.BatchNorm2d(int(128*self.inflate)) self.resconv1=nn.Sequential(BinConv2d2(int(64*self.inflate), int(128*self.inflate), kernel_size=1, stride=2, padding=0), nn.BatchNorm2d(int(128*self.inflate)), nn.ReLU(inplace=True),) self.relu8=nn.ReLU(inplace=True) ####################################################### self.conv9=BinConv2d2(int(128*self.inflate), int(128*self.inflate), kernel_size=3, stride=1, padding=1) self.bn9= nn.BatchNorm2d(int(128*self.inflate)) self.relu9=nn.ReLU(inplace=True) ####################################################### self.conv10=BinConv2d2(int(128*self.inflate), int(128*self.inflate), kernel_size=3, stride=1, padding=1) self.bn10= nn.BatchNorm2d(int(128*self.inflate)) self.relu10=nn.ReLU(inplace=True) ####################################################### self.conv11=BinConv2d2(int(128*self.inflate), int(128*self.inflate), kernel_size=3, stride=1, padding=1) self.bn11= nn.BatchNorm2d(int(128*self.inflate)) self.relu11=nn.ReLU(inplace=True) ####################################################### self.conv12=BinConv2d2(int(128*self.inflate), int(128*self.inflate), kernel_size=3, stride=1, padding=1) self.bn12= nn.BatchNorm2d(int(128*self.inflate)) self.relu12=nn.ReLU(inplace=True) ####################################################### self.conv13=BinConv2d2(int(128*self.inflate), int(128*self.inflate), kernel_size=3, stride=1, padding=1) self.bn13= 
nn.BatchNorm2d(int(128*self.inflate)) self.relu13=nn.ReLU(inplace=True) ####################################################### self.conv14=BinConv2d2(int(128*self.inflate), int(128*self.inflate), kernel_size=3, stride=1, padding=1) self.bn14= nn.BatchNorm2d(int(128*self.inflate)) self.relu14=nn.ReLU(inplace=True) ####################################################### self.conv15=BinConv2d2(int(128*self.inflate), int(128*self.inflate), kernel_size=3, stride=1, padding=1) self.bn15= nn.BatchNorm2d(int(128*self.inflate)) self.relu15=nn.ReLU(inplace=True) ####################################################### #########Layer################ self.conv16=BinConv2d2(int(128*self.inflate), int(256*self.inflate), kernel_size=3, stride=2, padding=1) self.bn16= nn.BatchNorm2d(int(256*self.inflate)) self.resconv2=nn.Sequential(BinConv2d2(int(128*self.inflate), int(256*self.inflate), kernel_size=1, stride=2, padding=0), nn.BatchNorm2d(int(256*self.inflate)), nn.ReLU(inplace=True),) self.relu16=nn.ReLU(inplace=True) ####################################################### self.conv17=BinConv2d2(int(256*self.inflate), int(256*self.inflate), kernel_size=3, stride=1, padding=1) self.bn17= nn.BatchNorm2d(int(256*self.inflate)) self.relu17=nn.ReLU(inplace=True) ####################################################### self.conv18=BinConv2d2(int(256*self.inflate), int(256*self.inflate), kernel_size=3, stride=1, padding=1) self.bn18= nn.BatchNorm2d(int(256*self.inflate)) self.relu18=nn.ReLU(inplace=True) ####################################################### self.conv19=BinConv2d2(int(256*self.inflate), int(256*self.inflate), kernel_size=3, stride=1, padding=1) self.bn19= nn.BatchNorm2d(int(256*self.inflate)) self.relu19=nn.ReLU(inplace=True) ####################################################### self.conv20=BinConv2d2(int(256*self.inflate), int(256*self.inflate), kernel_size=3, stride=1, padding=1) self.bn20= nn.BatchNorm2d(int(256*self.inflate)) self.relu20=nn.ReLU(inplace=True) ####################################################### self.conv21=BinConv2d2(int(256*self.inflate), int(256*self.inflate), kernel_size=3, stride=1, padding=1) self.bn21= nn.BatchNorm2d(int(256*self.inflate)) self.relu21=nn.ReLU(inplace=True) ####################################################### self.conv22=BinConv2d2(int(256*self.inflate), int(256*self.inflate), kernel_size=3, stride=1, padding=1) self.bn22= nn.BatchNorm2d(int(256*self.inflate)) self.relu22=nn.ReLU(inplace=True) ####################################################### self.conv23=BinConv2d2(int(256*self.inflate), int(256*self.inflate), kernel_size=3, stride=1, padding=1) self.bn23= nn.BatchNorm2d(int(256*self.inflate)) self.relu23=nn.ReLU(inplace=True) ####################################################### self.conv24=BinConv2d2(int(256*self.inflate), int(256*self.inflate), kernel_size=3, stride=1, padding=1) self.bn24= nn.BatchNorm2d(int(256*self.inflate)) self.relu24=nn.ReLU(inplace=True) ####################################################### self.conv25=BinConv2d2(int(256*self.inflate), int(256*self.inflate), kernel_size=3, stride=1, padding=1) self.bn25= nn.BatchNorm2d(int(256*self.inflate)) self.relu25=nn.ReLU(inplace=True) ####################################################### self.conv26=BinConv2d2(int(256*self.inflate), int(256*self.inflate), kernel_size=3, stride=1, padding=1) self.bn26= nn.BatchNorm2d(int(256*self.inflate)) self.relu26=nn.ReLU(inplace=True) ####################################################### 
self.conv27=BinConv2d2(int(256*self.inflate), int(256*self.inflate), kernel_size=3, stride=1, padding=1) self.bn27= nn.BatchNorm2d(int(256*self.inflate)) self.relu27=nn.ReLU(inplace=True) ####################################################### #########Layer################ self.conv28=BinConv2d2(int(256*self.inflate), int(512*self.inflate), kernel_size=3, stride=2, padding=1) self.bn28= nn.BatchNorm2d(int(512*self.inflate)) self.resconv3=nn.Sequential(BinConv2d2(int(256*self.inflate), int(512*self.inflate), kernel_size=1, stride=2, padding=0), nn.BatchNorm2d(int(512*self.inflate)), nn.ReLU(inplace=True),) self.relu28=nn.ReLU(inplace=True) ####################################################### self.conv29=BinConv2d2(int(512*self.inflate), int(512*self.inflate), kernel_size=3, stride=1, padding=1) self.bn29= nn.BatchNorm2d(int(512*self.inflate)) self.relu29=nn.ReLU(inplace=True) ####################################################### self.conv30=BinConv2d2(int(512*self.inflate), int(512*self.inflate), kernel_size=3, stride=1, padding=1) self.bn30= nn.BatchNorm2d(int(512*self.inflate)) self.relu30=nn.ReLU(inplace=True) ####################################################### self.conv31=BinConv2d2(int(512*self.inflate), int(512*self.inflate), kernel_size=3, stride=1, padding=1) self.bn31= nn.BatchNorm2d(int(512*self.inflate)) self.relu31=nn.ReLU(inplace=True) ####################################################### self.conv32=BinConv2d2(int(512*self.inflate), int(512*self.inflate), kernel_size=3, stride=1, padding=1) self.bn32= nn.BatchNorm2d(int(512*self.inflate)) self.relu32=nn.ReLU(inplace=True) ####################################################### self.conv33=BinConv2d2(int(512*self.inflate), int(512*self.inflate), kernel_size=3, stride=1, padding=1) self.bn33= nn.BatchNorm2d(int(512*self.inflate)) self.relu33=nn.ReLU(inplace=True) ####################################################### #########Layer################ self.avgpool=nn.AvgPool2d(7) self.bn34= nn.BatchNorm1d(int(512*self.inflate)) self.fc=nn.Linear(int(512*self.inflate),num_classes) self.bn35= nn.BatchNorm1d(num_classes) self.logsoftmax=nn.LogSoftmax() #init_model(self) #self.regime = { # 0: {'optimizer': 'SGD', 'lr': 1e-1, # 'weight_decay': 1e-4, 'momentum': 0.9}, # 81: {'lr': 1e-4}, # 122: {'lr': 1e-5, 'weight_decay': 0}, # 164: {'lr': 1e-6} #} self.regime = { 0: {'optimizer': 'SGD', 'lr': 5e-3}, 101: {'lr': 1e-3}, 142: {'lr': 5e-4}, 184: {'lr': 1e-4}, 220: {'lr': 1e-5} }
def __init__(
    self,
    n_classes,
    deno,
    in_out=None,
    feat_dim=None,
    graph_size=None,
    method='cos',
    sparsify=False,
    non_lin='HT',
    normalize=[True, True],
    attention=False,
    gk=8,
    aft_nonlin=None,
):
    super(Graph_Multi_Video, self).__init__()
    self.num_classes = n_classes
    self.deno = deno
    self.graph_size = graph_size
    self.sparsify = sparsify
    self.gk = gk

    if in_out is None:
        in_out = [2048, 64]
    if feat_dim is None:
        feat_dim = [2048, 64]

    num_layers = len(in_out) - 1
    print('NUM LAYERS', num_layers, in_out)

    self.bn = nn.BatchNorm1d(2048, affine=False, track_running_stats=False)
    self.linear_layers = nn.ModuleList()
    self.linear_layers_after = nn.ModuleList()

    for idx_layer_num, layer_num in enumerate(range(num_layers)):
        if non_lin == 'HT':
            non_lin_curr = nn.Hardtanh()
        elif non_lin == 'RL':
            non_lin_curr = nn.ReLU()
        else:
            error_message = 'Non lin %s not valid' % non_lin
            raise ValueError(error_message)

        idx_curr = idx_layer_num * 2
        lin_curr = []
        lin_curr.append(
            nn.Linear(feat_dim[idx_curr], feat_dim[idx_curr + 1], bias=False))
        lin_curr.append(copy.deepcopy(non_lin_curr))
        lin_curr.append(
            nn.BatchNorm1d(feat_dim[idx_curr + 1],
                           affine=False,
                           track_running_stats=False))
        lin_curr = nn.Sequential(*lin_curr)
        self.linear_layers.append(lin_curr)

        last_linear = []
        # last_linear.append(copy.deepcopy(non_lin_curr))
        if normalize[0]:
            last_linear.append(Normalize())
        # last_linear.append(nn.BatchNorm1d(
        last_linear.append(nn.Dropout(0.5))
        last_linear.append(nn.Linear(feat_dim[idx_curr + 1], n_classes))
        last_linear = nn.Sequential(*last_linear)
        self.linear_layers_after.append(last_linear)

    self.graph_layers = nn.ModuleList()
    for num_layer in range(num_layers):
        self.graph_layers.append(
            Graph_Layer_Wrapper(in_out[num_layer],
                                n_out=in_out[num_layer + 1],
                                non_lin=None,
                                method=method,
                                aft_nonlin=aft_nonlin))

    last_graph = []
    # if aft_nonlin is None:
    #     if non_lin == 'HT':
    #         last_graph.append(nn.Hardtanh())
    #     elif non_lin == 'RL':
    #         last_graph.append(nn.ReLU())
    #     else:
    #         error_message = 'Non lin %s not valid' % non_lin
    #         raise ValueError(error_message)
    # if normalize[1]:
    #     last_graph.append(Normalize())
    last_graph.append(nn.Dropout(0.5))
    last_graph.append(nn.Linear(in_out[-1], n_classes))
    last_graph = nn.Sequential(*last_graph)
    self.last_graph = last_graph

    self.num_branches = num_layers + 1
    self.attention = attention
    print('self.num_branches', self.num_branches)
def __init__(self, num_classes, last_stride, model_path, neck, neck_feat, model_name, pretrain_choice): super(Baseline, self).__init__() if model_name == 'resnet18': self.in_planes = 512 self.base = ResNet(last_stride=last_stride, block=BasicBlock, layers=[2, 2, 2, 2]) elif model_name == 'resnet34': self.in_planes = 512 self.base = ResNet(last_stride=last_stride, block=BasicBlock, layers=[3, 4, 6, 3]) elif model_name == 'resnet50': self.base = ResNet(last_stride=last_stride, block=Bottleneck, layers=[3, 4, 6, 3]) elif model_name == 'resnet101': self.base = ResNet(last_stride=last_stride, block=Bottleneck, layers=[3, 4, 23, 3]) elif model_name == 'resnet152': self.base = ResNet(last_stride=last_stride, block=Bottleneck, layers=[3, 8, 36, 3]) elif model_name == 'se_resnet50': self.base = SENet(block=SEResNetBottleneck, layers=[3, 4, 6, 3], groups=1, reduction=16, dropout_p=None, inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) elif model_name == 'se_resnet101': self.base = SENet(block=SEResNetBottleneck, layers=[3, 4, 23, 3], groups=1, reduction=16, dropout_p=None, inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) elif model_name == 'se_resnet152': self.base = SENet(block=SEResNetBottleneck, layers=[3, 8, 36, 3], groups=1, reduction=16, dropout_p=None, inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) elif model_name == 'se_resnext50': self.base = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 6, 3], groups=32, reduction=16, dropout_p=None, inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) elif model_name == 'se_resnext101': self.base = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 23, 3], groups=32, reduction=16, dropout_p=None, inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) elif model_name == 'senet154': self.base = SENet(block=SEBottleneck, layers=[3, 8, 36, 3], groups=64, reduction=16, dropout_p=0.2, last_stride=last_stride) elif model_name == 'resnet50_ibn_a': self.base = resnet50_ibn_a(last_stride) elif model_name == 'resnet101_ibn_a': self.base = resnet101_ibn_a(last_stride) # if pretrain_choice == 'imagenet': # self.base.load_param(model_path) # print('Loading pretrained ImageNet model......') self.gap = nn.AdaptiveAvgPool2d(1) # self.gap = nn.AdaptiveMaxPool2d(1) self.num_classes = num_classes self.neck = neck self.neck_feat = neck_feat if self.neck == 'no': self.classifier = nn.Linear(self.in_planes, self.num_classes) # self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) # new add by luo # self.classifier.apply(weights_init_classifier) # new add by luo elif self.neck == 'bnneck': self.bottleneck = nn.BatchNorm1d(self.in_planes) self.bottleneck.bias.requires_grad_(False) # no shift self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) self.bottleneck.apply(weights_init_kaiming) self.classifier.apply(weights_init_classifier)
def __init__(self, options): super(MLP, self).__init__() # Reading options: self.input_dim = options.input_dim self.hidden_dim = int(options.hidden_dim) self.N_hid = int(options.N_hid) self.num_classes = options.num_classes self.drop_rate = float(options.drop_rate) self.use_batchnorm = bool(int(options.use_batchnorm)) self.use_laynorm = bool(int(options.use_laynorm)) self.use_cuda = bool(int(options.use_cuda)) self.resnet = bool(int(options.resnet)) self.skip_conn = bool(int(options.skip_conn)) self.act = options.act self.cost = options.cost # List initialization self.wx = nn.ModuleList([]) self.droplay = nn.ModuleList([]) if self.use_batchnorm: self.bn_wx = nn.ModuleList([]) if self.use_laynorm: self.ln = nn.ModuleList([]) if self.act == "relu": self.act = nn.ReLU() if self.act == "tanh": self.act = nn.Tanh() if self.act == "sigmoid": self.act = nn.Sigmoid() if self.act == "normrelu": self.act = normrelu() curr_dim = self.input_dim for i in range(self.N_hid): # wx initialization if self.use_batchnorm: self.wx.append(nn.Linear(curr_dim, self.hidden_dim, bias=False)) self.bn_wx.append( nn.BatchNorm1d(self.hidden_dim, momentum=0.05)) else: self.wx.append(nn.Linear(curr_dim, self.hidden_dim)) self.wx[i].weight = torch.nn.Parameter( torch.Tensor(self.hidden_dim, curr_dim).uniform_( -np.sqrt(0.01 / (curr_dim + self.hidden_dim)), np.sqrt(0.01 / (curr_dim + self.hidden_dim)))) self.wx[i].bias = torch.nn.Parameter(torch.zeros(self.hidden_dim)) # layer norm initialization if self.use_laynorm: self.ln.append(LayerNorm(self.hidden_dim)) # dropout self.droplay.append(nn.Dropout(p=self.drop_rate)) curr_dim = self.hidden_dim # output layer initialization self.fco = nn.Linear(curr_dim, self.num_classes) self.fco.weight = torch.nn.Parameter( torch.zeros(self.num_classes, curr_dim)) self.fco.bias = torch.nn.Parameter(torch.zeros(self.num_classes)) # loss definition if self.cost == "nll": self.criterion = nn.NLLLoss() if self.cost == "mse": self.criterion = torch.nn.MSELoss()
def __init__(self, depth, pretrained=True, cut_at_pooling=False, num_features=0, norm=False, dropout=0, num_classes=0, metric=None, s=64, m=0.35): super(ResNetIBN, self).__init__() self.depth = depth self.pretrained = pretrained self.cut_at_pooling = cut_at_pooling resnet = ResNetIBN.__factory[depth](pretrained=pretrained) resnet.layer4[0].conv2.stride = (1, 1) resnet.layer4[0].downsample[0].stride = (1, 1) self.base = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4) self.gap = GeneralizedMeanPoolingP() if not self.cut_at_pooling: self.num_features = num_features self.norm = norm self.dropout = dropout self.has_embedding = num_features > 0 self.num_classes = num_classes out_planes = resnet.fc.in_features # Append new layers if self.has_embedding: self.feat = nn.Linear(out_planes, self.num_features) self.feat_bn = nn.BatchNorm1d(self.num_features) init.kaiming_normal_(self.feat.weight, mode='fan_out') init.constant_(self.feat.bias, 0) else: # Change the num_features to CNN output channels self.num_features = out_planes self.feat_bn = nn.BatchNorm1d(self.num_features) self.feat_bn.bias.requires_grad_(False) if self.dropout > 0: self.drop = nn.Dropout(self.dropout) if self.num_classes > 0: if metric is not None: self.classifier = build_metric(metric, self.num_features, self.num_classes, s, m) else: self.classifier = nn.Linear(self.num_features, self.num_classes, bias=False) init.normal_(self.classifier.weight, std=0.001) init.constant_(self.feat_bn.weight, 1) init.constant_(self.feat_bn.bias, 0) if not pretrained: self.reset_params()
def __init__(self, block, num_classes=1000): self.inplanes = 64 global arrays # for plotting arrays = [] super(ResNet, self).__init__() self.offset = args.offset self.offset_input = args.offset_input if self.offset > 0: self.generate_offsets = True with torch.no_grad(): self.register_buffer('act2_offsets', torch.zeros(1)) if self.offset_input > 0: self.generate_offsets = True with torch.no_grad(): self.register_buffer('input_offsets', torch.zeros(1)) #self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) self.conv1 = NoisyConv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False, num_bits=0, num_bits_weight=args.q_w, noise=args.n_w, test_noise=args.n_w_test, stochastic=args.stochastic, debug=args.debug_noise) self.bn1 = nn.BatchNorm2d(64, track_running_stats=args.track_running_stats) if args.act_max > 0: self.relu = nn.Hardtanh(0.0, args.act_max, inplace=True) else: self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) if args.q_a_first > 0: #when quantizing, if q_a_first is not specified, set it to 6 bits self.q_a_first = args.q_a_first elif args.q_a > 0: self.q_a_first = 6 elif args.q_a_first == 8: self.q_a_first = 0 else: self.q_a_first = 0 if self.q_a_first > 0: self.quantize1 = QuantMeasure(self.q_a_first, stochastic=args.stochastic, scale=args.q_scale, calculate_running=args.calculate_running, pctl=args.pctl, debug=args.debug_quant, inplace=args.q_inplace) if args.q_a > 0: self.quantize2 = QuantMeasure(args.q_a, stochastic=args.stochastic, scale=args.q_scale, calculate_running=args.calculate_running, pctl=args.pctl, debug=args.debug_quant, inplace=args.q_inplace) self.layer1 = self._make_layer(block, 64) self.layer2 = self._make_layer(block, 128, stride=2) self.layer3 = self._make_layer(block, 256, stride=2) self.layer4 = self._make_layer(block, 512, stride=2) self.avgpool = nn.AvgPool2d(7, stride=1) #self.fc = nn.Linear(512, num_classes) self.fc = NoisyLinear(512, num_classes, bias=True, num_bits=0, num_bits_weight=args.q_w, noise=args.n_w, test_noise=args.n_w_test, stochastic=args.stochastic, debug=args.debug_noise) if args.bn_out: self.bn_out = nn.BatchNorm1d(1000, track_running_stats=args.track_running_stats) for m in self.modules(): #print(m) if isinstance(m, nn.Conv2d): #print('is instance of nn.Conv2D\n') n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_()
def __init__(self, user_num, item_num, factors, act_function, num_layers, batch_norm, q, epochs, lr, reg_1=0., reg_2=0., loss_type='CL', gpuid='0', early_stop=True): """ Point-wise NFM Recommender Class Parameters ---------- user_num : int, the number of users item_num : int, the number of items factors : int, the number of latent factor act_function : str, activation function for hidden layer num_layers : int, number of hidden layers batch_norm : bool, whether to normalize a batch of data q : float, dropout rate epochs : int, number of training epochs lr : float, learning rate reg_1 : float, first-order regularization term reg_2 : float, second-order regularization term loss_type : str, loss function type gpuid : str, GPU ID early_stop : bool, whether to activate early stop mechanism """ super(PointNFM, self).__init__() self.factors = factors self.act_function = act_function self.num_layers = num_layers self.batch_norm = batch_norm self.dropout = q self.lr = lr self.reg_1 = reg_1 self.reg_2 = reg_2 self.epochs = epochs self.loss_type = loss_type self.early_stop = early_stop os.environ['CUDA_VISIBLE_DEVICES'] = gpuid cudnn.benchmark = True self.embed_user = nn.Embedding(user_num, factors) self.embed_item = nn.Embedding(item_num, factors) self.u_bias = nn.Embedding(user_num, 1) self.i_bias = nn.Embedding(item_num, 1) self.bias_ = nn.Parameter(torch.tensor([0.0])) FM_modules = [] if self.batch_norm: FM_modules.append(nn.BatchNorm1d(factors)) FM_modules.append(nn.Dropout(self.dropout)) self.FM_layers = nn.Sequential(*FM_modules) MLP_modules = [] in_dim = factors for _ in range(self.num_layers): # dim out_dim = in_dim # dim MLP_modules.append(nn.Linear(in_dim, out_dim)) in_dim = out_dim if self.batch_norm: MLP_modules.append(nn.BatchNorm1d(out_dim)) if self.act_function == 'relu': MLP_modules.append(nn.ReLU()) elif self.act_function == 'sigmoid': MLP_modules.append(nn.Sigmoid()) elif self.act_function == 'tanh': MLP_modules.append(nn.Tanh()) MLP_modules.append(nn.Dropout(self.dropout)) self.deep_layers = nn.Sequential(*MLP_modules) predict_size = factors # layers[-1] if layers else factors self.prediction = nn.Linear(predict_size, 1, bias=False) self._init_weight()
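# The constructor above only wires the embeddings, FM_layers and deep_layers
# together. A hedged sketch of the bi-interaction pooling that an NFM forward
# pass typically applies between the embedding lookup and those towers
# (standard NFM formula, not copied from this repository's forward()):
def bi_interaction(self, embeddings):
    # embeddings: (batch, num_fields, factors)
    sum_sq = embeddings.sum(dim=1) ** 2       # (sum_i v_i)^2
    sq_sum = (embeddings ** 2).sum(dim=1)     # sum_i v_i^2
    return 0.5 * (sum_sq - sq_sum)            # (batch, factors)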
def __init__(self, in_dim, mid_dim, out_dim):
    super(PredictionMLP, self).__init__()
    self.l1 = nn.Sequential(
        nn.Linear(in_dim, mid_dim),
        nn.BatchNorm1d(mid_dim),
        nn.ReLU(inplace=True))
    self.l2 = nn.Linear(mid_dim, out_dim)
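# Hedged sketch of the matching forward pass (a SimSiam-style prediction MLP:
# hidden layer with BN + ReLU, then a plain linear output). Illustrative only.
def forward(self, x):
    x = self.l1(x)
    x = self.l2(x)
    return x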
def main(): ''' Demonstrate Mish activation function to classify Fashion MNIST ''' # Parse command line arguments parser = argparse.ArgumentParser(description='Argument parser') # Add argument to choose Mish activation function parser.add_argument('--activation', action='store', default=MISH, help='Activation function for demonstration.', choices=[MISH]) # Add argument to choose the way to initialize the model parser.add_argument( '--model_initialization', action='store', default='class', help='Model initialization mode: use custom class or use Sequential.', choices=['class', 'sequential']) # Parse command line arguments results = parser.parse_args() activation = results.activation model_initialization = results.model_initialization # Define a transform transform = transforms.Compose([transforms.ToTensor()]) # Download and load the training data for Fashion MNIST trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform) trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True) # Download and load the test data for Fashion MNIST testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform) testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True) print("Create model with {activation} function.\n".format( activation=activation)) # Initialize the model if (model_initialization == 'class'): # Initialize the model using defined Classifier class model = Classifier(activation=activation) else: # Setup the activation function if (activation == MISH): activation_function = mish() # Initialize the model using nn.Sequential model = nn.Sequential( OrderedDict([ ('fc1', nn.Linear(784, 256)), ('mila', activation_function), # use custom activation function ('fc2', nn.Linear(256, 128)), ('bn2', nn.BatchNorm1d(num_features=128)), ('relu2', nn.ReLU()), ('dropout', nn.Dropout(0.3)), ('fc3', nn.Linear(128, 64)), ('bn3', nn.BatchNorm1d(num_features=64)), ('relu3', nn.ReLU()), ('logits', nn.Linear(64, 10)), ('logsoftmax', nn.LogSoftmax(dim=1)) ])) # Train the model print( "Training the model on Fashion MNIST dataset with {} activation function.\n" .format(activation)) criterion = nn.NLLLoss() optimizer = optim.Adam(model.parameters(), lr=0.003) epochs = 5 for e in range(epochs): running_loss = 0 for images, labels in trainloader: images = images.view(images.shape[0], -1) log_ps = model(images) loss = criterion(log_ps, labels) optimizer.zero_grad() loss.backward() optimizer.step() running_loss += loss.item() else: print(f"Training loss: {running_loss}")
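# A hedged evaluation sketch to pair with the training loop above: accuracy on
# the held-out split with gradients disabled. It reuses the names defined in the
# snippet (model, testloader, testset); nothing here comes from the original
# script.
model.eval()
correct = 0
with torch.no_grad():
    for images, labels in testloader:
        images = images.view(images.shape[0], -1)
        preds = model(images).argmax(dim=1)
        correct += (preds == labels).sum().item()
print("Test accuracy: {:.3f}".format(correct / len(testset)))
model.train()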
def __init__(self, n_inputs=2, n_outputs=1, n_enc_layers=4, n_hidden_units=64, n_dec_layers=1, multiplication=True, ln=False, bn=False, activation=nn.ReLU, instance_norm=False, sample_norm=False, n_samples=1000, **kwargs): """ Note: sample_norm = True first tranposes the data so that the sample_dim is last to reuse existing norm implementations """ super().__init__() if sample_norm and any([bn, ln, instance_norm]): raise ValueError("Cannot have sample_norm and other norms") enc_layers = [] for i in range(n_enc_layers): if i == 0: if sample_norm: enc_layers.append( nn.ConvTranspose1d(n_inputs, n_hidden_units, 1)) else: enc_layers.append( nn.Linear(in_features=n_inputs, out_features=n_hidden_units)) else: if sample_norm: enc_layers.append( nn.ConvTranspose1d(n_hidden_units, n_hidden_units, 1)) else: enc_layers.append( nn.Linear(in_features=n_hidden_units, out_features=n_hidden_units)) if ln: enc_layers.append(nn.LayerNorm(n_hidden_units)) if bn: enc_layers.append(nn.BatchNorm1d(n_samples)) if instance_norm: enc_layers.append(nn.InstanceNorm1d(n_samples)) if sample_norm: if i == 0: enc_layers.append( nn.InstanceNorm1d(n_hidden_units, affine=True)) enc_layers.append(activation()) # remove last relu enc_layers = enc_layers[:-1] self.enc = nn.Sequential(*enc_layers) dec_layers = [] # for i in range(n_dec_layers - 1): # dec_layers.append(nn.Linear(in_features=n_hidden_units, out_features=n_hidden_units)) # dec_layers.append(activation()) # dec_layers.append(nn.Linear(in_features=n_hidden_units, out_features=n_outputs)) for i in range(n_dec_layers): if i == n_dec_layers - 1: dec_layers.append( nn.Linear(in_features=n_hidden_units, out_features=n_outputs)) else: dec_layers.append( nn.Linear(in_features=n_hidden_units, out_features=n_hidden_units)) if ln: dec_layers.append(nn.LayerNorm(n_hidden_units)) dec_layers.append(activation()) self.dec = nn.Sequential(*dec_layers) self.multiplication = multiplication self.sample_norm = sample_norm
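# Hedged sketch of a DeepSets-style forward for this encoder/decoder pair:
# encode every sample, aggregate over the sample dimension (the `multiplication`
# flag suggests a product aggregation, sum otherwise), then decode. It ignores
# the sample_norm transpose path and may differ from the original model.
def forward(self, x):
    # x: (batch, n_samples, n_inputs)
    h = self.enc(x)
    h = h.prod(dim=1) if self.multiplication else h.sum(dim=1)
    return self.dec(h)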
def __init__(self,options): super(SincNetModel,self).__init__() self.cnn_N_filt=options['cnn_N_filt'] self.cnn_len_filt=options['cnn_len_filt'] self.cnn_max_pool_len=options['cnn_max_pool_len'] self.cnn_act=options['cnn_act'] self.cnn_drop=options['cnn_drop'] self.cnn_use_laynorm=options['cnn_use_laynorm'] self.cnn_use_batchnorm=options['cnn_use_batchnorm'] self.cnn_use_laynorm_inp=options['cnn_use_laynorm_inp'] self.cnn_use_batchnorm_inp=options['cnn_use_batchnorm_inp'] self.input_dim=int(options['input_dim']) self.fs=options['fs'] self.N_cnn_lay=len(options['cnn_N_filt']) self.conv = nn.ModuleList([]) self.bn = nn.ModuleList([]) self.ln = nn.ModuleList([]) self.act = nn.ModuleList([]) self.drop = nn.ModuleList([]) if self.cnn_use_laynorm_inp: self.ln0=LayerNorm(self.input_dim) if self.cnn_use_batchnorm_inp: self.bn0=nn.BatchNorm1d([self.input_dim],momentum=0.05) current_input=self.input_dim for i in range(self.N_cnn_lay): N_filt=int(self.cnn_N_filt[i]) len_filt=int(self.cnn_len_filt[i]) # dropout self.drop.append(nn.Dropout(p=self.cnn_drop[i])) # activation self.act.append(act_fun(self.cnn_act[i])) # layer norm initialization self.ln.append(LayerNorm([N_filt,int((current_input-self.cnn_len_filt[i]+1)/self.cnn_max_pool_len[i])])) self.bn.append(nn.BatchNorm1d(N_filt,int((current_input-self.cnn_len_filt[i]+1)/self.cnn_max_pool_len[i]),momentum=0.05)) if i==0: self.conv.append(SincConv_fast(self.cnn_N_filt[0],self.cnn_len_filt[0],self.fs)) else: self.conv.append(nn.Conv1d(self.cnn_N_filt[i-1], self.cnn_N_filt[i], self.cnn_len_filt[i])) current_input=int((current_input-self.cnn_len_filt[i]+1)/self.cnn_max_pool_len[i]) self.out_dim=current_input*N_filt
def __init__(self, options): super(MLP, self).__init__() self.input_dim=int(options['input_dim']) self.fc_lay=options['fc_lay'] self.fc_drop=options['fc_drop'] self.fc_use_batchnorm=options['fc_use_batchnorm'] self.fc_use_laynorm=options['fc_use_laynorm'] self.fc_use_laynorm_inp=options['fc_use_laynorm_inp'] self.fc_use_batchnorm_inp=options['fc_use_batchnorm_inp'] self.fc_act=options['fc_act'] self.wx = nn.ModuleList([]) self.bn = nn.ModuleList([]) self.ln = nn.ModuleList([]) self.act = nn.ModuleList([]) self.drop = nn.ModuleList([]) # input layer normalization if self.fc_use_laynorm_inp: self.ln0=LayerNorm(self.input_dim) # input batch normalization if self.fc_use_batchnorm_inp: self.bn0=nn.BatchNorm1d([self.input_dim],momentum=0.05) self.N_fc_lay=len(self.fc_lay) current_input=self.input_dim # Initialization of hidden layers for i in range(self.N_fc_lay): # dropout self.drop.append(nn.Dropout(p=self.fc_drop[i])) # activation self.act.append(act_fun(self.fc_act[i])) add_bias=True # layer norm initialization self.ln.append(LayerNorm(self.fc_lay[i])) self.bn.append(nn.BatchNorm1d(self.fc_lay[i],momentum=0.05)) if self.fc_use_laynorm[i] or self.fc_use_batchnorm[i]: add_bias=False # Linear operations self.wx.append(nn.Linear(current_input, self.fc_lay[i],bias=add_bias)) # weight initialization self.wx[i].weight = torch.nn.Parameter(torch.Tensor(self.fc_lay[i],current_input).uniform_(-np.sqrt(0.01/(current_input+self.fc_lay[i])),np.sqrt(0.01/(current_input+self.fc_lay[i])))) self.wx[i].bias = torch.nn.Parameter(torch.zeros(self.fc_lay[i])) current_input=self.fc_lay[i]
def __init__(self, in_channels, out_channels, **kwargs):
    super(BasicConv1d, self).__init__()
    self.conv = nn.Conv1d(in_channels, out_channels, bias=False, **kwargs)
    self.bn = nn.BatchNorm1d(out_channels, eps=0.001)
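# Hedged sketch of the usual conv -> BN -> ReLU forward for this kind of basic
# block (assumes the common `import torch.nn.functional as F`); the real
# forward() is not shown in the source.
def forward(self, x):
    return F.relu(self.bn(self.conv(x)), inplace=True)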
def batchnorm_1d(in_features, eps=1e-5, momentum=0.0001, affine=True):
    return nn.BatchNorm1d(in_features, eps=eps, momentum=momentum, affine=affine)
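# Hypothetical usage of the factory above inside a small classification head;
# the layer sizes are made up purely for illustration.
head = nn.Sequential(
    nn.Linear(128, 64),
    batchnorm_1d(64),
    nn.ReLU(inplace=True),
    nn.Linear(64, 10),
)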
def __init__(self, in_channels, planes, stride, global_index_num=100, attention_window=20, downsample=True, dropout=0.5, kernel_sizes=[3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25]): super(BasicLayer, self).__init__() self.d_k = planes self.kernel_sizes = kernel_sizes self.h = len(self.kernel_sizes) self.d_model = self.d_k * self.h self.attn = None self.in_channels = in_channels self.models = nn.ModuleList([ clones( nn.Sequential( nn.Conv1d(in_channels=in_channels, out_channels=planes // 4, kernel_size=1, bias=False), # nn.Dropout(dropout), nn.ReLU(inplace=True), nn.BatchNorm1d(planes // 4), nn.Conv1d(planes // 4, planes, kernel_size, stride, (kernel_size - 1) // 2, bias=False), # nn.Dropout(dropout), nn.ReLU(inplace=True), nn.BatchNorm1d(planes), nn.Conv1d(planes, planes, kernel_size, stride, (kernel_size - 1) // 2, bias=False), # nn.Dropout(dropout), nn.ReLU(inplace=True), nn.BatchNorm1d(planes), # nn.Conv2d(in_channels=in_channels,out_channels=planes,) ), 3) for kernel_size in self.kernel_sizes ]) self.linears = clones(nn.Linear(self.d_model, self.d_model), 3) self.dropout_p = dropout self.relu = nn.ReLU(inplace=True) self.dropout = nn.Dropout(p=dropout) # self.sublayer= SublayerConnection(self.d_model,dropout) self.sublayer_bn = nn.BatchNorm1d(in_channels) # self.sublayer_bn =LayerNorm(batch_size) self.sublayer_dropout = nn.Dropout(p=dropout) # self.attention_mask= torch.zeros(()) self.global_index_num = global_index_num self.attention_window = attention_window
def __init__(self, dims):
    super().__init__()
    self.conv1 = nn.Conv1d(dims, dims, kernel_size=1, bias=False)
    self.conv2 = nn.Conv1d(dims, dims, kernel_size=1, bias=False)
    self.batch_norm1 = nn.BatchNorm1d(dims)
    self.batch_norm2 = nn.BatchNorm1d(dims)
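# One plausible forward pass for this pair of 1x1 convolutions over
# (batch, dims, length) tensors: two conv+BN steps with a residual connection.
# This is an assumption for illustration (and assumes
# `import torch.nn.functional as F`), not the module's actual forward().
def forward(self, x):
    out = F.relu(self.batch_norm1(self.conv1(x)))
    out = self.batch_norm2(self.conv2(out))
    return F.relu(out + x)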
def __init__(self, options): super(RNN, self).__init__() # Reading options: self.input_dim = options.input_dim self.hidden_dim = int(options.hidden_dim) self.N_hid = int(options.N_hid) self.num_classes = options.num_classes self.drop_rate = float(options.drop_rate) self.use_batchnorm = bool(int(options.use_batchnorm)) self.use_laynorm = bool(int(options.use_laynorm)) self.use_cuda = bool(int(options.use_cuda)) self.bidir = bool(int(options.bidir)) self.skip_conn = bool(int(options.skip_conn)) self.act = options.act self.act_gate = options.act_gate self.cost = options.cost self.twin_reg = bool(int(options.twin_reg)) self.twin_w = float(options.twin_w) self.cnn_pre = bool(int(options.cnn_pre)) # List initialization self.wx = nn.ModuleList([]) # Update Gate self.uh = nn.ModuleList([]) # Candidate (feed-forward) if self.cnn_pre: self.cnn = CNN_feaproc(options) if self.use_batchnorm: self.bn_wx = nn.ModuleList([]) if self.use_laynorm: self.ln = nn.ModuleList([]) if self.act == "relu": self.act = nn.ReLU() if self.act == "tanh": self.act = nn.Tanh() if self.act == "sigmoid": self.act = nn.Sigmoid() if self.act == "normrelu": self.act = normrelu() if self.act_gate == "relu": self.act_gate = nn.ReLU() if self.act_gate == "tanh": self.act_gate = nn.Tanh() if self.act_gate == "sigmoid": self.act_gate = nn.Sigmoid() if self.act_gate == "normrelu": self.act_gate = normrelu() curr_dim = self.input_dim for i in range(self.N_hid): # wx initialization if self.use_batchnorm: self.wx.append(nn.Linear(curr_dim, self.hidden_dim, bias=False)) else: self.wx.append(nn.Linear(curr_dim, self.hidden_dim)) # uh initialization self.uh.append( nn.Linear(self.hidden_dim, self.hidden_dim, bias=False)) # batch norm initialization if self.use_batchnorm: self.bn_wx.append( nn.BatchNorm1d(self.hidden_dim, momentum=0.05)) # layer norm initialization if self.use_laynorm: self.ln.append(LayerNorm(self.hidden_dim)) if self.bidir: curr_dim = 2 * self.hidden_dim else: curr_dim = self.hidden_dim # output layer initialization self.fco = nn.Linear(curr_dim, self.num_classes) # loss definition if self.cost == "nll": self.criterion = nn.NLLLoss() if self.cost == "mse": self.criterion = torch.nn.MSELoss()
def __init__(self, num_classes=11, batch_size=4): super(deepAlexNet, self).__init__() self.features = nn.Sequential( # conv 1 nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), nn.BatchNorm2d( 64), #put in the number of features from the expected input nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2), # conv 2 nn.Conv2d(64, 192, kernel_size=5, padding=2), nn.BatchNorm2d( 192), #put in the number of features from the expected input nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2), # conv 3 nn.Conv2d(192, 320, kernel_size=3, padding=1), nn.BatchNorm2d( 320), #put in the number of features from the expected input nn.ReLU(inplace=True), # conv 4 nn.Conv2d(320, 384, kernel_size=3, padding=1), nn.BatchNorm2d( 384), #put in the number of features from the expected input nn.ReLU(inplace=True), # conv 5 nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.BatchNorm2d( 384), #put in the number of features from the expected input nn.ReLU(inplace=True), # conv 6 nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.BatchNorm2d( 256), #put in the number of features from the expected input nn.ReLU(inplace=True), # conv 7 nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.BatchNorm2d( 256), #put in the number of features from the expected input nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2)) self.classifier = nn.Sequential( # linear 1 nn.Dropout(), nn.Linear(256 * 3 * 3, 4096), nn.BatchNorm1d(4096), nn.ReLU(inplace=True), # linear 2 nn.Dropout(), nn.Linear(4096, 4096), nn.BatchNorm1d(4096), nn.ReLU(inplace=True), # linear 3 nn.Dropout(), nn.Linear(4096, 2048), nn.BatchNorm1d(2048), nn.ReLU(inplace=True), # linear 4 nn.Dropout(), nn.Linear(2048, 1024), nn.BatchNorm1d(1024), nn.ReLU(inplace=True), # linear 5, output nn.Linear(1024, num_classes), )
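# Hedged sketch of the AlexNet-style forward() implied by the shapes above: the
# classifier's first Linear expects a flattened 256*3*3 feature map, so the
# input resolution must be chosen accordingly. Illustrative only.
def forward(self, x):
    x = self.features(x)
    x = torch.flatten(x, 1)   # (batch, 256 * 3 * 3)
    return self.classifier(x)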
def test_batchnorm1d_special(self):
    c = torch.randn(BATCH_SIZE, 224)
    model = nn.BatchNorm1d(224)
    self.run_model_test(model, train=True, input=c, batch_size=BATCH_SIZE)
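# Outside this test harness, the same case can be reproduced with a plain ONNX
# export; torch.onnx.export is the public API, and the batch size and file name
# below are arbitrary choices for illustration.
import torch
import torch.nn as nn

model = nn.BatchNorm1d(224).eval()
dummy = torch.randn(4, 224)
torch.onnx.export(model, dummy, "batchnorm1d.onnx")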
def __init__(self, num_classes):
    super().__init__()
    self.bn = nn.BatchNorm1d(num_classes)
    self.fc = nn.Linear(num_classes, num_classes)
    self.xent = CrossEntropyLoss()
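# A hedged guess at how the three members fit together: normalize the incoming
# features, project them, and score against the targets with cross entropy.
# The real forward() is not shown in the source.
def forward(self, x, targets):
    logits = self.fc(self.bn(x))
    loss = self.xent(logits, targets)
    return logits, loss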
def __init__(self, args, input_features, nf, J): super(GNN_active, self).__init__() self.args = args self.input_features = input_features self.nf = nf self.J = J self.num_layers = 2 for i in range(self.num_layers // 2): if i == 0: module_w = Wcompute(self.input_features, nf, operator='J2', activation='softmax', ratio=[2, 2, 1, 1]) module_l = Gconv(self.input_features, int(nf / 2), 2) else: module_w = Wcompute(self.input_features + int(nf / 2) * i, nf, operator='J2', activation='softmax', ratio=[2, 2, 1, 1]) module_l = Gconv(self.input_features + int(nf / 2) * i, int(nf / 2), 2) self.add_module('layer_w{}'.format(i), module_w) self.add_module('layer_l{}'.format(i), module_l) self.conv_active_1 = nn.Conv1d(self.input_features + int(nf / 2) * 1, self.input_features + int(nf / 2) * 1, 1) self.bn_active = nn.BatchNorm1d(self.input_features + int(nf / 2) * 1) self.conv_active_2 = nn.Conv1d(self.input_features + int(nf / 2) * 1, 1, 1) for i in range(int(self.num_layers / 2), self.num_layers): if i == 0: module_w = Wcompute(self.input_features, nf, operator='J2', activation='softmax', ratio=[2, 2, 1, 1]) module_l = Gconv(self.input_features, int(nf / 2), 2) else: module_w = Wcompute(self.input_features + int(nf / 2) * i, nf, operator='J2', activation='softmax', ratio=[2, 2, 1, 1]) module_l = Gconv(self.input_features + int(nf / 2) * i, int(nf / 2), 2) self.add_module('layer_w{}'.format(i), module_w) self.add_module('layer_l{}'.format(i), module_l) self.w_comp_last = Wcompute(self.input_features + int(self.nf / 2) * self.num_layers, nf, operator='J2', activation='softmax', ratio=[2, 2, 1, 1]) self.layer_last = Gconv(self.input_features + int(self.nf / 2) * self.num_layers, args.train_N_way, 2, bn_bool=False)
def __init__(
    self,
    n_mel_channels=80,
    postnet_embedding_dim=512,
    postnet_kernel_size=5,
    postnet_n_convolutions=5,
):
    super(PostNet, self).__init__()
    self.convolutions = nn.ModuleList()
    self.convolutions.append(
        nn.Sequential(
            ConvNorm(
                n_mel_channels,
                postnet_embedding_dim,
                kernel_size=postnet_kernel_size,
                stride=1,
                padding=int((postnet_kernel_size - 1) / 2),
                dilation=1,
                w_init_gain="tanh",
            ),
            nn.BatchNorm1d(postnet_embedding_dim),
            nn.Tanh(),
            nn.Dropout(),
        )
    )
    for i in range(1, postnet_n_convolutions - 1):
        self.convolutions.append(
            nn.Sequential(
                ConvNorm(
                    postnet_embedding_dim,
                    postnet_embedding_dim,
                    kernel_size=postnet_kernel_size,
                    stride=1,
                    padding=int((postnet_kernel_size - 1) / 2),
                    dilation=1,
                    w_init_gain="tanh",
                ),
                nn.BatchNorm1d(postnet_embedding_dim),
                nn.Tanh(),
                nn.Dropout(),
            )
        )
    self.convolutions.append(
        nn.Sequential(
            ConvNorm(
                postnet_embedding_dim,
                n_mel_channels,
                kernel_size=postnet_kernel_size,
                stride=1,
                padding=int((postnet_kernel_size - 1) / 2),
                dilation=1,
                w_init_gain="linear",
            ),
            nn.BatchNorm1d(n_mel_channels),
            nn.Dropout(),
        )
    )
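# Hedged sketch of how these stacks are normally applied in a Tacotron-2-style
# PostNet: the input mel spectrogram is (batch, n_mel_channels, time), each
# nn.Sequential above already bundles its own Tanh/Dropout (the last block has
# no Tanh), and the result is usually added to the input mel outside this
# module. Illustrative only.
def forward(self, x):
    for conv in self.convolutions:
        x = conv(x)
    return x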
def __init__(self, depth, pretrained=True, cut_at_pooling=False, num_features=0, norm=False, dropout=0, num_classes=0, FCN=False, radius=1., thresh=0.5): super(ResNet_3stripe, self).__init__() self.depth = depth self.pretrained = pretrained self.cut_at_pooling = cut_at_pooling self.FCN = FCN self.feat_bn = nn.BatchNorm1d(num_features) self.feat_bn.bias.requires_grad_(False) # Construct base (pretrained) resnet if depth not in ResNet_3stripe.__factory: raise KeyError("Unsupported depth:", depth) resnet = ResNet_3stripe.__factory[depth](pretrained=pretrained) self.base = resnet # ==========================add dilation=============================# if self.FCN: # for mo in self.base.layer4[0].modules(): # if isinstance(mo, nn.Conv2d): # mo.stride = (1,1) # ================append conv for FCN==============================# self.num_features = num_features self.num_classes = 751 # num_classes self.dropout = dropout out_planes = resnet.fc.in_features self.local_conv = nn.Conv2d(out_planes, self.num_features, kernel_size=1, padding=0, bias=False) init.kaiming_normal(self.local_conv.weight, mode='fan_out') # init.constant(self.local_conv.bias,0) self.feat_bn2d = nn.BatchNorm2d( self.num_features) # may not be used, not working on caffe init.constant(self.feat_bn2d.weight, 1) # initialize BN, may not be used init.constant(self.feat_bn2d.bias, 0) # iniitialize BN, may not be used ##---------------------------stripe1----------------------------------------------# self.instance0 = MLP(input_dim=self.num_features, dimensions=[ self.num_features, self.num_features, self.num_classes ]) self.nnq0 = nn.Linear(self.num_classes, self.num_classes, bias=False) init.normal_(self.nnq0.weight, std=0.001) ##---------------------------stripe1----------------------------------------------# ##---------------------------stripe1----------------------------------------------# self.instance1 = MLP(input_dim=self.num_features, dimensions=[ self.num_features, self.num_features, self.num_classes ]) self.nnq1 = nn.Linear(self.num_classes, self.num_classes, bias=False) init.normal_(self.nnq1.weight, std=0.001) ##---------------------------stripe1----------------------------------------------# ##---------------------------stripe1----------------------------------------------# self.instance2 = MLP(input_dim=self.num_features, dimensions=[ self.num_features, self.num_features, self.num_classes ]) self.nnq2 = nn.Linear(self.num_classes, self.num_classes, bias=False) init.normal_(self.nnq2.weight, std=0.001) ##---------------------------stripe1----------------------------------------------# self.drop = nn.Dropout(self.dropout) self.classifier = nn.Linear(self.num_features, self.num_classes, bias=True) init.normal(self.classifier.weight, std=0.001) init.constant(self.classifier.bias, 0) elif not self.cut_at_pooling: self.num_features = num_features self.norm = norm self.dropout = dropout self.has_embedding = num_features > 0 self.num_classes = num_classes self.radius = nn.Parameter(torch.FloatTensor([radius])) self.thresh = nn.Parameter(torch.FloatTensor([thresh])) out_planes = self.base.fc.in_features # Append new layers if self.has_embedding: self.feat = nn.Linear(out_planes, self.num_features, bias=False) self.feat_bn = nn.BatchNorm1d(self.num_features) init.kaiming_normal(self.feat.weight, mode='fan_out') else: # Change the num_features to CNN output channels self.num_features = out_planes if self.dropout > 0: self.drop = nn.Dropout(self.dropout) if self.num_classes > 0: self.classifier = nn.Linear(self.num_features, 
self.num_classes, bias=True) init.normal(self.classifier.weight, std=0.001) init.constant(self.classifier.bias, 0) if not self.pretrained: self.reset_params()
def __init__(self, n_classes, model_name='resnet50', pooling='GeM', args_pooling: dict = {}, use_fc=False, fc_dim=512, dropout=0.0, loss_module="", s=30.0, margin=0.50, ls_eps=0.0, theta_zero=0.785): """ :param n_classes: :param model_name: name of model from pretrainedmodels e.g. resnet50, resnext101_32x4d, pnasnet5large :param pooling: One of ('SPoC', 'MAC', 'RMAC', 'GeM', 'Rpool', 'Flatten', 'CompactBilinearPooling') :param loss_module: One of ('arcface', 'cosface', 'softmax') """ super(LandmarkNet, self).__init__() self.backbone = getattr(pretrainedmodels, model_name)(num_classes=1000) final_in_features = self.backbone.last_linear.in_features # HACK: work around for this issue https://github.com/Cadene/pretrained-models.pytorch/issues/120 self.backbone = nn.Sequential(*list(self.backbone.children())[:-2]) # TODO:CompactBilinearPooling self.pooling = getattr(cirtorch.pooling, pooling)(**args_pooling) self.use_fc = use_fc if use_fc: self.dropout = nn.Dropout(p=dropout) self.fc = nn.Linear(final_in_features, fc_dim) self.bn = nn.BatchNorm1d(fc_dim) self._init_params() final_in_features = fc_dim self.loss_module = loss_module print("Current Loss:{}".format(self.loss_module)) if loss_module == 'arcface': self.final = ArcMarginProduct(final_in_features, n_classes, s=s, m=margin, easy_margin=False, ls_eps=ls_eps) elif loss_module == 'cosface': self.final = AddMarginProduct(final_in_features, n_classes, s=s, m=margin) elif loss_module == 'adacos': self.final = AdaCos(final_in_features, n_classes, m=margin, theta_zero=theta_zero) # New by Group elif loss_module == "AdditiveMarginSoftmaxLoss": self.final = AdMSoftmaxLoss(final_in_features, n_classes, s=s, m=margin) elif loss_module == "LSoftmax": self.final = LSoftmaxLinear(final_in_features, n_classes, margin=2) elif loss_module == "Softmax": self.final = nn.Linear(final_in_features, n_classes) else: raise NotImplementedError("Loss Not Implemented.")
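# Hedged sketch of the feature-extraction path the constructor implies:
# backbone -> pooling -> flatten -> optional fc/bn head. The real forward()
# presumably also feeds these features (plus labels) to self.final for the
# margin-based losses; this helper is an illustration, not the source code.
def extract_features(self, x):
    x = self.backbone(x)
    x = self.pooling(x).view(x.size(0), -1)
    if self.use_fc:
        x = self.dropout(x)
        x = self.fc(x)
        x = self.bn(x)
    return x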
def block(in_feat, out_feat, normalize=True):
    layers = [nn.Linear(in_feat, out_feat)]
    if normalize:
        # note: the second positional argument of nn.BatchNorm1d is eps,
        # so this call sets eps=0.8 rather than the momentum
        layers.append(nn.BatchNorm1d(out_feat, 0.8))
    layers.append(nn.LeakyReLU(0.2, inplace=True))
    return layers
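# The helper above is typically chained into a generator-style MLP like this;
# latent_dim and out_dim are placeholder sizes chosen purely for illustration.
latent_dim, out_dim = 100, 784
generator = nn.Sequential(
    *block(latent_dim, 128, normalize=False),
    *block(128, 256),
    *block(256, 512),
    nn.Linear(512, out_dim),
    nn.Tanh(),
)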
def __init__(self, hidden_size, seq_len):
    super(Batch_norm_overtime, self).__init__()
    self.bn = nn.BatchNorm1d(hidden_size)
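# Hedged sketch of the forward pass: nn.BatchNorm1d expects (batch, channels,
# length), so a module normalizing "over time" most likely permutes a
# (batch, seq_len, hidden_size) input around the norm. This is an assumption,
# not the original forward().
def forward(self, x):
    x = x.transpose(1, 2)       # (batch, hidden_size, seq_len)
    x = self.bn(x)
    return x.transpose(1, 2)    # back to (batch, seq_len, hidden_size)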
def run_mle_net(X, Y, X_test, Y_test, params, is_nonlinear=False): # Training/validation split th_frac = 0.8 inds = np.random.permutation(X.shape[0]) train_inds = inds[:int(X.shape[0] * th_frac)] hold_inds = inds[int(X.shape[0] * th_frac):] X_train, X_hold = X[train_inds, :], X[hold_inds, :] Y_train, Y_hold = Y[train_inds, :], Y[hold_inds, :] X_train_t = torch.Tensor(X_train).cuda() Y_train_t = torch.Tensor(Y_train).cuda() X_hold_t = torch.Tensor(X_hold).cuda() Y_hold_t = torch.Tensor(Y_hold).cuda() X_test_t = torch.Tensor(X_test).cuda() Y_test_t = torch.Tensor(Y_test).cuda() Y_train_int_t = torch.LongTensor(np.where( Y_train_t.cpu().numpy())[1]).cuda() Y_hold_int_t = torch.LongTensor(np.where(Y_hold_t.cpu().numpy())[1]).cuda() Y_test_int_t = torch.LongTensor(np.where(Y_test_t.cpu().numpy())[1]).cuda() d_ = Variable(torch.Tensor(params['d'])).cuda() # Expected inventory cost and solver for newsvendor scheduling problem cost = lambda Z, Y : (params['c_lin'] * Z + 0.5 * params['c_quad'] * (Z**2) + params['b_lin'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0) + 0.5 * params['b_quad'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0)**2 + params['h_lin'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0) + 0.5 * params['h_quad'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0)**2) \ .mean() newsvendor_solve = SolveNewsvendor(params).cuda() cost_news_fn = lambda x, y: cost(newsvendor_solve(x), y) if is_nonlinear: # Non-linear model, use ADAM step size 1e-3 layer_sizes = [X_train.shape[1], 200, 200, Y_train.shape[1]] layers = reduce(operator.add, [[ nn.Linear(a, b), nn.BatchNorm1d(b), nn.ReLU(), nn.Dropout(p=0.5) ] for a, b in zip(layer_sizes[0:-2], layer_sizes[1:-1])]) layers += [nn.Linear(layer_sizes[-2], layer_sizes[-1]), nn.Softmax()] model = nn.Sequential(*layers).cuda() step_size = 1e-3 else: # Linear model, use ADAM step size 1e-2 model = nn.Sequential(nn.Linear(X_train.shape[1], Y_train.shape[1]), nn.Softmax()).cuda() step_size = 1e-2 opt = optim.Adam(model.parameters(), lr=step_size) # For early stopping hold_costs, test_costs = [], [] model_states = [] num_stop_rounds = 20 for i in range(1000): # model.eval() test_cost = batch.get_cost_nll(100, i, model, X_test_t, Y_test_int_t, nn.NLLLoss()) hold_cost = batch.get_cost_nll(100, i, model, X_hold_t, Y_hold_int_t, nn.NLLLoss()) model.train() train_cost = batch_train(150, i, X_train_t, Y_train_t, Y_train_int_t, model, nn.NLLLoss(), opt) print(i, train_cost.data[0], test_cost.data[0], hold_cost.data[0]) # Early stopping # See https://github.com/locuslab/e2e-model-learning-staging/commit/d183c65d0cd53d611a77a4508da65c25cf88c93d test_costs.append(test_cost.data[0]) hold_costs.append(hold_cost.data[0]) model_states.append(model.state_dict().copy()) if i > 0 and i % num_stop_rounds == 0: idx = hold_costs.index(min(hold_costs)) # Stop if current cost is worst in num_stop_rounds rounds if max(hold_costs) == hold_cost.data[0]: model.eval() best_model = get_model(X_train, Y_train, X_test, Y_test, params, is_nonlinear) best_model.load_state_dict(model_states[idx]) best_model.cuda() test_cost_news = batch.get_cost(100, i, best_model, X_test_t, Y_test_t, cost_news_fn) return test_cost_news.data[0] else: # Keep only "best" round hold_costs = [hold_costs[idx]] test_costs = [test_costs[idx]] model_states = [model_states[idx]] # # In case of no early stopping, return best run so far idx = hold_costs.index(min(hold_costs)) best_model = get_model(X, Y, X_test, Y_test, params, is_nonlinear) best_model.load_state_dict(model_states[idx]) best_model.cuda() test_cost_news = batch.get_cost(100, i, 
best_model, X_test_t, Y_test_t, cost_news_fn) return test_cost_news.data[0]
def __init__(self,field_size, feature_sizes, embedding_size = 4, attention_size = 4, dropout_shallow = -1.0, dropout_attention = -1.0, attention_layers_activation = 'relu', compression = 0, is_batch_norm = False, use_fm = True, use_ffm = False ): super(AFM, self).__init__() self.field_size = field_size self.feature_sizes = feature_sizes self.embedding_size = embedding_size self.attention_size = attention_size self.dropout_shallow = dropout_shallow self.dropout_attention = dropout_attention self.attention_layers_activation = attention_layers_activation self.compression = compression self.is_batch_norm = is_batch_norm self.use_fm = use_fm self.use_ffm = use_ffm """ check use fm or ffm """ if self.use_fm and self.use_ffm: print("only support one type only, please make sure to choose only fm or ffm part") exit(1) elif self.use_fm: print("The model is afm(fm+attention layers)") elif self.use_ffm: print("The model is affm(ffm+attention layers)") else: print("You have to choose more than one of (fm, ffm) models to use") exit(1) """ bias """ self.bias = torch.nn.Parameter(torch.Tensor(1)) self.bias.data.normal_(0, 0.2) """ fm part """ if self.use_fm: print("Init fm part") self.fm_first_order_embeddings = nn.ModuleList([nn.Embedding(feature_size,1) for feature_size in self.feature_sizes]) if self.dropout_shallow>0.0: self.fm_first_order_dropout = nn.Dropout(self.dropout_shallow) self.fm_second_order_embeddings = nn.ModuleList([nn.Embedding(feature_size, self.embedding_size) for feature_size in self.feature_sizes]) print("Init fm part succeed") """ ffm part """ if self.use_ffm: print("Init ffm part") self.ffm_first_order_embeddings = nn.ModuleList([nn.Embedding(feature_size,1) for feature_size in self.feature_sizes]) if self.dropout_shallow>0.0: self.ffm_first_order_dropout = nn.Dropout(self.dropout_shallow) self.ffm_second_order_embeddings = nn.ModuleList([nn.ModuleList([nn.Embedding(feature_size, self.embedding_size) for i in range(self.field_size)]) for feature_size in self.feature_sizes]) print("Init ffm part succeed") """ attention part """ print("Init attention part") if self.dropout_attention>0.0: self.attention_linear_0_dropout = nn.Dropout(self.dropout_attention) #self.attention_linear_1 = nn.Linear(self.embedding_size, self.attention_size) #self.H = torch.nn.Parameter(torch.Tensor(self.attention_size))#why not nn.Linear? #self.attention_linear_1.weight.data.normal_(0,0.2) self.H = torch.nn.Parameter(torch.Tensor(self.embedding_size))#why not nn.Linear? self.attn_bn = nn.BatchNorm1d(self.field_size*(self.field_size-1)//2, momentum=None) self.attention_linear_2 = nn.Linear(self.field_size*(self.field_size-1)//2, self.field_size*(self.field_size-1)//2) self.attention_linear_2.weight.data.normal_(0,0.2) self.P = torch.nn.Parameter(torch.Tensor(self.embedding_size)) self.H.data.normal_(0, 0.2) self.P.data.normal_(0, 0.2) if self.compression > 0: self.conv1 = torch.nn.Conv1d(self.field_size, self.compression , kernel_size=1, bias=False) nn.init.xavier_uniform_(self.conv1.weight, gain=nn.init.calculate_gain('relu')) self.bn1 = torch.nn.BatchNorm1d(self.compression, momentum=None) print("Init attention part succeed") print("Init succeed") if self.is_batch_norm: self.shallow_bn = torch.nn.BatchNorm1d(self.field_size, momentum=None)