def __init__(self, num_classes=1001, stem_filters=96, penultimate_filters=4032, filters_multiplier=2):
    super(NASNetALarge, self).__init__()
    self.num_classes = num_classes
    self.stem_filters = stem_filters
    self.penultimate_filters = penultimate_filters
    self.filters_multiplier = filters_multiplier

    filters = self.penultimate_filters // 24  # 24 is the default value for the architecture

    self.conv0 = nn.Sequential()
    self.conv0.add_module('conv', nn.Conv2d(in_channels=3, out_channels=self.stem_filters,
                                            kernel_size=3, padding=0, stride=2, bias=False))
    self.conv0.add_module('bn', nn.BatchNorm2d(self.stem_filters, eps=0.001, momentum=0.1, affine=True))

    self.cell_stem_0 = CellStem0(self.stem_filters, num_filters=filters // (filters_multiplier ** 2))
    self.cell_stem_1 = CellStem1(self.stem_filters, num_filters=filters // filters_multiplier)

    self.cell_0 = FirstCell(in_channels_left=filters, out_channels_left=filters // 2,
                            in_channels_right=2 * filters, out_channels_right=filters)
    self.cell_1 = NormalCell(in_channels_left=2 * filters, out_channels_left=filters,
                             in_channels_right=6 * filters, out_channels_right=filters)
    self.cell_2 = NormalCell(in_channels_left=6 * filters, out_channels_left=filters,
                             in_channels_right=6 * filters, out_channels_right=filters)
    self.cell_3 = NormalCell(in_channels_left=6 * filters, out_channels_left=filters,
                             in_channels_right=6 * filters, out_channels_right=filters)
    self.cell_4 = NormalCell(in_channels_left=6 * filters, out_channels_left=filters,
                             in_channels_right=6 * filters, out_channels_right=filters)
    self.cell_5 = NormalCell(in_channels_left=6 * filters, out_channels_left=filters,
                             in_channels_right=6 * filters, out_channels_right=filters)

    self.reduction_cell_0 = ReductionCell0(in_channels_left=6 * filters, out_channels_left=2 * filters,
                                           in_channels_right=6 * filters, out_channels_right=2 * filters)

    self.cell_6 = FirstCell(in_channels_left=6 * filters, out_channels_left=filters,
                            in_channels_right=8 * filters, out_channels_right=2 * filters)
    self.cell_7 = NormalCell(in_channels_left=8 * filters, out_channels_left=2 * filters,
                             in_channels_right=12 * filters, out_channels_right=2 * filters)
    self.cell_8 = NormalCell(in_channels_left=12 * filters, out_channels_left=2 * filters,
                             in_channels_right=12 * filters, out_channels_right=2 * filters)
    self.cell_9 = NormalCell(in_channels_left=12 * filters, out_channels_left=2 * filters,
                             in_channels_right=12 * filters, out_channels_right=2 * filters)
    self.cell_10 = NormalCell(in_channels_left=12 * filters, out_channels_left=2 * filters,
                              in_channels_right=12 * filters, out_channels_right=2 * filters)
    self.cell_11 = NormalCell(in_channels_left=12 * filters, out_channels_left=2 * filters,
                              in_channels_right=12 * filters, out_channels_right=2 * filters)

    self.reduction_cell_1 = ReductionCell1(in_channels_left=12 * filters, out_channels_left=4 * filters,
                                           in_channels_right=12 * filters, out_channels_right=4 * filters)

    self.cell_12 = FirstCell(in_channels_left=12 * filters, out_channels_left=2 * filters,
                             in_channels_right=16 * filters, out_channels_right=4 * filters)
    self.cell_13 = NormalCell(in_channels_left=16 * filters, out_channels_left=4 * filters,
                              in_channels_right=24 * filters, out_channels_right=4 * filters)
    self.cell_14 = NormalCell(in_channels_left=24 * filters, out_channels_left=4 * filters,
                              in_channels_right=24 * filters, out_channels_right=4 * filters)
    self.cell_15 = NormalCell(in_channels_left=24 * filters, out_channels_left=4 * filters,
                              in_channels_right=24 * filters, out_channels_right=4 * filters)
    self.cell_16 = NormalCell(in_channels_left=24 * filters, out_channels_left=4 * filters,
                              in_channels_right=24 * filters, out_channels_right=4 * filters)
    self.cell_17 = NormalCell(in_channels_left=24 * filters, out_channels_left=4 * filters,
                              in_channels_right=24 * filters, out_channels_right=4 * filters)

    self.relu = nn.ReLU()
    self.avg_pool = nn.AvgPool2d(11, stride=1, padding=0)
    self.dropout = nn.Dropout()
    self.last_linear = nn.Linear(24 * filters, self.num_classes)
def __init__(self, n_token, n_layer, n_head, d_model, d_head, d_inner,
             dropout, dropatt, dtype, tie_weight=True, d_embed=None,
             div_val=1, tie_projs=[False], pre_lnorm=False,
             tgt_len=None, ext_len=None, mem_len=None, cutoffs=[],
             adapt_inp=False, same_length=False, attn_type=0,
             clamp_len=-1, sample_softmax=-1):
    super(MemTransformerLM, self).__init__()
    self.n_token = n_token

    d_embed = d_model if d_embed is None else d_embed
    self.d_embed = d_embed
    self.d_model = d_model
    self.n_head = n_head
    self.d_head = d_head
    self.dtype = dtype

    self.word_emb = AdaptiveEmbedding(n_token, d_embed, d_model, cutoffs,
                                      div_val=div_val, dtype=dtype)

    self.drop = nn.Dropout(dropout)

    self.tie_weight = tie_weight
    self.tie_projs = tie_projs
    self.div_val = div_val

    self.n_layer = n_layer

    self.tgt_len = tgt_len
    self.mem_len = mem_len
    self.ext_len = ext_len
    self.max_klen = tgt_len + ext_len + mem_len

    self.attn_type = attn_type
    if attn_type != 0:
        raise RuntimeError('TorchScripted model supports only attn_type == 0')

    self.layers = nn.ModuleList()
    # the default attention
    if attn_type == 0:
        for i in range(n_layer):
            self.layers.append(
                RelPartialLearnableDecoderLayer(
                    n_head, d_model, d_head, d_inner, dropout,
                    tgt_len=tgt_len, ext_len=ext_len, mem_len=mem_len,
                    dropatt=dropatt, pre_lnorm=pre_lnorm))

    self.sample_softmax = sample_softmax
    # use sampled softmax
    if sample_softmax > 0:
        self.out_layer = nn.Linear(d_model, n_token)
        self.tie_weight = tie_weight
        self.sampler = LogUniformSampler(n_token, sample_softmax)
    # use adaptive softmax (including standard softmax)
    else:
        if tie_weight:
            emb_layers = [i.weight for i in self.word_emb.emb_layers]
        else:
            emb_layers = None

        emb_projs = self.word_emb.emb_projs

        self.crit = ProjectedAdaptiveLogSoftmax(
            n_token, d_embed, d_model, cutoffs,
            div_val=div_val, dtype=dtype, tie_projs=tie_projs,
            out_projs=emb_projs, out_layers_weights=emb_layers)

    self.same_length = same_length
    self.clamp_len = clamp_len

    self._create_params()
def __init__(self, hps, *_):
    super(BiLSTMTagger, self).__init__()

    batch_size = hps['batch_size']
    lstm_hidden_dim = hps['sent_hdim']
    sent_embedding_dim_DEP = 2 * hps['sent_edim'] + 1 * hps['pos_edim'] + 1
    sent_embedding_dim_SRL = 3 * hps['sent_edim'] + 1 * hps['pos_edim'] + 1  # for the region mark
    role_embedding_dim = hps['role_edim']
    frame_embedding_dim = role_embedding_dim
    vocab_size = hps['vword']

    self.tagset_size = hps['vbio']
    self.pos_size = hps['vpos']
    self.dep_size = hps['vdep']
    self.frameset_size = hps['vframe']
    self.num_layers = hps['rec_layers']
    self.batch_size = batch_size
    self.hidden_dim = lstm_hidden_dim
    self.word_emb_dim = hps['sent_edim']
    self.specific_dep_size = hps['svdep']

    self.word_embeddings_SRL = nn.Embedding(vocab_size, hps['sent_edim'])
    self.word_embeddings_DEP = nn.Embedding(vocab_size, hps['sent_edim'])
    self.pos_embeddings = nn.Embedding(self.pos_size, hps['pos_edim'])
    self.pos_embeddings_DEP = nn.Embedding(self.pos_size, hps['pos_edim'])
    self.p_lemma_embeddings = nn.Embedding(self.frameset_size, hps['sent_edim'])
    self.dep_embeddings = nn.Embedding(self.dep_size, self.pos_size)
    # self.lr_dep_embeddings = nn.Embedding(self.lr_dep_size, hps[])

    self.word_fixed_embeddings = nn.Embedding(vocab_size, hps['sent_edim'])
    self.word_fixed_embeddings.weight.data.copy_(torch.from_numpy(hps['word_embeddings']))

    self.word_fixed_embeddings_DEP = nn.Embedding(vocab_size, hps['sent_edim'])
    self.word_fixed_embeddings_DEP.weight.data.copy_(torch.from_numpy(hps['word_embeddings']))

    self.role_embeddings = nn.Embedding(self.tagset_size, role_embedding_dim)
    self.frame_embeddings = nn.Embedding(self.frameset_size, frame_embedding_dim)

    self.hidden2tag = nn.Linear(4 * lstm_hidden_dim, 2 * lstm_hidden_dim)
    self.MLP = nn.Linear(2 * lstm_hidden_dim, self.dep_size)
    self.tag2hidden = nn.Linear(self.dep_size, self.pos_size)

    self.SRL_input_dropout = nn.Dropout(p=0.3)
    self.DEP_input_dropout = nn.Dropout(p=0.5)
    self.hidden_state_dropout = nn.Dropout(p=0.3)
    self.label_dropout = nn.Dropout(p=0.5)
    # self.use_dropout = nn.Dropout(p=0.2)

    # The LSTM takes word embeddings as inputs and outputs hidden states
    # with dimensionality hidden_dim.
    self.num_layers = 1
    self.BiLSTM_0 = nn.LSTM(input_size=sent_embedding_dim_DEP, hidden_size=lstm_hidden_dim,
                            batch_first=True, bidirectional=True, num_layers=self.num_layers)
    init.orthogonal_(self.BiLSTM_0.all_weights[0][0])
    init.orthogonal_(self.BiLSTM_0.all_weights[0][1])
    init.orthogonal_(self.BiLSTM_0.all_weights[1][0])
    init.orthogonal_(self.BiLSTM_0.all_weights[1][1])

    self.num_layers = 1
    self.BiLSTM_1 = nn.LSTM(input_size=lstm_hidden_dim * 2, hidden_size=lstm_hidden_dim,
                            batch_first=True, bidirectional=True, num_layers=self.num_layers)
    init.orthogonal_(self.BiLSTM_1.all_weights[0][0])
    init.orthogonal_(self.BiLSTM_1.all_weights[0][1])
    init.orthogonal_(self.BiLSTM_1.all_weights[1][0])
    init.orthogonal_(self.BiLSTM_1.all_weights[1][1])

    self.num_layers = 4
    self.BiLSTM_SRL = nn.LSTM(input_size=sent_embedding_dim_SRL, hidden_size=lstm_hidden_dim,
                              batch_first=True, bidirectional=True, num_layers=self.num_layers)
    init.orthogonal_(self.BiLSTM_SRL.all_weights[0][0])
    init.orthogonal_(self.BiLSTM_SRL.all_weights[0][1])
    init.orthogonal_(self.BiLSTM_SRL.all_weights[1][0])
    init.orthogonal_(self.BiLSTM_SRL.all_weights[1][1])

    # Non-linear map to role embedding
    self.role_map = nn.Linear(in_features=role_embedding_dim * 2, out_features=self.hidden_dim * 4)

    # Init hidden state
    self.hidden = self.init_hidden_spe()
    self.hidden_2 = self.init_hidden_spe()
    self.hidden_3 = self.init_hidden_spe()
    self.hidden_4 = self.init_hidden_share()
def __init__(self, in_features, num_classes, drop=0.0):
    super(Classifier, self).__init__()
    self.add_module('drop', nn.Dropout(drop))
    self.add_module('lin', nn.Linear(in_features, num_classes))
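# A minimal sketch of the forward pass this constructor implies (dropout then
# linear); stated as an assumption, since the source shows only __init__.
# `add_module` registers the children, so they are reachable as attributes.
def forward(self, x):
    return self.lin(self.drop(x))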
def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
             inplanes=128, input_3x3=True, downsample_kernel_size=3,
             downsample_padding=1, num_classes=1000):
    """
    Parameters
    ----------
    block (nn.Module): Bottleneck class.
        - For SENet154: SEBottleneck
        - For SE-ResNet models: SEResNetBottleneck
        - For SE-ResNeXt models: SEResNeXtBottleneck
    layers (list of ints): Number of residual blocks for 4 layers of the
        network (layer1...layer4).
    groups (int): Number of groups for the 3x3 convolution in each
        bottleneck block.
        - For SENet154: 64
        - For SE-ResNet models: 1
        - For SE-ResNeXt models: 32
    reduction (int): Reduction ratio for Squeeze-and-Excitation modules.
        - For all models: 16
    dropout_p (float or None): Drop probability for the Dropout layer.
        If `None` the Dropout layer is not used.
        - For SENet154: 0.2
        - For SE-ResNet models: None
        - For SE-ResNeXt models: None
    inplanes (int): Number of input channels for layer1.
        - For SENet154: 128
        - For SE-ResNet models: 64
        - For SE-ResNeXt models: 64
    input_3x3 (bool): If `True`, use three 3x3 convolutions instead of
        a single 7x7 convolution in layer0.
        - For SENet154: True
        - For SE-ResNet models: False
        - For SE-ResNeXt models: False
    downsample_kernel_size (int): Kernel size for downsampling convolutions
        in layer2, layer3 and layer4.
        - For SENet154: 3
        - For SE-ResNet models: 1
        - For SE-ResNeXt models: 1
    downsample_padding (int): Padding for downsampling convolutions in
        layer2, layer3 and layer4.
        - For SENet154: 1
        - For SE-ResNet models: 0
        - For SE-ResNeXt models: 0
    num_classes (int): Number of outputs in `last_linear` layer.
        - For all models: 1000
    """
    super(SENet, self).__init__()
    self.inplanes = inplanes
    if input_3x3:
        layer0_modules = [
            ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False)),
            ('bn1', nn.BatchNorm2d(64)),
            ('relu1', nn.ReLU(inplace=True)),
            ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)),
            ('bn2', nn.BatchNorm2d(64)),
            ('relu2', nn.ReLU(inplace=True)),
            ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1, bias=False)),
            ('bn3', nn.BatchNorm2d(inplanes)),
            ('relu3', nn.ReLU(inplace=True)),
        ]
    else:
        layer0_modules = [
            ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, padding=3, bias=False)),
            ('bn1', nn.BatchNorm2d(inplanes)),
            ('relu1', nn.ReLU(inplace=True)),
        ]
    # To preserve compatibility with Caffe weights `ceil_mode=True`
    # is used instead of `padding=1`.
    layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)))
    self.conv_2_img = nn.Conv2d(inplanes, 3, kernel_size=1, stride=1, padding=0, bias=False)
    self.layer0 = nn.Sequential(OrderedDict(layer0_modules))
    self.layer1 = self._make_layer(
        block,
        planes=64,
        blocks=layers[0],
        groups=groups,
        reduction=reduction,
        downsample_kernel_size=1,
        downsample_padding=0
    )
    self.layer2 = self._make_layer(
        block,
        planes=128,
        blocks=layers[1],
        stride=2,
        groups=groups,
        reduction=reduction,
        downsample_kernel_size=downsample_kernel_size,
        downsample_padding=downsample_padding
    )
    self.layer3 = self._make_layer(
        block,
        planes=256,
        blocks=layers[2],
        stride=2,
        groups=groups,
        reduction=reduction,
        downsample_kernel_size=downsample_kernel_size,
        downsample_padding=downsample_padding
    )
    self.layer4 = self._make_layer(
        block,
        planes=512,
        blocks=layers[3],
        stride=1,
        groups=groups,
        reduction=reduction,
        downsample_kernel_size=downsample_kernel_size,
        downsample_padding=downsample_padding
    )
    num_features = 512 * block.expansion
    num_inner = num_features // 4
    self.fa_layer = FAModule(num_features, num_inner)
    self.cls_head = nn.Sequential(
        conv3x3(num_features + num_inner, 512),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Dropout2d(0.1)
    )
    self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None
    self.mu = nn.Linear(512, num_classes)
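# A usage sketch based on the docstring's SENet154 column. `SEBottleneck` and
# the layer counts [3, 8, 36, 3] follow the standard SENet154 configuration;
# treat this as an illustrative assumption, since the source shows only the
# constructor of this modified SENet variant.
senet154 = SENet(SEBottleneck, layers=[3, 8, 36, 3], groups=64, reduction=16,
                 dropout_p=0.2, inplanes=128, input_3x3=True,
                 downsample_kernel_size=3, downsample_padding=1,
                 num_classes=1000)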
def __init__(self):
    super(CNN, self).__init__()
    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 64, 3, padding=1, bias=False),  # layer0
        nn.BatchNorm2d(64),  # batch norm is added because the dataset is changed
        nn.ReLU(inplace=True),
    )
    self.conv2 = nn.Sequential(
        nn.Conv2d(64, 64, 3, padding=1, bias=False),  # layer3
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
    )
    self.maxpool1 = nn.Sequential(
        nn.MaxPool2d(2, 2),  # 16*16*64
    )
    self.conv3 = nn.Sequential(
        nn.Conv2d(64, 128, 3, padding=1, bias=False),  # layer7
        nn.BatchNorm2d(128),
        nn.ReLU(inplace=True),
    )
    self.conv4 = nn.Sequential(
        nn.Conv2d(128, 128, 3, padding=1, bias=False),  # layer10
        nn.BatchNorm2d(128),
        nn.ReLU(inplace=True),
    )
    self.maxpool2 = nn.Sequential(
        nn.MaxPool2d(2, 2),  # 8*8*128
    )
    self.conv5 = nn.Sequential(
        nn.Conv2d(128, 256, 3, padding=1, bias=False),  # layer14
        nn.BatchNorm2d(256),
        nn.ReLU(inplace=True),
    )
    self.conv6 = nn.Sequential(
        nn.Conv2d(256, 256, 3, padding=1, bias=False),  # layer17
        nn.BatchNorm2d(256),
        nn.ReLU(inplace=True),
    )
    self.conv7 = nn.Sequential(
        nn.Conv2d(256, 256, 3, padding=1, bias=False),  # layer20
        nn.BatchNorm2d(256),
        nn.ReLU(inplace=True),
    )
    self.maxpool3 = nn.Sequential(
        nn.MaxPool2d(2, 2),  # 4*4*256
    )
    self.conv8 = nn.Sequential(
        nn.Conv2d(256, 512, 3, padding=1, bias=False),  # layer24
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
    )
    self.conv9 = nn.Sequential(
        nn.Conv2d(512, 512, 3, padding=1, bias=False),  # layer27
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
    )
    self.conv10 = nn.Sequential(
        nn.Conv2d(512, 512, 3, padding=1, bias=False),  # layer30
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
    )
    self.maxpool4 = nn.Sequential(
        nn.MaxPool2d(2, 2),  # 2*2*512
    )
    self.conv11 = nn.Sequential(
        nn.Conv2d(512, 512, 3, padding=1, bias=False),  # layer34
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
    )
    self.conv12 = nn.Sequential(
        nn.Conv2d(512, 512, 3, padding=1, bias=False),  # layer37
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
    )
    self.conv13 = nn.Sequential(
        nn.Conv2d(512, 512, 3, padding=1, bias=False),  # layer40
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
    )
    self.maxpool5 = nn.Sequential(
        nn.MaxPool2d(2, 2),  # 1*1*512
    )
    self.fc1 = nn.Sequential(
        nn.Dropout(p=0.5),
        nn.Linear(512, 512, bias=False),  # fc_layer1
        nn.ReLU(inplace=True),
    )
    self.fc2 = nn.Sequential(
        nn.Dropout(p=0.5),
        nn.Linear(512, 512, bias=False),  # fc_layer4
        nn.ReLU(inplace=True),
    )
    self.fc3 = nn.Sequential(
        nn.Linear(512, 10, bias=False),  # fc_layer6
    )
def __init__(self, d_model, d_ff, dropout=0.1):
    super(PositionwiseFeedForward, self).__init__()
    self.w_1 = nn.Linear(d_model, d_ff)
    self.w_2 = nn.Linear(d_ff, d_model)
    self.dropout = nn.Dropout(dropout)
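# A sketch of the matching forward pass: the standard position-wise FFN from
# "Attention Is All You Need" computes w_2(dropout(relu(w_1(x)))). This is an
# assumption about how the module above is used, not code from the source.
def forward(self, x):
    return self.w_2(self.dropout(self.w_1(x).relu()))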
# ### VGG-16 and Resnet-18

# Now that you have created the dataset, we can use it for training and testing
# neural networks. VGG-16 and Resnet-18 are both well-known deep-net
# architectures. VGG-16 is named as such because it has 16 layers in total
# (13 convolutional and 3 fully-connected). Resnet-18, on the other hand, is a
# Resnet architecture that uses skip-connections. PyTorch provides pre-trained
# models of both architectures, and we shall use them directly. If you are
# interested in how they are defined, take a look at the source:
# [VGG](https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py),
# [Resnet](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py)

# In[6]:

vgg16 = models.vgg16(pretrained=True)
resnet18 = models.resnet18(pretrained=True)

# Replace the last layers so that they output only 10 classes
vgg16.classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 4096),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(4096, 10),
)
resnet18.fc = nn.Linear(resnet18.fc.in_features, 10)

# Use CUDA if it is available
use_gpu = False
if torch.cuda.is_available():
    use_gpu = True
    vgg16.cuda()
    resnet18.cuda()
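# A quick sanity check, added here as an illustrative assumption rather than a
# cell from the original notebook: both modified networks should now map a
# batch of 224x224 RGB images to 10 logits.
x = torch.randn(2, 3, 224, 224)
if use_gpu:
    x = x.cuda()
print(vgg16(x).shape)     # torch.Size([2, 10])
print(resnet18(x).shape)  # torch.Size([2, 10])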
def __init__(self):
    super(AEE, self).__init__()
    self.EnE = torch.nn.Sequential(
        nn.Linear(IE_dim, h_dim),
        nn.BatchNorm1d(h_dim),
        nn.ReLU(),
        nn.Dropout())
    if is_training:
        H1 = dropout(H1, drop_prob1)
    H2 = (torch.matmul(H1, W2) + b2).relu()
    if is_training:
        H2 = dropout(H2, drop_prob2)
    return torch.matmul(H2, W3) + b3

num_epochs, lr, batch_size = 5, 100.0, 256
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# Train and test the model:
# d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

# Concise implementation
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens2, num_outputs)
)

for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)

optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              None, None, optimizer)
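# The from-scratch branch above calls a `dropout` helper that is not shown in
# this excerpt. A minimal sketch of inverted dropout consistent with that
# usage: zero each element with probability `drop_prob`, then rescale the
# survivors so the expected activation is unchanged. This is an assumption
# about the missing helper, not the source's verbatim code.
def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        # Everything is dropped
        return torch.zeros_like(X)
    mask = (torch.rand(X.shape) < keep_prob).float()
    return mask * X / keep_prob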
def __init__(self, hidden_size, drop=0.0):
    super().__init__()
    self.fc = nn.Sequential(nn.Linear(hidden_size, 1), nn.ReLU())
    self.dropout = nn.Dropout(drop)
def __init__(
    self,
    num_features,
    embeddingsize,
    hiddensize,
    dropout=0,
    numsoftmax=1,
    shared_weight=None,
    padding_idx=-1,
):
    """Initialize output layer.

    :param num_features: number of candidates to rank
    :param hiddensize: (last) dimension of the input vectors
    :param embeddingsize: (last) dimension of the candidate vectors
    :param numsoftmax: (default 1) number of softmaxes to calculate.
        see arxiv.org/abs/1711.03953 for more info. increasing this
        slows down computation but can add more expressivity to the
        embeddings.
    :param shared_weight: (num_features x esz) vector of weights to use
        as the final linear layer's weight matrix. default None starts
        with a new linear layer.
    :param padding_idx: model should output a large negative number for
        score at this index. if set to -1 (default), this is disabled.
        if >= 0, subtracts one from num_features and always outputs -1e20
        at this index. only used when shared_weight is not None. setting
        this param helps protect gradient from entering shared embedding
        matrices.
    """
    super().__init__()
    self.dropout = nn.Dropout(p=dropout)

    self.padding_idx = padding_idx if shared_weight is not None else -1

    # embedding to scores
    if shared_weight is None:
        # just a regular linear layer
        self.e2s = nn.Linear(embeddingsize, num_features, bias=True)
    else:
        # use shared weights and a bias layer instead
        if padding_idx == 0:
            num_features -= 1  # don't include padding
            shared_weight = shared_weight.narrow(0, 1, num_features)
        elif padding_idx > 0:
            raise RuntimeError('nonzero pad_idx not yet implemented')
        self.weight = Parameter(shared_weight)
        self.bias = Parameter(torch.Tensor(num_features))
        self.reset_parameters()
        self.e2s = lambda x: F.linear(x, self.weight, self.bias)

    self.numsoftmax = numsoftmax
    if numsoftmax > 1:
        self.esz = embeddingsize
        self.softmax = nn.Softmax(dim=1)
        self.prior = nn.Linear(hiddensize, numsoftmax, bias=False)
        self.latent = nn.Linear(hiddensize, numsoftmax * embeddingsize)
        self.activation = nn.Tanh()
    else:
        # rnn output to embedding
        if hiddensize != embeddingsize:
            # learn projection to correct dimensions
            self.o2e = nn.Linear(hiddensize, embeddingsize, bias=True)
        else:
            # no need for any transformation here
            self.o2e = lambda x: x
def __init__(self, key_size, query_size, units, dropout, **kwargs):
    super(MLPAttention, self).__init__(**kwargs)
    self.W_k = nn.Linear(key_size, units, bias=False)
    self.W_q = nn.Linear(query_size, units, bias=False)
    self.v = nn.Linear(units, 1, bias=False)
    self.dropout = nn.Dropout(dropout)
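# A sketch of the additive-attention forward pass these layers imply:
# score(q, k) = v^T tanh(W_q q + W_k k). Length masking (e.g. d2l's
# masked_softmax) is omitted here for brevity; this is an assumption about the
# rest of the class, not the source's code.
def forward(self, query, key, value):
    query, key = self.W_q(query), self.W_k(key)
    # Broadcast to shape (batch, #queries, #key-value pairs, units)
    features = (query.unsqueeze(2) + key.unsqueeze(1)).tanh()
    scores = self.v(features).squeeze(-1)
    attention_weights = self.dropout(scores.softmax(dim=-1))
    return torch.bmm(attention_weights, value)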
def __init__(self, dropout, **kwargs):
    super(DotProductAttention, self).__init__(**kwargs)
    self.dropout = nn.Dropout(dropout)
def __init__(self, params):
    super(FeedForward, self).__init__()
    self.fc1 = nn.Linear(params.hidden_dim, params.ffn_dim)
    self.fc2 = nn.Linear(params.ffn_dim, params.hidden_dim)
    self.dropout = nn.Dropout(params.dropout)
def __init__(self):
    super(Classifier, self).__init__()
    self.FC = torch.nn.Sequential(
        nn.Linear(Z_in, 1),
        nn.Dropout(rate),
        nn.Sigmoid())
def __init__(self):
    super().__init__()

    # The parameters of a GatedBlock are:
    # - The representation multiplicities (scalar, vector and dim. 5 repr.)
    #   for the input and the output
    # - the non-linearities for the scalars and the gates (None for no non-linearity)
    # - stride, padding... same as 2D convolution
    # features = [
    #     (4,),          # As input we have a scalar field
    #     (2, 2, 2, 2),  # Note that this particular choice of multiplicities is completely arbitrary
    #     (4, 4, 3, 3),
    #     (4, 4, 3, 3),
    #     (4, 4, 3, 3),
    #     (4, 4, 3, 3),
    #     (4, 4, 3, 3),
    #     (512,)         # scalar fields to end with fully-connected layers
    # ]
    features = [
        (4,),       # As input we have a scalar field
        (8,),
        (16, 2),
        (32, 4),
        (64, 8),
        (128, 16),
        (512,)      # scalar fields to end with fully-connected layers
    ]

    common_block_params = {
        'size': 5,
        'stride': 2,
        'padding': 3,
        'normalization': 'batch',
    }

    block_params = [
        {'activation': (None, torch.sigmoid)},
        {'activation': (F.relu, torch.sigmoid)},
        {'activation': (F.relu, torch.sigmoid)},
        {'activation': (F.relu, torch.sigmoid)},
        {'activation': (F.relu, torch.sigmoid)},
        {'activation': None},
    ]

    assert len(block_params) + 1 == len(features)

    blocks = [
        GatedBlock(features[i], features[i + 1], **common_block_params, **block_params[i])
        for i in range(len(block_params))
    ]

    self.sequence = nn.Sequential(
        *blocks,
        AvgSpacial(),
        nn.Linear(512, 256),
        nn.Dropout(0.3),
        nn.ReLU(),
        nn.Linear(256, 128))
def __init__(
    self,
    n_heads,
    n_layers,
    embedding_size,
    ffn_size,
    vocabulary_size,
    embedding=None,
    dropout=0.0,
    attention_dropout=0.0,
    relu_dropout=0.0,
    padding_idx=0,
    learn_positional_embeddings=False,
    embeddings_scale=False,
    reduction_type='mean',
    n_positions=1024,
    activation='relu',
    variant='aiayn',
    n_segments=0,
):
    super(TransformerEncoder, self).__init__()

    self.embedding_size = embedding_size
    self.ffn_size = ffn_size
    self.n_layers = n_layers
    self.n_heads = n_heads
    self.dim = embedding_size
    self.embeddings_scale = embeddings_scale
    self.reduction_type = reduction_type
    self.padding_idx = padding_idx
    # this is --dropout, not --relu-dropout or --attention-dropout
    self.dropout = nn.Dropout(p=dropout)
    self.variant = variant
    self.n_segments = n_segments

    self.out_dim = embedding_size
    assert embedding_size % n_heads == 0, \
        'Transformer embedding size must be a multiple of n_heads'

    # check input formats:
    if embedding is not None:
        assert (
            embedding_size is None or embedding_size == embedding.weight.shape[1]
        ), "Embedding dim must match the embedding size."

    if embedding is not None:
        self.embeddings = embedding
    else:
        assert False
        assert padding_idx is not None
        self.embeddings = nn.Embedding(
            vocabulary_size, embedding_size, padding_idx=padding_idx
        )
        nn.init.normal_(self.embeddings.weight, 0, embedding_size ** -0.5)

    # create the positional embeddings
    self.position_embeddings = nn.Embedding(n_positions, embedding_size)
    if not learn_positional_embeddings:
        create_position_codes(
            n_positions, embedding_size, out=self.position_embeddings.weight
        )
    else:
        nn.init.normal_(self.position_embeddings.weight, 0, embedding_size ** -0.5)

    # embedding normalization
    if self.variant == 'xlm':
        self.norm_embeddings = nn.LayerNorm(self.dim, eps=LAYER_NORM_EPS)
    elif self.variant == 'aiayn':
        pass
    else:
        raise ValueError("Can't handle --variant {}".format(self.variant))

    if self.n_segments >= 1:
        self.segment_embeddings = nn.Embedding(self.n_segments, self.dim)

    # build the model
    self.layers = nn.ModuleList()
    for _ in range(self.n_layers):
        self.layers.append(TransformerEncoderLayer(
            n_heads,
            embedding_size,
            ffn_size,
            attention_dropout=attention_dropout,
            relu_dropout=relu_dropout,
            dropout=dropout,
            variant=variant,
            activation=activation,
        ))
def __init__(self, size, dropout):
    super(SublayerConnection, self).__init__()
    self.norm = LayerNorm(size)
    self.dropout = nn.Dropout(dropout)
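# A sketch of the companion forward pass in the Annotated-Transformer pattern
# this class follows: apply a residual connection around any sublayer of the
# same size, normalizing the input first (pre-norm). Stated as an assumption,
# since the source shows only the constructor.
def forward(self, x, sublayer):
    return x + self.dropout(sublayer(self.norm(x)))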
def __init__(
    self,
    n_heads,
    n_layers,
    embedding_size,
    ffn_size,
    vocabulary_size,
    embedding=None,
    dropout=0.0,
    attention_dropout=0.0,
    relu_dropout=0.0,
    embeddings_scale=True,
    learn_positional_embeddings=False,
    padding_idx=None,
    n_positions=1024,
    n_segments=0,
    variant='aiayn',
    activation='relu',
):
    super().__init__()

    self.embedding_size = embedding_size
    self.ffn_size = ffn_size
    self.n_layers = n_layers
    self.n_heads = n_heads
    self.dim = embedding_size
    self.activation = activation
    self.variant = variant

    self.embeddings_scale = embeddings_scale
    self.dropout = nn.Dropout(p=dropout)  # --dropout

    self.out_dim = embedding_size
    assert embedding_size % n_heads == 0, \
        'Transformer embedding size must be a multiple of n_heads'

    self.embeddings = embedding

    if self.variant == 'xlm':
        self.norm_embeddings = nn.LayerNorm(self.dim, eps=LAYER_NORM_EPS)
    elif self.variant == 'aiayn':
        pass
    else:
        raise ValueError("Can't handle --variant {}".format(self.variant))

    # create the positional embeddings
    self.position_embeddings = nn.Embedding(n_positions, embedding_size)
    if not learn_positional_embeddings:
        create_position_codes(
            n_positions, embedding_size, out=self.position_embeddings.weight
        )
    else:
        nn.init.normal_(self.position_embeddings.weight, 0, embedding_size ** -0.5)

    # build the model
    self.layers = nn.ModuleList()
    for _ in range(self.n_layers):
        self.layers.append(TransformerDecoderLayer(
            n_heads,
            embedding_size,
            ffn_size,
            attention_dropout=attention_dropout,
            relu_dropout=relu_dropout,
            dropout=dropout,
            activation=activation,
            variant=variant,
        ))
def __init__(self, outer_nc, inner_nc, submodule=None,
             outermost=False, innermost=False,
             norm_layer=nn.BatchNorm3d, use_dropout=False):
    super(UnetSkipConnectionBlock, self).__init__()
    self.outermost = outermost
    if type(norm_layer) == functools.partial:
        use_bias = norm_layer.func == nn.InstanceNorm3d
    else:
        use_bias = norm_layer == nn.InstanceNorm3d

    downconv = nn.Conv3d(outer_nc, inner_nc, kernel_size=4,
                         stride=2, padding=1, bias=use_bias)
    downrelu = nn.LeakyReLU(0.2, True)
    downnorm = norm_layer(inner_nc)
    uprelu = nn.ReLU(True)
    upnorm = norm_layer(outer_nc)

    if outermost:
        upconv = nn.ConvTranspose3d(inner_nc * 2, outer_nc,
                                    kernel_size=4, stride=2, padding=1)
        down = [downconv]
        up = [uprelu, upconv, nn.Tanh()]
        model = down + [submodule] + up
    elif innermost:
        upconv = nn.ConvTranspose3d(inner_nc, outer_nc,
                                    kernel_size=4, stride=2, padding=1, bias=use_bias)
        down = [downrelu, downconv]
        up = [uprelu, upconv, upnorm]
        model = down + up
    else:
        upconv = nn.ConvTranspose3d(inner_nc * 2, outer_nc,
                                    kernel_size=4, stride=2, padding=1, bias=use_bias)
        down = [downrelu, downconv, downnorm]
        up = [uprelu, upconv, upnorm]

        if use_dropout:
            model = down + [submodule] + up + [nn.Dropout(0.5)]
        else:
            model = down + [submodule] + up

    self.model = nn.Sequential(*model)
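# A sketch of how such blocks are typically nested into a full U-Net generator,
# following the pix2pix convention this block appears to derive from: build the
# innermost block first, then wrap it outward. The channel counts, depth, and
# the single-channel 3D input here are illustrative assumptions, not values
# from the source.
unet_block = UnetSkipConnectionBlock(512, 512, innermost=True)
for _ in range(2):
    unet_block = UnetSkipConnectionBlock(512, 512, submodule=unet_block, use_dropout=True)
unet_block = UnetSkipConnectionBlock(256, 512, submodule=unet_block)
unet_block = UnetSkipConnectionBlock(128, 256, submodule=unet_block)
unet_block = UnetSkipConnectionBlock(64, 128, submodule=unet_block)
generator = UnetSkipConnectionBlock(1, 64, submodule=unet_block, outermost=True)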
def __init__(self, input_levels, target_sizes, feat_size=64):
    super().__init__()
    self.embedders = nn.ModuleDict({
        k: nn.Embedding(v.n_levels, v.emb_dim)
        for k, v in input_levels.items()
    })
    for layer in self.embedders.values():
        layer.weight.data = nn.init.normal_(layer.weight.data, mean=0, std=0.01)

    env_in_features = sum([
        v.emb_dim for k, v in input_levels.items()
        if k not in {'teams', 'pitchers', 'managers'}
    ])
    team_in_features = sum([
        v.emb_dim for k, v in input_levels.items()
        if k in {'teams', 'pitchers', 'managers'}
    ])
    env_hidden_features = int(env_in_features // 2)
    team_hidden_features = int(team_in_features // 2)
    out_features = feat_size

    self.env_encoder = nn.Sequential(
        nn.Dropout(0.1),
        nn.Linear(env_in_features, env_hidden_features),
        nn.BatchNorm1d(env_hidden_features),
        nn.ReLU(),
        nn.Dropout(0.1),
        nn.Linear(env_hidden_features, out_features),
        nn.BatchNorm1d(out_features),
        nn.ReLU(),
    )
    self.team_encoder = nn.Sequential(
        nn.Dropout(0.1),
        nn.Linear(team_in_features, team_hidden_features),
        nn.BatchNorm1d(team_hidden_features),
        nn.ReLU(),
        nn.Dropout(0.1),
        nn.Linear(team_hidden_features, out_features),
        nn.BatchNorm1d(out_features),
        nn.ReLU(),
    )

    in_features = out_features * 3  # env output, team, opponent
    hidden_features = in_features // 2
    self.ffn = nn.Sequential(
        nn.Dropout(0.1),
        nn.Linear(in_features, hidden_features),
        nn.BatchNorm1d(hidden_features),
        nn.ReLU(),
        nn.Dropout(0.1),
        nn.Linear(hidden_features, feat_size),
    )

    self.target_classifiers = nn.ModuleDict({
        f'{k}_{side}': nn.Linear(feat_size, n_targets)
        for k, n_targets in target_sizes.items()
        for side in ['team', 'opp']
    })
    self.target_classifiers['Win'] = nn.Linear(feat_size, 1)
def __init__(self):
    super(Net, self).__init__()

    # Input conv block
    self.convblock1 = nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=10, kernel_size=(3, 3), padding=1, bias=False),
        nn.BatchNorm2d(10),
        nn.Dropout(dropout_value),
        nn.ReLU()
    )  # output size : 28, RF : 3

    # Conv block 1
    self.convblock2 = nn.Sequential(
        nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(3, 3), padding=1, bias=False),
        nn.BatchNorm2d(10),
        nn.Dropout(dropout_value),
        nn.ReLU()
    )  # output size : 28, RF : 5

    # Transition block 1
    self.pool1 = nn.MaxPool2d(2, 2)  # output size : 14, RF : 6
    self.convblock3 = nn.Sequential(
        nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(3, 3), padding=0, bias=False),
        nn.BatchNorm2d(10),
        nn.Dropout(dropout_value),
        nn.ReLU()
    )  # output size : 12, RF : 10

    # Conv block 2
    self.convblock4 = nn.Sequential(
        nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(3, 3), padding=0, bias=False),
        nn.BatchNorm2d(10),
        nn.ReLU()
    )  # output size : 10, RF : 14
    self.convblock5 = nn.Sequential(
        nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(3, 3), padding=0, bias=False),
        nn.BatchNorm2d(10),
        nn.Dropout(dropout_value),
        nn.ReLU()
    )  # output size : 8, RF : 18
    self.convblock6 = nn.Sequential(
        nn.Conv2d(in_channels=10, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
        nn.BatchNorm2d(16),
        nn.Dropout(dropout_value),
        nn.ReLU()
    )  # output size : 6, RF : 22
    self.convblock7 = nn.Sequential(
        nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
        nn.BatchNorm2d(16),
        nn.Dropout(dropout_value),
        nn.ReLU()
    )  # output size : 4, RF : 26

    # Output block
    self.gap = nn.AvgPool2d(kernel_size=(4, 4))
    self.convblock8 = nn.Sequential(
        nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
    )  # output size : 1, RF : 26
def __init__(self, d_k, dropout=.1):
    super(ScaledDotProductAttention, self).__init__()
    self.scale_factor = np.sqrt(d_k)
    self.softmax = nn.Softmax(dim=-1)
    self.dropout = nn.Dropout(dropout)
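# A sketch of the corresponding forward pass: scaled dot-product attention
# computes softmax(q k^T / sqrt(d_k)) v. The optional boolean `attn_mask`
# handling is an illustrative assumption; the source shows only the
# constructor.
def forward(self, q, k, v, attn_mask=None):
    scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale_factor
    if attn_mask is not None:
        # Mask out disallowed positions before normalizing
        scores = scores.masked_fill(attn_mask, -1e9)
    attn = self.dropout(self.softmax(scores))
    return torch.matmul(attn, v), attn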
def __init__(self, nsamples, nhiddens=None, nlatent=32, alpha=None,
             beta=200, dropout=0.2, cuda=False):
    if nlatent < 1:
        raise ValueError('Minimum 1 latent neuron, not {}'.format(nlatent))

    if nsamples < 1:
        raise ValueError('nsamples must be > 0, not {}'.format(nsamples))

    # If only 1 sample, we weigh alpha and nhiddens differently
    if alpha is None:
        alpha = 0.15 if nsamples > 1 else 0.50

    if nhiddens is None:
        nhiddens = [512, 512] if nsamples > 1 else [256, 256]

    if dropout is None:
        dropout = 0.2 if nsamples > 1 else 0.0

    if any(i < 1 for i in nhiddens):
        raise ValueError('Minimum 1 neuron per layer, not {}'.format(min(nhiddens)))

    if beta <= 0:
        raise ValueError('beta must be > 0, not {}'.format(beta))

    if not (0 < alpha < 1):
        raise ValueError('alpha must be 0 < alpha < 1, not {}'.format(alpha))

    if not (0 <= dropout < 1):
        raise ValueError('dropout must be 0 <= dropout < 1, not {}'.format(dropout))

    super(VAE, self).__init__()

    # Initialize simple attributes
    self.usecuda = cuda
    self.nsamples = nsamples
    self.ntnf = 103
    self.alpha = alpha
    self.beta = beta
    self.nhiddens = nhiddens
    self.nlatent = nlatent
    self.dropout = dropout

    # Initialize lists for holding hidden layers
    self.encoderlayers = _nn.ModuleList()
    self.encodernorms = _nn.ModuleList()
    self.decoderlayers = _nn.ModuleList()
    self.decodernorms = _nn.ModuleList()

    # Add all other hidden layers
    for nin, nout in zip([self.nsamples + self.ntnf] + self.nhiddens, self.nhiddens):
        self.encoderlayers.append(_nn.Linear(nin, nout))
        self.encodernorms.append(_nn.BatchNorm1d(nout))

    # Latent layers
    self.mu = _nn.Linear(self.nhiddens[-1], self.nlatent)
    self.logsigma = _nn.Linear(self.nhiddens[-1], self.nlatent)

    # Add first decoding layer
    for nin, nout in zip([self.nlatent] + self.nhiddens[::-1], self.nhiddens[::-1]):
        self.decoderlayers.append(_nn.Linear(nin, nout))
        self.decodernorms.append(_nn.BatchNorm1d(nout))

    # Reconstruction (output) layer
    self.outputlayer = _nn.Linear(self.nhiddens[0], self.nsamples + self.ntnf)

    # Activation functions
    self.relu = _nn.LeakyReLU()
    self.softplus = _nn.Softplus()
    self.dropoutlayer = _nn.Dropout(p=self.dropout)

    if cuda:
        self.cuda()
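# A sketch of the reparameterization step that usually accompanies the `mu` and
# `logsigma` heads above: sample epsilon ~ N(0, I), then shift and scale it so
# gradients can flow through the sampling. This is an assumption about the rest
# of the class (and assumes `import torch`), not the source's verbatim code.
def reparameterize(self, mu, logsigma):
    epsilon = torch.randn_like(mu)
    return mu + epsilon * (logsigma / 2).exp()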
if args.arch == 'vgg13':
    model = models.vgg13(pretrained=True)
    no_input_layer = 25088
else:
    model = models.alexnet(pretrained=True)
    no_input_layer = 9216

for param in model.parameters():
    param.requires_grad = False

if args.hidden_units is not None:
    classifier = nn.Sequential(OrderedDict([
        ('fc1', nn.Linear(no_input_layer, args.hidden_units)),
        ('relu1', nn.ReLU()),
        ('dropout1', nn.Dropout(p=0.3)),
        ('fc2', nn.Linear(args.hidden_units, 2048)),
        ('relu2', nn.ReLU()),
        ('dropout2', nn.Dropout(p=0.3)),
        ('fc3', nn.Linear(2048, 102)),
        ('output', nn.LogSoftmax(dim=1))
    ]))
else:
    classifier = nn.Sequential(OrderedDict([
        ('fc1', nn.Linear(no_input_layer, 4096)),
        ('relu1', nn.ReLU()),
        ('dropout1', nn.Dropout(p=0.3)),
        ('fc2', nn.Linear(4096, 2048)),
        ('relu2', nn.ReLU()),
        ('dropout2', nn.Dropout(p=0.3)),
        ('fc3', nn.Linear(2048, 102)),
        ('output', nn.LogSoftmax(dim=1))
    ]))
def __init__(self, config):
    super(BertNeuralNet, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.linear_out = nn.Linear(config.hidden_size, 1)
    self.apply(self.init_bert_weights)
def __init__(self, config):
    super().__init__()
    self.dropout = nn.Dropout(config.classifier_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
def __init__(self, encoder, decoder, dropout=0):
    nn.Module.__init__(self)
    self.encoder = encoder
    self.decoder = decoder
    self.dropout = nn.Dropout(dropout)
def __init__(self, args):
    super(traphicNet, self).__init__()

    ## Unpack arguments
    self.args = args

    ## Use gpu flag
    self.use_cuda = args['cuda']

    # Flag for maneuver based (True) vs uni-modal decoder (False)
    self.use_maneuvers = args['use_maneuvers']

    # Flag for train mode (True) vs test mode (False)
    self.train_flag = True

    ## Sizes of network layers
    self.dropout_prob = args['dropout_prob']
    self.encoder_size = args['encoder_size']
    self.decoder_size = args['decoder_size']
    self.in_length = args['in_length']
    self.out_length = args['out_length']
    self.grid_size = args['grid_size']
    self.upp_grid_size = args['upp_grid_size']
    self.soc_conv_depth = args['soc_conv_depth']
    self.conv_3x1_depth = args['conv_3x1_depth']
    self.dyn_embedding_size = args['dyn_embedding_size']
    self.input_embedding_size = args['input_embedding_size']
    self.num_lat_classes = args['num_lat_classes']
    self.num_lon_classes = args['num_lon_classes']
    self.soc_embedding_size = (((args['grid_size'][0] - 4) + 1) // 2) * self.conv_3x1_depth
    self.upp_soc_embedding_size = (((args['upp_grid_size'][0] - 4) + 1) // 2) * self.conv_3x1_depth
    self.ours = args['ours']

    ## Define network weights

    # Input embedding layer
    self.ip_emb = torch.nn.Linear(2, self.input_embedding_size)

    # Behavioral Modification 3: Extra inputs
    if self.ours:
        self.ip_emb_vel = torch.nn.Linear(2, self.input_embedding_size)
    if self.ours:
        self.ip_emb_nc = torch.nn.Linear(2, self.input_embedding_size)

    # Encoder LSTM
    self.enc_lstm = torch.nn.LSTM(self.input_embedding_size, self.encoder_size, 1)

    # Vehicle dynamics embedding
    self.dyn_emb = torch.nn.Linear(self.encoder_size, self.dyn_embedding_size)

    # Batch norm
    self.bn_conv = torch.nn.BatchNorm2d(self.encoder_size)

    # Behavioral Modification 1: Weighting the neighbors' hidden vectors after the LSTM stage
    if self.ours:
        self.beh_1 = torch.nn.Linear(self.encoder_size, self.encoder_size)

    # Convolutional social pooling layer and social embedding layer
    self.soc_conv = torch.nn.Conv2d(self.encoder_size, self.soc_conv_depth, 3)
    self.conv_3x1 = torch.nn.Conv2d(self.soc_conv_depth, self.conv_3x1_depth, (3, 1))
    self.soc_maxpool = torch.nn.MaxPool2d((2, 1), padding=(1, 0))

    # FC social pooling layer (for comparison):
    # self.soc_fc = torch.nn.Linear(self.soc_conv_depth * self.grid_size[0] * self.grid_size[1],
    #                               (((args['grid_size'][0] - 4) + 1) // 2) * self.conv_3x1_depth)

    # Decoder LSTM
    if self.use_maneuvers:
        if self.ours:
            self.dec_lstm = torch.nn.LSTM(
                self.upp_soc_embedding_size + self.soc_embedding_size
                + self.dyn_embedding_size + self.num_lat_classes + self.num_lon_classes,
                self.decoder_size)
        else:
            self.dec_lstm = torch.nn.LSTM(
                self.soc_embedding_size + self.dyn_embedding_size
                + self.num_lat_classes + self.num_lon_classes,
                self.decoder_size)
    else:
        if self.ours:
            self.dec_lstm = torch.nn.LSTM(
                self.upp_soc_embedding_size + self.soc_embedding_size + self.dyn_embedding_size,
                self.decoder_size, dropout=self.dropout_prob)
        else:
            self.dec_lstm = torch.nn.LSTM(
                self.soc_embedding_size + self.dyn_embedding_size,
                self.decoder_size, dropout=self.dropout_prob)

    # Batch norm
    # self.bn_dec = torch.nn.BatchNorm1d(self.decoder_size)
    # self.bn_enc = torch.nn.BatchNorm1d(self.decoder_size)
    self.bnupp_soc_enc = torch.nn.BatchNorm1d(self.input_embedding_size)
    self.bn_soc_enc = torch.nn.BatchNorm1d(self.soc_embedding_size)
    self.bn_hist_enc = torch.nn.BatchNorm1d(self.upp_soc_embedding_size)

    # Output layers:
    self.op = torch.nn.Linear(self.decoder_size, 5)

    # Batch norm
    self.bn_lin = torch.nn.BatchNorm1d(self.out_length)

    # Dropout
    self.dropout = nn.Dropout(self.dropout_prob)

    if self.ours:
        self.op_lat = torch.nn.Linear(
            self.upp_soc_embedding_size + self.soc_embedding_size + self.dyn_embedding_size,
            self.num_lat_classes)
        self.op_lon = torch.nn.Linear(
            self.upp_soc_embedding_size + self.soc_embedding_size + self.dyn_embedding_size,
            self.num_lon_classes)
    else:
        self.op_lat = torch.nn.Linear(
            self.soc_embedding_size + self.dyn_embedding_size, self.num_lat_classes)
        self.op_lon = torch.nn.Linear(
            self.soc_embedding_size + self.dyn_embedding_size, self.num_lon_classes)

    # Activations:
    # self.leaky_relu = torch.nn.LeakyReLU(0.1)
    self.leaky_relu = torch.nn.ELU()
    self.relu = torch.nn.ReLU()
    self.softmax = torch.nn.Softmax(dim=1)