def __init__(self, num_classes=1001, stem_filters=96, penultimate_filters=4032, filters_multiplier=2):
        super(NASNetALarge, self).__init__()
        self.num_classes = num_classes
        self.stem_filters = stem_filters
        self.penultimate_filters = penultimate_filters
        self.filters_multiplier = filters_multiplier

        filters = self.penultimate_filters // 24
        # 24 is default value for the architecture
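        # e.g. with the default penultimate_filters=4032, filters = 4032 // 24 = 168,
        # so the final normal cells emit 24 * filters = 4032 channels, which is
        # exactly what self.last_linear consumes below.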

        self.conv0 = nn.Sequential()
        self.conv0.add_module('conv', nn.Conv2d(in_channels=3, out_channels=self.stem_filters, kernel_size=3, padding=0, stride=2,
                                                bias=False))
        self.conv0.add_module('bn', nn.BatchNorm2d(
            self.stem_filters, eps=0.001, momentum=0.1, affine=True))

        self.cell_stem_0 = CellStem0(
            self.stem_filters, num_filters=filters // (filters_multiplier ** 2))
        self.cell_stem_1 = CellStem1(
            self.stem_filters, num_filters=filters // filters_multiplier)

        self.cell_0 = FirstCell(in_channels_left=filters, out_channels_left=filters//2,
                                in_channels_right=2*filters, out_channels_right=filters)
        self.cell_1 = NormalCell(in_channels_left=2*filters, out_channels_left=filters,
                                 in_channels_right=6*filters, out_channels_right=filters)
        self.cell_2 = NormalCell(in_channels_left=6*filters, out_channels_left=filters,
                                 in_channels_right=6*filters, out_channels_right=filters)
        self.cell_3 = NormalCell(in_channels_left=6*filters, out_channels_left=filters,
                                 in_channels_right=6*filters, out_channels_right=filters)
        self.cell_4 = NormalCell(in_channels_left=6*filters, out_channels_left=filters,
                                 in_channels_right=6*filters, out_channels_right=filters)
        self.cell_5 = NormalCell(in_channels_left=6*filters, out_channels_left=filters,
                                 in_channels_right=6*filters, out_channels_right=filters)

        self.reduction_cell_0 = ReductionCell0(in_channels_left=6*filters, out_channels_left=2*filters,
                                               in_channels_right=6*filters, out_channels_right=2*filters)

        self.cell_6 = FirstCell(in_channels_left=6*filters, out_channels_left=filters,
                                in_channels_right=8*filters, out_channels_right=2*filters)
        self.cell_7 = NormalCell(in_channels_left=8*filters, out_channels_left=2*filters,
                                 in_channels_right=12*filters, out_channels_right=2*filters)
        self.cell_8 = NormalCell(in_channels_left=12*filters, out_channels_left=2*filters,
                                 in_channels_right=12*filters, out_channels_right=2*filters)
        self.cell_9 = NormalCell(in_channels_left=12*filters, out_channels_left=2*filters,
                                 in_channels_right=12*filters, out_channels_right=2*filters)
        self.cell_10 = NormalCell(in_channels_left=12*filters, out_channels_left=2*filters,
                                  in_channels_right=12*filters, out_channels_right=2*filters)
        self.cell_11 = NormalCell(in_channels_left=12*filters, out_channels_left=2*filters,
                                  in_channels_right=12*filters, out_channels_right=2*filters)

        self.reduction_cell_1 = ReductionCell1(in_channels_left=12*filters, out_channels_left=4*filters,
                                               in_channels_right=12*filters, out_channels_right=4*filters)

        self.cell_12 = FirstCell(in_channels_left=12*filters, out_channels_left=2*filters,
                                 in_channels_right=16*filters, out_channels_right=4*filters)
        self.cell_13 = NormalCell(in_channels_left=16*filters, out_channels_left=4*filters,
                                  in_channels_right=24*filters, out_channels_right=4*filters)
        self.cell_14 = NormalCell(in_channels_left=24*filters, out_channels_left=4*filters,
                                  in_channels_right=24*filters, out_channels_right=4*filters)
        self.cell_15 = NormalCell(in_channels_left=24*filters, out_channels_left=4*filters,
                                  in_channels_right=24*filters, out_channels_right=4*filters)
        self.cell_16 = NormalCell(in_channels_left=24*filters, out_channels_left=4*filters,
                                  in_channels_right=24*filters, out_channels_right=4*filters)
        self.cell_17 = NormalCell(in_channels_left=24*filters, out_channels_left=4*filters,
                                  in_channels_right=24*filters, out_channels_right=4*filters)

        self.relu = nn.ReLU()
        self.avg_pool = nn.AvgPool2d(11, stride=1, padding=0)
        self.dropout = nn.Dropout()
        self.last_linear = nn.Linear(24*filters, self.num_classes)
    def __init__(self,
                 n_token,
                 n_layer,
                 n_head,
                 d_model,
                 d_head,
                 d_inner,
                 dropout,
                 dropatt,
                 dtype,
                 tie_weight=True,
                 d_embed=None,
                 div_val=1,
                 tie_projs=[False],
                 pre_lnorm=False,
                 tgt_len=None,
                 ext_len=None,
                 mem_len=None,
                 cutoffs=[],
                 adapt_inp=False,
                 same_length=False,
                 attn_type=0,
                 clamp_len=-1,
                 sample_softmax=-1):
        super(MemTransformerLM, self).__init__()
        self.n_token = n_token

        d_embed = d_model if d_embed is None else d_embed
        self.d_embed = d_embed
        self.d_model = d_model
        self.n_head = n_head
        self.d_head = d_head
        self.dtype = dtype

        self.word_emb = AdaptiveEmbedding(n_token,
                                          d_embed,
                                          d_model,
                                          cutoffs,
                                          div_val=div_val,
                                          dtype=dtype)

        self.drop = nn.Dropout(dropout)

        self.tie_weight = tie_weight
        self.tie_projs = tie_projs
        self.div_val = div_val

        self.n_layer = n_layer

        self.tgt_len = tgt_len
        self.mem_len = mem_len
        self.ext_len = ext_len
        self.max_klen = tgt_len + ext_len + mem_len

        self.attn_type = attn_type
        if attn_type != 0:
            raise RuntimeError(
                'TorchScripted model supports only attn_type == 0')

        self.layers = nn.ModuleList()
        # the default attention
        if attn_type == 0:
            for i in range(n_layer):
                self.layers.append(
                    RelPartialLearnableDecoderLayer(n_head,
                                                    d_model,
                                                    d_head,
                                                    d_inner,
                                                    dropout,
                                                    tgt_len=tgt_len,
                                                    ext_len=ext_len,
                                                    mem_len=mem_len,
                                                    dropatt=dropatt,
                                                    pre_lnorm=pre_lnorm))

        self.sample_softmax = sample_softmax
        # use sampled softmax
        if sample_softmax > 0:
            self.out_layer = nn.Linear(d_model, n_token)
            self.tie_weight = tie_weight
            self.sampler = LogUniformSampler(n_token, sample_softmax)

        # use adaptive softmax (including standard softmax)
        else:
            if tie_weight:
                emb_layers = [i.weight for i in self.word_emb.emb_layers]
            else:
                emb_layers = None

            emb_projs = self.word_emb.emb_projs

            self.crit = ProjectedAdaptiveLogSoftmax(
                n_token,
                d_embed,
                d_model,
                cutoffs,
                div_val=div_val,
                dtype=dtype,
                tie_projs=tie_projs,
                out_projs=emb_projs,
                out_layers_weights=emb_layers)

        self.same_length = same_length
        self.clamp_len = clamp_len

        self._create_params()
Example #3
    def __init__(self, hps, *_):
        super(BiLSTMTagger, self).__init__()

        batch_size = hps['batch_size']
        lstm_hidden_dim = hps['sent_hdim']
        sent_embedding_dim_DEP = 2*hps['sent_edim'] + 1*hps['pos_edim'] + 1
        sent_embedding_dim_SRL = 3 * hps['sent_edim'] + 1 * hps['pos_edim'] + 1
        ## for the region mark
        role_embedding_dim = hps['role_edim']
        frame_embedding_dim = role_embedding_dim
        vocab_size = hps['vword']

        self.tagset_size = hps['vbio']
        self.pos_size = hps['vpos']
        self.dep_size = hps['vdep']
        self.frameset_size = hps['vframe']
        self.num_layers = hps['rec_layers']
        self.batch_size = batch_size
        self.hidden_dim = lstm_hidden_dim
        self.word_emb_dim = hps['sent_edim']
        self.specific_dep_size = hps['svdep']

        self.word_embeddings_SRL = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_embeddings_DEP = nn.Embedding(vocab_size, hps['sent_edim'])
        self.pos_embeddings = nn.Embedding(self.pos_size, hps['pos_edim'])
        self.pos_embeddings_DEP = nn.Embedding(self.pos_size, hps['pos_edim'])
        self.p_lemma_embeddings = nn.Embedding(self.frameset_size, hps['sent_edim'])
        self.dep_embeddings = nn.Embedding(self.dep_size, self.pos_size)
        #self.lr_dep_embeddings = nn.Embedding(self.lr_dep_size, hps[])

        self.word_fixed_embeddings = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_fixed_embeddings.weight.data.copy_(torch.from_numpy(hps['word_embeddings']))

        self.word_fixed_embeddings_DEP = nn.Embedding(vocab_size, hps['sent_edim'])
        self.word_fixed_embeddings_DEP.weight.data.copy_(torch.from_numpy(hps['word_embeddings']))


        self.role_embeddings = nn.Embedding(self.tagset_size, role_embedding_dim)
        self.frame_embeddings = nn.Embedding(self.frameset_size, frame_embedding_dim)


        self.hidden2tag = nn.Linear(4*lstm_hidden_dim, 2*lstm_hidden_dim)
        self.MLP = nn.Linear(2*lstm_hidden_dim, self.dep_size)
        self.tag2hidden = nn.Linear(self.dep_size, self.pos_size)

        self.SRL_input_dropout = nn.Dropout(p=0.3)
        self.DEP_input_dropout = nn.Dropout(p=0.5)
        self.hidden_state_dropout = nn.Dropout(p=0.3)
        self.label_dropout = nn.Dropout(p=0.5)
        #self.use_dropout = nn.Dropout(p=0.2)



        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.num_layers = 1
        self.BiLSTM_0 = nn.LSTM(input_size=sent_embedding_dim_DEP, hidden_size=lstm_hidden_dim, batch_first=True,
                              bidirectional=True, num_layers=self.num_layers)

        init.orthogonal_(self.BiLSTM_0.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_0.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_0.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_0.all_weights[1][1])

        self.num_layers = 1
        self.BiLSTM_1 = nn.LSTM(input_size=lstm_hidden_dim * 2, hidden_size=lstm_hidden_dim, batch_first=True,
                                bidirectional=True, num_layers=self.num_layers)

        init.orthogonal_(self.BiLSTM_1.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_1.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_1.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_1.all_weights[1][1])


        self.num_layers = 4
        self.BiLSTM_SRL = nn.LSTM(input_size=sent_embedding_dim_SRL , hidden_size=lstm_hidden_dim, batch_first=True,
                                    bidirectional=True, num_layers=self.num_layers)

        init.orthogonal_(self.BiLSTM_SRL.all_weights[0][0])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[0][1])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[1][0])
        init.orthogonal_(self.BiLSTM_SRL.all_weights[1][1])


        # non-linear map to role embedding
        self.role_map = nn.Linear(in_features=role_embedding_dim * 2, out_features=self.hidden_dim * 4)

        # Init hidden state
        self.hidden = self.init_hidden_spe()
        self.hidden_2 = self.init_hidden_spe()
        self.hidden_3 = self.init_hidden_spe()
        self.hidden_4 = self.init_hidden_share()
Example #4
 def __init__(self, in_features, num_classes, drop=0.0):
     super(Classifier, self).__init__()
     self.add_module('drop', nn.Dropout(drop))
     self.add_module('lin', nn.Linear(in_features, num_classes))
Example #5
    def __init__(self,
                 block,
                 layers,
                 groups,
                 reduction,
                 dropout_p=0.2,
                 inplanes=128,
                 input_3x3=True,
                 downsample_kernel_size=3,
                 downsample_padding=1,
                 num_classes=1000):
        """
        Parameters
        ----------
        block (nn.Module): Bottleneck class.
            - For SENet154: SEBottleneck
            - For SE-ResNet models: SEResNetBottleneck
            - For SE-ResNeXt models:  SEResNeXtBottleneck
        layers (list of ints): Number of residual blocks for 4 layers of the
            network (layer1...layer4).
        groups (int): Number of groups for the 3x3 convolution in each
            bottleneck block.
            - For SENet154: 64
            - For SE-ResNet models: 1
            - For SE-ResNeXt models:  32
        reduction (int): Reduction ratio for Squeeze-and-Excitation modules.
            - For all models: 16
        dropout_p (float or None): Drop probability for the Dropout layer.
            If `None` the Dropout layer is not used.
            - For SENet154: 0.2
            - For SE-ResNet models: None
            - For SE-ResNeXt models: None
        inplanes (int):  Number of input channels for layer1.
            - For SENet154: 128
            - For SE-ResNet models: 64
            - For SE-ResNeXt models: 64
        input_3x3 (bool): If `True`, use three 3x3 convolutions instead of
            a single 7x7 convolution in layer0.
            - For SENet154: True
            - For SE-ResNet models: False
            - For SE-ResNeXt models: False
        downsample_kernel_size (int): Kernel size for downsampling convolutions
            in layer2, layer3 and layer4.
            - For SENet154: 3
            - For SE-ResNet models: 1
            - For SE-ResNeXt models: 1
        downsample_padding (int): Padding for downsampling convolutions in
            layer2, layer3 and layer4.
            - For SENet154: 1
            - For SE-ResNet models: 0
            - For SE-ResNeXt models: 0
        num_classes (int): Number of outputs in `last_linear` layer.
            - For all models: 1000
        """
        super(SENet, self).__init__()
        self.inplanes = inplanes
        if input_3x3:
            layer0_modules = [
                ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
                                    bias=False)),
                ('bn1', nn.BatchNorm2d(64)),
                ('relu1', nn.ReLU(inplace=True)),
                ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
                                    bias=False)),
                ('bn2', nn.BatchNorm2d(64)),
                ('relu2', nn.ReLU(inplace=True)),
                ('conv3',
                 nn.Conv2d(64, inplanes, 3, stride=1, padding=1, bias=False)),
                ('bn3', nn.BatchNorm2d(inplanes)),
                ('relu3', nn.ReLU(inplace=True)),
            ]
        else:
            layer0_modules = [
                ('conv1',
                 nn.Conv2d(3,
                           inplanes,
                           kernel_size=7,
                           stride=2,
                           padding=3,
                           bias=False)),
                ('bn1', nn.BatchNorm2d(inplanes)),
                ('relu1', nn.ReLU(inplace=True)),
            ]
        # To preserve compatibility with Caffe weights `ceil_mode=True`
        # is used instead of `padding=1`.
        layer0_modules.append(('pool', nn.MaxPool2d(3,
                                                    stride=2,
                                                    ceil_mode=True)))
        self.conv_2_img = nn.Conv2d(inplanes,
                                    3,
                                    kernel_size=1,
                                    stride=1,
                                    padding=0,
                                    bias=False)
        self.layer0 = nn.Sequential(OrderedDict(layer0_modules))
        self.layer1 = self._make_layer(block,
                                       planes=64,
                                       blocks=layers[0],
                                       groups=groups,
                                       reduction=reduction,
                                       downsample_kernel_size=1,
                                       downsample_padding=0)
        self.layer2 = self._make_layer(
            block,
            planes=128,
            blocks=layers[1],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding)
        self.layer3 = self._make_layer(
            block,
            planes=256,
            blocks=layers[2],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding)
        self.layer4 = self._make_layer(
            block,
            planes=512,
            blocks=layers[3],
            stride=1,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding)

        num_features = 512 * block.expansion
        num_inner = num_features // 4
        self.fa_layer = FAModule(num_features, num_inner)

        self.cls_head = nn.Sequential(conv3x3(num_features + num_inner, 512),
                                      nn.BatchNorm2d(512), nn.ReLU(),
                                      nn.Dropout2d(0.1))

        self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None
        self.mu = nn.Linear(512, num_classes)
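
# Illustrative only: based on the parameter table in the docstring above, an
# SE-ResNeXt-style configuration of this class might be instantiated roughly as
# follows. SEResNeXtBottleneck and the [3, 4, 6, 3] block counts are assumptions
# not shown on this page; FAModule, conv3x3 and _make_layer come from the
# surrounding module.
se_resnext_model = SENet(SEResNeXtBottleneck, layers=[3, 4, 6, 3], groups=32,
                         reduction=16, dropout_p=None, inplanes=64,
                         input_3x3=False, downsample_kernel_size=1,
                         downsample_padding=0, num_classes=1000)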
 def __init__(self):
     super(CNN, self).__init__()
     self.conv1 = nn.Sequential(
         nn.Conv2d(3, 64, 3, padding=1, bias=False),  #layer0
         nn.BatchNorm2d(
             64),  # batch norm is added because dataset is changed
         nn.ReLU(inplace=True),
     )
     self.conv2 = nn.Sequential(
         nn.Conv2d(64, 64, 3, padding=1, bias=False),  #layer3
         nn.BatchNorm2d(64),
         nn.ReLU(inplace=True),
     )
     self.maxpool1 = nn.Sequential(
         nn.MaxPool2d(2, 2),  # 16*16* 64
     )
     self.conv3 = nn.Sequential(
         nn.Conv2d(64, 128, 3, padding=1, bias=False),  #layer7
         nn.BatchNorm2d(128),
         nn.ReLU(inplace=True),
     )
     self.conv4 = nn.Sequential(
         nn.Conv2d(128, 128, 3, padding=1, bias=False),  #layer10
         nn.BatchNorm2d(128),
         nn.ReLU(inplace=True),
     )
     self.maxpool2 = nn.Sequential(
         nn.MaxPool2d(2, 2),  # 8*8*128
     )
     self.conv5 = nn.Sequential(
         nn.Conv2d(128, 256, 3, padding=1, bias=False),  #layer14
         nn.BatchNorm2d(256),
         nn.ReLU(inplace=True),
     )
     self.conv6 = nn.Sequential(
         nn.Conv2d(256, 256, 3, padding=1, bias=False),  #layer17
         nn.BatchNorm2d(256),
         nn.ReLU(inplace=True),
     )
     self.conv7 = nn.Sequential(
         nn.Conv2d(256, 256, 3, padding=1, bias=False),  #layer20
         nn.BatchNorm2d(256),
         nn.ReLU(inplace=True),
     )
     self.maxpool3 = nn.Sequential(
         nn.MaxPool2d(2, 2),  # 4*4*256
     )
     self.conv8 = nn.Sequential(
         nn.Conv2d(256, 512, 3, padding=1, bias=False),  #layer24
         nn.BatchNorm2d(512),
         nn.ReLU(inplace=True),
     )
     self.conv9 = nn.Sequential(
         nn.Conv2d(512, 512, 3, padding=1, bias=False),  #layer27
         nn.BatchNorm2d(512),
         nn.ReLU(inplace=True),
     )
     self.conv10 = nn.Sequential(
         nn.Conv2d(512, 512, 3, padding=1, bias=False),  #layer30
         nn.BatchNorm2d(512),
         nn.ReLU(inplace=True),
     )
     self.maxpool4 = nn.Sequential(
         nn.MaxPool2d(2, 2),  # 2*2*512
     )
     self.conv11 = nn.Sequential(
         nn.Conv2d(512, 512, 3, padding=1, bias=False),  #layer34
         nn.BatchNorm2d(512),
         nn.ReLU(inplace=True),
     )
     self.conv12 = nn.Sequential(
         nn.Conv2d(512, 512, 3, padding=1, bias=False),  #layer37
         nn.BatchNorm2d(512),
         nn.ReLU(inplace=True),
     )
     self.conv13 = nn.Sequential(
         nn.Conv2d(512, 512, 3, padding=1, bias=False),  #layer40
         nn.BatchNorm2d(512),
         nn.ReLU(inplace=True),
     )
     self.maxpool5 = nn.Sequential(nn.MaxPool2d(2, 2)  # 1*1*512
                                   )
     self.fc1 = nn.Sequential(
         nn.Dropout(p=0.5),
         nn.Linear(512, 512, bias=False),  #fc_layer1
         nn.ReLU(inplace=True),
     )
     self.fc2 = nn.Sequential(
         nn.Dropout(p=0.5),
         nn.Linear(512, 512, bias=False),  #fc_layer4
         nn.ReLU(inplace=True),
     )
     self.fc3 = nn.Sequential(nn.Linear(512, 10, bias=False)  #fc_layer6
                              )
Example #7
 def __init__(self, d_model, d_ff, dropout=0.1):
     super(PositionwiseFeedForward, self).__init__()
     self.w_1 = nn.Linear(d_model, d_ff)
     self.w_2 = nn.Linear(d_ff, d_model)
     self.dropout = nn.Dropout(dropout)
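
 # The forward pass is not shown in this snippet; in the usual Transformer
 # formulation it would look roughly like this (illustrative sketch):
 def forward(self, x):
     # FFN(x) = W_2 * dropout(relu(W_1 * x))
     return self.w_2(self.dropout(self.w_1(x).relu()))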
Example #8

# ### VGG-16 and Resnet-18
# Now that you have created the dataset, we can use it for training and testing neural networks. VGG-16 and Resnet-18 are both well-known deep-net architectures. VGG-16 is named as such because it has 16 weight layers in total (13 convolutional and 3 fully connected). Resnet-18, on the other hand, is a Resnet architecture that uses skip connections. PyTorch provides pre-trained models of both architectures, and we shall use them directly. If you are interested in how they are defined, take a look at the source: [VGG](https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py), [Resnet](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py)

# In[6]:


vgg16 = models.vgg16(pretrained=True)
resnet18 = models.resnet18(pretrained=True)

# Code to change the last layers so that they only have 10 classes as output
vgg16.classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 4096),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(4096, 10),
)
resnet18.fc = nn.Linear(resnet18.fc.in_features, 10)

# Add code for using CUDA here if it is available
use_gpu = False
if(torch.cuda.is_available()):
    use_gpu = True
    vgg16.cuda()
    resnet18.cuda()
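
# A minimal fine-tuning sketch for the 10-class heads defined above (illustrative,
# not part of the original notebook). `train_loader` stands in for a DataLoader
# built from the dataset created earlier.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet18.fc.parameters(), lr=0.001, momentum=0.9)

for images, labels in train_loader:
    if use_gpu:
        images, labels = images.cuda(), labels.cuda()
    optimizer.zero_grad()
    loss = criterion(resnet18(images), labels)
    loss.backward()
    optimizer.step()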

Example #9
 def __init__(self):
     super(AEE, self).__init__()
     self.EnE = torch.nn.Sequential(nn.Linear(IE_dim, h_dim),
                                    nn.BatchNorm1d(h_dim),
                                    nn.ReLU(), nn.Dropout())
Example #10
    if is_training:
        H1 = dropout(H1, drop_prob1)
    H2 = (torch.matmul(H1, W2) + b2).relu()
    if is_training:
        H2 = dropout(H2, drop_prob2)

    return torch.matmul(H2, W3) + b3
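
# The from-scratch branch above calls a `dropout` helper that is not included in
# this excerpt; the d2l book implements it as inverted dropout, roughly like this:
def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:               # drop everything
        return torch.zeros_like(X)
    mask = (torch.rand(X.shape) < keep_prob).float()
    return mask * X / keep_prob      # rescale so the expectation is unchanged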

num_epochs, lr, batch_size = 5, 100.0, 256
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)  # train and evaluate the model

# Concise implementation
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens2, num_outputs)
)

for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)

optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)
Example #11
 def __init__(self, hidden_size, drop=0.0):
     super().__init__()
     self.fc = nn.Sequential(nn.Linear(hidden_size, 1), nn.ReLU())
     self.dropout = nn.Dropout(drop)
Example #12
    def __init__(
        self,
        num_features,
        embeddingsize,
        hiddensize,
        dropout=0,
        numsoftmax=1,
        shared_weight=None,
        padding_idx=-1,
    ):
        """Initialize output layer.

        :param num_features:  number of candidates to rank
        :param hiddensize:    (last) dimension of the input vectors
        :param embeddingsize: (last) dimension of the candidate vectors
        :param numsoftmax:   (default 1) number of softmaxes to calculate.
                              see arxiv.org/abs/1711.03953 for more info.
                              increasing this slows down computation but can
                              add more expressivity to the embeddings.
        :param shared_weight: (num_features x esz) vector of weights to use as
                              the final linear layer's weight matrix. default
                              None starts with a new linear layer.
        :param padding_idx:   model should output a large negative number for
                              score at this index. if set to -1 (default),
                              this is disabled. if >= 0, subtracts one from
                              num_features and always outputs -1e20 at this
                              index. only used when shared_weight is not None.
                              setting this param helps protect gradient from
                              entering shared embedding matrices.
        """
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        self.padding_idx = padding_idx if shared_weight is not None else -1

        # embedding to scores
        if shared_weight is None:
            # just a regular linear layer
            self.e2s = nn.Linear(embeddingsize, num_features, bias=True)
        else:
            # use shared weights and a bias layer instead
            if padding_idx == 0:
                num_features -= 1  # don't include padding
                shared_weight = shared_weight.narrow(0, 1, num_features)
            elif padding_idx > 0:
                raise RuntimeError('nonzero pad_idx not yet implemented')
            self.weight = Parameter(shared_weight)
            self.bias = Parameter(torch.Tensor(num_features))
            self.reset_parameters()
            self.e2s = lambda x: F.linear(x, self.weight, self.bias)

        self.numsoftmax = numsoftmax
        if numsoftmax > 1:
            self.esz = embeddingsize
            self.softmax = nn.Softmax(dim=1)
            self.prior = nn.Linear(hiddensize, numsoftmax, bias=False)
            self.latent = nn.Linear(hiddensize, numsoftmax * embeddingsize)
            self.activation = nn.Tanh()
        else:
            # rnn output to embedding
            if hiddensize != embeddingsize:
                # learn projection to correct dimensions
                self.o2e = nn.Linear(hiddensize, embeddingsize, bias=True)
            else:
                # no need for any transformation here
                self.o2e = lambda x: x
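
    # Illustrative sketch (not part of the snippet above): how the numsoftmax > 1
    # branch is typically consumed in a forward pass, following the
    # mixture-of-softmaxes idea from arxiv.org/abs/1711.03953. `hidden` is assumed
    # to be (batch, seq, hiddensize).
    def mos_scores(self, hidden):
        bsz, seqlen, _ = hidden.size()
        latent = self.dropout(self.activation(self.latent(hidden)))
        logit = self.e2s(latent.view(-1, self.esz))     # per-component vocab logits
        prior = self.softmax(self.prior(hidden).view(-1, self.numsoftmax))
        prob = self.softmax(logit).view(bsz * seqlen, self.numsoftmax, -1)
        return (prob * prior.unsqueeze(2)).sum(1).view(bsz, seqlen, -1)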
Example #13
 def __init__(self, key_size, query_size, units, dropout, **kwargs):
     super(MLPAttention, self).__init__(**kwargs)
     self.W_k = nn.Linear(key_size, units, bias=False)
     self.W_q = nn.Linear(query_size, units, bias=False)
     self.v = nn.Linear(units, 1, bias=False)
     self.dropout = nn.Dropout(dropout)
Example #14
 def __init__(self, dropout, **kwargs):
     super(DotProductAttention, self).__init__(**kwargs)
     self.dropout = nn.Dropout(dropout)
Example #15
 def __init__(self, params):
     super(FeedForward, self).__init__()
     self.fc1 = nn.Linear(params.hidden_dim, params.ffn_dim)
     self.fc2 = nn.Linear(params.ffn_dim, params.hidden_dim)
     self.dropout = nn.Dropout(params.dropout)
Example #16
 def __init__(self):
     super(Classifier, self).__init__()
     self.FC = torch.nn.Sequential(nn.Linear(Z_in, 1),
                                   nn.Dropout(rate), nn.Sigmoid())
Example #17
    def __init__(self):
        super().__init__()

        # The parameters of a GatedBlock are:
        # - The representation multiplicities (scalar, vector and dim. 5 repr.) for the input and the output
        # - the non linearities for the scalars and the gates (None for no non-linearity)
        # - stride, padding... same as 2D convolution
        # features = [
        #     (4,),  # As input we have a scalar field
        #     (2, 2, 2, 2),  # Note that this particular choice of multiplicities it completely arbitrary
        #     (4, 4, 3, 3),
        #     (4, 4, 3, 3),
        #     (4, 4, 3, 3),
        #     (4, 4, 3, 3),
        #     (4, 4, 3, 3),
        #     (512,)  # scalar fields to end with fully-connected layers
        # ]

        features = [
            (4, ),  # As input we have a scalar field
            (8, ),
            (16, 2),
            (32, 4),
            (64, 8),
            (128, 16),
            (512, )  # scalar fields to end with fully-connected layers
        ]

        common_block_params = {
            'size': 5,
            'stride': 2,
            'padding': 3,
            'normalization': 'batch',
        }

        block_params = [
            {
                'activation': (None, torch.sigmoid)
            },
            {
                'activation': (F.relu, torch.sigmoid)
            },
            {
                'activation': (F.relu, torch.sigmoid)
            },
            {
                'activation': (F.relu, torch.sigmoid)
            },
            {
                'activation': (F.relu, torch.sigmoid)
            },
            {
                'activation': None
            },
        ]

        assert len(block_params) + 1 == len(features)

        blocks = [
            GatedBlock(features[i], features[i + 1], **common_block_params,
                       **block_params[i]) for i in range(len(block_params))
        ]

        self.sequence = nn.Sequential(*blocks, AvgSpacial(),
                                      nn.Linear(512, 256), nn.Dropout(0.3),
                                      nn.ReLU(), nn.Linear(256, 128))
Example #18
    def __init__(
        self,
        n_heads,
        n_layers,
        embedding_size,
        ffn_size,
        vocabulary_size,
        embedding=None,
        dropout=0.0,
        attention_dropout=0.0,
        relu_dropout=0.0,
        padding_idx=0,
        learn_positional_embeddings=False,
        embeddings_scale=False,
        reduction_type='mean',
        n_positions=1024,
        activation='relu',
        variant='aiayn',
        n_segments=0,
    ):
        super(TransformerEncoder, self).__init__()

        self.embedding_size = embedding_size
        self.ffn_size = ffn_size
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.dim = embedding_size
        self.embeddings_scale = embeddings_scale
        self.reduction_type = reduction_type
        self.padding_idx = padding_idx
        # this is --dropout, not --relu-dropout or --attention-dropout
        self.dropout = nn.Dropout(p=dropout)
        self.variant = variant
        self.n_segments = n_segments

        self.out_dim = embedding_size
        assert embedding_size % n_heads == 0, \
            'Transformer embedding size must be a multiple of n_heads'

        # check input formats:
        if embedding is not None:
            assert (
                embedding_size is None or embedding_size == embedding.weight.shape[1]
            ), "Embedding dim must match the embedding size."

        if embedding is not None:
            self.embeddings = embedding
        else:
            assert False
            assert padding_idx is not None
            self.embeddings = nn.Embedding(
                vocabulary_size, embedding_size, padding_idx=padding_idx
            )
            nn.init.normal_(self.embeddings.weight, 0, embedding_size ** -0.5)

        # create the positional embeddings
        self.position_embeddings = nn.Embedding(n_positions, embedding_size)
        if not learn_positional_embeddings:
            create_position_codes(
                n_positions, embedding_size, out=self.position_embeddings.weight
            )
        else:
            nn.init.normal_(self.position_embeddings.weight, 0, embedding_size ** -0.5)

        # embedding normalization
        if self.variant == 'xlm':
            self.norm_embeddings = nn.LayerNorm(self.dim, eps=LAYER_NORM_EPS)
        elif self.variant == 'aiayn':
            pass
        else:
            raise ValueError("Can't handle --variant {}".format(self.variant))

        if self.n_segments >= 1:
            self.segment_embeddings = nn.Embedding(self.n_segments, self.dim)

        # build the model
        self.layers = nn.ModuleList()
        for _ in range(self.n_layers):
            self.layers.append(TransformerEncoderLayer(
                n_heads, embedding_size, ffn_size,
                attention_dropout=attention_dropout,
                relu_dropout=relu_dropout,
                dropout=dropout,
                variant=variant,
                activation=activation,
            ))
Example #19
 def __init__(self, size, dropout):
     super(SublayerConnection, self).__init__()
     self.norm = LayerNorm(size)
     self.dropout = nn.Dropout(dropout)
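
 # The snippet omits the forward pass; in the Annotated-Transformer formulation it
 # is a pre-norm residual connection, roughly:
 def forward(self, x, sublayer):
     return x + self.dropout(sublayer(self.norm(x)))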
Example #20
    def __init__(
        self,
        n_heads,
        n_layers,
        embedding_size,
        ffn_size,
        vocabulary_size,
        embedding=None,
        dropout=0.0,
        attention_dropout=0.0,
        relu_dropout=0.0,
        embeddings_scale=True,
        learn_positional_embeddings=False,
        padding_idx=None,
        n_positions=1024,
        n_segments=0,
        variant='aiayn',
        activation='relu',
    ):
        super().__init__()
        self.embedding_size = embedding_size
        self.ffn_size = ffn_size
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.dim = embedding_size
        self.activation = activation
        self.variant = variant
        self.embeddings_scale = embeddings_scale
        self.dropout = nn.Dropout(p=dropout)  # --dropout

        self.out_dim = embedding_size
        assert embedding_size % n_heads == 0, \
            'Transformer embedding size must be a multiple of n_heads'

        self.embeddings = embedding

        if self.variant == 'xlm':
            self.norm_embeddings = nn.LayerNorm(self.dim, eps=LAYER_NORM_EPS)
        elif self.variant == 'aiayn':
            pass
        else:
            raise ValueError("Can't handle --variant {}".format(self.variant))

        # create the positional embeddings
        self.position_embeddings = nn.Embedding(n_positions, embedding_size)
        if not learn_positional_embeddings:
            create_position_codes(
                n_positions, embedding_size, out=self.position_embeddings.weight
            )
        else:
            nn.init.normal_(self.position_embeddings.weight, 0, embedding_size ** -0.5)

        # build the model
        self.layers = nn.ModuleList()
        for _ in range(self.n_layers):
            self.layers.append(TransformerDecoderLayer(
                n_heads, embedding_size, ffn_size,
                attention_dropout=attention_dropout,
                relu_dropout=relu_dropout,
                dropout=dropout,
                activation=activation,
                variant=variant,
            ))
Example #21
    def __init__(self,
                 outer_nc,
                 inner_nc,
                 submodule=None,
                 outermost=False,
                 innermost=False,
                 norm_layer=nn.BatchNorm3d,
                 use_dropout=False):
        super(UnetSkipConnectionBlock, self).__init__()
        self.outermost = outermost
        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm3d
        else:
            use_bias = norm_layer == nn.InstanceNorm3d

        downconv = nn.Conv3d(outer_nc,
                             inner_nc,
                             kernel_size=4,
                             stride=2,
                             padding=1,
                             bias=use_bias)
        downrelu = nn.LeakyReLU(0.2, True)
        downnorm = norm_layer(inner_nc)
        uprelu = nn.ReLU(True)
        upnorm = norm_layer(outer_nc)

        if outermost:
            upconv = nn.ConvTranspose3d(inner_nc * 2,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1)
            down = [downconv]
            up = [uprelu, upconv, nn.Tanh()]
            model = down + [submodule] + up
        elif innermost:
            upconv = nn.ConvTranspose3d(inner_nc,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1,
                                        bias=use_bias)
            down = [downrelu, downconv]
            up = [uprelu, upconv, upnorm]
            model = down + up
        else:
            upconv = nn.ConvTranspose3d(inner_nc * 2,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1,
                                        bias=use_bias)
            down = [downrelu, downconv, downnorm]
            up = [uprelu, upconv, upnorm]

            if use_dropout:
                model = down + [submodule] + up + [nn.Dropout(0.5)]
            else:
                model = down + [submodule] + up

        self.model = nn.Sequential(*model)
Example #22
    def __init__(self, input_levels, target_sizes, feat_size=64):
        super().__init__()

        self.embedders = nn.ModuleDict({
            k: nn.Embedding(v.n_levels, v.emb_dim)
            for k, v
            in input_levels.items()
        })
        for layer in self.embedders.values():
            layer.weight.data = nn.init.normal_(layer.weight.data, mean=0, std=0.01)
        
        env_in_features = sum([
            v.emb_dim
            for k, v
            in input_levels.items()
            if k not in {'teams', 'pitchers', 'managers'}
            
        ])
        team_in_features = sum([
            v.emb_dim
            for k, v
            in input_levels.items()
            if k in {'teams', 'pitchers', 'managers'}
        ])
        
        env_hidden_features = int(env_in_features//2)
        team_hidden_features = int(team_in_features//2)
        out_features = feat_size
        self.env_encoder = nn.Sequential(
            nn.Dropout(0.1),
            nn.Linear(env_in_features, env_hidden_features),
            nn.BatchNorm1d(env_hidden_features),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(env_hidden_features, out_features),
            nn.BatchNorm1d(out_features),
            nn.ReLU(),
        )
        self.team_encoder = nn.Sequential(
            nn.Dropout(0.1),
            nn.Linear(team_in_features, team_hidden_features),
            nn.BatchNorm1d(team_hidden_features),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(team_hidden_features, out_features),
            nn.BatchNorm1d(out_features),
            nn.ReLU(),
        )
        
        in_features = out_features*3  # env output, team, opponent
        hidden_features = in_features//2
        self.ffn = nn.Sequential(
            nn.Dropout(0.1),
            nn.Linear(in_features, hidden_features),
            nn.BatchNorm1d(hidden_features),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_features, feat_size),
        )

        self.target_classifiers = nn.ModuleDict({
            f'{k}_{side}': nn.Linear(feat_size, n_targets)
            for k, n_targets in target_sizes.items()
            for side in ['team', 'opp']
        })
        self.target_classifiers['Win'] = nn.Linear(feat_size, 1)
Example #23
    def __init__(self):
        super(Net, self).__init__()
        # Input conv block
        self.convblock1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=10, kernel_size=(3, 3), padding=1, bias=False),
            nn.BatchNorm2d(10),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # output size : 28, RF : 3

        # Conv block 1
        self.convblock2 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(3, 3), padding=1, bias=False),
            nn.BatchNorm2d(10),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # output size : 28, RF : 5

        # Transition block 1
        self.pool1 = nn.MaxPool2d(2, 2) # output size : 12, RF : 6

        self.convblock3 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(10),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # output size : 12, RF : 10

        # Conv block 2
        self.convblock4 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(10),
            nn.ReLU()
        ) # output size : 10, RF : 14
        self.convblock5 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(10),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # output size : 8, RF : 18

        self.convblock6 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # output size : 6, RF : 22

        self.convblock7 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # output size : 4, RF : 26

        # output block
        self.gap = nn.AvgPool2d(kernel_size=(4,4)) 

        self.convblock8 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
        ) # output size : 1, RF : 26
Example #24
 def __init__(self, d_k, dropout=.1):
     super(ScaledDotProductAttention, self).__init__()
     self.scale_factor = np.sqrt(d_k)
     self.softmax = nn.Softmax(dim=-1)
     self.dropout = nn.Dropout(dropout)
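
 # Forward pass not shown in this snippet; standard scaled dot-product attention
 # built from the modules above would look roughly like this (illustrative):
 def forward(self, q, k, v, attn_mask=None):
     # softmax(Q K^T / sqrt(d_k)) V
     attn = torch.matmul(q, k.transpose(-2, -1)) / self.scale_factor
     if attn_mask is not None:
         attn = attn.masked_fill(attn_mask, float('-inf'))
     attn = self.dropout(self.softmax(attn))
     return torch.matmul(attn, v), attn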
Example #25
    def __init__(self, nsamples, nhiddens=None, nlatent=32, alpha=None,
                 beta=200, dropout=0.2, cuda=False):
        if nlatent < 1:
            raise ValueError('Minimum 1 latent neuron, not {}'.format(nlatent))

        if nsamples < 1:
            raise ValueError('nsamples must be > 0, not {}'.format(nsamples))

        # If only 1 sample, we weigh alpha and nhiddens differently
        if alpha is None:
            alpha = 0.15 if nsamples > 1 else 0.50

        if nhiddens is None:
            nhiddens = [512, 512] if nsamples > 1 else [256, 256]

        if dropout is None:
            dropout = 0.2 if nsamples > 1 else 0.0

        if any(i < 1 for i in nhiddens):
            raise ValueError('Minimum 1 neuron per layer, not {}'.format(min(nhiddens)))

        if beta <= 0:
            raise ValueError('beta must be > 0, not {}'.format(beta))

        if not (0 < alpha < 1):
            raise ValueError('alpha must be 0 < alpha < 1, not {}'.format(alpha))

        if not (0 <= dropout < 1):
            raise ValueError('dropout must be 0 <= dropout < 1, not {}'.format(dropout))

        super(VAE, self).__init__()

        # Initialize simple attributes
        self.usecuda = cuda
        self.nsamples = nsamples
        self.ntnf = 103
        self.alpha = alpha
        self.beta = beta
        self.nhiddens = nhiddens
        self.nlatent = nlatent
        self.dropout = dropout

        # Initialize lists for holding hidden layers
        self.encoderlayers = _nn.ModuleList()
        self.encodernorms = _nn.ModuleList()
        self.decoderlayers = _nn.ModuleList()
        self.decodernorms = _nn.ModuleList()

        # Add all other hidden layers
        for nin, nout in zip([self.nsamples + self.ntnf] + self.nhiddens, self.nhiddens):
            self.encoderlayers.append(_nn.Linear(nin, nout))
            self.encodernorms.append(_nn.BatchNorm1d(nout))

        # Latent layers
        self.mu = _nn.Linear(self.nhiddens[-1], self.nlatent)
        self.logsigma = _nn.Linear(self.nhiddens[-1], self.nlatent)

        # Add first decoding layer
        for nin, nout in zip([self.nlatent] + self.nhiddens[::-1], self.nhiddens[::-1]):
            self.decoderlayers.append(_nn.Linear(nin, nout))
            self.decodernorms.append(_nn.BatchNorm1d(nout))

        # Reconstruction (output) layer
        self.outputlayer = _nn.Linear(self.nhiddens[0], self.nsamples + self.ntnf)

        # Activation functions
        self.relu = _nn.LeakyReLU()
        self.softplus = _nn.Softplus()
        self.dropoutlayer = _nn.Dropout(p=self.dropout)

        if cuda:
            self.cuda()
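
# Illustrative only (not this class's actual methods): the mu / logsigma layers
# defined above are typically combined with the standard VAE reparameterisation
# trick, i.e. for an encoded tensor h:
#
#     mu, logsigma = self.mu(h), self.logsigma(h)
#     z = mu + torch.randn_like(mu) * torch.exp(0.5 * logsigma)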
if args.arch == 'vgg13':
    model = models.vgg13(pretrained=True)
    no_input_layer = 25088
else:
    model = models.alexnet(pretrained=True)
    no_input_layer = 9216

for param in model.parameters():
    param.requires_grad = False

if args.hidden_units != None:
    classifier = nn.Sequential(OrderedDict([
                            ('fc1', nn.Linear(no_input_layer, args.hidden_units)),
                            ('relu1', nn.ReLU()),
                            ('dropout1', nn.Dropout(p=0.3)),
                            ('fc2', nn.Linear(args.hidden_units, 2048)),
                            ('relu2', nn.ReLU()),
                            ('dropout2', nn.Dropout(p=0.3)),
                            ('fc3', nn.Linear(2048, 102)),
                            ('output', nn.LogSoftmax(dim=1))
                            ]))
    
else:
    classifier = nn.Sequential(OrderedDict([
                            ('fc1', nn.Linear(no_input_layer, 4096)),
                            ('relu1', nn.ReLU()),
                            ('dropout1', nn.Dropout(p=0.3)),
                            ('fc2', nn.Linear(4096, 2048)),
                            ('relu2', nn.ReLU()),
                            ('dropout2', nn.Dropout(p=0.3)),
                            ('fc3', nn.Linear(2048, 102)),
                            ('output', nn.LogSoftmax(dim=1))
                            ]))
Example #27
 def __init__(self, config):
     super(BertNeuralNet, self).__init__(config)
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.linear_out = nn.Linear(config.hidden_size, 1)
     self.apply(self.init_bert_weights)
Example #28
    def __init__(self, config):
        super().__init__()

        self.dropout = nn.Dropout(config.classifier_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
Example #29
 def __init__(self, encoder, decoder, dropout=0):
     nn.Module.__init__(self)
     self.encoder = encoder
     self.decoder = decoder
     self.dropout = nn.Dropout(dropout)
    def __init__(self, args):
        super(traphicNet, self).__init__()

        ## Unpack arguments
        self.args = args

        ## Use gpu flag
        self.use_cuda = args['cuda']

        # Flag for maneuver based (True) vs uni-modal decoder (False)
        self.use_maneuvers = args['use_maneuvers']

        # Flag for train mode (True) vs test-mode (False)
        self.train_flag = True

        ## Sizes of network layers
        self.dropout_prob = args['dropout_prob']
        self.encoder_size = args['encoder_size']
        self.decoder_size = args['decoder_size']
        self.in_length = args['in_length']
        self.out_length = args['out_length']
        self.grid_size = args['grid_size']
        self.upp_grid_size = args['upp_grid_size']
        self.soc_conv_depth = args['soc_conv_depth']
        self.conv_3x1_depth = args['conv_3x1_depth']
        self.dyn_embedding_size = args['dyn_embedding_size']
        self.input_embedding_size = args['input_embedding_size']
        self.num_lat_classes = args['num_lat_classes']
        self.num_lon_classes = args['num_lon_classes']
        self.soc_embedding_size = ((
            (args['grid_size'][0] - 4) + 1) // 2) * self.conv_3x1_depth
        self.upp_soc_embedding_size = ((
            (args['upp_grid_size'][0] - 4) + 1) // 2) * self.conv_3x1_depth
        self.ours = args['ours']
        ## Define network weights

        # Input embedding layer
        self.ip_emb = torch.nn.Linear(2, self.input_embedding_size)

        # Behavioral Modification 3: Extra Inputs
        if self.ours:
            self.ip_emb_vel = torch.nn.Linear(2, self.input_embedding_size)
        # Behavioral Modification 3: Extra Inputs
        if self.ours:
            self.ip_emb_nc = torch.nn.Linear(2, self.input_embedding_size)

        # Encoder LSTM
        self.enc_lstm = torch.nn.LSTM(self.input_embedding_size,
                                      self.encoder_size, 1)

        # Vehicle dynamics embedding
        self.dyn_emb = torch.nn.Linear(self.encoder_size,
                                       self.dyn_embedding_size)

        # Batch norm
        self.bn_conv = torch.nn.BatchNorm2d(self.encoder_size)
        #Behavioral Modification 1: Weighting the neighbors' hidden vectors after the LSTM stage
        if self.ours:
            self.beh_1 = torch.nn.Linear(self.encoder_size, self.encoder_size)

        # Convolutional social pooling layer and social embedding layer
        self.soc_conv = torch.nn.Conv2d(self.encoder_size, self.soc_conv_depth,
                                        3)
        self.conv_3x1 = torch.nn.Conv2d(self.soc_conv_depth,
                                        self.conv_3x1_depth, (3, 1))
        self.soc_maxpool = torch.nn.MaxPool2d((2, 1), padding=(1, 0))

        # FC social pooling layer (for comparison):
        # self.soc_fc = torch.nn.Linear(self.soc_conv_depth * self.grid_size[0] * self.grid_size[1], (((args['grid_size'][0]-4)+1)//2)*self.conv_3x1_depth)

        # Decoder LSTM
        if self.use_maneuvers:
            if self.ours:
                self.dec_lstm = torch.nn.LSTM(
                    self.upp_soc_embedding_size + self.soc_embedding_size +
                    self.dyn_embedding_size + self.num_lat_classes +
                    self.num_lon_classes, self.decoder_size)
            else:
                self.dec_lstm = torch.nn.LSTM(
                    self.soc_embedding_size + self.dyn_embedding_size +
                    self.num_lat_classes + self.num_lon_classes,
                    self.decoder_size)
        else:
            if self.ours:
                self.dec_lstm = torch.nn.LSTM(self.upp_soc_embedding_size +
                                              self.soc_embedding_size +
                                              self.dyn_embedding_size,
                                              self.decoder_size,
                                              dropout=self.dropout_prob)
            else:
                self.dec_lstm = torch.nn.LSTM(self.soc_embedding_size +
                                              self.dyn_embedding_size,
                                              self.decoder_size,
                                              dropout=self.dropout_prob)

        #batch norm
        # self.bn_dec = torch.nn.BatchNorm1d(self.decoder_size)
        #batch norm
        # self.bn_enc = torch.nn.BatchNorm1d(self.decoder_size)
        self.bnupp_soc_enc = torch.nn.BatchNorm1d(self.input_embedding_size)
        self.bn_soc_enc = torch.nn.BatchNorm1d(self.soc_embedding_size)
        self.bn_hist_enc = torch.nn.BatchNorm1d(self.upp_soc_embedding_size)
        # Output layers:
        self.op = torch.nn.Linear(self.decoder_size, 5)
        #batchnorm
        self.bn_lin = torch.nn.BatchNorm1d(self.out_length)

        # Dropout
        self.dropout = nn.Dropout(self.dropout_prob)

        if self.ours:
            self.op_lat = torch.nn.Linear(
                self.upp_soc_embedding_size + self.soc_embedding_size +
                self.dyn_embedding_size, self.num_lat_classes)
            self.op_lon = torch.nn.Linear(
                self.upp_soc_embedding_size + self.soc_embedding_size +
                self.dyn_embedding_size, self.num_lon_classes)
        else:
            self.op_lat = torch.nn.Linear(
                self.soc_embedding_size + self.dyn_embedding_size,
                self.num_lat_classes)
            self.op_lon = torch.nn.Linear(
                self.soc_embedding_size + self.dyn_embedding_size,
                self.num_lon_classes)

        # Activations:
        # self.leaky_relu = torch.nn.LeakyReLU(0.1)
        self.leaky_relu = torch.nn.ELU()
        self.relu = torch.nn.ReLU()
        self.softmax = torch.nn.Softmax(dim=1)