Example No. 1
    def __init__(self,
                 input_size,
                 hidden_sizes,
                 output_size,
                 act_func='sigmoid',
                 train_alg='batch'):
        """
        Parameters:
        ------------------
        - input_size: int, the number of input features
        - hidden_sizes: list of int, the number of units in each hidden layer
        - output_size: int, the length of the output vector
        - act_func: str, name of the activation function applied after each hidden layer
        - train_alg: str, one of {'batch', 'reweight', 'naive'}
        """
        super(MLP, self).__init__()

        self.input_size = input_size
        layer_sizes = [input_size] + hidden_sizes
        self.linears = nn.ModuleList([
            Linear(in_size, out_size, bias=True)
            for in_size, out_size in zip(layer_sizes[:-1], layer_sizes[1:])
        ])

        self.output_layer = Linear(hidden_sizes[-1], output_size, bias=True)
        self.act = activation[act_func]
        self.train_alg = train_alg

        # list of layers in the network
        self.layers = [layer for layer in self.linears]
        self.layers.append(self.output_layer)
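
A minimal, self-contained sketch of the same construction pattern with plain torch.nn (the sizes 784 / [256, 128] / 10 are placeholders, and the `activation` registry used by the snippet is assumed to map 'sigmoid' to torch.sigmoid):

    import torch
    import torch.nn as nn

    layer_sizes = [784, 256, 128]                  # [input_size] + hidden_sizes
    linears = nn.ModuleList([
        nn.Linear(i, o, bias=True)
        for i, o in zip(layer_sizes[:-1], layer_sizes[1:])
    ])
    output_layer = nn.Linear(layer_sizes[-1], 10)  # hidden_sizes[-1] -> output_size

    x = torch.randn(32, 784)
    for linear in linears:
        x = torch.sigmoid(linear(x))               # 'sigmoid' hidden activation
    logits = output_layer(x)
    print(logits.shape)                            # torch.Size([32, 10])
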
Example No. 2
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu",
                 pe_grad=True):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)

        self._pe_modules = [
            self.self_attn, self.linear1, self.linear2, self.norm1, self.norm2
        ]
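
This layer mirrors torch.nn.TransformerEncoderLayer, with the extra _pe_modules list presumably collecting the submodules whose per-example gradients are tracked. For comparison, the stock PyTorch layer can be exercised like this (shapes are placeholders):

    import torch
    import torch.nn as nn

    layer = nn.TransformerEncoderLayer(d_model=512, nhead=8,
                                       dim_feedforward=2048, dropout=0.1)
    src = torch.randn(10, 32, 512)   # (seq_len, batch, d_model)
    out = layer(src)
    print(out.shape)                 # torch.Size([10, 32, 512])
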
Example No. 3
    def __init__(self, input_size, hidden_size, num_classes, train_alg='batch'):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = num_classes
        self.train_alg = train_alg

        self.rnn = RNNCell(input_size, hidden_size)
        self.fc = Linear(self.hidden_size, self.output_size)
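
Assuming RNNCell behaves like torch.nn.RNNCell, the classifier amounts to unrolling the cell over the sequence and feeding the final hidden state to the linear layer; a self-contained sketch with placeholder sizes:

    import torch
    import torch.nn as nn

    input_size, hidden_size, num_classes = 28, 64, 10
    cell = nn.RNNCell(input_size, hidden_size)
    fc = nn.Linear(hidden_size, num_classes)

    x = torch.randn(16, 28, input_size)   # (batch, seq_len, features)
    h = torch.zeros(16, hidden_size)
    for t in range(x.size(1)):
        h = cell(x[:, t, :], h)
    logits = fc(h)
    print(logits.shape)                   # torch.Size([16, 10])
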
Example No. 4
    def __init__(self,
                 input_size,
                 channel_sizes,
                 kernel_sizes,
                 fc_sizes,
                 num_classes,
                 train_alg='batch'):
        super().__init__()

        self.input_size = input_size
        self.kernel_sizes = kernel_sizes
        self.act = F.relu
        self.pooling = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # convolutional layers
        layers = []
        out_size = input_size
        for c_in, c_out, k in zip(channel_sizes[:-1], channel_sizes[1:],
                                  kernel_sizes):
            layer = Conv2d(c_in, c_out, k)
            layers.append(layer)
            out_size = conv_outsize(out_size, k, layer.padding[0],
                                    layer.stride[0])
            out_size = conv_outsize(out_size, self.pooling.kernel_size,
                                    self.pooling.padding, self.pooling.stride)

        self.convs = nn.ModuleList(layers)
        self.conv_outsize = out_size * out_size * c_out

        # fully-connected layers
        fc_sizes = [self.conv_outsize] + fc_sizes
        self.linears = nn.ModuleList([
            Linear(in_size, out_size)
            for in_size, out_size in zip(fc_sizes[:-1], fc_sizes[1:])
        ])
        self.output_layer = Linear(fc_sizes[-1], num_classes)

        self.layers = [layer for layer in self.convs]
        self.layers += [layer for layer in self.linears]
        self.layers.append(self.output_layer)
        self.train_alg = train_alg
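
The conv_outsize helper is not shown; it presumably implements the standard output-size formula for square convolutions and poolings, floor((in + 2 * padding - kernel) / stride) + 1. A sketch of that arithmetic:

    def conv_outsize(in_size, kernel, padding, stride):
        # floor((in + 2*padding - kernel) / stride) + 1, square inputs assumed
        return (in_size + 2 * padding - kernel) // stride + 1

    # e.g. a 32x32 input through a 5x5 conv (padding 0, stride 1), then a 2x2 max-pool:
    after_conv = conv_outsize(32, 5, 0, 1)            # 28
    after_pool = conv_outsize(after_conv, 2, 0, 2)    # 14
    print(after_conv, after_pool)
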
Example No. 5
    def __init__(self, output_size, cfg='A', train_alg='batch',
                 batch_norm=False, pre_trained=False, init_weights=True):
        super(VGG, self).__init__()
        self.layers = []
        self.features = make_layers(cfgs[cfg], self.layers, batch_norm)
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = Sequential(
            Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            Linear(4096, output_size)
        )
        self.layers += [self.classifier[0], self.classifier[3], self.classifier[6]]

        if init_weights:
            self._initialize_weights()

        self.train_alg = train_alg        
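
cfgs and make_layers are defined elsewhere; they appear to follow torchvision's VGG convention, where config 'A' (VGG-11) is a list of channel counts with 'M' marking a max-pooling step (the snippet's make_layers additionally appends the conv layers to self.layers). A sketch of that expansion:

    import torch.nn as nn

    cfg_a = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']

    def make_layers(cfg, batch_norm=False):
        layers, in_channels = [], 3
        for v in cfg:
            if v == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.append(nn.Conv2d(in_channels, v, kernel_size=3, padding=1))
                if batch_norm:
                    layers.append(nn.BatchNorm2d(v))
                layers.append(nn.ReLU(inplace=True))
                in_channels = v
        return nn.Sequential(*layers)

    features = make_layers(cfg_a)   # the conv trunk; the classifier above completes VGG-11
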
Example No. 6
    def __init__(self, input_size, hidden_size, num_classes, num_layers=1,
                 train_alg='batch', bias=True):
        super(RNN, self).__init__()

        self.hidden_size = hidden_size
        # self.rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers,
        #                   nonlinearity='tanh')
        self.rnn = RNNModule(input_size, hidden_size)
        self.output_layer = Linear(hidden_size, num_classes)
        self.train_alg = train_alg

        self.layers = [self.rnn, self.output_layer]
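
The commented-out lines suggest RNNModule stands in for torch.nn.RNN; a sketch with the stock module, classifying from the last time step (sizes are placeholders):

    import torch
    import torch.nn as nn

    rnn = nn.RNN(input_size=28, hidden_size=64, num_layers=1, nonlinearity='tanh')
    output_layer = nn.Linear(64, 10)

    x = torch.randn(28, 16, 28)     # (seq_len, batch, input_size)
    out, h_n = rnn(x)               # out: (seq_len, batch, hidden_size)
    logits = output_layer(out[-1])  # final time step -> class scores
    print(logits.shape)             # torch.Size([16, 10])
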
Example No. 7
    def __init__(self, block, layers, num_classes=10, zero_init_residual=False,
                 norm_layer=None, train_alg='batch'):
        super(ResNet, self).__init__()

        self.train_alg = train_alg
        self.inplanes = 64
        self.dilation = 1
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.conv1 = Conv2d(3, self.inplanes, kernel_size=6, stride=2, padding=3,
                            bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

        # collecting layers whose per-example gradients need to be computed
        self.layers = [self.conv1]
        add_pegrad_layers(self.layer1, self.layers)
        add_pegrad_layers(self.layer2, self.layers)
        add_pegrad_layers(self.layer3, self.layers)
        add_pegrad_layers(self.layer4, self.layers)
        self.layers.append(self.fc)
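
A quick self-contained check of the stem defined above (note the 6x6 kernel instead of torchvision's usual 7x7); the 224x224 input is only an example. With torchvision-style blocks, layers=[2, 2, 2, 2] with BasicBlock gives a ResNet-18-style network and layers=[3, 4, 6, 3] with Bottleneck a ResNet-50-style one.

    import torch
    import torch.nn as nn

    stem = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=6, stride=2, padding=3, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    x = torch.randn(1, 3, 224, 224)
    print(stem(x).shape)   # torch.Size([1, 64, 57, 57])
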
Example No. 8
    def __init__(self,
                 embed_dim,
                 num_heads,
                 dropout=0.,
                 bias=True,
                 add_bias_kv=False,
                 add_zero_attn=False):
        super(MultiheadAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.dropout = dropout
        self.head_dim = embed_dim // num_heads
        assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"

        self.in_proj = Linear(embed_dim, 3 * embed_dim)
        # self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim))

        # if bias:
        #     self.in_proj_bias = Parameter(torch.empty(3 * embed_dim))
        # else:
        #     self.register_parameter('in_proj_bias', None)
        self.out_proj = Linear(embed_dim, embed_dim)

        self._reset_parameters()
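
The single in_proj produces the concatenated query, key and value projections; a sketch of that projection-and-split step (the per-head reshape and the scaled dot-product attention itself are omitted):

    import torch
    import torch.nn as nn

    embed_dim, num_heads = 512, 8
    in_proj = nn.Linear(embed_dim, 3 * embed_dim)
    out_proj = nn.Linear(embed_dim, embed_dim)

    x = torch.randn(10, 32, embed_dim)      # (seq_len, batch, embed_dim)
    q, k, v = in_proj(x).chunk(3, dim=-1)   # each (10, 32, 512)
    print(q.shape, k.shape, v.shape)
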
Example No. 9
    def __init__(self, n_token, n_classes, d_model=512, n_layers=2,
                 n_head=8, n_hidden=2048, dropout=0.1, max_seq_len=512,
                 embeddings=None, train_alg='batch'):
        super(TransformerModel, self).__init__()

        self.train_alg = train_alg
        self.d_model = d_model
        self.n_head = n_head

        if embeddings is None:            
            self.token_embedding = nn.Embedding(n_token, d_model)
        else:
            self.token_embedding = nn.Embedding.from_pretrained(embeddings)
            self.token_embedding.weight.requires_grad = False

        self.pos_encoder = PositionalEncoding(d_model, dropout, max_seq_len)        
        encoder_layers = TransformerEncoderLayer(d_model, n_head, n_hidden, dropout)
        # encoder_norm = nn.LayerNorm(d_model)
        encoder_norm = None
        self.encoder = TransformerEncoder(encoder_layers, n_layers, encoder_norm)
        self.fc = Linear(d_model, n_classes)
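
Apart from PositionalEncoding (a custom module not shown here), the pieces map onto stock PyTorch; a comparable construction with placeholder sizes:

    import torch
    import torch.nn as nn

    n_token, n_classes, d_model, n_head, n_hidden, n_layers = 1000, 5, 512, 8, 2048, 2
    token_embedding = nn.Embedding(n_token, d_model)
    encoder_layer = nn.TransformerEncoderLayer(d_model, n_head, n_hidden, dropout=0.1)
    encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_layers, norm=None)
    fc = nn.Linear(d_model, n_classes)

    tokens = torch.randint(0, n_token, (35, 16))   # (seq_len, batch)
    out = encoder(token_embedding(tokens))         # (seq_len, batch, d_model)
    logits = fc(out.mean(dim=0))                   # mean-pool over time, then classify
    print(logits.shape)                            # torch.Size([16, 5])
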
Example No. 10
    def __init__(self, input_size, hidden_size, output_size, train_alg='batch'):
        super(SimpleLSTM, self).__init__()

        self.lstm = LSTMCell(input_size, hidden_size)
        self.fc = Linear(hidden_size, output_size)
        self.train_alg = train_alg
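
Assuming LSTMCell behaves like torch.nn.LSTMCell, the model steps the cell through the sequence and classifies from the final hidden state; a self-contained sketch with placeholder sizes:

    import torch
    import torch.nn as nn

    cell = nn.LSTMCell(input_size=28, hidden_size=64)
    fc = nn.Linear(64, 10)

    x = torch.randn(16, 28, 28)       # (batch, seq_len, features)
    h = torch.zeros(16, 64)
    c = torch.zeros(16, 64)
    for t in range(x.size(1)):
        h, c = cell(x[:, t, :], (h, c))
    logits = fc(h)
    print(logits.shape)               # torch.Size([16, 10])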