def _init(m):
     if isinstance(m, nn.Conv2d):
         mynn.init.MSRAFill(m.weight)
         init.constant_(m.bias, 0)
     elif isinstance(m, nn.Linear):
         mynn.init.XavierFill(m.weight)
         init.constant_(m.bias, 0)
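Init hooks like `_init` above are normally passed to `Module.apply`, which visits every submodule. `mynn.init.MSRAFill` and `mynn.init.XavierFill` are Caffe2-style fills used in Detectron-style codebases; a minimal sketch using only stock `torch.nn.init` as rough stand-ins (an approximation, not the exact fills):

import torch.nn as nn
from torch.nn import init

def _init_stock(m):
    # Rough stand-ins: MSRAFill is roughly a fan-out Kaiming normal fill,
    # XavierFill roughly a fan-in-scaled uniform fill.
    if isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        init.xavier_uniform_(m.weight)
        init.constant_(m.bias, 0)

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.Flatten(), nn.Linear(8 * 30 * 30, 10))
model.apply(_init_stock)  # .apply() recurses over every submodule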
Example #2
    def __init__(
        self,
        layers,
        activations,
        use_batch_norm=False,
        use_noisy_linear_layers=False,
        min_std=0.0,
    ) -> None:
        super(FullyConnectedNetwork, self).__init__()
        self.layers: nn.ModuleList = nn.ModuleList()
        self.batch_norm_ops: nn.ModuleList = nn.ModuleList()
        self.activations = activations
        self.use_batch_norm = use_batch_norm

        assert len(layers) >= 2, "Invalid layer schema {} for network".format(layers)

        for i, layer in enumerate(layers[1:]):
            if use_noisy_linear_layers:
                self.layers.append(NoisyLinear(layers[i], layer))
            else:
                self.layers.append(nn.Linear(layers[i], layer))
            if self.use_batch_norm:
                self.batch_norm_ops.append(nn.BatchNorm1d(layers[i]))
            gaussian_fill_w_gain(
                self.layers[i].weight, self.activations[i], layers[i], min_std
            )
            init.constant_(self.layers[i].bias, 0)
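`gaussian_fill_w_gain` is defined elsewhere in the originating codebase and is not shown here; a plausible reconstruction, assuming it draws from a normal distribution scaled by a ReLU gain over the fan-in (the real helper may differ):

import math
from torch.nn import init

def gaussian_fill_w_gain(tensor, activation, dim_in, min_std=0.0):
    # Hypothetical sketch: gain sqrt(2) for (leaky) ReLU, 1 otherwise,
    # std scaled by 1/sqrt(fan_in), with an optional lower bound.
    gain = math.sqrt(2) if activation in ("relu", "leaky_relu") else 1.0
    init.normal_(tensor, mean=0.0, std=max(gain * math.sqrt(1.0 / dim_in), min_std))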
Example #3
 def _init_weights(self, m):
     if isinstance(m, nn.Conv2d):
         if cfg.KRCNN.CONV_INIT == 'GaussianFill':
             init.normal_(m.weight, std=0.01)
         elif cfg.KRCNN.CONV_INIT == 'MSRAFill':
             mynn.init.MSRAFill(m.weight)
         else:
              raise ValueError('Unexpected cfg.KRCNN.CONV_INIT: {}'.format(cfg.KRCNN.CONV_INIT))
         init.constant_(m.bias, 0)
Example #4
 def _init_weights(self, m):
     if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
         if cfg.MRCNN.CONV_INIT == 'GaussianFill':
             init.normal_(m.weight, std=0.001)
         elif cfg.MRCNN.CONV_INIT == 'MSRAFill':
             mynn.init.MSRAFill(m.weight)
         else:
             raise ValueError
         init.constant_(m.bias, 0)
Example #5
 def _init_weights(self):
     if not cfg.MRCNN.USE_FC_OUTPUT and cfg.MRCNN.CLS_SPECIFIC_MASK and \
             cfg.MRCNN.CONV_INIT=='MSRAFill':
         # Use GaussianFill for class-agnostic mask prediction; fills based on
         # fan-in can be too large in this case and cause divergence
         weight_init_func = mynn.init.MSRAFill
     else:
         weight_init_func = partial(init.normal_, std=0.001)
     weight_init_func(self.classify.weight)
     init.constant_(self.classify.bias, 0)
Example #6
    def __init__(self, version=1.0, num_classes=1000):
        super(SqueezeNet, self).__init__()
        if version not in [1.0, 1.1]:
            raise ValueError("Unsupported SqueezeNet version {version}:"
                             "1.0 or 1.1 expected".format(version=version))
        self.num_classes = num_classes
        if version == 1.0:
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=7, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(96, 16, 64, 64),
                Fire(128, 16, 64, 64),
                Fire(128, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 32, 128, 128),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(512, 64, 256, 256),
            )
        else:
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
            )
        # Final convolution is initialized differently from the rest
        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            final_conv,
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1))
        )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m is final_conv:
                    init.normal_(m.weight, mean=0.0, std=0.01)
                else:
                    init.kaiming_uniform_(m.weight)
                if m.bias is not None:
                    init.constant_(m.bias, 0)
Example #7
    def _init_weights(self):
        if cfg.KRCNN.USE_DECONV:
            init.normal_(self.deconv.weight, std=0.01)
            init.constant_(self.deconv.bias, 0)

        if cfg.KRCNN.CONV_INIT == 'GaussianFill':
            init.normal_(self.classify.weight, std=0.001)
        elif cfg.KRCNN.CONV_INIT == 'MSRAFill':
            mynn.init.MSRAFill(self.classify.weight)
        else:
            raise ValueError(cfg.KRCNN.CONV_INIT)
        init.constant_(self.classify.bias, 0)
Example #8
    def _init_weights(self):
        if cfg.FPN.USE_GN:
            conv = self.conv_lateral[0]
        else:
            conv = self.conv_lateral

        if cfg.FPN.ZERO_INIT_LATERAL:
            init.constant_(conv.weight, 0)
        else:
            mynn.init.XavierFill(conv.weight)
        if conv.bias is not None:
            init.constant_(conv.bias, 0)
Example #9
 def init_func(m):
     classname = m.__class__.__name__
     if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
         if init_type == 'normal':
             init.normal_(m.weight.data, 0.0, gain)
         elif init_type == 'xavier':
             init.xavier_normal_(m.weight.data, gain=gain)
         elif init_type == 'kaiming':
             init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
         elif init_type == 'orthogonal':
             init.orthogonal_(m.weight.data, gain=gain)
         else:
             raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
         if hasattr(m, 'bias') and m.bias is not None:
             init.constant_(m.bias.data, 0.0)
     elif classname.find('BatchNorm2d') != -1:
         init.normal_(m.weight.data, 1.0, gain)
         init.constant_(m.bias.data, 0.0)
Example #10
 def test_l2_regularization(self):
     model = torch.nn.Sequential(
             torch.nn.Linear(5, 10),
             torch.nn.Linear(10, 5)
     )
     initializer = InitializerApplicator([(".*", lambda tensor: constant_(tensor, 0.5))])
     initializer(model)
     value = RegularizerApplicator([("", L2Regularizer(1.0))])(model)
     assert value.data.numpy() == 28.75
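The expected value follows from the parameter counts: the two Linear layers hold 5*10 + 10 + 10*5 + 5 = 115 parameters, each set to 0.5, so the L2 penalty with strength 1.0 is 115 * 0.5**2 = 28.75. A quick check of that arithmetic:

n_params = 5 * 10 + 10 + 10 * 5 + 5   # weights + biases of both Linear layers
assert n_params == 115
assert n_params * 0.5 ** 2 == 28.75   # L2 penalty: sum of squared 0.5 entries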
Example #11
 def test_regularizer_applicator_respects_regex_matching(self):
     model = torch.nn.Sequential(
             torch.nn.Linear(5, 10),
             torch.nn.Linear(10, 5)
     )
     initializer = InitializerApplicator([(".*", lambda tensor: constant_(tensor, 1.))])
     initializer(model)
     value = RegularizerApplicator([("weight", L2Regularizer(0.5)),
                                    ("bias", L1Regularizer(1.0))])(model)
     assert value.data.numpy() == 65.0
Example #12
    def __init__(self, layers, activations, use_batch_norm=False, action_dim=0) -> None:
        """
        Dueling Q-Network Architecture: https://arxiv.org/abs/1511.06581

        :param layers: List of layer dimensions
        :param activations: List of layer activations
        :param use_batch_norm: bool indicating whether to apply batch normalization
        :param action_dim: if !=0 use parametric dueling DQN, else standard dueling DQN
        """
        super(DuelingQNetwork, self).__init__()
        self.layers: nn.ModuleList = nn.ModuleList()
        self.batch_norm_ops: nn.ModuleList = nn.ModuleList()
        self.activations = activations
        self.use_batch_norm = use_batch_norm

        assert len(layers) >= 3, "Invalid layer schema {} for network".format(layers)
        assert (
            len(layers) == len(activations) + 1
        ), "Invalid activation schema {} for network".format(activations)
        assert (
            layers[-2] % 2 == 0
        ), """Last shared layer in dueling architecture should be
        divisible by 2."""

        for i, layer in enumerate(layers[1:-1]):
            self.layers.append(nn.Linear(layers[i], layer))
            self.batch_norm_ops.append(nn.BatchNorm1d(layers[i]))
            gaussian_fill_w_gain(self.layers[i].weight, self.activations[i], layers[i])
            init.constant_(self.layers[i].bias, 0)

        self.parametric_action = action_dim > 0
        # Split last layer into a value & advantage stream
        self.advantage = nn.Sequential(
            nn.Linear(int(layers[-2] + action_dim), int(layers[-2] / 2)),
            nn.ReLU(),
            nn.Linear(int(layers[-2] / 2), layers[-1]),
        )
        self.value = nn.Sequential(
            nn.Linear(int(layers[-2]), int(layers[-2] / 2)),
            nn.ReLU(),
            nn.Linear(int(layers[-2] / 2), 1),
        )
        self._name = "unnamed"
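The constructor above only builds the two streams; per the dueling-DQN paper cited in the docstring, they are usually combined as Q(s, a) = V(s) + A(s, a) - mean_a A(s, a). A minimal sketch of that combination (the actual forward pass in the originating codebase may split the state/action input differently):

def combine_dueling_streams(value, advantage):
    # value: (batch, 1) tensor, advantage: (batch, num_actions) tensor.
    # Subtracting the mean advantage keeps the two streams identifiable.
    return value + advantage - advantage.mean(dim=1, keepdim=True)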
Example #13
 def test_l1_regularization(self):
     model = torch.nn.Sequential(
             torch.nn.Linear(5, 10),
             torch.nn.Linear(10, 5)
     )
     initializer = InitializerApplicator([(".*", lambda tensor: constant_(tensor, -1))])
     initializer(model)
     value = RegularizerApplicator([("", L1Regularizer(1.0))])(model)
     # 115 because of biases.
     assert value.data.numpy() == 115.0
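As the comment notes, the 115 comes from counting biases as well as weights; a quick check, assuming the same two Linear layers:

weights = 5 * 10 + 10 * 5   # 100 weight entries
biases = 10 + 5             # 15 bias entries, hence "because of biases"
assert abs(-1) * (weights + biases) * 1.0 == 115.0  # L1 penalty with strength 1.0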
Example #14
 def init_params(self):
     for m in self.modules():
         if isinstance(m, nn.Conv2d):
             init.kaiming_normal_(m.weight, mode='fan_out')
             if m.bias is not None:
                 init.constant_(m.bias, 0)
         elif isinstance(m, nn.BatchNorm2d):
             init.constant_(m.weight, 1)
             init.constant_(m.bias, 0)
         elif isinstance(m, nn.Linear):
             init.normal_(m.weight, std=0.001)
             if m.bias is not None:
                 init.constant_(m.bias, 0)
Example #15
 def _init_weights(self):
     init.normal_(self.FPN_RPN_conv.weight, std=0.01)
     init.constant_(self.FPN_RPN_conv.bias, 0)
     init.normal_(self.FPN_RPN_cls_score.weight, std=0.01)
     init.constant_(self.FPN_RPN_cls_score.bias, 0)
     init.normal_(self.FPN_RPN_bbox_pred.weight, std=0.01)
     init.constant_(self.FPN_RPN_bbox_pred.bias, 0)
Example #16
    def __init__(self, input_num, output_num):
        super(CrossPoolingDir, self).__init__()
        self.input_num = input_num
        self.output_num = output_num

        self.featK = nn.Linear(self.input_num, self.output_num)
        self.featK_bn = nn.BatchNorm1d(self.output_num)

        # Softmax
        self.softmax = nn.Softmax()

        init.kaiming_uniform_(self.featK.weight, mode='fan_out')
        init.constant_(self.featK.bias, 0)
        init.constant_(self.featK_bn.weight, 1)
        init.constant_(self.featK_bn.bias, 0)
Example #17
    def __init__(self, feat_num, class_num, drop=0):
        super(Classifier, self).__init__()
        self.feat_num = feat_num
        self.class_num = class_num
        self.drop = drop

        # BN layer
        self.classifierBN = nn.BatchNorm1d(self.feat_num)
        # feat classifeir
        self.classifierlinear = nn.Linear(self.feat_num, self.class_num)
        # dropout_layer
        self.drop = drop
        if self.drop > 0:
            self.droplayer = nn.Dropout(drop)

        init.constant_(self.classifierBN.weight, 1)
        init.constant_(self.classifierBN.bias, 0)

        init.normal_(self.classifierlinear.weight, std=0.001)
        init.constant_(self.classifierlinear.bias, 0)
Example #18
    def __init__(self, depth, pretrained=True, cut_at_pooling=False,
                 num_features=0, dropout=0):
        super(ResNet, self).__init__()

        self.depth = depth
        self.pretrained = pretrained
        self.cut_at_pooling = cut_at_pooling

        # Construct base (pretrain) resnet
        if depth not in ResNet.__factory:
            raise KeyError("Unsupported depth:", depth)

        conv0 = nn.Conv2d(2, 64, kernel_size=7, stride=2, padding=3, bias=False)
        init.kaiming_uniform_(conv0.weight, mode='fan_out')

        self.conv0 = conv0
        self.base = ResNet.__factory[depth](pretrained=pretrained)

        if not self.cut_at_pooling:
            self.num_features = num_features
            self.dropout = dropout
            self.has_embedding = num_features > 0

            out_planes = self.base.fc.in_features

            # Append new layers
            if self.has_embedding:
                self.feat = nn.Linear(out_planes, self.num_features)
                self.feat_bn = nn.BatchNorm1d(self.num_features)
                init.kaiming_uniform_(self.feat.weight, mode='fan_out')
                init.constant_(self.feat.bias, 0)
                init.constant_(self.feat_bn.weight, 1)
                init.constant_(self.feat_bn.bias, 0)
            else:
                self.num_features = out_planes

            if self.dropout > 0:
                self.drop = nn.Dropout(self.dropout)

        if not self.pretrained:
            self.reset_params()
Example #19
def weight_init(m): # https://gist.github.com/jeasinema/ed9236ce743c8efaf30fa2ff732749f5
    if isinstance(m, nn.Conv1d):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.Conv3d):
        init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.ConvTranspose1d):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.ConvTranspose2d):
        init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.ConvTranspose3d):
        init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.BatchNorm1d):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.BatchNorm2d):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.BatchNorm3d):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.kaiming_normal_(m.weight.data)
        init.normal_(m.bias.data)
    elif isinstance(m, nn.LSTM):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
    elif isinstance(m, nn.LSTMCell):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
    elif isinstance(m, nn.GRU):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
    elif isinstance(m, nn.GRUCell):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data) 
Example #20
 def _init_weights(self):
     mynn.init.XavierFill(self.fc1.weight)
     init.constant_(self.fc1.bias, 0)
     mynn.init.XavierFill(self.fc2.weight)
     init.constant_(self.fc2.bias, 0)
Example #21
 def reset_parameters(self):
     self.reset_running_stats()
     if self.affine:
          init.constant_(self.weight[:, :2], 1.4142135623730951)  # sqrt(2)
         init.zeros_(self.weight[:, 2])
         init.zeros_(self.bias)
Example #22
 def reset_parameters(self):
     init.xavier_uniform_(self.weight, gain=math.sqrt(2))
     init.constant_(self.bias, 0)
Example #23
def init_weights(m):
    '''
    Usage:
        model = Model()
        model.apply(weight_init)
    '''

    if isinstance(m, nn.Conv1d):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.Conv2d):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.Conv3d):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.ConvTranspose1d):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.ConvTranspose2d):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.ConvTranspose3d):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.BatchNorm1d):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.BatchNorm2d):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.BatchNorm3d):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.xavier_normal_(m.weight.data)
        init.normal_(m.bias.data)
    elif isinstance(m, nn.LSTM):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
    elif isinstance(m, nn.LSTMCell):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
    elif isinstance(m, nn.GRU):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
    elif isinstance(m, nn.GRUCell):
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
Example #24
lin_nn_model = nn.Sequential(
    nn.Linear(d, d_1, bias=False),
    nn.Linear(d_1, d_2, bias=False)
)
ReLU_model = nn.Sequential(
    nn.Linear(d, d_1),
    nn.ReLU(),
    nn.Linear(d_1, d_2)
)
loss = nn.MSELoss()

iter = lin_nn_model.parameters()
w1 = next(iter)
w2 = next(iter)
init.uniform_(w1, a=0, b=0.01)
init.constant_(w2, w1.norm() / 10)  # This is definitely true! Compute the gradient!

learning_rate = 0.01
time_range = range(2000)
for i in range(1):
    x = data[i, :, :-1]
    y = data[i, :, -1].unsqueeze(1)
    r1, r2, r3 = [], [], []
    for t in time_range:
        y_lin_pred = lin_model(x)
        lin_risk = loss(y_lin_pred, y)
        y_lin_nn_pred = lin_nn_model(x)
        lin_nn_risk = loss(y_lin_nn_pred, y)
        y_ReLU_pred = ReLU_model(x)
        ReLU_risk = loss(y_ReLU_pred, y)
        r1.append(lin_risk.item())
Example #25
 def weight_init(m):
     if isinstance(m, nn.Linear) or isinstance(m, nn.Conv3d):
         init.xavier_uniform_(m.weight.data)
         init.constant_(m.bias.data, 0)
Example #26
 def _init_weights(self):
     init.normal_(self.cls_score.weight, std=0.01)
     init.constant_(self.cls_score.bias, 0)
     init.normal_(self.bbox_pred.weight, std=0.001)
     init.constant_(self.bbox_pred.bias, 0)
Example #27
 def _init_weights(self):
     mynn.init.XavierFill(self.fc1.weight)
     init.constant_(self.fc1.bias, 0)
     mynn.init.XavierFill(self.fc2.weight)
     init.constant_(self.fc2.bias, 0)
Example #28
    def __init__(self, version='Squeezenet_1_0', num_bins=66):
        super(Hopeless_Squeezenet, self).__init__()
        self.num_bins = num_bins
        if version == 'Squeezenet_1_0':
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=7, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(96, 16, 64, 64),
                Fire(128, 16, 64, 64),
                Fire(128, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 32, 128, 128),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(512, 64, 256, 256),
            )
        elif version == 'Squeezenet_1_1':
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
            )
        else:
            # FIXME: Is this needed? SqueezeNet should only be called from the
            # FIXME: squeezenet1_x() functions
            # FIXME: This checking is not done for the other models
            raise ValueError("Unsupported SqueezeNet version {version}:"
                             "1_0 or 1_1 expected".format(version=version))

        # Final convolution is initialized differently from the rest
        final_conv_yaw = nn.Conv2d(512, self.num_bins, kernel_size=1)
        self.classifier_yaw = nn.Sequential(nn.Dropout(p=0.5), final_conv_yaw,
                                            nn.ReLU(inplace=True),
                                            nn.AdaptiveAvgPool2d((1, 1)))
        final_conv_pitch = nn.Conv2d(512, self.num_bins, kernel_size=1)
        self.classifier_pitch = nn.Sequential(nn.Dropout(p=0.5),
                                              final_conv_pitch,
                                              nn.ReLU(inplace=True),
                                              nn.AdaptiveAvgPool2d((1, 1)))
        final_conv_roll = nn.Conv2d(512, self.num_bins, kernel_size=1)
        self.classifier_roll = nn.Sequential(nn.Dropout(p=0.5),
                                             final_conv_roll,
                                             nn.ReLU(inplace=True),
                                             nn.AdaptiveAvgPool2d((1, 1)))

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m is final_conv_pitch or \
                    m is final_conv_yaw or \
                        m is final_conv_roll:
                    init.normal_(m.weight, mean=0.0, std=0.01)
                else:
                    init.kaiming_uniform_(m.weight)
                if m.bias is not None:
                    init.constant_(m.bias, 0)
Example #29
 def init_func(m):
     if isinstance(m, nn.Conv2d):
         mynn.init.XavierFill(m.weight)
         if m.bias is not None:
             init.constant_(m.bias, 0)
Example #30
def train(opt):
    """ dataset preparation """
    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    valid_dataset = hierarchical_dataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=opt.batch_size,
        shuffle=True,  # 'True' to check training progress with validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid, pin_memory=True)
    print('-' * 80)

    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception as e:  # for batchnorm.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).to(device)
    model.train()
    if opt.saved_model != '':
        print(f'loading pretrained model from {opt.saved_model}')
        if opt.FT:
            model.load_state_dict(torch.load(opt.saved_model), strict=False)
        else:
            model.load_state_dict(torch.load(opt.saved_model))
    print("Model:")
    print(model)

    """ setup loss """
    if 'CTC' in opt.Prediction:
        criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # keep only the parameters that require gradient descent
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters, lr=opt.lr, betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr, rho=opt.rho, eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    """ final options """
    # print(opt)
    with open(f'./saved_models/{opt.experiment_name}/opt.txt', 'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    """ start training """
    start_iter = 0
    if opt.saved_model != '':
        start_iter = int(opt.saved_model.split('_')[-1].split('.')[0])
        print(f'continue to train, start_iter: {start_iter}')

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = 1e+6
    i = start_iter

    while(True):
        # train part
        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)
        text, length = converter.encode(labels, batch_max_length=opt.batch_max_length)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text).log_softmax(2)
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            preds = preds.permute(1, 0, 2)  # to use CTCLoss format

            # (ctc_a) To avoid ctc_loss issue, disabled cudnn for the computation of the ctc_loss
            # https://github.com/jpuigcerver/PyLaia/issues/16
            torch.backends.cudnn.enabled = False
            cost = criterion(preds, text.to(device), preds_size.to(device), length.to(device))
            torch.backends.cudnn.enabled = True

            # # (ctc_b) To reproduce our pretrained model / paper, use our previous code (below code) instead of (ctc_a).
            # # With PyTorch 1.2.0, the below code occurs NAN, so you may use PyTorch 1.1.0.
            # # Thus, the result of CTCLoss is different in PyTorch 1.1.0 and PyTorch 1.2.0.
            # # See https://github.com/clovaai/deep-text-recognition-benchmark/issues/56#issuecomment-526490707
            # cost = criterion(preds, text, preds_size, length)

        else:
            preds = model(image, text[:, :-1]) # align with Attention.forward
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]), target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        loss_avg.add(cost)
        # validation part
        if i % opt.valInterval == 0:
            elapsed_time = time.time() - start_time
            print(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}')
            # for log
            with open(f'./saved_models/{opt.experiment_name}/log_train.txt', 'a') as log:
                log.write(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}\n')
                loss_avg.reset()

                model.eval()
                with torch.no_grad():
                    valid_loss, current_accuracy, current_norm_ED, preds, labels, infer_time, length_of_data = validation(
                        model, criterion, valid_loader, converter, opt)
                model.train()

                for pred, gt in zip(preds[:5], labels[:5]):
                    if 'Attn' in opt.Prediction:
                        pred = pred[:pred.find('[s]')]
                        gt = gt[:gt.find('[s]')]
                    print(f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}')
                    log.write(f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}\n')

                valid_log = f'[{i}/{opt.num_iter}] valid loss: {valid_loss:0.5f}'
                valid_log += f' accuracy: {current_accuracy:0.3f}, norm_ED: {current_norm_ED:0.2f}'
                print(valid_log)
                log.write(valid_log + '\n')

                # keep best accuracy model
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/best_accuracy_iter_{i+1}.pth')
                if current_norm_ED < best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/best_norm_ED_iter_{i+1}.pth')
                best_model_log = f'best_accuracy: {best_accuracy:0.3f}, best_norm_ED: {best_norm_ED:0.2f}'
                print(best_model_log)
                log.write(best_model_log + '\n')

        # save model every 1000 iterations.
        if (i + 1) % 1000 == 0:
            torch.save(
                model.state_dict(), f'./saved_models/{opt.experiment_name}/iter_{i+1}.pth')

        if i == opt.num_iter:
            print('end the training')
            sys.exit()
        i = i + 1
Example #31
def weights_init_classifier(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        init.normal_(m.weight.data, std=0.001)
        init.constant_(m.bias.data, 0.0)
Example #32
def weights_init(m):
    if isinstance(m, nn.Linear):
        if m.weight is not None:
            init.xavier_uniform_(m.weight)
        if m.bias is not None:
            init.constant_(m.bias, 0.0)
Example #33
 def _init_weights(self):
     """
     initialize layers before ReLU activation with kaiming initialization
     """
     if cfg.GAN.MODEL.KAIMING_INIT:
         if cfg.DEBUG:
             print("\tInit Adversarial with KAIMING")
         init.kaiming_uniform_(self.adversarial[0].weight,
                               a=0,
                               mode='fan_in',
                               nonlinearity='relu')
         init.constant_(self.adversarial[0].bias, 0.0)
         init.kaiming_uniform_(self.adversarial[2].weight,
                               a=0,
                               mode='fan_in',
                               nonlinearity='relu')
         init.constant_(self.adversarial[2].bias, 0.0)
         init.kaiming_uniform_(self.adversarial[4].weight,
                               a=0,
                               mode='fan_in',
                               nonlinearity='relu')
         init.constant_(self.adversarial[4].bias, 0.0)
     else:
         if cfg.DEBUG:
             print("\tInit ResidualBlock with XAVIER")
         mynn.init.XavierFill(self.adversarial[0].weight)
         init.constant_(self.adversarial[0].bias, 0.0)
         mynn.init.XavierFill(self.adversarial[2].weight)
         init.constant_(self.adversarial[2].bias, 0.0)
         mynn.init.XavierFill(self.adversarial[4].weight)
         init.constant_(self.adversarial[4].bias, 0.0)
Example #34
 def _init(m):
     if isinstance(m, nn.Conv2d):
         mynn.init.MSRAFill(m.weight)
     elif isinstance(m, nn.Linear):
         mynn.init.XavierFill(m.weight)
         init.constant_(m.bias, 0)
Example #35
    def __init__(self, input_num, output_num):
        super(SelfPoolingDir, self).__init__()
        self.input_num = input_num
        self.output_num = output_num

        # todo: LSTM
        self.lstm = nn.LSTM(input_size=self.input_num,
                            hidden_size=self.output_num, num_layers=1, batch_first=True, dropout=0)
        self.bilstm = nn.LSTM(input_size=self.input_num, hidden_size=self.output_num,
                              num_layers=1, batch_first=True, dropout=0, bidirectional=True)
        self.lstm_bn = nn.BatchNorm1d(self.output_num)

        ## Linear K
        self.featK = nn.Linear(self.input_num, self.output_num)
        self.featK_bn = nn.BatchNorm1d(self.output_num)

        ## Linear_Q
        self.featQ = nn.Linear(self.input_num, self.output_num)
        self.featQ_bn = nn.BatchNorm1d(self.output_num)


        ## Softmax
        self.softmax = nn.Softmax(dim=-1)

        init.kaiming_uniform_(self.featK.weight, mode='fan_out')
        init.constant_(self.featK.bias, 0)

        init.constant_(self.featK_bn.weight, 1)
        init.constant_(self.featK_bn.bias, 0)

        init.kaiming_uniform_(self.featQ.weight, mode='fan_out')
        init.constant_(self.featQ.bias, 0)
        init.constant_(self.featQ_bn.weight, 1)
        init.constant_(self.featQ_bn.bias, 0)
        init.constant_(self.lstm_bn.weight, 1)
        init.constant_(self.lstm_bn.bias, 0)
Example #36
 def _init_weights(self):
     if cfg.MRCNN.CONV_INIT == 'GaussianFill':
         init.normal_(self.upconv5.weight, std=0.001)
     elif cfg.MRCNN.CONV_INIT == 'MSRAFill':
         mynn.init.MSRAFill(self.upconv5.weight)
     init.constant_(self.upconv5.bias, 0)
Example #37
def train(opt):
    """ dataset preparation """
    if not opt.data_filtering_off:
        print(
            'Filtering the images containing characters which are not in opt.character'
        )
        print(
            'Filtering the images whose label is longer than opt.batch_max_length'
        )
        # see https://github.com/clovaai/deep-text-recognition-benchmark/blob/6593928855fb7abb999a99f428b3e4477d4ae356/dataset.py#L130

    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    log = open(
        f'/content/drive/MyDrive/saved_models/{opt.exp_name}/log_dataset.txt',
        'a')
    AlignCollate_valid = AlignCollate(imgH=opt.imgH,
                                      imgW=opt.imgW,
                                      keep_ratio_with_pad=opt.PAD)
    valid_dataset, valid_dataset_log = hierarchical_dataset(
        root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=opt.batch_size,
        shuffle=
        True,  # 'True' to check training progress with validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid,
        pin_memory=True)
    log.write(valid_dataset_log)
    print('-' * 80)
    log.write('-' * 80 + '\n')
    log.close()
    """ model configuration """
    if 'CTC' in opt.Prediction:
        if opt.baiduCTC:
            converter = CTCLabelConverterForBaiduWarpctc(opt.character)
        else:
            converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception as e:  # for batchnorm.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).to(device)
    model.train()
    if opt.saved_model != '':
        print(f'loading pretrained model from {opt.saved_model}')
        if opt.FT:
            model.load_state_dict(torch.load(opt.saved_model), strict=False)
        else:
            model.load_state_dict(torch.load(opt.saved_model))
    print("Model:")
    print(model)
    """ setup loss """
    if 'CTC' in opt.Prediction:
        if opt.baiduCTC:
            # need to install warpctc. see our guideline.
            #from warpctc_pytorch import CTCLoss
            #criterion = CTCLoss()
            print('Hello')
        else:
            criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(
            device)  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # keep only the parameters that require gradient descent
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters,
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters,
                                   lr=opt.lr,
                                   rho=opt.rho,
                                   eps=opt.eps)
    print("Optimizer:")
    print(optimizer)
    """ final options """
    # print(opt)
    with open(f'/content/drive/MyDrive/saved_models/{opt.exp_name}/opt.txt',
              'a',
              encoding='utf-8') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)
    """ start training """
    start_iter = 0
    if opt.saved_model != '':
        try:
            start_iter = int(opt.saved_model.split('_')[-1].split('.')[0])
            print(f'continue to train, start_iter: {start_iter}')
        except:
            pass

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = -1
    iteration = start_iter

    while (True):
        # train part
        print(iteration)
        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)
        text, length = converter.encode(labels,
                                        batch_max_length=opt.batch_max_length)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text)
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            if opt.baiduCTC:
                preds = preds.permute(1, 0, 2)  # to use CTCLoss format
                cost = criterion(preds, text, preds_size, length) / batch_size
            else:
                preds = preds.log_softmax(2).permute(1, 0, 2)
                cost = criterion(preds, text, preds_size, length)

        else:
            preds = model(image, text[:, :-1])  # align with Attention.forward
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]),
                             target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(
            model.parameters(),
            opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        loss_avg.add(cost)
        print(cost)

        # validation part

        if (
                iteration + 1
        ) % opt.valInterval == 0 or iteration == 0:  # To see training progress, we also conduct validation when 'iteration == 0'
            elapsed_time = time.time() - start_time
            # for log
            with open(
                    f'/content/drive/MyDrive/saved_models/{opt.exp_name}/log_train.txt',
                    'a',
                    encoding='utf-8') as log:
                model.eval()
                with torch.no_grad():
                    valid_loss, current_accuracy, current_norm_ED, preds, confidence_score, labels, infer_time, length_of_data = validation(
                        model, criterion, valid_loader, converter, opt)
                model.train()

                # training loss and validation loss
                loss_log = f'[{iteration + 1}/{opt.num_iter}] Train loss: {loss_avg.val():0.5f}, Valid loss: {valid_loss:0.5f}, Elapsed_time: {elapsed_time:0.5f}'
                loss_avg.reset()

                current_model_log = f'{"Current_accuracy":17s}: {current_accuracy:0.3f}, {"Current_norm_ED":17s}: {current_norm_ED:0.2f}'

                # keep best accuracy model (on valid dataset)
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(
                        model.state_dict(),
                        f'/content/drive/MyDrive/saved_models//{opt.exp_name}/best_accuracy.pth'
                    )
                if current_norm_ED > best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(
                        model.state_dict(),
                        f'/content/drive/MyDrive/saved_models//{opt.exp_name}/best_norm_ED.pth'
                    )
                best_model_log = f'{"Best_accuracy":17s}: {best_accuracy:0.3f}, {"Best_norm_ED":17s}: {best_norm_ED:0.2f}'

                loss_model_log = f'{loss_log}\n{current_model_log}\n{best_model_log}'
                print(loss_model_log)
                log.write(loss_model_log + '\n')

                # show some predicted results
                dashed_line = '-' * 80
                head = f'{"Ground Truth":25s} | {"Prediction":25s} | Confidence Score & T/F'
                predicted_result_log = f'{dashed_line}\n{head}\n{dashed_line}\n'
                for gt, pred, confidence in zip(labels[:5], preds[:5],
                                                confidence_score[:5]):
                    if 'Attn' in opt.Prediction:
                        gt = gt[:gt.find('[s]')]
                        pred = pred[:pred.find('[s]')]

                    predicted_result_log += f'{gt:25s} | {pred:25s} | {confidence:0.4f}\t{str(pred == gt)}\n'
                predicted_result_log += f'{dashed_line}'
                print(predicted_result_log)
                log.write(predicted_result_log + '\n')

        # save model per 1e+5 iter.
        if (iteration + 1) % 1e+5 == 0:
            torch.save(
                model.state_dict(),
                f'/content/drive/MyDrive/saved_models//{opt.exp_name}/iter_{iteration + 1}.pth'
            )

        if (iteration + 1) == opt.num_iter:
            print('end the training')
            sys.exit()
        iteration += 1
Example #38
 def reset_parameters(self):
     init.constant_(self.weight, self.gamma)
Example #39
    def __init__(self,
                 num_classes,
                 block,
                 layers,
                 n_head=1,
                 attention_type='concat',
                 shot_mode='mean',
                 num_way=2,
                 num_shot=5,
                 pos_encoding=True,
                 pretrained=False):
        super(aaa_retinanet, self).__init__()
        self.model_path = 'data/pretrained_model/resnet50_caffe.pth'
        self.pretrained = pretrained
        self.inplanes = 64
        self.n_head = n_head
        self.attention_type = attention_type
        self.shot_mode = shot_mode
        self.num_shot = num_shot
        self.pos_encoding = pos_encoding
        self.support_im_size = 320

        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if self.pretrained == True:
            print("Loading pretrained weights from %s" % (self.model_path))
            state_dict = torch.load(self.model_path)
            self.load_state_dict({
                k: v
                for k, v in state_dict.items() if k in self.state_dict()
            })

            def set_bn_fix(m):
                classname = m.__class__.__name__
                if classname.find('BatchNorm') != -1:
                    for p in m.parameters():
                        p.requires_grad = False

            self.apply(set_bn_fix)

        if block == BasicBlock:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv2.out_channels,
                self.layer3[layers[2] - 1].conv2.out_channels,
                self.layer4[layers[3] - 1].conv2.out_channels
            ]
        elif block == Bottleneck:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv3.out_channels,
                self.layer3[layers[2] - 1].conv3.out_channels,
                self.layer4[layers[3] - 1].conv3.out_channels
            ]
        else:
            raise ValueError(f"Block type {block} not understood")

        attention_output_dim = 256 if self.attention_type == 'product' else 512
        if self.attention_type == 'product':
            self.fpn = PyramidFeatures(
                fpn_sizes[0],
                fpn_sizes[1],
                fpn_sizes[2],
                feature_size=attention_output_dim)  # [512, 1024, 2048]
        else:
            self.fpn = PyramidFeatures(fpn_sizes[0] * 2,
                                       fpn_sizes[1] * 2,
                                       fpn_sizes[2] * 2,
                                       feature_size=attention_output_dim)

        self.regressionModel = RegressionModel(attention_output_dim)
        self.classificationModel = ClassificationModel(attention_output_dim,
                                                       num_classes=num_classes)
        self.anchors = Anchors([4, 5, 6, 7])
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = losses.FocalLoss()

        # weights initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        prior = 0.01
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
        self.freeze_bn()

        self.resnet_base = nn.Sequential(self.conv1, self.bn1, self.relu,
                                         self.maxpool)

        # querys, keys
        Q_list = []
        K_list = []
        self.d_k = 64
        self.fpn_dims = [512, 1024, 2048]
        for fpn_dim in self.fpn_dims:
            Q_weight = nn.Linear(fpn_dim, self.d_k)
            K_weight = nn.Linear(fpn_dim, self.d_k)
            init.normal_(Q_weight.weight, std=0.01)
            init.constant_(Q_weight.bias, 0)
            init.normal_(K_weight.weight, std=0.01)
            init.constant_(K_weight.bias, 0)
            Q_list.append(Q_weight)
            K_list.append(K_weight)
        self.pyramid_Q_layers = nn.ModuleList(Q_list)
        self.pyramid_K_layers = nn.ModuleList(K_list)
        if self.pos_encoding:
            pel_3 = PositionalEncoding(d_model=512, max_len=40 * 40)
            pel_4 = PositionalEncoding(d_model=1024, max_len=20 * 20)
            pel_5 = PositionalEncoding(d_model=2048, max_len=10 * 10)
            # only built when positional encoding is enabled
            self.pos_encoding_layers = nn.ModuleList([pel_3, pel_4, pel_5])
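The per-level Q/K linear projections above (with d_k = 64) are the usual ingredients of scaled dot-product attention between query and support features; a minimal sketch of how such projections are typically combined, assuming feature maps flattened to (batch, locations, channels) (the exact aggregation in this model may differ):

import math
import torch

def scaled_dot_product_attention(query_feat, support_feat, Q_weight, K_weight, d_k=64):
    # query_feat: (B, Nq, C), support_feat: (B, Ns, C); Q_weight/K_weight are nn.Linear(C, d_k)
    q = Q_weight(query_feat)                      # (B, Nq, d_k)
    k = K_weight(support_feat)                    # (B, Ns, d_k)
    attn = torch.softmax(q @ k.transpose(1, 2) / math.sqrt(d_k), dim=-1)  # (B, Nq, Ns)
    return attn @ support_feat                    # attended support features, (B, Nq, C)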
Example #40
    def __init__(
        self,
        input_size,
        hidden_size,
        kernel_size,
        groups,
        reset_gate=True,
        min_reset=0.0,
        update_gate=True,
        min_update=0.0,
        out_bias=True,
        out_act=None,
    ):
        super().__init__()
        padding = kernel_size // 2
        self.padding = padding
        self.input_size = input_size
        self.hidden_size = hidden_size
        if reset_gate:
            self.min_reset = min_reset
            self.reset_gate = nn.Conv2d(
                input_size + hidden_size,
                hidden_size,
                kernel_size,
                groups=groups,
                padding=padding,
            )
        else:
            self.reset_gate = None

        if update_gate:
            self.min_update = min_update
            self.update_gate = nn.Conv2d(
                input_size + hidden_size,
                hidden_size,
                kernel_size,
                groups=groups,
                padding=padding,
            )
        else:
            self.update_gate = None

        # self.out_gate = nn.Conv2d(
        #     input_size + hidden_size,
        #     hidden_size,
        #     kernel_size,
        #     groups=groups,
        #     padding=padding,
        #     bias=out_bias,
        # )

        W = torch.ones(hidden_size, hidden_size, kernel_size, kernel_size)
        self.out_weights = nn.Parameter(0.75 * W / input_size / kernel_size**2)

        if self.reset_gate:
            init.orthogonal_(self.reset_gate.weight)
            init.constant_(self.reset_gate.bias, 0.0)
        if self.update_gate:
            init.orthogonal_(self.update_gate.weight)
            init.constant_(self.update_gate.bias, 0.0)
        # init.orthogonal_(self.out_gate.weight)
        # eye = torch.eye(kernel_size, kernel_size).unsqueeze(0).unsqueeze(0)
        # init.constant_(self.out_gate.weight, eye)
        # The out_gate convolution above is commented out, so its bias init is skipped too:
        # if out_bias:
        #     init.constant_(self.out_gate.bias, 0.0)

        if out_act is None:
            self.out_act = None
        elif out_act == "tanh":
            self.out_act = torch.tanh
        elif out_act == "leaky_relu":
            self.out_act = torch.leaky_relu
        else:
            raise NotImplementedError(out_act)
Example #41
    def __init__(self, word_vec, class_num, pos_num, config):
        super().__init__()
        self.word_vec = word_vec
        self.class_num = class_num
        self.pos_num = pos_num

        # hyper parameters and others
        self.max_len = config.max_len
        self.word_dim = config.word_dim
        self.pos_dim = config.pos_dim
        self.pos_dis = config.pos_dis
        self.tag_dim = config.tag_dim

        self.dropout_value = config.dropout
        self.filter_num = config.filter_num
        self.window = config.window

        self.dim = self.word_dim + 2 * self.pos_dim + self.tag_dim

        # net structures and operations
        self.word_embedding = nn.Embedding.from_pretrained(
            embeddings=self.word_vec,
            freeze=False,
        )
        self.pos1_embedding = nn.Embedding(num_embeddings=2 * self.pos_dis + 3,
                                           embedding_dim=self.pos_dim)
        self.pos2_embedding = nn.Embedding(num_embeddings=2 * self.pos_dis + 3,
                                           embedding_dim=self.pos_dim)

        self.tag_embedding = nn.Embedding(num_embeddings=self.pos_num,
                                          embedding_dim=self.tag_dim)
        self.conv = nn.Conv2d(
            in_channels=1,
            out_channels=self.filter_num,
            kernel_size=(self.window, self.dim),
            stride=(1, 1),
            bias=True,
            padding=(1, 0),  # same padding
            padding_mode='zeros')
        self.maxpool = nn.MaxPool2d((self.max_len, 1))
        self.tanh = nn.Tanh()
        self.we = nn.Linear(in_features=self.dim * 2,
                            out_features=self.dim * 2,
                            bias=True)
        self.wa = nn.Linear(in_features=self.dim * 2,
                            out_features=1,
                            bias=True)
        self.dense = nn.Linear(in_features=self.filter_num + 2 * self.dim,
                               out_features=self.class_num,
                               bias=True)

        # initialize weight
        init.uniform_(self.pos1_embedding.weight, a=-0.1, b=0.1)
        init.uniform_(self.pos2_embedding.weight, a=-0.1, b=0.1)
        init.uniform_(self.tag_embedding.weight, a=-0.1, b=0.1)
        init.uniform_(self.conv.weight, a=-0.1, b=0.1)
        init.constant_(self.conv.bias, 0.)
        init.uniform_(self.we.weight, a=-0.1, b=0.1)
        init.constant_(self.we.bias, 0.)
        init.uniform_(self.wa.weight, a=-0.1, b=0.1)
        init.constant_(self.wa.bias, 0.)
        init.uniform_(self.dense.weight, a=-0.1, b=0.1)
        init.constant_(self.dense.bias, 0.)
Example #42
def weights_init(m):
    classname = m.__class__.__name__
    if classname == 'Linear':
        init.xavier_uniform_(m.weight, gain=np.sqrt(2.0))
        if m.bias is not None:
            init.constant_(m.bias, 0.1)
Example #43
 def init_weights(self):
     init.normal_(self.tgt_embedding.weight, std=0.01)
     init.normal_(self.fc.weight, std=0.01)
     init.constant_(self.fc.bias, 0)
Example #44
 def _init_params(self):
     for name, module in self.named_modules():
         if isinstance(module, nn.Conv2d):
             init.kaiming_uniform_(module.weight)
             if module.bias is not None:
                 init.constant_(module.bias, 0)
Example #45
 def _init_weights(self):
     init.normal_(self.cls_score.weight, std=0.01)
     init.constant_(self.cls_score.bias, 0)
     init.normal_(self.bbox_pred.weight, std=0.001)
     init.constant_(self.bbox_pred.bias, 0)
Example #46
 def _set_init(self, layer):
     init.normal_(layer.weight, mean=0., std=.1)
     init.constant_(layer.bias, B_INIT)
Example #47
 def init_parameter(self, parameter):
     init.constant_(parameter, val=self.val)
Example #48
        y=self.linear(X)
        return y
net=LinearNet(num_inputs)
print(net)
#net=nn.Sequential(nn.Linear(num_inputs,1))
#net=nn.Sequential()
#net.add_module('linear',nn.Linear(num_inputs,1))
#from collections import OrderedDict
#net=nn.Sequential(OrderedDict([('linear',nn.Linear(num_inputs,1))]))
for param in net.parameters():
    print(param)

# Initialize model parameters
from torch.nn import init
init.normal_(net.linear.weight,mean=0.0,std=0.01)
init.constant_(net.linear.bias,val=0.0)
for param in net.parameters():
    print(param)

# Loss function
loss=nn.MSELoss()
# Optimization algorithm
import torch.optim as optim
optimizer=optim.SGD(net.parameters(),lr=0.03)
print(optimizer)
# Set a different learning rate for each sub-network
# optimizer =optim.SGD([
#                 # Parameters without an explicit learning rate use the outer default
#                 {'params': net.subnet1.parameters()}, # lr=0.03
#                 {'params': net.subnet2.parameters(), 'lr': 0.01}
#             ], lr=0.03)
Example #49
 def init_func(m):
     if isinstance(m, nn.Conv2d):
         mynn.init.XavierFill(m.weight)
         #mynn.init.MSRAFill(m.weight)
         if m.bias is not None:
             init.constant_(m.bias, 0)
Example #50
net = nn.Sequential(
    OrderedDict([('linear', nn.Linear(num_inputs, 1))
                 # ......
                 ]))

print(net)
print(net[0])

for param in net.parameters():
    print(param)

# Initialize model parameters
from torch.nn import init

init.normal_(net[0].weight, mean=0.0, std=0.1)
init.constant_(net[0].bias, val=0.0)

for param in net.parameters():
    print(param)

# Define the loss function
loss = nn.MSELoss()
# Define the optimization algorithm
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.03)
print(optimizer)

# Set a different learning rate for each sub-network
# optimizer =optim.SGD([
#                 # Parameters without an explicit learning rate use the outer default
Example #51
 def _init_weights(self):
     if cfg.MRCNN.CONV_INIT == 'GaussianFill':
         init.normal_(self.upconv5.weight, std=0.001)
     elif cfg.MRCNN.CONV_INIT == 'MSRAFill':
         mynn.init.MSRAFill(self.upconv5.weight)
     init.constant_(self.upconv5.bias, 0)
Example #52
def constant_init(m):
    if isinstance(m, nn.Conv2d):
        init.constant_(m.weight.data, 0.01)
        m.bias.data.zero_()
Example #53
def initialize(m):
    if isinstance(m, nn.Conv2d):
        init.xavier_normal_(m.weight)
        init.constant_(m.bias, 0)
    if isinstance(m, nn.ConvTranspose2d):
        init.xavier_normal_(m.weight)
Example #54
 def weight_init(m):
     if isinstance(m, nn.Conv2d):
         init.xavier_normal_(m.weight)
         init.constant_(m.bias, 0)
Example #55
 def _set_init(self, layer):
     init.normal_(layer.weight, mean=0., std=.1)
     init.constant_(layer.bias, B_INIT)
Example #56
    def reset_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

        resnet = ResNetIBN.__factory[self.depth](pretrained=self.pretrained)
        self.base[0].load_state_dict(resnet.conv1.state_dict())
        self.base[1].load_state_dict(resnet.bn1.state_dict())
        self.base[2].load_state_dict(resnet.relu.state_dict())
        self.base[3].load_state_dict(resnet.maxpool.state_dict())
        self.base[4].load_state_dict(resnet.layer1.state_dict())
        self.base[5].load_state_dict(resnet.layer2.state_dict())
        self.base[6].load_state_dict(resnet.layer3.state_dict())
        self.base[7].load_state_dict(resnet.layer4.state_dict())