def __init__(self, channel):
    """Build the layers of a channel-preserving residual block.

    Args:
        channel: Number of input and output feature channels for both
            3x3 convolutions, the instance norm, and the CBAM block.
    """
    super().__init__()

    # Both convolutions share the same geometry: 3x3 kernel, stride 1 and
    # padding 1, which leaves the spatial size unchanged.
    same_size_conv = dict(kernel_size=3, stride=1, padding=1, bias=False)

    # Construction order is kept stable: it fixes the registration order
    # of the submodules and the order in which `apply` visits them.
    self.conv1 = nn.Conv2d(channel, channel, **same_size_conv)
    self.conv2 = nn.Conv2d(channel, channel, **same_size_conv)
    # A single norm layer is registered for this block.
    self.bn = nn.InstanceNorm2d(channel, eps=1e-5, momentum=0.01, affine=True)
    self.cbam = CBAM(channel)
    self.relu = nn.ReLU(inplace=True)

    # Run the project-wide initializer over every registered submodule.
    self.apply(weights_init)
def __init__(self, in_channel, out_channel):
    """Build the layers of an upsampling block with extra width padding.

    Args:
        in_channel: Channel count fed to both transposed convolutions and
            to the 1x1 convolution.
        out_channel: Channel count produced by every layer in this block.
    """
    super().__init__()

    # kernel 4 / stride 2 / padding 1 doubles each spatial dimension;
    # output_padding=(0, 1) then adds one more column to the last
    # dimension (H -> 2H, W -> 2W + 1).
    upsample_cfg = dict(
        kernel_size=4,
        stride=2,
        padding=1,
        output_padding=(0, 1),
        bias=True,
    )
    norm_cfg = dict(eps=1e-5, momentum=0.01, affine=True)

    # Construction order is kept stable: it fixes the registration order
    # of the submodules and the order in which `apply` visits them.
    self.deConv1 = nn.ConvTranspose2d(in_channel, out_channel, **upsample_cfg)
    self.deConv2 = nn.ConvTranspose2d(in_channel, out_channel, **upsample_cfg)
    # 1x1 convolution that only remaps channels.
    self.conv = nn.Conv2d(
        in_channel, out_channel, kernel_size=1, stride=1, bias=False
    )

    self.bn1 = nn.InstanceNorm2d(out_channel, **norm_cfg)
    self.bn2 = nn.InstanceNorm2d(out_channel, **norm_cfg)
    self.bn3 = nn.InstanceNorm2d(out_channel, **norm_cfg)

    self.cbam1 = CBAM(out_channel)
    self.cbam2 = CBAM(out_channel)
    self.relu = nn.ReLU(inplace=True)

    # Run the project-wide initializer over every registered submodule.
    self.apply(weights_init)
def __init__(self, layers):
    """Assemble the activations, projections, sub-networks and decoder stack.

    Args:
        layers: Sequence of channel widths for the decoder stages, e.g.
            ``[1024, 512, 256, 128, 64]``. Each consecutive pair
            ``(layers[i - 1], layers[i])`` becomes one upsampling stage.
    """
    super().__init__()

    # Parameter-free activations and regularisation.
    self.leaky = nn.LeakyReLU(inplace=True)
    self.relu = nn.ReLU(inplace=True)
    self.sigmoid = nn.Sigmoid()
    self.dropout = nn.Dropout(p=0.3)

    # Linear projections from 2 * 1152 features down to 1152.
    self.bar_linear = nn.Linear(1152 * 2, 1152)
    self.phrase_linear = nn.Linear(1152 * 2, 1152)

    self.time = TimePitchModule()
    self.pitch = PitchTimeModule()

    # 1x1 convolutions that only change the channel count.
    self.fit1 = nn.Conv2d(2048, 1024, kernel_size=1, stride=1, bias=False)
    self.bn = nn.InstanceNorm2d(1024, eps=1e-5, momentum=0.01, affine=True)
    self.fit2 = nn.Conv2d(64, 1, kernel_size=1, stride=1, bias=False)

    # The first two stages (i = 1, 2) use the padded upsampling block;
    # every later stage uses the plain one.
    decoder_stages = [
        (DeConvPitchPadding if i < 3 else DeConvModule)(layers[i - 1], layers[i])
        for i in range(1, len(layers))
    ]
    self.layers = nn.ModuleList(decoder_stages)

    self.cbam = CBAM(1024)

    self.position_embedding = nn.Embedding(332, 1152)
    nn.init.uniform_(self.position_embedding.weight, -1.0, 1.0)

    # NOTE(review): this runs after the uniform init above; whether
    # `weights_init` also touches nn.Embedding is not visible here -- confirm
    # that the embedding's uniform init is not being overwritten.
    self.apply(weights_init)
def __init__(self):
    """Build two axis-wise transposed convolutions plus norm and attention.

    ``pitch`` expands only the last spatial dimension (x3) while mapping
    2304 -> 1024 channels; ``time`` expands only the first spatial
    dimension (x6) and keeps 1024 channels.
    """
    super().__init__()

    width = 1024  # channel count shared by every layer after `pitch`

    # Kernel size equals stride on the expanded axis, so output positions
    # do not overlap.
    self.pitch = nn.ConvTranspose2d(
        2304, width, kernel_size=(1, 3), stride=(1, 3), bias=False
    )
    self.time = nn.ConvTranspose2d(
        width, width, kernel_size=(6, 1), stride=(6, 1), bias=False
    )
    self.bn = nn.InstanceNorm2d(width, eps=1e-5, momentum=0.01, affine=True)
    self.cbam = CBAM(width)
    self.relu = nn.ReLU(inplace=True)

    # Run the project-wide initializer over every registered submodule.
    self.apply(weights_init)
def __init__(self):
    """Build two axis-wise strided convolutions plus norm and attention.

    ``time`` strides only over the first spatial dimension (stride 2) while
    mapping 1 -> 32 channels; ``pitch`` strides only over the last spatial
    dimension (stride 2) and keeps 32 channels.
    """
    super().__init__()

    width = 32  # channel count shared by every layer after `time`

    # kernel 4 / stride 2 / padding 1 along one axis; the other axis uses
    # kernel 1 / stride 1 / padding 0 and is left untouched.
    self.time = nn.Conv2d(
        1, width, kernel_size=(4, 1), stride=(2, 1), padding=[1, 0], bias=False
    )
    self.pitch = nn.Conv2d(
        width, width, kernel_size=(1, 4), stride=(1, 2), padding=[0, 1], bias=False
    )
    self.bn = nn.InstanceNorm2d(width, eps=1e-5, momentum=0.01, affine=True)
    self.cbam = CBAM(width)
    self.leaky = nn.LeakyReLU(inplace=True)

    # Run the project-wide initializer over every registered submodule.
    self.apply(weights_init)