def __init__(self, channels, ker, cardinality=1, dropRate=0):
    """Create the layers of a 2-D bottleneck: 1x1 reduce, ker x ker conv, 1x1 expand.

    Args:
        channels: channel count at the input of the reduce convolution and
            at the output of the expand convolution.
        ker: side length of the square kernel used by the middle convolution.
        cardinality: value passed as ``groups`` to all three convolutions.
        dropRate: probability passed to ``nn.Dropout``.
    """
    super(ResNeXtBottleNeck_2d, self).__init__()

    # The middle stage works on half of the outer channel count.
    narrow = int(channels / 2)
    same_pad = get_padding(ker)

    # 1x1 reduction: channels -> narrow
    self.conv_reduce = nn.Conv2d(
        channels,
        narrow,
        kernel_size=1,
        groups=cardinality,
        bias=False,
    )
    self.bn_reduce = nn.BatchNorm2d(narrow)

    # ker x ker grouped convolution at the reduced width
    self.conv_conv = nn.Conv2d(
        narrow,
        narrow,
        kernel_size=(ker, ker),
        padding=(same_pad, same_pad),
        groups=cardinality,
        bias=False,
    )
    self.bn_conv = nn.BatchNorm2d(narrow)

    # 1x1 expansion: narrow -> channels
    self.conv_expand = nn.Conv2d(
        narrow,
        channels,
        kernel_size=1,
        groups=cardinality,
        bias=False,
    )
    self.bn_expand = nn.BatchNorm2d(channels)

    self.relu = nn.ReLU(inplace=True)
    self.dropout = nn.Dropout(p=dropRate)
def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
    """Build the convolution stack of the period discriminator.

    Args:
        period: stored unchanged on ``self.period``.
        kernel_size: kernel extent along the first spatial axis of every
            convolution in ``self.convs`` (the second axis is always 1).
        stride: stride along the first spatial axis for the first four
            convolutions; the fifth uses stride 1.
        use_spectral_norm: when truthy, wrap every convolution with
            ``spectral_norm``; otherwise use ``weight_norm``.
    """
    super(DiscriminatorP, self).__init__()
    self.period = period
    norm_f = spectral_norm if use_spectral_norm else weight_norm

    # Derive the padding from ``kernel_size`` instead of the literal 5 / 2
    # used before, so a non-default kernel size gets a matching padding.
    # NOTE(review): assumes get_padding(5, 1) == 2, i.e. the default
    # configuration is unchanged for the last convolution too - confirm
    # against the get_padding helper.
    pad = (get_padding(kernel_size, 1), 0)

    channel_plan = ((1, 32), (32, 128), (128, 512), (512, 1024))
    convs = [
        norm_f(Conv2d(c_in, c_out, (kernel_size, 1), (stride, 1), padding=pad))
        for c_in, c_out in channel_plan
    ]
    # Final feature convolution keeps the resolution (stride 1).
    convs.append(norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=pad)))
    self.convs = nn.ModuleList(convs)

    # Single-channel output projection with a fixed (3, 1) kernel.
    self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0)))
def __init__(self, planes, ker, n_layers, decode=False, dropRate=0):
    """Assemble ``self.main_conv`` as a stack of ``n_layers`` 1-D conv stages.

    Args:
        planes: channel count kept constant through every layer.
        ker: kernel size of each convolution.
        n_layers: number of convolution stages to stack.
        decode: when truthy use ``nn.ConvTranspose1d``, else ``nn.Conv1d``.
        dropRate: probability passed to each ``nn.Dropout``.
    """
    super(BasicResBlock, self).__init__()
    self.relu = nn.ReLU(inplace=True)

    conv_cls = nn.ConvTranspose1d if decode else nn.Conv1d
    final_idx = n_layers - 1

    # NOTE(review): the nesting below was restored from a source whose
    # indentation was lost; BatchNorm and Dropout are assumed to follow
    # every convolution, with ReLU skipped only after the last one -
    # confirm against the original file.
    stack = []
    for idx in range(n_layers):
        stack.append(
            conv_cls(planes, planes, ker, padding=get_padding(ker), bias=False))
        stack.append(nn.BatchNorm1d(planes))
        if idx < final_idx:
            stack.append(nn.ReLU(inplace=True))
        stack.append(nn.Dropout(p=dropRate))

    self.main_conv = nn.Sequential(*stack)
def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)):
    """Create two weight-normalised dilated 1-D convolutions.

    Args:
        h: stored unchanged on ``self.h``.
        channels: input and output channel count of both convolutions.
        kernel_size: kernel size of both convolutions.
        dilation: sequence whose first two entries are the dilation rates.
    """
    super(ResBlock2, self).__init__()
    self.h = h

    branches = []
    # Only the first two dilation entries are used.
    for rate in (dilation[0], dilation[1]):
        conv = Conv1d(
            channels,
            channels,
            kernel_size,
            1,
            dilation=rate,
            padding=get_padding(kernel_size, rate),
        )
        branches.append(weight_norm(conv))

    self.convs = nn.ModuleList(branches)
    self.convs.apply(init_weights)
def encode(self, inputs, attention_bias, training):
    """Generate continuous representation for inputs.

    Embeds the inputs, adds the position encoding, applies dropout when
    training, and feeds the result through ``self.encoder_stack``.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
      training: boolean, whether in training mode or not.

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
    compute_dtype = self.params["dtype"]

    with tf.name_scope("encode"):
        # Embed the tokens and bring everything to the configured dtype.
        token_embeddings = tf.cast(
            self.embedding_softmax_layer(inputs), compute_dtype)
        inputs_padding = model_utils.get_padding(inputs)
        attention_bias = tf.cast(attention_bias, compute_dtype)

        with tf.name_scope("add_pos_encoding"):
            seq_len = tf.shape(token_embeddings)[1]
            positions = tf.cast(
                model_utils.get_position_encoding(
                    seq_len, self.params["hidden_size"]),
                compute_dtype)
            encoder_inputs = token_embeddings + positions

        # Dropout is applied to the layer-stack input only while training.
        if training:
            encoder_inputs = tf.nn.dropout(
                encoder_inputs,
                rate=self.params["layer_postprocess_dropout"])

        return self.encoder_stack(
            encoder_inputs, attention_bias, inputs_padding, training=training)
def predict(options, img_read_path, img_write_path):
    """Run the evaluation model on one image and write the result to disk.

    Args:
        options: mapping read for ``'weights_read_path'`` (weights file to
            load) and ``"plot_model"`` (when truthy, print a summary and
            write ``model.png``).
        img_read_path: path handed to ``process_image``.
        img_write_path: path handed to ``imwrite``.
    """
    # Load the image without resizing and remember its size on axes 1 and 2.
    image = process_image(img_read_path, -1, -1, resize=False)
    original_size = (image.shape[1], image.shape[2])

    # Pad, then read the padded size from the same two axes.
    image = get_padding(image)
    padded_h = image.shape[1]
    padded_w = image.shape[2]

    # Build the evaluation model for the padded size and load its weights.
    model = get_evaluate_model(padded_w, padded_h)
    model.load_weights(options['weights_read_path'])

    # Optional: print model summary and generate model description.
    if options["plot_model"]:
        model.summary()
        plot_model(model, to_file='model.png')

    # Generate the output, strip the padding again and save the image.
    prediction = model.predict([image])
    result = deprocess_image(prediction[0], padded_w, padded_h)
    result = remove_padding(result, *original_size)
    imwrite(img_write_path, result)
def forward(self, *input):
    """Fuse three inputs by summing them after transposed-conv upsampling.

    ``input[0]``, ``input[1]`` and ``input[2]`` go through ``pre_conv1``,
    ``pre_conv2`` and ``pre_conv3``. The second and third results are then
    passed through ``F.conv_transpose2d`` with stride 2 and 4 respectively,
    using the padding values returned by ``utils.get_padding`` for the
    first result's size. Returns the sum of the three tensors.
    """
    base = self.pre_conv1(input[0])
    up2 = self.pre_conv2(input[1])
    up4 = self.pre_conv3(input[2])

    target_size = base.size()

    pad, out_pad = utils.get_padding(target_size, up2.size(), 2)
    up2 = F.conv_transpose2d(
        up2,
        self.deconv_weight1,
        stride=2,
        padding=pad,
        output_padding=out_pad,
    )

    pad, out_pad = utils.get_padding(target_size, up4.size(), 4)
    up4 = F.conv_transpose2d(
        up4,
        self.deconv_weight2,
        stride=4,
        padding=pad,
        output_padding=out_pad,
    )

    return base + up2 + up4
def forward(self, *input):
    """Fuse two inputs by summing them, upsampling the second if needed.

    ``input[0]`` and ``input[1]`` go through ``pre_conv1`` and
    ``pre_conv2``. When ``self.factor`` is greater than 1 the second result
    is passed through ``F.conv_transpose2d`` with that stride, using the
    padding values returned by ``utils.get_padding``. Returns the sum.
    """
    base = self.pre_conv1(input[0])
    other = self.pre_conv2(input[1])

    scale = self.factor
    if scale > 1:
        pad, out_pad = utils.get_padding(base.size(), other.size(), scale)
        other = F.conv_transpose2d(
            other,
            self.deconv_weight,
            stride=scale,
            padding=pad,
            output_padding=out_pad,
        )

    return base + other
def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)):
    """Create two lists of three weight-normalised 1-D convolutions each.

    ``self.convs1`` uses the first three entries of ``dilation`` as
    dilation rates; ``self.convs2`` uses dilation 1 throughout. Both lists
    get ``init_weights`` applied.

    Args:
        h: stored unchanged on ``self.h``.
        channels: input and output channel count of every convolution.
        kernel_size: kernel size of every convolution.
        dilation: sequence whose first three entries are used by ``convs1``.
    """
    super(ResBlock1, self).__init__()
    self.lrelu_slope = LRELU_SLOPE
    self.h = h

    def normed_conv(rate):
        # Stride-1 convolution with padding derived from kernel and dilation.
        return weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=rate,
                padding=get_padding(kernel_size, rate),
            ))

    self.convs1 = nn.ModuleList(
        [normed_conv(rate)
         for rate in (dilation[0], dilation[1], dilation[2])])
    self.convs1.apply(init_weights)

    self.convs2 = nn.ModuleList([normed_conv(1) for _ in range(3)])
    self.convs2.apply(init_weights)
def __init__(self, cfg):
    """Build the encoder/decoder convolutions, size bookkeeping and optimizer.

    Args:
        cfg: configuration mapping. Keys read here: ``conv_ker``,
            ``dropRate``, ``ds_ratio``, ``n_channels``, ``spk_length``,
            ``enc_conv_{1,2,4}_ch`` and ``dec_conv_{1,2,4}_ch``. The
            optimizer additionally reads ``learn_rate`` and
            ``weight_decay`` from ``self.cfg`` (presumably set by the
            parent constructor - confirm).
    """
    super(SimpleVae, self).__init__(cfg)

    ker = cfg["conv_ker"]
    conv_pad = get_padding(ker)
    drop_p = cfg["dropRate"]
    ratio = cfg["ds_ratio"]
    pool_pad = get_padding(ratio)

    def conv2d(in_ch, out_ch):
        # Square-kernel, bias-free convolution shared by every stage.
        return nn.Conv2d(in_ch, out_ch, (ker, ker), groups=1,
                         padding=(conv_pad, conv_pad), bias=False)

    def conv_stage(in_ch, out_ch):
        # Conv -> ReLU -> BatchNorm -> Dropout.
        return nn.Sequential(
            conv2d(in_ch, out_ch),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(out_ch),
            nn.Dropout(p=drop_p))

    def pooled(size):
        # Size after one down-sampling step, as computed by this model.
        return np.floor(
            (size + 2 * pool_pad - ratio) / ratio).astype('int') + 1

    # --- encoder -------------------------------------------------------
    self.enc_conv_1 = conv_stage(1, cfg["enc_conv_1_ch"])
    self.enc_conv_2 = conv_stage(cfg["enc_conv_1_ch"], cfg["enc_conv_2_ch"])
    # Last encoder stage emits twice ``enc_conv_4_ch`` channels.
    self.enc_conv_4 = conv_stage(cfg["enc_conv_2_ch"],
                                 cfg["enc_conv_4_ch"] * 2)

    self.ds_2_2 = nn.MaxPool2d((ratio, ratio))

    # --- sizes along both axes after 0..3 down-sampling steps ----------
    self.n_recording_chan_1 = cfg["n_channels"]
    self.n_recording_chan_2 = pooled(self.n_recording_chan_1)
    self.n_recording_chan_3 = pooled(self.n_recording_chan_2)
    self.n_recording_chan_4 = pooled(self.n_recording_chan_3)

    self.spk_length_1 = cfg["spk_length"]
    self.spk_length_2 = pooled(self.spk_length_1)
    self.spk_length_3 = pooled(self.spk_length_2)
    self.spk_length_4 = pooled(self.spk_length_3)

    # --- decoder -------------------------------------------------------
    # Output stage: a bare convolution down to one channel.
    self.dec_conv_1 = nn.Sequential(conv2d(cfg["dec_conv_1_ch"], 1))
    self.dec_conv_2 = conv_stage(cfg["dec_conv_2_ch"], cfg["dec_conv_1_ch"])
    self.dec_conv_4 = conv_stage(cfg["dec_conv_4_ch"], cfg["dec_conv_2_ch"])

    # move model to GPU
    if torch.cuda.is_available():
        self.cuda()

    # optimizer
    self.optimizer = optim.Adam(
        self.parameters(),
        lr=self.cfg["learn_rate"],
        weight_decay=self.cfg["weight_decay"],
        amsgrad=True)
def __init__(self, cfg):
    """Build encoder/decoder convolutions, residual blocks and the optimizer.

    Args:
        cfg: configuration mapping. Keys read here: ``conv_ker``,
            ``dropRate``, ``ds_ratio``, ``n_channels``, ``spk_length``,
            ``cardinality_factor``, ``enc_conv_{1,2,3,4}_ch`` and
            ``dec_conv_{1,2,3,4}_ch``. The optimizer additionally reads
            ``learn_rate`` and ``weight_decay`` from ``self.cfg``
            (presumably set by the parent constructor - confirm).
    """
    super(resnet_2d_vae_v2, self).__init__(cfg)

    ker = cfg["conv_ker"]
    conv_pad = get_padding(ker)
    drop_p = cfg["dropRate"]
    ratio = cfg["ds_ratio"]
    pool_pad = get_padding(ratio)

    def conv2d(in_ch, out_ch):
        # Square-kernel, bias-free convolution shared by every stage.
        return nn.Conv2d(in_ch, out_ch, (ker, ker), groups=1,
                         padding=(conv_pad, conv_pad), bias=False)

    def conv_stage(in_ch, out_ch):
        # Conv -> ReLU -> BatchNorm -> Dropout.
        return nn.Sequential(
            conv2d(in_ch, out_ch),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(out_ch),
            nn.Dropout(p=drop_p))

    def pooled(size):
        # Size after one down-sampling step, as computed by this model.
        return np.floor(
            (size + 2 * pool_pad - ratio) / ratio).astype('int') + 1

    # --- encoder -------------------------------------------------------
    self.enc_conv_1 = conv_stage(1, cfg["enc_conv_1_ch"])
    self.enc_conv_2 = conv_stage(cfg["enc_conv_1_ch"], cfg["enc_conv_2_ch"])
    self.enc_conv_3 = conv_stage(cfg["enc_conv_2_ch"], cfg["enc_conv_3_ch"])
    # Last encoder stage emits twice ``enc_conv_4_ch`` channels.
    self.enc_conv_4 = conv_stage(cfg["enc_conv_3_ch"],
                                 cfg["enc_conv_4_ch"] * 2)

    self.ds_2_2 = nn.MaxPool2d((ratio, ratio))

    # --- sizes along both axes after 0..3 down-sampling steps ----------
    self.n_recording_chan_1 = cfg["n_channels"]
    self.n_recording_chan_2 = pooled(self.n_recording_chan_1)
    self.n_recording_chan_3 = pooled(self.n_recording_chan_2)
    self.n_recording_chan_4 = pooled(self.n_recording_chan_3)

    self.spk_length_1 = cfg["spk_length"]
    self.spk_length_2 = pooled(self.spk_length_1)
    self.spk_length_3 = pooled(self.spk_length_2)
    self.spk_length_4 = pooled(self.spk_length_3)

    # --- decoder -------------------------------------------------------
    # Output stage: a bare convolution down to one channel.
    self.dec_conv_1 = nn.Sequential(conv2d(cfg["dec_conv_1_ch"], 1))
    self.dec_conv_2 = conv_stage(cfg["dec_conv_2_ch"], cfg["dec_conv_1_ch"])
    self.dec_conv_3 = conv_stage(cfg["dec_conv_3_ch"], cfg["dec_conv_2_ch"])
    self.dec_conv_4 = conv_stage(cfg["dec_conv_4_ch"], cfg["dec_conv_3_ch"])

    # --- residual blocks at the dec_conv_3 channel width ---------------
    width_3 = cfg["dec_conv_3_ch"]
    self.resx_3 = ResNeXtBottleNeck_2d(
        width_3,
        ker,
        cardinality=int(width_3 / cfg["cardinality_factor"]),
        dropRate=drop_p)
    self.resnet_3 = BasicResBlock_2d(
        width_3,
        ker,
        n_layers=2,
        decode=True,
        dropRate=drop_p)

    # move model to GPU
    if torch.cuda.is_available():
        self.cuda()

    # optimizer
    self.optimizer = optim.Adam(
        self.parameters(),
        lr=self.cfg["learn_rate"],
        weight_decay=self.cfg["weight_decay"],
        amsgrad=True)
def train(self):
    """Train ``self.model`` for ``self.EPOCHS`` epochs and save a checkpoint.

    Every 50 training steps the model is evaluated with
    ``self.validation`` on ``self.val_loader`` and a progress row is
    printed. After the last epoch a checkpoint dictionary is written with
    ``torch.save`` to ``self.save_dir + "/" + self.checkpoint``.

    NOTE(review): the statement nesting was restored from a source whose
    indentation was lost; the per-epoch ``displayDuration`` call is assumed
    to sit at the end of each epoch - confirm against the original file.
    """
    print("Training on device: ",self.device)
    start_time = time.time()
    track_step = 50  # report every ``track_step`` batches

    for epoch in range(self.EPOCHS):
        self.model.train()
        # Reset together with the step counter so the first report of an
        # epoch does not include leftover batches from the previous one.
        running_loss = 0
        display_header(epoch, self.EPOCHS)

        for steps, (images, labels) in enumerate(self.train_loader, start=1):
            images = images.to(self.device)
            labels = labels.to(self.device)

            self.optimizer.zero_grad()
            # Call the module rather than ``forward`` so registered hooks run.
            output = self.model(images)
            loss = self.criterion(output, labels)
            loss.backward()
            self.optimizer.step()
            running_loss += loss.item()

            if steps % track_step == 0:
                self.model.eval()
                with torch.no_grad():
                    val_loss, accuracy = self.validation(
                        self.model, self.val_loader, self.criterion)

                # Normalise by the loader that was actually evaluated
                # (previously divided by len(self.test_loader)).
                n_val_batches = len(self.val_loader)
                pad = get_padding(steps)
                print(f" {epoch+1} ",  # epoch
                      f" {pad}{steps}/{len(self.train_loader)} ",  # step
                      f" {running_loss/track_step :.2f} ",  # training loss
                      f" {val_loss/n_val_batches:.2f} ",  # validation loss
                      f" {accuracy/n_val_batches:.2f} ")  # validation accuracy
                draw_line()

                self.model.train()
                running_loss = 0

        displayDuration("Epoch "+str(epoch+1), start_time)

    displayDuration("Training ", start_time)

    # Save checkpoint
    self.model.class_to_idx = self.train_set.class_to_idx
    checkpoint = {
        'name': 'Flower Classifier Model',
        'epoch': self.EPOCHS,
        'optimizer': self.optimizer.state_dict(),
        'input_size': self.input_size,
        'output_size': 102,
        'pretrained_arch': 'vgg19',
        'learning_rate': self.learning_rate,
        'batch_size': self.batch_size,
        'classifier': self.model.classifier,
        'class_to_idx': self.model.class_to_idx,
        'state_dict': self.model.state_dict(),
    }
    torch.save(checkpoint, self.save_dir+"/"+self.checkpoint)
def __init__(self, cfg):
    """Build the 1-D VAE encoder, decoder, pooling layers and optimizer.

    Args:
        cfg: configuration mapping. Keys read here: ``n_channels``,
            ``conv0_ker``, ``conv1_ch``, ``conv1_ker``, ``conv2_ch``,
            ``conv2_ker``, ``cardinality``, ``dropRate``, ``spk_length``,
            ``ds_ratio_tot``, ``ds_ratio_1``, ``ds_ratio_2`` and
            ``latent_dim``. The optimizer additionally reads
            ``learn_rate`` and ``weight_decay`` from ``self.cfg``
            (presumably set by the parent constructor - confirm).
    """
    super(Vae, self).__init__(cfg)

    drop_p = cfg["dropRate"]
    ch1 = cfg["conv1_ch"]
    ch2 = cfg["conv2_ch"]
    latent = cfg["latent_dim"]
    # Feature count on both sides of the latent linear layers.
    flat_features = int(ch2 * cfg["spk_length"] / cfg["ds_ratio_tot"])

    # encoder
    self.res0 = make_layers(
        cfg["n_channels"], ch1, cfg["conv0_ker"],
        n_layers=1, cardinality=1, dropRate=0)
    self.resx = ResNeXtBottleNeck(
        ch1, cfg["conv1_ker"],
        cardinality=cfg["cardinality"], dropRate=drop_p)
    self.res2 = nn.Sequential(
        nn.Conv1d(ch1, ch2, cfg["conv2_ker"], groups=1,
                  padding=get_padding(cfg["conv2_ker"]), bias=False),
        nn.BatchNorm1d(ch2),
        nn.Dropout(p=drop_p))
    self.enc_mu = nn.Linear(in_features=flat_features,
                            out_features=latent)
    self.enc_log_var = nn.Linear(in_features=flat_features,
                                 out_features=latent)

    # decoder
    self.dec_linear = nn.Linear(in_features=latent,
                                out_features=flat_features)
    self.deres2 = make_layers(
        ch2, ch1, cfg["conv2_ker"],
        n_layers=1, decode=True, dropRate=drop_p)
    self.deres1 = BasicResBlock(
        ch1, cfg["conv1_ker"],
        n_layers=2, decode=True, dropRate=drop_p)
    self.deres0 = nn.ConvTranspose1d(
        ch1, cfg["n_channels"], cfg["conv0_ker"],
        padding=get_padding(cfg["conv0_ker"]))

    # down sampling layers
    self.ds1 = nn.MaxPool1d(cfg["ds_ratio_1"])
    self.ds2 = nn.MaxPool1d(cfg["ds_ratio_2"])

    # move model to GPU
    if torch.cuda.is_available():
        self.cuda()

    # optimizer
    self.optimizer = optim.Adam(
        self.parameters(),
        lr=self.cfg["learn_rate"],
        weight_decay=self.cfg["weight_decay"],
        amsgrad=True)

    self.unique_labels = []
    self.target_means = []