def init_model(self):
    # Get initial states for the LSTM from the first image and its mask
    lstm_initializer = LSTM_initializer()
    lstm_initializer.build(self.input_images_initializer)
    h_0 = lstm_initializer.h0
    c_0 = lstm_initializer.c0

    # Get 7 frames and feed them to the Encoder
    tmp_shape = self.input_image_encoder.shape
    input_image_encoder_unstacked = tf.reshape(self.input_image_encoder, [
        tmp_shape[0] * tmp_shape[1], tmp_shape[2], tmp_shape[3], tmp_shape[4]
    ])
    encoder = Encoder()
    encoder.build(input_image_encoder_unstacked)
    encoder_output = encoder.conv6

    # This will be the set of B batches and F frames to be fed to the ConvLSTM
    encoder_output_stacked = tf.reshape(encoder_output, [
        self.input_image_encoder.shape[0], self.input_image_encoder.shape[1],
        encoder_output.shape[1], encoder_output.shape[2],
        encoder_output.shape[3]
    ])

    # Feed the output of the encoder to the ConvLSTM
    conv_lstm = Unrolled_convLSTM()
    conv_lstm.build(encoder_output_stacked, c_0, h_0)
    lstm_output = conv_lstm.lstm_output

    # This will be fed to the decoder
    lstm_output_unstacked = tf.reshape(
        lstm_output,
        (lstm_output.shape[0] * lstm_output.shape[1], lstm_output.shape[2],
         lstm_output.shape[3], lstm_output.shape[4]))

    # Feed the output of the ConvLSTM to the decoder
    decoder = Decoder()
    decoder.build(lstm_output_unstacked)
    decoder_output = decoder.y_hat
    mask_output = decoder.mask_out
    self.decoder_output_unstacked = tf.reshape(
        decoder_output,
        (lstm_output.shape[0], lstm_output.shape[1], decoder_output.shape[1],
         decoder_output.shape[2], decoder_output.shape[3]))
    self.mask_output_unstacked = tf.reshape(
        mask_output, self.decoder_output_unstacked.shape)
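# The reshapes in init_model implement a batch/time merge: the encoder and
# decoder are per-frame networks, so the [B, F, H, W, C] video tensor is
# flattened to [B*F, H, W, C] before them and restored afterwards for the
# ConvLSTM. A minimal standalone sketch of the same round trip; the shapes
# below are illustrative, not taken from the model.
import tensorflow as tf

B, F, H, W, C = 2, 7, 8, 8, 512          # e.g. 2 clips of 7 frames each
video = tf.zeros([B, F, H, W, C])

# Merge batch and time so a per-frame network can process all frames at once.
frames = tf.reshape(video, [B * F, H, W, C])

# ... a per-frame encoder/decoder would run here ...

# Split batch and time back apart for the ConvLSTM, which needs an explicit
# frame dimension.
restored = tf.reshape(frames, [B, F, H, W, C])
assert restored.shape == video.shape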
def __init__(self,
             embedding_dim=256,
             linear_dim=1025,
             mel_dim=80,
             r=5,
             padding_idx=None):
    super(Tacotron, self).__init__()
    self.mel_dim = mel_dim
    self.embedding = Embedding(num_embeddings=len(symbols),
                               embedding_dim=embedding_dim,
                               padding_idx=padding_idx)
    self.embedding.weight.data.normal_(mean=0, std=0.3)
    self.encoder = Encoder(in_features=embedding_dim)
    self.decoder = Decoder(in_features=256, memory_dim=mel_dim, r=r)
    self.postnet = CBHG(sample_size=mel_dim,
                        conv_bank_max_filter_size=8,
                        conv_projections_channel_size=[256, mel_dim],
                        num_highways=4)
    self.last_linear = Linear(in_features=(mel_dim * 2),
                              out_features=linear_dim)
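# A subtlety in the init above: nn.Embedding zeroes the padding_idx row at
# construction (and keeps its gradient zeroed), but normal_ overwrites the
# whole weight matrix, including that row. A small sketch of the behavior;
# the re-zeroing step at the end is an optional addition, not part of the
# model above.
import torch
from torch.nn import Embedding

emb = Embedding(num_embeddings=10, embedding_dim=4, padding_idx=0)
print(emb.weight.data[0])          # all zeros, courtesy of padding_idx

emb.weight.data.normal_(mean=0, std=0.3)
print(emb.weight.data[0])          # no longer zeros: normal_ overwrote the row

with torch.no_grad():              # restore a zero padding vector if desired
    emb.weight[0].fill_(0)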
def __init__(self):
    super(Merlin, self).__init__(
        z_network=Z_network(),
        predictor=Predictor(),  # TODO: is h two layers?
        decoder=Decoder(),
        policy=Policy(),
        memory=Memory(),
    )
    # Merlin handles the wiring between modules; variables shared across
    # modules other than memory live on Merlin itself.
    self.optimizer = optimizers.Adam()
    self.optimizer.setup(self)
def __init__(self):
    super(Merlin, self).__init__(
        z_network=Z_network(),
        predictor=Predictor(),  # TODO: h outputs 2 layers [h1, h2]
        decoder=Decoder(),
        policy=Policy(),
        memory=Memory(),
    )
    self.optimizer = optimizers.Adam()
    self.optimizer.setup(self)
    self.mbp_loss_log = []
    self.policy_loss_log = []
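# In Chainer, links passed to Chain's constructor are registered as children,
# and optimizer.setup(self) then tracks all of their parameters, so a single
# update() steps the whole model. A minimal self-contained sketch of the
# pattern; the toy links are placeholders, not Merlin's actual modules.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers

class Toy(chainer.Chain):
    def __init__(self):
        # Registering child links, as Merlin does with its modules.
        super(Toy, self).__init__(l1=L.Linear(3, 4), l2=L.Linear(4, 1))
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self)

    def __call__(self, x):
        return self.l2(F.relu(self.l1(x)))

model = Toy()
x = np.zeros((1, 3), dtype=np.float32)
loss = F.sum(model(x))
model.cleargrads()
loss.backward()
model.optimizer.update()  # updates parameters of both l1 and l2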
mask_transformation = transforms.Compose(
    [transforms.Resize(resize_dim),
     transforms.ToTensor()])
ytvos = YouTubeVOSLoader(root=root_data_dir,
                         mode=mode,
                         fraction=fraction,
                         image_transformation=image_transformation,
                         mask_transformation=mask_transformation,
                         num_frames=NUM_FRAMES)
ytvos = DataLoader(ytvos,
                   batch_size=batch_size,
                   shuffle=shuffle_data,
                   num_workers=num_workers)
num_batches = len(ytvos)

###### MODEL OBJECTS ######
# The encoder downsamples spatially by a factor of 32.
encoded_h = int(resize_dim[0] / 32)
encoded_w = int(resize_dim[1] / 32)
initializer = Initializer().to(device)
encoder = Encoder().to(device)
convlstmcell = ConvLSTMCell(height=encoded_h, width=encoded_w).to(device)
decoder = Decoder(input_res=(encoded_h, encoded_w),
                  output_res=resize_dim).to(device)
cost_fn = nn.BCELoss()
optimizer = torch.optim.Adam(list(initializer.parameters()) +
                             list(encoder.parameters()) +
                             list(convlstmcell.parameters()) +
                             list(decoder.parameters()),
                             lr=lr)
###########################

iter_count = -1
os.makedirs(save_models_dir, exist_ok=True)

# Select a non-interactive backend so figures can be written headlessly.
import matplotlib
matplotlib.use('pdf')
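# The "/ 32" above reflects the encoder's total downsampling, presumably five
# stride-2 stages (2**5 == 32) in a VGG-style backbone; the exact architecture
# is not shown here. Illustrative arithmetic with an assumed input resolution:
resize_dim = (256, 448)           # hypothetical (H, W); multiples of 32
encoded_h = resize_dim[0] // 32   # -> 8
encoded_w = resize_dim[1] // 32   # -> 14
# Non-multiples of 32 would be silently truncated by the integer division.
assert resize_dim[0] % 32 == 0 and resize_dim[1] % 32 == 0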
num_batches = len(ytvos)

###### MODEL OBJECTS ######
encoded_h = int(resize_dim[0] / 32)
encoded_w = int(resize_dim[1] / 32)
initializer = Initializer().to(device)
encoder = Encoder().to(device)
convlstmcell_encoder = ConvLSTMCell(height=encoded_h,
                                    width=encoded_w).to(device)
convlstmcell_decoder = ConvLSTMCell(channels=512 * 2,
                                    height=encoded_h,
                                    width=encoded_w).to(device)
decoder = Decoder(input_channels=512 * 2,
                  input_res=(encoded_h, encoded_w),
                  output_res=resize_dim).to(device)
cost_fn = nn.BCELoss()
optimizer = torch.optim.Adam(
    list(initializer.parameters()) + list(encoder.parameters()) +
    list(convlstmcell_encoder.parameters()) +
    list(convlstmcell_decoder.parameters()) + list(decoder.parameters()),
    lr=lr)
###########################

iter_count = -1
os.makedirs(save_models_dir, exist_ok=True)
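# The 512 * 2 channel counts suggest the decoder-side ConvLSTM and decoder
# consume two 512-channel maps concatenated along the channel axis (e.g. the
# encoder features plus the encoder-side ConvLSTM output; the exact pairing
# is an assumption). A minimal shape sketch of that concatenation:
import torch

B, C, H, W = 4, 512, 8, 14                  # illustrative shapes only
encoder_features = torch.zeros(B, C, H, W)
lstm_features = torch.zeros(B, C, H, W)     # hypothetical second 512-ch map
fused = torch.cat([encoder_features, lstm_features], dim=1)
assert fused.shape == (B, 2 * C, H, W)      # matches the 512 * 2 inputs above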
ytvos = ValidationYouTubeVOSLoader(root=root_data_dir,
                                   mode=mode,
                                   fraction=fraction,
                                   image_transformation=image_transformation,
                                   mask_transformation=mask_transformation)
data_loader = DataLoader(ytvos,
                         batch_size=batch_size,
                         shuffle=shuffle_data,
                         num_workers=num_workers)
num_val_images = len(data_loader)

###### MODEL OBJECTS ######
encoded_h = int(resize_dim[0] / 32)
encoded_w = int(resize_dim[1] / 32)
initializer = Initializer().to(device)
encoder = Encoder().to(device)
convlstmcell = ConvLSTMCell(height=encoded_h, width=encoded_w).to(device)
decoder = Decoder(input_res=(encoded_h, encoded_w),
                  output_res=resize_dim).to(device)

# Restore trained weights and put every module in evaluation mode.
checkpoint = torch.load(saved_model_path)
initializer.load_state_dict(checkpoint['initializer'])
encoder.load_state_dict(checkpoint['encoder'])
convlstmcell.load_state_dict(checkpoint['convlstmcell'])
decoder.load_state_dict(checkpoint['decoder'])
initializer.eval()
encoder.eval()
convlstmcell.eval()
decoder.eval()
###########################
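# The loads above imply the training script saved one checkpoint dict keyed by
# module name. The matching save call would look like this sketch; the exact
# file name is illustrative.
torch.save(
    {
        'initializer': initializer.state_dict(),
        'encoder': encoder.state_dict(),
        'convlstmcell': convlstmcell.state_dict(),
        'decoder': decoder.state_dict(),
    },
    saved_model_path)  # e.g. os.path.join(save_models_dir, 'model.pth')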
def main():
    args = Arg()
    global device
    device = torch.device(
        "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() else "cpu")
    nas_dir = make_dir(
        '/media/NAS/nas_187/datasets/junghwan/experience/superpixel/results',
        format(args.dataset))
    nas_model_dir = '/media/NAS/nas_187/datasets/junghwan/experience/superpixel/models'
    if args.network == 'unet':
        nas_dir = make_dir(nas_dir, 'unet_{}'.format(args.data_root))
        nas_dir = make_dir(
            nas_dir, 'unet_encoderfix:{}'.format(args.encoder_parameter_fix))
    else:
        nas_dir = make_dir(nas_dir,
                           '{}_{}'.format(args.network, args.data_root))
        nas_dir = make_dir(
            nas_dir,
            '{}_supix:{}_nhid:{}_nlayer:{}_encoderfix:{}_concat:{}_gnnencoder:{}_gnnparameter:{}_head:{}'
            .format(args.network, args.superpix_number, args.n_hid,
                    args.n_layer, args.encoder_parameter_fix, args.concat,
                    args.use_gnn_encoder, args.use_gnn_parameter,
                    args.attention_head))
    if args.multi_try > 1:
        nas_dir = make_dir(nas_dir, 'multi_try')

    image_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        'val':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
    }
    label_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]),
        'val':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]),
    }

    n_class = 1
    if args.dataset == 'fss':
        if args.data_root == 'local':
            FSS_dir = '../datasets/few_shot_seg_1000/fewshot_data'
            # dataset = {x: FSS_Dataset(FSS_dir, mode=x, image_transform=image_transforms[x], label_transform=label_transforms[x]) for x in ['train', 'test']}
            dataset = {
                x: FSS_Dataset(FSS_dir,
                               n_superpix=args.superpix_number,
                               mode=x)
                for x in ['train', 'test']
            }
    elif args.dataset == 'catdog':
        if args.data_root == 'local':
            # catdog_dir = '../datasets/catdog'
            catdog_dir = '../datasets/split_catdog'
            # catdog_dir = '../datasets/simplesplit_catdog'
            dataset = {
                x: catdog_Dataset(catdog_dir,
                                  n_superpix=args.superpix_number,
                                  mode=x,
                                  image_transform=image_transforms[x],
                                  label_transform=label_transforms[x])
                for x in ['train', 'val']
            }
        elif args.data_root == 'nas':
            catdog_dir = '/media/NAS/nas_187/soopil/data/catdog_superpix'
    elif args.dataset == 'simplecatdog':
        catdog_dir = '../datasets/simplesplit_catdog'
        dataset = {
            x: catdog_Dataset(catdog_dir,
                              n_superpix=args.superpix_number,
                              mode=x,
                              image_transform=image_transforms[x],
                              label_transform=label_transforms[x])
            for x in ['train', 'val']
        }
    elif args.dataset == 'city':
        city_dir = '/media/NAS/nas_187/datasets/junghwan/experience/superpixel/datasets/cityscapes/for_graph_resize'
        dataset = {
            x: city_Dataset(city_dir,
                            n_superpix=args.superpix_number,
                            mode=x,
                            image_transform=image_transforms[x],
                            label_transform=label_transforms[x])
            for x in ['train', 'val']
        }
        n_class = 19

    dataloader = {
        x: torch.utils.data.DataLoader(dataset[x],
                                       batch_size=1,
                                       shuffle=True,
                                       num_workers=4)
        for x in ['train', 'val']
    }
    pretrained_path = './pretrained_model/vgg16-397923af.pth'
    # save_root = os.path.join('models', args.data_root, args.network, args.dataset)
    save_root = make_dir(nas_model_dir, args.data_root)
    save_root = make_dir(save_root, args.network)
    save_root = make_dir(save_root, args.dataset)
    if args.multi_try > 1:
        save_root = make_dir(save_root, 'multi_try')

    for i in range(args.multi_try):
        if args.network == 'unet':
            save_path = make_dir(
                save_root,
                'encoderfix:{}_iter:{}'.format(args.encoder_parameter_fix, i))
            encoder = Encoder(pretrained_path,
                              device,
                              args.network,
                              parameter_fix=args.encoder_parameter_fix)
            decoder = Decoder(output_channel=n_class).to(device)
            optimizer = optim.Adam(list(encoder.parameters()) +
                                   list(decoder.parameters()),
                                   lr=0.01,
                                   weight_decay=5e-4)
            criterion = nn.BCELoss()
            encoder, decoder = train_unet(encoder,
                                          decoder,
                                          dataloader,
                                          optimizer,
                                          criterion,
                                          nas_dir,
                                          device,
                                          i,
                                          epochs=args.epochs)
            torch.save(
                encoder.state_dict(),
                os.path.join(
                    save_path, 'encoder_encoderfix:{}_{}.pth'.format(
                        args.encoder_parameter_fix, i)))
            torch.save(
                decoder.state_dict(),
                os.path.join(
                    save_path,
                    'decoder_{}_{}.pth'.format(args.encoder_parameter_fix,
                                               i)))
        else:
            save_path = make_dir(
                save_root,
                'superpix:{}_nhid:{}_nlayer:{}_encoderfix:{}_concat:{}_gnnencoder:{}_gnnparameter:{}_head:{}_iter:{}'
                .format(args.superpix_number, args.n_hid, args.n_layer,
                        args.encoder_parameter_fix, args.concat,
                        args.use_gnn_encoder, args.use_gnn_parameter,
                        args.attention_head, i))
            encoder_path = os.path.join(save_path, 'encoder.pth')
            gnn_path = os.path.join(save_path, '{}.pth'.format(args.network))
            encoder = Encoder(pretrained_path,
                              device,
                              args.network,
                              parameter_fix=args.encoder_parameter_fix)
            if args.use_gnn_encoder:
                # Warm-start the encoder from the plain-GNN run trained with
                # the same hyperparameters.
                gnn_encoder_path = os.path.join(
                    save_root.replace(args.network, 'gnn'),
                    'superpix:{}_nhid:{}_nlayer:{}_encoderfix:{}_concat:{}_gnnencoder:{}_gnnparameter:{}_iter:{}'
                    .format(args.superpix_number, args.n_hid, args.n_layer,
                            args.encoder_parameter_fix, args.concat, False,
                            False, i), 'encoder.pth')
                encoder.load_state_dict(torch.load(gnn_encoder_path))
            if args.network == 'gnn':
                gnn = GNN(nfeat=512,
                          nhid=args.n_hid,
                          nclass=n_class,
                          dropout=0.5,
                          n_layer=args.n_layer,
                          concat=args.concat).to(device)
            elif args.network == 'gcn':
                gnn = GCN(nfeat=512,
                          nhid=args.n_hid,
                          nclass=n_class,
                          dropout=0.5,
                          n_layer=args.n_layer,
                          concat=args.concat).to(device)
            elif 'gat' in args.network:
                gnn = GAT(nfeat=512,
                          nhid=args.n_hid,
                          nclass=n_class,
                          dropout=0.5,
                          nheads=args.attention_head,
                          alpha=0.2,
                          n_layer=args.n_layer,
                          concat=args.concat,
                          gatType=args.network).to(device)
            optimizer = optim.Adam(list(encoder.parameters()) +
                                   list(gnn.parameters()),
                                   lr=0.01,
                                   weight_decay=5e-4)
            if args.dataset == 'city':
                criterion = nn.CrossEntropyLoss()
            else:
                criterion = nn.BCELoss()
            if 'gat' in args.network:
                encoder, gnn = train_gat(encoder,
                                         gnn,
                                         dataloader,
                                         optimizer,
                                         criterion,
                                         nas_dir,
                                         device,
                                         i,
                                         epochs=args.epochs,
                                         concat=args.concat,
                                         network=args.network)
            else:
                encoder, gnn = train_gnn(encoder,
                                         gnn,
                                         dataloader,
                                         optimizer,
                                         criterion,
                                         nas_dir,
                                         device,
                                         i,
                                         epochs=args.epochs,
                                         concat=args.concat)
            torch.save(encoder.state_dict(), encoder_path)
            torch.save(gnn.state_dict(), gnn_path)
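# make_dir is used throughout but not defined in this section. From its call
# sites it joins a parent directory with a child name, creates the directory
# if missing, and returns the path; the sketch below is a hypothetical
# reconstruction, not necessarily the repository's actual helper.
import os

def make_dir(parent, child):
    path = os.path.join(parent, child)
    os.makedirs(path, exist_ok=True)
    return path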
def main():
    args = Arg()
    global device
    device = torch.device(
        "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() else "cpu")
    nas_dir = '/media/NAS/nas_187/datasets/junghwan/experience/superpixel/results/{}'.format(
        args.dataset)
    nas_model_dir = '/media/NAS/nas_187/datasets/junghwan/experience/superpixel/models'
    if args.network == 'unet':
        nas_dir = make_dir(nas_dir, 'unet_{}'.format(args.data_root))
        nas_dir = make_dir(
            nas_dir, 'unet_encoderfix:{}'.format(args.encoder_parameter_fix))
    else:
        nas_dir = make_dir(nas_dir,
                           '{}_{}'.format(args.network, args.data_root))
        nas_dir = make_dir(
            nas_dir,
            '{}_supix:{}_nhid:{}_nlayer:{}_encoderfix:{}_concat:{}_gnnencoder:{}_gnnparameter:{}_head:{}_gft:{}_epochs:{}'
            .format(args.network, args.superpix_number, args.n_hid,
                    args.n_layer, args.encoder_parameter_fix, args.concat,
                    args.use_gnn_encoder, args.use_gnn_parameter,
                    args.attention_head, args.graph_feature, args.epochs))
    if args.multi_try > 1:
        nas_dir = make_dir(nas_dir, 'multi_try')
    print('result dir')
    print(nas_dir)

    image_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        'test':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
    }
    label_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]),
        'test':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]),
    }

    if args.dataset == 'fss':
        FSS_dir = '../datasets/few_shot_seg_1000/fewshot_data'
        dataset = FSS_Dataset(FSS_dir,
                              n_superpix=args.superpix_number,
                              mode='test',
                              image_transform=image_transforms['test'],
                              label_transform=label_transforms['test'])
    elif args.dataset == 'catdog':
        # catdog_dir = '../datasets/catdog'
        catdog_dir = '../datasets/split_catdog'
        dataset = catdog_Dataset(catdog_dir,
                                 net=args.network,
                                 n_superpix=args.superpix_number,
                                 mode='test',
                                 image_transform=image_transforms['test'],
                                 label_transform=label_transforms['test'])
    elif args.dataset == 'simplecatdog':
        # catdog_dir = '../datasets/catdog'
        catdog_dir = '../datasets/simplesplit_catdog'
        dataset = catdog_Dataset(catdog_dir,
                                 net=args.network,
                                 n_superpix=args.superpix_number,
                                 mode='test',
                                 image_transform=image_transforms['test'],
                                 label_transform=label_transforms['test'])

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False)
    save_root = os.path.join(nas_model_dir, args.data_root,
                             args.network + '_epoch:{}'.format(args.epochs),
                             args.dataset)
    if args.multi_try > 1:
        save_root = make_dir(save_root, 'multi_try')
    result_txt = os.path.join(nas_dir, 'total_result.txt')
    f = open(result_txt, 'w')
    pretrained_path = './pretrained_model/vgg16-397923af.pth'
    whole_dsc = 0.0
    whole_loss = 0.0
    for i in range(args.multi_try):
        if args.network == 'unet':
            save_path = make_dir(
                save_root,
                'encoderfix:{}_iter:{}'.format(args.encoder_parameter_fix, i))
            encoder_path = os.path.join(
                save_path, 'encoder_encoderfix:{}_{}.pth'.format(
                    args.encoder_parameter_fix, i))
            decoder_path = os.path.join(
                save_path,
                'decoder_{}_{}.pth'.format(args.encoder_parameter_fix, i))
            encoder = Encoder(pretrained_path, device, args.network)
            decoder = Decoder().to(device)
            encoder.load_state_dict(torch.load(encoder_path))
            decoder.load_state_dict(torch.load(decoder_path))
            criterion = nn.BCELoss()
            total_dsc, total_loss = test_unet(encoder, decoder, dataloader,
                                              criterion, nas_dir, device, i)
        else:
            save_path = make_dir(
                save_root,
                'superpix:{}_nhid:{}_nlayer:{}_encoderfix:{}_concat:{}_gnnencoder:{}_gnnparameter:{}_head:{}_gft:{}_iter:{}'
                .format(args.superpix_number, args.n_hid, args.n_layer,
                        args.encoder_parameter_fix, args.concat,
                        args.use_gnn_encoder, args.use_gnn_parameter,
                        args.attention_head, args.graph_feature, i))
            print('model path')
            print(save_path)
            encoder_path = os.path.join(save_path, 'encoder.pth')
            encoder = Encoder(pretrained_path, device, args.network)
            gnn_path = os.path.join(save_path, '{}.pth'.format(args.network))
            if args.network == 'gnn':
                gnn = GNN(nfeat=512,
                          nhid=args.n_hid,
                          nclass=1,
                          dropout=0.5,
                          n_layer=args.n_layer,
                          concat=args.concat).to(device)
            elif args.network == 'gcn':
                gnn = GCN(nfeat=512,
                          nhid=args.n_hid,
                          nclass=1,
                          dropout=0.5,
                          n_layer=args.n_layer,
                          concat=args.concat).to(device)
            elif 'gat' in args.network:
                gnn = GAT(nfeat=512,
                          nhid=args.n_hid,
                          nclass=1,
                          dropout=0.5,
                          nheads=args.attention_head,
                          alpha=0.2,
                          n_layer=args.n_layer,
                          concat=args.concat,
                          gatType=args.network).to(device)
            elif 'gunet' in args.network:
                gnn = GraphUnet(nfeat=512,
                                nhid=args.n_hid,
                                nclass=1,
                                dropout=0.5,
                                alpha=0.2,
                                n_layer=args.n_layer,
                                concat=args.concat).to(device)
            encoder.load_state_dict(
                torch.load(encoder_path, map_location='cuda:0'))
            gnn.load_state_dict(torch.load(gnn_path, map_location='cuda:0'))
            criterion = nn.BCELoss()
            if 'gunet' in args.network:
                total_dsc, total_loss = test_gunet(encoder,
                                                   gnn,
                                                   dataloader,
                                                   criterion,
                                                   nas_dir,
                                                   device,
                                                   i,
                                                   concat=args.concat,
                                                   network=args.network)
            elif 'gat' in args.network:
                total_dsc, total_loss = test_gat(
                    encoder,
                    gnn,
                    dataloader,
                    criterion,
                    nas_dir,
                    device,
                    i,
                    concat=args.concat,
                    network=args.network,
                    graph_feature=args.graph_feature)
            else:
                total_dsc, total_loss = test_gnn(encoder,
                                                 gnn,
                                                 dataloader,
                                                 criterion,
                                                 nas_dir,
                                                 device,
                                                 i,
                                                 concat=args.concat)
        if args.multi_try > 1:
            whole_dsc += total_dsc
            whole_loss += total_loss
        f.write('[{}] dsc: {:.4f}\n'.format(i, total_dsc))  # per-run score
    if args.multi_try > 1:
        f.write('all average dsc: {:.4f}\n'.format(whole_dsc /
                                                   args.multi_try))
    f.close()
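# The "dsc" logged above is presumably the Dice similarity coefficient of the
# predicted and ground-truth masks; the test_* functions that compute it are
# not shown. A common binary-mask formulation, as an illustrative sketch only:
import torch

def dice_score(pred, target, threshold=0.5, eps=1e-7):
    # Binarize sigmoid outputs, then 2*|A∩B| / (|A| + |B|).
    pred_bin = (pred > threshold).float()
    intersection = (pred_bin * target).sum()
    return (2 * intersection + eps) / (pred_bin.sum() + target.sum() + eps)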
def test_forward(self):
    # Smoke test: the Decoder should accept (z, log_pi, a) without error.
    net = Decoder()
    z = make_sample_input(1, Z_DIM)
    log_pi = make_sample_input(1, A_DIM)
    a = make_sample_input(1, A_DIM)
    net(z, log_pi, a)