Example #1
    def init_model(self):
        # Get initial states for the LSTM from the first image and its mask
        lstm_initializer = LSTM_initializer()
        lstm_initializer.build(self.input_images_initializer)
        h_0 = lstm_initializer.h0
        c_0 = lstm_initializer.c0

        # Get 7 frames and feed them to Encoder
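        # Merging the batch and frame dimensions gives the encoder a plain
        # 4-D image batch of shape (B*F, H, W, C)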
        tmp_shape = self.input_image_encoder.shape
        input_image_encoder_unstacked = tf.reshape(self.input_image_encoder, [
            tmp_shape[0] * tmp_shape[1], tmp_shape[2], tmp_shape[3],
            tmp_shape[4]
        ])

        encoder = Encoder()
        encoder.build(input_image_encoder_unstacked)
        encoder_output = encoder.conv6

        # Restore the batch (B) and frame (F) dimensions expected by the ConvLSTM
        encoder_output_stacked = tf.reshape(encoder_output, [
            self.input_image_encoder.shape[0],
            self.input_image_encoder.shape[1], encoder_output.shape[1],
            encoder_output.shape[2], encoder_output.shape[3]
        ])

        # Feed the output of encoder to ConvLSTM
        conv_lstm = Unrolled_convLSTM()
        conv_lstm.build(encoder_output_stacked, c_0, h_0)
        lstm_output = conv_lstm.lstm_output

        # This will be fed to decoder
        lstm_output_unstacked = tf.reshape(
            lstm_output,
            (lstm_output.shape[0] * lstm_output.shape[1], lstm_output.shape[2],
             lstm_output.shape[3], lstm_output.shape[4]))

        # Feed the output of ConvLSTM to decoder
        decoder = Decoder()
        decoder.build(lstm_output_unstacked)
        decoder_output = decoder.y_hat
        mask_output = decoder.mask_out

        self.decoder_output_unstacked = tf.reshape(
            decoder_output, (lstm_output.shape[0], lstm_output.shape[1],
                             decoder_output.shape[1], decoder_output.shape[2],
                             decoder_output.shape[3]))
        self.mask_output_unstacked = tf.reshape(
            mask_output, self.decoder_output_unstacked.shape)
Example #2
def __init__(self, embedding_dim=256, linear_dim=1025, mel_dim=80, r=5,
             padding_idx=None):
    super(Tacotron, self).__init__()
    self.mel_dim = mel_dim
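    # character embedding table over the model's symbol inventory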
    self.embedding = Embedding(num_embeddings=len(symbols),
                               embedding_dim=embedding_dim,
                               padding_idx=padding_idx)
    self.embedding.weight.data.normal_(mean=0, std=0.3)
    self.encoder = Encoder(in_features=embedding_dim)
    self.decoder = Decoder(in_features=256, memory_dim=mel_dim, r=r)
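    # CBHG post-net refines the decoder's mel output before the final
    # projection to linear-spectrogram features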
    self.postnet = CBHG(sample_size=mel_dim, conv_bank_max_filter_size=8,
                        conv_projections_channel_size=[256, mel_dim],
                        num_highways=4)
    self.last_linear = Linear(in_features=(mel_dim * 2),
                              out_features=linear_dim)
Example #3
def __init__(self):
    super(Merlin, self).__init__(
        z_network=Z_network(),
        predictor=Predictor(),  # TODO: does h have 2 layers?
        decoder=Decoder(),
        policy=Policy(),
        memory=Memory(),
    )
    # merlin handles the processing between modules; variables shared
    # across modules (other than memory) are held by merlin.
    self.optimizer = optimizers.Adam()
    self.optimizer.setup(self)
Example #4
File: merlin.py  Project: yosider/merlin
def __init__(self):
    super(Merlin, self).__init__(
        z_network=Z_network(),
        predictor=Predictor(),  # TODO: h outputs 2 layers [h1, h2]
        decoder=Decoder(),
        policy=Policy(),
        memory=Memory(),
    )
    self.optimizer = optimizers.Adam()
    self.optimizer.setup(self)
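    # loss histories for the memory-based predictor (MBP) and the policy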
    self.mbp_loss_log = []
    self.policy_loss_log = []
Example #5
mask_transformation = transforms.Compose([transforms.Resize(resize_dim), transforms.ToTensor()])

ytvos = YouTubeVOSLoader(root=root_data_dir, mode=mode, fraction=fraction,
                         image_transformation=image_transformation,
                         mask_transformation=mask_transformation,
                         num_frames=NUM_FRAMES)
ytvos = DataLoader(ytvos, batch_size=batch_size, shuffle=shuffle_data,
                   num_workers=num_workers)
num_batches = len(ytvos)


###### MODEL OBJECTS ######

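# the encoder downsamples by a factor of 32, so all recurrent processing
# happens at 1/32 of the input resolution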
encoded_h = int(resize_dim[0] / 32)
encoded_w = int(resize_dim[1] / 32)

initializer = Initializer().to(device)
encoder = Encoder().to(device)
convlstmcell = ConvLSTMCell(height=encoded_h, width=encoded_w).to(device)
decoder = Decoder(input_res=(encoded_h, encoded_w),
                  output_res=resize_dim).to(device)

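# binary cross-entropy between predicted and ground-truth segmentation masks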
cost_fn = nn.BCELoss()
optimizer = torch.optim.Adam(
    list(initializer.parameters()) + list(encoder.parameters()) +
    list(convlstmcell.parameters()) + list(decoder.parameters()),
    lr=lr)


###########################

iter_count = -1
try:
    os.makedirs(save_models_dir, exist_ok=True)
except:
    pass

import matplotlib
matplotlib.use('pdf')
Example #6
num_batches = len(ytvos)

###### MODEL OBJECTS ######

encoded_h = int(resize_dim[0] / 32)
encoded_w = int(resize_dim[1] / 32)

initializer = Initializer().to(device)
encoder = Encoder().to(device)
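# separate ConvLSTM cells for the encoder and decoder sides; the decoder-side
# cell and the Decoder consume concatenated 512 * 2-channel features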
convlstmcell_encoder = ConvLSTMCell(height=encoded_h,
                                    width=encoded_w).to(device)
convlstmcell_decoder = ConvLSTMCell(channels=512 * 2,
                                    height=encoded_h,
                                    width=encoded_w).to(device)
decoder = Decoder(input_channels=512 * 2,
                  input_res=(encoded_h, encoded_w),
                  output_res=resize_dim).to(device)

cost_fn = nn.BCELoss()
optimizer = torch.optim.Adam(
    list(initializer.parameters()) + list(encoder.parameters()) +
    list(convlstmcell_encoder.parameters()) +
    list(convlstmcell_decoder.parameters()) + list(decoder.parameters()),
    lr=lr)

###########################

iter_count = -1
try:
    os.makedirs(save_models_dir, exist_ok=True)
except:
    pass
Example #7
ytvos = ValidationYouTubeVOSLoader(root=root_data_dir, mode=mode,
                                   fraction=fraction,
                                   image_transformation=image_transformation,
                                   mask_transformation=mask_transformation)
data_loader = DataLoader(ytvos, batch_size=batch_size, shuffle=shuffle_data,
                         num_workers=num_workers)

num_val_images = len(data_loader)


###### MODEL OBJECTS ######

encoded_h = int(resize_dim[0] / 32)
encoded_w = int(resize_dim[1] / 32)

initializer = Initializer().to(device)
encoder = Encoder().to(device)
convlstmcell = ConvLSTMCell(height=encoded_h, width=encoded_w).to(device)
decoder = Decoder(input_res=(encoded_h, encoded_w),
                  output_res=resize_dim).to(device)

checkpoint = torch.load(saved_model_path)
initializer.load_state_dict(checkpoint['initializer'])
encoder.load_state_dict(checkpoint['encoder'])
convlstmcell.load_state_dict(checkpoint['convlstmcell'])
decoder.load_state_dict(checkpoint['decoder'])

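# inference mode: disables dropout and freezes batch-norm statistics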
initializer.eval()
encoder.eval()
convlstmcell.eval()
decoder.eval()

###########################

Example #8
def main():
    args = Arg()
    global device
    device = torch.device(
        "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() else "cpu")

    nas_dir = make_dir(
        '/media/NAS/nas_187/datasets/junghwan/experience/superpixel/results',
        args.dataset)
    nas_model_dir = '/media/NAS/nas_187/datasets/junghwan/experience/superpixel/models'
    if args.network == 'unet':
        nas_dir = make_dir(nas_dir, 'unet_{}'.format(args.data_root))
        nas_dir = make_dir(
            nas_dir, 'unet_encoderfix:{}'.format(args.encoder_parameter_fix))
    else:
        nas_dir = make_dir(nas_dir, '{}_{}'.format(args.network,
                                                   args.data_root))
        nas_dir = make_dir(
            nas_dir,
            '{}_supix:{}_nhid:{}_nlayer:{}_encoderfix:{}_concat:{}_gnnencoder:{}_gnnparameter:{}_head:{}'
            .format(args.network, args.superpix_number, args.n_hid,
                    args.n_layer, args.encoder_parameter_fix, args.concat,
                    args.use_gnn_encoder, args.use_gnn_parameter,
                    args.attention_head))
    if args.multi_try > 1:
        nas_dir = make_dir(nas_dir, 'multi_try')

    image_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    label_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]),
        'val':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]),
    }
    n_class = 1
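    # binary segmentation by default; the Cityscapes branch below overrides
    # this to 19 classes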
    if args.dataset == 'fss':
        if args.data_root == 'local':
            FSS_dir = '../datasets/few_shot_seg_1000/fewshot_data'
            # dataset = {x: FSS_Dataset(FSS_dir, mode=x, image_transform=image_transforms[x], label_transform=label_transforms[x]) for x in ['train', 'test']}
            dataset = {
                x: FSS_Dataset(FSS_dir,
                               n_superpix=args.superpix_number,
                               mode=x)
                for x in ['train', 'test']
            }
    elif args.dataset == 'catdog':
        if args.data_root == 'local':
            # catdog_dir = '../datasets/catdog'
            catdog_dir = '../datasets/split_catdog'
            # catdog_dir = '../datasets/simplesplit_catdog'
            dataset = {
                x: catdog_Dataset(catdog_dir,
                                  n_superpix=args.superpix_number,
                                  mode=x,
                                  image_transform=image_transforms[x],
                                  label_transform=label_transforms[x])
                for x in ['train', 'val']
            }
        elif args.data_root == 'nas':
            catdog_dir = '/media/NAS/nas_187/soopil/data/catdog_superpix'
    elif args.dataset == 'simplecatdog':
        catdog_dir = '../datasets/simplesplit_catdog'
        dataset = {
            x: catdog_Dataset(catdog_dir,
                              n_superpix=args.superpix_number,
                              mode=x,
                              image_transform=image_transforms[x],
                              label_transform=label_transforms[x])
            for x in ['train', 'val']
        }
    elif args.dataset == 'city':
        city_dir = '/media/NAS/nas_187/datasets/junghwan/experience/superpixel/datasets/cityscapes/for_graph_resize'
        dataset = {
            x: city_Dataset(city_dir,
                            n_superpix=args.superpix_number,
                            mode=x,
                            image_transform=image_transforms[x],
                            label_transform=label_transforms[x])
            for x in ['train', 'val']
        }
        n_class = 19
    dataloader = {
        x: torch.utils.data.DataLoader(dataset[x],
                                       batch_size=1,
                                       shuffle=True,
                                       num_workers=4)
        for x in ['train', 'val']
    }

    pretrained_path = './pretrained_model/vgg16-397923af.pth'

    # save_root = os.path.join('models', args.data_root, args.network, args.dataset)
    save_root = make_dir(nas_model_dir, args.data_root)
    save_root = make_dir(save_root, args.network)
    save_root = make_dir(save_root, args.dataset)
    if args.multi_try > 1:
        save_root = make_dir(save_root, 'multi_try')

    for i in range(args.multi_try):
        if args.network == 'unet':
            save_path = make_dir(
                save_root,
                'encoderfix:{}_iter:{}'.format(args.encoder_parameter_fix, i))
            encoder = Encoder(pretrained_path,
                              device,
                              args.network,
                              parameter_fix=args.encoder_parameter_fix)
            decoder = Decoder(output_channel=n_class).to(device)

            optimizer = optim.Adam(list(encoder.parameters()) +
                                   list(decoder.parameters()),
                                   lr=0.01,
                                   weight_decay=5e-4)
            criterion = nn.BCELoss()

            encoder, decoder = train_unet(encoder,
                                          decoder,
                                          dataloader,
                                          optimizer,
                                          criterion,
                                          nas_dir,
                                          device,
                                          i,
                                          epochs=args.epochs)

            torch.save(
                encoder.state_dict(),
                os.path.join(
                    save_path, 'encoder_encoderfix:{}_{}.pth'.format(
                        args.encoder_parameter_fix, i)))
            torch.save(
                decoder.state_dict(),
                os.path.join(
                    save_path,
                    'decoder_{}_{}.pth'.format(args.encoder_parameter_fix, i)))
        else:
            save_path = make_dir(
                save_root,
                'superpix:{}_nhid:{}_nlayer:{}_encoderfix:{}_concat:{}_gnnencoder:{}_gnnparameter:{}_head:{}_iter:{}'
                .format(args.superpix_number, args.n_hid, args.n_layer,
                        args.encoder_parameter_fix, args.concat,
                        args.use_gnn_encoder, args.use_gnn_parameter,
                        args.attention_head, i))
            encoder_path = os.path.join(save_path, 'encoder.pth')
            gnn_path = os.path.join(save_path, '{}.pth'.format(args.network))

            encoder = Encoder(pretrained_path,
                              device,
                              args.network,
                              parameter_fix=args.encoder_parameter_fix)
            if args.use_gnn_encoder:
                gnn_encoder_path = os.path.join(
                    save_root.replace(args.network, 'gnn'),
                    'superpix:{}_nhid:{}_nlayer:{}_encoderfix:{}_concat:{}_gnnencoder:{}_gnnparameter:{}_iter:{}'
                    .format(args.superpix_number, args.n_hid, args.n_layer,
                            args.encoder_parameter_fix, args.concat, False,
                            False, i), 'encoder.pth')
                encoder.load_state_dict(torch.load(gnn_encoder_path))

            if args.network == 'gnn':
                gnn = GNN(nfeat=512,
                          nhid=args.n_hid,
                          nclass=n_class,
                          dropout=0.5,
                          n_layer=args.n_layer,
                          concat=args.concat).to(device)
            elif args.network == 'gcn':
                gnn = GCN(nfeat=512,
                          nhid=args.n_hid,
                          nclass=n_class,
                          dropout=0.5,
                          n_layer=args.n_layer,
                          concat=args.concat).to(device)
            elif 'gat' in args.network:
                gnn = GAT(nfeat=512,
                          nhid=args.n_hid,
                          nclass=n_class,
                          dropout=0.5,
                          nheads=args.attention_head,
                          alpha=0.2,
                          n_layer=args.n_layer,
                          concat=args.concat,
                          gatType=args.network).to(device)

            optimizer = optim.Adam(list(encoder.parameters()) +
                                   list(gnn.parameters()),
                                   lr=0.01,
                                   weight_decay=5e-4)

            if args.dataset == 'city':
                criterion = nn.CrossEntropyLoss()
            else:
                criterion = nn.BCELoss()

            if 'gat' in args.network:
                encoder, gnn = train_gat(encoder,
                                         gnn,
                                         dataloader,
                                         optimizer,
                                         criterion,
                                         nas_dir,
                                         device,
                                         i,
                                         epochs=args.epochs,
                                         concat=args.concat,
                                         network=args.network)
            else:
                encoder, gnn = train_gnn(encoder,
                                         gnn,
                                         dataloader,
                                         optimizer,
                                         criterion,
                                         nas_dir,
                                         device,
                                         i,
                                         epochs=args.epochs,
                                         concat=args.concat)

            torch.save(encoder.state_dict(), encoder_path)
            torch.save(gnn.state_dict(), gnn_path)
Example #9
def main():
    args = Arg()
    global device
    device = torch.device(
        "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() else "cpu")

    nas_dir = '/media/NAS/nas_187/datasets/junghwan/experience/superpixel/results/{}'.format(
        args.dataset)
    nas_model_dir = '/media/NAS/nas_187/datasets/junghwan/experience/superpixel/models'
    if args.network == 'unet':
        nas_dir = make_dir(nas_dir, 'unet_{}'.format(args.data_root))
        nas_dir = make_dir(
            nas_dir, 'unet_encoderfix:{}'.format(args.encoder_parameter_fix))
    else:
        nas_dir = make_dir(nas_dir, '{}_{}'.format(args.network,
                                                   args.data_root))
        nas_dir = make_dir(
            nas_dir,
            '{}_supix:{}_nhid:{}_nlayer:{}_encoderfix:{}_concat:{}_gnnencoder:{}_gnnparameter:{}_head:{}_gft:{}_epochs:{}'
            .format(args.network, args.superpix_number, args.n_hid,
                    args.n_layer, args.encoder_parameter_fix, args.concat,
                    args.use_gnn_encoder, args.use_gnn_parameter,
                    args.attention_head, args.graph_feature, args.epochs))
    if args.multi_try > 1:
        nas_dir = make_dir(nas_dir, 'multi_try')
    print('result dir')
    print(nas_dir)

    image_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'test':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    label_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]),
        'test':
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]),
    }

    if args.dataset == 'fss':
        FSS_dir = '../datasets/few_shot_seg_1000/fewshot_data'
        dataset = FSS_Dataset(FSS_dir,
                              n_superpix=args.superpix_number,
                              mode='test',
                              image_transform=image_transforms['test'],
                              label_transform=label_transforms['test'])
    elif args.dataset == 'catdog':
        # catdog_dir = '../datasets/catdog'
        catdog_dir = '../datasets/split_catdog'
        dataset = catdog_Dataset(catdog_dir,
                                 net=args.network,
                                 n_superpix=args.superpix_number,
                                 mode='test',
                                 image_transform=image_transforms['test'],
                                 label_transform=label_transforms['test'])
    elif args.dataset == 'simplecatdog':
        # catdog_dir = '../datasets/catdog'
        catdog_dir = '../datasets/simplesplit_catdog'
        dataset = catdog_Dataset(catdog_dir,
                                 net=args.network,
                                 n_superpix=args.superpix_number,
                                 mode='test',
                                 image_transform=image_transforms['test'],
                                 label_transform=label_transforms['test'])

    # catdog_Dataset(catdog_dir, n_superpix=args.superpix_number, mode=x, image_transform=image_transforms[x], label_transform=label_transforms[x])
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False)

    save_root = os.path.join(nas_model_dir, args.data_root,
                             args.network + '_epoch:{}'.format(args.epochs),
                             args.dataset)
    if args.multi_try > 1:
        save_root = make_dir(save_root, 'multi_try')
        result_txt = os.path.join(nas_dir, 'total_result.txt')
        f = open(result_txt, 'w')

    pretrained_path = './pretrained_model/vgg16-397923af.pth'

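    # accumulators for averaging Dice score (dsc) and loss over
    # args.multi_try repeated runs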
    whole_dsc = 0.0
    whole_loss = 0.0
    for i in range(args.multi_try):
        if args.network == 'unet':
            save_path = make_dir(
                save_root,
                'encoderfix:{}_iter:{}'.format(args.encoder_parameter_fix, i))
            encoder_path = os.path.join(
                save_path, 'encoder_encoderfix:{}_{}.pth'.format(
                    args.encoder_parameter_fix, i))
            decoder_path = os.path.join(
                save_path,
                'decoder_{}_{}.pth'.format(args.encoder_parameter_fix, i))
            encoder = Encoder(pretrained_path, device, args.network)
            decoder = Decoder().to(device)

            encoder.load_state_dict(torch.load(encoder_path))
            decoder.load_state_dict(torch.load(decoder_path))

            criterion = nn.BCELoss()

            total_dsc, total_loss = test_unet(encoder, decoder, dataloader,
                                              criterion, nas_dir, device, i)

        else:
            save_path = make_dir(
                save_root,
                'superpix:{}_nhid:{}_nlayer:{}_encoderfix:{}_concat:{}_gnnencoder:{}_gnnparameter:{}_head:{}_gft:{}_iter:{}'
                .format(args.superpix_number, args.n_hid, args.n_layer,
                        args.encoder_parameter_fix, args.concat,
                        args.use_gnn_encoder, args.use_gnn_parameter,
                        args.attention_head, args.graph_feature, i))
            print('model path')
            print(save_path)
            encoder_path = os.path.join(save_path, 'encoder.pth')
            encoder = Encoder(pretrained_path, device, args.network)

            gnn_path = os.path.join(save_path, '{}.pth'.format(args.network))
            if args.network == 'gnn':
                gnn = GNN(nfeat=512,
                          nhid=args.n_hid,
                          nclass=1,
                          dropout=0.5,
                          n_layer=args.n_layer,
                          concat=args.concat).to(device)
            elif args.network == 'gcn':
                gnn = GCN(nfeat=512,
                          nhid=args.n_hid,
                          nclass=1,
                          dropout=0.5,
                          n_layer=args.n_layer,
                          concat=args.concat).to(device)
            elif 'gat' in args.network:
                gnn = GAT(nfeat=512,
                          nhid=args.n_hid,
                          nclass=1,
                          dropout=0.5,
                          nheads=args.attention_head,
                          alpha=0.2,
                          n_layer=args.n_layer,
                          concat=args.concat,
                          gatType=args.network).to(device)
            elif 'gunet' in args.network:
                gnn = GraphUnet(nfeat=512,
                                nhid=args.n_hid,
                                nclass=1,
                                dropout=0.5,
                                alpha=0.2,
                                n_layer=args.n_layer,
                                concat=args.concat).to(device)

            encoder.load_state_dict(
                torch.load(encoder_path, map_location='cuda:0'))
            gnn.load_state_dict(torch.load(gnn_path, map_location='cuda:0'))
            criterion = nn.BCELoss()

            if 'gunet' in args.network:
                # print('gunet start!')
                total_dsc, total_loss = test_gunet(encoder,
                                                   gnn,
                                                   dataloader,
                                                   criterion,
                                                   nas_dir,
                                                   device,
                                                   i,
                                                   concat=args.concat,
                                                   network=args.network)
            elif 'gat' in args.network:
                total_dsc, total_loss = test_gat(
                    encoder,
                    gnn,
                    dataloader,
                    criterion,
                    nas_dir,
                    device,
                    i,
                    concat=args.concat,
                    network=args.network,
                    graph_feature=args.graph_feature)
            else:
                total_dsc, total_loss = test_gnn(encoder,
                                                 gnn,
                                                 dataloader,
                                                 criterion,
                                                 nas_dir,
                                                 device,
                                                 i,
                                                 concat=args.concat)

        if args.multi_try > 1:
            whole_dsc += total_dsc
            whole_loss += total_loss
            f.write('[{}] dsc: {:.4f}\n'.format(i, total_dsc))

    if args.multi_try > 1:
        f.write('all average dsc: {:.4f}\n'.format(whole_dsc / args.multi_try))
        f.close()
Example #10
File: test.py  Project: yosider/merlin
def test_forward(self):
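    # smoke test: one forward pass through the Decoder with dummy inputs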
    net = Decoder()
    z = make_sample_input(1, Z_DIM)
    log_pi = make_sample_input(1, A_DIM)
    a = make_sample_input(1, A_DIM)
    net(z, log_pi, a)