Exemplo n.º 1
0
 def __init__(self, root_dir, model_path, save_dir=None):
     """Load the network and index consecutive frame pairs of one sequence.

     Args:
         root_dir: sequence directory. Frame files are listed from
             ``root_dir/img`` and the initial box is read from the first
             line of ``root_dir/groundtruth_rect.txt``.
         model_path: checkpoint handed to ``torch.load``; the result is
             loaded into the network with ``load_state_dict``.
         save_dir: accepted for interface compatibility; not used by
             this constructor.
     """
     self.root_dir = root_dir
     self.transform = transforms.Compose([Normalize(), ToTensor()])
     self.model_path = model_path
     self.model = model.GoNet()
     if use_gpu:
         self.model = self.model.cuda()
     self.model.load_state_dict(torch.load(model_path))

     # Full paths of every frame, sorted by path string.
     frame_names = os.listdir(root_dir + '/img')
     frames = np.array([root_dir + "/img/" + name for name in frame_names])
     frames.sort()
     self.len = len(frames) - 1  # number of (previous, current) pairs

     # self.x[i] == [frame i, frame i + 1]
     self.x = np.array(
         [[prev, curr] for prev, curr in zip(frames[:-1], frames[1:])])

     #         uncomment to select rectangle manually
     #         init_bbox = bbox_coordinates(self.x[0][0])

     # Read the annotation inside a context manager so the file handle is
     # closed even if parsing below raises.
     with open(root_dir + '/groundtruth_rect.txt') as gt_file:
         lines = gt_file.readlines()

     # Accept comma- or space-separated values in addition to tabs;
     # tab-separated lines parse exactly as before.
     fields = lines[0].replace(',', ' ').split()
     values = [float(v) for v in fields]

     # The first four numbers are treated as (x, y, width, height) and
     # converted to corner form (x1, y1, x2, y2).
     init_bbox = np.array([
         values[0], values[1], values[0] + values[2],
         values[1] + values[3]
     ])
     print(init_bbox)
     self.prev_rect = init_bbox
Exemplo n.º 2
0
def main():
    """Parse the command line, build the ALOV data loader and train GoNet.

    Only the parameters of ``net.classifier`` are handed to the SGD
    optimizer. The save directory is created when it does not exist yet.
    """
    args = parser.parse_args()
    print(args)

    # dataset and batching
    transform = transforms.Compose([Normalize(), ToTensor()])
    image_root = '../data/alov300/imagedata++/'
    annotation_root = '../data/alov300/alov300++_rectangleAnnotation_full/'
    dataset = datasets.ALOVDataset(image_root, annotation_root, transform)
    loader = DataLoader(dataset,
                        batch_size=args.batch_size,
                        shuffle=True,
                        num_workers=4)

    # network and loss, moved to the GPU when one is available
    network = model.GoNet()
    criterion = torch.nn.L1Loss(size_average=False)
    if use_gpu:
        network = network.cuda()
        criterion = criterion.cuda()
    optimizer = optim.SGD(network.classifier.parameters(),
                          lr=args.learning_rate,
                          momentum=args.momentum)

    # make sure the output directory is there before training starts
    if not os.path.exists(args.save_directory):
        os.makedirs(args.save_directory)
    else:
        print('Directory %s already exists' % (args.save_directory))

    # start training
    train_model(network, loader, criterion, optimizer, args.epochs,
                args.learning_rate, args.save_directory)
Exemplo n.º 3
0
 def __init__(self, root_dir, model_path, save_dir=None):
     """Load the network and index consecutive frame pairs under root_dir.

     Args:
         root_dir: directory whose entries are used as the frame list.
         model_path: checkpoint handed to ``torch.load``; the result is
             loaded into the network with ``load_state_dict``.
         save_dir: accepted but not used by this constructor.
     """
     self.root_dir = root_dir
     self.transform = transforms.Compose([Normalize(), ToTensor()])
     self.model_path = model_path

     self.model = model.GoNet()
     if use_gpu:
         self.model = self.model.cuda()
     self.model.load_state_dict(torch.load(model_path))

     # Every directory entry becomes a full path; paths are sorted as
     # strings and then paired with their successor.
     entries = os.listdir(root_dir)
     self.len = len(entries) - 1
     paths = np.sort(np.array([root_dir + "/" + entry for entry in entries]))
     self.x = np.array(
         [[earlier, later] for earlier, later in zip(paths[:-1], paths[1:])])

     # initial rectangle, obtained from bbox_coordinates on the first frame
     first_rect = bbox_coordinates(self.x[0][0])
     print(first_rect)
     self.prev_rect = first_rect
Exemplo n.º 4
0
    def __init__(self,
                 *,
                 ch,
                 out_ch,
                 ch_mult=(1, 2, 4, 8),
                 num_res_blocks,
                 attn_resolutions,
                 dropout=0.0,
                 resamp_with_conv=True,
                 in_channels,
                 resolution,
                 z_channels,
                 give_pre_end=False,
                 **ignorekwargs):
        """Build the layers: input conv, middle blocks, upsampling stages, head.

        Args:
            ch: base channel count; level ``i`` uses ``ch * ch_mult[i]``.
            out_ch: output channels of the final convolution.
            ch_mult: per-level channel multipliers; its length sets the
                number of resolution levels.
            num_res_blocks: each level gets ``num_res_blocks + 1`` ResnetBlocks.
            attn_resolutions: an AttnBlock follows every ResnetBlock of a
                level whose current resolution is contained in this collection.
            dropout: forwarded to every ResnetBlock.
            resamp_with_conv: forwarded to every Upsample.
            in_channels: stored on the instance; not otherwise used here.
            resolution: full resolution; the lowest level works at
                ``resolution // 2**(levels - 1)``.
            z_channels: input channels of the first convolution.
            give_pre_end: stored on the instance; not otherwise used here.
            **ignorekwargs: extra keyword arguments are accepted and ignored.
        """
        super().__init__()
        self.ch = ch
        self.temb_ch = 0
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        self.resolution = resolution
        self.in_channels = in_channels
        self.give_pre_end = give_pre_end

        def make_resnet(c_in, c_out):
            # Every ResnetBlock shares temb_channels and dropout.
            return ResnetBlock(in_channels=c_in,
                               out_channels=c_out,
                               temb_channels=self.temb_ch,
                               dropout=dropout)

        # channel width and spatial size at the lowest resolution
        last_level = self.num_resolutions - 1
        block_in = ch * ch_mult[last_level]
        curr_res = resolution // 2**last_level
        self.z_shape = (1, z_channels, curr_res, curr_res)
        print("Working with z of shape {} = {} dimensions.".format(
            self.z_shape, np.prod(self.z_shape)))

        # z to block_in
        self.conv_in = torch.nn.Conv2d(z_channels,
                                       block_in,
                                       kernel_size=3,
                                       stride=1,
                                       padding=1)

        # middle: resnet -> attention -> resnet at constant width
        mid = nn.Module()
        mid.block_1 = make_resnet(block_in, block_in)
        mid.attn_1 = AttnBlock(block_in)
        mid.block_2 = make_resnet(block_in, block_in)
        self.mid = mid

        # upsampling stages, built from the lowest resolution upwards
        self.up = nn.ModuleList()
        for i_level in range(last_level, -1, -1):
            res_blocks = nn.ModuleList()
            attn_blocks = nn.ModuleList()
            block_out = ch * ch_mult[i_level]
            for _ in range(self.num_res_blocks + 1):
                res_blocks.append(make_resnet(block_in, block_out))
                block_in = block_out
                if curr_res in attn_resolutions:
                    attn_blocks.append(AttnBlock(block_in))
            stage = nn.Module()
            stage.block = res_blocks
            stage.attn = attn_blocks
            if i_level != 0:
                # every level except level 0 doubles the resolution
                stage.upsample = Upsample(block_in, resamp_with_conv)
                curr_res *= 2
            # prepend so that self.up[i] corresponds to level i
            self.up.insert(0, stage)

        # end
        self.norm_out = Normalize(block_in)
        self.conv_out = torch.nn.Conv2d(block_in,
                                        out_ch,
                                        kernel_size=3,
                                        stride=1,
                                        padding=1)
Exemplo n.º 5
0
from torchvision import transforms
from helper import ToTensor, Normalize, show_batch
from torch.utils.data import DataLoader
import torch.optim as optim
import numpy as np
from helper import *
from multiprocessing.dummy import Pool as ThreadPool
from tensorboardX import SummaryWriter

# Module-level configuration.
use_gpu = torch.cuda.is_available()
kSaveModel = 20000  # save the model after every 20000 steps
batchSize = 50  # number of samples in a batch
kGeneratedExamplesPerImage = 10  # synthetic samples generated per dataset image
transform = transforms.Compose([Normalize(), ToTensor()])
writer = SummaryWriter()

# Command-line interface. `args` starts as None; presumably it is filled
# from parser.parse_args() elsewhere -- confirm against the caller.
args = None
parser = argparse.ArgumentParser(description='GOTURN Training')
parser.add_argument('-n', '--num-batches',
                    type=int, default=500000,
                    help='number of total batches to run')
parser.add_argument('-lr', '--learning-rate',
                    type=float, default=1e-6,
                    help='initial learning rate')
parser.add_argument('--gamma',
Exemplo n.º 6
0
    def __init__(self,
                 *,
                 ch,
                 out_ch,
                 ch_mult=(1, 2, 4, 8),
                 num_res_blocks,
                 attn_resolutions,
                 dropout=0.0,
                 resamp_with_conv=True,
                 in_channels,
                 resolution,
                 z_channels,
                 double_z=True,
                 convd="torch.nn.Conv1d",
                 **ignore_kwargs):
        """Build the layers: input conv, downsampling stages, middle, head.

        Args:
            ch: base channel count; level ``i`` outputs ``ch * ch_mult[i]``.
            out_ch: accepted but not used by this constructor.
            ch_mult: per-level channel multipliers; its length sets the
                number of resolution levels.
            num_res_blocks: number of ResnetBlocks per level.
            attn_resolutions: an AttnBlock follows every ResnetBlock of a
                level whose current resolution is contained in this collection.
            dropout: forwarded to every ResnetBlock.
            resamp_with_conv: forwarded to every Downsample.
            in_channels: input channels of the first convolution.
            resolution: resolution of the first level; halved after every
                level except the last.
            z_channels: the final convolution outputs ``2 * z_channels``
                channels when ``double_z`` is true, else ``z_channels``.
            double_z: see ``z_channels``.
            convd: convolution class used for every convolution built here
                and forwarded to ResnetBlock, AttnBlock and Downsample.
                Either the class/callable itself or its dotted name (for
                example ``"torch.nn.Conv1d"``); the first component of a
                dotted name is looked up in this module's globals.
            **ignore_kwargs: extra keyword arguments are accepted and ignored.

        Raises:
            NameError: if the first component of a dotted ``convd`` name is
                not a global of this module.
            AttributeError: if a later component of the name does not exist.
        """
        super().__init__()
        self.ch = ch
        self.temb_ch = 0
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        self.resolution = resolution
        self.in_channels = in_channels

        # Resolve the convolution class by attribute lookup instead of
        # exec(): a string argument can then only name an object, never run
        # arbitrary code. Strings that are not plain dotted names are no
        # longer accepted.
        if isinstance(convd, str):
            root_name, *attr_path = convd.strip().split(".")
            module_scope = globals()
            if root_name not in module_scope:
                raise NameError(f"name '{root_name}' is not defined")
            resolved = module_scope[root_name]
            for attr_name in attr_path:
                resolved = getattr(resolved, attr_name)
            self.convd = resolved
        else:
            # Already a class/callable: use it as given.
            self.convd = convd

        # downsampling
        self.conv_in = self.convd(in_channels,
                                  self.ch,
                                  kernel_size=3,
                                  stride=1,
                                  padding=1)

        curr_res = resolution
        # Level i takes ch * in_ch_mult[i] channels in (the leading 1 makes
        # level 0 start from the base width) and produces ch * ch_mult[i].
        in_ch_mult = (1, ) + tuple(ch_mult)
        self.down = nn.ModuleList()
        for i_level in range(self.num_resolutions):
            block = nn.ModuleList()
            attn = nn.ModuleList()
            block_in = ch * in_ch_mult[i_level]
            block_out = ch * ch_mult[i_level]
            for i_block in range(self.num_res_blocks):
                block.append(
                    ResnetBlock(in_channels=block_in,
                                out_channels=block_out,
                                temb_channels=self.temb_ch,
                                dropout=dropout,
                                convd=self.convd))
                block_in = block_out
                if curr_res in attn_resolutions:
                    attn.append(AttnBlock(block_in, convd=self.convd))
            down = nn.Module()
            down.block = block
            down.attn = attn
            if i_level != self.num_resolutions - 1:
                # every level except the last halves the resolution
                down.downsample = Downsample(block_in,
                                             resamp_with_conv,
                                             convd=self.convd)
                curr_res = curr_res // 2
            self.down.append(down)

        # middle: resnet -> attention -> resnet at the final width
        self.mid = nn.Module()
        self.mid.block_1 = ResnetBlock(in_channels=block_in,
                                       out_channels=block_in,
                                       temb_channels=self.temb_ch,
                                       dropout=dropout,
                                       convd=self.convd)
        self.mid.attn_1 = AttnBlock(block_in, convd=self.convd)
        self.mid.block_2 = ResnetBlock(in_channels=block_in,
                                       out_channels=block_in,
                                       temb_channels=self.temb_ch,
                                       dropout=dropout,
                                       convd=self.convd)

        # end
        self.norm_out = Normalize(block_in)
        self.conv_out = self.convd(block_in,
                                   2 * z_channels if double_z else z_channels,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)