def __init__(self, nstack, inp_dim, oup_dim, bn=False, increase=0, **kwargs):
    super(PoseNet, self).__init__()
    self.nstack = nstack
    self.pre = nn.Sequential(
        Conv(3, 64, 7, 2, bn=True, relu=True),
        Residual(64, 128),
        Pool(2, 2),
        Residual(128, 128),
        Residual(128, inp_dim))
    self.hgs = nn.ModuleList([
        nn.Sequential(Hourglass(4, inp_dim, bn, increase))
        for i in range(nstack)])
    self.features = nn.ModuleList([
        nn.Sequential(
            Residual(inp_dim, inp_dim),
            Conv(inp_dim, inp_dim, 1, bn=True, relu=True))
        for i in range(nstack)])
    self.outs = nn.ModuleList([
        Conv(inp_dim, oup_dim, 1, relu=False, bn=False)
        for i in range(nstack)])
    self.merge_features = nn.ModuleList(
        [Merge(inp_dim, inp_dim) for i in range(nstack - 1)])
    self.merge_preds = nn.ModuleList(
        [Merge(oup_dim, inp_dim) for i in range(nstack - 1)])
    self.heatmapLoss = HeatmapLoss()
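# A minimal forward-pass sketch for the stacked-hourglass wiring set up above,
# assuming the usual intermediate-supervision scheme (the actual forward() is
# not shown in the source): each stack's heatmap prediction and features are
# merged back into the running feature stream before the next stack.
def forward(self, imgs):
    x = self.pre(imgs)
    combined_hm_preds = []
    for i in range(self.nstack):
        hg = self.hgs[i](x)
        feature = self.features[i](hg)
        preds = self.outs[i](feature)
        combined_hm_preds.append(preds)
        if i < self.nstack - 1:
            x = x + self.merge_preds[i](preds) + self.merge_features[i](feature)
    return torch.stack(combined_hm_preds, 1)  # (N, nstack, oup_dim, H, W)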
def __init__(self, nstack, nfeatures, nlandmarks, bn=False, increase=0, **kwargs):
    super(EyeNet, self).__init__()
    self.img_w = 160
    self.img_h = 96
    self.nstack = nstack
    self.nfeatures = nfeatures
    self.nlandmarks = nlandmarks
    # integer division: heatmaps are half the input resolution
    self.heatmap_w = self.img_w // 2
    self.heatmap_h = self.img_h // 2
    self.pre = nn.Sequential(
        Conv(1, 64, 7, 1, bn=True, relu=True),
        Residual(64, 128),
        Pool(2, 2),
        Residual(128, 128),
        Residual(128, nfeatures))
    self.pre2 = nn.Sequential(
        Conv(nfeatures, 64, 7, 2, bn=True, relu=True),
        Residual(64, 128),
        Pool(2, 2),
        Residual(128, 128),
        Residual(128, nfeatures))
    self.hgs = nn.ModuleList([
        nn.Sequential(Hourglass(4, nfeatures, bn, increase))
        for i in range(nstack)])
    self.features = nn.ModuleList([
        nn.Sequential(
            Residual(nfeatures, nfeatures),
            Conv(nfeatures, nfeatures, 1, bn=True, relu=True))
        for i in range(nstack)])
    self.outs = nn.ModuleList([
        Conv(nfeatures, nlandmarks, 1, relu=False, bn=False)
        for i in range(nstack)])
    self.merge_features = nn.ModuleList(
        [Merge(nfeatures, nfeatures) for i in range(nstack - 1)])
    self.merge_preds = nn.ModuleList(
        [Merge(nlandmarks, nfeatures) for i in range(nstack - 1)])
    self.gaze_fc1 = nn.Linear(
        in_features=int(nfeatures * self.img_w * self.img_h / 64 + nlandmarks * 2),
        out_features=256)
    self.gaze_fc2 = nn.Linear(in_features=256, out_features=2)
    self.heatmapLoss = HeatmapLoss()
    self.landmarks_loss = nn.MSELoss()
    self.gaze_loss = nn.MSELoss()
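# Hedged sketch of the gaze head implied by gaze_fc1/gaze_fc2 above (the real
# forward() is not in the source, and the helper name predict_gaze is
# hypothetical). pre halves each spatial side (stride-1 conv + Pool(2, 2)) and
# pre2 quarters it (stride-2 conv + Pool(2, 2)), so the map entering the head
# is nfeatures x (img_h // 8) x (img_w // 8), which matches gaze_fc1's
# in_features of nfeatures * img_w * img_h / 64 + nlandmarks * 2.
def predict_gaze(self, features, landmarks):
    # features: (N, nfeatures, img_h // 8, img_w // 8); landmarks: (N, nlandmarks, 2)
    x = torch.cat([features.flatten(start_dim=1),
                   landmarks.flatten(start_dim=1)], dim=1)
    return self.gaze_fc2(torch.relu(self.gaze_fc1(x)))  # 2-D gaze vector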
def __init__(self, inp_dim, increase=128, bn=False):
    super(Features, self).__init__()
    # regress 5 heatmaps per stack, one per scale; channel width grows by
    # `increase` at each deeper scale
    self.before_regress = nn.ModuleList([
        nn.Sequential(
            Conv(inp_dim + i * increase, inp_dim + i * increase, 3, bn=bn),
            Conv(inp_dim + i * increase, inp_dim + i * increase, 3, bn=bn))
        for i in range(5)])
def main():
    print('test')
    train = SurfaceNormalsDataset('../data/train', test=False)
    trainloader = torch.utils.data.DataLoader(train, batch_size=25, shuffle=True)
    model = torch.nn.Sequential(
        Conv(3, 10),
        Hourglass(4, 10, bn=None, increase=20),
        Conv(10, 10),
        Conv(10, 3),
    )
    criterion = torch.nn.MSELoss(reduction='sum')  # was size_average=False (deprecated)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

    for epoch in range(3):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs (Variable wrapping is no longer needed in PyTorch >= 0.4)
            inputs = data['image']
            labels = data['normal']

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics: per-pixel loss over the last 10 batches of
            # 25 images at 128x128x3
            running_loss += loss.item()  # was loss.data[0] (deprecated)
            if i % 10 == 9:  # print every 10 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1,
                       running_loss / (10 * 25 * 128 * 128 * 3)))
                running_loss = 0.0
            # if i == 100: break  # (disabled early stop for debugging)

    torch.save(model, '../models/hg_test')
    print('Finished Training')
def __init__(self):
    super(TestNet, self).__init__()
    self.pre = Hourglass(3, 1, increase=10)
    # downsamples the feature map 5 times
    self.location = Hourglass(5, 1, increase=15)
    # predict the offset from corners to center along the x axis
    self.xVector = Hourglass(5, 3, increase=10)
    # predict the offset from corners to center along the y axis
    self.yVector = Hourglass(5, 3, increase=10)
    self.conv = Conv(3, 1, kernel_size=1, stride=1)
    self.conv2 = Conv(3, 1, kernel_size=1, stride=1, relu=False)
    self.conv3 = Conv(3, 1, kernel_size=1, stride=1, relu=False)
def __init__(self, nstack, inp_dim, oup_dim, bn=False, increase=128,
             init_weights=True, **kwargs):
    """
    Pack and initialize the trainable parameters of the network.
    :param nstack: number of stacked hourglass modules
    :param inp_dim: input tensor channels fed into the hourglass block
    :param oup_dim: channels of the regressed feature maps
    :param bn: whether to use batch normalization
    :param increase: channels added at each down-sampling step
    :param kwargs:
    """
    super(PoseNet, self).__init__()
    self.pre = nn.Sequential(
        Conv(3, 64, 7, 2, bn=bn),
        Conv(64, 128, bn=bn),
        nn.MaxPool2d(2, 2),
        Conv(128, 128, bn=bn),
        Conv(128, inp_dim, bn=bn))
    self.hourglass = nn.ModuleList(
        [Hourglass(4, inp_dim, increase, bn=bn) for _ in range(nstack)])
    self.features = nn.ModuleList(
        [Features(inp_dim, increase=increase, bn=bn) for _ in range(nstack)])
    # Predict 5 different scales of heatmaps per stack. Keep in mind to pack
    # the list using ModuleList: nn.ModuleList only registers Module
    # subclasses, so the inner per-scale layers must also be wrapped in
    # ModuleList.
    self.outs = nn.ModuleList([
        nn.ModuleList([
            Conv(inp_dim + j * increase, oup_dim, 1, relu=False, bn=False)
            for j in range(5)])
        for i in range(nstack)])
    self.merge_features = nn.ModuleList([
        nn.ModuleList([
            Merge(inp_dim + j * increase, inp_dim + j * increase)
            for j in range(5)])
        for i in range(nstack - 1)])
    self.merge_preds = nn.ModuleList([
        nn.ModuleList(
            [Merge(oup_dim, inp_dim + j * increase) for j in range(5)])
        for i in range(nstack - 1)])
    self.nstack = nstack
    if init_weights:
        self._initialize_weights()
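# Minimal usage sketch, assuming this repo's Conv/Hourglass/Features/Merge are
# importable; oup_dim=17 is just an example value (e.g. COCO keypoints).
net = PoseNet(nstack=2, inp_dim=256, oup_dim=17)
print('trainable parameters: %d' %
      sum(p.numel() for p in net.parameters() if p.requires_grad))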
def __init__(self, nstack, inp_dim, oup_dim, bn=False, increase=128, **kwargs):
    super(EdgeNet, self).__init__()
    self.pre = nn.Sequential(
        # Conv(3, 64, 7, 2, bn=bn),
        Conv(3, 64, 7, 1, bn=bn),
        Conv(64, 128, bn=bn),
        # Pool(2, 2),
        Conv(128, 128, bn=bn),
        Conv(128, inp_dim, bn=bn))
    self.features = nn.ModuleList([
        nn.Sequential(
            Hourglass(5, inp_dim, bn, increase),  # orig 4
            Conv(inp_dim, inp_dim, 3, bn=False),
            Conv(inp_dim, inp_dim, 3, bn=False))
        for i in range(nstack)])
    self.outs = nn.ModuleList([
        Conv(inp_dim, oup_dim, 1, relu=False, bn=False)
        for i in range(nstack)])
    self.merge_features = nn.ModuleList(
        [Merge(inp_dim, inp_dim) for i in range(nstack - 1)])
    self.merge_preds = nn.ModuleList(
        [Merge(oup_dim, inp_dim) for i in range(nstack - 1)])
    self.nstack = nstack
def __init__(self, nstack, inp_dim, oup_dim, bn=False, increase=128, **kwargs):
    super(PoseNet, self).__init__()
    self.pre = nn.Sequential(  # preprocessing: reduce image resolution
        Conv(3, 64, 7, 2, bn=bn),
        Conv(64, 128, bn=bn),
        Pool(2, 2),
        Conv(128, 128, bn=bn),
        Conv(128, inp_dim, bn=bn))
    self.features = nn.ModuleList([  # stacked hourglass modules
        nn.Sequential(
            Hourglass(4, inp_dim, bn, increase),  # 4 downsampling steps
            Conv(inp_dim, inp_dim, 3, bn=False),
            Conv(inp_dim, inp_dim, 3, bn=False))
        for i in range(nstack)])  # number of stacks
    self.outs = nn.ModuleList([
        Conv(inp_dim, oup_dim, 1, relu=False, bn=False)
        for i in range(nstack)])
    self.merge_features = nn.ModuleList(
        [Merge(inp_dim, inp_dim) for i in range(nstack - 1)])  # feature fusion
    self.merge_preds = nn.ModuleList(
        [Merge(oup_dim, inp_dim) for i in range(nstack - 1)])
    self.nstack = nstack
    self.myAEloss = AEloss()
    self.heatmapLoss = HeatmapLoss()
def __init__(self):
    super(TestCornerNet, self).__init__()
    self.pre = nn.Sequential(
        Conv(3, 8, kernel_size=3, stride=1), nn.ReLU(),
        Conv(8, 16, kernel_size=3, stride=1), nn.ReLU(),
        Conv(16, 3, kernel_size=1, stride=1), nn.ReLU())
    self.location = nn.Sequential(
        nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=1), nn.ReLU(),
        nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1), nn.ReLU(),
        nn.Conv2d(16, 1, kernel_size=3, stride=1, padding=1),
    )
    self.xvector = nn.Sequential(  # single-channel offset head along x
        nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=1), nn.Tanh(),
        nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1), nn.Tanh(),
        nn.Conv2d(16, 1, kernel_size=3, stride=1, padding=1),
    )
    self.yvector = nn.Sequential(  # single-channel offset head along y
        nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=1), nn.Tanh(),
        nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1), nn.Tanh(),
        nn.Conv2d(16, 1, kernel_size=3, stride=1, padding=1),
    )
    # Kaiming initialization for all conv weights; zero all biases
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_in',
                                    nonlinearity='relu')
            m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            m.bias.data.zero_()
def __init__(self, nstack, inp_dim, oup_dim, bn=False, increase=128, **kwargs):
    super(PoseNet, self).__init__()
    self.pre = nn.Sequential(
        Conv(3, 64, 7, 2, bn=bn),
        Conv(64, 128, bn=bn),
        Pool(2, 2),
        Conv(128, 128, bn=bn),
        Conv(128, inp_dim, bn=bn))
    self.features = nn.ModuleList([
        nn.Sequential(
            Hourglass(4, inp_dim, bn, increase),
            Conv(inp_dim, inp_dim, 3, bn=False),
            Conv(inp_dim, inp_dim, 3, bn=False))
        for i in range(nstack)])  # hourglass blocks for feature extraction
    self.outs = nn.ModuleList([
        Conv(inp_dim, oup_dim, 1, relu=False, bn=False)
        for i in range(nstack)])  # prediction heads
    self.merge_features = nn.ModuleList(
        [Merge(inp_dim, inp_dim)
         for i in range(nstack - 1)])  # fuse features across stages for multi-stage prediction
    self.merge_preds = nn.ModuleList(
        [Merge(oup_dim, inp_dim) for i in range(nstack - 1)])
    self.nstack = nstack
    self.myAEloss = AEloss()
    self.heatmapLoss = HeatmapLoss()
def __init__(self, nstack, inp_dim, oup_dim, bn=False, increase=128, **kwargs):
    super(PoseNet, self).__init__()
    # nn.Sequential is a container that chains Modules to build the CNN's
    # layer sequence
    self.pre = nn.Sequential(
        # Conv() parameters are 'inp_dim, out_dim, kernel_size, stride' in
        # layers.py; it wraps nn.Conv2d
        Conv(3, 64, 7, 2, bn=bn),
        Conv(64, 128, bn=bn),
        Pool(2, 2),
        Conv(128, 128, bn=bn),
        Conv(128, inp_dim, bn=bn))
    # build nstack hourglass stages; nn.ModuleList stores nn.Modules like a
    # Python list so they are registered as submodules
    self.features = nn.ModuleList([
        nn.Sequential(
            Hourglass(4, inp_dim, bn, increase),
            Conv(inp_dim, inp_dim, 3, bn=False),
            Conv(inp_dim, inp_dim, 3, bn=False))
        for i in range(nstack)])
    self.outs = nn.ModuleList([
        Conv(inp_dim, oup_dim, 1, relu=False, bn=False)
        for i in range(nstack)])
    self.merge_features = nn.ModuleList(
        [Merge(inp_dim, inp_dim) for i in range(nstack - 1)])
    self.merge_preds = nn.ModuleList(
        [Merge(oup_dim, inp_dim) for i in range(nstack - 1)])
    self.nstack = nstack
    self.myAEloss = AEloss()
    self.heatmapLoss = HeatmapLoss()
def __init__(self, nstack, inp_dim, oup_dim, bn=True, increase=128, **kwargs):
    super(SNNet, self).__init__()
    self.pre = nn.Sequential(
        Conv(3, 64, bn=bn),
        Conv(64, 128, bn=bn),
        # Pool(2, 2),
        Conv(128, 128, bn=bn),
        Conv(128, inp_dim, bn=bn))
    self.features = nn.ModuleList([
        nn.Sequential(
            Hourglass(4, inp_dim, bn, increase),
            Conv(inp_dim, inp_dim, 3, bn=False),
            Conv(inp_dim, inp_dim, 3, bn=False))
        for i in range(nstack)])
    self.outs = nn.ModuleList([
        Conv(inp_dim, oup_dim, 1, relu=False, bn=False)
        for i in range(nstack)])
    self.merge_features = nn.ModuleList(
        [Merge(inp_dim, inp_dim) for i in range(nstack - 1)])
    self.merge_preds = nn.ModuleList(
        [Merge(oup_dim, inp_dim) for i in range(nstack - 1)])
    self.nstack = nstack
    # self.myAEloss = AEloss()
    # self.heatmapLoss = HeatmapLoss()
    self.mae_loss = MAELoss()
def __init__(self, x_dim, y_dim):
    super(Merge, self).__init__()
    self.conv = Conv(x_dim, y_dim, 1, relu=False, bn=False)
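# Merge is thus a bare 1x1 convolution (no ReLU, no BN) used throughout the
# snippets above to remap channel counts so predictions (oup_dim channels) can
# be added back onto the feature stream (inp_dim channels). A minimal shape
# check, assuming Conv wraps nn.Conv2d as noted earlier:
merge = Merge(x_dim=17, y_dim=256)
x = torch.randn(1, 17, 64, 64)
assert merge.conv(x).shape == (1, 256, 64, 64)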
def __init__(self, block, num_classes, num_blocks=[3, 4, 6, 3],
             conv_channels=[64, 128, 256, 512], stride_times=5,
             init_weights=True):
    super(ResNet_v2, self).__init__()
    # Normal input size (for ImageNet; e.g. 224, 256, 288)
    if stride_times == 5:
        self.conv1 = Conv(c1=3, c2=conv_channels[0], k=7, s=2, p=3, g=1,
                          bias=False, bn=False, act=False)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    # Small input size (e.g. 112, 128, 144)
    elif stride_times == 4:
        self.conv1 = Conv(c1=3, c2=conv_channels[0], k=7, s=1, p=3, g=1,
                          bias=False, bn=False, act=False)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    # Tiny input size (e.g. 56, 64, 72)
    elif stride_times == 3:
        self.conv1 = Conv(c1=3, c2=conv_channels[0], k=3, s=1, p=1, g=1,
                          bias=False, bn=False, act=False)
        self.maxpool = nn.Sequential()
    # Extremely tiny input size (for CIFAR; e.g. 28, 32, 36):
    # no stride in the first conv, and group4 is removed
    elif stride_times == 2:
        self.conv1 = Conv(c1=3, c2=conv_channels[0], k=3, s=1, p=1, g=1,
                          bias=False, bn=False, act=False)
        self.maxpool = nn.Sequential()

    self.group1 = ResGroup(block, conv_channels[0], conv_channels[0],
                           num_blocks[0], 1)
    self.group2 = ResGroup(block, conv_channels[0] * block.expansion,
                           conv_channels[1], num_blocks[1])
    self.group3 = ResGroup(block, conv_channels[1] * block.expansion,
                           conv_channels[2], num_blocks[2])
    if stride_times == 2:  # group4 is removed for stride_times=2
        self.group4 = nn.Sequential()
    else:
        self.group4 = ResGroup(block, conv_channels[2] * block.expansion,
                               conv_channels[3], num_blocks[3])

    self.relu = nn.ReLU(inplace=True)
    self.avgpool = nn.AdaptiveAvgPool2d(1)
    if stride_times == 2:
        # use conv_channels[2] because group4 was removed
        self.bn = nn.BatchNorm2d(conv_channels[2] * block.expansion)
        self.fc = nn.Linear(conv_channels[2] * block.expansion, num_classes)
    else:
        self.bn = nn.BatchNorm2d(conv_channels[3] * block.expansion)
        self.fc = nn.Linear(conv_channels[3] * block.expansion, num_classes)

    if init_weights:
        self._initialize_weights()
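# Minimal usage sketch; the block class name `Bottleneck` and the existence of
# a forward() are assumptions (only __init__ is shown in the source), and the
# block class must expose an `expansion` attribute as the code above requires.
model = ResNet_v2(Bottleneck, num_classes=10, stride_times=2)  # CIFAR-sized input
logits = model(torch.randn(2, 3, 32, 32))  # expected shape: (2, 10)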