import torch.nn as nn
import torch.optim as optim


def get_model(learning_rate=1e-3, num_classes=200):

    model = SqueezeNet(num_classes=num_classes)

    # freeze the first conv layer so its weights stay fixed during training
    model.features[0].weight.requires_grad = False

    # all conv layer weights; the first and the last are filtered out below
    all_conv_weights = [
        (n, p) for n, p in model.named_parameters()
        if 'weight' in n and 'bn' not in n and 'features.1.' not in n
    ]
    weights_to_be_quantized = [
        p for n, p in all_conv_weights
        if not ('classifier' in n or 'features.0.' in n)
    ]

    # the last layer
    weights = [model.classifier[1].weight]
    biases = [model.classifier[1].bias]

    # parameters of batch_norm layers
    bn_weights = [
        p for n, p in model.named_parameters()
        if ('bn' in n or 'features.1.' in n) and 'weight' in n
    ]
    bn_biases = [
        p for n, p in model.named_parameters()
        if ('bn' in n or 'features.1.' in n) and 'bias' in n
    ]

    params = [{
        'params': weights,
        'weight_decay': 1e-4
    }, {
        'params': weights_to_be_quantized
    }, {
        'params': biases
    }, {
        'params': bn_weights
    }, {
        'params': bn_biases
    }]
    optimizer = optim.Adam(params, lr=learning_rate)

    # loss function
    criterion = nn.CrossEntropyLoss().cuda()
    # move the model to gpu
    model = model.cuda()
    return model, criterion, optimizer
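# Usage sketch (an assumption, not part of the original source): one training
# step with the triple returned by get_model above. `inputs` and `labels`
# would come from a DataLoader batch; the frozen first-layer weight simply
# receives no gradient during backward().
def training_step(model, criterion, optimizer, inputs, labels):
    inputs, labels = inputs.cuda(), labels.cuda()
    optimizer.zero_grad()                      # reset accumulated gradients
    loss = criterion(model(inputs), labels)    # forward pass + cross-entropy
    loss.backward()                            # backprop through trainable params
    optimizer.step()                           # Adam update of the param groups
    return loss.item()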
Example #2
def get_model():

    model = SqueezeNet()

    # create different parameter groups
    weights = [
        p for n, p in model.named_parameters()
        if 'weight' in n and 'bn' not in n and 'features.1.' not in n
    ]
    biases = [model.classifier[1].bias]
    bn_weights = [
        p for n, p in model.named_parameters()
        if ('bn' in n or 'features.1.' in n) and 'weight' in n
    ]
    bn_biases = [
        p for n, p in model.named_parameters()
        if ('bn' in n or 'features.1.' in n) and 'bias' in n
    ]

    # initialize batch norm params (scale = 1, shift = 0)
    for p in bn_weights:
        nn.init.constant_(p, 1.0)
    for p in bn_biases:
        nn.init.constant_(p, 0.0)

    params = [{
        'params': weights,
        'weight_decay': 1e-3
    }, {
        'params': biases
    }, {
        'params': bn_weights
    }, {
        'params': bn_biases
    }]
    optimizer = optim.SGD(params, lr=4e-2, momentum=0.95, nesterov=True)

    # loss function
    criterion = nn.CrossEntropyLoss().cuda()
    # move the model to gpu
    model = model.cuda()
    return model, criterion, optimizer
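# Sketch (an assumption, not from the original): parameter groups that omit a
# hyperparameter inherit the optimizer-wide default, so only the conv weights
# above are regularized; biases and batch-norm params get SGD's default of 0.
def print_group_weight_decay(optimizer):
    for i, group in enumerate(optimizer.param_groups):
        print(i, len(group['params']), group['weight_decay'])

Example #3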
def get_model():

    model = SqueezeNet()

    # create different parameter groups
    weights = [
        p for n, p in model.named_parameters()
        if 'weight' in n and 'bn' not in n and 'features.1.' not in n
    ]
    biases = [model.classifier[1].bias]
    bn_weights = [
        p for n, p in model.named_parameters()
        if ('bn' in n or 'features.1.' in n) and 'weight' in n
    ]
    bn_biases = [
        p for n, p in model.named_parameters()
        if ('bn' in n or 'features.1.' in n) and 'bias' in n
    ]

    # initialize batch norm params (scale = 1, shift = 0)
    for p in bn_weights:
        nn.init.constant_(p, 1.0)
    for p in bn_biases:
        nn.init.constant_(p, 0.0)

    params = [{
        'params': weights,
        'weight_decay': 3e-4
    }, {
        'params': biases
    }, {
        'params': bn_weights
    }, {
        'params': bn_biases
    }]
    optimizer = optim.SGD(params, lr=4e-2, momentum=0.95, nesterov=True)

    loss = nn.CrossEntropyLoss().cuda()
    model = model.cuda()  # move the model to gpu
    return model, loss, optimizer
Example #4
                    new[k][0] = valid['.'.join(parts[:-1] +
                                               ['weight'])].max() / 2
            elif parts[-1] == 'W_n':
                if k not in new:
                    new[k] = torch.Tensor(1)
                    new[k][0] = -valid['.'.join(parts[:-1] +
                                                ['weight'])].min() / 2
    return new
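# The branches above seed missing one-element scaling factors from the matching
# full-precision 'weight' tensor: max(weight) / 2 for the positive factor and
# -min(weight) / 2 for 'W_n'.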


# dct = torch.load('resnet.pth')
# dct = fix_state(model.state_dict(), dct)
# model.load_state_dict(dct)

if use_cuda:
    model.cuda()

criterion = F.cross_entropy
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr,
                       weight_decay=args.weight_decay)
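# Unlike the grouped optimizers above, model.parameters() forms a single group
# here, so args.weight_decay also regularizes biases and batch-norm parameters.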


def train(epoch):
    model.train()
    running_loss = 0
    running_total = 0
    correct = 0
    for i, (inputs, labels) in enumerate(trainloader):
        if use_cuda:
            inputs, labels = inputs.cuda(), labels.cuda()