# find the latest saved checkpoint (getEpoch is sketched below)
latest_epoch = 0
for name in data_list:
    epoch = int(getEpoch(name))
    if epoch > latest_epoch:
        latest_epoch = epoch
if latest_epoch != 0:
    network.load_state_dict(
        torch.load(
            os.path.join(
                default_path, 'training_logs/model_' + str(model_id) +
                '/checkpoints/model_' + str(model_id) + '_epoch_' +
                str(latest_epoch) + '.pth')))
    print("Recovered checkpoint from epoch: " + str(latest_epoch))
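
# "getEpoch" and "data_list" are assumed to exist above this fragment; a
# minimal sketch of such a helper (it would need to run before the loop above),
# assuming checkpoints are named "model_<id>_epoch_<n>.pth":
def getEpoch(checkpoint_name):
    # "model_1_epoch_17.pth" -> "17"
    return checkpoint_name.split("_epoch_")[-1].split(".pth")[0]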

val_dataset = DatasetVal(
    cityscapes_data_path=os.path.join(default_path, 'data/cityscapes'),
    cityscapes_meta_path=os.path.join(default_path, 'data/cityscapes/meta'))

num_val_batches = int(len(val_dataset) / batch_size)
print("num_val_batches:", num_val_batches)

val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=1)

with open(os.path.join(default_path, 'data/cityscapes/meta/class_weights.pkl'),
          "rb") as file:  # (needed for python3)
    class_weights = np.array(pickle.load(file))
class_weights = torch.from_numpy(class_weights)
class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()
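# (Variable has been a no-op since PyTorch 0.4; an equivalent modern form of
# the two lines above: class_weights = torch.from_numpy(class_weights).float().cuda())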
Example #2

import os
import pickle
import time

import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.autograd import Variable

# project-local modules (module paths assumed from the original deeplabv3 repo layout):
from datasets import DatasetTrain, DatasetVal
from model.deeplabv3 import DeepLabV3

# NOTE! change this ID so you don't overwrite the log data of an earlier run:
model_id = "1"

num_epochs = 1000
batch_size = 3
learning_rate = 0.0001

network = DeepLabV3(model_id, project_dir="/root/deeplabv3").cuda()

train_dataset = DatasetTrain(
    cityscapes_data_path="/root/deeplabv3/data/cityscapes",
    cityscapes_meta_path="/root/deeplabv3/data/cityscapes/meta")
val_dataset = DatasetVal(
    cityscapes_data_path="/root/deeplabv3/data/cityscapes",
    cityscapes_meta_path="/root/deeplabv3/data/cityscapes/meta")

num_train_batches = int(len(train_dataset) / batch_size)
num_val_batches = int(len(val_dataset) / batch_size)
print("num_train_batches:", num_train_batches)
print("num_val_batches:", num_val_batches)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=1)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=1)
def train_sem():

    # NOTE! change this ID so you don't overwrite the log data of an earlier run:

    num_epochs = 100
    batch_size = 1
    model_id = '1'
    learning_rate = 0.00001

    # the snippet never defines "model"; construct it here, mirroring the
    # network construction in the surrounding examples (project_dir is an
    # assumption):
    model = DeepLabV3(model_id, project_dir=".").cuda()

    train_dataset = DatasetTrain(cityscapes_data_path="data/cityscapes",
                                cityscapes_meta_path="data/cityscapes/meta")
    val_dataset = DatasetVal(cityscapes_data_path="data/cityscapes",
                            cityscapes_meta_path="data/cityscapes/meta")


    num_train_batches = int(len(train_dataset)/batch_size)
    num_val_batches = int(len(val_dataset)/batch_size)
    print("num_train_batches:", num_train_batches)
    print("num_val_batches:", num_val_batches)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                            batch_size=batch_size, shuffle=False,
                                            num_workers=1)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                            batch_size=batch_size, shuffle=False,
                                            num_workers=1)
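
    # add_weight_decay is a helper from the original repo's utils; since this
    # snippet doesn't import it, here is a minimal sketch of the usual
    # implementation, which exempts 1-D parameters (biases, BatchNorm weights)
    # from L2 regularization:
    def add_weight_decay(net, l2_value, skip_list=()):
        decay, no_decay = [], []
        for name, param in net.named_parameters():
            if not param.requires_grad:
                continue  # skip frozen parameters
            if len(param.shape) == 1 or name in skip_list:
                no_decay.append(param)
            else:
                decay.append(param)
        return [{"params": no_decay, "weight_decay": 0.0},
                {"params": decay, "weight_decay": l2_value}]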

    params = add_weight_decay(model, l2_value=0.0001)
    optimizer = torch.optim.Adam(params, lr=learning_rate)

    with open("data/cityscapes/meta/class_weights.pkl", "rb") as file: # (needed for python3)
        class_weights = np.array(pickle.load(file))
    class_weights = torch.from_numpy(class_weights)
    class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()

    # loss function
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)

    epoch_losses_train = []
    epoch_losses_val = []
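
    # the loop below starts at len(epoch_losses_val) so a restarted run resumes
    # at the right epoch; for that to work, the histories saved at the end of
    # each epoch must be reloaded first (a minimal sketch, assuming the .npy
    # files from a previous run exist):
    if os.path.exists('model_weight/validation_loss.npy'):
        epoch_losses_train = list(np.load('model_weight/training_loss.npy'))
        epoch_losses_val = list(np.load('model_weight/validation_loss.npy'))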

    model_dir = 'model_weight'
    os.makedirs(model_dir, exist_ok=True)  # plots and checkpoints are written here


    for epoch in range(len(epoch_losses_val), num_epochs):
        print("###########################")
        print("######## NEW EPOCH ########")
        print("###########################")
        print("epoch: %d/%d" % (epoch+1, num_epochs))

        ############################################################################
        # train:
        ############################################################################
        torch.autograd.set_detect_anomaly(True)  # enable anomaly detection to ease debugging of bad gradients
        model.train() # (set in training mode, this affects BatchNorm and dropout)
        batch_losses = []
        batch_accuracy_train = []
        current_time = time.time()
        for step, (imgs, label_imgs) in enumerate(train_loader):
            
            imgs = Variable(imgs).cuda() # (shape: (batch_size, 3, img_h, img_w))
            label_imgs = Variable(label_imgs.type(torch.LongTensor)).cuda() # (shape: (batch_size, img_h, img_w))

            outputs = model(imgs) # (shape: (batch_size, num_classes, img_h, img_w))

            # compute the loss:
            loss = loss_fn(outputs[-1], label_imgs)
            loss_value = loss.data.cpu().numpy() 
            batch_losses.append(loss_value)

            # optimization step:
            optimizer.zero_grad() # (reset gradients)
            #print(loss)
            loss.backward() # (compute gradients)
            optimizer.step() # (perform optimization step)


        print('Time for training 1 epoch : ', time.time() - current_time)
        current_time = time.time()
            

        epoch_loss = np.mean(batch_losses)
        epoch_losses_train.append(epoch_loss)

        
        print("train loss: %g" % epoch_loss)
        plt.figure(1)
        plt.plot(epoch_losses_train, "k^")
        plt.plot(epoch_losses_train, "k")
        plt.ylabel("loss")
        plt.xlabel("epoch")
        plt.title("train loss per epoch")
        plt.savefig("%s/epoch_losses_train.png" % model_dir)
        plt.close(1)


        print("####")

        ############################################################################
        # val:
        ############################################################################
        model.eval() # (set in evaluation mode, this affects BatchNorm and dropout)
        batch_losses = []
        batch_accuracy_test = []
        for step, (imgs, label_imgs, img_ids) in enumerate(val_loader):
            with torch.no_grad(): # (corresponds to setting volatile=True in all variables, this is done during inference to reduce memory consumption)
                imgs = Variable(imgs).cuda() # (shape: (batch_size, 3, img_h, img_w))
                label_imgs = Variable(label_imgs.type(torch.LongTensor)).cuda() # (shape: (batch_size, img_h, img_w))

                outputs = model(imgs) # (shape: (batch_size, num_classes, img_h, img_w))

                # rough binary IoU: treats any nonzero label / nonzero logit as foreground
                intersection = np.logical_and(label_imgs.data.cpu().numpy(), outputs[-1].data.cpu().numpy())
                union = np.logical_or(label_imgs.data.cpu().numpy(), outputs[-1].data.cpu().numpy())
                iou_score = np.sum(intersection) / np.sum(union)
                batch_accuracy_test.append(iou_score)


                # compute the loss:
                loss = loss_fn(outputs[-1], label_imgs)
                loss_value = loss.data.cpu().numpy()
                batch_losses.append(loss_value)

            

        epoch_loss = np.mean(batch_losses)
        epoch_losses_val.append(epoch_loss)
        
        print('Time for testing 1 epoch : ', time.time() - current_time)
        
        print("val loss: %g" % epoch_loss)
        plt.figure(1)
        plt.plot(epoch_losses_val, "k^")
        plt.plot(epoch_losses_val, "k")
        plt.ylabel("loss")
        plt.xlabel("epoch")
        plt.title("val loss per epoch")
        plt.savefig("%s/epoch_losses_val.png" % model_dir)
        plt.close(1)



        # save the model weights to disk:
        checkpoint_path = model_dir + "/model_" + model_id + "_epoch_" + str(epoch+1) + ".pth"
        torch.save(model.state_dict(), checkpoint_path)

        np.save('model_weight/training_loss.npy', epoch_losses_train)
        np.save('model_weight/validation_loss.npy', epoch_losses_val)

if __name__ == "__main__":
    train_sem()
Example #4

import numpy as np
import pickle
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import cv2

import torch
import torch.nn as nn
from torch.autograd import Variable

# project-local modules (module paths assumed from the original deeplabv3 repo layout):
from datasets import DatasetVal
from model.deeplabv3 import DeepLabV3

batch_size = 2

network = DeepLabV3("eval_val", project_dir="/root/deeplabv3").cuda()
network.load_state_dict(
    torch.load(
        "/root/deeplabv3/pretrained_models/model_13_2_2_2_epoch_580.pth"))

val_dataset = DatasetVal(cityscapes_data_path="/data/cityscapes_dataset",
                         cityscapes_meta_path="/data/cityscapes_dataset/meta")

num_val_batches = int(len(val_dataset) / batch_size)
print("num_val_batches:", num_val_batches)

val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=1)

with open("/data/cityscapes_dataset/meta/class_weights.pkl",
          "rb") as file:  # (needed for python3)
    class_weights = np.array(pickle.load(file))
class_weights = torch.from_numpy(class_weights)
class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()
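
# The example ends before the evaluation loop; a minimal sketch of how the
# pieces above are typically wired together (all names as defined above; the
# loader is assumed to yield (imgs, label_imgs, img_ids) triples as in the
# training example):
loss_fn = nn.CrossEntropyLoss(weight=class_weights)
network.eval()
batch_losses = []
with torch.no_grad():
    for imgs, label_imgs, img_ids in val_loader:
        imgs = imgs.cuda()  # (shape: (batch_size, 3, img_h, img_w))
        label_imgs = label_imgs.type(torch.LongTensor).cuda()  # (shape: (batch_size, img_h, img_w))
        outputs = network(imgs)  # (shape: (batch_size, num_classes, img_h, img_w))
        batch_losses.append(loss_fn(outputs, label_imgs).item())
print("val loss: %g" % np.mean(batch_losses))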
Example #5

import torch
import torch.nn.functional as F
from torch.autograd import Variable

import numpy as np
import pickle
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import cv2

# project-local modules (module paths assumed from the original deeplabv3 repo layout):
from datasets import DatasetVal
from model.deeplabv3 import DeepLabV3

batch_size = 2

network = DeepLabV3("eval_val", project_dir="..").cuda()
network.load_state_dict(
    torch.load("../pretrained_models/model_13_2_2_2_epoch_580.pth"))

val_dataset = DatasetVal(cityscapes_data_path="../data/cityscapes",
                         cityscapes_meta_path="../data/cityscapes/meta")

num_val_batches = int(len(val_dataset) / batch_size)
print("num_val_batches:", num_val_batches)

val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=1)

with open("../data/cityscapes/meta/class_weights.pkl",
          "rb") as file:  # (needed for python3)
    class_weights = np.array(pickle.load(file))
class_weights = torch.from_numpy(class_weights)
class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()
Example #6
learning_rate = 0.0001  # TODO: don't use one fixed learning rate; add a schedule that decays it once around iteration 400 (see the StepLR sketch below)

network = DeepLabV3(
    model_id,
    project_dir="/workspace/mnt/group/other/luchao/deeplabv3/deeplabv3").cuda()
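
# A sketch of the learning-rate decay requested in the TODO above, using
# torch.optim.lr_scheduler.StepLR (the decay-at-400 interval comes from that
# comment; gamma=0.1 and the Adam optimizer are assumptions mirroring the
# other examples); scheduler.step() is then called once per epoch:
optimizer = torch.optim.Adam(network.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=400, gamma=0.1)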

train_dataset = DatasetTrain(
    cityscapes_data_path="/workspace/mnt/group/other/luchao/deeplabv3/deeplabv3/data/cityscapes",
    cityscapes_meta_path="/workspace/mnt/group/other/luchao/deeplabv3/deeplabv3/data/cityscapes/meta")
val_dataset = DatasetVal(
    cityscapes_data_path="/workspace/mnt/group/other/luchao/deeplabv3/deeplabv3/data/cityscapes",
    cityscapes_meta_path="/workspace/mnt/group/other/luchao/deeplabv3/deeplabv3/data/cityscapes/meta")

num_train_batches = int(len(train_dataset) / batch_size)
num_val_batches = int(len(val_dataset) / batch_size)
print("num_train_batches:", num_train_batches)
print("num_val_batches:", num_val_batches)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=1)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,