Example #1
def main(args):
    crossVal = 5
    maxEpoch = [80, 80, 80, 80, 80]
    mIOUList = []
    avgmIOU = 0

    for i in range(crossVal):
        dataLoad = ld.LoadData(args.data_dir, args.classes)
        data = dataLoad.processData(i, args.data_name)
        mean = data['mean']  #np.array([0.485, 0.456, 0.406], dtype=np.float32)
        std = data['std']  #np.array([0.229, 0.224, 0.225], dtype=np.float32)

        print('Data statistics:')
        print(mean, std)

        pthName = 'model_' + args.model_name + '_crossVal' + str(
            i + 1) + '_' + str(maxEpoch[i]) + '.pth'
        pretrainedModel = args.pretrained + args.data_name + '/' + args.model_name + '/' + pthName
        mIOU = "{:.4f}".format(main_te(args, i, pretrainedModel, mean, std))
        mIOU = float(mIOU)
        mIOUList.append(mIOU)
        avgmIOU = avgmIOU + mIOU / crossVal  # accumulate the average over all folds
    print(mIOUList)
    print(args.model_name, args.data_name, "{:.4f}".format(avgmIOU))
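Example #1 only shows the body of main(args); the args namespace it expects must carry data_dir, classes, data_name, model_name and pretrained. A hypothetical command-line front end (argument names taken from the snippet above, defaults are placeholders) might look like this:

import argparse

parser = argparse.ArgumentParser(description='Evaluate each cross-validation fold')
parser.add_argument('--data_dir', default='./data/', help='dataset root passed to ld.LoadData')
parser.add_argument('--data_name', default='dataset', help='dataset name forwarded to processData')
parser.add_argument('--classes', type=int, default=2, help='number of classes')
parser.add_argument('--model_name', default='model', help='model identifier used in the .pth file name')
parser.add_argument('--pretrained', default='./results/', help='root folder holding the trained .pth files')

if __name__ == '__main__':
    main(parser.parse_args())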
Example #2
    "sneaker", "bag", "ankle boot"
]

image_size = [28, 28]
image_depth = 1
num_classes = 10
EPOCHS = 5
BATCH_SIZE = 32

image_height = image_size[0]
print("image_height = ", image_height)
image_width = image_size[1]
print("image_width = ", image_width)

#load training data
load_data = loadData.LoadData(image_size)

train_data = pd.read_csv(train_data_path)
x_train, y_train = load_data.load_formatted_data(data=train_data)

test_data = pd.read_csv(test_data_path)
x_test, y_test = load_data.load_formatted_data(data=test_data)

num_train_samples = x_train.shape[0]
num_test_samples = x_test.shape[0]

x_train = x_train.reshape(
    (num_train_samples, image_height, image_width, image_depth))
x_test = x_test.reshape(
    (num_test_samples, image_height, image_width, image_depth))
input_shape = (image_height, image_width, image_depth)
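The loadData.LoadData class used here is not shown. Since the CSVs are read with pandas and reshaped to 28x28x1 images with 10 classes (Fashion-MNIST-style data), a plausible sketch of load_formatted_data, assuming a label column followed by flattened pixel columns, could be:

import numpy as np


class LoadData:
    def __init__(self, image_size):
        self.image_size = image_size

    def load_formatted_data(self, data):
        # assumption: first CSV column is the class label, the rest are flattened pixels
        y = data.iloc[:, 0].values
        x = data.iloc[:, 1:].values.astype(np.float32) / 255.0  # scale pixels to [0, 1]
        return x, y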
Example #3
def trainValidateSegmentation(args):
    '''
    Main function for training and validation
    :param args: global arguments
    :return: None
    '''
    # check if processed data file exists or not
    if not os.path.isfile(args.cached_data_file):
        dataLoad = ld.LoadData(args.data_dir, args.classes, args.cached_data_file)
        data = dataLoad.processData()
        if data is None:
            print('Error while pickling data. Please check.')
            exit(-1)
    else:
        data = pickle.load(open(args.cached_data_file, "rb"))

    q = args.q
    p = args.p
    # load the model
    if not args.decoder:
        model = net.ESPNet_Encoder(args.classes, p=p, q=q)
        args.savedir = args.savedir + '_enc_' + str(p) + '_' + str(q) + '/'
    else:
        model = net.ESPNet(args.classes, p=p, q=q, encoderFile=args.pretrained)
        args.savedir = args.savedir + '_dec_' + str(p) + '_' + str(q) + '/'

    if args.onGPU:
        model = model.cuda()

    # create the directory if not exist
    if not os.path.exists(args.savedir):
        os.mkdir(args.savedir)

    if args.visualizeNet:
        x = Variable(torch.randn(1, 3, args.inWidth, args.inHeight))

        if args.onGPU:
            x = x.cuda()

        y = model.forward(x)
        g = viz.make_dot(y)
        g.render(args.savedir + 'model.png', view=False)

    total_paramters = netParams(model)
    print('Total network parameters: ' + str(total_paramters))

    # define optimization criteria
    weight = torch.from_numpy(data['classWeights']) # convert the numpy array to torch
    if args.onGPU:
        weight = weight.cuda()

    criteria = CrossEntropyLoss2d(weight) #weight

    if args.onGPU:
        criteria = criteria.cuda()

    print('Data statistics')
    print(data['mean'], data['std'])
    print(data['classWeights'])

    #compose the data with transforms
    trainDataset_main = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1024, 512),
        myTransforms.RandomCropResize(32),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64).
        myTransforms.ToTensor(args.scaleIn),
        #
    ])

    trainDataset_scale1 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1536, 768), # 1536, 768
        myTransforms.RandomCropResize(100),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])

    trainDataset_scale2 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1280, 720), # 1536, 768
        myTransforms.RandomCropResize(100),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])

    trainDataset_scale3 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(768, 384),
        myTransforms.RandomCropResize(32),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])

    trainDataset_scale4 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(512, 256),
        #myTransforms.RandomCropResize(20),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64).
        myTransforms.ToTensor(args.scaleIn),
        #
    ])


    valDataset = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1024, 512),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])

    # since we are training from scratch, we create data loaders at different scales
    # so that we can generate more augmented data and prevent the network from overfitting

    trainLoader = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'], transform=trainDataset_main),
        batch_size=args.batch_size + 2, shuffle=True, num_workers=args.num_workers, pin_memory=True)

    trainLoader_scale1 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'], transform=trainDataset_scale1),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True)

    trainLoader_scale2 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'], transform=trainDataset_scale2),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True)

    trainLoader_scale3 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'], transform=trainDataset_scale3),
        batch_size=args.batch_size + 4, shuffle=True, num_workers=args.num_workers, pin_memory=True)

    trainLoader_scale4 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'], transform=trainDataset_scale4),
        batch_size=args.batch_size + 4, shuffle=True, num_workers=args.num_workers, pin_memory=True)

    valLoader = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['valIm'], data['valAnnot'], transform=valDataset),
        batch_size=args.batch_size + 4, shuffle=False, num_workers=args.num_workers, pin_memory=True)

    if args.onGPU:
        cudnn.benchmark = True

    start_epoch = 0

    if args.resume:
        if os.path.isfile(args.resumeLoc):
            print("=> loading checkpoint '{}'".format(args.resumeLoc))
            checkpoint = torch.load(args.resumeLoc)
            start_epoch = checkpoint['epoch']
            #args.lr = checkpoint['lr']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                .format(args.resumeLoc, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resumeLoc))
    

    logFileLoc = args.savedir + args.logFile
    if os.path.isfile(logFileLoc):
        logger = open(logFileLoc, 'a')
    else:
        logger = open(logFileLoc, 'w')
        logger.write("Parameters: %s" % (str(total_paramters)))
        logger.write("\n%s\t%s\t%s\t%s\t%s\t" % ('Epoch', 'Loss(Tr)', 'Loss(val)', 'mIOU (tr)', 'mIOU (val)'))
    logger.flush()

    optimizer = torch.optim.Adam(model.parameters(), args.lr, (0.9, 0.999), eps=1e-08, weight_decay=5e-4)
    # halve the learning rate every args.step_loss epochs
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.step_loss, gamma=0.5)


    for epoch in range(start_epoch, args.max_epochs):

        scheduler.step(epoch)
        lr = 0
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
        print("Learning rate: " +  str(lr))

        # train for one epoch
        # We consider 1 epoch with all the training data (at different scales)
        train(args, trainLoader_scale1, model, criteria, optimizer, epoch)
        train(args, trainLoader_scale2, model, criteria, optimizer, epoch)
        train(args, trainLoader_scale4, model, criteria, optimizer, epoch)
        train(args, trainLoader_scale3, model, criteria, optimizer, epoch)
        lossTr, overall_acc_tr, per_class_acc_tr, per_class_iu_tr, mIOU_tr = train(args, trainLoader, model, criteria, optimizer, epoch)

        # evaluate on validation set
        lossVal, overall_acc_val, per_class_acc_val, per_class_iu_val, mIOU_val = val(args, valLoader, model, criteria)
        
            
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': str(model),
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lossTr': lossTr,
            'lossVal': lossVal,
            'iouTr': mIOU_tr,
            'iouVal': mIOU_val,
            'lr': lr
        }, args.savedir + 'checkpoint.pth.tar')

        #save the model also
        model_file_name = args.savedir + '/model_' + str(epoch + 1) + '.pth'
        torch.save(model.state_dict(), model_file_name)

        

        with open(args.savedir + 'acc_' + str(epoch) + '.txt', 'w') as log:
            log.write("\nEpoch: %d\t Overall Acc (Tr): %.4f\t Overall Acc (Val): %.4f\t mIOU (Tr): %.4f\t mIOU (Val): %.4f" % (epoch, overall_acc_tr, overall_acc_val, mIOU_tr, mIOU_val))
            log.write('\n')
            log.write('Per Class Training Acc: ' + str(per_class_acc_tr))
            log.write('\n')
            log.write('Per Class Validation Acc: ' + str(per_class_acc_val))
            log.write('\n')
            log.write('Per Class Training mIOU: ' + str(per_class_iu_tr))
            log.write('\n')
            log.write('Per Class Validation mIOU: ' + str(per_class_iu_val))

        logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.7f" % (epoch, lossTr, lossVal, mIOU_tr, mIOU_val, lr))
        logger.flush()
        print("Epoch : " + str(epoch) + ' Details')
        print("\nEpoch No.: %d\tTrain Loss = %.4f\tVal Loss = %.4f\t mIOU(tr) = %.4f\t mIOU(val) = %.4f" % (epoch, lossTr, lossVal, mIOU_tr, mIOU_val))
    logger.close()
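CrossEntropyLoss2d is imported from elsewhere in the project and is not shown here. A minimal sketch, assuming it is the usual per-pixel loss combining log_softmax with a class-weighted NLLLoss, would be:

import torch.nn as nn
import torch.nn.functional as F


class CrossEntropyLoss2d(nn.Module):
    def __init__(self, weight=None):
        super().__init__()
        self.loss = nn.NLLLoss(weight)

    def forward(self, outputs, targets):
        # outputs: (N, C, H, W) raw scores; targets: (N, H, W) class indices
        return self.loss(F.log_softmax(outputs, dim=1), targets)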
Example #4
def run(args):
    #load configuration
    config = Config(args)
    #load data
    dataset = ld.LoadData(args.input)
    data = dataset.data
    label = dataset.label
    anomaly_num = dataset.anomaly_num
    feature_index = dataset.feature_index
    feature_item_num = np.sum(dataset.feature_item_num)
    instance_num = len(data)
    #for training
    training_data = data[:instance_num - 2 * anomaly_num]
    training_data = ld.get_shaped_data(training_data, config.batch_size,
                                       config.block_size, len(data[0]))
    instance_dim = len(training_data[0][0][0])

    #for testing
    testing_data = data[instance_num - 2 * anomaly_num:]
    testing_label = label[instance_num - 2 * anomaly_num:]
    #shuffle testing data and labels with the same seed so they stay aligned
    randnum = config.seed
    random.seed(randnum)
    random.shuffle(testing_data)
    random.seed(randnum)
    random.shuffle(testing_label)

    testing_data = ld.get_shaped_data(testing_data, config.batch_size,
                                      config.block_size, len(data[0]))
    testing_data_num = len(testing_label) - len(testing_label) % (
        config.block_size * config.batch_size)
    testing_label = testing_label[:
                                  testing_data_num]  # testing data instance level ground truth

    print("instance_dim", training_data.shape, instance_dim)
    print("feature_item_num", feature_item_num)
    with tf.Graph().as_default(), tf.Session() as sess:
        #graph settings
        FM_weight_dim = config.FM_weight_dim
        batch_size = config.batch_size
        block_size = config.block_size
        attention_dim = config.attention_dim
        autoencoder_hidden_dim = config.autoencoder_hidden_dim
        lstm_dropout_keep_prob = config.lstm_dropout_keep_prob
        lstm_layer_num = config.lstm_layer_num
        lstm_hidden_size = config.lstm_hidden_size
        is_training = config.is_training
        gan_hidden_dim = config.gan_hidden_dim
        alpha = config.alpha
        beta = config.beta
        learning_rate = config.learning_rate
        model = AnomalyNet(feature_index, FM_weight_dim, feature_item_num,
                           batch_size, block_size, instance_dim, attention_dim,
                           autoencoder_hidden_dim, lstm_dropout_keep_prob,
                           lstm_layer_num, lstm_hidden_size, is_training,
                           gan_hidden_dim, alpha, beta, learning_rate)
        saver = tf.train.Saver(
            max_to_keep=10
        )  #saver for checkpoints, add var_list because of batching training

        init = tf.global_variables_initializer()
        sess.run(init)

        for epoch in range(config.epoch):
            # training
            for i in range(len(training_data)):
                curr_batch = training_data[i]
                feed_dict = {model.data: curr_batch}
                result = sess.run(
                    (model.D_train, model.G_train, model.test1, model.test2),
                    feed_dict=feed_dict)
                instance_loss = np.mean(result[2])
                block_loss = np.mean(result[3])
                print(
                    "current epoch %d, in batch %d, instance average loss %.4f, block average loss %.4f"
                    % (epoch, i, instance_loss, block_loss), result[2].shape,
                    result[3].shape)

            model_path = "saved_model/epoch_%s.ckpt" % (epoch)
            saver.save(sess, model_path)
            # ===== testing =====

            #instance output
            instance_loss_list = []
            block_loss_list = []
            for i in range(len(testing_data)):
                curr_batch = testing_data[i]
                feed_dict = {model.data: curr_batch}
                instance_loss, block_loss = sess.run(
                    (model.instance_loss, model.block_loss),
                    feed_dict=feed_dict)
                for j in range(len(instance_loss)):
                    instance_loss_list.append(instance_loss[j][0])

                for j in range(len(block_loss)):
                    block_loss_list.append(block_loss[j][0])

            bw = open(args.instance_output + '_%d' % (epoch), 'w')  #by dingfu
            bw.write("true pred\n")
            for i in range(len(instance_loss_list)):
                bw.write(
                    str(testing_label[i]) + " " + str(instance_loss_list[i]) +
                    "\n")
            bw.close()

            #block output
            testing_block_num = testing_data_num // config.block_size
            block_true = []
            for i in range(testing_block_num):
                true_sum = np.sum(testing_label[i * config.block_size:(i + 1) *
                                                config.block_size])

                # generate ground truth
                if true_sum < config.block_size * config.block_ratio:
                    block_true.append(0)
                else:
                    block_true.append(1)

            bw = open(args.block_output + '_%d' % (epoch), 'w')  #by dingfu
            bw.write("true pred\n")
            for i in range(testing_block_num):
                bw.write(
                    str(block_true[i]) + " " + str(block_loss_list[i]) + "\n")
            bw.close()

            # print(true_block,pred_block)
            instance_auc, _, _, _ = newmetrics.roc(
                testing_label,
                instance_loss_list,
                pos_label=0,
                output_path=args.instance_output + '_%d' % (epoch))  #by dingfu
            block_auc, _, _, _ = newmetrics.roc(block_true,
                                                block_loss_list,
                                                pos_label=0,
                                                output_path=args.block_output +
                                                '_%d' % (epoch))  #by dingfu
            #print("instance level evaluation: ",instance_eval)
            print('epoch:', epoch, " instance level auc: ", instance_auc)
            #print("block level evaluation: ",block_eval)
            print('epoch:', epoch, " block level auc: ", block_auc)
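newmetrics is a project module that is not shown; the call sites above expect roc(labels, scores, pos_label, output_path) to return four values, the first being the AUC. A sketch under the assumption that it wraps scikit-learn could be:

from sklearn.metrics import auc, roc_curve


def roc(y_true, scores, pos_label=0, output_path=None):
    # hypothetical helper: ROC curve and AUC, treating the reconstruction loss as the score
    fpr, tpr, thresholds = roc_curve(y_true, scores, pos_label=pos_label)
    roc_auc = auc(fpr, tpr)
    if output_path is not None:
        with open(output_path + '.roc', 'w') as f:  # the '.roc' suffix is an assumption
            for fp, tp in zip(fpr, tpr):
                f.write('%f %f\n' % (fp, tp))
    return roc_auc, fpr, tpr, thresholds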
Example #5
random.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

os.environ['PYTHONHASHSEED'] = str(args.seed)

# create the directory if not exist
if not exists(args.savedir):
    os.mkdir(args.savedir)

for (key, value) in vars(args).items():
    print("{0:16} | {1}".format(key, value))

# check if processed data file exists or not
if not isfile(args.cached_data_file):
    dataLoad = ld.LoadData(args.data_dir, args.cached_data_file)
    data = dataLoad.processData()
    if data is None:
        print('Error while pickling data. Please check.')
        exit(-1)
else:
    data = pickle.load(open(args.cached_data_file, "rb"))

data['mean'] = np.array([0.485 * 255., 0.456 * 255., 0.406 * 255.],
                        dtype=np.float32)
data['std'] = np.array([0.229 * 255., 0.224 * 255., 0.225 * 255.],
                       dtype=np.float32)

# load the model
model = BiSalNet()
model.eval()
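The seeding at the top of this example covers Python's random, NumPy and torch's CPU generator. When running on GPU, the following optional settings (not part of the snippet) are commonly added for stricter reproducibility:

torch.cuda.manual_seed_all(args.seed)       # seed all CUDA devices
torch.backends.cudnn.deterministic = True   # force deterministic cuDNN kernels
torch.backends.cudnn.benchmark = False      # disable auto-tuning that can vary between runs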
Example #6
import nltk
import loadData


class RemoveStopWord():
    def __init__(self, textData):
        self._stopWords = nltk.corpus.stopwords.words("english")
        self._result = None
        self._text = textData

    def removeStopWord(self):
        self._stopWords.append('OMG')
        self._stopWords.append(':-)')
        self._result = []
        for index in range(0, len(self._text)):
            # keep only the words that are not in the stop-word list
            kept = [word for word in self._text[index].split()
                    if word not in self._stopWords]
            self._result.append(' '.join(kept))


instanceLoadData = loadData.LoadData('../../ExtractReviews/')
instanceLoadData.openFile('HospitalReviews.txt')
instanceLoadData.readText()
instanceLoadData.closeFile()

instanceRemveStopWord = RemoveStopWord(instanceLoadData._text)
instanceRemveStopWord.removeStopWord()
#pprint.pprint(instanceRemveStopWord._stopWords)
#pprint.pprint(instanceRemveStopWord._result)
#print(instanceRemveStopWord._stopWords)
#print(instanceRemveStopWord._result)
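nltk.corpus.stopwords relies on the stopwords corpus being installed; on a fresh environment the snippet above raises a LookupError until it is downloaded once:

import nltk
nltk.download('stopwords')  # one-time download of the corpus used by RemoveStopWord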
Example #7
def run(args):
    #load configuration
    config = Config(args)
    #load data
    dataset = ld.LoadData(args.input)
    data = dataset.data
    label = dataset.label
    anomaly_num = dataset.anomaly_num
    feature_index = dataset.feature_index
    feature_item_num = np.sum(dataset.feature_item_num)
    instance_num = len(data)
    #for training
    training_data = data[:instance_num - 2 * anomaly_num]
    training_data = ld.get_shaped_data(training_data, config.batch_size,
                                       config.block_size, len(data[0]))
    instance_dim = len(training_data[0][0][0])

    #for testing
    testing_data = data[instance_num - 2 * anomaly_num:]
    testing_label = label[instance_num - 2 * anomaly_num:]
    #shuffle testing data and labels with the same seed so they stay aligned
    randnum = config.seed
    random.seed(randnum)
    random.shuffle(testing_data)
    random.seed(randnum)
    random.shuffle(testing_label)

    testing_data = ld.get_shaped_data(testing_data, config.batch_size,
                                      config.block_size, len(data[0]))
    testing_data_num = len(testing_label) - len(testing_label) % (
        config.block_size * config.batch_size)
    testing_label = testing_label[:
                                  testing_data_num]  # testing data instance level ground truth

    print("instance_dim", training_data.shape, instance_dim)
    print("feature_item_num", feature_item_num)
    with tf.Graph().as_default(), tf.Session() as sess:
        #graph settings
        FM_weight_dim = config.FM_weight_dim
        batch_size = config.batch_size
        block_size = config.block_size
        attention_dim = config.attention_dim
        autoencoder_hidden_dim = config.autoencoder_hidden_dim
        lstm_dropout_keep_prob = config.lstm_dropout_keep_prob
        lstm_layer_num = config.lstm_layer_num
        lstm_hidden_size = config.lstm_hidden_size
        is_training = config.is_training
        gan_hidden_dim = config.gan_hidden_dim
        alpha = config.alpha
        beta = config.beta
        learning_rate = config.learning_rate
        model = AnomalyNet(feature_index, FM_weight_dim, feature_item_num,
                           batch_size, block_size, instance_dim, attention_dim,
                           autoencoder_hidden_dim, lstm_dropout_keep_prob,
                           lstm_layer_num, lstm_hidden_size, is_training,
                           gan_hidden_dim, alpha, beta, learning_rate)
        saver = tf.train.Saver(
            max_to_keep=10
        )  #saver for checkpoints, add var_list because of batching training

        init = tf.global_variables_initializer()
        sess.run(init)

        for epoch in range(config.epoch):
            # training
            for i in range(len(training_data)):
                curr_batch = training_data[i]
                feed_dict = {model.data: curr_batch}
                result = sess.run(
                    (model.D_train, model.G_train, model.test1, model.test2),
                    feed_dict=feed_dict)
                instance_loss_threshold = np.mean(result[2])
                block_loss_threshold = np.mean(result[3])
                print(
                    "current epoch %d, in batch %d, instance average loss %.4f, block average loss %.4f"
                    %
                    (epoch, i, instance_loss_threshold, block_loss_threshold),
                    result[2].shape, result[3].shape)

            model_path = "saved_model/epoch_%s.ckpt" % (epoch)
            saver.save(sess, model_path)
            # ===== testing =====
            instance_loss_threshold = instance_loss_threshold * config.threshold_scale
            block_loss_threshold = block_loss_threshold * config.threshold_scale
            instance_pred = []
            block_pred = []
            for i in range(len(testing_data)):
                curr_batch = testing_data[i]
                feed_dict = {model.data: curr_batch}
                instance_loss, block_loss = sess.run(
                    (model.instance_loss, model.block_loss),
                    feed_dict=feed_dict)
                for j in range(len(instance_loss)):
                    if np.mean(instance_loss[j]) < instance_loss_threshold:
                        instance_pred.append(1)
                    else:
                        instance_pred.append(0)

                for j in range(len(block_loss)):
                    if np.mean(block_loss[j]) < block_loss_threshold:
                        block_pred.append(1)
                    else:
                        block_pred.append(0)

                # print("every batch instance/block:",instance_loss.shape,block_loss.shape)

            testing_block_num = testing_data_num // config.block_size
            block_true = []
            # block-level anomaly detection derived from instance-level predictions
            block_pred_from_instance = []
            for i in range(testing_block_num):
                pred_sum = np.sum(instance_pred[i * config.block_size:(i + 1) *
                                                config.block_size])
                true_sum = np.sum(testing_label[i * config.block_size:(i + 1) *
                                                config.block_size])

                # instance predictions vote on the block: if they are confident that the block
                # is anomalous or normal, use their result; otherwise keep the block-level prediction
                if pred_sum > config.block_size * config.instance_confidence:
                    block_pred_from_instance.append(1)
                elif pred_sum < config.block_size * (
                        1 - config.instance_confidence):
                    block_pred_from_instance.append(0)
                else:
                    block_pred_from_instance.append(-1)
                # generate ground truth
                if true_sum < config.block_size * config.block_ratio:
                    block_true.append(0)
                else:
                    block_true.append(1)

            block_pred_mixure = []
            for i in range(len(block_pred)):
                if block_pred_from_instance[i] == 0:
                    block_pred_mixure.append(0)
                elif block_pred_from_instance[i] == 1:
                    block_pred_mixure.append(1)
                else:
                    block_pred_mixure.append(block_pred[i])
            #instance level evaluation
            print("testing_block_num", testing_data_num, testing_block_num,
                  config.block_size)
            instance_eval = eval(testing_label, instance_pred)
            #block level evaluation
            block_eval = eval(block_true, block_pred_mixure)
            # print(block_true,block_pred)
            print("instance level evaluation: ", instance_eval)
            print("block level evaluation: ", block_eval)
Example #8
def main(args):
    if args.apex:
        if sys.version_info < (3, 0):
            raise RuntimeError("Apex currently only supports Python 3. Aborting.")
        if amp is None:
            raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex "
                               "to enable mixed-precision training.")

    if args.output_dir:
        utils.mkdir(args.output_dir)

    utils.init_distributed_mode(args)
    print(args)

    # device = torch.device(args.device)
    device = torch.device('cuda:{}'.format(args.gpu) if torch.cuda.is_available() else 'cpu')

    torch.backends.cudnn.benchmark = True

    # Data loading code
    print("Loading data")
    if not os.path.isfile(args.cached_data_file):
        dataLoader = ld.LoadData(args.data_dir, args.classes, args.cached_data_file)
        if dataLoader is None:
            print('Error while processing the data. Please check')
            exit(-1)
        data = dataLoader.processData()
    else:
        data = pickle.load(open(args.cached_data_file, "rb"))
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    trainDataset = transforms.Compose([
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.Resize(224),
        transforms.ToTensor(),
        normalize,
    ])

    valDataset = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        normalize,
    ])
    dataset = myDataLoader.MyDataset(data['trainIm'], data['trainClass'], transform=trainDataset)
    dataset_test = myDataLoader.MyDataset(data['valIm'], data['valClass'], transform=valDataset)

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size,
        sampler=train_sampler, num_workers=args.workers, pin_memory=True)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=args.batch_size,
        sampler=test_sampler, num_workers=args.workers, pin_memory=True)

    print("Creating model")
    # import pdb
    # pdb.set_trace()
    model = torchvision.models.__dict__[args.model](pretrained=args.pretrained)
    # googlenet
    # num_ftrs = model.fc.in_features
    # model.fc = nn.Linear(num_ftrs, args.classes)   
    ##densenet161
    num_ftrs = model.classifier.in_features
    model.classifier = nn.Linear(num_ftrs, args.classes)

    ##resnet101
    # num_ftrs = model.fc.in_features
    # model.fc = nn.Linear(num_ftrs, args.classes)   
    model.to(device)
    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    criterion = nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(
        model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    if args.apex:
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=args.apex_opt_level
                                          )

    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    # changed the resume handling: only resume when the checkpoint file actually exists
    if args.resume and os.path.exists(args.resume):
        print('Loading resume data from file: {}'.format(args.resume))
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
    else:
        print('Not loading resume data.')

    if args.test_only:
        evaluate(model, criterion, data_loader_test, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args.print_freq, args.apex)
        lr_scheduler.step()
        evaluate(model, criterion, data_loader_test, device=device)
        if args.output_dir:
            checkpoint = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch,
                'args': args}
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'checkpoint.pth'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
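myDataLoader.MyDataset is project code that is not shown; since torchvision transforms (ColorJitter, Resize, ToTensor) are applied per sample, it presumably loads PIL images from file paths and returns (image, label) pairs. A hypothetical sketch:

import torch.utils.data
from PIL import Image


class MyDataset(torch.utils.data.Dataset):
    def __init__(self, imList, classList, transform=None):
        self.imList = imList          # list of image file paths
        self.classList = classList    # list of integer class labels
        self.transform = transform

    def __len__(self):
        return len(self.imList)

    def __getitem__(self, idx):
        image = Image.open(self.imList[idx]).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, self.classList[idx]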
Example #9
def trainValSegmentation(args):
    if not os.path.isfile(args.cached_data_file):
        dataLoader = ld.LoadData(args.data_dir, args.classes, args.attrClasses,
                                 args.cached_data_file)
        if dataLoader is None:
            print("Error while caching the data.")
            exit(-1)
        data = dataLoader.processData()
    else:
        print("Loading cached data.")
        data = pickle.load(open(args.cached_data_file, 'rb'))
    # only unet for segmentation now.
    # model= unet.UNet(args.classes)
    # model = r18unet.ResNetUNet(args.classes)
    model = mobileunet.MobileUNet(args.classes)
    print("UNet done...")
    # if args.onGPU == True:
    model = model.cuda()
    # devices_ids=[2,3], device_ids=range(2)
    # device = torch.device('cuda:' + str(devices_ids[0]))
    # model = model.to(device)
    if args.visNet == True:
        x = Variable(torch.randn(1, 3, args.inwidth, args.inheight))
        if args.onGPU == True:
            x = x.cuda()
        print("before forward...")
        y = model.forward(x)
        print("after forward...")
        g = viz.make_dot(y)
        # g1 = viz.make_dot(y1)
        g.render(args.save_dir + '/model', view=False)
    model = torch.nn.DataParallel(model)
    n_param = sum([np.prod(param.size()) for param in model.parameters()])
    print('network parameters: ' + str(n_param))

    #define optimization criteria
    weight = torch.from_numpy(data['classWeights'])
    print(weight)
    if args.onGPU == True:
        weight = weight.cuda()
    criteria = CrossEntropyLoss2d(weight)
    # if args.onGPU == True:
    # 	criteria = criteria.cuda()

    trainDatasetNoZoom = myTransforms.Compose([
        myTransforms.RandomCropResize(args.inwidth, args.inheight),
        # myTransforms.RandomHorizontalFlip(),
        myTransforms.ToTensor(args.scaleIn)
    ])
    trainDatasetWithZoom = myTransforms.Compose([
        # myTransforms.Zoom(512,512),
        myTransforms.RandomCropResize(args.inwidth, args.inheight),
        myTransforms.RandomHorizontalFlip(),
        myTransforms.ToTensor(args.scaleIn)
    ])
    valDataset = myTransforms.Compose([
        myTransforms.RandomCropResize(args.inwidth, args.inheight),
        myTransforms.ToTensor(args.scaleIn)
    ])
    trainLoaderNoZoom = torch.utils.data.DataLoader(
        ld.MyDataset(data['trainIm'],
                     data['trainAnnot'],
                     transform=trainDatasetNoZoom),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True)
    trainLoaderWithZoom = torch.utils.data.DataLoader(
        ld.MyDataset(data['trainIm'],
                     data['trainAnnot'],
                     transform=trainDatasetWithZoom),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True)
    valLoader = torch.utils.data.DataLoader(ld.MyDataset(data['valIm'],
                                                         data['valAnnot'],
                                                         transform=valDataset),
                                            batch_size=args.batch_size_val,
                                            shuffle=True,
                                            num_workers=args.num_workers,
                                            pin_memory=True)

    #define the optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr, (0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=2e-4)
    # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4)
    # optimizer = torch.optim.SGD([
    #        {'params': [param for name, param in model.named_parameters() if name[-4:] == 'bias'],
    #         'lr': 2 * args.lr},
    #        {'params': [param for name, param in model.named_parameters() if name[-4:] != 'bias'],
    #         'lr': args.lr, 'weight_decay': 5e-4}
    #    ], momentum=0.99)

    if args.onGPU == True:
        cudnn.benchmark = True
    start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resumeLoc):
            print("=> loading checkpoint '{}'".format(args.resumeLoc))
            checkpoint = torch.load(args.resumeLoc)
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resumeLoc, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resumeLoc))

    logfileLoc = args.save_dir + os.sep + args.logFile
    print(logfileLoc)
    if os.path.isfile(logfileLoc):
        logger = open(logfileLoc, 'a')
        logger.write("parameters: %s" % (str(n_param)))
        logger.write("\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t" %
                     ('Epoch', 'Loss(Tr)', 'Loss(val)', 'Overall acc(Tr)',
                      'Overall acc(val)', 'mIOU (tr)', 'mIOU (val)'))
        logger.flush()
    else:
        logger = open(logfileLoc, 'w')
        logger.write("Parameters: %s" % (str(n_param)))
        logger.write("\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t" %
                     ('Epoch', 'Loss(Tr)', 'Loss(val)', 'Overall acc(Tr)',
                      'Overall acc(val)', 'mIOU (tr)', 'mIOU (val)'))
        logger.flush()

    #lr scheduler
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[30, 60, 90],
                                                     gamma=0.1)
    best_model_acc = 0
    for epoch in range(start_epoch, args.max_epochs):
        scheduler.step(epoch)
        lr = 0
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
        # train(args,trainLoaderWithZoom,model,criteria,optimizer,epoch)
        lossTr, overall_acc_tr, per_class_acc_tr, per_class_iu_tr, mIOU_tr = train(
            args, trainLoaderNoZoom, model, criteria, optimizer, epoch)
        # print(per_class_acc_tr,per_class_iu_tr)
        lossVal, overall_acc_val, per_class_acc_val, per_class_iu_val, mIOU_val = val(
            args, valLoader, model, criteria)

        #save_checkpoint
        torch.save(
            {
                'epoch': epoch + 1,
                'arch': str(model),
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lossTr': lossTr,
                'lossVal': lossVal,
                'iouTr': mIOU_tr,
                'iouVal': mIOU_val,
            }, args.save_dir + '/checkpoint.pth.tar')

        #save model also
        # if overall_acc_val > best_model_acc:
        # 	best_model_acc = overall_acc_val
        model_file_name = args.save_dir + '/model_' + str(epoch + 1) + '.pth'
        torch.save(model.state_dict(), model_file_name)
        with open('../acc/acc_' + str(epoch) + '.txt', 'w') as log:
            log.write(
                "\nEpoch: %d\t Overall Acc (Tr): %.4f\t Overall Acc (Val): %.4f\t mIOU (Tr): %.4f\t mIOU (Val): %.4f"
                % (epoch, overall_acc_tr, overall_acc_val, mIOU_tr, mIOU_val))
            log.write('\n')
            log.write('Per Class Training Acc: ' + str(per_class_acc_tr))
            log.write('\n')
            log.write('Per Class Validation Acc: ' + str(per_class_acc_val))
            log.write('\n')
            log.write('Per Class Training mIOU: ' + str(per_class_iu_tr))
            log.write('\n')
            log.write('Per Class Validation mIOU: ' + str(per_class_iu_val))

        logger.write(
            "\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.6f" %
            (epoch, lossTr, lossVal, overall_acc_tr, overall_acc_val, mIOU_tr,
             mIOU_val, lr))
        logger.flush()
        print("Epoch : " + str(epoch) + ' Details')
        print(
            "\nEpoch No.: %d\tTrain Loss = %.4f\tVal Loss = %.4f\t Train acc = %.4f\t Val acc = %.4f\t mIOU(tr) = %.4f\t mIOU(val) = %.4f"
            % (epoch, lossTr, lossVal, overall_acc_tr, overall_acc_val,
               mIOU_tr, mIOU_val))

    logger.close()
Example #10
def trainValidateSegmentation(args):
    # check if processed data file exists or not
    if not os.path.isfile(args.cached_data_file):
        dataLoader = ld.LoadData(args.data_dir, args.classes,
                                 args.cached_data_file)
        if dataLoader is None:
            print('Error while processing the data. Please check')
            exit(-1)
        data = dataLoader.processData()
    else:
        data = pickle.load(open(args.cached_data_file, "rb"))

    if args.modelType == 'C1':
        model = net.ResNetC1(args.classes)
    elif args.modelType == 'D1':
        model = net.ResNetD1(args.classes)
    else:
        print('Please select the correct model. Exiting!!')
        exit(-1)

    args.savedir = args.savedir + args.modelType + '/'

    if args.onGPU == True:
        model = model.cuda()

    # create the directory if not exist
    if not os.path.exists(args.savedir):
        os.mkdir(args.savedir)

    if args.onGPU == True:
        model = model.cuda()

    if args.visualizeNet == True:
        x = Variable(torch.randn(1, 3, args.inWidth, args.inHeight))

        if args.onGPU == True:
            x = x.cuda()

        y = model.forward(x)
        g = viz.make_dot(y)
        g.render(args.savedir + '/model.png', view=False)

    n_param = sum([np.prod(param.size()) for param in model.parameters()])
    print('Network parameters: ' + str(n_param))

    # define optimization criteria
    print('Weights to handle class-imbalance')
    weight = torch.from_numpy(
        data['classWeights'])  # convert the numpy array to torch
    print(weight)
    if args.onGPU == True:
        weight = weight.cuda()

    criteria = CrossEntropyLoss2d(weight)  # weight

    if args.onGPU == True:
        criteria = criteria.cuda()

    trainDatasetNoZoom = myTransforms.Compose([
        # myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.RandomCropResize(20),
        myTransforms.RandomHorizontalFlip(),
        myTransforms.ToTensor(args.scaleIn)
    ])

    trainDatasetWithZoom = myTransforms.Compose([
        # myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Zoom(512, 512),
        myTransforms.RandomCropResize(20),
        myTransforms.RandomHorizontalFlip(),
        myTransforms.ToTensor(args.scaleIn)
    ])

    valDataset = myTransforms.Compose([
        # myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.ToTensor(args.scaleIn)
    ])

    trainLoaderNoZoom = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'],
                               data['trainAnnot'],
                               transform=trainDatasetNoZoom),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True)

    trainLoaderWithZoom = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'],
                               data['trainAnnot'],
                               transform=trainDatasetWithZoom),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True)

    valLoader = torch.utils.data.DataLoader(myDataLoader.MyDataset(
        data['valIm'], data['valAnnot'], transform=valDataset),
                                            batch_size=args.batch_size,
                                            shuffle=False,
                                            num_workers=args.num_workers,
                                            pin_memory=True)

    # define the optimizer
    # optimizer = torch.optim.Adam(model.parameters(), args.lr, (0.9, 0.999), eps=1e-08, weight_decay=2e-4)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=5e-4)

    if args.onGPU == True:
        cudnn.benchmark = True

    start_epoch = 0

    if args.resume:
        if os.path.isfile(args.resumeLoc):
            print("=> loading checkpoint '{}'".format(args.resumeLoc))
            checkpoint = torch.load(args.resumeLoc)
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resumeLoc, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resumeLoc))

    logFileLoc = args.savedir + os.sep + args.logFile
    if os.path.isfile(logFileLoc):
        logger = open(logFileLoc, 'a')
        logger.write("Parameters: %s" % (str(n_param)))
        logger.write(
            "\n%s\t%s\t%s\t%s\t%s\t" %
            ('Epoch', 'Loss(Tr)', 'Loss(val)', 'mIOU (tr)', 'mIOU (val)'))
        logger.flush()
    else:
        logger = open(logFileLoc, 'w')
        logger.write("Parameters: %s" % (str(n_param)))
        logger.write(
            "\n%s\t%s\t%s\t%s\t%s\t" %
            ('Epoch', 'Loss(Tr)', 'Loss(val)', 'mIOU (tr)', 'mIOU (val)'))
        logger.flush()

    #lr scheduler
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=args.step_loss,
                                                gamma=0.1)

    for epoch in range(start_epoch, args.max_epochs):
        scheduler.step(epoch)

        lr = 0
        for param_group in optimizer.param_groups:
            lr = param_group['lr']

        # run at zoomed images first
        train(args, trainLoaderWithZoom, model, criteria, optimizer, epoch)
        lossTr, overall_acc_tr, per_class_acc_tr, per_class_iu_tr, mIOU_tr = train(
            args, trainLoaderNoZoom, model, criteria, optimizer, epoch)
        # evaluate on validation set
        lossVal, overall_acc_val, per_class_acc_val, per_class_iu_val, mIOU_val = val(
            args, valLoader, model, criteria)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': str(model),
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lossTr': lossTr,
                'lossVal': lossVal,
                'iouTr': mIOU_tr,
                'iouVal': mIOU_val,
            }, args.savedir + '/checkpoint.pth.tar')

        # save the model also
        model_file_name = args.savedir + '/model_' + str(epoch + 1) + '.pth'
        torch.save(model.state_dict(), model_file_name)

        with open(args.savedir + 'acc_' + str(epoch) + '.txt', 'w') as log:
            log.write(
                "\nEpoch: %d\t Overall Acc (Tr): %.4f\t Overall Acc (Val): %.4f\t mIOU (Tr): %.4f\t mIOU (Val): %.4f"
                % (epoch, overall_acc_tr, overall_acc_val, mIOU_tr, mIOU_val))
            log.write('\n')
            log.write('Per Class Training Acc: ' + str(per_class_acc_tr))
            log.write('\n')
            log.write('Per Class Validation Acc: ' + str(per_class_acc_val))
            log.write('\n')
            log.write('Per Class Training mIOU: ' + str(per_class_iu_tr))
            log.write('\n')
            log.write('Per Class Validation mIOU: ' + str(per_class_iu_val))

        logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f" %
                     (epoch, lossTr, lossVal, mIOU_tr, mIOU_val, lr))
        logger.flush()
        print("Epoch : " + str(epoch) + ' Details')
        print(
            "\nEpoch No.: %d\tTrain Loss = %.4f\tVal Loss = %.4f\t mIOU(tr) = %.4f\t mIOU(val) = %.4f"
            % (epoch, lossTr, lossVal, mIOU_tr, mIOU_val))

    logger.close()
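save_checkpoint is used in Examples #3 and #10 but not defined there; assuming it is a thin wrapper around torch.save, a minimal sketch is:

import torch


def save_checkpoint(state, filename='checkpoint.pth.tar'):
    # serialize the checkpoint dict (epoch, state_dict, optimizer, metrics, ...) to disk
    torch.save(state, filename)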
Example #11
def main_tr(args, crossVal):
    dataLoad = ld.LoadData(args.data_dir, args.classes)
    data = dataLoad.processData(crossVal, args.data_name)

    # load the model
    model = net.MiniSeg(args.classes, aux=True)
    if not osp.isdir(osp.join(args.savedir + '_mod' + str(args.max_epochs))):
        os.mkdir(args.savedir + '_mod' + str(args.max_epochs))
    if not osp.isdir(
            osp.join(args.savedir + '_mod' + str(args.max_epochs),
                     args.data_name)):
        os.mkdir(
            osp.join(args.savedir + '_mod' + str(args.max_epochs),
                     args.data_name))
    saveDir = args.savedir + '_mod' + str(
        args.max_epochs) + '/' + args.data_name + '/' + args.model_name
    # create the directory if not exist
    if not osp.exists(saveDir):
        os.mkdir(saveDir)

    if args.gpu and torch.cuda.device_count() > 1:
        #model = torch.nn.DataParallel(model)
        model = DataParallelModel(model)
    if args.gpu:
        model = model.cuda()

    total_paramters = sum([np.prod(p.size()) for p in model.parameters()])
    print('Total network parameters: ' + str(total_paramters))

    # define optimization criteria
    weight = torch.from_numpy(
        data['classWeights'])  # convert the numpy array to torch
    if args.gpu:
        weight = weight.cuda()

    criteria = CrossEntropyLoss2d(weight, args.ignore_label)  #weight
    if args.gpu and torch.cuda.device_count() > 1:
        criteria = DataParallelCriterion(criteria)
    if args.gpu:
        criteria = criteria.cuda()

    # compose the data with transforms
    trainDataset_main = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(args.width, args.height),
        myTransforms.RandomCropResize(int(32. / 1024. * args.width)),
        myTransforms.RandomFlip(),
        myTransforms.ToTensor()
    ])
    trainDataset_scale1 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(int(args.width * 1.5), int(args.height * 1.5)),
        myTransforms.RandomCropResize(int(100. / 1024. * args.width)),
        myTransforms.RandomFlip(),
        myTransforms.ToTensor()
    ])

    trainDataset_scale2 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(int(args.width * 1.25), int(args.height * 1.25)),
        myTransforms.RandomCropResize(int(100. / 1024. * args.width)),
        myTransforms.RandomFlip(),
        myTransforms.ToTensor()
    ])
    trainDataset_scale3 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(int(args.width * 0.75), int(args.height * 0.75)),
        myTransforms.RandomCropResize(int(32. / 1024. * args.width)),
        myTransforms.RandomFlip(),
        myTransforms.ToTensor()
    ])

    valDataset = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(args.width, args.height),
        myTransforms.ToTensor()
    ])

    # since we are training from scratch, we create data loaders at different scales
    # so that we can generate more augmented data and prevent the network from overfitting
    trainLoader = torch.utils.data.DataLoader(myDataLoader.Dataset(
        data['trainIm'], data['trainAnnot'], transform=trainDataset_main),
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              pin_memory=True,
                                              drop_last=True)

    trainLoader_scale1 = torch.utils.data.DataLoader(
        myDataLoader.Dataset(data['trainIm'],
                             data['trainAnnot'],
                             transform=trainDataset_scale1),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True)

    trainLoader_scale2 = torch.utils.data.DataLoader(
        myDataLoader.Dataset(data['trainIm'],
                             data['trainAnnot'],
                             transform=trainDataset_scale2),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True)
    trainLoader_scale3 = torch.utils.data.DataLoader(
        myDataLoader.Dataset(data['trainIm'],
                             data['trainAnnot'],
                             transform=trainDataset_scale3),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True)

    valLoader = torch.utils.data.DataLoader(myDataLoader.Dataset(
        data['valIm'], data['valAnnot'], transform=valDataset),
                                            batch_size=args.batch_size,
                                            shuffle=False,
                                            num_workers=args.num_workers,
                                            pin_memory=True)
    max_batches = len(trainLoader) + len(trainLoader_scale1) + len(
        trainLoader_scale2) + len(trainLoader_scale3)

    if args.gpu:
        cudnn.benchmark = True

    start_epoch = 0

    if args.pretrained is not None:
        state_dict = torch.load(args.pretrained)
        new_keys = []
        new_values = []
        for idx, key in enumerate(state_dict.keys()):
            if 'pred' not in key:
                new_keys.append(key)
                new_values.append(list(state_dict.values())[idx])
        new_dict = OrderedDict(list(zip(new_keys, new_values)))
        model.load_state_dict(new_dict, strict=False)
        print('pretrained model loaded')

    if args.resume is not None:
        if osp.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            args.lr = checkpoint['lr']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    log_file = osp.join(saveDir, 'trainValLog_' + args.model_name + '.txt')
    if osp.isfile(log_file):
        logger = open(log_file, 'a')
    else:
        logger = open(log_file, 'w')
        logger.write("Parameters: %s" % (str(total_paramters)))
        logger.write("\n%s\t%s\t\t%s\t%s\t%s\t%s\tlr" %
                     ('CrossVal', 'Epoch', 'Loss(Tr)', 'Loss(val)',
                      'mIOU (tr)', 'mIOU (val)'))
    logger.flush()

    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr, (0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=1e-4)
    maxmIOU = 0
    maxEpoch = 0
    print(args.model_name + '-CrossVal: ' + str(crossVal + 1))
    for epoch in range(start_epoch, args.max_epochs):
        # train for one epoch
        cur_iter = 0

        train(args, trainLoader_scale1, model, criteria, optimizer, epoch,
              max_batches, cur_iter)
        cur_iter += len(trainLoader_scale1)
        train(args, trainLoader_scale2, model, criteria, optimizer, epoch,
              max_batches, cur_iter)
        cur_iter += len(trainLoader_scale2)
        train(args, trainLoader_scale3, model, criteria, optimizer, epoch,
              max_batches, cur_iter)
        cur_iter += len(trainLoader_scale3)
        lossTr, overall_acc_tr, per_class_acc_tr, per_class_iu_tr, mIOU_tr, lr = \
                train(args, trainLoader, model, criteria, optimizer, epoch, max_batches, cur_iter)

        # evaluate on validation set
        lossVal, overall_acc_val, per_class_acc_val, per_class_iu_val, mIOU_val = \
                val(args, valLoader, model, criteria)

        torch.save(
            {
                'epoch': epoch + 1,
                'arch': str(model),
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lossTr': lossTr,
                'lossVal': lossVal,
                'iouTr': mIOU_tr,
                'iouVal': mIOU_val,
                'lr': lr
            },
            osp.join(
                saveDir, 'checkpoint_' + args.model_name + '_crossVal' +
                str(crossVal + 1) + '.pth.tar'))

        # save the model also
        model_file_name = osp.join(
            saveDir, 'model_' + args.model_name + '_crossVal' +
            str(crossVal + 1) + '_' + str(epoch + 1) + '.pth')
        torch.save(model.state_dict(), model_file_name)

        logger.write(
            "\n%d\t\t%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.7f" %
            (crossVal + 1, epoch + 1, lossTr, lossVal, mIOU_tr, mIOU_val, lr))
        logger.flush()
        print("\nEpoch No. %d:\tTrain Loss = %.4f\tVal Loss = %.4f\t mIOU(tr) = %.4f\t mIOU(val) = %.4f\n" \
                % (epoch + 1, lossTr, lossVal, mIOU_tr, mIOU_val))

        if mIOU_val >= maxmIOU:
            maxmIOU = mIOU_val
            maxEpoch = epoch + 1
        torch.cuda.empty_cache()
    logger.flush()
    logger.close()
    return maxEpoch, maxmIOU
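
# ---------------------------------------------------------------------------
# train() is handed `max_batches` and a running `cur_iter` above, which points
# to an iteration-based learning-rate schedule computed inside train(). That
# schedule is not part of this excerpt; a minimal sketch of one common choice
# (polynomial decay) is given below. The name `poly_lr` and the power 0.9 are
# assumptions for illustration, not taken from the original code.
def poly_lr(optimizer, base_lr, cur_iter, max_iter, power=0.9):
    """Decay the learning rate polynomially from base_lr towards zero."""
    lr = base_lr * (1 - cur_iter / max_iter) ** power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr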
Exemplo n.º 12
0
import pprint

import aspectOpinion
import loadData
import readAllDirectory
import removeStopWord

CONSTANT_DIRECTORY = '../../ExtractReviews/Reviews/'
CONSTANT_FILENAME = 'reviews.txt'

if __name__ == '__main__':
    instanceReadDirectory = readAllDirectory.ReadDirectory(CONSTANT_DIRECTORY)
    instanceReadDirectory.readDirectory()
    #pprint.pprint(instanceReadDirectory._dirList)
    for index in range(0, len(instanceReadDirectory._dirList)):
        print("Process : ", index)
        pprint.pprint(CONSTANT_DIRECTORY +
                      instanceReadDirectory._dirList[index])

        #Loading Review File
        instanceLoadData = loadData.LoadData(
            CONSTANT_DIRECTORY + instanceReadDirectory._dirList[index])
        instanceLoadData.openFile(CONSTANT_FILENAME)
        instanceLoadData.readText()
        instanceLoadData.readNegativeWord()
        #pprint.pprint(instanceLoadData._text)
        #pprint.pprint(instanceLoadData._negWord)
        instanceLoadData.closeFile()

        #Removing Stop Words
        instanceRemveStopWord = removeStopWord.RemoveStopWord(
            instanceLoadData._text)
        instanceRemveStopWord.removeStopWord()
        #print(instanceRemveStopWord._stopWords)
        #print(instanceRemveStopWord._result)

        #Tokenizing Data
Exemplo n.º 13
0
def trainValidateSegmentation(args):

    print('Data file: ' + str(args.cached_data_file))
    print(args)

    # check if processed data file exists or not
    if not os.path.isfile(args.cached_data_file):
        dataLoader = ld.LoadData(args.data_dir, args.data_dir_val,
                                 args.classes, args.cached_data_file)
        data = dataLoader.processData()
        if data is None:
            print('Error while pickling data. Please check.')
            exit(-1)
    else:
        data = pickle.load(open(args.cached_data_file, "rb"))
    print('=> Loading the model')
    model = net.ESPNet(classes=args.classes, channels=args.channels)
    args.savedir = args.savedir + os.sep

    if args.onGPU:
        model = model.cuda()

    # create the directory if not exist
    if not os.path.exists(args.savedir):
        os.mkdir(args.savedir)

    if args.visualizeNet:
        import VisualizeGraph as viz
        x = Variable(
            torch.randn(1, args.channels, args.inDepth, args.inWidth,
                        args.inHeight))

        if args.onGPU:
            x = x.cuda()

        y = model(x, (128, 128, 128))
        g = viz.make_dot(y)
        g.render(args.savedir + os.sep + 'model', view=False)

    total_paramters = sum(p.numel() for p in model.parameters())

    print('Parameters: ' + str(total_paramters))

    # define optimization criteria
    weight = torch.from_numpy(
        data['classWeights'])  # convert the numpy array to a torch tensor
    print('Class Imbalance Weights')
    print(weight)
    criteria = torch.nn.CrossEntropyLoss(weight)
    if args.onGPU:
        criteria = criteria.cuda()

    # We train at three different resolutions (144x144x144, 96x96x96 and 128x128x128)
    # and validate at one resolution (128x128x128)
    trainDatasetA = myTransforms.Compose([
        myTransforms.MinMaxNormalize(),
        myTransforms.ScaleToFixed(dimA=144, dimB=144, dimC=144),
        myTransforms.RandomFlip(),
        myTransforms.ToTensor(args.scaleIn),
    ])

    trainDatasetB = myTransforms.Compose([
        myTransforms.MinMaxNormalize(),
        myTransforms.ScaleToFixed(dimA=96, dimB=96, dimC=96),
        myTransforms.RandomFlip(),
        myTransforms.ToTensor(args.scaleIn),
    ])

    trainDatasetC = myTransforms.Compose([
        myTransforms.MinMaxNormalize(),
        myTransforms.ScaleToFixed(dimA=args.inWidth,
                                  dimB=args.inHeight,
                                  dimC=args.inDepth),
        myTransforms.RandomFlip(),
        myTransforms.ToTensor(args.scaleIn),
    ])

    valDataset = myTransforms.Compose([
        myTransforms.MinMaxNormalize(),
        myTransforms.ScaleToFixed(dimA=args.inWidth,
                                  dimB=args.inHeight,
                                  dimC=args.inDepth),
        myTransforms.ToTensor(args.scaleIn),
    ])

    trainLoaderA = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'],
                               data['trainAnnot'],
                               transform=trainDatasetA),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=False)  #disabling pin memory because swap usage is high
    trainLoaderB = torch.utils.data.DataLoader(myDataLoader.MyDataset(
        data['trainIm'], data['trainAnnot'], transform=trainDatasetB),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers,
                                               pin_memory=False)
    trainLoaderC = torch.utils.data.DataLoader(myDataLoader.MyDataset(
        data['trainIm'], data['trainAnnot'], transform=trainDatasetC),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers,
                                               pin_memory=False)

    valLoader = torch.utils.data.DataLoader(myDataLoader.MyDataset(
        data['valIm'], data['valAnnot'], transform=valDataset),
                                            batch_size=1,
                                            shuffle=False,
                                            num_workers=args.num_workers,
                                            pin_memory=False)

    # define the optimizer
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 args.lr, (0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=2e-4)

    if args.onGPU:
        cudnn.benchmark = True

    start_epoch = 0
    stored_loss = 100000000.0
    if args.resume:
        if os.path.isfile(args.resumeLoc):
            print("=> loading checkpoint '{}'".format(args.resumeLoc))
            checkpoint = torch.load(args.resumeLoc)
            start_epoch = checkpoint['epoch']
            stored_loss = checkpoint['stored_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    logFileLoc = args.savedir + args.logFile
    if os.path.isfile(logFileLoc):
        logger = open(logFileLoc, 'a')
        logger.write("Parameters: %s" % (str(total_paramters)))
        logger.write(
            "\n%s\t%s\t%s\t%s\t%s\t" %
            ('Epoch', 'Loss(Tr)', 'Loss(val)', 'mIOU (tr)', 'mIOU (val)'))
        logger.flush()
    else:
        logger = open(logFileLoc, 'w')
        logger.write("Arguments: %s" % (str(args)))
        logger.write("\n Parameters: %s" % (str(total_paramters)))
        logger.write(
            "\n%s\t%s\t%s\t%s\t%s\t" %
            ('Epoch', 'Loss(Tr)', 'Loss(val)', 'mIOU (tr)', 'mIOU (val)'))
        logger.flush()

    # halve the learning rate every args.step_loss epochs
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=args.step_loss,
                                                gamma=0.5)
    best_val_acc = 0

    # the three training loaders at different resolutions are mapped to indices 0, 1 and 2
    loader_idxs = [0, 1, 2]
    for epoch in range(start_epoch, args.max_epochs):
        # step the learning rate
        scheduler.step(epoch)
        lr = 0
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
        print('Running epoch {} with learning rate {:.5f}'.format(epoch, lr))

        if epoch > 0:
            # shuffle the loaders
            np.random.shuffle(loader_idxs)

        for l_id in loader_idxs:
            if l_id == 0:
                train(args, trainLoaderA, model, criteria, optimizer, epoch)
            elif l_id == 1:
                train(args, trainLoaderB, model, criteria, optimizer, epoch)
            else:
                lossTr, overall_acc_tr, per_class_acc_tr, per_class_iu_tr, mIOU_tr = \
                    train(args, trainLoaderC, model, criteria, optimizer, epoch)

        # evaluate on validation set
        lossVal, overall_acc_val, per_class_acc_val, per_class_iu_val, mIOU_val = val(
            args, valLoader, model, criteria)

        print('saving checkpoint')
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': str(model),
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lossTr': lossTr,
                'lossVal': lossVal,
                'iouTr': mIOU_tr,
                'iouVal': mIOU_val,
                'stored_loss': stored_loss,
            }, args.savedir + '/checkpoint.pth.tar')

        # save the model also
        if mIOU_val >= best_val_acc:
            best_val_acc = mIOU_val
            torch.save(model.state_dict(), args.savedir + '/best_model.pth')

        with open(args.savedir + 'acc_' + str(epoch) + '.txt', 'w') as log:
            log.write(
                "\nEpoch: %d\t Overall Acc (Tr): %.4f\t Overall Acc (Val): %.4f\t mIOU (Tr): %.4f\t mIOU (Val): %.4f"
                % (epoch, overall_acc_tr, overall_acc_val, mIOU_tr, mIOU_val))
            log.write('\n')
            log.write('Per Class Training Acc: ' + str(per_class_acc_tr))
            log.write('\n')
            log.write('Per Class Validation Acc: ' + str(per_class_acc_val))
            log.write('\n')
            log.write('Per Class Training mIOU: ' + str(per_class_iu_tr))
            log.write('\n')
            log.write('Per Class Validation mIOU: ' + str(per_class_iu_val))

        logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.6f" %
                     (epoch, lossTr, lossVal, mIOU_tr, mIOU_val, lr))
        logger.flush()
        print("Epoch : " + str(epoch) + ' Details')
        print(
            "\nEpoch No.: %d\tTrain Loss = %.4f\tVal Loss = %.4f\t mIOU(tr) = %.4f\t mIOU(val) = %.4f"
            % (epoch, lossTr, lossVal, mIOU_tr, mIOU_val))

    logger.close()
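
# ---------------------------------------------------------------------------
# save_checkpoint() is called above but not defined in this excerpt. A minimal
# implementation consistent with the call site is sketched below; this is an
# assumption for illustration, and the original helper may do more (e.g. also
# copy the best-performing checkpoint to a separate file).
def save_checkpoint(state, filename):
    """Serialize the checkpoint dictionary to `filename` with torch.save."""
    torch.save(state, filename)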