Example #1
    def __init__(self, net, mode, save_in_memory=False):
        
        self.save_in_memory = save_in_memory

        start_time = time.time()
        print("{} || loading data ...".format(getTime()))

        anno = ANNO.format(net, net.lower(), mode)
        with open(anno, 'r') as f:
            annolist = f.readlines()

        self.samplelist = [self._parse(anno) for anno in annolist]

        print("{} || loaded: {}min".\
            format(getTime(), (time.time()-start_time)/60))
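The `ANNO` path template and `self._parse` are defined elsewhere. A minimal sketch of the parsing step, assuming each annotation line holds a sample path followed by an integer label (both the template value and the parsing logic below are hypothetical reconstructions):

    # hypothetical template and line format, e.g. "path/to/sample.npy 12"
    ANNO = "./anno/{}/{}_{}.txt"

    def _parse(self, anno):
        path, label = anno.strip().split()
        return path, int(label)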
Example #2
def main_several_channels():

    # Basis for band selection
    # Optimal band ordering:
    #       [850, 870, 930, 730, 790, 910, 770, 750, 670, 950, 990, 830, 890, 810, 970, 690, 710, 650, 590, 570, 630, 610, 550]
    # Run experiments on each cumulative band combination in turn, i.e. [[850], [850, 870], [850, 870, 930], ..., [850, ..., 550]]
    CHANNEL_SORT = [
        850, 870, 930, 730, 790, 910, 770, 750, 670, 950, 990, 830, 890, 810,
        970, 690, 710, 650, 590, 570, 630, 610, 550
    ]

    for splitidx in range(46, 51):
        usedChannelsList = [CHANNEL_SORT[:i + 1] for i in range(23)]

        for usedChannels in usedChannelsList:

            print(getTime(), splitidx, len(usedChannels), '...')

            configer = EasyDict()

            configer.dsize = (64, 64)
            configer.datatype = 'Multi'
            configer.n_epoch = 300 if configer.datatype == 'Multi' else 350
            configer.lrbase = 0.001 if configer.datatype == 'Multi' else 0.0005

            configer.n_channel = 23
            configer.n_class = 63
            configer.batchsize = 32
            configer.stepsize = 250
            configer.gamma = 0.2
            configer.cuda = True

            configer.splitmode = 'split_{}x{}_{}'.format(
                configer.dsize[0], configer.dsize[1], splitidx)
            configer.modelbase = 'recognize_vgg11_bn'

            if configer.datatype == 'Multi':
                configer.usedChannels = usedChannels
                configer.n_usedChannels = len(configer.usedChannels)
                configer.modelname = '{}_{}_{}'.\
                                format(configer.modelbase, configer.splitmode,
                                        '_'.join(list(map(str, configer.usedChannels))))
            elif configer.datatype == 'RGB':
                configer.usedChannels = 'RGB'
                configer.n_usedChannels = len(configer.usedChannels)
                configer.modelname = '{}_{}_{}'.\
                                format(configer.modelbase, configer.splitmode, configer.usedChannels)


            configer.datapath = '/datasets/ECUST2019_{}x{}'.\
                                            format(configer.dsize[0], configer.dsize[1])
            configer.logspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/logs/{}_{}_{}subjects_logs'.\
                                            format(configer.modelbase, configer.splitmode, configer.n_class)
            configer.mdlspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/modelfiles/{}_{}_{}subjects_models'.\
                                            format(configer.modelbase, configer.splitmode, configer.n_class)

            train(configer)
            test(configer)
Example #3
def main_spectral_resolution():

    # Spectral-resolution validation
    # Select bands at intervals of 1, 2, 3, ..., 23 in turn
    # The train/valid/test split ratio is 0.6 : 0.2 : 0.2
    CHANNEL_SORT = [550 + 20 * i for i in range(23)]

    for splitidx in range(1, 6):
        usedChannelsList = [CHANNEL_SORT[::i + 1] for i in range(22)]

        for usedChannels in usedChannelsList:

            print(getTime(), splitidx, len(usedChannels), '...')

            configer = EasyDict()

            configer.dsize = (64, 64)
            configer.datatype = 'Multi'
            configer.n_epoch = 300 if configer.datatype == 'Multi' else 350
            configer.lrbase = 0.001 if configer.datatype == 'Multi' else 0.0005

            configer.n_channel = 23
            configer.n_class = 63
            configer.batchsize = 32
            configer.stepsize = 250
            configer.gamma = 0.2
            configer.cuda = True

            configer.splitmode = 'split_{}x{}_{}'.format(
                configer.dsize[0], configer.dsize[1], splitidx)
            configer.modelbase = 'recognize_vgg11_bn'

            if configer.datatype == 'Multi':
                configer.usedChannels = usedChannels
                configer.n_usedChannels = len(configer.usedChannels)
                configer.modelname = '{}_{}_{}'.\
                                format(configer.modelbase, configer.splitmode,
                                        '_'.join(list(map(str, configer.usedChannels))))
            elif configer.datatype == 'RGB':
                configer.usedChannels = 'RGB'
                configer.n_usedChannels = len(configer.usedChannels)
                configer.modelname = '{}_{}_{}'.\
                                format(configer.modelbase, configer.splitmode, configer.usedChannels)


            configer.datapath = '/datasets/ECUST2019_{}x{}'.\
                                            format(configer.dsize[0], configer.dsize[1])
            configer.logspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/logs/{}_{}_{}subjects_logs'.\
                                            format(configer.modelbase, configer.splitmode, configer.n_class)
            configer.mdlspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/modelfiles/{}_{}_{}subjects_models'.\
                                            format(configer.modelbase, configer.splitmode, configer.n_class)

            train(configer)
            test(configer)
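As a side note, `CHANNEL_SORT[::i + 1]` keeps every (i+1)-th band. A standalone illustration of the subsets this produces (not part of the original code):

    CHANNEL_SORT = [550 + 20 * i for i in range(23)]
    for stride in (1, 2, 4):
        print(stride, CHANNEL_SORT[::stride])
    # stride 1 -> all 23 bands
    # stride 2 -> 12 bands: 550, 590, ..., 990
    # stride 4 -> 6 bands: 550, 630, 710, 790, 870, 950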
Example #4
def main_split():

    ## select an appropriate train/valid/test split ratio

    for splitidx in range(6, 36):
        for datatype in ['Multi', 'RGB']:
             
            print(getTime(), splitidx, datatype, '...')

            configer = EasyDict()

            configer.dsize = (64, 64)
            configer.datatype = datatype
            configer.n_epoch =   300 if datatype == 'Multi' else 350
            configer.lrbase  = 0.001 if datatype == 'Multi' else 0.0005

            configer.n_channel = 23
            configer.n_class = 63
            configer.batchsize = 32
            configer.stepsize = 250
            configer.gamma = 0.2
            configer.cuda = True


            configer.splitmode = 'split_{}x{}_{}'.format(configer.dsize[0], configer.dsize[1], splitidx)
            configer.modelbase = 'recognize_vgg11_bn'


            if configer.datatype == 'Multi':
                configer.usedChannels = [550+i*20 for i in range(23)]
                configer.n_usedChannels = len(configer.usedChannels)
                configer.modelname = '{}_{}_{}'.\
                                format(configer.modelbase, configer.splitmode, 
                                        '_'.join(list(map(str, configer.usedChannels))))
            elif configer.datatype == 'RGB':
                configer.usedChannels = 'RGB'
                configer.n_usedChannels = len(configer.usedChannels)
                configer.modelname = '{}_{}_{}'.\
                                format(configer.modelbase, configer.splitmode, configer.usedChannels)


            configer.datapath = '/datasets/ECUST2019_{}x{}'.\
                                            format(configer.dsize[0], configer.dsize[1])
            configer.logspath = '/home/siminzhu/rewrite_recognize/logs/{}_{}_{}subjects_logs'.\
                                            format(configer.modelbase, configer.splitmode, configer.n_class)
            configer.mdlspath = '/home/siminzhu/rewrite_recognize/modelfiles/recognize/{}_{}_{}subjects_models'.\
                                            format(configer.modelbase, configer.splitmode, configer.n_class)
            
            
            train(configer)
            test(configer)
Example #5
    def save_checkpoint(self):
        
        checkpoint_state = {
            'save_time': getTime(),

            'cur_epoch': self.cur_epoch,
            'cur_batch': self.cur_batch,
            'elapsed_time': self.elapsed_time,
            'valid_loss': self.valid_loss,
            
            'net_state': self.net.state_dict(),
            'optimizer_state': self.optimizer.state_dict(),
            'lr_scheduler_state': self.lr_scheduler.state_dict(),
        }

        checkpoint_path = os.path.join(self.ckptdir, "{}.pkl".format(self.net._get_name()))
        torch.save(checkpoint_state, checkpoint_path)
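The matching restore logic is not shown. A minimal sketch under the same `ckptdir` layout and the same `os`/`torch` imports (the method name `load_checkpoint` is a hypothetical counterpart):

    def load_checkpoint(self):

        checkpoint_path = os.path.join(self.ckptdir, "{}.pkl".format(self.net._get_name()))
        checkpoint_state = torch.load(checkpoint_path)

        self.cur_epoch = checkpoint_state['cur_epoch']
        self.cur_batch = checkpoint_state['cur_batch']
        self.elapsed_time = checkpoint_state['elapsed_time']
        self.valid_loss = checkpoint_state['valid_loss']

        self.net.load_state_dict(checkpoint_state['net_state'])
        self.optimizer.load_state_dict(checkpoint_state['optimizer_state'])
        self.lr_scheduler.load_state_dict(checkpoint_state['lr_scheduler_state'])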
Example #6
def test(configer):

    ## datasets
    testset = RecognizeDataset(configer.datapath, configer.datatype,
                               configer.splitmode, 'test',
                               configer.usedChannels)
    testloader = DataLoader(testset, configer.batchsize, shuffle=False)

    ## model
    modelpath = os.path.join(configer.mdlspath, configer.modelname) + '.pkl'
    assert os.path.exists(modelpath), 'please train first! '
    model = torch.load(modelpath)
    if configer.cuda and is_available(): model.cuda()

    ## loss
    loss = nn.CrossEntropyLoss()

    ## log
    logpath = os.path.join(configer.logspath, configer.modelname)
    ftest = open(os.path.join(logpath, 'test_log.txt'), 'w')

    ## initialize
    acc_test = []
    loss_test = []
    output = None
    ArcMargin = ArcMarginProduct(128, configer.n_class)
    if configer.cuda and is_available(): ArcMargin.cuda()  # keep the margin head on the same device as the model

    ## start testing
    model.eval()
    for i_batch, (X, y) in enumerate(testloader):

        # get batch
        X = Variable(X.float())
        y = Variable(y)
        if configer.cuda and is_available():
            X = X.cuda()
            y = y.cuda()

        # forward
        if configer.modelbase == 'recognize_mobilefacenet':

            raw_logits = model(X)
            y_pred_prob = ArcMargin(raw_logits, y)

        else:
            y_pred_prob = model(X)

        loss_i = loss(y_pred_prob, y)
        acc_i = accuracy(y_pred_prob, y)

        # log
        print_log = "{} || Batch: [{:3d}]/[{:3d}] || accuracy: {:2.2%}, loss: {:4.4f}".\
                format(getTime(), i_batch, len(testset) // configer.batchsize, acc_i, loss_i)
        print(print_log)
        ftest.write(print_log + '\n')

        loss_test += [loss_i.detach().cpu().numpy()]
        acc_test += [acc_i.cpu().numpy()]

        # save output
        if output is None:
            output = y_pred_prob.detach().cpu().numpy()
        else:
            output = np.concatenate(
                [output, y_pred_prob.detach().cpu().numpy()], axis=0)

    print(
        '------------------------------------------------------------------------------------------------------------------'
    )

    loss_test = np.mean(np.array(loss_test))
    acc_test = np.mean(np.array(acc_test))
    print_log = "{} || test | acc: {:2.2%}, loss: {:4.4f}".\
            format(getTime(), acc_test, loss_test)
    print(print_log)
    ftest.write(print_log + '\n')
    np.save(os.path.join(logpath, 'test_out.npy'), output)

    print(
        '=================================================================================================================='
    )
    ftest.close()
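The `accuracy` helper used throughout these examples is defined elsewhere. A minimal sketch of a top-1 accuracy over logits, assuming `y_pred_prob` is a PyTorch tensor of shape (batch, n_class) and returning a tensor so callers can `.cpu().numpy()` it:

    import torch

    def accuracy(y_pred_prob, y_true):
        y_pred = torch.argmax(y_pred_prob, dim=1)
        return (y_pred == y_true).float().mean()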
Example #7
def train(configer):

    ## datasets
    trainset = AnalysisDataset(configer.datapath, configer.splitmode, 'train')
    validset = AnalysisDataset(configer.datapath, configer.splitmode, 'valid')
    trainloader = DataLoader(trainset, configer.batchsize, shuffle=True)
    validloader = DataLoader(validset, configer.batchsize, shuffle=True)

    ## model
    modelpath = os.path.join(configer.mdlspath, configer.modelname) + '.pkl'
    if not os.path.exists(configer.mdlspath): os.makedirs(configer.mdlspath)
    model = modeldict[configer.modelbase](1, configer.n_class,
                                          configer.dsize[0])
    if configer.cuda and is_available(): model.cuda()

    ## loss
    loss = nn.CrossEntropyLoss()

    ## optimizer
    params = model.parameters()
    optimizer = optim.Adam(params, configer.lrbase, weight_decay=5e-4)

    ## learning rate scheduler
    scheduler = lr_scheduler.StepLR(optimizer, configer.stepsize,
                                    configer.gamma)

    ## log
    logpath = os.path.join(configer.logspath, configer.modelname)
    if not os.path.exists(logpath): os.makedirs(logpath)
    logger = SummaryWriter(logpath)

    ## initialize
    elapsed_time = 0
    total_time = 0
    start_time = 0
    acc_train = 0.
    acc_valid = 0.
    loss_train = float('inf')
    loss_valid = float('inf')
    loss_valid_last = float('inf')

    ## start training
    for i_epoch in range(configer.n_epoch):

        if configer.cuda and is_available(): empty_cache()
        scheduler.step(i_epoch)
        acc_train = []
        acc_valid = []
        loss_train = []
        loss_valid = []

        model.train()
        start_time = time.time()
        for i_batch, (X, y) in enumerate(trainloader):

            # get batch
            X = Variable(X.float())
            y = Variable(y)
            if configer.cuda and is_available():
                X = X.cuda()
                y = y.cuda()

            # forward
            y_pred_prob = model(X)
            loss_i = loss(y_pred_prob, y)
            acc_i = accuracy(y_pred_prob, y)

            # backward
            optimizer.zero_grad()
            loss_i.backward()
            optimizer.step()

            # time
            duration_time = time.time() - start_time
            start_time = time.time()
            elapsed_time += duration_time
            total_time = duration_time * configer.n_epoch * len(
                trainset) // configer.batchsize

            # log
            print_log = "{} || Elapsed: {:.4f}h | Left: {:.4f}h | FPS: {:4.2f} || Epoch: [{:3d}]/[{:3d}] | Batch: [{:3d}]/[{:3d}] || lr: {:.6f} | accuracy: {:2.2%}, loss: {:4.4f}".\
                    format(getTime(), elapsed_time/3600, (total_time - elapsed_time)/3600, configer.batchsize / duration_time,
                            i_epoch, configer.n_epoch, i_batch, len(trainset) // configer.batchsize,
                            scheduler.get_lr()[-1], acc_i, loss_i)
            print(print_log)

            loss_train += [loss_i.detach().cpu().numpy()]
            acc_train += [acc_i.cpu().numpy()]

        print(
            '------------------------------------------------------------------------------------------------------------------'
        )

        model.eval()
        for i_batch, (X, y) in enumerate(validloader):

            # get batch
            X = Variable(X.float())
            y = Variable(y)
            if configer.cuda and is_available():
                X = X.cuda()
                y = y.cuda()

            # forward
            y_pred_prob = model(X)
            loss_i = loss(y_pred_prob, y)
            acc_i = accuracy(y_pred_prob, y)

            # log
            print_log = "{} || Epoch: [{:3d}]/[{:3d}] | Batch: [{:3d}]/[{:3d}] || accuracy: {:2.2%}, loss: {:4.4f}".\
                    format(getTime(), i_epoch, configer.n_epoch, i_batch, len(validset) // configer.batchsize, acc_i, loss_i)
            print(print_log)

            loss_valid += [loss_i.detach().cpu().numpy()]
            acc_valid += [acc_i.cpu().numpy()]

        print(
            '------------------------------------------------------------------------------------------------------------------'
        )

        loss_train = np.mean(np.array(loss_train))
        acc_train = np.mean(np.array(acc_train))
        loss_valid = np.mean(np.array(loss_valid))
        acc_valid = np.mean(np.array(acc_valid))
        print_log = "{} || Epoch: [{:3d}]/[{:3d}] || lr: {:.6f} || train | acc: {:2.2%}, loss: {:4.4f} || valid | acc: {:2.2%}, loss: {:4.4f}".\
                format(getTime(), i_epoch, configer.n_epoch, scheduler.get_lr()[-1], acc_train, loss_train, acc_valid, loss_valid)
        print(print_log)

        logger.add_scalars('accuracy', {
            'train': acc_train,
            'valid': acc_valid
        }, i_epoch)
        logger.add_scalars('logloss', {
            'train': loss_train,
            'valid': loss_valid
        }, i_epoch)
        logger.add_scalar('lr', scheduler.get_lr()[-1], i_epoch)

        print(
            '------------------------------------------------------------------------------------------------------------------'
        )

        if loss_valid_last > loss_valid:

            loss_valid_last = loss_valid
            torch.save(model, modelpath)
            print_log = "{} || Epoch: [{:3d}]/[{:3d}] || Saved as {}".\
                    format(getTime(), i_epoch, configer.n_epoch, modelpath)
            print(print_log)

        print(
            '=================================================================================================================='
        )
def main_best_channels():

    # Basis for band selection
    # Using the best data split:
    # run an experiment on each single band in turn

    for splitidx in range(46, 51):
        for datatype in ['Multi', 'RGB']:

            if datatype == 'Multi':
                # usedChannelsList = [[i] for i in range(23)]
                usedChannelsList = [[550+20*i for i in range(23)]]
            else:
                # usedChannelsList = ['R', 'G', 'B']
                usedChannelsList = ['RGB',]

            for usedChannels in usedChannelsList:
                
                print(getTime(), splitidx, datatype, usedChannels, '...')

                configer = EasyDict()

                configer.dsize = (64, 64)
                configer.datatype = datatype
                configer.n_epoch =   300 if datatype == 'Multi' else 350
                configer.lrbase  = 0.001 if datatype == 'Multi' else 0.0005

                configer.n_channel = 23
                configer.n_class = 63
                configer.batchsize = 32
                configer.stepsize = 250
                configer.gamma = 0.2
                configer.cuda = True


                configer.splitmode = 'split_{}x{}_{}'.format(configer.dsize[0], configer.dsize[1], splitidx)
                configer.modelbase = 'recognize_vgg11_bn'


                if configer.datatype == 'Multi':
                    configer.usedChannels = usedChannels
                    configer.n_usedChannels = len(configer.usedChannels)
                    configer.modelname = '{}_{}_{}'.\
                                    format(configer.modelbase, configer.splitmode, 
                                            '_'.join(list(map(str, configer.usedChannels))))
                elif configer.datatype == 'RGB':
                    configer.usedChannels = usedChannels
                    configer.n_usedChannels = len(configer.usedChannels)
                    configer.modelname = '{}_{}_{}'.\
                                    format(configer.modelbase, configer.splitmode, configer.usedChannels)


                configer.datapath = '/datasets/ECUST2019_{}x{}'.\
                                                format(configer.dsize[0], configer.dsize[1])
                configer.logspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/logs/{}_{}_{}subjects_logs'.\
                                                format(configer.modelbase, configer.splitmode, configer.n_class)
                configer.mdlspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/modelfiles/{}_{}_{}subjects_models'.\
                                                format(configer.modelbase, configer.splitmode, configer.n_class)

                train(configer)
                test(configer)
def main_pca():
    from tensorPCA import NDarrayPCA

    for splitidx in range(1, 6):
        
        configer = EasyDict()

        configer.dsize = (64, 64)
        configer.datatype = 'Multi'
        configer.n_epoch   = 500
        configer.lrbase = 0.0001

        configer.n_channel = 23
        configer.n_class = 63
        configer.batchsize = 32
        configer.stepsize = 250
        configer.gamma = 0.2
        configer.cuda = True

        configer.splitmode = 'split_{}x{}_{}'.format(configer.dsize[0], configer.dsize[1], splitidx)
        configer.modelbase = 'recognize_vgg11_bn'

        configer.usedChannels = [550+i*20 for i in range(23)]
        configer.n_usedChannels = len(configer.usedChannels)
        configer.modelname = '{}_{}_{}_PCA'.\
                        format(configer.modelbase, configer.splitmode, 
                                '_'.join(list(map(str, configer.usedChannels))))

        configer.datapath = '/home/louishsu/Work/Workspace/ECUST2019_{}x{}'.\
                                        format(configer.dsize[0], configer.dsize[1])
        configer.logspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/logs/{}_{}_{}subjects_logs'.\
                                        format(configer.modelbase, configer.splitmode, configer.n_class)
        configer.mdlspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/modelfiles/{}_{}_{}subjects_models'.\
                                        format(configer.modelbase, configer.splitmode, configer.n_class)


        trainset = RecognizeDataset(configer.datapath, configer.datatype, configer.splitmode, 'train', configer.usedChannels)
        validset = RecognizeDataset(configer.datapath, configer.datatype, configer.splitmode, 'valid', configer.usedChannels)
        trainloader = DataLoader(trainset, configer.batchsize, shuffle=True)
        validloader = DataLoader(validset, configer.batchsize, shuffle=False)

        for chs in range(10, 24):

            print(getTime(), splitidx, 'reduce to ', chs, '...')
            
            ## fit pca
            decomposer = NDarrayPCA(n_components=[chs, 64, 64])
            traindata = np.concatenate([trainset.samplelist[i][0].numpy()[np.newaxis] for i in range(len(trainset.samplelist))], axis=0)
            decomposer.fit(traindata)
            del traindata

            ## model
            modelpath = os.path.join(configer.mdlspath, configer.modelname) + '{}chs.pkl'.format(chs)
            modeldir  = '/'.join(modelpath.split('/')[:-1])
            if not os.path.exists(modeldir): os.makedirs(modeldir)
            model = modeldict[configer.modelbase](chs, configer.n_class, configer.dsize[0])
            if configer.cuda and is_available(): model.cuda()

            ## loss
            loss = nn.CrossEntropyLoss()
            params = model.parameters()
            optimizer = optim.Adam(params, configer.lrbase, weight_decay=1e-3)
            scheduler = lr_scheduler.StepLR(optimizer, configer.stepsize, configer.gamma)
            logpath = os.path.join(configer.logspath, configer.modelname) + '{}chs'.format(chs)
            if not os.path.exists(logpath): os.makedirs(logpath)
            logger = SummaryWriter(logpath)

            ## initialize
            acc_train = 0.
            acc_valid = 0.
            loss_train = float('inf')
            loss_valid = float('inf')
            loss_valid_last = float('inf')


            ## start training
            for i_epoch in range(configer.n_epoch):

                if configer.cuda and is_available(): empty_cache()
                scheduler.step(i_epoch)
                acc_train = []; acc_valid = []
                loss_train = []; loss_valid = []

                model.train()
                for i_batch, (X, y) in enumerate(trainloader):

                    X = torch.from_numpy(decomposer.transform(X.numpy()))
                    
                    # get batch
                    X = Variable(X.float()); y = Variable(y)
                    if configer.cuda and is_available():
                        X = X.cuda(); y = y.cuda()

                    # forward
                    y_pred_prob = model(X)
                    loss_i = loss(y_pred_prob, y)
                    acc_i  = accuracy(y_pred_prob, y)

                    # backward
                    optimizer.zero_grad()
                    loss_i.backward() 
                    optimizer.step()

                    loss_train += [loss_i.detach().cpu().numpy()]
                    acc_train  += [acc_i.cpu().numpy()]
                
                model.eval()
                for i_batch, (X, y) in enumerate(validloader):

                    X = torch.from_numpy(decomposer.transform(X.numpy()))
                    
                    # get batch
                    X = Variable(X.float()); y = Variable(y)
                    if configer.cuda and is_available():
                        X = X.cuda(); y = y.cuda()

                    # forward
                    y_pred_prob = model(X)
                    loss_i = loss(y_pred_prob, y)
                    acc_i  = accuracy(y_pred_prob, y)

                    loss_valid += [loss_i.detach().cpu().numpy()]
                    acc_valid  += [acc_i.cpu().numpy()]

                loss_train = np.mean(np.array(loss_train))
                acc_train  = np.mean(np.array(acc_train))
                loss_valid = np.mean(np.array(loss_valid))
                acc_valid  = np.mean(np.array(acc_valid))
                
                logger.add_scalars('accuracy', {'train': acc_train,  'valid': acc_valid},  i_epoch)
                logger.add_scalars('logloss',  {'train': loss_train, 'valid': loss_valid}, i_epoch)
                logger.add_scalar('lr', scheduler.get_lr()[-1], i_epoch)

                if loss_valid_last > loss_valid:

                    loss_valid_last = loss_valid
                    torch.save(model, modelpath)



            ## start testing
            model.eval()
            testset = RecognizeDataset(configer.datapath, configer.datatype, configer.splitmode, 'test', configer.usedChannels)
            testloader = DataLoader(testset, configer.batchsize, shuffle=False)
            loss_test = []
            acc_test  = []
            output = None
            for i_batch, (X, y) in enumerate(testloader):

                X = torch.from_numpy(decomposer.transform(X.numpy()))

                # get batch
                X = Variable(X.float()); y = Variable(y)
                if configer.cuda and is_available():
                    X = X.cuda(); y = y.cuda()
                # forward
                y_pred_prob = model(X)
                loss_i = loss(y_pred_prob, y)
                acc_i  = accuracy(y_pred_prob, y)
                # log
                loss_test += [loss_i.detach().cpu().numpy()]
                acc_test  += [acc_i.cpu().numpy()]

                # save output
                if output is None:
                    output = y_pred_prob.detach().cpu().numpy()
                else:
                    output = np.concatenate([output, y_pred_prob.detach().cpu().numpy()], axis=0)

            # print('------------------------------------------------------------------------------------------------------------------')

            loss_test = np.mean(np.array(loss_test))
            acc_test  = np.mean(np.array(acc_test))
            print_log = "{} || test | acc: {:2.2%}, loss: {:4.4f}".\
                    format(getTime(), acc_test, loss_test)
            print(print_log)
            with open(os.path.join(logpath, 'test_log.txt'), 'w') as f:
                f.write(print_log + '\n')
            np.save(os.path.join(logpath, 'test_out.npy'), output)
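`NDarrayPCA` comes from the local `tensorPCA` module, which is not shown. A minimal substitute sketch, assuming `n_components=[chs, 64, 64]` means only the channel axis of (N, C, H, W) arrays is reduced (the class name `ChannelPCA` is hypothetical):

    import numpy as np
    from sklearn.decomposition import PCA

    class ChannelPCA:

        def __init__(self, n_components):
            self.pca = PCA(n_components=n_components)

        def fit(self, X):
            # treat every pixel of every sample as one C-dimensional observation
            N, C, H, W = X.shape
            self.pca.fit(X.transpose(0, 2, 3, 1).reshape(-1, C))
            return self

        def transform(self, X):
            N, C, H, W = X.shape
            Z = self.pca.transform(X.transpose(0, 2, 3, 1).reshape(-1, C))
            return Z.reshape(N, H, W, -1).transpose(0, 3, 1, 2)

Under that assumption, `ChannelPCA(n_components=chs).fit(traindata)` would stand in for `NDarrayPCA(n_components=[chs, 64, 64])`.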
def main_finetune_channels():

    # Basis for band selection
    # Optimal band ordering:
    #       [850, 870, 930, 730, 790, 910, 770, 750, 670, 950, 990, 830, 890, 810, 970, 690, 710, 650, 590, 570, 630, 610, 550]
    # Add one band at a time, finetuning from the previous model

    CHANNEL_SORT = [850, 870, 930, 730, 790, 910, 770, 750, 670, 950, 990, 830, 890, 810, 970, 690, 710, 650, 590, 570, 630, 610, 550]
    
    for splitidx in range(4, 5):
        usedChannelsList = [CHANNEL_SORT[:i+1] for i in range(23)]

        # for i_usedChannels in range(len(usedChannelsList)):
        for i_usedChannels in [4, 6]:

            usedChannels = usedChannelsList[i_usedChannels]

            print(getTime(), splitidx, len(usedChannels), '...')

            configer = EasyDict()

            configer.dsize = (64, 64)
            configer.datatype = 'Multi'
            configer.n_epoch   = 300
            configer.lrbase = 0.001

            configer.n_channel = 23
            configer.n_class = 63
            configer.batchsize = 32
            configer.stepsize = 250
            configer.gamma = 0.2
            configer.cuda = True

            configer.splitmode = 'split_{}x{}_{}'.format(configer.dsize[0], configer.dsize[1], splitidx)
            configer.modelbase = 'recognize_mobilenet'

            configer.usedChannels = usedChannels
            configer.n_usedChannels = len(configer.usedChannels)
            configer.modelname = '{}_{}_{}_finetune'.\
                            format(configer.modelbase, configer.splitmode, 
                                    '_'.join(list(map(str, configer.usedChannels))))


            configer.datapath = '/home/louishsu/Work/Workspace/ECUST2019_{}x{}'.\
                                            format(configer.dsize[0], configer.dsize[1])
            configer.logspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/logs/{}_{}_{}subjects_logs'.\
                                            format(configer.modelbase, configer.splitmode, configer.n_class)
            configer.mdlspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/modelfiles/{}_{}_{}subjects_models'.\
                                            format(configer.modelbase, configer.splitmode, configer.n_class)


            ## datasets
            trainset = RecognizeDataset(configer.datapath, configer.datatype, configer.splitmode, 'train', configer.usedChannels)
            validset = RecognizeDataset(configer.datapath, configer.datatype, configer.splitmode, 'valid', configer.usedChannels)
            trainloader = DataLoader(trainset, configer.batchsize, shuffle=True)
            validloader = DataLoader(validset, configer.batchsize, shuffle=False)


            ## ============================================================================================
            ## model
            modelpath = os.path.join(configer.mdlspath, configer.modelname) + '.pkl'
            modeldir  = '/'.join(modelpath.split('/')[:-1])
            if not os.path.exists(modeldir): os.makedirs(modeldir)
            
            if i_usedChannels == 0:
                model = modeldict[configer.modelbase](configer.n_usedChannels, configer.n_class, configer.dsize[0])
                params = model.parameters()
                torch.save(model, modelpath)
            else:
                modelpath_pretrain = os.path.join(
                    modeldir, '{}_{}_{}_finetune.pkl'.format(configer.modelbase, configer.splitmode, 
                                                '_'.join(list(map(str, usedChannelsList[i_usedChannels-1])))))
                model = torch.load(modelpath_pretrain)
                # swap in a fresh first conv for the new channel count; deeper layers keep their pretrained weights
                model.features[0] = nn.Conv2d(len(usedChannels), 64, 3, stride=1, padding=1)
                params = [
                    # pretrained layers get a much smaller learning rate; the new first conv uses the base rate
                    {'params': model.features[1:].parameters(), 'lr': configer.lrbase*0.01, },
                    {'params': model.features[0].parameters(),}
                ]
                torch.save(model, modelpath)
            if configer.cuda and is_available(): model.cuda()
            ## ============================================================================================


            ## optimizer
            optimizer = optim.Adam(params, configer.lrbase, weight_decay=1e-3)
            
            ## loss
            loss = nn.CrossEntropyLoss()

            ## learning rate scheduler
            scheduler = lr_scheduler.StepLR(optimizer, configer.stepsize, configer.gamma)
            
            ## log
            logpath = os.path.join(configer.logspath, configer.modelname)
            if not os.path.exists(logpath): os.makedirs(logpath)
            logger = SummaryWriter(logpath)

            ## initialize
            acc_train = 0.
            acc_valid = 0.
            loss_train = float('inf')
            loss_valid = float('inf')
            loss_valid_last = float('inf')


            ## start training
            for i_epoch in range(configer.n_epoch):

                if configer.cuda and is_available(): empty_cache()
                scheduler.step(i_epoch)
                acc_train = []; acc_valid = []
                loss_train = []; loss_valid = []


                model.train()
                for i_batch, (X, y) in enumerate(trainloader):
                    
                    # get batch
                    X = Variable(X.float()); y = Variable(y)
                    if configer.cuda and is_available():
                        X = X.cuda(); y = y.cuda()

                    # forward
                    y_pred_prob = model(X)
                    loss_i = loss(y_pred_prob, y)
                    acc_i  = accuracy(y_pred_prob, y)

                    # backward
                    optimizer.zero_grad()
                    loss_i.backward() 
                    optimizer.step()

                    loss_train += [loss_i.detach().cpu().numpy()]
                    acc_train  += [acc_i.cpu().numpy()]
                
                model.eval()
                for i_batch, (X, y) in enumerate(validloader):
                    
                    # get batch
                    X = Variable(X.float()); y = Variable(y)
                    if configer.cuda and is_available():
                        X = X.cuda(); y = y.cuda()

                    # forward
                    y_pred_prob = model(X)
                    loss_i = loss(y_pred_prob, y)
                    acc_i  = accuracy(y_pred_prob, y)

                    loss_valid += [loss_i.detach().cpu().numpy()]
                    acc_valid  += [acc_i.cpu().numpy()]

                loss_train = np.mean(np.array(loss_train))
                acc_train  = np.mean(np.array(acc_train))
                loss_valid = np.mean(np.array(loss_valid))
                acc_valid  = np.mean(np.array(acc_valid))
                
                logger.add_scalars('accuracy', {'train': acc_train,  'valid': acc_valid},  i_epoch)
                logger.add_scalars('logloss',  {'train': loss_train, 'valid': loss_valid}, i_epoch)
                logger.add_scalar('lr', scheduler.get_lr()[-1], i_epoch)

                if loss_valid_last > loss_valid:

                    loss_valid_last = loss_valid
                    torch.save(model, modelpath)


            test(configer)
def main_several_channels_k_fold(k=5):

    # Basis for band selection
    # Optimal band ordering:
    #       [850, 870, 930, 730, 790, 910, 770, 750, 670, 950, 990, 830, 890, 810, 970, 690, 710, 650, 590, 570, 630, 610, 550]
    # Run experiments on each cumulative band combination in turn, i.e. [[850], [850, 870], [850, 870, 930], ..., [850, ..., 550]]
    # Run 5-fold cross-validation for each band combination
    # Read `train/valid/test.txt` from `split_64x64_1` and split the files into k folds in order

    class KFoldDataset(Dataset):
        def __init__(self, datapath, filelist, usedChannels):
            filelist = list(map(lambda x: os.path.join('/'.join(datapath.split('/')[:-1]), x.strip()), filelist))
            self.samplelist = list(map(lambda x: [RecognizeDataset._load_image(x, 'Multi', usedChannels), getLabel(x)-1], filelist))
        def __getitem__(self, index):
            image, label = self.samplelist[index]
            return image, label
        def __len__(self):
            return len(self.samplelist)        


    CHANNEL_SORT = [850, 870, 930, 730, 790, 910, 770, 750, 670, 950, 990, 830, 890, 810, 970, 690, 710, 650, 590, 570, 630, 610, 550]
    usedChannelsList = [CHANNEL_SORT[:i+1] for i in range(23)]

    ## read all split files
    filelist = []
    for mode in ['train', 'valid', 'test']:
        with open('./split/split_64x64_1/{}.txt'.format(mode), 'r') as f:
            filelist += f.readlines()

    ## divide the files into k folds
    n_files_fold = len(filelist) // k
    foldlist = []
    for i in range(k-1):
        foldlist += [filelist[i*n_files_fold: (i+1)*n_files_fold]]
    foldlist += [filelist[(k-1)*n_files_fold: ]]

    for i in range(k):

        ## k-fold cross-validation: fold i for validation, the rest for training
        validlist = foldlist[i]
        trainlist = list(filter(lambda x: x not in validlist, filelist))

        for i_usedChannels in range(len(usedChannelsList)):
            usedChannels = usedChannelsList[i_usedChannels]
            
            print(getTime(), '[', i, '/', k, ']', len(usedChannels), '...')

            configer = EasyDict()
            configer.dsize = (64, 64)
            configer.datatype = 'Multi'
            configer.n_epoch   = 300
            configer.lrbase = 0.001
            configer.n_channel = 23
            configer.n_class = 63
            configer.batchsize = 32
            configer.stepsize = 250
            configer.gamma = 0.2
            configer.cuda = True
            configer.splitmode = 'split_{}x{}_1'.format(configer.dsize[0], configer.dsize[1])
            configer.modelbase = 'recognize_vgg11_bn'
            configer.usedChannels = usedChannels
            configer.n_usedChannels = len(configer.usedChannels)
            configer.modelname = '{}_{}_{}_[{}_{}]fold'.\
                            format(configer.modelbase, configer.splitmode, 
                                    '_'.join(list(map(str, configer.usedChannels))), i+1, k)
            configer.datapath = '/home/louishsu/Work/Workspace/ECUST2019_{}x{}'.\
                                            format(configer.dsize[0], configer.dsize[1])
            configer.logspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/logs/{}_{}_{}subjects_logs'.\
                                            format(configer.modelbase, configer.splitmode, configer.n_class)
            configer.mdlspath = '/home/louishsu/Work/Workspace/HUAWEI/pytorch/modelfiles/{}_{}_{}subjects_models'.\
                                            format(configer.modelbase, configer.splitmode, configer.n_class)

            ## datasets
            trainset = KFoldDataset(configer.datapath, trainlist, usedChannels)
            validset = KFoldDataset(configer.datapath, validlist, usedChannels)
            trainloader = DataLoader(trainset, configer.batchsize, shuffle=True)
            validloader = DataLoader(validset, configer.batchsize, shuffle=False)

            ## model
            modelpath = os.path.join(configer.mdlspath, configer.modelname) + '.pkl'
            modeldir  = '/'.join(modelpath.split('/')[:-1])
            if not os.path.exists(modeldir): os.makedirs(modeldir)
            model = modeldict[configer.modelbase](configer.n_usedChannels, configer.n_class, configer.dsize[0])
            if configer.cuda and is_available(): model.cuda()

            ## loss
            loss = nn.CrossEntropyLoss()
            params = model.parameters()
            optimizer = optim.Adam(params, configer.lrbase, weight_decay=1e-3)
            scheduler = lr_scheduler.StepLR(optimizer, configer.stepsize, configer.gamma)
            logpath = os.path.join(configer.logspath, configer.modelname)
            if not os.path.exists(logpath): os.makedirs(logpath)
            logger = SummaryWriter(logpath)

            ## initialize
            acc_train = 0.
            acc_valid = 0.
            loss_train = float('inf')
            loss_valid = float('inf')
            loss_valid_last = float('inf')

            ## start training
            for i_epoch in range(configer.n_epoch):

                if configer.cuda and is_available(): empty_cache()
                scheduler.step(i_epoch)
                acc_train = []; acc_valid = []
                loss_train = []; loss_valid = []

                model.train()
                for i_batch, (X, y) in enumerate(trainloader):
                    
                    # get batch
                    X = Variable(X.float()); y = Variable(y)
                    if configer.cuda and is_available():
                        X = X.cuda(); y = y.cuda()

                    # forward
                    y_pred_prob = model(X)
                    loss_i = loss(y_pred_prob, y)
                    acc_i  = accuracy(y_pred_prob, y)

                    # backward
                    optimizer.zero_grad()
                    loss_i.backward() 
                    optimizer.step()
                    
                    loss_train += [loss_i.detach().cpu().numpy()]
                    acc_train  += [acc_i.cpu().numpy()]

                loss_train = np.mean(np.array(loss_train))
                acc_train  = np.mean(np.array(acc_train))
                
                logger.add_scalar('accuracy', acc_train,  i_epoch)
                logger.add_scalar('logloss',  loss_train, i_epoch)
                logger.add_scalar('lr', scheduler.get_lr()[-1], i_epoch)

            ## start testing
            model.eval()
            loss_test = []
            acc_test  = []
            output = None
            for i_batch, (X, y) in enumerate(validloader):
                # get batch
                X = Variable(X.float()); y = Variable(y)
                if configer.cuda and is_available():
                    X = X.cuda(); y = y.cuda()
                # forward
                y_pred_prob = model(X)
                loss_i = loss(y_pred_prob, y)
                acc_i  = accuracy(y_pred_prob, y)
                # log
                loss_test += [loss_i.detach().cpu().numpy()]
                acc_test  += [acc_i.cpu().numpy()]

                # save output
                if output is None:
                    output = y_pred_prob.detach().cpu().numpy()
                else:
                    output = np.concatenate([output, y_pred_prob.detach().cpu().numpy()], axis=0)

            # print('------------------------------------------------------------------------------------------------------------------')

            loss_test = np.mean(np.array(loss_test))
            acc_test  = np.mean(np.array(acc_test))
            print_log = "{} || test | acc: {:2.2%}, loss: {:4.4f}".\
                    format(getTime(), acc_test, loss_test)
            print(print_log)
            with open(os.path.join(logpath, 'test_log.txt'), 'w') as f:
                f.write(print_log + '\n')
            np.save(os.path.join(logpath, 'test_out.npy'), output)
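The manual fold construction above can also be written with scikit-learn. An equivalent index-based sketch over the same `filelist` and `k` (illustration only; fold sizes may differ slightly, since `KFold` spreads the remainder over the first folds instead of the last one):

    from sklearn.model_selection import KFold

    kf = KFold(n_splits=k, shuffle=False)
    for train_idx, valid_idx in kf.split(filelist):
        trainlist = [filelist[j] for j in train_idx]
        validlist = [filelist[j] for j in valid_idx]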