Example No. 1
    def __init__(self, settings):
        super(Trainer, self).__init__()
        self.settings = settings
        self.phase = settings.cmd
        self.batch_size = settings.batch_size
        self.data_dir = settings.data_dir
        self.list_dir = settings.list_dir
        self.checkpoint = settings.resume
        self.load_checkpoint = (len(self.checkpoint) > 0)
        self.num_epochs = settings.num_epochs
        self.lr = float(settings.lr)
        self.save = settings.save_on or settings.out_dir
        self.from_pause = self.settings.continu
        self.path_ctrl = settings.global_path
        self.path = self.path_ctrl.get_path

        log_dir = '' if settings.log_off else self.path_ctrl.get_dir('log')
        self.logger = Logger(scrn=True, log_dir=log_dir, phase=self.phase)

        for k, v in sorted(settings.__dict__.items()):
            self.logger.show("{}: {}".format(k, v))

        self.start_epoch = 0
        self._init_max_acc = 0.0

        self.model = None
        self.criterion = None
Example No. 2
    def __init__(self,
                 data_dir,
                 ckp_path,
                 save_lr=False,
                 list_dir='',
                 out_dir='./',
                 log_dir=''):
        super(Predictor, self).__init__()
        self.data_dir = data_dir
        self.list_dir = list_dir
        self.out_dir = out_dir
        self.checkpoint = ckp_path
        self.save_lr = save_lr

        self.logger = Logger(scrn=True, log_dir=log_dir, phase='test')

        self.model = None
Example No. 3
def set_gpc_and_logger(args):
    gpc = OutPathGetter(root=os.path.join(args.exp_dir, args.tag),
                        suffix=args.suffix)

    log_dir = '' if args.log_off else gpc.get_dir('log')
    logger = Logger(scrn=True, log_dir=log_dir, phase=args.cmd)

    register('GPC', gpc)
    register('LOGGER', logger)

    return gpc, logger
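
For reference, a hypothetical invocation of set_gpc_and_logger from an argparse-based entry point; the flag names (--exp-dir, --tag, --suffix, --log-off) and the positional cmd simply mirror the attributes accessed above and are assumptions, not the project's actual CLI:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('cmd', choices=['train', 'val'])   # phase, read as args.cmd
parser.add_argument('--exp-dir', default='./exp')
parser.add_argument('--tag', default='baseline')
parser.add_argument('--suffix', default='')
parser.add_argument('--log-off', action='store_true')
args = parser.parse_args()

gpc, logger = set_gpc_and_logger(args)
logger.show("logging to: {}".format(gpc.get_dir('log')))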
Example No. 4
    def __init__(self,
                 model=None,
                 mode='folder',
                 save_dir=None,
                 scrn=True,
                 log_dir=None,
                 cuda_off=False):

        self.save_dir = save_dir
        self.output = None

        if not cuda_off and torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')

        assert model is not None, "The model must be assigned"
        self.model = self._model_init(model)

        if mode not in Predictor.modes:
            raise NotImplementedError

        self.logger = Logger(scrn=scrn, log_dir=log_dir, phase='predict')

        if mode == 'dataloader':
            self._predict = partial(self._predict_dataloader,
                                    dataloader=None,
                                    save_dir=save_dir)
        elif mode == 'folder':
            # self.suffix = ['.jpg', '.png', '.bmp', '.gif', '.npy']  # supported image formats
            self._predict = partial(self._predict_folder, save_dir=save_dir)
        elif mode == 'list':
            self._predict = partial(self._predict_list, save_dir=save_dir)
        elif mode == 'file':
            self._predict = partial(self._predict_file, save_dir=save_dir)
        elif mode == 'data':
            self._predict = partial(self._predict_data, save_dir=save_dir)
        else:
            raise NotImplementedError
Example No. 5
# @FileName: main_wl.py
# @ProjectName :Facility_Location_WL
"""

from utils.util import DataHandler
# NOTE: each of these imports rebinds the same name, so only the last one
# (core.model_3.FacilityLocation) actually takes effect.
from core.complex_model import FacilityLocation
from core.model import FacilityLocation
from core.cutoff_model import FacilityLocation
# from core.model import FacilityLocation
# from core.model_2 import FacilityLocation
from core.model_3 import FacilityLocation
from utils.misc import Logger
import pandas as pd
import os
# define the log file and log level
log = Logger(log_path='./log').logger
# define the configuration parameters


class Config(object):
    """
    define all parameters
    """
    # TODO: tune the capacities of the CDC and RDC
    rdc_capacity = 5000000000000
    num_rdc = 1
    num_cdc = 4
    P_c = 0.95
    P_b = 0.8
    rr_cdc = 0.00
    weight_avg = 100
Example No. 6
class Predictor:
    modes = ['dataloader', 'folder', 'list', 'file', 'data']

    def __init__(self,
                 model=None,
                 mode='folder',
                 save_dir=None,
                 scrn=True,
                 log_dir=None,
                 cuda_off=False):

        self.save_dir = save_dir
        self.output = None

        if not cuda_off and torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')

        assert model is not None, "The model must be assigned"
        self.model = self._model_init(model)

        if mode not in Predictor.modes:
            raise NotImplementedError

        self.logger = Logger(scrn=scrn, log_dir=log_dir, phase='predict')

        if mode == 'dataloader':
            self._predict = partial(self._predict_dataloader,
                                    dataloader=None,
                                    save_dir=save_dir)
        elif mode == 'folder':
            # self.suffix = ['.jpg', '.png', '.bmp', '.gif', '.npy']  # supported image formats
            self._predict = partial(self._predict_folder, save_dir=save_dir)
        elif mode == 'list':
            self._predict = partial(self._predict_list, save_dir=save_dir)
        elif mode == 'file':
            self._predict = partial(self._predict_file, save_dir=save_dir)
        elif mode == 'data':
            self._predict = partial(self._predict_data, save_dir=save_dir)
        else:
            raise NotImplementedError

    def __call__(self, *args, **kwargs):
        return self._predict(*args, **kwargs)

    def _model_init(self, model):
        model.to(self.device)
        model.eval()
        return model

    def _load_data(self, path):
        return io.imread(path)

    def _to_tensor(self, arr):
        return to_tensor(arr)

    def _to_array(self, tensor):
        return to_array(tensor)

    def _normalize(self, tensor):
        return normalize(tensor)

    def _np2tensor(self, arr):
        nor_tensor = self._normalize(self._to_tensor(arr))
        assert isinstance(nor_tensor, torch.Tensor)
        return nor_tensor

    def _save_data_NTIRE2020(self, data, path):
        s_dir = os.path.dirname(path)
        if not os.path.exists(s_dir):
            os.mkdir(s_dir)
        path = path.replace('_clean.png',
                            '.mat').replace('_RealWorld.png', '.mat')
        if isinstance(data, torch.Tensor):
            data = self._to_array(data).squeeze()

        content = {}
        content['cube'] = data
        content['bands'] = np.array([[
            400, 410, 420, 430, 440, 450, 460, 470, 480, 490, 500, 510, 520,
            530, 540, 550, 560, 570, 580, 590, 600, 610, 620, 630, 640, 650,
            660, 670, 680, 690, 700
        ]])
        # content['norm_factor'] =
        hdf5.write(data=content,
                   filename=path,
                   store_python_metadata=True,
                   matlab_compatible=True)

    def _save_data(self, data, path):
        s_dir = os.path.dirname(path)
        if not os.path.exists(s_dir):
            os.mkdir(s_dir)

        torchvision.utils.save_image(data, path)

    def predict_base(self, model, data, path=None):
        start = time.time()
        with torch.no_grad():
            output = model(data)
        if self.device.type == 'cuda':
            torch.cuda.synchronize()  # wait for GPU work to finish so the timing is accurate
        su_time = time.time() - start
        if path:
            self._save_data_NTIRE2020(output, path)

        self.output = output
        return output, su_time

    def _predict_dataloader(self, dataloader, save_dir=None):
        assert dataloader is not None, \
            "In 'dataloader' mode the input must be a valid dataloader!"
        consume_time = AverageMeter()
        pb = tqdm(dataloader)
        for idx, (name, data) in enumerate(pb):
            assert isinstance(data, torch.Tensor) and data.dim() == 4, \
            "input data must be a 4-dimensional tensor"
            data = data.to(self.device)  # 4-d tensor
            save_path = os.path.join(save_dir, name) if save_dir else None
            _, su_time = self.predict_base(self.model, data, path=save_path)
            consume_time.update(su_time, n=1)

            # logger
            description = (
                "[{}/{}] speed: {time.val:.4f}s({time.avg:.4f}s)".format(
                    idx + 1, len(dataloader.dataset), time=consume_time))
            pb.set_description(description)
            self.logger.dump(description)

    def _predict_folder(self, folder, save_dir=None):
        assert folder is not None and os.path.isdir(folder),\
        "In 'folder' mode the input must be a valid path of a folder!"
        consume_time = AverageMeter()
        file_list = glob.glob(os.path.join(folder, '*'))

        assert not len(file_list) == 0, "The input folder is empty"

        pb = tqdm(file_list)  # progress bar

        for idx, file in enumerate(pb):
            img = self._load_data(file)
            name = os.path.basename(file)
            img = self._np2tensor(img).unsqueeze(0).to(self.device)
            save_path = os.path.join(save_dir, name) if save_dir else None
            _, su_time = self.predict_base(model=self.model,
                                           data=img,
                                           path=save_path)
            consume_time.update(su_time)

            # logger
            description = (
                "[{}/{}] speed: {time.val:.4f}s({time.avg:.4f}s)".format(
                    idx + 1, len(file_list), time=consume_time))
            pb.set_description(description)
            self.logger.dump(description)

    def _predict_list(self, file_list, save_dir=None):
        assert isinstance(file_list, list),\
        "In 'list' mode the input must be a valid file_path list!"
        consume_time = AverageMeter()

        assert not len(file_list) == 0, "The input file list is empty!"

        pb = tqdm(file_list)  # progress bar

        for idx, path in enumerate(pb):
            data = self._load_data(path)
            name = os.path.basename(path)
            data = self._np2tensor(data).unsqueeze(0).to(self.device)
            path = os.path.join(save_dir, name) if save_dir else None
            _, su_time = self.predict_base(model=self.model,
                                           data=data,
                                           path=path)
            consume_time.update(su_time, n=1)

            # logger
            description = (
                "[{}/{}] speed: {time.val:.4f}s({time.avg:.4f}s)".format(
                    idx + 1, len(file_list), time=consume_time))
            pb.set_description(description)
            self.logger.dump(description)

    def _predict_file(self, file_path, save_dir=None):
        assert isinstance(file_path, str) and os.path.isfile(file_path), \
        "In 'file' mode the input must a valid path of a file!"

        consume_time = AverageMeter()
        data = self._load_data(file_path)
        name = os.path.basename(file_path)
        data = self._np2tensor(data).unsqueeze(0).to(self.device)
        path = os.path.join(save_dir, name) if save_dir else None

        _, su_time = self.predict_base(model=self.model, data=data, path=path)
        consume_time.update(su_time)

        # logger
        description = ("file: {}  speed: {time.val:.4f}s".format(
            name, time=consume_time))

        self.logger.show(description)

    def _predict_data(self, data, save_dir=None):  # save_dir is accepted for interface consistency but unused
        """
        :return: tensor
        """

        assert isinstance(data, torch.Tensor) and data.dim() == 4, \
        "In 'data' mode the input must be a 4-d tensor"

        consume_time = AverageMeter()
        output, su_time = self.predict_base(model=self.model, data=data)

        consume_time.update(su_time)

        # logger
        description = ("speed: {time.val:.4f}s".format(time=consume_time))

        self.logger.dump(description)

        return output
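
A brief usage sketch for the 'folder' mode of this Predictor; the model object and the paths below are placeholders, not part of the original code:

# Hypothetical usage: run the trained model over every image in a folder
# and write each output under ./results using the input file name.
net = build_model()                                   # placeholder for a torch.nn.Module with loaded weights
predictor = Predictor(model=net, mode='folder', save_dir='./results')
predictor('./test_images')                            # __call__ dispatches to _predict_folder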
Example No. 7
RESTORE_FROM = "/data/AutoPheno/green/200527/PatchNet/snapshots-fb/LEAF_UNET_B0064_S010700.pth"
SAVE_PRED_EVERY = 1000
SNAPSHOT_DIR = root_dir + 'PatchNet/snapshots'+postfix
IMGSHOT_DIR = root_dir + 'PatchNet/imgshots'+postfix
WEIGHT_DECAY = 0.0005
NUM_EXAMPLES_PER_EPOCH = 13862
NUM_STEPS_PER_EPOCH = math.ceil(NUM_EXAMPLES_PER_EPOCH / float(BATCH_SIZE))
MAX_ITER = max(NUM_EXAMPLES_PER_EPOCH * MAX_EPOCH + 1,
               NUM_STEPS_PER_EPOCH * BATCH_SIZE * MAX_EPOCH + 1)
if not os.path.exists(SNAPSHOT_DIR):
    os.makedirs(SNAPSHOT_DIR)
if not os.path.exists(IMGSHOT_DIR):
    os.makedirs(IMGSHOT_DIR)

LOG_PATH = SNAPSHOT_DIR + "/B"+format(BATCH_SIZE, "04d")+"E"+format(MAX_EPOCH, "04d")+".log"
sys.stdout = Logger(LOG_PATH, sys.stdout)
print(DATA_LIST_PATH)
print("num of epoch:", MAX_EPOCH)
print("RESTORE_FROM:", RESTORE_FROM)
print(NUM_EXAMPLES_PER_EPOCH)


def get_arguments():
    """Parse all the arguments provided from the CLI.

    Returns:
      The parsed arguments as an argparse.Namespace.
    """
    parser = argparse.ArgumentParser(description="UNet Network")
    parser.add_argument("--set-start", default=False)
    parser.add_argument("--start-step", default=0, type=int)
Example No. 8
def run(try_num, config):
    output_dir = f'./dae-out-{try_num}'

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    args = get_args()

    train_features = pd.read_csv('../input/lish-moa/train_features.csv')
    test_features = pd.read_csv('../input/lish-moa/test_features.csv')

    if args.debug:
        train_features = train_features.loc[:500]
        config.update(dict(n_epochs=3, n_folds=2))

    all_features = pd.concat([train_features,
                              test_features]).reset_index(drop=True)
    g_features_columns = [
        col for col in all_features.columns if col.startswith('g-')
    ]
    c_features_columns = [
        col for col in all_features.columns if col.startswith('c-')
    ]
    feature_columns = g_features_columns + c_features_columns
    n_features = len(feature_columns)

    kfold = MultilabelStratifiedKFold(n_splits=config.n_folds,
                                      random_state=42,
                                      shuffle=True)
    logger = Logger()

    for fold_index, (train_idx, valid_idx) in enumerate(
            kfold.split(all_features.values, all_features.values)):
        print('Fold: ', fold_index + 1, flush=True)

        x_train = all_features.loc[train_idx]
        x_valid = all_features.loc[valid_idx]

        model = new_autoencoder(config.model_kind,
                                n_features=n_features).to(DEVICE)
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=config.learning_rate,
                                     weight_decay=1e-5)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='min',
                                                               factor=0.1,
                                                               patience=3,
                                                               eps=1e-4,
                                                               verbose=True)
        early_stopping = EarlyStopping(patience=10)
        best_score = np.inf

        for epoch in range(config.n_epochs):
            dataset = DaeDataset(x_train,
                                 feature_columns,
                                 noise_ratio=config.noise_ratio)
            dataloader = DataLoader(dataset,
                                    batch_size=config.batch_size,
                                    shuffle=True)

            train_loss = loop_train(model, criterion, dataloader, optimizer)

            dataset = DaeDataset(x_valid,
                                 feature_columns,
                                 noise_ratio=config.noise_ratio)
            dataloader = DataLoader(dataset,
                                    batch_size=config.valid_batch_size,
                                    shuffle=False)
            valid_loss, _ = loop_valid(model, criterion, dataloader)

            scheduler.step(valid_loss)

            logger.update({
                'fold': fold_index,
                'epoch': epoch + 1,
                'train_loss': train_loss,
                'val_loss': valid_loss
            })
            print(
                f'epoch {epoch + 1}/{config.n_epochs}  -  train_loss: {train_loss:.5f}  -  '
                + f'valid_loss: {valid_loss:.5f}',
                flush=True)

            if valid_loss < best_score:
                best_score = valid_loss
                torch.save(model.state_dict(),
                           f'./{output_dir}/dae_fold_weight_{fold_index}.pt')

            if early_stopping.should_stop(valid_loss):
                print('Early stopping', flush=True)
                break

    logger.save(f'./{output_dir}/dae_log.csv')
    oof_preds = []

    for fold_index in range(config.n_folds):
        model = new_autoencoder(config.model_kind,
                                n_features=n_features).to(DEVICE)
        model.load_state_dict(
            torch.load(f'./{output_dir}/dae_fold_weight_{fold_index}.pt'))
        model.eval()

        dataset = DaeDataset(all_features,
                             feature_columns,
                             noise_ratio=config.noise_ratio)
        dataloader = DataLoader(dataset,
                                batch_size=config.valid_batch_size,
                                shuffle=False)

        loss, preds = loop_valid(model, nn.MSELoss(), dataloader)

        logger.update({'fold': fold_index, 'val_loss': loss})
        print('Evaluation   fold: {}  -  valid_loss: {:.5f}'.format(
            fold_index, loss),
              flush=True)

        oof_preds.append(preds)

    print('Overall evaluation score: {:.5f}'.format(
        mean_squared_error(all_features.loc[:, feature_columns].values,
                           np.mean(oof_preds, axis=0))),
          flush=True)

    # for i, preds in enumerate(oof_preds):
    #     create_pred_feature_df(preds, all_features).to_csv(f'./{output_dir}/dae_features_{i}.csv', index=False)
    create_pred_feature_df(np.mean(oof_preds, axis=0), all_features).to_csv(
        f'./{output_dir}/dae_features_mean.csv', index=False)
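
EarlyStopping is used above but not defined in this excerpt; a minimal patience-based sketch consistent with the should_stop(valid_loss) call (an assumption about its behavior):

import numpy as np

class EarlyStopping:
    """Signal a stop when the monitored loss has not improved for `patience` checks."""
    def __init__(self, patience=10):
        self.patience = patience
        self.best = np.inf
        self.counter = 0

    def should_stop(self, loss):
        if loss < self.best:      # improvement: remember it and reset the counter
            self.best = loss
            self.counter = 0
        else:                     # no improvement: count toward patience
            self.counter += 1
        return self.counter >= self.patience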
Example No. 9
References
----------

[1] Narula, S. & Weistroffer,
    H. A flexible method for nonlinear multicriteria decision-making problems Systems,
    Man and Cybernetics, IEEE Transactions on, 1989 , 19 , 883-887.

'''
import sys,os


example_path=os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(example_path,".."))
from utils.misc import Logger 
sys.stdout = Logger(os.path.splitext(os.path.basename(__file__))[0])


from utils import tui

from method.NAUTILUS import NAUTILUSv1,ENAUTILUS

from optimization.OptimizationMethod import PointSearch
from problem.Problem import PreGeneratedProblem


if __name__ == '__main__':
    # SciPy breaks box constraints
    method = ENAUTILUS(PreGeneratedProblem(filename=os.path.join(example_path,"AuxiliaryServices.csv")), PointSearch)
    zh=tui.iter_enautilus(method)
    ci=method.current_iter
Example No. 10
# -*- coding: UTF-8 -*-
"""
# @Time    : 2020/11/17 18:57
# @Author  : peng.wang
# @Email   : [email protected]
# @FileName: main.py
# @ProjectName :Prediction_Optimization
"""
import os
import json
from utils.util import DataHandler
from utils.misc import Logger
from core.model import Scheduler

# define the log file and log level
log = Logger(log_path='./log').logger


# define the configuration parameters
class Config(object):
    Dates = []


if not os.path.exists('results'):
    os.mkdir('results')

# load the data
filename = "data_input.xlsx"
data_ins = DataHandler(file=filename, config=Config)
mode = 'deterministic'  # overridden by the next line; only 'expected' is used below
mode = 'expected'
Example No. 11
class Trainer:
    def __init__(self, settings):
        super(Trainer, self).__init__()
        self.settings = settings
        self.phase = settings.cmd
        self.batch_size = settings.batch_size
        self.data_dir = settings.data_dir
        self.list_dir = settings.list_dir
        self.checkpoint = settings.resume
        self.load_checkpoint = (len(self.checkpoint) > 0)
        self.num_epochs = settings.num_epochs
        self.lr = float(settings.lr)
        self.save = settings.save_on or settings.out_dir
        self.from_pause = self.settings.continu
        self.path_ctrl = settings.global_path
        self.path = self.path_ctrl.get_path

        log_dir = '' if settings.log_off else self.path_ctrl.get_dir('log')
        self.logger = Logger(scrn=True, log_dir=log_dir, phase=self.phase)

        for k, v in sorted(settings.__dict__.items()):
            self.logger.show("{}: {}".format(k, v))

        self.start_epoch = 0
        self._init_max_acc = 0.0

        self.model = None
        self.criterion = None

    def train_epoch(self):
        raise NotImplementedError

    def validate_epoch(self, epoch, store):
        raise NotImplementedError

    def train(self):
        cudnn.benchmark = True

        if self.load_checkpoint:
            self._resume_from_checkpoint()
        max_acc = self._init_max_acc
        best_epoch = self.get_ckp_epoch()

        self.model.cuda()
        self.criterion.cuda()

        end_epoch = self.num_epochs if self.from_pause else self.start_epoch + self.num_epochs
        for epoch in range(self.start_epoch, end_epoch):
            lr = self._adjust_learning_rate(epoch)

            self.logger.show_nl("Epoch: [{0}]\tlr {1:.06f}".format(epoch, lr))
            # Train for one epoch
            self.train_epoch()

            # Evaluate the model on validation set
            self.logger.show_nl("Validate")
            acc = self.validate_epoch(epoch=epoch, store=self.save)

            is_best = acc > max_acc
            if is_best:
                max_acc = acc
                best_epoch = epoch
            self.logger.show_nl(
                "Current: {:.6f} ({:03d})\tBest: {:.6f} ({:03d})\t".format(
                    acc, epoch, max_acc, best_epoch))

            # The checkpoint records the next epoch to run
            self._save_checkpoint(self.model.state_dict(), max_acc, epoch + 1,
                                  is_best)

    def validate(self):
        if self.checkpoint:
            if self._resume_from_checkpoint():
                self.model.cuda()
                self.criterion.cuda()
                self.validate_epoch(self.get_ckp_epoch(), self.save)
        else:
            self.logger.warning("no checkpoint assigned!")

    def _load_pretrained(self):
        raise NotImplementedError

    def _adjust_learning_rate(self, epoch):
        # Note that this does not take effect for separate learning rates
        start_epoch = 0 if self.from_pause else self.start_epoch
        if self.settings.lr_mode == 'step':
            lr = self.lr * (0.5**((epoch - start_epoch) // self.settings.step))
        elif self.settings.lr_mode == 'poly':
            lr = self.lr * (1 - (epoch - start_epoch) /
                            (self.num_epochs - start_epoch))**1.1
        elif self.settings.lr_mode == 'const':
            lr = self.lr
        else:
            raise ValueError('unknown lr mode {}'.format(
                self.settings.lr_mode))

        if lr == self.lr:
            return self.lr

        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
        return lr

    def _resume_from_checkpoint(self):
        if not os.path.isfile(self.checkpoint):
            self.logger.error("=> no checkpoint found at '{}'".format(
                self.checkpoint))
            return False

        self.logger.show("=> loading checkpoint '{}'".format(self.checkpoint))
        checkpoint = torch.load(self.checkpoint)

        state_dict = self.model.state_dict()
        ckp_dict = checkpoint.get('state_dict', checkpoint)
        update_dict = {
            k: v
            for k, v in ckp_dict.items()
            if k in state_dict and state_dict[k].shape == v.shape
        }

        num_to_update = len(update_dict)
        if (num_to_update < len(state_dict)) or (len(state_dict) <
                                                 len(ckp_dict)):
            if self.phase == 'val':
                self.logger.error("=> mismatched checkpoint for validation")
                return False
            self.logger.warning(
                "warning: trying to load an mismatched checkpoint")
            if num_to_update == 0:
                self.logger.error("=> no parameter is to be loaded")
                return False
            else:
                self.logger.warning(
                    "=> {} params are to be loaded".format(num_to_update))
        elif (not self.settings.anew) or (self.phase != 'train'):
            # Note: in non-anew mode, the stored max_acc is not guaranteed to
            # correspond to the loaded checkpoint.
            self.start_epoch = checkpoint.get('epoch', self.start_epoch)
            self._init_max_acc = checkpoint.get('max_acc', self._init_max_acc)

        state_dict.update(update_dict)
        self.model.load_state_dict(state_dict)

        self.logger.show(
            "=> loaded checkpoint '{}' (epoch {}, max_acc {:.4f})".format(
                self.checkpoint, self.get_ckp_epoch(), self._init_max_acc))
        return True

    def _save_checkpoint(self, state_dict, max_acc, epoch, is_best):
        state = {'epoch': epoch, 'state_dict': state_dict, 'max_acc': max_acc}
        # Save history
        history_path = self.path('weight',
                                 CKP_COUNTED.format(e=epoch, s=self.scale),
                                 underline=True)
        if (epoch - self.start_epoch) % self.settings.trace_freq == 0:
            torch.save(state, history_path)
        # Save latest
        latest_path = self.path('weight',
                                CKP_LATEST.format(s=self.scale),
                                underline=True)
        torch.save(state, latest_path)
        if is_best:
            shutil.copyfile(
                latest_path,
                self.path('weight',
                          CKP_BEST.format(s=self.scale),
                          underline=True))

    def get_ckp_epoch(self):
        # Get current epoch of the checkpoint
        # For a mismatched or missing checkpoint, this is 0
        return max(self.start_epoch - 1, 0)
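
Trainer leaves train_epoch and validate_epoch abstract; a schematic subclass showing how they are expected to be wired up (build_model, build_loader, and the accuracy computation are placeholders, not part of the original project):

class ImageTrainer(Trainer):
    def __init__(self, settings):
        super().__init__(settings)
        self.model = build_model(settings)                    # placeholder factory
        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        self.train_loader = build_loader(settings, 'train')   # placeholder factory
        self.val_loader = build_loader(settings, 'val')

    def train_epoch(self):
        self.model.train()
        for data, target in self.train_loader:
            loss = self.criterion(self.model(data.cuda()), target.cuda())
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

    def validate_epoch(self, epoch, store):
        self.model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for data, target in self.val_loader:
                pred = self.model(data.cuda()).argmax(dim=1)
                correct += (pred == target.cuda()).sum().item()
                total += target.numel()
        return correct / max(total, 1)   # train() compares this score against max_acc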
Example No. 12
class Predictor:
    def __init__(self,
                 data_dir,
                 ckp_path,
                 save_lr=False,
                 list_dir='',
                 out_dir='./',
                 log_dir=''):
        super(Predictor, self).__init__()
        self.data_dir = data_dir
        self.list_dir = list_dir
        self.out_dir = out_dir
        self.checkpoint = ckp_path
        self.save_lr = save_lr

        self.logger = Logger(scrn=True, log_dir=log_dir, phase='test')

        self.model = None

    def test_epoch(self):
        raise NotImplementedError

    def test(self):
        if self.checkpoint:
            if self._resume_from_checkpoint():
                self.model.cuda()
                self.model.eval()
                self.test_epoch()
        else:
            self.logger.warning("no checkpoint assigned!")

    def _resume_from_checkpoint(self):
        if not os.path.isfile(self.checkpoint):
            self.logger.error("=> no checkpoint found at '{}'".format(
                self.checkpoint))
            return False

        self.logger.show("=> loading checkpoint '{}'".format(self.checkpoint))
        checkpoint = torch.load(self.checkpoint)

        state_dict = self.model.state_dict()
        ckp_dict = checkpoint.get('state_dict', checkpoint)

        try:
            state_dict.update(ckp_dict)
            # load the merged dict so that keys missing from the checkpoint
            # fall back to the model's current weights
            self.model.load_state_dict(state_dict)
        except KeyError as e:
            self.logger.error("=> mismatched checkpoint for test")
            self.logger.error(e)
            return False
        else:
            self.epoch = checkpoint.get('epoch', 0)

        self.logger.show("=> loaded checkpoint '{}'".format(self.checkpoint))
        return True
Example No. 13
File: util.py  Project: perry-xy/SF
# @Author  : peng.wang
# @Email   : [email protected]
# @FileName: util.py
# @ProjectName :Facility_Location_FangTai
"""

import pandas as pd
import os
import xlwings as xw
from utils.misc import Logger
from pyecharts import options as opts
from pyecharts.charts import Geo
from pyecharts.globals import ChartType, SymbolType
# define the log file
RAW_DATA_PATH = os.path.dirname(os.path.dirname(__file__))
log = Logger(log_path=os.path.join(RAW_DATA_PATH, 'log')).logger


class DataHandler(object):
    """

    """
    def __init__(self, file, config):
        """
        file:文件名
        :param file:
        :param config
        """
        # path for reading the input data file
        self._PATH = os.path.join(os.path.join(RAW_DATA_PATH, 'data'), file)
        self._config = config
Example No. 14
def run(try_num, config):
    args = get_args()

    print('args', args, flush=True)
    print('config:', config.to_dict(), flush=True)

    set_seed(config.rand_seed)

    pretrained_model = "tf_efficientnet_b3_ns"
    model_dir = f'deepinsight-{try_num}'

    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    train_features = pd.read_csv(f"../input/lish-moa/train_features.csv")
    train_targets = pd.read_csv(f"../input/lish-moa/train_targets_scored.csv")
    test_features = pd.read_csv(f"../input/lish-moa/test_features.csv")

    if config.dae_path:
        dae_features = pd.read_csv(config.dae_path)

    if args.debug:
        train_features = train_features.iloc[:500]
        train_targets = train_targets.iloc[:500]
        if config.dae_path:
            dae_features = pd.concat([dae_features.iloc[:500], dae_features.iloc[-3982:]]).reset_index(drop=True)

        config.update(dict(
            kfolds=3,
            n_epoch=3
        ))

    train_features = train_features.sort_values(by=["sig_id"], axis=0, inplace=False).reset_index(drop=True)
    train_targets = train_targets.sort_values(by=["sig_id"], axis=0, inplace=False).reset_index(drop=True)

    cat_features_columns = ["cp_dose", 'cp_time']
    num_feature_columns = [c for c in train_features.columns
                           if c != "sig_id" and c not in cat_features_columns + ['cp_type']]
    all_features_columns = cat_features_columns + num_feature_columns
    target_columns = [c for c in train_targets.columns if c != "sig_id"]
    g_feature_columns = [c for c in num_feature_columns if c.startswith("g-")]
    c_feature_columns = [c for c in num_feature_columns if c.startswith("c-")]

    if config.dae_path:
        if config.dae_strategy == 'replace':
            train_features, test_features = assign_dae_features(
                train_features, test_features, dae_features, len(num_feature_columns))
        else:
            train_features, test_features, dae_feature_columns = merge_dae_features(
                train_features, test_features, dae_features, len(g_feature_columns), len(c_feature_columns))
            all_features_columns += dae_feature_columns

    train_targets = train_targets.loc[train_features['cp_type'] == 'trt_cp'].reset_index(drop=True)
    train_features = train_features.loc[train_features['cp_type'] == 'trt_cp'].reset_index(drop=True)

    if config.normalizer == 'rank':
        train_features, test_features = normalize(train_features, test_features, num_feature_columns)

    for df in [train_features, test_features]:
        df['cp_type'] = df['cp_type'].map({'ctl_vehicle': 0, 'trt_cp': 1})
        df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
        df['cp_time'] = df['cp_time'].map({24: 0, 48: 0.5, 72: 1})

    if config.variance_target_type == 1:
        pickle_path = f'{model_dir}/variance_reduction.pkl'

        variance_target_features = num_feature_columns
        if config.dae_path and config.dae_strategy != 'replace':
            variance_target_features += dae_feature_columns

        if not os.path.exists(pickle_path):
            vt = variance_reduction_fit(train_features, variance_target_features, config.variance_threshold)
            save_pickle(vt, pickle_path)

        vt = load_pickle(pickle_path)
        train_features = variance_reduction_transform(vt, train_features, variance_target_features)
        test_features = variance_reduction_transform(vt, test_features, variance_target_features)
        print('(variance_reduction) Number of features after applying:', len(train_features.columns), flush=True)
        all_features_columns = list(train_features.columns[1:])

    skf = MultilabelStratifiedKFold(n_splits=config.kfolds, shuffle=True, random_state=config.rand_seed)
    y_labels = np.sum(train_targets.drop("sig_id", axis=1), axis=0).index.tolist()
    logger = Logger()

    for fold_index, (train_index, val_index) in enumerate(skf.split(train_features, train_targets[y_labels])):
        if args.only_pred:
            print('Skip training', flush=True)
            break

        print(f'Fold: {fold_index}', train_index.shape, val_index.shape, flush=True)

        X_train = train_features.loc[train_index, all_features_columns].copy().values
        y_train = train_targets.iloc[train_index, 1:].copy().values
        X_valid = train_features.loc[val_index, all_features_columns].copy().values
        y_valid = train_targets.iloc[val_index, 1:].copy().values

        if config.normalizer == 'log':
            scaler = LogScaler()
            if config.norm_apply_all:
                scaler.fit(X_train)
                X_train = scaler.transform(X_train)
                X_valid = scaler.transform(X_valid)
            else:
                target_features = [i for i, c in enumerate(all_features_columns) if c in num_feature_columns]
                non_target_features = [i for i, c in enumerate(all_features_columns) if c not in num_feature_columns]

                scaler.fit(X_train[:, target_features])
                X_train_tr = scaler.transform(X_train[:, target_features])
                X_valid_tr = scaler.transform(X_valid[:, target_features])
                X_train = np.concatenate([X_train[:, non_target_features], X_train_tr], axis=1)
                X_valid = np.concatenate([X_valid[:, non_target_features], X_valid_tr], axis=1)
            save_pickle(scaler, f'{model_dir}/scaler-{fold_index}.pkl')

        transformer = DeepInsightTransformer(
            feature_extractor=config.extractor,
            pixels=config.resolution,
            perplexity=config.perplexity,
            random_state=config.rand_seed,
            n_jobs=-1
        ).fit(X_train)

        save_pickle(transformer, f'{model_dir}/transformer-{fold_index}.pkl')

        model = MoAEfficientNet(
            pretrained_model_name=pretrained_model,
            fc_size=config.fc_size,
            drop_rate=config.drop_rate,
            drop_connect_rate=config.drop_connect_rate,
            weight_init='goog',
        ).to(DEVICE)

        if config.smoothing is not None:
            if config.weighted_loss_weights is not None:
                indices = get_minority_target_index(train_targets, threshold=config.weighted_loss_threshold)
                indices = [int(i not in indices) for i, c in enumerate(target_columns)]
                train_loss_function = SmoothBCEwLogits(
                    smoothing=config.smoothing,
                    weight=config.weighted_loss_weights,
                    weight_targets=indices,
                    n_labels=len(target_columns))
            else:
                train_loss_function = SmoothBCEwLogits(smoothing=config.smoothing)
        else:
            train_loss_function = bce_loss

        eval_loss_function = bce_loss

        optimizer = optim.Adam(model.parameters(), weight_decay=config.weight_decay, lr=config.learning_rate)

        if config.scheduler_type == 'ca':
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.t_max, eta_min=0, last_epoch=-1)
        elif config.scheduler_type == 'ms':
            scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=config.ms_scheduler_milestones, gamma=0.1)
        else:
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', factor=0.1, patience=config.rp_patience, eps=1e-4, verbose=True)

        early_stopping = EarlyStopping(patience=7)
        best_score = np.inf
        start_time = time.time()

        for epoch in range(config.n_epoch):

            if config.swap_enable:
                dataset = MoAImageSwapDataset(
                    X_train,
                    y_train,
                    transformer,
                    image_size=config.image_size,
                    swap_prob=config.swap_prob,
                    swap_portion=config.swap_portion)
            else:
                dataset = MoAImageDataset(X_train, y_train, transformer, image_size=config.image_size)

            dataloader = DataLoader(
                dataset,
                batch_size=config.batch_size,
                shuffle=True,
                num_workers=8,
                pin_memory=True,
                drop_last=False)
            loss = loop_train(model, train_loss_function, dataloader, optimizer)

            if config.scheduler_type == 'rp':
                scheduler.step(loss)
            else:
                scheduler.step()
                for param_group in optimizer.param_groups:
                    print('current learning rate:', param_group['lr'])

            del dataset, dataloader

            dataset = MoAImageDataset(X_valid, y_valid, transformer, image_size=config.image_size)
            dataloader = DataLoader(
                dataset,
                batch_size=config.infer_batch_size,
                shuffle=False,
                num_workers=8,
                pin_memory=True,
                drop_last=False)
            valid_loss, valid_preds = loop_valid(model, eval_loss_function, dataloader)

            del dataset, dataloader

            logger.update({'fold': fold_index, 'epoch': epoch + 1, 'train_loss': loss, 'val_loss': valid_loss})
            print(f'epoch {epoch + 1}/{config.n_epoch}  -  train_loss: {loss:.5f}  -  ' +
                  f'valid_loss: {valid_loss:.5f}  -  elapsed: {time_format(time.time() - start_time)}', flush=True)

            if valid_loss < best_score:
                best_score = valid_loss
                torch.save(model.state_dict(), f'./{model_dir}/deepinsight-{fold_index}.pt')

            if early_stopping.should_stop(valid_loss):
                print('Early stopping', flush=True)
                break

        print(f'Done -> Fold {fold_index}/{config.kfolds}  -  best_valid_loss: {best_score:.5f}  -  ' +
              f'elapsed: {time_format(time.time() - start_time)}', flush=True)

        torch.cuda.empty_cache()
        gc.collect()

        if args.return_first_fold:
            logger.save(f'{model_dir}/log.csv')
            return

    test_preds = np.zeros((test_features.shape[0], len(target_columns)))
    start_time = time.time()
    print('Start inference', flush=True)

    oof_preds = np.zeros((len(train_features), len(target_columns)))
    eval_loss_function = bce_loss

    for fold_index, (train_index, val_index) in enumerate(skf.split(train_features, train_targets[y_labels])):
        print(f'Inference Fold: {fold_index}', train_index.shape, val_index.shape, flush=True)
        X_valid = train_features.loc[val_index, all_features_columns].copy().values
        y_valid = train_targets.iloc[val_index, 1:].copy().values
        X_test = test_features[all_features_columns].values

        if config.normalizer == 'log':
            scaler = load_pickle(f'{model_dir}/scaler-{fold_index}.pkl')
            X_valid = scaler.transform(X_valid)
            X_test = scaler.transform(X_test)

        transformer = load_pickle(f'{model_dir}/transformer-{fold_index}.pkl')
        model = MoAEfficientNet(
            pretrained_model_name=pretrained_model,
            fc_size=config.fc_size,
            drop_rate=config.drop_rate,
            drop_connect_rate=config.drop_connect_rate,
            weight_init='goog',
        ).to(DEVICE)
        model.load_state_dict(torch.load(f'./{model_dir}/deepinsight-{fold_index}.pt'))

        dataset = MoAImageDataset(X_valid, y_valid, transformer, image_size=config.image_size)
        dataloader = DataLoader(
            dataset,
            batch_size=config.infer_batch_size,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False)
        valid_loss, valid_preds = loop_valid(model, eval_loss_function, dataloader)
        print(f'Fold {fold_index}/{config.kfolds}  -  fold_valid_loss: {valid_loss:.5f}', flush=True)
        logger.update({'fold': fold_index, 'val_loss': valid_loss})

        oof_preds[val_index, :] = valid_preds

        dataset = TestDataset(X_test, None, transformer, image_size=config.image_size)
        dataloader = DataLoader(
            dataset,
            batch_size=config.infer_batch_size,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False)

        preds = loop_preds(model, dataloader)
        test_preds += preds / config.kfolds

    oof_preds_df = train_targets.copy()
    oof_preds_df.loc[:, target_columns] = oof_preds.clip(0, 1)
    oof_preds_df.to_csv(f'{model_dir}/oof_preds.csv', index=False)
    oof_loss = mean_log_loss(train_targets.loc[:, target_columns].values, oof_preds)

    print(f'OOF Validation Loss: {oof_loss:.6f}', flush=True)
    print(f'Done inference  Elapsed {time_format(time.time() - start_time)}', flush=True)
    logger.update({'fold': 'oof', 'val_loss': oof_loss})
    logger.save(f'{model_dir}/log.csv')

    submission = pd.DataFrame(data=test_features['sig_id'].values, columns=['sig_id'])
    submission = submission.reindex(columns=['sig_id'] + target_columns)
    submission.loc[:, target_columns] = test_preds.clip(0, 1)
    submission.loc[test_features['cp_type'] == 0, submission.columns[1:]] = 0
    submission.to_csv(f'{model_dir}/submission.csv', index=False)
Example No. 15
def run(try_num, config):
    logger = Logger()
    args = get_args()

    print('config:', config.to_dict(), flush=True)
    print('args:', args, flush=True)
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

    model_dir = f'blending-01-nn-{try_num}'

    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    train_features = pd.read_csv('../input/lish-moa/train_features.csv')
    train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
    dae_features = pd.read_csv(config.dae_path)
    test_features = pd.read_csv('../input/lish-moa/test_features.csv')

    if args.debug:
        train_features = train_features[:500]
        train_targets = train_targets[:500]
        dae_features = pd.concat(
            [dae_features.iloc[:500],
             dae_features.iloc[-3982:]]).reset_index(drop=True)

        config.update(
            dict(
                n_folds=3,
                seeds=[222],
                n_epochs=3,
                batch_size=128,
            ))

    target_columns = [col for col in train_targets.columns if col != 'sig_id']
    n_targets = len(target_columns)

    train_features, train_targets, test_features = preprocess(
        config, model_dir, train_features, train_targets, test_features,
        dae_features)
    features_columns = [
        col for col in train_features.columns if col not in [
            'sig_id', 'cp_type', 'cp_time', 'cp_dose', 'cp_type_ctl_vehicle',
            'cp_type_trt_cp'
        ]
    ]

    metric_loss_function = nn.BCELoss()

    if config.weighted_loss_strategy == 1:
        indices = get_minority_target_index(
            train_targets, threshold=config.weighted_loss_threshold)
        indices = [int(i not in indices) for i, c in enumerate(target_columns)]
        smooth_loss_function = SmoothBCELoss(
            smoothing=config.smoothing,
            weight=config.weighted_loss_weights,
            weight_targets=indices,
            n_labels=n_targets)
    else:
        smooth_loss_function = SmoothBCELoss(smoothing=config.smoothing)

    kfold = MultilabelStratifiedKFold(n_splits=config.n_folds,
                                      random_state=42,
                                      shuffle=True)

    for seed_index, seed in enumerate(config.seeds):
        if args.only_pred:
            print('Skip training', flush=True)
            break

        print(f'Train seed {seed}', flush=True)
        set_seed(seed)

        for fold_index, (train_indices, val_indices) in enumerate(
                kfold.split(train_targets[target_columns].values,
                            train_targets[target_columns].values)):
            print(f'Train fold {fold_index + 1}', flush=True)

            x_train = train_features.loc[train_indices, features_columns]
            y_train = train_targets.loc[train_indices, target_columns]
            x_val = train_features.loc[val_indices, features_columns]
            y_val = train_targets.loc[val_indices, target_columns]

            model = new_model(config.model_kind,
                              len(features_columns)).to(DEVICE)
            checkpoint_path = f'{model_dir}/repeat-{seed}_Fold-{fold_index + 1}.pt'
            optimizer = optim.Adam(model.parameters(),
                                   weight_decay=config.weight_decay,
                                   lr=config.learning_rate)
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                             mode='min',
                                                             factor=0.1,
                                                             patience=3,
                                                             eps=1e-4,
                                                             verbose=True)

            best_loss = np.inf

            for epoch in range(config.n_epochs):
                dataset = MoaDataset(x_train.values, y_train.values)
                dataloader = DataLoader(dataset,
                                        batch_size=config.batch_size,
                                        shuffle=True,
                                        drop_last=True)

                train_loss = loop_train(model,
                                        dataloader,
                                        optimizer,
                                        loss_functions=(
                                            smooth_loss_function,
                                            metric_loss_function,
                                        ))

                dataset = MoaDataset(x_val.values, y_val.values)
                dataloader = DataLoader(dataset,
                                        batch_size=config.val_batch_size,
                                        shuffle=False)
                valid_loss, _ = loop_valid(model, dataloader,
                                           metric_loss_function)

                print(
                    'Epoch {}/{}   -   loss: {:5.5f}   -   val_loss: {:5.5f}'.
                    format(epoch + 1, config.n_epochs, train_loss, valid_loss),
                    flush=True)

                logger.update({
                    'epoch': epoch + 1,
                    'loss': train_loss,
                    'val_loss': valid_loss
                })

                scheduler.step(valid_loss)

                if valid_loss < best_loss:
                    best_loss = valid_loss
                    torch.save(model.state_dict(), checkpoint_path)

    oof_preds = np.zeros((len(train_features), len(config.seeds), n_targets))
    test_preds = np.zeros((len(test_features), n_targets))

    for seed_index in range(len(config.seeds)):
        seed = config.seeds[seed_index]

        print(f'Inference for seed {seed}', flush=True)

        _test_preds_in_seed = np.zeros((len(test_features), n_targets))

        for fold_index, (_, valid_indices) in enumerate(
                kfold.split(train_targets[target_columns].values,
                            train_targets[target_columns].values)):
            x_val = train_features.loc[valid_indices, features_columns]
            y_val = train_targets.loc[valid_indices, target_columns]

            checkpoint_path = f'{model_dir}/repeat-{seed}_Fold-{fold_index + 1}.pt'
            model = new_model(config.model_kind,
                              len(features_columns)).to(DEVICE)
            model.load_state_dict(torch.load(checkpoint_path))

            dataset = MoaDataset(x_val.values, y_val.values)
            dataloader = DataLoader(dataset,
                                    batch_size=config.val_batch_size,
                                    shuffle=False)
            preds = loop_pred(model, dataloader)

            oof_preds[valid_indices, seed_index, :] = preds

            dataset = MoaDataset(test_features[features_columns].values, None)
            dataloader = DataLoader(dataset,
                                    batch_size=config.val_batch_size,
                                    shuffle=False)
            preds = loop_pred(model, dataloader)

            _test_preds_in_seed += preds / config.n_folds

        score = mean_log_loss(train_targets.loc[:, target_columns].values,
                              oof_preds[:, seed_index, :],
                              n_targets=n_targets)
        test_preds += _test_preds_in_seed / len(config.seeds)

        print(f'Score for this seed {score:5.5f}', flush=True)
        logger.update({'val_loss': score})

    # Evaluate the validation score
    oof_preds = np.mean(oof_preds, axis=1)
    score = mean_log_loss(train_targets.loc[:, target_columns].values,
                          oof_preds,
                          n_targets=n_targets)
    print(f'Overall score is {score:5.5f}', flush=True)

    # Save validation prediction
    oof_pred_df = train_targets.copy()
    oof_pred_df.iloc[:, 1:] = oof_preds
    oof_pred_df.to_csv(f'{model_dir}/oof_pred.csv', index=False)

    # Save log
    logger.update({'val_loss': score})
    logger.save(f'{model_dir}/log.csv')

    # Save Test Prediction
    test_features = pd.read_csv('../input/lish-moa/test_features.csv')
    submission = create_submission(test_features, ['sig_id'] + target_columns)
    submission[target_columns] = test_preds
    submission.loc[test_features['cp_type'] == 'ctl_vehicle',
                   target_columns] = 0
    submission.to_csv(f'{model_dir}/submission.csv', index=False)
Example No. 16
File: model.py  Project: perry-xy/SF
import os
import warnings

import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
from sklearn.svm import SVR
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import GridSearchCV
#
from utils.misc import Logger
from utils.util import generate_cutoffs
from utils.misc import save_model_to_file, mean_abs_percentage_error, xgb_mape

log = Logger(log_path=os.path.join(os.path.dirname(os.path.dirname(__file__)),
                                   'log')).logger
warnings.filterwarnings("ignore")

xgb_installed = False
lgt_installed = False
try:
    import xgboost as xgb
    xgb_installed = True
    import lightgbm as lgt
    lgt_installed = True
except ImportError:
    pass


class TrainModel(BaseEstimator):
Example No. 17
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
# @Time    : 2019/9/9 9:55
# @Author  : peng.wang
# @Email   : [email protected]
# @FileName: model.py
# @ProjectName :Facility_Location_FangTai
"""

from gurobipy import *
from utils.misc import Logger
import pandas as pd
# define the log file
log = Logger(log_path='../log').logger

# define the facility location problem

YEAR_DAY = 365


class FacilityLocation(object):
    """
    this class is consist of attributes for problem construction
    some utils function for dealing with post-process
    one key function for building the detail model
    """
    def __init__(self, data, config):
        """

        :param data: class of data provide all data used
Example No. 18
File: util.py  Project: perry-xy/SF
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
# @Time    : 2019/10/8 22:34
# @Author  : peng.wang
# @Email   : [email protected]
# @FileName: util.py
# @ProjectName :sh-demand-forecast-alg
"""
import pandas as pd
import numpy as np
import requests
import json
import os
from utils.misc import Logger
log = Logger(log_path='log').logger


class DataLoader(object):
    """
    Data loader that prepares training and prediction segments from the given
    data, based on train_len, pred_len, and the configured feature/target columns.
    """
    def __init__(self,
                 data,
                 train_len,
                 pred_len,
                 feature_names,
                 target_name,
                 append_train=False):
        self.data = data