Exemplo n.º 1
0
def update_data():
    """Drain the currently-filling option buffer and stream its rows to the plots."""
    global opt_buf
    global tb_fill_idx

    # Grab the buffer that was being filled, then flip the fill index
    # (double buffering: receivers append to the other slot from now on).
    pending = opt_buf[tb_fill_idx]
    tb_fill_idx = (tb_fill_idx + 1) % 2

    print("opt_buf, {}, {}".format(len(opt_buf[0]), len(opt_buf[1])))

    # Column-oriented accumulators for the two plot data sources.
    tb_cols = dt.Opt(time=[], display_time=[], lr=[], lr_log=[],
                     loss=[], acc=[], ep_step=[])
    tm_cols = dt.Opt(time=[], display_time=[], loss_val=[],
                     top1_val=[], top5_val=[], ep_idx=[])

    for opt in pending:
        if opt.t == 'tb':
            tb_cols.time.append(opt.ts)
            tb_cols.display_time.append("{}".format(opt.ts))
            tb_cols.lr.append(opt.lr)
            tb_cols.lr_log.append(math.log(opt.lr, 10))
            tb_cols.loss.append(opt.loss)
            tb_cols.acc.append(opt.acc)
            tb_cols.ep_step.append("{} ({})".format(opt.ep, opt.s))
        elif opt.t == 'tm':
            tm_cols.time.append(opt.ts)
            tm_cols.display_time.append("{}".format(opt.ts))
            tm_cols.loss_val.append(opt.vals[0])
            tm_cols.top1_val.append(opt.vals[1])
            tm_cols.top5_val.append(opt.vals[2])
            tm_cols.ep_idx.append("{} ({})".format(opt.ep, opt.idx))
    pending.clear()

    # Push the new rows into the streaming data sources (rollover limits keep
    # at most 20000 / 10000 rows respectively).
    data_tb.stream(
        dict(time=tb_cols.time,
             display_time=tb_cols.display_time,
             lr=tb_cols.lr,
             lr_log=tb_cols.lr_log,
             loss=tb_cols.loss,
             acc=tb_cols.acc,
             ep_step=tb_cols.ep_step), 20000)

    data_tm.stream(
        dict(time=tm_cols.time,
             display_time=tm_cols.display_time,
             loss_val=tm_cols.loss_val,
             top1_val=tm_cols.top1_val,
             top5_val=tm_cols.top5_val,
             ep_idx=tm_cols.ep_idx), 10000)
Exemplo n.º 2
0
 def metric(self, logits, labels, is_training):
     """Return metric entries: top1 during training, top1+top5 during eval."""
     if not is_training:
         # Evaluation: library helper computes top-1 and top-5 accuracy.
         accs = dt.metric.accuracy(logits, labels, topk=(1, 5))
         return [
             dt.Opt(name='top1', tensor=accs[0]),
             dt.Opt(name='top5', tensor=accs[1])
         ]
     # Training: cheap top-1 from the correctness mask.
     hits = self.correct(logits, labels, is_training)
     top1 = hits.float().sum().div_(len(labels))
     return [dt.Opt(name='top1', tensor=top1)]
Exemplo n.º 3
0
def summary_model_patch(model, patch_fn=patch_add_dt, **kwargs):
    """Walk every submodule of *model*, applying *patch_fn* under a fresh
    context list.

    Fix: removed the unused local ``class_name`` (computed but never read).

    Args:
        model: root module whose tree is walked by _walk_module.
        patch_fn: callable applied to each module during the walk.
        **kwargs: extra arguments exposed to the walk via the context list.

    Returns:
        True (always; kept for caller compatibility).
    """
    state = dt.Opt()
    cl = dt.create_ctx_list(args=dt.Opt(kwargs),
                            patch_fn=patch_fn,
                            state=state)

    # Walk with a root context entry (level 0, no key, empty path).
    with dt.ctx_cl(cl, None, level=0, key=None, path=''):
        _walk_module(cl, model)

    return True
Exemplo n.º 4
0
    def wrapper(tensor, **kwargs):
        """Merge call kwargs with the ambient context, apply defaults, run func."""
        # Call-site kwargs merged with the current global context.
        opt = dt.Opt(kwargs) + dt.get_ctx()

        # Defaults: train mode / reuse flag, then data format.
        opt += dt.Opt(is_training=True, reuse=None)
        opt += dt.Opt(data_format=dt.dformat.DEFAULT)

        return func(tensor, opt)
Exemplo n.º 5
0
    def wrapper(**kwargs):
        """Merge kwargs with the current context and delegate to func."""
        merged = dt.Opt(kwargs) + get_ctx()
        return func(merged)
Exemplo n.º 6
0
 def init_saver(self):
     """Build the table of checkpoint file paths under the instance directory."""
     inst_dir = self.ctx.args.inst_dir
     self._saver = dt.Opt(
         model_latest='{}/model_latest.pt'.format(inst_dir),
         optimizer_latest='{}/optimizer_latest.pt'.format(inst_dir),
         model_best='{}/model_best.pt'.format(inst_dir),
         optimizer_best='{}/optimizer_best.pt'.format(inst_dir))
Exemplo n.º 7
0
    def wrapper(ctx_list, **kwargs):
        """Merge kwargs with the context list's merged context and delegate."""
        merged = dt.Opt(kwargs) + get_ctx_cl(ctx_list)
        return func(ctx_list, merged)
Exemplo n.º 8
0
def set_lr(lr):
    # Push a new learning-rate command to the training process over the
    # datalink and reflect the value in the plot title.
    global g_lr
    # NOTE(review): 'True or' short-circuits the change check, so the command
    # is sent on every call even when lr is unchanged — looks like leftover
    # debug code; confirm intent before removing.
    if True or g_lr != lr:
        g_lr = lr
        dt.util.datalink().send_opt(
            dt.Opt(t='cmd', a='set', key='lr', val=g_lr))
        lr_plot.title.text = "Training lr={}".format(g_lr)
Exemplo n.º 9
0
def dict_to_opt(d):
    """Recursively convert a plain dict (and any nested dicts) into a dt.Opt.

    Fix: use ``isinstance`` instead of ``type(v) is dict`` so dict subclasses
    (e.g. OrderedDict) are also converted recursively.

    Args:
        d: mapping to convert; values that are dicts are converted recursively,
           all other values are copied as-is.

    Returns:
        A dt.Opt mirroring the structure of *d*.
    """
    opt = dt.Opt()
    for key, value in d.items():
        if isinstance(value, dict):
            opt[key] = dict_to_opt(value)
        else:
            opt[key] = value
    return opt
Exemplo n.º 10
0
def save(optimizer, fname, **kwargs):
    """Persist optimizer state plus extra parameters to *fname*."""
    extra = dt.Opt(kwargs)
    checkpoint = {
        'optimizer_state_dict': optimizer.state_dict(),
        'optimizer_params': extra.to_dict(),
    }
    torch.save(checkpoint, fname)
Exemplo n.º 11
0
def save(model, fname, **kwargs):
    """Persist model weights plus extra parameters to *fname*."""
    extra = dt.Opt(kwargs)
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'model_params': extra.to_dict(),
    }
    torch.save(checkpoint, fname)
Exemplo n.º 12
0
    def init_data(self):
        """Initialize train/valid/test split descriptors (sizes, batch counts).

        Fix: ``self.valid`` was assigned the ``dt.Opt`` *class* (missing
        parentheses), so the attribute writes below mutated the class itself
        rather than a per-instance object. Now all three splits get fresh
        instances.

        Returns:
            self, for call chaining.
        """
        dt.trace(dt.DC.DATA, "[{}] init data".format(self.tag))

        self.train, self.valid, self.test = dt.Opt(), dt.Opt(), dt.Opt()

        self.train.batch_size = self._batch_size
        self.valid.batch_size = self._valid_size
        self.test.batch_size = self._test_size

        self.train.num_total = ImageNet.TRAIN_NUM_PER_EPOCH
        self.valid.num_total = ImageNet.VALID_NUM_PER_EPOCH
        self.test.num_total = ImageNet.TEST_NUM_PER_EPOCH

        # Per-worker batch count: totals are divided across horovod workers.
        self.train.num_batch = int(math.ceil(ImageNet.TRAIN_NUM_PER_EPOCH / self._batch_size / hvd.size()))
        self.valid.num_batch = int(math.ceil(ImageNet.VALID_NUM_PER_EPOCH / self._valid_size / hvd.size()))
        self.test.num_batch = int(math.ceil(ImageNet.TEST_NUM_PER_EPOCH / self._test_size / hvd.size()))

        return self
Exemplo n.º 13
0
def get_ctx():
    """Merge the global context stack into a single Opt.

    Entries are merged from the most recently pushed outward, so more
    deeply nested contexts are applied first.
    """
    global __global_ctx_list

    merged = dt.Opt()
    for ctx_entry in reversed(__global_ctx_list):
        merged += ctx_entry
    return merged
Exemplo n.º 14
0
def patch_add_dt(module, gc):
    """Attach (or refresh) the module's ``_dt_`` bookkeeping Opt from the walk context."""
    if not hasattr(module, '_dt_'):
        module._dt_ = dt.Opt()
    info = module._dt_
    info.level = gc.level
    info.path = gc.path
    info.key = gc.key
    info.class_name = module.__class__.__name__
Exemplo n.º 15
0
    def __init__(self, ctx, **kwargs):
        """Merge call kwargs, the ambient context, and *ctx* into self._ctx."""
        self._ctx = dt.Opt(kwargs) + dt.get_ctx() + ctx

        # Device state — filled in later by device initialization.
        self._use_cuda = False
        self._device = torch.device('cpu')
        self._device_index, self._device_count = 0, 0

        # Training state — populated during setup.
        self._global_step, self._learning_rate = None, None
Exemplo n.º 16
0
def ctx(**kwargs):
    """Context manager body: push kwargs onto the global context stack for the
    duration of the with-block.

    Fix: the pop now runs in a ``finally`` clause, so an exception raised
    inside the with-body no longer leaks the pushed entry on the stack.
    """
    global __global_ctx_list

    # Append current context when entering the with-block.
    entry = dt.Opt(kwargs)
    __global_ctx_list += [entry]
    try:
        yield
    finally:
        # Always pop on exit, even when the body raised.
        del __global_ctx_list[-1]
Exemplo n.º 17
0
    def init_data(self):
        """Initialize CIFAR-10 train/valid/test split descriptors and class names.

        Fix: ``self.valid`` was assigned the ``dt.Opt`` *class* (missing
        parentheses), so attribute writes mutated the class itself instead of
        a per-instance object. Now all three splits get fresh instances.

        Returns:
            self, for call chaining.
        """
        dt.trace(dt.DC.DATA, "[{}] init data".format(self.tag))

        self.train, self.valid, self.test = dt.Opt(), dt.Opt(), dt.Opt()

        self.train.batch_size = self._batch_size
        self.valid.batch_size = self._valid_size
        self.test.batch_size = self._test_size

        self.train.num_total = Cifar10.TRAIN_NUM_PER_EPOCH
        self.valid.num_total = Cifar10.VALID_NUM_PER_EPOCH
        self.test.num_total = Cifar10.TEST_NUM_PER_EPOCH

        # Per-worker batch count: totals are divided across horovod workers.
        self.train.num_batch = int(math.ceil(Cifar10.TRAIN_NUM_PER_EPOCH / self._batch_size / hvd.size()))
        self.valid.num_batch = int(math.ceil(Cifar10.VALID_NUM_PER_EPOCH / self._valid_size / hvd.size()))
        self.test.num_batch = int(math.ceil(Cifar10.TEST_NUM_PER_EPOCH / self._test_size / hvd.size()))

        self.classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

        return self
Exemplo n.º 18
0
def ctx_cl(ctx_list, opt, **kwargs):
    """Context manager body: push *opt* merged with kwargs onto *ctx_list*
    (or the global stack when *ctx_list* is None) for the with-block duration.

    Fix: the pop now runs in a ``finally`` clause, so an exception raised
    inside the with-body no longer leaks the pushed entry.
    """
    global __global_ctx_list

    # Build the entry to push: opt (if given) overlaid with call kwargs.
    if opt is None:
        entry = dt.Opt(kwargs)
    else:
        entry = opt + dt.Opt(kwargs)

    # Select the stack to operate on; list += mutates it in place.
    target = __global_ctx_list if ctx_list is None else ctx_list
    target += [entry]
    try:
        yield
    finally:
        # Always pop on exit, even when the body raised.
        del target[-1]
Exemplo n.º 19
0
def get_ctx_cl(ctx_list):
    """Merge a context list (or the global stack when None) into one Opt.

    Entries are merged from most recently pushed outward, so inner contexts
    are applied first.
    """
    global __global_ctx_list

    stack = __global_ctx_list if ctx_list is None else ctx_list
    merged = dt.Opt()
    for entry in reversed(stack):
        merged += entry
    return merged
Exemplo n.º 20
0
def datalink_recv(socket, packet):
    """Decode an incoming datalink packet into an Opt and queue it for plotting."""
    received = dt.Opt().loads(packet._data.decode())
    opt_buf[tb_fill_idx].append(received)
    print(received)
Exemplo n.º 21
0
def create_ctx_list(**kwargs):
    """Start a fresh context list seeded with the given kwargs as its first entry."""
    return [dt.Opt(kwargs)]
Exemplo n.º 22
0
    def build_data(self):
        """Construct and load the MNIST data pipeline; store it on self._data."""
        dt.trace(dt.DC.MODEL, "[{}] ({}) build data".format(self.tag, type(self).__name__))
        args = self._ctx.args
        mnist = dt.data.Mnist(batch_size=args.batch_size, valid_size=args.valid_size,
                              num_workers=1, pin_memory=self.use_cuda)
        mnist.init_data()
        mnist.load_data()
        self._data = mnist
        return True

    def build_model(self):
        """Instantiate the network used for training."""
        dt.trace(dt.DC.MODEL, "[{}] ({}) build model".format(self.tag, type(self).__name__))

        self._model = MnistNet()

        # Alternative backbone kept for reference:
        #model = torchvision.models.resnet50(False)
        # Have ResNet model take in grayscale rather than RGB
        #model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

        return True


# Train
# Run training inside a context carrying the optimizer / learning-rate
# schedule settings; calls under dt.ctx(...) see them via the context stack.
with dt.ctx(optim=ARGS.optim, lr_initial=ARGS.lr_initial, lr_minimal=ARGS.lr_minimal, lr_curve=ARGS.lr_curve):
    # Seed varies per horovod rank so workers don't all draw identical batches.
    dt.train.train(args=ARGS, est_class = MnistEstimator, est_cfg=dt.Opt(),
                   batch_size=ARGS.batch_size, valid_size=ARGS.valid_size, summary_freq=ARGS.summary_freq,
                   validate_ep=ARGS.validate_ep, max_ep=ARGS.max_ep,
                   model_dir=ARGS.model_dir, save_interval=ARGS.save_interval,
                   beta1=ARGS.beta1, beta2=ARGS.beta2, momentum=ARGS.momentum, weight_decay=ARGS.weight_decay,
                   random_seed=1 * (hvd.rank()+1), deferred=ARGS.deferred)
Exemplo n.º 23
0
class DbgLvl(IntEnum):
    """Debug verbosity thresholds (lower value = more verbose).

    Several names intentionally share a value (NONE/NOTSET, MAX/TRACE);
    the later name becomes an alias of the first member with that value.
    """
    NONE = 0
    NOTSET = 0
    MAX = 5
    TRACE = 5
    DEBUG = 10
    MED = 15
    INFO = 20
    WARNING = 30
    MIN = 35
    ERROR = 40
    CRITICAL = 50


# Module-wide debug configuration: defaults to the most verbose level
# (DbgLvl.MAX) on all channels.
_dbg_cfg = dt.Opt()
_dbg_cfg += dt.Opt(level=DbgLvl.MAX, channel=DbgChn.ALL)


def dbg_cfg_val():
    """Return the module-level debug configuration Opt."""
    # 'global' is unnecessary for a plain read of a module global.
    return _dbg_cfg


def dbg_cfg(**kwargs):
    # Merge kwargs into the debug config. NOTE(review): '*=' is Opt's own
    # operator — presumably an overriding merge; confirm against dt.Opt.
    global _dbg_cfg
    _dbg_cfg *= dt.Opt(kwargs)
    # Echo the resulting config when STD-channel DEBUG output is enabled.
    if dbg_vld(DbgChn.STD, DbgLvl.DEBUG):
        dt.print_pp(_dbg_cfg)

Exemplo n.º 24
0
 def __init__(self, ctx, **kwargs):
     """Merge keyword overrides with the provided context into self._ctx."""
     overrides = dt.Opt(kwargs)
     self._ctx = overrides + ctx
Exemplo n.º 25
0
 def __init__(self, ctx, **kwargs):
     """Merge keyword overrides with the provided context; start with no callbacks."""
     overrides = dt.Opt(kwargs)
     self._ctx = overrides + ctx
     self._callbacks = []
Exemplo n.º 26
0
def dbg_cfg(**kwargs):
    # Merge kwargs into the debug config. NOTE(review): '*=' is Opt's own
    # operator — presumably an overriding merge; confirm against dt.Opt.
    global _dbg_cfg
    _dbg_cfg *= dt.Opt(kwargs)
    # Echo the resulting config when STD-channel DEBUG output is enabled.
    if dbg_vld(DbgChn.STD, DbgLvl.DEBUG):
        dt.print_pp(_dbg_cfg)
Exemplo n.º 27
0
    def init(self, **kwargs):
        """Resolve the full training context, initialize device/seed/saver state.

        Merges call kwargs, the ambient context, and self._ctx, fills in
        defaults, then performs ordered side-effectful initialization
        (device, logging, step/lr/summary/saver, seeding, thread limits).
        Returns self for call chaining.
        """
        opt = dt.Opt(kwargs) + dt.get_ctx() + self._ctx

        # NOTE(review): '+=' appears to fill in defaults without overriding
        # keys already present — confirm against dt.Opt semantics.

        # Set default device settings
        opt += dt.Opt(gpu0=0)

        # Set default train mode
        opt += dt.Opt(is_training=True, is_eval=False, is_pred=False)

        # Learning rate
        opt += dt.Opt(lr_initial=0.001,
                      lr_minimal=1e-6,
                      lr_curve=[['*', 0.1, 10, 1]])

        # Default training options
        opt += dt.Opt(optim='SGD',
                      alpha=0.9,
                      beta1=0.9,
                      beta2=0.99,
                      opt_eps=1e-6,
                      momentum=0.9,
                      weight_decay=5e-4,
                      model_dir='asset/train',
                      random_seed=0,
                      max_ep=100000,
                      save_interval=1,
                      validate_ep=1,
                      data_format=dt.dformat.DEFAULT)

        # Default horovod options
        opt += dt.Opt(fp16_allreduce=False)

        # Stats
        opt += dt.Opt(stats=dt.Opt(avg_loss=None,
                                   train_metric_name=None,
                                   train_metric=None,
                                   valid_loss=0,
                                   valid_metric_name='',
                                   valid_metric=0,
                                   valid_metric_max=None,
                                   train_speed=0,
                                   valid_speed=0))

        # Saver
        opt += dt.Opt(epoch_done=-1)

        # Update ctx
        self._ctx = opt

        # Initialize device
        self.init_device()
        dt.info(
            dt.DC.TRAIN,
            '[HOROVOD] rank {}/{}, local {}'.format(hvd.rank(), hvd.size(),
                                                    hvd.local_rank()))
        dt.info(
            dt.DC.TRAIN,
            '[DEVICE] use_cuda {}, device {}, gpu {}/{}, random_seed {}'.
            format(self.use_cuda, self.device, self.device_index,
                   self.device_count, self._ctx.random_seed))

        # Only the chief worker dumps the resolved context.
        if is_chief():
            dt.info(dt.DC.TRAIN, '[TRAIN] ctx')
            dt.print_pp(dt.opt_to_dict(self._ctx))

        # Initialize training variables
        self.init_global_step()
        self.init_learning_rate()
        self.init_summary()
        self.init_saver()

        # random_seed == 0 means "do not seed" (leave RNGs nondeterministic).
        if self._ctx.random_seed != 0:
            self.set_random_seed(self._ctx.random_seed)

        if self.use_cuda:
            # Horovod: pin GPU to local rank.
            torch.cuda.set_device(self.device_index)

        # Horovod: limit # of CPU threads to be used per worker.
        torch.set_num_threads(1)

        return self
Exemplo n.º 28
0
                    momentum=self._ctx.momentum, weight_decay=self._ctx.weight_decay,
                    warmup=0)
        elif self._ctx.optim == 'SDG':
            self._optimizer = optim.SGD(self._model.parameters(), lr=self.trainer.get_lr_val(),
                momentum=self._ctx.momentum, weight_decay=self._ctx.weight_decay)
        else:
            self._optimizer = None

        return True

# Train
ctx = dt.Opt(args=ARGS,
             optim=ARGS.optim, data_format=ARGS.data_format,
             lr_initial=ARGS.lr_initial, lr_minimal=ARGS.lr_minimal, lr_curve=ARGS.lr_curve,
             batch_size=ARGS.batch_size, valid_size=ARGS.valid_size,
             validate_ep=ARGS.validate_ep, max_ep=ARGS.max_ep,
             model_dir=ARGS.model_dir, save_interval=ARGS.save_interval,
             alpha=ARGS.alpha, beta1=ARGS.beta1, beta2=ARGS.beta2, opt_eps=ARGS.opt_eps,
             momentum=ARGS.momentum, weight_decay=ARGS.weight_decay,
             random_seed=dt.util.random_int(1, 999999), gpu0=ARGS.gpu0, valid_only=ARGS.valid_only)

est = ImageNetEstimator(ctx)
est.build_flow()

trainer = dt.train.Trainer(ctx).init()

trainer.bind_estimator(est)

trainer.train_setup()
trainer.train_begin()
trainer.train()
Exemplo n.º 29
0
    def init_config(self):
        """Load the INI config file, overlay command-line args, apply defaults.

        Reads self._args.c with ExtendedInterpolation, converts every value
        through json.loads into self._opt (section -> Opt), then lets
        command-line arguments override the 'args' section (with type
        coercion), and finally fills in anything missing from
        self._default_config.
        """
        self._config = configparser.ConfigParser(
            interpolation=configparser.ExtendedInterpolation())
        self._config.read(self._args.c)

        # Guarantee the two sections the rest of this method relies on.
        if 'args' not in self._config:
            self._config['args'] = {}
        if 'debug' not in self._config:
            self._config['debug'] = {}

        self._opt = dt.Opt()

        # load config to opt — every raw value is parsed as JSON.
        for section in self._config.sections():
            opt = dt.Opt()
            for key in self._config[section]:
                val_str = self._config[section][key]
                val = json.loads(val_str)
                opt[key] = val
            self._opt[section] = opt

        # override with command line args (None means "not given" and is skipped)
        for arg in vars(self._args):
            val = getattr(self._args, arg)
            if val is None:
                continue
            val_str = str(val)

            if arg in self._opt.args or arg in self._default_config['args']:
                # Find the existing value to learn the expected type.
                if arg in self._opt.args:
                    opt_val = self._opt.args[arg]
                else:
                    opt_val = self._default_config['args'][arg]

                # Quote strings so json.loads round-trips them correctly.
                if isinstance(opt_val, str):
                    val_str = '"' + val_str + '"'

                if type(opt_val) is not type(val):
                    # NOTE(review): 'log.DC.STD' — elsewhere this codebase uses
                    # dt.DC.*; verify 'log' is a real module here, this may be
                    # a typo for dt.DC.STD.
                    dt.log(
                        log.DC.STD, "[Convert Arg] {}: {}, {} => {}".format(
                            arg, val, type(val), type(opt_val)))
                    self._opt.args[arg] = json.loads(val_str)
                else:
                    self._opt.args[arg] = val
                self._config['args'][arg] = val_str
            else:
                # Unknown arg: store as-is.
                self._opt.args[arg] = val
                self._config['args'][arg] = val_str

        # add default settings for any key not already present
        for section in self._default_config:
            opt = dt.Opt()
            for key in self._default_config[section]:
                if key not in self._opt[section]:
                    opt[key] = self._default_config[section][key]
                    self._config[section][key] = json.dumps(opt[key])
            self._opt[section] += opt

        # additional post process: convert a raw 'add' dict into an Opt tree
        if self._opt.args.add is not None and type(self._opt.args.add) is dict:
            self._opt.args.add = dt.util.dict_to_opt(self._opt.args.add)