Example #1
File: layer.py Project: zz119/neon
    def initialize(self, kwargs):
        super(WeightLayer, self).initialize(kwargs)
        # these hyperparameters must already be set on the layer
        req_param(self, ['weight_init', 'lrule_init', 'nin', 'nout'])
        # optional flags fall back to their defaults when not supplied
        opt_param(self, ['accumulate'], False)
        opt_param(self, ['batch_norm'], False)

        self.weight_init.initialize(self.backend)
        self.params = []
        self.updates = []

        # attach a batch-normalization sub-layer when requested
        if self.batch_norm:
            self.bn = BatchNorm()
            kwargs['layer'] = self
            self.bn.initialize(kwargs)
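
The snippet above leans on neon's `req_param`/`opt_param` parameter helpers. A minimal sketch of their assumed behavior follows; the real helpers live in neon's utility module, and the error type and message here are illustrative only:

def req_param(obj, paramlist):
    # every required attribute must already be present on the object
    for param in paramlist:
        if not hasattr(obj, param):
            raise ValueError("required parameter %s not set on %s"
                             % (param, obj.__class__.__name__))


def opt_param(obj, paramlist, default_value=None):
    # optional attributes fall back to a default when missing
    for param in paramlist:
        if not hasattr(obj, param):
            setattr(obj, param, default_value)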
Example #2
    def initialize(self, kwargs):
        super(WeightLayer, self).initialize(kwargs)
        req_param(self, ['nin', 'nout'])
        opt_param(self, ['weight_init'], default_weight_init())
        opt_param(self, ['lrule_init'], default_lrule_init())
        opt_param(self, ['accumulate'], False)
        opt_param(self, ['batch_norm'], False)
        opt_param(self, ['mempool'])  # Used for parallel mode

        self.weight_init.initialize(self.backend)
        self.params = []
        self.updates = []

        if self.batch_norm:
            self.bn = BatchNorm()
            kwargs['layer'] = self
            self.bn.initialize(kwargs)
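
This variant supplies fallbacks via `default_weight_init()` and `default_lrule_init()` when the caller omits them. Judging from how the rest of the class consumes these values, the learning-rule default is presumably a plain config dict; a hypothetical sketch is shown below, where the keys inside `lr_params` are placeholders rather than confirmed names:

def default_lrule_init():
    # hypothetical fallback: plain gradient descent; 'type' and 'lr_params'
    # are the only keys the dispatch code in init_learning_rule requires
    return {'type': 'gradient_descent',
            'lr_params': {'learning_rate': 0.01}}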
Example #3
    def initialize(self, kwargs):
        super(WeightLayer, self).initialize(kwargs)
        req_param(self, ['weight_init', 'lrule_init', 'nin', 'nout'])
        opt_param(self, ['accumulate'], False)
        opt_param(self, ['batch_norm'], False)

        self.weight_init.initialize(self.backend)
        self.params = []
        self.updates = []

        if self.batch_norm:
            self.bn = BatchNorm()
            kwargs['layer'] = self
            self.bn.initialize(kwargs)
Example #4
class WeightLayer(Layer):
    """
    Typical hidden layer with weight parameters to be learned.
    """
    def __init__(self, **kwargs):
        super(WeightLayer, self).__init__(**kwargs)
        self.distributable = True
        self.has_params = True
        self.params_initialized = False

    def initialize(self, kwargs):
        super(WeightLayer, self).initialize(kwargs)
        req_param(self, ['nin', 'nout'])
        opt_param(self, ['weight_init'], default_weight_init())
        opt_param(self, ['lrule_init'], default_lrule_init())
        opt_param(self, ['accumulate'], False)
        opt_param(self, ['batch_norm'], False)
        opt_param(self, ['mempool'])  # Used for parallel mode

        self.weight_init.initialize(self.backend)
        self.params = []
        self.updates = []

        if self.batch_norm:
            self.bn = BatchNorm()
            kwargs['layer'] = self
            self.bn.initialize(kwargs)

    def get_params(self):
        np_params = dict()
        for p in ['weights', 'biases']:
            if hasattr(self, p):
                p_tensor = getattr(self, p)
                np_params[p] = p_tensor.asnumpyarray()

        if self.batch_norm:
            np_params.update(self.bn.get_params())

        np_params.update(self.learning_rule.get_params())
        if self.bias_rule is not None:
            np_params.update(self.bias_rule.get_params())
        return np_params

    def set_params(self, params_dict):
        for p in ['weights', 'biases']:
            if p in params_dict:
                self.backend.set(getattr(self, p), params_dict[p])

        if self.batch_norm:
            self.bn.set_params(params_dict)
        self.learning_rule.set_params(params_dict)
        if self.bias_rule is not None:
            self.bias_rule.set_params(params_dict)

    def make_views(self):
        pass

    def allocate_param_bufs(self):
        if self.params_initialized:
            return

        def make_ebuf(shape, dtype, persist_values):
            b = self.backend.empty(shape, dtype, persist_values)
            if self.backend.is_dist:
                b.ptype = 'replica' if self.is_local else 'vfragment'
            return b

        self.weight_init.is_local = self.is_local
        self.weights = self.weight_init.generate(self.weight_shape,
                                                 self.weight_dtype)
        self.weights.name = self.name  # naming weights for timing diagnostics
        self.weight_updates = make_ebuf(self.weight_shape,
                                        dtype=self.updates_dtype,
                                        persist_values=True)

        self.make_views()

        self.use_biases = 'bias_init' in self.weight_init.__dict__
        opt_param(self, ['brule_init'], None)
        if self.use_biases is True:
            self.biases = make_ebuf(self.bias_shape, dtype=self.weight_dtype,
                                    persist_values=False)
            self.biases.fill(self.weight_init.bias_init)
            self.bias_updates = make_ebuf(self.bias_shape,
                                          dtype=self.updates_dtype,
                                          persist_values=False)
            self.params.extend([self.weights, self.biases])
            self.updates.extend([self.weight_updates, self.bias_updates])
        else:
            self.params.extend([self.weights])
            self.updates.extend([self.weight_updates])

        if self.accumulate:
            self.utemp = [make_ebuf(x.shape,
                                    dtype=self.updates_dtype,
                                    persist_values=False)
                          for x in self.updates]

        for upm in self.updates:
            upm.fill(0.0)
        self.learning_rule = self.init_learning_rule(self.lrule_init)
        self.bias_rule = None
        if self.brule_init is not None and self.use_biases:
            lrn = self.learning_rule.name + 'bias'
            self.bias_rule = self.init_learning_rule(self.brule_init, name=lrn)
            self.bias_rule.allocate_state([self.updates[-1]])
            self.learning_rule.allocate_state(self.updates[:-1])
        else:
            self.learning_rule.allocate_state(self.updates)

        if self.backend.is_dist:
            # Create a mempool used for sharing in parallel mode
            self.make_mempool()

        self.params_initialized = True

    def update(self, epoch):
        if self.is_local and self.backend.is_dist:
            self.backend.redsynchronize()
            self.backend.synchronize()
            # for evt, strm in zip(self.update_events, self.backend.strms):
            #     strm.wait_for_event(evt)

        if self.bias_rule is None:
            self.learning_rule.apply_rule(self.params, self.updates, epoch)
        else:
            self.learning_rule.apply_rule(self.params[:-1],
                                          self.updates[:-1], epoch)
            self.bias_rule.apply_rule([self.params[-1]],
                                      [self.updates[-1]], epoch)

        if self.accumulate:
            for upm in self.updates:
                upm.fill(0.0)

    def normalize_weights(self, wts):
        norms = self.backend.norm(wts, order=2, axis=1)
        self.backend.divide(wts, norms.reshape((norms.shape[0], 1)), out=wts)

    def set_train_mode(self, mode):
        if self.batch_norm and mode is False:
            self.bn.set_inference_mode()

    def init_learning_rule(self, lrule_init, name=None):
        dtype = self.weight_dtype  # TODO: Cool to reuse this here?
        if name is None:
            lrname = self.name + '_lr'
        else:
            lrname = name
        if lrule_init['type'] == 'gradient_descent':
            lr = GradientDescent(name=lrname,
                                 lr_params=lrule_init['lr_params'])
        elif lrule_init['type'] == 'gradient_descent_pretrain':
            lr = GradientDescentPretrain(
                name=lrname, lr_params=lrule_init['lr_params'])
        elif lrule_init['type'] == 'gradient_descent_momentum':
            lr = GradientDescentMomentum(
                name=lrname, lr_params=lrule_init['lr_params'],
                param_dtype=dtype, gradient_dtype=dtype)
        elif lrule_init['type'] == 'gradient_descent_momentum_weight_decay':
            lr = GradientDescentMomentumWeightDecay(
                name=lrname, lr_params=lrule_init['lr_params'],
                param_dtype=dtype, gradient_dtype=dtype)
        elif lrule_init['type'] == 'adadelta':
            lr = AdaDelta(name=lrname, lr_params=lrule_init['lr_params'])
        elif lrule_init['type'] == 'rmsprop':
            lr = RMSProp(name=lrname, lr_params=lrule_init['lr_params'],
                         param_dtype=dtype, gradient_dtype=dtype)
        else:
            raise AttributeError("invalid learning rule params specified")
        lr.initialize(self.backend)
        return lr
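
Because `get_params` returns a dict of plain numpy arrays and `set_params` writes the same keys back through the backend, a simple checkpoint round trip can be sketched as follows; `layer` stands for an already-initialized `WeightLayer` instance and the file name is arbitrary:

import numpy as np

# save: weights, biases and learning-rule state come out as numpy arrays
np.savez('weight_layer_ckpt.npz', **layer.get_params())

# restore: hand the same mapping of arrays back to the layer
with np.load('weight_layer_ckpt.npz') as ckpt:
    layer.set_params({key: ckpt[key] for key in ckpt.files})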
Example #5
class WeightLayer(Layer):
    """
    Typical hidden layer with weight parameters to be learned.
    """
    def __init__(self, **kwargs):
        super(WeightLayer, self).__init__(**kwargs)
        self.distributable = True
        self.has_params = True
        self.params_initialized = False

    def initialize(self, kwargs):
        super(WeightLayer, self).initialize(kwargs)
        req_param(self, ['nin', 'nout'])
        opt_param(self, ['weight_init'], default_weight_init())
        opt_param(self, ['lrule_init'], default_lrule_init())
        opt_param(self, ['accumulate'], False)
        opt_param(self, ['batch_norm'], False)

        self.weight_init.initialize(self.backend)
        self.params = []
        self.updates = []

        if self.batch_norm:
            self.bn = BatchNorm()
            kwargs['layer'] = self
            self.bn.initialize(kwargs)

    def get_params(self):
        np_params = dict()
        for p in ['weights', 'biases']:
            if hasattr(self, p):
                p_tensor = getattr(self, p)
                np_params[p] = np.array(p_tensor.asnumpyarray(),
                                        dtype=p_tensor.dtype).reshape(
                                            p_tensor.shape)

        if self.batch_norm:
            np_params.update(self.bn.get_params())

        np_params.update(self.learning_rule.get_params())
        return np_params

    def set_params(self, params_dict):
        for p in ['weights', 'biases']:
            if p in params_dict:
                getattr(self, p)[:] = params_dict[p]

        if self.batch_norm:
            self.bn.set_params(params_dict)

        self.learning_rule.set_params(params_dict)

    def allocate_param_bufs(self):
        if self.params_initialized:
            return
        make_ebuf = self.backend.empty
        self.weights = self.weight_init.generate(self.weight_shape,
                                                 self.weight_dtype)
        self.weights.name = self.name  # naming weights for timing diagnostics
        self.weight_updates = make_ebuf(self.weight_shape, self.updates_dtype)

        self.use_biases = 'bias_init' in self.weight_init.__dict__
        opt_param(self, ['brule_init'], None)
        if self.use_biases is True:
            self.biases = make_ebuf(self.bias_shape, self.weight_dtype)
            self.biases.fill(self.weight_init.bias_init)
            self.bias_updates = make_ebuf(self.bias_shape, self.updates_dtype)
            self.params.extend([self.weights, self.biases])
            self.updates.extend([self.weight_updates, self.bias_updates])
        else:
            self.params.extend([self.weights])
            self.updates.extend([self.weight_updates])

        if self.accumulate:
            self.utemp = [make_ebuf(x.shape, self.updates_dtype)
                          for x in self.updates]
        for upm in self.updates:
            upm.fill(0.0)
        self.learning_rule = self.init_learning_rule(self.lrule_init)
        self.bias_rule = None
        if self.brule_init is not None and self.use_biases:
            self.bias_rule = self.init_learning_rule(self.brule_init)
            self.bias_rule.allocate_state([self.updates[-1]])
            self.learning_rule.allocate_state(self.updates[:-1])
        else:
            self.learning_rule.allocate_state(self.updates)
        self.params_initialized = True

    def update(self, epoch):
        if self.bias_rule is None:
            self.learning_rule.apply_rule(self.params, self.updates, epoch)
        else:
            self.learning_rule.apply_rule(self.params[:-1],
                                          self.updates[:-1], epoch)
            self.bias_rule.apply_rule([self.params[-1]],
                                      [self.updates[-1]], epoch)

        if self.accumulate:
            for upm in self.updates:
                upm.fill(0.0)

    def normalize_weights(self, wts):
        norms = self.backend.norm(wts, order=2, axis=1)
        self.backend.divide(wts, norms.reshape((norms.shape[0], 1)), out=wts)

    def set_train_mode(self, mode):
        if self.batch_norm and mode is False:
            self.bn.set_inference_mode()

    def init_learning_rule(self, lrule_init):
        dtype = self.weight_dtype  # TODO: Cool to reuse this here?
        lrname = self.name + '_lr'
        if lrule_init['type'] == 'gradient_descent':
            lr = GradientDescent(name=lrname,
                                 lr_params=lrule_init['lr_params'])
        elif lrule_init['type'] == 'gradient_descent_pretrain':
            lr = GradientDescentPretrain(
                name=lrname, lr_params=lrule_init['lr_params'])
        elif lrule_init['type'] == 'gradient_descent_momentum':
            lr = GradientDescentMomentum(
                name=lrname, lr_params=lrule_init['lr_params'],
                param_dtype=dtype, gradient_dtype=dtype)
        elif lrule_init['type'] == 'gradient_descent_momentum_weight_decay':
            lr = GradientDescentMomentumWeightDecay(
                name=lrname, lr_params=lrule_init['lr_params'],
                param_dtype=dtype, gradient_dtype=dtype)
        elif lrule_init['type'] == 'adadelta':
            lr = AdaDelta(name=lrname, lr_params=lrule_init['lr_params'])
        else:
            raise AttributeError("invalid learning rule params specified")
        lr.initialize(self.backend)
        return lr
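
`normalize_weights` divides each row of the weight matrix by its L2 norm via the backend. The same operation in plain numpy, as a standalone illustration rather than backend code:

import numpy as np

def normalize_rows(wts):
    # scale every row to unit L2 norm, mirroring normalize_weights above
    norms = np.linalg.norm(wts, ord=2, axis=1, keepdims=True)
    return wts / norms

w = np.array([[3.0, 4.0], [0.0, 2.0]])
print(normalize_rows(w))  # [[0.6, 0.8], [0.0, 1.0]]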
Example #6
class WeightLayer(Layer):
    """
    Typical hidden layer with weight parameters to be learned.
    """
    def __init__(self, **kwargs):
        super(WeightLayer, self).__init__(**kwargs)
        self.distributable = True
        self.has_params = True
        self.params_initialized = False

    def initialize(self, kwargs):
        super(WeightLayer, self).initialize(kwargs)
        req_param(self, ['nin', 'nout'])
        opt_param(self, ['weight_init'], default_weight_init())
        opt_param(self, ['lrule_init'], default_lrule_init())
        opt_param(self, ['accumulate'], False)
        opt_param(self, ['batch_norm'], False)

        self.weight_init.initialize(self.backend)
        self.params = []
        self.updates = []

        if self.batch_norm:
            self.bn = BatchNorm()
            kwargs['layer'] = self
            self.bn.initialize(kwargs)

    def get_params(self):
        np_params = dict()
        for p in ['weights', 'biases']:
            if hasattr(self, p):
                p_tensor = getattr(self, p)
                np_params[p] = np.array(p_tensor.asnumpyarray(),
                                        dtype=p_tensor.dtype).reshape(
                                            p_tensor.shape)

        if self.batch_norm:
            np_params.update(self.bn.get_params())

        np_params.update(self.learning_rule.get_params())
        return np_params

    def set_params(self, params_dict):
        for p in ['weights', 'biases']:
            if p in params_dict:
                getattr(self, p)[:] = params_dict[p]

        if self.batch_norm:
            self.bn.set_params(params_dict)

        self.learning_rule.set_params(params_dict)

    def allocate_param_bufs(self):
        if self.params_initialized:
            return
        make_ebuf = self.backend.empty
        self.weights = self.weight_init.generate(self.weight_shape,
                                                 self.weight_dtype)
        self.weights.name = self.name  # naming weights for timing diagnostics
        self.weight_updates = make_ebuf(self.weight_shape, self.updates_dtype)

        self.use_biases = 'bias_init' in self.weight_init.__dict__
        opt_param(self, ['brule_init'], None)
        if self.use_biases is True:
            self.biases = make_ebuf(self.bias_shape, self.weight_dtype)
            self.biases.fill(self.weight_init.bias_init)
            self.bias_updates = make_ebuf(self.bias_shape, self.updates_dtype)
            self.params.extend([self.weights, self.biases])
            self.updates.extend([self.weight_updates, self.bias_updates])
        else:
            self.params.extend([self.weights])
            self.updates.extend([self.weight_updates])

        if self.accumulate:
            self.utemp = [make_ebuf(x.shape, self.updates_dtype)
                          for x in self.updates]
        for upm in self.updates:
            upm.fill(0.0)
        self.learning_rule = self.init_learning_rule(self.lrule_init)
        self.bias_rule = None
        if self.brule_init is not None and self.use_biases:
            self.bias_rule = self.init_learning_rule(self.brule_init)
            self.bias_rule.allocate_state([self.updates[-1]])
            self.learning_rule.allocate_state(self.updates[:-1])
        else:
            self.learning_rule.allocate_state(self.updates)
        self.params_initialized = True

    def update(self, epoch):
        if self.bias_rule is None:
            self.learning_rule.apply_rule(self.params, self.updates, epoch)
        else:
            self.learning_rule.apply_rule(self.params[:-1],
                                          self.updates[:-1], epoch)
            self.bias_rule.apply_rule([self.params[-1]],
                                      [self.updates[-1]], epoch)

        if self.accumulate:
            for upm in self.updates:
                upm.fill(0.0)

    def normalize_weights(self, wts):
        norms = self.backend.norm(wts, order=2, axis=1)
        self.backend.divide(wts, norms.reshape((norms.shape[0], 1)), out=wts)

    def set_train_mode(self, mode):
        if self.batch_norm and mode is False:
            self.bn.set_inference_mode()

    def init_learning_rule(self, lrule_init):
        dtype = self.weight_dtype  # TODO: Cool to reuse this here?
        lrname = self.name + '_lr'
        if lrule_init['type'] == 'gradient_descent':
            lr = GradientDescent(name=lrname,
                                 lr_params=lrule_init['lr_params'])
        elif lrule_init['type'] == 'gradient_descent_pretrain':
            lr = GradientDescentPretrain(
                name=lrname, lr_params=lrule_init['lr_params'])
        elif lrule_init['type'] == 'gradient_descent_momentum':
            lr = GradientDescentMomentum(
                name=lrname, lr_params=lrule_init['lr_params'],
                param_dtype=dtype, gradient_dtype=dtype)
        elif lrule_init['type'] == 'gradient_descent_momentum_weight_decay':
            lr = GradientDescentMomentumWeightDecay(
                name=lrname, lr_params=lrule_init['lr_params'],
                param_dtype=dtype, gradient_dtype=dtype)
        elif lrule_init['type'] == 'adadelta':
            lr = AdaDelta(name=lrname, lr_params=lrule_init['lr_params'])
        elif lrule_init['type'] == 'rmsprop':
            lr = RMSProp(name=lrname, lr_params=lrule_init['lr_params'],
                         param_dtype=dtype, gradient_dtype=dtype)
        else:
            raise AttributeError("invalid learning rule params specified")
        lr.initialize(self.backend)
        return lr
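
This last variant adds the `rmsprop` branch to `init_learning_rule`. The method dispatches on a small config dict whose only required keys are `type` and `lr_params`; a hypothetical call could look like the sketch below, where `layer` is an initialized `WeightLayer` and the contents of `lr_params` are placeholder names passed straight through to the `RMSProp` constructor:

lrule_init = {
    'type': 'rmsprop',
    # placeholder hyperparameters; the real keys depend on RMSProp's lr_params
    'lr_params': {'learning_rate': 0.001},
}
lr = layer.init_learning_rule(lrule_init)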