def __init__(self, **kwargs):
    super(AutoUniformValGen, self).__init__(**kwargs)
    opt_param(self, ['relu'], False)
    opt_param(self, ['islocal'], False)
    self.low = float('nan')
    self.high = float('nan')
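
# Note: the opt_param/req_param helpers used throughout these snippets come
# from neon.util.param. The sketch below illustrates their assumed behavior
# (inferred from how they are called here), not the library source:
# opt_param fills in a default only when the attribute has not already been
# supplied, and req_param fails fast when a required attribute is missing.
def opt_param_sketch(obj, paramlist, default_value=None):
    # set each named attribute to the default unless it is already present
    for param in paramlist:
        if not hasattr(obj, param):
            setattr(obj, param, default_value)


def req_param_sketch(obj, paramlist):
    # raise if any required attribute has not been provided on the object
    for param in paramlist:
        if not hasattr(obj, param):
            raise ValueError("required parameter '%s' missing for %s" %
                             (param, obj.__class__.__name__))
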
def allocate_param_bufs(self):
    if self.params_initialized:
        return
    make_ebuf = self.backend.empty
    self.weights = self.weight_init.generate(self.weight_shape,
                                             self.weight_dtype)
    self.weights.name = self.name  # naming weights for timing diagnostics
    self.weight_updates = make_ebuf(self.weight_shape, self.updates_dtype)
    self.use_biases = 'bias_init' in self.weight_init.__dict__
    opt_param(self, ['brule_init'], None)
    if self.use_biases is True:
        self.biases = make_ebuf(self.bias_shape, self.weight_dtype)
        self.biases.fill(self.weight_init.bias_init)
        self.bias_updates = make_ebuf(self.bias_shape, self.updates_dtype)
        self.params.extend([self.weights, self.biases])
        self.updates.extend([self.weight_updates, self.bias_updates])
    else:
        self.params.extend([self.weights])
        self.updates.extend([self.weight_updates])
    if self.accumulate:
        self.utemp = map(lambda x: make_ebuf(x.shape, self.updates_dtype),
                         self.updates)
    for upm in self.updates:
        upm.fill(0.0)
    self.learning_rule = self.init_learning_rule(self.lrule_init)
    self.bias_rule = None
    if self.brule_init is not None and self.use_biases:
        self.bias_rule = self.init_learning_rule(self.brule_init)
        self.bias_rule.allocate_state([self.updates[-1]])
        self.learning_rule.allocate_state(self.updates[:-1])
    else:
        self.learning_rule.allocate_state(self.updates)
    self.params_initialized = True
def __init__(self, **kwargs):
    self.live = False
    self.server = None
    opt_param(self, ['batch_size'], default_value=1)
    opt_param(self, ['input_dtype', 'target_dtype'],
              default_value=np.float32)
    self.__dict__.update(kwargs)
def initialize(self, kwargs):
    super(ConvLayer, self).initialize(kwargs)
    self.initialize_local()
    if self.pad != 0 and isinstance(self.backend, CPU):
        raise NotImplementedError('pad != 0, for CPU backend in ConvLayer')
    opt_param(self, ['shared_bias'], True)
    if self.shared_bias:
        self.bias_shape = (self.nofm, 1)
        self.bias_expand = self.backend.empty((self.nout, 1),
                                              dtype=self.weight_dtype)
    else:
        self.bias_shape = (self.nout, 1)
    self.allocate_output_bufs()
    self.allocate_param_bufs()
    opt_param(self, ['prodbuf', 'bpropbuf', 'updatebuf'], None)
    if isinstance(self.backend, CPU):
        self.prodbuf = self.backend.empty((self.nofm, self.batch_size))
        self.bpropbuf = self.backend.empty((self.fsize, self.batch_size))
        self.updatebuf = self.backend.empty(self.weights.shape)
    if self.backend.__module__ == 'neon.backends.gpu':
        self.conv_params = self.backend.ng.conv_layer(
            N=self.batch_size, C=self.nifm, K=self.nofm,
            D=1, H=self.ifmshape[0], W=self.ifmshape[1],
            T=1, R=self.fshape[0], S=self.fshape[1],
            pad_d=0, pad_h=self.pad, pad_w=self.pad,
            str_d=1, str_h=self.stride, str_w=self.stride,
            grid_P=0, grid_Q=0, dtype=self.weight_dtype)
        self.prodbuf = self.bpropbuf = self.updatebuf = self.conv_params
def __init__(self, **kwargs):
    self.macro_batched = False
    self.__dict__.update(kwargs)
    opt_param(self, ['backend_type'], 'np.float32')
    self.backend_type = ensure_dtype(self.backend_type)  # string to dtype
    logger.info("Setting dtype to " + str(self.backend_type))
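
# ensure_dtype is used above (and in the cost __init__ further down) to map
# a YAML-style string such as 'np.float32' to a numpy dtype. A minimal
# sketch under that assumption; the accepted names and the error handling
# here are illustrative rather than the library implementation.
def ensure_dtype_sketch(backend_type):
    # pass real numpy types straight through, translate known string names
    if backend_type in (np.float16, np.float32, np.float64):
        return backend_type
    string_map = {'np.float16': np.float16,
                  'np.float32': np.float32,
                  'np.float64': np.float64}
    if backend_type not in string_map:
        raise ValueError("unsupported backend_type: %s" % backend_type)
    return string_map[backend_type]
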
def initialize(self, kwargs):
    opt_param(self, ['keep'], 0.5)
    super(DropOutLayer, self).initialize(kwargs)
    self.keepmask = self.backend.empty((self.nin, self.batch_size),
                                       dtype=self.weight_dtype)
    self.train_mode = True
    self.allocate_output_bufs()
def __init__(self, **kwargs):
    self.initialized = False
    self.__dict__.update(kwargs)
    req_param(self, ['dataset', 'model'])
    opt_param(self, ['backend'])
    opt_param(self, ['live'], False)
    if self.backend is not None:
        self.initialize(self.backend)
def __init__(self, **kwargs):
    self.accumulate = True
    # Reusing deltas not supported for RNNs yet
    self.reuse_deltas = False
    super(RNN, self).__init__(**kwargs)
    req_param(self, ['unrolls'])
    self.rec_layer = self.layers[1]
    opt_param(self, ['num_grad_params'], None)
def initialize(self, kwargs):
    super(RecurrentCostLayer, self).initialize(kwargs)
    req_param(self, ['cost', 'ref_layer'])
    opt_param(self, ['ref_label'], 'targets')
    self.targets = None
    self.cost.olayer = self.prev_layer
    self.cost.initialize(kwargs)
    self.deltas = self.cost.get_deltabuf()
def allocate_param_bufs(self):
    if self.params_initialized:
        return

    def make_ebuf(shape, dtype, persist_values):
        b = self.backend.empty(shape, dtype, persist_values)
        if self.backend.is_dist:
            b.ptype = 'replica' if self.is_local else 'vfragment'
        return b

    self.weight_init.is_local = self.is_local
    self.weights = self.weight_init.generate(self.weight_shape,
                                             self.weight_dtype)
    self.weights.name = self.name  # naming weights for timing diagnostics
    self.weight_updates = make_ebuf(self.weight_shape,
                                    dtype=self.updates_dtype,
                                    persist_values=True)
    self.make_views()
    self.use_biases = 'bias_init' in self.weight_init.__dict__
    opt_param(self, ['brule_init'], None)
    if self.use_biases is True:
        self.biases = make_ebuf(self.bias_shape, dtype=self.weight_dtype,
                                persist_values=False)
        self.biases.fill(self.weight_init.bias_init)
        self.bias_updates = make_ebuf(self.bias_shape,
                                      dtype=self.updates_dtype,
                                      persist_values=False)
        self.params.extend([self.weights, self.biases])
        self.updates.extend([self.weight_updates, self.bias_updates])
    else:
        self.params.extend([self.weights])
        self.updates.extend([self.weight_updates])
    if self.accumulate:
        self.utemp = [make_ebuf(x.shape, dtype=self.updates_dtype,
                                persist_values=False)
                      for x in self.updates]
    for upm in self.updates:
        upm.fill(0.0)
    self.learning_rule = self.init_learning_rule(self.lrule_init)
    self.bias_rule = None
    if self.brule_init is not None and self.use_biases:
        lrn = self.learning_rule.name + 'bias'
        self.bias_rule = self.init_learning_rule(self.brule_init, name=lrn)
        self.bias_rule.allocate_state([self.updates[-1]])
        self.learning_rule.allocate_state(self.updates[:-1])
    else:
        self.learning_rule.allocate_state(self.updates)
    if self.backend.is_dist:
        # Create a mempool used for sharing in parallel mode
        self.make_mempool()
    self.params_initialized = True
def initialize(self, kwargs):
    opt_param(self, ['keep'], 0.5)
    super(DropOutLayer, self).initialize(kwargs)
    bkend = self.backend
    make_zbuf = bkend.allocate_fragment if self.is_local else bkend.empty
    self.keepmask = make_zbuf((self.nin, self.batch_size),
                              dtype=self.weight_dtype)
    self.train_mode = True
    self.allocate_output_bufs()
def __init__(self, **kwargs):
    self.initialized = False
    self.__dict__.update(kwargs)
    req_param(self, ['layers', 'batch_size'])
    opt_param(self, ['step_print'], -1)
    opt_param(self, ['accumulate'], False)
    opt_param(self, ['reuse_deltas'], True)
    opt_param(self, ['timing_plots'], False)
    opt_param(self, ['serialize_schedule'])
def initialize(self, kwargs):
    req_param(self, ['nout', 'nin', 'unrolls', 'activation'])
    super(RecurrentOutputLayer, self).initialize(kwargs)
    self.weight_shape = (self.nout, self.nin)
    self.bias_shape = (self.nout, 1)
    opt_param(self, ['delta_shape'], (self.nin, self.batch_size))  # moved
    self.allocate_output_bufs()
    self.allocate_param_bufs()
def __init__(self, **kwargs): self.initialized = False self.__dict__.update(kwargs) req_param(self, ["layers", "batch_size"]) opt_param(self, ["step_print"], -1) opt_param(self, ["accumulate"], False) opt_param(self, ["reuse_deltas"], True) opt_param(self, ["timing_plots"], False) opt_param(self, ["serialize_schedule"])
def allocate_output_bufs(self):
    make_zbuf = self.backend.zeros
    opt_param(self, ['out_shape'], (self.nout, self.batch_size))
    opt_param(self, ['delta_shape'], (self.nin, self.batch_size))
    self.output = make_zbuf(self.out_shape, self.output_dtype)
    self.pre_act = self.activation.pre_act_buffer(self.backend,
                                                  self.output,
                                                  self.pre_act_dtype)
def initialize(self, kwargs):
    req_param(self, ['weight_init_rec'])
    self.weight_rec_shape = (self.nout, self.nout)
    super(RecurrentLSTMLayer, self).initialize(kwargs)
    self.weight_shape = (self.nout, self.nin)
    self.bias_shape = (self.nout, 1)
    opt_param(self, ['delta_shape'], (self.nout, self.batch_size))
    self.allocate_output_bufs()
    self.allocate_param_bufs()
def __init__(self, name, lr_params):
    self.name = name
    opt_param(self, ['velocity_dtype', 'param_dtype', 'gradient_dtype'],
              np.float32)
    opt_param(self, ['backend_type'], 'np.float32')
    if self.backend_type == 'np.float16':
        logger.info("Setting learning rule dtypes to float16")
        for item in ('velocity_dtype', 'param_dtype', 'gradient_dtype'):
            setattr(self, item, np.float16)
def initialize(self, kwargs):
    super(CrossMapPoolingLayer, self).initialize(kwargs)
    req_param(self, ['nofm'])
    self.initialize_local()
    self.allocate_output_bufs()
    self.allocate_param_bufs()
    opt_param(self, ['updatebuf'], None)
    if isinstance(self.backend, CPU):
        self.updatebuf = self.backend.empty((1, 1))
def initialize(self, kwargs):
    super(ConvLayer, self).initialize(kwargs)
    self.initialize_local()
    if self.pad != 0 and isinstance(self.backend, CPU):
        raise NotImplementedError('pad != 0, for CPU backend in ConvLayer')
    self.allocate_output_bufs()
    opt_param(self, ['shared_bias'], True)
    if self.shared_bias:
        self.bias_shape = (self.nofm, 1)
        self.bias_expand = self.backend.empty((self.nout, 1),
                                              dtype=self.weight_dtype)
    else:
        self.bias_shape = (self.nout, 1)
    self.allocate_param_bufs()
    opt_param(self, ['prodbuf', 'bpropbuf', 'updatebuf'], None)
    if isinstance(self.backend, CPU):
        self.prodbuf = self.backend.empty((self.nofm, self.batch_size),
                                          dtype=self.weight_dtype)
        self.bpropbuf = self.backend.empty((self.fsize, self.batch_size),
                                           dtype=self.weight_dtype)
        self.updatebuf = self.backend.empty(self.weights.shape,
                                            dtype=self.weight_dtype)
    if hasattr(self.backend, 'ng'):
        self.conv_params = self.backend.ng.conv_layer(
            N=self.output.shape[1], C=self.nifm, K=self.nofm,
            D=1, H=self.ifmshape[0], W=self.ifmshape[1],
            T=1, R=self.fshape[0], S=self.fshape[1],
            pad_d=0, pad_h=self.pad, pad_w=self.pad,
            str_d=1, str_h=self.stride, str_w=self.stride,
            grid_P=0, grid_Q=0, dtype=self.weight_dtype)
        self.prodbuf = self.bpropbuf = self.updatebuf = self.conv_params
    if self.backend.is_dist:
        self.bprop_events = self.backend.make_events()
        self.update_events = self.backend.make_events()
    else:
        self.bprop_events = None
        self.update_events = None
def initialize(self, kwargs): """ Initialize the Batch Normalization transform. This function will be called from WeightLayer.initialize with a reference to the layer. Arguments: _eps (numeric, optional): value used for numerical stability when normalizing by variance _iscale (numeric, optional): explicitly set an affine scale value to be used in inference instead of calculated scale from training _ishift (numeric, optional): explicitly set an affine shift value to be used in inference instead of calculated shift from training """ self.__dict__.update(kwargs) self.dtype = self.layer.weight_dtype self.bigtype = np.float32 if self.dtype is np.float16 else self.dtype opt_param(self, ['_iscale', '_ishift']) opt_param(self, ['_eps'], 1e-6) req_param(self, ['layer']) self.backend = self.layer.backend self.is_local = self.layer.is_local self.batch_size = self.layer.batch_size if self.is_local: self.in1d = (self.layer.nofm, 1) self.ofmsize = self.layer.ofmsize self.orig_shape = (self.layer.nofm * self.ofmsize, self.batch_size) self.in_shape = (self.layer.nofm, self.ofmsize * self.batch_size) else: self.in_shape = (self.layer.nout, self.batch_size) self.in1d = (self.layer.nout, 1) self.train_mode = True logger.info("BatchNormalization set to train mode") self.nbatches = 0 self._xhat = self.backend.zeros(self.in_shape, dtype=self.dtype) self._mean = self.backend.zeros(self.in1d, dtype=self.bigtype) self._vars = self.backend.zeros(self.in1d, dtype=self.bigtype) # Global mean and var to be used during inference self._gmean = self.backend.zeros(self.in1d, dtype=self.bigtype) self._gvars = self.backend.zeros(self.in1d, dtype=self.bigtype) # learned params and their update buffers self._beta = self.backend.zeros(self.in1d, dtype=self.bigtype) self._gamma = self.backend.ones(self.in1d, dtype=self.bigtype) self.layer.params.extend([self._beta, self._gamma]) self._beta_updates = self.backend.zeros(self.in1d, dtype=self.bigtype) self._gamma_updates = self.backend.zeros(self.in1d, dtype=self.bigtype) self.layer.updates.extend([self._beta_updates, self._gamma_updates])
def initialize(self, kwargs):
    super(ConvLayer, self).initialize(kwargs)
    self.initialize_local()
    if self.pad != 0 and isinstance(self.backend, CPU):
        raise NotImplementedError('pad != 0, for CPU backend in ConvLayer')
    self.allocate_output_bufs()
    opt_param(self, ['shared_bias'], True)
    if self.shared_bias:
        self.bias_shape = (self.nofm, 1)
        self.bias_expand = self.backend.empty((self.nout, 1),
                                              dtype=self.weight_dtype)
    else:
        self.bias_shape = (self.nout, 1)
    if self.shared_bias or self.batch_norm:
        self.bias_expand_view = self.bias_expand.reshape(
            (self.nofm, self.ofmsize))
        self.pre_act_view = self.pre_act.reshape(
            (self.nofm, self.ofmsize * self.batch_size))
    self.allocate_param_bufs()
    opt_param(self, ['prodbuf', 'bpropbuf', 'updatebuf'], None)
    if isinstance(self.backend, CPU):
        self.prodbuf = self.backend.empty((self.nofm, self.batch_size))
        self.bpropbuf = self.backend.empty((self.fsize, self.batch_size))
        self.updatebuf = self.backend.empty(self.weights.shape)
    if self.backend.__module__ == 'neon.backends.gpu':
        self.conv_params = self.backend.ng.conv_layer(
            N=self.batch_size, C=self.nifm, K=self.nofm,
            D=1, H=self.ifmshape[0], W=self.ifmshape[1],
            T=1, R=self.fshape[0], S=self.fshape[1],
            pad_d=0, pad_h=self.pad, pad_w=self.pad,
            str_d=1, str_h=self.stride, str_w=self.stride,
            grid_P=0, grid_Q=0, dtype=self.weight_dtype)
        self.prodbuf = self.bpropbuf = self.updatebuf = self.conv_params
def initialize(self, kwargs):
    super(WeightLayer, self).initialize(kwargs)
    req_param(self, ['weight_init', 'lrule_init', 'nin', 'nout'])
    opt_param(self, ['accumulate'], False)
    opt_param(self, ['batch_norm'], False)
    self.weight_init.initialize(self.backend)
    self.params = []
    self.updates = []
    if self.batch_norm:
        self.bn = BatchNorm()
        kwargs['layer'] = self
        self.bn.initialize(kwargs)
def allocate_output_bufs(self):
    if self.is_local:
        make_zbuf = self.backend.allocate_fragment
    else:
        make_zbuf = self.backend.empty
    opt_param(self, ['out_shape'], (self.nout, self.batch_size))
    opt_param(self, ['delta_shape'], (self.nin, self.batch_size))
    self.output = make_zbuf(self.out_shape, dtype=self.output_dtype,
                            persist_values=True)
    self.pre_act = self.activation.pre_act_buffer(self.backend,
                                                  self.output,
                                                  self.pre_act_dtype)
    if self.backend.is_dist:
        self.output.ptype = 'fragment' if self.is_local else 'replica'
def allocate_output_bufs(self):
    make_zbuf = self.backend.zeros
    opt_param(self, ['out_shape'], (self.nout, self.batch_size))
    self.output = make_zbuf(self.out_shape, self.output_dtype)
    self.pre_act = self.activation.pre_act_buffer(self.backend,
                                                  self.output,
                                                  self.pre_act_dtype)
    # TODO: Get rid of output and pre_act. But they seem to be used in the
    # cost to set a buffer size.
    self.pre_act_list = [self.pre_act] + \
        [make_zbuf(self.out_shape, self.pre_act_dtype)
         for k in range(1, self.unrolls)]
    self.output_list = [self.output] + \
        [make_zbuf(self.out_shape, self.output_dtype)
         for k in range(1, self.unrolls)]
def initialize(self, kwargs):
    super(WeightLayer, self).initialize(kwargs)
    req_param(self, ['nin', 'nout'])
    opt_param(self, ['weight_init'], default_weight_init())
    opt_param(self, ['lrule_init'], default_lrule_init())
    opt_param(self, ['accumulate'], False)
    opt_param(self, ['batch_norm'], False)
    opt_param(self, ['mempool'])  # Used for parallel mode
    self.weight_init.initialize(self.backend)
    self.params = []
    self.updates = []
    if self.batch_norm:
        self.bn = BatchNorm()
        kwargs['layer'] = self
        self.bn.initialize(kwargs)
def __init__(self, **kwargs):
    self.__dict__.update(kwargs)
    opt_param(self, ['temp_dtype'], np.float32)
    opt_param(self, ['outputbuf', 'temp'], None)
    opt_param(self, ['scale'], 1.0)
    opt_param(self, ['backend_type'], 'np.float32')
    if self.backend_type == 'np.float16':
        logger.info("Setting cost dtype to float16")
        setattr(self, 'temp_dtype', np.float16)
def initialize(self, kwargs):
    opt_param(self, ['shortcut_deriv'], True)
    # raw label indicates whether the reference labels are indexes (raw)
    # or one-hot (default)
    super(CrossEntropy, self).initialize(kwargs)
    if isinstance(self.olayer.activation, Softmax):
        self.ce_function = cross_entropy_multi
        if self.shortcut_deriv:
            self.cd_function = shortcut_derivative
            self.olayer.skip_act = True
        else:
            self.cd_function = cross_entropy_multi_derivative
    elif isinstance(self.olayer.activation, Logistic):
        self.ce_function = cross_entropy
        if self.shortcut_deriv:
            self.cd_function = shortcut_derivative
            self.olayer.skip_act = True
        else:
            self.cd_function = cross_entropy_derivative
    else:
        self.ce_function = cross_entropy
        self.cd_function = cross_entropy_derivative
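
# The shortcut branch above relies on a standard identity: when cross-entropy
# is paired with a matching Softmax or Logistic output activation, the
# gradient with respect to the pre-activation collapses to
# (outputs - targets), so the activation derivative can be skipped
# (olayer.skip_act = True). Below is a numpy sketch of the assumed
# shortcut_derivative behavior; the real function writes its result into a
# preallocated backend buffer rather than returning a new array.
def shortcut_derivative_sketch(outputs, targets, scale=1.0):
    # combined d(cost)/d(pre-activation) for softmax/logistic + cross-entropy
    return scale * (outputs - targets)
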
def __init__(self, **kwargs):
    self.__dict__.update(kwargs)
    opt_param(self, ['outputbuf', 'temp'], None)
    opt_param(self, ['scale'], 1.0)
    opt_param(self, ['backend_type'], np.float32)
    self.temp_dtype = ensure_dtype(self.backend_type)  # string to dtype
    logger.info("Setting dtype to " + str(self.backend_type))
def initialize(self, kwargs):
    self.__dict__.update(kwargs)
    opt_param(self, ['backend'], self.olayer.backend)
    opt_param(self, ['batch_size'], self.olayer.batch_size)
    opt_param(self, ['olayer_data'], 'output')
    req_param(self.olayer, [self.olayer_data])
    # if not hasattr(self.olayer, self.olayer_data):
    #     raise ValueError("Layer %s does not have buffer %s" %
    #                      (self.olayer.name, self.olayer_data))
    # else:
    self.set_outputbuf(getattr(self.olayer, self.olayer_data))
def initialize(self, kwargs):
    super(CostLayer, self).initialize(kwargs)
    req_param(self, ['cost'])
    opt_param(self, ['ref_label'], 'targets')
    opt_param(self, ['raw_label'], False)
    opt_param(self, ['category_label'], 'l_id')
    self.reference = None
    self.cost.olayer = self.prev_layer
    kwargs['raw_label'] = self.raw_label
    self.cost.initialize(kwargs)
    self.deltas = self.cost.get_deltabuf()
def __init__(self, **kwargs):
    self.__dict__.update(kwargs)
    self.out_dir = os.path.expanduser(self.save_dir)
    self.in_dir = os.path.expanduser(self.image_dir)
    self.batch_size = self.macro_size
    global TARGET_SIZE, SQUARE_CROP
    TARGET_SIZE = self.output_image_size
    SQUARE_CROP = self.square_crop
    opt_param(self, ['file_pattern'], '*.jpg')
    opt_param(self, ['validation_pct'], 0.2)
    opt_param(self, ['num_workers'], 5)
    opt_param(self, ['class_samples_max'])
    self.train_file = os.path.join(self.out_dir, 'train_file.csv.gz')
    self.val_file = os.path.join(self.out_dir, 'val_file.csv.gz')
    self.stats = os.path.join(self.out_dir, 'dataset_cache.pkl')
    self.val_mean = np.zeros((self.num_channels,
                              self.output_image_size,
                              self.output_image_size), dtype=np.uint8)
    self.train_mean = np.zeros_like(self.val_mean)
def __init__(self, name, lr_params):
    self.name = name
    opt_param(self, ['velocity_dtype', 'param_dtype', 'gradient_dtype'],
              np.float32)