class MomentumOptimizer(Optimizer):
    def __init__(self, rate: float, factor: float, consistent: bool = False):
        self.__rate = rate
        self.__factor = factor
        self.__consistent = consistent
        self.__old_gradient_map = {}
        self.__gradient_engine = Engine()

    def optimize(self, engine: Engine, calculate_function):
        variables = engine.variables
        for variable in variables:
            # Assigning a new symbol resets the gradient engine's cache; in
            # consistent mode the previous cache is restored, so every variable
            # sees gradients evaluated at the same (pre-update) values.
            value_cache = self.__gradient_engine.value_cache
            self.__gradient_engine.symbol = engine.gradient(variable)
            self.__gradient_engine.bind = engine.bind
            if self.__consistent:
                self.__gradient_engine.value_cache = value_cache
            # Heavy-ball momentum: current gradient plus decayed old momentum.
            momentum = self.__gradient_engine.value() + \
                self.__factor * self.__old_gradient_map.get(variable, 0)
            self.__old_gradient_map[variable] = momentum
            variable.value = calculate_function(variable.value,
                                                self.__rate * momentum)
        engine.modified()
        self.__gradient_engine.modified()

    def minimize(self, engine: Engine):
        self.optimize(engine, lambda v, g: v - g)

    def maximize(self, engine: Engine):
        self.optimize(engine, lambda v, g: v + g)
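# A minimal standalone sketch of the update rule MomentumOptimizer implements,
# run on the toy function f(x) = x^2 with plain numpy. No Engine is involved;
# the function, constants, and variable names here are illustrative only.
import numpy

x = numpy.array(5.0)
velocity = 0.0           # plays the role of __old_gradient_map[variable]
rate, factor = 0.1, 0.9  # learning rate and momentum factor
for _ in range(100):
    gradient = 2 * x                         # d/dx of x^2
    velocity = gradient + factor * velocity  # same accumulation as optimize()
    x = x - rate * velocity                  # minimize: v - g, as in minimize()
print(x)  # oscillates, then converges toward the minimum at 0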
class AdaptiveGradientOptimizer(Optimizer):
    def __init__(self, rate: float, consistent: bool = False):
        self.__rate = rate
        self.__consistent = consistent
        self.__gradient_engine = Engine()
        self.__accumulate_gradient_map = {}

    def optimize(self, engine: Engine, calculate_function):
        variables = engine.variables
        for variable in variables:
            value_cache = self.__gradient_engine.value_cache
            self.__gradient_engine.symbol = engine.gradient(variable)
            self.__gradient_engine.bind = engine.bind
            if self.__consistent:
                self.__gradient_engine.value_cache = value_cache
            current_gradient = self.__gradient_engine.value()
            self.__accumulate_gradient_map.setdefault(variable, 0)
            self.__accumulate_gradient_map[variable] += current_gradient**2
            regularization_value = current_gradient / (
                self.__accumulate_gradient_map[variable] + 1e-8)**0.5
            variable.value = calculate_function(
                variable.value, self.__rate * regularization_value)
        engine.modified()
        self.__gradient_engine.modified()

    def minimize(self, engine: Engine):
        self.optimize(engine, lambda v, g: v - g)

    def maximize(self, engine: Engine):
        self.optimize(engine, lambda v, g: v + g)
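# The same toy setup for AdaptiveGradientOptimizer: squared gradients are
# accumulated per variable and each step is scaled by their inverse square
# root, so frequently-updated directions get progressively smaller effective
# rates. Standalone and illustrative only; no framework types are used.
import numpy

x = numpy.array(5.0)
accumulated = 0.0  # plays the role of __accumulate_gradient_map[variable]
rate = 0.5
for _ in range(200):
    gradient = 2 * x
    accumulated += gradient**2
    # The 1e-8 term guards against division by zero, as in the class above.
    x = x - rate * gradient / (accumulated + 1e-8)**0.5
print(x)  # steps shrink over time as the accumulated squared gradient grows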
class MomentumOptimizer(Optimizer):
    def __init__(self, rate: float, factor: float, consistent: bool = False):
        self.__rate = rate
        self.__factor = factor
        self.__consistent = consistent
        self.__old_gradient_map = {}
        self.__gradient_engine = Engine()

    def __repr__(self):
        # Include factor, which the original format string omitted.
        return '{}(rate={}, factor={}, consistent={})'.format(
            self.__class__.__name__, self.__rate, self.__factor,
            self.__consistent)

    def optimize(self, engine: Engine, calculate_function):
        variables = engine.variables
        for variable in variables:
            value_cache = self.__gradient_engine.value_cache
            self.__gradient_engine.symbol = engine.gradient(variable)
            self.__gradient_engine.bind = engine.bind
            if self.__consistent:
                self.__gradient_engine.value_cache = value_cache
            momentum = self.__gradient_engine.value() + \
                self.__factor * self.__old_gradient_map.get(variable, 0)
            self.__old_gradient_map[variable] = momentum
            variable.value = calculate_function(variable.value,
                                                self.__rate * momentum)
        engine.modified()
        self.__gradient_engine.modified()
class AdaptiveGradientOptimizer(Optimizer):
    def __init__(self, rate: float, consistent: bool = False):
        self.__rate = rate
        self.__consistent = consistent
        self.__gradient_engine = Engine()
        self.__accumulate_gradient_map = {}

    def __repr__(self):
        return '{}(rate={}, consistent={})'.format(self.__class__.__name__,
                                                   self.__rate,
                                                   self.__consistent)

    def optimize(self, engine: Engine, calculate_function):
        variables = engine.variables
        for variable in variables:
            value_cache = self.__gradient_engine.value_cache
            self.__gradient_engine.symbol = engine.gradient(variable)
            self.__gradient_engine.bind = engine.bind
            if self.__consistent:
                self.__gradient_engine.value_cache = value_cache
            current_gradient = self.__gradient_engine.value()
            self.__accumulate_gradient_map.setdefault(variable, 0)
            self.__accumulate_gradient_map[variable] += current_gradient**2
            regularization_value = current_gradient / (
                self.__accumulate_gradient_map[variable] + 1e-8)**0.5
            variable.value = calculate_function(
                variable.value, self.__rate * regularization_value)
        engine.modified()
        self.__gradient_engine.modified()
def end_iteration(self):
    # Plugin hook: record each batch-normalization layer's running mean and
    # variance after every iteration.
    for layer_tuple in self.__batch_normalization_layer:
        layer_mean_symbol, layer_variance_symbol = \
            layer_tuple[0].normalization_symbol()
        normalization_engine = Engine(layer_variance_symbol)
        normalization_engine.bind = self.network.engine.bind
        layer_tuple[2].append(normalization_engine.value())
        layer_tuple[1].append(
            normalization_engine.value_cache[layer_mean_symbol])
class GradientDescentOptimizer(Optimizer):
    def __init__(self, rate: float):
        self.__rate = rate
        self.__gradient_engine = Engine()

    def minimize(self, engine: Engine):
        engine.differentiate()
        variables = engine.variables
        for variable in variables:
            self.__gradient_engine.symbol = engine.gradient(variable)
            self.__gradient_engine.bind = engine.bind
            variable.value -= self.__rate * self.__gradient_engine.value()
        engine.modified()

    def maximize(self, engine: Engine):
        engine.differentiate()
        variables = engine.variables
        for variable in variables:
            # Fixed: symbol must be assigned, not called as a function.
            self.__gradient_engine.symbol = engine.gradient(variable)
            self.__gradient_engine.bind = engine.bind
            variable.value += self.__rate * self.__gradient_engine.value()
        engine.modified()
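# A hedged usage sketch for GradientDescentOptimizer, relying only on the
# Engine/Variable API visible in the code above (symbol, bind, variables,
# value(), minimize()). The expression below assumes symbols support
# arithmetic operator overloading, which is suggested by the `@` and `+`
# usage elsewhere but not confirmed here.
x = Variable(name='x')
x.value = 0.0

engine = Engine()
engine.symbol = (x - 3) * (x - 3)  # minimize f(x) = (x - 3)^2
engine.variables = [x]

optimizer = GradientDescentOptimizer(rate=0.1)
for _ in range(100):
    optimizer.minimize(engine)  # one gradient step per call
print(x.value)  # should approach 3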
class AdaptiveMomentEstimationOptimizer(Optimizer):
    def __init__(self, rate: float, decay: float, square_decay: float,
                 consistent: bool = False):
        self.__rate = rate
        self.__decay = decay
        self.__square_decay = square_decay
        self.__consistent = consistent
        self.__gradient_engine = Engine()
        self.__estimation_map = {}
        self.__square_estimation_map = {}
        self.__step = 1

    def optimize(self, engine: Engine, calculate_function):
        variables = engine.variables
        for variable in variables:
            value_cache = self.__gradient_engine.value_cache
            self.__gradient_engine.symbol = engine.gradient(variable)
            self.__gradient_engine.bind = engine.bind
            if self.__consistent:
                self.__gradient_engine.value_cache = value_cache
            current_gradient = self.__gradient_engine.value()
            self.__estimation_map.setdefault(variable, 0)
            self.__square_estimation_map.setdefault(variable, 0)
            self.__estimation_map[variable] = (
                self.__decay * self.__estimation_map[variable] +
                (1 - self.__decay) * current_gradient)
            self.__square_estimation_map[variable] = (
                self.__square_decay * self.__square_estimation_map[variable] +
                (1 - self.__square_decay) * current_gradient**2)
            # Bias-corrected first and second moment estimates.
            estimation = self.__estimation_map[variable] / (
                1 - self.__decay**self.__step)
            square_estimation = self.__square_estimation_map[variable] / (
                1 - self.__square_decay**self.__step)
            regularization_value = estimation / (square_estimation + 1e-8)**0.5
            variable.value = calculate_function(
                variable.value, self.__rate * regularization_value)
        # Fixed: advance the step once per optimization pass, not once per
        # variable, so all variables share the same bias-correction factor.
        self.__step += 1
        engine.modified()
        self.__gradient_engine.modified()

    def minimize(self, engine: Engine):
        self.optimize(engine, lambda v, g: v - g)

    def maximize(self, engine: Engine):
        self.optimize(engine, lambda v, g: v + g)
class AdaptiveMomentEstimationOptimizer(Optimizer):
    def __init__(self, rate: float, decay: float = 0.9,
                 square_decay: float = 0.999, consistent: bool = False):
        self.__rate = rate
        self.__decay = decay
        self.__square_decay = square_decay
        self.__consistent = consistent
        self.__gradient_engine = Engine()
        self.__estimation_map = {}
        self.__square_estimation_map = {}
        self.__step = 1

    def __repr__(self):
        return '{}(rate={}, decay={}, square_decay={}, consistent={})'.format(
            self.__class__.__name__, self.__rate, self.__decay,
            self.__square_decay, self.__consistent)

    def optimize(self, engine: Engine, calculate_function):
        variables = engine.variables
        for variable in variables:
            value_cache = self.__gradient_engine.value_cache
            self.__gradient_engine.symbol = engine.gradient(variable)
            self.__gradient_engine.bind = engine.bind
            if self.__consistent:
                self.__gradient_engine.value_cache = value_cache
            current_gradient = self.__gradient_engine.value()
            self.__estimation_map.setdefault(variable, 0)
            self.__square_estimation_map.setdefault(variable, 0)
            self.__estimation_map[variable] = (
                self.__decay * self.__estimation_map[variable] +
                (1 - self.__decay) * current_gradient)
            self.__square_estimation_map[variable] = (
                self.__square_decay * self.__square_estimation_map[variable] +
                (1 - self.__square_decay) * current_gradient**2)
            estimation = self.__estimation_map[variable] / (
                1 - self.__decay**self.__step)
            square_estimation = self.__square_estimation_map[variable] / (
                1 - self.__square_decay**self.__step)
            regularization_value = estimation / (square_estimation + 1e-8)**0.5
            variable.value = calculate_function(
                variable.value, self.__rate * regularization_value)
        # Fixed: advance the step once per optimization pass, not once per
        # variable, so all variables share the same bias-correction factor.
        self.__step += 1
        engine.modified()
        self.__gradient_engine.modified()
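# Standalone numpy sketch of the bias-corrected Adam update implemented by
# AdaptiveMomentEstimationOptimizer, again on the toy f(x) = x^2. All names
# and constants here are illustrative, not part of the framework.
import numpy

x = numpy.array(5.0)
m, v, step = 0.0, 0.0, 1
rate, decay, square_decay = 0.1, 0.9, 0.999
for _ in range(200):
    gradient = 2 * x
    m = decay * m + (1 - decay) * gradient                      # first moment
    v = square_decay * v + (1 - square_decay) * gradient**2     # second moment
    m_hat = m / (1 - decay**step)                               # bias correction
    v_hat = v / (1 - square_decay**step)
    step += 1
    x = x - rate * m_hat / (v_hat + 1e-8)**0.5
print(x)  # converges toward the minimum at 0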
class GradientDescentOptimizer(Optimizer):
    def __init__(self, rate: float, consistent: bool = False):
        self.__rate = rate
        self.__consistent = consistent
        self.__gradient_engine = Engine()

    def __repr__(self):
        return '{}(rate={}, consistent={})'.format(self.__class__.__name__,
                                                   self.__rate,
                                                   self.__consistent)

    def optimize(self, engine: Engine, calculate_function):
        variables = engine.variables
        for variable in variables:
            value_cache = self.__gradient_engine.value_cache
            self.__gradient_engine.symbol = engine.gradient(variable)
            self.__gradient_engine.bind = engine.bind
            if self.__consistent:
                self.__gradient_engine.value_cache = value_cache
            variable.value = calculate_function(
                variable.value, self.__rate * self.__gradient_engine.value())
        engine.modified()
        self.__gradient_engine.modified()
class GradientDescentOptimizer(Optimizer):
    def __init__(self, rate: float, consistent: bool = False):
        self.__rate = rate
        self.__consistent = consistent
        self.__gradient_engine = Engine()

    def optimize(self, engine: Engine, calculate_function):
        variables = engine.variables
        for variable in variables:
            value_cache = self.__gradient_engine.value_cache
            self.__gradient_engine.symbol = engine.gradient(variable)
            self.__gradient_engine.bind = engine.bind
            if self.__consistent:
                self.__gradient_engine.value_cache = value_cache
            variable.value = calculate_function(
                variable.value, self.__rate * self.__gradient_engine.value())
        engine.modified()
        self.__gradient_engine.modified()

    def minimize(self, engine: Engine):
        self.optimize(engine, lambda v, g: v - g)

    def maximize(self, engine: Engine):
        self.optimize(engine, lambda v, g: v + g)
class AdaptiveDeltaOptimizer(Optimizer):
    def __init__(self, decay: float, consistent: bool = False):
        self.__decay = decay
        self.__consistent = consistent
        self.__gradient_engine = Engine()
        self.__accumulate_gradient_map = {}
        self.__expectation_map = {}

    def optimize(self, engine: Engine, calculate_function):
        variables = engine.variables
        for variable in variables:
            value_cache = self.__gradient_engine.value_cache
            self.__gradient_engine.symbol = engine.gradient(variable)
            self.__gradient_engine.bind = engine.bind
            if self.__consistent:
                self.__gradient_engine.value_cache = value_cache
            current_gradient = self.__gradient_engine.value()
            self.__accumulate_gradient_map.setdefault(variable, 0)
            self.__expectation_map.setdefault(variable, 0)
            self.__accumulate_gradient_map[variable] = (
                self.__decay * self.__accumulate_gradient_map[variable] +
                (1 - self.__decay) * current_gradient**2)
            delta = ((self.__expectation_map[variable] + 1e-8)**0.5 /
                     (self.__accumulate_gradient_map[variable] + 1e-8)**0.5 *
                     current_gradient)
            self.__expectation_map[variable] = (
                self.__decay * self.__expectation_map[variable] +
                (1 - self.__decay) * delta**2)
            variable.value = calculate_function(variable.value, delta)
        engine.modified()
        self.__gradient_engine.modified()

    def minimize(self, engine: Engine):
        self.optimize(engine, lambda v, g: v - g)

    def maximize(self, engine: Engine):
        self.optimize(engine, lambda v, g: v + g)
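# Standalone sketch of the AdaDelta rule from AdaptiveDeltaOptimizer: the step
# size is the ratio of two running averages (past squared deltas over past
# squared gradients), so no explicit learning rate is needed. The decay value
# and iteration count below are illustrative choices, not framework defaults.
import numpy

x = numpy.array(5.0)
grad_avg, delta_avg = 0.0, 0.0
decay = 0.95
for _ in range(500):
    gradient = 2 * x
    grad_avg = decay * grad_avg + (1 - decay) * gradient**2
    delta = (delta_avg + 1e-8)**0.5 / (grad_avg + 1e-8)**0.5 * gradient
    delta_avg = decay * delta_avg + (1 - decay) * delta**2
    x = x - delta
# Early steps are tiny because delta_avg starts at zero and is seeded only by
# the 1e-8 epsilon; the step size then grows as the delta average fills in.
print(x)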
class Network:
    def __init__(self):
        self.epoch = None
        self.iteration = None
        self.epochs = None
        self.batch_size = None
        self.engine = Engine()
        self.__layer = []
        self.__input_symbol = Variable(name='InputSymbol')
        self.__current_symbol = self.__input_symbol
        self.__current_output = None
        self.__current_weight = None
        self.__current_bias = None
        self.__variables = []
        self.__data = None
        self.__optimizer = None
        self.__loss = None
        self.__predict_engine = Engine()
        self.__plugin = collections.OrderedDict()
        self.load_default_plugin()

    def __valid_current_output(self):
        if self.__current_output is None:
            raise ValueError('Current output is None.')
        else:
            return self.__current_output

    def add(self, layers):
        # Fixed: Iterable lives in collections.abc in Python 3.
        if isinstance(layers, collections.abc.Iterable):
            for layer in layers:
                self.__add(layer)
        else:
            self.__add(layers)

    def __add(self, layer):
        if isinstance(layer, Operator):
            self.__add_operator(layer)
        elif isinstance(layer, ConnectionLayer):
            self.__add_connection(layer)
        elif isinstance(layer, Connection):
            self.__add_connection(layer.connection_layer())
        elif isinstance(layer, ActivationLayer):
            self.__add_activation(layer)
        elif isinstance(layer, Activation):
            self.__add_activation(layer.activation_layer())
        elif isinstance(layer, ConvolutionLayer):
            self.__add_convolution(layer)
        elif isinstance(layer, Convolution):
            self.__add_convolution(layer.convolution_layer())
        elif isinstance(layer, PoolingLayer):
            self.__add_pooling(layer)
        elif isinstance(layer, Pooling):
            self.__add_pooling(layer.pooling_layer())
        elif isinstance(layer, UnpoolingLayer):
            self.__add_unpooling(layer)
        elif isinstance(layer, Unpooling):
            self.__add_unpooling(layer.unpooling_layer())
        else:
            raise ValueError('Invalid layer type: {}'.format(type(layer)))

    def __add_operator(self, layer: Operator):
        self.__current_symbol = Variable(operator=layer,
                                         inputs=[self.__current_symbol])

    def __add_connection(self, layer: ConnectionLayer):
        if layer.input_dimension is None:
            current_output = self.__valid_current_output()
            if not isinstance(current_output, int):
                # Flatten a multi-dimensional output before a dense connection.
                self.__current_symbol = spread(self.__current_symbol,
                                               1 - len(current_output))
                current_output = reduce(lambda a, b: a * b, current_output[1:])
            layer.set_input_dimension(current_output)
        weight, bias = layer.weight_bias()
        self.__variables.append(weight)
        self.__variables.append(bias)
        self.__current_weight = weight
        self.__current_bias = bias
        self.__current_symbol = self.__current_symbol @ weight + bias
        self.__current_output = layer.output_dimension

    def __add_activation(self, layer: ActivationLayer):
        self.__current_symbol = layer.activation_function(self.__current_symbol)
        self.__current_weight.value = layer.weight_initialization(
            self.__current_weight.value.shape)
        self.__current_bias.value = layer.bias_initialization(
            self.__current_bias.value.shape)

    def __add_convolution(self, layer: ConvolutionLayer):
        self.__variables.append(layer.kernel)
        self.__current_symbol = layer.convolution_function()(
            self.__current_symbol, layer.kernel, layer.mode)
        if layer.input_shape is None:
            layer.input_shape = self.__valid_current_output()
        self.__current_output = layer.get_output_shape()

    def __add_pooling(self, layer: PoolingLayer):
        self.__current_symbol = layer.pooling_function()(
            self.__current_symbol, layer.size, layer.step)
        if layer.input_shape is None:
            layer.input_shape = self.__valid_current_output()
        self.__current_output = layer.get_output_shape()

    def __add_unpooling(self, layer: UnpoolingLayer):
        self.__current_symbol = layer.unpooling_function()(
            self.__current_symbol, layer.size, layer.step)
        if layer.input_shape is None:
            layer.input_shape = self.__valid_current_output()
        self.__current_output = layer.get_output_shape()

    def get_symbol(self):
        return self.__current_symbol

    def optimizer(self, optimizer_object, *args, **kwargs):
        if isinstance(optimizer_object, str):
            name = optimizer_object.lower()
            if name in optimizer_map:
                self.__optimizer = optimizer_map[name](*args, **kwargs)
            else:
                raise ValueError('No such optimizer: {}'.format(name))
        elif isinstance(optimizer_object, Optimizer):
            self.__optimizer = optimizer_object
        else:
            raise ValueError('Invalid optimizer type: {}'.format(
                type(optimizer_object)))

    def loss(self, loss_object):
        if isinstance(loss_object, str):
            self.__loss = Loss(loss_object).loss_layer()
        elif isinstance(loss_object, LossLayer):
            self.__loss = loss_object
        elif isinstance(loss_object, Loss):
            self.__loss = loss_object.loss_layer()
        else:
            raise ValueError('Invalid loss type: {}'.format(type(loss_object)))

    def train(self, data, target, epochs: int = 10000, batch_size: int = 0):
        data = numpy.array(data)
        target = numpy.array(target)
        self.epochs = epochs
        if data.shape[0] != target.shape[0]:
            raise ValueError(
                'Data dimension not match target dimension: {} {}'.format(
                    data.shape[0], target.shape[0]))
        data_scale = data.shape[0]
        target_symbol = None
        if batch_size != 0:
            loss, target_symbol = self.__loss.loss_function(
                self.__current_symbol, target[:batch_size], True)
        else:
            loss = self.__loss.loss_function(self.__current_symbol, target)
        self.engine.bind = {self.__input_symbol: data}
        self.engine.symbol = loss
        self.engine.variables = self.__variables
        try:
            self.iteration = 0
            self.run_plugin('begin_training')
            for epoch in range(self.epochs):
                self.epoch = epoch + 1
                self.run_plugin('begin_epoch')
                for i in ([0] if batch_size == 0 else
                          range(0, data_scale, batch_size)):
                    if batch_size != 0:
                        # Rebind the current mini-batch of data and targets.
                        self.engine.bind = {
                            self.__input_symbol:
                                data[i:min([i + batch_size, data_scale])],
                            target_symbol:
                                target[i:min([i + batch_size, data_scale])],
                        }
                    self.iteration += 1
                    self.run_plugin('begin_iteration')
                    self.__optimizer.minimize(self.engine)
                    self.run_plugin('end_iteration')
                self.run_plugin('end_epoch')
        except KeyboardInterrupt:
            print('Keyboard Interrupt')
        self.run_plugin('end_training')

    def predict(self, data):
        self.__predict_engine.symbol = self.__current_symbol
        self.__predict_engine.bind = {self.__input_symbol: data}
        predict_data = self.__predict_engine.value()
        return predict_data

    def load_default_plugin(self):
        default_plugin = [
            ('Training State', TrainingStatePlugin()),
        ]
        for name, plugin in default_plugin:
            self.add_plugin(name, plugin)

    def add_plugin(self, name: str, plugin: Plugin):
        self.__plugin[name] = plugin
        plugin.bind_network(self)

    def run_plugin(self, stage: str):
        for _, plugin in self.__plugin.items():
            getattr(plugin, stage)()

    def plugin(self, name: str):
        if name in self.__plugin:
            return self.__plugin[name]
        else:
            raise ValueError('No such plugin: {}'.format(name))
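# A hedged end-to-end sketch of the Network API defined above
# (add / optimizer / loss / train / predict). The Connection constructor
# signature, the 'sigmoid' activation key, the 'gd' optimizer key, and the
# 'mse' loss key are assumptions about the layer classes, optimizer_map, and
# the Loss registry; they are not confirmed by the code in this section.
import numpy

network = Network()
network.add(Connection(2, 4))       # hypothetical: 2 inputs -> 4 hidden units
network.add(Activation('sigmoid'))  # hypothetical activation key
network.add(Connection(4, 1))
network.optimizer('gd', rate=0.1)   # assumes 'gd' is registered in optimizer_map
network.loss('mse')                 # assumes 'mse' is a registered loss

data = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]])
target = numpy.array([[0], [1], [1], [0]])
network.train(data, target, epochs=1000, batch_size=0)
print(network.predict(data))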
class Network:
    def __init__(self):
        self.__layer = []
        self.__input_symbol = Variable(name='InputSymbol')
        self.__current_symbol = self.__input_symbol
        self.__current_output = None
        self.__current_weight = None
        self.__current_bias = None
        self.__variables = []
        self.__data = None
        self.__optimizer = None
        self.__loss = None
        self.__train_engine = Engine()
        self.__predict_engine = Engine()

    def add(self, layers):
        # Fixed: Iterable lives in collections.abc in Python 3.
        if isinstance(layers, collections.abc.Iterable):
            for layer in layers:
                self.__add(layer)
        else:
            self.__add(layers)

    def __add(self, layer):
        if isinstance(layer, ConnectionLayer):
            self.__add_connection(layer)
        elif isinstance(layer, Connection):
            self.__add_connection(layer.connection_layer())
        elif isinstance(layer, ActivationLayer):
            self.__add_activation(layer)
        elif isinstance(layer, Activation):
            self.__add_activation(layer.activation_layer())
        else:
            raise ValueError('Invalid layer type: {}'.format(type(layer)))

    def __add_connection(self, layer: ConnectionLayer):
        weight, bias = layer.weight_bias(self.__current_output)
        self.__variables.append(weight)
        self.__variables.append(bias)
        self.__current_weight = weight
        self.__current_bias = bias
        self.__current_symbol = weight @ self.__current_symbol + bias
        self.__current_output = layer.output_dimension()

    def __add_activation(self, layer: ActivationLayer):
        self.__current_symbol = layer.activation_function(
            self.__current_symbol)
        self.__current_weight.value = layer.weight_initialization(
            self.__current_weight.value.shape)
        self.__current_bias.value = numpy.random.normal(
            0, 1, self.__current_bias.value.shape)

    def get_symbol(self):
        return self.__current_symbol

    def optimizer(self, optimizer_object, *args, **kwargs):
        if isinstance(optimizer_object, str):
            name = optimizer_object.lower()
            if name in optimizer_map:
                self.__optimizer = optimizer_map[name](*args, **kwargs)
            else:
                raise ValueError('No such optimizer: {}'.format(name))
        elif isinstance(optimizer_object, Optimizer):
            self.__optimizer = optimizer_object
        else:
            raise ValueError('Invalid optimizer type: {}'.format(
                type(optimizer_object)))

    def loss(self, loss_object):
        if isinstance(loss_object, str):
            self.__loss = Loss(loss_object).loss_layer()
        elif isinstance(loss_object, LossLayer):
            self.__loss = loss_object
        elif isinstance(loss_object, Loss):
            self.__loss = loss_object.loss_layer()
        else:
            raise ValueError('Invalid loss type: {}'.format(type(loss_object)))

    def train(self, data, target, epochs: int = 10000,
              loss_threshold: float = 0.001, state_cycle: int = 100):
        loss = self.__loss.loss_function(self.__current_symbol, target)
        self.__train_engine.symbol = loss
        self.__train_engine.variables = self.__variables
        self.__train_engine.bind = {self.__input_symbol: data}
        start_time = time.time()
        cycle_start_time = time.time()
        for epoch in range(epochs):
            self.__optimizer.minimize(self.__train_engine)
            if (epoch + 1) % state_cycle == 0:
                speed = state_cycle / (time.time() - cycle_start_time)
                cycle_start_time = time.time()
                loss_value = self.__train_engine.value()
                # Fixed: the format string was missing its closing bracket.
                print('Training State [epoch = {}/{}, loss = {:.8f}, '
                      'speed = {:.2f}(epochs/s)]'.format(
                          epoch + 1, epochs, loss_value, speed))
                if loss_value < loss_threshold:
                    print('Touch loss threshold: {} < {}'.format(
                        loss_value, loss_threshold))
                    break
        print('Training Complete [{}]'.format(
            time.strftime('%H:%M:%S', time.gmtime(time.time() - start_time))))

    def predict(self, data):
        self.__predict_engine.symbol = self.__current_symbol
        self.__predict_engine.bind = {self.__input_symbol: data}
        predict_data = self.__predict_engine.value()
        return predict_data