Example #1
    def init_train_updates(self):
        network_inputs = self.variables.network_inputs
        network_output = self.variables.network_output
        inv_hessian = self.variables.inv_hessian
        prev_params = self.variables.prev_params
        prev_full_gradient = self.variables.prev_full_gradient

        params = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in params])

        gradients = T.grad(self.variables.error_func, wrt=params)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

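        # On the first epoch there is no previous parameter vector or
        # gradient yet, so the inverse Hessian approximation is left unchanged.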
        new_inv_hessian = ifelse(
            T.eq(self.variables.epoch, 1), inv_hessian,
            self.update_function(inv_hessian, param_vector - prev_params,
                                 full_gradient - prev_full_gradient))
        param_delta = -new_inv_hessian.dot(full_gradient)
        layers_and_parameters = list(iter_parameters(self.layers))

        def prediction(step):
            updated_params = param_vector + step * param_delta

            # This trick allows us to replace shared variables
            # with Theano variables and get the output from the network
            start_pos = 0
            for layer, attrname, param in layers_and_parameters:
                end_pos = start_pos + param.size
                updated_param_value = T.reshape(
                    updated_params[start_pos:end_pos], param.shape)
                setattr(layer, attrname, updated_param_value)
                start_pos = end_pos

            output = self.connection.output(*network_inputs)

            # Restore previous parameters
            for layer, attrname, param in layers_and_parameters:
                setattr(layer, attrname, param)

            return output

        def phi(step):
            return self.error(network_output, prediction(step))

        def derphi(step):
            error_func = self.error(network_output, prediction(step))
            return T.grad(error_func, wrt=step)

        step = asfloat(line_search(phi, derphi))
        updated_params = param_vector + step * param_delta
        updates = setup_parameter_updates(params, updated_params)

        updates.extend([
            (inv_hessian, new_inv_hessian),
            (prev_params, param_vector),
            (prev_full_gradient, full_gradient),
        ])

        return updates
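The prediction helper above relies on packing all parameters into one flat vector, updating that vector, and then slicing it back into the original shapes. Below is a minimal NumPy-only sketch of that trick; the shapes and the update value are made up for illustration.

import numpy as np

# Hypothetical parameter shapes: a (3, 2) weight matrix and a bias vector.
params = [np.zeros((3, 2)), np.zeros(2)]

# Pack everything into a single flat vector, as `param_vector` does above.
param_vector = np.concatenate([p.flatten() for p in params])
updated = param_vector + 0.1  # stands in for `param_vector + step * param_delta`

# Slice the flat vector back into the original parameter shapes.
start, restored = 0, []
for p in params:
    end = start + p.size
    restored.append(updated[start:end].reshape(p.shape))
    start = end

assert restored[0].shape == (3, 2) and restored[1].shape == (2,)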
Example #2
def load(connection, source, ignore_missed=False):
    """
    Load and set parameters for layers from the
    specified source.

    Parameters
    ----------
    connection : list of layers or connection

    source : str or dict
        It can be a path to a pickle file that stores
        parameters, or a dictionary that maps layer names
        to dictionaries with parameter names and their values.

    ignore_missed : bool
        ``False`` means that an error will be raised if some
        of the layers don't have stored parameters in the
        specified source. Defaults to ``False``.

    Raises
    ------
    TypeError
        If the source has an invalid data type.
    """
    if isinstance(connection, BaseNetwork):
        connection = connection.connection

    if isinstance(source, six.string_types):
        with open(source, 'rb') as f:
            data = pickle.load(f)

    elif isinstance(source, dict):
        data = source

    else:
        raise TypeError("Source type is unknown. Got {}, expected dict "
                        "or str".format(type(source)))

    for layer, attrname, _ in iter_parameters(connection):
        if layer.name not in data or attrname not in data[layer.name]:
            if ignore_missed:
                continue

            raise ValueError("Cannot load parameters from the specified "
                             "data source. Layer `{}` doesn't have "
                             "stored parameter `{}`."
                             "".format(layer.name, attrname))

        loaded_parameter = data[layer.name][attrname]

        attrvalue = getattr(layer, attrname)
        attrvalue.set_value(asfloat(loaded_parameter))

    # We need to initialize the connection to make sure
    # that each layer generates its shared variables
    # and that the connections are validated
    connection.initialize()
Example #3
    def iter_params_and_grads(self):
        layers, parameters = [], []

        for layer, _, parameter in iter_parameters(self.layers):
            layers.append(layer)
            parameters.append(parameter)

        gradients = tf.gradients(self.variables.error_func, parameters)
        iterator = zip(layers, parameters, gradients)

        for layer, parameter, gradient in iterator:
            yield layer, parameter, gradient
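Every example on this page unpacks iter_parameters into (layer, attribute name, parameter) triples. The helper itself is not shown here, so the sketch below only illustrates that assumed contract; the `parameters` attribute it reads is an assumption, not the library's actual implementation.

def iter_parameters_sketch(layers):
    # Assumed behaviour: yield (layer, attribute name, shared parameter)
    # for every trainable parameter in the network.
    for layer in layers:
        for attrname, parameter in layer.parameters.items():
            yield layer, attrname, parameter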
Example #4
    def init_train_updates(self):
        """
        Initialize updates that will be applied after
        each training epoch.
        """
        updates = []
        for layer, _, parameter in iter_parameters(self.layers):
            updates.extend(self.init_param_updates(layer, parameter))

        for layer in self.layers:
            updates.extend(layer.updates)

        return updates
Example #5
    def init_train_updates(self):
        original_updates = super(WeightDecay, self).init_train_updates()
        parameters = [param for _, _, param in iter_parameters(self.layers)]
        modified_updates = []

        step = self.variables.step
        decay_rate = asfloat(self.decay_rate)

        for parameter, updated in original_updates:
            if parameter in parameters:
                updated -= step * decay_rate * parameter
            modified_updates.append((parameter, updated))

        return modified_updates
Example #6
    def init_train_updates(self):
        original_updates = super(WeightElimination, self).init_train_updates()
        parameters = [param for _, _, param in iter_parameters(self.layers)]
        modified_updates = []

        step = self.variables.step
        decay_koef = asfloat(self.decay_rate * step)
        zero_weight_square = asfloat(self.zero_weight**2)

        for parameter, updated in original_updates:
            if parameter in parameters:
                updated -= decay_koef * (
                    (2 * parameter / zero_weight_square) /
                    tf.square(1 + tf.square(parameter) / zero_weight_square))
            modified_updates.append((parameter, updated))

        return modified_updates
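Up to the step factor, the term subtracted above is decay_rate times the gradient of the weight-elimination penalty (w**2 / w0**2) / (1 + w**2 / w0**2), where w0 is zero_weight. A small self-contained check with hypothetical values:

decay_rate, w0, w = 0.1, 1.0, 0.5

def penalty(w):
    return decay_rate * (w ** 2 / w0 ** 2) / (1 + w ** 2 / w0 ** 2)

# Analytic gradient, matching the expression used in the update above.
analytic = decay_rate * (2 * w / w0 ** 2) / (1 + w ** 2 / w0 ** 2) ** 2
# Central finite-difference approximation of the same gradient.
numeric = (penalty(w + 1e-6) - penalty(w - 1e-6)) / 2e-6

assert abs(analytic - numeric) < 1e-8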
Example #7
def parameter_values(connection):
    """
    List all of the network's trainable parameters.

    Parameters
    ----------
    connection : layer, connection

    Returns
    -------
    list of Theano shared variables
        Network's trainable parameters.
    """
    parameters = []

    for _, _, parameter in iter_parameters(connection):
        parameters.append(parameter)

    return parameters
Example #8
def parameter_values(connection):
    """
    List of all trainable parameters in the network.

    Parameters
    ----------
    connection : layer, connection

    Returns
    -------
    list of Theano shared variables
        Network's trainable parameters.
    """
    parameters = []

    for _, _, parameter in iter_parameters(connection):
        parameters.append(parameter)

    return parameters
Example #9
def save(connection, filepath):
    """
    Save layer parameters in a pickle file.

    Parameters
    ----------
    connection : network, list of layers or connection
        Connection that needs to be saved.

    filepath : str
        Path to the pickle file that will store
        network's parameters.
    """
    if isinstance(connection, BaseNetwork):
        connection = connection.connection

    data = defaultdict(dict)

    for layer, attrname, parameter in iter_parameters(connection):
        data[layer.name][attrname] = parameter.get_value()

    with open(filepath, 'wb+') as f:
        pickle.dump(data, f)
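A hypothetical usage sketch for the save/load pair; the connection object, file name, and layer/parameter names below are placeholders, not part of the library's documented API.

# `connection` is assumed to be an already-built network or list of layers.
save(connection, 'weights.pickle')    # store parameters on disk
load(connection, 'weights.pickle')    # restore them later from the file

# Parameters can also be loaded from a plain dictionary; with
# ignore_missed=True, layers without stored values are skipped.
load(connection, {'layer-1': {'weight': stored_weight}}, ignore_missed=True)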
Example #10
    def find_optimal_step(self, parameter_vector, parameter_update):
        network_inputs = self.variables.network_inputs
        network_output = self.variables.network_output
        layers_and_parameters = list(iter_parameters(self.layers))

        def prediction(step):
            step = asfloat(step)
            updated_params = parameter_vector + step * parameter_update

            # This trick allows us to replace shared variables
            # with TensorFlow variables and get the output from the network
            start_pos = 0
            for layer, attrname, param in layers_and_parameters:
                end_pos = start_pos + get_variable_size(param)
                updated_param_value = tf.reshape(
                    updated_params[start_pos:end_pos], param.shape)
                setattr(layer, attrname, updated_param_value)
                start_pos = end_pos

            output = self.connection.output(*network_inputs)

            # Restore previous parameters
            for layer, attrname, param in layers_and_parameters:
                setattr(layer, attrname, param)

            return output

        def phi(step):
            return self.error(network_output, prediction(step))

        def derphi(step):
            error_func = self.error(network_output, prediction(step))
            gradient, = tf.gradients(error_func, step)
            return gradient

        return line_search(phi, derphi, self.wolfe_maxiter, self.wolfe_c1,
                           self.wolfe_c2)
Example #11
    def init_train_updates(self):
        network_input = self.variables.network_input
        network_output = self.variables.network_output
        inv_hessian = self.variables.inv_hessian
        prev_params = self.variables.prev_params
        prev_full_gradient = self.variables.prev_full_gradient

        params = parameter_values(self.connection)
        param_vector = parameters2vector(self)

        gradients = T.grad(self.variables.error_func, wrt=params)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        new_inv_hessian = ifelse(
            T.eq(self.variables.epoch, 1),
            inv_hessian,
            self.update_function(inv_hessian,
                                 param_vector - prev_params,
                                 full_gradient - prev_full_gradient)
        )
        param_delta = -new_inv_hessian.dot(full_gradient)
        layers_and_parameters = list(iter_parameters(self.layers))

        def prediction(step):
            updated_params = param_vector + step * param_delta

            # This trick allows us to replace shared variables
            # with Theano variables and get the output from the network
            start_pos = 0
            for layer, attrname, param in layers_and_parameters:
                end_pos = start_pos + param.size
                updated_param_value = T.reshape(
                    updated_params[start_pos:end_pos],
                    param.shape
                )
                setattr(layer, attrname, updated_param_value)
                start_pos = end_pos

            output = self.connection.output(network_input)

            # Restore the original shared-variable parameters
            for layer, attrname, param in layers_and_parameters:
                setattr(layer, attrname, param)

            return output

        def phi(step):
            return self.error(network_output, prediction(step))

        def derphi(step):
            error_func = self.error(network_output, prediction(step))
            return T.grad(error_func, wrt=step)

        step = asfloat(line_search(phi, derphi))
        updated_params = param_vector + step * param_delta
        updates = setup_parameter_updates(params, updated_params)

        updates.extend([
            (inv_hessian, new_inv_hessian),
            (prev_params, param_vector),
            (prev_full_gradient, full_gradient),
        ])

        return updates