def from_dims(cls, in_dim, out_dim, activation) -> "Layer":
    # Scale the initial weights by 1/sqrt(in_dim) so pre-activations keep
    # roughly unit variance regardless of layer width.
    return cls(
        weights=Tensor.from_numpy(
            np.random.normal(scale=in_dim**-0.5, size=(in_dim, out_dim))),
        biases=Tensor.from_numpy(np.zeros(out_dim)),
        activation=activation,
    )
def from_shape(
    cls,
    shape: Tuple[int, ...],
    persistence: float = None,
) -> "BatchNormalization":
    # Running statistics start at mean 0 / variance 1; shift and scale are the
    # learnable batch-norm parameters.
    return cls(
        mean=np.zeros(shape),
        variance=np.ones(shape),
        persistence=persistence if persistence is not None else 0.9,
        shift=Tensor.from_numpy(np.zeros(shape)),
        scale=Tensor.from_numpy(np.ones(shape)),
    )
def negate(tensor: Tensor):
    tensor = coalesce(tensor)
    return Tensor.from_numpy(
        data=-tensor.data,
        # d(-x)/dx = -1, so the upstream gradient is simply negated.
        backward=lambda gradient: Gradients.accumulate(
            Gradient(tensor=tensor, gradient=-gradient)),
    )
def exp(tensor: Tensor):
    tensor = coalesce(tensor)
    return Tensor.from_numpy(
        data=np.exp(tensor.data),
        # d(e^x)/dx = e^x, so the upstream gradient is scaled by exp(x).
        backward=lambda gradient: Gradients.accumulate(
            Gradient(tensor=tensor, gradient=gradient * np.exp(tensor.data))),
    )
def squeeze(tensor: Tensor, axes: Union[None, int, Tuple[int, ...]] = None):
    tensor = coalesce(tensor)
    # With no axes given, drop every length-1 axis (mirroring numpy); keeping
    # the positions explicit lets the backward pass re-insert them.
    if axes is None:
        axes = tuple(idx for idx, dim in enumerate(tensor.shape) if dim == 1)
    else:
        axes = tuplify(axes)
    return Tensor.from_numpy(
        data=np.squeeze(tensor.data, axes),
        backward=lambda gradient: Gradients.accumulate(
            Gradient(tensor=tensor, gradient=np.expand_dims(gradient, axes))),
    )
def loss(self, batch, regularization) -> Tensor:
    # Mean cross-entropy over the batch plus an L2 penalty on the weights.
    cross_entropy = -(
        (self.probabilities(batch["features"]).log() *
         one_hot(batch["labels"], num_classes=self.num_classes)).sum() /
        Tensor.from_builtin(len(batch["features"])))
    penalty = (
        sum((layer.weights * layer.weights).sum() for layer in self.layers) *
        regularization)
    return cross_entropy + penalty
def subtract(left: Tensor, right: Tensor):
    left, right = coalesce(left, right)
    return Tensor.from_numpy(
        data=left.data - right.data,
        backward=lambda gradient: Gradients.accumulate(
            Gradient(tensor=left, gradient=gradient),
            Gradient(tensor=right, gradient=-gradient),
        ),
    )
def divide(left: Tensor, right: Tensor):
    left, right = coalesce(left, right)
    return Tensor.from_numpy(
        data=left.data / right.data,
        # Quotient rule: d(l/r)/dl = 1/r and d(l/r)/dr = -l/r^2.
        backward=lambda gradient: Gradients.accumulate(
            Gradient(tensor=left, gradient=gradient / right.data),
            Gradient(
                tensor=right,
                gradient=-gradient * left.data / np.square(right.data),
            ),
        ),
    )
def matrix_multiply(left: Tensor, right: Tensor):
    left = coalesce(left)
    right = coalesce(right)
    assert len(left.shape) == 2
    assert len(right.shape) == 2
    assert left.shape[1] == right.shape[0]
    return Tensor.from_numpy(
        data=np.matmul(left.data, right.data),
        # For C = L @ R with upstream gradient G: dL = G @ R.T and dR = L.T @ G.
        backward=lambda gradient: Gradients.accumulate(
            Gradient(tensor=left, gradient=np.matmul(gradient, right.data.T)),
            Gradient(tensor=right, gradient=np.matmul(left.data.T, gradient)),
        ),
    )
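# A minimal finite-difference sanity check of the gradient rules used in
# matrix_multiply, written in plain numpy (no Tensor machinery assumed; numpy
# is imported as np as elsewhere in this file). The function and variable
# names here are illustrative only, not part of the library.
def _check_matmul_gradient():
    rng = np.random.default_rng(0)
    left = rng.normal(size=(2, 3))
    right = rng.normal(size=(3, 4))
    upstream = rng.normal(size=(2, 4))
    analytic = np.matmul(upstream, right.T)       # dL = G @ R.T
    numeric = np.zeros_like(left)
    epsilon = 1e-6
    for row in range(left.shape[0]):
        for col in range(left.shape[1]):
            bumped = left.copy()
            bumped[row, col] += epsilon
            numeric[row, col] = (
                (np.matmul(bumped, right) - np.matmul(left, right)) * upstream
            ).sum() / epsilon
    assert np.allclose(analytic, numeric, atol=1e-4)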
def coalesce(*tensors: Tensor):
    tensors = [Tensor.convert(tensor) for tensor in tensors]
    if len(tensors) == 1:
        return tensors[0]
    # Broadcast every tensor to the common shape explicitly: first insert
    # leading length-1 axes, then tile them out, so downstream ops can assume
    # identically shaped operands.
    target_shape = np.broadcast(*[tensor.data for tensor in tensors]).shape
    expanded = [
        tensor.expand_dims(list(range(len(target_shape) - len(tensor.shape))))
        for tensor in tensors
    ]
    return [
        tensor.tile([
            target_dim // tensor_dim
            for target_dim, tensor_dim in zip(target_shape, tensor.shape)
        ]) for tensor in expanded
    ]
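# Illustrative sketch of how coalesce is used (assuming, as in clip below,
# that Tensor.convert accepts plain Python numbers and that .shape mirrors
# numpy's): a scalar operand is expanded and tiled until it matches the other
# operand, so elementwise ops never see mismatched shapes. Not part of the
# library itself.
def _coalesce_example():
    matrix = Tensor.from_numpy(np.ones((2, 3)))
    left, right = coalesce(matrix, 5.0)
    assert tuple(left.shape) == tuple(right.shape) == (2, 3)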
def clip(tensor: Tensor, low=None, high=None):
    if low is None:
        low = -float("inf")
    if high is None:
        high = float("inf")
    tensor, low, high = coalesce(tensor, low, high)

    def backward(gradient: np.ndarray):
        # The gradient passes through only where the input was not clipped.
        result = gradient.copy()
        result[(tensor.data < low.data) | (tensor.data > high.data)] = 0
        return Gradients.accumulate(Gradient(tensor=tensor, gradient=result))

    return Tensor.from_numpy(
        data=np.clip(tensor.data, low.data, high.data),
        backward=backward,
    )
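# Pure-numpy illustration of clip's backward rule (numpy imported as np as
# elsewhere in this file; names are illustrative only): the gradient flows
# through unchanged where the input sat inside [low, high] and is zeroed
# where the output was pinned to a bound.
def _clip_gradient_example():
    values = np.array([-2.0, 0.5, 3.0])
    upstream = np.ones_like(values)
    masked = upstream.copy()
    masked[(values < 0.0) | (values > 1.0)] = 0
    assert np.array_equal(masked, np.array([0.0, 1.0, 0.0]))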
def sum(tensor: Tensor, axes=None):
    tensor = coalesce(tensor)
    axes = tuplify(range(len(tensor.shape)) if axes is None else axes)
    return Tensor.from_numpy(
        data=tensor.data.sum(axes),
        # Every element along a reduced axis contributed equally, so the
        # upstream gradient is re-expanded and tiled back to the input shape.
        backward=lambda gradient: Gradients.accumulate(
            Gradient(
                tensor=tensor,
                gradient=np.tile(
                    np.expand_dims(gradient, axes),
                    [
                        dim if idx in axes or idx - len(tensor.shape) in axes
                        else 1
                        for idx, dim in enumerate(tensor.shape)
                    ],
                ),
            )),
    )
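# Pure-numpy illustration of sum's backward rule (names illustrative only):
# reducing over axis 1 of a (2, 3) array means every element along that axis
# contributed equally, so the upstream gradient (one value per row) is
# re-inserted with expand_dims and tiled back out to the input shape.
def _sum_gradient_example():
    upstream = np.array([10.0, 20.0])          # gradient of x.sum(axes=(1,))
    restored = np.tile(np.expand_dims(upstream, 1), [1, 3])
    assert restored.shape == (2, 3)
    assert np.array_equal(restored[0], np.array([10.0, 10.0, 10.0]))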
def tile(tensor: Tensor, tiling: Tuple[int, ...]):
    tensor = coalesce(tensor)
    tiling = tuple(tiling)
    assert len(tensor.shape) == len(tiling)
    return Tensor.from_numpy(
        data=np.tile(tensor.data, tiling),
        # Each input element appears in prod(tiling) copies of the output, so
        # the upstream gradient is reshaped into interleaved (repeat, dim)
        # pairs and summed over the repeat axes.
        backward=lambda gradient: Gradients.accumulate(
            Gradient(
                tensor=tensor,
                gradient=np.reshape(
                    gradient,
                    [
                        value for idx, dim in enumerate(tensor.shape)
                        for value in [tiling[idx], dim]
                    ],
                ).sum(tuple(range(0, len(tiling) * 2, 2))),
            )),
    )
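# Pure-numpy illustration of tile's backward rule (names illustrative only):
# tiling a length-3 vector twice yields a length-6 result, so each input
# element receives the sum of the two upstream-gradient entries that
# correspond to its copies.
def _tile_gradient_example():
    upstream = np.arange(6.0)                  # gradient for the tiled result
    restored = np.reshape(upstream, (2, 3)).sum(axis=(0,))
    assert np.array_equal(restored, np.array([3.0, 5.0, 7.0]))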
def sigmoid(tensor: Tensor):
    # sigmoid(x) = 1 / (1 + e^(-x)) = e^x / (e^x + 1), built from existing ops
    # so the backward pass comes for free.
    tensor = coalesce(tensor)
    exp_tensor = tensor.exp()
    return exp_tensor / (exp_tensor + 1)
def softmax(tensor: Tensor, axes=None):
    tensor = coalesce(tensor)
    exp_tensor = tensor.exp()
    # Normalize so the exponentials along the given axes sum to one.
    return exp_tensor / exp_tensor.sum(axes=axes).expand_dims(axes=axes)
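# Illustrative usage sketch (assuming Tensor.from_numpy and .data behave as in
# the ops above, and that an integer axis is accepted the way sum accepts one;
# names are illustrative only): softmax over the last axis of a batch of
# logits yields rows that sum to one.
def _softmax_example():
    logits = Tensor.from_numpy(np.array([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]]))
    probabilities = softmax(logits, axes=1)
    assert np.allclose(probabilities.data.sum(axis=1), 1.0)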