def test_der(self):
    """Flatten2d gradient w.r.t. its input matches the input in shape and value."""
    source = autoTensor(torch.rand(3, 3, 20, 20))
    source.requires_grad = True
    flattened = Flatten2d(source)
    # Backprop the node's own value: the flatten gradient simply reshapes it back.
    flattened.backprop(autoTensor(flattened.value))
    assert source.value.size() == source.grad.value.size()
    assert torch.equal(source.value, source.grad.value)
def test_der_bias(self):
    """Conv2d bias gradient keeps the bias vector's shape."""
    image = autoTensor(torch.rand(3, 3, 20, 20))
    filters = autoTensor(torch.rand(6, 3, 5, 5))
    bias = autoTensor(torch.rand([6]))
    bias.requires_grad = True

    conv = Conv2d(image, filters, bias)
    # Output of a 5x5 valid conv over 20x20 input is 16x16.
    conv.backprop(autoTensor(torch.rand(3, 6, 16, 16)))

    assert bias.grad.size() == bias.size()
def der_filters(self, gradient):
    """Backward pass w.r.t. the convolution filters.

    Expressed as a convolution of the stored input by the incoming
    gradient: batch and channel axes are swapped on both operands so
    each output channel of the gradient acts as a filter, then swapped
    back on the result.
    """
    swapped_input = self.image_block.value.permute(1, 0, 2, 3)
    swapped_grad = gradient.value.permute(1, 0, 2, 3)
    filter_grad = F.conv2d(
        input=swapped_input,
        weight=swapped_grad,
        stride=(self.stride, self.stride),
        padding=(self.padding, self.padding),
    ).permute(1, 0, 2, 3)
    return autoTensor(value=filter_grad)
def test_init(self):
    """Flatten2d exposes the flattened size and records its input channel."""
    source = autoTensor(torch.rand(3, 3, 20, 20))
    source.requires_grad = True
    flattened = Flatten2d(source)
    # (3, 3, 20, 20) -> (3, 3*20*20) == (3, 1200)
    assert flattened.size() == torch.empty(3, 1200).size()
    assert flattened.channels[0].autoVariable == source
def test_call(self):
    """Calling a Linear layer computes X @ W + b."""
    layer = Linear(6, 1)
    batch = autoTensor(torch.rand(5, 6))
    output = layer(batch)
    expected = torch.mm(batch.value, layer.weight.value) + layer.bias.value
    assert torch.equal(output.value, expected)
def test_updade_weights(self):  # NOTE(review): name typo ("updade") kept — renaming changes the public test id
    """update_weights performs a plain SGD step: w -= lr * grad."""
    weight = Weight((5, 6))
    before = weight.value.cpu().detach().clone()
    weight.grad = autoTensor(torch.rand((5, 6)))

    weight.update_weights(learning_rate=0.01)

    assert torch.equal(weight.value, before - 0.01 * weight.grad.value)
def dfs_update_gdmomentum(channels, learning_rate, beta):
    """Depth-first walk of the backprop graph applying momentum SGD.

    For every Weight node encountered, seed its saved-gradient buffer on
    first visit (so momentum has a starting value) and apply the
    `gdmomentum` update; recurse into each node's own channels so the
    whole graph is visited.

    :param channels: iterable of back channels, each holding an autoVariable
    :param learning_rate: step size forwarded to gdmomentum
    :param beta: momentum coefficient forwarded to gdmomentum
    """
    for back_channel in channels:
        node = back_channel.autoVariable
        if isinstance(node, neural.param.Weight):
            # Fixed: use `is None` instead of `== None` — identity check is
            # the correct (and safe) test; `==` may be overridden on tensors.
            if node.grad_saved is None:
                node.grad_saved = autoTensor(value=node.grad.value)
            node.gdmomentum(learning_rate, beta)
        # Recurse unconditionally so non-Weight nodes are traversed too.
        optimNode.dfs_update_gdmomentum(node.channels, learning_rate, beta)
def test_init(self):
    """Conv2d records its three inputs and reports correct output sizes."""
    image = autoTensor(torch.rand(3, 3, 20, 20))
    filters = autoTensor(torch.rand(6, 3, 5, 5))
    bias = autoTensor(torch.rand([6]))
    for tensor in (image, filters, bias):
        tensor.requires_grad = True

    conv = Conv2d(image, filters, bias)
    assert conv.channels[0].autoVariable == image
    assert conv.channels[1].autoVariable == filters
    assert conv.channels[2].autoVariable == bias
    # out = floor((in + 2*pad - kernel) / stride) + 1
    assert conv.size() == torch.empty(3, 6, 16, 16).size()

    assert Conv2d(image, filters, bias, padding=1).size() == torch.empty(3, 6, 18, 18).size()
    assert Conv2d(image, filters, bias, stride=3).size() == torch.empty(3, 6, 6, 6).size()
    assert Conv2d(image, filters, bias, padding=3, stride=4).size() == torch.empty(3, 6, 6, 6).size()
def reverse_broadcast(gradient, tensor):
    """Reduce `gradient` back to the shape of `tensor` by undoing broadcasting.

    Broadcasting can (a) prepend new leading dimensions and (b) stretch
    size-1 dimensions; both are undone by summing the gradient over the
    broadcast axes.

    :param gradient: autoTensor holding the (possibly broadcast) gradient
    :param tensor: autoTensor whose shape the gradient must be reduced to
    :return: autoTensor with the same size as `tensor`
    """
    # clone() instead of copy.deepcopy(): same effect for a tensor, cheaper.
    grad_value = gradient.value.clone()

    # (a) collapse dimensions that broadcasting prepended
    added_dims = len(gradient.value.size()) - len(tensor.value.size())
    for _ in range(added_dims):
        grad_value = grad_value.sum(dim=0)

    # (b) collapse dimensions that were stretched from size 1
    for axis, extent in enumerate(tensor.value.size()):
        if extent == 1:
            grad_value = grad_value.sum(dim=axis, keepdim=True)

    gradient = autoTensor(value=grad_value)
    assert gradient.size() == tensor.size()
    return gradient
def __init__(self, input_size, hidden_size, output_size, initializer=None):
    """Build the four LSTM gate layers (forget, input, candidate, output)."""
    self.input_size = input_size
    self.hidden_size = hidden_size

    self.f = Linear2(input_size, hidden_size, output_size, initializer)
    self.i = Linear2(input_size, hidden_size, output_size, initializer)
    self.c_ = Linear2(input_size, hidden_size, output_size, initializer)
    self.o = Linear2(input_size, hidden_size, output_size, initializer)

    # Initial cell state.
    self.c = autoTensor(torch.zeros(output_size, 1))

    # Tag each gate's weights with readable names ("f1"/"f2", "i1"/"i2", ...).
    for gate_tag in ("f", "i", "c_", "o"):
        gate = getattr(self, gate_tag)
        gate.weight.name = gate_tag + "1"
        gate.weight2.name = gate_tag + "2"
def der_image(self, gradient):
    """Backward pass w.r.t. the convolution input.

    The gradient of a convolution w.r.t. its input is a transposed
    convolution of the incoming gradient with the same filters,
    stride and padding.
    """
    image_grad = F.conv_transpose2d(
        input=gradient.value,
        weight=self.filters.value,
        stride=(self.stride, self.stride),
        padding=(self.padding, self.padding),
    )
    return autoTensor(value=image_grad)
def test_der_filter(self):
    """Filter gradients keep the filter shape across padding/stride settings."""
    # (conv kwargs, shape of the incoming gradient for a 20x20 input, 5x5 kernel)
    scenarios = [
        ({}, (3, 6, 16, 16)),
        ({"padding": 1}, (3, 6, 18, 18)),
        ({"stride": 4}, (3, 6, 4, 4)),
        ({"stride": 3}, (3, 6, 6, 6)),
    ]
    for kwargs, grad_shape in scenarios:
        image = autoTensor(torch.rand(3, 3, 20, 20))
        filters = autoTensor(torch.rand(6, 3, 5, 5))
        bias = autoTensor(torch.rand([6]))
        filters.requires_grad = True

        conv = Conv2d(image, filters, bias, **kwargs)
        conv.backprop(autoTensor(torch.rand(*grad_shape)))

        assert filters.grad.size() == filters.size()
def der_pos1(self, gradient):
    """Chain rule: scale the incoming gradient elementwise by this node's value."""
    return autoTensor(value=gradient.value * self.value)
def der(self, gradient):
    """Derivative of the absolute-error loss: sign(y_pred - y_target).

    Fixed: the previous form, diff / |diff|, produced NaN (0/0) whenever
    a prediction exactly equals its target. torch.sign gives the same
    value everywhere else and 0 at equality.
    """
    diff = self.y_pred.value - self.y_target.value
    return autoTensor(value=torch.sign(diff))
def backward(self):
    """Start backpropagation by seeding a gradient of ones (no-op for LSTM nodes)."""
    if self.isLSTM:
        return
    seed = autoTensor(torch.ones(self.value.size()))
    self.backprop(seed)
def der_pos1(self, gradient):
    """Backward for operand 1: pass the gradient through, undoing any broadcast."""
    reduced = reverse_broadcast(gradient, self.tensor1)
    return autoTensor(value=reduced.value)
def der_pos1(self, gradient):
    """Matrix-multiply backward for the left operand: dL/dA = dL/dC @ B^T."""
    back_grad = torch.mm(gradient.value, self.tensor2.value.transpose(1, 0))
    return autoTensor(value=back_grad)
def der(self, gradient):
    """Flatten backward: reshape the gradient to the stored input size."""
    return autoTensor(value=gradient.value.view(self.inputs_size))
def der_bias(self, gradient):
    """Gradient of the convolution output w.r.t. the bias vector.

    Collapses the incoming gradient over the batch and both spatial
    axes with successive means, leaving one value per output channel
    (the filters' first dimension).

    NOTE(review): the mathematically exact bias gradient is a *sum*
    over batch/spatial dims; mean rescales it by 1/(N*H*W). Presumably
    intentional — confirm against the optimizer's learning-rate
    assumptions.
    """
    # mean(0): drop batch; then two mean(1)s: drop H then W -> shape (C_out,)
    back_grad = gradient.value.mean(0).mean(1).mean(1).view(
        self.filters.value.size()[0])
    return autoTensor(value=back_grad)
def der_pos(self, gradient):
    """Negation backward: flip the sign of the incoming gradient."""
    return autoTensor(value=-gradient.value)
def der(self, gradient):
    """Backward pass: gate the incoming gradient elementwise by the stored mask."""
    return autoTensor(value=gradient.value * self.mask)
def der(self, gradient):
    """Transpose backward: apply the stored axis permutation to the gradient."""
    permuted = self._transpose(gradient, *self.idx)
    return autoTensor(permuted)
def der_pos2(self, gradient):
    """Matrix-multiply backward for the right operand: dL/dB = A^T @ dL/dC."""
    back_grad = torch.mm(self.tensor1.value.transpose(1, 0), gradient.value)
    return autoTensor(value=back_grad)
def der_pos1(self, gradient):
    """Power-rule backward: d(x**p)/dx = p * x**(p-1), after undoing broadcast."""
    base = self.tensor1
    exponent = self.pow_val
    reduced = reverse_broadcast(gradient, base)
    back_grad = exponent * reduced.value * (base.value ** (exponent - 1))
    return autoTensor(value=back_grad)
def der_pos2(self, gradient):
    """Product-rule backward for operand 2: scale by operand 1, undo broadcast."""
    scaled = autoTensor(gradient.value * self.tensor1.value)
    reduced = reverse_broadcast(scaled, self.tensor2)
    return autoTensor(value=reduced.value)
def der_pos1(self, gradient):
    """Expand the incoming gradient to the stored shape via a ones tensor."""
    expanded = gradient.value * torch.ones(self.shape)
    return autoTensor(value=expanded)
def der(self, gradient):
    """Loss backward: the raw residual y_pred - y_target."""
    residual = self.y_pred.value - self.y_target.value
    return autoTensor(value=residual)
def der_pos1(self, gradient):
    """Backward via v * (1 - v) of the stored output (sigmoid-derivative form)."""
    assert self.value.size() == gradient.value.size()
    local_grad = self.value * (1 - self.value)
    return autoTensor(value=gradient.value * local_grad)
def der(self, gradient):
    """Binary-cross-entropy backward: (1 - t)/(1 - p) - t/p."""
    pred = self.y_pred.value
    target = self.y_target.value
    value = (1 - target) / (1 - pred) - target / pred
    return autoTensor(value=value)
def der_pos1(self, gradient):
    """ReLU-form backward: pass the gradient only where the stored output is positive."""
    assert self.value.size() == gradient.value.size()
    # Stored output is non-negative; turn every positive entry into 1,
    # leaving zeros in place, to form the pass-through mask.
    mask = self.value.clone()
    mask[mask > 0] = 1
    return autoTensor(value=gradient.value * mask)