def backward(self, y_pred, y_train, cache):
    """Backpropagate the loss through the three fully connected layers.

    The loss gradient comes from ``self.dloss_funs[self.loss]`` and is pushed
    back layer by layer: ``l.fc_backward`` for the output layer, then for
    each hidden layer the non-linearity, dropout and batch-norm backward
    steps followed by ``l.fc_backward``. The gradient of the L2 penalty
    (``reg.dl2_reg`` with ``self.lam``) is added in place to each weight
    gradient.

    Args:
        y_pred: Passed to the loss-gradient function together with
            ``y_train`` (presumably the network output -- confirm against
            the forward pass).
        y_train: Passed to the loss-gradient function (presumably the
            training targets).
        cache: Sequence of exactly ten items, in this order:
            ``(X, h1_cache, h2_cache, score_cache, nl_cache1, nl_cache2,
            u1, u2, bn1_cache, bn2_cache)``. ``X`` is not used here.

    Returns:
        dict with keys ``W1, W2, W3, b1, b2, b3, gamma1, gamma2, beta1,
        beta2`` holding the corresponding gradients.
    """
    (_x,
     fc1_cache, fc2_cache, out_cache,
     act1_cache, act2_cache,
     mask1, mask2,
     norm1_cache, norm2_cache) = cache

    def hidden_backward(upstream, act_cache, mask, norm_cache):
        # Undo one hidden block: non-linearity, then dropout, then batch
        # norm. Returns (d_input, d_gamma, d_beta) from l.bn_backward.
        upstream = self.backward_nonlin(upstream, act_cache)
        upstream = l.dropout_backward(upstream, mask)
        return l.bn_backward(upstream, norm_cache)

    # Gradient of the loss with respect to the network output.
    loss_grad_fn = self.dloss_funs[self.loss]
    d_out = loss_grad_fn(y_pred, y_train)

    # Output (third) layer.
    d_hidden2, grad_W3, grad_b3 = l.fc_backward(d_out, out_cache)
    grad_W3 += reg.dl2_reg(self.model['W3'], self.lam)
    d_hidden2, grad_gamma2, grad_beta2 = hidden_backward(
        d_hidden2, act2_cache, mask2, norm2_cache
    )

    # Second layer.
    d_hidden1, grad_W2, grad_b2 = l.fc_backward(d_hidden2, fc2_cache)
    grad_W2 += reg.dl2_reg(self.model['W2'], self.lam)
    d_hidden1, grad_gamma1, grad_beta1 = hidden_backward(
        d_hidden1, act1_cache, mask1, norm1_cache
    )

    # First layer; the gradient with respect to the input is discarded.
    _d_input, grad_W1, grad_b1 = l.fc_backward(d_hidden1, fc1_cache)
    grad_W1 += reg.dl2_reg(self.model['W1'], self.lam)

    return {
        'W1': grad_W1,
        'W2': grad_W2,
        'W3': grad_W3,
        'b1': grad_b1,
        'b2': grad_b2,
        'b3': grad_b3,
        'gamma1': grad_gamma1,
        'gamma2': grad_gamma2,
        'beta1': grad_beta1,
        'beta2': grad_beta2,
    }
def backward(self, y_pred, y_train, cache):
    """Backpropagate the loss through the conv/pool/FC stack.

    The loss gradient comes from ``self.dloss_funs[self.loss]``. It is sent
    back through the output FC layer, two hidden FC blocks (non-linearity,
    batch norm, FC), reshaped to ``hpool2.shape``, and then through two
    pool/non-linearity/conv stages.

    Args:
        y_pred: Passed to the loss-gradient function together with
            ``y_train`` (presumably the network output -- confirm against
            the forward pass).
        y_train: Passed to the loss-gradient function (presumably the
            training targets).
        cache: Sequence of exactly sixteen items, in this order:
            ``(X, h1_cache, h2_cache, h4_cache, h5_cache, score_cache,
            hpool1_cache, hpool1, hpool2_cache, hpool2, nl_cache1,
            nl_cache2, nl_cache4, nl_cache5, bn4_cache, bn5_cache)``.
            ``X`` and ``hpool1`` are not used here; only the shape of
            ``hpool2`` is read.

    Returns:
        dict with keys ``W1, W2, W4, W5, W6, b1, b2, b4, b5, b6, gamma4,
        gamma5, beta4, beta5`` holding the corresponding gradients.
    """
    (_x,
     conv1_cache, conv2_cache, fc1_cache, fc2_cache, out_cache,
     pool1_cache, _pool1_out, pool2_cache, pool2_out,
     act1_cache, act2_cache, act4_cache, act5_cache,
     norm4_cache, norm5_cache) = cache

    def fc_block_backward(upstream, act_cache, norm_cache, fc_cache):
        # Undo one hidden FC block: non-linearity, batch norm, then the
        # affine layer. Returns (d_input, dW, db, d_gamma, d_beta).
        upstream = self.backward_nonlin(upstream, act_cache)
        upstream, d_gamma, d_beta = l.bn_backward(upstream, norm_cache)
        d_input, d_weight, d_bias = l.fc_backward(upstream, fc_cache)
        return d_input, d_weight, d_bias, d_gamma, d_beta

    def conv_block_backward(upstream, pool_cache, act_cache, conv_cache):
        # Undo one conv stage: max-pool, non-linearity, then convolution.
        # Returns whatever l.conv_backward returns, unpacked by the caller
        # as (d_input, dW, db).
        upstream = l.maxpool_backward(upstream, pool_cache)
        upstream = self.backward_nonlin(upstream, act_cache)
        return l.conv_backward(upstream, conv_cache)

    # Output layer.
    loss_grad_fn = self.dloss_funs[self.loss]
    d_scores = loss_grad_fn(y_pred, y_train)
    d_fc2_out, grad_W6, grad_b6 = l.fc_backward(d_scores, out_cache)

    # FC-2.
    d_fc1_out, grad_W5, grad_b5, grad_gamma5, grad_beta5 = fc_block_backward(
        d_fc2_out, act5_cache, norm5_cache, fc2_cache
    )

    # FC-1.
    d_flat, grad_W4, grad_b4, grad_gamma4, grad_beta4 = fc_block_backward(
        d_fc1_out, act4_cache, norm4_cache, fc1_cache
    )

    # Restore the shape of the second pooling output.
    d_pool2_out = d_flat.ravel().reshape(pool2_out.shape)

    # Pool-2 / Conv-2.
    d_pool1_out, grad_W2, grad_b2 = conv_block_backward(
        d_pool2_out, pool2_cache, act2_cache, conv2_cache
    )

    # Pool-1 / Conv-1; the gradient with respect to the input is discarded.
    _d_input, grad_W1, grad_b1 = conv_block_backward(
        d_pool1_out, pool1_cache, act1_cache, conv1_cache
    )

    return {
        'W1': grad_W1,
        'W2': grad_W2,
        'W4': grad_W4,
        'W5': grad_W5,
        'W6': grad_W6,
        'b1': grad_b1,
        'b2': grad_b2,
        'b4': grad_b4,
        'b5': grad_b5,
        'b6': grad_b6,
        'gamma4': grad_gamma4,
        'gamma5': grad_gamma5,
        'beta4': grad_beta4,
        'beta5': grad_beta5,
    }