def AccumulateConvDeriv(self, edge, deriv):
  """Accumulate the derivative w.r.t the outputs of this layer.

  Each layer needs to compute derivatives w.r.t its outputs. These outputs may
  have been connected to lots of other nodes through outgoing edges. This
  method adds up the derivatives contributed by each outgoing edge. It gets
  derivatives w.r.t the inputs at the other end of an outgoing edge.

  Args:
    edge: The edge which is sending the derivative.
    deriv: The derivative w.r.t the inputs at the other end of this edge.
  """
  if self.dirty:  # If some derivatives have already been received.
    raise Exception('Not implemented.')
  self.dirty = True
  w = edge.params['weight']
  conv = edge.conv_params
  size = conv.size
  stride = conv.stride
  padding = conv.padding
  num_filters = conv.num_filters
  num_colors = conv.num_colors
  f, numdims = w.shape
  assert f == num_filters, 'f is %d but num_filters is %d' % (f, num_filters)
  assert numdims == size**2 * num_colors
  input_t = edge.input_t
  numimages, numdims = input_t.shape
  assert numdims % num_colors == 0
  x = int(np.sqrt(numdims / num_colors))
  assert x**2 == numdims / num_colors
  n_locs = (x + 2 * padding - size) / stride + 1
  if conv.max_pool:
    deriv.transpose(edge.output_t2)
    n_pool_locs = (n_locs + 2 * padding - conv.pool_size) / conv.pool_stride + 1
    cc.MaxPoolUndo(edge.unpooled_layer, edge.unpooled_layer, edge.output_t2,
                   edge.output_t, conv.pool_size, 0, conv.pool_stride,
                   n_pool_locs)
  else:
    deriv.transpose(edge.output_t)
  if self.is_input:
    return
  if conv.max_pool:
    output_t = edge.unpooled_layer
  else:
    output_t = edge.output_t
  cc.convDown(output_t, w, input_t, n_locs, stride, size, x, num_colors)
  input_t.transpose(self.deriv)
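
# ---------------------------------------------------------------------------
# Illustration (not part of the original module): a minimal sketch of the
# output-size arithmetic used above. Names below are hypothetical; integer
# division (//) is used so the formula also behaves correctly under Python 3
# (the original code relies on Python 2 integer division of ints).

def _num_locs(image_size, filter_size, stride, padding):
  """Number of filter positions along one spatial dimension."""
  return (image_size + 2 * padding - filter_size) // stride + 1

def _check_num_locs():
  # 32x32 image, 5x5 filters, stride 1, padding 2 -> 32 positions per row,
  # i.e. a 'same'-sized convolution output.
  assert _num_locs(32, 5, 1, 2) == 32
  # Pooling that 32x32 output with 3x3 windows at stride 2 and no extra
  # padding (as in the later versions of this method) -> 15 positions per row.
  assert _num_locs(32, 3, 2, 0) == 15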
def AccumulateConvDeriv(self, edge, deriv):
  """Accumulate the derivative w.r.t the outputs of this layer.

  Each layer needs to compute derivatives w.r.t its outputs. These outputs may
  have been connected to lots of other nodes through outgoing edges. This
  method adds up the derivatives contributed by each outgoing edge. It gets
  derivatives w.r.t the inputs at the other end of an outgoing edge.

  Args:
    edge: The edge which is sending the derivative.
    deriv: The derivative w.r.t the inputs at the other end of this edge.
  """
  if self.dirty:  # If some derivatives have already been received.
    raise Exception("Not implemented.")
  self.dirty = True
  w = edge.params["weight"]
  conv = edge.conv_params
  size = conv.size
  stride = conv.stride
  padding = conv.padding
  num_filters = conv.num_filters
  num_colors = conv.num_colors
  f, numdims = w.shape
  assert f == num_filters, "f is %d but num_filters is %d" % (f, num_filters)
  if edge.conv:
    assert numdims == size ** 2 * num_colors
  input_t = edge.input_t
  numImages, numdims = input_t.shape
  assert numdims % num_colors == 0
  x = int(np.sqrt(numdims / num_colors))
  assert x ** 2 == numdims / num_colors
  n_locs = (x + 2 * padding - size) / stride + 1

  # Incoming gradient.
  deriv.transpose(edge.output_t2)
  input_grads = edge.output_t2

  # Output activation (after conv + pool? + norm?)
  output_acts = edge.output_t
  if conv.rnorm:
    # ResponseNormUndo overwrites input_acts, so make a copy.
    input_acts = edge.rnorm_temp1
    input_acts.assign(edge.unrnormalized_layer)

    output_grads = edge.rnorm_temp2
    denoms = edge.denoms
    sizeX = conv.norm_size
    pow_scale = conv.pow_scale
    add_scale = conv.add_scale
    cc.ResponseNormUndo(input_grads, denoms, output_acts, input_acts,
                        output_grads, num_filters, sizeX, add_scale,
                        pow_scale)
    input_grads = output_grads
    output_acts = edge.unrnormalized_layer

  if conv.max_pool:
    input_acts = edge.unpooled_layer
    output_grads = edge.unpooled_layer
    # It's OK to overwrite input_acts because we don't need it later.
    n_pool_locs = (n_locs - conv.pool_size) / conv.pool_stride + 1
    sizeX = conv.pool_size
    strideX = conv.pool_stride
    cc.MaxPoolUndo(output_grads, input_acts, input_grads, output_acts, sizeX,
                   0, strideX, n_pool_locs)
    input_grads = output_grads
    output_acts = input_acts

  if self.is_input:
    return
  output_grads = edge.input_t2
  if edge.conv:
    cc.convDown(input_grads, w, output_grads, n_locs, padding, stride, size,
                x, num_colors)
  else:
    cc.localDown(input_grads, w, output_grads, n_locs, padding, stride, size,
                 x, num_colors)
  output_grads.transpose(self.deriv)
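
# ---------------------------------------------------------------------------
# Illustration (not part of the original module): a plain-NumPy sketch of the
# gradient routing that cc.MaxPoolUndo is assumed to perform -- each pooled
# output's gradient is sent back to the input position(s) that attained the
# max in its pooling window. Single image, single channel, no pooling padding.
# The real cudamat kernel works on batched, filter-major GPU matrices, and its
# exact argument layout is not reproduced here; the function name is
# hypothetical.

import numpy as np

def _max_pool_undo_2d(acts, pooled_acts, pooled_grads, pool_size, pool_stride):
  """Scatter pooled_grads back onto the positions of acts that were maximal."""
  in_grads = np.zeros(acts.shape)
  n_pool_locs = (acts.shape[0] - pool_size) // pool_stride + 1
  for i in range(n_pool_locs):
    for j in range(n_pool_locs):
      r, c = i * pool_stride, j * pool_stride
      window = acts[r:r + pool_size, c:c + pool_size]
      # Positions that attained the max (as stored in pooled_acts during the
      # forward pass) receive this pooled output's gradient.
      mask = (window == pooled_acts[i, j])
      in_grads[r:r + pool_size, c:c + pool_size] += mask * pooled_grads[i, j]
  return in_grads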
def AccumulateConvDeriv(layer, edge, deriv):
  """Accumulate the derivative w.r.t the outputs of this layer.

  Each layer needs to compute derivatives w.r.t its outputs. These outputs may
  have been connected to lots of other nodes through outgoing edges. This
  method adds up the derivatives contributed by each outgoing edge. It gets
  derivatives w.r.t the inputs at the other end of an outgoing edge.

  Args:
    edge: The edge which is sending the derivative.
    deriv: The derivative w.r.t the inputs at the other end of this edge.
  """
  if layer.dirty:  # If some derivatives have already been received.
    raise Exception('Not implemented.')
  layer.dirty = True
  w = edge.params['weight']
  conv = edge.conv_params
  size = conv.size
  stride = conv.stride
  padding = conv.padding
  num_filters = conv.num_filters
  num_colors = conv.num_colors
  input_t = edge.input_t
  numImages, numdims = input_t.shape
  assert numdims % num_colors == 0
  x = int(math.sqrt(numdims / num_colors))
  assert x**2 == numdims / num_colors
  n_locs = (x + 2 * padding - size) / stride + 1

  # Incoming gradient.
  deriv.transpose(edge.output_t2)
  input_grads = edge.output_t2

  # Output activation (after conv + pool? + norm?)
  output_acts = edge.output_t
  if conv.rnorm:
    # ResponseNormUndo overwrites input_acts, so make a copy.
    input_acts = edge.rnorm_temp1
    input_acts.assign(edge.unrnormalized_layer)

    output_grads = edge.rnorm_temp2
    denoms = edge.denoms
    sizeX = conv.norm_size
    pow_scale = conv.pow_scale
    add_scale = conv.add_scale
    cc.ResponseNormUndo(input_grads, denoms, output_acts, input_acts,
                        output_grads, num_filters, sizeX, add_scale,
                        pow_scale)
    input_grads = output_grads
    output_acts = edge.unrnormalized_layer

  if conv.max_pool:
    input_acts = edge.unpooled_layer
    output_grads = edge.unpooled_layer
    # It's OK to overwrite input_acts because we don't need it later.
    n_pool_locs = (n_locs - conv.pool_size) / conv.pool_stride + 1
    sizeX = conv.pool_size
    strideX = conv.pool_stride
    cc.MaxPoolUndo(output_grads, input_acts, input_grads, output_acts, sizeX,
                   0, strideX, n_pool_locs)
    input_grads = output_grads
    output_acts = input_acts

  if layer.is_input:
    return
  output_grads = edge.input_t2
  if edge.conv:
    cc.convDown(input_grads, w, output_grads, n_locs, padding, stride, size,
                x, num_colors)
  else:
    cc.localDown(input_grads, w, output_grads, n_locs, padding, stride, size,
                 x, num_colors)
  output_grads.transpose(layer.deriv)
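
# ---------------------------------------------------------------------------
# Illustration (not part of the original module): a plain-NumPy sketch of the
# input-gradient computation that cc.convDown is assumed to perform -- each
# output location's gradient is scattered back through the filter weights,
# i.e. a transposed convolution. Single image; weights are taken to have shape
# (num_filters, size * size * num_colors), matching the assertion on w.shape
# in the earlier versions above, and the color-major ordering within each row
# is an assumption. The real cudamat kernel operates on batched GPU matrices
# and its exact argument order is not reproduced here.

import numpy as np

def _conv_down_single(output_grads, w, x, num_colors, size, stride, padding):
  """Return d(loss)/d(input image) of shape (num_colors, x, x).

  output_grads: gradients w.r.t conv outputs, shape (num_filters, n_locs, n_locs).
  w: filter weights, shape (num_filters, size * size * num_colors).
  """
  num_filters = w.shape[0]
  filters = w.reshape(num_filters, num_colors, size, size)
  n_locs = (x + 2 * padding - size) // stride + 1
  padded = np.zeros((num_colors, x + 2 * padding, x + 2 * padding))
  for f in range(num_filters):
    for i in range(n_locs):
      for j in range(n_locs):
        r, c = i * stride, j * stride
        # Each output gradient contributes filter-weighted mass to every
        # input pixel inside its receptive field.
        padded[:, r:r + size, c:c + size] += output_grads[f, i, j] * filters[f]
  # Strip the padding to get gradients w.r.t the original x-by-x image.
  if padding > 0:
    return padded[:, padding:-padding, padding:-padding]
  return padded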