def add_vec_to_cols(mat, vec, dest=None, alpha=1.0, beta=1.0):
    '''
    Add the element in vec to every element in mat in corresponding cols.
    The function behaves exactly like mat + vec in numpy.

    mat   -- 2-D matrix; its shape and row stride size the kernel launch.
    vec   -- 2-D vector of shape (mw, 1) or (1, mw), where mw is the number
             of columns of mat (enforced by the assert below).
    dest  -- output matrix; when None the result is written back into mat.
    alpha -- scalar passed to the kernel just before vec (presumably the
             scale applied to vec; kernel not visible here -- verify).
    beta  -- scalar passed to the kernel just before mat (presumably the
             scale applied to mat -- verify).

    Raises AssertionError when vec does not have one entry per column.
    '''
    mh, mw = mat.shape
    vh, vw = vec.shape
    assert (vw == 1 and vh == mw) or (vh == 1 and vw == mw)

    # Test against None explicitly: `not dest` would ask the array for its
    # truth value rather than check whether the argument was supplied.
    if dest is None:
        dest = mat

    # 32x32 threads per block; grid = (tiles across columns, tiles down rows).
    block = (32, 32, 1)
    grid = (divup(mw, 32), divup(mh, 32))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm the element type.
    leading = mat.strides[0] / 4
    _add_vec_to_cols_(F(alpha), vec, F(beta), mat, dest,
                      I(leading), I(mh), I(mw),
                      block=block, grid=grid)
def relu_activate(input, output, e):
    '''
    Run the `_relu_activate_` kernel over the 2-D matrix `input`.

    input  -- source matrix; its shape and row stride size the launch.
    output -- buffer handed to the kernel as its second argument.
    e      -- scalar forwarded as F(e); presumably a ReLU parameter --
              the kernel is not visible here, verify.
    '''
    rows, cols = input.shape
    tile = 32
    # grid = (tiles across the columns, tiles down the rows).
    launch_grid = (divup(cols, tile), divup(rows, tile))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    row_pitch = input.strides[0] / 4
    _relu_activate_(input, output, F(e),
                    I(row_pitch), I(rows), I(cols),
                    block=(tile, tile, 1), grid=launch_grid)
def build_image(array):
    '''
    Tile a stack of square filters into a single mosaic image.

    array -- numpy array that is either
             4-D, indexed [channel, row, col, filter] (a 3-channel canvas
             is allocated), or
             3-D, indexed [row, col, filter].

    The mosaic has divup(80, filter_size) tiles per row and as many rows
    as are needed for all filters.  Every tile sits in a cell of
    (filter_size + 1) pixels, which leaves a one-pixel zero gap after it.

    Returns a float array of shape (H, W, 3) for 4-D input and (H, W) for
    3-D input, where H = (filter_size + 1) * num_rows and
    W = (filter_size + 1) * num_cols.
    '''
    is_color = len(array.shape) == 4
    filter_size = array.shape[1] if is_color else array.shape[0]
    num_filters = array.shape[-1]
    num_cols = util.divup(80, filter_size)
    num_rows = util.divup(num_filters, num_cols)

    # The canvas must use the same (filter_size + 1) pitch as the tile
    # offsets below.  The grayscale canvas used to be allocated with a
    # pitch of filter_size, so tiles further along the grid ran past its
    # edge and the assignment failed with a shape mismatch.
    cell = filter_size + 1
    canvas_shape = (cell * num_rows, cell * num_cols)
    if is_color:
        big_pic = np.zeros((3,) + canvas_shape)
    else:
        big_pic = np.zeros(canvas_shape)

    for i in range(num_rows):
        for j in range(num_cols):
            idx = i * num_cols + j
            if idx >= num_filters:
                # Only the last row can be partially filled.
                break
            x = i * cell
            y = j * cell
            if is_color:
                big_pic[:, x:x + filter_size, y:y + filter_size] = array[:, :, :, idx]
            else:
                big_pic[x:x + filter_size, y:y + filter_size] = array[:, :, idx]

    if is_color:
        # Move the channel axis last: (3, H, W) -> (H, W, 3).
        return big_pic.transpose(1, 2, 0)
    return big_pic
def relu_compute_grad(grad, output, outGrad, e):
    '''
    Run the `_relu_compute_grad_` kernel; the launch is sized from `grad`.

    grad, output, outGrad -- matrices forwarded to the kernel in that order
                             (which one is written is decided by the kernel,
                             not visible here).
    e                     -- scalar forwarded as F(e).
    '''
    rows, cols = grad.shape
    tile = 32
    threads = (tile, tile, 1)
    # grid = (tiles across the columns, tiles down the rows).
    tiles = (divup(cols, tile), divup(rows, tile))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    row_pitch = grad.strides[0] / 4
    _relu_compute_grad_(grad, output, outGrad, F(e),
                        I(row_pitch), I(rows), I(cols),
                        block=threads, grid=tiles)
def tanh_activate(input, output, a, b):
    '''
    Run the `_tanh_activate_` kernel over the 2-D matrix `input`.

    input  -- source matrix; its shape and row stride size the launch.
    output -- buffer handed to the kernel as its second argument.
    a, b   -- scalars; the kernel receives F(a) and F(-2.0 * b).
              Presumably the parameters of a scaled tanh -- the kernel
              formula is not visible here, verify.
    '''
    rows, cols = input.shape
    tile = 32
    # grid = (tiles across the columns, tiles down the rows).
    tiles = (divup(cols, tile), divup(rows, tile))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    row_pitch = input.strides[0] / 4
    # The kernel takes the pre-multiplied constant -2*b rather than b.
    neg_two_b = -2.0 * b
    _tanh_activate_(input, output, F(a), F(neg_two_b),
                    I(row_pitch), I(rows), I(cols),
                    block=(tile, tile, 1), grid=tiles)
def tanh_compute_grad(grad, output, outGrad, a, b):
    '''
    Run the `_tanh_compute_grad_` kernel; the launch is sized from `output`.

    grad, output, outGrad -- matrices forwarded to the kernel in that order.
    a, b                  -- scalars; the kernel receives F(a) and
                             F(-4.0 * a * b).
    '''
    rows, cols = output.shape
    tile = 32
    # grid = (tiles across the columns, tiles down the rows).
    tiles = (divup(cols, tile), divup(rows, tile))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    row_pitch = output.strides[0] / 4
    # The kernel takes the pre-multiplied constant -4*a*b.
    neg_four_ab = -4.0 * a * b
    _tanh_compute_grad_(grad, output, outGrad, F(a), F(neg_four_ab),
                        I(row_pitch), I(rows), I(cols),
                        block=(tile, tile, 1), grid=tiles)
def eltwise_exp(src, dest = None):
    '''
    Run the `_eltwise_exp_` kernel over the 2-D matrix `src`.

    src  -- source matrix; its shape and row stride size the launch.
    dest -- output matrix; when None, `src` itself is passed as the
            destination (in-place).
    '''
    target = src if dest is None else dest
    rows, cols = src.shape
    tile = 32
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    row_pitch = src.strides[0] / 4
    # grid = (tiles across the columns, tiles down the rows).
    _eltwise_exp_(src, target, I(rows), I(cols), I(row_pitch),
                  block=(tile, tile, 1),
                  grid=(divup(cols, tile), divup(rows, tile)))
def eltwise_exp(src, dest=None):
    '''
    Run the `_eltwise_exp_` kernel over the 2-D matrix `src`.

    src  -- source matrix; its shape and row stride size the launch.
    dest -- output matrix; when None, `src` itself is passed as the
            destination (in-place).
    '''
    out = src if dest is None else dest
    n_rows, n_cols = src.shape
    side = 32
    threads = (side, side, 1)
    # grid = (tiles across the columns, tiles down the rows).
    tiles = (divup(n_cols, side), divup(n_rows, side))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    pitch = src.strides[0] / 4
    _eltwise_exp_(src, out, I(n_rows), I(n_cols), I(pitch),
                  block=threads, grid=tiles)
def softmax_bprop(mat, label, grad):
    '''
    Run the `_softmax_bprop_` kernel; the launch is sized from `mat`.

    mat   -- 2-D matrix with mw columns.
    label -- 2-D vector of shape (1, mw) or (mw, 1).
    grad  -- matrix forwarded as the kernel's third argument (presumably
             receives the gradient -- kernel not visible here, verify).

    Raises AssertionError carrying (vh, vw, mw) when label has the wrong
    shape.
    '''
    rows, cols = mat.shape
    label_rows, label_cols = label.shape
    is_row_vector = label_rows == 1 and label_cols == cols
    is_col_vector = label_cols == 1 and label_rows == cols
    assert is_row_vector or is_col_vector, (label_rows, label_cols, cols)

    tile = 32
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    row_pitch = mat.strides[0] / 4
    # grid = (tiles across the columns, tiles down the rows).
    _softmax_bprop_(mat, label, grad, I(row_pitch), I(rows), I(cols),
                    block=(tile, tile, 1),
                    grid=(divup(cols, tile), divup(rows, tile)))
def eltwise_mul(src, right, dest = None):
    '''
    Run the `_eltwise_mul_` kernel on two equally shaped 2-D matrices.

    src, right -- operands; their shapes must match.
    dest       -- output matrix; when None, `src` itself is passed as the
                  destination (in-place).

    Raises AssertionError when the operand shapes differ.
    '''
    assert src.shape == right.shape
    target = src if dest is None else dest
    rows, cols = src.shape
    tile = 32
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    row_pitch = src.strides[0] / 4
    # grid = (tiles across the columns, tiles down the rows).
    _eltwise_mul_(src, right, target, I(rows), I(cols), I(row_pitch),
                  block=(tile, tile, 1),
                  grid=(divup(cols, tile), divup(rows, tile)))
def gpu_partial_copy_to(x, y, row_from, row_to, col_from, col_to):
    '''
    Run the `_gpu_partial_copy_to_` kernel on a sub-rectangle of `x`.

    x                -- source 2-D matrix.
    y                -- destination matrix (where the data lands in y is
                        decided by the kernel, not visible here).
    row_from, row_to -- row bounds; row_to is clamped to the row count of x.
    col_from, col_to -- column bounds; col_to is clamped to the column
                        count of x.

    The launch covers (row_to - row_from) rows by (col_to - col_from)
    columns after clamping.
    '''
    src_rows, src_cols = x.shape
    # Clamp the upper bounds to the source matrix.
    row_to = min(row_to, src_rows)
    col_to = min(col_to, src_cols)
    n_rows = row_to - row_from
    n_cols = col_to - col_from

    tile = 32
    # Row strides divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    src_pitch = x.strides[0] / 4
    dst_pitch = y.strides[0] / 4
    # grid = (tiles across the copied columns, tiles down the copied rows).
    _gpu_partial_copy_to_(x, y,
                          I(row_from), I(row_to), I(col_from), I(col_to),
                          I(src_pitch), I(dst_pitch),
                          block=(tile, tile, 1),
                          grid=(divup(n_cols, tile), divup(n_rows, tile)))
def transpose(mat):
    '''
    Return a new array filled by the `_transpose_` kernel from `mat`.

    mat -- 2-D matrix of shape (mh, mw).

    The result is allocated with gpuarray.empty as a float32 array of
    shape (mw, mh); `mat` itself is only passed to the kernel as input.
    '''
    rows, cols = mat.shape
    result = gpuarray.empty((cols, rows), dtype=np.float32)
    tile = 32
    # Row strides divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    src_pitch = mat.strides[0] / 4
    dst_pitch = result.strides[0] / 4
    # grid = (tiles across the source columns, tiles down the source rows).
    _transpose_(mat, result, I(src_pitch), I(dst_pitch), I(rows), I(cols),
                block=(tile, tile, 1),
                grid=(divup(cols, tile), divup(rows, tile)))
    return result
def bigger_than_scaler(src, scaler, dest=None):
    '''
    Run the `_bigger_than_scaler_` kernel over the 2-D matrix `src`.

    src    -- source matrix; its shape and row stride size the launch.
    scaler -- scalar forwarded as F(scaler); presumably each element is
              compared against it -- kernel not visible here, verify.
    dest   -- output matrix with the same shape as src; when None, `src`
              itself is passed as the destination (in-place).

    Raises AssertionError when dest is given with a different shape.
    '''
    if dest is None:
        dest = src
    else:
        assert dest.shape == src.shape

    rows, cols = src.shape
    tile = 32
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    row_pitch = src.strides[0] / 4
    # grid = (tiles across the columns, tiles down the rows).
    _bigger_than_scaler_(src, dest, F(scaler), I(rows), I(cols), I(row_pitch),
                         block=(tile, tile, 1),
                         grid=(divup(cols, tile), divup(rows, tile)))
def div_vec_to_cols(mat, vec, dest=None):
    '''
    Divide the element in corresponding column of matrix by the element
    in the vec.

    mat  -- 2-D matrix; its shape and row stride size the kernel launch.
    vec  -- 2-D vector of divisors (its length is not checked here).
    dest -- output matrix; when None the result is written back into mat.
    '''
    mh, mw = mat.shape
    # The unpacked values are unused, but the unpacking itself rejects a
    # vec that is not 2-D.
    vh, vw = vec.shape

    # Test against None explicitly: `not dest` would ask the array for its
    # truth value rather than check whether the argument was supplied.
    if dest is None:
        dest = mat

    # 32x32 threads per block; grid = (tiles across columns, tiles down rows).
    block = (32, 32, 1)
    grid = (divup(mw, 32), divup(mh, 32))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm the element type.
    leading = mat.strides[0] / 4
    _div_vec_to_cols_(vec, mat, dest, I(leading), I(mh), I(mw),
                      block=block, grid=grid)
def relu_activate(input, output, e):
    '''
    Run the `_relu_activate_` kernel over the 2-D matrix `input`.

    input  -- source matrix; its shape and row stride size the launch.
    output -- buffer handed to the kernel as its second argument.
    e      -- scalar forwarded as F(e); presumably a ReLU parameter --
              the kernel is not visible here, verify.
    '''
    n_rows, n_cols = input.shape
    side = 32
    threads = (side, side, 1)
    # grid = (tiles across the columns, tiles down the rows).
    tiles = (divup(n_cols, side), divup(n_rows, side))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    pitch = input.strides[0] / 4
    _relu_activate_(input, output, F(e), I(pitch), I(n_rows), I(n_cols),
                    block=threads, grid=tiles)
def relu_compute_grad(grad, output, outGrad, e):
    '''
    Run the `_relu_compute_grad_` kernel; the launch is sized from `grad`.

    grad, output, outGrad -- matrices forwarded to the kernel in that order
                             (which one is written is decided by the kernel,
                             not visible here).
    e                     -- scalar forwarded as F(e).
    '''
    n_rows, n_cols = grad.shape
    side = 32
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    pitch = grad.strides[0] / 4
    # grid = (tiles across the columns, tiles down the rows).
    _relu_compute_grad_(grad, output, outGrad, F(e),
                        I(pitch), I(n_rows), I(n_cols),
                        block=(side, side, 1),
                        grid=(divup(n_cols, side), divup(n_rows, side)))
def matrix_add(src, v, dest=None, alpha=1.0, beta=1.0):
    '''
    Run the `_matrix_add_` kernel on two equally shaped 2-D matrices.

    src, v      -- operands; their shapes must match.
    dest        -- output matrix; when None, `src` itself is passed as the
                   destination (in-place).
    alpha, beta -- scalars forwarded as F(alpha), F(beta); presumably the
                   weights of src and v -- kernel not visible here, verify.

    Raises AssertionError with a '(sh, sw) + (vh, vw)' message when the
    shapes differ.
    '''
    rows, cols = src.shape
    v_rows, v_cols = v.shape
    assert rows == v_rows and cols == v_cols, \
        '(%s, %s) + (%s, %s)' % (rows, cols, v_rows, v_cols)

    tile = 32
    threads = (tile, tile, 1)
    # grid = (tiles across the columns, tiles down the rows).
    tiles = (divup(cols, tile), divup(rows, tile))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    row_pitch = src.strides[0] / 4
    target = src if dest is None else dest
    _matrix_add_(src, v, target, F(alpha), F(beta),
                 I(row_pitch), I(rows), I(cols),
                 block=threads, grid=tiles)
def softmax_bprop(mat, label, grad):
    '''
    Run the `_softmax_bprop_` kernel; the launch is sized from `mat`.

    mat   -- 2-D matrix with mw columns.
    label -- 2-D vector of shape (1, mw) or (mw, 1).
    grad  -- matrix forwarded as the kernel's third argument (presumably
             receives the gradient -- kernel not visible here, verify).

    Raises AssertionError carrying (vh, vw, mw) when label has the wrong
    shape.
    '''
    n_rows, n_cols = mat.shape
    lab_h, lab_w = label.shape
    row_shaped = lab_h == 1 and lab_w == n_cols
    col_shaped = lab_w == 1 and lab_h == n_cols
    assert row_shaped or col_shaped, (lab_h, lab_w, n_cols)

    side = 32
    threads = (side, side, 1)
    # grid = (tiles across the columns, tiles down the rows).
    tiles = (divup(n_cols, side), divup(n_rows, side))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    pitch = mat.strides[0] / 4
    _softmax_bprop_(mat, label, grad, I(pitch), I(n_rows), I(n_cols),
                    block=threads, grid=tiles)
def tanh_activate(input, output, a, b):
    '''
    Run the `_tanh_activate_` kernel over the 2-D matrix `input`.

    input  -- source matrix; its shape and row stride size the launch.
    output -- buffer handed to the kernel as its second argument.
    a, b   -- scalars; the kernel receives F(a) and F(-2.0 * b).
              Presumably the parameters of a scaled tanh -- the kernel
              formula is not visible here, verify.
    '''
    n_rows, n_cols = input.shape
    side = 32
    threads = (side, side, 1)
    # grid = (tiles across the columns, tiles down the rows).
    tiles = (divup(n_cols, side), divup(n_rows, side))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    pitch = input.strides[0] / 4
    # The kernel takes the pre-multiplied constant -2*b rather than b.
    minus_2b = -2.0 * b
    _tanh_activate_(input, output, F(a), F(minus_2b),
                    I(pitch), I(n_rows), I(n_cols),
                    block=threads, grid=tiles)
def add_vec_to_cols(mat, vec, dest=None, alpha=1.0, beta=1.0):
    '''
    Add the element in vec to every element in mat in corresponding cols.
    The function behaves exactly like mat + vec in numpy.

    mat   -- 2-D matrix; its shape and row stride size the kernel launch.
    vec   -- 2-D vector of shape (mw, 1) or (1, mw), where mw is the number
             of columns of mat (enforced by the assert below).
    dest  -- output matrix; when None the result is written back into mat.
    alpha -- scalar passed to the kernel just before vec (presumably the
             scale applied to vec; kernel not visible here -- verify).
    beta  -- scalar passed to the kernel just before mat (presumably the
             scale applied to mat -- verify).

    Raises AssertionError when vec does not have one entry per column.
    '''
    mh, mw = mat.shape
    vh, vw = vec.shape
    assert (vw == 1 and vh == mw) or (vh == 1 and vw == mw)

    # Test against None explicitly: `not dest` would ask the array for its
    # truth value rather than check whether the argument was supplied.
    if dest is None:
        dest = mat

    # 32x32 threads per block; grid = (tiles across columns, tiles down rows).
    block = (32, 32, 1)
    grid = (divup(mw, 32), divup(mh, 32))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm the element type.
    leading = mat.strides[0] / 4
    _add_vec_to_cols_(F(alpha), vec, F(beta), mat, dest,
                      I(leading), I(mh), I(mw),
                      block=block, grid=grid)
def tanh_compute_grad(grad, output, outGrad, a, b):
    '''
    Run the `_tanh_compute_grad_` kernel; the launch is sized from `output`.

    grad, output, outGrad -- matrices forwarded to the kernel in that order.
    a, b                  -- scalars; the kernel receives F(a) and
                             F(-4.0 * a * b).
    '''
    n_rows, n_cols = output.shape
    side = 32
    threads = (side, side, 1)
    # grid = (tiles across the columns, tiles down the rows).
    tiles = (divup(n_cols, side), divup(n_rows, side))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    pitch = output.strides[0] / 4
    # The kernel takes the pre-multiplied constant -4*a*b.
    minus_4ab = -4.0 * a * b
    _tanh_compute_grad_(grad, output, outGrad, F(a), F(minus_4ab),
                        I(pitch), I(n_rows), I(n_cols),
                        block=threads, grid=tiles)
def eltwise_mul(src, right, dest=None):
    '''
    Run the `_eltwise_mul_` kernel on two equally shaped 2-D matrices.

    src, right -- operands; their shapes must match.
    dest       -- output matrix; when None, `src` itself is passed as the
                  destination (in-place).

    Raises AssertionError when the operand shapes differ.
    '''
    assert src.shape == right.shape
    out = src if dest is None else dest
    n_rows, n_cols = src.shape
    side = 32
    threads = (side, side, 1)
    # grid = (tiles across the columns, tiles down the rows).
    tiles = (divup(n_cols, side), divup(n_rows, side))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    pitch = src.strides[0] / 4
    _eltwise_mul_(src, right, out, I(n_rows), I(n_cols), I(pitch),
                  block=threads, grid=tiles)
def _shuffle_batches(self):
    '''
    Re-deal the images into random batches and randomise the batch order.

    Reads  self.images and self.batch_size.
    Writes self.batches     -- list of index arrays into self.images, one
                               per batch (np.array_split keeps their sizes
                               within one element of each other);
           self.batch_range -- shuffled list of batch numbers.
    '''
    # build index vector into 'images' and split into groups of batch-size
    num_images = len(self.images)
    image_index = np.arange(num_images)
    np.random.shuffle(image_index)
    self.batches = np.array_split(
        image_index, util.divup(num_images, self.batch_size))

    # np.random.shuffle works in place and therefore needs a mutable
    # sequence; range() only returns a list on Python 2, so materialise it.
    self.batch_range = list(range(len(self.batches)))
    np.random.shuffle(self.batch_range)
def attach(self, prev_layer):
    '''
    Size this layer from the output shape of `prev_layer` and initialise
    its weights.

    prev_layer.get_output_shape() must yield four values; they are stored
    as numColor, img_size, (ignored) and batch_size.

    Sets outputSize = 1 + divup(2 * padding + img_size - filterSize, stride)
    and modules = outputSize squared, then calls self._init_weights with a
    (filterSize * filterSize * numColor, numFilter) weight shape and a
    (numFilter, 1) bias shape.
    '''
    self.numColor, self.img_size, _, self.batch_size = prev_layer.get_output_shape()

    padded_extent = 2 * self.padding + self.img_size
    self.outputSize = divup(padded_extent - self.filterSize, self.stride) + 1
    self.modules = self.outputSize * self.outputSize

    weights_per_filter = self.filterSize * self.filterSize * self.numColor
    self._init_weights((weights_per_filter, self.numFilter),
                       (self.numFilter, 1))
def _shuffle_batches(self):
    '''
    Re-deal the images into random batches and randomise the batch order.

    Reads  self.images and self.batch_size.
    Writes self.batches     -- list of index arrays into self.images, one
                               per batch (np.array_split keeps their sizes
                               within one element of each other);
           self.batch_range -- shuffled list of batch numbers.
    '''
    # build index vector into 'images' and split into groups of batch-size
    num_images = len(self.images)
    image_index = np.arange(num_images)
    np.random.shuffle(image_index)
    self.batches = np.array_split(
        image_index, util.divup(num_images, self.batch_size))

    # np.random.shuffle works in place and therefore needs a mutable
    # sequence; range() only returns a list on Python 2, so materialise it.
    self.batch_range = list(range(len(self.batches)))
    np.random.shuffle(self.batch_range)
def bigger_than_scaler(src, scaler, dest=None):
    '''
    Run the `_bigger_than_scaler_` kernel over the 2-D matrix `src`.

    src    -- source matrix; its shape and row stride size the launch.
    scaler -- scalar forwarded as F(scaler); presumably each element is
              compared against it -- kernel not visible here, verify.
    dest   -- output matrix with the same shape as src; when None, `src`
              itself is passed as the destination (in-place).

    Raises AssertionError when dest is given with a different shape.
    '''
    if dest is None:
        dest = src
    else:
        assert dest.shape == src.shape

    n_rows, n_cols = src.shape
    side = 32
    threads = (side, side, 1)
    # grid = (tiles across the columns, tiles down the rows).
    tiles = (divup(n_cols, side), divup(n_rows, side))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    pitch = src.strides[0] / 4
    _bigger_than_scaler_(src, dest, F(scaler), I(n_rows), I(n_cols), I(pitch),
                         block=threads, grid=tiles)
def div_vec_to_cols(mat, vec, dest=None):
    '''
    Divide the element in corresponding column of matrix by the element
    in the vec.

    mat  -- 2-D matrix; its shape and row stride size the kernel launch.
    vec  -- 2-D vector of divisors (its length is not checked here).
    dest -- output matrix; when None the result is written back into mat.
    '''
    mh, mw = mat.shape
    # The unpacked values are unused, but the unpacking itself rejects a
    # vec that is not 2-D.
    vh, vw = vec.shape

    # Test against None explicitly: `not dest` would ask the array for its
    # truth value rather than check whether the argument was supplied.
    if dest is None:
        dest = mat

    # 32x32 threads per block; grid = (tiles across columns, tiles down rows).
    block = (32, 32, 1)
    grid = (divup(mw, 32), divup(mh, 32))
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm the element type.
    leading = mat.strides[0] / 4
    _div_vec_to_cols_(vec, mat, dest, I(leading), I(mh), I(mw),
                      block=block, grid=grid)
def matrix_add(src, v, dest=None, alpha=1.0, beta=1.0):
    '''
    Run the `_matrix_add_` kernel on two equally shaped 2-D matrices.

    src, v      -- operands; their shapes must match.
    dest        -- output matrix; when None, `src` itself is passed as the
                   destination (in-place).
    alpha, beta -- scalars forwarded as F(alpha), F(beta); presumably the
                   weights of src and v -- kernel not visible here, verify.

    Raises AssertionError with a '(sh, sw) + (vh, vw)' message when the
    shapes differ.
    '''
    n_rows, n_cols = src.shape
    other_rows, other_cols = v.shape
    same_shape = n_rows == other_rows and n_cols == other_cols
    assert same_shape, \
        '(%s, %s) + (%s, %s)' % (n_rows, n_cols, other_rows, other_cols)

    side = 32
    # Row stride divided by 4 -- presumably bytes to 4-byte elements,
    # TODO confirm.
    pitch = src.strides[0] / 4
    out = src if dest is None else dest
    # grid = (tiles across the columns, tiles down the rows).
    _matrix_add_(src, v, out, F(alpha), F(beta),
                 I(pitch), I(n_rows), I(n_cols),
                 block=(side, side, 1),
                 grid=(divup(n_cols, side), divup(n_rows, side)))
def attach(self, prev):
    '''
    Size this pooling layer from the output shape of `prev`.

    prev.get_output_shape() must yield four values; they are stored as
    numColor, img_size, (ignored) and batch_size.

    Sets outputSize = divup(img_size - poolSize - start, stride) + 1.

    Raises AssertionError when numColor is not a multiple of 16; the
    attributes above have already been assigned at that point.
    '''
    self.numColor, self.img_size, _, self.batch_size = prev.get_output_shape()

    span = self.img_size - self.poolSize - self.start
    self.outputSize = 1 + divup(span, self.stride)

    colors_ok = self.numColor % 16 == 0
    assert colors_ok, (
        'Pool layers require colors to be a multiple of 16: got %s' % self.numColor)
def __init__(self, data_dir, batch_range=None, category_range=None, batch_size=1024):
    '''
    Index the image files under data_dir, deal them into shuffled batches
    and load the cropped mean image.

    data_dir       -- directory holding one 'n*' sub-directory per category
                      and the file 'image-mean.pickle'.
    batch_range    -- forwarded to DataProvider.__init__.  Afterwards
                      self.batch_range is read as the set of positions (in
                      each category's sorted file list) to keep --
                      presumably set by the base class, verify.
    category_range -- optional iterable of label indices, resolved through
                      self.batch_meta['label_to_synid']; when None every
                      'n*' directory is used.
    batch_size     -- target number of images per batch.

    Sets, among others: self.images (array of file paths), self.batches
    (index arrays into self.images), self.batch_range (shuffled batch
    numbers) and self.data_mean (mean image cropped to the inner window,
    reshaped to (self.get_data_dims(), 1)).
    '''
    import os  # local import: only needed to build the mean-file path

    DataProvider.__init__(self, data_dir, batch_range)
    self.img_size = 256
    self.border_size = 16
    self.inner_size = 224
    self.batch_size = batch_size

    # Multi-view testing is switched off here
    # (was: dp_params['multiview_test'] and test).
    self.multiview = 0
    self.num_views = 5 * 2
    self.data_mult = self.num_views if self.multiview else 1
    self.buffer_idx = 0

    # Category directories are named 'n<synid>'; map synid -> directory.
    dirs = glob.glob(data_dir + '/n*')
    synid_to_dir = dict((basename(d)[1:], d) for d in dirs)

    if category_range is None:
        cat_dirs = dirs
    else:
        label_to_synid = self.batch_meta['label_to_synid']
        cat_dirs = [synid_to_dir[label_to_synid[i]] for i in category_range]

    # Keep, per category, only the files whose position in the sorted
    # listing is in self.batch_range.
    wanted = set(self.batch_range)
    self.images = []
    for d in cat_dirs:
        img_files = []
        for ext in ('jpg', 'jpeg', 'JPG', 'JPEG'):
            img_files.extend(glob.glob(d + '/*.' + ext))
        img_files.sort()
        self.images.extend(v for i, v in enumerate(img_files) if i in wanted)
    self.images = np.array(self.images)

    # build index vector into 'images' and split into groups of batch-size
    image_index = np.arange(len(self.images))
    np.random.shuffle(image_index)
    self.batches = np.array_split(image_index,
                                  util.divup(len(self.images), batch_size))

    # The shuffle below is in place, so it needs a real (mutable) list.
    self.batch_range = list(range(len(self.batches)))
    util.log('Starting data provider with %d batches', len(self.batches))
    np.random.shuffle(self.batch_range)

    # os.path.join finds the file whether or not data_dir ends with a
    # separator; plain concatenation silently required a trailing slash.
    mean_path = os.path.join(data_dir, 'image-mean.pickle')
    # NOTE(review): unpickling can execute arbitrary code -- data_dir must
    # only ever point at trusted data.
    # The with-block closes the handle, which previously leaked.
    with open(mean_path) as mean_file:
        imagemean = cPickle.loads(mean_file.read())

    # Reshape the mean to (3, img_size, img_size) and crop away the border
    # so it matches the inner_size x inner_size window.
    lo = self.border_size
    hi = lo + self.inner_size
    full_mean = (imagemean['data']
                 .astype(np.single)
                 .T
                 .reshape((3, self.img_size, self.img_size)))
    self.data_mean = full_mean[:, lo:hi, lo:hi].reshape((self.get_data_dims(), 1))