def forward(self):
    """Run the forward pass, writing the result into ``self.top``.

    Reads ``self.bottom``, ``self.weights``, ``self.bias`` and
    ``self.bias_multiplier``; ``self.top`` is handed to ``sgemm`` as the
    output operand of both calls.

    NOTE(review): the ``sgemm`` argument order looks like
    (transA, transB, alpha, A, a_offset, lda, B, b_offset, ldb, beta,
    C, c_offset, ldc, m, n, k) -- confirm against its definition.
    """
    bottom = self.bottom
    top = self.top
    batch = bottom.shape[0]
    in_features = np.prod(bottom.shape[1:])
    out_features = self.num_output

    # Presumably top = bottom . weights^T (beta is 0.0, so top is
    # overwritten rather than accumulated into).
    sgemm(False, True, 1.0,
          bottom, 0, in_features,
          self.weights, 0, in_features,
          0.0, top, 0, out_features,
          batch, out_features, in_features)

    # Presumably top += bias_multiplier . bias (beta is 1.0, so the result
    # is added to what the first call produced).
    sgemm(False, False, 1.0,
          self.bias_multiplier, 0, 1,
          self.bias, 0, out_features,
          1.0, top, 0, out_features,
          batch, out_features, 1)
def backward(self):
    """Run the backward pass, filling the three ``*_diff`` buffers.

    Reads ``self.top_diff``, ``self.bottom``, ``self.weights`` and
    ``self.bias_multiplier``; passes ``self.weights_diff``,
    ``self.bias_diff`` and ``self.bottom_diff`` as output operands. All
    three calls use beta = 0.0.

    NOTE(review): operand roles below assume the conventional BLAS
    argument order for ``sgemm``/``sgemv`` -- confirm against their
    definitions.
    """
    top_diff = self.top_diff
    bottom = self.bottom
    batch = bottom.shape[0]
    in_features = np.prod(bottom.shape[1:])
    out_features = self.num_output

    # Presumably weights_diff = top_diff^T . bottom
    sgemm(True, False, 1.0,
          top_diff, 0, out_features,
          bottom, 0, in_features,
          0.0, self.weights_diff, 0, in_features,
          out_features, in_features, batch)

    # Presumably bias_diff = top_diff^T . bias_multiplier
    sgemv(True, batch, out_features, 1.0,
          top_diff, 0, out_features,
          self.bias_multiplier, 0, 1,
          0.0, self.bias_diff, 0, 1)

    # Presumably bottom_diff = top_diff . weights
    sgemm(False, False, 1.0,
          top_diff, 0, out_features,
          self.weights, 0, in_features,
          0.0, self.bottom_diff, 0, in_features,
          batch, in_features, out_features)
def launch(self, symbol_table, wait_for):
    """Enqueue the two-step forward computation and return its final event.

    ``sources``, ``sinks``, ``M``, ``N``, ``K``, ``bias_multiplier`` and
    ``sgemm`` are not defined in this function; they come from the
    enclosing scope.

    Args:
        symbol_table: Mapping indexed by the ``name`` attribute of each
            source/sink to obtain the operand buffers.
        wait_for: Forwarded as ``wait_for`` to the first ``sgemm`` call.

    Returns:
        A one-element list holding whatever the second ``sgemm`` call
        returned.
    """
    top = symbol_table[sinks[0].name]
    bottom = symbol_table[sources[0].name]
    weights = symbol_table[sources[1].name]
    bias = symbol_table[sources[2].name]

    # First product overwrites top (beta = 0.0).
    product_done = sgemm(False, True, 1.0,
                         bottom, 0, K,
                         weights, 0, K,
                         0.0, top, 0, N,
                         M, N, K,
                         wait_for=wait_for)

    # Second product accumulates into top (beta = 1.0) and is chained on
    # the first call's result.
    bias_done = sgemm(False, False, 1.0,
                      bias_multiplier, 0, 1,
                      bias, 0, N,
                      1.0, top, 0, N,
                      M, N, 1,
                      wait_for=product_done)
    return [bias_done]
def launch(self, symbol_table, wait_for):
    """Run the backward computation one leading-axis slice at a time.

    ``sources``, ``sinks``, ``queues``, ``col_data``, ``bias_multiplier``,
    ``im2col``, ``col2im``, ``im2col_global_size``, ``col2im_global_size``,
    ``sgemm`` and ``sgemv`` are not defined in this function; they come
    from the enclosing scope.

    Args:
        symbol_table: Mapping indexed directly by the source/sink objects
            to obtain the operand buffers.
        wait_for: Accepted but not used by this implementation.

    Returns:
        None. Results are written into the three sink buffers.
    """
    cl_queue = queues[0]

    bottom = symbol_table[sources[0]]
    top_diff = symbol_table[sources[1]]
    weights = symbol_table[sources[2]]

    # Element count of one leading-axis slice of each tensor.
    bottom_stride = np.prod(bottom.shape[1:])
    top_stride = np.prod(top_diff.shape[1:])

    # Zero every gradient sink and call sync_ocl() on it before
    # accumulating (two of the calls below use beta = 1.0).
    zeroed = []
    for slot in range(3):
        grad = symbol_table[sinks[slot]]
        grad.fill(0)
        grad.sync_ocl()
        zeroed.append(grad)
    bottom_diff, weights_diff, bias_diff = zeroed

    for i in range(bottom.shape[0]):
        top_at = i * top_stride
        bottom_at = i * bottom_stride

        # Accumulate into bias_diff (beta = 1.0).
        spatial = np.prod(top_diff.shape[2:])
        sgemv(False, top_diff.shape[1], spatial, 1.0,
              top_diff, top_at, spatial,
              bias_multiplier, 0, 1,
              1.0, bias_diff, 0, 1)

        # Fill col_data from slice i of bottom.
        im2col(bottom.ocl_buf, col_data.ocl_buf,
               bottom_at).on(cl_queue, im2col_global_size)

        # Accumulate into weights_diff (beta = 1.0).
        rows = top_diff.shape[1]
        cols = col_data.shape[0]
        inner = col_data.shape[1]
        sgemm(False, True, 1.0,
              top_diff, top_at, inner,
              col_data, 0, inner,
              1.0, weights_diff, 0, cols,
              rows, cols, inner)

        # Overwrite col_data (beta = 0.0), then fold it into slice i of
        # bottom_diff.
        rows = weights.shape[1]
        cols = col_data.shape[1]
        inner = weights.shape[0]
        sgemm(True, False, 1.0,
              weights, 0, rows,
              top_diff, top_at, cols,
              0.0, col_data, 0, cols,
              rows, cols, inner)
        col2im(col_data.ocl_buf, bottom_diff.ocl_buf,
               bottom_at).on(cl_queue, col2im_global_size)
def launch(self, symbol_table, wait_for):
    """Enqueue the forward computation for every leading-axis slice.

    Slice ``i`` is issued on ``queues[i % len(queues)]``. ``sources``,
    ``sinks``, ``queues``, ``col_datas``, ``bias_multiplier``, ``is_1x1``,
    ``padded``, ``im2col`` and ``sgemm`` are not defined in this function;
    they come from the enclosing scope.

    Args:
        symbol_table: Mapping indexed by the ``name`` attribute of each
            source/sink to obtain the operand buffers.
        wait_for: Forwarded as ``wait_for`` to the first operation
            enqueued for each slice.

    Returns:
        A list with one entry per slice: the value returned by that
        slice's final ``sgemm`` call.
    """
    bottom = symbol_table[sources[0].name]
    weights = symbol_table[sources[1].name]
    bias = symbol_table[sources[2].name]
    top = symbol_table[sinks[0].name]

    # Element count of one leading-axis slice of the input and the output.
    in_stride = np.prod(bottom.shape[1:])
    out_stride = np.prod(top.shape[1:])

    m = weights.shape[0]
    n = np.prod(top.shape[2:])
    k = np.prod(weights.shape[1:])

    finished = []
    for i in range(bottom.shape[0]):
        slot = i % len(queues)
        queue = queues[slot]
        in_at = i * in_stride
        out_at = i * out_stride

        if is_1x1:
            # Multiply the weights against the input slice directly.
            evt = sgemm(False, False, 1.0,
                        weights, 0, k,
                        bottom, in_at, n,
                        0.0, top, out_at, n,
                        m, n, k,
                        queue, wait_for=wait_for)
        else:
            # Run im2col into this slot's scratch buffer first, then
            # multiply the weights against that buffer.
            scratch = col_datas[slot]
            evt = im2col(bottom.ocl_buf, scratch.ocl_buf,
                         in_at).on(queue, (padded, ), wait_for=wait_for)
            evt = sgemm(False, False, 1.0,
                        weights, 0, k,
                        scratch, 0, n,
                        0.0, top, out_at, n,
                        m, n, k,
                        queue, wait_for=evt)

        # Bias step accumulates into the output slice (beta = 1.0).
        evt = sgemm(False, False, 1.0,
                    bias, 0, 1,
                    bias_multiplier, 0, n,
                    1.0, top, out_at, n,
                    m, n, 1,
                    queue, wait_for=evt)
        finished.append(evt)
    return finished
def _fc_forward(bottom, filters, bias_multiplier, bias, top):
    """Write one fully-connected layer's output into ``top`` in place.

    Issues two ``sgemm`` calls: the first overwrites ``top`` (beta = 0.0)
    using ``bottom`` and ``filters``; the second accumulates into ``top``
    (beta = 1.0) using ``bias_multiplier`` and ``bias``.

    NOTE(review): presumably top = bottom . filters^T + bias_multiplier .
    bias under the conventional BLAS argument order -- confirm against
    ``sgemm``'s definition.
    """
    N = top.shape[1]
    K = np.prod(bottom.shape[1:])
    M = bottom.shape[0]
    sgemm(False, True, 1.0, bottom, 0, K, filters, 0, K, 0.0, top, 0, N,
          M, N, K)
    sgemm(False, False, 1.0, bias_multiplier, 0, 1, bias, 0, N, 1.0, top, 0,
          N, M, N, 1)


def forward(data):
    """Run the whole network on ``data`` and return the softmax output.

    The pipeline is five convolution stages (the first two followed by
    LRN and pooling, the fifth by pooling) and three fully-connected
    layers, ending in ``SoftmaxForward``.

    Filters, biases, bias multipliers, the LRN parameters (``alpha``,
    ``beta``, ``local_size``) and the ``fc6``/``fc7``/``fc8`` buffers are
    module-level names. ``fc6`` and ``fc7`` are rebound to the result of
    ``ReluForward``; ``fc8`` is filled in place.

    Args:
        data: Input passed to the first convolution.

    Returns:
        The value returned by ``SoftmaxForward(fc8)``.
    """
    global fc6, fc7, fc8

    # Stage 1: conv -> relu -> lrn -> pool
    conv1 = ConvForward(data, conv1_filters, conv1_bias,
                        kernel_size=(11, 11), padding=(0, 0), stride=(4, 4))
    conv1 = ReluForward(conv1)
    norm1, norm1_scale = LrnForward(conv1, alpha=alpha, beta=beta,
                                    local_size=local_size, k=1)
    pool1, pool1_mask = PoolForward(norm1, kernel_size=(3, 3),
                                    padding=(0, 0), stride=(2, 2))

    # Stage 2: conv -> relu -> lrn -> pool
    conv2 = ConvForward(pool1, conv2_filters, conv2_bias,
                        kernel_size=(5, 5), padding=(2, 2), stride=(1, 1))
    conv2 = ReluForward(conv2)
    norm2, norm2_scale = LrnForward(conv2, alpha=alpha, beta=beta,
                                    local_size=local_size, k=1)
    pool2, pool2_mask = PoolForward(norm2, kernel_size=(3, 3),
                                    padding=(0, 0), stride=(2, 2))

    # Stages 3-5: conv -> relu, with pooling after the fifth
    conv3 = ConvForward(pool2, conv3_filters, conv3_bias,
                        kernel_size=(3, 3), padding=(1, 1), stride=(1, 1))
    conv3 = ReluForward(conv3)
    conv4 = ConvForward(conv3, conv4_filters, conv4_bias,
                        kernel_size=(3, 3), padding=(1, 1), stride=(1, 1))
    conv4 = ReluForward(conv4)
    conv5 = ConvForward(conv4, conv5_filters, conv5_bias,
                        kernel_size=(3, 3), padding=(1, 1), stride=(1, 1))
    conv5 = ReluForward(conv5)
    pool5, pool5_mask = PoolForward(conv5, kernel_size=(3, 3),
                                    padding=(0, 0), stride=(2, 2))

    # Fully-connected layers; each one consumes the previous layer's
    # (post-ReLU) output.
    _fc_forward(pool5, fc6_filters, fc6_bias_multiplier, fc6_bias, fc6)
    fc6 = ReluForward(fc6)
    _fc_forward(fc6, fc7_filters, fc7_bias_multiplier, fc7_bias, fc7)
    fc7 = ReluForward(fc7)
    _fc_forward(fc7, fc8_filters, fc8_bias_multiplier, fc8_bias, fc8)

    prob = SoftmaxForward(fc8)
    return prob
def launch(self, symbol_table, wait_for):
    """Enqueue the forward computation for every leading-axis slice.

    Slice ``i`` is issued on ``queues[i % len(queues)]``. ``sources``,
    ``sinks``, ``queues``, ``col_datas``, ``bias_multiplier``, ``is_1x1``,
    ``padded``, ``im2col`` and ``sgemm`` are not defined in this function;
    they come from the enclosing scope.

    Args:
        symbol_table: Mapping indexed by the ``name`` attribute of each
            source/sink to obtain the operand buffers.
        wait_for: Forwarded as ``wait_for`` to the first operation
            enqueued for each slice.

    Returns:
        A list with one entry per slice: the value returned by that
        slice's final ``sgemm`` call.
    """
    bottom = symbol_table[sources[0].name]
    weights = symbol_table[sources[1].name]
    bias = symbol_table[sources[2].name]
    top = symbol_table[sinks[0].name]

    # Element count of one leading-axis slice of the input and the output.
    in_stride = np.prod(bottom.shape[1:])
    out_stride = np.prod(top.shape[1:])

    m = weights.shape[0]
    n = np.prod(top.shape[2:])
    k = np.prod(weights.shape[1:])

    finished = []
    for i in range(bottom.shape[0]):
        slot = i % len(queues)
        queue = queues[slot]
        in_at = i * in_stride
        out_at = i * out_stride

        if is_1x1:
            # Multiply the weights against the input slice directly.
            evt = sgemm(False, False, 1.0,
                        weights, 0, k,
                        bottom, in_at, n,
                        0.0, top, out_at, n,
                        m, n, k,
                        queue, wait_for=wait_for)
        else:
            # Run im2col into this slot's scratch buffer first, then
            # multiply the weights against that buffer.
            scratch = col_datas[slot]
            evt = im2col(bottom.ocl_buf, scratch.ocl_buf,
                         in_at).on(queue, (padded, ), wait_for=wait_for)
            evt = sgemm(False, False, 1.0,
                        weights, 0, k,
                        scratch, 0, n,
                        0.0, top, out_at, n,
                        m, n, k,
                        queue, wait_for=evt)

        # Bias step accumulates into the output slice (beta = 1.0).
        evt = sgemm(False, False, 1.0,
                    bias, 0, 1,
                    bias_multiplier, 0, n,
                    1.0, top, out_at, n,
                    m, n, 1,
                    queue, wait_for=evt)
        finished.append(evt)
    return finished