# See the License for the specific language governing permissions and # limitations under the License. ############################################################################## import os, unittest, time import torch import torch.cuda from torch.autograd import Variable from torch.nn.parameter import Parameter import tensor_comprehensions as tc from tensor_comprehensions.mapping_options import Options from common import TestCase, run_tests tc.SetDebugFlags(dump_cuda=False) MATMUL_LANG = """ def matmul(float(M,N) A, float(N,K) B) -> (output) { output(i, j) +=! A(i, kk) * B(kk, j) } """ MATMUL_ABS_LANG = """ def matmul(float(M,N) A, float(N,K) B) -> (output) { output(i, j) +=! A(i, kk) * B(kk, j) } def abs(float(M, N) A) -> (O1) { O1(m, n) = fabs(A(m, n)) }
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
import tensor_comprehensions as tc
import torch
import unittest

# Dump the generated CUDA source whenever a TC layer executes, whether it is
# a simple run or an autotuner run.
tc.SetDebugFlags(dump_cuda=True)


class TestDumpCuda(unittest.TestCase):
    """Smoke test: running a TC layer with CUDA dumping enabled must not raise."""

    def test_dump_cuda(self):
        tc_lang = """
        def matmul(float(M,N) A, float(N,K) B) -> (output) {
          output(i, j) +=! A(i, kk) * B(kk, j)
        }
        """
        # Build the two CUDA operands first, then define and invoke the layer;
        # the dump happens as a side effect of the call.
        a = torch.randn(3, 4).cuda()
        b = torch.randn(4, 5).cuda()
        matmul = tc.define(tc_lang, name="matmul")
        matmul(a, b)
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## import os, unittest import torch import torch.cuda import tensor_comprehensions as tc tc.SetDebugFlags(dump_cuda=True, debug_tc_mapper=True) class TestDebugInit(unittest.TestCase): def test_debug_init(self): lang = """ def matmul(float(M,N) A, float(N,K) B) -> (output) { output(i, j) +=! A(i, kk) * B(kk, j) } """ matmul = tc.define(lang, name="matmul") mat1, mat2 = torch.randn(3, 4).cuda(), torch.randn(4, 5).cuda() out = matmul(mat1, mat2) if __name__ == '__main__':
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## import tensor_comprehensions as tc import torch import torch.cuda import unittest tc.SetDebugFlags(debug_tuner=False, debug_tc_mapper=False) class TestLayerNorm(unittest.TestCase): def test_layernorm(self): # NOTE: take note of use of {{ }} below for handling TC with scalars lang = """ def layernorm(float(T, B, C) I) -> (O, mean, centered, var) {{ mean(t, b) +=! I(t, b, c) / C centered(t, b, c) = I(t, b, c) - mean(t, b) var(t, b) +=! centered(t, b, c) * centered(t, b, c) var(t, b) = (var(t, b) + {eps}) / C O(t, b, c) = centered(t, b, c) / rsqrt(var(t, b)) }}