Exemplo n.º 1
0
def test_parameters_kernel_regex(profile, format):
    data = profile("""
    __global__ void addVectors(int* p) {
        int a = 5;
        *p = a;
    }
    __global__ void subtractVectors(int* p) {
        int a = 5;
        *p = a;
    }
    __global__ void generalKernel(int* p) {
        int a = 5;
        *p = a;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        addVectors<<<1, 1>>>(dptr);
        subtractVectors<<<1, 1>>>(dptr);
        generalKernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }""", format=format, kernel_regex=".*a.*ors.*", with_metadata=True)

    mappings = data["mappings"]

    assert metadata_file("addVectors") in mappings
    assert metadata_file("subtractVectors") in mappings
    assert metadata_file("generalKernel") not in mappings

    assert len(mappings[kernel_file("addVectors", format=format)]["warps"]) == 1
    assert len(mappings[kernel_file("subtractVectors", format=format)]["warps"]) == 1
    assert kernel_file("generalKernel", format=format) not in mappings
Exemplo n.º 2
0
def test_parameters_runtime_tracking_overwrite(profile):
    code = """
    __global__ void kernel(int* p) {
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc((void**) &dptr, sizeof(int) * 137); // try to find unique size
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """

    data = profile(code, with_metadata=True)
    allocations = data["mappings"][kernel_file("kernel")]["allocations"]
    alloc = allocations[0]

    data = profile(code, with_metadata=True, runtime_tracking=True)
    allocations = data["mappings"][kernel_file("kernel")]["allocations"]
    validate_allocations(allocations)

    for record in allocations:
        if record["size"] == alloc["size"]:
            assert record["nameString"] == "dptr"
            return

    assert False  # allocation was not overwritten by static tracking
Exemplo n.º 3
0
def test_allocation_global(profile, format):
    data = profile("""
    #include <cstdio>
    __global__ void kernel(int* p) {
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc((void**) &dptr, sizeof(int) * 10);
        printf("%p\\n", dptr);
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """,
                   format=format,
                   with_metadata=True)

    allocations = data["mappings"][kernel_file("kernel",
                                               format=format)]["allocations"]

    assert len(allocations) == 1
    assert allocations[0]["active"]
    assert allocations[0]["elementSize"] == 4
    assert allocations[0]["size"] == 40
    assert allocations[0]["typeString"] == "i32"
    assert allocations[0]["space"] == 0
    assert pointer_matches(allocations[0]["address"], data["stdout"].strip())
    assert allocations[0]["nameString"] == "dptr"
    assert allocations[0]["location"].endswith("input.cu:9")
Exemplo n.º 4
0
def test_allocation_shared(profile, format):
    data = profile("""
    #include <cstdio>
    __global__ void kernel() {
        __shared__ int arr[10];
        printf("%p\\n", arr);
    }
    int main() {
        kernel<<<1, 1>>>();
        return 0;
    }
    """,
                   format=format,
                   with_metadata=True)

    allocations = data["mappings"][kernel_file("kernel",
                                               format=format)]["allocations"]

    assert len(allocations) == 1
    assert allocations[0]["active"]
    assert allocations[0]["elementSize"] == 4
    assert allocations[0]["size"] == 40
    assert "typeIndex" in allocations[0]
    assert allocations[0]["space"] == 1
    assert pointer_matches(allocations[0]["address"], data["stdout"].strip())
Exemplo n.º 5
0
def test_addrspace_shared_constant_access(profile, format):
    data = profile("""
    __constant__ int constArr[10];
    __global__ void kernel() {
        __shared__ int arr[10];
        arr[threadIdx.x] = constArr[1];
    }
    int main() {
        kernel<<<1, 1>>>();
        return 0;
    }
    """,
                   format=format)

    assert data[kernel_file("kernel", format=format)]["warps"][0]["space"] == 2
    assert data[kernel_file("kernel", format=format)]["warps"][1]["space"] == 1
Exemplo n.º 6
0
def test_trace_thread_id(profile, format):
    data = profile("""
    __global__ void kernel(int* p) {
        *p = threadIdx.x;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int) * 64);
        kernel<<<1, 2>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """,
                   format=format)
    warp = data[kernel_file("kernel", format=format)]["warps"][0]
    assert warp["blockIdx"]["x"] == 0
    assert warp["blockIdx"]["y"] == 0
    assert warp["blockIdx"]["z"] == 0

    ids = [
        "{}.{}.{}".format(a["threadIdx"]["z"], a["threadIdx"]["y"],
                          a["threadIdx"]["x"]) for a in warp["accesses"]
    ]

    assert "0.0.0" in ids
    assert "0.0.1" in ids
Exemplo n.º 7
0
def test_trace_bank_size(profile, format):
    data = profile("""
    __global__ void kernel() {
        int x = threadIdx.x;
    }
    int main() {
        cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeFourByte);
        kernel<<<1, 1>>>();
        
        cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
        kernel<<<1, 1>>>();
        return 0;
    }
    """,
                   format=format)
    assert data[kernel_file("kernel", format=format)]["bankSize"] == 4
    assert data[kernel_file("kernel", index=1, format=format)]["bankSize"] == 8
Exemplo n.º 8
0
def test_trace_multiple_time(profile, format):
    data = profile("""
    __global__ void kernel(int* p) {
        int x = *p;
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }""",
                   format=format)
    info = (data[kernel_file("kernel", 0,
                             format)], data[kernel_file("kernel", 1, format)])
    assert info[0]["start"] < info[1]["start"]
Exemplo n.º 9
0
def test_trace_time(profile, format):
    data = profile("""
    #include <cstdio>
    __global__ void kernel(int* p) {
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """,
                   format=format)
    assert data[kernel_file("kernel",
                            format=format)]["end"] > data[kernel_file(
                                "kernel", format=format)]["start"]
Exemplo n.º 10
0
def test_parameters_compression_content(profile, format):
    code = """
    __global__ void kernel(int* p) {
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """

    uncompressed = profile(code, format=format)
    compressed = profile(code, format=format, compress=True)

    assert (uncompressed[kernel_file("kernel", format=format)]["kernel"] ==
            compressed[kernel_file("kernel", format=format, compress=True)]["kernel"])
Exemplo n.º 11
0
def test_allocation_runtime_tracking_capture(profile):
    code = """
    __global__ void kernel(int* p) {
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc((void**) &dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """

    data = profile(code, with_metadata=True, runtime_tracking=True)
    allocations = data["mappings"][kernel_file("kernel")]["allocations"]
    assert len(allocations) > 1

    data = profile(code, with_metadata=True)
    allocations = data["mappings"][kernel_file("kernel")]["allocations"]
    assert len(allocations) == 1
Exemplo n.º 12
0
def test_trace_warp_size(profile, format):
    data = profile("""
    __global__ void kernel() {
        int x = threadIdx.x;
    }
    int main() {
        kernel<<<1, 1>>>();
        return 0;
    }
    """,
                   format=format)
    assert data[kernel_file("kernel", format=format)]["warpSize"] == 32
Exemplo n.º 13
0
def test_addrspace_constant_access(profile, format):
    data = profile("""
    __constant__ int arr[10];
    __global__ void kernel() {
        int x = arr[1];
    }
    int main() {
        kernel<<<1, 1>>>();
        return 0;
    }
    """,
                   format=format)
    assert data[kernel_file("kernel", format=format)]["warps"][0]["space"] == 2
Exemplo n.º 14
0
def test_trace_dimensions(profile, format):
    data = profile("""
    __global__ void kernel() {
        int x = threadIdx.x;
    }
    int main() {
        dim3 gridDim(3, 4, 5);
        dim3 blockDim(6, 7, 8);
        
        kernel<<<gridDim, blockDim>>>();
        return 0;
    }
    """,
                   format=format)
    grid = data[kernel_file("kernel", format=format)]["gridDim"]
    assert grid["x"] == 3
    assert grid["y"] == 4
    assert grid["z"] == 5

    block = data[kernel_file("kernel", format=format)]["blockDim"]
    assert block["x"] == 6
    assert block["y"] == 7
    assert block["z"] == 8
Exemplo n.º 15
0
def test_general_release_mode(profile):
    data = profile("""
    __global__ void kernel(int* p) {
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """,
                   release=True)
    assert kernel_file("kernel") in data
Exemplo n.º 16
0
def test_addrspace_global_access(profile, format):
    data = profile("""
    __global__ void kernel(int* p) {
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """,
                   format=format)
    assert data[kernel_file("kernel", format=format)]["warps"][0]["space"] == 0
Exemplo n.º 17
0
def test_parameters_disable_output(profile, format):
    code = """
    __global__ void kernel(int* p) {
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """
    data = profile(code, disable_output=True)

    assert kernel_file("kernel", format=format) not in data
Exemplo n.º 18
0
def test_trace_warp_id(profile, format):
    data = profile("""
    __global__ void kernel(int* p) {
        *p = threadIdx.x;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int) * 64);
        kernel<<<1, 64>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """,
                   format=format)
    warps = data[kernel_file("kernel", format=format)]["warps"]
    for warp in warps:
        assert len(warp["accesses"]) == 32
Exemplo n.º 19
0
def test_trace_type_and_name(profile, format):
    data = profile("""
    __global__ void kernel(int* p) {
        int x = *p;
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }""",
                   format=format)
    info = data[kernel_file("kernel", format=format)]
    assert info["type"] == "trace"
    assert info["kernel"] == "kernel"
Exemplo n.º 20
0
def test_parameters_instrument_locals_enable(profile, format):
    data = profile("""
    __global__ void kernel(int* p) {
        int a = 5;
        *p = a;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }""", format=format, instrument_locals=True)

    warps = data[kernel_file("kernel", format=format)]["warps"]

    assert len(warps) == 4
Exemplo n.º 21
0
def test_metadata_debug_index(profile, format):
    data = profile("""
    __global__ void kernel(int* p) {
        int x = *p;
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }""",
                   format=format)
    warps = data[kernel_file("kernel", format=format)]["warps"]
    assert warps[0]["debugId"] == 0
    assert warps[1]["debugId"] == 1
Exemplo n.º 22
0
def test_trace_multiple_invocations(profile, format):
    data = profile("""
    #include <cstdio>
    __global__ void kernel(int* p) {
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """,
                   format=format)
    for i in xrange(2):
        assert kernel_file("kernel", i, format) in data
Exemplo n.º 23
0
def test_access_type(profile, format):
    data = profile("""
    #include <cstdio>
    __global__ void kernel(int* p) {
        *p = 5;
        int a = *p;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """, format=format, with_metadata=True)
    warps = data["mappings"][kernel_file("kernel", format=format)]["warps"]

    assert warps[0]["kind"] == AccessType.Write
    assert warps[1]["kind"] == AccessType.Read
Exemplo n.º 24
0
def test_metadata_type_index_shared_variable(profile, format):
    data = profile("""
    #include <cstdio>
    __global__ void kernel() {
        __shared__ float arr;
        arr = threadIdx.x;
    }
    int main() {
        kernel<<<1, 1>>>();
        return 0;
    }
    """,
                   format=format)

    types = data[metadata_file("kernel")]["typeMap"]
    assert len(types) > 0

    warp = data[kernel_file("kernel", format=format)]["warps"][0]
    assert warp["typeIndex"] == types.index("float")
Exemplo n.º 25
0
def test_metadata_name_index_shared_buffer(profile, format):
    data = profile("""
    #include <cstdio>
    __global__ void kernel() {
        __shared__ float arr[10];
        arr[threadIdx.x] = threadIdx.x;
    }
    int main() {
        kernel<<<1, 1>>>();
        return 0;
    }
    """,
                   format=format)

    names = data[metadata_file("kernel")]["nameMap"]
    assert len(names) > 0

    allocations = data[kernel_file("kernel", format=format)]["allocations"][0]
    assert allocations["nameIndex"] == names.index("arr")
Exemplo n.º 26
0
def test_metadata_type_index(profile, format):
    data = profile("""
    __global__ void kernel(int* p) {
        int x = *p;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }""",
                   format=format)

    types = data[metadata_file("kernel")]["typeMap"]
    assert len(types) > 0

    warp = data[kernel_file("kernel", format=format)]["warps"][0]
    assert warp["typeIndex"] == types.index("i32")
Exemplo n.º 27
0
def test_access_local(profile, format):
    data = profile("""
    #include <cstdio>
    __global__ void kernel(float* p) {
        int a = 5;
        int b = a;
        *p = 5;
    }
    int main() {
        float* dptr;
        cudaMalloc(&dptr, sizeof(float));
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """, format=format, with_metadata=True)
    accesses = data["mappings"][kernel_file("kernel", format=format)]["warps"][0]["accesses"]

    assert len(accesses) == 1
Exemplo n.º 28
0
def test_access_value_float(profile, format):
    data = profile("""
    #include <cstdio>
    __global__ void kernel(float* p) {
        p[threadIdx.x] = *p;
    }
    int main() {
        float data = 1337;
        float* dptr;
        cudaMalloc(&dptr, sizeof(data));
        cudaMemcpy(dptr, &data, sizeof(data), cudaMemcpyHostToDevice);
        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """, format=format, with_metadata=True)
    warps = data["mappings"][kernel_file("kernel", format=format)]["warps"]

    assert warps[0]["accesses"][0]["value"] == "0x0000000000000539"
    assert warps[1]["accesses"][0]["value"] == "0x0000000000000539"
Exemplo n.º 29
0
def test_access_address_match(profile, format):
    data = profile("""
    #include <iostream>
    #include <iomanip>
    __global__ void kernel(int* p) {
        *p = 5;
    }
    int main() {
        int* dptr;
        cudaMalloc(&dptr, sizeof(int));

        size_t value = reinterpret_cast<size_t>(dptr);
        std::cout << std::uppercase << std::hex << std::setfill('0') << "0x" << std::setw(16) << value << std::endl;

        kernel<<<1, 1>>>(dptr);
        cudaFree(dptr);
        return 0;
    }
    """, format=format, with_metadata=True)
    assert data["stdout"].strip() == data["mappings"][kernel_file("kernel",
                                                                  format=format)]["warps"][0]["accesses"][0]["address"]
Exemplo n.º 30
0
def test_access_complex0(profile, format):
    data = profile("""
    #include <cstdio>
    __global__ void kernel(float* a, float* b, float* c) {
        int tid = threadIdx.x;
        a[tid] = b[tid] + c[tid];
    }
    int main() {
        float* dptr;
        cudaMalloc(&dptr, sizeof(float));
        kernel<<<1, 1>>>(dptr, dptr, dptr);
        cudaFree(dptr);
        return 0;
    }
    """, format=format, with_metadata=True)
    warps = data["mappings"][kernel_file("kernel", format=format)]["warps"]

    assert warps[0]["kind"] == AccessType.Read
    assert warps[1]["kind"] == AccessType.Read
    assert warps[2]["kind"] == AccessType.Write

    assert warps[0]["size"] == 4
    assert warps[1]["size"] == 4
    assert warps[2]["size"] == 4