示例#1
0
def writeBenchmarkFiles(stepBaseDir, solutions, problemSizes, stepName,
                        filesToCopy):
    """Generate every file needed to build the client for one benchmark step.

    In order, this:
      1. creates the "Solutions" and "Kernels" sub-directories of
         globalParameters["WorkingPath"] unless "MergeFiles" is set,
      2. gathers the distinct kernels and beta-only kernels of all solutions,
      3. writes the solution and kernel sources via writeSolutionsAndKernels,
      4. writes the CMake fragment via writeCMake,
      5. writes the client parameters via writeClientParameters.

    Args:
        stepBaseDir: forwarded unchanged to writeClientParameters.
        solutions: non-empty sequence of solutions; the problem type is
            taken from solutions[0]["ProblemType"].
        problemSizes: forwarded unchanged to writeClientParameters.
        stepName: used to build the client name "TensileBenchmark_<stepName>"
            and forwarded to writeClientParameters.
        filesToCopy: forwarded to writeCMake and writeClientParameters.
    """

    def _appendUnique(target, candidates):
        # Linear membership test keeps the first-seen order of the items.
        for candidate in candidates:
            if candidate not in target:
                target.append(candidate)

    # Per-solution / per-kernel files live in their own sub-directories;
    # merged output does not need them.
    if not globalParameters["MergeFiles"]:
        for subDir in ("Solutions", "Kernels"):
            ensurePath(os.path.join(globalParameters["WorkingPath"], subDir))

    ##############################################################################
    # Min Naming
    ##############################################################################
    uniqueKernels = []
    uniqueKernelsBetaOnly = []
    for sol in solutions:
        _appendUnique(uniqueKernels, sol.getKernels())
        _appendUnique(uniqueKernelsBetaOnly, sol.getKernelsBetaOnly())

    serialNamingSolutions = Solution.getSerialNaming(solutions)
    serialNamingKernels = Solution.getSerialNaming(uniqueKernels)
    minNamingSolutions = Solution.getMinNaming(solutions)
    minNamingKernels = Solution.getMinNaming(uniqueKernels)

    writerForSolutions = SolutionWriter(
        minNamingSolutions, serialNamingSolutions,
        minNamingKernels, serialNamingKernels)
    writerForSourceKernels = KernelWriterSource(
        minNamingKernels, serialNamingKernels)
    writerForAssemblyKernels = KernelWriterAssembly(
        minNamingKernels, serialNamingKernels)

    # write solution, kernels and CMake
    # All solutions of a benchmark step are treated as sharing the problem
    # type of the first one.
    benchmarkProblemType = solutions[0]["ProblemType"]
    writeSolutionsAndKernels(
        globalParameters["WorkingPath"],
        [benchmarkProblemType],
        solutions,
        uniqueKernels,
        uniqueKernelsBetaOnly,
        writerForSolutions,
        writerForSourceKernels,
        writerForAssemblyKernels)

    ##############################################################################
    # Write CMake
    ##############################################################################
    benchmarkClientName = "TensileBenchmark_%s" % stepName
    writeCMake(globalParameters["WorkingPath"], solutions, uniqueKernels,
               filesToCopy, benchmarkClientName)

    # First argument (forBenchmark) is True: parameters are for a benchmark
    # client.
    writeClientParameters(True, solutions, problemSizes, stepName,
                          filesToCopy, stepBaseDir)
def writeCMake(outputPath, solutions, kernels, libraryStaticFiles, clientName):
    """Write "Generated.cmake" into outputPath and copy the static sources.

    The generated file starts with CMakeHeader and then defines three CMake
    lists:
      * TensileClient_SOLUTIONS - Solutions.h/.cpp when
        globalParameters["MergeFiles"] is set, otherwise one .h/.cpp pair per
        solution under Solutions/.
      * TensileClient_KERNELS - Kernels.h/.cpp when "MergeFiles" is set,
        otherwise one .h/.cpp pair per kernel under Kernels/.
      * TensileClient_SOURCE - every entry of libraryStaticFiles; each of
        those files is also copied from globalParameters["SourcePath"] into
        outputPath.

    Args:
        outputPath: directory receiving Generated.cmake and the copied files.
        solutions: solutions to list in TensileClient_SOLUTIONS.
        kernels: kernels to list in TensileClient_KERNELS; each is indexed
            with "KernelLanguage" to pick the name writer.
        libraryStaticFiles: file names relative to
            globalParameters["SourcePath"].
        clientName: not used by this function; kept so existing callers keep
            working.

    The output file is managed by a ``with`` block so the handle is closed
    (and buffered output flushed) even when a copy or name lookup raises
    part-way through.
    """
    print1("# Writing Custom CMake")
    ##############################################################################
    # Min Naming
    ##############################################################################
    # Serial names are only computed for short, per-file (non-merged) output.
    if globalParameters["ShortNames"] and not globalParameters["MergeFiles"]:
        solutionSerialNaming = Solution.getSerialNaming(solutions)
        kernelSerialNaming = Solution.getSerialNaming(kernels)
    else:
        solutionSerialNaming = None
        kernelSerialNaming = None
    solutionMinNaming = Solution.getMinNaming(solutions)
    kernelMinNaming = Solution.getMinNaming(kernels)
    solutionWriter = SolutionWriter(
        solutionMinNaming, solutionSerialNaming,
        kernelMinNaming, kernelSerialNaming)
    kernelWriterSource = KernelWriterSource(
        kernelMinNaming, kernelSerialNaming)
    kernelWriterAssembly = KernelWriterAssembly(
        kernelMinNaming, kernelSerialNaming)

    generatedPath = os.path.join(outputPath, "Generated.cmake")
    with open(generatedPath, "w") as generatedFile:
        generatedFile.write(CMakeHeader)

        # write solution names
        generatedFile.write("set( TensileClient_SOLUTIONS\n")
        if globalParameters["MergeFiles"]:
            generatedFile.write("  ${CMAKE_SOURCE_DIR}/Solutions.h\n")
            generatedFile.write("  ${CMAKE_SOURCE_DIR}/Solutions.cpp\n")
        else:
            for solution in solutions:
                solutionName = solutionWriter.getSolutionName(solution)
                generatedFile.write(
                    "  ${CMAKE_SOURCE_DIR}/Solutions/%s.h\n" % (solutionName))
                generatedFile.write(
                    "  ${CMAKE_SOURCE_DIR}/Solutions/%s.cpp\n" % (solutionName))
        generatedFile.write("  )\n")

        # write kernel names
        generatedFile.write("set( TensileClient_KERNELS\n")
        if globalParameters["MergeFiles"]:
            generatedFile.write("  ${CMAKE_SOURCE_DIR}/Kernels.h\n")
            generatedFile.write("  ${CMAKE_SOURCE_DIR}/Kernels.cpp\n")
        else:
            for kernel in kernels:
                # The kernel's language decides which writer names it.
                if kernel["KernelLanguage"] == "Source":
                    kernelName = kernelWriterSource.getKernelName(kernel)
                else:
                    kernelName = kernelWriterAssembly.getKernelName(kernel)
                generatedFile.write(
                    "  ${CMAKE_SOURCE_DIR}/Kernels/%s.h\n" % (kernelName))
                generatedFile.write(
                    "  ${CMAKE_SOURCE_DIR}/Kernels/%s.cpp\n" % (kernelName))
        generatedFile.write("  )\n")

        # static client sources: copy each file next to the generated cmake
        # and list it so it is compiled into the client
        generatedFile.write("set( TensileClient_SOURCE\n")
        for fileName in libraryStaticFiles:
            # copy file
            shutil_copy(
                os.path.join(globalParameters["SourcePath"], fileName),
                outputPath)
            # add file to cmake
            generatedFile.write("  ${CMAKE_SOURCE_DIR}/%s\n" % fileName)
        generatedFile.write("  )\n\n")
示例#3
0
def writeClientParameters(forBenchmark, solutions, problemSizes, stepName, \
    functionList):
    h = ""

    ##############################################################################
    # Min Naming
    ##############################################################################
    if forBenchmark:
        kernels = []
        for solution in solutions:
            solutionKernels = solution.getKernels()
            for kernel in solutionKernels:
                if kernel not in kernels:
                    kernels.append(kernel)

        solutionSerialNaming = Solution.getSerialNaming(solutions)
        kernelSerialNaming = Solution.getSerialNaming(kernels)
        solutionMinNaming = Solution.getMinNaming(solutions)
        kernelMinNaming = Solution.getMinNaming(kernels)
        solutionWriter = SolutionWriter( \
            solutionMinNaming, solutionSerialNaming, \
            kernelMinNaming, kernelSerialNaming)

    if forBenchmark:
        if globalParameters["MergeFiles"]:
            h += "#include \"Solutions.h\"\n"
        else:
            for solution in solutions:
                solutionName = solutionWriter.getSolutionName(solution)
                h += "#include \"" + solutionName + ".h\"\n"
        h += "\n"
    else:
        h += "#include \"Tensile.h\"\n"

    h += "typedef enum {\n"
    h += "    enum_float,\n"
    h += "    enum_double,\n"
    h += "    enum_TensileComplexFloat,\n"
    h += "    enum_TensileComplexDouble\n"
    h += "#ifdef Tensile_ENABLE_HALF\n"
    h += "    ,enum_TensileHalf\n"
    h += "#endif\n"
    h += "} DataTypeEnum;\n"
    h += "\n"

    h += "// Debug Params\n"
    h += "const bool printTensorA=%s;\n" % toCppBool(
        globalParameters["PrintTensorA"])
    h += "const bool printTensorB=%s;\n" % toCppBool(
        globalParameters["PrintTensorB"])
    h += "const bool printTensorC=%s;\n" % toCppBool(
        globalParameters["PrintTensorC"])

    h += "const bool printWinnersOnly=%s;\n" % toCppBool(
        globalParameters["PrintWinnersOnly"])
    h += "\n"

    h += "const char indexChars[%u] = \"%s" \
        % (len(globalParameters["IndexChars"])+1, \
        globalParameters["IndexChars"][0])
    for i in range(1, len(globalParameters["IndexChars"])):
        h += globalParameters["IndexChars"][i]
    h += "\";\n"

    h += "unsigned int functionIdx;\n"
    h += "unsigned int dataTypeIdx;\n"
    h += "unsigned int problemTypeIdx;\n"
    h += "\n"

    ##############################################################################
    # Problem Types
    ##############################################################################
    #dataTypes = []
    #problemTypes = []
    #functionSerialToDataTypeAndIdx = []
    dataTypes = []
    problemTypes = []
    problemTypesForDataType = {}  # for data type
    schedulesForProblemType = {}  # for problem type
    functionInfo = [
    ]  # dataTypeIdx, problemTypeIdx, idxWithinDataType, idxWithinProblemType

    if forBenchmark:
        problemType = solutions[0]["ProblemType"]
        dataType = problemType["DataType"]
        dataTypes.append(dataType)
        problemTypes.append(problemType)
        problemTypesForDataType[dataType] = [problemType]
        schedulesForProblemType[problemType] = solutions
        numProblemTypes = 1
        for solution in solutions:
            functionInfo.append([0, 0, 0, 0, 0, 0])
    else:
        for functionIdx in range(0, len(functionList)):
            function = functionList[functionIdx]
            scheduleName = function[0]
            problemType = function[1]
            dataType = problemType["DataType"]
            if dataType not in dataTypes:
                dataTypes.append(dataType)
                problemTypesForDataType[dataType] = []
            if problemType not in problemTypesForDataType[dataType]:
                problemTypesForDataType[dataType].append(problemType)
                schedulesForProblemType[problemType] = []
            schedulesForProblemType[problemType].append(scheduleName)

        # sort
        dataTypes = sorted(dataTypes)
        for dataType in dataTypes:
            problemTypesForDataType[dataType] = \
                sorted(problemTypesForDataType[dataType])
            for problemType in problemTypesForDataType[dataType]:
                schedulesForProblemType[problemType] = \
                    sorted(schedulesForProblemType[problemType])

        # assign info
        functionIdxSerial = 0
        problemTypeIdxSerial = 0
        for dataTypeIdxSerial in range(0, len(dataTypes)):
            dataType = dataTypes[dataTypeIdxSerial]
            functionIdxForDataType = 0
            for problemTypeIdxForDataType in range(0, \
                len(problemTypesForDataType[dataType])):
                problemType = \
                    problemTypesForDataType[dataType][problemTypeIdxForDataType]
                problemTypes.append(problemType)
                functionIdxForProblemType = 0
                for functionIdxForProblemType in range(0, \
                    len(schedulesForProblemType[problemType])):
                    functionInfo.append([ \
                        dataTypeIdxSerial, \
                        problemTypeIdxForDataType, \
                        problemTypeIdxSerial, \
                        functionIdxSerial,\
                        functionIdxForDataType,\
                        functionIdxForProblemType, \
                        ])
                    functionIdxForProblemType += 1
                    functionIdxForDataType += 1
                    functionIdxSerial += 1
                problemTypeIdxSerial += 1
        numProblemTypes = problemTypeIdxSerial
        numFunctions = functionIdxSerial
        h += "const unsigned int numFunctions = %u;\n" % numFunctions

    ##############################################################################
    # Data Types
    ##############################################################################
    h += "/* data types */\n"
    numDataTypes = len(dataTypes)
    h += "const unsigned int numDataTypes = %u;\n" % numDataTypes
    h += "const DataTypeEnum dataTypeEnums[numDataTypes] = { enum_%s" \
        % dataTypes[0].toCpp()
    for dataTypeIdx in range(1, numDataTypes):
        h += ", enum_%s" % dataTypes[dataTypeIdx].toCpp()
    h += " };\n"
    # bytes per elements
    h += "const unsigned int bytesPerElement[numDataTypes] = { %u" \
        % (dataTypes[0].numBytes())
    for dataTypeIdx in range(1, numDataTypes):
        dataType = dataTypes[dataTypeIdx]
        h += ", %u" % dataType.numBytes()
    h += " };\n"
    # flops per mac
    h += "const unsigned int numFlopsPerMac[numDataTypes] = { %u" \
        % (2 if dataTypes[0].isReal() else 8)
    for dataTypeIdx in range(1, numDataTypes):
        dataType = dataTypes[dataTypeIdx]
        h += ", %u" % (2 if dataType.isReal() else 8)
    h += " };\n"
    for dataTypeIdx in range(0, numDataTypes):
        h += "#define Tensile_DATA_TYPE_%s\n" \
            % dataTypes[dataTypeIdx].toCpp().upper()

    ##############################################################################
    # Problem Types
    ##############################################################################
    h += "/* problem types */\n"
    h += "const unsigned int numProblemTypes = %u;\n" % numProblemTypes
    # Num C Indices
    h += "const unsigned int numIndicesC[numProblemTypes] = { %u" \
        % problemTypes[0]["NumIndicesC"]
    for problemTypeIdx in range(1, numProblemTypes):
        problemType = problemTypes[problemTypeIdx]
        h += ", %u" % problemType["NumIndicesC"]
    h += " };\n"

    # Num AB Indices
    maxNumIndicesAB = len(problemTypes[0]["IndexAssignmentsA"])
    h += "const unsigned int numIndicesAB[numProblemTypes] = { %u" \
        % len(problemTypes[0]["IndexAssignmentsA"])
    for problemTypeIdx in range(1, numProblemTypes):
        problemType = problemTypes[problemTypeIdx]
        numIndicesAB = len(problemType["IndexAssignmentsA"])
        h += ", %u" % numIndicesAB
        maxNumIndicesAB = max(numIndicesAB, maxNumIndicesAB)
    h += " };\n"
    h += "const unsigned int maxNumIndicesAB = %u;\n" % maxNumIndicesAB
    # Index Assignments A
    h += "const unsigned int indexAssignmentsA[numProblemTypes][maxNumIndicesAB] = {\n"
    for problemTypeIdx in range(0, numProblemTypes):
        problemType = problemTypes[problemTypeIdx]
        indices = problemType["IndexAssignmentsA"]
        h += "  { %u" % indices[0]
        for i in range(1, maxNumIndicesAB):
            if i < len(indices):
                h += ", %u" % indices[i]
            else:
                h += ", static_cast<unsigned int>(-1)"
        if problemTypeIdx < numProblemTypes - 1:
            h += " },\n"
        else:
            h += " }\n"
    h += "};\n"
    # Index Assignments B
    h += "const unsigned int indexAssignmentsB[numProblemTypes][maxNumIndicesAB] = {\n"
    for problemTypeIdx in range(0, numProblemTypes):
        problemType = problemTypes[problemTypeIdx]
        indices = problemType["IndexAssignmentsB"]
        h += "  { %u" % indices[0]
        for i in range(1, maxNumIndicesAB):
            if i < len(indices):
                h += ", %u" % indices[i]
            else:
                h += ", static_cast<unsigned int>(-1)"
        if problemTypeIdx < numProblemTypes - 1:
            h += " },\n"
        else:
            h += " }\n"
    h += "};\n"
    # beta
    h += "bool useBeta[numProblemTypes] = { %s" \
        % ("true" if problemTypes[0]["UseBeta"] else "false")
    for problemTypeIdx in range(1, numProblemTypes):
        problemType = problemTypes[problemTypeIdx]
        h += ", %s" % ("true" if problemType["UseBeta"] else "false")
    h += " };\n"
    # Complex Conjugates
    h += "const bool complexConjugateA[numProblemTypes] = { %s" \
        % ("true" if problemTypes[0]["ComplexConjugateA"] else "false" )
    for problemTypeIdx in range(1, numProblemTypes):
        problemType = problemTypes[problemTypeIdx]
        h += ", %s" % ("true"
                       if problemTypes[0]["ComplexConjugateA"] else "false")
    h += " };\n"
    h += "const bool complexConjugateB[numProblemTypes] = { %s" \
        % ("true" if problemTypes[0]["ComplexConjugateB"] else "false" )
    for problemTypeIdx in range(1, numProblemTypes):
        problemType = problemTypes[problemTypeIdx]
        h += ", %s" % ("true"
                       if problemTypes[0]["ComplexConjugateB"] else "false")
    h += " };\n"
    h += "\n"

    if not forBenchmark:
        h += "// dataTypeIdxSerial, problemTypeIdxForDataType, problemTypeIdxSerial, functionIdxSerial, functionIdxForDataType, functionIdxForProblemType\n"
        first = True
        h += "const unsigned int functionInfo[numFunctions][6] = {\n"
        for info in functionInfo:
            h += "%s{ %u, %u, %u, %u, %u, %u }" % ("  " if first else ",\n  ", \
                info[0], info[1], info[2], info[3], info[4], info[5] )
            first = False
        h += " };\n"

    ##############################################################################
    # Problem Sizes
    ##############################################################################
    maxNumIndices = problemTypes[0]["TotalIndices"]
    if not forBenchmark:
        for problemType in problemTypes:
            maxNumIndices = max(problemType["TotalIndices"], maxNumIndices)
    h += "const unsigned int maxNumIndices = %u;\n" % maxNumIndices
    h += "const unsigned int totalIndices[numProblemTypes] = { %u" \
        % problemTypes[0]["TotalIndices"]
    for problemTypeIdx in range(1, numProblemTypes):
        h += ", %u" % problemTypes[problemTypeIdx]["TotalIndices"]
    h += " };\n"
    if forBenchmark:
        h += "const unsigned int numProblems = %u;\n" \
            % problemSizes.totalProblemSizes
        h += "const unsigned int problemSizes[numProblems][%u] = {\n" \
            % problemTypes[0]["TotalIndices"]
        for i in range(0, problemSizes.totalProblemSizes):
            line = "  {%5u" % problemSizes.sizes[i][0]
            for j in range(1, problemTypes[0]["TotalIndices"]):
                line += ",%5u" % problemSizes.sizes[i][j]
            line += " }"
            h += line
            if i < problemSizes.totalProblemSizes - 1:
                h += ","
            else:
                h += "};"
            h += "\n"
        h += "const unsigned int minStrides[%u] = {" \
            % problemTypes[0]["TotalIndices"]
        for i in range(0, len(problemSizes.minStrides)):
            if (i != 0):
                h += ", "
            h += str(problemSizes.minStrides[i])
        h += "};\n"
    else:
        h += "unsigned int userSizes[maxNumIndices];\n"
        h += "unsigned int minStrides[%u] = {" \
            % maxNumIndices
        for i in range(0, maxNumIndices):
            if (i != 0):
                h += ", "
            h += str(0)
            # always use 0 for minStrides in benchmark mode
        h += "};\n"

    if forBenchmark:
        h += "/* problem sizes */\n"
        """
    h += "const bool indexIsSized[maxNumIndices] = {"
    for i in range(0, problemSizes.totalIndices):
      h += " %s" % ("true" if problemSizes.indexIsSized[i] else "false")
      if i < problemSizes.totalIndices-1:
        h += ","
    h += " };\n"

    h += "const unsigned int numIndicesSized = %u;\n" \
        % len(problemSizes.indicesSized)
    h += "const unsigned int indicesSized[numIndicesSized][4] = {\n"
    h += "// { min, stride, stride_incr, max }\n"
    for i in range(0, len(problemSizes.indicesSized)):
      r = problemSizes.indicesSized[i]
      h += "  { %u, %u, %u, %u }" % (r[0], r[1], r[2], r[3])
      if i < len(problemSizes.indicesSized)-1:
        h += ","
      h += "\n"
    h += "  };\n"

    numIndicesMapped = len(problemSizes.indicesMapped)
    h += "const unsigned int numIndicesMapped = %u;\n" % numIndicesMapped
    if numIndicesMapped > 0:
      h += "#define Tensile_INDICES_MAPPED 1\n"
      h += "const unsigned int indicesMapped[numIndicesMapped] = {"
      for i in range(0, numIndicesMapped):
        h += " %u" % problemSizes.indicesMapped[i]
        if i < numIndicesMapped-1:
          h += ","
      h += " };\n"
    else:
      h += "#define Tensile_INDICES_MAPPED 0\n"
    """

    ##############################################################################
    # Max Problem Sizes
    ##############################################################################
    if forBenchmark:
        h += "size_t maxSizeC = %u;\n" % (problemSizes.maxC)
        h += "size_t maxSizeA = %u;\n" % (problemSizes.maxA)
        h += "size_t maxSizeB = %u;\n" % (problemSizes.maxB)
        h += "\n"
    else:
        h += "size_t maxSizeC;\n"
        h += "size_t maxSizeA;\n"
        h += "size_t maxSizeB;\n"
        h += "\n"

    ##############################################################################
    # Current Problem Size
    ##############################################################################
    h += "/* current problem size */\n"
    #h += "unsigned int fullSizes[maxNumIndices];\n"
    #h += "unsigned int currentSizedIndexSizes[numIndicesSized];\n"
    #h += "unsigned int currentSizedIndexIncrements[numIndicesSized];\n"
    h += "\n"

    ##############################################################################
    # Solutions
    ##############################################################################
    if forBenchmark:
        h += "/* solutions */\n"
        # Problem Type Indices
        h += "const unsigned int maxNumSolutions = %u;\n" % len(solutions)
        h += "float solutionPerf[numProblems][maxNumSolutions]; // milliseconds\n"
        h += "\n"
        # Solution Ptrs
        h += "typedef TensileStatus (*SolutionFunctionPointer)(\n"
        argList = solutionWriter.getArgList(solutions[0]["ProblemType"], True,
                                            True, True)
        for i in range(0, len(argList)):
            h += "  %s %s%s" % (argList[i][0], argList[i][1], \
                ",\n" if i < len(argList)-1 else ");\n\n")
        h += "const SolutionFunctionPointer solutions[maxNumSolutions] = {\n"
        for i in range(0, len(solutions)):
            solution = solutions[i]
            solutionName = solutionWriter.getSolutionName(solution)
            h += "  %s" % solutionName
            if i < len(solutions) - 1:
                h += ","
            h += "\n"
        h += " };\n"
        h += "\n"
        # Solution Names
        h += "const char *solutionNames[maxNumSolutions] = {\n"
        for i in range(0, len(solutions)):
            solution = solutions[i]
            solutionName = solutionWriter.getSolutionName(solution)
            h += "  \"%s\"" % solutionName
            if i < len(solutions) - 1:
                h += ","
            h += "\n"
        h += " };\n"
        h += "\n"
    else:
        # Function Names
        functionNames = []
        for dataType in dataTypes:
            for problemType in problemTypesForDataType[dataType]:
                for scheduleName in schedulesForProblemType[problemType]:
                    #functionNames.append("tensile_%s_%s" % (scheduleName, problemType))
                    functionNames.append("tensile_%s" % (problemType))
        h += "const char *functionNames[numFunctions] = {\n"
        for functionIdx in range(0, len(functionNames)):
            functionName = functionNames[functionIdx]
            h += "    \"%s\"%s\n" % (functionName, \
                "," if functionIdx < len(functionNames)-1 else "" )
        h += " };\n"

    ##############################################################################
    # Runtime Structures
    ##############################################################################
    h += "/* runtime structures */\n"
    h += "TensileStatus status;\n"
    if globalParameters["RuntimeLanguage"] == "OCL":
        h += "cl_platform_id platform;\n"
        h += "cl_device_id device;\n"
        h += "cl_context context;\n"
        h += "cl_command_queue stream;\n"
    else:
        h += "hipStream_t stream;\n"
        #h += "int deviceIdx = %u;\n" \
        #    % (globalParameters["Device"])
    h += "\n"
    h += "void *deviceC;\n"
    h += "void *deviceA;\n"
    h += "void *deviceB;\n"

    ##############################################################################
    # Benchmarking and Validation Parameters
    ##############################################################################
    h += "\n/* benchmarking parameters */\n"
    #h += "const bool measureKernelTime = %s;\n" \
    #    % ("true" if globalParameters["KernelTime"] else "false")
    #h += "const unsigned int numEnqueuesPerSync = %u;\n" \
    #    % (globalParameters["EnqueuesPerSync"])
    #h += "const unsigned int numSyncsPerBenchmark = %u;\n" \
    #    % (globalParameters["SyncsPerBenchmark"])
    #h += "unsigned int numElementsToValidate = %s;\n" \
    #    % (str(globalParameters["NumElementsToValidate"]) \
    #    if globalParameters["NumElementsToValidate"] >= 0 \
    #    else "0xFFFFFFFF" )
    #h += "unsigned int validationMaxToPrint = %u;\n" \
    #    % globalParameters["ValidationMaxToPrint"]
    #h += "bool validationPrintValids = %s;\n" \
    #    % ("true" if globalParameters["ValidationPrintValids"] else "false")
    h += "size_t validationStride;\n"
    if problemType["HighPrecisionAccumulate"]:
        h += "static bool useHighPrecisionAccumulate = true;\n"
    else:
        h += "static bool useHighPrecisionAccumulate = false;\n"
    #h += "unsigned int dataInitTypeC = %s;\n" % globalParameters["DataInitTypeC"]
    #h += "unsigned int dataInitTypeAB = %s;\n" % globalParameters["DataInitTypeAB"]
    h += "\n"

    ##############################################################################
    # Generated Call to Reference
    ##############################################################################
    h += "/* generated call to reference */\n"
    h += "template<typename DataType>\n"
    h += "TensileStatus generatedCallToReferenceCPU(\n"
    h += "    const unsigned int *sizes,\n"
    h += "    const unsigned int *minStrides,\n"
    h += "    DataType *referenceC,\n"
    h += "    DataType *initialA,\n"
    h += "    DataType *initialB,\n"
    h += "    const unsigned int stride_a,\n"
    h += "    const unsigned int stride_b,\n"
    h += "    const unsigned int stride_c,\n"
    h += "    DataType alpha,\n"
    h += "    DataType beta,\n"
    h += "    bool useHighPrecisionAccumulate) {\n"
    h += "  return tensileReferenceCPU(\n"
    h += "      referenceC,\n"
    h += "      initialA,\n"
    h += "      initialB,\n"
    h += "      stride_a,\n"
    h += "      stride_b,\n"
    h += "      stride_c,\n"
    h += "      alpha,\n"
    h += "      beta,\n"
    h += "      totalIndices[problemTypeIdx],\n"
    h += "      sizes,\n"
    h += "      minStrides,\n"
    h += "      numIndicesC[problemTypeIdx],\n"
    h += "      numIndicesAB[problemTypeIdx],\n"
    h += "      indexAssignmentsA[problemTypeIdx],\n"
    h += "      indexAssignmentsB[problemTypeIdx],\n"
    h += "      complexConjugateA[problemTypeIdx],\n"
    h += "      complexConjugateB[problemTypeIdx],\n"
    h += "      validationStride,\n"
    h += "      useHighPrecisionAccumulate);\n"
    h += "};\n"
    h += "\n"

    ##############################################################################
    # Generated Call to Solution
    ##############################################################################
    if forBenchmark:
        problemType = solutions[0]["ProblemType"]
        h += "/* generated call to solution */\n"
        h += "template<typename DataType>\n"
        h += "TensileStatus generatedCallToSolution(\n"
        h += "    unsigned int solutionIdx,\n"
        h += "    const unsigned int *sizes,\n"
        h += "    const unsigned int *minStrides,\n"
        h += "    DataType alpha,\n"
        h += "    DataType beta, \n"
        h += "    unsigned int numEvents = 0, \n"
        if globalParameters["RuntimeLanguage"] == "OCL":
            h += "    cl_event *event_wait_list = NULL,\n"
            h += "    cl_event *outputEvent = NULL ) {\n"
        else:
            h += "    hipEvent_t *startEvent = NULL,\n"
            h += "    hipEvent_t *stopEvent = NULL ) {\n"

        h += "  // calculate parameters assuming packed data\n"
        # strides
        indexChars = globalParameters["IndexChars"]
        firstStride = 1
        if problemType["UseInitialStrides"]:
            firstStride = 0
        lastStrideC = problemType["NumIndicesC"]
        lastStrideA = len(problemType["IndexAssignmentsA"])
        lastStrideB = len(problemType["IndexAssignmentsB"])

        # calculate strides
        for i in range(0, lastStrideC):
            h += "  unsigned int strideC%u%s = 1" % (i, indexChars[i])
            for j in range(0, i):
                h += "* std::max(minStrides[%i], sizes[%i])" % (j, j)
            h += ";\n"
        for i in range(0, lastStrideA):
            h += "  unsigned int strideA%u%s = 1" % (i, \
                indexChars[problemType["IndexAssignmentsA"][i]])
            for j in range(0, i):
                h += "* std::max(minStrides[%i], sizes[%i])" % \
                  (problemType["IndexAssignmentsA"][j],
                   problemType["IndexAssignmentsA"][j])
            h += ";\n"
        for i in range(0, lastStrideB):
            h += "  unsigned int strideB%u%s = 1" % (i, \
                indexChars[problemType["IndexAssignmentsB"][i]])
            for j in range(0, i):
                h += "* std::max(minStrides[%i], sizes[%i])" % \
                  (problemType["IndexAssignmentsB"][j],
                   problemType["IndexAssignmentsB"][j])
            h += ";\n"
        for i in range(0, problemType["TotalIndices"]):
            h += "  unsigned int size%s = sizes[%u];\n" % (indexChars[i], i)
        h += "\n"

        # function call
        h += "  // call solution function\n"
        if globalParameters["RuntimeLanguage"] == "OCL":
            h += "  return solutions[solutionIdx]( static_cast<cl_mem>(deviceC), static_cast<cl_mem>(deviceA), static_cast<cl_mem>(deviceB),\n"
        else:
            typeName = dataTypes[0].toCpp()
            h += "  return solutions[solutionIdx]( static_cast<%s *>(deviceC), static_cast<%s *>(deviceA), static_cast<%s *>(deviceB),\n" \
                % (typeName, typeName, typeName)
        h += "      alpha,\n"
        if problemType["UseBeta"]:
            h += "      beta,\n"
        h += "      0, 0, 0, // offsets\n"
        for i in range(firstStride, lastStrideC):
            h += "      strideC%u%s,\n" % (i, indexChars[i])
        for i in range(firstStride, lastStrideA):
            h += "      strideA%u%s,\n" % (i, \
                indexChars[problemType["IndexAssignmentsA"][i]])
        for i in range(firstStride, lastStrideB):
            h += "      strideB%u%s,\n" % (i, \
                indexChars[problemType["IndexAssignmentsB"][i]])
        for i in range(0, problemType["TotalIndices"]):
            h += "      size%s,\n" % indexChars[i]
        h += "      stream,\n"
        if globalParameters["RuntimeLanguage"] == "OCL":
            h += "      numEvents, event_wait_list, outputEvent ); // events\n"
        else:
            h += "      numEvents, startEvent, stopEvent); // events\n"

        h += "};\n"
        h += "\n"
    else:
        ############################################################################
        # Generated Call to Function
        ############################################################################
        for enqueue in [True, False]:
            functionName = "tensile" if enqueue else "tensileGetSolutionName"
            returnName = "TensileStatus" if enqueue else "const char *"
            h += "/* generated call to function */\n"
            h += "template<typename DataType>\n"
            h += "%s generatedCallTo_%s(\n" % (returnName, functionName)
            h += "    unsigned int *sizes,\n"
            h += "    unsigned int *minStrides,\n"
            h += "    DataType alpha,\n"
            h += "    DataType beta, \n"
            h += "    unsigned int strideA, \n"
            h += "    unsigned int strideB, \n"
            h += "    unsigned int strideC, \n"
            h += "    unsigned int numEvents = 0, \n"

            if globalParameters["RuntimeLanguage"] == "OCL":
                h += "    cl_event *event_wait_list = NULL,\n"
                h += "    cl_event *outputEvent = NULL );\n\n"
            else:
                h += "    hipEvent_t *startEvent = NULL,\n"
                h += "    hipEvent_t *stopEvent = NULL );\n\n"

            for dataType in dataTypes:
                typeName = dataType.toCpp()
                functionsForDataType = []
                for problemType in problemTypesForDataType[dataType]:
                    for scheduleName in schedulesForProblemType[problemType]:
                        functionsForDataType.append(
                            [scheduleName, problemType])
                h += "template<>\n"
                h += "inline %s generatedCallTo_%s<%s>(\n" \
                    % (returnName, functionName, typeName)
                h += "    unsigned int *sizes,\n"
                h += "    unsigned int *minStrides,\n"
                h += "    %s alpha,\n" % typeName
                h += "    %s beta,\n" % typeName
                h += "    unsigned int strideA, \n"
                h += "    unsigned int strideB, \n"
                h += "    unsigned int strideC, \n"
                h += "    unsigned int numEvents, \n"

                if globalParameters["RuntimeLanguage"] == "OCL":
                    h += "    cl_event *event_wait_list,\n"
                    h += "    cl_event *outputEvent ) {\n\n"
                else:
                    h += "    hipEvent_t *startEvent,\n"
                    h += "    hipEvent_t *stopEvent ) {\n\n"

                h += "  unsigned int functionIdxForDataType = functionInfo[functionIdx][4];\n"

                for functionIdx in range(0, len(functionsForDataType)):
                    function = functionsForDataType[functionIdx]
                    scheduleName = function[0]
                    problemType = function[1]
                    if len(functionsForDataType) > 1:
                        if functionIdx == 0:
                            h += "  if (functionIdxForDataType == %u) {\n" % functionIdx
                        elif functionIdx == len(functionsForDataType) - 1:
                            h += "  } else {\n"
                        else:
                            h += "  } else if (functionIdxForDataType == %u) {\n" \
                                % functionIdx

                    # strides
                    indexChars = globalParameters["IndexChars"]
                    firstStride = 1
                    if problemType["UseInitialStrides"]:
                        firstStride = 0
                    lastStrideC = problemType["NumIndicesC"]
                    lastStrideA = len(problemType["IndexAssignmentsA"])
                    lastStrideB = len(problemType["IndexAssignmentsB"])

                    # calculate strides
                    for i in range(0, lastStrideC):
                        h += "    unsigned int strideC%u%s = 1" % (
                            i, indexChars[i])
                        for j in range(0, i):
                            h += "*sizes[%i]" % j
                        h += ";\n"
                    h += "    if (strideC != std::numeric_limits<unsigned int>::max())  strideC%u%s = strideC;\n" % (
                        lastStrideC - 1, indexChars[lastStrideC - 1])

                    for i in range(0, lastStrideA):
                        h += "    unsigned int strideA%u%s = 1" % (i, \
                            indexChars[problemType["IndexAssignmentsA"][i]])
                        for j in range(0, i):
                            h += "*sizes[%i]" % \
                              problemType["IndexAssignmentsA"][j]
                        h += ";\n"
                    h += "    if (strideA != std::numeric_limits<unsigned int>::max())  strideA%u%s = strideA;\n" % (
                        lastStrideA - 1, indexChars[lastStrideA - 1])
                    for i in range(0, lastStrideB):
                        h += "    unsigned int strideB%u%s = 1" % (i, \
                            indexChars[problemType["IndexAssignmentsB"][i]])
                        for j in range(0, i):
                            h += "*sizes[%i]" % \
                              problemType["IndexAssignmentsB"][j]
                        h += ";\n"
                    h += "    if (strideB != std::numeric_limits<unsigned int>::max())  strideB%u%s = strideB;\n" % (
                        lastStrideB - 1, indexChars[lastStrideB - 1])
                    for i in range(0, problemType["TotalIndices"]):
                        h += "    unsigned int size%s = sizes[%u];\n" % (
                            indexChars[i], i)

                    # function call
                    h += "    // call solution function\n"
                    h += "    return %s_%s(\n" % (functionName, problemType)
                    if enqueue:
                        if globalParameters["RuntimeLanguage"] == "OCL":
                            h += "        static_cast<cl_mem>(deviceC),\n"
                            h += "        static_cast<cl_mem>(deviceA),\n"
                            h += "        static_cast<cl_mem>(deviceB),\n"
                        else:
                            h += "        static_cast<%s *>(deviceC),\n" % typeName
                            h += "        static_cast<%s *>(deviceA),\n" % typeName
                            h += "        static_cast<%s *>(deviceB),\n" % typeName
                        h += "        alpha,\n"
                        if problemType["UseBeta"]:
                            h += "        beta,\n"
                        h += "        0, 0, 0, // offsets\n"
                    for i in range(firstStride, lastStrideC):
                        h += "        strideC%u%s,\n" % (i, indexChars[i])
                    for i in range(firstStride, lastStrideA):
                        h += "        strideA%u%s,\n" % (i, \
                            indexChars[problemType["IndexAssignmentsA"][i]])
                    for i in range(firstStride, lastStrideB):
                        h += "        strideB%u%s,\n" % (i, \
                            indexChars[problemType["IndexAssignmentsB"][i]])
                    for i in range(0, problemType["TotalIndices"]):
                        h += "        size%s,\n" % indexChars[i]
                    h += "        stream"
                    if enqueue:
                        if globalParameters["RuntimeLanguage"] == "OCL":
                            h += ",\n        numEvents, event_wait_list, outputEvent"
                        else:
                            h += ",\n        numEvents, startEvent, stopEvent"
                    h += ");\n"

                if len(functionsForDataType) > 1:
                    h += "  }\n"  # close last if
                h += "};\n"  # close callToFunction

    ##############################################################################
    # Results File Name
    ##############################################################################
    if forBenchmark:
        h += "/* results file name */\n"
        resultsFileName = os.path.join(globalParameters["WorkingPath"], \
            "../../Data","%s.csv" % stepName)
        resultsFileName = resultsFileName.replace("\\", "\\\\")
        h += "const char *resultsFileName = \"%s\";\n" % resultsFileName

    ##############################################################################
    # Write File
    ##############################################################################
    clientParametersFile = open(os.path.join(globalParameters["WorkingPath"], \
        "ClientParameters.h"), "w")
    clientParametersFile.write(CHeader)
    clientParametersFile.write(h)
    clientParametersFile.close()
def TensileCreateLibrary():
    """Command-line entry point: generate library sources from logic files.

    Parses ``sys.argv`` for three positional arguments (LogicPath, OutputPath,
    RuntimeLanguage) and three on/off option pairs (merge-files,
    short-file-names, library-print-debug), forwards them to
    ``assignGlobalParameters``, reads every ``*.yaml`` file located directly
    inside LogicPath, and then writes solutions, kernels, CMake files and the
    selection logic under OutputPath.

    Takes no parameters and returns nothing.  If LogicPath does not exist the
    function reports it through ``printExit``.
    """
    print1("")
    print1(HR)
    print1("# Tensile Create Library")
    print2(HR)
    print2("")

    ##############################################################################
    # Parse Command Line Arguments
    ##############################################################################
    print2("Arguments: %s" % sys.argv)
    argParser = argparse.ArgumentParser()
    argParser.add_argument("LogicPath",
                           help="Path to LibraryLogic.yaml files.")
    argParser.add_argument("OutputPath", help="Where to write library files?")
    argParser.add_argument("RuntimeLanguage", help="Which runtime language?",
                           choices=["OCL", "HIP", "HSA"])
    # Each boolean option is exposed as an enabling flag and a "--no-" flag
    # that both write to the same destination.
    for flagName, destName in (("merge-files", "MergeFiles"),
                               ("short-file-names", "ShortNames"),
                               ("library-print-debug", "LibraryPrintDebug")):
        argParser.add_argument("--%s" % flagName, dest=destName,
                               action="store_true")
        argParser.add_argument("--no-%s" % flagName, dest=destName,
                               action="store_false")
    args = argParser.parse_args()

    logicPath = args.LogicPath
    outputPath = args.OutputPath
    print2("OutputPath: %s" % outputPath)
    ensurePath(outputPath)
    arguments = {
        "RuntimeLanguage": args.RuntimeLanguage,
        "MergeFiles": args.MergeFiles,
        "ShortNames": args.ShortNames,
        "LibraryPrintDebug": args.LibraryPrintDebug,
        "CodeFromFiles": False,
    }
    assignGlobalParameters(arguments)

    if not os.path.exists(logicPath):
        printExit("LogicPath %s doesn't exist" % logicPath)

    # Only regular files directly inside logicPath whose extension is exactly
    # ".yaml" are treated as logic files (no recursion into sub-directories).
    logicFiles = []
    for entry in os.listdir(logicPath):
        entryPath = os.path.join(logicPath, entry)
        if os.path.isfile(entryPath) \
                and os.path.splitext(entry)[1] == ".yaml":
            logicFiles.append(entryPath)

    # The header line has no placeholder; the individual files are listed
    # one per line right below it.
    print1("# LibraryLogicFiles:")
    for logicFile in logicFiles:
        print1("#   %s" % logicFile)

    ##############################################################################
    # Parse config files
    ##############################################################################
    solutions = []
    logicData = {}  # keys are problemTypes, values are schedules
    for logicFileName in logicFiles:
        (scheduleName, deviceNames, problemType, solutionsForSchedule, \
            indexOrder, exactLogic, rangeLogic) \
            = YAMLIO.readLibraryLogicForSchedule(logicFileName)
        if problemType not in logicData:
            logicData[problemType] = []
        logicData[problemType].append((scheduleName, deviceNames, \
            solutionsForSchedule, indexOrder, exactLogic, rangeLogic ))
        # keep the first occurrence of every distinct solution
        for solution in solutionsForSchedule:
            if solution not in solutions:
                solutions.append(solution)

    # collect the distinct kernels (and beta-only kernels) of all solutions
    kernels = []
    kernelsBetaOnly = []
    for solution in solutions:
        for kernel in solution.getKernels():
            if kernel not in kernels:
                kernels.append(kernel)
        for kernel in solution.getKernelsBetaOnly():
            if kernel not in kernelsBetaOnly:
                kernelsBetaOnly.append(kernel)

    # Serial naming is only computed when short names are requested and the
    # output is not merged into single files; otherwise None is passed on.
    if globalParameters["ShortNames"] and not globalParameters["MergeFiles"]:
        solutionSerialNaming = Solution.getSerialNaming(solutions)
        kernelSerialNaming = Solution.getSerialNaming(kernels)
    else:
        solutionSerialNaming = None
        kernelSerialNaming = None
    solutionMinNaming = Solution.getMinNaming(solutions)
    kernelMinNaming = Solution.getMinNaming(kernels)

    # create solution writer and kernel writers
    solutionWriter = SolutionWriter( \
        solutionMinNaming, solutionSerialNaming, \
        kernelMinNaming, kernelSerialNaming)
    kernelWriterSource = KernelWriterSource( \
        kernelMinNaming, kernelSerialNaming)
    kernelWriterAssembly = KernelWriterAssembly( \
        kernelMinNaming, kernelSerialNaming)

    # write solutions and kernels
    writeSolutionsAndKernels(outputPath, solutions, kernels, kernelsBetaOnly, \
        solutionWriter, kernelWriterSource, kernelWriterAssembly)

    libraryStaticFiles = [
        "SolutionMapper.h", "TensileTypes.h", "KernelHeader.h",
        "SolutionHelper.cpp", "SolutionHelper.h", "Tools.cpp", "Tools.h"
    ]

    # write cmake
    clientName = "LibraryClient"
    writeCMake(outputPath, solutions, kernels, libraryStaticFiles, clientName)

    # write logic
    writeLogic(outputPath, logicData, solutionWriter)
    print1("# Tensile Library Writer DONE")
    print1(HR)
    print1("")
示例#5
0
def TensileCreateLibrary():
    """Command-line entry point: generate library sources from logic files.

    Parses ``sys.argv`` for three positional arguments (LogicPath, OutputPath,
    RuntimeLanguage), three on/off option pairs (merge-files,
    short-file-names, library-print-debug) and a repeatable ``--isa gfxNNN``
    option, forwards them to ``assignGlobalParameters``, reads every
    ``*.yaml`` file located directly inside LogicPath, and then writes
    solutions, kernels, CMake files and the selection logic under OutputPath.

    When the runtime language is HIP, every kernel whose ``KernelLanguage``
    is ``"Assembly"`` is tagged with the first supported ISA and deep-copied
    once for each remaining supported ISA; all other kernels are tagged with
    ISA ``(0, 0, 0)``.

    Takes no parameters and returns nothing.  If LogicPath does not exist the
    function reports it through ``printExit``.
    """
    print1("")
    print1(HR)
    print1("# Tensile Create Library")
    print2(HR)
    print2("")

    ##############################################################################
    # Parse Command Line Arguments
    ##############################################################################
    print2("Arguments: %s" % sys.argv)
    argParser = argparse.ArgumentParser()
    argParser.add_argument("LogicPath",
                           help="Path to LibraryLogic.yaml files.")
    argParser.add_argument("OutputPath", help="Where to write library files?")
    argParser.add_argument("RuntimeLanguage", help="Which runtime language?",
                           choices=["OCL", "HIP", "HSA"])
    # Each boolean option is exposed as an enabling flag and a "--no-" flag
    # that both write to the same destination.
    for flagName, destName in (("merge-files", "MergeFiles"),
                               ("short-file-names", "ShortNames"),
                               ("library-print-debug", "LibraryPrintDebug")):
        argParser.add_argument("--%s" % flagName, dest=destName,
                               action="store_true")
        argParser.add_argument("--no-%s" % flagName, dest=destName,
                               action="store_false")
    argParser.add_argument(
        "--isa",
        dest="isa",
        action="append",
        help="which architectures for assembly kernels to target")
    args = argParser.parse_args()

    logicPath = args.LogicPath
    outputPath = args.OutputPath
    print2("OutputPath: %s" % outputPath)
    ensurePath(outputPath)
    arguments = {
        "RuntimeLanguage": args.RuntimeLanguage,
        "MergeFiles": args.MergeFiles,
        "ShortNames": args.ShortNames,
        "LibraryPrintDebug": args.LibraryPrintDebug,
    }
    if args.isa:
        isaFormatWarning = "isa parameter must be formed as: --isa gfx803"
        newISA = []
        for isa in args.isa:
            gfxIdx = isa.find("gfx")
            if gfxIdx < 0:
                printWarning(isaFormatWarning)
                continue
            # The three characters after "gfx" are read as one digit each
            # (major, minor, step).  A missing or non-numeric character makes
            # int() raise ValueError; report it like any other malformed
            # value instead of aborting with a traceback.
            try:
                isaTuple = tuple(
                    int(isa[gfxIdx + offset:gfxIdx + offset + 1])
                    for offset in (3, 4, 5))
            except ValueError:
                printWarning(isaFormatWarning)
                continue
            # Well-formed values that are not in the current SupportedISA
            # list, and duplicates, are dropped without a message.
            if isaTuple in globalParameters["SupportedISA"] \
                    and isaTuple not in newISA:
                print1("# User-Specified ISA: gfx%u%u%u" % isaTuple)
                newISA.append(isaTuple)
        # NOTE(review): if none of the requested ISAs was accepted this is an
        # empty list, and the HIP assembly path below indexes element 0 of
        # globalParameters["SupportedISA"] -- confirm how
        # assignGlobalParameters treats an empty value.
        arguments["SupportedISA"] = newISA
    assignGlobalParameters(arguments)

    if not os.path.exists(logicPath):
        printExit("LogicPath %s doesn't exist" % logicPath)

    # Only regular files directly inside logicPath whose extension is exactly
    # ".yaml" are treated as logic files (no recursion into sub-directories).
    logicFiles = []
    for entry in os.listdir(logicPath):
        entryPath = os.path.join(logicPath, entry)
        if os.path.isfile(entryPath) \
                and os.path.splitext(entry)[1] == ".yaml":
            logicFiles.append(entryPath)

    # The header line has no placeholder; the individual files are listed
    # one per line right below it.
    print1("# LibraryLogicFiles:")
    for logicFile in logicFiles:
        print1("#   %s" % logicFile)

    ##############################################################################
    # Parse config files
    ##############################################################################
    solutions = []
    logicData = {}  # keys are problemTypes, values are schedules
    for logicFileName in logicFiles:
        (scheduleName, deviceNames, problemType, solutionsForSchedule, \
            indexOrder, exactLogic, rangeLogic) \
            = YAMLIO.readLibraryLogicForSchedule(logicFileName)
        if problemType not in logicData:
            logicData[problemType] = []
        logicData[problemType].append((scheduleName, deviceNames, \
            solutionsForSchedule, indexOrder, exactLogic, rangeLogic ))
        # keep the first occurrence of every distinct solution
        for solution in solutionsForSchedule:
            if solution not in solutions:
                solutions.append(solution)

    # collect the distinct kernels (and beta-only kernels) of all solutions
    kernels = []
    kernelsBetaOnly = []
    for solution in solutions:
        for kernel in solution.getKernels():
            if kernel not in kernels:
                kernels.append(kernel)
        for kernel in solution.getKernelsBetaOnly():
            if kernel not in kernelsBetaOnly:
                kernelsBetaOnly.append(kernel)

    # if any kernels are assembly, append every ISA supported
    if globalParameters["RuntimeLanguage"] == "HIP":
        supportedISA = globalParameters["SupportedISA"]
        # Copies are gathered separately and appended afterwards so the list
        # being iterated is not modified during the loop.
        newKernels = []
        for kernel in kernels:
            if kernel["KernelLanguage"] == "Assembly":
                kernel["ISA"] = supportedISA[0]
                for isaTuple in supportedISA[1:]:
                    newKernel = deepcopy(kernel)
                    newKernel["ISA"] = isaTuple
                    newKernels.append(newKernel)
            else:
                kernel["ISA"] = (0, 0, 0)
        kernels.extend(newKernels)

    # Serial naming is only computed when short names are requested and the
    # output is not merged into single files; otherwise None is passed on.
    if globalParameters["ShortNames"] and not globalParameters["MergeFiles"]:
        solutionSerialNaming = Solution.getSerialNaming(solutions)
        kernelSerialNaming = Solution.getSerialNaming(kernels)
    else:
        solutionSerialNaming = None
        kernelSerialNaming = None
    solutionMinNaming = Solution.getMinNaming(solutions)
    kernelMinNaming = Solution.getMinNaming(kernels)

    # create solution writer and kernel writers
    solutionWriter = SolutionWriter( \
        solutionMinNaming, solutionSerialNaming, \
        kernelMinNaming, kernelSerialNaming)
    kernelWriterSource = KernelWriterSource( \
        kernelMinNaming, kernelSerialNaming)
    kernelWriterAssembly = KernelWriterAssembly( \
        kernelMinNaming, kernelSerialNaming)

    # write solutions and kernels
    writeSolutionsAndKernels(outputPath, solutions, kernels, kernelsBetaOnly, \
        solutionWriter, kernelWriterSource, kernelWriterAssembly)

    libraryStaticFiles = [
        "TensileTypes.h", "KernelHeader.h", "SolutionHelper.cpp",
        "SolutionHelper.h", "Tools.cpp", "Tools.h"
    ]

    # write cmake
    clientName = "LibraryClient"
    writeCMake(outputPath, solutions, kernels, libraryStaticFiles, clientName)

    # write logic
    writeLogic(outputPath, logicData, solutionWriter)
    print1("# Tensile Library Writer DONE")
    print1(HR)
    print1("")