def getDeviceProps(self):
    '''Get device properties.

    Reads the CUDA device properties from the file
    "enum_cuda_props.cu.o.props" in the current working directory. When that
    file does not exist yet, the query program held in
    CUDA_DEVICE_QUERY_SKELET is written to disk, compiled with nvcc and
    executed first; the query program is expected to produce the file
    (NOTE(review): its output format is not visible here -- each line is
    parsed as a Python literal pair ``(name, value)``).

    After reading the properties, the build command is rewritten so that it
    uses nvcc, carries an ``-arch=sm_<major><minor>`` flag and, when the
    performance parameters contain 'CFLAGS', an '@CFLAGS' placeholder. The
    result is stored back on ``self.tinfo`` when a ``tinfo`` object is set.

    Failures are reported through ``g.err`` and a low compute capability
    through ``g.warn``.

    Returns:
        dict mapping property names to the values read from the file.
    '''
    qsrc = "enum_cuda_props.cu"
    qexec = qsrc + ".o"
    qout = qexec + ".props"

    # The properties file acts as a cache: only build and run the query
    # program when no previous output is available.
    if not os.path.exists(qout):
        # check for nvcc
        qcmd = 'which nvcc'
        status = os.system(qcmd)
        if status != 0:
            g.err("%s: could not locate nvcc with '%s'" % (self.__class__, qcmd))

        # write the query code; the context manager closes the handle even
        # when the write fails
        try:
            with open(qsrc, 'w') as f:
                f.write(CUDA_DEVICE_QUERY_SKELET)
        except EnvironmentError:
            g.err('%s: cannot open file for writing: %s' % (self.__class__, qsrc))

        # compile the query
        cmd = 'nvcc -o %s %s' % (qexec, qsrc)
        status = os.system(cmd)
        if status:
            g.err('%s: failed to compile cuda device query code: "%s"' % (self.__class__, cmd))

        # execute the query
        runcmd = './%s' % (qexec)
        status = os.system(runcmd)
        if status:
            g.err('%s: failed to execute cuda device query code: "%s"' % (self.__class__, runcmd))

        # the source and the executable are only needed to produce qout
        os.remove(qsrc)
        os.remove(qexec)

    # read device properties
    props = {}
    try:
        with open(qout, 'r') as f:
            for line in f:
                eline = ast.literal_eval(line)
                props[eline[0]] = eline[1]
    except EnvironmentError:
        g.err('%s: cannot open query output file for reading: %s' % (self.__class__, qout))
    except (ValueError, SyntaxError, TypeError, IndexError, KeyError):
        # malformed content is a different failure than an unreadable file
        g.err('%s: cannot parse query output file: %s' % (self.__class__, qout))

    if props['devId'] == -2:
        g.err("%s: there is no CUDA 1.0 enabled GPU on this machine" % self.__class__)

    if props['major'] < 2 and props['minor'] < 3:
        g.warn("%s: running on compute capability less than 1.3 is not recommended, detected %s.%s."
               % (self.__class__, props['major'], props['minor']))

    # set the arch to the latest supported by the device
    tinfo = self.tinfo
    bcmd = "gcc" if tinfo is None else tinfo.build_cmd

    if bcmd.startswith('gcc'):
        # a gcc command line is replaced wholesale by plain nvcc
        bcmd = 'nvcc'
    if '-arch' not in bcmd:
        bcmd += ' -arch=sm_' + str(props['major']) + str(props['minor'])
    if 'CFLAGS' in self.perf_params and '@CFLAGS' not in bcmd:
        bcmd += ' @CFLAGS'

    # tinfo may legitimately be None (handled above), so only store the
    # command when there is an object to store it on
    if tinfo is not None:
        tinfo.build_cmd = bcmd

    # return queried device props
    return props
def getDeviceProps(self):
    '''Get device properties.

    Reads the CUDA device properties from the file
    "enum_cuda_props.cu.o.props" in the current working directory. When that
    file does not exist yet, the query program held in
    CUDA_DEVICE_QUERY_SKELET is written to disk, compiled with nvcc and
    executed first; the query program is expected to produce the file
    (NOTE(review): its output format is not visible here -- each line is
    parsed as a Python literal pair ``(name, value)``).

    After reading the properties, the build command is rewritten so that it
    uses nvcc, carries an ``-arch=sm_<major><minor>`` flag and, when the
    performance parameters contain 'CFLAGS', an '@CFLAGS' placeholder. The
    result is stored back on ``self.tinfo`` when a ``tinfo`` object is set.

    Failures are reported through ``g.err`` and a low compute capability
    through ``g.warn``.

    Returns:
        dict mapping property names to the values read from the file.
    '''
    qsrc = "enum_cuda_props.cu"
    qexec = qsrc + ".o"
    qout = qexec + ".props"

    # The properties file acts as a cache: only build and run the query
    # program when no previous output is available.
    if not os.path.exists(qout):
        # check for nvcc
        qcmd = 'which nvcc'
        status = os.system(qcmd)
        if status != 0:
            g.err("%s: could not locate nvcc with '%s'" % (self.__class__, qcmd))

        # write the query code; the context manager closes the handle even
        # when the write fails
        try:
            with open(qsrc, 'w') as f:
                f.write(CUDA_DEVICE_QUERY_SKELET)
        except EnvironmentError:
            g.err('%s: cannot open file for writing: %s' % (self.__class__, qsrc))

        # compile the query
        cmd = 'nvcc -o %s %s' % (qexec, qsrc)
        status = os.system(cmd)
        if status:
            g.err('%s: failed to compile cuda device query code: "%s"' % (self.__class__, cmd))

        # execute the query
        runcmd = './%s' % (qexec)
        status = os.system(runcmd)
        if status:
            g.err('%s: failed to execute cuda device query code: "%s"' % (self.__class__, runcmd))

        # the source and the executable are only needed to produce qout
        os.remove(qsrc)
        os.remove(qexec)

    # read device properties
    props = {}
    try:
        with open(qout, 'r') as f:
            for line in f:
                eline = ast.literal_eval(line)
                props[eline[0]] = eline[1]
    except EnvironmentError:
        g.err('%s: cannot open query output file for reading: %s' % (self.__class__, qout))
    except (ValueError, SyntaxError, TypeError, IndexError, KeyError):
        # malformed content is a different failure than an unreadable file
        g.err('%s: cannot parse query output file: %s' % (self.__class__, qout))

    if props['devId'] == -2:
        g.err("%s: there is no CUDA 1.0 enabled GPU on this machine" % self.__class__)

    if props['major'] < 2 and props['minor'] < 3:
        g.warn("%s: running on compute capability less than 1.3 is not recommended, detected %s.%s."
               % (self.__class__, props['major'], props['minor']))

    # set the arch to the latest supported by the device
    tinfo = self.tinfo
    bcmd = "gcc" if tinfo is None else tinfo.build_cmd

    if bcmd.startswith('gcc'):
        # a gcc command line is replaced wholesale by plain nvcc
        bcmd = 'nvcc'
    if '-arch' not in bcmd:
        bcmd += ' -arch=sm_' + str(props['major']) + str(props['minor'])
    if 'CFLAGS' in self.perf_params and '@CFLAGS' not in bcmd:
        bcmd += ' @CFLAGS'

    # tinfo may legitimately be None (handled above), so only store the
    # command when there is an object to store it on
    if tinfo is not None:
        tinfo.build_cmd = bcmd

    # return queried device props
    return props
def readTransfArgs(self, perf_params, transf_args):
    '''Process the given transformation arguments.

    Each entry of ``transf_args`` is an ``(argument name, expression text,
    line number)`` triple. The expression text is evaluated with the
    performance parameters as its global namespace and the result is stored
    in the local variable that corresponds to the argument name. The CUDA
    names 'threadCount' and 'blockCount' are accepted as aliases for
    'workItemsPerGroup' and 'workGroups' (with a warning).

    Evaluation failures and unrecognized argument names are reported through
    ``g.err``.

    NOTE(review): the visible body ends after the loop without returning or
    otherwise publishing the collected values, and ``errors`` is never
    updated; presumably the remainder of the method lives outside this
    view -- confirm. All local names are kept so such a remainder still
    works.
    '''

    # expected argument names
    PLATFORM = 'platform'
    DEVICE = 'device'
    WORKGROUPS = 'workGroups'
    WORKITEMS = 'workItemsPerGroup'
    CB = 'cacheBlocks'
    STREAMCOUNT = 'streamCount'
    UIF = 'unrollInner'
    CLFLAGS = 'clFlags'
    THREADCOUNT = 'threadCount'
    BLOCKCOUNT = 'blockCount'
    VECHINT = 'vecHint'
    SIZEHINT = 'sizeHint'

    # default argument values
    platform = 0
    device = 0
    workGroups = None
    workItemsPerGroup = None
    cacheBlocks = False
    streamCount = 1
    unrollInner = None
    clFlags = None
    vecHint = 0
    sizeHint = False

    # eval() inserts a '__builtins__' entry into the globals dict it is
    # handed; evaluate against a shallow copy so the caller's perf_params is
    # left untouched. None is passed through unchanged.
    eval_env = None if perf_params is None else dict(perf_params)

    # iterate over all transformation arguments
    errors = ''
    for aname, rhs, line_no in transf_args:

        # evaluate the RHS expression
        # NOTE(review): eval() runs arbitrary code; this is only acceptable
        # while the argument expressions come from trusted input.
        try:
            rhs = eval(rhs, eval_env)
        except Exception as e:
            g.err('orio.module.loop.submodule.opencl.opencl: %s: failed to evaluate the argument expression: %s\n --> %s: %s'
                  % (line_no, rhs, e.__class__.__name__, e))

        if aname == PLATFORM:
            # TODO: validate
            platform = rhs
        elif aname == DEVICE:
            # TODO: validate
            device = rhs
        elif aname == WORKGROUPS:
            # TODO: validate
            workGroups = rhs
        elif aname == WORKITEMS:
            # TODO: validate
            workItemsPerGroup = rhs
        elif aname == CB:
            # TODO: validate
            cacheBlocks = rhs
        elif aname == STREAMCOUNT:
            # TODO: validate
            streamCount = rhs
        elif aname == UIF:
            # TODO: validate
            unrollInner = rhs
        elif aname == CLFLAGS:
            clFlags = rhs
        elif aname == THREADCOUNT:
            g.warn("Interpreting CUDA threadCount as OpenCL workItemsPerGroup")
            workItemsPerGroup = rhs
        elif aname == BLOCKCOUNT:
            g.warn("Interpreting CUDA blockCount as OpenCL workGroups")
            workGroups = rhs
        elif aname == VECHINT:
            vecHint = rhs
        elif aname == SIZEHINT:
            sizeHint = rhs
        else:
            g.err('%s: %s: unrecognized transformation argument: "%s"' % (self.__class__, line_no, aname))
def readTransfArgs(self, perf_params, transf_args):
    '''Process the given transformation arguments.

    Each entry of ``transf_args`` is an ``(argument name, expression text,
    line number)`` triple. The expression text is evaluated with the
    performance parameters as its global namespace and the result is stored
    in the local variable that corresponds to the argument name. The CUDA
    names 'threadCount' and 'blockCount' are accepted as aliases for
    'workItemsPerGroup' and 'workGroups' (with a warning).

    Evaluation failures and unrecognized argument names are reported through
    ``g.err``.

    NOTE(review): the visible body ends after the loop without returning or
    otherwise publishing the collected values, and ``errors`` is never
    updated; presumably the remainder of the method lives outside this
    view -- confirm. All local names are kept so such a remainder still
    works.
    '''

    # expected argument names
    PLATFORM = 'platform'
    DEVICE = 'device'
    WORKGROUPS = 'workGroups'
    WORKITEMS = 'workItemsPerGroup'
    CB = 'cacheBlocks'
    STREAMCOUNT = 'streamCount'
    UIF = 'unrollInner'
    CLFLAGS = 'clFlags'
    THREADCOUNT = 'threadCount'
    BLOCKCOUNT = 'blockCount'
    VECHINT = 'vecHint'
    SIZEHINT = 'sizeHint'

    # default argument values
    platform = 0
    device = 0
    workGroups = None
    workItemsPerGroup = None
    cacheBlocks = False
    streamCount = 1
    unrollInner = None
    clFlags = None
    vecHint = 0
    sizeHint = False

    # eval() inserts a '__builtins__' entry into the globals dict it is
    # handed; evaluate against a shallow copy so the caller's perf_params is
    # left untouched. None is passed through unchanged.
    eval_env = None if perf_params is None else dict(perf_params)

    # iterate over all transformation arguments
    errors = ''
    for aname, rhs, line_no in transf_args:

        # evaluate the RHS expression
        # NOTE(review): eval() runs arbitrary code; this is only acceptable
        # while the argument expressions come from trusted input.
        try:
            rhs = eval(rhs, eval_env)
        except Exception as e:
            g.err('orio.module.loop.submodule.opencl.opencl: %s: failed to evaluate the argument expression: %s\n --> %s: %s'
                  % (line_no, rhs, e.__class__.__name__, e))

        if aname == PLATFORM:
            # TODO: validate
            platform = rhs
        elif aname == DEVICE:
            # TODO: validate
            device = rhs
        elif aname == WORKGROUPS:
            # TODO: validate
            workGroups = rhs
        elif aname == WORKITEMS:
            # TODO: validate
            workItemsPerGroup = rhs
        elif aname == CB:
            # TODO: validate
            cacheBlocks = rhs
        elif aname == STREAMCOUNT:
            # TODO: validate
            streamCount = rhs
        elif aname == UIF:
            # TODO: validate
            unrollInner = rhs
        elif aname == CLFLAGS:
            clFlags = rhs
        elif aname == THREADCOUNT:
            g.warn("Interpreting CUDA threadCount as OpenCL workItemsPerGroup")
            workItemsPerGroup = rhs
        elif aname == BLOCKCOUNT:
            g.warn("Interpreting CUDA blockCount as OpenCL workGroups")
            workGroups = rhs
        elif aname == VECHINT:
            vecHint = rhs
        elif aname == SIZEHINT:
            sizeHint = rhs
        else:
            g.err('%s: %s: unrecognized transformation argument: "%s"' % (self.__class__, line_no, aname))