class ClearBufferCache(Plugin):
    """Pre-query plugin that drops the buffer (page) cache via the cluster controller."""

    __name__ = "ClearBufferCache"

    def __init__(self, *args, **kwargs):
        # The controller is built from the same arguments the base Plugin receives.
        controller = ClusterController(*args, **kwargs)
        self.cluster_controller = controller
        Plugin.__init__(self, *args, **kwargs)

    def run_pre_hook(self, context=None):
        """Issue the sysctl command that drops the page cache; `context` is unused."""
        # Only the page cache is dropped (drop_caches=1). Inodes and dentries are
        # left alone: they are not what is being tested, and dropping them causes
        # excessive performance variability. The same command then writes 0 back
        # to vm.drop_caches.
        self.cluster_controller.deprecated_run_cmd(
            "sysctl -w vm.drop_caches=1 vm.drop_caches=0")
class VTunePlugin(Plugin):
    """Plugin that runs Intel's VTune amplifier around a query.

    Before the query is executed, the plugin starts VTune collection. After the
    query has completed, the plugin stops the collection, waits for VTune to
    finish and kills any VTune processes that are still running.
    """

    # NOTE(review): another definition of VTunePlugin appears later in this file
    # and replaces this one at import time -- confirm which should remain.

    __name__ = "VTunePlugin"

    # TODO: We should make these configurable
    VTUNE_PATH = '/opt/intel/vtune_amplifier_xe_2013/'
    TARGET_PROCESS = 'impalad'
    RESULT_DIR_BASE = '/var/log/impala/vtune/' + '%s' + '/db=%s'
    RESULT_QUERY_SCOPE = '_query=%s_format=%s_iteration=%i'
    KILL_CMD = 'ps aux | grep vtune | grep -v grep | awk \'{print $2}\' | xargs kill -9'

    def __init__(self, *args, **kwargs):
        self.cluster_controller = ClusterController(*args, **kwargs)
        Plugin.__init__(self, *args, **kwargs)
        # Unique run identifier. It is embedded, unquoted, in a shell command
        # line, so the timestamp fallback must not contain whitespace (the
        # default str() of a datetime does).
        self.run_tag = environ.get(
            'BUILD_TAG', datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
        # Ensure that VTune is installed in the expected path on every host.
        self._check_path_on_hosts()

    def _result_dir(self, context):
        """Return the VTune result directory for the scope described by `context`.

        The run tag and database name are always part of the path. For the
        'Query' scope the query name, table format and iteration are appended.
        Missing context keys fall back to 'UNKNOWN' (iteration falls back to 1).
        """
        result_dir = self.RESULT_DIR_BASE % (
            self.run_tag, context.get('db_name', 'UNKNOWN'))
        if context.get('scope') == 'Query':
            # '/' would introduce extra path components, so it is replaced.
            table_format_str = context.get('table_format', 'UNKNOWN').replace('/', '-')
            # TODO: Fix the context dict to remove the ambiguity of the variable
            # name new_query_name
            result_dir += self.RESULT_QUERY_SCOPE % (
                context.get('short_query_name', 'UNKNOWN'),
                table_format_str,
                context.get('iteration', 1))
        return result_dir

    def run_pre_hook(self, context):
        """Start VTune collection in a background thread before the query runs."""
        # Source the VTune variables and build the collection command.
        pre_cmd = ('echo 0 > /proc/sys/kernel/nmi_watchdog\n'
                   'source ' + self.VTUNE_PATH + 'amplxe-vars.sh\n'
                   'amplxe-cl -collect advanced-hotspots '
                   '-result-dir=' + self._result_dir(context) +
                   ' -target-process=' + self.TARGET_PROCESS)
        self.thread = threading.Thread(
            target=self.cluster_controller.deprecated_run_cmd, args=[pre_cmd])
        self.thread.start()
        # TODO: Test whether this is a good time to wait
        # Because the collection is started asynchronously, we need to ensure
        # that all the machines are running. For now this is simpler than doing
        # the full check that we do in the post hook.
        time.sleep(2)

    def run_post_hook(self, context):
        """Stop VTune collection and wait for the VTune processes to finish."""
        # The result directory must match the one used in run_pre_hook().
        post_cmd = ('source ' + self.VTUNE_PATH + 'amplxe-vars.sh \n'
                    'amplxe-cl -command stop -result-dir=' + self._result_dir(context))
        self.cluster_controller.deprecated_run_cmd(post_cmd)
        # Wait for reports to generate and kill hosts that are hanging around
        self._wait_for_completion(2)

    def _check_path_on_hosts(self):
        """Raise RuntimeError if VTUNE_PATH is not reported as existing on a host.

        The result of deprecated_run_cmd() is treated as a dict mapping host to
        command output; any host whose output is not exactly "exists" is bad.
        """
        path_check_cmd = 'if [ -d "%s" ]; then echo "exists"\nfi' % (self.VTUNE_PATH)
        host_check_dict = self.cluster_controller.deprecated_run_cmd(path_check_cmd)
        bad_hosts = [host for host, output in host_check_dict.items()
                     if output != "exists"]
        if bad_hosts:
            raise RuntimeError(
                'VTune is not installed in the expected path for hosts %s' %
                ",".join(bad_hosts))

    def _wait_for_completion(self, timeout):
        """Wait up to `timeout` minutes for VTune to finish on every host.

        On large datasets it can take time for the reports to generate. The
        cluster is polled for running VTune processes until none are left or the
        timeout expires. Afterwards _kill_vtune() is called with the last poll
        result so that any unterminated VTune commands are killed.
        """
        grep_dict = {}
        finish_time = datetime.datetime.now() + datetime.timedelta(minutes=timeout)
        while datetime.datetime.now() < finish_time:
            grep_dict = self.cluster_controller.deprecated_run_cmd(
                'ps aux|grep vtune|grep -v grep')
            still_running = any(
                self.__is_not_none_or_empty_str(output)
                for output in grep_dict.values())
            if not still_running:
                # Nothing left to wait for; skip the sleep.
                break
            # TODO: Investigate a better length of time for the sleep period
            # between checks
            time.sleep(5)
        self._kill_vtune(grep_dict)

    def _kill_vtune(self, host_dict):
        """Kill VTune on the hosts whose last process listing was non-empty.

        `host_dict` maps host to the output of the VTune process check. Only
        hosts with a usable name and non-empty output are targeted.
        """
        kill_list = [host for host, output in host_dict.items()
                     if self.__is_not_none_or_empty_str(host)
                     and self.__is_not_none_or_empty_str(output)]
        if kill_list:
            self.cluster_controller.deprecated_run_cmd(self.KILL_CMD, hosts=kill_list)

    def __is_not_none_or_empty_str(self, s):
        """Return True unless `s` is None or the empty string."""
        return s is not None and s != ''
class VTunePlugin(Plugin):
    """Plugin that runs Intel's VTune amplifier around a query.

    Before the query is executed, the plugin starts VTune collection. After the
    query has completed, the plugin stops the collection, waits for VTune to
    finish and kills any VTune processes that are still running.
    """

    # NOTE(review): an earlier definition of VTunePlugin exists in this file;
    # this later one replaces it at import time -- confirm which should remain.

    __name__ = "VTunePlugin"

    # TODO: We should make these configurable
    VTUNE_PATH = '/opt/intel/vtune_amplifier_xe_2013/'
    TARGET_PROCESS = 'impalad'
    RESULT_DIR_BASE = '/var/log/impala/vtune/' + '%s' + '/db=%s'
    RESULT_QUERY_SCOPE = '_query=%s_format=%s_iteration=%i'
    KILL_CMD = 'ps aux | grep vtune | grep -v grep | awk \'{print $2}\' | xargs kill -9'

    def __init__(self, *args, **kwargs):
        self.cluster_controller = ClusterController(*args, **kwargs)
        Plugin.__init__(self, *args, **kwargs)
        # Unique run identifier. It is embedded, unquoted, in a shell command
        # line, so the timestamp fallback must not contain whitespace (the
        # default str() of a datetime does).
        self.run_tag = environ.get(
            'BUILD_TAG', datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
        # Ensure that VTune is installed in the expected path on every host.
        self._check_path_on_hosts()

    def _result_dir(self, context):
        """Return the VTune result directory for the scope described by `context`.

        The run tag and database name are always part of the path. For the
        'Query' scope the query name, table format and iteration are appended.
        Missing context keys fall back to 'UNKNOWN' (iteration falls back to 1).
        """
        result_dir = self.RESULT_DIR_BASE % (
            self.run_tag, context.get('db_name', 'UNKNOWN'))
        if context.get('scope') == 'Query':
            # '/' would introduce extra path components, so it is replaced.
            table_format_str = context.get('table_format', 'UNKNOWN').replace('/', '-')
            # TODO: Fix the context dict to remove the ambiguity of the variable
            # name new_query_name
            result_dir += self.RESULT_QUERY_SCOPE % (
                context.get('short_query_name', 'UNKNOWN'),
                table_format_str,
                context.get('iteration', 1))
        return result_dir

    def run_pre_hook(self, context):
        """Start VTune collection in a background thread before the query runs."""
        # Source the VTune variables and build the collection command.
        pre_cmd = ('echo 0 > /proc/sys/kernel/nmi_watchdog\n'
                   'source ' + self.VTUNE_PATH + 'amplxe-vars.sh\n'
                   'amplxe-cl -collect advanced-hotspots '
                   '-result-dir=' + self._result_dir(context) +
                   ' -target-process=' + self.TARGET_PROCESS)
        self.thread = threading.Thread(
            target=self.cluster_controller.deprecated_run_cmd, args=[pre_cmd])
        self.thread.start()
        # TODO: Test whether this is a good time to wait
        # Because the collection is started asynchronously, we need to ensure
        # that all the machines are running. For now this is simpler than doing
        # the full check that we do in the post hook.
        time.sleep(2)

    def run_post_hook(self, context):
        """Stop VTune collection and wait for the VTune processes to finish."""
        # The result directory must match the one used in run_pre_hook().
        post_cmd = ('source ' + self.VTUNE_PATH + 'amplxe-vars.sh \n'
                    'amplxe-cl -command stop -result-dir=' + self._result_dir(context))
        self.cluster_controller.deprecated_run_cmd(post_cmd)
        # Wait for reports to generate and kill hosts that are hanging around
        self._wait_for_completion(2)

    def _check_path_on_hosts(self):
        """Raise RuntimeError if VTUNE_PATH is not reported as existing on a host.

        The result of deprecated_run_cmd() is treated as a dict mapping host to
        command output; any host whose output is not exactly "exists" is bad.
        """
        path_check_cmd = 'if [ -d "%s" ]; then echo "exists"\nfi' % (self.VTUNE_PATH)
        host_check_dict = self.cluster_controller.deprecated_run_cmd(path_check_cmd)
        bad_hosts = [host for host, output in host_check_dict.items()
                     if output != "exists"]
        if bad_hosts:
            raise RuntimeError(
                'VTune is not installed in the expected path for hosts %s' %
                ",".join(bad_hosts))

    def _wait_for_completion(self, timeout):
        """Wait up to `timeout` minutes for VTune to finish on every host.

        On large datasets it can take time for the reports to generate. The
        cluster is polled for running VTune processes until none are left or the
        timeout expires. Afterwards _kill_vtune() is called with the last poll
        result so that any unterminated VTune commands are killed.
        """
        grep_dict = {}
        finish_time = datetime.datetime.now() + datetime.timedelta(minutes=timeout)
        while datetime.datetime.now() < finish_time:
            grep_dict = self.cluster_controller.deprecated_run_cmd(
                'ps aux|grep vtune|grep -v grep')
            still_running = any(
                self.__is_not_none_or_empty_str(output)
                for output in grep_dict.values())
            if not still_running:
                # Nothing left to wait for; skip the sleep.
                break
            # TODO: Investigate a better length of time for the sleep period
            # between checks
            time.sleep(5)
        self._kill_vtune(grep_dict)

    def _kill_vtune(self, host_dict):
        """Kill VTune on the hosts whose last process listing was non-empty.

        `host_dict` maps host to the output of the VTune process check. Only
        hosts with a usable name and non-empty output are targeted.
        """
        kill_list = [host for host, output in host_dict.items()
                     if self.__is_not_none_or_empty_str(host)
                     and self.__is_not_none_or_empty_str(output)]
        if kill_list:
            self.cluster_controller.deprecated_run_cmd(self.KILL_CMD, hosts=kill_list)

    def __is_not_none_or_empty_str(self, s):
        """Return True unless `s` is None or the empty string."""
        return s is not None and s != ''