def df_peripheral_clock_effective_rate(self, clk_name):
    """
    Dataframe of the effective rate of a peripheral clock.

    The effective rate equals the configured rate while the clock is
    enabled, and 0 while it is disabled.
    """
    # Note: the kernel still defines a "clock_*" variant for each of these,
    # but it's not actually used anywhere in the code. The new "clk_*"
    # events are the ones we are interested about.
    rate_df = self.trace.df_event('clk_set_rate')
    enable_df = self.trace.df_event('clk_enable').copy()
    disable_df = self.trace.df_event('clk_disable').copy()

    # Tag enable/disable events with an explicit on/off state
    enable_df['state'] = 1
    disable_df['state'] = 0

    # Restrict each event stream to the clock of interest
    def for_clk(df):
        return df[df['name'] == clk_name]

    merged = df_merge(
        (for_clk(rate_df), for_clk(enable_df), for_clk(disable_df))
    ).ffill()

    merged['start'] = merged.index
    df_add_delta(
        merged,
        col='len',
        src_col='start',
        window=self.trace.window,
        inplace=True,
    )

    # The clock only delivers its configured rate while enabled
    merged['effective_rate'] = np.where(merged['state'] == 0, 0, merged['rate'])
    return merged
def _get_active_df(self, cpu):
    """
    :returns: A dataframe that describes the idle status (on/off) of 'cpu'
    """
    signal = self.trace.analysis.idle.signal_cpu_active(cpu)
    df = pd.DataFrame(signal, columns=['state'])
    # Attach the time spent in each state to every row
    df_add_delta(df, window=self.trace.window, inplace=True)
    return df
def df_cluster_idle_state_residency(self, cluster):
    """
    Compute time spent by a given cluster in each idle state.

    :param cluster: list of CPU IDs
    :type cluster: list(int)

    :returns: a :class:`pandas.DataFrame` with:

        * Idle states as index
        * A ``time`` column (The time spent in the idle state)
    """
    idle_df = self.df_cpu_idle()

    # Create a dataframe with a column per CPU
    cols = {
        cpu: group['state']
        for cpu, group in idle_df.groupby(
            'cpu',
            sort=False,
            observed=True,
        )
        if cpu in cluster
    }
    cpus_df = pd.DataFrame(cols, index=idle_df.index)
    # Forward-fill so every row carries the last known state of each CPU.
    # Use ffill() rather than the deprecated fillna(method='ffill'),
    # consistent with the rest of the file.
    cpus_df = cpus_df.ffill()

    # Ensure accurate time-based sum of state deltas. This will extrapolate
    # the known cluster_state both to the left and the right.
    cpus_df = df_refit_index(cpus_df, window=self.trace.window)

    # Each core in a cluster can be in a different idle state, but the
    # cluster lies in the idle state with lowest ID, that is the shallowest
    # idle state among the idle states of its CPUs
    cluster_state = cpus_df.min(axis='columns')
    cluster_state.name = 'cluster_state'
    df = cluster_state.to_frame()

    # For each state transition, sum the time spent in it
    df_add_delta(df, inplace=True)

    # For each cluster state, take the sum of the delta column.
    # The resulting dataframe is indexed by group keys (cluster_state).
    residency = df.groupby('cluster_state', sort=False, observed=True)['delta'].sum()
    residency.name = 'time'
    residency = residency.to_frame()
    residency.index.name = 'idle_state'
    return residency
def df_task_total_residency(self, task):
    """
    DataFrame of a task's execution time on each CPU

    :param task: the task to report runtimes for
    :type task: int or str or tuple(int, str)

    :returns: a :class:`pandas.DataFrame` with:

        * CPU IDs as index
        * A ``runtime`` column (the time the task spent being active)
    """
    df = self.df_task_states(task)
    # Get the correct delta for the window we want.
    df = df_add_delta(df, window=self.trace.window)
    # Only time spent in TASK_ACTIVE counts as runtime
    df = df[df.curr_state == TaskState.TASK_ACTIVE]

    per_cpu = df.groupby("cpu", observed=True, sort=False)["delta"].sum()
    residency_df = pd.DataFrame(per_cpu)
    residency_df.rename(columns={"delta": "runtime"}, inplace=True)

    # CPUs the task never ran on in the window get an explicit 0 runtime
    all_cpus = set(range(self.trace.cpus_count))
    seen_cpus = set(residency_df.index.unique())
    for cpu in all_cpus.difference(seen_cpus):
        residency_df.loc[cpu] = 0.

    residency_df.sort_index(inplace=True)
    return residency_df
def df_tasks_runtime(self):
    """
    DataFrame of the time each task spent in TASK_ACTIVE (:class:`TaskState`)

    :returns: a :class:`pandas.DataFrame` with:

        * PIDs as index
        * A ``comm`` column (the name of the task)
        * A ``runtime`` column (the time that task spent running)
    """
    runtimes = {}
    for task, pid_df in self._df_tasks_states():
        # Make sure to only look at the relevant portion of the dataframe
        # with the window, since we are going to make a time-based sum
        pid_df = df_refit_index(pid_df, window=self.trace.window)
        pid_df = df_add_delta(pid_df)

        # Resolve the comm to the last name of the PID in that window
        comm = pid_df['comm'].unique()[-1]

        active = pid_df[pid_df['curr_state'] == TaskState.TASK_ACTIVE]
        runtimes[task.pid] = (active['delta'].sum(skipna=True), comm)

    df = pd.DataFrame.from_dict(
        runtimes,
        orient="index",
        columns=["runtime", 'comm'],
    )
    df.index.name = "pid"
    df.sort_values(by="runtime", ascending=False, inplace=True)
    return df
def df_cpu_idle_state_residency(self, cpu):
    """
    Compute time spent by a given CPU in each idle state.

    :param cpu: CPU ID
    :type cpu: int

    :returns: a :class:`pandas.DataFrame` with:

        * Idle states as index
        * A ``time`` column (The time spent in the idle state)
    """
    df = self.df_cpu_idle(cpu)

    # Ensure accurate time-based sum of state deltas
    df = df_refit_index(df, window=self.trace.window)
    # For each state, sum the time spent in it
    df = df_add_delta(df)

    residency = {
        cols['state']: state_df['delta'].sum()
        for cols, state_df in df_split_signals(df, ['state'])
    }

    result = pd.DataFrame.from_dict(residency, orient='index', columns=['time'])
    result.index.name = 'idle_state'
    return result
def df_task_total_residency(self, task):
    """
    DataFrame of a task's execution time on each CPU

    :param task: the task to report runtimes for
    :type task: int or str or tuple(int, str)

    :returns: a :class:`pandas.DataFrame` with:

        * CPU IDs as index
        * A ``runtime`` column (the time the task spent being active)
    """
    states = self.df_task_states(task)
    # Get the correct delta for the window we want.
    states = df_add_delta(states, window=self.trace.window, col='runtime')
    active = states[states['curr_state'] == TaskState.TASK_ACTIVE]

    # For each CPU, sum the time spent on each by each task
    residency_df = (
        active
        .groupby('cpu', observed=True, sort=False)['runtime']
        .sum()
        .to_frame()
    )

    # Add runtime for CPUs that did not appear in the window
    full_index = residency_df.index.union(range(self.trace.cpus_count))
    residency_df = residency_df.reindex(full_index)
    return residency_df.fillna(0).sort_index()
def _get_frequency_residency(self, cpus):
    """
    Get a DataFrame with per cluster frequency residency, i.e. amount of
    time spent at a given frequency in each cluster.

    :param cpus: A tuple of CPU IDs
    :type cpus: tuple(int)

    :returns: A :class:`pandas.DataFrame` with:

        * A ``total_time`` column (the total time spent at a frequency)
        * A ``active_time`` column (the non-idle time spent at a frequency)
    """
    freq_df = self.df_cpus_frequency()
    # Assumption: all CPUs in a cluster run at the same frequency, i.e. the
    # frequency is scaled per-cluster not per-CPU. Hence, we can limit the
    # cluster frequencies data to a single CPU.
    self._check_freq_domain_coherency(cpus)

    cluster_freqs = freq_df[freq_df.cpu == cpus[0]]

    # Compute TOTAL Time
    cluster_freqs = df_add_delta(
        cluster_freqs, col="total_time", window=self.trace.window)
    time_df = cluster_freqs[["total_time", "frequency"]].groupby(
        'frequency', observed=True, sort=False).sum()

    # Compute ACTIVE Time
    cluster_active = self.trace.analysis.idle.signal_cluster_active(cpus)

    # In order to compute the active time spent at each frequency we
    # multiply 2 square waves:
    # - cluster_active, a square wave of the form:
    #     cluster_active[t] == 1 if at least one CPU is reported to be
    #                            non-idle by CPUFreq at time t
    #     cluster_active[t] == 0 otherwise
    # - freq_active, square wave of the form:
    #     freq_active[t] == 1 if at time t the frequency is f
    #     freq_active[t] == 0 otherwise
    available_freqs = sorted(cluster_freqs.frequency.unique())
    cluster_freqs = cluster_freqs.join(
        cluster_active.to_frame(name='active'), how='outer')
    # ffill() instead of the deprecated fillna(method='ffill')
    cluster_freqs = cluster_freqs.ffill()
    nonidle_time = []
    for freq in available_freqs:
        # Vectorized comparison instead of a Python-level apply() lambda:
        # same 0/1 square wave, computed in native code.
        freq_active = (cluster_freqs.frequency == freq).astype(int)
        active_t = cluster_freqs.active * freq_active
        # Compute total time by integrating the square wave
        nonidle_time.append(series_integrate(active_t))

    time_df["active_time"] = pd.DataFrame(index=available_freqs, data=nonidle_time)
    return time_df
def df_overutilized(self):
    """
    Get overutilized events

    :returns: A :class:`pandas.DataFrame` with:

        * A ``overutilized`` column (the overutilized status at a given time)
        * A ``len`` column (the time spent in that overutilized status)
    """
    # Build sequence of overutilization "bands"
    df = df_add_delta(
        self.trace.df_event('sched_overutilized'),
        col='len',
        window=self.trace.window,
    )
    # Ignore the last line added by df_refit_index() with a NaN len
    df = df.iloc[:-1]
    # Remove duplicated index events
    df = df_deduplicate(df, keep='last', consecutives=True)
    return df[['len', 'overutilized']]
def df_overutilized(self):
    """
    Get overutilized events

    :returns: A :class:`pandas.DataFrame` with:

        * A ``overutilized`` column (the overutilized status at a given time)
        * A ``len`` column (the time spent in that overutilized status)
    """
    # Build sequence of overutilization "bands"
    events = self.trace.df_event('sched_overutilized')

    # There might be a race between multiple CPUs to emit the
    # sched_overutilized event, so get rid of duplicated events
    events = df_deduplicate(
        events,
        cols=['overutilized'],
        keep='first',
        consecutives=True,
    )

    events = df_add_delta(events, col='len', window=self.trace.window)
    # Ignore the last line added by df_refit_index() with a NaN len
    events = events.iloc[:-1]

    return events[['len', 'overutilized']]
def df_task_activation(self, task, cpu=None, active_value=1, sleep_value=0, preempted_value=np.nan):
    """
    DataFrame of a task's active time on a given CPU

    :param task: the task to report activations of
    :type task: int or str or tuple(int, str)

    :param cpu: the CPUs to look at. If ``None``, all CPUs will be used.
    :type cpu: int or None

    :param active_value: the value to use in the series when task is active.
    :type active_value: float

    :param sleep_value: the value to use in the series when task is sleeping.
    :type sleep_value: float

    :param preempted_value: the value to use in the series when task is
        preempted (runnable but not actually executing).
    :type preempted_value: float

    :returns: a :class:`pandas.DataFrame` with:

        * A timestamp as index
        * A ``active`` column, containing ``active_value`` when the task is
          running, ``sleep_value`` when sleeping, and ``preempted_value``
          otherwise.
        * A ``cpu`` column with the CPU the task was running on.
        * A ``duration`` column containing the duration of the current sleep
          or activation.
        * A ``duty_cycle`` column containing the duty cycle in ``[0...1]``
          of the task, updated at each pair of activation and sleep.
    """
    df = self.df_task_states(task)

    def f(state):
        if state == TaskState.TASK_ACTIVE:
            return active_value
        # TASK_RUNNING happens when a task is preempted (so it's not
        # TASK_ACTIVE anymore but still runnable)
        elif state == TaskState.TASK_RUNNING:
            # Return NaN regardless of preempted_value, since some below
            # code relies on that
            return np.nan
        else:
            return sleep_value

    if cpu is not None:
        df = df[df['cpu'] == cpu]

    df = df.copy()

    # TASK_WAKING can just be removed. The delta will then be computed
    # without it, which means the time spent in WAKING state will be
    # accounted into the previous state.
    df = df[df['curr_state'] != TaskState.TASK_WAKING]

    df['active'] = df['curr_state'].map(f)
    df = df[['active', 'cpu']]

    # Only keep first occurrence of each adjacent duplicates, since we get
    # events when the signal changes
    df = df_deduplicate(df, consecutives=True, keep='first')

    # Once we removed the duplicates, we can compute the time spent while
    # sleeping or activating
    df_add_delta(df, col='duration', inplace=True)

    # Make a dataframe where the rows corresponding to preempted time are
    # removed
    preempt_free_df = df.dropna().copy()

    # Merge consecutive activations' duration. They could have been
    # split in two by a bit of preemption, and we don't want that to
    # affect the duty cycle.
    df_combine_duplicates(
        preempt_free_df,
        cols=['active'],
        func=lambda df: df['duration'].sum(),
        output_col='duration',
        inplace=True,
    )

    sleep = preempt_free_df[preempt_free_df['active'] == sleep_value]['duration']
    active = preempt_free_df[preempt_free_df['active'] == active_value]['duration']
    # Pair an activation time with its following sleep time
    sleep = sleep.reindex(active.index, method='bfill')
    duty_cycle = active / (active + sleep)

    df['duty_cycle'] = duty_cycle
    # ffill() instead of the deprecated chained fillna(method='ffill',
    # inplace=True), which no longer updates the parent frame in pandas >= 2
    df['duty_cycle'] = df['duty_cycle'].ffill()

    if not np.isnan(preempted_value):
        # Same: avoid chained-assignment inplace fillna
        df['active'] = df['active'].fillna(preempted_value)

    return df