def df_peripheral_clock_effective_rate(self, clk_name):
    """
    Dataframe of the effective rate of a peripheral clock.

    The effective rate equals the configured rate while the clock is
    enabled, and 0 while it is disabled.
    """
    # Note: the kernel still defines a "clock_*" variant for each of these,
    # but it's not actually used anywhere in the code. The new "clk_*"
    # events are the ones we are interested about.
    rate_df = self.trace.df_event('clk_set_rate')
    enable_df = self.trace.df_event('clk_enable').copy()
    disable_df = self.trace.df_event('clk_disable').copy()

    # Tag enable/disable events with an explicit on/off state
    enable_df['state'] = 1
    disable_df['state'] = 0

    # Restrict each event stream to the clock of interest
    def for_clk(df):
        return df[df['name'] == clk_name]

    merged = df_merge(
        (for_clk(rate_df), for_clk(enable_df), for_clk(disable_df))
    ).ffill()

    merged['start'] = merged.index
    df_add_delta(
        merged,
        col='len',
        src_col='start',
        window=self.trace.window,
        inplace=True,
    )

    # The clock only delivers its configured rate while enabled
    merged['effective_rate'] = np.where(merged['state'] == 0, 0, merged['rate'])
    return merged
def _get_active_df(self, cpu):
    """
    :returns: A dataframe that describes the idle status (on/off) of 'cpu'
    """
    signal = self.trace.analysis.idle.signal_cpu_active(cpu)
    df = pd.DataFrame(signal, columns=['state'])
    # Attach the time spent in each state to every row
    df_add_delta(df, window=self.trace.window, inplace=True)
    return df
def df_cluster_idle_state_residency(self, cluster):
    """
    Compute time spent by a given cluster in each idle state.

    :param cluster: list of CPU IDs
    :type cluster: list(int)

    :returns: a :class:`pandas.DataFrame` with:

        * Idle states as index
        * A ``time`` column (The time spent in the idle state)
    """
    idle_df = self.df_cpu_idle()

    # Create a dataframe with a column per CPU
    cols = {
        cpu: group['state']
        for cpu, group in idle_df.groupby(
            'cpu',
            sort=False,
            observed=True,
        )
        if cpu in cluster
    }
    cpus_df = pd.DataFrame(cols, index=idle_df.index)
    # Forward-fill so every row carries the last known state of each CPU.
    # Use ffill() rather than the deprecated fillna(method='ffill'),
    # consistent with the rest of the file.
    cpus_df = cpus_df.ffill()

    # Ensure accurate time-based sum of state deltas. This will extrapolate
    # the known cluster_state both to the left and the right.
    cpus_df = df_refit_index(cpus_df, window=self.trace.window)

    # Each core in a cluster can be in a different idle state, but the
    # cluster lies in the idle state with lowest ID, that is the shallowest
    # idle state among the idle states of its CPUs
    cluster_state = cpus_df.min(axis='columns')
    cluster_state.name = 'cluster_state'
    df = cluster_state.to_frame()

    # For each state transition, sum the time spent in it
    df_add_delta(df, inplace=True)

    # For each cluster state, take the sum of the delta column.
    # The resulting dataframe is indexed by group keys (cluster_state).
    residency = df.groupby('cluster_state', sort=False, observed=True)['delta'].sum()
    residency.name = 'time'
    residency = residency.to_frame()
    residency.index.name = 'idle_state'
    return residency
def df_task_total_residency(self, task):
    """
    DataFrame of a task's execution time on each CPU

    :param task: the task to report runtimes for
    :type task: int or str or tuple(int, str)

    :returns: a :class:`pandas.DataFrame` with:

        * CPU IDs as index
        * A ``runtime`` column (the time the task spent being active)
    """
    df = self.df_task_states(task)
    # Get the correct delta for the window we want.
    df = df_add_delta(df, window=self.trace.window)
    # Only time spent in TASK_ACTIVE counts as runtime
    df = df[df.curr_state == TaskState.TASK_ACTIVE]

    per_cpu = df.groupby("cpu", observed=True, sort=False)["delta"].sum()
    residency_df = pd.DataFrame(per_cpu)
    residency_df.rename(columns={"delta": "runtime"}, inplace=True)

    # CPUs the task never ran on in the window get an explicit 0 runtime
    all_cpus = set(range(self.trace.cpus_count))
    seen_cpus = set(residency_df.index.unique())
    for cpu in all_cpus.difference(seen_cpus):
        residency_df.loc[cpu] = 0.

    residency_df.sort_index(inplace=True)
    return residency_df
def df_tasks_runtime(self):
    """
    DataFrame of the time each task spent in TASK_ACTIVE (:class:`TaskState`)

    :returns: a :class:`pandas.DataFrame` with:

        * PIDs as index
        * A ``comm`` column (the name of the task)
        * A ``runtime`` column (the time that task spent running)
    """
    runtimes = {}
    for task, pid_df in self._df_tasks_states():
        # Make sure to only look at the relevant portion of the dataframe
        # with the window, since we are going to make a time-based sum
        pid_df = df_refit_index(pid_df, window=self.trace.window)
        pid_df = df_add_delta(pid_df)

        # Resolve the comm to the last name of the PID in that window
        comm = pid_df['comm'].unique()[-1]

        active = pid_df[pid_df['curr_state'] == TaskState.TASK_ACTIVE]
        runtimes[task.pid] = (active['delta'].sum(skipna=True), comm)

    df = pd.DataFrame.from_dict(
        runtimes,
        orient="index",
        columns=["runtime", 'comm'],
    )
    df.index.name = "pid"
    df.sort_values(by="runtime", ascending=False, inplace=True)
    return df
def df_cpu_idle_state_residency(self, cpu):
    """
    Compute time spent by a given CPU in each idle state.

    :param cpu: CPU ID
    :type cpu: int

    :returns: a :class:`pandas.DataFrame` with:

        * Idle states as index
        * A ``time`` column (The time spent in the idle state)
    """
    df = self.df_cpu_idle(cpu)

    # Ensure accurate time-based sum of state deltas
    df = df_refit_index(df, window=self.trace.window)
    # For each state, sum the time spent in it
    df = df_add_delta(df)

    residency = {
        cols['state']: state_df['delta'].sum()
        for cols, state_df in df_split_signals(df, ['state'])
    }

    result = pd.DataFrame.from_dict(residency, orient='index', columns=['time'])
    result.index.name = 'idle_state'
    return result
def df_task_total_residency(self, task):
    """
    DataFrame of a task's execution time on each CPU

    :param task: the task to report runtimes for
    :type task: int or str or tuple(int, str)

    :returns: a :class:`pandas.DataFrame` with:

        * CPU IDs as index
        * A ``runtime`` column (the time the task spent being active)
    """
    states = self.df_task_states(task)
    # Get the correct delta for the window we want.
    states = df_add_delta(states, window=self.trace.window, col='runtime')
    active = states[states['curr_state'] == TaskState.TASK_ACTIVE]

    # For each CPU, sum the time spent on each by each task
    residency_df = (
        active
        .groupby('cpu', observed=True, sort=False)['runtime']
        .sum()
        .to_frame()
    )

    # Add runtime for CPUs that did not appear in the window
    full_index = residency_df.index.union(range(self.trace.cpus_count))
    residency_df = residency_df.reindex(full_index)
    return residency_df.fillna(0).sort_index()
def _get_frequency_residency(self, cpus):
    """
    Get a DataFrame with per cluster frequency residency, i.e. amount of
    time spent at a given frequency in each cluster.

    :param cpus: A tuple of CPU IDs
    :type cpus: tuple(int)

    :returns: A :class:`pandas.DataFrame` with:

        * A ``total_time`` column (the total time spent at a frequency)
        * A ``active_time`` column (the non-idle time spent at a frequency)
    """
    freq_df = self.df_cpus_frequency()
    # Assumption: all CPUs in a cluster run at the same frequency, i.e. the
    # frequency is scaled per-cluster not per-CPU. Hence, we can limit the
    # cluster frequencies data to a single CPU.
    self._check_freq_domain_coherency(cpus)

    cluster_freqs = freq_df[freq_df.cpu == cpus[0]]

    # Compute TOTAL Time
    cluster_freqs = df_add_delta(
        cluster_freqs, col="total_time", window=self.trace.window)
    time_df = cluster_freqs[["total_time", "frequency"]].groupby(
        'frequency', observed=True, sort=False).sum()

    # Compute ACTIVE Time
    cluster_active = self.trace.analysis.idle.signal_cluster_active(cpus)

    # In order to compute the active time spent at each frequency we
    # multiply 2 square waves:
    # - cluster_active, a square wave of the form:
    #     cluster_active[t] == 1 if at least one CPU is reported to be
    #                            non-idle by CPUFreq at time t
    #     cluster_active[t] == 0 otherwise
    # - freq_active, square wave of the form:
    #     freq_active[t] == 1 if at time t the frequency is f
    #     freq_active[t] == 0 otherwise
    available_freqs = sorted(cluster_freqs.frequency.unique())
    cluster_freqs = cluster_freqs.join(
        cluster_active.to_frame(name='active'), how='outer')
    # ffill() instead of the deprecated fillna(method='ffill')
    cluster_freqs = cluster_freqs.ffill()
    nonidle_time = []
    for freq in available_freqs:
        # Vectorized comparison instead of a Python-level apply() lambda:
        # same 0/1 square wave, computed in native code.
        freq_active = (cluster_freqs.frequency == freq).astype(int)
        active_t = cluster_freqs.active * freq_active
        # Compute total time by integrating the square wave
        nonidle_time.append(series_integrate(active_t))

    time_df["active_time"] = pd.DataFrame(index=available_freqs, data=nonidle_time)
    return time_df
def df_overutilized(self):
    """
    Get overutilized events

    :returns: A :class:`pandas.DataFrame` with:

        * A ``overutilized`` column (the overutilized status at a given time)
        * A ``len`` column (the time spent in that overutilized status)
    """
    # Build sequence of overutilization "bands"
    df = df_add_delta(
        self.trace.df_event('sched_overutilized'),
        col='len',
        window=self.trace.window,
    )
    # Ignore the last line added by df_refit_index() with a NaN len
    df = df.iloc[:-1]
    # Remove duplicated index events
    df = df_deduplicate(df, keep='last', consecutives=True)
    return df[['len', 'overutilized']]
def df_overutilized(self):
    """
    Get overutilized events

    :returns: A :class:`pandas.DataFrame` with:

        * A ``overutilized`` column (the overutilized status at a given time)
        * A ``len`` column (the time spent in that overutilized status)
    """
    # Build sequence of overutilization "bands"
    events = self.trace.df_event('sched_overutilized')

    # There might be a race between multiple CPUs to emit the
    # sched_overutilized event, so get rid of duplicated events
    events = df_deduplicate(
        events,
        cols=['overutilized'],
        keep='first',
        consecutives=True,
    )

    events = df_add_delta(events, col='len', window=self.trace.window)
    # Ignore the last line added by df_refit_index() with a NaN len
    events = events.iloc[:-1]

    return events[['len', 'overutilized']]
def df_task_activation(self, task, cpu=None, active_value=1, sleep_value=0, preempted_value=np.nan):
    """
    DataFrame of a task's active time on a given CPU

    :param task: the task to report activations of
    :type task: int or str or tuple(int, str)

    :param cpu: the CPUs to look at. If ``None``, all CPUs will be used.
    :type cpu: int or None

    :param active_value: the value to use in the series when task is active.
    :type active_value: float

    :param sleep_value: the value to use in the series when task is sleeping.
    :type sleep_value: float

    :param preempted_value: the value to use in the series when task is
        preempted (runnable but not actually executing).
    :type preempted_value: float

    :returns: a :class:`pandas.DataFrame` with:

        * A timestamp as index
        * A ``active`` column, containing ``active_value`` when the task is
          running, ``sleep_value`` when sleeping, and ``preempted_value``
          otherwise.
        * A ``cpu`` column with the CPU the task was running on.
        * A ``duration`` column containing the duration of the current sleep
          or activation.
        * A ``duty_cycle`` column containing the duty cycle in ``[0...1]``
          of the task, updated at each pair of activation and sleep.
    """
    df = self.df_task_states(task)

    def f(state):
        if state == TaskState.TASK_ACTIVE:
            return active_value
        # TASK_RUNNING happens when a task is preempted (so it's not
        # TASK_ACTIVE anymore but still runnable)
        elif state == TaskState.TASK_RUNNING:
            # Return NaN regardless of preempted_value, since some below
            # code relies on that
            return np.nan
        else:
            return sleep_value

    if cpu is not None:
        df = df[df['cpu'] == cpu]

    df = df.copy()

    # TASK_WAKING can just be removed. The delta will then be computed
    # without it, which means the time spent in WAKING state will be
    # accounted into the previous state.
    df = df[df['curr_state'] != TaskState.TASK_WAKING]

    df['active'] = df['curr_state'].map(f)
    df = df[['active', 'cpu']]

    # Only keep first occurrence of each adjacent duplicates, since we get
    # events when the signal changes
    df = df_deduplicate(df, consecutives=True, keep='first')

    # Once we removed the duplicates, we can compute the time spent while
    # sleeping or activating
    df_add_delta(df, col='duration', inplace=True)

    # Make a dataframe where the rows corresponding to preempted time are
    # removed
    preempt_free_df = df.dropna().copy()

    # Merge consecutive activations' duration. They could have been
    # split in two by a bit of preemption, and we don't want that to
    # affect the duty cycle.
    df_combine_duplicates(
        preempt_free_df,
        cols=['active'],
        func=lambda df: df['duration'].sum(),
        output_col='duration',
        inplace=True,
    )

    sleep = preempt_free_df[preempt_free_df['active'] == sleep_value]['duration']
    active = preempt_free_df[preempt_free_df['active'] == active_value]['duration']
    # Pair an activation time with its following sleep time
    sleep = sleep.reindex(active.index, method='bfill')
    duty_cycle = active / (active + sleep)

    df['duty_cycle'] = duty_cycle
    # ffill() instead of the deprecated chained fillna(method='ffill',
    # inplace=True), which no longer updates the parent frame in pandas >= 2
    df['duty_cycle'] = df['duty_cycle'].ffill()

    if not np.isnan(preempted_value):
        # Same: avoid chained-assignment inplace fillna
        df['active'] = df['active'].fillna(preempted_value)

    return df