Примеры использования Dataset.col в Python

Язык программирования: Python

Пространство имен/Пакет: data.dataset

Класс/Тип: Dataset

Метод/Функция: col

Примеров на hotexamples.com: 3

Python Dataset.col — найдено 3 примера. Это лучшие примеры Python-кода для data.dataset.Dataset.col, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

Dataset(30)

load_data(5)

load_image(3)

col(3)

get_classnum(3)

get_class_count(2)

test_labels(2)

test_instances(2)

pick_train_mini_batch(2)

load(2)

get_val_inputs(2)

get_max_movie_id(2)

get_class_names(2)

train_instances(2)

from_list(2)

dev_instances(2)

create_iterator(2)

get(1)

pca(1)

mask_test_after(1)

name(1)

names(1)

nb_batch(1)

num_images(1)

num_trans(1)

num_transactions_hourly(1)

parse_linewise(1)

parse_tfrecord(1)

prepare(1)

_plot_config(1)

load_multivariate(1)

preprocess(1)

preprocess_true_data(1)

remover_atributos(1)

save_images(1)

split(1)

state_data(1)

__len__(1)

__init__(1)

test_train_split(1)

time_between_blocks(1)

load_pcd(1)

load_datasets(1)

load_label(1)

get_test_inputs(1)

get_batch(1)

get_batches_per_epoch(1)

full_shape(1)

filter_roi(1)

get_dados(1)

Пример #1

Показать файл

    def _match_on_cpu_time(self,
                           jm_dataset: Dataset,
                           wm_dataset: Dataset,
                           jm_subset=None,
                           wm_subset=None):
        """Pair rows of two datasets whose rounded CPU times are equal.

        The frames compared are ``jm_subset`` / ``wm_subset`` when given,
        otherwise the ``df`` attribute of the respective dataset. Candidate
        pairs are produced by merging on the rounded CPU time, narrowed down
        by ``self._filter_matches`` and finally restricted to rows whose
        left-hand index value occurs exactly once.

        Note that a ``cpuApprox`` column is written into both frames and that
        both frames are passed to ``self._prefix_columns``.
        NOTE(review): ``_prefix_columns`` presumably renames the columns in
        place (its return value is ignored here) -- confirm.

        Returns:
            A frame with two columns, named after the index of the left and
            of the right frame, one row per unambiguous match.
        """
        jm_frame = jm_dataset.df if jm_subset is None else jm_subset
        wm_frame = wm_dataset.df if wm_subset is None else wm_subset

        # Float CPU times rarely compare equal exactly, so the merge key is
        # the value rounded to the nearest integer.
        jm_cpu_col = jm_dataset.col(Metric.CPU_TIME)
        jm_frame['cpuApprox'] = jm_frame[jm_cpu_col].round()
        wm_cpu_col = wm_dataset.col(Metric.CPU_TIME)
        wm_frame['cpuApprox'] = wm_frame[wm_cpu_col].round()

        # Remember the index names before the frames go through prefixing.
        jm_key = jm_frame.index.name
        wm_key = wm_frame.index.name

        self._prefix_columns(jm_frame, 'jmdf_')
        self._prefix_columns(wm_frame, 'wmdf_')

        jm_flat = jm_frame.reset_index()
        wm_flat = wm_frame.reset_index()
        candidates = jm_flat.merge(wm_flat,
                                   left_on='jmdf_cpuApprox',
                                   right_on='wmdf_cpuApprox')

        candidates = self._filter_matches(candidates,
                                          jm_dataset,
                                          wm_dataset,
                                          jmdf_prefix='jmdf_',
                                          wmdf_prefix='wmdf_')

        def _is_unambiguous(group):
            # Keep a left-hand entry only if exactly one candidate remains.
            return len(group) == 1

        unique_matches = candidates.groupby(jm_key).filter(_is_unambiguous)

        return unique_matches[[jm_key, wm_key]]

Пример #2

Показать файл

    def _filter_matches(self,
                        matches,
                        jm_dataset: Dataset,
                        wm_dataset,
                        jmdf_prefix='jmdf_',
                        wmdf_prefix='wmdf_'):
        """Drop candidate matches that disagree on timestamps or workflow.

        Args:
            matches: Frame of candidate pairs holding the prefixed columns of
                both sides.
            jm_dataset: Dataset used to resolve the left-hand column names.
            wm_dataset: Dataset used to resolve the right-hand column names.
            jmdf_prefix: Prefix prepended to left-hand column names.
            wmdf_prefix: Prefix prepended to right-hand column names.

        Returns:
            The rows of ``matches`` for which the start and stop timestamp
            differences (as computed by ``self._timestamp_diff_series``) are
            below ``self.timestamp_tolerance`` and both workflow columns are
            equal.
        """
        for metric in (Metric.START_TIME, Metric.STOP_TIME):
            jm_ts_col = jmdf_prefix + jm_dataset.col(metric)
            wm_ts_col = wmdf_prefix + wm_dataset.col(metric)

            ts_diff = self._timestamp_diff_series(matches[jm_ts_col],
                                                  matches[wm_ts_col])
            # NOTE(review): an earlier, disabled variant additionally kept
            # rows where either timestamp was null.
            within_tolerance = ts_diff < self.timestamp_tolerance
            matches = matches[within_tolerance]

        jm_workflow_col = jmdf_prefix + jm_dataset.col(Metric.WORKFLOW)
        wm_workflow_col = wmdf_prefix + wm_dataset.col(Metric.WORKFLOW)

        # A pair is only plausible if both sides report the same workflow.
        same_workflow = matches[jm_workflow_col] == matches[wm_workflow_col]

        return matches[same_workflow]

Пример #3

Показать файл

    def match_on_workflow(self,
                          jmdf,
                          wmdf,
                          jmset: Dataset,
                          wmset: Dataset,
                          exclusion_limit=200):
        """Match rows of two frames workflow by workflow.

        Both frames are grouped by their workflow column. For every workflow
        present in both, each row of the ``jmdf`` group is passed to
        ``self._filter_by_timestamp`` together with the ``wmdf`` group; when
        exactly one entry comes back, the pair of index values is recorded.

        Args:
            jmdf: Left-hand frame.
            wmdf: Right-hand frame.
            jmset: Dataset used to resolve column names of ``jmdf``.
            wmset: Dataset used to resolve column names of ``wmdf``.
            exclusion_limit: When positive, ``jmdf`` groups with more rows
                than this are skipped.

        Returns:
            A ``pandas.DataFrame`` built from a dict keyed by the index names
            of ``jmdf`` and ``wmdf``, holding the matched index values.
        """
        jm_by_workflow = jmdf.groupby(jmset.col(Metric.WORKFLOW))
        wm_by_workflow = wmdf.groupby(wmset.col(Metric.WORKFLOW))

        compared_so_far = 0
        jm_row_count = jmdf.shape[0]

        matches = {jmdf.index.name: [], wmdf.index.name: []}

        for workflow, jm_group in jm_by_workflow:
            try:
                wm_group = wm_by_workflow.get_group(workflow)
            except KeyError:
                # The other frame has no rows for this workflow.
                continue

            if jm_group.empty or wm_group.empty:
                continue

            # TODO: maybe also compare large groups?
            if exclusion_limit > 0 and len(jm_group) > exclusion_limit:
                continue

            start_message = "Checking for matches in {} WM, {} JM, ".format(
                len(wm_group), len(jm_group))
            logging.debug(start_message)

            self._prefix_columns(jm_group, 'jmdf_')
            self._prefix_columns(wm_group, 'wmdf_')

            matched_in_group = 0
            for jm_index, jm_job in jm_group.iterrows():
                candidates = self._filter_by_timestamp(jm_job,
                                                       wm_group,
                                                       jmset,
                                                       wmset,
                                                       left_prefix='jmdf_',
                                                       right_prefix='wmdf_')

                # Only an unambiguous (single) candidate counts as a match.
                if len(candidates) != 1:
                    continue

                # The row label from iterrows is the index value of the
                # left-hand entry.
                matches[jmdf.index.name].append(jm_index)

                # Index value of the single remaining right-hand entry.
                matches[wmdf.index.name].append(candidates.index.values[0])

                matched_in_group += 1

            compared_so_far += len(jm_group)

            counts_part = "Found {} matches (of {} WM, {} JM, ".format(
                matched_in_group, len(wm_group), len(jm_group))
            progress_part = "{:.4}% compared).".format(
                100 * compared_so_far / jm_row_count)
            logging.debug(counts_part + progress_part)

        return pd.DataFrame.from_dict(matches)