Example #1
def source_selector(parent, modality="all", location="all"):
    locations_set = set(get_ancestral_metadata(parent, "locations"))
    assert location in locations_set or location == "all", f"Location {location} not in {locations_set}"

    modality_set = set(get_ancestral_metadata(parent, "modalities"))
    assert modality in modality_set or modality == "all", f"Modality {modality} not in {modality_set}"

    loc, mod = location, modality
    root = parent / f"{loc=}-{mod=}"

    # Prepare the set of viable source keys
    valid_keys = set()
    for pair in parent.meta["sources"]:
        loc, mod = pair["loc"], pair["mod"]
        good_location = location == "all" or loc == location
        good_modality = modality == "all" or mod == modality
        if good_location and good_modality:
            valid_keys.add(Key(f"{loc=}-{mod=}"))

    # Aggregate all relevant sources
    selected = 0
    for key, node in parent.outputs.items():
        if key in valid_keys:
            selected += 1
            root.acquire_node(key=key, node=node)

    if not selected:
        msg = f"No wearable keys found in {sorted(parent.outputs.keys())}"
        logger.error(msg)
        raise KeyError(msg)

    return root
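A minimal usage sketch, assuming `dataset_importer` (Example #5) yields a graph node whose metadata lists its sources; the pamap2 chest/accel pair is taken from the alignment table in Example #7:

# Hypothetical usage: keep only chest-mounted accelerometer streams.
dataset = dataset_importer("pamap2")
wearables = source_selector(dataset, modality="accel", location="chest")
print(sorted(wearables.outputs.keys()))  # the surviving "loc=...-mod=..." keys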
Example #2
def window(parent, win_len, win_inc):
    root = parent / f"{win_len=:03.2f}-{win_inc=:03.2f}"

    fs = get_ancestral_metadata(root, "fs")

    kwargs = dict(index=parent.index["index"],
                  win_len=win_len,
                  win_inc=win_inc,
                  fs=fs)

    # Build index outputs
    for key, node in parent.index.items():
        root.instantiate_node(key=key,
                              func=PartitionByTrial(window_index),
                              kwargs=dict(data=node, **kwargs),
                              backend="pandas")

    # Build Data outputs
    for key, node in parent.outputs.items():
        root.instantiate_node(
            key=key,
            func=PartitionByTrial(window_data),
            kwargs=dict(data=node, **kwargs),
            backend="none",
        )

    return root
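Continuing the sketch from Example #1 (windowing requires an "fs" entry in the ancestral metadata, which the raw dataset is assumed to provide); the window parameters are the defaults from `basic_har` below:

# Hypothetical usage: 3 s windows with a 1 s increment.
windowed = window(wearables, win_len=3, win_inc=1)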
Example #3
def resample(parent, fs_new):
    fs_old = get_ancestral_metadata(parent, "fs")

    root = parent / f"{fs_new}Hz"
    root.meta.insert("fs", fs_new)

    kwargs = dict(fs_old=fs_old, fs_new=fs_new)

    if fs_old != fs_new:
        # Only compute indexes and outputs if the sample rate has changed
        for key, node in parent.index.items():
            root.instantiate_node(
                key=key,
                backend="pandas",
                func=PartitionByTrial(resample_metadata),
                kwargs=dict(index=parent.index["index"],
                            data=node,
                            is_index="index" in str(key),
                            **kwargs),
            )

        for key, node in parent.outputs.items():
            root.instantiate_node(
                key=key,
                func=PartitionByTrial(resample_data),
                kwargs=dict(index=parent.index["index"], data=node, **kwargs),
            )

    return root
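In the usual composition, resampling sits between selection and windowing, and it advertises the new rate through the graph metadata for later stages to read. A small sketch of that contract, continuing the sketch above with the 33 Hz default from `basic_har`:

# Hypothetical pipeline fragment: select -> resample -> window at 33 Hz.
resampled = resample(wearables, fs_new=33)
assert get_ancestral_metadata(resampled, "fs") == 33  # recorded by root.meta.insert
windowed = window(resampled, win_len=3, win_inc=1)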
Example #4
def statistical_features(parent):
    """
    There are two feature categories defined here:
      1. Time domain
      2. Frequency domain

    And these get mapped from transformed data from two sources:
      1. Acceleration
      2. Gyroscope

    Assuming these two sources have gone through some body/gravity transformations
    (eg from src.transformations.body_grav_filt) there will actually be several
    more sources, eg:
      1. accel-body
      2. accel-body-jerk
      3. accel-grav
      4. gyro-body
      5. gyro-body-jerk

    With more data sources this list grows quickly.

    The feature types (time and frequency domain) are mapped to the transformed
    sources in a particular way. For example, the frequency domain features are
    *not* calculated on the gravity data sources. The loop below iterates through
    all of the outputs of the previous node in the graph, and the logic within
    the loop manages the correct mapping of functions to sources.

    Consult the dataset table (tables/datasets.md) and see anguita2013 for
    details.
    """

    root = parent / "statistical_features"

    fs = get_ancestral_metadata(root, "fs")

    accel_key = "mod='accel'"
    gyro_key = "mod='gyro'"
    mag_key = "mod='mag'"

    for key, node in parent.outputs.items():
        key_td = f"{key}-feat='td'"
        key_fd = f"{key}-feat='fd'"

        t_kwargs = dict(data=node)
        f_kwargs = dict(data=node, fs=fs)

        if accel_key in key:
            root.instantiate_node(key=key_td, func=t_feat, kwargs=t_kwargs, backend="numpy")
            if "grav" not in key:
                root.instantiate_node(key=key_fd, func=f_feat, kwargs=f_kwargs, backend="numpy")
        if gyro_key in key or mag_key in key:
            root.instantiate_node(key=key_td, func=t_feat, kwargs=t_kwargs, backend="numpy")
            root.instantiate_node(key=key_fd, func=f_feat, kwargs=f_kwargs, backend="numpy")

    return root.instantiate_node(
        key="features", func=np.concatenate, args=[sorted_node_values(root.outputs)], kwargs=dict(axis=1)
    )
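A sketch of the feature stage over the windowed node from the earlier sketches; the comments simply restate what the loop above produces:

# Hypothetical usage: one td node per source, fd nodes except for gravity sources,
# e.g. "loc='chest'-mod='accel'-feat='td'" and, where applicable, "...-feat='fd'".
features = statistical_features(windowed)
# The returned node column-stacks (axis=1) all feature nodes in sorted key order.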
Example #5
def basic_har(
    #
    # Dataset
    dataset_name="pamap2",
    #
    # Representation sources
    modality="all",
    location="all",
    #
    # Task/split
    task_name="har",
    data_partition="predefined",
    #
    # Windowification
    fs_new=33,
    win_len=3,
    win_inc=1,
    #
    # Features
    feat_name="ecdf",
    clf_name="rf",
    #
    # Embedding visualisation
    viz=False,
    evaluate=False,
):
    dataset = dataset_importer(dataset_name)

    # Resample, filter and window the raw sensor data
    wear_windowed = get_windowed_wearables(dataset=dataset,
                                           modality=modality,
                                           location=location,
                                           fs_new=fs_new,
                                           win_len=win_len,
                                           win_inc=win_inc)

    # Extract features
    features = get_features(feat_name=feat_name, windowed_data=wear_windowed)

    # Visualise the feature embeddings
    if viz:
        umap_embedding(features, task_name=task_name).evaluate()

    # Get classifier params
    models = dict()
    train_test_splits = get_ancestral_metadata(
        features, "data_partitions")[data_partition]
    for train_test_split in randomised_order(train_test_splits):
        models[train_test_split] = get_classifier(
            clf_name=clf_name,
            feature_node=features,
            task_name=task_name,
            data_partition=data_partition,
            evaluate=evaluate,
            train_test_split=train_test_split,
        )

    return features, models
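An end-to-end invocation sketch; the dataset/location/modality triple comes from the alignment table in Example #7, and `evaluate=True` is the only illustrative override:

# Hypothetical run: one classifier per predefined train/test split.
features, models = basic_har(dataset_name="anguita2013",
                             location="waist",
                             modality="accel",
                             evaluate=True)
for split, model in models.items():
    print(split, model)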
Example #6
def basic_ensemble(
    dataset_name="anguita2013",
    modality="all",
    location="all",
    task_name="har",
    feat_name="ecdf",
    data_partition="predefined",
    fs_new=33,
    win_len=3,
    win_inc=1,
):
    dataset = dataset_importer(dataset_name)

    windowed_data = get_windowed_wearables(dataset=dataset,
                                           modality=modality,
                                           location=location,
                                           fs_new=fs_new,
                                           win_len=win_len,
                                           win_inc=win_inc)

    models = dict()

    train_test_splits = get_ancestral_metadata(
        windowed_data, "data_partitions")[data_partition]
    for train_test_split in randomised_order(train_test_splits):
        models[train_test_split] = ensemble_classifier(
            feat_name=feat_name,
            task_name=task_name,
            data_partition=data_partition,
            windowed_data=windowed_data,
            train_test_split=train_test_split,
            clf_names=["sgd", "lr", "rf"],
            evaluate=True,
        )

    return models
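The ensemble counterpart, again with a triple drawn from the alignment table in Example #7:

# Hypothetical ensemble run over the sgd, lr and rf base classifiers.
models = basic_ensemble(dataset_name="pamap2", location="chest", modality="accel")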
Example #7
def har_chain(
    test_dataset="anguita2013",
    fs_new=33,
    win_len=3,
    win_inc=1,
    task_name="har",
    data_partition="predefined",
    feat_name="ecdf",
    clf_name="sgd",
    evaluate=False,
):
    # Make metadata for the experiment
    kwargs = dict(fs_new=fs_new,
                  win_len=win_len,
                  win_inc=win_inc,
                  task_name=task_name,
                  feat_name=feat_name,
                  clf_name=clf_name)

    dataset_alignment = dict(
        anguita2013=dict(dataset_name="anguita2013",
                         location="waist",
                         modality="accel"),
        pamap2=dict(dataset_name="pamap2", location="chest", modality="accel"),
        uschad=dict(dataset_name="uschad", location="waist", modality="accel"),
    )

    # Extract the representation for the test dataset
    test_kwargs = dataset_alignment.pop(test_dataset)
    features, test_models = basic_har(data_partition="predefined",
                                      **test_kwargs,
                                      **kwargs)

    # Instantiate the root directory
    root = features.graph / f"chained-from-{sorted(dataset_alignment.keys())}"

    # Build up the list of models from aux datasets
    auxiliary_models = {
        train_test_split: [model]
        for train_test_split, model in test_models.items()
    }
    for model_kwargs in dataset_alignment.values():
        _, aux_models = basic_har(data_partition="deployable",
                                  **model_kwargs,
                                  **kwargs)
        for split, model in aux_models.items():
            auxiliary_models[split].append(model)

    models = dict()

    # Perform the chaining
    train_test_splits = get_ancestral_metadata(
        features, "data_partitions")[data_partition]
    for train_test_split in randomised_order(train_test_splits):
        aux_probs = [features] + [
            model.predict_proba(features)
            for model in auxiliary_models[train_test_split]
        ]
        prob_features = root.instantiate_orphan_node(func=np.concatenate,
                                                     args=[aux_probs],
                                                     kwargs=dict(axis=1))

        models[train_test_split] = get_classifier(
            clf_name=clf_name,
            feature_node=prob_features,
            task_name=task_name,
            data_partition=data_partition,
            train_test_split=train_test_split,
            evaluate=evaluate,
        )

    return features, models
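A chaining sketch: one dataset is held out for testing while the remaining aligned datasets contribute deployable auxiliary models:

# Hypothetical chained run: uschad held out; anguita2013 and pamap2 auxiliary.
features, models = har_chain(test_dataset="uschad")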
Example #8
    def __init__(self, name, *args, **kwargs):
        super().__init__(name=f"datasets/{name}",
                         meta=DatasetMeta(name))

        def load_meta(*args, **kwargs):
            return self.meta.meta

        load_meta.__name__ = name

        metadata = self.instantiate_node(key=f"{name}-metadata",
                                         backend="yaml",
                                         func=load_meta,
                                         kwargs=dict())

        # Derive the extraction directory from the first download URL; datasets
        # may override the archive name via an "unzip_path" callable kwarg.
        zip_name = kwargs.get("unzip_path", lambda x: x)(splitext(
            basename(self.meta.meta["download_urls"][0]))[0])
        self.unzip_path = join(self.meta.zip_path, splitext(zip_name)[0])

        index = self.instantiate_node(
            key="index",
            func=self.build_index,
            backend="pandas",
            kwargs=dict(path=self.unzip_path, metadata=metadata),
        )

        # Build the data partition indexes
        self.instantiate_node(
            key="predefined",
            func=self.build_predefined,
            backend="pandas",
            kwargs=dict(path=self.unzip_path, metadata=metadata),
        )

        data_partitions = get_ancestral_metadata(self, "data_partitions")

        self.instantiate_node(
            key="loso",
            func=self.build_loso,
            backend="pandas",
            kwargs=dict(index=index, columns=data_partitions["loso"]),
        )

        self.instantiate_node(
            key="deployable",
            func=self.build_deployable,
            backend="pandas",
            kwargs=dict(index=index, columns=data_partitions["deployable"]),
        )

        tasks = get_ancestral_metadata(self, "tasks")
        for task in tasks:
            self.instantiate_node(
                key=task,
                func=self.build_label,
                backend="pandas",
                kwargs=dict(path=self.unzip_path,
                            task=task,
                            inv_lookup=self.meta.inv_lookup[task],
                            metadata=metadata),
            )

        # Build list of outputs
        for placement_modality in self.meta["sources"]:
            loc = placement_modality["loc"]
            mod = placement_modality["mod"]

            self.instantiate_node(
                key=f"{loc=}-{mod=}",
                func=self.build_data,
                backend="numpy",
                kwargs=dict(loc=loc, mod=mod, metadata=metadata),
            )
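Finally, a construction sketch for Example #8; instantiating `Dataset` directly (rather than via `dataset_importer`) is an assumption, since only `__init__` is shown here:

# Hypothetical construction: builds metadata, index, partition, label and data nodes.
dataset = Dataset("anguita2013")
print(sorted(dataset.outputs.keys()))  # includes keys like "loc='waist'-mod='accel'"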