Example #1
0
def main(argv):
    """Plot the per-user class balance for each SS-DA dataset.

    For every user in each dataset we load the full data (train/valid/test
    combined), compute the class balance, and write one PDF plot per dataset
    named ``class_balance_<dataset>.pdf``.
    """
    # Don't bother using the GPU for this
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # ACM doesn't like Type 3 fonts
    # https://tex.stackexchange.com/q/18687
    plt.rc('pdf', fonttype=42)
    plt.rc('ps', fonttype=42)

    # We only want to plot this one dataset
    list_of_datasets = ["wisdm_ar", "ucihar", "uwave", "ucihhar"]
    dataset_names_nice = ["WISDM AR", "UCI HAR", "uWave", "UCI HHAR"]
    # Get only the ones used in the SS-DA experiments
    dataset_which = [
        [1, 3, 4, 2, 25, 7, 21, 2, 1, 0, 11, 15, 25, 29, 30, 31, 32, 7, 8],
        # [2, 7, 12, 12, 9, 14, 18, 6, 7, 17, 11, 13, 16, 18, 18, 19, 23, 24, 25],
        # [2, 3, 4, 2, 1, 2, 3, 1, 4, 7, 5, 6, 7, 8],
        # [1, 3, 4, 0, 1, 4, 5, 2, 3, 5, 3, 5, 6, 7, 8],
        # Just do for the first two adaptation problems in SS-DA experiments
        [2, 11, 7, 13],
        [2, 5, 3, 5],
        [1, 3, 3, 5],
    ]
    ymaxs = [70, 35, 16, 28]
    # We mostly care about WISDM AR and don't have enough space for all of them
    first_ns = [None, None, None, None]

    # Walk the parallel per-dataset configuration lists in lockstep rather
    # than indexing each of them by position.
    for dataset_name, dataset_name_nice, which, ymax, first_n in zip(
            list_of_datasets, dataset_names_nice, dataset_which, ymaxs,
            first_ns):
        # Get class balance for all users
        user_source_pairs = []

        for user in datasets.get_dataset_users(dataset_name):
            # Note: train_on_everything=True means the training dataset consists
            # of all train/valid/test data.
            sources, _ = load_da(dataset_name, str(user), "",
                train_on_everything=True)

            # We load them one at a time
            assert len(sources) == 1
            source = sources[0]

            user_source_pairs.append((user, source))

        balance_data = compute_class_balances(dataset_name, user_source_pairs)

        # Plot it
        class_labels = datasets.get_dataset(dataset_name).class_labels
        generate_plot(dataset_name_nice, class_labels, balance_data,
            filename="class_balance_" + dataset_name + ".pdf",
            which=which, ymax=ymax, first_n=first_n)
Example #2
0
def main(argv):
    """Tally training-sample counts for every (dataset, target user) pair.

    Prints the resulting nested dictionary under the name
    ``dataset_target_training_sample_counts``.
    """
    counts = collections.defaultdict(dict)

    for name in datasets.list_datasets():
        # test=False so we only look at the training samples, which is what
        # we will vary in the vary-amount-of-target-data experiments
        for uid in datasets.get_dataset_users(name):
            sources, _ = load_da(name, str(uid), "", test=False)
            counts[name][uid] = count_training_samples(sources)

    print_dictionary(counts, "dataset_target_training_sample_counts")
Example #3
0
def main(argv):
    """Print per-user statistics of the training split for every dataset."""
    # Don't bother using the GPU for this
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # test=False so we only look at the training samples, where train=80% of
    # the training set and test=20% of the training set (the validation set).
    test = False

    for name in datasets.list_datasets():
        for uid in datasets.get_dataset_users(name):
            sources, _ = load_da(name, str(uid), "", test=test)
            assert len(sources) == 1  # datasets are loaded one at a time
            print_stats(name + "_" + str(uid), sources[0], test=test)
def main(argv):
    """Print the class balance of the training split for every dataset user."""
    # Don't bother using the GPU for this
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    for name in datasets.list_datasets():
        pairs = []

        for uid in datasets.get_dataset_users(name):
            # test=False so we only look at the training samples, where
            # train=80% of the training set and test=20% of the training set
            # (i.e. the validation set)
            sources, _ = load_da(name, str(uid), "", test=False)

            # Loaded one at a time, so there is exactly one source
            assert len(sources) == 1
            pairs.append((uid, sources[0]))

        print_class_balances(name, pairs)
Example #5
0
        pairs.append((dataset_name, str(source_user), str(target_user)))

    return pairs


if __name__ == "__main__":
    # Sources-target pairs for training
    pairs = []
    uids = []

    for name in datasets.list_datasets():
        # Tune on "watch_noother" not "watch"
        if name == "watch":
            continue

        users = datasets.get_dataset_users(name)

        # Since sources-target aren't stored in filename anymore (too long), we
        # would run into folder name conflicts if we didn't append a unique ID
        # to each sources-target pair
        uid = 0

        # Make this repeatable
        random.seed(42)

        # Allows extra max_users for some datasets without changin uid's
        #
        # TODO get rid of all this confusing code once we decide what number
        # to set max_users to. If we don't need to change max_users, then
        # we can just increment uid's like before.
        bonus_uid = 0