Пример #1
0
def get_corpus(c, encoding='utf-8'):
    """Resolve ``c`` to a list of corpus documents.

    Resolution order:

    1. If ``c`` is a member of ``corpora``, the corpus is read from the
       ``<datadir>/corpora/<c>.npy`` cache file.  On a cache miss it is
       fetched with ``hypertools.load``, written to the cache, and returned.
    2. Otherwise, if ``is_text(c)`` is true, ``c`` is returned as a list
       (wrapped in one if it is not already a list).
    3. Otherwise, if ``c`` is an existing path, it is loaded with ``np.load``
       and the first element of every row is kept.

    Parameters
    ----------
    c : object
        A known corpus name, text (or list of text), or a path to a file
        readable by ``np.load``.
    encoding : str, optional
        Currently unused by this function; kept for interface compatibility.

    Returns
    -------
    list
        The corpus documents.

    Raises
    ------
    Exception
        If ``c`` matches none of the cases above.
    """
    if c in corpora:
        # NOTE(review): the data directory is stored as a Python expression
        # and evaluated here; this is only safe while `defaults` comes from
        # trusted configuration.
        fname = os.path.join(eval(defaults['data']['datadir']), 'corpora',
                             f'{c}.npy')

        if os.path.exists(fname):
            # NOTE(review): allow_pickle=True unpickles arbitrary objects;
            # only load cache files this code wrote itself.
            return to_str_list(np.load(fname, allow_pickle=True))

        # Imported lazily: only a cache miss needs hypertools, so text/path
        # inputs and cached corpora no longer pay for (or require) the import.
        import hypertools as hyp

        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(os.path.abspath(os.path.join(fname, os.pardir)),
                    exist_ok=True)
        corpus_words = to_str_list(
            hyp.load(c).data[0])  # TODO: FIX THIS TO NOT CALL HYPERTOOLS!

        np.save(fname, corpus_words)
        return corpus_words

    if is_text(c):
        # isinstance (not exact type equality) so list subclasses pass through
        return c if isinstance(c, list) else [c]

    if os.path.exists(c):
        return to_str_list(
            [x[0] for x in np.load(c, allow_pickle=True).tolist()])

    raise Exception(f'Unknown corpus: {c}')
Пример #2
0
def test_io():
    """Check the types returned by ``hyp.load`` for named datasets and URLs."""
    # Per the original note: load calls both load and save internally, so
    # exercising load here covers both.
    for name in ('spiral', 'weights', 'weights_avg', 'weights_sample'):
        loaded = hyp.load(name)
        assert type(loaded) is list
        assert all(type(item) is np.ndarray for item in loaded)

    # Each remote resource is paired with the exact type load should return.
    remote_cases = [
        ('https://raw.githubusercontent.com/ContextLab/data-wrangler/main/tests/resources/home_on_the_range.txt',
         str),
        ('https://raw.githubusercontent.com/ContextLab/data-wrangler/main/tests/resources/testdata.csv',
         pd.DataFrame),
        ('https://raw.githubusercontent.com/ContextLab/data-wrangler/main/tests/resources/wrangler.jpg',
         np.ndarray),
    ]
    for url, expected_type in remote_cases:
        fetched = hyp.load(url)
        assert type(fetched) is expected_type
Пример #3
0
# -*- coding: utf-8 -*-
"""
=============================
Precognition
=============================

The future trajectory of an animated plot can be visualized with the precog
argument.  This displays a low opacity version of the trace ahead of the
current points being plotted.  This can be used in conjunction with the
chemtrails argument to plot a low-opacity trace of the entire timeseries.
"""

# Code source: Andrew Heusser
# License: MIT

# import
import hypertools as hyp
import numpy as np

# load example data
data = hyp.load('weights', align=True)

# average into 2 groups, splitting at index 18 so that every entry lands in
# exactly one group (slicing [:17] and [18:] would silently drop data[17])
w1 = np.mean(data[:18], 0)
w2 = np.mean(data[18:], 0)

# plot the two group averages as an animation with the precog trace enabled
hyp.plot([w1, w2], animate=True, precog=True)
Пример #4
0
plot method to evaluate the same plot with new arguments, like changing the color
of the points, or trying a different normalization method.  To save the plot,
simply call geo.save(fname), where fname is a file name/path.  Then, this file
can be reloaded using hyp.load to be plotted again at another time.  Finally,
the transform method can be used to transform new data using the same transformations
that were applied to the geo.
"""

# Code source: Andrew Heusser
# License: MIT

# import
import hypertools as hyp

# load the 'mushrooms' dataset; the returned object is used as a geo below
geo = hyp.load('mushrooms')

# plot with default settings, keeping the return value in `t`
t = geo.plot()

# replot the same geo with new parameters
# (normalize='within', points coloured green)
geo.plot(normalize='within', color='green')

# The remaining steps are left commented out in this example.

# save the object
# geo.save('test')

# load it back in
# geo = hyp.load('test.geo')

# transform some new data
# NOTE: `data` is not defined anywhere in this script; supply your own data
# before enabling the line below.
# transformed_data = geo.transform(data)
Пример #5
0
of string category labels or numerical values.  If text labels are passed, the
data is restructured according to those labels and plotted in different colors
according to your color palette.  If numerical values are passed, the values
are binned (default resolution: 100) and plotted according to your color
palette.
"""

# Code source: Andrew Heusser
# License: MIT

# import
import hypertools as hyp
import numpy as np

# load example data
data = hyp.load('weights_sample')

# simulate groups: alternate 'a' / 'b' labels, one per element of each item
group = [['a' if idx % 2 == 0 else 'b' for idx, j in enumerate(i)]
         for i in data]

# plot
hyp.plot(data, '.', group=group)

# simulate random groups: one random integer in [0, 1000) per element of each
# item.  A scalar is drawn directly because calling int() on a size-1 array
# is deprecated in recent NumPy releases.
group = [[int(np.random.randint(1000)) for _ in i] for i in data]
Пример #6
0
# -*- coding: utf-8 -*-
"""
=============================
Plotting a Pandas Dataframe
=============================

Hypertools supports single-index Pandas Dataframes as input. In this example, we
plot the mushrooms dataset from the kaggle database.  This is a dataset of text
features describing different attributes of a mushroom. Dataframes that contain
columns with text are converted into binary feature vectors representing the
presence or absence of the feature (see pandas.get_dummies for more).
"""

# Code source: Andrew Heusser
# License: MIT

# import
import hypertools as hyp

# load the 'mushrooms' example data
data = hyp.load('mushrooms')

# pop off the class (poisonousness): the 'class' entry is removed from `data`
# and kept separately so it can be used as the grouping variable
group = data.pop('class')

# plot the remaining data as points, grouped by the popped class values
hyp.plot(data, '.', group=group)
Пример #7
0
# -*- coding: utf-8 -*-
"""
=============================
Aligning matrices to a common space
=============================

In this example, we plot the trajectory of multivariate brain activity for
two groups of subjects that have been hyperaligned (Haxby et al, 2011).  First,
we use the align tool to project all subjects in the list to a common space.
Then we average the data into two groups, and plot.
"""

# Code source: Andrew Heusser
# License: MIT

# import
import hypertools as hyp
import numpy as np

# load example data and hyperalign it
data = hyp.load('weights').get_data()
data = hyp.align(data, align='hyper')

# average into two groups, splitting at index 18 so that every entry lands in
# exactly one group (slicing [:17] and [18:] would silently drop data[17])
group1 = np.mean(data[:18], 0)
group2 = np.mean(data[18:], 0)

# plot the first 100 rows of each group average
hyp.plot([group1[:100, :], group2[:100, :]])
Пример #8
0
# -*- coding: utf-8 -*-
"""
=============================
A basic example
=============================

Here is a basic example where we load in some data (a list of arrays - samples
by features) and plot it as points with the '.' format string.  Hypertools can
handle all format strings supported by matplotlib.
"""

# Code source: Andrew Heusser
# License: MIT

# import
import hypertools as hyp

# load the 'weights_sample' example dataset
geo = hyp.load('weights_sample')

# plot via the loaded object's own plot method, using the '.' format string
geo.plot(fmt='.')
Пример #9
0
# -*- coding: utf-8 -*-
"""
=============================
Aligning matrices to a common space
=============================

In this example, we plot the trajectory of multivariate brain activity for
two groups of subjects that have been hyperaligned (Haxby et al, 2011).  First,
we use the align tool to project all subjects in the list to a common space.
Then we average the data into two groups, and plot.
"""

# Code source: Andrew Heusser
# License: MIT

# import
import hypertools as hyp
import numpy as np

# load example data, requesting 'hyper' alignment at load time
data = hyp.load('weights', align='hyper')

# average into two groups, splitting at index 18 so that every entry lands in
# exactly one group (slicing [:17] and [18:] would silently drop data[17])
group1 = np.mean(data[:18], 0)
group2 = np.mean(data[18:], 0)

# plot the first 100 rows of each group average
hyp.plot([group1[:100, :], group2[:100, :]])
Пример #10
0
=============================

This example demonstrates how to use the `analyze` function to process data
prior to plotting. The data is a list of numpy arrays representing
multi-voxel activity patterns (columns) over time (rows).  First, the analyze
function normalizes the columns of each matrix (within each matrix). Then the data is
reduced using PCA (10 dims) and finally it is aligned with hyperalignment. We can
then plot the data with hyp.plot, which further reduces it so that it can be
visualized.
"""

# Code source: Andrew Heusser
# License: MIT

# load hypertools
import hypertools as hyp

# load the 'weights' dataset and pull the underlying data out of the
# returned object
geo = hyp.load('weights')
data = geo.get_data()

# process the data in a single call: 'within' normalization, PCA reduction
# to 10 dimensions, and 'hyper' alignment
data = hyp.analyze(data,
                   normalize='within',
                   reduce='PCA',
                   ndims=10,
                   align='hyper')

# plot the processed data
hyp.plot(data)
Пример #11
0
# -*- coding: utf-8 -*-
"""
=============================
Saving a geo
=============================

To save a plot, simply use the `save_path` kwarg, and specify where you want
the image to be saved, including the file extension (e.g. pdf)
"""

# Code source: Andrew Heusser
# License: MIT

# import
import hypertools as hyp

# load some data
data = hyp.load('mushrooms')

# create a geo from the plot call; show=False is passed to the plot call
geo = hyp.plot(data, show=False)

# save the geo under the name 'test'
geo.save('test')

# load it back in from 'test.geo'
# NOTE(review): presumably save() appends the '.geo' extension -- confirm
geo = hyp.load('test.geo')

# transform the data with the reloaded geo and plot the result as points
hyp.plot(geo.transform(data), '.')
Пример #12
0
import datawrangler as dw
import numpy as np
import pandas as pd

import hypertools as hyp

# Names of the reduction models that test_reduce iterates over.
models = [
    'UMAP', 'IncrementalPCA', 'DictionaryLearning', 'FactorAnalysis',
    'FastICA', 'KernelPCA', 'LatentDirichletAllocation',
    'MiniBatchDictionaryLearning', 'MiniBatchSparsePCA', 'NMF', 'PCA',
    'SparsePCA', 'TruncatedSVD', 'Isomap', 'LocallyLinearEmbedding', 'MDS',
    'SpectralEmbedding', 'TSNE'
]
# skip:  'SparseCoder'  (intentionally left out of the list above)

# Shared test input, built once at import: the 'weights_sample' dataset
# passed through hyp.manip with the 'Normalize' model.
normalized_weights = hyp.manip(hyp.load('weights_sample'), 'Normalize')


def test_reduce():
    n_components = 10
    for m in models:
        if m == 'SparseCoder':
            dictionary = hyp.reduce(dw.stack(normalized_weights).T.values,
                                    'IncrementalPCA',
                                    n_components=n_components).values.T
            next_model = {
                'model': m,
                'args': [],
                'kwargs': {
                    'dictionary': dictionary
                }
Пример #13
0
import numpy as np
import pandas as pd
import hypertools as hyp

# Load the 'weights' and 'spiral' datasets once, at module import.
weights = hyp.load('weights')
spiral = hyp.load('spiral')


def compare_alignments(a1, a2, tol=1e-5):
    """Return True if two alignment results are numerically close.

    Each argument is first reduced to an array-like value:

    * a ``pandas.DataFrame`` contributes its ``.values``;
    * a ``numpy.ndarray`` is used as-is;
    * a list contributes its first element;
    * any other object with a ``proj`` attribute is unwrapped recursively
      through that attribute.

    Parameters
    ----------
    a1, a2 : DataFrame, ndarray, list, or object with a ``proj`` attribute
        The alignments to compare.
    tol : float, optional
        Absolute tolerance forwarded to ``np.allclose`` as ``atol``.

    Returns
    -------
    bool
        Result of ``np.allclose`` on the two extracted values.

    Raises
    ------
    TypeError
        If an argument is of none of the supported kinds.
    """
    def get_alignment(x):
        # isinstance rather than exact type identity so that subclasses
        # (e.g. ndarray or DataFrame subclasses) are accepted as well.
        if isinstance(x, pd.DataFrame):
            return x.values
        if isinstance(x, np.ndarray):
            return x
        if isinstance(x, list):
            return x[0]
        if hasattr(x, 'proj'):
            return get_alignment(x.proj)
        # Fail loudly here instead of passing None on to np.allclose.
        raise TypeError('Unsupported alignment type: ' + type(x).__name__)

    return np.allclose(get_alignment(a1), get_alignment(a2), atol=tol)


def spiral_alignment_checker(model, known_rot=True, relax=False, tol=1e-5):
    def get_vals(x):
        if hasattr(x, 'values'):
            return x.values
        else:
            return x

    def test_all_close(unaligned, aligned):
        if not relax:
Пример #14
0
# -*- coding: utf-8 -*-
"""
=============================
Animated trajectory plotted with multidimensional scaling
=============================

This is a trajectory of brain data plotted in 3D with multidimensional scaling.
"""

# Code source: Andrew Heusser
# License: MIT

# import hypertools
import hypertools as hyp

# load the 'weights_avg' dataset as a geo
geo = hyp.load('weights_avg')

# plot as an animation, reducing with the 'MDS' model
# (multidimensional scaling, per the module docstring)
geo.plot(animate=True, reduce='MDS')
Пример #15
0
# -*- coding: utf-8 -*-
"""
=============================
Aligning two matrices with the procrustes function
=============================

In this example, we load in some synthetic data, rotate it, and then use the
procrustes function to get the datasets back in alignment.  The procrustes
function uses linear transformations to project a source matrix into the
space of a target matrix.
"""

# Code source: Andrew Heusser
# License: MIT

# import
import hypertools as hyp
import numpy as np
import scipy

# load example data and plot it before alignment
geo = hyp.load('spiral')
geo.plot(title='Before Alignment')

# use procrustes to align the data: the geo's data unpacks into a source and
# a target, and the source is passed through procrustes together with the target
source, target = geo.get_data()
aligned = [hyp.tools.procrustes(source, target), target]

# after alignment: plot the aligned source ('-') against the target ('--')
hyp.plot(aligned, ['-', '--'], title='After alignment')
Пример #16
0
Analyze data and then plot
=============================

This example demonstrates how to use the `analyze` function to process data
prior to plotting. The data is a list of numpy arrays representing
multi-voxel activity patterns (columns) over time (rows).  First, the analyze
function normalizes the columns of each matrix (within each matrix). Then the data is
reduced using PCA (10 dims) and finally it is aligned with hyperalignment. We can
then plot the data with hyp.plot, which further reduces it so that it can be
visualized.
"""

# Code source: Andrew Heusser
# License: MIT

# load hypertools
import hypertools as hyp

# load the 'weights' dataset
data = hyp.load('weights')

# process the data in a single call: 'within' normalization, PCA reduction
# to 10 dimensions, and 'hyper' alignment
data = hyp.analyze(data,
                   normalize='within',
                   reduce='PCA',
                   ndims=10,
                   align='hyper')

# plot the processed data
hyp.plot(data)
Пример #17
0
# -*- coding: utf-8 -*-
"""
=============================
Plotting State of the Union Addresses from 1989-2017
=============================

To plot text, simply pass the text data to the plot function.  Here, we are
plotting each SOTU address fit to a topic model, and then reduced to visualize.
By default, hypertools transforms the text data using a model fit to a selected
set of wikipedia pages.

"""

# Code source: Andrew Heusser
# License: MIT

# load hypertools
import hypertools as hyp

# load the 'sotus' dataset as a geo
geo = hyp.load('sotus')

# plot it with the geo's default settings
geo.plot()