Example #1
    global metricsMatrixEntire
    metricsMatrixEntire = []

    global metricsMatrix
    metricsMatrix = []

    global metricsMatrixSel
    metricsMatrixSel = []

    global metricsMatrixEntireSel
    metricsMatrixEntireSel = []

    return 'Reset'


location = './cachedir'
memory = Memory(location, verbose=0)


# NOTE: Only works with labeled data
def neighborhood_hit(X, y, k, selected=None):
    # Add 1 to k because the nearest neighbor is always the point itself
    k += 1

    y = np.array(y)

    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X, y)

    if selected:
        X = X[selected, :]
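
    # The snippet is truncated here; a plausible completion of the standard
    # neighborhood-hit metric follows (an editorial sketch, not the author's
    # code). The neighbor indices refer to the full fit set, so keep the full
    # label array and look up the query labels separately.
    labels = y[selected] if selected else y
    neighbors = knn.kneighbors(X, return_distance=False)
    return np.mean(y[neighbors[:, 1:]] == labels[:, None])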
Example #2
import sys
import librosa
import logging
import numpy as np
from torch.utils.data import Dataset
from joblib import Memory
import tempfile
import shutil
import specaug

logger = logging.getLogger('root')
FORMAT = "[%(asctime)s %(filename)s:%(lineno)s - %(funcName)s()] %(message)s"
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format=FORMAT)
logger.setLevel(logging.DEBUG)

CACHE_DIR = './cache'
memory = Memory(CACHE_DIR, verbose=0)

PAD = 0
N_FFT = 320
N_MFCC = 40
N_MELS = 80
SAMPLE_RATE = 16000

def clear_joblib_cache():
    logger.info('Clearing joblib cache at: %s', CACHE_DIR)
    shutil.rmtree(CACHE_DIR)

def augment_audio_with_sox(path, sample_rate, tempo, gain):
    """
    Changes tempo and gain of the recording with sox and loads it.
    """
Example #3
File: amm_main.py  Project: ljeagle/bolt
import functools
import numpy as np
import pprint
import scipy
import time
import zstandard as zstd  # pip install zstandard

from . import amm
from . import matmul_datasets as md
from . import pyience as pyn
from . import compress

from . import amm_methods as methods

from joblib import Memory
_memory = Memory('.', verbose=0)

# NUM_TRIALS = 1
NUM_TRIALS = 10


# @_memory.cache
def _estimator_for_method_id(method_id, **method_hparams):
    return methods.METHOD_TO_ESTIMATOR[method_id](**method_hparams)


def _hparams_for_method(method_id):
    if method_id in methods.SKETCH_METHODS:
        # dvals = [2, 4, 6, 8, 12, 16, 24, 32, 48, 64]  # d=1 undef on fd methods
        # dvals = [1, 2, 4, 8, 16, 32, 64, 128]
        dvals = [1, 2, 4, 8, 16, 32, 64]
Example #4
from joblib import Memory
import pandas as pd

from os.path import expanduser

memory = Memory(location=expanduser('~/.cache/myapp'), verbose=0)


@memory.cache
def load(filename):
    df = pd.read_csv(filename, parse_dates=['start_time', 'end_time'])
    # ...
    return df


@memory.cache
def pre_process(df):
    df['temp'].fillna(0, inplace=True)
    # ...



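A minimal usage sketch (not part of the original snippet; 'data.csv' is a hypothetical file): on a second run both calls are served from ~/.cache/myapp, because joblib hashes the arguments, including the DataFrame passed to pre_process.

df = load('data.csv')  # reads the CSV once, then loads from the cache
pre_process(df)        # cached as long as df hashes the same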
Example #5
import os
import sys
import time
import shutil

from joblib import Parallel, delayed
from all_members_ensemble import gen_members

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # Also affect subprocesses

from joblib import Memory
cachedir = './parallel_brute_force_search_exec_tmpmemory' + '_' + time.strftime(
    "%H_%M_%S", time.localtime(time.time()))
memory = Memory(cachedir, verbose=0)


class Estimator:
    def __init__(self, classifier=None, random_state=None, accuracy=0):
        self.classifier = classifier
        self.random_state = random_state
        self.accuracy = accuracy

    def fit(self, X, y):
        self.is_fitted = True
        self.classifier.fit(X, y)

    def predict(self, X):
        return self.classifier.predict(X)
Example #6
for x in X:  # smooth data
    x[:] = ndimage.gaussian_filter(x.reshape(size, size), sigma=1.0).ravel()
X -= X.mean(axis=0)
X /= X.std(axis=0)

y = np.dot(X, coef.ravel())
noise = np.random.randn(y.shape[0])
noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.)) / linalg.norm(noise, 2)
y += noise_coef * noise  # add noise

# #############################################################################
# Compute the coefs of a Bayesian Ridge with GridSearch
cv = KFold(2)  # cross-validation generator for model selection
ridge = BayesianRidge()
cachedir = tempfile.mkdtemp()
mem = Memory(location=cachedir, verbose=1)

# Ward agglomeration followed by BayesianRidge
connectivity = grid_to_graph(n_x=size, n_y=size)
ward = FeatureAgglomeration(n_clusters=10,
                            connectivity=connectivity,
                            memory=mem)
clf = Pipeline([('ward', ward), ('ridge', ridge)])
# Select the optimal number of parcels with grid search
clf = GridSearchCV(clf, {'ward__n_clusters': [10, 20, 30]}, n_jobs=1, cv=cv)
clf.fit(X, y)  # set the best parameters
coef_ = clf.best_estimator_.steps[-1][1].coef_
coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)
coef_agglomeration_ = coef_.reshape(size, size)

# Anova univariate feature selection followed by BayesianRidge
Example #7
import os
from os.path import join
import mne
import pandas as pd
import numpy as np
from joblib import Memory

from . import preprocessing

memory = Memory(location=os.environ['PYMEG_CACHE_DIR'], verbose=0)
subjects_dir = os.environ['SUBJECTS_DIR']


def set_fs_subjects_dir(directory):
    """Set freesurfer subjectdir environment variable"""
    global subjects_dir
    os.environ['SUBJECTS_DIR'] = directory
    subjects_dir = directory


def check_bems(subjects):
    """Create a plot of all BEM segmentations."""
    for sub in subjects:
        fig = mne.viz.plot_bem(subject=sub,
                               subjects_dir=subjects_dir,
                               brain_surfaces='white',
                               orientation='coronal')


@memory.cache
def get_source_space(subject, sdir=None):
Example #8
File: sdfutil.py  Project: kochi0603/9
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import DataStructs
from joblib import Memory
import numpy as np

memory = Memory(location="/tmp", verbose=0)


@memory.cache
def sdf_to_desc(sdf_file):
    fps = []
    targets = []
    nfps = []

    for mol in Chem.SDMolSupplier(sdf_file):
        fps.append(AllChem.GetMorganFingerprintAsBitVect(mol,
                                                         2))  # fingerprint
        targets.append(float(mol.GetProp("ACTIVITY")))  # activity

    for fp in fps:
        nfp = np.zeros((1, ))
        DataStructs.ConvertToNumpyArray(fp, nfp)
        nfps.append(nfp)

    return (np.array(nfps), np.array(targets))


if __name__ == '__main__':
    descs, targets = sdf_to_desc("CHEMBL3301363.sdf")
    print(descs.shape)
Example #9
from joblib import Memory

memory = Memory("cache", verbose=0)
Example #10
import xgboost as xgb
import numpy as np
import time
from sklearn import datasets
from joblib import Memory
import pandas as pd
import argparse

memory = Memory('./cachedir', verbose=0)


# Contains a dataset in numpy format as well as the relevant objective and metric
class TestDataset:
    def __init__(self, name, Xy, objective):
        self.name = name
        self.objective = objective
        self.X, self.y = Xy

    def set_params(self, params_in):
        params_in['objective'] = self.objective
        if self.objective == "multi:softmax":
            params_in["num_class"] = int(np.max(self.y) + 1)
        return params_in

    def get_dmat(self):
        return xgb.DMatrix(self.X, self.y)

    def get_test_dmat(self, num_rows):
        rs = np.random.RandomState(432)
        return xgb.DMatrix(self.X[rs.randint(0, self.X.shape[0], size=num_rows), :])
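
A sketch of how such a wrapper might be used (the dataset and parameter choices are illustrative, not from the original file):

cali = TestDataset('california_housing',
                   datasets.fetch_california_housing(return_X_y=True),
                   'reg:squarederror')
params = cali.set_params({'tree_method': 'hist'})
dmat = cali.get_dmat()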
Example #11
    (redirects_url, redirects_filename),
    (page_links_url, page_links_filename),
]

for url, filename in resources:
    if not os.path.exists(filename):
        print("Downloading data from '%s', please wait..." % url)
        opener = urlopen(url)
        open(filename, 'wb').write(opener.read())
        print()


# #############################################################################
# Loading the redirect files

memory = Memory(location=".")


def index(redirects, index_map, k):
    """Find the index of an article name after redirect resolution"""
    k = redirects.get(k, k)
    return index_map.setdefault(k, len(index_map))


DBPEDIA_RESOURCE_PREFIX_LEN = len("http://dbpedia.org/resource/")
SHORTNAME_SLICE = slice(DBPEDIA_RESOURCE_PREFIX_LEN + 1, -1)


def short_name(nt_uri):
    """Remove the < and > URI markers and the common URI prefix"""
    return nt_uri[SHORTNAME_SLICE]
Example #12
import os
import pylab as pl
import bcolz

from psi.data.io import Recording

from matplotlib.backends.backend_pdf import PdfPages
from tqdm import tqdm

from . import util

from joblib import Memory

tmpdir = '/tmp/lbhb'
os.makedirs(tmpdir, exist_ok=True)
memory = Memory(location=tmpdir)


def load_trial_log(dirname):
    try:

        tl_dirname = os.path.join(dirname, 'trial_log')
        trial_log = bcolz.ctable(rootdir=tl_dirname).todataframe()
        experiment_info = util.parse_filename(dirname)
        for k, v in experiment_info.items():
            trial_log[k] = v
        trial_log.index.name = 'trial'
        trial_log.reset_index(inplace=True)
        trial_log.set_index(['datetime', 'animal', 'trial'], inplace=True)
        return trial_log
    except Exception as e:
Example #13
File: generators.py  Project: MLeib/QAOA
from joblib import Memory
import networkx as nx
import dimod

data_cache = './Data'
mem = Memory(data_cache)


def regular_graph_bqm(degree: int, variable_count: int, copy_count: int):
    bqm_arr = []
    for _ in range(copy_count):
        problem_graph = nx.random_regular_graph(degree, variable_count)
        bqm_arr.append(
            dimod.generators.uniform(problem_graph,
                                     dimod.SPIN,
                                     low=0.5,
                                     high=1.0))
    return bqm_arr


regular_graph_bqm = mem.cache(regular_graph_bqm)
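
Wrapping at the call site, as above, has the same effect as decorating the definition; for comparison, a sketch of the equivalent decorator spelling:

@mem.cache
def regular_graph_bqm(degree: int, variable_count: int, copy_count: int):
    ...  # body as above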
Example #14
POWER_ITR = 'power_iteration'
MATH = 'math_method'

mapping = {
    "without_acs": 0,
    "with_acs": 1,
    "male": 0,
    "female": 1,
    "very fair": 0,
    "fair": 1,
    "medium": 2,
    "dark": 3,
}

CACHE_DIR = Path(settings.path_for(settings.PPR.FEEDBACK.CACHE_DIR))
CACHE = Memory(str(CACHE_DIR), verbose=1)


def math_method(adj_matrix, alpha):
    return np.linalg.inv(
        np.identity(adj_matrix.shape[0]) - (alpha * adj_matrix))


def power_iteration(adj_matrix, alpha, seed):
    n = 0
    pi_1 = None
    pi_2 = np.copy(seed)

    while True:
        pi_1 = np.matmul(np.dot(alpha, adj_matrix), pi_2) + np.dot(
            (1 - alpha), seed)
Example #15
def one_dimensional_exp():
    eps = 1e-1

    grid = torch.linspace(-4, 12, 500)[:, None]
    C = compute_distance(grid, grid)

    x_sampler = Sampler(mean=torch.tensor([[1.], [2], [3]]), cov=torch.tensor([[[.1]], [[.1]], [[.1]]]),
                        p=torch.ones(3) / 3)
    y_sampler = Sampler(mean=torch.tensor([[0.], [3], [5]]), cov=torch.tensor([[[.1]], [[.1]], [[.4]]]),
                        p=torch.ones(3) / 3)
    torch.manual_seed(100)
    np.random.seed(100)
    lpx = x_sampler.log_prob(grid)
    lpy = y_sampler.log_prob(grid)
    lpx -= torch.logsumexp(lpx, dim=0)
    lpy -= torch.logsumexp(lpy, dim=0)
    px = torch.exp(lpx)
    py = torch.exp(lpy)

    fevals = []
    gevals = []
    labels = []
    plans = []

    mem = Memory(location=expanduser('~/cache'))
    n_samples = 5000
    x, loga = x_sampler(n_samples)
    y, logb = y_sampler(n_samples)
    f, g = mem.cache(sinkhorn)(x.numpy(), y.numpy(), eps=eps, n_iter=100)
    f = torch.from_numpy(f).float()
    g = torch.from_numpy(g).float()
    distance = compute_distance(grid, y)
    feval = - eps * torch.logsumexp((- distance + g[None, :]) / eps + logb[None, :], dim=1)
    distance = compute_distance(grid, x)
    geval = - eps * torch.logsumexp((- distance + f[None, :]) / eps + loga[None, :], dim=1)

    plan = (lpx[:, None] + feval[:, None] / eps + lpy[None, :]
            + geval[None, :] / eps - C / eps)

    plans.append((plan, grid, grid))

    fevals.append(feval)
    gevals.append(geval)
    labels.append(f'True potential')

    m = 50
    hatf, posx, hatg, posy, sto_fevals, sto_gevals = sampling_sinkhorn(x_sampler, y_sampler, m=m, eps=eps,
                                                                       n_iter=100,
                                                                       grid=grid)
    feval = evaluate_potential(hatg, posy, grid, eps)
    geval = evaluate_potential(hatf, posx, grid, eps)
    plan = (lpx[:, None] + feval[:, None] / eps + lpy[None, :]
            + geval[None, :] / eps - C / eps)
    plans.append((plan, grid, grid))

    fevals.append(feval)
    gevals.append(geval)
    labels.append(f'Online Sinkhorn')

    alpha, posx, posy, _, _ = rkhs_sinkhorn(x_sampler, y_sampler, m=m, eps=eps,
                                                                 n_iter=100, sigma=1,
                                                                 grid=grid)
    feval = evaluate_kernel(alpha, posx, grid, sigma=1)
    geval = evaluate_kernel(alpha, posy, grid, sigma=1)
    plan = (lpx[:, None] + feval[:, None] / eps + lpy[None, :]
            + geval[None, :] / eps - C / eps)
    plans.append((plan, grid, grid))
    fevals.append(feval)
    gevals.append(geval)
    labels.append(f'RKHS')

    fevals = torch.cat([feval[None, :] for feval in fevals], dim=0)
    gevals = torch.cat([geval[None, :] for geval in gevals], dim=0)

    fig = plt.figure(figsize=(width, .21 * width))
    gs = gridspec.GridSpec(ncols=6, nrows=1, width_ratios=[1, 1.5, 1.5, .8, .8, .8], figure=fig)
    plt.subplots_adjust(right=0.97, left=0.05, bottom=0.27, top=0.85)
    ax0 = fig.add_subplot(gs[0])
    ax0.plot(grid, px, label=r'$\alpha$')
    ax0.plot(grid, py, label=r'$\beta$')
    ax0.legend(frameon=False)
    # ax0.axis('off')
    ax1 = fig.add_subplot(gs[1])
    ax2 = fig.add_subplot(gs[2])
    ax3 = fig.add_subplot(gs[3])
    ax4 = fig.add_subplot(gs[4])
    ax5 = fig.add_subplot(gs[5])

    for i, (label, feval, geval, (plan, x, y)) in enumerate(zip(labels, fevals, gevals, plans)):
        if label == 'True potential':
            ax1.plot(grid, feval, label=label, zorder=100,
                     linewidth=3, color='C3')
            ax2.plot(grid, geval, label=None, zorder=100,
                     linewidth=3, color='C3')
            plan = plan.numpy()
            ax3.contour(y[:, 0], x[:, 0], plan, levels=30)
        elif label == 'RKHS':
            ax1.plot(grid, feval, label=label, linewidth=2, color='C4')
            ax2.plot(grid, geval, label=None, linewidth=2, color='C4')
            plan = plan.numpy()
            ax4.contour(y[:, 0], x[:, 0], plan, levels=30)
        else:
            plan = plan.numpy()
            ax5.contour(y[:, 0], x[:, 0], plan, levels=30)
    ax0.set_title('Distributions')
    ax1.set_title('Estimated $f$')
    ax2.set_title('Estimated $g$')
    ax3.set_title('True OT plan')
    ax4.set_title('RKHS')
    ax5.set_title('O-S')
    # ax4.set_title('Estimated OT plan')
    colors = plt.cm.get_cmap('Blues')(np.linspace(0.2, 1, len(sto_fevals[::2])))
    for i, eval in enumerate(sto_fevals[::2]):
        ax1.plot(grid, eval, color=colors[i],
                 linewidth=2, label=f'O-S $n_t={i * 10 * 2 * 50}$' if i % 2 == 0 else None,
                 zorder=1)
    for i, eval in enumerate(sto_gevals[::2]):
        ax2.plot(grid, eval, color=colors[i],
                 linewidth=2, label=None,
                 zorder=1)
    for ax in (ax1, ax2, ax3):
        ax.tick_params(axis='both', which='major', labelsize=5)
        ax.tick_params(axis='both', which='minor', labelsize=5)
        ax.minorticks_on()
    ax1.legend(frameon=False, bbox_to_anchor=(-1, -0.53), ncol=5, loc='lower left')
    # ax2.legend(frameon=False, bbox_to_anchor=(0., 1), loc='upper left')
    sns.despine(fig)
    for ax in [ax3, ax4, ax5]:
        ax.axis('off')
    ax0.axes.get_yaxis().set_visible(False)
    plt.savefig(join(get_output_dir(), 'continuous.pdf'))
    plt.show()
Example #16
from sklearn.linear_model import LassoLarsCV, LassoCV, Lasso
from sklearn.model_selection import cross_val_predict, LeaveOneOut
from sklearn.preprocessing import OneHotEncoder
from sklearn.cluster import AgglomerativeClustering
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from hashlib import md5
from hdmedians import medoid
from functools import partial
import pandas as pd
import numpy as np

from joblib import Memory
mem = Memory('../tasks-joblib', verbose=0)

def hash(s):
    m = md5()
    m.update(s.encode('utf-8'))
    return m.hexdigest()

@mem.cache()
def label_clusters_bow(socd, clusters):
    # Top 5 positive words for each cluster via tfidf logistic regression
    tfidf_vectorizer = TfidfVectorizer()
    tfidf = tfidf_vectorizer.fit_transform(socd.task)

    logistic = LogisticRegression(penalty='l1', C=2, solver='liblinear')
    logistic.fit(tfidf, clusters)

    rev_lookup = dict([(idx, w) for w,idx in tfidf_vectorizer.vocabulary_.items()])
    a = np.array([[rev_lookup[j] for j in i]
Example #17
FN = args.graph
data = pd.read_csv(FN, delimiter = " ")

G = nx.Graph()

for i, s in data.iterrows():
    u = s.iloc[0]
    v = s.iloc[1]

    G.add_edge(u, v)


# In[ ]:

memory = Memory(location='./cached_bri', verbose=0)

@memory.cache
def cached_bridgeness(G):
    print("Computing centrality metric (cache miss), please be patient.")
    return bridgeness.bridgeness_centrality(G)

bri = cached_bridgeness(G)
#bri = bridgeness.bridgeness_centrality(G)
#e_bri = bridgeness.edge_bridgeness_centrality(G)
#bet = bridgeness.betweenness_centrality(G)


# In[ ]:

phi = args.phi
Example #18
import numpy as np

lmax = 200
Nside = 64
#m = 2
#lmax = 2000
#Nside = 1024
#lmax = 1000
#Nside = 512
m = 1
a_l = np.zeros(lmax + 1 - m)
a_l[1 - m] = 1
#a_l[4 - m] = -1
#a_l[15] = 0.1
#a_l = (-1) ** np.zeros(lmax + 1)

from joblib import Memory
memory = Memory('joblib')


@memory.cache
def getroots(l, m):
    # Use the argument rather than the global; the call site already
    # passes lmax + 1.
    return associated_legendre_roots(l, m)


if 1:
    roots = getroots(lmax + 1, m)

    roots = get_ring_thetas(Nside, positive_only=True)
    print(roots.shape[0])
    P = compute_normalized_associated_legendre(m, roots, lmax)
    print(butterfly_compress(P).get_stats())
    print(butterfly_compress(P.T).get_stats())
Example #19
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Disk-based caching with joblib.
Tutorial: https://joblib.readthedocs.io/en/latest/memory.html
Install:  pip install joblib
"""

from joblib import Memory

memory = Memory(location="./cachedir")


@memory.cache
def sum2(a, b):
    print(f"计算{a}+{b} ... ")
    return a + b


print(sum2(2, 3))
print(sum2(2, 3))

print(sum2(4, 7))
print(sum2(4, 7))

print(sum2(2, 3))
print(sum2(4, 7))
"""
Subsequent identical calls read the result straight from the disk cache
instead of running the function again.
"""
Example #20
        status = -1
        print("Error: Enter the peplen param\n")
        sys.exit(status)

    # Extract data from the CSV format
    data = readCSV(dirname, peplen)

    #    print (data[0])
    #    print (data[1])
    #    print (list(set(data[0]) & set(data[1])))
    #    print (distance.jaccard(data[0], data[1]))
    #    print (data)
    #    print (data.shape)

    # Construct the clustering model
    mem = Memory(location='c:/tmp')
    model = hdbscan.HDBSCAN(algorithm='best',
                            allow_single_cluster=False,
                            alpha=1.0,
                            approx_min_span_tree=True,
                            core_dist_n_jobs=2,
                            gen_min_span_tree=True,
                            memory=mem,
                            metric='euclidean',
                            min_cluster_size=3,
                            min_samples=None,
                            p=None)

    #algorithm='best', metric='euclidean', min_cluster_size=3)

    # Construct clusters and display the clusters list
Example #21
    def __init__(self, 
                 label='Run',
                 tooltip='Run Simulation',
                 start_func=None,
                 done_func=None,
                 outcb=None,
                 show_progress=True,
                 width='auto',
                 cachename=None,
                 cachecb=None,
                 showcache=True):
        self.label = label
        self.tooltip = tooltip
        self.start_func = start_func
        self.done_func = done_func
        self.outcb = outcb
        self.cachename = cachename
        self.q = Queue()
        self.thread = 0
        self.status = None
        self.output = None
        self.show_progress = show_progress
        self.progress = None
        self.make_rname = None
        self.width = width
        self.cachecb = cachecb
        self.showcache = showcache
        self.cbuf = collections.deque(maxlen=200)  # circular buffer

        if start_func is None:
            print("start_func is required.", file=sys.stderr)
            return

        if cachename:
            # set up cache
            cachedir = os.path.join(Submit.CACHEDIR, cachename)
            cachetabdir = os.path.join(Submit.CACHETABDIR, cachename)
            if not os.path.isdir(cachedir):
                os.makedirs(cachedir)
            memory = Memory(location=cachetabdir, verbose=0)

            @memory.cache
            def make_rname(*args):
                # uuid should be unique, but check just in case
                while True:
                    fname = str(uuid.uuid4()).replace('-', '')
                    if not os.path.isdir(os.path.join(cachedir, fname)):
                        break
                return fname

            self.make_rname = make_rname

        self.but = w.Button(
            description=self.label,
            tooltip=self.tooltip,
            button_style='success'
        )
        self.output = w.Textarea(layout={'width': '100%', 'height': '400px'})
        self.acc = w.Accordion(children=[self.output], width=self.width)
        self.acc.set_title(0, 'Output')
        self.acc.selected_index = None
        self.but.on_click(self._but_cb)
        self.disabled = False
        _layout = w.Layout(
            flex_flow='column',
            justify_content='flex-start',
            width=self.width
        )
        self.w = w.VBox([self.but], layout=_layout)
Example #22
import os

import numpy as np

from joblib import Memory

from sklearn.datasets import fetch_openml, get_data_home
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
from sklearn.kernel_approximation import Nystroem
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics import zero_one_loss
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import check_array
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# Memoize the data extraction and memory map the resulting
# train / test splits in readonly mode
memory = Memory(os.path.join(get_data_home(), "mnist_benchmark_data"), mmap_mode="r")


@memory.cache
def load_data(dtype=np.float32, order="F"):
    """Load the data, then cache and memmap the train/test split"""
    ######################################################################
    # Load dataset
    print("Loading dataset...")
    data = fetch_openml("mnist_784")
    X = check_array(data["data"], dtype=dtype, order=order)
    y = data["target"]

    # Normalize features
    X = X / 255
Example #23
from joblib import Memory

# The main script will override this as necessary.
# By default, no caching is performed (backend=None).
memory = Memory(backend=None)
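
Even with caching disabled, memory.cache still wraps functions as a transparent pass-through, so library code can decorate unconditionally; a minimal sketch (expensive is a hypothetical function):

@memory.cache
def expensive(x):
    return x ** 2

expensive(4)  # runs normally; nothing is written to disk. Constructing
              # Memory(location='./cache') here instead turns caching on.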
Example #24
"""
# Statistical model to extrapolate Covid-19 active cases

Here we build the statistical model behind
https://covid19-dash.github.io/

The model is made by fitting a weighted least square on the log of the
number of cases over a window of the few last days.

This is a computational notebook: it is the actual code that is run to
build the model.
"""

# %%
# Use joblib to speed up interactions
from joblib import Memory
mem = Memory(location='.')

# %%
# ## Load and plot the data

# %%
# Download the data
import data_input
data = mem.cache(data_input.get_data)()

# We model only the active cases
active = data['active']

# %%
# First plot the time course of the most affected countries
last_day = active.iloc[-1]
Example #25
import os
import mne
import numpy as np
from joblib import Memory
from scipy.signal import tukey

try:
    from ..utils import check_random_state
except (ImportError, ValueError):
    from alphacsc.utils import check_random_state

mem = Memory(location='.', verbose=0)


@mem.cache()
def load_data(n_trials=40,
              n_channels=1,
              n_times=6,
              sigma=.05,
              sfreq=300,
              f_noise=True,
              random_state=None,
              n_jobs=4):
    """Simulate data with 10Hz mu-wave and 10Hz oscillations.

    Parameters
    ----------
    n_trials : int
        Number of simulated signals
    n_channels : int
        Number of channels in the signals
Example #26
def test_pipeline_memory_sampler():
    X, y = make_classification(n_classes=2,
                               class_sep=2,
                               weights=[0.1, 0.9],
                               n_informative=3,
                               n_redundant=1,
                               flip_y=0,
                               n_features=20,
                               n_clusters_per_class=1,
                               n_samples=5000,
                               random_state=0)
    cachedir = mkdtemp()
    try:
        memory = Memory(cachedir, verbose=10)
        # Test with Transformer + SVC
        clf = SVC(gamma='scale', probability=True, random_state=0)
        transf = DummySampler()
        pipe = Pipeline([('transf', clone(transf)), ('svc', clf)])
        cached_pipe = Pipeline([('transf', transf), ('svc', clf)],
                               memory=memory)

        # Memoize the transformer at the first fit
        cached_pipe.fit(X, y)
        pipe.fit(X, y)
        # Get the time stamp of the transformer in the cached pipeline
        expected_ts = cached_pipe.named_steps['transf'].timestamp_
        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe.predict(X))
        assert_array_equal(pipe.predict_proba(X), cached_pipe.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe.score(X, y))
        assert_array_equal(pipe.named_steps['transf'].means_,
                           cached_pipe.named_steps['transf'].means_)
        assert not hasattr(transf, 'means_')
        # Check that we are reading the cache while fitting
        # a second time
        cached_pipe.fit(X, y)
        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe.predict(X))
        assert_array_equal(pipe.predict_proba(X), cached_pipe.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe.score(X, y))
        assert_array_equal(pipe.named_steps['transf'].means_,
                           cached_pipe.named_steps['transf'].means_)
        assert cached_pipe.named_steps['transf'].timestamp_ == expected_ts
        # Create a new pipeline with cloned estimators
        # Check that even changing the step name does not affect the cache hit
        clf_2 = SVC(gamma='scale', probability=True, random_state=0)
        transf_2 = DummySampler()
        cached_pipe_2 = Pipeline([('transf_2', transf_2), ('svc', clf_2)],
                                 memory=memory)
        cached_pipe_2.fit(X, y)

        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe_2.predict(X))
        assert_array_equal(pipe.predict_proba(X),
                           cached_pipe_2.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe_2.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe_2.score(X, y))
        assert_array_equal(pipe.named_steps['transf'].means_,
                           cached_pipe_2.named_steps['transf_2'].means_)
        assert cached_pipe_2.named_steps['transf_2'].timestamp_ == expected_ts
    finally:
        shutil.rmtree(cachedir)
Example #27
          And run the benchmark:
              LAPJV_OLD=lapjv-old bench.sh
          ''')
    lapjv_old = None
from centrosome.lapjv import lapjv as lapjv_centrosome
from lap.tests.test_utils import (get_dense_int, get_cost_CS,
                                  sparse_from_dense_CS, sparse_from_dense)

max_time_per_benchmark = 20

szs = [10, 100, 200, 500, 1000, 2000, 5000]
rngs = [100, 1000, 10000, 100000]
seeds = [1299821, 15485867, 32452867, 49979693]

cachedir = '/tmp/lapjv-cache'
memory = Memory(location=cachedir, verbose=1)


@memory.cache
def get_hard_data(sz, rng, seed):
    cost = get_dense_int(sz, 100, hard=True, seed=seed)
    opt = lapjv(cost)[0]
    return cost, opt


if lapjv_old is not None:

    @mark.timeout(max_time_per_benchmark)
    @mark.parametrize('sz,rng,seed', [(sz, rng, seed) for sz in szs
                                      for rng in rngs for seed in seeds])
    def test_JV_old(benchmark, sz, rng, seed):
Example #28
import sys

import matplotlib
matplotlib.use('TkAgg')  # select the backend before pyplot is imported

import matplotlib.pyplot as plt
import pandas as pd
from joblib import Memory

# Auto-detect terminal width.
pd.options.display.width = None
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = 200

# Initialize persistent on-disk caches.
mem_hist = Memory(location='./.cached_plot_hist', verbose=0)
mem_sim = Memory(location='./.cached_plot_sim', verbose=0)

PRINT_BASELINE = True
PRINT_DELTA_ONLY = True

BETWEEN_START = pd.to_datetime('09:30').time()
BETWEEN_END = pd.to_datetime('09:30:00.000001').time()

# Linewidth for plots.
LW = 2


# Used to read and cache simulated quotes.
# Doesn't actually pay attention to symbols yet.
# @mem_sim.cache
Example #29
import time

import numpy as np
import pandas as pd
from scipy import sparse
from joblib import Memory
import matplotlib.pyplot as plt
from scipy.stats.mstats import gmean

from alphacsc.cython import _fast_sparse_convolve_multi
from alphacsc.cython import _fast_sparse_convolve_multi_uv

memory = Memory(location='', verbose=0)


def _dense_convolve_multi(z_i, ds):
    """Convolve z_i[k] and ds[k] for each atom k, and return the sum

    z_i : array, shape(n_atoms, n_times_valid)
        Activations
    ds : array, shape(n_atoms, n_channels, n_times_atom)
        Dictionary

    Returns
    -------
    res : array, shape(n_channels, n_times)
        Result of the convolution
    """
    return np.sum([[np.convolve(zik, dkp) for dkp in dk]
                   for zik, dk in zip(z_i, ds)], 0)
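
A quick shape check of the helper above (sizes are illustrative): each np.convolve of a length-n_times_valid activation with a length-n_times_atom atom yields n_times_valid + n_times_atom - 1 samples, i.e. n_times.

z_i = np.random.randn(3, 100)  # (n_atoms, n_times_valid)
ds = np.random.randn(3, 2, 7)  # (n_atoms, n_channels, n_times_atom)
res = _dense_convolve_multi(z_i, ds)
assert res.shape == (2, 100 + 7 - 1)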
Example #30
def one_dimensional_exp_rkhs():
    eps = 1e-1

    grid = torch.linspace(-4, 12, 500)[:, None]
    C = compute_distance(grid, grid)

    x_sampler = Sampler(mean=torch.tensor([[1.], [2], [3]]),
                        cov=torch.tensor([[[.1]], [[.1]], [[.1]]]),
                        p=torch.ones(3) / 3)
    y_sampler = Sampler(mean=torch.tensor([[0.], [3], [5]]),
                        cov=torch.tensor([[[.1]], [[.1]], [[.4]]]),
                        p=torch.ones(3) / 3)
    torch.manual_seed(100)
    np.random.seed(100)
    lpx = x_sampler.log_prob(grid)
    lpy = y_sampler.log_prob(grid)
    lpx -= torch.logsumexp(lpx, dim=0)
    lpy -= torch.logsumexp(lpy, dim=0)
    px = torch.exp(lpx)
    py = torch.exp(lpy)

    fevals = []
    gevals = []
    labels = []
    plans = []

    mem = Memory(location=expanduser('~/cache'))
    n_samples = 2000
    x, loga = x_sampler(n_samples)
    y, logb = y_sampler(n_samples)
    f, g = mem.cache(sinkhorn)(x.numpy(), y.numpy(), eps=eps, n_iter=100)
    f = torch.from_numpy(f).float()
    g = torch.from_numpy(g).float()
    distance = compute_distance(grid, y)
    feval = -eps * torch.logsumexp(
        (-distance + g[None, :]) / eps + logb[None, :], dim=1)
    distance = compute_distance(grid, x)
    geval = -eps * torch.logsumexp(
        (-distance + f[None, :]) / eps + loga[None, :], dim=1)

    plan = (lpx[:, None] + feval[:, None] / eps + lpy[None, :] +
            geval[None, :] / eps - C / eps)

    plans.append((plan, grid, grid))

    fevals.append(feval)
    gevals.append(geval)
    labels.append(f'True potential')

    m = 50
    sigma = 1
    alpha, posx, posy, sto_fevals, sto_gevals = rkhs_sinkhorn(x_sampler,
                                                              y_sampler,
                                                              m=m,
                                                              eps=eps,
                                                              n_iter=1000,
                                                              sigma=sigma,
                                                              grid=grid)
    feval = evaluate_kernel(alpha, posx, grid, sigma=sigma)
    geval = evaluate_kernel(alpha, posy, grid, sigma=sigma)
    plan = (lpx[:, None] + feval[:, None] / eps + lpy[None, :] +
            geval[None, :] / eps - C / eps)
    plans.append((plan, grid, grid))
    fevals.append(feval)
    gevals.append(geval)
    labels.append(f'RKHS descent')
    hatf, posx, hatg, posy, sto_fevals_, sto_gevals_ = sampling_sinkhorn(
        x_sampler, y_sampler, m=m, eps=eps, n_iter=1000, grid=grid)
    feval = evaluate_potential(hatg, posy, grid, eps)
    geval = evaluate_potential(hatf, posx, grid, eps)
    plan = (lpx[:, None] + feval[:, None] / eps + lpy[None, :] +
            geval[None, :] / eps - C / eps)
    plans.append((plan, grid, grid))

    fevals.append(feval)
    gevals.append(geval)
    labels.append(f'Online Sinkhorn')

    fevals = torch.cat([feval[None, :] for feval in fevals], dim=0)
    gevals = torch.cat([geval[None, :] for geval in gevals], dim=0)

    fig = plt.figure(figsize=(8 * 4 / 5, 2.2))
    gs = gridspec.GridSpec(ncols=4,
                           nrows=1,
                           width_ratios=[1.2, 1.2, 1, 1],
                           figure=fig)
    plt.subplots_adjust(right=0.97, left=0.08, top=0.8)
    ax1 = fig.add_subplot(gs[0])
    ax2 = fig.add_subplot(gs[1])
    ax3 = fig.add_subplot(gs[2])
    ax4 = fig.add_subplot(gs[3])
    for i, (label, feval, geval,
            (plan, x, y)) in enumerate(zip(labels, fevals, gevals, plans)):
        ax1.plot(grid,
                 feval,
                 label=label,
                 zorder=0 if label == 'True potential' else 1,
                 linewidth=4 if label == 'True potential' else 2)
        ax2.plot(
            grid,
            geval,
            label=label,
            zorder=0 if label == 'True potential' else 1,
            linewidth=4 if label == 'True potential' else 2,
        )
        plan = plan.numpy()
        if label == 'RKHS descent':
            ax3.contour(y[:, 0], x[:, 0], plan, levels=30)
        elif label == 'Online Sinkhorn':
            ax4.contour(y[:, 0], x[:, 0], plan, levels=30)
    ax1.set_title('Estimated $f$')
    ax2.set_title('Estimated $g$')
    ax4.set_title('Online Sinkhorn')
    ax3.set_title('OT plan: RKHS')
    ax2.legend(frameon=False, loc='upper left', bbox_to_anchor=(0, 1))
    # ax2.legend(frameon=False, bbox_to_anchor=(0., 1), loc='upper left')
    sns.despine(fig)
    for ax in [ax3, ax4]:
        ax.axis('off')
    plt.savefig('continuous_rkhs.pdf')
    plt.show()