Example #1
def timeit(func, *args, **kwargs):
    """Compute the mean execution time of func based on 7 measures."""
    times = []
    tries = kwargs.pop('tries')
    if tries > 1:
        tries += 2

    for _ in range(tries):
        kill_disk_cache()
        t0 = time.time()
        out = func(*args, **kwargs)
        if 1:
            # Just time the function
            t1 = time.time()
            times.append(t1 - t0)
        else:
            # Compute a hash of the output, to estimate the time
            # necessary to access the elements: this is a better
            # estimate of the time to load with memmapping.
            joblib.hash(out)
            t1 = time.time()
            joblib.hash(out)
            t2 = time.time()
            times.append(t2 - t0 - 2 * (t2 - t1))
    times.sort()
    return np.mean(times[1:-1]) if tries > 1 else t1 - t0, out
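A minimal usage sketch, not from the original benchmark: there, kill_disk_cache drops the OS page cache between measures; here it is stubbed out so the snippet runs anywhere, and a memory-mapped joblib.load stands in for the function being timed.

import os
import tempfile
import time

import joblib
import numpy as np


def kill_disk_cache():
    pass  # stub: the real benchmark flushes the OS disk cache here


def load_mmap(path):
    return joblib.load(path, mmap_mode='r')


path = os.path.join(tempfile.gettempdir(), 'timeit_demo.pkl')
joblib.dump(np.random.rand(500, 500), path)
mean_time, arr = timeit(load_mmap, path, tries=3)
print('mean load time: %.4f s' % mean_time)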
Example #2
 def inner(*args, **kargs):
     a = d.get(hash((args, kargs)))
     if a is None:
         a = d[hash((args, kargs))] = []
     yield from a
     for x in drop(len(a), f(*args, **kargs)):
         a.append(x)
         yield x
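One possible enclosing decorator for the inner above — a sketch only, since the surrounding code is not shown: gen_memoize and drop are illustrative names, and the key is written here with an explicit joblib.hash, on the assumption that hash in the snippet above is joblib's hash imported under that name (dict kwargs are not hashable with the built-in hash).

import itertools

import joblib


def drop(n, iterable):
    """Skip the first n items of an iterable."""
    return itertools.islice(iterable, n, None)


def gen_memoize(f):
    d = {}

    def inner(*args, **kargs):
        key = joblib.hash((args, kargs))
        a = d.get(key)
        if a is None:
            a = d[key] = []
        yield from a
        for x in drop(len(a), f(*args, **kargs)):
            a.append(x)
            yield x

    return inner


@gen_memoize
def squares(n):
    for i in range(n):
        yield i * i


print(list(squares(4)))  # computed and cached
print(list(squares(4)))  # replayed from the cached list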
Example #3
 def is_damaged(self):
     mem = self.stored()
     if mem and 'obj' in mem:
         if self._obj is None:
             self._memory['obj'] = mem['obj']
             self._obj = dill.loads(base64.b64decode(mem['obj']))
             return self._obj is None
         else:
             return joblib.hash(self._obj) != \
                    joblib.hash(dill.loads(base64.b64decode(mem['obj'])))
     else:
         return self._obj is None
Example #4
def test_joblib_cache():
    from joblib import hash
    # Dummy mask
    data = np.zeros((40, 40, 40, 2))
    data[20, 20, 20] = 1
    data_img = Nifti1Image(data, np.eye(4))

    with testing.write_tmp_imgs(data_img, create_files=True)\
                as filename:
        masker = NiftiMasker(mask=filename)
        masker.fit()
        mask_hash = hash(masker.mask_img_)
        masker.mask_img_.get_data()
        assert_true(mask_hash == hash(masker.mask_img_))
Example #5
import types

import joblib


def hash_codeobj(code):
    """Return hashed version of a code object"""
    bytecode = code.co_code
    consts = code.co_consts
    
    consts = [hash_codeobj(c) if isinstance(c, types.CodeType) else c 
              for c in consts]
    
    return joblib.hash((bytecode, consts))
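A quick usage sketch, assuming the definition above: functions with identical bytecode and constants get the same digest, while changing the body changes it.

def f(x):
    return x + 1


def g(x):
    return x + 1


def h(x):
    return x + 2


print(hash_codeobj(f.__code__) == hash_codeobj(g.__code__))  # expected True
print(hash_codeobj(f.__code__) == hash_codeobj(h.__code__))  # expected False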
Example #6
 def memcached(*args, **kwargs):
     """Cache the function in memory."""
     h = hash((args, kwargs))
     if h in cache:
         # logger.debug("Get %s(%s) from memcache.", name, str(args))
         return cache[h]
     else:
         # logger.debug("Compute %s(%s).", name, str(args))
         out = f(*args, **kwargs)
         cache[h] = out
         return out
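The memcached above is the inner function of a caching decorator; below is one way the enclosing wrapper might look — a sketch, not the original's surrounding code. memcache is an illustrative name, and the key uses an explicit joblib.hash on the assumption that hash in the snippet above is joblib's hash, so dict kwargs can be part of the key.

import joblib


def memcache(f):
    cache = {}
    name = f.__name__  # used by the (commented-out) logging in the original

    def memcached(*args, **kwargs):
        """Cache the function in memory."""
        h = joblib.hash((args, kwargs))
        if h in cache:
            return cache[h]
        out = f(*args, **kwargs)
        cache[h] = out
        return out

    return memcached


@memcache
def slow_square(x):
    return x ** 2


slow_square(3)  # computed
slow_square(3)  # served from the in-memory cache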
Example #7
 def checksum(self):
     if not self._checksum:
         m = hashlib.sha1()
         for ia in self.input_args:
             if isinstance(ia, target.Target):
                 m.update(ia.checksum())
             else:
                 m.update(joblib.hash(ia).encode())
         m.update(self.get_code(self.user_outputs))
         m.update(self.get_code(self.user_run))
         self._checksum = m.digest()
     return self._checksum
Example #8
 def checksum(self):
     if not self._checksum:
         m = hashlib.sha1()
         for ia in full_traverse(self.input_args):
             if isinstance(ia, target.Target):
                 m.update(ia.checksum())
             else:
                 m.update(joblib.hash(ia).encode())
         m.update('\n'.join(inspect.getsourcelines(self.user_outputs)[0]).encode('utf-8'))
         m.update('\n'.join(inspect.getsourcelines(self.user_run)[0]).encode('utf-8'))
         self._checksum = m.hexdigest()
     return self._checksum
Example #9
 def make_key(args, kwds, typed, tuple=tuple, sorted=sorted, type=type):
     # helper function to build a cache key from positional and keyword args
     key = args
     if kwds:
         sorted_items = tuple(sorted(kwds.items()))
         key += kwd_mark + sorted_items
     if typed:
         key += tuple(type(v) for v in args)
         if kwds:
             key += tuple(type(v) for k, v in sorted_items)
     if use_joblib_hash:
         key = joblib.hash(key)
     return key
Example #10
    def hash(self):

        if isinstance(self.target, types.BuiltinFunctionType):
            function_hash = None
        else:
            function_hash = hash_codeobj(self.target.__code__)

        uniquity = (self.trail, self.args, self.kwargs, function_hash)

        if self.has_deps():
            previous_hash = "".join(p.hash() for p in self.previous())
        else:
            previous_hash = ""

        return previous_hash + joblib.hash(uniquity)
Example #11
 def setup_cache(self, cache_path, **init_kargs):
     if self.rawmode in ('one-file', 'multi-file'):
         ressource_name = self.filename
     elif self.rawmode=='one-dir':
         ressource_name = self.dirname
     else:
         raise NotImplementedError
     
     if cache_path=='home':
         if sys.platform.startswith('win'):
             dirname = os.path.join(os.environ['APPDATA'], 'neo_rawio_cache')
         elif sys.platform.startswith('darwin'):
             dirname = os.path.expanduser('~/Library/Application Support/neo_rawio_cache')
         else:
             dirname = os.path.expanduser('~/.config/neo_rawio_cache')
         dirname = os.path.join(dirname, self.__class__.__name__)
         
         if not os.path.exists(dirname):
             os.makedirs(dirname)
     elif cache_path=='same_as_resource':
         dirname = os.path.dirname(ressource_name)
     else:
         assert os.path.exists(cache_path),\
                 'cache_path does not exist; use "home" or "same_as_resource" to set it automatically'
         dirname = cache_path
     
     #the hash of the resource (dir or file) is done with filename + modification time
     #TODO make something more sophisticated when rawmode='one-dir' that uses all filenames and datetimes
     d = dict(ressource_name=ressource_name, mtime=os.path.getmtime(ressource_name))
     hash = joblib.hash(d, hash_name='md5')
     
     #the cache file name combines the real name and the hash
     name = '{}_{}'.format(os.path.basename(ressource_name), hash)
     self.cache_filename = os.path.join(dirname, name)
     
     if os.path.exists(self.cache_filename):
         self.logger.warning('Use existing cache file {}'.format(self.cache_filename))
         self._cache = joblib.load(self.cache_filename)
     else:
         self.logger.warning('Create cache file {}'.format(self.cache_filename))
         self._cache = {}
         self.dump_cache()
Example #12
import joblib


def hash_dataframe(df):
    return joblib.hash(df)
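For reference, joblib.hash covers the whole DataFrame (values, index and columns), so the digest changes as soon as the data does — a short sketch assuming pandas is installed:

import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3], 'b': [0.1, 0.2, 0.3]})
h1 = hash_dataframe(df)
df.iloc[0, 0] = 99
h2 = hash_dataframe(df)
print(h1 == h2)  # False: the content changed, so the hash changed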
Example #13
    def embed_data(self, X, y, index, inverse, **kwargs):

        # get data from graph
        graph, epochs_per_sample, head, tail, weight, n_vertices = get_graph_elements(
            self.graph_, self.n_epochs
        )

        # number of elements per batch for tensorflow embedding
        if self.batch_size is None:
            # batch size can be larger if it's just over embeddings
            if self.direct_embedding & (self.decoding_method is None):
                self.batch_size = np.min([n_vertices, 60000])
            else:
                self.batch_size = np.min([n_vertices, 1000])

        # get embedding initialization if embedding directly
        if self.direct_embedding:
            embedding = self.init_embedding_from_graph(graph, **kwargs)
            embedding = embedding[index]
            self.embedding = tf.Variable(embedding.astype(np.float32, order="C"))

        # alpha is a hack for circumventing tensorflow's bug with sparse vectors
        #   this is only needed for the adadelta on direct embeddings
        self.alpha = tf.Variable(1.0)

        # get dimensions of data
        if self.dims is None:
            self.dims = [np.shape(X)[-1]]
        # reshape data for network
        if self.dims is not None:
            if len(self.dims) > 1:
                X = np.reshape(X, [len(X)] + list(self.dims))
                if self.valid_X is not None:
                    self.valid_X = np.reshape(
                        self.valid_X, [len(self.valid_X)] + list(self.dims)
                    )

        # if network is jointly training a classifier, get labeled data
        if (y is not None) & self.train_classifier:
            # get the number of training classes
            label_mask = y != -1
            # subset labeled X and Y
            X_labeled = X[label_mask]
            y_labeled = y[label_mask]
            self.n_classes = len(np.unique(y_labeled))

        # create networks, if one does not exist
        self.prepare_networks()

        # set a batch size, if one does not exist
        if self.batch_size is None:
            self.batch_size = 100

        # create iterator for data/edges
        edge_iter, n_edges_per_epoch = self.create_edge_iterator(
            head, tail, epochs_per_sample
        )

        # if network is jointly training a classifier, prepare data iterator
        if (y is not None) & self.train_classifier:
            # generate tensorflow iterator for classifier labels
            labeled_iter = self.create_classification_iterator(
                X_labeled, y_labeled
            )

        # get batches per epoch
        n_batches_per_epoch = int(np.ceil(n_edges_per_epoch / self.batch_size))

        # create an iterator for validation data
        if (
            self.decoding_method in ["autoencoder", "network"]
            or (self.train_classifier)
        ) and self.valid_X is not None:
            data_valid, n_valid_samp = self.create_validation_iterator()
            # number of batches corresponding to one epoch
            n_valid_batches_per_epoch = int(n_valid_samp / self.batch_size)
        if self.verbose:
            print(ts(), "Embedding with TensorFlow")

        # create keras summary objects for loss
        self.create_summary_metrics()

        # create a tqdm iterator to show epoch progress
        if self.verbose:
            epoch_iter = tqdm(desc="epoch", total=self.training_epochs)

        batch = 0
        X_lab, y_lab = None, None  # default classifier values
        for edge_epoch, epoch in zip(edge_iter, np.arange(self.training_epochs)):

            if self.verbose & (n_batches_per_epoch > 200):
                edge_tqdm = tqdm(desc="batch", total=n_batches_per_epoch, leave=False)

            # loop through batches
            for batch_to, batch_from in edge_epoch:
                batch += 1
                # if training a classifier, get X and y data
                if self.train_classifier:
                    X_lab, y_lab = labeled_iter.next()

                # if this is a direct encoding, the embeddings should be used directly
                if self.direct_embedding:
                    (
                        ce_loss,
                        reconstruction_loss,
                        classifier_loss,
                        classifier_acc,
                    ) = self.train_batch(batch_to, batch_from, X_lab, y_lab)
                else:
                    (
                        ce_loss,
                        reconstruction_loss,
                        classifier_loss,
                        classifier_acc,
                    ) = self.train_batch(X[batch_to], X[batch_from], X_lab, y_lab)
                # save losses to tensorflow summary
                self.summary_metrics["train_loss_umap"](ce_loss)
                if self.decoding_method in ["autoencoder", "network"]:
                    self.summary_metrics["train_loss_recon"](reconstruction_loss)
                if self.train_classifier:
                    self.summary_metrics["train_loss_classif"](classifier_loss)
                    self.summary_metrics["train_acc_classif"](classifier_acc)
                if self.verbose & (n_batches_per_epoch > 200):
                    edge_tqdm.update(1)

                # save summary information
                with self.summary_writer_train.as_default():
                    tf.summary.scalar(
                        "umap_loss",
                        self.summary_metrics["train_loss_umap"].result(),
                        step=batch,
                    )
                    if self.decoding_method in ["autoencoder", "network"]:
                        tf.summary.scalar(
                            "recon_loss",
                            self.summary_metrics["train_loss_recon"].result(),
                            step=batch,
                        )

                    if self.train_classifier:
                        tf.summary.scalar(
                            "classif_loss",
                            self.summary_metrics["train_loss_classif"].result(),
                            step=batch,
                        )
                        tf.summary.scalar(
                            "classif_acc",
                            self.summary_metrics["train_acc_classif"].result(),
                            step=batch,
                        )

                    self.summary_writer_train.flush()

            # update tqdm iterators
            if self.verbose:
                if n_batches_per_epoch > 200:
                    # close tqdm iterator
                    edge_tqdm.update(edge_tqdm.total - edge_tqdm.n)
                    edge_tqdm.close()
                epoch_iter.update(1)

            # compute test loss for reconstruction and classification
            if self.valid_X is not None and self.direct_embedding is False:
                for valid_batch_X, valid_batch_Y in iter(data_valid):
                    # get loss for reconstruction
                    if self.decoding_method in ["autoencoder", "network"]:

                        valid_recon_loss = tf.reduce_mean(
                            self.compute_reconstruction_loss(valid_batch_X)
                        )
                        self.summary_metrics["valid_loss_recon"](valid_recon_loss)

                    # get loss for accuracy
                    if self.train_classifier:
                        classifier_loss, classifier_acc = self.compute_classifier_loss(
                            valid_batch_X, valid_batch_Y
                        )
                        self.summary_metrics["valid_loss_classif"](classifier_loss)
                        self.summary_metrics["valid_acc_classif"](classifier_acc)
                # save summary information

                with self.summary_writer_valid.as_default():
                    if self.decoding_method in ["autoencoder", "network"]:
                        tf.summary.scalar(
                            "recon_loss",
                            self.summary_metrics["valid_loss_recon"].result(),
                            step=batch,
                        )
                    if self.train_classifier:
                        tf.summary.scalar(
                            "classif_loss",
                            self.summary_metrics["valid_loss_classif"].result(),
                            step=batch,
                        )
                        tf.summary.scalar(
                            "classif_acc",
                            self.summary_metrics["valid_acc_classif"].result(),
                            step=batch,
                        )

                    self.summary_writer_valid.flush()

        # self.summary_writer.close()

        if self.verbose:
            print(ts() + " Finished embedding")

        # make embedding as projected batch

        if self.direct_embedding:
            self.embedding_ = self.embedding.numpy()[inverse]
        else:
            self.embedding = self.transform(X[index])
            self.embedding_ = self.embedding[inverse]

        self._input_hash = joblib.hash(self._raw_data)
Example #14
def plot_tfr(tfr,
             time_cutoff,
             vmin,
             vmax,
             tl,
             cluster_correct=False,
             threshold=0.05,
             plot_colorbar=False,
             ax=None,
             cmap=None,
             stat_cutoff=None,
             aspect=None,
             cluster=None,
             contrast_name=None,
             time_lock=None):
    from pymeg.contrast_tfr import get_tfr_stats

    # colorbar:
    from matplotlib.colors import LinearSegmentedColormap

    if cmap is None:
        cmap = LinearSegmentedColormap.from_list(
            "custom", ["blue", "lightblue", "lightgrey", "yellow", "red"],
            N=100)

    if stat_cutoff is None:
        stat_cutoff = time_cutoff

    # data:
    times, freqs, X = contrast_tfr.get_tfr(tfr, stat_cutoff)
    #import ipdb; ipdb.set_trace()
    ### Save data to data source file
    from conf_analysis.meg.figures import array_to_data_source_file
    panel = 'A' if 'all' in contrast_name else 'B'
    if 'choice' not in contrast_name:
        fnr = 2
    else:
        fnr = 'S6'
        panel = 'A'
    array_to_data_source_file(
        fnr, panel, cluster + str(time_lock), X, {
            'dim_0_subjects': np.arange(1, 16),
            'dim_1_frequencies': freqs,
            'dim_2_time': times
        })

    mask = None
    if cluster_correct:
        hash = joblib.hash([times, freqs, X, threshold])
        try:
            _, _, cluster_p_values, _ = cluster_correct[hash]
            sig = cluster_p_values.reshape((X.shape[1], X.shape[2]))
            mask = sig < threshold
        except KeyError:
            s = get_tfr_stats(times, freqs, X, threshold)
            _, _, cluster_p_values, _ = s[hash]
            sig = cluster_p_values.reshape((X.shape[1], X.shape[2]))
            mask = sig < threshold
    earliest_sig = None
    if mask is not None:
        idt = np.where(np.any(mask, 0).ravel())[0]
        idt = [
            t for t in idt
            if (time_cutoff[0] <= times[t]) and (times[t] <= time_cutoff[1])
        ]
        if len(idt) > 0:
            earliest_sig = times[idt[0]]

    freqs_idx = freqs >= 4
    Xb = np.nanmean(X, 0)[freqs_idx, :]
    freqsb = freqs[freqs_idx]
    cax = pmi(
        plt.gca(),
        Xb,
        times,
        yvals=freqsb,
        yscale="linear",
        vmin=vmin,
        vmax=vmax,
        mask=mask[freqs_idx, :] if mask is not None else None,
        mask_alpha=1,
        mask_cmap=cmap,
        cmap=cmap,
    )
    plt.gca().set_aspect(aspect)
    plt.xlim(time_cutoff)
    plt.ylim([freqs.min() - 0.5, freqs.max() + 0.5])
    ax.axvline(0, ls="--", lw=0.75, color="black")
    ax.axvline(1, ls="--", lw=0.75, color="black")
    if plot_colorbar:
        plt.colorbar(cax, ticks=[vmin, 0, vmax])
    return ax, earliest_sig
Example #15
def plot_epoch_pair(
    tfr_data,
    vmin=-25,
    vmax=25,
    cmap="RdBu_r",
    gs=None,
    stats=False,
    threshold=0.05,
    ylabel=None,
):
    from matplotlib import gridspec
    import pylab as plt
    import joblib

    if gs is None:
        g = gridspec.GridSpec(1, 2)
    else:
        g = gridspec.GridSpecFromSubplotSpec(1,
                                             2,
                                             subplot_spec=gs,
                                             wspace=0.01,
                                             width_ratios=[1, 0.4])
    times, freqs, tfr = None, None, None
    for epoch in ["stimulus", "response"]:
        row = 0
        if epoch == "stimulus":
            col = 0
            time_cutoff = (-0.35, 1.1)
            xticks = [0, 0.25, 0.5, 0.75, 1]
            yticks = [25, 50, 75, 100, 125]
            xmarker = [0, 1]
        else:
            col = 1
            time_cutoff = (-0.35, 0.1)
            xticks = [0]
            yticks = [1, 25, 50, 75, 100, 125]
            xmarker = [0, 1]

        plt.subplot(g[row, col])
        tdata = tfr_data.query('epoch=="%s"' % (epoch))
        if len(tdata) == 0:
            plt.yticks([], [""])
            plt.xticks([], [""])
            continue
        times, freqs, tfr = get_tfr(tdata, time_cutoff)

        mask = None
        if stats:
            hash = joblib.hash([times, freqs, tfr, threshold])
            try:
                _, _, cluster_p_values, _ = stats[hash]
            except KeyError:
                s = get_tfr_stats(times, freqs, tfr, threshold)
                _, _, cluster_p_values, _ = s[hash]

            sig = cluster_p_values.reshape((tfr.shape[1], tfr.shape[2]))
            mask = sig < threshold

        _ = pmi(
            plt.gca(),
            np.nanmean(tfr, 0),
            times,
            yvals=freqs,
            yscale="linear",
            vmin=vmin,
            vmax=vmax,
            mask=mask,
            mask_alpha=1,
            mask_cmap=cmap,
            cmap=cmap,
        )
        if (ylabel is not None) and (epoch == "stimulus"):
            plt.ylabel(ylabel, labelpad=-2, fontdict={"fontsize": 4})
        # for xmark in xmarker:
        #    plt.axvline(xmark, color='k', lw=1, zorder=-1, alpha=0.5)

        plt.yticks(yticks, [""] * len(yticks))
        plt.xticks(xticks, [""] * len(xticks))

        plt.tick_params(direction="inout", length=2, zorder=100)
        plt.xlim(time_cutoff)
        plt.ylim([1, 147.5])
        # plt.axhline(10, color='k', lw=1, alpha=0.5, linestyle='--')
        # plt.axhline(25, color='k', lw=1, alpha=0.5, linestyle=':')
        # plt.axhline(50, color='k', lw=1, alpha=0.5, linestyle=':')
        plt.axvline(0, color="k", lw=1, zorder=5, alpha=0.5)
        if epoch == "stimulus":
            plt.axvline(1, color="k", lw=1, zorder=5, alpha=0.5)
    return times, freqs, tfr
Example #16
def test_copy_img_side_effect():
    img1 = Nifti1Image(np.ones((2, 2, 2, 2)), affine=np.eye(4))
    hash1 = joblib.hash(img1)
    niimg.copy_img(img1)
    hash2 = joblib.hash(img1)
    assert hash1 == hash2
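The same hash-before/hash-after pattern works for any function that must not mutate its input; a minimal sketch with a plain NumPy array (normalize is an illustrative function, not from the source above):

import joblib
import numpy as np


def normalize(a):
    return a / np.linalg.norm(a)


def test_no_side_effect():
    a = np.arange(1, 5, dtype=float)
    before = joblib.hash(a)
    normalize(a)
    assert joblib.hash(a) == before  # the input array was left untouched


test_no_side_effect()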
Example #17
 def __hash__(self):
     return int(joblib.hash(self.atom), base=16)
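joblib.hash returns an md5 hex digest, so int(..., base=16) turns it into a value usable from __hash__ even when the underlying attribute (here a NumPy array) is not hashable itself. A self-contained sketch; Wrapper is an illustrative stand-in for the original class holding self.atom:

import joblib
import numpy as np


class Wrapper:
    def __init__(self, atom):
        self.atom = atom

    def __hash__(self):
        return int(joblib.hash(self.atom), base=16)

    def __eq__(self, other):
        return joblib.hash(self.atom) == joblib.hash(other.atom)


a = Wrapper(np.arange(5))
b = Wrapper(np.arange(5))
print(hash(a) == hash(b), a == b)  # True True: equal content gives equal hashes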
Example #18
 def fit(self, X, y):
     self.training_size_ = X.shape[0]
     self.training_hash_ = joblib.hash(X)
Example #19
 def __hash__(self):
     return hash(
         joblib.hash((self._final_estimator.coef_,
                      self._final_estimator.intercept_)))
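A sketch of the idea behind this __hash__: identify a fitted pipeline by the coefficients it learned, so two pipelines that ended up with the same weights hash alike. HashablePipeline is an illustrative subclass (scikit-learn assumed installed), not the original class:

import joblib
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


class HashablePipeline(Pipeline):
    def __hash__(self):
        return hash(
            joblib.hash((self._final_estimator.coef_,
                         self._final_estimator.intercept_)))


X = np.random.RandomState(0).rand(20, 3)
y = X @ np.array([1.0, -2.0, 0.5])
pipe = HashablePipeline([('scale', StandardScaler()),
                         ('reg', LinearRegression())])
pipe.fit(X, y)
print(hash(pipe))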
Example #20
def run_model(experiment_info=None,
              output_dataset=None,
              force=False,
              hash_type='sha1',
              output_path=None,
              run_number=1,
              *,
              dataset_name,
              is_supervised,
              model_name):
    '''Run a model on a dataset (predict/transform)

    Runs an algorithm_object on the dataset and returns a new
    dataset object, tagged with experiment metadata,
    and saves it to disk under `data_path / output_dataset`.

    Parameters
    ----------
    dataset_name: str, valid dataset name
        Name of a dataset object that will be run through the model
    model_name: str, valid model name
        name of the model that will transform the data
    experiment_info: (str)
        any other information to note about the experiment
        This is used as the output dataset's DESCR text
    output_path: path
        directory to store output files
    output_dataset: (str, optional) filename base for the output dataset.
        Will also be used as the output `dataset.name`.
    run_number: (int)
        attempt number via the same parameters
    force: (boolean)
        force re-running the algorithm and overwriting any existing data.

    Returns
    -------
    Dataset object emerging from the model,
    with experiment dictionary embedded in metadata
    '''
    if output_path is None:
        output_path = paths['model_output_path']
    else:
        output_path = pathlib.Path(output_path)

    if output_dataset is None:
        output_dataset = f'{model_name}_exp_{dataset_name}_{run_number}'

    os.makedirs(output_path, exist_ok=True)

    dataset = Dataset.load(dataset_name)

    model, model_meta = load_model(model_name)

    # add experiment metadata
    experiment = {
        'model_name': model_name,
        'dataset_name': dataset_name,
        'run_number': run_number,
        'hash_type': hash_type,
        'input_data_hash': joblib.hash(dataset.data, hash_name=hash_type),
        'input_target_hash': joblib.hash(dataset.target, hash_name=hash_type),
        'model_hash': joblib.hash(model, hash_name=hash_type),
    }
    logger.debug(f"Predict: Applying {model_name} to {dataset_name}")
    metadata_fq = output_path / f'{output_dataset}.metadata'

    if metadata_fq.exists() and force is False:
        cached_metadata = Dataset.load(output_dataset,
                                       data_path=output_path,
                                       metadata_only=True)
        if experiment.items() <= cached_metadata['experiment'].items():
            logger.info(
                "Experiment has already been run. Returning Cached Result")
            return Dataset.load(output_dataset, data_path=output_path)
        else:
            raise Exception(
                f'An Experiment with name {output_dataset} exists already, '
                'but metadata has changed. '
                'Use `force=True` to overwrite, or change one of '
                '`run_number` or `output_dataset`')

    # Either force is True, or we need to rerun the algorithm.
    start_time = time.time()
    if is_supervised:
        exp_data = model.predict(dataset.data)
    else:
        if hasattr(model, 'transform'):
            logger.debug('Transform found. Skipping fit')
            exp_data = model.transform(dataset.data)
        else:
            logger.debug('No Transform found. Running fit_transform')
            exp_data = model.fit_transform(dataset.data)

    end_time = record_time_interval(output_dataset, start_time)

    experiment['start_time'] = start_time
    experiment['duration'] = end_time - start_time

    new_metadata = dataset.metadata.copy()
    new_metadata['experiment'] = experiment
    if experiment_info:
        new_metadata['descr'] = experiment_info
    new_dataset = Dataset(dataset_name=output_dataset,
                          data=exp_data,
                          target=dataset.target.copy(),
                          metadata=new_metadata)
    new_dataset.dump(file_base=output_dataset,
                     dump_path=output_path,
                     force=True)
    return new_dataset
Example #21
 def inner(*args, **kargs):
     a = d.get(hash((args, kargs)))
     if a is None:
         a = d[hash((args, kargs))] = f(*args, **kargs)
     return a
Example #22
    def fit_embed_data(self, X, y, index, inverse):
        """
        Performs an embedding on data after a UMAP graph has been constructed.

        Parameters
        ----------
        X : array, shape (n_samples, n_features) or (n_samples, n_samples)
            If the metric is 'precomputed' X must be a square distance
            matrix. Otherwise it contains a sample per row. If the method
            is 'exact', X may be a sparse matrix of type 'csr', 'csc'
            or 'coo'.
        y : array, shape (n_samples)
            A target array for supervised dimension reduction. How this is
            handled is determined by parameters UMAP was instantiated with.
            The relevant attributes are ``target_metric`` and
            ``target_metric_kwds``.
        index : array, shape (n_samples)
            [description]
        inverse : array, shape (n_samples)
            [description]
        """
        if self.n_epochs is None:
            n_epochs = 0
        else:
            n_epochs = self.n_epochs

        if self.densmap or self.output_dens:
            self._densmap_kwds["graph_dists"] = self.graph_dists_

        if self.verbose:
            print(ts(), "Construct embedding")

        self.embedding_, aux_data = simplicial_set_embedding(
            self._raw_data[self.index__],  # JH why raw data?
            self.graph_,
            self.n_components,
            self._initial_alpha,
            self._a,
            self._b,
            self.repulsion_strength,
            self.negative_sample_rate,
            n_epochs,
            init,
            random_state,
            self._input_distance_func,
            self._metric_kwds,
            self.densmap,
            self._densmap_kwds,
            self.output_dens,
            self._output_distance_func,
            self._output_metric_kwds,
            self.output_metric in ("euclidean", "l2"),
            self.random_state is None,
            self.verbose,
        )

        self.embedding_ = self.embedding_[self.inverse__]
        if self.output_dens:
            self.rad_orig_ = aux_data["rad_orig"][self.inverse__]
            self.rad_emb_ = aux_data["rad_emb"][self.inverse__]

        if self.verbose:
            print(ts() + " Finished embedding")

        numba.set_num_threads(self._original_n_threads)
        self._input_hash = joblib.hash(self._raw_data)
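Storing joblib.hash(self._raw_data) at the end of fit lets a later call cheaply check whether it received the training data again, without keeping a second copy around for comparison. A minimal sketch of that pattern (FittedThing is illustrative, not the library's class):

import joblib
import numpy as np


class FittedThing:
    def fit(self, X):
        self._raw_data = X
        self._input_hash = joblib.hash(self._raw_data)
        return self

    def transform(self, X):
        if joblib.hash(X) == self._input_hash:
            print("transform() called on the training data")
        return X


thing = FittedThing().fit(np.random.rand(10, 3))
thing.transform(thing._raw_data)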
Example #23
import joblib


def compute_hash(*args, **kwargs):
    """Compute a hash of anything joblib can handle."""
    to_hash = {"args": args, "kwargs": kwargs}
    return joblib.hash(to_hash)
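A quick usage sketch, assuming the definition above (mode and bins are arbitrary demo arguments): the digest is stable for equal inputs, including NumPy arrays, and changes when any argument changes.

import numpy as np

x = np.arange(10)
print(compute_hash(x, mode="fast", bins=32) == compute_hash(x, mode="fast", bins=32))  # True
print(compute_hash(x, mode="fast", bins=32) == compute_hash(x, mode="slow", bins=32))  # False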
Example #24
def test_random_state_second_output_reproducibility(regtest):
    random = np.random.RandomState(0)
    n_samples = 500
    _ = random.uniform(size=(n_samples, 5))
    X = random.uniform(size=(n_samples, 5))
    print(joblib.hash(X), file=regtest)
Example #25
import pytz
import uuid

import traceback

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def now_localtz():
    return datetime.datetime.now(pytz.timezone('Europe/Lisbon'))


VERSION = "20181005-8"
DATE_STARTED = now_localtz()
HOSTNAME = joblib.hash("salted2662" + socket.gethostname())
WORKER_ID = str(uuid.uuid4())

USE_CACHE = False
REDIS_HOST = "XXXXXXXXX.redis.cache.windows.net"
REDIS_KEY = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
CACHE_VERSION = "v3"

app = Flask(__name__)

# LogicApps configured to send all Msft Forms full form body with question Ids and forms answers
# We map here each question id to sklearn algorithm and pipeline parameters

FORM_IDS = {
    "rf71efaaee75f4869b3a24de441b09919": "algorithm",
    "r52e336e1f3564f47b9359debc320a7ce": "nickname",
Example #26
    def fit(self, X):
        """Fit X into an embedded space.

        Optionally use y for supervised dimension reduction.

        Parameters
        ----------
        X : array, shape (n_samples, n_features) or (n_samples, n_samples)
            If the metric is 'precomputed' X must be a square distance
            matrix. Otherwise it contains a sample per row. If the method
            is 'exact', X may be a sparse matrix of type 'csr', 'csc'
            or 'coo'.
        """

        X = check_array(X, dtype=np.float32, accept_sparse="csr", order="C")
        self._raw_data = X

        # Handle all the optional arguments, setting default
        if self.a is None or self.b is None:
            self.a, self.b = find_ab_params(self.spread, self.min_dist)

        self._validate_parameters()

        if self.verbose:
            print(str(self))

        index = list(range(X.shape[0]))

        # Error check n_neighbors based on data size
        if X[index].shape[0] <= self.n_neighbors:
            if X[index].shape[0] == 1:
                self.embedding_ = np.zeros(
                    (1, self.n_components))  # needed for sklearn compatibility
                return self

            warn("n_neighbors is larger than the dataset size; truncating to "
                 "X.shape[0] - 1")
            self._n_neighbors = X[index].shape[0] - 1
        else:
            self._n_neighbors = self.n_neighbors

        # Note: unless it causes issues for setting 'index', could move this to
        # initial sparsity check above
        if self._sparse_data and not X.has_sorted_indices:
            X.sort_indices()

        random_state = check_random_state(self.random_state)

        if self.verbose:
            print(ts(), "Construct fuzzy simplicial set")

        # pass string identifier if pynndescent also defines distance metric
        if _HAVE_PYNNDESCENT:
            if self._sparse_data and self.metric in pynn_sparse_named_distances:
                nn_metric = self.metric
            elif not self._sparse_data and self.metric in pynn_named_distances:
                nn_metric = self.metric
            else:
                nn_metric = self._input_distance_func
        else:
            nn_metric = self._input_distance_func

        (self._knn_indices, self._knn_dists, _) = nearest_neighbors(
            X[index],
            self._n_neighbors,
            # int(self._n_neighbors * 1.2),  # we can use more neighbors
            nn_metric,
            self.angular_rp_forest,
            random_state,
            self.low_memory,
            use_pynndescent=True,
            verbose=self.verbose,
        )

        if self.local_n_epochs is None:
            self.local_n_epochs = 50

        if self.global_n_epochs is None:
            self.global_n_epochs = 100

        if self.verbose:
            print(ts(), "Build K-nearest neighbor graph structure")

        flat_indices = self._knn_indices.flatten(
        )  # flattening all knn indices
        index, freq = np.unique(flat_indices, return_counts=True)
        # sorted_index = index[freq.argsort(kind="stable")]  # sorted index in increasing order
        sorted_index = index[freq.argsort(
            kind="stable")[::-1]]  # sorted index in decreasing order

        # get disjoint NN matrix
        disjoints = build_knn_graph(
            data=X,
            sorted_index=sorted_index,
            hub_num=self.hub_num,
        )

        # get hub indices from disjoint set
        hubs = pick_hubs(
            disjoints=disjoints,
            random_state=random_state,
            popular=True,
        )

        if self.verbose:
            print(ts(), "Run global optimization")

        init_global = build_global_structure(
            data=X,
            hubs=hubs,
            n_components=self.n_components,
            a=self.a,
            b=self.b,
            random_state=random_state,
            alpha=self.global_learning_rate,
            n_epochs=self.global_n_epochs,
            verbose=self.verbose,
            label=self.ll,
            init_global=self.init,
        )

        if self.verbose:
            print(
                ts(),
                "Get NN indices & Initialize them using original hub information"
            )

        init, hub_info, hubs = embed_others_nn(
            data=X,
            init_global=init_global,
            hubs=hubs,
            knn_indices=self._knn_indices,
            nn_consider=self._n_neighbors,
            random_state=random_state,
            label=self.ll,
            verbose=self.verbose,
        )

        self._knn_indices, self._knn_dists, counts = select_from_knn(
            knn_indices=self._knn_indices,
            knn_dists=self._knn_dists,
            hub_info=hub_info,
            n_neighbors=self.n_neighbors,
            n=X.shape[0],
        )

        counts_hub = counts[hubs]
        counts_sum = len(counts_hub[counts_hub < self.n_neighbors])
        if counts_sum > 0:
            if self.verbose:
                print(ts(), "Adding more KNNs to build the graph")

            self._knn_indices, self._knn_dists, counts_sum = apppend_knn(
                data=X,
                knn_indices=self._knn_indices,
                knn_dists=self._knn_dists,
                hub_info=hub_info,
                n_neighbors=self.n_neighbors,
                counts=counts,
                counts_sum=counts_sum,
            )

            if counts_sum != 0:
                raise ValueError(
                    f"KNN indices not fully determined! counts_sum: {counts_sum} != 0"
                )

        self.graph_, _, _ = fuzzy_simplicial_set(
            X[hubs],
            self.n_neighbors,
            random_state,
            nn_metric,
            hubs,
            self._knn_indices[hubs],
            self._knn_dists[hubs],
            self.angular_rp_forest,
            self.set_op_mix_ratio,
            self.local_connectivity,
            True,
            True,
        )

        if self.verbose:
            print(ts(), "Run local optimization")

        init = local_optimize_nn(
            data=X,
            graph=self.graph_,
            hub_info=hub_info,
            n_components=self.n_components,
            learning_rate=self.local_learning_rate,
            a=self.a,
            b=self.b,
            gamma=self.gamma,
            negative_sample_rate=self.negative_sample_rate,
            n_epochs=self.local_n_epochs,
            init=init,
            random_state=random_state,
            parallel=False,
            verbose=self.verbose,
            label=self.ll,
        )

        if self.verbose:
            print(ts(), "Embedding outliers")

        self.embedding_ = embed_outliers(
            data=X,
            init=init,
            hubs=hubs,
            disjoints=disjoints,
            random_state=random_state,
            label=self.ll,
            verbose=self.verbose,
        )

        if self.verbose:
            print(ts(), "Finished embedding")

        self._input_hash = joblib.hash(self._raw_data)

        return self
Example #27
def train(**kargs):

    random_state = 43

    rskf = RepeatedStratifiedKFold(n_splits=5,
                                   n_repeats=1,
                                   random_state=random_state)

    if kargs["algorithm"] == "Logistic Regression":
        clf = LogisticRegression(random_state=random_state)
        clf_name = "logreg"

    if kargs["algorithm"] == "Random Forest":
        clf = RandomForestClassifier(random_state=random_state)
        clf_name = "rf"

    if kargs["algorithm"] == "Decision Tree":
        clf = DecisionTreeClassifier(random_state=random_state)
        clf_name = "dt"

    if kargs["algorithm"] == "SVM":
        clf = SVC(random_state=random_state)
        clf_name = "svm"

    if kargs["algorithm"] == "Extra Trees":
        clf = ExtraTreesClassifier(random_state=random_state)
        clf_name = "xt"

    print("train params", kargs)

    pipeline = []

    # Basic post prep pipeline (onehot/remove any remaining NA), make the dataset scikit compliant
    nums = [([c], pp.Imputer()) for c in X.select_dtypes(np.number)]
    cats = [([c], [DataFrameImputer(default_value=""),
                   pp.LabelBinarizer()]) for c in X.select_dtypes("object")]

    texts = []
    text_preproc = kargs.get("text_preproc")
    if text_preproc and text_preproc != "None":
        if text_preproc == "Tfidf":
            texts = [("Name", TfidfVectorizer())]
        elif text_preproc == "Count":
            texts = [("Name", CountVectorizer())]
        else:
            raise (Exception(f"not valid:{text_preproc}"))

    print(texts)
    mapper = DataFrameMapper(nums + cats + texts, df_out=True)

    pipeline.append(('featurize', mapper))

    pca = kargs.get("pca")
    if pca and pca != "Disabled":
        print("add pca")
        pipeline.append(('pca', PCA(n_components=guess_type(kargs["pca"]))))

    pipeline.append((clf_name, clf))

    # Our full pipeline
    train_pipeline = Pipeline(pipeline)

    # Set classifier parameters
    for k in kargs.keys():
        if (clf_name + "__") in k:
            train_pipeline.set_params(**{k: guess_type(kargs[k])})
    # Dump
    for step in train_pipeline.steps:
        pprint.pprint(step)

    # Check cache
    if USE_CACHE:
        cache_key = CACHE_VERSION + "__" + str(joblib.hash(train_pipeline))
        print("Cache key:", cache_key)
        scores = cache.get(cache_key)
        if scores is not None:
            print("From Cache")
            scores = pickle.loads(scores)
            return scores + np.random.normal(0, .0005, len(scores)) * 100

    print("Not in cache, training...")

    # Train/Cross eval
    scores = cross_val_score(X=X,
                             y=y,
                             cv=rskf,
                             estimator=train_pipeline,
                             verbose=5,
                             n_jobs=1,
                             scoring="accuracy")
    scores = (scores * 100).round(3)

    if USE_CACHE:
        print("Saving in cache...")
        cache.set(cache_key, pickle.dumps(scores))

    return scores + np.random.normal(0, .0005, len(scores)) * 100
Example #28
def _evaluate_one(**kwargs):
    params = DEFAULT_PARAMS.copy()
    params.update(kwargs)
    params_digest = joblib.hash(params)

    results = params.copy()
    results['digest'] = params_digest
    results_folder = Path('results')
    results_folder.mkdir(exist_ok=True)
    folder = results_folder.joinpath(params_digest)
    folder.mkdir(exist_ok=True)
    if len(list(folder.glob("*/results.json"))) == 4:
        print('Skipping')

    split_idx = params.get('split_idx', 0)
    print("Evaluating model on split #%d:" % split_idx)
    pprint(params)

    ratings_train, ratings_test = train_test_split(all_ratings,
                                                   test_size=0.2,
                                                   random_state=split_idx)
    max_user_id = all_ratings['user_id'].max()
    max_item_id = all_ratings['item_id'].max()

    user_id_train = ratings_train['user_id']
    item_id_train = ratings_train['item_id']
    rating_train = ratings_train['rating']

    user_id_test = ratings_test['user_id']
    item_id_test = ratings_test['item_id']
    rating_test = ratings_test['rating']

    loss = params.get('loss', DEFAULT_LOSS)
    if loss == 'cross_entropy':
        target_train = rating_train - 1
    else:
        target_train = rating_train

    model = make_model(max_user_id + 1, max_item_id + 1, **params)
    results['model_size'] = sum(w.size for w in model.get_weights())
    nb_epoch = 5
    epochs = 0
    for i in range(4):
        epochs += nb_epoch
        t0 = time()
        model.fit([user_id_train, item_id_train],
                  target_train,
                  batch_size=params['batch_size'],
                  nb_epoch=nb_epoch,
                  shuffle=True,
                  verbose=False)
        epoch_duration = (time() - t0) / nb_epoch
        train_scores, train_preds = _compute_scores(model, 'train',
                                                    user_id_train,
                                                    item_id_train,
                                                    rating_train, loss)
        results.update(train_scores)
        test_scores, test_preds = _compute_scores(model, 'test', user_id_test,
                                                  item_id_test, rating_test,
                                                  loss)
        results.update(test_scores)

        results['epoch_duration'] = epoch_duration
        results['epochs'] = epochs

        subfolder = folder.joinpath("%03d" % epochs)
        subfolder.mkdir(exist_ok=True)

        # Transactional results saving to avoid file corruption on ctrl-c
        results_filepath = subfolder.joinpath(RESULTS_FILENAME)
        with transactional_open(results_filepath, mode='w') as f:
            json.dump(results, f)

        model_filepath = subfolder.joinpath(MODEL_FILENAME)
        with transactional_fname(model_filepath) as fname:
            model.save(fname)

        # Save predictions and true labels to be able to recompute new scores
        # later
        with transactional_open(subfolder / 'test_preds.npy', mode='wb') as f:
            np.save(f, test_preds)
        with transactional_open(subfolder / 'train_preds.npy', mode='wb') as f:
            np.save(f, train_preds)
        with transactional_open(subfolder / 'ratings.npy', mode='wb') as f:
            np.save(f, rating_test)

    return params_digest
Example #29
def _evaluate_one(**kwargs):
    params = DEFAULT_PARAMS.copy()
    params.update(kwargs)
    params_digest = joblib.hash(params)

    results = params.copy()
    results['digest'] = params_digest
    results_folder = Path('results')
    results_folder.mkdir(exist_ok=True)
    folder = results_folder.joinpath(params_digest)
    folder.mkdir(exist_ok=True)
    if len(list(folder.glob("*/results.json"))) == 4:
        print('Skipping')

    split_idx = params.get('split_idx', 0)
    print("Evaluating model on split #%d:" % split_idx)
    pprint(params)

    ratings_train, ratings_test = train_test_split(
        all_ratings, test_size=0.2, random_state=split_idx)
    max_user_id = all_ratings['user_id'].max()
    max_item_id = all_ratings['item_id'].max()

    user_id_train = ratings_train['user_id']
    item_id_train = ratings_train['item_id']
    rating_train = ratings_train['rating']

    user_id_test = ratings_test['user_id']
    item_id_test = ratings_test['item_id']
    rating_test = ratings_test['rating']

    loss = params.get('loss', DEFAULT_LOSS)
    if loss == 'cross_entropy':
        target_train = rating_train - 1
    else:
        target_train = rating_train

    model = make_model(max_user_id + 1, max_item_id + 1, **params)
    results['model_size'] = sum(w.size for w in model.get_weights())
    nb_epoch = 5
    epochs = 0
    for i in range(4):
        epochs += nb_epoch
        t0 = time()
        model.fit([user_id_train, item_id_train], target_train,
                  batch_size=params['batch_size'],
                  nb_epoch=nb_epoch, shuffle=True, verbose=False)
        epoch_duration = (time() - t0) / nb_epoch
        train_scores, train_preds = _compute_scores(
            model, 'train', user_id_train, item_id_train, rating_train, loss)
        results.update(train_scores)
        test_scores, test_preds = _compute_scores(
            model, 'test', user_id_test, item_id_test, rating_test, loss)
        results.update(test_scores)

        results['epoch_duration'] = epoch_duration
        results['epochs'] = epochs

        subfolder = folder.joinpath("%03d" % epochs)
        subfolder.mkdir(exist_ok=True)

        # Transactional results saving to avoid file corruption on ctrl-c
        results_filepath = subfolder.joinpath(RESULTS_FILENAME)
        with transactional_open(results_filepath, mode='w') as f:
            json.dump(results, f)

        model_filepath = subfolder.joinpath(MODEL_FILENAME)
        with transactional_fname(model_filepath) as fname:
            model.save(fname)

        # Save predictions and true labels to be able to recompute new scores
        # later
        with transactional_open(subfolder / 'test_preds.npy', mode='wb') as f:
            np.save(f, test_preds)
        with transactional_open(subfolder / 'train_preds.npy', mode='wb') as f:
            np.save(f, train_preds)
        with transactional_open(subfolder / 'ratings.npy', mode='wb') as f:
            np.save(f, rating_test)

    return params_digest
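The core pattern above — joblib.hash(params) used as a per-configuration directory name — can be isolated as follows (paths and parameter names are illustrative):

import json
from pathlib import Path

import joblib

params = {'loss': 'mse', 'batch_size': 64, 'split_idx': 0}
digest = joblib.hash(params)

folder = Path('results') / digest
folder.mkdir(parents=True, exist_ok=True)
with open(folder / 'params.json', 'w') as f:
    json.dump(params, f)
print('results stored under', folder)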
Example #30
def compute_degeneracy(tRNAs, aaRSs, mask, cache):
    """
    This function computes all possible site-block-match-matrices
    and their encodable genetic degeneracies
    """
    uni_t = set(range(tRNAs))
    uni_a = set(range(aaRSs))
    ## if mask:
    ##     zeros   = 2**(2*(tRNAs+aaRSs))
    ## else:
    ##     zeros   = 2**(tRNAs+aaRSs)

    if mask:
        genotypes = masked_genotypes_gen(tRNAs, aaRSs)
        for setm, setn, sett, seta in genotypes:
            offm = uni_t - setm
            offn = uni_a - setn
            eoff = len(offm) + len(
                offn
            )  # eoff is ultimately the expected fraction of sites masked per genotype
            eips = (
                2 * len(setm) * len(setn)
            )  # eips is expected number of unmasked interactions per site 0 <= eips <= P or N+M/2
            if (eips > 0):
                eips /= (len(setm) + len(setn))
            settc = uni_t - sett
            sett &= setm
            settc &= setm
            setac = uni_a - seta
            seta &= setn
            setac &= setn

            m = np.zeros((tRNAs, aaRSs), dtype=np.int16)
            for match in chain(product(sett, seta), product(settc, setac)):
                m[match] += 1
                # if (m==0).all():
                ##print ('# huh! in compute_degeneracy') # why do we get here?
                # continue
            key = joblib.hash(m)
            #if key == '3d364cbacfad5c8c2be9dc4314aec17c':
            #    pdb.set_trace()
            if key in degeneracy:
                degeneracy[key] += 1
                off[key] += eoff / (2 * pairs * width)
                ips[key] += eips / width
            else:
                degeneracy[key] = 1
                off[key] = eoff / (2 * pairs * width)
                ips[key] = eips / width
                if cache:
                    sbm_matrix(key, m)  # THIS IS NOT TESTED
                else:
                    sbmmd[key] = m
                #zeros -= 1

        for key in off:
            off[key] /= degeneracy[key]
            ips[key] /= degeneracy[key]
        #pdb.set_trace()
    else:
        genotypes = genotypes_gen(tRNAs, aaRSs)
        for sett, seta in genotypes:
            settc = uni_t - sett
            setac = uni_a - seta
            m = np.zeros((tRNAs, aaRSs), dtype=np.int16)
            for match in chain(product(sett, seta), product(settc, setac)):
                m[match] += 1
            key = joblib.hash(m)

            if key in degeneracy:
                degeneracy[key] += 1
                #zeros -= 1
            else:
                degeneracy[key] = 1
                if cache:
                    sbm_matrix(m)  # THIS IS NOT TESTED
                else:
                    sbmmd[key] = m
Example #31
def test_proxy():
    inst = LazyProxy(nocall)
    pickle.Pickler(io.BytesIO(), pickle.HIGHEST_PROTOCOL).dump(inst)
    pickle.Pickler(io.BytesIO()).dump(inst)
    jb.hash(inst)
Example #32
def regression_state(state, regtest):
    for v in sorted(state):
        print(v, joblib.hash(state[v].values), file=regtest)
Example #33
        'learning_rate': ['constant', 'adaptive'],
        'max_iter': [5000],
    },
    {
        'solver': ['lbfgs'],
        'hidden_layer_sizes': hidden_layer_sizes_range,
        'activation': ['relu'],
        'random_state': [0],
    },
]

if __name__ == '__main__':
    model_params = list(ParameterGrid(param_grid))
    with open(model_filename, 'w') as f:
        for params in model_params:
            model_id = joblib.hash(params)
            model_record = params.copy()
            model_record['model_id'] = model_id
            model_record['depth'] = len(params['hidden_layer_sizes'])
            model_record['width'] = max(params['hidden_layer_sizes'])
            f.write(json.dumps(model_record) + '\n')
            f.flush()

    model_params = shuffle(model_params, random_state=0)
    with open(evaluations_filename, 'w') as f:
        for n_samples_train in [30]:
            for label_noise_rate in np.linspace(0, 1, 11):
                print(f'\nn_samples: {n_samples_train}, label noise: {label_noise_rate:0.1f}')
                for data_seed in [0, 1]:
                    (X_train, y_train), (X_test, y_test) = make_noisy_problem(
                        n_samples_train, label_noise_rate, seed=data_seed)
Example #34
        sfab = 0
        sfab2 = 0

        sfbf = 0
        sfbf2 = 0
        sfabfa = 0
        sfabfa2 = 0

        #for arg in args:
        #    m,d,o,f,f2    = compute_fitness(arg)

        for m, d, o, ei, f, f2, fa, fa2 in pool.imap(compute_fitness,
                                                     args,
                                                     chunksize=chunksize):

            key = joblib.hash(m)
            if key in dd:
                dd[key] += d
                oo[key] += (d * o)
                fb = fitb[key]
                fb2 = fitb2[key]
                fab = fitab[key]
                fab2 = fitab2[key]
            else:
                dd[key] = d
                oo[key] = (d * o)
                eeii[key] = ei

                fit[key] = f
                fit2[key] = f2
                fita[key] = fa
Example #35
    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(env, parameters.rms_decay, parameters.rms_epsilon,
                          parameters.momentum, parameters.clip_delta,
                          parameters.freeze_interval, parameters.batch_size,
                          parameters.network_type, parameters.update_rule,
                          parameters.batch_accumulator, rng)

    # --- Instantiate agent ---
    agent = ALEAgent(
        env, qnetwork, parameters.replay_memory_size,
        max(env.inputDimensions()[i][0]
            for i in range(len(env.inputDimensions()))), parameters.batch_size,
        rng)

    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "ALE_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(evaluateOn='epoch', periodicity=1))

    # During training epochs, we want to train the agent after every [parameters.update_frequency] action it takes.
    # Plus, we also want to display after each training episode (!= than after every training) the average bellman
    # residual and the average of the V values obtained during the last episode, hence the two last arguments.
    agent.attach(
        bc.TrainerController(evaluateOn='action',
                             periodicity=parameters.update_frequency,
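Since the hash call above takes a hash_name argument, hash here is joblib's hash rather than the built-in, and hashing vars(parameters) — the attribute dict of the parsed arguments — gives a reproducible identifier for the whole run. A sketch of just that naming step, with illustrative Namespace fields:

import argparse

from joblib import hash

parameters = argparse.Namespace(batch_size=32, update_frequency=4,
                                replay_memory_size=100000)
h = hash(vars(parameters), hash_name="sha1")
fname = "ALE_" + h
print("The parameters hash is: {}".format(h))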
Example #36
def plot_mosaic(
    tfr_data,
    vmin=-25,
    vmax=25,
    cmap="RdBu_r",
    ncols=4,
    epoch="stimulus",
    stats=False,
    threshold=0.05,
):

    if epoch == "stimulus":
        time_cutoff = (-0.5, 1.35)
        xticks = [0, 0.25, 0.5, 0.75, 1]
        xticklabels = ["0\nStim on", "", ".5", "", "1\nStim off"]
        yticks = [25, 50, 75, 100, 125]
        yticklabels = ["25", "", "75", "", "125"]
        xmarker = [0, 1]
        baseline = (-0.25, 0)
    else:
        time_cutoff = (-1, 0.5)
        xticks = [-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5]
        xticklabels = ["-1", "", "-0.5", "", "0\nResponse", "", "0.5"]
        yticks = [1, 25, 50, 75, 100, 125]
        yticklabels = ["1", "25", "", "75", "", "125"]
        xmarker = [0, 1]
        baseline = None
    from matplotlib import gridspec
    import pylab as plt
    import seaborn as sns

    contrast_tfr.set_jw_style()
    sns.set_style("ticks")
    nrows = (len(atlas_glasser.areas) // ncols) + 1
    gs = gridspec.GridSpec(nrows, ncols)
    gs.update(wspace=0.01, hspace=0.01)

    for i, (name, area) in enumerate(atlas_glasser.areas.items()):
        try:
            column = np.mod(i, ncols)
            row = i // ncols
            plt.subplot(gs[row, column])
            times, freqs, tfr = get_tfr(tfr_data.query('cluster=="%s"' % area),
                                        time_cutoff)
            # cax = plt.gca().pcolormesh(times, freqs, np.nanmean(
            #    tfr, 0), vmin=vmin, vmax=vmax, cmap=cmap, zorder=-2)
            mask = None

            if stats:
                import joblib

                hash = joblib.hash([times, freqs, tfr, threshold])
                try:
                    _, _, cluster_p_values, _ = stats[hash]
                except KeyError:
                    s = get_tfr_stats(times, freqs, tfr, threshold)
                    _, _, cluster_p_values, _ = s[hash]
                sig = cluster_p_values.reshape((tfr.shape[1], tfr.shape[2]))
                mask = sig < threshold
            cax = pmi(
                plt.gca(),
                np.nanmean(tfr, 0),
                times,
                yvals=freqs,
                yscale="linear",
                vmin=vmin,
                vmax=vmax,
                mask=mask,
                mask_alpha=1,
                mask_cmap=cmap,
                cmap=cmap,
            )

            # plt.grid(True, alpha=0.5)
            for xmark in xmarker:
                plt.axvline(xmark, color="k", lw=1, zorder=-1, alpha=0.5)

            plt.yticks(yticks, [""] * len(yticks))
            plt.xticks(xticks, [""] * len(xticks))
            set_title(name, times, freqs, plt.gca())
            plt.tick_params(direction="inout", length=2, zorder=100)
            plt.xlim(time_cutoff)
            plt.ylim([1, 147.5])
            plt.axhline(10, color="k", lw=1, alpha=0.5, linestyle="--")
        except ValueError as e:
            print(name, area, e)
    plt.subplot(gs[nrows - 2, 0])

    sns.despine(left=True, bottom=True)
    plt.subplot(gs[nrows - 1, 0])

    pmi(
        plt.gca(),
        np.nanmean(tfr, 0) * 0,
        times,
        yvals=freqs,
        yscale="linear",
        vmin=vmin,
        vmax=vmax,
        mask=None,
        mask_alpha=1,
        mask_cmap=cmap,
        cmap=cmap,
    )
    plt.xticks(xticks, xticklabels)
    plt.yticks(yticks, yticklabels)
    for xmark in xmarker:
        plt.axvline(xmark, color="k", lw=1, zorder=-1, alpha=0.5)
    if baseline is not None:
        plt.fill_between(baseline,
                         y1=[1, 1],
                         y2=[150, 150],
                         color="k",
                         alpha=0.5)
    plt.tick_params(direction="in", length=3)
    plt.xlim(time_cutoff)
    plt.ylim([1, 147.5])
    plt.xlabel("time [s]")
    plt.ylabel("Freq [Hz]")
    sns.despine(ax=plt.gca())
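The `stats` argument above is expected to map `joblib.hash([times, freqs, tfr, threshold])` to precomputed cluster statistics. A minimal, self-contained sketch of that caching pattern (the statistics function below is a dummy stand-in, not the project's `get_tfr_stats`):

import numpy as np
import joblib

def dummy_cluster_stats(times, freqs, tfr, threshold):
    # Stand-in for a permutation cluster test that returns
    # (t_obs, clusters, cluster_p_values, h0).
    p_values = np.random.rand(tfr.shape[1] * tfr.shape[2])
    return None, None, p_values, None

stats_cache = {}
times = np.linspace(-0.5, 1.35, 50)
freqs = np.arange(1, 31)
tfr = np.random.randn(10, len(freqs), len(times))
threshold = 0.05

# Key the cache by a digest of the inputs, exactly as plot_mosaic does.
key = joblib.hash([times, freqs, tfr, threshold])
if key not in stats_cache:
    stats_cache[key] = dummy_cluster_stats(times, freqs, tfr, threshold)
_, _, cluster_p_values, _ = stats_cache[key]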
Example #37
0
def fit_mice_hash(X: DataFrame, iterations: int = 1):
    return joblib.hash(X)
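Here joblib.hash digests the DataFrame's contents, so the returned value changes only when the data change (the unused `iterations` argument has no effect on it). A small illustration of that behaviour:

import joblib
import pandas as pd

X = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})
digest_before = joblib.hash(X)
X.loc[0, "a"] = 99.0            # modify one value
digest_after = joblib.hash(X)
print(digest_before == digest_after)  # False: the digest tracks the data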
Example #38
0
def make_path(name_tuple):
    return joblib.hash(name_tuple)
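joblib.hash turns an arbitrary name tuple into a fixed-length hex digest, which makes a flat, filesystem-safe path component. A hypothetical usage sketch:

import os
import joblib

name_tuple = ("subject-01", "session A", "block/3")  # may contain spaces or slashes
fname = joblib.hash(name_tuple) + ".pkl"             # 32-char md5 digest by default
print(os.path.join("cache", fname))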
Example #39
0
        parameters.network_type,
        parameters.update_rule,
        parameters.batch_accumulator,
        rng)
    
    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        qnetwork,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng)
    
    # --- Create unique filename for FindBestController ---
    h = hash(vars(parameters), hash_name="sha1")
    fname = "MG2S_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # --- Bind controllers to the agent ---
    # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and 
    # learning rate as well as the training epoch number.
    agent.attach(bc.VerboseController(
        evaluateOn='epoch', 
        periodicity=1))
    
    # During training epochs, we want to train the agent after every [parameters.update_frequency] actions it takes.
    # In addition, after each training episode (as opposed to after every training step) we also want to display
    # the average Bellman residual and the average of the V values obtained during the last episode, hence the two last arguments.
    agent.attach(bc.TrainerController(
        evaluateOn='action',
        periodicity=parameters.update_frequency,
        showEpisodeAvgVValue=True,
        showAvgBellmanResidual=True))
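For reference, joblib's hash accepts a hash_name argument; "sha1" yields a 40-character digest (the default "md5" yields 32), which is what makes the `MG2S_<digest>` filename above unique per parameter set. A small illustration with a made-up parameter dict:

from joblib import hash

params = {"batch_size": 32, "update_frequency": 1, "discount": 0.95}
tag = hash(params, hash_name="sha1")
print(len(tag), "MG2S_" + tag)  # 40 MG2S_<sha1 digest>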
Example #40
0
    def run(self):
        self._validate_params()
        self._set_lib()
        estimator_class = self._load_estimator_class()
        metrics_functions = self._load_metrics_functions()
        parameters_grid = self._init_parameters_grid()
        self.results_ = []
        for dataset in self.datasets:
            n_features = dataset["n_features"]
            n_samples_train = dataset["n_samples_train"]
            n_samples_test = list(reversed(sorted(dataset["n_samples_test"])))
            for ns_train in n_samples_train:
                X, y = gen_data(
                    dataset["sample_generator"],
                    n_samples=ns_train + max(n_samples_test),
                    n_features=n_features,
                    **dataset["params"],
                )
                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, train_size=ns_train)
                for params in parameters_grid:
                    estimator = estimator_class(**params)
                    set_random_state(estimator, random_state=42)
                    hyperparams_digest = joblib.hash(params)
                    dims_digest = joblib.hash([ns_train, n_features])
                    profiling_results_path = str(RESULTS_PATH / "profiling")
                    profiling_path = f"{profiling_results_path}/{self.lib_}_fit_{hyperparams_digest}_{dims_digest}.html"

                    _, mean, stdev = FuncExecutor.run(estimator.fit,
                                                      profiling_path, X_train,
                                                      y_train)
                    row = dict(
                        estimator=self.name,
                        lib=self.lib_,
                        function="fit",
                        mean=mean,
                        stdev=stdev,
                        n_samples=ns_train,
                        n_features=n_features,
                        hyperparams_digest=hyperparams_digest,
                        dims_digest=dims_digest,
                        **params,
                    )
                    if hasattr(estimator, "n_iter_"):
                        row["n_iter"] = estimator.n_iter_

                    self.results_.append(row)

                    print("%s - %s - %s - mean: %6.3f - stdev: %6.3f" %
                          (self.lib_, self.name, "fit", mean, stdev))

                    for i in range(len(n_samples_test)):
                        ns_test = n_samples_test[i]
                        X_test_, y_test_ = X_test[:ns_test], y_test[:ns_test]
                        bench_func = predict_or_transform(estimator)
                        dims_digest = joblib.hash([ns_test, n_features])
                        profiling_path = f"{profiling_results_path}/{self.lib_}_{bench_func.__name__}_{hyperparams_digest}_{dims_digest}.html"

                        (
                            y_pred,
                            mean,
                            stdev,
                        ) = FuncExecutor.run(bench_func, profiling_path,
                                             X_test_)
                        if i == 0:
                            scores = {
                                func.__name__: func(y_test_, y_pred)
                                for func in metrics_functions
                            }
                        row = dict(
                            estimator=self.name,
                            lib=self.lib_,
                            function="predict",
                            mean=mean,
                            stdev=stdev,
                            n_samples=ns_test,
                            n_features=n_features,
                            hyperparams_digest=hyperparams_digest,
                            dims_digest=dims_digest,
                            **scores,
                            **params,
                        )
                        print("%s - %s - %s - mean: %6.3f - stdev: %6.3f" %
                              (self.lib_, self.name, bench_func.__name__, mean,
                               stdev))
                        self.results_.append(row)
        return self
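The two digests above encode, respectively, the hyperparameter combination and the data dimensions, so each profiling file name is unique and reproducible across runs. A minimal reconstruction of that naming scheme (paths and parameter values are illustrative only):

import joblib

params = {"alpha": 0.1, "max_iter": 1000}
ns_train, n_features = 10000, 20

hyperparams_digest = joblib.hash(params)
dims_digest = joblib.hash([ns_train, n_features])
profiling_path = f"results/profiling/sklearn_fit_{hyperparams_digest}_{dims_digest}.html"
print(profiling_path)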
Example #41
0
 def calc_hash(self):
     self.hash = joblib.hash(self.filename)
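Note that this digests the file name string, not the file's bytes. If a content-based fingerprint were wanted instead (a different choice, not what the snippet above does), the bytes would have to be read first:

import joblib

with open("some_file.bin", "rb") as fh:  # hypothetical file path
    content_digest = joblib.hash(fh.read())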
Example #42
0
        'learning_rate': ['constant', 'adaptive'],
        'max_iter': [5000],
    },
    {
        'solver': ['lbfgs'],
        'hidden_layer_sizes': hidden_layer_sizes_range,
        'activation': ['relu'],
        'random_state': [0],
    },
]

if __name__ == '__main__':
    model_params = list(ParameterGrid(param_grid))
    with open(model_filename, 'w') as f:
        for params in model_params:
            model_id = joblib.hash(params)
            model_record = params.copy()
            model_record['model_id'] = model_id
            model_record['depth'] = len(params['hidden_layer_sizes'])
            model_record['width'] = max(params['hidden_layer_sizes'])
            f.write(json.dumps(model_record) + '\n')
            f.flush()

    model_params = shuffle(model_params, random_state=0)
    with open(evaluations_filename, 'w') as f:
        for n_samples_train in [30]:
            for label_noise_rate in np.linspace(0, 1, 11):
                print(
                    f'\nn_samples: {n_samples_train}, label noise: {label_noise_rate:0.1f}'
                )
                for data_seed in [0, 1]: