Example #1
import numpy as np


def colorize(frames, cmap):
    import matplotlib.pyplot as plt
    # Resolve a colormap name to a Matplotlib colormap object.
    cmap = plt.get_cmap(cmap) if isinstance(cmap, str) else cmap
    # Float copy: avoids mutating the caller and integer-division issues.
    frames = frames.astype(float)
    # Normalise to [0, 1], ignoring NaNs, then map to 8-bit RGB (drop alpha).
    frames -= np.nanmin(frames)
    frames /= np.nanmax(frames)
    return np.clip(cmap(frames)[..., :3] * 255, 0, 255)
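A minimal usage sketch, assuming frames is a stack of 2-D images with possible NaN gaps:

# Hypothetical usage: colorize a noisy 10-frame stack with the viridis map.
frames = np.random.rand(10, 64, 64)
frames[0, 0, 0] = np.nan  # NaNs pass through the nanmin/nanmax normalisation
rgb = colorize(frames, 'viridis')
print(rgb.shape)  # (10, 64, 64, 3)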
Example #2
def initialize(self, delta0=0):
    # initialize theta with reasonable starting values
    r0 = np.nanmin(self.data)
    rd = 100
    rs = 100
    sd = 10
    ss = 60
    m = 1
    n = 1  # 2
    delta = delta0
    theta0 = np.array(
        (r0, 0, rd, rs, 0, rd, rs, sd, ss, sd, ss, m, n, n, delta))
    return theta0
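A hypothetical call, assuming the method belongs to a model object that stores its raw signal in self.data:

# Hypothetical stand-in for the enclosing model class.
class Model:
    def __init__(self, data):
        self.data = data

model = Model(np.array([3.0, 1.0, np.nan, 2.0]))
theta0 = initialize(model, delta0=0.5)  # call as a plain function
print(theta0[0])  # np.nanmin(model.data) == 1.0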
Example #3
import numpy as np
import h5py
import scipy.io as sio
import scipy.ndimage as sfi
from oasis.functions import deconvolve  # OASIS calcium deconvolution


def gen_traces(datafiles, blcutoff=blcutoff, blspan=blspan):
    # blcutoff, blspan and the downsampling factor ds are module-level
    # globals in the original script.
    trialwise = np.array(())
    ctrialwise = np.array(())
    strialwise = np.array(())
    dfofall = np.array(())
    baselineall = np.array(())
    for datafile in datafiles:
        # Trial frame indices from the paired .mat file.
        frm = sio.loadmat(datafile.replace('.rois', '.mat'),
                          squeeze_me=True)['info']['frame'][()][1:]
        with h5py.File(datafile, mode='r') as f:
            to_add = f['corrected'][:].T
            # Replace NaNs with the global minimum before filtering.
            to_add[np.isnan(to_add)] = np.nanmin(to_add)
            # Running-percentile baseline on a ds-fold downsampled trace,
            # upsampled back to full length and smoothed.
            baseline = sfi.percentile_filter(to_add[:, ::ds], blcutoff,
                                             (1, int(blspan / ds)))
            baseline = np.repeat(baseline, ds, axis=1)
            for i in range(baseline.shape[0]):
                baseline[i] = sfi.gaussian_filter1d(baseline[i], blspan / 2)
            if baseline.shape[1] > to_add.shape[1]:
                baseline = baseline[:, :to_add.shape[1]]
            c = np.zeros_like(to_add)
            s = np.zeros_like(to_add)
            dfof = np.zeros_like(to_add)
            for i in range(c.shape[0]):
                # dF/F relative to the running baseline.
                dfof[i] = (to_add[i] - baseline[i, :]) / baseline[i, :]
                # Denoised trace c and deconvolved spikes s.
                c[i], s[i], _, _, _ = deconvolve(dfof[i].astype(np.float64),
                                                 penalty=1,
                                                 sn=5e-3)
            try:
                trialwise = np.concatenate((trialwise, to_add), axis=0)
                ctrialwise = np.concatenate((ctrialwise, c), axis=0)
                strialwise = np.concatenate((strialwise, s), axis=0)
                dfofall = np.concatenate((dfofall, dfof), axis=0)
                baselineall = np.concatenate((baselineall, baseline), axis=0)
            except ValueError:
                # First file: the empty 1-D accumulators cannot be
                # concatenated with 2-D data, so start from copies.
                trialwise = to_add.copy()
                ctrialwise = c.copy()
                strialwise = s.copy()
                dfofall = dfof.copy()
                baselineall = baseline.copy()
    return trialwise, ctrialwise, strialwise, dfofall, baselineall
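The baseline step is easiest to see in isolation; below is a minimal sketch of the percentile-filter dF/F idea on synthetic data (the ds, blcutoff and blspan values are illustrative assumptions, not taken from the source):

import numpy as np
import scipy.ndimage as sfi

ds, blcutoff, blspan = 10, 20, 300  # illustrative values
trace = 1.0 + 0.1 * np.random.rand(1, 3000)  # one ROI, 3000 frames
trace[0, 500:520] += 2.0                     # a calcium-like transient
baseline = sfi.percentile_filter(trace[:, ::ds], blcutoff,
                                 (1, int(blspan / ds)))
baseline = np.repeat(baseline, ds, axis=1)[:, :trace.shape[1]]
baseline[0] = sfi.gaussian_filter1d(baseline[0], blspan / 2)
dfof = (trace - baseline) / baseline
print(dfof.max())  # the transient stands out against the percentile floor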
Example #4
import numpy as np


def lc_plot(ax, t_orig, y_orig, mu, std, xlim=None):
    # Draw a +/- 3-sigma model band behind the data points.
    color = "#ff7f0e"
    ax.fill_between(t_orig,
                    mu + std * 3,
                    mu - std * 3,
                    alpha=0.7,
                    color=color,
                    zorder=0)
    ax.plot(t_orig, y_orig, '.', zorder=1)
    if xlim is None:
        xlim = [t_orig[0], t_orig[-1]]
    ax.set_xlim(xlim)
    # Scale the y-axis to the data inside the visible window only.
    use, = np.where((t_orig > xlim[0]) & (t_orig < xlim[1]))
    ylow = 1.1 * np.nanmin(y_orig[use])
    yhigh = 1.1 * np.nanmax(y_orig[use])
    ax.set_ylim(ylow, yhigh)
    ax.set_xlabel('Time (days)', fontsize=14)
    ax.set_ylabel('Relative Brightness', fontsize=14)
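A runnable usage sketch on a synthetic light curve (all values are made up; note the 1.1-factor y-limits assume a flux centred on zero):

import matplotlib.pyplot as plt

t = np.linspace(0, 30, 500)                # days
mu = 0.02 * np.sin(2 * np.pi * t / 7)      # model mean, relative flux
y = mu + 0.005 * np.random.randn(t.size)   # observed flux
std = np.full_like(t, 0.005)               # model standard deviation

fig, ax = plt.subplots()
lc_plot(ax, t, y, mu, std, xlim=[5, 25])
plt.show()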
Example #5
# Fragment: the block below runs inside the loops over the dimensionality
# index j and the repetition index n.
            print(log_iw.shape)

            # Pareto-smoothed importance sampling: psislw returns the smoothed
            # log-weights and the k-hat shape diagnostic.
            psis_lw, K_hat_stan = psislw(log_iw.T)
            K_hat_stan_advi_list[j, n] = K_hat_stan
            print(psis_lw.shape)
            print('K hat statistic for Stan ADVI:')
            print(K_hat_stan)

###################### Plotting the results #################################

plt.figure()
plt.plot(stan_vb_w[:, 0], stan_vb_w[:, 1], 'mo', label='STAN-ADVI')
plt.legend()
plt.savefig('vb_w_samples_mf.pdf')

np.save('K_hat_linear_' + datatype + '_' + algo_name + '_' + str(N) + 'N',
        K_hat_stan_advi_list)

# k-hat vs. dimensionality: mean in full colour, min/max as a lighter envelope.
plt.figure()
plt.plot(K_list, np.nanmean(K_hat_stan_advi_list, axis=1), 'r-', alpha=1,
         label='mean')
plt.plot(K_list, np.nanmin(K_hat_stan_advi_list, axis=1), 'r-', alpha=0.5,
         label='min/max')
plt.plot(K_list, np.nanmax(K_hat_stan_advi_list, axis=1), 'r-', alpha=0.5)
plt.xlabel('Dimensions')
plt.ylabel('K-hat')

np.save(
    'K_hat_linear_' + datatype + '_' + algo_name + '_' + str(N) + 'N' +
    '_samples_' + str(gradsamples), K_hat_stan_advi_list)
plt.legend()
plt.savefig('Linear_Regression_K_hat_vs_D_' + datatype + '_' + algo_name +
            '_' + str(N) + 'N.pdf')
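The k-hat diagnostic itself is easy to reproduce; here is a minimal sketch using arviz.psislw on synthetic log importance weights (arviz is an assumption; the fragment above may use a different psislw implementation):

import numpy as np
import arviz as az

# Heavy-tailed synthetic log importance weights -> large k-hat.
log_iw = np.random.standard_t(df=3, size=2000)
smoothed_lw, k_hat = az.psislw(log_iw)
print('k-hat:', float(k_hat))  # values above 0.7 flag unreliable weights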
Example #6
def MI2AMI(y, n_clusters, r, k, init, var_distrib, nj,\
          nan_mask, target_nb_pseudo_obs = 500, it = 50, \
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True,\
          dm = [], max_patience = 1): # dm: Hack to remove
    ''' Complete the missing values using a trained M1DGMM

    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimensions of the latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: the maximum value the variable can take.
                    For ordinal data: the number of existing categories for each variable
    nan_mask (ndarray): A mask array equal to True where the observation value is missing, False otherwise
    target_nb_pseudo_obs (int): The number of pseudo-observations to generate
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increases by less than eps, the algorithm stops
    maxstep (int): The maximum number of optimisation steps for each variable
    seed (int): The random state seed to set (only for numpy-generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    dm (np array): The distance matrix of the observations. If not given, M1DGMM computes it
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
                    and a continuous representation of the data
    '''

    # !!! Hack
    cols = y.columns
    # Formatting
    if not isinstance(nan_mask, np.ndarray): nan_mask = np.asarray(nan_mask)
    if not isinstance(y, np.ndarray): y = np.asarray(y)

    assert len(k) < 2  # Not implemented for deeper MDGMM for the moment

    # Keep complete observations
    complete_y = y[~np.isnan(y.astype(float)).any(1)]
    completed_y = deepcopy(y)

    out = M1DGMM(complete_y, 'auto', r, k, init, var_distrib, nj, it,\
             eps, maxstep, seed, perform_selec = perform_selec,\
                 dm = dm, max_patience = max_patience, use_silhouette = True)

    # Compute the associations
    vc = vars_contributions(pd.DataFrame(complete_y, columns = cols), out['Ez.y'], assoc_thr = 0.0, \
                           title = 'Contribution of the variables to the latent dimensions',\
                           storage_path = None)

    # Unpacking the model from the M1DGMM output
    #p = y.shape[1]
    k = out['best_k']
    r = out['best_r']
    mu = out['mu'][0]
    lambda_bin = np.array(out['lambda_bin'])
    lambda_ord = out['lambda_ord']
    lambda_categ = out['lambda_categ']
    lambda_cont = np.array(out['lambda_cont'])

    nj_bin = nj[pd.Series(var_distrib).isin(['bernoulli',
                                             'binomial'])].astype(int)
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nj_categ = nj[var_distrib == 'categorical'].astype(int)

    nb_cont = np.sum(var_distrib == 'continuous')
    nb_bin = np.sum(var_distrib == 'binomial')

    y_std = complete_y[:,var_distrib == 'continuous'].astype(float).std(axis = 0,\
                                                                    keepdims = True)
    cat_features = var_distrib != 'categorical'

    # Compute the associations between variables and use them as weights for the optimisation
    assoc = cosine_similarity(vc, dense_output=True)
    np.fill_diagonal(assoc, 0.0)
    assoc = np.abs(assoc)
    weights = (assoc / assoc.sum(1, keepdims=True))

    #==============================================
    # Optimisation sandbox
    #==============================================

    # Define the observation generated by the center of each cluster
    cluster_obs = [impute(mu[kk,:,0], var_distrib, lambda_bin, nj_bin, lambda_categ, nj_categ,\
                 lambda_ord, nj_ord, lambda_cont, y_std) for kk in range(k[0])]

    # Use only the observed variables as references
    types = {'bin': ['bernoulli', 'binomial'], 'categ': ['categorical'],\
             'cont': ['continuous'], 'ord': ['ordinal']}

    # Gradient optimisation
    nan_indices = np.where(nan_mask.any(1))[0]
    imputed_y = np.zeros_like(y)
    numobs = y.shape[0]

    #************************************
    # Linear constraint to stay in the support of continuous variables
    #************************************

    lb = np.array([])
    ub = np.array([])
    A = np.array([[]]).reshape((0, r[0]))

    if nb_bin > 0:
        ## Corrected Binomial bounds (ub is actually +inf)
        bin_indices = var_distrib[np.logical_or(var_distrib == 'bernoulli',
                                                var_distrib == 'binomial')]
        binomial_indices = bin_indices == 'binomial'

        lb_bin = np.nanmin(y[:, var_distrib == 'binomial'], 0)
        lb_bin = logit(
            lb_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices,
                                                            0]
        ub_bin = np.nanmax(y[:, var_distrib == 'binomial'], 0)
        ub_bin = logit(
            ub_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices,
                                                            0]
        A_bin = lambda_bin[binomial_indices, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_bin])
        ub = np.concatenate([ub, ub_bin])
        A = np.concatenate([A, A_bin], axis=0)

    if nb_cont > 0:
        ## Corrected Gaussian bounds
        lb_cont = np.nanmin(y[:, var_distrib == 'continuous'],
                            0) / y_std[0] - lambda_cont[:, 0]
        ub_cont = np.nanmax(y[:, var_distrib == 'continuous'],
                            0) / y_std[0] - lambda_cont[:, 0]
        A_cont = lambda_cont[:, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_cont])
        ub = np.concatenate([ub, ub_cont])
        A = np.concatenate([A, A_cont], axis=0)

    lc = LinearConstraint(A, lb, ub, keep_feasible=True)

    zz = []
    fun = []
    for i in range(numobs):
        if i in nan_indices:

            # Design the nan masks for the optimisation process
            nan_mask_i = nan_mask[i]
            weights_i = weights[nan_mask_i].mean(0)

            # Look for the best starting point
            cluster_dist = [error(y[i, ~nan_mask_i], obs[~nan_mask_i],\
                            cat_features[~nan_mask_i], weights_i)\
                            for obs in cluster_obs]
            z02 = mu[np.argmin(cluster_dist), :, 0]

            # Formatting
            vars_i = {type_alias: np.where(~nan_mask_i[np.isin(var_distrib, vartype)])[0] \
                             for type_alias, vartype in types.items()}

            complete_categ = [
                l for idx, l in enumerate(lambda_categ)
                if idx in vars_i['categ']
            ]
            complete_ord = [
                l for idx, l in enumerate(lambda_ord) if idx in vars_i['ord']
            ]

            opt = minimize(stat_all, z02, \
                   args = (y[i, ~nan_mask_i], var_distrib[~nan_mask_i],\
                   weights_i[~nan_mask_i],\
                   lambda_bin[vars_i['bin']], nj_bin[vars_i['bin']],\
                   complete_categ,\
                   nj_categ[vars_i['categ']],\
                   complete_ord,\
                   nj_ord[vars_i['ord']],\
                   lambda_cont[vars_i['cont']], y_std[:, vars_i['cont']]),
                   tol = eps, method='trust-constr', jac = grad_stat,\
                   constraints = lc,
                   options = {'maxiter': 1000})

            z = opt.x
            zz.append(z)
            fun.append(opt.fun)

            imputed_y[i] = impute(z, var_distrib, lambda_bin, nj_bin, lambda_categ, nj_categ,\
                         lambda_ord, nj_ord, lambda_cont, y_std)

        else:
            imputed_y[i] = y[i]

    completed_y = np.where(nan_mask, imputed_y, y)

    out['completed_y'] = completed_y
    out['zz'] = zz
    out['fun'] = fun
    return out
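The per-observation imputation above is a constrained search in latent space; the scipy pattern it relies on can be sketched in isolation (the quadratic objective is a stand-in for stat_all, and the bounds are arbitrary):

import numpy as np
from scipy.optimize import minimize, LinearConstraint

# Keep A @ z inside observed bounds while minimising a toy objective.
A = np.array([[2.0, 1.0]])
lc = LinearConstraint(A, lb=[-1.0], ub=[1.0], keep_feasible=True)

def objective(z):
    return np.sum((z - np.array([3.0, -2.0])) ** 2)  # stand-in for stat_all

opt = minimize(objective, x0=np.zeros(2), method='trust-constr',
               constraints=lc, options={'maxiter': 1000})
print(opt.x, opt.fun)  # minimiser pushed onto the A @ z = 1 boundary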