Пример #1
0
def load_behavioral(path, verbose=None):
    """Load the behavioral analysis Excel file of a single subject.

    The workbook is expected to start with a 'Summary' sheet followed by
    sheets named 'Team <number>', which is the layout written by
    `behavioral_analysis`.

    Parameters
    ----------
    path : str
        Full path to the Excel file
    verbose : optional
        Verbosity level, forwarded as is to `set_log_level`.

    Returns
    -------
    summary : pandas.DataFrame
        Content of the 'Summary' sheet.
    behavior : dict
        Dictionary whose keys are the team numbers (int) parsed from the sheet
        names and whose values are the dataframes read from each team sheet.

    Raises
    ------
    AssertionError
        If `path` is not an existing file or if the first sheet of the
        workbook is not named 'Summary'.
    """
    assert os.path.isfile(path)
    set_log_level(verbose)
    logger.info('Loading %s' % path)
    # The context manager releases the workbook handle even when one of the
    # checks or parsing steps below fails
    with pd.ExcelFile(path) as xl:
        sheet_names = xl.sheet_names
        assert sheet_names[0] == 'Summary'
        # Get the summary
        logger.info('    - Reading summary')
        summary = xl.parse('Summary')
        # Read every remaining sheet; the team number is whatever follows the
        # 'Team ' prefix in the sheet name
        logger.info('    - Reading team')
        behavior = dict()
        for s in sheet_names[1:]:
            behavior[int(s.split('Team ')[1])] = xl.parse(s)
    return summary, behavior
Пример #2
0
def gccmi_ccc(x, y, z, verbose=None):
    """Gaussian-Copula CMI between three continuous variables.

    I = gccmi_ccc(x,y,z) returns the CMI between two (possibly
    multidimensional) continuous variables, x and y, conditioned on a third,
    z, estimated via a Gaussian copula.

    Parameters
    ----------
    x, y, z : array_like
        Continuous variables. 1D inputs are promoted to a single row. For
        multivariate inputs, rows are dimensions/variables and columns are
        samples (samples on the last axis). All three inputs must have the
        same number of samples.
    verbose : optional
        Verbosity level, forwarded as is to `set_log_level`.

    Returns
    -------
    I :
        Value returned by `cmi_ggg` on the copula-normalized inputs.

    Raises
    ------
    ValueError
        If an input has more than two dimensions or if the number of samples
        differs between inputs.
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    z = np.atleast_2d(z)
    if x.ndim > 2 or y.ndim > 2 or z.ndim > 2:
        raise ValueError("x, y and z must be at most 2d")

    Ntrl = x.shape[1]

    if y.shape[1] != Ntrl or z.shape[1] != Ntrl:
        raise ValueError("number of trials do not match")

    # check for repeated values: report each input at most once, as soon as
    # one of its rows has fewer than 90% unique samples
    if any((np.unique(row).size / float(Ntrl)) < 0.9 for row in x):
        logger.info("Input x has more than 10% repeated values")
    if any((np.unique(row).size / float(Ntrl)) < 0.9 for row in y):
        logger.info("Input y has more than 10% repeated values")
    if any((np.unique(row).size / float(Ntrl)) < 0.9 for row in z):
        logger.info("Input z has more than 10% repeated values")

    # copula normalization
    cx = copnorm(x)
    cy = copnorm(y)
    cz = copnorm(z)
    # parametric Gaussian CMI
    # NOTE(review): the two trailing positional flags are presumably the
    # bias-correction / demeaned switches of `cmi_ggg` - confirm there
    I = cmi_ggg(cx, cy, cz, True, True)
    return I
Пример #3
0
def gcmi_model_cd(x, y, Ym, verbose=None):
    """Gaussian-Copula MI between a continuous and a discrete variable.

    The estimate relies on an ANOVA style model comparison. For 1D x this is
    a lower bound to the mutual information.

    Parameters
    ----------
    x : array_like
        Continuous variable (possibly multidimensional). Rows are
        dimensions/variables and columns are samples (samples last axis).
    y : array_like
        Univariate discrete variable made of integers in the range
        [0, Ym - 1] (inclusive), one value per sample.
    Ym : int
        Number of classes of the discrete variable.
    verbose : optional
        Verbosity level, forwarded as is to `set_log_level`.

    Returns
    -------
    I :
        Value returned by `mi_model_gd` on the copula-normalized x.

    See also
    --------
    gcmi_mixture_cd
    """
    set_log_level(verbose)
    x, y = np.atleast_2d(x), np.squeeze(y)

    # input validation
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")
    if not isinstance(Ym, int):
        raise ValueError("Ym should be an integer")

    n_trials = x.shape[1]
    if y.size != n_trials:
        raise ValueError("number of trials do not match")

    # report (once) when a row of x has fewer than 90% unique samples
    if any((np.unique(row).size / float(n_trials)) < 0.9 for row in x):
        logger.info("Input x has more than 10% repeated values")

    # the classes have to span exactly [0, Ym - 1]
    if not (y.min() == 0 and y.max() == (Ym - 1)):
        raise ValueError("values of discrete variable y are out of bounds")

    # copula normalization followed by the parametric Gaussian MI
    return mi_model_gd(copnorm(x), y, Ym, True, True)
Пример #4
0
def mne_epochstfr_to_epochs(epoch, freqs=None, verbose=None):
    """Convert an MNE EpochsTFR to Epochs instance.

    Parameters
    ----------
    epoch : mne.time_frequency.EpochsTFR | str
        Should either be an EpochsTFR instance or a path to a -tfr.h5 file
    freqs : tuple, list, array | None
        The frequencies to select. Use None to select all frequencies or a
        sequence of two floats to select a sub-band. The final Epochs instance
        is obtained by taking the mean across selected frequencies.
    verbose : optional
        Verbosity level, forwarded as is to `set_log_level` and to
        `mne.EpochsArray`.

    Returns
    -------
    r_epoch : mne.Epochs
        Epochs instance

    Raises
    ------
    AssertionError
        If `epoch` is a path that does not contain '-tfr.h5', if it is not an
        EpochsTFR instance, or if `freqs` does not have two elements.
    TypeError
        If `freqs` is neither None nor a list, tuple or array.
    """
    set_log_level(verbose)
    if isinstance(epoch, str):
        assert '-tfr.h5' in epoch, "File should end with -tfr.h5 file"
        epoch = mne.time_frequency.read_tfrs(epoch)[0]
    assert isinstance(epoch, mne.time_frequency.EpochsTFR)
    # Handle frequencies
    epoch_freqs = epoch.freqs
    if freqs is None:
        logger.info('    Selecting all frequencies')
        sl = slice(None)
    elif isinstance(freqs, (list, tuple, np.ndarray)):
        assert len(freqs) == 2, "`freqs` should be tuple of two elements"
        logger.info("    Selecting frequencies "
                    "(%.2f, %.2f)" % (freqs[0], freqs[1]))
        # For each requested bound, index of the closest available frequency
        _idx = np.abs(
            epoch_freqs.reshape(-1, 1) -
            np.array(freqs).reshape(1, -1)).argmin(0)
        # NOTE(review): the upper index is exclusive, so the frequency closest
        # to freqs[1] is not part of the average - confirm this is intended
        sl = slice(_idx[0], _idx[1])
    else:
        # Without this branch `sl` would stay undefined and the function would
        # fail further down with an unrelated UnboundLocalError
        raise TypeError("`freqs` should either be None or a list / tuple / "
                        "array of two elements")
    # Built the Epochs instance
    info = epoch.info
    # Average over the frequency axis (second to last axis of the TFR data)
    data = epoch.data[..., sl, :].mean(2)
    return mne.EpochsArray(data, info, tmin=epoch.times[0], verbose=verbose)
Пример #5
0
def gcmi_cc(x, y, verbose=None):
    """Gaussian-Copula Mutual Information between two continuous variables.

    I = gcmi_cc(x,y) returns the MI between two (possibly multidimensional)
    continuous variables, x and y, estimated via a Gaussian copula. This
    provides a lower bound to the true MI value.

    Parameters
    ----------
    x, y : array_like
        Continuous variables. 1D inputs are promoted to a single row. For
        multivariate inputs, rows are dimensions/variables and columns are
        samples (samples on the last axis). Both inputs must have the same
        number of samples.
    verbose : optional
        Verbosity level, forwarded as is to `set_log_level`.

    Returns
    -------
    I :
        Value returned by `mi_gg` on the copula-normalized inputs.
    """
    set_log_level(verbose)
    x, y = np.atleast_2d(x), np.atleast_2d(y)
    if max(x.ndim, y.ndim) > 2:
        raise ValueError("x and y must be at most 2d")

    n_trials = x.shape[1]
    if n_trials != y.shape[1]:
        raise ValueError("number of trials do not match")

    # report each input at most once, as soon as one of its rows has fewer
    # than 90% unique samples
    if any((np.unique(row).size / float(n_trials)) < 0.9 for row in x):
        logger.info("Input x has more than 10% repeated values")
    if any((np.unique(row).size / float(n_trials)) < 0.9 for row in y):
        logger.info("Input y has more than 10% repeated values")

    # copula normalization followed by the parametric Gaussian MI
    return mi_gg(copnorm(x), copnorm(y), True, True)
Пример #6
0
def behavioral_analysis(tr_team,
                        tr_play,
                        tr_win,
                        save_as=None,
                        embedded_plot=True,
                        modality='meg',
                        verbose=None):
    """Perform behavioral analysis using team, play and win triggers.

    Parameters
    ----------
    tr_team : array_like
        Array describing the team number per trial (e.g. [6, 6, 6, ..., 15]).
        Team numbers must be in [1, 15].
    tr_play : array_like
        Array describing if the subject is playing (1) or not (0)
    tr_win : array_like
        Array describing if the subject win (1) or lose (0)
    save_as : string | None
        Full path to a .xlsx file where to save the Excel file. Nothing is
        written if it is not a string.
    embedded_plot : bool | True
        Put the summary (dP vs. edP) bar chart and the per-team line charts
        inside the Excel file. Only used when `save_as` is a string.
    modality : {'meg', 'seeg'}
        Because the probabilities are different between the meg and seeg task,
        you have to specify the recording modality.
    verbose : optional
        Verbosity level, forwarded as is to `set_log_level`.

    Returns
    -------
    summary : dataframe
        A pandas dataframe with the columns 'Team', 'P(O|A)', 'eP(O|A)',
        'P(O|nA)', 'eP(O|nA)', 'dP' and 'edP'. The 'e'-prefixed columns are
        the estimates taken at the last trial of each team (NaN for teams
        that are absent from `tr_team`).
    behavior : dict
        Dictionary organized by team number in which the per-trial triggers,
        conditional booleans, cumulative sums and estimated probabilities are
        saved as dataframes.

    Raises
    ------
    AssertionError
        If the three trigger arrays do not share the same shape, if `tr_play`
        or `tr_win` are not made of both 0 and 1, or if a team number falls
        outside [1, 15].
    """
    set_log_level(verbose)
    # Sanity check
    (tr_team, tr_play, tr_win) = tuple(
        [np.asarray(k, dtype=int) for k in [tr_team, tr_play, tr_win]])
    assert tr_team.shape == tr_play.shape == tr_win.shape
    _u = all([np.array_equal(np.unique(k), [0, 1]) for k in [tr_play, tr_win]])
    assert _u, "`tr_play` and `tr_win` should only contains 0 and 1"
    is_t_min, is_t_max = 1 <= tr_team.min() <= 15, 1 <= tr_team.max() <= 15
    assert is_t_min and is_t_max, "Team number must be between [1, 15]"
    # Boolean analysis : one indicator per (outcome, action) combination
    logger.info('    - Get conditional booleans')
    is_oa = np.logical_and(tr_win == 1, tr_play == 1).astype(int)
    is_noa = np.logical_and(tr_win == 0, tr_play == 1).astype(int)
    is_ona = np.logical_and(tr_win == 1, tr_play == 0).astype(int)
    is_nona = np.logical_and(tr_win == 0, tr_play == 0).astype(int)
    # Compute the cumulative sum per team
    behavior = dict()
    col = [
        'Team', 'Play', 'Win', 'O|A', 'nO|A', 'O|nA', 'nO|nA', 'f(O|A)',
        'f(nO|A)', 'f(O|nA)', 'f(nO|nA)', 'eP(O|A)', 'eP(O|nA)', 'edP', 'uedP'
    ]
    logger.info('    - Split cumulative sum per team')
    for team in np.unique(tr_team):
        _df = dict()
        idx_team = tr_team == team
        # Retains trigger for the current team
        _df['Team'] = tr_team[idx_team]
        _df['Play'], _df['Win'] = tr_play[idx_team], tr_win[idx_team]
        # Retains the probability
        _df['O|A'], _df['O|nA'] = is_oa[idx_team], is_ona[idx_team]
        _df['nO|A'], _df['nO|nA'] = is_noa[idx_team], is_nona[idx_team]
        # Get the cumulative sum for each condition
        _df['f(O|A)'] = bincumsum(_df['O|A'])
        _df['f(O|nA)'] = bincumsum(_df['O|nA'])
        _df['f(nO|A)'] = bincumsum(_df['nO|A'])
        _df['f(nO|nA)'] = bincumsum(_df['nO|nA'])
        # Compute P(O|A) and P(O|nA)
        _df['eP(O|A)'] = _df['f(O|A)'] / (_df['f(O|A)'] + _df['f(nO|A)'])
        _df['eP(O|nA)'] = _df['f(O|nA)'] / (_df['f(O|nA)'] + _df['f(nO|nA)'])
        _df['edP'] = _df['eP(O|A)'] - _df['eP(O|nA)']
        # Trial-to-trial update of edP (the first trial is compared to 0)
        _df['uedP'] = np.diff(np.r_[0, _df['edP']])
        behavior[team] = pd.DataFrame(_df, columns=col)
    # Summary table
    logger.info("    - Summary table")
    # NOTE(review): `get_causal_probabilities` is presumably returning one row
    # per team, sorted by team number, since rows are addressed below with
    # (team - 1) - confirm against its definition
    summary = get_causal_probabilities(False, modality)
    _task = np.full((len(summary), 3), np.nan)
    for t in np.unique(tr_team) - 1:
        # Final estimation = value reached at the last trial of the team
        _task[t, 0] = behavior[t + 1]['eP(O|A)'].iloc[-1]
        _task[t, 1] = behavior[t + 1]['eP(O|nA)'].iloc[-1]
        _task[t, 2] = behavior[t + 1]['edP'].iloc[-1]
    summary['eP(O|A)'] = _task[:, 0]
    summary['eP(O|nA)'] = _task[:, 1]
    summary['edP'] = _task[:, 2]
    # Change the column order for plotting
    col = ['Team', 'P(O|A)', 'eP(O|A)', 'P(O|nA)', 'eP(O|nA)', 'dP', 'edP']
    summary = summary[col]
    # Save the dataframe
    if isinstance(save_as, str):
        with pd.ExcelWriter(save_as) as writer:
            summary.to_excel(writer, sheet_name='Summary')
            for team, df in behavior.items():
                df.to_excel(writer, sheet_name='Team %i' % team)
        # Generate plots inside the Excel file, only if requested
        if embedded_plot:
            from openpyxl import load_workbook
            from openpyxl.chart import Reference, LineChart, BarChart
            wb = load_workbook(save_as)
            # Summary plot : columns 7-8 of the sheet are 'dP' and 'edP'
            # (column 1 is the dataframe index), column 2 is 'Team'
            ws = wb['Summary']
            c1 = BarChart()
            title = "Comparison between dP and estimated dP (edP)"
            _xl_plot(c1, title, 'Team', 'Contingency')
            data = Reference(ws, min_col=7, min_row=1, max_row=17, max_col=8)
            categories = Reference(ws, min_col=2, min_row=1, max_row=17)
            c1.set_categories(categories)
            c1.add_data(data, titles_from_data=True)
            ws.add_chart(c1, "A18")
            # Team plot : columns 13-15 of the sheet are 'eP(O|A)',
            # 'eP(O|nA)' and 'edP'
            # NOTE(review): max_row=41 covers the header and 40 trials; teams
            # with more trials are truncated in the chart
            for team_nb in behavior:
                ws = wb['Team %i' % team_nb]
                c1 = LineChart()
                title = "Contingency evolution across trials"
                _xl_plot(c1, title, 'Trials', 'dP')
                data = Reference(
                    ws, min_col=13, max_col=15, min_row=1, max_row=41)
                c1.add_data(data, titles_from_data=True)
                ws.add_chart(c1, "A45")
            wb.save(save_as)
        logger.info("    - Behavioral analysis saved to %s" % save_as)

    return summary, behavior
Пример #7
0
"""
BraiNets
========

Python codes for causal relationships using Gaussian copula and information
theory based tools.
"""
import logging

from brainets import (
    behavior,
    gcmi,
    infodyn,
    spectral,
    stats,
    syslog,
    utils,  # noqa
    preprocessing,
    plot,
    io)

# Package-level logger named 'brainets'. Importing the package sets 'info' as
# the default logging level through `syslog.set_log_level`
logger = logging.getLogger('brainets')
syslog.set_log_level('info')

__version__ = "0.0.0"
Пример #8
0
def plot_marsatlas(data,
                   time=None,
                   modality='meg',
                   seeg_roi=None,
                   contrast=5,
                   cmap='viridis',
                   title=None,
                   verbose=None):
    """Plot data sorted using MarsAtlas parcellation.

    This function sort the data by hemisphere, lobe (frontal, occipital,
    parietal, temporal and subcortical) and by roi.

    Parameters
    ----------
    data : array_like
        GCMI result across ROI of shape (n_pts, n_roi)
    time : list | tuple | None
        Time boundaries. Should be (time_start, time_end). If None (or not a
        sequence of two elements), a default time vector is set between
        (-1.5, 1.5)
    modality : {'meg', 'seeg'}
        The recording modality. Should either be 'meg' or 'seeg'.
    seeg_roi : pd.DataFrame | None
        The ROI dataframe in case of sEEG data. Should contains n_rois rows
        and a MarsAtlas column. Required when `modality` is 'seeg'.
    contrast : int | float
        Contrast to use for the plot. A contrast of 5 means that vmin is set to
        5% of the data and vmax 95% of the data. If None, vmin and vmax are set
        to the min and max of the data. Alternatively, you can also provide
        a tuple to manually define it
    cmap : string | 'viridis'
        The colormap to use
    title : string | None
        Title of the figure
    verbose : optional
        Verbosity level, forwarded as is to `set_log_level`.

    Returns
    -------
    fig_l, fig_r : plt.figure
        Figures for the left and right hemisphere

    Raises
    ------
    AssertionError
        If `modality` is unknown, if `data` is not a 2D numpy array, if
        `seeg_roi` is not a dataframe for the 'seeg' modality or if the number
        of columns of `data` does not match the number of ROI.
    """
    set_log_level(verbose)
    assert modality in ['meg', 'seeg']
    assert isinstance(data, np.ndarray) and (data.ndim == 2)

    # Load MarsAtlas DataFrame
    logger.info('    Load MarsAtlas labels')
    df_ma = load_marsatlas()

    # Prepare the data before plotting according to the recording modality
    logger.info('    Prepare the data for %s modality' % modality)
    if modality == 'meg':
        assert data.shape[1] == len(df_ma), ("`data` should have a shape of "
                                             "(n_pts, %i)" % len(df_ma))
        df, df_ma = _prepare_data_meg(df_ma, data)
    elif modality == 'seeg':
        # The type is checked first and on its own : otherwise a missing
        # `seeg_roi` fails while formatting the shape message (len(None))
        # instead of reporting what is actually wrong
        assert isinstance(seeg_roi, pd.DataFrame), (
            "`seeg_roi` should be a pandas DataFrame when modality is 'seeg'")
        assert data.shape[1] == len(seeg_roi), (
            "`data` should have a shape of (n_pts, %i)" % len(seeg_roi))
        df, df_ma = _prepare_data_seeg(df_ma, data, seeg_roi)

    # Built the multi-indexing (hemisphere > lobe > roi name)
    assert len(df.columns) == len(df_ma)
    mi = pd.MultiIndex.from_frame(df_ma[['Hemisphere', 'Lobe', 'Name']])
    df.columns = mi

    # Time vector : one point per row of `data`
    if isinstance(time, (list, tuple, np.ndarray)) and (len(time) == 2):
        time = np.linspace(time[0], time[1], data.shape[0], endpoint=True)
        logger.info('    Generate time vector')
    else:
        time = np.linspace(-1.5, 1.5, data.shape[0], endpoint=True)
        logger.warning("Automatically generate a time vector between "
                       "(-1.5, 1.5)")

    # Get colorbar limits
    if isinstance(contrast, (int, float)):
        # Symmetric percentiles of the data
        vmin = np.percentile(data, contrast)
        vmax = np.percentile(data, 100 - contrast)
    elif isinstance(contrast, (tuple, list)) and (len(contrast) == 2):
        # Limits manually provided
        vmin, vmax = contrast
    else:
        vmin, vmax = data.min(), data.max()
    kwargs = dict(cmap=cmap, vmin=vmin, vmax=vmax)

    # Generate plots
    title = '' if not isinstance(title, str) else title
    fig_l = _plot_gcmi_hemi(df, 'L', time, title, **kwargs)
    fig_r = _plot_gcmi_hemi(df, 'R', time, title, **kwargs)
    return fig_l, fig_r
Пример #9
0
def gccmi_ccd(x, y, z, Zm, verbose=None):
    """Gaussian-Copula CMI between 2 continuous variables conditioned on a
    discrete variable.

    I = gccmi_ccd(x,y,z,Zm) returns the CMI between two (possibly
    multidimensional) continuous variables, x and y, conditioned on a third
    discrete variable z, estimated via a Gaussian copula.

    Parameters
    ----------
    x, y : array_like
        Continuous variables. 1D inputs are promoted to a single row. For
        multivariate inputs, rows are dimensions/variables and columns are
        samples (samples on the last axis).
    z : array_like
        Univariate discrete variable made of integers in the range
        [0, Zm - 1] (inclusive), one value per sample.
    Zm : int
        Number of classes of the discrete variable.
    verbose : optional
        Verbosity level, forwarded as is to `set_log_level`.

    Returns
    -------
    CMI : float
        Sum over the classes of z of the within-class MI (`mi_gg` on the data
        copula-normalized inside the class) weighted by the proportion of
        samples that belong to the class.
    I :
        Value returned by `mi_gg` on the concatenation, across classes, of
        the within-class copula-normalized data.

    Raises
    ------
    ValueError
        If x or y have more than two dimensions, if z is not a univariate
        integer array spanning [0, Zm - 1], if Zm is not an int or if the
        number of samples differs between inputs.
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    # Same normalization as the one applied to the discrete variable in
    # gcmi_model_cd / gcmi_mixture_cd; atleast_1d keeps a usable boolean mask
    # when there is a single sample
    z = np.atleast_1d(np.squeeze(z))
    if x.ndim > 2 or y.ndim > 2:
        raise ValueError("x and y must be at most 2d")
    if z.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(z.dtype, np.integer):
        raise ValueError("z should be an integer array")
    if not isinstance(Zm, int):
        raise ValueError("Zm should be an integer")

    Ntrl = x.shape[1]

    if y.shape[1] != Ntrl or z.size != Ntrl:
        raise ValueError("number of trials do not match")

    # check for repeated values: report each input at most once, as soon as
    # one of its rows has fewer than 90% unique samples
    if any((np.unique(row).size / float(Ntrl)) < 0.9 for row in x):
        logger.info("Input x has more than 10% repeated values")
    if any((np.unique(row).size / float(Ntrl)) < 0.9 for row in y):
        logger.info("Input y has more than 10% repeated values")

    # check values of discrete variable
    if z.min() != 0 or z.max() != (Zm - 1):
        raise ValueError("values of discrete variable z are out of bounds")

    # calculate gcmi for each z value
    Icond = np.zeros(Zm)
    Pz = np.zeros(Zm)
    cx = []
    cy = []
    for zi in range(Zm):
        idx = z == zi
        thsx = copnorm(x[:, idx])
        thsy = copnorm(y[:, idx])
        # Number of samples in this class (not the total number of samples,
        # which would give every class a weight of 1)
        Pz[zi] = idx.sum()
        cx.append(thsx)
        cy.append(thsy)
        Icond[zi] = mi_gg(thsx, thsy, True, True)

    # class probabilities
    Pz = Pz / float(Ntrl)

    # conditional mutual information
    CMI = np.sum(Pz * Icond)
    # NOTE(review): the trailing flags differ from the within-class calls
    # (True, False instead of True, True) - presumably because the pooled data
    # is not demeaned; confirm against `mi_gg`
    I = mi_gg(np.hstack(cx), np.hstack(cy), True, False)
    return (CMI, I)
Пример #10
0
def gcmi_mixture_cd(x, y, Ym, verbose=None):
    """Gaussian-Copula Mutual Information between a continuous and a discrete
    variable calculated from a Gaussian mixture.

    The Gaussian mixture is fit using robust measures of location (median)
    and scale (median absolute deviation) for each class. For 1D x this is a
    lower bound to the mutual information.

    Parameters
    ----------
    x : array_like
        Continuous variable (possibly multidimensional). Rows are
        dimensions/variables and columns are samples (samples last axis).
    y : array_like
        Univariate discrete variable made of integers in the range
        [0, Ym - 1] (inclusive), one value per sample.
    Ym : int
        Number of classes of the discrete variable.
    verbose : optional
        Verbosity level, forwarded as is to `set_log_level`.

    Returns
    -------
    I :
        Value returned by `mi_mixture_gd` on the per-class rescaled data.

    Raises
    ------
    ValueError
        If x has more than two dimensions, if y is not a univariate integer
        array spanning [0, Ym - 1], if Ym is not an int or if the number of
        samples differs between x and y.

    See also
    --------
    gcmi_model_cd
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.squeeze(y)
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")
    if not isinstance(Ym, int):
        raise ValueError("Ym should be an integer")

    Ntrl = x.shape[1]

    if y.size != Ntrl:
        raise ValueError("number of trials do not match")

    # check for repeated values: report once, as soon as one row of x has
    # fewer than 90% unique samples
    if any((np.unique(row).size / float(Ntrl)) < 0.9 for row in x):
        logger.info("Input x has more than 10% repeated values")

    # check values of discrete variable
    if y.min() != 0 or y.max() != (Ym - 1):
        raise ValueError("values of discrete variable y are out of bounds")

    # copula normalise each class
    # shift and rescale to match loc and scale of raw data
    # this provides a robust way to fit the gaussian mixture
    classdat = []
    ydat = []
    for yi in range(Ym):
        # class conditional data
        idx = y == yi
        xm = x[:, idx]
        cxm = copnorm(xm)

        xmmed = np.median(xm, axis=1)[:, np.newaxis]
        # robust measure of s.d. under Gaussian assumption from median
        # absolute deviation
        xmmad = np.median(np.abs(xm - xmmed), axis=1)[:, np.newaxis]
        cxmscaled = cxm * (1.482602218505602 * xmmad)
        # robust measure of loc from median
        cxmscaled = cxmscaled + xmmed
        classdat.append(cxmscaled)
        # builtin int : the `np.int` alias no longer exists in recent NumPy
        ydat.append(yi * np.ones(xm.shape[1], dtype=int))

    # samples are now grouped by class, hence the rebuilt label vector
    cx = np.concatenate(classdat, axis=1)
    newy = np.concatenate(ydat)
    I = mi_mixture_gd(cx, newy, Ym)
    return I