Exemplos de pd_is_one_row em Python, exemplos de mesostat.utils.pandas_helper.pd_is_one_row em Python

Exemplo n.º 1

0

Exibir arquivo

    def check_reward_in_data(self, pwd):
        """Print, per session, how many recorded timesteps lie past reward onset.

        For every row of ``self.dataPaths`` (grouped by the 'mouse' column) the
        file ``<pwd>/<mouse>.h5`` is inspected. Sessions whose name
        ``<day>_<session>`` is missing from the file's 'metadata' group are
        reported and skipped. For the others, the second dimension of
        ``data/<session>`` is compared with the reward start index computed
        from the session's 'delay' value found in ``self.dfSession``.

        :param pwd: directory that contains one ``<mouse>.h5`` file per mouse
        :return: None; results are printed
        """
        # Group by a scalar label (not a 1-element list): pandas >= 2.0 yields
        # 1-tuples for list keys, which would break the string concatenation.
        for mouseName, dfMouse in self.dataPaths.groupby('mouse'):
            h5fname = os.path.join(pwd, mouseName + '.h5')

            # The file is only read, so open it read-only and once per mouse.
            # Mode 'a' would silently create an empty file if it was missing.
            with h5py.File(h5fname, 'r') as h5f:
                for idx, row in dfMouse.iterrows():
                    session = row['day'] + '_' + row['session']
                    if session not in h5f['metadata'].keys():
                        print(mouseName, session, 'has no metadata, skipping')
                        continue

                    # Only the shape is needed; avoid loading the dataset.
                    nTimestepVid = h5f['data'][session].shape[1]

                    # pd_is_one_row returns an (index, row) pair
                    delay = pd_is_one_row(
                        pd_query(
                            self.dfSession, {
                                'mousename': mouseName,
                                'dateKey': row['day'],
                                'sessionKey': row['session']
                            }))[1]['delay']

                    # NOTE(review): 5 and 20 look like a fixed pre-delay
                    # duration and a sampling rate - confirm with the author.
                    rewStartIdx = int((5 + delay) * 20)
                    overlap = max(0, nTimestepVid - rewStartIdx)

                    print(mouseName, session, delay, nTimestepVid,
                          rewStartIdx, overlap)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: preprocess_raw.py Projeto: aleksejs-fomins/pub-2020-exploratory-analysis

    def behaviour_tune_resample_kernel(self, mousename, session, sig2,
                                       trialType='Hit', trialIdx=0, srcFreq=30.0, trgFreq=20.0):
        """Forward to ``prepcommon.behaviour_tune_resample_kernel`` for one session.

        ``session`` is split on '_': the first three parts (re-joined with '_')
        form the day key, the fourth part is the session key. The matching row
        of ``self.dataPaths`` supplies the 'pathMovementVectors' entry that is
        passed on together with the remaining arguments.
        """
        sessionParts = session.split('_')
        dayKey = '_'.join(sessionParts[:3])
        sessionKey = sessionParts[3]

        selector = {'mouse': mousename, 'day': dayKey, 'session': sessionKey}
        _, pathRow = pd_is_one_row(pd_query(self.dataPaths, selector))

        prepcommon.behaviour_tune_resample_kernel(
            pathRow['pathMovementVectors'],
            sig2,
            trialType=trialType,
            trialIdx=trialIdx,
            srcFreq=srcFreq,
            trgFreq=trgFreq)

Exemplo n.º 3

0

Exibir arquivo

def test_avg_bits(dataDB, mc, h5fname, h5fnameRand, dfSummary, dfSummaryRand):
    """Plot and test measured vs. shuffled 'muTrue' values per mouse.

    Plan:
    1. Group dfSummaryRand by all of its columns except 'key' and 'mousename'
    2. Query dfSummary with the resulting group key
    3. Group the queried frame by its remaining columns
    4. For every row of such a group, read the measured result from h5fname
       and the matching shuffled result (same mousename) from h5fnameRand
    5. Per PID type: violin plot and a one-sided Mann-Whitney U test
       (measured greater than shuffled) for every mouse

    :param dataDB: object providing ``get_channel_labels()``
    :param mc: not used by this function
    :param h5fname: HDF file with the measured results
    :param h5fnameRand: HDF file with the shuffled results
    :param dfSummary: summary frame of measured results; needs the columns
        'key' and 'mousename'
    :param dfSummaryRand: summary frame of shuffled results; needs the columns
        'key', 'mousename' and 'performance'
    :return: None; p-values are printed and figures are shown
    """
    # Not used below; kept so the call into dataDB still happens as before.
    channelLabels = dataDB.get_channel_labels()
    nChannel = len(channelLabels)
    pidTypes = ['unique', 'syn', 'red']

    groupLstRand = sorted(set(dfSummaryRand.columns) - {'key', 'mousename'})
    print(set(dfSummaryRand['performance']))

    print(groupLstRand)
    for keyRand, dfRandMouse in dfSummaryRand.groupby(groupLstRand):
        print(keyRand)
        if isinstance(keyRand, str):
            keyRand = [keyRand]

        selectorLstRand = dict(zip(groupLstRand, keyRand))
        dfSummQueried = pd_query(dfSummary, selectorLstRand)

        groupLstQueried = sorted(
            set(dfSummQueried.columns) - {'key', 'mousename'} -
            set(groupLstRand))
        for key, dfMouse in dfSummQueried.groupby(groupLstQueried):
            print('--', key)
            # A single grouping column may give a bare string key; wrap it so
            # the title below joins whole labels instead of single characters.
            if isinstance(key, str):
                key = [key]

            # Collect the pieces and concatenate once: DataFrame.append was
            # removed in pandas 2.0 and was quadratic when used in a loop.
            dfParts = []
            for idx, row in dfMouse.iterrows():
                # Read and preprocess true data
                dfRezTrue = pd.read_hdf(h5fname, row['key'])
                dfRezTrue = preprocess_unique(dfRezTrue)
                dfRezTrue = preprocess_drop_negative(dfRezTrue)
                dfRezTrue['type'] = 'Measured'
                dfRezTrue['mousename'] = row['mousename']
                dfParts.append(dfRezTrue)

                # Read and preprocess random data
                rowRand = pd_is_one_row(
                    pd_query(dfRandMouse, {'mousename': row['mousename']}))[1]
                dfRezRand = pd.read_hdf(h5fnameRand, rowRand['key'])
                dfRezRand = preprocess_unique(dfRezRand)
                dfRezRand = preprocess_drop_negative(dfRezRand)
                dfRezRand['type'] = 'Shuffle'
                dfRezRand['mousename'] = rowRand['mousename']
                dfParts.append(dfRezRand)
            dfTot = pd.concat(dfParts)

            # Barplot differences
            fig, ax = plt.subplots(ncols=3, figsize=(12, 4))
            fig.suptitle('_'.join(list(key) + list(keyRand)))
            for iPid, pidType in enumerate(pidTypes):
                dfPID = dfTot[dfTot['PID'] == pidType]
                sns.violinplot(ax=ax[iPid],
                               x="mousename",
                               y="muTrue",
                               hue="type",
                               data=dfPID,
                               scale='width',
                               cut=0)

                for mousename in sorted(set(dfPID['mousename'])):
                    dataTrue = pd_query(dfPID, {
                        'mousename': mousename,
                        'type': 'Measured'
                    })['muTrue']
                    dataRand = pd_query(dfPID, {
                        'mousename': mousename,
                        'type': 'Shuffle'
                    })['muTrue']
                    print(
                        'Test:', pidType, mousename, 'pval =',
                        mannwhitneyu(dataTrue, dataRand,
                                     alternative='greater')[1])

                ax[iPid].set_yscale('log')
                ax[iPid].set_ylabel('Bits')
                ax[iPid].set_title(pidType)
            plt.show()

Exemplo n.º 4

0

Exibir arquivo

def get_sessions(dfRawH5, mousename):
    """Return the key names of the 'data' group in the mouse's HDF5 file.

    The file path is taken from the 'path' column of the single row of
    ``dfRawH5`` whose 'mousename' equals ``mousename``.
    """
    _, mouseRow = pd_is_one_row(pd_query(dfRawH5, {'mousename': mousename}))
    with h5py.File(mouseRow['path'], 'r') as h5file:
        sessionNames = list(h5file['data'].keys())
    return sessionNames

Exemplo n.º 5

0

Exibir arquivo

 def get_delay_length(self, mousename, session):
     """Return the 'delay' entry of the unique ``self.dfSessions`` row
     matching the given mousename and session."""
     selector = {'mousename': mousename, 'session': session}
     _, sessionRow = pd_is_one_row(pd_query(self.dfSessions, selector))
     return sessionRow['delay']

Exemplo n.º 6

0

Exibir arquivo

def plot_consistency_significant_activity_byphase(dataDB,
                                                  ds,
                                                  intervals,
                                                  minTrials=10,
                                                  performance=None,
                                                  dropChannels=None):
    """Plot how consistent channel-wise significance is across mice.

    For every (datatype, trialType) pair listed by ``ds.list_dsets_pd()``:
    each session contributes a vector of ``-log10(p)`` values, one per
    channel, from a two-sided Wilcoxon test between the datasets of the two
    requested intervals. Vectors are averaged over sessions per mouse, the
    per-mouse averages are correlated pairwise, and a pair plot plus a
    correlation matrix are saved under 'pics/consistency/...'. Finally the
    mean off-diagonal correlation of every pair is saved as a heatmap.

    :param dataDB: provides ``find_mouse_by_session``,
        ``is_matching_performance`` and the attribute ``mice``
    :param ds: data store providing ``list_dsets_pd`` and ``get_data``;
        datasets are indexed here as (trial, channel)
    :param intervals: two interval names; both must be present in the
        'intervName' column of every used session
    :param minTrials: sessions with fewer trials in either interval are skipped
    :param performance: if not None, only sessions for which
        ``dataDB.is_matching_performance`` is true are used
    :param dropChannels: optional channel indices to exclude
    :return: None; figures are written to disk

    NOTE(review): a mouse with no usable session gets the mean of an empty
    list, and a mouse in ``dataDB.mice`` that has no rows is missing from the
    per-mouse dict - both cases look unhandled downstream; confirm.
    """
    rows = ds.list_dsets_pd()
    rows['mousename'] = [
        dataDB.find_mouse_by_session(session) for session in rows['session']
    ]

    dfColumns = ['datatype', 'trialType', 'consistency']
    dfConsistency = pd.DataFrame(columns=dfColumns)

    for (datatype,
         trialType), rowsMouse in rows.groupby(['datatype', 'trialType']):
        pSigDict = {}
        # Group by scalar labels: with a 1-element list pandas >= 2.0 yields
        # 1-tuples, which would not match the names in dataDB.mice below.
        for mousename, rowsSession in rowsMouse.groupby('mousename'):
            pSig = []
            for session, rowsTrial in rowsSession.groupby('session'):
                if (performance is not None
                        and not dataDB.is_matching_performance(
                            session, performance, mousename=mousename)):
                    continue

                assert intervals[0] in list(rowsTrial['intervName'])
                assert intervals[1] in list(rowsTrial['intervName'])
                dsetLabel1 = pd_is_one_row(
                    pd_query(rowsTrial,
                             {'intervName': intervals[0]}))[1]['dset']
                dsetLabel2 = pd_is_one_row(
                    pd_query(rowsTrial,
                             {'intervName': intervals[1]}))[1]['dset']
                data1 = ds.get_data(dsetLabel1)
                data2 = ds.get_data(dsetLabel2)
                # Axis 0 is the trial axis for both datasets (axis 1 is the
                # channel axis, see the column indexing below).
                nTrials1 = data1.shape[0]
                nTrials2 = data2.shape[0]

                if (nTrials1 < minTrials) or (nTrials2 < minTrials):
                    print(session, datatype, trialType, 'too few trials',
                          nTrials1, nTrials2, ';; skipping')
                    continue

                if dropChannels is not None:
                    channelMask = np.ones(data1.shape[1], dtype=bool)
                    channelMask[dropChannels] = False
                    data1 = data1[:, channelMask]
                    data2 = data2[:, channelMask]
                # Count what is actually left, so repeated indices in
                # dropChannels cannot skew the number of tested channels.
                nChannels = data1.shape[1]

                pvals = [
                    wilcoxon(data1[:, iCh],
                             data2[:, iCh],
                             alternative='two-sided')[1]
                    for iCh in range(nChannels)
                ]
                pSig.append(-np.log10(np.array(pvals)))
            # Average -log10(p) over sessions (an earlier variant summed the
            # number of sessions with p < 0.01 instead).
            pSigDict[mousename] = np.mean(pSig, axis=0)

        mice = sorted(dataDB.mice)
        nMice = len(mice)
        corrCoef = np.zeros((nMice, nMice))
        for iMouse, iName in enumerate(mice):
            for jMouse, jName in enumerate(mice):
                corrCoef[iMouse, jMouse] = np.corrcoef(pSigDict[iName],
                                                       pSigDict[jName])[0, 1]

        sns.pairplot(data=pd.DataFrame(pSigDict), vars=mice)

        prefixPath = 'pics/consistency/significant_activity/byphase/bymouse/'
        make_path(prefixPath)
        plt.savefig(prefixPath + datatype + '_' + trialType + '.svg')
        plt.close()

        fig2, ax2 = plt.subplots()
        # NOTE(review): the matrix is drawn here and again by the imshow
        # helper below; the first call may be redundant - confirm.
        ax2.imshow(corrCoef, vmin=0, vmax=1)
        imshow(fig2,
               ax2,
               corrCoef,
               title='Significance Correlation',
               haveColorBar=True,
               limits=[0, 1],
               xTicks=mice,
               yTicks=mice)

        prefixPath = 'pics/consistency/significant_activity/byphase/bymouse_corr/'
        make_path(prefixPath)
        plt.savefig(prefixPath + datatype + '_' + trialType + '.svg')
        plt.close()

        avgConsistency = np.round(np.mean(offdiag_1D(corrCoef)), 2)
        dfConsistency = pd_append_row(dfConsistency,
                                      [datatype, trialType, avgConsistency])

    fig, ax = plt.subplots()
    dfPivot = pd_pivot(dfConsistency, *dfColumns)
    sns.heatmap(data=dfPivot, ax=ax, annot=True, vmax=1, cmap='jet')

    prefixPath = 'pics/consistency/significant_activity/byphase/'
    make_path(prefixPath)
    fig.savefig(prefixPath + 'consistency_' + str(performance) + '.svg')
    plt.close()

Exemplo n.º 7

0

Exibir arquivo

def test_prediction(dataDB, prepData, prepDF, intervNames=None):
    """Per mouse, test and classify iGO vs. iNOGO data for every interval.

    For each mouse two figures with three panels are created. For every
    interval and session the iGO and iNOGO rows of ``prepDF`` are looked up;
    their indices select the arrays in ``prepData``. Each of 48 channels is
    compared with a two-sided Mann-Whitney U test (stored as ``-log10(p)``),
    and ``binary_classifier`` is run with a ridge classifier. Sessions that
    lack either trial type are filled with NaN.

    :param dataDB: provides ``mice``, ``get_sessions`` and
        ``get_interval_names``
    :param prepData: container indexed by the row index found in ``prepDF``
    :param prepDF: frame queried by 'mousename', 'session', 'interval' and
        'trialType'
    :param intervNames: interval names to use; if None they are taken from
        ``dataDB.get_interval_names()``
    """
    classifier = RidgeClassifier(max_iter=10000, alpha=1.0E-2)

    for mousename in sorted(dataDB.mice):
        sessions = dataDB.get_sessions(mousename)
        nSessions = len(sessions)

        if intervNames is None:
            intervNames = dataDB.get_interval_names()

        figTest, axTest = plt.subplots(ncols=3, figsize=(10, 5))
        figClass, axClass = plt.subplots(ncols=3, figsize=(10, 5))
        figTest.suptitle(mousename)
        figClass.suptitle(mousename)

        for iInterv, intervName in enumerate(intervNames):
            testMat = np.zeros((48, nSessions))
            accLst = []

            for iSession, session in enumerate(sessions):
                print(intervName, session)

                baseQuery = {
                    'mousename': mousename,
                    'session': session,
                    'interval': intervName,
                }
                rowGo = pd_query(prepDF, {**baseQuery, 'trialType': 'iGO'})
                rowNogo = pd_query(prepDF, {**baseQuery, 'trialType': 'iNOGO'})

                # Guard: nothing to compare if either trial type is absent
                if len(rowGo) == 0 or len(rowNogo) == 0:
                    print('Skipping session', session,
                          'because too few trials')
                    testMat[:, iSession] = np.nan
                    accLst.append({'accTrain': np.nan, 'accTest': np.nan})
                    continue

                idxRowGO, _ = pd_is_one_row(rowGo)
                idxRowNOGO, _ = pd_is_one_row(rowNogo)
                dataGO = prepData[idxRowGO]
                dataNOGO = prepData[idxRowNOGO]

                # Pairwise test on every individual channel
                for iCh in range(48):
                    testRez = mannwhitneyu(dataGO[:, iCh],
                                           dataNOGO[:, iCh],
                                           alternative='two-sided')
                    testMat[iCh, iSession] = -np.log10(testRez[1])

                # Classification
                accuracy = binary_classifier(dataGO,
                                             dataNOGO,
                                             classifier,
                                             method="looc",
                                             balancing=False)
                accLst.append(accuracy)

            # Test panel
            thisAxTest = axTest[iInterv]
            thisAxTest.set_title(intervName)
            img = thisAxTest.imshow(testMat, vmin=0, vmax=10)
            imshow_add_color_bar(figTest, thisAxTest, img)

            # Classification panel
            thisAxClass = axClass[iInterv]
            thisAxClass.set_title(intervName)
            thisAxClass.plot([acc['accTrain'] for acc in accLst],
                             label='train')
            thisAxClass.plot([acc['accTest'] for acc in accLst], label='test')
            thisAxClass.axhline(y=0.5, linestyle='--', color='pink')
            thisAxClass.set_xlim(0, len(sessions))
            thisAxClass.set_ylim(0, 1)
            thisAxClass.legend()

        plt.show()

Exemplo n.º 8

0

Exibir arquivo

 def get_delay_length(self, mousename, session):
     """Return the 'delay' entry for ``session`` from the 'metadataSession'
     table stored in the HDF file ``self.datapaths[mousename]``."""
     dfMeta = pd.read_hdf(self.datapaths[mousename], 'metadataSession')
     sessionRows = dfMeta[dfMeta['session'] == session]
     _, sessionRow = pd_is_one_row(sessionRows)
     return sessionRow['delay']