Пример #1
0
def cogtest_manipulation(tbldict, roc_cols):
    """Combine cognitive test dates and scores into a single 'cogtests' table
    
    The 'cogtestdates' and 'cogdata' tables are merged on the 'codeb' and 
    'NP_Tp' columns. The merged table is stored under the key 'cogtests' and 
    the two source tables are removed from the dictionary.
    
    Parameters
    ----------
    tbldict : dict
        Dictionary of DataFrames each containing a different type of data. 
        Must contain the keys 'cogtestdates' and 'cogdata'. Note that this 
        dictionary is modified in place.
    roc_cols : list
        List of strings corresponding to column names for which rate of change 
        should be calculated. The result for column `col` is requested under 
        the name '<col>_sl'.
    
    Returns
    -------
    tbldict : dict
        The same dictionary object that was passed in, now holding 'cogtests' 
        and no longer holding 'cogtestdates' or 'cogdata'
    """
    
    join_keys = ['codeb', 'NP_Tp']
    tbldict['cogtests'] = pd.merge(tbldict['cogtestdates'], tbldict['cogdata'],
                                   on=join_keys)
    
    #source tables are no longer needed once merged
    for merged_away in ('cogtestdates', 'cogdata'):
        del tbldict[merged_away]
    
    #rate of change for each requested test column
    for test_col in roc_cols:
        slope_col = '%s_sl' % test_col
        tbldict['cogtests'] = cf.rate_of_change(
            tbldict['cogtests'], 'codeb', 'NP_Tp', 'NP_Date', test_col, slope_col)
    
    #add column for maximum follow-up time per subject
    tbldict['cogtests'] = cf.max_per_sub(
        tbldict['cogtests'], 'codeb', 'NP_YrsRelBL', 'NP_Followup_Time')
    
    return tbldict
Пример #2
0
def mri_run(datadir, outdir, rois):
    """Main function to collect MRI volume data
    
    Parameters
    ----------
    datadir : string
        Full path to root directory of freesurfer processed data. Expect file tree
        to be datadir/subcode/stats/aseg.stats
    outdir : string
        Full path to directory where data will be saved. The aseg stats file 
        name is appended directly to this string, so it must end with a path 
        separator.
    rois : list of strings
        List of freesurfer rois of interest. These volumes of these rois will be 
        inserted in aseg_change along with their rates of change
        
    Returns
    -------
    aseg_stats : pandas DataFrame
        DataFrame where each row is a scan, and columns are volumes of all
        freesurfer processed regions
    aseg_change : pandas DataFrame
        DataFrame where each row is a scan, and columns are the volumes of interest,
        their rates of change, and icv correction
    """

    #get aseg_stats data from freesurfer processed data
    outfile = '%sFS_aseg_stats.txt' % outdir
    #only the list of subjects is needed from the extraction step
    subs, _, _ = extractFSasegstats(datadir, outfile)
    aseg_stats = pd.read_csv(outfile, header=0, delim_whitespace=True)

    #add columns for SubjID and MRI_TP
    aseg_stats['codea'] = [cf.get_id(sub) for sub in subs]
    aseg_stats['MRI_Tp'] = [cf.get_tp(sub) for sub in subs]
    aseg_stats.drop('Measure:volume', axis=1, inplace=True)

    #get dates of MRI scans that were processed with freesurfer
    mridates = bacs_pet_mri_date_batch(datadir)

    #start aseg_change from the identifying columns, the intracranial volume
    #used for the icv correction, and the rois of interest.
    #NOTE(review): aseg_change was previously read here before it was ever
    #assigned (UnboundLocalError). This column selection is reconstructed from
    #the docstring and from the columns used below - confirm it is as intended.
    base_cols = ['codea', 'MRI_Tp', 'IntraCranialVol']
    keep_cols = base_cols + [roi for roi in rois if roi not in base_cols]
    aseg_change = pd.merge(aseg_stats[keep_cols], mridates, on=['codea', 'MRI_Tp'])

    #map each roi to the name of its icv corrected column
    rois_icvcorr = {roi: '%s_icvcorr' % roi for roi in rois}
    
    aseg_change = icvcorr(aseg_change, rois_icvcorr, 'IntraCranialVol')

    #calculate rate of change in years
    for roi in rois:
        aseg_change = cf.rate_of_change(aseg_change, 'codea', 'MRI_Tp', 
                                    'MRI_Scandate', roi, '%s_sl' % roi)
    
    cf.save_xls_and_pkl(aseg_stats, 'aseg_stats', outdir)
    cf.save_xls_and_pkl(aseg_change, 'aseg_change', outdir)
    
    return aseg_stats, aseg_change
Пример #3
0
def pibparams_run(path_pib, pibrename, outdir, pibcutoff):
    """Reads data from the spreadsheet, does some calculations, and 
    returns a Pandas dataframe with PIB data.
    
    Sheets 'i' and 'j' of the spreadsheet are concatenated, restricted to the 
    columns named in `pibrename`, and renamed. A binary 'PIB_Pos' column, a 
    'PIB_sl' rate of change column and a 'PIB_agepos' column are then added 
    before the result is saved to `outdir`.
    
    Parameters
    ----------
    path_pib : string
        String of full path to *.xls
    pibrename : dict
        Dictionary of name:rename pairs, where the keys are columns in the 
        PIB spreadsheet and values are what to rename the keys to. After 
        renaming, the columns 'codea', 'PIB_Tp', 'PIB_Scandate' and 
        'PIB_Index' must be present.
    outdir : string
        Full path where final dataframe will be saved
    pibcutoff : float
        PIB cutoff value. Rows with PIB_Index >= pibcutoff get PIB_Pos = 1, 
        all others 0.
    
    Returns
    -------
    pib_df : pandas dataframe
        Dataframe containing all PIB data
    """

    #the sheet name is passed positionally because the keyword was renamed
    #from `sheetname` to `sheet_name` between pandas versions
    #read in pib data from old sheet
    pib_old = pd.read_excel(path_pib, 'i')
    #read in PIB data from longitudinal timepoints
    pib_long = pd.read_excel(path_pib, 'j')
    #concatenate PIB tables
    pib_df = pd.concat([pib_long, pib_old])
    #select and rename in one step so the result is a new frame rather than
    #an inplace modification of a column slice
    pib_df = pib_df[list(pibrename)].rename(columns=pibrename)

    #make binary PIB value
    pib_df['PIB_Pos'] = pib_df['PIB_Index'].apply(lambda x: 1 if x >= pibcutoff else 0)

    #calculate rate of change of PIB_Index in years
    pib_df = cf.rate_of_change(pib_df, 'codea', 'PIB_Tp', 'PIB_Scandate', 
                               'PIB_Index', 'PIB_sl')

    #order rows by subject and timepoint before the per-subject step below
    #(DataFrame.sort was removed from pandas; sort_values is its replacement)
    pib_df.sort_values(by=['codea', 'PIB_Tp'], inplace=True)

    #calculate age of PIB positivity
    pib_df['PIB_agepos'] = float('nan')
    by_subject = pib_df.groupby(by='codea')
    #NOTE(review): `f` is not defined in this function; it is presumably a 
    #module-level helper that fills PIB_agepos for one subject - confirm
    pib_df = by_subject.apply(f)
    
    cf.save_xls_and_pkl(pib_df, 'pibparams', outdir)
    
    return pib_df