Exemplo n.º 1
0
def pearson_correlation(datadir,
                        outdir,
                        usemask=True,
                        excludef='',
                        exclude_idx=-1):
    """Run ``measure_pearson`` on every data slice found under ``datadir``.

    Slice files are looked up in ``<datadir>/<au.slices_str()>`` and one
    result file per slice is written into ``outdir``.

    Parameters
    ----------
    datadir : str
        Directory containing the slices sub-directory and the labels file.
    outdir : str
        Directory where the per-slice output file names are built.
    usemask : bool
        When false, an empty mask file name is passed to ``measure_pearson``.
    excludef : str
        Passed through unchanged to ``measure_pearson``.
    exclude_idx : int
        Passed through to ``measure_pearson``; when greater than -1 it is
        also appended to the log message and to each output file name.

    Notes
    -----
    A missing slice file is logged and skipped. Any ``Exception`` raised by
    ``measure_pearson`` is logged and the process exits with status 1.
    """
    slidir = datadir + os.path.sep + au.slices_str()
    labelsfile = datadir + os.path.sep + au.labels_str()

    lst = os.listdir(slidir)
    n = au.count_match(lst, au.data_str() + '_' + au.slice_regex())

    exclude_log = ''
    if exclude_idx > -1:
        exclude_log = ' excluding subject ' + str(exclude_idx)

    au.log.info('Calculating correlation of ' + slidir + os.path.sep +
                au.data_str() + '_' + au.slice_regex() + exclude_log)

    for i in range(n):
        slino = au.zeropad(i)

        # data/mask/output names share the "<kind>_<slice>_<number>" layout
        slice_suffix = '_' + au.slice_str() + '_' + slino
        dataf = slidir + os.path.sep + au.data_str() + slice_suffix + au.ext_str()
        maskf = slidir + os.path.sep + au.mask_str() + slice_suffix + au.ext_str()
        outf = outdir + os.path.sep + au.pearson_str() + slice_suffix

        if exclude_idx > -1:
            outf += '_' + au.excluded_str() + str(exclude_idx) + au.ext_str()
        else:
            outf += au.ext_str()

        if not os.path.isfile(dataf):
            au.log.error('Could not find ' + dataf)
            continue

        if not usemask:
            maskf = ''

        try:
            measure_pearson(dataf, labelsfile, outf, maskf, excludef,
                            exclude_idx)
        except Exception as exc:
            # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit
            # propagate untouched.
            au.log.error(
                'pearson_correlation: Error measuring correlation on ' + dataf)
            # Build one message string: passing the error as an extra
            # positional argument without a placeholder breaks %-formatting
            # in logging-style loggers.
            au.log.error('Unexpected error: ' + repr(exc))
            # sys.exit is always available; the bare ``exit`` helper is only
            # injected by the ``site`` module.
            sys.exit(1)
Exemplo n.º 2
0
def group_stats(datadir, groupname, groupsize, outdir=''):
    """Call ``volstats`` on every ``<groupname>s_<slice>_<n>`` file of a group.

    The number of slices is the count of entries in ``datadir`` matching the
    group slice pattern. Results go to ``outdir``, which falls back to
    ``datadir`` when empty.
    """
    entries = os.listdir(datadir)
    pattern = ''.join([groupname, 's_', au.slice_regex(), au.ext_str()])
    nslices = au.count_match(entries, pattern)

    outdir = outdir or datadir

    au.log.info(''.join(
        ['Calculating stats from group ', groupname, ' in ', outdir]))

    for idx in range(nslices):
        padded = au.zeropad(idx)
        slice_file = ''.join(
            [groupname, 's_', au.slice_str(), '_', padded, au.ext_str()])
        volstats(os.path.sep.join([datadir, slice_file]),
                 groupname, groupsize, outdir)
Exemplo n.º 3
0
def measure_ttest_py (mean1fname, mean2fname,
                      var1fname, var2fname,
                      std1fname, std2fname,
                      numsubjs1, numsubjs2,
                      experimentname, outdir, exclude_idx=-1):
   """Compute a voxel-wise Welch t statistic between two groups and save it.

   Follows the unequal-sample-size, unequal-variance form:

       t = (m1 - m2) / sqrt(var1/N1 + var2/N2)

   http://en.wikipedia.org/wiki/Student%27s_t-test#Unequal_sample_sizes.2C_unequal_variance

   Parameters
   ----------
   mean1fname, mean2fname : str
       Mean volume file of each group.
   var1fname, var2fname : str
       Variance volume file of each group.
   std1fname, std2fname : str
       Unused; kept so all measure functions share one signature.
   numsubjs1, numsubjs2 : number
       Number of subjects in each group.
   experimentname : str
       Prefix of the output file name.
   outdir : str
       Output directory; created if it does not exist.
   exclude_idx : int
       Unused; kept so all measure functions share one signature.

   Returns
   -------
   str
       Path of the saved t-statistic volume.
   """
   if not os.path.exists (outdir):
      # makedirs also handles a nested output directory
      os.makedirs (outdir)

   mean1img = nib.load(mean1fname)
   aff   = mean1img.get_affine()
   mean1 = mean1img.get_data()
   mean2 = nib.load(mean2fname).get_data()
   var1  = nib.load( var1fname).get_data()
   var2  = nib.load( var2fname).get_data()

   # var1/var2 are already variances (s^2 in the formula), so they must not
   # be squared again. float() avoids Python 2 integer floor division.
   stderr = np.sqrt((var1 / float(numsubjs1)) + (var2 / float(numsubjs2)))

   ttest = (mean1 - mean2) / stderr
   # voxels with zero standard error give NaN/inf: report them as 0
   ttest[~np.isfinite(ttest)] = 0

   ttstfname = outdir + os.path.sep + experimentname + '_ttest' + au.ext_str()

   au.save_nibabel(ttstfname, ttest, aff)

   return ttstfname
Exemplo n.º 4
0
def pearson_correlation (datadir, outdir, usemask=True, excludef='', exclude_idx=-1):
   """Run ``measure_pearson`` on every data slice found under ``datadir``.

   Slice files are looked up in ``<datadir>/<au.slices_str()>`` and one
   result file per slice is written into ``outdir``.

   Parameters
   ----------
   datadir : str
       Directory containing the slices sub-directory and the labels file.
   outdir : str
       Directory where the per-slice output file names are built.
   usemask : bool
       When false, an empty mask file name is passed to ``measure_pearson``.
   excludef : str
       Passed through unchanged to ``measure_pearson``.
   exclude_idx : int
       Passed through to ``measure_pearson``; when greater than -1 it is
       also appended to the log message and to each output file name.

   Notes
   -----
   A missing slice file is logged and skipped. Any ``Exception`` raised by
   ``measure_pearson`` is logged and the process exits with status 1.
   """
   slidir     = datadir + os.path.sep + au.slices_str()
   labelsfile = datadir + os.path.sep + au.labels_str()

   lst = os.listdir(slidir)
   n = au.count_match(lst, au.data_str() + '_' + au.slice_regex())

   exclude_log = ''
   if exclude_idx > -1:
      exclude_log = ' excluding subject ' + str(exclude_idx)

   au.log.info ('Calculating correlation of ' + slidir + os.path.sep + au.data_str() + '_' + au.slice_regex() + exclude_log)

   for i in range(n):
      slino = au.zeropad(i)

      # data/mask/output names share the "<kind>_<slice>_<number>" layout
      slice_suffix = '_' + au.slice_str() + '_' + slino
      dataf = slidir + os.path.sep + au.data_str()    + slice_suffix + au.ext_str()
      maskf = slidir + os.path.sep + au.mask_str()    + slice_suffix + au.ext_str()
      outf  = outdir + os.path.sep + au.pearson_str() + slice_suffix

      if exclude_idx > -1:
         outf += '_' + au.excluded_str() + str(exclude_idx) + au.ext_str()
      else:
         outf += au.ext_str()

      if not os.path.isfile(dataf):
         au.log.error('Could not find ' + dataf)
         continue

      if not usemask:
         maskf = ''

      try:
         measure_pearson(dataf, labelsfile, outf, maskf, excludef, exclude_idx)
      except Exception as exc:
         # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit
         # propagate untouched.
         au.log.error('pearson_correlation: Error measuring correlation on ' + dataf)
         # Build one message string: passing the error as an extra positional
         # argument without a placeholder breaks %-formatting in
         # logging-style loggers.
         au.log.error('Unexpected error: ' + repr(exc))
         # sys.exit is always available; the bare ``exit`` helper is only
         # injected by the ``site`` module.
         sys.exit(1)
Exemplo n.º 5
0
def group_stats (datadir, groupname, groupsize, outdir=''):
    """Call ``volstats`` on every ``<groupname>s_<slice>_<n>`` file of a group.

    The number of slices is the count of entries in ``datadir`` matching the
    group slice pattern. Results go to ``outdir``, which falls back to
    ``datadir`` when empty.
    """
    entries = os.listdir(datadir)
    pattern = ''.join([groupname, 's_', au.slice_regex(), au.ext_str()])
    nslices = au.count_match(entries, pattern)

    outdir = outdir or datadir

    au.log.info(''.join(
        ['Calculating stats from group ', groupname, ' in ', outdir]))

    for idx in range(nslices):
        padded = au.zeropad(idx)
        slice_file = ''.join(
            [groupname, 's_', au.slice_str(), '_', padded, au.ext_str()])
        volstats(os.path.sep.join([datadir, slice_file]),
                 groupname, groupsize, outdir)
Exemplo n.º 6
0
def remove_subject_from_stats(meanfname,
                              varfname,
                              samplesize,
                              subjvolfname,
                              newmeanfname,
                              newvarfname,
                              newstdfname=''):
    """Update group mean/variance volumes as if one subject were removed.

    Parameters
    ----------
    meanfname, varfname : str
        Current group mean and variance volumes (extension added if missing).
    samplesize : number
        Number of subjects behind the current statistics; must be > 2.
    subjvolfname : str
        Volume of the subject to remove.
    newmeanfname, newvarfname : str
        Output files for the updated mean and variance.
    newstdfname : str
        Optional output file for the updated standard deviation.

    Raises
    ------
    ValueError
        If ``samplesize`` is not greater than 2 (the updated unbiased
        variance would divide by zero).

    Notes
    -----
    NOTE(review): the variance is treated as the unbiased (n-1) estimator,
    as stated in the derivation below -- confirm this matches how the
    variance files are produced.
    """
    meanfname = au.add_extension_if_needed(meanfname, au.ext_str())
    varfname = au.add_extension_if_needed(varfname, au.ext_str())
    subjvolfname = au.add_extension_if_needed(subjvolfname, au.ext_str())

    newmeanfname = au.add_extension_if_needed(newmeanfname, au.ext_str())
    newvarfname = au.add_extension_if_needed(newvarfname, au.ext_str())

    if newstdfname:
        newstdfname = au.add_extension_if_needed(newstdfname, au.ext_str())

    # float() so the ratios below are never truncated by Python 2 integer
    # division
    n = float(samplesize)
    if n <= 2:
        raise ValueError(
            'remove_subject_from_stats: samplesize must be greater than 2, '
            'got ' + str(samplesize))

    #load data
    meanimg = nib.load(meanfname)
    meanv = meanimg.get_data()
    aff = meanimg.get_affine()
    varv = nib.load(varfname).get_data()
    subjv = nib.load(subjvolfname).get_data()

    #calculate new mean: ((oldmean*N) - x)/(N-1)
    newmean = ((meanv * n) - subjv) / (n - 1)
    newmean = np.nan_to_num(newmean)

    #calculate new variance:
    # oldvar = (n/(n-1)) * (sumsquare/n - oldmu^2)
    # hence the sum of squares is recovered with the inverse factor:
    # s = ((oldvar * ((n-1)/n)) + oldmu^2) * n
    # newvar = ((n-1)/(n-2)) * (((s - x^2)/(n-1)) - newmu^2)
    s = ((varv * ((n - 1) / n)) + np.square(meanv)) * n
    newvar = ((n - 1) / (n - 2)) * (((s - np.square(subjv)) /
                                     (n - 1)) - np.square(newmean))
    newvar = np.nan_to_num(newvar)

    #save nifti files
    au.save_nibabel(newmeanfname, newmean, aff)
    au.save_nibabel(newvarfname, newvar, aff)

    #calculate new standard deviation: sqrt(newvar)
    if newstdfname:
        # sqrt of a slightly negative variance (rounding) yields NaN, which
        # nan_to_num maps to 0
        newstd = np.sqrt(newvar)
        newstd = np.nan_to_num(newstd)
        au.save_nibabel(newstdfname, newstd, aff)
Exemplo n.º 7
0
def change_to_absolute_values (niifname, outfname=''):
    """Replace every voxel of a volume by its absolute value.

    Parameters
    ----------
    niifname : str
        Input volume file; the extension is added if missing.
    outfname : str
        Output file. When empty, the input file is overwritten.

    Raises
    ------
    Exception
        Any error raised while loading or saving is logged and re-raised.
    """
    niifname = au.add_extension_if_needed(niifname, au.ext_str())

    if not outfname:
        outfname = niifname

    try:
        #load data (one load serves both the voxels and the affine)
        img = nib.load(niifname)
        vol = img.get_data()
        aff = img.get_affine()

        vol = np.abs(vol)

        #save nifti file
        au.save_nibabel (outfname, vol, aff)

    except Exception as exc:
        # Build one message string: passing the error as an extra positional
        # argument without a placeholder breaks %-formatting in
        # logging-style loggers.
        au.log.error ('Change_to_absolute_values:: Unexpected error: ' + repr(exc))
        raise
Exemplo n.º 8
0
def get_stats_fnames (groupnames, outdir=''):
    """Build the statistics file names of one or more groups.

    Parameters
    ----------
    groupnames : str or sequence of str
        A single group name or a sequence of them.
    outdir : str
        Optional directory prepended to every file name.

    Returns
    -------
    list
        ``[statfnames, mnames]`` where ``mnames`` lists the measure names
        (sums, mean, var, std) and ``statfnames[g, m]`` holds
        ``<outdir><group>_<measure><ext>`` in a bytes array of width 2000.
    """
    if np.ndim(groupnames) == 0:
        groupnames = [groupnames]

    # Append a single separator. The previous ``outdir += outdir + sep``
    # duplicated the directory name ('out' -> 'outout/').
    if outdir and not outdir.endswith(os.path.sep):
        outdir = outdir + os.path.sep

    mnames  = [au.sums_str(), au.mean_str(), au.var_str(), au.std_str()]
    ngroups = len(groupnames)

    # 'S2000' is the same fixed-width bytes dtype as the older 'a2000' alias
    statfnames = np.zeros ([ngroups, len(mnames)], dtype=np.dtype('S2000'))

    for g, group in enumerate(groupnames):
        for m, measure in enumerate(mnames):
            statfnames[g,m] = outdir + group + '_' + measure + au.ext_str()

    return [statfnames, mnames]
Exemplo n.º 9
0
def get_stats_fnames(groupnames, outdir=''):
    """Build the statistics file names of one or more groups.

    Parameters
    ----------
    groupnames : str or sequence of str
        A single group name or a sequence of them.
    outdir : str
        Optional directory prepended to every file name.

    Returns
    -------
    list
        ``[statfnames, mnames]`` where ``mnames`` lists the measure names
        (sums, mean, var, std) and ``statfnames[g, m]`` holds
        ``<outdir><group>_<measure><ext>`` in a bytes array of width 2000.
    """
    if np.ndim(groupnames) == 0:
        groupnames = [groupnames]

    # Append a single separator. The previous ``outdir += outdir + sep``
    # duplicated the directory name ('out' -> 'outout/').
    if outdir and not outdir.endswith(os.path.sep):
        outdir = outdir + os.path.sep

    mnames = [au.sums_str(), au.mean_str(), au.var_str(), au.std_str()]
    ngroups = len(groupnames)

    # 'S2000' is the same fixed-width bytes dtype as the older 'a2000' alias
    statfnames = np.zeros([ngroups, len(mnames)], dtype=np.dtype('S2000'))

    for g, group in enumerate(groupnames):
        for m, measure in enumerate(mnames):
            statfnames[g, m] = outdir + group + '_' + measure + au.ext_str()

    return [statfnames, mnames]
Exemplo n.º 10
0
def measure_bhattacharyya_distance_py (mean1fname, mean2fname,
                                        var1fname, var2fname,
                                        std1fname, std2fname,
                                        numsubjs1, numsubjs2,
                                        experimentname, outdir, exclude_idx=-1):
    """Save the voxel-wise Bhattacharyya distance between two groups.

    Implements
        1/4 * (m1-m2)^2/(var1+var2) + 1/2 * log( (var1+var2)/(2*std1*std2) )
    as given in:
      1. Bhattacharyya clustering with applications to mixture
         simplifications. Frank Nielsen, Sylvain Boltz, and Olivier
         Schwander. 2010 International Conference on Pattern Recognition.
      2. The Divergence and Bhattacharyya Distance Measures in Signal
         Selection. Kailath, T.
         http://dx.doi.org/10.1109/TCOM.1967.1089532

    The output directory is created when missing, non-finite voxels are set
    to 0, and the path of the saved volume is returned. ``numsubjs1``,
    ``numsubjs2`` and ``exclude_idx`` are not used.
    """
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    # the first mean image also provides the affine of the output volume
    reference = nib.load(mean1fname)
    affine = reference.get_affine()
    mu1 = reference.get_data()
    mu2, var1, var2, sd1, sd2 = [
        nib.load(fname).get_data()
        for fname in (mean2fname, var1fname, var2fname, std1fname, std2fname)
    ]

    var_sum = var1 + var2
    mean_term = 0.25 * (np.square(mu1 - mu2) / var_sum)
    spread_term = 0.5 * (np.log(var_sum / (2 * sd1 * sd2)))

    distance = mean_term + spread_term
    # NaN and +/-inf voxels are zeroed
    distance[~np.isfinite(distance)] = 0

    outname = ''.join(
        [experimentname, '_', au.bhattacharyya_str(), au.ext_str()])
    bhatta = os.path.sep.join([outdir, outname])

    au.save_nibabel(bhatta, distance, affine)

    return bhatta
Exemplo n.º 11
0
def change_to_absolute_values(niifname, outfname=''):
    """Replace every voxel of a volume by its absolute value.

    Parameters
    ----------
    niifname : str
        Input volume file; the extension is added if missing.
    outfname : str
        Output file. When empty, the input file is overwritten.

    Raises
    ------
    Exception
        Any error raised while loading or saving is logged and re-raised.
    """
    niifname = au.add_extension_if_needed(niifname, au.ext_str())

    if not outfname:
        outfname = niifname

    try:
        #load data (one load serves both the voxels and the affine)
        img = nib.load(niifname)
        vol = img.get_data()
        aff = img.get_affine()

        vol = np.abs(vol)

        #save nifti file
        au.save_nibabel(outfname, vol, aff)

    except Exception as exc:
        # Build one message string: passing the error as an extra positional
        # argument without a placeholder breaks %-formatting in
        # logging-style loggers.
        au.log.error('Change_to_absolute_values:: Unexpected error: ' +
                     repr(exc))
        raise
Exemplo n.º 12
0
def measure_bhattacharyya_distance_py (mean1fname, mean2fname,
                                        var1fname, var2fname,
                                        std1fname, std2fname,
                                        numsubjs1, numsubjs2,
                                        experimentname, outdir, exclude_idx=-1):
   """Save the voxel-wise Bhattacharyya distance between two groups.

   Implements
       1/4 * (m1-m2)^2/(var1+var2) + 1/2 * log( (var1+var2)/(2*std1*std2) )
   as given in:
     1. Bhattacharyya clustering with applications to mixture
        simplifications. Frank Nielsen, Sylvain Boltz, and Olivier
        Schwander. 2010 International Conference on Pattern Recognition.
     2. The Divergence and Bhattacharyya Distance Measures in Signal
        Selection. Kailath, T.
        http://dx.doi.org/10.1109/TCOM.1967.1089532

   The output directory is created when missing, non-finite voxels are set
   to 0, and the path of the saved volume is returned. ``numsubjs1``,
   ``numsubjs2`` and ``exclude_idx`` are not used.
   """
   if not os.path.exists(outdir):
      os.mkdir(outdir)

   # the first mean image also provides the affine of the output volume
   reference = nib.load(mean1fname)
   affine = reference.get_affine()
   mu1 = reference.get_data()
   mu2, var1, var2, sd1, sd2 = [
      nib.load(fname).get_data()
      for fname in (mean2fname, var1fname, var2fname, std1fname, std2fname)
   ]

   var_sum = var1 + var2
   mean_term = 0.25 * (np.square(mu1 - mu2) / var_sum)
   spread_term = 0.5 * (np.log(var_sum / (2 * sd1 * sd2)))

   distance = mean_term + spread_term
   # NaN and +/-inf voxels are zeroed
   distance[~np.isfinite(distance)] = 0

   outname = ''.join([experimentname, '_', au.bhattacharyya_str(), au.ext_str()])
   bhatta = os.path.sep.join([outdir, outname])

   au.save_nibabel(bhatta, distance, affine)

   return bhatta
Exemplo n.º 13
0
def remove_subject_from_stats (meanfname, varfname, samplesize, subjvolfname, newmeanfname, newvarfname, newstdfname=''):
    """Update group mean/variance volumes as if one subject were removed.

    Parameters
    ----------
    meanfname, varfname : str
        Current group mean and variance volumes (extension added if missing).
    samplesize : number
        Number of subjects behind the current statistics; must be > 2.
    subjvolfname : str
        Volume of the subject to remove.
    newmeanfname, newvarfname : str
        Output files for the updated mean and variance.
    newstdfname : str
        Optional output file for the updated standard deviation.

    Raises
    ------
    ValueError
        If ``samplesize`` is not greater than 2 (the updated unbiased
        variance would divide by zero).

    Notes
    -----
    NOTE(review): the variance is treated as the unbiased (n-1) estimator,
    as stated in the derivation below -- confirm this matches how the
    variance files are produced.
    """
    meanfname    = au.add_extension_if_needed(meanfname,    au.ext_str())
    varfname     = au.add_extension_if_needed(varfname,     au.ext_str())
    subjvolfname = au.add_extension_if_needed(subjvolfname, au.ext_str())

    newmeanfname = au.add_extension_if_needed(newmeanfname, au.ext_str())
    newvarfname  = au.add_extension_if_needed(newvarfname,  au.ext_str())

    if newstdfname:
        newstdfname = au.add_extension_if_needed(newstdfname, au.ext_str())

    # float() so the ratios below are never truncated by Python 2 integer
    # division
    n = float(samplesize)
    if n <= 2:
        raise ValueError('remove_subject_from_stats: samplesize must be greater than 2, got ' + str(samplesize))

    #load data
    meanimg = nib.load(meanfname)
    meanv = meanimg.get_data()
    aff   = meanimg.get_affine()
    varv  = nib.load( varfname).get_data()
    subjv = nib.load(subjvolfname).get_data()

    #calculate new mean: ((oldmean*N) - x)/(N-1)
    newmean = ((meanv * n) - subjv)/(n-1)
    newmean = np.nan_to_num(newmean)

    #calculate new variance:
    # oldvar = (n/(n-1)) * (sumsquare/n - oldmu^2)
    # hence the sum of squares is recovered with the inverse factor:
    # s = ((oldvar * ((n-1)/n)) + oldmu^2) * n
    # newvar = ((n-1)/(n-2)) * (((s - x^2)/(n-1)) - newmu^2)
    s = ((varv * ((n-1)/n)) + np.square(meanv)) * n
    newvar = ((n-1)/(n-2)) * (((s - np.square(subjv))/(n-1)) - np.square(newmean))
    newvar = np.nan_to_num(newvar)

    #save nifti files
    au.save_nibabel (newmeanfname, newmean, aff)
    au.save_nibabel (newvarfname , newvar,  aff)

    #calculate new standard deviation: sqrt(newvar)
    if newstdfname:
        # sqrt of a slightly negative variance (rounding) yields NaN, which
        # nan_to_num maps to 0
        newstd = np.sqrt(newvar)
        newstd = np.nan_to_num(newstd)
        au.save_nibabel (newstdfname, newstd, aff)
Exemplo n.º 14
0
def group_distance (measure_function, datadir, groups, groupsizes, chkf, absolute=False, outdir='', foldno='', expname='', exclude_idx=-1, exclude_subj='', exclude_subjclass=''):
    """Measure the distance between pairs of groups from their statistics files.

    For each pair (g1, g2), with g1 before g2 in ``groups``, the mean,
    variance and standard deviation files ``<datadir>/<group>_<stat>`` are
    handed to ``measure_function``. Steps already recorded in the checklist
    ``chkf`` are skipped.

    Parameters
    ----------
    measure_function : callable
        Called as ``measure_function(mean1, mean2, var1, var2, std1, std2,
        size1, size2, experiment, outdir, exclude_idx)``. Must return the
        output file name and expose ``__name__``.
    datadir : str
        Directory holding the group statistics files.
    groups : sequence of str
        Group names.
    groupsizes : sequence
        ``groupsizes[g][1]`` is read as the size of group ``g``. It is
        decremented IN PLACE for the group a subject is removed from.
    chkf
        Checklist handle passed to ``au.is_done`` / ``au.checklist_add``.
    absolute : bool
        When true the result volume is converted to absolute values.
    outdir : str
        Output directory; defaults to ``datadir``.
    foldno, expname
        Unused.
    exclude_idx : int
        When greater than -1, subject exclusion is attempted.
    exclude_subj : str
        Subject volume handed to ``remove_subject_from_stats``.
    exclude_subjclass : str
        Group name compared against both names of each pair.

    Returns
    -------
    The file name returned by ``measure_function`` for the first pair whose
    step was not yet done, or None when no pair needed computing.
    """
    if not outdir:
        outdir = datadir

    ngroups = len(groups)

    def stat_stems(gname):
        # [mean, var, std] path stems (no extension) of one group
        prefix = datadir + os.path.sep + gname + '_'
        return [prefix + au.mean_str(),
                prefix + au.var_str(),
                prefix + au.std_str()]

    # Plain str paths rather than a fixed-width numpy bytes array: no
    # truncation at 2000 characters and no bytes/str mixing on Python 3.
    gfnames = [None] * ngroups

    subject_excluded = False

    for g1 in range(ngroups):
        g1name = groups[g1]
        gfnames[g1] = stat_stems(g1name)

        for g2 in range(g1+1, ngroups):
            g2name = groups[g2]
            gfnames[g2] = stat_stems(g2name)

            experiment = g1name + '_vs_' + g2name

            #check if exclude_subjclass is any of both current groups
            # NOTE(review): eg is set to the OTHER group of the pair, not the
            # one whose name equals exclude_subjclass -- confirm intended.
            eg = -1
            if exclude_idx > -1:
                if exclude_subjclass == g1name:
                    eg = g2
                elif exclude_subjclass == g2name:
                    eg = g1

            # __name__ is available on Python 2 and 3 (func_name is 2-only)
            step = au.measure_str() + ' ' + measure_function.__name__ + ' ' + experiment + ' ' + datadir

            #remove subject from stats
            if eg > -1:
                exclude_str = '_' + au.excluded_str() + str(exclude_idx)
                step       += exclude_str
                experiment += exclude_str

                if not au.is_done(chkf, step) and not subject_excluded:
                    newmeanfname = gfnames[eg][0] + exclude_str
                    newvarfname  = gfnames[eg][1] + exclude_str
                    newstdfname  = gfnames[eg][2] + exclude_str

                    rstep = au.remove_str() + ' ' + au.subject_str() + ' ' + str(exclude_subj) + ' ' + au.fromstats_str() + ' ' + datadir
                    if not au.is_done(chkf, rstep):
                        #(meanfname, varfname, samplesize, subjvolfname, newmeanfname, newvarfname, newstdfname='')
                        remove_subject_from_stats (gfnames[eg][0], gfnames[eg][1], groupsizes[eg][1], exclude_subj, newmeanfname, newvarfname, newstdfname)
                        au.checklist_add (chkf, rstep)

                    # from here on the pair uses the statistics without the subject
                    gfnames[eg] = [newmeanfname, newvarfname, newstdfname]

                    groupsizes[eg][1] -= 1

                    subject_excluded = True

            #calculating distance
            if not au.is_done(chkf, step):
                mean1fname = au.add_extension_if_needed (gfnames[g1][0], au.ext_str())
                mean2fname = au.add_extension_if_needed (gfnames[g2][0], au.ext_str())
                var1fname  = au.add_extension_if_needed (gfnames[g1][1], au.ext_str())
                var2fname  = au.add_extension_if_needed (gfnames[g2][1], au.ext_str())
                std1fname  = au.add_extension_if_needed (gfnames[g1][2], au.ext_str())
                std2fname  = au.add_extension_if_needed (gfnames[g2][2], au.ext_str())

                outfname = measure_function (mean1fname, mean2fname, var1fname, var2fname, std1fname, std2fname, groupsizes[g1][1], groupsizes[g2][1], experiment, outdir, exclude_idx)

                if absolute:
                    change_to_absolute_values (outfname)

                au.checklist_add (chkf, step)

                # NOTE(review): returning here stops after the first computed
                # pair; remaining pairs are only handled on later calls.
                return outfname
Exemplo n.º 15
0
def group_distance(measure_function,
                   datadir,
                   groups,
                   groupsizes,
                   chkf,
                   absolute=False,
                   outdir='',
                   foldno='',
                   expname='',
                   exclude_idx=-1,
                   exclude_subj='',
                   exclude_subjclass=''):
    """Measure the distance between pairs of groups from their statistics files.

    For each pair (g1, g2), with g1 before g2 in ``groups``, the mean,
    variance and standard deviation files ``<datadir>/<group>_<stat>`` are
    handed to ``measure_function``. Steps already recorded in the checklist
    ``chkf`` are skipped.

    Parameters
    ----------
    measure_function : callable
        Called as ``measure_function(mean1, mean2, var1, var2, std1, std2,
        size1, size2, experiment, outdir, exclude_idx)``. Must return the
        output file name and expose ``__name__``.
    datadir : str
        Directory holding the group statistics files.
    groups : sequence of str
        Group names.
    groupsizes : sequence
        ``groupsizes[g][1]`` is read as the size of group ``g``. It is
        decremented IN PLACE for the group a subject is removed from.
    chkf
        Checklist handle passed to ``au.is_done`` / ``au.checklist_add``.
    absolute : bool
        When true the result volume is converted to absolute values.
    outdir : str
        Output directory; defaults to ``datadir``.
    foldno, expname
        Unused.
    exclude_idx : int
        When greater than -1, subject exclusion is attempted.
    exclude_subj : str
        Subject volume handed to ``remove_subject_from_stats``.
    exclude_subjclass : str
        Group name compared against both names of each pair.

    Returns
    -------
    The file name returned by ``measure_function`` for the first pair whose
    step was not yet done, or None when no pair needed computing.
    """
    if not outdir:
        outdir = datadir

    ngroups = len(groups)

    def stat_stems(gname):
        # [mean, var, std] path stems (no extension) of one group
        prefix = datadir + os.path.sep + gname + '_'
        return [prefix + au.mean_str(),
                prefix + au.var_str(),
                prefix + au.std_str()]

    # Plain str paths rather than a fixed-width numpy bytes array: no
    # truncation at 2000 characters and no bytes/str mixing on Python 3.
    gfnames = [None] * ngroups

    subject_excluded = False

    for g1 in range(ngroups):
        g1name = groups[g1]
        gfnames[g1] = stat_stems(g1name)

        for g2 in range(g1 + 1, ngroups):
            g2name = groups[g2]
            gfnames[g2] = stat_stems(g2name)

            experiment = g1name + '_vs_' + g2name

            #check if exclude_subjclass is any of both current groups
            # NOTE(review): eg is set to the OTHER group of the pair, not the
            # one whose name equals exclude_subjclass -- confirm intended.
            eg = -1
            if exclude_idx > -1:
                if exclude_subjclass == g1name:
                    eg = g2
                elif exclude_subjclass == g2name:
                    eg = g1

            # __name__ is available on Python 2 and 3 (func_name is 2-only)
            step = (au.measure_str() + ' ' + measure_function.__name__ +
                    ' ' + experiment + ' ' + datadir)

            #remove subject from stats
            if eg > -1:
                exclude_str = '_' + au.excluded_str() + str(exclude_idx)
                step += exclude_str
                experiment += exclude_str

                if not au.is_done(chkf, step) and not subject_excluded:
                    newmeanfname = gfnames[eg][0] + exclude_str
                    newvarfname = gfnames[eg][1] + exclude_str
                    newstdfname = gfnames[eg][2] + exclude_str

                    rstep = (au.remove_str() + ' ' + au.subject_str() +
                             ' ' + str(exclude_subj) + ' ' +
                             au.fromstats_str() + ' ' + datadir)
                    if not au.is_done(chkf, rstep):
                        #(meanfname, varfname, samplesize, subjvolfname, newmeanfname, newvarfname, newstdfname='')
                        remove_subject_from_stats(gfnames[eg][0],
                                                  gfnames[eg][1],
                                                  groupsizes[eg][1],
                                                  exclude_subj,
                                                  newmeanfname,
                                                  newvarfname, newstdfname)
                        au.checklist_add(chkf, rstep)

                    # from here on the pair uses the statistics without the subject
                    gfnames[eg] = [newmeanfname, newvarfname, newstdfname]

                    groupsizes[eg][1] -= 1

                    subject_excluded = True

            #calculating distance
            if not au.is_done(chkf, step):
                mean1fname = au.add_extension_if_needed(
                    gfnames[g1][0], au.ext_str())
                mean2fname = au.add_extension_if_needed(
                    gfnames[g2][0], au.ext_str())
                var1fname = au.add_extension_if_needed(
                    gfnames[g1][1], au.ext_str())
                var2fname = au.add_extension_if_needed(
                    gfnames[g2][1], au.ext_str())
                std1fname = au.add_extension_if_needed(
                    gfnames[g1][2], au.ext_str())
                std2fname = au.add_extension_if_needed(
                    gfnames[g2][2], au.ext_str())

                outfname = measure_function(mean1fname, mean2fname, var1fname,
                                            var2fname, std1fname, std2fname,
                                            groupsizes[g1][1],
                                            groupsizes[g2][1], experiment,
                                            outdir, exclude_idx)

                if absolute:
                    change_to_absolute_values(outfname)

                au.checklist_add(chkf, step)

                # NOTE(review): returning here stops after the first computed
                # pair; remaining pairs are only handled on later calls.
                return outfname