def median_abs_dev(X, W=[], precision=1, c=1.0):
    """
    Compute the (weighted) median absolute deviation.

    mad = median(abs(x - median(x))) / c

    Parameters
    ----------
    X : numpy array of floats or integers
        values
    W : numpy array of floats or integers
        weights
    precision : integer
        number of decimal places at which to consider the weights
    c : float
        constant used as divisor for mad computation;
        c = 0.6745 is used to convert from mad to standard deviation

    Returns
    -------
    mad : float
        (weighted) median absolute deviation

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.guts.compute import median_abs_dev
    >>> X = np.array([1,2,4,7,8])
    >>> W = np.array([.1,.1,.3,.2,.3])
    >>> precision = 1
    >>> # [1, 2, 4, 4, 4, 7, 7, 8, 8, 8]
    >>> median_abs_dev(X, W, precision)
    2.0

    """
    import numpy as np

    from mindboggle.guts.compute import weighted_to_repeated_values

    # Make sure arguments have the correct type:
    if not isinstance(X, np.ndarray):
        X = np.array(X)
    if not isinstance(W, np.ndarray):
        W = np.array(W)
    if not isinstance(precision, int):
        precision = int(precision)

    # If weights are given, expand the values into weight-proportional
    # repeats before taking the median:
    if np.size(W):
        X = weighted_to_repeated_values(X, W, precision)

    mad = np.median(np.abs(X - np.median(X))) / c

    return mad
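
# A minimal usage sketch (not part of the original module): as the docstring
# notes, passing c = 0.6745 rescales the MAD into a robust estimate of the
# standard deviation for roughly Gaussian data, since MAD is approximately
# 0.6745 * sigma for a normal distribution. The helper name
# _demo_median_abs_dev is hypothetical, for illustration only.
def _demo_median_abs_dev():
    """Compare the raw and sigma-scaled (unweighted) MAD on a small sample."""
    import numpy as np

    X = np.array([1, 2, 4, 7, 8])
    raw_mad = median_abs_dev(X)              # median(|x - 4|) = 3.0
    sigma_mad = median_abs_dev(X, c=0.6745)  # 3.0 / 0.6745, approx. 4.45
    return raw_mad, sigma_mad
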
def weighted_median(X, W=[], precision=1):
    """
    Compute a weighted median.

    Parameters
    ----------
    X : numpy array of floats or integers
        values
    W : numpy array of floats or integers
        weights
    precision : integer
        number of decimal places at which to consider the weights

    Returns
    -------
    wmedian : float
        weighted median

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.guts.compute import weighted_median
    >>> X = np.array([1,2,4,7,8])
    >>> W = np.array([.1,.1,.3,.2,.3])
    >>> precision = 1
    >>> # [1, 2, 4, 4, 4, 7, 7, 8, 8, 8]
    >>> weighted_median(X, W, precision)
    5.5

    """
    import numpy as np

    from mindboggle.guts.compute import weighted_to_repeated_values

    # Make sure arguments have the correct type:
    if not isinstance(X, np.ndarray):
        X = np.array(X)
    if not isinstance(W, np.ndarray):
        W = np.array(W)
    if not isinstance(precision, int):
        precision = int(precision)

    # Expand each value into weight-proportional repeats, then take the
    # ordinary median of the expanded array:
    wmedian = np.median(weighted_to_repeated_values(X, W, precision))

    return wmedian
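
# A hedged sketch (illustrative, not from mindboggle): judging from the
# doctest above, the weighted median is equivalent to repeating each value in
# proportion to its weight, rounded at the given precision, and taking the
# ordinary median of the expanded array. With precision=1, weights resolve to
# tenths, so a weight of 0.3 contributes 3 copies. The helper name
# _demo_weighted_median is hypothetical.
def _demo_weighted_median():
    """Show the repeat-then-median equivalence by hand."""
    import numpy as np

    X = np.array([1, 2, 4, 7, 8])
    W = np.array([0.1, 0.1, 0.3, 0.2, 0.3])
    # Expand by hand: repeat counts in tenths are [1, 1, 3, 2, 3], yielding
    # [1, 2, 4, 4, 4, 7, 7, 8, 8, 8] as in the docstring comment.
    expanded = np.repeat(X, np.round(W * 10).astype(int))
    # Assumes weighted_to_repeated_values rounds weights at the given
    # precision, as the docstring example suggests:
    assert np.median(expanded) == weighted_median(X, W, precision=1)  # 5.5
    return np.median(expanded)
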
def stats_per_label(values, labels, include_labels=[], exclude_labels=[],
                    weights=[], precision=1):
    """
    Compute various statistical measures across vertices per label,
    optionally using weights (such as surface area per vertex).

    Example (area-weighted mean):
    average value = sum(a_i * v_i) / total_surface_area,
    where *a_i* and *v_i* are the area and value for each vertex *i*.

    Reference:
        Weighted skewness and kurtosis unbiased by sample size
        Lorenzo Rimoldini, arXiv:1304.6564 (2013)
        http://arxiv.org/abs/1304.6564

    Note ::
        This function differs from means_per_label() in two ways:
            1. It computes more than simply the (weighted) mean and sdev.
            2. It only accepts 1-D arrays of values.

    Parameters
    ----------
    values : numpy array of integers or floats
        values for all vertices (1-D only; see Note)
    labels : list or array of integers
        label for each value
    include_labels : list of integers
        labels to include
    exclude_labels : list of integers
        labels to be excluded
    weights : numpy array of floats
        weights to compute weighted statistical measures
    precision : integer
        number of decimal places at which to consider the weights

    Returns
    -------
    medians : list of floats
        median for each label
    mads : list of floats
        median absolute deviation for each label
    means : list of floats
        mean for each label
    sdevs : list of floats
        standard deviation for each label
    skews : list of floats
        skew for each label
    kurts : list of floats
        kurtosis value for each label
    lower_quarts : list of floats
        lower quartile for each label
    upper_quarts : list of floats
        upper quartile for each label
    label_list : list of integers
        list of unique labels

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.mio.vtks import read_scalars
    >>> from mindboggle.guts.compute import stats_per_label
    >>> from mindboggle.mio.fetch_data import prep_tests
    >>> urls, fetch_data = prep_tests()
    >>> values_file = fetch_data(urls['left_mean_curvature'])
    >>> labels_file = fetch_data(urls['left_freesurfer_labels'])
    >>> area_file = fetch_data(urls['left_area'])
    >>> values, name = read_scalars(values_file, True, True)
    >>> areas, name = read_scalars(area_file, True, True)
    >>> labels, name = read_scalars(labels_file)
    >>> include_labels = []
    >>> exclude_labels = [-1]
    >>> weights = areas
    >>> precision = 1
    >>> medians, mads, means, sdevs, skews, kurts, lower_quarts, upper_quarts, label_list = stats_per_label(values,
    ...     labels, include_labels, exclude_labels, weights, precision)
    >>> print(np.array_str(np.array(medians[0:5]),
    ...       precision=5, suppress_small=True))
    [-1.13602 -1.22961 -2.49665 -3.80782 -3.37309]
    >>> print(np.array_str(np.array(mads[0:5]),
    ...       precision=5, suppress_small=True))
    [ 1.17026  1.5045   1.28234  2.11515  1.69333]
    >>> print(np.array_str(np.array(means[0:5]),
    ...       precision=5, suppress_small=True))
    [-1.1793  -1.21405 -2.49318 -3.58116 -3.34987]
    >>> print(np.array_str(np.array(kurts[0:5]),
    ...       precision=5, suppress_small=True))
    [ 2.34118 -0.3969  -0.55787 -0.73993  0.3807 ]

    """
    import numpy as np
    from scipy.stats import skew, kurtosis, scoreatpercentile

    from mindboggle.guts.compute import weighted_to_repeated_values, \
        median_abs_dev

    # Make sure arguments are numpy arrays:
    if not isinstance(values, np.ndarray):
        values = np.asarray(values)
    if not isinstance(weights, np.ndarray):
        weights = np.asarray(weights)

    # Initialize all statistical lists:
    if include_labels:
        label_list = include_labels
    else:
        label_list = np.unique(labels)
    label_list = [int(x) for x in label_list if int(x) not in exclude_labels]
    medians = []
    mads = []
    means = []
    sdevs = []
    skews = []
    kurts = []
    lower_quarts = []
    upper_quarts = []

    # Extract all vertex indices for each label:
    for label in label_list:
        I = [i for i, x in enumerate(labels) if x == label]
        if I:
            # Get the vertex values:
            X = values[I]
            if len([x for x in X if x != 0]):
                # If there are as many weights as values,
                # apply the weights to the values:
                if np.size(weights) == np.size(values):
                    W = weights[I]
                    sumW = np.sum(W)
                    # If the sum of the weights is positive, compute weighted
                    # statistics (falling back to unweighted skew/kurtosis
                    # when the standard deviation is zero):
                    if sumW > 0:
                        Xdiff = X - np.mean(X)
                        Xstd = np.sqrt(np.sum(W * Xdiff**2) / sumW)
                        means.append(np.sum(W * X) / sumW)
                        sdevs.append(Xstd)
                        if Xstd > 0:
                            skews.append((np.sum(W * Xdiff**3) / sumW) /
                                         Xstd**3)
                            kurts.append((np.sum(W * Xdiff**4) / sumW) /
                                         Xstd**4 - 3)
                        else:
                            skews.append(skew(X))
                            kurts.append(kurtosis(X))
                        X = weighted_to_repeated_values(X, W, precision)
                    # If the sum of the weights equals zero,
                    # simply compute the unweighted statistics:
                    else:
                        means.append(np.mean(X))
                        sdevs.append(np.std(X))
                        skews.append(skew(X))
                        kurts.append(kurtosis(X))
                # If there are no (or not enough) weights,
                # simply compute the unweighted statistics:
                else:
                    means.append(np.mean(X))
                    sdevs.append(np.std(X))
                    skews.append(skew(X))
                    kurts.append(kurtosis(X))
                # Compute median, median absolute deviation,
                # and lower and upper quartiles:
                if np.size(X):
                    medians.append(np.median(X))
                    mads.append(median_abs_dev(X))
                    lower_quarts.append(scoreatpercentile(X, 25))
                    upper_quarts.append(scoreatpercentile(X, 75))
                # If the weights are all smaller than the precision,
                # then X will disappear, so set the above statistics
                # (in the 'if' block) to zero:
                else:
                    medians.append(0)
                    mads.append(0)
                    lower_quarts.append(0)
                    upper_quarts.append(0)
            # If all values are equal to zero, set all statistics to zero:
            else:
                medians.append(0)
                mads.append(0)
                means.append(0)
                sdevs.append(0)
                skews.append(0)
                kurts.append(0)
                lower_quarts.append(0)
                upper_quarts.append(0)
        # If there are no vertices for the label, set all statistics to zero:
        else:
            medians.append(0)
            mads.append(0)
            means.append(0)
            sdevs.append(0)
            skews.append(0)
            kurts.append(0)
            lower_quarts.append(0)
            upper_quarts.append(0)

    return medians, mads, means, sdevs, skews, kurts, \
           lower_quarts, upper_quarts, label_list
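
# A hedged sketch (illustrative, not from mindboggle): per-label, the weighted
# moments above follow the plug-in formulas
#     mean = sum(w*x) / sum(w)
#     std  = sqrt(sum(w*d**2) / sum(w))
#     skew = (sum(w*d**3) / sum(w)) / std**3
#     kurt = (sum(w*d**4) / sum(w)) / std**4 - 3   (excess kurtosis)
# where d = x - mean(x). Note that, matching the implementation above, d is
# centered on the *unweighted* mean np.mean(X). The helper name
# _demo_weighted_moments is hypothetical, for illustration only.
def _demo_weighted_moments():
    """Recompute the weighted moments for a single label by hand."""
    import numpy as np

    X = np.array([1.0, 2.0, 4.0, 7.0, 8.0])  # values for one label
    W = np.array([0.1, 0.1, 0.3, 0.2, 0.3])  # e.g., per-vertex areas
    sumW = np.sum(W)
    d = X - np.mean(X)                       # centered on the unweighted mean
    mean_w = np.sum(W * X) / sumW
    std_w = np.sqrt(np.sum(W * d**2) / sumW)
    skew_w = (np.sum(W * d**3) / sumW) / std_w**3
    kurt_w = (np.sum(W * d**4) / sumW) / std_w**4 - 3
    return mean_w, std_w, skew_w, kurt_w
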