def permute_w_replacement(frame, axis=0):
    '''
    Resample the frame values, with replacement, along the given axis.

    Create a simulated dataset in which every row (or column) of the
    result is drawn, with replacement, from the values found in the same
    row (or column) of the input. The row/column labels are kept; only the
    values are resampled.

    Parameters
    ----------
    frame : DataFrame
        Frame to permute.
    axis : {0, 1}
        - 0 - Permute row values across columns
        - 1 - Permute column values across rows
        The value is passed through ``_get_axis`` first (presumably this
        normalises axis aliases to 0/1 -- confirm against its definition).

    Returns
    -------
    Permuted DataFrame (new instance).
    '''
    from numpy.random import randint
    from pandas import Series

    # DataFrame.apply uses the opposite convention to this function:
    # apply(axis=1) hands each *row* to the callback.
    apply_axis = 1 - _get_axis(axis)
    size = frame.shape[apply_axis]

    def resample(values):
        # Draw `size` positions uniformly from [0, size), with replacement.
        picks = randint(0, size, size)
        # Return a labelled Series rather than a bare ndarray: recent pandas
        # versions do not expand list-like results when applying over rows,
        # which would turn the output into a Series of arrays instead of a
        # DataFrame shaped like the input.
        return Series(values.values[picks], index=values.index)

    return frame.apply(resample, axis=apply_axis)
def alr(frame, ref=None, axis=0):
    '''
    Compute the additive log-ratio (alr) transformation with respect to
    the component given in ref.

    Parameters
    ----------
    frame : DataFrame
        Frame to be transformed. Any other input is forwarded unchanged
        to ``alr_for_array``.
    ref : valid label | None
        Label of the component to be used as the normalization reference,
        i.e. values of the other components will be divided by the values
        of this reference component.
        If None is passed (default), the last col/row is used as ref.
    axis : {0, 1}
        0 : transform each row (default)
        1 : transform each column

    Returns
    -------
    DataFrame of log-ratios with the reference component dropped
    (or whatever ``alr_for_array`` returns for non-DataFrame input).
    '''
    if not isinstance(frame, DF):
        return alr_for_array(frame, ref, axis)

    axis = _get_axis(axis)
    # The components live on the axis opposite to the one being transformed.
    component_axis = 1 - axis

    if ref is None:
        label = frame.axes[component_axis][-1]
    else:
        label = ref

    # Multiplying by 1. forces float division even for integer counts.
    if axis == 0:
        norm = 1. * frame[label]
    elif axis == 1:
        norm = 1. * frame.xs(label)

    temp = frame.apply(lambda x: log(x / norm), axis=axis)
    # `axis` must be given by keyword: pandas 2.0 made every argument of
    # DataFrame.drop other than `labels` keyword-only.
    return temp.drop(label, axis=component_axis)
def alr_for_array(frame, ref=None, axis=0):
    '''
    Additive log-ratio transformation for positionally indexed arrays.

    Parameters
    ----------
    frame : 2-D array
        Data to transform; it is indexed as ``frame[:, i]`` / ``frame[i, :]``.
    ref : int | None
        Position of the reference component. If None (default), the last
        column/row (position -1) is used.
    axis : {0, 1}
        0 : divide by the reference column (default)
        1 : divide by the reference row
        The value is passed through ``_get_axis`` first.

    Returns
    -------
    Array of log-ratios with the reference column/row deleted.
    '''
    axis = _get_axis(axis)
    ref_pos = -1 if ref is None else ref

    # Multiplying by 1. forces float division even for integer input.
    if axis == 0:
        denom = 1. * frame[:, ref_pos]
    elif axis == 1:
        denom = 1. * frame[ref_pos, :]

    def log_ratio(vec):
        return log(vec / denom)

    ratios = np.apply_along_axis(log_ratio, axis, frame)
    return np.delete(ratios, ref_pos, 1 - axis)
def correlation(frame, method='pearson', axis=0):
    '''
    Calculate the correlation between all rows/cols.
    Return frames of correlation values and p-values.

    Parameters
    ----------
    frame : DataFrame
        Frame containing data.
    method : {pearson (default) | spearman | kendall}
        Type of correlations to be computed (case-insensitive).
    axis : {0, 1}
        - 0 - Compute correlation between columns
        - 1 - Compute correlation between rows

    Returns
    -------
    c : frame
        DataFrame of symmetric pairwise correlation coefficients.
        Labels are the rows/column labels of the input frame.
    p : frame
        DataFrame of p-values associated with correlation values.
        Labels are the rows/column labels of the input frame.
        For pearson/kendall the diagonal entries are set to 1.

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    '''
    import scipy.stats as stats

    axis = _get_axis(axis)
    method = method.lower()
    if method not in ('pearson', 'kendall', 'spearman'):
        raise ValueError('Correlation of method %s is not supported.' %method)

    if method == 'spearman':
        # spearmanr handles the whole matrix in a single call.
        coefs, pvals = stats.spearmanr(frame.values, axis=axis)
        if not np.shape(coefs):
            # Scalar result: expand it into an explicit 2x2 symmetric matrix.
            coefs = np.array([[1, coefs], [coefs, 1]])
            pvals = np.array([[1, pvals], [pvals, 1]])
        names = frame._get_axis(1 - axis)
        c = DF(coefs, index=names, columns=names)
        p = DF(pvals, index=names, columns=names)
        return c, p

    pair_stat = {'pearson': stats.pearsonr, 'kendall': stats.kendalltau}[method]

    # Lay the data out so that every row is one variable to be compared.
    if axis == 0:
        data = frame.T
    elif axis == 1:
        data = frame
    values = data.values
    names = data.index
    size = len(names)

    # Start from identity matrices: the diagonal of both outputs is 1.
    coefs = np.eye(size)
    pvals = np.eye(size)
    for i in range(size):
        # Only the strict upper triangle is computed; results are mirrored.
        for j in range(i + 1, size):
            coef, pval = pair_stat(values[i], values[j])
            coefs[i, j] = coefs[j, i] = coef
            pvals[i, j] = pvals[j, i] = pval

    c = DF(coefs, index=names, columns=names)
    p = DF(pvals, index=names, columns=names)
    return c, p