def calc_emd(data, row_tree, alpha=1.0, beta=0.0, exc_sing=False,
             weights=None):
    """
    Calculates the EMD on the *columns* from data and a tree on the rows.

    Every node (folder) of row_tree receives the weight
        (node.size / rows)**beta * 2**((1 - node.level) * alpha)
    i.e. each level is weighted by 2**((1-level)*alpha) and each folder
    size (fraction) is raised to the beta power. The node averages
    returned by tree_util.tree_averages are scaled by these weights and
    the columns are compared with the "cityblock" (L1) distance.

    Parameters:
        data:     2-D array whose number of rows equals row_tree.size.
        row_tree: iterable tree whose nodes expose size, level and idx.
        alpha:    exponent controlling the per-level weight.
        beta:     exponent applied to the folder size fraction.
        exc_sing: if True, folders with size == 1 get weight 0.
        weights:  optional extra factors multiplied elementwise into the
                  folder weights; None leaves them unchanged.

    Returns:
        Square matrix of pairwise distances between the columns of data.

    Raises:
        AssertionError: if row_tree.size differs from the rows of data.
    """
    rows, _ = np.shape(data)
    assert rows == row_tree.size, "Tree size must match # rows in data."

    folder_fraction = np.array([((node.size * 1.0 / rows) ** beta) *
                                (2.0 ** ((1.0 - node.level) * alpha))
                                for node in row_tree])
    if weights is not None:
        folder_fraction = folder_fraction * weights

    if exc_sing:
        # Singleton folders are excluded by zeroing their weight.
        for node in row_tree:
            if node.size == 1:
                folder_fraction[node.idx] = 0.0

    coefs = tree_util.tree_averages(data, row_tree)
    # Scale row i of coefs by folder_fraction[i] through broadcasting. This
    # equals np.diag(folder_fraction).dot(coefs) but never allocates the
    # dense (nodes x nodes) diagonal matrix.
    ext_vecs = folder_fraction[:, np.newaxis] * coefs

    pds = spsp.distance.pdist(ext_vecs.T, "cityblock")
    return spsp.distance.squareform(pds)
def calc_avg_val_cols(self, row_tree, col_tree):
    """
    Computes the tree averages for the columns and announces them.

    self.data is averaged per level of col_tree with barcode.level_avgs,
    and that result is averaged over the nodes of row_tree with
    tree_util.tree_averages. The transposed result is stored in
    self.avg_tree_cols, the "embed.col.avg" message is sent through
    Publisher, and the array is returned.

    Returns None, without computing, storing or publishing anything,
    when row_tree is None.
    """
    if row_tree is None:
        # Explicit no-op result: the averages are never computed on this
        # path, so there is no local value that could be returned.
        return None

    avg_level_cols = barcode.level_avgs(self.data, col_tree)
    avg_tree_cols = tree_util.tree_averages(avg_level_cols, row_tree).T
    self.avg_tree_cols = avg_tree_cols
    Publisher.sendMessage("embed.col.avg")
    return avg_tree_cols
def calc_avg_val_rows(self, row_tree, col_tree):
    """
    Computes the tree averages for the rows and announces them.

    The transpose of self.data is averaged per level of row_tree with
    barcode.level_avgs, and that result is averaged over the nodes of
    col_tree with tree_util.tree_averages. The transposed result is
    stored in self.avg_tree_rows, the "embed.row.avg" message is sent
    through Publisher, and the array is returned.

    When col_tree is None, "empty column tree" is printed and None is
    returned without computing, storing or publishing anything.
    """
    if col_tree is None:
        # Parenthesised form prints the same text on Python 2 and 3.
        print("empty column tree")
        # Explicit no-op result: the averages are never computed on this
        # path, so there is no local value that could be returned.
        return None

    # The original transposed this intermediate and immediately transposed
    # it back before averaging; the two transposes cancel, so it is passed
    # on directly.
    avg_level_rows = barcode.level_avgs(self.data.T, row_tree)
    avg_tree_rows = tree_util.tree_averages(avg_level_rows, col_tree).T
    self.avg_tree_rows = avg_tree_rows
    Publisher.sendMessage("embed.row.avg")
    return avg_tree_rows
def _level_avgs(data, col_tree):
    """
    Per-level folder averages of a single vector.

    data is a vector of length n and col_tree is a tree with n leaves.
    The average of data over every node of col_tree is taken from
    tree_util.tree_averages and written to all elements of that node, in
    the row belonging to the node's level.

    Return value is a dxn matrix, where d is the depth of the col_tree.
    Entries that no node covers at a given level stay 0.
    """
    node_means = tree_util.tree_averages(data.T, col_tree)
    per_level = np.zeros([col_tree.tree_depth, col_tree.size])
    for folder in col_tree:
        # Levels are 1-based, rows of the result are 0-based.
        row = folder.level - 1
        per_level[row, folder.elements] = node_means[folder.idx]
    return per_level
def _level_avgs(data, col_tree):
    """
    Spreads the node averages of a vector over the levels of col_tree.

    data is a vector of length n. col_tree is a tree with n leaves.
    For each node of col_tree, the node's average (as computed by
    tree_util.tree_averages) is assigned to the positions listed in
    node.elements, on the row of the node's level (level 1 -> row 0).

    Return value is a dxn matrix, where d is the depth of the col_tree.
    """
    depth = col_tree.tree_depth
    width = col_tree.size
    means_by_node = tree_util.tree_averages(data.T, col_tree)

    out = np.zeros([depth, width])
    for nd in col_tree:
        out[nd.level - 1, nd.elements] = means_by_node[nd.idx]
    return out
def calc_emd_ref(ref_data, data, row_tree, alpha=1.0, beta=0.0):
    """
    Calculates the EMD from a set of points to a reference set of points.

    The columns of ref_data are each a reference set point.
    The columns of data are each a point outside the reference set.

    Node sizes are normalised so that they sum to 1 within each tree
    level and are then raised to the beta power; the node averages of
    both inputs (tree_util.tree_averages) are scaled by these factors.
    For every level the "cityblock" distances between the scaled
    averages are accumulated with weight 2**((1-level)*alpha).

    Parameters:
        ref_data: 2-D array with one reference point per column.
        data:     2-D array with one sample point per column.
        row_tree: iterable tree on the rows; nodes expose size, level
                  and idx, and the tree exposes size and tree_depth.
        alpha:    exponent controlling the per-level weight.
        beta:     exponent applied to the normalised folder sizes.

    Returns:
        Array of shape (ref_cols, cols): entry (i, j) is the distance
        between reference column i and data column j.

    Raises:
        AssertionError: if the tree size does not match the rows of
            data, or ref_data and data have different row counts.
    """
    ref_rows, ref_cols = np.shape(ref_data)
    rows, cols = np.shape(data)
    assert rows == row_tree.size, "Tree size must match # rows in data."
    assert ref_rows == rows, "Mismatched row #: reference and sample sets."

    emd = np.zeros([ref_cols, cols])
    ref_coefs = tree_util.tree_averages(ref_data, row_tree)
    coefs = tree_util.tree_averages(data, row_tree)

    # Group the node indices by tree level.
    level_elements = collections.defaultdict(list)
    for node in row_tree:
        level_elements[node.level].append(node.idx)

    # range() exists on both Python 2 and 3 (xrange does not).
    levels = range(1, row_tree.tree_depth + 1)

    # The builtin float replaces the np.float alias, which newer NumPy
    # releases no longer provide.
    folder_fraction = np.array([node.size for node in row_tree], float)
    for level in levels:
        members = level_elements[level]
        fsize = np.sum(folder_fraction[members])
        folder_fraction[members] /= fsize
    folder_fraction = folder_fraction ** beta

    # Row-wise scaling via broadcasting; equal to multiplying by
    # np.diag(folder_fraction) without building the dense square matrix.
    scale = folder_fraction[:, np.newaxis]
    coefs = scale * coefs
    ref_coefs = scale * ref_coefs

    for level in levels:
        members = level_elements[level]
        distances = spsp.distance.cdist(coefs[members, :].T,
                                        ref_coefs[members, :].T,
                                        "cityblock").T
        emd += (2 ** ((1.0 - level) * alpha)) * distances

    return emd
def tree_product_transform(data, row_tree):
    """
    Expresses each node average of row_tree relative to its parent.

    The node averages come from tree_util.tree_averages(data, row_tree).
    A node without a parent keeps its own average; every other node gets
    its average divided by the average of its parent. Entries that come
    out as NaN are replaced by 1.0.

    Returns an array with the same shape as the node averages.
    """
    node_means = tree_util.tree_averages(data, row_tree)
    ratios = np.zeros(np.shape(node_means))

    # Indexing the first axis only addresses a scalar for 1-D averages and
    # a whole row otherwise, so one loop serves both layouts.
    for folder in row_tree:
        if folder.parent is None:
            ratios[folder.idx] = node_means[folder.idx]
        else:
            ratios[folder.idx] = (node_means[folder.idx] /
                                  node_means[folder.parent.idx])

    ratios[np.isnan(ratios)] = 1.0
    return ratios
def calculate(self, datadict):
    """
    Loads the inputs from datadict and derives the q_image arrays.

    datadict is indexed with the keys "data", "q_descs",
    "p_score_descs", "p_scores", "col_tree" and "row_tree"; each value
    is stored on self under the same name. A missing key propagates as
    the mapping's own lookup error (KeyError for a dict).

    Attributes set:
        q_image:     node averages over row_tree
                     (tree_util.tree_averages) of the per-level column
                     averages of data (barcode.level_avgs).
        q_image_mg:  same shape as q_image; index 0 along axis 1 is
                     zero, the rest is the first difference of q_image
                     along axis 1.
        q_image_top: q_image minus its index-0 slice along axis 1.

    Returns None.
    """
    self.data = datadict["data"]
    self.q_descs = datadict["q_descs"]
    self.p_score_descs = datadict["p_score_descs"]
    self.p_scores = datadict["p_scores"]
    self.col_tree = datadict["col_tree"]
    self.row_tree = datadict["row_tree"]

    avgs = barcode.level_avgs(self.data, self.col_tree)
    # Node averages are used as-is: no column reordering
    # (barcode.organize_cols) is applied, so no intermediate 2-D reshape
    # is needed.
    self.q_image = tree_util.tree_averages(avgs, self.row_tree)

    self.q_image_mg = np.zeros(np.shape(self.q_image))
    self.q_image_mg[:, 1:, :] = np.diff(self.q_image, axis=1)

    # Offset every slice along axis 1 by the first one.
    self.q_image_top = self.q_image - self.q_image[:, 0, :][:, np.newaxis, :]
def level_avgs(data, col_tree):
    """
    Per-level folder averages for every row of a matrix.

    data is a matrix mxn. col_tree is a tree with n leaves and d levels.
    One-dimensional data is handed to _level_avgs instead.

    Return value is an mxdxn matrix, where d is the depth of the
    col_tree. Entry (i,j,k) is the average response of the ith row to
    the folder containing k at the jth level.
    """
    if data.ndim == 1:
        return _level_avgs(data, col_tree)

    n_rows, n_cols = np.shape(data)
    node_means = tree_util.tree_averages(data.T, col_tree)
    result = np.zeros([n_rows, col_tree.tree_depth, n_cols])

    for folder in col_tree:
        members = folder.elements
        # One copy of the node's averages per member column.
        stacked = np.tile(node_means[folder.idx], (len(members), 1))
        result[:, folder.level - 1, members] = stacked.T

    return result
def level_avgs(data, col_tree):
    """
    Expands the node averages of col_tree into one layer per tree level.

    data is a matrix mxn. col_tree is a tree with n leaves and d levels.
    A one-dimensional data is delegated to _level_avgs.

    Return value is an mxdxn matrix, where d is the depth of the
    col_tree. Entry (i,j,k) is the average response of the ith row to
    the folder containing k at the jth level.
    """
    if data.ndim != 1:
        height, width = np.shape(data)
        out = np.zeros([height, col_tree.tree_depth, width])
        means_by_node = tree_util.tree_averages(data.T, col_tree)
        for nd in col_tree:
            # Levels are 1-based, the level axis of the result is 0-based.
            layer = nd.level - 1
            repeats = (len(nd.elements), 1)
            out[:, layer, nd.elements] = np.tile(means_by_node[nd.idx],
                                                 repeats).T
        return out

    return _level_avgs(data, col_tree)
def calc_emd(data, row_tree, alpha=1.0, beta=0.0, exc_sing=False,
             weights=None):
    """
    Calculates the EMD on the *columns* from data and a tree on the rows.

    Every node (folder) of row_tree receives the weight
        (node.size / rows)**beta * 2**((1 - node.level) * alpha)
    i.e. each level is weighted by 2**((1-level)*alpha) and each folder
    size (fraction) is raised to the beta power. The node averages
    returned by tree_util.tree_averages are scaled by these weights and
    the columns are compared with the "cityblock" (L1) distance.

    Parameters:
        data:     2-D array whose number of rows equals row_tree.size.
        row_tree: iterable tree whose nodes expose size, level and idx.
        alpha:    exponent controlling the per-level weight.
        beta:     exponent applied to the folder size fraction.
        exc_sing: if True, folders with size == 1 get weight 0.
        weights:  optional extra factors multiplied elementwise into the
                  folder weights. Defaults to None (no extra weighting),
                  so existing five-argument calls behave as before.

    Returns:
        Square matrix of pairwise distances between the columns of data.

    Raises:
        AssertionError: if row_tree.size differs from the rows of data.
    """
    rows, _ = np.shape(data)
    assert rows == row_tree.size, "Tree size must match # rows in data."

    folder_fraction = np.array([((node.size * 1.0 / rows) ** beta) *
                                (2.0 ** ((1.0 - node.level) * alpha))
                                for node in row_tree])
    if weights is not None:
        folder_fraction = folder_fraction * weights

    if exc_sing:
        # Singleton folders are excluded by zeroing their weight.
        for node in row_tree:
            if node.size == 1:
                folder_fraction[node.idx] = 0.0

    coefs = tree_util.tree_averages(data, row_tree)
    # Scale row i of coefs by folder_fraction[i] through broadcasting. This
    # equals np.diag(folder_fraction).dot(coefs) but never allocates the
    # dense (nodes x nodes) diagonal matrix.
    ext_vecs = folder_fraction[:, np.newaxis] * coefs

    pds = spsp.distance.pdist(ext_vecs.T, "cityblock")
    return spsp.distance.squareform(pds)