示例#1
0
def calc_emd(data,row_tree,alpha=1.0,beta=0.0,exc_sing=False,weights=None):
    """
    Calculates the EMD on the *columns* from data and a tree on the rows.

    Every node (folder) of row_tree receives a weight that is the product of
      * (node.size/rows)**beta        -- the folder size fraction term
      * 2**((1-node.level)*alpha)     -- the level term
    The per-node values returned by tree_util.tree_averages(data,row_tree)
    are scaled by these weights, and the result is the pairwise L1
    ("cityblock") distance between the scaled columns.

    Parameters:
        data     -- 2-D array; its first dimension must equal row_tree.size.
        row_tree -- iterable of nodes exposing size, level and idx.
        alpha    -- exponent controlling the per-level weight.
        beta     -- exponent applied to the folder size fraction.
        exc_sing -- when True, nodes with size == 1 get weight 0.
        weights  -- optional multiplier applied to the node weights
                    (expected: a scalar or one value per node, in tree
                    iteration order -- confirm against callers).

    Returns:
        Square matrix of pairwise distances as produced by
        scipy's pdist + squareform.

    Raises:
        AssertionError if the tree size does not match the row count.
    """
    rows,_ = np.shape(data)
    assert rows == row_tree.size, "Tree size must match # rows in data."

    folder_fraction = np.array([((node.size*1.0/rows)**beta)*
                                (2.0**((1.0-node.level)*alpha))
                                 for node in row_tree])
    if weights is not None:
        folder_fraction = folder_fraction*weights

    if exc_sing:
        # Singleton folders are excluded by zeroing their weight.
        for node in row_tree:
            if node.size == 1:
                folder_fraction[node.idx] = 0.0
    coefs = tree_util.tree_averages(data,row_tree)

    # Scale row i of coefs by folder_fraction[i] via broadcasting.
    # Multiplying by np.diag(folder_fraction) gives the same numbers for
    # finite input but allocates a dense (n_nodes x n_nodes) matrix, and a
    # NaN/inf in one row would leak into every other row through 0*NaN.
    ext_vecs = np.asarray(folder_fraction)[:,np.newaxis]*np.asarray(coefs)

    pds = spsp.distance.pdist(ext_vecs.T,"cityblock")
    distances = spsp.distance.squareform(pds)

    return distances
示例#2
0
 def calc_avg_val_cols(self,row_tree,col_tree):
     """
     Computes the column averages over both trees, caches and announces them.

     The level averages of self.data over col_tree (barcode.level_avgs) are
     passed through tree_util.tree_averages on row_tree and transposed. The
     result is stored in self.avg_tree_cols and the "embed.col.avg" message
     is published.

     When row_tree is None nothing is computed, self.avg_tree_cols is left
     untouched, no message is published and None is returned. (This path
     used to fail with UnboundLocalError because the result variable was
     never assigned.)

     Returns the transposed averages, or None when row_tree is None.
     """
     if row_tree is None:
         return None

     avg_level_cols = barcode.level_avgs(self.data,col_tree)
     avg_tree_cols = tree_util.tree_averages(avg_level_cols,row_tree).T
     self.avg_tree_cols = avg_tree_cols
     Publisher.sendMessage("embed.col.avg")
     return avg_tree_cols
示例#3
0
 def calc_avg_val_rows(self,row_tree,col_tree):
     """
     Computes the row averages over both trees, caches and announces them.

     The level averages of self.data.T over row_tree (barcode.level_avgs)
     are passed through tree_util.tree_averages on col_tree, with the
     transposes applied as in the code below. The result is stored in
     self.avg_tree_rows and the "embed.row.avg" message is published.

     When col_tree is None a notice is printed, nothing is computed,
     self.avg_tree_rows is left untouched, no message is published and
     None is returned. (This path used to fail with UnboundLocalError
     because the result variable was never assigned.)

     Returns the transposed averages, or None when col_tree is None.
     """
     if col_tree is None:
         # Single-argument call form prints identically on Python 2 and 3.
         print("empty column tree")
         return None

     avg_level_rows = barcode.level_avgs(self.data.T,row_tree).T
     avg_tree_rows = tree_util.tree_averages(avg_level_rows.T,col_tree).T
     self.avg_tree_rows = avg_tree_rows
     Publisher.sendMessage("embed.row.avg")
     return avg_tree_rows
示例#4
0
def _level_avgs(data, col_tree):
    """
    Spreads the per-node tree averages of a vector over the tree levels.

    data is a vector of length n and col_tree is a tree with n leaves.
    The result is a d x n matrix, d being col_tree.tree_depth: row
    (node.level - 1) holds, at the positions listed in node.elements,
    the average that tree_util.tree_averages reports for that node.
    """
    node_means = tree_util.tree_averages(data.T, col_tree)
    grid_shape = [col_tree.tree_depth, col_tree.size]
    by_level = np.zeros(grid_shape)

    for folder in col_tree:
        # Levels are 1-based on the nodes, rows of the result are 0-based.
        level_row = folder.level - 1
        by_level[level_row, folder.elements] = node_means[folder.idx]

    return by_level
示例#5
0
def _level_avgs(data,col_tree):
    """
    Builds a (depth x n) table of folder averages for a single vector.

    data is a vector of length n; col_tree is a tree with n leaves.
    For every node of col_tree, the average reported by
    tree_util.tree_averages is written into row (node.level-1) of the
    table at the columns given by node.elements. The number of rows is
    col_tree.tree_depth and the number of columns is col_tree.size.
    """
    folder_means = tree_util.tree_averages(data.T,col_tree)
    depth = col_tree.tree_depth
    width = col_tree.size
    table = np.zeros([depth,width])

    for node in col_tree:
        # Node levels start at 1, table rows at 0.
        row = node.level-1
        cols = node.elements
        table[row,cols] = folder_means[node.idx]

    return table
示例#6
0
def calc_emd_ref(ref_data,data,row_tree,alpha=1.0,beta=0.0):
    """
    Calculates the EMD from a set of points to a reference set of points
    The columns of ref_data are each a reference set point.
    The columns of data are each a point outside the reference set.

    Node sizes are normalised so that they sum to 1 within each tree level
    and are then raised to the power beta; these factors scale the values
    returned by tree_util.tree_averages for both inputs. For each level the
    L1 ("cityblock") distances between the scaled columns are accumulated
    with weight 2**((1-level)*alpha).

    Parameters:
        ref_data -- 2-D array, one reference point per column.
        data     -- 2-D array with the same number of rows as ref_data.
        row_tree -- tree on the rows; nodes expose level, idx and size,
                    the tree exposes size and tree_depth.
        alpha    -- exponent controlling the per-level weight.
        beta     -- exponent applied to the per-level size fractions.

    Returns:
        Array of shape (ref_cols, cols); entry (i, j) relates reference
        column i to data column j.

    Raises:
        AssertionError if the row counts of data, ref_data and the tree
        do not agree.
    """
    ref_rows,ref_cols = np.shape(ref_data)
    rows,cols = np.shape(data)
    assert rows == row_tree.size, "Tree size must match # rows in data."
    assert ref_rows == rows, "Mismatched row #: reference and sample sets."

    emd = np.zeros([ref_cols,cols])
    ref_coefs = tree_util.tree_averages(ref_data, row_tree)
    coefs = tree_util.tree_averages(data, row_tree)

    # Node indices grouped by tree level.
    level_elements = collections.defaultdict(list)
    for node in row_tree:
        level_elements[node.level].append(node.idx)

    # Builtin float instead of the np.float alias, which newer NumPy
    # releases no longer provide; the resulting dtype is the same.
    folder_fraction = np.array([node.size for node in row_tree],float)
    levels = range(1,row_tree.tree_depth+1)
    for level in levels:
        members = level_elements[level]
        fsize = np.sum(folder_fraction[members])
        folder_fraction[members] /= fsize

    folder_fraction = folder_fraction**beta

    # Scale each node's row by its factor through broadcasting rather than
    # multiplying by a dense np.diag(folder_fraction) matrix.
    scale = folder_fraction[:,np.newaxis]
    coefs = scale*np.asarray(coefs)
    ref_coefs = scale*np.asarray(ref_coefs)

    for level in levels:
        members = level_elements[level]
        # cdist yields (cols x ref_cols); transpose to match emd.
        distances = spsp.distance.cdist(coefs[members,:].T,
                                        ref_coefs[members,:].T,
                                        "cityblock").T
        emd += (2**((1.0-level)*alpha))*distances

    return emd
示例#7
0
def calc_emd_ref(ref_data,data,row_tree,alpha=1.0,beta=0.0):
    """
    Calculates the EMD from a set of points to a reference set of points
    The columns of ref_data are each a reference set point.
    The columns of data are each a point outside the reference set.

    Node sizes are normalised so that they sum to 1 within each tree level
    and are then raised to the power beta; these factors scale the values
    returned by tree_util.tree_averages for both inputs. For each level the
    L1 ("cityblock") distances between the scaled columns are accumulated
    with weight 2**((1-level)*alpha).

    Parameters:
        ref_data -- 2-D array, one reference point per column.
        data     -- 2-D array with the same number of rows as ref_data.
        row_tree -- tree on the rows; nodes expose level, idx and size,
                    the tree exposes size and tree_depth.
        alpha    -- exponent controlling the per-level weight.
        beta     -- exponent applied to the per-level size fractions.

    Returns:
        Array of shape (ref_cols, cols); entry (i, j) relates reference
        column i to data column j.

    Raises:
        AssertionError if the row counts of data, ref_data and the tree
        do not agree.
    """
    ref_rows,ref_cols = np.shape(ref_data)
    rows,cols = np.shape(data)
    assert rows == row_tree.size, "Tree size must match # rows in data."
    assert ref_rows == rows, "Mismatched row #: reference and sample sets."

    emd = np.zeros([ref_cols,cols])
    ref_coefs = tree_util.tree_averages(ref_data, row_tree)
    coefs = tree_util.tree_averages(data, row_tree)

    # Node indices grouped by tree level.
    level_elements = collections.defaultdict(list)
    for node in row_tree:
        level_elements[node.level].append(node.idx)

    # Builtin float instead of the np.float alias, which newer NumPy
    # releases no longer provide; the resulting dtype is the same.
    folder_fraction = np.array([node.size for node in row_tree],float)
    levels = range(1,row_tree.tree_depth+1)
    for level in levels:
        members = level_elements[level]
        fsize = np.sum(folder_fraction[members])
        folder_fraction[members] /= fsize

    folder_fraction = folder_fraction**beta

    # Scale each node's row by its factor through broadcasting rather than
    # multiplying by a dense np.diag(folder_fraction) matrix.
    scale = folder_fraction[:,np.newaxis]
    coefs = scale*np.asarray(coefs)
    ref_coefs = scale*np.asarray(ref_coefs)

    for level in levels:
        members = level_elements[level]
        # cdist yields (cols x ref_cols); transpose to match emd.
        distances = spsp.distance.cdist(coefs[members,:].T,
                                        ref_coefs[members,:].T,
                                        "cityblock").T
        emd += (2**((1.0-level)*alpha))*distances

    return emd
def tree_product_transform(data,row_tree):
    """
    Expresses every node's tree average as a ratio to its parent's average.

    The node without a parent keeps its own average; every other node gets
    average(node)/average(parent). NaN entries in the result (for instance
    from 0/0) are replaced by 1.0. The returned array has the shape of
    tree_util.tree_averages(data,row_tree).
    """
    node_means = tree_util.tree_averages(data,row_tree)
    ratios = np.zeros(np.shape(node_means))

    # 1-D averages are indexed by node only; otherwise a full slice over
    # the second axis is appended, i.e. [idx] versus [idx,:].
    if node_means.ndim == 1:
        tail = ()
    else:
        tail = (slice(None),)

    for node in row_tree:
        own = (node.idx,) + tail
        parent = node.parent
        if parent is None:
            ratios[own] = node_means[own]
            continue
        above = (parent.idx,) + tail
        ratios[own] = node_means[own]/node_means[above]

    nan_mask = np.isnan(ratios)
    ratios[nan_mask] = 1.0
    return ratios
示例#9
0
    def calculate(self,datadict):
        """
        Loads the inputs from datadict and derives the three q_image arrays.

        Keys read from datadict and stored on self under the same name:
        "data", "q_descs", "p_score_descs", "p_scores", "col_tree",
        "row_tree".

        Attributes set:
            q_image     -- tree_util.tree_averages over row_tree of
                           barcode.level_avgs(data, col_tree); indexed
                           below as a 3-D array.
            q_image_mg  -- same shape as q_image; zeros at index 0 of
                           axis 1, np.diff of q_image along axis 1
                           everywhere else.
            q_image_top -- q_image minus its index-0 slice along axis 1.
        """
        self.data = datadict["data"]
        self.q_descs = datadict["q_descs"]
        self.p_score_descs = datadict["p_score_descs"]
        self.p_scores = datadict["p_scores"]
        self.col_tree = datadict["col_tree"]
        self.row_tree = datadict["row_tree"]

        avgs = barcode.level_avgs(self.data,self.col_tree)
        node_avgs = tree_util.tree_averages(avgs,self.row_tree)
        orig_shape = np.shape(node_avgs)
        # NOTE: the flatten-then-restore reshape is what remains of a
        # disabled column re-ordering step (barcode.organize_cols applied
        # to the 2-D form); as it stands the values are unchanged.
        r_avgs = np.reshape(node_avgs,(-1,orig_shape[-1]))
        self.q_image = np.reshape(r_avgs,orig_shape)

        self.q_image_mg = np.zeros(np.shape(self.q_image))
        self.q_image_mg[:,1:,:] = np.diff(self.q_image,axis=1)

        # Assigned directly; no zero-filled placeholder is needed because
        # the subtraction produces a fresh array of the right shape.
        self.q_image_top = self.q_image - self.q_image[:,0,:][:,np.newaxis,:]
示例#10
0
def level_avgs(data,col_tree):
    """
    Averages every row of data over the folders of col_tree, per level.

    data is an m x n matrix and col_tree a tree with n leaves and d levels
    (d is col_tree.tree_depth). The result is an m x d x n array whose
    entry (i,j,k) is the average response of row i over the folder that
    contains k at level j. A 1-D data vector is handed to _level_avgs.
    """
    if data.ndim == 1:
        return _level_avgs(data,col_tree)

    n_rows,n_cols = np.shape(data)
    cube = np.zeros([n_rows,col_tree.tree_depth,n_cols])

    node_means = tree_util.tree_averages(data.T,col_tree)
    for folder in col_tree:
        members = folder.elements
        # Repeat the folder's averages once per member, then transpose so
        # the repeats run along the last axis of the target slice.
        repeated = np.tile(node_means[folder.idx],(len(members),1))
        cube[:,folder.level-1,members] = repeated.T

    return cube
示例#11
0
def level_avgs(data, col_tree):
    """
    Computes per-level folder averages for each row of a matrix.

    data is a matrix m x n; col_tree is a tree with n leaves and d levels,
    d being col_tree.tree_depth. Returns an m x d x n array in which entry
    (i, j, k) is the average response of the ith row to the folder
    containing k at the jth level. When data is one-dimensional the work
    is delegated to _level_avgs.
    """
    if data.ndim == 1:
        return _level_avgs(data, col_tree)

    row_count, col_count = np.shape(data)
    out_shape = [row_count, col_tree.tree_depth, col_count]
    result = np.zeros(out_shape)

    folder_means = tree_util.tree_averages(data.T, col_tree)
    for node in col_tree:
        copies = len(node.elements)
        # One copy of the node's averages per element, transposed so the
        # copies line up with the element axis of the slice being filled.
        block = np.tile(folder_means[node.idx], (copies, 1)).T
        result[:, node.level - 1, node.elements] = block

    return result
示例#12
0
def calc_emd(data,row_tree,alpha=1.0,beta=0.0,exc_sing=False):
    """
    Calculates the EMD on the *columns* from data and a tree on the rows.

    Every node (folder) of row_tree receives a weight that is the product of
      * (node.size/rows)**beta        -- the folder size fraction term
      * 2**((1-node.level)*alpha)     -- the level term
    The per-node values returned by tree_util.tree_averages(data,row_tree)
    are scaled by these weights, and the result is the pairwise L1
    ("cityblock") distance between the scaled columns.

    Parameters:
        data     -- 2-D array; its first dimension must equal row_tree.size.
        row_tree -- iterable of nodes exposing size, level and idx.
        alpha    -- exponent controlling the per-level weight.
        beta     -- exponent applied to the folder size fraction.
        exc_sing -- when True, nodes with size == 1 get weight 0.

    Returns:
        Square matrix of pairwise distances as produced by
        scipy's pdist + squareform.

    Raises:
        AssertionError if the tree size does not match the row count.
    """
    rows,_ = np.shape(data)
    assert rows == row_tree.size, "Tree size must match # rows in data."

    folder_fraction = np.array([((node.size*1.0/rows)**beta)*
                                (2.0**((1.0-node.level)*alpha))
                                 for node in row_tree])
    if exc_sing:
        # Singleton folders are excluded by zeroing their weight.
        for node in row_tree:
            if node.size == 1:
                folder_fraction[node.idx] = 0.0
    coefs = tree_util.tree_averages(data,row_tree)

    # Scale row i of coefs by folder_fraction[i] via broadcasting.
    # Multiplying by np.diag(folder_fraction) gives the same numbers for
    # finite input but allocates a dense (n_nodes x n_nodes) matrix, and a
    # NaN/inf in one row would leak into every other row through 0*NaN.
    ext_vecs = folder_fraction[:,np.newaxis]*np.asarray(coefs)

    pds = spsp.distance.pdist(ext_vecs.T,"cityblock")
    distances = spsp.distance.squareform(pds)

    return distances