示例#1
0
def quantile_normalize_scores(matrices, weights=None):
    """Quantile normalize the scores of several matrices against each other.

    Each matrix is flattened and sorted; the sorted vectors become the
    columns of one array. The mean of every row of that array (weighted
    when `weights` is given) is then passed, together with the original
    matrices, to qm_result_matrices(), whose result is returned.
    The duration of each phase is logged at INFO level.

    matrices -- objects exposing their scores as a NumPy array in `.values`
    weights  -- optional weights, multiplied against the columns of the
                stacked array (presumably one weight per matrix - confirm
                against callers); None selects the plain row mean
    """
    logging.info("COMPUTING WEIGHTED MEANS...")
    phase_start = util.current_millis()

    # one sorted, flattened vector per input matrix; after transposing,
    # each of those vectors is a column of flat_values
    sorted_vectors = [np.sort(matrix.values.flatten()) for matrix in matrices]
    flat_values = np.asarray(sorted_vectors).T

    logging.info("flattened/sorted score matrices in %f s.",
                 (util.current_millis() - phase_start) / 1000.0)

    phase_start = util.current_millis()
    if weights is None:
        tmp_mean = util.row_means(flat_values)
    else:
        # broadcasting scales every column by its weight in one step; the
        # original author's note reports this replaced a much slower
        # apply_along_axis() implementation
        scaled = weights * flat_values
        # NaN weights are masked out so they do not enter the weight total
        scale = np.sum(np.ma.masked_array(weights, mask=np.isnan(weights)))
        tmp_mean = util.row_means(scaled) / scale
    logging.info("weighted means in %f s.",
                 (util.current_millis() - phase_start) / 1000.0)

    phase_start = util.current_millis()
    result = qm_result_matrices(matrices, tmp_mean)
    logging.info("result matrices built in %f s.",
                 (util.current_millis() - phase_start) / 1000.0)
    return result
示例#2
0
def quantile_normalize_scores(matrices, weights=None):
    """Quantile normalize a group of score matrices against each other.

    The values of every matrix are flattened, sorted and stacked as the
    columns of a single array. The row means of that array - weighted by
    `weights` when it is not None - are handed to qm_result_matrices()
    along with the input matrices, and its result is returned.
    Elapsed time for the three phases is logged at INFO level.
    """
    def seconds_since(millis):
        """seconds elapsed since the given util.current_millis() reading"""
        return (util.current_millis() - millis) / 1000.0

    logging.info("COMPUTING WEIGHTED MEANS...")

    # phase 1: build an array with |matrices| columns, each column holding
    # the sorted values of one matrix
    timer = util.current_millis()
    per_matrix_sorted = []
    for matrix in matrices:
        per_matrix_sorted.append(np.sort(matrix.values.flatten()))
    flat_values = np.transpose(np.asarray(per_matrix_sorted))
    logging.info("flattened/sorted score matrices in %f s.",
                 seconds_since(timer))

    # phase 2: (weighted) mean of every row
    timer = util.current_millis()
    if weights is None:
        tmp_mean = util.row_means(flat_values)
    else:
        # element-wise broadcast multiply applies each weight to its column
        weighted = weights * flat_values
        # the weight total ignores NaN weights by masking them
        nan_mask = np.isnan(weights)
        weight_total = np.sum(np.ma.masked_array(weights, nan_mask))
        tmp_mean = util.row_means(weighted) / weight_total
    logging.info("weighted means in %f s.", seconds_since(timer))

    # phase 3: build the result matrices from the means
    timer = util.current_millis()
    result = qm_result_matrices(matrices, tmp_mean)
    logging.info("result matrices built in %f s.", seconds_since(timer))
    return result
示例#3
0
 def test_row_means_with_nans(self):
     """row_means() averages each row over its non-NaN entries only"""
     matrix = [[0.0010, np.nan, 0.21370, 0.0342],
               [0.2123, -0.2135, -0.99980, -0.0213],
               [-0.4534, 0.5546, 0.79123, np.nan]]
     # rows 0 and 2 contain one NaN each, so their means are over 3 values
     expected = (0.08296666, -0.255575, 0.297476666)
     result = util.row_means(matrix)
     for row, mean in enumerate(expected):
         self.assertAlmostEqual(mean, result[row])
示例#4
0
 def test_row_means(self):
     """row_means() returns the arithmetic mean of every row"""
     matrix = [[0.0010, 0.1234, 0.21370, 0.0342],
               [0.2123, -0.2135, -0.99980, -0.0213],
               [-0.4534, 0.5546, 0.79123, 0.00312321]]
     # expected mean of each row, in row order
     expected = (0.0930750, -0.255575, 0.2238883025)
     result = util.row_means(matrix)
     for row, mean in enumerate(expected):
         self.assertAlmostEqual(mean, result[row])
示例#5
0
def __compute_row_scores_for_submatrix(matrix, submatrix):
    """Compute one log-scaled score per row of `matrix`.

    The column means are taken from `submatrix` (via its column_means()
    method), which should be the data matrix restricted to the row and
    column names of one cluster. `matrix` should be restricted to the
    columns of that same cluster so the column means line up with it.

    A row's score is log(mean squared deviation from the column means
    + 1e-99); the tiny offset keeps an exact zero out of log(). The NumPy
    result of np.log is returned as is (it is not wrapped in a DataMatrix).
    """
    deviations = matrix.values - submatrix.column_means()
    mean_squared = util.row_means(np.square(deviations))
    return np.log(mean_squared + 1e-99)
示例#6
0
def weighted_row_means(matrix, weights):
    """Compute the weighted mean of every row of `matrix`.

    matrix  -- array whose columns are scaled by the weights
    weights -- weights broadcast against the columns of `matrix`
               (presumably one weight per column - confirm against callers)

    Returns util.row_means() of the weighted matrix divided by the sum of
    the weights; NaN weights are masked out of that sum.
    """
    # element-wise broadcast multiply applies each weight to its column; the
    # original author's note reports this replaced a far slower
    # apply_along_axis() implementation
    scaled = weights * matrix
    scale = np.sum(np.ma.masked_array(weights, np.isnan(weights)))
    return util.row_means(scaled) / scale
def __compute_row_scores_for_submatrix(matrix, submatrix):
    """Compute one log-scaled score per row of `matrix`.

    `submatrix` supplies the column means (util.column_means() of its
    values) and should be the data matrix restricted to the row and column
    names of one cluster. `matrix` should be restricted to the columns of
    that same cluster so the column means line up with it.

    The mean squared deviation of each row from those column means is
    clipped to [1e-20, 1000.0], offset by 1e-99 and passed through log.
    The NumPy result of np.log is returned as is (it is not wrapped in a
    DataMatrix).
    """
    values = matrix.values
    col_means = util.column_means(submatrix.values)
    mean_squared = util.row_means(np.square(values - col_means))
    # clipping keeps the argument of log() safely above zero
    bounded = np.clip(mean_squared, 1e-20, 1000.0)
    return np.log(bounded + 1e-99)
示例#8
0
def __compute_row_scores_for_submatrix(matrix, submatrix):
    """Return the log of the clipped mean squared deviation of each row.

    Column means are computed with util.column_means() on the values of
    `submatrix`, which should be the data matrix filtered by the row and
    column names of a specific cluster. `matrix` should be filtered by the
    columns of the same cluster so the means apply to matching columns.

    The value returned is whatever np.log produces for the per-row scores;
    it is not wrapped in a DataMatrix.
    """
    squared_deviation = np.square(
        matrix.values - util.column_means(submatrix.values))
    row_scores = util.row_means(squared_deviation)
    # bound the scores to [1e-20, 1000.0] so that the argument of log()
    # stays sufficiently above 0 to avoid errors
    log_argument = np.clip(row_scores, 1e-20, 1000.0) + 1e-99
    return np.log(log_argument)
def quantile_normalize_scores(matrices, weights=None):
    """Quantile normalize the scores of several matrices against each other.

    The sorted, flattened values of the matrices are obtained from
    as_sorted_flat_values(); the mean of each row of that array (weighted
    when `weights` is given) is passed with the original matrices to
    qm_result_matrices(), whose result is returned.

    matrices -- the score matrices to normalize
    weights  -- optional weights multiplied against the columns of the
                flat value array; None selects the plain row mean
    """
    flat_values = as_sorted_flat_values(matrices)
    # Identity test on purpose: `weights != None` compares element-wise when
    # weights is a NumPy array, and the resulting array has no single truth
    # value, so the `if` would raise ValueError.
    if weights is not None:
        # element-wise broadcast multiply applies each weight to its column
        scaled = weights * flat_values
        # NaN weights are masked out so they do not enter the weight total
        scale = np.sum(np.ma.masked_array(weights, np.isnan(weights)))
        tmp_mean = util.row_means(scaled) / scale
    else:
        tmp_mean = util.row_means(flat_values)
    return qm_result_matrices(matrices, tmp_mean)
示例#10
0
 def residual(self, max_row_variance=None):
     """Return the mean absolute residual of this matrix's values.

     The residual array is values + overall mean - combined row/column
     means, where the overall mean is util.mean() of the row means and the
     combination comes from util.r_outer(..., operator.add) (presumably the
     outer sum row_mean[i] + col_mean[j] - confirm in util).

     When max_row_variance is given, the result is divided by this matrix's
     row variance; a row variance that is NaN or larger than
     max_row_variance is replaced by max_row_variance first.
     """
     row_avgs = util.row_means(self.values)
     col_avgs = util.column_means(self.values)
     overall_avg = util.mean(row_avgs)
     combined = util.r_outer(row_avgs, col_avgs, operator.add)
     average = util.mean(np.abs(self.values + overall_avg - combined))
     if max_row_variance is None:
         return average

     row_var = self.row_variance()
     # cap the divisor at max_row_variance, also covering an undefined (NaN)
     # row variance
     use_cap = np.isnan(row_var) or row_var > max_row_variance
     return average / (max_row_variance if use_cap else row_var)
示例#11
0
 def residual(self, max_row_variance=None):
     """Compute the residual of this matrix.

     The residual is util.mean() of the absolute values of
     values + overall mean - util.r_outer(row means, column means, add),
     with the overall mean taken over the row means.

     If max_row_variance is not None, the residual is normalized by the
     row variance of this matrix, which is limited to max_row_variance
     (and replaced by it when the row variance is NaN).
     """
     means_by_row = util.row_means(self.values)
     means_by_col = util.column_means(self.values)
     mean_of_rows = util.mean(means_by_row)
     deviation = (self.values + mean_of_rows
                  - util.r_outer(means_by_row, means_by_col, operator.add))
     score = util.mean(np.abs(deviation))

     if max_row_variance is not None:
         variance = self.row_variance()
         if np.isnan(variance) or variance > max_row_variance:
             # undefined or too large: fall back to the allowed maximum
             score = score / max_row_variance
         else:
             score = score / variance
     return score
示例#12
0
def quantile_normalize_scores(matrices, weights=None):
    """Quantile normalize the scores of several matrices against each other.

    The sorted, flattened values of the matrices are obtained from
    as_sorted_flat_values(); the mean of each row of that array is computed
    with weighted_row_means() when `weights` is given and with
    util.row_means() otherwise. The means and the original matrices are
    passed to qm_result_matrices(), whose result is returned.

    matrices -- the score matrices to normalize
    weights  -- optional weights for the weighted row mean; None selects
                the plain row mean
    """
    flat_values = as_sorted_flat_values(matrices)
    # Identity test on purpose: `weights != None` compares element-wise when
    # weights is a NumPy array, and the resulting array has no single truth
    # value, so the `if` would raise ValueError.
    if weights is not None:
        tmp_mean = weighted_row_means(flat_values, weights)
    else:
        tmp_mean = util.row_means(flat_values)
    return qm_result_matrices(matrices, tmp_mean)
示例#13
0
 def row_means(self):
     """Returns the row means of this matrix's values, as computed by
     util.row_means()"""
     return util.row_means(self.values)