Пример #1
0
 def _concordance_counts_by_axis(r1, r2, groups, num_groups, axis):
     '''Calculate #concordances, #discordances by axis (0=SNP, 1=sample).

     r1 and r2 are compared element-wise; groups[axis] supplies the group key used
     to aggregate the comparison results. num_groups is the length of the dense
     output arrays c and d.

     Returns the tuple (has_data, c, d, concordant, discordant):
       has_data   - second value returned by util.sum_by_group; used here to index
                    c and d (presumably the group ids that have entries - TODO confirm).
       c, d       - float arrays of length num_groups holding the concordant and
                    discordant counts at the has_data positions, 0 elsewhere.
       concordant - the compact per-group concordance sums.
       discordant - the compact per-group discordance sums.
     '''
     keys = groups[axis]
     concordant, has_data = util.sum_by_group((r1 == r2).astype(np.byte), keys)
     discordant, _ = util.sum_by_group((r1 != r2).astype(np.byte), keys)
     # The np.float alias was removed in NumPy 1.24; the builtin float yields the
     # same float64 dtype on every NumPy version.
     c = np.zeros((num_groups,), dtype=float)
     c[has_data] = concordant
     d = np.zeros((num_groups,), dtype=float)
     d[has_data] = discordant
     return has_data, c, d, concordant, discordant
Пример #2
0
def concordance(g1, g2, data_filter=CALLED, samples=None):
    '''Calculate the concordance rate (fraction of compared entries that agree)
    between two genotype data arrays g1, g2.

    If samples is given, both arrays are first restricted to those indices along
    their second axis. The arrays are then recoded with recode_single() using
    data_filter, and the recoded entries are aggregated by groups[0]
    (presumably the SNP index of each entry - TODO confirm against recode_single).

    Returns a float array of length max(groups[0]) + 1 holding
    concordant / (concordant + discordant) at the positions reported by
    util.sum_by_group, and 0 at every other position.'''
    if samples is not None:
        g1, g2 = g1[:, samples, :], g2[:, samples, :]
    r1, r2, groups = recode_single(g1, g2, data_filter)
    snp_groups = groups[0]
    concordant, snps = util.sum_by_group((r1 == r2).astype(np.byte), snp_groups)
    discordant, _ = util.sum_by_group((r1 != r2).astype(np.byte), snp_groups)
    # The np.float alias was removed in NumPy 1.24; the builtin float yields the
    # same float64 dtype on every NumPy version.
    c = np.zeros((max(snp_groups) + 1,), dtype=float)
    # The 1.0 factor forces floating-point division regardless of the count dtype.
    c[snps] = 1.0 * concordant / (concordant + discordant)
    return c
Пример #3
0
def concordance(g1, g2, data_filter=im.imputation.istat.CALLED, samples=None):
    '''Calculate the concordance rate (fraction of compared entries that agree)
    between two genotype data arrays g1, g2.

    If samples is given, both arrays are first restricted to those indices along
    their second axis. The arrays are then recoded with recode_single() using
    data_filter, and the recoded entries are aggregated by groups[0]
    (presumably the SNP index of each entry - TODO confirm against recode_single).

    Returns a float array of length max(groups[0]) + 1 holding
    concordant / (concordant + discordant) at the positions reported by
    util.sum_by_group, and 0 at every other position.'''
    if samples is not None:
        g1, g2 = g1[:, samples, :], g2[:, samples, :]
    r1, r2, groups = recode_single(g1, g2, data_filter)
    snp_groups = groups[0]
    concordant, snps = util.sum_by_group((r1 == r2).astype(np.byte), snp_groups)
    discordant, _ = util.sum_by_group((r1 != r2).astype(np.byte), snp_groups)
    # The np.float alias was removed in NumPy 1.24; the builtin float yields the
    # same float64 dtype on every NumPy version.
    c = np.zeros((max(snp_groups) + 1,), dtype=float)
    # The 1.0 factor forces floating-point division regardless of the count dtype.
    c[snps] = 1.0 * concordant / (concordant + discordant)
    return c
Пример #4
0
 def _genotype_call_rate(self, axis, data_filter=CALLED, samples=None, snps=None):
     '''Return the call rate by axis (0=SNP, 1=sample).

     The data is restricted via self._restrict_arrays(data_filter, samples, snps).
     One count is accumulated per restricted entry, grouped by called[axis], and
     divided by num_groups[1 - axis] (the size of the other axis).

     Returns a float array of length num_groups[axis]; positions not reported by
     util.sum_by_group stay at 0.'''
     rg, _, called, num_groups = self._restrict_arrays(data_filter, samples, snps)
     num_called, has_data = util.sum_by_group(np.ones_like(rg).astype(np.byte), called[axis])
     # The np.float alias was removed in NumPy 1.24; the builtin float yields the
     # same float64 dtype on every NumPy version.
     call_rate = np.zeros((num_groups[axis],), dtype=float)
     # The 1.0 factor forces floating-point division regardless of the count dtype.
     call_rate[has_data] = (1.0 * num_called) / num_groups[1 - axis]
     return call_rate
Пример #5
0
 def _call_counts_by_axis(self, axis, data_filter=CALLED, samples=None, snps=None):
     '''Return the call counts (called count, all count) by axis (0=SNP, 1=sample).

     The data is restricted via self._restrict_arrays(data_filter, samples, snps).
     One count is accumulated per restricted entry, grouped by called[axis].

     Returns (num_called, num_all):
       num_called - the compact per-group sums returned by util.sum_by_group
                    (NOTE(review): presumably only groups with at least one entry
                    are present, so this may be shorter than num_groups[axis]).
       num_all    - num_groups[1 - axis], the size of the other axis.'''
     _, ri, called, num_groups = self._restrict_arrays(data_filter, samples, snps)
     # The previously computed dense call-rate array was never returned, so it has
     # been dropped along with its use of the removed np.float alias.
     num_called, _ = util.sum_by_group(np.ones_like(ri).astype(np.byte), called[axis])
     num_all = num_groups[1 - axis]
     return num_called, num_all
Пример #6
0
def closest_relatives(sample, num=50):
    '''IBD sharing information on num closest relatives.

    Loads the integer segment table for (sample, PATERNAL) from a fixed path, sums
    the segment lengths (column 3 minus column 2) per haplotype key
    2 * column 4 + column 5, and keeps the num keys with the largest sums.

    Returns an array built by concatenating, along axis 1: the summed lengths
    divided by total, the haplotype keys, and the result of
    im.pt.lowest_common_ancestor(ped.graph, sample, relative) for each key's
    sample id (key // 2).

    NOTE(review): total and ped are not defined in this function; presumably they
    are module-level globals - confirm, otherwise this raises NameError.'''
    s = np.loadtxt('/home/oren/ober/out/validation/affy/segments.%d.%d.out' % (sample, PATERNAL), dtype=int)
    values, groups = util.sum_by_group(s[:, 3] - s[:, 2], 2 * s[:, 4] + s[:, 5])  # hash haps columns 4 and 5
    # Indices of the num largest sums, in descending order.
    j = np.argsort(values)[::-1][:num]
    # Undo the 2 * sample + hap hash. Floor division keeps the ids integral under
    # Python 3 (true division would give x.5 for odd keys) and matches Python 2.
    relatives = groups[j] // 2
    return np.concatenate((values[j][np.newaxis].transpose() / total,
                           groups[j][np.newaxis].transpose(),
                           np.array([im.pt.lowest_common_ancestor(ped.graph, sample, relative) for relative in relatives])),
                          axis=1)
Пример #7
0
def closest_relatives(sample, num=50):
    '''IBD sharing information on num closest relatives.

    Loads the integer segment table for (sample, PATERNAL) from a fixed path, sums
    the segment lengths (column 3 minus column 2) per haplotype key
    2 * column 4 + column 5, and keeps the num keys with the largest sums.

    Returns an array built by concatenating, along axis 1: the summed lengths
    divided by total, the haplotype keys, and the result of
    im.pt.lowest_common_ancestor(ped.graph, sample, relative) for each key's
    sample id (key // 2).

    NOTE(review): total and ped are not defined in this function; presumably they
    are module-level globals - confirm, otherwise this raises NameError.'''
    s = np.loadtxt('/home/oren/ober/out/validation/affy/segments.%d.%d.out' %
                   (sample, PATERNAL),
                   dtype=int)
    values, groups = util.sum_by_group(s[:, 3] - s[:, 2], 2 * s[:, 4] +
                                       s[:, 5])  # hash haps columns 4 and 5
    # Indices of the num largest sums, in descending order.
    j = np.argsort(values)[::-1][:num]
    # Undo the 2 * sample + hap hash. Floor division keeps the ids integral under
    # Python 3 (true division would give x.5 for odd keys) and matches Python 2.
    relatives = groups[j] // 2
    return np.concatenate(
        (values[j][np.newaxis].transpose() / total,
         groups[j][np.newaxis].transpose(),
         np.array([
             im.pt.lowest_common_ancestor(ped.graph, sample, relative)
             for relative in relatives
         ])),
        axis=1)