示例#1
0
def process(args, raw_hud_lines):
    """
    Rank the SNP loci by their correlation with one principal component.
    Each output line holds a locus index and the correlation of that
    locus with the requested axis, separated by a tab.
    Lines run from the largest ranking key to the smallest, where the key
    is the correlation itself or, when args.rank_squared is set,
    the squared correlation.
    @param args: user options from the web or cmdline
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    @raise ValueError: if the requested axis is not available
    """
    _, data = hud.decode(raw_hud_lines)
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # convert the requested axis to a 0-based index
    axis_index = args.axis - 1
    # Reject indices on both sides of the valid range;
    # a negative index would otherwise silently wrap around
    # and select one of the trailing eigenvectors.
    if not 0 <= axis_index < len(pcs):
        raise ValueError('the requested axis is not available')
    # compute the correlation of each SNP vector with the requested PC
    pc = pcs[axis_index]
    corrs = [mycorr(snp, pc) for snp in C_full.T]
    # derive the squared keys from the correlations already computed
    if args.rank_squared:
        keys = [corr ** 2 for corr in corrs]
    else:
        keys = corrs
    # sort by key with ties broken by index, then put the largest first
    ranked = sorted((key, i) for i, key in enumerate(keys))
    ranked.reverse()
    indices = [i for _, i in ranked]
    # report loci starting from 1 if requested
    offset = 1 if args.locus_from_1 else 0
    rows = [(i + offset, corrs[i]) for i in indices]
    lines = ['\t'.join(str(x) for x in row) for row in rows]
    return '\n'.join(lines) + '\n'
示例#2
0
def process(args, raw_hud_lines):
    """
    Tabulate the leading principal component coordinates of each OTU.
    The first output line is a header with 'otu' and one label per
    requested component; each following line holds a name from the
    .hud file and its coordinates, optionally preceded by a
    1-based row index.  Fields are separated by tabs.
    @param args: user options from the web or cmdline
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    @raise ValueError: if more components are requested than are available
    """
    out = StringIO()
    names, data = hud.decode(raw_hud_lines)
    # optionally normalize the isolate names
    if args.clean_isolates:
        names = [Carbone.clean_isolate_element(x) for x in names]
    # compute the scaled eigenvectors
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # make sure that enough eigenvectors are available
    npcs = args.npcs
    if npcs > len(pcs):
        raise ValueError(
                'the number of requested principal components '
                'must be no more than the number of OTUs')
    # write the header of the R frame
    header = ['otu']
    header.extend('pc%d' % k for k in range(1, npcs + 1))
    out.write('\t'.join(header) + '\n')
    # write one line per OTU
    for otu_index, otu_name in enumerate(names):
        cells = [otu_name]
        cells.extend(pcs[k][otu_index] for k in range(npcs))
        if args.add_indices:
            cells.insert(0, otu_index + 1)
        out.write('\t'.join(str(cell) for cell in cells) + '\n')
    return out.getvalue()
示例#3
0
def process(args, raw_hud_lines):
    """
    Produce an R frame of principal component coordinates.
    The header line names the 'otu' column and the requested
    'pc' columns.  Every other line gives one name decoded from
    the .hud file followed by its coordinate on each requested
    component, with an optional leading 1-based index.
    @param args: user options from the web or cmdline
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    @raise ValueError: if more components are requested than are available
    """
    out = StringIO()
    names, data = hud.decode(raw_hud_lines)
    # normalize the names of the isolates when asked to
    if args.clean_isolates:
        names = [Carbone.clean_isolate_element(name) for name in names]
    # get the scaled eigenvectors
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # refuse to report more components than were computed
    if len(pcs) < args.npcs:
        message = ''.join((
            'the number of requested principal components ',
            'must be no more than the number of OTUs'))
        raise ValueError(message)
    # collect the header and the data lines, then write them all at once
    pc_indices = range(args.npcs)
    text_lines = ['\t'.join(['otu'] + ['pc%d' % (j + 1) for j in pc_indices])]
    for i, name in enumerate(names):
        coords = [pcs[j][i] for j in pc_indices]
        prefix = [i + 1] if args.add_indices else []
        text_lines.append('\t'.join(map(str, prefix + [name] + coords)))
    out.write('\n'.join(text_lines) + '\n')
    return out.getvalue()
示例#4
0
def process(args, raw_hud_lines):
    """
    Report the correlation of each SNP with each leading principal component.
    The result is the numpy string form of a matrix with one row per
    SNP vector and one column per requested component.
    The numpy print options are widened only while the matrix is
    formatted and are restored afterwards, so the caller's global
    print settings are left untouched.
    @param args: user options from the web or cmdline
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    @raise ValueError: if more components are requested than are available
    """
    _, data = hud.decode(raw_hud_lines)
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # check for sufficient number of eigenvectors
    if len(pcs) < args.ncoords:
        raise ValueError('the number of requested principal components '
                         'must be no more than the number of OTUs')
    # the requested components do not depend on the SNP, so slice once
    leading_pcs = pcs[:args.ncoords]
    # compute the correlation of each SNP vector with each requested PC
    corr_matrix = np.array([
        [mycorr(snp, pc) for pc in leading_pcs]
        for snp in C_full.T])
    # format with wide lines and a high summarization threshold,
    # then put the process-wide print options back as they were
    saved_options = np.get_printoptions()
    np.set_printoptions(linewidth=300, threshold=10000)
    try:
        return str(corr_matrix)
    finally:
        np.set_printoptions(**saved_options)
示例#5
0
def process(args, raw_hud_lines):
    """
    Correlate every SNP vector with the leading principal components.
    The returned text is the numpy string form of a matrix whose rows
    correspond to SNP vectors and whose columns correspond to the
    first args.ncoords scaled eigenvectors.
    Global numpy print options are changed only for the duration of
    the formatting and are then restored to their previous values.
    @param args: user options from the web or cmdline
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    @raise ValueError: if more components are requested than are available
    """
    _, data = hud.decode(raw_hud_lines)
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # check for sufficient number of eigenvectors
    if len(pcs) < args.ncoords:
        raise ValueError(
                'the number of requested principal components '
                'must be no more than the number of OTUs')
    # take the requested components once instead of once per SNP
    requested_pcs = pcs[:args.ncoords]
    # compute the correlation of each SNP vector with each requested PC
    rows = [[mycorr(snp, pc) for pc in requested_pcs] for snp in C_full.T]
    # remember the current print options so that they can be restored
    previous = np.get_printoptions()
    try:
        np.set_printoptions(linewidth=300, threshold=10000)
        return str(np.array(rows))
    finally:
        np.set_printoptions(**previous)