Example #1
def process(line_sources):
    """
    @param line_sources: input sources, each an iterable of lines
    """
    # get the headers and data from all of the input sources
    header_data_pairs = [hud.decode(lines) for lines in line_sources]
    header_list, data_list = zip(*header_data_pairs)
    # get the header to index map for each input source
    h_to_i_list = [Util.inverse_map(x) for x in header_list]
    # get the intersection of headers in all lists
    header_sets = [set(x) for x in header_list]
    header_intersection = set.intersection(*header_sets)
    # get the ordered list of all headers
    unique_headers = list(
        iterutils.unique_everseen(itertools.chain.from_iterable(header_list)))
    # get the ordered list of headers present in every input source
    out_headers = [h for h in unique_headers if h in header_intersection]
    out_data = []
    for h in out_headers:
        row = []
        for data, h_to_i in zip(data_list, h_to_i_list):
            if h in h_to_i:
                row.extend(data[h_to_i[h]])
        out_data.append(row)
    return hud.encode(out_headers, out_data) + '\n'
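All of these examples lean on a project-local hud module that is not shown here. From the calls above, hud.decode(lines) returns a list of names and a parallel list of integer data rows, and hud.encode(names, rows) renders them back to text without a trailing newline (callers append '\n' themselves). The stand-in below is only a guess at that interface, useful for running the examples in isolation; the real module's column layout may differ.

def decode(lines):
    """
    Hypothetical stand-in for hud.decode, inferred from usage only.
    Assumes each nonblank line is a name followed by whitespace-separated
    integer genotype counts.
    """
    names, rows = [], []
    for line in lines:
        tokens = line.split()
        if not tokens:
            continue
        names.append(tokens[0])
        rows.append([int(x) for x in tokens[1:]])
    return names, rows

def encode(names, rows):
    """
    Hypothetical stand-in for hud.encode: one tab-separated line per name,
    returned without a trailing newline.
    """
    lines = ['\t'.join([name] + [str(x) for x in row])
            for name, row in zip(names, rows)]
    return '\n'.join(lines)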
Example #2
def process(args, raw_hud_lines):
    """
    @param args: user options from the web or cmdline
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    """
    out = StringIO()
    names, data = hud.decode(raw_hud_lines)
    # normalize the names of the isolates
    if args.clean_isolates:
        names = [Carbone.clean_isolate_element(x) for x in names]
    # get the pcs
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # check for sufficient number of eigenvectors
    if len(pcs) < args.npcs:
        msg_a = 'the number of requested principal components '
        msg_b = 'must be no more than the number of OTUs'
        raise ValueError(msg_a + msg_b)
    # create the R frame
    headers = ['otu'] + ['pc%d' % (i + 1) for i in range(args.npcs)]
    print >> out, '\t'.join(headers)
    for i, name in enumerate(names):
        typed_row = [name] + [pcs[j][i] for j in range(args.npcs)]
        if args.add_indices:
            typed_row = [i + 1] + typed_row
        row = [str(x) for x in typed_row]
        print >> out, '\t'.join(row)
    return out.getvalue()
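The args object here only needs the attributes the function actually reads (clean_isolates, diploid_and_biallelic, npcs, add_indices); it does not have to come from any particular parser. A minimal invocation sketch, with a hypothetical input path:

import argparse

args = argparse.Namespace(
        clean_isolates=True,         # run Carbone.clean_isolate_element on the names
        diploid_and_biallelic=True,  # forwarded to eigenpop.get_scaled_eigenvectors
        npcs=3,                      # number of principal components to report
        add_indices=False)           # prepend a 1-based row index to each output row
# with open('example.hud') as fin:   # hypothetical file name
#     print process(args, fin.readlines())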
Example #3
def process(line_sources):
    """
    @param line_sources: input sources, each an iterable of lines
    """
    # get the headers and data from all of the input sources
    header_data_pairs = [hud.decode(lines) for lines in line_sources]
    header_list, data_list = zip(*header_data_pairs)
    # get the header to index map for each input source
    h_to_i_list = [Util.inverse_map(x) for x in header_list]
    # get the intersection of headers in all lists
    header_sets = [set(x) for x in header_list]
    header_intersection = set.intersection(*header_sets)
    # get the ordered list of all headers
    unique_headers = list(iterutils.unique_everseen(
            itertools.chain.from_iterable(header_list)))
    # get the ordered list of headers present in every input source
    out_headers = [h for h in unique_headers if h in header_intersection]
    out_data = []
    for h in out_headers:
        row = []
        for data, h_to_i in zip(data_list, h_to_i_list):
            if h in h_to_i:
                row.extend(data[h_to_i[h]])
        out_data.append(row)
    return hud.encode(out_headers, out_data) + '\n'
Example #4
def process(hud_lines, matpheno_lines):
    """
    @param hud_lines: lines of a .hud file
    @param matpheno_lines: lines of a MAT_pheno.txt file
    @return: contents of an .ind file
    """
    # get the ordered names from the .hud file
    names, hud_data = hud.decode(hud_lines)
    # get case and control status from the matpheno file
    cases = set()
    controls = set()
    for line in iterutils.stripped_lines(matpheno_lines):
        name, classification = line.split(None, 1)
        if classification == '1':
            cases.add(name)
        elif classification == '2':
            controls.add(name)
        elif classification in ('12', 'null'):
            # skip individuals classified like this
            pass
        else:
            msg = 'invalid MAT_pheno classification: ' + classification
            raise Exception(msg)
    # write the .ind file contents
    out = StringIO()
    for name in names:
        gender = 'U'
        classification = 'Ignore'
        if name in cases:
            classification = 'Case'
        elif name in controls:
            classification = 'Control'
        row = [name, gender, classification]
        print >> out, '\t'.join(row)
    return out.getvalue().rstrip()
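For reference, the returned .ind contents are one tab-separated line per name, in .hud order: the name, a gender placeholder of 'U', and one of 'Case', 'Control', or 'Ignore'. With hypothetical names the output would look like:

IC1	U	Case
IC2	U	Control
IC3	U	Ignore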
Example #5
def process(args, raw_hud_lines, nseconds=2):
    nwords = args.nwords
    nchars = args.nchars
    names, data = hud.decode(raw_hud_lines)
    out = StringIO()
    if len(data) < nwords:
        msg = 'the number of OTUs is smaller than the desired sample'
        raise HandlingError(msg)
    if len(data[0]) < nchars:
        msg = 'the number of characters is smaller than the desired sample'
        raise HandlingError(msg)
    # create the matrix
    M = np.array(data)
    # select row and column indices
    row_indices, col_indices = get_selections(M, nwords, nchars, nseconds)
    sorted_row_indices = list(sorted(row_indices))
    sorted_col_indices = list(sorted(col_indices))
    # print the separation
    d = get_separation(M, row_indices, col_indices)
    print >> out, 'best separation:', d
    # print the index selections
    print >> out, 'selected row indices:', sorted_row_indices
    print >> out, 'selected column indices:', sorted_col_indices
    # print some selected values
    for i in sorted_row_indices:
        s = ' '.join(str(M[i, j]) for j in sorted_col_indices)
        print >> out, names[i] + '\t' + s
    return out.getvalue().rstrip()
Example #6
def process(args, raw_hud_lines):
    """
    @param args: user options from the web or cmdline
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    """
    out = StringIO()
    names, data = hud.decode(raw_hud_lines)
    # normalize the names of the isolates
    if args.clean_isolates:
        names = [Carbone.clean_isolate_element(x) for x in names]
    # get the pcs
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # check for sufficient number of eigenvectors
    if len(pcs) < args.npcs:
        msg_a = 'the number of requested principal components '
        msg_b = 'must be no more than the number of OTUs'
        raise ValueError(msg_a + msg_b)
    # create the R frame
    headers = ['otu'] + ['pc%d' % (i+1) for i in range(args.npcs)]
    print >> out, '\t'.join(headers)
    for i, name in enumerate(names):
        typed_row = [name] + [pcs[j][i] for j in range(args.npcs)]
        if args.add_indices:
            typed_row = [i+1] + typed_row
        row = [str(x) for x in typed_row]
        print >> out, '\t'.join(row)
    return out.getvalue()
Example #7
def process(args, raw_hud_lines):
    """
    @param args: user options from the web or cmdline
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    """
    names, data = hud.decode(raw_hud_lines)
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    axis_index = args.axis - 1
    # check for sufficient number of eigenvectors
    if axis_index >= len(pcs):
        msg = 'the requested axis is not available'
        raise ValueError(msg)
    # compute the correlation of each SNP vector with the requested PC
    pc = pcs[axis_index]
    corrs = [mycorr(snp, pc) for snp in C_full.T]
    sqcorrs = [c ** 2 for c in corrs]
    if args.rank_squared:
        keys = sqcorrs
    else:
        keys = corrs
    corr_index_pairs = [(cor, i) for i, cor in enumerate(keys)]
    sorted_pairs = list(reversed(sorted(corr_index_pairs)))
    indices = zip(*sorted_pairs)[1]
    if args.locus_from_1:
        nominal_indices = [i+1 for i in indices]
    else:
        nominal_indices = indices
    rows = [(nom_i, corrs[i]) for i, nom_i in zip(indices, nominal_indices)]
    lines = ['\t'.join(str(x) for x in row) for row in rows]
    return '\n'.join(lines) + '\n'
Example #8
def do_pca(hud_lines):
    """
    @param hud_lines: lines of a .hud file
    @return: names, scaled vectors
    """
    # get the ordered names from the .hud file
    names, data = hud.decode(hud_lines)
    # create the floating point count matrix
    C_full = np.array(data)
    m_full, n_full = C_full.shape
    # remove invariant columns
    C = np.vstack([v for v in C_full.T if len(set(v))>1]).T
    # get the shape of the matrix
    m, n = C.shape
    # get the column means
    u = C.mean(axis=0)
    # get the centered and normalized counts matrix
    M = (C - u) / np.sqrt(u * (1 - u))
    # construct the sample covariance matrix
    X = np.dot(M, M.T) / n
    # get the eigendecomposition of the covariance matrix
    evals, evecs = EigUtil.eigh(X)
    # scale the eigenvectors by the eigenvalues
    pcs = [w*v for w, v in zip(evals, evecs)]
    return names, pcs
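X here is m by m, with one row and column per isolate, so each vector in pcs has one entry per isolate: pcs[k][i] is the coordinate of names[i] on the k-th principal axis, scaled by its eigenvalue. A minimal usage sketch with a hypothetical file name:

with open('example.hud') as fin:   # any iterable of .hud lines works
    names, pcs = do_pca(fin)
# first principal axis: one scaled loading per isolate
for name, loading in zip(names, pcs[0]):
    print name + '\t' + str(loading)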
Example #9
def do_pca(hud_lines):
    """
    @param hud_lines: lines of a .hud file
    @return: names, scaled vectors
    """
    # get the ordered names from the .hud file
    names, data = hud.decode(hud_lines)
    # create the floating point count matrix
    C_full = np.array(data)
    m_full, n_full = C_full.shape
    # remove invariant columns
    C = np.vstack([v for v in C_full.T if len(set(v)) > 1]).T
    # get the shape of the matrix
    m, n = C.shape
    # get the column means
    u = C.mean(axis=0)
    # get the centered and normalized counts matrix
    M = (C - u) / np.sqrt(u * (1 - u))
    # construct the sample covariance matrix
    X = np.dot(M, M.T) / n
    # get the eigendecomposition of the covariance matrix
    evals, evecs = EigUtil.eigh(X)
    # scale the eigenvectors by the eigenvalues
    pcs = [w * v for w, v in zip(evals, evecs)]
    return names, pcs
Example #10
def get_response_content(fs):
    headers, data_rows = hud.decode(fs.table.splitlines())
    rtable_header_line = '\t'.join(headers)
    rows = []
    for i, row in enumerate(zip(*data_rows)):
        rows.append([i] + list(row))
    rtable_data_lines = ['\t'.join(str(x) for x in row) for row in rows]
    return '\n'.join([rtable_header_line] + rtable_data_lines) + '\n'
Example #11
def get_response_content(fs):
    headers, data_rows = hud.decode(fs.table.splitlines())
    data_transpose = zip(*data_rows)
    out = StringIO()
    print >> out, ' '.join(headers)
    for row in data_transpose:
        print >> out, ' '.join(str(x) for x in row)
    return out.getvalue()
Example #12
def get_response_content(fs):
    headers, data_rows = hud.decode(fs.table.splitlines())
    rtable_header_line = '\t'.join(headers)
    rows = []
    for i, row in enumerate(zip(*data_rows)):
        rows.append([i] + list(row))
    rtable_data_lines = ['\t'.join(str(x) for x in row) for row in rows]
    return '\n'.join([rtable_header_line] + rtable_data_lines) + '\n'
Example #13
def get_response_content(fs):
    headers, data_rows = hud.decode(fs.table.splitlines())
    validate_diploid_data_rows(data_rows)
    nheaders = len(headers)
    D = np.zeros((nheaders, nheaders))
    for i in range(nheaders):
        for j in range(nheaders):
            ri = np.array(data_rows[i])
            rj = np.array(data_rows[j])
            D[i, j] = np.mean(np.abs(rj - ri))
    return '\n'.join('\t'.join(str(x) for x in r) for r in D)
Example #14
def get_response_content(fs):
    headers, data_rows = hud.decode(fs.table.splitlines())
    validate_diploid_data_rows(data_rows)
    nheaders = len(headers)
    D = np.zeros((nheaders, nheaders))
    for i in range(nheaders):
        for j in range(nheaders):
            ri = np.array(data_rows[i])
            rj = np.array(data_rows[j])
            D[i, j] = np.mean(np.abs(rj - ri))
    return "\n".join("\t".join(str(x) for x in r) for r in D)
Example #15
def process(lines):
    """
    @param lines: lines of a .hud file
    """
    names, data = hud.decode(lines)
    out = StringIO()
    for i, genotype in enumerate(data[0]):
        name = 'SNP_%d' % i
        chromosome = '1'
        morgans = '0.0'
        bases = i+1
        row = [name, chromosome, morgans, bases]
        print >> out, '\t'.join(str(x) for x in row)
    return out.getvalue().rstrip()
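Each output row describes one SNP column of the .hud data: a generated name, a placeholder chromosome of '1', a genetic distance of 0.0 morgans, and a 1-based position. The first few rows are therefore:

SNP_0	1	0.0	1
SNP_1	1	0.0	2
SNP_2	1	0.0	3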
Example #16
def process(lines):
    """
    @param lines: lines of a .hud file
    """
    names, data = hud.decode(lines)
    out = StringIO()
    for i, genotype in enumerate(data[0]):
        name = 'SNP_%d' % i
        chromosome = '1'
        morgans = '0.0'
        bases = i + 1
        row = [name, chromosome, morgans, bases]
        print >> out, '\t'.join(str(x) for x in row)
    return out.getvalue().rstrip()
Example #17
def get_response_content(fs):
    # get the headers and data from all of the input sources
    headers, sequences = hud.decode(fs.hud.splitlines())
    h_to_s = dict((h, s) for h, s in zip(headers, sequences))
    headers_out = []
    sequences_out = []
    for p, hs in process_headers(headers):
        headers_out.append(p)
        data = np.vstack([h_to_s[h] for h in hs]).sum(axis=0)
        if fs.combine_exist:
            data = np.minimum(1, data)
        sequences_out.append(data)
    if fs.remove_invariant:
        sequences_out = remove_invariant_columns(sequences_out)
    return hud.encode(headers_out, sequences_out) + '\n'
Example #18
def get_response_content(fs):
    # get the headers and data from all of the input sources
    headers, sequences = hud.decode(fs.hud.splitlines())
    h_to_s = dict((h, s) for h, s in zip(headers, sequences))
    headers_out = []
    sequences_out = []
    for p, hs in process_headers(headers):
        headers_out.append(p)
        data = np.vstack([h_to_s[h] for h in hs]).sum(axis=0)
        if fs.combine_exist:
            data = np.minimum(1, data)
        sequences_out.append(data)
    if fs.remove_invariant:
        sequences_out = remove_invariant_columns(sequences_out)
    return hud.encode(headers_out, sequences_out) + '\n'
Example #19
def get_response_content(fs):
    out = StringIO()
    # extract names from the .hud file
    names, hud_data = hud.decode(fs.hud.splitlines())
    # read the csv file
    rows = list(csv.reader(Util.get_stripped_lines(fs.info.splitlines())))
    header, data_rows = rows[0], rows[1:]
    cases, controls = get_precipitation_info(data_rows, fs.threshold)
    # write the .ind file contents
    for name in names:
        gender = 'U'
        classification = 'Ignore'
        if name in cases:
            classification = 'Case'
        elif name in controls:
            classification = 'Control'
        row = [name, gender, classification]
        print >> out, '\t'.join(row)
    return out.getvalue()
Example #20
def get_response_content(fs):
    out = StringIO()
    # extract name order from the .hud file
    names, hud_data = hud.decode(fs.hud.splitlines())
    # read the csv file
    rows = list(csv.reader(Util.get_stripped_lines(fs.info.splitlines())))
    header, data_rows = rows[0], rows[1:]
    cases, controls = get_temperature_info(data_rows, fs.threshold)
    # write the .ind file contents
    for name in names:
        gender = 'U'
        classification = 'Ignore'
        if name in cases:
            classification = 'Case'
        elif name in controls:
            classification = 'Control'
        row = [name, gender, classification]
        print >> out, '\t'.join(row)
    return out.getvalue()
Example #21
def process(args, raw_hud_lines):
    """
    @param args: user options from the web or cmdline
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    """
    names, data = hud.decode(raw_hud_lines)
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # check for sufficient number of eigenvectors
    if len(pcs) < args.ncoords:
        raise ValueError('the number of requested principal components '
                         'must be no more than the number of OTUs')
    # compute the correlation of each SNP vector with each principal component
    mylist = []
    for snp in C_full.T:
        row = [mycorr(snp, pc) for pc in pcs[:args.ncoords]]
        mylist.append(row)
    np.set_printoptions(linewidth=300, threshold=10000)
    return str(np.array(mylist))
Example #22
def process(args, raw_hud_lines):
    """
    @param args: user options from the web or cmdline
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    """
    names, data = hud.decode(raw_hud_lines)
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # check for sufficient number of eigenvectors
    if len(pcs) < args.ncoords:
        raise ValueError(
                'the number of requested principal components '
                'must be no more than the number of OTUs')
    # compute the correlation of each SNP vector with each principal component
    mylist = []
    for snp in C_full.T:
        row = [mycorr(snp, pc) for pc in pcs[:args.ncoords]]
        mylist.append(row)
    np.set_printoptions(linewidth=300, threshold=10000)
    return str(np.array(mylist))
Example #23
def process(hud_lines, info_lines, location):
    """
    @param hud_lines: lines of a .hud file
    @param info_lines: lines of a phenotype .csv file
    @param location: the control location string
    """
    out = StringIO()
    # extract name order from the .hud file
    names, hud_data = hud.decode(hud_lines)
    # read the csv file
    rows = list(csv.reader(info_lines))
    header, data_rows = rows[0], rows[1:]
    cases, controls = get_location_info(data_rows, location)
    # write the .ind file contents
    for name in names:
        gender = 'U'
        classification = 'Ignore'
        if name in cases:
            classification = 'Case'
        elif name in controls:
            classification = 'Control'
        row = [name, gender, classification]
        print >> out, '\t'.join(row)
    return out.getvalue().rstrip()
Example #24
def process(hud_lines, info_lines, location):
    """
    @param hud_lines: lines of a .hud file
    @param info_lines: lines of a phenotype .csv file
    @param location: the control location string
    """
    out = StringIO()
    # extract name order from the .hud file
    names, hud_data = hud.decode(hud_lines)
    # read the csv file
    rows = list(csv.reader(info_lines))
    header, data_rows = rows[0], rows[1:]
    cases, controls = get_location_info(data_rows, location)
    # write the .ind file contents
    for name in names:
        gender = 'U'
        classification = 'Ignore'
        if name in cases:
            classification = 'Case'
        elif name in controls:
            classification = 'Control'
        row = [name, gender, classification]
        print >> out, '\t'.join(row)
    return out.getvalue().rstrip()
Example #25
def process(args, hud_lines):
    """
    @param hud_lines: lines of a .hud file
    @return: results in convenient text form
    """
    out = StringIO()
    # get the ordered names from the .hud file
    names, data = hud.decode(hud_lines)
    # create the floating point count matrix
    C_full = np.array(data)
    m_full, n_full = C_full.shape
    # remove invariant columns
    C = np.vstack([v for v in C_full.T if len(set(v)) > 1]).T
    # get the shape of the matrix
    m, n = C.shape
    # get the column means
    u = C.mean(axis=0)
    # get the centered and normalized counts matrix
    M = (C - u) / np.sqrt(u * (1 - u))
    # construct the sample covariance matrix
    X = np.dot(M, M.T) / n
    # get the eigendecomposition of the covariance matrix
    evals, evecs = EigUtil.eigh(X)
    L1 = evals.sum()
    L2 = np.dot(evals, evals)
    proportion = evals[0] / L1
    # compute the relative size of the first eigenvalue
    L = m * proportion
    # compute the Tracy-Widom statistic
    x = get_tracy_widom_statistic(m, n, L)
    # do linkage correction
    n_prime = ((m + 1) * L1 * L1) / ((m - 1) * L2 - L1 * L1)
    # detect additional structure using alpha level of 0.05
    crit = 0.9794
    if n_prime < n:
        L_prime = (m - 1) * proportion
        x_prime = get_tracy_widom_statistic(m, n_prime, L_prime)
        sigs, insig = get_corrected_structure(crit, evals, m, n_prime)
    else:
        sigs, insig = get_corrected_structure(crit, evals, m, n)
    # print some info
    print >> out, 'number of isolates:'
    print >> out, m_full
    print >> out
    print >> out, 'total number of SNPs:'
    print >> out, n_full
    print >> out
    print >> out, 'number of informative SNPs:'
    print >> out, n
    print >> out
    print >> out, 'effective number of linkage-corrected SNPs:'
    if n_prime < n:
        print >> out, n_prime
    else:
        print >> out, '[sample is too degenerate for estimation]'
    print >> out
    print >> out, 'Tracy-Widom statistic (linkage-naive):'
    print >> out, x
    print >> out
    print >> out, 'Tracy-Widom statistic (linkage-corrected):'
    if n_prime < n:
        print >> out, x_prime
    else:
        print >> out, '[sample is too degenerate for estimation]'
    print >> out
    print >> out, 'proportion of variance explained by principal axis:'
    print >> out, proportion
    print >> out
    print >> out, 'number of significant axes of variation:'
    print >> out, len(sigs)
    print >> out
    print >> out, 'significant Tracy-Widom statistics:'
    for sig in sigs:
        print >> out, sig
    print >> out
    print >> out, 'first insignificant Tracy-Widom statistic:'
    print >> out, insig
    print >> out
    print >> out, 'principal axis projection:'
    for loading, name in sorted(zip(evecs[0] * evals[0], names)):
        print >> out, '\t'.join([name, str(loading)])
    print >> out
    # evals should sum to the number of OTUs
    evals_sum = sum(evals)
    if args.sum_to_n:
        print >> out, 'eigenvalues normalized to sum to the number of OTUs:'
        for w in evals:
            print >> out, m_full * w / float(evals_sum)
    elif args.sum_to_1:
        print >> out, 'eigenvalues normalized to sum to 1.0:'
        for w in evals:
            print >> out, w / float(evals_sum)
    return out.getvalue().rstrip()
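The linkage correction above replaces the raw SNP count n with an effective count n_prime computed from the eigenvalue spectrum (L1 is the sum of the eigenvalues, L2 the sum of their squares). The same arithmetic can be checked in isolation with made-up eigenvalues; the numbers below are purely illustrative:

import numpy as np

evals = np.array([4.0, 2.0, 1.0, 0.5, 0.5])  # hypothetical spectrum for m = 5 isolates
m = len(evals)
L1 = evals.sum()
L2 = np.dot(evals, evals)
n_prime = ((m + 1) * L1 * L1) / ((m - 1) * L2 - L1 * L1)
print n_prime   # 384.0 / 22.0, about 17.45 effective SNPs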
Example #26
def process(args, hud_lines):
    """
    @param hud_lines: lines of a .hud file
    @return: results in convenient text form
    """
    out = StringIO()
    # get the ordered names from the .hud file
    names, data = hud.decode(hud_lines)
    # create the floating point count matrix
    C_full = np.array(data)
    m_full, n_full = C_full.shape
    # remove invariant columns
    C = np.vstack([v for v in C_full.T if len(set(v))>1]).T
    # get the shape of the matrix
    m, n = C.shape
    # get the column means
    u = C.mean(axis=0)
    # get the centered and normalized counts matrix
    M = (C - u) / np.sqrt(u * (1 - u))
    # construct the sample covariance matrix
    X = np.dot(M, M.T) / n
    # get the eigendecomposition of the covariance matrix
    evals, evecs = EigUtil.eigh(X)
    L1 = evals.sum()
    L2 = np.dot(evals, evals)
    proportion = evals[0] / L1
    # compute the relative size of the first eigenvalue
    L = m*proportion
    # compute the Tracy-Widom statistic
    x = get_tracy_widom_statistic(m, n, L)
    # do linkage correction
    n_prime = ((m+1)*L1*L1) / ((m-1)*L2 - L1*L1)
    # detect additional structure using alpha level of 0.05
    crit = 0.9794
    if n_prime < n:
        L_prime = (m-1)*proportion
        x_prime = get_tracy_widom_statistic(m, n_prime, L_prime)
        sigs, insig = get_corrected_structure(crit, evals, m, n_prime)
    else:
        sigs, insig = get_corrected_structure(crit, evals, m, n)
    # print some info
    print >> out, 'number of isolates:'
    print >> out, m_full
    print >> out
    print >> out, 'total number of SNPs:'
    print >> out, n_full
    print >> out
    print >> out, 'number of informative SNPs:'
    print >> out, n
    print >> out
    print >> out, 'effective number of linkage-corrected SNPs:'
    if n_prime < n:
        print >> out, n_prime
    else:
        print >> out, '[sample is too degenerate for estimation]'
    print >> out
    print >> out, 'Tracy-Widom statistic (linkage-naive):'
    print >> out, x
    print >> out
    print >> out, 'Tracy-Widom statistic (linkage-corrected):'
    if n_prime < n:
        print >> out, x_prime
    else:
        print >> out, '[sample is too degenerate for estimation]'
    print >> out
    print >> out, 'proportion of variance explained by principal axis:'
    print >> out, proportion
    print >> out
    print >> out, 'number of significant axes of variation:'
    print >> out, len(sigs)
    print >> out
    print >> out, 'significant Tracy-Widom statistics:'
    for sig in sigs:
        print >> out, sig
    print >> out
    print >> out, 'first insignificant Tracy-Widom statistic:'
    print >> out, insig
    print >> out
    print >> out, 'principal axis projection:'
    for loading, name in sorted(zip(evecs[0] * evals[0], names)):
        print >> out, '\t'.join([name, str(loading)])
    print >> out
    # evals should sum to the number of OTUs
    evals_sum = sum(evals)
    if args.sum_to_n:
        print >> out, 'eigenvalues normalized to sum to the number of OTUs:'
        for w in evals:
            print >> out, m_full * w / float(evals_sum)
    elif args.sum_to_1:
        print >> out, 'eigenvalues normalized to sum to 1.0:'
        for w in evals:
            print >> out, w / float(evals_sum)
    return out.getvalue().rstrip()
Example #27
def get_response_content(fs):
    headers, data_rows = hud.decode(fs.table.splitlines())
    sequences = [''.join(str(x) for x in row) for row in data_rows]
    return Phylip.encode(headers, sequences)
Example #28
def process(raw_hud_lines):
    names, data = hud.decode(raw_hud_lines)
    columns = zip(*data)
    return '\n'.join(''.join(str(x) for x in c) for c in columns)
Example #29
def process(raw_hud_lines):
    names, data = hud.decode(raw_hud_lines)
    columns = zip(*data)
    return '\n'.join(''.join(str(x) for x in c) for c in columns)
Example #30
def get_response_content(fs):
    headers, data_rows = hud.decode(fs.table.splitlines())
    sequences = [''.join(str(x) for x in row) for row in data_rows]
    return Phylip.encode(headers, sequences)