Example #1
def write_groups(groups, out_csv):
    # CSV layout: group columns, peptide attribute columns, then a spacer
    # and one column per residue of the aligned sequence
    headers = ['group', 'other_groups', 'align', 'consistent', 'sequence']
    titles = list(groups[0]['peptides'][0]['attr'].keys())
    headers.extend(titles)

    rows = [headers]
    for group in groups:
        calc_alignment(group)
        check_misalignment(group)

        for peptide in group['peptides']:
            row = []
            row.append(group['i'])

            # other groups this peptide also belongs to
            other_groups = [
                str(i_group) for i_group in peptide['groups']
                if i_group != group['i']]
            row.append(';'.join(other_groups))

            row.append(group['alignment'])
            row.append(not group['misaligned'])
            row.append(peptide['sequence'])

            for title in titles:
                row.append(peptide['attr'].get(title, ''))

            row.append(' ')
            for c in peptide['aligned_sequence']:
                row.append(c)

            rows.append(row)

        # summary row for the group
        rows.append([
            group['i'],
            '',
            group['alignment'],
            not group['misaligned'],
        ])

    datafile.write_csv(rows, out_csv)
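The `datafile` module itself is not shown in these examples, but every snippet calls `datafile.write_csv(rows, fname)` with a list of row lists and an output path. A minimal stand-in built on the standard-library `csv` module could look like the sketch below (an assumption, not the project's actual implementation):

import csv

def write_csv(rows, csv_fname):
    # Minimal stand-in for datafile.write_csv (assumed behaviour):
    # write each row, a list of values, as one line of csv_fname.
    with open(csv_fname, 'w', newline='') as f:
        csv.writer(f).writerows(rows)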
Example #2
def run(
    motif_csv, test_fasta, out_html, aa_probs,
    variant='fixed', 
    alpha=1.0, beta=0.01):

    log_odds_pssm = get_motif_log_odds(
        motif_csv, aa_probs, 
        alpha=alpha, beta=beta, 
        variant=variant)

    write_log_odds(log_odds_pssm, motif_csv.replace('.pssm', '').replace('.csv', '.pssm'))

    n_position = len(log_odds_pssm.keys())

    seqids, fastas = uniprot.read_fasta(test_fasta)

    saved_scores = []

    html = "<body>"
    for seqid, entry in fastas.items():
        # print "Sequence", seqid
        test_seq = entry['sequence']
        scores = score_full_sequence(log_odds_pssm, test_seq)
        html += "<h1>%s</h1>" % entry['description']
        html += make_html_for_seq_scores(test_seq, n_position, scores)
        saved_scores.append({
            'seqid': seqid,
            'description': entry['description'],
            'scores': scores,
            'seq': test_seq
        })
    html += "</body>"
    with open(out_html, 'w') as f:
        f.write(html)

    titles = ['seqid', 'description', 'position', 'seq', 'score']
    rows = [titles]
    for entry in saved_scores:
        n_seq = len(entry['seq'])
        for i in range(0, n_seq - n_position):
            score = entry['scores'][i]
            pos = i + 1
            if score > 0:
                # keep the same column order as `titles`: position, window, score
                row = [
                    entry['seqid'],
                    entry['description'],
                    pos,
                    entry['seq'][i:i+n_position],
                    score,
                ]
                rows.append(row)
    datafile.write_csv(rows, out_html + '.csv')
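`score_full_sequence` is not part of the excerpt. Judging from how `run` uses it, it returns one score per window start, where the window length equals the number of PSSM positions. A possible sketch, assuming `log_odds_pssm` maps each 0-based motif position to a dict of per-residue log-odds (the project's real function may differ):

def score_full_sequence(log_odds_pssm, seq):
    # Assumed PSSM layout: {position: {amino_acid: log_odds}}.
    # Each window of length n_position gets the sum of the log-odds of its
    # residues; residues missing from the PSSM contribute 0.
    n_position = len(log_odds_pssm)
    scores = []
    for i in range(len(seq) - n_position + 1):
        window = seq[i:i + n_position]
        scores.append(sum(log_odds_pssm[j].get(aa, 0.0)
                          for j, aa in enumerate(window)))
    return scores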
Example #3
def write_overlap_kernels(groups, out_csv):

    headers = ['group', 'size_group', 'sequence']

    rows = [headers]

    for group in groups:
        rows.append([
            group['i'],
            len(group['peptides']),
            get_kernel(group)
        ])

    datafile.write_csv(rows, out_csv)
Example #4
def write_subset_kernels(groups, out_csv):

    headers = ['group', 'size_group', 'alignment', 'sequence']

    rows = [headers]

    for group in groups:
        rows.append([
            group['i'],
            len(group['peptides']),
            group['alignment'],
            group['shortest']['sequence'],
        ])

    datafile.write_csv(rows, out_csv)
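`write_groups`, `write_overlap_kernels` and `write_subset_kernels` all iterate over the same `groups` list. Inferring only from the keys they read, one group entry looks roughly like this (illustrative values, not real data):

# Shape of one `groups` entry, inferred from the keys the write_* helpers
# read; all values below are made up for illustration.
example_group = {
    'i': 0,                           # group index
    'alignment': 'KLVFFAEDV',         # set by calc_alignment(group)
    'misaligned': False,              # set by check_misalignment(group)
    'shortest': {'sequence': 'KLVFFA'},
    'peptides': [
        {
            'sequence': 'KLVFFAEDV',
            'aligned_sequence': '..KLVFFAEDV..',
            'groups': [0, 3],         # indices of every group containing this peptide
            'attr': {'score': 42.0, 'charge': 2},
        },
    ],
}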
Example #5
def make_csv(data, csv_fname):
    rows = []
    header = ['seqid', 'description', 'n_res', 'DP', 'DQ', 'DR', 'all']
    rows.append(header)
    for seqid, protein in data.items():
        n_res = protein['length']
        sources = protein['sources']
        n_source = len(sources)

        row = [seqid, protein['description'], n_res]

        # one coverage mask per source, plus a combined mask in the last slot
        coverage = [[0] * n_res for _ in range(n_source + 1)]
        for i_source in range(n_source):
            source = sources[i_source]
            for i, j in source['intervals']:
                for k in range(i, j):
                    coverage[i_source][k] = 1
                    coverage[-1][k] = 1
            # percent of residues covered by this source
            row.append(sum(coverage[i_source]) / float(n_res) * 100.0)
        # percent of residues covered by any source
        row.append(sum(coverage[-1]) / float(n_res) * 100.0)
        rows.append(row)
    datafile.write_csv(rows, csv_fname)
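The coverage arithmetic in `make_csv` marks every residue inside each source's intervals and reports the covered fraction as a percentage. A small worked example with made-up numbers:

# Worked example of the coverage calculation, with made-up numbers.
n_res = 10
intervals = [(0, 4), (2, 6)]             # half-open residue ranges from one source
mask = [0] * n_res
for i, j in intervals:
    for k in range(i, j):
        mask[k] = 1
print(sum(mask) / float(n_res) * 100.0)  # 60.0: residues 0-5 are covered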
Example #6
    # width bookkeeping: `left` shifts negative offsets to zero,
    # `max_len` is the width of the widest padded sequence
    left = -min(indices)
    max_len = 0
    for index, seq in zip(indices, sequences):
        str_len = left + index + len(seq)
        if str_len > max_len:
            max_len = str_len

    for index, peptide in zip(indices, group['peptides']):
        row = []

        row.append(group['i'])
        row.append(peptide['sequence'])
        row.append(peptide['modifications'])
        row.append(peptide['protein'])

        # spacer column, then the peptide padded to its alignment offset,
        # one character per column with '.' marking the padding
        row.append(' ')

        seq = peptide['sequence']
        s = ' ' * (left + index) + seq
        s = s + ' ' * (max_len - len(s))
        for c in s:
            if c == ' ':
                row.append('.')
            else:
                row.append(c)
        rows.append(row)

    rows.append([])

datafile.write_csv(rows, 'cluster.csv')
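This last snippet is a fragment from the middle of a larger script: `rows`, `indices` and `sequences` are built before the excerpt, and the final `datafile.write_csv` call sits outside the per-group loop. One way the surrounding scaffolding might look, with the alignment offsets taken from a hypothetical `offset` field (every name outside the fragment is an assumption):

def write_cluster_csv(groups, out_csv='cluster.csv'):
    # Hypothetical scaffolding around the fragment above; in the real script
    # the per-peptide alignment offsets (`indices`) are computed elsewhere.
    rows = []
    for group in groups:
        sequences = [p['sequence'] for p in group['peptides']]
        indices = [p.get('offset', 0) for p in group['peptides']]  # assumed field

        left = -min(indices)
        max_len = max(left + index + len(seq)
                      for index, seq in zip(indices, sequences))

        for index, peptide in zip(indices, group['peptides']):
            row = [group['i'], peptide['sequence'],
                   peptide['modifications'], peptide['protein'], ' ']
            s = ' ' * (left + index) + peptide['sequence']
            s += ' ' * (max_len - len(s))
            row.extend('.' if c == ' ' else c for c in s)
            rows.append(row)
        rows.append([])

    datafile.write_csv(rows, out_csv)  # as in the examples above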