def write_groups(groups, out_csv):
    """Write a per-peptide alignment report for every group to out_csv.

    Each peptide produces one row (group index, sibling groups, alignment,
    consistency flag, sequence, the peptide's attributes, then the aligned
    sequence spread one character per column).  Each group is followed by a
    short summary row.  ``calc_alignment``/``check_misalignment`` are called
    on every group first so that 'alignment' and 'misaligned' are populated.
    """
    # Attribute column titles come from the first peptide of the first group;
    # assumes all peptides share (a subset of) these keys — missing ones
    # become blank cells below.
    titles = groups[0]['peptides'][0]['attr'].keys()
    headers = ['group', 'other_groups', 'align', 'consistent', 'sequence']
    headers.extend(titles)
    rows = [headers]
    for group in groups:
        calc_alignment(group)
        check_misalignment(group)
        for peptide in group['peptides']:
            # Every other group this peptide belongs to, ';'-separated.
            others = [str(i) for i in peptide['groups'] if i != group['i']]
            row = [
                group['i'],
                ';'.join(others) if others else '',
                group['alignment'],
                not group['misaligned'],
                peptide['sequence'],
            ]
            for title in titles:
                row.append(peptide['attr'].get(title, ''))
            # Spacer column, then the aligned sequence one char per cell.
            row.append(' ')
            row.extend(peptide['aligned_sequence'])
            rows.append(row)
        # Group summary row.
        rows.append([
            group['i'], '', group['alignment'], not group['misaligned'],
        ])
    datafile.write_csv(rows, out_csv)
def run( motif_csv, test_fasta, out_html, aa_probs, variant='fixed', alpha=1.0, beta=0.01): log_odds_pssm = get_motif_log_odds( motif_csv, aa_probs, alpha=alpha, beta=beta, variant=variant) write_log_odds(log_odds_pssm, motif_csv.replace('.pssm', '').replace('.csv', '.pssm')) n_position = len(log_odds_pssm.keys()) seqids, fastas = uniprot.read_fasta(test_fasta) saved_scores = [] html = "<body>" for seqid, entry in fastas.items(): # print "Sequence", seqid test_seq = entry['sequence'] scores = score_full_sequence(log_odds_pssm, test_seq) html += "<h1>%s</h1>" % entry['description'] html += make_html_for_seq_scores(test_seq, n_position, scores) saved_scores.append({ 'seqid': seqid, 'description': entry['description'], 'scores': scores, 'seq': test_seq }) html += "</body>" open(out_html, 'w').write(html) titles = ['seqid', 'description', 'position', 'seq', 'score'] rows = [titles] for entry in saved_scores: n_seq = len(entry['seq']) for i in range(0, n_seq - n_position): score = entry['scores'][i] pos = i + 1 if score > 0: row = [ entry['seqid'], entry['description'], pos, score, entry['seq'][i:i+n_position] ] rows.append(row) datafile.write_csv(rows, out_html + '.csv')
def write_overlap_kernels(groups, out_csv):
    """Write one CSV row per group: index, peptide count, overlap kernel."""
    rows = [['group', 'size_group', 'sequence']]
    for grp in groups:
        rows.append([grp['i'], len(grp['peptides']), get_kernel(grp)])
    datafile.write_csv(rows, out_csv)
def write_subset_kernels(groups, out_csv):
    """Write one CSV row per group: index, size, alignment, shortest peptide.

    The kernel reported here is the sequence of each group's precomputed
    'shortest' peptide.
    """
    rows = [['group', 'size_group', 'alignment', 'sequence']]
    for grp in groups:
        rows.append([
            grp['i'],
            len(grp['peptides']),
            grp['alignment'],
            grp['shortest']['sequence'],
        ])
    datafile.write_csv(rows, out_csv)
def make_csv(data, csv_fname): rows = [] header = ['seqid', 'description' 'n_res', 'DP', 'DQ', 'DR', 'all'] rows.append(header) for seqid in data: protein = data[seqid] n_res = protein['length'] sources = protein['sources'] n_source = len(sources) row = [seqid, protein['description']] coverage = [[0] * n_res for i in range(n_source + 1)] for i_source in range(n_source): source = sources[i_source] for i, j in source['intervals']: for k in range(i, j): coverage[i_source][k] = 1 coverage[-1][k] = 1 row.append(sum(coverage[i_source]) / float(n_res) * 100.0) row.append(sum(coverage[-1]) / float(n_res) * 100.0) rows.append(row) datafile.write_csv(rows, csv_fname)
left = -min(indices) max_len = 0 for index, seq in zip(indices, sequences): str_len = left + index + len(seq) if str_len > max_len: max_len = str_len for index, peptide in zip(indices, group['peptides']): row = [] row.append(group['i']) row.append(peptide['sequence']) row.append(peptide['modifications']) row.append(peptide['protein']) row.append(' ') seq = peptide['sequence'] s = ' ' * (left + index) + seq s = s + ' ' * (max_len - len(s)) for c in s: if c == ' ': row.append('.') else: row.append(c) rows.append(row) rows.append([]) datafile.write_csv(rows, 'cluster.csv')