def run_group_alignment(sequence_group):
    """Align every sequence of a group against the group's longest member.

    Args:
        sequence_group: container iterated for sequence ids and indexed by
            those ids to obtain sequence strings. ``'-'`` characters are
            stripped from each string before a record is built.

    Returns:
        dict with three keys:
            ``'ref'``: FASTA text of the reference record,
            ``'alignment'``: FASTA text of the alignment (for a group of
                one this is the same text as ``'ref'``),
            ``'seqs'``: the list of ungapped records built from the input.

    Raises:
        ValueError: from ``max()`` when ``sequence_group`` is empty.
    """
    # NOTE(review): Bio.Alphabet is absent from Biopython >= 1.78 -- confirm
    # which Biopython release this file is pinned to.
    seqrecords = [
        Bio.SeqRecord.SeqRecord(
            Bio.Seq.Seq(
                sequence_group[seq_id].replace('-', ''),
                Bio.Alphabet.IUPAC.IUPACAmbiguousDNA,
            ),
            id=seq_id,
            name=seq_id,
            description='',
        )
        for seq_id in sequence_group
    ]

    # A lone sequence needs no alignment: it is its own reference.
    if len(seqrecords) == 1:
        only_fasta = seqrecords[0].format('fasta')
        return {'ref': only_fasta, 'alignment': only_fasta, 'seqs': seqrecords}

    # The longest record is the reference; max() keeps the first one on ties.
    refseq = max(seqrecords, key=lambda record: len(record.seq))

    score_matrix = BLOSUM62.load()
    msa, _discarded = align_to_refseq(
        refseq,
        seqrecords,
        score_matrix=score_matrix,
        codon=True,
        expected_identity=0.6,
        keep_insertions=False,
    )

    # Serialise the alignment to FASTA text through an in-memory buffer.
    with io.StringIO() as fasta_buffer:
        Bio.SeqIO.write(msa, fasta_buffer, "fasta")
        alignment_text = fasta_buffer.getvalue()

    return {
        'ref': refseq.format('fasta'),
        'alignment': alignment_text,
        'seqs': seqrecords,
    }
def run_group_alignment(seqrecords, refseqs):
    """Score each query record against every reference sequence.

    For each reference, the queries are aligned to it with
    ``align_to_refseq`` and each aligned query receives the mean of two
    ratios taken from the alignment annotations: its ``'_pbpscore'`` and
    its ``'_nbpidentical'``, each divided by the corresponding value of the
    first row of the alignment.

    Args:
        seqrecords: list of query records exposing ``.id``. The list is
            not modified; the alignment runs on a private copy.
        refseqs: mapping from a reference type to an iterable of reference
            records exposing ``.id``.

    Returns:
        dict where ``'names'`` maps each type to the list of reference ids
        in iteration order, and every type key maps to
        ``{reference_id: scores}``. ``scores`` is a list indexed by the
        query's slot (assigned in ``seqrecords`` order); a slot stays
        ``None`` when that query is not present in the returned alignment.
    """
    sm = BLOSUM62.load()
    seq_scores = {'names': {}}

    # Slot of each query id in the per-reference score lists.
    sequence_names = {}
    for record in seqrecords:
        sequence_names[record.id] = len(sequence_names)

    # Index 0 is reserved for the current reference. Working on a copy keeps
    # the caller's list intact, so the function can be called again on it.
    alignment_input = [None] + list(seqrecords)

    for ref_type in refseqs:
        seq_scores[ref_type] = {}
        seq_scores['names'][ref_type] = []
        for ref in refseqs[ref_type]:
            alignment_input[0] = ref
            seq_scores['names'][ref_type].append(ref.id)
            msa, discarded = align_to_refseq(
                ref,
                alignment_input,
                score_matrix=sm,
                codon=True,
                expected_identity=0.4,
                keep_insertions=True,
                quiet=True,
            )
            if discarded:
                print(discarded)

            # First row supplies the normalising values -- presumably the
            # reference aligned to itself, since it is passed first; confirm
            # against align_to_refseq.
            max_score = msa[0].annotations['_pbpscore']
            max_len = msa[0].annotations['_nbpidentical']

            scores = [None] * len(sequence_names)
            seq_scores[ref_type][ref.id] = scores

            msa.pop(0)
            for successful in msa:
                idx = sequence_names[successful.id]
                scores[idx] = (
                    successful.annotations['_pbpscore'] / max_score
                    + successful.annotations['_nbpidentical'] / max_len
                ) * 0.5
    return seq_scores
def _blended_score(annotations, max_score, max_len):
    """Return the mean of the score ratio and the identity ratio."""
    return (annotations['_pbpscore'] / max_score +
            annotations['_nbpidentical'] / max_len) * 0.5


def run_group_alignment(seqrecords, refseqs):
    """Compute per-reference similarity scores for a list of query records.

    Every reference in ``refseqs`` is aligned with the queries through
    ``align_to_refseq``; each query found in the resulting alignment gets
    the average of its ``'_pbpscore'`` and ``'_nbpidentical'`` annotations,
    each divided by the matching annotation of the alignment's first row.

    Args:
        seqrecords: list of query records exposing ``.id``. It is left
            untouched; a copy with one extra leading slot is aligned.
        refseqs: mapping from a reference type to an iterable of reference
            records exposing ``.id``.

    Returns:
        dict with a ``'names'`` entry (type -> list of reference ids, in
        iteration order) and one entry per type holding
        ``{reference_id: scores}``. ``scores`` is a list with one slot per
        query id; slots of queries missing from the alignment stay
        ``None``.
    """
    sm = BLOSUM62.load()
    seq_scores = {'names': {}}

    # Map each query id to its slot in the score lists.
    sequence_names = {}
    for query in seqrecords:
        sequence_names[query.id] = len(sequence_names)
    slot_count = len(sequence_names)

    # The reference occupies position 0 of the alignment input. Copying here
    # avoids leaving that extra element in the caller's list, which would
    # corrupt the slot numbering on any later call with the same list.
    work_records = [None] + list(seqrecords)

    for ref_type in refseqs:
        seq_scores[ref_type] = {}
        seq_scores['names'][ref_type] = []
        for reference in refseqs[ref_type]:
            work_records[0] = reference
            seq_scores['names'][ref_type].append(reference.id)
            msa, discarded = align_to_refseq(reference,
                                             work_records,
                                             score_matrix=sm,
                                             codon=True,
                                             expected_identity=0.4,
                                             keep_insertions=True,
                                             quiet=True)
            if discarded:
                print(discarded)

            # Normalising values come from the first alignment row
            # (presumably the reference itself -- confirm against
            # align_to_refseq).
            max_score = msa[0].annotations['_pbpscore']
            max_len = msa[0].annotations['_nbpidentical']

            per_query = [None] * slot_count
            seq_scores[ref_type][reference.id] = per_query

            msa.pop(0)
            for successful in msa:
                per_query[sequence_names[successful.id]] = _blended_score(
                    successful.annotations, max_score, max_len)
    return seq_scores