def get_alignment(cls, seq1: str, seq2: str, local: bool = True):
    """
    Generate an alignment between two sequences

    Both sequences are interpreted as protein sequences and are scored
    with the standard protein substitution matrix provided by biotite.

    Parameters
    ----------
    seq1: str
        The first sequence to be aligned
    seq2: str
        The second sequence to be aligned
    local: bool
        If false, a global alignment is performed
        (based on the Needleman-Wunsch algorithm),
        otherwise a local alignment is performed
        (based on the Smith–Waterman algorithm).
        (Default: True)

    Returns
    -------
    Alignment
        An instance of ``cls`` created from the first alignment returned
        by the optimal alignment routine. Its ``metadata`` dictionary
        holds the entries ``"score"``, ``"sequence_identity"``,
        ``"symbols"`` and ``"codes"`` derived from that alignment.
    """
    import biotite.sequence as seq
    import biotite.sequence.align as align

    # create the default matrix
    # TODO add more options for the choice of matrix
    matrix = align.SubstitutionMatrix.std_protein_matrix()
    alignments = align.align_optimal(
        seq.ProteinSequence(seq1),
        seq.ProteinSequence(seq2),
        matrix,
        local=local,
    )

    # `align_optimal` hands back a list; only its first entry is used
    alignment = alignments[0]

    return cls(
        alignment=alignment,
        metadata={
            "score": alignment.score,
            "sequence_identity": align.get_sequence_identity(alignment),
            "symbols": align.get_symbols(alignment),
            "codes": align.get_codes(alignment),
        },
    )
def test_conversion_to_symbols():
    """
    Check that an alignment created from gapped strings is converted
    back into exactly the same gapped strings.
    """
    gapped_strings = [
        "HAKLPRDD--WKL--",
        "HA--PRDDADWKLHH",
        "HA----DDADWKLHH",
    ]

    # The sequences are created from the strings with gap characters
    # removed, while the trace is derived from the gapped strings
    ungapped_sequences = []
    for gapped in gapped_strings:
        ungapped_sequences.append(seq.ProteinSequence(gapped.replace("-", "")))
    trace = align.Alignment.trace_from_strings(gapped_strings)
    alignment = align.Alignment(ungapped_sequences, trace, score=None)

    # Test the conversion back to strings of symbols:
    # a `None` symbol is rendered as the gap character
    restored_strings = [
        "".join("-" if symbol is None else symbol for symbol in row)
        for row in align.get_symbols(alignment)
    ]
    assert restored_strings == gapped_strings
########################################################################
# If you are interested in more advanced visualization examples, have a
# look at the :doc:`example gallery <../examples/gallery/index>`.
#
# You can also do some simple analysis on these objects, like
# determining the sequence identity or calculating the score.
# For further custom analysis, it can be convenient to directly have the
# aligned symbols or their codes instead of the trace.

alignment = alignments[0]
print("Score: ", alignment.score)
print("Recalculated score:", align.score(alignment, matrix=matrix))
print("Sequence identity:", align.get_sequence_identity(alignment))
# Each heading is followed by its value on a new line
print("Symbols:", align.get_symbols(alignment), sep="\n")
print("symbols codes:", align.get_codes(alignment), sep="\n")

########################################################################
#
# .. currentmodule:: biotite.sequence.io.fasta
#
# You may ask, why should you recalculate the score, when the score has
# already been directly calculated via :func:`align_optimal()`.
# The answer is, that you might load an alignment from an external
# alignment program as a FASTA file using :func:`get_alignment()`.
#
# .. currentmodule:: biotite.sequence.align
#
# If you want to perform a multiple sequence alignment, have a look at