示例#1
0
    def test_local_pairwise_align_protein(self):
        """Check local_pairwise_align_protein on one fixed sequence pair.

        Covers two gap-penalty settings, inputs that carry metadata, and
        the TypeError raised for TabularMSA or non-sequence arguments.
        """
        query, subject = "HEAGAWGHEE", "PAWHEAE"
        stiff_gaps = {'gap_open_penalty': 10., 'gap_extend_penalty': 5.}

        # Gap open 10 / extend 5.
        result = local_pairwise_align_protein(
            Protein(query), Protein(subject), **stiff_gaps)
        obs_msa, obs_score, obs_start_end = result

        expected_msa = TabularMSA([Protein("AWGHE"), Protein("AW-HE")])
        self.assertEqual(obs_msa, expected_msa)
        self.assertEqual(obs_score, 26.0)
        self.assertEqual(obs_start_end, [(4, 8), (1, 4)])

        # Gap open 5 / extend 0.5.
        result = local_pairwise_align_protein(
            Protein(query), Protein(subject),
            gap_open_penalty=5., gap_extend_penalty=0.5)
        obs_msa, obs_score, obs_start_end = result

        expected_msa = TabularMSA([Protein("AWGHE-E"), Protein("AW-HEAE")])
        self.assertEqual(obs_msa, expected_msa)
        self.assertEqual(obs_score, 32.0)
        self.assertEqual(obs_start_end, [(4, 9), (1, 6)])

        # Protein sequences with metadata; the expected MSA is built
        # without metadata.
        result = local_pairwise_align_protein(
            Protein(query, metadata={'id': "s1"}),
            Protein(subject, metadata={'id': "s2"}),
            **stiff_gaps)
        obs_msa, obs_score, obs_start_end = result

        expected_msa = TabularMSA([Protein("AWGHE"), Protein("AW-HE")])
        self.assertEqual(obs_msa, expected_msa)
        self.assertEqual(obs_score, 26.0)
        self.assertEqual(obs_start_end, [(4, 8), (1, 4)])

        # Fails when either input is passed as a TabularMSA.  Arguments are
        # built outside the ``with`` block so that only the alignment call
        # itself is expected to raise.
        msa_first = TabularMSA([Protein(query, metadata={'id': "s1"})])
        seq_second = Protein(subject, metadata={'id': "s2"})
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(msa_first, seq_second, **stiff_gaps)

        seq_first = Protein(query, metadata={'id': "s1"})
        msa_second = TabularMSA([Protein(subject, metadata={'id': "s2"})])
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(seq_first, msa_second, **stiff_gaps)

        # TypeError on invalid input
        valid = Protein(query)
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(42, valid)

        valid = Protein(query)
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(valid, 42)
示例#2
0
    def test_local_pairwise_align_protein(self):
        """Check local_pairwise_align_protein with str and Protein inputs.

        Verifies the aligned strings, score, start/end positions and ids for
        two gap-penalty settings, then the TypeError cases (Alignment or
        integer passed as a sequence).
        """
        query, subject = "HEAGAWGHEE", "PAWHEAE"
        stiff_gaps = {'gap_open_penalty': 10., 'gap_extend_penalty': 5.}

        # Plain strings, gap open 10 / extend 5.
        aln = local_pairwise_align_protein(query, subject, **stiff_gaps)
        self.assertEqual(str(aln[0]), "AWGHE")
        self.assertEqual(str(aln[1]), "AW-HE")
        self.assertEqual(aln.score(), 26.0)
        self.assertEqual(aln.start_end_positions(), [(4, 8), (1, 4)])
        self.assertEqual(aln.ids(), ['0', '1'])

        # Plain strings, gap open 5 / extend 0.5.
        aln = local_pairwise_align_protein(
            query, subject, gap_open_penalty=5., gap_extend_penalty=0.5)
        self.assertEqual(str(aln[0]), "AWGHE-E")
        self.assertEqual(str(aln[1]), "AW-HEAE")
        self.assertEqual(aln.score(), 32.0)
        self.assertEqual(aln.start_end_positions(), [(4, 9), (1, 6)])
        self.assertEqual(aln.ids(), ['0', '1'])

        # Protein (rather than str) as input
        aln = local_pairwise_align_protein(
            Protein(query, metadata={'id': "s1"}),
            Protein(subject, metadata={'id': "s2"}),
            **stiff_gaps)
        self.assertEqual(str(aln[0]), "AWGHE")
        self.assertEqual(str(aln[1]), "AW-HE")
        self.assertEqual(aln.score(), 26.0)
        self.assertEqual(aln.start_end_positions(), [(4, 8), (1, 4)])
        self.assertEqual(aln.ids(), ["s1", "s2"])

        # Fails when either input is passed as an Alignment.  Arguments are
        # built outside the ``with`` block so that only the alignment call
        # itself is expected to raise.
        aln_first = Alignment([Protein(query, metadata={'id': "s1"})])
        seq_second = Protein(subject, metadata={'id': "s2"})
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(aln_first, seq_second, **stiff_gaps)

        seq_first = Protein(query, metadata={'id': "s1"})
        aln_second = Alignment([Protein(subject, metadata={'id': "s2"})])
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(seq_first, aln_second, **stiff_gaps)

        # ids are provided if they're not passed in
        aln = local_pairwise_align_protein(
            Protein(query), Protein(subject), **stiff_gaps)
        self.assertEqual(aln.ids(), ['0', '1'])

        # TypeError on invalid input
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(42, query)
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(query, 42)
示例#3
0
    def test_local_pairwise_align_protein(self):
        """Check local_pairwise_align_protein on one fixed sequence pair.

        Covers two gap-penalty settings, inputs that carry metadata (which
        the expected MSA also carries), and the TypeError raised for
        TabularMSA or non-sequence arguments.
        """
        query, subject = "HEAGAWGHEE", "PAWHEAE"
        stiff_gaps = {'gap_open_penalty': 10., 'gap_extend_penalty': 5.}

        # Gap open 10 / extend 5.
        result = local_pairwise_align_protein(
            Protein(query), Protein(subject), **stiff_gaps)
        obs_msa, obs_score, obs_start_end = result

        expected_msa = TabularMSA([Protein("AWGHE"), Protein("AW-HE")])
        self.assertEqual(obs_msa, expected_msa)
        self.assertEqual(obs_score, 26.0)
        self.assertEqual(obs_start_end, [(4, 8), (1, 4)])

        # Gap open 5 / extend 0.5.
        result = local_pairwise_align_protein(
            Protein(query), Protein(subject),
            gap_open_penalty=5., gap_extend_penalty=0.5)
        obs_msa, obs_score, obs_start_end = result

        expected_msa = TabularMSA([Protein("AWGHE-E"), Protein("AW-HEAE")])
        self.assertEqual(obs_msa, expected_msa)
        self.assertEqual(obs_score, 32.0)
        self.assertEqual(obs_start_end, [(4, 9), (1, 6)])

        # Protein sequences with metadata
        result = local_pairwise_align_protein(
            Protein(query, metadata={'id': "s1"}),
            Protein(subject, metadata={'id': "s2"}),
            **stiff_gaps)
        obs_msa, obs_score, obs_start_end = result

        expected_msa = TabularMSA([
            Protein("AWGHE", metadata={'id': "s1"}),
            Protein("AW-HE", metadata={'id': "s2"}),
        ])
        self.assertEqual(obs_msa, expected_msa)
        self.assertEqual(obs_score, 26.0)
        self.assertEqual(obs_start_end, [(4, 8), (1, 4)])

        # Fails when either input is passed as a TabularMSA.  Arguments are
        # built outside the ``with`` block so that only the alignment call
        # itself is expected to raise.
        msa_first = TabularMSA([Protein(query, metadata={'id': "s1"})])
        seq_second = Protein(subject, metadata={'id': "s2"})
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(msa_first, seq_second, **stiff_gaps)

        seq_first = Protein(query, metadata={'id': "s1"})
        msa_second = TabularMSA([Protein(subject, metadata={'id': "s2"})])
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(seq_first, msa_second, **stiff_gaps)

        # TypeError on invalid input
        valid = Protein(query)
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(42, valid)

        valid = Protein(query)
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(valid, 42)
示例#4
0
def calculate_sim(target_protein):
    """Build pairwise local-alignment score and similarity matrices.

    Every sequence is aligned against every sequence (itself included) with
    ``local_pairwise_align_protein`` using its default parameters.

    Parameters
    ----------
    target_protein : object
        Only ``target_protein.seq.tolist()`` is read.  Presumably a pandas
        DataFrame with a ``seq`` column of protein strings -- TODO confirm
        against the caller.

    Returns
    -------
    sim_matrix : numpy.ndarray
        Square array where ``sim_matrix[i, j]`` is the alignment score of
        sequence ``i`` against sequence ``j``.
    sim_value : numpy.ndarray
        Square array of the same shape holding
        ``(sim_matrix[i, j] + sim_matrix[j, i]) /
        (sim_matrix[i, i] + sim_matrix[j, j])``.
    """
    protein_list = target_protein.seq.tolist()
    protein_num = len(protein_list)
    sim_matrix = np.zeros(shape=[protein_num, protein_num])
    print(f'==Start== with protein : {protein_num}')

    # Wrap each sequence once instead of re-wrapping it for every pair.
    proteins = [Protein(seq) for seq in protein_list]

    for i, first in enumerate(proteins):
        for j, second in enumerate(proteins):
            # The call returns (alignment, score, start_end_positions);
            # only the score is kept.
            _, score, _ = local_pairwise_align_protein(
                seq1=first,
                seq2=second,
            )
            sim_matrix[i, j] = score

    # Normalise each symmetrised score by the sum of the two self-alignment
    # scores; broadcasting the diagonal builds the full denominator grid.
    self_scores = np.diag(sim_matrix)
    sim_value = (sim_matrix + sim_matrix.T) / (
        self_scores[:, None] + self_scores[None, :])

    return sim_matrix, sim_value
示例#5
0
文件: sw.py 项目: yesgomez/hw3_BMI203
def align(seq1, seq2, go, ge):
    """Return the scikit-bio local alignment score for two sequences.

    ``seq1`` and ``seq2`` are handed to ``read_seq``; the two values it
    returns are wrapped as ``Protein`` (``lowercase=True``) and aligned with
    gap-open penalty ``go`` and gap-extend penalty ``ge``.  No custom
    substitution matrix is supplied (``substitution_matrix=None``).  The
    score is printed and returned.
    """
    first, second = read_seq(seq1, seq2)
    # NOTE(review): the original carried a commented-out
    # ``read_matrix(sys.argv[1])`` call here; no score matrix is loaded.
    result = local_pairwise_align_protein(
        Protein(first, lowercase=True),
        Protein(second, lowercase=True),
        gap_open_penalty=go,
        gap_extend_penalty=ge,
        substitution_matrix=None,
    )
    # Only the score is used; the alignment and positions are discarded.
    _msa, score, _positions = result
    print("\nScore:", score)
    return score
示例#6
0
    def test_local_pairwise_align_protein(self):
        """Check local_pairwise_align_protein with str and Protein inputs.

        Verifies the aligned strings, score, start/end positions and ids for
        two gap-penalty settings, then the TypeError cases (Alignment or
        integer passed as a sequence).
        """
        query, subject = "HEAGAWGHEE", "PAWHEAE"
        stiff_gaps = {'gap_open_penalty': 10., 'gap_extend_penalty': 5.}

        # Plain strings, gap open 10 / extend 5.
        aln = local_pairwise_align_protein(query, subject, **stiff_gaps)
        self.assertEqual(str(aln[0]), "AWGHE")
        self.assertEqual(str(aln[1]), "AW-HE")
        self.assertEqual(aln.score(), 26.0)
        self.assertEqual(aln.start_end_positions(), [(4, 8), (1, 4)])
        self.assertEqual(aln.ids(), ['0', '1'])

        # Plain strings, gap open 5 / extend 0.5.
        aln = local_pairwise_align_protein(
            query, subject, gap_open_penalty=5., gap_extend_penalty=0.5)
        self.assertEqual(str(aln[0]), "AWGHE-E")
        self.assertEqual(str(aln[1]), "AW-HEAE")
        self.assertEqual(aln.score(), 32.0)
        self.assertEqual(aln.start_end_positions(), [(4, 9), (1, 6)])
        self.assertEqual(aln.ids(), ['0', '1'])

        # Protein (rather than str) as input
        aln = local_pairwise_align_protein(
            Protein(query, "s1"), Protein(subject, "s2"), **stiff_gaps)
        self.assertEqual(str(aln[0]), "AWGHE")
        self.assertEqual(str(aln[1]), "AW-HE")
        self.assertEqual(aln.score(), 26.0)
        self.assertEqual(aln.start_end_positions(), [(4, 8), (1, 4)])
        self.assertEqual(aln.ids(), ["s1", "s2"])

        # Fails when either input is passed as an Alignment.  Arguments are
        # built outside the ``with`` block so that only the alignment call
        # itself is expected to raise.
        aln_first = Alignment([Protein(query, "s1")])
        seq_second = Protein(subject, "s2")
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(aln_first, seq_second, **stiff_gaps)

        seq_first = Protein(query, "s1")
        aln_second = Alignment([Protein(subject, "s2")])
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(seq_first, aln_second, **stiff_gaps)

        # ids are provided if they're not passed in
        aln = local_pairwise_align_protein(
            Protein(query), Protein(subject), **stiff_gaps)
        self.assertEqual(aln.ids(), ['0', '1'])

        # TypeError on invalid input
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(42, query)
        with self.assertRaises(TypeError):
            local_pairwise_align_protein(query, 42)
示例#7
0
def seq_score(s1, s2):
    """Return the local alignment score of ``s1`` against ``s2``.

    The alignment runs with ``gap_open_penalty=11`` and
    ``gap_extend_penalty=1``; the alignment and start/end positions that
    come back alongside the score are discarded.
    """
    result = local_pairwise_align_protein(
        s1, s2, gap_open_penalty=11, gap_extend_penalty=1)
    _aligned, score, _positions = result
    return score
示例#8
0
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load data_check[-1] when it comes from a trusted source.
with open(data_check[-1], 'rb') as f:
    protein_data = pickle.load(f)

from skbio.alignment import local_pairwise_align_protein
from skbio import Protein
from skbio.alignment import local_pairwise_align
# %%
# Score every pair among (at most) the first ten sequences.
seq_list = protein_data.iloc[:10].seq.tolist()
protein_num = len(seq_list)
similarity_matrix = np.zeros((protein_num, protein_num))

for i, row_seq in enumerate(seq_list):
    for j, col_seq in enumerate(seq_list):
        protein_similarity = local_pairwise_align_protein(
            seq1=Protein(row_seq), seq2=Protein(col_seq))
        print(protein_similarity)

        # Element 1 of the returned tuple is stored as the (i, j) score.
        similarity_matrix[i, j] = protein_similarity[1]
        print(similarity_matrix)

# Normalise each symmetrised score by the sum of the two self-scores.
similarity_value = np.zeros_like(similarity_matrix)

for i in range(protein_num):
    for j in range(protein_num):
        pair_total = similarity_matrix[i, j] + similarity_matrix[j, i]
        self_total = similarity_matrix[i, i] + similarity_matrix[j, j]
        value = pair_total / self_total

        similarity_value[i, j] = value
        similarity_value[j, i] = value