示例#1
0
    def test_on_protein(self):
        """Check row filtering by gap fraction on a protein alignment.

        Builds a 16-row protein alignment, filters it with
        ``max_gaps=1 / 21`` and verifies that: some rows were removed, no
        surviving row has a fraction of ``'-'`` entries above the
        threshold, and the number of surviving rows matches the number of
        input rows at or below the threshold.
        """
        from multicov.alignment import Alignment
        from multicov.alphabet import protein_alphabet
        from multicov.filtering import filter_rows

        sequences = [
            'WKHNAYDMLSSDCQFESSHKHTQVCSAGMGYCAKPNNWGYW-LIVKMMW-CDYKQKLIYIAPLN',
            'KHRCDANC-MLAN-SVIKYTHSACALIWTWNS-KIIRYFFVGAWFKEHFDSVPTAQACVCDSTP',
            'LGVVGYYFKPCT-EVPSYSRFNVFHRIFPYLVYRVEE-NHTGHHVQ-KIVRNQYELRSIFDEHG',
            'LIGDDHRN-LALCPS-T-GTTCCNWKWRSEWTMHSDTNCNPVAE--SYSKRCNDIGYITWINYA',
            'CMPRYWYTYQYDCIFGWRFYSVYWPCLDDMFWQPYVDSMELF-NPMVATEWIMENCQGWG-N-K',
            'QWFWRARPFE--FSC-C-PGP-GWVNLIDWMSCNKAMETLMRPYCNPYLKIQLPRSKNLLDDDG',
            'VTMPEGHHCPAM-PLDLNGQR-KMWGSDFKKEDCKGYPEKFDCENLIDMDICLSLNTRPED-QR',
            'LNYINMHVD-IGP-PCPQYDL--KFKCMYW-GQIEDV-NMQ-WKK-RTMDAVEQIVSMYHMSVE',
            'WHV-EWKPVLC-PHWQFYM-VITEYVAMFQWCPPKGMASPKKGNLPRMFQSAKAIGAHRSDM-Y',
            'PIWGGFNFPWID-GSQRQQR-EVTTGCDDFEHKYNPYLVPG-WEFGKYSNCWT-RCWRVNHDTV',
            'PPCWVEAPYKPMGMWN-GRKV-NVAVWHHVIVL-DMYGLHLLRDWTMVKNAAHIFSHNMEMSNI',
            'E-MWRGLIWSKGAY-YQNDNGTFNWPKQKHP-ARCSF-PTVNKDQNPGP-MVQMREFKSQQGQQ',
            'RFGKFTCMGFRWKEYFTKQ-NPYKYRGIVHVKVQMIYSANGNLDWIDIPMIIRLKCPFGTRVTQ',
            'CGRCGSH-EWL-NIMRNCKFIFWWRPTNAAHIWCARHESPKAD-QIAMTYRML-LDAHIIIVR-',
            'T-PMVWRLVWYDHGCDPWMLIV-PIEPCVVKKPQYKDMERFSPDIKCHYLHDKDDGFWGSDKYI',
            'LNCPYADLDGL-NPQR-FVVS-RCMRDGFRAVVRVSPDDLS-MWCKAGA-NTTV-DNRH-IVQW'
        ]
        max_gap_fraction = 1 / 21

        original = Alignment(sequences, protein_alphabet)
        filtered = filter_rows(original, max_gaps=max_gap_fraction)

        # Per-row fraction of '-' entries, before and after filtering.
        # noinspection PyTypeChecker
        gaps_before = np.mean(original.data == '-', axis=1)
        # noinspection PyTypeChecker
        gaps_after = np.mean(filtered.data == '-', axis=1)

        # Number of input rows that are at or below the threshold.
        n_within_threshold = np.sum(gaps_before <= max_gap_fraction)

        # The fixture is expected to lose at least one row.
        self.assertLess(len(filtered), len(original))
        self.assertLessEqual(np.max(gaps_after), max_gap_fraction)
        self.assertEqual(n_within_threshold, len(filtered))
示例#2
0
    def test_returns_copy_even_when_unchanged(self):
        """Check that filter_rows returns a distinct object when it keeps all rows.

        A three-row alignment with a protein part and a DNA part is
        filtered with a threshold of 0.9; the result must still have three
        rows and must not be the very same object as the input.
        """
        from multicov.alignment import Alignment
        from multicov.alphabet import dna_alphabet, protein_alphabet
        from multicov.filtering import filter_rows

        protein_rows = ['IVGGYTCQ', '-VGGTEAQ', 'IGG-KDT-']
        dna_rows = ['ATACAT', 'GATACA', 'AA--GG']

        source = Alignment(protein_rows, protein_alphabet)
        source.add(dna_rows, dna_alphabet)

        result = filter_rows(source, 0.9)

        self.assertEqual(len(result), 3)
        # Identity check, not equality: the input must not be handed back.
        self.assertIsNot(result, source)
示例#3
0
    def test_on_multi_alpha(self):
        """Check filter_rows on an alignment that combines two alphabets.

        A three-row alignment with a protein part and a DNA part is
        filtered with a threshold of 0.2; the result is expected to equal
        an alignment built from only the first two rows of each part.
        """
        from multicov.alignment import Alignment
        from multicov.alphabet import dna_alphabet, protein_alphabet
        from multicov.filtering import filter_rows

        protein_rows = ['IVGGYTCQ', '-VGGTEAQ', 'IGG-KDT-']
        dna_rows = ['ATACAT', 'GATACA', 'AA--GG']

        mixed = Alignment(protein_rows, protein_alphabet)
        mixed.add(dna_rows, dna_alphabet)
        filtered = filter_rows(mixed, 0.2)

        # Reference alignment: the same data without the third row.
        kept_protein = ['IVGGYTCQ', '-VGGTEAQ']
        kept_dna = ['ATACAT', 'GATACA']
        expected = Alignment(kept_protein, protein_alphabet).add(
            kept_dna, dna_alphabet)

        self.assertEqual(filtered, expected)
示例#4
0
 def test_on_empty(self):
     """Check that filtering an empty alignment gives an empty alignment.

     ``filter_rows`` is called with its default arguments on a freshly
     constructed ``Alignment()``; the result must compare equal to
     another freshly constructed ``Alignment()``.
     """
     from multicov.alignment import Alignment
     from multicov.filtering import filter_rows

     filtered = filter_rows(Alignment())
     self.assertEqual(Alignment(), filtered)