def test_generated_envs(self):
    """Compare generated environments against the reference environments.

    Generates 1266 environments from ``self.log_pam1`` and checks, entry by
    entry against ``self.alignment_environments``, that the scalar
    attributes (``pam``, ``threshold``, ``gap_open``, ``gap_ext``) and every
    cell of the 26x26 ``float64_matrix`` are almost equal.
    """
    print('Testing generated matrices')

    env_count = 1266
    matrix_dim = 26
    scalar_fields = ('pam', 'threshold', 'gap_open', 'gap_ext')

    generated = pyopa.generate_all_env(self.log_pam1, env_count)

    for idx in range(env_count):
        expected = self.alignment_environments[idx]
        actual = generated[idx]

        # Scalar parameters first, in the same order as they are listed.
        for field in scalar_fields:
            self.assertAlmostEqual(getattr(expected, field),
                                   getattr(actual, field))

        # Then the full score matrix, cell by cell.
        for row in range(matrix_dim):
            for col in range(matrix_dim):
                self.assertAlmostEqual(
                    expected.float64_matrix[row][col],
                    actual.float64_matrix[row][col])
for i in range(20): for j in range(20): qsum[i] += json_data['scores'][j][i] for i in range(20): json_data['scores'][i][i] = -qsum[i] return json_data json_matrix = convert_to_json(os.path.dirname(os.path.abspath(__file__)) + '/test/data/jtt.dat') for i in range(20): print(json_matrix['scores'][i]) log_pam1 = pyopa.read_env_json( os.path.dirname(os.path.abspath(__file__)) + '/test/data/matrices/json/logPAM1.json') envs = pyopa.read_all_env_json( os.path.dirname(os.path.abspath(__file__)) + '/test/data/matrices/json/all_matrices.json') generated_envs = pyopa.generate_all_env(log_pam1, 1266) mul1 = pyopa.MutipleAlEnv(envs, log_pam1) mul2 = pyopa.MutipleAlEnv(generated_envs, log_pam1) s1 = 'PDVRTQYSRTKTIKLAQVRKCGAWRVLCLDLIPDLTAKNNHMRTKWTEVQYLAFVVSIVKKRPLSHSLVLITTGKAWNGTWRALPRLSNKLIETAFKEIQAEETVYDTKAFVAGKKPRWVSPFICYGLPFVISRFDFAQYRLKDMLILFSDMLLSRICNFYNGNTGPVPNSKTNEDTDLFFDGLSGMLKLNLKRSDAICHVICYEAPIARVKFGREVKDKFSLPKGGKNPSRRISWNILGILIDRTMFIRPRLVARKEAIHLFDLIGENIDAITQRLRAHKTLMVHESQVVEQPLKVKNLDLRPELVGEEEKNRHGRAKQLDRMANGNMAQIKNGHFKQTYLISVFRPQWLQLQGGCLIAEGFHSEVGGTVDGLKGTPCAQGPVVKGLFAVWRRCDTLAGRYYQKAADIDKLGDILLASLYYIPQGAIITLSEEMAKRIGANVLLVGLINVRYSGIGYEACVGDLAPEVSWLNAGHGNIQMVLHTIDGDGCQTPHGLKIYTDKRLLDLYQGAQLKVTVATTGSVKVSKSMGWLQEGGLDYFALAGRFYRADLREIEHPRAMAVSAHLCAVGLNWVFLADIICDPNEAFKFGKDFEPRTLTYGFANEDENPKNGGATTTSFAVAVYKIKTVATLKVIGKALWKGIQMRTQQGSGPTCQWALRKGKNSILLLAQDSRGGIPKNEFTILGDLPEGQTTTCTHTEIKTRLLYGATVFFMRGDLVGLYADGCSHLYRSSNLMSQACAAAKTILCSLDGERANFSNPTDFAMYNAVFRPRLYTVSFGVFDNNVDVLQAALYYLIMMAMKQYWGVKQGGLEGTLYTWSKVSGKKETSDSRNNPSICVSVCKNPLKDVQLRIAALKRFAEAEEIGKPAVVIRALEPGLTLYILLSSHGSEGKKTHNPILVSAFVVTTVADTSKPKVTYHKDQEMAIYQVLGNNPAGYEVELAFLLPTASSKQQSGRTRKFMDTASGELKEMPIQSSHEITQAADINNLRQLPRTYKKESAKVKVAACKQPPAALNTGIEKVPSHPDGLQLIIEDEWKLLEASSMSQYNEQAKEWPFHKGGIFFKGHEQKCIDASELPRGITRDLRVILINEALVLNTFCGERKLQNEATLILLRAYVWGRHLLANYFRAPNEQDGVLVDIPQGRSTLKSDHLRASIPLFLYTTIETCTSNVTIHKRVQPMIILDIAVAGEGVCDMKNGQVFKRRMARSNDRRLPPGARMKIILFRRNHECYPLQKHQEQWILGAIRTPYGLYNLQEKATLTTRYLIKLQINNRNDLVTTLVSLLMHTRESYIRFTKERRTT
ESPIDVLAATLYQEFTREVRRAGEQRAGIFFSQDTNYEQAIFETKMAAYPPFGANSWNPTLRYEAWTIIKTPNSKGQEFFLEHMQDVGYGKIASSKYQEKDDDEEVARGRIVPAWY' s2 = 'PPFQPDKKLAGIELVLCNADLPGRSIYLRKVLQANANKRASASKRCTDDDIIKVDSAPDPQRKLVQAGKVPRVLYNGDVSNIISQILICAYVTGASRNFQHVMLLMDKGWGRGFTLMVNYPCPKVLEEFNPTLLTALVIISVYLNSIECERAGVTIAALNVKLEATDRLALLGRQTANTVMRAPLLLLCQGDSAKNTLNWSLEDLAIVFGRAATRVCKNLALLLNSQVFFQKTTGYKSQLGKNVINFDLYKPLVCDLVDATKYMKFYGTNDDSTDIQGRSSEKAAALAAAAMGVVGWHFLAPTGLVGAGSTFSPVFCIKGNAQLCCKRFDIDEWKALLTLQKSKIANIDYLRYRTGAVIEVGANYDGCSGQPKLQCFYDYLIRYPETVLGTNRQERVMTDEGGEHVRDLILRNVLENPTGFIGSGTHPGNISCTLETTNADLIIGSTDYDGVGSYLIIMGTCFMVTGCVVFTYAVMELVRPLKIHIFACAKVILQEADGSQKTNLRGRGKVSSFGDLPVRFRTLDGIATPSTTHAEMGASFDAAVLVIGRTGTAKFRQFATLDNRNLACNINLSSIRRYFNDNNWLEAGAKNAAEILVNHADKSLTPWVVGLGPLLKPGDIACPMIAVSYLVLVIMDMYLASYSDSFAKHLKNKHRTTTSAHKPSNQQLALDGALTAKRSSQAASIIFEAEEWGFLEWAMIGHLQTKMIYDDAFRLNSPEEELLTQATTHKIKPNYLIALQMLHRDFCIGFFHTLIHASVADSIVYASRLKQNAAIIDRGKTARQDLLGIALKLIVSASTKNAASFNRDFKLPVDVMFRFLDKMLNHGVNTIVHGGQDPKNGNPVGAGLPSWAKNIKVELQVTMFQLFESVDCTSELRLLSTAVDTTLHGEVQVMSAKDLFGRFRYRILSAGESLMENGISPKSFVEALKYFIMYYWTDITEPRCRGSALYPITIQPNLYKRTSATSLHPKGERWLPFEETSRTTISTVLMNNALLGICLYKSYQLLDHDFLGDKKQSNKRVSENSFLGIQTLHDPTGYLQKLDHSRLSKFNRDIRWGQGKSPEQWAVTLVPTLFVKKGTNAWRKKNNAEPIIVTTGTNTAPLEELHKAWMQLAHDGIVVSTLTENEKLEFFSFQDGMPSLVLFSIMAETNQLRYIGNKIYASRKWMADAQKASWVYASLPTNSCNWTAVEVAFEPKGECQMAKKFDLHSMAIVMVRLLAQERSDGADGMNNASSVKWLRKEANEKVCKWWFASPKINAMFQTVKIQSSGKYLARNPKAATKDVKKVEQDLLSRIQTQEHGLLWFYVRLIGEISEVPILSCNKALFLTIKLFNKFIRWNIAPLEITSGVDAWHTIFTSSRFSETDTGIEMTALDLTLPQGNWGTMKKKVALAATGFILFLAYSMGTLSKKFEGNHHWTW' print(mul1.estimate_pam(pyopa.Sequence(s1), pyopa.Sequence(s2))) print(mul2.estimate_pam(pyopa.Sequence(s1), pyopa.Sequence(s2)))
log_pam1_env = defaults['log_pam1']

# Directory holding the default matrices (created at installation time).
matrix_dir = pyopa.matrix_dir()

# Alternatively, the environments can be read from an explicit location.
all_matrices_path = os.path.join(matrix_dir, 'all_matrices.json')
env_list = pyopa.read_all_env_json(all_matrices_path)
log_pam1_path = os.path.join(matrix_dir, 'logPAM1.json')
log_pam1_env = pyopa.read_env_json(log_pam1_path)

# ---------------------------------------------------------------------------
# Generate a single AlignmentEnvironment with a PAM distance of 250.
generated_env = pyopa.generate_env(log_pam1_env, 250)

# Generate 1000 environments for different PAM distances.
gen_env_list = pyopa.generate_all_env(log_pam1_env, 1000)

# ---------------------------------------------------------------------------
s1 = pyopa.Sequence('AATCGGA')
s2 = pyopa.Sequence('AAAA')
s3 = pyopa.Sequence('CATACCTGGTGTGATGCC')

# Not optimal: each call below triggers hidden profile generations in the
# background. The output order is short(s1, s2), short(s1, s3),
# byte(s1, s2), byte(s1, s3).
for align in (pyopa.align_short, pyopa.align_byte):
    for other in (s2, s3):
        print(align(s1, other, generated_env))

# A single, explicit profile generation for s1.
profile = pyopa.AlignmentProfile()
profile.create_profiles(s1, generated_env)
def setUp(self):
    """Prepare the environments used by the tests.

    Sets ``self.log_pam1`` from ``logPAM1.json`` in ``pyopa.matrix_dir()``,
    ``self.env_generated`` to the 1266 environments generated from it, and
    ``self.env_loaded`` to the ``'environments'`` entry of the defaults.
    """
    log_pam1_path = os.path.join(pyopa.matrix_dir(), 'logPAM1.json')
    self.log_pam1 = pyopa.read_env_json(log_pam1_path)

    self.env_generated = pyopa.generate_all_env(self.log_pam1, 1266)

    defaults = pyopa.load_default_environments()
    self.env_loaded = defaults['environments']