def setUp(self):
    """Load the fixture data used by the tests and store it on ``self``.

    Reads the sequences from ``data/testseqs.txt`` and the reference rows
    from ``data/reference_test_results.dat`` (both resolved relative to this
    file), and loads the default pyopa environments.
    """
    self.precision = 10
    # resource.setrlimit(resource.RLIMIT_STACK,
    #                    (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

    data_dir = os.path.join(os.path.dirname(__file__), 'data')

    # One pyopa.Sequence per line of the sequence file, whitespace stripped.
    with open(os.path.join(data_dir, 'testseqs.txt')) as seq_file:
        self.sequences = [pyopa.Sequence(line.strip()) for line in seq_file]

    self.darwin_results = []

    defaults = pyopa.load_default_environments()
    self.alignment_environments = defaults['environments']
    self.log_pam1 = defaults['log_pam1']
    self.dms = pyopa.MutipleAlEnv(self.alignment_environments,
                                  self.log_pam1)

    # Disabled code carried over from the original (was an unused string
    # literal in the method body):
    #
    # write_all_env_files(self.alignment_environments)
    #
    # with open(os.path.dirname(__file__) + '/data/matrices/json/logPAM1.json') as lp:
    #     json_data = json.load(lp)
    #     #json_data["Scores"] = map(lambda l: map(lambda s: s/(2048*2048*2048), l), json_data["Scores"])
    #     logPAM1 = pyopa.read_env_json(json_data, self.alignment_environments[0].columns)
    #     write_env_file(logPAM1, "logPAM1")

    with open(os.path.join(data_dir, 'reference_test_results.dat')) as ref_file:
        # skip header
        next(ref_file)
        for row in csv.reader(ref_file, delimiter='\t'):
            # Every row must carry exactly these 14 tab-separated fields;
            # the unpacking raises ValueError otherwise.
            (s1, s2, matrix_nr, pam, threshold,
             score_d, score_f, score_s, score_b,
             als1, als2, ep_sim, ep_pamn, ep_var) = row

            expected = DarwinResult()
            expected.s1_id = int(s1)
            expected.s2_id = int(s2)
            expected.matrix_nr = int(matrix_nr)
            expected.pam = float(pam)
            expected.threshold = float(threshold)
            expected.score_double = float(score_d)
            expected.score_float = float(score_f)
            expected.score_short = float(score_s)
            expected.score_byte = float(score_b)
            # The two als fields are stored as the raw strings from the file.
            expected.als1 = als1
            expected.als2 = als2
            expected.ep_sim = float(ep_sim)
            expected.ep_pamn = float(ep_pamn)
            expected.ep_var = float(ep_var)
            self.darwin_results.append(expected)

# NOTE(review): the original line ended with an opening ''' whose closing
# delimiter is not visible in this chunk; it is not reproduced here because it
# would leave an unterminated string -- confirm against the full file.
for i in range(20): for j in range(20): qsum[i] += json_data['scores'][j][i] for i in range(20): json_data['scores'][i][i] = -qsum[i] return json_data json_matrix = convert_to_json(os.path.dirname(os.path.abspath(__file__)) + '/test/data/jtt.dat') for i in range(20): print(json_matrix['scores'][i]) log_pam1 = pyopa.read_env_json( os.path.dirname(os.path.abspath(__file__)) + '/test/data/matrices/json/logPAM1.json') envs = pyopa.read_all_env_json( os.path.dirname(os.path.abspath(__file__)) + '/test/data/matrices/json/all_matrices.json') generated_envs = pyopa.generate_all_env(log_pam1, 1266) mul1 = pyopa.MutipleAlEnv(envs, log_pam1) mul2 = pyopa.MutipleAlEnv(generated_envs, log_pam1) s1 = 'PDVRTQYSRTKTIKLAQVRKCGAWRVLCLDLIPDLTAKNNHMRTKWTEVQYLAFVVSIVKKRPLSHSLVLITTGKAWNGTWRALPRLSNKLIETAFKEIQAEETVYDTKAFVAGKKPRWVSPFICYGLPFVISRFDFAQYRLKDMLILFSDMLLSRICNFYNGNTGPVPNSKTNEDTDLFFDGLSGMLKLNLKRSDAICHVICYEAPIARVKFGREVKDKFSLPKGGKNPSRRISWNILGILIDRTMFIRPRLVARKEAIHLFDLIGENIDAITQRLRAHKTLMVHESQVVEQPLKVKNLDLRPELVGEEEKNRHGRAKQLDRMANGNMAQIKNGHFKQTYLISVFRPQWLQLQGGCLIAEGFHSEVGGTVDGLKGTPCAQGPVVKGLFAVWRRCDTLAGRYYQKAADIDKLGDILLASLYYIPQGAIITLSEEMAKRIGANVLLVGLINVRYSGIGYEACVGDLAPEVSWLNAGHGNIQMVLHTIDGDGCQTPHGLKIYTDKRLLDLYQGAQLKVTVATTGSVKVSKSMGWLQEGGLDYFALAGRFYRADLREIEHPRAMAVSAHLCAVGLNWVFLADIICDPNEAFKFGKDFEPRTLTYGFANEDENPKNGGATTTSFAVAVYKIKTVATLKVIGKALWKGIQMRTQQGSGPTCQWALRKGKNSILLLAQDSRGGIPKNEFTILGDLPEGQTTTCTHTEIKTRLLYGATVFFMRGDLVGLYADGCSHLYRSSNLMSQACAAAKTILCSLDGERANFSNPTDFAMYNAVFRPRLYTVSFGVFDNNVDVLQAALYYLIMMAMKQYWGVKQGGLEGTLYTWSKVSGKKETSDSRNNPSICVSVCKNPLKDVQLRIAALKRFAEAEEIGKPAVVIRALEPGLTLYILLSSHGSEGKKTHNPILVSAFVVTTVADTSKPKVTYHKDQEMAIYQVLGNNPAGYEVELAFLLPTASSKQQSGRTRKFMDTASGELKEMPIQSSHEITQAADINNLRQLPRTYKKESAKVKVAACKQPPAALNTGIEKVPSHPDGLQLIIEDEWKLLEASSMSQYNEQAKEWPFHKGGIFFKGHEQKCIDASELPRGITRDLRVILINEALVLNTFCGERKLQNEATLILLRAYVWGRHLLANYFRAPNEQDGVLVDIPQGRSTLKSDHLRASIPLFLYTTIETCTSNVTIHKRVQPMIILDIAVAGEGVCDMKNGQVFKRRMARSNDRRLPPGARMKIILFRRNHECYPLQKHQEQWILGAIRTPYGLYNLQEKATLTTRYLIKLQINNRNDLVTTLVSLLMHTRESYIRFTKERRTT
ESPIDVLAATLYQEFTREVRRAGEQRAGIFFSQDTNYEQAIFETKMAAYPPFGANSWNPTLRYEAWTIIKTPNSKGQEFFLEHMQDVGYGKIASSKYQEKDDDEEVARGRIVPAWY' s2 = 'PPFQPDKKLAGIELVLCNADLPGRSIYLRKVLQANANKRASASKRCTDDDIIKVDSAPDPQRKLVQAGKVPRVLYNGDVSNIISQILICAYVTGASRNFQHVMLLMDKGWGRGFTLMVNYPCPKVLEEFNPTLLTALVIISVYLNSIECERAGVTIAALNVKLEATDRLALLGRQTANTVMRAPLLLLCQGDSAKNTLNWSLEDLAIVFGRAATRVCKNLALLLNSQVFFQKTTGYKSQLGKNVINFDLYKPLVCDLVDATKYMKFYGTNDDSTDIQGRSSEKAAALAAAAMGVVGWHFLAPTGLVGAGSTFSPVFCIKGNAQLCCKRFDIDEWKALLTLQKSKIANIDYLRYRTGAVIEVGANYDGCSGQPKLQCFYDYLIRYPETVLGTNRQERVMTDEGGEHVRDLILRNVLENPTGFIGSGTHPGNISCTLETTNADLIIGSTDYDGVGSYLIIMGTCFMVTGCVVFTYAVMELVRPLKIHIFACAKVILQEADGSQKTNLRGRGKVSSFGDLPVRFRTLDGIATPSTTHAEMGASFDAAVLVIGRTGTAKFRQFATLDNRNLACNINLSSIRRYFNDNNWLEAGAKNAAEILVNHADKSLTPWVVGLGPLLKPGDIACPMIAVSYLVLVIMDMYLASYSDSFAKHLKNKHRTTTSAHKPSNQQLALDGALTAKRSSQAASIIFEAEEWGFLEWAMIGHLQTKMIYDDAFRLNSPEEELLTQATTHKIKPNYLIALQMLHRDFCIGFFHTLIHASVADSIVYASRLKQNAAIIDRGKTARQDLLGIALKLIVSASTKNAASFNRDFKLPVDVMFRFLDKMLNHGVNTIVHGGQDPKNGNPVGAGLPSWAKNIKVELQVTMFQLFESVDCTSELRLLSTAVDTTLHGEVQVMSAKDLFGRFRYRILSAGESLMENGISPKSFVEALKYFIMYYWTDITEPRCRGSALYPITIQPNLYKRTSATSLHPKGERWLPFEETSRTTISTVLMNNALLGICLYKSYQLLDHDFLGDKKQSNKRVSENSFLGIQTLHDPTGYLQKLDHSRLSKFNRDIRWGQGKSPEQWAVTLVPTLFVKKGTNAWRKKNNAEPIIVTTGTNTAPLEELHKAWMQLAHDGIVVSTLTENEKLEFFSFQDGMPSLVLFSIMAETNQLRYIGNKIYASRKWMADAQKASWVYASLPTNSCNWTAVEVAFEPKGECQMAKKFDLHSMAIVMVRLLAQERSDGADGMNNASSVKWLRKEANEKVCKWWFASPKINAMFQTVKIQSSGKYLARNPKAATKDVKKVEQDLLSRIQTQEHGLLWFYVRLIGEISEVPILSCNKALFLTIKLFNKFIRWNIAPLEITSGVDAWHTIFTSSRFSETDTGIEMTALDLTLPQGNWGTMKKKVALAATGFILFLAYSMGTLSKKFEGNHHWTW' print(mul1.estimate_pam(pyopa.Sequence(s1), pyopa.Sequence(s2))) print(mul2.estimate_pam(pyopa.Sequence(s1), pyopa.Sequence(s2)))
print('Local byte estimation with threshold %f: %f'
      % (env.threshold, profile.align_byte(s2, env)))

# The byte estimation exceeds the threshold here, so double_max is returned;
# that value signals that, by the byte estimation, the score is larger than
# the threshold.
#
# Changing the threshold requires rebuilding both the scaled matrices and the
# profile used for the estimations, as done below.
env.threshold = 62.0

# Skipping these two calls gives inaccurate results!
env.create_scaled_matrices()
profile.create_profiles(s1, env)

print('Local short estimation with threshold of %f: %f'
      % (env.threshold, profile.align_short(s2, env)))
print('Local byte estimation with threshold of %f: %f'
      % (env.threshold, profile.align_byte(s2, env)))

# The simple align_short/align_byte functions hide the profile creation, but
# they build a new profile on every call and are therefore inefficient.
print('Simple short alignment %f' % pyopa.align_short(s1, s2, env))
print('Simple byte alignment %f' % pyopa.align_byte(s1, s2, env))

# EstimatePam needs a data structure that stores the matrices. The logPAM1
# matrix loaded here is the one the other matrices were generated from.
log_pam1 = defaults['log_pam1']
dms = pyopa.MutipleAlEnv(envs, log_pam1)

# Run nt_epam in its own thread and wait for it to finish.
t = threading.Thread(
    target=nt_epam,
    name='EstimatePam Thread',
    args=(s1, s2, dms, env),
)
t.start()
t.join()

# Environments can also be generated instead of being read from a file.
generated_envs = pyopa.generate_all_env(log_pam1, 1000)
#---------------------------------------------------------------------------------------------------
# Do the concrete alignment in a new thread.
# Alternatively, the stack size can be increased on UNIX-based systems:
# 'resource.setrlimit(resource.RLIMIT_STACK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))'
def nt_align(s1, s2, env, is_global, aligned_strs):
    """Align ``s1`` and ``s2``, print both aligned strings, and collect them.

    The result of ``pyopa.align_strings`` is appended to ``aligned_strs``
    (via ``extend``) so that a caller running this function in a thread can
    read the aligned strings afterwards.
    """
    mode = 'global' if is_global else 'local'
    print('Concrete %s alignment:' % mode)

    alignment = pyopa.align_strings(s1, s2, env, is_global)
    print('\taligned_s1: %s' % alignment[0])
    print('\taligned_s2: %s' % alignment[1])

    aligned_strs.extend(alignment)


s1 = pyopa.Sequence('PISRIDNNKITTTLGNTGIISVTIGVIIFKDLHAKVHGF')
s2 = pyopa.Sequence('PIERIENNKILANTGVISVTIGVIIYQDLHADTVMTSDY')

# Stack size (in bytes) for threads created after this call.
threading.stack_size(100000000)

# aligned_s1: PISRIDNNKITTTLGNTGIISVTIGVIIFKDLHAKV
# aligned_s2: PIERIENNKI___LANTGVISVTIGVIIYQDLHADT
aligned_strings = []
t = threading.Thread(
    target=nt_align,
    name='Aligning Thread',
    args=(s1, s2, generated_env, False, aligned_strings),
)
t.start()
t.join()

print(aligned_strings[0])
print(aligned_strings[1])
#---------------------------------------------------------------------------------------------------

dms = pyopa.MutipleAlEnv(gen_env_list, log_pam1_env)

# returns an array: [similarity, pam_distance, variance]
print(dms.estimate_pam(aligned_strings[0], aligned_strings[1]))
def test_estimate_pam(self):
    # estimate_pam is expected to raise ValueError for the fixture pair
    # self.s1 / self.s2.
    # NOTE(review): presumably because these sequences are not aligned --
    # confirm against the fixture that defines them.
    multi_env = pyopa.MutipleAlEnv(self.envs, self.log_pam1)
    with self.assertRaises(ValueError):
        multi_env.estimate_pam(self.s1, self.s2)