示例#1
0
    def setUp(self):
        """Prepare the fixtures used by the tests.

        Reads one sequence per line from ``data/testseqs.txt``, loads the
        default environments and the ``log_pam1`` entry returned by
        ``pyopa.load_default_environments()``, builds a ``MutipleAlEnv`` from
        them, and parses the tab-separated rows of
        ``data/reference_test_results.dat`` into ``self.darwin_results``.
        """
        self.precision = 10
        # Disabled:
        # resource.setrlimit(resource.RLIMIT_STACK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

        data_dir = os.path.join(os.path.dirname(__file__), 'data')

        # Read the raw lines first; the Sequence objects are built once the
        # file has been closed. strip() drops the trailing newline.
        with open(os.path.join(data_dir, 'testseqs.txt')) as seq_file:
            raw_lines = seq_file.readlines()
        self.sequences = [pyopa.Sequence(raw.strip()) for raw in raw_lines]
        self.darwin_results = []

        defaults = pyopa.load_default_environments()
        self.alignment_environments = defaults['environments']
        self.log_pam1 = defaults['log_pam1']
        self.dms = pyopa.MutipleAlEnv(self.alignment_environments,
                                      self.log_pam1)

        # Disabled code, kept for reference:
        # write_all_env_files(self.alignment_environments)
        # with open(os.path.dirname(__file__) + '/data/matrices/json/logPAM1.json') as lp:
        #     json_data = json.load(lp)
        #     #json_data["Scores"] = map(lambda l: map(lambda s: s/(2048*2048*2048), l), json_data["Scores"])
        #     logPAM1 = pyopa.read_env_json(json_data, self.alignment_environments[0].columns)
        #     write_env_file(logPAM1, "logPAM1")

        results_path = os.path.join(data_dir, 'reference_test_results.dat')
        with open(results_path) as results_file:
            # The first line is a header, not a data row.
            next(results_file)

            for row in csv.reader(results_file, delimiter='\t'):
                # Unpacking enforces that every row has exactly 14 columns.
                (s1, s2, matrix_nr, pam, threshold,
                 score_d, score_f, score_s, score_b,
                 als1, als2, ep_sim, ep_pamn, ep_var) = row

                record = DarwinResult()
                # Identifiers are integers.
                record.s1_id = int(s1)
                record.s2_id = int(s2)
                record.matrix_nr = int(matrix_nr)
                # Numeric columns are floats.
                record.pam = float(pam)
                record.threshold = float(threshold)
                record.score_double = float(score_d)
                record.score_float = float(score_f)
                record.score_short = float(score_s)
                record.score_byte = float(score_b)
                # The two als columns are stored as the raw strings read from the file.
                record.als1 = als1
                record.als2 = als2
                record.ep_sim = float(ep_sim)
                record.ep_pamn = float(ep_pamn)
                record.ep_var = float(ep_var)

                self.darwin_results.append(record)
                '''
示例#2
0
        for i in range(20):
            for j in range(20):
                qsum[i] += json_data['scores'][j][i]

        for i in range(20):
            json_data['scores'][i][i] = -qsum[i]

        return json_data

# Directory of this script; every data file below is addressed relative to it.
script_dir = os.path.dirname(os.path.abspath(__file__))

# Convert the JTT data file and print entries 0..19 of its 'scores' table.
json_matrix = convert_to_json(script_dir + '/test/data/jtt.dat')
for row_idx in range(20):
    print(json_matrix['scores'][row_idx])

# Environments read from the JSON files under test/data/matrices/json.
log_pam1 = pyopa.read_env_json(
    script_dir + '/test/data/matrices/json/logPAM1.json')
envs = pyopa.read_all_env_json(
    script_dir + '/test/data/matrices/json/all_matrices.json')

# Environments generated from log_pam1 instead of being read from a file.
generated_envs = pyopa.generate_all_env(log_pam1, 1266)

# One MutipleAlEnv for the environments read from file, one for the generated ones.
mul1 = pyopa.MutipleAlEnv(envs, log_pam1)
mul2 = pyopa.MutipleAlEnv(generated_envs, log_pam1)

s1 = 'PDVRTQYSRTKTIKLAQVRKCGAWRVLCLDLIPDLTAKNNHMRTKWTEVQYLAFVVSIVKKRPLSHSLVLITTGKAWNGTWRALPRLSNKLIETAFKEIQAEETVYDTKAFVAGKKPRWVSPFICYGLPFVISRFDFAQYRLKDMLILFSDMLLSRICNFYNGNTGPVPNSKTNEDTDLFFDGLSGMLKLNLKRSDAICHVICYEAPIARVKFGREVKDKFSLPKGGKNPSRRISWNILGILIDRTMFIRPRLVARKEAIHLFDLIGENIDAITQRLRAHKTLMVHESQVVEQPLKVKNLDLRPELVGEEEKNRHGRAKQLDRMANGNMAQIKNGHFKQTYLISVFRPQWLQLQGGCLIAEGFHSEVGGTVDGLKGTPCAQGPVVKGLFAVWRRCDTLAGRYYQKAADIDKLGDILLASLYYIPQGAIITLSEEMAKRIGANVLLVGLINVRYSGIGYEACVGDLAPEVSWLNAGHGNIQMVLHTIDGDGCQTPHGLKIYTDKRLLDLYQGAQLKVTVATTGSVKVSKSMGWLQEGGLDYFALAGRFYRADLREIEHPRAMAVSAHLCAVGLNWVFLADIICDPNEAFKFGKDFEPRTLTYGFANEDENPKNGGATTTSFAVAVYKIKTVATLKVIGKALWKGIQMRTQQGSGPTCQWALRKGKNSILLLAQDSRGGIPKNEFTILGDLPEGQTTTCTHTEIKTRLLYGATVFFMRGDLVGLYADGCSHLYRSSNLMSQACAAAKTILCSLDGERANFSNPTDFAMYNAVFRPRLYTVSFGVFDNNVDVLQAALYYLIMMAMKQYWGVKQGGLEGTLYTWSKVSGKKETSDSRNNPSICVSVCKNPLKDVQLRIAALKRFAEAEEIGKPAVVIRALEPGLTLYILLSSHGSEGKKTHNPILVSAFVVTTVADTSKPKVTYHKDQEMAIYQVLGNNPAGYEVELAFLLPTASSKQQSGRTRKFMDTASGELKEMPIQSSHEITQAADINNLRQLPRTYKKESAKVKVAACKQPPAALNTGIEKVPSHPDGLQLIIEDEWKLLEASSMSQYNEQAKEWPFHKGGIFFKGHEQKCIDASELPRGITRDLRVILINEALVLNTFCGERKLQNEATLILLRAYVWGRHLLANYFRAPNEQDGVLVDIPQGRSTLKSDHLRASIPLFLYTTIETCTSNVTIHKRVQPMIILDIAVAGEGVCDMKNGQVFKRRMARSNDRRLPPGARMKIILFRRNHECYPLQKHQEQWILGAIRTPYGLYNLQEKATLTTRYLIKLQINNRNDLVTTLVSLLMHTRESYIRFTKERRTTESPIDVLAATLYQEFTREVRRAGEQRAGIFFSQDTNYEQAIFETKMAAYPPFGANSWNPTLRYEAWTIIKTPNSKGQEFFLEHMQDVGYGKIASSKYQEKDDDEEVARGRIVPAWY'
s2 = 'PPFQPDKKLAGIELVLCNADLPGRSIYLRKVLQANANKRASASKRCTDDDIIKVDSAPDPQRKLVQAGKVPRVLYNGDVSNIISQILICAYVTGASRNFQHVMLLMDKGWGRGFTLMVNYPCPKVLEEFNPTLLTALVIISVYLNSIECERAGVTIAALNVKLEATDRLALLGRQTANTVMRAPLLLLCQGDSAKNTLNWSLEDLAIVFGRAATRVCKNLALLLNSQVFFQKTTGYKSQLGKNVINFDLYKPLVCDLVDATKYMKFYGTNDDSTDIQGRSSEKAAALAAAAMGVVGWHFLAPTGLVGAGSTFSPVFCIKGNAQLCCKRFDIDEWKALLTLQKSKIANIDYLRYRTGAVIEVGANYDGCSGQPKLQCFYDYLIRYPETVLGTNRQERVMTDEGGEHVRDLILRNVLENPTGFIGSGTHPGNISCTLETTNADLIIGSTDYDGVGSYLIIMGTCFMVTGCVVFTYAVMELVRPLKIHIFACAKVILQEADGSQKTNLRGRGKVSSFGDLPVRFRTLDGIATPSTTHAEMGASFDAAVLVIGRTGTAKFRQFATLDNRNLACNINLSSIRRYFNDNNWLEAGAKNAAEILVNHADKSLTPWVVGLGPLLKPGDIACPMIAVSYLVLVIMDMYLASYSDSFAKHLKNKHRTTTSAHKPSNQQLALDGALTAKRSSQAASIIFEAEEWGFLEWAMIGHLQTKMIYDDAFRLNSPEEELLTQATTHKIKPNYLIALQMLHRDFCIGFFHTLIHASVADSIVYASRLKQNAAIIDRGKTARQDLLGIALKLIVSASTKNAASFNRDFKLPVDVMFRFLDKMLNHGVNTIVHGGQDPKNGNPVGAGLPSWAKNIKVELQVTMFQLFESVDCTSELRLLSTAVDTTLHGEVQVMSAKDLFGRFRYRILSAGESLMENGISPKSFVEALKYFIMYYWTDITEPRCRGSALYPITIQPNLYKRTSATSLHPKGERWLPFEETSRTTISTVLMNNALLGICLYKSYQLLDHDFLGDKKQSNKRVSENSFLGIQTLHDPTGYLQKLDHSRLSKFNRDIRWGQGKSPEQWAVTLVPTLFVKKGTNAWRKKNNAEPIIVTTGTNTAPLEELHKAWMQLAHDGIVVSTLTENEKLEFFSFQDGMPSLVLFSIMAETNQLRYIGNKIYASRKWMADAQKASWVYASLPTNSCNWTAVEVAFEPKGECQMAKKFDLHSMAIVMVRLLAQERSDGADGMNNASSVKWLRKEANEKVCKWWFASPKINAMFQTVKIQSSGKYLARNPKAATKDVKKVEQDLLSRIQTQEHGLLWFYVRLIGEISEVPILSCNKALFLTIKLFNKFIRWNIAPLEITSGVDAWHTIFTSSRFSETDTGIEMTALDLTLPQGNWGTMKKKVALAATGFILFLAYSMGTLSKKFEGNHHWTW'

# Print the estimate_pam result for s1/s2, first with the environments read
# from file (mul1), then with the generated ones (mul2). Fresh Sequence
# objects are created for every call.
for multi_env in (mul1, mul2):
    print(multi_env.estimate_pam(pyopa.Sequence(s1), pyopa.Sequence(s2)))
示例#3
0
print('Local byte estimation with threshold %f: %f'
      % (env.threshold, profile.align_byte(s2, env)))
# Because the byte estimation is larger than the threshold, double_max is
# returned; it signals that, according to the byte estimation, the score
# exceeds the threshold.

# Changing the threshold requires recreating both the scaled matrices and the
# profile used for the estimations. Skipping either step gives inaccurate
# results!
env.threshold = 62.0
env.create_scaled_matrices()
profile.create_profiles(s1, env)
print('Local short estimation with threshold of %f: %f'
      % (env.threshold, profile.align_short(s2, env)))
print('Local byte estimation with threshold of %f: %f'
      % (env.threshold, profile.align_byte(s2, env)))

# The simple align_short/align_byte functions hide the profile creation, but
# they create a new profile on every call, which makes them inefficient.
print('Simple short alignment %f' % pyopa.align_short(s1, s2, env))
print('Simple byte alignment %f' % pyopa.align_byte(s1, s2, env))

# The EstimatePam function needs a data structure that stores the matrices.

# log_pam1 is the matrix that was used to generate the other matrices.
log_pam1 = defaults['log_pam1']

dms = pyopa.MutipleAlEnv(envs, log_pam1)
# Run nt_epam in its own thread and wait for it to finish.
t = threading.Thread(
    group=None,
    target=nt_epam,
    name='EstimatePam Thread',
    args=(s1, s2, dms, env),
)
t.start()
t.join()

# Environments can also be generated instead of being read from a file.
generated_envs = pyopa.generate_all_env(log_pam1, 1000)

示例#4
0
#---------------------------------------------------------------------------------------------------
# Helper that performs the concrete alignment; it is meant to be run in a new thread.
# Alternatively, you can increase the stack size on UNIX-based systems:
# 'resource.setrlimit(resource.RLIMIT_STACK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))'
def nt_align(s1, s2, env, is_global, aligned_strs):
    """Align s1 and s2, print the aligned strings and collect them.

    Args:
        s1: First argument forwarded to ``pyopa.align_strings``.
        s2: Second argument forwarded to ``pyopa.align_strings``.
        env: Environment forwarded to ``pyopa.align_strings``.
        is_global: Forwarded to ``pyopa.align_strings``; also selects the
            word 'global' or 'local' in the printed heading.
        aligned_strs: List that is extended in place with everything
            ``pyopa.align_strings`` returns. Nothing is returned directly.
    """
    mode = 'global' if is_global else 'local'
    print('Concrete %s alignment:' % mode)

    alignment = pyopa.align_strings(s1, s2, env, is_global)
    # Print the first two entries, each with its own label.
    for template, index in (('\taligned_s1: %s', 0), ('\taligned_s2: %s', 1)):
        print(template % alignment[index])

    aligned_strs.extend(alignment)


s1 = pyopa.Sequence('PISRIDNNKITTTLGNTGIISVTIGVIIFKDLHAKVHGF')
s2 = pyopa.Sequence('PIERIENNKILANTGVISVTIGVIIYQDLHADTVMTSDY')

# Set the stack size used for threads created from here on.
threading.stack_size(100000000)

# Expected output of the local alignment:
#   aligned_s1: PISRIDNNKITTTLGNTGIISVTIGVIIFKDLHAKV
#   aligned_s2: PIERIENNKI___LANTGVISVTIGVIIYQDLHADT
aligned_strings = []
t = threading.Thread(
    group=None,
    target=nt_align,
    name='Aligning Thread',
    args=(s1, s2, generated_env, False, aligned_strings),
)
t.start()
t.join()

# nt_align stored its result in aligned_strings; show both aligned strings.
for position in (0, 1):
    print(aligned_strings[position])
#---------------------------------------------------------------------------------------------------
dms = pyopa.MutipleAlEnv(gen_env_list, log_pam1_env)

# estimate_pam returns an array: [similarity, pam_distance, variance]
pam_estimate = dms.estimate_pam(aligned_strings[0], aligned_strings[1])
print(pam_estimate)
示例#5
0
    def test_estimate_pam(self):
        """estimate_pam must raise ValueError for the fixture pair s1/s2."""
        multi_env = pyopa.MutipleAlEnv(self.envs, self.log_pam1)

        with self.assertRaises(ValueError):
            multi_env.estimate_pam(self.s1, self.s2)