def __init__(self, seqs, submodel, threeway=False, motif_probs=None,
             do_pair_align=False, rigorous_align=False, est_params=None,
             modify_lf=None):
    """Arguments:
        - seqs: an Alignment or SeqCollection instance with > 1 sequence
        - submodel: substitution model object. Predefined models can
          be imported from cogent.evolve.models
        - threeway: a boolean flag for using threeway comparisons to
          estimate distances. default False. Ignored if do_pair_align is
          True.
        - motif_probs: stored unchanged on the instance.
        - do_pair_align: if the input sequences are to be pairwise aligned
          first and then the distance will be estimated. A pair HMM based
          on the submodel will be used.
        - rigorous_align: if True the pairwise alignments are actually
          numerically optimised, otherwise the current substitution model
          settings are used. This slows down estimation considerably.
        - est_params: substitution model parameters to save estimates from
          in addition to length (distance)
        - modify_lf: a callback function that takes a likelihood function
          (with alignment set) and modifies it. Can be used to configure
          local_params, set bounds, optimise using a restriction for
          faster performance.

    Note: Unless you know a priori your alignment will be flush ended
    (meaning no sequence has terminal gaps) it is advisable to construct a
    substitution model that recodes gaps. Otherwise the terminal gaps will
    significantly bias the estimation of branch lengths when using
    do_pair_align.
    """
    # whether pairwise is to be estimated from 3-way; pairwise alignment
    # always switches three-way estimation off.  The flag is chosen with a
    # plain truth test so any falsy do_pair_align (e.g. None) selects
    # ``threeway`` rather than failing as a list index.
    if do_pair_align:
        self.__threeway = False
    else:
        self.__threeway = threeway

    self.__seq_collection = seqs
    self.__seqnames = seqs.getSeqNames()
    self.__motif_probs = motif_probs

    # the following may be pairs or three way combinations
    self.__combination_aligns = None
    self._do_pair_align = do_pair_align
    self._rigorous_align = rigorous_align

    # substitution model stuff
    self.__sm = submodel
    self._modify_lf = modify_lf

    # store for the results
    self.__param_ests = {}
    # copy so later changes to the caller's sequence do not leak in;
    # None (or any empty value) becomes an empty list
    self.__est_params = list(est_params or [])

    self.__run = False  # a flag indicating whether estimation completed

    # whether we're on the master CPU or not
    self._on_master_cpu = parallel.getCommunicator().Get_rank() == 0
def __init__(self, filename, interval=None, noisy=True):
    """Store the settings and note the construction time.

    filename - stored unchanged as ``self.filename``.
    interval - stored as ``self.interval``; ``None`` selects 1800
        (presumably seconds, since it sits beside a ``time.time()``
        stamp - confirm against the code that consumes it).
    noisy - stored unchanged as ``self.noisy``.
    """
    self.filename = filename
    self.interval = 1800 if interval is None else interval
    self.noisy = noisy
    # Reference timestamp taken at construction.
    self.last_time = time.time()
    # Every process whose communicator rank is above zero is flagged.
    self._redundant = parallel.getCommunicator().Get_rank() > 0
def run(self, ui, **opt_args):
    """Fit every model to the observed data, then to simulated replicates.

    ui - progress object; this method calls its ``display`` and
        ``eager_map`` methods.
    opt_args - keyword arguments forwarded to each parameter
        controller's ``optimise`` call.

    Returns None.  Results are left on the instance as ``self.observed``
    and ``self.results``.
    """
    # Sets self.observed and self.results (a list _numreplicates long) to
    # whatever is returned from self.simplify([LF result from each PC]).
    # self.simplify() is used as the entire LF result might not be picklable
    # for MPI. Subclass must provide self.alignment and
    # self.parameter_controllers

    # Supply a fresh random series unless the caller gave one or asked
    # for a 'local' optimisation.
    if 'random_series' not in opt_args and not opt_args.get('local', None):
        opt_args['random_series'] = random.Random()

    # The first parameter controller is the one used to simulate data.
    null_pc = self.parameter_controllers[0]
    pcs = len(self.parameter_controllers)
    # Labels passed to ui.eager_map, one per parameter controller.
    if pcs == 1:
        model_label = ['']
    elif pcs == 2:
        model_label = ['null', 'alt ']
    else:
        model_label = ['null'] + ['alt%s'%i for i in range(1,pcs)]

    @UI.display_wrap
    def each_model(alignment, ui):
        # Fit every parameter controller to ``alignment``; returns the
        # per-model optimise() results plus the simplified summary.
        def one_model(pc):
            pc.setAlignment(alignment)
            return pc.optimise(return_calculator=True, **opt_args)
        # This is not done in parallel because we depend on the side-
        # effect of changing the parameter_controller current values
        memos = ui.eager_map(one_model, self.parameter_controllers,
                labels=model_label, pure=False)
        concise_result = self.simplify(*self.parameter_controllers)
        return (memos, concise_result)

    #optimisations = pcs * (self._numreplicates + 1)
    # NOTE(review): both operands are ints, so this is only a fraction
    # if true division is in effect for this module - confirm.
    init_work = pcs / (self._numreplicates + pcs)
    ui.display('Original data', 0.0, init_work)
    (starting_points, self.observed) = each_model(self.alignment)

    ui.display('Randomness', init_work, 0.0)
    alignment_random_state = random.Random(self.seed).getstate()
    if self.seed is None:
        # With no explicit seed the state is broadcast through the
        # communicator (second argument 0, presumably the root rank -
        # confirm) so that processes share one state.
        comm = parallel.getCommunicator()
        alignment_random_state = comm.bcast(alignment_random_state, 0)

    def one_replicate(i):
        # Restore each controller to its fit of the original data,
        # simulate an alignment from the first one and refit everything.
        for (pc, start_point) in zip(self.parameter_controllers, starting_points):
            # may have fewer CPUs per replicate than for original
            pc.setupParallelContext()
            # using a calculator as a memo object to reset the params
            pc.updateFromCalculator(start_point)
        # Every replicate starts from the same saved state and then
        # jumps ahead by an amount that depends on its index.
        aln_rnd = random.Random(0)
        aln_rnd.setstate(alignment_random_state)
        aln_rnd.jumpahead(i*10**9)
        simalign = null_pc.simulateAlignment(random_series=aln_rnd)
        (dummy, result) = each_model(simalign)
        return result

    ui.display('Bootstrap', init_work)
    self.results = ui.eager_map(
            one_replicate, range(self._numreplicates), noun='replicate',
            start=init_work)
def __init__(self, seqs, submodel, threeway=False, motif_probs=None,
             do_pair_align=False, rigorous_align=False, est_params=None,
             modify_lf=None):
    """Arguments:
        - seqs: an Alignment or SeqCollection instance with > 1 sequence
        - submodel: substitution model object. Predefined models can
          be imported from cogent.evolve.models
        - threeway: a boolean flag for using threeway comparisons to
          estimate distances. default False. Ignored if do_pair_align is
          True.
        - motif_probs: stored unchanged on the instance.
        - do_pair_align: if the input sequences are to be pairwise aligned
          first and then the distance will be estimated. A pair HMM based
          on the submodel will be used.
        - rigorous_align: if True the pairwise alignments are actually
          numerically optimised, otherwise the current substitution model
          settings are used. This slows down estimation considerably.
        - est_params: substitution model parameters to save estimates from
          in addition to length (distance)
        - modify_lf: a callback function that takes a likelihood function
          (with alignment set) and modifies it. Can be used to configure
          local_params, set bounds, optimise using a restriction for
          faster performance.

    Note: Unless you know a priori your alignment will be flush ended
    (meaning no sequence has terminal gaps) it is advisable to construct a
    substitution model that recodes gaps. Otherwise the terminal gaps will
    significantly bias the estimation of branch lengths when using
    do_pair_align.
    """
    # whether pairwise is to be estimated from 3-way; pairwise alignment
    # always switches three-way estimation off.  The flag is chosen with a
    # plain truth test so any falsy do_pair_align (e.g. None) selects
    # ``threeway`` rather than failing as a list index.
    if do_pair_align:
        self._threeway = False
    else:
        self._threeway = threeway

    self._seq_collection = seqs
    self._seqnames = seqs.getSeqNames()
    self._motif_probs = motif_probs

    # the following may be pairs or three way combinations
    self._combination_aligns = None
    self._do_pair_align = do_pair_align
    self._rigorous_align = rigorous_align

    # substitution model stuff
    self._sm = submodel
    self._modify_lf = modify_lf

    # store for the results
    self._param_ests = {}
    # copy so later changes to the caller's sequence do not leak in;
    # None (or any empty value) becomes an empty list
    self._est_params = list(est_params or [])

    self._run = False  # a flag indicating whether estimation completed

    # whether we're on the master CPU or not
    self._on_master_cpu = parallel.getCommunicator().Get_rank() == 0
def measureEvalsPerSecond(self, time_limit=1.0, wall=True, sa=False):
    """Estimate the evaluations per second an each-optpar-in-turn,
    simulated annealing type optimiser can achieve.

    time_limit - sampling stops once the accumulated sample time reaches
        this value, so not much more than this is spent.
    wall - if true use wall time (time.time) and return the median
        sample; otherwise use process time (time.clock) and return the
        mean sample.
    sa - make it simulated-annealing-like, with frequent backtracks.

    At most 5 samples are collected.
    """
    clock = time.time if wall else time.clock
    values = self.getValueArray()
    comm = parallel.getCommunicator()
    rates = []
    spent = 0.0
    rounds = 2
    while spent < time_limit and len(rates) < 5:
        time.sleep(0.01)
        started = clock()
        previous = []
        for sweep in range(rounds):
            for (index, value) in enumerate(values):
                # Not a real change, but works like one.
                self.change(previous + [(index, value)])
                backtrack = sa and (index + sweep) % 2
                previous = [(index, value)] if backtrack else []
        # Use one agreed on delta otherwise different cpus will finish
        # the loop at different times causing chaos.
        delta = comm.allreduce(clock() - started, parallel.MPI.MAX)
        if delta < 0.1:
            # time.clock is low res, so need to ensure each sample
            # is long enough to take SOME time.
            rounds *= 2
            continue
        rates.append(rounds * len(values) / delta)
        spent += delta
    if wall:
        return sorted(rates)[len(rates) // 2]
    return sum(rates) / len(rates)
def measureEvalsPerSecond(self, time_limit=1.0, wall=True, sa=False):
    """Estimate the evaluations per second an each-optpar-in-turn,
    simulated annealing type optimiser can achieve.

    time_limit - sampling stops once the accumulated sample time reaches
        this value, so not much more than this is spent.
    wall - if true use wall time (time.time) and return the median
        sample; otherwise use process time (time.clock) and return the
        mean sample.
    sa - make it simulated-annealing-like, with frequent backtracks.

    At most 5 samples are collected.
    """
    timer = time.time if wall else time.clock
    params = self.getValueArray()
    communicator = parallel.getCommunicator()
    measured = []
    total_time = 0.0
    repeats = 2
    while total_time < time_limit and len(measured) < 5:
        time.sleep(0.01)
        begin = timer()
        carried = []
        for rnd in range(repeats):
            for (pos, val) in enumerate(params):
                # Not a real change, but works like one.
                self.change(carried + [(pos, val)])
                if sa and (pos + rnd) % 2:
                    carried = [(pos, val)]
                else:
                    carried = []
        # Use one agreed on delta otherwise different cpus will finish
        # the loop at different times causing chaos.
        delta = communicator.allreduce(timer() - begin, parallel.MPI.MAX)
        if delta >= 0.1:
            measured.append(repeats * len(params) / delta)
            total_time += delta
        else:
            # time.clock is low res, so need to ensure each sample
            # is long enough to take SOME time.
            repeats *= 2
    if not wall:
        return sum(measured) / len(measured)
    return sorted(measured)[len(measured) // 2]
def setupRootUiContext(progressBarConstructor=None, rate=None):
    """Install ``CURRENT.context`` according to the runtime environment.

    progressBarConstructor - if given, used on the rank 0 process in
        preference to any automatically selected progress bar class.
    rate - passed to RootProgressContext; when None it defaults to 5.0
        for LogFileOutput and to 0.1 otherwise.

    Processes with a non-zero communicator rank, and a rank 0 process
    for which no progress bar class applies, get NULL_CONTEXT.
    """
    bar_class = None
    if parallel.getCommunicator().Get_rank() == 0:
        if progressBarConstructor is not None:
            bar_class = progressBarConstructor
        elif curses_terminal and sys.stdout.isatty():
            bar_class = CursesTerminalProgressBar
        elif isinstance(sys.stdout, file):
            bar_class = LogFileOutput
            if rate is None:
                rate = 5.0

    if bar_class is None:
        CURRENT.context = NULL_CONTEXT
        return

    if rate is None:
        rate = 0.1
    CURRENT.context = RootProgressContext(bar_class, rate)
# def asym_predicate((a,b)):
#    print a, b, 'a' in a
#    return 'a' in a
# mA = Codon()
# mA.setPredicates({'asym': asym_predicate})

def exponentiator_switch(switch):
    """Set cogent.evolve.substitution_calculation.use_new to `switch`."""
    import cogent.evolve.substitution_calculation
    cogent.evolve.substitution_calculation.use_new = switch

import sys
# A "relative" command line argument selects the implementation
# comparison; otherwise the evals_per_sec test is used.
if "relative" in sys.argv:
    test = CompareImplementations(exponentiator_switch)
else:
    test = evals_per_sec

parallel.inefficiency_forgiven = True

if parallel.getCommunicator().Get_rank() > 0:
    # Ranks above zero run the benchmarks through quiet().
    # benchmarks(test)
    quiet(benchmarks, test)
else:
    try:
        benchmarks(test)
    except KeyboardInterrupt:
        # Parenthesised form prints the same text under the Python 2
        # print statement and the print() function.
        print(" OK")
def dp(self, TM, dp_options, cells=None, backward=False):
    """Score etc. from a Dynamic Programming function applied to this pair.

    TM - (state_directions, array) describing the Transition Matrix.
    dp_options - instance of DPFlags indicating algorithm etc.
    cells - List of (state, posn) for which posterior probs are requested.
    backward - run algorithm in reverse order.

    Returns the result of self._calc_global_probs() when cells is given.
    Otherwise returns the score alone when no traceback array was
    allocated, or a (score, traceback) tuple when one was.  May instead
    return self.hirschberg(TM, dp_options) for large or parallel problems.
    """
    (state_directions, T) = TM
    if dp_options.viterbi and cells is None:
        # A traceback array is only needed for viterbi without cells.
        encoder = self.pair.getPointerEncoding(len(T))
        problem_dimensions = self.pair.size + [len(T)]
        problem_size = numpy.product(problem_dimensions)
        # Size estimate compared against 500 and reported as "Mb" below.
        memory = problem_size * encoder.bytes / 10**6
        if dp_options.local:
            msg = 'Local alignment'
        elif cells is not None:
            # NOTE(review): unreachable - the enclosing condition
            # already requires cells to be None.
            msg = 'Posterior probs'
        elif self.pair.size[0]-2 >= 3 and not backward and (
                problem_size > HIRSCHBERG_LIMIT or
                parallel.getCommunicator().Get_size() > 1):
            # Delegate forward, non-local problems that are over the
            # size limit or running with more than one process.
            return self.hirschberg(TM, dp_options)
        else:
            msg = 'dp'
        if memory > 500:
            warnings.warn('%s will use > %sMb.' % (msg, memory))
        track = encoder.getEmptyArray(problem_dimensions)
    else:
        track = encoder = None

    # Keyword arguments shared by every calcRows call below.
    kw = dict(
            use_scaling=dp_options.use_scaling,
            use_logs=dp_options.use_logs,
            viterbi=dp_options.viterbi,
            local=dp_options.local)

    # The option flag inverts whatever direction the caller asked for.
    if dp_options.backward:
        backward = not backward

    if backward:
        pair = self.pair.backward()
        # Build the reversed transition matrix: the interior is
        # transposed, row 0 takes the original last column and the last
        # column takes the original row 0.
        origT = T
        T = numpy.zeros(T.shape, float)
        T[1:-1,1:-1] = numpy.transpose(origT[1:-1,1:-1])
        T[0,:] = origT[:, -1]
        T[:,-1] = origT[0,:]
    else:
        pair = self.pair

    if dp_options.use_logs:
        T = numpy.log(T)

    scores = self._getEmissionProbs(
            dp_options.use_logs, dp_options.use_cost_function)

    rows = pair.getEmptyScoreArrays(len(T), dp_options)

    if cells is not None:
        # Requesting cells is not supported together with local mode.
        assert not dp_options.local
        result = self._calc_global_probs(
                pair, scores, kw, state_directions, T, rows, cells,
                backward)
    else:
        (M, N) = pair.size
        if dp_options.local:
            (maxpos, state, score) = pair.calcRows(1, M-1, 1, N-1,
                state_directions, T, scores, rows, track, encoder, **kw)
        else:
            # Global: first every row/column except the last, then the
            # final cell alone, restricted to the last state.
            pair.calcRows(0, M-1, 0, N-1,
                state_directions, T, scores, rows, track, encoder, **kw)
            end_state_only = numpy.array([(len(T)-1, 0, 1, 1)])
            (maxpos, state, score) = pair.calcRows(M-1, M, N-1, N,
                end_state_only, T, scores, rows, track, encoder, **kw)

        if track is None:
            result = score
        else:
            # The traceback is taken from self.pair, not the possibly
            # reversed local ``pair``.
            tb = self.pair.traceback(track, encoder, maxpos, state,
                    skip_last = not dp_options.local)
            result = (score, tb)
    return result
#def asym_predicate((a,b)):
#    print a, b, 'a' in a
#    return 'a' in a
#mA = Codon()
#mA.setPredicates({'asym': asym_predicate})

def exponentiator_switch(switch):
    """Set the use_new flag of cogent.evolve.substitution_calculation."""
    from cogent.evolve import substitution_calculation as calc_module
    calc_module.use_new = switch

import sys
# 'relative' on the command line selects the implementation comparison,
# anything else selects evals_per_sec.
test = (CompareImplementations(exponentiator_switch)
        if 'relative' in sys.argv else evals_per_sec)

parallel.inefficiency_forgiven = True

if parallel.getCommunicator().Get_rank() <= 0:
    try:
        benchmarks(test)
    except KeyboardInterrupt:
        print(' OK')
else:
    # Ranks above zero run the benchmarks through quiet().
    #benchmarks(test)
    quiet(benchmarks, test)
#def asym_predicate((a,b)):
#    print a, b, 'a' in a
#    return 'a' in a
#mA = Codon()
#mA.setPredicates({'asym': asym_predicate})

def exponentiator_switch(switch):
    """Set cogent.evolve.substitution_calculation.use_new to `switch`."""
    import cogent.evolve.substitution_calculation
    cogent.evolve.substitution_calculation.use_new = switch

import sys
# A 'relative' command line argument selects the implementation
# comparison; otherwise the evals_per_sec test is used.
if 'relative' in sys.argv:
    test = CompareImplementations(exponentiator_switch)
else:
    test = evals_per_sec

parallel.inefficiency_forgiven = True

if parallel.getCommunicator().rank > 0:
    # Ranks above zero run the benchmarks through quiet().
    #benchmarks(test)
    quiet(benchmarks, test)
else:
    try:
        benchmarks(test)
    except KeyboardInterrupt:
        # Parenthesised form prints the same text under the Python 2
        # print statement and the print() function.
        print(' OK')
def silly_predicate(a,b):
    """Return True if either argument has more 'A's than 'T's."""
    return a.count('A') > a.count('T') or b.count('A') > b.count('T')

#def asym_predicate((a,b)):
#    print a, b, 'a' in a
#    return 'a' in a
#mA = Codon()
#mA.setPredicates({'asym': asym_predicate})

def exponentiator_switch(switch):
    """Set cogent.evolve.substitution_calculation.use_new to `switch`."""
    import cogent.evolve.substitution_calculation
    cogent.evolve.substitution_calculation.use_new = switch

import sys
# A 'relative' command line argument selects the implementation
# comparison; otherwise the evals_per_sec test is used.
if 'relative' in sys.argv:
    test = CompareImplementations(exponentiator_switch)
else:
    test = evals_per_sec

parallel.inefficiency_forgiven = True

if parallel.getCommunicator().rank > 0:
    # Ranks above zero run the benchmarks through quiet().
    #benchmarks(test)
    quiet(benchmarks, test)
else:
    try:
        benchmarks(test)
    except KeyboardInterrupt:
        # Parenthesised form prints the same text under the Python 2
        # print statement and the print() function.
        print(' OK')