Example #1
    def __init__(self,
                 seqs,
                 submodel,
                 threeway=False,
                 motif_probs=None,
                 do_pair_align=False,
                 rigorous_align=False,
                 est_params=None,
                 modify_lf=None):
        """Arguments:
            - seqs: an Alignment or SeqCollection instance with > 1 sequence
            - submodel: substitution model object Predefined models can
              be imported from cogent.evolve.models
            - threeway: a boolean flag for using threeway comparisons to
              estimate distances. default False. Ignored if do_pair_align is
              True.
            - do_pair_align: if the input sequences are to be pairwise aligned
              first and then the distance will be estimated. A pair HMM based
              on the submodel will be used.
            - rigorous_align: if True the pairwise alignments are actually
              numerically optimised, otherwise the current substitution model
              settings are used. This slows down estimation considerably.
            - est_params: substitution model parameters to save estimates from
              in addition to length (distance)
            - modify_lf: a callback function for that takes a likelihood
              function (with alignment set) and modifies it. Can be used to
              configure local_params, set bounds, optimise using a restriction
              for faster performance.
        
        Note: Unless you know a priori your alignment will be flush ended
        (meaning no sequence has terminal gaps) it is advisable to construct a
        substitution model that recodes gaps. Otherwise the terminal gaps will
        significantly bias the estimation of branch lengths when using
        do_pair_align.
        """

        if do_pair_align:
            self.__threeway = False
        else:
            # whether pairwise distances are to be estimated from three-way
            # comparisons; forced off above when pairwise aligning
            self.__threeway = threeway

        self.__seq_collection = seqs
        self.__seqnames = seqs.getSeqNames()
        self.__motif_probs = motif_probs
        # the following may be pairs or three way combinations
        self.__combination_aligns = None
        self._do_pair_align = do_pair_align
        self._rigorous_align = rigorous_align
        # substitution model stuff
        self.__sm = submodel

        self._modify_lf = modify_lf
        # store for the results
        self.__param_ests = {}
        self.__est_params = list(est_params or [])

        self.__run = False  # a flag indicating whether estimation completed
        # whether we're on the master CPU or not
        self._on_master_cpu = parallel.getCommunicator().Get_rank() == 0
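
A minimal usage sketch for this constructor, assuming it is cogent.evolve.distance.EstimateDistances (the signature matches that class); the input file name is hypothetical:

from cogent import LoadSeqs
from cogent.evolve.models import HKY85
from cogent.evolve.distance import EstimateDistances

aln = LoadSeqs('primates.fasta', aligned=False)  # hypothetical unaligned input
d = EstimateDistances(aln, submodel=HKY85(), do_pair_align=True)
d.run()
print d.getPairwiseDistances()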
Example #2
 def __init__(self, filename, interval=None, noisy=True):
     if interval is None:
         interval = 1800  # default: checkpoint at most every 30 minutes
     self.filename = filename
     self.interval = interval
     self.last_time = time.time()
     self.noisy = noisy
     # only the rank-0 MPI process writes; all other ranks are redundant
     self._redundant = parallel.getCommunicator().Get_rank() > 0
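
The _redundant flag marks every non-root MPI rank so that only one process writes the checkpoint file. A hypothetical sketch of how such a flag is typically consulted; the record method name and its behaviour are assumptions, not confirmed API:

import time

def record(self, obj):
    # non-root MPI ranks skip writing entirely (assumed behaviour)
    if self._redundant:
        return
    # honour the minimum interval between checkpoints
    if time.time() - self.last_time < self.interval:
        return
    # ... serialise obj to self.filename here ...
    self.last_time = time.time()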
Example #3
 def run(self, ui, **opt_args):
     # Sets self.observed and self.results (a list _numreplicates long) to
     # whatever is returned from self.simplify([LF result from each PC]).
     # self.simplify() is used as the entire LF result might not be picklable
     # for MPI. Subclass must provide self.alignment and
     # self.parameter_controllers
     if 'random_series' not in opt_args and not opt_args.get('local', None):
         opt_args['random_series'] = random.Random()
     
     null_pc = self.parameter_controllers[0]
     pcs = len(self.parameter_controllers)
     if pcs == 1:
         model_label = ['']
     elif pcs == 2:
         model_label = ['null', 'alt ']
     else:
         model_label = ['null'] + ['alt%s' % i for i in range(1, pcs)]
     
     @UI.display_wrap
     def each_model(alignment, ui):
         def one_model(pc):
             pc.setAlignment(alignment)
             return pc.optimise(return_calculator=True, **opt_args)
         # This is not done in parallel because we depend on the side-
         # effect of changing the parameter_controller current values 
         memos = ui.eager_map(one_model, self.parameter_controllers, 
                 labels=model_label, pure=False)
         concise_result = self.simplify(*self.parameter_controllers)
         return (memos, concise_result)
     
     #optimisations = pcs * (self._numreplicates + 1)
     # fraction of the total work spent fitting the original data;
     # float() guards against Python 2 integer division
     init_work = pcs / float(self._numreplicates + pcs)
     ui.display('Original data', 0.0, init_work)
     (starting_points, self.observed) = each_model(self.alignment)
     
     ui.display('Randomness', init_work, 0.0)
     alignment_random_state = random.Random(self.seed).getstate()
     if self.seed is None:
         # unseeded: each MPI process drew a different state, so every rank
         # adopts rank 0's state to keep the simulated alignments identical
         comm = parallel.getCommunicator()
         alignment_random_state = comm.bcast(alignment_random_state, 0)
     
     def one_replicate(i):
         for (pc, start_point) in zip(self.parameter_controllers, starting_points):
             # may have fewer CPUs per replicate than for original
             pc.setupParallelContext()
             # using a calculator as a memo object to reset the params
             pc.updateFromCalculator(start_point)
         aln_rnd = random.Random(0)
         aln_rnd.setstate(alignment_random_state)
         aln_rnd.jumpahead(i * 10**9)  # distinct, reproducible stream per replicate
         simalign = null_pc.simulateAlignment(random_series=aln_rnd)
         (dummy, result) = each_model(simalign)
         return result
     
     ui.display('Bootstrap', init_work)
     self.results = ui.eager_map(
             one_replicate, range(self._numreplicates), noun='replicate',
             start=init_work)
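
The replicate loop above restores one shared base state and then jumps far ahead by the replicate index, so every bootstrap replicate gets its own reproducible random stream no matter which CPU runs it. A standalone Python 2 sketch of the same technique (random.jumpahead was removed in Python 3):

import random

base_state = random.Random(42).getstate()

def replicate_rng(i):
    rnd = random.Random(0)
    rnd.setstate(base_state)   # every replicate starts from the same state
    rnd.jumpahead(i * 10**9)   # then diverges deterministically by index
    return rnd

print [replicate_rng(i).random() for i in range(3)]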
Example #4
 def __init__(self, seqs, submodel, threeway=False, motif_probs=None,
             do_pair_align=False, rigorous_align=False, est_params=None,
             modify_lf=None):
     """Arguments:
         - seqs: an Alignment or SeqCollection instance with > 1 sequence
         - submodel: substitution model object Predefined models can
           be imported from cogent.evolve.models
         - threeway: a boolean flag for using threeway comparisons to
           estimate distances. default False. Ignored if do_pair_align is
           True.
         - do_pair_align: if the input sequences are to be pairwise aligned
           first and then the distance will be estimated. A pair HMM based
           on the submodel will be used.
         - rigorous_align: if True the pairwise alignments are actually
           numerically optimised, otherwise the current substitution model
           settings are used. This slows down estimation considerably.
         - est_params: substitution model parameters to save estimates from
           in addition to length (distance)
         - modify_lf: a callback function for that takes a likelihood
           function (with alignment set) and modifies it. Can be used to
           configure local_params, set bounds, optimise using a restriction
           for faster performance.
     
     Note: Unless you know a priori your alignment will be flush ended
     (meaning no sequence has terminal gaps) it is advisable to construct a
     substitution model that recodes gaps. Otherwise the terminal gaps will
     significantly bias the estimation of branch lengths when using
     do_pair_align.
     """
     
     if do_pair_align:
         self._threeway = False
     else:
         # whether pairwise distances are to be estimated from three-way
         # comparisons; forced off above when pairwise aligning
         self._threeway = threeway
     
     self._seq_collection = seqs
     self._seqnames = seqs.getSeqNames()
     self._motif_probs = motif_probs
     
     # the following may be pairs or three way combinations
     self._combination_aligns = None
     self._do_pair_align = do_pair_align
     self._rigorous_align = rigorous_align
     
     # substitution model stuff
     self._sm = submodel
     self._modify_lf = modify_lf
     
     # store for the results
     self._param_ests = {}
     self._est_params = list(est_params or [])
     
     self._run = False # a flag indicating whether estimation completed
     # whether we're on the master CPU or not
     self._on_master_cpu = parallel.getCommunicator().Get_rank() == 0
Example #5
 def measureEvalsPerSecond(self, time_limit=1.0, wall=True, sa=False):
     # Returns an estimate of the number of evaluations per second
     # an each-optpar-in-turn simulated annealing type optimiser
     # can achieve, spending not much more than 'time_limit' doing so.
     # 'wall'=False causes process time to be used instead of wall time.
     # 'sa' makes it simulated-annealing-like, with frequent backtracks.
     if wall:
         now = time.time
     else:
         now = time.clock
     x = self.getValueArray()
     samples = []
     elapsed = 0.0
     rounds_per_sample = 2
     comm = parallel.getCommunicator()
     while elapsed < time_limit and len(samples) < 5:
         time.sleep(0.01)
         t0 = now()
         last = []
         for j in range(rounds_per_sample):
             for (i, v) in enumerate(x):
                 # Not a real change, but works like one.
                 self.change(last + [(i, v)])
                 if sa and (i + j) % 2:
                     last = [(i, v)]
                 else:
                     last = []
         # Use one agreed-on delta, otherwise different CPUs would finish
         # the loop at different times, causing chaos.
         delta = comm.allreduce(now() - t0, parallel.MPI.MAX)
         if delta < 0.1:
             # time.clock is low res, so need to ensure each sample
             # is long enough to take SOME time.
             rounds_per_sample *= 2
             continue
         else:
             rate = rounds_per_sample * len(x) / delta
             samples.append(rate)
             elapsed += delta
     
     if wall:
         # wall-clock samples are noisy (other processes), so use the median
         samples.sort()
         return samples[len(samples)//2]
     else:
         return sum(samples) / len(samples)
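
The allreduce with MAX is what keeps the ranks in step: every process adopts the slowest rank's elapsed time, so all ranks take the same branch on the next loop iteration. A minimal mpi4py sketch of that pattern:

from mpi4py import MPI
import time

comm = MPI.COMM_WORLD
t0 = time.time()
time.sleep(0.01 * (comm.Get_rank() + 1))  # ranks finish at different times
delta = comm.allreduce(time.time() - t0, op=MPI.MAX)
# every rank now holds the same (slowest) elapsed time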
Example #6
def setupRootUiContext(progressBarConstructor=None, rate=None):
    """Select a UI Context type depending on system environment"""
    if parallel.getCommunicator().Get_rank() != 0:
        klass = None
    elif progressBarConstructor is not None:
        klass = progressBarConstructor
    elif curses_terminal and sys.stdout.isatty():
        klass = CursesTerminalProgressBar
    elif isinstance(sys.stdout, file):
        # output is redirected to a plain file ('file' is the Python 2 builtin)
        klass = LogFileOutput
        if rate is None:
            rate = 5.0
    else:
        klass = None

    if klass is None:
        CURRENT.context = NULL_CONTEXT
    else:
        if rate is None:
            rate = 0.1
        CURRENT.context = RootProgressContext(klass, rate)
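
Hypothetical calls, assuming this function performs the one-time progress-UI setup for the module above:

# rely on the environment: curses bar on a tty, log output to a file,
# nothing on non-root MPI ranks
setupRootUiContext(rate=1.0)   # refresh at most once per second

# or force a specific progress bar class (assumed to satisfy the same
# interface as CursesTerminalProgressBar)
setupRootUiContext(progressBarConstructor=CursesTerminalProgressBar)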
Example #7
# def asym_predicate((a,b)):
#    print a, b, 'a' in a
#    return 'a' in a
# mA = Codon()
# mA.setPredicates({'asym': asym_predicate})


def exponentiator_switch(switch):
    import cogent.evolve.substitution_calculation

    cogent.evolve.substitution_calculation.use_new = switch


import sys

if "relative" in sys.argv:
    test = CompareImplementations(exponentiator_switch)
else:
    test = evals_per_sec

parallel.inefficiency_forgiven = True

if parallel.getCommunicator().Get_rank() > 0:
    # non-root MPI ranks run the benchmark with output suppressed
    quiet(benchmarks, test)
else:
    try:
        benchmarks(test)
    except KeyboardInterrupt:
        print " OK"
Example #8
 def dp(self, TM, dp_options, cells=None, backward=False):
     """Score etc. from a Dynamic Programming function applied to this pair.
     
     TM - (state_directions, array) describing the Transition Matrix.
     dp_options - instance of DPFlags indicating algorithm etc.
     cells - List of (state, posn) for which posterior probs are requested.
     backward - run algorithm in reverse order.
     """
     (state_directions, T) = TM
     if dp_options.viterbi and cells is None:
         encoder = self.pair.getPointerEncoding(len(T))
         problem_dimensions = self.pair.size + [len(T)]
         problem_size = numpy.product(problem_dimensions)
         memory = problem_size * encoder.bytes / 10**6
         if dp_options.local:
             msg = 'Local alignment'
         elif self.pair.size[0]-2 >= 3 and not backward and (
                 problem_size > HIRSCHBERG_LIMIT or
                 parallel.getCommunicator().Get_size() > 1):
             # too big to keep the traceback in memory (or running on
             # multiple CPUs): use the linear-space Hirschberg recursion
             return self.hirschberg(TM, dp_options)
         else:
             msg = 'dp'
         if memory > 500:
             warnings.warn('%s will use > %sMb.' % (msg, memory))
         track = encoder.getEmptyArray(problem_dimensions)
     else:
         track = encoder = None
     
     kw = dict(
             use_scaling=dp_options.use_scaling,
             use_logs=dp_options.use_logs,
             viterbi=dp_options.viterbi,
             local=dp_options.local)
     
     if dp_options.backward:
         backward = not backward
     
     if backward:
         pair = self.pair.backward()
         origT = T
         T = numpy.zeros(T.shape, float)
         # transpose interior transitions; the begin row and end column
         # swap roles when the sequences are reversed
         T[1:-1, 1:-1] = numpy.transpose(origT[1:-1, 1:-1])
         T[0, :] = origT[:, -1]
         T[:, -1] = origT[0, :]
     else:
         pair = self.pair
     
     if dp_options.use_logs:
         T = numpy.log(T)
     
     scores = self._getEmissionProbs(
             dp_options.use_logs, dp_options.use_cost_function)
     
     rows = pair.getEmptyScoreArrays(len(T), dp_options)
     
     if cells is not None:
         assert not dp_options.local
         result = self._calc_global_probs(
                 pair, scores, kw, state_directions, T, rows, cells,
                 backward)
     else:
         (M, N) = pair.size
         if dp_options.local:
             (maxpos, state, score) = pair.calcRows(1, M-1, 1, N-1,
                 state_directions, T, scores, rows, track, encoder, **kw)
         else:
             pair.calcRows(0, M-1, 0, N-1,
                 state_directions, T, scores, rows, track, encoder, **kw)
             end_state_only = numpy.array([(len(T)-1, 0, 1, 1)])
             (maxpos, state, score) = pair.calcRows(M-1, M, N-1, N,
                 end_state_only, T, scores, rows, track, encoder, **kw)
                 
         if track is None:
             result = score
         else:
             tb = self.pair.traceback(track, encoder, maxpos, state,
                 skip_last=not dp_options.local)
             result = (score, tb)
     return result
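
The transition-matrix reversal in the backward branch can be checked in isolation: interior transitions are transposed while the begin row and the end column swap roles, which is what evaluating the same pair HMM over reversed sequences requires. A small numpy demonstration (Python 2 print to match the surrounding code):

import numpy

T = numpy.arange(16.0).reshape(4, 4)  # state 0 = begin, state -1 = end
R = numpy.zeros(T.shape, float)
R[1:-1, 1:-1] = numpy.transpose(T[1:-1, 1:-1])
R[0, :] = T[:, -1]   # transitions into end become exits from begin
R[:, -1] = T[0, :]   # exits from begin become transitions into end
print R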
Example #9

#def asym_predicate((a,b)):
#    print a, b, 'a' in a
#    return 'a' in a
#mA = Codon()
#mA.setPredicates({'asym': asym_predicate})


def exponentiator_switch(switch):
    import cogent.evolve.substitution_calculation
    cogent.evolve.substitution_calculation.use_new = switch


import sys
if 'relative' in sys.argv:
    test = CompareImplementations(exponentiator_switch)
else:
    test = evals_per_sec

parallel.inefficiency_forgiven = True

if parallel.getCommunicator().Get_rank() > 0:
    # non-root MPI ranks run the benchmark with output suppressed
    quiet(benchmarks, test)
else:
    try:
        benchmarks(test)
    except KeyboardInterrupt:
        print(' OK')
Example #10

#def asym_predicate((a,b)):
#    print a, b, 'a' in a
#    return 'a' in a
#mA = Codon()
#mA.setPredicates({'asym': asym_predicate})


def exponentiator_switch(switch):
    import cogent.evolve.substitution_calculation
    cogent.evolve.substitution_calculation.use_new = switch


import sys
if 'relative' in sys.argv:
    test = CompareImplementations(exponentiator_switch)
else:
    test = evals_per_sec

parallel.inefficiency_forgiven = True

if parallel.getCommunicator().rank > 0:
    # non-root MPI ranks run the benchmark with output suppressed
    quiet(benchmarks, test)
else:
    try:
        benchmarks(test)
    except KeyboardInterrupt:
        print ' OK'
Example #11
def silly_predicate(a, b):
    return a.count('A') > a.count('T') or b.count('A') > b.count('T')

#def asym_predicate((a,b)):
#    print a, b, 'a' in a
#    return 'a' in a
#mA = Codon()
#mA.setPredicates({'asym': asym_predicate})

def exponentiator_switch(switch):
    import cogent.evolve.substitution_calculation
    cogent.evolve.substitution_calculation.use_new = switch

import sys
if 'relative' in sys.argv:
    test = CompareImplementations(exponentiator_switch)
else:
    test = evals_per_sec

parallel.inefficiency_forgiven = True

if parallel.getCommunicator().rank > 0:
    # non-root MPI ranks run the benchmark with output suppressed
    quiet(benchmarks, test)
else:
    try:
        benchmarks(test)
    except KeyboardInterrupt:
        print ' OK'
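
silly_predicate compares the base composition of the two motifs on either side of a substitution. Mirroring the commented-out asym example above, it could be attached to a codon model like this (a sketch; the Codon import path is an assumption):

from cogent.evolve.substitution_model import Codon

mA = Codon()
mA.setPredicates({'silly': silly_predicate})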