def process(ntaxa, nseconds, branch_length_sampler): """ @param ntaxa: the number of taxa in the sampled trees @param nseconds: allow this many seconds to run or None to run forever @param branch_length_sampler: a functor that returns a branch length and has a string cast @return: a multi-line string that summarizes the results """ start_time = time.time() # initialize some state that will be tracked over the entire run degenerate_count = 0 invalid_split_count = 0 valid_split_count = 0 spectral_error_count = 0 atteson_error_count = 0 counterexample_D = None counterexample_tree = None # do a bunch of reconstructions from sampled distance matrices try: while True: elapsed_time = time.time() - start_time if nseconds and elapsed_time > nseconds: break # sample the tree topology and get its set of implied full label splits tree = TreeSampler.sample_agglomerated_tree(ntaxa) true_splits = tree.get_nontrivial_splits() # sample the branch lengths for branch in tree.get_branches(): branch.length = branch_length_sampler() # sample the atteson distance matrix D = sample_atteson_distance_matrix(tree) # assert that the atteson condition is true if not BuildTreeTopology.is_atteson(tree, D): atteson_error_count += 1 else: try: # see if the eigensplit is in the set of true splits eigensplit = BuildTreeTopology.split_using_eigenvector(D) if eigensplit in true_splits: valid_split_count += 1 else: invalid_split_count += 1 counterexample_D = D counterexample_tree = tree break except BuildTreeTopology.DegenerateSplitException, e: degenerate_count += 1 except BuildTreeTopology.InvalidSpectralSplitException, e: spectral_error_count += 1
def do_search(self, nseconds, sampling_function): """ @param nseconds: allowed search time or None @param sampling_function: a function that samples a branch length @return: True if a tree was found that met the criteria """ if not self.is_initialized(): raise RuntimeError("the search was not sufficiently initialized") true_splits = self.tree.get_nontrivial_splits() start_time = time.time() while True: elapsed_time = time.time() - start_time if nseconds and elapsed_time > nseconds: return False # assign new sampled branch lengths for branch in self.tree.get_branches(): branch.length = sampling_function() # get the distance matrix so we can use a library function to get the split D = np.array(self.tree.get_distance_matrix()) ntips = len(D) # get the Laplacian matrix of the full tree and the corresponding Fiedler split of the leaves if self.force_difference or self.informative_full_split: A_aug = np.array(self.tree.get_weighted_adjacency_matrix(self.id_to_index)) L_aug = Euclid.adjacency_to_laplacian(A_aug) v_aug = BuildTreeTopology.laplacian_to_fiedler(L_aug) left_aug, right_aug = BuildTreeTopology.eigenvector_to_split(v_aug) left = [x for x in left_aug if x in range(ntips)] right = [x for x in right_aug if x in range(ntips)] leaf_eigensplit_aug = BuildTreeTopology.make_split(left, right) if self.force_difference: if leaf_eigensplit_aug == self.desired_primary_split: self.aug_split_collision_count += 1 continue if self.informative_full_split: if min(len(s) for s in leaf_eigensplit_aug) < 2: self.aug_split_degenerate_count += 1 continue # get the eigensplit try: eigensplit = BuildTreeTopology.split_using_eigenvector(D) except BuildTreeTopology.DegenerateSplitException, e: self.degenerate_primary_split_count += 1 continue except BuildTreeTopology.InvalidSpectralSplitException, e: self.error_primary_split_count += 1 continue
def do_search(self, nseconds, sampling_function): """ @param nseconds: allowed search time or None @param sampling_function: a function that samples a branch length @return: True if a tree was found that met the criteria """ if not self.is_initialized(): raise RuntimeError('the search was not sufficiently initialized') true_splits = self.tree.get_nontrivial_splits() start_time = time.time() while True: elapsed_time = time.time() - start_time if nseconds and elapsed_time > nseconds: return False # assign new sampled branch lengths for branch in self.tree.get_branches(): branch.length = sampling_function() # get the distance matrix so we can use a library function to get the split D = np.array(self.tree.get_distance_matrix()) ntips = len(D) # get the Laplacian matrix of the full tree and the corresponding Fiedler split of the leaves if self.force_difference or self.informative_full_split: A_aug = np.array(self.tree.get_weighted_adjacency_matrix(self.id_to_index)) L_aug = Euclid.adjacency_to_laplacian(A_aug) v_aug = BuildTreeTopology.laplacian_to_fiedler(L_aug) left_aug, right_aug = BuildTreeTopology.eigenvector_to_split(v_aug) left = [x for x in left_aug if x in range(ntips)] right = [x for x in right_aug if x in range(ntips)] leaf_eigensplit_aug = BuildTreeTopology.make_split(left, right) if self.force_difference: if leaf_eigensplit_aug == self.desired_primary_split: self.aug_split_collision_count += 1 continue if self.informative_full_split: if min(len(s) for s in leaf_eigensplit_aug) < 2: self.aug_split_degenerate_count += 1 continue # get the eigensplit try: eigensplit = BuildTreeTopology.split_using_eigenvector(D) except BuildTreeTopology.DegenerateSplitException, e: self.degenerate_primary_split_count += 1 continue except BuildTreeTopology.InvalidSpectralSplitException, e: self.error_primary_split_count += 1 continue
# see if the bottom up reconstruction was successful nj_splits = BuildTreeTopology.get_splits(D, BuildTreeTopology.split_nj, BuildTreeTopology.update_nj) nj_success = (nj_splits == true_splits) # note the joint results of the two reconstruction methods if top_down_success and nj_success: incr_attribute(attribute_array, 'nsuccesses.both') elif (not top_down_success) and (not nj_success): incr_attribute(attribute_array, 'nsuccesses.neither') elif top_down_success and (not nj_success): incr_attribute(attribute_array, 'nsuccesses.topdown.only') elif (not top_down_success) and nj_success: incr_attribute(attribute_array, 'nsuccesses.nj.only') # characterize the result of the first spectral split try: eigensplit = BuildTreeTopology.split_using_eigenvector(D) if eigensplit in true_splits: incr_attribute(attribute_array, 'first.split.informative') else: incr_attribute(attribute_array, 'first.split.invalid') except BuildTreeTopology.DegenerateSplitException, e: incr_attribute(attribute_array, 'first.split.uninformative') # return the attribute array return attribute_array def process(ntaxa, nseconds, nlengths, nsamples, nj_like, branch_length_sampler, use_pbar): """ @param ntaxa: the number of taxa per tree @param nseconds: stop after this many seconds
incr_attribute(attribute_array, 'nsamples.accepted.atteson') # see if the bottom up reconstruction was successful nj_splits = BuildTreeTopology.get_splits(D, BuildTreeTopology.split_nj, BuildTreeTopology.update_nj) nj_success = (nj_splits == true_splits) # note the joint results of the two reconstruction methods if top_down_success and nj_success: incr_attribute(attribute_array, 'nsuccesses.both') elif (not top_down_success) and (not nj_success): incr_attribute(attribute_array, 'nsuccesses.neither') elif top_down_success and (not nj_success): incr_attribute(attribute_array, 'nsuccesses.topdown.only') elif (not top_down_success) and nj_success: incr_attribute(attribute_array, 'nsuccesses.nj.only') # characterize the result of the first spectral split try: eigensplit = BuildTreeTopology.split_using_eigenvector(D) if eigensplit in true_splits: incr_attribute(attribute_array, 'first.split.informative') else: incr_attribute(attribute_array, 'first.split.invalid') except BuildTreeTopology.DegenerateSplitException, e: incr_attribute(attribute_array, 'first.split.uninformative') # return the attribute array return attribute_array def process(ntaxa, nseconds, nlengths, nsamples, nj_like, branch_length_sampler, use_pbar): """ @param ntaxa: the number of taxa per tree @param nseconds: stop after this many seconds @param nlengths: use this many different sequence lengths @param nsamples: stop after this many samples per sequence length
def process(ntaxa, nseconds, seqlen, nsamples, branch_length_sampler, use_pbar): """ @param ntaxa: the number of taxa per tree @param nseconds: stop after this many seconds @param seqlen: use this sequence length @param nsamples: stop after this many samples per sequence length @param branch_length_sampler: this function samples branch lengths independently @param use_pbar: True iff a progress bar should be used @return: a multi-line string of the contents of an R table """ # initialize the global rejection counts nrejected_zero = 0 nrejected_inf = 0 nrejected_fail = 0 naccepted = 0 # Initialize the accumulation matrix. # The rows specify the size of the smaller side of the initial split. # The columns specify the compatibility status of the split. nsmall_sizes = (ntaxa / 2) + 1 accum = np.zeros((nsmall_sizes, 2), dtype=np.int) # Repeatedly analyze samples. # We might have to stop early if we run out of time or if ctrl-c is pressed. # If we have to stop early, then show the results of the progress so far. termination_reason = 'no reason for termination was given' start_time = time.time() pbar = Progress.Bar(nsamples) if use_pbar else None try: for sample_index in range(nsamples): # keep trying to get an accepted sample while True: # check the time if nseconds and time.time() - start_time > nseconds: raise TimeoutError() # first sample a tree and get its set of informative splits tree = TreeSampler.sample_agglomerated_tree(ntaxa) true_splits = tree.get_nontrivial_splits() # sample the branch lengths for branch in tree.get_branches(): branch.length = branch_length_sampler() # Attempt to sample a distance matrix. # If the sample was rejected then note the reason and go back to the drawing board. try: D = sample_distance_matrix(tree, seqlen) except InfiniteDistanceError as e: nrejected_inf += 1 continue except ZeroDistanceError as e: nrejected_zero += 1 continue # Attempt to estimate the primary split of the tree from the distance matrix. # If there was a technical failure then note it and go back to the drawing board. # Otherwise note the compatibility and balance of the split. try: eigensplit = BuildTreeTopology.split_using_eigenvector(D) small_size = min(len(side) for side in eigensplit) if eigensplit in true_splits: compatibility = 1 else: compatibility = 0 except BuildTreeTopology.DegenerateSplitException, e: small_size = 0 compatibility = 1 except BuildTreeTopology.InvalidSpectralSplitException, e: nrejected_fail += 1 continue