def process(ntaxa, length, nseconds, builders, branch_length_sampler):
    """
    Repeatedly sample trees and distance matrices and tally builder results.
    Each iteration samples a tree topology, gives every branch a freshly
    sampled length, samples a distance matrix from the tree and asks each
    builder to evaluate that matrix against the true splits.
    The two builder results index a 2x2 table of counts; one table is kept
    for matrices that satisfy the Atteson condition and one for the rest.
    Sampling stops when the time limit has been exceeded or when the user
    interrupts the run with KeyboardInterrupt.
    @param ntaxa: the number of taxa in the sampled trees
    @param length: the length of sequences used to sample the distance matrix
    @param nseconds: allow this many seconds to run;
    a false value (None or 0) disables the time limit
    @param builders: exactly two tree builder objects;
    each evaluate(true_splits, D) result is used as a 0/1 table index
    @param branch_length_sampler: called with no arguments once per branch;
    the value it returns is stored as the length of that branch
    @return: documented upstream as a multi-line summary string;
    NOTE(review): the code that builds the summary is not visible
    in this view -- confirm against the full file
    """
    start_time = time.time()
    # track the number of samples that failed for various reasons
    n_zero_errors = 0
    n_infinite_errors = 0
    n_failed_spectral_splits = 0
    # define the number of attempts that fall into each of the four categories
    non_atteson_results = [[0, 0], [0, 0]]
    atteson_results = [[0, 0], [0, 0]]
    #pachter_results = [[0, 0], [0, 0]]
    # evaluate the quality of reconstructions from a bunch of different samples
    try:
        while True:
            # the limit is checked only between samples,
            # so a sample that is in progress is never cut short
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample the tree topology and get its set of implied full label splits
            tree = TreeSampler.sample_agglomerated_tree(ntaxa)
            true_splits = tree.get_nontrivial_splits()
            # sample the branch lengths
            for branch in tree.get_branches():
                branch.length = branch_length_sampler()
            try:
                D = sample_distance_matrix(tree, length)
                # unpacking enforces that exactly two builders were supplied
                a, b = [builder.evaluate(true_splits, D) for builder in builders]
                if BuildTreeTopology.is_atteson(tree, D):
                    atteson_results[a][b] += 1
                #elif BuildTreeTopology.is_quartet_additive(tree, D) and BuildTreeTopology.is_quartet_consistent(tree, D):
                    #pachter_results[a][b] += 1
                else:
                    non_atteson_results[a][b] += 1
            # a failed sample is counted by failure type and then discarded
            except InfiniteDistanceError:
                n_infinite_errors += 1
            except ZeroDistanceError:
                n_zero_errors += 1
            except BuildTreeTopology.InvalidSpectralSplitException:
                n_failed_spectral_splits += 1
    except KeyboardInterrupt:
        # an interrupt is a deliberate early stop, not an error;
        # the counts accumulated so far are kept
        pass
def process(ntaxa, nseconds, branch_length_sampler): """ @param ntaxa: the number of taxa in the sampled trees @param nseconds: allow this many seconds to run or None to run forever @param branch_length_sampler: a functor that returns a branch length and has a string cast @return: a multi-line string that summarizes the results """ start_time = time.time() # initialize some state that will be tracked over the entire run degenerate_count = 0 invalid_split_count = 0 valid_split_count = 0 spectral_error_count = 0 atteson_error_count = 0 counterexample_D = None counterexample_tree = None # do a bunch of reconstructions from sampled distance matrices try: while True: elapsed_time = time.time() - start_time if nseconds and elapsed_time > nseconds: break # sample the tree topology and get its set of implied full label splits tree = TreeSampler.sample_agglomerated_tree(ntaxa) true_splits = tree.get_nontrivial_splits() # sample the branch lengths for branch in tree.get_branches(): branch.length = branch_length_sampler() # sample the atteson distance matrix D = sample_atteson_distance_matrix(tree) # assert that the atteson condition is true if not BuildTreeTopology.is_atteson(tree, D): atteson_error_count += 1 else: try: # see if the eigensplit is in the set of true splits eigensplit = BuildTreeTopology.split_using_eigenvector(D) if eigensplit in true_splits: valid_split_count += 1 else: invalid_split_count += 1 counterexample_D = D counterexample_tree = tree break except BuildTreeTopology.DegenerateSplitException, e: degenerate_count += 1 except BuildTreeTopology.InvalidSpectralSplitException, e: spectral_error_count += 1
def process(ntaxa, length, nseconds, builders, branch_length_sampler):
    """
    Repeatedly sample trees and distance matrices and tally builder results.
    Each iteration samples a tree topology, gives every branch a freshly
    sampled length, samples a distance matrix from the tree and asks each
    builder to evaluate that matrix against the true splits.
    The two builder results index a 2x2 table of counts; one table is kept
    for matrices that satisfy the Atteson condition and one for the rest.
    Sampling stops when the time limit has been exceeded or when the user
    interrupts the run with KeyboardInterrupt.
    @param ntaxa: the number of taxa in the sampled trees
    @param length: the length of sequences used to sample the distance matrix
    @param nseconds: allow this many seconds to run;
    a false value (None or 0) disables the time limit
    @param builders: exactly two tree builder objects;
    each evaluate(true_splits, D) result is used as a 0/1 table index
    @param branch_length_sampler: called with no arguments once per branch;
    the value it returns is stored as the length of that branch
    @return: documented upstream as a multi-line summary string;
    NOTE(review): the code that builds the summary is not visible
    in this view -- confirm against the full file
    """
    start_time = time.time()
    # track the number of samples that failed for various reasons
    n_zero_errors = 0
    n_infinite_errors = 0
    n_failed_spectral_splits = 0
    # define the number of attempts that fall into each of the four categories
    non_atteson_results = [[0, 0], [0, 0]]
    atteson_results = [[0, 0], [0, 0]]
    #pachter_results = [[0, 0], [0, 0]]
    # evaluate the quality of reconstructions from a bunch of different samples
    try:
        while True:
            # the limit is checked only between samples,
            # so a sample that is in progress is never cut short
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample the tree topology and get its set of implied full label splits
            tree = TreeSampler.sample_agglomerated_tree(ntaxa)
            true_splits = tree.get_nontrivial_splits()
            # sample the branch lengths
            for branch in tree.get_branches():
                branch.length = branch_length_sampler()
            try:
                D = sample_distance_matrix(tree, length)
                # unpacking enforces that exactly two builders were supplied
                a, b = [builder.evaluate(true_splits, D) for builder in builders]
                if BuildTreeTopology.is_atteson(tree, D):
                    atteson_results[a][b] += 1
                #elif BuildTreeTopology.is_quartet_additive(tree, D) and BuildTreeTopology.is_quartet_consistent(tree, D):
                    #pachter_results[a][b] += 1
                else:
                    non_atteson_results[a][b] += 1
            # a failed sample is counted by failure type and then discarded
            except InfiniteDistanceError:
                n_infinite_errors += 1
            except ZeroDistanceError:
                n_zero_errors += 1
            except BuildTreeTopology.InvalidSpectralSplitException:
                n_failed_spectral_splits += 1
    except KeyboardInterrupt:
        # an interrupt is a deliberate early stop, not an error;
        # the counts accumulated so far are kept
        pass
def process(ntaxa, length, nseconds, branch_length_sampler,
        use_nj, use_modified_nj, use_all_spectral, use_one_spectral):
    """
    Repeatedly sample trees and distance matrices and feed them to a Builder.
    Each iteration samples a tree topology, gives every branch a freshly
    sampled length, samples a distance matrix from the tree, checks whether
    the matrix is Atteson with respect to the tree and passes everything
    to the evaluate method of a single Builder created for this run.
    Sampling stops when the time limit has been exceeded or when the user
    interrupts the run with KeyboardInterrupt.
    @param ntaxa: the number of taxa in the sampled trees
    @param length: the length of sequences used to sample the distance matrix
    @param nseconds: allow this many seconds to run or None to run forever;
    any false value (including 0) disables the time limit
    @param branch_length_sampler: a functor that returns a branch length and has a string cast
    @param use_nj: forwarded unchanged to Builder.evaluate;
    presumably enables the neighbor joining method -- TODO confirm
    @param use_modified_nj: forwarded unchanged to Builder.evaluate;
    presumably enables a modified neighbor joining method -- TODO confirm
    @param use_all_spectral: forwarded unchanged to Builder.evaluate;
    presumably enables a fully spectral method -- TODO confirm
    @param use_one_spectral: forwarded unchanged to Builder.evaluate;
    presumably enables a single spectral split -- TODO confirm
    @return: documented upstream as a multi-line summary string;
    NOTE(review): the code that builds the summary is not visible
    in this view -- confirm against the full file
    """
    start_time = time.time()
    # initialize the builder object
    builder = Builder()
    # track the number of samples that failed for various reasons
    n_zero_errors = 0
    n_infinite_errors = 0
    n_failed_spectral_splits = 0
    # do a bunch of reconstructions of sampled distance matrices
    try:
        while True:
            # the limit is checked only between samples,
            # so a sample that is in progress is never cut short
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample the tree topology and get its set of implied full label splits
            tree = TreeSampler.sample_agglomerated_tree(ntaxa)
            true_splits = tree.get_nontrivial_splits()
            # sample the branch lengths
            for branch in tree.get_branches():
                branch.length = branch_length_sampler()
            try:
                D = sample_distance_matrix(tree, length)
                # determine whether or not the distance matrix is Atteson with respect to the tree
                atteson = BuildTreeTopology.is_atteson(tree, D)
                # record information about the splits
                builder.evaluate(
                        true_splits, D, atteson,
                        use_nj, use_modified_nj,
                        use_all_spectral, use_one_spectral)
            # a failed sample is counted by failure type and then discarded
            except InfiniteDistanceError:
                n_infinite_errors += 1
            except ZeroDistanceError:
                n_zero_errors += 1
            except BuildTreeTopology.InvalidSpectralSplitException:
                n_failed_spectral_splits += 1
    except KeyboardInterrupt:
        # an interrupt is a deliberate early stop, not an error;
        # the state accumulated so far is kept
        pass
# see if the top down reconstruction was successful try: splitter = BuildTreeTopology.split_using_eigenvector_with_nj_fallback if nj_like: updater = BuildTreeTopology.update_generalized_nj else: updater = BuildTreeTopology.update_using_laplacian all_spectral_splits = BuildTreeTopology.get_splits( D, splitter, updater) top_down_success = (all_spectral_splits == true_splits) except BuildTreeTopology.InvalidSpectralSplitException, e: return incr_attribute(attribute_array, 'nsamples.rejected.fail') # at this point the sample is accepted incr_attribute(attribute_array, 'nsamples.accepted') # determine whether or not the distance matrix is Atteson with respect to the tree if BuildTreeTopology.is_atteson(tree, D): incr_attribute(attribute_array, 'nsamples.accepted.atteson') # see if the bottom up reconstruction was successful nj_splits = BuildTreeTopology.get_splits(D, BuildTreeTopology.split_nj, BuildTreeTopology.update_nj) nj_success = (nj_splits == true_splits) # note the joint results of the two reconstruction methods if top_down_success and nj_success: incr_attribute(attribute_array, 'nsuccesses.both') elif (not top_down_success) and (not nj_success): incr_attribute(attribute_array, 'nsuccesses.neither') elif top_down_success and (not nj_success): incr_attribute(attribute_array, 'nsuccesses.topdown.only') elif (not top_down_success) and nj_success: incr_attribute(attribute_array, 'nsuccesses.nj.only') # characterize the result of the first spectral split
return incr_attribute(attribute_array, 'nsamples.rejected.fail') # see if the top down reconstruction was successful try: splitter = BuildTreeTopology.split_using_eigenvector_with_nj_fallback if nj_like: updater = BuildTreeTopology.update_generalized_nj else: updater = BuildTreeTopology.update_using_laplacian all_spectral_splits = BuildTreeTopology.get_splits(D, splitter, updater) top_down_success = (all_spectral_splits == true_splits) except BuildTreeTopology.InvalidSpectralSplitException, e: return incr_attribute(attribute_array, 'nsamples.rejected.fail') # at this point the sample is accepted incr_attribute(attribute_array, 'nsamples.accepted') # determine whether or not the distance matrix is Atteson with respect to the tree if BuildTreeTopology.is_atteson(tree, D): incr_attribute(attribute_array, 'nsamples.accepted.atteson') # see if the bottom up reconstruction was successful nj_splits = BuildTreeTopology.get_splits(D, BuildTreeTopology.split_nj, BuildTreeTopology.update_nj) nj_success = (nj_splits == true_splits) # note the joint results of the two reconstruction methods if top_down_success and nj_success: incr_attribute(attribute_array, 'nsuccesses.both') elif (not top_down_success) and (not nj_success): incr_attribute(attribute_array, 'nsuccesses.neither') elif top_down_success and (not nj_success): incr_attribute(attribute_array, 'nsuccesses.topdown.only') elif (not top_down_success) and nj_success: incr_attribute(attribute_array, 'nsuccesses.nj.only') # characterize the result of the first spectral split try: