Exemplo n.º 1
0
def process(ntaxa, length, nseconds, builders, branch_length_sampler):
    """
    Sample trees until the time limit and tally the builders' results.

    Each iteration samples a tree topology, gives every branch a sampled
    length, samples a distance matrix from the tree, and asks each builder
    to evaluate that matrix against the true splits of the tree.  The two
    builder results index a 2x2 table of counts; one table is kept for
    samples whose distance matrix satisfies BuildTreeTopology.is_atteson
    and another for the rest.  Samples that raise one of the handled
    errors are counted and skipped.  A KeyboardInterrupt ends the
    sampling loop early instead of propagating.

    NOTE(review): this excerpt ends after the sampling loop; the code that
    turns the counters into the documented summary string is not visible.

    @param ntaxa: the number of taxa in the sampled trees
    @param length: the length of sequences used to sample the distance matrix
    @param nseconds: allow this many seconds to run; a false value disables the limit
    @param builders: exactly two tree builder objects, each with an evaluate(true_splits, D) method whose result indexes a 2x2 table
    @param branch_length_sampler: a callable that returns one branch length per call
    @return: a multi-line string that summarizes the results
    """
    start_time = time.time()
    # track the number of samples that failed for various reasons
    n_zero_errors = 0
    n_infinite_errors = 0
    n_failed_spectral_splits = 0
    # 2x2 tables of counts indexed by the results of the two builders,
    # kept separately for non-Atteson and Atteson distance matrices
    non_atteson_results = [[0, 0], [0, 0]]
    atteson_results = [[0, 0], [0, 0]]
    #pachter_results = [[0, 0], [0, 0]]
    # evaluate the quality of reconstructions from a bunch of different samples
    try:
        while True:
            # the time limit is only checked between samples
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample the tree topology and get its set of implied full label splits
            tree = TreeSampler.sample_agglomerated_tree(ntaxa)
            true_splits = tree.get_nontrivial_splits()
            # sample the branch lengths
            for branch in tree.get_branches():
                branch.length = branch_length_sampler()
            try:
                D = sample_distance_matrix(tree, length)
                # the unpacking requires exactly two builders
                a, b = [
                    builder.evaluate(true_splits, D) for builder in builders
                ]
                if BuildTreeTopology.is_atteson(tree, D):
                    atteson_results[a][b] += 1
                #elif BuildTreeTopology.is_quartet_additive(tree, D) and BuildTreeTopology.is_quartet_consistent(tree, D):
                #pachter_results[a][b] += 1
                else:
                    non_atteson_results[a][b] += 1
            # the exception objects are not needed, so they are not bound;
            # this form is valid on both Python 2 and Python 3
            except InfiniteDistanceError:
                n_infinite_errors += 1
            except ZeroDistanceError:
                n_zero_errors += 1
            except BuildTreeTopology.InvalidSpectralSplitException:
                n_failed_spectral_splits += 1
    except KeyboardInterrupt:
        # an interrupt stops the sampling; the counts gathered so far are kept
        pass
Exemplo n.º 2
0
def process(ntaxa, nseconds, branch_length_sampler):
    """
    Look for a sampled distance matrix whose eigenvector split is not a true split.

    Each iteration samples a tree topology, gives every branch a sampled
    length, samples a distance matrix with sample_atteson_distance_matrix,
    and checks whether the split returned by
    BuildTreeTopology.split_using_eigenvector is one of the nontrivial
    splits of the tree.  The loop stops at the first split that is not,
    keeping that matrix and tree as the counterexample.

    NOTE(review): this excerpt ends inside the outer try block, so the
    handler of that block and the code that builds the returned summary
    are not visible here.

    @param ntaxa: the number of taxa in the sampled trees
    @param nseconds: allow this many seconds to run or None to run forever
    @param branch_length_sampler: a functor that returns a branch length and has a string cast
    @return: a multi-line string that summarizes the results
    """
    start_time = time.time()
    # initialize some state that will be tracked over the entire run
    degenerate_count = 0
    invalid_split_count = 0
    valid_split_count = 0
    spectral_error_count = 0
    atteson_error_count = 0
    # these stay None unless an invalid split is found
    counterexample_D = None
    counterexample_tree = None
    # do a bunch of reconstructions from sampled distance matrices
    try:
        while True:
            # the time limit is only checked between samples;
            # a false nseconds value disables it
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample the tree topology and get its set of implied full label splits
            tree = TreeSampler.sample_agglomerated_tree(ntaxa)
            true_splits = tree.get_nontrivial_splits()
            # sample the branch lengths
            for branch in tree.get_branches():
                branch.length = branch_length_sampler()
            # sample the atteson distance matrix
            D = sample_atteson_distance_matrix(tree)
            # the sampled matrix is expected to satisfy the atteson condition;
            # a matrix that does not is counted and skipped rather than raising
            if not BuildTreeTopology.is_atteson(tree, D):
                atteson_error_count += 1
            else:
                try:
                    # see if the eigensplit is in the set of true splits
                    eigensplit = BuildTreeTopology.split_using_eigenvector(D)
                    if eigensplit in true_splits:
                        valid_split_count += 1
                    else:
                        # remember the first counterexample and stop sampling
                        invalid_split_count += 1
                        counterexample_D = D
                        counterexample_tree = tree
                        break
                # NOTE(review): the comma form of except below is Python 2 only
                # syntax, and the bound exception object is never used
                except BuildTreeTopology.DegenerateSplitException, e:
                    degenerate_count += 1
                except BuildTreeTopology.InvalidSpectralSplitException, e:
                    spectral_error_count += 1
Exemplo n.º 3
0
def process(ntaxa, nseconds, branch_length_sampler):
    """
    Look for a sampled distance matrix whose eigenvector split is not a true split.

    Each iteration samples a tree topology, gives every branch a sampled
    length, samples a distance matrix with sample_atteson_distance_matrix,
    and checks whether the split returned by
    BuildTreeTopology.split_using_eigenvector is one of the nontrivial
    splits of the tree.  The loop stops at the first split that is not,
    keeping that matrix and tree as the counterexample.

    NOTE(review): this excerpt ends inside the outer try block, so the
    handler of that block and the code that builds the returned summary
    are not visible here.

    @param ntaxa: the number of taxa in the sampled trees
    @param nseconds: allow this many seconds to run or None to run forever
    @param branch_length_sampler: a functor that returns a branch length and has a string cast
    @return: a multi-line string that summarizes the results
    """
    start_time = time.time()
    # initialize some state that will be tracked over the entire run
    degenerate_count = 0
    invalid_split_count = 0
    valid_split_count = 0
    spectral_error_count = 0
    atteson_error_count = 0
    # these stay None unless an invalid split is found
    counterexample_D = None
    counterexample_tree = None
    # do a bunch of reconstructions from sampled distance matrices
    try:
        while True:
            # the time limit is only checked between samples;
            # a false nseconds value disables it
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample the tree topology and get its set of implied full label splits
            tree = TreeSampler.sample_agglomerated_tree(ntaxa)
            true_splits = tree.get_nontrivial_splits()
            # sample the branch lengths
            for branch in tree.get_branches():
                branch.length = branch_length_sampler()
            # sample the atteson distance matrix
            D = sample_atteson_distance_matrix(tree)
            # the sampled matrix is expected to satisfy the atteson condition;
            # a matrix that does not is counted and skipped rather than raising
            if not BuildTreeTopology.is_atteson(tree, D):
                atteson_error_count += 1
            else:
                try:
                    # see if the eigensplit is in the set of true splits
                    eigensplit = BuildTreeTopology.split_using_eigenvector(D)
                    if eigensplit in true_splits:
                        valid_split_count += 1
                    else:
                        # remember the first counterexample and stop sampling
                        invalid_split_count += 1
                        counterexample_D = D
                        counterexample_tree = tree
                        break
                # NOTE(review): the comma form of except below is Python 2 only
                # syntax, and the bound exception object is never used
                except BuildTreeTopology.DegenerateSplitException, e:
                    degenerate_count += 1
                except BuildTreeTopology.InvalidSpectralSplitException, e:
                    spectral_error_count += 1
Exemplo n.º 4
0
def process(ntaxa, length, nseconds, builders, branch_length_sampler):
    """
    Sample trees until the time limit and tally the builders' results.

    Each iteration samples a tree topology, gives every branch a sampled
    length, samples a distance matrix from the tree, and asks each builder
    to evaluate that matrix against the true splits of the tree.  The two
    builder results index a 2x2 table of counts; one table is kept for
    samples whose distance matrix satisfies BuildTreeTopology.is_atteson
    and another for the rest.  Samples that raise one of the handled
    errors are counted and skipped.  A KeyboardInterrupt ends the
    sampling loop early instead of propagating.

    NOTE(review): this excerpt ends after the sampling loop; the code that
    turns the counters into the documented summary string is not visible.

    @param ntaxa: the number of taxa in the sampled trees
    @param length: the length of sequences used to sample the distance matrix
    @param nseconds: allow this many seconds to run; a false value disables the limit
    @param builders: exactly two tree builder objects, each with an evaluate(true_splits, D) method whose result indexes a 2x2 table
    @param branch_length_sampler: a callable that returns one branch length per call
    @return: a multi-line string that summarizes the results
    """
    start_time = time.time()
    # track the number of samples that failed for various reasons
    n_zero_errors = 0
    n_infinite_errors = 0
    n_failed_spectral_splits = 0
    # 2x2 tables of counts indexed by the results of the two builders,
    # kept separately for non-Atteson and Atteson distance matrices
    non_atteson_results = [[0, 0], [0, 0]]
    atteson_results = [[0, 0], [0, 0]]
    #pachter_results = [[0, 0], [0, 0]]
    # evaluate the quality of reconstructions from a bunch of different samples
    try:
        while True:
            # the time limit is only checked between samples
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample the tree topology and get its set of implied full label splits
            tree = TreeSampler.sample_agglomerated_tree(ntaxa)
            true_splits = tree.get_nontrivial_splits()
            # sample the branch lengths
            for branch in tree.get_branches():
                branch.length = branch_length_sampler()
            try:
                D = sample_distance_matrix(tree, length)
                # the unpacking requires exactly two builders
                a, b = [builder.evaluate(true_splits, D) for builder in builders]
                if BuildTreeTopology.is_atteson(tree, D):
                    atteson_results[a][b] += 1
                #elif BuildTreeTopology.is_quartet_additive(tree, D) and BuildTreeTopology.is_quartet_consistent(tree, D):
                    #pachter_results[a][b] += 1
                else:
                    non_atteson_results[a][b] += 1
            # the exception objects are not needed, so they are not bound;
            # this form is valid on both Python 2 and Python 3
            except InfiniteDistanceError:
                n_infinite_errors += 1
            except ZeroDistanceError:
                n_zero_errors += 1
            except BuildTreeTopology.InvalidSpectralSplitException:
                n_failed_spectral_splits += 1
    except KeyboardInterrupt:
        # an interrupt stops the sampling; the counts gathered so far are kept
        pass
Exemplo n.º 5
0
def process(ntaxa, length, nseconds, branch_length_sampler, use_nj,
            use_modified_nj, use_all_spectral, use_one_spectral):
    """
    Sample trees until the time limit and pass each sample to a Builder.

    Each iteration samples a tree topology, gives every branch a sampled
    length, samples a distance matrix from the tree, determines whether
    the matrix is Atteson with respect to the tree, and calls
    builder.evaluate with the sample and the use_* flags.  The return
    value of evaluate is ignored here, so the builder presumably records
    the results itself.  Samples that raise one of the handled errors are
    counted and skipped.  A KeyboardInterrupt ends the sampling loop
    early instead of propagating.

    NOTE(review): this excerpt ends after the sampling loop; the code that
    turns the collected results into the documented summary string is
    not visible.

    @param ntaxa: the number of taxa in the sampled trees
    @param length: the length of sequences used to sample the distance matrix
    @param nseconds: allow this many seconds to run or None to run forever
    @param branch_length_sampler: a functor that returns a branch length and has a string cast
    @param use_nj: forwarded unchanged to builder.evaluate
    @param use_modified_nj: forwarded unchanged to builder.evaluate
    @param use_all_spectral: forwarded unchanged to builder.evaluate
    @param use_one_spectral: forwarded unchanged to builder.evaluate
    @return: a multi-line string that summarizes the results
    """
    start_time = time.time()
    # initialize the builder object
    builder = Builder()
    # track the number of samples that failed for various reasons
    n_zero_errors = 0
    n_infinite_errors = 0
    n_failed_spectral_splits = 0
    # do a bunch of reconstructions of sampled distance matrices
    try:
        while True:
            # the time limit is only checked between samples
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample the tree topology and get its set of implied full label splits
            tree = TreeSampler.sample_agglomerated_tree(ntaxa)
            true_splits = tree.get_nontrivial_splits()
            # sample the branch lengths
            for branch in tree.get_branches():
                branch.length = branch_length_sampler()
            try:
                D = sample_distance_matrix(tree, length)
                # determine whether or not the distance matrix is Atteson with respect to the tree
                atteson = BuildTreeTopology.is_atteson(tree, D)
                # record information about the splits
                builder.evaluate(true_splits, D, atteson, use_nj,
                                 use_modified_nj, use_all_spectral,
                                 use_one_spectral)
            # the exception objects are not needed, so they are not bound;
            # this form is valid on both Python 2 and Python 3
            except InfiniteDistanceError:
                n_infinite_errors += 1
            except ZeroDistanceError:
                n_zero_errors += 1
            except BuildTreeTopology.InvalidSpectralSplitException:
                n_failed_spectral_splits += 1
    except KeyboardInterrupt:
        # an interrupt stops the sampling; the results gathered so far are kept
        pass
Exemplo n.º 6
0
def process(ntaxa, length, nseconds, branch_length_sampler, use_nj, use_modified_nj, use_all_spectral, use_one_spectral):
    """
    Sample trees until the time limit and pass each sample to a Builder.

    Each iteration samples a tree topology, gives every branch a sampled
    length, samples a distance matrix from the tree, determines whether
    the matrix is Atteson with respect to the tree, and calls
    builder.evaluate with the sample and the use_* flags.  The return
    value of evaluate is ignored here, so the builder presumably records
    the results itself.  Samples that raise one of the handled errors are
    counted and skipped.  A KeyboardInterrupt ends the sampling loop
    early instead of propagating.

    NOTE(review): this excerpt ends after the sampling loop; the code that
    turns the collected results into the documented summary string is
    not visible.

    @param ntaxa: the number of taxa in the sampled trees
    @param length: the length of sequences used to sample the distance matrix
    @param nseconds: allow this many seconds to run or None to run forever
    @param branch_length_sampler: a functor that returns a branch length and has a string cast
    @param use_nj: forwarded unchanged to builder.evaluate
    @param use_modified_nj: forwarded unchanged to builder.evaluate
    @param use_all_spectral: forwarded unchanged to builder.evaluate
    @param use_one_spectral: forwarded unchanged to builder.evaluate
    @return: a multi-line string that summarizes the results
    """
    start_time = time.time()
    # initialize the builder object
    builder = Builder()
    # track the number of samples that failed for various reasons
    n_zero_errors = 0
    n_infinite_errors = 0
    n_failed_spectral_splits = 0
    # do a bunch of reconstructions of sampled distance matrices
    try:
        while True:
            # the time limit is only checked between samples
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample the tree topology and get its set of implied full label splits
            tree = TreeSampler.sample_agglomerated_tree(ntaxa)
            true_splits = tree.get_nontrivial_splits()
            # sample the branch lengths
            for branch in tree.get_branches():
                branch.length = branch_length_sampler()
            try:
                D = sample_distance_matrix(tree, length)
                # determine whether or not the distance matrix is Atteson with respect to the tree
                atteson = BuildTreeTopology.is_atteson(tree, D)
                # record information about the splits
                builder.evaluate(true_splits, D, atteson, use_nj, use_modified_nj, use_all_spectral, use_one_spectral)
            # the exception objects are not needed, so they are not bound;
            # this form is valid on both Python 2 and Python 3
            except InfiniteDistanceError:
                n_infinite_errors += 1
            except ZeroDistanceError:
                n_zero_errors += 1
            except BuildTreeTopology.InvalidSpectralSplitException:
                n_failed_spectral_splits += 1
    except KeyboardInterrupt:
        # an interrupt stops the sampling; the results gathered so far are kept
        pass
Exemplo n.º 7
0
 # NOTE(review): this is an excerpt from the middle of a function; the names
 # D, tree, true_splits, nj_like, attribute_array and incr_attribute are bound
 # outside the visible lines.
 # see if the top down reconstruction was successful
 try:
     # the splitter is the same in both cases; nj_like only selects the
     # update function that is passed to get_splits
     splitter = BuildTreeTopology.split_using_eigenvector_with_nj_fallback
     if nj_like:
         updater = BuildTreeTopology.update_generalized_nj
     else:
         updater = BuildTreeTopology.update_using_laplacian
     all_spectral_splits = BuildTreeTopology.get_splits(
         D, splitter, updater)
     # success means the reconstructed splits equal the true splits
     top_down_success = (all_spectral_splits == true_splits)
 # NOTE(review): the comma form of except below is Python 2 only syntax,
 # and the bound exception object is never used
 except BuildTreeTopology.InvalidSpectralSplitException, e:
     # the sample is rejected: count it and leave the enclosing function
     # with whatever incr_attribute returns
     return incr_attribute(attribute_array, 'nsamples.rejected.fail')
 # at this point the sample is accepted
 incr_attribute(attribute_array, 'nsamples.accepted')
 # determine whether or not the distance matrix is Atteson with respect to the tree
 # (this counter is incremented in addition to 'nsamples.accepted')
 if BuildTreeTopology.is_atteson(tree, D):
     incr_attribute(attribute_array, 'nsamples.accepted.atteson')
 # see if the bottom up reconstruction was successful
 nj_splits = BuildTreeTopology.get_splits(D, BuildTreeTopology.split_nj,
                                          BuildTreeTopology.update_nj)
 nj_success = (nj_splits == true_splits)
 # note the joint results of the two reconstruction methods;
 # the four branches cover every combination of the two outcomes
 if top_down_success and nj_success:
     incr_attribute(attribute_array, 'nsuccesses.both')
 elif (not top_down_success) and (not nj_success):
     incr_attribute(attribute_array, 'nsuccesses.neither')
 elif top_down_success and (not nj_success):
     incr_attribute(attribute_array, 'nsuccesses.topdown.only')
 elif (not top_down_success) and nj_success:
     incr_attribute(attribute_array, 'nsuccesses.nj.only')
 # characterize the result of the first spectral split
Exemplo n.º 8
0
     return incr_attribute(attribute_array, 'nsamples.rejected.fail')
 # NOTE(review): this is an excerpt from the middle of a function; the names
 # D, tree, true_splits, nj_like, attribute_array and incr_attribute are bound
 # outside the visible lines.
 # see if the top down reconstruction was successful
 try:
     # the splitter is the same in both cases; nj_like only selects the
     # update function that is passed to get_splits
     splitter = BuildTreeTopology.split_using_eigenvector_with_nj_fallback
     if nj_like:
         updater = BuildTreeTopology.update_generalized_nj
     else:
         updater = BuildTreeTopology.update_using_laplacian
     all_spectral_splits = BuildTreeTopology.get_splits(D, splitter, updater)
     # success means the reconstructed splits equal the true splits
     top_down_success = (all_spectral_splits == true_splits)
 # NOTE(review): the comma form of except below is Python 2 only syntax,
 # and the bound exception object is never used
 except BuildTreeTopology.InvalidSpectralSplitException, e:
     # the sample is rejected: count it and leave the enclosing function
     # with whatever incr_attribute returns
     return incr_attribute(attribute_array, 'nsamples.rejected.fail')
 # at this point the sample is accepted
 incr_attribute(attribute_array, 'nsamples.accepted')
 # determine whether or not the distance matrix is Atteson with respect to the tree
 # (this counter is incremented in addition to 'nsamples.accepted')
 if BuildTreeTopology.is_atteson(tree, D):
     incr_attribute(attribute_array, 'nsamples.accepted.atteson')
 # see if the bottom up reconstruction was successful
 nj_splits = BuildTreeTopology.get_splits(D, BuildTreeTopology.split_nj, BuildTreeTopology.update_nj)
 nj_success = (nj_splits == true_splits)
 # note the joint results of the two reconstruction methods;
 # the four branches cover every combination of the two outcomes
 if top_down_success and nj_success:
     incr_attribute(attribute_array, 'nsuccesses.both')
 elif (not top_down_success) and (not nj_success):
     incr_attribute(attribute_array, 'nsuccesses.neither')
 elif top_down_success and (not nj_success):
     incr_attribute(attribute_array, 'nsuccesses.topdown.only')
 elif (not top_down_success) and nj_success:
     incr_attribute(attribute_array, 'nsuccesses.nj.only')
 # characterize the result of the first spectral split
 try: