def main():
    """Main method for script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('in_tree_filename',
                        type=str,
                        help='Path to initial tree')
    parser.add_argument('in_tree_schema',
                        type=str,
                        choices=['nexus', 'newick'],
                        help='The input tree schema')
    parser.add_argument('out_tree_filename', type=str, help='Output tree path')
    parser.add_argument('out_tree_schema',
                        type=str,
                        choices=['nexus', 'newick'],
                        help='The output tree schema')
    parser.add_argument('accepted_taxa_filename',
                        type=str,
                        help='File path to write out accepted taxon names')
    args = parser.parse_args()
    tree = TreeWrapper.get(path=args.in_tree_filename,
                           schema=args.in_tree_schema)
    out_tree, accepted_taxa = get_and_replace_names(tree)
    # Write tree
    out_tree.write(path=args.out_tree_filename, schema=args.out_tree_schema)
    # Write accepted taxa
    with open(args.accepted_taxa_filename, 'w') as taxa_out_file:
        for taxon_name in accepted_taxa:
            taxa_out_file.write('{}\n'.format(taxon_name))
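
The helper get_and_replace_names is not shown in this snippet. A minimal hypothetical stub of the interface the script expects (a sketch only, not the project's implementation, which presumably resolves names against a taxonomic service):

def get_and_replace_names(tree):
    """Return the tree with resolved tip names plus the accepted names.

    Hypothetical stub: keeps every label as-is instead of resolving it.
    """
    accepted_taxa = [taxon.label for taxon in tree.taxon_namespace]
    return tree, accepted_taxa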
Example #2
    def test_valid(self, valid_phylo_beta_diversity_package):
        """Test the method with valid data.

        Args:
            valid_phylo_beta_diversity_package (tuple): A tuple of information that
                together forms a valid phylogenetic beta diversity package.

        Note:
            * Test values were determined from example at
                https://rdrr.io/rforge/betapart/man/phylo.beta.pair.html
        """
        (pam_fn, tree_fn, _, _, _, test_beta_sim_fn, test_beta_sne_fn,
         test_beta_sor_fn, _, _, _, test_phylo_beta_sim_fn,
         test_phylo_beta_sne_fn,
         test_phylo_beta_sor_fn) = valid_phylo_beta_diversity_package

        with open(pam_fn) as in_f:
            pam = Matrix.load_csv(in_f, num_header_rows=1, num_header_cols=1)
        tree = TreeWrapper.from_filename(tree_fn)
        with open(test_beta_sim_fn) as in_f:
            test_beta_sim = Matrix.load_csv(in_f,
                                            num_header_rows=1,
                                            num_header_cols=1)
        with open(test_beta_sne_fn) as in_f:
            test_beta_sne = Matrix.load_csv(in_f,
                                            num_header_rows=1,
                                            num_header_cols=1)
        with open(test_beta_sor_fn) as in_f:
            test_beta_sor = Matrix.load_csv(in_f,
                                            num_header_rows=1,
                                            num_header_cols=1)
        with open(test_phylo_beta_sim_fn) as in_f:
            test_phylo_beta_sim = Matrix.load_csv(in_f,
                                                  num_header_rows=1,
                                                  num_header_cols=1)
        with open(test_phylo_beta_sne_fn) as in_f:
            test_phylo_beta_sne = Matrix.load_csv(in_f,
                                                  num_header_rows=1,
                                                  num_header_cols=1)
        with open(test_phylo_beta_sor_fn) as in_f:
            test_phylo_beta_sor = Matrix.load_csv(in_f,
                                                  num_header_rows=1,
                                                  num_header_cols=1)

        (beta_sim, phylo_beta_sim, beta_sne, phylo_beta_sne, beta_sor,
         phylo_beta_sor) = pbd.calculate_phylo_beta_diversity_sorensen(
             pam, tree)
        # Check matrix outputs to see if they are within tolerance
        assert np.allclose(beta_sim, test_beta_sim)
        assert np.allclose(phylo_beta_sim, test_phylo_beta_sim)
        assert np.allclose(beta_sne, test_beta_sne)
        assert np.allclose(phylo_beta_sne, test_phylo_beta_sne)
        assert np.allclose(beta_sor, test_beta_sor)
        assert np.allclose(phylo_beta_sor, test_phylo_beta_sor)
Example #3
    def test_valid(self):
        """Test the function with valid inputs."""
        # Create a tree
        tree = TreeWrapper.get(data='(A,(B,((C,D),(E,F))));', schema='newick')
        mtx = Matrix(np.random.random((6, 2, 1)),
                     headers={
                         '0': ['A', 'B', 'C', 'D', 'E', 'F'],
                         '1': ['label', 'other_val']
                     })
        # This should not fail
        annotators.annotate_tree_with_label(tree, mtx, label_column=0)
Example #4
def main():
    pam_fn = 'C:/Users/cj/Desktop/ryan_v3/pam.lmm'
    tree_fn = 'C:/Users/cj/Desktop/ryan_v3/squid_tree.nex'
    out_fn = 'C:/Users/cj/Desktop/ryan_v3/tree_mtx.lmm'

    with open(pam_fn, 'rb') as in_file:
        pam = Matrix.load_flo(in_file)
    tree = TreeWrapper.get(path=tree_fn, schema='nexus')
    tree_mtx = calculate_tree_site_statistics(pam, tree)
    with open(out_fn, 'wb') as out_file:
        tree_mtx.save(out_file)
    print(tree_mtx.max(axis=1))
    print(tree_mtx.max(axis=0))
Example #5
    def test_valid(self, valid_phylo_beta_diversity_package):
        """Test the method with valid data

        Note:
            * Test values were determined from example at
                https://rdrr.io/rforge/betapart/man/phylo.beta.pair.html
        """
        (pam_fn, tree_fn, test_beta_jac_fn, test_beta_jne_fn, test_beta_jtu_fn,
         _, _, _, test_phylo_beta_jac_fn, test_phylo_beta_jne_fn,
         test_phylo_beta_jtu_fn, _, _, _) = valid_phylo_beta_diversity_package

        with open(pam_fn) as in_f:
            pam = Matrix.load_csv(in_f, num_header_rows=1, num_header_cols=1)
        tree = TreeWrapper.from_filename(tree_fn)
        with open(test_beta_jac_fn) as in_f:
            test_beta_jac = Matrix.load_csv(in_f,
                                            num_header_rows=1,
                                            num_header_cols=1)
        with open(test_beta_jne_fn) as in_f:
            test_beta_jne = Matrix.load_csv(in_f,
                                            num_header_rows=1,
                                            num_header_cols=1)
        with open(test_beta_jtu_fn) as in_f:
            test_beta_jtu = Matrix.load_csv(in_f,
                                            num_header_rows=1,
                                            num_header_cols=1)
        with open(test_phylo_beta_jac_fn) as in_f:
            test_phylo_beta_jac = Matrix.load_csv(in_f,
                                                  num_header_rows=1,
                                                  num_header_cols=1)
        with open(test_phylo_beta_jne_fn) as in_f:
            test_phylo_beta_jne = Matrix.load_csv(in_f,
                                                  num_header_rows=1,
                                                  num_header_cols=1)
        with open(test_phylo_beta_jtu_fn) as in_f:
            test_phylo_beta_jtu = Matrix.load_csv(in_f,
                                                  num_header_rows=1,
                                                  num_header_cols=1)

        (beta_jtu, phylo_beta_jtu, beta_jne, phylo_beta_jne, beta_jac,
         phylo_beta_jac) = pbd.calculate_phylo_beta_diversity_jaccard(
             pam, tree)
        # Check matrix outputs to see if they are within tolerance
        assert np.allclose(beta_jtu, test_beta_jtu)
        assert np.allclose(phylo_beta_jtu, test_phylo_beta_jtu)
        assert np.allclose(beta_jne, test_beta_jne)
        assert np.allclose(phylo_beta_jne, test_phylo_beta_jne)
        assert np.allclose(beta_jac, test_beta_jac)
        assert np.allclose(phylo_beta_jac, test_phylo_beta_jac)
Example #6
def purge_tree(tree_filename, tree_schema, occurrence_filename, species_col):
    """Get a tree and purge taxa not in occurrence data."""
    tree = TreeWrapper.get(path=tree_filename, schema=tree_schema)
    species = set()
    with open(occurrence_filename, 'r') as in_file:
        for line in in_file:
            parts = line.split(', ')
            sp_name = parts[species_col].strip()
            species.add(sp_name)
    purge_taxa = []
    for taxon in tree.taxon_namespace:
        if taxon.label not in species:
            purge_taxa.append(taxon)
    tree.prune_taxa(purge_taxa)
    tree.purge_taxon_namespace()
    return tree
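
A hypothetical usage sketch for purge_tree (the file names and the species column index are invented for illustration):

if __name__ == '__main__':
    # Keep only the tips whose names appear in column 0 of the occurrence CSV
    pruned = purge_tree('my_tree.nex', 'nexus', 'occurrences.csv', 0)
    pruned.write(path='pruned_tree.nex', schema='nexus')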
Example #7
    def test_valid(self, tmpdir):
        """Test the function with valid inputs.

        Args:
            tmpdir (:obj:`py.path.local`): A temporary directory test fixture
                generated by pytest.
        """
        # Create a tree
        tree = TreeWrapper.get(data='(A,(B,((C,D),(E,F))));', schema='newick')
        mtx = Matrix(
            np.random.random((6, 3, 2)),
            headers={'0': ['A', 'B', 'C', 'D', 'E', 'F'],
                     '1': ['label', 'other_val', 'one_more_val']})
        # This should not fail
        output_directory = os.path.join(tmpdir.dirname, 'plots')
        create_distribution_plots(tree, mtx, output_directory)
Example #8
def get_squidded_tree(tree_fn, tree_schema, squid_json):
    """Read a tree and annotate its tips with squid identifiers."""
    tree = TreeWrapper.get(path=tree_fn, schema=tree_schema)
    json_data_lines = []
    with open(squid_json) as in_file:
        first_line = True
        for line in in_file:
            if first_line:
                first_line = False
            else:
                json_data_lines.append(line)
    squid_list_json = json.loads(''.join(json_data_lines))
    squid_dict = {
        i['scientific_name'].replace('_', ' '): i['header']
        for i in squid_list_json
    }
    tree.annotate_tree_tips('squid', squid_dict)
    return tree
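
The parser above skips the first line of squid_json and loads the remainder as a JSON list of objects with 'scientific_name' and 'header' keys. A guessed illustration of the expected file contents (the header line and values are invented):

<header line, ignored>
[{"scientific_name": "Genus_species", "header": "squid_hash_1"},
 {"scientific_name": "Genus_other", "header": "squid_hash_2"}]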
Example #9
def main():
    """Main method for script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('shapegrid_filename',
                        type=str,
                        help='File location of the shapegrid shapefile')
    parser.add_argument('pam_filename',
                        type=str,
                        help='File location of the PAM matrix for statistics')
    parser.add_argument('tree_filename',
                        type=str,
                        help='File location of the tree to use for statistics')
    parser.add_argument('tree_schema',
                        choices=['newick', 'nexus'],
                        help='The tree schema')
    parser.add_argument('out_geojson_filename',
                        type=str,
                        help='File location to write the output GeoJSON')
    parser.add_argument('--layer',
                        nargs=2,
                        action='append',
                        help='File location of a layer followed by a label')
    args = parser.parse_args()

    # Load data
    pam = Matrix.load(args.pam_filename)
    tree = TreeWrapper.get(path=args.tree_filename, schema=args.tree_schema)

    # Encode layers
    encoded_layers = encode_environment_layers(args.shapegrid_filename,
                                               args.layer)
    # Calculate PAM statistics
    stats_mtx = calculate_tree_site_statistics(pam, tree)
    # Join encoded layers and PAM statistics
    mtx = join_encoded_layers_and_pam_stats(encoded_layers, stats_mtx)
    # Generate GeoJSON
    geojson_data = create_geojson(args.shapegrid_filename, mtx)
    # Write GeoJSON
    with open(args.out_geojson_filename, 'w') as out_file:
        json.dump(geojson_data, out_file)
Example #10
def main():
    """Main method for script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('in_tree_filename',
                        type=str,
                        help='The file location of the input tree')
    parser.add_argument('in_tree_schema',
                        type=str,
                        choices=['newick', 'nexus'],
                        help='The schema of the input tree')
    parser.add_argument('out_tree_filename',
                        type=str,
                        help='The file location of the output tree')
    parser.add_argument('out_tree_schema',
                        type=str,
                        choices=['newick', 'nexus'],
                        help='The schema of the output tree')

    args = parser.parse_args()
    tree = TreeWrapper.get(path=args.in_tree_filename,
                           schema=args.in_tree_schema)
    out_tree = get_and_replace_names(tree)
    out_tree.write(path=args.out_tree_filename, schema=args.out_tree_schema)
Example #11
        '-c', '--out_csv_filename', type=str,
        help='If provided, write the output character matrix CSV '
             'to this file location')

    args = parser.parse_args()

    # Check that input files exist
    if not os.path.exists(args.in_tree_filename):
        raise IOError(
            'Input tree {} does not exist'.format(args.in_tree_filename))
    if not os.path.exists(args.data_filename):
        raise IOError(
            'Input data file {} does not exist'.format(args.data_filename))

    # Read the tree
    tree = TreeWrapper.get(
        path=args.in_tree_filename, schema=args.in_tree_schema)

    # Read data
    if args.data_format == 'csv':
        with open(args.data_filename) as in_file:
            sequences, headers = data_readers.read_csv_alignment_flo(
                in_file)
    elif args.data_format == 'json':
        with open(args.data_filename) as in_file:
            sequences, headers = data_readers.read_json_alignment_flo(
                in_file)
    elif args.data_format == 'phylip':
        with open(args.data_filename) as in_file:
            sequences = data_readers.read_phylip_alignment_flo(in_file)
        headers = None
    elif args.data_format == 'table':
Example #12
def calculate_continuous_ancestral_states(tree,
                                          char_mtx,
                                          sum_to_one=False,
                                          calc_std_err=False):
    """Calculates the continuous ancestral states for the nodes in a tree.

    Args:
        tree (Tree): A dendropy tree or TreeWrapper object.
        char_mtx (Matrix): A Matrix object with character information.  Each
            row should represent a tip in the tree and each column should be a
            variable to calculate ancestral state for.
        sum_to_one (:obj:`bool`, optional): If True, standardize the character
            matrix so that the values in a row sum to one. Defaults to False.
        calc_std_err (:obj:`bool`, optional): If True, calculate standard
            error for each variable. Defaults to False.

    Returns:
        tuple: The (possibly wrapped) tree and a Matrix of character data
            with the following dimensions:
            * rows: nodes / tips in the tree
            * columns: character variables
            * depth: the first layer is the calculated value; the second
                layer is the standard error, if requested

    Todo:
        * Add function for consistent label handling.
    """
    # Wrap tree if dendropy tree
    if not isinstance(tree, TreeWrapper):
        tree = TreeWrapper.from_base_tree(tree)

    # Assign labels to nodes that don't have them
    tree.add_node_labels()

    # Synchronize tree and character data
    # Prune tree
    prune_taxa = []
    keep_taxon_labels = []
    init_row_headers = char_mtx.get_row_headers()
    for taxon in tree.taxon_namespace:
        label = taxon.label.replace(' ', '_')
        if label not in init_row_headers:
            prune_taxa.append(taxon)
            print(
                'Could not find {} in character matrix, pruning'.format(label))
        else:
            keep_taxon_labels.append(label)

    if len(keep_taxon_labels) == 0:
        raise Exception(
            'None of the tree tips were found in the character data')

    tree.prune_taxa(prune_taxa)
    tree.purge_taxon_namespace()

    # Prune character data
    keep_rows = []
    for i, label in enumerate(init_row_headers):
        if label in keep_taxon_labels:
            keep_rows.append(i)
        else:
            print('Could not find {} in tree tips, pruning'.format(label))
    char_mtx = char_mtx.slice(keep_rows)

    # Standardize character matrix if requested
    tip_count, num_vars = char_mtx.shape
    if sum_to_one:
        for i in range(tip_count):
            sc = 1.0 / np.sum(char_mtx[i])
            for j in range(num_vars):
                char_mtx[i, j] *= sc

    # Initialize data matrix
    num_nodes = len(tree.nodes())
    data_shape = (num_nodes, num_vars, 2 if calc_std_err else 1)
    data = np.zeros(data_shape, dtype=float)

    # Initialize headers
    row_headers = []

    tip_col_headers = char_mtx.get_column_headers()
    tip_row_headers = char_mtx.get_row_headers()
    tip_lookup = {tip_row_headers[i].replace('_', ' '): i
                  for i in range(tip_count)}

    # Get the number of internal nodes in the tree
    internal_node_count = num_nodes - tip_count
    # Loop through the tree and set the matrix index for each node
    # Also set data values
    node_headers = []
    node_i = tip_count
    tip_i = 0
    node_index_lookup = {}
    for node in tree.nodes():
        label = _get_node_label(node)
        if len(node.child_nodes()) == 0:
            # Tip
            node_index_lookup[label] = tip_i
            row_headers.append(label)
            data[tip_i, :, 0] = char_mtx[tip_lookup[label]]
            tip_i += 1
        else:
            node_index_lookup[label] = node_i
            node_headers.append(label)
            # Internal node
            data[node_i, :, 0] = np.zeros((1, num_vars), dtype=float)
            node_i += 1

    # Row headers should be extended with node headers
    row_headers.extend(node_headers)

    # For each variable
    for x in range(num_vars):
        # Compute the ML estimate of the root
        full_mcp = np.zeros((internal_node_count, internal_node_count),
                            dtype=float)
        full_vcp = np.zeros(internal_node_count, dtype=float)

        for k in tree.postorder_edge_iter():
            i = k.head_node
            if len(i.child_nodes()) != 0:
                node_num_i = node_index_lookup[_get_node_label(i)] - tip_count
                for j in i.child_nodes():
                    tbl = 2. / j.edge_length
                    full_mcp[node_num_i][node_num_i] += tbl
                    node_num_j = node_index_lookup[_get_node_label(j)]

                    if len(j.child_nodes()) == 0:
                        full_vcp[node_num_i] += (data[node_num_j, x, 0] * tbl)
                    else:
                        node_num_j -= tip_count
                        full_mcp[node_num_i][node_num_j] -= tbl
                        full_mcp[node_num_j][node_num_i] -= tbl
                        full_mcp[node_num_j][node_num_j] += tbl

        b = la.cho_factor(full_mcp)

        # these are the ML estimates for the ancestral states
        ml_est = la.cho_solve(b, full_vcp)
        sos = 0
        for k in tree.postorder_edge_iter():
            i = k.head_node
            node_num_i = node_index_lookup[_get_node_label(i)]
            if len(i.child_nodes()) != 0:
                data[node_num_i, x, 0] = ml_est[node_num_i - tip_count]

                if calc_std_err:
                    for j in i.child_nodes():
                        node_num_j = node_index_lookup[_get_node_label(j)]
                        temp = data[node_num_i, x, 0] - data[node_num_j, x, 0]
                        sos += temp * temp / j.edge_length

                    # nni is node_num_i adjusted for only nodes
                    nni = node_num_i - tip_count
                    qpq = full_mcp[nni][nni]
                    tm1 = np.delete(full_mcp, (nni), axis=0)
                    tm = np.delete(tm1, (nni), axis=1)
                    b = la.cho_factor(tm)
                    sol = la.cho_solve(b, tm1[:, nni])
                    temp_std_err = qpq - np.inner(tm1[:, nni], sol)
                    data[node_num_i, x, 1] = math.sqrt(
                        2.0 * sos / ((internal_node_count - 1) * temp_std_err))

    depth_headers = ['maximum_likelihood']
    if calc_std_err:
        depth_headers.append('standard_error')

    mtx_headers = {'0': row_headers, '1': tip_col_headers, '2': depth_headers}
    return tree, Matrix(data, headers=mtx_headers)
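
A small end-to-end usage sketch for the function above (the toy tree and trait values are invented; TreeWrapper and Matrix are assumed to behave as in the other examples):

import numpy as np

tree = TreeWrapper.get(data='(A:1.0,(B:1.0,C:1.0):1.0);', schema='newick')
char_mtx = Matrix(np.array([[1.0], [2.0], [4.0]]),
                  headers={'0': ['A', 'B', 'C'], '1': ['trait_1']})
out_tree, anc_states = calculate_continuous_ancestral_states(
    tree, char_mtx, calc_std_err=True)
# Layer 0 holds the maximum likelihood estimates, layer 1 the standard errors
print(anc_states.get_row_headers())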
Example #13
def main():
    """Main method for script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--out_stats_matrix_filename',
                        type=str,
                        help='Location to write statistics matrix.')
    parser.add_argument('shapegrid_filename',
                        type=str,
                        help='File location of the shapegrid shapefile')
    parser.add_argument('pam_filename',
                        type=str,
                        help='File location of the PAM matrix for statistics')
    parser.add_argument('tree_filename',
                        type=str,
                        help='File location of the tree to use for statistics')
    parser.add_argument('tree_schema',
                        choices=['newick', 'nexus'],
                        help='The tree schema')
    parser.add_argument('out_geojson_filename',
                        type=str,
                        help='File location to write the output GeoJSON')
    parser.add_argument('out_csv_filename',
                        type=str,
                        help='File location to write the output CSV')
    parser.add_argument('out_matrix_filename',
                        type=str,
                        help='File location to write the output matrix')
    parser.add_argument('--layer',
                        nargs=2,
                        action='append',
                        help='File location of a layer followed by a label')
    args = parser.parse_args()

    # Load data
    pam = Matrix.load(args.pam_filename)
    tree = TreeWrapper.get(path=args.tree_filename, schema=args.tree_schema)

    # Encode layers
    encoded_layers = encode_environment_layers(args.shapegrid_filename,
                                               args.layer)
    # Calculate PAM statistics
    stats_mtx = calculate_tree_site_statistics(pam, tree)
    if args.out_stats_matrix_filename:
        stats_mtx.write(args.out_stats_matrix_filename)
    # Join encoded layers and PAM statistics
    mtx = join_encoded_layers_and_pam_stats(encoded_layers, stats_mtx)
    # Generate GeoJSON
    geojson_data = create_geojson(args.shapegrid_filename, mtx)
    # Write GeoJSON
    with open(args.out_geojson_filename, 'w') as out_file:
        json.dump(geojson_data, out_file, indent=4)

    # Write matrix data
    # Rebuild the row headers as WKT polygons around each site centroid;
    # res is half the cell width (cells here are assumed to be one degree)
    new_rh = []
    res = 0.5
    for _, x, y in mtx.get_row_headers():
        min_x = x - res
        max_x = x + res
        min_y = y - res
        max_y = y + res
        new_rh.append('"POLYGON (({} {},{} {},{} {},{} {},{} {}))"'.format(
            min_x, max_y, max_x, max_y, max_x, min_y, min_x, min_y, min_x,
            max_y))
    mtx.write(args.out_matrix_filename)
    mtx.set_row_headers(new_rh)
    with open(args.out_csv_filename, 'w') as out_file:
        mtx.write_csv(out_file)
Example #14
    def test_package_valid(self, valid_ancestral_state_package):
        """Tests the calculate_continusous_ancestral_states method.

        Args:
            valid_ancestral_state_package (pytest.fixture): A parameterized
                pytest fixture defined in conftest.py that provides a valid
                test package.

        Note:
            * This test will need to evolve as the output format changes.  It
                will probably be better to return a data structure with various
                values for each node rather than assigning the value to the
                node label.

        Raises:
            IOError: When the tree or alignment cannot be loaded for the
                specified file extension.
            Exception: When a specified successful result value cannot be
                found.
        """
        # Get the data files
        (tree_filename, alignment_filename,
         results_filename) = valid_ancestral_state_package

        # Process the tree file
        _, tree_ext = os.path.splitext(tree_filename)
        if tree_ext == '.nex':
            tree_schema = 'nexus'
        elif tree_ext == '.xml':
            tree_schema = 'nexml'
        elif tree_ext == '.tre':
            tree_schema = 'newick'
        else:
            raise IOError(
                'Cannot handle tree with extension: {}'.format(tree_ext))
        # tree = dendropy.Tree.get(path=tree_filename, schema=tree_schema)
        tree = TreeWrapper.get(path=tree_filename, schema=tree_schema)

        # Process the alignment file
        _, align_ext = os.path.splitext(alignment_filename)
        if align_ext == '.csv':
            with open(alignment_filename) as align_file:
                sequences, headers = data_readers.read_csv_alignment_flo(
                    align_file)
        elif align_ext == '.json':
            with open(alignment_filename) as align_file:
                sequences, headers = data_readers.read_json_alignment_flo(
                    align_file)
        elif align_ext == '.phylip':
            with open(alignment_filename) as align_file:
                sequences = data_readers.read_phylip_alignment_flo(align_file)
        elif align_ext == '.tbl':
            with open(alignment_filename) as align_file:
                sequences = data_readers.read_table_alignment_flo(align_file)
        else:
            raise IOError('Cannot handle alignments with extension: {}'.format(
                align_ext))

        char_mtx = data_readers.get_character_matrix_from_sequences_list(
            sequences)
        # Run analysis
        _, anc_mtx = anc_dp.calculate_continuous_ancestral_states(
            tree, char_mtx, calc_std_err=True, sum_to_one=False)

        # New testing method
        # (For now) assume that results file is csv with row headers for
        #    node labels and column headers for variables
        results = []
        h = None
        with open(results_filename) as results_file:
            for line in results_file:
                if h is None:
                    # Get headers
                    h = line.strip().split(',')[1:]
                else:
                    # Add result (without label) to list
                    node_result = [
                        float(i) for i in line.strip().split(',')[1:]
                    ]
                    results.append(np.array(node_result, dtype=float))

        # Look for all results (only maximum likelihood)
        for row in anc_mtx[:, :, 0]:
            found = False
            for i in range(len(results)):
                # Allow for some wiggle room with decimal precision
                if np.all(np.isclose(row, results[i])):
                    found = True
                    results.pop(i)
                    break
            if not found:
                raise Exception(
                    'Could not find expected result: {} in results'.format(
                        row))
Example #15
    def test_package_valid(self, valid_ancestral_distribution_package):
        """Tests the calculate_ancestral_distributions method.

        Args:
            valid_ancestral_distribution_package (pytest.fixture): A pytest
                fixture that is parametrized to provide valid ancestral
                distribution packages, one at a time, so that a test case is
                generated for each package.

        Raises:
            IOError: When the tree or alignment cannot be loaded for the
                specified file extension.
            Exception: When a specified successful result value cannot be
                found.
        """
        # Get the data files
        (tree_filename, alignment_filename,
         results_filename) = valid_ancestral_distribution_package
        # Process the tree file
        _, tree_ext = os.path.splitext(tree_filename)
        if tree_ext == '.nex':
            tree_schema = 'nexus'
        elif tree_ext == '.xml':
            tree_schema = 'nexml'
        elif tree_ext == '.tre':
            tree_schema = 'newick'
        else:
            raise IOError(
                'Cannot handle tree with extension: {}'.format(tree_ext))
        # tree = dendropy.Tree.get(path=tree_filename, schema=tree_schema)
        tree = TreeWrapper.get(path=tree_filename, schema=tree_schema)

        # Process the alignment file
        _, align_ext = os.path.splitext(alignment_filename)
        if align_ext == '.csv':
            with open(alignment_filename) as align_file:
                sequences, headers = data_readers.read_csv_alignment_flo(
                    align_file)
        elif align_ext == '.json':
            with open(alignment_filename) as align_file:
                sequences, headers = data_readers.read_json_alignment_flo(
                    align_file)
        elif align_ext == '.phylip':
            with open(alignment_filename) as align_file:
                sequences = data_readers.read_phylip_alignment_flo(align_file)
        elif align_ext == '.tbl':
            with open(alignment_filename) as align_file:
                sequences = data_readers.read_table_alignment_flo(align_file)
        else:
            raise IOError('Cannot handle alignments with extension: {}'.format(
                align_ext))

        char_mtx = data_readers.get_character_matrix_from_sequences_list(
            sequences)
        # Run analysis
        _, anc_mtx = anc_dp.calculate_ancestral_distributions(tree, char_mtx)

        # Testing method
        # Assume that the results file is a csv with row headers for node
        #    labels and output layer (maximum_likelihood / standard_error)
        #    and column headers for variables
        ml_results = []
        std_err_results = []
        h = None
        with open(results_filename) as results_file:
            for line in results_file:
                if h is None:
                    # Get headers
                    h = line.strip().split(',')[1:]
                else:
                    # Add result (without label) to appropriate list
                    parts = line.strip().split(',')
                    layer = parts[1].lower()
                    values = np.array([float(i) for i in parts[2:]],
                                      dtype=float)
                    if layer == 'maximum_likelihood':
                        ml_results.append(values)
                    else:
                        std_err_results.append(values)
        assert len(ml_results) == len(std_err_results)
        print('ml results')
        print(ml_results)
        print('std err results')
        print(std_err_results)

        # Look for all results (ml and std err results should match rows)
        for row_idx in range(anc_mtx.shape[0]):
            found = False
            # Get rows from data
            ml_row = anc_mtx[row_idx, :, 0]
            std_err_row = anc_mtx[row_idx, :, 1]

            for i in range(len(ml_results)):
                print(ml_results[i])
                print(std_err_results[i])
                if np.all(np.isclose(ml_row, ml_results[i])) and \
                        np.all(np.isclose(
                            std_err_row, std_err_results[i])):
                    found = True
                    ml_results.pop(i)
                    std_err_results.pop(i)
                    break
            if not found:
                raise Exception('Could not find {}, {} in results'.format(
                    ml_row, std_err_row))