Exemplo n.º 1
0
    def __init__(self, observed, expected=None):
        """Validate the observed counts and initialise the instance state.

        Parameters
        ----------
        observed
            a DictArray instance, or something that can be converted to one.
            Values must be integers.
        expected
            provide in the case where you know the prior proportions, otherwise
            calculated from marginal frequencies

        Raises
        ------
        ValueError
            if the counts sum to zero, or any count is negative
        NotImplementedError
            if the counts have more than two dimensions
        """
        observed = observed if isinstance(observed, DictArray) else DictArray(observed)

        # coerce the stored values to int before running any validation
        observed.array = _astype(observed.array, int)
        counts = observed.array

        # the checks run in this order: total, sign, dimensionality
        if counts.sum() == 0:
            raise ValueError("at least one value must be > 0")
        if counts.min() < 0:
            raise ValueError("negative values encountered")
        if counts.ndim > 2:
            raise NotImplementedError("not designed for >2D")

        self._observed = observed
        self.expected = expected
        # start out unset
        self._residuals = None
        self._df = None
        self.shape = observed.shape
Exemplo n.º 2
0
 def test_to_dict_roundtrip(self):
     """roundtrip of DictArray.to_dict() should reproduce the input dict."""
     d1 = dict(a=dict(k=1, l=2, m=3), b=dict(k=4, l=5, m=6))
     darr1 = DictArray(d1)
     d2 = darr1.to_dict()
     # the exported dict must equal the dict the array was built from
     self.assertEqual(d1, d2)
     # an array rebuilt from the exported dict must also compare equal to it
     darr2 = DictArray(d2)
     self.assertEqual(d1, darr2)
Exemplo n.º 3
0
    def test_to_dict_1d(self):
        """to_dict(flatten=True) should give a dict keyed by (row, column) tuples"""
        # the three distinct off-diagonal values, each used symmetrically below
        abaye_avin = 0.6381173875591908
        atu_avin = 2.3682377869318993
        avin_baa = 1.8515731266342546
        data = {
            "ABAYE2984": {
                "ABAYE2984": 0,
                "Atu3667": None,
                "Avin_42730": abaye_avin,
                "BAA10469": None,
            },
            "Atu3667": {
                "ABAYE2984": None,
                "Atu3667": 0,
                "Avin_42730": atu_avin,
                "BAA10469": None,
            },
            "Avin_42730": {
                "ABAYE2984": abaye_avin,
                "Atu3667": atu_avin,
                "Avin_42730": 0,
                "BAA10469": avin_baa,
            },
            "BAA10469": {
                "ABAYE2984": None,
                "Atu3667": None,
                "Avin_42730": avin_baa,
                "BAA10469": 0,
            },
        }
        darr = DictArray(data, dtype="O")

        # build the expectation by looking up every (row, column) pair
        expect = {}
        for n1 in darr.template.names[0]:
            for n2 in darr.template.names[1]:
                expect[(n1, n2)] = darr[n1, n2]
        self.assertEqual(darr.to_dict(flatten=True), expect)

        # same check for an array produced by wrapping raw values in a template
        template = DictArrayTemplate(["s1", "s2"], ["s1", "s2"])
        darr = template.wrap([[0.0, 0.25], [0.25, 0.0]])
        flattened = {
            ("s1", "s2"): 0.25,
            ("s2", "s1"): 0.25,
            ("s1", "s1"): 0.0,
            ("s2", "s2"): 0.0,
        }
        self.assertEqual(darr.to_dict(flatten=True), flattened)
Exemplo n.º 4
0
    def test_inputs_from_dict_array(self):
        """inputs_from_dict_array returns an array and a list of named nodes"""
        twod = {
            "1": {"1": 0, "2": 0.86, "3": 0.92},
            "2": {"1": 0.86, "2": 0, "3": 0.67},
            "3": {"1": 0.92, "2": 0.67, "3": 0},
        }

        matrix_array, node_order = inputs_from_dict_array(DictArray(twod))

        # first and last nodes carry the first and last keys of the input
        self.assertEqual(node_order[0].name, "1")
        self.assertEqual(node_order[2].name, "3")
        # spot-check two entries of the returned matrix against the input
        assert_allclose(matrix_array[0][2], 0.92)
        assert_allclose(matrix_array[1][0], 0.86)
Exemplo n.º 5
0
    def __init__(self, observed, expected=None):
        """Validate and store the observed and, optionally, expected values.

        Parameters
        ----------
        observed
            a DictArray instance, or something that can be converted to one
        expected
            provide in the case where you know the prior proportions, otherwise
            calculated from marginal frequencies. When given, it is wrapped
            using the template of observed.

        Raises
        ------
        ValueError
            if observed sums to zero, or observed / expected contain a
            negative value
        AssertionError
            if the totals of observed and expected are not close
        """
        if not isinstance(observed, DictArray):
            observed = DictArray(observed)

        if observed.array.sum() == 0:
            raise ValueError("at least one value must be > 0")

        # Test against None rather than truthiness: truth-testing a numpy
        # array with more than one element raises ValueError.
        has_expected = expected is not None
        if has_expected:
            expected = observed.template.wrap(expected)

        if observed.array.min() < 0 or (has_expected and expected.array.min() < 0):
            raise ValueError("negative values encountered")

        if has_expected:
            # err_msg attaches the explanation to the AssertionError; a string
            # placed after the call would just be discarded.
            assert_allclose(
                observed.array.sum(),
                expected.array.sum(),
                err_msg="unequal totals of observed and expected",
            )

        self._observed = observed
        self._expected = expected
        self._residuals = None
        self._df = None
        self.shape = observed.shape
Exemplo n.º 6
0
 def test_valid_setitem(self):
     """tabular_result accepts table and DictArray items and converts to json"""
     result = tabular_result("null")
     result["result"] = make_table(data={"A": [0, 1]})
     result["result2"] = DictArray({"A": [0, 1]})
     # serialisation of the populated result must yield a string
     self.assertIsInstance(result.to_json(), str)
Exemplo n.º 7
0
 def test_direct_construction(self):
     """DictArray can be built directly from several input layouts"""
     wrapped = DictArrayTemplate("abc", "ABC").wrap(self.a)
     data_types = (
         [[245, 599]],  # nested list
         dict(a=dict(b=4, c=5)),  # dict of dicts
         {("a", "b"): 4, ("a", "c"): 5},  # dict keyed by tuples
         dict(a=0, b=35, c=45),  # flat dict
         wrapped,  # object returned by a template's wrap()
     )
     # there are no assertions: the test passes if no construction raises
     for data in data_types:
         DictArray(data)
Exemplo n.º 8
0
def upgma(pairwise_distances):
    """Cluster sequences with the UPGMA algorithm.

    pairwise_distances: a dictionary with pair tuples mapped to a distance
    returns a PhyloNode object of the UPGMA cluster. Nodes without a parent
    are named "root"; other unnamed nodes are named "edge.<n>", numbered in
    traversal order.
    """
    matrix_a, node_order = inputs_from_dict_array(DictArray(pairwise_distances))
    tree = UPGMA_cluster(matrix_a, node_order, BIG_NUM)

    num_labelled = 0
    for node in tree.traverse():
        if not node.parent:
            node.name = "root"
            continue
        if node.name:
            # already named, leave untouched
            continue
        node.name = "edge.{}".format(num_labelled)
        num_labelled += 1
    return tree
Exemplo n.º 9
0
    def __init__(
        self,
        header=None,
        rows=None,
        row_order=None,
        digits=4,
        space=4,
        title="",
        missing_data="",
        max_width=1e100,
        row_ids=None,
        legend="",
        column_templates=None,
        dtype=None,
        data_frame=None,
        format="simple",
    ):
        """Build a table from column headings and row data.

        Parameters
        ----------
        header
            column headings
        rows
            a 2D dict, list or tuple. If a dict, it must have column
            headings as top level keys, and common row labels as keys in each
            column.
        row_order
            the order in which rows will be pulled from the twoDdict
        digits
            floating point resolution
        space
            number of spaces between columns or a string
        title
            as implied
        missing_data
            character assigned if a row has no entry for a column
        max_width
            maximum column width for printing
        row_ids
            if True, the 0'th column is used as row identifiers and keys
            for slicing.
        legend
            table legend
        column_templates
            dict keyed by column heading, or a function that will handle
            the formatting.
        dtype
            optional numpy array typecode. Defaults to "O".
        data_frame
            pandas DataFrame, Table will be created from this
        format
            output format when using str(Table)

        Raises
        ------
        ValueError
            if data_frame is given but pandas is not installed, if header is
            missing or empty, or if rows is None
        RuntimeError
            if the number of header entries cannot be matched to the row data
        """
        if data_frame is not None and not _pandas_available:
            raise ValueError("data_frame provided when pandas not installed")
        elif data_frame is not None:
            if rows or header:
                warnings.warn("provided rows/header will be over ridden by "
                              "DataFrame")

            rows = data_frame.to_records(index=False).tolist()
            header = data_frame.columns.tolist()

        if isinstance(header, numpy.ndarray):
            header = header.tolist()

        if not header:
            raise ValueError("header must be provided to Table")
        elif rows is None:
            raise ValueError("rows cannot be None")

        if len(rows) == 0:
            rows = numpy.empty((0, len(header)))

        # Check the header length against the number of columns in rows.
        # Explicit checks are used instead of assert so the validation is
        # not stripped when python runs with -O. isinstance matches the
        # dict handling further down, so dict subclasses are measured as
        # dicts rather than being indexed with rows[0].
        mismatch_msg = "header and rows must be provided to Table"
        try:
            num_cols = len(header)
            if isinstance(rows, numpy.ndarray):
                num_row_cols = rows.shape[1]
            elif isinstance(rows, dict):
                num_row_cols = len(rows)
            else:
                num_row_cols = len(rows[0])
        except (IndexError, TypeError):
            raise RuntimeError(mismatch_msg)

        if num_cols <= 0 or num_cols != num_row_cols:
            raise RuntimeError(mismatch_msg)

        header = [str(head) for head in header]
        if isinstance(rows, dict):
            rows = convert2DDict(rows, header=header, row_order=row_order)

        # if row_ids, we select that column as the row identifiers
        # NOTE(review): any non-None value (including False) selects column 0,
        # while the docstring says "if True" -- confirm the intended contract.
        if row_ids is not None:
            identifiers = [row[0] for row in rows]
        else:
            identifiers = len(rows)

        if not dtype:
            dtype = "O"
        DictArray.__init__(self, rows, identifiers, header, dtype=dtype)

        # forcing all column headings to be strings
        self._header = _Header([str(head) for head in header])
        self._missing_data = missing_data

        # default title / legend to be empty strings
        self.title = str(title) if title else ""
        self.legend = str(legend) if legend else ""
        try:
            self.space = " " * space
        except TypeError:
            # space was not a number, so it is stored as given
            self.space = space
        self._digits = digits
        self._row_ids = row_ids
        self._max_width = max_width

        # some attributes are not preserved in any file format, so always based
        # on args
        self._column_templates = column_templates or {}

        self.format = format

        # define the repr() display policy: tables with fewer than 50 rows
        # use head=<row count> and no tail, larger ones use 5 for each
        if self.shape[0] < 50:
            head, tail = self.shape[0], None
        else:
            head, tail = 5, 5

        self._repr_policy = dict(head=head, tail=tail, random=0)
Exemplo n.º 10
0
 def row_totals(self):
     """returns the row sums of self.array, wrapped for labelled access"""
     totals = self.array.sum(axis=1)
     # NOTE(review): wrap() is called on a DictArray built from (1, n);
     # confirm DictArray exposes wrap() -- a template class may be intended.
     return DictArray(1, totals.shape[0]).wrap(totals)