Exemplo n.º 1
0
def test_conversion_key():
    """Check that a RowKey compares equal to the expected tuple.

    Scalars (int, float, str) are expected to compare equal to a
    one-element tuple holding that value; a multi-character string stays
    a single element. Tuples and lists are expected to compare equal to
    the tuple of their elements.
    """
    cases = [
        # scalars
        (1, (1, )),
        (1.0, (1.0, )),
        ("A", ("A", )),
        ("ABC", ("ABC", )),
        # tuples
        ((1, ), (1, )),
        ((1, 2), (1, 2)),
        ((1, 2, 3), (1, 2, 3)),
        # lists
        ([1], (1, )),
        ([1, 2], (1, 2)),
        ([1, 2, 3], (1, 2, 3)),
    ]
    for raw, expected in cases:
        converted = RowKey(raw)
        assert converted == expected
Exemplo n.º 2
0
    def __init__(self, rows, names=None, _internal_=False, _children_names_=None):
        """Build the table from rows and set up its column metadata.

        Args:
            rows (Mapping or Iterable):
                Either a mapping of key -> value or an iterable of
                (key, value) pairs. Each key is converted with RowKey.
                For internal calls (_internal_=True) the rows are handed
                to the parent constructor as they are.
            names (list, optional):
                Names of the key columns. When omitted and the table has
                at least one row, names 'X1', 'X2', ... are generated,
                one per element of the first row key.
                Defaults to None.
            _internal_ (bool, optional):
                Marks a call made by the library itself with rows that
                need no key conversion. Defaults to False.
            _children_names_ (list, optional):
                Names stored as children_names when the first value is
                not a Table (or when there are no rows).
                Defaults to None, which stores an empty list.

        Raises:
            ValueError:
                If rows is neither a Mapping nor an Iterable (public
                calls only), or if the number of names differs from the
                length of the first row key.
        """
        has_rows = True
        if _internal_:
            # rows are dictionary for internal calls
            key_values = rows
            try:
                self._row_sample_ = next(iter(rows))
            except StopIteration:
                has_rows = False
        else:
            if isinstance(rows, Mapping):
                key_values = [(RowKey(k), value) for k, value in rows.items()]
            elif isinstance(rows, Iterable):
                key_values = [(RowKey(k), value) for k, value in rows]
            else:
                raise ValueError("Table expect rows as Mapping/Iterable")

            # Guard the sample lookup: an empty input used to fail here
            # with IndexError although internal calls accept empty rows.
            if key_values:
                self._row_sample_ = key_values[0][0]
            else:
                has_rows = False

        if not has_rows:
            # Rows are empty: there is no sample key to derive names or
            # children from, so store what was given and stop.
            super().__init__(key_values)
            self._row_sample_ = None
            self.names = names
            if _children_names_ is None:
                self.children_names = []
                self.columns = TableColumns(
                    names=names, children_names=[], table=self
                )
            else:
                self.children_names = _children_names_
                self.columns = TableColumns(
                    names=names, children_names=_children_names_, table=self
                )
            return

        if names is None:
            # One generated name per element of the sample row key
            names = [f"X{i+1}" for i, _ in enumerate(self._row_sample_)]

        if len(names) != len(self._row_sample_):
            raise ValueError("The length of column names and columns are not the same.")

        super().__init__(key_values)

        self.names = names
        # Use the parent's lookup to read the raw stored value of the
        # sample row.
        value_sample = super().__getitem__(self._row_sample_)
        if isinstance(value_sample, Table):
            # Nested tables: the children names come from the child table
            self.columns = TableColumns(
                names=names, children_names=value_sample.names, table=self
            )
            self.children_names = value_sample.names
        else:
            if _children_names_ is None:
                self.children_names = []
                self.columns = TableColumns(names=names, children_names=[], table=self)
            else:
                self.children_names = _children_names_
                self.columns = TableColumns(
                    names=names, children_names=_children_names_, table=self
                )
Exemplo n.º 3
0
    def from_np_array(cls, samples, names=None):
        """Construct a FrequencyTable from a 2d numpy array or list of lists.
           The resulting keys are tuples.

        Args:
            samples (list or numpy.ndarray):
                the observed samples. An empty list is accepted and is
                forwarded as no rows, the same way an empty ndarray is.
            names (list, optional):
                List of names of the columns.
                If it is not provided, it creates as 'Xn'.
                Defaults to None.

        Raises:
            ValueError: Raises when the samples argument is not list or
                        numpy.ndarray, or when it is a list whose first
                        element is not a list.

        Returns:
            The object returned by calling cls with the converted rows
            and the given names.
        """
        if not isinstance(samples, (np.ndarray, list)):
            raise ValueError(
                "'sample' argument must be numpy 2D ndarray or list of list.")
        # Only the first row is inspected. The emptiness guard keeps an
        # empty list from raising IndexError on samples[0].
        if isinstance(samples, list) and samples and not isinstance(samples[0], list):
            raise ValueError(
                "'sample' argument must be numpy 2D ndarray or list of list.")
        # Convert rows to element, before calling
        # the construct
        return cls(samples=[RowKey(row) for row in samples], names=names)
Exemplo n.º 4
0
    def reduce(self, **kwargs):
        """Reduce the Table by one or more columns.
           P(X, Y) -> P(X = x, Y) or  P(X,  Y = y)
        Args:
            kwargs (dict):
                A dictionary that its 'key' is the name
                of the column and its 'value'
                is the value that must be reduced by.

        Raises:
            ValueError:
                If a value is given for every column of the Table.
                The original documentation also lists unknown column
                names here; that check is presumably done by
                split_columns (not visible from this method).

        Returns:
            [Table]: A reduce Table.
        """
        # split columns to indices and comp_indices
        columns = list(kwargs)
        if len(columns) == self.columns.size:
            raise ValueError("Cannot reduce on all column names.")
        columns_info = self.columns.split_columns(*columns)
        # The builtin 'object' replaces the 'np.object' alias, which
        # was deprecated in NumPy 1.20 and removed in 1.24.
        values = np.array(list(kwargs.values()), dtype=object)
        #
        # Convert the key:values to 2D numpy array
        # the array rows are (keys, value)
        arr_counter = self.to_2d_array()
        # filter the 2d array rows by provided values of the reduce
        # conditioned_arr is a boolean one, and filtering happens
        # in the second line
        conditioned_arr = np.all(arr_counter[:, columns_info.indices] == values, axis=1)
        sliced_arr = arr_counter[conditioned_arr, :]
        # filter the 2d array columns (the compliment columns)
        # plus the value column (which is the last column)
        sliced_arr = sliced_arr[:, columns_info.complimnet_indices + [-1]]
        # divide the 2d array's rows to a tuple of columns
        # and value
        # So, we make a generator that divide the rows to the tuple of
        # columns (tuple(row[:-1]) and value (row[-1])
        arr_gen = ((RowKey(row[:-1]), row[-1]) for row in sliced_arr)
        # Before calling the groupby, we have to sort the generator
        # by the tuple of column (index zero in itemgetter)
        sorted_slice_arr = sorted(arr_gen, key=itemgetter(0))
        # group by the filtered columns (compliment
        # columns) and sum the value per key
        # Note that the 'itemgetter' read the first index which
        # is the tuple of compliment columns
        return Table(
            {
                k: sum(item[1] for item in g)
                for k, g in groupby(sorted_slice_arr, key=itemgetter(0))
            },
            columns_info.complimnet_names,
            _internal_=True,
        )
Exemplo n.º 5
0
    def marginal(self, *args, normalise=True):
        """Sum the Table over the given columns, keeping the remaining ones.
           P(X, Y, Z) -> P(X, Y) or P(X, Z) or P(Y, Z)
        Args:
            args (list):
                Names of the columns to marginalise over.
            normalise (bool, optional):
                When true, normalise() is called on the resulting
                Table before it is returned. Defaults to True.

        Raises:
            ValueError:
                When as many names are given as the Table has columns.
                The handling of undefined names is left to
                split_columns (not visible from this method).

        Returns:
            Table: keyed by the remaining (compliment) columns, each
            value being the sum of the grouped values.
        """
        # Marginalising over every column would leave no key
        if len(args) == self.columns.size:
            raise ValueError("Cannot marginalize on all column names.")

        split = self.columns.split_columns(*args)
        # Rows of the 2D array are (key elements..., value)
        data = self.to_2d_array()
        # Keep the compliment columns and append the value column (last one)
        kept = np.c_[data[:, split.complimnet_indices], data[:, -1]]
        # groupby only merges adjacent items, so the (key, value) pairs
        # are sorted by key first
        pairs = sorted(
            ((RowKey(record[:-1]), record[-1]) for record in kept),
            key=itemgetter(0),
        )
        totals = {}
        for group_key, members in groupby(pairs, key=itemgetter(0)):
            totals[group_key] = sum(member[1] for member in members)

        result = Table(totals, split.complimnet_names, _internal_=True)
        if normalise:
            result.normalise()

        return result
Exemplo n.º 6
0
 def compliment_key(key):
     """Build a RowKey from the elements of key at the compliment indices.

     compliment_indices is taken from the enclosing scope.
     """
     picked = [key[index] for index in compliment_indices]
     return RowKey(*picked)
Exemplo n.º 7
0
 def compliment_key(key):
     """Return the split key: a RowKey of key's compliment-index elements.

     compliment_indices is taken from the enclosing scope.
     """
     selected = [key[position] for position in compliment_indices]
     return RowKey(*selected)
Exemplo n.º 8
0
    def condition_on(self, *args, normalise=True):
        """Build the conditional table for the given column names.
           P(X, Y) -> P(X | Y) or P(Y | X)
        Args:
            args (list):
                Names of the columns to condition on.
            normalise (bool, optional):
                When true, normalise() is called on the resulting
                MultiTable before it is returned. Defaults to True.

        Raises:
            ValueError:
                If the Table has a single column, or if as many names
                are given as the Table has columns. The handling of
                unknown names is left to split_columns (not visible
                from this method).

        Returns:
            MultiTable: keyed by the conditioned columns; each value is
            a Table over the remaining (compliment) columns.
        """
        if self.columns.size == 1:
            raise ValueError("This is a single column Table and cannot condition on.")

        if len(args) == self.columns.size:
            raise ValueError("Cannot condition on all columns.")

        split = self.columns.split_columns(*args)
        # Rows of the 2D array are (key elements..., value)
        data = self.to_2d_array()
        # Each record becomes (conditioned key, compliment key, value).
        # Sorting by both keys makes the outer and the inner groupby
        # below see equal keys next to each other.
        triples = sorted(
            (
                (
                    RowKey(record[split.indices]),
                    RowKey(record[split.complimnet_indices]),
                    record[-1],
                )
                for record in data
            ),
            key=itemgetter(0, 1),
        )
        # Outer level groups by the conditioned key, inner level groups
        # by the compliment key and sums the values.
        nested = {
            outer_key: {
                inner_key: sum(entry[2] for entry in inner_group)
                for inner_key, inner_group in groupby(outer_group, key=itemgetter(1))
            }
            for outer_key, outer_group in groupby(triples, key=itemgetter(0))
        }
        # Parent names are the conditioned columns; each child Table
        # carries the compliment names.
        children = {}
        for outer_key, inner_rows in nested.items():
            children[outer_key] = Table(
                inner_rows, split.complimnet_names, _internal_=True
            )
        result = MultiTable(children, split.indices_names)
        if normalise:
            result.normalise()

        return result