Exemplo n.º 1
def test_conversion_key():
    """Check that a RowKey compares equal to the expected tuple.

    The cases cover scalar inputs (int, float, one- and multi-character
    strings) as well as tuples and lists holding one to three elements.
    """
    cases = [
        # scalars compare equal to a one-element tuple
        (1, (1, )),
        (1.0, (1.0, )),
        ("A", ("A", )),
        # a multi-character string stays a single element
        ("ABC", ("ABC", )),
        # tuples keep their elements
        ((1, ), (1, )),
        ((1, 2), (1, 2)),
        ((1, 2, 3), (1, 2, 3)),
        # lists compare equal to the tuple with the same elements
        ([1], (1, )),
        ([1, 2], (1, 2)),
        ([1, 2, 3], (1, 2, 3)),
    ]
    for raw, expected in cases:
        assert RowKey(raw) == expected
Exemplo n.º 2
    def __init__(self, rows, names=None, _internal_=False, _children_names_=None):
        """Initialise the table from ``rows`` and set up its column metadata.

        NOTE(review): this listing appears to be truncated -- the ``try:``
        for the ``except StopIteration:`` clause, several ``else:`` lines and
        the closing parentheses of the ``TableColumns(`` calls are not
        present. The description below covers only what the visible lines
        show; confirm against the complete source.

        Args:
            rows: The table content. When ``_internal_`` is true it is used
                as-is; otherwise it must be a Mapping or an Iterable of
                ``(key, value)`` pairs, and every key is wrapped in RowKey.
            names (list, optional): Column names. When None, they default to
                'X1', 'X2', ... with one name per element of the sampled
                row key.
            _internal_ (bool, optional): Marks an internal call whose
                ``rows`` are already in dictionary form. Defaults to False.
            _children_names_ (list, optional): Names stored as
                ``children_names`` (an empty list is used when it is None).
                Defaults to None.

        Raises:
            ValueError: When ``rows`` is neither a Mapping nor an Iterable
                (presumably -- the guarding ``else:`` is not visible), or
                when the number of names differs from the length of the
                sampled row key.
        """

        if _internal_:
            # rows are dictionary for internal calls
            key_values = rows
            # NOTE(review): a `try:` line seems to be missing here; the
            # statement below is indented as its body.
                self._row_sample_ = next(iter(rows))
            except StopIteration:
                # Rows are empty
                self._row_sample_ = None
                self.names = names
                # With no rows there is no value to inspect, so the children
                # names come only from the `_children_names_` argument.
                if _children_names_ is None:
                    self.children_names = []
                    self.columns = TableColumns(
                        names=names, children_names=[], table=self
                    # NOTE(review): the closing ")" and an `else:` seem to be
                    # missing before the next assignment.
                    self.children_names = _children_names_
                    self.columns = TableColumns(
                        names=names, children_names=_children_names_, table=self
            # NOTE(review): the closing ")" and the `else:` of
            # `if _internal_:` seem to be missing here; the lines below
            # convert the keys of user-supplied rows to RowKey.
            if isinstance(rows, Mapping):
                key_values = [(RowKey(k), value) for k, value in rows.items()]
            elif isinstance(rows, Iterable):
                key_values = [(RowKey(k), value) for k, value in rows]
                # NOTE(review): an `else:` seems to be missing before this
                # raise.
                raise ValueError("Table expect rows as Mapping/Iterable")

            # The key of the first row serves as the sample row key.
            self._row_sample_ = key_values[0][0]

        # Default column names: X1, X2, ... one per element of the sample key.
        if names is None:
            names = [f"X{i+1}" for i, _ in enumerate(self._row_sample_)]

        if len(names) != len(self._row_sample_):
            raise ValueError("The length of column names and columns are not the same.")


        # NOTE(review): `key_values` is built above but never consumed in the
        # visible lines -- confirm where the rows are actually stored.
        self.names = names
        # Look up the value stored under the sample key to see whether the
        # values are themselves Table objects.
        value_sample = super().__getitem__(self._row_sample_)
        if isinstance(value_sample, Table):
            # The children names are taken from the nested Table.
            self.columns = TableColumns(
                names=names, children_names=value_sample.names, table=self
            # NOTE(review): the closing ")" seems to be missing here.
            self.children_names = value_sample.names
            # NOTE(review): an `else:` seems to be missing here.
            if _children_names_ is None:
                self.children_names = []
                self.columns = TableColumns(names=names, children_names=[], table=self)
                # NOTE(review): an `else:` seems to be missing here.
                self.children_names = _children_names_
                self.columns = TableColumns(
                    names=names, children_names=_children_names_, table=self
                # NOTE(review): the closing ")" seems to be missing here.
Exemplo n.º 3
    def from_np_array(cls, samples, names=None):
        """Construct a FrequencyTable from a 2d numpy array or list of lists.

        Every row of ``samples`` is converted to a RowKey before the class
        constructor is called. The resulting keys are tuples.

        Args:
            samples (list or numpy.ndarray):
                The observed samples. An empty list or array is passed on
                to the constructor as an empty list of rows.
            names (list, optional):
                List of names of the columns.
                If it is not provided, it creates as 'Xn'.
                Defaults to None.

        Raises:
            ValueError: Raises when the samples argument is neither a numpy
                ndarray nor a list, or when it is a non-empty list whose
                first element is not a list.

        Returns:
            The object returned by ``cls(samples=..., names=names)``.
        """
        is_supported = isinstance(samples, (np.ndarray, list))
        # Only look at the first row when there is one: indexing an empty
        # list with samples[0] would raise IndexError instead of reaching
        # the constructor.
        is_flat_list = (
            isinstance(samples, list)
            and len(samples) > 0
            and not isinstance(samples[0], list)
        )
        if not is_supported or is_flat_list:
            raise ValueError(
                "'sample' argument must be numpy 2D ndarray or list of list.")
        # Convert rows to element, before calling
        # the construct
        return cls(samples=[RowKey(row) for row in samples], names=names)
Exemplo n.º 4
    def reduce(self, **kwargs):
        """Reduce the Table by one or more columns.

        P(X, Y) -> P(X = x, Y) or  P(X,  Y = y)

        Only the rows whose given columns equal the given values are kept;
        the values of the remaining rows are summed per key of the other
        (complement) columns.

        Args:
            kwargs (dict):
                A dictionary that its 'key' is the name
                of the column and its 'value'
                is the value that must be reduced by.

        Raises:
            ValueError:
                If a value is provided for every column of the Table.
                The original documentation also lists the case where the
                provided names do not exist in the Table (presumably
                raised while splitting the columns -- confirm).

        Returns:
            [Table]: A reduced Table.
        """
        # split columns to indices and comp_indices
        columns = list(kwargs.keys())
        if len(columns) == self.columns.size:
            raise ValueError("Cannot reduce on all column names.")
        columns_info = self.columns.split_columns(*columns)
        # The builtin `object` replaces the `np.object` alias, which newer
        # NumPy releases no longer provide.
        values = np.array(list(kwargs.values()), dtype=object)
        # Convert the key:values to 2D numpy array
        # the array rows are (keys, value)
        arr_counter = self.to_2d_array()
        # Boolean mask of the rows whose reduced columns all match the
        # provided values; the filtering itself happens on the next line.
        conditioned_arr = np.all(arr_counter[:, columns_info.indices] == values, axis=1)
        sliced_arr = arr_counter[conditioned_arr, :]
        # Keep the complement columns plus the value column
        # (which is the last column).
        sliced_arr = sliced_arr[:, columns_info.complimnet_indices + [-1]]
        # Split every row into (key of complement columns, value).
        arr_gen = ((RowKey(row[:-1]), row[-1]) for row in sliced_arr)
        # groupby only merges adjacent items, so sort by the key
        # (index zero in itemgetter) first.
        sorted_slice_arr = sorted(arr_gen, key=itemgetter(0))
        # Group by the key of complement columns and sum the value per key.
        grouped_arr = {
            k: sum(item[1] for item in g)
            for k, g in groupby(sorted_slice_arr, key=itemgetter(0))
        }
        # NOTE(review): the tail of this call was missing from the listing;
        # the names and `_internal_` arguments mirror the equivalent call in
        # `marginal` -- confirm against the complete source.
        return Table(grouped_arr, columns_info.complimnet_names, _internal_=True)
Exemplo n.º 5
    def marginal(self, *args, normalise=True):
        """Marginal of (group by) the Table over a set of columns.

        P(X, Y, Z) -> P(X, Y) or P(X, Z) or P(Y, Z)

        The values are summed per key of the complement of the given
        columns, so the returned Table is indexed by the columns that were
        NOT passed in ``args``.

        NOTE(review): this listing appears to be truncated -- the closing
        brace of the ``grouped_arr`` comprehension and the body of
        ``if normalise:`` are not present. Confirm against the complete
        source.

        Args:
            args (list):
                List of column names to marginalise.
            normalise (bool, optional):
                Guards a final step whose statement is missing from this
                listing (presumably normalising the result -- confirm).
                Defaults to True.

        Raises:
            ValueError:
                Raises when requested for all column names.
                The original documentation also lists the case where one
                of the column names is not defined (presumably raised
                while splitting the columns -- confirm).

        Returns:
            Table: built from the grouped sums and the complement names.
        """
        # check the validity of operation based on column names
        if len(args) == self.columns.size:
            raise ValueError("Cannot marginalize on all column names.")

        # split columns to indices and comp_indices
        columns_info = self.columns.split_columns(*args)
        # Convert the key:values to 2D numpy array
        # the array rows are (row, value)
        arr = self.to_2d_array()
        # keep the complement columns and append the value column (the last one)
        filtered_arr = np.c_[arr[:, columns_info.complimnet_indices], arr[:, -1]]
        # split the 2d array's rows to a tuple of
        # complement columns (row[:-1], wrapped in RowKey)
        # and the value row[-1]
        arr_gen = ((RowKey(row[:-1]), row[-1]) for row in filtered_arr)
        # Before calling the groupby, we have to sort the generator
        # by the tuple of complement columns (index zero in itemgetter)
        sorted_arr = sorted(arr_gen, key=itemgetter(0))
        # since the values in each 'group' are
        # (complement columns, value)
        # here we group by 'complement columns' and apply
        # the sum on the value. Then the dictionary of
        # complement columns:sum_of_values
        # is an acceptable argument for Table
        grouped_arr = {
            k: sum([item[1] for item in g])
            for k, g in groupby(sorted_arr, key=itemgetter(0))
        # NOTE(review): the closing "}" of the comprehension seems to be
        # missing here.
        table = Table(grouped_arr, columns_info.complimnet_names, _internal_=True)
        if normalise:
            # NOTE(review): the body of this branch is missing from the
            # listing.

        return table
Exemplo n.º 6
 def compliment_key(key):
     """Build a RowKey from the entries of ``key`` at ``compliment_indices``.

     ``compliment_indices`` is not a parameter; it is read from the
     surrounding scope. The selected entries are passed to RowKey as
     separate positional arguments.
     """
     selected = (key[position] for position in compliment_indices)
     return RowKey(*selected)
Exemplo n.º 7
 def compliment_key(key):
     """Make a split key: a RowKey of ``key``'s entries at ``compliment_indices``.

     ``compliment_indices`` comes from the surrounding scope, and each
     selected entry becomes one positional argument of RowKey.
     """
     parts = (key[index] for index in compliment_indices)
     return RowKey(*parts)
Exemplo n.º 8
    def condition_on(self, *args, normalise=True):
        """Creates the conditional based on the provided names of columns.

        P(X, Y) -> P(X | Y) or P(Y | X)

        The rows are grouped first by the given columns and then, inside
        each group, by the complement columns; the values are summed per
        inner key. Every outer group becomes a Table and the groups are
        collected in a MultiTable.

        NOTE(review): this listing appears to be truncated -- the element
        expression of the ``arr_gen`` generator, the closing brackets of
        several literals, the remaining arguments of ``MultiTable(`` and
        the body of ``if normalise:`` are not present. Confirm against the
        complete source.

        Args:
            args (list):
                List of names of the columns to condition on.
            normalise (bool, optional):
                Guards a final step whose statement is missing from this
                listing (presumably normalising the result -- confirm).
                Defaults to True.

        Raises:
            ValueError:
                If the Table has a single column, or if all columns are
                requested. The original documentation also lists the case
                where the provided names do not exist in the distribution
                (presumably raised while splitting the columns -- confirm).

        Returns:
            MultiTable: one Table of complement columns per key of the
            conditioned columns.
        """
        if self.columns.size == 1:
            raise ValueError("This is a single column Table and cannot condition on.")

        if len(args) == self.columns.size:
            raise ValueError("Cannot condition on all columns.")
        # split columns to indices and comp_indices
        columns_info = self.columns.split_columns(*args)
        # Convert the key:value to 2D numpy array
        # the array rows are (rows, value)
        arr = self.to_2d_array()
        # divide the 2d array's rows to a tuple of columns,
        # (row[indices]), complement columns (row[comp_indices])
        # and values row[-1]
        arr_gen = (
            # NOTE(review): the element expression is missing here; per the
            # comment above it yields (columns, complement columns, value).
            for row in arr
        # NOTE(review): the closing ")" of the generator seems to be missing
        # here.
        # Before calling the groupby, we have to sort the generator
        # by the tuple of columns (index zero in itemgetter)
        # And since later we will call the group by on group,
        # for each key we do the inner sort too (index one in itemgetter)
        sorted_arr = sorted(arr_gen, key=itemgetter(0, 1))
        # This method convert a group to a dictionary

        def make_dict(group):
            # since the values in 'group' argument are
            # (columns, complement columns, value)
            # here we group by 'complement columns' and sum
            # the values.
            return {
                k: sum([item[2] for item in g2])
                for k, g2 in groupby(group, key=itemgetter(1))
            # NOTE(review): the closing "}" seems to be missing here.

        # For each group (belongs a unique values), we create
        # a dictionary in a dictionary comprehension
        grouped_arr = {
            k: make_dict(g) for k, g in groupby(sorted_arr, key=itemgetter(0))
        # NOTE(review): the closing "}" seems to be missing here.
        # The above dictionary is dictionary of dictionaries
        # the first set of names is for parent dictionary
        # and the second set is for children
        table = MultiTable(
                # NOTE(review): the opening "{" of this comprehension, its
                # closing "}" and the remaining arguments of the call are
                # missing from the listing.
                key: Table(values, columns_info.complimnet_names, _internal_=True)
                for key, values in grouped_arr.items()
        if normalise:
            # NOTE(review): the body of this branch is missing from the
            # listing.

        return table