Пример #1
0
def all_info_measures(vars):
    """
    Enumerate (partition, conditioning set) pairs drawn from `vars`.

    Each subset of `vars` produced by `powerset`, except the first one
    (presumably the empty set -- confirm against `powerset`), is split in
    every way offered by `partitions(..., tuples=True)`. Each split is then
    paired with every subset of the variables that were left out of it.

    Parameters
    ----------
    vars : iterable
        The variables to draw from. Its elements must be hashable.

    Yields
    ------
    measure : tuple
        A pair `(part, cond)`, where `part` is one partition of the chosen
        variables and `cond` is a subset of the remaining variables.
    """
    for chosen in islice(powerset(vars), 1, None):
        # Whatever was not chosen is available to condition on.
        remaining = set(vars).difference(chosen)
        for blocks in partitions(chosen, tuples=True):
            for conditioned in powerset(remaining):
                yield blocks, conditioned
Пример #2
0
def all_info_measures(vars):
    """
    Enumerate (partition, conditioning set) pairs drawn from `vars`.

    Each subset of `vars` produced by `powerset`, except the first one
    (presumably the empty set -- confirm against `powerset`), is split in
    every way offered by `partitions(..., tuples=True)`. Each split is then
    paired with every subset of the variables that were left out of it.

    Parameters
    ----------
    vars : iterable
        The variables to draw from. Its elements must be hashable.

    Yields
    ------
    measure : tuple
        A pair `(part, cond)`, where `part` is one partition of the chosen
        variables and `cond` is a subset of the remaining variables.
    """
    nonempty_subsets = islice(powerset(vars), 1, None)
    for subset in nonempty_subsets:
        # Whatever was not chosen is available to condition on.
        leftover = set(vars).difference(subset)
        for split in partitions(subset, tuples=True):
            for given in powerset(leftover):
                yield split, given
Пример #3
0
def poset_lattice(elements):
    """
    Return the Hasse diagram of the lattice induced by `elements`.

    The candidate nodes are the subsets yielded by `powerset(elements)`. A
    directed edge runs from a subset to every subset it contains that has
    exactly one member fewer. Only subsets that take part in at least one
    edge end up in the graph.

    Parameters
    ----------
    elements : iterable
        The ground set. Its members must be hashable.

    Returns
    -------
    lattice : DiGraph
        The directed graph of covering relations, with edges pointing from
        the larger subset to the smaller one.
    """
    lattice = DiGraph()

    for lower in powerset(elements):
        lower_set = set(lower)
        for upper in powerset(elements):
            upper_set = set(upper)
            # A covering pair differs by exactly one member ...
            if len(upper_set) - len(lower_set) != 1:
                continue
            # ... and the smaller set must sit inside the larger one.
            if lower_set.issubset(upper_set):
                lattice.add_edge(upper, lower)

    return lattice
Пример #4
0
def poset_lattice(elements):
    """
    Return the Hasse diagram of the lattice induced by `elements`.

    The candidate nodes are the subsets yielded by `powerset(elements)`. A
    directed edge runs from a subset to every subset it contains that has
    exactly one member fewer. Only subsets that take part in at least one
    edge end up in the graph.

    Parameters
    ----------
    elements : iterable
        The ground set. Its members must be hashable.

    Returns
    -------
    lattice : DiGraph
        The directed graph of covering relations, with edges pointing from
        the larger subset to the smaller one.
    """
    hasse = DiGraph()

    for small in powerset(elements):
        small_members = set(small)
        for big in powerset(elements):
            big_members = set(big)
            one_apart = (len(big_members) - len(small_members) == 1)
            # Edge from the parent (larger set) down to its child.
            if one_apart and small_members.issubset(big_members):
                hasse.add_edge(big, small)

    return hasse
Пример #5
0
def all_dist_structures(outcome_length, alphabet_size):
    """
    Iterate over uniform distributions on subsets of the joint outcomes.

    The joint outcomes are all words of length `outcome_length` over the
    symbols '0', '1', ..., and there are `alphabet_size`**`outcome_length`
    of them. One distribution is produced for each subset of those words
    yielded by `powerset`, except the first subset, which is discarded as
    the null set.

    Parameters
    ----------
    outcome_length : int
        The length of outcomes to consider.
    alphabet_size : int
        The size of the alphabet for each random variable.

    Yields
    ------
    d : Distribution
        A uniform distribution over a subset of the possible joint events.
    """
    # NOTE(review): symbols are the characters of the concatenated decimal
    # digits, so alphabet_size > 10 gives repeated symbols -- confirm that
    # only small alphabets are intended.
    symbols = ''.join(str(k) for k in range(alphabet_size))
    subsets = powerset(product(symbols, repeat=outcome_length))
    # Discard the leading empty subset; it supports no distribution.
    next(subsets)
    for subset in subsets:
        yield uniform([''.join(word) for word in subset])
Пример #6
0
 def test_quartet_set_representable_tree_sets(self):
     """
     Count the tree sets that can be picked out by a set of quartets.

     For each N, a set of bifurcating trees is "representable" when it is
     exactly the set of trees compatible with some subset of the quartets
     of a single tree.
     Sloane sequence does not exist.
     The extended sequence is [4, 41, 1586, ???].
     """
     expected = [4, 41]
     observed = []
     for N in range(4, 4 + len(expected)):
         # number the quartets so quartet sets become sets of ints
         quartet_index = {q: i for i, q in enumerate(get_quartets(N))}
         # all resolved trees on N leaves
         trees = list(get_bifurcating_trees(N))
         # tree index -> indices of the quartets compatible with that tree
         tree_quartets = {
             ti: set(quartet_index[q] for q in resolved_to_quartets(tree))
             for ti, tree in enumerate(trees)}
         # Try every subset of every tree's quartets and record which
         # trees are compatible with all quartets in that subset.
         representable = set()
         for tree in trees:
             own = [quartet_index[q] for q in resolved_to_quartets(tree)]
             for chosen in iterutils.powerset(own):
                 wanted = set(chosen)
                 pattern = frozenset(
                     ti for ti, have in tree_quartets.items()
                     if wanted.issubset(have))
                 representable.add(pattern)
         observed.append(len(representable))
     self.assertEqual(observed, expected)
Пример #7
0
def get_barrier(R):
    """
    Return the subset of vertices on one side of a strong barrier.
    A strong barrier is one that minimizes the ratio of the
    flow across the barrier to the logical entropy of the partition.
    An alternative characterization is that this barrier
    minimizes the randomization rate of the corresponding
    Markov-ized 2-state process.
    The search is exhaustive over every vertex subset, so the cost
    grows as 2**len(R). The best ratio found is printed to stdout
    before returning.
    If no split puts nonzero mass on both sides (for example when
    there are fewer than two vertices), no subset is selected and the
    final set() call raises TypeError.
    @param R: general reversible rate matrix
    @return: a vertex subset
    """
    n = len(R)
    # distribution derived from R; used to weight the vertices
    v = mrate.R_to_distn(R)
    vertices = set(range(n))
    best_subset = None
    best_ratio = None
    for A_tuple in iterutils.powerset(range(n)):
        # define the vertex set and its complement
        A = set(A_tuple)
        B = vertices - A
        Pa = sum(v[i] for i in A)
        Pb = sum(v[i] for i in B)
        # a split with no mass on one side has no defined ratio
        if not (Pa and Pb):
            continue
        flow = sum(v[i] * R[i, j] for i, j in product(A, B))
        ratio = flow / (Pa * Pb)
        if best_ratio is None or ratio < best_ratio:
            best_ratio = ratio
            best_subset = A
    # The call form prints the same on Python 2 and is valid on Python 3.
    print(best_ratio)
    return set(best_subset)
Пример #8
0
def all_dist_structures(outcome_length, alphabet_size):
    """
    Iterate over uniform distributions on subsets of the joint outcomes.

    The joint outcomes are all words of length `outcome_length` over the
    symbols '0', '1', ..., and there are `alphabet_size`**`outcome_length`
    of them. One distribution is produced for each subset of those words
    yielded by `powerset`, except the first subset, which is discarded as
    the null set.

    Parameters
    ----------
    outcome_length : int
        The length of outcomes to consider.
    alphabet_size : int
        The size of the alphabet for each random variable.

    Yields
    ------
    d : Distribution
        A uniform distribution over a subset of the possible joint events.
    """
    # NOTE(review): symbols are the characters of the concatenated decimal
    # digits, so alphabet_size > 10 gives repeated symbols -- confirm that
    # only small alphabets are intended.
    digits = ''.join(str(n) for n in range(alphabet_size))
    all_words = product(digits, repeat=outcome_length)
    supports = powerset(all_words)
    # Discard the leading empty subset; it supports no distribution.
    next(supports)
    for support in supports:
        events = [''.join(letters) for letters in support]
        yield uniform(events)
Пример #9
0
 def test_quartet_set_representable_tree_sets(self):
     """
     Count the tree sets that can be picked out by a set of quartets.

     For each N, a set of bifurcating trees is "representable" when it is
     exactly the set of trees compatible with some subset of the quartets
     of a single tree.
     Sloane sequence does not exist.
     The extended sequence is [4, 41, 1586, ???].
     """
     expected = [4, 41]
     observed = []
     for N in range(4, 4 + len(expected)):
         # number the quartets so quartet sets become sets of ints
         index_of = {q: i for i, q in enumerate(get_quartets(N))}
         # all resolved trees on N leaves
         resolved = list(get_bifurcating_trees(N))
         # tree index -> indices of the quartets compatible with that tree
         compatible = {
             ti: set(index_of[q] for q in resolved_to_quartets(tree))
             for ti, tree in enumerate(resolved)}
         # Try every subset of every tree's quartets and record which
         # trees are compatible with all quartets in that subset.
         patterns = set()
         for tree in resolved:
             tree_qis = [index_of[q] for q in resolved_to_quartets(tree)]
             for picked in iterutils.powerset(tree_qis):
                 required = set(picked)
                 matching = frozenset(
                     ti for ti, qis in compatible.items()
                     if required.issubset(qis))
                 patterns.add(matching)
         observed.append(len(patterns))
     self.assertEqual(observed, expected)
Пример #10
0
def test_join_sigalg():
    """ Test that join_sigalg over both variables gives every event """
    words = ['00', '01', '10', '11']
    dist = Distribution(words, [1/4] * 4)
    # Expected result: the collection of all subsets of the outcomes.
    every_event = frozenset(frozenset(event) for event in powerset(words))
    assert_equal(every_event, join_sigalg(dist, [[0], [1]]))
Пример #11
0
def test_tse1():
    """ Test identity comparing TSE to B from Olbrich's talk """
    # The pairs checked are (3, 2), (4, 3) and (5, 4).
    for n in range(3, 6):
        dist = n_mod_m(n, n - 1)
        singletons = [[k] for k in range(n)]
        tse = TSE(dist)
        # B of every subset of variables, scaled by the number of subsets
        # of that size.
        scaled = (B(dist, rv) / nCk(n, len(rv))
                  for rv in powerset(singletons))
        yield assert_almost_equal, tse, 1/2 * sum(scaled)
Пример #12
0
def coinformation(dist, rvs=None, crvs=None, rv_names=None):
    """
    Calculates the coinformation.

    The value is the alternating sum, over every subset `Xs` of `rvs`
    yielded by `powerset`, of `(-1)**(len(Xs)+1)` times the entropy of the
    union of the groups in `Xs`, with `crvs` passed through to each
    entropy call.

    Parameters
    ----------
    dist : Distribution
        The distribution from which the coinformation is calculated.
    rvs : list, None
        The groups of random variables to calculate the coinformation
        between, given as a list of lists of indexes. If None, then each
        random variable of the distribution forms its own group and
        `rv_names` is set to `False`.
    crvs : list, None
        The indexes of the random variables to condition on. If None, then
        no variables are conditioned on.
    rv_names : bool
        If `True`, then the elements of `rvs` are treated as random variable
        names. If `False`, then the elements of `rvs` are treated as random
        variable indexes.  If `None`, then the value `True` is used if the
        distribution has specified names for its random variables.

    Returns
    -------
    I : float
        The coinformation.

    Raises
    ------
    ditException
        Raised if `dist` is not a joint distribution.
    """
    if not dist.is_joint():
        msg = "The coinformation is applicable to joint distributions."
        raise ditException(msg)

    if rvs is None:
        # Default to one singleton group per random variable, by index.
        rvs = [[i] for i in range(dist.outcome_length())]
        rv_names = False
    if crvs is None:
        crvs = []

    def signed_entropy(groups):
        # Entropy of the union of the groups, with the inclusion-exclusion
        # sign attached.
        sign = (-1)**(len(groups) + 1)
        return sign * H(dist, set().union(*groups), crvs, rv_names)

    return sum(signed_entropy(Xs) for Xs in powerset(rvs))
Пример #13
0
def _get_cheeger_constant(R, v):
    """
    Compute the second isoperimetric constant by exhaustive search.
    Every subset of the len(v) states is tried against its complement.
    Splits where either side has zero measure are skipped.
    For the remaining splits the boundary measure is divided by the
    measure of each side, the larger quotient is kept, and the smallest
    such value over all splits is returned.
    @param R: a reversible rate matrix
    @param v: stationary distribution
    @return: the second isoperimetric constant,
    or None when no split has measure on both sides
    """
    n = len(v)
    states = set(range(n))
    best = None
    for subset in iterutils.powerset(range(n)):
        # one side of the split and everything else
        inside = set(subset)
        outside = states - inside
        inside_measure = sum(v[i] for i in inside)
        outside_measure = sum(v[i] for i in outside)
        if not (inside_measure and outside_measure):
            continue
        crossing = sum(v[i]*R[i, j] for i, j in product(inside, outside))
        worst_side = max(
            crossing / inside_measure,
            crossing / outside_measure)
        if best is None or worst_side < best:
            best = worst_side
    return best
Пример #14
0
def _get_cheeger_constant(R, v):
    """
    Compute the second isoperimetric constant by exhaustive search.
    Every subset of the len(v) states is tried against its complement.
    Splits where either side has zero measure are skipped.
    For the remaining splits the boundary measure is divided by the
    measure of each side, the larger quotient is kept, and the smallest
    such value over all splits is returned.
    @param R: a reversible rate matrix
    @param v: stationary distribution
    @return: the second isoperimetric constant,
    or None when no split has measure on both sides
    """
    n = len(v)
    universe = set(range(n))
    smallest = None
    for members in iterutils.powerset(range(n)):
        # one side of the split and everything else
        near = set(members)
        far = universe - near
        near_mass = sum(v[i] for i in near)
        far_mass = sum(v[i] for i in far)
        if not (near_mass and far_mass):
            continue
        boundary = sum(v[i] * R[i, j] for i, j in product(near, far))
        candidate = max(boundary / near_mass, boundary / far_mass)
        if smallest is None or candidate < smallest:
            smallest = candidate
    return smallest
Пример #15
0
def get_real_cheeger(Q):
    """
    Get the real cheeger constant.
    This is not a bound or an approximation.
    And it is NP hard to compute.
    Every nonempty vertex subset holding at most half of the len(Q)
    vertices is tried. Its ratio is the number of (inside, outside)
    vertex pairs with a truthy entry in Q, divided by the subset size.
    The smallest ratio is returned, or None if no subset qualifies.
    """
    n = len(Q)
    everything = set(range(n))
    best = None
    for subset in iterutils.powerset(range(n)):
        size = len(subset)
        # only nonempty subsets with at most half of the vertices count
        if size <= 0 or size > n / 2:
            continue
        inside = set(subset)
        outside = everything - inside
        crossings = sum(1 for v in inside for x in outside if Q[v, x])
        ratio = crossings / float(len(inside))
        if best is None or ratio < best:
            best = ratio
    return best
Пример #16
0
def get_real_cheeger(Q):
    """
    Get the real cheeger constant.
    This is not a bound or an approximation.
    And it is NP hard to compute.
    Every nonempty vertex subset holding at most half of the len(Q)
    vertices is tried. Its ratio is the number of (inside, outside)
    vertex pairs with a truthy entry in Q, divided by the subset size.
    The smallest ratio is returned, or None if no subset qualifies.
    """
    n = len(Q)
    universe = set(range(n))
    lowest = None
    for candidate in iterutils.powerset(range(n)):
        count = len(candidate)
        # only nonempty subsets with at most half of the vertices count
        if count <= 0 or count > n/2:
            continue
        chosen = set(candidate)
        rest = universe - chosen
        cut = sum(1 for v in chosen for x in rest if Q[v, x])
        ratio = cut / float(len(chosen))
        if lowest is None or ratio < lowest:
            lowest = ratio
    return lowest
Пример #17
0
def coinformation(dist, rvs=None, crvs=None, rv_mode=None):
    """
    Calculates the coinformation.

    The arguments are first passed through `normalize_rvs`. The value is
    then the alternating sum, over every subset `Xs` of `rvs` yielded by
    `powerset`, of `(-1)**(len(Xs)+1)` times the entropy of the union of
    the groups in `Xs`, with `crvs` passed through to each entropy call.

    Parameters
    ----------
    dist : Distribution
        The distribution from which the coinformation is calculated.
    rvs : list, None
        The indexes of the random variable used to calculate the coinformation
        between. If None, then the coinformation is calculated over all random
        variables.
    crvs : list, None
        The indexes of the random variables to condition on. If None, then no
        variables are conditioned on.
    rv_mode : str, None
        Specifies how to interpret `rvs` and `crvs`. Valid options are:
        {'indices', 'names'}. If equal to 'indices', then the elements of
        `crvs` and `rvs` are interpreted as random variable indices. If equal
        to 'names', then the elements are interpreted as random variable
        names. If `None`, then the value of `dist._rv_mode` is consulted,
        which defaults to 'indices'.
        NOTE(review): the examples below pass rv_mode='indexes'; confirm
        that this spelling is accepted.

    Returns
    -------
    I : float
        The coinformation.

    Raises
    ------
    ditException
        Raised if `dist` is not a joint distribution or if `rvs` or `crvs`
        contain nonexistent random variables.

    Examples
    --------
    Let's construct a 3-variable distribution for the XOR logic gate and name
    the random variables X, Y, and Z.

    >>> d = dit.example_dists.Xor()
    >>> d.set_rv_names(['X', 'Y', 'Z'])

    To calculate coinformations, recall that `rvs` specifies which groups of
    random variables are involved. For example, the 3-way mutual information
    I[X:Y:Z] is calculated as:

    >>> dit.multivariate.coinformation(d, ['X', 'Y', 'Z'])
    -1.0

    It is a quirk of strings that each element of a string is also an iterable.
    So an equivalent way to calculate the 3-way mutual information I[X:Y:Z] is:

    >>> dit.multivariate.coinformation(d, 'XYZ')
    -1.0

    The reason this works is that list('XYZ') == ['X', 'Y', 'Z']. If we want
    to use random variable indexes, we need to have explicit groupings:

    >>> dit.multivariate.coinformation(d, [[0], [1], [2]], rv_mode='indexes')
    -1.0



    To calculate the mutual information I[X, Y : Z], we use explicit groups:

    >>> dit.multivariate.coinformation(d, ['XY', 'Z'])

    Using indexes, this looks like:

    >>> dit.multivariate.coinformation(d, [[0, 1], [2]], rv_mode='indexes')



    The mutual information I[X:Z] is given by:

    >>> dit.multivariate.coinformation(d, 'XZ')
    0.0

    Equivalently,

    >>> dit.multivariate.coinformation(d, ['X', 'Z'])
    0.0

    Using indexes, this becomes:

    >>> dit.multivariate.coinformation(d, [[0], [2]])
    0.0



    Conditional mutual informations can be calculated by passing in the
    conditional random variables. The conditional mutual information
    I[X:Y|Z] is:

    >>> dit.multivariate.coinformation(d, 'XY', 'Z')
    1.0

    Using indexes, this becomes:

    >>> rvs = [[0], [1]]
    >>> crvs = [[2]] # broken
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    1.0

    For the conditional random variables, groupings have no effect, so you
    can also obtain this as:

    >>> rvs = [[0], [1]]
    >>> crvs = [2]
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    1.0



    Finally, note that entropy can also be calculated. The entropy H[Z|XY]
    is obtained as:

    >>> rvs = [[2]]
    >>> crvs = [[0], [1]] # broken
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    0.0

    >>> crvs = [[0, 1]] # broken
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    0.0

    >>> crvs = [0, 1]
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    0.0

    >>> rvs = 'Z'
    >>> crvs = 'XY'
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    0.0

    Note that [[0], [1]] says to condition on two groups. But conditioning
    is a flat operation and doesn't respect the groups, so it is equal to
    a single group of 2 random variables: [[0, 1]]. With random variable
    names 'XY' is acceptable because list('XY') = ['X', 'Y'], which
    specifies two singleton groups. By the previous argument, this will
    be treated the same as ['XY'].

    """
    rvs, crvs, rv_mode = normalize_rvs(dist, rvs, crvs, rv_mode)

    def signed_entropy(groups):
        """
        Entropy of the union of `groups`, carrying its alternating sign.
        """
        sign = (-1)**(len(groups)+1)
        return sign * H(dist, set().union(*groups), crvs, rv_mode=rv_mode)

    return sum(signed_entropy(Xs) for Xs in powerset(rvs))
Пример #18
0
def coinformation(dist, rvs=None, crvs=None, rv_mode=None):
    """
    Calculates the coinformation.

    The arguments are first passed through `normalize_rvs`. The value is
    then the alternating sum, over every subset `Xs` of `rvs` yielded by
    `powerset`, of `(-1)**(len(Xs) + 1)` times the entropy of the union of
    the groups in `Xs`, with `crvs` passed through to each entropy call.

    Parameters
    ----------
    dist : Distribution
        The distribution from which the coinformation is calculated.
    rvs : list, None
        The indexes of the random variable used to calculate the coinformation
        between. If None, then the coinformation is calculated over all random
        variables.
    crvs : list, None
        The indexes of the random variables to condition on. If None, then no
        variables are conditioned on.
    rv_mode : str, None
        Specifies how to interpret `rvs` and `crvs`. Valid options are:
        {'indices', 'names'}. If equal to 'indices', then the elements of
        `crvs` and `rvs` are interpreted as random variable indices. If equal
        to 'names', then the elements are interpreted as random variable
        names. If `None`, then the value of `dist._rv_mode` is consulted,
        which defaults to 'indices'.
        NOTE(review): the examples below pass rv_mode='indexes'; confirm
        that this spelling is accepted.

    Returns
    -------
    I : float
        The coinformation.

    Raises
    ------
    ditException
        Raised if `dist` is not a joint distribution or if `rvs` or `crvs`
        contain nonexistent random variables.

    Examples
    --------
    Let's construct a 3-variable distribution for the XOR logic gate and name
    the random variables X, Y, and Z.

    >>> d = dit.example_dists.Xor()
    >>> d.set_rv_names(['X', 'Y', 'Z'])

    To calculate coinformations, recall that `rvs` specifies which groups of
    random variables are involved. For example, the 3-way mutual information
    I[X:Y:Z] is calculated as:

    >>> dit.multivariate.coinformation(d, ['X', 'Y', 'Z'])
    -1.0

    It is a quirk of strings that each element of a string is also an iterable.
    So an equivalent way to calculate the 3-way mutual information I[X:Y:Z] is:

    >>> dit.multivariate.coinformation(d, 'XYZ')
    -1.0

    The reason this works is that list('XYZ') == ['X', 'Y', 'Z']. If we want
    to use random variable indexes, we need to have explicit groupings:

    >>> dit.multivariate.coinformation(d, [[0], [1], [2]], rv_mode='indexes')
    -1.0



    To calculate the mutual information I[X, Y : Z], we use explicit groups:

    >>> dit.multivariate.coinformation(d, ['XY', 'Z'])

    Using indexes, this looks like:

    >>> dit.multivariate.coinformation(d, [[0, 1], [2]], rv_mode='indexes')



    The mutual information I[X:Z] is given by:

    >>> dit.multivariate.coinformation(d, 'XZ')
    0.0

    Equivalently,

    >>> dit.multivariate.coinformation(d, ['X', 'Z'])
    0.0

    Using indexes, this becomes:

    >>> dit.multivariate.coinformation(d, [[0], [2]])
    0.0



    Conditional mutual informations can be calculated by passing in the
    conditional random variables. The conditional mutual information
    I[X:Y|Z] is:

    >>> dit.multivariate.coinformation(d, 'XY', 'Z')
    1.0

    Using indexes, this becomes:

    >>> rvs = [[0], [1]]
    >>> crvs = [[2]] # broken
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    1.0

    For the conditional random variables, groupings have no effect, so you
    can also obtain this as:

    >>> rvs = [[0], [1]]
    >>> crvs = [2]
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    1.0



    Finally, note that entropy can also be calculated. The entropy H[Z|XY]
    is obtained as:

    >>> rvs = [[2]]
    >>> crvs = [[0], [1]] # broken
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    0.0

    >>> crvs = [[0, 1]] # broken
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    0.0

    >>> crvs = [0, 1]
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    0.0

    >>> rvs = 'Z'
    >>> crvs = 'XY'
    >>> dit.multivariate.coinformation(d, rvs, crvs, rv_mode='indexes')
    0.0

    Note that [[0], [1]] says to condition on two groups. But conditioning
    is a flat operation and doesn't respect the groups, so it is equal to
    a single group of 2 random variables: [[0, 1]]. With random variable
    names 'XY' is acceptable because list('XY') = ['X', 'Y'], which
    specifies two singleton groups. By the previous argument, this will
    be treated the same as ['XY'].

    """
    rvs, crvs, rv_mode = normalize_rvs(dist, rvs, crvs, rv_mode)

    def term(subset):
        """
        One inclusion-exclusion term: the signed entropy of `subset`.
        """
        joint = set().union(*subset)
        parity = (-1)**(len(subset) + 1)
        return parity * H(dist, joint, crvs, rv_mode=rv_mode)

    I = sum(term(Xs) for Xs in powerset(rvs))

    return I