Exemplo n.º 1
0
def OA_find_disjoint_blocks(OA, k, n, x):
    r"""
    Return `x` disjoint blocks contained in a given `OA(k,n)`.

    `x` blocks of an `OA` are said to be disjoint if they all have
    different values for every given index, i.e. if they correspond to
    disjoint blocks in the `TD` associated with the `OA`.

    INPUT:

    - ``OA`` -- an orthogonal array

    - ``k,n,x`` (integers)

    OUTPUT:

    A list of `x` blocks of ``OA``. A ``ValueError`` is raised when the
    solver reports that the underlying integer program is infeasible.

    .. SEEALSO::

        :func:`incomplete_orthogonal_array`

    EXAMPLES::

        sage: from sage.combinat.designs.orthogonal_arrays import OA_find_disjoint_blocks
        sage: k=3;n=4;x=3
        sage: Bs = OA_find_disjoint_blocks(designs.orthogonal_array(k,n),k,n,x)
        sage: assert len(Bs) == x
        sage: for i in range(k):
        ....:     assert len(set([B[i] for B in Bs])) == x
        sage: OA_find_disjoint_blocks(designs.orthogonal_array(k,n),k,n,5)
        Traceback (most recent call last):
        ...
        ValueError: There does not exist 5 disjoint blocks in this OA(3,4)
    """

    # Computing an independent set of order x with a Linear Program:
    # one binary variable per block, exactly x of them set to 1.
    from sage.numerical.mip import MixedIntegerLinearProgram, MIPSolverException
    p = MixedIntegerLinearProgram()
    b = p.new_variable(binary=True)
    p.add_constraint(p.sum(b[i] for i in range(len(OA))) == x)

    # t[i][j] lists the blocks of the OA whose i'th component is j
    t = [[[] for _ in range(n)] for _ in range(k)]
    for c, B in enumerate(OA):
        for i, j in enumerate(B):
            t[i][j].append(c)

    # At most one selected block may carry value j at coordinate i
    for R in t:
        for L in R:
            p.add_constraint(p.sum(b[i] for i in L) <= 1)

    try:
        p.solve()
    except MIPSolverException:
        raise ValueError(
            "There does not exist {} disjoint blocks in this OA({},{})".format(
                x, k, n))

    # Numerical back-ends may report binary variables as floats that are only
    # approximately 0 or 1 (e.g. 1e-9): compare against 1/2 instead of relying
    # on truthiness, which would treat any nonzero round-off as "selected".
    values = p.get_values(b)
    return [OA[i] for i, v in values.items() if v > 0.5]
Exemplo n.º 2
0
def OA_find_disjoint_blocks(OA,k,n,x):
    r"""
    Return `x` disjoint blocks contained in a given `OA(k,n)`.

    `x` blocks of an `OA` are said to be disjoint if they all have
    different values for every given index, i.e. if they correspond to
    disjoint blocks in the `TD` associated with the `OA`.

    INPUT:

    - ``OA`` -- an orthogonal array

    - ``k,n,x`` (integers)

    OUTPUT:

    A list of `x` blocks of ``OA``. A ``ValueError`` is raised when the
    solver reports that the underlying integer program is infeasible.

    .. SEEALSO::

        :func:`incomplete_orthogonal_array`

    EXAMPLES::

        sage: from sage.combinat.designs.orthogonal_arrays import OA_find_disjoint_blocks
        sage: k=3;n=4;x=3
        sage: Bs = OA_find_disjoint_blocks(designs.orthogonal_array(k,n),k,n,x)
        sage: assert len(Bs) == x
        sage: for i in range(k):
        ....:     assert len(set([B[i] for B in Bs])) == x
        sage: OA_find_disjoint_blocks(designs.orthogonal_array(k,n),k,n,5)
        Traceback (most recent call last):
        ...
        ValueError: There does not exist 5 disjoint blocks in this OA(3,4)
    """

    # Computing an independent set of order x with a Linear Program:
    # one binary variable per block, exactly x of them set to 1.
    from sage.numerical.mip import MixedIntegerLinearProgram, MIPSolverException
    p = MixedIntegerLinearProgram()
    b = p.new_variable(binary=True)
    p.add_constraint(p.sum(b[i] for i in range(len(OA))) == x)

    # t[i][j] lists the blocks of the OA whose i'th component is j
    t = [[[] for _ in range(n)] for _ in range(k)]
    for c, B in enumerate(OA):
        for i, j in enumerate(B):
            t[i][j].append(c)

    # At most one selected block may carry value j at coordinate i
    for R in t:
        for L in R:
            p.add_constraint(p.sum(b[i] for i in L) <= 1)

    try:
        p.solve()
    except MIPSolverException:
        raise ValueError("There does not exist {} disjoint blocks in this OA({},{})".format(x,k,n))

    # Numerical back-ends may report binary variables as floats that are only
    # approximately 0 or 1 (e.g. 1e-9): compare against 1/2 instead of relying
    # on truthiness, which would treat any nonzero round-off as "selected".
    values = p.get_values(b)
    return [OA[i] for i, v in values.items() if v > 0.5]
Exemplo n.º 3
0
def solve_magic_hexagon(solver=None):
    r"""
    Solves the magic hexagon problem

    We use the following convention for the positions::

           1   2  3
         4  5   6   7
       8   9  10  11  12
        13  14  15  16
          17  18  19

    INPUT:

    - ``solver`` -- string (default:``None``), passed on to
      ``MixedIntegerLinearProgram``

    OUTPUT:

    A list of 19 integers; its entry of index ``i`` is the tile placed at
    position ``i+1``.

    EXAMPLES::

        sage: from slabbe.magic_hexagon import solve_magic_hexagon
        sage: solve_magic_hexagon() # long time (90s if GLPK, <1s if Gurobi)
        [15, 14, 9, 13, 8, 6, 11, 10, 4, 5, 1, 18, 12, 2, 7, 17, 16, 19, 3]

    """
    p = MixedIntegerLinearProgram(solver=solver)
    # x[pos, tile] == 1 iff `tile` is placed at position `pos`
    x = p.new_variable(binary=True)

    # Positions and tiles are both numbered 1..19
    A = range(1, 20)

    # exactly one tile at each position pos
    for pos in A:
        S = p.sum(x[pos, tile] for tile in A)
        name = "one tile at {}".format(pos)
        p.add_constraint(S == 1, name=name)

    # each tile used exactly once
    for tile in A:
        S = p.sum(x[pos, tile] for pos in A)
        name = "tile {} used once".format(tile)
        p.add_constraint(S == 1, name=name)

    # The 15 lines of the hexagon, in each of the three directions
    lines = [(1,2,3), (4,5,6,7), (8,9,10,11,12), (13,14,15,16), (17,18,19),
             (1,4,8), (2,5,9,13), (3,6,10,14,17), (7,11,15,18), (12,16,19),
             (8,13,17), (4,9,14,18), (1,5,10,15,19), (2,6,11,16), (3,7,12)]

    # the sum is 38 on each line
    for line in lines:
        S = p.sum(tile*x[pos, tile] for tile in A for pos in line)
        name = "sum of line {} must be 38".format(line)
        p.add_constraint(S == 38, name=name)

    p.solve()
    soln = p.get_values(x)

    # Numerical solvers may return values that are only approximately 0 or 1;
    # an exact `!= 0` test would keep pairs whose value is mere round-off.
    chosen = sorted(key for key, val in soln.items() if val > 0.5)
    return [tile for (pos, tile) in chosen]
Exemplo n.º 4
0
    def packing(self, solver=None, verbose=0):
        r"""
        Return a maximum packing

        A maximum packing in a hypergraph is collection of disjoint sets/blocks
        of maximal cardinality. This problem is NP-complete in general, and in
        particular on 3-uniform hypergraphs. It is solved here with an Integer
        Linear Program.

        For more information, see the :wikipedia:`Packing_in_a_hypergraph`.

        INPUT:

        - ``solver`` -- (default: ``None``) Specify a Linear Program (LP)
          solver to be used. If set to ``None``, the default one is used. For
          more information on LP solvers and which default solver is used, see
          the method
          :meth:`solve <sage.numerical.mip.MixedIntegerLinearProgram.solve>`
          of the class
          :class:`MixedIntegerLinearProgram <sage.numerical.mip.MixedIntegerLinearProgram>`.

        - ``verbose`` -- integer (default: ``0``). Sets the level of
          verbosity. Set to 0 by default, which means quiet.

        OUTPUT:

        A list of blocks, each given as the list of its points.

        EXAMPLE::

            sage: IncidenceStructure([[1,2],[3,"A"],[2,3]]).packing()
            [[1, 2], [3, 'A']]
            sage: len(designs.steiner_triple_system(9).packing())
            3
        """
        from sage.numerical.mip import MixedIntegerLinearProgram

        # d[x] is the list of (indices of) blocks containing the point x
        d = [[] for _ in self._points]
        for i, B in enumerate(self._blocks):
            for x in B:
                d[x].append(i)

        p = MixedIntegerLinearProgram(solver=solver)
        b = p.new_variable(binary=True)

        # Disjointness: each point belongs to at most one selected block
        for L in d:
            p.add_constraint(p.sum([b[i] for i in L]) <= 1)

        # Maximum number of blocks
        p.set_objective(p.sum([b[i] for i in range(self.num_blocks())]))

        p.solve(log=verbose)

        # ``items`` works on both Python 2 and 3 (``iteritems`` does not), and
        # the 1/2 threshold protects against solvers returning floats that are
        # only approximately 0 or 1.
        return [[self._points[x] for x in self._blocks[i]]
                for i, v in p.get_values(b).items() if v > 0.5]
Exemplo n.º 5
0
    def packing(self, solver=None, verbose=0):
        r"""
        Return a maximum packing

        A maximum packing in a hypergraph is collection of disjoint sets/blocks
        of maximal cardinality. This problem is NP-complete in general, and in
        particular on 3-uniform hypergraphs. It is solved here with an Integer
        Linear Program.

        For more information, see the :wikipedia:`Packing_in_a_hypergraph`.

        INPUT:

        - ``solver`` -- (default: ``None``) Specify a Linear Program (LP)
          solver to be used. If set to ``None``, the default one is used. For
          more information on LP solvers and which default solver is used, see
          the method
          :meth:`solve <sage.numerical.mip.MixedIntegerLinearProgram.solve>`
          of the class
          :class:`MixedIntegerLinearProgram <sage.numerical.mip.MixedIntegerLinearProgram>`.

        - ``verbose`` -- integer (default: ``0``). Sets the level of
          verbosity. Set to 0 by default, which means quiet.

        OUTPUT:

        A list of blocks, each given as the list of its points.

        EXAMPLE::

            sage: IncidenceStructure([[1,2],[3,"A"],[2,3]]).packing()
            [[1, 2], [3, 'A']]
            sage: len(designs.steiner_triple_system(9).packing())
            3
        """
        from sage.numerical.mip import MixedIntegerLinearProgram

        # d[x] is the list of (indices of) blocks containing the point x
        d = [[] for _ in self._points]
        for i, B in enumerate(self._blocks):
            for x in B:
                d[x].append(i)

        p = MixedIntegerLinearProgram(solver=solver)
        b = p.new_variable(binary=True)

        # Disjointness: each point belongs to at most one selected block
        for L in d:
            p.add_constraint(p.sum([b[i] for i in L]) <= 1)

        # Maximum number of blocks
        p.set_objective(p.sum([b[i] for i in range(self.num_blocks())]))

        p.solve(log=verbose)

        # ``items`` works on both Python 2 and 3 (``iteritems`` does not), and
        # the 1/2 threshold protects against solvers returning floats that are
        # only approximately 0 or 1.
        return [[self._points[x] for x in self._blocks[i]]
                for i, v in p.get_values(b).items() if v > 0.5]
Exemplo n.º 6
0
def dominating_set(g,
                   independent=False,
                   total=False,
                   value_only=False,
                   solver=None,
                   verbose=0):
    r"""
    Return a minimum dominating set of the graph.

    A minimum dominating set `S` of a graph `G` is a set of its vertices of
    minimal cardinality such that any vertex of `G` is in `S` or has one of its
    neighbors in `S`. See the :wikipedia:`Dominating_set`.

    As an optimization problem, it can be expressed as:

    .. MATH::

        \mbox{Minimize : }&\sum_{v\in G} b_v\\
        \mbox{Such that : }&\forall v \in G, b_v+\sum_{(u,v)\in G.edges()} b_u\geq 1\\
        &\forall x\in G, b_x\mbox{ is a binary variable}

    INPUT:

    - ``independent`` -- boolean (default: ``False``); when ``True``, computes a
      minimum independent dominating set, that is a minimum dominating set that
      is also an independent set (see also
      :meth:`~sage.graphs.graph.independent_set`)

    - ``total`` -- boolean (default: ``False``); when ``True``, computes a total
      dominating set (see the :wikipedia:`Dominating_set`)

    - ``value_only`` -- boolean (default: ``False``); whether to only return the
      cardinality of the computed dominating set, or to return its list of
      vertices (default)

    - ``solver`` -- (default: ``None``); specifies a Linear Program (LP) solver
      to be used. If set to ``None``, the default one is used. For more
      information on LP solvers and which default solver is used, see the method
      :meth:`solve <sage.numerical.mip.MixedIntegerLinearProgram.solve>` of the
      class :class:`MixedIntegerLinearProgram
      <sage.numerical.mip.MixedIntegerLinearProgram>`.

    - ``verbose`` -- integer (default: ``0``); sets the level of verbosity. Set
      to 0 by default, which means quiet.

    EXAMPLES:

    A basic illustration on a ``PappusGraph``::

        sage: g = graphs.PappusGraph()
        sage: g.dominating_set(value_only=True)
        5

    If we build a graph from two disjoint stars, then link their centers we will
    find a difference between the cardinality of a dominating set and an
    independent dominating set::

        sage: g = 2 * graphs.StarGraph(5)
        sage: g.add_edge(0, 6)
        sage: len(g.dominating_set())
        2
        sage: len(g.dominating_set(independent=True))
        6

    The total dominating set of the Petersen graph has cardinality 4::

        sage: G = graphs.PetersenGraph()
        sage: G.dominating_set(total=True, value_only=True)
        4

    The dominating set is calculated for both the directed and undirected graphs
    (modification introduced in :trac:`17905`)::

        sage: g = digraphs.Path(3)
        sage: g.dominating_set(value_only=True)
        2
        sage: g = graphs.PathGraph(3)
        sage: g.dominating_set(value_only=True)
        1

    """
    g._scream_if_not_simple(allow_multiple_edges=True, allow_loops=not total)

    from sage.numerical.mip import MixedIntegerLinearProgram
    p = MixedIntegerLinearProgram(maximization=False, solver=solver)
    b = p.new_variable(binary=True)

    # For any vertex v, one of its neighbors or v itself is in the minimum
    # dominating set. If g is directed, we use the in neighbors of v instead.
    neighbors_iter = (g.neighbor_in_iterator if g.is_directed()
                      else g.neighbor_iterator)

    if total:
        # We want a total dominating set: v itself does not dominate v
        for v in g:
            p.add_constraint(p.sum(b[u] for u in neighbors_iter(v)), min=1)
    else:
        for v in g:
            p.add_constraint(b[v] + p.sum(b[u] for u in neighbors_iter(v)),
                             min=1)

    if independent:
        # no two adjacent vertices are in the set
        for u, v in g.edge_iterator(labels=None):
            p.add_constraint(b[u] + b[v], max=1)

    # Minimizes the number of vertices used
    p.set_objective(p.sum(b[v] for v in g))

    if value_only:
        return Integer(round(p.solve(objective_only=True, log=verbose)))

    p.solve(log=verbose)
    values = p.get_values(b)
    # Numerical solvers may return 0.9999... instead of 1 for a selected
    # vertex; an exact `== 1` test would silently drop it from the answer.
    return [v for v in g if values[v] > 0.5]
def gale_ryser_theorem(p1, p2, algorithm="gale"):
    r"""
    Returns the binary matrix given by the Gale-Ryser theorem.

    The Gale Ryser theorem asserts that if `p_1,p_2` are two
    partitions of `n` of respective lengths `k_1,k_2`, then there is
    a binary `k_1\times k_2` matrix `M` such that `p_1` is the vector
    of row sums and `p_2` is the vector of column sums of `M`, if
    and only if the conjugate of `p_2` dominates `p_1`.

    INPUT:

    - ``p1, p2``-- list of integers representing the vectors
      of row/column sums

    - ``algorithm`` -- two possible string values :

        - ``"ryser"`` implements the construction due
          to Ryser [Ryser63]_.

        - ``"gale"`` (default) implements the construction due to Gale [Gale57]_.

    OUTPUT:

    - A binary matrix if it exists, ``False`` otherwise.

    A ``ValueError`` is raised when ``algorithm`` is neither ``"gale"`` nor
    ``"ryser"`` (this is only checked once the sequences are known to be
    realizable).

    Gale's Algorithm:

    (Gale [Gale57]_): A matrix satisfying the constraints of its
    sums can be defined as the solution of the following
    Linear Program, which Sage knows how to solve.

    .. MATH::

        \forall i&\sum_{j=1}^{k_2} b_{i,j}=p_{1,i}\\
        \forall i&\sum_{j=1}^{k_1} b_{j,i}=p_{2,i}\\
        &b_{i,j}\mbox{ is a binary variable}

    Ryser's Algorithm:

    (Ryser [Ryser63]_): The construction of an `m\times n` matrix `A=A_{r,s}`,
    due to Ryser, is described as follows. The
    construction works if and only if have `s\preceq r^*`.

    * Construct the `m\times n` matrix `B` from `r` by defining
      the `i`-th row of `B` to be the vector whose first `r_i`
      entries are `1`, and the remainder are 0's, `1\leq i\leq
      m`.  This maximal matrix `B` with row sum `r` and ones left
      justified has column sum `r^{*}`.

    * Shift the last `1` in certain rows of `B` to column `n` in
      order to achieve the sum `s_n`.  Call this `B` again.

      * The `1`'s in column n are to appear in those
        rows in which `A` has the largest row sums, giving
        preference to the bottom-most positions in case of ties.
      * Note: When this step automatically "fixes" other columns,
        one must skip ahead to the first column index
        with a wrong sum in the step below.

    * Proceed inductively to construct columns `n-1`, ..., `2`, `1`.

    * Set `A = B`. Return `A`.

    EXAMPLES:

    Computing the matrix for `p_1=p_2=2+2+1` ::

        sage: from sage.combinat.integer_vector import gale_ryser_theorem
        sage: p1 = [2,2,1]
        sage: p2 = [2,2,1]
        sage: print gale_ryser_theorem(p1, p2)     # not tested
        [1 1 0]
        [1 0 1]
        [0 1 0]
        sage: A = gale_ryser_theorem(p1, p2)
        sage: rs = [sum(x) for x in A.rows()]
        sage: cs = [sum(x) for x in A.columns()]
        sage: p1 == rs; p2 == cs
        True
        True

    Or for a non-square matrix with `p_1=3+3+2+1` and `p_2=3+2+2+1+1`, using Ryser's algorithm ::

        sage: from sage.combinat.integer_vector import gale_ryser_theorem
        sage: p1 = [3,3,1,1]
        sage: p2 = [3,3,1,1]
        sage: gale_ryser_theorem(p1, p2, algorithm = "ryser")
        [1 1 0 1]
        [1 1 1 0]
        [0 1 0 0]
        [1 0 0 0]
        sage: p1 = [4,2,2]
        sage: p2 = [3,3,1,1]
        sage: gale_ryser_theorem(p1, p2, algorithm = "ryser")
        [1 1 1 1]
        [1 1 0 0]
        [1 1 0 0]
        sage: p1 = [4,2,2,0]
        sage: p2 = [3,3,1,1,0,0]
        sage: gale_ryser_theorem(p1, p2, algorithm = "ryser")
        [1 1 1 1 0 0]
        [1 1 0 0 0 0]
        [1 1 0 0 0 0]
        [0 0 0 0 0 0]
        sage: p1 = [3,3,2,1]
        sage: p2 = [3,2,2,1,1]
        sage: print gale_ryser_theorem(p1, p2, algorithm="gale")  # not tested
        [1 1 1 0 0]
        [1 1 0 0 1]
        [1 0 1 0 0]
        [0 0 0 1 0]

    With `0` in the sequences, and with unordered inputs ::

        sage: from sage.combinat.integer_vector import gale_ryser_theorem
        sage: gale_ryser_theorem([3,3,0,1,1,0], [3,1,3,1,0], algorithm = "ryser")
        [1 0 1 1 0]
        [1 1 1 0 0]
        [0 0 0 0 0]
        [0 0 1 0 0]
        [1 0 0 0 0]
        [0 0 0 0 0]
        sage: p1 = [3,1,1,1,1]; p2 = [3,2,2,0]
        sage: gale_ryser_theorem(p1, p2, algorithm = "ryser")
        [1 1 1 0]
        [0 0 1 0]
        [0 1 0 0]
        [1 0 0 0]
        [1 0 0 0]

    TESTS:

    This test creates a random bipartite graph on `n+m` vertices. Its
    adjacency matrix is binary, and it is used to create some
    "random-looking" sequences which correspond to an existing matrix. The
    ``gale_ryser_theorem`` is then called on these sequences, and the output
    checked for correctness.::

        sage: def test_algorithm(algorithm, low = 10, high = 50):
        ...      n,m = randint(low,high), randint(low,high)
        ...      g = graphs.RandomBipartite(n, m, .3)
        ...      s1 = sorted(g.degree([(0,i) for i in range(n)]), reverse = True)
        ...      s2 = sorted(g.degree([(1,i) for i in range(m)]), reverse = True)
        ...      m = gale_ryser_theorem(s1, s2, algorithm = algorithm)
        ...      ss1 = sorted(map(lambda x : sum(x) , m.rows()), reverse = True)
        ...      ss2 = sorted(map(lambda x : sum(x) , m.columns()), reverse = True)
        ...      if ((ss1 == s1) and (ss2 == s2)):
        ...          return True
        ...      return False

        sage: for algorithm in ["gale", "ryser"]:                            # long time
        ...      for i in range(50):                                         # long time
        ...         if not test_algorithm(algorithm, 3, 10):                 # long time
        ...             print "Something wrong with algorithm ", algorithm   # long time
        ...             break                                                # long time

    Null matrix::

        sage: gale_ryser_theorem([0,0,0],[0,0,0,0], algorithm="gale")
        [0 0 0 0]
        [0 0 0 0]
        [0 0 0 0]
        sage: gale_ryser_theorem([0,0,0],[0,0,0,0], algorithm="ryser")
        [0 0 0 0]
        [0 0 0 0]
        [0 0 0 0]

    REFERENCES:

    ..  [Ryser63] H. J. Ryser, Combinatorial Mathematics,
            Carus Monographs, MAA, 1963.
    ..  [Gale57] D. Gale, A theorem on flows in networks, Pacific J. Math.
            7(1957)1073-1082.
    """
    from sage.matrix.constructor import matrix

    # Nothing to build when the sequences fail the Gale-Ryser test.
    # NOTE: ``False`` (not ``None``) is returned, kept for compatibility.
    if not (is_gale_ryser(p1, p2)):
        return False

    if algorithm == "ryser":  # ryser's algorithm
        from sage.combinat.permutation import Permutation

        # Sorts the sequences if they are not, and remembers the permutation
        # applied. Zero entries are dropped from r and s; the corresponding
        # empty rows/columns are added back below.
        tmp = sorted(enumerate(p1), reverse=True, key=lambda x: x[1])
        r = [x[1] for x in tmp if x[1] > 0]
        r_permutation = [
            x - 1 for x in Permutation([x[0] + 1 for x in tmp]).inverse()
        ]
        m = len(r)

        tmp = sorted(enumerate(p2), reverse=True, key=lambda x: x[1])
        s = [x[1] for x in tmp if x[1] > 0]
        s_permutation = [
            x - 1 for x in Permutation([x[0] + 1 for x in tmp]).inverse()
        ]
        n = len(s)

        # Left-justified matrix with row sums r
        A0 = matrix([[1] * r[j] + [0] * (n - r[j]) for j in range(m)])

        # Columns are processed from the last one to the first one; a column
        # is only touched when its current sum is wrong.
        # NOTE(review): presumably _slider01 shifts ones into column n-k
        # while leaving `goodcols` intact -- confirm against its definition.
        for k in range(1, n + 1):
            goodcols = [i for i in range(n) if s[i] == sum(A0.column(i))]
            if sum(A0.column(n - k)) != s[n - k]:
                A0 = _slider01(A0, s[n - k], n - k, p1, p2, goodcols)

        # If we need to add empty rows/columns
        if len(p1) != m:
            A0 = A0.stack(matrix([[0] * n] * (len(p1) - m)))

        if len(p2) != n:
            A0 = A0.transpose().stack(matrix([[0] * len(p1)] *
                                             (len(p2) - n))).transpose()

        # Applying the permutations to get a matrix satisfying the
        # order given by the input
        A0 = A0.matrix_from_rows_and_columns(r_permutation, s_permutation)
        return A0

    elif algorithm == "gale":
        from sage.numerical.mip import MixedIntegerLinearProgram
        k1, k2 = len(p1), len(p2)
        p = MixedIntegerLinearProgram()
        b = p.new_variable(binary=True)
        # Row i must sum to p1[i]
        for (i, c) in enumerate(p1):
            p.add_constraint(p.sum([b[i, j] for j in range(k2)]) == c)
        # Column i must sum to p2[i]
        for (i, c) in enumerate(p2):
            p.add_constraint(p.sum([b[j, i] for j in range(k1)]) == c)
        # Pure feasibility problem: no objective
        p.set_objective(None)
        p.solve()
        values = p.get_values(b)
        # Numerical solvers may return 0.9999... for a variable set to 1:
        # truncating with int() would turn it into 0 and break the row and
        # column sums, so threshold at 1/2 instead.
        return matrix([[1 if values[i, j] > 0.5 else 0 for j in range(k2)]
                       for i in range(k1)])

    else:
        raise ValueError(
            "The only two algorithms available are \"gale\" and \"ryser\"")
Exemplo n.º 8
0
def _binpacking_in_k_bins(weight, k, maximum, solver, verbose, items_are_weights):
    r"""
    Try to pack the items of ``weight`` into exactly ``k`` bins.

    Private helper of :func:`binpacking`. ``weight`` is a dictionary
    associating to each item its weight. Return the list of the ``k`` bins
    (containing weights when ``items_are_weights`` is true, items otherwise),
    or ``None`` when the solver reports that the problem is infeasible.
    """
    from sage.numerical.mip import MixedIntegerLinearProgram, MIPSolverException
    p = MixedIntegerLinearProgram(solver=solver)

    # Boolean variable indicating whether the ith element belongs to box b
    box = p.new_variable(binary=True)

    # Capacity constraint of each bin
    for b in range(k):
        p.add_constraint(
            p.sum(weight[i] * box[i, b] for i in weight) <= maximum)

    # Each item is assigned exactly one bin
    for i in weight:
        p.add_constraint(p.sum(box[i, b] for b in range(k)) == 1)

    try:
        p.solve(log=verbose)
    except MIPSolverException:
        return None

    values = p.get_values(box)
    boxes = [[] for _ in range(k)]
    for (i, b), v in values.items():
        # Numerical solvers may return 0.9999... instead of 1: an exact
        # `== 1` test would silently lose the item.
        if v > 0.5:
            boxes[b].append(weight[i] if items_are_weights else i)
    return boxes


def binpacking(items, maximum=1, k=None, solver=None, verbose=0):
    r"""
    Solve the bin packing problem.

    The Bin Packing problem is the following :

    Given a list of items of weights `p_i` and a real value `k`, what is the
    least number of bins such that all the items can be packed in the bins,
    while ensuring that the sum of the weights of the items packed in each bin
    is at most `k` ?

    For more informations, see :wikipedia:`Bin_packing_problem`.

    Two versions of this problem are solved by this algorithm :

    - Is it possible to put the given items in `k` bins ?
    - What is the assignment of items using the least number of bins with
      the given list of items ?

    INPUT:

    - ``items`` -- list or dict; either a list of real values (the items'
      weight), or a dictionary associating to each item its weight.

    - ``maximum`` -- (default: 1); the maximal size of a bin

    - ``k`` -- integer (default: ``None``); Number of bins

      - When set to an integer value, the function returns a partition of the
        items into `k` bins if possible, and raises an exception otherwise.

      - When set to ``None``, the function returns a partition of the items
        using the least possible number of bins.

    - ``solver`` -- (default: ``None``); Specify a Linear Program (LP) solver to
      be used. If set to ``None``, the default one is used. For more information
      on LP solvers and which default solver is used, see the method
      :meth:`~sage.numerical.mip.MixedIntegerLinearProgram.solve` of the class
      :class:`~sage.numerical.mip.MixedIntegerLinearProgram`.

    - ``verbose`` -- integer (default: ``0``); sets the level of verbosity. Set
      to 0 by default, which means quiet.

    OUTPUT:

    A list of lists, each member corresponding to a bin and containing either
    the list of the weights inside it when ``items`` is a list of items' weight,
    or the list of items inside it when ``items`` is a dictionary. If there is
    no solution, a ``ValueError`` is raised (this can only happen when ``k`` is
    specified or if ``maximum`` is less than the weight of one item). A
    ``TypeError`` is raised when ``items`` is neither a list nor a dictionary.

    EXAMPLES:

    Trying to find the minimum amount of boxes for 5 items of weights
    `1/5, 1/3, 2/3, 3/4, 5/7`::

        sage: from sage.numerical.optimize import binpacking
        sage: values = [1/5, 1/3, 2/3, 3/4, 5/7]
        sage: bins = binpacking(values)
        sage: len(bins)
        3

    Checking the bins are of correct size ::

        sage: all(sum(b) <= 1 for b in bins)
        True

    Checking every item is in a bin ::

        sage: b1, b2, b3 = bins
        sage: all((v in b1 or v in b2 or v in b3) for v in values)
        True

    And only in one bin ::

        sage: sum(len(b) for b in bins) == len(values)
        True

    One way to use only three boxes (which is best possible) is to put
    `1/5 + 3/4` together in a box, `1/3+2/3` in another, and `5/7`
    by itself in the third one.

    Of course, we can also check that there is no solution using only two boxes ::

        sage: from sage.numerical.optimize import binpacking
        sage: binpacking([0.2,0.3,0.8,0.9], k=2)
        Traceback (most recent call last):
        ...
        ValueError: this problem has no solution !

    We can also provide a dictionary keyed by items and associating to each item
    its weight. Then, the bins contain the name of the items inside it ::

        sage: values = {'a':1/5, 'b':1/3, 'c':2/3, 'd':3/4, 'e':5/7}
        sage: bins = binpacking(values)
        sage: set(flatten(bins)) == set(values.keys())
        True

    TESTS:

    Wrong type for parameter items::

        sage: binpacking(set())
        Traceback (most recent call last):
        ...
        TypeError: parameter items must be a list or a dictionary.

    The number of bins is increased when the lower bound given by the total
    weight is not feasible::

        sage: len(binpacking([0.6, 0.6, 0.6]))
        3

    Nothing to pack::

        sage: binpacking([])
        []
    """
    if isinstance(items, list):
        weight = dict(enumerate(items))
        items_are_weights = True
    elif isinstance(items, dict):
        weight = items
        items_are_weights = False
    else:
        raise TypeError("parameter items must be a list or a dictionary.")

    if not weight:
        # Nothing to pack (max() below would fail on an empty sequence):
        # no bin is needed, or the k requested bins all stay empty.
        return [] if k is None else [[] for _ in range(k)]

    if max(weight.values()) > maximum:
        raise ValueError("this problem has no solution !")

    if k is not None:
        boxes = _binpacking_in_k_bins(weight, k, maximum, solver, verbose,
                                      items_are_weights)
        if boxes is None:
            raise ValueError("this problem has no solution !")
        return boxes

    # No number of bins requested: start from the lower bound given by the
    # total weight and add bins until the problem becomes feasible. The loop
    # terminates since every item fits alone in a bin (checked above), hence
    # one bin per item is always enough.
    from sage.functions.other import ceil
    k = ceil(sum(weight.values()) / maximum)
    while True:
        boxes = _binpacking_in_k_bins(weight, k, maximum, solver, verbose,
                                      items_are_weights)
        if boxes is not None:
            return boxes
        k = k + 1
Exemplo n.º 9
0
def gale_ryser_theorem(p1, p2, algorithm="gale"):
    r"""
    Return the binary matrix given by the Gale-Ryser theorem.

    The Gale Ryser theorem asserts that if `p_1,p_2` are two
    partitions of `n` of respective lengths `k_1,k_2`, then there is
    a binary `k_1\times k_2` matrix `M` such that `p_1` is the vector
    of row sums and `p_2` is the vector of column sums of `M`, if
    and only if the conjugate of `p_2` dominates `p_1`.

    INPUT:

    - ``p1, p2``-- list of integers representing the vectors
      of row/column sums

    - ``algorithm`` -- two possible string values :

        - ``"ryser"`` implements the construction due
          to Ryser [Ryser63]_.

        - ``"gale"`` (default) implements the construction due to Gale [Gale57]_.

    OUTPUT:

    - A binary matrix if it exists. When ``is_gale_ryser(p1, p2)`` does
      not hold, ``False`` is returned (before ``algorithm`` is even
      looked at).

    - A ``ValueError`` is raised when ``algorithm`` is neither ``"gale"``
      nor ``"ryser"``.

    Gale's Algorithm:

    (Gale [Gale57]_): A matrix satisfying the constraints of its
    sums can be defined as the solution of the following
    Linear Program, which Sage knows how to solve.

    .. MATH::

        \forall i&\sum_{j=1}^{k_2} b_{i,j}=p_{1,i}\\
        \forall i&\sum_{j=1}^{k_1} b_{j,i}=p_{2,i}\\
        &b_{i,j}\mbox{ is a binary variable}

    Ryser's Algorithm:

    (Ryser [Ryser63]_): The construction of an `m\times n` matrix
    `A=A_{r,s}`, due to Ryser, is described as follows. The
    construction works if and only if have `s\preceq r^*`.

    * Construct the `m\times n` matrix `B` from `r` by defining
      the `i`-th row of `B` to be the vector whose first `r_i`
      entries are `1`, and the remainder are 0's, `1\leq i\leq
      m`.  This maximal matrix `B` with row sum `r` and ones left
      justified has column sum `r^{*}`.

    * Shift the last `1` in certain rows of `B` to column `n` in
      order to achieve the sum `s_n`.  Call this `B` again.

      * The `1`'s in column n are to appear in those
        rows in which `A` has the largest row sums, giving
        preference to the bottom-most positions in case of ties.
      * Note: When this step automatically "fixes" other columns,
        one must skip ahead to the first column index
        with a wrong sum in the step below.

    * Proceed inductively to construct columns `n-1`, ..., `2`, `1`.
      Note: when performing the induction on step `k`, we consider
      the row sums of the first `k` columns.

    * Set `A = B`. Return `A`.

    EXAMPLES:

    Computing the matrix for `p_1=p_2=2+2+1` ::

        sage: from sage.combinat.integer_vector import gale_ryser_theorem
        sage: p1 = [2,2,1]
        sage: p2 = [2,2,1]
        sage: print(gale_ryser_theorem(p1, p2))     # not tested
        [1 1 0]
        [1 0 1]
        [0 1 0]
        sage: A = gale_ryser_theorem(p1, p2)
        sage: rs = [sum(x) for x in A.rows()]
        sage: cs = [sum(x) for x in A.columns()]
        sage: p1 == rs; p2 == cs
        True
        True

    Further examples, including non-square matrices, using Ryser's algorithm ::

        sage: from sage.combinat.integer_vector import gale_ryser_theorem
        sage: p1 = [3,3,1,1]
        sage: p2 = [3,3,1,1]
        sage: gale_ryser_theorem(p1, p2, algorithm = "ryser")
        [1 1 1 0]
        [1 1 0 1]
        [1 0 0 0]
        [0 1 0 0]
        sage: p1 = [4,2,2]
        sage: p2 = [3,3,1,1]
        sage: gale_ryser_theorem(p1, p2, algorithm = "ryser")
        [1 1 1 1]
        [1 1 0 0]
        [1 1 0 0]
        sage: p1 = [4,2,2,0]
        sage: p2 = [3,3,1,1,0,0]
        sage: gale_ryser_theorem(p1, p2, algorithm = "ryser")
        [1 1 1 1 0 0]
        [1 1 0 0 0 0]
        [1 1 0 0 0 0]
        [0 0 0 0 0 0]
        sage: p1 = [3,3,2,1]
        sage: p2 = [3,2,2,1,1]
        sage: print(gale_ryser_theorem(p1, p2, algorithm="gale"))  # not tested
        [1 1 1 0 0]
        [1 1 0 0 1]
        [1 0 1 0 0]
        [0 0 0 1 0]

    With `0` in the sequences, and with unordered inputs ::

        sage: from sage.combinat.integer_vector import gale_ryser_theorem
        sage: gale_ryser_theorem([3,3,0,1,1,0], [3,1,3,1,0], algorithm = "ryser")
        [1 1 1 0 0]
        [1 0 1 1 0]
        [0 0 0 0 0]
        [1 0 0 0 0]
        [0 0 1 0 0]
        [0 0 0 0 0]
        sage: p1 = [3,1,1,1,1]; p2 = [3,2,2,0]
        sage: gale_ryser_theorem(p1, p2, algorithm = "ryser")
        [1 1 1 0]
        [1 0 0 0]
        [1 0 0 0]
        [0 1 0 0]
        [0 0 1 0]

    TESTS:

    This test creates a random bipartite graph on `n+m` vertices. Its
    adjacency matrix is binary, and it is used to create some
    "random-looking" sequences which correspond to an existing matrix. The
    ``gale_ryser_theorem`` is then called on these sequences, and the output
    checked for correctness.::

        sage: def test_algorithm(algorithm, low = 10, high = 50):
        ....:     n,m = randint(low,high), randint(low,high)
        ....:     g = graphs.RandomBipartite(n, m, .3)
        ....:     s1 = sorted(g.degree([(0,i) for i in range(n)]), reverse = True)
        ....:     s2 = sorted(g.degree([(1,i) for i in range(m)]), reverse = True)
        ....:     m = gale_ryser_theorem(s1, s2, algorithm = algorithm)
        ....:     ss1 = sorted(map(lambda x : sum(x) , m.rows()), reverse = True)
        ....:     ss2 = sorted(map(lambda x : sum(x) , m.columns()), reverse = True)
        ....:     if ((ss1 != s1) or (ss2 != s2)):
        ....:         print("Algorithm %s failed with this input:" % algorithm)
        ....:         print(s1, s2)

        sage: for algorithm in ["gale", "ryser"]:                        # long time
        ....:     for i in range(50):                                    # long time
        ....:         test_algorithm(algorithm, 3, 10)                   # long time

    Null matrix::

        sage: gale_ryser_theorem([0,0,0],[0,0,0,0], algorithm="gale")
        [0 0 0 0]
        [0 0 0 0]
        [0 0 0 0]
        sage: gale_ryser_theorem([0,0,0],[0,0,0,0], algorithm="ryser")
        [0 0 0 0]
        [0 0 0 0]
        [0 0 0 0]

    Check that :trac:`16638` is fixed::

        sage: tests = [([4, 3, 3, 2, 1, 1, 1, 1, 0], [6, 5, 1, 1, 1, 1, 1]),
        ....:          ([4, 4, 3, 3, 1, 1, 0], [5, 5, 2, 2, 1, 1]),
        ....:          ([4, 4, 3, 2, 1, 1], [5, 5, 1, 1, 1, 1, 1, 0, 0]),
        ....:          ([3, 3, 3, 3, 2, 1, 1, 1, 0], [7, 6, 2, 1, 1, 0]),
        ....:          ([3, 3, 3, 1, 1, 0], [4, 4, 1, 1, 1])]
        sage: for s1, s2 in tests:
        ....:     m = gale_ryser_theorem(s1, s2, algorithm="ryser")
        ....:     ss1 = sorted(map(lambda x : sum(x) , m.rows()), reverse = True)
        ....:     ss2 = sorted(map(lambda x : sum(x) , m.columns()), reverse = True)
        ....:     if ((ss1 != s1) or (ss2 != s2)):
        ....:         print("Error in Ryser algorithm")
        ....:         print(s1, s2)

    REFERENCES:

    ..  [Ryser63] \H. J. Ryser, Combinatorial Mathematics,
        Carus Monographs, MAA, 1963.
    ..  [Gale57] \D. Gale, A theorem on flows in networks, Pacific J. Math.
        7(1957)1073-1082.
    """
    from sage.matrix.constructor import matrix

    if not is_gale_ryser(p1, p2):
        return False

    if algorithm == "ryser":  # ryser's algorithm
        from sage.combinat.permutation import Permutation

        # Sort both sequences in decreasing order (if they are not already)
        # and remember the inverse of the permutation applied, so that the
        # rows/columns can be put back in the caller's order at the end.
        tmp = sorted(enumerate(p1), reverse=True, key=lambda x: x[1])
        r = [x[1] for x in tmp]
        r_permutation = [x - 1 for x in Permutation([x[0] + 1 for x in tmp]).inverse()]
        m = len(r)

        tmp = sorted(enumerate(p2), reverse=True, key=lambda x: x[1])
        s = [x[1] for x in tmp]
        s_permutation = [x - 1 for x in Permutation([x[0] + 1 for x in tmp]).inverse()]

        # This is the partition equivalent to the sliding algorithm.
        # Columns are built from the smallest column sum to the largest;
        # ``r`` holds the row sums still to be distributed.
        cols = []
        for t in reversed(s):
            c = [0] * m
            i = 0
            while t:
                # [i, k) is the maximal run of rows sharing the value r[i]
                k = i + 1
                while k < m and r[i] == r[k]:
                    k += 1
                if t >= k - i:  # == number rows of the same length
                    for j in range(i, k):
                        r[j] -= 1
                        c[j] = 1
                    t -= k - i
                else:  # Remove the t last rows of that length
                    for j in range(k - t, k):
                        r[j] -= 1
                        c[j] = 1
                    t = 0
                i = k
            cols.append(c)

        # We added columns to the back instead of the front
        A0 = matrix(list(reversed(cols))).transpose()

        # Applying the permutations to get a matrix satisfying the
        # order given by the input
        A0 = A0.matrix_from_rows_and_columns(r_permutation, s_permutation)
        return A0

    elif algorithm == "gale":
        from sage.numerical.mip import MixedIntegerLinearProgram
        k1, k2 = len(p1), len(p2)
        p = MixedIntegerLinearProgram()
        b = p.new_variable(binary=True)
        # Row i must sum to p1[i] ...
        for i, c in enumerate(p1):
            p.add_constraint(p.sum([b[i, j] for j in range(k2)]) == c)
        # ... and column i must sum to p2[i].
        for i, c in enumerate(p2):
            p.add_constraint(p.sum([b[j, i] for j in range(k1)]) == c)
        # Pure feasibility problem: no objective.
        p.set_objective(None)
        p.solve()
        b = p.get_values(b)
        # Numerical back-ends may report a binary variable as 0.9999999 or
        # 1e-12; round to the nearest integer instead of truncating, which
        # would silently turn such a 1 into a 0.
        M = [[int(round(b[i, j])) for j in range(k2)] for i in range(k1)]
        return matrix(M)

    else:
        raise ValueError("The only two algorithms available are \"gale\" and \"ryser\"")
Exemplo n.º 10
0
def knapsack(seq, binary=True, max=1, value_only=False):
    r"""
    Solves the knapsack problem

    Knapsack problems:

    You have already had a knapsack problem, so you should know,
    but in case you do not, a knapsack problem is what happens
    when you have hundreds of items to put into a bag which is
    too small for all of them.

    When you formally write it, here is your problem:

    * Your bag can contain a weight of at most `W`.
    * Each item `i` you have has a weight `w_i`.
    * Each item `i` has a usefulness of `u_i`.

    You then want to maximize the usefulness of the items you
    will store into your bag, while making sure the weight of
    the bag will not go over `W`.

    As a linear program, this problem can be represented this way
    (if you define `b_i` as the binary variable indicating whether
    the item `i` is to be included in your bag):

    .. MATH::

        \mbox{Maximize: }\sum_i b_i u_i \\
        \mbox{Such that: }
        \sum_i b_i w_i \leq W \\
        \forall i, b_i \mbox{ binary variable} \\

    (For more information,
    cf. http://en.wikipedia.org/wiki/Knapsack_problem.)

    EXAMPLES:

    If your knapsack problem is composed of three
    items (weight, value) defined by (1,2), (1.5,1), (0.5,3),
    and a bag of maximum weight 2, you can easily solve it this way::

        sage: from sage.numerical.knapsack import knapsack
        sage: knapsack( [(1,2), (1.5,1), (0.5,3)], max=2)
        [5.0, [(1, 2), (0.500000000000000, 3)]]

        sage: knapsack( [(1,2), (1.5,1), (0.5,3)], max=2, value_only=True)
        5.0

    In the case where all the values (usefulness) of the items
    are equal to one, you do not need to bother with
    the second values, and you can just type for items
    `(1,1), (1.5,1), (0.5,1)` the command::

        sage: from sage.numerical.knapsack import knapsack
        sage: knapsack([1,1.5,0.5], max=2, value_only=True)
        2.0

    INPUT:

    - ``seq`` -- Two different possible types:

      - A sequence of pairs (weight, value).
      - A sequence of reals (a value of 1 is assumed).

      The kind of sequence is decided by looking at its first element
      only: a ``tuple`` means pairs, anything else means reals.

    - ``binary`` -- When set to True, an item can be taken 0 or 1 time.
      When set to False, an item can be taken any amount of
      times (while staying integer and positive).

    - ``max`` -- Maximum admissible weight.

    - ``value_only`` -- When set to True, only the maximum useful
      value is returned. When set to False, both the maximum useful
      value and an assignment are returned.

    OUTPUT:

    If ``value_only`` is set to True, only the maximum useful value
    is returned. Else (the default), the function returns a pair
    ``[value,list]``, where ``list`` can be of two types according
    to the type of ``seq``:

    - A list of pairs `(w_i, u_i)` for each object `i` occurring
      in the solution.
    - A list of reals where each real is repeated the number
      of times it is taken into the solution.

    For an empty ``seq`` the result is ``0`` (respectively ``[0, []]``),
    without any solver being called.
    """
    # Nothing to pack: the optimum is trivially 0. This also avoids the
    # IndexError that ``seq[0]`` below would raise on an empty sequence.
    if len(seq) == 0:
        return 0 if value_only else [0, []]

    reals = not isinstance(seq[0], tuple)
    if reals:
        seq = [(x, 1) for x in seq]

    from sage.numerical.mip import MixedIntegerLinearProgram
    p = MixedIntegerLinearProgram(maximization=True)
    present = p.new_variable()
    # item[0] is the weight, item[1] the usefulness
    p.set_objective(p.sum([present[i] * item[1] for i, item in enumerate(seq)]))
    p.add_constraint(p.sum([present[i] * item[0] for i, item in enumerate(seq)]),
                     max=max)

    if binary:
        p.set_binary(present)
    else:
        p.set_integer(present)

    if value_only:
        return p.solve(objective_only=True)

    objective = p.solve()
    present = p.get_values(present)

    val = []
    for i, item in enumerate(seq):
        # Solvers may report an integer variable as e.g. 0.9999999; round
        # to the nearest integer rather than truncating towards zero.
        count = int(round(present[i]))
        val.extend([item[0] if reals else item] * count)

    return [objective, val]
Exemplo n.º 11
0
def gale_ryser_theorem(p1,
                       p2,
                       algorithm="gale",
                       *,
                       solver=None,
                       integrality_tolerance=1e-3):
    r"""
    Return the binary matrix given by the Gale-Ryser theorem.

    The Gale Ryser theorem asserts that if `p_1,p_2` are two
    partitions of `n` of respective lengths `k_1,k_2`, then there is
    a binary `k_1\times k_2` matrix `M` such that `p_1` is the vector
    of row sums and `p_2` is the vector of column sums of `M`, if
    and only if the conjugate of `p_2` dominates `p_1`.

    INPUT:

    - ``p1, p2``-- list of integers representing the vectors
      of row/column sums

    - ``algorithm`` -- two possible string values:

      - ``'ryser'`` implements the construction due to Ryser [Ryser63]_.
      - ``'gale'`` (default) implements the construction due to Gale [Gale57]_.

    - ``solver`` -- (default: ``None``) Specify a Mixed Integer Linear Programming
      (MILP) solver to be used. If set to ``None``, the default one is used. For
      more information on MILP solvers and which default solver is used, see
      the method
      :meth:`solve <sage.numerical.mip.MixedIntegerLinearProgram.solve>`
      of the class
      :class:`MixedIntegerLinearProgram <sage.numerical.mip.MixedIntegerLinearProgram>`.
      Only used by the ``'gale'`` algorithm.

    - ``integrality_tolerance`` -- parameter for use with MILP solvers over an
      inexact base ring; see :meth:`MixedIntegerLinearProgram.get_values`.
      Only used by the ``'gale'`` algorithm.

    OUTPUT:

    A binary matrix whose row sums are ``p1`` and whose column sums are
    ``p2``. When ``is_gale_ryser(p1, p2)`` does not hold, ``False`` is
    returned instead. A ``ValueError`` is raised for an unknown
    ``algorithm``.

    Gale's Algorithm:

    (Gale [Gale57]_): A matrix satisfying the constraints of its
    sums can be defined as the solution of the following
    Linear Program, which Sage knows how to solve.

    .. MATH::

        \forall i&\sum_{j=1}^{k_2} b_{i,j}=p_{1,i}\\
        \forall i&\sum_{j=1}^{k_1} b_{j,i}=p_{2,i}\\
        &b_{i,j}\mbox{ is a binary variable}

    Ryser's Algorithm:

    (Ryser [Ryser63]_): The construction of an `m \times n` matrix
    `A=A_{r,s}`, due to Ryser, is described as follows. The
    construction works if and only if have `s\preceq r^*`.

    * Construct the `m \times n` matrix `B` from `r` by defining
      the `i`-th row of `B` to be the vector whose first `r_i`
      entries are `1`, and the remainder are 0's, `1 \leq i \leq m`.
      This maximal matrix `B` with row sum `r` and ones left
      justified has column sum `r^{*}`.

    * Shift the last `1` in certain rows of `B` to column `n` in
      order to achieve the sum `s_n`.  Call this `B` again.

      * The `1`'s in column `n` are to appear in those
        rows in which `A` has the largest row sums, giving
        preference to the bottom-most positions in case of ties.
      * Note: When this step automatically "fixes" other columns,
        one must skip ahead to the first column index
        with a wrong sum in the step below.

    * Proceed inductively to construct columns `n-1`, ..., `2`, `1`.
      Note: when performing the induction on step `k`, we consider
      the row sums of the first `k` columns.

    * Set `A = B`. Return `A`.

    EXAMPLES:

    Computing the matrix for `p_1=p_2=2+2+1`::

        sage: from sage.combinat.integer_vector import gale_ryser_theorem
        sage: p1 = [2,2,1]
        sage: p2 = [2,2,1]
        sage: print(gale_ryser_theorem(p1, p2))     # not tested
        [1 1 0]
        [1 0 1]
        [0 1 0]
        sage: A = gale_ryser_theorem(p1, p2)
        sage: rs = [sum(x) for x in A.rows()]
        sage: cs = [sum(x) for x in A.columns()]
        sage: p1 == rs; p2 == cs
        True
        True

    Further examples, including non-square matrices, using Ryser's
    algorithm::

        sage: from sage.combinat.integer_vector import gale_ryser_theorem
        sage: p1 = [3,3,1,1]
        sage: p2 = [3,3,1,1]
        sage: gale_ryser_theorem(p1, p2, algorithm = "ryser")
        [1 1 1 0]
        [1 1 0 1]
        [1 0 0 0]
        [0 1 0 0]
        sage: p1 = [4,2,2]
        sage: p2 = [3,3,1,1]
        sage: gale_ryser_theorem(p1, p2, algorithm = "ryser")
        [1 1 1 1]
        [1 1 0 0]
        [1 1 0 0]
        sage: p1 = [4,2,2,0]
        sage: p2 = [3,3,1,1,0,0]
        sage: gale_ryser_theorem(p1, p2, algorithm = "ryser")
        [1 1 1 1 0 0]
        [1 1 0 0 0 0]
        [1 1 0 0 0 0]
        [0 0 0 0 0 0]
        sage: p1 = [3,3,2,1]
        sage: p2 = [3,2,2,1,1]
        sage: print(gale_ryser_theorem(p1, p2, algorithm="gale"))  # not tested
        [1 1 1 0 0]
        [1 1 0 0 1]
        [1 0 1 0 0]
        [0 0 0 1 0]

    With `0` in the sequences, and with unordered inputs::

        sage: from sage.combinat.integer_vector import gale_ryser_theorem
        sage: gale_ryser_theorem([3,3,0,1,1,0], [3,1,3,1,0], algorithm="ryser")
        [1 1 1 0 0]
        [1 0 1 1 0]
        [0 0 0 0 0]
        [1 0 0 0 0]
        [0 0 1 0 0]
        [0 0 0 0 0]
        sage: p1 = [3,1,1,1,1]; p2 = [3,2,2,0]
        sage: gale_ryser_theorem(p1, p2, algorithm="ryser")
        [1 1 1 0]
        [1 0 0 0]
        [1 0 0 0]
        [0 1 0 0]
        [0 0 1 0]

    TESTS:

    This test creates a random bipartite graph on `n+m` vertices. Its
    adjacency matrix is binary, and it is used to create some
    "random-looking" sequences which correspond to an existing matrix. The
    ``gale_ryser_theorem`` is then called on these sequences, and the output
    checked for correctness.::

        sage: def test_algorithm(algorithm, low = 10, high = 50):
        ....:    n,m = randint(low,high), randint(low,high)
        ....:    g = graphs.RandomBipartite(n, m, .3)
        ....:    s1 = sorted(g.degree([(0,i) for i in range(n)]), reverse = True)
        ....:    s2 = sorted(g.degree([(1,i) for i in range(m)]), reverse = True)
        ....:    m = gale_ryser_theorem(s1, s2, algorithm = algorithm)
        ....:    ss1 = sorted(map(lambda x : sum(x) , m.rows()), reverse = True)
        ....:    ss2 = sorted(map(lambda x : sum(x) , m.columns()), reverse = True)
        ....:    if ((ss1 != s1) or (ss2 != s2)):
        ....:        print("Algorithm %s failed with this input:" % algorithm)
        ....:        print(s1, s2)

        sage: for algorithm in ["gale", "ryser"]:             # long time
        ....:    for i in range(50):                          # long time
        ....:       test_algorithm(algorithm, 3, 10)          # long time

    Null matrix::

        sage: gale_ryser_theorem([0,0,0],[0,0,0,0], algorithm="gale")
        [0 0 0 0]
        [0 0 0 0]
        [0 0 0 0]
        sage: gale_ryser_theorem([0,0,0],[0,0,0,0], algorithm="ryser")
        [0 0 0 0]
        [0 0 0 0]
        [0 0 0 0]

    REFERENCES:

    ..  [Ryser63] \H. J. Ryser, Combinatorial Mathematics,
        Carus Monographs, MAA, 1963.
    ..  [Gale57] \D. Gale, A theorem on flows in networks, Pacific J. Math.
        7(1957)1073-1082.
    """
    from sage.matrix.constructor import matrix

    if not is_gale_ryser(p1, p2):
        return False

    if algorithm == "gale":
        from sage.numerical.mip import MixedIntegerLinearProgram

        n_rows = len(p1)
        n_cols = len(p2)
        lp = MixedIntegerLinearProgram(solver=solver)
        cell = lp.new_variable(binary=True)

        # Every row must reach its prescribed sum ...
        for row, total in enumerate(p1):
            lp.add_constraint(
                lp.sum([cell[row, col] for col in range(n_cols)]) == total)
        # ... and so must every column.
        for col, total in enumerate(p2):
            lp.add_constraint(
                lp.sum([cell[row, col] for row in range(n_rows)]) == total)

        # Feasibility only: there is nothing to optimise.
        lp.set_objective(None)
        lp.solve()
        values = lp.get_values(cell, convert=ZZ,
                               tolerance=integrality_tolerance)
        return matrix([[values[row, col] for col in range(n_cols)]
                       for row in range(n_rows)])

    if algorithm != "ryser":
        raise ValueError(
            'the only two algorithms available are "gale" and "ryser"')

    # Ryser's algorithm
    from sage.combinat.permutation import Permutation

    def decreasing_with_inverse(sequence):
        # Sort ``sequence`` in decreasing order and also return the
        # (0-based) inverse of the permutation that was applied, so the
        # original order can be restored afterwards.
        ranked = sorted(enumerate(sequence),
                        key=lambda pair: pair[1],
                        reverse=True)
        values = [value for _, value in ranked]
        inverse = Permutation([index + 1 for index, _ in ranked]).inverse()
        return values, [position - 1 for position in inverse]

    row_sums, row_order = decreasing_with_inverse(p1)
    n_rows = len(row_sums)
    col_sums, col_order = decreasing_with_inverse(p2)

    # Partition equivalent of the sliding algorithm: columns are produced
    # from the smallest column sum up, consuming ``row_sums`` as we go.
    columns = []
    for target in reversed(col_sums):
        column = [0] * n_rows
        lo = 0
        while target:
            # [lo, hi) is the maximal run of rows sharing the same
            # remaining sum.
            hi = lo + 1
            while hi < n_rows and row_sums[lo] == row_sums[hi]:
                hi += 1
            run = hi - lo
            if target >= run:
                # The whole run receives a one in this column.
                first = lo
                target -= run
            else:
                # Only the last ``target`` rows of the run receive a one.
                first = hi - target
                target = 0
            for j in range(first, hi):
                row_sums[j] -= 1
                column[j] = 1
            lo = hi
        columns.append(column)

    # Columns were appended back to front; restore the order, then undo the
    # two sorting permutations so the result matches the caller's input.
    result = matrix(columns[::-1]).transpose()
    return result.matrix_from_rows_and_columns(row_order, col_order)
Exemplo n.º 12
0
    def arc(self, s=2, solver=None, verbose=0):
        r"""
        Return the ``s``-arc with maximum cardinality.

        A `s`-arc is a subset of points in a BIBD that intersects each block on
        at most `s` points. It is one possible generalization of independent set
        for graphs.

        A simple counting shows that the cardinality of a `s`-arc is at most
        `(s-1) * r + 1` where `r` is the number of blocks incident to any point.
        A `s`-arc in a BIBD with cardinality `(s-1) * r + 1` is called maximal
        and is characterized by the following property: it is not empty and each
        block either contains `0` or `s` points of this arc. Equivalently, the
        trace of the BIBD on these points is again a BIBD (with block size `s`).

        For more information, see :wikipedia:`Arc_(projective_geometry)`.

        INPUT:

        - ``s`` -- (default: ``2``) the maximum number of points from the arc
          in each block; it is converted with ``int``

        - ``solver`` -- (default: ``None``) Specify a Linear Program (LP)
          solver to be used. If set to ``None``, the default one is used. For
          more information on LP solvers and which default solver is used, see
          the method
          :meth:`solve <sage.numerical.mip.MixedIntegerLinearProgram.solve>`
          of the class
          :class:`MixedIntegerLinearProgram <sage.numerical.mip.MixedIntegerLinearProgram>`.

        - ``verbose`` -- integer (default: ``0``). Sets the level of
          verbosity. Set to 0 by default, which means quiet.

        OUTPUT:

        A list of points. It is empty when ``s <= 0``, and it is a copy of
        the whole point list when ``s`` is at least the largest block size;
        no solver is called in these two cases.

        EXAMPLES::

            sage: B = designs.balanced_incomplete_block_design(21, 5)
            sage: a2 = B.arc()
            sage: a2 # random
            [5, 9, 10, 12, 15, 20]
            sage: len(a2)
            6
            sage: a4 = B.arc(4)
            sage: a4 # random
            [0, 1, 2, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20]
            sage: len(a4)
            16

        The `2`-arc and `4`-arc above are maximal. One can check that they
        intersect the blocks in either 0 or `s` points. Or equivalently that the
        traces are again BIBD::

            sage: r = (21-1)/(5-1)
            sage: 1 + r*1
            6
            sage: 1 + r*3
            16

            sage: B.trace(a2).is_t_design(2, return_parameters=True)
            (True, (2, 6, 2, 1))
            sage: B.trace(a4).is_t_design(2, return_parameters=True)
            (True, (2, 16, 4, 1))

        Some other examples which are not maximal::

            sage: B = designs.balanced_incomplete_block_design(25, 4)
            sage: a2 = B.arc(2)
            sage: r = (25-1)/(4-1)
            sage: print len(a2), 1 + r
            8 9
            sage: sa2 = set(a2)
            sage: set(len(sa2.intersection(b)) for b in B.blocks())
            {0, 1, 2}
            sage: B.trace(a2).is_t_design(2)
            False

            sage: a3 = B.arc(3)
            sage: print len(a3), 1 + 2*r
            15 17
            sage: sa3 = set(a3)
            sage: set(len(sa3.intersection(b)) for b in B.blocks()) == set([0,3])
            False
            sage: B.trace(a3).is_t_design(3)
            False

        TESTS:

        Test consistency with relabeling::

            sage: b = designs.balanced_incomplete_block_design(7,3)
            sage: b.relabel(list("abcdefg"))
            sage: set(b.arc()).issubset(b.ground_set())
            True
        """
        s = int(s)

        # trivial cases
        if s <= 0:
            return []
        elif s >= max(self.block_sizes()):
            # Every block already satisfies the bound: take all the points
            # (a copy, so the caller cannot alter the internal list).
            return self._points[:]

        # linear program
        from sage.numerical.mip import MixedIntegerLinearProgram

        p = MixedIntegerLinearProgram(solver=solver)
        # b[i] indicates whether the point of index i belongs to the arc
        b = p.new_variable(binary=True)
        p.set_objective(p.sum(b[i] for i in range(len(self._points))))
        # At most s selected points in every block
        for block in self._blocks:
            p.add_constraint(p.sum(b[k] for k in block) <= s)
        p.solve(log=verbose)

        # Numerical solvers may report a selected binary variable as
        # 0.9999999 rather than exactly 1; an exact ``== 1`` test would
        # silently drop that point, so compare against 1/2 instead.
        return [self._points[i]
                for i, value in p.get_values(b).items()
                if value > 0.5]
Exemplo n.º 13
0
def knapsack(seq, binary=True, max=1, value_only=False):
    r"""
    Solves the knapsack problem

    Knapsack problems:

    You have already had a knapsack problem, so you should know,
    but in case you do not, a knapsack problem is what happens
    when you have hundreds of items to put into a bag which is
    too small for all of them.

    When you formally write it, here is your problem:

    * Your bag can contain a weight of at most `W`.
    * Each item `i` you have has a weight `w_i`.
    * Each item `i` has a usefulness of `u_i`.

    You then want to maximize the usefulness of the items you
    will store into your bag, while making sure the weight of
    the bag will not go over `W`.

    As a linear program, this problem can be represented this way
    (if you define `b_i` as the binary variable indicating whether
    the item `i` is to be included in your bag):

    .. MATH::

        \mbox{Maximize: }\sum_i b_i u_i \\
        \mbox{Such that: }
        \sum_i b_i w_i \leq W \\
        \forall i, b_i \mbox{ binary variable} \\

    (For more information,
    cf. http://en.wikipedia.org/wiki/Knapsack_problem.)

    EXAMPLES:

    If your knapsack problem is composed of three
    items (weight, value) defined by (1,2), (1.5,1), (0.5,3),
    and a bag of maximum weight 2, you can easily solve it this way::

        sage: from sage.numerical.knapsack import knapsack
        sage: knapsack( [(1,2), (1.5,1), (0.5,3)], max=2)
        [5.0, [(1, 2), (0.500000000000000, 3)]]

        sage: knapsack( [(1,2), (1.5,1), (0.5,3)], max=2, value_only=True)
        5.0

    In the case where all the values (usefulness) of the items
    are equal to one, you do not need to bother with
    the second values, and you can just type for items
    `(1,1), (1.5,1), (0.5,1)` the command::

        sage: from sage.numerical.knapsack import knapsack
        sage: knapsack([1,1.5,0.5], max=2, value_only=True)
        2.0

    INPUT:

    - ``seq`` -- Two different possible types:

      - A sequence of pairs (weight, value).
      - A sequence of reals (a value of 1 is assumed).

      The kind of sequence is decided by looking at its first element
      only: a ``tuple`` means pairs, anything else means reals.

    - ``binary`` -- When set to True, an item can be taken 0 or 1 time.
      When set to False, an item can be taken any amount of
      times (while staying integer and positive).

    - ``max`` -- Maximum admissible weight.

    - ``value_only`` -- When set to True, only the maximum useful
      value is returned. When set to False, both the maximum useful
      value and an assignment are returned.

    OUTPUT:

    If ``value_only`` is set to True, only the maximum useful value
    is returned. Else (the default), the function returns a pair
    ``[value,list]``, where ``list`` can be of two types according
    to the type of ``seq``:

    - A list of pairs `(w_i, u_i)` for each object `i` occurring
      in the solution.
    - A list of reals where each real is repeated the number
      of times it is taken into the solution.

    For an empty ``seq`` the result is ``0`` (respectively ``[0, []]``),
    without any solver being called.
    """
    # Nothing to pack: the optimum is trivially 0. This also avoids the
    # IndexError that ``seq[0]`` below would raise on an empty sequence.
    if len(seq) == 0:
        return 0 if value_only else [0, []]

    reals = not isinstance(seq[0], tuple)
    if reals:
        seq = [(x, 1) for x in seq]

    from sage.numerical.mip import MixedIntegerLinearProgram

    p = MixedIntegerLinearProgram(maximization=True)
    present = p.new_variable()
    # item[0] is the weight, item[1] the usefulness
    p.set_objective(p.sum([present[i] * item[1] for i, item in enumerate(seq)]))
    p.add_constraint(p.sum([present[i] * item[0] for i, item in enumerate(seq)]), max=max)

    if binary:
        p.set_binary(present)
    else:
        p.set_integer(present)

    if value_only:
        return p.solve(objective_only=True)

    objective = p.solve()
    present = p.get_values(present)

    val = []
    for i, item in enumerate(seq):
        # Solvers may report an integer variable as e.g. 0.9999999; round
        # to the nearest integer rather than truncating towards zero.
        count = int(round(present[i]))
        val.extend([item[0] if reals else item] * count)

    return [objective, val]
Exemplo n.º 14
0
def binpacking(items,maximum=1,k=None):
    r"""
    Solves the bin packing problem.

    The Bin Packing problem is the following :

    Given a list of items of weights `p_i` and a real value `K`, what is
    the least number of bins such that all the items can be put in the
    bins, while keeping sure that each bin contains a weight of at most `K` ?

    For more informations : http://en.wikipedia.org/wiki/Bin_packing_problem

    Two version of this problem are solved by this algorithm :
         * Is it possible to put the given items in `k` bins ?
         * What is the assignment of items using the
           least number of bins with the given list of items ?

    INPUT:

    - ``items`` -- A list of real values (the items' weight)

    - ``maximum``   -- The maximal size of a bin

    - ``k``     -- Number of bins

      - When set to an integer value, the function returns a partition
        of the items into `k` bins if possible, and raises an
        exception otherwise.

      - When set to ``None``, the function returns a partition of the items
        using the least number possible of bins.

    OUTPUT:

    A list of lists, each member corresponding to a box and containing
    the list of the weights inside it. If there is no solution, a
    ``ValueError`` is raised (this can only happen when ``k`` is specified
    or if ``maximum`` is less that the size of one item).

    An empty ``items`` list needs no bin at all: the result is ``[]`` when
    ``k`` is ``None`` and a list of ``k`` empty bins otherwise.

    EXAMPLES:

    Trying to find the minimum amount of boxes for 5 items of weights
    `1/5, 1/3, 2/3, 3/4, 5/7`::

        sage: from sage.numerical.optimize import binpacking
        sage: values = [1/5, 1/3, 2/3, 3/4, 5/7]
        sage: bins = binpacking(values)
        sage: len(bins)
        3

    Checking the bins are of correct size ::

        sage: all([ sum(b)<= 1 for b in bins ])
        True

    Checking every item is in a bin ::

        sage: b1, b2, b3 = bins
        sage: all([ (v in b1 or v in b2 or v in b3) for v in values ])
        True

    One way to use only three boxes (which is best possible) is to put
    `1/5 + 3/4` together in a box, `1/3+2/3` in another, and `5/7`
    by itself in the third one.

    Of course, we can also check that there is no solution using only two boxes ::

        sage: from sage.numerical.optimize import binpacking
        sage: binpacking([0.2,0.3,0.8,0.9], k=2)
        Traceback (most recent call last):
        ...
        ValueError: This problem has no solution !

    TESTS:

    The lower bound `\lceil \sum p_i / K \rceil` is not always attained; the
    search must then go on with more bins::

        sage: len(binpacking([3/5, 3/5, 3/5]))
        3

    Nothing to pack::

        sage: binpacking([])
        []
    """

    if len(items) == 0:
        # Nothing to pack: no bin is needed, or `k` empty bins when a number
        # of bins was requested (max() below would fail on an empty list).
        return [] if k is None else [[] for _ in range(k)]

    if max(items) > maximum:
        raise ValueError("This problem has no solution !")

    if k is None:
        from sage.functions.other import ceil
        # Start from the volume lower bound and add bins until feasible.
        k=ceil(sum(items)/maximum)
        while True:
            try:
                return binpacking(items,k=k,maximum=maximum)
            except ValueError:
                # The fixed-k call below reports infeasibility as a
                # ValueError (it converts the solver's MIPSolverException),
                # so that is the exception to catch here.  Every item fits
                # alone in a bin (checked above), hence k = len(items) is
                # always feasible and the loop terminates.
                k = k + 1

    from sage.numerical.mip import MixedIntegerLinearProgram, MIPSolverException
    p=MixedIntegerLinearProgram()

    # Boolean variable indicating whether
    # the i th element belongs to box b
    box=p.new_variable(dim=2)

    # Each bin contains at most max
    for b in range(k):
        p.add_constraint(p.sum([items[i]*box[i][b] for i in range(len(items))]),max=maximum)

    # Each item is assigned exactly one bin
    for i in range(len(items)):
        p.add_constraint(p.sum([box[i][b] for b in range(k)]),min=1,max=1)

    # Pure feasibility problem: there is nothing to optimize
    p.set_objective(None)
    p.set_binary(box)

    try:
        p.solve()
    except MIPSolverException:
        raise ValueError("This problem has no solution !")

    box=p.get_values(box)

    boxes=[[] for i in range(k)]

    # The solver returns numerical values: round them before testing
    for b in range(k):
        boxes[b].extend([items[i] for i in range(len(items)) if round(box[i][b])==1])

    return boxes
Exemplo n.º 15
0
def binpacking(items, maximum=1, k=None, solver=None, verbose=0):
    r"""
    Solve the bin packing problem.

    The Bin Packing problem is the following :

    Given a list of items of weights `p_i` and a real value `k`, what is the
    least number of bins such that all the items can be packed in the bins,
    while ensuring that the sum of the weights of the items packed in each bin
    is at most `k` ?

    For more informations, see :wikipedia:`Bin_packing_problem`.

    Two versions of this problem are solved by this algorithm :

    - Is it possible to put the given items in `k` bins ?
    - What is the assignment of items using the least number of bins with
      the given list of items ?

    INPUT:

    - ``items`` -- list or dict; either a list of real values (the items'
      weight), or a dictionary associating to each item its weight.

    - ``maximum`` -- (default: 1); the maximal size of a bin

    - ``k`` -- integer (default: ``None``); Number of bins

      - When set to an integer value, the function returns a partition of the
        items into `k` bins if possible, and raises an exception otherwise.

      - When set to ``None``, the function returns a partition of the items
        using the least possible number of bins.

    - ``solver`` -- (default: ``None``); Specify a Linear Program (LP) solver to
      be used. If set to ``None``, the default one is used. For more information
      on LP solvers and which default solver is used, see the method
      :meth:`~sage.numerical.mip.MixedIntegerLinearProgram.solve` of the class
      :class:`~sage.numerical.mip.MixedIntegerLinearProgram`.

    - ``verbose`` -- integer (default: ``0``); sets the level of verbosity. Set
      to 0 by default, which means quiet.

    OUTPUT:

    A list of lists, each member corresponding to a bin and containing either
    the list of the weights inside it when ``items`` is a list of items' weight,
    or the list of items inside it when ``items`` is a dictionary. If there is
    no solution, a ``ValueError`` is raised (this can only happen when ``k`` is
    specified or if ``maximum`` is less than the weight of one item).

    When ``items`` is empty, no bin is needed: the result is ``[]`` if ``k`` is
    ``None`` and a list of ``k`` empty bins otherwise.

    EXAMPLES:

    Trying to find the minimum amount of boxes for 5 items of weights
    `1/5, 1/3, 2/3, 3/4, 5/7`::

        sage: from sage.numerical.optimize import binpacking
        sage: values = [1/5, 1/3, 2/3, 3/4, 5/7]
        sage: bins = binpacking(values)
        sage: len(bins)
        3

    Checking the bins are of correct size ::

        sage: all(sum(b) <= 1 for b in bins)
        True

    Checking every item is in a bin ::

        sage: b1, b2, b3 = bins
        sage: all((v in b1 or v in b2 or v in b3) for v in values)
        True

    And only in one bin ::

        sage: sum(len(b) for b in bins) == len(values)
        True

    One way to use only three boxes (which is best possible) is to put
    `1/5 + 3/4` together in a box, `1/3+2/3` in another, and `5/7`
    by itself in the third one.

    Of course, we can also check that there is no solution using only two boxes ::

        sage: from sage.numerical.optimize import binpacking
        sage: binpacking([0.2,0.3,0.8,0.9], k=2)
        Traceback (most recent call last):
        ...
        ValueError: this problem has no solution !

    We can also provide a dictionary keyed by items and associating to each item
    its weight. Then, the bins contain the name of the items inside it ::

        sage: values = {'a':1/5, 'b':1/3, 'c':2/3, 'd':3/4, 'e':5/7}
        sage: bins = binpacking(values)
        sage: set(flatten(bins)) == set(values.keys())
        True

    TESTS:

    Wrong type for parameter items::

        sage: binpacking(set())
        Traceback (most recent call last):
        ...
        TypeError: parameter items must be a list or a dictionary.

    The lower bound `\lceil \sum p_i / maximum \rceil` is not always attained;
    the search must then go on with more bins::

        sage: len(binpacking([3/5, 3/5, 3/5]))
        3

    Nothing to pack::

        sage: binpacking([])
        []
    """
    if isinstance(items, list):
        weight = {i:w for i,w in enumerate(items)}
    elif isinstance(items, dict):
        weight = items
    else:
        raise TypeError("parameter items must be a list or a dictionary.")

    if not weight:
        # Nothing to pack: no bin is needed, or `k` empty bins when a number
        # of bins was requested (max() below would fail on an empty input).
        return [] if k is None else [[] for _ in range(k)]

    if max(weight.values()) > maximum:
        raise ValueError("this problem has no solution !")

    if k is None:
        from sage.functions.other import ceil
        # Start from the volume lower bound and add bins until feasible.
        k = ceil(sum(weight.values())/maximum)
        while True:
            try:
                return binpacking(items, k=k, maximum=maximum, solver=solver, verbose=verbose)
            except ValueError:
                # The fixed-k call below reports infeasibility as a
                # ValueError (it converts the solver's MIPSolverException),
                # so that is the exception to catch here.  Every item fits
                # alone in a bin (checked above), hence one bin per item is
                # always feasible and the loop terminates.
                k = k + 1

    from sage.numerical.mip import MixedIntegerLinearProgram, MIPSolverException
    p = MixedIntegerLinearProgram(solver=solver)

    # Boolean variable indicating whether the ith element belongs to box b
    box = p.new_variable(binary=True)

    # Capacity constraint of each bin
    for b in range(k):
        p.add_constraint(p.sum(weight[i]*box[i,b] for i in weight) <= maximum)

    # Each item is assigned exactly one bin
    for i in weight:
        p.add_constraint(p.sum(box[i,b] for b in range(k)) == 1)

    try:
        p.solve(log=verbose)
    except MIPSolverException:
        raise ValueError("this problem has no solution !")

    box = p.get_values(box)

    boxes = [[] for i in range(k)]

    # Solver values may be floats close to (but not exactly) 0 or 1, so round
    # them instead of comparing with 1 exactly.
    for (i, b), value in box.items():
        if round(value) == 1:
            boxes[b].append(weight[i] if isinstance(items, list) else i)

    return boxes
Exemplo n.º 16
0
def OA_and_oval(q):
    r"""
    Return a `OA(q+1,q)` whose blocks contains `\leq 2` zeroes in the last `q`
    columns.

    The `OA` is built from a projective plane of order `q` together with an
    oval `O` of size `q+1` in it, i.e. a set of `q+1` points such that no
    line of the plane contains three of them.

    One point `x\in O` is removed along with every line through it. What
    remains is a `TD(q+1,q)` in which `O` meets all columns except one, and
    since `O` is an oval no block of the `TD` meets it more than twice.

    INPUT:

    - ``q`` -- a prime power

    .. NOTE::

            This function is called by :func:`construction_3_6`, an
            implementation of Construction 3.6 from [AC07]_.

    EXAMPLES::

        sage: from sage.combinat.designs.orthogonal_arrays_recursive import OA_and_oval
        sage: _ = OA_and_oval

    """
    from sage.rings.arith import is_prime_power
    from sage.combinat.designs.block_design import projective_plane
    from sage.numerical.mip import MixedIntegerLinearProgram
    from orthogonal_arrays import OA_relabel

    assert is_prime_power(q)
    plane = projective_plane(q, check=False)

    # The oval is found with a linear program: pick q+1 points, at most two
    # on each line.
    lp = MixedIntegerLinearProgram()
    picked = lp.new_variable(binary=True)
    points = plane.ground_set()
    lp.add_constraint(lp.sum([picked[pt] for pt in points]) == q+1)
    for line in plane:
        lp.add_constraint(lp.sum([picked[pt] for pt in line]) <= 2)
    lp.solve()
    oval = [pt for pt, val in lp.get_values(picked).items() if val]
    assert len(oval) == q+1

    # One point of the oval is taken out
    removed = oval.pop()
    oval.sort()

    def meets_oval(line):
        # 0 for lines avoiding the remaining oval points, 1 otherwise
        return int(any(pt in oval for pt in line))

    # The (stable) sort puts first the lines that avoid the remaining oval
    # points, so that the first line through the removed point is the one
    # containing no other oval point.
    lines = sorted(plane, key=meets_oval)

    # Lines through the removed point define the relabelling of the other
    # points (and hence the groups of the TD); the other lines are its blocks.
    relabel = {}
    blocks = []
    for line in lines:
        if removed not in line:
            blocks.append(line)
            continue
        for pt in line:
            if pt != removed:
                relabel[pt] = len(relabel)

    assert len(relabel) == (q+1)*q # all points except the removed one have an image
    assert len(set(relabel.values())) == len(relabel) # the images are different

    # Relabel and sort the blocks and the oval, then reduce modulo q to turn
    # the TD into an OA
    blocks = [sorted(relabel[pt] for pt in line) for line in blocks]
    oval = sorted(relabel[pt] for pt in oval)

    blocks = [[pt % q for pt in line] for line in blocks]
    oval = [pt % q for pt in oval]
    assert len(oval) == q

    # The block [0]+oval is added and used as the reference block for
    # OA_relabel; after the shift by one below it is the all-zero block,
    # which is then removed.
    OA = OA_relabel(blocks + [[0] + oval], q+1, q, blocks=[[0] + oval])
    OA = [[(v+1) % q for v in row] for row in OA]
    OA.remove([0]*(q+1))

    assert all(sum([v == 0 for v in row[1:]]) <= 2 for row in OA)
    return OA
Exemplo n.º 17
0
class hard_EM:
    """
    Hard-assignment EM over a weighted author graph.

    Every node of ``author_graph`` is assigned to one of ``nparts`` parts.
    The E-step (:meth:`e_step`) recomputes the assignment by solving a linear
    program; the M-step (:meth:`m_step`) re-estimates the per-part parameters
    ``self.theta`` from the current assignment.
    """

    def __init__(self, author_graph, TAU=0.5001, nparts=5, init_partition=None):
        """
        Store the model settings, relabel the graph nodes to integers, pick
        an initial partition and run a first M-step.

        - ``author_graph`` -- graph whose nodes carry the attributes read by
          :meth:`log_phi` / :meth:`m_step` and whose edges carry ``'weight'``
          and ``'denom'``
        - ``TAU`` -- value used in ``log(TAU)`` / ``log(1 - TAU)`` edge terms
        - ``nparts`` -- number of parts
        - ``init_partition`` -- optional ``{node: part}`` mapping; when falsy
          a random partition is drawn instead
        """
        self.parts = range(nparts)
        self.TAU = TAU
        self.author_graph = nx.convert_node_labels_to_integers(author_graph, discard_old_labels=False)
        self._lp_init = False
        # init hidden vars
        if init_partition:
            self.partition = init_partition
        else:
            self._rand_init_partition()
        self.m_step()

    def _rand_init_partition(self):
        """Assign every node to a part drawn with ``randint``, seeding ``random`` with the process id."""
        slog('Random partitioning with seed: %s' % os.getpid())
        random.seed(os.getpid())
        self.partition = {}
        nparts = len(self.parts)
        for a in self.author_graph:
            self.partition[a] = randint(0, nparts - 1)

    def _init_LP(self):
        """
        Build, once, the linear program used by :meth:`e_step`.

        Creates the variables and constraints, records the variable ids of
        the ``alpha`` variables (``self.alpha_dict``) and precomputes the
        pairwise part of the objective (``self.objF_pair``).  Does nothing if
        the program has already been built.
        """
        if self._lp_init:
            return

        slog('Init LP')
        self.lp = MixedIntegerLinearProgram(solver='GLPK', maximization=False)
        #self.lp.solver_parameter(backend.glp_simplex_or_intopt, backend.glp_simplex_only)       # LP relaxation
        self.lp.solver_parameter("iteration_limit", LP_ITERATION_LIMIT)
        # self.lp.solver_parameter("timelimit", LP_TIME_LIMIT)

        # add constraints once here
        # constraints
        self.alpha = self.lp.new_variable(dim=2)
        beta2 = self.lp.new_variable(dim=2)
        beta3 = self.lp.new_variable(dim=3)
        # alphas are indicator vars
        for a in self.author_graph:
            self.lp.add_constraint(sum(self.alpha[a][p] for p in self.parts) == 1)

        # beta2 is the sum of beta3s
        slog('Init LP - pair constraints')
        for a, b in self.author_graph.edges():
            # edges with 'denom' <= 2 get no pair variables
            if self.author_graph[a][b]['denom'] <= 2:
                continue
            # beta2[a][b] == 0.5 * sum_p beta3[a][b][p]
            self.lp.add_constraint(0.5 * sum(beta3[a][b][p] for p in self.parts) - beta2[a][b], min=0, max=0)
            for p in self.parts:
                # beta3[a][b][p] >= |alpha[a][p] - alpha[b][p]|
                self.lp.add_constraint(self.alpha[a][p] - self.alpha[b][p] - beta3[a][b][p], max=0)
                self.lp.add_constraint(self.alpha[b][p] - self.alpha[a][p] - beta3[a][b][p], max=0)

        # store indiv potential linear function as a dict to improve performance
        self.objF_indiv_dict = {}
        self.alpha_dict = {}
        for a in self.author_graph:
            self.alpha_dict[a] = {}
            for p in self.parts:
                # NOTE: indexing keys() requires it to return a list (Python 2)
                var_id = self.alpha_dict[a][p] = self.alpha[a][p].dict().keys()[0]
                self.objF_indiv_dict[var_id] = 0        # init variables coeffs to zero

        # pairwise potentials
        slog('Obj func - pair potentials')
        objF_pair_dict = {}
        s = log(1 - self.TAU) - log(self.TAU)
        for a, b in self.author_graph.edges():
            if self.author_graph[a][b]['denom'] <= 2:
                continue
            var_id = beta2[a][b].dict().keys()[0]
            objF_pair_dict[var_id] = -self.author_graph[a][b]['weight'] * s
        self.objF_pair = self.lp(objF_pair_dict)

        self._lp_init = True
        slog('Init LP Done')

    def log_phi(self, a, p):
        """
        Return the individual log-potential of node ``a`` under part ``p``.

        Sums the part's ``'logPr'``, the helpfulness and real-name log terms
        selected by the node's boolean attributes, and a Gaussian
        log-density of ``'revLen'`` with mean ``'muL'`` and variance
        ``'sigma2L'`` taken from ``self.theta[p]``.
        """
        author = self.author_graph.node[a]
        th = self.theta[p]
        res = th['logPr']
        if author['hlpful_fav_unfav']:
            res += th['logPrH']
        else:
            res += th['log1-PrH']
        if author['isRealName']:
            res += th['logPrR']
        else:
            res += th['log1-PrR']
        res += -((author['revLen'] - th['muL']) ** 2) / (2 * th['sigma2L'] + EPS) - (log_2pi + log(th['sigma2L'])) / 2.0
        return res

    def log_likelihood(self):
        """
        Return the log-likelihood of the current partition.

        Adds :meth:`log_phi` over all nodes, plus for every edge its
        ``'weight'`` times ``log(TAU)`` when both ends share a part and
        ``log(1 - TAU)`` otherwise.
        """
        ll = sum(self.log_phi(a, self.partition[a]) for a in self.author_graph.nodes())
        log_TAU, log_1_TAU = log(self.TAU), log(1 - self.TAU)
        for a, b in self.author_graph.edges():
            if self.partition[a] == self.partition[b]:
                ll += log_TAU * self.author_graph[a][b]['weight']
            else:
                ll += log_1_TAU * self.author_graph[a][b]['weight']
        return ll

    def e_step(self):
        """
        Recompute ``self.partition`` by solving the linear program.

        The objective is the sum of the individual potentials
        (``-log_phi`` coefficients on the ``alpha`` variables, refreshed
        here) and the precomputed pairwise potentials.  Each node is then
        assigned to the part whose ``alpha`` value is largest.
        """
        slog('E-Step')
        if not self._lp_init:
            self._init_LP()

        slog('Obj func - indiv potentials')
        # individual potentials
        for a in self.author_graph:
            for p in self.parts:
                self.objF_indiv_dict[self.alpha_dict[a][p]] = -self.log_phi(a, p)

        objF_indiv = self.lp(self.objF_indiv_dict)
        self.lp.set_objective(self.lp.sum([objF_indiv, self.objF_pair]))

        # solve the LP
        slog('Solving the LP')
        self.lp.solve(log=3)
        slog('Solving the LP Done')

        # hard partitions for nodes (authors)
        self.partition = {}
        for a in self.author_graph:
            membship = self.lp.get_values(self.alpha[a])
            self.partition[a] = max(membship, key=membship.get)
        slog('E-Step Done')

    def m_step(self):
        """
        Re-estimate ``self.theta`` (one parameter dict per part) from the
        current partition.

        Per part, counts the nodes, the helpful ones and the real-name ones,
        and accumulates the mean and sum of squared deviations of
        ``'revLen'``; the logs of the resulting frequencies and the variance
        are smoothed with ``EPS``.
        """
        slog('M-Step')
        stat = {p: [0.0] * len(self.parts) for p in ['freq', 'hlpful', 'realNm', 'muL', 'M2']}
        for a in self.author_graph:
            p = self.partition[a]
            author = self.author_graph.node[a]
            stat['freq'][p] += 1
            if author['hlpful_fav_unfav']: stat['hlpful'][p] += 1
            if author['isRealName']: stat['realNm'][p] += 1
            # running mean / sum of squared deviations (Welford's update)
            delta = author['revLen'] - stat['muL'][p]
            stat['muL'][p] += delta / stat['freq'][p]
            stat['M2'][p] += delta * (author['revLen'] - stat['muL'][p])

        self.theta = [{p: 0.0 for p in ['logPr', 'logPrH', 'log1-PrH', 'logPrR', 'log1-PrR', 'sigma2L', 'muL']}
                      for p in self.parts]
        sum_freq = sum(stat['freq'][p] for p in self.parts)

        for p in self.parts:
            self.theta[p]['logPr'] = log(stat['freq'][p] / (sum_freq + EPS) + EPS)
            self.theta[p]['logPrH'] = log(stat['hlpful'][p] / (stat['freq'][p] + EPS) + EPS)
            self.theta[p]['log1-PrH'] = log(1 - stat['hlpful'][p] / (stat['freq'][p] + EPS) + EPS)
            self.theta[p]['logPrR'] = log(stat['realNm'][p] / (stat['freq'][p] + EPS) + EPS)
            self.theta[p]['log1-PrR'] = log(1 - stat['realNm'][p] / (stat['freq'][p] + EPS) + EPS)
            self.theta[p]['muL'] = stat['muL'][p]
            self.theta[p]['sigma2L'] = stat['M2'][p] / (stat['freq'][p] - 1 + EPS) + EPS

        slog('M-Step Done')

    def iterate(self, MAX_ITER=20):
        """
        Alternate E- and M-steps until the log-likelihood changes by at most
        0.1 or ``MAX_ITER`` iterations have been run.

        Returns the pair ``(log_likelihood, partition)``.
        """
        past_ll = -float('inf')
        ll = self.log_likelihood()
        EPS = 0.1  # local convergence threshold (shadows the module-level EPS)
        itr = 0
        while abs(ll - past_ll) > EPS and itr < MAX_ITER:
            if ll < past_ll:
                slog('ll decreased')
            itr += 1
            self.e_step()
            self.m_step()
            past_ll = ll
            ll = self.log_likelihood()
            slog('itr #%s\tlog_l: %s\tdelta: %s' % (itr, ll, ll - past_ll))

        if itr == MAX_ITER:
            slog('Hit max iteration: %d' % MAX_ITER)

        return ll, self.partition

    def run_EM_pool(self, nprocs=mp.cpu_count()):
        """
        Run ``EM_RESTARTS`` EM runs in a process pool and combine them.

        ``em_parallel_mapper`` is mapped over copies of ``self`` and the
        results are folded with ``ll_partition_reducer``.  The returned
        partition is keyed by the original node labels.

        Returns the pair ``(log_likelihood, node_to_partition)``.
        """
        pool = Pool(processes=nprocs)
        try:
            ll_partitions = pool.map(em_parallel_mapper, [self] * EM_RESTARTS)
            ll, partition = reduce(ll_partition_reducer, ll_partitions)
        finally:
            # always release the worker processes, even if a run fails
            pool.terminate()

        int_to_orig_node_label = {v: k for k, v in self.author_graph.node_labels.items()}
        node_to_partition = {int_to_orig_node_label[n]: partition[n] for n in partition}

        return ll, node_to_partition
Exemplo n.º 18
0
def dominating_set(g,
                   k=1,
                   independent=False,
                   total=False,
                   value_only=False,
                   solver=None,
                   verbose=0,
                   *,
                   integrality_tolerance=1e-3):
    r"""
    Return a minimum distance-`k` dominating set of the graph.

    A minimum dominating set `S` of a graph `G` is a set of its vertices of
    minimal cardinality such that any vertex of `G` is in `S` or has one of its
    neighbors in `S`. See the :wikipedia:`Dominating_set`.

    A minimum distance-`k` dominating set is a set `S` of vertices of `G` of
    minimal cardinality such that any vertex of `G` is in `S` or at distance at
    most `k` from a vertex in `S`. A distance-`0` dominating set is the set of
    vertices itself, and when `k` is the radius of the graph, any vertex
    dominates all the other vertices.

    As an optimization problem, it can be expressed as follows, where `N^k(u)`
    denotes the set of vertices at distance at most `k` from `u` (the set of
    neighbors when `k=1`):

    .. MATH::

        \mbox{Minimize : }&\sum_{v\in G} b_v\\
        \mbox{Such that : }&\forall v \in G, b_v+\sum_{u \in N^k(v)} b_u\geq 1\\
        &\forall x\in G, b_x\mbox{ is a binary variable}

    INPUT:

    - ``g`` -- the graph (or digraph) to dominate

    - ``k`` -- a non-negative integer (default: ``1``); the domination distance

    - ``independent`` -- boolean (default: ``False``); when ``True``, computes a
      minimum independent dominating set, that is a minimum dominating set that
      is also an independent set (see also
      :meth:`~sage.graphs.graph.independent_set`)

    - ``total`` -- boolean (default: ``False``); when ``True``, computes a total
      dominating set (see the :wikipedia:`Dominating_set`)

    - ``value_only`` -- boolean (default: ``False``); whether to only return the
      cardinality of the computed dominating set, or to return its list of
      vertices (default)

    - ``solver`` -- string (default: ``None``); specify a Mixed Integer Linear
      Programming (MILP) solver to be used. If set to ``None``, the default one
      is used. For more information on MILP solvers and which default solver is
      used, see the method :meth:`solve
      <sage.numerical.mip.MixedIntegerLinearProgram.solve>` of the class
      :class:`MixedIntegerLinearProgram
      <sage.numerical.mip.MixedIntegerLinearProgram>`.

    - ``verbose`` -- integer (default: ``0``); sets the level of verbosity. Set
      to 0 by default, which means quiet.

    - ``integrality_tolerance`` -- float; parameter for use with MILP solvers
      over an inexact base ring; see
      :meth:`MixedIntegerLinearProgram.get_values`.

    EXAMPLES:

    A basic illustration on a ``PappusGraph``::

        sage: g = graphs.PappusGraph()
        sage: g.dominating_set(value_only=True)
        5

    If we build a graph from two disjoint stars, then link their centers we will
    find a difference between the cardinality of a dominating set and of an
    independent dominating set::

        sage: g = 2 * graphs.StarGraph(5)
        sage: g.add_edge(0, 6)
        sage: len(g.dominating_set())
        2
        sage: len(g.dominating_set(independent=True))
        6

    The total dominating set of the Petersen graph has cardinality 4::

        sage: G = graphs.PetersenGraph()
        sage: G.dominating_set(total=True, value_only=True)
        4

    The dominating set is calculated for both the directed and undirected graphs
    (modification introduced in :trac:`17905`)::

        sage: g = digraphs.Path(3)
        sage: g.dominating_set(value_only=True)
        2
        sage: g = graphs.PathGraph(3)
        sage: g.dominating_set(value_only=True)
        1

    Cardinality of distance-`k` dominating sets::

        sage: G = graphs.PetersenGraph()
        sage: [G.dominating_set(k=k, value_only=True) for k in range(G.radius() + 1)]
        [10, 3, 1]
        sage: G = graphs.PathGraph(5)
        sage: [G.dominating_set(k=k, value_only=True) for k in range(G.radius() + 1)]
        [5, 2, 1]
    """
    g._scream_if_not_simple(allow_multiple_edges=True, allow_loops=not total)

    # Distance 0: every vertex only dominates itself
    if not k:
        if value_only:
            return g.order()
        return list(g)
    if k < 0:
        raise ValueError(
            "the domination distance must be a non-negative integer")

    from sage.numerical.mip import MixedIntegerLinearProgram
    lp = MixedIntegerLinearProgram(maximization=False, solver=solver)
    chosen = lp.new_variable(binary=True)

    # dominators(v) iterates over the vertices, other than v itself, that can
    # dominate v
    if k == 1:
        # The neighbors of v, or its in-neighbors when g is directed
        if g.is_directed():
            dominators = g.neighbor_in_iterator
        else:
            dominators = g.neighbor_iterator
    else:
        # For k > 1, a BFS (in the reversed digraph when g is directed) finds
        # the vertices that can reach v through a path of length at most k
        search_graph = g.reverse() if g.is_directed() else g

        def dominators(x):
            bfs = search_graph.breadth_first_search(x, distance=k)
            next(bfs)  # the first vertex of the search is discarded
            for u in bfs:
                yield u

    # Every vertex must be dominated; in a total dominating set a vertex does
    # not dominate itself
    for v in g:
        if total:
            lhs = lp.sum(chosen[u] for u in dominators(v))
        else:
            lhs = chosen[v] + lp.sum(chosen[u] for u in dominators(v))
        lp.add_constraint(lhs, min=1)

    if independent:
        # At most one endpoint of each edge may be chosen
        for u, v in g.edge_iterator(labels=None):
            lp.add_constraint(chosen[u] + chosen[v], max=1)

    # Use as few vertices as possible
    lp.set_objective(lp.sum(chosen[v] for v in g))

    lp.solve(log=verbose)
    in_set = lp.get_values(chosen, convert=bool, tolerance=integrality_tolerance)
    dom = [v for v in g if in_set[v]]
    if value_only:
        return Integer(len(dom))
    return dom
Exemplo n.º 19
0
def min_cover(npts, sets, solver='sage'):
    r"""
    Return the minimum number of sets from ``sets`` needed to cover the
    points ``0, ..., npts-1``.

    INPUT:

    - ``npts`` -- number of points; points are the integers in ``range(npts)``

    - ``sets`` -- list of lists of points

    - ``solver`` -- one of ``'sage'`` (default; solved in-process with
      ``MixedIntegerLinearProgram``), ``'GLPK'``/``'glpk'`` (runs the
      ``glpsol`` executable) or ``'lp_solve'`` (runs the ``lp_solve``
      executable). The two external solvers exchange data through the files
      ``tmp-rank-fmps`` and ``tmp-rank-glpsol`` in the current directory.

    OUTPUT:

    The size of a minimum cover as an integer, or ``False`` if some point
    belongs to none of the sets. With ``'lp_solve'``, ``0`` is returned (and
    a message written to standard error) when the solver output is not
    understood. A ``ValueError`` is raised for an unknown ``solver``.

    EXAMPLES::

        sage: from max_plus.rank import min_cover
        sage: min_cover(5, [[0,1,2],[1,2,3],[2,4]], solver='sage')
        3
        sage: min_cover(5, [[0,1,2],[1,2,3],[2,4]], solver='lp_solve')   # optional -- lp_solve
        3
    """
    # check if the problem is solvable
    covered = [False] * npts
    for s in sets:
        for ndx in s:
            covered[ndx] = True
    if not all(covered):
        return False

    if solver == 'sage':
        from sage.numerical.mip import MixedIntegerLinearProgram
        M = MixedIntegerLinearProgram(maximization=False)
        x = M.new_variable(binary=True)

        # dual_sets[k] lists the indices of the sets containing point k
        dual_sets = [[] for _ in range(npts)]
        for i, s in enumerate(sets):
            for k in s:
                dual_sets[k].append(i)

        for k in range(npts):
            M.add_constraint(M.sum(x[i] for i in dual_sets[k]) >= 1)

        M.set_objective(M.sum(x[i] for i in range(len(sets))))

        # The objective may come back as a float such as 2.9999999: round it
        # rather than truncate it.
        return int(round(M.solve()))

    if solver not in ('GLPK', 'glpk', 'lp_solve'):
        raise ValueError("unknown solver %r (expected 'sage', 'glpk' or "
                         "'lp_solve')" % (solver,))

    # Write the Free MPS format integer programming problem
    with open("tmp-rank-fmps", "w") as fh:
        fh.write("NAME min cover\n")
        fh.write("ROWS\n") # constraints
        for i in range(npts):
            fh.write(" G POINT" + str(i) + "\n")
        fh.write(" N NUMSETS\n")
        fh.write("COLUMNS\n") # variables
        for i in range(len(sets)):
            fh.write("  SET%d NUMSETS 1\n" % i)
            for point in sets[i]:
                fh.write("  SET%d POINT%d 1\n" % (i, point))
        fh.write("RHS\n") # right hand side to constraints
        for i in range(npts):
            fh.write("  COVER POINT%d 1\n" % i)
        fh.write("BOUNDS\n") # bounds on variables
        for i in range(len(sets)):
            fh.write(" BV A SET%d\n" % i)
        fh.write("ENDATA\n")

    # Run the solver
    if solver == 'lp_solve':
        # lpsolve solver; universal_newlines makes stdout text, so that the
        # prefix comparison below also works when pipes yield bytes
        p = Popen(["lp_solve", "-fmps", "tmp-rank-fmps"], stdout=PIPE,
                  universal_newlines=True)
        output, _ = p.communicate()
        lines = output.splitlines()
        # the first line is blank, the second one holds the objective value
        line = lines[1] if len(lines) > 1 else ""
        start = "Value of objective function: "
        if not line.startswith(start):
            stderr.write("Unexpected output from lp_solve\n")
            return 0
        # going through float accepts both "3" and "3.000000"
        return int(round(float(line[len(start):])))

    # GLPK solver: the value is the second field of the second line of the
    # file written by glpsol
    os.system("glpsol -w tmp-rank-glpsol tmp-rank-fmps > /dev/null")
    with open("tmp-rank-glpsol") as fh:
        fh.readline()
        line = fh.readline()
    return int(line.split()[1])
Exemplo n.º 20
0
def binpacking(items, maximum=1, k=None):
    r"""
    Solves the bin packing problem.

    The Bin Packing problem is the following :

    Given a list of items of weights `p_i` and a real value `K`, what is
    the least number of bins such that all the items can be put in the
    bins, while keeping sure that each bin contains a weight of at most `K` ?

    For more informations : http://en.wikipedia.org/wiki/Bin_packing_problem

    Two version of this problem are solved by this algorithm :
         * Is it possible to put the given items in `k` bins ?
         * What is the assignment of items using the
           least number of bins with the given list of items ?

    INPUT:

    - ``items`` -- A list of real values (the items' weight)

    - ``maximum``   -- The maximal size of a bin

    - ``k``     -- Number of bins

      - When set to an integer value, the function returns a partition
        of the items into `k` bins if possible, and raises an
        exception otherwise.

      - When set to ``None``, the function returns a partition of the items
        using the least number possible of bins.

    OUTPUT:

    A list of lists, each member corresponding to a box and containing
    the list of the weights inside it. If there is no solution, a
    ``ValueError`` is raised (this can only happen when ``k`` is specified
    or if ``maximum`` is less that the size of one item).

    An empty ``items`` list needs no bin at all: the result is ``[]`` when
    ``k`` is ``None`` and a list of ``k`` empty bins otherwise.

    EXAMPLES:

    Trying to find the minimum amount of boxes for 5 items of weights
    `1/5, 1/3, 2/3, 3/4, 5/7`::

        sage: from sage.numerical.optimize import binpacking
        sage: values = [1/5, 1/3, 2/3, 3/4, 5/7]
        sage: bins = binpacking(values)
        sage: len(bins)
        3

    Checking the bins are of correct size ::

        sage: all([ sum(b)<= 1 for b in bins ])
        True

    Checking every item is in a bin ::

        sage: b1, b2, b3 = bins
        sage: all([ (v in b1 or v in b2 or v in b3) for v in values ])
        True

    One way to use only three boxes (which is best possible) is to put
    `1/5 + 3/4` together in a box, `1/3+2/3` in another, and `5/7`
    by itself in the third one.

    Of course, we can also check that there is no solution using only two boxes ::

        sage: from sage.numerical.optimize import binpacking
        sage: binpacking([0.2,0.3,0.8,0.9], k=2)
        Traceback (most recent call last):
        ...
        ValueError: This problem has no solution !

    TESTS:

    The lower bound `\lceil \sum p_i / K \rceil` is not always attained; the
    search must then go on with more bins::

        sage: len(binpacking([3/5, 3/5, 3/5]))
        3

    Nothing to pack::

        sage: binpacking([])
        []
    """

    if len(items) == 0:
        # Nothing to pack: no bin is needed, or `k` empty bins when a number
        # of bins was requested (max() below would fail on an empty list).
        return [] if k is None else [[] for _ in range(k)]

    if max(items) > maximum:
        raise ValueError("This problem has no solution !")

    if k is None:
        from sage.functions.other import ceil
        # Start from the volume lower bound and add bins until feasible.
        k = ceil(sum(items) / maximum)
        while True:
            try:
                return binpacking(items, k=k, maximum=maximum)
            except ValueError:
                # The fixed-k call below reports infeasibility as a
                # ValueError (it converts the solver's MIPSolverException),
                # so that is the exception to catch here.  Every item fits
                # alone in a bin (checked above), hence k = len(items) is
                # always feasible and the loop terminates.
                k = k + 1

    from sage.numerical.mip import MixedIntegerLinearProgram, MIPSolverException
    p = MixedIntegerLinearProgram()

    # Boolean variable indicating whether
    # the i th element belongs to box b
    box = p.new_variable(dim=2)

    # Each bin contains at most max
    for b in range(k):
        p.add_constraint(p.sum(
            [items[i] * box[i][b] for i in range(len(items))]),
                         max=maximum)

    # Each item is assigned exactly one bin
    for i in range(len(items)):
        p.add_constraint(p.sum([box[i][b] for b in range(k)]), min=1, max=1)

    # Pure feasibility problem: there is nothing to optimize
    p.set_objective(None)
    p.set_binary(box)

    try:
        p.solve()
    except MIPSolverException:
        raise ValueError("This problem has no solution !")

    box = p.get_values(box)

    boxes = [[] for i in range(k)]

    # The solver returns numerical values: round them before testing
    for b in range(k):
        boxes[b].extend(
            [items[i] for i in range(len(items)) if round(box[i][b]) == 1])

    return boxes
Exemplo n.º 21
0
def knapsack(seq,
             binary=True,
             max=1,
             value_only=False,
             solver=None,
             verbose=0,
             *,
             integrality_tolerance=1e-3):
    r"""
    Solves the knapsack problem

    For more information on the knapsack problem, see the documentation of the
    :mod:`knapsack module <sage.numerical.knapsack>` or the
    :wikipedia:`Knapsack_problem`.

    INPUT:

    - ``seq`` -- Two different possible types:

      - A sequence of tuples ``(weight, value, something1, something2,
        ...)``. Note that only the first two coordinates (``weight`` and
        ``value``) will be taken into account. The rest (if any) will be
        ignored. This can be useful if you need to attach some information to
        the items.

      - A sequence of reals (a value of 1 is assumed).

      The type is decided by looking at the first element only. An empty
      sequence is accepted and yields the value ``0`` with an empty
      assignment (``max`` is not examined in that case).

    - ``binary`` -- When set to ``True``, an item can be taken 0 or 1 time.
      When set to ``False``, an item can be taken any amount of times (while
      staying integer and nonnegative).

    - ``max`` -- Maximum admissible weight.

    - ``value_only`` -- When set to ``True``, only the maximum useful value is
      returned. When set to ``False``, both the maximum useful value and an
      assignment are returned.

    - ``solver`` -- (default: ``None``) Specify a Mixed Integer Linear Programming
      (MILP) solver to be used. If set to ``None``, the default one is used. For
      more information on MILP solvers and which default solver is used, see
      the method
      :meth:`solve <sage.numerical.mip.MixedIntegerLinearProgram.solve>`
      of the class
      :class:`MixedIntegerLinearProgram <sage.numerical.mip.MixedIntegerLinearProgram>`.

    - ``verbose`` -- integer (default: ``0``). Sets the level of verbosity. Set
      to 0 by default, which means quiet.

    - ``integrality_tolerance`` -- parameter for use with MILP solvers over an
      inexact base ring; see :meth:`MixedIntegerLinearProgram.get_values`.

    OUTPUT:

    If ``value_only`` is set to ``True``, only the maximum useful value is
    returned. Else (the default), the function returns a pair ``[value,list]``,
    where ``list`` can be of two types according to the type of ``seq``:

    - The list of tuples `(w_i, u_i, ...)` occurring in the solution.

    - A list of reals where each real is repeated the number of times it is
      taken into the solution.

    EXAMPLES:

    If your knapsack problem is composed of three items ``(weight, value)``
    defined by ``(1,2), (1.5,1), (0.5,3)``, and a bag of maximum weight `2`, you
    can easily solve it this way::

        sage: from sage.numerical.knapsack import knapsack
        sage: knapsack( [(1,2), (1.5,1), (0.5,3)], max=2)
        [5.0, [(1, 2), (0.500000000000000, 3)]]

        sage: knapsack( [(1,2), (1.5,1), (0.5,3)], max=2, value_only=True)
        5.0

    Besides weight and value, you may attach any data to the items::

        sage: from sage.numerical.knapsack import knapsack
        sage: knapsack( [(1, 2, 'spam'), (0.5, 3, 'a', 'lot')])
        [3.0, [(0.500000000000000, 3, 'a', 'lot')]]

    In the case where all the values (usefulness) of the items are equal to one,
    you do not need to bother with the second values, and you can just
    type for items `(1,1), (1.5,1), (0.5,1)` the command::

        sage: from sage.numerical.knapsack import knapsack
        sage: knapsack([1,1.5,0.5], max=2, value_only=True)
        2.0

    An empty sequence is answered directly, without calling a solver::

        sage: knapsack([])
        [0, []]
    """
    # Nothing to choose from: answer before ``seq[0]`` below can raise an
    # IndexError. ``len`` is used instead of truthiness so that sequence-like
    # containers with their own notion of truth are judged by length only.
    if len(seq) == 0:
        return 0 if value_only else [0, []]

    reals = not isinstance(seq[0], tuple)
    if reals:
        seq = [(x, 1) for x in seq]

    from sage.numerical.mip import MixedIntegerLinearProgram
    from sage.rings.integer_ring import ZZ

    p = MixedIntegerLinearProgram(solver=solver, maximization=True)

    if binary:
        present = p.new_variable(binary=True)
    else:
        # An integer MILP variable has no lower bound unless one is requested
        # (see ``new_variable``); without ``nonnegative=True`` items could be
        # taken a negative number of times and the program may be unbounded.
        present = p.new_variable(integer=True, nonnegative=True)

    # Maximise the total value subject to the total weight not exceeding max.
    p.set_objective(
        p.sum([present[i] * item[1] for i, item in enumerate(seq)]))
    p.add_constraint(
        p.sum([present[i] * item[0] for i, item in enumerate(seq)]),
        max=max)

    if value_only:
        return p.solve(objective_only=True, log=verbose)

    objective = p.solve(log=verbose)
    present = p.get_values(present,
                           convert=ZZ,
                           tolerance=integrality_tolerance)

    # Repeat every item as many times as it was taken. For a sequence of
    # reals, hand back the bare weight rather than the internal (x, 1) pair.
    val = []
    for i, item in enumerate(seq):
        val.extend([item[0] if reals else item] * present[i])

    return [objective, val]
Exemplo n.º 22
0
def knapsack(seq, binary=True, max=1, value_only=False, solver=None, verbose=0):
    r"""
    Solves the knapsack problem

    For more information on the knapsack problem, see the documentation of the
    :mod:`knapsack module <sage.numerical.knapsack>` or the
    :wikipedia:`Knapsack_problem`.

    INPUT:

    - ``seq`` -- Two different possible types:

      - A sequence of tuples ``(weight, value, something1, something2,
        ...)``. Note that only the first two coordinates (``weight`` and
        ``value``) will be taken into account. The rest (if any) will be
        ignored. This can be useful if you need to attach some information to
        the items.

      - A sequence of reals (a value of 1 is assumed).

      The type is decided by looking at the first element only. An empty
      sequence is accepted and yields the value ``0`` with an empty
      assignment (``max`` is not examined in that case).

    - ``binary`` -- When set to ``True``, an item can be taken 0 or 1 time.
      When set to ``False``, an item can be taken any amount of times (while
      staying integer and positive).

    - ``max`` -- Maximum admissible weight.

    - ``value_only`` -- When set to ``True``, only the maximum useful value is
      returned. When set to ``False``, both the maximum useful value and an
      assignment are returned.

    - ``solver`` -- (default: ``None``) Specify a Linear Program (LP) solver to
      be used. If set to ``None``, the default one is used. For more information
      on LP solvers and which default solver is used, see the documentation of
      class :class:`MixedIntegerLinearProgram
      <sage.numerical.mip.MixedIntegerLinearProgram>`.

    - ``verbose`` -- integer (default: ``0``). Sets the level of verbosity. Set
      to 0 by default, which means quiet.

    OUTPUT:

    If ``value_only`` is set to ``True``, only the maximum useful value is
    returned. Else (the default), the function returns a pair ``[value,list]``,
    where ``list`` can be of two types according to the type of ``seq``:

    - The list of tuples `(w_i, u_i, ...)` occurring in the solution.

    - A list of reals where each real is repeated the number of times it is
      taken into the solution.

    EXAMPLES:

    If your knapsack problem is composed of three items ``(weight, value)``
    defined by ``(1,2), (1.5,1), (0.5,3)``, and a bag of maximum weight `2`, you
    can easily solve it this way::

        sage: from sage.numerical.knapsack import knapsack
        sage: knapsack( [(1,2), (1.5,1), (0.5,3)], max=2)
        [5.0, [(1, 2), (0.500000000000000, 3)]]

        sage: knapsack( [(1,2), (1.5,1), (0.5,3)], max=2, value_only=True)
        5.0

    Besides weight and value, you may attach any data to the items::

        sage: from sage.numerical.knapsack import knapsack
        sage: knapsack( [(1, 2, 'spam'), (0.5, 3, 'a', 'lot')])
        [3.0, [(0.500000000000000, 3, 'a', 'lot')]]

    In the case where all the values (usefulness) of the items are equal to one,
    you do not need to bother with the second values, and you can just
    type for items `(1,1), (1.5,1), (0.5,1)` the command::

        sage: from sage.numerical.knapsack import knapsack
        sage: knapsack([1,1.5,0.5], max=2, value_only=True)
        2.0

    An empty sequence is answered directly, without calling a solver::

        sage: knapsack([])
        [0, []]
    """
    # Nothing to choose from: answer before ``seq[0]`` below can raise an
    # IndexError. ``len`` is used instead of truthiness so that sequence-like
    # containers with their own notion of truth are judged by length only.
    if len(seq) == 0:
        return 0 if value_only else [0, []]

    reals = not isinstance(seq[0], tuple)
    if reals:
        seq = [(x, 1) for x in seq]

    from sage.numerical.mip import MixedIntegerLinearProgram
    p = MixedIntegerLinearProgram(solver=solver, maximization=True)

    if binary:
        present = p.new_variable(binary=True)
    else:
        # NOTE(review): depending on the Sage version, an integer variable may
        # need ``nonnegative=True`` to be bounded below by 0 -- confirm
        # against the version this file targets.
        present = p.new_variable(integer=True)

    # Maximise the total value subject to the total weight not exceeding max.
    p.set_objective(
        p.sum([present[i] * item[1] for i, item in enumerate(seq)]))
    p.add_constraint(
        p.sum([present[i] * item[0] for i, item in enumerate(seq)]),
        max=max)

    if value_only:
        return p.solve(objective_only=True, log=verbose)

    objective = p.solve(log=verbose)
    present = p.get_values(present)

    # Repeat every item as many times as it was taken. The values handed back
    # by the solver may be floats sitting just below the intended integer
    # (e.g. 0.9999999); a bare ``int`` would truncate such a value and drop
    # the item, so round to the nearest integer first.
    val = []
    for i, item in enumerate(seq):
        count = int(round(present[i]))
        val.extend([item[0] if reals else item] * count)

    return [objective, val]
Exemplo n.º 23
0
class hard_EM:
    def __init__(self,
                 author_graph,
                 TAU=0.5001,
                 nparts=5,
                 init_partition=None):
        """Store the model settings, pick a starting partition, run one M-step.

        ``author_graph`` is passed through
        ``nx.convert_node_labels_to_integers`` with
        ``discard_old_labels=False`` and the relabelled graph is what the
        instance keeps. ``TAU`` is stored unchanged. ``nparts`` fixes the part
        ids ``0 .. nparts - 1``. A truthy ``init_partition`` is used as the
        starting node -> part mapping; otherwise a random one is drawn.
        """
        # The linear program is only built on the first E-step.
        self._lp_init = False
        self.TAU = TAU
        self.parts = range(nparts)
        self.author_graph = nx.convert_node_labels_to_integers(
            author_graph, discard_old_labels=False)

        # Starting values of the hidden variables (node -> part).
        if not init_partition:
            self._rand_init_partition()
        else:
            self.partition = init_partition

        # Fit the per-part parameters to the starting partition.
        self.m_step()

    def _rand_init_partition(self):
        """Draw a part for every node of the author graph.

        The ``random`` module is seeded with the current process id (which is
        also logged), then ``randint(0, nparts - 1)`` is called once per node
        in graph iteration order. The result replaces ``self.partition``.
        """
        pid = os.getpid()
        slog('Random partitioning with seed: %s' % pid)
        random.seed(pid)
        last_part = len(self.parts) - 1
        self.partition = {
            node: randint(0, last_part)
            for node in self.author_graph
        }

    def _init_LP(self):
        """Build the linear program used by the E-step; later calls are no-ops.

        Variables created here:

        - ``self.alpha[a][p]`` -- one per node ``a`` and part ``p``; for each
          node they are constrained to sum to 1 over the parts.
        - ``beta3[a][b][p]`` -- constrained to be at least
          ``|alpha[a][p] - alpha[b][p]|`` (two one-sided constraints).
        - ``beta2[a][b]`` -- constrained to equal half the sum of
          ``beta3[a][b][p]`` over the parts.

        The pair variables and constraints are only created for edges whose
        ``'denom'`` attribute is greater than 2.

        Besides ``self.lp`` and ``self.alpha`` the method stores:

        - ``self.alpha_dict[a][p]`` -- the key under which ``alpha[a][p]``
          appears in its own ``.dict()``, i.e. the variable id,
        - ``self.objF_indiv_dict`` -- variable id -> coefficient, initialised
          to 0 and refilled on every E-step,
        - ``self.objF_pair`` -- the pairwise part of the objective, built once
          with coefficient ``-weight * (log(1 - TAU) - log(TAU))`` on each
          ``beta2[a][b]``.
        """
        if self._lp_init:
            return

        slog('Init LP')
        self.lp = MixedIntegerLinearProgram(solver='GLPK', maximization=False)
        #self.lp.solver_parameter(backend.glp_simplex_or_intopt, backend.glp_simplex_only)       # LP relaxation
        self.lp.solver_parameter("iteration_limit", LP_ITERATION_LIMIT)
        # self.lp.solver_parameter("timelimit", LP_TIME_LIMIT)

        # add constraints once here
        # constraints
        self.alpha = self.lp.new_variable(dim=2)
        beta2 = self.lp.new_variable(dim=2)
        beta3 = self.lp.new_variable(dim=3)
        # alphas are indicator vars
        for a in self.author_graph:
            self.lp.add_constraint(
                sum(self.alpha[a][p] for p in self.parts) == 1)

        # beta2 is the sum of beta3s
        slog('Init LP - pair constraints')
        for a, b in self.author_graph.edges():
            if self.author_graph[a][b]['denom'] <= 2:
                continue
            # beta2[a][b] == 0.5 * sum_p beta3[a][b][p]
            self.lp.add_constraint(
                0.5 * sum(beta3[a][b][p] for p in self.parts) - beta2[a][b],
                min=0,
                max=0)
            for p in self.parts:
                # beta3[a][b][p] >= |alpha[a][p] - alpha[b][p]|
                self.lp.add_constraint(self.alpha[a][p] - self.alpha[b][p] -
                                       beta3[a][b][p],
                                       max=0)
                self.lp.add_constraint(self.alpha[b][p] - self.alpha[a][p] -
                                       beta3[a][b][p],
                                       max=0)

        # store indiv potential linear function as a dict to improve performance
        self.objF_indiv_dict = {}
        self.alpha_dict = {}
        for a in self.author_graph:
            self.alpha_dict[a] = {}
            for p in self.parts:
                # ``next(iter(...))`` takes the first key of the variable's
                # dict; unlike ``.keys()[0]`` it also works on Python 3,
                # where ``keys()`` is a view that cannot be indexed.
                var_id = next(iter(self.alpha[a][p].dict()))
                self.alpha_dict[a][p] = var_id
                self.objF_indiv_dict[var_id] = 0  # init variables coeffs to zero

        # pairwise potentials
        slog('Obj func - pair potentials')
        objF_pair_dict = {}
        s = log(1 - self.TAU) - log(self.TAU)
        for a, b in self.author_graph.edges():
            if self.author_graph[a][b]['denom'] <= 2:
                continue
            var_id = next(iter(beta2[a][b].dict()))
            objF_pair_dict[var_id] = -self.author_graph[a][b]['weight'] * s
        # NOTE(review): calling the program with an {id: coefficient} dict
        # presumably builds a linear function -- confirm for the Sage version
        # in use.
        self.objF_pair = self.lp(objF_pair_dict)

        self._lp_init = True
        slog('Init LP Done')

    def log_phi(self, a, p):
        """Return the individual log-potential of node ``a`` under part ``p``.

        The value is the sum of four terms read from ``self.theta[p]`` and the
        node attributes of ``a``:

        - ``logPr``;
        - ``logPrH`` when the node's ``hlpful_fav_unfav`` attribute is truthy,
          ``log1-PrH`` otherwise;
        - ``logPrR`` when ``isRealName`` is truthy, ``log1-PrR`` otherwise;
        - a Gaussian-shaped term in ``revLen`` using ``muL`` and ``sigma2L``.
        """
        attrs = self.author_graph.node[a]
        params = self.theta[p]

        total = params['logPr']
        total += params['logPrH' if attrs['hlpful_fav_unfav'] else 'log1-PrH']
        total += params['logPrR' if attrs['isRealName'] else 'log1-PrR']

        # -(x - mu)^2 / (2 * sigma2 + EPS) - (log_2pi + log(sigma2)) / 2
        # (log_2pi is presumably log(2 * pi); it is defined outside this class)
        quadratic = -((attrs['revLen'] - params['muL'])**2) / (
            2 * params['sigma2L'] + EPS)
        normaliser = (log_2pi + log(params['sigma2L'])) / 2.0
        total += quadratic - normaliser
        return total

    def log_likelihood(self):
        """Return the log-likelihood of the current partition.

        It is the sum of ``log_phi(a, partition[a])`` over all nodes, plus,
        for every edge, the edge ``'weight'`` times ``log(TAU)`` when both
        endpoints are in the same part and ``log(1 - TAU)`` otherwise.
        """
        graph = self.author_graph
        assignment = self.partition

        total = sum(
            self.log_phi(node, assignment[node]) for node in graph.nodes())

        log_same = log(self.TAU)
        log_diff = log(1 - self.TAU)
        for u, v in graph.edges():
            factor = log_same if assignment[u] == assignment[v] else log_diff
            total += factor * graph[u][v]['weight']
        return total

    def e_step(self):
        """Re-assign every node to a part by solving the linear program.

        The individual coefficients are refreshed to ``-log_phi(a, p)``, the
        objective is set to their sum with the pairwise part, the program is
        solved, and each node receives the part whose ``alpha`` value is the
        largest. ``self.partition`` is replaced by the new assignment.
        """
        slog('E-Step')
        if not self._lp_init:
            self._init_LP()

        slog('Obj func - indiv potentials')
        # Refresh the coefficient of every alpha[a][p] with the negated
        # individual log-potential (the program is a minimisation).
        coeffs = self.objF_indiv_dict
        for node in self.author_graph:
            var_ids = self.alpha_dict[node]
            for part in self.parts:
                coeffs[var_ids[part]] = -self.log_phi(node, part)

        indiv_objective = self.lp(coeffs)
        self.lp.set_objective(
            self.lp.sum([indiv_objective, self.objF_pair]))

        slog('Solving the LP')
        self.lp.solve(log=3)
        slog('Solving the LP Done')

        # Hard assignment: the part with the largest alpha value wins.
        self.partition = assignment = {}
        for node in self.author_graph:
            values = self.lp.get_values(self.alpha[node])
            assignment[node] = max(values, key=values.get)
        slog('E-Step Done')

    def m_step(self):
        """Re-estimate the per-part parameters from the current partition.

        One pass over the nodes collects, for each part, the number of nodes,
        how many have a truthy ``hlpful_fav_unfav`` / ``isRealName``
        attribute, and the running mean and sum of squared deviations of
        ``revLen`` (Welford's online update). ``self.theta`` is then rebuilt
        as one dict per part holding the smoothed log-probabilities, ``muL``
        and ``sigma2L``; ``EPS`` is added in the places shown below.
        """
        slog('M-Step')
        nparts = len(self.parts)
        count = [0.0] * nparts
        helpful = [0.0] * nparts
        real_name = [0.0] * nparts
        mean_len = [0.0] * nparts
        sq_dev = [0.0] * nparts

        for node in self.author_graph:
            k = self.partition[node]
            attrs = self.author_graph.node[node]
            count[k] += 1
            if attrs['hlpful_fav_unfav']:
                helpful[k] += 1
            if attrs['isRealName']:
                real_name[k] += 1
            # online mean / sum of squared deviations of the review length
            delta = attrs['revLen'] - mean_len[k]
            mean_len[k] += delta / count[k]
            sq_dev[k] += delta * (attrs['revLen'] - mean_len[k])

        names = [
            'logPr', 'logPrH', 'log1-PrH', 'logPrR', 'log1-PrR', 'sigma2L',
            'muL'
        ]
        self.theta = [dict.fromkeys(names, 0.0) for _ in self.parts]
        total = sum(count[k] for k in self.parts)

        for k in self.parts:
            theta_k = self.theta[k]
            theta_k['logPr'] = log(count[k] / (total + EPS) + EPS)
            theta_k['logPrH'] = log(helpful[k] / (count[k] + EPS) + EPS)
            theta_k['log1-PrH'] = log(1 - helpful[k] / (count[k] + EPS) + EPS)
            theta_k['logPrR'] = log(real_name[k] / (count[k] + EPS) + EPS)
            theta_k['log1-PrR'] = log(1 - real_name[k] / (count[k] + EPS) +
                                      EPS)
            theta_k['muL'] = mean_len[k]
            # sum of squared deviations over (n - 1), smoothed with EPS
            theta_k['sigma2L'] = sq_dev[k] / (count[k] - 1 + EPS) + EPS

        slog('M-Step Done')

    def iterate(self, MAX_ITER=20):
        """Alternate E- and M-steps until the log-likelihood settles.

        The loop stops as soon as the log-likelihood changed by at most 0.1
        between two consecutive rounds, or after ``MAX_ITER`` rounds. A
        decrease seen in the previous round is logged at the start of the
        next one; reaching exactly ``MAX_ITER`` rounds is logged as well.

        Returns the pair ``(log-likelihood, partition)`` of the final state.
        """
        tolerance = 0.1
        previous = -float('inf')
        current = self.log_likelihood()
        rounds = 0
        while abs(current - previous) > tolerance and rounds < MAX_ITER:
            if current < previous:
                slog('ll decreased')
            rounds += 1
            self.e_step()
            self.m_step()
            previous, current = current, self.log_likelihood()
            slog('itr #%s\tlog_l: %s\tdelta: %s' %
                 (rounds, current, current - previous))

        if rounds == MAX_ITER:
            slog('Hit max iteration: %d' % MAX_ITER)

        return current, self.partition

    def run_EM_pool(self, nprocs=mp.cpu_count()):
        """Run ``EM_RESTARTS`` jobs in a process pool and combine the results.

        ``em_parallel_mapper`` is mapped over ``EM_RESTARTS`` references to
        this instance and the results are folded with
        ``ll_partition_reducer`` into one ``(ll, partition)`` pair. Both
        helpers are defined outside this class; presumably each job performs
        one EM restart and the reducer keeps the better result -- confirm
        there.

        ``nprocs`` is the number of worker processes; its default is the CPU
        count taken when the class is defined.

        Returns ``(ll, node_to_partition)`` where the keys of
        ``node_to_partition`` are the original node labels, recovered by
        inverting ``self.author_graph.node_labels``.
        """
        # ``reduce`` is not a builtin on Python 3; functools provides it on
        # both Python 2 and 3.
        from functools import reduce

        pool = Pool(processes=nprocs)
        try:
            ll_partitions = pool.map(em_parallel_mapper, [self] * EM_RESTARTS)
        finally:
            # Stop the workers even when a job raised, so that no worker
            # process is left behind.
            pool.terminate()
        ll, partition = reduce(ll_partition_reducer, ll_partitions)

        # node_labels maps original label -> integer id; invert it.
        int_to_orig_node_label = {
            v: k
            for k, v in self.author_graph.node_labels.items()
        }
        node_to_partition = {
            int_to_orig_node_label[n]: partition[n]
            for n in partition
        }

        return ll, node_to_partition