def CGmanopt(X, objective_function, A, **kwargs):
    '''
    Minimizes the objective function over the Stiefel manifold, i.e. subject to
    the constraint X.T @ X = I_k, using Riemannian conjugate gradient.

    Args:
        X: Initial 2D array of shape (n, k) such that X.T @ X = I_k
        objective_function: Objective function F(X, A) to minimize; must return
            a tuple whose first element is the scalar cost.
        A: Additional parameters for the objective function F(X, A)

    Keyword Args:
        None (extra keyword arguments are accepted but ignored)

    Returns:
        Xopt: Value of X that minimizes the objective subject to the constraint
            (returned together with None as a placeholder second value).
    '''

    manifold = Stiefel(X.shape[0], X.shape[1])

    def cost(X):
        c, _ = objective_function(X, A)
        return c

    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    solver = ConjugateGradient(logverbosity=0)
    Xopt = solver.solve(problem)
    return Xopt, None
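A minimal usage sketch for the helper above, with a hypothetical toy objective. It assumes the objective is written with autograd.numpy so that pymanopt's autograd backend can differentiate the wrapped cost.

import autograd.numpy as anp
import numpy as np

def trace_objective(X, A):
    # Toy objective F(X, A) = -trace(X.T A X); maximizing the trace over the
    # Stiefel manifold recovers a dominant invariant subspace of the symmetric
    # matrix A. The None mirrors the (cost, extra) convention CGmanopt expects.
    return -anp.trace(X.T @ A @ X), None

n, k = 10, 3
B = np.random.randn(n, n)
A = B + B.T                                  # symmetric test matrix
X0 = np.linalg.qr(np.random.randn(n, k))[0]  # X0.T @ X0 = I_k; only its shape is used above
Xopt, _ = CGmanopt(X0, trace_objective, A)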
Example #2
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    dimension = 3  # Dimension of the embedding space, i.e. R^k
    num_points = 24  # Points on the sphere
    # This value should be as close to 0 as affordable. If it is too close to
    # zero, optimization first becomes much slower, then simply doesn't work
    # anymore because of floating point overflow errors (NaN's and Inf's start
    # to appear). If it is too large, then log-sum-exp is a poor approximation
    # of the max function, and the spread will be less uniform. An okay value
    # seems to be 0.01 or 0.001 for example. Note that a better strategy than
    # using a small epsilon straightaway is to reduce epsilon bit by bit and to
    # warm-start subsequent optimization in that way (see the sketch after this
    # example). Trust-regions will be more appropriate for these fine tunings.
    epsilon = 0.0015

    cost = create_cost(backend, dimension, num_points, epsilon)
    manifold = Elliptope(num_points, dimension)
    problem = pymanopt.Problem(manifold, cost)
    if quiet:
        problem.verbosity = 0

    solver = ConjugateGradient(mingradnorm=1e-8, maxiter=1e5)
    Yopt = solver.solve(problem)

    if quiet:
        return

    Xopt = Yopt @ Yopt.T
    maxdot = np.triu(Xopt, 1).max()
    print("Maximum inner product (cosine of the smallest angle) between any two points:", maxdot)
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    m, n, rank = 5, 4, 2
    matrix = rnd.randn(m, n)

    cost, egrad = create_cost_egrad(backend, matrix, rank)
    manifold = FixedRankEmbedded(m, n, rank)
    problem = pymanopt.Problem(manifold, cost=cost, egrad=egrad)
    if quiet:
        problem.verbosity = 0

    solver = ConjugateGradient()
    left_singular_vectors, singular_values, right_singular_vectors = \
        solver.solve(problem)
    low_rank_approximation = (left_singular_vectors @ np.diag(singular_values)
                              @ right_singular_vectors)

    if not quiet:
        u, s, vt = la.svd(matrix, full_matrices=False)
        indices = np.argsort(s)[-rank:]
        low_rank_solution = (
            u[:, indices] @ np.diag(s[indices]) @ vt[indices, :])
        print("Analytic low-rank solution:")
        print()
        print(low_rank_solution)
        print()
        print("Rank-{} approximation:".format(rank))
        print()
        print(low_rank_approximation)
        print()
        print("Frobenius norm error:",
              la.norm(low_rank_approximation - low_rank_solution))
        print()
Example #4
    def fit(self):
        f = self.f
        X = self.X
        tol = self.tol
        d = self.d
        n = self.n

        current_best_residual = np.inf
        for r in range(self.restarts):
            print('restart %d' % r)
            M0 = np.linalg.qr(np.random.randn(self.d, self.n))[0]
            my_params = [Parameter(order=self.order, distribution='uniform', lower=-5, upper=5) for _ in range(n)]
            my_basis = Basis('total-order')
            my_poly_init = Poly(parameters=my_params, basis=my_basis, method='least-squares',
                                sampling_args={'mesh': 'user-defined',
                                               'sample-points': X @ M0,
                                               'sample-outputs': f})
            my_poly_init.set_model()
            c0 = my_poly_init.coefficients.copy()

            residual = self.cost(f, X, M0, c0)

            cauchy_length = self.cauchy_length
            residual_history = []
            iter_ind = 0
            M = M0.copy()
            c = c0.copy()
            while residual > tol:
                if self.verbosity == 2:
                    print(residual)
                residual_history.append(residual)
                # Minimize over M
                func_M = lambda M_var: self.cost(f, X, M_var, c)
                grad_M = lambda M_var: self.dcostdM(f, X, M_var, c)

                manifold = Stiefel(d, n)
                solver = ConjugateGradient(maxiter=self.max_M_iters)

                problem = Problem(manifold=manifold, cost=func_M, egrad=grad_M, verbosity=0)

                M = solver.solve(problem, x=M)

                # Minimize over c
                func_c = lambda c_var: self.cost(f, X, M, c_var)
                grad_c = lambda c_var: self.dcostdc(f, X, M, c_var)

                res = minimize(func_c, x0=c, method='CG', jac=grad_c)
                c = res.x
                residual = self.cost(f, X, M, c)
                if iter_ind < cauchy_length:
                    iter_ind += 1
                elif np.abs(np.mean(residual_history[-cauchy_length:]) - residual)/residual < self.cauchy_tol:
                    break

            if self.verbosity > 0:
                print('final residual on training data: %f' % self.cost(f, X, M, c))
            if residual < current_best_residual:
                self.M = M
                self.c = c
                current_best_residual = residual
def fixedrank(A, YT, r):
    """ Solves the AX=YT problem on the manifold of r-rank matrices with  
    """

    # Instantiate a manifold
    manifold = FixedRankEmbedded(N, r, r)

    # Define the cost function (here using autograd.numpy)
    def cost(X):
        U = X[0]
        cst = 0
        for n in range(N):
            cst = cst + huber(U[n, :])
        Mat = np.matmul(np.matmul(X[0], np.diag(X[1])), X[2])
        fidelity = LA.norm(np.subtract(np.matmul(A, Mat), YT))
        return cst + lambd * fidelity**2

    problem = Problem(manifold=manifold, cost=cost)
    solver = ConjugateGradient(maxiter=maxiter)

    # Let Pymanopt do the rest
    Xopt = solver.solve(problem)

    # Assemble the solution matrix from the low-rank factors (U, S, Vt)
    Sol = np.dot(np.dot(Xopt[0], np.diag(Xopt[1])), Xopt[2])

    return Sol
Example #6
def solve_manopt(X, d, cost, egrad, Wo=None):

    D = X.shape[1]
    manifold = Grassmann(height=D, width=d)
    problem = Problem(manifold=manifold, cost=cost, egrad=egrad, verbosity=0)

    solver = ConjugateGradient(mingradnorm=1e-3)
    W = solver.solve(problem, x=Wo)

    return W
Example #7
def main_mds(D, dim=3, X=None, space='real'):
    """MDS via gradient descent with the chordal metric.

    Parameters
    ----------
    D : ndarray (n, n)
        Goal distance matrix.
    dim : int, optional
        Goal dimension (of ambient Euclidean space). Default is `dim = 3`.
    X : ndarray (dim, n), optional
        Initial value for gradient descent. `n` points in dimension `dim`. If
        both a dimension and an initial condition are specified, the initial
        condition overrides the dimension.
    space : str
        Choice of real or complex version. Options 'real', 'complex'. If
        'complex', `dim` must be even.

    """

    n = D.shape[0]
    max_d = np.max(D)
    if max_d > 1:
        print('WARNING: maximum value in distance matrix exceeds diameter of '
              'projective space. Max distance = %2.4f.' % max_d)
    manifold = Oblique(dim, n)
    solver = ConjugateGradient()
    if space == 'real':
        cost = setup_cost(D)
    elif space == 'complex':
        cost = setup_CPn_cost(D, int(dim/2))
    problem = pymanopt.Problem(manifold=manifold, cost=cost)
    if X is None:
        X_out = solver.solve(problem)
    else:
        if X.shape[0] != dim:
            print('WARNING: initial condition does not match specified goal '\
                'dimension. Finding optimum in dimension %d' %X.shape[0])
        X_out = solver.solve(problem, x=X)
    return X_out
def closest_unit_norm_column_approximation(A):
    """
    Returns the matrix with unit-norm columns that is closest to A w.r.t. the
    Frobenius norm.
    """
    m, n = A.shape

    manifold = Oblique(m, n)
    solver = ConjugateGradient()
    X = T.matrix()
    cost = 0.5 * T.sum((X - A) ** 2)

    problem = Problem(manifold=manifold, cost=cost, arg=X)
    return solver.solve(problem)
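A small check sketch for the function above (numpy imports added for self-containment; calling it assumes the Theano setup of this example works): the analytic optimum simply rescales each column of A to unit norm, as the autograd-based run() example later in this collection also verifies.

import numpy as np
import numpy.linalg as la

A = np.random.randn(5, 8)
Xopt = closest_unit_norm_column_approximation(A)
X_expected = A / la.norm(A, axis=0)[np.newaxis, :]  # analytic solution
print("Frobenius error vs. analytic solution:", la.norm(Xopt - X_expected))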
Example #10
def cp_mds_reg(X, D, lam=1.0, v=1, maxiter=1000):
    """Version of MDS in which "signs" are also an optimization parameter.

    Rather than performing a full optimization and then resetting the
    sign matrix, here we treat the signs as a parameter `A = [a_ij]` and
    minimize the cost function
        F(X,A) = ||W*(X^H(A*X) - cos(D))||^2 + lambda*||A - X^HX/|X^HX| ||^2
    Lambda is a regularization parameter we can experiment with. The
    collection of data, `X`, is treated as a point on the `Oblique`
    manifold, consisting of `k*n` matrices with unit-norm columns. Since
    we are working on a sphere in complex space we require `k` to be
    even. The first `k/2` entries of each column are the real components
    and the last `k/2` entries are the imaginary parts.

    Parameters
    ----------
    X : ndarray (k, n)
        Initial guess for data.
    D : ndarray (n, n)
        Goal distance matrix.
    lam : float, optional
        Weight to give regularization term.
    v : int, optional
        Verbosity.
    maxiter : int, optional
        Maximum number of conjugate gradient iterations.

    Returns
    -------
    X_opt : ndarray (k, n)
        Collection of points optimizing cost.

    """

    dim = X.shape[0]
    num_points = X.shape[1]
    W = distance_to_weights(D)
    Sreal, Simag = norm_rotations(X)
    A = np.vstack(
        (np.reshape(Sreal,
                    (1, num_points**2)), np.reshape(Simag, num_points**2)))
    cp_manifold = Oblique(dim, num_points)
    a_manifold = Oblique(2, num_points**2)
    manifold = Product((cp_manifold, a_manifold))
    solver = ConjugateGradient(maxiter=maxiter, maxtime=float('inf'))
    cost = setup_reg_autograd_cost(D, int(dim / 2), num_points, lam=lam)
    problem = pymanopt.Problem(cost=cost, manifold=manifold)
    Xopt, Aopt = solver.solve(problem, x=(X, A))
    Areal = np.reshape(Aopt[0, :], (num_points, num_points))
    Aimag = np.reshape(Aopt[1, :], (num_points, num_points))
    return Xopt, Areal, Aimag
Example #11
def solve_manopt(X, d, cost, egrad):

    D = X.shape[1]
    manifold = Grassmann(height=D, width=d)
    problem = Problem(manifold=manifold, cost=cost, egrad=egrad, verbosity=0)

    solver = ConjugateGradient(mingradnorm=1e-3)

    M = mean_riemann(X)
    # M is symmetric positive definite, so use eigh (real eigenvalues) and sort
    # them in descending order to build the initial subspace.
    w, v = np.linalg.eigh(M)
    idx = w.argsort()[::-1]
    v_ = v[:, idx]
    Wo = v_[:, :d]
    W = solver.solve(problem, x=Wo)
    return W
Example #12
def packing_on_the_sphere(n, k, epsilon):
    manifold = Elliptope(n, k)
    solver = ConjugateGradient(mingradnorm=1e-8, maxiter=1e5)

    def cost(X):
        Y = np.dot(X, X.T)
        # Shift the exponentials by the maximum value to reduce numerical
        # trouble due to possible overflows.
        s = np.triu(Y, 1).max()
        expY = np.exp((Y - s) / epsilon)
        # Zero out the diagonal
        expY -= np.diag(np.diag(expY))
        u = np.triu(expY, 1).sum()
        return s + epsilon * np.log(u)

    problem = Problem(manifold, cost)
    return solver.solve(problem)
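A usage sketch mirroring the run() function of the earlier packing example; the epsilon value is illustrative and numpy is assumed to be imported as np.

Y = packing_on_the_sphere(24, 3, 0.01)
gram = Y @ Y.T                        # unit diagonal, so off-diagonal entries are cosines
max_cosine = np.triu(gram, 1).max()
print("Smallest pairwise angle (radians):", np.arccos(max_cosine))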
Example #14
    def _optimize(self, max_opt_time, max_opt_iter, verbosity):
        """Optimize the GeoIMC optimization problem

        Args: The args of `solve`
        """
        residual_global = np.zeros(self.Y.data.shape)

        solver = ConjugateGradient(maxtime=max_opt_time,
                                   maxiter=max_opt_iter,
                                   linesearch=LineSearchBackTracking())
        prb = Problem(manifold=self.manifold,
                      cost=lambda x: self._cost(x, residual_global),
                      egrad=lambda z: self._egrad(z, residual_global),
                      verbosity=verbosity)
        solution = solver.solve(prb, x=self.W)
        self.W = [solution[0], solution[1], solution[2]]

        return self._cost(self.W, residual_global)
Example #15
def dominant_eigenvector(A):
    """
    Returns the dominant eigenvector of the symmetric matrix A.

    Note: For the same A, this should yield the same as the dominant invariant
    subspace example with p = 1.
    """
    m, n = A.shape
    assert m == n, "matrix must be square"
    assert np.allclose(A, A.T), "matrix must be symmetric"

    manifold = Sphere(n)
    solver = ConjugateGradient(maxiter=500, minstepsize=1e-6)
    x = T.matrix()
    cost = -x.T.dot(T.dot(A, x)).trace()

    problem = Problem(man=manifold, ad_cost=cost, ad_arg=x)
    xopt = solver.solve(problem)
    return xopt.squeeze()
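A usage sketch (assuming the Theano/pymanopt imports this example relies on are in place): compare the solver's output against the eigenvector numpy returns for the largest eigenvalue.

import numpy as np

B = np.random.randn(128, 128)
A = B + B.T                      # symmetric test matrix
x = dominant_eigenvector(A)

w, v = np.linalg.eigh(A)         # eigenvalues in ascending order
v_top = v[:, -1]                 # eigenvector of the largest (algebraic) eigenvalue
# Both vectors have unit norm, so alignment up to sign means |<x, v_top>| ~ 1.
print("aligned:", np.allclose(abs(x @ v_top), 1.0, atol=1e-3))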
Example #16
def main():
    r"""Main entry point in the graph embedding procedure."""
    args = config_parser().parse_args()

    g_pdists = load_pdists(args)
    n = g_pdists.shape[0]
    d = args.manifold_dim

    # we are actually using only the upper triangular part
    g_pdists = g_pdists[np.triu_indices(n, 1)]
    g_sq_pdists = g_pdists**2

    # the distortion cost
    def distortion_cost(X):
        man_sq_pdists = manifold_pdists(X, squared=True)

        return np.sum(np.abs(man_sq_pdists / g_sq_pdists - 1))

    # the manifold, problem, and solver
    manifold = PositiveDefinite(d, k=n)
    problem = Problem(manifold=manifold, cost=distortion_cost, verbosity=2)
    linesearch = ReduceLROnPlateau(start_lr=2e-2,
                                   patience=10,
                                   threshold=1e-4,
                                   factor=0.1,
                                   verbose=1)
    solver = ConjugateGradient(linesearch=linesearch, maxiter=1000)

    # solve it
    with Timer('training') as t:
        X_opt = solver.solve(problem, x=sample_init_points(n, d))

    # the distortion achieved
    man_pdists = manifold_pdists(X_opt)
    print('Average distortion: ', average_distortion(g_pdists, man_pdists))
    man_pdists_sym = pdists_vec_to_sym(man_pdists, n, 1e12)
    print('MAP: ', mean_average_precision(g,
                                          man_pdists_sym,
                                          diag_adjusted=True))
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    m = 5
    n = 8
    matrix = rnd.randn(m, n)

    cost, egrad = create_cost_egrad(backend, matrix)
    manifold = Oblique(m, n)
    problem = pymanopt.Problem(manifold, cost=cost, egrad=egrad)
    if quiet:
        problem.verbosity = 0

    solver = ConjugateGradient()
    Xopt = solver.solve(problem)

    if quiet:
        return

    # Calculate the actual solution by normalizing the columns of the matrix.
    X = matrix / la.norm(matrix, axis=0)[np.newaxis, :]

    # Print information about the solution.
    print("Solution found: %s" % np.allclose(X, Xopt, rtol=1e-3))
    print("Frobenius-error: %f" % la.norm(X - Xopt))
Example #18
def test_doublystochastic(N, M, K):
    rnd.seed(21)

    ns = [N] * K
    ms = [M] * K
    batch = len(ns)

    p = []
    q = []
    A = []
    for i in range(batch):
        n, m = ns[i], ms[i]
        p0 = np.random.rand(n)
        q0 = np.random.rand(m)
        p.append(p0 / np.sum(p0))
        q.append(q0 / np.sum(q0))
        A0 = rnd.rand(n, m)
        A0 = A0[np.newaxis, :]
        A0 = SKnopp(A0, p[i], q[i], n+m)
        A.append(A0)
    A = np.vstack(A)  # stack the per-slice (1, n, m) blocks into a (K, n, m) array

    def _cost(x):
        return 0.5 * (np.linalg.norm(np.array(x) - np.array(A))**2)

    def _egrad(x):
        return x - A

    def _ehess(x, u):
        return u

    manf = DoublyStochastic(n, m, p, q)
    solver = ConjugateGradient(maxiter=3, maxtime=100000)
    prblm = Problem(manifold=manf, cost=lambda x: _cost(x), egrad=lambda x: _egrad(x), ehess=lambda x, u: _ehess(x, u), verbosity=3)

    U = manf.rand()
    Uopt = solver.solve(prblm, x=U)
Example #19
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Generate latent space embeddings')
    parser.add_argument('emb1', help='path to embedding 1')
    parser.add_argument('emb2', help='path to embedding 2')
    parser.add_argument(
        '--geomm_embeddings_path',
        default=None,
        type=str,
        help=
        'directory to save the output GeoMM latent space embeddings. The output embeddings are normalized.'
    )
    parser.add_argument(
        '--encoding',
        default='utf-8',
        help='the character encoding for input/output (defaults to utf-8)')
    parser.add_argument('--verbose', default=0, type=int, help='Verbose')
    mapping_group = parser.add_argument_group(
        'mapping arguments', 'Basic embedding mapping arguments')
    mapping_group.add_argument('--dictionary',
                               default=sys.stdin.fileno(),
                               help='the dictionary file (defaults to stdin)')
    mapping_group.add_argument(
        '--normalize',
        choices=['unit', 'center', 'unitdim', 'centeremb', 'no'],
        nargs=2,
        default=[],
        help=
        'the normalization actions performed in sequence for embeddings 1 and 2'
    )

    geomm_group = parser.add_argument_group('GeoMM arguments',
                                            'Arguments for GeoMM method')
    geomm_group.add_argument('--l2_reg',
                             type=float,
                             default=1e2,
                             help='Lambda for L2 Regularization')
    geomm_group.add_argument(
        '--max_opt_time',
        type=int,
        default=5000,
        help='Maximum time limit for optimization in seconds')
    geomm_group.add_argument(
        '--max_opt_iter',
        type=int,
        default=150,
        help='Maximum number of iterations for optimization')

    args = parser.parse_args()

    if args.verbose:
        print('Current arguments: {0}'.format(args))

    dtype = 'float32'
    if args.verbose:
        print('Loading embeddings data...')

    # Read input embeddings
    emb1file = open(args.emb1,
                    encoding=args.encoding,
                    errors='surrogateescape')
    emb2file = open(args.emb2,
                    encoding=args.encoding,
                    errors='surrogateescape')
    emb1_words, x = embeddings.read(emb1file, max_voc=0, dtype=dtype)
    emb2_words, z = embeddings.read(emb2file, max_voc=0, dtype=dtype)

    # Build word to index map
    emb1_word2ind = {word: i for i, word in enumerate(emb1_words)}
    emb2_word2ind = {word: i for i, word in enumerate(emb2_words)}

    noov = 0
    emb1_indices = []
    emb2_indices = []
    f = open(args.dictionary, encoding=args.encoding, errors='surrogateescape')
    for line in f:
        emb1, emb2 = line.split()
        try:
            emb1_ind = emb1_word2ind[emb1]
            emb2_ind = emb2_word2ind[emb2]
            emb1_indices.append(emb1_ind)
            emb2_indices.append(emb2_ind)
        except KeyError:
            noov += 1
            if args.verbose:
                print('WARNING: OOV dictionary entry ({0} - {1})'.format(
                    emb1, emb2))  #, file=sys.stderr
    f.close()
    if args.verbose:
        print('Number of embedding pairs having at least one OOV: {}'.format(
            noov))
    if args.verbose:
        print('Normalizing embeddings...')

    # STEP 0: Normalization
    if len(args.normalize) > 0:
        x = normalize_emb(x, args.normalize[0])
        z = normalize_emb(z, args.normalize[1])

    # Step 1: Optimization
    if args.verbose:
        print('Beginning Optimization')
    start_time = time.time()
    x_count = len(set(emb1_indices))
    z_count = len(set(emb2_indices))

    # Filter out uniq values
    map_dict_emb1 = {}
    map_dict_emb2 = {}
    I = 0
    uniq_emb1 = []
    uniq_emb2 = []
    for i in range(len(emb1_indices)):
        if emb1_indices[i] not in map_dict_emb1.keys():
            map_dict_emb1[emb1_indices[i]] = I
            I += 1
            uniq_emb1.append(emb1_indices[i])
    J = 0
    for j in range(len(emb2_indices)):
        if emb2_indices[j] not in map_dict_emb2.keys():
            map_dict_emb2[emb2_indices[j]] = J
            J += 1
            uniq_emb2.append(emb2_indices[j])

    # Creating dictionary matrix: one nonzero entry per translation pair,
    # indexed by the positions of the unique source/target words.
    row = [map_dict_emb1[idx] for idx in emb1_indices]
    col = [map_dict_emb2[idx] for idx in emb2_indices]
    data = [1] * len(emb1_indices)
    print(f"Counts: {x_count}, {z_count}")
    A = coo_matrix((data, (row, col)), shape=(x_count, z_count))

    np.random.seed(0)
    Lambda = args.l2_reg

    U1 = TT.matrix()
    U2 = TT.matrix()
    B = TT.matrix()

    Xemb1 = x[uniq_emb1]
    Zemb2 = z[uniq_emb2]
    del x, z
    gc.collect()

    Kx, Kz = Xemb1, Zemb2
    XtAZ = Kx.T.dot(A.dot(Kz))
    XtX = Kx.T.dot(Kx)
    ZtZ = Kz.T.dot(Kz)
    AA = np.sum(A * A)

    W = (U1.dot(B)).dot(U2.T)
    regularizer = 0.5 * Lambda * (TT.sum(B**2))
    sXtX = shared(XtX)
    sZtZ = shared(ZtZ)
    sXtAZ = shared(XtAZ)

    cost = regularizer
    wtxtxw = W.T.dot(sXtX.dot(W))
    wtxtxwztz = wtxtxw.dot(sZtZ)
    cost += TT.nlinalg.trace(wtxtxwztz)
    cost += -2 * TT.sum(W * sXtAZ)
    cost += shared(AA)

    solver = ConjugateGradient(maxtime=args.max_opt_time,
                               maxiter=args.max_opt_iter)

    manifold = Product([
        Stiefel(Kx.shape[1], Kx.shape[1]),
        Stiefel(Kz.shape[1], Kz.shape[1]),
        PositiveDefinite(Kx.shape[1])
    ])
    problem = Problem(manifold=manifold,
                      cost=cost,
                      arg=[U1, U2, B],
                      verbosity=3)
    wopt = solver.solve(problem)
    print(f"Problem solved ...")

    w = wopt
    U1 = w[0]
    U2 = w[1]
    B = w[2]

    print(f"Model copied ...")

    gc.collect()

    # Step 2: Transformation
    xw = Kx.dot(U1).dot(scipy.linalg.sqrtm(B))
    zw = Kz.dot(U2).dot(scipy.linalg.sqrtm(B))
    print(f"Transformation done ...")

    end_time = time.time()
    if args.verbose:
        print('Completed training in {0:.2f} seconds'.format(end_time -
                                                             start_time))

    del Kx, Kz, B, U1, U2
    gc.collect()

    ### Save the GeoMM embeddings if requested
    xw_n = embeddings.length_normalize(xw)
    zw_n = embeddings.length_normalize(zw)

    del xw, zw
    gc.collect()

    if args.geomm_embeddings_path is not None:
        os.makedirs(args.geomm_embeddings_path, exist_ok=True)

        out_emb_fname = os.path.join(args.geomm_embeddings_path, 'emb1.vec')
        new_emb1_words = []
        for id in uniq_emb1:
            new_emb1_words.append(emb1_words[id])
        with open(out_emb_fname, 'w', encoding=args.encoding) as outfile:
            embeddings.write(new_emb1_words, xw_n, outfile)

        new_emb2_words = []
        for id in uniq_emb2:
            new_emb2_words.append(emb2_words[id])
        out_emb_fname = os.path.join(args.geomm_embeddings_path, 'emb2.vec')
        with open(out_emb_fname, 'w', encoding=args.encoding) as outfile:
            embeddings.write(new_emb2_words, zw_n, outfile)

    exit(0)
Example #20
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Map the source embeddings into the target embedding space')
    parser.add_argument('emb_file', help='the input target embeddings')
    parser.add_argument('--encoding', default='utf-8', help='the character encoding for input/output (defaults to utf-8)')
    parser.add_argument('--max_vocab', default=0,type=int, help='Maximum vocabulary to be loaded, 0 allows complete vocabulary')
    parser.add_argument('--verbose', default=0,type=int, help='Verbose')
  
    mapping_group = parser.add_argument_group('mapping arguments', 'Basic embedding mapping arguments')
    mapping_group.add_argument('-dtrain_file', '--dictionary_train_file', default=sys.stdin.fileno(), help='the training dictionary file (defaults to stdin)')
    mapping_group.add_argument('-dtest_file', '--dictionary_test_file', default=sys.stdin.fileno(), help='the test dictionary file (defaults to stdin)')
    mapping_group.add_argument('--normalize', choices=['unit', 'center', 'unitdim', 'centeremb'], nargs='*', default=[], help='the normalization actions to perform in order')
    
    geomm_group = parser.add_argument_group('GeoMM Multi arguments', 'Arguments for GeoMM Multi method')
    geomm_group.add_argument('--l2_reg', type=float,default=1e3, help='Lambda for L2 Regularization')
    geomm_group.add_argument('--max_opt_time', type=int,default=5000, help='Maximum time limit for optimization in seconds')
    geomm_group.add_argument('--max_opt_iter', type=int,default=150, help='Maximum number of iterations for optimization')
   
    eval_group = parser.add_argument_group('evaluation arguments', 'Arguments for evaluation')
    eval_group.add_argument('--normalize_eval', action='store_true', help='Normalize the embeddings at test time')
    eval_group.add_argument('--eval_batch_size', type=int,default=1000, help='Batch size for evaluation')
    eval_group.add_argument('--csls_neighbourhood', type=int,default=10, help='Neighbourhood size for CSLS')

    args = parser.parse_args()
    BATCH_SIZE = args.eval_batch_size

    # Logging
    method_name = os.path.join('logs','geomm_multi')
    directory = os.path.join(os.path.join(os.getcwd(),method_name), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if not os.path.exists(directory):
        os.makedirs(directory)
    log_file_name, file_extension = os.path.splitext(os.path.basename(args.dictionary_train_file))
    log_file_name = log_file_name + '.log'
    class Logger(object):
        def __init__(self):
            self.terminal = sys.stdout
            self.log = open(os.path.join(directory,log_file_name), "a")

        def write(self, message):
            self.terminal.write(message)
            self.log.write(message)  

        def flush(self):
            #this flush method is needed for python 3 compatibility.
            #this handles the flush command by doing nothing.
            #you might want to specify some extra behavior here.
            pass    
    sys.stdout = Logger()
    if args.verbose:
        print('Current arguments: {0}'.format(args))

    dtype = 'float32'

    if args.verbose:
        print('Loading train data...')
    words = []
    emb = []
    with open(args.emb_file, encoding=args.encoding, errors='surrogateescape') as f:
        for line in f:
            srcfile = open(line.strip(), encoding=args.encoding, errors='surrogateescape')
            words_temp, x_temp = embeddings.read(srcfile,max_voc=args.max_vocab, dtype=dtype)
            words.append(words_temp)
            emb.append(x_temp)


    # Build word to index map
    word2ind = []
    for lang in words:
        word2ind.append({word: i for i, word in enumerate(lang)})

    # Build training dictionary
    train_pairs = []
    with open(args.dictionary_train_file, encoding=args.encoding, errors='surrogateescape') as ff:
        for line in ff:
            vals = line.split(',')
            curr_dict=[int(vals[0].strip()),int(vals[1].strip())]
            src_indices = []
            trg_indices = []
            with open(vals[2].strip(), encoding=args.encoding, errors='surrogateescape') as f:
                for line in f:
                    src,trg = line.split()
                    if args.max_vocab:
                        src=src.lower()
                        trg=trg.lower()
                    try:
                        src_ind = word2ind[curr_dict[0]][src]
                        trg_ind = word2ind[curr_dict[1]][trg]
                        src_indices.append(src_ind)
                        trg_indices.append(trg_ind)
                    except KeyError:
                        if args.verbose:
                            print('WARNING: OOV dictionary entry ({0} - {1})'.format(src, trg), file=sys.stderr)
            curr_dict.append(src_indices)
            curr_dict.append(trg_indices)
            train_pairs.append(curr_dict)
    if args.verbose:
        print('Normalizing embeddings...')
    # Step 0: Normalization
    for action in args.normalize:
        if action == 'unit':
            for i in range(len(emb)):
                emb[i] = embeddings.length_normalize(emb[i])
        elif action == 'center':
            for i in range(len(emb)):
                emb[i] = embeddings.mean_center(emb[i])
        elif action == 'unitdim':
            for i in range(len(emb)):
                emb[i] = embeddings.length_normalize_dimensionwise(emb[i])
        elif action == 'centeremb':
            for i in range(len(emb)):
                emb[i] = embeddings.mean_center_embeddingwise(emb[i])


    # Step 1: Optimization
    if args.verbose:
        print('Beginning Optimization')
    start_time = time.time()
    mean_size=0
    for tp in range(len(train_pairs)):
        src_indices = train_pairs[tp][2]
        trg_indices = train_pairs[tp][3]
        x_count = len(set(src_indices))
        z_count = len(set(trg_indices))
        A = np.zeros((x_count,z_count))
        
        # Creating dictionary matrix from training set
        map_dict_src={}
        map_dict_trg={}
        I=0
        uniq_src=[]
        uniq_trg=[]
        for i in range(len(src_indices)):
            if src_indices[i] not in map_dict_src.keys():
                map_dict_src[src_indices[i]]=I
                I+=1
                uniq_src.append(src_indices[i])
        J=0
        for j in range(len(trg_indices)):
            if trg_indices[j] not in map_dict_trg.keys():
                map_dict_trg[trg_indices[j]]=J
                J+=1
                uniq_trg.append(trg_indices[j])

        for i in range(len(src_indices)):
            A[map_dict_src[src_indices[i]],map_dict_trg[trg_indices[i]]]=1
        train_pairs[tp].append(uniq_src)
        train_pairs[tp].append(uniq_trg)
        train_pairs[tp].append(A)
        mean_size+= (len(uniq_src)*len(uniq_trg))
    mean_size = mean_size/len(train_pairs)
    np.random.seed(0)
    Lambda=args.l2_reg

    variables=[]
    manif = []
    low_rank=emb[0].shape[1]
    for i in range(len(emb)):
        variables.append(TT.matrix())
        manif.append(Stiefel(emb[i].shape[1],low_rank))
    variables.append(TT.matrix())
    manif.append(PositiveDefinite(low_rank))
    B = variables[-1]
    cost = 0.5*Lambda*(TT.sum(B**2))
    for i in range(len(train_pairs)):
        x = emb[train_pairs[i][0]]
        z = emb[train_pairs[i][1]]
        U1 = variables[train_pairs[i][0]]
        U2 = variables[train_pairs[i][1]]
        cost = cost + TT.sum(((shared(x[train_pairs[i][4]]).dot(U1.dot(B.dot(U2.T)))).dot(shared(z[train_pairs[i][5]]).T)-shared(train_pairs[i][6]))**2)/float(len(train_pairs[i][2]))
    solver = ConjugateGradient(maxtime=args.max_opt_time,maxiter=args.max_opt_iter,mingradnorm=1e-12)
    manifold =Product(manif)
    problem = Problem(manifold=manifold, cost=cost, arg=variables, verbosity=3)
    wopt = solver.solve(problem)
    w= wopt
    U1 = w[0]
    U2 = w[1]
    B = w[2]

    # Step 2: Transformation
    Bhalf = scipy.linalg.sqrtm(wopt[-1])
    test_emb = []
    for i in range(len(emb)):
        test_emb.append(emb[i].dot(wopt[i]).dot(Bhalf))

    end_time = time.time()
    if args.verbose:
        print('Completed training in {0:.2f} seconds'.format(end_time-start_time))
    gc.collect()


    # Step 3: Evaluation
    if args.verbose:
        print('Beginning Evaluation')

    if args.normalize_eval:
        for i in range(len(test_emb)):
            test_emb[i] = embeddings.length_normalize(test_emb[i])

    # Loading test dictionary
    with open(args.dictionary_test_file, encoding=args.encoding, errors='surrogateescape') as ff:
        for line in ff:
            vals = line.split(',')
            curr_dict=[int(vals[0].strip()),int(vals[1].strip())]
            with open(vals[2].strip(), encoding=args.encoding, errors='surrogateescape') as f:
                src_word2ind = word2ind[curr_dict[0]]
                trg_word2ind = word2ind[curr_dict[1]]
                xw = test_emb[curr_dict[0]]
                zw = test_emb[curr_dict[1]]
                src2trg = collections.defaultdict(set)
                trg2src = collections.defaultdict(set)
                oov = set()
                vocab = set()
                for line in f:
                    src, trg = line.split()
                    if args.max_vocab:
                        src=src.lower()
                        trg=trg.lower()
                    try:
                        src_ind = src_word2ind[src]
                        trg_ind = trg_word2ind[trg]
                        src2trg[src_ind].add(trg_ind)
                        trg2src[trg_ind].add(src_ind)
                        vocab.add(src)
                    except KeyError:
                        oov.add(src)
                src = list(src2trg.keys())
                trgt = list(trg2src.keys())

                oov -= vocab  # If one of the translation options is in the vocabulary, then the entry is not an oov
                coverage = len(src2trg) / (len(src2trg) + len(oov))
                f.close()

                translation = collections.defaultdict(int)
                translation5 = collections.defaultdict(list)
                translation10 = collections.defaultdict(list)

                t=time.time()
                nbrhood_x=np.zeros(xw.shape[0])
                nbrhood_z=np.zeros(zw.shape[0])
                nbrhood_z2=cp.zeros(zw.shape[0])
                for i in range(0, len(src), BATCH_SIZE):
                    j = min(i + BATCH_SIZE, len(src))
                    similarities = xw[src[i:j]].dot(zw.T)
                    similarities_x = -1*np.partition(-1*similarities,args.csls_neighbourhood-1 ,axis=1)
                    nbrhood_x[src[i:j]]=np.mean(similarities_x[:,:args.csls_neighbourhood],axis=1)
                batch_num=1
                for i in range(0, zw.shape[0], BATCH_SIZE):
                    j = min(i + BATCH_SIZE, zw.shape[0])
                    similarities = -1*cp.partition(-1*cp.dot(cp.asarray(zw[i:j]),cp.transpose(cp.asarray(xw))),args.csls_neighbourhood-1 ,axis=1)[:,:args.csls_neighbourhood]
                    nbrhood_z2[i:j]=(cp.mean(similarities[:,:args.csls_neighbourhood],axis=1))
                    batch_num+=1
                nbrhood_z=cp.asnumpy(nbrhood_z2)
                for i in range(0, len(src), BATCH_SIZE):
                    j = min(i + BATCH_SIZE, len(src))
                    similarities = xw[src[i:j]].dot(zw.T)
                    similarities = np.transpose(np.transpose(2*similarities) - nbrhood_x[src[i:j]])- nbrhood_z
                    nn = similarities.argmax(axis=1).tolist()
                    similarities = np.argsort((similarities),axis=1)

                    nn5 = (similarities[:,-5:])
                    nn10 = (similarities[:,-10:])
                    for k in range(j-i):
                        translation[src[i+k]] = nn[k]
                        translation5[src[i+k]] = nn5[k]
                        translation10[src[i+k]] = nn10[k]
                accuracy = np.mean([1 if translation[i] in src2trg[i] else 0 for i in src])
                mean=0
                for i in src:
                    for k in translation5[i]:
                        if k in src2trg[i]:
                            mean+=1
                            break

                mean/=len(src)
                accuracy5 = mean

                mean=0
                for i in src:
                    for k in translation10[i]:
                        if k in src2trg[i]:
                            mean+=1
                            break

                mean/=len(src)
                accuracy10 = mean
                print('Coverage:{0:7.2%}  Accuracy:{1:7.2%}  Accuracy(Top 5):{2:7.2%}  Accuracy(Top 10):{3:7.2%}'.format(coverage, accuracy, accuracy5, accuracy10))
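The retrieval loop above implements CSLS scoring: the raw similarity of a pair is corrected by the average similarity of each word to its k nearest neighbours in the other language (the nbrhood_x / nbrhood_z terms). A compact, un-batched numpy sketch of the same criterion (hypothetical helper; assumes the rows of xw and zw are length-normalised):

import numpy as np

def csls_scores(xw, zw, k=10):
    # score(i, j) = 2 * cos(x_i, z_j) - r_x(i) - r_z(j), where r_* is the mean
    # similarity to the k nearest neighbours in the other language.
    sims = xw @ zw.T
    r_x = np.sort(sims, axis=1)[:, -k:].mean(axis=1)   # source -> target neighbourhood
    r_z = np.sort(sims, axis=0)[-k:, :].mean(axis=0)   # target -> source neighbourhood
    return 2 * sims - r_x[:, None] - r_z[None, :]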
Example #21
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Map the source embeddings into the target embedding space')
    parser.add_argument('src_input', help='the input source embeddings')
    parser.add_argument('trg_input', help='the input target embeddings')
    parser.add_argument('--model_path', default=None, type=str, help='directory to save the model')
    parser.add_argument('--geomm_embeddings_path', default=None, type=str, help='directory to save the output GeoMM latent space embeddings. The output embeddings are normalized.')
    parser.add_argument('--encoding', default='utf-8', help='the character encoding for input/output (defaults to utf-8)')
    parser.add_argument('--max_vocab', default=0,type=int, help='Maximum vocabulary to be loaded, 0 allows complete vocabulary')
    parser.add_argument('--verbose', default=0,type=int, help='Verbose')
    mapping_group = parser.add_argument_group('mapping arguments', 'Basic embedding mapping arguments')
    mapping_group.add_argument('-dtrain', '--dictionary_train', default=sys.stdin.fileno(), help='the training dictionary file (defaults to stdin)')
    mapping_group.add_argument('-dtest', '--dictionary_test', default=sys.stdin.fileno(), help='the test dictionary file (defaults to stdin)')
    mapping_group.add_argument('--normalize', choices=['unit', 'center', 'unitdim', 'centeremb'], nargs='*', default=[], help='the normalization actions to perform in order')

    geomm_group = parser.add_argument_group('GeoMM arguments', 'Arguments for GeoMM method')
    geomm_group.add_argument('--l2_reg', type=float,default=1e2, help='Lambda for L2 Regularization')
    geomm_group.add_argument('--max_opt_time', type=int,default=5000, help='Maximum time limit for optimization in seconds')
    geomm_group.add_argument('--max_opt_iter', type=int,default=150, help='Maximum number of iterations for optimization')

    eval_group = parser.add_argument_group('evaluation arguments', 'Arguments for evaluation')
    eval_group.add_argument('--normalize_eval', action='store_true', help='Normalize the embeddings at test time')
    eval_group.add_argument('--eval_batch_size', type=int,default=1000, help='Batch size for evaluation')
    eval_group.add_argument('--csls_neighbourhood', type=int,default=10, help='Neighbourhood size for CSLS')

    args = parser.parse_args()
    BATCH_SIZE = args.eval_batch_size

    ## Logging
    #method_name = os.path.join('logs','geomm')
    #directory = os.path.join(os.path.join(os.getcwd(),method_name), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    #if not os.path.exists(directory):
    #    os.makedirs(directory)
    #log_file_name, file_extension = os.path.splitext(os.path.basename(args.dictionary_train))
    #log_file_name = log_file_name + '.log'
    #class Logger(object):
    #    def __init__(self):
    #        self.terminal = sys.stdout
    #        self.log = open(os.path.join(directory,log_file_name), "a")

    #    def write(self, message):
    #        self.terminal.write(message)
    #        self.log.write(message)

    #    def flush(self):
    #        #this flush method is needed for python 3 compatibility.
    #        #this handles the flush command by doing nothing.
    #        #you might want to specify some extra behavior here.
    #        pass
    #sys.stdout = Logger()
    if args.verbose:
        print('Current arguments: {0}'.format(args))

    dtype = 'float32'
    if args.verbose:
        print('Loading train data...')
    # Read input embeddings
    srcfile = open(args.src_input, encoding=args.encoding, errors='surrogateescape')
    trgfile = open(args.trg_input, encoding=args.encoding, errors='surrogateescape')
    src_words, x = embeddings.read(srcfile,max_voc=args.max_vocab, dtype=dtype)
    trg_words, z = embeddings.read(trgfile,max_voc=args.max_vocab, dtype=dtype)

    # Build word to index map
    src_word2ind = {word: i for i, word in enumerate(src_words)}
    trg_word2ind = {word: i for i, word in enumerate(trg_words)}

    # Build training dictionary
    noov=0
    src_indices = []
    trg_indices = []
    f = open(args.dictionary_train, encoding=args.encoding, errors='surrogateescape')
    for line in f:
        src,trg = line.split()
        if args.max_vocab:
            src=src.lower()
            trg=trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src_indices.append(src_ind)
            trg_indices.append(trg_ind)
        except KeyError:
            noov+=1
            if args.verbose:
                print('WARNING: OOV dictionary entry ({0} - {1})'.format(src, trg)) #, file=sys.stderr
    f.close()
    if args.verbose:
        print('Number of training pairs having at least one OOV: {}'.format(noov))
    if args.verbose:
        print('Normalizing embeddings...')
    # STEP 0: Normalization
    for action in args.normalize:
        if action == 'unit':
            x = embeddings.length_normalize(x)
            z = embeddings.length_normalize(z)
        elif action == 'center':
            x = embeddings.mean_center(x)
            z = embeddings.mean_center(z)
        elif action == 'unitdim':
            x = embeddings.length_normalize_dimensionwise(x)
            z = embeddings.length_normalize_dimensionwise(z)
        elif action == 'centeremb':
            x = embeddings.mean_center_embeddingwise(x)
            z = embeddings.mean_center_embeddingwise(z)


    # Step 1: Optimization
    if args.verbose:
        print('Beginning Optimization')
    start_time = time.time()
    x_count = len(set(src_indices))
    z_count = len(set(trg_indices))
    A = np.zeros((x_count,z_count))

    # Creating dictionary matrix from training set
    map_dict_src={}
    map_dict_trg={}
    I=0
    uniq_src=[]
    uniq_trg=[]
    for i in range(len(src_indices)):
        if src_indices[i] not in map_dict_src.keys():
            map_dict_src[src_indices[i]]=I
            I+=1
            uniq_src.append(src_indices[i])
    J=0
    for j in range(len(trg_indices)):
        if trg_indices[j] not in map_dict_trg.keys():
            map_dict_trg[trg_indices[j]]=J
            J+=1
            uniq_trg.append(trg_indices[j])

    for i in range(len(src_indices)):
        A[map_dict_src[src_indices[i]],map_dict_trg[trg_indices[i]]]=1

    np.random.seed(0)
    Lambda=args.l2_reg

    U1 = TT.matrix()
    U2 = TT.matrix()
    B  = TT.matrix()

    Kx, Kz = x[uniq_src], z[uniq_trg]
    XtAZ = Kx.T.dot(A.dot(Kz))
    XtX = Kx.T.dot(Kx)
    ZtZ = Kz.T.dot(Kz)
    # AA = np.sum(A*A) # this can be added if cost needs to be compared to original geomm

    W = (U1.dot(B)).dot(U2.T)
    regularizer = 0.5*Lambda*(TT.sum(B**2))
    sXtX = shared(XtX)
    sZtZ = shared(ZtZ)
    sXtAZ = shared(XtAZ)

    cost = regularizer
    wtxtxw = W.T.dot(sXtX.dot(W))
    wtxtxwztz = wtxtxw.dot(sZtZ)
    cost += TT.nlinalg.trace(wtxtxwztz)
    cost += -2 * TT.sum(W * sXtAZ)
    # cost += shared(AA) # this can be added if cost needs to be compared with original geomm

    solver = ConjugateGradient(maxtime=args.max_opt_time,maxiter=args.max_opt_iter)

    manifold =Product([Stiefel(x.shape[1], x.shape[1]),Stiefel(z.shape[1], x.shape[1]),PositiveDefinite(x.shape[1])])
    #manifold =Product([Stiefel(x.shape[1], 200),Stiefel(z.shape[1], 200),PositiveDefinite(200)])
    problem = Problem(manifold=manifold, cost=cost, arg=[U1,U2,B], verbosity=3)
    wopt = solver.solve(problem)

    w= wopt
    U1 = w[0]
    U2 = w[1]
    B = w[2]

    ### Save the models if requested
    if args.model_path is not None:
        os.makedirs(args.model_path,exist_ok=True)
        np.savetxt('{}/U_src.csv'.format(args.model_path),U1)
        np.savetxt('{}/U_tgt.csv'.format(args.model_path),U2)
        np.savetxt('{}/B.csv'.format(args.model_path),B)

    # Step 2: Transformation
    xw = x.dot(U1).dot(scipy.linalg.sqrtm(B))
    zw = z.dot(U2).dot(scipy.linalg.sqrtm(B))

    end_time = time.time()
    if args.verbose:
        print('Completed training in {0:.2f} seconds'.format(end_time-start_time))
    gc.collect()

    ### Save the GeoMM embeddings if requested
    xw_n = embeddings.length_normalize(xw)
    zw_n = embeddings.length_normalize(zw)
    if args.geomm_embeddings_path is not None:
        os.makedirs(args.geomm_embeddings_path,exist_ok=True)

        out_emb_fname=os.path.join(args.geomm_embeddings_path,'src.vec')
        with open(out_emb_fname,'w',encoding=args.encoding) as outfile:
            embeddings.write(src_words,xw_n,outfile)

        out_emb_fname=os.path.join(args.geomm_embeddings_path,'trg.vec')
        with open(out_emb_fname,'w',encoding=args.encoding) as outfile:
            embeddings.write(trg_words,zw_n,outfile)

    # Step 3: Evaluation
    if args.normalize_eval:
        xw = xw_n
        zw = zw_n

    X = xw[src_indices]
    Z = zw[trg_indices]

    # Loading test dictionary
    f = open(args.dictionary_test, encoding=args.encoding, errors='surrogateescape')
    src2trg = collections.defaultdict(set)
    trg2src = collections.defaultdict(set)
    oov = set()
    vocab = set()
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src=src.lower()
            trg=trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src2trg[src_ind].add(trg_ind)
            trg2src[trg_ind].add(src_ind)
            vocab.add(src)
        except KeyError:
            oov.add(src)
    src = list(src2trg.keys())
    trgt = list(trg2src.keys())

    oov -= vocab  # If one of the translation options is in the vocabulary, then the entry is not an oov
    coverage = len(src2trg) / (len(src2trg) + len(oov))
    f.close()

    translation = collections.defaultdict(int)
    translation5 = collections.defaultdict(list)
    translation10 = collections.defaultdict(list)

    ### compute nearest neighbours of x in z
    t=time.time()
    nbrhood_x=np.zeros(xw.shape[0])

    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities_x = -1*np.partition(-1*similarities,args.csls_neighbourhood-1 ,axis=1)
        nbrhood_x[src[i:j]]=np.mean(similarities_x[:,:args.csls_neighbourhood],axis=1)

    ### compute nearest neighbours of z in x (GPU version)
    nbrhood_z=np.zeros(zw.shape[0])
    with cp.cuda.Device(0):
        nbrhood_z2=cp.zeros(zw.shape[0])
        batch_num=1
        for i in range(0, zw.shape[0], BATCH_SIZE):
            j = min(i + BATCH_SIZE, zw.shape[0])
            similarities = -1*cp.partition(-1*cp.dot(cp.asarray(zw[i:j]),cp.transpose(cp.asarray(xw))),args.csls_neighbourhood-1 ,axis=1)[:,:args.csls_neighbourhood]
            nbrhood_z2[i:j]=(cp.mean(similarities[:,:args.csls_neighbourhood],axis=1))
            batch_num+=1
        nbrhood_z=cp.asnumpy(nbrhood_z2)

    #### compute nearest neighbours of z in x (CPU version)
    #nbrhood_z=np.zeros(zw.shape[0])
    #for i in range(0, len(zw.shape[0]), BATCH_SIZE):
    #    j = min(i + BATCH_SIZE, len(zw.shape[0]))
    #    similarities = zw[i:j].dot(xw.T)
    #    similarities_z = -1*np.partition(-1*similarities,args.csls_neighbourhood-1 ,axis=1)
    #    nbrhood_z[i:j]=np.mean(similarities_z[:,:args.csls_neighbourhood],axis=1)

    #### find translation
    #for i in range(0, len(src), BATCH_SIZE):
    #    j = min(i + BATCH_SIZE, len(src))
    #    similarities = xw[src[i:j]].dot(zw.T)
    #    similarities = np.transpose(np.transpose(2*similarities) - nbrhood_x[src[i:j]]) - nbrhood_z
    #    nn = similarities.argmax(axis=1).tolist()
    #    similarities = np.argsort((similarities),axis=1)

    #    nn5 = (similarities[:,-5:])
    #    nn10 = (similarities[:,-10:])
    #    for k in range(j-i):
    #        translation[src[i+k]] = nn[k]
    #        translation5[src[i+k]] = nn5[k]
    #        translation10[src[i+k]] = nn10[k]


    #if args.geomm_embeddings_path is not None:
    #    delim=','
    #    os.makedirs(args.geomm_embeddings_path,exist_ok=True)

    #    translations_fname=os.path.join(args.geomm_embeddings_path,'translations.csv')
    #    with open(translations_fname,'w',encoding=args.encoding) as translations_file:
    #        for src_id in src:
    #            src_word = src_words[src_id]
    #            all_trg_words = [ trg_words[trg_id] for trg_id in src2trg[src_id] ]
    #            trgout_words = [ trg_words[j] for j in translation10[src_id] ]
    #            ss = list(nn10[src_id,:])
    #
    #            p1 = ':'.join(all_trg_words)
    #            p2 = delim.join( [ '{}{}{}'.format(w,delim,s) for w,s in zip(trgout_words,ss) ] )
    #            translations_file.write( '{s}{delim}{p1}{delim}{p2}\n'.format(s=src_word, delim=delim, p1=p1, p2=p2) )

    ### find translation  (and write to file if output requested)
    delim=','
    translations_file =None
    if args.geomm_embeddings_path is not None:
        os.makedirs(args.geomm_embeddings_path,exist_ok=True)
        translations_fname=os.path.join(args.geomm_embeddings_path,'translations.csv')
        translations_file = open(translations_fname,'w',encoding=args.encoding)

    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities = np.transpose(np.transpose(2*similarities) - nbrhood_x[src[i:j]]) - nbrhood_z
        nn = similarities.argmax(axis=1).tolist()
        similarities = np.argsort((similarities),axis=1)

        nn5 = (similarities[:,-5:])
        nn10 = (similarities[:,-10:])
        for k in range(j-i):
            translation[src[i+k]] = nn[k]
            translation5[src[i+k]] = nn5[k]
            translation10[src[i+k]] = nn10[k]


            if args.geomm_embeddings_path is not None:
                src_id=src[i+k]
                src_word = src_words[src_id]
                all_trg_words = [ trg_words[trg_id] for trg_id in src2trg[src_id] ]
                trgout_words = [ trg_words[j] for j in translation10[src_id] ]
                #ss = list(nn10[src_id,:])

                p1 = ':'.join(all_trg_words)
                p2 = ':'.join(trgout_words)
                #p2 = delim.join( [ '{}{}{}'.format(w,delim,s) for w,s in zip(trgout_words,ss) ] )
                translations_file.write( '{s}{delim}{p1}{delim}{p2}\n'.format(s=src_word, p1=p1, p2=p2, delim=delim) )

    if args.geomm_embeddings_path is not None:
        translations_file.close()

    accuracy = np.mean([1 if translation[i] in src2trg[i] else 0 for i in src])
    mean=0
    for i in src:
        for k in translation5[i]:
            if k in src2trg[i]:
                mean+=1
                break

    mean/=len(src)
    accuracy5 = mean

    mean=0
    for i in src:
        for k in translation10[i]:
            if k in src2trg[i]:
                mean+=1
                break

    mean/=len(src)
    accuracy10 = mean
    message = args.src_input.split(".")[-2] + "-->" + args.trg_input.split(".")[-2] + ": " + \
        'Coverage:{0:7.2%}  Accuracy:{1:7.2%}  Accuracy(Top 5):{2:7.2%}  Accuracy(Top 10):{3:7.2%}'.format(
            coverage, accuracy, accuracy5, accuracy10)
    print(message)
Example #22
class LogisticMF():
    def __init__(self,
                 counts,
                 num_factors,
                 reg_param=0.6,
                 gamma=1.0,
                 iterations=30,
                 minstepsize=1e-9):
        self.counts = counts
        N = 20000
        self.counts = counts[:N, :N]
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors + 2
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded2Factors(self.num_users,
                                                  self.num_items,
                                                  self.num_factors)
        self.solver = ConjugateGradient(maxiter=self.iterations,
                                        minstepsize=self.minstepsize)

    def train_model(self):
        self.L = T.matrix('L')
        self.R = T.matrix('R')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.L, self.R])
        left, right = self.solver.solve(problem)
        self.user_vectors = left[:, :-2]
        self.item_vectors = right[:, :-2]
        self.user_biases = left[:, -1]
        self.item_biases = right[:, -2]
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))
        print("how much user outer? {}".format(
            np.average(np.isclose(left[:, -2], 1))))
        print("how much item outer? {}".format(
            np.average(np.isclose(right[:, -1], 1))))
        print('user delta: {} in norm, {} in max abs'.format(
            la.norm(left[:, -2] - 1), np.max(np.abs(left[:, -2] - 1))))
        print('item delta: {} in norm, {} in max abs'.format(
            la.norm(right[:, -1] - 1), np.max(np.abs(right[:, -1] - 1))))

    def evaluate_lowrank(self, U, V, item, fast=False):
        if hasattr(item, '__len__') and len(item) == 2 and len(item[0]) == len(
                item[1]):
            if fast:
                rows = U[item[0], :]
                cols = V[item[1], :]
                data = (rows * cols).sum(1)
                return data
            else:
                idx_argsort = item[0].argsort()
                item = (item[0][idx_argsort], item[1][idx_argsort])

                vals, idxs, counts = [theano.shared(it) for it in\
                                      np.unique(item[0], return_index=True, return_counts=True)]
                output = T.zeros(int(np.max(counts.get_value())))
                it1 = theano.shared(item[1])

                def process_partial_dot(row_idx, out, U, V, item):
                    partial_dot = T.dot(
                        U[vals[row_idx], :],
                        V[item[idxs[row_idx]:idxs[row_idx] +
                               counts[row_idx]], :].T)
                    return T.set_subtensor(out[:counts[row_idx]], partial_dot)

                parts, updates = theano.scan(fn=process_partial_dot,
                                             outputs_info=output,
                                             sequences=T.arange(vals.size),
                                             non_sequences=[U, V, it1])
                mask = np.ones(
                    (vals.get_value().size, int(np.max(counts.get_value()))))
                for i, count in enumerate(counts.get_value()):
                    mask[i, count:] = 0
                return parts[theano.shared(mask).nonzero()].ravel()
        else:
            raise ValueError('__getitem__ now supports only indices set')

    def log_likelihood(self):
        Users = self.L[:, :-2]
        Items = self.R[:, :-2]
        UserBiases = self.L[:, -1]
        ItemBiases = self.R[:, -2]
        UserOuter = self.L[:, -2]
        ItemOuter = self.R[:, -1]

        ## A = T.dot(Users, Items.T)
        ## A += UserBiases
        ## A += ItemBiases.T
        ## B = A * self.counts
        ## loglik = T.sum(B)

        # A implicitly stored as self.L @ self.R.T
        # loglik = T.sum(A * self.counts) => sum over nonzeros only
        print('nnz size: {}'.format(self.counts.nonzero()[0].size))
        loglik = T.dot(
            self.evaluate_lowrank(self.L,
                                  self.R,
                                  self.counts.nonzero(),
                                  fast=False),
            np.array(self.counts[self.counts.nonzero()]).ravel())

        ## A = T.exp(A)
        ## A += 1
        ## A = T.log(A)
        # Here we use the Taylor series ln(exp(x) + 1) = ln(2) + x/2 + x^2/8 + O(x^4) around x = 0
        # ln(2)
        const_term = (T.ones(
            (self.num_users, 1)) * np.log(2), T.ones((self.num_items, 1)))
        # x/2
        first_order_term = (0.5 * self.L, 0.5 * self.R)
        # x^2/8
        second_order_term = hadamard((self.L, self.R), (self.L, self.R),
                                     self.num_factors)
        second_order_term = tuple(factor / 8.0 for factor in second_order_term)

        grouped_factors = list(
            zip(const_term, first_order_term, second_order_term))
        A = (T.concatenate(grouped_factors[0],
                           axis=1), T.concatenate(grouped_factors[1], axis=1))

        ## A = (self.counts + 1) * A
        ## loglik -= T.sum(A)
        loglik -= sum_lowrank(A)
        loglik -= T.dot(
            self.evaluate_lowrank(A[0],
                                  A[1],
                                  self.counts.nonzero(),
                                  fast=False),
            np.array(self.counts[self.counts.nonzero()]).ravel())

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(Users))
        loglik -= 0.5 * self.reg_param * T.sum(T.square(Items))

        # We need to keep UserOuter and ItemOuter strictly at one so that they
        # form proper outer products with the biases.
        loglik -= self.num_users * T.sum(T.square(UserOuter - 1))
        loglik -= self.num_items * T.sum(T.square(ItemOuter - 1))

        # Return the negated log-likelihood because the solver minimizes the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
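# A quick standalone check (not part of the original listing) of the Taylor
# approximation used in log_likelihood above: near x = 0,
# ln(1 + exp(x)) ~ ln(2) + x/2 + x**2/8, with the gap growing as |x| increases.
import numpy as np

x = np.linspace(-1.0, 1.0, 5)
exact = np.log1p(np.exp(x))
approx = np.log(2) + x / 2 + x ** 2 / 8
print('max abs error of the expansion on [-1, 1]:', np.max(np.abs(exact - approx)))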
Example #23
class UsvRiemannianLogisticMF():
    def __init__(self, counts, num_factors, reg_param=0.6, gamma=1.0,
                 iterations=30, minstepsize=1e-9):
        self.counts = counts
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded(self.num_users, self.num_items, self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations, minstepsize=self.minstepsize)

    def train_model(self, x0=None):
        self.U = T.matrix('U')
        self.S = T.matrix('S')
        self.V = T.matrix('V')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.U, self.S, self.V])

        if x0 is None:
            user_vectors = np.random.normal(size=(self.num_users,
                                                       self.num_factors + 1))
            item_vectors = np.random.normal(size=(self.num_items,
                                                       self.num_factors + 1))
            s = rnd.random(self.num_factors + 1)
            s[:-1] = np.sort(s[:-1])[::-1]

            x0 = (user_vectors, np.diag(s), item_vectors.T)
        else:
            x0 = x0
        (left, middle, right), self.loss_history = self.solver.solve(problem, x=x0)
        right = right.T

        s_mid = np.diag(np.sqrt(np.diag(middle)[:-1]))
        self.middle = s_mid


        print('U norm: {}'.format(la.norm(left[:, :-1])))
        print('V norm: {}'.format(la.norm(right[:, :-1])))
        self.user_vectors = left[:, :-1].dot(s_mid)
        self.item_vectors = right[:, :-1].dot(s_mid)
        self.user_biases = left[:, -1] * np.sqrt(middle[-1, -1])
        self.item_biases = right[:, -1] * np.sqrt(middle[-1, -1])
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))
        print('LL: {}'.format(self._log_likelihood()))

    def _log_likelihood(self):
        loglik = 0
        A = np.dot(self.user_vectors, self.item_vectors.T)
        A += self.user_biases[:, np.newaxis]  # add user biases row-wise
        A += self.item_biases[np.newaxis, :]  # add item biases column-wise
        B = A * self.counts
        loglik += np.sum(B)

        A = np.exp(A)
        A += 1

        A = np.log(A)
        A = (self.counts + 1) * A
        loglik -= np.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * np.sum(np.square(np.diag(self.middle)))
        return loglik

    def log_likelihood(self):
        Users = self.U[:, :-1]
        Middle = self.S
        Items = self.V[:-1, :]
        UserBiases = self.U[:, -1].reshape((-1, 1))
        ItemBiases = self.V[-1, :].reshape((-1, 1))

        A = T.dot(T.dot(self.U[:, :-1], self.S[:-1, :-1]), self.V[:-1, :])
        A = T.inc_subtensor(A[:, :], UserBiases * T.sqrt(self.S[-1, -1]))
        A = T.inc_subtensor(A[:, :], ItemBiases.T * T.sqrt(self.S[-1, -1]))
        B = A * self.counts
        loglik = T.sum(B)

        A = T.exp(A)
        A += 1
        A = T.log(A)

        A = (self.counts + 1) * A
        loglik -= T.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(T.diag(self.S)[:-1]))

        # Return the negated log-likelihood because the solver minimizes the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
Example #24
      end='\n\n')

# Take a step further and use Pymanopt to solve the optimization as a constrained problem
from pymanopt import Problem
from pymanopt.solvers import ConjugateGradient
from pymanopt.manifolds import Stiefel


# define objective function
def pca_objective(U):
    return -np.trace(np.dot(U.T, np.dot(cov, U)))


# set up Pymanopt problem and solve.
solver = ConjugateGradient(maxiter=1000)
manifold = Stiefel(D, components)
problem = Problem(manifold=manifold, cost=pca_objective, verbosity=0)
Uopt = solver.solve(problem)

print('Solution found using Pymanopt = \n', Uopt, end='\n\n')

# Find the matrix T such that U @ T = Uopt
T = np.linalg.lstsq(U, Uopt, rcond=None)[0]

# Assuming this is a rotation matrix, compute the angle and confirm this is indeed a rotation
angle = np.arccos(T[0, 0])
rotation = np.array([[np.cos(angle), np.sin(angle)],
                     [-np.sin(angle), np.cos(angle)]])
print('T matrix found as: \n', T)
print('rotation matrix associated with an angle of ', np.round(angle, 4),
      ' rad : \n', rotation)
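# A standalone sketch (synthetic data; not part of the example above) of why
# the recovered T should be close to a rotation: the trace objective only pins
# down the column span of U, so any orthogonal mixing of an optimal basis is
# equally optimal.
import numpy as np

rng = np.random.default_rng(0)
C = rng.standard_normal((6, 6))
C = C @ C.T                                     # a PSD stand-in for `cov`
U_star = np.linalg.eigh(C)[1][:, -2:]           # top-2 eigenvectors
theta = 0.7
Q = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta), np.cos(theta)]])  # an arbitrary 2x2 rotation
print(np.isclose(np.trace(U_star.T @ C @ U_star),
                 np.trace((U_star @ Q).T @ C @ (U_star @ Q))))  # True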
Example #25
class UsvRiemannianLogisticMF():
    def __init__(self,
                 counts,
                 num_factors,
                 reg_param=0.6,
                 gamma=1.0,
                 iterations=30,
                 minstepsize=1e-9):
        self.counts = counts
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded(self.num_users, self.num_items,
                                          self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations,
                                        minstepsize=self.minstepsize)

    def train_model(self, x0=None):
        self.U = T.matrix('U')
        self.S = T.matrix('S')
        self.V = T.matrix('V')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.U, self.S, self.V])

        if x0 is None:
            user_vectors = np.random.normal(size=(self.num_users,
                                                  self.num_factors + 1))
            item_vectors = np.random.normal(size=(self.num_items,
                                                  self.num_factors + 1))
            s = rnd.random(self.num_factors + 1)
            s[:-1] = np.sort(s[:-1])[::-1]

            x0 = (user_vectors, np.diag(s), item_vectors.T)
        else:
            x0 = x0
        (left, middle, right), self.loss_history = self.solver.solve(problem,
                                                                     x=x0)
        right = right.T

        s_mid = np.diag(np.sqrt(np.diag(middle)[:-1]))
        self.middle = s_mid

        print('U norm: {}'.format(la.norm(left[:, :-1])))
        print('V norm: {}'.format(la.norm(right[:, :-1])))
        self.user_vectors = left[:, :-1].dot(s_mid)
        self.item_vectors = right[:, :-1].dot(s_mid)
        self.user_biases = left[:, -1] * np.sqrt(middle[-1, -1])
        self.item_biases = right[:, -1] * np.sqrt(middle[-1, -1])
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))
        print('LL: {}'.format(self._log_likelihood()))

    def _log_likelihood(self):
        loglik = 0
        A = np.dot(self.user_vectors, self.item_vectors.T)
        A += self.user_biases[:, np.newaxis]  # add user biases row-wise
        A += self.item_biases[np.newaxis, :]  # add item biases column-wise
        B = A * self.counts
        loglik += np.sum(B)

        A = np.exp(A)
        A += 1

        A = np.log(A)
        A = (self.counts + 1) * A
        loglik -= np.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * np.sum(np.square(np.diag(
            self.middle)))
        return loglik

    def log_likelihood(self):
        Users = self.U[:, :-1]
        Middle = self.S
        Items = self.V[:-1, :]
        UserBiases = self.U[:, -1].reshape((-1, 1))
        ItemBiases = self.V[-1, :].reshape((-1, 1))

        A = T.dot(T.dot(self.U[:, :-1], self.S[:-1, :-1]), self.V[:-1, :])
        A = T.inc_subtensor(A[:, :], UserBiases * T.sqrt(self.S[-1, -1]))
        A = T.inc_subtensor(A[:, :], ItemBiases.T * T.sqrt(self.S[-1, -1]))
        B = A * self.counts
        loglik = T.sum(B)

        A = T.exp(A)
        A += 1
        A = T.log(A)

        A = (self.counts + 1) * A
        loglik -= T.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(T.diag(self.S)[:-1]))

        # Return the negated log-likelihood because the solver minimizes the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
Example #26
class WildLogisticMF():
    def __init__(self, counts, num_factors, reg_param=0.6, gamma=1.0,
                 iterations=30, minstepsize=1e-10):
        self.counts = counts
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded2Factors(self.num_users, self.num_items, self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations, minstepsize=self.minstepsize)

    def train_model(self, x0=None):
        self.L = T.matrix('L')
        self.R = T.matrix('R')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.L, self.R])

        if x0 is None:
            user_vectors = np.random.normal(size=(self.num_users,
                                                       self.num_factors))
            item_vectors = np.random.normal(size=(self.num_items,
                                                       self.num_factors))
            user_biases = np.random.normal(size=(self.num_users, 1)) / SCONST
            item_biases = np.random.normal(size=(self.num_items, 1)) / SCONST
            x0 = (np.hstack((user_vectors, user_biases)),
                  np.hstack((item_vectors, item_biases)))
        else:
            x0 = x0
        (left, right), self.loss_history = self.solver.solve(problem, x=x0)

        self.user_vectors = left[:, :-1]
        self.item_vectors = right[:, :-1]
        self.user_biases = left[:, -1].reshape((self.num_users, 1))
        self.item_biases = right[:, -1].reshape((self.num_items, 1))
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))

    def log_likelihood(self):
        Users = self.L[:, :-1]
        Items = self.R[:, :-1]
        UserBiases = self.L[:, -1].reshape((-1, 1))
        ItemBiases = self.R[:, -1].reshape((-1, 1))

        A = T.dot(self.L[:, :-1], (self.R[:, :-1]).T)
        A = T.inc_subtensor(A[:, :], UserBiases)
        A = T.inc_subtensor(A[:, :], ItemBiases.T)
        B = A * self.counts
        loglik = T.sum(B)

        A = T.exp(A)
        A += 1
        A = T.log(A)

        A = (self.counts + 1) * A
        loglik -= T.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(self.L[:, :-1]))
        loglik -= 0.5 * self.reg_param * T.sum(T.square(self.R[:, :-1]))

        # Return the negated log-likelihood because the solver minimizes the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
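# A hypothetical driver for the class above (a sketch only: it assumes the
# module-level dependencies of the original script -- theano, SCONST, and
# FixedRankEmbeeded2Factors -- are importable, and uses a tiny synthetic
# counts matrix):
if __name__ == '__main__':
    demo_counts = np.random.poisson(0.3, size=(50, 40)).astype('float64')
    mf = WildLogisticMF(demo_counts, num_factors=5, iterations=10)
    mf.train_model()
    print('user factors:', mf.user_vectors.shape)  # expected (50, 5)
    print('item factors:', mf.item_vectors.shape)  # expected (40, 5)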
Example #27
class LogisticMF():
    def __init__(self, counts, num_factors, reg_param=0.6, gamma=1.0,
                 iterations=30, minstepsize=1e-9):
        self.counts = counts
        N = 20000
        self.counts = counts[:N, :N]
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors + 2
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded2Factors(self.num_users, self.num_items, self.num_factors)
        self.solver = ConjugateGradient(maxiter=self.iterations, minstepsize=self.minstepsize)

    def train_model(self):
        self.L = T.matrix('L')
        self.R = T.matrix('R')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.L, self.R])
        left, right = self.solver.solve(problem)
        self.user_vectors = left[:, :-2]
        self.item_vectors = right[:, :-2]
        self.user_biases = left[:, -1]
        self.item_biases = right[:, -2]
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))
        print("how much user outer? {}".format(np.average(np.isclose(left[:, -2], 1))))
        print("how much item outer? {}".format(np.average(np.isclose(right[:, -1], 1))))
        print('user delta: {} in norm, {} in max abs'.format(la.norm(left[:, -2] - 1), np.max(np.abs(left[:, -2] - 1))))
        print('item delta: {} in norm, {} in max abs'.format(la.norm(right[:, -1] - 1), np.max(np.abs(right[:, -1] - 1))))

    def evaluate_lowrank(self, U, V, item, fast=False):
        if hasattr(item, '__len__') and len(item) == 2 and len(item[0]) == len(item[1]):
            if fast:
                rows = U[item[0], :]
                cols = V[item[1], :]
                data = (rows * cols).sum(1)
                return data
            else:
                idx_argsort = item[0].argsort()
                item = (item[0][idx_argsort], item[1][idx_argsort])

                vals, idxs, counts = [theano.shared(it) for it in\
                                      np.unique(item[0], return_index=True, return_counts=True)]
                output = T.zeros(int(np.max(counts.get_value())))
                it1 = theano.shared(item[1])

                def process_partial_dot(row_idx, out, U, V, item):
                    partial_dot = T.dot(U[vals[row_idx], :], V[item[idxs[row_idx]: idxs[row_idx] + counts[row_idx]], :].T)
                    return T.set_subtensor(out[:counts[row_idx]], partial_dot)
                parts, updates = theano.scan(fn=process_partial_dot,
                                             outputs_info=output,
                                             sequences=T.arange(vals.size),
                                             non_sequences=[U, V, it1])
                mask = np.ones((vals.get_value().size, int(np.max(counts.get_value()))))
                for i, count in enumerate(counts.get_value()):
                    mask[i, count:] = 0
                return parts[theano.shared(mask).nonzero()].ravel()
        else:
            raise ValueError('__getitem__ now supports only indices set')

    def log_likelihood(self):
        Users = self.L[:, :-2]
        Items = self.R[:, :-2]
        UserBiases = self.L[:, -1]
        ItemBiases = self.R[:, -2]
        UserOuter = self.L[:, -2]
        ItemOuter = self.R[:, -1]

        ## A = T.dot(Users, Items.T)
        ## A += UserBiases
        ## A += ItemBiases.T
        ## B = A * self.counts
        ## loglik = T.sum(B)

        # A implicitly stored as self.L @ self.R.T
        # loglik = T.sum(A * self.counts) => sum over nonzeros only
        print('nnz size: {}'.format(self.counts.nonzero()[0].size))
        loglik = T.dot(self.evaluate_lowrank(self.L, self.R, self.counts.nonzero(), fast=False),
                  np.array(self.counts[self.counts.nonzero()]).ravel())

        ## A = T.exp(A)
        ## A += 1
        ## A = T.log(A)
        # Here we use the Taylor series ln(exp(x) + 1) = ln(2) + x/2 + x^2/8 + O(x^4) around x = 0
        # ln(2)
        const_term = (T.ones((self.num_users, 1)) * np.log(2), T.ones((self.num_items, 1)))
        # x/2
        first_order_term = (0.5 * self.L, 0.5 * self.R)
        # x^2/8
        second_order_term = hadamard((self.L, self.R), (self.L, self.R), self.num_factors)
        second_order_term = tuple(factor / 8.0 for factor in second_order_term)

        grouped_factors = list(zip(const_term, first_order_term, second_order_term))
        A = (T.concatenate(grouped_factors[0], axis=1), T.concatenate(grouped_factors[1], axis=1))

        ## A = (self.counts + 1) * A
        ## loglik -= T.sum(A)
        loglik -= sum_lowrank(A)
        loglik -= T.dot(self.evaluate_lowrank(A[0], A[1], self.counts.nonzero(), fast=False),
                  np.array(self.counts[self.counts.nonzero()]).ravel())


        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(Users))
        loglik -= 0.5 * self.reg_param * T.sum(T.square(Items))

        # We need to keep UserOuter and ItemOuter strictly at one so that they
        # form proper outer products with the biases.
        loglik -= self.num_users * T.sum(T.square(UserOuter - 1))
        loglik -= self.num_items * T.sum(T.square(ItemOuter - 1))

        # Return the negated log-likelihood because the solver minimizes the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
Example #28
class WildLogisticMF():
    def __init__(self,
                 counts,
                 num_factors,
                 reg_param=0.6,
                 gamma=1.0,
                 iterations=30,
                 minstepsize=1e-10):
        self.counts = counts
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded2Factors(self.num_users,
                                                  self.num_items,
                                                  self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations,
                                        minstepsize=self.minstepsize)

    def train_model(self, x0=None):
        self.L = T.matrix('L')
        self.R = T.matrix('R')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.L, self.R])

        if x0 is None:
            user_vectors = np.random.normal(size=(self.num_users,
                                                  self.num_factors))
            item_vectors = np.random.normal(size=(self.num_items,
                                                  self.num_factors))
            user_biases = np.random.normal(size=(self.num_users, 1)) / SCONST
            item_biases = np.random.normal(size=(self.num_items, 1)) / SCONST
            x0 = (np.hstack((user_vectors, user_biases)),
                  np.hstack((item_vectors, item_biases)))
        else:
            x0 = x0
        (left, right), self.loss_history = self.solver.solve(problem, x=x0)

        self.user_vectors = left[:, :-1]
        self.item_vectors = right[:, :-1]
        self.user_biases = left[:, -1].reshape((self.num_users, 1))
        self.item_biases = right[:, -1].reshape((self.num_items, 1))
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))

    def log_likelihood(self):
        Users = self.L[:, :-1]
        Items = self.R[:, :-1]
        UserBiases = self.L[:, -1].reshape((-1, 1))
        ItemBiases = self.R[:, -1].reshape((-1, 1))

        A = T.dot(self.L[:, :-1], (self.R[:, :-1]).T)
        A = T.inc_subtensor(A[:, :], UserBiases)
        A = T.inc_subtensor(A[:, :], ItemBiases.T)
        B = A * self.counts
        loglik = T.sum(B)

        A = T.exp(A)
        A += 1
        A = T.log(A)

        A = (self.counts + 1) * A
        loglik -= T.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(self.L[:, :-1]))
        loglik -= 0.5 * self.reg_param * T.sum(T.square(self.R[:, :-1]))

        # Return the negated log-likelihood because the solver minimizes the cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()
        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
Example #29
    def _update_w(self, data_align, data_sup, labels, w, s, theta, bias):
        """

        Parameters
        ----------
        data_align : list of 2D arrays, element i has shape=[voxels_i, n_align]
            Each element in the list contains the fMRI data for alignment of
            one subject. There are n_align samples for each subject.

        data_sup : list of 2D arrays, element i has shape=[voxels_i, samples_i]
            Each element in the list contains the fMRI data of one subject for
            the classification task.

        labels : list of arrays of int, element i has shape=[samples_i]
            Each element in the list contains the labels for the data samples
            in data_sup.

        w : list of array, element i has shape=[voxels_i, features]
            The orthogonal transforms (mappings) :math:`W_i` for each subject.

        s : array, shape=[features, samples]
            The shared response.

        theta : array, shape=[classes, features]
            The MLR class plane parameters.

        bias : array, shape=[classes]
            The MLR class biases.

        Returns
        -------

        w : list of 2D array, element i has shape=[voxels_i, features]
            The updated orthogonal transforms (mappings).
        """
        subjects = len(data_align)

        s_th = S.shared(s.astype(theano.config.floatX))
        theta_th = S.shared(theta.T.astype(theano.config.floatX))
        bias_th = S.shared(bias.T.astype(theano.config.floatX),
                           broadcastable=(True, False))

        for subject in range(subjects):
            logger.info('Subject Wi %d' % subject)
            # Solve for subject i
            # Create the theano function
            w_th = T.matrix(name='W', dtype=theano.config.floatX)
            data_srm_subject = \
                S.shared(data_align[subject].astype(theano.config.floatX))
            constf1 = \
                S.shared((1 - self.alpha) * 0.5 / data_align[subject].shape[1],
                         allow_downcast=True)
            f1 = constf1 * T.sum((data_srm_subject - w_th.dot(s_th))**2)

            if data_sup[subject] is not None:
                lr_samples_S = S.shared(data_sup[subject].shape[1])
                data_sup_subject = \
                    S.shared(data_sup[subject].astype(theano.config.floatX))
                labels_S = S.shared(labels[subject])
                constf2 = S.shared(-self.alpha / self.gamma
                                   / data_sup[subject].shape[1],
                                   allow_downcast=True)

                log_p_y_given_x = T.log(T.nnet.softmax((theta_th.dot(
                    w_th.T.dot(data_sup_subject))).T + bias_th))
                f2 = constf2 * T.sum(
                    log_p_y_given_x[T.arange(lr_samples_S), labels_S])
                f = f1 + f2
            else:
                f = f1

            # Define the problem and solve
            f_subject = self._objective_function_subject(data_align[subject],
                                                         data_sup[subject],
                                                         labels[subject],
                                                         w[subject],
                                                         s, theta, bias)
            minstep = np.amin((10**-np.floor(np.log10(f_subject)), 1e-1))
            manifold = Stiefel(w[subject].shape[0], w[subject].shape[1])
            problem = Problem(manifold=manifold, cost=f, arg=w_th, verbosity=0)
            solver = ConjugateGradient(mingradnorm=1e-2, minstepsize=minstep)
            w[subject] = np.array(solver.solve(
                problem, x=w[subject].astype(theano.config.floatX)))
            if data_sup[subject] is not None:
                del f2
                del log_p_y_given_x
                del data_sup_subject
                del labels_S
            del solver
            del problem
            del manifold
            del f
            del f1
            del data_srm_subject
            del w_th
        del theta_th
        del bias_th
        del s_th

        # Run garbage collector to avoid filling up the memory
        gc.collect()
        return w
Example #30
    def _update_w(self, data_align, data_sup, labels, w, s, theta, bias):
        """

        Parameters
        ----------
        data_align : list of 2D arrays, element i has shape=[voxels_i, n_align]
            Each element in the list contains the fMRI data for alignment of
            one subject. There are n_align samples for each subject.

        data_sup : list of 2D arrays, element i has shape=[voxels_i, samples_i]
            Each element in the list contains the fMRI data of one subject for
            the classification task.

        labels : list of arrays of int, element i has shape=[samples_i]
            Each element in the list contains the labels for the data samples
            in data_sup.

        w : list of array, element i has shape=[voxels_i, features]
            The orthogonal transforms (mappings) :math:`W_i` for each subject.

        s : array, shape=[features, samples]
            The shared response.

        theta : array, shape=[classes, features]
            The MLR class plane parameters.

        bias : array, shape=[classes]
            The MLR class biases.

        Returns
        -------

        w : list of 2D array, element i has shape=[voxels_i, features]
            The updated orthogonal transforms (mappings).
        """
        subjects = len(data_align)

        s_th = S.shared(s.astype(theano.config.floatX))
        theta_th = S.shared(theta.T.astype(theano.config.floatX))
        bias_th = S.shared(bias.T.astype(theano.config.floatX),
                           broadcastable=(True, False))

        for subject in range(subjects):
            logger.info('Subject Wi %d' % subject)
            # Solve for subject i
            # Create the theano function
            w_th = T.matrix(name='W', dtype=theano.config.floatX)
            data_srm_subject = \
                S.shared(data_align[subject].astype(theano.config.floatX))
            constf1 = \
                S.shared((1 - self.alpha) * 0.5 / data_align[subject].shape[1],
                         allow_downcast=True)
            f1 = constf1 * T.sum((data_srm_subject - w_th.dot(s_th))**2)

            if data_sup[subject] is not None:
                lr_samples_S = S.shared(data_sup[subject].shape[1])
                data_sup_subject = \
                    S.shared(data_sup[subject].astype(theano.config.floatX))
                labels_S = S.shared(labels[subject])
                constf2 = S.shared(-self.alpha / self.gamma /
                                   data_sup[subject].shape[1],
                                   allow_downcast=True)

                log_p_y_given_x = T.log(
                    T.nnet.softmax(
                        (theta_th.dot(w_th.T.dot(data_sup_subject))).T +
                        bias_th))
                f2 = constf2 * T.sum(log_p_y_given_x[T.arange(lr_samples_S),
                                                     labels_S])
                f = f1 + f2
            else:
                f = f1

            # Define the problem and solve
            f_subject = self._objective_function_subject(
                data_align[subject], data_sup[subject], labels[subject],
                w[subject], s, theta, bias)
            minstep = np.amin(((10**-np.floor(np.log10(f_subject))), 1e-1))
            manifold = Stiefel(w[subject].shape[0], w[subject].shape[1])
            problem = Problem(manifold=manifold, cost=f, arg=w_th, verbosity=0)
            solver = ConjugateGradient(mingradnorm=1e-2, minstepsize=minstep)
            w[subject] = np.array(
                solver.solve(problem,
                             x=w[subject].astype(theano.config.floatX)))
            if data_sup[subject] is not None:
                del f2
                del log_p_y_given_x
                del data_sup_subject
                del labels_S
            del solver
            del problem
            del manifold
            del f
            del f1
            del data_srm_subject
            del w_th
        del theta_th
        del bias_th
        del s_th

        # Run garbage collector to avoid filling up the memory
        gc.collect()
        return w
Example #31
# (a) Instantiation of a manifold
# points on the manifold are parameterized via their singular value
# decomposition (u, s, vt) where
# u is a 5 x 2 matrix with orthonormal columns,
# s is a vector of length 2,
# vt is a 2 x 4 matrix with orthonormal rows,
# so that u*diag(s)*vt is a 5 x 4 matrix of rank 2.
manifold = FixedRankEmbedded(A.shape[0], A.shape[1], k)


# (b) Definition of a cost function (here using autograd.numpy)
#       Note that the cost must be defined in terms of u, s and vt, where
#       X = u * diag(s) * vt.
def cost(usv):
    delta = .5
    u = usv[0]
    s = usv[1]
    vt = usv[2]
    X = np.dot(np.dot(u, np.diag(s)), vt)
    return np.sum(np.sqrt((X - A)**2 + delta**2) - delta)


# define the Pymanopt problem
problem = Problem(manifold=manifold, cost=cost)
# (c) Instantiation of a Pymanopt solver
solver = ConjugateGradient()

# let Pymanopt do the rest
X = solver.solve(problem)
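# A small follow-up sketch (assuming A, k, and the solver result X from the
# surrounding example are in scope): materialise the rank-k matrix from the
# returned (u, s, vt) triple and report the robust cost at the solution.
u, s, vt = X
X_lowrank = u @ np.diag(s) @ vt
print('rank-{} approximation shape: {}'.format(k, X_lowrank.shape))
print('robust cost at the solution:', cost((u, s, vt)))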
Example #32
    def fit(self, X_train, X_test, f_train, f_test, M0=None, tol=1e-5):
        """
        Fits a gaussian ridge function.
        This code is based upon the grf_fit() function from https://github.com/psesh/Gaussian-Ridges.
        See https://doi.org/10.1137/18M1168571.
        """

        # Standardise targets
        f_train, min_f, max_f = standardise_minmax(f_train.reshape(-1, 1))
        f_train = np.squeeze(f_train)
        f_test, *_ = standardise_minmax(f_test.reshape(-1, 1),
                                        min_value=min_f,
                                        max_value=max_f)
        f_test = np.squeeze(f_test)
        self.scaling = [min_f, max_f]

        attempts = 0
        success = False
        while attempts < self.nattempts and not success:
            if self.verbose > 0: print('Attempt %d' % attempts)

            # Initial guess
            if M0 is None:
                d = np.shape(X_train)[1]
                M0 = np.random.randn(d, self.subdim)
                Q = np.linalg.qr(M0)[0]
                M0 = Q.copy()
            last_r = 1.0
            err = 1.0
            M_guess = M0.copy()
            d, m = M0.shape

            # Fit gaussian ridge function
            out_iter = 0
            in_iter = []
            fx = []
            n_iter = []
            gradnorm = []
            final_gradnorm = []
            while err > tol:
                if self.verbose > 0:
                    print('Outer iteration %d' % (out_iter + 1))
                U_train = X_train @ M_guess
                ker = 1.0 * RBF(
                    length_scale=[1 for _ in range(m)]) + WhiteKernel(
                        noise_level=1.0)
                gpr = GaussianProcessRegressor(kernel=ker,
                                               n_restarts_optimizer=10,
                                               alpha=.1)
                gpr.fit(U_train, f_train)

                kernel = gpr.kernel_

                my_cost = lambda M: cost(M, X_train, X_test, f_train, f_test,
                                         kernel)
                my_dcost = lambda M: dcost(M, X_train, X_test, f_train, f_test,
                                           kernel)

                manifold = Stiefel(d, m)
                problem = Problem(manifold=manifold,
                                  cost=my_cost,
                                  grad=my_dcost,
                                  verbosity=self.verbose)
                solver = ConjugateGradient(logverbosity=2,
                                           maxtime=self.maxtime,
                                           maxiter=self.maxiter,
                                           mingradnorm=self.mingradnorm,
                                           minstepsize=self.minstepsize,
                                           maxcostevals=self.maxcostevals)
                solver_results = solver.solve(problem, x=M_guess)
                M_new = solver_results[0]
                M_guess = M_new.copy()

                r = cost(M_guess, X_train, X_test, f_train, f_test, kernel)

                err = np.abs(last_r - r) / r
                last_r = r

                # Store convergence info
                log = solver_results[1]
                n_iter.append(log['final_values']['iterations'])
                final_gradnorm.append(log['final_values']['gradnorm'])
                in_iter.append(log['iterations']['iteration'])
                fx.append(log['iterations']['f(x)'])
                gradnorm.append(log['iterations']['gradnorm'])
                out_iter += 1

                # If the solver stopped because of the min grad norm condition,
                # discard this attempt and restart with a fresh random M0
                reason = log['stoppingreason']
                if 'min grad norm' in reason:
                    if self.verbose > 0:
                        print(reason + ', restarting with new M0')
                    success = False
                    M0 = None
                    break
                else:
                    success = True
            attempts += 1

        self.log = {
            'n_out_iter': out_iter,
            'in_iter': in_iter,
            'fx': fx,
            'grad_norm': gradnorm,
            'n_in_iter': n_iter,
            'final_grad_norm': final_gradnorm,
            'err': err,
            'tol': tol,
            'attempts': attempts,
            'stoppingreason': reason
        }

        # Store M, GP model
        self.M = M_guess
        self.model = gpr
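# A standalone sketch of the ridge idea used above (synthetic data, plain
# scikit-learn; not part of the original class): project inputs onto a fixed
# orthonormal subspace M and fit a GP on the projected coordinates.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

rng = np.random.default_rng(0)
X_demo = rng.standard_normal((200, 10))
M_demo = np.linalg.qr(rng.standard_normal((10, 2)))[0]   # orthonormal ridge basis
f_demo = np.sin(X_demo @ M_demo).sum(axis=1)             # depends on the ridge only
gpr_demo = GaussianProcessRegressor(
    kernel=1.0 * RBF(length_scale=[1.0, 1.0]) + WhiteKernel(noise_level=1.0),
    alpha=0.1).fit(X_demo @ M_demo, f_demo)
print('train R^2 on the ridge coordinates:',
      gpr_demo.score(X_demo @ M_demo, f_demo))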
Example #33
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Map the source embeddings into the target embedding space'
    )
    parser.add_argument('src_input', help='the input source embeddings')
    parser.add_argument('trg_input', help='the input target embeddings')
    parser.add_argument(
        '--encoding',
        default='utf-8',
        help='the character encoding for input/output (defaults to utf-8)')
    parser.add_argument(
        '--max_vocab',
        default=0,
        type=int,
        help='Maximum vocabulary to be loaded, 0 allows complete vocabulary')
    parser.add_argument('--verbose', default=0, type=int, help='Verbose')
    mapping_group = parser.add_argument_group(
        'mapping arguments', 'Basic embedding mapping arguments')
    mapping_group.add_argument(
        '-dtrain',
        '--dictionary_train',
        default=sys.stdin.fileno(),
        help='the training dictionary file (defaults to stdin)')
    mapping_group.add_argument(
        '-dtest',
        '--dictionary_test',
        default=sys.stdin.fileno(),
        help='the test dictionary file (defaults to stdin)')
    mapping_group.add_argument(
        '-dtrainspl',
        '--dictionary_trainspl',
        default=sys.stdin.fileno(),
        help='the training dictionary split file (defaults to stdin)')
    mapping_group.add_argument(
        '-dvalspl',
        '--dictionary_valspl',
        default=sys.stdin.fileno(),
        help='the validation dictionary split file (defaults to stdin)')
    mapping_group.add_argument(
        '--normalize',
        choices=['unit', 'center', 'unitdim', 'centeremb'],
        nargs='*',
        default=[],
        help='the normalization actions to perform in order')

    geomm_group = parser.add_argument_group('GeoMM arguments',
                                            'Arguments for GeoMM method')
    geomm_group.add_argument('--l2_reg',
                             type=float,
                             default=1e-1,
                             help='Lambda for L2 Regularization')
    geomm_group.add_argument(
        '--max_opt_time',
        type=int,
        default=5000,
        help='Maximum time limit for optimization in seconds')
    geomm_group.add_argument(
        '--max_opt_iter',
        type=int,
        default=150,
        help='Maximum number of iterations for optimization')
    geomm_group.add_argument(
        '--x_cutoff',
        type=int,
        default=25000,
        help='Vocabulary cutoff for first language for bootstrapping')
    geomm_group.add_argument(
        '--z_cutoff',
        type=int,
        default=25000,
        help='Vocabulary cutoff for second language for bootstrapping')
    geomm_group.add_argument(
        '--patience',
        type=int,
        default=1,
        help=
        'Number of iterations with a decrease in validation accuracy permissible during bootstrapping'
    )

    eval_group = parser.add_argument_group('evaluation arguments',
                                           'Arguments for evaluation')
    eval_group.add_argument('--normalize_eval',
                            action='store_true',
                            help='Normalize the embeddings at test time')
    eval_group.add_argument('--eval_batch_size',
                            type=int,
                            default=500,
                            help='Batch size for evaluation')
    eval_group.add_argument('--csls_neighbourhood',
                            type=int,
                            default=10,
                            help='Neighbourhood size for CSLS')

    args = parser.parse_args()
    BATCH_SIZE = args.eval_batch_size

    # Logging
    method_name = os.path.join('logs', 'geomm_semi')
    directory = os.path.join(
        os.path.join(os.getcwd(), method_name),
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if not os.path.exists(directory):
        os.makedirs(directory)
    log_file_name, file_extension = os.path.splitext(
        os.path.basename(args.dictionary_train))
    log_file_name = log_file_name + '.log'

    class Logger(object):
        def __init__(self):
            self.terminal = sys.stdout
            self.log = open(os.path.join(directory, log_file_name), "a")

        def write(self, message):
            self.terminal.write(message)
            self.log.write(message)

        def flush(self):
            # This flush method is needed for Python 3 compatibility.
            # It handles the flush call by doing nothing; add extra
            # behavior here if needed.
            pass

    sys.stdout = Logger()
    if args.verbose:
        print('Current arguments: {0}'.format(args))

    dtype = 'float32'

    if args.verbose:
        print('Loading train data...')
    # Read input embeddings
    srcfile = open(args.src_input,
                   encoding=args.encoding,
                   errors='surrogateescape')
    trgfile = open(args.trg_input,
                   encoding=args.encoding,
                   errors='surrogateescape')
    src_words, x = embeddings.read(srcfile,
                                   max_voc=args.max_vocab,
                                   dtype=dtype)
    trg_words, z = embeddings.read(trgfile,
                                   max_voc=args.max_vocab,
                                   dtype=dtype)

    # Build word to index map
    src_word2ind = {word: i for i, word in enumerate(src_words)}
    trg_word2ind = {word: i for i, word in enumerate(trg_words)}

    # Build training dictionary
    src_indices = []
    trg_indices = []
    f = open(args.dictionary_train,
             encoding=args.encoding,
             errors='surrogateescape')
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src = src.lower()
            trg = trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src_indices.append(src_ind)
            trg_indices.append(trg_ind)
        except KeyError:
            if args.verbose:
                print('WARNING: OOV dictionary entry ({0} - {1})'.format(
                    src, trg),
                      file=sys.stderr)
    f.close()
    src_indices = src_indices
    trg_indices = trg_indices
    src_indices_train = list(src_indices)
    trg_indices_train = list(trg_indices)
    src_indices = []
    trg_indices = []

    # Loading train-split dictionary
    f = open(args.dictionary_trainspl,
             encoding=args.encoding,
             errors='surrogateescape')
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src = src.lower()
            trg = trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src_indices.append(src_ind)
            trg_indices.append(trg_ind)
        except KeyError:
            if args.verbose:
                print('WARNING: OOV dictionary entry ({0} - {1})'.format(
                    src, trg),
                      file=sys.stderr)
    f.close()

    if args.verbose:
        print('Normalizing embeddings...')
    # STEP 0: Normalization
    for action in args.normalize:
        if action == 'unit':
            x = embeddings.length_normalize(x)
            z = embeddings.length_normalize(z)
        elif action == 'center':
            x = embeddings.mean_center(x)
            z = embeddings.mean_center(z)
        elif action == 'unitdim':
            x = embeddings.length_normalize_dimensionwise(x)
            z = embeddings.length_normalize_dimensionwise(z)
        elif action == 'centeremb':
            x = embeddings.mean_center_embeddingwise(x)
            z = embeddings.mean_center_embeddingwise(z)
    orig_src = src_indices
    orig_trg = trg_indices
    best_val_acc = 0
    best_add_src = []
    best_add_trg = []
    add_src = []
    add_trg = []

    if args.verbose:
        print('Beginning Optimization')
    start_time = time.time()
    it_count = 0
    drop_count = 0

    # Bootstrap loop
    while True:
        if args.verbose:
            print('Starting bootstrap iteration {0}'.format(it_count + 1))
        # Step 1.1: Optimization
        x_count = len(set(src_indices))
        z_count = len(set(trg_indices))

        # Creating dictionary matrix from training set
        map_dict_src = {}
        map_dict_trg = {}
        I = 0
        uniq_src = []
        uniq_trg = []
        for i in range(len(src_indices)):
            if src_indices[i] not in map_dict_src.keys():
                map_dict_src[src_indices[i]] = I
                I += 1
                uniq_src.append(src_indices[i])
        J = 0
        for j in range(len(trg_indices)):
            if trg_indices[j] not in map_dict_trg.keys():
                map_dict_trg[trg_indices[j]] = J
                J += 1
                uniq_trg.append(trg_indices[j])

        np.random.seed(0)
        Lambda = args.l2_reg
        U1 = TT.matrix()
        U2 = TT.matrix()
        B = TT.matrix()
        X_tot = x[uniq_src].T.dot(x[uniq_src])
        Z_tot = z[uniq_trg].T.dot(z[uniq_trg])
        W = U1.dot(B.dot(U2.T))
        cost = (TT.nlinalg.trace(
            U2.dot(
                B.dot(
                    U1.T.dot(
                        shared(X_tot).dot(
                            U1.dot(B.dot(U2.T.dot(shared(Z_tot))))))))) -
                2 * TT.sum(
                    (shared(x[src_indices]).dot(W)) * shared(z[trg_indices]))
                ) / (len(src_indices)) + 0.5 * Lambda * (TT.sum(B**2))
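        # The expression above is the GeoMM-style objective: the mapping
        # W = U1.dot(B).dot(U2.T) is fit so that mapped source vectors match
        # their dictionary translations, with an L2 penalty on B; U1 and U2
        # are constrained to Stiefel manifolds and B to the positive-definite
        # cone via the Product manifold defined below.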
        solver = ConjugateGradient(maxtime=args.max_opt_time,
                                   maxiter=args.max_opt_iter,
                                   mingradnorm=1e-15)

        low_rank = 300
        manifold = Product([
            Stiefel(x.shape[1], low_rank),
            Stiefel(z.shape[1], low_rank),
            PositiveDefinite(low_rank)
        ])
        problem = Problem(manifold=manifold,
                          cost=cost,
                          arg=[U1, U2, B],
                          verbosity=3)
        wopt = solver.solve(problem)
        w = wopt
        U1 = w[0]
        U2 = w[1]
        B = w[2]

        # Step 1.2: Transformation
        xw = x.dot(U1).dot(scipy.linalg.sqrtm(B))
        zw = z.dot(U2).dot(scipy.linalg.sqrtm(B))

        it_count += 1

        # Step 1.3: Compute Validation Accuracy
        if args.normalize_eval:
            xw = embeddings.length_normalize(xw)
            zw = embeddings.length_normalize(zw)

        # Loading validation dictionary
        f = open(args.dictionary_valspl,
                 encoding=args.encoding,
                 errors='surrogateescape')
        src2trg = collections.defaultdict(set)
        trg2src = collections.defaultdict(set)
        oov = set()
        vocab = set()
        for line in f:
            src, trg = line.split()
            if args.max_vocab:
                src = src.lower()
                trg = trg.lower()
            try:
                src_ind = src_word2ind[src]
                trg_ind = trg_word2ind[trg]
                src2trg[src_ind].add(trg_ind)
                trg2src[trg_ind].add(src_ind)
                vocab.add(src)
            except KeyError:
                oov.add(src)
        src = list(src2trg.keys())
        trgt = list(trg2src.keys())

        oov -= vocab  # If one of the translation options is in the vocabulary, then the entry is not an oov
        coverage = len(src2trg) / (len(src2trg) + len(oov))
        f.close()

        translation = collections.defaultdict(int)
        translation5 = collections.defaultdict(list)
        translation10 = collections.defaultdict(list)

        t = time.time()
        nbrhood_x = cp.zeros(xw.shape[0])
        nbrhood_z = cp.zeros(zw.shape[0])
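        # CSLS retrieval: each candidate pair is scored as
        #   2 * sim(x, z) - r_trg(x) - r_src(z),
        # where sim is the dot product (cosine when --normalize_eval is set)
        # and r_* are mean similarities to the csls_neighbourhood nearest
        # neighbours. The two loops below fill those neighbourhood terms and
        # the third loop takes the argmax (plus top-5 / top-10 lists) as the
        # predicted translations.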
        for i in range(0, len(src), BATCH_SIZE):
            j = min(i + BATCH_SIZE, len(src))
            similarities = -1 * cp.partition(
                -1 *
                cp.dot(cp.asarray(xw[src[i:j]]), cp.transpose(cp.asarray(zw))),
                args.csls_neighbourhood - 1,
                axis=1)[:, :args.csls_neighbourhood]
            nbrhood_x[src[i:j]] = (cp.mean(similarities, axis=1))

        for i in range(0, zw.shape[0], BATCH_SIZE):
            j = min(i + BATCH_SIZE, zw.shape[0])
            similarities = -1 * cp.partition(
                -1 * cp.dot(cp.asarray(zw[i:j]), cp.transpose(cp.asarray(xw))),
                args.csls_neighbourhood - 1,
                axis=1)[:, :args.csls_neighbourhood]
            nbrhood_z[i:j] = (cp.mean(similarities, axis=1))

        for i in range(0, len(src), BATCH_SIZE):
            j = min(i + BATCH_SIZE, len(src))
            similarities = cp.transpose(
                cp.transpose(2 * cp.asarray(xw[src[i:j]]).dot(
                    cp.transpose(cp.asarray(zw)))) -
                nbrhood_x[src[i:j]]) - nbrhood_z
            nn = cp.argmax(similarities, axis=1).tolist()
            similarities = cp.argsort((similarities), axis=1)

            nn5 = (similarities[:, -5:])
            nn10 = (similarities[:, -10:])
            for k in range(j - i):
                translation[src[i + k]] = nn[k]
                translation5[src[i + k]] = nn5[k].tolist()
                translation10[src[i + k]] = nn10[k].tolist()
        accuracy = np.mean(
            [1 if translation[i] in src2trg[i] else 0 for i in src])
        mean = 0
        for i in src:
            for k in translation5[i]:
                if k in src2trg[i]:
                    mean += 1
                    break

        mean /= len(src)
        accuracy5 = mean

        mean = 0
        for i in src:
            for k in translation10[i]:
                if k in src2trg[i]:
                    mean += 1
                    break

        mean /= len(src)
        accuracy10 = mean
        drop_count += 1
        if accuracy > best_val_acc:
            if args.verbose:
                print('Improvement of {0}%  over best validation accuracy!'.
                      format((accuracy - best_val_acc) * 100))
            best_val_acc = accuracy
            best_add_src = list(add_src)
            best_add_trg = list(add_trg)
            drop_count = 0

        if args.verbose:
            print(
                'Val Set:- Coverage:{0:7.2%}  Accuracy:{1:7.2%}  Accuracy(Top 5):{2:7.2%}  Accuracy(Top 10):{3:7.2%}'
                .format(coverage, accuracy, accuracy5, accuracy10))
        if drop_count >= args.patience:
            if args.verbose:
                print('Training ended')
            break

        # Step 1.4: Dictionary Induction Stage (Bootstrap)
        # x_cutoff and z_cutoff restrict the vocabularies of the two languages
        # (the first k words of standard embeddings are the most frequent ones).
        # CSLS inference is performed on this vocabulary subset with
        # bidirectional bootstrapping: entries are induced for the first
        # x_cutoff words of language 1 and the first z_cutoff words of
        # language 2, and the original training dictionary is added back, so
        # the total dictionary size is x_cutoff + z_cutoff + size(train_set).
        if args.normalize_eval:
            xw = embeddings.length_normalize(xw)
            zw = embeddings.length_normalize(zw)

        x_vocab_size = min(xw.shape[0], args.x_cutoff)
        z_vocab_size = min(zw.shape[0], args.z_cutoff)
        t = time.time()
        nbrhood_x = cp.zeros(x_vocab_size)
        best_sim_x = cp.zeros(x_vocab_size)
        best_sim_x_csls = cp.zeros(x_vocab_size)
        nbrhood_z = cp.zeros(z_vocab_size)

        batch_num = 1
        for i in range(0, x_vocab_size, BATCH_SIZE):
            j = min(i + BATCH_SIZE, x_vocab_size)
            similarities = -1 * cp.partition(
                -1 * cp.dot(cp.asarray(xw[i:j]),
                            cp.transpose(cp.asarray(zw[:z_vocab_size]))),
                args.csls_neighbourhood - 1,
                axis=1)[:, :args.csls_neighbourhood]
            nbrhood_x[i:j] = (cp.mean(similarities, axis=1))
            best_sim_x[i:j] = (cp.max(similarities, axis=1))
            batch_num += 1

        batch_num = 1
        for i in range(0, z_vocab_size, BATCH_SIZE):
            j = min(i + BATCH_SIZE, z_vocab_size)
            similarities = -1 * cp.partition(
                -1 * cp.dot(cp.asarray(zw[i:j]),
                            cp.transpose(cp.asarray(xw[:x_vocab_size]))),
                args.csls_neighbourhood - 1,
                axis=1)[:, :args.csls_neighbourhood]
            nbrhood_z[i:j] = (cp.mean(similarities, axis=1))
            batch_num += 1

        src_indices = list(range(0, x_vocab_size))
        trg_indices = []
        batch_num = 1
        for i in range(0, x_vocab_size, BATCH_SIZE):
            j = min(i + BATCH_SIZE, x_vocab_size)
            similarities = cp.transpose(
                cp.transpose(2 * cp.asarray(xw[i:j]).dot(
                    cp.transpose(cp.asarray(zw[:z_vocab_size])))) -
                nbrhood_x[i:j]) - nbrhood_z
            nn = cp.argmax(similarities, axis=1).tolist()
            trg_indices.append(nn)
            batch_num += 1

        src_indices2 = []
        trg_indices2 = list(range(0, z_vocab_size))
        batch_num = 1
        for i in range(0, z_vocab_size, BATCH_SIZE):
            j = min(i + BATCH_SIZE, z_vocab_size)
            similarities = cp.transpose(
                cp.transpose(2 * cp.asarray(zw[i:j]).dot(
                    cp.transpose(cp.asarray(xw[:x_vocab_size])))) -
                nbrhood_z[i:j]) - nbrhood_x
            nn = cp.argmax(similarities, axis=1).tolist()
            src_indices2.append(nn)
            batch_num += 1
        trg_indices = [item for sublist in trg_indices for item in sublist]
        src_indices2 = [item for sublist in src_indices2 for item in sublist]

        add_src = list(src_indices + src_indices2)
        add_trg = list(trg_indices + trg_indices2)
        src_indices = src_indices + src_indices2 + orig_src
        trg_indices = trg_indices + trg_indices2 + orig_trg
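        # The bootstrap dictionary now combines the forward (x -> z) and
        # backward (z -> x) CSLS matches with the original training pairs.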

    end_time = time.time()
    if args.verbose:
        print('Completed bootstrapping in {0:.2f} seconds'.format(end_time -
                                                                  start_time))

    # Step 2: Final Training with bootstrapped dictionary
    if args.verbose:
        print('Training final model')
    src_indices = best_add_src + src_indices_train
    trg_indices = best_add_trg + trg_indices_train
    x_count = len(set(src_indices))
    z_count = len(set(trg_indices))

    # Creating dictionary matrix from training set
    map_dict_src = {}
    map_dict_trg = {}
    I = 0
    uniq_src = []
    uniq_trg = []
    for idx in src_indices:
        if idx not in map_dict_src:
            map_dict_src[idx] = I
            I += 1
            uniq_src.append(idx)
    J = 0
    for idx in trg_indices:
        if idx not in map_dict_trg:
            map_dict_trg[idx] = J
            J += 1
            uniq_trg.append(idx)

    np.random.seed(0)
    Lambda = args.l2_reg
    U1 = TT.matrix()
    U2 = TT.matrix()
    B = TT.matrix()
    X_tot = x[uniq_src].T.dot(x[uniq_src])
    Z_tot = z[uniq_trg].T.dot(z[uniq_trg])
    W = U1.dot(B.dot(U2.T))
    cost = (TT.nlinalg.trace(
        U2.dot(
            B.dot(
                U1.T.dot(
                    shared(X_tot).dot(U1.dot(B.dot(U2.T.dot(shared(Z_tot)))))))
        )) - 2 * TT.sum(
            (shared(x[src_indices]).dot(W)) * shared(z[trg_indices]))
            ) / len(src_indices) + 0.5 * Lambda * (TT.sum(B**2))
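    # Schematically, and up to a constant term, this cost is
    #     (1/n) * ||X W Z^T - Y||_F^2 + (Lambda/2) * ||B||_F^2
    # with W = U1 B U2^T factorised over Stiefel x Stiefel x PositiveDefinite
    # and Y the binary dictionary-alignment matrix; X_tot and Z_tot are the
    # Gram matrices of the unique dictionary words.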
    solver = ConjugateGradient(maxtime=args.max_opt_time,
                               maxiter=args.max_opt_iter)

    low_rank = 300
    manifold = Product([
        Stiefel(x.shape[1], low_rank),
        Stiefel(z.shape[1], low_rank),
        PositiveDefinite(low_rank)
    ])
    problem = Problem(manifold=manifold,
                      cost=cost,
                      arg=[U1, U2, B],
                      verbosity=3)
    wopt = solver.solve(problem)

    w = wopt
    U1 = w[0]
    U2 = w[1]
    B = w[2]

    xw = x.dot(U1).dot(scipy.linalg.sqrtm(B))
    zw = z.dot(U2).dot(scipy.linalg.sqrtm(B))
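    # Since W = U1 B U2^T, splitting B symmetrically maps both languages into
    # a common space via U1 * B^(1/2) and U2 * B^(1/2).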

    gc.collect()

    # Step 3: Evaluation
    if args.verbose:
        print('Beginning Evaluation')

    if args.normalize_eval:
        xw = embeddings.length_normalize(xw)
        zw = embeddings.length_normalize(zw)
    # Loading test dictionary
    f = open(args.dictionary_test,
             encoding=args.encoding,
             errors='surrogateescape')
    src2trg = collections.defaultdict(set)
    trg2src = collections.defaultdict(set)
    oov = set()
    vocab = set()
    for line in f:
        src, trg = line.split()
        if args.max_vocab:
            src = src.lower()
            trg = trg.lower()
        try:
            src_ind = src_word2ind[src]
            trg_ind = trg_word2ind[trg]
            src2trg[src_ind].add(trg_ind)
            trg2src[trg_ind].add(src_ind)
            vocab.add(src)
        except KeyError:
            oov.add(src)
    src = list(src2trg.keys())
    trgt = list(trg2src.keys())

    oov -= vocab  # If one of the translation options is in the vocabulary, then the entry is not an oov
    coverage = len(src2trg) / (len(src2trg) + len(oov))
    f.close()

    translation = collections.defaultdict(int)
    translation5 = collections.defaultdict(list)
    translation10 = collections.defaultdict(list)

    t = time.time()
    nbrhood_x = np.zeros(xw.shape[0])
    nbrhood_z = np.zeros(zw.shape[0])
    nbrhood_z2 = cp.zeros(zw.shape[0])
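    # Evaluation uses the same CSLS scoring as the bootstrap: nbrhood_x holds
    # the neighbourhood term for the test source words (computed on CPU) and
    # nbrhood_z2 the term for every target word (computed on GPU with cupy,
    # then copied back into nbrhood_z).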
    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities_x = -1 * np.partition(
            -1 * similarities, args.csls_neighbourhood - 1, axis=1)
        nbrhood_x[src[i:j]] = np.mean(
            similarities_x[:, :args.csls_neighbourhood], axis=1)

    batch_num = 1
    for i in range(0, zw.shape[0], BATCH_SIZE):
        j = min(i + BATCH_SIZE, zw.shape[0])
        similarities = -1 * cp.partition(
            -1 * cp.dot(cp.asarray(zw[i:j]), cp.transpose(cp.asarray(xw))),
            args.csls_neighbourhood - 1,
            axis=1)[:, :args.csls_neighbourhood]
        nbrhood_z2[i:j] = (cp.mean(similarities, axis=1))
        batch_num += 1
    nbrhood_z = cp.asnumpy(nbrhood_z2)
    for i in range(0, len(src), BATCH_SIZE):
        j = min(i + BATCH_SIZE, len(src))
        similarities = xw[src[i:j]].dot(zw.T)
        similarities = np.transpose(
            np.transpose(2 * similarities) - nbrhood_x[src[i:j]]) - nbrhood_z
        nn = similarities.argmax(axis=1).tolist()
        similarities = np.argsort((similarities), axis=1)

        nn5 = (similarities[:, -5:])
        nn10 = (similarities[:, -10:])
        for k in range(j - i):
            translation[src[i + k]] = nn[k]
            translation5[src[i + k]] = nn5[k]
            translation10[src[i + k]] = nn10[k]
    accuracy = np.mean([1 if translation[i] in src2trg[i] else 0 for i in src])
    mean = 0
    for i in src:
        for k in translation5[i]:
            if k in src2trg[i]:
                mean += 1
                break

    mean /= len(src)
    accuracy5 = mean

    mean = 0
    for i in src:
        for k in translation10[i]:
            if k in src2trg[i]:
                mean += 1
                break

    mean /= len(src)
    accuracy10 = mean
    print(
        'Coverage:{0:7.2%}  Accuracy:{1:7.2%}  Accuracy(Top 5):{2:7.2%}  Accuracy(Top 10):{3:7.2%}'
        .format(coverage, accuracy, accuracy5, accuracy10))
Пример #34
0
    def _update_classifier(self, data, labels, w, classes):
        """Update the classifier parameters theta and bias

        Parameters
        ----------

        data : list of 2D arrays, element i has shape=[voxels_i, samples_i]
            Each element in the list contains the fMRI data of one subject for
            the classification task.

        labels : list of arrays of int, element i has shape=[samples_i]
            Each element in the list contains the labels for the data samples
            in data_sup.

        w : list of 2D array, element i has shape=[voxels_i, features]
            The orthogonal transforms (mappings) :math:`W_i` for each subject.

        classes : int
            The number of classes in the classifier.


        Returns
        -------

        theta : array, shape=[features, classes]
            The MLR parameter for the class planes.

        bias : array shape=[classes,]
            The MLR parameter for class biases.
        """

        # Stack the data and labels for training the classifier
        data_stacked, labels_stacked, weights = \
            SSSRM._stack_list(data, labels, w)

        features = w[0].shape[1]
        total_samples = weights.size

        data_th = S.shared(data_stacked.astype(theano.config.floatX))
        val_ = S.shared(labels_stacked)
        total_samples_S = S.shared(total_samples)
        theta_th = T.matrix(name='theta', dtype=theano.config.floatX)
        bias_th = T.col(name='bias', dtype=theano.config.floatX)
        constf2 = S.shared(self.alpha / self.gamma, allow_downcast=True)
        weights_th = S.shared(weights)

        log_p_y_given_x = \
            T.log(T.nnet.softmax((theta_th.T.dot(data_th.T)).T + bias_th.T))
        f = -constf2 * T.sum(
            (log_p_y_given_x[T.arange(total_samples_S), val_]) /
            weights_th) + 0.5 * T.sum(theta_th**2)
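        # f is a multinomial logistic regression loss: the per-sample
        # log-likelihoods (divided by the per-sample weights) are scaled by
        # alpha / gamma, with an L2 penalty on theta; it is minimised over the
        # unconstrained Euclidean product manifold below.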

        manifold = Product((Euclidean(features,
                                      classes), Euclidean(classes, 1)))
        problem = Problem(manifold=manifold,
                          cost=f,
                          arg=[theta_th, bias_th],
                          verbosity=0)
        solver = ConjugateGradient(mingradnorm=1e-6)
        solution = solver.solve(problem)
        theta = solution[0]
        bias = solution[1]

        del constf2
        del theta_th
        del bias_th
        del data_th
        del val_
        del solver
        del solution

        return theta, bias
def RELMM(data, A_init, S0, lambda_S, lambda_S0):
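    # A sketch of the alternating scheme implemented below: the abundances A
    # are updated by ADMM with a simplex projection, the scaling factors psi
    # and the per-pixel endmembers S in closed form, and the reference
    # endmember matrix S0 by Riemannian conjugate gradient on the Oblique
    # manifold.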

    [L, N] = data.shape

    [L, P] = S0.shape

    V = P * np.eye(P) - np.outer(np.ones(P), np.ones(P))
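    # With V = P*I - 1*1^T, tr(X V X^T) equals half the sum of squared
    # pairwise distances between the columns of X, so the S0 term penalises
    # the spread of the reference endmembers.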

    def cost(X):

        data_fit = np.zeros(N)

        for n in np.arange(N):
            data_fit[n] = np.linalg.norm(
                S[:, :, n] - np.dot(X, np.diag(psi[:, n])), 'fro')**2

        cost = lambda_S / 2 * np.sum(data_fit,
                                     axis=0) + lambda_S0 / 2 * np.trace(
                                         np.dot(np.dot(X, V), np.transpose(X)))

        return cost

    def egrad(X):

        partial_grad = np.zeros([L, P, N])

        for n in np.arange(N):
            partial_grad[:, :, n] = np.dot(X, np.diag(psi[:, n])) - np.dot(
                S[:, :, n], np.diag(psi[:, n]))

        egrad = lambda_S * np.sum(partial_grad, axis=2) + lambda_S0 * np.dot(
            X, V)

        return egrad

    A = A_init
    S = np.zeros([L, P, N])
    psi = np.ones([P, N])

    for n in np.arange(N):
        S[:, :, n] = S0

    maxiter = 200

    U = A  # split variable
    D = np.zeros(A.shape)  # Lagrange multipliers

    rho = 1

    maxiter_ADMM = 100
    tol_A_ADMM = 10**-3
    tol_A = 10**-3
    tol_S = 10**-3
    tol_psi = 10**-3
    tol_S0 = 10**-3

    I = np.identity(P)

    for i in np.arange(maxiter):

        A_old = np.copy(A)
        psi_old = np.copy(psi)
        S_old = np.copy(S)
        S0_old = np.copy(S0)

        # A update

        for j in np.arange(maxiter_ADMM):

            A_old_ADMM = np.copy(A)

            for n in np.arange(N):
                A[:, n] = np.dot(
                    np.linalg.inv(
                        np.dot(np.transpose(S[:, :, n]), S[:, :, n]) +
                        rho * I),
                    np.dot(np.transpose(S[:, :, n]), data[:, n]) + rho *
                    (U[:, n] - D[:, n]))

            U = proj_simplex(A + D)

            D = D + A - U
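            # Scaled-form ADMM step: U is the simplex-projected copy of A and
            # D accumulates the running constraint residual A - U.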

            if j > 0:
                rel_A_ADMM = np.abs((np.linalg.norm(A, 'fro') - np.linalg.norm(
                    A_old_ADMM, 'fro'))) / np.linalg.norm(A_old_ADMM, 'fro')

                print("iteration ", j, " of ", maxiter_ADMM, ", rel_A_ADMM =",
                      rel_A_ADMM)

                if rel_A_ADMM < tol_A_ADMM:
                    break

        # psi update

        for n in np.arange(N):
            for p in np.arange(P):
                psi[p,
                    n] = np.dot(np.transpose(S0[:, p]), S[:, p, n]) / np.dot(
                        np.transpose(S0[:, p]), S0[:, p])

        # S update

        for n in np.arange(N):
            S[:, :, n] = np.dot(
                np.outer(data[:, n], np.transpose(A[:, n])) +
                lambda_S * np.dot(S0, np.diag(psi[:, n])),
                np.linalg.inv(
                    np.outer(A[:, n], np.transpose(A[:, n])) + lambda_S * I))

        # S0 update

        manifold = Oblique(L, P)
        solver = ConjugateGradient()
        problem = Problem(manifold=manifold, cost=cost, egrad=egrad)
        S0 = solver.solve(problem)
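        # Oblique(L, P) constrains each column of S0 to the unit sphere, so
        # the reference spectra keep unit norm while cost/egrad defined above
        # are minimised by Riemannian conjugate gradient.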

        # termination checks

        if i > 0:

            S_vec = np.hstack(S)

            rel_A = np.abs(
                np.linalg.norm(A, 'fro') -
                np.linalg.norm(A_old, 'fro')) / np.linalg.norm(A_old, 'fro')
            rel_psi = np.abs(
                np.linalg.norm(psi, 'fro') -
                np.linalg.norm(psi_old, 'fro')) / np.linalg.norm(
                    psi_old, 'fro')
            rel_S = np.abs(
                np.linalg.norm(S_vec) -
                np.linalg.norm(np.hstack(S_old))) / np.linalg.norm(
                    np.hstack(S_old))
            rel_S0 = np.abs(
                np.linalg.norm(S0, 'fro') -
                np.linalg.norm(S0_old, 'fro')) / np.linalg.norm(S0_old, 'fro')

            print("iteration ", i, " of ", maxiter, ", rel_A =", rel_A,
                  ", rel_psi =", rel_psi, "rel_S =", rel_S, "rel_S0 =", rel_S0)

            if rel_A < tol_A and rel_psi < tol_psi and rel_S < tol_S and rel_S0 < tol_S0 and i > 1:
                break

    return A, psi, S, S0
    loss = loss / len(S) + lam12 * norm12
    return loss


# create the problems, defined over the manifold
problem_L1 = Problem(manifold=manifold, cost=costL1)
problem_L12 = Problem(manifold=manifold, cost=costL12)
# Instantiate a pymanopt solver
solver = ConjugateGradient(maxiter=100)

# solve each problem:

# Lasso method:
print("Beginning test with L_1")
ts = time()
Khat_L1 = solver.solve(problem_L1)
print(np.shape(Khat_L1))
tot_time = time() - ts
emp_loss, log_loss = lossK(Khat_L1, X, S)
print("L_1 regularization: Time=%f, emp_loss=%f, log_loss=%f" %
      (tot_time, emp_loss, log_loss))

# L_{1,2} regularized method
print("Beginning test with L_{1,2}")
ts = time()
Khat_L12 = solver.solve(problem_L12)
tot_time = time() - ts
emp_loss, log_loss = lossK(Khat_L12, X, S)
print("L_{1,2} regularization: Time=%f, emp_loss=%f, log_loss=%f" %
      (tot_time, emp_loss, log_loss))
Пример #37
0
    def _update_classifier(self, data, labels, w, classes):
        """Update the classifier parameters theta and bias

        Parameters
        ----------

        data : list of 2D arrays, element i has shape=[voxels_i, samples_i]
            Each element in the list contains the fMRI data of one subject for
            the classification task.

        labels : list of arrays of int, element i has shape=[samples_i]
            Each element in the list contains the labels for the data samples
            in data_sup.

        w : list of 2D array, element i has shape=[voxels_i, features]
            The orthogonal transforms (mappings) :math:`W_i` for each subject.

        classes : int
            The number of classes in the classifier.


        Returns
        -------

        theta : array, shape=[features, classes]
            The MLR parameter for the class planes.

        bias : array shape=[classes,]
            The MLR parameter for class biases.
        """

        # Stack the data and labels for training the classifier
        data_stacked, labels_stacked, weights = \
            SSSRM._stack_list(data, labels, w)

        features = w[0].shape[1]
        total_samples = weights.size

        data_th = S.shared(data_stacked.astype(theano.config.floatX))
        val_ = S.shared(labels_stacked)
        total_samples_S = S.shared(total_samples)
        theta_th = T.matrix(name='theta', dtype=theano.config.floatX)
        bias_th = T.col(name='bias', dtype=theano.config.floatX)
        constf2 = S.shared(self.alpha / self.gamma, allow_downcast=True)
        weights_th = S.shared(weights)

        log_p_y_given_x = \
            T.log(T.nnet.softmax((theta_th.T.dot(data_th.T)).T + bias_th.T))
        f = -constf2 * T.sum((log_p_y_given_x[T.arange(total_samples_S), val_])
                             / weights_th) + 0.5 * T.sum(theta_th ** 2)

        manifold = Product((Euclidean(features, classes),
                            Euclidean(classes, 1)))
        problem = Problem(manifold=manifold, cost=f, arg=[theta_th, bias_th],
                          verbosity=0)
        solver = ConjugateGradient(mingradnorm=1e-6)
        solution = solver.solve(problem)
        theta = solution[0]
        bias = solution[1]

        del constf2
        del theta_th
        del bias_th
        del data_th
        del val_
        del solver
        del solution

        return theta, bias