Example #1
def gtv_cvlam(X, y, q, num_folds=5, num_lams=20):
    n = len(X)
    folds = create_folds(n, num_folds)
    scores = np.zeros(num_lams)
    lams = None
    for i, fold in enumerate(folds):
        mask = np.ones(n, dtype=bool)
        mask[fold] = False
        x_train, y_train = X[mask], y[mask]
        x_test, y_test = X[~mask], y[~mask]
        data, weights, grid = bucket_vals(x_train, y_train, q)
        results = solve_gfl(data,
                            None,
                            weights=weights,
                            full_path=True,
                            minlam=0.1,
                            maxlam=20.,
                            numlam=num_lams)
        fold_score = np.array([
            mse(y_test, predict(x_test, beta, grid))
            for beta in results['beta']
        ])
        scores += fold_score
        if i == 0:
            lams = results['lambda']
    scores /= float(num_folds)
    lam_best = lams[np.argmin(scores)]
    data, weights, grid = bucket_vals(X, y, q)
    beta = solve_gfl(data, None, weights=weights, lam=lam_best)
    return beta.reshape(q), grid
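A minimal sketch of how this cross-validation routine might be invoked, assuming the project-specific helpers it relies on (create_folds, bucket_vals, predict, mse) are importable from the same module; the 2-D locations, the noise level, and the grid shape q=(20, 20) are purely illustrative:

import numpy as np

np.random.seed(0)
X = np.random.uniform(0, 1, size=(500, 2))      # 2-D spatial locations
y = np.where(X[:, 0] > 0.5, 1.0, -1.0) + np.random.normal(0, 0.3, size=500)

# Cross-validate lambda on a bucketed 20x20 grid, then refit on all the data
beta_grid, grid = gtv_cvlam(X, y, q=(20, 20))
print(beta_grid.shape)                          # -> (20, 20)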
Example #2
def imtv():
    parser = argparse.ArgumentParser(
        description=
        'Runs the graph-fused lasso (GFL) solver on an image. Note: this is currently pretty slow for even medium-sized color images.'
    )

    parser.add_argument('imagefile',
                        help='The file containing the image to denoise.')
    parser.add_argument('output', help='The file to output the results.')

    parser.add_argument(
        '--verbose',
        type=int,
        default=1,
        help=
        'The level of print statements. 0=none, 1=moderate, 2=all. Default=1.')
    parser.add_argument('--time',
                        action='store_true',
                        help='Print the timing stats for the algorithm.')

    parser.set_defaults()

    args = parser.parse_args()

    ########### Load data from file
    if args.verbose:
        print('Loading image')

    from scipy.misc import imsave, imread
    from utils import hypercube_edges

    y = imread(args.imagefile).astype(float)
    y_mean = y.mean()
    y -= y_mean

    edges = hypercube_edges(y.shape)

    if args.verbose:
        print('Solving the GFL for {0} variables with {1} edges'.format(
            len(y.flatten()), len(edges)))

    ########### Run the C solver
    t0 = time.perf_counter()
    beta = solve_gfl(y.flatten(),
                     edges,
                     verbose=args.verbose,
                     converge=1e-3,
                     maxsteps=3000)
    t1 = time.perf_counter()

    ########### Print the timing stats
    if args.time:
        # The step count is not exposed through the easy solve_gfl interface,
        # so only the wall-clock time is reported.
        print('Solved the GFL in {0}s.'.format(t1 - t0))

    ########### Save the results to file
    if args.verbose:
        print('Saving results to {0}'.format(args.output))
    imsave(args.output, beta.reshape(y.shape) + y_mean)
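scipy.misc.imread and imsave have been removed from recent SciPy releases, so the script above only runs against older SciPy versions. A minimal sketch of the same denoising step with imageio handling the file I/O instead (imageio is an assumption here, and the file names are placeholders):

import numpy as np
import imageio
from pygfl.easy import solve_gfl
from pygfl.utils import hypercube_edges

y = imageio.imread('noisy.png').astype(float)    # placeholder input path
y_mean = y.mean()
edges = hypercube_edges(y.shape)

beta = solve_gfl(y.flatten() - y_mean, edges, converge=1e-3, maxsteps=3000)

denoised = np.clip(beta.reshape(y.shape) + y_mean, 0, 255).astype(np.uint8)
imageio.imwrite('denoised.png', denoised)        # placeholder output path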
Example #3
    def __init__(self, y):
        # Pre-cache a sparse LU decomposition of the FL matrix
        from pygfl.utils import get_1d_penalty_matrix
        from scipy.sparse.linalg import factorized
        from scipy.sparse import csc_matrix
        D = get_1d_penalty_matrix(y.shape[0])
        D = np.vstack([D, np.zeros(y.shape[0])])
        D[-1,-1] = 1e-6 # Nugget for full rank matrix
        D = csc_matrix(D)
        self.invD = factorized(D)

        # Setup the fast GFL solver
        from pygfl.solver import TrailSolver
        from pygfl.trails import decompose_graph
        from pygfl.utils import hypercube_edges, chains_to_trails
        from networkx import Graph
        edges = hypercube_edges(y.shape)
        g = Graph()
        g.add_edges_from(edges)
        chains = decompose_graph(g, heuristic='greedy')
        ntrails, trails, breakpoints, edges = chains_to_trails(chains)
        self.solver = TrailSolver()
        self.solver.set_data(y, edges, ntrails, trails, breakpoints)

        from pygfl.easy import solve_gfl
        self.beta = solve_gfl(y)
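The constructor caches two things: a callable sparse LU solve for the nugget-augmented 1-D penalty matrix, stored as self.invD, and a TrailSolver initialized with a trail decomposition of the chain graph. A minimal usage sketch, with SignalCache as a purely hypothetical name for the enclosing class (plausibly the FastWeightedFusedLassoSolver referenced in Example #11):

import numpy as np

y = np.random.normal(size=200)
cache = SignalCache(y)       # hypothetical name for the class defining __init__ above

smooth = cache.beta          # plain GFL fit of y, computed in the constructor
rhs = np.random.normal(size=200)
x = cache.invD(rhs)          # solves D x = rhs via the pre-factorized sparse LU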
Example #4
def gfusedlasso(z, A, lam=None):
    # print(type(z),type(A),type(lam))
    A = np.triu(A) > 0
    edges = np.stack(np.mask_indices(A.shape[0], lambda n, k: A), axis=-1)
    # print(z.shape,z.dtype,edges.shape,edges.dtype,lam)
    z_fused = solve_gfl(z.astype(np.float64), edges, lam=lam)
    return z_fused.astype(z.dtype)
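This wrapper takes an adjacency matrix rather than an edge list: np.triu keeps each undirected edge once, and np.mask_indices turns the boolean upper triangle into the (i, j) pairs that solve_gfl expects. A minimal sketch on a 5-node chain with illustrative data:

import numpy as np

# Adjacency matrix of the chain 0-1-2-3-4
A = np.zeros((5, 5))
for i in range(4):
    A[i, i + 1] = A[i + 1, i] = 1

z = np.array([1.0, 1.1, 0.9, 5.0, 5.2], dtype=np.float32)
z_smooth = gfusedlasso(z, A, lam=1.0)   # result comes back in z's original dtype
print(z_smooth)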
Example #5
def TVTR(X, Y, edges, stacked=0, lam=1, rho=1, tol=0.05, verbose=1):
    if verbose:
        print(X.shape)
        print(Y.shape)
        print(edges.shape)
    y = np.array(Y, dtype='float64')
    n = X.shape[0]
    m = Y.shape[1]
    Im = np.eye(n)
    M = Im - X.dot(LA.inv(X.T.dot(X))).dot(X.T)
    edge_all = edges
    if stacked == 0:
        for k in range(1, n):
            edge_all = np.vstack((edge_all, edges + m * k))

    edge_all = np.asarray(edge_all, dtype='int')
    converge = 0
    #initialize parameters
    theta = np.random.rand(n, m)
    mu = np.random.rand(n, m)
    eta = np.random.rand(n, m)
    U = np.zeros((n, m))
    V = np.zeros((n, m))
    # infeas = LA.norm(M.dot(G))
    iter = 0
    while not converge:
        theta_last = theta
        theta = (y + rho * (mu - V + eta - U)) / (1 + 2 * rho)
        eta = (Im - M).dot((theta + U))
        if verbose:
            print('entering iteration ' + str(iter) + ' solve_gfl')

        mu = solve_gfl((theta + V).reshape((n * m, ), order="C"),
                       edge_all,
                       minlam=lam / rho,
                       maxlam=lam / rho,
                       numlam=1)
        mu = mu.reshape(n, m)
        U = U + theta - eta
        V = V + theta - mu
        infeas = LA.norm(theta - eta) / LA.norm(theta)
        relerr = LA.norm(theta_last - theta) / LA.norm(theta_last)
        converge = infeas < tol and relerr < tol
        iter += 1
        print('Iter: ' + str(iter) + '\t rel_err ' + str(relerr) +
              '\t Infeasibility ' + str(infeas))

    gamma = LA.inv(X.T.dot(X)).dot(X.T).dot(theta)
    return gamma
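A minimal sketch of how this ADMM routine might be driven, assuming LA is numpy.linalg and solve_gfl comes from pygfl.easy as in the other examples; the chain edge set over the m response columns and the synthetic data are illustrative:

import numpy as np
import numpy.linalg as LA
from pygfl.easy import solve_gfl

np.random.seed(0)
n, p, m = 30, 3, 25
X = np.random.normal(size=(n, p))
gamma_true = np.zeros((p, m))
gamma_true[:, :m // 2] = 1.0                     # piecewise-constant coefficient rows
Y = X.dot(gamma_true) + np.random.normal(scale=0.1, size=(n, m))

# Chain graph over the m response coordinates
edges = np.array([[j, j + 1] for j in range(m - 1)])

gamma_hat = TVTR(X, Y, edges, lam=1.0, rho=1.0, tol=0.05)
print(gamma_hat.shape)                           # -> (p, m)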
Example #6
def TVTRminibatch(X, Y, edges, num_epochs=1, mini_batch_size=16, lam=1, rho=1, tol=0.05, verbose=1, seed=135):
    if verbose:
        print (X.shape)
        print (Y.shape)
        print (edges.shape)

    y = np.array(Y, dtype='float64')
    m = y.shape[1]
    n = X.shape[0]
    theta = np.random.rand(n, m)
    mu = np.random.rand(n, m)
    eta = np.random.rand(n, m)
    U = np.zeros((n, m))
    V = np.zeros((n, m))
    Im = np.eye(n)
    M = Im - X.dot(LA.inv(X.T.dot(X))).dot(X.T)
    converge = 0
    for i in range(num_epochs):
        seed = seed + 1
        iter = 1
        minibatches = random_mini_batches(X, mini_batch_size, seed)
        for minibatch in minibatches:
            theta_last = theta
            theta = (y + rho * (mu - V + eta - U)) / (1 + 2 * rho)
            eta = (Im - M).dot((theta + U))
            if verbose:
                print('entering epoch ' + str(i) + ', iteration ' + str(iter) + ' solve_gfl')

            n_minibatch = len(minibatch)
            edge_all = edges
            for k in range(1, n_minibatch):
                edge_all = np.vstack((edge_all, edges + m * k))

            minibatch_mu = solve_gfl((theta + V)[minibatch].reshape((n_minibatch * m,), order="C"),
                                     edge_all,
                                     minlam=lam / rho,
                                     maxlam=lam / rho,
                                     numlam=1)
            mu[minibatch] = minibatch_mu.reshape(n_minibatch, m)
            U = U + theta - eta
            V = V + theta - mu
            infeas = LA.norm(theta - eta) / LA.norm(theta)
            relerr = LA.norm(theta_last - theta) / LA.norm(theta_last)
            converge = infeas < tol and relerr < tol
            iter += 1
            print('epoch ' + str(i) + ' Iter: ' + str(iter) + '\t rel_err ' + str(relerr) + '\t Infeasibility ' + str(infeas))
            if converge:
                break

    gamma = LA.inv(X.T.dot(X)).dot(X.T).dot(theta)
    return gamma
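random_mini_batches is not shown in this excerpt; one plausible form, sketched here purely as an assumption, shuffles the row indices and yields arrays of roughly mini_batch_size indices each, which is all the loop above requires of it:

import numpy as np

def random_mini_batches(X, mini_batch_size, seed):
    # Shuffle the row indices and split them into consecutive mini-batches
    rng = np.random.RandomState(seed)
    idx = rng.permutation(X.shape[0])
    return [idx[i:i + mini_batch_size]
            for i in range(0, X.shape[0], mini_batch_size)]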
Example #7
def denoisemini(y, edge, lam, rho):
    num, m = y.shape
    minibatch_mu = solve_gfl(y.reshape((num * m,), order="C"), edge,
                             minlam=lam / rho, maxlam=lam / rho, numlam=1)
    minibatch_mu = minibatch_mu.reshape((num, m))
    return minibatch_mu
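A minimal sketch of calling this helper on a small batch of 1-D signals; the edge array must already index into the flattened (num * m,) vector, so the chain over the m columns is replicated once per row (data are illustrative):

import numpy as np

np.random.seed(0)
num, m = 4, 50
y = np.tile(np.concatenate([np.zeros(25), np.ones(25)]), (num, 1))
y += np.random.normal(scale=0.2, size=(num, m))

# Chain edges over each row of the flattened vector
chain = np.array([[j, j + 1] for j in range(m - 1)])
edge = np.vstack([chain + m * k for k in range(num)])

y_denoised = denoisemini(y, edge, lam=2.0, rho=1.0)
print(y_denoised.shape)        # -> (4, 50)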
Example #8
def main():
    parser = argparse.ArgumentParser(
        description=
        'Runs the graph-fused lasso (GFL) solver on a given dataset with a given edge set. The GFL problem is defined as finding Beta such that it minimizes the equation f(Y, Beta) + lambda * g(E, Beta) where f is a smooth, convex loss function, typically 1/2 sum_i (y_i - Beta_i)^2, and g is sum of first differences on edges, sum_(s,t) |Beta_s - Beta_t| for each edge (s,t) in E.'
    )

    parser.add_argument(
        'data', help='The CSV file containing the vector of data points.')
    parser.add_argument(
        'edges',
        help=
        'The CSV file containing the edges connecting the variables, with one edge per line.'
    )
    parser.add_argument('--output',
                        '--o',
                        help='The file to output the results.')

    parser.add_argument(
        '--verbose',
        type=int,
        default=1,
        help=
        'The level of print statements. 0=none, 1=moderate, 2=all. Default=1.')
    parser.add_argument('--time',
                        action='store_true',
                        help='Print the timing stats for the algorithm.')

    parser.set_defaults()

    args = parser.parse_args()

    ########### Load data from file
    if args.verbose:
        print('Loading data')

    y = np.loadtxt(args.data, delimiter=',')
    edges = np.loadtxt(args.edges, delimiter=',', dtype=int)

    if args.verbose:
        print('Solving the GFL for {0} variables with {1} edges'.format(
            len(y), len(edges)))

    ########### Run the C solver
    t0 = time.perf_counter()
    beta = solve_gfl(y, edges, verbose=args.verbose)
    t1 = time.perf_counter()

    ########### Print the timing stats
    if args.time:
        # The step count is not exposed through the easy solve_gfl interface,
        # so only the wall-clock time is reported.
        print('Solved the GFL in {0}s.'.format(t1 - t0))

    ########### Save the results to file
    if args.output:
        if args.verbose:
            print('Saving results to {0}'.format(args.output))
        np.savetxt(args.output, beta, delimiter=',')
    else:
        print('Results:')
        print(beta)
Example #9
def numpy_gfusedlasso(z, edge, lam=None):
    # Guard against the default lam=None, which float() would reject
    lam = float(lam) if lam is not None else None
    z_fused = solve_gfl(z.astype(np.float64), edge.astype('int'), lam=lam)
    return z_fused.astype(z.dtype)
Example #10
import matplotlib.pylab as plt
import numpy as np
from pygfl.easy import solve_gfl

truth = np.zeros(200)
truth[:50] = 0.5
truth[50:100] = 0.75
truth[100:150] = 0.25
truth[150:180] = 0.1
truth[180:] = 0.9

trials = np.random.poisson(10, size=200)
successes = np.array([(np.random.random(size=t) <= p).sum() for t,p in zip(trials, truth)])

beta = solve_gfl((trials, successes), loss='binomial')

plt.scatter(np.arange(200)+1, successes / trials.astype(float))
plt.plot(np.arange(200)+1, truth, color='gray', alpha=0.5)
plt.plot(np.arange(200)+1, 1. / (1+np.exp(-beta)), color='orange')
plt.show()
Example #11
def ss_gfl(y, min_spike=1e-4, max_spike=1e2, nspikes=30, max_steps=100, rel_tol=1e-6, a=None, b=None, **kwargs):
    # if a is None:
    #     a = np.sqrt(len(y))
    # if b is None:
    #     b = np.sqrt(len(y))
    sigma, a, b = estimate_hyperparams(y)
    print('sigma: {} a: {} b: {} expected proportion of nulls: {:.2f}'.format(sigma, a, b, a / (a+b)))
    
    # Create the log-space grid of spikes
    spike_grid = np.exp(np.linspace(np.log(min_spike), np.log(max_spike), nspikes))

    # Initialize beta at the observations
    # beta = y.copy()
    from pygfl.easy import solve_gfl
    beta = solve_gfl(y)

    # Use an equal weighted mixture to start, with two identical slabs
    # diffs = np.abs(beta[1:] - beta[:-1])
    # theta = (a + (diffs < 1e-3).sum()) / (a + b + beta.shape[0] - 3)
    theta = 0.5
    slab = min_spike

    # Track convergence
    prev = beta.copy()

    # Track the BIC path
    bic = np.zeros(nspikes)

    # Create a fast solver for the 1d fused lasso
    fl_solver = FastWeightedFusedLassoSolver(y)

    # Run over the entire solution path of spikes, starting from very flat
    # spikes and going to very sharp spikes
    betas = np.zeros((nspikes, y.shape[0]))
    for spike_idx, spike in enumerate(spike_grid):
        print('Spike {}/{}: {:.4f}'.format(spike_idx+1, nspikes, spike))
        # Run the EM algorithm for the fixed spike, using the warm-started
        # beta and theta values
        for step in range(max_steps):
            print('\tStep {}'.format(step))
            # Get the beta differences
            diffs = np.abs(beta[1:] - beta[:-1])
            # np.set_printoptions(suppress=True, precision=2)
            # print(diffs)

            # E-step: Expected local mixture probabilities given theta and beta
            spike_prob = theta * np.exp(-diffs * spike) * spike
            slab_prob = (1-theta) * np.exp(-diffs * slab) * slab
            gamma = spike_prob / (spike_prob + slab_prob)

            # We have a 2-part M-step.
            # (i) M-step for beta: run the fused lasso with edge weights:
            #           lam_ij = gamma_ij*spike + (1-gamma_ij)*slab
            lams = gamma*spike + (1-gamma)*slab
            beta = solve_weighted_gfl(y, lams * sigma, beta_init=beta, **kwargs)

            # (ii) M-step for theta: MLE prior mixture probabilities
            theta = (a + gamma.sum()) / (a + b + beta.shape[0] - 3)
            theta = theta.clip(1e-3,1-1e-3) # Don't let theta get too big. Equivalent to choosing a and b proportional to the number of nodes
            print('\ttheta: {:.3f}'.format(theta))

            # Check for convergence
            delta = np.linalg.norm(prev - beta)
            if delta <= rel_tol:
                print()
                break

            print('\tDelta={:.6f}'.format(delta))
            print()
            prev = beta.copy()

        # Calculate BIC = -2ln(L) + dof * (ln(n) - ln(2pi))
        nll = 0.5 / sigma * ((y - beta)**2).sum()  # negative log-likelihood up to constants
        dof = (np.abs(beta[1:] - beta[:-1]) >= 1e-4).sum() + 1
        bic[spike_idx] = 2*nll + dof * (np.log(beta.shape[0]) - np.log(2 * np.pi))
        print('NLL: {:.4f} dof: {} BIC: {:.2f}'.format(nll, dof, bic[spike_idx]))

        # if spike_idx > 0 and np.abs(bic[spike_idx] - bic[spike_idx-1]) <= rel_tol:
        #     break

        # Save the entire path of solutions
        betas[spike_idx] = beta
        

    return {'betas': betas[:spike_idx], 'bic': bic[:spike_idx]}
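The E-step above treats each successive difference |beta_{i+1} - beta_i| as drawn from a two-component mixture of Laplace (double-exponential) densities with rates spike and slab, and gamma is the posterior probability that a difference belongs to the spike (near-zero) component. A toy sketch of that single computation with made-up numbers:

import numpy as np

theta, spike, slab = 0.5, 10.0, 0.1
diffs = np.array([0.01, 0.02, 3.0])              # illustrative |beta_{i+1} - beta_i|

spike_prob = theta * spike * np.exp(-diffs * spike)
slab_prob = (1 - theta) * slab * np.exp(-diffs * slab)
gamma = spike_prob / (spike_prob + slab_prob)
print(gamma)   # near 1 for the small differences, near 0 for the large jump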
Example #12
if __name__ == '__main__':
    from pygfl.easy import solve_gfl
    import matplotlib.pyplot as plt
    np.random.seed(5)
    truth = np.array([0]*20 + [4]*30 + [-1]*40 + [-5]*20 + [-1]*30 + [1.8]*10 + [-0.8]*30 + [3]*50)
    X = np.arange(1,1+len(truth))
    Y = np.random.normal(truth)

    # Fit the ordinary GFL
    beta_gfl = solve_gfl(Y)

    # Fit the spike-and-slab GFL
    results = ss_gfl(Y)

    # Use the last solution
    beta_ssl = results['betas'][-1]

    # Use the BIC solution
    # beta_ssl_bic = results['betas'][np.argmin(results['bic'])]

    plt.scatter(X, Y, color='gray', alpha=0.2, label='Observations')
    plt.plot(X, truth, color='black', label='Truth')
    plt.plot(X, beta_gfl, color='blue', label='FL')
    plt.plot(X, beta_ssl, color='orange', label='SSFL')
    # plt.plot(X, beta_ssl_bic, color='green', label='SSFL (BIC)')
    plt.legend()
    plt.show()
Example #13
import matplotlib.pylab as plt
import numpy as np
from pygfl.easy import solve_gfl

truth = np.zeros(200)
truth[:50] = 0.5
truth[50:100] = 0.75
truth[100:150] = 0.25
truth[150:180] = 0.1
truth[180:] = 0.9

data = (np.random.random(size=200) <= truth).astype(int)

beta = solve_gfl(data, loss='logistic')

plt.scatter(np.arange(200) + 1, data)
plt.plot(np.arange(200) + 1, truth, color='gray', alpha=0.5)
plt.plot(np.arange(200) + 1, 1. / (1 + np.exp(-beta)), color='orange')
plt.show()