示例#1
0
def learnSubmodularMixture(training_data,
                           submod_shells,
                           loss_fun,
                           params=None,
                           loss_supermodular=False):
    '''
    Learns mixture weights of submodular functions. This code implements algorithm 1 of [1]

    For every training example one (sub)gradient step is taken on the weights, followed by a
    projection back onto the feasible set (non-negative weights, normalized to sum to one).
    The returned weights are the average over all per-iteration weights.

    :param training_data: training data. S[t].Y:             indices of possible set elements
                      S[t].y_gt:          indices selected in the ground truth solution
                      S[t].budget:        The budget for this example
    :param submod_shells:    A list of submodular shell functions
                      They need to be of the format submodular_function = shell_function(S[t])
    :param loss_fun:         A (submodular) loss, handed to the loss-augmented inference
    :param params:           SGD settings. If None, a default SGDparams() is created. The attributes
                      read here are learn_lambda, max_iter, use_ada_grad, norm_objective_scores,
                      use_l1_projection, momentum and nu (None, a constant, or a callable nu(it, T))
    :param loss_supermodular: True, if the loss is supermodular. Then, [5] is used for loss-augmented inference
    :return: learnt weights, weights per iteration.
             NOTE: if at most one function is instantiated, the plain integer 1 is returned instead.
    :raises IOError: if training_data is empty
    '''

    # Validate the input first, before logging or building default parameters
    if len(training_data) == 0:
        raise IOError('No training examples given')

    if params is None:
        params = SGDparams()
    logger.info('%s' % params)

    # Make a copy of the training samples so that shuffling doesn't reorder the input list
    training_examples = training_data[:]

    # Initialize the weights
    function_list, names = utils.instaciateFunctions(submod_shells,
                                                     training_examples[0])
    # The builtin float is used as dtype: the np.float alias was removed from NumPy
    w_0 = np.ones(len(function_list), float)

    learn_lambda = params.learn_lambda
    T = len(training_examples) * params.max_iter
    if learn_lambda is None:
        # Set learning rate according to theorem from
        # "Learning Mixtures of Submodular Shells" - Lin & Bilmes 2012
        # Assume:
        #  - w_i,f_i are all upperbounded by 1
        #  - loss l <= B for some B
        #  - ||g_t|| <= G, for some G
        # then, we use learning rate nu=2/ (lambda*t) with the lambda below
        M = len(submod_shells)
        G = 1.0
        learn_lambda = G / M * np.sqrt((2 + (1 + np.log(T)) / float(T)))

    # fudge factor as in http://xcorr.net/2014/01/23/adagrad-eliminating-learning-rates-in-stochastic-gradient-descent/
    fudge_factor = 1e-6  # for numerical stability
    logger.debug('Training using %d samples' % T)

    if len(function_list) <= 1:
        logger.info('Just 1 function. No work for me here :-)\n')
        # NOTE(review): this early exit returns an int, not the (weights, history) tuple
        return 1

    # Start training
    logger.info('regularizer lambda: %.3f' % learn_lambda)

    it = 0
    w = []
    exitTraining = False

    g_t_old = np.zeros(len(function_list))
    if params.use_ada_grad:
        historical_grad = np.zeros(len(function_list))

    while not exitTraining:

        start_time = time.time()

        # Start this iteration from the initial weights or from the previous iterate.
        # The entry is rebound to a fresh array by the update step below, so the
        # previous iterate is never modified in place.
        w.append(w_0 if it == 0 else w[it - 1])

        t = np.mod(it, len(training_examples))
        # Before each pass over the data: shuffle training examples
        if t == 0:
            logger.debug('Suffle training examples')
            random.shuffle(training_examples)

        if np.mod(it, 50) == 0:
            logger.info('Example %d of %d' % (it, T))
        logger.debug('%s (budget: %d)' %
                     (training_examples[t], training_examples[t].budget))
        logger.debug(training_examples[t].y_gt)

        # Instantiate the shells to submodular functions for this example
        function_list, names = utils.instaciateFunctions(
            submod_shells, training_examples[t])

        # Approximate loss augmented inference
        # (this is equivalent to a greedy submodular optimization)
        if loss_supermodular:
            y_t, _ = submodular_supermodular_maximization(
                training_examples[t], w[it], function_list,
                training_examples[t].budget, loss_fun)
        else:
            y_t, _, _ = leskovec_maximize(
                training_examples[t], w[it], function_list,
                training_examples[t].budget, loss_fun)
        assert (len(y_t) == training_examples[t].budget)

        # Subgradient: per-function scores of the inferred vs. the ground truth solution
        score_t = utils.evalSubFun(function_list, y_t, False)
        score_gt = utils.evalSubFun(function_list,
                                    list(training_examples[t].y_gt), True)

        if params.norm_objective_scores:
            score_t /= score_t.sum()
            score_gt /= score_gt.sum()

        if params.use_l1_projection:
            g_t = score_t - score_gt
        else:  # Lin et al. use an l2 regularized formulation, and have thus a different gradient
            g_t = learn_lambda * w[it] + (score_t - score_gt)
        g_t = ((1 - params.momentum) * g_t + params.momentum * g_t_old)

        if params.use_ada_grad:
            # See [6,7]
            # NOTE(review): g_t_old is only refreshed in this branch, so without AdaGrad
            # the momentum term above always blends with zeros - confirm this is intended
            g_t_old = g_t
            historical_grad += g_t**2
            g_t = g_t / (fudge_factor + np.sqrt(historical_grad))
        logger.debug('Gradient:')
        logger.debug(g_t)

        # Step size: 2 / (lambda * t) by default, otherwise a constant or a schedule nu(it, T)
        if params.nu is None:
            nu = 2.0 / float(learn_lambda * (it + 1))
        elif hasattr(params.nu, '__call__'):
            nu = params.nu(it, T)
        else:
            nu = params.nu
        if np.mod(it, 10) == 0:
            logger.info(
                'Nu: %.3f; Gradient: %s; Grad magnitue (abs): %.4f' %
                (nu, ', '.join(map(str, g_t)), nu * np.sum(np.abs(g_t))))

        # Update weights (creates a new array for this iteration)
        w[it] = w[it] - nu * g_t

        # Project to feasible set
        if params.use_l1_projection:
            # We want to keep the euclidean distance between the initial and the projected weight minimal
            target = w[it]
            if params.use_ada_grad:
                # See [7]: distance scaled by the accumulated squared gradients
                def obj(w_t):
                    return (np.multiply(w_t - target, w_t - target) /
                            (fudge_factor + historical_grad)).sum()
            else:
                def obj(w_t):
                    return np.inner(w_t - target, w_t - target)

            # Bounds such that every weight is >= 0
            bnds = tuple((0, None) for _ in function_list)
            # l1-ball inequality: sum(|x|) <= 1
            cons = ({'type': 'ineq', 'fun': lambda x: 1 - np.abs(x).sum()}, )

            # Optimize for the best projection into the l-1 ball,
            # starting the solver from the previous (feasible) weights
            x0 = w_0 if it == 0 else w[it - 1]
            res = scipy.optimize.minimize(obj, x0, constraints=cons,
                                          bounds=bnds)
            if res.success:
                assert not (res.x < -10**-5).any()
                w[it] = res.x

                # Note: We need to re-normalize the weights to sum to one, in order to give each SGD step the same weight
                if np.sum(w[it]) > 0:
                    w[it] = w[it] / np.sum(w[it])
            else:
                logger.warning(
                    'Iteration %d: l1: Failed to find constraint solution on w'
                    % it)
                # Fall back to clipping negative weights and re-normalizing
                w[it][w[it] < 0] = 0
                if w[it].sum() > 0:
                    w[it] = w[it] / w[it].sum()

        else:  # projection of [1]
            # update the weights according to [1] algorithm 1
            w[it][w[it] < 0] = 0
            if w[it].sum() > 0:
                w[it] = w[it] / np.sum(np.abs(w[it]))

        if np.mod(it, 10) == 0:
            logger.info('w[it]:\t%s' % ', '.join(map(str, w[it])))
        it = it + 1
        logger.debug(it)
        if it >= T:
            logger.warning('Break without convergence\n')
            exitTraining = True
        logger.debug("--- %.1f seconds ---" % (time.time() - start_time))

    # Return the averaged weights (See [1] algorithm 1)
    w_res = np.asarray(w).mean(axis=0)
    # Only normalize when there is mass to normalize; avoids a 0/0 division
    norm = np.abs(w_res).sum()
    if norm > 0:
        w_res /= norm

    logger.info('----------------------------\n')
    logger.info('Weights:\n')
    for w_idx, weight in enumerate(w_res):
        logger.info(' %20s: %2.3f%%' %
                    (names[w_idx], round(10000 * weight) / 100))
    logger.info('----------------------------\n')

    return w_res, w
示例#2
0
def learnSubmodularMixture(training_data, submod_shells, loss_fun, params=None, loss_supermodular=False):
    '''
    Learns mixture weights of submodular functions. This code implements algorithm 1 of [1]

    For every training example one (sub)gradient step is taken on the weights, followed by a
    projection back onto the feasible set (non-negative weights, normalized to sum to one).
    The returned weights are the average over all per-iteration weights.

    :param training_data: training data. S[t].Y:             indices of possible set elements
                      S[t].y_gt:          indices selected in the ground truth solution
                      S[t].budget:        The budget for this example
    :param submod_shells:    A list of submodular shell functions
                      They need to be of the format submodular_function = shell_function(S[t])
    :param loss_fun:         A (submodular) loss, handed to the loss-augmented inference
    :param params:           SGD settings. If None, a default SGDparams() is created. The attributes
                      read here are learn_lambda, max_iter, use_ada_grad, norm_objective_scores,
                      use_l1_projection, momentum and nu (None, a constant, or a callable nu(it, T))
    :param loss_supermodular: True, if the loss is supermodular. Then, [5] is used for loss-augmented inference
    :return: learnt weights, weights per iteration.
             NOTE: if at most one function is instantiated, the plain integer 1 is returned instead.
    :raises IOError: if training_data is empty
    '''

    # Validate the input first, before logging or building default parameters
    if len(training_data) == 0:
        raise IOError('No training examples given')

    if params is None:
        params = SGDparams()
    logger.info('%s' % params)

    # Make a copy of the training samples so that shuffling doesn't reorder the input list
    training_examples = training_data[:]

    # Initialize the weights
    function_list, names = utils.instaciateFunctions(submod_shells, training_examples[0])
    # The builtin float is used as dtype: the np.float alias was removed from NumPy
    w_0 = np.ones(len(function_list), float)

    learn_lambda = params.learn_lambda
    T = len(training_examples) * params.max_iter
    if learn_lambda is None:
        # Set learning rate according to theorem from
        # "Learning Mixtures of Submodular Shells" - Lin & Bilmes 2012
        # Assume:
        #  - w_i,f_i are all upperbounded by 1
        #  - loss l <= B for some B
        #  - ||g_t|| <= G, for some G
        # then, we use learning rate nu=2/ (lambda*t) with the lambda below
        M = len(submod_shells)
        G = 1.0
        learn_lambda = G / M * np.sqrt((2 + (1 + np.log(T)) / float(T)))

    # fudge factor as in http://xcorr.net/2014/01/23/adagrad-eliminating-learning-rates-in-stochastic-gradient-descent/
    fudge_factor = 1e-6  # for numerical stability
    logger.debug('Training using %d samples' % T)

    if len(function_list) <= 1:
        logger.info('Just 1 function. No work for me here :-)\n')
        # NOTE(review): this early exit returns an int, not the (weights, history) tuple
        return 1

    # Start training
    logger.info('regularizer lambda: %.3f' % learn_lambda)

    it = 0
    w = []
    exitTraining = False

    g_t_old = np.zeros(len(function_list))
    if params.use_ada_grad:
        historical_grad = np.zeros(len(function_list))

    while not exitTraining:

        start_time = time.time()

        # Start this iteration from the initial weights or from the previous iterate.
        # The entry is rebound to a fresh array by the update step below, so the
        # previous iterate is never modified in place.
        w.append(w_0 if it == 0 else w[it - 1])

        t = np.mod(it, len(training_examples))

        # Before each pass over the data: shuffle training examples
        if t == 0:
            logger.debug('Suffle training examples')
            random.shuffle(training_examples)

        if np.mod(it, 50) == 0:
            logger.info('Example %d of %d' % (it, T))
        logger.debug('%s (budget: %d)' % (training_examples[t], training_examples[t].budget))
        logger.debug(training_examples[t].y_gt)

        # Instantiate the shells to submodular functions for this example
        function_list, names = utils.instaciateFunctions(submod_shells, training_examples[t])

        # Approximate loss augmented inference
        # (this is equivalent to a greedy submodular optimization)
        if loss_supermodular:
            y_t, _ = submodular_supermodular_maximization(training_examples[t], w[it], function_list, training_examples[t].budget, loss_fun)
        else:
            y_t, _, _ = leskovec_maximize(training_examples[t], w[it], function_list, training_examples[t].budget, loss_fun)
        assert (len(y_t) == training_examples[t].budget)

        # Subgradient: per-function scores of the inferred vs. the ground truth solution
        score_t = utils.evalSubFun(function_list, y_t, False)
        score_gt = utils.evalSubFun(function_list, list(training_examples[t].y_gt), True)

        if params.norm_objective_scores:
            score_t /= score_t.sum()
            score_gt /= score_gt.sum()

        if params.use_l1_projection:
            g_t = score_t - score_gt
        else:  # Lin et al. use an l2 regularized formulation, and have thus a different gradient
            g_t = learn_lambda * w[it] + (score_t - score_gt)
        g_t = ((1 - params.momentum) * g_t + params.momentum * g_t_old)

        if params.use_ada_grad:
            # See [6,7]
            # NOTE(review): g_t_old is only refreshed in this branch, so without AdaGrad
            # the momentum term above always blends with zeros - confirm this is intended
            g_t_old = g_t
            historical_grad += g_t**2
            g_t = g_t / (fudge_factor + np.sqrt(historical_grad))
        logger.debug('Gradient:')
        logger.debug(g_t)

        # Step size: 2 / (lambda * t) by default, otherwise a constant or a schedule nu(it, T)
        if params.nu is None:
            nu = 2.0 / float(learn_lambda * (it + 1))
        elif hasattr(params.nu, '__call__'):
            nu = params.nu(it, T)
        else:
            nu = params.nu
        if np.mod(it, 10) == 0:
            logger.info('Nu: %.3f; Gradient: %s; Grad magnitue (abs): %.4f' % (nu, ', '.join(map(str, g_t)), nu * np.sum(np.abs(g_t))))

        # Update weights (creates a new array for this iteration)
        w[it] = w[it] - nu * g_t

        # Project to feasible set
        if params.use_l1_projection:
            # We want to keep the euclidean distance between the initial and the projected weight minimal
            target = w[it]
            if params.use_ada_grad:
                # See [7]: distance scaled by the accumulated squared gradients
                def obj(w_t):
                    return (np.multiply(w_t - target, w_t - target) / (fudge_factor + historical_grad)).sum()
            else:
                def obj(w_t):
                    return np.inner(w_t - target, w_t - target)

            # Bounds such that every weight is >= 0
            bnds = tuple((0, None) for _ in function_list)
            # l1-ball inequality: sum(|x|) <= 1
            cons = ({'type': 'ineq', 'fun': lambda x: 1 - np.abs(x).sum()}, )

            # Optimize for the best projection into the l-1 ball,
            # starting the solver from the previous (feasible) weights
            x0 = w_0 if it == 0 else w[it - 1]
            res = scipy.optimize.minimize(obj, x0, constraints=cons, bounds=bnds)
            if res.success:
                assert not (res.x < -10**-5).any()
                w[it] = res.x

                # Note: We need to re-normalize the weights to sum to one, in order to give each SGD step the same weight
                if np.sum(w[it]) > 0:
                    w[it] = w[it] / np.sum(w[it])
            else:
                logger.warning('Iteration %d: l1: Failed to find constraint solution on w' % it)
                # Fall back to clipping negative weights and re-normalizing
                w[it][w[it] < 0] = 0
                if w[it].sum() > 0:
                    w[it] = w[it] / w[it].sum()

        else:  # projection of [1]
            # update the weights according to [1] algorithm 1
            w[it][w[it] < 0] = 0
            if w[it].sum() > 0:
                w[it] = w[it] / np.sum(np.abs(w[it]))

        if np.mod(it, 10) == 0:
            logger.info('w[it]:\t%s' % ', '.join(map(str, w[it])))
        it = it + 1
        logger.debug(it)
        if it >= T:
            logger.warning('Break without convergence\n')
            exitTraining = True
        logger.debug("--- %.1f seconds ---" % (time.time() - start_time))

    # Return the averaged weights (See [1] algorithm 1)
    w_res = np.asarray(w).mean(axis=0)
    # Only normalize when there is mass to normalize; avoids a 0/0 division
    norm = np.abs(w_res).sum()
    if norm > 0:
        w_res /= norm

    logger.info('----------------------------\n')
    logger.info('Weights:\n')
    for w_idx, weight in enumerate(w_res):
        logger.info(' %20s: %2.3f%%' % (names[w_idx], round(10000 * weight) / 100))
    logger.info('----------------------------\n')

    return w_res, w
示例#3
0
def lazy_greedy_maximize(S,
                         w,
                         submod_fun,
                         budget,
                         loss_fun=None,
                         useCost=False,
                         randomize=True):
    '''
    Implements the submodular maximization algorithm of [4]

    Elements are added greedily. The marginal gain of an element is only re-evaluated
    when that element is at the top of the ranking and its stored gain is out of date.

    :param S: data object containing information needed in the objective functions.
              Read here: S.Y (the candidate elements) and S.getCosts() (cost per element)
    :param w: weights of the objectives
    :param submod_fun: submodular functions
    :param budget: budget
    :param loss_fun: optional loss function (for learning), called as loss_fun(S, selection)
    :param useCost: boolean. Take into account the costs per element or not
    :param randomize: randomize marginals before getting the maximum. This results in selecting a random element among the top scoring ones, rather than taking the one with the lowest index.
    :return: sel_indices, currScore, minoux_bound: the selected indices, the score of the
             solution and the upper bound computed in the first iteration (see [4])
    '''

    sel_indices = []
    # Tag used in the assertion message only: 'CB' when costs are used, 'UC' otherwise
    objective_type = 'CB' if useCost else 'UC'

    # Init arrays to keep track of marginal benefits.
    # isUpToDate holds 0 (stale), 1 (fresh) or -1 (element already selected)
    marginal_benefits = np.ones(len(S.Y), np.float32) * np.inf
    mb_indices = np.arange(len(S.Y))
    isUpToDate = np.zeros((len(S.Y), 1))

    costs = S.getCosts()

    currCost = 0.0
    currScore = 0.0
    i = 0

    if loss_fun is None:
        #FIXME: this is not actually a zero loss, but just a loss that is the same for all elements
        # This is a hack to ensure that, in case all weights w are zero, a non empty set is selected
        # i.e., just a random subset of size S.budget
        loss_fun = utils.zero_loss

    # Select as long as we are within budget and have elements to select
    while True:
        # Find the highest scoring element: refresh the top-ranked gain until it is up to date
        while (isUpToDate[mb_indices[0]] == 0):
            top = mb_indices[0]
            cand = list(sel_indices)
            cand.append(top)
            t_marg = (
                np.dot(w, utils.evalSubFun(submod_fun, cand, False, w)) +
                loss_fun(S, cand) - currScore)
            if useCost:
                # Gain per unit of cost
                t_marg = t_marg / float(costs[top])

            if not skipAssertions:
                # A refreshed gain must not exceed the previously stored one
                assert marginal_benefits[top] - t_marg >= -10**-5, (
                    '%s: Non-submodular objective at element %d!: Now: %.3f; Before: %.3f'
                    % (objective_type, top, t_marg, marginal_benefits[top]))
            marginal_benefits[top] = t_marg
            isUpToDate[top] = True

            # Re-rank by decreasing gain; the random permutation breaks ties randomly
            if randomize:
                idx1 = np.random.permutation(len(marginal_benefits))
                idx2 = (-marginal_benefits[idx1]).argsort(axis=0)
                mb_indices = idx1[idx2]
            else:
                mb_indices = (-marginal_benefits).argsort(axis=0)

            if not skipAssertions:
                # NOTE(review): this checks the gain of the last element by index,
                # not the smallest gain - confirm that this is what is intended
                assert marginal_benefits[
                    -1] > -10**-5, 'Non monotonic objective'

        # Compute upper bound (see [4]): sum of the best gains that fit into the budget
        if i == 0:
            best_sel_indices = np.where(
                costs[mb_indices].cumsum() <= budget)[0]
            minoux_bound = marginal_benefits[mb_indices][best_sel_indices].sum(
            )

        # Select the highest scoring element
        top = mb_indices[0]
        if marginal_benefits[top] > 0.0:
            logger.debug('Select element %d (gain %.3f)' %
                         (top, marginal_benefits[top]))
            sel_indices.append(top)

            if useCost:
                # Undo the per-cost scaling to get the absolute gain
                currScore = currScore + marginal_benefits[top] * float(
                    costs[top])
            else:
                currScore = currScore + marginal_benefits[top]
            currCost = currCost + costs[top]

            # Zero the gain of the selected element and flag it with -1 (so that it is not becoming a candidate again)
            # Set all others to not up to date (so that the marginal gain will be recomputed)
            marginal_benefits[top] = 0
            isUpToDate[isUpToDate == 1] = 0
            isUpToDate[top] = -1

            mb_indices = (-marginal_benefits).argsort()

        else:
            logger.debug(' If the best element is zero, we are done ')
            logger.debug(sel_indices)
            return sel_indices, currScore, minoux_bound

        # Check if we still have budget to select something:
        # elements that no longer fit get a zero gain and are marked as up to date
        for elIdx in range(0, len(S.Y)):
            if costs[elIdx] + currCost > budget:
                marginal_benefits[elIdx] = 0
                isUpToDate[elIdx] = 1

        if marginal_benefits.max() == 0:
            logger.debug('no elements left to select. Done')
            logger.debug(
                'Selected %d elements with a cost of %.1f (max: %.1f)' %
                (len(sel_indices), currCost, budget))
            logger.debug(sel_indices)
            return sel_indices, currScore, minoux_bound

        # Increase iteration number
        i += 1
示例#4
0
def lazy_greedy_maximize(S, w, submod_fun, budget, loss_fun=None, useCost=False, randomize=True):
    '''
    Implements the submodular maximization algorithm of [4]

    Elements are added greedily. The marginal gain of an element is only re-evaluated
    when that element is at the top of the ranking and its stored gain is out of date.

    :param S: data object containing information needed in the objective functions.
              Read here: S.Y (the candidate elements) and S.getCosts() (cost per element)
    :param w: weights of the objectives
    :param submod_fun: submodular functions
    :param budget: budget
    :param loss_fun: optional loss function (for learning), called as loss_fun(S, selection)
    :param useCost: boolean. Take into account the costs per element or not
    :param randomize: randomize marginals before getting the maximum. This results in selecting a random element among the top scoring ones, rather than taking the one with the lowest index.
    :return: sel_indices, currScore, minoux_bound: the selected indices, the score of the
             solution and the online bound computed in the first iteration (see [4])
    '''

    sel_indices = []
    # Tag used in the assertion message only: 'CB' when costs are used, 'UC' otherwise
    objective_type = 'CB' if useCost else 'UC'

    # Init arrays to keep track of marginal benefits.
    # isUpToDate holds 0 (stale), 1 (fresh) or -1 (element already selected)
    marginal_benefits = np.ones(len(S.Y), np.float32) * np.inf
    mb_indices = np.arange(len(S.Y))
    isUpToDate = np.zeros((len(S.Y), 1))

    costs = S.getCosts()

    currCost = 0.0
    currScore = 0.0
    i = 0

    if loss_fun is None:
        #FIXME: this is not actually a zero loss, but just a loss that is the same for all elements
        # This is a hack to ensure that, in case all weights w are zero, a non empty set is selected
        # i.e., just a random subset of size S.budget
        loss_fun = utils.zero_loss

    # Select as long as we are within budget and have elements to select
    while True:
        # Find the highest scoring element: refresh the top-ranked gain until it is up to date
        while (isUpToDate[mb_indices[0]] == 0):
            top = mb_indices[0]
            cand = list(sel_indices)
            cand.append(top)
            t_marg = (np.dot(w, utils.evalSubFun(submod_fun, cand, False, w)) + loss_fun(S, cand) - currScore)
            if useCost:
                # Gain per unit of cost
                t_marg = t_marg / float(costs[top])

            if not skipAssertions:
                # A refreshed gain must not exceed the previously stored one. The allowed slack is
                # the looser of an absolute (1e-5) and a relative (1e-8 * gain) tolerance.
                # The builtin min() is used on purpose: np.max(a, b) would interpret b as the axis.
                tolerance = min(-10**-5, -10**-8 * t_marg)
                assert marginal_benefits[top] - t_marg >= tolerance, ('%s: Non-submodular objective at element %d!: Now: %.3f; Before: %.3f' % (objective_type, top, t_marg, marginal_benefits[top]))
            marginal_benefits[top] = t_marg
            isUpToDate[top] = True

            # Re-rank by decreasing gain; the random permutation breaks ties randomly
            if randomize:
                idx1 = np.random.permutation(len(marginal_benefits))
                idx2 = (-marginal_benefits[idx1]).argsort(axis=0)
                mb_indices = idx1[idx2]
            else:
                mb_indices = (-marginal_benefits).argsort(axis=0)

            if not skipAssertions:
                # NOTE(review): this checks the gain of the last element by index,
                # not the smallest gain - confirm that this is what is intended
                assert marginal_benefits[-1] > -10**-5, 'Non monotonic objective'

        # Compute online bound (see [4]): sum of the best gains that fit into the budget
        if i == 0:
            best_sel_indices = np.where(costs[mb_indices].cumsum() <= budget)[0]
            minoux_bound = marginal_benefits[mb_indices][best_sel_indices].sum()

        # Select the highest scoring element
        top = mb_indices[0]
        if marginal_benefits[top] > 0.0:
            logger.debug('Select element %d (gain %.3f)' % (top, marginal_benefits[top]))
            sel_indices.append(top)

            if useCost:
                # Undo the per-cost scaling to get the absolute gain
                currScore = currScore + marginal_benefits[top] * float(costs[top])
            else:
                currScore = currScore + marginal_benefits[top]
            currCost = currCost + costs[top]

            # Zero the gain of the selected element and flag it with -1 (so that it is not becoming a candidate again)
            # Set all others to not up to date (so that the marginal gain will be recomputed)
            marginal_benefits[top] = 0
            isUpToDate[isUpToDate == 1] = 0
            isUpToDate[top] = -1

            mb_indices = (-marginal_benefits).argsort()

        else:
            logger.debug(' If the best element is zero, we are done ')
            logger.debug(sel_indices)
            return sel_indices, currScore, minoux_bound

        # Check if we still have budget to select something:
        # elements that no longer fit get a zero gain and are marked as up to date
        for elIdx in range(0, len(S.Y)):
            if costs[elIdx] + currCost > budget:
                marginal_benefits[elIdx] = 0
                isUpToDate[elIdx] = 1

        if marginal_benefits.max() == 0:
            logger.debug('no elements left to select. Done')
            logger.debug('Selected %d elements with a cost of %.1f (max: %.1f)' % (len(sel_indices), currCost, budget))
            logger.debug(sel_indices)
            return sel_indices, currScore, minoux_bound

        # Increase iteration number
        i += 1