Example #1
0
def learn_scoal_wrapper(**argv):
    """Build a single SCOAL model with the first parameter setting and save it.

    All inputs arrive through read_options(**argv): data matrix Z, weight/mask
    matrix W, row/column/cross attribute data, a learner, a list of parameter
    settings, train/test loss functions, CV settings, initial co-cluster
    counts, and the output model filename.

    NOTE(review): the learn_scoal training call is commented out below, so the
    model written to disk has only had set_attributes applied and is otherwise
    untrained -- confirm whether training should be re-enabled.
    """
    Z, W, rowAttr, colAttr, crossAttr, learner, params, \
    train_loss, test_loss, \
    num_cv, init_K, init_L, model_filename = read_options(**argv)

    # CSR format for efficient row slicing during training.
    Z = Z.tocsr()
    W = W.tocsr()
    param = params[0]  # only the first parameter setting is used here

    model = GeneralScoalModel()
    model.set_attributes(rowAttr, colAttr, crossAttr)
    #learn_scoal(model, Z, W, K, L, learner, param, train_loss, test_loss)
    model.save(model_filename)
Example #2
0
def model_selector(**argv):
    ''' Select the best SCOAL model from CV folds and parameter options.

    For each cross-validation fold and each candidate parameter, fit a model
    and then greedily grow the co-clustering (K row clusters by L column
    clusters) while the validation loss keeps improving.

    Returns (best_loss, save_validation_loss) where
    best_loss = [(param index, K, L), mean validation loss, #observations]
    and save_validation_loss maps (param index, K, L) -> list of per-fold
    validation losses.
    '''
    # read data from input dict
    Z, W, rowAttr, colAttr, crossAttr, learner, params, \
    train_loss, test_loss, \
    num_cv, init_K, init_L, model_filename = read_options(**argv)
    
    # Accumulates validation losses keyed by (param index, K, L) across folds.
    save_validation_loss = defaultdict(list)
    # Row/column indices of the observed (nonzero-weight) entries of W.
    I,J= sp.find(W)[:2]
    num_data = len(I)
    
    for ix, (trainIdx, validationIdx) in enumerate(validSets(num_data, num_cv)):
        if __debug__:
            print '\nValidation set:',ix
        # for each cv split
        # NOTE(review): `model` is created once per fold but reused across all
        # parameter settings in the loop below, so state learned under one
        # param carries into the next -- confirm this is intentional.
        model = GeneralScoalModel()
        model.set_attributes(rowAttr, colAttr, crossAttr)
        train_I = I[trainIdx]
        train_J = J[trainIdx]
        validation_I = I[validationIdx]
        validation_J = J[validationIdx]
        
        # Rebuild sparse matrices holding only this fold's training and
        # validation entries, each with the same shape as the originals.
        Z_training = sp.coo_matrix((np.ravel(Z[(train_I, train_J)]), (train_I, train_J)), shape=Z.shape).tocsr()
        W_training = sp.coo_matrix((np.ravel(W[(train_I, train_J)]), (train_I, train_J)), shape=W.shape).tocsr()
        Z_validation = sp.coo_matrix((np.ravel(Z[(validation_I, validation_J)]), (validation_I, validation_J)), shape=Z.shape).tocsr()
        W_validation = sp.coo_matrix((np.ravel(W[(validation_I, validation_J)]), (validation_I, validation_J)), shape=W.shape).tocsr()

        # Do model selection
        for jx, param in enumerate(params): # for each alpha parameter
            if __debug__:
                print 'parameter:', param
            K = init_K
            L = init_L
            # Fit at the initial co-cluster sizes.
            # NOTE(review): the full Z is passed here even though Z_training
            # was built above (it is only used by the split tests). Harmless
            # if learn_scoal masks entries by W_training, but confirm whether
            # Z_training was intended.
            learn_scoal(model, Z, W_training, K, L, learner, param, train_loss, test_loss)
            test_Z = np.ravel(Z[(validation_I, validation_J)])
            validation_loss = model.test_loss(test_Z, model.predict(validation_I, validation_J) )
            save_validation_loss[(jx, K, L)].append(validation_loss)
            if __debug__:
                print "Starting validation loss: %f" % (validation_loss,)
            # Greedy structure search: at each step try adding one row cluster
            # and, separately, one column cluster; keep whichever candidate
            # improves the validation loss, stop when neither does.
            # `maxIterations` is a module-level bound (defined elsewhere).
            for _ in range(maxIterations):
                row_test_model = model.copy()
                row_split_validation_loss = test_row_split(row_test_model, Z_training, W_training, \
                                                          Z_validation, W_validation, K, \
                                                          L, learner, param, train_loss, test_loss)
                save_validation_loss[(jx, K+1, L)].append(row_split_validation_loss)
                
                col_test_model = model.copy()
                col_split_validation_loss = test_col_split(col_test_model, Z_training, W_training, \
                                                          Z_validation, W_validation, K, \
                                                          L, learner, param, train_loss, test_loss)
                save_validation_loss[(jx, K, L+1)].append(col_split_validation_loss)
                
                if __debug__:
                    print "Row split loss: %f" % (row_split_validation_loss,)
                    print "Col split loss: %f" % (col_split_validation_loss,)
                
                # Ties go to the row split; a split is accepted only if it
                # strictly improves on the current validation loss.
                if row_split_validation_loss <= col_split_validation_loss and row_split_validation_loss < validation_loss:
                    K += 1
                    model = row_test_model
                    validation_loss = row_split_validation_loss
                elif col_split_validation_loss < row_split_validation_loss and col_split_validation_loss < validation_loss:
                    L += 1
                    model = col_test_model
                    validation_loss = col_split_validation_loss
                else:
                    break
                if __debug__:
                    print "k: %d, l: %d" % (K, L)
                    print "Current validation loss: %f" % validation_loss
                
            if __debug__:
                print "Final k,l: %d,%d" % (K, L)
                print "Final validation loss: %f" % validation_loss
    
    save_validation_loss = dict(save_validation_loss)
    if __debug__:
        print "\nValidation losses"
        for key, vals in save_validation_loss.iteritems():
            print key, vals
        print "\n"
        
    ''' Compute best parameter: Sort by length, then by -mean() i.e. longest then min(mean(error)) '''
    # Sort ascending by (#observations, -mean loss): the last element has the
    # most observations and, among those, the smallest mean validation loss.
    sorted_loss = sorted( [ [ key, -np.array(val).mean(), len(val) ] for key, val in save_validation_loss.iteritems()] , key=itemgetter(2,1) )
    best_loss = sorted_loss[-1]
    best_loss[1] = -best_loss[1]  # undo the sign flip used for sorting
    
    print "\nVALIDATION COMPLETE"
    print "Selected parameter", params[best_loss[0][0]]
    print "Selected K:%d, L:%d" % (best_loss[0][1], best_loss[0][2]) 
    print "Mean validation error %f, with %d observations" % (best_loss[1], best_loss[2])
    return best_loss, save_validation_loss