def main(C=1):
    """Train a latent-node CRF on the NYU RGB-D segmentation data.

    C is the SSVM regularization constant; it is also baked into the
    experiment name used for the pickle loggers.
    """
    ds = NYUSegmentation()

    # Load and prepare the training data: superpixels, edges with
    # depth/normal edge features, then the hierarchical (latent) structure.
    train_data = load_nyu('train', n_sp=500, sp='rgbd')
    train_data = add_edges(train_data)
    train_data = add_edge_features(ds, train_data, depth_diff=True,
                                   normal_angles=True)
    train_data = make_hierarchical_data(ds, train_data)
    train_data = discard_void(ds, train_data)

    n_states = 4.
    print("number of samples: %s" % len(train_data.X))

    # Inverse-frequency class weights, rescaled so they sum to n_states.
    class_weights = 1. / np.bincount(np.hstack(train_data.Y))
    class_weights *= n_states / np.sum(class_weights)
    print(class_weights)

    model = crfs.EdgeFeatureLatentNodeCRF(n_hidden_states=5,
                                          n_edge_features=5,
                                          inference_method='qpbo',
                                          class_weight=class_weights,
                                          symmetric_edge_features=[0, 1],
                                          latent_node_features=False,
                                          n_labels=4)

    experiment_name = "rgbd_normal_angles_fold1_strong_reweight%f" % C

    # One-slack cutting-plane learner; switches to exact AD3
    # branch-and-bound inference once QPBO finds no violated constraint.
    base_ssvm = learners.OneSlackSSVM(
        model, verbose=2, C=C, max_iter=100000, n_jobs=1, tol=0.001,
        show_loss_every=100, inference_cache=50, cache_tol='auto',
        logger=SaveLogger(experiment_name + ".pickle", save_every=100),
        inactive_threshold=1e-5, break_on_bad=False, inactive_window=50,
        switch_to=("ad3", {'branch_and_bound': True}))

    latent_logger = SaveLogger("lssvm_" + experiment_name + "_%d.pickle",
                               save_every=1)
    ssvm = learners.LatentSSVM(base_ssvm, logger=latent_logger, latent_iter=3)

    ssvm.fit(train_data.X, train_data.Y)
    print("fit finished!")
    return
def define_learners(self):
    """Instantiate ``self.clf`` according to ``self.learners``.

    Reads hyperparameters from ``self.learners_parameters`` and wraps
    ``self.crf`` in the selected pystruct learner.  Supported values:
    'OneSlackSSVM', 'SubgradientSSVM', 'StructuredPerceptron'.
    """
    if self.learners == 'OneSlackSSVM':
        import pystruct.learners as ssvm
        self.clf = ssvm.OneSlackSSVM(
            model=self.crf,
            verbose=self.learners_parameters['verbose'],
            max_iter=self.learners_parameters['max_iter'],
            n_jobs=self.learners_parameters['n_jobs'])
    elif self.learners == 'SubgradientSSVM':
        import pystruct.learners as ssvm
        # BUG FIX: this branch previously constructed OneSlackSSVM, so
        # selecting 'SubgradientSSVM' silently trained the wrong learner.
        self.clf = ssvm.SubgradientSSVM(
            model=self.crf,
            verbose=self.learners_parameters['verbose'],
            max_iter=self.learners_parameters['max_iter'],
            n_jobs=self.learners_parameters['n_jobs'],
            show_loss_every=self.learners_parameters['show_loss_every'])
    elif self.learners == 'StructuredPerceptron':
        import pystruct.learners as structured_perceptron
        self.clf = structured_perceptron.StructuredPerceptron(
            model=self.crf,
            verbose=self.learners_parameters['verbose'],
            max_iter=self.learners_parameters['max_iter'],
            n_jobs=self.learners_parameters['n_jobs'])
    return
The center state is not encoded in the input, so that the task can not be solved without pairwise interactions. """ import numpy as np import matplotlib.pyplot as plt from pystruct.models import GridCRF import pystruct.learners as ssvm from pystruct.datasets import generate_crosses_explicit from pystruct.utils import expand_sym X, Y = generate_crosses_explicit(n_samples=50, noise=10) crf = GridCRF(neighborhood=4) clf = ssvm.OneSlackSSVM(model=crf, C=100, n_jobs=-1, inference_cache=100, tol=.1) clf.fit(X, Y) Y_pred = np.array(clf.predict(X)) print("overall accuracy (training set): %f" % clf.score(X, Y)) # plot one example x, y, y_pred = X[0], Y[0], Y_pred[0] y_pred = y_pred.reshape(x.shape[:2]) fig, plots = plt.subplots(1, 4, figsize=(12, 4)) plots[0].matshow(y) plots[0].set_title("ground truth") plots[1].matshow(np.argmax(x, axis=-1)) plots[1].set_title("input") plots[2].matshow(y_pred) plots[2].set_title("prediction")
def main(C=1, test=False):
    """Train an EdgeFeatureGraphCRF on Pascal CPMC superpixels, then
    evaluate on the validation split.

    C is the SSVM regularization constant (also part of the experiment
    name).  ``test=True`` trains on the full 'train' split and evaluates
    on 'val'; otherwise the 'kTrain'/'kVal' folds are used.
    """
    ds = PascalSegmentation()

    # load training data
    edge_type = "pairwise"
    if test:
        which = "train"
    else:
        which = "kTrain"
    data_train = load_pascal(which=which, sp_type="cpmc")
    data_train = add_edges(data_train, edge_type)
    data_train = add_edge_features(ds, data_train)
    data_train = discard_void(ds, data_train, ds.void_label)

    print("number of samples: %s" % len(data_train.X))

    # Inverse-frequency class weights, rescaled to sum to 21 (the number
    # of Pascal classes).
    class_weights = 1. / np.bincount(np.hstack(data_train.Y))
    class_weights *= 21. / np.sum(class_weights)
    print(class_weights)

    model = crfs.EdgeFeatureGraphCRF(inference_method='qpbo',
                                     class_weight=class_weights,
                                     symmetric_edge_features=[0, 1],
                                     antisymmetric_edge_features=[2])
    experiment_name = "cpmc_edge_features_trainval_new_%f" % C

    warm_start = False
    ssvm = learners.OneSlackSSVM(
        model, verbose=2, C=C, max_iter=100000, n_jobs=-1, tol=0.0001,
        show_loss_every=50, inference_cache=50, cache_tol='auto',
        logger=SaveLogger(experiment_name + ".pickle", save_every=100),
        inactive_threshold=1e-5, break_on_bad=False, inactive_window=50,
        switch_to=None)

    if warm_start:
        # Resume from a previous run, logging refits to a separate pickle
        # and switching to exact AD3 branch-and-bound inference.
        ssvm = SaveLogger(experiment_name + ".pickle").load()
        ssvm.logger = SaveLogger(file_name=experiment_name + "_refit.pickle",
                                 save_every=10)
        ssvm.model.inference_method = 'ad3bb'

    ssvm.fit(data_train.X, data_train.Y, warm_start=warm_start)
    print("fit finished!")

    # BUG FIX: a stray `return` before this point made the entire
    # evaluation section below unreachable dead code.
    if test:
        data_val = load_pascal('val')
    else:
        data_val = load_pascal('kVal')
    data_val = add_edges(data_val, edge_type)
    data_val = add_edge_features(ds, data_val, more_colors=True)
    eval_on_sp(ds, data_val, ssvm.predict(data_val.X), print_results=True)
This example illustrates the role of approximate inference and caching in exact learning of a 1-slack SSVM. Please see plot_objetive_curve.py for an interpretation of the curves. We start learning by using an undergenerating inference method, QPBO-based alpha expansion. One the algorithm can not find a violated constraint any more, we switch to a less efficient but exact inference procedure, branch-and-bound based on AD3. The switch to AD3 can be seen in the graph after the (approximate) primal objective and the cutting plane lower bound touch. (zoom in) After the switch to exact inference, the red circles show the true primal objective. """ from pystruct.models import DirectionalGridCRF import pystruct.learners as ssvm from pystruct.datasets import generate_blocks_multinomial from pystruct.plot_learning import plot_learning X, Y = generate_blocks_multinomial(noise=2, n_samples=20, seed=1) crf = DirectionalGridCRF(inference_method="qpbo", neighborhood=4) clf = ssvm.OneSlackSSVM(model=crf, n_jobs=-1, inference_cache=100, show_loss_every=10, switch_to=("ad3", {'branch_and_bound': True})) clf.fit(X, Y) plot_learning(clf, time=False)
# Inspect the label array, then train a GridCRF on a small subset.
print(Y.shape)

from pystruct.models import GraphCRF, GridCRF
import pystruct.learners as ssvm

# Keep only 10% of the data for training.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.90)
print(X_train.shape)

model = GridCRF(n_states=4, neighborhood=4, inference_method="max-product")
clf = ssvm.OneSlackSSVM(model=model, C=100, inference_cache=100, tol=.1,
                        max_iter=10, show_loss_every=2)
clf.fit(X_train, y_train)
Y_pred = np.array(clf.predict(X_train))

# BUG FIX: this previously read `clf.score(X_train, Y_train)`; the split
# above binds `y_train` (lowercase), so `Y_train` raised a NameError.
print("overall accuracy (training set): %f" % clf.score(X_train, y_train))
def main(C=1, test=False):
    """Train an EdgeFeatureGraphCRF on fully connected superpixel graphs.

    C is the SSVM regularization constant; with ``test=True`` the
    validation split is folded into the training set.
    """
    # load training data
    independent = False
    train_data = load_data(which="piecewise")
    train_data = add_edges(train_data, independent=independent,
                           fully_connected=True)
    train_data = add_kraehenbuehl_features(train_data, which="train_30px")
    train_data = add_kraehenbuehl_features(train_data, which="train")
    if not independent:
        train_data = add_edge_features(train_data)
    train_data = discard_void(train_data, 21)

    if test:
        # Fold the validation data into the training set.
        val_data = load_data("val", which="piecewise_train")
        val_data = add_edges(val_data, independent=independent)
        val_data = add_kraehenbuehl_features(val_data, which="train_30px")
        val_data = add_kraehenbuehl_features(val_data, which="train")
        val_data = add_edge_features(val_data)
        val_data = discard_void(val_data, 21)
        train_data = concatenate_datasets(train_data, val_data)

    n_states = 21
    print("number of samples: %s" % len(train_data.X))

    # Inverse-frequency class weights, rescaled to sum to 21.
    class_weights = 1. / np.bincount(np.hstack(train_data.Y))
    class_weights *= 21. / np.sum(class_weights)
    print(class_weights)

    model = crfs.EdgeFeatureGraphCRF(n_states=n_states,
                                     n_features=train_data.X[0][0].shape[1],
                                     inference_method='qpbo',
                                     class_weight=class_weights,
                                     n_edge_features=3,
                                     symmetric_edge_features=[0, 1],
                                     antisymmetric_edge_features=[2])
    experiment_name = "fully_connected_%f" % C

    warm_start = False
    ssvm = learners.OneSlackSSVM(
        model, verbose=2, C=C, max_iter=100000, n_jobs=-1, tol=0.0001,
        show_loss_every=50, inference_cache=50, cache_tol='auto',
        logger=SaveLogger(experiment_name + ".pickle", save_every=100),
        inactive_threshold=1e-5, break_on_bad=False, inactive_window=50,
        switch_to_ad3=False)

    if warm_start:
        # Resume from a previous run and log refits separately.
        ssvm = SaveLogger(experiment_name + ".pickle").load()
        ssvm.logger = SaveLogger(file_name=experiment_name + "_refit.pickle",
                                 save_every=10)
        ssvm.learning_rate = 0.000001

    ssvm.fit(train_data.X, train_data.Y, warm_start=warm_start)
    print("fit finished!")
    return
#Save Data print "Loading Data...." (X_train, Y_train)=cPickle.load(open(folder_out+"crf_general_data_200.pkl","r")) print "Initializing CRF" nr_states= 12; class_counts = np.bincount(np.hstack(Y_train)) class_frequency = 1./ class_counts; class_weights = class_frequency*(nr_states/np.sum(class_frequency)) C = 0.01 experiment_name = "edge_features_one_slack_trainval_%f" % C model = crfs.GraphCRF( n_states = nr_states, inference_method='max-product', class_weight=class_weights) #symmetric_edge_features=[0]) ssvm = learners.OneSlackSSVM( model, verbose=2, C=C, max_iter=100000, n_jobs=-1, tol=0.0001, show_loss_every=5, logger=SaveLogger(experiment_name + ".pickle", save_every=100), inactive_threshold=1e-3, inactive_window=10) #Fit CRF ssvm.fit(X_train, Y_train) cPickle.dump(ssvm, open(folder_out+"trained_general_crf.pkl", "w")) #Use Potts Model -> Manually #Modify for edge_function
In contrast to the caching proposed in [1], we do not produce constraints from the cache as long as possible. A heuristic is used to see whether the cached constraint is strong enough. Here training is stopped when the green curve goes below the blue curve. This means no strong enough constraint could be found. The fact that the primal objective can go below the cutting plane objective is a result of approximate inference. The real most violating constraint could not be found, leading to underestimating the primal objective. See plot_exact_learning.py for a way to deal with this. """ from pystruct.models import DirectionalGridCRF import pystruct.learners as ssvm from pystruct.datasets import generate_blocks_multinomial from pystruct.plot_learning import plot_learning X, Y = generate_blocks_multinomial(noise=2, n_samples=20, seed=1) crf = DirectionalGridCRF(inference_method="qpbo", neighborhood=4) clf = ssvm.OneSlackSSVM(model=crf, C=1, n_jobs=-1, inference_cache=100, tol=.1, show_loss_every=10) clf.fit(X, Y) plot_learning(clf, time=False)
def validate(self): """ Tweaks C for the svc. self.validation_set is used for validating """ validation_features = \ bookfunctions.get_features_from_pages_data(self.validation_set, self.number_of_blocks, self.overlap, self.svm_path) if self.use_page_classifier: # FIXME: number of blocks is fixed to what the page classifier has # learned on in my test case, for now. page_validation_features = bookfunctions.get_all_features(self.validation_set, \ (5,5)) s = page_validation_features.shape # Reshape all features to 1 feature vector page_validation_features.shape = (s[0], s[1] * s[2] * s[3]) validation_labels = bookfunctions.get_all_labels(self.validation_set, \ self.number_of_blocks, overlap=self.overlap) print """validation set features size after concatenate %s. validation labels size: %s""" % (str(np.shape(validation_features)), \ str(np.shape(validation_labels))) best_f = 0 # Count the number of class labels in order to set the class weights class_weights = 1. / np.bincount(self.train_labels.flatten()) # Normalize class weights in order to have a scalable tolerance # parameter class_weights *= float(np.shape(self.train_features)[3]) / np.sum(class_weights) print "class weights: %s" % str(class_weights) self.crf = WeightedGridCRF(neighborhood=4, class_weight=class_weights) for i in range(1, 5): c = 10**i self.logger = SaveLogger(get_log_path('model', c, self.use_svm, \ self.overlap, self.use_page_classifier), save_every=15) print "validating with c = " + str(c) temp_classifier = ssvm.OneSlackSSVM(model=self.crf, C=c, n_jobs=-1, verbose=2, logger=self.logger, tol=.01) # Fit the classifier: temp_classifier.fit(self.train_features, self.train_labels) # Write the ssvm parameters! 
with open(get_log_path('param', c, self.use_svm, self.overlap, self.use_page_classifier), 'w') as f: f.write(str(temp_classifier.get_params())) print "validation features shape: %s" + str(np.shape(validation_features)) validation_predicted_labels = temp_classifier.predict(validation_features) validation_predicted_labels = np.array(validation_predicted_labels) if self.use_page_classifier: # Get the page predictions, which have pretttyy high accuracy validation_predicted_pages = self.page_classifier.predict( \ page_validation_features) for i, page in enumerate(validation_predicted_pages): if page != 0: # Replace any page that has no images according to the # page classifier, with a page that is fully classified # as 1. validation_predicted_labels[i] = \ np.ones((validation_predicted_labels.shape[1], validation_predicted_labels.shape[2])) print "C = %d" % (c) prfs = precision_recall_fscore_support(validation_labels.flatten(), \ validation_predicted_labels.flatten()) print """ Precision: Image: %f Text: %f Recall: Image: %f Text: %f Fscore: Image: %f Text: %f Support: Image: %f Text: %f """ % tuple(np.ndarray.flatten(np.array(prfs))) f = prfs[2][0] if f > best_f: best_f = f self.classifier = temp_classifier print "F-score for best c: %s" % str(best_f) return best_f
def svm_on_segments(C=.1, learning_rate=.001, subgradient=True):
    """Train a (latent) CRF on segment-level data with global probabilities.

    C is the regularization constant, learning_rate only applies to the
    subgradient learner; ``subgradient`` selects between
    LatentSubgradientSSVM and a one-slack LatentSSVM.
    """
    # load and prepare data
    lateral = True
    latent = True
    test = False
    train_data = load_data_global_probs(latent=latent)
    X_org_ = train_data.X
    train_data = discard_void(train_data, 21, latent_features=True)
    X_, Y_ = train_data.X, train_data.Y

    # remove edges
    if not lateral:
        X_org_ = [(x[0], np.zeros((0, 2), dtype=np.int)) for x in X_org_]

    if test:
        # Fold the validation split into the training set.
        val_data = load_data('val', which="piecewise")
        val_data = add_edges(val_data, independent=False)
        val_data = add_kraehenbuehl_features(val_data)
        val_data = make_hierarchical_data(val_data, lateral=lateral,
                                          latent=latent)
        val_data = discard_void(val_data, 21)
        X_.extend(val_data.X)
        Y_.extend(val_data.Y)

    n_states = 21
    # Inverse-frequency class weights, rescaled to sum to 21.
    class_weights = 1. / np.bincount(np.hstack(Y_))
    class_weights *= 21. / np.sum(class_weights)

    experiment_name = ("latent5_features_C%f_top_node" % C)
    logger = SaveLogger(experiment_name + ".pickle", save_every=10)

    if latent:
        model = LatentNodeCRF(n_labels=n_states,
                              n_features=train_data.X[0][0].shape[1],
                              n_hidden_states=5,
                              inference_method='qpbo' if lateral else 'dai',
                              class_weight=class_weights,
                              latent_node_features=True)
        if subgradient:
            ssvm = learners.LatentSubgradientSSVM(
                model, C=C, verbose=1, show_loss_every=10, logger=logger,
                n_jobs=-1, learning_rate=learning_rate, decay_exponent=1,
                momentum=0., max_iter=100000)
        else:
            latent_logger = SaveLogger(
                "lssvm_" + experiment_name + "_%d.pickle", save_every=1)
            base_ssvm = learners.OneSlackSSVM(
                model, verbose=2, C=C, max_iter=100000, n_jobs=-1,
                tol=0.001, show_loss_every=200, inference_cache=50,
                logger=logger, cache_tol='auto', inactive_threshold=1e-5,
                break_on_bad=False, switch_to_ad3=True)
            ssvm = learners.LatentSSVM(base_ssvm, logger=latent_logger)

        warm_start = False
        if warm_start:
            # Resume a saved run with a tiny learning rate.
            ssvm = logger.load()
            ssvm.logger = SaveLogger(experiment_name + "_retrain.pickle",
                                     save_every=10)
            ssvm.max_iter = 100000
            ssvm.learning_rate = 0.00001
            ssvm.momentum = 0
    else:
        model = EdgeFeatureGraphCRF(
            n_states=n_states,
            n_features=train_data.X[0][0].shape[1],
            inference_method='qpbo' if lateral else 'dai',
            class_weight=class_weights,
            n_edge_features=4,
            symmetric_edge_features=[0, 1],
            antisymmetric_edge_features=[2])
        ssvm = learners.OneSlackSSVM(
            model, verbose=2, C=C, max_iter=100000, n_jobs=-1, tol=0.0001,
            show_loss_every=200, inference_cache=50, logger=logger,
            cache_tol='auto', inactive_threshold=1e-5, break_on_bad=False)

    X_, Y_ = shuffle(X_, Y_)
    ssvm.fit(X_, Y_)
    print("fit finished!")