def test_linear_classifier_weights_on(self, dataset):
    """Check linear SVM weights against the classifier's decision values.

    A classifier is trained on ``dataset`` with ``self.LEARNER``.  For each
    of the first 20 instances, every decision value must agree (to 4
    decimal places) with the weighted sum of the instance under the
    matching weight vector, minus the matching ``rho`` entry.
    """
    # Test get_linear_svm_weights
    model = self.LEARNER(dataset)

    # The summed form is only exercised here; its result is replaced by
    # the per-weight-vector form used in the comparison below.
    weight_vectors = get_linear_svm_weights(model, sum=True)
    weight_vectors = get_linear_svm_weights(model, sum=False)

    class_count = len(model.class_var.values)

    # Called for its own sake; the returned map is not used afterwards.
    _labels_map = model._get_libsvm_labels_map()

    for inst in dataset[:20]:
        decision_values = model.get_decision_values(inst)
        # All (i, j) index pairs with i < j, in lexicographic order.
        index_pairs = (
            (first, second)
            for first in range(class_count - 1)
            for second in range(first + 1, class_count)
        )
        rows = zip(decision_values, weight_vectors, model.rho, index_pairs)
        for expected, vector, offset, _pair in rows:
            converted = Orange.data.Instance(model.domain, inst)
            recomputed = example_weighted_sum(converted, vector) - offset
            self.assertAlmostEqual(expected, recomputed, 4)
def test_linear_regression_weights_on(self, dataset):
    """Check linear SVM weights against the regressor's predictions.

    A predictor is trained on ``dataset`` with ``self.LEARNER``.  For each
    of the first 20 instances, the prediction must agree (to 4 decimal
    places) with the weighted sum of the instance minus ``rho[0]``.
    """
    model = self.LEARNER(dataset)
    coefficients = get_linear_svm_weights(model)

    for inst in dataset[:20]:
        # The weighted sum is taken over the instance converted to the
        # model's domain, while the prediction uses the raw instance.
        converted = Orange.data.Instance(model.domain, inst)
        predicted = model(inst)
        weighted = example_weighted_sum(converted, coefficients)
        self.assertAlmostEqual(
            float(predicted), weighted - model.rho[0], places=4)
def features_ranking_svm(self, name="ranking", random_state=64):
    """Rank features by repeatedly dropping the weakest linear-SVM feature.

    Runs up to ``self.n_features`` rounds.  Each round fits
    ``svm.SVMLearnerEasy`` (linear kernel, C_SVC, ``folds=5``) on the
    features still in play, scores every feature by summing, over the
    returned weight vectors, the square of its weight divided by that
    vector's Euclidean norm, then records and removes the lowest-scoring
    feature.  (The original docstring calls this "REF"; presumably
    recursive feature elimination, RFE, is meant.)

    Args:
        name: Label stored next to this run's per-round scores in
            ``self.logging``.
        random_state: Seed passed to ``svm.SVMLearnerEasy``.  The original
            author notes that it does not work.

    Returns:
        list: Feature names in elimination order, i.e. the lowest-scoring
        feature of each round, first round first.

    Side effects:
        Appends ``(name, per_round_scores)`` to ``self.logging``, where
        ``per_round_scores`` holds each round's ``(feature_name, score)``
        list sorted by ascending score.
    """
    ranking_list = []
    remaining = list(self.features)
    logging_per_run = []

    for _ in range(self.n_features):
        train_X = self.data_X[remaining]
        train_y = self.data_y
        if len(train_X.columns) == 0:
            break

        tuned_learner = svm.SVMLearnerEasy(
            folds=5,
            kernel_type=svm.kernels.Linear,
            svm_type=svm.SVMLearner.C_SVC,
            random_state=random_state)
        org_data_table = df2tb(train_X, train_y)
        weights = svm.get_linear_svm_weights(
            tuned_learner(org_data_table), sum=False)

        # Accumulate squared, norm-scaled weights per feature so that every
        # weight vector contributes on the same scale.
        internal_scores = defaultdict(float)
        for w in weights:
            magnitude = np.sqrt(sum(w_attr ** 2 for _, w_attr in w.items()))
            for attr, w_attr in w.items():
                internal_scores["%s" % attr] += (w_attr / magnitude) ** 2

        # Keys are the string form of the feature descriptors; recover the
        # bare name by stripping the descriptor text and its "N_" prefix.
        features_score = []
        for attr_repr, score in internal_scores.items():
            attr_name = attr_repr.split(
                "Orange.feature.Continuous 'N_")[1].split("'")[0]
            features_score.append((attr_name, score))

        # Ascending by score.  A key function gives the same stable order
        # as the old cmp-based comparator and also works on Python 3.
        features_score.sort(key=lambda entry: entry[1])

        # Results for low-score feature
        logging_per_run.append(features_score)
        weakest = features_score[0][0]
        ranking_list.append(weakest)
        remaining.remove(weakest)

    self.logging.append((name, logging_per_run))
    return ranking_list
# Train a linear-kernel SVM on the "brown-selected" table with
# normalization switched off, then inspect the weights reported by
# get_linear_svm_weights.
from Orange import data
from Orange.classification import svm

brown = data.Table("brown-selected")
classifier = svm.SVMLearner(
    brown,
    kernel_type=svm.kernels.Linear,
    normalization=False,
)

weights = svm.get_linear_svm_weights(classifier)

# Print the weight values formatted to 10 decimals; note that sorting is
# applied to the formatted strings, not to the numbers themselves.
formatted_weights = sorted("%.10f" % w for w in weights.values())
print(formatted_weights)

# Imported only here, so the printout above does not depend on pylab.
import pylab as plt

# Build a histogram of the raw weight values.
plt.hist(weights.values())