from dwave.system.samplers import DWaveSampler
from dwave.system.composites import EmbeddingComposite
from sklearn.metrics import accuracy_score

from qboost import QBoostClassifier


def QBoost(X_train, y_train, X_test, y_test):
    NUM_READS = 1000
    DW_PARAMS = {'num_reads': NUM_READS,
                 'auto_scale': True,
                 'num_spin_reversal_transforms': 10,
                 'postprocess': 'optimization',
                 }

    # Sample from the QPU, with minor-embedding handled automatically
    dwave_sampler = DWaveSampler(solver={'qpu': True})
    emb_sampler = EmbeddingComposite(dwave_sampler)

    clf4 = QBoostClassifier(n_estimators=30, max_depth=2)
    clf4.fit(X_train, y_train, emb_sampler, lmd=1.0, **DW_PARAMS)
    y_train4 = clf4.predict(X_train)
    y_test4 = clf4.predict(X_test)

    print('Accuracy for training data:\t', accuracy_score(y_train, y_train4))
    print('Accuracy for test data:\t', accuracy_score(y_test, y_test4))

    return clf4
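# Hypothetical usage sketch for QBoost() above. The dataset, the split, and
# the {-1, +1} label encoding are illustrative assumptions; running it
# requires access to a D-Wave QPU.
if __name__ == '__main__':
    from sklearn.datasets import make_blobs
    from sklearn.model_selection import train_test_split

    X, y = make_blobs(n_samples=200, n_features=10, centers=2, random_state=0)
    y = 2 * y - 1  # map labels from {0, 1} to {-1, +1}, as the weak classifiers expect

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    clf = QBoost(X_train, y_train, X_test, y_test)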
from sklearn import preprocessing
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

from dwave.system.samplers import DWaveSampler
from dwave.system.composites import EmbeddingComposite

from qboost import WeakClassifiers, QBoostClassifier, QboostPlus

# Accuracy metric used throughout (assumed to be scikit-learn's accuracy_score)
metric = accuracy_score


def train_model(X_train, y_train, X_test, y_test, lmd):
    """Train the QBoost model and compare it against classical ensembles.

    :param X_train: training inputs
    :param y_train: training labels, in {+1, -1}
    :param X_test: test inputs
    :param y_test: test labels, in {+1, -1}
    :param lmd: lambda, the strength of the regularization term
    """
    NUM_READS = 3000
    NUM_WEAK_CLASSIFIERS = 35
    TREE_DEPTH = 3

    # Define the sampler: draw samples from the QPU, with embedding handled automatically
    dwave_sampler = DWaveSampler(solver={'qpu': True})
    emb_sampler = EmbeddingComposite(dwave_sampler)

    N_train = len(X_train)
    N_test = len(X_test)
    print("\n======================================")
    print("Train#: %d, Test: %d" % (N_train, N_test))
    print('Num weak classifiers:', NUM_WEAK_CLASSIFIERS)
    print('Tree depth:', TREE_DEPTH)

    # Preprocessing: fit on the training set only, then apply the same
    # transforms to the test set (refitting on the test set would leak
    # test statistics into the preprocessing)
    scaler = preprocessing.StandardScaler()   # standardize features
    normalizer = preprocessing.Normalizer()   # normalize samples

    X_train = scaler.fit_transform(X_train)
    X_train = normalizer.fit_transform(X_train)

    X_test = scaler.transform(X_test)
    X_test = normalizer.transform(X_test)

    # Adaboost
    print('\nAdaboost')
    clf = AdaBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS)
    print('fitting...')
    clf.fit(X_train, y_train)
    print('testing...')
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)
    print('accu (train): %5.2f' % metric(y_train, y_train_pred))
    print('accu (test): %5.2f' % metric(y_test, y_test_pred))

    # Ensemble of decision trees
    print('\nDecision tree')
    clf2 = WeakClassifiers(n_estimators=NUM_WEAK_CLASSIFIERS, max_depth=TREE_DEPTH)
    clf2.fit(X_train, y_train)
    y_train_pred2 = clf2.predict(X_train)
    y_test_pred2 = clf2.predict(X_test)
    print(clf2.estimator_weights)
    print('accu (train): %5.2f' % metric(y_train, y_train_pred2))
    print('accu (test): %5.2f' % metric(y_test, y_test_pred2))

    # QBoost
    print('\nQBoost')
    DW_PARAMS = {'num_reads': NUM_READS,
                 'auto_scale': True,
                 'num_spin_reversal_transforms': 10,
                 'postprocess': 'optimization',
                 }
    clf3 = QBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS, max_depth=TREE_DEPTH)
    clf3.fit(X_train, y_train, emb_sampler, lmd=lmd, **DW_PARAMS)
    y_train_dw = clf3.predict(X_train)
    y_test_dw = clf3.predict(X_test)
    print(clf3.estimator_weights)
    print('accu (train): %5.2f' % metric(y_train, y_train_dw))
    print('accu (test): %5.2f' % metric(y_test, y_test_dw))

    # QBoostPlus: iteratively reweight the three ensembles above
    print('\nQBoostPlus')
    clf4 = QboostPlus([clf, clf2, clf3])
    clf4.fit(X_train, y_train, emb_sampler, lmd=lmd, **DW_PARAMS)
    y_train4 = clf4.predict(X_train)
    y_test4 = clf4.predict(X_test)
    print(clf4.estimator_weights)
    print('accu (train): %5.2f' % metric(y_train, y_train4))
    print('accu (test): %5.2f' % metric(y_test, y_test4))

    # Summary table
    print("=============================================")
    print("Method \t Adaboost \t DecisionTree \t Qboost \t QboostIt")
    print("Train\t %5.2f \t\t %5.2f \t\t\t %5.2f \t\t %5.2f"
          % (metric(y_train, y_train_pred), metric(y_train, y_train_pred2),
             metric(y_train, y_train_dw), metric(y_train, y_train4)))
    print("Test\t %5.2f \t\t %5.2f \t\t\t %5.2f \t\t %5.2f"
          % (metric(y_test, y_test_pred), metric(y_test, y_test_pred2),
             metric(y_test, y_test_dw), metric(y_test, y_test4)))
print("=============================================") # plt.subplot(211) # plt.bar(range(len(y_test)), y_test) # plt.subplot(212) # plt.bar(range(len(y_test)), y_test_dw) # plt.show() return
    X, y = make_blob_data(n_samples=n_samples, n_features=n_features,
                          n_informative=n_informative)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    if args.cross_validation:
        # See Boyda et al. (2017), Eq. (17) regarding normalization
        normalized_lambdas = np.linspace(0.0, 0.5, 10)
        lambdas = normalized_lambdas / n_features
        print('Performing cross-validation using {} values of lambda, '
              'this may take several minutes...'.format(len(lambdas)))
        qboost, lam = qboost_lambda_sweep(X_train, y_train, lambdas,
                                          verbose=args.verbose)
    else:
        qboost = QBoostClassifier(X_train, y_train, args.lam)

    if args.verbose:
        qboost.report_baseline(X_test, y_test)

    print('Informative features:', list(range(n_informative)))
    print('Selected features:', qboost.get_selected_features())
    print('Score on test set: {:.3f}'.format(qboost.score(X_test, y_test)))

elif args.dataset == 'digits':
    if args.digit1 == args.digit2:
        raise ValueError("must use two different digits")

    X, y = get_handwritten_digits_data(args.digit1, args.digit2)
    n_features = np.size(X, 1)
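# Context assumed by the snippet above (a sketch, not the demo's exact
# scaffolding): the free names it references would come from imports and an
# argparse block along these lines. The 'datasets' module and the argument
# defaults are assumptions.
import argparse

import numpy as np
from sklearn.model_selection import train_test_split

from qboost import QBoostClassifier, qboost_lambda_sweep
from datasets import make_blob_data, get_handwritten_digits_data  # assumed module

parser = argparse.ArgumentParser()
parser.add_argument('dataset', choices=['blobs', 'digits'])
parser.add_argument('--cross-validation', action='store_true')
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--lam', type=float, default=0.01,
                    help='regularization strength (used without cross-validation)')
parser.add_argument('--digit1', type=int, default=0)
parser.add_argument('--digit2', type=int, default=1)
args = parser.parse_args()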
    @classmethod
    def setUpClass(cls):
        # Build the dataset and fit the classifier once, shared by all tests
        cls.X, cls.y = make_blob_data()
        cls.clf = QBoostClassifier(cls.X, cls.y, 0.0)
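# A runnable wrapper sketch for the fixture above (the class name, the test
# method, and the datasets import are illustrative assumptions): setUpClass
# runs once per class, so every test method reuses the same fitted classifier.
import unittest

from qboost import QBoostClassifier
from datasets import make_blob_data  # assumed location of the helper


class QBoostClassifierTests(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.X, cls.y = make_blob_data()
        cls.clf = QBoostClassifier(cls.X, cls.y, 0.0)

    def test_score_on_training_data(self):
        # With lambda=0 there is no sparsity pressure, so the ensemble
        # should fit the blob data well above chance
        self.assertGreater(self.clf.score(self.X, self.y), 0.5)


if __name__ == '__main__':
    unittest.main()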
from sklearn import preprocessing
from sklearn.ensemble import AdaBoostClassifier

from dwave.system.samplers import DWaveSampler
from dwave.system.composites import EmbeddingComposite

from qboost import WeakClassifiers, QBoostClassifier


def train_models(X_train, y_train, X_test, y_test, lmd, verbose=False):
    NUM_READS = 3000
    NUM_WEAK_CLASSIFIERS = 35
    TREE_DEPTH = 3

    # Define the sampler; embedding onto the QPU graph is handled automatically
    dwave_sampler = DWaveSampler()
    emb_sampler = EmbeddingComposite(dwave_sampler)

    N_train = len(X_train)
    N_test = len(X_test)
    print('Size of training set:', N_train)
    print('Size of test set:    ', N_test)
    print('Number of weak classifiers:', NUM_WEAK_CLASSIFIERS)
    print('Tree depth:', TREE_DEPTH)

    # Input: dataset X and labels y in {+1, -1}
    # Preprocessing: fit on the training set only, then apply the same
    # transforms to the test set (refitting there would leak test statistics)
    scaler = preprocessing.StandardScaler()   # standardize features
    normalizer = preprocessing.Normalizer()   # normalize samples

    X_train = scaler.fit_transform(X_train)
    X_train = normalizer.fit_transform(X_train)

    X_test = scaler.transform(X_test)
    X_test = normalizer.transform(X_test)

    # ===============================================
    print('\nAdaboost:')
    clf = AdaBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS)
    clf.fit(X_train, y_train)
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)
    # print_accuracy() reports train/test accuracy; a sketch follows this snippet
    print_accuracy(y_train, y_train_pred, y_test, y_test_pred)

    # ===============================================
    print('\nDecision tree:')
    clf2 = WeakClassifiers(n_estimators=NUM_WEAK_CLASSIFIERS, max_depth=TREE_DEPTH)
    clf2.fit(X_train, y_train)
    y_train_pred2 = clf2.predict(X_train)
    y_test_pred2 = clf2.predict(X_test)
    if verbose:
        print('weights:\n', clf2.estimator_weights)
    print_accuracy(y_train, y_train_pred2, y_test, y_test_pred2)

    # ===============================================
    print('\nQBoost:')
    DW_PARAMS = {'num_reads': NUM_READS,
                 'auto_scale': True,
                 'num_spin_reversal_transforms': 10,
                 }
    clf3 = QBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS, max_depth=TREE_DEPTH)
    clf3.fit(X_train, y_train, emb_sampler, lmd=lmd, **DW_PARAMS)
    y_train_dw = clf3.predict(X_train)
    y_test_dw = clf3.predict(X_test)
    if verbose:
        print('weights:\n', clf3.estimator_weights)
    print_accuracy(y_train, y_train_dw, y_test, y_test_dw)
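# train_models() above calls a print_accuracy() helper that is not shown.
# A minimal sketch consistent with how it is called, assuming it simply
# reports accuracy on both splits via scikit-learn:
from sklearn.metrics import accuracy_score


def print_accuracy(y_train, y_train_pred, y_test, y_test_pred):
    # Report classification accuracy on the training and test splits
    print('accuracy (train): %5.2f' % accuracy_score(y_train, y_train_pred))
    print('accuracy (test):  %5.2f' % accuracy_score(y_test, y_test_pred))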