def run_classifier(train_fv, train_label, val_fv, val_label, k, C=1.0, gamma='auto'):
    """Fit a one-vs-rest SVC on the training split and score it on validation.

    Args:
        train_fv: Training feature vectors, passed to ``fit``.
        train_label: Training labels, passed to ``fit``.
        val_fv: Validation feature vectors, passed to ``predict``.
        val_label: Validation labels the predictions are scored against.
        k: Kernel name forwarded to ``SVC(kernel=...)``.
        C: Value forwarded to ``SVC(C=...)``.
        gamma: Value forwarded to ``SVC(gamma=...)``.

    Returns:
        A ``(classifier, val_f1)`` tuple: the fitted ``OneVsRestClassifier``
        and the value returned by ``npmetrics.label_f1_macro`` on the
        validation predictions.
    """
    # One SVC is wrapped per label; n_jobs=-1 is handed to OneVsRestClassifier.
    ovr_model = OneVsRestClassifier(
        SVC(kernel=k, C=C, gamma=gamma),
        n_jobs=-1,
    )
    ovr_model.fit(train_fv, train_label)

    predictions = ovr_model.predict(val_fv)
    macro_f1 = npmetrics.label_f1_macro(val_label, predictions)

    # Report the hyper-parameters next to the score, then the full metrics.
    print("\n---val---", "k:", k, 'C:', C, 'gamma', gamma, 'f1:', macro_f1)
    npmetrics.print_metrics(val_label, predictions)

    return ovr_model, macro_f1
def svm(fv='fv0'):
    """Grid-search one-vs-rest SVCs on a feature set and report test metrics.

    Loads the train/val/test splits for ``fv`` via ``load_fv``, standardizes
    the features with a ``StandardScaler`` fitted on the training split only,
    then fits one ``OneVsRestClassifier(SVC(...))`` per combination of kernel
    and ``class_weight``. The combination with the highest validation score
    (as returned by ``npmetrics.label_f1_macro``) is evaluated on the test
    split. All results are printed; nothing is returned.

    Args:
        fv: Identifier forwarded to ``load_fv`` to select the feature set.
            Each loaded item is unpacked as a 3-tuple whose second and third
            elements are used as the feature vector and the label.
    """
    train_items, val_items, test_items = load_fv(fv)
    print("train items", len(train_items))
    print("val items", len(val_items))
    print("test items", len(test_items))

    # The first element of each item (bound as "gene" in earlier revisions)
    # is not used by this routine.
    _, train_fv, train_label = zip(*train_items)
    _, val_fv, val_label = zip(*val_items)
    _, test_fv, test_label = zip(*test_items)

    print("-------run svm for bov--------", fv)

    # Fit the scaler on training data only so that validation/test statistics
    # do not leak into the preprocessing.
    scaler = StandardScaler()
    scaler.fit(train_fv)
    train_fv = np.stack(scaler.transform(train_fv))
    val_fv = np.stack(scaler.transform(val_fv))
    test_fv = np.stack(scaler.transform(test_fv))

    train_label = np.stack(train_label)
    val_label = np.stack(val_label)
    test_label = np.stack(test_label)

    kernels = ['linear', 'rbf', 'poly', 'sigmoid']
    class_weights = ['balanced', None]

    best_f1 = 0.0
    best_classifier = None
    best_k = None
    best_b = None

    for k in kernels:
        for b in class_weights:
            estimator = SVC(kernel=k, class_weight=b)
            classifier = OneVsRestClassifier(estimator, n_jobs=-1)
            classifier.fit(train_fv, train_label)

            val_pd = classifier.predict(val_fv)
            val_f1 = npmetrics.label_f1_macro(val_label, val_pd)
            print("\n---svm for bov---", "k:", k, 'b:', b, 'f1:', val_f1)
            npmetrics.print_metrics(val_label, val_pd)

            # Always accept the first candidate: with a strict ">" against an
            # initial 0.0, a run in which every model scores 0.0 (or NaN)
            # would leave best_classifier as None and crash at test time.
            if best_classifier is None or val_f1 > best_f1:
                best_f1 = val_f1
                best_classifier = classifier
                best_k = k
                best_b = b

    test_pd = best_classifier.predict(test_fv)
    print("\n---svm for bov test result---", "k:", best_k, "b:", best_b)
    npmetrics.print_metrics(test_label, test_pd)