import logging
import os
import pickle

import numpy as np
import six
from sklearn.svm import LinearSVC

import svm_helper  # repo-local helper module; adjust the import path to your layout
# load_json and save_json_predictions (used by test_svm_low_shot when
# opts.generate_json is set) are assumed to be provided by the surrounding module.

logger = logging.getLogger(__name__)


def task(cls, cost, opts, features, targets):
    # Build a suffix (e.g. 'sample<N>_k<K>') from the targets file name so each
    # (class, cost) model is written to a unique, sample/k-specific path.
    suffix = '_'.join(
        opts.targets_data_file.split('/')[-1].split('.')[0].split('_')[-2:])
    out_file = svm_helper.get_low_shot_output_file(opts, cls, cost, suffix)
    if not os.path.exists(out_file):
        clf = LinearSVC(
            C=cost,
            class_weight={1: 2, -1: 1},
            intercept_scaling=1.0,
            verbose=0,
            penalty='l2',
            loss='squared_hinge',
            tol=0.0001,
            dual=True,
            max_iter=2000,
        )
        # get_cls_feats_labels maps the raw 0/1 targets for this class into the
        # -1/+1 labels LinearSVC is trained on (0 = absent -> -1, 1 = present).
        train_feats, train_cls_labels = svm_helper.get_cls_feats_labels(
            cls, features, targets, opts.dataset)
        clf.fit(train_feats, train_cls_labels)
        with open(out_file, 'wb') as fwrite:
            pickle.dump(clf, fwrite)
    return 0
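
# task() is idempotent (it returns immediately when the output file already
# exists) and writes each model to a unique path, so (class, cost) pairs can
# be trained independently. Below is a minimal parallel-dispatch sketch using
# only the standard library; the worker count and this helper's name are
# illustrative assumptions, not part of the original tooling.
import itertools
from multiprocessing import Pool


def run_low_shot_tasks_parallel(opts, features, targets, cls_list, costs_list,
                                num_workers=8):
    # Each worker receives its own (pickled) copy of features/targets, which
    # is acceptable for moderately sized feature matrices.
    args = [(cls, cost, opts, features, targets)
            for cls, cost in itertools.product(cls_list, costs_list)]
    with Pool(processes=num_workers) as pool:
        # starmap unpacks each tuple into task(cls, cost, opts, features, targets)
        pool.starmap(task, args)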
def train_svm_low_shot(opts):
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)

    features, targets = svm_helper.load_input_data(opts.data_file,
                                                   opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    logger.info('Training SVM for costs: {}'.format(costs_list))

    # classes for which SVM training should be done
    num_classes, cls_list = svm_helper.get_low_shot_svm_classes(
        targets, opts.dataset)

    for cls in cls_list:
        for cost_idx in range(len(costs_list)):
            cost = costs_list[cost_idx]
            suffix = '_'.join(
                opts.targets_data_file.split('/')[-1].split('.')[0].split('_')
                [-2:])
            out_file = svm_helper.get_low_shot_output_file(
                opts, cls, cost, suffix)
            if os.path.exists(out_file):
                logger.info('SVM model exists: {}'.format(out_file))
            else:
                logger.info('SVM model not found: {}'.format(out_file))
                logger.info('Training model with the cost: {}'.format(cost))
                clf = LinearSVC(
                    C=cost,
                    class_weight={1: 2, -1: 1},
                    intercept_scaling=1.0,
                    verbose=1,
                    penalty='l2',
                    loss='squared_hinge',
                    tol=0.0001,
                    dual=True,
                    max_iter=2000,
                )
                train_feats, train_cls_labels = svm_helper.get_cls_feats_labels(
                    cls, features, targets, opts.dataset)
                num_positives = len(np.where(train_cls_labels == 1)[0])
                num_negatives = len(np.where(train_cls_labels == -1)[0])
                logger.info('cls: {} has +ve: {} -ve: {} ratio: {}'.format(
                    cls, num_positives, num_negatives,
                    float(num_positives) / num_negatives))
                logger.info('features: {} cls_labels: {}'.format(
                    train_feats.shape, train_cls_labels.shape))
                clf.fit(train_feats, train_cls_labels)
                logger.info('Saving SVM model to: {}'.format(out_file))
                with open(out_file, 'wb') as fwrite:
                    pickle.dump(clf, fwrite)
    logger.info('All done!')
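
# Example invocation of train_svm_low_shot(). The attribute names mirror the
# ones the function reads; in the original tooling they come from an argparse
# parser. All paths, the dataset name, and the cost-list format here are
# placeholder assumptions.
def example_train_low_shot():
    from argparse import Namespace
    opts = Namespace(
        data_file='features/train_sample1_k1_features.npy',
        targets_data_file='features/train_sample1_k1_targets.npy',
        output_path='output/low_shot_svms',
        costs_list='0.01,0.1,1.0,10.0',
        dataset='voc',
    )
    train_svm_low_shot(opts)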
def test_svm_low_shot(opts):
    k_values = [int(val) for val in opts.k_values.split(",")]
    sample_inds = [int(val) for val in opts.sample_inds.split(",")]
    logger.info('Testing svm for k-values: {} and sample_inds: {}'.format(
        k_values, sample_inds))

    img_ids, cls_names = [], []
    if opts.generate_json:
        img_ids, cls_names = load_json(opts.json_targets)

    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    # we test the svms on the full test set. Given the test features and the
    # targets, we test it for various k-values (low-shot), cost values and
    # 5 independent samples.
    features, targets = svm_helper.load_input_data(opts.data_file,
                                                   opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    logger.info('Testing SVM for costs: {}'.format(costs_list))

    # classes for which SVM testing should be done
    num_classes, cls_list = svm_helper.get_low_shot_svm_classes(
        targets, opts.dataset)

    # create the output for per sample, per k-value and per cost.
    sample_ap_matrices = []
    for _ in range(len(sample_inds)):
        ap_matrix = np.zeros((len(k_values), len(costs_list)))
        sample_ap_matrices.append(ap_matrix)

    # the test goes like this: For a given sample, for a given k-value and a
    # given cost value, we evaluate the trained svm model for all classes.
    # After computing over all classes, we get the mean AP value over all
    # classes. We hence end up with: output = [sample][k_value][cost]
    for inds in range(len(sample_inds)):
        sample_idx = sample_inds[inds]
        for k_idx in range(len(k_values)):
            k_low = k_values[k_idx]
            suffix = 'sample{}_k{}'.format(sample_idx + 1, k_low)
            for cost_idx in range(len(costs_list)):
                cost = costs_list[cost_idx]
                local_cost_ap = np.zeros((num_classes, 1))
                for cls in cls_list:
                    logger.info(
                        'Test sample/k_value/cost/cls: {}/{}/{}/{}'.format(
                            sample_idx + 1, k_low, cost, cls))
                    model_file = svm_helper.get_low_shot_output_file(
                        opts, cls, cost, suffix)
                    with open(model_file, 'rb') as fopen:
                        if six.PY2:
                            model = pickle.load(fopen)
                        else:
                            model = pickle.load(fopen, encoding='latin1')
                    prediction = model.decision_function(features)
                    eval_preds, eval_cls_labels = svm_helper.get_cls_feats_labels(
                        cls, prediction, targets, opts.dataset)
                    P, R, score, ap = svm_helper.get_precision_recall(
                        eval_cls_labels, eval_preds)
                    local_cost_ap[cls][0] = ap
                mean_cost_ap = np.mean(local_cost_ap, axis=0)
                sample_ap_matrices[inds][k_idx][cost_idx] = mean_cost_ap
            out_k_sample_file = os.path.join(
                opts.output_path,
                'test_ap_sample{}_k{}.npy'.format(sample_idx + 1, k_low))
            save_data = sample_ap_matrices[inds][k_idx]
            save_data = save_data.reshape((1, -1))
            np.save(out_k_sample_file, save_data)
            logger.info('Saved sample test k_idx AP to file: {} {}'.format(
                out_k_sample_file, save_data.shape))
            if opts.generate_json:
                argmax_cls = np.argmax(save_data, axis=1)
                chosen_cost = costs_list[argmax_cls[0]]
                logger.info('chosen cost: {}'.format(chosen_cost))
                save_json_predictions(opts, chosen_cost, sample_idx, k_low,
                                      features, cls_list, cls_names, img_ids)
    logger.info('All done!!')
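
# Aggregation sketch: combine the per-(sample, k) AP files written by
# test_svm_low_shot() into a mean/std over the independent low-shot samples,
# picking the best cost per sample first. The default sample count and this
# helper's name are illustrative assumptions; the file naming matches the
# np.save call above.
def aggregate_low_shot_ap(output_path, k_values, num_samples=5):
    for k_low in k_values:
        best_aps = []
        for sample in range(1, num_samples + 1):
            ap_file = os.path.join(
                output_path, 'test_ap_sample{}_k{}.npy'.format(sample, k_low))
            ap_per_cost = np.load(ap_file)  # shape: (1, num_costs)
            best_aps.append(float(ap_per_cost.max()))
        logger.info('k={}: mean AP {:.4f} +/- {:.4f}'.format(
            k_low, np.mean(best_aps), np.std(best_aps)))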