def train_sample(feature_str, label, pos_train=0.5, neg_train=1000):
    """Train a classifier for `label` on a random sample of images.

    Positive examples are the first element of every row returned by
    cass.buffered_get_row(cf_labels, label); every other image hash is a
    negative example.  The sampled (label, value) pairs are stored in the
    module-level global `label_values` (so they stay around for display)
    and are then handed to train_classifier.

    Args:
        feature_str: feature identifier passed to cass.get_feature_values
        label: label whose row in cf_labels lists the positive examples
        pos_train: number of positive examples to sample; a value in
            (0, 1] is read as a fraction of the available positives
        neg_train: number of negative examples to sample; a value in
            (0, 1] is read as a fraction of the available negatives

    Raises:
        ValueError: fewer positive or negative examples exist than requested
    """
    global label_values

    every_hash = list(cass.get_image_hashes())
    positives = [row[0] for row in cass.buffered_get_row(cf_labels, label)]
    negatives = list(set(every_hash) - set(positives))

    # Requests in (0, 1] are fractions of the pool; turn them into counts.
    if 0 < pos_train <= 1:
        pos_train = int(pos_train * len(positives))
    if 0 < neg_train <= 1:
        neg_train = int(neg_train * len(negatives))

    # Refuse to sample more examples than are available.
    if pos_train > len(positives):
        raise ValueError('Not enough positive examples %s(%d)'
                         % (label, len(positives)))
    if neg_train > len(negatives):
        raise ValueError('Not enough negative examples %s(%d)'
                         % (label, len(negatives)))

    chosen_pos = random.sample(positives, pos_train)
    chosen_neg = random.sample(negatives, neg_train)

    # Negatives come first so the labels line up with the requested hashes.
    labels = [-1] * len(chosen_neg) + [1] * len(chosen_pos)
    values = cass.get_feature_values(feature_str, chosen_neg + chosen_pos)

    label_values = list(zip(labels, values))
    print('Training classifier with sample %d' % len(label_values))
    train_classifier(label_values)
def run_train_test(feature_str, label, split_opts=None):
    """Train a classifier for `label` and score it on the test split.

    Args:
        feature_str: feature identifier passed to cass.get_feature_values
        label: string label, e.g. 'airplane'; passed to split_train_test
        split_opts: optional dict of keyword arguments passed to
            split_train_test (defaults to no extra arguments)

    Returns:
        (label, [(conf, gt, hash), ...]) sorted by ascending conf, where
        label: string label, e.g. 'airplane'
        conf: -inf to inf, prediction confidence (the predicted label
            multiplied by the classifier's confidence)
        gt: -1 or 1 for negative or positive annotation
        hash: image key for use with cass.get_image
    """
    # A None sentinel avoids sharing one mutable dict across calls.
    if split_opts is None:
        split_opts = {}
    train, test = split_train_test(label, **split_opts)
    # Each split is walked more than once below; materialize them so the
    # second pass is not silently empty if an iterator was returned.
    train = list(train)
    test = list(test)

    # Train
    train_labels = [gt for gt, _ in train]
    train_hashes = [key for _, key in train]
    train_values = cass.get_feature_values(feature_str, train_hashes)
    label_values = list(zip(train_labels, train_values))
    print('Training classifier with %d values' % len(label_values))
    classifier = train_classifier(label_values)

    # Test
    test_hashes = [key for _, key in test]
    test_values = list(cass.get_feature_values(feature_str, test_hashes))
    # NOTE(review): the two counts are printed separately, presumably because
    # some hashes may lack feature values; if so the zip below would pair
    # values with the wrong ground truth -- confirm against cass.
    print('Testing with %d(%d) values' % (len(test_hashes), len(test_values)))

    conf_gt_hash = []
    for value, (gt, key) in zip(test_values, test):
        # predict() is expected to yield exactly one (pred, conf) pair.
        ((pred, conf),) = classifier.predict(value)
        conf_gt_hash.append((pred * conf, gt, key))
    conf_gt_hash.sort(key=lambda item: item[0])
    return (label, conf_gt_hash)