def addTrainingData(labeled_pairs, data_model, training_data=[]):
    """
    Appends training data to the training data collection.

    Parameters:
        labeled_pairs: mapping with the keys 0 and 1; each value is a
            sequence of record pairs that should receive that label.
        data_model: mapping whose 'fields' entry is handed to
            core.buildRecordDistances together with the record pairs.
        training_data: existing structured numpy array with 'label' and
            'field_distances' fields. Its dtype is reused for the new
            rows, so the default ``[]`` (which has no ``dtype``) raises
            AttributeError -- callers have to pass an array.

    Returns:
        The result of numpy.append(training_data, new_rows): the existing
        rows followed by one new row per labeled record pair.
    """
    fields = data_model['fields']

    # Build the examples explicitly in label order (all 0-labeled pairs,
    # then all 1-labeled pairs). Flattening labeled_pairs.values() instead
    # would depend on dict iteration order and could misalign the examples
    # with the label column built below.
    label_0_pairs = list(labeled_pairs[0])
    label_1_pairs = list(labeled_pairs[1])
    examples = label_0_pairs + label_1_pairs

    new_training_data = numpy.empty(len(examples), dtype=training_data.dtype)
    new_training_data['label'] = ([0] * len(label_0_pairs)
                                  + [1] * len(label_1_pairs))
    # buildRecordDistances returns a pair; only the first element is stored.
    new_training_data['field_distances'], _ = core.buildRecordDistances(
        examples, fields)

    return numpy.append(training_data, new_training_data)
def addTrainingData(labeled_pairs, data_model, training_data=[]):
    """
    Append newly labeled record pairs to the training data collection.

    Parameters:
        labeled_pairs: mapping with the keys 0 and 1; labeled_pairs[0]
            holds the distinct record pairs and labeled_pairs[1] the
            duplicate record pairs.
        data_model: mapping whose 'fields' entry is handed to
            core.buildRecordDistances together with the record pairs.
        training_data: existing structured numpy array with 'label' and
            'field_distances' fields. Its dtype is reused for the new
            rows, so the default ``[]`` (which has no ``dtype``) raises
            AttributeError -- callers have to pass an array.

    Returns:
        The result of numpy.append(training_data, new_rows): the existing
        rows followed by one new row per labeled record pair.
    """
    fields = data_model['fields']

    distinct_pairs = list(labeled_pairs[0])
    dupe_pairs = list(labeled_pairs[1])
    n_distinct_pairs, n_dupe_pairs = len(distinct_pairs), len(dupe_pairs)

    new_training_data = numpy.zeros(n_distinct_pairs + n_dupe_pairs,
                                    dtype=training_data.dtype)

    # Use the literal labels 0 and 1 and an explicit 0-then-1 example order.
    # Indexing labeled_pairs.keys() fails on Python 3 (dict views are not
    # subscriptable), and relying on keys()/values() order could pair the
    # examples with the wrong labels.
    examples = distinct_pairs + dupe_pairs
    new_training_data['label'] = [0] * n_distinct_pairs + [1] * n_dupe_pairs

    # Only the first element of buildRecordDistances' result is stored.
    new_training_data['field_distances'] = core.buildRecordDistances(
        examples, fields)[0]

    return numpy.append(training_data, new_training_data)