Пример #1
0
def addTrainingData(labeled_pairs, data_model, training_data=[]):
    """
    Append newly labeled record pairs to the training data collection.

    Args:
        labeled_pairs: Mapping with keys 0 and 1; ``labeled_pairs[0]`` holds
            the record pairs to be labeled 0 and ``labeled_pairs[1]`` the
            record pairs to be labeled 1.
        data_model: Mapping whose ``'fields'`` entry is handed to
            ``core.buildRecordDistances``.
        training_data: Existing structured numpy array with ``'label'`` and
            ``'field_distances'`` fields. Its dtype is reused for the new
            rows, so an array must be supplied: the list default has no
            ``dtype`` attribute and cannot actually be used.

    Returns:
        A new array consisting of ``training_data`` followed by one row per
        labeled pair (label-0 pairs first, then label-1 pairs).
    """
    fields = data_model['fields']

    # Concatenate the two groups in explicit key order rather than relying
    # on dict iteration order, so that every example lines up with the
    # label assigned to it below.
    label_0_pairs = list(labeled_pairs[0])
    label_1_pairs = list(labeled_pairs[1])
    examples = label_0_pairs + label_1_pairs

    new_training_data = numpy.empty(len(examples),
                                    dtype=training_data.dtype)

    new_training_data['label'] = ([0] * len(label_0_pairs)
                                  + [1] * len(label_1_pairs))
    # buildRecordDistances returns a pair; only the first element is stored.
    new_training_data['field_distances'], _ = core.buildRecordDistances(
        examples, fields)

    # numpy.append returns a new array; the caller's array is not modified.
    return numpy.append(training_data, new_training_data)
Пример #2
0
def addTrainingData(labeled_pairs, data_model, training_data=[]):
    """
    Append newly labeled record pairs to the training data collection.

    Args:
        labeled_pairs: Mapping with keys 0 and 1; ``labeled_pairs[0]`` holds
            the distinct record pairs and ``labeled_pairs[1]`` the duplicate
            record pairs.
        data_model: Mapping whose ``'fields'`` entry is handed to
            ``core.buildRecordDistances``.
        training_data: Existing structured numpy array with ``'label'`` and
            ``'field_distances'`` fields. Its dtype is reused for the new
            rows, so an array must be supplied: the list default has no
            ``dtype`` attribute and cannot actually be used.

    Returns:
        A new array consisting of ``training_data`` followed by one row per
        labeled pair (distinct pairs labeled 0 first, then duplicate pairs
        labeled 1).
    """
    fields = data_model['fields']

    distinct_pairs = list(labeled_pairs[0])
    dupe_pairs = list(labeled_pairs[1])
    n_distinct_pairs, n_dupe_pairs = len(distinct_pairs), len(dupe_pairs)

    new_training_data = numpy.zeros(n_distinct_pairs + n_dupe_pairs,
                                    dtype=training_data.dtype)

    # Use the explicit keys 0 and 1 instead of indexing into
    # labeled_pairs.keys(): dict views are not subscriptable on Python 3,
    # and dict iteration order is not guaranteed to match the group sizes
    # computed above. Examples are concatenated in the same order as labels.
    examples = distinct_pairs + dupe_pairs

    new_training_data['label'] = [0] * n_distinct_pairs + [1] * n_dupe_pairs
    # Only the first element of buildRecordDistances' result is stored.
    new_training_data['field_distances'] = core.buildRecordDistances(
        examples, fields)[0]

    # numpy.append returns a new array; the caller's array is not modified.
    return numpy.append(training_data, new_training_data)