示例#1
0
    def __init__(self, data_model, candidates, data_1, data_2, index_include):
        """Initialise the learner and index predicates for the examples.

        After delegating to the parent initialiser with ``data_model`` and
        ``candidates``, this wraps both datasets with ``Sample``, builds a
        ``training.RecordLinkBlockLearner`` from the data model's
        non-canopy predicates, and finally calls ``_index_predicates`` on a
        copy of ``candidates`` extended with ``index_include`` when that
        argument is truthy.

        NOTE(review): ``Sample`` is defined outside this block; presumably
        its second argument is a sample size -- confirm against its
        definition.
        """
        super().__init__(data_model, candidates)

        first_sample = Sample(data_1, 600)
        # The second dataset is wrapped twice: the 50000-sized result is
        # handed to the block learner as index data, and the 600-sized
        # sample is taken from that result rather than from data_2 itself.
        index_pool = Sample(data_2, 50000)
        second_sample = Sample(index_pool, 600)

        predicates = self.data_model.predicates(canopies=False)
        self.block_learner = training.RecordLinkBlockLearner(
            predicates, first_sample, second_sample, index_pool
        )

        # Work on a copy so the caller's ``candidates`` is not extended.
        to_index = candidates.copy()
        if index_include:
            to_index += index_include
        self._index_predicates(to_index)
示例#2
0
    def __init__(self, data_model, candidates, data_1, data_2,
                 original_length_1, original_length_2):
        """Initialise the learner and index predicates for its candidates.

        After delegating to the parent initialiser with ``data_model`` and
        ``candidates``, this wraps both datasets with ``Sample`` (passing
        the matching ``original_length_*`` value through), builds a
        ``training.RecordLinkBlockLearner`` from the data model's
        non-canopy predicates, and calls ``_index_predicates`` on
        ``self.candidates``.

        NOTE(review): ``Sample`` is defined outside this block; presumably
        its second argument is a sample size and its third the size of the
        source dataset -- confirm against its definition.
        """
        super().__init__(data_model, candidates)

        first_sample = Sample(data_1, 600, original_length_1)
        # The 50000-sized result doubles as the index data given to the
        # block learner; the 600-sized sample is taken from it, still
        # passing original_length_2.
        index_pool = Sample(data_2, 50000, original_length_2)
        second_sample = Sample(index_pool, 600, original_length_2)

        predicates = self.data_model.predicates(canopies=False)
        learner = training.RecordLinkBlockLearner(
            predicates, first_sample, second_sample, index_pool
        )
        self.block_learner = learner

        self._index_predicates(self.candidates)
示例#3
0
 def _blockLearner(self, predicates):
     """Return a ``training.RecordLinkBlockLearner`` for *predicates*.

     The learner is constructed from the given predicates together with
     the two record samples stored on this instance
     (``sampled_records_1`` and ``sampled_records_2``).
     """
     learner_cls = training.RecordLinkBlockLearner
     first_sample = self.sampled_records_1
     second_sample = self.sampled_records_2
     return learner_cls(predicates, first_sample, second_sample)
示例#4
0
    def _init_product(self, candidates, *args):
        """Create the block learner and store a copy of ``candidates``.

        The data model's non-canopy predicates are passed, followed by any
        extra positional ``args``, to ``training.RecordLinkBlockLearner``;
        the result is stored on ``self.block_learner``. ``self.candidates``
        is set to a full-slice copy of ``candidates``.
        """
        predicates = self.data_model.predicates(canopies=False)
        learner_args = (predicates, *args)
        self.block_learner = training.RecordLinkBlockLearner(*learner_args)

        # Full slice: a shallow copy, so the stored sequence is a separate
        # object from the one the caller passed in.
        self.candidates = candidates[:]