def __init__(self, data_model, candidates, data_1, data_2, index_include):
    """Set up the block learner and index predicates for two datasets.

    Args:
        data_model: Forwarded to the parent initialiser; afterwards
            ``self.data_model.predicates`` is used to obtain predicates.
        candidates: Forwarded to the parent initialiser; a copy of it
            seeds the examples handed to ``_index_predicates``.
        data_1: First dataset, passed to ``Sample`` with 600.
        data_2: Second dataset, passed to ``Sample`` with 50000.
        index_include: Optional extra examples; when truthy they are
            appended to the copied candidates before indexing.
    """
    super().__init__(data_model, candidates)

    # NOTE(review): 600 and 50000 are presumably sample sizes -- confirm
    # against the definition of ``Sample``.
    first_sample = Sample(data_1, 600)
    # The second comparison sample is drawn from the reduced pool built
    # from data_2, not from data_2 itself; the pool is also handed to the
    # learner as its fourth argument.
    index_pool = Sample(data_2, 50000)
    second_sample = Sample(index_pool, 600)

    blocking_predicates = self.data_model.predicates(canopies=False)
    self.block_learner = training.RecordLinkBlockLearner(
        blocking_predicates,
        first_sample,
        second_sample,
        index_pool,
    )

    # Work on a copy so the in-place ``+=`` never mutates the caller's
    # ``candidates`` object.
    to_index = candidates.copy()
    if index_include:
        to_index += index_include
    self._index_predicates(to_index)
def __init__(
    self,
    data_model,
    candidates,
    data_1,
    data_2,
    original_length_1,
    original_length_2,
):
    """Set up the block learner and index predicates for two datasets.

    Args:
        data_model: Forwarded to the parent initialiser; afterwards
            ``self.data_model.predicates`` is used to obtain predicates.
        candidates: Forwarded to the parent initialiser.
        data_1: First dataset, passed to ``Sample`` with 600.
        data_2: Second dataset, passed to ``Sample`` with 50000.
        original_length_1: Third argument to ``Sample`` for ``data_1``.
        original_length_2: Third argument to ``Sample`` for ``data_2``
            and for the pool derived from it.
    """
    super().__init__(data_model, candidates)

    # NOTE(review): 600 and 50000 are presumably sample sizes -- confirm
    # against the definition of ``Sample``.
    first_sample = Sample(data_1, 600, original_length_1)
    index_pool = Sample(data_2, 50000, original_length_2)
    # NOTE(review): the pool is sampled again with ``original_length_2``
    # (the length associated with data_2, not with the pool) -- confirm
    # this is what ``Sample`` expects.
    second_sample = Sample(index_pool, 600, original_length_2)

    blocking_predicates = self.data_model.predicates(canopies=False)
    self.block_learner = training.RecordLinkBlockLearner(
        blocking_predicates,
        first_sample,
        second_sample,
        index_pool,
    )

    # Index using the instance attribute rather than the ``candidates``
    # argument.
    self._index_predicates(self.candidates)
def _blockLearner(self, predicates):
    """Create a ``RecordLinkBlockLearner`` for the given predicates.

    Args:
        predicates: Passed through as the learner's first argument.

    Returns:
        A new ``training.RecordLinkBlockLearner`` built from
        ``predicates`` and the two record samples stored on the instance
        (``self.sampled_records_1`` and ``self.sampled_records_2``).
    """
    make_learner = training.RecordLinkBlockLearner
    first_sample = self.sampled_records_1
    second_sample = self.sampled_records_2
    return make_learner(predicates, first_sample, second_sample)
def _init_product(self, candidates, *args):
    """Attach a block learner and a private copy of the candidates.

    Args:
        candidates: Sliceable collection; a ``[:]`` copy is stored on
            ``self.candidates``.
        *args: Extra positional arguments forwarded to
            ``training.RecordLinkBlockLearner`` after the predicates.
    """
    blocking_predicates = self.data_model.predicates(canopies=False)
    learner = training.RecordLinkBlockLearner(blocking_predicates, *args)
    self.block_learner = learner

    # Slice-copy so later changes to the stored collection do not affect
    # the object the caller passed in.
    self.candidates = candidates[:]