def domain_map(features, feature_format, train_context, test_context,
               intervals=None, format='arff', positive_class=None):
    """
    Use the features returned by a propositionalization method to map
    unseen test examples into the new feature space.

    :param features: string of features as returned by rsd or aleph, or a
        TreeLiker runner object for the 'treeliker' format
    :param feature_format: one of 'rsd', 'aleph', 'treeliker'
    :param train_context: DBContext with training examples
    :param test_context: DBContext with test examples
    :param intervals: discretization intervals (optional)
    :param format: output format (only 'arff' is supported)
    :param positive_class: required for aleph
    :return: the test examples in propositional form, or the string
        'unsupported format' when `format` is not 'arff'
    :rtype: str
    """
    # Avoid a shared mutable default argument.
    intervals = {} if intervals is None else intervals
    dataset = None
    if feature_format in ['rsd', 'aleph']:
        train_rsd = RSDConverter(train_context)
        test_rsd = RSDConverter(test_context, discr_intervals=intervals)
        mapper_target_name = train_context.target_table + '_mapper'
        train_examples = train_rsd.all_examples(pred_name=mapper_target_name)
        test_examples = test_rsd.all_examples(pred_name=mapper_target_name)
        if feature_format == 'aleph':
            # Aleph features are first normalized to rsd form.
            features = aleph_to_rsd_features(features)
        prolog_bk = '\n'.join([
            _example_ids('testExampleIDs', test_examples),
            '%% test examples',
            test_examples,
            '%% train examples',
            train_examples,
            '%% train background knowledge',
            train_rsd.background_knowledge(),
            '%% test background knowledge',
            test_rsd.background_knowledge(),
            _feature_numbers(features),
            '%% features',
            features,
        ])
        THIS_DIR = os.path.dirname(__file__) or '.'
        # Text mode: prolog_bk is a str, and the binary-mode default of
        # NamedTemporaryFile would reject it on Python 3.
        f = tempfile.NamedTemporaryFile(mode='w', delete=False)
        try:
            f.write(prolog_bk)
            f.close()
            cmd_args = ['yap', '-L', '--', '%s/mapper.pl' % THIS_DIR,
                        f.name, mapper_target_name]
            evaluations = subprocess.check_output(cmd_args)
            dataset = dump_dataset(features, feature_format, evaluations,
                                   train_context, format=format,
                                   positive_class=positive_class)
        finally:
            # Always clean up the temp file, even if yap or dump_dataset fails.
            os.remove(f.name)
    elif feature_format == 'treeliker':
        # We provide treeliker with the test dataset
        # since it has a built-in ability to evaluate features
        treeliker_test = TreeLikerConverter(test_context,
                                            discr_intervals=intervals)
        treeliker = features
        treeliker.test_dataset = treeliker_test.dataset()
        _, test_dataset = treeliker.run()
        if format == 'arff':
            dataset = test_dataset
        else:
            return 'unsupported format'
    return dataset
def domain_map(features, feature_format, train_context, test_context,
               intervals=None, format='arff', positive_class=None):
    '''
    Use the features returned by a propositionalization method to map unseen test examples into the new feature space.

    :param features: string of features as returned by rsd, aleph or treeliker
    :param feature_format: 'rsd', 'aleph', 'treeliker'
    :param train_context: DBContext with training examples
    :param test_context: DBContext with test examples
    :param intervals: discretization intervals (optional)
    :param format: output format (only arff is used atm)
    :param positive_class: required for aleph
    :return: returns the test examples in propositional form
    :rtype: str

    :Example:

    >>> test_arff = mapper.domain_map(features, 'rsd', train_context, test_context)
    '''
    # Avoid a shared mutable default argument.
    intervals = {} if intervals is None else intervals
    dataset = None
    if feature_format in ['rsd', 'aleph']:
        train_rsd = RSDConverter(train_context)
        test_rsd = RSDConverter(test_context, discr_intervals=intervals)
        mapper_target_name = train_context.target_table + '_mapper'
        train_examples = train_rsd.all_examples(pred_name=mapper_target_name)
        test_examples = test_rsd.all_examples(pred_name=mapper_target_name)
        if feature_format == 'aleph':
            # Aleph features are first normalized to rsd form.
            features = aleph_to_rsd_features(features)
        prolog_bk = '\n'.join([
            _example_ids('testExampleIDs', test_examples),
            '%% test examples',
            test_examples,
            '%% train examples',
            train_examples,
            '%% train background knowledge',
            train_rsd.background_knowledge(),
            '%% test background knowledge',
            test_rsd.background_knowledge(),
            _feature_numbers(features),
            '%% features',
            features,
        ])
        THIS_DIR = os.path.dirname(__file__) or '.'
        # Text mode: prolog_bk is a str, and the binary-mode default of
        # NamedTemporaryFile would reject it on Python 3.
        f = tempfile.NamedTemporaryFile(mode='w', delete=False)
        try:
            f.write(prolog_bk)
            f.close()
            cmd_args = ['yap', '-L', '--', '%s/mapper.pl' % THIS_DIR,
                        f.name, mapper_target_name]
            evaluations = subprocess.check_output(cmd_args)
            dataset = dump_dataset(features, feature_format, evaluations,
                                   train_context, format=format,
                                   positive_class=positive_class)
        finally:
            # Always clean up the temp file, even if yap or dump_dataset fails.
            os.remove(f.name)
    elif feature_format == 'treeliker':
        # We provide treeliker with the test dataset
        # since it has a built-in ability to evaluate features
        treeliker_test = TreeLikerConverter(test_context,
                                            discr_intervals=intervals)
        treeliker = features
        treeliker.test_dataset = treeliker_test.dataset()
        _, test_dataset = treeliker.run()
        if format == 'arff':
            dataset = test_dataset
        else:
            return 'unsupported format'
    return dataset
def database_rsd_converter(input_dict):
    """
    Convert the given database context into RSD-format propositional input.

    :param input_dict: dict with keys 'dump', 'context' (a DBContext) and
        'discr_intervals' (discretization intervals, may be falsy)
    :return: dict with 'examples' (all examples in RSD form) and
        'bk' (the background knowledge)
    """
    # NOTE(review): 'dump' is read but never used below; kept so a missing
    # 'dump' key still raises KeyError as before — confirm if it can go.
    dump = input_dict['dump'] == 'true'
    rsd = RSDConverter(input_dict['context'],
                       discr_intervals=input_dict['discr_intervals'] or {})
    return {'examples': rsd.all_examples(),
            'bk': rsd.background_knowledge()}