Пример #1
0
    def test_composite_transducer(self):
        '''A composite transducer (an ordered list of transducers) should be supported.

        Per the fixture used here, composing a->b with aa->c is expected to
        turn 'aaba' into 'bbbb', unlike the single length-ordered transducer
        exercised by the length ordering test.
        '''
        composite_path = os.path.join(self.test_transducers_path,
                                      'test_composite.json')
        rows = [{"word": "aaba"}]
        input_df = DataFrame(rows)
        expected_df = DataFrame([{"word": "bbbb"}])

        transducer = Transducer(
            transducers_needed=[{
                'source': 'word',
                'target': 'word',
                'functions': ['test_composite']
            }],
            transducers_available_dir=self.test_transducers_path)

        # Run the loaded functions one after another, feeding each output
        # into the next function.
        result = rows[0]['word']
        for step in transducer.load_composite(composite_path):
            result = step(result)
        self.assertEqual(result, 'bbbb')

        actual_df = transducer.apply_to_data_frame(input_df)
        self.assertTrue(expected_df.equals(actual_df))
Пример #2
0
 def test_intermediate(self):
     '''Rules must not feed each other: with a -> b and b -> c, 'a' becomes 'b', never 'c'.
     '''
     csv_path = os.path.join(self.test_transducers_path,
                             'test_intermediate_transducer.csv')
     convert = Transducer().create_transducer_function(csv_path)
     result = convert('aba')
     self.assertEqual('bcb', result)
Пример #3
0
 def test_length_ordering(self):
     '''Longer rules should win over shorter ones.

     With a transducer containing a->b and aa->c, 'aaba' is expected to
     become 'cbb' rather than 'bbbb'.
     '''
     csv_path = os.path.join(self.test_transducers_path,
                             'test_length_transducer.csv')
     convert = Transducer().create_transducer_function(csv_path)
     result = convert('aaba')
     self.assertEqual('cbb', result)
Пример #4
0
 def transduce(self) -> List[Dict[str, Union[ResourceManifest, pd.DataFrame]]]:
     '''Transduce every data object and return the transduced data objects.

     Each entry of self.data_objs is updated in place: its 'data' value is
     replaced by the result of applying the transducers listed under the
     'transducers' key of its 'manifest' (an empty list of transducers is
     used when that key is absent).

     Returns:
         The transduced data objects, i.e. the same objects held in
         self.data_objs, in the same order.
     '''
     transduced_data_objs = []
     for data_obj in self.data_objs:
         df = data_obj['data']
         manifest = data_obj['manifest']
         transducers = manifest['transducers'] if "transducers" in manifest else []
         transducer = Transducer(transducers)
         data_obj['data'] = transducer.apply_to_data_frame(df)
         # Collect the updated object so the returned list is populated;
         # without this the method would always return an empty list.
         transduced_data_objs.append(data_obj)
     return transduced_data_objs
Пример #5
0
 def test_lambda_transducer(self):
     '''Lambda expressions given as strings should be usable as transducer functions.
     '''
     transducer = Transducer([{
         'source': 'word',
         'target': 'word',
         'functions': ['lambda x: x.upper()']
     }])

     # The function is read back as source text and evaluated into a callable.
     lambda_source = transducer.transducers_needed[0]['functions'][0]
     to_upper = eval(lambda_source)
     self.assertEqual('TEST', to_upper('test'))

     input_df = DataFrame([{"word": "test"}])
     expected_df = DataFrame([{"word": "TEST"}])
     actual_df = transducer.apply_to_data_frame(input_df)
     self.assertTrue(expected_df.equals(actual_df))
Пример #6
0
    def test_feeding(self):
        '''Rules must not feed: given a->b and b->c, 'aba' should yield 'bcb', not 'ccc'.
        '''
        rules_path = os.path.join(self.test_transducers_path, 'test_feeding.csv')
        source_word = "aba"
        expected_word = "bcb"

        transducer = Transducer([{
            'source': 'word',
            'target': 'word',
            'functions': [rules_path]
        }])
        convert = transducer.create_transducer_function(rules_path)

        self.assertEqual(expected_word, convert(source_word))
Пример #7
0
    def test_incorrect_source_transducer(self):
        '''Applying a transducer whose 'source' is not in the data should raise
        TransducerSourceNotFoundError.
        '''
        rules_path = os.path.join(self.test_transducers_path,
                                  'test_transducer.csv')
        input_df = DataFrame([{"word": "aaa"}])

        # 'foobar' is not a column of input_df, which only has 'word'.
        transducer = Transducer([{
            'source': 'foobar',
            'target': 'word',
            'functions': [rules_path]
        }])

        # The returned function is not used below; only the call itself is made.
        transducer.create_transducer_function(rules_path)

        with self.assertRaises(TransducerSourceNotFoundError):
            transducer.apply_to_data_frame(input_df)
Пример #8
0
 def return_formatted_config(self, form: str = "js") -> Union[str, dict]:
     '''Return the Dictionary config as an object, a JavaScript snippet, or JSON.

     Args:
         form: 'obj' returns the config dict itself; 'js' returns JavaScript
             source of the shape ``var config = {...}`` followed by one
             ``var name = value;`` line per entry in the optional
             ``adhoc_vars`` config; 'json' returns the config serialised
             with json.dumps.

     Returns:
         A dict for 'obj' and a str for 'js' and 'json'. Any other value of
         ``form`` matches no branch, so None is returned.
     '''
     config_template_object = {
         "L1": {
             "name": self.config['L1'],
             "lettersInLanguage": self.config['alphabet']
         },
         "L2": {
             "name": self.config['L2']
         },
         "build": datetime.datetime.today().strftime('%Y%m%d%H%M')
     }
     # Name of the transducer that converts search queries, when configured.
     if 'L1_compare_transducer_name' in self.config:
         config_template_object['L1']['compare'] = self.config[
             'L1_compare_transducer_name']
     # Optional paths are copied over only when present in the config.
     for optional_key in ("audio_path", "img_path"):
         if optional_key in self.config:
             config_template_object[optional_key] = self.config[optional_key]

     if form == 'obj':
         return config_template_object
     elif form == 'js':
         # Ad hoc variables become standalone JavaScript declarations.
         adhoc_vars = ''
         if "adhoc_vars" in self.config:
             declarations = [
                 f"var {k} = {v};"
                 for av in self.config['adhoc_vars']
                 for k, v in av.items()
             ]
             adhoc_vars = "\n".join(declarations)
         # Transducer mapping configs are only attached in the 'js' form.
         for data_obj in self.data_objs:
             transducers = []
             if "transducers" in data_obj['manifest']:
                 transducers = data_obj['manifest']['transducers']
             # NOTE(review): every data object overwrites the previous value,
             # so only the last one's mapping configs are kept - confirm
             # this is intended.
             config_template_object["L1"]["transducers"] = Transducer(
                 transducers).return_mapping_configs()
         js_source = f"var config = {json.dumps(config_template_object)}"
         if adhoc_vars:
             # Terminate the config statement before the next declaration;
             # "}var x = ..." on a single line is a JavaScript syntax error.
             js_source += ";\n" + adhoc_vars
         return js_source
     elif form == 'json':
         return json.dumps(config_template_object)
Пример #9
0
    def test_javascript_transducer(self):
        '''The JavaScript generated for a transducer should map 'aaa' to 'bbb'.
        '''
        rules_path = os.path.join(self.test_transducers_path,
                                  'test_transducer.csv')
        expected_word = "bbb"

        transducer = Transducer([{
            'source': 'word',
            'target': 'word',
            'functions': [rules_path]
        }])

        # Script layout: namespace setup, the generated template, then a
        # call to the generated function.
        script = "".join([
            "var mtd = {'transducers': {}};",
            transducer.return_js_template(rules_path),
            "mtd.transducers.test_transducer('aaa');",
        ])
        self.assertEqual(eval_js(script), expected_word)
Пример #10
0
 def test_find_path_to_transducers(self):
     '''Known transducers resolve to an existing path; unknown ones raise
     TransducerNotFoundError.
     '''
     transducer = Transducer()

     norm_path = transducer.return_transducer_path('norm')
     self.assertEqual(transducer.return_transducer_name(norm_path), 'norm')
     self.assertTrue(os.path.exists(norm_path))

     with self.assertRaises(TransducerNotFoundError):
         transducer.return_transducer_path('foobar')

     missing_csv = os.path.join(self.transducers_path, 'foobar.csv')
     with self.assertRaises(TransducerNotFoundError):
         transducer.return_transducer_name(missing_csv)
Пример #11
0
 def test_chained_transducer(self):
     '''Several lambda transductions listed together should chain, each
     receiving the previous one's output.
     '''
     chain = [
         'lambda x: x.upper()',
         'lambda x: x.split("-")',
         'lambda x: "".join(x)',
     ]
     transducer = Transducer([{
         'source': 'word',
         'target': 'word',
         'functions': chain
     }])

     value = 'test-test'
     for needed in transducer.transducers_needed:
         for fn_source in needed['functions']:
             # Each function is stored as source text; evaluate, then apply.
             value = eval(fn_source)(value)
     self.assertEqual('TESTTEST', value)
Пример #12
0
    def test_normal_transducer(self):
        '''Sanity check of the a->b transducer, both as a plain function and
        when applied to a DataFrame.
        '''
        source_word = "aaa"
        expected_word = "bbb"
        input_df = DataFrame([{"word": source_word}])
        expected_df = DataFrame([{"word": expected_word}])

        csv_path = os.path.join(self.test_transducers_path,
                                'test_transducer.csv')
        # NOTE(review): despite the name, this also points at the .csv
        # fixture - confirm whether a .json fixture was intended.
        json_path = os.path.join(self.test_transducers_path,
                                 'test_transducer.csv')

        def build(path):
            # Transducer mapping the 'word' column onto itself via `path`.
            return Transducer([{
                'source': 'word',
                'target': 'word',
                'functions': [path]
            }])

        transducer = build(csv_path)
        transducer_json = build(json_path)

        convert = transducer.create_transducer_function(csv_path)
        self.assertEqual(expected_word, convert(source_word))

        csv_result = transducer.apply_to_data_frame(input_df)
        self.assertTrue(expected_df.equals(csv_result))
        json_result = transducer_json.apply_to_data_frame(input_df)
        self.assertTrue(expected_df.equals(json_result))