Exemplo n.º 1
0
    def __init__(self, config):
        """Set up the acoustic model, data loader, trainer and optimizer.

        Args:
            config: Mapping read here for ``'learning_config'`` (whose
                ``'running_config'`` entry supplies ``'outdir'`` and
                ``'num_epochs'``) and ``'optimizer_config'``. It is also
                passed whole to ``AM`` and to the data loader.
        """
        self.config = config['learning_config']

        self.am = AM(config)
        self.am.load_model(training=True)
        model_type = self.am.model_type

        # MultiTask models use their own loader; every other type shares one.
        if model_type == 'MultiTask':
            self.dg = MultiTask_DataLoader(config)
        else:
            self.dg = AM_DataLoader(config)

        # The loader's speech config receives the model's time reduction factor.
        self.dg.speech_config['reduction_factor'] = (
            self.am.model.time_reduction_factor)

        running_cfg = self.config['running_config']
        self.dg.load_state(running_cfg['outdir'])

        # Pick the trainer for the model type; any type not listed here
        # falls through to the transducer trainer.
        if model_type == 'CTC':
            self.runner = ctc_runners.CTCTrainer(
                self.dg.speech_featurizer,
                self.dg.text_featurizer,
                running_cfg,
            )
        elif model_type == 'LAS':
            self.runner = las_runners.LASTrainer(
                self.dg.speech_featurizer,
                self.dg.text_featurizer,
                running_cfg,
            )
            self.dg.LAS = True
        elif model_type == 'MultiTask':
            self.runner = multi_runners.MultiTaskLASTrainer(
                self.dg.speech_featurizer,
                self.dg.token4_featurizer,
                running_cfg,
            )
        else:
            self.runner = transducer_runners.TransducerTrainer(
                self.dg.speech_featurizer,
                self.dg.text_featurizer,
                running_cfg,
            )
        self.STT = self.am.model

        # The step budget is doubled when the loader reports augmentation as
        # available (presumably augmented copies double the data -- confirm).
        factor = 2 if self.dg.augment.available() else 1
        self.opt = tf.keras.optimizers.Adamax(**config['optimizer_config'])
        total_steps = (
            self.dg.get_per_epoch_steps() * running_cfg['num_epochs'] * factor
        )
        self.runner.set_total_train_steps(total_steps)
        self.runner.compile(self.STT, self.opt)

        # The loader's batch size follows the runner's global batch size.
        self.dg.batch = self.runner.global_batch_size
Exemplo n.º 2
0
    def __init__(self, am_config, lm_config):
        """Create the acoustic model and the language model, loading each.

        Args:
            am_config: Configuration handed to ``AM``.
            lm_config: Configuration handed to ``LM``.
        """
        # Both models are loaded with ``False`` as the sole positional argument.
        acoustic = AM(am_config)
        self.am = acoustic
        acoustic.load_model(False)

        language = LM(lm_config)
        self.lm = language
        language.load_model(False)
Exemplo n.º 3
0
 def __init__(self, config):
     """Load the acoustic model and build the speech/text featurizers.

     Args:
         config: Mapping passed to ``AM`` and read here for
             ``'speech_config'`` and ``'decoder_config'``.
     """
     acoustic = AM(config)
     self.am = acoustic
     acoustic.load_model(False)

     speech_cfg = config['speech_config']
     self.speech_config = speech_cfg
     text_cfg = config['decoder_config']
     self.text_config = text_cfg

     self.speech_feature = SpeechFeaturizer(speech_cfg)
     self.text_featurizer = TextFeaturizer(text_cfg)

     # One-element constant tensor holding the text featurizer's ``start``.
     start = self.text_featurizer.start
     self.decoded = tf.constant([start])
Exemplo n.º 4
0
    def __init__(self, am_config, lm_config, punc_config=None):
        """Create and load the acoustic and language models.

        Args:
            am_config: Configuration handed to ``AM``.
            lm_config: Configuration handed to ``LM``.
            punc_config: Optional second argument for ``LM``; defaults to
                ``None``.
        """
        acoustic = AM(am_config)
        self.am = acoustic
        acoustic.load_model(False)

        language = LM(lm_config, punc_config)
        self.lm = language
        language.load_model(False)

        # The flag is True exactly when a punc_config was supplied.
        self.punc_recover = punc_config is not None
Exemplo n.º 5
0
    def __init__(self, config):
        """Set up the model, data loader, trainer and scheduled Adam optimizer.

        Note that ``config`` is modified in place: the running config gains a
        ``'streaming'`` entry and ``'optimizer_config'`` gets its
        ``'learning_rate'`` replaced by a ``CustomSchedule`` instance.

        Args:
            config: Mapping read here for ``'learning_config'``,
                ``'speech_config'``, ``'model_config'`` and
                ``'optimizer_config'``. It is also passed whole to ``AM``
                and to the data loader.
        """
        self.config = config['learning_config']

        # Copy the speech config's streaming value into the running config,
        # which is later handed to the trainer.
        running_cfg = self.config['running_config']
        running_cfg.update({'streaming': config['speech_config']['streaming']})

        self.am = AM(config)
        self.am.load_model(training=True)
        model_type = self.am.model_type

        # MultiTask models use their own loader; every other type shares one.
        if model_type == 'MultiTask':
            self.dg = MultiTask_DataLoader(config)
        else:
            self.dg = AM_DataLoader(config)

        self.dg.speech_config['reduction_factor'] = (
            self.am.model.time_reduction_factor)
        self.dg.load_state(running_cfg['outdir'])

        # Pick the trainer for the model type; any type not listed here
        # falls through to the transducer trainer.
        if model_type == 'CTC':
            self.runner = ctc_runners.CTCTrainer(
                self.dg.speech_featurizer,
                self.dg.text_featurizer,
                running_cfg,
            )
        elif model_type == 'LAS':
            self.runner = las_runners.LASTrainer(
                self.dg.speech_featurizer,
                self.dg.text_featurizer,
                running_cfg,
            )
            self.dg.LAS = True
        elif model_type == 'MultiTask':
            # This trainer is given no text featurizer.
            self.runner = multi_runners.MultiTaskCTCTrainer(
                self.dg.speech_featurizer,
                running_cfg,
            )
        else:
            self.runner = transducer_runners.TransducerTrainer(
                self.dg.speech_featurizer,
                self.dg.text_featurizer,
                running_cfg,
            )
        self.STT = self.am.model

        # The step budget is doubled when the loader reports augmentation as
        # available (presumably augmented copies double the data -- confirm).
        factor = 2 if self.dg.augment.available() else 1
        total_steps = (
            self.dg.get_per_epoch_steps() * running_cfg['num_epochs'] * factor
        )

        # Warm-up spans the first 10% of all training steps.
        d_model = config['model_config']['dmodel']
        warmup = int(total_steps * 0.1)
        schedule = CustomSchedule(d_model, warmup_steps=warmup)

        # The schedule is written back into the caller's optimizer config
        # before the optimizer is built from it.
        optimizer_cfg = config['optimizer_config']
        optimizer_cfg['learning_rate'] = schedule
        self.opt = tf.keras.optimizers.Adam(**optimizer_cfg)

        self.runner.set_total_train_steps(total_steps)
        self.runner.compile(self.STT, self.opt)

        # The loader's batch size follows the runner's global batch size.
        self.dg.batch = self.runner.global_batch_size
Exemplo n.º 6
0
    def __init__(self, config):
        """Load the acoustic model for evaluation and build its tester.

        Args:
            config: Mapping read here for ``'learning_config'`` (whose
                ``'running_config'`` entry goes to the tester). It is also
                passed whole to ``AM`` and to the data loader.
        """
        self.config = config['learning_config']

        self.am = AM(config)
        self.am.load_model(training=False)

        # Forward the two values from compute_feature_dim() to the model's
        # return_pb_function().
        f_dim, c_dim = self.am.speech_feature.compute_feature_dim()
        self.am.model.return_pb_function(f_dim, c_dim)

        # MultiTask models get their own loader and a tester that takes two
        # token featurizers; all other types share the generic pair.
        if self.am.model_type == 'MultiTask':
            self.dg = MultiTask_DataLoader(config, training=False)
            self.runner = multi_task_tester.MultiTaskTester(
                self.config['running_config'],
                self.dg.token3_featurizer,
                self.dg.token4_featurizer,
            )
        else:
            self.dg = AM_DataLoader(config, training=False)
            self.runner = am_tester.AMTester(
                self.config['running_config'],
                self.dg.text_featurizer,
            )

        self.STT = self.am.model
        eval_steps = self.dg.eval_per_epoch_steps()
        self.runner.set_progbar(eval_steps)
        self.runner.compile(self.STT)
Exemplo n.º 7
0
    def __init__(self, config):
        """Load the acoustic model for evaluation and build its tester.

        Args:
            config: Mapping read here for ``'learning_config'`` (whose
                ``'running_config'`` entry goes to the tester) and, for
                non-MultiTask models, ``config['speech_config']['streaming']``.
                It is also passed whole to ``AM`` and to the data loader.
        """
        self.config = config['learning_config']

        self.am = AM(config)
        self.am.load_model(training=False)

        # MultiTask models get their own loader and tester; all other types
        # use the generic tester, which also receives the streaming setting.
        if self.am.model_type == 'MultiTask':
            self.dg = MultiTask_DataLoader(config, training=False)
            self.runner = multi_task_tester.MultiTaskTester(
                self.config['running_config'],
                self.dg.token3_featurizer,
            )
        else:
            self.dg = AM_DataLoader(config, training=False)
            self.runner = am_tester.AMTester(
                self.config['running_config'],
                self.dg.text_featurizer,
                streaming=config['speech_config']['streaming'],
            )

        self.STT = self.am.model

        # The step count is queried once per consumer, so the loader sees
        # two separate eval_per_epoch_steps() calls.
        progbar_steps = self.dg.eval_per_epoch_steps()
        self.runner.set_progbar(progbar_steps)
        all_steps = self.dg.eval_per_epoch_steps()
        self.runner.set_all_steps(all_steps)
        self.runner.compile(self.STT)