def _init_data_handler(self):
    """Create a language-model data handler and load its metadata.

    The handler is built from default-constructed configs with shuffling
    turned off, then its metadata is initialised from ``FILE_NAME``.

    Returns:
        The initialised data handler produced by
        ``LanguageModelDataHandler.from_config``.
    """
    # Default configs; constructed in the same order the handler receives them.
    handler_config = LanguageModelDataHandler.Config()
    feature_config = FeatureConfig()
    label_config = WordLabelConfig()
    featurizer = create_featurizer(SimpleFeaturizer.Config(), FeatureConfig())

    handler = LanguageModelDataHandler.from_config(
        handler_config,
        feature_config,
        label_config,
        featurizer=featurizer,
        shuffle=False,
    )

    # The same file is supplied for all three path arguments.
    # NOTE(review): presumably train / eval / test paths -- confirm against
    # init_metadata_from_path's signature.
    handler.init_metadata_from_path(FILE_NAME, FILE_NAME, FILE_NAME)
    return handler
class Config(Task.Config):
    """Task configuration bundling data handler, model, trainer and reporter.

    Every field except ``labels`` defaults to the default-constructed config
    of its component; ``labels`` defaults to ``None``.
    """

    # Either the plain or the BPTT language-model data handler config.
    data_handler: Union[
        LanguageModelDataHandler.Config, BPTTLanguageModelDataHandler.Config
    ] = LanguageModelDataHandler.Config()
    # Model configuration.
    model: LMLSTM.Config = LMLSTM.Config()
    # Trainer configuration.
    trainer: Trainer.Config = Trainer.Config()
    # Optional word-label configuration; unset by default.
    labels: Optional[WordLabelConfig] = None
    # Metric reporter configuration.
    metric_reporter: LanguageModelMetricReporter.Config = LanguageModelMetricReporter.Config()
class Config(Task_Deprecated.Config):
    """Configuration for the deprecated language-model task.

    Every field except ``labels`` defaults to the default-constructed config
    of its component; ``labels`` defaults to ``None``.
    """

    # PlaceHolder keeps this annotation a Union so that no config adapter has
    # to be written for it; this class should be removed soon.
    data_handler: Union[
        LanguageModelDataHandler.Config, PlaceHolder
    ] = LanguageModelDataHandler.Config()
    # Deprecated model configuration.
    model: LMLSTM_Deprecated.Config = LMLSTM_Deprecated.Config()
    # Trainer configuration.
    trainer: Trainer.Config = Trainer.Config()
    # Optional word-label configuration; unset by default.
    labels: Optional[WordLabelConfig] = None
    # Metric reporter configuration.
    metric_reporter: LanguageModelMetricReporter.Config = LanguageModelMetricReporter.Config()
def create_language_model_data_handler(cls) -> LanguageModelDataHandler:
    """Construct a ``LanguageModelDataHandler`` directly, bypassing configs.

    The handler reads a single raw column (``DFColumn.UTTERANCE``), exposes
    one text feature field that uses the vocabulary's init/EOS tokens, has
    no labels, and uses a featurizer built from default configs.

    Returns:
        The newly constructed ``LanguageModelDataHandler``.
    """
    # TODO: Refactor this after Shicong refactors PyText config and removes
    # Thrift. After that directly use Data Handler's from config method
    # with synthetic configs
    raw_columns = [DFColumn.UTTERANCE]

    text_field = TextFeatureField(
        eos_token=VocabMeta.EOS_TOKEN,
        init_token=VocabMeta.INIT_TOKEN,
    )
    feature_fields: Dict[str, Field] = {DatasetFieldName.TEXT_FIELD: text_field}

    featurizer = create_featurizer(SimpleFeaturizer.Config(), FeatureConfig())

    return LanguageModelDataHandler(
        raw_columns=raw_columns,
        features=feature_fields,
        labels={},
        featurizer=featurizer,
    )