示例#1
0
    def __init__(self, resource_name, backend, language_name):
        self.intent_examples = []
        self.entity_examples = []
        self.resource_name = resource_name
        self.files = util.recursively_find_files(resource_name)
        self.fformat = self.guess_format(self.files)
        self.tokenizer = None
        self.language_name = language_name

        if backend in ['mitie', 'mitie_sklearn']:
            from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
            self.tokenizer = MITIETokenizer()
        elif backend in ['spacy_sklearn']:
            from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
            self.tokenizer = SpacyTokenizer(language_name)
        else:
            from rasa_nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer
            self.tokenizer = WhitespaceTokenizer()
            warnings.warn(
                "backend not recognised by TrainingData : defaulting to tokenizing by splitting on whitespace"
            )

        if self.fformat == 'luis':
            self.load_luis_data(self.files[0])
        elif self.fformat == 'wit':
            self.load_wit_data(self.files[0])
        elif self.fformat == 'api':
            self.load_api_data(self.files)
        elif self.fformat == 'rasa_nlu':
            self.load_data(self.files[0])
        else:
            raise ValueError("unknown training file format : {0}".format(
                self.fformat))
示例#2
0
 def resolve_data_files(self, resource_name):
     try:
         return util.recursively_find_files(resource_name)
     except ValueError, e:
         raise ValueError(
             "Invalid training data file / folder specified. " + e.message)