示例#1
0
    def get_data(self, train_data, language):
        """Collect lookup tables and composite entities from the training files.

        Every file returned by ``utils.list_files(cmdline_args.data)`` is read
        as JSON and handled according to its guessed format:

        * Dialogflow entity files contribute one lookup table and one
          composite entity each, whenever the corresponding extractor returns
          a truthy value.
        * Files matching the ``rasa_nlu`` format replace both result lists
          with the values stored in the file.

        Args:
            train_data: Not used by this method; kept so existing callers
                keep working.
            language: Language forwarded to the Dialogflow example reader.

        Returns:
            A ``(lookup_tables, composite_entities)`` tuple.
        """
        lookup_tables = []
        composite_entities = []

        # NOTE(review): ``cmdline_args`` is not assigned inside this method
        # (the local ``create_argument_parser().parse_args()`` call had been
        # commented out), so it has to come from an enclosing scope - confirm.
        files = utils.list_files(cmdline_args.data)

        for file in files:
            fformat = _guess_format(file)
            file_content = utils.read_json_file(file)
            if fformat == DIALOGFLOW_ENTITIES:
                entity = file_content['name']
                reader = DialogflowReader()
                examples_js = reader._read_examples_js(
                    fn=file, language=language, fformat=fformat)
                lookup_table = self._extract_lookup_tables(entity, examples_js)
                if lookup_table:
                    lookup_tables.append(lookup_table)
                composite_entity = self._extract_composite_entities(
                    entity, examples_js)
                if composite_entity:
                    composite_entities.append(composite_entity)

            # NOTE(review): ``rasa_nlu`` and the 'rasa.nlu_data' key are kept
            # exactly as found; verify they match the format constant and the
            # JSON layout actually used by the project.
            if fformat == rasa_nlu:
                rasa_nlu_data = file_content['rasa.nlu_data']
                # This overwrites (does not extend) anything collected so far.
                composite_entities = rasa_nlu_data['composite_entities']
                lookup_tables = rasa_nlu_data['lookup_tables']

        return lookup_tables, composite_entities
示例#2
0
 def _get_train_files_cmd():
     """Return the Rasa NLU training files named on the command line.

     Parses the command-line arguments, falls back to ``data/nlu.json``
     when the parsed arguments contain no ``nlu`` entry, and keeps only
     the listed files whose guessed format equals ``RASA_NLU``.
     """
     cmdline_args = create_argument_parser().parse_args()
     # Use the ``in`` operator rather than calling ``__contains__`` directly;
     # this assumes the parser returns an ``argparse.Namespace``-like object.
     if "nlu" not in cmdline_args:
         cmdline_args.nlu = 'data/nlu.json'
     files = utils.list_files(cmdline_args.nlu)
     return [file for file in files if _guess_format(file) == RASA_NLU]
示例#3
0
 def _get_train_files_cmd():
     """Collect the Rasa NLU training files referenced on the command line.

     The files are listed from the ``nlu`` command-line argument. If reading
     that argument or listing its files raises ``AttributeError``, the NLU
     files are taken from the ``data`` argument instead. Only files whose
     guessed format equals ``RASA_NLU`` are returned.
     """
     parsed = create_argument_parser().parse_args()
     try:
         candidates = list_files(parsed.nlu)
     except AttributeError:
         # Fall back to the second element of get_core_nlu_files' result.
         candidates = list(get_core_nlu_files(parsed.data)[1])

     selected = []
     for path in candidates:
         if _guess_format(path) == RASA_NLU:
             selected.append(path)
     return selected
示例#4
0
def get_file_format(resource_name: Text) -> Text:
    """Return ``"md"`` if every file under *resource_name* is markdown.

    The format of each file listed for *resource_name* is guessed; the
    result is ``"md"`` only when at least one file exists and all guessed
    formats are ``"md"``. In every other case ``"json"`` is returned.

    Raises:
        AttributeError: If *resource_name* is ``None`` or the path does
            not exist.
    """
    from rasa.nlu.training_data.loading import _guess_format

    resource_missing = (
        resource_name is None or not os.path.exists(resource_name))
    if resource_missing:
        raise AttributeError(
            "Resource '{}' does not exist.".format(resource_name))

    detected = [_guess_format(path)
                for path in utils.list_files(resource_name)]

    # An empty listing, or any non-markdown file, falls through to "json".
    if detected and all(fmt == "md" for fmt in detected):
        return "md"
    return "json"
示例#5
0
async def _write_nlu_to_file(
    export_nlu_path: Text,
    evts: List[Dict[Text, Any]]
) -> None:
    """Merge the messages collected from *evts* into the NLU file.

    The existing data at *export_nlu_path* is loaded and merged with the
    messages collected from *evts*. If loading fails, the error is logged
    and the user is asked for a replacement path (default
    ``PATHS["backup"]``); answering with ``None`` aborts without writing.
    The merged data is written as markdown when the target's guessed format
    is ``"md"`` or ``"unk"``, and as JSON otherwise.
    """
    from rasa.nlu.training_data import TrainingData

    collected = _collect_messages(evts)

    # noinspection PyBroadException
    try:
        existing = load_data(export_nlu_path)
    except Exception as e:
        logger.exception("An exception occurred while trying to load the "
                         "NLU data.")

        prompt = ("Could not load existing NLU data, please "
                  "specify where to store NLU data learned in "
                  "this session (this will overwrite any "
                  "existing file). {}".format(str(e)))
        export_nlu_path = questionary.text(
            message=prompt,
            default=PATHS["backup"]).ask()

        # No replacement path was given: nothing is written.
        if export_nlu_path is None:
            return

        # Start from empty training data since nothing could be loaded.
        existing = TrainingData()

    merged = existing.merge(TrainingData(collected))

    # Guess the format before the file is opened in write mode, so the
    # guess is not made against a file that is already open for writing.
    use_markdown = _guess_format(export_nlu_path) in {"md", "unk"}

    with open(export_nlu_path, 'w', encoding="utf-8") as f:
        serialized = (merged.as_markdown() if use_markdown
                      else merged.as_json())
        f.write(serialized)