Example #1
import os
from typing import List, Text

from rasa.constants import RESULTS_FILE


def compare_nlu_models(
    configs: List[Text],
    nlu: Text,
    output: Text,
    runs: int,
    exclusion_percentages: List[int],
):
    """Trains multiple models, compares them and saves the results."""

    from rasa.nlu.test import drop_intents_below_freq
    from rasa.nlu.training_data import load_data
    from rasa.nlu.utils import write_json_to_file
    from rasa.utils.io import create_path
    from rasa.nlu.test import compare_nlu
    from rasa.core.test import plot_nlu_results

    data = load_data(nlu)
    data = drop_intents_below_freq(data, cutoff=5)

    create_path(output)

    bases = [os.path.basename(nlu_config) for nlu_config in configs]
    model_names = [os.path.splitext(base)[0] for base in bases]

    f1_score_results = {
        model_name: [[] for _ in range(runs)]
        for model_name in model_names
    }

    training_examples_per_run = compare_nlu(
        configs,
        data,
        exclusion_percentages,
        f1_score_results,
        model_names,
        output,
        runs,
    )

    f1_path = os.path.join(output, RESULTS_FILE)
    write_json_to_file(f1_path, f1_score_results)

    plot_nlu_results(output, training_examples_per_run)
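A minimal sketch of calling compare_nlu_models, assuming two hypothetical pipeline config files and an NLU data file:

# Hypothetical paths: two pipeline configs and one NLU training file.
# Runs each config 3 times, holding out 0/25/50% of the training data.
compare_nlu_models(
    configs=["config_light.yml", "config_heavy.yml"],
    nlu="data/nlu.md",
    output="nlu_comparison_results",
    runs=3,
    exclusion_percentages=[0, 25, 50],
)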
Example #2
def test_entities_synonyms(tmp_path):
    data = """
{
  "rasa_nlu_data": {
    "entity_synonyms": [
      {
        "value": "nyc",
        "synonyms": ["New York City", "nyc", "the big apple"]
      }
    ],
    "common_examples" : [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "NYC"
          }
        ]
      },
      {
        "text": "show me flights to nyc",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 22,
            "value": "nyc"
          }
        ]
      }
    ]
  }
}"""
    f = tmp_path / "tmp_training_data.json"
    f.write_text(data, io_utils.DEFAULT_ENCODING)
    td = training_data.load_data(str(f))
    assert td.entity_synonyms["New York City"] == "nyc"
Example #3
def test_run_cv_evaluation(pretrained_embeddings_spacy_config):
    td = training_data.load_data("data/examples/rasa/demo-rasa.json")

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        td, n_folds, pretrained_embeddings_spacy_config
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert len(entity_results.train["CRFEntityExtractor"]["Accuracy"]) == n_folds
    assert len(entity_results.train["CRFEntityExtractor"]["Precision"]) == n_folds
    assert len(entity_results.train["CRFEntityExtractor"]["F1-score"]) == n_folds
    assert len(entity_results.test["CRFEntityExtractor"]["Accuracy"]) == n_folds
    assert len(entity_results.test["CRFEntityExtractor"]["Precision"]) == n_folds
    assert len(entity_results.test["CRFEntityExtractor"]["F1-score"]) == n_folds
Example #4
def train_nlu(config_file="config.yml",
              model_directory="models",
              model_name="current",
              training_data_file="data/nlu.md"):
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.model import Trainer

    training_data = load_data(training_data_file)
    trainer = Trainer(config.load(config_file))
    trainer.train(training_data)

    # Attention: trainer.persist stores the model and all meta data into a folder.
    # The folder itself is not zipped.
    model_path = os.path.join(model_directory, model_name)
    model_directory = trainer.persist(model_path, fixed_model_name="nlu")

    logger.info(f"Model trained. Stored in '{model_directory}'.")

    return model_directory
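A short usage sketch for the helper above; it assumes config.yml and data/nlu.md exist at the default locations:

# The trained model is stored under models/current and its path is
# returned for loading the model later.
nlu_model_path = train_nlu()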
Example #5
def test_spacy_featurizer_casing(spacy_nlp):
    from rasa.nlu.featurizers import spacy_featurizer

    # if this starts failing for the default model, we should think about
    # removing the lower casing the spacy nlp component does when it
    # retrieves vectors. For compressed spacy models (e.g. models
    # ending in _sm) this test will most likely fail.

    td = training_data.load_data("data/examples/rasa/demo-rasa.json")
    for e in td.intent_examples:
        doc = spacy_nlp(e.text)
        doc_capitalized = spacy_nlp(e.text.capitalize())

        vecs = spacy_featurizer.features_for_doc(doc)
        vecs_capitalized = spacy_featurizer.features_for_doc(doc_capitalized)

        assert np.allclose(
            vecs, vecs_capitalized,
            atol=1e-5), "Vectors are unequal for texts '{}' and '{}'".format(
                e.text, e.text.capitalize())
Example #6
def test_demo_data(filename):
    td = training_data.load_data(filename)
    assert td.intents == {"affirm", "greet", "restaurant_search", "goodbye"}
    assert td.entities == {"location", "cuisine"}
    assert len(td.training_examples) == 42
    assert len(td.intent_examples) == 42
    assert len(td.entity_examples) == 11

    assert td.entity_synonyms == {
        "Chines": "chinese",
        "Chinese": "chinese",
        "chines": "chinese",
        "vegg": "vegetarian",
        "veggie": "vegetarian",
    }

    assert td.regex_features == [
        {"name": "greet", "pattern": r"hey[^\s]*"},
        {"name": "zipcode", "pattern": r"[0-9]{5}"},
    ]
Example #7
def train_nlu(data, configuration, model_dir, train):
    """Trains an NLU model and persists it.

    input:
        data: training data, in JSON format
        configuration: configuration file
        model_dir: where to save the model after training
        train: flag to confirm that we really want to train

    output:
        model_directory: where the trained model is saved
    """
    rasamodel.Train = train
    assert rasamodel.Train
    training_data = load_data(data)
    trainer = Trainer(config.load(configuration))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir,
                                      fixed_model_name='Intentnlu')
    return model_directory
Example #8
def test_interpreter(pipeline_template, component_builder, tmpdir):
    test_data = "data/examples/rasa/demo-rasa.json"
    _conf = utilities.base_test_conf(pipeline_template)
    _conf["data"] = test_data
    td = training_data.load_data(test_data)
    interpreter = utilities.interpreter_for(
        component_builder, "data/examples/rasa/demo-rasa.json", tmpdir.strpath, _conf
    )

    texts = ["good bye", "i am looking for an indian spot"]

    for text in texts:
        result = interpreter.parse(text, time=None)
        assert result["text"] == text
        assert not result["intent"]["name"] or result["intent"]["name"] in td.intents
        assert result["intent"]["confidence"] >= 0
        # Ensure the model doesn't detect entity types that are not present
        # Models on our test data set are not stable enough to
        # require the exact entities to be found
        for entity in result["entities"]:
            assert entity["entity"] in td.entities
Example #9
def test_convert_featurizer_output_shape():
    from rasa.nlu.featurizers.convert_featurizer import ConveRTFeaturizer

    td = training_data.load_data("data/examples/rasa/demo-rasa.json")

    convert_featurizer = ConveRTFeaturizer()
    convert_featurizer.train(td, config=None)

    text_features_dim = np.array([
        example.get("text_features").shape[0] for example in td.intent_examples
        if example.get("text_features") is not None
    ])

    response_features_dim = np.array([
        example.get("response_features").shape[0]
        for example in td.intent_examples
        if example.get("response_features") is not None
    ])

    assert np.all(text_features_dim == 1024)
    assert np.all(response_features_dim == 1024)
Example #10
def test_composite_entities_data():
    td = training_data.load_data("data/test/demo-rasa-composite-entities.md")
    assert not td.is_empty()
    assert len(td.entity_examples) == 11
    assert len(td.intent_examples) == 45
    assert len(td.training_examples) == 45
    assert td.entity_synonyms == {"SF": "San Fransisco"}
    assert td.intents == {
        "order_pizza",
        "book_flight",
        "chitchat",
        "greet",
        "goodbye",
        "affirm",
    }
    assert td.entities == {"location", "topping", "size"}
    assert td.entity_groups == {"1", "2"}
    assert td.entity_roles == {"to", "from"}
    assert td.number_of_examples_per_entity["entity 'location'"] == 8
    assert td.number_of_examples_per_entity["group '1'"] == 9
    assert td.number_of_examples_per_entity["role 'from'"] == 3
Example #11
def main():
    parser = create_argument_parser()
    cmdline_args = parser.parse_args()
    utils.configure_colored_logging(cmdline_args.loglevel)

    if cmdline_args.mode == "crossvalidation":

        # TODO: move parsing into sub parser
        # manual check argument dependency
        if cmdline_args.model is not None:
            parser.error("Crossvalidation will train a new model "
                         "- do not specify external model.")

        if cmdline_args.config is None:
            parser.error("Crossvalidation will train a new model "
                         "- you need to specify a model configuration.")

        nlu_config = config.load(cmdline_args.config)
        data = training_data.load_data(cmdline_args.data)
        data = drop_intents_below_freq(data, cutoff=5)
        results, entity_results = cross_validate(data, int(cmdline_args.folds),
                                                 nlu_config)
        logger.info("CV evaluation (n={})".format(cmdline_args.folds))

        if any(results):
            logger.info("Intent evaluation results")
            return_results(results.train, "train")
            return_results(results.test, "test")
        if any(entity_results):
            logger.info("Entity evaluation results")
            return_entity_results(entity_results.train, "train")
            return_entity_results(entity_results.test, "test")

    elif cmdline_args.mode == "evaluation":
        run_evaluation(cmdline_args.data, cmdline_args.model,
                       cmdline_args.report, cmdline_args.successes,
                       cmdline_args.errors, cmdline_args.confmat,
                       cmdline_args.histogram)

    logger.info("Finished evaluation")
Example #12
async def train(
    nlu_config: Union[Text, Dict, RasaNLUModelConfig],
    data: Union[Text, "TrainingDataImporter"],
    path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    storage: Optional[Text] = None,
    component_builder: Optional[ComponentBuilder] = None,
    training_data_endpoint: Optional[EndpointConfig] = None,
    persist_nlu_training_data: bool = False,
    **kwargs: Any,
) -> Tuple[Trainer, Interpreter, Optional[Text]]:
    """Loads the trainer and the data and runs the training of the model."""
    from rasa.importers.importer import TrainingDataImporter

    if not isinstance(nlu_config, RasaNLUModelConfig):
        nlu_config = config.load(nlu_config)

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(nlu_config, component_builder)
    persistor = create_persistor(storage)
    if training_data_endpoint is not None:
        training_data = await load_data_from_endpoint(training_data_endpoint,
                                                      nlu_config.language)
    elif isinstance(data, TrainingDataImporter):
        training_data = await data.get_nlu_data(nlu_config.data)
    else:
        training_data = load_data(data, nlu_config.language)

    training_data.print_stats()
    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(path, persistor, fixed_model_name,
                                         persist_nlu_training_data)
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path
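Since this train function is a coroutine, it has to be awaited; a minimal usage sketch with asyncio, where the config, data, and output paths are placeholders:

import asyncio

# Placeholder paths: a pipeline config, an NLU data file and an output
# directory; train() returns the trainer, interpreter and persisted path.
trainer, interpreter, persisted_path = asyncio.run(
    train("config.yml", "data/nlu.md", path="models")
)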
Example #13
def zipped_nlu_model():
    spacy_config_path = "sample_configs/config_pretrained_embeddings_spacy.yml"

    cfg = config.load(spacy_config_path)
    trainer = Trainer(cfg)
    td = training_data.load_data(DEFAULT_DATA_PATH)

    trainer.train(td)
    trainer.persist("test_models",
                    project_name="test_model_pretrained_embeddings")

    model_dir_list = os.listdir(TEST_MODEL_PATH)

    # directory name of latest model
    model_dir = sorted(model_dir_list)[-1]

    # path of that directory
    model_path = os.path.join(TEST_MODEL_PATH, model_dir)

    zip_path = zip_folder(model_path)

    return zip_path
Example #14
async def visualize(
    config_path: Text,
    domain_path: Text,
    stories_path: Text,
    nlu_data_path: Text,
    output_path: Text,
    max_history: int,
):
    from rasa.core.agent import Agent
    from rasa.core import config

    policies = config.load(config_path)

    agent = Agent(domain_path, policies=policies)

    # this is optional, only needed if the `/greet` type of
    # messages in the stories should be replaced with actual
    # messages (e.g. `hello`)
    if nlu_data_path is not None:
        from rasa.nlu.training_data import load_data

        nlu_training_data = load_data(nlu_data_path)
    else:
        nlu_training_data = None

    logger.info("Starting to visualize stories...")
    await agent.visualize(stories_path,
                          output_path,
                          max_history,
                          nlu_training_data=nlu_training_data)

    full_output_path = "file://{}".format(os.path.abspath(output_path))
    logger.info(
        "Finished graph creation. Saved into {}".format(full_output_path))

    import webbrowser

    webbrowser.open(full_output_path)
Example #15
def test_dialogflow_data():
    td = training_data.load_data("data/examples/dialogflow/")
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert len(td.lookup_tables) == 2
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {k: v for k, v in td.entity_synonyms.items() if k != v}
    assert non_trivial_synonyms == {
        "mexico": "mexican",
        "china": "chinese",
        "india": "indian",
    }
    # The lookup table order varies across machines, hence the set comparisons
    assert {td.lookup_tables[0]["name"], td.lookup_tables[1]["name"]} == {
        "location",
        "cuisine",
    }
    assert {
        len(td.lookup_tables[0]["elements"]),
        len(td.lookup_tables[1]["elements"]),
    } == {4, 6}
Example #16
def test_train_model():
    td = load_data(DEFAULT_DATA_PATH)
    language = "en"
    pipeline = load_json(
        "{}/test_case/test_pipelines/config_pipeline.json".format(prj_dir))
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})

    trainer = Trainer(_config)
    trainer.train(td)
    persisted_path = trainer.persist(model_dir)
    loaded = Interpreter.load(persisted_path)
    assert loaded.pipeline

    # Inference
    # result = loaded.parse("i'm looking for a place in the north of town")
    result = loaded.parse("show me chinese restaurants")
    # drop the verbose intent ranking before printing
    result = {k: v for k, v in result.items() if k != "intent_ranking"}
    show_dict(result)
Example #17
def train(nlu_config: Union[Text, RasaNLUModelConfig],
          data: Text,
          path: Optional[Text] = None,
          project: Optional[Text] = None,
          fixed_model_name: Optional[Text] = None,
          storage: Optional[Text] = None,
          component_builder: Optional[ComponentBuilder] = None,
          training_data_endpoint: Optional[EndpointConfig] = None,
          **kwargs: Any
          ) -> Tuple[Trainer, Interpreter, Optional[Text]]:
    """Loads the trainer and the data and runs the training of the model."""

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(nlu_config, component_builder)
    persistor = create_persistor(storage)
    if training_data_endpoint is not None:
        training_data = load_data_from_endpoint(training_data_endpoint,
                                                nlu_config.language)
    else:
        training_data = load_data(data, nlu_config.language)
    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(path,
                                         persistor,
                                         project,
                                         fixed_model_name)
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path
Example #18
def get_nlu_stats() -> None:
    """Creates a temporary file with NLU stats.

    Creates a temporary file listing all intents and entities from the
    NLU data in the `./data/nlu.md` file. These values can be used for
    updating intents and entities in the domain file.

    Note: It is recommended to use this function for checking new
    intents & entities.
    """
    from tempfile import NamedTemporaryFile
    from rasa.nlu.training_data import load_data

    try:
        make_dir(ai_dir['temp'])
        # Loads NLU data from the `./data/nlu.md` file.
        nlu_data = load_data(str(ai_file['nlu']))
        # Sets of all intents and entities present in the NLU data.
        intents = nlu_data.intents
        entities = nlu_data.entities
        # Creates a named temporary file in the `./temp/` directory.
        # The file is not deleted automatically (delete=False).
        named_temp_file = NamedTemporaryFile(dir=ai_dir['temp'], delete=False)
        # Writes all the intents and entities to the temporary file.
        with open(named_temp_file.name, 'w', encoding=_ENCODING) as temp_file:
            temp_file.write('Intents:\n')
            for intent in intents:
                temp_file.write(intent + '\n')
            temp_file.write('\nEntities:\n')
            for entity in entities:
                temp_file.write(entity + '\n')
        show(f'Done. Results are stored in {named_temp_file.name} file.')
    except Exception as error:
        print('An error occurred while performing this operation because of'
              f' {error} in function "{stack()[0][3]}" on line'
              f' {exc_info()[-1].tb_lineno}.')
Example #19
import json

from rasa.nlu.training_data import load_data
from rasa.nlu.config import RasaNLUModelConfig
from rasa.nlu.model import Trainer
from rasa.nlu import config
from rasa.nlu.model import Metadata, Interpreter

# Import speech_recognition
import speech_recognition as sr

# This will load the nlu data in the md file, train a model and save it as the current model
# loading the nlu training samples
training_data = load_data("./data/nlu.md")
# trainer to educate our pipeline
trainer = Trainer(config.load("config.yml"))
# train the model!
interpreter = trainer.train(training_data)
# store it for future use
model_directory = trainer.persist("./models", fixed_model_name="current")

# Use this line when you already trained a model
# interpreter = Interpreter.load('./models/current')


# small helper to make dict dumps a bit prettier
def pprint(o):
    print(json.dumps(o, indent=2))


r = sr.Recognizer()
Example #20
def train_nlu(data, configs, model_dir):
    training_data = load_data(data)              # load the NLU training samples
    trainer = Trainer(config.load(configs))      # build the pipeline from the config
    interpreter = trainer.train(training_data)   # train the model
    model_directory = trainer.persist(model_dir, fixed_model_name="chatter")  # store it in the given directory
    return model_directory
Example #21
def CV_eval(td_file, config_file, Nfolds=10):
    # trains a model with crossvalidation using the training data and config

    td = load_data(td_file)
    configuration = config.load(config_file)
    cross_validate(td, Nfolds, configuration)
Example #22
def train_nlu(data, configuration, model_dir):
    training_data = load_data(data)
    trainer = Trainer(config.load(configuration))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name='whethernlu')
    return model_directory
Example #23
def training_data():
    return load_data(DEFAULT_DATA_PATH)
Example #24
def test_section_value_with_delimiter():
    td_section_with_delimiter = training_data.load_data(
        "data/test/markdown_single_sections/section_with_delimiter.md")
    assert td_section_with_delimiter.entity_synonyms == {"10:00 am": "10:00"}
Example #25
def edit_tf_pt():
    if request.method == 'POST':
        if 'create' in request.form: #create new data folder
            folder_name = request.form['new_name']
            folder_path = os.path.join(app.config['UPLOAD_FOLDER'],secure_filename(folder_name))
            label_path = os.path.join(folder_path,'label')
            text_path = os.path.join(folder_path,'seq.in')
            tags_path = os.path.join(folder_path,'seq.out')
            if not os.path.exists(folder_path): #create new folder & files if dont exist
                os.makedirs(folder_path)
                os.mknod(label_path)
                os.mknod(text_path)
                os.mknod(tags_path)
            else: #create files in folder if dont exist
                if not os.path.exists(label_path):
                    os.mknod(label_path)
                if not os.path.exists(text_path):
                    os.mknod(text_path)
                if not os.path.exists(tags_path):
                    os.mknod(tags_path)
            return redirect(url_for('content_tf_pt',path=folder_path))
        
        elif 'open' in request.form: #edit existing data folder
            #download multiple files from the folder
            list_folder = request.files.getlist('folder') #list()
            #check if folder contains correct files
            file_check = {'label':0, 'seq.in':0, 'seq.out':0}
            for file in list_folder:
                if os.path.basename(file.filename) in file_check:
                    file_check[os.path.basename(file.filename)] += 1
            if 0 in file_check.values(): #check if the filenames meet the requirements
                fail = True
                fail_message = 'Files uploaded do not match the filename requirements. Please check that your label, text sequence and BIO-tag sequence files are named label, seq.in and seq.out respectively so the system can recognise them.'
                return redirect(url_for('edit_tf_pt',fail=fail,fail_message=fail_message))
            elif any(count > 1 for count in file_check.values()): #invalid data folder: contains more than one of each label, seq.in, seq.out file
                fail = True
                fail_message = 'Invalid folder selected! The folder contains more than the required number of files (3). Please select the direct parent data folder with only one instance each of the label, seq.in and seq.out files.'
                return redirect(url_for('edit_tf_pt',fail=fail,fail_message=fail_message))
            else: #success
                for file in list_folder:
                    file.save(os.path.join(app.config['UPLOAD_FOLDER'],file.filename)) #save files into folder
                folder_path = os.path.join(app.config['UPLOAD_FOLDER'],os.path.dirname(list_folder[0].filename))
                return redirect(url_for('content_tf_pt',path=folder_path))
        
        elif 'convert_rasa' in request.form: #convert rasa data file to tf/pt format
            from rasa.nlu import training_data
            from rasa.nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer

            curr = request.files['convert_rasa_file']
            curr.save(os.path.join(app.config['UPLOAD_FOLDER'],secure_filename(curr.filename)))
            file = os.path.join(app.config['UPLOAD_FOLDER'],secure_filename(curr.filename))

            td = training_data.load_data(file)
            formatted_examples = [ example.as_dict_nlu() for example in td.training_examples ]
            labels = [ex['intent'] for ex in formatted_examples]

            #Tokenize and clean text
            white_space_tokenizer = WhitespaceTokenizer()
            sentences = list()
            BIO_tagging = list()
            types = dict()
            for ex in formatted_examples:
                #Tokenize by white space
                white_space_tokens = white_space_tokenizer.tokenize(ex['text'])
                tokens = [token.text for token in white_space_tokens]
                #Form into input sentence
                sentence = ' '.join(tokens)
                sentences.append(sentence) #seq.in
                #Perform entity tagging
                if 'entities' in ex: #entity exists
                    ent_values = [entity['value'] for entity in ex['entities']] #entity value
                    ent_length = [len(value.split()) for value in ent_values] #length of entity word
                    ent_types = [entity['entity'] for entity in ex['entities']] #entity type
                    #form BI tags
                    for idx, typ in enumerate(ent_types):
                        ent_types[idx] = 'B-' + typ + ''.join([' I-' + typ]*(ent_length[idx] - 1))
                        types['B-' + typ] = True
                        types['I-' + typ] = True
                        #replace sentence with BI
                        sentence = sentence.replace(ent_values[idx].strip(),ent_types[idx].strip()) #and, remove leading and trailing spaces
                    tag_seq = sentence.split()
                    for idx, token in enumerate(tag_seq):
                        #replace sentence with O
                        if token not in types:
                            tag_seq[idx] = 'O'
                #no entity
                else: 
                    tag_seq = ['O' for t in tokens]
                tags = ' '.join(tag_seq)
                BIO_tagging.append(tags)
            
            file_chunk = {
                'folder_name':os.path.splitext(os.path.basename(file))[0],
                'label_name':'label',
                'text_name':'seq.in',
                'tags_name':'seq.out',
                'label_content':'\n'.join([str(i) for i in labels]) + '\n',
                'text_content':'\n'.join([str(i) for i in sentences]) + '\n',
                'tags_content':'\n'.join([str(i) for i in BIO_tagging]) + '\n'
            }
            return render_template('/edit/editor_3.html', **file_chunk) 
        
        else: #convert tf/pt data file to rasa format
            #download multiple files from the folder
            list_folder = request.files.getlist('convert_tf_pt_folder') #list()
            #check if folder contains correct files
            file_check = {'label':0, 'seq.in':0, 'seq.out':0}
            for file in list_folder:
                if os.path.basename(file.filename) in file_check:
                    file_check[os.path.basename(file.filename)] += 1
            if 0 in file_check.values(): #check if the filenames meet the requirements
                fail = True
                fail_message = 'Files uploaded do not match the filename requirements. Please check that your label, text sequence and BIO-tag sequence files are named label, seq.in and seq.out respectively so the system can recognise them.'
                return redirect(url_for('edit_tf_pt',fail=fail,fail_message=fail_message))
            elif any(count > 1 for count in file_check.values()): #invalid data folder: contains more than one of each label, seq.in, seq.out file
                fail = True
                fail_message = 'Invalid folder selected! The folder contains more than the required number of files (3). Please select the direct parent data folder with only one instance each of the label, seq.in and seq.out files.'
                return redirect(url_for('edit_tf_pt',fail=fail,fail_message=fail_message))
            else: #success
                for file in list_folder:
                    file.save(os.path.join(app.config['UPLOAD_FOLDER'],file.filename)) #save files into folder
                folder_path = os.path.join(app.config['UPLOAD_FOLDER'],os.path.dirname(list_folder[0].filename))
                return redirect(url_for('content_to_rasa',path=folder_path))
    
    else:
        if 'fail' in request.args:
            fail = request.args.get('fail')
            fail_msg = request.args.get('fail_message')
        else:
            fail = False
            fail_msg = ""
        return render_template('/edit/index_tf-pt.html',fail=fail,fail_message=fail_msg)
Example #26
def run_evaluation(
    data_path: Text,
    model_path: Text,
    output_directory: Optional[Text] = None,
    successes: bool = False,
    errors: bool = False,
    confmat: Optional[Text] = None,
    histogram: Optional[Text] = None,
    component_builder: Optional[ComponentBuilder] = None,
) -> Dict:  # pragma: no cover
    """
    Evaluate intent classification, response selection and entity extraction.

    :param data_path: path to the test data
    :param model_path: path to the model
    :param output_directory: path to folder where all output will be stored
    :param successes: if true successful predictions are written to a file
    :param errors: if true incorrect predictions are written to a file
    :param confmat: path to file that will show the confusion matrix
    :param histogram: path to file that will show a histogram
    :param component_builder: component builder

    :return: dictionary containing evaluation results
    """

    # get the metadata config from the package data
    interpreter = Interpreter.load(model_path, component_builder)

    interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)

    result = {
        "intent_evaluation": None,
        "entity_evaluation": None,
        "response_selection_evaluation": None,
    }  # type: Dict[Text, Optional[Dict]]

    if output_directory:
        io_utils.create_directory(output_directory)

    intent_results, response_selection_results, entity_results = get_eval_data(
        interpreter, test_data)

    if intent_results:
        logger.info("Intent evaluation results:")
        result["intent_evaluation"] = evaluate_intents(intent_results,
                                                       output_directory,
                                                       successes, errors,
                                                       confmat, histogram)

    if response_selection_results:
        logger.info("Response selection evaluation results:")
        result["response_selection_evaluation"] = evaluate_response_selections(
            response_selection_results, output_directory)

    if entity_results:
        logger.info("Entity evaluation results:")
        extractors = get_entity_extractors(interpreter)
        result["entity_evaluation"] = evaluate_entities(
            entity_results, extractors, output_directory, successes, errors)

    return result
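A hedged usage sketch for run_evaluation; the test data and model paths below are placeholders:

# Placeholder paths: a test data file and the directory of a trained,
# unpacked NLU model; reports are written into the output directory.
results = run_evaluation(
    "data/examples/rasa/demo-rasa.json",
    "models/nlu",
    output_directory="nlu_eval_results",
    errors=True,
)
print(results["intent_evaluation"])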
Example #27
from rasa.nlu.training_data import load_data

# This re-uses the Rasa NLU converters code to turn a JSON Rasa NLU training
# file into MD format and save it

# Assumes you have Rasa NLU installed :-)

# If you want other options, look at the NLU code to work out how to handle them

# USE AT YOUR OWN RISK

files = {
    './commands.json': '../data/auto-generated/commands.md',
    './clarification.json': '../data/auto-generated/clarification.md',
}
# *******************************************************
# TAKE CARE: output_md_file is overwritten automatically
# *******************************************************

for input_training_file, output_md_file in files.items():
    with open(output_md_file, 'w') as f:
        f.write(load_data(input_training_file).as_markdown())
Example #28
'''This file converts our NLU markdown training files to NLU JSON training
files, which are then stored in NoSQL databases.'''

import json
import glob  # used to read all file paths in a specific directory
import os

from rasa.nlu import training_data

nlu_mdfiles_path = '../mdfiles/*.md'
nlu_jsonfiles_path = '../nlu/'
files = glob.glob(nlu_mdfiles_path)  # list of all the md file paths
for md_file in files:
    td = training_data.load_data(md_file)
    json_data = json.loads(td.as_json())
    # strip the directory and extension to get the bare file name
    filename = os.path.splitext(os.path.basename(md_file))[0]
    with open(nlu_jsonfiles_path + filename + '.json', 'w') as f:
        json.dump(json_data, f, indent=4)
Example #29
from rasa.nlu.model import Trainer
from rasa.nlu import config
from rasa.nlu.training_data import load_data

# loading training data
training_data = load_data('./data/nlu.md')

# initialising the trainer
trainer = Trainer(config.load("config.yml"))

# training
trainer.train(training_data)

# saving the model in the specified directory; persist returns the
# directory where the model was stored
model_directory = trainer.persist('./models/')
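To query the trained model, it can be loaded back with Interpreter.load; a short sketch using the directory returned by trainer.persist above:

from rasa.nlu.model import Interpreter

# load the persisted model and parse a sample message
interpreter = Interpreter.load(model_directory)
print(interpreter.parse("hello"))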
Example #30
def test_markdown_not_existing_section():
    with pytest.raises(ValueError):
        training_data.load_data(
            "data/test/markdown_single_sections/not_existing_section.md")