Example #1
import json
import logging
from pathlib import Path

from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.cli.utils import set_nlu_logger
from snips_nlu.common.utils import json_string


def train_test_metrics(train_dataset_path,
                       test_dataset_path,
                       output_path,
                       config_path=None,
                       exclude_slot_metrics=False,
                       include_errors=False,
                       verbose=False):
    if verbose:
        set_nlu_logger(logging.DEBUG)

    if config_path is not None:
        with Path(config_path).open("r", encoding="utf-8") as f:
            config = json.load(f)
        # make_engine_cls is a helper defined alongside this function in the
        # snips_nlu CLI; it builds an engine class bound to the loaded config
        engine_cls = make_engine_cls(config)
    else:
        engine_cls = SnipsNLUEngine

    metrics_args = dict(train_dataset=train_dataset_path,
                        test_dataset=test_dataset_path,
                        engine_class=engine_cls,
                        include_slot_metrics=not exclude_slot_metrics)

    with Path(train_dataset_path).open("r", encoding="utf8") as f:
        load_resources(json.load(f)["language"])

    from snips_nlu_metrics import compute_train_test_metrics

    metrics = compute_train_test_metrics(**metrics_args)
    if not include_errors:
        metrics.pop("parsing_errors")

    with Path(output_path).open(mode="w", encoding="utf8") as f:
        f.write(json_string(metrics))
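For reference, a minimal invocation of the helper above might look like the following; the dataset and output paths are hypothetical placeholders:

train_test_metrics("dataset/train.json",
                   "dataset/test.json",
                   "metrics.json",
                   include_errors=True,
                   verbose=True)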
Example #2
import io
import json
import sys

from snips_nlu import SnipsNLUEngine, load_resources


# parse_train_test_args comes from the same CLI module; a sketch of it
# follows this example.
def main_train_test_metrics():
    args = vars(parse_train_test_args(sys.argv[1:]))

    train_dataset_path = args.pop("train_dataset_path")
    test_dataset_path = args.pop("test_dataset_path")
    output_path = args.pop("output_path")
    exclude_slot_metrics = args.get("exclude_slot_metrics", False)

    metrics_args = dict(
        train_dataset=train_dataset_path,
        test_dataset=test_dataset_path,
        engine_class=SnipsNLUEngine,
        include_slot_metrics=not exclude_slot_metrics
    )

    include_errors = args.get("include_errors", False)
    with io.open(train_dataset_path, "r", encoding="utf-8") as f:
        load_resources(json.load(f)["language"])

    from snips_nlu_metrics import compute_train_test_metrics

    metrics = compute_train_test_metrics(**metrics_args)
    if not include_errors:
        metrics.pop("parsing_errors")

    with io.open(output_path, mode="w", encoding="utf8") as f:
        # The bytes/decode round-trip in the original was a Python 2
        # compatibility no-op; writing the string directly is equivalent.
        f.write(json.dumps(metrics, sort_keys=True, indent=2))
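parse_train_test_args is not shown in these snippets. A minimal sketch of an argparse-based parser producing the keys consumed above (argument names inferred from the args.pop/args.get calls, not taken from the snips_nlu source) could be:

import argparse

def parse_train_test_args(argv):
    # Hypothetical reconstruction; the real snips_nlu CLI parser may differ.
    parser = argparse.ArgumentParser(description="Train/test metrics")
    parser.add_argument("train_dataset_path")
    parser.add_argument("test_dataset_path")
    parser.add_argument("output_path")
    parser.add_argument("--exclude_slot_metrics", action="store_true")
    parser.add_argument("--include_errors", action="store_true")
    return parser.parse_args(argv)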
Example #3
def main_train_test_metrics():
    args = vars(parse_train_test_args(sys.argv[1:]))

    train_dataset_path = args.pop("train_dataset_path")
    test_dataset_path = args.pop("test_dataset_path")
    output_path = args.pop("output_path")
    exclude_slot_metrics = args.get("exclude_slot_metrics", False)

    metrics_args = dict(
        train_dataset=train_dataset_path,
        test_dataset=test_dataset_path,
        engine_class=SnipsNLUEngine,
        include_slot_metrics=not exclude_slot_metrics
    )

    include_errors = args.get("include_errors", False)
    with io.open(train_dataset_path, "r", encoding="utf-8") as f:
        load_resources(json.load(f)["language"])

    metrics = compute_train_test_metrics(**metrics_args)
    if not include_errors:
        metrics.pop("parsing_errors")

    with io.open(output_path, mode="w", encoding="utf8") as f:
        f.write(json.dumps(metrics))
Example #4

def train_eval_snips_nlu_model(lang='en', cross=False, save=''):
    """Train a Snips NLU model from all brat annotation objects.

    :param lang: abbreviated language name
    :param cross: if True, run a train/test evaluation instead of
        persisting a trained model
    :param save: version suffix used in the output file names
    :return: None
    :rtype: None
    """
    import codecs
    import json
    import pickle
    from datetime import datetime
    from pathlib import Path

    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN
    from snips_nlu_metrics import compute_train_test_metrics, compute_cross_val_metrics

    # source_result and BuildSnipsDataTask1 are defined elsewhere in this
    # project.
    if cross:
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_snips_data_task1()
        print("--> Evaluating training data with Snips metrics...")
        filename_results = source_result / "snips_semeval_2020_evaluation_task1_{}.pkl".format(save)
        if not Path(filename_results).exists():
            tt_metrics = compute_train_test_metrics(train_dataset=train_data[0],
                                                    test_dataset=train_data[1],
                                                    engine_class=SnipsNLUEngine,
                                                    include_slot_metrics=False)
            print("--> Writing snips nlu metrics data to file...")
            with open(filename_results, 'wb') as metrics_file:
                pickle.dump(tt_metrics, metrics_file)
            dmtime = "_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S"))
            name = "snips_semeval_2020_evaluation_task1{}.json".format(dmtime)
            filename_results_json = source_result / name
            with codecs.open(filename_results_json, 'w', "utf-8") as m_json:
                json.dump(tt_metrics, m_json)

    else:
        filename_results = source_result / "snips_semeval_2020_model_task1_{}".format(save)
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_snips_data_task1()
        nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
        print("--> Training patent data with Snips...")
        nlu_engine.fit(train_data)
        try:
            print("--> Saving model trained with Snips (JOBLIB)...")
            filename_joblib = source_result / "snips_semeval_2020_model_task1_{}.pkl".format(save)
            with open(filename_joblib, 'wb') as model_file:
                pickle.dump(nlu_engine, model_file)
        except Exception:
            pass
        print("--> Saving model trained with Snips (SNIPS)...")
        try:
            nlu_engine.persist(filename_results)
        except Exception:
            pass
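Once persisted, the engine can be restored with SnipsNLUEngine.from_path and used for parsing. A minimal sketch, assuming the model directory produced above (the directory name and query are placeholders):

from snips_nlu import SnipsNLUEngine

loaded_engine = SnipsNLUEngine.from_path("snips_semeval_2020_model_task1_v1")
parsing = loaded_engine.parse("example query text")
print(parsing["intent"])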
Example #5
def train_test_metrics(train_dataset_path,
                       test_dataset_path,
                       output_path,
                       exclude_slot_metrics=False,
                       include_errors=False):
    metrics_args = dict(train_dataset=train_dataset_path,
                        test_dataset=test_dataset_path,
                        engine_class=SnipsNLUEngine,
                        include_slot_metrics=not exclude_slot_metrics)

    with Path(train_dataset_path).open("r", encoding="utf8") as f:
        load_resources(json.load(f)["language"])

    from snips_nlu_metrics import compute_train_test_metrics

    metrics = compute_train_test_metrics(**metrics_args)
    if not include_errors:
        metrics.pop("parsing_errors")

    with Path(output_path).open(mode="w", encoding="utf8") as f:
        f.write(json_string(metrics))
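Example #4 also imports compute_cross_val_metrics, the cross-validation counterpart of compute_train_test_metrics. A minimal sketch of that variant, following the snips_nlu_metrics API (the dataset path and fold count are placeholders):

from snips_nlu import SnipsNLUEngine
from snips_nlu_metrics import compute_cross_val_metrics

# A single dataset is split into folds internally; no separate test set is
# needed.
cv_metrics = compute_cross_val_metrics(dataset="dataset.json",
                                       engine_class=SnipsNLUEngine,
                                       nb_folds=5)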
Example #6
def train_test_metrics(train_dataset_path,
                       test_dataset_path,
                       output_path,
                       config_path=None,
                       exclude_slot_metrics=False,
                       include_errors=False,
                       verbosity=0):
    import json
    import logging
    from pathlib import Path
    from snips_nlu_metrics import compute_train_test_metrics
    from snips_nlu import SnipsNLUEngine
    from snips_nlu.cli.utils import set_nlu_logger
    from snips_nlu.common.utils import json_string

    if verbosity == 1:
        set_nlu_logger(logging.INFO)
    elif verbosity >= 2:
        set_nlu_logger(logging.DEBUG)

    if config_path is not None:
        with Path(config_path).open("r", encoding="utf-8") as f:
            config = json.load(f)
        # make_engine_cls builds an engine class from the loaded config dict;
        # it is defined alongside this function in the snips_nlu CLI
        engine_cls = make_engine_cls(config)
    else:
        engine_cls = SnipsNLUEngine

    metrics_args = dict(train_dataset=train_dataset_path,
                        test_dataset=test_dataset_path,
                        engine_class=engine_cls,
                        include_slot_metrics=not exclude_slot_metrics,
                        # _match_trimmed_values compares slot values after
                        # trimming whitespace (see the sketch after this
                        # example)
                        slot_matching_lambda=_match_trimmed_values)

    metrics = compute_train_test_metrics(**metrics_args)
    if not include_errors:
        metrics.pop("parsing_errors")

    with Path(output_path).open(mode="w", encoding="utf8") as f:
        f.write(json_string(metrics))
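_match_trimmed_values is defined elsewhere in the same module. Judging by its name and by my_matching_lambda in Example #8 below, it most likely compares slot values after stripping surrounding whitespace; a plausible sketch:

def _match_trimmed_values(lhs_slot, rhs_slot):
    # Hypothetical reconstruction, mirroring my_matching_lambda in Example #8
    return lhs_slot["text"].strip() == rhs_slot["rawValue"].strip()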
Example #7
def main_train_test_metrics():
    args = vars(parse_train_test_args(sys.argv[1:]))

    train_dataset_path = args.pop("train_dataset_path")
    test_dataset_path = args.pop("test_dataset_path")
    output_path = args.pop("output_path")

    metrics_args = dict(
        train_dataset=train_dataset_path,
        test_dataset=test_dataset_path,
        engine_class=SnipsNLUEngine
    )

    include_errors = args.get("include_errors", False)
    with io.open(train_dataset_path, "r", encoding="utf-8") as f:
        load_resources(json.load(f)["language"])

    metrics = compute_train_test_metrics(**metrics_args)
    if not include_errors:
        metrics.pop("parsing_errors")

    with io.open(output_path, mode="w", encoding="utf8") as f:
        f.write(json.dumps(metrics))
Example #8
import os

from snips_nlu import SnipsNLUEngine
from snips_nlu_metrics import compute_train_test_metrics

from automatic_data_generation.utils.conversion import csv2json

# args comes from this script's command-line parser (not shown here)
datadir = os.path.join(*args.train_path.split('/')[:-1])
csv2json(datadir, datadir, augmented=False)
csv2json(datadir, datadir, augmented=True)

print('Starting benchmarking...')


def my_matching_lambda(lhs_slot, rhs_slot):
    return lhs_slot["text"].strip() == rhs_slot["rawValue"].strip()


raw_metrics = compute_train_test_metrics(
    train_dataset="data/train.json",
    test_dataset="data/validate.json",
    engine_class=SnipsNLUEngine,
    slot_matching_lambda=my_matching_lambda)
augmented_metrics = compute_train_test_metrics(
    train_dataset="data/train_augmented.json",
    test_dataset="data/validate.json",
    engine_class=SnipsNLUEngine,
    slot_matching_lambda=my_matching_lambda)

print('----------METRICS----------')
print('Without augmentation:')
print(raw_metrics['average_metrics'])
print('With augmentation:')
print(augmented_metrics['average_metrics'])
intent_improvement = 100 * (
    (augmented_metrics['average_metrics']['intent']['f1'] -
     raw_metrics['average_metrics']['intent']['f1']) /
    raw_metrics['average_metrics']['intent']['f1'])
print('Intent F1 relative improvement: {:.2f}%'.format(intent_improvement))
Example #9

def compute_sample_train_test_metrics():
    # TRAIN_DATASET_PATH and TEST_DATASET_PATH are module-level constants
    # defined elsewhere in the source file
    load_resources("en")
    return compute_train_test_metrics(train_dataset=TRAIN_DATASET_PATH,
                                      test_dataset=TEST_DATASET_PATH,
                                      engine_class=SnipsNLUEngine)
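The dict returned by compute_train_test_metrics contains aggregate scores alongside per-intent results; the key names below are taken from Examples #2 and #8 above:

metrics = compute_sample_train_test_metrics()
# "average_metrics" holds aggregate scores; "parsing_errors" (popped in
# several examples above) lists the utterances that were misparsed.
print(metrics["average_metrics"])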