Пример #1
0
def predict_with_model(config_path):
    config = read_json(config_path)
    set_deeppavlov_root(config)

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    read_params = {k: v for k, v in reader_config.items() if k not in ['name', 'data_path']}
    data = reader.read(data_path, **read_params)

    iterator_config = config['dataset_iterator']
    iterator: MorphoTaggerDatasetIterator =\
        from_params(iterator_config, data=data)

    model = build_model_from_config(config, load_trained=True)
    answers = [None] * len(iterator.test)
    batch_size = config['predict'].get("batch_size", -1)
    for indexes, (x, _) in iterator.gen_batches(
            batch_size=batch_size, data_type="test", shuffle=False, return_indexes=True):
        y = model(x)
        for i, elem in zip(indexes, y):
            answers[i] = elem
    outfile = config['predict'].get("outfile")
    if outfile is not None:
        outfile = Path(outfile)
        if not outfile.exists():
            outfile.parent.mkdir(parents=True, exist_ok=True)
        with open(outfile, "w", encoding="utf8") as fout:
            for elem in answers:
                fout.write(elem + "\n")
    return answers
Пример #2
0
 def get_default_agent():
     model_config = read_json(model_config_path)
     model = build_model_from_config(model_config)
     skill = DefaultStatelessSkill(model)
     agent = DefaultAgent([skill],
                          skills_processor=DefaultRichContentWrapper())
     return agent
Пример #3
0
def from_params(params: Dict, mode: str = 'infer', **kwargs) -> Component:
    """Builds and returns the Component from corresponding dictionary of parameters."""
    # what is passed in json:
    config_params = {k: _resolve(v) for k, v in params.items()}

    # get component by reference (if any)
    if 'ref' in config_params:
        try:
            return _refs[config_params['ref']]
        except KeyError:
            e = ConfigError(
                'Component with id "{id}" was referenced but not initialized'.
                format(id=config_params['ref']))
            log.exception(e)
            raise e

    elif 'config_path' in config_params:
        from deeppavlov.core.commands.infer import build_model_from_config
        deeppavlov_root = get_deeppavlov_root()
        refs = _refs.copy()
        _refs.clear()
        config = read_json(expand_path(config_params['config_path']))
        model = build_model_from_config(config)
        set_deeppavlov_root({'deeppavlov_root': deeppavlov_root})
        _refs.clear()
        _refs.update(refs)
        return model

    elif 'class' in config_params:
        cls = cls_from_str(config_params.pop('class'))
    else:
        cls_name = config_params.pop('name', None)
        if not cls_name:
            e = ConfigError(
                'Component config has no `name` nor `ref` or `class` fields')
            log.exception(e)
            raise e
        cls = get_model(cls_name)

    # find the submodels params recursively
    config_params = {k: _init_param(v, mode) for k, v in config_params.items()}

    try:
        spec = inspect.getfullargspec(cls)
        if 'mode' in spec.args + spec.kwonlyargs or spec.varkw is not None:
            kwargs['mode'] = mode

        component = cls(**dict(config_params, **kwargs))
        try:
            _refs[config_params['id']] = component
        except KeyError:
            pass
    except Exception:
        log.exception("Exception in {}".format(cls))
        raise

    return component
Пример #4
0
def from_params(params: Dict, mode: str = 'infer', **kwargs) -> Component:
    """Builds and returns the Component from corresponding dictionary of parameters."""
    # what is passed in json:
    config_params = {k: _resolve(v) for k, v in params.items()}

    # get component by reference (if any)
    if 'ref' in config_params:
        try:
            return _refs[config_params['ref']]
        except KeyError:
            e = ConfigError('Component with id "{id}" was referenced but not initialized'
                            .format(id=config_params['ref']))
            log.exception(e)
            raise e

    elif 'config_path' in config_params:
        from deeppavlov.core.commands.infer import build_model_from_config
        deeppavlov_root = get_deeppavlov_root()
        refs = _refs.copy()
        _refs.clear()
        config = read_json(expand_path(config_params['config_path']))
        model = build_model_from_config(config, as_component=True)
        set_deeppavlov_root({'deeppavlov_root': deeppavlov_root})
        _refs.clear()
        _refs.update(refs)
        return model

    elif 'class' in config_params:
        cls = cls_from_str(config_params.pop('class'))
    else:
        cls_name = config_params.pop('name', None)
        if not cls_name:
            e = ConfigError('Component config has no `name` nor `ref` or `class` fields')
            log.exception(e)
            raise e
        cls = get_model(cls_name)

    # find the submodels params recursively
    config_params = {k: _init_param(v, mode) for k, v in config_params.items()}

    try:
        spec = inspect.getfullargspec(cls)
        if 'mode' in spec.args+spec.kwonlyargs or spec.varkw is not None:
            kwargs['mode'] = mode

        component = cls(**dict(config_params, **kwargs))
        try:
            _refs[config_params['id']] = component
        except KeyError:
            pass
    except Exception:
        log.exception("Exception in {}".format(cls))
        raise

    return component
Пример #5
0
def make_agent() -> EcommerceAgent:
    """Make an agent

    Returns:
        agent: created Ecommerce agent
    """

    config_path = find_config('ecommerce_bot')
    skill = build_model_from_config(config_path, as_component=True)
    agent = EcommerceAgent(skills=[skill])
    return agent
Пример #6
0
def predict_with_model(config_path: [Path, str]) -> List[List[str]]:
    """Returns predictions of morphotagging model given in config :config_path:.

    Args:
        config_path: a path to config

    Returns:
        a list of morphological analyses for each sentence. Each analysis is either a list of tags
        or a list of full CONLL-U descriptions.

    """
    config = read_json(config_path)
    set_deeppavlov_root(config)

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    read_params = {
        k: v
        for k, v in reader_config.items() if k not in ['name', 'data_path']
    }
    data: Dict = reader.read(data_path, **read_params)

    iterator_config = config['dataset_iterator']
    iterator: MorphoTaggerDatasetIterator = from_params(iterator_config,
                                                        data=data)

    model = build_model_from_config(config, load_trained=True)
    answers = [None] * len(iterator.test)
    batch_size = config['predict'].get("batch_size", -1)
    for indexes, (x, _) in iterator.gen_batches(batch_size=batch_size,
                                                data_type="test",
                                                shuffle=False,
                                                return_indexes=True):
        y = model(x)
        for i, elem in zip(indexes, y):
            answers[i] = elem
    outfile = config['predict'].get("outfile")
    if outfile is not None:
        outfile = Path(outfile)
        if not outfile.exists():
            outfile.parent.mkdir(parents=True, exist_ok=True)
        with open(outfile, "w", encoding="utf8") as fout:
            for elem in answers:
                fout.write(elem + "\n")
    return answers
Пример #7
0
def init_agent():
    demo = build_model_from_config(json.load(open('skill.demo.json')),
                                   as_component=True)
    open_account = build_model_from_config(json.load(
        open('skill.open_account.json')),
                                           as_component=True)
    sms_inform = build_model_from_config(json.load(
        open('skill.sms_inform.json')),
                                         as_component=True)
    tarifs = build_model_from_config(json.load(open('skill.tarifs.json')),
                                     as_component=True)
    faq = build_model_from_config(json.load(open('skill.faq.json')),
                                  as_component=True)

    classifier = build_model_from_config(json.load(
        open('intent_classifier.json')),
                                         as_component=True)

    hello = RandomResponseSkill(responses=[
        'Я Demo-Бот:) Чем могу помочь? Я умею рассказывать про открытие и резервирование счетов, тарифы на услуги и продуты, подключение SMS-информирование и отвечать на FAQ-вопросы.',
        'Чем Вам помочь? Я умею рассказывать про открытие и резервирование счетов, тарифы на услуги и продуты, подключение SMS-информирование и отвечать на FAQ-вопросы.'
    ],
                                confidence=0.5)

    knn_clf = build_model_from_config(json.load(open('knn_classifier.json')),
                                      as_component=True)

    intents = ['OTHER', 'OPEN_ACCOUNT', 'SMS_INFORM', 'RATES', 'FAQ']

    filter = IntentFilter(intents, knn_clf, default_intent=0, always_open=[0])

    agent = Agent([hello, demo, sms_inform, tarifs, faq],
                  skills_selector=HighestConfidenceSelector(),
                  skills_filter=filter)

    return agent
Пример #8
0
def init_model(model_config_path):
    model_config = read_json(model_config_path)
    model = build_model_from_config(model_config)
    return model
Пример #9
0
	def __init__(self, config_path, download=False):
		if download == True:
			deep_download(config_path)
		self.model = build_model_from_config(config_path)
Пример #10
0
def from_params(params: Dict, mode='infer', **kwargs) -> Component:
    # what is passed in json:
    config_params = {k: _resolve(v) for k, v in params.items()}

    # get component by reference (if any)
    if 'ref' in config_params:
        try:
            return _refs[config_params['ref']]
        except KeyError:
            e = ConfigError('Component with id "{id}" was referenced but not initialized'
                            .format(id=config_params['ref']))
            log.exception(e)
            raise e

    elif 'config_path' in config_params:
        from deeppavlov.core.commands.infer import build_model_from_config
        deeppavlov_root = get_deeppavlov_root()
        refs = _refs.copy()
        _refs.clear()
        config = read_json(expand_path(config_params['config_path']))
        model = build_model_from_config(config, as_component=True)
        set_deeppavlov_root({'deeppavlov_root': deeppavlov_root})
        _refs.clear()
        _refs.update(refs)
        return model

    elif 'class' in config_params:
        c = config_params.pop('class')
        try:
            module_name, cls_name = c.split(':')
            cls = getattr(importlib.import_module(module_name), cls_name)
        except ValueError:
            e = ConfigError('Expected class description in a `module.submodules:ClassName` form, but got `{}`'
                            .format(c))
            log.exception(e)
            raise e
    else:
        cls_name = config_params.pop('name', None)
        if not cls_name:
            e = ConfigError('Component config has no `name` nor `ref` or `class` fields')
            log.exception(e)
            raise e
        try:
            cls = REGISTRY[cls_name]
        except KeyError:
            e = ConfigError('Class {} is not registered.'.format(cls_name))
            log.exception(e)
            raise e

    # find the submodels params recursively
    config_params = {k: _init_param(v, mode) for k, v in config_params.items()}

    try:
        spec = inspect.getfullargspec(cls)
        if 'mode' in spec.args+spec.kwonlyargs or spec.varkw is not None:
            kwargs['mode'] = mode

        component = cls(**dict(config_params, **kwargs))
        try:
            _refs[config_params['id']] = component
        except KeyError:
            pass
    except Exception:
        log.exception("Exception in {}".format(cls))
        raise

    return component
Пример #11
0
def init_model(model_config_path):
    config = read_json(model_config_path)
    model = build_model_from_config(config)
    model_name = type(model.get_main_component()).__name__
    return model, model_name
Пример #12
0
def train_evaluate_model_from_config(config: [str, Path, dict], to_train: bool = True, to_validate: bool = True) -> None:
    """Make training and evaluation of the model described in corresponding configuration file."""
    if isinstance(config, (str, Path)):
        config = read_json(config)
    set_deeppavlov_root(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    dataset_config = config.get('dataset', None)

    if dataset_config:
        config.pop('dataset')
        ds_type = dataset_config['type']
        if ds_type == 'classification':
            reader = {'name': 'basic_classification_reader'}
            iterator = {'name': 'basic_classification_iterator'}
            config['dataset_reader'] = {**dataset_config, **reader}
            config['dataset_iterator'] = {**dataset_config, **iterator}
        else:
            raise Exception("Unsupported dataset type: {}".format(ds_type))

    data = []
    reader_config = config.get('dataset_reader', None)

    if reader_config:
        reader_config = config['dataset_reader']
        if 'class' in reader_config:
            c = reader_config.pop('class')
            try:
                module_name, cls_name = c.split(':')
                reader = getattr(importlib.import_module(module_name), cls_name)()
            except ValueError:
                e = ConfigError('Expected class description in a `module.submodules:ClassName` form, but got `{}`'
                                .format(c))
                log.exception(e)
                raise e
        else:
            reader = get_model(reader_config.pop('name'))()
        data_path = reader_config.pop('data_path', '')
        if isinstance(data_path, list):
            data_path = [expand_path(x) for x in data_path]
        else:
            data_path = expand_path(data_path)
        data = reader.read(data_path, **reader_config)
    else:
        log.warning("No dataset reader is provided in the JSON config.")

    iterator_config = config['dataset_iterator']
    iterator: Union[DataLearningIterator, DataFittingIterator] = from_params(iterator_config,
                                                                             data=data)

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': to_validate,
        'test_best': True,
        'show_examples': False
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(zip(train_config['metrics'], get_metrics_by_names(train_config['metrics'])))

    if to_train:
        model = fit_chainer(config, iterator)

        if callable(getattr(model, 'train_on_batch', None)):
            _train_batches(model, iterator, train_config, metrics_functions)
        elif callable(getattr(model, 'fit_batches', None)):
            _fit_batches(model, iterator, train_config)
        elif callable(getattr(model, 'fit', None)):
            _fit(model, iterator, train_config)
        elif not isinstance(model, Chainer):
            log.warning('Nothing to train')

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid': _test_model(model, metrics_functions, iterator,
                                     train_config.get('batch_size', -1), 'valid',
                                     show_examples=train_config['show_examples'])
            }

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test': _test_model(model, metrics_functions, iterator,
                                    train_config.get('batch_size', -1), 'test',
                                    show_examples=train_config['show_examples'])
            }

            print(json.dumps(report, ensure_ascii=False))
Пример #13
0
def interact_model_by_telegram(config_path, token):
    config = read_json(config_path)
    model = build_model_from_config(config)
    init_bot_for_model(token, model)
Пример #14
0
def init_model(model_config_path):
    model_config = read_json(model_config_path)
    model = build_model_from_config(model_config)
    return model
Пример #15
0
def train_evaluate_model_from_config(
        config: [str, Path, dict],
        iterator=None,
        to_train=True,
        to_validate=True) -> Dict[str, Dict[str, float]]:
    """Make training and evaluation of the model described in corresponding configuration file."""
    if isinstance(config, (str, Path)):
        config = read_json(config)
    set_deeppavlov_root(config)
    import_packages(config.get('metadata', {}).get('imports', []))

    if iterator is None:
        data = read_data_by_config(config)
        iterator = get_iterator_from_config(config, data)

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': to_validate,
        'test_best': True,
        'show_examples': False
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    in_y = config['chainer'].get('in_y', ['y'])
    if isinstance(in_y, str):
        in_y = [in_y]
    if isinstance(config['chainer']['out'], str):
        config['chainer']['out'] = [config['chainer']['out']]
    metrics_functions = _parse_metrics(train_config['metrics'], in_y,
                                       config['chainer']['out'])

    if to_train:
        model = fit_chainer(config, iterator)

        if callable(getattr(model, 'train_on_batch', None)):
            _train_batches(model, iterator, train_config, metrics_functions)
        elif callable(getattr(model, 'fit_batches', None)):
            _fit_batches(model, iterator, train_config)
        elif callable(getattr(model, 'fit', None)):
            _fit(model, iterator, train_config)
        elif not isinstance(model, Chainer):
            log.warning('Nothing to train')

        model.destroy()

    res = {}

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid':
                _test_model(model,
                            metrics_functions,
                            iterator,
                            train_config.get('batch_size', -1),
                            'valid',
                            show_examples=train_config['show_examples'])
            }

            res['valid'] = report['valid']['metrics']

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test':
                _test_model(model,
                            metrics_functions,
                            iterator,
                            train_config.get('batch_size', -1),
                            'test',
                            show_examples=train_config['show_examples'])
            }

            res['test'] = report['test']['metrics']

            print(json.dumps(report, ensure_ascii=False))

        model.destroy()

    return res
Пример #16
0
from deeppavlov.core.commands.infer import build_model_from_config
import thulac as thu

model = build_model_from_config(
    "E:/GithubSpace/DeepPavlov/deeppavlov/configs/ner/wzh_single_ner_config.json"
)
thu1 = thu.thulac(seg_only=True, user_dict="user_DIY_words.txt")
s0 = thu1.cut("双十一期间全场包邮买100立减30", text=True)
# s0 = s0.split(" ")
# print(s0)
# ss = str(input("请输入: "))
# s0 = thu1.cut(ss, text=True)
# print(s0.split(" "))
for i in range(50):
    # print(model(input("请输入: ")))
    s1 = input("请输入: ")
    s2 = thu1.cut(s1, text=True)
    print(model([s2.split(" ")]))
    # print(model([input("请输入: ")]))

# 训练语句如下,要手动把迭代器送进去  因为配置文件没读取器
# from deeppavlov.core.common.params import from_params
# train_evaluate_model_from_config("E:/GithubSpace/DeepPavlov/deeppavlov/configs/ner/wzh_single_ner_config.json",
#                                  from_params({"name": "wzh_ner_iterator"}))
Пример #17
0
def interact_model_by_telegram(config_path, token):
    config = read_json(config_path)
    model = build_model_from_config(config)
    init_bot_for_model(token, model)
Пример #18
0
def train_model_from_config(config_path: str):
    config = read_json(config_path)
    set_deeppavlov_root(config)

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    data = reader.read(data_path)

    dataset_config = config['dataset']
    dataset: Dataset = from_params(dataset_config, data=data)

    if 'chainer' in config:
        model = fit_chainer(config, dataset)
    else:
        vocabs = {}
        for vocab_param_name, vocab_config in config.get('vocabs', {}).items():
            v: Estimator = from_params(vocab_config, mode='train')
            vocabs[vocab_param_name] = _fit(v, dataset)

        model_config = config['model']
        model = from_params(model_config, vocabs=vocabs, mode='train')

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': True,
        'test_best': True
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(
        zip(train_config['metrics'],
            get_metrics_by_names(train_config['metrics'])))

    if callable(getattr(model, 'train_on_batch', None)):
        _train_batches(model, dataset, train_config, metrics_functions)
    elif callable(getattr(model, 'fit', None)):
        _fit(model, dataset, train_config)
    elif not isinstance(model, Chainer):
        log.warning('Nothing to train')

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid':
                _test_model(model, metrics_functions, dataset,
                            train_config.get('batch_size', -1), 'valid')
            }

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test':
                _test_model(model, metrics_functions, dataset,
                            train_config.get('batch_size', -1), 'test')
            }

            print(json.dumps(report, ensure_ascii=False))
Пример #19
0
    'save_path': 'dstc2/resto.sqlite',
    'primary_keys': ['name'],
    'table_name': 'mytable'
}
""" Adding vocab and database components to pipe """
db_config['chainer']['pipe'] = []
db_config['chainer']['pipe'].append(vocab_comp_config)
db_config['chainer']['pipe'].append(db_comp_config)
""" Adding database to bot component config """
bot_with_db_comp_config['database'] = '#restaurant_database'
""" Adding bot component to pipe """
db_config['chainer']['pipe'].append(bot_with_db_comp_config)
json.dump(db_config, open("gobot/db_config.json", 'wt'))
"""
Model training and building
"""
""" Train gobot_dstc2_db model """
# train_evaluate_model_from_config("gobot/db_config.json")
""" build model """
model = build_model_from_config(db_config)
"""
Dialogue system
"""

" starting new dialog, if the cell is running, please do not run other cells in parallel -- there is a possibility of a hangup"
model.reset()
utterance = ""

while utterance != 'exit':
    print(">> " + model([utterance])[0])
    utterance = input(':: ')
Пример #20
0
from flask import Flask
from redis import Redis

from deeppavlov.core.commands.infer import build_model_from_config
from deeppavlov.core.common.file import read_json

CONFIG_PATH = '/home/DeepPavlov/deeppavlov/configs/seq2seq_go_bot/bot_kvret_infer.json'
model = build_model_from_config(read_json(CONFIG_PATH))

utterance = ""
while utterance != 'exit':
    print(">> " + model([utterance])[0])
    utterance = input(':: ')

if __name__ == "__main__":
    app.run(host="0.0.0.0", debug=True)
Пример #21
0
def from_params(params: Dict, **kwargs) -> Component:
    # what is passed in json:
    config_params = {k: _resolve(v) for k, v in params.items()}

    # get component by reference (if any)
    if 'ref' in config_params:
        try:
            return _refs[config_params['ref']]
        except KeyError:
            e = ConfigError(
                'Component with id "{id}" was referenced but not initialized'.
                format(id=config_params['ref']))
            log.exception(e)
            raise e

    elif 'config_path' in config_params:
        from deeppavlov.core.commands.infer import build_model_from_config
        deeppavlov_root = get_deeppavlov_root()
        config = read_json(expand_path(config_params['config_path']))
        model = build_model_from_config(config, as_component=True)
        set_deeppavlov_root({'deeppavlov_root': deeppavlov_root})
        return model

    elif 'class' in config_params:
        c = config_params.pop('class')
        try:
            module_name, cls_name = c.split(':')
            cls = getattr(importlib.import_module(module_name), cls_name)
        except ValueError:
            e = ConfigError(
                'Expected class description in a `module.submodules:ClassName` form, but got `{}`'
                .format(c))
            log.exception(e)
            raise e
    else:
        cls_name = config_params.pop('name', None)
        if not cls_name:
            e = ConfigError(
                'Component config has no `name` nor `ref` or `class` fields')
            log.exception(e)
            raise e
        try:
            cls = REGISTRY[cls_name]
        except KeyError:
            e = ConfigError('Class {} is not registered.'.format(cls_name))
            log.exception(e)
            raise e

    # find the submodels params recursively
    for param_name, subcls_params in config_params.items():
        if isinstance(subcls_params, dict):
            if not {'ref', 'name', 'class', 'config_path'
                    }.intersection(subcls_params):
                "This parameter is passed as dict to the class constructor."
                " The user didn't intent it to be a component."
                for k, v in subcls_params.items():
                    subcls_params[k] = _resolve(v)
                continue

            config_params[param_name] = from_params(subcls_params,
                                                    vocabs=kwargs['vocabs'],
                                                    mode=kwargs['mode'])

    try:
        component = cls(**dict(config_params, **kwargs))
        try:
            _refs[config_params['id']] = component
        except KeyError:
            pass
    except Exception:
        log.exception("Exception in {}".format(cls))
        raise

    return component
Пример #22
0
def train_evaluate_model_from_config(config: [str, Path, dict],
                                     to_train=True,
                                     to_validate=True) -> None:
    if isinstance(config, (str, Path)):
        config = read_json(config)
    set_deeppavlov_root(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    dataset_config = config.get('dataset', None)

    if dataset_config:
        config.pop('dataset')
        ds_type = dataset_config['type']
        if ds_type == 'classification':
            reader = {'name': 'basic_classification_reader'}
            iterator = {'name': 'basic_classification_iterator'}
            config['dataset_reader'] = {**dataset_config, **reader}
            config['dataset_iterator'] = {**dataset_config, **iterator}
        else:
            raise Exception("Unsupported dataset type: {}".format(ds_type))

    data = []
    reader_config = config.get('dataset_reader', None)

    if reader_config:
        reader_config = config['dataset_reader']
        if 'class' in reader_config:
            c = reader_config.pop('class')
            try:
                module_name, cls_name = c.split(':')
                reader = getattr(importlib.import_module(module_name),
                                 cls_name)()
            except ValueError:
                e = ConfigError(
                    'Expected class description in a `module.submodules:ClassName` form, but got `{}`'
                    .format(c))
                log.exception(e)
                raise e
        else:
            reader = get_model(reader_config.pop('name'))()
        data_path = expand_path(reader_config.pop('data_path', ''))
        data = reader.read(data_path, **reader_config)
    else:
        log.warning("No dataset reader is provided in the JSON config.")

    iterator_config = config['dataset_iterator']
    iterator: Union[DataLearningIterator,
                    DataFittingIterator] = from_params(iterator_config,
                                                       data=data)

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': to_validate,
        'test_best': True
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(
        zip(train_config['metrics'],
            get_metrics_by_names(train_config['metrics'])))

    if to_train:
        model = fit_chainer(config, iterator)

        if callable(getattr(model, 'train_on_batch', None)):
            _train_batches(model, iterator, train_config, metrics_functions)
        elif callable(getattr(model, 'fit_batches', None)):
            _fit_batches(model, iterator, train_config)
        elif callable(getattr(model, 'fit', None)):
            _fit(model, iterator, train_config)
        elif not isinstance(model, Chainer):
            log.warning('Nothing to train')

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid':
                _test_model(model, metrics_functions, iterator,
                            train_config.get('batch_size', -1), 'valid')
            }

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test':
                _test_model(model, metrics_functions, iterator,
                            train_config.get('batch_size', -1), 'test')
            }

            print(json.dumps(report, ensure_ascii=False))
Пример #23
0
def train_model_from_config(config_path: str) -> None:
    config = read_json(config_path)
    set_deeppavlov_root(config)

    dataset_config = config.get('dataset', None)

    if dataset_config:
        config.pop('dataset')
        ds_type = dataset_config['type']
        if ds_type == 'classification':
            reader = {'name': 'basic_classification_reader'}
            iterator = {'name': 'basic_classification_iterator'}
            config['dataset_reader'] = {**dataset_config, **reader}
            config['dataset_iterator'] = {**dataset_config, **iterator}
        else:
            raise Exception("Unsupported dataset type: {}".format(ds_type))

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    kwargs = {
        k: v
        for k, v in reader_config.items() if k not in ['name', 'data_path']
    }
    data = reader.read(data_path, **kwargs)

    iterator_config = config['dataset_iterator']
    iterator: BasicDatasetIterator = from_params(iterator_config, data=data)

    if 'chainer' in config:
        model = fit_chainer(config, iterator)
    else:
        vocabs = config.get('vocabs', {})
        for vocab_param_name, vocab_config in vocabs.items():
            v: Estimator = from_params(vocab_config, mode='train')
            vocabs[vocab_param_name] = _fit(v, iterator)

        model_config = config['model']
        model = from_params(model_config, vocabs=vocabs, mode='train')

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': True,
        'test_best': True
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(
        zip(train_config['metrics'],
            get_metrics_by_names(train_config['metrics'])))

    if callable(getattr(model, 'train_on_batch', None)):
        _train_batches(model, iterator, train_config, metrics_functions)
    elif callable(getattr(model, 'fit', None)):
        _fit(model, iterator, train_config)
    elif not isinstance(model, Chainer):
        log.warning('Nothing to train')

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid':
                _test_model(model, metrics_functions, iterator,
                            train_config.get('batch_size', -1), 'valid')
            }

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test':
                _test_model(model, metrics_functions, iterator,
                            train_config.get('batch_size', -1), 'test')
            }

            print(json.dumps(report, ensure_ascii=False))