def main():
    """Parse CLI arguments and dispatch to the requested DeepPavlov command.

    Supported modes: download, train, evaluate, interact, interactbot,
    riseapi, predict. Relies on the module-level ``parser`` and ``log``.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)

    # Data download happens either via the dedicated mode or the -d flag.
    if args.download or args.mode == 'download':
        deep_download(['-c', pipeline_config_path])

    # CLI flag takes precedence over the environment variable.
    token = args.token or os.getenv('TELEGRAM_TOKEN')

    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path)
    elif args.mode == 'evaluate':
        train_evaluate_model_from_config(pipeline_config_path, to_train=False, to_validate=False)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        if not token:
            # Fixed message: the variable actually read above is TELEGRAM_TOKEN,
            # not TELEGRAM_BOT as the old message claimed.
            log.error('Token required: initiate -t param or TELEGRAM_TOKEN env var with Telegram bot token')
        else:
            interact_model_by_telegram(pipeline_config_path, token)
    elif args.mode == 'riseapi':
        start_model_server(pipeline_config_path)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size, args.file_path)
def main():
    """Parse CLI arguments and dispatch to the requested DeepPavlov command.

    Supported modes: download, train, evaluate, interact, interactbot,
    interactmsbot, alexa, riseapi, predict, install, crossval.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)
    # SSL settings shared by the server-style modes below.
    https = args.https
    ssl_key = args.key
    ssl_cert = args.cert
    # Data download happens either via the dedicated mode or the -d flag.
    if args.download or args.mode == 'download':
        deep_download(pipeline_config_path)
    # Agent options shared by the bot modes.
    multi_instance = args.multi_instance
    stateful = args.stateful
    start_epoch_num = args.start_epoch_num
    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path,
                                         recursive=args.recursive,
                                         start_epoch_num=start_epoch_num)
    elif args.mode == 'evaluate':
        # Evaluation only: skip both training and validation phases.
        train_evaluate_model_from_config(pipeline_config_path,
                                         to_train=False,
                                         to_validate=False,
                                         start_epoch_num=start_epoch_num)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        # NOTE(review): no fallback to an env var and no None-check here,
        # unlike sibling main() variants — confirm args.token is required.
        token = args.token
        interact_model_by_telegram(pipeline_config_path, token)
    elif args.mode == 'interactmsbot':
        # Microsoft Bot Framework credentials come straight from the CLI.
        ms_id = args.ms_id
        ms_secret = args.ms_secret
        run_ms_bf_default_agent(model_config=pipeline_config_path,
                                app_id=ms_id,
                                app_secret=ms_secret,
                                multi_instance=multi_instance,
                                stateful=stateful,
                                port=args.port)
    elif args.mode == 'alexa':
        run_alexa_default_agent(model_config=pipeline_config_path,
                                multi_instance=multi_instance,
                                stateful=stateful,
                                port=args.port,
                                https=https,
                                ssl_key=ssl_key,
                                ssl_cert=ssl_cert)
    elif args.mode == 'riseapi':
        # 'alice' api_mode selects the Yandex.Alice-compatible server.
        alice = args.api_mode == 'alice'
        if alice:
            start_alice_server(pipeline_config_path, https, ssl_key, ssl_cert, port=args.port)
        else:
            start_model_server(pipeline_config_path, https, ssl_key, ssl_cert, port=args.port)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size, args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
    elif args.mode == 'crossval':
        # Cross-validation needs at least 2 folds by definition.
        if args.folds < 2:
            log.error('Minimum number of Folds is 2')
        else:
            n_folds = args.folds
            calc_cv_score(pipeline_config_path, n_folds=n_folds, is_loo=False)
def main():
    """Parse CLI arguments, pin the GPU, and dispatch the requested command.

    Same mode dispatch as the other main() variants, plus explicit GPU
    selection via CUDA_VISIBLE_DEVICES.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)
    # Restrict TensorFlow/PyTorch to the requested GPU before any model code runs.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    log.info("use gpu id:" + args.gpu_id)
    # Data download happens either via the dedicated mode or the -d flag.
    if args.download or args.mode == 'download':
        deep_download(pipeline_config_path)
    # Agent options shared by the bot modes.
    multi_instance = args.multi_instance
    stateful = args.stateful
    start_epoch_num = args.start_epoch_num
    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path,
                                         recursive=args.recursive,
                                         start_epoch_num=start_epoch_num)
    elif args.mode == 'evaluate':
        # Evaluation only: skip both training and validation phases.
        train_evaluate_model_from_config(pipeline_config_path,
                                         to_train=False,
                                         to_validate=False,
                                         start_epoch_num=start_epoch_num)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        # NOTE(review): no env-var fallback or None-check for the token here,
        # unlike sibling main() variants — confirm args.token is required.
        token = args.token
        interact_model_by_telegram(pipeline_config_path, token)
    elif args.mode == 'interactmsbot':
        ms_id = args.ms_id
        ms_secret = args.ms_secret
        run_ms_bf_default_agent(model_config=pipeline_config_path,
                                app_id=ms_id,
                                app_secret=ms_secret,
                                multi_instance=multi_instance,
                                stateful=stateful)
    elif args.mode == 'riseapi':
        # 'alice' api_mode selects the Yandex.Alice-compatible server.
        alice = args.api_mode == 'alice'
        https = args.https
        ssl_key = args.key
        ssl_cert = args.cert
        if alice:
            start_alice_server(pipeline_config_path, https, ssl_key, ssl_cert)
        else:
            start_model_server(pipeline_config_path, https, ssl_key, ssl_cert)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size, args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
    elif args.mode == 'crossval':
        # Cross-validation needs at least 2 folds by definition.
        if args.folds < 2:
            log.error('Minimum number of Folds is 2')
        else:
            n_folds = args.folds
            calc_cv_score(pipeline_config_path, n_folds=n_folds, is_loo=False)
def calc_cv_score(config=None, pipeline_config_path=None, data=None, n_folds=5, is_loo=False):
    """Cross-validate the model described by a DeepPavlov config.

    Trains and evaluates the model on each train/valid split produced by
    ``generate_train_valid`` and averages every reported validation metric.

    Args:
        config: parsed config dict; if None, loaded from ``pipeline_config_path``.
        pipeline_config_path: path to a JSON config (used only when ``config`` is None).
        data: pre-read dataset; read via ``read_data_by_config`` when None.
        n_folds: number of folds for k-fold CV.
        is_loo: use leave-one-out splitting instead of k folds.

    Returns:
        OrderedDict mapping each validation metric name to its mean over folds.

    Raises:
        ValueError: when both ``config`` and ``pipeline_config_path`` are None.
    """
    if config is None:
        if pipeline_config_path is not None:
            config = read_json(pipeline_config_path)
        else:
            raise ValueError('Both "config" and "pipeline_config_path" are None')
    if data is None:
        data = read_data_by_config(config)
    config, dirs_for_saved_models = change_savepath_for_model(config)

    # Accumulate scores lazily, keyed by what score['valid'] actually reports.
    # Pre-seeding from config['train']['metrics'] (as before) raised KeyError
    # when a metric was declared as a dict like {'name': ...}, because the
    # reported key is the plain metric name, not the dict.
    cv_score = OrderedDict()
    for data_i in generate_train_valid(data, n_folds=n_folds, is_loo=is_loo):
        iterator = get_iterator_from_config(config, data_i)
        # Save dirs are recreated per fold and removed afterwards so each fold
        # trains from scratch.
        create_dirs_to_save_models(dirs_for_saved_models)
        score = train_evaluate_model_from_config(config, iterator=iterator)
        delete_dir_for_saved_models(dirs_for_saved_models)
        for key, value in score['valid'].items():
            cv_score.setdefault(key, []).append(value)

    for key, fold_values in cv_score.items():
        cv_score[key] = np.mean(fold_values)
        log.info('Cross-Validation "{}" is: {}'.format(key, cv_score[key]))
    return cv_score
def main():
    """Parse CLI arguments and dispatch to the requested DeepPavlov command.

    Supported modes: download, train, evaluate, interact, interactbot,
    riseapi, predict, install. Relies on the module-level ``parser`` and ``log``.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)

    # Data download happens either via the dedicated mode or the -d flag.
    if args.download or args.mode == 'download':
        deep_download(['-c', pipeline_config_path])

    # CLI flag takes precedence over the environment variable.
    token = args.token or os.getenv('TELEGRAM_TOKEN')

    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path)
    elif args.mode == 'evaluate':
        train_evaluate_model_from_config(pipeline_config_path, to_train=False, to_validate=False)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        if not token:
            # Fixed message: the variable actually read above is TELEGRAM_TOKEN,
            # not TELEGRAM_BOT as the old message claimed.
            log.error('Token required: initiate -t param or TELEGRAM_TOKEN env var with Telegram bot token')
        else:
            interact_model_by_telegram(pipeline_config_path, token)
    elif args.mode == 'riseapi':
        start_model_server(pipeline_config_path)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size, args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
def calc_cv_score(config, data=None, n_folds=5, is_loo=False):
    """Cross-validate the model described by ``config``.

    For each train/valid split, trains the model from scratch and records all
    validation metrics; returns an OrderedDict of metric name -> mean score.
    """
    config = parse_config(config)
    if data is None:
        data = read_data_by_config(config)
    config, save_dirs = change_savepath_for_model(config)

    fold_scores = OrderedDict()
    for split in generate_train_valid(data, n_folds=n_folds, is_loo=is_loo):
        data_iterator = get_iterator_from_config(config, split)
        # Fresh save dirs per fold: each fold trains independently.
        create_dirs_to_save_models(save_dirs)
        fold_result = train_evaluate_model_from_config(config, iterator=data_iterator)
        delete_dir_for_saved_models(save_dirs)
        for metric_name, metric_value in fold_result['valid'].items():
            fold_scores.setdefault(metric_name, []).append(metric_value)

    # Replace each per-fold list with its mean and report it.
    for metric_name in fold_scores:
        fold_scores[metric_name] = np.mean(fold_scores[metric_name])
        log.info('Cross-Validation "{}" is: {}'.format(metric_name, fold_scores[metric_name]))
    return fold_scores
def main():
    """Parse CLI arguments and dispatch to the requested DeepPavlov command.

    Supported modes: download, train, evaluate, interact, interactbot,
    interactmsbot, alexa, riseapi, predict, install, crossval.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)
    # SSL settings shared by the server-style modes below.
    https = args.https
    ssl_key = args.key
    ssl_cert = args.cert
    # Data download happens either via the dedicated mode or the -d flag.
    if args.download or args.mode == 'download':
        deep_download(pipeline_config_path)
    # Agent options shared by the bot modes.
    multi_instance = args.multi_instance
    stateful = args.stateful
    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path,
                                         recursive=args.recursive,
                                         start_epoch_num=args.start_epoch_num)
    elif args.mode == 'evaluate':
        # NOTE(review): unlike other variants this one does not pass
        # to_validate=False — confirm validation is intended here.
        train_evaluate_model_from_config(pipeline_config_path,
                                         to_train=False,
                                         start_epoch_num=args.start_epoch_num)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        token = args.token
        # default_skill_wrap is on unless explicitly disabled via CLI.
        interact_model_by_telegram(
            model_config=pipeline_config_path,
            token=token,
            default_skill_wrap=not args.no_default_skill)
    elif args.mode == 'interactmsbot':
        # Microsoft Bot Framework credentials come straight from the CLI.
        ms_id = args.ms_id
        ms_secret = args.ms_secret
        run_ms_bf_default_agent(model_config=pipeline_config_path,
                                app_id=ms_id,
                                app_secret=ms_secret,
                                multi_instance=multi_instance,
                                stateful=stateful,
                                port=args.port,
                                https=https,
                                ssl_key=ssl_key,
                                ssl_cert=ssl_cert,
                                default_skill_wrap=not args.no_default_skill)
    elif args.mode == 'alexa':
        run_alexa_default_agent(model_config=pipeline_config_path,
                                multi_instance=multi_instance,
                                stateful=stateful,
                                port=args.port,
                                https=https,
                                ssl_key=ssl_key,
                                ssl_cert=ssl_cert,
                                default_skill_wrap=not args.no_default_skill)
    elif args.mode == 'riseapi':
        # 'alice' api_mode selects the Yandex.Alice-compatible server.
        alice = args.api_mode == 'alice'
        if alice:
            start_alice_server(pipeline_config_path, https, ssl_key, ssl_cert, port=args.port)
        else:
            start_model_server(pipeline_config_path, https, ssl_key, ssl_cert, port=args.port)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size, args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
    elif args.mode == 'crossval':
        # Cross-validation needs at least 2 folds by definition.
        if args.folds < 2:
            log.error('Minimum number of Folds is 2')
        else:
            n_folds = args.folds
            calc_cv_score(pipeline_config_path, n_folds=n_folds,
                          is_loo=False)
{ "ref": "vocab", "in": ["y_predicted_tokens_ids"], "out": ["y_predicted_tokens"] }, { "name": "postprocessing", "in": ["y_predicted_tokens"], "out": ["y_predicted_tokens"] } ], "out": ["y_predicted_tokens"] }, "train": { "log_every_n_batches": 100, "val_every_n_epochs":0, "batch_size": 64, "validation_patience": 0, "epochs": 20, "metrics": ["bleu"], } } print("Before building from config") model = build_model_from_config(config) model(['Hi, how are you?', 'Any ideas my dear friend?']) json.dump(config, open('seq2seq.json', 'w')) train_evaluate_model_from_config('seq2seq.json') model = build_model_from_config(config) model(['hi, how are you?', 'any ideas my dear friend?', 'okay, i agree with you', 'good bye!'])
# -*- coding: utf-8 -*- import telebot as telebot from telebot import apihelper from deeppavlov import configs, train_model from deeppavlov.core.common.file import read_json from deeppavlov.core.commands.infer import build_model from deeppavlov.core.commands.train import train_evaluate_model_from_config print("import successful") far = train_evaluate_model_from_config("./config.json") faq = build_model("./config.json", download=True) model_config = read_json("./config.json") model_config["dataset_reader"]["data_path"] = "./faq_school_en.csv" model_config["dataset_reader"]["data_url"] = None faq = train_model(model_config) print("train model") bot = telebot.TeleBot('301914397:AAEmR8WlfzyxQT53zdpqHrSwR8iwaKEr-h8') def GetAnswer(question): print("get question") return faq([question])[0][0][0] @bot.message_handler(content_types=['text']) def get_text_messages(message): print("text handler") if message.text == "Привет": bot.send_message(message.from_user.id, "Привет, чем я могу тебе помочь?") elif message.text == "/help":
from deeppavlov import configs
from deeppavlov.core.commands.infer import build_model
from deeppavlov.core.commands.train import train_evaluate_model_from_config

if __name__ == '__main__':
    # Train both ODQA components in order, downloading their data first:
    # the TF-IDF document ranker, then the no-answer SQuAD reader.
    for component_config in (configs.doc_retrieval.en_ranker_tfidf_wiki,
                             configs.squad.multi_squad_noans):
        train_evaluate_model_from_config(component_config, download=True)

    # Assemble the full ODQA pipeline from the freshly trained components
    # and run a single smoke-test question.
    odqa_pipeline = build_model(configs.odqa.en_odqa_infer_wiki, load_trained=True)
    answer = odqa_pipeline(['What is the name of Darth Vader\'s son?'])
    print(answer)
def main():
    """Grid-search hyperparameters marked with 'search_choice' in a config.

    Evaluates every combination either by cross-validation (when --folds is
    given) or by a single train/valid run, then writes the best-scoring
    config next to the original with a '.cvbest.json' suffix.
    """
    params_helper = ParamsSearch()
    args = parser.parse_args()

    # Decode the --folds argument: 'loo' -> leave-one-out, digits -> k-fold,
    # None -> plain train/valid evaluation.
    is_loo = False
    n_folds = None
    if args.folds == 'loo':
        is_loo = True
    elif args.folds is None:
        n_folds = None
    elif args.folds.isdigit():
        n_folds = int(args.folds)
    else:
        raise NotImplementedError('Not implemented this type of CV')

    # read config
    pipeline_config_path = find_config(args.config_path)
    config_init = read_json(pipeline_config_path)
    config = parse_config(config_init)
    data = read_data_by_config(config)

    # The first configured metric drives model selection; metrics may be
    # declared either as plain names or as dicts with a 'name' key.
    target_metric = parse_config(config_init)['train']['metrics'][0]
    if isinstance(target_metric, dict):
        target_metric = target_metric['name']

    # Collect every 'search_choice' placeholder: its path in the config,
    # its parameter name (last path element) and its candidate values.
    param_paths = list(params_helper.find_model_path(config, 'search_choice'))
    param_values = []
    param_names = []
    for path in param_paths:
        value = params_helper.get_value_from_config(config, path)
        param_name = path[-1]
        param_value_search = value['search_choice']
        param_names.append(param_name)
        param_values.append(param_value_search)

    # find optimal params
    if args.search_type == 'grid':
        # Generate the cartesian product of all candidate values.
        combinations = list(product(*param_values))
        # calculate cv scores
        scores = []
        for comb in combinations:
            # Start from a pristine copy so earlier combinations don't leak in.
            config = deepcopy(config_init)
            for param_path, param_value in zip(param_paths, comb):
                params_helper.insert_value_or_dict_into_config(
                    config, param_path, param_value)
            config = parse_config(config)
            # NOTE(review): '|' is a bitwise or of bools here; behaves like
            # 'or' for bool operands but 'or' would be clearer.
            if (n_folds is not None) | is_loo:
                # CV for model evaluation; the first reported metric is used.
                score_dict = calc_cv_score(config, data=data, n_folds=n_folds, is_loo=is_loo)
                score = score_dict[next(iter(score_dict))]
            else:
                # train/valid for model evaluation; carve a 20% valid split
                # out of train when the dataset has no valid portion.
                data_to_evaluate = data.copy()
                if len(data_to_evaluate['valid']) == 0:
                    data_to_evaluate['train'], data_to_evaluate[
                        'valid'] = train_test_split(data_to_evaluate['train'],
                                                    test_size=0.2)
                iterator = get_iterator_from_config(config, data_to_evaluate)
                score = train_evaluate_model_from_config(
                    config, iterator=iterator)['valid'][target_metric]
            scores.append(score)

        # Pick the combination with the best target-metric score.
        best_params_dict = get_best_params(combinations, scores, param_names, target_metric)
        log.info('Best model params: {}'.format(best_params_dict))
    else:
        raise NotImplementedError('Not implemented this type of search')

    # Write the winning parameter values back into the original (unparsed)
    # config and save it alongside the input config.
    best_config = config_init
    for i, param_name in enumerate(best_params_dict.keys()):
        if param_name != target_metric:
            params_helper.insert_value_or_dict_into_config(
                best_config, param_paths[i], best_params_dict[param_name])
    best_model_filename = pipeline_config_path.with_suffix('.cvbest.json')
    save_json(best_config, best_model_filename)
    log.info('Best model saved in json-file: {}'.format(best_model_filename))
# from deeppavlov.core.commands.train import train_evaluate_model_from_config # import importlib.util # deeppavlov_spec = importlib.util.spec_from_file_location("deeppavlov", "/home/anton/DeepPavlov/deeppavlov/__init__.py") # deeppavlov = importlib.util.module_from_spec(deeppavlov_spec) # deeppavlov_spec.loader.exec_module(deeppavlov) # # train_spec = importlib.util.spec_from_file_location( # "deeppavlov", "/home/anton/DeepPavlov/deeppavlov/core/commands/train.py") # train = importlib.util.module_from_spec(train_spec) # train_spec.loader.exec_module(train) import sys sys.path.append('/home/anton/DeepPavlov') if '/home/anton/dpenv/src/deeppavlov' in sys.path: sys.path.remove('/home/anton/dpenv/src/deeppavlov') import one_str_lm_reader, one_str_lm_iterator, char_lm_vocab, lstm from deeppavlov.core.commands.train import train_evaluate_model_from_config train_evaluate_model_from_config('lstm_config.json')
def main():
    """Parse CLI arguments and dispatch to the requested DeepPavlov command.

    Supported modes: download, train, evaluate, interact, telegram, msbot,
    alexa, alice, riseapi, risesocket, agent-rabbit, predict, install,
    crossval.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)
    # Data download happens either via the dedicated mode or the -d flag.
    if args.download or args.mode == 'download':
        deep_download(pipeline_config_path)
    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path,
                                         recursive=args.recursive,
                                         start_epoch_num=args.start_epoch_num)
    elif args.mode == 'evaluate':
        # Evaluation only; validation still runs (no to_validate=False here).
        train_evaluate_model_from_config(pipeline_config_path,
                                         to_train=False,
                                         start_epoch_num=args.start_epoch_num)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'telegram':
        interact_model_by_telegram(model_config=pipeline_config_path, token=args.token)
    elif args.mode == 'msbot':
        # Microsoft Bot Framework server with optional SSL.
        start_ms_bf_server(model_config=pipeline_config_path,
                           app_id=args.ms_id,
                           app_secret=args.ms_secret,
                           port=args.port,
                           https=args.https,
                           ssl_key=args.key,
                           ssl_cert=args.cert)
    elif args.mode == 'alexa':
        start_alexa_server(model_config=pipeline_config_path,
                           port=args.port,
                           https=args.https,
                           ssl_key=args.key,
                           ssl_cert=args.cert)
    elif args.mode == 'alice':
        start_alice_server(model_config=pipeline_config_path,
                           port=args.port,
                           https=args.https,
                           ssl_key=args.key,
                           ssl_cert=args.cert)
    elif args.mode == 'riseapi':
        start_model_server(pipeline_config_path, args.https, args.key, args.cert, port=args.port)
    elif args.mode == 'risesocket':
        # Serve over a raw socket; socket_file is used for UNIX sockets.
        start_socket_server(pipeline_config_path, args.socket_type,
                            port=args.port, socket_file=args.socket_file)
    elif args.mode == 'agent-rabbit':
        # Run as a DeepPavlov Agent service consuming from RabbitMQ.
        start_rabbit_service(model_config=pipeline_config_path,
                             service_name=args.service_name,
                             agent_namespace=args.agent_namespace,
                             batch_size=args.batch_size,
                             utterance_lifetime_sec=args.utterance_lifetime,
                             rabbit_host=args.rabbit_host,
                             rabbit_port=args.rabbit_port,
                             rabbit_login=args.rabbit_login,
                             rabbit_password=args.rabbit_password,
                             rabbit_virtualhost=args.rabbit_virtualhost)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size, args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
    elif args.mode == 'crossval':
        # Cross-validation needs at least 2 folds by definition.
        if args.folds < 2:
            log.error('Minimum number of Folds is 2')
        else:
            calc_cv_score(pipeline_config_path, n_folds=args.folds, is_loo=False)
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Thu May 10 16:28:38 2018 @author: lsm """ from deeppavlov.core.commands.train import train_evaluate_model_from_config from deeppavlov.core.commands.utils import expand_path, set_deeppavlov_root from deeppavlov.core.common.file import read_json from model.pipeline.text_normalizer import * from model.pipeline.embedder import * from model.pipeline.CNN_model import * config = read_json('model/subs/pay/pay_config.json') set_deeppavlov_root(config) train_evaluate_model_from_config('model/subs/pay/pay_config.json')
from deeppavlov.deep import find_config
from deeppavlov.core.commands.train import train_evaluate_model_from_config
from deeppavlov.core.commands.infer import interact_model

# Scratch runner: exactly one pipeline config is active at a time; the
# commented alternatives are kept for quick switching during experiments.
# PIPELINE_CONFIG_PATH = 'configs/intents/intents_dstc2.json'
# PIPELINE_CONFIG_PATH = 'configs/intents/intents_snips.json'
# PIPELINE_CONFIG_PATH = 'configs/ner/ner_dstc2.json'
# PIPELINE_CONFIG_PATH = 'configs/ner/ner_rus.json'
# PIPELINE_CONFIG_PATH = 'configs/ner/slotfill_dstc2.json'
# PIPELINE_CONFIG_PATH = 'configs/error_model/brillmoore_wikitypos_en.json'
# PIPELINE_CONFIG_PATH = 'configs/error_model/brillmoore_kartaslov_ru.json'
# PIPELINE_CONFIG_PATH = 'configs/error_model/levenshtein_searcher.json'
# PIPELINE_CONFIG_PATH = 'configs/go_bot/config.json'
# PIPELINE_CONFIG_PATH = 'configs/go_bot/config_minimal.json'
# PIPELINE_CONFIG_PATH = 'configs/go_bot/config_all.json'
# PIPELINE_CONFIG_PATH = 'configs/squad/squad.json'
# PIPELINE_CONFIG_PATH = 'configs/ranking/ranking_insurance.json'
# PIPELINE_CONFIG_PATH = 'configs/seq2seq_go_bot/bot_kvret.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/en_ranker_prod.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/ru_ranker_prod.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/en_odqa_infer_prod.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/ru_odqa_infer_prod.json'
# PIPELINE_CONFIG_PATH = 'configs/odqa/ranker_test.json'
# PIPELINE_CONFIG_PATH = find_config('morpho_ru_syntagrus_train')
PIPELINE_CONFIG_PATH = find_config('morpho_ru_syntagrus_train_pymorphy')

if __name__ == '__main__':
    train_evaluate_model_from_config(PIPELINE_CONFIG_PATH)
    # interact_model(PIPELINE_CONFIG_PATH)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Train the 'deliver' intent sub-model from its DeepPavlov JSON config.

Created on Thu May 10 16:28:38 2018

@author: lsm
"""
from deeppavlov.core.commands.train import train_evaluate_model_from_config
from deeppavlov.core.commands.utils import expand_path, set_deeppavlov_root
from deeppavlov.core.common.file import read_json
import sys

# Make the project root importable so 'model.pipeline.*' resolves.
sys.path.insert(0, '../..')
# Star-imports are for their side effects: they register the custom pipeline
# components referenced by deliver_config.json.
from model.pipeline.text_normalizer import *
from model.pipeline.embedder import *
from model.pipeline.CNN_model import *

# Fix: read the same config file that is trained below. The old path
# '/subs/deliver/deliver_config.json' was an absolute path that does not
# exist; the sibling 'pay' script uses one consistent relative path.
config = read_json('model/subs/deliver/deliver_config.json')
set_deeppavlov_root(config)
train_evaluate_model_from_config('model/subs/deliver/deliver_config.json')
def main():
    """Parse CLI arguments and dispatch to the requested DeepPavlov command.

    Supported modes: download, train, evaluate, interact, interactbot,
    interactmsbot, riseapi, predict, install, crossval. Bot credentials may
    come from CLI flags or environment variables.
    """
    args = parser.parse_args()
    pipeline_config_path = find_config(args.config_path)

    # Data download happens either via the dedicated mode or the -d flag.
    if args.download or args.mode == 'download':
        deep_download(['-c', pipeline_config_path])

    # CLI flags take precedence over environment variables for credentials.
    token = args.token or os.getenv('TELEGRAM_TOKEN')
    ms_id = args.ms_id or os.getenv('MS_APP_ID')
    ms_secret = args.ms_secret or os.getenv('MS_APP_SECRET')
    multi_instance = args.multi_instance
    stateful = args.stateful

    if args.mode == 'train':
        train_evaluate_model_from_config(pipeline_config_path)
    elif args.mode == 'evaluate':
        train_evaluate_model_from_config(pipeline_config_path, to_train=False, to_validate=False)
    elif args.mode == 'interact':
        interact_model(pipeline_config_path)
    elif args.mode == 'interactbot':
        if not token:
            # Fixed message: the variable actually read above is TELEGRAM_TOKEN,
            # not TELEGRAM_BOT as the old message claimed.
            log.error('Token required: initiate -t param or TELEGRAM_TOKEN env var with Telegram bot token')
        else:
            interact_model_by_telegram(pipeline_config_path, token)
    elif args.mode == 'interactmsbot':
        # Both Bot Framework credentials are required before starting.
        if not ms_id:
            log.error('Microsoft Bot Framework app id required: initiate -i param '
                      'or MS_APP_ID env var with Microsoft app id')
        elif not ms_secret:
            log.error('Microsoft Bot Framework app secret required: initiate -s param '
                      'or MS_APP_SECRET env var with Microsoft app secret')
        else:
            run_ms_bf_default_agent(model_config_path=pipeline_config_path,
                                    app_id=ms_id,
                                    app_secret=ms_secret,
                                    multi_instance=multi_instance,
                                    stateful=stateful)
    elif args.mode == 'riseapi':
        # 'alice' api_mode selects the Yandex.Alice-compatible endpoint.
        alice = args.api_mode == 'alice'
        https = args.https
        ssl_key = args.key
        ssl_cert = args.cert
        start_model_server(pipeline_config_path, alice, https, ssl_key, ssl_cert)
    elif args.mode == 'predict':
        predict_on_stream(pipeline_config_path, args.batch_size, args.file_path)
    elif args.mode == 'install':
        install_from_config(pipeline_config_path)
    elif args.mode == 'crossval':
        # Cross-validation needs at least 2 folds by definition.
        if args.folds < 2:
            log.error('Minimum number of Folds is 2')
        else:
            n_folds = args.folds
            calc_cv_score(pipeline_config_path=pipeline_config_path, n_folds=n_folds, is_loo=False)
def train(self, model_level, model_name, path_to_data, path_to_config,
          path_to_global_embeddings, test_size=0.15, aug_method='word_dropout',
          samples_per_class=None, class_names=None, path_to_save_file=None,
          path_to_resulting_file=None):
    """Train an intents classification model end to end.

    Reads the labelled CSV at ``path_to_data``, splits it into
    train/valid/test, balances classes, builds compressed embeddings,
    patches the DeepPavlov config so all paths are rooted at the model
    directory, trains, and copies the resulting weights out. Progress is
    written to ``<model_path>/status.txt``.

    Args:
        model_level: either 'root' or 'subs' (validated below).
        model_name: human-readable name used in status messages.
        path_to_data: CSV file that must contain 'labels' and 'text' columns.
        path_to_config: DeepPavlov JSON config for the classifier.
        path_to_global_embeddings: source embeddings to compress.
        test_size: fraction held out at each of the two successive splits.
        aug_method: 'word_dropout' or 'duplicate' class-balancing method.
        samples_per_class: target samples per class for equalization.
        class_names: explicit class list; inferred from train labels if None.
        path_to_save_file: directory for the training checkpoint (weights.hdf5).
        path_to_resulting_file: directory the final weights are copied to.

    Raises:
        InvalidDataFormatError: required columns missing from the CSV.
        InvalidModelLevelError: ``model_level`` not in {'root', 'subs'}.
        InvalidDataAugmentationMethodError: unknown ``aug_method``.
        InvalidConfig: the patched config fails ``self.check_config``.
    """
    # preparing training/testing data
    df_raw = pd.read_csv(path_to_data)
    # preparing config
    config = read_json(path_to_config)
    if 'labels' not in df_raw or 'text' not in df_raw:
        raise InvalidDataFormatError(
            '\'labels\' and \'text\' columns must be in the dataframe')
    if model_level not in ['root', 'subs']:
        raise InvalidModelLevelError(
            'model level should be either \'root\' or \'subs\'')
    # Two successive splits: raw -> (train+valid, test), then the first part
    # -> (train, valid). Features and targets are the same frame here.
    __df_train, df_test, _, _ = train_test_split(df_raw, df_raw, test_size=test_size)
    df_train, df_val, _, _ = train_test_split(__df_train, __df_train, test_size=test_size)
    if aug_method not in ['word_dropout', 'duplicate']:
        raise InvalidDataAugmentationMethodError(
            '\'aug_method\' should be \'word_dropout\' or \'duplicate\'')
    df_train_equalized = self.__data_equalizer.equalize_classes(
        df_train, samples_per_class, aug_method)
    # Paths below are built by plain concatenation, so model_path is assumed
    # to end with '/' — TODO confirm against the config files.
    model_path = config['model_path']
    if not os.path.isdir(model_path):
        os.mkdir(model_path)
    if not os.path.isdir(model_path + 'data/'):
        os.mkdir(model_path + 'data/')
    df_train_equalized.to_csv(model_path + 'data/train.csv')
    # .sample(frac=1) shuffles rows before writing.
    df_val[['text', 'labels']].sample(frac=1).to_csv(model_path + 'data/valid.csv')
    df_test[['text', 'labels']].sample(frac=1).to_csv(model_path + 'df_test.csv')
    # making embeddings
    emb_len = IntentsClassifier.get_config_element_by_name(
        config=config['chainer']['pipe'], name='embedder')['emb_len']
    eb = EmbeddingsBuilder(
        resulting_dim=emb_len,
        path_to_original_embeddings=path_to_global_embeddings)
    tc = TextCorrector()
    corpus_cleaned = tc.tn.transform(df_raw.text.tolist())
    # Embedding pickles are expensive to build; reuse them when present.
    if not os.path.isfile(model_path + 'ft_compressed.pkl'):
        eb.compress_embeddings(corpus_cleaned, model_path + 'ft_compressed.pkl',
                               'pca', eb.path_to_original_embeddings)
    gc.collect()
    if not os.path.isfile(model_path + 'ft_compressed_local.pkl'):
        eb.build_local_embeddings(corpus_cleaned,
                                  model_path + 'ft_compressed_local.pkl')
    # dealing with class_names: persist the explicit list if given, otherwise
    # derive class order from label frequency in the training split.
    if type(class_names) == list:
        pickle.dump(class_names, open(model_path + 'class_names.pkl', 'wb'))
    else:
        pickle.dump(df_train['labels'].value_counts().index.tolist(),
                    open(model_path + 'class_names.pkl', 'wb'))
    # setting up saving and loading
    if not path_to_save_file == None:
        config['chainer']['pipe'][-1][
            'save_path'] = path_to_save_file + '/' + 'weights.hdf5'
    # NOTE(review): os.path.isdir(None) raises TypeError, and it is evaluated
    # before the None check on these two lines — confirm the intended order.
    if not os.path.isdir(
            path_to_save_file) and not path_to_save_file == None:
        os.mkdir(path_to_save_file)
    if not os.path.isdir(
            path_to_resulting_file) and not path_to_resulting_file == None:
        os.mkdir(path_to_resulting_file)
    emb_config = IntentsClassifier.get_config_element_by_name(
        config['chainer']['pipe'], 'embedder')
    cnn_config = IntentsClassifier.get_config_element_by_name(
        config['chainer']['pipe'], 'cnn_model')
    # Prefix the relative paths inside the config with model_path so the
    # pipeline loads all artifacts from this model's directory.
    config['chainer']['pipe'][config['chainer']['pipe'].index(emb_config)][
        'load_path'][0] = model_path + config['chainer']['pipe'][
        config['chainer']['pipe'].index(emb_config)]['load_path'][0]
    config['chainer']['pipe'][config['chainer']['pipe'].index(emb_config)][
        'load_path'][1] = model_path + config['chainer']['pipe'][
        config['chainer']['pipe'].index(emb_config)]['load_path'][1]
    config['chainer']['pipe'][config['chainer']['pipe'].index(
        cnn_config)]['classes'] = model_path + config['chainer']['pipe'][
        config['chainer']['pipe'].index(cnn_config)]['classes']
    config['dataset_reader'][
        'data_path'] = model_path + config['dataset_reader']['data_path']
    config['train']['tensorboard_log_dir'] = model_path + config['train'][
        'tensorboard_log_dir']
    # Remember the original load_path; it is restored after training so the
    # final copy destinations below use the un-prefixed name.
    load_path_bckp = config['chainer']['pipe'][-1]['load_path']
    check_results = self.check_config(config)
    if len(check_results) > 0:
        raise InvalidConfig(check_results, model_path, 'Config file is invalid')
    # training
    set_deeppavlov_root(config)
    # update training status
    training_status = 'Classification model {} {} is currently training. Total number of epochs is set to {}'.format(
        model_level, model_name, config['train']['epochs'])
    with open(model_path + 'status.txt', 'w') as f:
        f.writelines(training_status)
    # run the actual DeepPavlov training
    train_evaluate_model_from_config(config)
    # fixing load_path
    # updating status
    perf = IntentsClassifier.get_latest_accuracy(
        config)  # self.get_performance(config, model_path + 'df_test.csv')
    training_status = 'Classification model {} {} is trained \nf1_score (macro avg): {}'.format(
        model_level, model_name, perf)
    with open(model_path + 'status.txt', 'w') as f:
        f.writelines(training_status)
    # getting performance
    config['chainer']['pipe'][-1]['load_path'] = load_path_bckp
    # Copy the trained weights to both the resulting dir and the model dir.
    copy(
        path_to_save_file + '/' + 'weights.hdf5',
        path_to_resulting_file + '/' + config['chainer']['pipe'][-1]['load_path'])
    copy(path_to_save_file + '/' + 'weights.hdf5',
         model_path + config['chainer']['pipe'][-1]['load_path'])
}, 'main': True } # chainer.pipe: a list of consequently run components vocab_config['chainer']['pipe'] = [vocab_comp_config] json.dump(vocab_config, open("gobot/vocab_config.json", 'wt')) """ Download "dstc2_v2" dataset, need to do only once """ deep_download(['--config', 'gobot/vocab_config.json']) dstc2_path = deeppavlov.__path__[ 0] + '/../download/dstc2' # Data was downloaded to dstc2_path """ Step 3: Vocabulary Building """ train_evaluate_model_from_config("gobot/vocab_config.json") vocabs_path = deeppavlov.__path__[0] + '/../download/vocabs' vocab_comp_config['in'] = ['utterance'] vocab_comp_config['out'] = ['utterance_token_indices'] vocab_config['chainer']['pipe'] = [vocab_comp_config] vocab_config['chainer']['out'] = ['utterance_token_indices'] """ Step 4: Gobot Configurations """ db_config = {} """dataset_reader, dataset_iterator and metadata will be the same as for vocabulary only""" db_config['dataset_reader'] = dstc2_reader_comp_config db_config['dataset_iterator'] = dialog_iterator_comp_config