def path_setup(out_folder="", sublevel=0, data_path="dataset/"):
    """Resolve the dataset path against the package root and create an output folder.

    Args:
        out_folder: folder name appended to the current working directory.
        sublevel: directory levels to go up when locating the package root.
        data_path: dataset location, relative to the package root.

    Returns:
        (data_path, out_path): resolved dataset path and the created output path.
    """
    # Dataset location is resolved against the installed package, not the CWD.
    data_path = os_package_root_path(__file__, sublevel=sublevel, path_add=data_path)

    # NOTE(review): plain concatenation is intentional — callers pass values like
    # "/deepctr_test/", which os.path.join would treat as absolute and drop the CWD.
    out_path = f"{os.getcwd()}/{out_folder}"
    os.makedirs(out_path, exist_ok=True)

    log(data_path, out_path)
    return data_path, out_path
def fit(model=None, data_pars=None, compute_pars=None, out_pars=None, **kw):
    """Train the wrapped Keras model with early stopping on the training loss.

    Args:
        model: wrapper object exposing the underlying Keras model as ``model.model``.
        data_pars: dataset parameters forwarded to ``get_dataset``.
        compute_pars: must contain ``batch_size``, ``epochs`` and ``patience``.
        out_pars: unused here; kept for interface compatibility.

    Returns:
        (model, sess): fitted wrapper (with ``fit_metrics`` attached) and a
        placeholder session (always ``None`` for Keras models).
    """
    # BUG FIX: the defaults were mutable dicts ({}), shared across calls.
    data_pars = {} if data_pars is None else data_pars
    compute_pars = {} if compute_pars is None else compute_pars
    out_pars = {} if out_pars is None else out_pars

    batch_size = compute_pars['batch_size']
    epochs = compute_pars['epochs']
    patience = compute_pars["patience"]
    sess = None

    log("#### Loading dataset #############################################")
    data_pars["predict"] = False
    x_train, y_train, x_test, y_test = get_dataset(data_pars)

    # Stop training once the loss has not improved for `patience` epochs.
    early_stopping = EarlyStopping(monitor='loss', patience=patience, mode='min')

    history = model.model.fit(x_train, y_train,
                              batch_size=batch_size,
                              epochs=epochs,
                              callbacks=[early_stopping])
    model.fit_metrics = history.history
    return model, sess
def create_tabular_dataset(data_info, **args):
    """Build torchtext train/valid TabularDatasets plus the text vocabulary.

    Args:
        data_info: passed to ``analyze_datainfo_paths`` to resolve csv paths.
        **args: optional ``lang`` (default 'en') and ``pretrained_emb``
            (default 'glove.6B.300d').

    Returns:
        (tabular_train, tabular_valid, vocab): the two datasets and TEXT.vocab.
    """
    # BUG FIX: the original list contained `'textcat' 'entity_ruler'` — implicit
    # string concatenation producing the bogus pipe name 'textcatentity_ruler',
    # so neither component was actually disabled.
    disable = [
        'tagger', 'parser', 'ner', 'textcat', 'entity_ruler', 'sentencizer',
        'merge_noun_chunks', 'merge_entities', 'merge_subtokens'
    ]
    lang = args.get('lang', 'en')
    pretrained_emb = args.get('pretrained_emb', 'glove.6B.300d')

    _, path_train_dataset, path_valid_dataset = analyze_datainfo_paths(data_info)

    try:
        spacy_en = spacy.load(f'{lang}_core_web_sm', disable=disable)
    except Exception:  # model not installed yet — download it, then import directly
        log(f"Download {lang}")
        import importlib
        os.system(f"python -m spacy download {lang}")
        spacy_en = importlib.import_module(f'{lang}_core_web_sm').load(disable=disable)

    def tokenizer(text):
        # Tokenize with spaCy's tokenizer only (pipeline components disabled above).
        return [tok.text for tok in spacy_en.tokenizer(text)]

    # Creating field for text and label
    TEXT = Field(sequential=True, tokenize=tokenizer, lower=True)
    LABEL = Field(sequential=False)

    print('Preprocessing the text...')
    # clean the text
    TEXT.preprocessing = torchtext.data.Pipeline(clean_str)

    print('Creating tabular datasets...It might take a while to finish!')
    train_datafield = [('text', TEXT), ('label', LABEL)]
    tabular_train = TabularDataset(path=path_train_dataset, format='csv',
                                   skip_header=True, fields=train_datafield)

    valid_datafield = [('text', TEXT), ('label', LABEL)]
    tabular_valid = TabularDataset(path=path_valid_dataset, format='csv',
                                   skip_header=True, fields=valid_datafield)

    # BUG FIX: message previously read 'Building vocaulary...'.
    print('Building vocabulary...')
    TEXT.build_vocab(tabular_train, vectors=pretrained_emb)
    LABEL.build_vocab(tabular_train)

    return tabular_train, tabular_valid, TEXT.vocab
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the textcnn model.

    ``param_pars`` must contain:
        choice      : "json" to read a config file, or "test01" for built-in params.
        config_mode : section name inside the JSON config.
        data_path   : JSON config path (used when choice == "json").

    Raises:
        Exception: when ``choice`` is not supported (previously returned None,
        which crashed callers unpacking four values).
    """
    from jsoncomment import JsonComment
    json = JsonComment()
    pp = param_pars
    choice = pp['choice']
    config_mode = pp['config_mode']
    data_path = pp['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        # BUG FIX: the file handle was opened inline and never closed.
        with open(data_path, 'r') as f:
            cf = json.load(f)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    if choice == "test01":
        log("#### Path params ##########################################")
        data_path = path_norm("dataset/text/imdb.csv")
        out_path = path_norm("ztest/model_tch/textcnn/")
        model_path = os.path.join(out_path, "model")

        data_pars = {
            "data_path": path_norm("dataset/recommender/IMDB_sample.txt"),
            "train_path": path_norm("dataset/recommender/IMDB_train.csv"),
            "valid_path": path_norm("dataset/recommender/IMDB_valid.csv"),
            "split_if_exists": True,
            "frac": 0.99,
            "lang": "en",
            "pretrained_emb": "glove.6B.300d",
            "batch_size": 64,
            "val_batch_size": 64,
        }
        model_pars = {
            "dim_channel": 100,
            "kernel_height": [3, 4, 5],
            "dropout_rate": 0.5,
            "num_class": 2
        }
        compute_pars = {
            "learning_rate": 0.001,
            "epochs": 1,
            "checkpointdir": out_path + "/checkpoint/"
        }
        out_pars = {
            "path": model_path,
            "checkpointdir": out_path + "/checkpoint/"
        }
        return model_pars, data_pars, compute_pars, out_pars

    # Consistent with the sibling get_params implementations, which raise.
    raise Exception(f"Not support choice {choice} yet")
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """Local smoke test: resolve the parameter dicts, then materialise the dataset."""
    log("#### Loading params ##############################################")
    pars = dict(choice=pars_choice, data_path=data_path, config_mode=config_mode)
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)
    print(">>>>>> model_pars, data_pars, compute_pars, out_pars: ",
          model_pars, data_pars, compute_pars, out_pars)

    log("#### Loading dataset #############################################")
    data_tuple = get_dataset(data_pars)
    print(">>>> Xtuple: ", data_tuple)
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for the ARMDN model.

    ``param_pars`` must contain ``choice`` ("json" or "test0"), ``config_mode``
    and ``data_path``.

    Raises:
        Exception: when ``choice`` is not supported (previously fell through
        and returned None).
    """
    # BUG FIX: the default was a mutable dict ({}), shared across calls.
    param_pars = {} if param_pars is None else param_pars

    data_path = param_pars["data_path"]
    config_mode = param_pars["config_mode"]

    if param_pars["choice"] == "json":
        data_path = path_norm(data_path)
        # BUG FIX: file handle was opened inline and never closed.
        # NOTE(review): relies on a module-level `json` object — presumably
        # JsonComment or stdlib json; confirm at the top of the file.
        with open(data_path, mode='r') as f:
            cf = json.load(f)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    if param_pars["choice"] == "test0":
        log("#### Path params ##########################################")
        data_path = path_norm(data_path)
        out_path = path_norm("ztest/model_keras/armdn/")
        os.makedirs(out_path, exist_ok=True)
        log(data_path, out_path)

        data_pars = {
            "train_data_path": data_path + "timeseries/milk.csv",
            "train": False,
            "prediction_length": 12,
            "col_Xinput": ["milk_production_pounds"],
            "col_ytarget": "milk_production_pounds"
        }
        model_pars = {
            "lstm_h_list": [300, 200, 24],
            "last_lstm_neuron": 12,
            "timesteps": 12,
            "dropout_rate": 0.1,
            "n_mixes": 3,
            "dense_neuron": 10,
        }
        compute_pars = {
            "batch_size": 32,
            "clip_gradient": 100,
            "ctx": None,
            "epochs": 10,
            "learning_rate": 0.05,
            "patience": 50
        }
        outpath = out_path + "result"
        out_pars = {"outpath": outpath}
        return model_pars, data_pars, compute_pars, out_pars

    raise Exception(f"Not support choice {param_pars['choice']} yet")
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for keras textcnn.

    ``param_pars`` must contain ``choice`` ("json" or "test01"), ``config_mode``
    and ``data_path``.

    Raises:
        Exception: when ``choice`` is not supported.
    """
    # BUG FIX: the default was a mutable dict ({}), shared across calls.
    param_pars = {} if param_pars is None else param_pars

    from jsoncomment import JsonComment
    json = JsonComment()
    choice = param_pars['choice']
    config_mode = param_pars['config_mode']
    data_path = param_pars['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        # BUG FIX: file handle was opened inline and never closed.
        with open(data_path, mode='r') as f:
            cf = json.load(f)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    if choice == "test01":
        log("#### Path params ##########################################")
        data_path = path_norm("dataset/text/imdb.csv")
        out_path = path_norm("ztest/model_keras/textcnn/model.h5")
        model_path = out_path

        data_pars = {
            "path": data_path,
            "train": 1,
            "maxlen": 40,
            "max_features": 5,
        }
        model_pars = {
            "maxlen": 40,
            "max_features": 5,
            "embedding_dims": 50,
        }
        compute_pars = {
            "engine": "adam",
            "loss": "binary_crossentropy",
            "metrics": ["accuracy"],
            "batch_size": 1000,
            "epochs": 1
        }
        out_pars = {"path": out_path, "model_path": model_path}
        return model_pars, data_pars, compute_pars, out_pars

    else:
        raise Exception(f"Not support choice {choice} yet")
def fit(model, sess=None, data_pars=None, compute_pars=None, out_pars=None,
        **kwargs):
    """Train the wrapped torch model, checkpointing the best validation accuracy.

    Args:
        model: wrapper exposing the torch module as ``model.model``.
        sess: unused placeholder for interface compatibility.
        data_pars: forwarded to ``get_dataset`` (with ``out_pars``).
        compute_pars: must contain ``learning_rate`` and ``epochs``.
        out_pars: must contain ``checkpointdir`` for best-model snapshots.

    Returns:
        (model, None): wrapper with the trained module re-attached.
    """
    model0 = model.model
    lr = compute_pars['learning_rate']
    epochs = compute_pars["epochs"]
    device = _get_device()

    train_loss, train_acc = [], []
    test_loss, test_acc = [], []
    best_test_acc = -1

    optimizer = optim.Adam(model0.parameters(), lr=lr)
    train_iter, valid_iter, vocab = get_dataset(data_pars, out_pars)

    # load word embeddings to model
    model0.rebuild_embed(vocab)

    for epoch in range(1, epochs + 1):
        tr_loss, tr_acc = _train(model0, device, train_iter, optimizer, epoch,
                                 epochs)
        print(f'Train Epoch: {epoch} \t Loss: {tr_loss} \t Accuracy: {tr_acc}')

        ts_loss, ts_acc = _valid(model0, device, valid_iter)
        # BUG FIX: this message previously read "Train Epoch" although it
        # reports the validation metrics.
        print(f'Valid Epoch: {epoch} \t Loss: {ts_loss} \t Accuracy: {ts_acc}')

        if ts_acc > best_test_acc:
            best_test_acc = ts_acc
            # save paras(snapshot) of the best model so far
            log(f"model saves at {best_test_acc}% accuracy")
            os.makedirs(out_pars["checkpointdir"], exist_ok=True)
            torch.save(model0.state_dict(),
                       os.path.join(out_pars["checkpointdir"], "best_accuracy"))

        train_loss.append(tr_loss)
        train_acc.append(tr_acc)
        test_loss.append(ts_loss)
        test_acc.append(ts_acc)

    model.model = model0
    return model, None
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for crf_bilstm.

    ``param_pars`` must contain ``choice`` ("json" or "test01"), ``config_mode``
    and ``data_path``.

    Raises:
        Exception: when ``choice`` is not supported.
    """
    # BUG FIX: the default was a mutable dict ({}), shared across calls.
    param_pars = {} if param_pars is None else param_pars

    from jsoncomment import JsonComment
    json = JsonComment()
    pp = param_pars
    choice = pp["choice"]
    config_mode = pp["config_mode"]
    data_path = pp["data_path"]

    if choice == "json":
        data_path = path_norm(data_path)
        # BUG FIX: file handle was opened inline and never closed.
        with open(data_path, mode="r") as f:
            cf = json.load(f)
        cf = cf[config_mode]
        return cf["model_pars"], cf["data_pars"], cf["compute_pars"], cf["out_pars"]

    if choice == "test01":
        log("#### Path params ##########################################")
        data_path = path_norm("dataset/text/ner_dataset.csv")
        out_path = path_norm("ztest/model_keras/crf_bilstm/")
        model_path = os.path.join(out_path, "model")

        data_pars = {
            "path": data_path,
            "train": 1,
            "maxlen": 400,
            "max_features": 10,
        }
        model_pars = {}
        compute_pars = {
            "engine": "adam",
            "loss": "binary_crossentropy",
            "metrics": ["accuracy"],
            "batch_size": 32,
            "epochs": 1,
        }
        out_pars = {"path": out_path, "model_path": model_path}
        log(data_pars, out_pars)
        return model_pars, data_pars, compute_pars, out_pars

    else:
        raise Exception(f"Not support choice {choice} yet")
def fit(model, data_pars=None, model_pars=None, compute_pars=None,
        out_pars=None, *args, **kw):
    """Fine-tune the sentence-transformers model on the configured readers."""
    log("############ Dataloader setup #############################")
    (train_reader, val_reader), interal_states = get_dataset(data_pars)
    batch = compute_pars["batch_size"]

    train_ds = SentencesDataset(train_reader.get_examples('train.gz'),
                                model=model.model)
    train_dl = DataLoader(train_ds, shuffle=True, batch_size=batch)

    val_ds = SentencesDataset(val_reader.get_examples('val/sts-dev.csv'),
                              model=model.model)
    val_dl = DataLoader(val_ds, shuffle=True, batch_size=batch)

    log("############ Fit setup ##################################")
    # Loss class is resolved by name from sentence_transformers.losses.
    loss_cls = getattr(losses, compute_pars["loss"])
    criterion = loss_cls(
        model=model.model,
        sentence_embedding_dimension=model.model.get_sentence_embedding_dimension(),
        num_labels=train_reader.get_num_labels())
    criterion.float()

    model.model.float()
    model.fit_metrics = model.model.fit(
        train_objectives=[(train_dl, criterion)],
        evaluator=EmbeddingSimilarityEvaluator(val_dl),
        epochs=compute_pars["num_epochs"],
        evaluation_steps=compute_pars["evaluation_steps"],
        warmup_steps=compute_pars["warmup_steps"],
        output_path=out_pars["model_path"])
    return model, None
def save(model, session=None, save_pars=None):
    """Persist a torch model plus its parameter dicts under <path>/torch_model/.

    Args:
        model: wrapper carrying ``model_pars``, ``compute_pars`` and ``data_pars``.
        session: unused placeholder for interface compatibility.
        save_pars: must contain ``path``, the base output directory.
    """
    import pickle
    from mlmodels.util import save_tch

    save2 = copy.deepcopy(save_pars)
    path = path_norm(save_pars['path'] + "/torch_model/")
    os.makedirs(Path(path), exist_ok=True)

    ### Specialized part
    save2['path'] = path
    save_tch(model=model, save_pars=save2)

    ### Setup Model
    d = {"model_pars": model.model_pars,
         "compute_pars": model.compute_pars,
         "data_pars": model.data_pars}

    # BUG FIX: the pickle file handle was previously opened inline inside
    # pickle.dump(...) and never closed.
    with open(path + "/torch_model_pars.pkl", mode="wb") as f:
        pickle.dump(d, f)

    log(path, os.listdir(path))
def fit2(model, data_pars=None, model_pars=None, compute_pars=None,
         out_pars=None, *args, **kw):
    """Fine-tune via the dataloader-based pipeline (get_dataset2)."""
    log("############ Dataloader setup ###########################")
    data_pars['is_train'] = 1
    train_dl, val_dl, ds_pars = get_dataset2(data_pars, model=model)

    log("############ Fit setup ##################################")
    # Loss class is resolved by name from sentence_transformers.losses;
    # the label count comes from the dataset pars, not a reader.
    loss_cls = getattr(losses, compute_pars["loss"])
    criterion = loss_cls(
        model=model.model,
        sentence_embedding_dimension=model.model.get_sentence_embedding_dimension(),
        num_labels=ds_pars["train_num_labels"])
    criterion.float()

    model.model.float()
    model.fit_metrics = model.model.fit(
        train_objectives=[(train_dl, criterion)],
        evaluator=EmbeddingSimilarityEvaluator(val_dl),
        epochs=compute_pars["num_epochs"],
        evaluation_steps=compute_pars["evaluation_steps"],
        warmup_steps=compute_pars["warmup_steps"],
        output_path=out_pars["model_path"])
    return model, None
def test(data_path="dataset/", pars_choice="test01", config_mode="test"):
    """End-to-end local test: fit, predict, evaluate, save, reload, re-predict."""
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)

    log("#### Loading params ##############################################")
    pars = dict(choice=pars_choice, data_path=data_path, config_mode=config_mode)
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)

    log("#### Loading dataset #############################################")
    dataset_tuple = get_dataset(data_pars)

    log("#### Model init, fit #############################################")
    model = Model(model_pars, compute_pars)
    model, session = fit(model, data_pars, model_pars, compute_pars, out_pars)

    log("#### Predict #####################################################")
    ypred = predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics #####################################################")
    print(evaluate(model, ypred, data_pars, compute_pars, out_pars))

    log("#### Plot ########################################################")

    log("#### Save ###################################################")
    save_pars = {"path": out_pars['path']}
    save(model, session, save_pars=save_pars)

    log("#### Load #####################################################")
    model2, session2 = load(save_pars)
    print(model2, session2)

    log("#### Predict ################################################")
    print(predict(model2, session2, data_pars, compute_pars, out_pars))
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """Local test driven through mlmodels.models (module_load_full/fit/predict)."""
    log("#### Loading params ##############################################")
    pars = dict(choice=pars_choice, data_path=data_path, config_mode=config_mode)
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)

    log("#### Loading dataset #############################################")
    dataset_tuple = get_dataset(data_pars)

    log("#### Model init, fit #############################################")
    from mlmodels.models import module_load_full, fit, predict
    module, model = module_load_full("model_keras.namentity_crm_bilstm_dataloader",
                                     model_pars, data_pars, compute_pars)
    model, sess = fit(module, model, data_pars=data_pars,
                      compute_pars=compute_pars, out_pars=out_pars)

    log("#### Predict #####################################################")
    data_pars["train"] = 0
    ypred = predict(module, model, data_pars=data_pars,
                    compute_pars=compute_pars, out_pars=out_pars)

    log("#### metrics #####################################################")
    print(fit_metrics(model, data_pars=data_pars, compute_pars=compute_pars,
                      out_pars=out_pars))

    log("#### Plot ########################################################")

    log("#### Save/Load ###################################################")
def test_single(data_path="dataset/", pars_choice="json", config_mode="test"):
    """Local test of the single-model path: fit, predict, evaluate, save/load."""
    log("#### Loading params ##############################################")
    pars = dict(choice=pars_choice, data_path=data_path, config_mode=config_mode)
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)
    log(data_pars, out_pars)

    log("#### Loading dataset #############################################")

    log("#### Model init, fit #############################################")
    model = Model(model_pars, data_pars, compute_pars)
    fitted_model = fit(model.model, data_pars, compute_pars, out_pars)

    log("#### Predict #####################################################")
    ypred = predict(fitted_model, data_pars, compute_pars, out_pars)
    print(ypred[:10])

    log("#### metrics #####################################################")
    print(evaluate(fitted_model, data_pars, compute_pars, out_pars))

    log("#### Plot ########################################################")

    log("#### Save/Load ###################################################")
    # Export as a Keras model, persist it, then reload and re-predict.
    save(model=fitted_model.export_model(), save_pars=out_pars,
         config_mode=config_mode)
    loaded_model = load(out_pars, config_mode)
    ypred = predict(loaded_model, data_pars=data_pars,
                    compute_pars=compute_pars, out_pars=out_pars)
    print(ypred[:10])
def get_params(param_pars=None, **kw):
    """Return (model_pars, data_pars, compute_pars, out_pars) for charcnn.

    ``param_pars`` must contain ``choice`` ("json" or "test01"), ``config_mode``
    and ``data_path``.

    Raises:
        Exception: when ``choice`` is not supported.
    """
    # BUG FIX: the default was a mutable dict ({}), shared across calls.
    param_pars = {} if param_pars is None else param_pars

    from jsoncomment import JsonComment
    json = JsonComment()
    pp = param_pars
    choice = pp['choice']
    config_mode = pp['config_mode']
    data_path = pp['data_path']

    if choice == "json":
        data_path = path_norm(data_path)
        # BUG FIX: file handle was opened inline and never closed.
        with open(data_path, mode='r') as f:
            cf = json.load(f)
        cf = cf[config_mode]
        return cf['model_pars'], cf['data_pars'], cf['compute_pars'], cf['out_pars']

    if choice == "test01":
        log("#### Path params ##########################################")
        # (removed an unused `root = path_norm()` assignment)
        data_path = path_norm("dataset/text/imdb.npz")
        out_path = path_norm("ztest/model_keras/charcnn/")
        model_path = os.path.join(out_path, "model")

        model_pars = {
            "embedding_size": 128,
            "conv_layers": [[256, 10], [256, 7], [256, 5], [256, 3]],
            "fully_connected_layers": [1024, 1024],
            "threshold": 1e-6,
            "dropout_p": 0.1,
            "optimizer": "adam",
            "loss": "categorical_crossentropy"
        }
        data_pars = {
            "train": True,
            "alphabet": "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}",
            "alphabet_size": 69,
            "input_size": 1014,
            "num_of_classes": 4,
            "train_data_source": path_norm("dataset/text/ag_news_csv/train.csv"),
            "val_data_source": path_norm("dataset/text/ag_news_csv/test.csv")
        }
        compute_pars = {
            "epochs": 1,
            "batch_size": 128
        }
        out_pars = {
            "path": path_norm("ztest/ml_keras/charcnn/charcnn.h5"),
            "data_type": "pandas",
            "size": [0, 0, 6],
            "output_size": [0, 6]
        }
        return model_pars, data_pars, compute_pars, out_pars

    else:
        raise Exception(f"Not support choice {choice} yet")
def test(data_path="dataset/", pars_choice=0, **kwargs):
    """Local deepctr test: fit via module_load_full, predict, score, save/reload."""
    log("#### Loading params ##############################################")
    model_pars, data_pars, compute_pars, out_pars = get_params(
        choice=pars_choice, data_path=data_path, **kwargs)
    print(model_pars, data_pars, compute_pars, out_pars)

    log("#### Loading dataset #############################################")
    dataset = get_dataset(data_pars)

    log("#### Model init, fit #############################################")
    from mlmodels.models import module_load_full, fit, predict
    module, model = module_load_full("model_keras.01_deepctr", model_pars,
                                     data_pars, compute_pars, dataset=dataset)
    model = fit(module, model, data_pars=data_pars, compute_pars=compute_pars,
                out_pars=out_pars, dataset=dataset)

    # Prediction runs without its own log banner (disabled upstream).
    ypred = predict(module, model, compute_pars=compute_pars,
                    data_pars=data_pars, out_pars=out_pars, dataset=dataset)

    log("#### metrics ####################################################")
    print(metrics(ypred, dataset[1], compute_pars=compute_pars,
                  data_pars=data_pars, out_pars=out_pars))

    log("#### Plot #######################################################")

    log("#### Save/Load ##################################################")
    save_keras(model, save_pars=out_pars)
    from deepctr.layers import custom_objects
    model2 = load_keras(out_pars, custom_pars={"custom_objects": custom_objects})
    model2.model.summary()
def get_params(choice="", data_path="dataset/", config_mode="test", **kwargs):
    """Build (model_pars, data_pars, compute_pars, out_pars) for the deepctr tests.

    choice:
        "json"    : load everything from the default JSON config file.
        0 / 1     : criteo sample (1 adds feature hashing).
        2 / 3 / 4 : movielens sample (3 adds multi-value, 4 adds hashing too).
        5         : synthetic data for kwargs["model_name"].

    Raises:
        Exception: for an unsupported ``choice`` (previously fell through to an
        unbound-name NameError at the final return).
    """
    if choice == "json":
        model_pars, data_pars, compute_pars, out_pars = config_load(
            data_path, file_default="model_keras/01_deepctr.json",
            config_mode=config_mode)
        return model_pars, data_pars, compute_pars, out_pars

    if choice == 0:
        log("#### Path params ###################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")
        train_data_path = data_path + "recommender/criteo_sample.txt"
        data_pars = {"train_data_path": train_data_path,
                     "dataset_type": "criteo", "test_size": 0.2}

        log("#### Model params #################################################")
        model_pars = {"task": "binary", "model_name": "DeepFM",
                      "optimization": "adam", "cost": "binary_crossentropy"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 1:
        log("#### Path params ##################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")
        train_data_path = data_path + "recommender/criteo_sample.txt"
        data_pars = {"train_data_path": train_data_path, "hash_feature": True,
                     "dataset_type": "criteo", "test_size": 0.2}

        log("#### Model params #################################################")
        model_pars = {"task": "binary", "model_name": "DeepFM",
                      "optimization": "adam", "cost": "binary_crossentropy"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 2:
        log("#### Path params ################################################")
        data_path, _ = path_setup(out_folder="/ here_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")
        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path,
                     "dataset_type": "movie_len", "test_size": 0.2}

        log("#### Model params ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM",
                      "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 3:
        log("#### Path params ##################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")
        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path, "multiple_value": True,
                     "dataset_type": "movie_len", "test_size": 0.2}

        log("#### Model params ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM",
                      "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 4:
        log("#### Path params #################################################")
        data_path, _ = path_setup(out_folder="/deepctr_test/", data_path=data_path)
        out_path = path_norm("ztest/model_keras/deepctr/model.h5")
        train_data_path = data_path + "/recommender/movielens_sample.txt"
        data_pars = {"train_data_path": train_data_path, "multiple_value": True,
                     "hash_feature": True, "dataset_type": "movie_len",
                     "test_size": 0.2}

        log("#### Model params ################################################")
        model_pars = {"task": "regression", "model_name": "DeepFM",
                      "optimization": "adam", "cost": "mse"}
        compute_pars = {"batch_size": 256, "epochs": 10, "validation_split": 0.2}
        out_pars = {"path": out_path}

    elif choice == 5:
        log("#### Path params #################################################")
        # BUG FIX: `model_name = kwargs["model_name"]` was assigned twice.
        model_name = kwargs["model_name"]
        out_path = path_norm(f"ztest/model_keras/deepctr/model_{model_name}.h5")
        data_pars = {"dataset_type": "synthesis", "sample_size": 8,
                     "test_size": 0.2, "dataset_name": model_name,
                     **DATA_PARAMS[model_name]}

        log("#### Model params ################################################")
        model_pars = {"model_name": model_name, "optimization": "adam",
                      "cost": "mse"}
        compute_pars = {"batch_size": 100, "epochs": 1, "validation_split": 0.5}
        out_pars = {"path": out_path}

    else:
        raise Exception(f"Not support choice {choice} yet")

    return model_pars, data_pars, compute_pars, out_pars
def test(data_path="dataset/", pars_choice="test0", config_mode="test"):
    """Local ARMDN test: fit, predict, plot the forecast, then save the model."""
    log("#### Loading params ##############################################")
    pars = dict(choice=pars_choice, config_mode=config_mode, data_path=data_path)
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)

    log("#### Model init ##################################################")
    model = Model(model_pars=model_pars, data_pars=data_pars,
                  compute_pars=compute_pars)

    log("### Model Fit ######################################################")
    fit(model=model, data_pars=data_pars, compute_pars=compute_pars)
    log("fitted metrics", model.fit_metrics)

    log("#### Predict #####################################################")
    data_pars["predict"] = True
    y_pred, y_test = predict(model=model, model_pars=model_pars,
                             data_pars=data_pars)

    log("### Plot #########################################################3#")
    data_pars["predict"] = True
    metrics_plot({"plot_type": "line",
                  "pred": y_pred,
                  "outpath": out_pars["outpath"],
                  "actual": y_test})

    log("#### Save ###################################################")
    save(model=model, session=None, save_pars=out_pars)

    log("#### Load ###################################################")
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """End-to-end local test: load params/dataset, fit, predict, evaluate,
    then save and reload the model.

    Args:
        data_path: base dataset path (normalised via ``path_norm``).
        pars_choice: ``get_params`` selector.
        config_mode: JSON config section name.
    """
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)

    log("#### Loading params ##############################################")
    param_pars = {
        "choice": pars_choice,
        "data_path": data_path,
        "config_mode": config_mode
    }
    model_pars, data_pars, compute_pars, out_pars = get_params(param_pars)

    # BUG FIX: message previously read "Loading daaset".
    log("#### Loading dataset #############################################")
    Xtuple = get_dataset(data_pars)

    log("#### Model init, fit #############################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)
    model, session = fit(model, data_pars, compute_pars, out_pars)

    log("#### Predict #####################################################")
    data_pars["train"] = 0
    ypred = predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics #####################################################")
    metrics_val = evaluate(model, data_pars, compute_pars, out_pars)
    print(metrics_val)

    log("#### Plot ########################################################")

    log("#### Save/Load ###################################################")
    save(model, session, save_pars=out_pars)
    model2 = load(out_pars)
    # Reload sanity-check only; predict/metrics on model2 stay disabled.
    print(model2)
def test2(data_path="dataset/", pars_choice="json", config_mode="test"):
    """Local test variant: predict without fitting, then save/reload/re-predict."""
    log("#### Loading params ##############################################")
    pars = dict(choice=pars_choice, data_path=data_path, config_mode=config_mode)
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)
    log(data_pars, out_pars)

    log("#### Loading dataset #############################################")

    log("#### Model init, fit #############################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)
    # Fitting is intentionally skipped; prediction runs on the fresh model.

    log("#### Predict #####################################################")
    predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics #####################################################")

    log("#### Plot ########################################################")

    log("#### Save/Load ###################################################")
    save_pars = {"path": out_pars["path"]}
    save(model=model, save_pars=save_pars)
    model2 = load(save_pars)
    ypred = predict(model2, data_pars=data_pars, compute_pars=compute_pars,
                    out_pars=out_pars)
    print(model2)
def get_dataset2(data_pars=None, model=None, **kw):
    """Build sentence-transformers dataloaders from JSON-style data_pars.

    "data_pars": {
        "data_path": "dataset/GOOG-year.csv", "data_type": "pandas",
        "size": [0, 0, 6], "output_size": [0, 6]
    },

    Train mode (``data_pars["is_train"]`` truthy):
        returns (train_dataloader, val_dataloader, pars) with
        pars["train_num_labels"] taken from the training reader.
    Inference mode:
        returns (val_reader, pars).
    """
    istrain = data_pars.get("is_train", 0)
    mode = "train" if istrain else "test"
    data_type = data_pars[f"{mode}_type"].lower()

    def get_reader(data_type, path):
        # Map the configured type onto a sentence-transformers reader.
        if data_type == 'nli':
            Reader = readers.NLIDataReader
        elif data_type == 'sts':
            Reader = readers.STSDataReader
        else:
            # BUG FIX: the fallback used to assign the *string*
            # "MyCustomReader()", which then crashed with a TypeError when
            # called. Fail explicitly instead.
            raise ValueError(f"Unsupported data_type: {data_type}")
        return Reader(path)

    def get_filename(pars, mode='test'):
        # BUG FIX: the first parameter was named `data_type` although every
        # call site passes data_pars; the body now reads the argument instead
        # of silently reaching into the enclosing scope.
        if mode == 'train':
            return 'train.gz' if pars["train_type"].lower() == 'nli' else 'sts-train.csv'
        return 'dev.gz' if pars["test_type"].lower() == 'nli' else 'sts-dev.csv'

    log("############ Dataloader setup #############################")
    if istrain:
        train_fname = get_filename(data_pars, mode='train')
        train_reader = get_reader(data_type, data_pars['train_path'])
        train_data = SentencesDataset(train_reader.get_examples(train_fname),
                                      model=model.model)
        train_dataloader = DataLoader(train_data, shuffle=True,
                                      batch_size=data_pars["batch_size"])

        val_fname = get_filename(data_pars, mode='test')
        val_reader = get_reader(data_type, data_pars['test_path'])
        val_data = SentencesDataset(val_reader.get_examples(val_fname),
                                    model=model.model)
        val_dataloader = DataLoader(val_data, shuffle=True,
                                    batch_size=data_pars["batch_size"])

        pars = {"train_num_labels": train_reader.get_num_labels()}
        return train_dataloader, val_dataloader, pars

    #### Inference part
    val_reader = get_reader(data_type, data_pars['test_path'])
    pars = {
        "train_fname": 'train.gz'
        if data_pars["train_type"].lower() == 'nli' else 'sts-train.csv'
    }
    return val_reader, pars
def test(data_path="dataset/", pars_choice="json", config_mode="test"):
    """Local round-trip test: fit, save, reload, predict from both, evaluate."""
    from mlmodels.util import path_norm
    data_path = path_norm(data_path)
    log("Json file path: ", data_path)

    log("#### Loading params ##############################################")
    pars = dict(choice=pars_choice, data_path=data_path, config_mode=config_mode)
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)
    log(model_pars, data_pars, compute_pars, out_pars)

    log("#### Loading dataset #############################################")
    dataset_tuple = get_dataset(data_pars)
    print(len(dataset_tuple))

    log("#### Model init #############################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)

    log("#### Model fit #############################################")
    data_pars["train"] = 1
    model, session = fit(model, session, data_pars, compute_pars, out_pars)

    log("#### Save ########################################################")
    save_pars = {"path": out_pars['path'] + "/model.pkl"}
    save(model, session, save_pars=save_pars)

    log("#### Load ########################################################")
    model2, session2 = load(save_pars)

    log("#### Predict from Load ###########################################")
    data_pars["train"] = 0
    ypred, _ = predict(model2, session2, data_pars, compute_pars, out_pars)

    log("#### Predict #####################################################")
    data_pars["train"] = 0
    ypred, _ = predict(model, session, data_pars, compute_pars, out_pars)

    log("#### metrics #####################################################")
    log(evaluate(model, session, data_pars, compute_pars, out_pars))

    log("#### Plot ########################################################")
def test_train(data_path, pars_choice, model_name):
    """Local training test: init, fit, save, reload, predict from the reload."""
    log("#### Loading params ##############################################")
    pars = dict(choice=pars_choice, data_path=data_path, model_name=model_name)
    model_pars, data_pars, compute_pars, out_pars = get_params(pars)
    log(data_pars, out_pars)

    log("#### Loading dataset #############################################")

    log("#### Model init ##################################################")
    session = None
    model = Model(model_pars, data_pars, compute_pars)

    log("#### Model fit #############################################")
    model, session = fit(model, data_pars, compute_pars, out_pars)

    log("#### Predict #####################################################")

    log("#### metrics #####################################################")

    log("#### Plot ########################################################")

    log("#### Save ########################################################")
    save_pars = {"path": out_pars["path"]}
    save(model=model, save_pars=save_pars)

    log("#### Load ###################################################")
    model2 = load(save_pars)

    log("#### Predict after Load ###########################################")
    ypred = predict(model2, data_pars=data_pars, compute_pars=compute_pars,
                    out_pars=out_pars)
    print(model2)