def main(config_filename):
    """Train a model for one config file, then predict on the test set.

    Parameters:
        config_filename: path to the config file consumed by Config.load().
    """
    config = Config()
    config.load(config_filename)
    train_data, test_data = get_data(config.data_config, config.competition)
    # Joint vocabulary over train and test so the embedding file covers both.
    vocabulary = train_data.get_vocabulary(config.lower_vocabulary).merge(
        test_data.get_vocabulary(config.lower_vocabulary))

    # (language, filename marker) -> (source vector file, vocab cap).
    # Replaces four copy-pasted if-blocks with a single lookup table.
    # NOTE(review): source paths are machine-specific — consider moving to config.
    embedding_sources = {
        ("ru", "fasttext"): ("/media/yallen/My Passport/Models/Vectors/FastText/wiki.ru.vec", 100000),
        ("ru", "w2v"): ("/media/yallen/My Passport/Models/Vectors/RDT/russian-big-w2v.txt", 100000),
        ("en", "w2v"): ("/media/yallen/My Passport/Models/Vectors/W2V/GoogleNews-vectors-negative300.vec", 150000),
        ("en", "fasttext"): ("/media/yallen/My Passport/Models/Vectors/FastText/wiki.en.vec", 150000),
    }
    if not os.path.exists(config.embeddings_filename):
        for (language, marker), (source_path, cap) in embedding_sources.items():
            if config.data_config.language == language and marker in config.embeddings_filename:
                shrink_w2v(source_path, vocabulary, cap, config.embeddings_filename)
                # First matching source wins (original code stopped re-shrinking
                # once the embeddings file existed).
                break

    char_set = train_data.get_char_set()
    print(vocabulary.size())
    print(char_set)
    targets, additionals, rev_categories, output_sizes = get_targets_additionals(train_data)
    train_model(config_filename, train_data, vocabulary, char_set, targets,
                additionals, output_sizes)
    predict(config_filename, test_data, vocabulary, char_set, targets,
            additionals, rev_categories)
def main():
    """Entry point: parse CLI arguments and dispatch to the chosen sub-command."""
    paras = args_process()
    if not paras:
        return
    # Map each sub-command to its handler; unknown commands are ignored,
    # matching the original if/elif chain with no else branch.
    handlers = {
        "detection": detection_msi,
        "train": train_model,
        "show": show_model,
    }
    handler = handlers.get(paras.command)
    if handler is not None:
        handler(paras)
def test_train_model(mock_cross_validate, input_df, cv_results):
    """train_model should return a fitted sklearn Pipeline and a float64 accuracy."""
    # Mock return value of cross_validate (fixes 'retuirn' typo in comment)
    mock_cross_validate.return_value = cv_results
    # Train model on the preprocessed input
    df = preprocess_data(input_df)
    model, test_accuracy = train_model(df)
    # isinstance is the idiomatic type check (type(x) == T rejects subclasses
    # and is flagged by linters); behavior is otherwise unchanged.
    assert isinstance(model, Pipeline)
    assert isinstance(test_accuracy, np.float64)
def get_model_loss(params_dict):
    """Train model and compute loss (1-AUC) on fold validation sets

    Parameters:
        params_dict (dict): Model parameters

    Returns:
        dict: Dict with loss and round status
    """
    global max_mean_val_AUC
    global best_params

    # hyperopt samples n_estimators as a float; the model needs an int.
    params_dict["n_estimators"] = int(params_dict["n_estimators"])

    splitter = KFold(n_splits=NFOLDS)
    fold_val_aucs = []
    for fold_idx, (train_idxs, validation_idxs) in enumerate(splitter.split(X)):
        model, train_AUC, val_AUC = train_model(X_train=X[train_idxs],
                                                y_train=Y[train_idxs],
                                                X_validation=X[validation_idxs],
                                                y_validation=Y[validation_idxs],
                                                params_dict=params_dict,
                                                verbose=False)
        fold_val_aucs.append(val_AUC)
        print("Fold {} Val AUC: {}".format(fold_idx, val_AUC))

    mean_val_AUC = sum(fold_val_aucs) / NFOLDS
    loss = 1 - mean_val_AUC

    # Persist the hyperopt trials object so an interrupted search can resume.
    pickle.dump(trials, open(trials_filepath, "wb"))

    # Track the best round seen so far in module-level state.
    if max_mean_val_AUC < mean_val_AUC:
        max_mean_val_AUC = mean_val_AUC
        best_params = params_dict

    print("Round Mean Val AUC: {}".format(mean_val_AUC))
    print("Max Mean Val AUC: {}".format(max_mean_val_AUC))
    print("Best params: {}".format(best_params))
    print("Nb trials executed: {}".format(len(trials)))

    return {"loss": loss, "status": STATUS_OK}
def train(self, pretrain=False):
    """Train the re-identification network.

    When *pretrain* is True, warm-start from the weights file produced by
    the feature pre-training step ("pretrain.h5").
    """
    if pretrain:
        self.reid_network.load_weights("pretrain.h5")
        log("Loaded pretrain weights")

    # Separate generators for the training and validation splits.
    train_gen = ReidGenerator(database=self.dataset,
                              batch_size=self.batch_size,
                              flag="train",
                              p=0.33)
    val_gen = ReidGenerator(database=self.dataset,
                            batch_size=self.batch_size,
                            flag="validation")

    log("Training [reid]")
    train_model(self.reid_network,
                generator_train=train_gen,
                generator_val=val_gen,
                batch_size=self.batch_size,
                steps_per_epoch=self.steps_per_epoch,
                epochs=self.epochs,
                validation_steps=self.validation_steps,
                plot_title="loss [reid training]")
def main():
    """CLI entry point: train or prune a character-classification model."""
    cudnn.benchmark = True
    args = get_args()

    # Pick the dataloader set by dataset flag.
    if args.dataset == 'etl2':
        data_loaders, num_classes = get_etl2_dataloaders(args.model)
    else:
        data_loaders, num_classes = get_etl2_9g_dataloaders(args.model)

    def build_model():
        # Both constructors return a (model, name) pair.
        if args.model == "vgg11_bn":
            return vgg_model(num_classes)
        return chinese_model(num_classes)

    if args.train:
        model, name = build_model()
        train_model(model, data_loaders)
        torch.save(model.state_dict(),
                   f'trained_models/{args.model}_{args.dataset}.weights')
    elif args.prune:
        model, name = build_model()
        model.load_state_dict(
            torch.load(f'trained_models/{args.model}_{args.dataset}.weights'))
        finetuning_passes = 250
        prune_ratio = 0.90
        prune_model(model, data_loaders,
                    prune_ratio=prune_ratio,
                    finetuning_passes=finetuning_passes)
        # Dump the pruned architecture to stdout and to a text file.
        print(model)
        print(model,
              file=open(
                  f"trained_models/{args.model}_{prune_ratio}p_{finetuning_passes}it.txt",
                  'w'))
        torch.save(
            model.state_dict(),
            f'trained_models/pruned_{args.model}_{args.dataset}_finetune{finetuning_passes}.weights'
        )
def pretrain_network(self):
    """Pre-train the feature sub-network and save its weights to pretrain.h5.

    The saved weights are later consumed by train(pretrain=True).
    """
    modellib = importlib.import_module("src.model." + self.model_name)
    reid_network, feature_network = modellib.generate_model(
        input_shape=self.input_shape, lr=self.lr, feature=True)

    generator_train = featureGenerator(database=self.dataset,
                                       batch_size=self.batch_size,
                                       flag="train")
    generator_val = featureGenerator(database=self.dataset,
                                     batch_size=self.batch_size,
                                     flag="validation")

    log("Training [features]")
    # BUG FIX: generator_val was previously passed generator_train, so the
    # validation metrics were computed on training data and generator_val
    # went unused (compare the sibling train() method, which wires the
    # validation generator correctly).
    train_model(feature_network,
                generator_train=generator_train,
                generator_val=generator_val,
                batch_size=self.batch_size,
                steps_per_epoch=self.steps_per_epoch,
                epochs=self.epochs,
                validation_steps=self.validation_steps,
                plot_title="loss [feature training]")

    reid_network.save_weights("pretrain.h5")
def test_train_model_unhappy():
    """Training on malformed data should fail gracefully and return None."""
    bad_df = pd.DataFrame(data_bad, columns=columns_bad)
    result = train_model(bad_df, 0.3, "delay", 10)
    assert result is None
def test_train_model():
    """train_model should fit and return a DecisionTreeClassifier."""
    input_df = pd.DataFrame(data, columns=columns)
    fitted = train_model(input_df, 0.3, "delay", 10)
    assert isinstance(fitted, sklearn.tree.DecisionTreeClassifier)
# Train the SE-ResNeXt101 U-Net++ segmentation model with the 768px config.
# Import order is preserved: src.unet_plus is a wildcard import, so reordering
# could change which names win.
from src.train import train_model
from config.se101_768_config import cfg
from src.unet_plus import *
import torch.optim.lr_scheduler as lr_scheduler
from src.nadam import Nadam

# 6 output channels — presumably one per segmentation class; confirm with dataset.
model = SE_Res101UNet(6)
# Nadam optimizer with multi-step LR decay at the configured milestones.
optimizer = Nadam(model.parameters(), lr=cfg['base_lr'])
scheduler = lr_scheduler.MultiStepLR(optimizer, cfg['milestone'], gamma=cfg['gamma'])
train_model(model, optimizer, scheduler, cfg)
# Load input file if args.input is not None: input = pd.read_csv(args.input) logger.info('Input data loaded from %s', args.input) # acquire data from S3 and download it to the specified folder if args.step == 'acquire': try: output = acquire(S3_PUBLIC_KEY, S3_SECRET_KEY, FILE_NAME, BUCKET_NAME, S3_OBJECT_NAME) logger.info("Dataset is successfully downloaded from S3 bucket! ") except Exception: logger.error( "Could not open S3 connections given the input credentials! ") # clean the dataset if args.step == 'clean': output = clean(input, **config['transform']['clean']) logger.info("Finished cleaning the raw dataset!") if args.output is not None: output.to_csv(args.output, index=False, header=True) logger.info("Output saved to %s" % args.output) # train the model elif args.step == 'train': output = train_model(input, **config['train']['train_model']) logger.info("Finished training the model!") if args.output is not None: pickle.dump(output, open(args.output, "wb")) logger.info("Model saved to %s" % args.output)
def train(event=None, _=None):
    """HTTP/trigger handler: run model training, then report service status."""
    train_model()
    # Status string and HTTP 200 exactly as before.
    status_body = "API ONLINE v1.0st"
    return status_body, 200
f.write("Best n_estimators: " + str(classifier.best_params_["n_estimators"]) + '\n') f.write("Best max_depth: " + str(classifier.best_params_["max_depth"]) + '\n') f.write("Best subsample: " + str(classifier.best_params_["subsample"]) + '\n') f.write("CV AUC: " + str(cv_auc) + "\n") f.write("CV Accuracy: " + str(cv_acc) + "\n") f.write("Test AUC: " + str(test_auc) + "\n") f.write("Test Accuracy: " + str(test_acc) + "\n") f.close() logger.info( "File: {} created -- hyperparameters and scoring metrics saved" .format(args.scores_path)) except Exception: logger.error("Failed to save hyperparameters and scoring metrics") raise if args.full_model: try: trained_model = train_model(args.imputed_path, config.RANDOM_STATE, config.BEST_LR, config.BEST_NUM_EST, config.BEST_MAX_DEPTH, config.BEST_SUBSAMPLE, args.encoder_path) pickle.dump(trained_model, open(args.model_path, "wb")) logger.info("Trained model successfully created") except Exception: logger.error("Trained model was not fit successfully") raise
"""Run Scripts from Command Line""" from src.preprocess import make_dataset from src.train import train_model from src.evaluation import single_customer_evaluation, root_mean_squared_error if __name__ == '__main__': make_dataset() train_model() freq_predictions, freq_holdout = single_customer_evaluation(time_units=243) rmse = root_mean_squared_error(time_units=243) print(f"Single Customer Predictions:" f"\nPrediction:" f"\n {freq_predictions}" f"\nGround Truth: " f"\n {freq_holdout}" f"\n OVERALL RMSE: {rmse}")
def single_test(doc):
    """Train a fresh model, then run a prediction for a single document."""
    # Build and fit the training model.
    # NOTE(review): train_model() appears to be a class whose instance exposes
    # a train_model() method — confirm against its definition.
    trainer = train_model()
    trainer.train_model()
    predict(doc)
    # Log message kept verbatim ("prediction succeeded").
    logger.info('预测成功')
data['TEST'] = pd.read_csv(cfg['PATHS']['TEST_SET']) # Custom Keras callback that logs all training and validation metrics after each epoch to the current Azure run class LogRunMetrics(Callback): def on_epoch_end(self, epoch, log): for metric_name in log: if 'val' in metric_name: run.log('validation_' + metric_name.split('_')[-1], log[metric_name]) else: run.log('training_' + metric_name, log[metric_name]) #run.log('validation_auc', log['val_auc']) # Set model callbacks callbacks = [EarlyStopping(monitor='val_loss', verbose=1, patience=cfg['TRAIN']['PATIENCE'], mode='min', restore_best_weights=True), LogRunMetrics()] # Train a model start_time = datetime.datetime.now() model, test_metrics, test_generator = train_model(cfg, data, callbacks) print("TRAINING TIME = " + str((datetime.datetime.now() - start_time).total_seconds() / 60.0) + " min") # Log test set performance metrics, ROC, confusion matrix in Azure run test_predictions = model.predict_generator(test_generator, verbose=0) test_labels = test_generator.labels for metric_name in test_metrics: run.log('test_' + metric_name, test_metrics[metric_name]) covid_idx = test_generator.class_indices['COVID-19'] roc_plt = plot_roc("Test set", test_generator.labels, test_predictions, class_id=covid_idx) run.log_image("ROC", plot=roc_plt) cm_plt = plot_confusion_matrix(test_generator.labels, test_predictions, class_id=covid_idx) run.log_image("Confusion matrix", plot=cm_plt)
from src.load_data import load_data, prepare_data
from src.model import classifier_model
from src.train import train_model, save_model, evaluate_model
from src.utils import plot_learning_curves

# Main file to train a model with the name defined below
model_name = "2_blocks_20_epochs"

# Load the raw dataset, then transform it into model-ready arrays.
x_train, y_train, x_test, y_test = load_data()
x_train, y_train, x_test, y_test = prepare_data(x_train, y_train,
                                                x_test, y_test)

# Build the classifier and fit it on the training split,
# validating against the test split.
model = classifier_model()
trained_model, model_history = train_model(model, x_train, y_train,
                                           x_test, y_test)

# Persist the weights, report test metrics, and plot the learning curves.
save_model(trained_model, model_name)
evaluate_model(trained_model, x_test, y_test)
plot_learning_curves(model_history, model_name)
<p style="text-align:justify">This is a proof of concept using streamlit for a breast cancer diagnostic model, since one of the fundamental parts of a project is to allow the end user to have access to the product and be able to use it</p> ''', unsafe_allow_html=True) if check_if_model_exists(): st.write('<p style="color:blue;"><b>There is already a trained model! You can predict data now!</b></p>', unsafe_allow_html=True) else: st.write('<p style="color:red;"><b>We didn\'t find a trained model! Please train a model before making a prediction</b></p>', unsafe_allow_html=True) left_column, center_column, right_column = st.beta_columns(3) with left_column: if st.button('Train Model!'): with st.spinner('Starting trainning the Model!'): train_model(True) st.success('Model Trained and Saved!') di = dict() if check_if_model_exists(): with center_column: di['radius_mean'] = st.number_input('Radius Mean') di['texture_mean'] = st.number_input('Texture Mean') di['perimeter_mean'] = st.number_input('Perimeter Mean') di['area_mean'] = st.number_input('Area Mean') di['smoothness_mean'] = st.number_input('Smoothness Mean') di['compactness_mean'] = st.number_input('Compactness Mean') di['concavity_mean'] = st.number_input('Concavity Mean') di['concave points_mean'] = st.number_input('Concave Points Mean') di['symmetry_mean'] = st.number_input('Symmetry Mean')
import os
# Silence TensorFlow C++ logging (3 = errors only); must be set before TF loads.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from src.dataload import load_data
from src.train import train_model
from src.predict import predict_emotion
from src.audio_utils import record_audio, play_audio
from src.generic_utils import generate_report

print("Welcome to my Speech-Emotion-Detection project. Check out my handles:-\n\n[linkedin.com/in/aitik-gupta][github.com/aitikgupta][kaggle.com/aitikgupta]\n\n")

# Paths for the bundled dataset, user recordings, and the persisted model file.
DATASET_PATH = "./voices"
OUTPUT_PATH = "./output"
MODEL_PATH = "./model/model.h5"

choice = int(input("1) Train the model again.\n2) Test the model on 3 random voices.\n3) Test the model by your voice.[Note: In realtime, there are lot of noises than 'just' white noise, so results may differ.] \nEnter choice: "))
if choice == 1:
    # Retrain from scratch on the whole dataset and report metrics on it.
    print("[INFO] Model file will be overwritten!")
    dataset, labels = load_data(DATASET_PATH, mode="dev", n_random=-1, play_runtime=False)
    train_model(dataset=dataset, labels=labels, model_path=MODEL_PATH, n_splits=5, learning_rate=0.0001, epochs=30, batch_size=64, verbose=True)
    ytrue, ypred, probabilities = predict_emotion(dataset, labels, mode="dev", model_path=MODEL_PATH, verbose=False)
    generate_report(ytrue, ypred, verbose=True, just_acc=False)
elif choice == 2:
    # Spot-check the saved model on 3 random samples (plays the audio back).
    dataset, labels = load_data(DATASET_PATH, mode="dev", n_random=3, play_runtime=True)
    ytrue, ypred, probabilities = predict_emotion(dataset, labels, mode="dev", model_path=MODEL_PATH, verbose=True)
    generate_report(ytrue, ypred, verbose=True, just_acc=True)
else:
    # Predict on the user's own recordings found in OUTPUT_PATH.
    recording_path = os.path.join(OUTPUT_PATH, "recording.wav")
    inp = str(input(f"Record audio again? [All voices in {OUTPUT_PATH} will be used] (y|n): ")).lower()
    if inp == "y" or inp == "yes":
        record_audio(output_path=recording_path)
    dataset, _ = load_data(OUTPUT_PATH, mode="user", n_random=-1, play_runtime=True)
    # NOTE(review): dev-mode predict_emotion returns 3 values but only 2 are
    # unpacked here — presumably "user" mode returns a pair; confirm.
    _, _ = predict_emotion(dataset, mode="user", model_path=MODEL_PATH, verbose=True)
from src.preprocess import make_dataset
from src.folds import generate_folds
from src.train import train_model
from src.predict import test_model
import numpy as np

if __name__ == '__main__':
    # Pre-process dataset.
    train_clean, test_clean = make_dataset(raw_file_name="raw.csv")

    # Generate Folds for train_clean
    train_folds = generate_folds(file_name="train_clean.csv",
                                 fold_type="skfold",
                                 n_splits=5,
                                 save_file_name="train_folds.csv")

    # Train Model
    cv_results = train_model()
    # BUG FIX: the Precision line previously re-printed the ROC_AUC score
    # (cv_results['test_roc_auc']); it now reads the precision metric, matching
    # sklearn cross_validate's 'test_<metric>' key convention used for recall.
    print(f"\nTRAIN SCORES:"
          f"\nROC_AUC: {np.mean(cv_results['test_roc_auc'])}"
          f"\nPrecision: {np.mean(cv_results['test_precision'])}"
          f"\nRecall: {np.mean(cv_results['test_recall'])}")

    # Test Model.
    report, conf_mx = test_model()
    print(f"\nTEST RESULTS:"
          f"\n {report}"
          f"\nTrue Positive Rate: {round(conf_mx[1][1]*100, 3)}"
          f"\nFalse Positive Rate: {round(conf_mx[0][1]*100, 3)}"
          f"\nFalse Negative Rate: {round(conf_mx[1][0]*100, 3)}")
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from src.train import train_model

if __name__ == "__main__":
    # Earlier experiments, kept for reference:
    #   cyclic LR (~2 hrs):  train_model(num_epochs=35)
    #   constant LR:         train_model(num_epochs=25, clr=False)
    # Current run: constant LR with class weighting enabled.
    train_model(num_epochs=35, clr=False, weighted_classes=True)