def train(x_train, y_train, x_test, y_test):
    exp = Experiment(project_name="perception",
                     auto_histogram_gradient_logging=True)

    # log custom hyperparameters
    exp.log_parameters(params)

    # log any custom metric
    exp.log_metric('custom_metric', 0.95)

    # log a dataset hash
    exp.log_dataset_hash(x_train)

    # Define model
    model = build_model_graph(exp)

    model.fit(
        x_train,
        y_train,
        batch_size=exp.get_parameter('batch-size'),
        epochs=exp.get_parameter('epochs'),
        validation_data=(x_test, y_test),
    )

    score = model.evaluate(x_test, y_test, verbose=0)
    logging.info("Score %s", score)

    # Finalize model includes the following calls
    #   exp.log_confusion_matrix()
    #   exp.log_image()
    #   exp.log_histogram_3d()
    #   exp.add_tag()
    #   exp.log_model()
    utils.finalize_model(model, x_train, y_train, x_test, y_test, exp)
def train(self):
    os.mkdir(self.paths['path'])

    if self.use_comet and self.api_key and self.project_name and self.workspace:
        experiment = Experiment(api_key=self.api_key,
                                project_name=self.project_name,
                                workspace=self.workspace)
        experiment.log_dataset_hash(self.train_dataset)
        experiment.add_tags([
            str(self.architecture), "text_generation",
            f"nb_labels_{self.number_labels}"
        ])
        with experiment.train():
            hist = self.fit_dataset(self.train_dataset, self.val_dataset,
                                    self.epochs)
        experiment.end()
    elif self.use_comet:
        raise Exception(
            "Please provide an api_key, project_name and workspace for comet_ml"
        )
    else:
        callbacks = self.callback_func(
            tensorboard_dir=self.paths['tensorboard_path'],
            checkpoint_path=self.paths['checkpoint_path'])
        hist = self.model.fit_dataset(self.train_dataset, self.val_dataset,
                                      self.epochs, callbacks)

    self.metrics = get_metrics(hist, "sparse_categorical_accuracy")
    self.export_weights(self.model)
    self.export_info(self.model_info)
    self.export_metrics(self.metrics)
    self.export_tokenizer(self.tokenizer)
    if self.do_zip_model:
        self.zip_model()
def log_experiment(datapath, dataset, model, parameters, metrics):
    experiment = Experiment(api_key="COMET_KEY",
                            project_name="trending-topics")
    experiment.log_dataset_hash(dataset)
    experiment.log_parameter("model", model.__name__)
    experiment.log_parameter("dataset", datapath)
    for key, value in parameters.items():
        experiment.log_parameter(key, value)
    for key, value in metrics.items():
        # skip NaN metrics; `not` is clearer than the bitwise `~` used originally
        if not np.isnan(value):
            experiment.log_metric(key, value)
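As a hedged illustration only, the helper above might be invoked like this; the data frame, file path, model class, and metric values are placeholders, not from the original project:

from sklearn.ensemble import RandomForestClassifier
import pandas as pd

df = pd.read_csv("data/trending_topics.csv")   # placeholder path
log_experiment(
    datapath="data/trending_topics.csv",
    dataset=df,
    model=RandomForestClassifier,              # logged via model.__name__
    parameters={"n_estimators": 100, "max_depth": 8},
    metrics={"f1": 0.71, "precision": 0.68, "recall": 0.74},
)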
def _train_with_comet(self, train_dataset, val_dataset):
    experiment = Experiment(api_key=self.api_key,
                            project_name=self.project_name,
                            workspace=self.workspace)
    experiment.log_dataset_hash(train_dataset)
    experiment.add_tags([
        str(self.architecture), self.name,
        f"nb_labels_{self.label_encoder_classes_number}"
    ])
    with experiment.train():
        hist = self.fit_dataset(train_dataset, val_dataset)
    experiment.end()
    return hist
def train(hyper_params):
    mnist = get_data()

    # Get graph definition, tensors and ops
    train_step, cross_entropy, accuracy, x, y, y_ = build_model_graph(
        hyper_params)

    # log parameters to Comet.ml
    import os

    # Setting the API key (saved as environment variable)
    exp = Experiment(
        api_key="<HIDDEN>",
        # or
        # api_key=os.environ.get("COMET_API_KEY"),
        project_name="prototype",
        workspace="jaimemarijke")
    exp.log_parameters(hyper_params)
    exp.log_dataset_hash(mnist)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        exp.set_model_graph(sess.graph)

        for i in range(hyper_params["steps"]):
            batch = mnist.train.next_batch(hyper_params["batch_size"])
            exp.set_step(i)

            # Compute train accuracy every 10 steps
            if i % 10 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0],
                    y_: batch[1]
                })
                print('step %d, training accuracy %g' % (i, train_accuracy))
                exp.log_metric("acc", train_accuracy)

            # Update weights (back propagation) and fetch the loss;
            # Operation.run() returns None, so cross_entropy must be fetched explicitly
            _, loss = sess.run([train_step, cross_entropy],
                               feed_dict={x: batch[0], y_: batch[1]})
            exp.log_metric("loss", loss)

        ### Finished Training ###

        # Compute test accuracy
        acc = accuracy.eval(feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels
        })
        print('test accuracy %g' % acc)
def train(hyper_params):
    mnist = get_data()

    # Get graph definition, tensors and ops
    train_step, cross_entropy, accuracy, x, y, y_ = build_model_graph(
        hyper_params)

    experiment = Experiment(project_name="tf")
    experiment.log_parameters(hyper_params)
    experiment.log_dataset_hash(mnist)

    with tf.Session() as sess:
        with experiment.train():
            sess.run(tf.global_variables_initializer())
            experiment.set_model_graph(sess.graph)

            for i in range(hyper_params["steps"]):
                batch = mnist.train.next_batch(hyper_params["batch_size"])
                experiment.set_step(i)

                # Compute train accuracy every 10 steps
                if i % 10 == 0:
                    train_accuracy = accuracy.eval(feed_dict={
                        x: batch[0],
                        y_: batch[1]
                    })
                    print('step %d, training accuracy %g' % (i, train_accuracy))
                    experiment.log_metric("accuracy", train_accuracy, step=i)

                # Update weights (back propagation)
                _, loss_val = sess.run([train_step, cross_entropy],
                                       feed_dict={
                                           x: batch[0],
                                           y_: batch[1]
                                       })
                experiment.log_metric("loss", loss_val, step=i)

        ### Finished Training ###

        with experiment.test():
            # Compute test accuracy
            acc = accuracy.eval(feed_dict={
                x: mnist.test.images,
                y_: mnist.test.labels
            })
            experiment.log_metric("accuracy", acc)
            print('test accuracy %g' % acc)
def train(x_train, y_train, x_test, y_test):
    # Define model
    model = build_model_graph()

    # initiate an Experiment with your api key from https://www.comet.ml
    experiment = Experiment(api_key="YOUR-API-KEY", project_name='my project')
    experiment.log_dataset_hash(x_train)

    # and that's it... when you run your code, all relevant data will be tracked
    # and logged at https://www.comet.ml/view/YOUR-API-KEY
    model.fit(x_train, y_train, batch_size=128, epochs=50,
              validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)
def train(x_train, y_train, x_test, y_test):
    # Define model
    model = build_model_graph()

    # Setting the API key (saved as environment variable)
    experiment = Experiment(
        # api_key="YOUR API KEY",  # or
        api_key=os.environ.get("COMET_API_KEY"),
        project_name='comet-examples')
    experiment.log_dataset_hash(x_train)

    # and that's it... when you run your code, all relevant data will be tracked
    # and logged at https://www.comet.ml/view/YOUR-API-KEY
    model.fit(x_train, y_train, batch_size=128, epochs=50,
              validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)
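The snippet above reads the API key from the environment. As a minimal sketch (the key value is a placeholder; in practice export COMET_API_KEY in your shell or CI rather than in source), the variable can be set before constructing the Experiment:

import os

# Placeholder only -- never commit a real key to source control.
os.environ.setdefault("COMET_API_KEY", "your-comet-api-key")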
def train(hyper_params):
    mnist = get_data()

    # Get graph definition, tensors and ops
    train_step, cross_entropy, accuracy, x, y, y_ = build_model_graph(
        hyper_params)

    # log parameters to Comet.ml
    exp = Experiment(api_key="YOUR-API-KEY",
                     project_name='tensorflow examples')
    exp.log_multiple_params(hyper_params)
    exp.log_dataset_hash(mnist)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        exp.set_model_graph(sess.graph)

        for i in range(hyper_params["steps"]):
            batch = mnist.train.next_batch(hyper_params["batch_size"])
            exp.set_step(i)

            # Compute train accuracy every 10 steps
            if i % 10 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0],
                    y_: batch[1]
                })
                print('step %d, training accuracy %g' % (i, train_accuracy))
                exp.log_metric("acc", train_accuracy)

            # Update weights (back propagation) and fetch the loss;
            # Operation.run() returns None, so cross_entropy must be fetched explicitly
            _, loss = sess.run([train_step, cross_entropy],
                               feed_dict={x: batch[0], y_: batch[1]})
            exp.log_metric("loss", loss)

        ### Finished Training ###

        # Compute test accuracy
        acc = accuracy.eval(feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels
        })
        print('test accuracy %g' % acc)
def run_logistic_regression(train_df, validation_df):
    params = """
    C real [0.00001, 0.0001] [0.0001]
    """
    optimizer = Optimizer(API_KEY)
    optimizer.set_params(params)

    while True:
        suggestion = optimizer.get_suggestion()
        experiment = Experiment(api_key=API_KEY, project_name='home-credit')
        experiment.set_name('logreg')
        experiment.log_dataset_hash(
            pd.concat([train_df, validation_df], axis=0))
        experiment.log_parameter(name='C', value=suggestion['C'])

        logreg = LogisticRegression(C=suggestion['C'])
        logreg.fit(train_df.drop(columns=['TARGET']), train_df["TARGET"])
        y_pred = logreg.predict(validation_df.drop(columns=['TARGET']))

        auc_score = roc_auc_score(validation_df['TARGET'], y_pred)
        experiment.log_metric(name='auc_score', value=auc_score)
        suggestion.report_score("auc_score", auc_score)
def run_lightgbm(train_df, validation_df):
    train_data = lgb.Dataset(data=train_df.drop(columns=['TARGET']),
                             label=train_df['TARGET'])
    validation_data = lgb.Dataset(data=validation_df.drop(columns=['TARGET']),
                                  label=validation_df['TARGET'])
    num_round = 10

    params = """
    num_leaves integer [31, 51] [31]
    num_trees integer [50, 100] [50]
    """
    optimizer = Optimizer(API_KEY)
    optimizer.set_params(params)

    while True:
        suggestion = optimizer.get_suggestion()
        experiment = Experiment(api_key=API_KEY, project_name='home-credit')
        experiment.set_name('lightgbm')

        _param = {
            'num_leaves': suggestion['num_leaves'],
            'num_trees': suggestion['num_trees'],
            'objective': 'binary',
            'metric': 'auc'
        }
        experiment.log_multiple_params(_param)
        experiment.log_dataset_hash(
            pd.concat([train_df, validation_df], axis=0))

        bst = lgb.train(_param, train_data, num_round,
                        valid_sets=[validation_data])
        y_pred = bst.predict(validation_df.drop(columns=['TARGET']))

        auc_score = roc_auc_score(validation_df['TARGET'], y_pred)
        experiment.log_metric(name='auc_score', value=auc_score)
        suggestion.report_score("auc_score", auc_score)
def main(_):
    print("Model Architecture: {}".format(FLAGS.model_architecture))

    # Adjust some parameters
    if FLAGS.debug:
        FLAGS.small_label_set = False
        print("RUNNING IN DEBUG MODE")

    FLAGS.num_classes = utils.get_num_classes(FLAGS)

    X_train, y_train = data_utils.load_dataset_tf(FLAGS, mode="train")
    X_val, y_val = data_utils.load_dataset_tf(FLAGS, mode="val")

    # comet_ml experiment logging (https://www.comet.ml/)
    experiment = Experiment(api_key="J55UNlgtffTDmziKUlszSMW2w",
                            log_code=False)
    experiment.log_multiple_params(utils.gather_params(FLAGS))
    experiment.set_num_of_epocs(FLAGS.epochs)
    experiment.log_dataset_hash(X_train)

    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new, DEFAULT TensorFlow session.
    sess = tf.InteractiveSession()

    utils.set_seeds()  # Get deterministic behavior?

    model = models.create_model(FLAGS)
    fw = framework.Framework(sess, model, experiment, FLAGS)

    num_params = int(utils.get_number_of_params())
    model_size = num_params * 4
    print("\nNumber of trainable parameters: {}".format(num_params))
    print("Model is ~ {} bytes out of max 5000000 bytes\n".format(model_size))
    experiment.log_parameter("num_params", num_params)
    experiment.log_parameter("approx_model_size", model_size)

    fw.optimize(X_train, y_train, X_val, y_val)
def run_random_forest(train_df, validation_df):
    params = """
    n_estimators integer [100, 500] [100]
    """
    optimizer = Optimizer(API_KEY)
    optimizer.set_params(params)

    while True:
        suggestion = optimizer.get_suggestion()
        experiment = Experiment(api_key=API_KEY, project_name='home-credit')
        experiment.log_dataset_hash(
            pd.concat([train_df, validation_df], axis=0))
        experiment.set_name('rf')
        experiment.log_parameter(name='n_estimators',
                                 value=suggestion['n_estimators'])

        rf = RandomForestClassifier(n_estimators=suggestion['n_estimators'])
        rf.fit(train_df.drop(columns=['TARGET']), train_df["TARGET"])
        y_pred = rf.predict(validation_df.drop(columns=['TARGET']))

        auc_score = roc_auc_score(validation_df['TARGET'], y_pred)
        experiment.log_metric(name='auc_score', value=auc_score)
        suggestion.report_score("auc_score", auc_score)
to_file(y_train, "true_train", y_train)
to_file(y_dev, "true_dev", y_train)

print('saving model')
model.save(os.path.join(DATADIR, 'level1_' + MODEL_NAME))

# experiment.log_figure(figure_name='dev_support', figure=train_support)
# experiment.log_figure(figure_name='train_support', figure=dev_support)

print('logging experiment parameters')
params = {
    "max_sequence_length": MAX_SEQUENCE_LENGTH,
    "embedding_dim": EMBEDDING_DIM,
    "p_threshold": P_THRESHOLD,
    "pos_ratio": POS_RATIO,
    "num_words": NUM_WORDS,
    "datadir": DATADIR,
    "metadata": os.getenv('METADATA_LIST'),
    "Data_since": os.getenv('SINCE_THRESHOLD')
}
experiment.log_multiple_params(params)
experiment.log_other("datadir", DATADIR)
experiment.log_other("metadata", os.getenv('METADATA_LIST'))
experiment.log_other("data_since", os.getenv('SINCE_THRESHOLD'))
experiment.log_dataset_hash(x_train)
plt.show()

plt.plot(y_pred, y_test)

# Visualising the Test set results
plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
evs = explained_variance_score(y_test, y_pred)

# these will be logged to your sklearn-demos project on Comet.ml
params = {
    "random_state": 0,
    "model_type": "simple regression",
    "scaler": "none",
    "stratify": True
}
metrics = {"mse": mse, "mae": mae, "evs": evs}

exp.log_dataset_hash(X_train)
exp.log_multiple_params(params)
exp.log_multiple_metrics(metrics)
logreg = LogisticRegression()
param_grid = {'C': [0.001, 0.01, 0.1, 1, 5, 10, 20, 50, 100]}

clf = GridSearchCV(logreg, param_grid=param_grid, cv=10, n_jobs=-1)
clf.fit(X_train_scaled, y_train)
y_pred = clf.predict(X_test_scaled)

print("\nResults\nConfusion matrix \n {}".format(
    confusion_matrix(y_test, y_pred)))

f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

params = {
    "random_state": random_state,
    "model_type": "logreg",
    "scaler": "standard scaler",
    "param_grid": str(param_grid),
    "stratify": True
}
metrics = {"f1": f1, "recall": recall, "precision": precision}

experiment.log_dataset_hash(X_train_scaled)
experiment.log_multiple_params(params)
experiment.log_multiple_metrics(metrics)
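This snippet assumes an experiment object has already been created. A minimal, hedged sketch of that setup (the project name follows the "sklearn-demos" comment used in the neighbouring snippets, but is otherwise an assumption):

import os
from comet_ml import Experiment

# API key is read from the environment; project name is a placeholder.
experiment = Experiment(api_key=os.environ.get("COMET_API_KEY"),
                        project_name="sklearn-demos")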
def train(args, use_comet: bool = True): data_cls = funcs[args['dataset']] model_cls = funcs[args['model']] network = funcs[args['network']] print('[INFO] Getting dataset...') data = data_cls() (x_train, y_train), (x_test, y_test) = data.load_data() classes = data.mapping # #Used for testing only # x_train = x_train[:100, :, :] # y_train = y_train[:100, :] # x_test = x_test[:100, :, :] # y_test = y_test[:100, :] # print ('[INFO] Training shape: ', x_train.shape, y_train.shape) # print ('[INFO] Test shape: ', x_test.shape, y_test.shape) # #delete these lines y_test_labels = [ np.where(y_test[idx] == 1)[0][0] for idx in range(len(y_test)) ] # distribute 90% test 10% val dataset with equal class distribution (x_test, x_valid, y_test, y_valid) = train_test_split(x_test, y_test, test_size=0.1, stratify=y_test_labels, random_state=42) print('[INFO] Training shape: ', x_train.shape, y_train.shape) print('[INFO] Validation shape: ', x_valid.shape, y_valid.shape) print('[INFO] Test shape: ', x_test.shape, y_test.shape) print('[INFO] Setting up the model..') model = model_cls(network, data_cls) print(model) dataset = dict({ 'x_train': x_train, 'y_train': y_train, 'x_valid': x_valid, 'y_valid': y_valid, 'x_test': x_test, 'y_test': y_test }) if use_comet and args['find_lr'] == False: #create an experiment with your api key experiment = Experiment(api_key='INSERT API KEY', project_name='emnist', auto_param_logging=False) print('[INFO] Starting Training...') #will log metrics with the prefix 'train_' with experiment.train(): _ = train_model(model, dataset, batch_size=args['batch_size'], epochs=args['epochs'], name=args['network']) print('[INFO] Starting Testing...') #will log metrics with the prefix 'test_' with experiment.test(): loss, score = model.evaluate(dataset, args['batch_size']) print(f'[INFO] Test evaluation: {score*100}') metrics = {'loss': loss, 'accuracy': score} experiment.log_metrics(metrics) experiment.log_parameters(args) experiment.log_dataset_hash( x_train) #creates and logs a hash of your data experiment.end() elif use_comet and args['find_lr'] == True: _ = train_model(model, dataset, batch_size=args['batch_size'], epochs=args['epochs'], FIND_LR=args['find_lr'], name=args['network']) else: print('[INFO] Starting Training...') train_model(model, dataset, batch_size=args['batch_size'], epochs=args['epochs'], name=args['network']) print('[INFO] Starting Testing...') loss, score = model.evaluate(dataset, args['batch_size']) print(f'[INFO] Test evaluation: {score*100}') if args['weights']: model.save_weights() if args['save_model']: model.save_model()
def train(args, use_comet : bool = True): data_cls = funcs[args['dataset']] model_cls = funcs[args['model']] network = funcs[args['network']] print ('[INFO] Getting dataset...') data = data_cls() data.load_data() (x_train, y_train), (x_test, y_test) = (data.x_train, data.y_train), (data.x_test, data.y_test) classes = data.mapping # #Used for testing only # x_train = x_train[:100, :, :] # y_train = y_train[:100, :] # x_test = x_test[:100, :, :] # y_test = y_test[:100, :] # print ('[INFO] Training shape: ', x_train.shape, y_train.shape) # print ('[INFO] Test shape: ', x_test.shape, y_test.shape) # #delete these lines # distribute 90% test 10% val dataset with equal class distribution (x_test, x_valid, y_test, y_valid) = train_test_split(x_test, y_test, test_size=0.2, random_state=42) print ('[INFO] Training shape: ', x_train.shape, y_train.shape) print ('[INFO] Validation shape: ', x_valid.shape, y_valid.shape) print ('[INFO] Test shape: ', x_test.shape, y_test.shape) print ('[INFO] Setting up the model..') if args['network'] == 'lstmctc': network_args = {'backbone' : args['backbone'], 'seq_model' : args['seq'], 'bi' : args['bi'] } model = model_cls(network, data_cls, network_args) else: model = model_cls(network, data_cls) print (model) dataset = dict({ 'x_train' : x_train, 'y_train' : y_train, 'x_valid' : x_valid, 'y_valid' : y_valid, 'x_test' : x_test, 'y_test' : y_test }) if use_comet and args['find_lr'] == False: #create an experiment with your api key experiment = Experiment(api_key='WVBNRAfMLCBWslJAAsffxM4Gz', project_name='iam_lines', auto_param_logging=False) print ('[INFO] Starting Training...') #will log metrics with the prefix 'train_' with experiment.train(): _ = train_model( model, dataset, batch_size=args['batch_size'], epochs=args['epochs'], name=args['network'] ) print ('[INFO] Starting Testing...') #will log metrics with the prefix 'test_' with experiment.test(): score = model.evaluate(dataset, int(args['batch_size'])) print(f'[INFO] Test evaluation: {score*100}...') metrics = { 'accuracy':score } experiment.log_metrics(metrics) experiment.log_parameters(args) experiment.log_dataset_hash(x_train) #creates and logs a hash of your data experiment.end() elif use_comet and args['find_lr'] == True: _ = train_model( model, dataset, batch_size=args['batch_size'], epochs=args['epochs'], FIND_LR=args['find_lr'], name=args['network'] ) else : print ('[INFO] Starting Training...') train_model( model, dataset, batch_size=args['batch_size'], epochs=args['epochs'], name=args['network'] ) print ('[INFO] Starting Testing...') score = model.evaluate(dataset, args['batch_size']) print(f'[INFO] Test evaluation: {score*100}...') if args['weights']: model.save_weights() if args['save_model']: model.save_model()
# Get dataset and put into train, test lists
categories = [
    'alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med'
]
twenty_train = fetch_20newsgroups(subset='train',
                                  categories=categories,
                                  shuffle=True,
                                  random_state=42)
twenty_test = fetch_20newsgroups(subset='test',
                                 categories=categories,
                                 shuffle=True,
                                 random_state=42)

experiment.log_dataset_hash(twenty_train)

# Build training pipeline
text_clf = Pipeline([
    ('vect', CountVectorizer()),    # Counts occurrences of each word
    ('tfidf', TfidfTransformer()),  # Normalize the counts based on document length
    ('clf',
     SGDClassifier(loss='hinge',
                   penalty='l2',    # Call classifier with vector
                   alpha=1e-3,
                   random_state=42,
                   max_iter=5,
                   tol=None)),
])
def eval_model(datapath="../SF3H_labeled.csv", min_date='01-01-2018', freq='3H', use_comet=True): dataset = read_labeled_data(datapath) dataset['date'] = pd.DatetimeIndex(dataset['date']) dataset.set_index('date', inplace=True) dataset = dataset[dataset.index > min_date] dataset = dataset.reset_index().set_index(['date', 'category']) dataset = dataset[~dataset.index.duplicated(keep='first')] X = dataset[['value']] y = dataset[['is_anomaly']] ## First model: STL anomaly_types = ['residual', 'trend', 'and', 'or'] # anomaly_types = ['residual', ['trend', 'residual']] anomaly_type = 'residual' num_std = 3 window_size_for_metrics = 3 min_value = 15 for num_std in [2, 2.5, 3, 3.5, 4]: for anomaly_type in anomaly_types: model = StlTrendinessDetector(is_multicategory=True, freq=freq, min_value=min_value, anomaly_type=anomaly_type, num_of_std=num_std) result = eval_models( X, y, [model], label_col_name='is_anomaly', train_percent=50, window_size_for_metrics=window_size_for_metrics) print('num_std = ' + str(num_std) + ', anomaly_type = ' + str(anomaly_type) + ', min_value = ' + str(min_value) + ', dataset = ' + datapath) print('F1 score = ' + str(result[model.__name__]['f1']) + ", precision = " + str(result[model.__name__]['precision']) + ", recall = " + str(result[model.__name__]['recall'])) # model.plot(labels = y.reset_index().set_index('date')) if use_comet: experiment = Experiment( api_key= "Uv0lx3yRDH7kk8h1vtR9ZRiD2s16gnYTxfsvK2VnpV2xRrMbFobYDZRRA4tvoYiR", project_name="trending-topics") experiment.log_dataset_hash(dataset) experiment.log_parameter("model", model.__name__) experiment.log_parameter("dataset", datapath) experiment.log_parameter("num_of_std", num_std) experiment.log_parameter("anomaly_type", anomaly_type) experiment.log_parameter("window_size_for_metrics", window_size_for_metrics) experiment.log_metric("f1", result[str(model.__name__)]['f1']) experiment.log_metric("f0.5", result[str(model.__name__)]['f0.5']) experiment.log_metric("precision", result[str(model.__name__)]['precision']) experiment.log_metric("recall", result[str(model.__name__)]['recall']) max_anoms_list = [0.05, 0.1] for max_anoms in max_anoms_list: for threshold in [None, 'med_max', 'p95', 'p99']: for alpha in [0.05, 0.1, 0.15]: model = TwitterAnomalyTrendinessDetector(is_multicategory=True, freq=freq, min_value=min_value, threshold=threshold, max_anoms=max_anoms, longterm=False, alpha=alpha, seasonality_freq=7) result = eval_models( X, y, [model], label_col_name='is_anomaly', train_percent=50, window_size_for_metrics=window_size_for_metrics) if threshold is None: print('Threshold = None, Alpha = ' + str(alpha) + ', max_anoms = None, min_value = ' + str(min_value) + ', dataset = ' + datapath) else: print('Threshold = ' + threshold + ', Alpha = ' + str(alpha) + ', max_anoms = None, min_value = ' + str(min_value) + ', dataset = ' + datapath) print('F1 score = ' + str(result[model.__name__]['f1']) + ", precision = " + str(result[model.__name__]['precision']) + ", recall = " + str(result[model.__name__]['recall'])) if use_comet: experiment = Experiment( api_key= "Uv0lx3yRDH7kk8h1vtR9ZRiD2s16gnYTxfsvK2VnpV2xRrMbFobYDZRRA4tvoYiR", project_name="trending-topics") experiment.log_dataset_hash(dataset) experiment.log_parameter("model", model.__name__) experiment.log_parameter("max_anoms", 0.49) experiment.log_parameter("threshold", threshold) experiment.log_parameter("alpha", alpha) experiment.log_parameter("longterm", True) experiment.log_parameter("dataset", datapath) experiment.log_parameter("window_size_for_metrics", 
window_size_for_metrics) experiment.log_metric("f1", result[str(model.__name__)]['f1']) experiment.log_metric("f0.5", result[str(model.__name__)]['f0.5']) experiment.log_metric( "precision", result[str(model.__name__)]['precision']) experiment.log_metric( "recall", result[str(model.__name__)]['recall'])
def main():
    exp = Experiment(project_name="movie-reviews",
                     auto_histogram_weight_logging=True)

    params = {
        'layer-1-size': 16,
        'epochs': 10,
        'batch-size': 512,
        'dropout': 0.15,
    }
    exp.log_parameters(params)

    # Load data
    train_data, test_data = tfds.load(name="imdb_reviews",
                                      split=["train", "test"],
                                      batch_size=-1,
                                      as_supervised=True)
    train_examples, train_labels = tfds.as_numpy(train_data)
    test_examples, test_labels = tfds.as_numpy(test_data)

    x_val = train_examples[:10000]
    partial_x_train = train_examples[10000:]
    y_val = train_labels[:10000]
    partial_y_train = train_labels[10000:]

    # Load model
    model = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
    hub_layer = hub.KerasLayer(model,
                               output_shape=[20],
                               input_shape=[],
                               dtype=tf.string,
                               trainable=True)
    hub_layer(train_examples[:3])

    # Build model
    model = tf.keras.Sequential()
    model.add(hub_layer)
    model.add(
        tf.keras.layers.Dense(exp.get_parameter('layer-1-size'),
                              activation='relu'))
    model.add(tf.keras.layers.Dropout(exp.get_parameter('dropout')))
    model.add(tf.keras.layers.Dense(1))

    model.compile(
        optimizer='adam',
        loss=tf.losses.BinaryCrossentropy(from_logits=True),
        metrics=[tf.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])

    # Train model
    model.fit(partial_x_train,
              partial_y_train,
              epochs=exp.get_parameter('epochs'),
              batch_size=exp.get_parameter('batch-size'),
              validation_data=(x_val, y_val),
              verbose=1)

    # log any custom metric
    exp.log_metric('custom_metric', 0.98)

    # log a dataset hash
    exp.log_dataset_hash(partial_x_train)

    # finalize_model invokes:
    #   * exp.log_confusion_matrix()
    #   * exp.log_text()
    #   * exp.log_model()
    finalize_model(model, test_examples, test_labels, exp)
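The finalize_model helper itself is not shown in this snippet. Purely as a hedged sketch of what a helper matching the comment above could do (function body, file paths, and model name are assumptions, not the original implementation):

def finalize_model(model, x_test, y_test, exp):
    # Confusion matrix from thresholded sigmoid logits
    logits = model.predict(x_test)
    y_pred = (logits.ravel() > 0.0).astype(int)
    exp.log_confusion_matrix(y_true=y_test, y_predicted=y_pred)

    # Log a short text note and the trained model files
    exp.log_text("IMDB sentiment model, %d test examples" % len(y_test))
    model.save("saved_model")                          # placeholder path
    exp.log_model("movie-reviews-model", "saved_model")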
def test_model( # pylint:disable=too-many-arguments modelpath: str, scalerpath: str, Xpath: str, ypath: str, namepath: str, outpath: str, featurelabelpath: str = None, ): # pylint:disable=too-many-locals """Takes a trained model and performes some tests on it and calculates statistics. Arguments: modelpath {str} -- path to sklearn model in .joblib file modelpath {str} -- path to the scaler object Xpath {str} -- path to features in npz file ypath {str} -- path to labels in npz file namepath {str} -- path to names in pickle 3 file outpath {str} -- path to which the evaluation metrics are written Keyword Arguments: featurelabelpath {str} -- path to a picklefile with a list of the feature names, if not None, feature importances are also estimates (default {None}) """ lower_quantile = 2.5 / 100 upper_quantile = 97.5 / 100 experiment = Experiment( api_key=os.getenv("COMET_API_KEY", None), project_name="mof-oxidation-states" ) experiment.add_tag("model evaluation") print("*** Loading data ***") model = load(modelpath) scaler = load(scalerpath) X = np.load(Xpath) X = scaler.transform(X) y = np.load(ypath) experiment.log_dataset_hash(X) names = read_pickle(namepath) print("*** Getting bootstrapped metrics, using 200 folds which takes some time ***") scores = bootstrapped_metrics(model, X, y, scoring_funcs=return_scoring_funcs()) df_metrics = pd.DataFrame(scores) means = df_metrics.mean().values medians = df_metrics.median().values lower = df_metrics.quantile(lower_quantile).values upper = df_metrics.quantile(upper_quantile).values stds = df_metrics.std().values # print( # " *** Running permuation test running 200 folds with 10 fold CV which takes forever ***" # ) # cv = StratifiedKFold(10) # balanced_accuracy, balanced_acc_permutation_scores, balanced_accuracy_pvalue = permutation_test( # model, X, y # ) metrics_dict = {} # metrics_dict["balanced_accuracy_cv"] = balanced_accuracy # metrics_dict[ # "balanced_accuracy_permutation_scores" # ] = balanced_acc_permutation_scores # metrics_dict["balanced_accuracy_p_value"] = balanced_accuracy_pvalue prediction = model.predict(X) print(" *** Getting misclassified cases ***") misclassified = np.where(y != prediction) misclassified_w_prediction_true = [ (names[i], prediction[i], y[i]) for i in list(misclassified[0]) ] metrics_dict["misclassified"] = misclassified_w_prediction_true experiment.log_metric("misclassified", misclassified) if featurelabelpath is not None: feature_labels = read_pickle(featurelabelpath) print("*** Getting feature importance ***") imp_vals, imp_all = feature_importance_permutation( predict_method=model.predict, X=X, y=y, metric="accuracy", num_rounds=20, # to get some errorbars seed=1, ) importance_error = np.std(imp_all, axis=-1) importance_metrics = [ (name, value, error) for name, value, error in zip(feature_labels, imp_vals, importance_error) ] experiment.log_metric("feature_importances", importance_metrics) metrics_dict["feature_importances"] = importance_metrics for i, column in enumerate(df_metrics.columns.values): metrics_dict[column] = (means[i], medians[i], stds[i], lower[i], upper[i]) print((column, means[i], "_".join([column, "mean"]))) experiment.log_metric("_".join([column, "mean"]), means[i]) experiment.log_metric("_".join([column, "median"]), medians[i]) experiment.log_metric("_".join([column, "lower"]), lower[i]) experiment.log_metric("_".join([column, "upper"]), upper[i]) experiment.log_metric("_".join([column, "std"]), stds[i]) # experiment.log_metrics("balanced_accuracy_cv", balanced_accuracy) # 
experiment.log_metrics("balanced_accuracy_p_value", balanced_accuracy_pvalue) # experiment.log_metrics("missclassified", misclassified_w_prediction_true) print(" *** Getting the calibration curve ***") cc = calibration_curve(y, model.predict(X), n_bins=10) metrics_dict["calibration_curve_true_probab"] = cc[0] metrics_dict["calibration_curve_predicted_probab"] = cc[1] # now write a .json with metrics for DVC with open(os.path.join(outpath, "test_metrics.json"), "w") as fp: json.dump(metrics_dict, fp, cls=NpEncoder)
logger.info("Get prediction...") y_pred = clf.predict(X_test_scaled) print("\nResults\nConfusion matrix \n {}".format(confusion_matrix(y_test, y_pred))) f1 = f1_score(y_test, y_pred) precision = precision_score(y_test, y_pred) recall = recall_score(y_test, y_pred) print("F1 score is {:6.3f}".format(f1)) print("Precision score is {:6.3f}".format(precision)) print("Recall score is {:6.3f}".format(recall)) # these will be logged to your sklearn-demos project on Comet.ml params = { "random_state": random_state, "model_type": "logreg", "scaler": "standard scaler", "param_grid": str(param_grid), "stratify": True, } metrics = {"f1": f1, "recall": recall, "precision": precision} logger.info("Logging params, meta info to commet.ml ...") exp.log_dataset_hash(X_train_scaled) exp.log_parameters(params) exp.log_metrics(metrics) logger.info("Done...")
class MLOxidationStates: """Collects some functions used for training of the oxidation state classifier""" def __init__( self, X: np.array, y: np.array, n: int = 5, max_size: int = None, eval_method: str = "kfold", scaler: str = "standard", metricspath: str = "metrics", modelpath: str = "models", max_evals: int = 100, voting: str = "hard", calibrate: str = "sigmoid", timeout: int = 600, oversampling: str = None, max_workers: int = 16, train_one_fold: bool = False, ): # pylint:disable=too-many-arguments self.x = X self.y = y assert len(self.x) == len(self.y) self.n = n self.eval_method = eval_method self.max_size = max_size if scaler == "robust": self.scalername = "robust" self.scaler = RobustScaler() elif scaler == "standard": self.scalername = "standard" self.scaler = StandardScaler() elif scaler == "minmax": self.scalername = "minmax" self.scaler = MinMaxScaler() self.bootstrap_results = [] self.metrics = {} self.max_evals = max_evals self.voting = voting self.timeout = timeout self.timings = [] self.metricspath = metricspath self.modelpath = modelpath self.mix_ratios = {"rand": 0.15, "tpe": 0.7, "anneal": 0.15} self.max_workers = max_workers self.calibrate = calibrate self.oversampling = oversampling self.train_one_fold = train_one_fold self.classes = [0, 1, 2, 3, 4, 5, 6, 7, 8] self.y = self.y.astype(np.int) trainlogger.info("intialized training class") @classmethod def from_x_y_paths( cls, xpath: str, ypath: str, modelpath: str, metricspath: str, scaler: str, n: int, voting: str, calibrate: str, max_size: int, oversampling: str, train_one_fold: bool, ): """Constructs a MLOxidationStates object from filepaths""" x = np.load(xpath, allow_pickle=True) y = np.load(ypath, allow_pickle=True) return cls( x, y, n=n, max_size=max_size, scaler=scaler, voting=voting, calibrate=calibrate, modelpath=modelpath, metricspath=metricspath, oversampling=oversampling, train_one_fold=train_one_fold, ) @staticmethod def train_ensemble( models: list, X: np.array, y: np.array, voting: str = "soft", calibrate: str = "isotonic", valid_size: float = VALID_SIZE, ) -> Tuple[VotingClassifier, float]: """Collects base models into a voting classifier, trains it and then performs probability calibration Arguments: models {list} -- list of optimized base models X {np.array} -- feature matrix y {np.array} -- label vector Keyword Arguments: voting {str} -- voting mechanism (hard or soft) (default: {"soft"}) n {int} -- number of CV folds for isotonic regression (default: {10}) calibrate {str} -- probability calibration method (none, isotonic, sigmoid) (default: {isotonic}) valid_size {float} -- fraction of the last part of the training set used for validation Returns: [CalibratedClassifierCV, float] -- [description] """ trainlogger.debug("calibrating and building ensemble model") startime = time.process_time() # hyperopt uses by default the last .2 percent as a validation set, we use the same convention here to do the # probability calibration # https://github.com/hyperopt/hyperopt-sklearn/blob/52a5522fae473bce0ea1de5f36bb84ed37990d02/hpsklearn/estimator.py#L268 n_train = int(len(y) * (1 - valid_size)) X_valid = X[n_train:] y_valid = y[n_train:] # calibrate the base esimators vc = VotingClassifier(models, voting=voting) trainlogger.debug("now, calibrating the base base estimators") vc._calibrate_base_estimators( calibrate, X_valid, y_valid ) # pylint:disable=protected-access endtime = time.process_time() elapsed_time = endtime - startime return vc, elapsed_time @staticmethod def tune_fit( # 
pylint:disable=dangerous-default-value models: list, X: np.ndarray, y: np.ndarray, max_evals: int = 400, timeout: int = 10 * 60, mix_ratios: dict = {"rand": 0.1, "tpe": 0.8, "anneal": 0.1}, valid_size: float = VALID_SIZE, ) -> list: """Tune model hyperparameters using hyperopt using a mixed strategy. Make sure when using this function that no data leakage happens. This data here should be seperate from training and test set. Arguments: models {list} -- list of models that should be optimized X_valid {np.ndarray} -- features y_valid {np.ndarray} -- labels max_evals {int} -- maximum number of evaluations of hyperparameter optimizations timeout {int} -- timeout in seconds after which the optimization stops mix_ratios {dict} -- dictionary which provides the ratios of the different optimization algorithms valid_size {float} -- fraction of the last part of the training set used for validation Returns: list -- list of tuples (name, model) of optimized models """ assert sum(list(mix_ratios.values())) == 1 assert list(mix_ratios.keys()) == ["rand", "tpe", "anneal"] trainlogger.debug("performing hyperparameter optimization") optimized_models = [] mix_algo = partial( mix.suggest, p_suggest=[ (mix_ratios["rand"], rand.suggest), (mix_ratios["tpe"], tpe.suggest), (mix_ratios["anneal"], anneal.suggest), ], ) for name, classifier in models: m = hyperopt_estimator( classifier=classifier("classifier"), algo=mix_algo, trial_timeout=timeout, preprocessing=[], max_evals=max_evals, seed=RANDOM_SEED, # n_jobs=-1, # todo fix installation to use my forks ) m.fit( X, y, valid_size=valid_size, cv_shuffle=False ) # avoid shuffleing to have the same validation set for the ensemble stage # chose the model with best hyperparameters and train it n_train = int(len(y) * (1 - valid_size)) X_train = X[:n_train] y_train = y[:n_train] m.retrain_best_model_on_full_data(X_train, y_train) m = m.best_model()["learner"] optimized_models.append((name, m)) return optimized_models @staticmethod def get_bootstrap(X: np.ndarray, y: np.ndarray, n: int = 200): """Returns train, test, validation splits Arguments: X {np.ndarray} -- Feature matrix y {np.ndarray} -- Label vector n {int} -- number of bootstrap resamplings """ assert len(X) == len(y) bs = BootstrapOutOfBag(n_splits=n, random_seed=RANDOM_SEED) oob = bs.split(np.arange(len(y))) return oob @staticmethod def get_train_test_split(X: np.ndarray, y: np.ndarray, n: int = 10): """Returns train, test, validation splits Arguments: X {np.ndarray} -- Feature matrix y {np.ndarray} -- Label vector n {int} -- number of split resamplings """ bs = StratifiedKFold(n_splits=n, random_state=RANDOM_SEED) oob = bs.split(X, y) return oob @staticmethod def model_eval( models: list, xtrain: np.array, ytrain: np.array, xtest: np.array, ytest: np.array, postfix: str = 0, outdir_metrics: str = None, outdir_models: str = None, ): """Peforms a model evaluation on training and test set and dump the predictions with the actual values into an outout file Arguments: models {list} -- list of tuples with model name and model itself xtrain {np.array} -- feature matrix training set ytrain {np.array} -- label vector training set xtest {np.array} -- feature matrix test set ytest {np.array} -- label vector test set postfix {str} -- string that will be attached to filename outdir_metrics {str} -- output directory for metrics outdir_models {str} -- output directory for models """ predictions = [] trainlogger.debug("entered evaluation function") for name, model in models: outdir_metrics_verbose = os.path.join( 
os.path.join(outdir_metrics, "verbose") ) if not os.path.exists(outdir_metrics_verbose): os.mkdir(outdir_metrics_verbose) outname_base_metrics = os.path.join( outdir_metrics_verbose, "_".join([STARTTIMESTRING, name, postfix]) ) outname_base_models = os.path.join( outdir_models, "_".join([STARTTIMESTRING, name, postfix]) ) train_true = ytrain test_true = ytest train_predict = model.predict(xtrain) test_predict = model.predict(xtest) accuracy_train = accuracy_score(train_true, train_predict) accuracy_test = accuracy_score(test_true, test_predict) f1_micro_train = f1_score(train_true, train_predict, average="micro") f1_micro_test = f1_score(test_true, test_predict, average="micro") f1_macro_train = f1_score(train_true, train_predict, average="macro") f1_macro_test = f1_score(test_true, test_predict, average="macro") balanced_accuracy_train = balanced_accuracy_score(train_true, train_predict) balanced_accuracy_test = balanced_accuracy_score(test_true, test_predict) precision_train = precision_score( train_true, train_predict, average="micro" ) precision_test = precision_score(train_true, train_predict, average="micro") recall_train = recall_score(train_true, train_predict, average="micro") recall_test = recall_score(test_true, test_predict, average="micro") trainlogger.info( f"model {name}: accuracy test: {accuracy_test}, accuracy train: {accuracy_train} | f1 micro test {f1_micro_test}, f1 micro train {f1_micro_train}" ) prediction = { "model": name, "postfix": postfix, "outname_base_models": outname_base_models, "outname_base_metrics": outname_base_metrics, "accuracy_train": accuracy_train, "accuracy_test": accuracy_test, "f1_micro_train": f1_micro_train, "f1_micro_test": f1_micro_test, "f1_macro_train": f1_macro_train, "f1_macro_test": f1_macro_test, "balanced_accuracy_train": balanced_accuracy_train, "balanced_accuracy_test": balanced_accuracy_test, "precision_train": precision_train, "precision_test": precision_test, "recall_train": recall_train, "recall_test": recall_test, "training_points": len(ytrain), "test_points": len(ytest), } arrays = { "train_true": train_true, "train_predict": train_predict, "test_predict": test_predict, "test_true": test_true, } arrays.update(prediction) predictions.append(arrays) with open(outname_base_metrics + ".pkl", "wb") as fh: pickle.dump(arrays, fh) dump(model, outname_base_models + ".joblib") return predictions def train_eval_single(self, count_indx: tuple): """Peforms a optimize, train, evaluation loop on one fold Arguments: count_indx {tuple} -- (fold index, indices for training and test set) Returns: list -- list of dictionaries of model performance metrics """ counter, tt_indices = count_indx trainlogger.debug("entered the function that trains one fold") all_predictions = [] counter = str(counter) train, test = tt_indices scaler = self.scaler xtrain = self.x[train] ytrain = self.y[train] ytest = self.y[test] ytrain = ytrain.reshape(-1, 1) ytest = ytest.reshape(-1, 1) if self.oversampling == "smote": trainlogger.debug("using smote oversampling") xtrain, ytrain = SMOTE(random_state=RANDOM_SEED).fit_resample( xtrain, ytrain ) ytrain = ytrain.reshape(-1, 1) elif self.oversampling == "borderlinesmote": trainlogger.debug("using BorderlineSMOTE oversamplign") xtrain, ytrain = BorderlineSMOTE(random_state=RANDOM_SEED).fit_resample( xtrain, ytrain ) ytrain = ytrain.reshape(-1, 1) elif self.oversampling == "adaysn": trainlogger.debug("using Adayn oversamplign") xtrain, ytrain = ADASYN(random_state=RANDOM_SEED).fit_resample( xtrain, ytrain ) ytrain = 
ytrain.reshape(-1, 1) xtrain = scaler.fit_transform(xtrain) trainlogger.debug("the training set has shape %s", xtrain.shape) # save the latest scaler so we can use it later with latest model for # evaluation on a holdout set dump( scaler, os.path.join( self.modelpath, STARTTIMESTRING + "scaler_" + counter + ".joblib" ), ) xtest = self.x[test] xtest = scaler.transform(xtest) n_test = int(len(ytest) * (1 - VALID_SIZE)) xvalid = xtest[n_test:] yvalid = ytest[n_test:] xtest = xtest[:n_test] ytest = ytest[:n_test] xtrain = np.vstack([xtrain, xvalid]) ytrain = np.vstack([ytrain, yvalid]) valid_size = len(xvalid) / len(xtrain) trainlogger.debug("the test set has shape %s", xtest.shape) ytrain = ytrain.ravel() ytest = ytest.ravel() optimized_models_split = MLOxidationStates.tune_fit( classifiers, xtrain, ytrain, self.max_evals, self.timeout, self.mix_ratios, valid_size, ) res = MLOxidationStates.model_eval( optimized_models_split, xtrain, ytrain, xtest, ytest, counter, self.metricspath, self.modelpath, ) all_predictions.extend(res) # now build an ensemble based on the single models ensemble_model, elapsed_time = MLOxidationStates.train_ensemble( optimized_models_split, xtrain, ytrain, voting=self.voting, calibrate=self.calibrate, valid_size=valid_size, ) ensemble_predictions = MLOxidationStates.model_eval( [("ensemble", ensemble_model)], xtrain, ytrain, xtest, ytest, counter, self.metricspath, self.modelpath, ) all_predictions.extend(ensemble_predictions) self.timings.append(elapsed_time) return all_predictions def track_comet_ml(self): """Function to track main parameters and metrics using comet.ml""" trainlogger.debug("entering the tracking function") self.experiment = Experiment( api_key=os.getenv("COMET_API_KEY", None), project_name="mof-oxidation-states", ) mean_time = np.mean(np.array(self.timings)) self.metrics = MLOxidationStates.summarize_metrics( self.bootstrap_results, outpath=self.metricspath, timings=mean_time ) self.experiment.log_dataset_hash(self.x) self.experiment.log_metrics(self.metrics) basemodels = [i for i, _ in classifiers] self.experiment.log_parameter("models", basemodels) self.experiment.log_parameter("n_bootstraps", self.n) self.experiment.log_parameter("max_hyperopt_eval", self.max_evals) self.experiment.log_parameter("timeout_hyperopt", self.timeout) self.experiment.log_parameter("fraction_tpe", self.mix_ratios["tpe"]) self.experiment.log_parameter("fraction_random", self.mix_ratios["rand"]) self.experiment.log_parameter("fraction_anneal", self.mix_ratios["anneal"]) self.experiment.log_parameter("voting", self.voting) self.experiment.log_parameter("size", self.max_size) self.experiment.log_parameter("eval_method", self.eval_method) self.experiment.log_parameter("scaler", self.scalername) self.experiment.log_parameter("calibration_method", self.calibrate) self.experiment.log_parameter("oversampling", self.oversampling) self.experiment.add_tag("initial_model_eval") self.experiment.log_parameter("validation_percentage", VALID_SIZE) self.experiment.log_metric("mean_training_time", mean_time) return self.experiment @staticmethod def summarize_metrics(metrics: list, outpath: str, timings: float): """Calculates summaries of metrics and writes them into .json file for dvc Arguments: metrics {list} -- list of dictionaries outpath {str} -- path to which metrics are writting timings {float} -- training time in seconds Returns: dict -- dictionary with most important metrics """ df = pd.DataFrame(metrics) df_ensemble = df[df["model"] == "ensemble"] summary_metrics = { 
"mean_accuracy_test": df_ensemble["accuracy_test"].mean(), "median_accuracy_test": df_ensemble["accuracy_test"].median(), "std_accuracy_test": df_ensemble["accuracy_test"].std(), "mean_accuracy_train": df_ensemble["accuracy_train"].mean(), "median_accuracy_train": df_ensemble["accuracy_train"].median(), "std_accuracy_train": df_ensemble["accuracy_train"].std(), "mean_f1_micro_train": df_ensemble["f1_micro_train"].mean(), "median_f1_micro_train": df_ensemble["f1_micro_train"].median(), "std_f1_micro_train": df_ensemble["f1_micro_train"].std(), "mean_f1_micro_test": df_ensemble["f1_micro_test"].mean(), "median_f1_micro_test": df_ensemble["f1_micro_test"].median(), "std_f1_micro_test": df_ensemble["f1_micro_test"].std(), "mean_f1_macro_train": df_ensemble["f1_macro_train"].mean(), "median_f1_macro_train": df_ensemble["f1_macro_train"].median(), "std_f1_macro_train": df_ensemble["f1_macro_train"].std(), "mean_f1_macro_test": df_ensemble["f1_macro_test"].mean(), "median_f1_macro_test": df_ensemble["f1_macro_test"].median(), "std_f1_macro_test": df_ensemble["f1_macro_test"].std(), "mean_precision_train": df_ensemble["precision_train"].mean(), "median_precision_train": df_ensemble["precision_train"].median(), "std_precision_train": df_ensemble["precision_train"].std(), "mean_precision_test": df_ensemble["precision_test"].mean(), "median_precision_test": df_ensemble["precision_test"].median(), "std_precision_test": df_ensemble["precision_test"].std(), "mean_recall_train": df_ensemble["recall_train"].mean(), "median_recall_train": df_ensemble["recall_train"].median(), "std_recall_train": df_ensemble["recall_train"].std(), "mean_recall_test": df_ensemble["recall_train"].mean(), "median_recall_test": df_ensemble["recall_train"].median(), "std_recall_test": df_ensemble["recall_train"].std(), "mean_balanced_accuracy_train": df_ensemble[ "balanced_accuracy_train" ].mean(), "median_balanced_accuracy_train": df_ensemble[ "balanced_accuracy_train" ].median(), "std_balanced_accuracy_train": df_ensemble["balanced_accuracy_train"].std(), "mean_balanced_accuracy_test": df_ensemble[ "balanced_accuracy_train" ].mean(), "median_balanced_accuracy_test": df_ensemble[ "balanced_accuracy_train" ].median(), "std_balanced_accuracy_test": df_ensemble["balanced_accuracy_train"].std(), "mean_training_set_size": df_ensemble["training_points"].mean(), "mean_test_set_size": df_ensemble["test_points"].mean(), "mean_training_time": timings, } # now write a .json with metrics for DVC with open(os.path.join(outpath, "train_metrics.json"), "w") as fp: json.dump(summary_metrics, fp) return summary_metrics def train_test_cv(self): """Train an ensemble using a cross-validation technique for evaluation""" # Get different sizes for learning curves if needed trainlogger.debug("the metrics are saved to %s", self.metricspath) trainlogger.debug("the models are saved to %s", self.modelpath) classcounter = dict(Counter(self.y)) trainlogger.info("the classdistribution is %s", classcounter) classes_to_keep = [] for oxidationstate, count in classcounter.items(): if count > MIN_SAMPLES: classes_to_keep.append(oxidationstate) else: trainlogger.warning( "will drop class %s since it has not enough examples", oxidationstate, ) selected_idx = np.where(np.isin(self.y, classes_to_keep))[0] self.x = self.x[selected_idx] self.y = self.y[selected_idx] if self.max_size is not None: assert self.max_size <= len(self.y) rng = np.random.RandomState(RANDOM_SEED) sample_idx = np.arange(self.x.shape[0]) sampled_idx = rng.choice(sample_idx, size=self.max_size, 
replace=True) self.x = self.x[sampled_idx] self.y = self.y[sampled_idx] if self.train_one_fold: trainlogger.info( "Entering full training mode, which trains on only one fold." ) trainlogger.info( "This mode should only be used when the selected architecture is stable" ) bs = MLOxidationStates.get_train_test_split(self.x, self.y, self.n) metrics = self.train_eval_single(list(enumerate(bs))[0]) self.bootstrap_results.extend(metrics) else: if self.eval_method == "kfold": bs = MLOxidationStates.get_train_test_split(self.x, self.y, self.n) elif self.eval_method == "bootstrap": bs = MLOxidationStates.get_bootstrap(self.x, self.y, self.n) else: bs = MLOxidationStates.get_train_test_split(self.x, self.y, self.n) # all_predictions = [] # do not run this concurrently since the state of the scaler is not clear! with concurrent.futures.ProcessPoolExecutor( max_workers=self.max_workers ) as executor: for metrics in executor.map( self.train_eval_single, enumerate(list(bs)) ): # all_predictions.extend(predfull) self.bootstrap_results.extend(metrics)
                 padding='same', activation=params['activation']))
model.add(Dropout(params['dropout']))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer=params['optimizer'],
              metrics=['accuracy'])

# print model.summary() to preserve automatically in `Output` tab
print(model.summary())

params.update({'total_number_of_parameters': model.count_params()})

# will log metrics with the prefix 'train_'
with experiment.train():
    model.fit(X_train, y_train,
              epochs=params['epochs'],
              batch_size=params['batch_size'],
              verbose=1,
              validation_data=(X_test, y_test))

# will log metrics with the prefix 'test_'
with experiment.test():
    loss, accuracy = model.evaluate(X_test, y_test)
    metrics = {'loss': loss, 'accuracy': accuracy}
    experiment.log_multiple_metrics(metrics)

experiment.log_multiple_params(params)
experiment.log_dataset_hash(X_train)  # creates and logs a hash of your data
def training_loop( G_args = {}, # Options for generator network. D_args = {}, # Options for discriminator network. G_opt_args = {}, # Options for generator optimizer. D_opt_args = {}, # Options for discriminator optimizer. G_loss_args = {}, # Options for generator loss. D_loss_args = {}, # Options for discriminator loss. dataset_args = {}, # Options for dataset.load_dataset(). sched_args = {}, # Options for train.TrainingSchedule. grid_args = {}, # Options for train.setup_snapshot_image_grid(). metric_arg_list = [], # Options for MetricGroup. tf_config = {}, # Options for tflib.init_tf(). data_dir = None, # Directory to load datasets from. G_smoothing_kimg = 10.0, # Half-life of the running average of generator weights. minibatch_repeats = 4, # Number of minibatches to run before adjusting training parameters. lazy_regularization = True, # Perform regularization as a separate training step? G_reg_interval = 4, # How often the perform regularization for G? Ignored if lazy_regularization=False. D_reg_interval = 16, # How often the perform regularization for D? Ignored if lazy_regularization=False. reset_opt_for_new_lod = True, # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced? total_kimg = 25000, # Total length of the training, measured in thousands of real images. mirror_augment = False, # Enable mirror augment? drange_net = [-1,1], # Dynamic range used when feeding image data to the networks. image_snapshot_ticks = 50, # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'. network_snapshot_ticks = 50, # How often to save network snapshots? None = only save 'networks-final.pkl'. save_tf_graph = False, # Include full TensorFlow computation graph in the tfevents file? save_weight_histograms = False, # Include weight histograms in the tfevents file? resume_pkl = None, # Network pickle to resume training from, None = train from scratch. resume_kimg = 0.0, # Assumed training progress at the beginning. Affects reporting and training schedule. resume_time = 0.0, # Assumed wallclock time at the beginning. Affects reporting. resume_with_new_nets = False): # Construct new networks according to G_args and D_args before resuming training? # Initialize dnnlib and TensorFlow. tflib.init_tf(tf_config) num_gpus = dnnlib.submit_config.num_gpus e = Experiment("Your API Key") e.log_parameters(params) # Load training set. training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir), verbose=True, **dataset_args) grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(training_set, **grid_args) misc.save_image_grid(grid_reals, dnnlib.make_run_dir_path('reals.png'), drange=training_set.dynamic_range, grid_size=grid_size) for i in range(len(training_set)) e.log_image(training_set[i]) # Construct or load networks. with tf.device('/gpu:0'): if resume_pkl is None or resume_with_new_nets: print('Constructing networks...') G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args) D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args) Gs = G.clone('Gs') if resume_pkl is not None: print('Loading networks from "%s"...' % resume_pkl) rG, rD, rGs = misc.load_pkl(resume_pkl) if resume_with_new_nets: G.copy_vars_from(rG); D.copy_vars_from(rD); Gs.copy_vars_from(rGs) else: G = rG; D = rD; Gs = rGs # Print layers and generate initial image snapshot. 
G.print_layers(); D.print_layers() sched = training_schedule(cur_nimg=total_kimg*1000, training_set=training_set, **sched_args) grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:]) grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu) misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes_init.png'), drange=drange_net, grid_size=grid_size) # Setup training inputs. print('Building TensorFlow graph...') with tf.name_scope('Inputs'), tf.device('/cpu:0'): lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[]) lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[]) minibatch_size_in = tf.placeholder(tf.int32, name='minibatch_size_in', shape=[]) minibatch_gpu_in = tf.placeholder(tf.int32, name='minibatch_gpu_in', shape=[]) minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in * num_gpus) Gs_beta = 0.5 ** tf.div(tf.cast(minibatch_size_in, tf.float32), G_smoothing_kimg * 1000.0) if G_smoothing_kimg > 0.0 else 0.0 # Setup optimizers. G_opt_args = dict(G_opt_args) D_opt_args = dict(D_opt_args) for args, reg_interval in [(G_opt_args, G_reg_interval), (D_opt_args, D_reg_interval)]: args['minibatch_multiplier'] = minibatch_multiplier args['learning_rate'] = lrate_in if lazy_regularization: mb_ratio = reg_interval / (reg_interval + 1) args['learning_rate'] *= mb_ratio if 'beta1' in args: args['beta1'] **= mb_ratio if 'beta2' in args: args['beta2'] **= mb_ratio G_opt = tflib.Optimizer(name='TrainG', **G_opt_args) D_opt = tflib.Optimizer(name='TrainD', **D_opt_args) G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args) D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args) # Build training graph for each GPU. images = e.log_image(dnnlib.make_run_dir_path('fakes_init.png'), 'Initial Fakes') for i in range(len(image)) e.log_image(images[i]) data_fetch_ops = [] for gpu in range(num_gpus): with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu): # Create GPU-specific shadow copies of G and D. G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow') D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow') # Fetch training data via temporary variables. with tf.name_scope('DataFetch'): sched = training_schedule(cur_nimg=int(resume_kimg*1000), training_set=training_set, **sched_args) reals_var = tf.Variable(name='reals', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu] + training_set.shape)) labels_var = tf.Variable(name='labels', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu, training_set.label_size])) reals_write, labels_write = training_set.get_minibatch_tf() reals_write, labels_write = process_reals(reals_write, labels_write, lod_in, mirror_augment, training_set.dynamic_range, drange_net) reals_write = tf.concat([reals_write, reals_var[minibatch_gpu_in:]], axis=0) labels_write = tf.concat([labels_write, labels_var[minibatch_gpu_in:]], axis=0) data_fetch_ops += [tf.assign(reals_var, reals_write)] data_fetch_ops += [tf.assign(labels_var, labels_write)] reals_read = reals_var[:minibatch_gpu_in] labels_read = labels_var[:minibatch_gpu_in] # Evaluate loss functions. 
lod_assign_ops = [] if 'lod' in G_gpu.vars: lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)] if 'lod' in D_gpu.vars: lod_assign_ops += [tf.assign(D_gpu.vars['lod'], lod_in)] with tf.control_dependencies(lod_assign_ops): with tf.name_scope('G_loss'): G_loss, G_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=G_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, **G_loss_args) with tf.name_scope('D_loss'): D_loss, D_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, reals=reals_read, labels=labels_read, **D_loss_args) # Register gradients. if not lazy_regularization: if G_reg is not None: G_loss += G_reg if D_reg is not None: D_loss += D_reg else: if G_reg is not None: G_reg_opt.register_gradients(tf.reduce_mean(G_reg * G_reg_interval), G_gpu.trainables) if D_reg is not None: D_reg_opt.register_gradients(tf.reduce_mean(D_reg * D_reg_interval), D_gpu.trainables) G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables) D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables) # Setup training ops. data_fetch_op = tf.group(*data_fetch_ops) G_train_op = G_opt.apply_updates() D_train_op = D_opt.apply_updates() G_reg_op = G_reg_opt.apply_updates(allow_no_op=True) D_reg_op = D_reg_opt.apply_updates(allow_no_op=True) Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta) # Finalize graph. with tf.device('/gpu:0'): try: peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse() except tf.errors.NotFoundError: peak_gpu_mem_op = tf.constant(0) tflib.init_uninitialized_vars() print('Initializing logs...') summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path()) if save_tf_graph: summary_log.add_graph(tf.get_default_graph()) if save_weight_histograms: G.setup_weight_histograms(); D.setup_weight_histograms() metrics = metric_base.MetricGroup(metric_arg_list) print('Training for %d kimg...\n' % total_kimg) dnnlib.RunContext.get().update('', cur_epoch=resume_kimg, max_epoch=total_kimg) maintenance_time = dnnlib.RunContext.get().get_last_update_interval() cur_nimg = int(resume_kimg * 1000) cur_tick = -1 tick_start_nimg = cur_nimg prev_lod = -1.0 running_mb_counter = 0 while cur_nimg < total_kimg * 1000: if dnnlib.RunContext.get().should_stop(): break # Choose training parameters and configure training ops. sched = training_schedule(cur_nimg=cur_nimg, training_set=training_set, **sched_args) assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0 training_set.configure(sched.minibatch_gpu, sched.lod) if reset_opt_for_new_lod: if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(sched.lod) != np.ceil(prev_lod): G_opt.reset_optimizer_state(); D_opt.reset_optimizer_state() prev_lod = sched.lod # Run training ops. feed_dict = {lod_in: sched.lod, lrate_in: sched.G_lrate, minibatch_size_in: sched.minibatch_size, minibatch_gpu_in: sched.minibatch_gpu} for _repeat in range(minibatch_repeats): rounds = range(0, sched.minibatch_size, sched.minibatch_gpu * num_gpus) run_G_reg = (lazy_regularization and running_mb_counter % G_reg_interval == 0) run_D_reg = (lazy_regularization and running_mb_counter % D_reg_interval == 0) cur_nimg += sched.minibatch_size running_mb_counter += 1 # Fast path without gradient accumulation. if len(rounds) == 1: tflib.run([G_train_op, data_fetch_op], feed_dict) if run_G_reg: tflib.run(G_reg_op, feed_dict) tflib.run([D_train_op, Gs_update_op], feed_dict) if run_D_reg: tflib.run(D_reg_op, feed_dict) # Slow path with gradient accumulation. 
else: for _round in rounds: tflib.run(G_train_op, feed_dict) if run_G_reg: for _round in rounds: tflib.run(G_reg_op, feed_dict) tflib.run(Gs_update_op, feed_dict) for _round in rounds: tflib.run(data_fetch_op, feed_dict) tflib.run(D_train_op, feed_dict) if run_D_reg: for _round in rounds: tflib.run(D_reg_op, feed_dict) # Perform maintenance tasks once per tick. done = (cur_nimg >= total_kimg * 1000) if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done: cur_tick += 1 tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0 tick_start_nimg = cur_nimg tick_time = dnnlib.RunContext.get().get_time_since_last_update() total_time = dnnlib.RunContext.get().get_time_since_start() + resume_time # Report progress. print('tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f' % ( autosummary('Progress/tick', cur_tick), autosummary('Progress/kimg', cur_nimg / 1000.0), autosummary('Progress/lod', sched.lod), autosummary('Progress/minibatch', sched.minibatch_size), dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)), autosummary('Timing/sec_per_tick', tick_time), autosummary('Timing/sec_per_kimg', tick_time / tick_kimg), autosummary('Timing/maintenance_sec', maintenance_time), autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30))) autosummary('Timing/total_hours', total_time / (60.0 * 60.0)) autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0)) e.log_dataset_hash(training_set) # Save snapshots. if image_snapshot_ticks is not None and (cur_tick % image_snapshot_ticks == 0 or done): grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu) misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes%06d.png' % (cur_nimg // 1000)), drange=drange_net, grid_size=grid_size) if network_snapshot_ticks is not None and (cur_tick % network_snapshot_ticks == 0 or done): pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' % (cur_nimg // 1000)) misc.save_pkl((G, D, Gs), pkl) metrics.run(pkl, run_dir=dnnlib.make_run_dir_path(), data_dir=dnnlib.convert_path(data_dir), num_gpus=num_gpus, tf_config=tf_config) # Update summaries and RunContext. metrics.update_autosummaries() tflib.autosummary.save_summaries(summary_log, cur_nimg) dnnlib.RunContext.get().update('%.2f' % sched.lod, cur_epoch=cur_nimg // 1000, max_epoch=total_kimg) maintenance_time = dnnlib.RunContext.get().get_last_update_interval() - tick_time # Save final snapshot. misc.save_pkl((G, D, Gs), dnnlib.make_run_dir_path('network-final.pkl')) # All done. summary_log.close() training_set.close()