def save_perf(params, scores, tgt):
    """Append a row describing this run (params, scores, target) to the performance CSV."""
    # Building the performance dictionary
    perf = params.copy()
    perf["target"] = tgt
    now = datetime.datetime.now()
    perf["date"] = now.strftime("%Y-%m-%d %H:%M")
    perf = merge_params(perf, scores)
    new_perf = pd.DataFrame.from_dict(perf, orient="index").T

    # Appending to the existing performance records and saving them back to disk
    # (pd.concat replaces the removed DataFrame.append API).
    perfs = get_perfs()
    perfs = pd.concat([perfs, new_perf], ignore_index=True, sort=True)
    perfs.to_csv(get_perf_path(), index=False)
    return new_perf
def predictKaggle(df_name, model_name, params, is_GPU=True):
    '''Use the dataset and the model provided with the params to generate a Kaggle prediction.'''
    docs, params_data = data.get_kaggle_docs(df_name)  # Load the raw docs
    params = merge_params(params_data, params)  # Force the parameters to be those of the dataset

    all_preds_han = []
    n_target = 1 if params["full_pred"] else 4
    for tgt in range(n_target):
        print('* * * * * * *', tgt, '* * * * * * *')
        embeddings = data.get_embeddings(roll2vec=params["roll2vec"],
                                         multiplier=params["embs_multiplier"])
        model = HAN(embeddings,
                    docs.shape,
                    is_GPU=is_GPU,
                    activation=params["activation"],
                    drop_rate=params["drop_rate"],
                    n_units=params["n_units"],
                    multi_dense=params["multi_dense"],
                    dense_acti=params["dense_acti"],
                    full_pred=params["full_pred"])
        if params["full_pred"]:
            tgt = "full"
        model_file = os.path.join(data.data_path, "models/",
                                  "{}_{}_{}_model.h5".format(model_name, df_name, tgt))
        model.load_weights(model_file)
        all_preds_han.append(model.predict(docs).tolist())

    # Flatten the per-target predictions into a single list
    all_preds_han = [elt[0] for sublist in all_preds_han for elt in sublist]

    kaggle_file = os.path.join(data.data_path, "predictions/",
                               "preds_{}_{}.txt".format(model_name, df_name))
    with open(kaggle_file, 'w') as file:
        file.write('id,pred\n')
        for idx, pred in enumerate(all_preds_han):
            pred = format(pred, '.7f')
            file.write(str(idx) + ',' + pred + '\n')
    print("The Kaggle file has been saved: {}".format(kaggle_file))
def accuracy(trainable_params, untrainable_params, batch):
    inputs, targets = batch
    target_class = jnp.argmax(targets, axis=1)
    params = merge_params(trainable_params, untrainable_params)
    predicted_class = jnp.argmax(net.apply(params, inputs), axis=1)
    return jnp.mean(predicted_class == target_class)
def loss(trainable_params, untrainable_params, batch):
    # Cross-entropy under one-hot targets (assuming net.apply returns log-probabilities).
    inputs, targets = batch
    preds = net.apply(merge_params(trainable_params, untrainable_params), inputs)
    return -jnp.mean(jnp.sum(preds * targets, axis=1))
def train(net, init_params, trainable_predicate, log_prefix):
    def loss(trainable_params, untrainable_params, batch):
        inputs, targets = batch
        preds = net.apply(merge_params(trainable_params, untrainable_params), inputs)
        return -jnp.mean(jnp.sum(preds * targets, axis=1))

    def accuracy(trainable_params, untrainable_params, batch):
        inputs, targets = batch
        target_class = jnp.argmax(targets, axis=1)
        params = merge_params(trainable_params, untrainable_params)
        predicted_class = jnp.argmax(net.apply(params, inputs), axis=1)
        return jnp.mean(predicted_class == target_class)

    tx = optax.adam(config.learning_rate)

    @jit
    def update(opt_state, trainable_params, untrainable_params, batch):
        batch_loss, g = value_and_grad(loss)(trainable_params, untrainable_params, batch)
        # Standard gradient update on the smooth part.
        updates, opt_state = tx.update(g, opt_state)
        trainable_params = optax.apply_updates(trainable_params, updates)
        # TODO: Proximal update on the L1 non-smooth part.
        return opt_state, trainable_params, untrainable_params, batch_loss

    trainable_params, untrainable_params = partition_dict(trainable_predicate,
                                                          flatten_params(init_params))
    print("Trainable params:")
    print(tree_map(jnp.shape, trainable_params))
    assert len(trainable_params) > 0

    opt_state = tx.init(trainable_params)
    itercount = itertools.count()
    batches = data_stream()
    start_time = time.time()
    for epoch in tqdm(range(config.num_epochs)):
        for _ in range(num_batches):
            step = next(itercount)
            opt_state, trainable_params, untrainable_params, batch_loss = update(
                opt_state, trainable_params, untrainable_params, next(batches))
            wandb.log({
                f"{log_prefix}/batch_loss": batch_loss,
                "step": step,
                "wallclock": time.time() - start_time
            })

        # Calculate the proportion of gains that are dead.
        # gains, _ = ravel_pytree(
        #     tree_map(lambda x: x.gain if isinstance(x, ProximalGainLayerWeights) else jnp.array([]),
        #              params,
        #              is_leaf=lambda x: isinstance(x, ProximalGainLayerWeights)))
        # dead_units_proportion = jnp.sum(jnp.abs(gains) < 1e-12) / jnp.size(gains)
        # print(dead_units_proportion)

        wandb.log({
            f"{log_prefix}/train_loss":
                loss(trainable_params, untrainable_params, (train_images, train_labels)),
            f"{log_prefix}/test_loss":
                loss(trainable_params, untrainable_params, (test_images, test_labels)),
            f"{log_prefix}/train_accuracy":
                accuracy(trainable_params, untrainable_params, (train_images, train_labels)),
            f"{log_prefix}/test_accuracy":
                accuracy(trainable_params, untrainable_params, (test_images, test_labels)),
            # f"{log_prefix}/dead_units_proportion": dead_units_proportion,
            "step": step,
            "epoch": epoch,
            "wallclock": time.time() - start_time
        })

    return merge_params(trainable_params, untrainable_params)
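# NOTE (sketch, not part of the original code): the `update` step above leaves the
# proximal step for the L1 non-smooth part as a TODO. One standard way to fill it is
# elementwise soft-thresholding applied right after optax.apply_updates. The penalty
# strength `l1_lambda` below is a hypothetical hyperparameter, and with Adam a single
# scalar `step_size` is only an approximation of the per-parameter step sizes.
def soft_threshold(x, threshold):
    # Proximal operator of threshold * ||x||_1: shrink each entry toward zero.
    return jnp.sign(x) * jnp.maximum(jnp.abs(x) - threshold, 0.0)


def prox_l1(trainable_params, step_size, l1_lambda):
    # Apply soft-thresholding leaf-by-leaf over the pytree of trainable parameters.
    return tree_map(lambda p: soft_threshold(p, step_size * l1_lambda), trainable_params)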
def run_training(df_name, model_name, is_GPU=True, params=None):
    default_params = {
        "nb_epochs": 10,
        "my_patience": 4,
        "batch_size": 80,
        "optimizer": "adam",
        "learning_rate": 0.01,
        "momentum": 0.9,
        "nesterov": True,
        "activation": "linear",
        "drop_rate": 0.3,
        "n_units": 50,
        "roll2vec": True,
        "embs_multiplier": 1,
        "multi_dense": True,
        "dense_acti": "linear",
        "full_pred": True,
    }
    params = merge_params(params, default_params)

    docs, target, params_data = data.get_dataset(df_name)
    params = merge_params(params_data, params)  # Force the parameters to be those of the dataset

    X_train, X_test, y_train, y_test = train_test_split(docs, target, test_size=0.3)
    params["split_id"] = random_id()  # id to identify the split later

    # = = = = = fitting the model on 4 targets = = = = =

    # Building the models
    embeddings = data.get_embeddings(roll2vec=params["roll2vec"],
                                     multiplier=params["embs_multiplier"])
    print("### EMBS SHAPE : {} ###".format(embeddings.shape))

    model = HAN(embeddings,
                docs.shape,
                is_GPU=is_GPU,
                activation=params["activation"],
                drop_rate=params["drop_rate"],
                n_units=params["n_units"],
                multi_dense=params["multi_dense"],
                dense_acti=params["dense_acti"],
                full_pred=params["full_pred"])

    if params["optimizer"] == 'sgd':
        decay_rate = params["learning_rate"] / params["nb_epochs"]
        my_optimizer = optimizers.SGD(lr=params["learning_rate"],
                                      decay=decay_rate,
                                      momentum=params["momentum"],
                                      nesterov=params["nesterov"])
    elif params["optimizer"] == 'adam':
        my_optimizer = optimizers.Adam()
    elif params["optimizer"] == 'nadam':
        my_optimizer = optimizers.Nadam()

    model.compile(loss='mean_squared_error',
                  optimizer=my_optimizer,
                  metrics=['mae'])

    # Training for each target
    params["train_id"] = random_id()
    n_target = 1 if params["full_pred"] else 4
    for tgt in range(n_target):
        t0 = time.process_time()

        # = = = = = training = = = = =
        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=params["my_patience"],
                                       mode='min')

        # save model corresponding to best epoch
        if params["full_pred"]:
            tgt = "full"
        model_file = os.path.join(data.data_path, "models/",
                                  "{}_{}_{}_model.h5".format(model_name, df_name, tgt))
        checkpointer = ModelCheckpoint(filepath=model_file,
                                       verbose=1,
                                       save_best_only=True,
                                       save_weights_only=True)
        my_callbacks = [early_stopping, checkpointer]

        y_train_tgt = y_train if params["full_pred"] else y_train[tgt]
        y_test_tgt = y_test if params["full_pred"] else y_test[tgt]

        model.fit(X_train,
                  y_train_tgt,
                  batch_size=params["batch_size"],
                  epochs=params["nb_epochs"],
                  validation_data=(X_test, y_test_tgt),
                  callbacks=my_callbacks)

        T = time.process_time() - t0
        hist = model.history.history
        scores = get_scores(hist)
        scores["T"] = T
        data.save_perf(params, scores, tgt)
        print("################ {} minutes spent...###########".format(round(T / 60)))

    return params
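# Example usage (sketch, not from the original code): the dataset and model names
# below are placeholders. run_training returns the merged params, which can then be
# reused to build the matching Kaggle submission with predictKaggle.
if __name__ == "__main__":
    params = run_training("my_dataset", "han", is_GPU=True,
                          params={"nb_epochs": 5, "batch_size": 64})
    predictKaggle("my_dataset", "han", params, is_GPU=True)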