Example #1
def save_perf(params, scores, tgt):
    # Building the performance dictionary
    perf = params.copy()
    perf["target"] = tgt
    now = datetime.datetime.now()
    perf["date"] = now.strftime("%Y-%m-%d %H:%M")
    perf = merge_params(perf, scores)
    new_perf = pd.DataFrame.from_dict(perf, orient="index").T

    # Appending to the existing performance records and saving them back to disk
    perfs = get_perfs()
    perfs = pd.concat([perfs, new_perf], ignore_index=True, sort=True)
    perfs.to_csv(get_perf_path(), index=False)
    
    return new_perf
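
Note: every example on this page calls a merge_params helper that combines two parameter dictionaries, but its implementation is not shown here. A minimal sketch consistent with how it is used above and below (values from the first argument take precedence, and None arguments are tolerated) could look like the following; the exact behaviour is an assumption:

def merge_params(primary, secondary):
    # Hypothetical sketch, not the original implementation:
    # merge two parameter dicts, letting values from `primary` take precedence.
    primary = primary or {}        # tolerate None, as in run_training(params=None)
    secondary = secondary or {}
    merged = dict(secondary)       # start from the lower-priority parameters
    merged.update(primary)         # higher-priority parameters override
    return merged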
Example #2
def predictKaggle(df_name, model_name, params, is_GPU=True):
    '''Use the dataset and the model provided with the params to generate a Kaggle prediction'''
    docs, params_data = data.get_kaggle_docs(df_name)  # Load the raw docs
    # Force the parameters to be those of the dataset
    params = merge_params(params_data, params)

    all_preds_han = []
    n_target = 1 if params["full_pred"] else 4
    for tgt in range(n_target):

        print('* * * * * * *', tgt, '* * * * * * *')

        embeddings = data.get_embeddings(roll2vec=params["roll2vec"],
                                         multiplier=params["embs_multiplier"])

        model = HAN(embeddings,
                    docs.shape,
                    is_GPU=is_GPU,
                    activation=params["activation"],
                    drop_rate=params["drop_rate"],
                    n_units=params["n_units"],
                    multi_dense=params["multi_dense"],
                    dense_acti=params["dense_acti"],
                    full_pred=params["full_pred"])

        if params["full_pred"]: tgt = "full"
        model_file = os.path.join(
            data.data_path, "models/",
            "{}_{}_{}_model.h5".format(model_name, df_name, tgt))
        model.load_weights(model_file)
        all_preds_han.append(model.predict(docs).tolist())

    all_preds_han = [elt[0] for sublist in all_preds_han for elt in sublist]

    kaggle_file = os.path.join(data.data_path, "predictions/",
                               "preds_{}_{}.txt".format(model_name, df_name))
    with open(kaggle_file, 'w') as file:
        file.write('id,pred\n')
        for idx, pred in enumerate(all_preds_han):
            pred = format(pred, '.7f')
            file.write(str(idx) + ',' + pred + '\n')
    print("The Kaggle file has been saved : {}".format(kaggle_file))
Example #3
def accuracy(trainable_params, untrainable_params, batch):
    inputs, targets = batch
    target_class = jnp.argmax(targets, axis=1)
    params = merge_params(trainable_params, untrainable_params)
    predicted_class = jnp.argmax(net.apply(params, inputs), axis=1)
    return jnp.mean(predicted_class == target_class)
Example #4
def loss(trainable_params, untrainable_params, batch):
    inputs, targets = batch
    preds = net.apply(merge_params(trainable_params, untrainable_params),
                      inputs)
    return -jnp.mean(jnp.sum(preds * targets, axis=1))
Example #5
def train(net, init_params, trainable_predicate, log_prefix):
    def loss(trainable_params, untrainable_params, batch):
        inputs, targets = batch
        preds = net.apply(merge_params(trainable_params, untrainable_params),
                          inputs)
        return -jnp.mean(jnp.sum(preds * targets, axis=1))

    def accuracy(trainable_params, untrainable_params, batch):
        inputs, targets = batch
        target_class = jnp.argmax(targets, axis=1)
        params = merge_params(trainable_params, untrainable_params)
        predicted_class = jnp.argmax(net.apply(params, inputs), axis=1)
        return jnp.mean(predicted_class == target_class)

    tx = optax.adam(config.learning_rate)

    @jit
    def update(opt_state, trainable_params, untrainable_params, batch):
        batch_loss, g = value_and_grad(loss)(trainable_params,
                                             untrainable_params, batch)
        # Standard gradient update on the smooth part.
        updates, opt_state = tx.update(g, opt_state)
        trainable_params = optax.apply_updates(trainable_params, updates)
        # TODO: Proximal update on the L1 non-smooth part.
        return opt_state, trainable_params, untrainable_params, batch_loss

    trainable_params, untrainable_params = partition_dict(
        trainable_predicate, flatten_params(init_params))
    print("Trainable params:")
    print(tree_map(jnp.shape, trainable_params))
    assert len(trainable_params) > 0

    opt_state = tx.init(trainable_params)
    itercount = itertools.count()
    batches = data_stream()
    start_time = time.time()
    for epoch in tqdm(range(config.num_epochs)):
        for _ in range(num_batches):
            step = next(itercount)
            opt_state, trainable_params, untrainable_params, batch_loss = update(
                opt_state, trainable_params, untrainable_params, next(batches))
            wandb.log({
                f"{log_prefix}/batch_loss": batch_loss,
                "step": step,
                "wallclock": time.time() - start_time
            })

        # Calculate the proportion of gains that are dead.
        # gains, _ = ravel_pytree(
        #     tree_map(lambda x: x.gain if isinstance(x, ProximalGainLayerWeights) else jnp.array([]),
        #              params,
        #              is_leaf=lambda x: isinstance(x, ProximalGainLayerWeights)))
        # dead_units_proportion = jnp.sum(jnp.abs(gains) < 1e-12) / jnp.size(gains)
        # print(dead_units_proportion)

        wandb.log({
            f"{log_prefix}/train_loss":
                loss(trainable_params, untrainable_params, (train_images, train_labels)),
            f"{log_prefix}/test_loss":
                loss(trainable_params, untrainable_params, (test_images, test_labels)),
            f"{log_prefix}/train_accuracy":
                accuracy(trainable_params, untrainable_params, (train_images, train_labels)),
            f"{log_prefix}/test_accuracy":
                accuracy(trainable_params, untrainable_params, (test_images, test_labels)),
            # f"{log_prefix}/dead_units_proportion": dead_units_proportion,
            "step": step,
            "epoch": epoch,
            "wallclock": time.time() - start_time,
        })

    return merge_params(trainable_params, untrainable_params)
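
The update function above leaves the proximal step for the L1 non-smooth part as a TODO. For reference, the standard soft-thresholding operator that such a step would apply is sketched below; the l1_strength argument and the place where it is called are assumptions, not part of the original code:

import jax.numpy as jnp
from jax import tree_util

def soft_threshold(x, threshold):
    # Proximal operator of threshold * ||x||_1: shrink every entry towards zero.
    return jnp.sign(x) * jnp.maximum(jnp.abs(x) - threshold, 0.0)

def prox_l1(params, l1_strength, step_size):
    # Apply soft-thresholding leaf-wise to the pytree of trainable parameters,
    # typically right after optax.apply_updates inside update().
    return tree_util.tree_map(
        lambda p: soft_threshold(p, step_size * l1_strength), params)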
Example #6
def run_training(df_name, model_name, is_GPU=True, params=None):

    default_params = {
        "nb_epochs": 10,
        "my_patience": 4,
        "batch_size": 80,
        "optimizer": "adam",
        "learning_rate": 0.01,
        "momentum": 0.9,
        "nesterov": True,
        "activation": "linear",
        "drop_rate": 0.3,
        "n_units": 50,
        "roll2vec": True,
        "embs_multiplier": 1,
        "multi_dense": True,
        "dense_acti": "linear",
        "full_pred": True,
    }
    params = merge_params(params, default_params)

    docs, target, params_data = data.get_dataset(df_name)
    # Force the parameters to be those of the dataset
    params = merge_params(params_data, params)
    X_train, X_test, y_train, y_test = train_test_split(docs,
                                                        target,
                                                        test_size=0.3)
    params["split_id"] = random_id()  # id to identify the split later

    # = = = = = fitting the model on 4 targets = = = = #

    # Building the models
    embeddings = data.get_embeddings(roll2vec=params["roll2vec"],
                                     multiplier=params["embs_multiplier"])
    print("### EMBS SHAPE : {} ###".format(embeddings.shape))
    model = HAN(embeddings,
                docs.shape,
                is_GPU=is_GPU,
                activation=params["activation"],
                drop_rate=params["drop_rate"],
                n_units=params["n_units"],
                multi_dense=params["multi_dense"],
                dense_acti=params["dense_acti"],
                full_pred=params["full_pred"])

    if params["optimizer"] == 'sgd':
        decay_rate = params["learning_rate"] / params["nb_epochs"]
        my_optimizer = optimizers.SGD(lr=params["learning_rate"],
                                      decay=decay_rate,
                                      momentum=params["momentum"],
                                      nesterov=params["nesterov"])
    elif params["optimizer"] == 'adam':
        my_optimizer = optimizers.Adam()
    elif params["optimizer"] == 'nadam':
        my_optimizer = optimizers.Nadam()

    model.compile(loss='mean_squared_error',
                  optimizer=my_optimizer,
                  metrics=['mae'])

    # Training for each target
    params["train_id"] = random_id()
    n_target = 1 if params["full_pred"] else 4
    for tgt in range(n_target):
        t0 = time.process_time()
        # = = = = = training = = = = =

        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=params["my_patience"],
                                       mode='min')

        # save model corresponding to best epoch
        if params["full_pred"]: tgt = "full"
        model_file = os.path.join(
            data.data_path, "models/",
            "{}_{}_{}_model.h5".format(model_name, df_name, tgt))
        checkpointer = ModelCheckpoint(filepath=model_file,
                                       verbose=1,
                                       save_best_only=True,
                                       save_weights_only=True)

        my_callbacks = [early_stopping, checkpointer]

        y_train_tgt = y_train if params["full_pred"] else y_train[tgt]
        y_test_tgt = y_test if params["full_pred"] else y_test[tgt]
        model.fit(X_train,
                  y_train_tgt,
                  batch_size=params["batch_size"],
                  epochs=params["nb_epochs"],
                  validation_data=(X_test, y_test_tgt),
                  callbacks=my_callbacks)

        T = time.process_time() - t0
        hist = model.history.history
        scores = get_scores(hist)
        scores["T"] = T

        data.save_perf(params, scores, tgt)
        print("################ {} minutes spent... ###########".format(
            round(T / 60)))

    return params
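
For illustration only, run_training can be called with a partial params dict: merge_params first fills in the remaining defaults, then the dataset-level parameters take precedence. The dataset and model names here are hypothetical:

# Hypothetical names: replace with an actual dataset / model identifier
params = run_training("my_dataset", "han_baseline",
                      is_GPU=False,
                      params={"nb_epochs": 20, "optimizer": "sgd"})
print(params["train_id"], params["split_id"])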