Exemplo n.º 1
0
import utils.gen_utils as utils

# Unpack the run configuration returned by the project's argument parser.
(
    inp_dir,
    dataset,
    lr,
    batch_size,
    epochs,
    log_expdata,
    embed,
    layer,
    mode,
    embed_mode,
    jobid,
) = utils.parse_args()
# embed_mode options: {mean, cls}
# mode options: {512_head, 512_tail, 256_head_tail}
network = "LR"
# Echo the key settings of this run.
print("{} : {} : {} : {} : {}".format(dataset, embed, layer, mode, embed_mode))
n_classes = 2
# The job id doubles as the random seed for both NumPy and TensorFlow.
seed = jobid
np.random.seed(seed)
tf.random.set_seed(seed)

# Timestamp taken right after seeding.
start = time.time()
# NOTE(review): presumably the directory where experiment logs are written -- confirm against its users.
path = "explogs/"


def merge_features(embedding, other_features):
    df = pd.merge(embedding, other_features, left_index=True, right_index=True)
Exemplo n.º 2
0
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import numpy as np
import pickle
import time
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

import utils.gen_utils as utils

# Unpack the run configuration returned by the project's argument parser.
inp_dir, dataset_type, network, lr, batch_size, epochs, seed, write_file, embed, layer, mode, embed_mode = utils.parse_args(
)
n_classes = 2
# Seed NumPy and TensorFlow (through the TF1 compatibility API) with the same value.
np.random.seed(seed)
tf.compat.v1.set_random_seed(seed)

# Timestamp taken right after seeding.
start = time.time()


def classification(X_train, X_test, y_train, y_test, file_name):
    """Fit a logistic-regression model and score it on the held-out split.

    Args:
        X_train: Training features passed to ``LogisticRegression.fit``.
        X_test: Evaluation features passed to ``LogisticRegression.score``.
        y_train: Training labels.
        y_test: Evaluation labels.
        file_name: Stem used to build the model path; the path is only
            needed by the currently disabled model dump.

    Returns:
        The value of ``LogisticRegression.score(X_test, y_test)``
        (mean accuracy, per the scikit-learn documentation).
    """
    # Destination for the serialized model; kept for the disabled dump below.
    model_name = 'LR-models/' + file_name + '-mean.joblib'
    # random_state is pinned to 0 regardless of the script-level seed.
    model = LogisticRegression(random_state=0).fit(X_train, y_train)
    # Persisting is currently disabled: joblib.dump(model, model_name)
    return model.score(X_test, y_test)

Exemplo n.º 3
0
                                                 dataset, lr, batch_size, epochs, MODEL_INPUT, embed, layer, mode, embed_mode, jobid

    pd.set_option('display.max_columns', None)
    print(df.head(5))

    # save the results of our experiment
    if (log_expdata):
        Path(path).mkdir(parents=True, exist_ok=True)
        if (not os.path.exists(path + 'expdata.csv')):
            df.to_csv(path + 'expdata.csv', mode='a', header=True)
        else:
            df.to_csv(path + 'expdata.csv', mode='a', header=False)


if __name__ == "__main__":
    inp_dir, dataset, lr, batch_size, epochs, log_expdata, embed, layer, mode, embed_mode, jobid = utils.parse_args()
    print('{} : {} : {} : {} : {}'.format(dataset, embed, layer, mode, embed_mode))
    n_classes = 2
    features_dim = 123
    MODEL_INPUT = 'combined_features'
    path = 'explogs/'
    network = 'SVM'
    np.random.seed(jobid)
    tf.random.set_seed(jobid)

    nrc, nrc_vad, readability, mairesse = [True, True, True, True]
    feature_flags = [nrc, nrc_vad, readability, mairesse]

    start = time.time()

    if (re.search(r'base', embed)):