# Exemplo n.º 1 (scraped example header; vote count: 0)
# Stacked-autoencoder configuration (presumably Keras-style — TODO confirm
# against the model-building code that consumes these).
hidden_layers = [800, 500, 300, ]  # units per hidden encoder layer
noise_layers = [0.6, 0.4, ]  # corruption levels used during pre-training
pretr_activ = 'sigmoid'  # activation for the pre-training phase
pretr_opt = 'adadelta'  # optimizer for the pre-training phase
pretr_loss = 'mse'  # loss for the pre-training phase
fine_activ = 'softmax'  # output activation for fine-tuning/classification
fine_opt = 'adadelta'  # optimizer for fine-tuning
fine_loss = 'categorical_crossentropy'  # loss for fine-tuning

def clean(words):
    """Return *words* as a list of strings, skipping float entries.

    Float entries are dropped because pandas represents missing text
    fields as ``float('nan')``; every surviving word is coerced to
    ``str``.

    PEP 8 (E731): a named callable should be a ``def``, not a lambda
    bound to a name. ``isinstance`` replaces the ``type(...) is`` check
    so float subclasses (e.g. ``numpy.float64`` NaNs) are filtered too.
    """
    return [str(word) for word in words if not isinstance(word, float)]

print('\n{}\n'.format('-' * 50))
print('Reading the dataset..')
# Build the preprocessor from the externally-defined parameter dict
# (`preprocess_params` and `Preprocess` come from outside this excerpt).
preprocessor = Preprocess(**preprocess_params)

# Fixed user-facing typo: "Spliting" -> "Splitting".
print('Splitting the dataset..')
enron_dataset = preprocessor.dataset
# Split the e-mail bodies (features) and labels into train/test sets;
# `utils.split_dataset` is project code not visible here.
enron_dataset = utils.split_dataset(x=enron_dataset['body'].values,
                                    y=enron_dataset['label'].values)

print('Transforming dataset into vectors and matrices..')
# Vectorize the split dataset and keep the fitted vocabulary for reuse.
enron_dataset = preprocessor.transform(dataset=enron_dataset)
vocabulary = preprocessor.vocabulary

print('\n{}\n'.format('-' * 50))
print('Building model..')

# Accumulators for per-layer encoder models and noise settings —
# presumably filled by pre-training code beyond this excerpt (TODO confirm).
encoders = []
noises = []
# Exemplo n.º 2 (scraped example header; vote count: 0)
# Arquivo: spam.py — Projeto: benigls/spam
print("\n{}\n".format("-" * 50))

# Optionally (re)generate the raw Enron dataset from disk.
# NOTE(review): `enron_dataset` is bound ONLY inside this branch; the
# `else` branch at the Preprocess construction below reads it — verify
# the config combination generate=False / read_csv=False cannot occur,
# otherwise that path raises NameError.
if CONFIG["dataset"]["generate"]:
    print("Reading the dataset..")
    dataset = EnronDataset(path=CONFIG["dataset"]["path"])

    enron_dataset = dataset.get_dataset()

    # Optionally persist the freshly generated dataset as CSV.
    if CONFIG["dataset"]["output"]:
        print("Exporting the dataset..")
        dataset.to_csv(filepath=CONFIG["dataset"]["filepath"])


# Build the preprocessor either from a CSV on disk (read_csv=True) or
# from the in-memory dataset produced by the generation step above.
if CONFIG["preprocess"]["params"]["read_csv"]:
    print("Reading the dataset..")
    preprocessor = Preprocess(**CONFIG["preprocess"]["params"])
else:
    # NOTE(review): `enron_dataset` only exists when
    # CONFIG["dataset"]["generate"] was true — confirm the config
    # guarantees that, or this line raises NameError.
    preprocessor = Preprocess(dataset=enron_dataset, **CONFIG["preprocess"]["params"])

# Optional cleaning pass over the loaded dataset.
if CONFIG["preprocess"]["clean_dataset"]:
    print("Cleaning the dataset..")
    preprocessor.clean_data()

# Optionally export the cleaned dataset for inspection/reuse.
if CONFIG["preprocess"]["output_csv"]:
    print("Exporting clean dataset..")
    preprocessor.dataset.to_csv(CONFIG["preprocess"]["output_csv_filepath"])

# Fixed user-facing typo: "Spliting" -> "Splitting".
print("Splitting the dataset..")
enron_dataset = preprocessor.dataset
# Split e-mail bodies (x) and labels (y) into train/test partitions;
# `utils.split_dataset` is project code not visible in this excerpt.
enron_dataset = utils.split_dataset(x=enron_dataset["body"].values, y=enron_dataset["label"].values)