Exemplo n.º 1
0
import random
from MaulParams import MaulParams
from MaulDataset import MaulDataset

#
# Top-Level Harness For Maul
#

# Parameters
# Configure a linear kernel over vector-typed data.
params = MaulParams()
params.kernelName = 'linear'
params.dataType = 'vector'
params.coef0 = 1

# Random Seed
# Fixed seed so that Python's `random` module yields the same sequence on
# every run of this harness.
random.seed(18283835)

# Initialize a dataset
# `dataset` must be constructed before any crossValidate call below;
# without this line the script fails with a NameError.
dataset = MaulDataset('../data/maul.db', params)

# Alternative experiments -- enable one in place of the active call below.
#dataset.crossValidate('Type', 0.8, 1.0, "WHERE (Type = 'Browser' OR Type = 'Robot')")
#dataset.crossValidate('Family', 0.8, 1.0, "WHERE (Type = 'Browser' AND Family IS NOT NULL)")
#dataset.crossValidate('OS',0.8,1.0,"WHERE (Type = 'Browser' AND OS IS NOT NULL)")

# Active experiment: cross-validate on 'Type' over Browser, Robot and
# Mobile Browser rows.
# NOTE(review): the two numeric arguments are not documented here; the
# 1.0/1.0 variant further down is described as training on the whole
# dataset, so the first is presumably the training fraction -- confirm
# against MaulDataset.crossValidate.
dataset.crossValidate('Type', 0.8, 1.0, "WHERE (Type = 'Browser' OR Type = 'Robot' OR Type = 'Mobile Browser')")


# playing with hard to classify strings
#dataset = MaulDataset('../data/maul.db.excl', params) # load DB which excluded the hard to classify  strings
# train on all of the dataset
#dataset.crossValidate('Type', 1.0, 1.0, "WHERE (Type = 'Browser' OR Type = 'Robot' OR Type = 'Mobile Browser')")
Exemplo n.º 2
0

# Define parameter combinations
# Each pair is (kernel name, data type), assigned to MaulParams below.
kernelAndType = [('linear', 'vector'), ('RBF', 'vector'), ('edit', 'tokens'),
                 ('subseq', 'tokens')]
# Each pair is (test name, WHERE-clause string); both are passed through to
# crossValidate unchanged.
testAndQuery = [('Type', "WHERE (Type = 'Browser' OR Type = 'Robot'"
                         " OR Type = 'Mobile Browser')"),
                ('OS', "WHERE (Type = 'Browser' AND OS IS NOT NULL)"),
                ('Family', "WHERE (Type = 'Browser' AND Family IS NOT NULL)")]

# Iterate over parameter possibilities:
# The loop variable is named `dataType` rather than `type` so the builtin
# `type` is not shadowed at module scope.
for kernel, dataType in kernelAndType:
    for test, query in testAndQuery:

        # Parameters
        params = MaulParams()
        params.kernelName = kernel
        params.dataType = dataType

        # Random Seed
        # Re-seed before every combination so each one starts from the same
        # state of Python's `random` module.
        random.seed(18283835)

        # Initialize a dataset
        dataset = MaulDataset('../data/maul.db', params)

        # Run cross-validation
        # NOTE(review): meaning of 0.8 and 1.0 is not visible here --
        # confirm against MaulDataset.crossValidate.
        results = dataset.crossValidate(test, 0.8, 1.0, query)

        # Print results
        # `writeResults` is not defined in this snippet; it must be provided
        # elsewhere in the file.
        writeResults(params, test, results)
Exemplo n.º 3
0
# Define parameter combinations
# Each pair is (kernel name, data type), assigned to MaulParams below.
kernelAndType = [('linear', 'vector'),
                 ('RBF', 'vector'),
                 ('edit', 'tokens'),
                 ('subseq', 'tokens')]
# Each pair is (test name, WHERE-clause string); both are passed through to
# crossValidate unchanged.
testAndQuery = [('Type', "WHERE (Type = 'Browser' OR Type = 'Robot'"
                         " OR Type = 'Mobile Browser')"),
                ('OS', "WHERE (Type = 'Browser' AND OS IS NOT NULL)"),
                ('Family', "WHERE (Type = 'Browser' AND Family IS NOT NULL)")]

# Iterate over parameter possibilities:
# The loop variable is named `dataType` rather than `type` so the builtin
# `type` is not shadowed at module scope.
for kernel, dataType in kernelAndType:
  for test, query in testAndQuery:

    # Parameters
    params = MaulParams()
    params.kernelName = kernel
    params.dataType = dataType

    # Random Seed
    # Re-seed before every combination so each one starts from the same
    # state of Python's `random` module.
    random.seed(18283835)

    # Initialize a dataset
    dataset = MaulDataset('../data/maul.db', params)

    # Run cross-validation
    # NOTE(review): meaning of 0.8 and 1.0 is not visible here --
    # confirm against MaulDataset.crossValidate.
    results = dataset.crossValidate(test, 0.8, 1.0, query)

    # Print results
    # `writeResults` is not defined in this snippet; it must be provided
    # elsewhere in the file.
    writeResults(params, test, results)