import random

from MaulParams import MaulParams
from MaulDataset import MaulDataset

#
# Top-Level Harness For Maul
#
# Runs a single cross-validation experiment with one fixed kernel / data
# representation.  The commented-out calls below are alternative
# experiments kept as toggles.
#

# Parameters
params = MaulParams()
params.kernelName = 'linear'
params.dataType = 'vector'
params.coef0 = 1

# Random Seed
# Fixed seed so the global `random` state is identical on every run
# (presumably this makes the cross-validation split reproducible --
# confirm against MaulDataset).
random.seed(18283835)

# Initialize a dataset
# This assignment had been commented out while the crossValidate() call
# below was still live, so the script failed with NameError on `dataset`.
dataset = MaulDataset('../data/maul.db', params)

# Alternative experiments (enable at most the ones you want to run):
#dataset.crossValidate('Type', 0.8, 1.0, "WHERE (Type = 'Browser' OR Type = 'Robot')")
#dataset.crossValidate('Family', 0.8, 1.0, "WHERE (Type = 'Browser' AND Family IS NOT NULL)")
#dataset.crossValidate('OS',0.8,1.0,"WHERE (Type = 'Browser' AND OS IS NOT NULL)")

# Active experiment: classify the 'Type' column over browsers, robots and
# mobile browsers.
# NOTE(review): the 0.8 / 1.0 arguments look like split fractions (the
# "train on all of the dataset" variant below passes 1.0, 1.0) -- confirm
# against MaulDataset.crossValidate.
dataset.crossValidate('Type', 0.8, 1.0, "WHERE (Type = 'Browser' OR Type = 'Robot' OR Type = 'Mobile Browser')")

# playing with hard to classify strings
#dataset = MaulDataset('../data/maul.db.excl', params) # load DB which excluded the hard to classify strings
# train on all of the dataset
#dataset.crossValidate('Type', 1.0, 1.0, "WHERE (Type = 'Browser' OR Type = 'Robot' OR Type = 'Mobile Browser')")
# Define parameter combinations
# Each entry pairs a kernel name with the data representation it is run on.
kernelAndType = [
    ('linear', 'vector'),
    ('RBF', 'vector'),
    ('edit', 'tokens'),
    ('subseq', 'tokens'),
]

# Each entry pairs the column being predicted with the SQL WHERE clause
# that selects the rows used for that experiment.
testAndQuery = [
    ('Type', "WHERE (Type = 'Browser' OR Type = 'Robot'"
             " OR Type = 'Mobile Browser')"),
    ('OS', "WHERE (Type = 'Browser' AND OS IS NOT NULL)"),
    ('Family', "WHERE (Type = 'Browser' AND Family IS NOT NULL)"),
]

# Iterate over parameter possibilities: every (kernel, data type) pair is
# evaluated against every (test, query) pair.
# The loop variable is named `dataType` rather than `type` so the builtin
# `type` is not shadowed at module scope during and after the loop.
for kernel, dataType in kernelAndType:
    for test, query in testAndQuery:
        # Parameters: a fresh MaulParams per run so no setting leaks from
        # one configuration into the next.
        params = MaulParams()
        params.kernelName = kernel
        params.dataType = dataType

        # Random Seed: re-seeded before every run so each configuration
        # starts from the same `random` state.
        random.seed(18283835)

        # Initialize a dataset
        dataset = MaulDataset('../data/maul.db', params)

        # Run cross-validation
        results = dataset.crossValidate(test, 0.8, 1.0, query)

        # Print results
        # NOTE(review): writeResults is not defined or imported in the
        # visible part of this file -- confirm it is in scope before
        # running the sweep.
        writeResults(params, test, results)