def main():
    
    
    print "starting debugging:"

    SPLIT_POINTER = -1

    from expenv import MultiSplitSet
    from helper import Options 
    
    
    # select dataset
    #multi_split_set = MultiSplitSet.get(387)
    #multi_split_set = MultiSplitSet.get(407)
    multi_split_set = MultiSplitSet.get(399)

    #dataset_name = multi_split_set.description

    
    # create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeRBFKernel" #"WeightedDegreeStringKernel"#"PolyKernel" 
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 0.2
    param.base_similarity = 1.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666
    
    flags= {}
    #flags["boosting"] = "ones"
    #flags["boosting"] = "L1"
    flags["boosting"] = "L2"
    #flags["boosting"] = "L2_reg"
    flags["signum"] = False
    flags["normalize_cost"] = True
    flags["all_positions"] = False
    
    flags["wdk_rbf_on"] = False
    
    param.flags = flags
    
    param.freeze()
    

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)


    # train
    mymethod = Method(param)
    mymethod.train(data_train)


    assessment = mymethod.evaluate(data_eval)
    
    print assessment
    
    assessment.destroySelf()
def main():
    
    
    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options 
    
    
    # select dataset
    multi_split_set = MultiSplitSet.get(434)

    
    # flags
    flags = {}
    flags["normalize_cost"] = False
    flags["epsilon"] = 1.0 
    #0.005
    flags["kernel_cache"] = 200
    flags["use_bias"] = False 

    # arts params
    flags["svm_type"] = "liblineardual"

    flags["degree"] = 24
    flags["degree_spectrum"] = 4
    flags["shifts"] = 0 #32
    flags["center_offset"] = 70
    flags["train_factor"] = 1

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "Promoter"
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.flags = flags
    param.taxonomy = multi_split_set.taxonomy
    
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)


    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)
    
    print assessment
    
    assessment.destroySelf()
# Example #3 (0)
def main():
    
    
    print "starting debugging:"

    SPLIT_POINTER = -1

    from expenv import MultiSplitSet
    from helper import Options 
    
    
    # select dataset
    #multi_split_set = MultiSplitSet.get(387)
    #multi_split_set = MultiSplitSet.get(407)
    multi_split_set = MultiSplitSet.get(399)

    #dataset_name = multi_split_set.description

    
    # create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"#"PolyKernel" 
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 0.2
    param.base_similarity = 1
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666
    
    flags= {}
    #flags["boosting"] = "ones"
    flags["boosting"] = "L1"
    #flags["boosting"] = "L2"
    #flags["boosting"] = "L2_reg"
    flags["signum"] = False
    flags["normalize_cost"] = True
    flags["all_positions"] = False
    flags["wdk_rbf_on"] = False
    
    param.flags = flags
    
    param.freeze()
    

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)


    # train
    mymethod = Method(param)
    mymethod.train(data_train)


    assessment = mymethod.evaluate(data_eval)
    
    print assessment
    
    assessment.destroySelf()
# Example #4 (0)
def main():
    
    
    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
     
        
    # select dataset
    multi_split_set = MultiSplitSet.get(384)
    
    # flags
    flags = {}
    flags["normalize_cost"] = False
    #flags["epsilon"] = 0.005
    flags["kernel_cache"] = 200
    flags["use_bias"] = False 

    # arts params
    #flags["svm_type"] = "liblineardual"

    flags["degree"] = 24

    flags["local"] = False
    flags["mem"] = "6G"
    flags["maxNumThreads"] = 1
    
    
    #create mock param object by freezable struct
    param = Options()
    #param.kernel = "GaussianKernel"
    param.kernel = "PolyKernel"
    param.sigma = 3.0
    param.cost = 10.0
    param.transform = 1.0
    param.id = 666
    param.flags = flags
    param.taxonomy = multi_split_set.taxonomy.data
    
    param.freeze()
    
    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)
    
    print assessment
    
    assessment.destroySelf()
# Example #5 (0)
def main():

    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(432)

    # flags
    flags = {}
    flags["normalize_cost"] = False
    #flags["epsilon"] = 0.005
    flags["kernel_cache"] = 200
    flags["use_bias"] = False

    # arts params
    flags["svm_type"] = "liblineardual"

    flags["degree"] = 24
    flags["degree_spectrum"] = 4
    flags["shifts"] = 0  #32
    flags["center_offset"] = 70
    flags["train_factor"] = 1

    flags["local"] = False
    flags["mem"] = "6G"
    flags["maxNumThreads"] = 1

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "Promoter"
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.flags = flags
    param.taxonomy = multi_split_set.taxonomy.data

    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)

    print assessment

    assessment.destroySelf()
def main():
    
    
    print "starting debugging:"

    SPLIT_POINTER = -1

    from expenv import MultiSplitSet
    from helper import Options 
    
    
    # select dataset
    multi_split_set = MultiSplitSet.get(399)

    #dataset_name = multi_split_set.description
    flags = {}
    flags["normalize_cost"] = False
    flags["epsilon"] = 0.05
    flags["cache_size"] = 7
    #flags["solver_type"] = "ST_DIRECT" #ST_CPLEX #ST_GLPK) #ST_DIRECT) #ST_NEWTON)
    flags["normalize_trace"] = True
    flags["interleaved"] = True
    
    
    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 1
    param.cost = 1
    param.transform = 1 #2.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666
    
    
    param.flags = flags
    
    param.freeze()
    

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)


    # train
    mymethod = Method(param)
    mymethod.train(data_train)


    assessment = mymethod.evaluate(data_eval)
    
    print assessment
    
    assessment.destroySelf()
# Example #7 (0)
def main():
    
    
    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options 
    from task_similarities import fetch_gammas
    
    
    # select dataset
    multi_split_set = MultiSplitSet.get(317)
    #multi_split_set = MultiSplitSet.get(374)
    #multi_split_set = MultiSplitSet.get(2)

    dataset_name = multi_split_set.description

    transform = 1.0
    base = 1.0
    similarity_matrix = fetch_gammas(transform, base, dataset_name) 
        

    #create mock taxonomy object by freezable struct
    taxonomy = Options()
    taxonomy.data = similarity_matrix
    taxonomy.description = dataset_name
    taxonomy.freeze()
    
    
    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 1
    param.cost = 1.0
    param.transform = 1.0
    param.taxonomy = taxonomy
    param.id = 666
    
    param.freeze()
    


    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    create_plot_inner(param, data_train, data_eval)
def main():
        
    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options 
    
    
    # select dataset
    multi_split_set = MultiSplitSet.get(379)

    dataset_name = multi_split_set.description

    print "dataset_name", dataset_name
    
    #create mock taxonomy object by freezable struct
    #taxonomy = Options()
    #taxonomy.data = taxonomy_graph.data
    #taxonomy.description = dataset_name
    #taxonomy.freeze()
    
    
    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 1
    param.cost = 1.0
    param.transform = 2.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666
    
    param.freeze()
    

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train hierarchical xval
    mymethod = Method(param)
    mymethod.train(data_train)
    
    assessment = mymethod.evaluate(data_eval)
    
    print assessment
    
    assessment.destroySelf();
# Example #9 (0)
def training_for_sigma(sigma):

    print "starting debugging:"


    from expenv import MultiSplitSet
        
    # select dataset
    multi_split_set = MultiSplitSet.get(393)

    SPLIT_POINTER = 1
    
    #create mock param object by freezable struct
    param = Options()
    param.kernel =  "WeightedDegreeStringKernel" #"WeightedDegreeRBFKernel" # #
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 1.0 
    param.id = 666
    param.base_similarity = sigma
    param.degree = 2
    param.flags = {}
    
    param.flags["wdk_rbf_on"] = False   
    param.freeze()
    

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)


    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)
    
    print assessment
    
    assessment.destroySelf()



    return assessment.auROC
# Example #10 (0)
def main():

    print "starting debugging:"

    SPLIT_POINTER = -1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(399)

    #dataset_name = multi_split_set.description
    flags = {}
    flags["normalize_cost"] = False
    flags["epsilon"] = 0.05
    flags["cache_size"] = 7
    #flags["solver_type"] = "ST_DIRECT" #ST_CPLEX #ST_GLPK) #ST_DIRECT) #ST_NEWTON)
    flags["normalize_trace"] = True
    flags["interleaved"] = True

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 1
    param.cost = 1
    param.transform = 1  #2.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666

    param.flags = flags

    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    assessment = mymethod.evaluate(data_eval)

    print assessment

    assessment.destroySelf()
def main():

    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(379)

    dataset_name = multi_split_set.description

    print "dataset_name", dataset_name

    #create mock taxonomy object by freezable struct
    #taxonomy = Options()
    #taxonomy.data = taxonomy_graph.data
    #taxonomy.description = dataset_name
    #taxonomy.freeze()

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 1
    param.cost = 1.0
    param.transform = 2.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666

    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train hierarchical xval
    mymethod = Method(param)
    mymethod.train(data_train)

    assessment = mymethod.evaluate(data_eval)

    print assessment

    assessment.destroySelf()
# Example #12 (0)
def main():
    
    
    print "starting debugging:"

    SPLIT_POINTER = -1

    from expenv import MultiSplitSet
    from helper import Options 
    
    
    # select dataset
    #multi_split_set = MultiSplitSet.get(387)
    multi_split_set = MultiSplitSet.get(386)

    #dataset_name = multi_split_set.description

    
    # create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"#"PolyKernel" 
    param.wdk_degree = 1
    param.cost = 100
    param.transform = 2 #2.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666
    
    param.freeze()
    

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)


    # train
    mymethod = Method(param)
    mymethod.train(data_train)


    assessment = mymethod.evaluate(data_eval)
    
    print assessment
    
    assessment.destroySelf()
# Example #13 (0)
def main():
    
    
    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options 
    
    
    # select dataset
    multi_split_set = MultiSplitSet.get(399)

    
    #create mock param object by freezable struct
    param = Options()
    param.kernel =  "WeightedDegreeRBFKernel" #"WeightedDegreeStringKernel"# #
    param.wdk_degree = 1
    param.cost = 1.0
    param.transform = 1.0
    param.sigma = 1.0
    param.id = 666
    param.base_similarity = 1
    param.degree = 2
    
    param.freeze()
    

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)


    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)
    
    print assessment
    
    assessment.destroySelf()
# Example #14 (0)
def main():
    
    
    print "starting debugging:"

    SPLIT_POINTER = -1

    from expenv import MultiSplitSet
    from helper import Options 
    
    
    # select dataset
    #multi_split_set = MultiSplitSet.get(387)
    multi_split_set = MultiSplitSet.get(386)

    #dataset_name = multi_split_set.description

    
    # create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"#"PolyKernel" 
    param.wdk_degree = 1
    param.cost = 1
    param.transform = 2 #2.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666
    
    param.freeze()
    

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)


    # train
    mymethod = Method(param)
    mymethod.train(data_train)


    assessment = mymethod.evaluate(data_eval)
    
    print assessment
    
    assessment.destroySelf()
# Example #15 (0)
def main():

    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(384)

    # flags
    flags = {}
    flags["normalize_cost"] = False
    flags["kernel_cache"] = 1000
    flags["use_bias"] = False
    #flags["debug"] = False

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "PolyKernel"
    param.cost = 100.0
    param.id = 1
    param.flags = flags

    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)

    print assessment

    assessment.destroySelf()
# Example #16 (0)
def training_for_sigma(sigma):

    print "starting debugging:"

    from expenv import MultiSplitSet

    # select dataset
    multi_split_set = MultiSplitSet.get(393)

    SPLIT_POINTER = 1

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"  #"WeightedDegreeRBFKernel" # #
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.base_similarity = sigma
    param.degree = 2
    param.flags = {}

    param.flags["wdk_rbf_on"] = False
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)

    print assessment

    assessment.destroySelf()

    return assessment.auROC
# Example #17 (0)
def main():

    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(399)

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeRBFKernel"  #"WeightedDegreeStringKernel"# #
    param.wdk_degree = 1
    param.cost = 1.0
    param.transform = 1.0
    param.sigma = 1.0
    param.id = 666
    param.base_similarity = 1
    param.degree = 2

    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)

    print assessment

    assessment.destroySelf()
# Example #18 (0)
def run_multi_example(dataset_idx, mymethod, comment):
    """
    Set up and run a multi-source experiment over a grid of parameters.

    Parameters
    ----------
    dataset_idx : id passed to MultiSplitSet.get to load the dataset.
    mymethod : method module name; used both for display and as the
        module_name of each created Method.
    comment : free text appended to the experiment description.

    Returns
    -------
    The id of the created MultiSourceExperiment.

    NOTE(review): relies on module-level names not defined in this function
    (numpy, MultiSplitSet, MultiSourceExperiment, ParameterMultiSvm, Method,
    dataset_to_hierarchy, execute_runs, select_best_and_test, target) --
    confirm they are imported/defined at module scope before moving this code.
    """

    
    #######################################
    # fix parameters
    #######################################

    flags= {}

    # general
    flags["normalize_cost"] = True #False
    flags["epsilon"] = 0.03
    flags["cache_size"] = 500
    
    # Boosting
    #flags["boosting"] = "ones"
    #flags["boosting"] = "L1"
    #flags["boosting"] = "L2"
    flags["boosting"] = "L2_reg"
    #flags["use_all_nodes"] = False
    flags["signum"] = False
    #flags["all_positions"] = True

    
    # MKL
    #flags["solver_type"] = "ST_DIRECT" #ST_CPLEX #ST_GLPK) #ST_DIRECT) #ST_NEWTON)
    #flags["normalize_trace"] = True
    #flags["interleaved"] = True
    #flags["mkl_q"] = 0
    
    #WDK_RBF
    flags["wdk_rbf_on"] = False
    
    # define parameter search space [float(numpy.power(10, 3.58))] #
    # costs: 8 log-spaced values between 1e3 and 1e5, tried largest first
    costs = [float(c) for c in numpy.exp(numpy.linspace(numpy.log(1000), numpy.log(100000), 8))]
    #costs = [float(c) for c in numpy.exp(numpy.linspace(numpy.log(float(numpy.power(10, 3))), numpy.log(10000), 4))]
    #costs =  [float(c) for c in numpy.exp(numpy.linspace(numpy.log(0.01), numpy.log(1000), 8))] 
    #[float(c) for c in numpy.exp(numpy.linspace(numpy.log(10), numpy.log(2000), 10))]
    costs.reverse()
    
    
    degrees = [1,2,3,4,5] #[1, 5, 10, 15, 20, 22]
    #print "WARNING: Degree is ONE"
    
    base_similarities = [200] #[float(c) for c in numpy.exp(numpy.linspace(numpy.log(1), numpy.log(1000), 8))]
    #base_similarities = [float(c) for c in numpy.linspace(1, 5000, 6)] #[1]
    #transform_params =  [float(c) for c in numpy.linspace(1, 10000, 6)] #[1] #1.5, 2.0, 2.5, 3.0] #, 3.5, 4.0, 4.5, 5.0]
    #transform_params = [float(c) for c in numpy.linspace(0.01, 0.99, 6)]
    transform_params = [0.99]
    
    # snapshot of every local defined so far, stored as experiment meta data;
    # do NOT rename locals above without realizing it changes this record
    generation_parameters = locals()
    
    
    #######################################
    # create experiment
    #######################################    
    
    # select dataset
    multi_split_set = MultiSplitSet.get(dataset_idx)


    dataset_name = multi_split_set.description

    print "method:", mymethod
    print "dataset:", dataset_name
    print "multi split set id:", dataset_idx

    experiment_description = dataset_name + " (" + mymethod + ") " + comment
    
    
    # allow different features/kernel types
    feature_type = multi_split_set.feature_type
    
    if feature_type == "string":
        kernel_type = "WeightedDegreeStringKernel"
    else:
        kernel_type = "PolyKernel"
    
    
    # NOTE(review): this unconditionally overrides the kernel chosen above
    kernel_type = "WeightedDegreeRBFKernel"
    
    
    # create experiment
    experiment = MultiSourceExperiment(split_set = multi_split_set, 
                                       description = experiment_description, 
                                       method_name = mymethod,
                                       meta_data = generation_parameters)
    
    print "experiment id:", experiment.id
    

    
    #######################################
    # create runs
    #######################################    
    
    
    if multi_split_set.taxonomy==None:
        print "WARNING: NO taxonomy set, generating one for dataset " + dataset_name
        taxonomy = dataset_to_hierarchy(dataset_name)
    else:
        taxonomy = multi_split_set.taxonomy
        
    
    # one Method per point of the (cost x degree x base x transform) grid
    for cost in costs:
        for degree in degrees:
            for base in base_similarities:
                for transform in transform_params:

                    param = ParameterMultiSvm(cost=cost, 
                                              wdk_degree=degree, 
                                              base_similarity=base, 
                                              transform=transform, 
                                              taxonomy=taxonomy,
                                              kernel=kernel_type,
                                              flags=flags)

                    print param

                    Method(module_name=mymethod, param=param, experiment=experiment)
    

    # skip model selection if we only have one model
    if len(experiment.methods) > 1:
    
        # create evaluation runs based on splits and methods
        run_ids = [run.id for run in experiment.create_eval_runs()]
    
        # execute runs
        execute_runs(run_ids)


    # finally perform model selection and retrain
    # NOTE(review): 'target' is not defined in this function -- presumably a
    # module-level constant; verify before reuse
    select_best_and_test(experiment, target)
    #experiment.select_best_method(target)

    return experiment.id
def main():
    """
    Debug run: compare inner-xval model selection against a regular sweep on
    split set 432 (Promoter kernel), then plot both performance curves.

    NOTE(review): depends on module-level names not defined in this function
    (SPLIT_POINTER, RANGE, TARGET_PARAM, TARGET_TASK, TARGET_MEASURE,
    create_plot_inner, create_plot_regular) -- confirm they exist at module
    scope, otherwise this raises NameError.
    """
    

    from expenv import MultiSplitSet
    from helper import Options 
    from task_similarities import dataset_to_hierarchy
    
    # select dataset
    #multi_split_set = MultiSplitSet.get(317)
    multi_split_set = MultiSplitSet.get(432)
    #multi_split_set = MultiSplitSet.get(2) #small splicing
    #multi_split_set = MultiSplitSet.get(377) #medium splicing

    dataset_name = multi_split_set.description

    # flags
    flags = {}
    flags["normalize_cost"] = False
    flags["epsilon"] = 1.0 
    #0.005
    flags["kernel_cache"] = 1000
    flags["use_bias"] = False 

    # arts params
    flags["svm_type"] = "liblineardual"

    flags["degree"] = 24
    flags["degree_spectrum"] = 4
    flags["shifts"] = 0 #32
    flags["train_factor"] = 1
    flags["center_offset"] = 70
    flags["center_pos"] = 500


    #create mock param object by freezable struct
    param = Options()
    param.kernel = "Promoter"
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.flags = flags
    param.taxonomy = multi_split_set.taxonomy
    
    param.freeze()


    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # inner-xval selection vs. regular sweep over the same parameter range
    (perf_xval, final_pred, best_idx_cost) = create_plot_inner(param, data_train, data_eval)
    perf_regular = create_plot_regular(param, data_train, data_eval)


    # plot performances
      
    import pylab
    
    if TARGET_PARAM=="both":


        # 2-D sweep: show the xval performance surface as filled contours
        #X,Y = pylab.meshgrid(range(len(RANGE)), range(len(RANGE)))
        
        cmap = pylab.cm.get_cmap('jet', 20)    # 10 discrete colors
        
        pylab.contourf(RANGE, RANGE, perf_xval, cmap=cmap)
        #im = pylab.imshow(perf_xval, cmap=cmap, interpolation='bilinear')
        pylab.axis('on')
        pylab.colorbar()
        
        pylab.title("mss:" + str(multi_split_set.id) + ", task:" + TARGET_TASK + " , param:" + TARGET_PARAM +  ", split:" + str(SPLIT_POINTER))
        
        pylab.show()
    
    else:
        
        # 1-D sweep: both curves on a log-x axis, with markers for the
        # selected parameter (red) and the default value 1.0 (green)
        pylab.semilogx(RANGE, perf_regular, "g-o")
        pylab.semilogx(RANGE, perf_xval, "b-o")
        #pylab.semilogx([a*0.66 for a in RANGE], perf_xval, "b-o")
        
        #pylab.plot(numpy.array(perf_regular) - numpy.array(perf_xval), "y-o")
        
        #pylab.plot([best_idx_cost], [final_pred], "r+")
        pylab.axhline(y=final_pred, color="r")
        pylab.axvline(x=RANGE[best_idx_cost], color="r")
        pylab.axvline(x=1.0, color="g")
        
        pylab.ylabel(TARGET_MEASURE)
        pylab.xlabel(TARGET_PARAM)
        
        pylab.legend( ("outer", "inner xval"), loc="best")
        # NOTE(review): param.wdk_degree is never set on this Promoter param --
        # confirm Options provides it, otherwise this title line fails
        pylab.title("mss:" + str(multi_split_set.id) + ", task:" + TARGET_TASK + " , degree:" + str(param.wdk_degree) +  ", split:" + str(SPLIT_POINTER))
        
        pylab.show()
# Example #20 (0)
def run_multi_example(dataset_idx, mymethod, comment):
    """
    Set up and run a multi-source experiment over a grid of parameters
    (reformatted duplicate of the earlier run_multi_example).

    Parameters
    ----------
    dataset_idx : id passed to MultiSplitSet.get to load the dataset.
    mymethod : method module name; used both for display and as the
        module_name of each created Method.
    comment : free text appended to the experiment description.

    Returns
    -------
    The id of the created MultiSourceExperiment.

    NOTE(review): relies on module-level names not defined in this function
    (numpy, MultiSplitSet, MultiSourceExperiment, ParameterMultiSvm, Method,
    dataset_to_hierarchy, execute_runs, select_best_and_test, target) --
    confirm they are imported/defined at module scope before moving this code.
    """

    #######################################
    # fix parameters
    #######################################

    flags = {}

    # general
    flags["normalize_cost"] = True  #False
    flags["epsilon"] = 0.03
    flags["cache_size"] = 500

    # Boosting
    #flags["boosting"] = "ones"
    #flags["boosting"] = "L1"
    #flags["boosting"] = "L2"
    flags["boosting"] = "L2_reg"
    #flags["use_all_nodes"] = False
    flags["signum"] = False
    #flags["all_positions"] = True

    # MKL
    #flags["solver_type"] = "ST_DIRECT" #ST_CPLEX #ST_GLPK) #ST_DIRECT) #ST_NEWTON)
    #flags["normalize_trace"] = True
    #flags["interleaved"] = True
    #flags["mkl_q"] = 0

    #WDK_RBF
    flags["wdk_rbf_on"] = False

    # define parameter search space [float(numpy.power(10, 3.58))] #
    # costs: 8 log-spaced values between 1e3 and 1e5, tried largest first
    costs = [
        float(c) for c in numpy.exp(
            numpy.linspace(numpy.log(1000), numpy.log(100000), 8))
    ]
    #costs = [float(c) for c in numpy.exp(numpy.linspace(numpy.log(float(numpy.power(10, 3))), numpy.log(10000), 4))]
    #costs =  [float(c) for c in numpy.exp(numpy.linspace(numpy.log(0.01), numpy.log(1000), 8))]
    #[float(c) for c in numpy.exp(numpy.linspace(numpy.log(10), numpy.log(2000), 10))]
    costs.reverse()

    degrees = [1, 2, 3, 4, 5]  #[1, 5, 10, 15, 20, 22]
    #print "WARNING: Degree is ONE"

    base_similarities = [
        200
    ]  #[float(c) for c in numpy.exp(numpy.linspace(numpy.log(1), numpy.log(1000), 8))]
    #base_similarities = [float(c) for c in numpy.linspace(1, 5000, 6)] #[1]
    #transform_params =  [float(c) for c in numpy.linspace(1, 10000, 6)] #[1] #1.5, 2.0, 2.5, 3.0] #, 3.5, 4.0, 4.5, 5.0]
    #transform_params = [float(c) for c in numpy.linspace(0.01, 0.99, 6)]
    transform_params = [0.99]

    # snapshot of every local defined so far, stored as experiment meta data;
    # do NOT rename locals above without realizing it changes this record
    generation_parameters = locals()

    #######################################
    # create experiment
    #######################################

    # select dataset
    multi_split_set = MultiSplitSet.get(dataset_idx)

    dataset_name = multi_split_set.description

    print "method:", mymethod
    print "dataset:", dataset_name
    print "multi split set id:", dataset_idx

    experiment_description = dataset_name + " (" + mymethod + ") " + comment

    # allow different features/kernel types
    feature_type = multi_split_set.feature_type

    if feature_type == "string":
        kernel_type = "WeightedDegreeStringKernel"
    else:
        kernel_type = "PolyKernel"

    # NOTE(review): this unconditionally overrides the kernel chosen above
    kernel_type = "WeightedDegreeRBFKernel"

    # create experiment
    experiment = MultiSourceExperiment(split_set=multi_split_set,
                                       description=experiment_description,
                                       method_name=mymethod,
                                       meta_data=generation_parameters)

    print "experiment id:", experiment.id

    #######################################
    # create runs
    #######################################

    if multi_split_set.taxonomy == None:
        print "WARNING: NO taxonomy set, generating one for dataset " + dataset_name
        taxonomy = dataset_to_hierarchy(dataset_name)
    else:
        taxonomy = multi_split_set.taxonomy

    # one Method per point of the (cost x degree x base x transform) grid
    for cost in costs:
        for degree in degrees:
            for base in base_similarities:
                for transform in transform_params:

                    param = ParameterMultiSvm(cost=cost,
                                              wdk_degree=degree,
                                              base_similarity=base,
                                              transform=transform,
                                              taxonomy=taxonomy,
                                              kernel=kernel_type,
                                              flags=flags)

                    print param

                    Method(module_name=mymethod,
                           param=param,
                           experiment=experiment)

    # skip model selection if we only have one model
    if len(experiment.methods) > 1:

        # create evaluation runs based on splits and methods
        run_ids = [run.id for run in experiment.create_eval_runs()]

        # execute runs
        execute_runs(run_ids)

    # finally perform model selection and retrain
    # NOTE(review): 'target' is not defined in this function -- presumably a
    # module-level constant; verify before reuse
    select_best_and_test(experiment, target)
    #experiment.select_best_method(target)

    return experiment.id