Exemplo n.º 1
0
def reglogistic(InputFileName, inclusion_list = None):
    """Fit a classifier over a sweep of lambda values and score the best one.

    Pipeline, in order: load the data, append classifications, normalise,
    append features, expand the features according to ``inclusion_list``,
    write the expanded data out, then split it into two cross-validation
    folds and a test fold.  For every lambda candidate a model is trained
    on each CV fold and scored on the other one; the candidate with the
    highest percentage of correct classifications is finally scored on the
    test fold.

    Args:
        InputFileName: passed straight to ``load_data``.
        inclusion_list: eight per-feature values handed to
            ``FeatureExpander.expand_features``.  When ``None`` or empty,
            the built-in default below is used.

    Returns:
        A tuple ``(best_result, final_score)`` where ``best_result`` is
        ``(theta, lamb, cv_score)`` and ``final_score`` is the percentage
        of correct classifications on the test fold.
    """
    raw_data = load_data(InputFileName)

    all_normalised_data = append_classifications(raw_data)
    all_normalised_data = normalise(all_normalised_data)
    training_data = append_features(all_normalised_data)
    expander = FeatureExpander(training_data)

    if not inclusion_list:
        inclusion_list = [
            0,  # last sv
            0,  # last change in sv
            0,  # mean of prev 10 rows sv
            0,  # std dev of prev 10 rows sv
            0,  # last sp
            1,  # last change in sp
            0,  # mean of prev 10 rows sp
            1,  # std dev of prev 10 rows sp
        ]

    expanded = expander.expand_features(inclusion_list)

    # NOTE(review): fp_out is not defined in this function; it is presumably
    # a module-level output handle/path -- confirm it exists before calling.
    write_to_file(expanded, fp_out)

    expanded_CV1, expanded_CV2, expanded_test = split_data_3_folds(expanded)

    results = []

    lamb_resolution = 5
    for i in range(1, lamb_resolution):
        # Force true division: under Python 2, int / int floors, which made
        # every candidate 0 instead of 0.2, 0.4, 0.6, 0.8.
        lamb = i / float(lamb_resolution)
        print(lamb)
        THETA_CV1 = unflatten_theta(regression(expanded_CV1, lamb))
        THETA_CV2 = unflatten_theta(regression(expanded_CV2, lamb))

        # Each model is scored on the fold it was NOT trained on.
        results.append((THETA_CV1, lamb, percentage_correct_classifications(THETA_CV1, expanded_CV2)))
        results.append((THETA_CV2, lamb, percentage_correct_classifications(THETA_CV2, expanded_CV1)))

    # Highest CV score wins; max() keeps the earliest entry on ties.
    best_result = max(results, key=lambda result: result[2])

    final_score = percentage_correct_classifications(best_result[0], expanded_test)

    return (best_result, final_score)
Exemplo n.º 2
0
def reglinear(InputFileName, inclusion_list = None):
    """Fit a regression over a sweep of lambda values and score the best one.

    Pipeline, in order: load the data, append features, expand the
    features according to ``inclusion_list``, write the expanded data out,
    then split it into two cross-validation folds and a test fold.  For
    every lambda candidate a model is trained on each CV fold and its mean
    squared error is computed on the other one; the candidate with the
    lowest MSE is finally scored on the test fold.

    Args:
        InputFileName: passed straight to ``load_data``.
        inclusion_list: eight per-feature values handed to
            ``FeatureExpander.expand_features``.  When ``None`` or empty,
            the built-in default below is used.

    Returns:
        A tuple ``(best_result, final_score)`` where ``best_result`` is
        ``(theta, lamb, cv_mse)`` and ``final_score`` is the result of
        ``calc_mse`` on the test fold.
    """
    raw_data = load_data(InputFileName)

    training_data = append_features(raw_data)

    expander = FeatureExpander(training_data)

    if not inclusion_list:
        inclusion_list = [
            1,  # last sv
            1,  # last change in sv
            0,  # mean of prev 10 rows sv
            1,  # std dev of prev 10 rows sv
            0,  # last sp
            0,  # last change in sp
            2,  # mean of prev 10 rows sp
            0,  # std dev of prev 10 rows sp
        ]

    expanded = expander.expand_features(inclusion_list)

    # NOTE(review): fp_out is not defined in this function; it is presumably
    # a module-level output handle/path -- confirm it exists before calling.
    write_to_file(expanded, fp_out)

    expanded_CV1, expanded_CV2, expanded_test = split_data_3_folds(expanded)

    results = []

    lamb_resolution = 100
    for i in range(1, lamb_resolution):
        # Force true division: under Python 2, int / int floors, which made
        # every candidate 0 instead of 0.01 ... 0.99.
        lamb = i / float(lamb_resolution)
        print(lamb)
        THETA_CV1 = regression(expanded_CV1, lamb)
        THETA_CV2 = regression(expanded_CV2, lamb)

        # Each model is scored on the fold it was NOT trained on.
        results.append((THETA_CV1, lamb, calc_mse(expanded_CV2, THETA_CV1)))
        results.append((THETA_CV2, lamb, calc_mse(expanded_CV1, THETA_CV2)))

    # Lowest CV error wins; min() keeps the earliest entry on ties.
    best_result = min(results, key=lambda result: result[2])

    final_score = calc_mse(expanded_test, best_result[0])

    return (best_result, final_score)