def problem9(digit_set, lambda_coef):
    """Print a per-digit comparison of two feature transforms.

    Data is read from ``dr.TR_DATA_FN`` and ``dr.TEST_DATA_FN``; each set is
    passed through both ``transform_add_x0`` and ``second_order_transform``.
    For every digit, labels come from ``dr.label_data_vs_all`` and weights
    from ``rlr.linear_reg_w_weight_decay``; errors are measured with
    ``rlr.estimate_err`` on both data sets.

    Each printed row holds: digit, err_in / err_out for the x0 transform,
    err_in / err_out for the second-order transform, and three flags
    (see the inline comments below).

    Args:
        digit_set: iterable of digits; one table row is printed per digit.
        lambda_coef: coefficient forwarded to
            ``rlr.linear_reg_w_weight_decay``.
    """
    train_x, train_digits = dr.read_data(dr.TR_DATA_FN)
    train_lin = transform_add_x0(train_x)
    train_quad = second_order_transform(train_x)

    test_x, test_digits = dr.read_data(dr.TEST_DATA_FN)
    test_lin = transform_add_x0(test_x)
    test_quad = second_order_transform(test_x)

    print("# | err_in_x0 | err_out_x0 | err_in_2nd_order | err_out_2nd_order | overfitting  | <=0.95 | >= 1.05")

    for digit in digit_set:
        train_y = dr.label_data_vs_all(train_digits, digit)

        w_lin = rlr.linear_reg_w_weight_decay(train_lin, train_y, lambda_coef)
        e_in_lin = rlr.estimate_err(w_lin, train_lin, train_y)

        w_quad = rlr.linear_reg_w_weight_decay(train_quad, train_y, lambda_coef)
        e_in_quad = rlr.estimate_err(w_quad, train_quad, train_y)

        test_y = dr.label_data_vs_all(test_digits, digit)
        e_out_lin = rlr.estimate_err(w_lin, test_lin, test_y)
        e_out_quad = rlr.estimate_err(w_quad, test_quad, test_y)

        # "overfitting": the out-minus-in gap is larger for the second-order fit.
        gap_grew = (e_out_quad - e_in_quad) > (e_out_lin - e_in_lin)
        # Second-order test error at most 95% of the x0 test error.
        at_most_95 = e_out_quad <= 0.95*e_out_lin
        # Second-order test error at least 105% of the x0 test error.
        at_least_105 = e_out_quad >= 1.05*e_out_lin

        row = (digit, e_in_lin, e_out_lin, e_in_quad, e_out_quad,
               gap_grew, at_most_95, at_least_105)
        print("| ".join(map(str, row)))
# Example #2
# 0
def experiment_3(k):
    """Fit a linear model on the training split and score both splits.

    The two data sets returned by ``read_dta_25_10('./data/in.dta')`` are each
    passed, together with ``k``, through ``nonlinear_transformation_to_k``.
    Weights are fitted on the training part with
    ``hw6_2LinRegRegularized.linear_reg``; ``estimate_err`` is then evaluated
    on the training part (``err_in``) and on the validation part
    (``err_out``).  Both errors are printed.

    Args:
        k: forwarded unchanged to ``nonlinear_transformation_to_k``.

    Returns:
        Tuple ``(w, err_in, err_out)``.
    """
    val_data, tr_data = read_dta_25_10('./data/in.dta')

    # Model training on the transformed training split.
    train_z, train_y = nonlinear_transformation_to_k(tr_data, k)
    w = hw6_2LinRegRegularized.linear_reg(train_z, train_y)

    err_in = hw6_2LinRegRegularized.estimate_err(w, train_z, train_y)
    print(" ".join(["err_in=", str(err_in)]))

    # Same transform applied to the validation split for the second error.
    val_z, val_y = nonlinear_transformation_to_k(val_data, k)
    err_out = hw6_2LinRegRegularized.estimate_err(w, val_z, val_y)
    print(" ".join(["err_out=", str(err_out)]))

    return w, err_in, err_out
def problem10(digit1, digit2, lambdas):
    """Print training and test error of a digit1-vs-digit2 fit for each lambda.

    Data is read from ``dr.TR_DATA_FN`` and ``dr.TEST_DATA_FN``, reduced and
    labelled by ``dr.label_data_vs_one``, then passed through
    ``second_order_transform``.  For every coefficient in ``lambdas`` the
    weights are fitted with ``rlr.linear_reg_w_weight_decay`` and the error is
    measured with ``rlr.estimate_err`` on both data sets.

    Args:
        digit1: first digit, forwarded to ``dr.label_data_vs_one``.
        digit2: second digit, forwarded to ``dr.label_data_vs_one``.
        lambdas: iterable of coefficients; one output line is printed
            per coefficient as ``lambda err_in err_out``.
    """
    datapoints, digits = dr.read_data(dr.TR_DATA_FN)
    labels, datapoints = dr.label_data_vs_one(digit1, digit2, digits, datapoints)
    datapoints_2nd_order = second_order_transform(datapoints)

    test_datapoints, test_digits = dr.read_data(dr.TEST_DATA_FN)
    test_labels, test_datapoints = dr.label_data_vs_one(digit1, digit2, test_digits, test_datapoints)
    test_datapoints_2nd_order = second_order_transform(test_datapoints)

    for lambda_coef in lambdas:
        w_2nd_order = rlr.linear_reg_w_weight_decay(datapoints_2nd_order, labels, lambda_coef)

        err_in_2nd_order = rlr.estimate_err(w_2nd_order, datapoints_2nd_order, labels)
        err_out_2nd_order = rlr.estimate_err(w_2nd_order, test_datapoints_2nd_order, test_labels)

        # Each row is newline-terminated so results for different
        # coefficients do not run together on a single line.
        row = (lambda_coef, err_in_2nd_order, err_out_2nd_order)
        print(" ".join(str(value) for value in row))
def problem7(digit_set, lambda_coef):
    """Print the training error of a digit-vs-all fit for each digit.

    Data is read from ``dr.TR_DATA_FN`` and passed through
    ``transform_add_x0``.  A preview (first five rows and the shape) is
    printed before and after the transform.  For every digit, labels come
    from ``dr.label_data_vs_all``, weights from
    ``rlr.linear_reg_w_weight_decay``, and the error from
    ``rlr.estimate_err`` on the same data used for fitting.

    Args:
        digit_set: iterable of digits; one ``digit err_in`` line is printed
            per digit.
        lambda_coef: coefficient forwarded to
            ``rlr.linear_reg_w_weight_decay``.
    """
    raw_x, train_digits = dr.read_data(dr.TR_DATA_FN)

    # Preview of the data as read.
    print(" ".join([str(raw_x[:5]), str(train_digits[:5])]))
    print(raw_x.shape)

    train_x = transform_add_x0(raw_x)

    # Preview of the data after the transform.
    print(train_x[:5])
    print(train_x.shape)

    for digit in digit_set:
        train_y = dr.label_data_vs_all(train_digits, digit)
        weights = rlr.linear_reg_w_weight_decay(train_x, train_y, lambda_coef)
        err_in = rlr.estimate_err(weights, train_x, train_y)
        print(" ".join([str(digit), str(err_in)]))
def problem8(digit_set, lambda_coef):
    """Print the test error of a second-order digit-vs-all fit for each digit.

    Data is read from ``dr.TR_DATA_FN`` and ``dr.TEST_DATA_FN`` and both sets
    are passed through ``second_order_transform``.  A preview of the
    untransformed training data (first five rows and the shape) is printed
    first.  For every digit, weights are fitted on the training set with
    ``rlr.linear_reg_w_weight_decay`` and the error is measured with
    ``rlr.estimate_err`` on the test set.

    Args:
        digit_set: iterable of digits; one ``digit err_out`` line is printed
            per digit.
        lambda_coef: coefficient forwarded to
            ``rlr.linear_reg_w_weight_decay``.
    """
    raw_x, train_digits = dr.read_data(dr.TR_DATA_FN)

    # Preview of the training data as read.
    print(" ".join([str(raw_x[:5]), str(train_digits[:5])]))
    print(raw_x.shape)

    train_z = second_order_transform(raw_x)

    raw_test_x, test_digits = dr.read_data(dr.TEST_DATA_FN)
    test_z = second_order_transform(raw_test_x)

    for digit in digit_set:
        train_y = dr.label_data_vs_all(train_digits, digit)
        weights = rlr.linear_reg_w_weight_decay(train_z, train_y, lambda_coef)

        test_y = dr.label_data_vs_all(test_digits, digit)
        err_out = rlr.estimate_err(weights, test_z, test_y)
        print(" ".join([str(digit), str(err_out)]))