def problem9(digit_set, lambda_coef):
    """Print a per-digit error table for two feature transforms.

    The training and test sets are each passed through ``transform_add_x0``
    and ``second_order_transform``.  For every digit a weight vector is
    fitted on each training feature set with
    ``rlr.linear_reg_w_weight_decay`` and its error is estimated on both the
    training data (``err_in``) and the test data (``err_out``).

    Each printed row holds the digit, the four errors, and three booleans:

    * whether the out-minus-in error gap of the second-order model exceeds
      that of the x0 model,
    * whether the second-order test error is at most 95% of the x0 one,
    * whether the second-order test error is at least 105% of the x0 one.

    Args:
        digit_set: iterable of digits; each one is handed to
            ``dr.label_data_vs_all`` to build the labels.
        lambda_coef: coefficient forwarded to
            ``rlr.linear_reg_w_weight_decay``.

    Returns:
        None.  The table is written to stdout.
    """
    train_points, train_digits = dr.read_data(dr.TR_DATA_FN)
    train_plain = transform_add_x0(train_points)
    train_quad = second_order_transform(train_points)

    test_points, test_digits = dr.read_data(dr.TEST_DATA_FN)
    test_plain = transform_add_x0(test_points)
    test_quad = second_order_transform(test_points)

    print("# | err_in_x0 | err_out_x0 | err_in_2nd_order | err_out_2nd_order | overfitting | <=0.95 | >= 1.05")

    for digit in digit_set:
        train_labels = dr.label_data_vs_all(train_digits, digit)

        weights_plain = rlr.linear_reg_w_weight_decay(
            train_plain, train_labels, lambda_coef)
        err_in_plain = rlr.estimate_err(
            weights_plain, train_plain, train_labels)

        weights_quad = rlr.linear_reg_w_weight_decay(
            train_quad, train_labels, lambda_coef)
        err_in_quad = rlr.estimate_err(
            weights_quad, train_quad, train_labels)

        held_out_labels = dr.label_data_vs_all(test_digits, digit)
        err_out_plain = rlr.estimate_err(
            weights_plain, test_plain, held_out_labels)
        err_out_quad = rlr.estimate_err(
            weights_quad, test_quad, held_out_labels)

        # Generalization gap (test error minus training error) per model.
        plain_gap = err_out_plain - err_in_plain
        quad_gap = err_out_quad - err_in_quad

        row = (
            digit,
            err_in_plain,
            err_out_plain,
            err_in_quad,
            err_out_quad,
            quad_gap > plain_gap,
            err_out_quad <= 0.95 * err_out_plain,
            err_out_quad >= 1.05 * err_out_plain,
        )
        print("| ".join(map(str, row)))
def experiment_3(k):
    """Fit a linear regression on the transformed training split and score it.

    The data file ``./data/in.dta`` is split by ``read_dta_25_10`` into a
    validation part and a training part.  Both parts go through
    ``nonlinear_transformation_to_k`` with the given ``k``; the model is
    fitted on the training part only, then its error is estimated on the
    training part (``err_in``) and on the validation part (``err_out``).
    Both errors are printed as they are computed.

    Args:
        k: forwarded unchanged to ``nonlinear_transformation_to_k``.

    Returns:
        Tuple ``(w, err_in, err_out)``: the fitted weights, the training
        error and the validation error.
    """
    regression = hw6_2LinRegRegularized

    validation_set, training_set = read_dta_25_10('./data/in.dta')

    # Model training uses the training split only.
    train_features, train_targets = nonlinear_transformation_to_k(
        training_set, k)
    w = regression.linear_reg(train_features, train_targets)

    err_in = regression.estimate_err(w, train_features, train_targets)
    print(" ".join(("err_in=", str(err_in))))

    val_features, val_targets = nonlinear_transformation_to_k(
        validation_set, k)
    err_out = regression.estimate_err(w, val_features, val_targets)
    print(" ".join(("err_out=", str(err_out))))

    return w, err_in, err_out
def problem10(digit1, digit2, lambdas):
    """Report second-order-model errors for several weight-decay coefficients.

    Training and test data are read with ``dr.read_data`` and relabelled
    with ``dr.label_data_vs_one(digit1, digit2, ...)``, which returns both
    the labels and the datapoints to use (presumably only the samples of
    the two digits -- confirm against ``dr``).  The datapoints are then
    expanded with ``second_order_transform``.  For every coefficient in
    ``lambdas`` a model is fitted on the training features and its error is
    estimated on the training data and on the test data.

    One line is printed per coefficient, in the form
    ``lambda_coef err_in err_out``.  The previous implementation ended the
    print statement with a trailing comma, which suppressed the newline so
    that all records ran together on a single line and leaked a pending
    separator into whatever was printed next.

    Args:
        digit1: first digit passed to ``dr.label_data_vs_one``.
        digit2: second digit passed to ``dr.label_data_vs_one``.
        lambdas: iterable of coefficients forwarded one at a time to
            ``rlr.linear_reg_w_weight_decay``.

    Returns:
        None.  Results are written to stdout.
    """
    datapoints, digits = dr.read_data(dr.TR_DATA_FN)
    labels, datapoints = dr.label_data_vs_one(
        digit1, digit2, digits, datapoints)
    datapoints_2nd_order = second_order_transform(datapoints)

    test_datapoints, test_digits = dr.read_data(dr.TEST_DATA_FN)
    test_labels, test_datapoints = dr.label_data_vs_one(
        digit1, digit2, test_digits, test_datapoints)
    test_datapoints_2nd_order = second_order_transform(test_datapoints)

    for lambda_coef in lambdas:
        w_2nd_order = rlr.linear_reg_w_weight_decay(
            datapoints_2nd_order, labels, lambda_coef)
        err_in_2nd_order = rlr.estimate_err(
            w_2nd_order, datapoints_2nd_order, labels)
        err_out_2nd_order = rlr.estimate_err(
            w_2nd_order, test_datapoints_2nd_order, test_labels)

        # One complete, newline-terminated record per coefficient.  The
        # single-argument print(...) form emits the same text on Python 2
        # and Python 3.
        record = (lambda_coef, err_in_2nd_order, err_out_2nd_order)
        print(" ".join(str(value) for value in record))
def problem7(digit_set, lambda_coef):
    """Print the training error of the x0-augmented model for each digit.

    The training set is read, a few diagnostic lines (first five rows and
    the shape, before and after ``transform_add_x0``) are printed, and then
    for each digit a model is fitted with ``rlr.linear_reg_w_weight_decay``
    and scored on the same training data.

    Args:
        digit_set: iterable of digits; each one is handed to
            ``dr.label_data_vs_all`` to build the labels.
        lambda_coef: coefficient forwarded to
            ``rlr.linear_reg_w_weight_decay``.

    Returns:
        None.  Output lines of the form ``digit err_in`` go to stdout.
    """
    raw_points, digits = dr.read_data(dr.TR_DATA_FN)

    # Diagnostics on the raw data.
    print(" ".join([str(raw_points[:5]), str(digits[:5])]))
    print(raw_points.shape)

    features = transform_add_x0(raw_points)

    # Diagnostics on the transformed data.
    print(features[:5])
    print(features.shape)

    for digit in digit_set:
        targets = dr.label_data_vs_all(digits, digit)
        weights = rlr.linear_reg_w_weight_decay(
            features, targets, lambda_coef)
        err_in = rlr.estimate_err(weights, features, targets)
        print(" ".join(map(str, (digit, err_in))))
def problem8(digit_set, lambda_coef):
    """Print the test error of the second-order model for each digit.

    Training and test sets are read and both passed through
    ``second_order_transform``.  For each digit a model is fitted on the
    training features with ``rlr.linear_reg_w_weight_decay`` and its error
    is estimated on the test features.  Before the transform, the first
    five training rows/digits and the training shape are printed as
    diagnostics.

    Args:
        digit_set: iterable of digits; each one is handed to
            ``dr.label_data_vs_all`` to build the labels.
        lambda_coef: coefficient forwarded to
            ``rlr.linear_reg_w_weight_decay``.

    Returns:
        None.  Output lines of the form ``digit err_out`` go to stdout.
    """
    raw_train, train_digits = dr.read_data(dr.TR_DATA_FN)

    # Diagnostics on the raw training data.
    print(" ".join([str(raw_train[:5]), str(train_digits[:5])]))
    print(raw_train.shape)

    train_features = second_order_transform(raw_train)

    raw_test, test_digits = dr.read_data(dr.TEST_DATA_FN)
    test_features = second_order_transform(raw_test)

    for digit in digit_set:
        train_targets = dr.label_data_vs_all(train_digits, digit)
        weights = rlr.linear_reg_w_weight_decay(
            train_features, train_targets, lambda_coef)

        test_targets = dr.label_data_vs_all(test_digits, digit)
        err_out = rlr.estimate_err(weights, test_features, test_targets)
        print(" ".join(map(str, (digit, err_out))))