def test_f_regression_input_dtype():
    """Check that f_regression gives identical output for int and float targets.

    The same feature matrix is scored twice: once against an integer target
    and once against that target cast to float. The F statistics and the
    p-values of both runs must be exactly equal.
    """
    # Seeded local generator: the input is the same on every run, so any
    # failure can be reproduced, and the global NumPy RNG state is untouched.
    rng = np.random.RandomState(0)
    X = rng.rand(10, 20)
    y = np.arange(10).astype(int)

    F1, pv1 = f_regression(X, y)
    F2, pv2 = f_regression(X, y.astype(float))

    assert_array_equal(F1, F2)
    assert_array_equal(pv1, pv2)
def test_f_regression_input_dtype():
    """Check that f_regression agrees for int and float targets.

    The same feature matrix is scored twice: once against an integer target
    and once against that target cast to float. The F statistics and the
    p-values of both runs must agree to 5 decimal places.
    """
    # Seeded local generator: the input is the same on every run, so any
    # failure can be reproduced, and the global NumPy RNG state is untouched.
    rng = np.random.RandomState(0)
    X = rng.rand(10, 20)
    # Builtin int/float instead of the np.int/np.float aliases, which were
    # deprecated in NumPy 1.20 and removed in 1.24.
    y = np.arange(10).astype(int)

    F1, pv1 = f_regression(X, y)
    F2, pv2 = f_regression(X, y.astype(float))

    assert_array_almost_equal(F1, F2, 5)
    assert_array_almost_equal(pv1, pv2, 5)
def test_f_regression():
    """Sanity-check f_regression on a simulated regression problem.

    A 200-sample, 20-feature dataset with 5 informative features is
    generated with a fixed seed and without shuffling. The test asserts that
    every F statistic is positive, every p-value lies strictly between 0 and
    1, the first five p-values are below 0.05, and the remaining p-values
    are above 1e-4.
    """
    features, target = make_regression(
        n_samples=200,
        n_features=20,
        n_informative=5,
        shuffle=False,
        random_state=0,
    )
    f_scores, p_values = f_regression(features, target)

    # Split once so the two groups of p-values read clearly below.
    head_p_values = p_values[:5]
    tail_p_values = p_values[5:]

    assert (f_scores > 0).all()
    assert (p_values > 0).all()
    assert (p_values < 1).all()
    assert (head_p_values < 0.05).all()
    assert (tail_p_values > 1.0e-4).all()
def test_f_regression():
    """Verify that the F test behaves sensibly on simulated regression data.

    The data come from make_regression (200 samples, 20 features, 5 of them
    informative, no shuffling, seed 0). Checks, in order: all F values are
    positive; all p-values are in the open interval (0, 1); p-values of the
    first five features are below 0.05; p-values of the other features are
    above 1e-4.
    """
    split = 5  # index separating the first five features from the rest
    significance = 0.05
    floor = 1.e-4

    X, Y = make_regression(n_samples=200, n_features=20, n_informative=5,
                           shuffle=False, random_state=0)
    F, pv = f_regression(X, Y)

    f_positive = F > 0
    assert f_positive.all()

    pv_above_zero = pv > 0
    assert pv_above_zero.all()

    pv_below_one = pv < 1
    assert pv_below_one.all()

    leading_significant = pv[:split] < significance
    assert leading_significant.all()

    trailing_not_tiny = pv[split:] > floor
    assert trailing_not_tiny.all()
# Store data in a consistent place
DATA_DIR = '/users/ansonau/DS_HK_2/data/'
cars = read_csv(DATA_DIR + 'cars93.csv')

# --- Clean data -----------------------------------------------------------
# Keep only the numeric columns and replace missing values with the column
# mean, so the feature matrix handed to f_regression has no text and no NaN.
car_x = cars._get_numeric_data()
car_x = car_x.fillna(car_x.mean())

# Drop the target (y) columns from the *cleaned* frame. Dropping from `cars`
# here would throw away the numeric filtering and NaN fill done above.
# `axis` is passed by keyword because pandas 2.x no longer accepts it
# positionally.
car_x = car_x.drop(['MPG.highway', 'MPG.city'], axis=1)
car_y = cars['MPG.highway']

# Univariate F test of every remaining numeric feature against highway MPG.
f, p = f_regression(car_x, car_y.values)
# One [F statistic, p-value] pair per feature, in column order.
# print(...) with a single argument works on both Python 2 and Python 3.
print([[x, y] for x, y in zip(f, p)])

# --- Regression on Weight -------------------------------------------------
# Polynomial regression: add Weight^2 .. Weight^4 as extra columns.
# NOTE: the key 'Weigh_fourth' (sic) is kept as-is in case later code
# looks the column up by this exact name.
cars['Weight_squared'] = cars['Weight']**2
cars['Weight_cubed'] = cars['Weight']**3
cars['Weigh_fourth'] = cars['Weight']**4

MPG = cars['MPG.city']
# One row per car: [Weight, Weight^2, Weight^3, Weight^4].
Weight_poly = [
    [w, x, y, z]
    for w, x, y, z in zip(cars['Weight'].values,
                          cars['Weight_squared'].values,
                          cars['Weight_cubed'].values,
                          cars['Weigh_fourth'].values)
]

ridge = linear_model.Ridge()