return np.sum(np.square(x0 - x1)) # Generate the training and validation sets a = 0 b = 1 N = 20 k = N/4 std = 0.05 x_train = np.linspace(a, b, N) + np.random.normal(0, std, N) y_train = f(x_train) + np.random.normal(0, std, N) x_valid = np.linspace(a, b, k) + np.random.normal(0, std, k) y_valid = f(x_valid) + np.random.normal(0, std, k) max_degree = 8 ws = [polyfit(x_train, y_train, n) for n in range(1, max_degree)] x = np.linspace(a-std,b+std,100) # Create anonymous functions for each polynomial fit y_fs = [lambda t, w=w: sum([w[i]*t**i for i in range(len(w))]) for w in ws] training_errors = [L2(y(x_train),y_train) for y in y_fs] valid_errors = [L2(y(x_valid),y_valid) for y in y_fs] # Plot the polynomial fit to the training data fig, ax = plot.subplots(frameon=True) ax.set_axis_bgcolor((1,1,1)) ax.grid(which='both',color='gray') for i in ax.spines:
# Remove rows whose average is NaN
df = df[pd.notnull(df['average'])]

# Compute the period of the oscillations in the data
# Note that some data has been removed because it was missing
a = df.average
# Compare raw NumPy values, not Series: the shifted slices a[1:] and a[:-1]
# carry different index labels, and pandas refuses to compare Series that
# are not identically labelled. The arrays compare purely by position.
a_values = a.values
# http://stackoverflow.com/a/4625132
# A point is a local minimum when it is strictly below both neighbours;
# the first and last points only have one neighbour to beat.
minima = (np.r_[True, a_values[1:] < a_values[:-1]]
          & np.r_[a_values[:-1] < a_values[1:], True])
minima_index = [i for i, j in enumerate(minima) if j]
# Mean spacing between consecutive minima, measured in rows of the
# filtered frame (not in date units).
period = np.mean([j - i for i, j in zip(minima_index, minima_index[1:])])

# Compute the polynomial coefficients
x = df['decimal_date']
y = df['average']
N = len(df.index)
ws = [polyfit(x, y, n) for n in range(1, 4)]
# `w=w` freezes the current coefficients as a default argument so every
# lambda keeps its own fit; coefficients are in ascending power order
# (w[i] multiplies t**i).
y_fs = [lambda t, w=w: sum(c * t**p for p, c in enumerate(w)) for w in ws]

fig, ax = plot.subplots(frameon=True)
# set_axis_bgcolor was deprecated in Matplotlib 2.0 and later removed;
# set_facecolor is its replacement.
ax.set_facecolor((1, 1, 1))
ax.grid(which='both', color='gray')
for i in ax.spines:
    ax.spines[i].set_color('k')
ax.set_title('Polynomial Fits to Average Monthly\nMeasurements of CO2 Concentration', fontsize=18)
#ax.set_title('Mean Monthly\nMeasurements of CO2 Concentration', fontsize=18)
ax.set_xlabel('Date')
ax.set_ylabel('Average Measurement of CO2 (ppm)')
plot.plot(x, y, label='Original Data')
# NOTE: this loop rebinds `y` (the data column above) to each fitted
# function in turn; the names are kept as-is so later code sees the same
# module-level state as before.
for i, y in enumerate(y_fs, 1):
    plot.plot(x, y(x), label='Degree=%d' % i)