splitting_sampler, success_params=(1, 1), B=B, fit_probability=gbm_fit_sk, fit_args={'n_estimators': 1000}) if __name__ == "__main__": import statsmodels.api as sm import matplotlib.pyplot as plt import pandas as pd U = np.linspace(0, 1, 101) plt.clf() for i in range(2000): df = simulate() csvfile = 'lasso_multi_CV_gbm.csv' outbase = csvfile[:-4] if df is not None: try: df = pd.concat([df, pd.read_csv(csvfile)]) except FileNotFoundError: pass df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: pivot_plot(df, outbase)
sigma=2, alpha=0.1, B=2000) R2 = [] for _ in range(100): X, y, truth = generate(**opts) R2.append((np.linalg.norm(y - X.dot(truth))**2, np.linalg.norm(y)**2)) R2 = np.array(R2) R2mean = 1 - np.mean(R2[:, 0]) / np.mean(R2[:, 1]) print('R2', R2mean) for i in range(5000): df = simulate(**opts) csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] if df is not None: try: df = pd.concat([df, pd.read_csv(csvfile)]) except FileNotFoundError: pass df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: f = pivot_plot(df, outbase)[1] plt.close(f)
if __name__ == "__main__":
    # Script entry point: call simulate() repeatedly, merge the results into
    # a CSV of accumulated results, and regenerate the pivot and length PDFs
    # after each update.
    import statsmodels.api as sm
    import matplotlib.pyplot as plt
    import pandas as pd

    U = np.linspace(0, 1, 101)
    plt.clf()

    # Output locations are identical for every repetition.
    csvfile = 'lee_multi.csv'
    outbase = csvfile[:-4]  # CSV name without its 4-character '.csv' suffix

    for iteration in range(500):
        df = simulate()

        # The result of the very first repetition (index 0) is neither
        # written to the CSV nor plotted.
        # NOTE(review): confirm that discarding it is intentional.
        if df is None or iteration <= 0:
            continue

        # Checkpoint to disk: put the new rows in front of whatever an
        # earlier repetition (or an earlier run of this script) stored.
        try:
            previous = pd.read_csv(csvfile)
        except FileNotFoundError:
            # No results file yet; only the new rows get written.
            pass
        else:
            df = pd.concat([df, previous])
        df.to_csv(csvfile, index=False)

        if len(df['pivot']) == 0:
            # No 'pivot' entries accumulated yet, so skip plotting.
            continue

        pivot_ax, length_ax = pivot_plot(df, outbase)

        # Disabled overlay of the ECDF of the non-NaN 'lee_pivot' values:
        # pivot_ax.plot(U, sm.distributions.ECDF(df['lee_pivot'][~np.isnan(df['lee_pivot'])])(U), 'g', label='Lee', linewidth=3)
        pivot_figure = pivot_ax.figure
        pivot_figure.savefig(outbase + '.pdf')

        # Add naive vs. Lee interval lengths to the length axes, then save
        # that figure as well.
        length_ax.scatter(df['naive_length'], df['lee_length'])
        length_figure = length_ax.figure
        length_figure.savefig(outbase + '_lengths.pdf')