def test_localreg_realistic():
    """Compare localreg with reference values on ten scattered data points.

    Every case evaluates the fit at ``x0 = [2, 3]`` with the Epanechnikov
    kernel and checks the result against stored values (``rtol=1e-3``):
    degrees 0, 1 and 2 at ``width=1``, degree 2 at ``width=2``, and degree 2
    with ``frac=0.5`` instead of a width.
    """
    x = np.array([
        -6.89438, 7.94300378, 5.5221823, 9.77749217, -0.35979986,
        2.01456739, 4.80691814, 3.22260756, -7.12156073, -8.69959441,
    ])
    y = np.array([
        -1.74962299, -8.55733072, 8.56537608, 1.79095858, 4.43380336,
        -14.63365203, 5.41264117, 9.69660297, -13.85424098, 0.42264531,
    ])
    x0 = np.array([2., 3.])

    # Each entry: (keyword arguments that vary between cases, expected fit at x0).
    cases = [
        # Testing all orders
        ({"degree": 0, "width": 1}, [-14.63365203, 8.9780852]),
        ({"degree": 1, "width": 1}, [-14.5487543, 5.21322664]),
        ({"degree": 2, "width": 1}, [-14.4523815, 3.77134959]),
        # Testing width
        ({"degree": 2, "width": 2}, [-14.80997735, 7.00785276]),
        # Testing frac
        ({"degree": 2, "frac": 0.5}, [-6.21823369, 4.33953829]),
    ]
    for options, expected in cases:
        fitted = localreg(x, y, x0, kernel=epanechnikov, **options)
        assert np.allclose(fitted, expected, rtol=1e-3)
def test_localreg_narrow_kernel(caplog):
    """Check the behaviour of localreg when called with ``width=0.4``.

    With data at 0, 1, 2 and a single evaluation point at 0.5, the degree-2
    Epanechnikov fit is expected to return NaN and to emit exactly one log
    record (captured through pytest's ``caplog`` fixture).
    """
    x = np.arange(3.0)
    y = x.copy()
    x0 = np.array([0.5])

    y0 = localreg(x, y, x0, degree=2, kernel=epanechnikov, width=0.4)

    assert np.isnan(y0[0])
    assert len(caplog.records) == 1
def plot_nucleosome_occupancy_sample(fig_format="png"):
    """Plot nucleosome occupancy for 16 consecutive 2000-row windows.

    Reads ``GSM1194220_H1.bed`` (tab-separated, no header) from
    ``data_dir/nucleosome_positioning`` and draws a 4x4 grid of panels. Panel
    ``rid`` uses table rows ``20000 + rid * 2000`` up to (excluding)
    ``20000 + (rid + 1) * 2000``; column 1 is plotted on the x axis and
    column 2 on the y axis, overlaid with a degree-2 tricube ``localreg``
    fit. When ``localreg`` raises ``numpy.linalg.LinAlgError`` the panel
    falls back to ``sns.regplot``.

    The figure is written to
    ``figure_dir/nucleosome_occupancy_of_part_of_chr1.<fig_format>``.

    :param str fig_format: file extension of the saved figure.
    """
    mkdir(figure_dir)
    input_fp = os.path.join(data_dir, "nucleosome_positioning",
                            "GSM1194220_H1.bed")
    df = pd.read_csv(input_fp, sep="\t", header=None).values
    NROW = 4
    NCOL = 4
    # figsize is (width, height): width follows the column count, height the
    # row count. The original passed NROW for both grid dimensions.
    fig, axs = plt.subplots(NROW, NCOL,
                            figsize=(NCOL * EACH_SUB_FIG_SIZE,
                                     NROW * EACH_SUB_FIG_SIZE))
    fig_fp = os.path.join(
        figure_dir, "nucleosome_occupancy_of_part_of_chr1.%s" % fig_format)
    for rid in range(NROW * NCOL):
        row, col = divmod(rid, NCOL)
        ax = axs[row][col]
        start = 20000 + rid * 2000
        end = 20000 + (rid + 1) * 2000
        # The builtin float replaces np.float, which was removed in NumPy 1.24.
        loci = df[start:end, 1].astype(float)
        oc = df[start:end, 2].astype(float)
        try:
            y2 = localreg(loci, oc, degree=2, kernel=tricube, width=10)
            ax.scatter(loci, oc, s=8, color="blue")
            ax.plot(loci, y2, "k-", linewidth=2)
        except np.linalg.LinAlgError:
            # Local fit failed: fall back to seaborn's regression plot.
            sns.regplot(x=loci, y=oc, ax=ax,
                        scatter_kws={'s': 8, 'color': "blue"})
        ax.set_xlim(start, end)
        ax.set_ylim(0, 600)
        ax.set_title("nucleos occupancy of chr1: %d:%d" % (start, end),
                     fontsize=14)
    plt.savefig(fig_fp, dpi=300, bbox_inches='tight', pad_inches=0.1)
def plot_local_regression_and_RD(fig_format="png"):
    """Save one 2x2 figure per region label with a local fit in each panel.

    For every entry of ``REGION_LABELS`` the panel rows are
    ``"Non-<label>"`` and ``"<label>"`` and the panel columns follow
    ``CGI_LABELS`` (the 2x2 grid assumes at most two CGI labels). Each panel
    reads ``<region>-<cgi><suffix>-Rd.bed`` (tab-separated, no header) from
    ``data_dir/Q1_CGI_density/<label>``, scatters column 1 against column 0
    and overlays a degree-2 tricube ``localreg`` curve. If ``localreg``
    raises ``numpy.linalg.LinAlgError`` the panel uses ``sns.regplot``.

    :param str fig_format: file extension of the saved figures, which are
        written to ``figure_dir/<label>.<fig_format>``.
    """
    K_RD = [1]  # , 0
    N_COL = N_ROW = 2
    cm = plt.get_cmap('gist_rainbow')
    mkdir(figure_dir)
    for km in K_RD:
        # A falsy km selects the "-methy" variant of the input files.
        correlation_type = '' if km else '-methy'
        for region_label in REGION_LABELS:
            input_dir = os.path.join(data_dir, "Q1_CGI_density", region_label)
            region_names = ["Non-" + region_label, region_label]
            fig, axs = plt.subplots(N_ROW, N_COL,
                                    figsize=(N_COL * EACH_SUB_FIG_SIZE,
                                             N_ROW * EACH_SUB_FIG_SIZE))
            fig_fp = os.path.join(figure_dir,
                                  "%s.%s" % (region_label, fig_format))
            for iid, region in enumerate(region_names):
                for jid, cgi in enumerate(CGI_LABELS):
                    bed_name = "%s-%s%s-Rd.bed" % (region, cgi,
                                                   correlation_type)
                    correlation_fp = os.path.join(input_dir, bed_name)
                    file_label = "%s %s" % (region, cgi)
                    # Running panel number; it only picks the scatter colour.
                    idx = iid * len(region_names) + jid
                    ax = axs[iid][jid]
                    table = pd.read_csv(correlation_fp, sep="\t",
                                        header=None).values
                    x, y = table[:, 0], table[:, 1]
                    color = cm(1. * idx / (N_COL * N_ROW))
                    try:
                        y2 = localreg(x, y, degree=2, kernel=tricube,
                                      width=100)
                        ax.scatter(x, y, s=8, color=color, label=file_label)
                        ax.plot(x, y2, "k-", linewidth=2)
                    except np.linalg.LinAlgError:
                        sns.regplot(x=x, y=y, ax=ax,
                                    scatter_kws={'s': 8, 'color': color})
                    ax.set_xticks(range(0, D_MAX + 1, 200))
                    ax.set_xlim(0, D_MAX)
                    ax.set_ylim(0, 1.0)
                    ax.set_title(file_label, fontsize=14)
            plt.savefig(fig_fp, dpi=300, bbox_inches='tight', pad_inches=0.1)
def plot_local_regression_and_RD_within_vs_across(fig_format="png"):
    """Compare "only-within" and "both-within-across" correlations per ratio.

    One panel of a 2x4 grid is drawn for every entry of ``RATIOS`` (the grid
    assumes at most eight ratios). Each panel reads two tab-separated,
    header-less files from
    ``data_dir/CGI_identified_with_different_thereshold``:

    * ``CGI_<ratio>_K_intersected<suffix>-only-within-Rd.bed``
    * ``CGI_<ratio>_K_intersected<suffix>-both-within-across-Rd.bed``

    Column 1 is scattered against column 0 for both files and a degree-2
    tricube ``localreg`` curve is overlaid for each. If ``localreg`` raises
    ``numpy.linalg.LinAlgError`` the panel uses ``sns.regplot`` for both data
    sets.

    :param str fig_format: file extension of the saved figure, written to
        ``figure_dir/CGI_RATIO_Within-Across-5000bp.<fig_format>``.
    """
    K_RD = [1]  # , 0
    N_COL = 4
    N_ROW = 2
    cm = plt.get_cmap('gist_rainbow')
    mkdir(figure_dir)
    for km in K_RD:
        # A falsy km selects the "-methy" variant of the input files.
        correlation_type = '' if km else '-methy'
        fig, axs = plt.subplots(N_ROW, N_COL,
                                figsize=(N_COL * EACH_SUB_FIG_SIZE,
                                         N_ROW * EACH_SUB_FIG_SIZE))
        fig_fp = os.path.join(
            figure_dir, "CGI_RATIO_Within-Across-5000bp.%s" % fig_format)
        for rid, ratio in enumerate(RATIOS):
            file_stem = "CGI_%s_K_intersected" % ratio + correlation_type
            within_correlation_fp = os.path.join(
                data_dir, "CGI_identified_with_different_thereshold",
                file_stem + "-only-within-Rd.bed")
            across_correlation_fp = os.path.join(
                data_dir, "CGI_identified_with_different_thereshold",
                file_stem + "-both-within-across-Rd.bed")
            file_label = "Obs/Expect " + str(RATIO_LABELS[rid])
            row, col = divmod(rid, N_COL)
            ax = axs[row][col]
            within_RD = pd.read_csv(within_correlation_fp, sep="\t",
                                    header=None).values
            across_RD = pd.read_csv(across_correlation_fp, sep="\t",
                                    header=None).values
            x1 = within_RD[:, 0]
            y1 = within_RD[:, 1]
            x2 = across_RD[:, 0]
            y2 = across_RD[:, 1]
            color = cm(1. * rid / len(RATIOS))
            try:
                yy1 = localreg(x1, y1, degree=2, kernel=tricube, width=100)
                yy2 = localreg(x2, y2, degree=2, kernel=tricube, width=100)
                ax.scatter(x1, y1, s=3, color=color, label="within")
                # These points come from the "both-within-across" file; the
                # original labelled them "within" as well (copy-paste slip).
                ax.scatter(x2, y2, s=1.5, color="black", label="across")
                ax.plot(x2, yy2, "w-", linewidth=1.5)
                ax.plot(x1, yy1, "k-", linewidth=1.5)
            except np.linalg.LinAlgError:
                sns.regplot(x=x1, y=y1, ax=ax,
                            scatter_kws={'s': 8, 'color': color})
                sns.regplot(x=x2, y=y2, ax=ax,
                            scatter_kws={'s': 8, 'color': "black"})
            ax.set_xticks(range(0, D_MAX + 1, 1000))
            ax.set_xlim(0, D_MAX)
            ax.set_ylim(0, 1.0)
            ax.set_title(file_label, fontsize=14)
        plt.savefig(fig_fp, dpi=300, bbox_inches='tight', pad_inches=0.1)
def plot_local_regression_and_RD(fig_format="png"):
    """Overlay the "only-within" correlation curves of all ratios on one axes.

    For every entry of ``RATIOS`` the file
    ``CGI_<ratio>_K_intersected<suffix>-only-within-Rd.bed`` (tab-separated,
    no header) is read from
    ``data_dir/CGI_identified_with_different_thereshold``. Column 1 is
    scattered against column 0 in a ratio-specific colour and a degree-2
    tricube ``localreg`` curve is drawn in black. If ``localreg`` raises
    ``numpy.linalg.LinAlgError`` the data set is drawn with ``sns.regplot``.

    :param str fig_format: file extension of the saved figure, written to
        ``figure_dir/CGI_RATIO_FROM-06-13-<D_MAX>.<fig_format>``.
    """
    K_RD = [1]  # , 0
    N_COL = 1
    N_ROW = 1
    plt.rc('xtick', labelsize=12)  # font size of the x tick labels
    plt.rc('ytick', labelsize=12)  # font size of the y tick labels
    cm = plt.get_cmap('gist_rainbow')
    mkdir(figure_dir)
    for km in K_RD:
        # A falsy km selects the "-methy" variant of the input files.
        correlation_type = '' if km else '-methy'
        fig, ax = plt.subplots(N_ROW, N_COL,
                               figsize=((N_COL * EACH_SUB_FIG_SIZE),
                                        N_ROW * EACH_SUB_FIG_SIZE))
        fig_fp = os.path.join(
            figure_dir, "CGI_RATIO_FROM-06-13-%d.%s" % (D_MAX, fig_format))
        for rid, ratio in enumerate(RATIOS):
            bed_name = ("CGI_%s_K_intersected" % ratio + correlation_type
                        + "-only-within-Rd.bed")
            correlation_fp = os.path.join(
                data_dir, "CGI_identified_with_different_thereshold",
                bed_name)
            file_label = "Obs/Exp>" + str(RATIO_LABELS[rid])
            table = pd.read_csv(correlation_fp, sep="\t", header=None).values
            x, y = table[:, 0], table[:, 1]
            color = cm(1. * rid / len(RATIOS))
            try:
                y2 = localreg(x, y, degree=2, kernel=tricube, width=100)
                ax.scatter(x, y, s=8, color=color, label=file_label)
                ax.plot(x, y2, color='black', linestyle='solid', linewidth=2)
            except np.linalg.LinAlgError:
                sns.regplot(x=x, y=y, ax=ax,
                            scatter_kws={'s': 8, 'color': color})
            # Axis decoration is re-applied on every pass, as in the original.
            ax.set_xticks(range(0, D_MAX + 1, 200))
            ax.set_xlim(0, D_MAX)
            ax.set_ylim(0, 1.0)
            ax.set_xlabel("Genomic Distance(bp)", fontsize=18,
                          fontweight='bold')
            ax.set_ylabel("Pearson Correlation", fontsize=18,
                          fontweight='bold')
            ax.legend(loc='best')
        plt.savefig(fig_fp, dpi=300)
def smooth(x, y, xgrid):
    """
    Fits a local linear regression to one bootstrap resample of (x, y) and
    evaluates it on a grid.

    The resample has the same size as the input and is drawn with
    replacement. The fitted values at the resampled x positions are carried
    over to ``xgrid`` by linear interpolation, extrapolating outside the
    sampled range.

    :param (np.array) x: x-axis data points
    :param np.array y: y-axis data points
    :param np.array xgrid: interpolation points
    :return np.array: approximate/smoothed y-values at xgrid
    """
    n_points = len(x)
    samples = np.random.choice(n_points, n_points, replace=True)
    x_s, y_s = x[samples], y[samples]
    # x0=None is passed through unchanged; the fit is then interpolated at
    # the resampled x positions below.
    y_sm = localreg(x_s, y_s, x0=None, degree=1, kernel=triangular,
                    width=19.08094)
    interpolate = scipy.interpolate.interp1d(x_s, y_sm,
                                             fill_value='extrapolate')
    return interpolate(xgrid)
def plot_whole_landscape(fig_format="png"):
    """Save one multi-panel overview figure per region group.

    For each group in ``regions`` a grid with one row per file label and 18
    columns is drawn and written to ``<FIG_DIR>/<RD_DIRNAME>/<REGION>.<fig_format>``.

    * Columns 0-2 scatter column 1 against column 0 of the ``K_Rd`` table and
      overlay a degree-2 tricube ``localreg`` curve; columns 1 and 2 colour
      the points by table column ``1 + col``.
    * The remaining columns are histograms of the bed-file column given by
      ``COL_INDEXS``; an index of 0 selects a 2-D histogram of bed column 3
      against log10 of bed column 4 instead.

    NOTE(review): the source was received with its line structure collapsed;
    the nesting below is reconstructed from the control flow and should be
    confirmed against the original file.

    :param str fig_format: file extension of the saved figures.
    """
    regions = ["Genomic_Regions", "Histone_Modification", "ChromHMM", "TFBS"]
    # RD_DIRNAME = "Whole_Landscape"
    # NOTE(review): RD_DIRNAME is used below but its only assignment here is
    # commented out; it presumably comes from module scope -- confirm.
    cm = plt.get_cmap('gist_rainbow')
    FIG_DIR = os.path.join(BASE_DIR, "figures")
    NBIN = 30
    # Colour-scale limits for the coloured scatter plots in columns 1 and 2
    # (indexed with col - 1).
    vmins = [0, 120]
    vmaxs = [0.15, 220]
    N_COL = 18
    COL_Labels = [
        "Corr", "Corr with DNase", "Corr with Nuc Occ", "Hist of k",
        "Hist of methylation", "Hist2D of k/methy", "DNase Peak",
        "Nucleosome Occupancy", "H3k4me1", "H3k4me3", "H3k9me3", "H3k9ac",
        "H3k27ac", "H3k27me3", "H3k36me3", "H4k20me", "CTCF", "p300"
    ]
    # Bed-file column read for each figure column; 0 means "no single column"
    # (columns 0-2 use the Rd table, column 5 is the 2-D histogram).
    COL_INDEXS = [
        0, 0, 0, 4, 3, 0, -2, -1, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ]
    MP = 50  #Max Peak Value
    # Upper axis limits per figure column.
    xlims = [
        D_MAX, D_MAX, D_MAX, 10, 1., 1, 0.15, 220, MP, MP, MP, MP, MP, MP, MP,
        MP, MP, MP
    ]
    ylims = [
        1, 1, 1, 0.6, 16, 1, 70, 0.05, 0.25, 0.2, 0.14, 0.06, .1, .12, .25,
        .20, 0.05, .2
    ]
    for REGION in regions:
        fig_dir = os.path.join(FIG_DIR, RD_DIRNAME)
        mkdir(fig_dir)
        out_rd_corr_dir = os.path.join("../data/K_region_intersect", REGION,
                                       "K_Rd")
        file_paths = [
            os.path.join(out_rd_corr_dir, "%s.bed" % file_name)
            for file_name in FILE_ORDERED_NAMES[REGION]
        ]
        file_labels = FILE_LABELS[REGION]
        bed_fps = [
            os.path.join("../data/K_region_intersect", REGION,
                         "%s.bed" % file_name)
            for file_name in FILE_ORDERED_NAMES[REGION]
        ]
        N_ROW = len(file_labels)
        fig_fp = os.path.join(fig_dir, "%s.%s" % (REGION, fig_format))
        fig, axs = plt.subplots(N_ROW,
                                N_COL,
                                figsize=(N_COL * EACH_SUB_FIG_SIZE,
                                         N_ROW * EACH_SUB_FIG_SIZE))
        for row in range(N_ROW):
            # One figure row per file: the Rd table plus the matching bed file.
            RD_df = pd.read_csv(file_paths[row], sep="\t", header=None).values
            bed_df = pd.read_csv(bed_fps[row], sep="\t", header=None).values
            for col in range(N_COL):
                print("%s, %s" % (file_labels[row], COL_Labels[col]))
                ax = axs[row][col]
                xlim = xlims[col]
                ylim = ylims[col]
                if col <= 2:
                    # Scatter of Rd column 1 against column 0 with a local fit.
                    x = RD_df[:, 0]
                    y = RD_df[:, 1]
                    try:
                        y2 = localreg(x,
                                      y,
                                      degree=2,
                                      kernel=tricube,
                                      width=100)
                        if col != 0:
                            # Colour the points by Rd column 1 + col and add
                            # a colour bar.
                            z = RD_df[:, 1 + col]
                            sc = ax.scatter(x,
                                            y,
                                            s=8,
                                            c=z,
                                            label=file_labels[row],
                                            cmap=cm,
                                            vmin=vmins[col - 1],
                                            vmax=vmaxs[col - 1])
                            fig.colorbar(sc, ax=ax)
                        else:
                            ax.scatter(x,
                                       y,
                                       s=2,
                                       color=cm(1. * row / N_ROW),
                                       label=file_labels[row])
                        ax.plot(x, y2, "k-", linewidth=2)
                    except np.linalg.LinAlgError as e:
                        # Local fit failed: fall back to seaborn's regplot.
                        sns.regplot(
                            x=x,
                            y=y,
                            ax=ax,
                            scatter_kws={
                                's': 8,
                                'color': cm(1. * row / N_ROW)
                            })  # , line_kws ={'color':'black', "lw": 2}
                    ax.set_xticks(range(0, D_MAX + 1, 200))
                    ax.set_xlim(0, xlim)
                    ax.set_ylim(0, ylim)
                    ax.set_title(COL_Labels[col], fontsize=18)
                    if col == 0:
                        ax.set_ylabel(file_labels[row], fontsize=20)
                    if row == N_ROW - 1:
                        ax.set_xlabel("Genomic Distance(bp)")
                else:
                    col_ind_in_bed = COL_INDEXS[col]
                    if col_ind_in_bed != 0:
                        # 1-D histogram of one bed column; "." entries are
                        # dropped and values above xlim are clipped to xlim.
                        vals = bed_df[:, col_ind_in_bed]
                        vals = vals[vals != "."].astype(float)
                        vals[vals > xlim] = xlim
                        _ = ax.hist(vals,
                                    bins=NBIN,
                                    density=True,
                                    color=cm(1. * col / N_COL),
                                    edgecolor='black',
                                    alpha=0.5,
                                    linewidth=0.5)
                        ax.set_xlim([0, xlim])
                        ax.set_ylim(0, ylim)
                        ax.set_title(COL_Labels[col], fontsize=18)
                    else:
                        # 2-D histogram of bed column 3 against log10 of bed
                        # column 4.
                        ks = bed_df[:, 4].astype(float)
                        methys = bed_df[:, 3].astype(float)
                        # Non-positive values are replaced by 0.001 before
                        # taking the logarithm.
                        ks[ks <= 0] = 0.001
                        ks = np.log10(ks)
                        h = ax.hist2d(methys,
                                      ks,
                                      bins=(NBIN, NBIN),
                                      density=True,
                                      vmin=0,
                                      vmax=5,
                                      cmap="viridis")
                        fig.colorbar(h[3], ax=ax)
                        # NOTE(review): this branch is only reached when
                        # col > 2, so the col == 0 test below is never true.
                        if col == 0:
                            ax.set_ylabel('log10(K)')
                        if row == N_ROW - 1:
                            ax.set_xlabel('Methylation level')
                        ax.set_ylim(-1, 1)
                        ax.set_xlim(0, 1)
                        ax.set_xticks([0.2 * i for i in range(6)])
        plt.savefig(fig_fp, dpi=300, bbox_inches='tight', pad_inches=0.1)
# p_upper[0] = 12 # p_init[0] = 9.11 # # p_lower[2] = 1 # # p_upper[2] = 2 # # p_init[2] = 1.5 # p_upper[3] = 3 # if np.abs(l-2e-3)<1e-4: # p_lower[1] = 0 # p_upper[1] = 1e-6 # p_init[1] = 0 # p_lower[3] = -1 if inspect or fit_of_fit: zeta_ = np.linspace(0, lambd, 5000) g_ = localreg(zeta, g, zeta_, kernel=gaussian, width=0.1, degree=2) bounded = "" model = partial(additive_model, lambd) model = lambda zeta, C, A, alpha, delta: additive_model(lambd, zeta, C, A, alpha, 0, 1, 1, delta) if fit_of_fit: zeta_fit = zeta_ g_fit = g_ else: zeta_fit = zeta g_fit = g zeta_bnd = 5 frac_bnd = 0.5 inds = np.where(np.logical_or(zeta_fit<zeta_bnd, zeta_fit>lambd-zeta_bnd))[0] n_bnd = len(inds)
def rdd_plot(data, sbins, bw, k, calc_points, dependant_var):
    """Shows binned means, local linear fits and bootstrap bands on both
    sides of "margin_1" = 0.

    Observations with negative and positive "margin_1" are treated as two
    separate samples. For each side a local linear regression (triangular
    kernel) is drawn, together with a band of +/- 1.96 standard deviations
    around the mean of k bootstrap curves produced by ``smooth``.

    :param (df) data: df passed to ``bin_fct``; the result must provide
        "margin_1", "bin" and the column named by dependant_var
    :param (int) sbins: length of bin, also the step of the bin grid on
        [-30, 30)
    :param (int) bw: bandwidth for local regression
    :param (int) k: iterations of resampling by bootstrapping (per side)
    :param (int) calc_points: stride; every calc_points-th sorted observation
        is used as an evaluation point of the fitted curve
    :param (str) dependant_var: name of dependant variable in df
    :return: None, the figure is displayed with ``plt.show()``
    """
    binned = bin_fct(data, sbins)
    bin_means = binned.groupby(binned["bin"]).mean()[dependant_var]
    bin_starts = range(-30, 30, sbins)
    bin_mid = bin_starts - np.mod(bin_starts, sbins) + sbins / 2
    df_figure2 = pd.DataFrame([bin_mid, bin_means],
                              index=["vic_marg", "rank_imp"]).transpose()

    # Split at the cutoff and sort each side by the running variable.
    below = binned.loc[(binned["margin_1"] < 0)].sort_values(by=["margin_1"])
    above = binned.loc[(binned["margin_1"] > 0)].sort_values(by=["margin_1"])
    x1, y1 = np.asarray(below["margin_1"]), np.asarray(below[dependant_var])
    x2, y2 = np.asarray(above["margin_1"]), np.asarray(above[dependant_var])

    # Local linear fits evaluated at every calc_points-th observation.
    x_sm1 = x1[::calc_points]
    x_sm2 = x2[::calc_points]
    reg_1 = localreg(x1, y1, x0=x_sm1, degree=1, kernel=triangular, width=bw)
    reg_2 = localreg(x2, y2, x0=x_sm2, degree=1, kernel=triangular, width=bw)

    # Bootstrap curves, one column per resample (negative side drawn first).
    xgrid1 = np.linspace(-30, 0, 50)
    xgrid2 = np.linspace(0, 30, 50)
    smooths1 = np.stack([smooth(x1, y1, xgrid1) for _ in range(k)], axis=1)
    smooths2 = np.stack([smooth(x2, y2, xgrid2) for _ in range(k)], axis=1)

    bands = []
    for grid, curves in ((xgrid1, smooths1), (xgrid2, smooths2)):
        centre = np.nanmean(curves, axis=1)
        spread = np.nanstd(curves, axis=1, ddof=0)
        bands.append((grid, centre - 1.96 * spread, centre + 1.96 * spread))

    fig, ax0 = plt.subplots(1, 1, figsize=(12, 8), tight_layout=True)
    for grid, band_low, band_high in bands:
        plt.fill_between(grid, band_low, band_high, alpha=0.25)
    plt.axvline(0, linewidth=0.4, color='r')
    ax0.grid(True)
    ax0.scatter(df_figure2["vic_marg"], df_figure2["rank_imp"])
    ax0.plot(x_sm1, reg_1)
    ax0.plot(x_sm2, reg_2)
    plt.xlabel("% Margin of Victory")
    plt.ylabel("Average Rank Improvment")
    ax0.axis([-30, 30, -6, 6])
    plt.show()
# Noisy samples of sin(x^2) on [0, L] and the coarser grid x0 on which the
# fits are evaluated (L is defined outside this excerpt).
x = np.linspace(0, L, 5000)
x0 = np.linspace(0, L, 500)
yf = np.sin(x**2)
y = yf + 0.5 * np.random.randn(*x.shape)

plt.plot(x, yf, label='$\\sin(x^2)$')
plt.plot(x, y, '+', markersize=0.2, color='black')

# Bootstrap: refit the local quadratic regression on N resamples drawn with
# replacement, keeping one fitted curve per row of yarr.
N = 200
yarr = np.zeros((N, len(x0)))
for n in range(N):
    ind = np.random.randint(0, len(x), len(x))
    xb = x[ind]
    yb = y[ind]
    yarr[n, :] = localreg(xb, yb, x0, degree=2, kernel=tricube, width=0.4)
    # plt.plot(x0, yarr[n,:], linewidth=0.5, color='black')

# Pointwise 2.5% / 97.5% percentiles of the bootstrap curves form the band.
lower = np.percentile(yarr, 2.5, axis=0)
upper = np.percentile(yarr, 97.5, axis=0)
plt.fill_between(x0, lower, upper, color='gray', alpha=0.5)

# Fit on the full (non-resampled) data.
y0 = localreg(x, y, x0, degree=2, kernel=tricube, width=0.4)
plt.plot(x0, y0, label='Local regression')

# ym = np.average(yarr, axis=0)
# plt.plot(x0, ym, '--', label='Bootstrapped local regression')

plt.title('Locally Weighted Polynomial Regression')
plt.xlabel('x')
# The original called plt.xlabel twice, which replaced the x label with 'y'
# and left the y axis unlabelled.
plt.ylabel('y')
# NOTE(review): this excerpt appears to contain the end of a script followed
# by its beginning (the module docstring and imports come after the code that
# uses them). Statement order is kept exactly as received -- confirm against
# the original file.
print('linear dim:' + str(semi_data.linear_dim))
print('varying dim:' + str(semi_data.varying_dim))
I = 5
la_design = lae.la_design(semi_data.varying, I)
varying_hat, linear_hat, index_design = lae.full_estimate(semi_data, I)
print('constant coefficients:')
print(linear_hat)
""" further estimate via local polynomial regression """
# Smooth column 1 of the estimate against the index with a local linear
# regression (Epanechnikov kernel, width 0.3) and plot the result.
target = varying_hat[:, 1]
index = index_design
a_hat = localreg(index, target, degree=1, kernel=epanechnikov, width=0.3)
plt.style.use('ggplot')
plt.plot(index, a_hat, label='Local average + Local linear')
"""example for varying coefficient model"""
import numpy as np
from numpy import pi, sin, cos, exp, sqrt, std
from laepy import lae
from localreg import *
import matplotlib.pyplot as plt
# Simulated inputs: n standard-normal covariate rows of dimension d and a
# uniform index u on [0, 1]; snr is set here but not used in the visible code.
n, d, snr = 500, 2, 5
x = np.random.normal(0, 1, [n, d])
u = np.random.uniform(0, 1, n)
# Two coefficient functions of u: sin(60 u) and 4 u (1 - u).
coef = np.array([sin(60 * u), 4 * u * np.subtract(1, u)])
def plot_local_regression_and_RD_separately(max_d, fig_format="png"):
    """Save one grid of panels per region group and per entry of ``K_RD``.

    ``K_RD`` is read from the enclosing scope (its local assignment is
    commented out). Each value selects an input sub-directory: 1 -> "K_Rd",
    2 -> "f_Rd", 3 -> "DNase_Rd", anything else -> "Methy_Rd". For every file
    of the region group, column 1 of the tab-separated table is scattered
    against column 0 and a degree-2 tricube ``localreg`` curve is overlaid;
    the file labelled "Genome" is drawn in blue with a white curve. If
    ``localreg`` raises ``numpy.linalg.LinAlgError`` the panel uses
    ``sns.regplot``.

    Figures are written to ``<BASE_DIR>/figures/Rd/<REGION>_<dir>.<fig_format>``.

    :param int max_d: upper x-axis limit; ticks are placed every 200 up to it.
    :param str fig_format: file extension of the saved figures.
    """
    regions = ["Genomic_Regions", "Histone_Modification", "ChromHMM", "TFBS"]
    # K_RD = [3]  # 0, 1, 2
    N_COL = 5
    cm = plt.get_cmap('gist_rainbow')
    FIG_DIR = os.path.join(BASE_DIR, "figures")
    plt.rc('xtick', labelsize=12)  # font size of the x tick labels
    plt.rc('ytick', labelsize=12)  # font size of the y tick labels
    dir_by_km = {1: "K_Rd", 2: "f_Rd", 3: "DNase_Rd"}
    for REGION in regions:
        fig_dir = os.path.join(FIG_DIR, "Rd")
        mkdir(fig_dir)
        for km in K_RD:
            inter_name = dir_by_km.get(km, "Methy_Rd")
            out_rd_corr_dir = os.path.join("../data/K_region_intersect",
                                           REGION, inter_name)
            file_paths = [
                os.path.join(out_rd_corr_dir, "%s.bed" % file_name)
                for file_name in FILE_ORDERED_NAMES[REGION]
            ]
            file_labels = FILE_LABELS[REGION]
            N_FILES = len(file_labels)
            N_ROW = int(math.ceil((N_FILES) / N_COL))
            fig_fp = os.path.join(
                fig_dir, "%s_%s.%s" % (REGION, inter_name, fig_format))
            panel_size = EACH_SUB_FIG_SIZE - 1
            fig, axs = plt.subplots(N_ROW, N_COL,
                                    figsize=(N_COL * panel_size,
                                             N_ROW * panel_size))
            for j in range(N_FILES):
                row, col = divmod(j, N_COL)
                # plt.subplots returns a 1-D array when there is one row.
                ax = axs[col] if N_ROW == 1 else axs[row][col]
                table = pd.read_csv(file_paths[j], sep="\t",
                                    header=None).values
                x, y = table[:, 0], table[:, 1]
                try:
                    y2 = localreg(x, y, degree=2, kernel=tricube, width=100)
                    if file_labels[j] == "Genome":
                        point_color, line_fmt = "blue", "w-"
                    else:
                        point_color, line_fmt = cm(1. * j / N_FILES), "k-"
                    ax.scatter(x, y, s=8, label=file_labels[j],
                               color=point_color)
                    ax.plot(x, y2, line_fmt, linewidth=2)
                except np.linalg.LinAlgError:
                    sns.regplot(x=x, y=y, ax=ax,
                                scatter_kws={
                                    's': 8,
                                    'color': cm(1. * j / N_FILES)
                                })
                ax.set_xticks(range(0, max_d + 1, 200))
                ax.set_xlim(0, max_d)
                ax.set_ylim(0, 1.0)
                ax.set_title(file_labels[j], fontsize=18)
            plt.savefig(fig_fp, dpi=300, bbox_inches='tight', pad_inches=0.1)
def plot_local_regression_and_RD(max_d, fig_format="png"):
    """Save one grid of panels per region group, shared by all ``K_RD`` entries.

    Each entry of the local ``K_RD`` list selects an input sub-directory, a
    point colour and a legend label: 1 -> ("K_Rd", red, "k"),
    2 -> ("f_Rd", green, "f"), anything else -> ("Methy_Rd", blue, "methy").
    All entries are drawn into the same figure of a region group. For every
    file, column 1 of the tab-separated table is scattered against column 0
    and a white degree-2 tricube ``localreg`` curve is overlaid; if
    ``localreg`` raises ``numpy.linalg.LinAlgError`` the panel uses
    ``sns.regplot``. A legend is only requested on the first panel while
    processing entry 2.

    Figures are written to ``<BASE_DIR>/figures/Rd/<REGION>.<fig_format>``.

    :param int max_d: upper x-axis limit; ticks are placed every 200 up to it.
    :param str fig_format: file extension of the saved figures.
    """
    regions = [
        "Genomic_Regions", "Histone_Modification", "ChromHMM", "TFBS"
    ]  # "Genomic_Regions", "Histone_Modification", "ChromHMM", "TFBS"
    K_RD = [3]  # , 0
    N_COL = 5
    cm = plt.get_cmap('gist_rainbow')
    FIG_DIR = os.path.join(BASE_DIR, "figures")
    fig_dir = os.path.join(FIG_DIR, "Rd")
    mkdir(fig_dir)
    # km -> (input sub-directory, point colour, legend label)
    settings_by_km = {
        1: ("K_Rd", "red", "k"),
        2: ("f_Rd", "green", "f"),
    }
    fallback = ("Methy_Rd", "blue", "methy")
    for REGION in regions:
        fig_fp = os.path.join(fig_dir, "%s.%s" % (REGION, fig_format))
        fig, axs = None, None
        for km in K_RD:
            inter_name, c, label = settings_by_km.get(km, fallback)
            out_rd_corr_dir = os.path.join("../data/K_region_intersect",
                                           REGION, inter_name)
            file_paths = [
                os.path.join(out_rd_corr_dir, "%s.bed" % file_name)
                for file_name in FILE_ORDERED_NAMES[REGION]
            ]
            file_labels = FILE_LABELS[REGION]
            N_FILES = len(file_labels)
            N_ROW = int(math.ceil((N_FILES) / N_COL))
            # The figure is created once per region and reused for every km.
            if not fig:
                fig, axs = plt.subplots(
                    N_ROW, N_COL,
                    figsize=(N_COL * EACH_SUB_FIG_SIZE,
                             N_ROW * (EACH_SUB_FIG_SIZE - 1)))
            for j in range(N_FILES):
                row, col = divmod(j, N_COL)
                # plt.subplots returns a 1-D array when there is one row.
                ax = axs[col] if N_ROW == 1 else axs[row][col]
                table = pd.read_csv(file_paths[j], sep="\t",
                                    header=None).values
                x, y = table[:, 0], table[:, 1]
                try:
                    y2 = localreg(x, y, degree=2, kernel=tricube, width=100)
                    ax.scatter(x, y, s=8, label=label, color=c)
                    ax.plot(x, y2, "w-", linewidth=2)
                except np.linalg.LinAlgError:
                    sns.regplot(x=x, y=y, ax=ax,
                                scatter_kws={'s': 8, 'color': c},
                                line_kws={'color': 'white', "lw": 2})
                ax.set_xticks(range(0, max_d + 1, 200))
                ax.set_xlim(0, max_d)
                ax.set_ylim(0, 1.0)
                ax.set_title(file_labels[j], fontsize=16)
                if km == 2 and j == 0:
                    ax.legend()
        plt.savefig(fig_fp, dpi=300)  # , bbox_inches='tight', pad_inches=0.1
import numpy as np
import matplotlib.pyplot as plt
from localreg import *

# Fixed seed so the noisy sample is reproducible.
np.random.seed(1234)

# Noisy samples of sin(x^2) on [1.5, 5].
x = np.linspace(1.5, 5, 2000)
yf = np.sin(x * x)
y = yf + 0.5 * np.random.randn(*x.shape)

# Local polynomial fits of degree 0, 1 and 2 with the same tricube kernel
# and width.
y0 = localreg(x, y, degree=0, kernel=rbf.tricube, width=0.3)
y1 = localreg(x, y, degree=1, kernel=rbf.tricube, width=0.3)
y2 = localreg(x, y, degree=2, kernel=rbf.tricube, width=0.3)

plt.plot(x, y, '+', markersize=0.6, color='gray')
# Raw string: '\s' in a normal string literal is an invalid escape sequence
# (DeprecationWarning, SyntaxWarning on newer Pythons). The label text is
# unchanged.
plt.plot(x, yf, label=r'Ground truth ($\sin(x^2)$)')
plt.plot(x, y0, label='Moving average')
plt.plot(x, y1, label='Local linear regression')
plt.plot(x, y2, label='Local quadratic regression')
plt.legend()
plt.show()