def simulate_data(ss, es, N, rep, zero, G, alpha, fname):
    """Simulates the data for the plot.

    Args:
        ss: An array of sigma values to estimate the FI at.
        es: An array of epsilon values to estimate the FI at.
        N: Number of data points for each PDF.
        rep: Number of repetitions of the whole simulation.
        zero: What should npfi consider as zero.
        G: G for DEFT.
        alpha: alpha for DEFT.
        fname: Name of the file where the simulation data will be stored.

    Returns:
        results: A dictionary with all simulated data, which was also
            stored (gzip-pickled) to the file.
    """
    # Results of the simulation will be stored here, keyed by sigma
    data = {}
    # Go over all sigma values in ss
    for i, s in enumerate(ss):
        true_fi = 2 / s ** 2  # Analytical FI of N(0, s) w.r.t. sigma
        ess = []  # Store the epsilon values actually used
        dss = []  # Store the ds values we used
        FI_values_all = []
        err_values_all = []
        err_median, err_5, err_95 = [], [], []
        for j, e in enumerate(es):
            ds = s / (e * np.sqrt(N))  # Choose ds according to desired epsilon
            # If ds >= s we have a problem of sampling with negative std
            if ds >= s:
                continue
            dss.append(ds)
            ess.append(e)
            # Estimate the FI for rep repetitions
            FI_values = []
            # NOTE: loop variable renamed from `j` to `k` so it no longer
            # shadows the epsilon index of the enclosing loop.
            for k in range(rep):
                sim_data = [normal(size=N, scale=s),
                            normal(size=N, scale=s - ds),
                            normal(size=N, scale=s + ds)]
                pdfs, bbox = get_pdfs_from_data(sim_data, method="deft", G=G,
                                                alpha=alpha, bbox="adjust")
                FI, a, b = npfi(pdfs, ds, bounds=bbox, logarithmic=False,
                                zero=zero, N=N)
                FI_values.append(FI)
            # More convenient to use as numpy arrays
            FI_values = np.array(FI_values)
            # Compute relative-error statistics from the results
            err_values = (FI_values - true_fi) / true_fi
            FI_values_all.append(FI_values)
            err_values_all.append(err_values)
            err_median.append(np.median(err_values))
            err_5.append(np.percentile(err_values, 5))
            err_95.append(np.percentile(err_values, 95))
        data[s] = dict(FI_values_all=FI_values_all,
                       err_values_all=err_values_all,
                       err_median=np.array(err_median),
                       err_5=np.array(err_5),
                       err_95=np.array(err_95),
                       dss=dss,
                       ess=ess)
    results = dict(data=data, N=N, rep=rep, ss=ss)
    # Context manager guarantees the file is closed even if pickling fails
    with gzip.open(fname, "wb") as f:
        pickle.dump(results, f)
    return results
def simulate_data(s, Ns, dss, rep, zero, G, alpha, fname):
    """Simulates the data for the plot.

    Args:
        s: Sigma in which all computations are done.
        Ns: An array of N values where to compute the FI.
        dss: An array of ds to compute with.
        rep: Number of repetitions of the whole simulation.
        zero: What should npfi consider as zero.
        G: G for DEFT.
        alpha: alpha for DEFT.
        fname: Name of the file where the simulation data will be stored.

    Returns:
        data: A dictionary with all simulated data, which was also
            stored (gzip-pickled) to the file.
    """
    # Results of the simulation will be stored here; NaN marks
    # (N, ds) combinations that were never filled in
    shape = (len(Ns), len(dss))
    FIs = np.full(shape, np.nan)  # Computed FIs (median over repetitions)
    err = np.full(shape, np.nan)  # Computed absolute relative error
    true_fi = 2.0 / s ** 2  # Analytical FI of N(0, s) w.r.t. sigma
    FI_values_all = []
    # Go over all Ns and dss per N
    for i, N in enumerate(Ns):
        print("Starting %d from %d" % (i + 1, len(Ns)))
        FI_row = []
        for j, ds in enumerate(dss):
            # Estimate the FI for rep repetitions
            FI_values = []
            for k in range(rep):
                sim_data = [normal(size=N, scale=s),
                            normal(size=N, scale=s - ds),
                            normal(size=N, scale=s + ds)]
                pdfs, bbox = get_pdfs_from_data(sim_data, method="deft", G=G,
                                                alpha=alpha, bbox="adjust")
                FI, a, b = npfi(pdfs, ds, bounds=bbox, logarithmic=False,
                                zero=zero, N=N)
                FI_values.append(FI)
            # More convenient to use as numpy arrays
            FI_values = np.array(FI_values)
            err_values = np.abs(FI_values - true_fi) / true_fi
            FI_row.append(FI_values)
            # Save results in the appropriate matrix
            FIs[i, j] = np.median(FI_values)
            err[i, j] = np.median(err_values)
        FI_values_all.append(FI_row)
    data = dict(FIs=FIs, err=err, Ns=Ns, dss=dss, rep=rep)
    # Context manager guarantees the file is closed even if pickling fails
    with gzip.open(fname, "wb") as f:
        pickle.dump(data, f)
    return data
def simulate_data(ss, N, rep, e, zero, G, alpha, fname):
    """Simulates the data for the plot.

    Args:
        ss: An array of sigma values to estimate the FI at.
        N: Number of data points for each PDF.
        rep: Number of repetitions of the whole simulation.
        e: The value of the epsilon parameter.
        zero: What should npfi consider as zero.
        G: G for DEFT.
        alpha: alpha for DEFT.
        fname: Name of the file where the simulation data will be stored.

    Returns:
        data: A dictionary with all simulated data, which was also
            stored (gzip-pickled) to the file.
    """
    # All list containers we need to store the values we compute
    FI_deft_median, FI_deft_5, FI_deft_95 = [], [], []
    FI_kde_median, FI_kde_5, FI_kde_95 = [], [], []
    err_deft_median, err_deft_5, err_deft_95 = [], [], []
    err_kde_median, err_kde_5, err_kde_95 = [], [], []
    FI_deft_values_all, FI_kde_values_all = [], []
    dss = []
    # Go over all sigma values in ss
    for i, s in enumerate(ss):
        real_FI = 2 / s ** 2  # Analytical FI of N(0, s) w.r.t. sigma
        ds = s / (e * np.sqrt(N))  # Choose ds according to desired epsilon
        # If ds >= s we have a problem of sampling with negative std,
        # so shrink ds until it is safely below s
        while ds >= s:
            ds *= 0.9
        dss.append(ds)
        # Estimate the FI for rep repetitions
        FI_deft_values, FI_kde_values = [], []
        for j in range(rep):
            sim_data = [normal(size=N, scale=s),
                        normal(size=N, scale=s - ds),
                        normal(size=N, scale=s + ds)]
            pdfs_deft, bbox_deft = get_pdfs_from_data(sim_data, method="deft",
                                                      G=G, alpha=alpha,
                                                      bbox="adjust")
            pdfs_kde, bbox_kde = get_pdfs_from_data(sim_data,
                                                    method="gaussian_kde")
            FI_deft, a, b = npfi(pdfs_deft, ds, bounds=bbox_deft,
                                 logarithmic=False, zero=zero, N=N)
            FI_kde, a, b = npfi(pdfs_kde, ds, bounds=bbox_kde,
                                logarithmic=True, zero=zero, N=N)
            FI_deft_values.append(FI_deft)
            FI_kde_values.append(FI_kde)
        # More convenient to use as numpy arrays
        FI_deft_values = np.array(FI_deft_values)
        FI_kde_values = np.array(FI_kde_values)
        FI_deft_values_all.append(FI_deft_values)
        FI_kde_values_all.append(FI_kde_values)
        # Compute statistics from the values we obtained
        FI_deft_median.append(np.median(FI_deft_values))
        FI_deft_5.append(np.percentile(FI_deft_values, 5))
        FI_deft_95.append(np.percentile(FI_deft_values, 95))
        FI_kde_median.append(np.median(FI_kde_values))
        FI_kde_5.append(np.percentile(FI_kde_values, 5))
        FI_kde_95.append(np.percentile(FI_kde_values, 95))
        # Compute relative error statistics
        err_deft_values = (FI_deft_values - real_FI) / real_FI
        err_deft_median.append(np.median(err_deft_values))
        err_deft_5.append(np.percentile(err_deft_values, 5))
        err_deft_95.append(np.percentile(err_deft_values, 95))
        err_kde_values = (FI_kde_values - real_FI) / real_FI
        err_kde_median.append(np.median(err_kde_values))
        err_kde_5.append(np.percentile(err_kde_values, 5))
        err_kde_95.append(np.percentile(err_kde_values, 95))
        if __debug__:
            print("Finished %d from %d values" % (i + 1, len(ss)))
    data = dict(ss=ss, dss=dss,
                FI_deft_values_all=FI_deft_values_all,
                FI_kde_values_all=FI_kde_values_all,
                FI_deft_median=FI_deft_median,
                FI_kde_median=FI_kde_median,
                FI_deft_5=FI_deft_5, FI_deft_95=FI_deft_95,
                FI_kde_5=FI_kde_5, FI_kde_95=FI_kde_95,
                err_deft_median=err_deft_median,
                err_kde_median=err_kde_median,
                err_deft_5=err_deft_5, err_deft_95=err_deft_95,
                err_kde_5=err_kde_5, err_kde_95=err_kde_95)
    # Open the file only once the results are assembled; the context
    # manager guarantees it is closed even if pickling fails
    with gzip.open(fname, "wb") as f:
        pickle.dump(data, f)
    return data