def replace_missing_with_kde_samples(data_frame, attribute):
    """
    Replace missing values with samples drawn from a KDE fitted to the
    observed values of the attribute.

    :param data_frame: Pandas dataframe holding the attribute
    :type data_frame: pandas.DataFrame
    :param attribute: The attribute for which missing values should be replaced
    :type attribute: str
    """
    minimum = data_frame[attribute].min()
    maximum = data_frame[attribute].max()
    values = np.array(data_frame[attribute].dropna())
    kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(
        values.reshape(-1, 1))
    missing_values = data_frame.loc[
        data_frame[attribute].isnull(), attribute]
    # Rejection-sample until at least twice as many in-range candidates as
    # missing values are available.
    samples = [num for num in kde.sample(n_samples=len(values))[:, 0]
               if minimum <= num <= maximum]
    while len(samples) < 2 * len(missing_values):
        samples.extend(num for num in kde.sample(n_samples=len(values))[:, 0]
                       if minimum <= num <= maximum)
    # Python 3: xrange is gone, use range; sorted() keeps the chosen
    # candidates in draw order.
    samples = [samples[i] for i in
               sorted(random.sample(range(len(samples)), len(missing_values)))]
    # Assign positionally via iloc; the Series keeps the original dataframe
    # index, so integer keys would not line up.
    for index, value in enumerate(samples):
        missing_values.iloc[index] = value
    data_frame.update(pd.DataFrame(missing_values))
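# Usage sketch (illustrative, not from the original source): fill gaps in a
# toy 'age' column with KDE draws. Assumes the function above and these
# imports live in one module.
import random
import numpy as np
import pandas as pd
from sklearn.neighbors import KernelDensity

df = pd.DataFrame({'age': [23.0, 25.0, np.nan, 31.0, np.nan, 27.0, 30.0]})
replace_missing_with_kde_samples(df, 'age')
print(df['age'].isnull().sum())  # expect 0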
def kde_sampler_life(enc, X, y, batch_size, bandwidth=0.5,
                     nn_subset_size=None):
    while True:
        log_time("get_z_enc")
        if nn_subset_size is None:
            imgs = X
        else:
            rand_idxs = np.random.randint(0, len(X), nn_subset_size)
            imgs = X[rand_idxs]
        # note: apply_model runs with a fixed internal batch size of 500,
        # independent of this generator's batch_size argument
        z_enc = ld_gan.utils.model_handler.apply_model(enc, imgs,
                                                       batch_size=500)
        log_time("get_z_enc")
        batch_idxs = np.random.randint(0, len(z_enc), batch_size)
        img_batch = imgs[batch_idxs]
        y_batch = y[batch_idxs]
        kde = KernelDensity(bandwidth=bandwidth).fit(z_enc)
        z_batch = kde.sample(batch_size)
        yield img_batch, y_batch, z_batch, z_batch
def find_max_density(point_list):
    point_list, _ = remove_nan(point_list)
    if point_list.shape[0] == 0:
        return [float('nan'), float('nan'), float('nan')]
    kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(point_list)
    points = kde.sample(100000)
    prob_list = kde.score_samples(points)
    max_point = points[np.argmax(prob_list)]
    # print("max", max_point)
    return max_point
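# Illustrative mode-finding sketch on synthetic 3-D data (not from the
# original source; remove_nan is assumed to be defined in the original
# module and is not needed here because the KDE is fitted directly).
import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
pts = np.vstack([rng.normal(0, 0.3, (200, 3)), rng.normal(3, 0.3, (50, 3))])
kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(pts)
cand = kde.sample(10000, random_state=0)
print(cand[np.argmax(kde.score_samples(cand))])  # near the denser cluster at 0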
def train_rlos(data, show_chart=False):
    """Train LOS estimator: patient LOS for triplet (sex, age, sline)."""
    freq = {}
    for row in data:
        sex = int(row["sex"])
        age = fp.split_age(int(row["age"]))
        sline = row["sline"]
        rlos = int(row["rlos"])
        if rlos == 0:
            print("RLOS equals zero for sex %d, age %d, SL %s"
                  % (sex, age, sline))
        key = (sex, age, sline)  # renamed from `tuple` to avoid shadowing the builtin
        freq.setdefault(key, [])
        freq[key].append(rlos)
    result = {}
    for key, train_data in freq.items():
        (sex, age, sline) = key
        if len(train_data) < training_threshold:
            print("Too small training set (<%d) for sex %d, age %d, SL %s. "
                  "Data will be skipped." % (training_threshold, sex, age, sline))
            continue
        X = np.array([train_data]).transpose()
        kde = KernelDensity(kernel='tophat', bandwidth=0.5).fit(X)
        kdef = lambda size: [round(l[0]) for l in kde.sample(size).tolist()]
        result[key] = kde
        if show_chart:
            # print("Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            # print_freq(ages)
            samples = kdef(len(train_data)) if len(train_data) < 500 else kdef(500)
            # print_freq(samples)
            # hist for train data
            plt.subplot(211)
            plt.title("RLOS train data for Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('RLOS')
            plt.hist(train_data)
            # estimated density
            plt.subplot(212)
            plt.title("Estimated density Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('RLOS')
            plt.hist(samples)
            plt.show()
    return result
def get_numerical_signature(values, S):
    '''
    Learns a distribution of the values,
    then generates a sample of size S.
    '''
    # Transform data to numpy array
    Xnumpy = np.asarray(values)
    X = Xnumpy.reshape(-1, 1)
    # Learn kernel
    kde = KernelDensity(kernel=C.kd["kernel"],
                        bandwidth=C.kd["bandwidth"]).fit(X)
    # One draw per call; kde.sample(S) would produce all S draws in a
    # single call.
    sig_v = [kde.sample()[0][0] for x in range(S)]
    return sig_v
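# Usage sketch (illustrative): C below is a hypothetical stand-in for the
# original config module, with just the two keys the function reads.
import numpy as np
from sklearn.neighbors import KernelDensity

class C:
    kd = {"kernel": "gaussian", "bandwidth": 0.5}

sig = get_numerical_signature([1.0, 1.2, 0.9, 5.1, 4.8], S=10)
print(len(sig))  # 10 floats drawn from the fitted density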
def train_admit_count(data, show_chart=False):
    """Train patient admittance number for triplet (sex, age, sline)"""
    freq = {}
    for row in data:
        sex = int(row["sex"])
        age = fp.split_age(int(row["age"]))
        sline = row["sline"]
        admit = row["admit"]
        key = (sex, age, sline)  # renamed from `tuple` to avoid shadowing the builtin
        freq.setdefault(key, {})
        freq[key].setdefault(admit, 0)
        freq[key][admit] += 1
    result = {}
    for key, days in freq.items():
        (sex, age, sline) = key
        train_data = list(days.values())  # Python 3: values() is a view
        if len(train_data) < training_threshold:
            print("Too small training set (<%d) for sex %d, age %d, SL %s. "
                  "Data will be skipped." % (training_threshold, sex, age, sline))
            continue
        X = np.array([train_data]).transpose()
        kde = KernelDensity(kernel='tophat', bandwidth=0.5).fit(X)
        kdef = lambda size: [int(round(l[0])) for l in kde.sample(size).tolist()]
        result[key] = kde
        if show_chart:
            # print("Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            # print_freq(ages)
            samples = kdef(len(train_data)) if len(train_data) < 500 else kdef(500)
            # print_freq(samples)
            # hist for train data
            plt.subplot(211)
            plt.title("Admit count train data for Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('admittance count')
            plt.hist(train_data)
            # estimated density
            plt.subplot(212)
            plt.title("Estimated density Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('admittance count')
            plt.hist(samples)
            plt.show()
    return result
class KDEModel(object):
    """
    Wrapper class for scikit-learn's Kernel Density Estimation model.

    Attributes
    ----------
    model : KernelDensity
        Wrapped class model.
    """
    def __init__(self, kernel='gaussian', bandwidth=.001):
        # Pass the kernel argument through instead of hard-coding 'gaussian'.
        self.model = KernelDensity(kernel=kernel, bandwidth=bandwidth)

    def fit(self, train_X):
        """
        Wrapper method for fit() method of Kernel Density model.

        Parameters
        ----------
        train_X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        """
        self.model.fit(train_X)

    def generate_samples(self, n_samples):
        """
        Generates random samples according to the fitted distribution.

        Returns
        -------
        list
            List of numpy arrays of randomly generated observations.
        """
        points = self.model.sample(n_samples)
        return points

    def score_samples(self, X):
        """
        Predicts the log likelihood score of the samples in X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        """
        return self.model.score_samples(X)
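# Usage sketch for KDEModel (illustrative, not from the original source).
import numpy as np

kde_model = KDEModel(kernel='gaussian', bandwidth=0.2)
kde_model.fit(np.random.RandomState(0).normal(size=(100, 2)))
new_points = kde_model.generate_samples(5)         # (5, 2) array
log_density = kde_model.score_samples(new_points)  # log-likelihood per point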
def get_kde_pdf(X, bandwidth=2, step=.1, num_samples=200, optimize=False):
    """Return kde, pdf and samples from a data sample."""
    if len(X) == 0:
        return [], np.array([]), []
    if optimize:
        bandwidths = 10**np.linspace(-1, 1, 10)
        # Note: LeaveOneOut(len(X)) is the pre-0.18 sklearn.cross_validation
        # API; with sklearn.model_selection use cv=LeaveOneOut().
        grid = GridSearchCV(KernelDensity(kernel='gaussian'),
                            {'bandwidth': bandwidths},
                            cv=LeaveOneOut(len(X)))
        grid.fit(X[:, None])
        kde = KernelDensity(
            kernel='gaussian',
            bandwidth=grid.best_params_['bandwidth']).fit(X[:, None])
    else:
        # Use the bandwidth argument rather than the hard-coded 2.
        kde = KernelDensity(kernel='gaussian',
                            bandwidth=bandwidth).fit(X[:, None])
    pdf = np.exp(kde.score_samples(np.arange(0, 100, step)[:, None]))
    samples = kde.sample(num_samples)
    return kde, np.array(pdf), samples
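# Bandwidth-selection sketch with the current scikit-learn API (assumption:
# the snippet above targets the pre-0.18 cross_validation module).
# GridSearchCV maximizes KernelDensity.score(), the total log-likelihood.
import numpy as np
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.neighbors import KernelDensity

x = np.random.RandomState(0).normal(50, 10, 100)
grid = GridSearchCV(KernelDensity(kernel='gaussian'),
                    {'bandwidth': 10**np.linspace(-1, 1, 10)},
                    cv=LeaveOneOut())
grid.fit(x[:, None])
print(grid.best_params_['bandwidth'])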
def get_sampled_feature(dict_ids, article_ids, ndim=100,
                        type_features='second', features=['intervals'],
                        bw=1):
    '''
    Each article has 'intervals' (seconds) between two adjacent retweets.
    ndim : feature dimension, i.e. the number of KDE samplings per feature.
    '''
    X = np.zeros((len(article_ids), ndim * len(features)))
    for i, id_ in enumerate(article_ids):
        article_id = id_
        stats = dict_ids[id_]
        for find, feature in enumerate(features):
            raw_data = change_resolution(stats[feature], type_features)
            kde = KernelDensity(kernel='gaussian',
                                bandwidth=bw).fit(raw_data.reshape(-1, 1))
            X[i, find * ndim:(find + 1) * ndim] = np.sort(kde.sample(ndim)[:, 0])
    return X
def resample_state(D, w):
    w_norm = np.sum(w)  # Normalization factor for weights
    w_ecdf = np.cumsum(w) / w_norm  # New weight given the new measurement
    # Resample the points
    D_new, ind = np.empty_like(D), np.empty_like(D)
    for i, q in enumerate(D):
        # Index for the new sample, drawn from the weight ECDF
        ind[i] = bisect.bisect_left(w_ecdf, np.random.uniform(0, 1))
        # New weighted particle (sample) from the previous step given the
        # new measurement
        D_new[i] = D[int(ind[i])]
    # Regularize with a KDE over the resampled particles.
    # std = np.std(D_new)
    bandwidth = 0.05  # 1.06*std*len(D_new)**-0.2 (Silverman's rule); used to be 0.08
    kde = KernelDensity(bandwidth=bandwidth, kernel='gaussian',
                        algorithm='ball_tree')
    kde.fit(D_new[:, np.newaxis])
    return kde.sample(num_particles).flatten(), ind
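# Regularized-resampling sketch (illustrative, not from the original source).
# num_particles is a module-level constant in the original; it is defined
# here only for the demo.
import bisect
import numpy as np
from sklearn.neighbors import KernelDensity

num_particles = 500
particles = np.random.RandomState(0).normal(0.0, 1.0, num_particles)
weights = np.exp(-0.5 * (particles - 0.3)**2)  # toy measurement likelihood
new_particles, _ = resample_state(particles, weights)
print(new_particles.shape)  # (500,)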
class DensityEstimator:
    def __init__(self, training_set, method_name, n_components=None,
                 log_dir=None, second_stage_beta=None):
        self.log_dir = log_dir
        self.training_set = training_set
        self.fitting_done = False
        self.method_name = method_name
        self.second_density_mdl = None
        self.skip_fitting_and_sampling = False
        if method_name == "GMM_Dirichlet":
            self.model = mixture.BayesianGaussianMixture(
                n_components=n_components, covariance_type='full',
                weight_concentration_prior=1.0 / n_components)
        elif method_name == "GMM":
            self.model = mixture.GaussianMixture(n_components=n_components,
                                                 covariance_type='full',
                                                 max_iter=2000, verbose=2,
                                                 tol=1e-3)
        elif method_name in ("GMM_1", "GMM_10", "GMM_20", "GMM_100",
                             "GMM_200"):
            # The fixed-size variants differ only in n_components.
            self.model = mixture.GaussianMixture(
                n_components=int(method_name.split('_')[1]),
                covariance_type='full', max_iter=2000, verbose=2, tol=1e-3)
        elif method_name.find("aux_vae") >= 0:
            have_2nd_density_est = False
            if method_name[8:] != "":
                self.second_density_mdl = method_name[8:]
                have_2nd_density_est = True
            self.model = VaeModelWrapper(
                input_shape=(training_set.shape[-1],),
                latent_space_dim=training_set.shape[-1],
                have_2nd_density_est=have_2nd_density_est,
                log_dir=self.log_dir,
                sec_stg_beta=second_stage_beta)
        elif method_name == "given_zs":
            files = os.listdir(log_dir)
            for z_smpls in files:
                if z_smpls.endswith('.npy'):
                    break
            self.z_smps = np.load(os.path.join(log_dir, z_smpls))
            self.skip_fitting_and_sampling = True
        elif method_name.upper() == "KDE":
            self.model = KernelDensity(kernel='gaussian', bandwidth=0.425)
            # self.model = KernelDensity(kernel='tophat', bandwidth=15)
        else:
            raise NotImplementedError("Method specified : " + str(method_name)
                                      + " doesn't have an implementation yet.")

    def fitorload(self, file_name=None):
        if not self.skip_fitting_and_sampling:
            if file_name is None:
                self.model.fit(self.training_set, self.second_density_mdl)
            else:
                self.model.load(file_name)
        self.fitting_done = True

    def score(self, X, y=None):
        if self.method_name.upper().find("AUX_VAE") >= 0 \
                or self.skip_fitting_and_sampling:
            raise NotImplementedError(
                "Log likelihood evaluation for VAE is difficult or skipped")
        else:
            return self.model.score(X, y)

    def save(self, file_name):
        if not self.skip_fitting_and_sampling:
            if self.method_name.find('vae') >= 0:
                self.model.save(file_name)
            else:
                with open(file_name, 'wb') as f:
                    pickle.dump(self.model, f)

    def reconstruct(self, input_batch):
        if self.method_name.upper().find("AUX_VAE") < 0:
            raise ValueError("Non autoencoder style density estimator: "
                             + self.method_name)
        return self.model.reconstruct(input_batch)

    def get_samples(self, n_samples):
        if self.skip_fitting_and_sampling:
            return self.z_smps
        if not self.fitting_done:
            self.fitorload()
        scrmb_idx = np.array(range(n_samples))
        np.random.shuffle(scrmb_idx)
        if self.log_dir is not None:
            pickle_path = os.path.join(self.log_dir,
                                       self.method_name + '_mdl.pkl')
            with open(pickle_path, 'wb') as f:
                pickle.dump(self.model, f)
        if self.method_name.upper().startswith("GMM") \
                or self.method_name.upper().find("AUX_VAE") >= 0:
            # mixture .sample() returns (X, y); keep the samples only.
            return self.model.sample(n_samples)[0][scrmb_idx, :]
        else:
            # np.random.shuffle() works in place and returns None, so
            # shuffle KDE samples via the precomputed index instead.
            return self.model.sample(n_samples)[scrmb_idx, :]
def estimate():
    # X is assumed to be defined at module level in the original snippet.
    kde = KernelDensity(kernel="gaussian", bandwidth=0.1).fit(X)
    # get a random sample from the fitted density
    samples = kde.sample()
    return samples
# In[34]:

pca1 = PCA(n_components=n1, whiten=True)
dt = pca1.fit_transform(digits.data)
# print(dt.shape)

# In[35]:

kde_model = KernelDensity(kernel='gaussian', bandwidth=bandwidth)
kde_model.fit(dt)

# In[38]:

d1new = kde_model.sample(n_samples=100, random_state=0)
digits1k_new = pca1.inverse_transform(d1new)
plot_digits(digits1k_new)

# In[40]:

n_comps = np.arange(50, 210, 10)
clf_gauss_models = [
    GaussianMixture(n_components=n, covariance_type='full', random_state=0)
    for n in n_comps
]
aics = [model.fit(dt).aic(dt) for model in clf_gauss_models]
lbd = aics.index(min(aics))
print("Optimal Number of Components for GMM =", n_comps[lbd])

# In[44]:
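# In[ ]:

# Added sketch (not an original cell): draw new digits from the AIC-selected
# GMM, mirroring the KDE sampling above; pca1, plot_digits and the fitted
# models come from the earlier cells.
best_gmm = clf_gauss_models[lbd]      # already fitted by the AIC loop
d1gmm_new, _ = best_gmm.sample(100)   # GaussianMixture.sample returns (X, y)
plot_digits(pca1.inverse_transform(d1gmm_new))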
def kernel_smoother(all_data, bandwidth, sample_size):
    X = np.array(all_data)
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(X)
    return kde.sample(sample_size)
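# Usage sketch (illustrative): smooth-resample 2-D points.
import numpy as np
from sklearn.neighbors import KernelDensity

pts = np.random.RandomState(0).uniform(size=(50, 2))
resampled = kernel_smoother(pts, bandwidth=0.1, sample_size=200)
print(resampled.shape)  # (200, 2)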
def train_age(data, show_chart=False):
    """Train age estimator for each SL"""
    def print_freq(data):
        freq = {}
        length = float(len(data))
        for x in data:
            xcat = fp.split_age(x)
            freq.setdefault(xcat, 0)
            freq[xcat] += 1
        for x in sorted(freq.keys()):
            print("%d: %.2f" % (x, round(freq[x] / length, 2)), end=' ')
        print()

    sline_ages = {}
    bad_sl = set()
    for row in data:
        sline = row['sline']
        age = int(row["age"])
        if age <= 0:
            bad_sl.add(sline)
            continue
        sline_ages.setdefault(sline, [])
        sline_ages[sline].append(age)
    for sl in bad_sl:
        print("SL=%s has age values equal or less than zero. "
              "Values were ignored" % sl)
    # Iterate over a copy: deleting from a dict while iterating over it
    # raises a RuntimeError in Python 3.
    for sline, ages in list(sline_ages.items()):
        if len(ages) < alert_count:
            print("SL=%s has less(%d) than %d samples and will be excluded"
                  % (sline, len(ages), alert_count))
            del sline_ages[sline]
    result = {}
    for sline, ages in sline_ages.items():
        X = np.array([ages]).transpose()
        kde = KernelDensity(kernel='tophat', bandwidth=1.0).fit(X)
        kdef = lambda size: [round(l[0]) for l in kde.sample(size).tolist()]
        result[sline] = kdef
        if show_chart:
            print("SL=%s" % sline)
            print_freq(ages)
            samples = kdef(len(ages)) if len(ages) < 500 else kdef(500)
            print_freq(samples)
            # hist for train data
            plt.subplot(211)
            plt.title("Age train data for SL=%s" % (sline))
            plt.ylabel('freq')
            plt.xlabel('age category')
            plt.hist(ages)
            # estimated density
            plt.subplot(212)
            plt.title("Estimated density %s" % sline)
            plt.ylabel('freq')
            plt.xlabel('age category')
            plt.hist(samples)
            plt.show()
    return result
# In[25]:

get_ipython().run_line_magic('pinfo', 'kde.sample')

# Basically, that means we can use this model to predict what the next output
# of the 3 arms (constituting the Gaussian problem) will be.
#
# Let's see this with one example.

# In[26]:

np.random.seed(1)
one_sample = kde.sample()
one_sample

# In[27]:

one_draw = M.draw_each()
one_draw

# Of course, the next random rewards from the arms have no reason to be close
# to the predicted ones...
#
# But maybe we can use the prediction to choose the arm with the highest
# sample? And hopefully this will be the best arm, *at least on average*!
class CSGM(torch.nn.Module):
    def __init__(self, target, filter, G, num_samples, BS=64,
                 init_threshold=1e-2, threshold=0.05, bandwidth=0.1,
                 lr=1e-2):
        super(CSGM, self).__init__()
        self.target = torch.FloatTensor(target).cuda()
        self.A = torch.FloatTensor(filter).cuda()
        self.num_samples = num_samples
        self.G = G
        self.n_pixels = np.sum(filter)
        self.threshold = threshold
        self.init_threshold = init_threshold
        self.BS = BS
        # determine the points for KDE
        self.z, self.init_samples, self.init_bg = reconstruct_batch(
            target, filter, self.n_pixels, G, num_samples,
            threshold=init_threshold, lr=lr)
        self.Dz = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(
            self.z.reshape(num_samples, 100))

    def update_sampler(self, bandwidth):
        self.Dz = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(
            self.z.reshape(-1, 100))

    def sample(self, num_samples):
        count = 0
        z_samples = []
        gen_samples = []
        bg_samples = []
        while count < num_samples:
            Z = self.Dz.sample(self.BS)
            Z = torch.FloatTensor(Z).cuda().view(-1, 100, 1, 1)
            gen = self.G(Z).view(Z.shape[0], 28, 28)
            yhat = gen * self.A
            error = i_se(yhat, self.target.unsqueeze(0).repeat(
                Z.shape[0], 1, 1)) / self.n_pixels
            # keep only latents whose masked reconstruction error is small
            Z = Z[error <= self.threshold]
            gen = gen[error <= self.threshold]
            end = min(Z.shape[0], num_samples - count)
            bg = gen[:end] * (1 - self.A)
            z_samples.append(Z[:end].data.cpu().numpy())
            gen_samples.append(gen[:end].data.cpu().numpy())
            bg_samples.append(bg.data.cpu().numpy())
            count += end
        z_samples = np.concatenate(z_samples, axis=0)
        gen_samples = np.concatenate(gen_samples, axis=0)
        bg_samples = np.concatenate(bg_samples, axis=0)
        return z_samples, gen_samples, bg_samples
class SkewExploreKDE(SkewExploreBase):
    """
    Class for state density estimation using the sklearn kernel density
    estimator, goal proposing and result plotting. The goal proposing
    distribution is computed using the Skew-Fit algorithm
    (https://arxiv.org/abs/1903.03698) with a sklearn kernel density
    estimation.

    :param env: the environment, used to access environment properties such
        as state range, pass proposed goals and pass the on-line mean and
        standard deviation for state normalization
    :param args: additional arguments for configuring result plotting
    """
    def __init__(self, env, args):
        super().__init__(env, args)
        self.density_estimator = KernelDensity(kernel='gaussian',
                                               bandwidth=self.bandwidth)
        self.density_estimator_raw = KernelDensity(kernel='gaussian',
                                                   bandwidth=0.1)
        self.sample_prob = []
        # for coverage computation
        self.obs_in_use = None
        self.num_points_estimator = 50000  # 70000

    def fit_model(self):
        """Fit the kernel density model."""
        self.count += 1
        logging.info('Activate buffer')
        self.init_buffer = True
        selected_index = np.random.randint(len(self.obs_hist),
                                           size=self.num_points_estimator)
        self.obs_in_use = self.obs_hist[selected_index]
        # only yumi environments need to normalize the observation states
        # on-line
        if self.args.use_auto_scale:
            if self.args.env == 'yumi' or self.args.env == 'yumi_box_pick' \
                    or self.args.env == 'yumi_door_button':
                if self.count % 2 == 0:
                    self.obs_mean = self.obs_rms.mean[0]  # np.mean(self.obs_in_use, axis=0)
                    self.obs_std = np.sqrt(self.obs_rms.var[0]) + 1e-8  # np.std(self.obs_in_use, axis=0) + 1e-9
        self.obs_nomalized = (self.obs_in_use - self.obs_mean) / self.obs_std
        self.density_estimator.fit(self.obs_nomalized)
        # scale the observation for entropy computation
        self.obs_scaled = (self.obs_in_use
                           - self.entropy_shift) / self.entropy_scale
        self.density_estimator_raw.fit(self.obs_scaled)
        if self.plot_density:
            if self.args.env == 'yumi':
                self.xy_estimator.fit(self.obs_nomalized[:, 0:2])
                self.doorangle_estimator.fit(self.obs_nomalized[:, -1:])
            elif self.args.env == 'yumi_box_pick' \
                    or self.args.env == 'yumi_door_button':
                self.xy_estimator.fit(self.obs_nomalized[:, 0:2])
                self.doorangle_estimator.fit(self.obs_nomalized[:, -2:])

    def get_samples_and_density(self, sample_num):
        """Sample states from the density model and compute the sample density."""
        samples = self.density_estimator.sample(self.skew_sample_num)
        samples_density = np.exp(self.density_estimator.score_samples(samples))
        return samples, samples_density

    def get_log_density(self, obs_test):
        """Compute log density."""
        log_density = self.density_estimator.score_samples(obs_test)
        return log_density

    def get_density(self, obs_test):
        """Compute density."""
        density = np.exp(self.density_estimator.score_samples(obs_test))
        return density
class Texture3D:
    def __init__(self, size):
        self.rocks = []
        self.size = size
        self.data = []

    '''
    add a rock to the texture if the rock doesn't intersect any rock in
    self.rocks
    returns bool
    '''
    def add(self, rock):
        if not self.intersect(rock):
            self.rocks.append(rock)
            self.data.append(rock.data())
            return True
        return False

    '''
    Create the tree for the kernelDensity
    '''
    def learn(self):
        self.kde = KernelDensity(kernel='gaussian',
                                 bandwidth=0.04).fit(self.data)

    '''
    samples rocks from kde one by one making sure there are no intersections
    returns a Texture
    '''
    def sample(self, n_rocks=None):
        length = n_rocks
        if length is None:
            length = len(self.data)
        mtexture = Texture3D(self.size)
        i = 0
        while i < length:
            new_rock = rock.dataToRock3D(
                self.kde.sample(1, random_state=None)[0])
            if mtexture.add(new_rock):
                i = i + 1
        return mtexture

    '''
    compute the distance between two points
    return float
    '''
    def __distance(self, center1, center2):
        center1 = np.array(center1)
        center2 = np.array(center2)
        sub = center1 - center2
        sub = sub**2
        return math.sqrt(np.sum(sub))

    '''
    returns a string defining the texture. (for saving)
    format:
    size
    c1,c2,c3#rad1,rad2,rad3#col1,col2,col3#rot1,rot2,rot3
    c1,c2,c3#rad1,rad2,rad3#col1,col2,col3#rot1,rot2,rot3
    '''
    def toString(self):
        result = str(self.size) + '\n'
        for rock in self.rocks:
            center = str(rock.center[0]) + ',' + str(rock.center[1]) + ',' \
                + str(rock.center[2])
            radius = str(rock.radius[0]) + ',' + str(rock.radius[1]) + ',' \
                + str(rock.radius[2])
            # fixed: a misplaced parenthesis concatenated a string onto the
            # numeric color component
            color = str(rock.color[0]) + ',' + str(rock.color[1]) + ',' \
                + str(rock.color[2]) + ',' + str(rock.color[3])
            rotation = str(rock.rotation[0]) + ',' + str(rock.rotation[1]) \
                + ',' + str(rock.rotation[2])
            result = result + center + '#' + radius + '#' + color + '#' \
                + rotation + '\n'
        return result

    '''
    Test if two rocks intersect.
    returns bool
    '''
    def __intersect(self, rock1, rock2):
        return self.__distance(rock1.center, rock2.center) < \
            max(rock1.radius) + max(rock2.radius)

    '''
    Test if a rock intersects another rock in self.rocks
    '''
    def intersect(self, rock):
        for r in self.rocks:
            if self.__intersect(rock, r):
                return True
        return False
import numpy as np
import matplotlib.pyplot as plt
# sklearn.neighbors.kde is the old module path; in current scikit-learn use
# `from sklearn.neighbors import KernelDensity`.
from sklearn.neighbors.kde import KernelDensity

# load data
xtrain = np.genfromtxt('../../contest_data/train.csv', delimiter=',')[1:, 1:-1]
ytrain = np.genfromtxt('../../contest_data/train.csv', delimiter=',')[1:, -1]
ytrain = np.asmatrix(ytrain).T
xtrain_linear_imputed = np.genfromtxt(
    '../../contest_data/xtrain_linear_imputed.csv', delimiter=',')

# imputing by sampling from a class-conditioned density estimate
# class-conditional density estimate, column by column
for k in range(500):
    finite = np.isfinite(xtrain[:, k])
    nans = np.isnan(xtrain[:, k])
    y = np.array(ytrain[finite].T)
    X = xtrain[finite, k][:, np.newaxis]
    print(k)
    for i in range(29):
        ind = y == float(i)
        kde = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(X[ind[0]])
        nans_i = np.isnan(xtrain[:, k]) * np.array((ytrain == float(i)).T)
        xtrain[nans_i[0], k] = np.array(
            kde.sample(sum(nans_i[0]), random_state=0).T)
        # optional density plot (X_plot must be defined first):
        # X_plot = np.linspace(0, 1, 1000)[:, np.newaxis]
        # log_dens = kde.score_samples(X_plot)
        # dens = np.exp(log_dens)
        # plt.plot(X_plot, dens)
        # plt.show()
class SkewExploreBase():
    """
    Class for state density estimation, goal proposing and result plotting.
    The goal proposing distribution is computed using the Skew-Fit algorithm
    (https://arxiv.org/abs/1903.03698) with a sklearn kernel density
    estimation.

    :param env: the environment, used to access environment properties such
        as state range, pass proposed goals and pass the on-line mean and
        standard deviation for state normalization
    :param args: additional arguments for configuring result plotting
    """
    def __init__(self, env, args):
        self.density_estimator_raw = None  # KernelDensity(kernel='gaussian', bandwidth=0.1)
        self.density_estimator = None
        self.args = args
        self.env = env
        self.obs_rms = None  # RunningMeanStd(shape=env.observation_space)
        self.skew_sample_num = 10000  # 25000
        self.skew_alpha = args.skew_alpha  # -2.5 / -2.3 / -2.1
        self.goal_sampling_num = 100
        self.init_buffer = False
        self.obs_hist = None
        self.obs_next_hist = None
        self.dones = None
        self.obs_in_use = None
        self.obs_new = None
        self.count = 0
        self.coverages = []
        self.entropy = []
        self.task_reward = []
        self.obs_mean = None
        self.obs_std = None
        self.plot_coverage = self.args.plot_coverage
        self.plot_density = self.args.plot_density
        self.plot_overall_coverage = self.args.plot_overall_coverage
        self.plot_entropy = self.args.plot_entropy

        # for coverage plotting
        if self.args.env == 'maze':
            self.bandwidth = 0.1
            self.init_maze_plotting_params()
            sigma = 0.1
        elif self.args.env == 'yumi':
            # self.bandwidth = 0.003
            self.bandwidth = 0.1
            self.init_door_plotting_params()
            sigma = 0.1
        elif self.args.env == 'yumi_box_pick' \
                or self.args.env == 'yumi_door_button':
            self.bandwidth = 0.11
            self.init_boxpick_plotting_params()
            sigma = 0.005
        self.beta = 1 / (sigma**2 * 2)

    def init_maze_plotting_params(self):
        """
        Initialize parameters to evaluate and plot results of the point maze
        environment
        """
        xbins = 50j
        ybins = 50j
        x_start = -6
        x_end = 6
        y_start = -12
        y_end = 4
        self.xx, self.yy = np.mgrid[x_start:x_end:xbins, y_start:y_end:ybins]
        self.eval_sample = np.vstack([self.yy.ravel(), self.xx.ravel()]).T
        self.eval_sample_min_dist = np.ones(len(self.eval_sample))
        self.skewed_estimator = KernelDensity(kernel='gaussian',
                                              bandwidth=self.bandwidth)
        self.entropy_shift = np.array([y_start, x_start])
        self.entropy_scale = np.array([(y_end - y_start, x_end - x_start)])

    def init_door_plotting_params(self):
        """
        Initialize parameters to evaluate and plot results of the yumi door
        opening environment
        """
        xbins, ybins, zbins, gbins, dbins = 10j, 10j, 10j, 2j, 10j
        self.x_start, self.y_start, self.z_start = self.env.xyz_start
        self.x_end, self.y_end, self.z_end = self.env.xyz_end
        self.g_start, self.g_end = self.env.gripper_start, self.env.gripper_end
        self.d_start, self.d_end = self.env.door_start, self.env.door_end

        # for xy and door angle plotting
        self.mesh_xx, self.mesh_yy = np.mgrid[self.x_start:self.x_end:xbins,
                                              self.y_start:self.y_end:ybins]
        self.dd = np.mgrid[self.d_start:self.d_end:dbins]
        self.xy_eval_sample = np.vstack(
            [self.mesh_xx.ravel(), self.mesh_yy.ravel()]).T
        self.door_eval_sample = np.vstack([self.dd.ravel()]).T
        self.door_eval_sample_min_dist = np.ones(len(self.door_eval_sample))

        # for coverage plotting
        self.xx, self.yy, self.zz, self.gg, self.dd = np.mgrid[
            self.x_start:self.x_end:xbins, self.y_start:self.y_end:ybins,
            self.z_start:self.z_end:zbins, self.g_start:self.g_end:gbins,
            self.d_start:self.d_end:dbins]
        self.eval_sample = np.vstack([
            self.xx.ravel(), self.yy.ravel(), self.zz.ravel(),
            self.gg.ravel(), self.dd.ravel()
        ]).T
        self.eval_sample_min_dist = np.ones(len(self.eval_sample))
        self.xy_estimator = KernelDensity(kernel='gaussian',
                                          bandwidth=self.bandwidth)
        self.doorangle_estimator = KernelDensity(kernel='gaussian',
                                                 bandwidth=self.bandwidth)
        self.skewed_estimator = KernelDensity(kernel='gaussian',
                                              bandwidth=self.bandwidth)
        self.entropy_shift = np.array([
            self.x_start, self.y_start, self.z_start, self.g_start,
            self.d_start
        ])
        self.entropy_scale = np.array([
            self.x_start - self.x_end, self.y_start - self.y_end,
            self.z_start - self.z_end, self.g_start - self.g_end,
            self.d_start - self.d_end
        ])

    def init_boxpick_plotting_params(self):
        """
        Initialize parameters to evaluate and plot results of the yumi door
        button environment
        """
        xbins, ybins, zbins, gbins, dlbins, drbins = 1j, 1j, 1j, 1j, 10j, 5j
        self.x_start, self.y_start, self.z_start = self.env.xyz_start
        self.x_end, self.y_end, self.z_end = self.env.xyz_end
        self.g_start, self.g_end = self.env.gripper_start, self.env.gripper_end
        self.dl_start, self.dl_end = self.env.door_l_start, self.env.door_l_end
        self.dr_start, self.dr_end = self.env.door_r_start, self.env.door_r_end

        # for xy and door angle plotting
        self.mesh_xx, self.mesh_yy = np.mgrid[self.x_start:self.x_end:xbins,
                                              self.y_start:self.y_end:ybins]
        self.xy_eval_sample = np.vstack(
            [self.mesh_xx.ravel(), self.mesh_yy.ravel()]).T
        self.mesh_ld, self.mesh_rd = np.mgrid[
            self.dl_start:self.dl_end:dlbins,
            self.dr_start:self.dr_end:drbins]
        self.door_eval_sample = np.vstack(
            [self.mesh_ld.ravel(), self.mesh_rd.ravel()]).T
        self.door_eval_sample_min_dist = np.ones(len(self.door_eval_sample))

        # for coverage plotting
        self.xx, self.yy, self.zz, self.gg, self.dl, self.dr = np.mgrid[
            self.x_start:self.x_end:xbins, self.y_start:self.y_end:ybins,
            self.z_start:self.z_end:zbins, self.g_start:self.g_end:gbins,
            self.dl_start:self.dl_end:dlbins,
            self.dr_start:self.dr_end:drbins]
        self.eval_sample = np.vstack([
            self.xx.ravel(), self.yy.ravel(), self.zz.ravel(),
            self.gg.ravel(), self.dl.ravel(), self.dr.ravel()
        ]).T
        self.eval_sample_min_dist = np.ones(len(self.eval_sample))
        self.xy_estimator = KernelDensity(kernel='gaussian',
                                          bandwidth=self.bandwidth)
        self.doorangle_estimator = KernelDensity(kernel='gaussian',
                                                 bandwidth=self.bandwidth)
        self.skewed_estimator = KernelDensity(kernel='gaussian',
                                              bandwidth=self.bandwidth)
        self.entropy_shift = np.array([
            self.x_start, self.y_start, self.z_start, self.g_start,
            self.dl_start, self.dr_start
        ])
        self.entropy_scale = np.array([
            self.x_start - self.x_end, self.y_start - self.y_end,
            self.z_start - self.z_end, self.g_start - self.g_end,
            self.dl_start - self.dl_end, self.dr_start - self.dr_end
        ])

    def plot_maze_metrics(self):
        """Plot intermediate results for the point maze environment"""
        fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(5, 1, figsize=(5, 20))
        if self.plot_entropy or self.plot_density:
            eval_sample_log_density = self.get_log_density(self.eval_sample)
            eval_sample_density = np.exp(eval_sample_log_density)
        if self.plot_density:
            zz_density = np.reshape(eval_sample_density, self.xx.shape)
            im = ax1.pcolormesh(self.yy, self.xx, zz_density)
        if self.plot_entropy:
            entropy = self.compute_entropy(eval_sample_density,
                                           eval_sample_log_density)
            self.entropy.append(entropy)
            ax3.plot(self.entropy)
            # np.save(self.args.save_path + '/entropy.npy', self.entropy)
        if self.plot_coverage:
            z_coverage = self.get_coverage()
            zz_coverage = np.reshape(z_coverage, self.xx.shape)
            im = ax2.pcolormesh(self.yy, self.xx, zz_coverage, vmin=0, vmax=1)
        # if the use_extrinsic_reward flag is true, only the task_reward
        # curve is plotted
        if self.args.use_extrinsic_reward:
            ax4.plot(self.task_reward)
        elif self.plot_overall_coverage:
            self.coverages.append(z_coverage.mean())
            ax4.plot(self.coverages)
            # np.save(self.args.save_path + '/coverage.npy', self.coverages)
        sample_goal = self.sample_goal(200)
        ax5.scatter(sample_goal[:, 0], sample_goal[:, 1], s=10, color='red')
        ax5.set_xlim([-12, 4])
        ax5.set_ylim([-6, 6])
        if self.plot_density or self.plot_coverage \
                or self.plot_overall_coverage or self.plot_entropy:
            plt.savefig(self.args.save_path + '/coverage_' + str(self.count)
                        + '.svg')
        plt.close()

    def plot_door_metrics(self):
        """Plot intermediate results for the door opening environment"""
        fig, ax = plt.subplots(3, 2, figsize=(10, 15))
        if self.plot_density:
            xy_eval_sample_norm = (self.xy_eval_sample
                                   - self.obs_mean[:2]) / self.obs_std[:2]
            xy_sample_density = np.exp(
                self.xy_estimator.score_samples(xy_eval_sample_norm))
            xy_density = np.reshape(xy_sample_density, self.mesh_xx.shape)
            door_eval_sample_norm = (self.door_eval_sample
                                     - self.obs_mean[-1]) / self.obs_std[-1]
            door_sample_density = np.exp(
                self.doorangle_estimator.score_samples(door_eval_sample_norm))
            im = ax[0][0].pcolormesh(self.mesh_xx, self.mesh_yy, xy_density)
            im = ax[0][1].scatter(self.door_eval_sample, door_sample_density)
            ax[0][0].set_xlim([self.x_start - 0.05, self.x_end + 0.05])
            ax[0][0].set_ylim([self.y_start - 0.05, self.y_end + 0.05])
            ax[0][1].set_xlim([self.d_start - 0.05, self.d_end + 0.05])
            ax[0][1].set_ylim([-0.05, 1])
        if self.plot_coverage:
            door_sample_coverage = self.get_door_coverage(
                self.door_eval_sample, -1)
            ax[2][1].scatter(self.door_eval_sample, door_sample_coverage)
        # if the use_extrinsic_reward flag is true, only the task_reward
        # curve is plotted
        if self.args.use_extrinsic_reward:
            ax[2][0].plot(self.task_reward)
        elif self.plot_overall_coverage:
            eval_sample_coverage = self.get_coverage()
            self.coverages.append(eval_sample_coverage.mean())
            ax[2][0].plot(self.coverages)
        if self.plot_entropy:
            eval_sample_scaled = (self.eval_sample
                                  - self.entropy_shift) / self.entropy_scale
            eval_sample_log_density = self.density_estimator_raw.score_samples(
                eval_sample_scaled)  # self.get_log_density(eval_sample_norm)
            eval_sample_density = np.exp(eval_sample_log_density)
            entropy = self.compute_entropy(eval_sample_density,
                                           eval_sample_log_density)
            self.entropy.append(entropy)
            ax[0][0].plot(self.entropy)
            np.save(self.args.save_path + '/entropy', np.array(self.entropy))
        sample_goal = self.sample_goal(200)
        ax[1][0].scatter(sample_goal[:, 0], sample_goal[:, 1], s=10,
                         color='red')
        ax[1][0].set_xlim([self.x_start - 0.05, self.x_end + 0.05])
        ax[1][0].set_ylim([self.y_start - 0.05, self.y_end + 0.05])
        ax[1][1].scatter(sample_goal[:, -1], np.ones(len(sample_goal)), s=1,
                         color='red')
        ax[1][1].set_xlim([self.d_start - 0.05, self.d_end + 0.05])
        if self.plot_density or self.plot_coverage \
                or self.plot_overall_coverage or self.plot_entropy:
            plt.savefig(self.args.save_path + '/coverage_' + str(self.count)
                        + '.svg')
        plt.close()

    def plot_boxpick_metrics(self):
        """Plot intermediate results for the door button environment"""
        fig, ax = plt.subplots(3, 2, figsize=(15, 15))
        if self.plot_density:
            xy_eval_sample_norm = (self.xy_eval_sample
                                   - self.obs_mean[:2]) / self.obs_std[:2]
            xy_sample_density = np.exp(
                self.xy_estimator.score_samples(xy_eval_sample_norm))
            xy_density = np.reshape(xy_sample_density, self.mesh_xx.shape)
            door_eval_sample_norm = (self.door_eval_sample
                                     - self.obs_mean[-2:]) / self.obs_std[-2:]
            door_sample_density = np.exp(
                self.doorangle_estimator.score_samples(door_eval_sample_norm))
            door_density = np.reshape(door_sample_density, self.mesh_ld.shape)
            im = ax[0][0].pcolormesh(self.mesh_xx, self.mesh_yy, xy_density)
            im = ax[0][1].pcolormesh(self.mesh_ld, self.mesh_rd, door_density)
            ax[0][0].set_xlim([self.x_start - 0.05, self.x_end + 0.05])
            ax[0][0].set_ylim([self.y_start - 0.05, self.y_end + 0.05])
            ax[0][1].set_xlim([self.dl_start, self.dl_end])
            ax[0][1].set_ylim([self.dr_start, self.dr_end])
        # if the use_extrinsic_reward flag is true, only the task_reward
        # curve is plotted
        if self.args.use_extrinsic_reward:
            ax[2][0].plot(self.task_reward)
        elif self.plot_overall_coverage:
            eval_sample_coverage = self.get_coverage()
            self.coverages.append(eval_sample_coverage.mean())
            ax[2][0].plot(self.coverages)
        if self.plot_coverage:
            door_sample_coverage = self.get_door_coverage(
                self.door_eval_sample, -2)
            door_coverage = np.reshape(door_sample_coverage,
                                       self.mesh_ld.shape)
            ax[2][1].pcolormesh(self.mesh_ld, self.mesh_rd, door_coverage,
                                vmin=0, vmax=1)
        if self.plot_entropy:
            eval_sample_norm = (self.eval_sample
                                - self.obs_mean) / self.obs_std
            eval_sample_log_density = self.get_log_density(eval_sample_norm)
            eval_sample_density = np.exp(eval_sample_log_density)
            entropy = self.compute_entropy(eval_sample_density,
                                           eval_sample_log_density)
            self.entropy.append(entropy)
            np.save(self.args.save_path + '/entropy', np.array(self.entropy))
        sample_goal = self.sample_goal(200)
        ax[1][0].scatter(sample_goal[:, 0], sample_goal[:, 1], s=10,
                         color='red')
        ax[1][0].set_xlim([self.x_start - 0.05, self.x_end + 0.05])
        ax[1][0].set_ylim([self.y_start - 0.05, self.y_end + 0.05])
        ax[1][1].scatter(sample_goal[:, -2], sample_goal[:, -1], s=1,
                         color='red')
        ax[1][1].set_xlim([self.dl_start, self.dl_end])
        ax[1][1].set_ylim([self.dr_start, self.dr_end])
        if self.plot_density or self.plot_coverage \
                or self.plot_overall_coverage or self.plot_entropy:
            plt.savefig(self.args.save_path + '/coverage_' + str(self.count)
                        + '.svg')
        plt.close()

    def activate_buffer(self):
        """
        Update the history buffer, update the state density estimation model,
        and update the goal proposing distribution model
        """
        start_time = time.time()
        if self.obs_hist is None:
            self.obs_hist = self.obs_new
            # self.obs_next_hist = obs_next
            # self.done_hist = dones
        else:
            self.obs_hist = np.concatenate((self.obs_hist, self.obs_new),
                                           axis=0)
        self.fit_model()
        fitmodel_time = time.time()
        logging.info("fit model time cost: %f" % (fitmodel_time - start_time))
        self.train_skew_generator()
        fitskew_time = time.time()
        logging.info("fit skew-model time cost: %f" %
                     (fitskew_time - start_time))

        # update goal samples in the environment and update the obs mean
        # and std
        if self.args.use_auto_scale:
            self.env.update_reward_scale(self.obs_mean, self.obs_std)
        sampled_goal = self.sample_goal(self.goal_sampling_num)
        self.env.set_goals(sampled_goal)
        self.env.set_density_estimator(self.density_estimator)

        # compute task_reward
        if self.args.use_extrinsic_reward:
            # dones = self.dones.astype(int)
            task_reward = self.env.get_extrinsic_reward(self.obs_new)  # * dones
            self.task_reward.append(task_reward.mean())

        # plotting
        if self.plot_density or self.plot_coverage \
                or self.plot_overall_coverage or self.plot_entropy:
            if self.args.env == 'maze':
                self.plot_maze_metrics()
            elif self.args.env == 'yumi':
                self.plot_door_metrics()
            elif self.args.env == 'yumi_box_pick' \
                    or self.args.env == 'yumi_door_button':
                self.plot_boxpick_metrics()

        self.obs_new = None
        self.dones = None
        finish_time = time.time()
        logging.info('time cost: %f' % (finish_time - start_time))
        np.save(self.args.save_path + '/entropy', np.array(self.entropy))
        np.save(self.args.save_path + '/coverage', np.array(self.coverages))
        np.save(self.args.save_path + '/task_reward',
                np.array(self.task_reward))
        logging.info('end of activate buffer')

    def train_skew_generator(self):
        """
        Update the goal proposing distribution using the Skew-Fit algorithm
        (https://arxiv.org/abs/1903.03698)
        """
        # NOTE: the skewed samples are sampled from the density estimator
        self.skew_samples, skew_samples_density = \
            self.get_samples_and_density(self.skew_sample_num)
        # self.skew_samples = self.density_estimator.sample(self.skew_sample_num)
        # skew_samples_density = np.exp(self.density_estimator.score_samples(self.skew_samples))
        skew_unnormalized_weights = skew_samples_density \
            * skew_samples_density**self.skew_alpha
        skew_zeta_alpha = np.sum(skew_unnormalized_weights)
        self.skew_weights = skew_unnormalized_weights / skew_zeta_alpha
        self.skewed_estimator.fit(self.skew_samples,
                                  sample_weight=self.skew_weights)

    def sample_goal(self, goal_num):
        """Sample goal states from the goal proposing distribution"""
        sampled_data = self.skewed_estimator.sample(goal_num)
        sampled_data = sampled_data * self.obs_std + self.obs_mean
        return sampled_data  # sampled_data[goal_index]

    def get_samples_and_density(self, sample_num):
        raise NotImplementedError()

    def fit_model(self):
        raise NotImplementedError()

    def get_pvisited(self, obs_test):
        raise NotImplementedError()

    def get_log_density(self, obs_test):
        raise NotImplementedError()

    def get_coverage(self):
        """Compute the current coverage of the states used for evaluation"""
        p_coverage = np.zeros(len(self.eval_sample))
        for i in range(len(self.eval_sample)):
            obs = self.eval_sample[i]
            obs_diff = self.obs_new - obs
            diff_norm = LA.norm(obs_diff, axis=1)
            min_dist = diff_norm.min()
            current_min_dist = self.eval_sample_min_dist[i]
            new_min_dist = np.minimum(current_min_dist, min_dist)
            self.eval_sample_min_dist[i] = new_min_dist
            pv = np.exp(-new_min_dist * new_min_dist * self.beta)
            p_coverage[i] = 1 - pv
        return p_coverage

    def get_door_coverage(self, door_eval_sample, index):
        """Compute the current coverage of the door states used for evaluation"""
        p_coverage = np.zeros(len(door_eval_sample))
        for i in range(len(door_eval_sample)):
            obs = door_eval_sample[i]
            obs_diff = self.obs_new[:, index:] - obs
            diff_norm = LA.norm(obs_diff, axis=1)
            min_dist = diff_norm.min()
            current_min_dist = self.door_eval_sample_min_dist[i]
            new_min_dist = np.minimum(current_min_dist, min_dist)
            self.door_eval_sample_min_dist[i] = new_min_dist
            pv = np.exp(-new_min_dist * new_min_dist * self.beta)
            p_coverage[i] = pv
        return p_coverage

    def compute_entropy(self, density, log_density):
        """Compute the entropy"""
        d_mul_logd = density * log_density
        entropy = -np.sum(d_mul_logd)
        return entropy

    def get_preach(self, obs_from):
        raise NotImplementedError()

    def get_preal(self, obs_test):
        raise NotImplementedError()

    def compute_reward(self, obs_test, use_sampling=False):
        raise NotImplementedError()

    def update_history(self, obs, dones):
        """
        Save the new states in the self.obs_new buffer; self.obs_new is
        merged into the self.obs_hist buffer in activate_buffer().
        """
        if self.args.use_index:
            obs = obs[:, :-1]
        if self.obs_mean is None:
            self.obs_mean = np.zeros_like(obs)[0]
            self.obs_std = np.ones_like(obs)[0]
            self.obs_rms = RunningMeanStd(shape=obs.shape)
        if self.obs_new is None:
            self.obs_new = obs
            # self.obs_next_hist = obs_next
            self.dones = dones
        else:
            self.obs_new = np.concatenate((self.obs_new, obs), axis=0)
            # self.obs_next_hist = np.concatenate((self.obs_next_hist, obs_next), axis=0)
            self.dones = np.concatenate((self.dones, dones), axis=0)
        self.obs_rms.update(obs)
class GAE():
    def __init__(self, img_shape=(48, 96, 96, 1), encoded_dim=8,
                 optimizer=SGD(0.001, momentum=.9),
                 optimizer_discriminator=SGD(0.0001, momentum=.9),
                 optimizer_autoencoder=Adam(0.0001)):
        self.encoded_dim = encoded_dim
        self.optimizer = optimizer
        self.optimizer_discriminator = optimizer_discriminator
        self.optimizer_autoencoder = optimizer_autoencoder
        self.img_shape = img_shape
        self.initializer = RandomNormal(mean=0., stddev=1.)
        self._initAndCompileFullModel(img_shape, encoded_dim)

    def _genEncoderModel(self, img_shape, encoded_dim):
        """Build Encoder Model Based on Paper Configuration

        Args:
            img_shape (tuple): shape of input image
            encoded_dim (int): number of latent variables

        Return:
            A sequential keras model
        """
        encoder = Sequential()
        encoder.add(keras.layers.Conv3D(input_shape=img_shape, filters=16,
                                        kernel_size=3, strides=(1,) * 3,
                                        padding="SAME", activation='relu'))
        encoder.add(keras.layers.Dropout(0.2))
        encoder.add(keras.layers.Conv3D(filters=16, kernel_size=3,
                                        strides=(2,) * 3, padding="SAME",
                                        activation='relu'))
        # encoder.add(keras.layers.MaxPool3D(pool_size=(2,)*3, padding="SAME"))
        encoder.add(keras.layers.Conv3D(filters=32, kernel_size=3,
                                        strides=(1,) * 3, padding="SAME",
                                        activation='relu'))
        encoder.add(keras.layers.Dropout(0.2))
        encoder.add(keras.layers.Conv3D(filters=32, kernel_size=3,
                                        strides=(2,) * 3, padding="SAME",
                                        activation='relu'))
        # encoder.add(keras.layers.MaxPool3D(pool_size=(2,)*3, padding="SAME"))
        encoder.add(keras.layers.Conv3D(filters=64, kernel_size=3,
                                        strides=(1,) * 3, padding="SAME",
                                        activation='relu'))
        encoder.add(keras.layers.Dropout(0.2))
        encoder.add(keras.layers.Conv3D(filters=64, kernel_size=3,
                                        strides=(2,) * 3, padding="SAME",
                                        activation='relu'))
        # encoder.add(keras.layers.MaxPool3D(pool_size=(2,)*3, padding="SAME"))
        encoder.add(keras.layers.GlobalAvgPool3D())
        encoder.add(keras.layers.Flatten())
        encoder.add(Dense(encoded_dim))
        encoder.summary()
        return encoder

    def _getDecoderModel(self, encoded_dim, img_shape):
        """Build Decoder Model Based on Paper Configuration

        Args:
            encoded_dim (int): number of latent variables
            img_shape (tuple): shape of target images

        Return:
            A sequential keras model
        """
        decoder = Sequential()
        decoder.add(Dense(128, activation='relu', input_dim=encoded_dim))
        decoder.add(Reshape((128, 1)))
        decoder.add(keras.layers.Conv1D(filters=108, kernel_size=3, strides=1,
                                        padding="SAME", activation='relu'))
        decoder.add(Reshape([3, 6, 6, 128]))
        decoder.add(Conv3DTranspose(filters=64, kernel_size=3,
                                    strides=(2,) * 3, padding="SAME",
                                    activation='relu'))
        decoder.add(Conv3DTranspose(filters=32, kernel_size=3,
                                    strides=(2,) * 3, padding="SAME",
                                    activation='relu'))
        decoder.add(Conv3DTranspose(filters=16, kernel_size=3,
                                    strides=(2,) * 3, padding="SAME",
                                    activation='relu'))
        decoder.add(Conv3DTranspose(filters=1, kernel_size=3,
                                    strides=(2,) * 3, padding="SAME",
                                    activation='relu'))
        # decoder.add(Dense(1000, activation='relu'))
        # decoder.add(Dense(np.prod(img_shape), activation='sigmoid'))
        decoder.summary()
        return decoder

    def _getDescriminator(self, img_shape):
        """Build Discriminator Model Based on Paper Configuration

        Args:
            img_shape (tuple): shape of input image

        Return:
            A sequential keras model
        """
        discriminator = Sequential()
        discriminator.add(keras.layers.Conv3D(input_shape=img_shape,
                                              filters=16, kernel_size=3,
                                              strides=(1,) * 3,
                                              padding="SAME",
                                              activation='relu'))
        discriminator.add(keras.layers.MaxPool3D(pool_size=(2,) * 3,
                                                 padding="SAME"))
        discriminator.add(keras.layers.Conv3D(filters=32, kernel_size=3,
                                              strides=(1,) * 3,
                                              padding="SAME",
                                              activation='relu'))
        discriminator.add(keras.layers.MaxPool3D(pool_size=(2,) * 3,
                                                 padding="SAME"))
        discriminator.add(keras.layers.Conv3D(filters=64, kernel_size=3,
                                              strides=(1,) * 3,
                                              padding="SAME",
                                              activation='relu'))
        discriminator.add(keras.layers.MaxPool3D(pool_size=(2,) * 3,
                                                 padding="SAME"))
        discriminator.add(keras.layers.Conv3D(filters=128, kernel_size=3,
                                              strides=(1,) * 3,
                                              padding="SAME",
                                              activation='relu'))
        discriminator.add(keras.layers.MaxPool3D(pool_size=(2,) * 3,
                                                 padding="SAME"))
        discriminator.add(keras.layers.GlobalAvgPool3D())
        discriminator.add(keras.layers.Flatten())
        discriminator.add(Dense(32, activation="relu"))
        discriminator.add(Dense(1, activation="sigmoid"))
        discriminator.summary()
        return discriminator

    def _initAndCompileFullModel(self, img_shape, encoded_dim):
        self.encoder = self._genEncoderModel(img_shape, encoded_dim)
        self.decoder = self._getDecoderModel(encoded_dim, img_shape)
        self.discriminator = self._getDescriminator(img_shape)
        img = Input(shape=img_shape)
        encoded_repr = self.encoder(img)
        gen_img = self.decoder(encoded_repr)
        self.autoencoder = Model(img, gen_img)
        self.autoencoder.compile(optimizer=self.optimizer_autoencoder,
                                 loss='mse')
        self.discriminator.compile(optimizer=self.optimizer_discriminator,
                                   loss='binary_crossentropy',
                                   metrics=['accuracy'])
        for layer in self.discriminator.layers:
            layer.trainable = False
        is_real = self.discriminator(gen_img)
        self.autoencoder_discriminator = Model(img, is_real)
        self.autoencoder_discriminator.compile(optimizer=self.optimizer,
                                               loss='binary_crossentropy',
                                               metrics=['accuracy'])

    def imagegrid(self, epochnumber):
        fig = plt.figure(figsize=[20, 20])
        for i in range(-5, 5):
            for j in range(-5, 5):
                topred = np.array((i * 0.5, j * 0.5))
                topred = topred.reshape((1, 2))
                img = self.decoder.predict(topred)
                img = img.reshape(self.img_shape)
                ax = fig.add_subplot(10, 10, (i + 5) * 10 + j + 5 + 1)
                ax.set_axis_off()
                ax.imshow(img, cmap="gray")
        fig.savefig(str(epochnumber) + ".png")
        plt.show()
        plt.close(fig)

    def train(self, x_train, batch_size=4, epochs=5):
        self.autoencoder.fit(x_train, x_train, epochs=1)
        for epoch in range(epochs):
            # --------------- Train Discriminator -------------
            # Select a random half batch of images
            idx = np.random.randint(0, x_train.shape[0], batch_size)
            imgs_real = x_train[idx]
            idx = np.random.randint(0, x_train.shape[0], batch_size)
            imgs_real2 = x_train[idx]
            # Generate a half batch of new images
            # gen_imgs = self.decoder.predict(latent_fake)
            imgs_fake = self.autoencoder.predict(imgs_real2)
            valid = np.ones((batch_size, 1))
            fake = np.zeros((batch_size, 1))
            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs_real, valid)
            d_loss_fake = self.discriminator.train_on_batch(imgs_fake, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            # d_loss = (0, 0)
            idx = np.random.randint(0, x_train.shape[0], batch_size)
            imgs_real = x_train[idx]
            # Generator wants the discriminator to label the generated
            # representations as valid
            valid_y = np.ones((batch_size, 1))
            # Train generator
            g_logg_similarity = self.autoencoder_discriminator.train_on_batch(
                imgs_real, valid_y)
            # Plot the progress
            print("%d [D loss = %.2f, accuracy: %.2f] "
                  "[G loss = %f, accuracy: %.2f]"
                  % (epoch, d_loss[0], d_loss[1], g_logg_similarity[0],
                     g_logg_similarity[1]))
            # if (epoch % save_interval == 0):
            #     self.imagegrid(epoch)
        codes = self.encoder.predict(x_train)
        # params = {'bandwidth': [3.16]}  # np.logspace(0, 2, 5)
        # grid = GridSearchCV(KernelDensity(), params, n_jobs=4)
        # grid.fit(codes)
        # print(grid.best_params_)
        # self.kde = grid.best_estimator_
        self.kde = KernelDensity(kernel='gaussian', bandwidth=3.16).fit(codes)

    def generate(self, n=10000):
        codes = self.kde.sample(n)
        images = self.decoder.predict(codes)
        return images

    def autoEncode(self, image):
        codes = self.encoder(image)
        gen_image = self.decoder(codes)
        return gen_image

    def generateAndPlot(self, x_train, n=10, fileName="generated.png"):
        fig = plt.figure(figsize=[20, 20])
        images = self.generate(n * n)
        index = 1
        for image in images:
            image = image.reshape(self.img_shape)
            ax = fig.add_subplot(n, n + 1, index)
            index = index + 1
            ax.set_axis_off()
            ax.imshow(image, cmap="gray")
            if (index % (n + 1) == 0):
                nearest = helpers.findNearest(x_train, image)
                ax = fig.add_subplot(n, n + 1, index)
                index = index + 1
                ax.imshow(nearest, cmap="gray")
        fig.savefig(fileName)
        plt.show()

    def meanLogLikelihood(self, x_test):
        # Completed sketch: the original body referenced an undefined `codes`
        # and returned nothing. Score test encodings under the KDE fitted in
        # train().
        codes = self.encoder.predict(x_test)
        return self.kde.score_samples(codes).mean()
class Texture:
    def __init__(self):
        self.rocks = []
        self.data = []

    '''
    add a rock to the texture if the rock doesn't intersect any rock in
    self.rocks
    returns bool
    '''
    def add(self, rock):
        if not self.intersect(rock):
            self.rocks.append(rock)
            self.data.append(rock.data())
            return True
        return False

    '''
    Create the tree for the kernelDensity
    '''
    def learn(self):
        self.kde = KernelDensity(kernel='gaussian',
                                 bandwidth=0.02).fit(self.data)

    '''
    samples rocks from kde one by one making sure there are no intersections
    returns a Texture
    '''
    def sample(self, n_rocks=None):
        length = n_rocks
        if length is None:
            length = len(self.data)
        mtexture = Texture()
        i = 0
        while i < length:
            new_rock = rock.dataToRock(
                self.kde.sample(1, random_state=None)[0])
            if mtexture.add(new_rock):
                i = i + 1
        return mtexture

    '''
    compute the distance between two points
    return float
    '''
    def __distance(self, center1, center2):
        center1 = np.array(center1)
        center2 = np.array(center2)
        sub = center1 - center2
        sub = sub**2
        return math.sqrt(np.sum(sub))

    '''
    Test if two rocks intersect.
    returns bool
    '''
    def __intersect(self, rock1, rock2):
        return self.__distance(rock1.center, rock2.center) < \
            rock1.radius + rock2.radius

    '''
    Test if a rock intersects another rock in self.rocks
    '''
    def intersect(self, rock):
        for r in self.rocks:
            if self.__intersect(rock, r):
                return True
        return False
class skde(object):
    r"""
    Custom wrapper around `sklearn.neighbors.kde.KernelDensity` to conform to
    our preferred calling syntax (following scipy conventions)
    """
    def __init__(self, data, mirror=False, **kwds):
        self.mirror = mirror
        # `kwds` is an empty dict (never None) when no keywords are passed,
        # so test for emptiness rather than None.
        if not kwds:
            if self.mirror:
                self.kde_object = KernelDensity(kernel='gaussian').fit(
                    np.vstack([-data, data]))
            else:
                self.kde_object = KernelDensity(kernel='gaussian').fit(data)
        else:
            if self.mirror:
                self.kde_object = KernelDensity(**kwds).fit(
                    np.vstack([-data, data]))
            else:
                self.kde_object = KernelDensity(**kwds).fit(data)
        try:
            self.d = data.shape[1]
        except IndexError:
            self.d = 1
        self.n = data.shape[0]

    def rvs(self, size=1):
        r"""
        Generates random variables from a kde object. Wrapper function for
        `sklearn.neighbors.kde.KernelDensity.sample`.

        :param int size: number of random samples to generate
        :param tuple size: number of samples is taken to be the first argument
        """
        if type(size) is tuple:
            size = size[0]
        if self.mirror:
            # rejection-sample: keep only positive draws from the mirrored fit
            num_samps = 0
            samps = []
            while num_samps < size:
                samp_proposal = self.kde_object.sample()
                if samp_proposal > 0:
                    samps.append(samp_proposal)
                    num_samps += 1
            samps = np.array(samps).reshape(size, self.d)
        else:
            samps = self.kde_object.sample(size)
        return samps

    # TODO write a test that makes sure this returns the correct shape
    def pdf(self, eval_points):
        r"""
        Evaluates the density of the kde object at the given points. Wrapper
        function for `sklearn.neighbors.kde.KernelDensity.score_samples`.

        :param eval_points: points on which to evaluate the density.
        :type eval_points: :class:`numpy.ndarray` of shape (num, dim)
        """
        # TODO write a test that makes sure this returns the correct shape
        num_samples = eval_points.shape[0]
        if self.mirror:
            p = 2 * np.exp(
                self.kde_object.score_samples(eval_points)).reshape(num_samples)
        else:
            try:
                p = np.exp(self.kde_object.score_samples(eval_points))
            except ValueError:
                p = np.exp(self.kde_object.score_samples(
                    eval_points.reshape(-1, 1)))
        return p
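# Usage sketch for skde (illustrative, not from the original source): a
# mirrored KDE for nonnegative 1-D data; keyword arguments are forwarded to
# KernelDensity.
import numpy as np

pos_data = np.abs(np.random.RandomState(0).normal(size=(200, 1)))
dist = skde(pos_data, mirror=True, kernel='gaussian', bandwidth=0.3)
draws = dist.rvs(size=10)           # 10 nonnegative samples, shape (10, 1)
dens = dist.pdf(np.array([[0.5]]))  # density at 0.5 (doubled by mirroring)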
class GAE():
    def __init__(self, img_shape=(28, 28), encoded_dim=2):
        self.encoded_dim = encoded_dim
        self.optimizer = Adam(0.001)
        self.optimizer_discriminator = Adam(0.00001)
        self._initAndCompileFullModel(img_shape, encoded_dim)
        self.img_shape = img_shape

    def _genEncoderModel(self, img_shape, encoded_dim):
        """Build Encoder Model Based on Paper Configuration

        Args:
            img_shape (tuple): shape of input image
            encoded_dim (int): number of latent variables

        Return:
            A sequential keras model
        """
        encoder = Sequential()
        encoder.add(Flatten(input_shape=img_shape))
        encoder.add(Dense(1000, activation='relu'))
        encoder.add(Dense(1000, activation='relu'))
        encoder.add(Dense(encoded_dim))
        encoder.summary()
        return encoder

    def _getDecoderModel(self, encoded_dim, img_shape):
        """Build Decoder Model Based on Paper Configuration

        Args:
            encoded_dim (int): number of latent variables
            img_shape (tuple): shape of target images

        Return:
            A sequential keras model
        """
        decoder = Sequential()
        decoder.add(Dense(1000, activation='relu', input_dim=encoded_dim))
        decoder.add(Dense(1000, activation='relu'))
        decoder.add(Dense(np.prod(img_shape), activation='sigmoid'))
        decoder.add(Reshape(img_shape))
        decoder.summary()
        return decoder

    def _getDescriminator(self, img_shape):
        """Build Discriminator Model Based on Paper Configuration

        Args:
            img_shape (tuple): shape of input image

        Return:
            A sequential keras model
        """
        discriminator = Sequential()
        discriminator.add(Flatten(input_shape=img_shape))
        discriminator.add(Dense(1000, activation='relu',
                                kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.add(Dense(1000, activation='relu',
                                kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.add(Dense(1, activation='sigmoid',
                                kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.summary()
        return discriminator

    def _initAndCompileFullModel(self, img_shape, encoded_dim):
        self.encoder = self._genEncoderModel(img_shape, encoded_dim)
        self.decoder = self._getDecoderModel(encoded_dim, img_shape)
        self.discriminator = self._getDescriminator(img_shape)
        img = Input(shape=img_shape)
        encoded_repr = self.encoder(img)
        gen_img = self.decoder(encoded_repr)
        self.autoencoder = Model(img, gen_img)
        self.autoencoder.compile(optimizer=self.optimizer, loss='mse')
        self.discriminator.compile(optimizer=self.optimizer,
                                   loss='binary_crossentropy',
                                   metrics=['accuracy'])
        for layer in self.discriminator.layers:
            layer.trainable = False
        latent = Input(shape=(encoded_dim,))
        gen_image_from_latent = self.decoder(latent)
        is_real = self.discriminator(gen_image_from_latent)
        self.decoder_discriminator = Model(latent, is_real)
        self.decoder_discriminator.compile(
            optimizer=self.optimizer_discriminator,
            loss='binary_crossentropy', metrics=['accuracy'])

    def imagegrid(self, epochnumber):
        fig = plt.figure(figsize=[20, 20])
        for i in range(-5, 5):
            for j in range(-5, 5):
                topred = np.array((i * 0.5, j * 0.5))
                topred = topred.reshape((1, 2))
                img = self.decoder.predict(topred)
                img = img.reshape(self.img_shape)
                ax = fig.add_subplot(10, 10, (i + 5) * 10 + j + 5 + 1)
                ax.set_axis_off()
                ax.imshow(img, cmap="gray")
        fig.savefig(str(epochnumber) + ".png")
        plt.show()
        plt.close(fig)

    def train(self, x_train, batch_size=32, epochs=5):
        fileNames = glob.glob('models/GAE/weights_mnist_autoencoder.*')
        fileNames.sort()
        if len(fileNames) != 0:
            savedEpoch = int(fileNames[-1].split('.')[1])
            self.autoencoder.load_weights(fileNames[-1])
        else:
            savedEpoch = -1
        if savedEpoch < epochs - 1:
            self.autoencoder.fit(
                x_train, x_train, batch_size=batch_size, epochs=epochs,
                callbacks=[
                    keras.callbacks.ModelCheckpoint(
                        'models/GAE/weights_autoencoder.{epoch:02d}.hdf5',
                        verbose=0, save_best_only=False,
                        save_weights_only=False, mode='auto', period=1)
                ])
        print("Training KDE")
        codes = self.encoder.predict(x_train)
        # params = {'bandwidth': [3.16]}  # np.logspace(0, 2, 5)
        # grid = GridSearchCV(KernelDensity(), params, n_jobs=4)
        # grid.fit(codes)
        # print(grid.best_params_)
        # self.kde = grid.best_estimator_
        self.kde = KernelDensity(kernel='gaussian', bandwidth=3.16).fit(codes)
        print("Initial Training of discriminator")
        fileNames = glob.glob('models/GAE/weights_mnist_discriminator.*')
        fileNames.sort()
        if len(fileNames) != 0:
            savedEpoch = int(fileNames[-1].split('.')[1])
            self.discriminator.load_weights(fileNames[-1])
        else:
            savedEpoch = -1
        if savedEpoch < epochs - 1:
            imgs_fake = self.generate(n=len(x_train))
            # gen_imgs = self.decoder.predict(latent_fake)
            valid = np.ones((len(x_train), 1))
            fake = np.zeros((len(x_train), 1))
            labels = np.vstack([valid, fake])
            images = np.vstack([x_train, imgs_fake])
            # Train the discriminator
            self.discriminator.fit(
                images, labels, epochs=epochs, batch_size=batch_size,
                shuffle=True,
                callbacks=[
                    keras.callbacks.ModelCheckpoint(
                        'models/GAE/weights_discriminator.{epoch:02d}.hdf5',
                        verbose=0, save_best_only=False,
                        save_weights_only=False, mode='auto', period=1)
                ])
        print("Training GAN")
        self.generateAndPlot(x_train, fileName="before_gan.png")
        # Python 3: use integer division for the epoch count
        self.trainGAN(x_train, epochs=len(x_train) // batch_size,
                      batch_size=batch_size)
        self.generateAndPlot(x_train, fileName="after_gan.png")

    def trainGAN(self, x_train, epochs=1000, batch_size=32):
        half_batch = batch_size // 2  # Python 3: integer division
        for epoch in range(epochs):
            # --------------- Train Discriminator -------------
            # Select a random half batch of images
            idx = np.random.randint(0, x_train.shape[0], half_batch)
            imgs_real = x_train[idx]
            # Generate a half batch of new images
            imgs_fake = self.generate(n=half_batch)
            # gen_imgs = self.decoder.predict(latent_fake)
            valid = np.ones((half_batch, 1))
            fake = np.zeros((half_batch, 1))
            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs_real, valid)
            d_loss_fake = self.discriminator.train_on_batch(imgs_fake, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            # d_loss = (0, 0)
            codes = self.kde.sample(batch_size)
            # Generator wants the discriminator to label the generated
            # representations as valid
            valid_y = np.ones((batch_size, 1))
            # Train generator
            g_logg_similarity = self.decoder_discriminator.train_on_batch(
                codes, valid_y)
            # Plot the progress
            print("%d [D accuracy: %.2f] [G accuracy: %.2f]"
                  % (epoch, d_loss[1], g_logg_similarity[1]))
            # if (epoch % save_interval == 0):
            #     self.imagegrid(epoch)

    def generate(self, n=10000):
        codes = self.kde.sample(n)
        images = self.decoder.predict(codes)
        return images

    def generateAndPlot(self, x_train, n=10, fileName="generated.png"):
        fig = plt.figure(figsize=[20, 20])
        images = self.generate(n * n)
        index = 1
        for image in images:
            image = image.reshape(self.img_shape)
            ax = fig.add_subplot(n, n + 1, index)
            index = index + 1
            ax.set_axis_off()
            ax.imshow(image, cmap="gray")
            if (index % (n + 1) == 0):
                nearest = helpers.findNearest(x_train, image)
                ax = fig.add_subplot(n, n + 1, index)
                index = index + 1
                ax.imshow(nearest, cmap="gray")
        fig.savefig(fileName)
        plt.show()

    def meanLogLikelihood(self, x_test):
        # Completed sketch: the original body referenced an undefined `codes`
        # and returned nothing. Score test encodings under the KDE fitted in
        # train().
        codes = self.encoder.predict(x_test)
        return self.kde.score_samples(codes).mean()
class KDEestimator:
    """ An interface for generating random numbers according to a given
    Kernel Density Estimation (KDE) parametrization based on the data. """

    def __init__(self, bandwidth=1.0):
        from sklearn.neighbors import KernelDensity
        self.bandwidth = bandwidth
        self.model = KernelDensity(bandwidth=self.bandwidth)

    def _botev_fixed_point(self, t, M, I, a2):
        # Find the largest float available for this numpy
        if hasattr(np, 'float128'):
            large_float = np.float128
        elif hasattr(np, 'float96'):
            large_float = np.float96
        else:
            large_float = np.float64

        l = 7
        I = large_float(I)
        M = large_float(M)
        a2 = large_float(a2)
        f = 2 * np.pi**(2 * l) * np.sum(I**l * a2 * np.exp(-I * np.pi**2 * t))
        for s in range(l, 1, -1):
            K0 = np.prod(np.arange(1, 2 * s, 2)) / np.sqrt(2 * np.pi)
            const = (1 + (1 / 2)**(s + 1 / 2)) / 3
            time = (2 * const * K0 / M / f)**(2 / (3 + 2 * s))
            f = 2 * np.pi ** (2 * s) * \
                np.sum(I ** s * a2 * np.exp(-I * np.pi ** 2 * time))
        return t - (2 * M * np.sqrt(np.pi) * f)**(-2 / 5)

    def finite(self, val):
        """ Checks if a value is finite or not """
        return val is not None and np.isfinite(val)

    def botev_bandwidth(self, data):
        """ Implementation of the KDE bandwidth selection method outlined in:

        Z. I. Botev, J. F. Grotowski, and D. P. Kroese. *Kernel density
        estimation via diffusion.* The Annals of Statistics, 38(5):2916-2957,
        2010.

        Based on the implementation of Daniel B. Smith, PhD. The object is a
        callable returning the bandwidth for a 1D kernel. Forked from the
        package `PyQT_fit <https://code.google.com/archive/p/pyqt-fit/>`_.

        :param data: 1D array containing the data to model with a 1D KDE.
        :type data: numpy.ndarray
        :returns: Optimal bandwidth according to the data.
        """
        from scipy import fftpack, optimize
        # def __init__(self, N=None, **kword):
        #     if 'lower' in kword or 'upper' in kword:
        #         print("Warning, using 'lower' and 'upper' for botev bandwidth is "
        #               "deprecated. Argument is ignored")
        #     self.N = N
        #
        # def __call__(self, data):  # , model):
        #     """
        #     Returns the optimal bandwidth based on the data
        #     """
        N = 2**10  # if self.N is None else int(2 ** np.ceil(np.log2(self.N)))
        # lower = getattr(model, 'lower', None)
        # upper = getattr(model, 'upper', None)
        # if not finite(lower) or not finite(upper):
        minimum = np.min(data)
        maximum = np.max(data)
        span = maximum - minimum
        lower = minimum - span / 10  # if not finite(lower) else lower
        upper = maximum + span / 10  # if not finite(upper) else upper
        # Range of the data
        span = upper - lower
        # Histogram of the data to get a crude approximation of the density
        # weights = model.weights
        # if not weights.shape:
        weights = None
        M = len(data)
        DataHist, bins = np.histogram(data, bins=N, range=(lower, upper),
                                      weights=weights)
        DataHist = DataHist / M
        DCTData = fftpack.dct(DataHist, norm=None)
        I = np.arange(1, N, dtype=int)**2
        SqDCTData = (DCTData[1:] / 2)**2
        guess = 0.1
        try:
            t_star = optimize.brentq(self._botev_fixed_point, 0, guess,
                                     args=(M, I, SqDCTData))
        except ValueError:
            t_star = .28 * N**(-.4)
        return np.sqrt(t_star) * span

    def fit(self, x):
        self.bandwidth = self.botev_bandwidth(x.flatten())
        self.model.set_params(bandwidth=self.bandwidth)
        self.model.fit(x.reshape(-1, 1))

    def sample(self, n_samples=1):
        return self.model.sample(n_samples)

    def pdf(self, x):
        # score_samples returns the log density, so exponentiate to get the pdf
        return np.exp(self.model.score_samples(x))
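# --- Usage sketch for KDEestimator (illustrative, not from the original
# source): fit a 1D density with the Botev bandwidth, then sample and score.
import numpy as np

rng = np.random.RandomState(0)
x = np.concatenate([rng.normal(-2.0, 0.5, 500), rng.normal(3.0, 1.0, 500)])

est = KDEestimator()
est.fit(x)                             # bandwidth selected by botev_bandwidth
draws = est.sample(1000)               # (1000, 1) array from the fitted density
densities = est.pdf(x.reshape(-1, 1))  # pointwise density estimates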
class GAE:
    def __init__(self, img_shape=(28, 28), encoded_dim=2):
        self.img_shape = img_shape
        self.encoded_dim = encoded_dim
        self.optimizer = Adam(0.001)
        self.optimizer_discriminator = Adam(0.00001)
        self.discriminator = self.get_discriminator_model(img_shape)
        self.decoder = self.get_decoder_model(encoded_dim, img_shape)
        self.encoder = self.get_encoder_model(img_shape, encoded_dim)
        # Initialize Autoencoder
        img = Input(shape=self.img_shape)
        encoded_repr = self.encoder(img)
        gen_img = self.decoder(encoded_repr)
        self.autoencoder = Model(img, gen_img)
        # Initialize Discriminator
        latent = Input(shape=(encoded_dim,))
        gen_image_from_latent = self.decoder(latent)
        is_real = self.discriminator(gen_image_from_latent)
        self.decoder_discriminator = Model(latent, is_real)
        # Finally compile models
        self.initialize_full_model(encoded_dim)

    def initialize_full_model(self, encoded_dim):
        self.autoencoder.compile(optimizer=self.optimizer, loss='mse')
        self.discriminator.compile(optimizer=self.optimizer,
                                   loss='binary_crossentropy',
                                   metrics=['accuracy'])
        # The discriminator is frozen while the decoder is trained against it
        for layer in self.discriminator.layers:
            layer.trainable = False
        self.decoder_discriminator.compile(optimizer=self.optimizer_discriminator,
                                           loss='binary_crossentropy',
                                           metrics=['accuracy'])

    @staticmethod
    def get_encoder_model(img_shape, encoded_dim):
        encoder = Sequential()
        encoder.add(Flatten(input_shape=img_shape))
        encoder.add(Dense(1000, activation='relu'))
        encoder.add(Dense(1000, activation='relu'))
        encoder.add(Dense(encoded_dim))
        encoder.summary()
        return encoder

    @staticmethod
    def get_decoder_model(encoded_dim, img_shape):
        decoder = Sequential()
        decoder.add(Dense(1000, activation='relu', input_dim=encoded_dim))
        decoder.add(Dense(1000, activation='relu'))
        decoder.add(Dense(np.prod(img_shape), activation='sigmoid'))
        decoder.add(Reshape(img_shape))
        decoder.summary()
        return decoder

    @staticmethod
    def get_discriminator_model(img_shape):
        # `initializer` is assumed to be defined at module level
        discriminator = Sequential()
        discriminator.add(Flatten(input_shape=img_shape))
        discriminator.add(Dense(1000, activation='relu',
                                kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.add(Dense(1000, activation='relu',
                                kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.add(Dense(1, activation='sigmoid',
                                kernel_initializer=initializer,
                                bias_initializer=initializer))
        discriminator.summary()
        return discriminator

    def imagegrid(self, epochnumber):
        fig = plt.figure(figsize=[20, 20])
        for i in range(-5, 5):
            for j in range(-5, 5):
                topred = np.array((i * 0.5, j * 0.5))
                topred = topred.reshape((1, 2))
                img = self.decoder.predict(topred)
                img = img.reshape(self.img_shape)
                ax = fig.add_subplot(10, 10, (i + 5) * 10 + j + 5 + 1)
                ax.set_axis_off()
                ax.imshow(img)
        fig.savefig(str(epochnumber) + ".png")
        plt.show()
        plt.close(fig)

    def train(self, x_train_input, batch_size=128, epochs=5):
        fileNames = glob.glob('models/weights_mnist_autoencoder.*')
        fileNames.sort()
        if len(fileNames) != 0:
            saved_epoch = int(fileNames[-1].split('.')[1])
            self.autoencoder.load_weights(fileNames[-1])
        else:
            saved_epoch = -1
        if saved_epoch < epochs - 1:
            self.autoencoder.fit(
                x_train_input, x_train_input,
                batch_size=batch_size,
                epochs=epochs,
                callbacks=[
                    keras.callbacks.ModelCheckpoint(
                        'models/weights_mnist_autoencoder.{epoch:02d}.hdf5',
                        verbose=0, save_best_only=False,
                        save_weights_only=False, mode='auto', period=1),
                    keras.callbacks.EarlyStopping(monitor='loss', patience=3,
                                                  min_delta=1e-4,
                                                  restore_best_weights=True)])
        print("Training KDE")
        codes = self.encoder.predict(x_train_input)
        self.kde = KernelDensity(kernel='gaussian', bandwidth=3.16).fit(codes)
        print("Initial Training of discriminator")
        fileNames = glob.glob('models/weights_mnist_discriminator.*')
        fileNames.sort()
        if len(fileNames) != 0:
            saved_epoch = int(fileNames[-1].split('.')[1])
            self.discriminator.load_weights(fileNames[-1])
        else:
            saved_epoch = -1
        train_count = len(x_train_input)
        if saved_epoch < epochs - 1:
            # Combine real and fake images for discriminator training
            imgs_fake = self.generate(n=train_count)
            valid = np.ones((train_count, 1))   # labels for training images
            fake = np.zeros((train_count, 1))   # labels for generated fakes
            labels = np.vstack([valid, fake])   # combine together
            images = np.vstack([x_train_input, imgs_fake])
            # Train the discriminator
            self.discriminator.fit(
                images, labels,
                epochs=epochs, batch_size=batch_size, shuffle=True,
                callbacks=[
                    keras.callbacks.ModelCheckpoint(
                        'models/weights_mnist_discriminator.{epoch:02d}.hdf5',
                        verbose=0, save_best_only=False,
                        save_weights_only=False, mode='auto', period=1),
                    keras.callbacks.EarlyStopping(monitor='loss', patience=3,
                                                  min_delta=1e-4,
                                                  restore_best_weights=True)])
        print("Training GAN")
        self.generateAndPlot(x_train_input, fileName="before_gan.png")
        self.trainGAN(x_train_input, epochs=train_count // batch_size,
                      batch_size=batch_size)
        self.generateAndPlot(x_train_input, fileName="after_gan.png")

    def trainGAN(self, x_train_input, epochs=1000, batch_size=128):
        half_batch = batch_size // 2
        for epoch in range(epochs):
            # --------------- Train Discriminator -------------
            # Select a random half batch of images
            idx = np.random.randint(0, x_train_input.shape[0], half_batch)
            imgs_real = x_train_input[idx]
            # Generate a half batch of new images
            imgs_fake = self.generate(n=half_batch)
            valid = np.ones((half_batch, 1))
            fake = np.zeros((half_batch, 1))
            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs_real, valid)
            d_loss_fake = self.discriminator.train_on_batch(imgs_fake, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            codes = self.kde.sample(batch_size)
            # Generator wants the discriminator to label the generated representations as valid
            valid_y = np.ones((batch_size, 1))
            # Train generator
            g_logg_similarity = self.decoder_discriminator.train_on_batch(codes, valid_y)
            # Plot the progress
            if epoch % 50 == 0:
                print("epoch %d [D accuracy: %.2f] [G accuracy: %.2f]"
                      % (epoch, d_loss[1], g_logg_similarity[1]))

    def generate(self, n=10000):
        codes = self.kde.sample(n)
        images = self.decoder.predict(codes)
        return images

    def generateAndPlot(self, x_train_input, n=10, fileName="generated.png"):
        fig = plt.figure(figsize=[20, 20])
        images = self.generate(n * n)
        index = 1
        for image in images:
            image = image.reshape(self.img_shape)
            ax = fig.add_subplot(n, n + 1, index)
            index = index + 1
            ax.set_axis_off()
            ax.imshow(image)
            if index % (n + 1) == 0:
                nearest = findNearest(x_train_input, image)
                ax = fig.add_subplot(n, n + 1, index)
                index = index + 1
                ax.imshow(nearest)
        fig.savefig(fileName)
        plt.show()

    @staticmethod
    def mean_log_likelihood(x_test_input):
        # Fit a KDE on the flattened test data and report the mean
        # log-likelihood (score returns the total log-likelihood).
        flat = x_test_input.reshape(len(x_test_input), -1)
        kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(flat)
        return kde.score(flat) / len(flat)
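# --- Standalone sketch of the generation path shared by both GAE variants
# above (assumption: `encoder` and `decoder` are trained models shaped as in
# those classes, and `x_train` is the training set). The KDE over latent codes
# stands in for the fixed prior of a standard GAN: new latent points are drawn
# near the encoded training data, then decoded.
from sklearn.neighbors import KernelDensity

codes = encoder.predict(x_train)                                  # (n, encoded_dim)
latent_kde = KernelDensity(kernel='gaussian', bandwidth=3.16).fit(codes)
z = latent_kde.sample(25)                                         # new latent points
imgs = decoder.predict(z)                                         # decoded images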
counts = np.array(counts).reshape(-1, 1)
data['day'] = [int(s[:2]) for s in data.Date]  # day-of-month from the date string
day = np.array(data['day']).reshape(-1, 1)
trans = np.array(data.Amount).reshape(-1, 1)

X1 = np.linspace(0, 120, 1000)[:, np.newaxis]
X2 = np.linspace(0, 32, 1000)[:, np.newaxis]
X3 = np.linspace(trans.min(), trans.max() + 1, 1000)[:, np.newaxis]

for kernel in ['gaussian', 'tophat']:
    kde1 = KernelDensity(kernel=kernel, bandwidth=5).fit(counts)
    kde2 = KernelDensity(kernel=kernel, bandwidth=5).fit(day)
    kde3 = KernelDensity(kernel=kernel, bandwidth=5).fit(trans)
    log_dens1 = kde1.score_samples(X1)
    log_dens2 = kde2.score_samples(X2)
    log_dens3 = kde3.score_samples(X3)

    samples = int(kde1.sample(1)[0][0])
    print('There are', samples, 'transactions', '\n')

    num_days = kde2.sample(samples)
    for m in range(len(num_days)):
        num_days[m] = int(round(num_days[m][0]))
        while num_days[m] <= 0 or num_days[m] > 31:
            # resample until a valid day-of-month is drawn
            num_days[m] = int(round(kde2.sample(1)[0][0]))
    print('The days are:')
    print(num_days, '\n')

    num_trans = kde3.sample(samples)
    print('The transactions are:')
    print(num_trans, '\n')

    # Plotting density of number of transactions in a month
    plt.plot(X1[:, 0], np.exp(log_dens1), '-',
             label="kernel = '{0}'".format(kernel))
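# --- Companion sketch (not from the original source; uses kde2/kde3, X2/X3 and
# the log densities left over from the last loop iteration above): the other
# two density panels, exponentiating because score_samples returns log density.
fig, (ax2, ax3) = plt.subplots(2, 1)
ax2.plot(X2[:, 0], np.exp(log_dens2), '-', label="kernel = '{0}'".format(kernel))
ax2.set_xlabel('day of month')
ax3.plot(X3[:, 0], np.exp(log_dens3), '-', label="kernel = '{0}'".format(kernel))
ax3.set_xlabel('transaction amount')
plt.show()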
for i, label, color, bw in zip([64, 16, 4, 1], labels, colors, bandwidths):
    print(i)
    data = np.loadtxt('STP_09_lm55_t{:02d}_joint_Lframe_resampled.dat'.format(i))
    ra, dec = data[:, 8], data[:, 9]
    # ra -= np.pi

    # create the KDE estimator
    # we could use grid-search cross validation to estimate the bandwidth here
    radec = np.vstack((dec, ra)).T
    kde = KernelDensity(kernel='tophat', bandwidth=bw,
                        metric='haversine').fit(radec)

    # find the density levels corresponding to 1, 2, 3 sigmas
    n = 10000
    sample = kde.sample(n)
    sample_densities = np.sort(np.exp(kde.score_samples(sample)))
    # Note: are those levels appropriate for 2d?
    levels = [sample_densities[int(n * (1 - p))]
              for p in [0.9973, 0.9545, 0.6827]]

    gra = np.linspace(-np.pi, np.pi, 300)
    gdec = np.linspace(-np.pi / 2, np.pi / 2, 300)
    ggra, ggdec = np.meshgrid(gra, gdec)
    d = np.exp(kde.score_samples(np.vstack((ggdec.ravel(), ggra.ravel())).T))
    d = np.reshape(d, ggra.shape)
    cs = ax.contour(gra, gdec, d, colors=color, levels=levels)
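# --- Self-contained illustration of the sample-based credible-level trick used
# above (not from the original source): draw from the KDE, sort the densities
# evaluated at the draws, and read off the (1 - p) quantile as the contour
# level enclosing probability mass p.
import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(1)
pts = rng.normal(size=(2000, 2))
kde2d = KernelDensity(kernel='gaussian', bandwidth=0.3).fit(pts)

m = 10000
draws = kde2d.sample(m, random_state=1)
dens = np.sort(np.exp(kde2d.score_samples(draws)))
levels = [dens[int(m * (1 - p))] for p in (0.9973, 0.9545, 0.6827)]  # 3, 2, 1 sigma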
transformer = RobustScaler(quantile_range=(25, 75))
transformer.fit(X)
X = transformer.transform(X)

from sklearn.model_selection import GridSearchCV
bandwidths = 10**np.linspace(-2, 0, 20)
grid = GridSearchCV(KernelDensity(kernel='gaussian'),
                    {'bandwidth': bandwidths}, cv=2)
grid.fit(X)

# bandwidth fixed manually here; the grid-search result above is left unused
kde = KernelDensity(kernel='gaussian', bandwidth=.021)
kde.fit(X)
samples = kde.sample(n_samples=45 * 198)
samples = transformer.inverse_transform(samples)

x_int = r_0_g * np.cos(phi_0_g)
y_int = r_0_g * np.sin(phi_0_g)
X_int = sp.interpolate.griddata(samples[:, 1:], samples[:, 0],
                                (x_int.flatten(), y_int.flatten()),
                                method='linear')
im = plt.contourf(x_int, y_int, np.reshape(X_int, r_0_g.shape), cmap='jet')
plt.colorbar(im)

with open('K_CNR.pkl', 'wb') as writer:
    pickle.dump(K_CNR, writer)

##### Dataframes
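# --- Sketch (assumes `grid` and `transformer` as fitted above): drawing from
# the cross-validated estimator instead of the hand-picked bandwidth of 0.021.
best_kde = grid.best_estimator_  # refit on X with the best bandwidth
print('CV bandwidth:', grid.best_params_['bandwidth'])
cv_samples = transformer.inverse_transform(best_kde.sample(n_samples=45 * 198))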