def distance(self, X, Y, **kwargs):
    """
    Compute a distance between X and Y in the learned latent space.

    Step 1: project X and Y with the encoder.
    Step 2: compute the distance between the two projections
    (NNAA score by default).

    :param X: First dataset, passed to ``self.encoder.predict``.
    :param Y: Second dataset, passed to ``self.encoder.predict``.
    :param kwargs: Forwarded unchanged to ``AutoData.distance``.
    :return: Whatever ``AutoData.distance`` returns for the two projections.
    """
    # Project both inputs first, then normalise the container type.
    projections = [self.encoder.predict(dataset) for dataset in (X, Y)]
    # The encoder may return a raw array; wrap it so that `.distance` exists.
    x_latent, y_latent = (
        proj if isinstance(proj, autopandas.AutoData) else autopandas.AutoData(proj)
        for proj in projections
    )
    return x_latent.distance(y_latent, **kwargs)
def copula_generate(X, generator=None, n=None):
    """
    Generate artificial data using the copula trick.

    :param X: Data to imitate. Must expose ``indexes``, ``columns`` and ``shape``.
    :param generator: Model to fit and sample from. KDE by default.
    :param n: Number of examples to generate. By default it is the
              number of observations in X.
    :return: ``autopandas.AutoData`` carrying the indexes and columns of X.
    """
    # Keep the frame labels so they can be put back on the generated data.
    original_indexes, original_columns = X.indexes, X.columns
    model = KernelDensity() if generator is None else generator
    n_samples = X.shape[0] if n is None else n
    # Untransformed values, needed later for the marginal retrofitting.
    real_values = np.array(X)
    # Marginals -> uniforms (ranks) -> inverse gaussian CDF.
    gaussianized = rank_matrix_to_inverse(matrix_to_rank(X))
    # Fit the generator in the transformed space, then sample from it.
    model.fit(gaussianized)
    artificial = model.sample(n_samples)
    # Marginal retrofitting against the real values.
    result = autopandas.AutoData(marginal_retrofit(artificial, real_values))
    # Restore data frame labels.
    result.indexes = original_indexes
    result.columns = original_columns
    return result
def sample(self, n=1, **kwargs):
    """
    Sample from trained GMM.

    :param n: Number of examples to sample.
    :param kwargs: Forwarded unchanged to the underlying model's ``sample``.
    :return: ``autopandas.AutoData`` built with the stored columns and indexes.
    :raises Exception: If ``self.indexes`` is None, i.e. the model was not fitted.
    """
    # Guard clause: indexes are used as the "has been fitted" marker.
    if self.indexes is None:
        raise Exception('You firstly need to train the GMM before sampling. Please use fit method.')
    # sklearn's GMM returns a tuple; only its first element is used here.
    drawn = self.model.sample(n, **kwargs)
    return autopandas.AutoData(drawn[0], columns=self.columns, indexes=self.indexes)
def sample(self, n=100, loc=0, scale=1):
    """
    Draw latent points from a gaussian prior and decode them.

    :param n: Number of examples to sample.
    :param loc: Mean of gaussian distribution prior.
    :param scale: Standard deviation of gaussian distribution prior.
    :return: Decoded samples as ``autopandas.AutoData``, labelled with the
             stored columns / indexes when those are set.
    """
    # One draw of size `latent_dim` per requested example.
    latent_draws = [np.random.normal(loc, scale, self.latent_dim) for _ in range(n)]
    generated = autopandas.AutoData(self.decode(np.array(latent_draws)))
    # Copy over whichever labels are available, columns first.
    for label_name in ("columns", "indexes"):
        if getattr(self, label_name) is not None:
            setattr(generated, label_name, getattr(self, label_name))
    return generated
def sample(self, n=100, loc=0, scale=1):
    """
    Draw latent points from a gaussian prior and decode them.

    :param n: Number of examples to sample.
    :param loc: Mean of gaussian distribution prior.
    :param scale: Standard deviation of gaussian distribution prior.
    :return: Decoded samples. An ``autopandas.AutoData`` when the cast
             succeeds; otherwise the data as it was when the cast or
             labelling failed (a warning is emitted in that case).
    """
    randoms = np.array(
        [np.random.normal(loc, scale, self.latent_dim) for _ in range(n)])
    decoded = self.decoder.predict(randoms)
    try:
        decoded = autopandas.AutoData(decoded)
        if self.columns is not None:
            decoded.columns = self.columns
        if self.indexes is not None:
            decoded.indexes = self.indexes
    except Exception:
        # Best effort: keep returning the data even if it cannot be cast or
        # labelled. `Exception` (not a bare except) so that KeyboardInterrupt
        # and SystemExit still propagate to the caller.
        warn('Impossible to cast sampled data to autopandas.AutoData')
    return decoded
def decode(new_data, data, limits, min_max):
    """
    Decode the data from SDV format.

    :param new_data: Data in SDV format
    :param data: Original data (provides the columns and indexes)
    :param limits: Limits returned by sdv.encode
    :param min_max: Min-max returned by sdv.encode
    :return: ``autopandas.AutoData`` with every column decoded.
    """
    decoded = autopandas.AutoData(new_data, columns=data.columns, indexes=data.indexes)
    for column in decoded.columns:
        # Columns listed in `limits` are treated as categorical,
        # every other column as numeric.
        decoded[column] = (
            undo_categorical(decoded[column], limits[column])
            if column in limits
            else undo_numeric(decoded[column], *min_max[column])
        )
    return decoded