Пример #1
0
 def distance(self, X, Y, **kwargs):
     """ Measure how far apart X and Y are once encoded.

         Both inputs are passed through ``self.encoder.predict``; each
         result is wrapped in ``autopandas.AutoData`` unless it already is
         one, and the comparison is delegated to ``AutoData.distance``.

         :param X: First data set, in a form accepted by the encoder.
         :param Y: Second data set, in a form accepted by the encoder.
         :param kwargs: Forwarded unchanged to ``AutoData.distance``
                        (the original author notes the NNAA score is its default).
         :return: Whatever ``AutoData.distance`` returns for the two projections.
     """
     project = self.encoder.predict
     latent_x, latent_y = project(X), project(Y)
     # The encoder may hand back plain arrays; AutoData provides .distance().
     wrapper = autopandas.AutoData
     latent_x = latent_x if isinstance(latent_x, wrapper) else wrapper(latent_x)
     latent_y = latent_y if isinstance(latent_y, wrapper) else wrapper(latent_y)
     return latent_x.distance(latent_y, **kwargs)
Пример #2
0
def copula_generate(X, generator=None, n=None):
    """ Generate artificial data with the copula trick.

        The input is transformed with ``matrix_to_rank`` and then
        ``rank_matrix_to_inverse`` (described by the original author as
        mapping marginals to uniforms, then through the inverse Gaussian CDF).
        The generator is fitted on that transformed data, sampled, and the
        samples are mapped back onto the original marginals with
        ``marginal_retrofit``.

        :param X: Data to imitate; must expose ``indexes``, ``columns`` and ``shape``.
        :param generator: Model to fit and sample from. KDE by default.
        :param n: Number of examples to generate. By default it is the number of observations in X.
        :return: ``autopandas.AutoData`` carrying the ``indexes`` and ``columns`` of X.
    """
    # Remember the metadata before X gets rebound to its transformed version.
    saved_indexes, saved_columns = X.indexes, X.columns
    model = KernelDensity() if generator is None else generator
    n_samples = X.shape[0] if n is None else n
    # Untransformed values are needed later for marginal retrofitting.
    original_values = np.array(X)
    # Rank transform followed by the inverse transform, in that order.
    transformed = rank_matrix_to_inverse(matrix_to_rank(X))
    model.fit(transformed)
    synthetic = model.sample(n_samples)
    output = autopandas.AutoData(marginal_retrofit(synthetic, original_values))
    # Carry over the metadata of the source data.
    output.indexes = saved_indexes
    output.columns = saved_columns
    return output
Пример #3
0
    def sample(self, n=1, **kwargs):
        """ Draw examples from the fitted GMM.

            :param n: Number of examples to sample.
            :param kwargs: Forwarded unchanged to ``self.model.sample``.
            :return: ``autopandas.AutoData`` built from the drawn rows, using
                     the stored ``columns`` and ``indexes``.
            :raises Exception: If ``self.indexes`` is None, i.e. the model
                               has not been fitted yet.
        """
        # Guard: indexes is only populated by fit, so None means "untrained".
        if self.indexes is None:
            raise Exception('You firstly need to train the GMM before sampling. Please use fit method.')
        # sklearn's GMM returns a tuple; the generated rows are its first item.
        drawn = self.model.sample(n, **kwargs)
        rows = drawn[0]
        return autopandas.AutoData(rows, columns=self.columns, indexes=self.indexes)
Пример #4
0
 def sample(self, n=100, loc=0, scale=1):
     """ Draw latent vectors from a Gaussian prior and decode them.

         :param n: Number of examples to generate.
         :param loc: Mean of the gaussian distribution prior.
         :param scale: Standard deviation of gaussian distribution prior.
         :return: ``autopandas.AutoData`` holding the decoded samples, with
                  the stored ``columns`` / ``indexes`` attached when they are set.
     """
     # One draw of size latent_dim per requested example.
     latent_draws = [np.random.normal(loc, scale, self.latent_dim)
                     for _ in range(n)]
     latent_matrix = np.array(latent_draws)
     generated = self.decode(latent_matrix)
     generated = autopandas.AutoData(generated)
     # Only attach metadata that is actually available on the instance.
     if self.columns is not None:
         generated.columns = self.columns
     if self.indexes is not None:
         generated.indexes = self.indexes
     return generated
Пример #5
0
 def sample(self, n=100, loc=0, scale=1):
     """ Draw latent vectors from a Gaussian prior and decode them.

         The decoded output is cast to ``autopandas.AutoData`` on a
         best-effort basis: if the cast (or attaching the stored metadata)
         fails, a warning is emitted and whatever was obtained so far is
         returned instead of raising.

         :param n: Number of examples to generate.
         :param loc: Mean of the gaussian distribution prior.
         :param scale: Standard deviation of gaussian distribution prior.
         :return: ``autopandas.AutoData`` when the cast succeeds, otherwise
                  the raw output of ``self.decoder.predict``.
     """
     randoms = np.array(
         [np.random.normal(loc, scale, self.latent_dim) for _ in range(n)])
     decoded = self.decoder.predict(randoms)
     try:
         decoded = autopandas.AutoData(decoded)
         if self.columns is not None:
             decoded.columns = self.columns
         if self.indexes is not None:
             decoded.indexes = self.indexes
     except Exception:
         # Catch Exception rather than everything, so that KeyboardInterrupt
         # and SystemExit are not swallowed by the best-effort fallback.
         warn('Impossible to cast sampled data to autopandas.AutoData')
     return decoded
Пример #6
0
def decode(new_data, data, limits, min_max):
    """ Convert data from SDV format back to the original representation.

        Columns found in ``limits`` are restored with ``undo_categorical``;
        every other column is restored with ``undo_numeric``.

        :param new_data: Data in SDV format
        :param data: Original data (supplies the columns and indexes)
        :param limits: Limits returned by sdv.encode
        :param min_max: Min-max returned by sdv.encode
        :return: ``autopandas.AutoData`` with every column decoded.
    """
    restored = autopandas.AutoData(new_data, columns=data.columns, indexes=data.indexes)
    for col in restored.columns:
        if col not in limits:
            # No limits entry: handled as a numeric column using its min_max entry.
            restored[col] = undo_numeric(restored[col], *min_max[col])
            continue
        restored[col] = undo_categorical(restored[col], limits[col])
    return restored