Example #1
# Imports assumed by this snippet; permute_and_flip is defined alongside
# select in the MST source.
import itertools

import networkx as nx
import numpy as np
from disjoint_set import DisjointSet
from mbi import FactoredInference


def select(data, epsilon, measurement_log, cliques=[]):
    # Fit a graphical model to the measurements taken so far
    engine = FactoredInference(data.domain, iters=1000)
    est = engine.estimate(measurement_log)

    # Score every attribute pair by the L1 distance between its true
    # 2-way marginal and the model's estimate of that marginal
    weights = {}
    candidates = list(itertools.combinations(data.domain.attrs, 2))
    for a, b in candidates:
        xhat = est.project([a, b]).datavector()
        x = data.project([a, b]).datavector()
        weights[a, b] = np.linalg.norm(x - xhat, 1)

    # Start from a forest that already contains any required cliques
    T = nx.Graph()
    T.add_nodes_from(data.domain.attrs)
    ds = DisjointSet()

    for e in cliques:
        T.add_edge(*e)
        ds.union(*e)

    # r components remain, so r - 1 more edges yield a spanning tree
    r = len(list(nx.connected_components(T)))

    # Privately add one edge at a time with permute-and-flip,
    # splitting the budget evenly across the r - 1 selections
    for i in range(r - 1):
        candidates = [e for e in candidates if not ds.connected(*e)]
        wgts = np.array([weights[e] for e in candidates])
        idx = permute_and_flip(wgts, epsilon / (r - 1), sensitivity=1.0)
        e = candidates[idx]
        T.add_edge(*e)
        ds.union(*e)

    return list(T.edges)
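
# A minimal usage sketch (hypothetical): measure all 1-way marginals first,
# then let select() pick a tree of 2-way cliques with part of the budget.
# `measure` is the helper from the same MST module (see Example #4), and the
# file paths are placeholders.
from mbi import Dataset

data = Dataset.load('adult.csv', 'adult-domain.json')
log1 = measure(data, [(col,) for col in data.domain], 1.0)
cliques = select(data, 0.5, log1)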
Example #2
    def postprocess(self):
        # Use the noisy measurements to fit a PGM inference engine,
        # then generate synthetic data from the fitted model
        iters = self.iters
        domain = self.domain
        temp_domain = Domain.fromdict(domain)
        engine = FactoredInference(temp_domain,
                                   structural_zeros=None,
                                   iters=iters,
                                   log=True,
                                   warm_start=False,
                                   elim_order=self.elimination_order)
        self.engine = engine
        engine.estimate(self.measurements)

        self.synthetic = self.engine.model.synthetic_data()
        self.synthetic = reverse_data(self.synthetic, self.supports)
Example #3
# Imports assumed by this snippet; Logger is the callback from mbi's
# callbacks module.
import numpy as np
from scipy import sparse
from scipy.stats import laplace, norm
from mbi import FactoredInference
from mbi.callbacks import Logger


def run(dataset, measurements, eps=1.0, delta=0.0, bounded=True, engine='MD',
        options={}, iters=10000, seed=None, metric='L2', elim_order=None,
        frequency=1, workload=None):
    """
    Run a mechanism that takes the given measurements under differential
    privacy and then fits a model to them via inference.
    This is a convenience method for running end-to-end experiments.
    """

    domain = dataset.domain
    total = None

    state = np.random.RandomState(seed)

    # If bare projections were passed, measure each one with an
    # identity query over the corresponding marginal
    if len(measurements) >= 1 and type(measurements[0][0]) is str:
        matrix = lambda proj: sparse.eye(domain.project(proj).size())
        measurements = [(proj, matrix(proj)) for proj in measurements]

    # Accumulate the L1 and L2 sensitivities of the measurements
    l1 = 0
    l2 = 0
    for _, Q in measurements:
        l1 += np.abs(Q).sum(axis=0).max()
        try:
            l2 += Q.power(2).sum(axis=0).max()    # for sparse matrices
        except AttributeError:
            l2 += np.square(Q).sum(axis=0).max()  # for dense matrices

    if bounded:
        # Bounded DP: a neighboring dataset changes one record rather than
        # adding/removing one, which doubles the sensitivity
        total = dataset.df.shape[0]
        l1 *= 2
        l2 *= 2

    if delta > 0:
        # Gaussian noise calibrated for (eps, delta)-DP
        noise = norm(loc=0, scale=np.sqrt(l2 * 2 * np.log(2 / delta)) / eps)
    else:
        # Laplace noise calibrated for pure eps-DP
        noise = laplace(loc=0, scale=l1 / eps)

    if workload is None:
        workload = measurements
   
    # Compute exact workload answers for logging/evaluation
    truth = []
    for proj, W in workload:
        x = dataset.project(proj).datavector()
        y = W.dot(x)
        truth.append((W, y, proj))

    # Take the noisy measurements; each entry is (Q, noisy answer, noise scale, proj)
    answers = []
    for proj, Q in measurements:
        x = dataset.project(proj).datavector()
        z = noise.rvs(size=Q.shape[0], random_state=state)
        y = Q.dot(x)
        answers.append((Q, y + z, 1.0, proj))

    estimator = FactoredInference(domain, metric=metric, iters=iters, warm_start=False, elim_order=elim_order)
    logger = Logger(estimator, true_answers=truth, frequency=frequency)
    model = estimator.estimate(answers, total, engine=engine, callback=logger, options=options)
        
    return model, logger, answers
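
# Hypothetical invocation (paths are placeholders): measure two marginals of
# an mbi Dataset under pure eps-DP and fit a model to the noisy answers.
from mbi import Dataset

data = Dataset.load('adult.csv', 'adult-domain.json')
model, logger, answers = run(data, [('age',), ('sex', 'income>50K')],
                             eps=1.0, iters=2500)
est = model.project(('sex', 'income>50K')).datavector()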
Example #4
def MST(data, epsilon, delta):
    # This mechanism is designed for relatively large, high-dimensional
    # datasets; for lower-dimensional datasets (like adult), simpler
    # mechanisms may be better.
    # 2/3 of the budget goes to the two rounds of marginal measurements,
    # 1/3 to clique selection.
    sigma = calibrate_gaussian_noise(epsilon * 2.0 / 3.0, delta)
    # Round 1: measure every 1-way marginal
    cliques = [(col,) for col in data.domain]
    log1 = measure(data, cliques, sigma)
    data, log1, undo_compress_fn = compress_domain(data, log1)
    # Round 2: privately select a tree of 2-way cliques and measure them
    cliques = select(data, epsilon / 3.0, log1)
    log2 = measure(data, cliques, sigma)
    # Fit a graphical model to both rounds and sample synthetic records
    engine = FactoredInference(data.domain, iters=5000)
    est = engine.estimate(log1 + log2)
    synth = est.synthetic_data()
    return undo_compress_fn(synth)
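
# Hypothetical end-to-end call (paths are placeholders). MST returns an mbi
# Dataset, so the synthetic records live in synth.df.
from mbi import Dataset

data = Dataset.load('adult.csv', 'adult-domain.json')
synth = MST(data, epsilon=1.0, delta=1e-9)
print(synth.df.head())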
Example #5
    def synthesize(self, file_path, eps, seed):
        # setup random state
        prng = np.random.RandomState(seed)

        # load data vector
        relation = Relation(self.config)
        relation.load_csv(file_path)
        self._numerize(relation._df)

        # perform measurement: split the budget across the strategy's
        # sub-workloads in proportion to their weights
        attributes = list(self.config.keys())
        measurements = []
        w_sum = sum(Ai.weight for Ai in self.strategy.matrices)
        for Ai in self.strategy.matrices:
            w = Ai.weight
            # keep only the attributes this sub-workload actually queries
            # (Ones factors marginalize an attribute out)
            proj = [
                attributes[i] for i, B in enumerate(Ai.base.matrices)
                if type(B).__name__ != 'Ones'
            ]
            matrix = [
                B for B in Ai.base.matrices if type(B).__name__ != 'Ones'
            ]
            if len(matrix) == 0:
                matrix = EkteloMatrix(np.ones((1, 1)))
            else:
                matrix = Kronecker(matrix)
            proj_rel = copy.deepcopy(relation)
            proj_rel.project(proj)
            if proj_rel.df.shape[1] == 0:
                # empty projection: the data vector is just the total count
                x = np.array([proj_rel.df.shape[0]])
            else:
                x = Vectorize('').transform(proj_rel).flatten()
            y = Laplace(matrix, w * eps / w_sum).measure(x, prng)
            measurements.append((matrix.sparse_matrix(), y, 1.0 / w, proj))

        # generate synthetic data
        sizes = [field['bins'] for field in self.config.values()]
        dom = Domain(attributes, sizes)
        engine = FactoredInference(dom)
        model = engine.estimate(measurements)
        df = model.synthetic_data().df
        self._denumerize(df)
        self._sample_numerical(df)

        return df
Example #6
    """ Efficiently take measurements from HDMM strategy and convert to a PGM-compatable form """
    A = workload.union_kron_canonical(A)
    measurements = []
    for Ai in A.matrices:
        w = Ai.weight
        proj = [
            attributes[i] for i, B in enumerate(Ai.base.matrices)
            if type(B) != workload.Ones
        ]
        print(proj)
        matrix = workload.Kronecker(
            [B for B in Ai.base.matrices if type(B) != workload.Ones])
        matrix = w * matrix.sparse_matrix()
        x = data.project(
            proj).datavector()  # does Relation have this functionality?
        y = matrix.dot(x) + np.random.laplace(
            loc=0, scale=1, size=matrix.shape[0])
        measurements.append((matrix, y, 1.0, proj))
    return measurements


measurements = take_measurements(A, data)

engine = FactoredInference(dom)
model = engine.estimate(measurements)

df = model.synthetic_data().df
print(df.head())

# Then you can post-process to map category/bin ids back to real values
Example #7
# This fragment assumes numpy (np), an mbi Domain over attributes A, B, C,
# a privacy budget `epsilon`, and the true marginal vectors `ab` and `bc`.
sigma = np.sqrt(2.0) / epsilon

np.random.seed(0)
yab = ab + np.random.laplace(loc=0, scale=sigma, size=ab.size)
ybc = bc + np.random.laplace(loc=0, scale=sigma, size=bc.size)

# record the measurements in a form needed by inference
Iab = np.eye(ab.size)
Ibc = np.eye(bc.size)

measurements = [(Iab, yab, sigma, ['A', 'B']),
                (Ibc, ybc, sigma, ['B', 'C'])]

# estimate the data distribution
engine = FactoredInference(domain)
model = engine.estimate(measurements, engine='MD')

# recover consistent estimates of measurements
ab2 = model.project(['A', 'B']).datavector()
bc2 = model.project(['B', 'C']).datavector()

print(ab2)
print(bc2)

# estimate answer to unmeasured queries
ac2 = model.project(['A','C']).datavector()
print(ac2)

# generate synthetic data
synth = model.synthetic_data(rows=10)
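
# synthetic_data() returns an mbi Dataset wrapping a pandas DataFrame, so the
# sampled rows can be inspected directly:
print(synth.df.head())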
Example #8
# This fragment assumes an mbi Dataset `data` (e.g. adult) with its `domain`,
# the total record count `total`, a noise scale `sigma` from the first half of
# the budget, and an Identity matrix helper imported in the full example.

# spend half of the privacy budget measuring all 1-way marginals
measurements = []
for col in data.domain:
    x = data.project(col).datavector()
    y = x + np.random.laplace(loc=0, scale=sigma, size=x.size)
    I = Identity(x.size)
    measurements.append((I, y, sigma, (col, )))

# spend the other half of the privacy budget measuring some 2- and 3-way marginals

cliques = [('age', 'education-num'), ('marital-status', 'race'),
           ('sex', 'hours-per-week'), ('hours-per-week', 'income>50K'),
           ('native-country', 'marital-status', 'occupation')]

sigma = 1.0 / len(cliques) / 2.0

for cl in cliques:
    x = data.project(cl).datavector()
    y = x + np.random.laplace(loc=0, scale=sigma, size=x.size)
    I = Identity(x.size)
    measurements.append((I, y, sigma, cl))

# now perform inference to estimate the data distribution

engine = FactoredInference(domain, backend='torch', log=True, iters=10000)
model = engine.estimate(measurements, total=total, engine='RDA')

# now answer new queries

y1 = model.project(('sex', 'income>50K')).datavector()
y2 = model.project(('race', 'occupation')).datavector()
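
# A quick sanity check (sketch): compare the model's answer for one new query
# against the true marginal computed directly from the sensitive data.
x1 = data.project(('sex', 'income>50K')).datavector()
print(np.linalg.norm(x1 - y1, 1) / x1.sum())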