示例#1
0
    def test_convert(self):
        """Convert secure values between types and compare the opened results."""
        int_default = mpc.SecInt()
        int_8 = mpc.SecInt(8)
        int_16 = mpc.SecInt(16)
        fld_257 = mpc.SecFld(257)
        fld_263 = mpc.SecFld(263)
        fxp_default = mpc.SecFxp()
        fxp_16 = mpc.SecFxp(16)

        def opened(shares):
            # Output the secure values and run the computation to completion.
            return mpc.run(mpc.output(shares))

        # 8-bit secure integers -> default secure integers -> back to 8-bit.
        expected = [-100, 100]
        source = [int_8(-100), int_8(100)]
        target = mpc.convert(source, int_default)
        self.assertEqual(opened(target), expected)
        target = mpc.convert(target, int_8)
        self.assertEqual(opened(target), expected)

        # 16-bit secure integers -> secure field of order 257.
        expected = list(range(10))
        source = [int_16(value) for value in range(10)]
        target = mpc.convert(source, fld_257)
        self.assertEqual(opened(target), expected)

        # Secure field of order 257 -> secure field of order 263.
        source = [fld_257(value) for value in range(10)]
        target = mpc.convert(source, fld_263)
        self.assertEqual(opened(target), expected)

        # Default secure integers -> default secure fixed-point -> back again.
        expected = [-100, 100]
        source = [int_default(-100), int_default(100)]
        target = mpc.convert(source, fxp_default)
        self.assertEqual(opened(target), expected)
        target = mpc.convert(target, int_default)
        self.assertEqual(opened(target), expected)

        # 16-bit secure fixed-point -> default secure fixed-point -> back again.
        expected = [-100.25, 100.875]
        source = [fxp_16(-100.25), fxp_16(100.875)]
        target = mpc.convert(source, fxp_default)
        self.assertEqual(opened(target), expected)
        target = mpc.convert(target, fxp_16)
        self.assertEqual(opened(target), expected)
示例#2
0
 def test_empty_input(self):
     """Check that empty inputs give the matching empty or neutral result."""
     check = self.assertEqual
     sec_int_type = mpc.SecInt()

     # Gathering or outputting nothing yields an empty list.
     check(mpc.run(mpc.gather([])), [])
     check(mpc.run(mpc.output([])), [])

     # Resharing and converting an empty list leave it empty.
     check(mpc._reshare([]), [])
     check(mpc.convert([], None), [])

     # Reductions over nothing return 0 (sums) or 1 (product).
     check(mpc.sum([]), 0)
     check(mpc.prod([]), 1)
     check(mpc.in_prod([], []), 0)

     # Element-wise vector operations on empty vectors stay empty.
     check(mpc.vector_add([], []), [])
     check(mpc.vector_sub([], []), [])
     check(mpc.scalar_mul(sec_int_type(0), []), [])
     check(mpc.schur_prod([], []), [])

     # No bits compose to 0.
     check(mpc.from_bits([]), 0)
示例#3
0
async def main():
    """Run the ridge regression demo, in the clear and with MPyC.

    Steps, in order:
      1. Parse the command-line options (dataset, regularization, accuracy,
         and the size of the synthetic data).
      2. Start the MPyC runtime and obtain the data: either synthesized, or
         read via the info file under ``data/regr``.
      3. Split the data into a train and a test set (fixed split if the
         dataset provides one, otherwise a random 70% train split seeded by
         a value sent by party 0).
      4. Fit scikit-learn's Ridge on the train set and print its errors.
      5. For each accuracy parameter alpha, compute A = X^T X + lambda I and
         B = X^T Y over scaled integers, solve A w = B via ``linear_solve``,
         and print the resulting errors next to the scikit-learn ones.
      6. Shut down the MPyC runtime.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-i',
        '--dataset',
        type=int,
        # Reject indices outside 0..7 up front: otherwise a too-large value
        # raises a bare IndexError below and a negative one silently selects
        # a dataset counted from the end of the settings list.
        choices=range(8),
        metavar='I',
        help=('dataset 0=synthetic (default), 1=student, 2=wine-red, '
              '3=wine-white, 4=year, 5=gas-methane, 6=gas-CO, 7=higgs'))
    parser.add_argument('-u',
                        '--data-url',
                        action='store_true',
                        default=False,
                        help='show URL for downloading dataset I')
    parser.add_argument('-l',
                        '--lambda_',
                        type=float,
                        metavar='L',
                        help='regularization L>=0.0 (default=1.0)')
    parser.add_argument('-a',
                        '--accuracy',
                        type=int,
                        metavar='A',
                        help='accuracy A (number of fractional bits)')
    parser.add_argument(
        '-n',
        '--samples',
        type=int,
        metavar='N',
        help='number of samples in synthetic data (default=1000)')
    parser.add_argument(
        '-d',
        '--features',
        type=int,
        metavar='D',
        help='number of features in synthetic data (default=10)')
    parser.add_argument('-e',
                        '--targets',
                        type=int,
                        metavar='E',
                        help='number of targets in synthetic data (default=1)')
    # accuracy=-1 means "not given": the per-dataset range of alphas is used.
    parser.set_defaults(dataset=0,
                        lambda_=1.0,
                        accuracy=-1,
                        samples=1000,
                        features=10,
                        targets=1)
    args = parser.parse_args()

    await mpc.start()

    if not args.dataset:
        # dataset 0: synthetic data, tried for several accuracy parameters
        range_alpha = range(4, 8)
        n, d, e, split = args.samples, args.features, args.targets, 0
        name = 'SYNTHETIC'
        logging.info('Generating synthetic data')
        X = await synthesize_data(n, d, e)
    else:
        # (URL suffix, dataset name, accuracy parameter alpha) per dataset 1..7
        settings = [('student+performance', 'student-mat', 6),
                    ('Wine+Quality', 'winequality-red', 7),
                    ('Wine+Quality', 'winequality-white', 8),
                    ('Yearpredictionmsd', 'YearPredictionMSD', 6),
                    ('Gas+sensor+array+under+dynamic+gas+mixtures',
                     'ethylene_methane', 8),
                    ('Gas+sensor+array+under+dynamic+gas+mixtures',
                     'ethylene_CO', 9), ('HIGGS', 'HIGGS', 5)]
        url, name, alpha = settings[args.dataset - 1]
        url = 'https://archive.ics.uci.edu/ml/datasets/' + url
        if args.data_url:
            print(f'URL: {url}')
        range_alpha = range(alpha, alpha + 1)
        infofile = os.path.join('data', 'regr', 'info-' + name + '.csv')
        logging.info(f'Loading dataset {name}')
        X, d, e, split = read_data(infofile)
        n = len(X)
        logging.info(f'Loaded {n} samples')
    print(f'dataset: {name} with {n} samples, {d} features, and {e} target(s)')
    print(f'regularization lambda: {args.lambda_}')

    # split in train set and test set
    if split:
        # fixed split
        X1, X2 = X[:split], X[split:]
    else:
        # random split (all parties use same rnd)
        rnd = await mpc.transfer(random.randrange(2**31), senders=0)
        X1, X2 = sklearn.model_selection.train_test_split(X,
                                                          train_size=0.7,
                                                          random_state=rnd)
    del X
    # first d columns are the features, the remaining columns the targets
    X1, Y1 = X1[:, :d], X1[:, d:]
    X2, Y2 = X2[:, :d], X2[:, d:]
    n1 = len(X1)
    d = d + 1  # add (virtual) feature column X_d = [1, ..., 1] for vertical intercept

    # ridge regression "in the clear"
    ridge = sklearn.linear_model.Ridge(alpha=args.lambda_,
                                       fit_intercept=True,
                                       copy_X=True,
                                       solver='cholesky')
    ridge.fit(X1, Y1)
    error_train_skit = rmse(Y1, ridge.predict(X1))
    error_test_skit = rmse(Y2, ridge.predict(X2))
    print(f'scikit train error: {error_train_skit}')
    print(f'scikit test error:  {error_test_skit}')

    if args.accuracy >= 0:
        # an explicitly given accuracy overrides the per-dataset range
        alpha = args.accuracy
        range_alpha = range(alpha, alpha + 1)
    for alpha in range_alpha:  # accuracy parameter
        print('accuracy alpha:', alpha)
        # set parameters accordingly
        beta = 2**alpha
        lambda_ = round(args.lambda_ * beta**2)
        gamma = n1 * beta**2 + lambda_
        secint = mpc.SecInt(gamma.bit_length() + 1)
        print(
            f'secint prime size: |q| = {secint.field.modulus.bit_length()} bits'
            f' (secint bit length: {secint.bit_length})')
        bound = round(d**(d / 2)) * gamma**d
        secfld = mpc.SecFld(min_order=2 * bound + 1, signed=True)
        print(
            f'secfld prime size: |p| = {secfld.field.modulus.bit_length()} bits'
        )

        f2 = float(beta)
        q = secint.field.modulus
        logging.info(
            'Transpose, scale, and create (degree 0) shares for X and Y')
        # enforce full size shares (mod q numbers) by adding q to each element
        Xt = [[int(a * f2) + q for a in col] for col in X1.transpose()]
        Yt = [[int(a * f2) + q for a in col] for col in Y1.transpose()]

        timeStart = time.process_time()
        logging.info('Compute A = X^T X + lambda I and B = X^T Y')

        # AB lists, row by row, the upper triangle of X^T X followed by the
        # corresponding row of X^T Y; the all-ones column X_d is kept virtual.
        AB = []
        for i in range(d - 1):
            xi = Xt[i]
            for j in range(i, d - 1):
                AB.append(sum(a * b for a, b in zip(xi, Xt[j])))  # X_i dot X_j
            AB.append(sum(xi) * beta)  # X_i dot X_d
            for j in range(e):
                AB.append(sum(a * b for a, b in zip(xi, Yt[j])))  # X_i dot Y_j
        AB.append(n1 * beta**2)  # X_d dot X_d
        for j in range(e):
            AB.append(beta * sum(Yt[j]))  # X_d dot Y_j

        del Xt, Yt
        AB = [secint.field(a) for a in AB]
        AB = await mpc._reshare(AB)

        timeMiddle = time.process_time()
        logging.info('Compute w = A^-1 B')

        # convert secint to secfld
        AB = [secint(a) for a in AB]
        AB = mpc.convert(AB, secfld)

        # extract A and B from the AB array
        A = [[None] * d for _ in range(d)]
        B = [[None] * e for _ in range(d)]
        index = 0
        for i in range(d):
            A[i][i] = AB[index] + lambda_
            index += 1
            for j in range(i + 1, d):
                A[i][j] = A[j][i] = AB[index]  # A is symmetric
                index += 1
            for j in range(e):
                B[i][j] = AB[index]
                index += 1

        # solve A w = B
        w_det = linear_solve(A, B)
        w_det = await mpc.output(w_det)
        w_det = list(map(int, w_det))
        # The last entry is the common denominator for the d*e entries before
        # it. Keep the (possibly huge) Python ints in an object array and
        # divide out of place: an in-place "/=" raises a casting error whenever
        # the values are small enough for NumPy to pick a native integer dtype.
        w = np.array(w_det[:-1], dtype=object).reshape(d, e) / w_det[-1]

        timeEnd = time.process_time()
        logging.info(f'Total time {timeEnd - timeStart} = '
                     f'A and B in {timeMiddle - timeStart} + '
                     f'A^-1 B in {timeEnd - timeMiddle} seconds')

        # the last row of w holds the intercept (weights of the virtual column)
        error_train_mpyc = rmse(Y1, np.dot(X1, w[:-1]) + w[-1])
        error_test_mpyc = rmse(Y2, np.dot(X2, w[:-1]) + w[-1])
        print(f'MPyC train error: {error_train_mpyc}')
        print(f'MPyC test error:  {error_test_mpyc}')
        print(
            f'relative train error: {(error_train_mpyc - error_train_skit) / error_train_skit}'
        )
        print(
            f'relative test error:  {(error_test_mpyc - error_test_skit) / error_test_skit}'
        )

    await mpc.shutdown()