def test_convert(self):
    """Round-trip conversions between secure integer, prime-field, and fixed-point types."""
    secint = mpc.SecInt()
    secint8 = mpc.SecInt(8)
    secint16 = mpc.SecInt(16)
    secfld257 = mpc.SecFld(257)
    secfld263 = mpc.SecFld(263)
    secfxp = mpc.SecFxp()
    secfxp16 = mpc.SecFxp(16)

    def opened(shares):
        # Reveal a list of secret-shared values as plain numbers.
        return mpc.run(mpc.output(shares))

    # 8-bit secure int -> default secure int and back again.
    x = [secint8(-100), secint8(100)]
    y = mpc.convert(x, secint)
    self.assertEqual(opened(y), [-100, 100])
    y = mpc.convert(y, secint8)
    self.assertEqual(opened(y), [-100, 100])

    # 16-bit secure int -> prime field GF(257).
    x = [secint16(i) for i in range(10)]
    y = mpc.convert(x, secfld257)
    self.assertEqual(opened(y), list(range(10)))

    # Prime field GF(257) -> prime field GF(263).
    x = [secfld257(i) for i in range(10)]
    y = mpc.convert(x, secfld263)
    self.assertEqual(opened(y), list(range(10)))

    # Default secure int <-> default secure fixed point.
    x = [secint(-100), secint(100)]
    y = mpc.convert(x, secfxp)
    self.assertEqual(opened(y), [-100, 100])
    y = mpc.convert(y, secint)
    self.assertEqual(opened(y), [-100, 100])

    # 16-bit secure fixed point <-> default secure fixed point.
    x = [secfxp16(-100.25), secfxp16(100.875)]
    y = mpc.convert(x, secfxp)
    self.assertEqual(opened(y), [-100.25, 100.875])
    y = mpc.convert(y, secfxp16)
    self.assertEqual(opened(y), [-100.25, 100.875])
def test_empty_input(self):
    """Core mpc primitives accept empty lists and return the appropriate neutral value."""
    secint = mpc.SecInt()
    # Pairs of (actual result, expected neutral value), evaluated in order.
    checks = [
        (mpc.run(mpc.gather([])), []),
        (mpc.run(mpc.output([])), []),
        (mpc._reshare([]), []),
        (mpc.convert([], None), []),
        (mpc.sum([]), 0),                      # empty sum
        (mpc.prod([]), 1),                     # empty product
        (mpc.in_prod([], []), 0),              # empty inner product
        (mpc.vector_add([], []), []),
        (mpc.vector_sub([], []), []),
        (mpc.scalar_mul(secint(0), []), []),
        (mpc.schur_prod([], []), []),
        (mpc.from_bits([]), 0),
    ]
    for actual, expected in checks:
        self.assertEqual(actual, expected)
async def main():
    """Run the secure ridge regression demo.

    Parses command-line options, loads a UCI dataset (or synthesizes one),
    fits a plaintext scikit-learn Ridge model as a baseline, and then solves
    the same regularized least-squares problem under MPC for each accuracy
    level alpha, reporting train/test RMSE relative to the baseline.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-i', '--dataset', type=int, metavar='I',
        help=('dataset 0=synthetic (default), 1=student, 2=wine-red, '
              '3=wine-white, 4=year, 5=gas-methane, 6=gas-CO, 7=higgs'))
    parser.add_argument('-u', '--data-url', action='store_true', default=False,
                        help='show URL for downloading dataset I')
    parser.add_argument('-l', '--lambda_', type=float, metavar='L',
                        help='regularization L>=0.0 (default=1.0)')
    parser.add_argument('-a', '--accuracy', type=int, metavar='A',
                        help='accuracy A (number of fractional bits)')
    parser.add_argument(
        '-n', '--samples', type=int, metavar='N',
        help='number of samples in synthetic data (default=1000)')
    parser.add_argument(
        '-d', '--features', type=int, metavar='D',
        help='number of features in synthetic data (default=10)')
    parser.add_argument('-e', '--targets', type=int, metavar='E',
                        help='number of targets in synthetic data (default=1)')
    parser.set_defaults(dataset=0, lambda_=1.0, accuracy=-1,
                        samples=1000, features=10, targets=1)
    args = parser.parse_args()

    await mpc.start()

    if not args.dataset:
        # Synthetic data: sweep accuracy alpha over 4..7 fractional bits.
        range_alpha = range(4, 8)
        n, d, e, split = args.samples, args.features, args.targets, 0
        name = 'SYNTHETIC'
        logging.info('Generating synthetic data')
        X = await synthesize_data(n, d, e)
    else:
        # Each entry: (UCI URL suffix, local file base name, accuracy alpha).
        settings = [('student+performance', 'student-mat', 6),
                    ('Wine+Quality', 'winequality-red', 7),
                    ('Wine+Quality', 'winequality-white', 8),
                    ('Yearpredictionmsd', 'YearPredictionMSD', 6),
                    ('Gas+sensor+array+under+dynamic+gas+mixtures', 'ethylene_methane', 8),
                    ('Gas+sensor+array+under+dynamic+gas+mixtures', 'ethylene_CO', 9),
                    ('HIGGS', 'HIGGS', 5)]
        url, name, alpha = settings[args.dataset - 1]
        url = 'https://archive.ics.uci.edu/ml/datasets/' + url
        if args.data_url:
            print(f'URL: {url}')
        range_alpha = range(alpha, alpha + 1)
        infofile = os.path.join('data', 'regr', 'info-' + name + '.csv')
        logging.info(f'Loading dataset {name}')
        X, d, e, split = read_data(infofile)
        n = len(X)
        logging.info(f'Loaded {n} samples')
    print(f'dataset: {name} with {n} samples, {d} features, and {e} target(s)')
    print(f'regularization lambda: {args.lambda_}')

    # split in train set and test set
    if split:
        # fixed split
        X1, X2 = X[:split], X[split:]
    else:
        # random split (all parties use same rnd)
        rnd = await mpc.transfer(random.randrange(2**31), senders=0)
        X1, X2 = sklearn.model_selection.train_test_split(X, train_size=0.7, random_state=rnd)
    del X
    # Features are the first d columns, targets the remaining e columns.
    X1, Y1 = X1[:, :d], X1[:, d:]
    X2, Y2 = X2[:, :d], X2[:, d:]
    n1 = len(X1)
    d = d + 1  # add (virtual) feature column X_d = [1, ..., 1] for vertical intercept

    # ridge regression "in the clear"
    ridge = sklearn.linear_model.Ridge(alpha=args.lambda_,
                                       fit_intercept=True,
                                       copy_X=True,
                                       solver='cholesky')
    ridge.fit(X1, Y1)
    error_train_skit = rmse(Y1, ridge.predict(X1))
    error_test_skit = rmse(Y2, ridge.predict(X2))
    print(f'scikit train error: {error_train_skit}')
    print(f'scikit test error: {error_test_skit}')

    if args.accuracy >= 0:
        # Explicit accuracy overrides the per-dataset default sweep.
        alpha = args.accuracy
        range_alpha = range(alpha, alpha + 1)
    for alpha in range_alpha:  # accuracy parameter
        print('accuracy alpha:', alpha)
        # set parameters accordingly
        beta = 2**alpha          # fixed-point scale factor
        lambda_ = round(args.lambda_ * beta**2)
        gamma = n1 * beta**2 + lambda_   # bound on entries of A = X^T X + lambda I
        secint = mpc.SecInt(gamma.bit_length() + 1)
        print(f'secint prime size: |q| = {secint.field.modulus.bit_length()} bits'
              f' (secint bit length: {secint.bit_length})')
        # Hadamard-type bound on the determinant of A — presumably to make the
        # exact integer solve below overflow-free; TODO confirm derivation.
        bound = round(d**(d / 2)) * gamma**d
        secfld = mpc.SecFld(min_order=2 * bound + 1, signed=True)
        print(f'secfld prime size: |p| = {secfld.field.modulus.bit_length()} bits')

        f2 = float(beta)
        q = secint.field.modulus
        logging.info('Transpose, scale, and create (degree 0) shares for X and Y')
        # enforce full size shares (mod q numbers) by adding q to each element
        Xt = [[int(a * f2) + q for a in col] for col in X1.transpose()]
        Yt = [[int(a * f2) + q for a in col] for col in Y1.transpose()]

        timeStart = time.process_time()
        logging.info('Compute A = X^T X + lambda I and B = X^T Y')
        # Accumulate the upper triangle of A and all of B into one flat list.
        AB = []
        for i in range(d - 1):
            xi = Xt[i]
            for j in range(i, d - 1):
                xj = Xt[j]
                s = 0
                for k in range(n1):
                    s += xi[k] * xj[k]
                AB.append(s)  # X_i dot X_j
            AB.append(sum(xi) * beta)  # X_i dot X_d
            for j in range(e):
                yj = Yt[j]
                s = 0
                for k in range(n1):
                    s += xi[k] * yj[k]
                AB.append(s)  # X_i dot Y_j
        AB.append(n1 * beta**2)  # X_d dot X_d
        for j in range(e):
            AB.append(beta * sum(Yt[j]))  # X_d dot Y_j
        del Xt, Yt
        AB = [secint.field(a) for a in AB]
        AB = await mpc._reshare(AB)

        timeMiddle = time.process_time()
        logging.info('Compute w = A^-1 B')
        # convert secint to secfld
        AB = [secint(a) for a in AB]
        AB = mpc.convert(AB, secfld)

        # extract A and B from the AB array
        A = [[None] * d for _ in range(d)]
        B = [[None] * e for _ in range(d)]
        index = 0
        for i in range(d):
            A[i][i] = AB[index] + lambda_   # diagonal gets the regularization term
            index += 1
            for j in range(i + 1, d):
                A[i][j] = A[j][i] = AB[index]   # A is symmetric
                index += 1
            for j in range(e):
                B[i][j] = AB[index]
                index += 1

        # solve A w = B
        w_det = linear_solve(A, B)
        w_det = await mpc.output(w_det)
        w_det = list(map(int, w_det))
        # Last entry is the common denominator shared by all solution entries.
        w = np.reshape(w_det[:-1], (d, e))
        w /= w_det[-1]

        timeEnd = time.process_time()
        logging.info(f'Total time {timeEnd - timeStart} = '
                     f'A and B in {timeMiddle - timeStart} + '
                     f'A^-1 B in {timeEnd - timeMiddle} seconds')

        # w[:-1] are the feature weights; w[-1] is the intercept row.
        error_train_mpyc = rmse(Y1, np.dot(X1, w[:-1]) + w[-1])
        error_test_mpyc = rmse(Y2, np.dot(X2, w[:-1]) + w[-1])
        print(f'MPyC train error: {error_train_mpyc}')
        print(f'MPyC test error: {error_test_mpyc}')
        print(f'relative train error: {(error_train_mpyc - error_train_skit) / error_train_skit}')
        print(f'relative test error: {(error_test_mpyc - error_test_skit) / error_test_skit}')

    await mpc.shutdown()