def ciphertext_elections(draw: _DrawType, election_description: ElectionDescription):
    """
    Generates a `CiphertextElectionContext` backed by one freshly drawn ElGamal
    key pair (one guardian, quorum of one). A real election would obtain a
    shared public key from the key ceremony instead.

    :param draw: Hidden argument, used by Hypothesis.
    :param election_description: The `ElectionDescription` whose `crypto_hash()`
        is bound into the generated context.
    :return: a tuple `(secret_key, context)`: the secret half of the drawn key
        pair first, followed by the `CiphertextElectionContext` built from the
        public half.
    """
    # Draw order is kept stable: key pair first, then the commitment hash.
    keypair = draw(elgamal_keypairs())
    secret_key, public_key = keypair
    drawn_commitment_hash = draw(elements_mod_q_no_zero())

    context = make_ciphertext_election_context(
        number_of_guardians=1,
        quorum=1,
        elgamal_public_key=public_key,
        commitment_hash=drawn_commitment_hash,
        description_hash=election_description.crypto_hash(),
    )

    result: CIPHERTEXT_ELECTIONS_TUPLE_TYPE = (secret_key, context)
    return result
def test_publish(self) -> None:
    """
    Publish a minimal, empty election record and assert that `RESULTS_DIR`
    exists afterwards.

    The results directory is removed in a `finally` clause so that a failing
    `publish` call or a failing assertion does not leave the directory behind
    for later tests.
    """
    # Arrange
    now = datetime.now(timezone.utc)
    manifest = Manifest("", ElectionType.unknown, now, now, [], [], [], [], [], [])
    context = make_ciphertext_election_context(
        1, 1, ONE_MOD_P, ONE_MOD_Q, ONE_MOD_Q
    )
    constants = ElectionConstants()
    devices = []
    guardian_records = [GuardianRecord("", "", ONE_MOD_Q, [], [])]
    encrypted_ballots = []
    spoiled_ballots = []
    plaintext_tally = PlaintextTally("", [])
    ciphertext_tally = CiphertextTally("", manifest, context)

    try:
        # Act
        publish(
            manifest,
            context,
            constants,
            devices,
            encrypted_ballots,
            spoiled_ballots,
            ciphertext_tally.publish(),
            plaintext_tally,
            guardian_records,
        )

        # Assert
        self.assertTrue(path.exists(RESULTS_DIR))
    finally:
        # Cleanup: runs on failure too. The existence guard keeps a missing
        # directory from raising here and masking the original error.
        if path.exists(RESULTS_DIR):
            rmtree(RESULTS_DIR)
def test_publish(self) -> None:
    """
    Publish a minimal, empty election record (built from an
    `ElectionDescription` and a single `CoefficientValidationSet`) and assert
    that `RESULTS_DIR` exists afterwards.

    The results directory is removed in a `finally` clause so that a failing
    `publish` call or a failing assertion does not leave the directory behind
    for later tests.
    """
    # Arrange
    now = datetime.now(timezone.utc)
    description = ElectionDescription(
        "", ElectionType.unknown, now, now, [], [], [], [], [], []
    )
    context = make_ciphertext_election_context(1, 1, ONE_MOD_P, ONE_MOD_Q)
    constants = ElectionConstants()
    devices = []
    coefficients = [CoefficientValidationSet("", [], [])]
    encrypted_ballots = []
    spoiled_ballots = []
    plaintext_tally = PlaintextTally("", [], [])
    ciphertext_tally = publish_ciphertext_tally(
        CiphertextTally("", description, context)
    )

    try:
        # Act
        publish(
            description,
            context,
            constants,
            devices,
            encrypted_ballots,
            spoiled_ballots,
            ciphertext_tally,
            plaintext_tally,
            coefficients,
        )

        # Assert
        self.assertTrue(path.exists(RESULTS_DIR))
    finally:
        # Cleanup: runs on failure too. The existence guard keeps a missing
        # directory from raising here and masking the original error.
        if path.exists(RESULTS_DIR):
            rmtree(RESULTS_DIR)
def build_election_context(request: ElectionContextRequest = Body(...)) -> Any:
    """
    Build a CiphertextElectionContext for a given election
    """
    # NOTE(review): the docstring text above is left untouched on purpose; if
    # this function is registered as a web route, the framework may surface it
    # in generated API docs -- confirm before rewording it.

    # Deserialize the two structured inputs first, in the same order as before.
    manifest = ElectionDescription.from_json_object(request.description)
    joint_public_key = read_json_object(request.elgamal_public_key, ElementModP)

    # The guardian count and quorum are passed through from the request as-is;
    # the description hash is computed last, as in the original flow.
    return write_json_object(
        make_ciphertext_election_context(
            request.number_of_guardians,
            request.quorum,
            joint_public_key,
            manifest.crypto_hash(),
        )
    )
def ray_tally_everything(
    cvrs: DominionCSV,
    verbose: bool = True,
    use_progressbar: bool = True,
    date: Optional[datetime] = None,
    seed_hash: Optional[ElementModQ] = None,
    master_nonce: Optional[ElementModQ] = None,
    secret_key: Optional[ElementModQ] = None,
    root_dir: Optional[str] = None,
) -> "RayTallyEverythingResults":
    """
    This top-level function takes a collection of Dominion CVRs and produces everything that
    we might want for arlo-e2e: a list of encrypted ballots, their encrypted and decrypted tally,
    and proofs of the correctness of the whole thing. The election `secret_key` is an optional
    parameter. If absent, a random keypair is generated and used. Similarly, if a `seed_hash` or
    `master_nonce` is not provided, random ones are generated and used.

    For parallelism, Ray is used. Make sure you've called `ray.init()` or `ray_localhost_init()`
    before calling this.

    If `root_dir` is specified, then the tally is written out to the specified directory, and
    the resulting `RayTallyEverythingResults` object will support the methods that allow those
    ballots to be read back in again. Conversely, if `root_dir` is `None`, then nothing is
    written to disk, and the result will not have access to individual ballots.

    :param cvrs: the parsed Dominion CVR data; its `data` frame supplies both the ballot
        count and the plaintext column sums used for the sanity check.
    :param verbose: forwarded only to the final throughput log message; the other
        `log_and_print` calls in this function do not pass it.
    :param use_progressbar: when true, a `ProgressBar` is created and its actor is handed
        to the remote tasks.
    :param date: election date passed to `to_election_description_ray`; defaults to
        `datetime.now()`.
    :param seed_hash: defaults to `rand_q()` when absent.
    :param master_nonce: seed for the `Nonces` sequence; defaults to `rand_q()` when absent.
    :param secret_key: ElGamal secret key; a random keypair is generated when absent.
    :param root_dir: output directory, or `None` to write nothing.
    :return: a `RayTallyEverythingResults` holding the metadata, election description,
        ballot count, optional manifest, tally with proofs, and encryption context.
    :raises AssertionError: if there are no ballots, the keypair cannot be computed, the
        tally is `None`, the decrypted tally has unexpected keys or totals that disagree
        with the plaintext sums, or the aggregated manifest has the wrong type.
    """
    # `rows` is reported below as the number of ballots; `cols` is not used in this function.
    rows, cols = cvrs.data.shape

    # presumably blocks until at least two Ray workers are available -- confirm against
    # ray_wait_for_workers
    ray_wait_for_workers(min_workers=2)

    if date is None:
        date = datetime.now()

    # The output directory and the manifest aggregator actor are only needed when the
    # caller asked for results on disk.
    if root_dir is not None:
        mkdir_helper(root_dir, num_retries=NUM_WRITE_RETRIES)
        r_manifest_aggregator = ManifestAggregatorActor.remote(root_dir)  # type: ignore
    else:
        r_manifest_aggregator = None

    # Names prefixed with `r_` are the handles that get passed to the remote tasks below.
    r_root_dir = ray.put(root_dir)

    start_time = timer()

    # Performance note: by using to_election_description_ray rather than to_election_description, we're
    # only getting back a list of dictionaries rather than a list of PlaintextBallots. We're pushing that
    # work out into the nodes, where it will run in parallel. The BallotPlaintextFactory wraps up all
    # the (immutable) state necessary to convert from these dicts to PlaintextBallots and is meant to
    # be sent to every node in the cluster.
    ed, bpf, ballot_dicts, id_map = cvrs.to_election_description_ray(date=date)
    setup_time = timer()
    num_ballots = len(ballot_dicts)
    assert num_ballots > 0, "can't have zero ballots!"
    log_and_print(
        f"ElectionGuard setup time: {setup_time - start_time: .3f} sec, {num_ballots / (setup_time - start_time):.3f} ballots/sec"
    )

    # Use the caller's secret key when given; otherwise generate a fresh random keypair.
    keypair = (elgamal_keypair_random() if secret_key is None else
               elgamal_keypair_from_secret(secret_key))
    assert keypair is not None, "unexpected failure with keypair computation"
    # Note: this rebinds the `secret_key` parameter to the key held in the keypair.
    secret_key, public_key = keypair

    # Single-guardian context: one guardian, quorum of one.
    cec = make_ciphertext_election_context(
        number_of_guardians=1,
        quorum=1,
        elgamal_public_key=public_key,
        description_hash=ed.crypto_hash(),
    )
    r_cec = ray.put(cec)

    ied = InternalElectionDescription(ed)
    r_ied = ray.put(ied)

    if seed_hash is None:
        seed_hash = rand_q()
    r_seed_hash = ray.put(seed_hash)
    r_keypair = ray.put(keypair)

    r_ballot_plaintext_factory = ray.put(bpf)

    if master_nonce is None:
        master_nonce = rand_q()

    nonces = Nonces(master_nonce)
    r_nonces = ray.put(nonces)
    # Each ballot is paired with its position, which serves as its index into `nonces`.
    nonce_indices = range(num_ballots)

    inputs = list(zip(ballot_dicts, nonce_indices))

    batches = shard_list_uniform(inputs, BATCH_SIZE)
    num_batches = len(batches)
    log_and_print(
        f"Launching Ray.io remote encryption! (number of batches: {num_batches})"
    )

    # Restart the clock: the throughput figure logged at the end covers encryption and
    # tabulation only, not the setup measured above.
    start_time = timer()

    progressbar = (ProgressBar({
        "Ballots": num_ballots,
        "Tallies": num_ballots,
        "Iterations": 0,
        "Batch": 0,
    }) if use_progressbar else None)
    progressbar_actor = progressbar.actor if progressbar is not None else None

    batch_tallies: List[ObjectRef] = []
    for batch in batches:
        if progressbar_actor:
            progressbar_actor.update_completed.remote("Batch", 1)

        # NOTE(review): `num_ballots_in_batch` and `num_shards` are computed but not
        # used anywhere below.
        num_ballots_in_batch = len(batch)
        sharded_inputs = shard_list_uniform(batch, BALLOTS_PER_SHARD)
        num_shards = len(sharded_inputs)

        # One remote encrypt-and-write task per shard.
        # presumably right_tuple_list yields the nonce indices and left_tuple_list the
        # ballot dicts of each (ballot_dict, nonce_index) pair -- confirm against helpers
        partial_tally_refs = [
            r_encrypt_and_write.remote(
                r_ied,
                r_cec,
                r_seed_hash,
                r_root_dir,
                r_manifest_aggregator,
                progressbar_actor,
                r_ballot_plaintext_factory,
                r_nonces,
                right_tuple_list(shard),
                *(left_tuple_list(shard)),
            ) for shard in sharded_inputs
        ]

        # log_and_print("Remote tallying.")
        btally = ray_tally_ballots(partial_tally_refs, BALLOTS_PER_SHARD,
                                   progressbar)
        batch_tallies.append(btally)

    # Each batch ultimately yields one partial tally; we add these up here at the
    # very end. If we have a million ballots and have batches of 10k ballots, this
    # would mean we'd have only 100 partial tallies. So, what's here works just fine.
    # If we wanted, we could certainly burn some scalar time and keep a running,
    # singular, partial tally. It's probably more important to push onward to the
    # next batch, so we can do as much work in parallel as possible.

    # NOTE(review): the second argument is BALLOTS_PER_SHARD for the per-batch call above
    # and a literal 10 here; its meaning is defined by ray_tally_ballots.
    if len(batch_tallies) > 1:
        tally = ray.get(ray_tally_ballots(batch_tallies, 10, progressbar))
    else:
        # presumably there is always at least one batch because num_ballots > 0 was
        # asserted above -- depends on shard_list_uniform
        tally = ray.get(batch_tallies[0])

    if progressbar:
        progressbar.close()

    assert tally is not None, "tally failed!"

    log_and_print("Tally decryption.")
    # The context and keypair are passed as `r_` handles; `seed_hash` is passed by value.
    decrypted_tally: DECRYPT_TALLY_OUTPUT_TYPE = ray_decrypt_tally(
        tally, r_cec, r_keypair, seed_hash)

    log_and_print("Validating tally.")

    # Sanity-checking logic: make sure we don't have any unexpected keys, and that the decrypted totals
    # match up with the columns in the original plaintext data.
    tally_keys = set(decrypted_tally.keys())
    expected_keys = set(id_map.keys())

    assert tally_keys.issubset(
        expected_keys
    ), f"bad tally keys (actual keys: {sorted(tally_keys)}, expected keys: {sorted(expected_keys)})"

    for obj_id in decrypted_tally.keys():
        # `id_map` gives the CVR column for this selection; its sum is the expected total.
        cvr_sum = int(cvrs.data[id_map[obj_id]].sum())
        # `proof` is unpacked here but not examined by this check.
        decryption, proof = decrypted_tally[obj_id]
        assert cvr_sum == decryption, f"decryption failed for {obj_id}"

    final_manifest: Optional[Manifest] = None

    # The aggregator actor only exists when `root_dir` was given (see above).
    if root_dir is not None:
        final_manifest = ray.get(r_manifest_aggregator.result.remote())
        assert isinstance(
            final_manifest,
            Manifest), "type error: bad result from manifest aggregation"

    # Assemble the data structure that we're returning. Having nonces in the ciphertext makes these
    # structures sensitive for writing out to disk, but otherwise they're ready to go.

    log_and_print("Constructing results.")
    # Keyed by the encrypted tally's keys; every such key is also looked up in
    # `decrypted_tally`.
    reported_tally: Dict[str, SelectionInfo] = {
        k: SelectionInfo(
            object_id=k,
            encrypted_tally=tally[k],
            # we need to forcibly convert mpz to int here to make serialization work properly
            decrypted_tally=int(decrypted_tally[k][0]),
            proof=decrypted_tally[k][1],
        )
        for k in tally.keys()
    }

    tabulate_time = timer()

    # This is the only log message that honours `verbose`.
    log_and_print(
        f"Encryption and tabulation: {rows} ballots, {rows / (tabulate_time - start_time): .3f} ballot/sec",
        verbose,
    )

    return RayTallyEverythingResults(
        metadata=cvrs.metadata,
        cvr_metadata=cvrs.dataframe_without_selections(),
        election_description=ed,
        num_ballots=rows,
        manifest=final_manifest,
        tally=SelectionTally(reported_tally),
        context=cec,
    )
def fast_tally_everything(
    cvrs: DominionCSV,
    pool: Optional[Pool] = None,
    verbose: bool = True,
    date: Optional[datetime] = None,
    seed_hash: Optional[ElementModQ] = None,
    master_nonce: Optional[ElementModQ] = None,
    secret_key: Optional[ElementModQ] = None,
    use_progressbar: bool = True,
) -> FastTallyEverythingResults:
    """
    This top-level function takes a collection of Dominion CVRs and produces everything that
    we might want for arlo-e2e: a list of encrypted ballots, their encrypted and decrypted tally,
    and proofs of the correctness of the whole thing. The election `secret_key` is an optional
    parameter. If absent, a random keypair is generated and used. Similarly, if a `seed_hash` or
    `master_nonce` is not provided, random ones are generated and used.

    For parallelism, a `multiprocessing.pool.Pool` may be provided, and should result in significant
    speedups on multicore computers. If absent, the computation will proceed sequentially.

    :param cvrs: the parsed Dominion CVR data; its `data` frame supplies the row/column
        counts and the plaintext column sums used for the sanity check.
    :param pool: optional worker pool, handed to the encryption, tallying and decryption
        helpers.
    :param verbose: passed to every `log_and_print` call here and to `fast_decrypt_tally`;
        also gates the "Decryption & Proofs" banner.
    :param date: election date passed to `to_election_description`; defaults to
        `datetime.now()`.
    :param seed_hash: defaults to `rand_q()` when absent.
    :param master_nonce: seed for the `Nonces` sequence; defaults to `rand_q()` when absent.
    :param secret_key: ElGamal secret key; a random keypair is generated when absent.
    :param use_progressbar: forwarded to `fast_encrypt_ballots`.
    :return: a `FastTallyEverythingResults` holding the metadata, election description,
        per-ballot memos keyed by ballot `object_id`, tally with proofs, and context.
    :raises AssertionError: if there are no ballots, the keypair cannot be computed, the
        DLog priming round-trip fails, the tally is `None`, or a decrypted total is unknown
        or disagrees with the plaintext sums.
    """
    rows, cols = cvrs.data.shape

    if date is None:
        date = datetime.now()

    # First timing checkpoint; each later checkpoint is logged relative to an earlier one.
    parse_time = timer()
    log_and_print(f"Rows: {rows}, cols: {cols}", verbose)

    ed, ballots, id_map = cvrs.to_election_description(date=date)
    assert len(ballots) > 0, "can't have zero ballots!"

    # Use the caller's secret key when given; otherwise generate a fresh random keypair.
    keypair = (elgamal_keypair_random() if secret_key is None else
               elgamal_keypair_from_secret(secret_key))
    assert keypair is not None, "unexpected failure with keypair computation"
    # Note: this rebinds the `secret_key` parameter to the key held in the keypair.
    secret_key, public_key = keypair

    # This computation exists only to cause side-effects in the DLog engine, so the lame nonce is not an issue.
    # It encrypts the ballot count and decrypts it again, asserting the round trip.
    assert len(ballots) == get_optional(
        elgamal_encrypt(
            m=len(ballots), nonce=int_to_q_unchecked(3), public_key=public_key
        )
    ).decrypt(secret_key), "got wrong ElGamal decryption!"

    dlog_prime_time = timer()
    log_and_print(
        f"DLog prime time (n={len(ballots)}): {dlog_prime_time - parse_time: .3f} sec",
        verbose,
    )

    # Single-guardian context: one guardian, quorum of one.
    cec = make_ciphertext_election_context(
        number_of_guardians=1,
        quorum=1,
        elgamal_public_key=public_key,
        description_hash=ed.crypto_hash(),
    )

    ied = InternalElectionDescription(ed)

    # REVIEW THIS: is this cryptographically sound? Is the seed_hash properly a secret? Should
    # it go in the output? The nonces are clearly secret. If you know them, you can decrypt.
    if seed_hash is None:
        seed_hash = rand_q()
    if master_nonce is None:
        master_nonce = rand_q()
    # One nonce per ballot, taken from the front of the sequence seeded by `master_nonce`.
    nonces: List[ElementModQ] = Nonces(master_nonce)[0:len(ballots)]

    # even if verbose is false, we still want to see the progress bar for the encryption
    cballots = fast_encrypt_ballots(ballots,
                                    ied,
                                    cec,
                                    seed_hash,
                                    nonces,
                                    pool,
                                    use_progressbar=use_progressbar)
    eg_encrypt_time = timer()

    log_and_print(
        f"Encryption time: {eg_encrypt_time - dlog_prime_time: .3f} sec",
        verbose)
    log_and_print(
        f"Encryption rate: {rows / (eg_encrypt_time - dlog_prime_time): .3f} ballot/sec",
        verbose,
    )

    tally: TALLY_TYPE = fast_tally_ballots(cballots, pool)
    eg_tabulate_time = timer()

    log_and_print(
        f"Tabulation time: {eg_tabulate_time - eg_encrypt_time: .3f} sec",
        verbose)
    log_and_print(
        f"Tabulation rate: {rows / (eg_tabulate_time - eg_encrypt_time): .3f} ballot/sec",
        verbose,
    )
    # Combined figure: measured from the end of DLog priming to the end of tabulation.
    log_and_print(
        f"Encryption and tabulation: {rows} ballots / {eg_tabulate_time - dlog_prime_time: .3f} sec = {rows / (eg_tabulate_time - dlog_prime_time): .3f} ballot/sec",
        verbose,
    )

    assert tally is not None, "tally failed!"

    if verbose:  # pragma: no cover
        print("Decryption & Proofs: ")
    decrypted_tally: DECRYPT_TALLY_OUTPUT_TYPE = fast_decrypt_tally(
        tally, cec, keypair, seed_hash, pool, verbose)
    eg_decryption_time = timer()
    log_and_print(
        f"Decryption time: {eg_decryption_time - eg_tabulate_time: .3f} sec",
        verbose)
    # The decryption rate is per tallied selection, not per ballot.
    log_and_print(
        f"Decryption rate: {len(decrypted_tally.keys()) / (eg_decryption_time - eg_tabulate_time): .3f} selection/sec",
        verbose,
    )

    # Sanity-checking logic: make sure we don't have any unexpected keys, and that the decrypted totals
    # match up with the columns in the original plaintext data.
    for obj_id in decrypted_tally.keys():
        assert obj_id in id_map, "object_id in results that we don't know about!"
        # `id_map` gives the CVR column for this selection; its sum is the expected total.
        cvr_sum = int(cvrs.data[id_map[obj_id]].sum())
        # `proof` is unpacked here but not examined by this check.
        decryption, proof = decrypted_tally[obj_id]
        assert cvr_sum == decryption, f"decryption failed for {obj_id}"

    # Assemble the data structure that we're returning. Having nonces in the ciphertext makes these
    # structures sensitive for writing out to disk, but otherwise they're ready to go.
    # Keyed by the encrypted tally's keys; every such key is also looked up in
    # `decrypted_tally`.
    reported_tally: Dict[str, SelectionInfo] = {
        k: SelectionInfo(
            object_id=k,
            encrypted_tally=tally[k],
            # we need to forcibly convert mpz to int here to make serialization work properly
            decrypted_tally=int(decrypted_tally[k][0]),
            proof=decrypted_tally[k][1],
        )
        for k in tally.keys()
    }

    # strips the ballots of their nonces, which is important because those could allow for decryption
    accepted_ballots = [ciphertext_ballot_to_accepted(x) for x in cballots]

    return FastTallyEverythingResults(
        metadata=cvrs.metadata,
        cvr_metadata=cvrs.dataframe_without_selections(),
        election_description=ed,
        encrypted_ballot_memos={
            ballot.object_id: make_memo_value(ballot)
            for ballot in accepted_ballots
        },
        tally=SelectionTally(reported_tally),
        context=cec,
    )