def bench_cl_ul(name: str, xy_train, xy_val, xy_test, epochs: int, use_coordinator: bool):
    """Run one training benchmark (coordinated/federated or unitary) and store results.

    :param name: Benchmark name; also used as the results-file prefix.
    :param xy_train: Training split (wrapped in a single-element partition list
        when the coordinator is used).
    :param xy_val: Validation split.
    :param xy_test: Test split.
    :param epochs: Total number of epochs (rounds ``R`` with ``E=1`` when
        coordinated, plain ``E`` otherwise).
    :param use_coordinator: Route training through the federated coordinator
        with a single partition instead of calling unitary training directly.

    NOTE(review): the JSON key is "unitary_learning" even on the coordinated
    path — confirm downstream consumers before renaming it.
    """
    t0 = time.time()
    if use_coordinator:
        hist, _, _, loss, acc = run.federated_training(
            "blog_cnn", [xy_train], xy_val, xy_test, R=epochs, E=1, C=0, B=B
        )
    else:
        hist, loss, acc = run.unitary_training(
            "blog_cnn", xy_train, xy_val, xy_test, E=epochs, B=B
        )
    t1 = time.time()

    # Persist the benchmark outcome as JSON
    payload = {
        "name": name,
        "start": t0,
        "end": t1,
        "duration": t1 - t0,
        "E": epochs,
        "B": B,
        "unitary_learning": {
            "loss": float(loss),
            "acc": float(acc),
            "hist": hist,
        },
    }
    storage.write_json(payload, fname=name + "-results.json")
def save_splits(dataset_name: str, dataset: FederatedDataset, local_generator_dir: str):
    """Write every split of *dataset* to disk and record per-split checksums.

    Each (filename, ndarray) pair is saved into the dataset directory; the
    returned SHA-1 checksums are collected into a ``{key: [x_hash, y_hash]}``
    mapping that is written as ``<dataset_name>.json`` two levels above the
    dataset directory.

    :param dataset_name: Name used for the dataset directory and hash file.
    :param dataset: Federated dataset to persist.
    :param local_generator_dir: Root directory for generated datasets.
    """
    pairs = dataset_to_fname_ndarray_tuple_list(dataset)
    dataset_dir = get_dataset_dir(
        dataset_name=dataset_name, local_generator_dir=local_generator_dir
    )
    logging.info("Storing dataset in {}".format(dataset_dir))

    split_hashes: Dict[str, List[Optional[str]]] = {}
    for fname, ndarr in pairs:
        checksum = save(fname=fname, data=ndarr, storage_dir=dataset_dir)
        # fname[2:-4] — presumably strips a 2-char prefix (e.g. "x_"/"y_")
        # and a 4-char extension to get the split key; TODO confirm naming.
        key = fname[2:-4]
        slot = split_hashes.setdefault(key, [None, None])
        # x-arrays go in position 0, y-arrays in position 1
        slot[0 if "x_" in fname else 1] = checksum

    hash_file = os.path.join(dataset_dir, f"../../{dataset_name}.json")
    storage.write_json(split_hashes, hash_file)
    logging.info("{} generated and stored\n".format(dataset_name))
def main(_):
    """CLI entry point: train per FLAGS (unitary or federated), write results.json.

    When ``FLAGS.partition_id`` is set, a single partition is trained in
    isolation ("unitary learning"); otherwise federated training runs across
    all partitions.
    """
    # Register the result-push callback to run at interpreter exit
    if FLAGS.push_results:
        atexit.register(
            after_main, group_name=FLAGS.group_name, task_name=FLAGS.task_name
        )

    # Load data
    xy_train_partitions, xy_val, xy_test = load_splits(FLAGS.dataset)

    # Execute training
    started_at = time.time()
    partition_id = FLAGS.partition_id
    hist_metrics = None  # only populated by the federated path
    if partition_id is None:
        hist, _, hist_metrics, loss, acc = run.federated_training(
            model_name=FLAGS.model,
            xy_train_partitions=xy_train_partitions,
            xy_val=xy_val,
            xy_test=xy_test,
            R=FLAGS.R,
            E=FLAGS.E,
            C=FLAGS.C,
            B=FLAGS.B,
        )
    else:
        # Unitary training on exactly one partition
        hist, loss, acc = run.unitary_training(
            model_name=FLAGS.model,
            xy_train=xy_train_partitions[partition_id],
            xy_val=xy_val,
            xy_test=xy_test,
            E=FLAGS.E,
            B=FLAGS.B,
        )
    finished_at = time.time()

    # Write results
    res = {
        "group_name": FLAGS.group_name,
        "task_name": FLAGS.task_name,
        "task_label": FLAGS.task_label,
        "dataset": FLAGS.dataset,
        "model": FLAGS.model,
        "R": FLAGS.R,
        "E": FLAGS.E,
        "C": FLAGS.C,
        "B": FLAGS.B,
        "partition_id": partition_id,
        "start": started_at,
        "end": finished_at,
        "duration": finished_at - started_at,
        "loss": float(loss),
        "acc": float(acc),
        "hist": hist,
        "hist_metrics": hist_metrics,
    }
    storage.write_json(res, fname="results.json")
def benchmark_evolutionary_avg():
    """Compare evolutionary aggregation against the default weighted average.

    Runs federated training twice on the same non-IID dataset — once with an
    ``EvoAgg`` aggregator, once with the default — and stores both final
    loss/accuracy pairs as JSON.
    """
    fn_name = benchmark_evolutionary_avg.__name__
    logging.info("Starting {}".format(fn_name))

    # Load dataset
    xy_parts, xy_val, xy_test = load_splits("fashion-mnist-100p-noniid-03cpp")

    # Run Federated Learning with evolutionary aggregation
    evaluator = Evaluator(orig_cnn_compiled(), xy_val)  # FIXME refactor
    aggregator = EvoAgg(evaluator)
    _, _, _, loss_a, acc_a = run.federated_training(
        "blog_cnn",
        xy_parts,
        xy_val,
        xy_test,
        R=DEFAULT_R,
        E=DEFAULT_E,
        C=DEFAULT_C,
        B=DEFAULT_B,
        aggregator=aggregator,
    )

    # Run Federated Learning with weighted average aggregation
    _, _, _, loss_b, acc_b = run.federated_training(
        "blog_cnn",
        xy_parts,
        xy_val,
        xy_test,
        R=DEFAULT_R,
        E=DEFAULT_E,
        C=DEFAULT_C,
        B=DEFAULT_B,
    )

    # Write results JSON
    results = {
        "loss_a": float(loss_a),
        "acc_a": float(acc_a),
        "loss_b": float(loss_b),
        "acc_b": float(acc_b),
    }
    # TODO add histories
    storage.write_json(results, fname="EA-WA-results.json")
def run_benchmark(benchmark_name: str):
    """Build the benchmark's Docker image, launch all of its tasks, and push
    the group-level aggregation config.

    :param benchmark_name: Key into the module-level ``benchmarks`` registry.
    :raises ValueError: If two tasks in the benchmark share the same name.
    """
    logging.info(f"Building Docker image for benchmark {benchmark_name}")
    logging.info(f"Starting benchmark {benchmark_name}")
    benchmark = benchmarks[benchmark_name]
    group_name = FLAGS.group_name or f"{strftime('%Y%m%dT%H%M')}_{benchmark_name}"

    # Validate eagerly: duplicate task names would make results
    # indistinguishable. Raise explicitly instead of using `assert`,
    # which is stripped when Python runs with -O.
    task_names = {task.name for task in benchmark.tasks}
    if len(task_names) != len(benchmark.tasks):
        raise ValueError("Duplicate task names")

    # Only push the image when tasks will run remotely on EC2
    should_push = benchmark.runner == "ec2"
    docker_image_name = docker.build(should_push=should_push)

    # TODO Initiate tasks in parallel
    for task in benchmark.tasks:
        run_task(
            docker_image_name=docker_image_name,
            group_name=group_name,
            task_name=task.name,
            task_label=task.label,
            model=task.model_name,
            dataset=task.dataset_name,
            R=task.R,
            E=task.E,
            C=task.C,
            B=task.B,
            partition_id=task.partition_id,
            instance_cores=task.instance_cores,
            timeout=task.timeout,
            runner=benchmark.runner,
        )

    # Store the group's aggregation config next to its results
    with TemporaryDirectory() as tmpdir:
        fname = os.path.join(tmpdir, "config.json")
        data = {"aggregation_name": benchmark.aggregation_name}
        storage.write_json(data, fname)
        results.push(group_name=group_name, task_name="", output_dir=tmpdir)
def unitary_versus_federated(
    benchmark_name: str,
    model_name: str,
    dataset_name: str,
    R: int = DEFAULT_R,
    E: int = DEFAULT_E,
    C: float = DEFAULT_C,
    B: int = DEFAULT_B,
):
    """Benchmark unitary learning on one partition against federated learning
    on all partitions, write a results JSON, and plot validation accuracies.

    The unitary model trains for ``R * E`` epochs so both approaches see the
    same total number of local epochs.

    :param benchmark_name: Name recorded in the results file.
    :param model_name: Model registry key passed to the training runners.
    :param dataset_name: Dataset to load via ``load_splits``.
    :param R: Number of federated rounds.
    :param E: Local epochs per round.
    :param C: Fraction of participants used in each round of training.
    :param B: Batch size.
    """
    logging.info(f"Starting {benchmark_name}")

    xy_train_partitions, xy_val, xy_test = load_splits(dataset_name)

    start = time.time()

    # Train CNN on a single partition ("unitary learning")
    # TODO train n models on all partitions
    partition_id = 0
    xy_train = xy_train_partitions[partition_id]
    logging.info(f"Run unitary training using partition {partition_id}")
    ul_hist, ul_loss, ul_acc = unitary_training(
        model_name, xy_train, xy_val, xy_test, E=R * E, B=B
    )

    # Train CNN using federated learning on all partitions
    logging.info("Run federated learning using all partitions")
    fl_hist, _, _, fl_loss, fl_acc = federated_training(
        model_name, xy_train_partitions, xy_val, xy_test, R=R, E=E, C=C, B=B
    )

    end = time.time()

    # Write results JSON
    results = {
        "name": benchmark_name,
        "start": start,
        "end": end,
        "duration": end - start,
        "R": R,
        "E": E,
        "C": C,
        "B": B,
        "unitary_learning": {
            "loss": float(ul_loss),
            "acc": float(ul_acc),
            "hist": ul_hist,
        },
        "federated_learning": {
            "loss": float(fl_loss),
            "acc": float(fl_acc),
            "hist": fl_hist,
        },
    }
    storage.write_json(results, fname="results.json")

    # Plot results
    # TODO include aggregated participant histories in plot
    # Unitary x-axis: one point per epoch; federated x-axis: one point per
    # round, spaced E epochs apart so both curves share the epoch scale.
    plot_data: List[Tuple[str, List[float], Optional[List[int]]]] = [
        (
            "Unitary Learning",
            ul_hist["val_acc"],
            list(range(1, len(ul_hist["val_acc"]) + 1)),
        ),
        (
            "Federated Learning",
            fl_hist["val_acc"],
            list(range(E, len(fl_hist["val_acc"]) * E + 1, E)),
        ),
    ]
    # FIXME use different filenames for different datasets
    task_accuracies.plot(plot_data, fname="plot.png")