def main(argv):
    """Selects EMNIST clients by classifier accuracy and saves IDs to CSV.

    Partitions training clients into "bad"/"good" accuracy groups via
    `_get_client_ids_meeting_condition`, then writes each resulting
    client-id -> invert-imagery map to its own CSV file.

    Args:
        argv: Command-line arguments; only the program name is allowed.

    Raises:
        app.UsageError: If extra command-line arguments are given.
        ValueError: If any of the likelihood/cutoff flags exceeds 1.0.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Flags. Stringify None values so every flag prints cleanly below.
    hparam_dict = collections.OrderedDict(
        (name, FLAGS[name].value) for name in hparam_flags)
    hparam_dict = collections.OrderedDict(
        (k, 'None' if v is None else v) for k, v in hparam_dict.items())
    for k, v in hparam_dict.items():
        print('{} : {} '.format(k, v))

    # These flags are probabilities/fractions, so values above 1.0 are invalid.
    if FLAGS.invert_imagery_likelihood > 1.0:
        raise ValueError('invert_imagery_likelihood cannot be greater than 1.0')
    if FLAGS.bad_accuracy_cutoff > 1.0:
        raise ValueError('bad_accuracy_cutoff cannot be greater than 1.0')
    if FLAGS.good_accuracy_cutoff > 1.0:
        raise ValueError('good_accuracy_cutoff cannot be greater than 1.0')

    # Training datasets.
    client_real_images_train_tff_data = (
        emnist_data_utils.create_real_images_tff_client_data('train'))
    print('There are %d unique clients.' %
          len(client_real_images_train_tff_data.client_ids))

    # Trained classifier model.
    classifier_model = ecm.get_trained_emnist_classifier_model()

    # Filter down to those client IDs that fall within some accuracy cutoff.
    bad_client_ids_inversion_map, good_client_ids_inversion_map = (
        _get_client_ids_meeting_condition(client_real_images_train_tff_data,
                                          FLAGS.bad_accuracy_cutoff,
                                          FLAGS.good_accuracy_cutoff,
                                          FLAGS.invert_imagery_likelihood,
                                          classifier_model))
    print('There are %d unique clients meeting bad accuracy cutoff condition.' %
          len(bad_client_ids_inversion_map))
    print('There are %d unique clients meeting good accuracy cutoff condition.'
          % len(good_client_ids_inversion_map))

    def _write_map_to_csv(path, mapping):
        # One row per (client_id, invert_flag) pair; shared by both outputs.
        with tf.io.gfile.GFile(path, 'w') as csvfile:
            writer = csv.writer(csvfile)
            for key, val in mapping.items():
                writer.writerow([key, val])

    # Save selected client id dictionary to csv.
    _write_map_to_csv(FLAGS.path_to_save_bad_clients_csv,
                      bad_client_ids_inversion_map)
    _write_map_to_csv(FLAGS.path_to_save_good_clients_csv,
                      good_client_ids_inversion_map)

    print('CSV files with selected Federated EMNIST clients have been saved.')
def _create_real_images_dataset_for_eval():
    """Builds an evaluation `tf.data.Dataset` of preprocessed real images.

    Pools the 'test' split across all federated clients, then preprocesses
    it: no labels, batched at EVAL_BATCH_SIZE, shuffled, repeated.
    """
    tff_client_data = emnist_data_utils.create_real_images_tff_client_data(
        split='test')
    pooled_images = tff_client_data.create_tf_dataset_from_all_clients()
    preprocessed = emnist_data_utils.preprocess_img_dataset(
        pooled_images,
        include_label=False,
        batch_size=EVAL_BATCH_SIZE,
        shuffle=True,
        repeat=True)
    return preprocessed
def _create_real_images_dataset_for_central(batchsize, cache_dir):
    """Builds a centralized `tf.data.Dataset` of preprocessed real images.

    Args:
        batchsize: Batch size applied during preprocessing.
        cache_dir: Cache directory handed to the TFF client-data loader.

    Returns:
        A label-free, shuffled, repeating dataset pooled over every client
        in the 'train' split.
    """
    tff_client_data = emnist_data_utils.create_real_images_tff_client_data(
        split='train', cache_dir=cache_dir)
    pooled_images = tff_client_data.create_tf_dataset_from_all_clients()
    return emnist_data_utils.preprocess_img_dataset(
        pooled_images,
        include_label=False,
        batch_size=batchsize,
        shuffle=True,
        repeat=True)
def setUp(self):
    """Prepares one deterministic real-image batch and one fixed fake batch."""
    super().setUp()
    # Real images: first synthetic client's data, preprocessed without
    # shuffling so the batch is identical across test runs.
    client_data = emnist_data_utils.create_real_images_tff_client_data(
        split='synthetic')
    first_client_id = client_data.client_ids[0]
    raw_ds = client_data.create_tf_dataset_for_client(first_client_id)
    processed_ds = emnist_data_utils.preprocess_img_dataset(
        raw_ds, shuffle=False)
    self.real_images = next(iter(processed_ds))
    # Fake images: seed NumPy first so the random batch is reproducible.
    np.random.seed(seed=123456)
    self.fake_images = tf.constant(
        np.random.random((32, 28, 28, 1)), dtype=tf.float32)
def _load_and_preprocess_datasets():
    """Loads raw EMNIST and returns (train, test) preprocessed datasets.

    Each split is pooled across all federated clients and preprocessed with
    labels included; only the training split is shuffled.
    """
    emnist_train, emnist_test = (
        emnist_data_utils.create_real_images_tff_client_data())

    def _pool_and_preprocess(tff_data, shuffle):
        # Flatten per-client datasets into one stream, then preprocess it.
        pooled = tff_data.create_tf_dataset_from_all_clients()
        return emnist_data_utils.preprocess_img_dataset(
            pooled, include_label=True, batch_size=BATCH_SIZE, shuffle=shuffle)

    return (_pool_and_preprocess(emnist_train, shuffle=True),
            _pool_and_preprocess(emnist_test, shuffle=False))
def get_filtered_client_data_for_training(path_to_read_inversions_csv,
                                          path_to_read_example_indices_csv,
                                          batch_size, cache_dir):
    """Form ClientData using paths to pixel inversion, example selection data.

    Args:
        path_to_read_inversions_csv: CSV mapping client id -> inversion flag,
            or None to use every client unfiltered.
        path_to_read_example_indices_csv: CSV of per-client example indices
            (consulted only when the inversions CSV is provided).
        batch_size: Batch size applied in preprocessing.
        cache_dir: Cache directory for the raw TFF client data.

    Returns:
        A `tff.simulation.ClientData` whose per-client datasets are
        preprocessed (optionally pixel-inverted and filtered by example
        index).
    """
    raw_client_data = emnist_data_utils.create_real_images_tff_client_data(
        'train', cache_dir=cache_dir)
    client_ids = raw_client_data.client_ids

    inversion_map = None
    example_indices_map = None
    # If filter-by-user or filter-by-example, load the csv data into maps and
    # restrict the client IDs to just the users that take part in training.
    if path_to_read_inversions_csv is not None:
        inversion_map, example_indices_map = (
            _get_client_ids_inversion_and_example_indices_maps(
                path_to_read_inversions_csv, path_to_read_example_indices_csv))
        client_ids = list(inversion_map.keys())

    def _get_dataset(client_id):
        """Retrieve/preprocess a tf.data.Dataset for a given client_id."""
        raw_ds = raw_client_data.create_tf_dataset_for_client(client_id)
        invert_imagery = inversion_map[client_id] if inversion_map else False
        # If filter-by-example, do it here.
        if example_indices_map:
            raw_ds = _filter_by_example(raw_ds, example_indices_map, client_id)
        return emnist_data_utils.preprocess_img_dataset(
            raw_ds,
            invert_imagery=invert_imagery,
            include_label=False,
            batch_size=batch_size,
            shuffle=True,
            repeat=False)

    return tff.simulation.ClientData.from_clients_and_fn(client_ids,
                                                         _get_dataset)
def main(argv):
    """Classifies every EMNIST client's images and prints accuracy stats.

    Each client's imagery is pixel-inverted with probability
    `invert_imagery_likelihood` before classification. Per-client accuracies
    feed a histogram, 25th/75th percentiles, and an overall accuracy figure,
    all printed to stdout.

    Args:
        argv: Command-line arguments; only the program name is allowed.

    Raises:
        app.UsageError: If extra command-line arguments are given.
        ValueError: If invert_imagery_likelihood exceeds 1.0.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    invert_imagery_likelihood = FLAGS.invert_imagery_likelihood
    print('invert_imagery_likelihood is %s' % invert_imagery_likelihood)
    if invert_imagery_likelihood > 1.0:
        raise ValueError('invert_imagery_likelihood cannot be greater than 1.0')

    # TFF Dataset.
    client_real_images_tff_data = (
        emnist_data_utils.create_real_images_tff_client_data(split='train'))
    print('There are %d unique clients.' %
          len(client_real_images_tff_data.client_ids))

    # EMNIST Classifier.
    classifier_model = ecm.get_trained_emnist_classifier_model()

    accuracy_list = []
    overall_total_count = 0
    overall_correct_count = 0
    for client_id in client_real_images_tff_data.client_ids:
        # Bernoulli draw deciding whether this client's imagery is inverted.
        invert_imagery = (
            1 == np.random.binomial(n=1, p=invert_imagery_likelihood))

        # TF Dataset for particular client, preprocessed into the format
        # expected by the classifier.
        raw_images_ds = (
            client_real_images_tff_data.create_tf_dataset_for_client(client_id))
        images_ds = emnist_data_utils.preprocess_img_dataset(
            raw_images_ds,
            invert_imagery=invert_imagery,
            include_label=True,
            batch_size=None,
            shuffle=False,
            repeat=False)

        # Run classifier on all data on client, compute % classified correctly.
        total_count, correct_count = _analyze_classifier(images_ds,
                                                         classifier_model)
        accuracy_list.append(float(correct_count) / float(total_count))
        overall_total_count += total_count
        overall_correct_count += correct_count

    # Calculate histogram.
    bin_width = 1
    histogram = _compute_histogram(accuracy_list, bin_width)
    print('\nHistogram:')
    print(histogram.numpy())
    # Sanity check (should be 3400)
    print('(Histogram sum):')
    print(sum(histogram.numpy()))

    # Calculate percentile values.
    percentile_25, percentile_75 = np.percentile(accuracy_list, q=(25, 75))
    print('\nPercentiles...')
    print('25th Percentile : %f' % percentile_25)
    print('75th Percentile : %f' % percentile_75)

    overall_accuracy = (
        float(overall_correct_count) / float(overall_total_count))
    print('\nOverall classification success percentage: %d / %d (%f)' %
          (overall_correct_count, overall_total_count, overall_accuracy))
def main(argv):
    """Splits EMNIST clients by classification results and saves CSVs.

    Classifies each training client's examples, keeps clients having at least
    `min_num_examples` correctly / incorrectly classified examples, and writes
    four CSVs: two client-id -> inversion-flag maps and two client-id ->
    example-index maps.

    Args:
        argv: Command-line arguments; only the program name is allowed.

    Raises:
        app.UsageError: If extra command-line arguments are given.
        ValueError: If invert_imagery_likelihood exceeds 1.0.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Flags. Stringify None values so every flag prints cleanly below.
    hparam_dict = collections.OrderedDict(
        (name, FLAGS[name].value) for name in hparam_flags)
    hparam_dict = collections.OrderedDict(
        (k, 'None' if v is None else v) for k, v in hparam_dict.items())
    for k, v in hparam_dict.items():
        print('{} : {} '.format(k, v))

    # The likelihood is a probability, so values above 1.0 are invalid.
    if FLAGS.invert_imagery_likelihood > 1.0:
        raise ValueError('invert_imagery_likelihood cannot be greater than 1.0')

    # Training datasets.
    client_real_images_train_tff_data = (
        emnist_data_utils.create_real_images_tff_client_data('train'))
    print('There are %d unique clients.' %
          len(client_real_images_train_tff_data.client_ids))

    # Trained classifier model.
    classifier_model = ecm.get_trained_emnist_classifier_model()

    # Filter down to those client IDs that fall within some accuracy cutoff.
    (client_ids_with_correct_examples_map,
     client_ids_with_incorrect_examples_map,
     client_ids_correct_example_indices_map,
     client_ids_incorrect_example_indices_map) = (
         _get_client_ids_and_examples_based_on_classification(
             client_real_images_train_tff_data, FLAGS.min_num_examples,
             FLAGS.invert_imagery_likelihood, classifier_model))
    print('There are %d unique clients with at least %d correct examples.' %
          (len(client_ids_with_correct_examples_map), FLAGS.min_num_examples))
    print('There are %d unique clients with at least %d incorrect examples.' %
          (len(client_ids_with_incorrect_examples_map), FLAGS.min_num_examples))

    def _write_map_to_csv(path, mapping):
        # One row per (key, value) pair; shared by all four outputs below.
        with tf.io.gfile.GFile(path, 'w') as csvfile:
            writer = csv.writer(csvfile)
            for key, val in mapping.items():
                writer.writerow([key, val])

    # Save client id dictionarys to csv.
    _write_map_to_csv(FLAGS.path_to_save_clients_with_correct_examples_csv,
                      client_ids_with_correct_examples_map)
    _write_map_to_csv(FLAGS.path_to_save_clients_with_incorrect_examples_csv,
                      client_ids_with_incorrect_examples_map)
    _write_map_to_csv(FLAGS.path_to_save_correct_example_indices_csv,
                      client_ids_correct_example_indices_map)
    _write_map_to_csv(FLAGS.path_to_save_incorrect_example_indices_csv,
                      client_ids_incorrect_example_indices_map)

    print('CSV files with selected Federated EMNIST clients and lists of '
          'classified/misclassified examples have been saved.')