Exemplo n.º 1
0
def _save_map_as_csv(path, mapping):
    """Write each (key, value) pair of `mapping` as one row of a CSV at `path`."""
    with tf.io.gfile.GFile(path, 'w') as csvfile:
        writer = csv.writer(csvfile)
        for key, val in mapping.items():
            writer.writerow([key, val])


def main(argv):
    """Select EMNIST clients by classifier-accuracy cutoffs and save them to CSVs.

    Args:
      argv: Command-line arguments (only the program name is allowed).

    Raises:
      app.UsageError: If extra command-line arguments are passed.
      ValueError: If any of the likelihood/cutoff flags exceeds 1.0.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Flags: echo every hyperparameter flag (None rendered as the string 'None').
    hparam_dict = collections.OrderedDict([(name, FLAGS[name].value)
                                           for name in hparam_flags])
    for k in hparam_dict.keys():
        if hparam_dict[k] is None:
            hparam_dict[k] = 'None'
    for k, v in hparam_dict.items():
        print('{} : {} '.format(k, v))

    # These three flags are probabilities/fractions, so they must not exceed 1.0.
    if FLAGS.invert_imagery_likelihood > 1.0:
        raise ValueError(
            'invert_imagery_likelihood cannot be greater than 1.0')
    if FLAGS.bad_accuracy_cutoff > 1.0:
        raise ValueError('bad_accuracy_cutoff cannot be greater than 1.0')
    if FLAGS.good_accuracy_cutoff > 1.0:
        raise ValueError('good_accuracy_cutoff cannot be greater than 1.0')

    # Training datasets.
    client_real_images_train_tff_data = (
        emnist_data_utils.create_real_images_tff_client_data('train'))

    print('There are %d unique clients.' %
          len(client_real_images_train_tff_data.client_ids))

    # Trained classifier model.
    classifier_model = ecm.get_trained_emnist_classifier_model()

    # Filter down to those client IDs that fall within some accuracy cutoff.
    bad_client_ids_inversion_map, good_client_ids_inversion_map = (
        _get_client_ids_meeting_condition(client_real_images_train_tff_data,
                                          FLAGS.bad_accuracy_cutoff,
                                          FLAGS.good_accuracy_cutoff,
                                          FLAGS.invert_imagery_likelihood,
                                          classifier_model))

    print(
        'There are %d unique clients meeting bad accuracy cutoff condition.' %
        len(bad_client_ids_inversion_map))
    print(
        'There are %d unique clients meeting good accuracy cutoff condition.' %
        len(good_client_ids_inversion_map))

    # Save the selected client id dictionaries to CSV (one row per client).
    _save_map_as_csv(FLAGS.path_to_save_bad_clients_csv,
                     bad_client_ids_inversion_map)
    _save_map_as_csv(FLAGS.path_to_save_good_clients_csv,
                     good_client_ids_inversion_map)

    print('CSV files with selected Federated EMNIST clients have been saved.')
Exemplo n.º 2
0
def _create_real_images_dataset_for_eval():
    """Returns a `tf.data.Dataset` of real images."""
    # Pool the test split across every client into one flat dataset.
    tff_data = emnist_data_utils.create_real_images_tff_client_data(
        split='test')
    pooled_ds = tff_data.create_tf_dataset_from_all_clients()

    return emnist_data_utils.preprocess_img_dataset(
        pooled_ds,
        include_label=False,
        batch_size=EVAL_BATCH_SIZE,
        shuffle=True,
        repeat=True)
Exemplo n.º 3
0
def _create_real_images_dataset_for_central(batchsize, cache_dir):
  """Returns a `tf.data.Dataset` of real images."""
  # Pool the train split across every client into one flat dataset.
  tff_data = emnist_data_utils.create_real_images_tff_client_data(
      split='train', cache_dir=cache_dir)
  pooled_ds = tff_data.create_tf_dataset_from_all_clients()

  return emnist_data_utils.preprocess_img_dataset(
      pooled_ds,
      include_label=False,
      batch_size=batchsize,
      shuffle=True,
      repeat=True)
Exemplo n.º 4
0
  def setUp(self):
    """Loads one synthetic client's real images and builds fixed fake images."""
    super().setUp()
    synthetic_data = emnist_data_utils.create_real_images_tff_client_data(
        split='synthetic')
    first_client_id = synthetic_data.client_ids[0]
    raw_ds = synthetic_data.create_tf_dataset_for_client(first_client_id)
    preprocessed_ds = emnist_data_utils.preprocess_img_dataset(
        raw_ds, shuffle=False)
    # First batch of preprocessed real images for the test client.
    self.real_images = next(iter(preprocessed_ds))

    # Fixed seed so the fake image batch is identical across test runs.
    np.random.seed(seed=123456)
    self.fake_images = tf.constant(
        np.random.random((32, 28, 28, 1)), dtype=tf.float32)
def _load_and_preprocess_datasets():
  """Load raw EMNIST data and preprocess images and labels."""
  emnist_train, emnist_test = (
      emnist_data_utils.create_real_images_tff_client_data())

  # Flatten each split across all clients into a single raw dataset.
  raw_train = emnist_train.create_tf_dataset_from_all_clients()
  raw_test = emnist_test.create_tf_dataset_from_all_clients()

  # Preprocess; only the training split is shuffled.
  preprocessed_train_dataset = emnist_data_utils.preprocess_img_dataset(
      raw_train, include_label=True, batch_size=BATCH_SIZE, shuffle=True)
  preprocessed_test_dataset = emnist_data_utils.preprocess_img_dataset(
      raw_test, include_label=True, batch_size=BATCH_SIZE, shuffle=False)

  return preprocessed_train_dataset, preprocessed_test_dataset
Exemplo n.º 6
0
def get_filtered_client_data_for_training(path_to_read_inversions_csv,
                                          path_to_read_example_indices_csv,
                                          batch_size, cache_dir):
    """Form ClientData using paths to pixel inversion, example selection data."""

    raw_client_data = emnist_data_utils.create_real_images_tff_client_data(
        'train', cache_dir=cache_dir)
    client_ids = raw_client_data.client_ids

    inversion_map = None
    example_indices_map = None
    # If filter-by-user or filter-by-example, load the CSV data into maps and
    # restrict the client IDs to just the users that take part in training.
    if path_to_read_inversions_csv is not None:
        inversion_map, example_indices_map = (
            _get_client_ids_inversion_and_example_indices_maps(
                path_to_read_inversions_csv, path_to_read_example_indices_csv))
        client_ids = list(inversion_map.keys())

    def _make_client_dataset(client_id):
        """Retrieve/preprocess a tf.data.Dataset for a given client_id."""
        client_ds = raw_client_data.create_tf_dataset_for_client(client_id)

        # Per-client pixel inversion decision (defaults to no inversion).
        should_invert = inversion_map[client_id] if inversion_map else False

        # If filter-by-example, keep only the selected examples here.
        if example_indices_map:
            client_ds = _filter_by_example(client_ds, example_indices_map,
                                           client_id)

        return emnist_data_utils.preprocess_img_dataset(
            client_ds,
            invert_imagery=should_invert,
            include_label=False,
            batch_size=batch_size,
            shuffle=True,
            repeat=False)

    return tff.simulation.ClientData.from_clients_and_fn(
        client_ids, _make_client_dataset)
Exemplo n.º 7
0
def main(argv):
    """Runs a trained classifier over every EMNIST client and reports accuracy.

    For each client, images are (randomly) pixel-inverted with probability
    `FLAGS.invert_imagery_likelihood`, classified, and the per-client accuracy
    is collected into a histogram plus 25th/75th percentile and overall stats.

    Args:
      argv: Command-line arguments (only the program name is allowed).

    Raises:
      app.UsageError: If extra command-line arguments are passed.
      ValueError: If invert_imagery_likelihood exceeds 1.0 (it is a probability).
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    invert_imagery_likelihood = FLAGS.invert_imagery_likelihood
    print('invert_imagery_likelihood is %s' % invert_imagery_likelihood)
    if invert_imagery_likelihood > 1.0:
        raise ValueError(
            'invert_imagery_likelihood cannot be greater than 1.0')

    # TFF Dataset.
    client_real_images_tff_data = (
        emnist_data_utils.create_real_images_tff_client_data(split='train'))
    print('There are %d unique clients.' %
          len(client_real_images_tff_data.client_ids))

    # EMNIST Classifier.
    classifier_model = ecm.get_trained_emnist_classifier_model()

    accuracy_list = []
    overall_total_count = 0
    overall_correct_count = 0
    for client_id in client_real_images_tff_data.client_ids:
        # Bernoulli draw: whether this client's images are pixel-inverted.
        invert_imagery = (1 == np.random.binomial(n=1,
                                                  p=invert_imagery_likelihood))

        # TF Dataset for particular client.
        raw_images_ds = client_real_images_tff_data.create_tf_dataset_for_client(
            client_id)
        # Preprocess into format expected by classifier.
        images_ds = emnist_data_utils.preprocess_img_dataset(
            raw_images_ds,
            invert_imagery=invert_imagery,
            include_label=True,
            batch_size=None,
            shuffle=False,
            repeat=False)
        # Run classifier on all data on client, compute % classified correctly.
        total_count, correct_count = _analyze_classifier(
            images_ds, classifier_model)
        # NOTE(review): divides by total_count — assumes every client has at
        # least one example, otherwise this raises ZeroDivisionError. Confirm.
        accuracy = float(correct_count) / float(total_count)
        accuracy_list.append(accuracy)

        overall_total_count += total_count
        overall_correct_count += correct_count

    # Calculate histogram.
    bin_width = 1
    histogram = _compute_histogram(accuracy_list, bin_width)
    print('\nHistogram:')
    print(histogram.numpy())
    # Sanity check (should be 3400)
    print('(Histogram sum):')
    print(sum(histogram.numpy()))

    # Calculate percentile values.
    percentile_25, percentile_75 = np.percentile(accuracy_list, q=(25, 75))
    print('\nPercentiles...')
    print('25th Percentile : %f' % percentile_25)
    print('75th Percentile : %f' % percentile_75)

    overall_accuracy = (float(overall_correct_count) /
                        float(overall_total_count))
    print('\nOverall classification success percentage: %d / %d (%f)' %
          (overall_correct_count, overall_total_count, overall_accuracy))
def _write_map_to_csv(path, mapping):
  """Write each (key, value) pair of `mapping` as one row of a CSV at `path`."""
  with tf.io.gfile.GFile(path, 'w') as csvfile:
    writer = csv.writer(csvfile)
    for key, val in mapping.items():
      writer.writerow([key, val])


def main(argv):
  """Splits EMNIST clients by classification results and saves maps to CSVs.

  Args:
    argv: Command-line arguments (only the program name is allowed).

  Raises:
    app.UsageError: If extra command-line arguments are passed.
    ValueError: If invert_imagery_likelihood exceeds 1.0 (it is a probability).
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  # Flags: echo every hyperparameter flag (None rendered as the string 'None').
  hparam_dict = collections.OrderedDict([
      (name, FLAGS[name].value) for name in hparam_flags
  ])
  for k in hparam_dict.keys():
    if hparam_dict[k] is None:
      hparam_dict[k] = 'None'
  for k, v in hparam_dict.items():
    print('{} : {} '.format(k, v))

  # invert_imagery_likelihood is a probability, so it must not exceed 1.0.
  if FLAGS.invert_imagery_likelihood > 1.0:
    raise ValueError('invert_imagery_likelihood cannot be greater than 1.0')

  # Training datasets.
  client_real_images_train_tff_data = (
      emnist_data_utils.create_real_images_tff_client_data('train'))

  print('There are %d unique clients.' %
        len(client_real_images_train_tff_data.client_ids))

  # Trained classifier model.
  classifier_model = ecm.get_trained_emnist_classifier_model()

  # Partition clients and their example indices by classification outcome.
  (client_ids_with_correct_examples_map, client_ids_with_incorrect_examples_map,
   client_ids_correct_example_indices_map,
   client_ids_incorrect_example_indices_map) = (
       _get_client_ids_and_examples_based_on_classification(
           client_real_images_train_tff_data, FLAGS.min_num_examples,
           FLAGS.invert_imagery_likelihood, classifier_model))

  print('There are %d unique clients with at least %d correct examples.' %
        (len(client_ids_with_correct_examples_map), FLAGS.min_num_examples))
  print('There are %d unique clients with at least %d incorrect examples.' %
        (len(client_ids_with_incorrect_examples_map), FLAGS.min_num_examples))

  # Save each client id dictionary to its own CSV file.
  _write_map_to_csv(FLAGS.path_to_save_clients_with_correct_examples_csv,
                    client_ids_with_correct_examples_map)
  _write_map_to_csv(FLAGS.path_to_save_clients_with_incorrect_examples_csv,
                    client_ids_with_incorrect_examples_map)
  _write_map_to_csv(FLAGS.path_to_save_correct_example_indices_csv,
                    client_ids_correct_example_indices_map)
  _write_map_to_csv(FLAGS.path_to_save_incorrect_example_indices_csv,
                    client_ids_incorrect_example_indices_map)

  print('CSV files with selected Federated EMNIST clients and lists of '
        'classified/misclassified examples have been saved.')