Exemplo n.º 1
0
def train(model_path, data_dir, output_dir):
    """Retrains a softmax regression head on top of an embedding extractor.

    The stages run in order and each one reports its wall-clock time:
    split the images and extract embeddings for the train/validation splits,
    fit the softmax regression model with SGD, save the retrained tflite
    model and the label map, then measure accuracy of the saved model on the
    test split.

    Args:
      model_path: string, path to embedding extractor.
      data_dir: string, directory that contains training data.
      output_dir: string, directory to save retrained tflite model and label
        map.
    """
    started = time.perf_counter()
    image_paths, labels, label_map = get_image_paths(data_dir)
    train_and_val_dataset, test_dataset = shuffle_and_split(
        image_paths, labels)

    # A single engine serves both splits; constructing one per call would be
    # needlessly slow.
    engine = basic_engine.BasicEngine(model_path)

    print('Extract embeddings for data_train')
    train_inputs = train_and_val_dataset['data_train']
    train_and_val_dataset['data_train'] = extract_embeddings(
        train_inputs, engine)

    print('Extract embeddings for data_val')
    val_inputs = train_and_val_dataset['data_val']
    train_and_val_dataset['data_val'] = extract_embeddings(val_inputs, engine)

    preprocessed = time.perf_counter()
    print('Data preprocessing takes %.2f seconds' % (preprocessed - started))

    # Build the classifier. The input width is the second dimension of the
    # training embeddings; the class count is the largest training label + 1.
    feature_dim = train_and_val_dataset['data_train'].shape[1]
    num_classes = np.max(train_and_val_dataset['labels_train']) + 1
    model = SoftmaxRegression(
        feature_dim, num_classes, weight_scale=5e-2, reg=0.0)

    # SGD hyper-parameters: iteration count, learning rate, mini-batch size.
    sgd_iterations, sgd_learning_rate, sgd_batch_size = 500, 1e-2, 100
    model.train_with_sgd(
        train_and_val_dataset,
        sgd_iterations,
        sgd_learning_rate,
        batch_size=sgd_batch_size)
    trained = time.perf_counter()
    print('Training takes %.2f seconds' % (trained - preprocessed))

    # Attach the learned weights to the input model and write it as tflite.
    out_model_path = os.path.join(output_dir, 'retrained_model_edgetpu.tflite')
    model.save_as_tflite_model(model_path, out_model_path)
    print('Model %s saved.' % out_model_path)

    label_map_path = os.path.join(output_dir, 'label_map.txt')
    save_label_map(label_map, label_map_path)
    print('Label map %s saved.' % label_map_path)

    saved = time.perf_counter()
    print('Saving retrained model and label map takes %.2f seconds' %
          (saved - trained))

    # Reload the model that was just written and score it on the test split.
    retrained_engine = basic_engine.BasicEngine(out_model_path)
    test_embeddings = extract_embeddings(
        test_dataset['data_test'], retrained_engine)
    predicted_labels = np.argmax(test_embeddings, axis=1)
    saved_model_acc = np.mean(predicted_labels == test_dataset['labels_test'])
    print('Saved tflite model test accuracy: %.2f%%' % (saved_model_acc * 100))

    evaluated = time.perf_counter()
    print('Checking test accuracy takes %.2f seconds' % (evaluated - saved))
Exemplo n.º 2
0
def part_A_run(model_A_path, data_dir, output_dir):
    """Extracts intermediate embeddings with extractor part A and saves them.

    Splits the images found in data_dir into train/validation/test sets,
    replaces the image data of every split with the embeddings produced by
    model part A, and writes the results to output_dir as:

      * train_and_val_dataset.pickle
      * test_dataset.pickle
      * label_map.txt

    Args:
      model_A_path: string, path to embedding extractor part A.
      data_dir: string, directory that contains data.
      output_dir: string, directory to save the intermediate embeddings, true
        labels, and label map. Created if it does not exist.
    """
    t0 = time.perf_counter()

    # Make sure the destination exists *before* the expensive extraction, so
    # a missing directory cannot throw the work away at save time. An empty
    # output_dir keeps its original meaning (current working directory).
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Preprocess training (and validation) data
    image_paths, labels, label_map = get_image_paths(data_dir)
    train_and_val_dataset, test_dataset = shuffle_and_split(image_paths,
                                                            labels,
                                                            val_percent=0.1,
                                                            test_percent=0.1)
    # Initializes basic engine model here to avoid repeated initialization,
    # which is time consuming.
    engine = basic_engine.BasicEngine(model_A_path)
    print('Extract intermediate embeddings for data_train')
    train_and_val_dataset['data_train'] = extract_embeddings(
        train_and_val_dataset['data_train'], engine)
    print('Extract intermediate embeddings for data_val')
    train_and_val_dataset['data_val'] = extract_embeddings(
        train_and_val_dataset['data_val'], engine)
    print('Extract intermediate embeddings for data_test')
    test_dataset['data_test'] = extract_embeddings(test_dataset['data_test'],
                                                   engine)
    t1 = time.perf_counter()
    print('Data preprocessing takes %.2f seconds' % (t1 - t0))

    # Persist both dataset dicts (embeddings together with their labels) and
    # the label map.
    with open(os.path.join(output_dir, 'train_and_val_dataset.pickle'),
              'wb') as handle:
        pickle.dump(train_and_val_dataset,
                    handle,
                    protocol=pickle.HIGHEST_PROTOCOL)
    with open(os.path.join(output_dir, 'test_dataset.pickle'), 'wb') as handle:
        pickle.dump(test_dataset, handle, protocol=pickle.HIGHEST_PROTOCOL)
    save_label_map(label_map, os.path.join(output_dir, 'label_map.txt'))
    t2 = time.perf_counter()
    print(
        'Saving the intermediate embeddings, true labels, and label map takes %.2f seconds'
        % (t2 - t1))
Exemplo n.º 3
0
def part_B_run_and_train(model_B_path, data_dir, output_dir, scaling_factor, zero_point):
  """Trains a softmax regression model on top of embedding extractor part B.

  Loads the pickled intermediate embeddings and labels from data_dir, runs
  them through model part B to obtain the final embeddings, trains a softmax
  regression model with SGD, saves the retrained tflite model, and reports
  the accuracy of the saved model on the test split.

  Side effects besides the saved model:
    * writes label_map.pickle (the text of label_map.txt) into data_dir;
    * moves label_map.txt from data_dir into output_dir, so it is no longer
      present in data_dir afterwards;
    * calls model._save_ckpt(output_dir).

  Args:
    model_B_path: string, path to embedding extractor part B.
    data_dir: string, directory that contains the intermediate embeddings,
      true labels, label map.
    output_dir: string, directory to save retrained tflite model and label
      map. Created if it does not exist.
    scaling_factor: forwarded unchanged to extract_final_embeddings
      (presumably the quantization scale of the intermediate tensor -- TODO
      confirm against extract_final_embeddings).
    zero_point: forwarded unchanged to extract_final_embeddings (presumably
      the quantization zero point -- TODO confirm).

  Raises:
    OSError: if the dataset/label-map files cannot be read, or the label map
      cannot be moved into output_dir.
  """
  # Function-scope import: shutil is only needed to relocate the label map.
  import shutil

  t0 = time.perf_counter()

  # An empty output_dir keeps meaning "current working directory".
  if output_dir:
    os.makedirs(output_dir, exist_ok=True)

  # Load the intermediate embeddings and true labels.
  # NOTE(security): pickle.load executes arbitrary code embedded in the file;
  # only point data_dir at files produced by a trusted run.
  with open(os.path.join(data_dir, 'train_and_val_dataset.pickle'), 'rb') as handle:
    train_and_val_dataset = pickle.load(handle)
  with open(os.path.join(data_dir, 'test_dataset.pickle'), 'rb') as handle:
    test_dataset = pickle.load(handle)

  # Initializes basic engine model here to avoid repeated initialization,
  # which is time consuming.
  engine = basic_engine.BasicEngine(model_B_path)
  print('Extract embeddings for data_train')
  train_and_val_dataset['data_train'] = extract_final_embeddings(
      train_and_val_dataset['data_train'], engine, scaling_factor, zero_point)
  print('Extract embeddings for data_val')
  train_and_val_dataset['data_val'] = extract_final_embeddings(
      train_and_val_dataset['data_val'], engine, scaling_factor, zero_point)
  t1 = time.perf_counter()
  print('Data preprocessing takes %.2f seconds' % (t1 - t0))

  # Construct FC + softmax and start training
  weight_scale = 5e-2
  reg = 0.0
  feature_dim = train_and_val_dataset['data_train'].shape[1]
  print('feature_dim: %d'%feature_dim)
  num_classes = np.max(train_and_val_dataset['labels_train']) + 1
  print('num_classes: %d'%num_classes)
  model = SoftmaxRegression(feature_dim, num_classes, weight_scale=weight_scale, reg=reg)
  learning_rate = 1e-2
  batch_size = 100
  num_iter = 500

  # Mirror the text label map as a pickle next to it, before the checkpoint
  # is loaded from the same directory.
  # NOTE(review): whether _load_ckpt reads label_map.pickle is not visible
  # here; the original ordering is preserved in case it does.
  label_map_src = os.path.join(data_dir, 'label_map.txt')
  with open(label_map_src, 'r') as fp:
    label_map = fp.read()
  with open(os.path.join(data_dir, 'label_map.pickle'), 'wb') as fp:
    pickle.dump(label_map, fp)
  model._load_ckpt(data_dir)
  model.train_with_sgd(train_and_val_dataset, num_iter, learning_rate, batch_size=batch_size, print_every=10)
  t2 = time.perf_counter()
  print('Training takes %.2f seconds' % (t2 - t1))

  # Append learned weights to input model part B and save as tflite format.
  out_model_path = os.path.join(output_dir, 'retrained_model_edgetpu.tflite')
  model.save_as_tflite_model(model_B_path, out_model_path)
  print('Model %s saved.' % out_model_path)

  # Move (not copy) the label map next to the retrained model. shutil.move
  # avoids building a shell command from unquoted paths and raises on
  # failure instead of silently reporting success.
  label_map_dst = os.path.join(output_dir, 'label_map.txt')
  shutil.move(label_map_src, label_map_dst)
  print('Label map %s saved' % label_map_dst)
  t3 = time.perf_counter()
  print('Saving retrained model and label map takes %.2f seconds' % (t3 - t2))
  model._save_ckpt(output_dir)

  # Test: reload the model that was just written and score the test split.
  retrained_engine = basic_engine.BasicEngine(out_model_path)
  test_embeddings = extract_final_embeddings(
      test_dataset['data_test'], retrained_engine, scaling_factor, zero_point)
  predictions = np.argmax(test_embeddings, axis=1)
  saved_model_acc = np.mean(predictions == test_dataset['labels_test'])
  # Dump predicted and true labels for manual inspection.
  print(predictions)
  print(test_dataset['labels_test'])
  print('Saved tflite model test accuracy: %.2f%%' % (saved_model_acc * 100))
  t4 = time.perf_counter()
  print('Checking test accuracy takes %.2f seconds' % (t4 - t3))