def main():
    # This may provide some performance boost
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # Read the arguments to get them from a JSON configuration file
    args = parse_args()

    model = ExportedModel(
        os.path.join(args.experiment_dir, args.test_model_timestamp_directory),
        args.image_size, args.num_sample_points)

    test_dm_path = "/home/qiulin/VAE_DeepSDF/example/9c7b2ed3770d1a6ea6fee8e2140acec9_r_450001.png"
    test_dm = load_image(test_dm_path, [args.image_size[1], args.image_size[0]])

    # Construct the point query matrix: an n x n x n grid of sample
    # coordinates covering the cube [-0.5, 0.5)
    n = args.n_point_per_edge
    step = 1.0 / n
    coordinates = np.zeros((n, n, n, 3))
    for i in range(n):
        for j in range(n):
            for k in range(n):
                coordinates[i, j, k, :] = np.array(
                    [-0.5 + step * i, -0.5 + step * j, -0.5 + step * k])
    coordinates = np.reshape(coordinates, (n * n * n, 3))

    # Run inference with the model
    print('start to inference')
    out_sdf, out_scale, out_quaternion = model.predict(test_dm, coordinates)
    out_sdf = out_sdf.squeeze()
    out_sdf = np.reshape(out_sdf, (n, n, n))
    # print(out_sdf)
    # np.save('./1a6f615e8b1b5ae4dbbc9440457e303e.npy', out_sdf)
    print('Success!')

    # Call the SDF visualizer
    app = Application()
    # out_sdf = np.load('./example/model_sdf_128.npy').astype(np.float16)
    out_sdf = np.clip(out_sdf, -0.01, 0.01)
    print(np.max(-out_sdf))
    print(np.min(-out_sdf))
    out_vox = np.where(np.less_equal(np.abs(out_sdf), 0.03),
                       np.ones_like(out_sdf), np.zeros_like(out_sdf))
    drawer = volume_visualizer.voxel_drawer.VoxelDrawer(0.02)
    xx, yy, zz = np.where(out_vox)
    vox_pos = np.stack([xx, yy, zz], axis=-1) * 0.02
    print(vox_pos.shape)
    drawer.set_data(vox_pos)
    # drawer = volume_visualizer.sdf_drawer.sdf_drawer_from_npy(-out_sdf)
    app.add_drawer(drawer)
    app.show()
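# The triple loop above builds the query grid one point at a time; a minimal
# vectorized sketch of the same construction, assuming the same [-0.5, 0.5)
# sampling range and the same (n * n * n, 3) output layout. No repo-specific
# helpers are used here.
import numpy as np

def build_query_grid(n):
    step = 1.0 / n
    axis = -0.5 + step * np.arange(n)
    # indexing='ij' preserves the (i, j, k) ordering of the nested loops
    xx, yy, zz = np.meshgrid(axis, axis, axis, indexing='ij')
    return np.stack([xx, yy, zz], axis=-1).reshape(n * n * n, 3)

# Usage sketch: coordinates = build_query_grid(args.n_point_per_edge)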
def main():
    # This may provide some performance boost
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # Read the arguments to get them from a JSON configuration file
    args = parse_args()

    # Call TFEstimator and pass the model function to it
    model = tf.estimator.Estimator(
        model_fn=deeplab_v3_plus_estimator_fn,
        model_dir=args.experiment_dir,
        params={
            'experiment_dir': args.experiment_dir,
            'num_classes': args.num_classes,
            'downsampling_factor': args.output_stride,
            'width_multiplier': args.width_multiplier,
            'weight_decay': args.weight_decay,
            'dropout_keep_prob': 1.0,
            'batchnorm': args.enable_batchnorm,
            'batchnorm_decay': args.batchnorm_decay,
            'initial_learning_rate': args.initial_learning_rate,
            'final_learning_rate': args.final_learning_rate,
            'learning_rate_power': args.learning_rate_decay_power,
            'num_epochs': None,
            'num_iterations': None,
            'data_format': args.data_format,
            'max_num_tensorboard_images': None,
            'log_every': None,
            'tensorboard_update_every': None,
            'export': True
        })

    # Export the model
    def serving_input_receiver_fn():
        features = tf.placeholder(tf.float32, [None, *args.image_size],
                                  name='image_tensor')
        receiver_tensors = {'input': features}
        features = tf.map_fn(
            lambda image: mean_image_subtraction(image,
                                                 args.dataset_mean_values),
            features)
        return tf.estimator.export.ServingInputReceiver(
            features=features, receiver_tensors=receiver_tensors)

    tf.logging.info("Exporting the model to {} ...".format(
        args.experiment_dir))
    model.export_savedmodel(args.experiment_dir, serving_input_receiver_fn)
    tf.logging.info("Exported successfully!")
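# A minimal sketch of how the SavedModel exported above could be loaded back
# for inference with the TF 1.x contrib predictor API. The timestamped export
# sub-directory name and the output keys are assumptions: export_savedmodel
# writes a timestamped folder under experiment_dir, and the actual output keys
# depend on the export_outputs defined in deeplab_v3_plus_estimator_fn.
import os
from tensorflow.contrib import predictor

def load_exported_model_sketch(experiment_dir, timestamp_dir):
    # The 'input' feed key used below matches the receiver_tensors defined in
    # serving_input_receiver_fn above.
    return predictor.from_saved_model(os.path.join(experiment_dir, timestamp_dir))

# Usage sketch (hypothetical directory name):
# predict_fn = load_exported_model_sketch(args.experiment_dir, '1554123456')
# outputs = predict_fn({'input': rgb_batch})  # rgb_batch: (N, H, W, C) float32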
def main():
    # This may provide some performance boost
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
    # config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # Read the arguments to get them from a JSON configuration file
    args = parse_args()

    model = ExportedModel(
        os.path.join(args.experiment_dir, args.test_model_timestamp_directory),
        args.image_size)

    cap = cv2.VideoCapture(0)

    # Allocation is done before the loop to make it as fast as possible
    output_frame = np.zeros((args.image_size[0], args.image_size[1] * 2,
                             args.image_size[2])).astype(np.uint8)

    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if frame is None:
            raise ValueError(
                "Camera is not connected or not detected properly.")
        rgb_input = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Resize to the size provided in the config file and run inference
        rgb_input, predictions, predictions_decoded = model.predict(rgb_input)

        # Apply the watershed algorithm to locate each apple in the frame
        predictions_decoded, fruit_centers, fruit_size = watershed.fruit_center_size(
            predictions_decoded)
        print(fruit_centers)
        print(fruit_size)

        # Fast hack as stated before: place both images side by side along the width axis.
        output_frame[:, :args.image_size[1]] = rgb_input
        output_frame[:, args.image_size[1]:] = predictions_decoded

        cv2.imshow('window', cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()
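# A hypothetical sketch of what a watershed-based splitter like
# watershed.fruit_center_size might do; the repo's implementation (and its
# exact return signature) may differ. It assumes `decoded` is an RGB uint8
# image in which fruit pixels are non-zero.
import cv2
import numpy as np

def fruit_center_size_sketch(decoded):
    # Binary mask of fruit pixels
    gray = cv2.cvtColor(decoded, cv2.COLOR_RGB2GRAY)
    _, mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)
    # Peaks of the distance transform act as seeds so touching apples get split
    dist = cv2.distanceTransform(mask, cv2.DIST_L2, 5)
    _, sure_fg = cv2.threshold(dist, 0.6 * dist.max(), 255, 0)
    sure_fg = sure_fg.astype(np.uint8)
    # Fruit pixels that are not confident seeds are flooded by the watershed
    unknown = cv2.subtract(mask, sure_fg)
    num_labels, markers = cv2.connectedComponents(sure_fg)
    markers = markers + 1
    markers[unknown == 255] = 0
    markers = cv2.watershed(decoded, markers)
    centers, sizes = [], []
    for label in range(2, num_labels + 1):  # 1 is background, -1 marks boundaries
        ys, xs = np.where(markers == label)
        if len(xs) > 0:
            centers.append((float(xs.mean()), float(ys.mean())))
            sizes.append(int(len(xs)))
    return decoded, centers, sizes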
def main():
    # This may provide some performance boost
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # Read the arguments to get them from a JSON configuration file
    args = parse_args()

    model = ExportedModel(
        os.path.join(args.experiment_dir, args.test_model_timestamp_directory),
        args.image_size)

    videogen = skvideo.io.vreader(args.test_video_file)
    videosav = skvideo.io.FFmpegWriter(args.output_video_file)

    # Allocation is done before the loop to make it as fast as possible
    dump_every = args.dump_frames_every
    output_frame = np.zeros(
        (dump_every, args.image_size[0], args.image_size[1] * 2,
         args.image_size[2])).astype(np.uint8)

    frame = 0
    for rgb_input in tqdm(videogen):
        # Resize to the size provided in the config file and run inference
        rgb_input, predictions, predictions_decoded = model.predict(rgb_input)

        # Apply the watershed algorithm to locate each apple in the frame
        predictions_decoded, fruit_centers, radius, fruit_size = watershed.fruit_center_size(
            predictions_decoded)
        print(fruit_centers)
        print(fruit_size)

        # Fast hack as stated before: place both images side by side along the width axis.
        output_frame[frame, :, :args.image_size[1]] = rgb_input
        output_frame[frame, :, args.image_size[1]:] = predictions_decoded

        cv2.imshow('window',
                   cv2.cvtColor(output_frame[frame], cv2.COLOR_RGB2BGR))
        frame += 1
        if frame == dump_every:
            if args.save_output_video:
                videosav.writeFrame(output_frame)
            frame = 0
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
    videosav.close()
- A .txt file listing the file names for the validation images and labels.
"""
import io
import os
import sys

import cv2
from PIL import Image
import tensorflow as tf
import numpy as np
from tqdm import tqdm

from utils import dataset_util
from utils.image_util import LABEL_COLORS
from utils.generic_util import parse_args

args = parse_args()

# Enable only if validation data exists
VALIDATION_EXISTS = args.VALIDATION_EXISTS
# Path to the directory which will store the TFRecord train file
TRAIN_TF_RECORD_NAME = args.TRAIN_TF_RECORD_NAME
# Path to the directory which will store the TFRecord validation file
VAL_TF_RECORD_NAME = args.VAL_TF_RECORD_NAME
# Path to the file containing the training data
TRAIN_DATA_LIST_NAME = args.TRAIN_DATA_LIST_NAME
# Path to the file containing the validation data
VAL_DATA_LIST_NAME = args.VAL_DATA_LIST_NAME
# Path to the directory containing the training images
IMAGE_DATA_DIR = args.IMAGE_DATA_DIR
# Path to the directory containing the training labels
LABEL_DATA_DIR = args.LABEL_DATA_DIR
# Resize Image Height and Width
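# A minimal sketch of how one image/label pair might be serialized into the
# TFRecord files named above. The feature keys ('image/encoded',
# 'label/encoded') and the raw-bytes encoding are assumptions; the repo's
# dataset_util helpers may use different key names and additional fields.
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def image_label_to_example_sketch(image_path, label_path):
    # Read the already-encoded image and label files and wrap them as features
    with open(image_path, 'rb') as f:
        encoded_image = f.read()
    with open(label_path, 'rb') as f:
        encoded_label = f.read()
    return tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': _bytes_feature(encoded_image),
        'label/encoded': _bytes_feature(encoded_label),
    }))

# Usage sketch:
# writer = tf.python_io.TFRecordWriter(TRAIN_TF_RECORD_NAME)
# writer.write(image_label_to_example_sketch(img_path, lbl_path).SerializeToString())
# writer.close()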
def main():
    # This may provide some performance boost
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
    # config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # Read the arguments to get them from a JSON configuration file
    args = parse_args()

    # Call TFEstimator and pass the model function to it
    model = tf.estimator.Estimator(
        model_fn=VAE_deepSDF_estimator_fn,
        model_dir=args.experiment_dir,
        params={
            'experiment_dir': args.experiment_dir,
            'pretrained_model_dir': args.pretrained_model_dir,
            'num_sample_points': args.num_sample_points,
            'delta': args.delta,
            'initial_learning_rate': args.initial_learning_rate,
            'final_learning_rate': args.final_learning_rate,
            'learning_rate_power': args.learning_rate_decay_power,
            'num_iterations': None,
            'log_every': args.log_every,
            'data_format': args.data_format,
            'num_epochs': None,
            'tensorboard_update_every': None,
            'downsampling_factor': args.output_stride,
            'width_multiplier': args.width_multiplier,
            'weight_decay': args.weight_decay,
            'dropout_keep_prob': args.dropout_keep_prob,
            'batchnorm': args.enable_batchnorm,
            'batchnorm_decay': args.batchnorm_decay,
            'latent_dim': args.latent_dim,
            'batch_size': args.batch_size,
            'export': True
        })

    # Export the model
    def serving_input_receiver_fn():
        features = {
            'depth_map':
                tf.placeholder(tf.float32,
                               [None, args.image_size[0], args.image_size[1]],
                               name='depth_map_tensor'),
            'normal_map':
                tf.placeholder(tf.float32, [
                    None, args.image_size[0], args.image_size[1],
                    args.image_size[2]
                ],
                               name='normal_map_tensor'),
            'foreground_map':
                tf.placeholder(tf.float32,
                               [None, args.image_size[0], args.image_size[1]],
                               name='foreground_map_tensor'),
            'points':
                tf.placeholder(tf.float32, [None, args.num_sample_points, 3],
                               name='points')
        }
        receiver_tensors = features
        return tf.estimator.export.ServingInputReceiver(
            features=features, receiver_tensors=receiver_tensors)

    tf.logging.info("Exporting the model to {} ...".format(
        args.experiment_dir))
    model.export_savedmodel(args.experiment_dir, serving_input_receiver_fn)
    tf.logging.info("Exported successfully!")
def main():
    # This may provide some performance boost
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.3
    config.gpu_options.allow_growth = True
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # Read the arguments to get them from a JSON configuration file
    args = parse_args()

    train_data_size = get_num_records(args.train_data_file)
    num_iterations = train_data_size // args.batch_size

    # Setting args.log_every to -1 logs once per epoch
    log_every = args.log_every if args.log_every != -1 else num_iterations

    # Make a RunConfig to save a checkpoint per training epoch
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_steps=num_iterations,
        keep_checkpoint_max=args.max_num_checkpoints,
        log_step_count_steps=log_every,
        session_config=config)

    # Check if there is a checkpoint from which a model will be loaded or not
    warm_start_from = args.experiment_dir if tf.train.get_checkpoint_state(
        args.experiment_dir) is not None else None

    # Call TFEstimator and pass the model function to it
    model = tf.estimator.Estimator(
        model_fn=deeplab_v3_plus_estimator_fn,
        model_dir=args.experiment_dir,
        config=run_config,
        params={
            'experiment_dir': args.experiment_dir,
            'pretrained_model_dir': args.pretrained_model_dir,
            'num_classes': args.num_classes,
            'downsampling_factor': args.output_stride,
            'width_multiplier': args.width_multiplier,
            'weight_decay': args.weight_decay,
            'dropout_keep_prob': 1.0,
            'batchnorm': args.enable_batchnorm,
            'batchnorm_decay': args.batchnorm_decay,
            'initial_learning_rate': args.initial_learning_rate,
            'final_learning_rate': args.final_learning_rate,
            'learning_rate_power': args.learning_rate_decay_power,
            'num_epochs': args.num_epochs,
            'num_iterations': num_iterations,
            'data_format': args.data_format,
            'max_num_tensorboard_images': args.max_num_tensorboard_images,
            'log_every': log_every,
            'tensorboard_update_every': args.tensorboard_update_every,
            'dataset_mean_values': args.dataset_mean_values,
        },
        warm_start_from=warm_start_from)

    if args.train_data_file == "" or args.val_data_file == "":
        raise ValueError("Train and Validation data files must exist")

    # Create a train specification object and an evaluation specification
    # object, and pass the corresponding input function to each of them
    aug_params = {
        'aug_delta_brightness': args.aug_delta_brightness,
        'aug_flip_left_right': args.aug_flip_left_right,
        'aug_delta_scale_pad_crop': args.aug_delta_scale_pad_crop,
        'dataset_mean_values': args.dataset_mean_values
    }
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn_images_labels(args.train_data_file,
                                                args.image_size,
                                                args.batch_size,
                                                args.num_epochs,
                                                args.shuffle,
                                                args.buffer_size,
                                                is_training=True,
                                                aug_params=aug_params),
        max_steps=(args.num_epochs - 1) * num_iterations)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: input_fn_images_labels(args.val_data_file,
                                                args.image_size,
                                                args.batch_size,
                                                1,
                                                args.shuffle,
                                                args.buffer_size,
                                                is_training=False,
                                                aug_params=aug_params),
        throttle_secs=1)

    tf.estimator.train_and_evaluate(model,
                                    train_spec=train_spec,
                                    eval_spec=eval_spec)

    tf.logging.info("Training completed successfully!")
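# get_num_records is assumed to count the examples in the training TFRecord so
# that num_iterations can be derived from the batch size. A minimal sketch of
# one possible implementation (the repo's version may differ):
def get_num_records_sketch(tf_record_path):
    # Iterate once over the serialized records and count them (TF 1.x API)
    return sum(1 for _ in tf.python_io.tf_record_iterator(tf_record_path))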
def main():
    # This may provide some performance boost
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # Read the arguments to get them from a JSON configuration file
    args = parse_args()

    # Initialize the ZED camera
    zed = sl.Camera()

    # Set configuration parameters
    init = sl.InitParameters()
    init.camera_resolution = sl.RESOLUTION.RESOLUTION_HD1080
    init.depth_mode = sl.DEPTH_MODE.DEPTH_MODE_QUALITY
    init.coordinate_units = sl.UNIT.UNIT_MILLIMETER

    # Open the camera
    err = zed.open(init)
    if err != sl.ERROR_CODE.SUCCESS:
        print(repr(err))
        zed.close()
        exit(1)

    # Set runtime parameters after opening the camera
    runtime = sl.RuntimeParameters()
    runtime.sensing_mode = sl.SENSING_MODE.SENSING_MODE_STANDARD

    # Prepare image size to retrieve images
    new_width = args.image_size[1]
    new_height = args.image_size[0]
    print("The depth range is from {0} to {1}".format(
        zed.get_depth_min_range_value(), zed.get_depth_max_range_value()))

    # Declare your sl.Mat matrices
    image_zed = sl.Mat(new_width, new_height, sl.MAT_TYPE.MAT_TYPE_8U_C4)
    point_cloud = sl.Mat()

    # Allocation is done before the loop to make it as fast as possible
    output_frame = np.zeros((args.image_size[0], args.image_size[1] * 2,
                             args.image_size[2])).astype(np.uint8)

    model = ExportedModel(
        os.path.join(args.experiment_dir, args.test_model_timestamp_directory),
        args.image_size)

    # Mode can be "center" or "median"; in the median mode, the number of
    # samples must be specified
    mode = "median"
    num_sample = 10

    while True:
        err = zed.grab(runtime)
        if err == sl.ERROR_CODE.SUCCESS:
            # Retrieve the left image
            zed.retrieve_image(image_zed, sl.VIEW.VIEW_LEFT, sl.MEM.MEM_CPU,
                               int(new_width), int(new_height))
            # Retrieve the colored point cloud, which is aligned on the left image
            zed.retrieve_measure(point_cloud, sl.MEASURE.MEASURE_XYZRGBA,
                                 sl.MEM.MEM_CPU, int(new_width),
                                 int(new_height))
            if image_zed is None:
                raise ValueError(
                    "Camera is not connected or not detected properly.")
            frame = image_zed.get_data()

            # Resize to the size provided in the config file and run inference
            rgb_input = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            rgb_input, predictions, predictions_decoded = model.predict(rgb_input)

            # Apply the watershed algorithm to locate each apple in the frame
            predictions_decoded, fruit_centers, radius, fruit_size = watershed.fruit_center_size(
                predictions_decoded)

            if mode == "center":
                # Calculate the depth of each fruit center based on the point cloud
                for i in range(len(fruit_centers)):
                    distance = get_distance(fruit_centers[i][0],
                                            fruit_centers[i][1], point_cloud)
                    if not np.isnan(distance) and not np.isinf(distance):
                        distance = round(distance)
                        fruit_centers[i] = (fruit_centers[i][0],
                                            fruit_centers[i][1], distance)
                        cv2.putText(predictions_decoded,
                                    "{}mm".format(distance),
                                    (int(fruit_centers[i][0]) + 4,
                                     int(fruit_centers[i][1]) + 4),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.45,
                                    (0, 255, 0), 1)
                        print("Distance to Camera at fruit {0}: {1} mm\n".format(
                            i, distance))
                    else:
                        fruit_centers[i] = (fruit_centers[i][0],
                                            fruit_centers[i][1], None)
                        cv2.putText(predictions_decoded, "N/A",
                                    (int(fruit_centers[i][0]) + 4,
                                     int(fruit_centers[i][1]) + 4),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.45,
                                    (0, 255, 0), 1)
                        print("Can't estimate distance at fruit {0}".format(i))
            elif mode == "median":
                # Calculate the depth of each fruit as the median of sampled distances
                for i in range(len(fruit_centers)):
                    distance = get_sampled_distances(num_sample,
                                                     fruit_centers[i],
                                                     radius[i], point_cloud)
                    if not len(distance) == 0:
                        distance = round(np.median(distance))
                        fruit_centers[i] = (fruit_centers[i][0],
                                            fruit_centers[i][1], distance)
                        cv2.putText(predictions_decoded,
                                    "{}mm".format(distance),
                                    (int(fruit_centers[i][0]) - 4,
                                     int(fruit_centers[i][1]) - 4),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.45,
                                    (0, 255, 0), 1)
                        print("Distance to Camera at fruit {0}: {1} mm\n".format(
                            i, distance))
                    else:
                        fruit_centers[i] = (fruit_centers[i][0],
                                            fruit_centers[i][1], None)
                        cv2.putText(predictions_decoded, "N/A",
                                    (int(fruit_centers[i][0]) + 4,
                                     int(fruit_centers[i][1]) + 4),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.45,
                                    (0, 255, 0), 1)
                        print("Can't estimate distance at fruit {0}".format(i))

            # Fast hack as stated before: place both images side by side along the width axis.
            output_frame[:, :args.image_size[1]] = rgb_input
            output_frame[:, args.image_size[1]:] = predictions_decoded

            cv2.imshow('window', cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            sys.stdout.flush()

    # When everything is done, release the capture
    cv2.destroyAllWindows()
    zed.close()
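# get_distance is assumed to look up the 3D point under a pixel in the XYZRGBA
# point cloud and return its Euclidean distance to the camera (in millimetres,
# matching UNIT_MILLIMETER above). A minimal, hypothetical sketch; the repo's
# implementation may differ.
import math

def get_distance_sketch(x, y, point_cloud):
    # sl.Mat.get_value returns (error_code, [X, Y, Z, RGBA]) for that pixel;
    # invalid depth yields NaNs, which the np.isnan check in the caller handles.
    err, point3d = point_cloud.get_value(int(round(x)), int(round(y)))
    return math.sqrt(point3d[0] ** 2 + point3d[1] ** 2 + point3d[2] ** 2)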