def tokenize_dataset(input_folder: str, output_folder: str, chunk_size: int):
    make_dir(output_folder)

    data_writers = {
        TRAIN: DataWriter(os.path.join(output_folder, TRAIN), file_prefix='data', file_suffix='jsonl.gz', chunk_size=chunk_size),
        VALID: DataWriter(os.path.join(output_folder, VALID), file_prefix='data', file_suffix='jsonl.gz', chunk_size=chunk_size),
        TEST: DataWriter(os.path.join(output_folder, TEST), file_prefix='data', file_suffix='jsonl.gz', chunk_size=chunk_size)
    }
    partition_counters = {TRAIN: Counter(), VALID: Counter(), TEST: Counter()}

    for i, (sample, partition) in enumerate(data_generator(input_folder)):
        data_writers[partition].add(sample)
        partition_counters[partition][sample[OUTPUT]] += 1

        if (i + 1) % chunk_size == 0:
            print('Wrote {0} samples.'.format(i + 1), end='\r')
    print()

    for writer in data_writers.values():
        writer.close()

    print(partition_counters)
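
# Hedged usage sketch (not part of the original module). It assumes that data_generator yields
# (sample, partition) pairs in which each sample is a dict keyed by OUTPUT, and the folder
# paths below are placeholders chosen purely for illustration.
def _example_tokenize_usage():
    # Writes gzipped JSONL chunks under data/tokenized/{train,valid,test}
    # and prints per-label counts for each partition when it finishes.
    tokenize_dataset(input_folder='data/raw',
                     output_folder='data/tokenized',
                     chunk_size=10000)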
def __init__(self, hyper_parameters: HyperParameters, save_folder: str, is_train: bool):
    self.hypers = hyper_parameters
    self.save_folder = save_folder
    self.metadata: Dict[str, Any] = dict()

    # Get the model output type
    self._output_type = OutputType[self.hypers.model_params['output_type'].upper()]

    make_dir(self.save_folder)
    self.name = 'model'  # Default name
def __init__(self, output_folder: str, file_prefix: str, file_suffix: str, chunk_size: int, mode: str = 'w'):
    self._output_folder = output_folder
    self._file_prefix = file_prefix
    self._file_suffix = file_suffix
    self._chunk_size = chunk_size

    # Initialize the data list
    self._dataset: List[Any] = []

    # Create the output directory if necessary
    make_dir(self._output_folder)

    # Set the writing mode
    mode = mode.lower()
    if mode in ('w', 'write'):
        self._mode = WriteMode.WRITE
    elif mode in ('a', 'append'):
        self._mode = WriteMode.APPEND
    else:
        raise ValueError(f'Unknown writing mode: {mode}')

    # Set the initial file index
    self._file_index = 0
    if self._mode == WriteMode.APPEND:
        # Regex to extract the chunk index from existing files (raw string avoids the invalid '\.' escape warning)
        file_name_regex = re.compile(rf'{file_prefix}([0-9]+)\.{file_suffix}')

        # Get the index from all existing files and resume numbering after the largest one
        for file_name in os.listdir(output_folder):
            match = file_name_regex.match(file_name)
            if match is not None:
                index = int(match.group(1))
                self._file_index = max(self._file_index, index + 1)
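
# Illustrative sketch (an addition, not original code) of how the append-mode regex above resumes
# chunk numbering. The file names are hypothetical; with file_prefix='data' and
# file_suffix='jsonl.gz', the existing chunks 'data0.jsonl.gz' and 'data1.jsonl.gz' match the
# pattern, so the next chunk would be written with index 2.
def _example_resume_index(existing_files=('data0.jsonl.gz', 'data1.jsonl.gz', 'metadata.json')):
    file_prefix, file_suffix = 'data', 'jsonl.gz'
    pattern = re.compile(rf'{file_prefix}([0-9]+)\.{file_suffix}')

    next_index = 0
    for file_name in existing_files:
        match = pattern.match(file_name)
        if match is not None:
            next_index = max(next_index, int(match.group(1)) + 1)

    return next_index  # -> 2 for the example file list above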
            partition = TEST

        writers[partition].add(sample)
        label_counters[partition][sample[OUTPUT]] += 1

        if (index + 1) % CHUNK_SIZE == 0:
            print('Completed {0} samples'.format(index + 1), end='\r')
    print()

    # Close all writers
    for writer in writers.values():
        writer.close()

    print(label_counters)


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--input-folder', type=str, required=True)
    parser.add_argument('--output-folder', type=str, required=True)
    args = parser.parse_args()

    random.seed(42)

    make_dir(args.output_folder)

    print('Starting Training Dataset...')
    write_dataset(args.input_folder, args.output_folder, series=TRAIN)

    print('Starting Test Dataset...')
    write_dataset(args.input_folder, args.output_folder, series=TEST)
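
# Hypothetical invocation of the script above (the script file name and folder names are
# illustrative assumptions, not taken from the source):
#
#   python create_dataset.py --input-folder <raw-data-folder> --output-folder <dataset-folder>
#
# Only the two flags shown here are parsed by the argument parser above.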
    parser.add_argument('--log-folder', type=str, required=True, help='Path to folder containing the simulation logs.')
    parser.add_argument('--power-system-type', type=str, choices=['bluetooth', 'temp'], required=True, help='The sensor type.')
    parser.add_argument('--output-folder', type=str, help='Path to the output folder.')
    args = parser.parse_args()

    output_folder = args.log_folder if args.output_folder is None else args.output_folder
    make_dir(output_folder)

    # We first copy all logs that belong to neither SAMPLE_RNN nor BUDGET_RNN models to the
    # output folder. This is done for convenience. (Note: 'and' is required here; with 'or'
    # every file would pass the filter.)
    for log_file_name in os.listdir(args.log_folder):
        if ('SAMPLE_RNN' not in log_file_name) and ('BUDGET_RNN' not in log_file_name):
            old_path = os.path.join(args.log_folder, log_file_name)
            new_path = os.path.join(output_folder, log_file_name)
            copyfile(old_path, new_path)

    # Restore the given models and get the validation results
    adaptive_model_accuracy: List[Dict[float, float]] = []
    fixed_model_accuracy: List[Dict[float, float]] = []

    adaptive_logs: List[Dict[str, Dict[str, Dict[str, Any]]]] = []
    fixed_budget_logs: List[Dict[str, Dict[str, Dict[str, Any]]]] = []
def plot_and_save(sim_results: Dict[str, SimulationResult],
                  runtime_systems: List[RuntimeSystem],
                  output_folder: str,
                  budget: float,
                  max_time: int,
                  noise_generator: NoiseGenerator,
                  noise_terms: List[float],
                  power_system_type: PowerType,
                  should_plot: bool,
                  save_plots: bool):
    # Make the output folder if necessary
    make_dir(output_folder)

    # Log the test results for each adaptive system
    model_names: Set[str] = set()
    system_dict = {system.name: system for system in runtime_systems}

    for system_name in sorted(sim_results.keys()):
        system = system_dict[system_name]
        sim_result = sim_results[system_name]

        # We compute the validation accuracy for this budget for the adaptive models.
        # This allows us to choose which backend model to select at testing time.
        if system.system_type == SystemType.ADAPTIVE:
            valid_accuracy = system.estimate_validation_results(budget=budget, max_time=max_time)
        else:
            valid_accuracy = None

        model_names.add(system_name.split()[0])

        log_file_name = LOG_FILE_FMT.format(system.system_type.name.lower(), system.model_name, power_system_type.name.lower())
        log_path = os.path.join(output_folder, log_file_name)
        save_test_log(accuracy=sim_result.accuracy[-1],
                      power=sim_result.power[-1],
                      valid_accuracy=valid_accuracy,
                      budget=budget,
                      key=str(noise_generator),
                      system_name=system.name,
                      output_file=log_path)

        print('{0} Accuracy: {1:.5f}, {0} Power: {2:.5f}'.format(system_name, sim_result.accuracy[-1], sim_result.power[-1]))

    if not should_plot:
        return

    # Filter the simulation results. Note: 'baseline_to_plot' is expected to be defined
    # elsewhere (e.g. at module scope); it is not a parameter of this function.
    systems_to_keep = []
    if baseline_to_plot in ('under_budget', 'all'):
        for name in model_names:
            systems_to_keep.append('{0} FIXED_UNDER_BUDGET'.format(name))

    sim_results = {
        system_name: result for system_name, result in sim_results.items()
        if system_name in systems_to_keep or 'ADAPTIVE' in system_name or 'RANDOMIZED' in system_name
    }

    colors = {system_name: COLORS[i] for i, system_name in enumerate(sim_results.keys())}

    # List of times for plotting
    times = np.arange(max_time) + 1

    # Plot the results
    with plt.style.context('seaborn-ticks'):
        fig, (ax1, ax2, ax3) = plt.subplots(figsize=(16, 12), nrows=3, ncols=1, sharex=True)

        # Plot the energy noise terms
        ax1.plot(times, noise_terms, color='#e34a33')
        ax1.set_title('Per-Step Energy Noise')
        ax1.set_ylabel('Energy (mJ)')

        # Plot the setpoints of each adaptive system
        for system_name, sim_result in sorted(sim_results.items()):
            if 'adaptive' in system_name.lower():
                ax2.plot(times, sim_result.target_budgets * max_time, label=system_name, color=colors[system_name])

        ax2.axhline(budget * max_time, color='k', linewidth=2)
        ax2.legend(fontsize=9)
        ax2.set_title('Budget Setpoint')
        ax2.set_ylabel('Energy (mJ)')

        # Plot the moving average power of each system
        for system_name, sim_result in sorted(sim_results.items()):
            avg_power = moving_avg_power(sim_result.energy, window=20)
            diff = len(times) - len(avg_power)
            ax3.plot(times[diff:], avg_power, label=system_name, color=colors[system_name])

        ax3.axhline(budget, color='k', linewidth=2)
        ax3.legend(loc='lower center', fontsize=8)
        ax3.set_title('Moving Average Power for Each Policy')
        ax3.set_ylabel('Power (mW)')
        ax3.set_xlabel('Time')

        plt.tight_layout()

        if save_plots:
            output_file = os.path.join(output_folder, 'results_{0}.pdf'.format(budget))
            plt.savefig(output_file)
        else:
            plt.show()
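
# Hedged sketch of the moving_avg_power helper used above; the real implementation lives
# elsewhere in the repository. This version assumes 'energy' holds the cumulative energy (mJ)
# at each step and that consecutive steps are one time unit apart, so the windowed difference
# of cumulative energy divided by the window length approximates the average power (mW). The
# result is `window` entries shorter than the input, matching the `diff` offset in the plot.
def _moving_avg_power_sketch(energy: np.ndarray, window: int = 20) -> np.ndarray:
    energy = np.asarray(energy, dtype=float)

    # Energy consumed within each sliding window of `window` steps
    window_energy = energy[window:] - energy[:-window]

    # Average power over the window (energy per unit time)
    return window_energy / window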
def split_dataset(input_folder: str, output_folder: str, fractions: List[float], file_prefix: str, chunk_size: int, file_type: str):
    assert len(fractions) == len(PARTITIONS), 'Must provide enough fractions to account for all partitions'
    assert file_type in FILE_TYPES, f'Invalid file type: {file_type}'

    # Make output folder if necessary
    make_dir(output_folder)

    # Create the data manager
    data_manager = get_data_manager(input_folder, SAMPLE_ID, DATA_FIELDS, extension=file_type)
    data_manager.load()

    data_iterator = data_manager.iterate(should_shuffle=False, batch_size=chunk_size)
    num_samples = data_manager.length

    # Get folders for each partition
    train_folder = os.path.join(output_folder, TRAIN)
    valid_folder = os.path.join(output_folder, VALID)
    test_folder = os.path.join(output_folder, TEST)

    # Track counts per partition
    partition_counters: Counter = Counter()

    # Create data writers
    if file_type == 'npz':
        partition_writers = {
            TRAIN: NpzDataWriter(train_folder, file_prefix=file_prefix, file_suffix=file_type, chunk_size=chunk_size, sample_id_name=SAMPLE_ID, data_fields=DATA_FIELDS, mode='w'),
            VALID: NpzDataWriter(valid_folder, file_prefix=file_prefix, file_suffix=file_type, chunk_size=chunk_size, sample_id_name=SAMPLE_ID, data_fields=DATA_FIELDS, mode='w'),
            TEST: NpzDataWriter(test_folder, file_prefix=file_prefix, file_suffix=file_type, chunk_size=chunk_size, sample_id_name=SAMPLE_ID, data_fields=DATA_FIELDS, mode='w')
        }
    else:
        partition_writers = {
            TRAIN: DataWriter(train_folder, file_prefix=file_prefix, file_suffix=file_type, chunk_size=chunk_size, mode='w'),
            VALID: DataWriter(valid_folder, file_prefix=file_prefix, file_suffix=file_type, chunk_size=chunk_size, mode='w'),
            TEST: DataWriter(test_folder, file_prefix=file_prefix, file_suffix=file_type, chunk_size=chunk_size, mode='w')
        }

    # Write to chunked files
    for index, sample in enumerate(data_iterator):
        partition_index = get_partition_index(sample, fractions)
        partition_folder = PARTITIONS[partition_index]

        partition_writers[partition_folder].add(sample)
        partition_counters[partition_folder] += 1

        if (index + 1) % chunk_size == 0:
            print(f'Completed {index + 1}/{num_samples} samples.', end='\r')
    print()

    # Flush any remaining data samples
    for writer in partition_writers.values():
        writer.flush()

    # Print out metrics and save metadata
    print('====== RESULTS ======')
    total = sum(partition_counters.values())
    metadata: Dict[str, Dict[str, float]] = dict()

    for series in PARTITIONS:
        count = partition_counters[series]
        frac = count / total
        metadata[series] = dict(count=count, frac=frac)
        print(f'{series.capitalize()}: {count} ({frac:.03f})')

    metadata_file = os.path.join(output_folder, 'metadata.json')
    save_by_file_suffix(metadata, metadata_file)
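
# Hedged sketch of the get_partition_index helper referenced above; the actual routine is
# defined elsewhere. This version assumes the split is made by hashing the sample id
# (SAMPLE_ID) so the assignment is deterministic across runs, and that 'fractions' sums to 1.
def _get_partition_index_sketch(sample: Dict[str, Any], fractions: List[float]) -> int:
    import hashlib

    # Map the sample id to a stable pseudo-uniform value in [0, 1)
    sample_hash = int(hashlib.md5(str(sample[SAMPLE_ID]).encode()).hexdigest(), 16)
    point = (sample_hash % 10000) / 10000.0

    # Pick the partition whose cumulative fraction covers this value
    cumulative = 0.0
    for index, frac in enumerate(fractions):
        cumulative += frac
        if point < cumulative:
            return index

    return len(fractions) - 1  # Guard against floating-point round-off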
from camera import VideoStreamer, VideoWriter
from abc import abstractmethod
from math import pi

import cv2
import cv2.aruco as aruco
import numpy as np
import os
import time

from utils import file_utils

CALIBRATION_FILE = "camera_calibration/calibration_parameters/arducam.yaml"

POSE_DIR = "marker_detection/logs/pose_data"
file_utils.make_dir(POSE_DIR)
POSE_FILE = file_utils.create_file_name_date() + ".txt"  # Default pose file name

DEFAULT_FREQ = 20  # Hz

'''
Marker Tracker Classes

These classes are used to track a marker using a single camera. They maintain various image
and marker data, and can be used to retrieve the pose relative to the camera. Each class is
used to detect a different kind of marker. The classes are derived from the VideoStreamer
class, which is used to retrieve images from a camera or video file in a parallel thread.
'''


# Abstract base class. Each marker tracker needs to define the marker length, pose, the
# current frame, and some variables used for visualization.
class MarkerTracker(VideoStreamer):
    def __init__(self, src=0, use_pi=-1, resolution=480,
        params_files.extend(iterate_files(params_file, pattern=r'.*json'))
    else:
        params_files.append(params_file)

    for params_file in params_files:
        assert os.path.exists(params_file), f'The file {params_file} does not exist!'
        assert params_file.endswith('.json'), 'The params file must be a JSON.'

    trials = max(args.trials, 1)
    num_models = trials * len(params_files)

    # Create save folder (if necessary)
    base_save_folder = args.save_folder
    make_dir(base_save_folder)

    # Create a date-named folder for better organization
    current_day = datetime.now().strftime('%d_%m_%Y')
    save_folder = os.path.join(base_save_folder, current_day)
    make_dir(save_folder)

    for data_folder in args.data_folders:
        print(f'Started {data_folder}')
        print('====================')

        # Use the absolute path to avoid issues with relative referencing during later optimization phases
        data_folder = os.path.abspath(data_folder)

        for trial in range(trials):
            print(f'Starting trial {trial + 1}/{trials}')