def stochastic_weight_average(self):
    experiment_set = self.dataset.experiments[self.train_set_name]
    set_name = self.train_set_name.lower()
    has_run_average = self.graphs[set_name].update_stochastic_weighted_average_parameters()  # noqa: F841
    # Think this through - we can probably skip this step but it doesn't harm anything
    # if not has_run_average:
    #     return False

    self.graphs[set_name].prepare_for_batchnorm_update()
    self.graphs[set_name].train()
    experiment_set.reset_epoch()
    while True:
        Console_UI().inform_user('==> updating batchnorm')
        i = 0
        for batch in experiment_set:
            i += 1
            if i % 100 == 1:
                Console_UI().inform_user(
                    f'Updating batchnorm for {self.get_name()}, doing {self.train_set_name} on step {i}')
            if batch is None:
                self.graphs[set_name].finish_batchnorm_update()
                return
            self.graphs[set_name].update_batchnorm(batch)
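# The method above implements the classic SWA recipe by hand: fold the current
# weights into a running average, then make one pass over the training data to
# re-estimate the BatchNorm running statistics. A minimal, self-contained sketch
# of the same idea using PyTorch's built-in SWA utilities - the model and loader
# below are toy stand-ins, and this codebase's dict-based batches would need
# unwrapping before update_bn could consume them:
import torch
import torch.nn as nn
from torch.optim.swa_utils import AveragedModel, update_bn
from torch.utils.data import DataLoader, TensorDataset

model = nn.Sequential(nn.Linear(8, 8), nn.BatchNorm1d(8), nn.ReLU())
loader = DataLoader(TensorDataset(torch.randn(64, 8)), batch_size=16)

swa_model = AveragedModel(model)    # maintains the running weight average
swa_model.update_parameters(model)  # normally called once per epoch or step
update_bn(loader, swa_model)        # one data pass to refresh BN statistics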
def load_pytorch_neural_net(self, neural_net_name: str):
    current_run_last_save = self.get_network_full_path(neural_net_name=neural_net_name,
                                                       scene_name='last')
    if os.path.exists(current_run_last_save):
        Console_UI().inform_user(f"Resuming current run's {neural_net_name:>90}::last network")
        return torch.load(current_run_last_save)['state_dict']

    if self.resume_prefix is not None:
        scene_name = self.global_cfgs.get('resume_scene')
        network_filename = self.get_network_filename(neural_net_name=neural_net_name,
                                                     scene_name=scene_name)
        neural_net_path = os.path.join(self.scenario_log_root, self.resume_prefix,
                                       'neural_nets', network_filename)
        if os.path.exists(neural_net_path):
            Console_UI().inform_user(f'Resuming from {self.resume_prefix} the network {network_filename}')
            return torch.load(neural_net_path)['state_dict']

    if self.model_zoo_root is not None:
        model_zoo_neural_net_path = os.path.join(self.model_zoo_root, f'{neural_net_name}.t7')
        if os.path.exists(model_zoo_neural_net_path):
            Console_UI().inform_user(f'Loading from model_zoo {model_zoo_neural_net_path}')
            return torch.load(model_zoo_neural_net_path)['state_dict']

    if not self.global_cfgs.get('silent_init_info'):
        Console_UI().inform_user(f'{neural_net_name} does not exist; initializing from scratch')
    return None
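# All three fallbacks above expect checkpoints saved as a dict with a
# 'state_dict' entry. A minimal round-trip sketch of that format (the path is
# hypothetical, chosen only for illustration):
import torch
import torch.nn as nn

net = nn.Linear(4, 2)
torch.save({'state_dict': net.state_dict()}, '/tmp/example_net.t7')
state = torch.load('/tmp/example_net.t7')['state_dict']
net.load_state_dict(state)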
def __init__(
    self,
    neural_net_name,
    neural_net_cfgs,
    layers,
    optimizer_type: str = 'sgd',
    input_name: str = '',
    output_name: str = '',
    input_shape: list = None,
    output_shape: list = None,
    load_from_batch=True,
    add_noise=False,
):
    super().__init__()
    self.neural_net_name = neural_net_name
    self.neural_net_cfgs = neural_net_cfgs
    self.input_name = input_name
    self.output_name = output_name
    # Avoid mutable default arguments - fall back to fresh lists instead
    self.input_shape = input_shape if input_shape is not None else []
    self.output_shape = output_shape if output_shape is not None else []
    self.layers = layers
    self.optimizer_type = optimizer_type
    self.add_noise = add_noise
    self.load_from_batch = load_from_batch

    self.weighted_average_parameters = None
    self.weighted_average_parameters_counter = 0
    self.batch_norm_update_counter = 0
    self.momenta = {}

    if self.load_from_batch:
        self.forward = self.forward_from_batch
    else:
        self.forward = self.forward_data

    if Global_Cfgs().get('DEVICE_BACKEND') == 'cuda':
        self.layers.cuda()
        self.layers = nn.DataParallel(layers)

    self.network_memory_usage = None
    try:
        self.network_memory_usage = summarizeModelSize(
            model=layers,
            input_size=(*self.input_shape, ),
            device=Global_Cfgs().get('DEVICE_BACKEND'),
        )
    except Exception as e:
        Console_UI().warn_user(f'Failed to get size for {neural_net_name}: {e}')

    Console_UI().debug(self.layers)

    self.optimizer = None
    self.optimizer = self.get_optimizer()
    self.load()
def validate(
    self,
    iteration_counter,
    scene_name: str,
    set_name=None,
):
    if set_name is None:
        set_name = self.val_set_name

    if set_name not in self.dataset.experiments:
        raise ValueError(f'The set "{set_name}" cannot be found in data')

    experiment_set = self.dataset.experiments[set_name]
    Console_UI().inform_user(f'Validating {self.get_name()}: {set_name}')
    bar = ProgressBar(total=len(experiment_set))
    for batch in experiment_set:
        if batch is None:
            bar.done()
            break
        bar.current += 1
        bar()
        batch.update({
            'epoch': self.epoch,
            # 'graph_name': self.graphs[self.train_set_name.lower()].get_name(),
            'graph_name': self.graphs[set_name.lower()].get_name(),
            'task_name': self.get_name(),
            'iteration_counter': iteration_counter,
        })
        self.graphs[set_name].eval(batch)

    self.end_epoch(set_name, scene_name=scene_name)
def setup_annotations(self):
    rel_path = self.get_cfgs('annotations_path')
    fm = File_Manager()
    self.annotations = fm.read_csv_annotations(
        dataset_name=self.dataset_name,
        annotations_rel_path=rel_path,
        multi_view_per_sample=self.multi_view_per_sample,
    )
    if self.annotations is None:
        annotations_url = self.get_cfgs('annotations_url')
        available_csvs_str = "', '".join(fm.get_available_csvs(self.dataset_name))
        Console_UI().inform_user(
            '"%s" does not exist among the available datasets: \'%s\'.\nDownloading from:\n %s' %
            (rel_path, available_csvs_str, annotations_url))
        fm.download_zip_file(
            url=annotations_url,
            dataset_name=self.get_dataset_name(),
        )
        self.annotations = fm.read_csv_annotations(
            dataset_name=self.dataset_name,
            annotations_rel_path=rel_path,
            multi_view_per_sample=self.multi_view_per_sample,
        )

    if self.get_cfgs('test_run'):
        # Keep only the first 100 samples for quick test runs
        self.annotations = self.annotations[self.annotations.index.get_level_values(0) < 100]
def __init__(
    self,
    annotations_root,
    scenario_log_root,
    tmp_root,
    model_zoo_root,
    resume_prefix,
    resume_scene,
    log_folder,
    global_cfgs,
    *args,
    **kwargs,
):
    super().__init__(*args, **kwargs)
    self.annotations_root = annotations_root
    self.scenario_log_root = scenario_log_root
    self.tmp_root = tmp_root
    self.model_zoo_root = model_zoo_root
    self.resume_prefix = resume_prefix
    self.resume_scene = resume_scene
    self.log_folder = log_folder
    Console_UI().set_log_folder(self.log_folder)
    self.log_configs = True
    # TODO: set the iteration counter on init to its last value
    # (this should be saved in an iteration-counter txt file)

    # We can't use the Singleton pattern here as Global_Cfgs() imports and initiates File_Manager
    self.global_cfgs = global_cfgs
    self.__cache = {}
def check_suggested_dictionary(self, modality_name: str, dictionary: pd.DataFrame,
                               action_on_missing: str):
    modality_name = modality_name.lower()
    if modality_name not in self.__values:
        return True

    if 'name' not in dictionary:
        # Multi-bipolar dictionaries should not be checked in this manner
        return False

    values = pd.Series(self.__values[modality_name])
    not_in_dictionary = ~values.isin(dictionary['name'])
    if any(not_in_dictionary):
        msg = (f'Missing values "{values[not_in_dictionary].tolist()}" from the suggested dictionary'
               f' for {modality_name}')
        if action_on_missing == 'exception':
            raise IndexError(msg)
        elif action_on_missing != 'silent':
            Console_UI().inform_user(msg)
        return False
    return True
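# The membership check above boils down to pandas' Series.isin against the
# dictionary's 'name' column. A toy illustration with made-up values (not
# project data):
import pandas as pd

values = pd.Series(['frontal', 'lateral', 'oblique'])
dictionary = pd.DataFrame({'name': ['frontal', 'lateral']})
missing = ~values.isin(dictionary['name'])
print(values[missing].tolist())  # -> ['oblique']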
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.time_frame = None
    self.time_frame_counter = 0
    self.travers_order = None

    self.init_explicit_modalities()
    self.graph = self.init_graph()
    self.graph_travers_order = self.get_graph_traverse_order()

    self.models = {}
    self.init_models_and_adjust_sizes()
    self.init_remaining_modalities()

    self.losses = {}
    self.init_losses()

    # Save the mermaidjs description of the graph to the log folder
    if not os.path.exists(Global_Cfgs().log_folder):
        os.makedirs(Global_Cfgs().log_folder, exist_ok=True)
    fn = 'mermaidjs_{ds_name}_{graph_name}_{exp_name}_{scene_name}.txt'\
        .format(ds_name=self.get_cfgs('dataset_name'),
                graph_name=self.get_name(),
                exp_name=self.experiment_name,
                scene_name=self.scene_cfgs['name'])
    mermaid_fn = os.path.join(Global_Cfgs().log_folder, fn)
    with open(mermaid_fn, 'w') as mermaid_file:
        mermaid_file.write(self.convert_to_mermaidjs())
    Console_UI().inform_user(f'Wrote mermaidjs config to {mermaid_fn}')

    self.exception_counter = 0
def update_learning_rate(self, learning_rate):
    if self.optimizer:
        if self.optimizer_type.lower() == 'adam':
            if learning_rate > 1e-3:
                Console_UI().warn_user(
                    f'learning rate {learning_rate:.2e} for Adam is too big.' +
                    ' We recommend a learning rate of less than 1e-3')
        elif self.optimizer_type.lower() == 'sgd':
            if learning_rate > 1e-1:
                Console_UI().warn_user(
                    f'learning rate {learning_rate:.2e} for SGD is too big.' +
                    ' We recommend a learning rate of less than 1e-1')

        for param_group in self.optimizer.param_groups:
            param_group['lr'] = learning_rate
            param_group['weight_decay'] = learning_rate / 200
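# Updating hyper-parameters through optimizer.param_groups, as done above, is
# the standard PyTorch pattern. A tiny self-contained illustration, including
# the lr-coupled weight decay used in this codebase (the parameter is a dummy):
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=0.1, weight_decay=0.0)
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.01
    param_group['weight_decay'] = 0.01 / 200
assert optimizer.param_groups[0]['lr'] == 0.01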
def analyze_results(self, batch, loss):
    self.batch_loss.append(loss)
    try:
        self.analyze_modality_specific_results(batch)
    except Exception as e:
        traceback.print_exception(type(e), e, e.__traceback__)
        Console_UI().warn_user(f'Failed to get results for {self.modality_name}: {e}')
def get_graph_traverse_order(self):
    ordered_nodes = list(nx.topological_sort(self.graph))
    try:
        return [m for m in ordered_nodes
                if self.graph.nodes[m]['node_type'] == 'model']
    except KeyError as e:
        Console_UI().warn_user('You have probably missed a key with node_type:')
        Console_UI().warn_user([m for m in ordered_nodes
                                if 'node_type' not in self.graph.nodes[m]])
        raise KeyError(f'Key not found: {e}')
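# A compact illustration of the traversal above: topologically sort a DAG where
# every node carries a node_type attribute and keep only the 'model' nodes. The
# graph below is a toy with hypothetical node names:
import networkx as nx

g = nx.DiGraph()
g.add_node('image', node_type='modality')
g.add_node('encoder', node_type='model')
g.add_node('features', node_type='modality')
g.add_node('classifier', node_type='model')
g.add_edges_from([('image', 'encoder'), ('encoder', 'features'),
                  ('features', 'classifier')])

order = [n for n in nx.topological_sort(g) if g.nodes[n]['node_type'] == 'model']
assert order == ['encoder', 'classifier']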
def save(self, scene_name):
    Console_UI().inform_user('\n*****************************************' +
                             f'\nSave network and losses for {scene_name}')
    no_networks = 0
    no_losses = 0
    for model_name in self.graph_travers_order:
        self.models[model_name].save(scene_name)
        no_networks += 1

    for _, loss in self.losses.items():
        loss.save(scene_name)
        no_losses += 1

    Console_UI().inform_user(f'Saved {no_networks} networks and {no_losses} losses to disk -' +
                             f' check out: {File_Manager().get_network_dir_path()}' +
                             '\n*****************************************\n')
def main():
    cfgs = Global_Cfgs()
    scenario = Scenario(scenario_name=cfgs.get('scenario'))
    start_scene = cfgs.get('start_scene')

    try:
        for scene in iter(scenario):
            if start_scene is None or scene.scene_name.strip().lower() == start_scene.strip().lower():
                start_scene = None
                scene.run_scene()
            else:
                Console_UI().inform_user(f"Skip '{scene.scene_name}' - waiting for '{start_scene}'")
    except RuntimeError as error:
        Console_UI().warn_user(error)
        Console_UI().inform_user("\n\n Traceback: \n")
        traceback.print_exc()
    except KeyboardInterrupt:
        Console_UI().inform_user(f'\nInterrupted by ctrl+c - stopped at "{scene.scene_name}"')
    else:
        Console_UI().inform_user('Done with all scenarios!')

    Console_UI().inform_user('To view results, check out the tensorboard:')
    Console_UI().inform_user(f'tensorboard --logdir /media/max/HD_1_3TB/log/{cfgs.sub_log_path}/tensorboard')
def step(self):
    if self.optimizer:
        try:
            self.optimizer.step()
        except RuntimeError as e:
            Console_UI().warn_user(f'Failed to optimize {self.get_name()} - a masking issue? {e}')
def compute_auc(outputs, targets):
    mask = np.logical_or(targets == 1, targets == -1)
    if len(mask) < 2 or mask.sum() < 2:
        return np.nan

    try:
        auc = roc_auc_score(y_true=targets[mask] == 1, y_score=outputs[mask])
    except IndexError as error:
        # TODO: Why is this throwing?
        Console_UI().warn_user(f'IndexError in AUC calculation: {error}')
        return np.nan
    except ValueError as error:
        Console_UI().warn_user(f'ValueError in AUC calculation: {error}')
        return np.nan
    return auc
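# Usage sketch for compute_auc: labelled positives are encoded as 1, labelled
# negatives as -1, and anything else (e.g. 0 for unlabelled) is masked out
# before the AUC is computed. Toy arrays, not project data:
import numpy as np

targets = np.array([1, -1, 0, 1, -1])  # the 0 entry is ignored by the mask
outputs = np.array([0.9, 0.2, 0.5, 0.7, 0.4])
print(compute_auc(outputs, targets))   # -> 1.0: positives outrank negatives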
def __init__(self, test_mode=False, test_init_values=None):
    if test_mode:
        # Avoid a mutable default argument for the test values
        self.cfgs = test_init_values if test_init_values is not None else {}
        Console_UI(self.get('log_level', 'warning'), globalConfigs=self)
        self.sub_log_path = self.get('sub_log_path', 'sub_log_not_set')
        File_Manager(annotations_root=self.get('annotation_root'),
                     log_folder=self.get('log_folder'),
                     scenario_log_root=self.get('scenario_log_root'),
                     resume_prefix=self.get('resume'),
                     resume_scene=self.get('resume_scene'),
                     tmp_root=self.get('tmp_root'),
                     model_zoo_root=self.get('model_zoo_root'),
                     global_cfgs=self)
        return

    args = self.parse_argument()
    self.cfgs = args.__dict__

    Console_UI(self.get('log_level', 'info'), globalConfigs=self)
    self.read_environment_variables()

    self.start_time = datetime.now(pytz.timezone('Europe/Stockholm')).strftime('%Y%m%d/%H.%M')
    self.sub_log_path = os.path.join(self.get('scenario'), self.start_time)

    if self.get('resume') is not None:
        self.prep_resume()

    fm = File_Manager(scenario_log_root=self.scenario_log_root,
                      log_folder=self.log_folder,
                      annotations_root=self.get('annotation_root'),
                      resume_prefix=self.get('resume'),
                      resume_scene=self.get('resume_scene'),
                      tmp_root=self.get('tmp_root'),
                      model_zoo_root=self.get('model_zoo_root'),
                      global_cfgs=self)

    setup_data = {'call': ' '.join(sys.argv), 'setup': self.cfgs}
    fm.log_setup(data=setup_data, name='base_setup.yaml')

    self.__forward_noise = 0
def get_cfgs(self, name, default=None):
    try:
        if name in self.graph_cfgs:
            return self.graph_cfgs[name]
        if name in self.task_cfgs:
            return self.task_cfgs[name]
        if name in self.task_cfgs['apply']:
            return self.task_cfgs['apply'][name]
        if name in self.scene_cfgs:
            return self.scene_cfgs[name]
        if name in self.scenario_cfgs:
            return self.scenario_cfgs[name]
    except TypeError as e:
        Console_UI().inform_user(self.graph_cfgs)
        Console_UI().inform_user(self.task_cfgs)
        Console_UI().inform_user(self.scene_cfgs)
        Console_UI().inform_user(self.scenario_cfgs)
        raise TypeError(f'Error during {self.get_name()}: {e}')
    # Pass the default through so it isn't silently dropped at the global level
    return Global_Cfgs().get(name, default)
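# The lookup cascade above (graph -> task -> scene -> scenario -> global) is
# essentially a ChainMap: the nearest scope wins. A compact illustration with
# hypothetical config dicts:
from collections import ChainMap

graph_cfgs = {'learning_rate': 1e-4}
task_cfgs = {'learning_rate': 1e-3, 'batch_size': 32}
scene_cfgs = {'optimizer_type': 'adam'}

cfgs = ChainMap(graph_cfgs, task_cfgs, scene_cfgs)
assert cfgs['learning_rate'] == 1e-4  # graph level shadows the task level
assert cfgs['batch_size'] == 32       # falls through to the task level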
def eval(self, batch=None):
    for model_name in self.graph_travers_order:
        self.models[model_name].eval()

    for _, loss in self.losses.items():
        loss.eval()

    if batch is not None:
        self.encode(batch)
        if self.reconstruction:
            self.decode(batch)
            # Only in the evaluation phase do we visualize the reconstructed image (if it exists)
            Console_UI().add_last_reconstructed_input(batch)
        self.compute_loss(batch)
def download_zip_file(self, url, dataset_name):
    """
    Annotations have to be zipped with the following command:

    7z a -ppassword -mem=ZipCrypto imagenet.zip imagenet
    """
    url_content = urlopen(url)
    zipfile = ZipFile(BytesIO(url_content.read()))
    pswd = Console_UI().receive_password('Password for unzipping annotations of %s dataset:' % dataset_name)
    zipfile.extractall(self.annotations_root, pwd=bytes(pswd, 'utf-8'))
def read_csv_annotations(
    self,
    dataset_name: str,
    annotations_rel_path: str,
    multi_view_per_sample: bool = False,
):
    annotations_path = os.path.join(self.get_annotations_path(dataset_name), annotations_rel_path)
    if os.path.exists(annotations_path):
        cache_path = f'csv:{annotations_path}'
        if cache_path in self.__cache:
            annotation = self.__cache[cache_path]
        else:
            annotation = pd.read_csv(annotations_path, low_memory=False)
            self.__cache[cache_path] = annotation

        if multi_view_per_sample and not isinstance(annotation.index, pd.MultiIndex):
            if 'index' not in annotation:
                annotation['index'] = np.arange(len(annotation), dtype=int)
            else:
                assert np.issubdtype(annotation['index'], np.dtype(int)), \
                    'Index should be integers'
                assert annotation['index'].min() == 0, \
                    'The index has to be indexed from 0'

            if 'sub_index' not in annotation:
                annotation['sub_index'] = np.zeros(len(annotation), dtype=int)
            else:
                assert np.issubdtype(annotation['sub_index'], np.dtype(int)), \
                    'Sub index should be integers'
                assert annotation['sub_index'].max() > 0, \
                    'You have provided a sub_index without purpose (max 0)'
                assert annotation['sub_index'].min() == 0, \
                    'The sub_index has to start from 0'

            annotation.set_index(['index', 'sub_index'], inplace=True)
            if 'num_views' not in annotation:
                # Repeat each sample's view count once per view, e.g. group sizes [2, 1] -> [2, 2, 1]
                annotation['num_views'] = \
                    [a for b in [[i] * i for i in annotation.groupby(level=0).size()] for a in b]
        return annotation

    Console_UI().warn_user(f"Failed to load file from disk: '{annotations_path}'")
    return None
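# Sketch of the (index, sub_index) MultiIndex layout that read_csv_annotations
# builds for multi-view samples: one row per view, views of the same sample
# share 'index', and 'num_views' repeats each sample's view count once per
# view. Toy frame with hypothetical file names:
import pandas as pd

annotation = pd.DataFrame({
    'index':     [0, 0, 1],  # sample 0 has two views, sample 1 has one
    'sub_index': [0, 1, 0],
    'path':      ['a_front.png', 'a_side.png', 'b_front.png'],
})
annotation.set_index(['index', 'sub_index'], inplace=True)
annotation['num_views'] = \
    [a for b in [[i] * i for i in annotation.groupby(level=0).size()] for a in b]
print(annotation['num_views'].tolist())  # -> [2, 2, 1]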
def prep_resume(self):
    ui = Console_UI()
    resume_prefix = self.get('resume')
    resume_scene = self.get('resume_scene')
    if resume_scene is not None and resume_prefix is None:
        raise ValueError('You must provide a resume prefix if you have set a resume scene')

    # for debug mode uncomment:
    # scenario_log_root = "/media/max/SSD_1TB/log/"
    if resume_prefix.lower() == 'last':
        dirs = sorted(iglob(f'{self.scenario_log_root}/*/*/neural_nets'))
        dirs = [d for d in dirs if any(iglob(f'{d}/*{resume_scene}.t7'))]
        if len(dirs) == 0:
            raise Exception(
                f"No previous runs found in '{self.scenario_log_root}' with *{resume_scene}.t7")
        # Note: str.lstrip/rstrip strip character sets, not substrings, so they
        # cannot be used to peel off the root prefix and the '/neural_nets' suffix
        resume_prefix = os.path.dirname(os.path.relpath(dirs[-1], self.scenario_log_root))
        ui.inform_user(f'Resuming run from {resume_prefix}')
    elif resume_prefix is not None:
        resume_prefix = retrieve_dir(path=resume_prefix,
                                     base_path=self.scenario_log_root,
                                     expected_depth=1)
        ui.inform_user(f'Resuming run from {resume_prefix}')

    self.cfgs['resume'] = resume_prefix
    # for debug mode uncomment:
    # self.cfgs['resume'] = "../%s" % self.cfgs['resume']

    if not self.cfgs['skip_tensorboard']:
        dst_tensorboard_path = os.path.join(self.log_folder, 'tensorboard')
        if os.path.exists(dst_tensorboard_path):
            ui.inform_user(f'Removing previous tensorboard catalogue: {dst_tensorboard_path}')
            shutil.rmtree(dst_tensorboard_path)

        ui.inform_user('Copying the previous tensorboard data')
        shutil.copytree(
            src=os.path.join(self.scenario_log_root, resume_prefix, 'tensorboard'),
            dst=dst_tensorboard_path,
        )
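# Why lstrip/rstrip had to go in prep_resume: they strip *character sets*, not
# prefixes or suffixes, so paths whose directory names share letters with the
# stripped string get silently mangled. removesuffix (Python 3.9+) or
# os.path.relpath are the safe alternatives:
assert 'stats/neural_nets'.rstrip('/neural_nets') == ''  # every char is in the set
assert 'stats/neural_nets'.removesuffix('/neural_nets') == 'stats'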
def init_modality(self, modality_name: str, modality_cfgs: dict = None):
    modality_name = modality_name.lower()
    assert modality_cfgs is not None, \
        'modality_cfgs should not be None in %s' % modality_name

    start_time = time.time()
    Modality, content, dictionary = get_modality_and_content(
        annotations=self.annotations,
        modality_name=modality_name,
        modality_cfgs=modality_cfgs,
        ignore_index=-100,  # The -100 is defined in the loss_cfgs and not available here :-(
    )
    modality = Modality(
        dataset_name=self.dataset_name,
        dataset_cfgs=self.dataset_cfgs,
        experiment_name=self.experiment_name,
        experiment_cfgs=self.experiment_cfgs,
        modality_name=modality_name,
        modality_cfgs=modality_cfgs,
        content=content,
        dictionary=dictionary,
    )

    if modality.is_explicit_modality():
        self.explicit_modalities[modality_name] = modality
        if modality.is_input_modality():
            self.explicit_input_modalities[modality_name] = modality
        elif modality.is_output_modality():
            self.explicit_output_modalities[modality_name] = modality
        else:
            raise ValueError('Explicit modalities should either be input or output')
    elif modality.is_implicit_modality():
        self.implicit_modalities[modality_name] = modality

    # Add explicit and implicit modalities
    # TODO - Ali: why do we need this split? When do we have the case where a modality is neither?
    self.modalities.update(self.explicit_modalities)
    self.modalities.update(self.implicit_modalities)

    if not Global_Cfgs().get('silent_init_info'):
        Console_UI().inform_user(
            info='Initializing %s modality in %s in %d milliseconds' %
            (modality_name, self.get_name(), 1000 * (time.time() - start_time)),
            debug=modality_cfgs,
        )
def end_epoch(self, experiment_name: str, scene_name: str):
    experiment_set = self.dataset.experiments[experiment_name]
    summary = {
        'epoch': self.epoch,
        'graph_name': self.graph_name,
        'task_name': self.get_name(),
    }
    if experiment_name == self.train_set_name:
        summary['epoch_size'] = len(experiment_set)
        self.save(scene_name='last')
        self.epoch += 1

    experiment_set.end_epoch(summary=summary, scene_name=scene_name)
    Console_UI().add_epoch_results(summary)
def __next__(self):
    batch = None
    i = 0
    iterator = self.get_iterator()
    # start_time = time.time()
    while batch is None and i < 5:
        try:
            batch = next(iterator)
            if batch['encoder_image'].max() == 0:
                raise ValueError('No non-zero images in batch - check file folder')

            # Convert all tensors to cuda if the environment calls for it
            for key in batch:
                batch[key] = convert_2_cuda(batch[key])

            batch_size_info = f'{self.bin_weights[self.batch_index]} ({len(self.bins[self.batch_index])} bins)'
            batch.update({
                'batch_index': self.batch_index,
                'epoch_size': len(self),
                'batch_size': batch_size_info,
            })
        except StopIteration:
            self.reset_epoch()
            return None
        except Exception as ex:
            batch = None
            Console_UI().warn_user(f'Failed to load batch: "{ex}"')
            traceback.print_exception(type(ex), ex, ex.__traceback__)

        self.batch_index += 1
        i += 1

    # The batch is still None only if all five attempts failed (checking the
    # counter alone would also raise when the fifth attempt succeeded)
    if batch is None:
        raise Exception('Failed multiple times when trying to retrieve batch')

    # Check images - seem ok
    # import cv2
    # for i in range(batch['encoder_image'].shape[0]):
    #     for ii in range(batch['encoder_image'].shape[1]):
    #         img = batch['encoder_image'][i, ii, 0] * 255
    #         cv2.imwrite(f'/home/max/tmp/test{i}_{ii}.png', img.reshape(256, 256, 1).cpu().numpy())

    # Profiling the batch loading shows little gain from more than two workers (time is 0.01 to 0.1 seconds)
    # print("Time spent retrieving batch: %0.2f" % (time.time() - start_time))
    return batch
def train(self, batch=None):
    for model_name in self.graph_travers_order:
        self.models[model_name].train()

    for _, loss in self.losses.items():
        loss.train()

    if batch is not None:
        try:
            self.zero_grad()
            self.encode(batch)
            if self.reconstruction:
                self.decode(batch)
            loss = self.compute_loss(batch)

            start_time = time.time()
            if loss > 0:
                loss.backward()
                self.step()
            batch['time']['backward'][self.get_name()] = {
                'start': start_time,
                'end': time.time(),
            }

            self.collect_runtime_stats(batch)
            self.exception_counter = 0
            return True
        except KeyError as e:
            Console_UI().warn_user(f'Could not find {e} in:')
            Console_UI().warn_user(sorted(batch.keys()))
            Console_UI().inform_user("\n\n Traceback: \n")
            traceback.print_exc()
            raise e
        except Exception as e:
            Console_UI().warn_user(f'** Error while training batch in {self.get_name()} **')
            Console_UI().warn_user(
                f'Indices: {batch["indices"]} and encoder image shape {batch["encoder_image"].shape}')
            Console_UI().warn_user(f'Error message: {e}')
            Console_UI().inform_user("\n\n Traceback: \n")
            traceback.print_exc()
            self.exception_counter += 1
            if self.exception_counter > 5:
                raise RuntimeError(f'Error during training: {e}')
    return False
def step(
    self,
    iteration_counter: int,
    scene_name: str,
):
    # Due to the parallel nature of loading the data we need to reset the
    # start time for the batch in order to get the true processing time
    start_time = time.time()
    experiment_set = self.dataset.experiments[self.train_set_name]
    batch = next(experiment_set)
    if batch is None:
        self.end_epoch(experiment_name=self.train_set_name, scene_name=scene_name)
        if ((self.epoch < 5 and iteration_counter < 1e3)
                or self.epoch % self.validate_when_epoch_is_devisable_by == 0):
            self.validate(iteration_counter, scene_name=scene_name)
        batch = next(experiment_set)
        if batch is None:
            raise Exception('The next batch after resetting was empty!?')

    batch['time']['start'] = start_time
    batch.update({
        'epoch': self.epoch,
        'graph_name': self.graphs[self.train_set_name.lower()].get_name(),
        'task_name': self.get_name(),
        'iteration_counter': iteration_counter,
    })

    success = self.graphs[self.train_set_name.lower()].train(batch)
    if not success:
        return False

    Console_UI().add_batch_results(batch)
    return True
def read_dictionary(
    self,
    dataset_name: str,
    modality_name: str,
):
    """
    If we have a dictionary associated with the current weights we should use
    that. The fallback is the resume weights' dictionary and, lastly, the
    annotations' dictionary.
    """
    filename = f'{modality_name.lower()}_dictionary.csv'
    cachename = f'dictionary:{dataset_name}->{filename}'
    if cachename in self.__cache:
        return self.__cache[cachename]

    dictionary_path = os.path.join(self.log_folder, 'neural_nets', filename)
    if not os.path.exists(dictionary_path) and self.resume_prefix is not None:
        dictionary_path = os.path.join(self.scenario_log_root, self.resume_prefix,
                                       'neural_nets', filename)
    if not os.path.exists(dictionary_path):
        dictionary_path = os.path.join(self.get_annotations_path(dataset_name), filename)

    if os.path.exists(dictionary_path):
        try:
            dictionary = pd.read_csv(dictionary_path)
            self.__cache[cachename] = dictionary
            return dictionary
        except pd.errors.EmptyDataError:
            Console_UI().warn_user(
                f'The dictionary for {modality_name} is corrupt - see file {dictionary_path}')
    return None
def collect_dictionaries(self):
    """
    Check all the datasets for common items, e.g. body part, and then create
    a general dictionary for all of them.
    """
    datasets = []
    for scene in self.scenario_cfgs['scenes']:
        for task in scene['tasks'].values():
            if task['dataset_name'] not in datasets:
                datasets.append(task['dataset_name'])

    configs = {}
    for dataset_name in datasets:
        configs[dataset_name] = File_Manager().read_dataset_config(dataset_name)

    modalities_with_dictionaries = [
        'one_vs_rest',
        'bipolar',
        'multi_bipolar',
    ]  # TODO: add 'hierarchical_label' but this has some fancy logic :-S

    dictionary_candidates = []
    for dataset_name in datasets:
        config = configs[dataset_name]
        try:
            for experiment in config['experiments'].values():
                if isinstance(experiment['modalities'], dict):
                    for name, cfg in experiment['modalities'].items():
                        if (cfg['type'].lower() in modalities_with_dictionaries
                                and name not in dictionary_candidates):
                            dictionary_candidates.append(name)
        except Exception as e:
            raise Exception(f'Failed to get dictionary for {dataset_name}: {e}')

    # Store all the different values available for this modality into the dictionary
    # singleton that keeps track of the unique values
    dg = Dictionary_Generator()
    for modality_name in dictionary_candidates:
        for dataset_name in datasets:
            dg.append_suggested_dictionary(dataset_name=dataset_name,
                                           modality_name=modality_name)
            config = configs[dataset_name]
            for experiment in config['experiments'].values():
                annotations = File_Manager().read_csv_annotations(
                    dataset_name,
                    annotations_rel_path=experiment['annotations_path'],
                    # The multi-view argument should be irrelevant for this
                )
                if annotations is None:
                    raise ValueError(
                        f'Could not find the dataset: {dataset_name} in {experiment["annotations_path"]}')

                modalities = experiment['modalities']
                if modalities == 'same_as_train_set':
                    modalities = config['experiments']['train_set']['modalities']

                if modality_name in modalities:
                    if 'column_name' in modalities[modality_name]:
                        # Resolve the column name before the try so the exception
                        # handlers can safely reference it
                        colname = modalities[modality_name]['column_name']
                        try:
                            dg.append_values(modality_name=modality_name,
                                             values=annotations[colname])
                        except KeyError as e:
                            Console_UI().warn_user(f'Got a key annotation exception for {colname}')
                            Console_UI().warn_user(modalities[modality_name])
                            Console_UI().warn_user(annotations.columns)
                            raise e
                        except Exception as e:
                            Console_UI().warn_user(f'Got an annotation exception for {colname}')
                            Console_UI().warn_user(modalities[modality_name])
                            Console_UI().warn_user(annotations)
                            raise e
                    elif 'columns' in modalities[modality_name]:
                        for column_name in modalities[modality_name]['columns']:
                            if isinstance(column_name, dict):
                                assert 'csv_name' in column_name, \
                                    f"The column doesn't have the expected csv_name element, got: {column_name}"
                                column_name = column_name['csv_name']
                            if column_name not in annotations:
                                n = 3 if len(annotations.columns) < 10 else ceil(len(annotations.columns) / 3)
                                closest = ', '.join(get_close_matches(
                                    word=column_name,
                                    possibilities=annotations.columns,
                                    n=n,
                                ))
                                raise IndexError(
                                    f"The {column_name} from {modality_name} doesn't exist." +
                                    f' Closest matching are: {closest}')
                            dg.append_values(modality_name=modality_name,
                                             values=annotations[column_name])
                    else:
                        raise IndexError(
                            f'Expected {modality_name} to have either columns or column_name defined')
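# The column-suggestion logic above relies on difflib.get_close_matches to
# propose near-miss column names. A tiny illustration with made-up columns:
from difflib import get_close_matches

columns = ['body_part', 'body_side', 'exam_date']
print(get_close_matches('body_prat', possibilities=columns, n=3))
# -> ['body_part'] (possibly more, depending on the similarity cutoff)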
def closing_credits(self):
    Console_UI().inform_user("That's it folks")
def __init__(
    self,
    graph_name,
    experiment_set,
    task_cfgs,
    scene_cfgs,
    scenario_cfgs,
):
    self.graph_name = graph_name
    self.task_cfgs = task_cfgs
    self.scene_cfgs = scene_cfgs
    self.scenario_cfgs = scenario_cfgs

    self.experiment_set = experiment_set
    self.experiment_name = self.experiment_set.get_name()
    self.graph_cfgs = self.get_graph_cfgs(self.graph_name)

    self.classification = self.get_cfgs('classification', default=False)
    self.reconstruction = self.get_cfgs('reconstruction', default=False)
    self.identification = self.get_cfgs('identification', default=False)
    self.regression = self.get_cfgs('regression', default=False)
    self.pi_model = self.get_cfgs('pi_model', default=False)
    self.real_fake = self.get_cfgs('real_fake', default=False)
    self.optimizer_type = self.get_cfgs('optimizer_type')

    if not Global_Cfgs().get('silent_init_info'):
        ui = Console_UI()
        ui.inform_user(
            info=['explicit experiment modalities',
                  list(self.get_experiment_explicit_modalities().keys())],
            debug=self.get_experiment_explicit_modalities(),
        )
        ui.inform_user(
            info=['implicit experiment modalities',
                  list(self.get_experiment_implicit_modalities().keys())],
            debug=self.get_experiment_implicit_modalities(),
        )
        ui.inform_user(
            info=['explicit graph modalities',
                  list(self.get_graph_specific_explicit_modalities().keys())],
            debug=self.get_graph_specific_explicit_modalities(),
        )
        ui.inform_user(
            info=['implicit graph modalities',
                  list(self.get_graph_specific_implicit_modalities().keys())],
            debug=self.get_graph_specific_implicit_modalities(),
        )
        ui.inform_user(
            info=['explicit models', list(self.get_explicit_models().keys())],
            debug=self.get_explicit_models(),
        )
        ui.inform_user(
            info=['implicit models', list(self.get_implicit_models().keys())],
            debug=self.get_implicit_models(),
        )