Example #1
    def __init__(self, scenario_name: str):
        self.scenario_name = scenario_name
        fm = File_Manager()
        self.scenario_cfgs = fm.read_scenario_config(self.scenario_name)

        self.current_epoch = self.get_cfgs('current_epoch', default=0)
        self.optimizer_type = self.get_cfgs('optimizer_type')

        self.scenes = OrderedDict()
        for scene_cfgs in self.get_cfgs('scenes'):
            scene_name = scene_cfgs['name']
            task_defaults = scene_cfgs.get('task_defaults', {})

            scene_cfgs['tasks'] = {
                t: fm.read_task_config(task_name=t,
                                       scenario=self,
                                       scene_defaults=task_defaults)
                for t in scene_cfgs['tasks']
            }
            self.scenes[scene_name] = scene_cfgs

        self.scenario_lengths = [{
            'name': key,
            'len': len(s)
        } for (key, s) in self.scenes.items()]

        self.collect_dictionaries()

        self.__current_scene = None
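The example above leans on a `get_cfgs` helper with an optional default that is not shown here. A minimal sketch of what such a lookup might look like, assuming `self.scenario_cfgs` is a plain dict (hypothetical, not the project's actual implementation):

    def get_cfgs(self, key, default=None):
        # Hypothetical helper: return the configured value, fall back to the
        # provided default, and fail loudly for required keys.
        if key in self.scenario_cfgs:
            return self.scenario_cfgs[key]
        if default is not None:
            return default
        raise KeyError(f'Missing required scenario config key: {key}')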
Example #2
    def end_epoch(self, summary: dict, scene_name: str):
        summary['dataset_name'] = self.dataset_name
        summary['experiment_name'] = self.experiment_name
        summary['modalities'] = defaultdict(dict)
        raw_csv_data = []
        for _, modality in self.modalities.items():
            raw_csv_data.extend(modality.get_runtime_values())
            modality.report_epoch_summary(summary)
            if modality.is_explicit_modality() and modality.is_csv():
                if isinstance(modality.content, pd.Series):
                    raw_csv_data.append(modality.content)
                elif isinstance(modality.content, pd.DataFrame):
                    for c in modality.content:
                        raw_csv_data.append(modality.content[c])
                else:
                    raise ValueError(
                        f'The content type of {modality.get_name()} is not implemented'
                    )

        # We want the column estimates to be close to each other in the final output
        raw_csv_data.sort(key=lambda v: v.name if hasattr(v, 'name') else '')

        File_Manager().write_csv_annotation(
            annotations=pd.concat(raw_csv_data, axis=1),
            dataset_name=self.dataset_name,
            experiment_file_name=f'{scene_name}_{self.get_cfgs("annotations_path")}',
        )
        return summary
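A minimal, self-contained demonstration of the concat-and-sort pattern above: named pandas Series become columns of a single DataFrame, and sorting by `name` keeps related columns (e.g. an estimate and its ground truth) next to each other.

import pandas as pd

pred = pd.Series([0.2, 0.9], name='age_estimate')
truth = pd.Series([0.0, 1.0], name='age')
csv_data = [pred, truth]
csv_data.sort(key=lambda v: v.name if hasattr(v, 'name') else '')
print(pd.concat(csv_data, axis=1))
#    age  age_estimate
# 0  0.0           0.2
# 1  1.0           0.9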
Example #3
    def init_dictionary(self):
        if self.dictionary is None:
            raise Exception(
                f'No dictionary has been initiated for {self.get_name()}')

        File_Manager().write_dictionary2logdir(dictionary=self.dictionary,
                                               modality_name=self.get_name())
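`File_Manager()` is instantiated freely throughout these examples while clearly sharing state, which suggests a singleton. A sketch of that pattern under this assumption (the actual class is not shown here):

class File_Manager:
    _instance = None

    def __new__(cls, *args, **kwargs):
        # First call creates the instance; later calls return the same object
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

assert File_Manager() is File_Manager()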
Example #4
    def setup_annotations(self):
        rel_path = self.get_cfgs('annotations_path')
        fm = File_Manager()
        self.annotations = fm.read_csv_annotations(
            dataset_name=self.dataset_name,
            annotations_rel_path=rel_path,
            multi_view_per_sample=self.multi_view_per_sample,
        )

        if self.annotations is None:
            annotations_url = self.get_cfgs('annotations_url')
            available_csvs_str = '\', \''.join(
                fm.get_available_csvs(self.dataset_name))
            Console_UI().inform_user(
                '"%s" does not exist among the available datasets: \'%s\'.\nDownloading from:\n %s'
                % (rel_path, available_csvs_str, annotations_url))
            fm.download_zip_file(
                url=annotations_url,
                dataset_name=self.get_dataset_name(),
            )

            self.annotations = fm.read_csv_annotations(
                dataset_name=self.dataset_name,
                annotations_rel_path=rel_path,
                multi_view_per_sample=self.multi_view_per_sample,
            )

        if self.get_cfgs('test_run'):
            self.annotations = self.annotations[
                self.annotations.index.get_level_values(0) < 100]
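The method implements a read → download → re-read fallback: if the annotations are missing locally, they are fetched and the read is retried. A generic, hypothetical sketch of the same flow for a single file:

import os
import urllib.request

def read_or_download(path, url):
    # Fall back to downloading when the local copy is missing, then re-read
    if not os.path.exists(path):
        urllib.request.urlretrieve(url, path)
    with open(path) as f:
        return f.read()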
Example #5
    def __init__(self, test_mode=False, test_init_values=None):
        if test_mode:
            # A None sentinel avoids a shared mutable default argument
            self.cfgs = test_init_values if test_init_values is not None else {}

            Console_UI(self.get('log_level', 'warning'), globalConfigs=self)
            self.sub_log_path = self.get('sub_log_path', 'sub_log_not_set')

            File_Manager(annotations_root=self.get('annotation_root'),
                         log_folder=self.get('log_folder'),
                         scenario_log_root=self.get('scenario_log_root'),
                         resume_prefix=self.get('resume'),
                         resume_scene=self.get('resume_scene'),
                         tmp_root=self.get('tmp_root'),
                         model_zoo_root=self.get('model_zoo_root'),
                         global_cfgs=self)
            return

        args = self.parse_argument()
        self.cfgs = args.__dict__

        Console_UI(self.get('log_level', 'info'), globalConfigs=self)
        self.read_environment_variables()

        self.start_time = datetime.now(
            pytz.timezone('Europe/Stockholm')).strftime('%Y%m%d/%H.%M')
        self.sub_log_path = os.path.join(self.get('scenario'), self.start_time)

        if self.get('resume') is not None:
            self.prep_resume()

        fm = File_Manager(scenario_log_root=self.scenario_log_root,
                          log_folder=self.log_folder,
                          annotations_root=self.get('annotation_root'),
                          resume_prefix=self.get('resume'),
                          resume_scene=self.get('resume_scene'),
                          tmp_root=self.get('tmp_root'),
                          model_zoo_root=self.get('model_zoo_root'),
                          global_cfgs=self)

        setup_data = {'call': ' '.join(sys.argv), 'setup': self.cfgs}

        fm.log_setup(data=setup_data, name='base_setup.yaml')

        self.__forward_noise = 0
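Passing `None` and creating the dict inside the function avoids Python's shared-mutable-default pitfall, where a `={}` default is created once at definition time and reused across calls. A short demonstration:

def bad(values={}):        # one dict shared by every call
    values['n'] = values.get('n', 0) + 1
    return values

def good(values=None):     # fresh dict per call
    values = {} if values is None else values
    values['n'] = values.get('n', 0) + 1
    return values

print(bad(), bad())    # {'n': 2} {'n': 2} - both calls return the same dict
print(good(), good())  # {'n': 1} {'n': 1}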
Example #6
    def load(self):
        neural_net_name = self.get_name()
        state_dict = File_Manager().load_pytorch_neural_net(
            neural_net_name=neural_net_name)
        if state_dict is not None:
            try:
                # As the save is done at the layers level: neural_net.layers.state_dict()
                # we need to load it from the layers
                self.layers.load_state_dict(state_dict)
            except RuntimeError as e:
                raise RuntimeError(
                    f'Failed to load state dict for {neural_net_name}\nError message: {e}'
                ) from e
Example #7
    def init_dictionary(self):
        if self.get_cfgs('skip_dictionary_save', default=False):
            return

        column_dictionary = pd.DataFrame({
            'columns': list(self.csv_columns),
            'labels': [self.column_map[c] for c in self.csv_columns],
            'index': range(len(self.csv_columns)),
        })

        File_Manager().write_dictionary2logdir(dictionary=column_dictionary,
                                               modality_name=self.get_name())
Example #8
    def append_suggested_dictionary(self,
                                    dataset_name,
                                    modality_name,
                                    FMSingleton=None):
        assert isinstance(
            modality_name,
            str), f'Modality name is not a string "{modality_name}"'
        assert isinstance(
            dataset_name,
            str), f'Dataset name is not a string "{dataset_name}"'
        modality_name = modality_name.lower()

        if FMSingleton is None:
            FMSingleton = File_Manager()
        suggested_dictionary = FMSingleton.read_dictionary(
            dataset_name=dataset_name,
            modality_name=modality_name,
        )
        if suggested_dictionary is not None:
            self.__suggested_dictionaries[modality_name].append(
                suggested_dictionary)

        return self
Example #9
    def init_dictionary(self):
        if self.dictionary is None:
            fm = File_Manager()
            self.dictionary = fm.read_dictionary(
                dataset_name=self.dataset_name, modality_name=self.get_name())
            if self.dictionary is None:
                # No dictionary on disk - build one and persist it
                self.dictionary = self.make_dictionary()
                fm.write_dictionary(dictionary=self.dictionary,
                                    dataset_name=self.dataset_name,
                                    modality_name=self.get_name())
            else:
                fm.write_dictionary2logdir(dictionary=self.dictionary,
                                           modality_name=self.get_name())
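This is a cache-aside pattern: try to read a persisted dictionary, build and write it on a miss, and mirror it to the log directory on a hit. A generic sketch of the read-or-build-and-persist core, with hypothetical paths:

import json
import os

def load_or_build(path, build):
    # Cache-aside: reuse the persisted copy if present, otherwise build and save it
    if os.path.exists(path):
        with open(path) as f:
            return json.load(f)
    data = build()
    with open(path, 'w') as f:
        json.dump(data, f)
    return data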
Example #10
def run(config_manager, logger):
    logger.msg('loading configuration file...')
    config = config_manager.get_config()

    logger.msg('ensuring that file directories exist...')
    file_manager = File_Manager(config['speech'], config['text'])
    if not file_manager.directories_exist():
        logger.err('Please check the directories entered into the config file')
        return

    logger.msg('waiting for files to transcribe...')
    # Create the transcription object
    transcriber = Transcriber(config['username'], config['password'],
                              file_manager)
    while True:  # Loop for eternity
        # Loop through files that need to be transcribed
        for file in file_manager.speech_files_without_text_files():
            logger.msg('Transcribing: ' + file + '...')
            transcriber.transcribe(file)  # Transcribe the current file
            logger.msg(file + ' transcribed')
            logger.msg('waiting for files to transcribe...')
        sleep(10)  # Wait 10 seconds between polls
Example #11
    def get_dataset_cfgs(self, dataset_name):
        dataset_cfgs = File_Manager().read_dataset_config(dataset_name)

        # For the sake of simplicity, when modalities are identical during
        # train and test, we can just write "modalities": "same_as_X" in
        # the config file (in this example, X is "train").
        # This piece of code searches for modalities declared like this and
        # replaces them with the modalities of experiment "X".
        for _, experiment_cfgs in dataset_cfgs['experiments'].items():
            if (isinstance(experiment_cfgs['modalities'], str)
                    and experiment_cfgs['modalities'].startswith('same_as_')):
                other_experiment = experiment_cfgs['modalities'][len('same_as_'):]
                experiments = dataset_cfgs['experiments']
                if other_experiment in experiments:
                    experiment_cfgs['modalities'] = experiments[other_experiment]['modalities']
                else:
                    raise KeyError('Could not find the modality \'%s\' among the modalities: \'%s\'' %
                                   (other_experiment, '\', \''.join(experiments.keys())))
        return dataset_cfgs
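The `same_as_X` handling is a one-level alias resolution over the experiments dict (an alias pointing at another alias would not be chased). A standalone sketch:

experiments = {
    'train': {'modalities': {'image': {'type': 'bipolar'}}},
    'test': {'modalities': 'same_as_train'},
}

for name, cfgs in experiments.items():
    modalities = cfgs['modalities']
    if isinstance(modalities, str) and modalities.startswith('same_as_'):
        other = modalities[len('same_as_'):]
        if other not in experiments:
            raise KeyError(f"'{other}' not among: {', '.join(experiments)}")
        cfgs['modalities'] = experiments[other]['modalities']

print(experiments['test']['modalities'])  # {'image': {'type': 'bipolar'}}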
Example #12
    def get_dataset(
            self,
            dataset_name,
            batch_size_multiplier: float,
    ):
        fixed_name = dataset_name.lower()
        if fixed_name not in self.datasets:
            predefined_datasets = File_Manager().get_dataset_definitions()
            if fixed_name in predefined_datasets:
                from .csv_dataset import CSV_Dataset as Dataset
            else:
                raise Exception('The dataset \'%s\' is not among the predefined sets: \'%s\'' %
                                (dataset_name, '\', \''.join(predefined_datasets)))

            self.datasets[fixed_name] = Dataset(
                dataset_name=fixed_name,
                batch_size_multiplier=batch_size_multiplier,
            )
        else:
            self.datasets[fixed_name].set_batch_size_multiplier(batch_size_multiplier)

        return self.datasets[fixed_name]
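The lookup above is a memoizing factory: the name is normalized, the dataset is constructed on first request, and later requests reuse (and update) the cached instance. The same shape in isolation:

class Dataset_Registry:
    def __init__(self):
        self.datasets = {}

    def get(self, name, factory):
        # Normalize the key, build once, reuse afterwards
        key = name.lower()
        if key not in self.datasets:
            self.datasets[key] = factory(key)
        return self.datasets[key]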
Example #13
    def save(self, scene_name='last'):
        File_Manager().save_pytorch_neural_net(self.get_name(), self,
                                               scene_name)
Example #14
    def collect_dictionaries(self):
        """
        Check all the Datasets for common items, e.g. body part and then create
        a general dictionary for all of them.
        """
        datasets = []
        for scene in self.scenario_cfgs['scenes']:
            for task in scene['tasks'].values():
                if task['dataset_name'] not in datasets:
                    datasets.append(task['dataset_name'])

        configs = {}
        for dataset_name in datasets:
            configs[dataset_name] = File_Manager().read_dataset_config(
                dataset_name)

        modalities_with_dictionaries = [
            'one_vs_rest',
            'bipolar',
            'multi_bipolar',
        ]  # TODO: add 'hierarchical_label' but this has some fancy logic :-S

        dictionary_candidates = []
        for dataset_name in datasets:
            config = configs[dataset_name]
            try:
                for experiment in config['experiments'].values():
                    if isinstance(experiment['modalities'], dict):
                        for name, cfg in experiment['modalities'].items():
                            if (cfg['type'].lower() in modalities_with_dictionaries
                                    and name not in dictionary_candidates):
                                dictionary_candidates.append(name)
            except Exception as e:
                raise Exception(
                    f'Failed to get dictionary for {dataset_name}: {e}') from e

        # Store all the different values available for this modality into the dictionary singleton that
        # keeps track of the unique values
        dg = Dictionary_Generator()
        for modality_name in dictionary_candidates:
            for dataset_name in datasets:
                dg.append_suggested_dictionary(dataset_name=dataset_name,
                                               modality_name=modality_name)

                config = configs[dataset_name]
                for experiment in config['experiments'].values():
                    annotations = File_Manager().read_csv_annotations(
                        dataset_name,
                        annotations_rel_path=experiment['annotations_path'],
                        # Multi-view argument should be irrelevant for this
                    )
                    if annotations is None:
                        raise ValueError(
                            f'Could not find the dataset: {dataset_name} in {experiment["annotations_path"]}'
                        )

                    modalities = experiment['modalities']
                    if modalities == 'same_as_train_set':
                        modalities = config['experiments']['train_set'][
                            'modalities']

                    if modality_name in modalities:
                        if 'column_name' in modalities[modality_name]:
                            try:
                                colname = modalities[modality_name][
                                    'column_name']
                                dg.append_values(modality_name=modality_name,
                                                 values=annotations[colname])
                            except KeyError as e:
                                Console_UI().warn_user(
                                    f'Got a key annotation exception for {colname}'
                                )
                                Console_UI().warn_user(
                                    modalities[modality_name])
                                Console_UI().warn_user(annotations.columns)
                                raise e
                            except Exception as e:
                                Console_UI().warn_user(
                                    f'Got an annotation exception for {colname}'
                                )
                                Console_UI().warn_user(
                                    modalities[modality_name])
                                Console_UI().warn_user(annotations)
                                raise e
                        elif 'columns' in modalities[modality_name]:
                            for column_name in modalities[modality_name][
                                    'columns']:
                                if isinstance(column_name, dict):
                                    assert 'csv_name' in column_name, \
                                        f'The column doesn\'t have the expected csv_name element, got: {column_name}'
                                    column_name = column_name['csv_name']
                                if column_name not in annotations:
                                    n = (3 if len(annotations.columns) < 10
                                         else ceil(len(annotations.columns) / 3))
                                    closest = get_close_matches(
                                        word=column_name,
                                        possibilities=annotations.columns,
                                        n=n,
                                    )
                                    closest = ', '.join(closest)
                                    raise IndexError(
                                        f'The {column_name} from {modality_name} doesn\'t exist.'
                                        + f' Closest matches are: {closest}')
                                dg.append_values(
                                    modality_name=modality_name,
                                    values=annotations[column_name])
                        else:
                            raise IndexError(
                                f'Expected {modality_name} to have either columns or column_name defined'
                            )
Example #15
import tkinter as tk

from file_manager import File_Manager

root = tk.Tk()
root.minsize(300, 300)

number_of_folders = int(input('number_of_folders'))

foo = File_Manager(number_of_folders)
foo.folder_name_and_extension()
foo.sort()
Example #16
    def get_graph_cfgs(self, graph_name):
        graph_cfgs = File_Manager().read_graph_config(graph_name)
        graph_cfgs = self.fix_experiment_modalities(graph_cfgs)
        return graph_cfgs
Example #17
def print_table(table):
    for x in sorted(table, key=lambda leaf: leaf.name[0], reverse=True):
        print(x.name[0], ' - ', x.freq, ' - ', x.code)


###########################################################################################
#                                      MAIN PROGRAM
###########################################################################################
print('Program start.')


img = get_image('lena_gray.tiff')
# img = get_image('c.jpg')

manager = File_Manager('test')

S, H = get_symbols_and_occurrences(img)
D = get_tuple_array(S, H)

t0 = time()
Tree, Code = get_huffman(D)
t1 = time()

# print_table(Code)

print('[Time] Huffman = ', t1 - t0)

keys = [c.name[0] for c in Code]
values = [c.code for c in Code]
dicc = dict(zip(keys, values))
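`get_huffman` is not shown in this snippet. A compact sketch of what a Huffman-code builder over symbol frequencies typically looks like, assuming a plain symbol-to-code mapping is enough (hypothetical, not the example's actual implementation):

import heapq

def huffman_codes(freqs):
    # Each heap entry: (frequency, tie-breaker, {symbol: partial code})
    heap = [(f, i, {s: ''}) for i, (s, f) in enumerate(freqs.items())]
    heapq.heapify(heap)
    tiebreak = len(heap)
    while len(heap) > 1:
        f1, _, c1 = heapq.heappop(heap)
        f2, _, c2 = heapq.heappop(heap)
        # Merge the two rarest subtrees, prefixing their codes with 0/1
        merged = {s: '0' + c for s, c in c1.items()}
        merged.update({s: '1' + c for s, c in c2.items()})
        heapq.heappush(heap, (f1 + f2, tiebreak, merged))
        tiebreak += 1
    return heap[0][2] if heap else {}

print(huffman_codes({'a': 5, 'b': 2, 'c': 1}))
# {'a': '1', 'b': '01', 'c': '00'}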
Example #18
    def run_scene(self, start_epoch=0):
        logged_memory_usage = False
        ui = Console_UI()
        ui.overall_total_epochs = self.epochs
        ui.overall_total_repeats = self.repeat

        Global_Cfgs().set_forward_noise(
            self.get_cfgs('forward_noise', default=0))
        for r in range(0, self.repeat):
            ui.overall_repeat = r
            if self.stochastic_weight_averaging and r > 0:
                self.tasks[self.main_task].stochastic_weight_average()

            for e in range(0, self.epochs):
                ui.overall_epoch = e
                if start_epoch > e + r * self.epochs:
                    Scene.iteration_counter += self.epoch_size
                else:
                    for task in self.tasks.values():
                        task.update_learning_rate(self.get_learning_rate(e))

                    for _ in range(self.epoch_size):
                        for key, task in self.tasks.items():
                            if self.should_task_run(task_name=key, task=task):
                                task.step(
                                    iteration_counter=Scene.iteration_counter,
                                    scene_name=self.scene_name)
                        Scene.iteration_counter += 1

                        if logged_memory_usage is False:
                            for key in self.tasks.keys():
                                task = self.tasks[key]
                                memory_usage = task.get_memory_usage_profile()
                                File_Manager().write_usage_profile(
                                    scene_name=self.scene_name,
                                    task=key,
                                    memory_usage=memory_usage,
                                )
                                ui.inform_user(
                                    f'\n Memory usage for {self.scene_name}::{key}\n'
                                )
                                ui.inform_user(memory_usage)
                            logged_memory_usage = True

                    for task in self.tasks.values():
                        task.save(scene_name='last')
                        # Emptying the cache alone doesn't really help - we need something more.
                        # Removed as this may have been the cause of errors:
                        # torch.cuda.empty_cache()
        ui.reset_overall()

        # Note that the evaluation happens after this step and therefore averaging may hurt the performance
        if self.stochastic_weight_averaging_last:
            self.tasks[self.main_task].stochastic_weight_average()
            for task in self.tasks.values():
                task.save(scene_name='last')

        for task in self.tasks.values():
            task.validate(iteration_counter=Scene.iteration_counter,
                          scene_name=self.scene_name)
            task.test(iteration_counter=Scene.iteration_counter,
                      scene_name=self.scene_name)

        # Save all tasks before entering the next scene
        for task in self.tasks.values():
            task.save(scene_name=self.scene_name)
            for g in task.graphs.values():
                g.dropModelNetworks()
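The `start_epoch` check above fast-forwards a resumed run: epochs that precede the resume point only advance `Scene.iteration_counter` instead of training. The arithmetic in isolation:

epochs, repeats, start_epoch = 3, 2, 4
for r in range(repeats):
    for e in range(epochs):
        global_epoch = e + r * epochs
        action = 'skip' if start_epoch > global_epoch else 'train'
        print(global_epoch, action)
# epochs 0-3 are skipped, 4 and 5 are trained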