def __import_time_series_csv_datatype(self, hrf_folder, connectivity_gid, patient, user_tag):
    path = os.path.join(hrf_folder, self.TIME_SERIES_CSV_FILE)
    with open(path) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=CSVDelimiterOptionsEnum.COMMA.value)
        ts = list(csv_reader)

    # Reshape the raw CSV rows to the 4D TimeSeries layout (time, state variable, space, mode)
    ts_data = np.array(ts, dtype=np.float64).reshape((len(ts), 1, len(ts[0]), 1))
    ts_time = np.random.rand(ts_data.shape[0], )  # placeholder time vector (random values)

    project = dao.get_project_by_id(self.current_project_id)

    ts_gid = uuid.uuid4()
    h5_path = "TimeSeries_{}.h5".format(ts_gid.hex)
    operation_folder = self.storage_interface.get_project_folder(project.name, str(self.operation_id))
    h5_path = os.path.join(operation_folder, h5_path)

    conn = h5.load_from_gid(connectivity_gid)
    ts = TimeSeriesRegion()
    ts.data = ts_data
    ts.time = ts_time
    ts.gid = ts_gid
    ts.connectivity = conn
    generic_attributes = GenericAttributes()
    generic_attributes.user_tag_1 = user_tag
    generic_attributes.state = DEFAULTDATASTATE_RAW_DATA

    with TimeSeriesRegionH5(h5_path) as ts_h5:
        ts_h5.store(ts)
        ts_h5.nr_dimensions.store(4)
        ts_h5.subject.store(patient)
        ts_h5.store_generic_attributes(generic_attributes)

    ts_index = TimeSeriesIndex()
    ts_index.gid = ts_gid.hex
    ts_index.fk_from_operation = self.operation_id
    ts_index.time_series_type = "TimeSeriesRegion"
    ts_index.data_length_1d = ts_data.shape[0]
    ts_index.data_length_2d = ts_data.shape[1]
    ts_index.data_length_3d = ts_data.shape[2]
    ts_index.data_length_4d = ts_data.shape[3]
    ts_index.data_ndim = len(ts_data.shape)
    ts_index.sample_period_unit = 'ms'
    ts_index.sample_period = TimeSeries.sample_period.default
    ts_index.sample_rate = 1024.0
    ts_index.subject = patient
    ts_index.state = DEFAULTDATASTATE_RAW_DATA
    ts_index.labels_ordering = json.dumps(list(TimeSeries.labels_ordering.default))
    ts_index.labels_dimensions = json.dumps(TimeSeries.labels_dimensions.default)
    ts_index.visible = False  # don't show these dummy TimeSeries in the UI
    dao.store_entity(ts_index)
    return ts_gid
def store(self, view_model, fname=None):
    # type: (ViewModel, str) -> str
    """
    Completely store any ViewModel object to the directory specified by self.base_dir.
    Works recursively for view models that are serialized in multiple files (eg. SimulatorAdapterModel)
    """
    if fname is None:
        h5_path = self.path_for_has_traits(type(view_model), view_model.gid)
    else:
        h5_path = os.path.join(self.base_dir, fname)
    with ViewModelH5(h5_path, view_model) as h5_file:
        h5_file.store(view_model)
        h5_file.type.store(self.get_class_path(view_model))
        h5_file.create_date.store(date2string(datetime.now()))
        if hasattr(view_model, "generic_attributes"):
            h5_file.store_generic_attributes(view_model.generic_attributes)
        else:
            # For HasTraits not inheriting from ViewModel (e.g. Linear)
            h5_file.store_generic_attributes(GenericAttributes())

        references = h5_file.gather_references()
        for trait_attr, gid in references:
            if not gid:
                continue
            model_attr = getattr(view_model, trait_attr.field_name)
            if isinstance(gid, list):
                for idx, sub_gid in enumerate(gid):
                    self.store(model_attr[idx])
            else:
                self.store(model_attr)
    return h5_path
def store_view_model(view_model, base_dir):
    # type: (ViewModel, str) -> str
    """
    Completely store any ViewModel object to the directory specified by base_dir.
    It works recursively because there are view models that are serialized in multiple files (eg. SimulatorAdapterModel)
    """
    h5_path = path_for(base_dir, ViewModelH5, view_model.gid, type(view_model).__name__)
    with ViewModelH5(h5_path, view_model) as h5_file:
        h5_file.store(view_model)
        h5_file.type.store(get_full_class_name(type(view_model)))
        h5_file.create_date.store(date2string(datetime.now()))
        if hasattr(view_model, "generic_attributes"):
            h5_file.store_generic_attributes(view_model.generic_attributes)
        else:
            # For HasTraits not inheriting from ViewModel (e.g. Linear)
            h5_file.store_generic_attributes(GenericAttributes())

        references = h5_file.gather_references()
        for trait_attr, gid in references:
            if not gid:
                continue
            model_attr = getattr(view_model, trait_attr.field_name)
            if isinstance(gid, list):
                for idx, sub_gid in enumerate(gid):
                    store_view_model(model_attr[idx], base_dir)
            else:
                store_view_model(model_attr, base_dir)
    return h5_path
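# A minimal round-trip sketch for store_view_model above. It assumes the matching
# h5.load_view_model(gid, base_dir) helper used elsewhere in this codebase is
# importable here; the function name roundtrip_view_model_example is hypothetical.
def roundtrip_view_model_example(view_model, base_dir):
    h5_path = store_view_model(view_model, base_dir)      # writes ViewModel_*.h5, recursing into references
    restored = load_view_model(view_model.gid, base_dir)  # reload by gid from the same directory
    return h5_path, restored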
def store_complete_to_dir(datatype, base_dir, generic_attributes=GenericAttributes()):
    h5_class = REGISTRY.get_h5file_for_datatype(datatype.__class__)
    storage_path = path_by_dir(base_dir, h5_class, datatype.gid)

    index_inst = __store_complete(datatype, storage_path, h5_class, generic_attributes)
    return index_inst
def prepare_data_for_burst_copy(self, burst_config_id, burst_name_format, project):
    burst_config = self.load_burst_configuration(burst_config_id)
    burst_config_copy = burst_config.clone()
    count = dao.count_bursts_with_name(burst_config.name, burst_config.fk_project)
    burst_config_copy.name = burst_name_format.format(burst_config.name, count + 1)

    storage_path = self.storage_interface.get_project_folder(project.name, str(burst_config.fk_simulation))
    simulator = h5.load_view_model(burst_config.simulator_gid, storage_path)
    simulator.generic_attributes = GenericAttributes()
    return simulator, burst_config_copy
def store_complete(datatype, op_id, project_name, generic_attributes=GenericAttributes()):
    h5_class = REGISTRY.get_h5file_for_datatype(datatype.__class__)
    storage_path = path_for(op_id, h5_class, datatype.gid, project_name)

    index_inst = __store_complete(datatype, storage_path, h5_class, generic_attributes)
    return index_inst
def prepare_metadata(algo_category, burst=None, current_ga=GenericAttributes()):
    """
    Gather generic metadata from the submitted fields and the algorithm about to be executed.
    Will populate STATE, GROUP, etc. in the resulting GenericAttributes.
    """
    generic_metadata = GenericAttributes()
    generic_metadata.state = algo_category.defaultdatastate
    generic_metadata.parent_burst = burst
    generic_metadata.fill_from(current_ga)
    return generic_metadata
def _prepare_metadata(algo_category, submit_data, operation_group=None, burst=None, current_ga=GenericAttributes()):
    """
    Gather generic metadata from the submitted fields and the algorithm about to be executed.
    Will populate STATE, GROUP, etc. in the resulting GenericAttributes.
    """
    generic_metadata = GenericAttributes()
    generic_metadata.state = algo_category.defaultdatastate
    generic_metadata.parent_burst = burst
    if DataTypeMetaData.KEY_OPERATION_TAG in submit_data:
        generic_metadata.operation_tag = submit_data[DataTypeMetaData.KEY_OPERATION_TAG]
    if DataTypeMetaData.KEY_TAG_1 in submit_data:
        generic_metadata.user_tag_1 = submit_data[DataTypeMetaData.KEY_TAG_1]
    if operation_group is not None:
        generic_metadata.user_tag_3 = operation_group.name
    generic_metadata.fill_from(current_ga)
    return generic_metadata
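# Hedged usage sketch for _prepare_metadata above: submitted tags land on the new
# GenericAttributes first, and fill_from(current_ga) runs last, so values already
# set on current_ga can take precedence (an assumption from the call order;
# algo_category and the dict contents here are illustrative).
submit_data = {DataTypeMetaData.KEY_TAG_1: 'demo-tag'}
ga = _prepare_metadata(algo_category, submit_data)
# Expected: ga.state == algo_category.defaultdatastate and ga.user_tag_1 == 'demo-tag'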
def __init__(self):
    # Will be populated with keys from DataTypeMetaData
    self.meta_data = {DataTypeMetaData.KEY_SUBJECT: DataTypeMetaData.DEFAULT_SUBJECT}
    self.generic_attributes = GenericAttributes()
    self.generic_attributes.subject = DataTypeMetaData.DEFAULT_SUBJECT
    self.file_handler = FilesHelper()
    self.storage_path = '.'
    # Will be populated with the identifier of the currently running operation
    self.operation_id = None
    self.user_id = None
    self.log = get_logger(self.__class__.__module__)
    self.submitted_form = None
def __import_pearson_coefficients_datatype(self, fc_folder, patient, user_tag, ts_gid):
    path = os.path.join(fc_folder, self.FC_MAT_FILE)
    result = ABCUploader.read_matlab_data(path, self.FC_DATASET_NAME)
    result = result.reshape((result.shape[0], result.shape[1], 1, 1))

    project = dao.get_project_by_id(self.current_project_id)
    user = dao.get_user_by_id(project.fk_admin)

    pearson_gid = uuid.uuid4()
    h5_path = "CorrelationCoefficients_{}.h5".format(pearson_gid.hex)
    operation_folder = self.storage_interface.get_project_folder(project.name, str(self.operation_id))
    h5_path = os.path.join(operation_folder, h5_path)

    generic_attributes = GenericAttributes()
    generic_attributes.user_tag_1 = user_tag
    generic_attributes.state = DEFAULTDATASTATE_RAW_DATA

    with CorrelationCoefficientsH5(h5_path) as pearson_correlation_h5:
        pearson_correlation_h5.array_data.store(result)
        pearson_correlation_h5.gid.store(pearson_gid)
        pearson_correlation_h5.source.store(ts_gid)
        pearson_correlation_h5.labels_ordering.store(CorrelationCoefficients.labels_ordering.default)
        pearson_correlation_h5.subject.store(patient)
        pearson_correlation_h5.store_generic_attributes(generic_attributes)

    pearson_correlation_index = CorrelationCoefficientsIndex()
    pearson_correlation_index.gid = pearson_gid.hex
    pearson_correlation_index.fk_from_operation = self.operation_id
    pearson_correlation_index.subject = patient
    pearson_correlation_index.state = DEFAULTDATASTATE_RAW_DATA
    pearson_correlation_index.ndim = 4
    pearson_correlation_index.fk_source_gid = ts_gid.hex  # we need a random gid here to store the index
    pearson_correlation_index.has_valid_time_series = False
    dao.store_entity(pearson_correlation_index)
def __init__(self, path):
    # type: (str) -> None
    self.path = path
    self.storage_manager = StorageInterface.get_storage_manager(self.path)
    # would be nice to have an opened state for the chunked api instead of the close_file=False

    # common scalar headers
    self.gid = Uuid(HasTraits.gid, self)
    self.written_by = Scalar(Attr(str), self, name=self.KEY_WRITTEN_BY)
    self.create_date = Scalar(Attr(str), self, name='create_date')
    self.type = Scalar(Attr(str), self, name='type')

    # Generic attributes descriptors
    self.generic_attributes = GenericAttributes()
    self.invalid = Scalar(Attr(bool), self, name='invalid')
    self.is_nan = Scalar(Attr(bool), self, name='is_nan')
    self.subject = Scalar(Attr(str), self, name='subject')
    self.state = Scalar(Attr(str), self, name='state')
    self.user_tag_1 = Scalar(Attr(str), self, name='user_tag_1')
    self.user_tag_2 = Scalar(Attr(str), self, name='user_tag_2')
    self.user_tag_3 = Scalar(Attr(str), self, name='user_tag_3')
    self.user_tag_4 = Scalar(Attr(str), self, name='user_tag_4')
    self.user_tag_5 = Scalar(Attr(str), self, name='user_tag_5')
    self.operation_tag = Scalar(Attr(str, required=False), self, name='operation_tag')
    self.parent_burst = Uuid(Attr(uuid.UUID, required=False), self, name='parent_burst')
    self.visible = Scalar(Attr(bool), self, name='visible')
    self.metadata_cache = None
    # Keep a list with datasets for which we should write metadata before closing the file
    self.expandable_datasets = []

    if not self.storage_manager.is_valid_tvb_file():
        self.written_by.store(self.get_class_path())
        self.is_new_file = True
def build(analyzed_entity_index, analyzed_entity, operation, datatype_group, metrics='{"v": 3}'):
    measure = DatatypeMeasureIndex()
    measure.metrics = metrics
    measure.source = analyzed_entity_index
    measure.fk_from_operation = operation.id
    measure.fk_datatype_group = datatype_group.id
    measure = dao.store_entity(measure)

    dm = DatatypeMeasure(analyzed_datatype=analyzed_entity, metrics=json.loads(metrics))
    dm_path = h5.path_for_stored_index(measure)
    with DatatypeMeasureH5(dm_path) as dm_h5:
        dm_h5.store(dm)
        dm_h5.store_generic_attributes(GenericAttributes())

    return measure
def store_complete(datatype, base_dir):
    # type: (HasTraits, str) -> DataType
    """
    Store the given HasTraits instance in a h5 file and fill the Index entity
    for later storage in DB.
    """
    index_class = REGISTRY.get_index_for_datatype(datatype.__class__)
    index_inst = index_class()
    index_inst.fill_from_has_traits(datatype)

    h5_class = REGISTRY.get_h5file_for_datatype(datatype.__class__)
    storage_path = path_for(base_dir, h5_class, datatype.gid)
    with h5_class(storage_path) as f:
        f.store(datatype)
        # Store empty GenericAttributes, so the file stays usable even when it is not saved through ABCAdapter
        f.store_generic_attributes(GenericAttributes())

    return index_inst
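# Sketch of the intended call pattern for store_complete above: write the .h5 file,
# then persist the returned index row through dao, as done elsewhere in this
# section; my_datatype and base_dir are placeholders.
index = store_complete(my_datatype, base_dir)
index = dao.store_entity(index)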
def launch(self, view_model):
    # type: (FooDataImporterModel) -> TimeSeriesIndex
    array_data = numpy.loadtxt(view_model.array_data)
    ts = TimeSeries(data=array_data)
    ts.configure()

    ts_index = TimeSeriesIndex()
    ts_index.fill_from_has_traits(ts)

    ts_h5_path = h5.path_for(self.storage_path, TimeSeriesH5, ts_index.gid)
    with TimeSeriesH5(ts_h5_path) as ts_h5:
        ts_h5.store(ts, scalars_only=True)
        ts_h5.store_generic_attributes(GenericAttributes())
        ts_h5.write_data_slice(array_data)
    return ts_index
def _store(self, file, view_model):
    file.store(view_model)
    file.type.store(self.get_class_path(view_model))
    file.create_date.store(date2string(datetime.now()))
    if hasattr(view_model, "generic_attributes"):
        file.store_generic_attributes(view_model.generic_attributes)
    else:
        # For HasTraits not inheriting from ViewModel (e.g. Linear)
        file.store_generic_attributes(GenericAttributes())

    references = file.gather_references()
    for trait_attr, gid in references:
        if not gid:
            continue
        model_attr = getattr(view_model, trait_attr.field_name)
        if isinstance(gid, list):
            for idx, sub_gid in enumerate(gid):
                self.store(model_attr[idx])
        else:
            self.store(model_attr)
def build(data=None, op=None):
    ts = time_series_factory(data)
    if op is None:
        op = operation_factory()

    ts_db = TimeSeriesIndex()
    ts_db.fk_from_operation = op.id
    ts_db.fill_from_has_traits(ts)

    ts_h5_path = h5.path_for_stored_index(ts_db)
    with TimeSeriesH5(ts_h5_path) as f:
        f.store(ts)
        f.sample_rate.store(ts.sample_rate)
        f.nr_dimensions.store(ts.data.ndim)
        f.store_generic_attributes(GenericAttributes())
        f.store_references(ts)

    ts_db = dao.store_entity(ts_db)
    return ts_db
def store(self, datatype, fname=None):
    # type: (HasTraits, str) -> None
    h5file_cls = self.registry.get_h5file_for_datatype(type(datatype))
    if fname is None:
        path = self.path_for(h5file_cls, datatype.gid)
    else:
        path = os.path.join(self.base_dir, fname)

    sub_dt_refs = []
    with h5file_cls(path) as f:
        f.store(datatype)
        # Store empty GenericAttributes, so that TVBLoader.load_complete_by_function can still be used
        f.store_generic_attributes(GenericAttributes())
        if self.recursive:
            sub_dt_refs = f.gather_references()

    for traited_attr, sub_gid in sub_dt_refs:
        subdt = getattr(datatype, traited_attr.field_name)
        if subdt is not None:  # a non-required reference may not be populated
            self.store(subdt)
class H5File(object):
    """
    A H5 based file format.
    This class implements reading and writing to a *specific* h5 based file format.
    A subclass of this defines a new file format.
    """
    is_new_file = False

    def __init__(self, path):
        # type: (str) -> None
        self.path = path
        storage_path, file_name = os.path.split(path)
        self.storage_manager = HDF5StorageManager(storage_path, file_name)
        # would be nice to have an opened state for the chunked api instead of the close_file=False

        # common scalar headers
        self.gid = Uuid(HasTraits.gid, self)
        self.written_by = Scalar(Attr(str), self, name='written_by')
        self.create_date = Scalar(Attr(str), self, name='create_date')

        # Generic attributes descriptors
        self.generic_attributes = GenericAttributes()
        self.invalid = Scalar(Attr(bool), self, name='invalid')
        self.is_nan = Scalar(Attr(bool), self, name='is_nan')
        self.subject = Scalar(Attr(str), self, name='subject')
        self.state = Scalar(Attr(str), self, name='state')
        self.type = Scalar(Attr(str), self, name='type')
        self.user_tag_1 = Scalar(Attr(str), self, name='user_tag_1')
        self.user_tag_2 = Scalar(Attr(str), self, name='user_tag_2')
        self.user_tag_3 = Scalar(Attr(str), self, name='user_tag_3')
        self.user_tag_4 = Scalar(Attr(str), self, name='user_tag_4')
        self.user_tag_5 = Scalar(Attr(str), self, name='user_tag_5')
        self.visible = Scalar(Attr(bool), self, name='visible')
        self.metadata_cache = None

        if not self.storage_manager.is_valid_hdf5_file():
            self.written_by.store(self.__class__.__module__ + '.' + self.__class__.__name__)
            self.is_new_file = True

    @classmethod
    def file_name_base(cls):
        return cls.__name__.replace("H5", "")

    def iter_accessors(self):
        # type: () -> typing.Generator[Accessor]
        for accessor in self.__dict__.values():
            if isinstance(accessor, Accessor):
                yield accessor

    def iter_datasets(self):
        for dataset in self.__dict__.values():
            if isinstance(dataset, DataSet):
                yield dataset

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        self.storage_manager.close_file()

    def store(self, datatype, scalars_only=False, store_references=True):
        # type: (HasTraits, bool, bool) -> None
        for accessor in self.iter_accessors():
            f_name = accessor.trait_attribute.field_name
            if f_name is None:
                # skip attributes that do not belong to a traited type;
                # the accessor is an independent Accessor
                continue
            if scalars_only and not isinstance(accessor, Scalar):
                continue
            if not store_references and isinstance(accessor, Reference):
                continue
            accessor.store(getattr(datatype, f_name))

    def load_into(self, datatype):
        # type: (HasTraits) -> None
        for accessor in self.iter_accessors():
            if isinstance(accessor, Reference):
                # we do not load references recursively
                continue
            f_name = accessor.trait_attribute.field_name
            if f_name is None:
                # skip attributes that do not belong to a traited type
                continue

            # handle optional data that may be missing from the h5 file
            try:
                value = accessor.load()
            except MissingDataSetException:
                if accessor.trait_attribute.required:
                    raise
                else:
                    value = None

            if isinstance(accessor, JsonFinal):
                current_attr = getattr(datatype, f_name)
                for k, v in current_attr.items():
                    current_attr[k] = value[k]
            else:
                setattr(datatype, f_name, value)

    def store_generic_attributes(self, generic_attributes, create=True):
        # type: (GenericAttributes, bool) -> None
        # write_metadata creation time, serializer class name, etc.
        if create:
            self.create_date.store(date2string(datetime.now()))

        self.generic_attributes.fill_from(generic_attributes)
        self.invalid.store(self.generic_attributes.invalid)
        self.is_nan.store(self.generic_attributes.is_nan)
        self.subject.store(self.generic_attributes.subject)
        self.state.store(self.generic_attributes.state)
        self.type.store(self.generic_attributes.type)
        self.user_tag_1.store(self.generic_attributes.user_tag_1)
        self.user_tag_2.store(self.generic_attributes.user_tag_2)
        self.user_tag_3.store(self.generic_attributes.user_tag_3)
        self.user_tag_4.store(self.generic_attributes.user_tag_4)
        self.user_tag_5.store(self.generic_attributes.user_tag_5)
        self.visible.store(self.generic_attributes.visible)

    def load_generic_attributes(self):
        # type: () -> GenericAttributes
        self.generic_attributes.invalid = self.invalid.load()
        self.generic_attributes.is_nan = self.is_nan.load()
        self.generic_attributes.subject = self.subject.load()
        self.generic_attributes.state = self.state.load()
        self.generic_attributes.type = self.type.load()
        self.generic_attributes.user_tag_1 = self.user_tag_1.load()
        self.generic_attributes.user_tag_2 = self.user_tag_2.load()
        self.generic_attributes.user_tag_3 = self.user_tag_3.load()
        self.generic_attributes.user_tag_4 = self.user_tag_4.load()
        self.generic_attributes.user_tag_5 = self.user_tag_5.load()
        self.generic_attributes.visible = self.visible.load()
        self.generic_attributes.create_date = string2date(str(self.create_date.load())) or None
        return self.generic_attributes

    def gather_references(self):
        ret = []
        for accessor in self.iter_accessors():
            if isinstance(accessor, Reference):
                ret.append((accessor.trait_attribute, accessor.load()))
        return ret

    @staticmethod
    def from_file(path):
        # type: (str) -> typing.Type[H5File]
        base_dir, fname = os.path.split(path)
        storage_manager = HDF5StorageManager(base_dir, fname)
        meta = storage_manager.get_metadata()
        h5file_class_fqn = meta.get('written_by')
        if h5file_class_fqn is None:
            return H5File(path)
        package, cls_name = h5file_class_fqn.rsplit('.', 1)
        module = importlib.import_module(package)
        cls = getattr(module, cls_name)
        return cls(path)

    def __repr__(self):
        return '<{}("{}")>'.format(type(self).__name__, self.path)
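# Usage sketch for the H5File API above: from_file dispatches on the 'written_by'
# metadata entry, and the context manager guarantees close_file on exit. The path
# here is illustrative.
with H5File.from_file('/tmp/Connectivity_1234.h5') as f:
    ga = f.load_generic_attributes()
    print(ga.subject, ga.create_date)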
class ABCAdapter(object):
    """
    Root Abstract class for all TVB Adapters.
    """
    # model.Algorithm instance that will be set for each adapter class created in the build_adapter method
    stored_adapter = None
    launch_mode = AdapterLaunchModeEnum.ASYNC_DIFF_MEM

    def __init__(self):
        self.generic_attributes = GenericAttributes()
        self.generic_attributes.subject = DataTypeMetaData.DEFAULT_SUBJECT
        self.storage_interface = StorageInterface()
        # Will be populated with the identifier of the currently running operation
        self.operation_id = None
        self.user_id = None
        self.submitted_form = None
        self.log = get_logger(self.__class__.__module__)

    @classmethod
    def get_group_name(cls):
        if hasattr(cls, "_ui_group") and hasattr(cls._ui_group, "name"):
            return cls._ui_group.name
        return None

    @classmethod
    def get_group_description(cls):
        if hasattr(cls, "_ui_group") and hasattr(cls._ui_group, "description"):
            return cls._ui_group.description
        return None

    @classmethod
    def get_ui_name(cls):
        if hasattr(cls, "_ui_name"):
            return cls._ui_name
        else:
            return cls.__name__

    @classmethod
    def get_ui_description(cls):
        if hasattr(cls, "_ui_description"):
            return cls._ui_description

    @classmethod
    def get_ui_subsection(cls):
        if hasattr(cls, "_ui_subsection"):
            return cls._ui_subsection
        if hasattr(cls, "_ui_group") and hasattr(cls._ui_group, "subsection"):
            return cls._ui_group.subsection

    @staticmethod
    def can_be_active():
        """
        To be overridden where needed (e.g. Matlab dependent adapters).
        :return: By default True, and False when the current Adapter can not be executed in the
                 current env for various reasons (e.g. no Matlab or Octave installed)
        """
        return True

    def submit_form(self, form):
        self.submitted_form = form

    # TODO separate usage of get_form_class (returning a class) and return of a submitted instance
    def get_form(self):
        if self.submitted_form is not None:
            return self.submitted_form
        return self.get_form_class()

    @abstractmethod
    def get_form_class(self):
        return None

    def get_adapter_fragments(self, view_model):
        """
        The result will be used for introspecting and checking operation changed input params
        from the defaults, to show in web gui.
        :return: a list of ABCAdapterForm classes, in case the current Adapter GUI
                 will be composed of multiple sub-forms.
        """
        return {}

    def get_view_model_class(self):
        return self.get_form_class().get_view_model()

    @abstractmethod
    def get_output(self):
        """
        Describes inputs and outputs of the launch method.
        """

    def configure(self, view_model):
        """
        To be implemented in each Adapter that requires any specific configurations
        before the actual launch.
        """

    @abstractmethod
    def get_required_memory_size(self, view_model):
        """
        Abstract method to be implemented in each adapter. Should return the required memory
        for launching the adapter.
        """

    @abstractmethod
    def get_required_disk_size(self, view_model):
        """
        Abstract method to be implemented in each adapter. Should return the required disk space
        for launching the adapter, in kilobytes.
        """

    def get_execution_time_approximation(self, view_model):
        """
        Method should approximate, based on input arguments, the time it will take
        for the operation to finish (in seconds).
        """
        return -1

    @abstractmethod
    def launch(self, view_model):
        """
        To be implemented in each Adapter. Will contain the logic of the Adapter.
        Takes a ViewModel with data; the dependency direction is: Adapter -> Form -> ViewModel.
        Any returned DataType will be stored in DB, by the Framework.
        :param view_model: the data model corresponding to the current adapter
        """

    def add_operation_additional_info(self, message):
        """
        Adds additional info on the operation to be displayed in the UI. Usually a warning message.
        """
        current_op = dao.get_operation_by_id(self.operation_id)
        current_op.additional_info = message
        dao.store_entity(current_op)

    def extract_operation_data(self, operation):
        operation = dao.get_operation_by_id(operation.id)
        self.operation_id = operation.id
        self.current_project_id = operation.project.id
        self.user_id = operation.fk_launched_by

    def _ensure_enough_resources(self, available_disk_space, view_model):
        # Compare the amount of memory the current algorithm states it needs
        # with the average between the total RAM available on the OS and the free memory at this moment.
        # We do not consider only the free memory, because some OSs free it late and on-demand only.
        total_free_memory = psutil.virtual_memory().free + psutil.swap_memory().free
        total_existent_memory = psutil.virtual_memory().total + psutil.swap_memory().total
        memory_reference = (total_free_memory + total_existent_memory) / 2
        adapter_required_memory = self.get_required_memory_size(view_model)

        if adapter_required_memory > memory_reference:
            msg = "Machine does not have enough RAM memory for the operation (expected %.2g GB, but found %.2g GB)."
            raise NoMemoryAvailableException(msg % (adapter_required_memory / 2 ** 30, memory_reference / 2 ** 30))

        # Compare the expected size of the operation results with the HDD space currently available for the user.
        # TVB defines a quota per user.
        required_disk_space = self.get_required_disk_size(view_model)
        if available_disk_space < 0:
            msg = "You have exceeded your HDD space quota by %.2f MB. Stopping execution."
            raise NoMemoryAvailableException(msg % (- available_disk_space / 2 ** 10))
        if available_disk_space < required_disk_space:
            msg = ("You only have %.2f GB of disk space available, but the operation you "
                   "launched might require %.2f GB. Stopping execution...")
            raise NoMemoryAvailableException(msg % (available_disk_space / 2 ** 20, required_disk_space / 2 ** 20))
        return required_disk_space

    def _update_operation_entity(self, operation, required_disk_space):
        operation.start_now()
        operation.estimated_disk_size = required_disk_space
        dao.store_entity(operation)

    @nan_not_allowed()
    def _prelaunch(self, operation, view_model, available_disk_space=0):
        """
        Method to wrap LAUNCH. Will prepare data, and store results on return.
        """
        self.extract_operation_data(operation)
        self.generic_attributes.fill_from(view_model.generic_attributes)
        self.configure(view_model)
        required_disk_size = self._ensure_enough_resources(available_disk_space, view_model)
        self._update_operation_entity(operation, required_disk_size)

        result = self.launch(view_model)
        if not isinstance(result, (list, tuple)):
            result = [result, ]
        self.__check_integrity(result)
        return self._capture_operation_results(result)

    def _capture_operation_results(self, result):
        """
        After an operation has finished, make sure the results are stored in DB storage
        and the correct meta-data and IDs are set.
        """
        data_type_group_id = None
        operation = dao.get_operation_by_id(self.operation_id)
        if operation.user_group is None or len(operation.user_group) == 0:
            operation.user_group = date2string(datetime.now(), date_format=LESS_COMPLEX_TIME_FORMAT)
            operation = dao.store_entity(operation)
        if self._is_group_launch():
            data_type_group_id = dao.get_datatypegroup_by_op_group_id(operation.fk_operation_group).id

        count_stored = 0
        if result is None:
            return "", count_stored

        group_type = None  # In case of a group, the first not-none type is sufficient to memorize here
        for res in result:
            if res is None:
                continue
            if not res.fixed_generic_attributes:
                res.fill_from_generic_attributes(self.generic_attributes)
            res.fk_from_operation = self.operation_id
            res.fk_datatype_group = data_type_group_id

            associated_file = h5.path_for_stored_index(res)
            if os.path.exists(associated_file):
                if not res.fixed_generic_attributes:
                    with H5File.from_file(associated_file) as f:
                        f.store_generic_attributes(self.generic_attributes)
                # Compute size on disk, in case file-storage is used
                res.disk_size = self.storage_interface.compute_size_on_disk(associated_file)
            dao.store_entity(res)
            res.after_store()
            group_type = res.type
            count_stored += 1

        if count_stored > 0 and self._is_group_launch():
            # Update the operation group name
            operation_group = dao.get_operationgroup_by_id(operation.fk_operation_group)
            operation_group.fill_operationgroup_name(group_type)
            dao.store_entity(operation_group)

        return 'Operation ' + str(self.operation_id) + ' has finished.', count_stored

    def __check_integrity(self, result):
        """
        Check that the returned parameters of the LAUNCH operation
        are of the types specified in the adapter's interface.
        """
        for result_entity in result:
            if result_entity is None:
                continue
            if not self.__is_data_in_supported_types(result_entity):
                msg = "Unexpected output DataType %s"
                raise InvalidParameterException(msg % type(result_entity))

    def __is_data_in_supported_types(self, data):
        if data is None:
            return True
        for supported_type in self.get_output():
            if isinstance(data, supported_type):
                return True
        # Data can't be mapped on any supported type!
        return False

    def _is_group_launch(self):
        """
        Return true if this adapter is launched from a group of operations
        """
        operation = dao.get_operation_by_id(self.operation_id)
        return operation.fk_operation_group is not None

    def load_entity_by_gid(self, data_gid):
        # type: (typing.Union[uuid.UUID, str]) -> DataType
        """
        Load a generic DataType, specified by GID.
        """
        idx = load_entity_by_gid(data_gid)
        if idx and self.generic_attributes.parent_burst is None:
            # Only in case the BurstConfiguration reference hasn't been set already, take it from the current DT
            self.generic_attributes.parent_burst = idx.fk_parent_burst
        return idx

    def load_traited_by_gid(self, data_gid):
        # type: (typing.Union[uuid.UUID, str]) -> HasTraits
        """
        Load a generic HasTraits instance, specified by GID.
        """
        index = self.load_entity_by_gid(data_gid)
        return h5.load_from_index(index)

    def load_with_references(self, dt_gid):
        # type: (typing.Union[uuid.UUID, str]) -> HasTraits
        dt_index = self.load_entity_by_gid(dt_gid)
        h5_path = h5.path_for_stored_index(dt_index)
        dt, _ = h5.load_with_references(h5_path)
        return dt

    def view_model_to_has_traits(self, view_model):
        # type: (ViewModel) -> HasTraits
        has_traits_class = view_model.linked_has_traits
        if not has_traits_class:
            raise Exception("There is no linked HasTraits for this ViewModel {}".format(type(view_model)))
        has_traits = has_traits_class()
        view_model_class = type(view_model)
        for attr_name in has_traits_class.declarative_attrs:
            view_model_class_attr = getattr(view_model_class, attr_name)
            view_model_attr = getattr(view_model, attr_name)
            if isinstance(view_model_class_attr, DataTypeGidAttr) and view_model_attr:
                attr_value = self.load_with_references(view_model_attr)
            elif isinstance(view_model_class_attr, Attr) and isinstance(view_model_attr, ViewModel):
                attr_value = self.view_model_to_has_traits(view_model_attr)
            elif isinstance(view_model_class_attr, List) and len(view_model_attr) > 0 \
                    and isinstance(view_model_attr[0], ViewModel):
                attr_value = list()
                for view_model_elem in view_model_attr:
                    elem = self.view_model_to_has_traits(view_model_elem)
                    attr_value.append(elem)
            else:
                attr_value = view_model_attr
            setattr(has_traits, attr_name, attr_value)
        return has_traits

    @staticmethod
    def build_adapter_from_class(adapter_class):
        """
        Having a subclass of ABCAdapter, prepare an instance for launching an operation with it.
        """
        if not issubclass(adapter_class, ABCAdapter):
            raise IntrospectionException("Invalid data type: It should extend adapters.ABCAdapter!")
        try:
            stored_adapter = dao.get_algorithm_by_module(adapter_class.__module__, adapter_class.__name__)

            adapter_instance = adapter_class()
            adapter_instance.stored_adapter = stored_adapter
            return adapter_instance
        except Exception as excep:
            LOGGER.exception(excep)
            raise IntrospectionException(str(excep))

    @staticmethod
    def determine_adapter_class(stored_adapter):
        # type: (Algorithm) -> ABCAdapter
        """
        Determine the class of an adapter based on module and classname strings from stored_adapter
        :param stored_adapter: Algorithm or AlgorithmDTO type
        :return: a subclass of ABCAdapter
        """
        ad_module = importlib.import_module(stored_adapter.module)
        adapter_class = getattr(ad_module, stored_adapter.classname)
        return adapter_class

    @staticmethod
    def build_adapter(stored_adapter):
        # type: (Algorithm) -> ABCAdapter
        """
        Having a module and a class name, create an instance of ABCAdapter.
        """
        try:
            adapter_class = ABCAdapter.determine_adapter_class(stored_adapter)
            adapter_instance = adapter_class()
            adapter_instance.stored_adapter = stored_adapter
            return adapter_instance
        except Exception:
            msg = "Could not load Adapter Instance for Stored row %s" % stored_adapter
            LOGGER.exception(msg)
            raise IntrospectionException(msg)

    def load_view_model(self, operation):
        storage_path = self.storage_interface.get_project_folder(operation.project.name, str(operation.id))
        input_gid = operation.view_model_gid
        return h5.load_view_model(input_gid, storage_path)

    @staticmethod
    def array_size2kb(size):
        """
        :param size: size in bytes
        :return: size in kB
        """
        return size * TvbProfile.current.MAGIC_NUMBER / 8 / 2 ** 10

    @staticmethod
    def fill_index_from_h5(analyzer_index, analyzer_h5):
        """
        Method used only by analyzers that write slices of data.
        As they never have the whole array_data in memory, the metadata related to
        array_data (min, max, etc.) stored on the index is not correct, so we need to update it here.
        """
        metadata = analyzer_h5.array_data.get_cached_metadata()

        if not metadata.has_complex:
            analyzer_index.array_data_max = float(metadata.max)
            analyzer_index.array_data_min = float(metadata.min)
            analyzer_index.array_data_mean = float(metadata.mean)

        analyzer_index.aray_has_complex = metadata.has_complex
        analyzer_index.array_is_finite = metadata.is_finite
        analyzer_index.shape = json.dumps(analyzer_h5.array_data.shape)
        analyzer_index.ndim = len(analyzer_h5.array_data.shape)

    def path_for(self, h5_file_class, gid, dt_class=None):
        project = dao.get_project_by_id(self.current_project_id)
        return h5.path_for(self.operation_id, h5_file_class, gid, project.name, dt_class)

    def store_complete(self, datatype, generic_attributes=GenericAttributes()):
        project = dao.get_project_by_id(self.current_project_id)
        return h5.store_complete(datatype, self.operation_id, project.name, generic_attributes)

    def get_storage_path(self):
        project = dao.get_project_by_id(self.current_project_id)
        return self.storage_interface.get_project_folder(project.name, str(self.operation_id))
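# A skeleton of the abstract surface an ABCAdapter subclass must cover, going by
# the @abstractmethod markers above; FooForm and FooIndex are hypothetical names,
# for illustration only.
class FooAdapter(ABCAdapter):
    def get_form_class(self):
        return FooForm

    def get_output(self):
        return [FooIndex]

    def get_required_memory_size(self, view_model):
        return -1  # placeholder; real adapters estimate from view_model

    def get_required_disk_size(self, view_model):
        return 0  # in kilobytes, per the docstring above

    def launch(self, view_model):
        # build a datatype, store it via self.store_complete(...), return its index
        pass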
class H5File(object):
    """
    A H5 based file format.
    This class implements reading and writing to a *specific* h5 based file format.
    A subclass of this defines a new file format.
    """
    KEY_WRITTEN_BY = 'written_by'
    is_new_file = False

    def __init__(self, path):
        # type: (str) -> None
        self.path = path
        storage_path, file_name = os.path.split(path)
        self.storage_manager = HDF5StorageManager(storage_path, file_name)
        # would be nice to have an opened state for the chunked api instead of the close_file=False

        # common scalar headers
        self.gid = Uuid(HasTraits.gid, self)
        self.written_by = Scalar(Attr(str), self, name=self.KEY_WRITTEN_BY)
        self.create_date = Scalar(Attr(str), self, name='create_date')
        self.type = Scalar(Attr(str), self, name='type')

        # Generic attributes descriptors
        self.generic_attributes = GenericAttributes()
        self.invalid = Scalar(Attr(bool), self, name='invalid')
        self.is_nan = Scalar(Attr(bool), self, name='is_nan')
        self.subject = Scalar(Attr(str), self, name='subject')
        self.state = Scalar(Attr(str), self, name='state')
        self.user_tag_1 = Scalar(Attr(str), self, name='user_tag_1')
        self.user_tag_2 = Scalar(Attr(str), self, name='user_tag_2')
        self.user_tag_3 = Scalar(Attr(str), self, name='user_tag_3')
        self.user_tag_4 = Scalar(Attr(str), self, name='user_tag_4')
        self.user_tag_5 = Scalar(Attr(str), self, name='user_tag_5')
        self.operation_tag = Scalar(Attr(str, required=False), self, name='operation_tag')
        self.parent_burst = Uuid(Attr(uuid.UUID, required=False), self, name='parent_burst')
        self.visible = Scalar(Attr(bool), self, name='visible')
        self.metadata_cache = None

        if not self.storage_manager.is_valid_hdf5_file():
            self.written_by.store(self.__class__.__module__ + '.' + self.__class__.__name__)
            self.is_new_file = True

    @classmethod
    def file_name_base(cls):
        return cls.__name__.replace("H5", "")

    def read_subtype_attr(self):
        return None

    def iter_accessors(self):
        # type: () -> typing.Generator[Accessor]
        for accessor in self.__dict__.values():
            if isinstance(accessor, Accessor):
                yield accessor

    def iter_datasets(self):
        for dataset in self.__dict__.values():
            if isinstance(dataset, DataSet):
                yield dataset

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        self.storage_manager.close_file()

    def store(self, datatype, scalars_only=False, store_references=True):
        # type: (HasTraits, bool, bool) -> None
        for accessor in self.iter_accessors():
            f_name = accessor.trait_attribute.field_name
            if f_name is None:
                # skip attributes that do not belong to a traited type;
                # the accessor is an independent Accessor
                continue
            if scalars_only and not isinstance(accessor, Scalar):
                continue
            if not store_references and isinstance(accessor, Reference):
                continue
            accessor.store(getattr(datatype, f_name))

    def load_into(self, datatype):
        # type: (HasTraits) -> None
        for accessor in self.iter_accessors():
            if isinstance(accessor, (Reference, ReferenceList)):
                # we do not load references recursively
                continue
            f_name = accessor.trait_attribute.field_name
            if f_name is None:
                # skip attributes that do not belong to a traited type
                continue

            # handle optional data that may be missing from the h5 file
            try:
                value = accessor.load()
            except MissingDataSetException:
                if accessor.trait_attribute.required:
                    raise
                else:
                    value = None

            if isinstance(accessor, JsonFinal):
                current_attr = getattr(datatype, f_name)
                for k, v in current_attr.items():
                    current_attr[k] = value[k]
            else:
                try:
                    setattr(datatype, f_name, value)
                except TraitFinalAttributeError:
                    if getattr(datatype, f_name) != value:
                        raise
                    else:
                        LOGGER.info(
                            'Cannot overwrite Final attribute: {} on {}, but it already has the expected value'.format(
                                f_name, type(datatype).__name__))

    def store_generic_attributes(self, generic_attributes, create=True):
        # type: (GenericAttributes, bool) -> None
        # write_metadata creation time, serializer class name, etc.
        if create:
            self.create_date.store(date2string(datetime.now()))

        self.generic_attributes.fill_from(generic_attributes)
        self.invalid.store(self.generic_attributes.invalid)
        self.is_nan.store(self.generic_attributes.is_nan)
        self.subject.store(self.generic_attributes.subject)
        self.state.store(self.generic_attributes.state)
        self.user_tag_1.store(self.generic_attributes.user_tag_1)
        self.user_tag_2.store(self.generic_attributes.user_tag_2)
        self.user_tag_3.store(self.generic_attributes.user_tag_3)
        self.user_tag_4.store(self.generic_attributes.user_tag_4)
        self.user_tag_5.store(self.generic_attributes.user_tag_5)
        self.operation_tag.store(self.generic_attributes.operation_tag)
        self.visible.store(self.generic_attributes.visible)
        if self.generic_attributes.parent_burst is not None:
            self.parent_burst.store(uuid.UUID(self.generic_attributes.parent_burst))

    def load_generic_attributes(self):
        # type: () -> GenericAttributes
        self.generic_attributes.invalid = self.invalid.load()
        self.generic_attributes.is_nan = self.is_nan.load()
        self.generic_attributes.subject = self.subject.load()
        self.generic_attributes.state = self.state.load()
        self.generic_attributes.user_tag_1 = self.user_tag_1.load()
        self.generic_attributes.user_tag_2 = self.user_tag_2.load()
        self.generic_attributes.user_tag_3 = self.user_tag_3.load()
        self.generic_attributes.user_tag_4 = self.user_tag_4.load()
        self.generic_attributes.user_tag_5 = self.user_tag_5.load()
        self.generic_attributes.visible = self.visible.load()
        self.generic_attributes.create_date = string2date(str(self.create_date.load())) or None
        try:
            self.generic_attributes.operation_tag = self.operation_tag.load()
        except MissingDataSetException:
            self.generic_attributes.operation_tag = None
        try:
            burst = self.parent_burst.load()
            self.generic_attributes.parent_burst = burst.hex if burst is not None else None
        except MissingDataSetException:
            self.generic_attributes.parent_burst = None
        return self.generic_attributes

    def gather_references(self, datatype_cls=None):
        ret = []
        for accessor in self.iter_accessors():
            trait_attribute = None
            if datatype_cls:
                if hasattr(datatype_cls, accessor.field_name):
                    trait_attribute = getattr(datatype_cls, accessor.field_name)
            if not trait_attribute:
                trait_attribute = accessor.trait_attribute
            if isinstance(accessor, Reference):
                ret.append((trait_attribute, accessor.load()))
            if isinstance(accessor, ReferenceList):
                hex_gids = accessor.load()
                gids = [uuid.UUID(hex_gid) for hex_gid in hex_gids]
                ret.append((trait_attribute, gids))
        return ret

    def determine_datatype_from_file(self):
        config_type = self.type.load()
        package, cls_name = config_type.rsplit('.', 1)
        module = importlib.import_module(package)
        datatype_cls = getattr(module, cls_name)
        return datatype_cls

    @staticmethod
    def determine_type(path):
        # type: (str) -> typing.Type[HasTraits]
        type_class_fqn = H5File.get_metadata_param(path, 'type')
        if type_class_fqn is None:
            return HasTraits
        package, cls_name = type_class_fqn.rsplit('.', 1)
        module = importlib.import_module(package)
        cls = getattr(module, cls_name)
        return cls

    @staticmethod
    def get_metadata_param(path, param):
        base_dir, fname = os.path.split(path)
        storage_manager = HDF5StorageManager(base_dir, fname)
        meta = storage_manager.get_metadata()
        return meta.get(param)

    @staticmethod
    def h5_class_from_file(path):
        # type: (str) -> typing.Type[H5File]
        h5file_class_fqn = H5File.get_metadata_param(path, H5File.KEY_WRITTEN_BY)
        if h5file_class_fqn is None:
            return H5File(path)
        package, cls_name = h5file_class_fqn.rsplit('.', 1)
        module = importlib.import_module(package)
        cls = getattr(module, cls_name)
        return cls

    @staticmethod
    def from_file(path):
        # type: (str) -> H5File
        cls = H5File.h5_class_from_file(path)
        return cls(path)

    def __repr__(self):
        return '<{}("{}")>'.format(type(self).__name__, self.path)
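# Sketch of the two metadata lookups the newer H5File exposes: determine_type reads
# the 'type' entry, while h5_class_from_file/from_file read KEY_WRITTEN_BY. The
# path here is illustrative.
path = '/tmp/TimeSeriesRegion_abcd.h5'
datatype_cls = H5File.determine_type(path)  # HasTraits subclass recorded in the file
with H5File.from_file(path) as f:           # concrete H5File subclass that wrote it
    ga = f.load_generic_attributes()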
def __init__(self, **kwargs):
    super(ViewModel, self).__init__(**kwargs)
    self.create_date = datetime.now()
    self.generic_attributes = GenericAttributes()
def store_complete(self, datatype, generic_attributes=GenericAttributes()):
    return h5.store_complete_to_dir(datatype, self.storage_path, generic_attributes)