def import_operation(operation_entity, migration=False):
    """
    Store an Operation entity.
    """
    do_merge = False
    if operation_entity.id:
        do_merge = True
    operation_entity = dao.store_entity(operation_entity, merge=do_merge)
    operation_group_id = operation_entity.fk_operation_group
    datatype_group = None

    if operation_group_id is not None:
        datatype_group = dao.get_datatypegroup_by_op_group_id(operation_group_id)

        if datatype_group is None and migration is False:
            # If no DataTypeGroup is present for the current operation group, create it.
            operation_group = dao.get_operationgroup_by_id(operation_group_id)
            datatype_group = DataTypeGroup(operation_group, operation_id=operation_entity.id)
            datatype_group.state = UploadAlgorithmCategoryConfig.defaultdatastate
            datatype_group = dao.store_entity(datatype_group)

    return operation_entity, datatype_group
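
# A minimal usage sketch for `import_operation` (illustrative only; assumes an
# Operation entity restored from an exported project, as in the import flow above):
#
#   operation, dt_group = import_operation(restored_operation, migration=False)
#   # `dt_group` stays None unless the operation belongs to an operation group;
#   # for grouped operations a missing DataTypeGroup is created on the fly.
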
def async_launch_and_prepare_pse(self, burst_config, user, project, simulator_algo,
                                 range_param1, range_param2, session_stored_simulator):
    try:
        simulator_id = simulator_algo.id
        algo_category = simulator_algo.algorithm_category
        operation_group = burst_config.operation_group
        metric_operation_group = burst_config.metric_operation_group
        operations = []
        # Use [None] as a placeholder so the inner loop also runs for a single-range PSE.
        range_param2_values = [None]
        if range_param2:
            range_param2_values = range_param2.get_range_values()

        for param1_value in range_param1.get_range_values():
            for param2_value in range_param2_values:
                simulator = copy.deepcopy(session_stored_simulator)
                self._set_simulator_range_parameter(simulator, range_param1.name, param1_value)
                ranges = {range_param1.name: param1_value[0]}
                if param2_value is not None:
                    self._set_simulator_range_parameter(simulator, range_param2.name, param2_value)
                    ranges[range_param2.name] = param2_value[0]

                simulator_index = SimulatorIndex()
                simulator_index.fk_parent_burst = burst_config.id
                simulator_index = dao.store_entity(simulator_index)

                ranges = json.dumps(ranges)

                operation = self._prepare_operation(project.id, user.id, simulator_id, simulator_index,
                                                    algo_category, operation_group,
                                                    {DataTypeMetaData.KEY_BURST: burst_config.id}, ranges)

                simulator_index.fk_from_operation = operation.id
                dao.store_entity(simulator_index)

                storage_path = self.files_helper.get_project_folder(project, str(operation.id))
                self.serialize_simulator(simulator, simulator_index.gid, None, storage_path)
                operations.append(operation)

        first_operation = operations[0]

        datatype_group = DataTypeGroup(operation_group, operation_id=first_operation.id,
                                       fk_parent_burst=burst_config.id,
                                       state=json.loads(first_operation.meta_data)[DataTypeMetaData.KEY_STATE])
        dao.store_entity(datatype_group)
        metrics_datatype_group = DataTypeGroup(metric_operation_group, fk_parent_burst=burst_config.id)
        dao.store_entity(metrics_datatype_group)

        wf_errs = 0
        for operation in operations:
            try:
                OperationService().launch_operation(operation.id, True)
            except Exception as excep:
                self.logger.error(excep)
                wf_errs += 1
                BurstService2().mark_burst_finished(burst_config, error_message=str(excep))

        self.logger.debug("Finished launching workflows. " + str(len(operations) - wf_errs) +
                          " were launched successfully, " + str(wf_errs) + " had error on pre-launch steps")
    except Exception as excep:
        self.logger.error(excep)
        BurstService2().mark_burst_finished(burst_config, error_message=str(excep))
def export(self, data, project, public_key_path, password):
    """
    Exports a data type:
    1. If `data` is a plain data type, simply export its storage file (HDF format).
    2. If `data` is a DataTypeGroup, create a ZIP with the files of all its data types.
    """
    download_file_name = self._get_export_file_name(data)

    if DataTypeGroup.is_data_a_group(data):
        all_datatypes, op_file_dict = self.prepare_datatypes_for_export(data)

        # Copy the linked datatypes
        dt_path_list = []
        data_type = all_datatypes[0]
        self.gather_datatypes_for_copy(data_type, dt_path_list)

        # Create ZIP archive
        zip_file = self.storage_interface.export_datatypes_structure(op_file_dict, data, download_file_name,
                                                                     public_key_path, password,
                                                                     (dt_path_list[1:], data_type))
        return download_file_name, zip_file, True
    else:
        dt_path_list = []
        self.gather_datatypes_for_copy(data, dt_path_list)
        zip_to_export = self.storage_interface.export_datatypes(dt_path_list, data, download_file_name,
                                                                public_key_path, password)
        return None, zip_to_export, True
def __import_operation(operation_entity):
    """
    Store an Operation entity.
    """
    operation_entity = dao.store_entity(operation_entity)
    operation_group_id = operation_entity.fk_operation_group
    datatype_group = None

    if operation_group_id is not None:
        try:
            datatype_group = dao.get_datatypegroup_by_op_group_id(operation_group_id)
        except SQLAlchemyError:
            # If no DataTypeGroup is present for the current operation group, create it.
            operation_group = dao.get_operationgroup_by_id(operation_group_id)
            datatype_group = DataTypeGroup(operation_group, operation_id=operation_entity.id)
            datatype_group.state = UploadAlgorithmCategoryConfig.defaultdatastate
            datatype_group = dao.store_entity(datatype_group)

    return operation_entity, datatype_group
def prepare_operations(self, user_id, project, algorithm, category, visible=True,
                       existing_dt_group=None, view_model=None, **kwargs):
    """
    Do all the necessary preparations for storing an operation. If a range of values
    is given, create an operation group and one operation for each possible instance
    from the range.
    """
    operations = []

    available_args, group = self._prepare_group(project.id, existing_dt_group, kwargs)
    if len(available_args) > TvbProfile.current.MAX_RANGE_NUMBER:
        raise LaunchException("Too big range specified. You should limit the"
                              " resulting operations to %d" % TvbProfile.current.MAX_RANGE_NUMBER)
    else:
        self.logger.debug("Launching a range with %d operations..." % len(available_args))

    group_id = None
    if group is not None:
        group_id = group.id
    ga = self._prepare_metadata(category, kwargs, group)
    ga.visible = visible
    view_model.generic_attributes = ga

    self.logger.debug("Saving Operation(userId=" + str(user_id) + ", projectId=" + str(project.id) +
                      ", algorithmId=" + str(algorithm.id) + ", ops_group=" + str(group_id) + ")")

    for (one_set_of_args, range_vals) in available_args:
        range_values = json.dumps(range_vals) if range_vals else None
        operation = Operation(user_id, project.id, algorithm.id,
                              json.dumps({'gid': view_model.gid.hex}), op_group_id=group_id,
                              user_group=ga.operation_tag, range_values=range_values)
        operation.visible = visible
        operations.append(operation)
    operations = dao.store_entities(operations)

    if group is not None:
        if existing_dt_group is None:
            datatype_group = DataTypeGroup(group, operation_id=operations[0].id,
                                           state=category.defaultdatastate)
            dao.store_entity(datatype_group)
        else:
            # Reset count
            existing_dt_group.count_results = None
            dao.store_entity(existing_dt_group)

    for operation in operations:
        self._store_view_model(operation, project, view_model)

    return operations, group
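
# Self-contained sketch of the range fan-out that `prepare_operations` (and the PSE
# launchers below) performs: every combination of range values becomes one operation,
# tagged with a JSON `range_values` string. Plain Python; the function and parameter
# names below are illustrative, not part of the TVB API.
import itertools
import json


def expand_ranges(range1_name, range1_values, range2_name=None, range2_values=None):
    """Return one JSON `range_values` string per operation in the group."""
    second = range2_values if range2_values else [None]
    expanded = []
    for v1, v2 in itertools.product(range1_values, second):
        ranges = {range1_name: v1}
        if v2 is not None:
            ranges[range2_name] = v2
        expanded.append(json.dumps(ranges))
    return expanded


# expand_ranges("coupling.a", [0.1, 0.2], "conduction_speed", [1.0, 2.0])
# -> four JSON strings, hence four operations in the resulting operation group
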
def get_all_datatypes_from_data(data):
    """
    This method builds a list with all the data types to be processed later.
    - If the current data is a simple data type, it is the only element of the list.
    - If it is a data type group, all its children are loaded and added to the list.
    """
    # first check if the current data is a DataTypeGroup
    if DataTypeGroup.is_data_a_group(data):
        data_types = ProjectService.get_datatypes_from_datatype_group(data.id)
        result = []
        if data_types is not None and len(data_types) > 0:
            for data_type in data_types:
                entity = load_entity_by_gid(data_type.gid)
                result.append(entity)
        return result
    else:
        return [data]
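
# Minimal self-contained mirror of the dispatch above, showing the contract that
# callers can always iterate a list (plain Python; `is_group` and `children_of`
# stand in for the real DataTypeGroup / ProjectService calls):
def normalize_to_list(data, is_group, children_of):
    """Expand a group to its children; wrap a single datatype in a one-element list."""
    if is_group(data):
        return list(children_of(data))
    return [data]
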
def _get_effective_data_type(self, data):
    """
    This method returns the effective data type for the provided data.
    - If the current data is a simple data type, it is returned as-is.
    - If it is a data type group, the first element is returned. One element is
      enough, since all the elements of a group have the same type.
    """
    # first check if the current data is a DataTypeGroup
    if DataTypeGroup.is_data_a_group(data):
        if self.skip_group_datatypes():
            return None

        data_types = ProjectService.get_datatypes_from_datatype_group(data.id)

        if data_types is not None and len(data_types) > 0:
            # Since all objects in a group have the same type, the first one is enough
            return load_entity_by_gid(data_types[0].gid)
        else:
            return None
    else:
        return data
def export(self, data, project, public_key_path, password):
    """
    Exports a data type:
    1. If `data` is a plain data type, simply export its storage file (HDF format).
    2. If `data` is a DataTypeGroup, create a ZIP with the files of all its data types.
    """
    download_file_name = self._get_export_file_name(data)

    if DataTypeGroup.is_data_a_group(data):
        _, op_file_dict = self.prepare_datatypes_for_export(data)

        # Create ZIP archive
        zip_file = self.storage_interface.export_datatypes_structure(op_file_dict, data, download_file_name,
                                                                     public_key_path, password)
        return download_file_name, zip_file, True
    else:
        data_path = h5.path_for_stored_index(data)
        data_file = self.storage_interface.export_datatypes([data_path], data, download_file_name,
                                                            public_key_path, password)
        return None, data_file, True
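
# Hedged sketch of how callers might consume the export triple returned above
# (the unpacking names are hypothetical, not the actual controller code):
#
#   file_name, payload, _ = exporter.export(datatype, project, key_path, password)
#   # For a DataTypeGroup, `file_name` is the suggested ZIP download name;
#   # for a single datatype, `file_name` is None and `payload` is the exported file.
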
def async_launch_and_prepare_pse(self, burst_config, user, project, simulator_algo,
                                 range_param1, range_param2, session_stored_simulator):
    try:
        algo_category = simulator_algo.algorithm_category
        operation_group = burst_config.operation_group
        metric_operation_group = burst_config.metric_operation_group
        operations = []
        # Use [None] as a placeholder so the inner loop also runs for a single-range PSE.
        range_param2_values = [None]
        if range_param2:
            range_param2_values = range_param2.get_range_values()
        first_simulator = None

        ga = self.operation_service._prepare_metadata(simulator_algo.algorithm_category, {},
                                                      operation_group, burst_config.gid)
        session_stored_simulator.generic_attributes = ga

        for param1_value in range_param1.get_range_values():
            for param2_value in range_param2_values:
                # Copy, but generate a new GUID for every Simulator in the PSE
                simulator = copy.deepcopy(session_stored_simulator)
                simulator.gid = uuid.uuid4()
                self._set_simulator_range_parameter(simulator, range_param1.name, param1_value)

                ranges = {range_param1.name: self._set_range_param_in_dict(param1_value)}

                if param2_value is not None:
                    self._set_simulator_range_parameter(simulator, range_param2.name, param2_value)
                    ranges[range_param2.name] = self._set_range_param_in_dict(param2_value)

                ranges = json.dumps(ranges)

                operation = self.operation_service.prepare_operation(user.id, project.id, simulator_algo,
                                                                     simulator.gid, operation_group, ranges)

                storage_path = self.files_helper.get_project_folder(project, str(operation.id))
                h5.store_view_model(simulator, storage_path)
                operations.append(operation)
                if first_simulator is None:
                    first_simulator = simulator

        first_operation = operations[0]
        storage_path = self.files_helper.get_project_folder(project, str(first_operation.id))
        burst_config = self.burst_service.update_simulation_fields(burst_config.id, first_operation.id,
                                                                   first_simulator.gid)
        self.burst_service.store_burst_configuration(burst_config, storage_path)

        datatype_group = DataTypeGroup(operation_group, operation_id=first_operation.id,
                                       fk_parent_burst=burst_config.gid,
                                       state=algo_category.defaultdatastate)
        dao.store_entity(datatype_group)
        metrics_datatype_group = DataTypeGroup(metric_operation_group, fk_parent_burst=burst_config.gid)
        dao.store_entity(metrics_datatype_group)

        wf_errs = 0
        for operation in operations:
            try:
                OperationService().launch_operation(operation.id, True)
            except Exception as excep:
                self.logger.error(excep)
                wf_errs += 1
                self.burst_service.mark_burst_finished(burst_config, error_message=str(excep))

        self.logger.debug("Finished launching workflows. " + str(len(operations) - wf_errs) +
                          " were launched successfully, " + str(wf_errs) + " had error on pre-launch steps")
        return first_operation
    except Exception as excep:
        self.logger.error(excep)
        self.burst_service.mark_burst_finished(burst_config, error_message=str(excep))
def _prepare_operations(self, algo_category, burst_config, metric_operation_group, operation_group,
                        project, range_param1, range_param2, range_param2_values,
                        session_stored_simulator, simulator_algo, user):
    first_simulator = None
    pse_canceled = False
    operations = []

    for param1_value in range_param1.get_range_values():
        for param2_value in range_param2_values:
            # Reload the burst on every iteration, so a concurrent cancel or delete is noticed.
            burst_config = dao.get_burst_by_id(burst_config.id)
            if burst_config is None:
                self.logger.debug("Burst config was deleted")
                pse_canceled = True
                break

            if burst_config.status in [BurstConfiguration.BURST_CANCELED, BurstConfiguration.BURST_ERROR]:
                self.logger.debug("Current burst status is {}. Preparing operations cannot continue.".format(
                    burst_config.status))
                pse_canceled = True
                break

            # Copy, but generate a new GUID for every Simulator in the PSE
            simulator = copy.deepcopy(session_stored_simulator)
            simulator.gid = uuid.uuid4()
            self._set_simulator_range_parameter(simulator, range_param1.name, param1_value)

            ranges = {range_param1.name: self._set_range_param_in_dict(param1_value)}

            if param2_value is not None:
                self._set_simulator_range_parameter(simulator, range_param2.name, param2_value)
                ranges[range_param2.name] = self._set_range_param_in_dict(param2_value)

            ranges = json.dumps(ranges)

            operation = self.operation_service.prepare_operation(user.id, project, simulator_algo,
                                                                 view_model=simulator, ranges=ranges,
                                                                 burst_gid=burst_config.gid,
                                                                 op_group_id=burst_config.fk_operation_group)
            simulator.range_values = ranges
            operations.append(operation)

            if first_simulator is None:
                first_simulator = simulator
                storage_path = self.storage_interface.get_project_folder(project.name, str(operation.id))
                burst_config = self.burst_service.update_simulation_fields(burst_config, operation.id,
                                                                           first_simulator.gid)
                self.burst_service.store_burst_configuration(burst_config, storage_path)

                datatype_group = DataTypeGroup(operation_group, operation_id=operation.id,
                                               fk_parent_burst=burst_config.gid,
                                               state=algo_category.defaultdatastate)
                dao.store_entity(datatype_group)
                metrics_datatype_group = DataTypeGroup(metric_operation_group,
                                                       fk_parent_burst=burst_config.gid,
                                                       state=algo_category.defaultdatastate)
                dao.store_entity(metrics_datatype_group)

    return operations, pse_canceled
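
# The double loop above re-reads the Burst from the database on every iteration so a
# user-initiated cancel (or a delete) is honoured while operations are still being
# prepared. Hedged sketch of that pattern in isolation (all names are hypothetical):
#
#   for combo in all_range_combinations:
#       burst = reload_from_db(burst.id)
#       if burst is None or burst.status in (CANCELED, ERROR):
#           pse_canceled = True
#           break
#       operations.append(prepare_one_operation(combo))
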
def get_export_file_extension(self, data):
    if DataTypeGroup.is_data_a_group(data):
        return StorageInterface.TVB_ZIP_FILE_EXTENSION
    else:
        return StorageInterface.TVB_STORAGE_FILE_EXTENSION
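
# Usage sketch (illustrative): groups are exported as ZIP archives, single data
# types with the plain storage-file extension, so a caller can build the download
# name directly (hypothetical caller code):
#
#   extension = exporter.get_export_file_extension(data)
#   download_name = "%s.%s" % (base_name, extension)
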
def import_project_operations(self, project, import_path, is_group=False, importer_operation_id=None):
    """
    This method scans the provided folder and identifies all operations that need to be imported.
    """
    all_dts_count = 0
    all_stored_dts_count = 0
    imported_operations = []
    ordered_operations = self._retrieve_operations_in_order(project, import_path, importer_operation_id)

    for operation_data in ordered_operations:
        if operation_data.is_old_form:
            operation_entity, datatype_group = self.import_operation(operation_data.operation)
            new_op_folder = self.files_helper.get_project_folder(project, str(operation_entity.id))

            try:
                operation_datatypes = self._load_datatypes_from_operation_folder(operation_data.operation_folder,
                                                                                 operation_entity, datatype_group)
                # Create and store view_model from operation
                self.create_view_model(operation_entity, operation_data, new_op_folder)

                self._store_imported_datatypes_in_db(project, operation_datatypes)
                imported_operations.append(operation_entity)
            except MissingReferenceException:
                operation_entity.status = STATUS_ERROR
                dao.store_entity(operation_entity)

        elif operation_data.main_view_model is not None:
            do_merge = False
            if importer_operation_id:
                do_merge = True
            operation_entity = dao.store_entity(operation_data.operation, merge=do_merge)
            dt_group = None
            op_group = dao.get_operationgroup_by_id(operation_entity.fk_operation_group)
            if op_group:
                dt_group = dao.get_datatypegroup_by_op_group_id(op_group.id)
                if not dt_group:
                    first_op = dao.get_operations_in_group(op_group.id, only_first_operation=True)
                    dt_group = DataTypeGroup(op_group, operation_id=first_op.id,
                                             state=DEFAULTDATASTATE_INTERMEDIATE)
                    dt_group = dao.store_entity(dt_group)

            # Store the DataTypes in db
            dts = {}
            all_dts_count += len(operation_data.dt_paths)
            for dt_path in operation_data.dt_paths:
                dt = self.load_datatype_from_file(dt_path, operation_entity.id, dt_group, project.id)
                if isinstance(dt, BurstConfiguration):
                    if op_group:
                        dt.fk_operation_group = op_group.id
                    all_stored_dts_count += self._store_or_link_burst_config(dt, dt_path, project.id)
                else:
                    dts[dt_path] = dt
                    if op_group:
                        op_group.fill_operationgroup_name(dt.type)
                        dao.store_entity(op_group)

            try:
                stored_dts_count = self._store_imported_datatypes_in_db(project, dts)
                all_stored_dts_count += stored_dts_count

                if operation_data.main_view_model.is_metric_operation:
                    self._update_burst_metric(operation_entity)

                # TODO: TVB-2849 to review these flags and simplify condition
                if stored_dts_count > 0 or (not operation_data.is_self_generated and not is_group) \
                        or importer_operation_id is not None:
                    imported_operations.append(operation_entity)
                    new_op_folder = self.files_helper.get_project_folder(project, str(operation_entity.id))
                    view_model_disk_size = 0
                    for h5_file in operation_data.all_view_model_files:
                        view_model_disk_size += FilesHelper.compute_size_on_disk(h5_file)
                        shutil.move(h5_file, new_op_folder)
                    operation_entity.view_model_disk_size = view_model_disk_size
                    dao.store_entity(operation_entity)
                else:
                    # In case all DTs under the current operation were Links and the ViewModel is dummy,
                    # don't keep the Operation empty in DB
                    dao.remove_entity(Operation, operation_entity.id)
                    self.files_helper.remove_operation_data(project.name, operation_entity.id)
            except MissingReferenceException as excep:
                dao.remove_entity(Operation, operation_entity.id)
                self.files_helper.remove_operation_data(project.name, operation_entity.id)
                raise excep
        else:
            self.logger.warning("Folder %s will be ignored, as we could not find a serialized "
                                "operation or DTs inside!" % operation_data.operation_folder)

    self._update_dt_groups(project.id)
    self._update_burst_configurations(project.id)
    return imported_operations, all_dts_count, all_stored_dts_count
def prepare_operations_for_workflowsteps(self, workflow_step_list, workflows, user_id, burst_id,
                                         project_id, group, sim_operations):
    """
    Create and store Operation entities from a list of Workflow Steps.
    workflows x workflow_step_list Operations will be generated. For every step in
    workflow_step_list, one OperationGroup and one DataTypeGroup will be created
    (in case of PSE).
    """
    for step in workflow_step_list:
        operation_group = None
        if (group is not None) and not isinstance(step, WorkflowStepView):
            operation_group = OperationGroup(project_id=project_id, ranges=group.range_references)
            operation_group = dao.store_entity(operation_group)

        operation = None
        metadata = {DataTypeMetaData.KEY_BURST: burst_id}
        algo_category = dao.get_algorithm_by_id(step.fk_algorithm)
        if algo_category is not None:
            algo_category = algo_category.algorithm_category

        for wf_idx, workflow in enumerate(workflows):
            cloned_w_step = step.clone()
            cloned_w_step.fk_workflow = workflow.id
            dynamic_params = cloned_w_step.dynamic_param
            op_params = cloned_w_step.static_param
            op_params.update(dynamic_params)
            range_values = None
            group_id = None
            if operation_group is not None:
                group_id = operation_group.id
                range_values = sim_operations[wf_idx].range_values

            if not isinstance(step, WorkflowStepView):
                ## For visualization steps, do not create operations, as those are not really needed.
                metadata, user_group = self._prepare_metadata(metadata, algo_category, operation_group, op_params)
                operation = Operation(user_id, project_id, step.fk_algorithm,
                                      json.dumps(op_params, cls=MapAsJson.MapAsJsonEncoder),
                                      meta=json.dumps(metadata),
                                      op_group_id=group_id, range_values=range_values, user_group=user_group)
                operation.visible = step.step_visible
                operation = dao.store_entity(operation)
                cloned_w_step.fk_operation = operation.id

            dao.store_entity(cloned_w_step)

        if operation_group is not None and operation is not None:
            datatype_group = DataTypeGroup(operation_group, operation_id=operation.id,
                                           fk_parent_burst=burst_id,
                                           state=metadata[DataTypeMetaData.KEY_STATE])
            dao.store_entity(datatype_group)
def prepare_operations(self, user_id, project_id, algorithm, category, metadata,
                       visible=True, existing_dt_group=None, **kwargs):
    """
    Do all the necessary preparations for storing an operation. If a range of values
    is given, create an operation group and one operation for each possible instance
    from the range.
    :param metadata: Initial MetaData with potential Burst identification inside.
    """
    operations = []

    available_args, group = self._prepare_group(project_id, existing_dt_group, kwargs)
    if len(available_args) > TvbProfile.current.MAX_RANGE_NUMBER:
        raise LaunchException("Too big range specified. You should limit the"
                              " resulting operations to %d" % TvbProfile.current.MAX_RANGE_NUMBER)
    else:
        self.logger.debug("Launching a range with %d operations..." % len(available_args))

    group_id = None
    if group is not None:
        group_id = group.id
    metadata, user_group = self._prepare_metadata(metadata, category, group, kwargs)

    self.logger.debug("Saving Operation(userId=" + str(user_id) + ", projectId=" + str(project_id) + ", " +
                      str(metadata) + ", algorithmId=" + str(algorithm.id) + ", ops_group=" + str(group_id) + ")")

    visible_operation = visible and category.display is False
    meta_str = json.dumps(metadata)
    for (one_set_of_args, range_vals) in available_args:
        range_values = json.dumps(range_vals) if range_vals else None
        operation = Operation(user_id, project_id, algorithm.id, json.dumps(one_set_of_args), meta_str,
                              op_group_id=group_id, user_group=user_group, range_values=range_values)
        operation.visible = visible_operation
        operations.append(operation)
    operations = dao.store_entities(operations)

    if group is not None:
        burst_id = None
        if DataTypeMetaData.KEY_BURST in metadata:
            burst_id = metadata[DataTypeMetaData.KEY_BURST]
        if existing_dt_group is None:
            datatype_group = DataTypeGroup(group, operation_id=operations[0].id, fk_parent_burst=burst_id,
                                           state=metadata[DataTypeMetaData.KEY_STATE])
            dao.store_entity(datatype_group)
        else:
            # Reset count
            existing_dt_group.count_results = None
            dao.store_entity(existing_dt_group)

    return operations, group
def import_list_of_operations(self, project, import_path, is_group=False, importer_operation_id=None):
    """
    This method scans the provided folder and identifies all operations that need to be imported.
    """
    all_dts_count = 0
    all_stored_dts_count = 0
    imported_operations = []
    ordered_operations = self._retrieve_operations_in_order(project, import_path,
                                                            None if is_group else importer_operation_id)

    if is_group and len(ordered_operations) > 0:
        first_op = dao.get_operation_by_id(importer_operation_id)
        vm_path = h5.determine_filepath(first_op.view_model_gid, os.path.dirname(import_path))
        os.remove(vm_path)
        ordered_operations[0].operation.id = importer_operation_id

    for operation_data in ordered_operations:
        if operation_data.is_old_form:
            operation_entity, datatype_group = self.import_operation(operation_data.operation)
            new_op_folder = self.storage_interface.get_project_folder(project.name, str(operation_entity.id))

            try:
                operation_datatypes = self._load_datatypes_from_operation_folder(operation_data.operation_folder,
                                                                                 operation_entity, datatype_group)
                # Create and store view_model from operation
                self.create_view_model(operation_entity, operation_data, new_op_folder)

                self._store_imported_datatypes_in_db(project, operation_datatypes)
                imported_operations.append(operation_entity)
            except MissingReferenceException:
                operation_entity.status = STATUS_ERROR
                dao.store_entity(operation_entity)

        elif operation_data.main_view_model is not None:
            operation_data.operation.create_date = datetime.now()
            operation_data.operation.start_date = datetime.now()
            operation_data.operation.completion_date = datetime.now()

            do_merge = False
            if importer_operation_id:
                do_merge = True
            operation_entity = dao.store_entity(operation_data.operation, merge=do_merge)
            dt_group = None
            op_group = dao.get_operationgroup_by_id(operation_entity.fk_operation_group)
            if op_group:
                dt_group = dao.get_datatypegroup_by_op_group_id(op_group.id)
                if not dt_group:
                    first_op = dao.get_operations_in_group(op_group.id, only_first_operation=True)
                    dt_group = DataTypeGroup(op_group, operation_id=first_op.id,
                                             state=DEFAULTDATASTATE_INTERMEDIATE)
                    dt_group = dao.store_entity(dt_group)

            # Store the DataTypes in db
            dts = {}
            all_dts_count += len(operation_data.dt_paths)
            for dt_path in operation_data.dt_paths:
                dt = self.load_datatype_from_file(dt_path, operation_entity.id, dt_group, project.id)
                if isinstance(dt, BurstConfiguration):
                    if op_group:
                        dt.fk_operation_group = op_group.id
                    all_stored_dts_count += self._store_or_link_burst_config(dt, dt_path, project.id)
                else:
                    dts[dt_path] = dt
                    if op_group:
                        op_group.fill_operationgroup_name(dt.type)
                        dao.store_entity(op_group)

            try:
                stored_dts_count = self._store_imported_datatypes_in_db(project, dts)
                all_stored_dts_count += stored_dts_count

                if operation_data.main_view_model.is_metric_operation:
                    self._update_burst_metric(operation_entity)

                imported_operations.append(operation_entity)
                new_op_folder = self.storage_interface.get_project_folder(project.name, str(operation_entity.id))
                view_model_disk_size = 0
                for h5_file in operation_data.all_view_model_files:
                    view_model_disk_size += StorageInterface.compute_size_on_disk(h5_file)
                    shutil.move(h5_file, new_op_folder)
                operation_entity.view_model_disk_size = view_model_disk_size
                dao.store_entity(operation_entity)
            except MissingReferenceException as excep:
                self.storage_interface.remove_operation_data(project.name, operation_entity.id)
                operation_entity.fk_operation_group = None
                dao.store_entity(operation_entity)
                dao.remove_entity(DataTypeGroup, dt_group.id)
                raise excep
        else:
            self.logger.warning("Folder %s will be ignored, as we could not find a serialized "
                                "operation or DTs inside!" % operation_data.operation_folder)

        # We want importer_operation_id to be kept just for the first operation (the first iteration)
        if is_group:
            importer_operation_id = None

    self._update_dt_groups(project.id)
    self._update_burst_configurations(project.id)
    return imported_operations, all_dts_count, all_stored_dts_count