def test_get_path(self, os_mock, datetime_mock):
    """A file name plus a result folder resolves below the timestamped run directory."""
    datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
    # The side_effect list feeds the FileManager constructor and get_path in order.
    os_mock.path.join.side_effect = [
        "testpath/2442_12_24_01_02_03",
        "testpath/2442_12_24_01_02_03/my_result",
        "testpath/2442_12_24_01_02_03/my_result/result.csv",
    ]
    self.filemanager = FileManager("testpath")
    os_mock.path.isfile.return_value = False
    os_mock.path.split.return_value = ("", "result.csv")

    filepath = self.filemanager.get_path("result.csv", "my_result")

    expected_joins = [
        call("testpath", "2442_12_24_01_02_03"),
        call("testpath/2442_12_24_01_02_03", "my_result"),
        call("testpath/2442_12_24_01_02_03/my_result", "result.csv"),
    ]
    os_mock.path.join.assert_has_calls(expected_joins)
    os_mock.path.split.assert_called_once_with("result.csv")
    self.assertEqual(filepath,
                     "testpath/2442_12_24_01_02_03/my_result/result.csv")
def get_json(self, fm: FileManager):
    """
    Serialize this step to a JSON-compatible dict, pickling the condition,
    train_if, and callback callables next to it.

    :param fm: FileManager used to resolve the pickle file paths.
    :return: The JSON dict of the superclass extended with the pickle paths
             and the batch size.
    """
    json = super().get_json(fm)

    def _pickle_to(obj, filename):
        # Resolve the path through the FileManager and dump via cloudpickle,
        # which (unlike plain pickle) handles lambdas and closures.
        path = fm.get_path(filename)
        with open(path, 'wb') as outfile:
            cloudpickle.dump(obj, outfile)
        return path

    condition_path = (_pickle_to(self.condition, f"{self.name}_condition.pickle")
                      if self.condition else None)
    train_if_path = (_pickle_to(self.train_if, f"{self.name}_train_if.pickle")
                     if self.train_if else None)
    # Index the callback files so each one gets a deterministic, distinct name
    # instead of relying on the FileManager's duplicate-name suffixing to keep
    # identically-named pickles apart.
    callbacks_paths = [
        _pickle_to(callback, f"{self.name}_callback_{i}.pickle")
        for i, callback in enumerate(self.callbacks)
    ]

    json.update({
        "callbacks": callbacks_paths,
        "condition": condition_path,
        "train_if": train_if_path,
        "batch_size": self.batch_size
    })
    return json
def test_not_allowed_filetype(self, os_mock, datetime_mock):
    """Requesting a file with an unsupported extension raises an IOException."""
    datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
    os_mock.path.join.side_effect = ["testpath/2442_12_24_01_02_03"]
    self.filemanager = FileManager("testpath")
    os_mock.path.isfile.return_value = False
    os_mock.path.split.return_value = ("", "result.test")

    with self.assertRaises(IOException) as cm:
        self.filemanager.get_path("result.test")

    expected_message = (
        "test is not an allowed file type. Allowed types are ['png', 'csv', 'xlsx', "
        "'pickle', 'tex', 'json', 'h5', 'pt', 'md']."
    )
    self.assertEqual(cm.exception.args, (expected_message,))
def to_folder(self, path: Union[str, Path]):
    """
    Saves the pipeline in pipeline.json in the specified folder.

    :param path: path of the folder
    :return: None
    """
    path = path if isinstance(path, Path) else Path(path)
    save_file_manager = FileManager(path, time_mode=False)

    # 1. Collect a JSON representation of every step. Steps that wrap a
    # module reference it by index into the shared module list so each
    # module instance is stored only once.
    modules = []
    steps_for_storing = []
    for step in self.id_to_step.values():
        step_json = step.get_json(save_file_manager)
        if isinstance(step, Step):
            try:
                step_json["module_id"] = modules.index(step.module)
            except ValueError:
                modules.append(step.module)
                step_json["module_id"] = len(modules) - 1
        steps_for_storing.append(step_json)

    # 2. Let every collected module persist itself (pickle, h5, ...) and
    # gather the resulting JSON descriptions.
    modules_for_storing = [module.save(save_file_manager) for module in modules]

    # 3. Assemble the full pipeline description and dump it as pipeline.json.
    stored_pipeline = {
        "name": "Pipeline",
        "id": 1,
        "version": 1,
        "modules": modules_for_storing,
        "steps": steps_for_storing,
        "path": self.file_manager.basic_path if self.file_manager else None,
        "batch": str(self.batch) if self.batch else None,
    }
    file_path = save_file_manager.get_path('pipeline.json')
    with open(file_path, 'w') as outfile:
        json.dump(obj=stored_pipeline, fp=outfile, sort_keys=False, indent=4,
                  cls=PyWATTSJsonEncoder)
def save(self, fm: FileManager):
    """
    Pickles the wrapped sklearn module next to the superclass JSON.

    :param fm: FileManager used to resolve the pickle path
    :return: JSON dict of the superclass extended with the pickle path
    """
    json = super().save(fm)
    pickle_path = fm.get_path(f'{self.name}.pickle')
    with open(pickle_path, 'wb') as outfile:
        pickle.dump(obj=self.module, file=outfile)
    json.update({"sklearn_module": pickle_path})
    return json
def create_summary(self, summaries: List[SummaryObject], fm: FileManager):
    """
    This method is responsible for creating and storing the summaries as json file.

    :param summaries: The summaries that should be stored.
    :type summaries: List[SummaryObject]
    :param fm: The pyWATTS filemanager.
    :type fm: FileManager
    """
    categories = [SummaryCategory.Summary, SummaryCategory.FitTime,
                  SummaryCategory.TransformTime]
    summary_dict = {}
    for category in categories:
        category_dict = {}
        for summary in summaries:
            if summary.category != category:
                continue
            # Skip summaries that carry neither free text nor key/value entries.
            if summary.additional_information == "" and len(summary.k_v) == 0:
                continue
            if isinstance(summary, SummaryObjectList):
                category_dict.update(self._create_summary(summary))
            elif isinstance(summary, SummaryObjectTable):
                category_dict.update(self._create_table_summary(summary))
        summary_dict[category.name] = category_dict

    with open(fm.get_path("summary.json"), "w") as file:
        json.dump(summary_dict, file)
    return summary_dict
def create_summary(self, summaries: List[SummaryObject], fm: FileManager):
    """
    This method is responsible for creating and storing the summaries as markdown file.

    :param summaries: The summaries that should be stored.
    :type summaries: List[SummaryObject]
    :param fm: The pyWATTS filemanager.
    :type fm: FileManager
    """
    # Collect the fragments and join once instead of repeated concatenation.
    fragments = ["# Summary: \n"]
    for category in (SummaryCategory.Summary, SummaryCategory.FitTime,
                     SummaryCategory.TransformTime):
        fragments.append(f"## {category.name}\n")
        for summary in summaries:
            if summary.category != category:
                continue
            # Skip summaries that carry neither free text nor key/value entries.
            if summary.additional_information == "" and len(summary.k_v) == 0:
                continue
            if isinstance(summary, SummaryObjectList):
                fragments.append(self._create_summary(summary))
            elif isinstance(summary, SummaryObjectTable):
                fragments.append(self._create_table_summary(summary))

    summary_string = "".join(fragments)
    with open(fm.get_path("summary.md"), "w") as file:
        file.write(summary_string)
    return summary_string
def save(self, fm: FileManager) -> Dict:
    """
    Persists the module and, when configured, pickles the filter callable.

    :param fm: FileManager used to resolve the filter pickle path
    :return: JSON dict of the superclass, extended with the filter path if any
    """
    json = super().save(fm)
    if self.filter_method is None:
        return json
    filter_path = fm.get_path(f"{self.name}_filter.pickle")
    # cloudpickle handles arbitrary callables (lambdas, closures).
    with open(filter_path, 'wb') as outfile:
        cloudpickle.dump(self.filter_method, outfile)
    json["filter"] = filter_path
    return json
def save(self, fm: FileManager) -> dict:
    """
    Stores the keras model at the given path

    :param fm: The Filemanager, which contains the path where the model should be stored
    :return: The path where the model is stored.
    """
    json = super().save(fm)
    # Resolve each target path exactly once: calling get_path again for the
    # same filename after the file has been written can return a different,
    # suffixed path (the FileManager appends "_1" on name collisions), which
    # would make the stored JSON point at a file that does not exist.
    model_path = fm.get_path(f"{self.name}.h5")
    self.model.save(filepath=model_path)
    aux_models = []
    for name, aux_model in self.aux_models.items():
        aux_path = fm.get_path(f"{self.name}_{name}.h5")
        aux_model.save(filepath=aux_path)
        aux_models.append((name, aux_path))
    json.update({
        "aux_models": aux_models,
        "model": model_path
    })
    return json
def __init__(self, path: str = ".", batch: Optional[pd.Timedelta] = None, name="Pipeline"):
    """
    Initialize the pipeline.

    :param path: Directory in which the pipeline results are stored.
    :param batch: Optional time span for batch-wise execution.
    :param name: Display name of the pipeline.
    """
    super().__init__(name)
    self.batch = batch
    self.counter = None
    # Registries populated while the pipeline graph is built.
    self.start_steps = {}
    self.id_to_step: Dict[int, BaseStep] = {}
    self.file_manager = FileManager(path)
def save(self, fm: FileManager):
    """
    Saves the pytorch wrapper and the containing model

    :param fm: Filemanager for getting the path
    :type fm: FileManager
    :return: Dictionary with additional information
    :rtype: Dict
    """
    json = super().save(fm)
    model_path = fm.get_path(f'{self.name}.pt')
    loss_fn_path = fm.get_path(f"loss_{self.name}.pickle")
    # Loss and optimizer may be arbitrary callables, hence cloudpickle.
    with open(loss_fn_path, "wb") as file:
        cloudpickle.dump(self.loss_fn, file)
    optimizer_path = fm.get_path(f"optimizer_{self.name}.pickle")
    with open(optimizer_path, "wb") as file:
        cloudpickle.dump(self.optimizer, file)
    torch.save(self.model, model_path)
    json.update({
        "pytorch_module": model_path,
        "optimizer": optimizer_path,
        "loss_fn": loss_fn_path,
    })
    return json
def test_duplicate_filename(self, os_mock, datetime_mock, logger_mock):
    """An existing target file gets a numeric suffix and the rename is logged."""
    datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
    os_mock.path.join.side_effect = [
        "testpath/2442_12_24_01_02_03",
        "testpath/2442_12_24_01_02_03/my_result",
        "testpath/2442_12_24_01_02_03/my_result/result.csv",
    ]
    os_mock.path.splitext.return_value = (
        "testpath/2442_12_24_01_02_03/my_result/result", "csv")
    self.filemanager = FileManager("testpath")
    os_mock.path.isfile.return_value = True
    os_mock.path.split.return_value = ("", "result.csv")

    result = self.filemanager.get_path("result.csv")

    self.assertEqual(result,
                     'testpath/2442_12_24_01_02_03/my_result/result_1.csv')
    logger_mock.info.assert_called_with(
        'File %s already exists. We appended %s to the name',
        'testpath/2442_12_24_01_02_03/my_result', 1)
def save(self, fm: FileManager) -> Dict:
    """
    Stores the PNN at the given path

    :param fm: The Filemanager, which contains the path where the model should be stored
    :return: The path where the model is stored.
    """
    json = super().save(fm)
    if not self.is_fitted:
        # Nothing to persist before the first fit.
        return json
    filepath = fm.get_path(f"{self.name}.h5")
    self.pnn.save(filepath=filepath)
    json.update({"pnn": filepath})
    return json
def save(self, fm: FileManager):
    """
    Saves the Conditional module to JSON file

    :param fm: A FileManager, from which the path where the JSON file is saved is fetched
    :type fm: FileManager
    :return: Dictionary with name, parameters, related module and class, and path to the file
    :rtype: Dict
    """
    json_module = super().save(fm)
    pickle_path = fm.get_path(f'{self.name}.pickle')
    # The whole module is cloudpickled since it may wrap arbitrary callables.
    with open(pickle_path, 'wb') as outfile:
        cloudpickle.dump(self, file=outfile)
    json_module["pickled_module"] = pickle_path
    return json_module
def save(self, fm: FileManager):
    """
    Saves the statsmodels wrappers and the containing model

    :param fm: FileManager for getting the path
    :type fm: FileManager
    :return: Dictionary with all information for restoring the module
    :rtype: Dict
    """
    json = super().save(fm)
    if self.is_fitted:
        # Only a fitted statsmodels result object can be persisted.
        model_file_path = fm.get_path(f"{self.name}_fitted_model.pickle")
        self.model.save(model_file_path)
        json.update({"statsmodel_model": model_file_path})
    json.update({
        "sm_class": self.module.__name__,
        "sm_module": self.module.__module__,
    })
    return json
class TestFilemanager(unittest.TestCase):
    """Unit tests for FileManager.get_path with os and datetime mocked out."""

    @patch("pywatts.core.filemanager.datetime")
    @patch("pywatts.core.filemanager.os")
    def test_get_path(self, os_mock, datetime_mock):
        """A file name plus a result folder resolves below the timestamped run dir."""
        datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
        # The side_effect list feeds the FileManager constructor and get_path in order.
        os_mock.path.join.side_effect = [
            "testpath/2442_12_24_01_02_03",
            "testpath/2442_12_24_01_02_03/my_result",
            "testpath/2442_12_24_01_02_03/my_result/result.csv",
        ]
        self.filemanager = FileManager("testpath")
        os_mock.path.isfile.return_value = False
        os_mock.path.split.return_value = ("", "result.csv")

        filepath = self.filemanager.get_path("result.csv", "my_result")

        expected_joins = [
            call("testpath", "2442_12_24_01_02_03"),
            call("testpath/2442_12_24_01_02_03", "my_result"),
            call("testpath/2442_12_24_01_02_03/my_result", "result.csv"),
        ]
        os_mock.path.join.assert_has_calls(expected_joins)
        os_mock.path.split.assert_called_once_with("result.csv")
        self.assertEqual(filepath,
                         "testpath/2442_12_24_01_02_03/my_result/result.csv")

    @patch("pywatts.core.filemanager.datetime")
    @patch("pywatts.core.filemanager.os")
    def test_get_path_filename_with_path(self, os_mock, datetime_mock):
        """A directory component in the requested file name is ignored."""
        datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
        os_mock.path.join.side_effect = [
            "testpath/2442_12_24_01_02_03",
            "testpath/2442_12_24_01_02_03/my_result",
            "testpath/2442_12_24_01_02_03/my_result/result.csv",
        ]
        self.filemanager = FileManager("testpath")
        os_mock.path.isfile.return_value = False
        os_mock.path.split.return_value = ("", "result.csv")

        filepath = self.filemanager.get_path("path/result.csv", "my_result")

        self.assertEqual(filepath,
                         "testpath/2442_12_24_01_02_03/my_result/result.csv")

    @patch("pywatts.core.filemanager.datetime")
    @patch("pywatts.core.filemanager.os")
    def test_not_allowed_filetype(self, os_mock, datetime_mock):
        """Requesting a file with an unsupported extension raises an IOException."""
        datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
        os_mock.path.join.side_effect = ["testpath/2442_12_24_01_02_03"]
        self.filemanager = FileManager("testpath")
        os_mock.path.isfile.return_value = False
        os_mock.path.split.return_value = ("", "result.test")

        with self.assertRaises(IOException) as cm:
            self.filemanager.get_path("result.test")

        expected_message = (
            "test is not an allowed file type. Allowed types are ['png', 'csv', 'xlsx', "
            "'pickle', 'tex', 'json', 'h5', 'pt', 'md']."
        )
        self.assertEqual(cm.exception.args, (expected_message,))

    @patch("pywatts.core.filemanager.logger")
    @patch("pywatts.core.filemanager.datetime")
    @patch("pywatts.core.filemanager.os")
    def test_duplicate_filename(self, os_mock, datetime_mock, logger_mock):
        """An existing target file gets a numeric suffix and the rename is logged."""
        datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
        os_mock.path.join.side_effect = [
            "testpath/2442_12_24_01_02_03",
            "testpath/2442_12_24_01_02_03/my_result",
            "testpath/2442_12_24_01_02_03/my_result/result.csv",
        ]
        os_mock.path.splitext.return_value = (
            "testpath/2442_12_24_01_02_03/my_result/result", "csv")
        self.filemanager = FileManager("testpath")
        os_mock.path.isfile.return_value = True
        os_mock.path.split.return_value = ("", "result.csv")

        result = self.filemanager.get_path("result.csv")

        self.assertEqual(result,
                         'testpath/2442_12_24_01_02_03/my_result/result_1.csv')
        logger_mock.info.assert_called_with(
            'File %s already exists. We appended %s to the name',
            'testpath/2442_12_24_01_02_03/my_result', 1)