def test_discover_new_module_in_file(self):
    """DataSetRepo should discover SmvModules added to an existing file.

    The stage is listed once from the "before" directory so the file has
    already been imported, then listed again through a fresh repo from the
    "after" directory, where ``NewModule`` is expected to show up.
    """
    with ExtraPath(self.before_dir()):
        # File should already have been imported before module is added
        self.build_new_repo().dataSetsForStage("stage")

    with ExtraPath(self.after_dir()):
        modules = list(self.build_new_repo().dataSetsForStage("stage"))

    # assertIn states the membership intent directly and includes the
    # missing member in the failure output; the custom message is kept.
    self.assertIn(
        "mod:stage.modules.NewModule",
        modules,
        "mod:stage.modules.NewModule not in " + str(modules),
    )
def test_new_repo_reloads_base_class(self):
    """A new DataSetRepo should reload an SmvDataSet's client ABC (if any).

    Users may define abstract base classes for their SmvModules (with or
    without the ``abc`` module). The first import of an implementation
    SmvModule in a transaction should recompile it and should also trigger
    a reload of its ABC, even when the ABC lives in a different file.

    The check: load ``ImplMod`` through two independent repos and expect
    the two resulting classes to be different objects.
    """
    loaded_classes = []
    for _ in range(2):
        # Each pass uses its own ExtraPath scope and a brand new repo.
        with ExtraPath(self.before_dir()):
            dataset = self.build_new_repo().loadDataSet("stage.modules.ImplMod")
            loaded_classes.append(dataset.__class__)

    first_cls, second_cls = loaded_classes
    self.assertNotEqual(first_cls, second_cls)
def test_repo_dslist_filter_abc(self):
    """DataSetRepo.dataSetsForStage should leave ABC classes out of its result.
    """
    with ExtraPath(self.before_dir()):
        stage_datasets = self.build_new_repo().dataSetsForStage("stage")

    failure_msg = "mod:stage.abcmod.ABCMod is in " + str(stage_datasets)
    self.assertNotIn("mod:stage.abcmod.ABCMod", stage_datasets, failure_msg)
def test_link_to_SmvResultModule(self):
    """A link to a module with a non-DataFrame result should yield that result.

    Compares the string form of ``stage2.modules.RM``'s result with the
    first cell of the first row collected from ``stage2.modules.M``.
    """
    # NOTE(review): another test in this file uses the same method name
    # (the one exercising smv_model). If both live in the same TestCase,
    # only the later definition runs - confirm.
    with ExtraPath("src/test/python/smv_result"):
        result_module_value = self.smvApp.getModuleResult("mod:stage2.modules.RM")
        linked_df = self.smvApp.getModuleResult("mod:stage2.modules.M")
        first_cell = linked_df.collect()[0][0]

    self.assertEqual(str(result_module_value), first_cell)
def test_module_depends_on_model(self):
    """A module can depend on a model and use its result directly.

    The output of ``ModuleUsesModel`` is compared with a DataFrame created
    from the schema string "a:String" and the quoted string form of the
    ``Model`` result.
    """
    with ExtraPath("src/test/python/smv_model"):
        model_result = self.df("stage1.modules.Model")
        actual = self.df("stage1.modules.ModuleUsesModel")

        expected_data = "\"{}\"".format(model_result)
        expected = self.createDF("a:String", expected_data)

        self.should_be_same(actual, expected)
def test_link_to_SmvResultModule(self):
    """The result of a link to an SmvModel should match the SmvModel's result.

    Compares the string form of ``stage1.modules.Model``'s result with the
    first cell of the first row collected from
    ``stage2.modules.ModelExecWithLink``.
    """
    # NOTE(review): the method name mentions SmvResultModule although this
    # test targets SmvModel, and another test in this file has the same
    # name. If both share a TestCase, one shadows the other - confirm and
    # consider renaming.
    with ExtraPath("src/test/python/smv_model"):
        model_value = self.smvApp.getModuleResult("mod:stage1.modules.Model")
        exec_df = self.smvApp.getModuleResult(
            "mod:stage2.modules.ModelExecWithLink"
        )
        first_cell = exec_df.collect()[0][0]

    self.assertEqual(str(model_value), first_cell)
def test_repo_compiles_module_only_once(self):
    """DataSetRepo should not recompile a module twice in one transaction.

    Loading an SmvDataSet should recompile its module only when that module
    has not yet been imported in the current transaction. This holds even
    when different SmvDataSets are loaded from the same file.
    """
    repo = self.build_new_repo()

    with ExtraPath(self.before_dir()):
        class_at_first_load = repo.loadDataSet(
            "stage.modules.CompileOnceA"
        ).__class__

        # Load a different SmvDataSet that lives in the same file.
        repo.loadDataSet("stage.modules.CompileOnceB")

        # Look up the first SmvDataSet on the module as it stands after the
        # second load. Without a recompile the two classes are equal.
        # Note that the module object `sys.modules["stage.modules"]` won't
        # change identity (at least in Python 2.7), but its attributes will.
        stage_module = sys.modules["stage.modules"]
        class_after_second_load = getattr(stage_module, "CompileOnceA")

    self.assertEqual(class_at_first_load, class_after_second_load)
def test_ignore_JavaObjects(self):
    """DataSetRepo should ignore Py4J JavaObjects found in a stage's files.

    Users may use Py4J to talk to Java code and, in doing so, create
    JavaObjects at the top level of a file. DataSetRepo should not treat
    such objects as SmvDataSets, even though they have a truthy (not None)
    IsSmvDataSet attribute.

    Note: JavaObjects have an IsSmvDataSet attribute because they override
    __getattr__ to **always** return something. The same is true of PySpark
    Columns and of classes from other Python libraries.
    """
    # Directory holding a stage whose module contains a JavaObject.
    stage_root = self.resourceTestDir() + "/java_obj"

    with ExtraPath(stage_root):
        discovered = self.build_new_repo()._dataSetsForStage("stage")

    expected = ["mod:stage.modules.WhateverModule"]
    self.assertEqual(discovered, expected)
def test_SmvResultModule_persistence(self):
    """Non-DataFrame module results should be persisted.

    Fetches the result of ``stage1.modules.RM`` and expects the list
    ``[100, "100", 100.0]``.
    """
    expected = [100, "100", 100.0]

    with ExtraPath("src/test/python/smv_result"):
        actual = self.smvApp.getModuleResult("mod:stage1.modules.RM")

    self.assertEqual(actual, expected)
def test_SmvModelExec(self):
    """ModelExec's output should carry the string form of the Model result.

    Compares ``str`` of the ``stage1.modules.Model`` result with the first
    cell of the first row collected from ``stage1.modules.ModelExec``.
    """
    with ExtraPath("src/test/python/smv_model"):
        model_value = self.smvApp.getModuleResult("mod:stage1.modules.Model")
        exec_rows = self.df("stage1.modules.ModelExec").collect()

    first_cell = exec_rows[0][0]
    self.assertEqual(str(model_value), first_cell)