def test_ii_impl_match():
    """
    Train the item-item model under both the MKL and Numba CSR kernels (each in
    its own subprocess via ``run_sp``) and verify the two similarity matrices
    are structurally identical and numerically equivalent.
    """
    mkl_h = None
    nba_h = None
    try:
        with lktu.set_env_var('CSR_KERNEL', 'mkl'):
            mkl_h = run_sp(_train_ii)
            mkl = mkl_h.get()
        with lktu.set_env_var('CSR_KERNEL', 'numba'):
            nba_h = run_sp(_train_ii)
            nba = nba_h.get()

        # identical sparsity structure
        assert mkl.sim_matrix_.nnz == nba.sim_matrix_.nnz
        assert mkl.sim_matrix_.nrows == nba.sim_matrix_.nrows
        assert mkl.sim_matrix_.ncols == nba.sim_matrix_.ncols
        assert all(mkl.sim_matrix_.rowptrs == nba.sim_matrix_.rowptrs)

        for i in range(mkl.sim_matrix_.nrows):
            sp, ep = mkl.sim_matrix_.row_extent(i)
            # each row's similarity values must be sorted in descending order
            assert all(np.diff(mkl.sim_matrix_.values[sp:ep]) <= 0)
            assert all(np.diff(nba.sim_matrix_.values[sp:ep]) <= 0)
            # same neighbors per row (order within ties may differ)
            assert set(mkl.sim_matrix_.colinds[sp:ep]) == \
                set(nba.sim_matrix_.colinds[sp:ep])
            # values agree within a small absolute tolerance
            assert mkl.sim_matrix_.values[sp:ep] == \
                approx(nba.sim_matrix_.values[sp:ep], abs=1.0e-3)
    finally:
        # drop model references and collect before closing the shared-memory
        # handles, so no live views keep the shared segments pinned
        mkl = None
        nba = None
        gc.collect()
        # BUG FIX: if run_sp raised before a handle was assigned, the handle
        # is still None; calling .close() on it would raise AttributeError
        # and mask the original test failure.
        if mkl_h is not None:
            mkl_h.close()
        if nba_h is not None:
            nba_h.close()
def train_isolated(algo, ratings, *, file=None, **kwargs):
    """
    Train an algorithm in a subprocess to isolate the training process. This
    function spawns a subprocess (in the same way that LensKit's multiprocessing
    support does), calls :meth:`lenskit.algorithms.Algorithm.fit` on it, and serializes
    the result for shared-memory use.

    Training the algorithm in a single-purpose subprocess makes sure that any training
    resources, such as TensorFlow sessions, are cleaned up by virtue of the process
    terminating when model training is completed. It can also reduce memory use, because
    the original trained model and the shared memory version are not in memory at the
    same time. While the batch functions use shared memory to reduce memory overhead for
    parallel processing, naive use of these functions will still have 2 copies of the
    model in memory, the shared one and the original, because the sharing process does
    not tear down the original model. Training in a subprocess solves this problem
    elegantly.

    Args:
        algo(lenskit.algorithms.Algorithm):
            The algorithm to train.
        ratings(pandas.DataFrame):
            The rating data.
        file(str or pathlib.Path or None):
            The file in which to save the trained model. If ``None``, uses a default
            file path or shared memory.
        kwargs(dict):
            Additional named parameters to :meth:`lenskit.algorithms.Algorithm.fit`.

    Returns:
        lenskit.sharing.PersistedObject:
            The saved model object. This is the owner, so it needs to be closed when
            finished to free resources.
    """
    # delegate all work to the subprocess runner; _train_and_save fits the
    # model in the child and persists it before the process exits
    return run_sp(_train_and_save, algo, file, ratings, kwargs)
def test_run_sp_persist():
    """The persisted result of a subprocess matmul is owned and correct."""
    # NOTE(review): this name collides with the parametrized
    # test_run_sp_persist(method) below; if both live in one module, the
    # later definition shadows this one and it never runs — confirm intent.
    left = np.random.randn(100, 100)
    right = np.random.randn(100, 100)
    handle = run_sp(_sp_matmul_p, left, right)
    try:
        assert handle.is_owner
        assert np.all(handle.get() == left @ right)
    finally:
        handle.close()
def test_run_sp_persist(method):
    """Persist a subprocess matmul result through the given sharing backend."""
    if method == 'shm' and not SHM_AVAILABLE:
        pytest.skip('SHM backend not available')

    m1 = np.random.randn(100, 100)
    m2 = np.random.randn(100, 100)
    handle = run_sp(_sp_matmul_p, m1, m2, method=method)
    try:
        # the returned handle owns the persisted data and must match the product
        assert handle.is_owner
        assert np.all(handle.get() == m1 @ m2)
    finally:
        handle.close()
def test_run_sp_fail():
    """A failure inside the worker function surfaces as ChildProcessError."""
    lhs = np.random.randn(100, 100)
    rhs = np.random.randn(100, 100)
    with raises(ChildProcessError):
        run_sp(_sp_matmul, lhs, rhs, fail=True)
def test_run_sp():
    """run_sp returns the worker's result — here, a matrix product."""
    x = np.random.randn(100, 100)
    y = np.random.randn(100, 100)
    product = run_sp(_sp_matmul, x, y)
    assert np.all(product == x @ y)
def test_sp_random_seed():
    """The subprocess worker receives a seed spawned from the root seed."""
    root = get_root_seed()
    worker_seed = run_sp(_get_seed)
    # same entropy as the root, with a spawn key for the most recent child
    assert worker_seed.entropy == root.entropy
    assert worker_seed.spawn_key == (root.n_children_spawned - 1,)
def test_sp_is_worker():
    """Code run via run_sp sees itself as a worker, but not an MP worker."""
    child_pid, is_worker, is_mp_worker = run_sp(_worker_status, 'fishtank')
    assert child_pid != os.getpid()
    assert is_worker
    assert not is_mp_worker