def init():
    # reset random state for consistency before any other packages are
    # imported
    from zensols.deeplearn import TorchConfig
    TorchConfig.init()
    # initialize the NLP system
    from zensols import deepnlp
    deepnlp.init()
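# A minimal sketch of what "reset random state" buys (assuming init()
# reseeds torch's global RNG, as the comment above suggests); stock
# torch.manual_seed stands in for the library call here:
def _seed_sketch():
    import torch
    torch.manual_seed(0)
    first = torch.rand(2, 2)
    torch.manual_seed(0)
    # the same seed reproduces the same tensor
    assert torch.equal(first, torch.rand(2, 2))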
def test_sparse_create(self):
    conf = TorchConfig(False, data_type=torch.float16)
    arr = conf.sparse(
        [[7, 22, 22, 42, 60, 62, 70, 76, 112, 124, 124, 128, 135, 141, 153],
         [3, 2, 5, 0, 4, 6, 1, 5, 6, 2, 5, 4, 3, 0, 1]],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        (174, 30))
    self.assertEqual((174, 30), arr.shape)
    self.assertEqual(0., arr[7, 2].item())
    self.assertEqual(1., arr[7, 3].item())
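# For comparison, a self-contained sketch of the (7, 3) entry checked above
# using stock PyTorch COO construction (not the TorchConfig API):
def _sparse_sketch():
    import torch
    arr = torch.sparse_coo_tensor([[7], [3]], [1.], (174, 30)).to_dense()
    assert arr[7, 3].item() == 1.
    assert arr[7, 2].item() == 0.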
def main():
    print()
    TorchConfig.init()
    logging.basicConfig(level=logging.WARN)
    logger.setLevel(logging.INFO)
    run = [2, 3, 4]
    res = None
    for r in run:
        res = {1: dataset,
               2: train_model,
               3: test_model,
               4: load_results}[r]()
    return res
def main():
    print()
    TorchConfig.init()
    logging.basicConfig(level=logging.WARN)
    logging.getLogger('zensols.deeplearn.model').setLevel(logging.WARN)
    run = 5
    {0: dataset,
     1: dataframe,
     2: metadata,
     3: stash_info,
     4: batch,
     5: model,
     6: tmp}[run]()
def batch_data_point_sets(self) -> List[DataPointIDSet]:
    """Create the data point ID sets.  Each instance returned correlates to
    a batch and each set of keys points to a feature :class:`.DataPoint`.

    """
    psets = []
    batch_id = 0
    cont = self.split_stash_container
    tc_seed = TorchConfig.get_random_seed_context()
    if logger.isEnabledFor(logging.INFO):
        logger.info(f'{self.name}: creating keys with ({type(cont)}) ' +
                    f'using batch size of {self.batch_size}')
    for split, keys in cont.keys_by_split.items():
        if logger.isEnabledFor(logging.INFO):
            logger.info(f'keys for split {split}: {len(keys)}')
        # keys are ordered and need to stay that way for consistency
        # keys = sorted(keys, key=int)
        cslice = it.islice(chunks(keys, self.batch_size), self.batch_limit)
        for chunk in cslice:
            chunk = tuple(chunk)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(f'chunked size: {len(chunk)}')
            dp_set = DataPointIDSet(str(batch_id), chunk, split, tc_seed)
            psets.append(dp_set)
            batch_id += 1
    logger.info(f'created {len(psets)} data point ID sets with ' +
                f'batch_limit={self.batch_limit}')
    return psets
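# A sketch of the chunking above (the chunks() helper's semantics are
# assumed to be fixed-size grouping): 10 keys with batch_size=3 and
# batch_limit=2 give two ID sets of three keys; islice drops the rest.
def _chunk_sketch():
    import itertools as it

    def chunks(iterable, size):
        itr = iter(iterable)
        while piece := tuple(it.islice(itr, size)):
            yield piece

    keys = tuple(map(str, range(10)))
    csets = tuple(it.islice(chunks(keys, 3), 2))
    assert csets == (('0', '1', '2'), ('3', '4', '5'))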
def test_diff(self):
    dtype: torch.dtype = torch.float
    size = (2, 3, 4)
    arr = torch.arange(1, reduce(lambda x, y: x * y, size) + 1,
                       dtype=dtype).view(size)
    arr2 = torch.arange(1, 11)
    size = (3, 2)
    arr3 = torch.arange(1, reduce(lambda x, y: x * y, size) + 1,
                        dtype=dtype).view(size)
    arrs = (arr, arr2, arr3)
    enc = self.de.encode(arrs)
    decs = self.de.decode(enc)
    # perturb one element so the third comparison fails
    decs[2][1][1] = 1.11
    for enc, dec, tf in zip(arrs, decs, [True, True, False]):
        if tf:
            self.assertTrue(TorchConfig.equal(enc, dec))
        else:
            self.assertFalse(TorchConfig.equal(enc, dec))
def assertTensorEquals(self, should, tensor):
    self.assertEqual(should.shape, tensor.shape)
    try:
        eq = TorchConfig.equal(should, tensor)
    except RuntimeError as e:
        logger.error(f'error comparing {should} with {tensor}')
        raise e
    if not eq:
        logger.error(f'tensor {should} does not equal {tensor}')
    self.assertTrue(eq)
def __init__(self, app_root_dir: str = '..', deepnlp_path: str = '..'):
    """Set up the interpreter environment so we can import local packages.

    :param app_root_dir: the application root directory

    :param deepnlp_path: the path to the DeepNLP source code

    """
    import sys
    from pathlib import Path
    self.app_root_dir = Path(app_root_dir)
    # add the example to the Python library path
    sys.path.append(str(self.app_root_dir / 'cb'))
    # add the deepnlp path
    sys.path.append(deepnlp_path)
    # reset random state for consistency before any other packages are
    # imported
    from zensols.deeplearn import TorchConfig
    TorchConfig.init()
    # initialize the NLP system
    from zensols.deepnlp import init
    init()
def to_matrix(self, torch_config: TorchConfig) -> torch.Tensor:
    dev = torch_config.device
    if dev in self.tensors:
        if logger.isEnabledFor(logging.INFO):
            logger.info(f'reusing tensor already cached on {torch_config}')
        vecs = self.tensors[dev]
    else:
        if logger.isEnabledFor(logging.INFO):
            logger.info(f'creating tensor vector matrix on {torch_config}')
        vecs = torch_config.from_numpy(self.vectors)
        self.tensors[dev] = vecs
    return vecs
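# The per-device caching above in miniature (a sketch; the dict and numpy
# array are stand-ins, not the class's real attributes):
def _cache_sketch():
    import numpy as np
    import torch
    tensors = {}
    vectors = np.zeros((3, 4), dtype='float32')

    def to_matrix(dev):
        if dev not in tensors:
            tensors[dev] = torch.from_numpy(vectors)
        return tensors[dev]

    dev = torch.device('cpu')
    # the second call reuses the cached tensor rather than converting again
    assert to_matrix(dev) is to_matrix(dev)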
def main():
    print()
    TorchConfig.set_random_seed()
    ProtoModelFacade.configure_default_cli_logging()
    facade = create_facade()
    # test mem deallocation on feature changes
    runs = [4, 5, 0, 4, 5, 7, 8, 9, 10]
    runs = [3]
    for run in runs:
        res = {-1: tmp,
               0: facade.tmp,
               1: facade.print_sample,
               2: lambda: facade.batch_metadata.write(),
               3: lambda: facade.debug(3),
               4: facade.train,
               5: facade.test,
               6: facade.clear,
               7: facade.write_result,
               8: facade.persist_result,
               9: facade.deallocate,
               10: end_dealloc}[run]()
    return res
def test_datasets(self):
    tc = TorchConfig(False)
    fac = self.fac
    stash = fac('dataloader_stash')
    dataset = fac('mnist_batch_stash')
    dataset.delegate_attr = True
    ds_name = 'train val test'.split()
    batch_size = dataset.delegate.batch_size
    name: str
    ds: Tuple[Tuple[torch.Tensor, torch.Tensor]]
    for name, ds in zip(ds_name, stash.get_data_by_split()):
        ds_start = 0
        ds_stash = dataset.splits[name]
        ds_data = torch.cat(tuple(map(lambda x: x[0], ds)))
        ds_labels = torch.cat(tuple(map(lambda x: x[1], ds)))
        dpts = sum(map(lambda b: len(b.data_point_ids), ds_stash.values()))
        logger.info(f'{name}: stash size: {len(ds_stash)}, ' +
                    f'data set size: {len(ds)}, ' +
                    f'stash X batch_size: {len(ds_stash) * batch_size}, ' +
                    f'data/label shapes: {ds_data.shape}/{ds_labels.shape}, ' +
                    f'data points: {dpts}')
        assert len(ds) == len(ds_stash)
        assert dpts == ds_labels.shape[0]
        assert ds_labels.shape[0] == ds_data.shape[0]
        for id, batch in ds_stash:
            ds_end = ds_start + len(batch)
            dsb_labels = ds_labels[ds_start:ds_end]
            dsb_data = ds_data[ds_start:ds_end]
            ds_start = ds_end
            blabels = batch.get_labels()
            bdata = batch.get_data()
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(f'data point ids: {batch.data_point_ids}')
                logger.debug(f'ds/batch labels: {dsb_labels}/{blabels}')
            assert tc.equal(dsb_labels, blabels)
            assert tc.equal(dsb_data, bdata)
def cleanup(self, include_cuda: bool = True, quiet: bool = False):
    """Report memory leaks, run the Python garbage collector and optionally
    empty the CUDA cache.

    :param include_cuda: if ``True`` clear the GPU cache

    :param quiet: do not report unallocated objects, regardless of the
                  setting of :obj:`allocation_tracking`

    """
    if self.allocation_tracking and not quiet:
        include_stack, only_counts = False, False
        if self.allocation_tracking == 'stack':
            include_stack, only_counts = True, False
        elif self.allocation_tracking == 'counts':
            include_stack, only_counts = False, True
        Deallocatable._print_undeallocated(include_stack, only_counts)
    self.deallocate()
    Deallocatable._deallocate_all()
    gc.collect()
    if include_cuda:
        # free up memory in the GPU
        TorchConfig.empty_cache()
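# A hedged usage sketch: after a training run, report any leaks and clear
# the GPU cache; `executor` is a hypothetical owner of the cleanup() above.
#
#   executor.cleanup(include_cuda=True, quiet=False)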
def _create_model(self, docs: Iterable[FeatureDocument]) -> Any:
    if logger.isEnabledFor(logging.INFO):
        logger.info(f'creating {self.topics} topics')
    docs = tuple(map(lambda doc: self.feat_to_tokens(doc), docs))
    id2word = corpora.Dictionary(docs)
    corpus = tuple(map(lambda doc: id2word.doc2bow(doc), docs))
    rand_state = TorchConfig.get_random_seed()
    if rand_state is None:
        rand_state = 0
    params = {'corpus': corpus,
              'id2word': id2word,
              'num_topics': self.topics,
              'random_state': rand_state,
              'update_every': 1,
              'chunksize': 100,
              'passes': 10,
              'alpha': 'auto',
              'per_word_topics': True}
    with time(f'modeled {self.topics} topics across {len(docs)} documents'):
        lda = LdaModel(**params)
    return {'lda': lda, 'corpus': corpus, 'id2word': id2word}
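# A self-contained gensim sketch of the same pipeline on toy data (assumes
# gensim is installed; the document contents are illustrative only):
def _lda_sketch():
    from gensim import corpora
    from gensim.models.ldamodel import LdaModel
    docs = (('cat', 'dog'), ('dog', 'bird'), ('cat', 'bird'))
    id2word = corpora.Dictionary(docs)
    corpus = tuple(id2word.doc2bow(d) for d in docs)
    lda = LdaModel(corpus=corpus, id2word=id2word,
                   num_topics=2, random_state=0)
    assert len(lda.show_topics()) == 2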
def assertClose(self, da, db):
    assert set(da.keys()) == set(db.keys())
    for k in da.keys():
        a = da[k]
        b = db[k]
        self.assertTrue(TorchConfig.close(a, b))
def setUp(self):
    TorchConfig.init()
    self.recreate_factory()
    targ = Path('target')
    if targ.exists() and targ.is_dir():
        shutil.rmtree(targ)
#!/usr/bin/env python

from typing import List
import sys
from pathlib import Path
import logging
from zensols.deeplearn import TorchConfig
from zensols import deepnlp

# reset random state for consistency before any other packages are
# imported
TorchConfig.init()
# initialize the NLP system
deepnlp.init()


class CliHarness(object):
    """A utility class to automate the creation and execution of the model
    from either the command line or a Python REPL.

    """
    def __init__(self, args: List[str] = sys.argv, src_dir_name: str = 'src'):
        """Configure the Python interpreter and this run class.

        :param args: the command line arguments

        :param src_dir_name: the directory, added to the Python path,
                             containing the source for the application

        """
        self.args = args[1:]
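# A hedged usage sketch: the entry point below is an assumption about how
# the harness is driven, not part of the class definition above.
if __name__ == '__main__':
    harness = CliHarness(src_dir_name='src')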
def setUp(self):
    tc = TorchConfig(False)
    self.de = NonUniformDimensionEncoder(tc)
def test_create_empty(self):
    conf = TorchConfig(False, data_type=torch.float16)
    tensor = conf.empty((3, 10))
    self.assertEqual(torch.float16, tensor.dtype)
    self.assertEqual(3, tensor.shape[0])
    self.assertEqual(10, tensor.shape[1])
def test_create_tensor(self):
    conf = TorchConfig(False)
    tensor = conf.from_iterable(it.islice(it.count(), 5))
    self.assertEqual(torch.float32, tensor.dtype)
    should = torch.FloatTensor([0, 1, 2, 3, 4])
    self.assertTrue(torch.all(should.eq(tensor)))
def test_cuda_config_cpu(self):
    conf = TorchConfig(False)
    self.assertEqual(TorchConfig.cpu_device_name(), conf.device.type)
def test_cuda_config_write(self):
    writer = StringIO()
    conf = TorchConfig()
    conf.write(writer=writer)
    logger.debug(writer.getvalue())
    self.assertTrue(len(writer.getvalue()) > 0)
def test_cuda_config(self):
    conf = TorchConfig()
    self.assertNotEqual(None, conf.info)
def test_rand(self):
    conf = self.conf
    size = (10, 20)
    self.rand_assert(50, size, conf)
    conf = TorchConfig(True, data_type=torch.float64)
    self.rand_assert(50, size, conf)
def setUp(self):
    super().setUp()
    self.conf = TorchConfig(False, data_type=torch.float64)
def setUp(self):
    TorchConfig.init()
    config = AppConfig('test-resources/iris/iris.conf',
                       env={'app_root': '.'})
    self.config = config
    self.fac = ImportConfigFactory(config, shared=True, reload=False)
def test_config_type(self):
    conf = TorchConfig(False)
    self.assertEqual(torch.float32, conf.data_type)
    self.assertEqual(torch.FloatTensor, conf.tensor_class)
def _trans_test(self, arrs: Sequence[Tensor]):
    enc = self.de.encode(arrs)
    decs = self.de.decode(enc)
    for enc, dec in zip(arrs, decs):
        self.assertTrue(TorchConfig.equal(enc, dec))