예제 #1
0
def init():
    """Initialize the deep learning framework and then the NLP package.

    Calls ``TorchConfig.init()`` followed by ``zensols.deepnlp.init()``.  Both
    imports are deferred to call time so they happen in the order written
    here.

    """
    # reset random state for consistency before any other packages are
    # imported
    from zensols.deeplearn import TorchConfig
    TorchConfig.init()
    # initialize the NLP system; imported only after the reset above
    from zensols import deepnlp
    deepnlp.init()
예제 #2
0
 def test_sparse_create(self):
     """Create a sparse tensor from coordinate lists and verify its shape and
     two of its cells.

     """
     conf = TorchConfig(False, data_type=torch.float16)
     rows = [7, 22, 22, 42, 60, 62, 70, 76, 112, 124, 124, 128, 135, 141, 153]
     cols = [3, 2, 5, 0, 4, 6, 1, 5, 6, 2, 5, 4, 3, 0, 1]
     # one value of 1 for every (row, column) coordinate
     vals = [1.] * len(rows)
     arr = conf.sparse([rows, cols], vals, (174, 30))
     # assertEqual rather than assertTrue: the latter takes its second
     # argument as the failure message and passes for any non-empty tuple
     self.assertEqual((174, 30), tuple(arr.shape))
     self.assertEqual(0., arr[7, 2].item())
     self.assertEqual(1., arr[7, 3].item())
예제 #3
0
파일: proto.py 프로젝트: plandes/deeplearn
def main():
    """Run the configured sequence of prototype steps.

    :return: the result of the last step that was run

    """
    print()
    TorchConfig.init()
    logging.basicConfig(level=logging.WARN)
    logger.setLevel(logging.INFO)
    steps = [2, 3, 4]
    result = None
    for step in steps:
        # map each step number to the callable that implements it
        actions = {
            1: dataset,
            2: train_model,
            3: test_model,
            4: load_results,
        }
        result = actions[step]()
    return result
예제 #4
0
파일: proto.py 프로젝트: plandes/deeplearn
def main():
    """Configure logging and invoke the single selected prototype action."""
    print()
    TorchConfig.init()
    logging.basicConfig(level=logging.WARN)
    model_logger = logging.getLogger('zensols.deeplearn.model')
    model_logger.setLevel(logging.WARN)
    selected = 5
    # map each action number to the callable that implements it
    actions = {
        0: dataset,
        1: dataframe,
        2: metadata,
        3: stash_info,
        4: batch,
        5: model,
        6: tmp,
    }
    actions[selected]()
예제 #5
0
파일: stash.py 프로젝트: plandes/deeplearn
    def batch_data_point_sets(self) -> List[DataPointIDSet]:
        """Create the data point ID sets.  Each instance returned will correlate to a
        batch and each set of keys point to a feature :class:`.DataPoint`.

        The keys of every split are chunked using :obj:`batch_size` and at
        most :obj:`batch_limit` chunks are taken from each split.  Batch IDs
        are assigned sequentially across all splits and every set shares the
        same random seed context.

        :return: one :class:`.DataPointIDSet` per created batch

        """
        psets = []
        batch_id = 0
        cont = self.split_stash_container
        tc_seed = TorchConfig.get_random_seed_context()
        if logger.isEnabledFor(logging.INFO):
            logger.info(f'{self.name}: creating keys with ({type(cont)}) ' +
                        f'using batch size of {self.batch_size}')
        for split, keys in cont.keys_by_split.items():
            if logger.isEnabledFor(logging.INFO):
                logger.info(f'keys for split {split}: {len(keys)}')
            # keys are ordered and needed to be as such for consistency
            # (presumably ``chunks`` yields groups of up to ``batch_size``
            # keys -- confirm against its definition)
            cslice = it.islice(chunks(keys, self.batch_size), self.batch_limit)
            for chunk in cslice:
                chunk = tuple(chunk)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(f'chunked size: {len(chunk)}')
                dp_set = DataPointIDSet(str(batch_id), chunk, split, tc_seed)
                psets.append(dp_set)
                batch_id += 1
        # guarded like the other log statements; reports the batch size and
        # the batch limit rather than the batch limit twice
        if logger.isEnabledFor(logging.INFO):
            logger.info(f'created {len(psets)} sets, each limited to a ' +
                        f'batch size of {self.batch_size} ' +
                        f'with batch_limit={self.batch_limit}')
        return psets
예제 #6
0
 def test_diff(self):
     """Encode and decode tensors of differing shapes, modify one decoded
     tensor and verify only that one differs from its source.

     """
     dtype: torch.dtype = torch.float

     def seq_tensor(shape):
         # tensor of consecutive values starting at 1 viewed as ``shape``
         n_elems = reduce(lambda x, y: x * y, shape)
         return torch.arange(1, n_elems + 1, dtype=dtype).view(shape)

     arrs = (seq_tensor((2, 3, 4)), torch.arange(1, 11), seq_tensor((3, 2)))
     decs = self.de.decode(self.de.encode(arrs))
     # change a single cell of the last decoded tensor
     decs[2][1][1] = 1.11
     expect_same = (True, True, False)
     for source, decoded, same in zip(arrs, decs, expect_same):
         check = self.assertTrue if same else self.assertFalse
         check(TorchConfig.equal(source, decoded))
예제 #7
0
파일: util.py 프로젝트: plandes/deeplearn
 def assertTensorEquals(self, should, tensor):
     """Assert that two tensors have the same shape and compare as equal.

     An error is logged when the comparison raises or when the tensors
     differ.

     :param should: the expected tensor

     :param tensor: the tensor to verify

     """
     self.assertEqual(should.shape, tensor.shape)
     try:
         matches = TorchConfig.equal(should, tensor)
     except RuntimeError as err:
         # log which tensors could not be compared before propagating
         logger.error(f'error comparing {should} with {tensor}')
         raise err
     if not matches:
         logger.error(f'tensor {should} does not equal {tensor}')
     self.assertTrue(matches)
예제 #8
0
파일: harness.py 프로젝트: plandes/deepnlp
    def __init__(self, app_root_dir: str = '..', deepnlp_path: str = '..'):
        """Set up the interpreter environment so we can import local packages.

        Appends the application's ``cb`` directory and ``deepnlp_path`` to
        ``sys.path``, then calls ``TorchConfig.init()`` and
        ``zensols.deepnlp.init()``.

        :param app_root_dir: the application root directory

        :param deepnlp_path: the path to the DeepNLP source code

        """
        import sys
        from pathlib import Path
        self.app_root_dir = Path(app_root_dir)
        # add the example to the Python library path
        sys.path.append(str(self.app_root_dir / 'cb'))
        # add the deepnlp path
        sys.path.append(deepnlp_path)
        # reset random state for consistency before any other packages are
        # imported
        from zensols.deeplearn import TorchConfig
        TorchConfig.init()
        # initialize the NLP system; imported only after both path entries
        # were appended above
        from zensols.deepnlp import init
        init()
예제 #9
0
파일: domain.py 프로젝트: plandes/deepnlp
 def to_matrix(self, torch_config: TorchConfig) -> torch.Tensor:
     """Return the vectors as a tensor, keeping one cached tensor per device.

     :param torch_config: provides the device used as the cache key and
                          creates the tensor when none is cached

     :return: the cached tensor for the device, or a newly created one

     """
     device = torch_config.device
     if device not in self.tensors:
         # nothing cached for this device yet: create and remember it
         if logger.isEnabledFor(logging.INFO):
             logger.info(f'created tensor vectory matrix on {torch_config}')
         matrix = torch_config.from_numpy(self.vectors)
         self.tensors[device] = matrix
         return matrix
     if logger.isEnabledFor(logging.INFO):
         logger.info(f'reusing already cached from {torch_config}')
     return self.tensors[device]
예제 #10
0
파일: proto.py 프로젝트: plandes/deepnlp
def main():
    """Run the selected prototype actions against a newly created facade.

    :return: the result of the last action run, or ``None`` when no action
             was run

    """
    print()
    TorchConfig.set_random_seed()
    ProtoModelFacade.configure_default_cli_logging()
    facade = create_facade()
    # the dispatch table does not change between runs, so build it once
    actions = {
        -1: tmp,
        0: facade.tmp,
        1: facade.print_sample,
        2: lambda: facade.batch_metadata.write(),
        3: lambda: facade.debug(3),
        4: facade.train,
        5: facade.test,
        6: facade.clear,
        7: facade.write_result,
        8: facade.persist_result,
        9: facade.deallocate,
        10: end_dealloc,
    }
    # to test memory deallocation on feature changes, use the sequence
    # 4, 5, 0, 4, 5, 7, 8, 9, 10 instead
    runs = [3]
    # bound up front so an empty run list returns None instead of raising
    res = None
    for run in runs:
        res = actions[run]()
    return res
예제 #11
0
 def test_datasets(self):
     """Verify that the data returned by the data loader stash for each split
     agrees, batch by batch, with the batches of the batch stash.

     """
     tc = TorchConfig(False)
     fac = self.fac
     stash = fac('dataloader_stash')
     dataset = fac('mnist_batch_stash')
     dataset.delegate_attr = True
     ds_name = 'train val test'.split()
     batch_size = dataset.delegate.batch_size
     name: str
     ds: Tuple[Tuple[torch.Tensor, torch.Tensor]]
     for name, ds in zip(ds_name, stash.get_data_by_split()):
         ds_start = 0
         ds_stash = dataset.splits[name]
         # concatenate the per batch data and labels of the split
         ds_data = torch.cat(tuple(map(lambda x: x[0], ds)))
         ds_labels = torch.cat(tuple(map(lambda x: x[1], ds)))
         dpts = sum(map(lambda b: len(b.data_point_ids), ds_stash.values()))
         # prefix the message with the split being checked
         logger.info(f'{name}: stash size: {len(ds_stash)}, ' +
                     f'data set size: {len(ds)}, ' +
                     f'stash X batch_size: {len(ds_stash) * batch_size}, ' +
                     f'data/label shapes: {ds_data.shape}/{ds_labels.shape}, ' +
                     f'data points: {dpts}')
         assert len(ds) == len(ds_stash)
         assert dpts == ds_labels.shape[0]
         assert ds_labels.shape[0] == ds_data.shape[0]
         # the batch key is not needed; only the batch itself is compared
         for _, batch in ds_stash:
             # slice out the rows that correspond to this batch
             ds_end = ds_start + len(batch)
             dsb_labels = ds_labels[ds_start:ds_end]
             dsb_data = ds_data[ds_start:ds_end]
             ds_start = ds_end
             blabels = batch.get_labels()
             bdata = batch.get_data()
             if logger.isEnabledFor(logging.DEBUG):
                 logger.debug(f'data point ids: {batch.data_point_ids}')
                 logger.debug(f'ds/batch labels: {dsb_labels}/{blabels}')
             assert (tc.equal(dsb_labels, blabels))
             assert (tc.equal(dsb_data, bdata))
예제 #12
0
    def cleanup(self, include_cuda: bool = True, quiet: bool = False):
        """Report objects that were not deallocated, deallocate this instance
        and all tracked instances, run the garbage collector and optionally
        empty the CUDA cache.

        :param include_cuda: if ``True`` clear the GPU cache

        :param quiet: do not report unallocated objects, regardless of the
                      setting of :obj:`allocation_tracking`

        """
        tracking = self.allocation_tracking
        if tracking and not quiet:
            # the tracking setting selects the report detail
            Deallocatable._print_undeallocated(
                tracking == 'stack', tracking == 'counts')
        self.deallocate()
        Deallocatable._deallocate_all()
        gc.collect()
        if not include_cuda:
            return
        # free up memory in the GPU
        TorchConfig.empty_cache()
예제 #13
0
파일: lda.py 프로젝트: plandes/deepnlp
 def _create_model(self, docs: Iterable[FeatureDocument]) -> Any:
     """Train an LDA model on the tokens of the given documents.

     :param docs: the documents to tokenize with :meth:`feat_to_tokens`

     :return: a dictionary with the trained model (``lda``), the bag of
              words corpus (``corpus``) and the dictionary (``id2word``)

     """
     if logger.isEnabledFor(logging.INFO):
         logger.info(f'creating {self.topics} topics')
     token_docs = tuple(self.feat_to_tokens(doc) for doc in docs)
     id2word = corpora.Dictionary(token_docs)
     corpus = tuple(id2word.doc2bow(tokens) for tokens in token_docs)
     # fall back to a fixed seed when none has been configured
     seed = TorchConfig.get_random_seed()
     seed = 0 if seed is None else seed
     with time(f'modeled {self.topics} acros {len(token_docs)} documents'):
         lda = LdaModel(
             corpus=corpus,
             id2word=id2word,
             num_topics=self.topics,
             random_state=seed,
             update_every=1,
             chunksize=100,
             passes=10,
             alpha='auto',
             per_word_topics=True)
     return {'lda': lda, 'corpus': corpus, 'id2word': id2word}
예제 #14
0
 def assertClose(self, da, db):
     """Assert that two mappings have the same keys and that the values
     under every key are close according to ``TorchConfig.close``.

     :param da: the first mapping

     :param db: the second mapping

     """
     keys_a, keys_b = set(da.keys()), set(db.keys())
     assert keys_a == keys_b
     for key in da.keys():
         self.assertTrue(TorchConfig.close(da[key], db[key]))
예제 #15
0
파일: util.py 프로젝트: plandes/deeplearn
 def setUp(self):
     """Initialize the framework, recreate the factory and remove the
     ``target`` directory if it exists.

     """
     TorchConfig.init()
     self.recreate_factory()
     target_dir = Path('target')
     # is_dir() is False for a missing path, so no separate existence check
     if target_dir.is_dir():
         shutil.rmtree(target_dir)
예제 #16
0
파일: run.py 프로젝트: plandes/deepnlp
#!/usr/bin/env python

from typing import List
import sys
from pathlib import Path
import logging
from zensols.deeplearn import TorchConfig
from zensols import deepnlp

# reset random state for consistency before any other packages are
# imported
# NOTE(review): ``zensols.deepnlp`` is already imported above at this point
# -- confirm the reset only needs to precede packages imported later
TorchConfig.init()
# initialize the NLP system
deepnlp.init()


class CliHarness(object):
    """A utility class to automate the creation and execution of the model
    from either the command line or a Python REPL.

    """
    def __init__(self, args: List[str] = sys.argv, src_dir_name: str = 'src'):
        """Configure the Python interpreter and this run class.

        :param args: the command line arguments; the default is the
                     ``sys.argv`` list object bound when the class is defined

        :param src_dir_name: the directory to add to the Python path
                             containing the source for the application

        """
        # keep everything after the first element, which for ``sys.argv`` is
        # the program name
        # NOTE(review): ``src_dir_name`` is not used in the visible part of
        # this method -- confirm it is handled elsewhere
        self.args = args[1:]
예제 #17
0
 def setUp(self):
     """Create the encoder under test from a ``TorchConfig(False)``."""
     self.de = NonUniformDimensionEncoder(TorchConfig(False))
예제 #18
0
 def test_create_empty(self):
     """Verify the data type and the first two dimensions of a tensor
     created with ``empty``.

     """
     config = TorchConfig(False, data_type=torch.float16)
     created = config.empty((3, 10))
     self.assertEqual(torch.float16, created.dtype)
     for dim, expected in enumerate((3, 10)):
         self.assertEqual(expected, created.shape[dim])
예제 #19
0
 def test_create_tensor(self):
     """Verify the data type and values of a tensor created from an
     iterable of the first five non-negative integers.

     """
     config = TorchConfig(False)
     first_five = it.islice(it.count(), 5)
     tensor = config.from_iterable(first_five)
     self.assertEqual(torch.float32, tensor.dtype)
     should = torch.FloatTensor([0, 1, 2, 3, 4])
     elementwise = should.eq(tensor)
     self.assertTrue(torch.all(elementwise))
예제 #20
0
 def test_cuda_config_cpu(self):
     """Verify that ``TorchConfig(False)`` reports the CPU device type."""
     config = TorchConfig(False)
     expected = TorchConfig.cpu_device_name()
     actual = config.device.type
     self.assertEqual(expected, actual)
예제 #21
0
 def test_cuda_config_write(self):
     """Verify that writing the configuration produces some output."""
     writer = StringIO()
     TorchConfig().write(writer=writer)
     output = writer.getvalue()
     logger.debug(output)
     self.assertTrue(len(output) > 0)
예제 #22
0
 def test_cuda_config(self):
     """Verify that a default configuration has ``info`` set."""
     info = TorchConfig().info
     self.assertNotEqual(None, info)
예제 #23
0
 def test_rand(self):
     """Run the random assertions with the fixture configuration and then
     with a ``TorchConfig(True)`` 64 bit float configuration.

     """
     size = (10, 20)
     self.rand_assert(50, size, self.conf)
     alt_conf = TorchConfig(True, data_type=torch.float64)
     self.rand_assert(50, size, alt_conf)
예제 #24
0
 def setUp(self):
     """Run the parent set up and create the 64 bit float configuration
     used by the tests.

     """
     super().setUp()
     float64_conf = TorchConfig(False, data_type=torch.float64)
     self.conf = float64_conf
예제 #25
0
 def setUp(self):
     """Initialize the framework and create the iris application
     configuration and its factory.

     """
     TorchConfig.init()
     self.config = AppConfig(
         'test-resources/iris/iris.conf', env={'app_root': '.'})
     self.fac = ImportConfigFactory(self.config, shared=True, reload=False)
예제 #26
0
 def test_config_type(self):
     """Verify the default data type and tensor class of a
     ``TorchConfig(False)``.

     """
     config = TorchConfig(False)
     expectations = (
         ('data_type', torch.float32),
         ('tensor_class', torch.FloatTensor),
     )
     for attr, expected in expectations:
         self.assertEqual(expected, getattr(config, attr))
예제 #27
0
 def _trans_test(self, arrs: Sequence[Tensor]):
     """Encode and decode the tensors and assert each decoded tensor equals
     its source.

     :param arrs: the tensors to round trip through the encoder

     """
     decoded = self.de.decode(self.de.encode(arrs))
     for source, result in zip(arrs, decoded):
         self.assertTrue(TorchConfig.equal(source, result))