Exemplo n.º 1
0
from .cubes import PerplexityStrategy, GreedyStrategy
from .model_constructor import init_simple_default_model, create_default_topics
from .rel_toolbox_lite import count_vocab_size, handle_regularizer

import artm

from inspect import signature, Parameter
from strictyaml import Map, Str, Int, Seq, Float, Bool
from strictyaml import Any, Optional, EmptyDict, EmptyNone, EmptyList
from strictyaml import dirty_load

# TODO: use stackoverflow.com/questions/37929851/parse-numpydoc-docstring-and-access-components
# for now just hardcode most common / important types
#
# Maps a parameter name to the strictyaml validator used for its value.
# NOTE(review): presumably looked up when building YAML schemas for ARTM
# model / regularizer arguments -- the consumer is not visible in this
# snippet, confirm against the code that reads ARTM_TYPES.
ARTM_TYPES = {
    "tau": Float(),
    # Accepts a single string, a sequence of strings, or an empty value.
    "topic_names": Str() | Seq(Str()) | EmptyNone(),
    # TODO: handle class_ids in model and in regularizers separately
    # Same shape as "topic_names": string, sequence of strings, or empty.
    "class_ids": Str() | Seq(Str()) | EmptyNone(),
    # A float, or an empty value.
    "gamma": Float() | EmptyNone(),
    "seed": Int(),
    "num_document_passes": Int(),
    "num_processors": Int(),
    "cache_theta": Bool(),
    "reuse_theta": Bool(),
    "theta_name": Str()
}

# change log style
# Import-time side effect: builds a logging configuration with
# minloglevel = 3 and hands it to a LibArtm wrapper instance.
# NOTE(review): 3 presumably selects the most severe log level only, which
# would silence the library's lower-severity output -- confirm against the
# BigARTM documentation for ConfigureLoggingArgs.
lc = artm.messages.ConfigureLoggingArgs()
lc.minloglevel = 3
# The instance is kept in a module-level name; creating it is what applies
# the logging configuration (TODO confirm it is not needed elsewhere).
lib = artm.wrapper.LibArtm(logging_config=lc)
Exemplo n.º 2
0
        'report': _REPORT_DEFAULTS,
    },
    'onSuccess': {
        'report': _REPORT_DEFAULTS,
    },
    'environment': [],
    'executionTimeout': None,
    'killTimeout': 30,
    'statsd': None,
}


_report_schema = Map({
    Opt("sentry"): Map({
        Opt("dsn"): Map({
            Opt("value"): EmptyNone() | Str(),
            Opt("fromFile"): EmptyNone() | Str(),
            Opt("fromEnvVar"): EmptyNone() | Str(),
        }),
        Opt("fingerprint"): Seq(Str()),
        Opt("level"): Str(),
        Opt("extra"): MapPattern(Str(), Str() | Int() | Bool()),
    }),
    Opt("mail"): Map({
        "from": EmptyNone() | Str(),
        "to": EmptyNone() | Str(),
        Opt("smtpHost"): Str(),
        Opt("smtpPort"): Int(),
        Opt("subject"): Str(),
        Opt("body"): Str(),
    })
Exemplo n.º 3
0
class TestConfig:
    """Test configuration loaded from a YAML file and validated on load.

    The file is parsed with strictyaml against ``_schema``. After parsing,
    a few sanity checks are applied and some raw values are converted in
    place:

    * ``model.dtype`` becomes a ``torch.dtype`` (``torch.float32`` when the
      field is empty),
    * ``hardware.gpuDevice`` becomes a ``torch.device('cuda:<n>')``,
    * ``hardware.readerThreads`` becomes ``os.cpu_count()`` when empty.

    The resulting nested dictionary is stored in ``self.dictionary`` and is
    also reachable through ``config[key]`` / ``config[key] = value``.
    """

    # Names accepted for ``model.dtype`` in the YAML file -> torch dtypes.
    _str_to_dtype = {
        'float16': torch.float16,
        'float32': torch.float32,
        'float64': torch.float64,
        'uint8': torch.uint8,
        'int8': torch.int8,
        'int16': torch.int16,
        'int32': torch.int32,
        'int64': torch.int64,
    }

    # strictyaml schema describing the expected layout of the config file.
    _schema = Map({
        'simulation':
        Map({
            'Ts': Int(),  # Time-discretization in milliseconds
            'tSample': Int(),  # Number of simulation steps
            'tStartLoss': Int(),  # Start computing loss at this time-step
        }),
        'model':
        Map({
            'type': Str(),  # {cnn5-avgp-fc1}
            'CkptFile': Str(),  # Path to checkpoint
            'dtype': EmptyNone() | Str(
            ),  # {float16, float32, float64, uint8, int8, int16, int32, int64}
        }),
        'batchsize':
        Int(),
        'hardware':
        Map({
            'readerThreads': EmptyNone() |
            Int(),  # {empty: cpu_count, 0: main thread, >0: num threads used}
            'gpuDevice': Int(),  # GPU to be used by device number
        }),
    })

    @staticmethod
    def _require(condition, message):
        """Raise ``AssertionError(message)`` when ``condition`` is false.

        Unlike a bare ``assert`` statement this check is not removed when
        Python runs with ``-O``; the exception type is kept so that existing
        callers observe the same failure mode as before.
        """
        if not condition:
            raise AssertionError(message)

    def __init__(self, config_filepath):
        """Load, validate and post-process the configuration.

        Args:
            config_filepath: Path of the YAML configuration file.

        Raises:
            AssertionError: If a sanity check on the parsed values fails
                (unsupported time-step / sample count, loss start not
                before the last step, missing checkpoint file, batch size
                below 1).
            KeyError: If ``model.dtype`` names a dtype that is not listed
                in ``_str_to_dtype``.
        """
        with open(config_filepath, 'r') as stream:
            self.dictionary = load(stream.read(), self._schema).data

        # Aliases of the nested sections; mutating them mutates
        # ``self.dictionary`` as well.
        simulation = self.dictionary['simulation']
        model = self.dictionary['model']
        hardware = self.dictionary['hardware']

        # Some sanity checks.
        self._require(simulation['Ts'] == 1, "Only 1 ms time-step is tested")
        self._require(simulation['tSample'] == 100, "Only 100 ms available")
        self._require(
            simulation['tSample'] > simulation['tStartLoss'],
            "simulation.tStartLoss ({}) must be smaller than "
            "simulation.tSample ({})".format(simulation['tStartLoss'],
                                             simulation['tSample']))
        self._require(
            os.path.exists(model['CkptFile']),
            "Checkpoint file does not exist: {}".format(model['CkptFile']))
        self._require(
            self.dictionary['batchsize'] >= 1,
            "batchsize must be >= 1, got {}".format(
                self.dictionary['batchsize']))

        # Replace the dtype name by the torch dtype; empty means float32.
        dtype_name = model['dtype']
        if dtype_name is None:
            model['dtype'] = torch.float32
        elif dtype_name in self._str_to_dtype:
            model['dtype'] = self._str_to_dtype[dtype_name]
        else:
            # Same exception type as the plain dict lookup would raise, but
            # with a message that lists the accepted names.
            raise KeyError(
                "Unsupported model dtype {!r}; expected one of {}".format(
                    dtype_name, sorted(self._str_to_dtype)))

        # Replace the GPU index by the corresponding CUDA device object.
        hardware['gpuDevice'] = torch.device(
            'cuda:{}'.format(hardware['gpuDevice']))

        # An empty readerThreads entry falls back to the CPU count.
        if hardware['readerThreads'] is None:
            hardware['readerThreads'] = os.cpu_count()

    def __getitem__(self, key):
        """Return the top-level configuration entry stored under ``key``."""
        return self.dictionary[key]

    def __setitem__(self, key, value):
        """Store ``value`` as the top-level configuration entry ``key``."""
        self.dictionary[key] = value