Example #1
 def test_load_tasks_from_module(self):
     import tests.module_with_tasks as module
     loader = ModuleTaskLoader(module)
     loader.setup({})
     config = loader.load_doit_config()
     task_list = loader.load_tasks(Command(), [])
     assert ['xxx1'] == [t.name for t in task_list]
     assert {'verbose': 2} == config
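
The module under test is not shown; below is a minimal sketch of what tests.module_with_tasks presumably contains, inferred from the assertions above (a hypothetical reconstruction, not the real file):

# module_with_tasks.py -- hypothetical reconstruction: one task named
# 'xxx1' and a DOIT_CONFIG read back as {'verbose': 2}.
DOIT_CONFIG = {'verbose': 2}

def task_xxx1():
    # 'task_'-prefixed functions are task creators; an empty 'actions'
    # list defines a valid no-op task.
    return {'actions': []}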
Example #2
 def test_load_tasks(self):
     cmd = Command()
     members = {'task_xxx1': lambda : {'actions':[]},
                'task_no': 'strings are not tasks',
                'blabla': lambda :None,
                'DOIT_CONFIG': {'verbose': 2},
                }
     loader = ModuleTaskLoader(members)
     task_list, config = loader.load_tasks(cmd, {}, [])
     assert ['xxx1'] == [t.name for t in task_list]
     assert {'verbose': 2} == config
Example #3
 def testPluginBackend(self, depfile_name):
     mycmd = self.MyCmd(task_loader=ModuleTaskLoader({}),
                        config={'BACKEND': {'j2': 'doit.dependency:JsonDB'}})
     params, args = CmdParse(mycmd.get_options()).parse(['--backend', 'j2'])
     params['dep_file'] = depfile_name
     mycmd.execute(params, args)
     assert mycmd.dep_manager.db_class is mycmd._backends['j2']
Example #4
    def test_force_verbosity(self, dep_manager):
        members = {
            'DOIT_CONFIG': {
                'verbosity': 0
            },
            'task_xxx1': lambda: {
                'actions': []
            },
        }
        loader = ModuleTaskLoader(members)

        class SampleCmd(DoitCmdBase):
            opt_verbosity = {
                'name': 'verbosity',
                'short': 'v',
                'long': 'verbosity',
                'type': int,
                'default': None,
                'help': "verbosity foo"
            }
            cmd_options = (opt_verbosity, )

            def _execute(self, verbosity, force_verbosity):
                return verbosity, force_verbosity

        cmd = CmdFactory(SampleCmd,
                         task_loader=loader,
                         dep_manager=dep_manager)
        assert (2, True) == cmd.parse_execute(
            ['--db-file', dep_manager.name, '-v2'])
        assert (0, False) == cmd.parse_execute(['--db-file', dep_manager.name])
Example #5
def process_pipeline(tasks,
                     *args,
                     version=None,
                     workdir=None,
                     patterns=None,
                     num_processes=None,
                     output=None,
                     **kwargs):
    # doit expects command-line args as strings
    doit_args = ['-n', str(num_processes), '--continue']
    task_names = load_tasks(tasks)
    if not task_names:
        domains = get_domains(patterns)
        task_names = load_tasks([
            gen_task(workdir, domains)
            for gen_task in (gen_dig, gen_host, gen_ssl)
        ])

    def task_setup():
        return {
            'actions': [
                f'rm -rf {workdir}',
            ],
        }

    globals()[task_setup.__name__] = task_setup
    exitcode = DoitMain(ModuleTaskLoader(globals())).run(doit_args +
                                                         task_names)
    create_result(workdir, output)
    sys.exit(exitcode)
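
The globals()[task_setup.__name__] = task_setup line is the key trick here: it binds a locally defined task creator into the module namespace so ModuleTaskLoader(globals()) can discover it. A minimal standalone sketch of the same pattern, assuming nothing beyond doit itself (task and message names are illustrative):

import sys

from doit.cmd_base import ModuleTaskLoader
from doit.doit_cmd import DoitMain

def make_task(message):
    # Build a task creator at runtime; doit only discovers it once it is
    # bound to a 'task_'-prefixed name in the loaded namespace.
    def task_greet():
        return {'actions': [f'echo {message}'], 'verbosity': 2}
    return task_greet

globals()['task_greet'] = make_task('hello')

if __name__ == '__main__':
    # Equivalent to `doit greet` with this file as the dodo module.
    sys.exit(DoitMain(ModuleTaskLoader(globals())).run(['greet']))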
Example #6
    def doit(line):
        """
        Run *doit* with `task_creators` from all interactive variables
        (IPython's global namespace).

        Examples:

            >>> %doit --help          ## Show help for options and arguments.

            >>> def task_foo():
                    return {'actions': ['echo hi IPython'],
                            'verbosity': 2}

            >>> %doit list            ## List any tasks discovered.
            foo

            >>> %doit                 ## Run any tasks.
            .  foo
            hi IPython

        """
        # Override db-files location inside ipython-profile dir,
        # which is certainly writable.
        prof_dir = ip.profile_dir.location
        opt_vals = {'dep_file': os.path.join(prof_dir, 'db', '.doit.db')}
        commander = DoitMain(ModuleTaskLoader(ip.user_module),
                             extra_config={'GLOBAL': opt_vals})
        commander.BIN_NAME = 'doit'
        commander.run(line.split())
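
For the magic to close over ip, the function above is presumably defined inside the extension entry point and registered there; a sketch of the usual wiring, assuming IPython's standard extension hook and its register_magic_function API:

def load_ipython_extension(ip):
    def doit(line):
        ...  # magic body as above, closing over `ip`
    ip.register_magic_function(doit)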
Example #7
    def _run_doit(self, sel_tasks, reporter=None, doit_vars=None):
        """load this file as dodo file to collect tasks"""
        inc = IncrementalTasks(self.py_files, test_files=list(self.test_files))
        output = StringIO()
        config = {
            'dep_file': self.DB_FILE,
            'continue': True,
            'outfile': output,
        }
        if reporter:
            config['reporter'] = reporter

        ctx = {
            'tasks_generator': inc,
            'DOIT_CONFIG': config,
        }
        doit_cmd.reset_vars()
        if doit_vars:
            for key, value in doit_vars.items():
                doit_cmd.set_var(key, value)
        loader = ModuleTaskLoader(ctx)
        cmd = Run(task_loader=loader)
        cmd.parse_execute(sel_tasks)
        output.seek(0)
        return inc.graph, output.read()
Example #8
    def test_execute(self, depfile_name):
        members = {'task_xxx1': lambda : {'actions':[]},}
        loader = get_loader({}, task_loader=ModuleTaskLoader(members))

        mycmd = self.MyCmd(task_loader=loader)
        assert 'min' == mycmd.parse_execute([
            '--db-file', depfile_name,
            '--mine', 'min'])
Example #9
def test_doit_coverage(cookies):
    result = cookies.bake()
    with inside_dir(result.project):
        with poetryenv_in_project():
            importlib.reload(dodo)
            dodo.webbrowser = mock.MagicMock()
            assert DoitMain(ModuleTaskLoader(dodo)).run(["coverage"]) == 0
    importlib.reload(dodo)
Example #10
    def test_task_config(self):
        # Ensure that doit.cfg specified task parameters are applied.

        cmd = Command()
        members = {
            'task_foo': lambda: {
                'actions': [],
                'params': [{
                    'name': 'x',
                    'default': None,
                    'long': 'x'
                }]
            },
            'DOIT_CONFIG': {
                'task:foo': {
                    'x': 1
                }
            },
        }
        loader = ModuleTaskLoader(members)
        loader.setup({})
        loader.config = loader.load_doit_config()
        task_list = loader.load_tasks(cmd, [])
        task = task_list.pop()
        task.init_options()
        assert 1 == task.options['x']
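
Outside a test, the same 'task:<name>' override can live directly in a dodo-style module's DOIT_CONFIG; a sketch using a python-action so the parameter arrives as a keyword argument (task and parameter names are illustrative, mirroring the test above):

DOIT_CONFIG = {'task:foo': {'x': 1}}

def task_foo():
    def show(x):
        # x defaults to 1 via the 'task:foo' config section unless
        # overridden on the command line with --x.
        print('x =', x)
    return {
        'actions': [show],
        'params': [{'name': 'x', 'default': None, 'long': 'x'}],
        'verbosity': 2,
    }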
Example #11
    def test_new_cmd(self):
        class MyRawCmd(self.MyCmd):
            def execute(self, params, args):
                return params['my_opt']

        members = {'task_xxx1': lambda : {'actions':[]},}
        loader = ModuleTaskLoader(members)
        mycmd = MyRawCmd(loader)
        assert 'min' == mycmd.parse_execute(['--mine', 'min'])
Example #12
    def testCustomCodec(self, depfile_name):
        class MyCodec(JSONCodec):
            pass

        mycmd = self.MyCmd(task_loader=ModuleTaskLoader({}))
        params, args = CmdParse(mycmd.get_options()).parse([])
        params['codec_cls'] = MyCodec
        params['dep_file'] = depfile_name
        mycmd.execute(params, args)
        assert isinstance(mycmd.dep_manager.backend.codec, MyCodec)
Example #13
 def run(self, cmds):
     tasks = {}
     for v in self.loaders:
         for name, creator in v.list_tasks():
             tasks[name] = creator
     ml = ModuleTaskLoader(tasks)
     main = DoitMain(ml)
     main.config['default_tasks'] = cmds
     return main.run([])
Example #14
def run_task(module, task):
    """
    run_task - Have doit run the named task

    :param module module: module containing tasks
    :param str task: task to run
    """
    start = time.time()
    DoitMain(ModuleTaskLoader(module)).run([task])
    print("%.2f seconds" % (time.time() - start))
Example #15
def run_doit_task(tasks):
    """
      :param tasks: (dict) task_name -> {options}
    """
    loader = ModuleTaskLoader(globals())
    doit_config = {
        'verbosity': 2,
        'reporter': ErrorOnlyReporter,
    }
    return run_tasks(loader, tasks, extra_config={'GLOBAL': doit_config})
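
run_tasks here matches the signature of doit.api.run_tasks from recent doit releases (loader, tasks dict, extra_config). Per the docstring, a call presumably looks like this (task name hypothetical, with a matching task_build creator defined in globals()):

run_doit_task({'build': {}})  # task name -> options dict; empty uses defaults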
Example #16
    def testCustomChecker(self, depfile_name):
        class MyChecker(FileChangedChecker):
            pass

        mycmd = self.MyCmd(task_loader=ModuleTaskLoader({}))
        params, args = CmdParse(mycmd.get_options()).parse([])
        params['check_file_uptodate'] = MyChecker
        params['dep_file'] = depfile_name
        mycmd.execute(params, args)
        assert isinstance(mycmd.dep_manager.checker, MyChecker)
Example #17
    def test_new_cmd(self):
        class MyRawCmd(self.MyCmd):
            def execute(self, params, args):
                return params['my_opt']

        members = {'task_xxx1': lambda : {'actions':[]},}
        loader = ModuleTaskLoader(members)
        mycmd = MyRawCmd(task_loader=loader, cmds={'foo':None, 'bar':None})
        assert mycmd.loader.cmd_names == ['bar', 'foo']
        assert 'min' == mycmd.parse_execute(['--mine', 'min'])
Example #19
    def _doit_prepare(self, builder, task):
        miner.dodo.builder = builder

        # create build directory for storing doit database
        if not os.path.exists(builder.build_dir):
            os.makedirs(builder.build_dir)

        opt_vals = {'dep_file': os.path.join(builder.build_dir, '.doit.db')}
        commander = DoitMain(ModuleTaskLoader(miner.dodo),
                             extra_config={'GLOBAL': opt_vals})
        commander.BIN_NAME = 'doit'

        logging.info('Preparing LEDE build system...')
        commander.run(['--verbosity', '2', task])
Example #20
def test_doit_docs(cookies, docs_generator):
    extra_context = {"docs_generator": docs_generator}
    result = cookies.bake(extra_context=extra_context)
    project = result.project
    with inside_dir(project):
        with poetryenv_in_project():
            importlib.reload(dodo)
            dodo.webbrowser = mock.MagicMock()
            project.mkdir("docs", "htmlcov")
            with project.join("docs", "htmlcov", "index.html").open("w") as fo:
                fo.write("")
            assert DoitMain(ModuleTaskLoader(dodo)).run(["docs"]) == 0
            assert project.join("site", "htmlcov").check(dir=1)
    importlib.reload(dodo)
Example #21
def main(output, build_dir, elm_path, mount_at, exclude_modules,
         exclude_source_directories, force_exclusion, fake_user, fake_project,
         fake_version, fake_summary, fake_license, validate, doit_args,
         project_path, include_paths):
    """Generate static documentation for your Elm project"""

    if not shutil.which('rsync'):
        raise click.UsageError('this program requires rsync')

    if not check_rsync_version():
        raise click.UsageError(
            'this program requires rsync version {} or greater'.format(
                '.'.join(REQUIRED_RSYNC_VERSION)))

    if not validate and output is None:
        raise click.BadParameter('please specify --output directory')

    resolved_include_paths = [_resolve_path(path) for path in include_paths]
    exclude_modules = exclude_modules.split(',') if exclude_modules else []
    exclude_source_directories = exclude_source_directories.split(
        ',') if exclude_source_directories else []
    project_config = ProjectConfig(
        include_paths=resolved_include_paths,
        exclude_modules=exclude_modules,
        exclude_source_directories=exclude_source_directories,
        force_exclusion=force_exclusion,
        fake_user=fake_user,
        fake_project=fake_project,
        fake_version=fake_version,
        fake_summary=fake_summary,
        fake_license=fake_license,
    )

    task_creators = build_task_creators(
        _resolve_path(project_path),
        project_config,
        _resolve_path(elm_path) if elm_path else None,
        _resolve_path(output) if output is not None else None,
        build_path=_resolve_path(build_dir) if build_dir is not None else None,
        mount_point=mount_at,
        validate=validate)

    extra_config = {'GLOBAL': {'outfile': LazyOutfile()}}
    result = DoitMain(ModuleTaskLoader(task_creators),
                      extra_config=extra_config).run(
                          doit_args.split(' ') if doit_args else [])
    if result is not None and result > 0:
        raise DoitException('see output above', result)
Example #22
    def test_minversion(self, monkeypatch):
        members = {
            'task_xxx1': lambda : {'actions':[]},
            'DOIT_CONFIG': {'minversion': '5.2.3'},
            }
        loader = ModuleTaskLoader(members)

        # version ok
        monkeypatch.setattr(version, 'VERSION', '7.5.8')
        mycmd = self.MyCmd(loader)
        assert 'xxx' == mycmd.parse_execute([])

        # version too old
        monkeypatch.setattr(version, 'VERSION', '5.2.1')
        mycmd = self.MyCmd(loader)
        pytest.raises(InvalidDodoFile, mycmd.parse_execute, [])
Example #23
    def _doIt(self, *args, **kwargs):
        '''
        DoIt wrapper

        @param args   [in] (list) arguments
        @param kwargs [in] (dict) keyword arguments
        '''
        members = dict(kwargs.get('tasks', self._tasks))
        members.update(
            DOIT_CONFIG={
                'backend': 'json',
                'dep_file': self._dep_file,
                'reporter': self._reporter,
                'verbosity': self._verbosity,
                'minversion': '0.27.0'
            })

        status = DoitMain(ModuleTaskLoader(members)).run(args)
        if status:
            sys.exit(status)
Example #24
def test_config_simple(capsys):

    DOIT_CONFIG = doit_config(backend='r')

    loader = ModuleTaskLoader(locals())
    loader.setup({})
    config = loader.load_doit_config()

    assert config == {'backend': 'r'}

    res = DoitMain(ModuleTaskLoader(locals())).run(())
    assert res == 3

    captured = capsys.readouterr()
    with capsys.disabled():
        assert "TypeError: 'NoneType' object is not callable" in captured.err
Example #25
# Presumably a pytest fixture whose decorator was lost in extraction:
@pytest.fixture(scope="session")
def doit_handler():
    handler = DoitMain(ModuleTaskLoader(dodo))
    return handler
Example #26
def run(task_creators):
    """run doit using task_creators

    @param task_creators: module or dict containing task creators
    """
    sys.exit(DoitMain(ModuleTaskLoader(task_creators)).run(sys.argv[1:]))
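
Typical usage is to call this helper at the bottom of the task-definition module itself; a minimal sketch (task_hello is illustrative):

def task_hello():
    return {'actions': ['echo hello'], 'verbosity': 2}

if __name__ == '__main__':
    run(globals())  # forwards sys.argv[1:] to doit and exits with its code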
Example #27

def task_report():
    """ renders the template in reports\report_template.docx with all figures and numbers.yaml mapped"""

    figure_files = glob.glob(r'reports\figures\*.png')

    outfile = r'reports\report_analysis-double-pendulum.docx'
    infile = r'reports\report_template.docx'
    context_file = r'reports\numbers.yaml'
    src_file = 'dodo_utils.py'

    return {
        'actions': [(template_renderer(figure_files,
                                       context_file), (infile, outfile))],
        'targets': [outfile],
        'file_dep': [infile] + figure_files + [context_file, src_file],
        'clean': True,
    }


from doit_xtended.linkedtasks import _generated_linked_tasks

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    from doit.cmd_base import ModuleTaskLoader
    from doit.doit_cmd import DoitMain

    d = DoitMain(ModuleTaskLoader(globals()))
    d.run(['-s', 'models:prediction'])
Example #28
def train_profile(profile_dir: Path,
                  profile: Profile) -> Tuple[int, List[str]]:

    # Compact
    def ppath(query, default=None, write=False):
        return utils_ppath(profile, profile_dir, query, default, write=write)

    language = profile.get("language", "")

    # Inputs
    stt_system = profile.get("speech_to_text.system")
    stt_prefix = f"speech_to_text.{stt_system}"

    # intent_whitelist = ppath("training.intent-whitelist", "intent_whitelist")
    sentences_ini = ppath("speech_to_text.sentences_ini", "sentences.ini")
    sentences_dir = ppath("speech_to_text.sentences_dir", "sentences.dir")
    base_dictionary = ppath(f"{stt_prefix}.base_dictionary",
                            "base_dictionary.txt")
    base_language_model = ppath(f"{stt_prefix}.base_language_model",
                                "base_language_model.txt")
    base_language_model_weight = float(
        profile.get(f"{stt_prefix}.mix_weight", 0))
    g2p_model = ppath(f"{stt_prefix}.g2p_model", "g2p.fst")
    acoustic_model_type = stt_system

    # Pocketsphinx
    acoustic_model = ppath(f"{stt_prefix}.acoustic_model", "acoustic_model")

    # Kaldi
    kaldi_dir = Path(
        os.path.expandvars(profile.get(f"{stt_prefix}.kaldi_dir",
                                       "/opt/kaldi")))
    kaldi_graph_dir = acoustic_model / profile.get(f"{stt_prefix}.graph",
                                                   "graph")

    if acoustic_model_type == "kaldi":
        # Kaldi acoustic models are inside model directory
        acoustic_model = ppath(f"{stt_prefix}.model_dir", "model")
    else:
        _LOGGER.warning("Unsupported acoustic model type: %s",
                        acoustic_model_type)

    # ignore/upper/lower
    word_casing = profile.get("speech_to_text.dictionary_casing",
                              "ignore").lower()

    # default/ignore/upper/lower
    g2p_word_casing = profile.get("speech_to_text.g2p_casing",
                                  word_casing).lower()

    # all/first
    dict_merge_rule = profile.get("speech_to_text.dictionary_merge_rule",
                                  "all").lower()

    # Outputs
    dictionary = ppath(f"{stt_prefix}.dictionary",
                       "dictionary.txt",
                       write=True)
    custom_words = ppath(f"{stt_prefix}.custom_words",
                         "custom_words.txt",
                         write=True)
    language_model = ppath(f"{stt_prefix}.language_model",
                           "language_model.txt",
                           write=True)
    base_language_model_fst = ppath(f"{stt_prefix}.base_language_model_fst",
                                    "base_language_model.fst",
                                    write=True)
    intent_graph = ppath("intent.fsticiffs.intent_graph",
                         "intent.json",
                         write=True)
    intent_fst = ppath("intent.fsticiffs.intent_fst", "intent.fst", write=True)
    vocab = ppath(f"{stt_prefix}.vocabulary", "vocab.txt", write=True)
    unknown_words = ppath(f"{stt_prefix}.unknown_words",
                          "unknown_words.txt",
                          write=True)
    grammar_dir = ppath("speech_to_text.grammars_dir", "grammars", write=True)
    fsts_dir = ppath("speech_to_text.fsts_dir", "fsts", write=True)
    slots_dir = ppath("speech_to_text.slots_dir", "slots", write=True)

    # -----------------------------------------------------------------------------

    # Create cache directories
    for dir_path in [grammar_dir, fsts_dir]:
        dir_path.mkdir(parents=True, exist_ok=True)

    # -----------------------------------------------------------------------------

    ini_paths: List[Path] = get_ini_paths(sentences_ini, sentences_dir)

    # Join ini files into a single combined file and parse
    _LOGGER.debug("Parsing ini file(s): %s", [str(p) for p in ini_paths])

    try:
        intents = get_all_intents(ini_paths)
    except Exception:
        _LOGGER.exception("Failed to parse %s", ini_paths)
        return (1, ["Failed to parse sentences"])

    # -----------------------------------------------------------------------------

    def get_slot_names(item):
        """Yield referenced slot names."""
        if isinstance(item, jsgf.SlotReference):
            yield item.slot_name
        elif isinstance(item, jsgf.Sequence):
            for sub_item in item.items:
                for slot_name in get_slot_names(sub_item):
                    yield slot_name
        elif isinstance(item, jsgf.Rule):
            for slot_name in get_slot_names(item.rule_body):
                yield slot_name

    def number_transform(word):
        """Automatically transform numbers"""
        if not isinstance(word, jsgf.Word):
            # Skip anything besides words
            return

        try:
            n = int(word.text)

            # 75 -> (seventy five):75
            number_text = num2words(n, lang=language).replace("-", " ").strip()
            assert number_text, f"Empty num2words result for {n}"
            number_words = number_text.split()

            if len(number_words) == 1:
                # Easy case, single word
                word.text = number_text
                word.substitution = str(n)
            else:
                # Hard case, split into multiple Words
                return jsgf.Sequence(
                    text=number_text,
                    type=jsgf.SequenceType.GROUP,
                    substitution=str(n),
                    items=[jsgf.Word(w) for w in number_words],
                )
        except ValueError:
            # Not a number
            pass

    def do_intents_to_graph(intents, slot_names, targets):
        sentences, replacements = ini_jsgf.split_rules(intents)

        # Load slot values
        for slot_name in slot_names:
            slot_path = slots_dir / slot_name
            assert slot_path.is_file(), f"Missing slot file at {slot_path}"

            # Parse each non-empty line as a JSGF sentence
            slot_values = []
            with open(slot_path, "r") as slot_file:
                for line in slot_file:
                    line = line.strip()
                    if line:
                        sentence = jsgf.Sentence.parse(line)
                        slot_values.append(sentence)

            # Replace $slot with sentences
            replacements[f"${slot_name}"] = slot_values

        if profile.get("intent.replace_numbers", True):
            # Replace numbers in parsed sentences
            for intent_sentences in sentences.values():
                for sentence in intent_sentences:
                    jsgf.walk_expression(sentence, number_transform,
                                         replacements)

        # Convert to directed graph
        graph = intents_to_graph(intents, replacements)

        # Write graph to JSON file
        json_graph = graph_to_json(graph)
        with open(targets[0], "w") as graph_file:
            json.dump(json_graph, graph_file)

    def task_ini_graph():
        """sentences.ini -> intent.json"""
        slot_names = set()
        for intent_name in intents:
            for item in intents[intent_name]:
                for slot_name in get_slot_names(item):
                    slot_names.add(slot_name)

        # Add slot files as dependencies
        deps = [(slots_dir / slot_name) for slot_name in slot_names]

        # Add profile itself as a dependency
        profile_json_path = profile_dir / "profile.json"
        if profile_json_path.is_file():
            deps.append(profile_json_path)

        return {
            "file_dep": ini_paths + deps,
            "targets": [intent_graph],
            "actions": [(do_intents_to_graph, [intents, slot_names])],
        }

    # -----------------------------------------------------------------------------

    def do_graph_to_fst(intent_graph, targets):
        with open(intent_graph, "r") as graph_file:
            json_graph = json.load(graph_file)

        graph = json_to_graph(json_graph)
        graph_fst = graph_to_fst(graph)

        # Create symbol tables
        isymbols = fst.SymbolTable()
        for symbol, number in graph_fst.input_symbols.items():
            isymbols.add_symbol(symbol, number)

        osymbols = fst.SymbolTable()
        for symbol, number in graph_fst.output_symbols.items():
            osymbols.add_symbol(symbol, number)

        # Compile FST
        compiler = fst.Compiler(isymbols=isymbols,
                                osymbols=osymbols,
                                keep_isymbols=True,
                                keep_osymbols=True)

        compiler.write(graph_fst.intent_fst)
        compiled_fst = compiler.compile()

        # Write to file
        compiled_fst.write(str(targets[0]))

    def task_intent_fst():
        """intent.json -> intent.fst"""
        return {
            "file_dep": [intent_graph],
            "targets": [intent_fst],
            "actions": [(do_graph_to_fst, [intent_graph])],
        }

    # -----------------------------------------------------------------------------

    @create_after(executed="intent_fst")
    def task_language_model():
        """Creates an ARPA language model from intent.fst."""

        if base_language_model_weight > 0:
            yield {
                "name": "base_lm_to_fst",
                "file_dep": [base_language_model],
                "targets": [base_language_model_fst],
                "actions": ["ngramread --ARPA %(dependencies)s %(targets)s"],
            }

        # FST -> n-gram counts
        intent_counts = str(intent_fst) + ".counts"
        yield {
            "name": "intent_counts",
            "file_dep": [intent_fst],
            "targets": [intent_counts],
            "actions": ["ngramcount %(dependencies)s %(targets)s"],
        }

        # n-gram counts -> model
        intent_model = str(intent_fst) + ".model"
        yield {
            "name": "intent_model",
            "file_dep": [intent_counts],
            "targets": [intent_model],
            "actions": ["ngrammake %(dependencies)s %(targets)s"],
        }

        if base_language_model_weight > 0:
            merged_model = Path(str(intent_model) + ".merge")

            # merge
            yield {
                "name":
                "lm_merge",
                "file_dep": [base_language_model_fst, intent_model],
                "targets": [merged_model],
                "actions": [
                    f"ngrammerge --alpha={base_language_model_weight} %(dependencies)s %(targets)s"
                ],
            }

            intent_model = merged_model

        # model -> ARPA
        yield {
            "name": "intent_arpa",
            "file_dep": [intent_model],
            "targets": [language_model],
            "actions": ["ngramprint --ARPA %(dependencies)s > %(targets)s"],
        }

    # -----------------------------------------------------------------------------

    def do_vocab(targets):
        with open(targets[0], "w") as vocab_file:
            input_symbols = fst.Fst.read(str(intent_fst)).input_symbols()
            for i in range(input_symbols.num_symbols()):
                # Critical that we use get_nth_key here, since input
                # symbol numbering may be discontiguous.
                key = input_symbols.get_nth_key(i)
                symbol = input_symbols.find(key).decode().strip()
                if symbol and not (symbol.startswith("__")
                                   or symbol.startswith("<")):
                    print(symbol, file=vocab_file)

            if base_language_model_weight > 0:
                # Add all words from base dictionary
                with open(base_dictionary, "r") as dict_file:
                    for word in read_dict(dict_file):
                        print(word, file=vocab_file)

    @create_after(executed="language_model")
    def task_vocab():
        """Writes all vocabulary words to a file from intent.fst."""
        return {
            "file_dep": [intent_fst],
            "targets": [vocab],
            "actions": [do_vocab]
        }

    # -----------------------------------------------------------------------------

    def do_dict(dictionary_paths: Iterable[Path], targets):
        with open(targets[0], "w") as dictionary_file:
            if unknown_words.exists():
                unknown_words.unlink()

            dictionary_format = FORMAT_CMU
            if acoustic_model_type == "julius":
                dictionary_format = FORMAT_JULIUS

            make_dict(
                vocab,
                dictionary_paths,
                dictionary_file,
                unknown_path=unknown_words,
                dictionary_format=dictionary_format,
                merge_rule=dict_merge_rule,
                upper=(word_casing == "upper"),
                lower=(word_casing == "lower"),
            )

            if unknown_words.exists() and g2p_model.exists():
                # Generate single pronunciation guesses
                _LOGGER.debug("Guessing pronunciations for unknown word(s)")

                g2p_output = subprocess.check_output(
                    [
                        "phonetisaurus-apply",
                        "--model",
                        str(g2p_model),
                        "--word_list",
                        str(unknown_words),
                        "--nbest",
                        "1",
                    ],
                    universal_newlines=True,
                )

                g2p_transform = lambda w: w
                if g2p_word_casing == "upper":
                    g2p_transform = lambda w: w.upper()
                elif g2p_word_casing == "lower":
                    g2p_transform = lambda w: w.lower()

                # Append to dictionary and custom words
                with open(custom_words, "a") as words_file:
                    with open(unknown_words, "w") as unknown_words_file:
                        for line in g2p_output.splitlines():
                            line = line.strip()
                            word, phonemes = re.split(r"\s+", line, maxsplit=1)
                            word = g2p_transform(word)
                            print(word, phonemes, file=dictionary_file)
                            print(word, phonemes, file=words_file)
                            print(word, phonemes, file=unknown_words_file)

    @create_after(executed="vocab")
    def task_vocab_dict():
        """Creates custom pronunciation dictionary based on desired vocabulary."""
        dictionary_paths = [base_dictionary]
        if custom_words.exists():
            # Custom dictionary goes first so that the "first" dictionary merge
            # rule will choose pronunciations from it.
            dictionary_paths.insert(0, custom_words)

        # Exclude dictionaries that don't exist
        dictionary_paths = [p for p in dictionary_paths if p.exists()]

        return {
            "file_dep": [vocab] + dictionary_paths,
            "targets": [dictionary],
            "actions": [(do_dict, [dictionary_paths])],
        }

    # -----------------------------------------------------------------------------

    @create_after(executed="vocab_dict")
    def task_kaldi_train():
        """Creates HCLG.fst for a Kaldi nnet3 or gmm model."""
        if acoustic_model_type == "kaldi":
            return {
                "file_dep": [dictionary, language_model],
                "targets": [kaldi_graph_dir / "HCLG.fst"],
                "actions": [[
                    "bash",
                    str(acoustic_model / "train.sh"),
                    str(kaldi_dir),
                    str(acoustic_model),
                    str(dictionary),
                    str(language_model),
                ]],
            }

    # -----------------------------------------------------------------------------

    errors = []

    class MyReporter(ConsoleReporter):
        def add_failure(self, task, exception):
            super().add_failure(task, exception)
            errors.append(f"{task}: {exception}")

        def runtime_error(self, msg):
            super().runtime_error(msg)
            errors.append(msg)

    DOIT_CONFIG = {"action_string_formatting": "old", "reporter": MyReporter}

    # Monkey patch inspect to make doit work inside PyInstaller.
    # doit grabs the line numbers of functions, probably for debugging,
    # but PyInstaller doesn't seem to keep that information around.
    #
    # A better approach would be to create a custom TaskLoader.
    import inspect

    inspect.getsourcelines = lambda obj: [0, 0]

    # Run doit main
    result = DoitMain(ModuleTaskLoader(locals())).run(sys.argv[1:])
    return (result, errors)
Example #29
    def test_execute(self):
        members = {'task_xxx1': lambda : {'actions':[]},}
        loader = ModuleTaskLoader(members)

        mycmd = self.MyCmd(loader)
        assert 'min' == mycmd.parse_execute(['--mine', 'min'])
Example #30
def train():
    doit.doit_cmd.DoitMain(ModuleTaskLoader(sys.modules[__name__])).run(
        ['train'])
Example #31
 def testInvalidChecker(self):
     mycmd = self.MyCmd(task_loader=ModuleTaskLoader({}))
     params, args = CmdParse(mycmd.get_options()).parse([])
     params['check_file_uptodate'] = 'i dont exist'
     pytest.raises(InvalidCommand, mycmd.execute, params, args)