Exemplo n.º 1
0
    def setUp(self):
        """Load the corpora fixture, anchor its paths at ``TEST_PATH`` and wrap it.

        The document read from ``self.conf_file`` has the
        ``source_file_path`` and ``target_file_path`` of every entry in
        ``sources``, plus the top-level ``container_path``, joined onto
        ``TEST_PATH``. The result is stored on ``self.cconf`` as a
        ``CorporaConfig``.
        """
        raw_conf = ingest_yaml_doc(self.conf_file)
        self.cconf = raw_conf

        for entry in raw_conf['sources']:
            for path_key in ('source_file_path', 'target_file_path'):
                entry[path_key] = os.path.join(TEST_PATH, entry[path_key])

        raw_conf['container_path'] = os.path.join(TEST_PATH,
                                                  raw_conf['container_path'])

        self.cconf = CorporaConfig(raw_conf)
Exemplo n.º 2
0
def build_translation_model(args):
    """Build one translation model per parameter set, then email the results.

    The translate configuration is ``conf.system.files.data.translate`` when
    ``args.t_translate_config`` is ``None``; otherwise it is loaded from that
    YAML file. If the given path is not an existing file, an error is logged
    and the function returns ``None`` without doing anything else.

    The project directory (``tconf.paths.project``) is created when missing.
    If it is a regular file or a non-empty directory the error is logged and
    the process exits with status 1.

    Side effects: writes ``translate.yaml`` into the project directory, sets
    the ``IRSTLM`` environment variable, runs the build tasks, aggregates the
    model data and sends the contents of ``data.csv`` to
    ``tconf.settings.email`` through the SMTP server on ``localhost``.

    :param args: parsed command-line arguments; ``t_translate_config`` is
        read here and the whole object is passed to ``fetch_config``.
    """
    conf = fetch_config(args)
    if args.t_translate_config is None:
        tconf = conf.system.files.data.translate
    elif os.path.isfile(args.t_translate_config):
        tconf = TranslateConfig(ingest_yaml_doc(args.t_translate_config), conf)
    else:
        logger.error(args.t_translate_config + " doesn't exist")
        return

    # The project directory has to be either new or empty before building.
    if not os.path.exists(tconf.paths.project):
        os.makedirs(tconf.paths.project)
    elif os.path.isfile(tconf.paths.project):
        logger.error(tconf.paths.project + " is a file")
        sys.exit(1)
    elif os.listdir(tconf.paths.project):
        logger.error(tconf.paths.project + " must be empty")
        sys.exit(1)

    # Keep a copy of the effective configuration next to the build output.
    with open(os.path.join(tconf.paths.project, "translate.yaml"), 'w') as f:
        yaml.dump(tconf.dict(), f, default_flow_style=False)

    tconf.conf.runstate.pool_size = tconf.settings.pool_size
    run_args = get_run_args(tconf)
    app = BuildApp(conf)
    os.environ['IRSTLM'] = tconf.paths.irstlm

    setup_train(tconf)
    setup_tune(tconf)
    setup_test(tconf)

    # Queue one build task per parameter set; the set's index and the
    # translate configuration are appended as trailing arguments.
    for idx, parameter_set in enumerate(run_args):
        parameter_set = list(parameter_set)
        parameter_set.append(idx)
        parameter_set.append(tconf)
        t = app.add()
        t.job = build_model
        t.args = parameter_set
        # NOTE(review): element 9 is presumably the index appended above,
        # which holds only if get_run_args yields 9-item sets -- confirm.
        t.description = "model_" + str(parameter_set[9])

    app.run()

    aggregate_model_data(tconf.paths.project)

    from_addr = "*****@*****.**"
    to_addr = [tconf.settings.email]

    with open(os.path.join(tconf.paths.project, "data.csv")) as data:
        msg = MIMEText(data.read())

    msg['Subject'] = "Model Complete"
    msg['From'] = from_addr
    msg['To'] = ", ".join(to_addr)

    server = smtplib.SMTP("localhost")
    try:
        server.sendmail(from_addr, to_addr, msg.as_string())
        server.quit()
    finally:
        # Release the socket even when sending fails; close() is a no-op
        # once quit() has already closed the connection.
        server.close()
Exemplo n.º 3
0
    def test_cconf_creation(self):
        """Assert that building a ``CorporaConfig`` from the fixture raises.

        The document read from ``self.conf_file`` first has its per-source
        ``source_file_path``/``target_file_path`` and its ``container_path``
        joined onto ``TEST_PATH``; constructing ``CorporaConfig`` from it is
        then expected to raise an exception.
        """
        raw_conf = ingest_yaml_doc(self.conf_file)
        self.cconf = raw_conf

        for entry in raw_conf['sources']:
            for path_key in ('source_file_path', 'target_file_path'):
                entry[path_key] = os.path.join(TEST_PATH, entry[path_key])

        raw_conf['container_path'] = os.path.join(TEST_PATH,
                                                  raw_conf['container_path'])

        with self.assertRaises(Exception):
            self.cconf = CorporaConfig(raw_conf)
Exemplo n.º 4
0
def build_translation_model(args):
    """Build one translation model per parameter set, then email the results.

    The translate configuration is ``conf.system.files.data.translate`` when
    ``args.t_translate_config`` is ``None``; otherwise it is loaded from that
    YAML file. If the given path is not an existing file, an error is logged
    and the function returns ``None`` without doing anything else.

    The project directory (``tconf.paths.project``) is created when missing.
    If it is a regular file or a non-empty directory the error is logged and
    the process exits with status 1.

    Side effects: writes ``translate.yaml`` into the project directory, sets
    the ``IRSTLM`` environment variable, runs the build tasks, aggregates the
    model data and sends the contents of ``data.csv`` to
    ``tconf.settings.email`` through the SMTP server on ``localhost``.

    :param args: parsed command-line arguments; ``t_translate_config`` is
        read here and the whole object is passed to ``fetch_config``.
    """
    conf = fetch_config(args)
    if args.t_translate_config is None:
        tconf = conf.system.files.data.translate
    elif os.path.isfile(args.t_translate_config):
        tconf = TranslateConfig(ingest_yaml_doc(args.t_translate_config), conf)
    else:
        logger.error(args.t_translate_config + " doesn't exist")
        return

    # The project directory has to be either new or empty before building.
    if not os.path.exists(tconf.paths.project):
        os.makedirs(tconf.paths.project)
    elif os.path.isfile(tconf.paths.project):
        logger.error(tconf.paths.project + " is a file")
        sys.exit(1)
    elif os.listdir(tconf.paths.project):
        logger.error(tconf.paths.project + " must be empty")
        sys.exit(1)

    # Keep a copy of the effective configuration next to the build output.
    with open(os.path.join(tconf.paths.project, "translate.yaml"), 'w') as f:
        yaml.dump(tconf.dict(), f, default_flow_style=False)

    tconf.conf.runstate.pool_size = tconf.settings.pool_size
    run_args = get_run_args(tconf)
    app = BuildApp(conf)
    os.environ['IRSTLM'] = tconf.paths.irstlm

    setup_train(tconf)
    setup_tune(tconf)
    setup_test(tconf)

    # Queue one build task per parameter set; the set's index and the
    # translate configuration are appended as trailing arguments.
    for idx, parameter_set in enumerate(run_args):
        parameter_set = list(parameter_set)
        parameter_set.append(idx)
        parameter_set.append(tconf)
        t = app.add()
        t.job = build_model
        t.args = parameter_set
        # NOTE(review): element 9 is presumably the index appended above,
        # which holds only if get_run_args yields 9-item sets -- confirm.
        t.description = "model_" + str(parameter_set[9])

    app.run()

    aggregate_model_data(tconf.paths.project)

    from_addr = "*****@*****.**"
    to_addr = [tconf.settings.email]

    with open(os.path.join(tconf.paths.project, "data.csv")) as data:
        msg = MIMEText(data.read())

    msg['Subject'] = "Model Complete"
    msg['From'] = from_addr
    msg['To'] = ", ".join(to_addr)

    server = smtplib.SMTP("localhost")
    try:
        server.sendmail(from_addr, to_addr, msg.as_string())
        server.quit()
    finally:
        # Release the socket even when sending fails; close() is a no-op
        # once quit() has already closed the connection.
        server.close()
Exemplo n.º 5
0
def translate_text_doc(args):
    """Translate ``args.t_input_file`` into ``args.t_output_file``.

    The translate configuration is ``conf.system.files.data.translate`` when
    ``args.t_translate_config`` is ``None``; otherwise it is loaded from that
    YAML file. If the given path is not an existing file, an error is logged
    and nothing is translated.
    """
    conf = fetch_config(args)
    config_path = args.t_translate_config

    if config_path is None:
        tconf = conf.system.files.data.translate
    elif not os.path.isfile(config_path):
        logger.error(config_path + " doesn't exist")
        return
    else:
        tconf = TranslateConfig(ingest_yaml_doc(config_path), conf)

    translate_file(args.t_input_file,
                   args.t_output_file,
                   tconf,
                   args.t_protected_regex)
Exemplo n.º 6
0
def translate_text_doc(args):
    """Translate ``args.t_input_file`` into ``args.t_output_file``.

    The translate configuration is ``conf.system.files.data.translate`` when
    ``args.t_translate_config`` is ``None``; otherwise it is loaded from that
    YAML file. If the given path is not an existing file, an error is logged
    and nothing is translated.
    """
    conf = fetch_config(args)
    config_path = args.t_translate_config

    if config_path is None:
        tconf = conf.system.files.data.translate
    elif not os.path.isfile(config_path):
        logger.error(config_path + " doesn't exist")
        return
    else:
        tconf = TranslateConfig(ingest_yaml_doc(config_path), conf)

    translate_file(args.t_input_file,
                   args.t_output_file,
                   tconf,
                   args.t_protected_regex)
Exemplo n.º 7
0
    def setUp(self):
        """Load the corpora fixture, anchor its paths at ``TEST_PATH`` and wrap it.

        The document read from ``self.conf_file`` has the
        ``source_file_path`` and ``target_file_path`` of every entry in
        ``sources``, plus the top-level ``container_path``, joined onto
        ``TEST_PATH``. The result is stored on ``self.cconf`` as a
        ``CorporaConfig``.
        """
        raw_conf = ingest_yaml_doc(self.conf_file)
        self.cconf = raw_conf

        for entry in raw_conf['sources']:
            for path_key in ('source_file_path', 'target_file_path'):
                entry[path_key] = os.path.join(TEST_PATH, entry[path_key])

        raw_conf['container_path'] = os.path.join(TEST_PATH,
                                                  raw_conf['container_path'])

        self.cconf = CorporaConfig(raw_conf)
Exemplo n.º 8
0
def model_results(args):
    """Aggregate the model data found in the translate project directory.

    The translate configuration is ``conf.system.files.data.translate`` when
    ``args.t_translate_config`` is ``None``; otherwise it is loaded from that
    YAML file. If the given path is not an existing file, an error is logged
    and no aggregation happens.
    """
    conf = fetch_config(args)
    config_path = args.t_translate_config

    if config_path is None:
        tconf = conf.system.files.data.translate
    elif not os.path.isfile(config_path):
        logger.error(config_path + " doesn't exist")
        return
    else:
        tconf = TranslateConfig(ingest_yaml_doc(config_path), conf)

    aggregate_model_data(tconf.paths.project)
Exemplo n.º 9
0
    def test_cconf_creation(self):
        """Assert that building a ``CorporaConfig`` from the fixture raises.

        The document read from ``self.conf_file`` first has its per-source
        ``source_file_path``/``target_file_path`` and its ``container_path``
        joined onto ``TEST_PATH``; constructing ``CorporaConfig`` from it is
        then expected to raise an exception.
        """
        raw_conf = ingest_yaml_doc(self.conf_file)
        self.cconf = raw_conf

        for entry in raw_conf['sources']:
            for path_key in ('source_file_path', 'target_file_path'):
                entry[path_key] = os.path.join(TEST_PATH, entry[path_key])

        raw_conf['container_path'] = os.path.join(TEST_PATH,
                                                  raw_conf['container_path'])

        with self.assertRaises(Exception):
            self.cconf = CorporaConfig(raw_conf)
Exemplo n.º 10
0
def model_results(args):
    """Aggregate the model data found in the translate project directory.

    The translate configuration is ``conf.system.files.data.translate`` when
    ``args.t_translate_config`` is ``None``; otherwise it is loaded from that
    YAML file. If the given path is not an existing file, an error is logged
    and no aggregation happens.
    """
    conf = fetch_config(args)
    config_path = args.t_translate_config

    if config_path is None:
        tconf = conf.system.files.data.translate
    elif not os.path.isfile(config_path):
        logger.error(config_path + " doesn't exist")
        return
    else:
        tconf = TranslateConfig(ingest_yaml_doc(config_path), conf)

    aggregate_model_data(tconf.paths.project)
Exemplo n.º 11
0
    def ingest(self, input_obj=None):
        """Copy every key/value pair of ``input_obj`` onto this object.

        :param input_obj: ``None`` (nothing happens), a ``dict``, or the path
            of an existing file that is read with ``ingest_yaml_doc``.
        :raises TypeError: for a ``ConfigurationBase`` instance or any other
            value that is not the path of an existing file; the message is
            also logged at critical level.
        """
        if input_obj is None:
            return

        if not isinstance(input_obj, dict):
            # Anything that is not a dict must name a file on disk.
            if isinstance(input_obj, ConfigurationBase) or not os.path.isfile(input_obj):
                msg = 'cannot ingest Configuration obj from object with type {0}'.format(type(input_obj))
                logger.critical(msg)
                raise TypeError(msg)

            input_obj = ingest_yaml_doc(input_obj)

        for attr_name, attr_value in input_obj.items():
            setattr(self, attr_name, attr_value)
            logger.debug('setting {0} using default setter in {1} object'.format(attr_name, type(self)))
Exemplo n.º 12
0
def create_corpora(args):
    """Create hybrid corpora from the selected corpora configuration.

    The configuration is ``conf.system.files.data.corpora`` when
    ``args.t_corpora_config`` is ``None``; otherwise it is loaded from that
    YAML file. An error is logged and nothing is created when the given path
    is not an existing file, or when the configured ``container_path``
    already exists.
    """
    conf = fetch_config(args)
    config_path = args.t_corpora_config

    if config_path is None:
        cconf = conf.system.files.data.corpora
    elif not os.path.isfile(config_path):
        logger.error(config_path + " doesn't exist")
        return
    else:
        cconf = CorporaConfig(ingest_yaml_doc(config_path))

    # Refuse to write into an existing container.
    if os.path.exists(cconf.container_path):
        logger.error(cconf.container_path +
                     " already exists. Please delete it or change the container and try again")
        return

    create_hybrid_corpora(cconf)
Exemplo n.º 13
0
def generated_includes(conf):
    """Map generated-content spec files to the include files they depend on.

    YAML files under ``conf.paths.includes`` are split by basename prefix:
    ``toc-spec``/``ref-spec`` files contribute the entries of their
    ``sources`` key; ``steps``/``example`` files contribute the
    ``source.file`` value of every item that has a ``source`` key.

    :returns: dict keyed by the spec file path with its first
        ``len(conf.paths.source)`` characters removed; each value is the
        list of dependencies joined onto the includes path, shortened the
        same way. Step files without any dependency are left out.
    """
    spec_paths = []
    step_paths = []
    for path in expand_tree(os.path.join(conf.paths.includes),
                            input_extension='yaml'):
        name = os.path.basename(path)

        if name.startswith(('toc-spec', 'ref-spec')):
            spec_paths.append(path)
        elif name.startswith(('steps', 'example')):
            # example files, for the purpose of this, have the same
            # structure as steps, so they are handled the same way.
            step_paths.append(path)

    source_len = len(conf.paths.source)
    include_prefix = conf.paths.includes[len(conf.paths.source):]
    mapping = {}

    for spec_path in spec_paths:
        if not os.path.exists(spec_path):
            continue

        spec = ingest_yaml_doc(spec_path)
        mapping[spec_path[source_len:]] = [os.path.join(include_prefix, src)
                                           for src in spec['sources']]

    for step_path in step_paths:
        steps = ingest_yaml_list(step_path)
        step_sources = [step['source']['file']
                        for step in steps
                        if 'source' in step]

        if step_sources:
            mapping[step_path[source_len:]] = [os.path.join(include_prefix, src)
                                               for src in step_sources]

    return mapping
Exemplo n.º 14
0
def generated_includes(conf):
    """Map generated-content spec files to the include files they depend on.

    YAML files under ``conf.paths.includes`` are split by basename prefix:
    ``toc-spec``/``ref-spec`` files contribute the entries of their
    ``sources`` key; ``steps``/``example`` files contribute the
    ``source.file`` value of every item that has a ``source`` key.

    :returns: dict keyed by the spec file path with its first
        ``len(conf.paths.source)`` characters removed; each value is the
        list of dependencies joined onto the includes path, shortened the
        same way. Step files without any dependency are left out.
    """
    spec_paths = []
    step_paths = []
    for path in expand_tree(os.path.join(conf.paths.includes),
                            input_extension='yaml'):
        name = os.path.basename(path)

        if name.startswith(('toc-spec', 'ref-spec')):
            spec_paths.append(path)
        elif name.startswith(('steps', 'example')):
            # example files, for the purpose of this, have the same
            # structure as steps, so they are handled the same way.
            step_paths.append(path)

    source_len = len(conf.paths.source)
    include_prefix = conf.paths.includes[len(conf.paths.source):]
    mapping = {}

    for spec_path in spec_paths:
        if not os.path.exists(spec_path):
            continue

        spec = ingest_yaml_doc(spec_path)
        mapping[spec_path[source_len:]] = [os.path.join(include_prefix, src)
                                           for src in spec['sources']]

    for step_path in step_paths:
        steps = ingest_yaml_list(step_path)
        step_sources = [step['source']['file']
                        for step in steps
                        if 'source' in step]

        if step_sources:
            mapping[step_path[source_len:]] = [os.path.join(include_prefix, src)
                                               for src in step_sources]

    return mapping
Exemplo n.º 15
0
    def ingest(self, input_obj=None):
        """Copy every key/value pair of ``input_obj`` onto this object.

        :param input_obj: ``None`` (nothing happens), a ``dict``, or the path
            of an existing file that is read with ``ingest_yaml_doc``.
        :raises TypeError: for a ``ConfigurationBase`` instance or any other
            value that is not the path of an existing file; the message is
            also logged at critical level.
        """
        if input_obj is None:
            return

        if not isinstance(input_obj, dict):
            # Anything that is not a dict must name a file on disk.
            if isinstance(input_obj, ConfigurationBase) or not os.path.isfile(input_obj):
                msg = 'cannot ingest Configuration obj from object with type {0}'.format(type(input_obj))
                logger.critical(msg)
                raise TypeError(msg)

            input_obj = ingest_yaml_doc(input_obj)

        for attr_name, attr_value in input_obj.items():
            setattr(self, attr_name, attr_value)
            logger.debug('setting {0} using default setter in {1} object'.format(attr_name, type(self)))
Exemplo n.º 16
0
def create_corpora(args):
    """Create hybrid corpora from the selected corpora configuration.

    The configuration is ``conf.system.files.data.corpora`` when
    ``args.t_corpora_config`` is ``None``; otherwise it is loaded from that
    YAML file. An error is logged and nothing is created when the given path
    is not an existing file, or when the configured ``container_path``
    already exists.
    """
    conf = fetch_config(args)
    config_path = args.t_corpora_config

    if config_path is None:
        cconf = conf.system.files.data.corpora
    elif not os.path.isfile(config_path):
        logger.error(config_path + " doesn't exist")
        return
    else:
        cconf = CorporaConfig(ingest_yaml_doc(config_path))

    # Refuse to write into an existing container.
    if os.path.exists(cconf.container_path):
        logger.error(cconf.container_path +
                     " already exists. Please delete it or change the container and try again")
        return

    create_hybrid_corpora(cconf)
Exemplo n.º 17
0
def create_manual_symlink(conf):
    """Create the links listed under ``base.links`` in ``integration.yaml``.

    The file is looked up at
    ``<conf.paths.projectroot>/<conf.paths.builddata>/integration.yaml``.

    :returns: ``False`` when that file does not exist; ``True`` when it has
        no ``base`` key or ``base`` has no ``links`` key; ``None`` once the
        links section has been processed.
    """
    integration_path = os.path.join(conf.paths.projectroot,
                                    conf.paths.builddata,
                                    'integration.yaml')

    if not os.path.exists(integration_path):
        return False

    iconf = ingest_yaml_doc(integration_path)

    if 'base' not in iconf or 'links' not in iconf['base']:
        return True

    links = get_top_level_links(iconf['base']['links'], conf)
    if links:
        for name, target in links:
            # create_link takes the target first, then the link name.
            create_link(target, name)
Exemplo n.º 18
0
    def _prep_load_data(self, input_obj):
        if isinstance(input_obj, dict):
            pass
        elif not isinstance(input_obj,
                            ConfigurationBase) and os.path.isfile(input_obj):
            self._source_fn = input_obj

            if input_obj.endswith('json'):
                input_obj = ingest_json_doc(input_obj)
            elif input_obj.endswith('yaml'):
                input_obj = ingest_yaml_doc(input_obj)
            else:
                logger.error(
                    "file {0} has unknown data format".format(input_obj))
        else:
            msg = 'cannot ingest Configuration obj from object with type {0}'.format(
                type(input_obj))
            logger.critical(msg)
            raise TypeError(msg)

        return input_obj
Exemplo n.º 19
0
def build_makefile(m, conf):
    """Populate the makefile builder ``m`` with giza targets and return it.

    Sections are emitted in this order: content generation targets, sphinx
    targets, deploy targets, integration/publish targets and push targets.
    Most targets are registered under two names, the plain one and a
    ``giza``-prefixed one built with ``hyph_concat``.

    :param m: makefile builder; the methods used here are ``section_break``,
        ``newline``, ``comment``, ``target`` and ``job``.
    :param conf: project configuration; ``conf.paths.projectroot`` and
        ``conf.paths.builddata`` locate ``sphinx.yaml``, and
        ``conf.system.files.data.push`` (when present) lists deploy targets.
    :returns: the same ``m`` object that was passed in.
    """
    m.section_break('giza build integration')
    m.newline()

    m.section_break('content generation targets')
    # Each generator gets a normal target and a "force" variant.
    for gen_target in [ 'api', 'assets', 'images', 'intersphinx', 'options',
                        'primer', 'steps', 'tables', 'toc']:
        m.target([gen_target, hyph_concat('giza', gen_target)])
        m.job('giza generate ' + gen_target)

        m.target([hyph_concat('force', gen_target), hyph_concat('giza', 'force', gen_target)])
        m.job('giza --force generate ' + gen_target)
        m.newline()

    m.section_break('sphinx targets')

    sconf = ingest_yaml_doc(os.path.join(conf.paths.projectroot,
                                         conf.paths.builddata,
                                         'sphinx.yaml'))
    # Builders are the sphinx.yaml entries that neither end in 'base' nor
    # carry one of the reserved names listed below.
    builders = [b for b in sconf
                if not b.endswith('base') and b not in
                ('prerequisites', 'generated-source', 'languages', 'editions', 'sphinx_builders')]
    if 'editions' in sconf:
        editions = sconf['editions']
    else:
        editions = []

    if 'root-base' in sconf and 'languages' in sconf['root-base']:
        languages = sconf['root-base']['languages']
    else:
        languages = []

    # Names of builders already emitted, so each one is written only once.
    complete = []

    for builder in builders:
        # Only the part before the first hyphen is used as the builder name.
        if '-' in builder:
            builder = builder.split('-')[0]

        if builder in complete:
            continue

        m.comment(builder + ' targets')
        for edition in editions:
            m.target([hyph_concat(builder, edition), hyph_concat('giza', builder, edition)])
            m.job('giza sphinx --builder {0} --edition {1}'.format(builder, edition))

            for language in languages:
                m.target([hyph_concat(builder, edition, language),
                          hyph_concat('giza', builder, edition, language)])
                m.job('giza sphinx --builder {0} --edition {1} --language {2}'.format(builder, edition, language))

        if len(editions) == 0:
            # Without editions: a plain builder target plus one per language.
            m.target([hyph_concat(builder),
                      hyph_concat('giza', builder)])
            m.job('giza sphinx --builder ' + builder)

            for language in languages:
                m.target([hyph_concat(builder, language),
                          hyph_concat('giza', builder, language)])
                m.job('giza sphinx --builder {0} --language {1}'.format(builder, language))
        else:
            # With editions: the plain builder target passes every edition
            # to a single command.
            m.target([hyph_concat(builder),
                      hyph_concat('giza', builder)])
            m.job('giza sphinx --builder {0} --edition {1}'.format(builder, ' '.join(editions)))

        m.newline()
        complete.append(builder)

    m.section_break('deploy targets')
    if 'push' in conf.system.files.data:
        for ptarget in conf.system.files.data.push:
            name = ptarget['target']
            m.target(hyph_concat('deploy', name))
            m.job('giza deploy --target ' + name)
            m.newline()

    m.section_break('integration and publish targets')

    m.target(['giza-publish', 'publish'])

    # The publish command is reused below with a --language suffix.
    base_job = 'giza sphinx --builder publish'
    if len(editions) > 0:
        base_job += " --serial_sphinx --edition " + ' '.join(editions)

    m.job(base_job)
    m.newline()

    for lang in languages:
        m.target([hyph_concat('publish', lang),
                  hyph_concat('giza', 'publish', lang)])
        m.job(base_job + ' --language ' + lang)
        m.newline()

    # following targets build a group of sphinx targets followed by running
    # one or more deploy actions.
    m.section_break('push targets')
    if 'push' in conf.system.files.data:
        for ptarget in conf.system.files.data.push:
            push_base_job = 'giza push --deploy {0} --builder publish'.format(ptarget['target'])

            if len(editions) > 0:
                push_base_job += " --serial_sphinx --edition " + ' '.join(editions)

            m.target([ptarget['target'],
                      hyph_concat('giza', ptarget['target'])])
            m.job(push_base_job)
            m.newline()

            for lang in languages:
                m.target([ hyph_concat(ptarget['target'], lang),
                           hyph_concat('giza', ptarget['target'], lang) ])
                m.job(push_base_job + ' --language ' + lang)
                m.newline()

    return m
Exemplo n.º 20
0
def get_sconf_base(conf):
    """Load and return ``sphinx.yaml`` for the project.

    The file is read from
    ``<conf.paths.projectroot>/<conf.paths.builddata>/sphinx.yaml``.
    """
    return ingest_yaml_doc(
        os.path.join(conf.paths.projectroot,
                     conf.paths.builddata,
                     'sphinx.yaml'))
Exemplo n.º 21
0
def get_sconf_base(conf):
    """Load and return ``sphinx.yaml`` for the project.

    The file is read from
    ``<conf.paths.projectroot>/<conf.paths.builddata>/sphinx.yaml``.
    """
    return ingest_yaml_doc(
        os.path.join(conf.paths.projectroot,
                     conf.paths.builddata,
                     'sphinx.yaml'))