Пример #1
0
def write_script(d, stage, weightstring=None, logfile=True, include_instruction_pipe=False):
    """Generate the bash script that runs the decoder and return its path.

    The script is written to ``<d.tmpdir>/decoder``.  When run, it changes
    into ``d.tmpdir``, enables ``set -e`` and ``set -o pipefail``, invokes
    the configured decoder executable on ``<rules>/xsearchdb`` and pipes the
    result through ``join_forests`` or ``join_nbests`` from ``d.scriptdir``.

    Args:
        d: pipeline configuration object.  This function reads its
            ``outdir``, ``tmpdir``, ``scriptdir`` and ``config`` attributes,
            plus ``config_files`` when ``include_instruction_pipe`` is set.
        stage: output mode, either ``'nbest'`` or ``'forest'``.
        weightstring: accepted for interface compatibility; not used here.
        logfile: when true, ``<d.outdir>/logs`` is created (via
            ``cfg.execute``), the decoder's stderr is redirected to a log
            named after the host and shell PID, and the log is gzipped once
            the pipeline finishes.
        include_instruction_pipe: when true, the decoder is fed by the
            ``decoder-instructions`` script found in ``d.scriptdir``.

    Returns:
        Path of the generated script.

    Raises:
        ValueError: if ``stage`` is not ``'nbest'`` or ``'forest'``.

    Side effects:
        If the decoder options have no ``'nbests'`` entry, one is added to
        ``d.config`` with the value 10.
    """
    if stage not in ('nbest', 'forest'):
        raise ValueError("stage must be 'nbest' or 'forest', got %r" % (stage,))
    if logfile:
        logdir = os.path.join(d.outdir, 'logs')
        cfg.execute(d, 'mkdir -p %s' % logdir)
    ruledir = d.config['rules']
    decodefile = os.path.join(d.tmpdir, 'decoder')
    infos = []
    # The context manager guarantees the handle is closed even when a config
    # lookup or a write fails half-way through.
    with open(decodefile, 'w') as decodescript:
        print >> decodescript, '#!/usr/bin/env bash'
        print >> decodescript, 'HOST=`hostname`'
        # TMPDIR is resolved now, at generation time, not when the script runs.
        print >> decodescript, 'TMPDIR=%s' % os.getenv('TMPDIR', '/tmp')
        if logfile:
            print >> decodescript, 'LOG=%s/decode-log.$HOST-$$.log' % logdir
        print >> decodescript, 'cd %s' % d.tmpdir
        print >> decodescript, 'set -e'
        print >> decodescript, 'set -o pipefail'
        if include_instruction_pipe:
            print >> decodescript, os.path.join(d.scriptdir, 'decoder-instructions'), ruledir, '-c %s | \\' % d.config_files

        # Every following line ends in a backslash so that the decoder
        # invocation and its options form one shell command.
        print >> decodescript, d.config['decoder']['exec'], "%s/xsearchdb" % ruledir, '--multi-thread \\'
        if 'weights' in d.config:
            print >> decodescript, '  -w %s \\' % os.path.abspath(d.config['weights'])
        options = d.config['decoder']['options']
        if 'nbests' not in options:
            # Stored back into the config: join_nbests below reads it too.
            options['nbests'] = 10
        for key, value in options.items():
            print >> decodescript, '  --%s %s \\' % (key, value)
        for step in cfg.steps(d):
            if step.stage == 'decode':
                print >> decodescript, '  %s \\' % step.options
                if step.info != '':
                    infos.append(step.info)
        if infos:
            print >> decodescript, '  -u %s \\' % ','.join(infos)

        if stage == 'nbest':
            print >> decodescript, '  --output-format nbest --newline-after-pop true --append-rules \\'
        else:
            print >> decodescript, '  --output-format forest --newline-after-pop true \\'
        if logfile:
            print >> decodescript, ' 2> $LOG \\'

        # Final pipeline stage: merge the decoder output.
        if stage == 'forest':
            print >> decodescript, "| %s/join_forests | sed -u -e 's/@UNKNOWN@//g'" % d.scriptdir
        else:
            print >> decodescript, "| %s/join_nbests %s" % (d.scriptdir, options['nbests'])
        print >> decodescript, '\n\n'
        if logfile:
            print >> decodescript, 'gzip $LOG \n\n'

    # Add owner rwx on top of whatever mode the file was created with.
    os.chmod(decodefile, stat.S_IRWXU | os.stat(decodefile)[stat.ST_MODE])
    return decodefile
Пример #2
0
def write_script(d):
    """Generate the post-processing bash script and return its path.

    The script is written to ``<d.tmpdir>/postprocmap``.  It enables
    ``set -e`` and ``set -o pipefail``, changes into the directory that was
    current when this function ran, and then runs a single pipeline: ``cat``
    followed by the executable of every step whose stage is
    ``'post-process-extras'``, in the order ``cfg.steps(d)`` yields them.

    Args:
        d: pipeline configuration object; only ``tmpdir`` is read here, the
            object is otherwise handed to ``cfg.steps``.

    Returns:
        Path of the generated script, which is given mode rwx for owner and
        group.
    """
    # 'cat' heads the pipeline so the script still passes its input through
    # when no step contributes a command.
    pipeline = ['cat']
    pipeline.extend(step.executable()
                    for step in cfg.steps(d)
                    if step.stage == 'post-process-extras')

    postprocmap = os.path.join(d.tmpdir, 'postprocmap')
    # The context manager closes the file even if a write fails.
    with open(postprocmap, 'w') as ppm:
        ppm.write('\n'.join([
            '#!/usr/bin/env bash',
            'set -e',
            'set -o pipefail',
            'cd %s' % os.getcwd(),
            '|'.join(pipeline),
        ]) + '\n')
    # S_IRWXU already contains S_IXUSR, so the mask needs only two flags.
    os.chmod(postprocmap, stat.S_IRWXU | stat.S_IRWXG)

    return postprocmap
Пример #3
0
def write_script(d):
    """Write ``<d.tmpdir>/postprocmap``, a bash post-processing filter.

    The generated script turns on ``set -e`` / ``set -o pipefail``, ``cd``s
    to the working directory in effect when this function was called, and
    runs ``cat`` piped into the executable of each step returned by
    ``cfg.steps(d)`` whose stage is ``'post-process-extras'``.

    Args:
        d: pipeline configuration object; ``d.tmpdir`` decides where the
            script is placed and ``d`` itself is passed to ``cfg.steps``.

    Returns:
        The script's path.  The file mode is set to rwx for owner and group.
    """
    # Start from 'cat' so the pipeline is valid even with no extra steps.
    commands = ['cat']
    for step in cfg.steps(d):
        if step.stage == 'post-process-extras':
            commands.append(step.executable())

    header = [
        '#!/usr/bin/env bash\n',
        'set -e\n',
        'set -o pipefail\n',
    ]

    postprocmap = os.path.join(d.tmpdir, 'postprocmap')
    # 'with' ensures the handle is released on every exit path.
    with open(postprocmap, 'w') as script:
        script.writelines(header)
        script.write('cd %s\n' % os.getcwd())
        script.write('|'.join(commands) + '\n')

    # Owner and group get rwx; S_IXUSR is already part of S_IRWXU.
    os.chmod(postprocmap, stat.S_IRWXU | stat.S_IRWXG)

    return postprocmap
Пример #4
0
                   , default=argparse.SUPPRESS
                   , action=store_training
                   )
if __name__ == '__main__':
    os.putenv('LANG','C')
    os.putenv('LC_ALL','C')
    d = cfg.parse_args(parser,write='$outdir/rules.config',modeldir=True)
    cfgf = open(os.path.join(d.outdir,'rules.config'),'a')
    print >> cfgf, '\nrules:', d.outdir
    cfgf.close()
    dir = os.path.abspath(os.path.dirname(__file__))
    finp = os.path.join(dir,'ghkm','filterbadinput')
    names = []
    triplefiles = [d.config['target'], d.config['source'], d.config['align']]
    
    steps = cfg.steps(d)

    hp = d.hadoop

    training = os.path.join(d.tmpdir,'training')
    trainingtmp = os.path.join(d.tmpdir,'training.tmp')
    trainingnew = trainingtmp + '.new'
    pastefiles = [trainingnew]
    for step in steps:
        if step.stage == 'ghkm':
          try:
            if 'file' in step.__dict__:
                pastefiles.append(step.file)
            names.append(step.name)
          except:
            print >> sys.stderr, 'no file in', step