def write_script(d, stage, weightstring=None, logfile=True, include_instruction_pipe=False):
    """Write the bash wrapper script that runs the decoder and return its path.

    The script is written to ``<d.tmpdir>/decoder`` and gets the owner
    read/write/execute bits added to whatever mode it was created with.

    Parameters:
        d: run descriptor; this function reads ``d.outdir``, ``d.tmpdir``,
            ``d.scriptdir``, ``d.config`` (keys ``'rules'``, ``'decoder'``,
            optionally ``'weights'``) and, when ``include_instruction_pipe``
            is true, ``d.config_files``.
        stage: ``'nbest'`` or ``'forest'``; selects the decoder output format
            and the joiner (``join_nbests`` / ``join_forests``) the decoder
            output is piped into.
        weightstring: not referenced by this function; kept so existing
            callers that pass it keep working.
        logfile: when true, ``<d.outdir>/logs`` is created through
            ``cfg.execute`` and the script redirects the decoder's stderr to
            a per-host, per-pid log file which it gzips afterwards.
        include_instruction_pipe: when true, the decoder command is fed from
            ``<d.scriptdir>/decoder-instructions``.

    Returns:
        The path of the generated script.

    Raises:
        ValueError: if ``stage`` is neither ``'nbest'`` nor ``'forest'``.

    Side effects:
        If ``d.config['decoder']['options']`` has no ``'nbests'`` entry, it is
        set to 10 in place, so the default is visible to later readers of the
        config.
    """
    if stage not in ('nbest', 'forest'):
        raise ValueError("unknown stage %r: expected 'nbest' or 'forest'" % (stage,))

    if logfile:
        logdir = os.path.join(d.outdir, 'logs')
        cfg.execute(d, 'mkdir -p %s' % logdir)

    ruledir = d.config['rules']
    decodefile = os.path.join(d.tmpdir, 'decoder')

    # The context manager guarantees the handle is closed even when a config
    # lookup or cfg.steps() raises half-way through writing the script.
    with open(decodefile, 'w') as script:

        def emit(line):
            # One script line, newline-terminated like a print statement.
            script.write('%s\n' % (line,))

        emit('#!/usr/bin/env bash')
        emit('HOST=`hostname`')
        # TMPDIR is resolved now, at generation time, not when the script runs.
        emit('TMPDIR=%s' % os.getenv('TMPDIR', '/tmp'))
        if logfile:
            emit('LOG=%s/decode-log.$HOST-$$.log' % logdir)
        emit('cd %s' % d.tmpdir)
        emit('set -e')
        emit('set -o pipefail')

        # Everything from here to the joiner is a single shell command; each
        # emitted line ends in a backslash continuation.
        if include_instruction_pipe:
            emit('%s %s -c %s | \\' % (
                os.path.join(d.scriptdir, 'decoder-instructions'),
                ruledir,
                d.config_files,
            ))
        emit('%s %s/xsearchdb --multi-thread \\' % (d.config['decoder']['exec'], ruledir))

        if 'weights' in d.config:
            emit(' -w %s \\' % os.path.abspath(d.config['weights']))

        options = d.config['decoder']['options']
        if 'nbests' not in options:
            # Stored back into the shared config on purpose (see docstring).
            options['nbests'] = 10
        for key, value in options.items():
            emit(' --%s %s \\' % (key, value))

        infos = []
        for step in cfg.steps(d):
            if step.stage == 'decode':
                emit(' %s \\' % step.options)
                if step.info != '':
                    infos.append(step.info)
        if infos:
            emit(' -u %s \\' % ','.join(infos))

        if stage == 'nbest':
            emit(' --output-format nbest --newline-after-pop true --append-rules \\')
        else:
            emit(' --output-format forest --newline-after-pop true \\')

        if logfile:
            # stderr is written uncompressed; the script gzips it at the end.
            emit(' 2> $LOG \\')

        if stage == 'forest':
            emit("| %s/join_forests | sed -u -e 's/@UNKNOWN@//g'" % d.scriptdir)
        else:
            emit("| %s/join_nbests %s" % (d.scriptdir, options['nbests']))
        emit('\n\n')

        if logfile:
            emit('gzip $LOG \n\n')

    # Add owner rwx on top of the mode the file was created with.
    os.chmod(decodefile, stat.S_IRWXU | os.stat(decodefile)[stat.ST_MODE])
    return decodefile
def write_script(d):
    """Write the ``postprocmap`` bash script into ``d.tmpdir`` and return its path.

    The script changes into the directory that is current when this function
    runs, then executes a single pipeline: ``cat`` followed by
    ``step.executable()`` for every step from ``cfg.steps(d)`` whose stage is
    ``'post-process-extras'``, joined with ``|``. With no such steps the
    pipeline is just ``cat``.

    Parameters:
        d: run descriptor; ``d.tmpdir`` is read here and ``d`` is passed to
            ``cfg.steps``.

    Returns:
        The path of the generated script, whose mode is set to 0770.
    """
    repairs = ['cat']
    for step in cfg.steps(d):
        if step.stage == 'post-process-extras':
            repairs.append(step.executable())

    postprocmap = os.path.join(d.tmpdir, 'postprocmap')
    # Context manager so the handle is closed even if a write fails.
    with open(postprocmap, 'w') as ppm:
        ppm.write('#!/usr/bin/env bash\n')
        ppm.write('set -e\n')
        ppm.write('set -o pipefail\n')
        ppm.write('cd %s\n' % os.getcwd())
        ppm.write('|'.join(repairs) + '\n')

    # rwx for owner and group. The original also OR-ed in S_IXUSR, which
    # S_IRWXU already contains, so the resulting mode is unchanged.
    os.chmod(postprocmap, stat.S_IRWXU | stat.S_IRWXG)
    return postprocmap
def write_script(d):
    """Generate ``<d.tmpdir>/postprocmap``, a bash post-processing filter.

    The generated script enables ``set -e`` and ``set -o pipefail``, changes
    to the working directory in effect when this function is called, and runs
    ``cat`` piped through the executable of each step returned by
    ``cfg.steps(d)`` whose stage is ``'post-process-extras'``.

    Parameters:
        d: run descriptor; ``d.tmpdir`` is read here and ``d`` is passed to
            ``cfg.steps``.

    Returns:
        The path of the script; its mode is set to owner and group rwx (0770).
    """
    pipeline = ['cat']
    pipeline.extend(
        step.executable()
        for step in cfg.steps(d)
        if step.stage == 'post-process-extras'
    )

    script_lines = [
        '#!/usr/bin/env bash',
        'set -e',
        'set -o pipefail',
        'cd %s' % os.getcwd(),
        '|'.join(pipeline),
    ]

    postprocmap = os.path.join(d.tmpdir, 'postprocmap')
    # ``with`` closes the file even when the write raises.
    with open(postprocmap, 'w') as ppm:
        ppm.write('\n'.join(script_lines) + '\n')

    # S_IXUSR from the original flag list is already part of S_IRWXU, so
    # leaving it out does not change the resulting mode.
    os.chmod(postprocmap, stat.S_IRWXU | stat.S_IRWXG)
    return postprocmap
, default=argparse.SUPPRESS , action=store_training ) if __name__ == '__main__': os.putenv('LANG','C') os.putenv('LC_ALL','C') d = cfg.parse_args(parser,write='$outdir/rules.config',modeldir=True) cfgf = open(os.path.join(d.outdir,'rules.config'),'a') print >> cfgf, '\nrules:', d.outdir cfgf.close() dir = os.path.abspath(os.path.dirname(__file__)) finp = os.path.join(dir,'ghkm','filterbadinput') names = [] triplefiles = [d.config['target'], d.config['source'], d.config['align']] steps = cfg.steps(d) hp = d.hadoop training = os.path.join(d.tmpdir,'training') trainingtmp = os.path.join(d.tmpdir,'training.tmp') trainingnew = trainingtmp + '.new' pastefiles = [trainingnew] for step in steps: if step.stage == 'ghkm': try: if 'file' in step.__dict__: pastefiles.append(step.file) names.append(step.name) except: print >> sys.stderr, 'no file in', step