示例#1
0
def main(argv):
    """Split corpus sentences into positive and negative files by label.

    Loads documents from INDIR with ``load_hoccorpus`` and writes the text
    of every sentence, one per line, to ``OUTFN.pos`` when LABEL is among
    the sentence's labels and to ``OUTFN.neg`` otherwise. Both output files
    are opened for writing as UTF-8 before any sentence is processed.

    Args:
        argv: Command-line style argument list of exactly four items:
            program name, INDIR, LABEL, OUTFN.

    Returns:
        1 if the argument count is wrong; otherwise the function falls off
        the end and returns None.
    """
    if len(argv) != 4:
        # sys.stderr.write behaves the same on Python 2 and 3, whereas the
        # ``print >> sys.stderr, ...`` statement form raises TypeError when
        # executed on Python 3.
        sys.stderr.write('Usage: hoccorpus2posneg INDIR LABEL OUTFN\n')
        return 1
    indir, label, outfn = argv[1:]

    documents = load_hoccorpus(indir)

    posfn, negfn = outfn + '.pos', outfn + '.neg'
    with io.open(posfn, 'wt', encoding='utf-8') as posout, \
            io.open(negfn, 'wt', encoding='utf-8') as negout:
        for d in documents:
            for s in d.sentences:
                # Route each sentence by whether it carries the target label.
                out = posout if label in s.labels else negout
                out.write(s.text + u'\n')
示例#2
0
def main(argv):
    """Write sentences to ``OUTFN.pos`` or ``OUTFN.neg`` depending on LABEL.

    Documents are obtained from ``load_hoccorpus(INDIR)``. For every
    sentence of every document, the sentence text followed by a newline is
    appended to the ``.pos`` file if LABEL is in the sentence's labels, and
    to the ``.neg`` file if it is not. Output is encoded as UTF-8.

    Args:
        argv: Argument list of exactly four items: program name, INDIR,
            LABEL, OUTFN.

    Returns:
        1 when ``argv`` does not have exactly four items; otherwise None
        (no explicit return on the success path).
    """
    if len(argv) != 4:
        # Use a plain stream write: the Python 2 ``print >> stream`` form is
        # a runtime TypeError on Python 3, while this works on both.
        sys.stderr.write('Usage: hoccorpus2posneg INDIR LABEL OUTFN\n')
        return 1
    indir, label, outfn = argv[1:]

    documents = load_hoccorpus(indir)

    posfn, negfn = outfn + '.pos', outfn + '.neg'
    with io.open(posfn, 'wt', encoding='utf-8') as posout, \
            io.open(negfn, 'wt', encoding='utf-8') as negout:
        for d in documents:
            for s in d.sentences:
                # Labelled sentences go to the .pos file, the rest to .neg.
                out = posout if label in s.labels else negout
                out.write(s.text + u'\n')
示例#3
0
def main(argv):
    """Export each corpus document as a ``.txt`` / ``.ann`` file pair.

    Documents are obtained from ``load_hoccorpus(INDIR)``. For each document
    ``d``, ``d.text`` is written to ``OUTDIR/<d.id>.txt`` and the lines
    returned by ``d.to_standoff()``, joined with newlines (no trailing
    newline is added), are written to ``OUTDIR/<d.id>.ann``. Files are
    written as UTF-8.

    Args:
        argv: Argument list of exactly three items: program name, INDIR,
            OUTDIR. OUTDIR must already exist as a directory.

    Returns:
        1 if the argument count is wrong or OUTDIR is not a directory;
        otherwise None (no explicit return on the success path).
    """
    if len(argv) != 3:
        # sys.stderr.write works on Python 2 and 3; the original
        # ``print >> sys.stderr, ...`` form raises TypeError on Python 3.
        sys.stderr.write('Usage: hoccorpus2ann INDIR OUTDIR\n')
        return 1
    indir, outdir = argv[1:]

    # Check the output directory before loading the corpus so a bad OUTDIR
    # is reported without doing the load first.
    if not path.isdir(outdir):
        sys.stderr.write('%s is not a directory\n' % outdir)
        return 1

    documents = load_hoccorpus(indir)

    for d in documents:
        txtout = path.join(outdir, d.id + '.txt')
        with io.open(txtout, 'wt', encoding='utf-8') as out:
            out.write(d.text)
        annout = path.join(outdir, d.id + '.ann')
        with io.open(annout, 'wt', encoding='utf-8') as out:
            out.write(u'\n'.join(d.to_standoff()))