def corpus_to_xigt(corp: Corpus):
    """
    Given an INTENT2 Corpus object, return its representation in xigtxml
    format.

    Every instance is converted with ``instance_to_xigt`` and then
    trial-serialized on its own inside a throwaway ``XigtCorpus``.  Only
    instances that survive that check are appended to the output corpus, so
    a single bad instance cannot make the final ``dumps`` call fail.

    :param corp: iterable of INTENT2 instances; each must expose ``id``
                 (used in error messages).
    :return: the result of ``dumps`` on the corpus of successfully
             converted instances.
    """
    xc = XigtCorpus()
    skipped = 0
    EXPORT_LOG.info('Preparing to export INTENT2 Corpus to Xigt')

    for inst in corp:
        xigt_inst = instance_to_xigt(inst)
        try:
            # Serialize the instance in isolation first; the result is
            # discarded, we only care whether it raises.
            dumps(XigtCorpus(igts=[xigt_inst]))
            # The append stays inside the ``try`` so that any
            # TypeError/XigtError it raises is handled the same way as
            # before.
            xc.append(xigt_inst)
        except (TypeError, XigtError) as err:
            skipped += 1
            EXPORT_LOG.error(
                'Error in serializing instance "%s": %s', inst.id, err)

    if skipped:
        # Make it visible that the "success" message below refers to a
        # partial corpus.
        EXPORT_LOG.warning(
            '%d instance(s) could not be serialized and were skipped.',
            skipped)

    EXPORT_LOG.info(
        'Corpus successfully converted. Returning string for writing.')
    return dumps(xc)
def run(args):
    """
    Clean Xigt corpora.

    When ``args.infiles`` is non-empty, each named file is loaded in
    ``'full'`` mode, passed through ``clean_corpus`` and written back to the
    same path.  Otherwise a single corpus is read from standard input,
    cleaned, and its serialization is printed.
    """
    if not args.infiles:
        # No files given: act as a stdin -> stdout filter.
        corpus = xigtxml.load(sys.stdin, mode='full')
        clean_corpus(corpus)
        print(xigtxml.dumps(corpus))
        return

    for path in args.infiles:
        logging.info('Cleaning {}'.format(path))
        corpus = xigtxml.load(path, mode='full')
        clean_corpus(corpus)
        # Overwrites the input file with the cleaned corpus.
        xigtxml.dump(path, corpus)
def run(args):
    """
    Normalize Xigt corpora.

    When ``args.infiles`` is non-empty, each named file is loaded in
    ``'full'`` mode, passed through ``normalize_corpus`` and written back to
    the same path.  Otherwise a single corpus is read from standard input,
    normalized, and its serialization is printed.
    """
    paths = args.infiles
    if not paths:
        # No files given: act as a stdin -> stdout filter.
        corpus = xigtxml.load(sys.stdin, mode='full')
        normalize_corpus(corpus)
        print(xigtxml.dumps(corpus))
        return

    for filename in paths:
        logging.info('Normalizing {}'.format(filename))
        corpus = xigtxml.load(filename, mode='full')
        normalize_corpus(corpus)
        # Overwrites the input file with the normalized corpus.
        xigtxml.dump(filename, corpus)
def run(args):
    """
    Sort the contents of the Xigt corpus stored at ``args.infile``.

    Three independent sorting levels are applied, each only if requested:

    * IGTs, by ``args.igt_key``;
    * tiers, by ``args.tier_key`` or, only when no tier key is given, by
      the comma-separated reference attributes in ``args.tier_deps``;
    * items within every tier, by ``args.item_key``.

    The result is written back to ``args.infile`` when ``args.in_place`` is
    set, and printed otherwise.
    """
    corpus = xigtxml.load(args.infile)

    # --- IGT level ---
    if args.igt_key:
        logging.info('Sorting %s IGTs' % args.infile)
        corpus.sort(key=make_sortkey(args.igt_key))

    # --- tier level: an explicit key takes precedence over dependencies ---
    if args.tier_key:
        logging.info('Sorting %s tiers by key' % args.infile)
        for igt in corpus:
            igt.sort(key=make_sortkey(args.tier_key))
    elif args.tier_deps:
        logging.info('Sorting %s tiers by ref-dependencies' % args.infile)
        refattrs = list(map(str.strip, args.tier_deps.split(',')))
        for igt in corpus:
            igt.sort_tiers(refattrs=refattrs)

    # --- item level ---
    if args.item_key:
        logging.info('Sorting %s items by key' % args.infile)
        every_tier = (tier for igt in corpus for tier in igt)
        for tier in every_tier:
            tier.sort(key=make_sortkey(args.item_key))

    # --- output ---
    if not args.in_place:
        print(xigtxml.dumps(corpus))
        return
    xigtxml.dump(args.infile, corpus)
)

# cycle 1
# Tier "w" names itself ("w") as its segmentation target, and its only item
# "w1" names itself ("w1") as its segmentation target.
# NOTE(review): the corpus id "xc1" is reused by xc5 below -- presumably
# harmless for standalone fixtures; confirm.
xc4 = XigtCorpus(
    id="xc1",
    igts=[Igt(id="i1", tiers=[Tier(id="w", type="words", segmentation="w", items=[Item(id="w1", segmentation="w1")])])],
)

# cycle 2
# Same self-referencing tier "w", but with two items: "w1" and "w2" both
# give "w1,w2" as their segmentation target, so each refers to itself and
# to the other.
xc5 = XigtCorpus(
    id="xc1",
    igts=[
        Igt(
            id="i1",
            tiers=[
                Tier(
                    id="w",
                    type="words",
                    segmentation="w",
                    items=[Item(id="w1", segmentation="w1,w2"), Item(id="w2", segmentation="w1,w2")],
                )
            ],
        )
    ],
)

# When run as a script, print the serialization of one fixture.
# (xc1m is defined outside the visible part of this file.)
if __name__ == "__main__":
    from xigt.codecs import xigtxml

    print(xigtxml.dumps(xc1m))
else: return xigtxml.default_decode_meta(elem) ### Encoding ### def encode_meta(meta): metatype = meta.type.lower() if metatype in ('judgment', 'vetted', 'phenomena'): attributes = dict(type=meta.type, **meta.attributes) e = etree.Element('meta', attrib=attributes) if metatype == 'phenomena': for phenomenon in meta.content: p = etree.Element('phenomenon') p.text = phenomenon e.append(p) return e else: return xigtxml.default_encode_meta(meta) ### Function maps ### xigtxml.decode_meta = matrix_decode_meta xigtxml.encode_meta = matrix_encode_meta if __name__ == '__main__': import sys f = sys.argv[1] xc = xigtxml.load(open(f,'r')) print(xigtxml.dumps(xc, pretty_print=True)) xigtxml.dump(open('abkhaz-out.xigt','w'), xc, pretty_print=True)