Exemplo n.º 1
0
def _write_filtered_corpus(infile, outfile, keep_tier):
    """Copy the corpus in *infile* to *outfile*, keeping selected tiers.

    Args:
        infile: path of the Xigt XML file to read.
        outfile: path of the Xigt XML file to write.
        keep_tier: callable taking a tier and returning a true value if
            the tier should be copied to the output.
    """
    # assuming XML for now
    with open(infile, 'r') as instream:
        src_xc = xigtxml.load(instream)
        out_xc = XigtCorpus(attributes=src_xc.attributes,
                            metadata=src_xc.metadata)
        for igt in src_xc.igts:
            out_xc.add(
                Igt(id=igt.id,
                    type=igt.type,
                    attributes=igt.attributes,
                    metadata=igt.metadata,
                    tiers=[t for t in igt.tiers if keep_tier(t)]))
        # Open the output only after the corpus is built (as the original
        # did), and inside a context manager so the handle is flushed and
        # closed deterministically instead of being leaked.
        with open(outfile, 'w') as outstream:
            xigtxml.dump(outstream, out_xc)


def separate_tiers(args):
    """Split the tiers of a Xigt corpus into one or two output files.

    Every IGT of ``args.infile`` is written to ``args.outfile`` with only
    the tiers whose ``type`` is in ``args.tiers``. If ``args.remainder``
    is set, the input is read a second time and every IGT is written to
    that path with only the tiers whose ``type`` is *not* in
    ``args.tiers``.

    Args:
        args: object providing the attributes ``tiers`` (iterable of
            tier types), ``infile``, ``outfile`` and ``remainder``
            (file paths; ``remainder`` may be falsy to skip the second
            output).
    """
    tiers = set(args.tiers)
    _write_filtered_corpus(args.infile, args.outfile,
                           lambda t: t.type in tiers)

    if not args.remainder:
        return
    # The input is loaded afresh for the remainder so that no corpus
    # objects are shared between the two outputs.
    _write_filtered_corpus(args.infile, args.remainder,
                           lambda t: t.type not in tiers)
Exemplo n.º 2
0
def _xigt_import(infile, outfile, options):
    """Read *infile*, build a Xigt corpus from it, and dump it to *outfile*.

    The IGTs are obtained from ``odin_igts(in_fh, options)`` and wrapped
    in a ``XigtCorpus`` created with ``mode='transient'`` and the
    module's ``_nsmap``; the corpus is then serialized with
    ``xigtxml.dump``.

    Args:
        infile: path of the input file (opened for reading as text).
        outfile: path of the output file (opened for writing as text).
        options: passed through unchanged to ``odin_igts``.
    """
    with open(infile, 'r') as source:
        with open(outfile, 'w') as sink:
            corpus = XigtCorpus(
                igts=odin_igts(source, options),
                nsmap=_nsmap,
                mode='transient',
            )
            xigtxml.dump(sink, corpus)
Exemplo n.º 3
0
def default_decode_xigtcorpus(elem, igts=None, mode='full'):
    """Build a ``XigtCorpus`` from an ``xigt-corpus`` XML element.

    Args:
        elem: the element to decode; its local tag name must be
            ``'xigt-corpus'`` (checked with ``assert``).
        igts: IGTs to use for the corpus. If this is falsy (``None`` or
            empty), the IGTs are decoded from the ``igt`` children of
            *elem* instead.
        mode: passed through to ``XigtCorpus``.

    Returns:
        The constructed ``XigtCorpus``.
    """
    # xigt-corpus { attrs, metadata, content }
    namespace, local_name = _qname_split(elem.tag)
    assert local_name == 'xigt-corpus'

    # Pieces are computed in the same order the constructor arguments
    # are listed: id, attributes, metadata, then the IGTs.
    corpus_id = elem.get('id')
    corpus_attrs = get_attributes(elem, ignore=('id', ))
    corpus_metadata = [decode_metadata(node)
                       for node in elem.findall('metadata')]
    if igts:
        corpus_igts = igts
    else:
        corpus_igts = [decode_igt(node) for node in elem.findall('igt')]

    return XigtCorpus(
        id=corpus_id,
        attributes=corpus_attrs,
        metadata=corpus_metadata,
        igts=corpus_igts,
        mode=mode,
        namespace=namespace,
        nsmap=elem.attrib.nsmap)
Exemplo n.º 4
0
def xigt_import(infile, outfile, options=None):
    """Convert a Toolbox file to a Xigt XML file.

    Missing entries in *options* are filled in from the module-level
    defaults. Note that a dictionary passed by the caller is updated in
    place.

    Args:
        infile: path of the Toolbox file to read.
        outfile: path of the Xigt XML file to write.
        options: optional dictionary of import settings; a new empty
            dictionary is used when ``None``.
    """
    options = {} if options is None else options

    fallbacks = (
        ('tier_types', default_tier_types),
        ('alignments', default_alignments),
        ('record_markers', default_record_markers),
        ('attribute_map', default_attribute_map),
        ('error_recovery_method', default_error_recovery_method),
    )
    for key, fallback in fallbacks:
        options.setdefault(key, fallback)

    with open(infile, 'r') as in_fh:
        with open(outfile, 'w') as out_fh:
            records = toolbox.read_toolbox_file(in_fh)
            corpus = XigtCorpus(igts=toolbox_igts(records, options),
                                mode='transient')
            xigtxml.dump(out_fh, corpus)
Exemplo n.º 5
0
def xigt_import(infile, outfile, options=None):
    """Convert a Toolbox file to a Xigt XML file.

    Missing entries in *options* are filled in from the module-level
    defaults, and ``options['tb_alignments']`` is (re)computed from the
    resulting options. Note that a dictionary passed by the caller is
    updated in place.

    Args:
        infile: path of the Toolbox file to read.
        outfile: path of the Xigt XML file to write.
        options: optional dictionary of import settings; a new empty
            dictionary is used when ``None``.
    """
    options = {} if options is None else options

    fallbacks = (
        ('record_markers', default_record_markers),
        ('igt_attribute_map', default_igt_attribute_map),
        ('tier_map', default_tier_map),
        ('make_phrase_tier', default_make_phrase_tier),
        ('tier_types', default_tier_types),
        ('alignments', default_alignments),
        ('error_recovery_method', default_error_recovery_method),
    )
    for key, fallback in fallbacks:
        options.setdefault(key, fallback)

    # just use existing info to create marker-based alignment info
    options['tb_alignments'] = _make_tb_alignments(options)

    with open(infile, 'r') as in_fh:
        with open(outfile, 'w') as out_fh:
            records = toolbox.read_toolbox_file(in_fh)
            corpus = XigtCorpus(igts=toolbox_igts(records, options),
                                mode='transient')
            xigtxml.dump(out_fh, corpus)