示例#1
0
__author__ = 'AmirPouya'
import standard_xml
import argparse

# Command-line entry point: read one cdec-format file with
# standard_xml.read_cdecformat and write the result with standard_xml.write.
parser = argparse.ArgumentParser('python LDC2XML.py')
parser.add_argument('--input', '-i', required=True,
                    help='input file passed to standard_xml.read_cdecformat')
parser.add_argument('--output', '-o', required=True,
                    help='output file passed to standard_xml.write')
# The three options below used to combine required=True with a default, which
# made the default unreachable.  They are now optional so the declared
# defaults actually apply; passing them explicitly behaves exactly as before.
parser.add_argument('--cname', default='corpus',
                    help='corpus name (default: %(default)s)')
parser.add_argument('--srclang', default='ar',
                    help='source language (default: %(default)s)')
# '--trglang' matches the double-dash style of '--srclang'; the historical
# single-dash spelling '-trglang' is kept as an alias for existing callers.
parser.add_argument('--trglang', '-trglang', default='en',
                    help='target language (default: %(default)s)')
args = parser.parse_args()

# Named input_file rather than 'file' to avoid shadowing the builtin.
input_file = args.input
out_file = args.output

# Settings handed to the writer alongside the two dictionaries.
config = {
    'corpus_name': args.cname,
    'source_lang': args.srclang,
    'target_lang': args.trglang,
}

# read_cdecformat returns an indexable result; its first two items are passed
# to the writer in that order.
dics = standard_xml.read_cdecformat(input_file)
standard_xml.write(dics[0], dics[1], out_file, config)
示例#2
0
#target_file=sys.argv[2]

# Command-line entry point: parse a source and a target XML file, extract a
# dictionary from each with dic_builder.extract_dic, and write both with
# standard_xml.write.
parser = argparse.ArgumentParser('python LDC2XML.py')
parser.add_argument('--source', '-s', required=True,
                    help='source-side XML file')
parser.add_argument('--target', '-t', required=True,
                    help='target-side XML file')
parser.add_argument('--output', '-o', required=True,
                    help='output file passed to standard_xml.write')
# The three options below used to combine required=True with a default, which
# made the default unreachable.  They are now optional so the declared
# defaults actually apply; passing them explicitly behaves exactly as before.
parser.add_argument('--cname', default='corpus',
                    help='corpus name (default: %(default)s)')
parser.add_argument('--srclang', default='ar',
                    help='source language (default: %(default)s)')
# '--trglang' matches the double-dash style of '--srclang'; the historical
# single-dash spelling '-trglang' is kept as an alias for existing callers.
parser.add_argument('--trglang', '-trglang', default='en',
                    help='target language (default: %(default)s)')
args = parser.parse_args()

source_file = args.source
target_file = args.target
out_file = args.output

# Settings handed to the writer alongside the two dictionaries.
config = {
    'corpus_name': args.cname,
    'source_lang': args.srclang,
    'target_lang': args.trglang,
}

# NOTE(review): ET is imported outside this chunk; the recover= and
# resolve_entities= keywords suggest lxml.etree -- confirm.
# NOTE(review): resolve_entities=True expands entities found in the input;
# reconsider this if the XML files can come from an untrusted source.
xml_parser = ET.XMLParser(recover=True,
                          encoding='utf-8',
                          resolve_entities=True)

# Both files are parsed with the same parser object, as before.
source_tree = ET.parse(source_file, xml_parser)
target_tree = ET.parse(target_file, xml_parser)

source_dic = dic_builder.extract_dic(source_tree.getroot())
target_dic = dic_builder.extract_dic(target_tree.getroot())
standard_xml.write(source_dic, target_dic, out_file, config)
示例#3
0
import argparse
import standard_xml

# Command-line entry point: read a source and a target file with
# standard_xml.read_rawtext and write the result with standard_xml.write.
parser = argparse.ArgumentParser('python RAW2XML.py')
parser.add_argument('--source', '-s', required=True,
                    help='source-side file passed to standard_xml.read_rawtext')
parser.add_argument('--target', '-t', required=True,
                    help='target-side file passed to standard_xml.read_rawtext')
parser.add_argument('--output', '-o', required=True,
                    help='output file passed to standard_xml.write')
# The three options below used to combine required=True with a default, which
# made the default unreachable.  They are now optional so the declared
# defaults actually apply; passing them explicitly behaves exactly as before.
parser.add_argument('--cname', default='corpus',
                    help='corpus name (default: %(default)s)')
parser.add_argument('--srclang', default='ar',
                    help='source language (default: %(default)s)')
# '--trglang' matches the double-dash style of '--srclang'; the historical
# single-dash spelling '-trglang' is kept as an alias for existing callers.
parser.add_argument('--trglang', '-trglang', default='en',
                    help='target language (default: %(default)s)')
args = parser.parse_args()

source_file = args.source
target_file = args.target
out_file = args.output

# Settings handed to the writer alongside the two dictionaries.
config = {
    'corpus_name': args.cname,
    'source_lang': args.srclang,
    'target_lang': args.trglang,
}

# A single parenthesised string prints identically under the Python 2 print
# statement and the Python 3 print function, so the script parses on both.
print('Start Building Dics:')
dics = standard_xml.read_rawtext(source_file, target_file)
print('Start Writing XML')
# read_rawtext returns an indexable result; its first two items are passed to
# the writer in that order.
standard_xml.write(dics[0], dics[1], out_file, config)


示例#4
0
# Option declarations for `parser`, which is created above this point, followed
# by the conversion itself: parse a source and a target XML file, extract a
# dictionary from each, and write both with standard_xml.write.
parser.add_argument('--source', '-s', required=True,
                    help='source-side XML file')
parser.add_argument('--target', '-t', required=True,
                    help='target-side XML file')
parser.add_argument('--output', '-o', required=True,
                    help='output file passed to standard_xml.write')
# The three options below used to combine required=True with a default, which
# made the default unreachable.  They are now optional so the declared
# defaults actually apply; passing them explicitly behaves exactly as before.
parser.add_argument('--cname', default='corpus',
                    help='corpus name (default: %(default)s)')
parser.add_argument('--srclang', default='ar',
                    help='source language (default: %(default)s)')
# '--trglang' matches the double-dash style of '--srclang'; the historical
# single-dash spelling '-trglang' is kept as an alias for existing callers.
parser.add_argument('--trglang', '-trglang', default='en',
                    help='target language (default: %(default)s)')
args = parser.parse_args()

source_file = args.source
target_file = args.target
out_file = args.output

# Settings handed to the writer alongside the two dictionaries.
config = {
    'corpus_name': args.cname,
    'source_lang': args.srclang,
    'target_lang': args.trglang,
}

# NOTE(review): ET is imported outside this chunk; the recover= and
# resolve_entities= keywords suggest lxml.etree -- confirm.
# NOTE(review): resolve_entities=True expands entities found in the input;
# reconsider this if the XML files can come from an untrusted source.
xml_parser = ET.XMLParser(recover=True,
                          encoding='utf-8',
                          resolve_entities=True)

# Both files are parsed with the same parser object, as before.
source_tree = ET.parse(source_file, xml_parser)
target_tree = ET.parse(target_file, xml_parser)

source_dic = dic_builder.extract_dic(source_tree.getroot())
target_dic = dic_builder.extract_dic(target_tree.getroot())
standard_xml.write(source_dic, target_dic, out_file, config)