def main(args, opts):
    """Parse a kanjidic XML file and write the result as a .pgi file.

    args -- Sequence whose first item is the path of the XML input file.
    opts -- Options object.  The attributes read here are:
        l -- Log file name; when empty, messages go to stderr.
        e -- Encoding used when opening the log file.
        o -- Output file name.  When empty, it is derived from the
             input file's base name with a ".pgi" extension.  A
             value of "-" is replaced by None.
        g -- Optional comma-separated list of language codes, each
             looked up through 'iso639_1_to_2' and then KW.LANG.
        t -- Passed to pgi.initialize().
        b, c -- Passed through unchanged to parse_xmlfile().
        k -- When true, pgi.finalize() is called with a false third
             argument.

    Note that 'opts' is modified in place: 'opts.l' is replaced by an
    open file object and 'opts.o' by the resolved output name.
    """
    global Opts, Char, Lineno, KW

    # (Re)set the module-level state used elsewhere in this module.
    Opts, Char, Lineno = opts, '', 1
    KW = jdb.Kwds(jdb.std_csv_dir())
    jdb.KW = KW

    # Replace the log file name with an open stream.  It is left open
    # on purpose: it stays reachable through the global 'Opts'.
    opts.l = open(opts.l, "w", encoding=opts.e) if opts.l else sys.stderr

    if opts.o == "-":
        opts.o = None
    elif not opts.o:
        # Default output name: input file's base name, extension
        # swapped for ".pgi".
        stem = os.path.splitext(os.path.basename(args[0]))[0]
        opts.o = stem + ".pgi"

    langs = None
    if opts.g:
        langs = [KW.LANG[iso639_1_to_2[code]].id
                 for code in opts.g.split(',')]

    # The same id is given to the parser and to the corpus record.
    corpus_id = 4
    outfiles = pgi.initialize(opts.t)
    srcdate = parse_xmlfile(args[0], corpus_id, outfiles,
                            opts.b, opts.c, langs)
    corpus_rec = jdb.Obj(id=corpus_id, kw='kanjidic',
                         descr='kanjidic2.xml', dt=srcdate,
                         seq='seq_kanjidic',
                         srct=KW.SRCT['kanjidic'].id)
    pgi.wrcorp(corpus_rec, outfiles)
    pgi.finalize(outfiles, opts.o, not opts.k)
    print("\nDone!", file=sys.stderr)
def main(args, opts):
    """Parse an examples file and write the entries as a .pgi file.

    args -- Sequence whose first item is the path of the input file.
    opts -- Options object.  The attributes read here are:
        logfile -- Log file name; when empty, messages go to stderr.
        encoding -- Encoding used when opening the log file.
        verbose -- Bound into the global msg() function.
        corpus -- Passed to pgi.parse_corpus_opt().
        tempdir -- Passed to pgi.initialize().
        noaction -- When true, the input is parsed but no work files
            are created and nothing is written.
        begin -- Passed to parse_ex().
        count -- When non-zero, stop after this many entries.
        output, keep -- Output file name and "keep" flag used in the
            call to pgi.finalize().

    Side effects: rebinds the module globals 'Opts', 'KW' and 'msg'
    (and 'jdb.KW'), and writes progress dots to stdout.
    """
    global msg, Opts, KW
    Opts = opts
    jdb.KW = KW = jdb.Kwds(jdb.std_csv_dir())

    # Create a globally accessible function, msg(), that has
    # 'logfile' and 'opts.verbose' already bound and which will
    # be called elsewhere when there is a need to write a message
    # to the logfile.  The logfile is deliberately left open since
    # msg() remains usable after main() returns.
    logfile = sys.stderr
    if opts.logfile:
        logfile = open(opts.logfile, "w", encoding=opts.encoding)

    def msg(message):
        _msg(logfile, opts.verbose, message)

    fin = ABPairReader(args[0], encoding='utf-8')

    # FIXME: following gives localtime, change to utc or lt+tz.
    mtime = datetime.date.fromtimestamp(os.stat(args[0]).st_mtime)
    corpid, corprec = pgi.parse_corpus_opt(
        opts.corpus, "examples", mtime, KW.SRCT['examples'].id)

    # Create the work files once, and only when something will be
    # written to them.  (This used to be done twice, which orphaned
    # the first set and created work files even in no-action mode.)
    tmpfiles = None
    if not opts.noaction:
        tmpfiles = pgi.initialize(opts.tempdir)
        if corprec:
            pgi.wrcorp(corprec, tmpfiles)

    for eid, entr in enumerate(parse_ex(fin, opts.begin)):
        if not opts.noaction:
            entr.src = corpid
            jdb.setkeys(entr, eid + 1)
            pgi.wrentr(entr, tmpfiles)
        # Progress indicator: one dot per 2000 entries, starting
        # with the first.
        if not (eid % 2000):
            sys.stdout.write('.')
            sys.stdout.flush()
        if opts.count and eid + 1 >= opts.count:
            break
    sys.stdout.write('\n')

    if not opts.noaction:
        pgi.finalize(tmpfiles, opts.output, not opts.keep)
def main(args, opts):
    """Parse a JMdict-style XML file and write the result as a .pgi file.

    args -- Sequence whose first item is the path of the XML input file.
    opts -- Options object.  The attributes read here are:
        database -- When set, keyword data is taken from the database
            opened with jdb.dbOpen(); otherwise from the standard
            csv directory.
        lang -- Optional comma-separated list of KW.LANG keys.
        tempdir -- Passed to pgi.initialize().
        logfile -- Log file name; when empty, messages go to stderr.
        encoding -- Encoding used when opening the log file.
        begin, count, extract -- Passed to Jmparser.parse_xmlfile().
        sequence -- Pair giving the 'seqnum_init' and 'seqnum_incr'
            arguments of Jmparser.parse_xmlfile().
        corpus -- Passed to pgi.parse_corpus_opt().
        output, keep -- Output file name and "keep" flag used in the
            call to pgi.finalize().

    Raises ValueError when an entry carries no 'src' value of its own
    and no corpus id has been determined from the file's root element.

    Side effects: rebinds the module global 'KW' (and, when no database
    is used, 'jdb.KW') and writes progress dots to stdout.
    """
    global KW
    if opts.database:
        jdb.dbOpen(opts.database, **jdb.dbopts(opts))
        KW = jdb.KW
    else:
        jdb.KW = KW = jdb.Kwds(jdb.std_csv_dir())

    xlang = None
    if opts.lang:
        xlang = [KW.LANG[x].id for x in opts.lang.split(',')]

    # 'corpid' is only assigned when a 'root' item has been processed
    # successfully; remember whether that has happened so that a
    # missing corpus is reported clearly instead of surfacing as an
    # UnboundLocalError.
    corpid = None
    corpus_known = False

    #FIXME: we open the xml file with utf-8 encoding even though
    #  its encoding may be given within the file and may be different.
    with open(args[0], encoding='utf-8') as xmlfile:
        inpf = jmxml.JmdictFile(xmlfile)
        tmpfiles = pgi.initialize(opts.tempdir)
        if not opts.logfile:
            logfile = sys.stderr
        else:
            logfile = open(opts.logfile, "w", encoding=opts.encoding)
        try:
            eid = 0
            jmparser = jmxml.Jmparser(KW, logfile=logfile)
            for typ, entr in jmparser.parse_xmlfile(
                    inpf, opts.begin, opts.count, opts.extract, xlang,
                    toptag=True,
                    seqnum_init=opts.sequence[0],
                    seqnum_incr=opts.sequence[1]):
                if typ == 'entry':
                    eid += 1
                    # Progress indicator: one dot per 1800 entries,
                    # starting with the first.
                    if not ((eid - 1) % 1800):
                        sys.stdout.write('.')
                        sys.stdout.flush()
                        logfile.flush()
                    if not getattr(entr, 'src', None):
                        if not corpus_known:
                            raise ValueError(
                                "entry %d has no 'src' value and no "
                                "corpus could be determined from the "
                                "xml file or the corpus option" % eid)
                        entr.src = corpid
                    jdb.setkeys(entr, eid)
                    pgi.wrentr(entr, tmpfiles)
                elif typ == 'corpus':
                    pgi.wrcorp(entr, tmpfiles)
                elif typ == 'grpdef':
                    pgi.wrgrpdef(entr, tmpfiles)
                elif typ == 'root':
                    # Note that 'entr' here is actually the tag name of
                    # the top-level element in the xml file, typically
                    # either "JMdict" or "JMnedict".
                    try:
                        corpid, corprec = pgi.parse_corpus_opt(
                            opts.corpus, entr, inpf.created, kw=KW)
                    except KeyError:
                        # Deliberately tolerated: entries that carry
                        # their own 'src' value can still be loaded.
                        pass
                    else:
                        corpus_known = True
                        if corprec:
                            pgi.wrcorp(corprec, tmpfiles)
        finally:
            # Only close a log file that was opened here.
            if logfile is not sys.stderr:
                logfile.close()

    sys.stdout.write('\n')
    pgi.finalize(tmpfiles, opts.output, not opts.keep)
def main(args, opts):
    """Parse a sound-description XML file and write it as a .pgi file.

    args -- Sequence whose first item is the path of the XML input
        file and whose second item is the output file name given to
        pgi.finalize().
    opts -- Options object.  The attributes read here are:
        tempdir -- Passed to pgi.initialize().
        keep -- When true, pgi.finalize() is called with
            delfiles=False.
    """
    # Maps the record type reported by jmxml.parse_sndfile() to the
    # key of the work file that receives rows of that type.
    workfile_key = {'vol': 'sndvol', 'sel': 'sndfile', 'clip': 'snd'}

    # Open explicitly as utf-8 rather than with the locale's default
    # encoding, and close the file as soon as parsing is done.
    with open(args[0], encoding='utf-8') as xmlfile:
        inpf = jmxml.JmdictFile(xmlfile)
        workfiles = pgi.initialize(opts.tempdir)
        for obj, typ, _lineno in jmxml.parse_sndfile(inpf):
            pgi._wrrow(obj, workfiles[workfile_key[typ]])

    pgi.finalize(workfiles, args[1], delfiles=(not opts.keep),
                 transaction=True)