Пример #1
0
def main(args, opts):
    """Parse an XML dictionary file and write its contents through 'pgi'.

    args -- Positional arguments; args[0] is the path of the XML file.
    opts -- Parsed command line options.  Attributes read here: database,
        lang, tempdir, logfile, encoding, begin, count, extract, sequence,
        corpus, output and keep.

    Sets the module global 'KW' (and jdb.KW when no database is given).
    The input file and any log file opened here are closed before
    returning, including when an exception propagates.
    """
    global KW

    # Keyword tables come from the database when one was named,
    # otherwise from jdb's standard csv directory.
    if opts.database:
        jdb.dbOpen(opts.database, **jdb.dbopts(opts))
        KW = jdb.KW
    else:
        jdb.KW = KW = jdb.Kwds(jdb.std_csv_dir())

    xlang = None
    if opts.lang:
        xlang = [KW.LANG[x].id for x in opts.lang.split(',')]

    #FIXME: we open the xml file with utf-8 encoding even though
    # its encoding may be given within the file and may be different.
    xmlfile = open(args[0], encoding='utf-8')
    logfile = sys.stderr
    try:
        inpf = jmxml.JmdictFile(xmlfile)
        tmpfiles = pgi.initialize(opts.tempdir)
        if opts.logfile:
            logfile = open(opts.logfile, "w", encoding=opts.encoding)
        eid = 0
        jmparser = jmxml.Jmparser(KW, logfile=logfile)
        for typ, entr in jmparser.parse_xmlfile(inpf,
                                                opts.begin,
                                                opts.count,
                                                opts.extract,
                                                xlang,
                                                toptag=True,
                                                seqnum_init=opts.sequence[0],
                                                seqnum_incr=opts.sequence[1]):
            if typ == 'entry':
                eid += 1
                # Progress indicator: one dot on the first entry and then
                # on every 1800th entry after it.
                if not ((eid - 1) % 1800):
                    sys.stdout.write('.')
                    sys.stdout.flush()
                    logfile.flush()
                # NOTE(review): 'corpid' is only bound by a successful
                # 'root' event below; if that event is missing or raised
                # KeyError, this line fails with UnboundLocalError.
                if not getattr(entr, 'src', None): entr.src = corpid
                jdb.setkeys(entr, eid)
                pgi.wrentr(entr, tmpfiles)
            elif typ == 'corpus':
                pgi.wrcorp(entr, tmpfiles)
            elif typ == 'grpdef':
                pgi.wrgrpdef(entr, tmpfiles)
            elif typ == 'root':
                # Note that 'entr' here is actually the tag name of the
                # top-level element in the xml file, typically either
                # "JMdict" or "JMnedict".
                try:
                    corpid, corprec = pgi.parse_corpus_opt(
                        opts.corpus, entr, inpf.created, kw=KW)
                except KeyError:
                    pass
                else:
                    if corprec: pgi.wrcorp(corprec, tmpfiles)

        sys.stdout.write('\n')
        pgi.finalize(tmpfiles, opts.output, not opts.keep)
    finally:
        # Never close sys.stderr; only a log file we opened ourselves.
        if logfile is not sys.stderr: logfile.close()
        xmlfile.close()
Пример #2
0
def main(args, opts):
    """Find and delete entries selected by status, corpus and age.

    args -- Positional arguments (not used here).
    opts -- Parsed command line options.  Attributes read here: database,
        deleted, rejected, corpus, age, outfile, verbose and noaction.

    The deletion runs inside an explicit transaction that is rolled back
    when opts.noaction is set and committed otherwise.  Diagnostics go to
    sys.stderr using the print function, as elsewhere in this code; the
    former 'print >> sys.stderr' statements fail on Python 3.
    """
    # Open the database.  jdb.dbopts() extracts the db-related
    # options from the command line options in 'opts'.
    cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))

    # Status ids to look for: 'D' when --deleted, 'R' when --rejected.
    stats = [jdb.KW.STAT['D'].id] if opts.deleted else []
    if opts.rejected: stats.append(jdb.KW.STAT['R'].id)

    entries, tmptbl = find_entries(cur, stats, opts.corpus,
                                   str(opts.age) + ' days')
    if opts.outfile:
        outf = open_outfile(opts.outfile)
        write_log(entries, outf)
    if opts.verbose:
        for e in entries:
            # First 10 characters of the iso timestamp, i.e. the date part.
            ts = e._hist[-1].dt.isoformat(' ')[:10]
            # FIXME: Want seq number in output, but edfmt.entr() should
            #  provide it like wwwjdic, following the last gloss, not us.
            print("[%s,%s] %s %s" % (e.src, e.id, ts, edfmt.entr(e)),
                  file=sys.stderr)

    cur.execute("BEGIN")
    delcnt = del_entries(cur, tmptbl)
    if opts.noaction: cur.execute("ROLLBACK")
    else: cur.execute("COMMIT")

    if opts.verbose:
        print("%d entries read, %d entries deleted"
              % (len(entries), delcnt), file=sys.stderr)
        if opts.noaction:
            print("%d deleted rolled back" % delcnt, file=sys.stderr)
Пример #3
0
def main(args, opts):
    """Round-trip one entry through the JEL parser, or go interactive.

    args -- Positional arguments (not used here).
    opts -- Parsed command line options; 'encoding', 'debug' and 'seq'
        are read.

    Sets the module globals KW, tokens and Encoding.
    """
    global KW, tokens, Encoding

    Encoding = opts.encoding or sys.getdefaultencoding()
    cur = jdb.dbOpen('jmdict')
    # Keep a module-level reference to the keyword tables.
    KW = jdb.KW

    # The lexer gets the debug bits above the low eight; the parser
    # gets the whole debug value.
    lexer, tokens = jellex.create_lexer(debug=opts.debug >> 8)
    parser = jelparse.create_parser(lexer, tokens, tabmodule='jelparse_tab')
    parser.debug = opts.debug

    wanted = opts.seq
    if not wanted:
        _interactive(cur, lexer, parser)
        return

    #FIXME: Corpid (used for xref resolution) is hardwired
    # to 1 (jmdict) below.
    srctxt, parsedtxt = _roundtrip(cur, lexer, parser, wanted, 1)
    if srctxt:
        print(srctxt, "----", parsedtxt, sep="\n")
    else:
        print("Entry %s not found" % wanted)
Пример #4
0
def main(args, opts):
    """Build a keyword set and hand it to gen_module().

    args -- Directories passed, in order, to Kwds.loadcsv().
    opts -- Parsed command line options; when opts.database is set the
        keywords are first loaded from that database.
    """
    kwds = jdb.Kwds()
    if opts.database:
        kwds.loaddb(jdb.dbOpen(opts.database, **jdb.dbopts(opts)))
    for csv_dir in args:
        kwds.loadcsv(csv_dir)
    gen_module(kwds)
Пример #5
0
def main():
    """Format and print entry id 542 from the 'jmnew' database."""
    cur = jdb.dbOpen('jmnew')
    entrs, data = jdb.entrList(cur, [542], ret_tuple=True)

    # Augment the xrefs in both directions, then mark them up, before
    # any entry is formatted.
    xrefs = data['xref']
    jdb.augment_xrefs(cur, xrefs)
    jdb.augment_xrefs(cur, xrefs, rev=1)
    markup_xrefs(cur, xrefs)

    for item in entrs:
        print(entr(item))
Пример #6
0
def main(cmdargs=sys.argv):
    """Apply the edits described in a command file to database entries.

    cmdargs -- Command line argument list handed to parse_cmdline().

    Each entry is fetched, edited and submitted on its own.  An
    UpdateError at any of those steps is logged and processing continues
    with the next entry.  Returns None; returns early, after logging an
    error, when the command file could not be parsed.
    """
    # Parse command line arguments.
    args = parse_cmdline(cmdargs)

    # Open a database connection.
    cur = jdb.dbOpen(None, **jdb.parse_pguri(args.database))

    # Parse the input command file.  The result is a list of
    #  3-tuples of seq, src, edits.  'edits' in turn is a list
    #  of Cmd() instances that describe a sequence of changes
    #  to be made to the entry identified by seq,src.
    #  parse_cmdfile() returns None if any errors occurred.
    if not args.filename: f = sys.stdin
    else: f = open(args.filename)
    try:
        cmds = parse_cmdfile(f, args.corpus)
    finally:
        # Close a file we opened even if parsing raised; never close stdin.
        if f is not sys.stdin: f.close()
    if cmds is None:
        L.error("Exiting due to errors in input file")
        return

    # Now go through 'cmds' and make the requested
    # change to each entry.  Changes are committed individually
    # and any failed attempts will result in some flavor of
    # UpdateError, which we catch and print, then continue
    # with the next entry.
    done = 0
    for seq, src, edits in cmds:
        L.info("Modifying seq# %s, src %s" % (seq, src))
        try:
            entr = getentry(cur, seq, src)
        except UpdateError as e:
            L.error(e)
            continue
        hist = jdb.Hist(name=args.name,
                        email=args.email,
                        notes=args.comment,
                        refs=args.refs,
                        userid=args.userid)
        for edit in edits:
            try:
                doedit(entr, hist, edit)
            except UpdateError as e:
                L.error(e)
                break
        else:  # Executed if the for-loop exits normally (not via 'break').
            entr._hist.append(hist)
            try:
                submit(cur, entr, args.userid, args.noaction)
            except UpdateError as e:
                L.error(e)
            else:
                done += 1

    if len(cmds) != done:
        L.error("%d entries not updated due to errors" % (len(cmds) - done))
    noactmsg = "not updated due to --noaction option" if args.noaction else "updated"
    L.info("%d entries %s" % (done, noactmsg))
Пример #7
0
def main(args, opts):
    """Look up entries selected by the command line and print them.

    args -- Positional arguments, passed on to opts2sql().
    opts -- Parsed command line options; 'database', 'encoding', 'debug'
        and 'jel' are read here.

    Exits with status 1 if the database connection cannot be opened.
    """
    # Opening the database gives us a cursor for all later queries.  Per
    # the original notes it also populates jdb.KW with the contents of
    # the keyword ("kw*") tables, so they are read only once.
    try:
        cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))
    except jdb.dbapi.OperationalError as e:
        print("Error, unable to connect to database, do you need -u or -p?\n",
              str(e),
              file=sys.stderr)
        sys.exit(1)

    # Output encoding: explicit option, else stdout's own, else utf-8.
    jdb.reset_encoding(
        sys.stdout,
        encoding=opts.encoding or sys.stdout.encoding or 'utf-8')

    # Turn the command line into sql that selects the wanted entries.
    sql, sqlargs = opts2sql(args, opts)
    if opts.debug:
        print("%s  %s" % (sql, repr(sqlargs)))

    # 'entrs' receives the entry objects; 'raw' is indexed by table
    # name ('xref' and 'xrer' are used below).
    entrs, raw = jdb.entrList(cur, sql, sqlargs, ret_tuple=True)

    # Per the original notes, xrefs as retrieved carry only the ids of
    # the entries they refer to; augmenting them (forward, then reverse)
    # adds what is needed to display the referenced entries.
    jdb.augment_xrefs(cur, raw['xref'])
    jdb.augment_xrefs(cur, raw['xrer'], rev=1)
    jdb.add_xsens_lists(raw['xref'])
    jdb.mark_seq_xrefs(cur, raw['xref'])

    # Print each entry in the requested format, with a blank line
    # between (but not before or after) entries.
    for position, e in enumerate(entrs):
        txt = fmtjel.entr(e) if opts.jel else fmt.entr(e)
        if position > 0: print()
        print(txt)

    if not len(entrs): print("No entries found")
Пример #8
0
def globalSetup():
    """Open an autocommit database connection into the global 'Cursor'.

    Connection arguments come from dbauth.auth when the 'dbauth' module
    can be imported, otherwise they default to database 'jmdict'.  The
    arguments are copied before 'autocommit' is added so that the shared
    dbauth.auth mapping is not modified.
    """
    global Cursor
    # Get login credentials from dbauth.py if possible.
    try:
        import dbauth
    except ImportError:
        kwargs = {'database': 'jmdict'}
    else:
        kwargs = dict(dbauth.auth)
    kwargs['autocommit'] = True
    Cursor = jdb.dbOpen(None, **kwargs)
Пример #9
0
def get_max_ids(opts):
    """Return 1 + the largest entr.id in the database, or 1 if none.

    opts -- Parsed command line options supplying the database name and
        connection parameters.
    """
    query = "SELECT 1+COALESCE((SELECT MAX(id) FROM entr),0) AS entr"
    cursor = jdb.dbOpen(opts.database, **jdb.dbopts(opts))
    cursor.execute(query)
    rows = cursor.fetchall()
    cursor.close()
    # Single row, single column.
    return rows[0][0]
Пример #10
0
def main (args, opts):
    """Print the label data getlabels() returns for each argument.

    args -- Sound file identifiers, each passed to getlabels().
    opts -- Parsed command line options (encoding and db options).

    For each file the name is printed, followed by one tab-separated
    line per label: start, end and transcription, with start and length
    divided by 100.
    """
    jdb.reset_encoding (sys.stdout, opts.encoding)
    # Open the database.  jdb.dbopts() extracts the db-related
    # options from the command line options in 'opts'.
    cur = jdb.dbOpen (opts.database, **jdb.dbopts (opts))
    for f in args:
        fname, ldata = getlabels (cur, f)
        if not fname:
            print ("No data for sound file '%s'" % str(f), file=sys.stderr)
            continue
        print (fname)
        for label in ldata:
            begin = label.strt/100.0
            finish = begin + label.leng/100.0
            print ("%f\t%f\t%s" % (begin, finish, label.trns))
Пример #11
0
def _get_text_from_database(seq, src):
    """Return the JEL text of the entry with the given seq and src.

    seq -- Sequence number of the wanted entry.
    src -- Corpus (src) id of the wanted entry.

    Returns the text produced by fmtjel.entr() with everything up to and
    including the first newline removed, or None (after printing a
    message) when no matching entry exists.
    """
    cur = jdb.dbOpen('jmdict')
    sql = "SELECT id FROM entr WHERE seq=%s AND src=%s"
    elist = jdb.entrList(cur, sql, [seq, src])
    if not elist:
        print("Entry %s not found" % seq)
        return None
    entr = elist[0]
    # Augment each sense's xrefs (if it has any) before formatting.
    for s in entr._sens:
        jdb.augment_xrefs(cur, getattr(s, '_xref', []))
    txt = fmtjel.entr(entr)
    # Drop the first line of the formatted text.
    return txt.partition('\n')[2]
Пример #12
0
def globalSetup():
    """Initialise the globals Cur, KW, Lexer and Parser once.

    Returns False without doing anything if 'Cur' is already set, True
    after a fresh setup.  Connection arguments come from dbauth.auth when
    the 'dbauth' module can be imported, otherwise they default to
    database 'jmdict'.  The arguments are copied before 'autocommit' is
    added so that the shared dbauth.auth mapping is not modified.
    """
    global Cur, KW, Lexer, Parser
    if Cur: return False
    try:
        import dbauth
    except ImportError:
        kwargs = {'database': 'jmdict'}
    else:
        kwargs = dict(dbauth.auth)
    kwargs['autocommit'] = True
    Cur = jdb.dbOpen(None, **kwargs)
    KW = jdb.KW
    Lexer, tokens = jellex.create_lexer()
    Parser = jelparse.create_parser(Lexer, tokens)
    return True
Пример #13
0
def _main(args, opts):
    """Prompt repeatedly for an entry id and print the formatted entry.

    args, opts -- Not used here.

    The loop ends on an empty answer or end-of-file.  The typed id is
    converted to an int once and that int is used both for the lookup
    and for the "not found" message (formatting the raw input string
    with %d raised TypeError).
    """
    cur = jdb.dbOpen('jmdict')
    while True:
        try:
            answer = input("Id number? ")
        except EOFError:
            answer = None
        if not answer: break
        # NOTE(review): a non-numeric answer still raises ValueError here.
        entr_id = int(answer)
        found, raw = jdb.entrList(cur, [entr_id], ret_tuple=True)
        jdb.augment_xrefs(cur, raw['xref'])
        if not found:
            print("Entry id %d not found" % entr_id)
        else:
            txt = entr(found[0], compat=None)
            print(txt)
Пример #14
0
def main(args, opts):
    """Print the sound tables as a JMaudio XML document on stdout.

    args -- Positional arguments (not used here).
    opts -- Parsed command line options (encoding and db options).

    Output is the DTD, an opening <JMaudio> tag, then each sndvol row
    followed by its sndfile rows, each followed by its snd rows, and
    finally the closing tag.
    """
    jdb.reset_encoding(sys.stdout, opts.encoding)

    dtd_name = "dtd-audio.xml"
    dtd_dir = jdb.find_in_syspath(dtd_name)
    print(jdb.get_dtd(dtd_dir + "/" + dtd_name, "JMaudio", opts.encoding))
    print("<JMaudio>")

    cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))
    for vol in jdb.dbread(cur, "SELECT * FROM sndvol"):
        print("\n".join(fmtxml.sndvols([vol])))
        files = jdb.dbread(cur, "SELECT * FROM sndfile s WHERE s.vol=%s",
                           [vol.id])
        for sndfile in files:
            print("\n".join(fmtxml.sndsels([sndfile])))
            clips = jdb.dbread(cur, "SELECT * FROM snd c WHERE c.file=%s",
                               [sndfile.id])
            for clip in clips:
                print("\n".join(fmtxml.sndclips([clip])))
    print('</JMaudio>')
Пример #15
0
def main(args, opts):
    """Resolve xresolv rows in blocks, committing or rolling back each.

    args -- Positional arguments (not used here).
    opts -- Parsed command line options.  Attributes read here: debug,
        verbose, keep (set when verbose), database, source_corpus,
        target_corpus and noaction.

    Stores 'opts' in the module global 'Opts'.
    """
    global Opts
    Opts = opts
    # Debugging flags:
    #  1 -- Print generated xref records.
    #  2 -- Print executed sql.
    #  4 -- Print info about read xresolve records.
    if opts.debug & 0x02: Debug.prtsql = True
    if opts.verbose: opts.keep = True

    # NOTE(review): the code below uses 'dbh', 'xref_src' and 'targ_src'
    # after the except clauses, so perr() presumably does not return --
    # confirm.
    try:
        dbh = jdb.dbOpen(opts.database, **jdb.dbopts(opts))
    except jdb.dbapi.OperationalError as e:
        # The format string needs a conversion specifier for the error
        # text; without one the '%' operation itself raises TypeError.
        perr("Error, unable to connect to database, do you need -u or -p?\n%s"
             % str(e))

    try:
        xref_src = get_src_ids(opts.source_corpus)
    except KeyError:
        perr("Unknown corpus: '%s'" % opts.source_corpus)
    try:
        targ_src = get_src_ids(opts.target_corpus)
    except KeyError:
        perr("Unknown corpus: '%s'" % opts.target_corpus)

    #FIXME: need to make work with multiple srcs in targ_src and
    # provide limiting the scope (eg to one src or an entry) of
    # the K/R pairs in the file.
    #krmap = read_krmap (dbh, opts.filename, targ_src)
    krmap = {}

    blksz = 1000
    for xresolv_rows in get_xresolv_block(dbh, blksz, xref_src):
        if not xresolv_rows: break
        resolv(dbh, xresolv_rows, targ_src, krmap)
        # Each block is its own transaction.
        if opts.noaction:
            if opts.verbose: print("ROLLBACK")
            dbh.connection.rollback()
        else:
            if opts.verbose: print("COMMIT")
            dbh.connection.commit()
    dbh.close()
Пример #16
0
def main(args, opts):
    """Align labels from a file with those in the database and apply them.

    args -- args[0] is the sound file id, args[1] the label file.
    opts -- Parsed command line options; 'encoding', 'quiet' and the db
        options are read.

    The user is asked what to do: 'i' applies the changes interactively,
    'u' applies them non-interactively, anything else applies nothing.
    """
    jdb.reset_encoding(sys.stdout, opts.encoding)
    # Open the database.  jdb.dbopts() extracts the db-related
    # options from the command line options in 'opts'.
    cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))

    fidnum, lbls = args[0], args[1]
    _, db_labels = labels_from_db(cur, fidnum)
    file_labels = labels_from_file(lbls)
    update, nomatch = align(file_labels, db_labels, opts.quiet)

    updated = added = None
    ans = ask_action()
    if ans == 'i' or ans == 'u':
        apply_changes = do_interactive if ans == 'i' else do_noninteractive
        updated, added = apply_changes(cur, fidnum, update, nomatch)

    if updated is not None:
        pout("%d sound records updated" % updated)
    if added is None:
        return
    pout("%d sound records added" % added)
    # Only unmatched labels that have an 'id' attribute are listed.
    for a in nomatch:
        if hasattr(a, 'id'):
            pout("Added: %d:(%d,%d,%s)" % (a.id, a.strt, a.leng, a.trns))
Пример #17
0
def main (args, opts):
        """Run the database consistency checks and report the results.

        args -- If non-empty, only checks whose 1-based number is in
            'args' are run.
        opts -- Parsed command line options; stored in the global 'Opts'.
            'verbose' and the db options are read.

        Each check is a (message, sql) pair handed to run_check().  Exits
        with status 1 if any check reported a problem.  The summary lines
        use a print form valid on Python 2 and 3; the former print
        statements were syntax errors on Python 3.
        """
        global Opts;  Opts = opts
        cur = jdb.dbOpen (opts.database, **jdb.dbopts(opts))

        checks = [
            #------ 1 -- Approved but edit of another entry.
            ("The following entries are \"approved\" (entr.unap is FALSE) \n"\
             "but are listed as being edits of another entry (entr.dfrm \n"\
             "is non-NULL):",

            "SELECT e.id "\
                "FROM entr e "\
                "WHERE NOT e.unap AND e.dfrm IS NOT NULL "\
                "ORDER BY e.id LIMIT %s" % LIMIT),

            #------ 2 -- Dfrm cycle.
            ("The following entries are part of a dfrm cycle:",

            "WITH RECURSIVE wt (id, dfrm, depth, path, cycle) AS ("\
                   "SELECT e.id, e.dfrm, 1, ARRAY[e.id], false "\
                   "FROM entr e "\
                     "UNION ALL "\
                   "SELECT e.id, e.dfrm, wt.depth+1, path||e.id, e.id=ANY(path) "\
                   "FROM entr e, wt "\
                   "WHERE e.id = wt.dfrm AND NOT cycle) "\
                "SELECT DISTINCT wt.id FROM wt WHERE cycle LIMIT %s" % LIMIT),

            #------ 3 -- Multiple A entries in seqset.
            ("More that one \"A\" (approved and active) entry is a \n"\
             "seqset.  Following are the src,seq numbers:",

                # FIXME: We have to exclude the Examples corpus (see IS-157)
                #  but there is no guarantee the its 'src' number is 3.
            "SELECT src, seq FROM entr e WHERE NOT unap and stat=2 AND src!=3 "\
                "GROUP BY src, seq HAVING count(*)>1 "\
                "ORDER BY src,seq  LIMIT %s" % LIMIT),

            #------ 4 -- Multiple src/seq in editset.
            ("Entry id's in editsets where some entries have different \n"\
             "corpus or seq# than others:",

            "SELECT e1.id, e2.id "\
                "FROM entr e1 "\
                "JOIN entr e2 ON e2.dfrm=e1.id "\
                "WHERE e1.src!=e2.src OR e1.seq!=e2.seq "\
                "ORDER BY e1.id,e2.id LIMIT %s" % LIMIT),

            #------ 5 -- JIS semicolon in gloss.
            ("Entries with a JIS semicolon in a gloss:",

                # Hex string is unicode codepoint of JIS semicolon.
            u"SELECT entr FROM gloss WHERE txt LIKE '%%\uFF1B%%' "\
                "ORDER BY entr LIMIT %s" % LIMIT),

            #------ 6 -- JIS space in gloss.
            ("Entries with a JIS space in a gloss:",

                # Hex string is unicode codepoint of JIS space.
            u"SELECT entr FROM gloss WHERE txt LIKE '%%\u3000%%' "\
                "ORDER BY entr LIMIT %s" % LIMIT),

            #------ 7 -- No readings.
            ("Entries with no readings:",

                # FIXME: We have to exclude the Examples corpus (since none of
                #  its entries have readings) but there is no guarantee that its
                #  'src' number is 3.
                # Don't bother reporting deleted or rejected entries.
            "SELECT e.id FROM entr e WHERE src!=3 AND stat=2 AND NOT EXISTS "\
                "(SELECT 1 FROM rdng r WHERE r.entr=e.id) "\
                "ORDER BY e.id LIMIT %s" % LIMIT),

            #------ 8 -- No senses.
            ("Entries with no senses:",

            "SELECT e.id FROM entr e WHERE NOT EXISTS "\
                "(SELECT 1 FROM sens s WHERE s.entr=e.id) "\
                "ORDER BY e.id LIMIT %s" % LIMIT),

            #------ 9 -- No glosses.
            ("Entries with glossless senses:",

            "SELECT e.id,s.sens FROM entr e JOIN sens s ON s.entr=e.id WHERE NOT EXISTS "\
                "(SELECT 1 FROM gloss g WHERE g.entr=s.entr AND g.sens=s.sens) "\
                "ORDER BY e.id,s.sens LIMIT %s" % LIMIT),

            #------ 10 -- No PoS.
            ("Entries with senses that have no PoS:",

                # FIXME: Poslessness is a bad thing only in jmdict corpora
                #  but there is no way to identify such.  We'll take a guess that
                #  there is only one and its 'src' is 1.
            "SELECT e.id,s.sens FROM entr e JOIN sens s ON s.entr=e.id WHERE src=1 AND NOT EXISTS "\
                "(SELECT 1 FROM pos p WHERE p.entr=s.entr AND p.sens=s.sens) "\
                "ORDER BY e.id,s.sens LIMIT %s" % LIMIT),

            #------ 11 -- Non-sequential kanj numbers.
            ("Entries with kanj.kanj numbers that are not sequential or do "\
             "not start at one.",

            "SELECT entr FROM kanj "\
                "GROUP BY entr HAVING MIN(kanj)!=1 OR COUNT(*)!=MAX(kanj) "\
                "ORDER by entr LIMIT %s" % LIMIT),

            #------ 12 -- Non-sequential rdng numbers.
            ("Entries with rdng.rdng numbers that are not sequential or do "\
             "not start at one.",

            "SELECT entr FROM rdng "\
                "GROUP BY entr HAVING MIN(rdng)!=1 OR COUNT(*)!=MAX(rdng) "\
                "ORDER by entr LIMIT %s" % LIMIT),

            #------ 13 -- Non-sequential sens numbers.
            ("Entries with sens.sens numbers that are not sequential or do "\
             "not start at one.",

            "SELECT entr FROM sens "\
                "GROUP BY entr HAVING MIN(sens)!=1 OR COUNT(*)!=MAX(sens) "\
                "ORDER by entr LIMIT %s" % LIMIT),

            #------ 14 -- Non-sequential gloss numbers.
            ("Entries with gloss.gloss numbers that are not sequential or do "\
             "not start at one.",

            "SELECT entr,sens FROM gloss "\
                "GROUP BY entr,sens HAVING MIN(gloss)!=1 OR COUNT(*)!=MAX(gloss) "\
                "ORDER by entr,sens LIMIT %s" % LIMIT),

            #------ 15 -- Deleted or rejected without history.
            ("Deleted or rejected entries with no history.  These will not be "\
             "expunged by the usual maintenance scripts because with no history, "\
             "they have no \"age\".",

            "SELECT e.id FROM entr e WHERE stat IN (4,6)"\
                "AND NOT EXISTS (SELECT 1 FROM hist h WHERE h.entr=e.id) "\
                "ORDER by e.id LIMIT %s" % LIMIT),
            ]

        errs = ok = 0
        for n, (msg, sql) in enumerate (checks):
              # NOTE(review): this membership test only matches if 'args'
              # holds ints; confirm the caller converts them.
            if args and n+1 not in args: continue
            sqlargs = None   # See the comments in jb.dbread().
            bad = run_check (cur, "Check %d"%(n+1), msg, sql, sqlargs)
            if bad: errs += 1
            else: ok += 1
        if Opts.verbose: print ("%d ok" % ok)
        if Opts.verbose and errs: print ("%d errors" % errs)
        if errs: sys.exit(1)
Пример #18
0
def main(args, opts):
    """Write database entries as XML to a file or to stdout.

    args -- Optional output file name in args[0]; with no arguments the
        output goes to sys.stdout.
    opts -- Parsed command line options.  Attributes read here: debug,
        database, root, compat, nodtd, encoding, seqfile, corpus, begin,
        count and blocksize.

    Entries are read in blocks of opts.blocksize, either by the sequence
    numbers listed in opts.seqfile or by walking the entr table in
    (src, seq, id) order, and each block is passed to write_entrs().
    Progress and timing information goes to sys.stderr.  Exits with
    status 1 if --begin names a sequence number that is not found.
    """
    global Debug
    Debug = opts.debug
    # Open the database.  jdb.dbopts() extracts the db-related
    # options from the command line options in 'opts'.
    cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))

    # If no "--root" option was supplied, choose a default based
    # on the value of the "--compat" option.
    if not opts.root:
        if opts.compat in ('jmnedict', 'jmneold'): opts.root = 'JMnedict'
        else: opts.root = 'JMdict'

    outf = None
    if not opts.nodtd:
        # Choose a dtd to use based on the "--compat" option.
        # The dtd file is expected to be located somewhere in the
        # pythonpath (sys.path) directories.
        if opts.compat == 'jmdict': dtd = "dtd-jmdict.xml"
        elif opts.compat == 'jmdicthist': dtd = "dtd-jmdict.xml"
        elif opts.compat == 'jmnedict': dtd = "dtd-jmnedict.xml"
        elif opts.compat == 'jmneold': dtd = "dtd-jmneold.xml"
        else: dtd = "dtd-jmdict-ex.xml"
        dir = jdb.find_in_syspath(dtd)
        dtdfn = dir + "/" + dtd  # Fully qualified dtd file name.

        # jdb.get_dtd() reads the dtd text, and replaces the root
        # element name and encoding with the values supplied
        # in the arguments.
        dtdtxt = jdb.get_dtd(dtdfn, opts.root, opts.encoding)
        if len(args) == 0: outf = sys.stdout
        else: outf = open(args[0], "w")
        # NOTE(review): reset_encoding() is applied only on this (dtd)
        # path; the output file opened further below is not re-encoded.
        jdb.reset_encoding(outf, opts.encoding)
        outf.write(dtdtxt)

    if opts.seqfile:
        # A seqfile of '-' means read the sequence numbers from stdin.
        if opts.seqfile == '-': f = sys.stdin
        else: f = open(opts.seqfile)
        #FIXME: we should read these incrementally.
        entrlist = [int(x)
                    for x in f.read().split()]  # seq# separated by sp or nl.
        if f != sys.stdin: f.close()

    # Turn the "--corpus" option value into a string that can be
    # and'ed into a SQL WHERE clause to restrict the results to
    # the specified corpora.
    corp_terms = parse_corpus_opt(opts.corpus, 'e.src')

    # If the output file was not opened in the dtd section
    # above, open it now.  We postpone opening it until the
    # last possible moment to avoid creating it and then
    # bombing because there was a typo in the input or dtd
    # filename, etc.
    # FIXME: Should do a "write" function that opens the
    #  file just before writing.
    if not outf:
        if len(args) == 0: outf = sys.stdout
        else: outf = open(args[0], "w")

    # With any "--compat" value, only approved entries whose status is
    # 'A' are selected; otherwise no status restriction is added.
    whr_act = " AND NOT unap AND stat=" + str(
        jdb.KW.STAT['A'].id) if opts.compat else ""
    if opts.begin:
        # If a "--begin" sequence number was given, we need to read
        # the entr record so we can get the src id number.  Complain
        # and exit if not found.  Complain if more than one entry
        # with the requested seq number exists.  More than one may be
        # found since the same sequence number may exist in different
        # corpora, or in the same corpus if an entry was edited.
        #
        #FIXME: no way to select from multiple entries with same seq
        # number.  Might want just the stat="A" entries for example.
        sql = "SELECT id,seq,src FROM entr e WHERE seq=%s%s%s ORDER BY src" \
                % (int(opts.begin), corp_terms, whr_act)
        if Debug: print(sql, file=sys.stderr)
        start = time.time()
        rs = jdb.dbread(cur, sql)
        if Debug:
            print("Time: %s (init read)" % (time.time() - start),
                  file=sys.stderr)
        if not rs:
            print ("No entry with seq '%s' found" \
                                 % opts.begin, file=sys.stderr)
            sys.exit(1)
        if len(rs) > 1:
            print ("Multiple entries having seq '%s' found, results " \
                   "may not be as expected.  Consider using -s to " \
                   "restrict to a single corpus." % (opts.begin), file=sys.stderr)
        lastsrc, lastseq, lastid = rs[0].src, rs[0].seq, rs[0].id
    if not opts.begin and not opts.seqfile:
        # If no "--begin" option, remove the " AND" from the front of
        # the 'corp_terms' string.  Read the first entry (by seq number)
        # in the requested corpora.
        cc = corp_terms[4:] if corp_terms else 'True'
        # If compat (jmdict or jmnedict), restrict the xml to Active
        # entries only.
        sql = "SELECT id,seq,src FROM entr e WHERE %s%s ORDER BY src,seq LIMIT 1" % (
            cc, whr_act)
        start = time.time()
        if Debug: print(sql, file=sys.stderr)
        rs = jdb.dbread(cur, sql)
        if Debug:
            print("Time: %s (init read)" % (time.time() - start),
                  file=sys.stderr)
        # NOTE(review): unlike the "--begin" branch above, an empty
        # result is not checked for here; rs[0] would presumably fail
        # if no entry matches -- confirm.
        lastsrc, lastseq, lastid = rs[0].src, rs[0].seq, rs[0].id

    # Add an enclosing root element only if we are also including
    # a DTD (ie, producing a full XML file).  Otherwise, the file
    # generated will just be a list of <entr> elements.
    if not opts.nodtd:
        if opts.compat:  # Add a date comment...
            today = time.strftime("%Y-%m-%d", time.localtime())
            outf.write("<!-- %s created: %s -->\n" % (opts.root, today))
        outf.write('<%s>\n' % opts.root)

    entrlist_loc = 0
    count = opts.count
    done = 0
    blksize = opts.blocksize
    corpora = set()

    # A 'count' of None means no limit on the number of entries.
    while count is None or count > 0:

        if opts.seqfile:
            # Take the next 'blksize' sequence numbers from the list
            # read from the seqfile; stop when the list is exhausted.
            seqnums = tuple(entrlist[entrlist_loc:entrlist_loc + blksize])
            if not seqnums: break
            entrlist_loc += blksize
            #FIXME: need detection of non-existent seq#s.
            sql = "SELECT id FROM entr e WHERE seq IN %s" + corp_terms + whr_act
            sql_args = [seqnums]
            if Debug: print(sql, sql_args, file=sys.stderr)
            start = time.time()
            tmptbl = jdb.entrFind(cur, sql, sql_args)
        else:
            # In this loop we read blocks of 'blksize' entries.  Each
            # block read is ordered by entr src (i.e. corpus), seq, and
            # id.  The block to read is specified in WHERE clause which
            # is effectively:
            #   WHERE ((e.src=lastsrc AND e.seq=lastseq AND e.id>=lastid)
            #           OR (e.src=lastsrc AND e.seq>lastseq)
            #           OR e.src>lastsrc)
            # where (lastsrc, lastseq) are from the last entry in the
            # last block read and 'lastid' is that entry's id plus one
            # (or, for the first block, the id of the starting entry).

            whr = "WHERE ((e.src=%%s AND e.seq=%%s AND e.id>=%%s) " \
                          "OR (e.src=%%s AND e.seq>%%s) " \
                          "OR e.src>%%s) %s%s" % (corp_terms, whr_act)
            sql = "SELECT e.id FROM entr e" \
                  " %s ORDER BY src,seq,id LIMIT %d" \
                   % (whr, blksize if count is None else min (blksize, count))

            # The following args are passed to jdb.entrFind() below
            # for the "%s" placeholders left in the sql above (written
            # there as "%%s").
            sql_args = [lastsrc, lastseq, lastid, lastsrc, lastseq, lastsrc]

            # Create a temporary table of id numbers and give that to
            # jdb.entrList().  This is an order of magnitude faster than
            # giving the above sql directly to entrList().
            if Debug: print(sql, sql_args, file=sys.stderr)
            start = time.time()
            tmptbl = jdb.entrFind(cur, sql, sql_args)
        mid = time.time()
        entrs, raw = jdb.entrList(cur,
                                  tmptbl,
                                  None,
                                  ord="src,seq,id",
                                  ret_tuple=True)
        end = time.time()
        if Debug: print("read %d entries" % len(entrs), file=sys.stderr)
        if Debug:
            print("Time: %s (entrFind), %s (entrList)" %
                  (mid - start, end - mid),
                  file=sys.stderr)
        if not entrs: break
        write_entrs(cur, entrs, raw, corpora, opts, outf)

        # Update the 'last*' variables for the next time through
        # the loop.  Also, decrement 'count', if we are counting.
        lastsrc = entrs[-1].src
        lastseq = entrs[-1].seq
        lastid = entrs[-1].id + 1
        # 'count' is reduced by the block size rather than by the number
        # of entries read, so it may go negative, which ends the loop.
        if count is not None: count -= blksize
        done += len(entrs)
        if not Debug: sys.stderr.write('.')
        else: print("%d entries written" % done, file=sys.stderr)
    # NOTE(review): writelines() is given a single str here; for standard
    # file objects that writes the same text as write() would.
    if not opts.nodtd: outf.writelines('</%s>\n' % opts.root)
    if not Debug: sys.stderr.write('\n')
    print("Wrote %d entries" % done, file=sys.stderr)