示例#1
0
    def __init__(self, argv=sys.argv):
        """Configure the worker from command-line options.

        Parses ``argv``, builds a ``Database`` from a MySQL cnf file, resolves
        the target site (and optionally a group), initialises the
        ``WorkerBaseQ`` base class with a queue file name, and records the log
        file name.

        Site and group ids may also be encoded in the script name: a name of
        the form ``<letter><sid>[-<gid>[-<tid>]]`` (letter one of
        ``g t d h H c``) supplies fallback ids that are used only when the
        corresponding command-line options did not yield a site/group.

        The ``site``, ``sid``, ``group`` and ``gid`` option attributes are
        read here but not declared here; presumably ``get_option_parser()``
        defines them.

        Args:
            argv: Argument vector handed unchanged to the option parser.
                NOTE(review): the script name is always taken from
                ``sys.argv[0]``, not ``argv[0]`` -- confirm this is intended
                for callers that pass a custom ``argv``.

        Raises:
            ValueError: If the cnf file cannot be found, if no site can be
                resolved, or if an explicitly requested group does not exist.
        """
        op = get_option_parser()
        op.add_option("-l", "--log", dest="log", metavar="FILE", help="log file")
        op.add_option("-q", "--queue", dest="queue", metavar="FILE", help="queue file")
        op.add_option(
            "-Q", "--no-queue", dest="noqueue", action="store_true", help="do not use backup queue", default=False
        )
        op.add_option("-c", "--cnf", dest="cnf", metavar="FILE", help="MySQL cnf file")
        op.add_option("-L", "--language", dest="lang", metavar="STR", help="language code")
        opts, args = op.parse_args(argv)

        if opts.lang is not None:
            self.LANG = opts.lang

        # Locate the MySQL cnf file: an explicit -c wins, otherwise ~/.my.cnf.
        if opts.cnf:
            if not os.path.exists(opts.cnf):
                raise ValueError("cnf file not found: " + opts.cnf)
            self.cnf = os.path.realpath(opts.cnf)
        else:
            self.cnf = os.path.expanduser("~/.my.cnf")
            if not os.path.exists(self.cnf):
                raise ValueError("no cnf file found; use -c option")
        db = Database(self.cnf)

        # Fallback ids encoded in the script name, e.g. "g12-3".
        script_name = sys.argv[0].split("/")[-1]
        m = re.match(r"^[gtdhHc](?:([0-9]+)(?:-([0-9]+)(?:-([0-9]+))?)?)$", script_name)
        if m:
            # Unmatched optional groups come back as None and stay None.
            sid, gid, _tid = [int(part) if part else None for part in m.groups()]
        else:
            sid = gid = None

        # Resolve the site: by name/key option, then by id option, then by
        # the id taken from the script name.
        site = None
        if opts.site:
            site = db.getSite(opts.site)
        if site is None and opts.sid:
            site = db.getSite(sid=opts.sid)
        if site is None and sid:
            site = db.getSite(sid=sid)
        if site is None:
            raise ValueError("invalid site or site is not specified")

        # Resolve the group with the same precedence.  Unlike the site, a
        # group is optional, but a requested group that does not exist is
        # an error.
        group = None
        if opts.group:
            group = site.getGroup(opts.group)
            if group is None:
                raise ValueError("invalid group: " + opts.group)
        if group is None and opts.gid:
            group = site.getGroup(gid=opts.gid)
            if group is None:
                # %s rather than %d: the option value may be a string, and
                # %d would then raise TypeError instead of this ValueError.
                raise ValueError("invalid group id: %s" % opts.gid)
        if group is None and gid:
            group = site.getGroup(gid=gid)
            if group is None:
                raise ValueError("invalid group id: %s" % gid)

        # The group's language is only a default; -L takes precedence.
        if group and opts.lang is None and group.lang:
            self.LANG = group.lang

        # Initialise the queue base class, deriving a queue file name when
        # none was given explicitly.
        if opts.noqueue:
            WorkerBaseQ.__init__(self)
            self.QUEUE_FILE = None
        else:
            if opts.queue:
                queue_file = opts.queue
            elif m:
                # Start from the script name and append whichever ids it
                # did not already carry.
                queue_file = m.group(0)
                if not sid:
                    queue_file += "%d" % site.id
                if not gid and group:
                    queue_file += "-%d" % group.id
                queue_file += ".q"
            else:
                queue_file = os.path.basename(sys.argv[0]) + "-%d" % site.id
                if group:
                    queue_file += "-%d" % group.id
                queue_file += ".q"
            WorkerBaseQ.__init__(self, queue_file)
            self.QUEUE_FILE = queue_file

        if opts.log:
            self.LOG_FILE = opts.log
        else:
            self.LOG_FILE = os.path.basename(sys.argv[0]) + ".log"

        # try to get group info from the top url store in the queue
        if not group:
            topurl = self.top()
            if hasattr(topurl, "gid"):
                group = site.getGroup(gid=topurl.gid)

        self.db = db
        self.site = site
        self.group = group
        self.opts = opts
        self.args = args
示例#2
0
import os
import re
import mycurl
from mycurl.db import Database
import MySQLdb
import time, datetime

# Path of the interpreter running this script.
python = sys.executable

# Database built from the user's MySQL option file.
# NOTE(review): the path is passed with an unexpanded "~" -- confirm that
# Database (or the MySQL client underneath) expands it.
db = Database('~/.my.cnf')

# Pool directories come from the mycurl package; the validator is a
# hard-coded path to an external tool (presumably an SGML validator, judging
# by its name -- verify).
HTMLDIR = mycurl.MYCURL_POOL_HTML
SGMDIR = mycurl.MYCURL_POOL_SGM
VALIDATOR = '/ldc/projects/GALE/tools/bin/validateWebSgm'

# Start from the shared mycurl option parser and add this script's options.
op = mycurl.get_option_parser()
op.add_option("-L", "--language", dest="language", metavar="STR",
              help="language", nargs=1)
op.add_option("-d", "--first-date", dest="firstdate", metavar="STR",
              help="first post date (YYYY-MM-DD)", nargs=1)
op.add_option("-u", "--last-date", dest="lastdate", metavar="STR",
              help="last post date (YYYY-MM-DD)", nargs=1)
op.add_option("-b", "--base-dir", dest="base", metavar="DIR",
              help="webcol base directory", nargs=1)
op.add_option("-t", "--test", dest="test", action="store_true",
              default=False, help="just test")
# Drop the shared parser's -g and -s options, which this script does not
# accept.
op.remove_option("-g")
op.remove_option("-s")
# The full sys.argv is parsed, so args[0] is the script name.
opts, args = op.parse_args(sys.argv)

if opts.base: