def run(self, args, opts):
    """Configure settings from the options and config file, then crawl 'tieba'.

    Args:
        args: Positional command-line arguments; at most two are accepted.
            ``args[0]`` (optional) is the tieba name and ``args[1]``
            (optional) is the database name. Byte strings are decoded as
            UTF-8 in place.
        opts: Parsed options. This method reads ``pages``, ``spargs``,
            ``good_only``, ``see_lz`` and ``filter``, and overwrites
            ``spargs`` and ``filter`` with their resolved values.

    Raises:
        UsageError: If a ``-a`` value is malformed, the filter name cannot
            be resolved, more than two positional arguments are given, or
            no database name can be determined.
    """
    self.set_pages(opts.pages)
    try:
        opts.spargs = arglist_to_dict(opts.spargs)
    except ValueError:
        raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)

    self.settings.set('GOOD_ONLY', opts.good_only)
    self.settings.set('SEE_LZ', opts.see_lz)

    if opts.filter:
        # NOTE(security): eval() runs whatever expression follows "filter."
        # on the command line. Kept for compatibility; consider a getattr()
        # lookup on the filter module if only plain function names are used.
        try:
            opts.filter = eval('filter.' + opts.filter)
        except Exception:
            # Not a bare except: Ctrl-C / SystemExit must still propagate.
            raise UsageError("Invalid filter function name!")
    self.settings.set("FILTER", opts.filter)

    cfg = config.config()
    if len(args) >= 3:
        raise UsageError("Too many arguments!")
    # Decode in place so the caller's list is updated, as before.
    for index, arg in enumerate(args):
        if isinstance(arg, bytes):
            args[index] = arg.decode("utf8")

    # Fall back to port 3306 when the config has no MYSQL_PORT entry.
    if 'MYSQL_PORT' not in cfg.config:
        cfg.config['MYSQL_PORT'] = 3306
    self.settings.set('MYSQL_HOST', cfg.config['MYSQL_HOST'])
    self.settings.set('MYSQL_USER', cfg.config['MYSQL_USER'])
    self.settings.set('MYSQL_PASSWD', cfg.config['MYSQL_PASSWD'])
    self.settings.set('MYSQL_PORT', cfg.config['MYSQL_PORT'])

    # The tieba name defaults to the configured one; args[0] overrides it.
    tbname = cfg.config['DEFAULT_TIEBA']
    if len(args) >= 1:
        tbname = args[0]

    # Database name: the configured mapping for this tieba, unless args[1]
    # is given, in which case the mapping is updated as well.
    dbname = cfg.config['MYSQL_DBNAME'].get(tbname)
    if len(args) >= 2:
        dbname = args[1]
        cfg.config['MYSQL_DBNAME'][tbname] = dbname
    if not dbname:
        raise UsageError("Please input database name!")

    self.settings.set('TIEBA_NAME', tbname, priority='cmdline')
    self.settings.set('MYSQL_DBNAME', dbname, priority='cmdline')

    config.init_database(
        cfg.config['MYSQL_HOST'],
        cfg.config['MYSQL_USER'],
        cfg.config['MYSQL_PASSWD'],
        cfg.config['MYSQL_PORT'],
        dbname,
    )

    log = config.log(tbname, dbname, self.settings['BEGIN_PAGE'],
                     opts.good_only, opts.see_lz)
    self.settings.set('SIMPLE_LOG', log)

    self.crawler_process.crawl('tieba', **opts.spargs)
    self.crawler_process.start()

    # Reached only after start() returns; presumably persists the config,
    # including any new tieba -> database mapping (verify in config.save).
    cfg.save()
def run(self, args, opts):
    """Configure settings from the options and config file, then crawl 'tieba'.

    Python 2 variant: the tieba name is normalised to a UTF-8 byte string
    before it is compared with the (unicode) keys of the configured
    tieba -> database mapping.

    Args:
        args: Positional command-line arguments; at most two are accepted.
            ``args[0]`` (optional) is the tieba name and ``args[1]``
            (optional) is the database name.
        opts: Parsed options. This method reads ``pages``, ``good_only``,
            ``see_lz``, ``filter`` and ``spargs``, and overwrites
            ``filter`` with the resolved object.

    Raises:
        UsageError: If the filter name cannot be resolved, more than two
            positional arguments are given, or no database name can be
            determined.
    """
    self.set_pages(opts.pages)
    self.settings.set('GOOD_ONLY', opts.good_only)
    self.settings.set('SEE_LZ', opts.see_lz)

    if opts.filter:
        # NOTE(security): eval() runs whatever expression follows "filter."
        # on the command line. Kept for compatibility; consider a getattr()
        # lookup on the filter module if only plain function names are used.
        try:
            opts.filter = eval('filter.' + opts.filter)
        except Exception:
            # Not a bare except: Ctrl-C / SystemExit must still propagate.
            raise UsageError("Invalid filter function name!")
    self.settings.set("FILTER", opts.filter)

    cfg = config.config()
    if len(args) >= 3:
        raise UsageError("Too many arguments!")

    self.settings.set('MYSQL_HOST', cfg.config['MYSQL_HOST'])
    self.settings.set('MYSQL_USER', cfg.config['MYSQL_USER'])
    self.settings.set('MYSQL_PASSWD', cfg.config['MYSQL_PASSWD'])

    # The tieba name defaults to the configured one; args[0] overrides it.
    tbname = cfg.config['DEFAULT_TIEBA']
    if len(args) >= 1:
        tbname = args[0]
    if isinstance(tbname, unicode):
        tbname = tbname.encode('utf8')

    # Compare on UTF-8 bytes, since tbname is a byte string at this point.
    dbname = None
    for key, mapped_db in cfg.config['MYSQL_DBNAME'].items():
        if key.encode('utf8') == tbname:
            dbname = mapped_db
    if len(args) >= 2:
        # An explicit database name wins and is recorded in the mapping.
        dbname = args[1]
        cfg.config['MYSQL_DBNAME'][tbname.decode('utf8')] = dbname
    if not dbname:
        raise UsageError("Please input database name!")

    self.settings.set('TIEBA_NAME', tbname, priority='cmdline')
    self.settings.set('MYSQL_DBNAME', dbname, priority='cmdline')

    config.init_database(
        cfg.config['MYSQL_HOST'],
        cfg.config['MYSQL_USER'],
        cfg.config['MYSQL_PASSWD'],
        dbname,
    )

    log = config.log(tbname, dbname, self.settings['BEGIN_PAGE'],
                     opts.good_only, opts.see_lz)
    self.settings.set('SIMPLE_LOG', log)

    self.crawler_process.crawl('tieba', **opts.spargs)
    self.crawler_process.start()

    # Reached only after start() returns; presumably persists the config,
    # including any new tieba -> database mapping (verify in config.save).
    cfg.save()
def run(self, args, opts):
    """Set up MySQL and crawl settings, then run the 'tieba' spider.

    Args:
        args: Up to two positional arguments: an optional tieba name
            followed by an optional database name.
        opts: Parsed options; ``page`` and ``spargs`` are read here.

    Raises:
        UsageError: If three or more positional arguments are given, or if
            no database name can be determined.
    """
    if len(args) > 2:
        raise UsageError()

    cfg = config.config()
    conf = cfg.config

    # Copy the connection credentials into the crawler settings.
    for setting_name in ('MYSQL_HOST', 'MYSQL_USER', 'MYSQL_PASSWD'):
        self.settings.set(setting_name, conf[setting_name])

    # Start from the configured default; a first argument overrides it.
    tbname = conf['DEFAULT_TIEBA']
    if args:
        tbname = args[0]
    if isinstance(tbname, unicode):
        tbname = tbname.encode('utf8')

    # Match the byte-string tieba name against the UTF-8 encoded keys.
    dbname = None
    for tieba_key, mapped_db in conf['MYSQL_DBNAME'].items():
        if tieba_key.encode('utf8') == tbname:
            dbname = mapped_db

    # A second argument names the database and is recorded in the mapping.
    if len(args) > 1:
        dbname = args[1]
        conf['MYSQL_DBNAME'][tbname.decode('utf8')] = dbname

    if not dbname:
        raise UsageError()

    for setting_name, value in (
        ('TIEBA_NAME', tbname),
        ('MYSQL_DBNAME', dbname),
        ('MAX_PAGE', opts.page),
    ):
        self.settings.set(setting_name, value, priority='cmdline')

    config.init_database(
        conf['MYSQL_HOST'],
        conf['MYSQL_USER'],
        conf['MYSQL_PASSWD'],
        dbname,
    )

    self.settings.set('SIMPLE_LOG', config.log(tbname, dbname))

    process = self.crawler_process
    process.crawl('tieba', **opts.spargs)
    process.start()

    # Runs only after start() has returned.
    cfg.save()
def run(self, args, opts):
    """Configure settings from the options and config file, then crawl 'pantip'.

    Args:
        args: Positional command-line arguments; at most two are accepted.
            ``args[0]`` (optional) is the board name (stored as
            ``TIEBA_NAME``) and ``args[1]`` (optional) is the database name.
        opts: Parsed options. This method reads ``pages``, ``filter`` and
            ``spargs``, and overwrites ``filter`` with the resolved object.

    Raises:
        UsageError: If the filter name cannot be resolved, more than two
            positional arguments are given, or no database name can be
            determined.
    """
    self.set_pages(opts.pages)

    if opts.filter:
        # NOTE(security): eval() runs whatever expression follows "filter."
        # on the command line. Kept for compatibility; consider a getattr()
        # lookup on the filter module if only plain function names are used.
        try:
            opts.filter = eval('filter.' + opts.filter)
        except Exception:
            # Not a bare except: Ctrl-C / SystemExit must still propagate.
            raise UsageError("Invalid filter function name!")
    self.settings.set("FILTER", opts.filter)

    cfg = config.config()
    if len(args) >= 3:
        raise UsageError("Too many arguments!")

    self.settings.set('MYSQL_HOST', cfg.config['MYSQL_HOST'])
    self.settings.set('MYSQL_USER', cfg.config['MYSQL_USER'])
    self.settings.set('MYSQL_PASSWD', cfg.config['MYSQL_PASSWD'])
    self.settings.set('MYSQL_USE_SSL', cfg.config['MYSQL_USE_SSL'])
    self.settings.set('MYSQL_SSL_CHECK_HOSTNAME',
                      cfg.config['MYSQL_SSL_CHECK_HOSTNAME'])
    self.settings.set('MYSQL_SSL_CA_PATH', cfg.config['MYSQL_SSL_CA_PATH'])

    # The name defaults to the configured one; args[0] overrides it.
    tbname = cfg.config['DEFAULT_TIEBA']
    if len(args) >= 1:
        tbname = args[0]
    # From here on the name is handled as UTF-8 bytes.
    if isinstance(tbname, str):
        tbname = tbname.encode('utf8')

    dbname = None
    for key, mapped_db in cfg.config['MYSQL_DBNAME'].items():
        if key.encode('utf8') == tbname:
            dbname = mapped_db
    if len(args) >= 2:
        # An explicit database name wins and is recorded in the mapping.
        dbname = args[1]
        cfg.config['MYSQL_DBNAME'][tbname.decode('utf8')] = dbname
    if not dbname:
        raise UsageError("Please input database name!")

    self.settings.set('TIEBA_NAME', tbname, priority='cmdline')
    self.settings.set('MYSQL_DBNAME', dbname, priority='cmdline')

    # The config stores these flags as the strings 'True' / 'False'.
    # Hostname checking is only consulted when SSL is enabled, and is on
    # unless explicitly set to 'False'.
    # NOTE(review): assumes the hostname check was nested under the
    # MYSQL_USE_SSL test in the original layout - confirm.
    use_ssl = cfg.config['MYSQL_USE_SSL'] == 'True'
    ssl_check_hostname = (
        use_ssl and cfg.config['MYSQL_SSL_CHECK_HOSTNAME'] != 'False'
    )

    config.init_database(
        cfg.config['MYSQL_HOST'],
        cfg.config['MYSQL_USER'],
        cfg.config['MYSQL_PASSWD'],
        dbname,
        use_ssl=use_ssl,
        ssl_check_hostname=ssl_check_hostname,
        ssl_ca=cfg.config['MYSQL_SSL_CA_PATH'],
        spider_type='pantip',
    )

    log = config.log(tbname, dbname, self.settings['BEGIN_PAGE'])
    self.settings.set('SIMPLE_LOG', log)

    self.crawler_process.crawl('pantip', **opts.spargs)
    self.crawler_process.start()

    # Reached only after start() returns; presumably persists the config,
    # including any new name -> database mapping (verify in config.save).
    cfg.save()