Пример #1
0
    def run(self, args, opts):
        """Configure the crawl from CLI input and stored config, then run the 'tieba' spider.

        Args:
            args: Positional arguments, at most two: an optional tieba name
                followed by an optional database name. ``bytes`` entries are
                decoded as UTF-8 in place.
            opts: Parsed options; this method reads ``pages``, ``spargs``,
                ``good_only``, ``see_lz`` and ``filter``, and overwrites
                ``spargs`` and ``filter`` with their resolved values.

        Raises:
            UsageError: If a ``-a`` value is malformed, the filter expression
                cannot be evaluated, more than two positional arguments are
                given, or no database name can be determined.
        """
        self.set_pages(opts.pages)
        try:
            opts.spargs = arglist_to_dict(opts.spargs)
        except ValueError:
            raise UsageError("Invalid -a value, use -a NAME=VALUE",
                             print_help=False)
        self.settings.set('GOOD_ONLY', opts.good_only)
        self.settings.set('SEE_LZ', opts.see_lz)
        if opts.filter:
            # SECURITY(review): eval() executes the option text as Python code,
            # so anything after 'filter.' is run, not merely looked up. If only
            # plain attribute names are expected, getattr(filter, name) would
            # be safer -- confirm with the callers before changing.
            try:
                opts.filter = eval('filter.' + opts.filter)
            except Exception:
                # Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not turned into a usage error.
                raise UsageError("Invalid filter function name!")
        self.settings.set("FILTER", opts.filter)
        cfg = config.config()
        if len(args) >= 3:
            raise UsageError("Too many arguments!")

        # Normalise byte-string arguments to text before they are used as keys.
        for index, arg in enumerate(args):
            if isinstance(arg, bytes):
                args[index] = arg.decode("utf8")

        # Fall back to port 3306 when the stored config has no port entry; the
        # default is written into cfg.config, which is saved at the end.
        if 'MYSQL_PORT' not in cfg.config:
            cfg.config['MYSQL_PORT'] = 3306

        self.settings.set('MYSQL_HOST', cfg.config['MYSQL_HOST'])
        self.settings.set('MYSQL_USER', cfg.config['MYSQL_USER'])
        self.settings.set('MYSQL_PASSWD', cfg.config['MYSQL_PASSWD'])
        self.settings.set('MYSQL_PORT', cfg.config['MYSQL_PORT'])

        # First positional argument overrides the configured default tieba.
        tbname = cfg.config['DEFAULT_TIEBA']
        if len(args) >= 1:
            tbname = args[0]

        # Database name: the stored mapping for this tieba, unless a second
        # positional argument is given, which wins and is remembered.
        dbname = cfg.config['MYSQL_DBNAME'].get(tbname)
        if len(args) >= 2:
            dbname = args[1]
            cfg.config['MYSQL_DBNAME'][tbname] = dbname
        if not dbname:
            raise UsageError("Please input database name!")

        self.settings.set('TIEBA_NAME', tbname, priority='cmdline')
        self.settings.set('MYSQL_DBNAME', dbname, priority='cmdline')

        config.init_database(cfg.config['MYSQL_HOST'],
                             cfg.config['MYSQL_USER'],
                             cfg.config['MYSQL_PASSWD'],
                             cfg.config['MYSQL_PORT'], dbname)

        log = config.log(tbname, dbname, self.settings['BEGIN_PAGE'],
                         opts.good_only, opts.see_lz)
        self.settings.set('SIMPLE_LOG', log)
        self.crawler_process.crawl('tieba', **opts.spargs)
        self.crawler_process.start()

        # Called only after crawler_process.start() returns; presumably
        # persists the (possibly updated) config -- verify in config.save().
        cfg.save()
Пример #2
0
    def run(self, args, opts):
        """Configure the crawl from CLI input and stored config, then run the 'tieba' spider.

        This variant relies on the ``unicode`` builtin, i.e. it targets
        Python 2.

        Args:
            args: Positional arguments, at most two: an optional tieba name
                followed by an optional database name.
            opts: Parsed options; this method reads ``pages``, ``good_only``,
                ``see_lz``, ``filter`` and ``spargs``, and overwrites
                ``filter`` with the resolved object.

        Raises:
            UsageError: If the filter expression cannot be evaluated, more
                than two positional arguments are given, or no database name
                can be determined.
        """
        self.set_pages(opts.pages)
        self.settings.set('GOOD_ONLY', opts.good_only)
        self.settings.set('SEE_LZ', opts.see_lz)
        if opts.filter:
            # SECURITY(review): eval() executes the option text as Python code,
            # so anything after 'filter.' is run, not merely looked up. If only
            # plain attribute names are expected, getattr(filter, name) would
            # be safer -- confirm with the callers before changing.
            try:
                opts.filter = eval('filter.' + opts.filter)
            except Exception:
                # Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not turned into a usage error.
                raise UsageError("Invalid filter function name!")
        self.settings.set("FILTER", opts.filter)
        cfg = config.config()
        if len(args) >= 3:
            raise UsageError("Too many arguments!")

        self.settings.set('MYSQL_HOST', cfg.config['MYSQL_HOST'])
        self.settings.set('MYSQL_USER', cfg.config['MYSQL_USER'])
        self.settings.set('MYSQL_PASSWD', cfg.config['MYSQL_PASSWD'])

        # First positional argument overrides the configured default tieba.
        tbname = cfg.config['DEFAULT_TIEBA']
        if len(args) >= 1:
            tbname = args[0]
        # From here on tbname is handled as a UTF-8 encoded byte string.
        if isinstance(tbname, unicode):
            tbname = tbname.encode('utf8')

        # Look the tieba up in the stored mapping by comparing UTF-8 encoded
        # keys against the encoded name.
        dbname = None
        for key in cfg.config['MYSQL_DBNAME'].keys():
            if key.encode('utf8') == tbname:
                dbname = cfg.config['MYSQL_DBNAME'][key]
        # A second positional argument wins and is remembered under the
        # decoded tieba name.
        if len(args) >= 2:
            dbname = args[1]
            cfg.config['MYSQL_DBNAME'][tbname.decode('utf8')] = dbname
        if not dbname:
            raise UsageError("Please input database name!")

        self.settings.set('TIEBA_NAME', tbname, priority='cmdline')
        self.settings.set('MYSQL_DBNAME', dbname, priority='cmdline')

        config.init_database(cfg.config['MYSQL_HOST'],
                             cfg.config['MYSQL_USER'],
                             cfg.config['MYSQL_PASSWD'], dbname)

        log = config.log(tbname, dbname, self.settings['BEGIN_PAGE'],
                         opts.good_only, opts.see_lz)
        self.settings.set('SIMPLE_LOG', log)

        self.crawler_process.crawl('tieba', **opts.spargs)
        self.crawler_process.start()

        # Called only after crawler_process.start() returns; presumably
        # persists the (possibly updated) config -- verify in config.save().
        cfg.save()
Пример #3
0
 def run(self, args, opts):
     """Resolve tieba and database names, publish settings, and run the 'tieba' spider.

     Uses the ``unicode`` builtin, i.e. this variant targets Python 2.

     Args:
         args: Positional arguments, at most two: an optional tieba name
             followed by an optional database name.
         opts: Parsed options; ``page`` and ``spargs`` are read.

     Raises:
         UsageError: With no message, when more than two positional
             arguments are given or no database name can be determined.
     """
     argc = len(args)
     if argc > 2:
         raise UsageError()

     cfg = config.config()
     stored = cfg.config

     # Copy the MySQL connection parameters into the crawler settings.
     for option in ('MYSQL_HOST', 'MYSQL_USER', 'MYSQL_PASSWD'):
         self.settings.set(option, stored[option])

     # The configured default applies unless a name is given on the command line.
     tieba = stored['DEFAULT_TIEBA']
     if argc > 0:
         tieba = args[0]
     if isinstance(tieba, unicode):
         tieba = tieba.encode('utf8')

     # Match the UTF-8 encoded name against the UTF-8 encoded mapping keys.
     known_dbs = stored['MYSQL_DBNAME']
     database = None
     for key in known_dbs.keys():
         if key.encode('utf8') != tieba:
             continue
         database = known_dbs[key]

     # An explicit database argument wins and is remembered for this tieba.
     if argc > 1:
         database = args[1]
         known_dbs[tieba.decode('utf8')] = database
     if not database:
         raise UsageError()

     def set_cmdline(option, value):
         # Helper: store a setting with 'cmdline' priority.
         self.settings.set(option, value, priority='cmdline')

     set_cmdline('TIEBA_NAME', tieba)
     set_cmdline('MYSQL_DBNAME', database)
     set_cmdline('MAX_PAGE', opts.page)

     config.init_database(stored['MYSQL_HOST'],
                          stored['MYSQL_USER'],
                          stored['MYSQL_PASSWD'],
                          database)

     simple_log = config.log(tieba, database)
     self.settings.set('SIMPLE_LOG', simple_log)

     process = self.crawler_process
     process.crawl('tieba', **opts.spargs)
     process.start()

     # Runs only after the crawler process has returned.
     cfg.save()
Пример #4
0
    def run(self, args, opts):
        """Configure the crawl from CLI input and stored config, then run the 'pantip' spider.

        Args:
            args: Positional arguments, at most two: an optional target name
                (held in ``tbname``) followed by an optional database name.
            opts: Parsed options; this method reads ``pages``, ``filter`` and
                ``spargs``, and overwrites ``filter`` with the resolved object.

        Raises:
            UsageError: If the filter expression cannot be evaluated, more
                than two positional arguments are given, or no database name
                can be determined.
        """
        self.set_pages(opts.pages)
        if opts.filter:
            # SECURITY(review): eval() executes the option text as Python code,
            # so anything after 'filter.' is run, not merely looked up. If only
            # plain attribute names are expected, getattr(filter, name) would
            # be safer -- confirm with the callers before changing.
            try:
                opts.filter = eval('filter.' + opts.filter)
            except Exception:
                # Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not turned into a usage error.
                raise UsageError("Invalid filter function name!")
        self.settings.set("FILTER", opts.filter)
        cfg = config.config()
        if len(args) >= 3:
            raise UsageError("Too many arguments!")

        self.settings.set('MYSQL_HOST', cfg.config['MYSQL_HOST'])
        self.settings.set('MYSQL_USER', cfg.config['MYSQL_USER'])
        self.settings.set('MYSQL_PASSWD', cfg.config['MYSQL_PASSWD'])
        self.settings.set('MYSQL_USE_SSL', cfg.config['MYSQL_USE_SSL'])
        self.settings.set('MYSQL_SSL_CHECK_HOSTNAME',
                          cfg.config['MYSQL_SSL_CHECK_HOSTNAME'])
        self.settings.set('MYSQL_SSL_CA_PATH', cfg.config['MYSQL_SSL_CA_PATH'])

        # First positional argument overrides the configured default name.
        tbname = cfg.config['DEFAULT_TIEBA']
        if len(args) >= 1:
            tbname = args[0]
        # From here on tbname is the UTF-8 encoded form; note that this
        # encoded value is also what gets stored in the TIEBA_NAME setting.
        if isinstance(tbname, str):
            tbname = tbname.encode('utf8')

        # Look the name up in the stored mapping by comparing UTF-8 encoded
        # keys against the encoded name.
        dbname = None
        for key in cfg.config['MYSQL_DBNAME'].keys():
            if key.encode('utf8') == tbname:
                dbname = cfg.config['MYSQL_DBNAME'][key]
        # A second positional argument wins and is remembered under the
        # decoded name.
        if len(args) >= 2:
            dbname = args[1]
            cfg.config['MYSQL_DBNAME'][tbname.decode('utf8')] = dbname
        if not dbname:
            raise UsageError("Please input database name!")

        self.settings.set('TIEBA_NAME', tbname, priority='cmdline')
        self.settings.set('MYSQL_DBNAME', dbname, priority='cmdline')

        # The SSL flags are compared as strings: SSL is on only for the exact
        # text 'True'; hostname checking is on for anything except 'False'.
        use_ssl = cfg.config['MYSQL_USE_SSL'] == 'True'
        ssl_check_hostname = cfg.config['MYSQL_SSL_CHECK_HOSTNAME'] != 'False'

        config.init_database(cfg.config['MYSQL_HOST'],
                             cfg.config['MYSQL_USER'],
                             cfg.config['MYSQL_PASSWD'], dbname,
                             use_ssl=use_ssl,
                             ssl_check_hostname=ssl_check_hostname,
                             ssl_ca=cfg.config['MYSQL_SSL_CA_PATH'],
                             spider_type='pantip')

        log = config.log(tbname, dbname, self.settings['BEGIN_PAGE'])
        self.settings.set('SIMPLE_LOG', log)

        self.crawler_process.crawl('pantip', **opts.spargs)
        self.crawler_process.start()

        # Called only after crawler_process.start() returns; presumably
        # persists the (possibly updated) config -- verify in config.save().
        cfg.save()