import os
import sys
import shutil
import logging
import logging.config

from pyspider.database import connect_database
from pyspider.libs import utils
from pyspider.message_queue import connect_message_queue


def connect_db(ctx, param, value):
    # click option callback: leave unset options alone, otherwise wrap the
    # database URL in a lazy getter so the connection is only opened on first use
    if not value:
        return
    return utils.Get(lambda: connect_database(value))
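# Illustration (assumption, not taken from the original module): connect_db is
# shaped as a click option callback -- click invokes it with (ctx, param, value)
# and stores whatever it returns, so the parsed option holds a lazy database
# handle instead of the raw URL string. A hedged sketch of wiring it to a
# hypothetical option (option name and help text are assumptions):
#
#   @click.option('--taskdb', callback=connect_db,
#                 help='database url for taskdb')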
def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    if kwargs['add_sys_path']:
        sys.path.append(os.getcwd())

    logging.config.fileConfig(kwargs['logging_config'])

    # get db from env
    for db in ('taskdb', 'projectdb', 'resultdb'):
        if kwargs[db] is not None:
            continue
        # bind db as a default argument so each lazy getter keeps its own value
        if os.environ.get('MYSQL_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'sqlalchemy+mysql+%s://%s:%s/%s' % (
                    db, os.environ['MYSQL_PORT_3306_TCP_ADDR'],
                    os.environ['MYSQL_PORT_3306_TCP_PORT'], db)))
        elif os.environ.get('MONGODB_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'mongodb+%s://%s:%s/%s' % (
                    db, os.environ['MONGODB_PORT_27017_TCP_ADDR'],
                    os.environ['MONGODB_PORT_27017_TCP_PORT'], db)))
        elif ctx.invoked_subcommand == 'bench':
            if kwargs['data_path'] == './data':
                kwargs['data_path'] += '/bench'
                shutil.rmtree(kwargs['data_path'], ignore_errors=True)
                os.mkdir(kwargs['data_path'])
            if db in ('taskdb', 'resultdb'):
                kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s://' % (db)))
            elif db in ('projectdb', ):
                kwargs[db] = utils.Get(lambda db=db: connect_database('local+%s://%s' % (
                    db, os.path.join(os.path.dirname(__file__), 'libs/bench.py'))))
        else:
            if not os.path.exists(kwargs['data_path']):
                os.mkdir(kwargs['data_path'])
            kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s:///%s/%s.db' % (
                db, kwargs['data_path'], db[:-2])))
            kwargs['is_%s_default' % db] = True

    # create folder for counter.dump
    if not os.path.exists(kwargs['data_path']):
        os.mkdir(kwargs['data_path'])

    # message queue, compatible with old version
    if kwargs.get('message_queue'):
        pass
    elif kwargs.get('amqp_url'):
        kwargs['message_queue'] = kwargs['amqp_url']
    elif os.environ.get('RABBITMQ_NAME'):
        kwargs['message_queue'] = ("amqp://*****:*****@%(RABBITMQ_PORT_5672_TCP_ADDR)s"
                                   ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ)
    elif kwargs.get('beanstalk'):
        kwargs['message_queue'] = "beanstalk://%s/" % kwargs['beanstalk']

    for name in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                 'fetcher2processor', 'processor2result'):
        if kwargs.get('message_queue'):
            kwargs[name] = utils.Get(lambda name=name: connect_message_queue(
                name, kwargs.get('message_queue'), kwargs['queue_maxsize']))
        else:
            kwargs[name] = connect_message_queue(name, kwargs.get('message_queue'),
                                                 kwargs['queue_maxsize'])

    # phantomjs-proxy
    if kwargs.get('phantomjs_proxy'):
        pass
    elif os.environ.get('PHANTOMJS_NAME'):
        kwargs['phantomjs_proxy'] = os.environ['PHANTOMJS_PORT_25555_TCP'][len('tcp://'):]

    # puppeteer-proxy
    if kwargs.get('puppeteer_proxy'):
        pass
    elif os.environ.get('PUPPETEER_NAME'):
        kwargs['puppeteer_proxy'] = os.environ['PUPPETEER_PORT_22222_TCP'][len('tcp://'):]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj['instances'] = []
    ctx.obj.update(kwargs)

    if ctx.invoked_subcommand is None and not ctx.obj.get('testing_mode'):
        ctx.invoke(all)
    return ctx
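# Illustration (not part of the original module): the block above resolves the
# queue backend in order of an explicit message_queue setting, then amqp_url,
# then a linked RabbitMQ container (RABBITMQ_NAME), then beanstalk. With
# docker-style environment variables such as
#
#   RABBITMQ_NAME=rabbitmq
#   RABBITMQ_PORT_5672_TCP_ADDR=172.17.0.3   (example value)
#   RABBITMQ_PORT_5672_TCP_PORT=5672         (example value)
#
# every named queue is created through something equivalent to
#
#   connect_message_queue('status_queue',
#                         'amqp://<user>:<pass>@172.17.0.3:5672/%2F',
#                         kwargs['queue_maxsize'])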
def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    logging.config.fileConfig(kwargs['logging_config'])

    # get db from env
    for db in ('taskdb', 'projectdb', 'resultdb'):
        if kwargs[db] is not None:
            continue
        if os.environ.get('MYSQL_NAME'):
            kwargs[db] = utils.Get(
                lambda db=db: connect_database('mysql+%s://%s:%s/%s' % (
                    db, os.environ['MYSQL_PORT_3306_TCP_ADDR'],
                    os.environ['MYSQL_PORT_3306_TCP_PORT'], db)))
        elif os.environ.get('MONGODB_NAME'):
            kwargs[db] = utils.Get(
                lambda db=db: connect_database('mongodb+%s://%s:%s/%s' % (
                    db, os.environ['MONGODB_PORT_27017_TCP_ADDR'],
                    os.environ['MONGODB_PORT_27017_TCP_PORT'], db)))
        elif ctx.invoked_subcommand == 'bench':
            if kwargs['data_path'] == './data':
                kwargs['data_path'] += '/bench'
                shutil.rmtree(kwargs['data_path'], ignore_errors=True)
                os.mkdir(kwargs['data_path'])
            if db in ('taskdb', 'resultdb'):
                kwargs[db] = utils.Get(
                    lambda db=db: connect_database('sqlite+%s://' % (db)))
            else:
                kwargs[db] = utils.Get(
                    lambda db=db: connect_database('sqlite+%s:///%s/%s.db' % (
                        db, kwargs['data_path'], db[:-2])))
        else:
            if not os.path.exists(kwargs['data_path']):
                os.mkdir(kwargs['data_path'])
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'sqlite+%s:///%s/%s.db' % (db, kwargs['data_path'], db[:-2])))
            kwargs['is_%s_default' % db] = True

    # create folder for counter.dump
    if not os.path.exists(kwargs['data_path']):
        os.mkdir(kwargs['data_path'])

    # queue
    if kwargs.get('amqp_url'):
        from pyspider.libs.rabbitmq import Queue
        for name in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                     'fetcher2processor', 'processor2result'):
            kwargs[name] = utils.Get(
                lambda name=name: Queue(name, amqp_url=kwargs['amqp_url'],
                                        maxsize=kwargs['queue_maxsize']))
    elif os.environ.get('RABBITMQ_NAME'):
        from pyspider.libs.rabbitmq import Queue
        amqp_url = ("amqp://*****:*****@%(RABBITMQ_PORT_5672_TCP_ADDR)s"
                    ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ)
        for name in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                     'fetcher2processor', 'processor2result'):
            kwargs[name] = utils.Get(lambda name=name: Queue(
                name, amqp_url=amqp_url, maxsize=kwargs['queue_maxsize']))
    elif kwargs.get('beanstalk'):
        from pyspider.libs.beanstalk import Queue
        for name in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                     'fetcher2processor', 'processor2result'):
            kwargs[name] = utils.Get(
                lambda name=name: Queue(name, host=kwargs.get('beanstalk'),
                                        maxsize=kwargs['queue_maxsize']))
    else:
        from multiprocessing import Queue
        for name in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                     'fetcher2processor', 'processor2result'):
            kwargs[name] = Queue(kwargs['queue_maxsize'])

    # phantomjs-proxy
    if kwargs.get('phantomjs_proxy'):
        pass
    elif os.environ.get('PHANTOMJS_NAME'):
        kwargs['phantomjs_proxy'] = os.environ['PHANTOMJS_PORT_25555_TCP'][len('tcp://'):]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj['instances'] = []
    ctx.obj.update(kwargs)

    if ctx.invoked_subcommand is None and not ctx.obj.get('testing_mode'):
        ctx.invoke(all)
    return ctx
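# Illustration (assumption, not pyspider's actual utils.Get): the utils.Get
# wrappers used above defer the connect_database / Queue calls until the value
# is first read, so parsing CLI options never opens a connection by itself. A
# minimal lazy-getter descriptor in the same spirit (hypothetical name):
class LazyGet(object):
    def __init__(self, getter):
        self.getter = getter

    def __get__(self, instance, owner):
        # resolved only when read as an attribute of the owning object
        return self.getter()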