Exemplo n.º 1
0
def load_spider_class(config, spider_name):
    """Return the spider class registered under ``spider_name``.

    Whenever ``SPIDER_REGISTRY`` is empty it is first populated by calling
    ``build_spider_registry(config)``.

    Args:
        config: configuration object handed to ``build_spider_registry``.
        spider_name: registry key of the wanted spider class.

    Returns:
        The value stored in ``SPIDER_REGISTRY`` for ``spider_name``.

    Raises:
        SpiderInternalError: if ``spider_name`` is not in the registry.
    """
    registry_is_empty = not SPIDER_REGISTRY
    if registry_is_empty:
        # Lazy initialisation: only scan for spiders when nothing is known.
        build_spider_registry(config)

    if spider_name in SPIDER_REGISTRY:
        return SPIDER_REGISTRY[spider_name]
    raise SpiderInternalError('Unknown spider: %s' % spider_name)
Exemplo n.º 2
0
def build_spider_registry(config):
    """Fill ``SPIDER_REGISTRY`` with spider classes from configured modules.

    Every entry of ``config.get('GRAB_SPIDER_MODULES')`` is either a dotted
    module path, in which case every ``Spider`` subclass found among the
    module attributes is registered, or ``module.path:ClassName``, in which
    case only the attribute with that exact name is considered.

    A module whose import raises ``ImportError`` is skipped. The error is
    logged unless its message contains the module path itself.

    Args:
        config: object whose ``get('GRAB_SPIDER_MODULES')`` yields an
            iterable of module specifications. A missing/empty value is
            treated as "no modules".

    Raises:
        SpiderInternalError: if two different classes report the same name
            from ``get_spider_name()``.
    """
    # TODO: make smart spider class searching

    # ``get`` may return None when the option is absent; iterate nothing
    # in that case instead of failing with a TypeError.
    module_specs = config.get('GRAB_SPIDER_MODULES') or ()

    for path in module_specs:
        if ':' in path:
            path, cls_name = path.split(':')
        else:
            cls_name = None

        try:
            # A non-empty fromlist makes __import__ return the module named
            # by ``path`` rather than its top-level package.
            mod = __import__(path, None, None, ['foo'])
        except ImportError as ex:
            # str() works on both Python 2 and 3; the former unicode() call
            # raised NameError on Python 3.
            if path not in str(ex):
                logger.error('Error while importing spider module `%s`',
                             path, exc_info=ex)
            continue

        for key in dir(mod):
            # Never register the attribute named like the base class.
            if key == 'Spider':
                continue
            if cls_name is not None and key != cls_name:
                continue
            val = getattr(mod, key)
            if not (isinstance(val, type) and issubclass(val, Spider)):
                continue

            spider_name = val.get_spider_name()
            logger.debug('Module `%s`, found spider `%s` with name `%s`',
                         path, val.__name__, spider_name)
            if spider_name in SPIDER_REGISTRY:
                if SPIDER_REGISTRY[spider_name] is val:
                    # Same class reached again (alias or re-import in
                    # another listed module): not a naming conflict.
                    continue
                raise SpiderInternalError(
                    'There are two different spiders with the '
                    'same name "%s"' % spider_name)
            SPIDER_REGISTRY[spider_name] = val
Exemplo n.º 3
0
def build_spider_registry(config):
    """Rebuild ``SPIDER_REGISTRY`` from the configured spider modules.

    The registry is cleared first. The module list is read from
    ``config['global']`` under the key ``spider_modules`` (with
    ``GRAB_SPIDER_MODULES`` passed as the deprecated key and ``[]`` as the
    default). Each entry is either a dotted module path or
    ``module.path:ClassName``; in the latter form only the attribute with
    that exact name is considered.

    A module whose import raises ``ImportError`` is skipped. The error is
    logged unless its message contains the module path itself. Spider
    classes whose ``Meta.abstract`` is true are not registered.

    Args:
        config: mapping whose ``'global'`` section provides the
            ``spider_modules`` option.

    Returns:
        The ``SPIDER_REGISTRY`` mapping of spider name to spider class.

    Raises:
        SpiderInternalError: if two different classes report the same name
            from ``get_spider_name()``.
    """
    # TODO: make smart spider class searching

    SPIDER_REGISTRY.clear()

    opt_modules = config['global'].get('spider_modules',
                                       deprecated_key='GRAB_SPIDER_MODULES',
                                       default=[])

    for path in opt_modules:
        if ':' in path:
            path, cls_name = path.split(':')
        else:
            cls_name = None

        try:
            # A non-empty fromlist makes __import__ return the module named
            # by ``path`` rather than its top-level package.
            mod = __import__(path, None, None, ['foo'])
        except ImportError as ex:
            # str() works on both Python 2 and 3; the former unicode() call
            # raised NameError on Python 3.
            if path not in str(ex):
                logger.error('Error while importing spider module `%s`',
                             path, exc_info=ex)
            continue

        for key in dir(mod):
            # Never register the attribute named like the base class.
            if key == 'Spider':
                continue
            if cls_name is not None and key != cls_name:
                continue
            val = getattr(mod, key)
            if not (isinstance(val, type) and issubclass(val, Spider)):
                continue
            if val.Meta.abstract:
                continue

            spider_name = val.get_spider_name()
            logger.debug('Module `%s`, found spider `%s` with name `%s`',
                         path, val.__name__, spider_name)
            if spider_name in SPIDER_REGISTRY:
                registered = SPIDER_REGISTRY[spider_name]
                if registered is val:
                    # Same class reached again (alias or re-import in
                    # another listed module): not a naming conflict.
                    continue
                raise SpiderInternalError(
                    'There are two different spiders with the '
                    'same name "%s". Modules: %s and %s' % (
                        spider_name,
                        registered.__module__,
                        val.__module__))
            SPIDER_REGISTRY[spider_name] = val
    return SPIDER_REGISTRY
Exemplo n.º 4
0
def build_spider_registry(config):
    """Rebuild ``SPIDER_REGISTRY`` from the configured spider modules.

    The registry is cleared first. The module list is read from
    ``config['global'].get('spider_modules', [])``. Each entry is either a
    dotted module path or ``module.path:ClassName``; in the latter form only
    the attribute with that exact name is considered.

    A module whose import raises ``ImportError`` is skipped. The error is
    logged unless its message contains the module path itself. Spider
    classes whose ``Meta.abstract`` is true are not registered.

    Args:
        config: mapping whose ``'global'`` section provides the
            ``spider_modules`` option.

    Returns:
        The ``SPIDER_REGISTRY`` mapping of spider name to spider class.

    Raises:
        SpiderInternalError: if two different classes report the same name
            from ``get_spider_name()``.
    """
    SPIDER_REGISTRY.clear()

    opt_modules = config['global'].get('spider_modules', [])

    for path in opt_modules:
        if ':' in path:
            path, cls_name = path.split(':')
        else:
            cls_name = None

        try:
            # A non-empty fromlist makes __import__ return the module named
            # by ``path`` rather than its top-level package.
            mod = __import__(path, None, None, ['foo'])
        except ImportError as ex:
            if path not in six.text_type(ex):
                logger.error('Error while importing spider module `%s`',
                             path, exc_info=ex)
            continue

        for key in dir(mod):
            # Never register the attribute named like the base class.
            if key == 'Spider':
                continue
            if cls_name is not None and key != cls_name:
                continue
            val = getattr(mod, key)
            if not (isinstance(val, type) and issubclass(val, Spider)):
                continue
            if val.Meta.abstract:
                continue

            spider_name = val.get_spider_name()
            logger.debug(
                'Module `%s`, found spider `%s` '
                'with name `%s`', path, val.__name__,
                spider_name)
            if spider_name in SPIDER_REGISTRY:
                registered = SPIDER_REGISTRY[spider_name]
                if registered is val:
                    # Same class reached again (alias or re-import in
                    # another listed module): not a naming conflict.
                    continue
                # Use a dedicated name; the original rebound the loop's
                # module variable ``mod`` to this string.
                first_module = registered.__module__
                raise SpiderInternalError(
                    'There are two different spiders with '
                    'the same name "%s". '
                    'Modules: %s and %s' %
                    (spider_name, first_module, val.__module__))
            SPIDER_REGISTRY[spider_name] = val
    return SPIDER_REGISTRY