# Shared imports assumed by the snippets below; each example originally lived
# in its own module (e.g. sh_scrapy.crawl or the sh_scrapy test suite).
import codecs
import json
import logging
import os
import warnings

from sh_scrapy.compat import to_bytes, to_native_str
from sh_scrapy.env import get_args_and_env


def main():
    try:
        from sh_scrapy.env import get_args_and_env, decode_uri
        job = decode_uri(envvar='JOB_DATA')
        args, env = get_args_and_env(job)
        os.environ.update(env)

        from sh_scrapy.log import initialize_logging
        from sh_scrapy.settings import populate_settings
        loghdlr = initialize_logging()
    except BaseException:
        # Nothing is configured yet, so report the failure out-of-band and
        # re-raise; _fatalerror() is a module helper (sketched below).
        _fatalerror()
        raise

    # user code will be imported beyond this point --------------
    try:
        settings = populate_settings(job['spider'])
        loghdlr.setLevel(settings['LOG_LEVEL'])
    except Exception:
        logging.exception('Settings initialization failed')
        raise

    try:
        _run(args, settings)
    except Exception:
        logging.exception('Job execution failed')
        raise
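
Note: main() relies on two helpers that the snippet does not include: _fatalerror(), which reports failures that occur before logging is configured, and _run(), which actually starts the job. Below is a minimal sketch of what _fatalerror() could look like, assuming its only duty is to dump the active traceback to stderr; the real sh_scrapy implementation may do more.

import sys
import traceback


def _fatalerror():
    # Hypothetical sketch: logging may not be set up yet (initialize_logging()
    # itself may be the call that failed), so write the active traceback
    # straight to stderr instead of going through the logging module.
    traceback.print_exc(file=sys.stderr)
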
def main():
    try:
        from sh_scrapy.env import get_args_and_env
        with open(os.environ['JOB_DATA']) as jobfile:  # close the file promptly
            job = json.load(jobfile)
        args, env = get_args_and_env(job)
        os.environ.update(env)

        from sh_scrapy.log import initialize_logging
        from sh_scrapy.settings import populate_settings
        loghdlr = initialize_logging()
    except BaseException:
        _fatalerror()
        raise

    # user code will be imported beyond this point --------------
    try:
        settings = populate_settings(job['spider'])
        loghdlr.setLevel(settings['LOG_LEVEL'])
    except Exception:
        logging.exception('Settings initialization failed')
        raise

    try:
        _run(args, settings)
    except Exception:
        logging.exception('Job execution failed')
        raise
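
The two main() variants differ only in how the job message is obtained: the first decodes it from the JOB_DATA environment variable via decode_uri() from sh_scrapy.env, while the second treats JOB_DATA as a path to a plain JSON file and parses it directly. Everything past that point (exporting the environment, initializing logging, populating settings, running the job) is identical.
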
def test_get_args_and_env_run_script():
    msg = {'key': '1/2/3', 'job_cmd': ['custom.py', 'arg1'],
           'auth': 'authstring'}
    result = get_args_and_env(msg)
    expected_auth = codecs.encode(to_bytes('1/2/3:authstring'), 'hex')
    assert len(result) == 2
    assert result[0] == ['custom.py', 'arg1']
    assert result[1] == {
        'SHUB_JOBAUTH': to_native_str(expected_auth),
        'SHUB_JOBKEY': '1/2/3',
        'SHUB_JOBNAME': 'custom.py',
        'SHUB_JOB_TAGS': ''}
    add_fields = {'tags': ['tagA', 'tagB'], 'api_url': 'some-api-url'}
    msg.update(add_fields)
    result1 = get_args_and_env(msg)
    assert len(result1) == 2
    assert result1[1]['SHUB_APIURL'] == 'some-api-url'
    assert result1[1]['SHUB_JOB_TAGS'] == 'tagA,tagB'
def test_get_args_and_env_run_script():
    msg = {
        'key': '1/2/3',
        'job_cmd': ['custom.py', 'arg1'],
        'auth': 'authstring'
    }
    result = get_args_and_env(msg)
    expected_auth = codecs.encode(to_bytes('1/2/3:authstring'), 'hex_codec')
    assert len(result) == 2
    assert result[0] == ['custom.py', 'arg1']
    assert result[1] == {
        'SHUB_JOBAUTH': to_native_str(expected_auth),
        'SHUB_JOBKEY': '1/2/3',
        'SHUB_JOBNAME': 'custom.py',
        'SHUB_JOB_TAGS': ''
    }
    add_fields = {'tags': ['tagA', 'tagB'], 'api_url': 'some-api-url'}
    msg.update(add_fields)
    result1 = get_args_and_env(msg)
    assert len(result1) == 2
    assert result1[1]['SHUB_APIURL'] == 'some-api-url'
    assert result1[1]['SHUB_JOB_TAGS'] == 'tagA,tagB'
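
Both copies of this test compute the expected SHUB_JOBAUTH the same way: the job key and the auth token are joined with a colon and hex-encoded ('hex' and 'hex_codec' name the same codec, so the two copies are equivalent). For the message above the value can be verified by hand:

raw = b'1/2/3:authstring'
encoded = codecs.encode(raw, 'hex_codec')
assert encoded == b'312f322f333a61757468737472696e67'
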
def test_get_args_and_env_run_spider():
    msg = {
        'key': '1/2/3',
        'spider': 'test',
        'spider_type': 'auto',
        'auth': 'auths',
        'spider_args': {
            'arg1': 'val1',
            'arg2': 'val2'
        },
        'settings': {
            'SETTING1': 'VAL1',
            'SETTING2': 'VAL2'
        }
    }
    result = get_args_and_env(msg)
    expected_auth = codecs.encode(to_bytes('1/2/3:auths'), 'hex_codec')
    assert len(result) == 2
    assert result[0] == [
        'scrapy', 'crawl', 'test', '-a', 'arg1=val1', '-a', 'arg2=val2', '-s',
        'SETTING1=VAL1', '-s', 'SETTING2=VAL2'
    ]
    assert result[1] == {
        'SCRAPY_JOB': '1/2/3',
        'SCRAPY_PROJECT_ID': '1',
        'SCRAPY_SPIDER': 'test',
        'SHUB_JOBAUTH': to_native_str(expected_auth),
        'SHUB_JOBKEY': '1/2/3',
        'SHUB_JOBNAME': 'test',
        'SHUB_JOB_TAGS': '',
        'SHUB_SPIDER_TYPE': 'auto'
    }
    add_fields = {'tags': ['tagA', 'tagB'], 'api_url': 'some-api-url'}
    msg.update(add_fields)
    result1 = get_args_and_env(msg)
    assert len(result1) == 2
    assert result1[1]['SHUB_APIURL'] == 'some-api-url'
    assert result1[1]['SHUB_JOB_TAGS'] == 'tagA,tagB'
def test_get_args_and_env_run_spider():
    msg = {'key': '1/2/3', 'spider': 'test', 'spider_type': 'auto',
           'auth': 'auths', 'spider_args': {'arg1': 'val1', 'arg2': 'val2'},
           'settings': {'SETTING1': 'VAL1', 'SETTING2': 'VAL2'}}
    result = get_args_and_env(msg)
    expected_auth = codecs.encode(to_bytes('1/2/3:auths'), 'hex')
    assert len(result) == 2
    assert result[0] == ['scrapy', 'crawl', 'test', '-a', 'arg1=val1',
                         '-a', 'arg2=val2', '-s', 'SETTING1=VAL1', '-s',
                         'SETTING2=VAL2']
    assert result[1] == {'SCRAPY_JOB': '1/2/3',
                         'SCRAPY_PROJECT_ID': '1',
                         'SCRAPY_SPIDER': 'test',
                         'SHUB_JOBAUTH': to_native_str(expected_auth),
                         'SHUB_JOBKEY': '1/2/3',
                         'SHUB_JOBNAME': 'test',
                         'SHUB_JOB_TAGS': '',
                         'SHUB_SPIDER_TYPE': 'auto'}
    add_fields = {'tags': ['tagA', 'tagB'], 'api_url': 'some-api-url'}
    msg.update(add_fields)
    result1 = get_args_and_env(msg)
    assert len(result1) == 2
    assert result1[1]['SHUB_APIURL'] == 'some-api-url'
    assert result1[1]['SHUB_JOB_TAGS'] == 'tagA,tagB'
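
The expected argument list pins down how get_args_and_env() turns a spider message into a scrapy crawl command line. Here is a rough reconstruction of that mapping, inferred purely from the assertions above; the real implementation in sh_scrapy.env may differ in details such as key ordering:

def build_spider_cmd(msg):
    # Hypothetical helper reconstructed from the expected test output:
    # each spider argument becomes '-a key=value', each setting '-s KEY=VALUE'.
    cmd = ['scrapy', 'crawl', msg['spider']]
    for key, value in sorted(msg.get('spider_args', {}).items()):
        cmd += ['-a', '%s=%s' % (key, value)]
    for key, value in sorted(msg.get('settings', {}).items()):
        cmd += ['-s', '%s=%s' % (key, value)]
    return cmd

With the msg dict from the test, build_spider_cmd(msg) produces exactly the list asserted for result[0].
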
def _launch():
    try:
        from scrapy.exceptions import ScrapyDeprecationWarning
        warnings.filterwarnings('ignore', category=ScrapyDeprecationWarning, module='^sh_scrapy')

        from sh_scrapy.env import get_args_and_env, decode_uri
        job = decode_uri(envvar='JOB_DATA')
        assert job, 'JOB_DATA must be set'
        args, env = get_args_and_env(job)
        os.environ.update(env)

        print(args, env)  # debug: show the parsed command line and environment

        from sh_scrapy.log import initialize_logging
        from sh_scrapy.settings import populate_settings  # NOQA
        loghdlr = initialize_logging()
    except BaseException:
        _fatalerror()
        raise

    _run_usercode(job['spider'], args, _get_apisettings, loghdlr)
def _launch():
    try:
        from scrapy.exceptions import ScrapyDeprecationWarning
        warnings.filterwarnings('ignore',
                                category=ScrapyDeprecationWarning,
                                module='^sh_scrapy')
        from sh_scrapy.env import get_args_and_env, decode_uri
        job = decode_uri(envvar='SHUB_JOB_DATA')
        assert job, 'SHUB_JOB_DATA must be set'
        args, env = get_args_and_env(job)
        os.environ.update(env)

        from sh_scrapy.log import initialize_logging
        from sh_scrapy.settings import populate_settings  # NOQA
        from sh_scrapy.env import setup_environment
        loghdlr = initialize_logging()
        setup_environment()
    except BaseException:
        _fatalerror()
        raise

    _run_usercode(job['spider'], args, _get_apisettings, loghdlr)
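
The two _launch() variants track an interface change: the job message moved from the JOB_DATA environment variable to SHUB_JOB_DATA, and the second (apparently later) version also calls setup_environment() from sh_scrapy.env once logging is initialized. As with main() above, _run_usercode() and _get_apisettings() are module-level helpers that the snippets do not show.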