Example #1
def test_get_target_version(self, mock_lsh):
    get_target('mytarget', auth_required=False)
    get_target_conf('mytargetconf', auth_required=False)
    get_version()
    mock_lsh.return_value.get_target.assert_called_once_with(
        'mytarget', auth_required=False)
    mock_lsh.return_value.get_target_conf.assert_called_once_with(
        'mytargetconf', auth_required=False)
    mock_lsh.return_value.get_version.assert_called_once_with()
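The snippet above was extracted without its test class or patch decorator. A minimal sketch of the likely wiring, assuming mock_lsh patches a config loader such as shub.config.load_shub_config (suggested by the "lsh" abbreviation, but not shown in the source):

import unittest
from unittest import mock

# get_target/get_target_conf live in shub.config (see the import in Example #3);
# the import path for get_version is assumed here.
from shub.config import get_target, get_target_conf, get_version


class ConfigHelpersTest(unittest.TestCase):

    # Assumption: these helpers delegate to the object returned by
    # load_shub_config(), so patching the loader is what makes the
    # assertions on mock_lsh.return_value hold.
    @mock.patch('shub.config.load_shub_config')
    def test_get_target_version(self, mock_lsh):
        ...  # body exactly as in the snippet above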
Example #3
def get_job_specs(job):
    """
    Parse job identifier into valid job id and corresponding API key.

    With projects default=10 and external=20 defined in config:
    * 1/1 -> 10/1/1
    * 2/2/2 -> 2/2/2
    * external/2/2 -> 20/2/2

    It also accepts job URLs from Scrapinghub.
    """
    match = re.match(r'^((\w+)/)?(\d+/\d+)$', job)
    if not match:
        job_url_re = r'^https?://[^/]+/p/((\d+)/)job/(\d+/\d+).*'
        match = re.match(job_url_re, job)
    if not match:
        raise BadParameterException(
            "Job ID {} is invalid. Format should be spiderid/jobid (inside a "
            "project) or target/spiderid/jobid, where target can be either a "
            "project ID or an identifier defined in scrapinghub.yml."
            "".format(job),
            param_hint='job_id',
        )
    # XXX: Lazy import due to circular dependency
    from shub.config import get_target_conf
    targetconf = get_target_conf(match.group(2) or 'default')
    return ("{}/{}".format(targetconf.project_id,
                           match.group(3)), targetconf.apikey)
Example #4
def get_job_specs(job):
    """
    Parse job identifier into valid job id and corresponding API key.

    With projects default=10 and external=20 defined in config:
    * 1/1 -> 10/1/1
    * 2/2/2 -> 2/2/2
    * external/2/2 -> 20/2/2

    It also accepts job URLs from Scrapy Cloud Dashboard.
    """
    match = re.match(r"^((\w+)/)?(\d+/\d+)$", job)
    if not match:
        dash_job_url_re = r"^https?://[^/]+/p/((\d+)/)job/(\d+/\d+).*"
        match = re.match(dash_job_url_re, job)
    if not match:
        raise BadParameterException(
            "Job ID {} is invalid. Format should be spiderid/jobid (inside a "
            "project) or target/spiderid/jobid, where target can be either a "
            "project ID or an identifier defined in scrapinghub.yml."
            "".format(job),
            param_hint="job_id",
        )
    # XXX: Lazy import due to circular dependency
    from shub.config import get_target_conf

    targetconf = get_target_conf(match.group(2) or "default")
    return ("{}/{}".format(targetconf.project_id, match.group(3)), targetconf.apikey)
Example #5
def cli(target_or_key, keys, force):
    # target_or_key contains a target or just another job key
    if "/" in target_or_key:
        keys = (target_or_key,) + keys
        target = "default"
    else:
        target = target_or_key

    targetconf = get_target_conf(target)
    project_id = targetconf.project_id
    client = get_scrapinghub_client_from_config(targetconf)
    project = client.get_project(project_id)

    try:
        job_keys = [validate_job_key(project_id, key) for key in keys]
    except (BadParameterException, SubcommandException) as err:
        click.echo('Error during keys validation: %s' % str(err))
        exit(1)

    if not force:
        jobs_str = ", ".join([str(job) for job in job_keys])
        click.confirm(
            'Do you want to cancel these %s jobs? \n\n%s \n\nconfirm?'
            % (len(job_keys), jobs_str),
            abort=True
        )

    try:
        output = project.jobs.cancel(
            keys=[str(job) for job in job_keys]
        )
    except (ValueError, ScrapinghubAPIError) as err:
        raise ShubException(str(err))

    click.echo(output)
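In the command above, the first positional argument does double duty: it is either a target name or simply another job key. A small, hypothetical helper (not part of shub) that isolates that dispatch rule, with the expected results written as assertions:

def split_target_and_keys(target_or_key, keys):
    # Mirrors the branch above: a first argument containing "/" is a job key,
    # not a target name, so the target falls back to "default".
    if "/" in target_or_key:
        return "default", (target_or_key,) + tuple(keys)
    return target_or_key, tuple(keys)

assert split_target_and_keys("1/1", ("1/2",)) == ("default", ("1/1", "1/2"))
assert split_target_and_keys("prod", ("1/1",)) == ("prod", ("1/1",))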
Example #6
def main(target, requirements_file):
    targetconf = get_target_conf(target)
    requirements_full_path = os.path.abspath(requirements_file)
    eggs_tmp_dir = _mk_and_cd_eggs_tmpdir()
    _download_egg_files(eggs_tmp_dir, requirements_full_path)
    decompress_egg_files()
    build_and_deploy_eggs(targetconf.project_id, targetconf.endpoint,
                          targetconf.apikey)
Example #7
def main(target, requirements_file):
    targetconf = get_target_conf(target)
    requirements_full_path = os.path.abspath(requirements_file)
    eggs_tmp_dir = _mk_and_cd_eggs_tmpdir()
    _download_egg_files(eggs_tmp_dir, requirements_full_path)
    decompress_egg_files()
    build_and_deploy_eggs(targetconf.project_id, targetconf.endpoint,
                          targetconf.apikey)
Example #8
def main(target):
    targetconf = get_target_conf(target)

    url = urljoin(targetconf.endpoint, 'migrate-eggs.zip')
    params = {'project': targetconf.project_id}
    auth = (targetconf.apikey, '')

    response = requests.get(url, auth=auth, params=params, stream=True)

    with zipfile.ZipFile(BytesIO(response.content), 'r') as mfile:
        Migrator(mfile).start()
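The download above hands response.content straight to ZipFile without checking the HTTP status. A minimal sketch of a more defensive variant; fetch_migrate_eggs_zip is a hypothetical name, while raise_for_status() is standard requests API:

from io import BytesIO
import zipfile

import requests


def fetch_migrate_eggs_zip(url, auth, params):
    # Fail fast on 4xx/5xx responses instead of trying to open an
    # error page as a zip archive.
    response = requests.get(url, auth=auth, params=params, stream=True)
    response.raise_for_status()
    return zipfile.ZipFile(BytesIO(response.content), 'r')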
Example #9
def cli(spider, argument, set):
    try:
        target, spider = spider.rsplit("/", 1)
    except ValueError:
        target = "default"
    targetconf = get_target_conf(target)
    job_key = schedule_spider(targetconf.project_id, targetconf.endpoint, targetconf.apikey, spider, argument, set)
    watch_url = urljoin(targetconf.endpoint, "../p/{}/job/{}/{}".format(*job_key.split("/")))
    short_key = job_key.split("/", 1)[1] if target == "default" else job_key
    click.echo("Spider {} scheduled, job ID: {}".format(spider, job_key))
    click.echo("Watch the log on the command line:\n    shub log -f {}" "".format(short_key))
    click.echo("or print items as they are being scraped:\n    shub items -f " "{}".format(short_key))
    click.echo("or watch it running in Scrapinghub's web interface:\n    {}" "".format(watch_url))
Example #10
def main(target, from_url=None, git_branch=None, from_pypi=None):
    targetconf = get_target_conf(target)

    if from_pypi:
        _fetch_from_pypi(from_pypi)
        decompress_egg_files()
        utils.build_and_deploy_eggs(targetconf.project_id, targetconf.endpoint, targetconf.apikey)
        return

    if from_url:
        _checkout(from_url, git_branch)

    if not os.path.isfile("setup.py"):
        error = "No setup.py -- are you running from a valid Python project?"
        raise NotFoundException(error)

    utils.build_and_deploy_egg(targetconf.project_id, targetconf.endpoint, targetconf.apikey)
Example #11
def main(target, from_url=None, git_branch=None, from_pypi=None):
    targetconf = get_target_conf(target)

    if from_pypi:
        _fetch_from_pypi(from_pypi)
        decompress_egg_files()
        utils.build_and_deploy_eggs(targetconf.project_id, targetconf.endpoint,
                                    targetconf.apikey)
        return

    if from_url:
        _checkout(from_url, git_branch)

    if not os.path.isfile('setup.py'):
        error = "No setup.py -- are you running from a valid Python project?"
        raise NotFoundException(error)

    utils.build_and_deploy_egg(targetconf.project_id, targetconf.endpoint,
                               targetconf.apikey)
Example #12
def cli(spider, argument, set):
    try:
        target, spider = spider.rsplit('/', 1)
    except ValueError:
        target = 'default'
    targetconf = get_target_conf(target)
    job_key = schedule_spider(targetconf.project_id, targetconf.endpoint,
                              targetconf.apikey, spider, argument, set)
    watch_url = urljoin(
        targetconf.endpoint,
        '../p/{}/job/{}/{}'.format(*job_key.split('/')),
    )
    short_key = job_key.split('/', 1)[1] if target == 'default' else job_key
    click.echo("Spider {} scheduled, job ID: {}".format(spider, job_key))
    click.echo("Watch the log on the command line:\n    shub log -f {}"
               "".format(short_key))
    click.echo("or print items as they are being scraped:\n    shub items -f "
               "{}".format(short_key))
    click.echo("or watch it running in Scrapinghub's web interface:\n    {}"
               "".format(watch_url))
Example #13
def cli(target):
    targetconf = get_target_conf(target)
    destfile = 'eggs-%s.zip' % targetconf.project_id
    fetch_eggs(targetconf.project_id, targetconf.endpoint, targetconf.apikey,
               destfile)
Example #14
def cli(source_project, new_project, copy_main):
    source = get_target_conf(source_project)
    target = get_target_conf(new_project)
    copy_eggs(source.project_id, source.endpoint, source.apikey,
              target.project_id, target.endpoint, target.apikey,
              copy_main)
Example #16
def cli(source_project, new_project, copy_main):
    source = get_target_conf(source_project)
    target = get_target_conf(new_project)
    copy_eggs(source.project_id, source.endpoint, source.apikey,
              target.project_id, target.endpoint, target.apikey, copy_main)