Exemplo n.º 1
0
def load_status_url(status_id):
    """Look up a status url in the status file.

    The status file at ``STATUS_FILE_LOCATION`` is read through
    ``_load_status_file``. When ``status_id`` is not an ``int`` and the file
    holds at least one entry, the entry with the greatest id is returned and
    a notice is echoed. Otherwise the entry stored under ``status_id`` is
    returned.

    Raises ``NotFoundException`` if the status file does not exist or if it
    has no entry for ``status_id``.
    """
    if not os.path.isfile(STATUS_FILE_LOCATION):
        raise NotFoundException(
            'Status file is not found at {}'.format(STATUS_FILE_LOCATION))
    statuses = _load_status_file(STATUS_FILE_LOCATION)

    # No usable id given: fall back to the most recent (largest) status id.
    id_given = isinstance(status_id, int)
    if not id_given and statuses:
        latest_id = max(statuses.keys())
        notice = 'Getting results for latest status id {}.'.format(latest_id)
        click.echo(notice)
        return statuses[latest_id]

    if status_id in statuses:
        return statuses[status_id]
    raise NotFoundException(
        "Status url with id {} is not found".format(status_id))
Exemplo n.º 2
0
 def get_image(self, target):
     """Look up the image registered for ``target`` in ``self.images``.

     Raises ``NotFoundException`` when ``target`` has no entry.
     """
     if target in self.images:
         return self.images[target]
     message = ("Could not find image for %s. Please "
                "define it in your scrapinghub.yml."
                "" % target)
     raise NotFoundException(message)
Exemplo n.º 3
0
def get_job(job):
    """Fetch the job identified by ``job`` through a ``HubstorageClient``.

    ``get_job_specs`` turns ``job`` into a job id and an API key; the key is
    used to authenticate the client. A job whose ``metadata`` is falsy is
    treated as non-existent and reported with ``NotFoundException``.
    """
    jobid, apikey = get_job_specs(job)
    client = HubstorageClient(auth=apikey)
    fetched = client.get_job(jobid)
    if fetched.metadata:
        return fetched
    raise NotFoundException('Job {} does not exist'.format(jobid))
Exemplo n.º 4
0
 def get_target_conf(self, target, auth_required=True):
     """Assemble a ``Target`` describing how to reach ``target``.

     The project entry comes from ``self.get_project(target)``. Its endpoint
     name must be present in ``self.endpoints`` (else ``NotFoundException``).
     If no API key can be looked up and ``auth_required`` is true,
     ``MissingAuthException`` is raised; with ``auth_required`` false the
     resulting ``Target`` carries ``apikey=None``.
     """
     project = self.get_project(target)
     endpoint_name = project['endpoint']
     if endpoint_name not in self.endpoints:
         raise NotFoundException("Could not find endpoint %s. Please "
                                 "define it in your scrapinghub.yml."
                                 "" % endpoint_name)

     apikey = None
     try:
         apikey = str(self.apikeys[project['apikey']])
     except KeyError:
         if auth_required:
             # Only non-default endpoints get a custom message.
             if endpoint_name != 'default':
                 detail = ("Could not find API key for endpoint %s."
                           "" % endpoint_name)
             else:
                 detail = None
             raise MissingAuthException(detail)

     project_id = project['id']
     endpoint = self.endpoints[endpoint_name]
     # A project-level stack may be an alias defined in self.stacks; when
     # the project names no stack, fall back to the 'default' stack (if any).
     if 'stack' in project:
         stack = self.stacks.get(project['stack'], project['stack'])
     else:
         stack = self.stacks.get('default')

     return Target(
         project_id=project_id,
         endpoint=endpoint,
         apikey=apikey,
         stack=stack,
         requirements_file=self.requirements_file,
         version=self.get_version(),
         eggs=self.eggs,
     )
Exemplo n.º 5
0
 def get_image(self, target):
     """Return the custom image configured for ``target`` after validation.

     The target configuration is fetched without requiring authentication.
     ``NotFoundException`` is raised when no image is configured (``None``);
     ``BadConfigException`` is raised when images are disabled (``False``),
     when a stack is configured alongside the image, or when the image
     starts with ``SH_IMAGES_REGISTRY`` but is not the repository expected
     for the project.
     """
     conf = self.get_target_conf(target, auth_required=False)
     project = conf.project_id
     image = conf.image

     if image is None:
         raise NotFoundException(
             "Could not find image for project '{}'. Please define it "
             "in your scrapinghub.yml.".format(target))
     if image is False:
         raise BadConfigException(
             "Using custom images is disabled for the project '{}'. "
             "Please enable it in your scrapinghub.yml.".format(target))
     if conf.stack:
         raise BadConfigException(
             "Ambiguous configuration: There is both a custom image and a "
             "stack configured for project '{}'. Please see {} for "
             "information on how to configure both custom image-based and "
             "stack-based projects.".format(target, CONFIG_DOCS_LINK))

     expected_image = SH_IMAGES_REPOSITORY.format(project=project)
     in_sh_registry = image.startswith(SH_IMAGES_REGISTRY)
     if in_sh_registry and image != expected_image:
         raise BadConfigException(
             "Found wrong SH repository for project '{}': expected {}.\n  "
             "Please use aliases `True` or `scrapinghub` to fix it in your "
             "config.".format(target, expected_image))
     return image
Exemplo n.º 6
0
def decompress_egg_files(directory=None):
    """Unpack every supported archive found in ``directory``.

    With ``directory`` left as ``None`` the glob patterns are relative (no
    directory prefix). The list of archive extensions and the unpack helper
    are taken from whichever pip module layout exposes them. Each archive is
    unpacked next to itself, into a path equal to its name minus the
    extension.

    Raises ``NotFoundException`` if no file with a supported extension is
    found.
    """
    # The location of these helpers differs between pip releases, so probe
    # the known module paths in turn.
    try:
        extensions = pip.utils.ARCHIVE_EXTENSIONS
    except AttributeError:
        try:
            extensions = pip._internal.utils.misc.ARCHIVE_EXTENSIONS
        except AttributeError:
            extensions = ('.zip', '.whl', '.tar', '.tar.gz', '.tar.bz2')
    try:
        unpack = pip.utils.unpack_file
    except AttributeError:
        try:
            unpack = pip.util.unpack_file
        except AttributeError:
            # Work-around for pip >= 10.0
            unpack = pip._internal.utils.misc.unpack_file

    if directory is None:
        pattern = "*"
    else:
        pattern = os.path.join(directory, "*")

    archives = []
    for ext in extensions:
        archives.extend(glob(pattern + "%s" % ext))

    if not archives:
        present = glob(pattern)
        raise NotFoundException(
            'No egg files with a supported file extension were found. '
            'Files: %s' % ', '.join(present))

    for archive in archives:
        click.echo("Uncompressing: %s" % archive)
        # First extension (in declaration order) that the file name ends with.
        matches = [archive.endswith(ext) for ext in extensions]
        suffix = extensions[matches.index(True)]
        destination = archive[:-len(suffix)]
        unpack(archive, destination, None, None)
Exemplo n.º 7
0
def _build_egg():
    """Build an egg for the current project inside a fresh temp directory.

    Returns a ``(egg_path, temp_dir)`` tuple; the caller owns ``temp_dir``
    and is responsible for removing it.

    Raises ``NotFoundException`` when not inside a project, or when the
    build finished without leaving any ``*.egg`` file in the output
    directory. If the build fails for any reason the temp directory is
    removed before the error propagates, since the caller never learns its
    path and could not clean it up.
    """
    import shutil

    if not inside_project():
        raise NotFoundException("No Scrapy project found in this location.")
    create_default_setup_py()
    d = tempfile.mkdtemp(prefix="shub-deploy-")
    try:
        run_python(['setup.py', 'clean', '-a', 'bdist_egg', '-d', d])
        eggs = glob.glob(os.path.join(d, '*.egg'))
        if not eggs:
            # Avoid an opaque IndexError when bdist_egg produced nothing.
            raise NotFoundException(
                "No egg file was produced by 'setup.py bdist_egg'.")
    except BaseException:
        shutil.rmtree(d, ignore_errors=True)
        raise
    return eggs[0], d
Exemplo n.º 8
0
def get_project_dir():
    """Return the directory of the closest project marker file.

    The marker files ``scrapinghub.yml``, ``scrapy.cfg`` and ``Dockerfile``
    are searched for (in this priority) with ``closest_file``; the directory
    of the first one found is returned. ``NotFoundException`` is raised when
    none of them can be located.
    """
    markers = ('scrapinghub.yml', 'scrapy.cfg', 'Dockerfile')
    # Generator keeps the lookups lazy: lower-priority markers are only
    # searched for when the higher-priority ones are missing.
    located = (closest_file(marker) for marker in markers)
    first_hit = next((path for path in located if path), None)
    if first_hit is not None:
        return os.path.dirname(first_hit)
    raise NotFoundException(
        "Cannot find project: There is no scrapinghub.yml, scrapy.cfg, or "
        "Dockerfile in this directory or any of the parent directories.")
Exemplo n.º 9
0
def patch_sys_executable():
    """
    Context manager that monkey-patches sys.executable to point to the Python
    interpreter.

    Some scripts, in particular pip, depend on sys.executable pointing to the
    Python interpreter. When frozen, however, sys.executable points to the
    stand-alone file (i.e. the frozen script).

    When ``sys.frozen`` is not set this simply yields without touching
    anything. Otherwise a Python 2.7 interpreter is looked up via
    ``find_exe`` and ``NotFoundException`` is raised if none is available.
    The original ``sys.executable`` and ``LD_LIBRARY_PATH`` are restored on
    exit, even if the managed block raises.

    NOTE(review): this is a generator function; it is presumably wrapped by
    ``contextlib.contextmanager`` outside of this block -- confirm.
    """
    if getattr(sys, 'frozen', False):
        orig_exe = sys.executable
        for pythonname in ('python2.7', 'python2', 'python'):
            try:
                py_exe = find_exe(pythonname)
                break
            except NotFoundException:
                pass
        else:
            raise NotFoundException('Please install Python 2.7')

        output = subprocess.check_output([py_exe, '--version'],
                                         stderr=subprocess.STDOUT)
        if 'Python 2.7.' not in output:
            raise NotFoundException('Please install Python 2.7')

        # PyInstaller sets this environment variable in its bootloader. Remove
        # it so the system-wide Python installation uses its own library path
        # (this is particularly important if the system Python version differs
        # from the Python version that the binary was compiled with)
        orig_lib_path = os.environ.pop(b'LD_LIBRARY_PATH', None)
        sys.executable = py_exe
        try:
            yield
        finally:
            # Always undo the patching, including when the managed block
            # raised; otherwise the process keeps the patched state.
            sys.executable = orig_exe
            if orig_lib_path is not None:
                os.environ[b'LD_LIBRARY_PATH'] = orig_lib_path
    else:
        yield
Exemplo n.º 10
0
def cli(project, target_dir):
    """Download the example project ``project`` into a new directory.

    The destination is ``target_dir`` (or the project name when
    ``target_dir`` is falsy), resolved against the current working
    directory. ``BadParameterException`` is raised if the destination
    already exists and ``NotFoundException`` if ``project`` is not among the
    available example projects.
    """
    requested = target_dir or project
    target_dir = os.path.normpath(os.path.join(os.getcwd(), requested))
    if os.path.exists(target_dir):
        raise BadParameterException(
            "Target directory %s already exists, please delete it or supply a "
            "non-existing target." % target_dir)

    available = get_available_projects()
    if project not in available:
        raise NotFoundException(
            "There is no example project named '%s'. Run 'shub bootstrap -l' "
            "to get a list of all available projects." % project)

    click.echo("Downloading custom image examples")
    archive = get_repo_zip(EXAMPLE_REPO)
    click.echo("Cloning project '%s' into %s" % (project, target_dir))
    unzip_project(archive, project=available[project], target_dir=target_dir)
Exemplo n.º 11
0
def main(target, from_url=None, git_branch=None, from_pypi=None):
    """Build and deploy egg(s) to the project configured for ``target``.

    With ``from_pypi`` set, the package is fetched from PyPI, the downloaded
    archives are decompressed and all resulting eggs are built and deployed.
    Otherwise the source is optionally checked out from ``from_url`` (at
    ``git_branch``) and a single egg is built from the ``setup.py`` in the
    current directory; ``NotFoundException`` is raised if there is none.
    """
    conf = get_target_conf(target)

    if from_pypi:
        _fetch_from_pypi(from_pypi)
        decompress_egg_files()
        destination = (conf.project_id, conf.endpoint, conf.apikey)
        utils.build_and_deploy_eggs(*destination)
        return

    if from_url:
        _checkout(from_url, git_branch)

    has_setup_py = os.path.isfile('setup.py')
    if not has_setup_py:
        raise NotFoundException(
            "No setup.py -- are you running from a valid Python project?")

    destination = (conf.project_id, conf.endpoint, conf.apikey)
    utils.build_and_deploy_egg(*destination)
Exemplo n.º 12
0
def _deploy_wizard(conf, target='default'):
    """
    Ask user for project ID, ensure they have access to that project, and save
    it to given ``target`` in local ``scrapinghub.yml`` if desired.

    Raises ``NotFoundException`` when no ``scrapy.cfg`` can be located and
    ``InvalidAuthException`` when the logged-in account cannot access the
    entered project. A failure to write ``scrapinghub.yml`` is reported to
    the user but does not abort: ``conf.projects[target]`` has already been
    set in memory by then.
    """
    closest_scrapycfg = closest_file('scrapy.cfg')
    # Double-checking to make deploy_wizard() independent of cli()
    if not closest_scrapycfg:
        raise NotFoundException("No Scrapy project found in this location.")
    closest_sh_yml = os.path.join(os.path.dirname(closest_scrapycfg),
                                  'scrapinghub.yml')
    # Get default endpoint and API key (meanwhile making sure the user is
    # logged in)
    endpoint, apikey = conf.get_endpoint(0), conf.get_apikey(0)
    project = click.prompt("Target project ID", type=int)
    if not _has_project_access(project, endpoint, apikey):
        raise InvalidAuthException(
            "The account you logged in to has no access to project {}. Please "
            "double-check the project ID and make sure you logged in to the "
            "correct account.".format(project),
        )
    conf.projects[target] = project
    if click.confirm("Save as default", default=True):
        try:
            with update_yaml_dict(closest_sh_yml) as conf_yml:
                # Create the 'projects' mapping on first use, then record the
                # chosen project as the default one.
                conf_yml.setdefault('projects', {}).update(
                    {'default': project})
        except Exception:
            # Best effort: saving the default is a convenience, so a write
            # problem is reported rather than raised.
            click.echo(
                "There was an error while trying to write to scrapinghub.yml. "
                "Could not save project {} as default.".format(project),
            )
        else:
            click.echo(
                "Project {} was set as default in scrapinghub.yml. You can "
                "deploy to it via 'shub deploy' from now on.".format(project),
            )
Exemplo n.º 13
0
def cli(target, version, debug, egg, build_egg, verbose, keep_log):
    """Build and/or upload the project egg.

    With ``build_egg`` set, an egg is built and copied to that path without
    uploading anything. Otherwise the target configuration is loaded (running
    the deploy wizard first when the ``default`` target is not configured),
    and either the supplied ``egg`` or a freshly built one is uploaded.

    Any temporary build directory is removed afterwards unless ``debug`` is
    set, in which case its location is echoed instead.

    Raises ``NotFoundException`` when not run from inside a project.
    """
    if not inside_project():
        raise NotFoundException("No Scrapy project found in this location.")
    tmpdir = None
    try:
        if build_egg:
            # Build-only mode: write the egg to the requested path and stop.
            egg, tmpdir = _build_egg()
            click.echo("Writing egg to %s" % build_egg)
            shutil.copyfile(egg, build_egg)
            return

        conf = load_shub_config()
        needs_wizard = target == 'default' and target not in conf.projects
        if needs_wizard:
            _deploy_wizard(conf)
        targetconf = conf.get_target_conf(target)
        version = version or targetconf.version
        auth = (targetconf.apikey, '')

        if egg:
            click.echo("Using egg: %s" % egg)
        else:
            click.echo("Packing version %s" % version)
            egg, tmpdir = _build_egg()

        _upload_egg(targetconf.endpoint, egg, targetconf.project_id,
                    version, auth, verbose, keep_log, targetconf.stack,
                    targetconf.requirements_file, targetconf.eggs)
        click.echo("Run your spiders at: "
                   "https://app.scrapinghub.com/p/%s/"
                   "" % targetconf.project_id)
    finally:
        if tmpdir and debug:
            click.echo("Output dir not removed: %s" % tmpdir)
        elif tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)
Exemplo n.º 14
0
def find_exe(exe_name):
    """Return the location of ``exe_name`` as reported by ``find_executable``.

    Raises ``NotFoundException`` asking the user to install the executable
    when the lookup yields nothing.
    """
    location = find_executable(exe_name)
    if location:
        return location
    raise NotFoundException("Please install {}".format(exe_name))