Exemplo n.º 1
0
def create(name, ec2_config, state_path, num_hosts = 1, description = None,
           api_key = None, admin_key = None, ssl_credentials = None, cors_origin = '',
           port = _PORT_DEFAULT_NUM):
    '''
    Launch a Predictive Services cluster. This cluster can currently be launched
    on EC2 by specifying an EC2 configuration.

    Parameters
    ----------
    name : str
        The name of the Predictive Service that will be launched.

        This string can only contain: a-z, A-Z and hyphens, and must not be
        longer than 32 characters.

    ec2_config : :class:`~graphlab.deploy.Ec2Config`
        Must be an EC2 Configuration object used for starting up EC2 host(s).
        This configuration should contain the instance type, region, AWS
        credentials, and other information used to launch EC2 host(s).

    state_path :  str
        S3 path used to manage state for the Predictive Service. This path can
        also be used to create the Predictive Service object on another
        computer.

    num_hosts : int, optional
        The number of EC2 host(s) to use for this Predictive Service. The default
        number of EC2 host(s) is 1. Must be at least 1.

    description : str, optional
        Description of this Predictive Service.

    api_key : str, optional
        An API key that clients must include with requests. If an api_key is
        not specified, one will be auto generated. The API key can be retrieved
        from the returned PredictiveService object.

    admin_key : str, optional
        An API key used for control operations (i.e. anything other than client
        requests). If an admin_key is not specified, one will be auto generated.
        The API key can be retrieved from the returned PredictiveService object.

    ssl_credentials : tuple of len three, with types: str, str, bool, optional
        The first string is the path to the private key file. The second string
        is the path to public key certificate. The third denotes whether the
        certificates are self signed (and any client should not verify the
        certificate).

        These files must be in the precise format AWS expects. Such a private
        key and a self-signed certificate can be generated using openssl with
        following commands:

        >>> openssl genrsa 1024 > privatekey.pem
        >>> openssl req -new -key privatekey.pem -out csr.pem
        >>> openssl x509 -req -days 365 -in csr.pem -signkey privatekey.pem -out server.crt

        If a tuple is not given, requests will be served over HTTP rather than
        HTTPS (i.e.  encryption will not be used).

    cors_origin : str, optional
        The string value to use as HTTP header Access-Control-Allow-Origin,
        in order to support Cross-Origin Resource Sharing as described in
        https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS.
        The default value is ''. If '' (empty String) is specified, CORS
        support will be disabled. If the string value is '*', CORS support
        will be enabled for all URIs. If the string value is
        'https://dato.com', CORS support will be enabled for
        'https://dato.com' only.

    port : int, optional
        The port the server listens to. The default port is 9005.
        Please do not specify 9006 or 19006. These are reserved for cache.

    Returns
    -------
    out : :py:class:`~graphlab.deploy.PredictiveService`
        A Predictive Service object which can be used to manage the deployment.

    Raises
    ------
    ValueError
        If `name` is malformed or too long, if `num_hosts` is smaller than 1
        or larger than the launch limit, or if a Predictive Service with the
        same name already exists in the current session.
    TypeError
        If `ec2_config` is not an Ec2Config object.
    IOError
        If no AWS credentials can be found, or if a load balancer with the
        same name already exists in the region.
    RuntimeError
        If the instances are not all in service before the creation timeout.
        Everything launched so far is torn down before the error propagates.

    See Also
    --------
    graphlab.deploy.predictive_service.load, graphlab.deploy.PredictiveService

    Examples
    --------
    Let us start out by creating a single node Predictive Service on EC2.

    .. sourcecode:: python

        # create an Ec2Config for Predictive Service, with region, instance_type,
        # and aws credentials.
        ec2 = graphlab.deploy.Ec2Config(aws_access_key_id = 'ACCESS_KEY_ID',
                                        aws_secret_access_key = 'SECRET')

        # create a Predictive Service using a name, ec2 config, and
        # a directory to store the Predictive Service metadata and logs
        ps = graphlab.deploy.predictive_service.create('name',
                                                       ec2,
                                                       's3://mybucket/pred_services/name')

        # get status of this Predictive Service
        ps.get_status()

    Now, let's create a multi-node Predictive Service by specifying the number
    of hosts with the num_hosts parameter.

    .. sourcecode:: python

        # create an Ec2Config for Predictive Service, with region, instance_type,
        # and aws credentials.
        ec2 = graphlab.deploy.Ec2Config(aws_access_key_id = 'ACCESS_KEY_ID',
                                        aws_secret_access_key = 'SECRET')

        # create the Predictive Service with 3 nodes
        ps = graphlab.deploy.predictive_service.create('name_cluster',
                                               ec2,
                                               's3://mybucket/pred_services/name_cluster',
                                               num_hosts = 3)

        # get status of this Predictive Service
        ps.get_status()

        # shut down this Predictive Service
        ps.terminate_service()

    '''
    # Run the cheap, purely local checks first so that a request which is
    # going to be rejected anyway does not upload anything to S3 beforehand.
    if num_hosts < 1:
        raise ValueError("num_hosts must be at least 1.")
    if num_hosts > _NODE_LAUNCH_LIMIT:
        raise ValueError("You cannot launch more than %d nodes at one time. " \
                         "If this limit is problematic, please contact " \
                         "*****@*****.**" % _NODE_LAUNCH_LIMIT)

    # Validate 'name' value
    if not _name_checker.match(name):
        raise ValueError('Predictive Service Name can only contain: a-z, A-Z and hyphens.')
    if len(name) > 32:
        raise ValueError("Predictive Service name can not be longer than 32 characters.")

    # Validate inputs for current session
    if _gl.deploy._default_session.exists(name, _PredictiveService._typename):
        # found another predictive service or predictive service endpoint with the same name
        raise ValueError("Validation Error: Predictive Service already exists with the name '%s', please rename or delete the existing Predictive Service." % name)

    # Validate Ec2 Config
    if not isinstance(ec2_config, _gl.deploy.Ec2Config):
        raise TypeError('Unsupported type given for ec2_config parameter. Must be an Ec2Config object.')

    # Save AWS config: prefer the credentials carried by ec2_config, otherwise
    # fall back to the globally configured ones.
    if hasattr(ec2_config, 'aws_access_key_id') and hasattr(ec2_config, 'aws_secret_access_key'):
        aws_access_key = ec2_config.aws_access_key_id
        aws_secret_key = ec2_config.aws_secret_access_key
    else:
        try:
            aws_access_key, aws_secret_key = _get_credentials()
        except Exception:
            # 'except Exception' (not a bare except) so that Ctrl-C and
            # interpreter exit are not converted into a credentials error.
            raise IOError('No AWS credentials set. Credentials must either be set in the ' \
                              'ec2_config parameter or set globally using ' \
                              'graphlab.aws.set_credentials(...).')
    aws_credentials = {
        'aws_access_key_id': aws_access_key,
        'aws_secret_access_key': aws_secret_key
        }

    # Warn if specified bucket is in different region than specified in env.
    s3_bucket_name, _ = _file_util.parse_s3_path(state_path)

    # The local GraphLab config file is uploaded next to the service state as
    # its license.
    default_config_path = _os.path.join(_os.path.expanduser("~"), ".graphlab", "config")
    try:
        _file_util.upload_to_s3(default_config_path, state_path + "/license",
                                aws_credentials = aws_credentials, silent = True)
        region = _file_util.get_s3_bucket_region(s3_bucket_name, aws_credentials)
    except _S3ResponseError:
        _logger.error("Unable to connect to state_path's bucket; check your AWS credentials")
        raise

    if region != ec2_config.region:
        _logger.warning("The bucket in your state path is in a different region " \
                        "(%s) than the one specified in your environment (%s). " \
                        "AWS data transfer rates apply. Additionally, upload and " \
                        "download speeds may be slower than expected. If this is " \
                        "not what you intended, abort this operation or " \
                        "terminate the service upon its completion, then be sure " \
                        "that your environment and S3 bucket are in the same " \
                        "region before relaunching." % (region, ec2_config.region))

    # Load balancer names must be unique per region, and the service name is
    # used as the load balancer name.
    conn = _lb_connection(ec2_config.region, **aws_credentials)
    for lb in conn.get_all_load_balancers():
        if lb.name == name:
            raise IOError('There is already a load balancer with that name. Load balancer names' \
                              ' must be unique in their region. Please choose a different name.')

    tracker = _mt._get_metric_tracker()
    tracker.track('deploy.predictive_service.create', value=1,
            properties={'num_hosts':num_hosts, 'instance_type':ec2_config.instance_type})

    _logger.info("Launching Predictive Service with %d hosts, as specified by num_hosts parameter"
                 % (num_hosts))

    # Set defaults values, if needed
    if not api_key:
        api_key = str(_random_uuid())
    if not admin_key:
        admin_key = str(_random_uuid())

    result = None
    env = None
    try:
        starttime = _datetime.datetime.now()
        _logger.info("Launching Predictive Service, with name: %s" % name)

        _logger.info("[Step 0/5]: Initializing S3 locations.")
        # Create the predictive service object. It writes init state to S3.
        result = _PredictiveService(name, state_path, description, api_key, admin_key,
                                    aws_credentials, cors_origin = cors_origin, port = port)

        # Launch the environment.
        env = _Ec2PredictiveServiceEnvironment.launch(name, ec2_config, state_path, num_hosts, admin_key,
                                                      ssl_credentials, aws_credentials, started=starttime,
                                                      port = port)

        # Attach the launched environment and write all service state to S3.
        result._environment = env
        result._save_state()

        _logger.info("[Step 4/5]: Waiting for Load Balancer to put all instances into service.")
        while ((_datetime.datetime.now() - starttime).total_seconds() < _MAX_CREATE_TIMEOUT_SECS):
            # query status, verify all InService
            nodes = env.get_status(_show_errors = False)
            in_service = [node['state'] == 'InService' for node in nodes]
            # all([]) is True, so an empty status list must not be mistaken
            # for a fully operational cluster.
            if in_service and all(in_service):
                _logger.info("Cluster is fully operational, [%d/%d] instances currently in service." %
                        (in_service.count(True), len(in_service)))
                break
            else:
                _logger.info("Cluster not fully operational yet, [%d/%d] instances currently in service." %
                        (in_service.count(True), len(in_service)))
                _time.sleep(15)
        else:
            # The while condition became false without a 'break': timed out.
            _logger.error("Instances failed to be ready within 10 minutes. Tearing down.")
            raise RuntimeError("Cluster configuration not successful in time, timing out.")

        _logger.info("[Step 5/5]: Finalizing Configuration.")

        result.cache_enable(None, True)

        _gl.deploy._default_session.register(result)
        result.save()

        return result
    except Exception:
        # any exceptions we should gracefully terminate / tear down what we've created
        _logger.warning("Tearing down Predictive Service due to error launching")

        # default behavior deletes the log files in tear down.
        # To preserve the logs set GRAPHLAB_DEBUG in environment, and the logs will be preserved
        delete_logs = True
        if 'GRAPHLAB_DEBUG' in _os.environ:
            _logger.info("Preserving Log Files for debugging purposes")
            delete_logs = False

        if env:
            env.terminate(delete_logs)

        if result and delete_logs:
            _logger.info('Deleting model data.')
            try:
                _file_util.s3_recursive_delete(state_path, aws_credentials)
            except Exception:
                # Best effort: a failed cleanup must not hide the launch error.
                _logger.error("Could not delete model data. Please manually delete data under: %s" %
                              state_path)

        raise
def create(name, environment, state_path, description = None, api_key = None, admin_key = None,
           ssl_credentials = None):
    '''
    Launch a Predictive Services cluster. This cluster can currently be launched
    on EC2 by specifying an EC2 environment.

    Parameters
    ----------
    name : str
        The name of the Predictive Service that will be launched.

        This string can only contain: a-z, A-Z and hyphens.

    environment : :class:`~graphlab.deploy.environment.EC2` or str
        Must be either an EC2 Environment object or the name of EC2 environment.
        This is the environment where the Predictive Service will be deployed.
        Caching will always be used for query results but a distributed cache
        will only be used if the number of hosts is greater than two.

        The given environment is cloned before it is tagged, so the caller's
        object is not modified.

    state_path :  str
        S3 path used to manage state for the Predictive Service. This path can
        also be used to create the Predictive Service object on another
        computer.

    description : str, optional
        Description of this Predictive Service.

    api_key : str, optional
        An API key that clients must include with requests. If an api_key is
        not specified, one will be auto generated. The API key can be retrieved
        from the returned PredictiveService object.

    admin_key : str, optional
        An API key used for control operations (i.e. anything other than client
        requests). If an admin_key is not specified, one will be auto generated.
        The API key can be retrieved from the returned PredictiveService object.

    ssl_credentials : tuple of len three, with types: str, str, bool, optional
        The first string is the path to the private key file. The second string
        is the path to public key certificate. The third denotes whether the
        certificates are self signed (and any client should not verify the
        certificate).

        These files must be in the precise format AWS expects. Such a private
        key and a self-signed certificate can be generated using openssl with
        following commands:

        >>> openssl genrsa 1024 > privatekey.pem
        >>> openssl req -new -key privatekey.pem -out csr.pem
        >>> openssl x509 -req -days 365 -in csr.pem -signkey privatekey.pem -out server.crt

        If a tuple is not given, requests will be served over HTTP rather than
        HTTPS (i.e.  encryption will not be used).

    Returns
    -------
    out : :py:class:`~graphlab.deploy._predictive_service._predictive_service.PredictiveService`
        A Predictive Service object which can be used to manage the deployment.
        None is returned when the launch fails with a license validation
        error; in that case the error is only logged, not raised.

    Raises
    ------
    RuntimeError
        If a Predictive Service with the same name already exists in the
        current session, or if the instances are not all in service before
        the creation timeout.
    TypeError
        If `environment` is neither an EC2 environment nor the name of one.
    IOError
        If no AWS credentials can be found, if `name` is malformed, or if a
        load balancer with the same name already exists in the region.

    Examples
    --------
        >>> ec2 = graphlab.deploy.environments['ec2']
        >>> ps = graphlab.deploy.create('name', ec2, 's3://mybucket/pred_services/name')
        >>> print ps
        >>> ps.get_status()

    See Also
    --------
    load
    '''

    # local test hook
    if environment is None:
        # load locally
        ret = _gl.deploy.predictive_service.load(state_path, override=True)
        ret._environment = _LocalPredictiveServiceEnvironment.launch(
            predictive_service_path = ret._s3_state_path,
            log_path = ret._environment.log_path,
            num_hosts = 3,
            admin_key = ret._environment.admin_key)
        return ret

    # Validate inputs for current session
    if not _gl.deploy._default_session.is_name_unique(name):
        # found another predictive service or predictive service endpoint with the same name
        raise RuntimeError("Validation Error: Predictive Service already exists with the name '%s', please rename or delete the existing Predictive Service." % name)

    # Validate 'environment' value
    if not isinstance(environment, _gl.deploy.environment.EC2) and not isinstance(environment, str):
        raise TypeError('Unsupported type given for environment parameter. Must be either an EC2' \
                          ' environment or the name of an EC2 environment.')
    if isinstance(environment, str):
        # Get the EC2 config object
        environment_name = environment
        environment = _gl.deploy.environments[environment_name]
        if not environment:
            raise TypeError("No environment named '%s'." % environment_name)
        if not isinstance(environment, _gl.deploy.environment.EC2):
            raise TypeError("%s is not an EC2 environment." % environment_name)

    # Save AWS config: prefer the credentials carried by the environment,
    # otherwise fall back to the globally configured ones.
    if hasattr(environment, 'aws_access_key') and hasattr(environment, 'aws_secret_key'):
        aws_access_key = environment.aws_access_key
        aws_secret_key = environment.aws_secret_key
    else:
        try:
            aws_access_key, aws_secret_key = _get_credentials()
        except Exception:
            # 'except Exception' (not a bare except) so that Ctrl-C and
            # interpreter exit are not converted into a credentials error.
            raise IOError('No AWS credentials set. Credentials must either be set in the ' \
                              'environment parameter or set globally using ' \
                              'graphlab.aws.set_credentials(...).')
    aws_credentials = {
        'aws_access_key_id': aws_access_key,
        'aws_secret_access_key': aws_secret_key
        }

    # Validate 'name' value
    if not _name_checker.match(name):
        raise IOError('Names can only contain: a-z, A-Z and hyphens.')
    # Load balancer names must be unique per region, and the service name is
    # used as the load balancer name.
    conn = _lb_connection(environment.region, **aws_credentials)
    for lb in conn.get_all_load_balancers():
        if lb.name == name:
            raise IOError('There is already a load balancer with that name. Load balancer names' \
                              ' must be unique in their region. Please choose a different name.')

    # clone the environment, so not mutating existing environment
    environment = environment.clone()

    tracker = _mt._get_metric_tracker()
    tracker.track('deploy.predictive_service.create', value=1,
            properties={'num_hosts':environment.num_hosts, 'instance_type':environment.instance_type})

    _logger.info("Launching Predictive Service with %d hosts, as specified by num_hosts parameter"
                 % (environment.num_hosts))

    # Add a tag for all EC2 instances to indicate they are related to this PredictiveService
    if not environment.tags:
        environment.tags = {}
    environment.tags.update({'Name':name, 'predictive_service':name})

    # Set defaults values, if needed
    if not api_key:
        api_key = str(_random_uuid())
    if not admin_key:
        admin_key = str(_random_uuid())

    result = None
    env = None
    try:
        starttime = _datetime.datetime.now()
        _logger.info("Launching Predictive Service, with name: %s" % name)

        _logger.info("[Step 0/5]: Initializing S3 locations.")
        # Create the predictive service object. It writes init state to S3.
        result = _PredictiveService(name, state_path, description, api_key, aws_credentials)

        # Launch the environment.
        env = _Ec2PredictiveServiceEnvironment.launch(name, environment, state_path, admin_key,
                                                     ssl_credentials, aws_credentials, started=starttime)

        # Attach the launched environment and write all service state to S3.
        result._environment = env
        result._save_state_to_s3()

        _logger.info("[Step 4/5]: Waiting for Load Balancer to put all instances into service.")
        while ((_datetime.datetime.now() - starttime).total_seconds() < _MAX_CREATE_TIMEOUT_SECS):
            # query status, verify all InService
            nodes = env.get_status(_show_errors = False)
            in_service = [node['state'] == 'InService' for node in nodes]
            # all([]) is True, so an empty status list must not be mistaken
            # for a fully operational cluster.
            if in_service and all(in_service):
                _logger.info("Cluster is fully operational, [%d/%d] instances currently in service." %
                        (in_service.count(True), len(in_service)))
                break
            else:
                _logger.info("Cluster not fully operational yet, [%d/%d] instances currently in service." %
                        (in_service.count(True), len(in_service)))
                _time.sleep(15)
        else:
            # The while condition became false without a 'break': timed out.
            _logger.error("Instances failed to be ready within 10 minutes. Tearing down.")
            raise RuntimeError("Cluster configuration not successful in time, timing out.")

        _logger.info("[Step 5/5]: Finalizing Configuration.")

        _gl.deploy._default_session.register(result)
        result.save()

        return result
    except Exception as e:
        # any exceptions we should gracefully terminate / tear down what we've created
        _logger.warning("Tearing down Predictive Service due to error launching")

        # default behavior deletes the log files in tear down.
        # To preserve the logs set GRAPHLAB_DEBUG in environment, and the logs will be preserved
        delete_logs = True
        if 'GRAPHLAB_DEBUG' in _os.environ:
            _logger.info("Preserving Log Files for debugging purposes")
            delete_logs = False

        if env:
            env.terminate(delete_logs)

        if result and delete_logs:
            _logger.info('Deleting model data.')
            try:
                _s3_recursive_delete(state_path, aws_credentials)
            except Exception:
                # Best effort: a failed cleanup must not hide the launch error.
                _logger.error("Could not delete model data. Please manually delete data under: %s" %
                              state_path)

        if isinstance(e, LicenseValidationException):
            # catch exception and print license check hint message here instead of raise
            _logger.info(e)
            # Deliberate: the caller gets None instead of an exception.
            return None
        else:
            # re-raise exception so object not returned etc
            raise
def create(name, ec2_config, state_path, license_file_path = None, num_hosts = 1,
           description = None, api_key = None, admin_key = None,
           ssl_credentials = None, cors_origin = '',
           port = _DEFAULT_ADMIN_PORT, admin_ui_port = _DEFAULT_ADMIN_UI_PORT,
           scheme='internet-facing',
           config_file=None):
    """
    Launch a Predictive Service on EC2 and write a client config file for it.

    Refer to psclient.create for the full user-facing documentation.

    Parameters
    ----------
    name : str
        Name of the Predictive Service. Can only contain a-z, A-Z and hyphens
        and must not be longer than 32 characters. It must not clash with the
        name of an existing load balancer in the region.

    ec2_config : Ec2Config
        EC2 configuration. It must carry `aws_access_key_id` and
        `aws_secret_access_key`; its `region` is used for the load balancer.

    state_path : str
        S3 path used to manage state for the Predictive Service. The path
        must not exist yet.

    license_file_path : str, optional
        Path to the license file that is uploaded to `state_path + "/license"`.
        Defaults to `~/.graphlab/config`.

    num_hosts : int, optional
        Number of EC2 hosts to launch. Must be at least 1 and not larger than
        the node launch limit.

    description : str, optional
        Description stored in the initial state of the service.

    api_key : str, optional
        API key for client requests. Auto generated if not given.

    admin_key : str, optional
        API key for control operations. Auto generated if not given.

    ssl_credentials : tuple, optional
        Passed through to the environment launch. When given, the service
        endpoint uses https, otherwise http.

    cors_origin : str, optional
        Value stored in the initial state for Cross-Origin Resource Sharing.

    port : int, optional
        Admin port. It is passed to the environment launch and is the port
        used in the service endpoint.

    admin_ui_port : int, optional
        Admin UI port, passed to the environment launch.

    scheme : str, optional
        Passed through to the environment launch as `scheme`.
        NOTE(review): presumably the load balancer scheme -- confirm.

    config_file : str, optional
        Where to write the client config file. Defaults to `<name>.conf` in
        the current working directory.

    Returns
    -------
    out : PredictiveService
        Object created from the service endpoint and the admin key.

    Raises
    ------
    ValueError
        If `name` is malformed or too long, or `num_hosts` is out of range.
    TypeError
        If `ec2_config` is not an Ec2Config object.
    IOError
        If `ec2_config` carries no AWS credentials, or a load balancer with
        the same name already exists.
    RuntimeError
        If `state_path` already exists, or the instances are not all in
        service before the creation timeout. On any launch failure the
        environment is terminated and, unless GRAPHLAB_DEBUG is set in the
        process environment, the data under `state_path` is deleted.
    """
    if not _name_checker.match(name):
        raise ValueError('Predictive Service Name can only contain: a-z, A-Z and hyphens.')
    if len(name) > 32:
        raise ValueError("Predictive Service name can not be longer than 32 characters.")

    if num_hosts < 1:
        raise ValueError("num_hosts must be at least 1.")
    if num_hosts > _NODE_LAUNCH_LIMIT:
        raise ValueError("You cannot launch more than %d nodes at one time. " \
                         "If this limit is problematic, please visit " \
                         "https://turi.com/support for support options." % _NODE_LAUNCH_LIMIT)

    # Validate Ec2 Config
    if not isinstance(ec2_config, Ec2Config):
        raise TypeError('Unsupported type given for ec2_config parameter. Must be an Ec2Config object.')

    # Save AWS config
    if hasattr(ec2_config, 'aws_access_key_id') and hasattr(ec2_config, 'aws_secret_access_key'):
        aws_access_key = ec2_config.aws_access_key_id
        aws_secret_key = ec2_config.aws_secret_access_key
    else:
        raise IOError('No AWS credentials set. Credentials must either be set in the ' \
                          'ec2_config parameter or set globally using ' \
                          'psclient.aws.set_credentials(...).')
    aws_credentials = {
        'aws_access_key_id': aws_access_key,
        'aws_secret_access_key': aws_secret_key
    }

    # From here on, everything found under state_path was written by this
    # call; the failure cleanup further down relies on that.
    if _file_util.exists(state_path, aws_credentials):
        raise RuntimeError('Path %s already exists, choose a different path as state path' % state_path)

    # Warn if specified bucket is in different region than specified in env.
    s3_bucket_name, _ = _file_util.parse_s3_path(state_path)

    if license_file_path:
        license_file_path = _os.path.abspath(_os.path.expanduser(license_file_path))
    else:
        license_file_path = _os.path.join(_os.path.expanduser("~"), ".graphlab", "config")
    try:
        _file_util.upload_to_s3(license_file_path, state_path + "/license",
                                aws_credentials = aws_credentials, silent = True)
        region = _file_util.get_s3_bucket_region(s3_bucket_name, aws_credentials)
    except _S3ResponseError:
        _logger.error("Unable to connect to state_path's bucket; check your AWS credentials")
        raise

    if region != ec2_config.region:
        _logger.warning("The bucket in your state path is in a different region " \
                        "(%s) than the one specified in your environment (%s). " \
                        "AWS data transfer rates apply. Additionally, upload and " \
                        "download speeds may be slower than expected. If this is " \
                        "not what you intended, abort this operation or " \
                        "terminate the service upon its completion, then be sure " \
                        "that your environment and S3 bucket are in the same " \
                        "region before relaunching." % (region, ec2_config.region))

    # Load balancer names must be unique per region, and the service name is
    # used as the load balancer name.
    conn = _lb_connection(ec2_config.region, **aws_credentials)
    for lb in conn.get_all_load_balancers():
        if lb.name == name:
            raise IOError('There is already a load balancer with that name. Load balancer names' \
                              ' must be unique in their region. Please choose a different name.')

    _logger.info("Launching Predictive Service with %d hosts, as specified by num_hosts parameter"
                 % (num_hosts))

    # Set defaults values, if needed
    if not api_key:
        api_key = str(_random_uuid())
    if not admin_key:
        admin_key = str(_random_uuid())

    # get product key
    product_key = _Ec2PredictiveServiceEnvironment._get_product_key(
                                            license_file_path, aws_credentials)

    env = None
    try:
        starttime = _datetime.datetime.now()
        _logger.info("Launching Predictive Service, with name: %s" % name)

        _logger.info("[Step 0/5]: Initializing S3 locations.")

        # Check for invalid names and port
        _check_name_and_port(name, port)

        # Launch the environment.
        env = _Ec2PredictiveServiceEnvironment.launch(name, ec2_config, state_path, num_hosts,
                                                      ssl_credentials, aws_credentials, started=starttime,
                                                      admin_port=port, admin_ui_port=admin_ui_port,
                                                      product_key=product_key, scheme=scheme)

        # Create initial state file to save
        state = _generate_init_state_file(name, env, description, api_key,
                                          admin_key, aws_credentials,
                                          cors_origin, 'enabled',
                                          SystemConfig())

        # Save initial state file with environment
        _save_initial_state(env, state, state_path)

        # create endpoint
        endpoint = 'https://' if ssl_credentials else 'http://'
        endpoint = endpoint + env.load_balancer_dns_name + ":" + str(port)

        _logger.info("[Step 4/5]: Waiting for Load Balancer to put all instances into service.")
        while ((_datetime.datetime.now() - starttime).total_seconds() < _MAX_CREATE_TIMEOUT_SECS):
            # query status, verify all InService
            nodes = env._get_load_balancer_status()
            in_service = [node['state'] == 'InService' for node in nodes]
            # all([]) is True, so an empty status list must not be mistaken
            # for a fully operational cluster.
            if in_service and all(in_service):
                _logger.info("Cluster is fully operational, [%d/%d] instances currently in service." %
                        (in_service.count(True), len(in_service)))
                break
            else:
                _logger.info("Cluster not fully operational yet, [%d/%d] instances currently in service." %
                        (in_service.count(True), len(in_service)))
                _time.sleep(15)
        else:
            # The while condition became false without a 'break': timed out.
            _logger.error("Instances failed to be ready within 10 minutes. Tearing down.")
            raise RuntimeError("Cluster configuration not successful in time, timing out.")

        _logger.info("[Step 5/5]: Finalizing Configuration.")

    except Exception as e:
        # any exceptions we should gracefully terminate / tear down what we've created
        _logger.warning("Tearing down Predictive Service due to error launching, %s" % e)

        # default behavior deletes the log files in tear down.
        # To preserve the logs set GRAPHLAB_DEBUG in environment, and the logs will be preserved
        delete_logs = True
        if 'GRAPHLAB_DEBUG' in _os.environ:
            _logger.info("Preserving Log Files for debugging purposes")
            delete_logs = False

        if env:
            env.terminate(delete_logs)

        # state_path did not exist when this call started, so whatever is
        # there now (at least the uploaded license) was created here. Leaving
        # it behind would make a retry with the same state_path fail with
        # "Path ... already exists".
        if delete_logs:
            _logger.info('Deleting model data.')
            try:
                _file_util.s3_recursive_delete(state_path, aws_credentials)
            except Exception:
                # Best effort: a failed cleanup must not hide the launch error.
                _logger.error("Could not delete model data. Please manually delete data under: %s" %
                              state_path)

        raise


    # Create the predictive service object.
    ps = _PredictiveService(endpoint, admin_key)

    if not config_file:
        config_file = _os.path.join(
            _os.getcwd(),
            name+'.conf')

    generate_config(config_file, ps)
    _logger.info("Created config file at %s", config_file)
    # Read through a context manager so the file handle is closed right away.
    with open(config_file) as config_handle:
        config_contents = config_handle.read()
    _logger.info("Contents are:\n%s", config_contents)

    return ps