def List(self, buckets): """See base class.""" stdout, _, _ = vm_util.IssueCommand( ['aws', 's3', 'ls', buckets, '--region', self.region]) return stdout
def testNoTimeout(self):
  _, _, retcode = vm_util.IssueCommand(['sleep', '2s'], timeout=None)
  self.assertEqual(retcode, 0)
def _AddTag(self, key, value):
  cmd = self.cmd_prefix + [
      'emr', 'add-tags',
      '--resource-id', self.cluster_id,
      '--tag', '{}={}'.format(key, value)
  ]
  vm_util.IssueCommand(cmd)
def Copy(self, src_url, dst_url):
  """See base class."""
  vm_util.IssueCommand(['gsutil', 'cp', src_url, dst_url])
def List(self, buckets): """See base class.""" stdout, _, _ = vm_util.IssueCommand(['gsutil', 'ls', buckets]) return stdout
def _Delete(self):
  vm_util.IssueCommand(
      [azure.AZURE_PATH, 'group', 'delete', '--quiet', self.name])
def SubmitJob(self,
              jarfile=None,
              classname=None,
              pyspark_file=None,
              query_file=None,
              job_poll_interval=5,
              job_stdout_file=None,
              job_arguments=None,
              job_files=None,
              job_jars=None,
              job_type=None,
              properties=None):
  """See base class."""
  assert job_type
  # Create job definition
  job_name = f'{self.cluster_id}-{self._job_counter}'
  self._job_counter += 1
  glue_command = {}
  glue_default_args = {}
  if job_type == self.PYSPARK_JOB_TYPE:
    glue_command = {
        'Name': 'glueetl',
        'ScriptLocation': self._glue_script_wrapper_url,
    }
    all_properties = self.GetJobProperties()
    if properties:
      all_properties.update(properties)
    glue_default_args = {'--extra-py-files': pyspark_file, **all_properties}
  else:
    raise ValueError(f'Unsupported job type {job_type} for AWS Glue.')
  vm_util.IssueCommand(self.cmd_prefix + [
      'glue', 'create-job',
      '--name', job_name,
      '--role', self.role,
      '--command', json.dumps(glue_command),
      '--default-arguments', json.dumps(glue_default_args),
      '--glue-version', self.dpb_version,
      '--number-of-workers', str(self.spec.worker_count),
      '--worker-type', self.spec.worker_group.vm_spec.machine_type,
  ])
  # Run job definition
  stdout, _, _ = vm_util.IssueCommand(self.cmd_prefix + [
      'glue', 'start-job-run',
      '--job-name', job_name,
      '--arguments',
      json.dumps({
          '--pkb_main': _ModuleFromPyFilename(pyspark_file),
          '--pkb_args': json.dumps(job_arguments)
      })
  ])
  job_run_id = json.loads(stdout)['JobRunId']
  return self._WaitForJob((job_name, job_run_id), GLUE_TIMEOUT,
                          job_poll_interval)
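# Hedged usage sketch (not part of the source): how a caller might invoke the
# Glue SubmitJob defined above. The `glue_service` instance name, bucket, and
# script path are hypothetical; the keyword arguments and PYSPARK_JOB_TYPE
# constant follow the SubmitJob signature shown above.
def _RunExampleGlueJob(glue_service):
  result = glue_service.SubmitJob(
      pyspark_file='s3://example-bucket/scripts/wordcount.py',  # hypothetical
      job_type=glue_service.PYSPARK_JOB_TYPE,
      job_arguments=['--input', 's3://example-bucket/data/'])  # hypothetical
  logging.info('Glue job finished: %s', result)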
def EmptyBucket(self, bucket):
  vm_util.IssueCommand([
      'aws', 's3', 'rm', 's3://%s' % bucket,
      '--region', self.region,
      '--recursive'
  ])
def Prepare(benchmark_spec):
  """Installs and sets up dataset on the Spark clusters.

  Copies scripts and all the queries to cloud.
  Creates external Hive tables for data (unless BigQuery is being used).

  Args:
    benchmark_spec: The benchmark specification
  """
  dpb_service_instance = benchmark_spec.dpb_service
  # buckets must start with a letter
  bucket = 'pkb-' + benchmark_spec.uuid.split('-')[0]
  storage_service = dpb_service_instance.storage_service
  storage_service.MakeBucket(bucket)
  benchmark_spec.base_dir = dpb_service_instance.PERSISTENT_FS_PREFIX + bucket

  temp_run_dir = temp_dir.GetRunDirPath()
  spark_sql_perf_dir = os.path.join(temp_run_dir, 'spark_sql_perf_dir')
  vm_util.IssueCommand(
      ['git', 'clone', SPARK_SQL_PERF_GIT, spark_sql_perf_dir])
  vm_util.IssueCommand(['git', 'checkout', SPARK_SQL_PERF_GIT_COMMIT],
                       cwd=spark_sql_perf_dir)

  benchmark_spec.queries = []
  query_dir = os.path.join(spark_sql_perf_dir, 'src', 'main', 'resources',
                           FLAGS.dpb_sparksql_query)
  for dir_name, _, files in os.walk(query_dir):
    for filename in files:
      match = re.match(r'q?([0-9]+)a?.sql', filename)
      if match:
        query_id = match.group(1)
        # if order is specified only upload those queries
        if not FLAGS.dpb_sparksql_order or query_id in FLAGS.dpb_sparksql_order:
          benchmark_spec.queries.append(query_id)
          query = '{}.sql'.format(query_id)
          src_url = os.path.join(dir_name, filename)
          storage_service.CopyToBucket(src_url, bucket, query)
  if not benchmark_spec.queries:
    raise errors.Benchmarks.PrepareException('No queries were staged')

  for script in [SPARK_TABLE_SCRIPT, SPARK_SQL_RUNNER_SCRIPT]:
    src_url = data.ResourcePath(script)
    storage_service.CopyToBucket(src_url, bucket, script)

  benchmark_spec.table_subdirs = []
  if FLAGS.dpb_sparksql_data:
    table_dir = FLAGS.dpb_sparksql_data.rstrip('/') + '/'
    stdout = storage_service.List(table_dir)
    for line in stdout.split('\n'):
      # GCS will sometimes list the directory itself.
      if line and line != table_dir:
        benchmark_spec.table_subdirs.append(
            re.split(' |/', line.rstrip('/')).pop())

  # Create external Hive tables
  if FLAGS.dpb_sparksql_create_hive_tables:
    try:
      result = dpb_service_instance.SubmitJob(
          pyspark_file=os.path.join(benchmark_spec.base_dir,
                                    SPARK_TABLE_SCRIPT),
          job_type=BaseDpbService.PYSPARK_JOB_TYPE,
          job_arguments=[
              FLAGS.dpb_sparksql_data, ','.join(benchmark_spec.table_subdirs)
          ])
      logging.info(result)
    except dpb_service.JobSubmissionError as e:
      raise errors.Benchmarks.PrepareException(
          'Creating tables from {}/* failed'.format(
              FLAGS.dpb_sparksql_data)) from e
def MakeBucket(self, bucket_name):
  vm_util.IssueCommand([
      'aws', 's3', 'mb', 's3://%s' % bucket_name,
      '--region=%s' % self.region
  ])
def DeleteBucket(self, bucket):
  vm_util.IssueCommand([
      'aws', 's3', 'rb', 's3://%s' % bucket,
      '--region', self.region,
      '--force'  # --force deletes even if bucket contains objects.
  ])
def _Delete(self):
  cmd = self.cmd_prefix + [
      'ec2', 'delete-security-group',
      '--group-id=' + self.group_id
  ]
  vm_util.IssueCommand(cmd)
def CreateBucket(self, source_bucket):
  mb_cmd = self.cmd_prefix + ['s3', 'mb', source_bucket]
  stdout, _, _ = vm_util.IssueCommand(mb_cmd)
def _Create(self): """Creates the cluster.""" name = 'pkb_' + FLAGS.run_uri # Set up ebs details if disk_spec is present int he config ebs_configuration = None if self.spec.worker_group.disk_spec: # Make sure nothing we are ignoring is included in the disk spec assert self.spec.worker_group.disk_spec.device_path is None assert self.spec.worker_group.disk_spec.disk_number is None assert self.spec.worker_group.disk_spec.mount_point is None assert self.spec.worker_group.disk_spec.iops is None ebs_configuration = { 'EbsBlockDeviceConfigs': [{ 'VolumeSpecification': { 'SizeInGB': self.spec.worker_group.disk_spec.disk_size, 'VolumeType': self.spec.worker_group.disk_spec.disk_type }, 'VolumesPerInstance': self.spec.worker_group.disk_spec.num_striped_disks }] } # Create the specification for the master and the worker nodes instance_groups = [] core_instances = { 'InstanceCount': self.spec.worker_count, 'InstanceGroupType': 'CORE', 'InstanceType': self.spec.worker_group.vm_spec.machine_type } if ebs_configuration: core_instances.update({'EbsConfiguration': ebs_configuration}) master_instance = { 'InstanceCount': 1, 'InstanceGroupType': 'MASTER', 'InstanceType': self.spec.worker_group.vm_spec.machine_type } if ebs_configuration: master_instance.update({'EbsConfiguration': ebs_configuration}) instance_groups.append(core_instances) instance_groups.append(master_instance) # Create the log bucket to hold job's log output logs_bucket = FLAGS.aws_emr_loguri or self._CreateLogBucket() cmd = self.cmd_prefix + [ 'emr', 'create-cluster', '--name', name, '--release-label', self.emr_release_label, '--use-default-roles', '--instance-groups', json.dumps(instance_groups), '--application', 'Name=Spark', 'Name=Hadoop', '--log-uri', logs_bucket ] if self.network: cmd += ['--ec2-attributes', 'SubnetId=' + self.network.subnet.id] stdout, _, _ = vm_util.IssueCommand(cmd) result = json.loads(stdout) self.cluster_id = result['ClusterId'] logging.info('Cluster created with id %s', self.cluster_id) for tag_key, tag_value in util.MakeDefaultTags().items(): self._AddTag(tag_key, tag_value)
def _Create(self):
  vm_util.IssueCommand(
      [azure.AZURE_PATH, 'network', 'nsg', 'create',
       '--location', self.location,
       self.name] + self.resource_group.args)
def Prepare(self, vm): """Prepares the DB and everything for the AWS-RDS provider. Args: vm: The VM to be used as the test client. """ logging.info('Preparing MySQL Service benchmarks for RDS.') # TODO: Refactor the RDS DB instance creation and deletion logic out # to a new class called RDSDBInstance that Inherits from # perfkitbenchmarker.resource.BaseResource. # And do the same for GCP. # First is to create another subnet in the same VPC as the VM but in a # different zone. RDS requires two subnets in two different zones to create # a DB instance, EVEN IF you do not specify multi-AZ in your DB creation # request. # Get a list of zones and pick one that's different from the zone VM is in. new_subnet_zone = None get_zones_cmd = util.AWS_PREFIX + [ 'ec2', 'describe-availability-zones' ] stdout, _, _ = vm_util.IssueCommand(get_zones_cmd) response = json.loads(stdout) all_zones = response['AvailabilityZones'] for zone in all_zones: if zone['ZoneName'] != vm.zone: new_subnet_zone = zone['ZoneName'] break if new_subnet_zone is None: raise DBStatusQueryError( 'Cannot find a zone to create the required ' 'second subnet for the DB instance.') # Now create a new subnet in the zone that's different from where the VM is logging.info('Creating a second subnet in zone %s', new_subnet_zone) new_subnet = aws_network.AwsSubnet(new_subnet_zone, vm.network.vpc.id, '10.0.1.0/24') new_subnet.Create() logging.info('Successfully created a new subnet, subnet id is: %s', new_subnet.id) # Remember this so we can cleanup properly. vm.extra_subnet_for_db = new_subnet # Now we can create a new DB subnet group that has two subnets in it. db_subnet_group_name = 'pkb%s' % FLAGS.run_uri create_db_subnet_group_cmd = util.AWS_PREFIX + [ 'rds', 'create-db-subnet-group', '--db-subnet-group-name', db_subnet_group_name, '--db-subnet-group-description', 'pkb_subnet_group_for_db', '--subnet-ids', vm.network.subnet.id, new_subnet.id ] stdout, stderr, _ = vm_util.IssueCommand(create_db_subnet_group_cmd) logging.info( 'Created a DB subnet group, stdout is:\n%s\nstderr is:\n%s', stdout, stderr) vm.db_subnet_group_name = db_subnet_group_name # open up tcp port 3306 in the VPC's security group, we need that to connect # to the DB. open_port_cmd = util.AWS_PREFIX + [ 'ec2', 'authorize-security-group-ingress', '--group-id', vm.group_id, '--source-group', vm.group_id, '--protocol', 'tcp', '--port', MYSQL_PORT ] stdout, stderr, _ = vm_util.IssueCommand(open_port_cmd) logging.info('Granted DB port ingress, stdout is:\n%s\nstderr is:\n%s', stdout, stderr) # Finally, it's time to create the DB instance! 
vm.db_instance_id = 'pkb-DB-%s' % FLAGS.run_uri db_class = \ RDS_CORE_TO_DB_CLASS_MAP['%s' % FLAGS.mysql_svc_db_instance_cores] vm.db_instance_master_user = MYSQL_ROOT_USER vm.db_instance_master_password = _GenerateRandomPassword() create_db_cmd = util.AWS_PREFIX + [ 'rds', 'create-db-instance', '--db-instance-identifier', vm.db_instance_id, '--db-instance-class', db_class, '--engine', RDS_DB_ENGINE, '--engine-version', RDS_DB_ENGINE_VERSION, '--storage-type', RDS_DB_STORAGE_TYPE_GP2, '--allocated-storage', RDS_DB_STORAGE_GP2_SIZE, '--vpc-security-group-ids', vm.group_id, '--master-username', vm.db_instance_master_user, '--master-user-password', vm.db_instance_master_password, '--availability-zone', vm.zone, '--db-subnet-group-name', vm.db_subnet_group_name ] status_query_cmd = util.AWS_PREFIX + [ 'rds', 'describe-db-instances', '--db-instance-id', vm.db_instance_id ] stdout, stderr, _ = vm_util.IssueCommand(create_db_cmd) logging.info( 'Request to create the DB has been issued, stdout:\n%s\n' 'stderr:%s\n', stdout, stderr) response = json.loads(stdout) db_creation_status = _RDSParseDBInstanceStatus(response) for status_query_count in xrange(1, DB_STATUS_QUERY_LIMIT + 1): if db_creation_status == 'available': break if db_creation_status not in RDS_DB_CREATION_PENDING_STATUS: raise DBStatusQueryError( 'Invalid status in DB creation response. ' ' stdout is\n%s, stderr is\n%s' % (stdout, stderr)) logging.info( 'Querying db creation status, current state is %s, query ' 'count is %d', db_creation_status, status_query_count) time.sleep(DB_STATUS_QUERY_INTERVAL) stdout, stderr, _ = vm_util.IssueCommand(status_query_cmd) response = json.loads(stdout) db_creation_status = _RDSParseDBInstanceStatus(response) else: raise DBStatusQueryError( 'DB creation timed-out, we have ' 'waited at least %s * %s seconds.' % (DB_STATUS_QUERY_INTERVAL, DB_STATUS_QUERY_LIMIT)) # We are good now, db has been created. Now get the endpoint address. # On RDS, you always connect with a DNS name, if you do that from a EC2 VM, # that DNS name will be resolved to an internal IP address of the DB. if 'DBInstance' in response: vm.db_instance_address = response['DBInstance']['Endpoint'][ 'Address'] else: if 'DBInstances' in response: vm.db_instance_address = \ response['DBInstances'][0]['Endpoint']['Address'] logging.info('Successfully created an RDS DB instance. Address is %s', vm.db_instance_address) logging.info('Complete output is:\n %s', response)
def _Exists(self):
  _, _, retcode = vm_util.IssueCommand(
      [azure.AZURE_PATH, 'resource', 'list', self.name],
      suppress_warning=True)
  return retcode == 0
def Cleanup(self, vm):
  """Cleans up the RDS instance and the extra subnet created for it.

  Args:
    vm: The VM that was used as the test client, which also stores states
        for clean-up.
  """
  # Now, we can delete the DB instance. vm.db_instance_id is the id to call.
  # We need to keep querying the status of the deletion here before we let
  # this go. RDS DB deletion takes some time to finish. And we have to
  # wait until this DB is deleted before we proceed because this DB holds
  # references to various other resources: subnet groups, subnets, vpc, etc.
  delete_db_cmd = util.AWS_PREFIX + [
      'rds', 'delete-db-instance',
      '--db-instance-identifier', vm.db_instance_id,
      '--skip-final-snapshot'
  ]
  logging.info('Deleting db instance %s...', vm.db_instance_id)

  # Note below, the status of this deletion command is validated below in the
  # loop. both stdout and stderr are checked.
  stdout, stderr, _ = vm_util.IssueCommand(delete_db_cmd)
  logging.info('Request to delete the DB has been issued, stdout:\n%s\n'
               'stderr:%s\n', stdout, stderr)

  status_query_cmd = util.AWS_PREFIX + [
      'rds', 'describe-db-instances',
      '--db-instance-id', vm.db_instance_id
  ]

  db_status = None
  for status_query_count in xrange(1, DB_STATUS_QUERY_LIMIT + 1):
    try:
      response = json.loads(stdout)
    except ValueError:
      # stdout cannot be parsed into json, it might simply be empty because
      # deletion has been completed.
      break
    db_status = _RDSParseDBInstanceStatus(response)

    if db_status == 'deleting':
      logging.info('DB is still in the deleting state, status_query_count '
                   'is %d', status_query_count)
      # Wait for a few seconds and query status
      time.sleep(DB_STATUS_QUERY_INTERVAL)
      stdout, stderr, _ = vm_util.IssueCommand(status_query_cmd)
    else:
      logging.info('DB deletion status is no longer in deleting, it is %s',
                   db_status)
      break
  else:
    logging.warn('DB is still in deleting state after long wait, bail.')

  db_instance_deletion_failed = False
  if db_status == 'deleted' or re.findall('DBInstanceNotFound', stderr):
    # Sometimes we get a 'deleted' status from DB status query command,
    # but even more times, the DB status query command would fail with
    # an "not found" error, both are positive confirmation that the DB has
    # been deleted.
    logging.info('DB has been successfully deleted, got confirmation.')
  else:
    # We did not get a positive confirmation that the DB is deleted even
    # after a long wait, we have to bail. But we will log an error message,
    # and then raise an exception at the end of this function so this
    # particular run will show as a failed run to the user and allow them to
    # examine the logs.
    db_instance_deletion_failed = True
    logging.error('RDS DB instance %s failed to be deleted, we did not get '
                  'final confirmation from stderr, which is:\n %s',
                  vm.db_instance_id, stderr)

  if hasattr(vm, 'db_subnet_group_name'):
    delete_db_subnet_group_cmd = util.AWS_PREFIX + [
        'rds', 'delete-db-subnet-group',
        '--db-subnet-group-name', vm.db_subnet_group_name
    ]
    stdout, stderr, _ = vm_util.IssueCommand(delete_db_subnet_group_cmd)
    logging.info('Deleted the db subnet group. stdout is:\n%s, stderr: \n%s',
                 stdout, stderr)

  if hasattr(vm, 'extra_subnet_for_db'):
    vm.extra_subnet_for_db.Delete()

  if db_instance_deletion_failed:
    raise DBStatusQueryError('Failed to get confirmation of DB instance '
                             'deletion! Check the log for details!')
def RemoteHostCommand(self, command, should_log=False, retries=SSH_RETRIES,
                      ignore_failure=False, login_shell=False,
                      suppress_warning=False, timeout=None):
  """Runs a command on the VM.

  This is guaranteed to run on the host VM, whereas RemoteCommand might run
  within i.e. a container in the host VM.

  Args:
    command: A valid bash command.
    should_log: A boolean indicating whether the command result should be
        logged at the info level. Even if it is false, the results will
        still be logged at the debug level.
    retries: The maximum number of times RemoteCommand should retry SSHing
        when it receives a 255 return code.
    ignore_failure: Ignore any failure if set to true.
    login_shell: Run command in a login shell.
    suppress_warning: Suppress the result logging from IssueCommand when the
        return code is non-zero.

  Returns:
    A tuple of stdout and stderr from running the command.

  Raises:
    RemoteCommandError: If there was a problem establishing the connection.
  """
  if vm_util.RunningOnWindows():
    # Multi-line commands passed to ssh won't work on Windows unless the
    # newlines are escaped.
    command = command.replace('\n', '\\n')

  user_host = '%s@%s' % (self.user_name, self.ip_address)
  ssh_cmd = ['ssh', '-A', '-p', str(self.ssh_port), user_host]
  ssh_cmd.extend(vm_util.GetSshOptions(self.ssh_private_key))
  try:
    if login_shell:
      ssh_cmd.extend(['-t', '-t', 'bash -l -c "%s"' % command])
      self._pseudo_tty_lock.acquire()
    else:
      ssh_cmd.append(command)

    for _ in range(retries):
      stdout, stderr, retcode = vm_util.IssueCommand(
          ssh_cmd, force_info_log=should_log,
          suppress_warning=suppress_warning,
          timeout=timeout)
      if retcode != 255:  # Retry on 255 because this indicates an SSH failure
        break
  finally:
    if login_shell:
      self._pseudo_tty_lock.release()

  if retcode:
    full_cmd = ' '.join(ssh_cmd)
    error_text = ('Got non-zero return code (%s) executing %s\n'
                  'Full command: %s\nSTDOUT: %sSTDERR: %s' %
                  (retcode, command, full_cmd, stdout, stderr))
    if not ignore_failure:
      raise errors.VirtualMachine.RemoteCommandError(error_text)

  return stdout, stderr
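# Hedged usage sketch (not part of the source): calling RemoteHostCommand as
# defined above and handling the failure it raises. The `vm` object and the
# `uptime` command are hypothetical; should_log and RemoteCommandError are
# taken from the signature and docstring above.
def _CheckUptime(vm):
  try:
    stdout, _ = vm.RemoteHostCommand('uptime', should_log=True)
    return stdout.strip()
  except errors.VirtualMachine.RemoteCommandError:
    # Non-zero return codes raise unless ignore_failure=True is passed.
    return None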
def Prepare(self, vm): """Prepares the DB and everything for the provider GCP (Cloud SQL). Args: vm: The VM to be used as the test client """ # TODO: Refactor the GCP Cloud SQL instance creation and deletion logic out # to a new class called GCPCloudSQLInstance that Inherits from # perfkitbenchmarker.resource.BaseResource. logging.info( 'Preparing MySQL Service benchmarks for Google Cloud SQL.') vm.db_instance_name = 'pkb%s' % FLAGS.run_uri db_tier = 'db-n1-standard-%s' % FLAGS.mysql_svc_db_instance_cores # Currently, we create DB instance in the same zone as the test VM. db_instance_zone = vm.zone # Currently GCP REQUIRES you to connect to the DB instance via external IP # (i.e., using external IPs of the DB instance AND the VM instance). authorized_network = '%s/32' % vm.ip_address create_db_cmd = [ FLAGS.gcloud_path, 'sql', 'instances', 'create', vm.db_instance_name, '--quiet', '--format=json', '--async', '--activation-policy=ALWAYS', '--assign-ip', '--authorized-networks=%s' % authorized_network, '--backup-start-time=%s' % DEFAULT_BACKUP_START_TIME, '--enable-bin-log', '--tier=%s' % db_tier, '--gce-zone=%s' % db_instance_zone, '--database-version=%s' % GCP_MY_SQL_VERSION, '--pricing-plan=%s' % GCP_PRICING_PLAN ] stdout, _, _ = vm_util.IssueCommand(create_db_cmd) response = json.loads(stdout) if response['operation'] is None or response[ 'operationType'] != 'CREATE': raise DBStatusQueryError('Invalid operation or unrecognized ' 'operationType in DB creation response. ' ' stdout is %s' % stdout) status_query_cmd = [ FLAGS.gcloud_path, 'sql', 'instances', 'describe', vm.db_instance_name, '--format', 'json' ] stdout, _, _ = vm_util.IssueCommand(status_query_cmd) response = json.loads(stdout) query_count = 1 while True: state = response['state'] if state is None: raise ValueError( 'Cannot parse response from status query command. ' 'The state is missing. stdout is %s' % stdout) if state == 'RUNNABLE': break else: if query_count > DB_STATUS_QUERY_LIMIT: raise DBStatusQueryError( 'DB creation timed-out, we have ' 'waited at least %s * %s seconds.' % (DB_STATUS_QUERY_INTERVAL, DB_STATUS_QUERY_LIMIT)) logging.info( 'Querying db creation status, current state is %s, query ' 'count is %d', state, query_count) time.sleep(DB_STATUS_QUERY_INTERVAL) stdout, _, _ = vm_util.IssueCommand(status_query_cmd) response = json.loads(stdout) query_count += 1 logging.info( 'Successfully created the DB instance. Complete response is ' '%s', response) vm.db_instance_address = response['ipAddresses'][0]['ipAddress'] logging.info('DB IP address is: %s', vm.db_instance_address) # Set the root password to a common one that can be referred to in common # code across providers. vm.db_instance_master_user = MYSQL_ROOT_USER vm.db_instance_master_password = _GenerateRandomPassword() set_password_cmd = [ FLAGS.gcloud_path, 'sql', 'instances', 'set-root-password', vm.db_instance_name, '--password', vm.db_instance_master_password ] stdout, stderr, _ = vm_util.IssueCommand(set_password_cmd) logging.info('Set root password completed. Stdout:\n%s\nStderr:\n%s', stdout, stderr)
def EmptyBucket(self, bucket):
  # Ignore failures here and retry in DeleteBucket. See more comments there.
  vm_util.IssueCommand(
      ['gsutil', '-m', 'rm', '-r', 'gs://%s/*' % bucket],
      raise_on_failure=False)
def _Create(self): """Creates the AWS RDS instance. Raises: Exception: if unknown how to create self.spec.engine. """ if self.spec.engine in _RDS_ENGINES: instance_identifier = self.instance_id self.all_instance_ids.append(instance_identifier) cmd = util.AWS_PREFIX + [ 'rds', 'create-db-instance', '--db-instance-identifier=%s' % instance_identifier, '--engine=%s' % self.spec.engine, '--master-username=%s' % self.spec.database_username, '--master-user-password=%s' % self.spec.database_password, '--allocated-storage=%s' % self.spec.disk_spec.disk_size, '--storage-type=%s' % self.spec.disk_spec.disk_type, '--db-instance-class=%s' % self.spec.vm_spec.machine_type, '--no-auto-minor-version-upgrade', '--region=%s' % self.region, '--engine-version=%s' % self.spec.engine_version, '--db-subnet-group-name=%s' % self.db_subnet_group_name, '--vpc-security-group-ids=%s' % self.security_group_id, '--availability-zone=%s' % self.spec.vm_spec.zone, '--tags' ] + util.MakeFormattedDefaultTags() if self.spec.disk_spec.disk_type == aws_disk.IO1: cmd.append('--iops=%s' % self.spec.disk_spec.iops) # TODO(ferneyhough): add backup_enabled and backup_window vm_util.IssueCommand(cmd) elif self.spec.engine in _AURORA_ENGINES: zones_needed_for_high_availability = len(self.zones) > 1 if zones_needed_for_high_availability != self.spec.high_availability: raise Exception( 'When managed_db_high_availability is true, multiple ' 'zones must be specified. When ' 'managed_db_high_availability is false, one zone ' 'should be specified. ' 'managed_db_high_availability: {0} ' 'zone count: {1} '.format( zones_needed_for_high_availability, len(self.zones))) cluster_identifier = 'pkb-db-cluster-' + FLAGS.run_uri # Create the cluster. cmd = util.AWS_PREFIX + [ 'rds', 'create-db-cluster', '--db-cluster-identifier=%s' % cluster_identifier, '--engine=%s' % self.spec.engine, '--engine-version=%s' % self.spec.engine_version, '--master-username=%s' % self.spec.database_username, '--master-user-password=%s' % self.spec.database_password, '--region=%s' % self.region, '--db-subnet-group-name=%s' % self.db_subnet_group_name, '--vpc-security-group-ids=%s' % self.security_group_id, '--availability-zones=%s' % self.spec.zones[0], '--tags' ] + util.MakeFormattedDefaultTags() self.cluster_id = cluster_identifier vm_util.IssueCommand(cmd) for zone in self.zones: # The first instance is assumed to be writer - # and so use the instance_id for that id. if zone == self.zones[0]: instance_identifier = self.instance_id else: instance_identifier = self.instance_id + '-' + zone self.all_instance_ids.append(instance_identifier) cmd = util.AWS_PREFIX + [ 'rds', 'create-db-instance', '--db-instance-identifier=%s' % instance_identifier, '--db-cluster-identifier=%s' % cluster_identifier, '--engine=%s' % self.spec.engine, '--engine-version=%s' % self.spec.engine_version, '--no-auto-minor-version-upgrade', '--db-instance-class=%s' % self.spec.vm_spec.machine_type, '--region=%s' % self.region, '--availability-zone=%s' % zone, '--tags' ] + util.MakeFormattedDefaultTags() vm_util.IssueCommand(cmd) else: raise Exception( 'Unknown how to create AWS data base engine {0}'.format( self.spec.engine))
def CopyToBucket(self, src_path, bucket, object_path): """See base class.""" dst_url = self.MakeRemoteCliDownloadUrl(bucket, object_path) vm_util.IssueCommand(['gsutil', 'cp', src_path, dst_url])
def GetDefaultUser():
  """Get the default gcloud account (user)."""
  cmd = [FLAGS.gcloud_path, 'config', 'list', '--format=json']
  stdout, _, _ = vm_util.IssueCommand(cmd)
  result = json.loads(stdout)
  return result['core']['account']
def testTimeoutReached(self):
  _, _, retcode = vm_util.IssueCommand(['sleep', '2s'], timeout=1)
  self.assertEqual(retcode, -9)
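# Hedged sketch (not part of the source), built only on the two timeout tests
# above: a caller can bound a long-running command with `timeout` and inspect
# the return code, which those tests show is -9 (SIGKILL) when the deadline is
# hit and 0 on success. The helper name and command are hypothetical.
def _RunWithDeadline(cmd, deadline_seconds):
  stdout, stderr, retcode = vm_util.IssueCommand(cmd, timeout=deadline_seconds)
  if retcode == -9:
    logging.warning('Command %s timed out after %s seconds.', cmd,
                    deadline_seconds)
  return stdout, stderr, retcode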
def _Create(self): """Creates the AWS RDS instance. Raises: Exception: if unknown how to create self.spec.engine. """ if (self.spec.engine == managed_relational_db.MYSQL or self.spec.engine == managed_relational_db.POSTGRES): instance_identifier = self.instance_id self.all_instance_ids.append(instance_identifier) cmd = util.AWS_PREFIX + [ 'rds', 'create-db-instance', '--db-instance-identifier=%s' % instance_identifier, '--engine=%s' % self.spec.engine, '--master-username=%s' % self.spec.database_username, '--master-user-password=%s' % self.spec.database_password, '--allocated-storage=%s' % self.spec.disk_spec.disk_size, '--storage-type=%s' % self.spec.disk_spec.disk_type, '--db-instance-class=%s' % self.spec.vm_spec.machine_type, '--no-auto-minor-version-upgrade', '--region=%s' % self.region, '--engine-version=%s' % self.spec.engine_version, '--db-subnet-group-name=%s' % self.db_subnet_group_name, '--vpc-security-group-ids=%s' % self.security_group_id, ] if self.spec.disk_spec.disk_type == aws_disk.IO1: cmd.append('--iops=%s' % self.spec.disk_spec.iops) if self.spec.high_availability: cmd.append('--multi-az') else: cmd.append('--availability-zone=%s' % self.spec.vm_spec.zone) # TODO(ferneyhough): add backup_enabled and backup_window vm_util.IssueCommand(cmd) elif self.spec.engine == managed_relational_db.AURORA_POSTGRES: cluster_identifier = 'pkb-db-cluster-' + FLAGS.run_uri # Create the cluster. cmd = util.AWS_PREFIX + [ 'rds', 'create-db-cluster', '--db-cluster-identifier=%s' % cluster_identifier, '--engine=aurora-postgresql', '--master-username=%s' % self.spec.database_username, '--master-user-password=%s' % self.spec.database_password, '--region=%s' % self.region, '--db-subnet-group-name=%s' % self.db_subnet_group_name, '--vpc-security-group-ids=%s' % self.security_group_id, '--availability-zones=%s' % self.spec.zones[0] ] self.cluster_id = cluster_identifier vm_util.IssueCommand(cmd) for zone in self.zones: # The first instance is assumed to be writer - # and so use the instance_id for that id. if zone == self.zones[0]: instance_identifier = self.instance_id else: instance_identifier = self.instance_id + '-' + zone self.all_instance_ids.append(instance_identifier) cmd = util.AWS_PREFIX + [ 'rds', 'create-db-instance', '--db-instance-identifier=%s' % instance_identifier, '--db-cluster-identifier=%s' % cluster_identifier, '--engine=aurora-postgresql', '--no-auto-minor-version-upgrade', '--db-instance-class=%s' % self.spec.machine_type, '--region=%s' % self.region, '--availability-zone=%s' % zone ] vm_util.IssueCommand(cmd) else: raise Exception( 'Unknown how to create AWS data base engine {0}'.format( self.spec.engine))
def _Create(self): """Creates the cluster.""" name = 'pkb_' + FLAGS.run_uri # Set up ebs details if disk_spec is present in the config ebs_configuration = None if self.spec.worker_group.disk_spec: # Make sure nothing we are ignoring is included in the disk spec assert self.spec.worker_group.disk_spec.device_path is None assert self.spec.worker_group.disk_spec.disk_number is None assert self.spec.worker_group.disk_spec.iops is None self.dpb_hdfs_type = disk_to_hdfs_map[ self.spec.worker_group.disk_spec.disk_type] if self.spec.worker_group.disk_spec.disk_type != disk.LOCAL: ebs_configuration = { 'EbsBlockDeviceConfigs': [{ 'VolumeSpecification': { 'SizeInGB': self.spec.worker_group.disk_spec.disk_size, 'VolumeType': self.spec.worker_group.disk_spec.disk_type }, 'VolumesPerInstance': self.spec.worker_group.disk_count }] } # Create the specification for the master and the worker nodes instance_groups = [] core_instances = { 'InstanceCount': self.spec.worker_count, 'InstanceGroupType': 'CORE', 'InstanceType': self.spec.worker_group.vm_spec.machine_type } if ebs_configuration: core_instances.update({'EbsConfiguration': ebs_configuration}) master_instance = { 'InstanceCount': 1, 'InstanceGroupType': 'MASTER', 'InstanceType': self.spec.worker_group.vm_spec.machine_type } if ebs_configuration: master_instance.update({'EbsConfiguration': ebs_configuration}) instance_groups.append(core_instances) instance_groups.append(master_instance) # Spark SQL needs to access Hive cmd = self.cmd_prefix + [ 'emr', 'create-cluster', '--name', name, '--release-label', self.dpb_version, '--use-default-roles', '--instance-groups', json.dumps(instance_groups), '--application', 'Name=Spark', 'Name=Hadoop', 'Name=Hive', '--log-uri', self.base_dir ] ec2_attributes = [ 'KeyName=' + aws_virtual_machine.AwsKeyFileManager.GetKeyNameForRun(), 'SubnetId=' + self.network.subnet.id, # Place all VMs in default security group for simplicity and speed of # provisioning 'EmrManagedMasterSecurityGroup=' + self.security_group_id, 'EmrManagedSlaveSecurityGroup=' + self.security_group_id, ] cmd += ['--ec2-attributes', ','.join(ec2_attributes)] if FLAGS.dpb_cluster_properties: cmd += ['--configurations', _GetClusterConfiguration()] stdout, _, _ = vm_util.IssueCommand(cmd) result = json.loads(stdout) self.cluster_id = result['ClusterId'] logging.info('Cluster created with id %s', self.cluster_id) for tag_key, tag_value in util.MakeDefaultTags().items(): self._AddTag(tag_key, tag_value)
def _Create(self):
  vm_util.IssueCommand(
      [azure.AZURE_PATH, 'network', 'vnet', 'subnet', 'create',
       '--vnet-name', self.vnet.name,
       '--address-prefix', self.vnet.address_space,
       self.name] + self.resource_group.args)
def _Delete(self):
  if self.cluster_id:
    delete_cmd = self.cmd_prefix + [
        'emr', 'terminate-clusters',
        '--cluster-ids', self.cluster_id
    ]
    vm_util.IssueCommand(delete_cmd, raise_on_failure=False)
def CopyToBucket(self, src_path, bucket, object_path): """See base class.""" dst_url = self.MakeRemoteCliDownloadUrl(bucket, object_path) vm_util.IssueCommand( ['aws', 's3', 'cp', src_path, dst_url, '--region', self.region])