def _Create(self):
  """Create an Apache Spark cluster."""
  # TODO: this should install Spark rather than Hadoop.
  def InstallHadoop(vm):
    vm.Install('hadoop')

  vm_util.RunThreaded(
      InstallHadoop, self.vms['worker_group'] + self.vms['master_group'])
  self.leader = self.vms['master_group'][0]
  hadoop.ConfigureAndStart(self.leader, self.vms['worker_group'])

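# The TODO above is actionable. A minimal sketch of the intended fix,
# assuming a 'spark' package is registered with vm.Install alongside
# 'hadoop', and that the spark.ConfigureAndStart helper shown later in
# this section is importable (both are assumptions, not confirmed here).
def _Create(self):
  """Create an Apache Spark cluster (sketch of the intended fix)."""
  def InstallSpark(vm):
    vm.Install('spark')  # hypothetical package name, mirroring 'hadoop'

  vm_util.RunThreaded(
      InstallSpark, self.vms['worker_group'] + self.vms['master_group'])
  self.leader = self.vms['master_group'][0]
  spark.ConfigureAndStart(self.leader, self.vms['worker_group'])
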
def _Create(self):
  """Create an unmanaged YARN cluster."""
  logging.info('Should have created vms by now.')
  logging.info(str(self.vms))

  def InstallHadoop(vm):
    vm.Install('hadoop')

  vm_util.RunThreaded(
      InstallHadoop, self.vms['worker_group'] + self.vms['master_group'])
  self.leader = self.vms['master_group'][0]
  hadoop.ConfigureAndStart(self.leader, self.vms['worker_group'])

def _Create(self):
  """Create an Apache Spark cluster."""
  # TODO: this should install Spark rather than Hadoop.
  def InstallHadoop(vm):
    vm.Install('hadoop')

  if 'worker_group' not in self.vms:
    raise errors.Resource.CreationError(
        'PkbSparkService requires worker_group VMs.')
  vm_util.RunThreaded(
      InstallHadoop, self.vms['worker_group'] + self.vms['master_group'])
  self.leader = self.vms['master_group'][0]
  hadoop.ConfigureAndStart(self.leader, self.vms['worker_group'])

def ConfigureAndStart(leader, workers, configure_s3=False):
  """Run Spark Standalone and HDFS on a cluster.

  Args:
    leader: VM. The leader VM; runs the HDFS NameNode and Spark Master.
    workers: List of VMs. Each will run an HDFS DataNode and a Spark Worker.
    configure_s3: Whether to configure Spark to access S3.
  """
  # Start HDFS (start_yarn=False: Spark Standalone replaces YARN here).
  hadoop.ConfigureAndStart(leader, workers, start_yarn=False)

  vms = [leader] + workers
  # If there are no workers, set up in pseudo-distributed mode, where the
  # leader node runs the worker daemons.
  workers = workers or [leader]
  fn = functools.partial(_RenderConfig, leader=leader, workers=workers,
                         configure_s3=configure_s3)
  vm_util.RunThreaded(fn, vms)

  # Set up passwordless SSH from the leader to every node, as required by
  # the Spark start scripts.
  leader.RemoteCommand(
      "rm -f {0} && ssh-keygen -q -t rsa -N '' -f {0}".format(
          SPARK_PRIVATE_KEY))
  public_key = leader.RemoteCommand(
      'cat {0}.pub'.format(SPARK_PRIVATE_KEY))[0]

  def AddKey(vm):
    vm.RemoteCommand(
        'echo "{0}" >> ~/.ssh/authorized_keys'.format(public_key))

  vm_util.RunThreaded(AddKey, vms)

  # Start the Spark master and worker daemons.
  leader.RemoteCommand('bash {0}/start-all.sh'.format(SPARK_SBIN),
                       should_log=True)
  logging.info('Sleeping 10s for Spark nodes to join.')
  time.sleep(10)

  logging.info('Checking Spark status.')
  worker_online_count = _GetOnlineWorkerCount(leader)
  if worker_online_count != len(workers):
    raise ValueError('Not all nodes running Spark: {0} < {1}'.format(
        worker_online_count, len(workers)))
  else:
    logging.info('Spark running on all %d workers', len(workers))

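# Usage note, as a hedged sketch: per the comment above, an empty worker
# list falls back to pseudo-distributed mode, so a single VM can host both
# the master and worker daemons. The import path and helper name below are
# illustrative assumptions, not from the source.
from perfkitbenchmarker.linux_packages import spark  # assumed import path


def PrepareSingleNode(vm):
  """Smoke-test setup on one VM (hypothetical helper)."""
  vm.Install('spark')  # hypothetical package name, mirroring 'hadoop'
  # Empty workers -> pseudo-distributed mode: the leader also runs the
  # worker daemons, and the online-worker check expects a count of 1.
  spark.ConfigureAndStart(vm, workers=[])
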
def Prepare(benchmark_spec):
  """Prepare the virtual machines to run HBase and YCSB.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.
  """
  by_role = _GetVMsByRole(benchmark_spec.vm_groups)

  loaders = by_role['clients']
  assert loaders, 'No loader VMs: {0}'.format(by_role)

  # HBase cluster
  hbase_vms = by_role['hbase_vms']
  assert hbase_vms, 'No HBase VMs: {0}'.format(by_role)
  master = by_role['master']
  zk_quorum = by_role['zk_quorum']
  assert zk_quorum, 'No zookeeper quorum: {0}'.format(by_role)
  workers = by_role['workers']
  assert workers, 'No workers: {0}'.format(by_role)

  hbase_install_fns = [
      functools.partial(vm.Install, 'hbase') for vm in hbase_vms
  ]
  ycsb_install_fns = [
      functools.partial(vm.Install, 'ycsb') for vm in loaders
  ]
  vm_util.RunThreaded(lambda f: f(), hbase_install_fns + ycsb_install_fns)

  hadoop.ConfigureAndStart(master, workers, start_yarn=False)
  hbase.ConfigureAndStart(master, workers, zk_quorum)

  CreateYCSBTable(master, use_snappy=FLAGS.hbase_use_snappy)

  # Populate hbase-site.xml on the loaders.
  master.PullFile(vm_util.GetTempDir(),
                  posixpath.join(hbase.HBASE_CONF_DIR, HBASE_SITE))

  def PushHBaseSite(vm):
    conf_dir = posixpath.join(ycsb.YCSB_DIR,
                              FLAGS.hbase_binding + '-binding', 'conf')
    vm.RemoteCommand('mkdir -p {}'.format(conf_dir))
    vm.PushFile(os.path.join(vm_util.GetTempDir(), HBASE_SITE),
                posixpath.join(conf_dir, HBASE_SITE))

  vm_util.RunThreaded(PushHBaseSite, loaders)
  benchmark_spec.executor = ycsb.YCSBExecutor(FLAGS.hbase_binding)

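# The Prepare above relies on a _GetVMsByRole helper that is not shown.
# A minimal sketch of what it plausibly returns, inferred only from how
# the roles are consumed above; treating master+workers as both the HBase
# VMs and the ZooKeeper quorum is an assumption.
def _GetVMsByRole(vm_groups):
  """Partition VMs into the roles consumed by Prepare (sketch)."""
  hbase_vms = vm_groups['master'] + vm_groups['workers']
  return {
      'clients': vm_groups['clients'],
      'hbase_vms': hbase_vms,
      'master': vm_groups['master'][0],
      'workers': vm_groups['workers'],
      'zk_quorum': hbase_vms,  # assumption: HBase VMs host ZooKeeper
  }
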
def Prepare(benchmark_spec):
  """Prepare the virtual machines to run Hadoop.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.
  """
  master = benchmark_spec.vm_groups['master'][0]
  workers = benchmark_spec.vm_groups['workers']
  vms = benchmark_spec.vms

  def InstallHadoop(vm):
    vm.Install('hadoop')

  vm_util.RunThreaded(InstallHadoop, vms)
  hadoop.ConfigureAndStart(master, workers)

def _Create(self):
  """Create an unmanaged YARN cluster."""
  logging.info('Should have created vms by now.')
  logging.info(str(self.vms))

  def InstallHadoop(vm):
    vm.Install('hadoop')
    if self.cloud == 'GCP':
      hadoop.InstallGcsConnector(vm)
    if self.cloud == 'AWS':
      hadoop.InstallS3Connector(vm)

  vm_util.RunThreaded(
      InstallHadoop, self.vms['worker_group'] + self.vms['master_group'])
  self.leader = self.vms['master_group'][0]
  hadoop.ConfigureAndStart(self.leader, self.vms['worker_group'],
                           configure_s3=self.cloud == 'AWS')

def _Create(self):
  """Create an unmanaged YARN cluster."""
  logging.info('Should have created vms by now.')
  logging.info(str(self.vms))

  def InstallHadoop(vm):
    vm.Install('hadoop')
    if self.cloud == 'GCP':
      hadoop.InstallGcsConnector(vm)

  if 'worker_group' not in self.vms:
    raise errors.Resource.CreationError(
        'UnmanagedDpbServiceYarnCluster requires VMs in a worker_group.')
  vm_util.RunThreaded(
      InstallHadoop, self.vms['worker_group'] + self.vms['master_group'])
  self.leader = self.vms['master_group'][0]
  hadoop.ConfigureAndStart(self.leader, self.vms['worker_group'],
                           configure_s3=self.cloud == 'AWS')

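# The per-cloud branching in the last two snippets grows with each new
# provider. A hedged sketch of a table-driven alternative, reusing only
# the connector helpers already referenced above; names prefixed with an
# underscore here are illustrative, not from the source.
import functools

_CONNECTOR_INSTALLERS = {
    'GCP': hadoop.InstallGcsConnector,
    'AWS': hadoop.InstallS3Connector,
}


def _InstallHadoop(vm, cloud):
  """Install Hadoop plus the cloud's object-store connector, if any."""
  vm.Install('hadoop')
  installer = _CONNECTOR_INSTALLERS.get(cloud)
  if installer:
    installer(vm)

# Inside _Create, the threaded install would then read:
#   vm_util.RunThreaded(
#       functools.partial(_InstallHadoop, cloud=self.cloud),
#       self.vms['worker_group'] + self.vms['master_group'])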