Example #1
    def _Create(self):
        """Create an Apache Spark cluster."""

        # need to fix this to install spark
        def InstallHadoop(vm):
            vm.Install('hadoop')

        vm_util.RunThreaded(
            InstallHadoop, self.vms['worker_group'] + self.vms['master_group'])
        self.leader = self.vms['master_group'][0]
        hadoop.ConfigureAndStart(self.leader, self.vms['worker_group'])
Example #2
    def _Create(self):
        """Create an un-managed yarn cluster."""
        logging.info('Should have created vms by now.')
        logging.info(str(self.vms))

        def InstallHadoop(vm):
            vm.Install('hadoop')

        vm_util.RunThreaded(
            InstallHadoop, self.vms['worker_group'] + self.vms['master_group'])
        self.leader = self.vms['master_group'][0]
        hadoop.ConfigureAndStart(self.leader, self.vms['worker_group'])
Example #3
    def _Create(self):
        """Create an Apache Spark cluster."""

        # need to fix this to install spark
        def InstallHadoop(vm):
            vm.Install('hadoop')

        if 'worker_group' not in self.vms:
            raise errors.Resource.CreationError(
                'PkbSparkService requires worker_group VMs.')
        vm_util.RunThreaded(
            InstallHadoop, self.vms['worker_group'] + self.vms['master_group'])
        self.leader = self.vms['master_group'][0]
        hadoop.ConfigureAndStart(self.leader, self.vms['worker_group'])
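Examples #1 and #3 both carry the "# need to fix this to install spark" TODO while installing only the 'hadoop' package. A minimal sketch of how the install step might be extended, assuming a 'spark' package is available to vm.Install (that package name is an assumption, not shown in these examples):

        def InstallSparkStack(vm):
            # Hadoop is still needed because hadoop.ConfigureAndStart brings up HDFS.
            vm.Install('hadoop')
            # Hypothetical Spark package; an assumption, not confirmed by the examples.
            vm.Install('spark')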
Example #4
def ConfigureAndStart(leader, workers, configure_s3=False):
    """Run Spark Standalone and HDFS on a cluster.

    Args:
      leader: VM. The leader VM; runs the HDFS NameNode and the Spark Master.
      workers: List of VMs. Each runs an HDFS DataNode and a Spark Worker.
      configure_s3: Whether to configure Spark to access S3.
    """
    # Start HDFS
    hadoop.ConfigureAndStart(leader, workers, start_yarn=False)

    vms = [leader] + workers
    # If there are no workers, run in pseudo-distributed mode, where the leader
    # node also runs the worker daemons.
    workers = workers or [leader]
    fn = functools.partial(_RenderConfig,
                           leader=leader,
                           workers=workers,
                           configure_s3=configure_s3)
    vm_util.RunThreaded(fn, vms)

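    # Generate a fresh SSH key on the leader and authorize it on every node so
    # the leader can reach the workers over SSH when Spark is started below.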
    leader.RemoteCommand(
        "rm -f {0} && ssh-keygen -q -t rsa -N '' -f {0}".format(
            SPARK_PRIVATE_KEY))

    public_key = leader.RemoteCommand(
        'cat {0}.pub'.format(SPARK_PRIVATE_KEY))[0]

    def AddKey(vm):
        vm.RemoteCommand(
            'echo "{0}" >> ~/.ssh/authorized_keys'.format(public_key))

    vm_util.RunThreaded(AddKey, vms)

    # Start the Spark master and workers via Spark's start-all.sh.
    leader.RemoteCommand('bash {0}/start-all.sh'.format(SPARK_SBIN),
                         should_log=True)

    logging.info('Sleeping 10s for Spark nodes to join.')
    time.sleep(10)

    logging.info('Checking Spark status.')
    worker_online_count = _GetOnlineWorkerCount(leader)
    if worker_online_count != len(workers):
        raise ValueError('Not all nodes running Spark: {0} < {1}'.format(
            worker_online_count, len(workers)))
    else:
        logging.info('Spark running on all %d workers', len(workers))
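Given the docstring above, a minimal, hypothetical call site for this function might look like the following; the vm_groups dict and its 'master_group'/'worker_group' keys mirror the other examples on this page and are assumptions, not part of this function:

    leader = vm_groups['master_group'][0]
    workers = vm_groups['worker_group']
    ConfigureAndStart(leader, workers, configure_s3=False)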
Example #5
def Prepare(benchmark_spec):
    """Prepare the virtual machines to run HBase and the YCSB loaders.

    Args:
      benchmark_spec: The benchmark specification. Contains all data that is
          required to run the benchmark.
    """
    by_role = _GetVMsByRole(benchmark_spec.vm_groups)

    loaders = by_role['clients']
    assert loaders, 'No loader VMs: {0}'.format(by_role)

    # HBase cluster
    hbase_vms = by_role['hbase_vms']
    assert hbase_vms, 'No HBase VMs: {0}'.format(by_role)
    master = by_role['master']
    zk_quorum = by_role['zk_quorum']
    assert zk_quorum, 'No zookeeper quorum: {0}'.format(by_role)
    workers = by_role['workers']
    assert workers, 'No workers: {0}'.format(by_role)

    hbase_install_fns = [
        functools.partial(vm.Install, 'hbase') for vm in hbase_vms
    ]
    ycsb_install_fns = [
        functools.partial(vm.Install, 'ycsb') for vm in loaders
    ]

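    # Each functools.partial above is a zero-argument callable; the thread pool
    # simply invokes each one to install HBase or YCSB on its VM.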
    vm_util.RunThreaded(lambda f: f(), hbase_install_fns + ycsb_install_fns)

    hadoop.ConfigureAndStart(master, workers, start_yarn=False)
    hbase.ConfigureAndStart(master, workers, zk_quorum)

    CreateYCSBTable(master, use_snappy=FLAGS.hbase_use_snappy)

    # Populate hbase-site.xml on the loaders.
    master.PullFile(vm_util.GetTempDir(),
                    posixpath.join(hbase.HBASE_CONF_DIR, HBASE_SITE))

    def PushHBaseSite(vm):
        conf_dir = posixpath.join(ycsb.YCSB_DIR,
                                  FLAGS.hbase_binding + '-binding', 'conf')
        vm.RemoteCommand('mkdir -p {}'.format(conf_dir))
        vm.PushFile(os.path.join(vm_util.GetTempDir(), HBASE_SITE),
                    posixpath.join(conf_dir, HBASE_SITE))

    vm_util.RunThreaded(PushHBaseSite, loaders)
    benchmark_spec.executor = ycsb.YCSBExecutor(FLAGS.hbase_binding)
Example #6
def Prepare(benchmark_spec):
    """Prepare the virtual machines to run hadoop.

    Args:
      benchmark_spec: The benchmark specification. Contains all data that is
          required to run the benchmark.
    """
    master = benchmark_spec.vm_groups['master'][0]
    workers = benchmark_spec.vm_groups['workers']
    vms = benchmark_spec.vms

    def InstallHadoop(vm):
        vm.Install('hadoop')

    vm_util.RunThreaded(InstallHadoop, vms)
    hadoop.ConfigureAndStart(master, workers)
Example #7
    def _Create(self):
        """Create an un-managed yarn cluster."""
        logging.info('Should have created vms by now.')
        logging.info(str(self.vms))

        def InstallHadoop(vm):
            vm.Install('hadoop')
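            # The object-store connectors below let Hadoop jobs read gs:// and
            # s3:// paths on GCP and AWS, respectively.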
            if self.cloud == 'GCP':
                hadoop.InstallGcsConnector(vm)
            if self.cloud == 'AWS':
                hadoop.InstallS3Connector(vm)

        vm_util.RunThreaded(
            InstallHadoop, self.vms['worker_group'] + self.vms['master_group'])
        self.leader = self.vms['master_group'][0]
        hadoop.ConfigureAndStart(self.leader,
                                 self.vms['worker_group'],
                                 configure_s3=self.cloud == 'AWS')
Example #8
    def _Create(self):
        """Create an un-managed yarn cluster."""
        logging.info('Should have created vms by now.')
        logging.info(str(self.vms))

        def InstallHadoop(vm):
            vm.Install('hadoop')
            if self.cloud == 'GCP':
                hadoop.InstallGcsConnector(vm)

        if 'worker_group' not in self.vms:
            raise errors.Resource.CreationError(
                'UnmanagedDpbServiceYarnCluster requires VMs in a worker_group.'
            )
        vm_util.RunThreaded(
            InstallHadoop, self.vms['worker_group'] + self.vms['master_group'])
        self.leader = self.vms['master_group'][0]
        hadoop.ConfigureAndStart(self.leader,
                                 self.vms['worker_group'],
                                 configure_s3=self.cloud == 'AWS')