Example #1
def restoreDeepEval():
    command = []

    command.append('mkdir -p /mnt/data/tmp/DEEPEVAL')
    command.append('cd /mnt/data/tmp/DEEPEVAL')
    listEval = [
            'DEEPEVAL-default-2016-03-18.tar.gz',
            'DEEPEVAL-fusion1-2016-03-18.tar.gz',
            # 'DEEPEVAL-fusion16cnn-2016-03-18.tar.gz',
            # 'DEEPEVAL-fusion16cnn2-2016-03-18.tar.gz',
            # 'DEEPEVAL-fusion8cnn-2016-03-18.tar.gz',
            'DEEPEVAL-norm51-2016-03-18.tar.gz',
            'DEEPEVAL-norm51Scale1-2016-03-18.tar.gz',
            'DEEPEVAL-norm51Scale2-2016-03-18.tar.gz',
            'DEEPEVAL-norm51Scale3-2016-03-18.tar.gz',
            'DEEPEVAL-ratio-2016-03-18.tar.gz',
            'DEEPEVAL-veac-2016-03-18.tar.gz']

    for l in listEval:
        command.append('gsutil cp gs://deep-store/{0} .'.format(l))
        command.append('tar -xf {0}'.format(l))
        command.append('rm -r {0}'.format(l))

    # command.append('gsutil cp gs://deep-store/DEEPEVAL.tar.gz .')
    # command.append('rm -rf DEEPEVAL/fusion9a')
    # command.append('rsync -la DEEPEVAL/* /mnt/data/tmp/DEEPEVAL/')
    cluster.runOnAllNodesAsync(';'.join(command))
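
cluster.runOnAllNodesAsync is used throughout these examples but never defined here. The sketch below is only a guess at its behavior, assuming it fans the joined shell command out over ssh to every node without waiting for completion; the standalone signature, the host-list argument, and the subprocess dispatch are all assumptions, not the real implementation.

import subprocess

def runOnAllNodesAsync(hosts, command):
    # Hypothetical stand-in for cluster.runOnAllNodesAsync: launch the
    # shell command on every node over ssh and return the handles
    # without waiting for the commands to finish.
    return [subprocess.Popen(['ssh', host, command]) for host in hosts]

# Illustrative call, mirroring the pattern above (hostnames are made up):
# runOnAllNodesAsync(['cluster-data-master', 'cluster-data-slave-001'],
#                    ';'.join(command))
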
Example #2
def setupSoftFiles():
    """Copy hdfs soft from common data disk to homedir
    """

    listCommand = []
    listCommand.append('gsutil cp {0} .'.format(bucketstore))
    listCommand.append('tar -xf {0}'.format(os.path.basename(bucketstore)))

    command = ';'.join(listCommand)
    cluster.runOnAllNodesAsync(command)
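
These functions also rely on module-level settings such as bucketstore and softdir that are defined elsewhere. The assignments below are hypothetical placeholders, given only to make the snippets readable; none of the values are the project's real configuration.

# Hypothetical module-level settings assumed by the examples; every value
# here is an illustrative placeholder, not the actual configuration.
bucketstore = 'gs://deep-store/hadoop-2.7.2.tar.gz'   # archive of the software tree
softdir = '/mnt/data/package/softs/hadoop-2.7.2'      # where the archive is unpacked
datadir = '/mnt/data/hdfs'                            # namenode/datanode storage root
useCloudConnector = True                              # install the GCS connector jar
bucketconnector = 'gs://deep-store/gcs-connector-latest-hadoop2.jar'
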
Example #3
def setupPostinstall():

    listCommand = []
    listCommand.append('cd {0}'.format(softdir))
    listCommand.append(postinstallscript)
    listCommand.append('ln -fs {0} {1}'.format(pythonbin, spark.pathpython))
    listCommand = ';'.join(listCommand)
    cluster.runOnAllNodesAsync(listCommand)

    addjars()
Example #4
def restoreFromStore():

    listCommand = []

    listCommand.append('gsutil cp {0} {1}'.format(bucketstore, os.path.dirname(softdir)))
    listCommand.append('cd {0}'.format(os.path.dirname(softdir)))
    listCommand.append('tar -xf {0}'.format(os.path.basename(bucketstore)))

    command = ';'.join(listCommand)

    cluster.runOnAllNodesAsync(command)
Example #5
def setupBashrc():

    inStartup = utils.getTemplateFile('hdfs-setup_hdfs.sh')
    outStartup = utils.getLocalTempFile('setup_hdfs.sh')

    utils.stringReplaceInFile(
        inStartup,
        outStartup,
        {
            'XXHADOOPLOCALPATHXX': "'{0}'".format(softdir)
        })

    cluster.rsyncOnAllNodesLocalhostToLocalAsync(outStartup, '~/setup_hdfs.sh')

    command = 'echo "source setup_hdfs.sh" >> setup.sh'
    cluster.runOnAllNodesAsync(command)
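
utils.stringReplaceInFile is assumed to perform a plain placeholder substitution over the template file. A minimal sketch under that assumption (this is a guess, not the library's implementation):

def stringReplaceInFile(pathIn, pathOut, replacements):
    # Hypothetical sketch: read the template, substitute every placeholder
    # key with its value, and write the result to the output path.
    with open(pathIn) as streamIn:
        text = streamIn.read()
    for placeholder, value in replacements.items():
        text = text.replace(placeholder, value)
    with open(pathOut, 'w') as streamOut:
        streamOut.write(text)
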
Example #6
def setupProfile():

    inStartup = utils.getTemplateFile('package-setup_profile_package.sh')
    outStartup = utils.getLocalTempFile('setup_profile_package.sh')

    utils.stringReplaceInFile(
        inStartup,
        outStartup,
        {
            # 'XX-PYTHON-XX': "'{0}'".format(pythonbin),
            'XX-SOURCESCRIPT-XX': sourcescript
        })

    cluster.rsyncOnAllNodesLocalhostToLocalAsync(
        outStartup, '~/setup_profile_package.sh')

    command = 'echo "source $HOME/setup_profile_package.sh" >> setup_profile.sh'
    cluster.runOnAllNodesAsync(command)
Example #7
def setupBashrc():

    inStartup = utils.getTemplateFile('spark-setup_spark.sh')
    outStartup = utils.getLocalTempFile('setup_spark.sh')

    utils.stringReplaceInFile(
        inStartup,
        outStartup,
        {
            'XXSPARKLOCALPATHXX': "'{0}'".format(softdir)
        })

    cluster.rsyncOnAllNodesLocalhostToLocalAsync(
        outStartup, '~/setup_spark.sh')

    command = 'echo "source setup_spark.sh" >> setup.sh'
    cluster.runOnAllNodesAsync(command)

    if useCloudConnector and cluster.getNSlaves() > 0:
        addjars('{0}/lib/gcs-connector-latest-hadoop2.jar'.format(softdir))
Example #8
def addjars(jars):

    # Accept either a single jar path or a list of jar paths.
    if isinstance(jars, basestring):
        jars = [jars]

    for j in jars:
        cluster.runOnMaster("echo '' >> setup_spark.sh")

        addJar = []
        addJar.append('if [[ -z "$SPARKJARS" ]]; then ')
        addJar.append('     export SPARKJARS={0}'.format(j))
        addJar.append('else ')
        addJar.append('     export SPARKJARS={0},$SPARKJARS'.format(j))
        addJar.append('fi')

        c = ["echo '" + a + "' >> setup_spark.sh" for a in addJar]

        print c

        cluster.runOnAllNodesAsync(';'.join(c))

    jars = ':'.join(jars)

    command = []
    command.append(
        'echo "" >> {0}'.format(softdir + '/conf/spark-defaults.conf'))

    command.append('echo "spark.driver.extraClassPath {0}" >> {1}'.format(
        jars, softdir + '/conf/spark-defaults.conf'))
    command.append('echo "spark.executor.extraClassPath {0}" >> {1}'.format(
        jars, softdir + '/conf/spark-defaults.conf'))

    command = ';'.join(command)

    cluster.runOnAllNodesAsync(command)
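
addjars accepts either a single jar path or a list of paths. The calls below sketch both forms; the first mirrors the call made in the spark setupBashrc example above, while the paths in the second are purely illustrative.

# Single jar, as in the spark setupBashrc example:
addjars('{0}/lib/gcs-connector-latest-hadoop2.jar'.format(softdir))

# A list of jars is also accepted; both paths below are made up.
addjars(['/mnt/data/jars/extra-codec.jar',
         '/mnt/data/jars/extra-metrics.jar'])
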
Example #9
def restoreDeepModels():
    """Download and install models from bucket

    """

    command = []
    command.append('mkdir -p /mnt/data/tmp/DEEPSTORE/')
    command.append('rm -r /mnt/data/tmp/DEEPSTORE/*')
    command.append('cd /mnt/data/tmp/')
    command.append('mkdir -p download')
    command.append('cd download')
    listStore = [
            'deepStore-default-2016-03-18.tar.gz',
            'deepStore-fusion1-2016-03-18.tar.gz',
            'deepStore-fusion16cnn-2016-03-18.tar.gz',
            'deepStore-fusion16cnn2-2016-03-18.tar.gz',
            'deepStore-fusion8cnn-2016-03-18.tar.gz',
            'deepStore-fusion9a-2016-03-18.tar.gz',
            'deepStore-norm51-2016-03-18.tar.gz',
            'deepStore-norm51Scale1-2016-03-18.tar.gz',
            'deepStore-norm51Scale2-2016-03-18.tar.gz',
            'deepStore-norm51Scale3-2016-03-18.tar.gz',
            'deepStore-ratio-2016-03-18.tar.gz',
            'deepStore-veac-2016-03-18.tar.gz']

    for l in listStore:
        command.append('gsutil cp gs://deep-store/{0} .'.format(l))
        command.append('tar -zxvf {0}'.format(l))
        command.append('rm {0}'.format(l))

    command.append('gsutil cp gs://deep-store/deepStore* .')
    command.append('mv * /mnt/data/tmp/DEEPSTORE/')

    command = ';'.join(command)

    cluster.runOnAllNodesAsync(command)
Example #10
def setupConfigurationFiles():
    """Deploy hadoop"""

    (listnodes, nodes) = cluster.instanceListAll()
    mastername = listnodes['master'][0]

    inCoreSite = utils.getTemplateFile('hdfs-core-site.xml')
    outCoreSite = utils.getLocalTempFile('core-site.xml')

    inHdfsSite = utils.getTemplateFile('hdfs-hdfs-site.xml')
    outHdfsSite = utils.getLocalTempFile('hdfs-site.xml')

    outSlave = utils.getLocalTempFile('slaves')

    print '[ Configuring Hadoop ]'

    utils.stringReplaceInFile(
        inCoreSite,
        outCoreSite,
        {
            'PUT-MASTER-IP': mastername,
            'XX-PROJECTID-XX': utils.getProjectProperties()['Project']
        })

    pathnamenode = os.path.join(datadir, 'namenode')
    pathdatanode = os.path.join(datadir, 'datanode')

    utils.stringReplaceInFile(
        inHdfsSite,
        outHdfsSite,
        {
            'XXREPLICATIONXX': '3',
            'XXNAMENODEXX': pathnamenode,
            'XXDATANODEXX': pathdatanode,
        })

    with open(outSlave, 'w') as streamOut:
        for namenode in listnodes['slaves']:
            streamOut.write(namenode + '\n')

    cluster.rsyncOnAllNodesLocalhostToLocalAsync(
        outCoreSite, softdir + '/etc/hadoop/core-site.xml')
    cluster.rsyncOnAllNodesLocalhostToLocalAsync(
        outHdfsSite, softdir + '/etc/hadoop/hdfs-site.xml')
    cluster.rsyncOnAllNodesLocalhostToLocalAsync(
        outSlave, softdir + '/etc/hadoop/slaves')

    if useCloudConnector:
        cluster.runOnAllNodesAsync('gsutil cp {0} .'.format(bucketconnector))
        # cluster.runOnAllNodesAsync('wget https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop2.jar')
        cluster.runOnAllNodesAsync(
            'cp gcs-connector-latest-hadoop2.jar {0}/share/hadoop/common/'.format(softdir))

    listCommand = []
    listCommand.append("mkdir -p {0}".format(pathnamenode))
    listCommand.append("mkdir -p {0}".format(pathdatanode))
    listCommand.append("chmod -R a+rwx {0}".format(pathnamenode))
    listCommand.append("chmod -R a+rwx {0}".format(pathdatanode))

    command = ';'.join(listCommand)

    cluster.runOnAllNodesAsync(command)
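
cluster.instanceListAll() is not shown in these examples. From the way listnodes is indexed (listnodes['master'][0], listnodes['slaves']), its return value is assumed to look roughly like the sketch below; the hostnames are illustrative.

# Hypothetical return shape of cluster.instanceListAll(), inferred from usage:
# listnodes['master'][0] is the master hostname, listnodes['slaves'] the workers.
listnodes = {
    'master': ['cluster-data-master'],
    'slaves': ['cluster-data-slave-001', 'cluster-data-slave-002'],
}
nodes = listnodes['master'] + listnodes['slaves']   # flat list, also an assumption
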
Example #11
def additionPython():
    cluster.runOnMasterX("pip install ipdb")
    cluster.runOnAllNodesAsync('ln -s /mnt/data/package/softs/pypilok/irt')
Example #12
def additionPython():
    cluster.runOnAllNodesAsync('ln -s /mnt/data/package/softs/pypilok/irt')
    cluster.runOnAllNodesAsync('ln -s /mnt/data/package/softs/pypilok/ads')
Example #13
def syncEvalFromMaster():

    command = 'rsync -la cluster-data-master:/mnt/data/tmp/DEEPEVAL /mnt/data/tmp/'

    cluster.runOnAllNodesAsync(command)
Example #14
def setupConfigurationFiles():
    """Deploy spark configuration files"""

    (listnodes, nodes) = cluster.instanceListAll()

    # We create here a fake python link.
    # This python is used as the main spark driver.
    # If we need to change the spark python driver,
    # we just have to overwrite this link.

    cluster.runOnAllNodesAsync('ln -fs `which python` {0}'.format(pathpython))

    # -------------------------------------------
    # handling of slaves
    # -------------------------------------------
    # The slaves file lists the hosts
    # that have to be used as workers
    outSlave = utils.getLocalTempFile('slaves')

    with open(outSlave, 'w') as streamOut:
        for namenode in listnodes['slaves']:
            streamOut.write(namenode + '\n')

    # -------------------------------------------
    # handling of spark configuration
    # -------------------------------------------

    if cluster.getNSlaves() > 0:
        sparkMaster = 'spark://{0}:7077'.format(cluster.getMasterName())
    else:
        sparkMaster = 'local[{0}]'.format((cluster.nCores - 1))

    inConf = utils.getTemplateFile('spark-spark-defaults.conf')
    outConf = utils.getLocalTempFile('spark-defaults.conf')

    maxSlaves = int(cluster.nCores - 1)

    utils.stringReplaceInFile(
        inConf,
        outConf,
        {
            'XX-DRIVER-MEM-XX': mastermemory,
            'XX-EXECUTOR-MEM-XX': executormemory,
            'XX-SPARKMASTER-XX': sparkMaster,
            'XX-LOCAL-DIR-XX': localtempdir,
            'XX-CORES-XX': '{0}'.format(maxSlaves)
        })

    inEnv = utils.getTemplateFile('spark-spark-env.sh')
    outEnv = utils.getLocalTempFile('spark-env.sh')

    utils.stringReplaceInFile(
        inEnv,
        outEnv,
        {
            'XX-PYSPARK_PYTHON-XX': '"{0}"'.format(pathpython),
            'XX-SPARKMASTER-XX': sparkMaster,
            'XX-PYSPARK_DRIVER_PYTHON-XX': '"{0}"'.format(pathpython),
            'XX-PYTHONPATH-XX': '"{0}"'.format(pythonpath),
            'XX-LOCAL-DIR-XX': localtempdir,
            'XX-MASTER-IP-XX': '"{0}"'.format(cluster.ipGetMaster())
        })

    cluster.rsyncOnAllNodesLocalhostToLocalAsync(
        outSlave, softdir + '/conf/slaves')
    cluster.rsyncOnAllNodesLocalhostToLocalAsync(
        outConf, softdir + '/conf/spark-defaults.conf')
    cluster.rsyncOnAllNodesLocalhostToLocalAsync(
        outEnv, softdir + '/conf/spark-env.sh')
    cluster.runOnAllNodesAsync('mkdir -p /tmp/spark-events')

    # we remove info level display from spark...
    cluster.runOnMaster(
        'sed -i "s/log4j.rootCategory=INFO/log4j.rootCategory=WARN/g" {0}/conf/log4j.properties.template'.format(softdir))
    cluster.runOnMaster(
        'cp {0}/conf/log4j.properties.template {0}/conf/log4j.properties'.format(softdir))

    # --------------------------------
    # handling of connector
    # --------------------------------
    # We install here the hadoop connector for google cloud storage.
    # This connector permits writing data to google cloud storage directly
    # from spark.

    if useCloudConnector and cluster.getNSlaves() > 0:
        cluster.runOnAllNodesAsync('gsutil cp {0} .'.format(bucketconnector))

        # cluster.runOnAllNodesAsync('wget https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop2.jar')
        cluster.runOnAllNodesAsync(
            'cp gcs-connector-latest-hadoop2.jar {0}/lib/'.format(softdir))
        cluster.runOnAllNodesAsync(
            'cp {0}/etc/hadoop/core-site.xml {1}/conf/'.format(hadoop.softdir, softdir))

    # ------------------------------------------
    # Deployment of spark overloading scripts
    # ------------------------------------------
    #
    # One problem with spark in standalone mode is that
    # we have to use client mode.
    # With client mode, we cannot use the spark default conf
    # to set additional jars at launch.
    #
    # We therefore use two scripts, one for spark-submit and one for pyspark,
    # to overload the calls.
    #
    # These scripts test for the existence of the jar variable
    # and make the call accordingly.

    inPyspark = utils.getTemplateFile('pyspark-jars')
    cluster.rsyncOnAllNodesLocalhostToLocalAsync(inPyspark, 'pyspark-jars')

    inSubmit = utils.getTemplateFile('spark-submit-jars')
    cluster.rsyncOnAllNodesLocalhostToLocalAsync(inSubmit, 'spark-submit-jars')
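
The pyspark-jars and spark-submit-jars templates themselves are not shown; per the comment above, they test whether the jar variable is set and forward the call accordingly. The Python sketch below mirrors that decision, assuming the SPARKJARS variable exported by addjars and a plain spark-submit on the PATH; the wrapper function and its name are illustrative.

import os
import subprocess

def launchSparkSubmit(script, *args):
    # Hypothetical equivalent of the spark-submit-jars wrapper: pass --jars
    # only when SPARKJARS has been exported by setup_spark.sh.
    cmd = ['spark-submit']
    sparkjars = os.environ.get('SPARKJARS')
    if sparkjars:
        cmd += ['--jars', sparkjars]
    cmd.append(script)
    cmd.extend(args)
    return subprocess.call(cmd)
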