Exemplo n.º 1
0
    def dumpEMRClusters(self):
        """Dump EMR jobflow and cluster information to stdout and the log.

        Connects using the boto profile held in ``self.botoprfl`` (falling
        back to the default profile), prints all jobflows, then logs each
        cluster returned by ``list_clusters()`` via the generic log routine.
        Errors are reported through ``prntErrWarnInfo`` instead of raised.
        """
        try:
            # Use an explicit profile only when something other than
            # "default" is configured.
            if self.botoprfl[0] != "default":
                conn = boto.connect_emr(profile_name=self.botoprfl)
            else:
                conn = boto.connect_emr()
            if conn:
                print("\n<Start of EMR clusters>\n")
                print(" Jobflows: %s" % conn.describe_jobflows())
                self.opygenericroutines.prntLogErrWarnInfo('',
                                                           'info',
                                                           bresume=True)
                for c in conn.list_clusters().clusters:
                    ec = " %s" % c
                    self.opygenericroutines.prntLogErrWarnInfo(str(ec),
                                                               'info',
                                                               bresume=True)
                self.opygenericroutines.prntLogErrWarnInfo('',
                                                           'info',
                                                           bresume=True)
                print("\n<End of EMR clusters>\n")
        except Exception as e:  # fixed: "except Exception, e" is Python-2-only syntax
            serr = (
                '%s :: dumpEMRClusters(...) : connect_emr,list_clusters(...).clusters, '
                '%s' % (self.sclsnme, str(e)))
            prntErrWarnInfo(serr, bresume=True)
Exemplo n.º 2
0
def test_create_instance_groups():
    """Adding a SPOT task instance group is reflected by describe_jobflows."""
    conn = boto.connect_emr()

    wordcount_step = StreamingStep(
        name="My wordcount example",
        mapper="s3n://elasticmapreduce/samples/wordcount/wordSplitter.py",
        reducer="aggregate",
        input="s3n://elasticmapreduce/samples/wordcount/input",
        output="s3n://output_bucket/output/wordcount_output",
    )
    job_id = conn.run_jobflow(
        name="My jobflow",
        log_uri="s3://some_bucket/jobflow_logs",
        steps=[wordcount_step],
    )

    new_group = InstanceGroup(6, "TASK", "c1.medium", "SPOT", "spot-0.07", "0.07")
    added = conn.add_instance_groups(job_id, [new_group])
    group_id = added.instancegroupids

    flow = conn.describe_jobflows()[0]
    int(flow.instancecount).should.equal(6)

    group = flow.instancegroups[0]
    group.instancegroupid.should.equal(group_id)
    int(group.instancerunningcount).should.equal(6)
    group.instancerole.should.equal("TASK")
    group.instancetype.should.equal("c1.medium")
    group.market.should.equal("SPOT")
    group.name.should.equal("spot-0.07")
    group.bidprice.should.equal("0.07")
Exemplo n.º 3
0
	def get_elapsed_time_emr(job, emrid):
		"""Get elapsed time for EMR job with job flow id emrid, based on EMR job information.

		Returns a datetime.timedelta spanning the creation time of the first
		matching step to the end time of the last one.  Steps are matched by
		the numeric suffix of their name ("<prefix>-<job.id>"); when no step
		name has such a suffix, heuristics over the trailing step(s) of the
		jobflow are used instead.
		"""
		emr = boto.connect_emr()
		jobflow = emr.describe_jobflow(emrid)
		emr.close()
	
		try:
			# Steps for this job are expected to be named "<something>-<job.id>".
			# A step name with no "-" part makes split(...)[1] raise IndexError.
			steps = [s for s in jobflow.steps if int(s.name.split("-")[1]) == job.id]
		except IndexError:
			try:
				# "SimpleJoin" appears to be an auxiliary trailing step; skip it
				# when it is last -- TODO confirm against the job pipeline.
				stepcount = -2 if jobflow.steps[-1].name == "SimpleJoin" else -1
				starttime = datetime.datetime.strptime(jobflow.steps[stepcount].creationdatetime, '%Y-%m-%dT%H:%M:%SZ')
			except AttributeError as e:
				# Step lacks a creationdatetime -- fall back to the jobflow start.
				starttime = datetime.datetime.strptime(jobflow.startdatetime, '%Y-%m-%dT%H:%M:%SZ')
			except:
				# NOTE(review): bare except -- any other failure falls back to the
				# last step's creation time.
				starttime = datetime.datetime.strptime(jobflow.steps[-1].creationdatetime, '%Y-%m-%dT%H:%M:%SZ')
	
			try:
				endtime = datetime.datetime.strptime(jobflow.steps[-1].enddatetime, '%Y-%m-%dT%H:%M:%SZ')
			except AttributeError:
				# Last step has no enddatetime yet (still running) -- measure to now.
				endtime = datetime.datetime.today()	
			except:
				endtime = datetime.datetime.strptime(jobflow.steps[-1].enddatetime, '%Y-%m-%dT%H:%M:%SZ')
		else:
			# Matching steps found: span from the first to the last of them.
			starttime = datetime.datetime.strptime(steps[0].creationdatetime, '%Y-%m-%dT%H:%M:%SZ')
			endtime = datetime.datetime.strptime(steps[-1].enddatetime, '%Y-%m-%dT%H:%M:%SZ')

		return (endtime-starttime)		
Exemplo n.º 4
0
def test_bootstrap_actions():
    """Bootstrap actions round-trip through describe_jobflow and list_bootstrap_actions."""
    actions = [
        BootstrapAction(
            name="bs1",
            path="path/to/script",
            bootstrap_action_args=["arg1", "arg2&arg3"],
        ),
        BootstrapAction(
            name="bs2",
            path="path/to/anotherscript",
            bootstrap_action_args=[],
        ),
    ]

    conn = boto.connect_emr()
    cluster_id = conn.run_jobflow(bootstrap_actions=actions, **run_jobflow_args)

    flow = conn.describe_jobflow(cluster_id)
    for got, expected in zip(flow.bootstrapactions, actions):
        got.name.should.equal(expected.name)
        got.path.should.equal(expected.path)
        [a.value for a in got.args].should.equal(expected.args())

    listed = conn.list_bootstrap_actions(cluster_id)
    for idx, expected in enumerate(actions):
        got = listed.actions[idx]
        got.name.should.equal(expected.name)
        got.scriptpath.should.equal(expected.path)
        [a.value for a in got.args].should.equal(expected.args())
Exemplo n.º 5
0
def test_run_jobflow_with_visible_to_all_users():
    """visible_to_all_users round-trips as the lowercased string form of the flag."""
    conn = boto.connect_emr()
    for flag in (True, False):
        job_id = conn.run_jobflow(visible_to_all_users=flag, **run_jobflow_args)
        conn.describe_jobflow(job_id).visibletoallusers.should.equal(str(flag).lower())
Exemplo n.º 6
0
def test_bootstrap_actions():
    """Verify bootstrap actions survive the round trip through the EMR API."""
    conn = boto.connect_emr()

    expected_actions = [
        BootstrapAction(name='bs1',
                        path='path/to/script',
                        bootstrap_action_args=['arg1', 'arg2&arg3']),
        BootstrapAction(name='bs2',
                        path='path/to/anotherscript',
                        bootstrap_action_args=[]),
    ]

    cluster_id = conn.run_jobflow(bootstrap_actions=expected_actions,
                                  **run_jobflow_args)

    described = conn.describe_jobflow(cluster_id)
    for actual, wanted in zip(described.bootstrapactions, expected_actions):
        actual.name.should.equal(wanted.name)
        actual.path.should.equal(wanted.path)
        list(o.value for o in actual.args).should.equal(wanted.args())

    listed = conn.list_bootstrap_actions(cluster_id)
    for pos, wanted in enumerate(expected_actions):
        actual = listed.actions[pos]
        actual.name.should.equal(wanted.name)
        actual.scriptpath.should.equal(wanted.path)
        list(o.value for o in actual.args).should.equal(wanted.args())
Exemplo n.º 7
0
def test_create_instance_groups():
    '''Adding one SPOT task group updates counts and attributes on the jobflow.'''
    conn = boto.connect_emr()

    step = StreamingStep(
        name='My wordcount example',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input',
        output='s3n://output_bucket/output/wordcount_output')

    flow_id = conn.run_jobflow(name='My jobflow',
                               log_uri='s3://some_bucket/jobflow_logs',
                               steps=[step])

    requested = InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
    response = conn.add_instance_groups(flow_id, [requested])
    created_id = response.instancegroupids

    flow = conn.describe_jobflows()[0]
    int(flow.instancecount).should.equal(6)

    created = flow.instancegroups[0]
    created.instancegroupid.should.equal(created_id)
    int(created.instancerunningcount).should.equal(6)
    created.instancerole.should.equal('TASK')
    created.instancetype.should.equal('c1.medium')
    created.market.should.equal('SPOT')
    created.name.should.equal('spot-0.07')
    created.bidprice.should.equal('0.07')
Exemplo n.º 8
0
def test_create_instance_groups():
    """A SPOT task instance group added to a flow is described back verbatim."""
    conn = boto.connect_emr()

    wc_step = StreamingStep(
        name='My wordcount example',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input',
        output='s3n://output_bucket/output/wordcount_output')

    flow_id = conn.run_jobflow(name='My jobflow',
                               log_uri='s3://some_bucket/jobflow_logs',
                               steps=[wc_step])

    spot_group = InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
    result = conn.add_instance_groups(flow_id, [spot_group])
    new_group_id = result.instancegroupids

    described = conn.describe_jobflows()[0]
    int(described.instancecount).should.equal(6)

    group = described.instancegroups[0]
    group.instancegroupid.should.equal(new_group_id)
    int(group.instancerunningcount).should.equal(6)
    # Every requested attribute should round-trip unchanged.
    group.instancerole.should.equal('TASK')
    group.instancetype.should.equal('c1.medium')
    group.market.should.equal('SPOT')
    group.name.should.equal('spot-0.07')
    group.bidprice.should.equal('0.07')
Exemplo n.º 9
0
def test_modify_instance_groups():
    """modify_instance_groups resizes each group and updates the flow's total count."""
    conn = boto.connect_emr()

    step = StreamingStep(
        name="My wordcount example",
        mapper="s3n://elasticmapreduce/samples/wordcount/wordSplitter.py",
        reducer="aggregate",
        input="s3n://elasticmapreduce/samples/wordcount/input",
        output="s3n://output_bucket/output/wordcount_output",
    )
    job_id = conn.run_jobflow(
        name="My jobflow",
        log_uri="s3://some_bucket/jobflow_logs",
        steps=[step],
    )

    group_a = InstanceGroup(6, "TASK", "c1.medium", "SPOT", "spot-0.07", "0.07")
    group_b = InstanceGroup(6, "TASK", "c1.medium", "SPOT", "spot-0.07", "0.07")
    added = conn.add_instance_groups(job_id, [group_a, group_b])
    group_ids = added.instancegroupids.split(",")

    flow = conn.describe_jobflows()[0]
    int(flow.instancecount).should.equal(12)
    int(flow.instancegroups[0].instancerunningcount).should.equal(6)

    # Shrink the two groups to 2 and 3 instances respectively.
    conn.modify_instance_groups(group_ids, [2, 3])

    flow = conn.describe_jobflows()[0]
    int(flow.instancecount).should.equal(5)
    first = [g for g in flow.instancegroups if g.instancegroupid == group_ids[0]][0]
    int(first.instancerunningcount).should.equal(2)
    second = [g for g in flow.instancegroups if g.instancegroupid == group_ids[1]][0]
    int(second.instancerunningcount).should.equal(3)
Exemplo n.º 10
0
def test_add_steps_to_flow():
    """Steps added via add_jobflow_steps appear after the original with the right args/state."""
    conn = boto.connect_emr()

    first_step = StreamingStep(
        name='My wordcount example',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input',
        output='s3n://output_bucket/output/wordcount_output')

    job_id = conn.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              steps=[first_step])

    flow = conn.describe_jobflow(job_id)
    flow.state.should.equal('STARTING')
    flow.jobflowid.should.equal(job_id)
    flow.name.should.equal('My jobflow')
    flow.loguri.should.equal('s3://some_bucket/jobflow_logs')

    second_step = StreamingStep(
        name='My wordcount example2',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input2',
        output='s3n://output_bucket/output/wordcount_output2')

    conn.add_jobflow_steps(job_id, [second_step])

    flow = conn.describe_jobflow(job_id)

    step_one = flow.steps[0]
    step_one.name.should.equal('My wordcount example')
    step_one.state.should.equal('STARTING')
    [a.value for a in step_one.args].should.equal([
        '-mapper',
        's3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        '-reducer',
        'aggregate',
        '-input',
        's3n://elasticmapreduce/samples/wordcount/input',
        '-output',
        's3n://output_bucket/output/wordcount_output',
    ])

    step_two = flow.steps[1]
    step_two.name.should.equal('My wordcount example2')
    step_two.state.should.equal('PENDING')
    [a.value for a in step_two.args].should.equal([
        '-mapper',
        's3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
        '-reducer',
        'aggregate',
        '-input',
        's3n://elasticmapreduce/samples/wordcount/input2',
        '-output',
        's3n://output_bucket/output/wordcount_output2',
    ])
Exemplo n.º 11
0
def test_terminate_job_flow():
    """terminate_jobflow moves a freshly started flow to TERMINATED."""
    conn = boto.connect_emr()
    job_id = conn.run_jobflow(
        name="My jobflow", log_uri="s3://some_bucket/jobflow_logs", steps=[])

    conn.describe_jobflows()[0].state.should.equal("STARTING")
    conn.terminate_jobflow(job_id)
    conn.describe_jobflows()[0].state.should.equal("TERMINATED")
Exemplo n.º 12
0
def test_run_jobflow_with_visible_to_all_users():
    """Both True and False for visible_to_all_users are echoed back lowercased."""
    conn = boto.connect_emr()
    for value in (True, False):
        job_id = conn.run_jobflow(visible_to_all_users=value, **run_jobflow_args)
        flow = conn.describe_jobflow(job_id)
        flow.visibletoallusers.should.equal(str(value).lower())
Exemplo n.º 13
0
def test_terminate_jobflow():
    """A WAITING jobflow transitions to TERMINATED after terminate_jobflow."""
    conn = boto.connect_emr()
    job_id = conn.run_jobflow(**run_jobflow_args)
    conn.describe_jobflows()[0].state.should.equal("WAITING")

    conn.terminate_jobflow(job_id)
    conn.describe_jobflows()[0].state.should.equal("TERMINATED")
Exemplo n.º 14
0
def test_terminate_jobflow():
    """Terminating a running jobflow flips its state from WAITING to TERMINATED."""
    conn = boto.connect_emr()
    flow_id = conn.run_jobflow(**run_jobflow_args)

    state_before = conn.describe_jobflows()[0].state
    state_before.should.equal("WAITING")

    conn.terminate_jobflow(flow_id)

    state_after = conn.describe_jobflows()[0].state
    state_after.should.equal("TERMINATED")
Exemplo n.º 15
0
def run_tests(things, tests):
    """Run the given test suites on EMR and download the results.

    ``things`` is indexable: [0]/[1] are S3 keys served over plain HTTP,
    [2] is the jar key and [3] the bootstrap-script key -- TODO confirm
    against the caller.  Blocks until the jobflow reaches COMPLETED or
    FAILED, then syncs the run's S3 output into build/emrout/<jobid>/.
    """
    if len(tests) == 0:
        raise Exception("no tests")
    oldNum = len(tests)
    tests = fix_suites(tests)
    print("tests expanded from %d to %d" % (oldNum, len(tests)))

    print("things:%s\ntests:%s\n" % (things, tests))

    emr = boto.connect_emr(settings.emr_id, settings.emr_key)

    def http(path):
        # Public HTTP URL for an object in the configured EMR bucket.
        return "http://%s.s3.amazonaws.com/%s" % (settings.emr_bucket, path)

    # Per-run S3 prefix: emr/<user>/<host>/<YYYYmmdd-HHMM>/
    run_s3_path = "emr/%s/%s/%s/" % (os.getenv("USER"), os.getenv(
        "HOST"), datetime.datetime.today().strftime("%Y%m%d-%H%M"))

    run_s3_root = "s3n://%s/%s/" % (settings.emr_bucket, run_s3_path)

    out = run_s3_root + "out"
    logs = run_s3_root + "logs"

    jar = "s3n://%s/%s" % (settings.emr_bucket, things[2])
    step_args = [http(things[0]), http(things[1]), out, ",".join(tests)]

    step = boto.emr.step.JarStep("emr main", jar=jar, step_args=step_args)
    print("jar:%s\nargs:%s" % (jar, step_args))

    setup = boto.emr.BootstrapAction(
        "setup", "s3n://%s/%s" % (settings.emr_bucket, things[3]), [])

    jobid = emr.run_jobflow(name="Mongo EMR for %s from %s" %
                            (os.getenv("USER"), os.getenv("HOST")),
                            ec2_keyname="emr1",
                            slave_instance_type="m1.large",
                            ami_version="latest",
                            num_instances=5,
                            log_uri=logs,
                            bootstrap_actions=[setup],
                            steps=[step])

    print("%s jobid: %s" % (datetime.datetime.today(), jobid))

    # Poll every 30s until the flow finishes (successfully or not).
    while (True):
        flow = emr.describe_jobflow(jobid)
        print("%s status: %s" % (datetime.datetime.today(), flow.state))
        if flow.state == "COMPLETED" or flow.state == "FAILED":
            break
        time.sleep(30)

    syncdir = "build/emrout/" + jobid + "/"
    sync_s3(run_s3_path, syncdir)

    final_out = "build/emrout/" + jobid + "/"

    print("output in: " + final_out)
    do_output(final_out)
Exemplo n.º 16
0
def run_tests(things, tests):
    """Launch an EMR jobflow running the given test suites and collect its output."""
    if len(tests) == 0:
        raise Exception("no tests")
    original_count = len(tests)
    tests = fix_suites(tests)
    print("tests expanded from %d to %d" % (original_count, len(tests)))

    print("things:%s\ntests:%s\n" % (things, tests))

    emr = boto.connect_emr(settings.emr_id, settings.emr_key)

    def http(path):
        # Public HTTP URL for an object in the EMR bucket.
        return "http://%s.s3.amazonaws.com/%s" % (settings.emr_bucket, path)

    run_s3_path = "emr/%s/%s/%s/" % (os.getenv("USER"),
                                     os.getenv("HOST"),
                                     datetime.datetime.today().strftime("%Y%m%d-%H%M"))
    run_s3_root = "s3n://%s/%s/" % (settings.emr_bucket, run_s3_path)

    out = run_s3_root + "out"
    logs = run_s3_root + "logs"

    jar = "s3n://%s/%s" % (settings.emr_bucket, things[2])
    step_args = [http(things[0]), http(things[1]), out, ",".join(tests)]

    step = boto.emr.step.JarStep("emr main", jar=jar, step_args=step_args)
    print("jar:%s\nargs:%s" % (jar, step_args))

    setup = boto.emr.BootstrapAction(
        "setup", "s3n://%s/%s" % (settings.emr_bucket, things[3]), [])

    jobid = emr.run_jobflow(name="Mongo EMR for %s from %s" % (os.getenv("USER"), os.getenv("HOST")),
                            ec2_keyname="emr1",
                            slave_instance_type="m1.large",
                            ami_version="latest",
                            num_instances=5,
                            log_uri=logs,
                            bootstrap_actions=[setup],
                            steps=[step])

    print("%s jobid: %s" % (datetime.datetime.today(), jobid))

    # Poll until the flow finishes one way or the other.
    while True:
        flow = emr.describe_jobflow(jobid)
        print("%s status: %s" % (datetime.datetime.today(), flow.state))
        if flow.state == "COMPLETED" or flow.state == "FAILED":
            break
        time.sleep(30)

    syncdir = "build/emrout/" + jobid + "/"
    sync_s3(run_s3_path, syncdir)

    final_out = "build/emrout/" + jobid + "/"
    print("output in: " + final_out)
    do_output(final_out)
Exemplo n.º 17
0
def test_terminate_job_flow():
    '''Terminating a STARTING flow leaves it in the TERMINATED state.'''
    conn = boto.connect_emr()
    flow_id = conn.run_jobflow(name='My jobflow',
                               log_uri='s3://some_bucket/jobflow_logs',
                               steps=[])

    current = conn.describe_jobflows()[0]
    current.state.should.equal('STARTING')

    conn.terminate_jobflow(flow_id)

    current = conn.describe_jobflows()[0]
    current.state.should.equal('TERMINATED')
Exemplo n.º 18
0
def test_create_job_flow_visible_to_all_users():
    """Passing visible_to_all_users=True is reported back as the string 'True'."""
    conn = boto.connect_emr()
    flow_id = conn.run_jobflow(name='My jobflow',
                               log_uri='s3://some_bucket/jobflow_logs',
                               steps=[],
                               visible_to_all_users=True)
    conn.describe_jobflow(flow_id).visibletoallusers.should.equal('True')
Exemplo n.º 19
0
def test_create_job_flow_visible_to_all_users():
    """A flow created with visible_to_all_users=True reports 'True' back."""
    conn = boto.connect_emr()

    flow_id = conn.run_jobflow(
        visible_to_all_users=True,
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        steps=[],
    )
    described = conn.describe_jobflow(flow_id)
    described.visibletoallusers.should.equal('True')
Exemplo n.º 20
0
def test_create_job_flow_visible_to_all_users():
    """visible_to_all_users=True is preserved even when a job_flow_role is supplied."""
    conn = boto.connect_emr()
    flow_id = conn.run_jobflow(name="My jobflow",
                               log_uri="s3://some_bucket/jobflow_logs",
                               job_flow_role="some-role-arn",
                               steps=[],
                               visible_to_all_users=True)
    conn.describe_jobflow(flow_id).visibletoallusers.should.equal("True")
Exemplo n.º 21
0
def test_create_job_flow_with_new_params():
    """run_jobflow accepts the newer instance-type and role parameters without error."""
    conn = boto.connect_emr()
    conn.run_jobflow(name='My jobflow',
                     log_uri='s3://some_bucket/jobflow_logs',
                     master_instance_type='m1.medium',
                     slave_instance_type='m1.small',
                     job_flow_role='some-role-arn',
                     steps=[])
Exemplo n.º 22
0
def test_describe_cluster():
    """describe_cluster reports name, zero normalized hours and RUNNING state for a new flow."""
    conn = boto.connect_emr()
    flow_id = conn.run_jobflow(name='My jobflow',
                               log_uri='s3://some_bucket/jobflow_logs',
                               steps=[])

    described = conn.describe_cluster(flow_id)
    described.name.should.equal("My jobflow")
    described.normalizedinstancehours.should.equal('0')
    described.status.state.should.equal("RUNNING")
Exemplo n.º 23
0
def test_create_job_flow_with_new_params():
    # Smoke test: the newer run_jobflow keyword arguments must be accepted.
    params = dict(name='My jobflow',
                  log_uri='s3://some_bucket/jobflow_logs',
                  master_instance_type='m1.medium',
                  slave_instance_type='m1.small',
                  job_flow_role='some-role-arn',
                  steps=[])
    conn = boto.connect_emr()
    conn.run_jobflow(**params)
Exemplo n.º 24
0
def test_describe_cluster():
    '''A newly created flow is a RUNNING cluster with zero normalized hours.'''
    conn = boto.connect_emr()
    new_id = conn.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              steps=[])

    cluster = conn.describe_cluster(new_id)
    cluster.name.should.equal("My jobflow")
    cluster.normalizedinstancehours.should.equal('0')
    cluster.status.state.should.equal("RUNNING")
Exemplo n.º 25
0
def test_terminate_job_flow():
    """A flow goes STARTING -> TERMINATED once terminate_jobflow is called."""
    conn = boto.connect_emr()
    target = conn.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              steps=[])

    conn.describe_jobflows()[0].state.should.equal('STARTING')
    conn.terminate_jobflow(target)
    conn.describe_jobflows()[0].state.should.equal('TERMINATED')
Exemplo n.º 26
0
def test_set_termination_protection():
    """set_termination_protection toggles the terminationprotected flag on and off."""
    conn = boto.connect_emr()
    job_id = conn.run_jobflow(**run_jobflow_args)
    conn.describe_jobflow(job_id).terminationprotected.should.equal("false")

    conn.set_termination_protection(job_id, True)
    conn.describe_jobflow(job_id).terminationprotected.should.equal("true")

    conn.set_termination_protection(job_id, False)
    conn.describe_jobflow(job_id).terminationprotected.should.equal("false")
Exemplo n.º 27
0
def step_completed(emrid):
	"""Check if EMR job with jobflow id emrid has completed.

	Looks at the last step of the jobflow and returns True exactly when its
	state is "COMPLETED".
	"""
	emr = boto.connect_emr()
	job = emr.describe_jobflow(emrid)
	step = job.steps[-1]
	emr.close()

	# Idiom: return the comparison directly instead of if/else True/False.
	return step.state == "COMPLETED"
Exemplo n.º 28
0
def test_set_termination_protection():
    """Termination protection defaults to off and can be switched on and back off."""
    conn = boto.connect_emr()
    flow_id = conn.run_jobflow(**run_jobflow_args)

    # Initial state, then toggle on, then toggle off again.
    for enable, expected in ((None, "false"), (True, "true"), (False, "false")):
        if enable is not None:
            conn.set_termination_protection(flow_id, enable)
        conn.describe_jobflow(flow_id).terminationprotected.should.equal(expected)
Exemplo n.º 29
0
def test_run_jobflow():
    """run_jobflow creates a WAITING flow whose attributes echo the request args."""
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    flow_id = conn.run_jobflow(**args)

    flow = conn.describe_jobflow(flow_id)
    flow.state.should.equal("WAITING")
    flow.jobflowid.should.equal(flow_id)
    flow.name.should.equal(args["name"])
    flow.masterinstancetype.should.equal(args["master_instance_type"])
    flow.slaveinstancetype.should.equal(args["slave_instance_type"])
    flow.loguri.should.equal(args["log_uri"])
    flow.visibletoallusers.should.equal("false")
    int(flow.normalizedinstancehours).should.equal(0)
    flow.steps.should.have.length_of(0)
Exemplo n.º 30
0
  def dumpEMRClusters(self):
    '''
      Method to dump EMR clusters info.

      Connects with the configured boto profile (or the default), prints all
      jobflows, and logs each cluster from list_clusters(); failures are
      reported via prntErrWarnInfo rather than raised.
    '''
    try:
      # Honour a non-default boto profile when one is configured.
      if self.botoprfl[0] != "default":
        conn = boto.connect_emr(profile_name = self.botoprfl)
      else:
        conn = boto.connect_emr()
      if conn:
        print("\n<Start of EMR clusters>\n")
        print(" Jobflows: %s" %conn.describe_jobflows())
        self.opygenericroutines.prntLogErrWarnInfo('', 'info', bresume = True)
        for c in conn.list_clusters().clusters:
          ec = " %s" %c
          self.opygenericroutines.prntLogErrWarnInfo(str(ec), 'info', bresume = True)
        self.opygenericroutines.prntLogErrWarnInfo('', 'info', bresume = True)
        print("\n<End of EMR clusters>\n")
    except Exception as e:  # fixed: "except Exception, e" is Python-2-only syntax
      serr = ('%s :: dumpEMRClusters(...) : connect_emr,list_clusters(...).clusters, '
              '%s' %(self.sclsnme, str(e)))
      prntErrWarnInfo(serr, bresume = True)
Exemplo n.º 31
0
def test_describe_jobflows():
    """describe_jobflows lists every flow and can filter by jobflow_ids."""
    conn = boto.connect_emr()
    first_id = conn.run_jobflow(**run_jobflow_args)
    second_id = conn.run_jobflow(**run_jobflow_args)

    conn.describe_jobflows().should.have.length_of(2)

    filtered = conn.describe_jobflows(jobflow_ids=[second_id])
    filtered.should.have.length_of(1)
    filtered[0].jobflowid.should.equal(second_id)

    conn.describe_jobflow(first_id).jobflowid.should.equal(first_id)
Exemplo n.º 32
0
def get_jobflow_status(emr_id):
	"""Get the EMR jobflow state for EMR jobflow id emr_id.

	Returns (status, details, url); details and url fall back to empty
	strings when the jobflow does not yet expose those attributes.
	"""
	conn = boto.connect_emr()
	jobflow = conn.describe_jobflow(emr_id)

	status = jobflow.state
	try:
		details = jobflow.laststatechangereason
		url = "http://%s:9100" % jobflow.masterpublicdnsname
	except AttributeError:
		details, url = "", ""

	return status, details, url
Exemplo n.º 33
0
def test_tags():
    """add_tags/remove_tags update the tags visible via describe_cluster."""
    conn = boto.connect_emr()
    cluster_id = conn.run_jobflow(**run_jobflow_args)

    tags = {"tag1": "val1", "tag2": "val2"}
    conn.add_tags(cluster_id, tags)

    cluster = conn.describe_cluster(cluster_id)
    cluster.tags.should.have.length_of(2)
    dict((t.key, t.value) for t in cluster.tags).should.equal(tags)

    conn.remove_tags(cluster_id, list(tags.keys()))
    conn.describe_cluster(cluster_id).tags.should.have.length_of(0)
Exemplo n.º 34
0
def test_run_jobflow():
    """A new jobflow starts WAITING and mirrors the arguments it was created with."""
    conn = boto.connect_emr()
    params = run_jobflow_args.copy()
    created = conn.run_jobflow(**params)
    described = conn.describe_jobflow(created)

    described.state.should.equal("WAITING")
    described.jobflowid.should.equal(created)
    described.name.should.equal(params["name"])
    described.masterinstancetype.should.equal(params["master_instance_type"])
    described.slaveinstancetype.should.equal(params["slave_instance_type"])
    described.loguri.should.equal(params["log_uri"])
    described.visibletoallusers.should.equal("false")
    int(described.normalizedinstancehours).should.equal(0)
    described.steps.should.have.length_of(0)
Exemplo n.º 35
0
def test_tags():
    """Tags added to a cluster are listed by describe_cluster and removable again."""
    wanted = {"tag1": "val1", "tag2": "val2"}

    conn = boto.connect_emr()
    cid = conn.run_jobflow(**run_jobflow_args)
    conn.add_tags(cid, wanted)

    described = conn.describe_cluster(cid)
    described.tags.should.have.length_of(2)
    dict((tag.key, tag.value) for tag in described.tags).should.equal(wanted)

    conn.remove_tags(cid, list(wanted.keys()))
    described = conn.describe_cluster(cid)
    described.tags.should.have.length_of(0)
Exemplo n.º 36
0
def test_describe_jobflows():
    """describe_jobflows supports filtering by id, state and creation time."""
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    expected = {}

    # Four flows left in the WAITING state.
    for idx in range(4):
        name = "cluster" + str(idx)
        args["name"] = name
        flow_id = conn.run_jobflow(**args)
        expected[flow_id] = {"id": flow_id, "name": name, "state": "WAITING"}

    # need sleep since it appears the timestamp is always rounded to
    # the nearest second internally
    time.sleep(1)
    timestamp = datetime.now(pytz.utc)
    time.sleep(1)

    # Two more flows, terminated immediately.
    for idx in range(4, 6):
        name = "cluster" + str(idx)
        args["name"] = name
        flow_id = conn.run_jobflow(**args)
        conn.terminate_jobflow(flow_id)
        expected[flow_id] = {"id": flow_id, "name": name, "state": "TERMINATED"}

    conn.describe_jobflows().should.have.length_of(6)

    for flow_id in expected:
        by_id = conn.describe_jobflows(jobflow_ids=[flow_id])
        by_id.should.have.length_of(1)
        by_id[0].jobflowid.should.equal(flow_id)

    waiting = conn.describe_jobflows(states=["WAITING"])
    waiting.should.have.length_of(4)
    for flow in waiting:
        flow.state.should.equal("WAITING")

    conn.describe_jobflows(created_before=timestamp).should.have.length_of(4)
    conn.describe_jobflows(created_after=timestamp).should.have.length_of(2)
Exemplo n.º 37
0
def test_describe_jobflows():
    '''Filters on describe_jobflows: by id, by state, by creation timestamp.'''
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    expected = {}

    for n in range(4):
        name = 'cluster' + str(n)
        args['name'] = name
        cid = conn.run_jobflow(**args)
        expected[cid] = {'id': cid, 'name': name, 'state': 'WAITING'}

    # need sleep since it appears the timestamp is always rounded to
    # the nearest second internally
    time.sleep(1)
    cutoff = datetime.now(pytz.utc)
    time.sleep(1)

    for n in range(4, 6):
        name = 'cluster' + str(n)
        args['name'] = name
        cid = conn.run_jobflow(**args)
        conn.terminate_jobflow(cid)
        expected[cid] = {'id': cid, 'name': name, 'state': 'TERMINATED'}

    all_jobs = conn.describe_jobflows()
    all_jobs.should.have.length_of(6)

    for cid in expected:
        single = conn.describe_jobflows(jobflow_ids=[cid])
        single.should.have.length_of(1)
        single[0].jobflowid.should.equal(cid)

    waiting_only = conn.describe_jobflows(states=['WAITING'])
    waiting_only.should.have.length_of(4)
    for jf in waiting_only:
        jf.state.should.equal('WAITING')

    before = conn.describe_jobflows(created_before=cutoff)
    before.should.have.length_of(4)

    after = conn.describe_jobflows(created_after=cutoff)
    after.should.have.length_of(2)
Exemplo n.º 38
0
def test_describe_jobflows():
    """Exercise describe_jobflows filtering by jobflow id, state and creation time."""
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    expected = {}

    def launch(index):
        # Start one flow named "cluster<index>"; return (id, name).
        args['name'] = 'cluster' + str(index)
        return conn.run_jobflow(**args), args['name']

    for i in range(4):
        cid, cname = launch(i)
        expected[cid] = {'id': cid, 'name': cname, 'state': 'WAITING'}

    # need sleep since it appears the timestamp is always rounded to
    # the nearest second internally
    time.sleep(1)
    marker = datetime.now(pytz.utc)
    time.sleep(1)

    for i in range(4, 6):
        cid, cname = launch(i)
        conn.terminate_jobflow(cid)
        expected[cid] = {'id': cid, 'name': cname, 'state': 'TERMINATED'}

    conn.describe_jobflows().should.have.length_of(6)

    for cid in expected:
        matches = conn.describe_jobflows(jobflow_ids=[cid])
        matches.should.have.length_of(1)
        matches[0].jobflowid.should.equal(cid)

    still_waiting = conn.describe_jobflows(states=['WAITING'])
    still_waiting.should.have.length_of(4)
    for jf in still_waiting:
        jf.state.should.equal('WAITING')

    conn.describe_jobflows(created_before=marker).should.have.length_of(4)
    conn.describe_jobflows(created_after=marker).should.have.length_of(2)
Exemplo n.º 39
0
def test_run_jobflow_with_instance_groups():
    """run_jobflow with explicit instance groups reports each group back."""
    by_name = {group.name: group for group in input_instance_groups}
    conn = boto.connect_emr()
    flow_id = conn.run_jobflow(instance_groups=input_instance_groups,
                               **run_jobflow_args)

    flow = conn.describe_jobflow(flow_id)
    total = sum(group.num_instances for group in input_instance_groups)
    int(flow.instancecount).should.equal(total)

    for reported in flow.instancegroups:
        requested = by_name[reported.name]
        reported.should.have.property("instancegroupid")
        int(reported.instancerunningcount).should.equal(requested.num_instances)
        reported.instancerole.should.equal(requested.role)
        reported.instancetype.should.equal(requested.type)
        reported.market.should.equal(requested.market)
        # A bid price only exists on the groups that were created with one.
        if hasattr(requested, "bidprice"):
            reported.bidprice.should.equal(requested.bidprice)
Exemplo n.º 40
0
def test_set_visible_to_all_users():
    """Toggling visible_to_all_users is reflected by describe_jobflow."""
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    args["visible_to_all_users"] = False
    job_id = conn.run_jobflow(**args)

    def visibility():
        # Re-read the flag from the service after each mutation.
        return conn.describe_jobflow(job_id).visibletoallusers

    visibility().should.equal("false")

    conn.set_visible_to_all_users(job_id, True)
    visibility().should.equal("true")

    conn.set_visible_to_all_users(job_id, False)
    visibility().should.equal("false")
Exemplo n.º 41
0
def test_list_clusters():
    """A single started jobflow shows up in list_clusters as RUNNING."""
    conn = boto.connect_emr()
    conn.run_jobflow(name='My jobflow',
                     log_uri='s3://some_bucket/jobflow_logs',
                     steps=[])

    clusters = conn.list_clusters().clusters
    clusters.should.have.length_of(1)

    only = clusters[0]
    only.name.should.equal("My jobflow")
    only.normalizedinstancehours.should.equal('0')
    only.status.state.should.equal("RUNNING")
Exemplo n.º 42
0
def test_list_clusters():
    """list_clusters returns exactly the one flow that was launched."""
    conn = boto.connect_emr()
    launch_kwargs = dict(
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        steps=[],
    )
    conn.run_jobflow(**launch_kwargs)

    listing = conn.list_clusters()
    listing.clusters.should.have.length_of(1)

    head = listing.clusters[0]
    head.name.should.equal("My jobflow")
    head.normalizedinstancehours.should.equal('0')
    head.status.state.should.equal("RUNNING")
Exemplo n.º 43
0
def create_job_flow(steps, job):
    """Start EMR job.

    Attaches *steps* to an already-running WAITING jobflow that has at
    least ``job.nodes`` instances; if none qualifies, launches a fresh
    keep-alive jobflow sized for the job.  Returns the jobflow id used.

    Fix: the EMR connection was only closed on the success path; it is
    now closed in a ``finally`` so an API error no longer leaks it.
    """
    conn = boto.connect_emr()
    try:
        jobid = None
        # Reuse the first idle flow that is big enough for this job.
        for flow in conn.describe_jobflows(['WAITING']):
            if int(flow.instancecount) >= int(job.nodes):
                conn.add_jobflow_steps(flow.jobflowid, steps)
                jobid = flow.jobflowid
                break
        if jobid is None:
            # No reusable flow: launch a new one that stays alive after
            # the steps finish so it can be reused later.
            jobid = conn.run_jobflow(
                "nsr web jobflow",
                log_uri="s3n://nsr-logs",
                master_instance_type=str(job.node_size),
                slave_instance_type=str(job.node_size),
                num_instances=job.nodes,
                action_on_failure="CONTINUE",
                steps=steps,
                keep_alive=True)
    finally:
        conn.close()
    return jobid
Exemplo n.º 44
0
def test_set_visible_to_all_users():
    """Flipping the visibility flag round-trips through the API."""
    conn = boto.connect_emr()
    launch_args = run_jobflow_args.copy()
    launch_args["visible_to_all_users"] = False
    job_id = conn.run_jobflow(**launch_args)

    # (flag to set -- None means just check the initial state,
    #  string the service is expected to report back)
    for flag, reported in [(None, "false"), (True, "true"), (False, "false")]:
        if flag is not None:
            conn.set_visible_to_all_users(job_id, flag)
        flow = conn.describe_jobflow(job_id)
        flow.visibletoallusers.should.equal(reported)
Exemplo n.º 45
0
def test_run_jobflow_with_instance_groups():
    """Each requested instance group is echoed back by describe_jobflow."""
    conn = boto.connect_emr()
    job_id = conn.run_jobflow(instance_groups=input_instance_groups,
                              **run_jobflow_args)
    described = conn.describe_jobflow(job_id)

    # Index the requested groups by name and total up their sizes.
    lookup = {}
    expected_total = 0
    for group in input_instance_groups:
        lookup[group.name] = group
        expected_total += group.num_instances
    int(described.instancecount).should.equal(expected_total)

    for actual in described.instancegroups:
        wanted = lookup[actual.name]
        actual.should.have.property('instancegroupid')
        int(actual.instancerunningcount).should.equal(wanted.num_instances)
        actual.instancerole.should.equal(wanted.role)
        actual.instancetype.should.equal(wanted.type)
        actual.market.should.equal(wanted.market)
        if hasattr(wanted, 'bidprice'):
            actual.bidprice.should.equal(wanted.bidprice)
Exemplo n.º 46
0
def test_create_job_flow_with_instance_groups():
    """Two identical 6-node TASK groups yield a 12-instance flow."""
    conn = boto.connect_emr()

    group_spec = ('TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
    groups = [InstanceGroup(6, *group_spec) for _ in range(2)]

    job_id = conn.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              steps=[],
                              instance_groups=groups)

    flow = conn.describe_jobflow(job_id)
    int(flow.instancecount).should.equal(12)
    int(flow.instancegroups[0].instancerunningcount).should.equal(6)
Exemplo n.º 47
0
def test_describe_job_flows():
    """describe_jobflows lists every flow and honours the jobflow_ids filter."""
    conn = boto.connect_emr()

    def launch():
        # Both flows are launched with identical parameters.
        return conn.run_jobflow(name='My jobflow',
                                log_uri='s3://some_bucket/jobflow_logs',
                                steps=[])

    job1_id = launch()
    job2_id = launch()

    conn.describe_jobflows().should.have.length_of(2)

    filtered = conn.describe_jobflows(jobflow_ids=[job2_id])
    filtered.should.have.length_of(1)
    filtered[0].jobflowid.should.equal(job2_id)

    conn.describe_jobflow(job1_id).jobflowid.should.equal(job1_id)
Exemplo n.º 48
0
def cancel_job(job):
	"""Cancel *job* by terminating the EMR job flow or killing the single machine process.

	EMR jobs (job_type 'e') are cancelled by terminating the whole jobflow.
	Other jobs are cancelled by SSH-ing to the worker host, pkill-ing the
	process matching the job's input-file name, and writing a CANCELLED
	marker to the job's status log.

	Fix: neither the EMR connection nor the paramiko SSH client was ever
	closed; both are now released in ``finally`` blocks.
	"""
	if job.job_type == 'e':
		c = boto.connect_emr()
		try:
			c.terminate_jobflow(job.jobflowid)
		finally:
			c.close()
	else:
		filename = job.get_input_file().name.split('/')[-1]
		kill_cmd = "pkill -f %s" % filename

		client = paramiko.SSHClient()
		try:
			client.load_host_keys('/var/www/known_hosts')
			# NOTE(review): worker address and key path are hard-coded;
			# presumably a single fixed EC2 worker -- confirm.
			client.connect('10.203.87.100', 22, 'ec2-user', key_filename='/var/www/nsr-dev.pem')
			stdin, stdout, stderr = client.exec_command(kill_cmd)
			client.exec_command("echo CANCELLED > ~/status-output/status-s-%s.log" % filename)
		finally:
			client.close()
Exemplo n.º 49
0
def get_step_status(emrid):	
	"""Get percentage complete of EMR job with jobflow id emrid.

	This screen scrapes the EMR tracker page, which is available at the job's masterpublicdnsname on port 9100. Accessing to this page is limited to whitelisted IPs, which can be set in the AWS Security Group settings page.
	"""
	emr = boto.connect_emr()
	job = emr.describe_jobflow(emrid)
	url = job.masterpublicdnsname	
	emr.close()
	
	c = httplib.HTTPConnection(url, 9100)
	c.request("GET", "/jobtracker.jsp")
	response = c.getresponse().read().split("\n")
	status_line = response[36]
	
	statuses = map(float, re.findall("<td>([0-9.]*)%<table", status_line))
	# print >> sys.stderr, statuses
	return sum(statuses)/200. * 90
Exemplo n.º 50
0
def test_set_termination_protection():
    """Termination protection starts unset and tracks set_termination_protection."""
    conn = boto.connect_emr()
    job_id = conn.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              steps=[])

    def protection():
        # Fetch the current value from the service.
        return conn.describe_jobflow(job_id).terminationprotected

    # A freshly-created flow reports the literal string 'None'.
    protection().should.equal(u'None')

    conn.set_termination_protection(job_id, True)
    protection().should.equal('true')

    conn.set_termination_protection(job_id, False)
    protection().should.equal('false')
Exemplo n.º 51
0
def test_modify_instance_groups():
    """modify_instance_groups resizes previously added TASK groups."""
    conn = boto.connect_emr()

    wordcount = StreamingStep(
        name='My wordcount example',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input',
        output='s3n://output_bucket/output/wordcount_output')

    job_id = conn.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              steps=[wordcount])

    # Add two identical 6-node spot TASK groups, then resize them.
    new_groups = [
        InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
        for _ in range(2)
    ]
    added = conn.add_instance_groups(job_id, new_groups)
    group_ids = added.instancegroupids.split(",")

    flow = conn.describe_jobflows()[0]
    int(flow.instancecount).should.equal(12)
    int(flow.instancegroups[0].instancerunningcount).should.equal(6)

    conn.modify_instance_groups(group_ids, [2, 3])

    flow = conn.describe_jobflows()[0]
    int(flow.instancecount).should.equal(5)
    # Index the reported groups by id to check each was resized as asked.
    by_id = {group.instancegroupid: group for group in flow.instancegroups}
    int(by_id[group_ids[0]].instancerunningcount).should.equal(2)
    int(by_id[group_ids[1]].instancerunningcount).should.equal(3)
Exemplo n.º 52
0
def test_describe_jobflows():
    """Filtering describe_jobflows by id, state and creation time at scale."""
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    expected = {}

    def start(index, state):
        # Launch one flow named after its index and record expectations.
        name = "cluster" + str(index)
        args["name"] = name
        flow_id = conn.run_jobflow(**args)
        expected[flow_id] = {"id": flow_id, "name": name, "state": state}
        return flow_id

    for idx in range(400):
        start(idx, "WAITING")

    # Creation timestamps are rounded to whole seconds internally, so keep
    # a one-second margin on each side of the cut-off instant.
    time.sleep(1)
    timestamp = datetime.now(pytz.utc)
    time.sleep(1)

    for idx in range(400, 600):
        conn.terminate_jobflow(start(idx, "TERMINATED"))

    # Only 512 of the 600 flows come back from an unfiltered listing.
    conn.describe_jobflows().should.have.length_of(512)

    for flow_id in expected:
        result = conn.describe_jobflows(jobflow_ids=[flow_id])
        result.should.have.length_of(1)
        result[0].jobflowid.should.equal(flow_id)

    waiting = conn.describe_jobflows(states=["WAITING"])
    waiting.should.have.length_of(400)
    for flow in waiting:
        flow.state.should.equal("WAITING")

    conn.describe_jobflows(created_before=timestamp).should.have.length_of(400)
    conn.describe_jobflows(created_after=timestamp).should.have.length_of(200)
Exemplo n.º 53
0
def test_set_visible_to_all_users():
    """Visibility round-trips; this variant's backend reports 'False'/'True'."""
    conn = boto.connect_emr()

    job_id = conn.run_jobflow(
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        job_flow_role='some-role-arn',
        steps=[],
        visible_to_all_users=False,
    )

    def current():
        # Read the flag back from the service.
        return conn.describe_jobflow(job_id).visibletoallusers

    current().should.equal('False')

    conn.set_visible_to_all_users(job_id, True)
    current().should.equal('True')

    conn.set_visible_to_all_users(job_id, False)
    current().should.equal('False')
Exemplo n.º 54
0
def test_list_clusters():
    """list_clusters reports both live and terminated flows with timelines."""
    conn = boto.connect_emr()

    args = run_jobflow_args.copy()
    args['name'] = 'jobflow1'
    cluster1_id = conn.run_jobflow(**args)
    args['name'] = 'jobflow2'
    cluster2_id = conn.run_jobflow(**args)
    conn.terminate_jobflow(cluster2_id)

    clusters = conn.list_clusters().clusters
    clusters.should.have.length_of(2)

    # (expected name, expected state) keyed by cluster id.
    wanted = {cluster1_id: ('jobflow1', 'WAITING'),
              cluster2_id: ('jobflow2', 'TERMINATED')}

    for cluster in clusters:
        name, state = wanted[cluster.id]
        cluster.name.should.equal(name)
        int(cluster.normalizedinstancehours).should.equal(0)
        cluster.status.state.should.equal(state)
        cluster.status.timeline.creationdatetime.should.be.a(six.string_types)
        if state == 'TERMINATED':
            # Only terminated flows carry an end timestamp.
            cluster.status.timeline.enddatetime.should.be.a(six.string_types)
        else:
            cluster.status.timeline.shouldnt.have.property('enddatetime')
        cluster.status.timeline.readydatetime.should.be.a(six.string_types)
Exemplo n.º 55
0
def test_cluster_tagging():
    """add_tags / remove_tags update the tag set visible on the cluster."""
    conn = boto.connect_emr()
    cluster_id = conn.run_jobflow(
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        steps=[],
    )
    conn.add_tags(cluster_id, {"tag1": "val1", "tag2": "val2"})

    def tag_dict():
        # Fetch the cluster and flatten its tags to a plain dict.
        described = conn.describe_cluster(cluster_id)
        return dict((tag.key, tag.value) for tag in described.tags)

    conn.describe_cluster(cluster_id).tags.should.have.length_of(2)
    current = tag_dict()
    current['tag1'].should.equal('val1')
    current['tag2'].should.equal('val2')

    # Removing one tag leaves only the other behind.
    conn.remove_tags(cluster_id, ["tag1"])
    conn.describe_cluster(cluster_id).tags.should.have.length_of(1)
    tag_dict()['tag2'].should.equal('val2')
Exemplo n.º 56
0
	# NOTE(review): ('--spot-bid') is a plain string, not a tuple, so this
	# performs a substring test (o in '--spot-bid'), not an equality check --
	# likely meant `o == '--spot-bid'` or `o in ('--spot-bid',)`.
	if o in ('--spot-bid'):
		params['spot_bid_price']=a
	# Same single-element "tuple" issue as above.
	if o in ('--test'):
		params['test_mode']=True
	
# Abort with usage help if any mandatory credential/keypair param is missing.
required = ['aws_key','secret','keypair']

for pname in required:
    if not params.get(pname, None):
        print '\nERROR:%s is required' % pname
        usage()

# Echo the effective configuration before launching anything.
for p, v in params.iteritems():
	print "param:" + `p`+ " value:" + `v`

conn = boto.connect_emr(params['aws_key'],params['secret'])

# Bootstrap actions: install the CommonCrawl config, tune Hadoop task/JVM
# settings, and enlarge the jobtracker heap.
bootstrap_step1 = BootstrapAction("install_cc", "s3://commoncrawl-public/config64.sh",[params['aws_key'], params['secret']])
bootstrap_step2 = BootstrapAction("configure_hadoop", "s3://elasticmapreduce/bootstrap-actions/configure-hadoop",
	[
	"-m","mapred.tasktracker.map.tasks.maximum=8",
	"-m","mapred.child.java.opts=-XX:ErrorFile=/tmp/hs_err_${mapred.tip.id}.log -Xmx700m -XX:+UseParNewGC -XX:ParallelGCThreads=8 -XX:NewSize=100m -XX:+UseConcMarkSweepGC -XX:+UseTLAB -XX:+CMSIncrementalMode -XX:+CMSIncrementalPacing -XX:CMSIncrementalDutyCycleMin=0 -XX:CMSIncrementalDutyCycle=10"
	])
bootstrap_step3 = BootstrapAction("configure_jobtrackerheap", "s3://elasticmapreduce/bootstrap-actions/configure-daemons",["--jobtracker-heap-size=12096"])

# One on-demand master plus params['num_core'] on-demand core nodes.
namenode_instance_group = InstanceGroup(1,"MASTER","c1.xlarge","ON_DEMAND","MASTER_GROUP")
core_instance_group = InstanceGroup(params['num_core'],"CORE","c1.xlarge","ON_DEMAND","CORE_GROUP")

# Spot groups (if any) are appended in the branch that follows this view.
instance_groups=[]
if params['num_spot'] <= 0:
	instance_groups=[namenode_instance_group,core_instance_group]
Exemplo n.º 57
0
def test_create_job_flow():
    """run_jobflow with two streaming steps exposes flow and step metadata."""
    conn = boto.connect_emr()

    def wordcount_step(suffix):
        # Build a streaming step; *suffix* distinguishes the second copy.
        return StreamingStep(
            name='My wordcount example%s' % suffix,
            mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter%s.py' % suffix,
            reducer='aggregate',
            input='s3n://elasticmapreduce/samples/wordcount/input%s' % suffix,
            output='s3n://output_bucket/output/wordcount_output%s' % suffix)

    job_id = conn.run_jobflow(
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        master_instance_type='m1.medium',
        slave_instance_type='m1.small',
        steps=[wordcount_step(''), wordcount_step('2')],
    )

    flow = conn.describe_jobflow(job_id)
    flow.state.should.equal('STARTING')
    flow.jobflowid.should.equal(job_id)
    flow.name.should.equal('My jobflow')
    flow.masterinstancetype.should.equal('m1.medium')
    flow.slaveinstancetype.should.equal('m1.small')
    flow.loguri.should.equal('s3://some_bucket/jobflow_logs')
    flow.visibletoallusers.should.equal('False')
    int(flow.normalizedinstancehours).should.equal(0)

    # First step starts immediately; the second waits behind it.
    expectations = [('', 'STARTING'), ('2', 'PENDING')]
    for index, (suffix, state) in enumerate(expectations):
        job_step = flow.steps[index]
        job_step.name.should.equal('My wordcount example%s' % suffix)
        job_step.state.should.equal(state)
        step_args = [arg.value for arg in job_step.args]
        step_args.should.equal([
            '-mapper',
            's3n://elasticmapreduce/samples/wordcount/wordSplitter%s.py' % suffix,
            '-reducer',
            'aggregate',
            '-input',
            's3n://elasticmapreduce/samples/wordcount/input%s' % suffix,
            '-output',
            's3n://output_bucket/output/wordcount_output%s' % suffix,
        ])
Exemplo n.º 58
0
def test_instance_groups():
    """add/modify_instance_groups and both listing APIs agree on group data."""
    input_groups = dict((g.name, g) for g in input_instance_groups)

    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    # Instance counts and types come from the groups themselves here.
    for key in ("master_instance_type", "slave_instance_type", "num_instances"):
        args.pop(key)
    args["instance_groups"] = input_instance_groups[:2]
    job_id = conn.run_jobflow(**args)

    base_instance_count = int(conn.describe_jobflow(job_id).instancecount)

    conn.add_instance_groups(job_id, input_instance_groups[2:])

    jf = conn.describe_jobflow(job_id)
    expected_total = sum(g.num_instances for g in input_instance_groups)
    int(jf.instancecount).should.equal(expected_total)

    # Legacy describe_jobflow view of the groups.
    for got in jf.instancegroups:
        want = input_groups[got.name]
        if hasattr(want, "bidprice"):
            got.bidprice.should.equal(want.bidprice)
        got.creationdatetime.should.be.a(str)
        # enddatetime is absent while the group is still running.
        got.should.have.property("instancegroupid")
        int(got.instancerequestcount).should.equal(want.num_instances)
        got.instancerole.should.equal(want.role)
        int(got.instancerunningcount).should.equal(want.num_instances)
        got.instancetype.should.equal(want.type)
        got.laststatechangereason.should.be.a(str)
        got.market.should.equal(want.market)
        got.name.should.be.a(str)
        got.readydatetime.should.be.a(str)
        got.startdatetime.should.be.a(str)
        got.state.should.equal("RUNNING")

    # Newer list_instance_groups view must agree.
    for got in conn.list_instance_groups(job_id).instancegroups:
        want = input_groups[got.name]
        if hasattr(want, "bidprice"):
            got.bidprice.should.equal(want.bidprice)
        # Configurations / EbsBlockDevices / EbsOptimized / ShrinkPolicy
        # are not asserted here.
        got.should.have.property("id")
        got.instancegrouptype.should.equal(want.role)
        got.instancetype.should.equal(want.type)
        got.market.should.equal(want.market)
        got.name.should.equal(want.name)
        int(got.requestedinstancecount).should.equal(want.num_instances)
        int(got.runninginstancecount).should.equal(want.num_instances)
        got.status.state.should.equal("RUNNING")
        got.status.statechangereason.code.should.be.a(str)
        got.status.statechangereason.message.should.be.a(str)
        got.status.timeline.creationdatetime.should.be.a(str)
        got.status.timeline.readydatetime.should.be.a(str)

    # Shrink the two task groups and verify the counts track the request.
    groups_by_name = dict((g.name, g) for g in jf.instancegroups)
    conn.modify_instance_groups(
        [groups_by_name["task-1"].instancegroupid,
         groups_by_name["task-2"].instancegroupid],
        [2, 3])

    jf = conn.describe_jobflow(job_id)
    int(jf.instancecount).should.equal(base_instance_count + 5)
    groups_by_name = dict((g.name, g) for g in jf.instancegroups)
    int(groups_by_name["task-1"].instancerunningcount).should.equal(2)
    int(groups_by_name["task-2"].instancerunningcount).should.equal(3)
Exemplo n.º 59
0
import datetime
import os

import boto
from boto.emr.instance_group import InstanceGroup
from boto.emr.step import InstallPigStep, PigStep


conn = boto.connect_emr()

# One spot master and two spot core nodes, all bid at $0.10/hr.
# NOTE(review): the group-name arguments look like they were mangled into
# "[email protected]" by the site this snippet was scraped from -- confirm
# the originals before reuse.
instance_groups = [
    InstanceGroup(1, 'MASTER', 'm1.small', 'SPOT', '[email protected]', '0.10'),
    InstanceGroup(2, 'CORE', 'm1.small', 'SPOT', '[email protected]', '0.10'),
]

# Sample Apache-log reporting Pig script shipped with EMR.
pig_file = 's3://elasticmapreduce/samples/pig-apache/do-reports2.pig'
INPUT = 's3://elasticmapreduce/samples/pig-apache/input/'
# NOTE(review): strftime("%s") (epoch seconds) is a platform-specific
# extension, not guaranteed by the standard -- confirm on the target OS.
OUTPUT = ('s3://org.unencrypted.emr.output/apache_sample/%s' %
          datetime.datetime.utcnow().strftime("%s"))

# NOTE(review): "OUPUT" below is a typo in the displayed banner text only.
print """\
Running pig job with settings:

    SCRIPT={script}
    INPUT={input}
    OUPUT={output}
""".format(script=pig_file, input=INPUT, output=OUTPUT)

# -p parameters are substituted into the Pig script at run time.
pig_args = ['-p', 'INPUT=%s' % INPUT,
            '-p', 'OUTPUT=%s' % OUTPUT]
Exemplo n.º 60
0
def test_steps():
    """Streaming steps supplied at launch and appended afterwards are all
    reported, with matching configuration, by describe_jobflow, list_steps
    and describe_step."""
    input_steps = [
        StreamingStep(
            name="My wordcount example",
            mapper="s3n://elasticmapreduce/samples/wordcount/wordSplitter.py",
            reducer="aggregate",
            input="s3n://elasticmapreduce/samples/wordcount/input",
            output="s3n://output_bucket/output/wordcount_output",
        ),
        StreamingStep(
            name="My wordcount example & co.",
            mapper="s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py",
            reducer="aggregate",
            input="s3n://elasticmapreduce/samples/wordcount/input2",
            output="s3n://output_bucket/output/wordcount_output2",
        ),
    ]

    # TODO: implementation and test for cancel_steps

    conn = boto.connect_emr()
    cluster_id = conn.run_jobflow(steps=[input_steps[0]], **run_jobflow_args)

    jf = conn.describe_jobflow(cluster_id)
    jf.steps.should.have.length_of(1)

    # A second step can be appended to an already-running flow.
    conn.add_jobflow_steps(cluster_id, [input_steps[1]])

    jf = conn.describe_jobflow(cluster_id)
    jf.steps.should.have.length_of(2)
    for step in jf.steps:
        step.actiononfailure.should.equal("TERMINATE_JOB_FLOW")
        # 8 args = the four -mapper/-reducer/-input/-output flag/value pairs.
        list(arg.value for arg in step.args).should.have.length_of(8)
        step.creationdatetime.should.be.a(str)
        # step.enddatetime.should.be.a(str)
        step.jar.should.equal(
            "/home/hadoop/contrib/streaming/hadoop-streaming.jar")
        step.laststatechangereason.should.be.a(str)
        step.mainclass.should.equal("")
        step.name.should.be.a(str)
        # step.readydatetime.should.be.a(str)
        # step.startdatetime.should.be.a(str)
        step.state.should.be.within(["RUNNING", "PENDING"])

    expected = dict((s.name, s) for s in input_steps)

    # The newer list_steps API must agree with the legacy jobflow view.
    steps = conn.list_steps(cluster_id).steps
    for x in steps:
        y = expected[x.name]
        # actiononfailure
        list(arg.value for arg in x.config.args).should.equal([
            "-mapper",
            y.mapper,
            "-reducer",
            y.reducer,
            "-input",
            y.input,
            "-output",
            y.output,
        ])
        x.config.jar.should.equal(
            "/home/hadoop/contrib/streaming/hadoop-streaming.jar")
        x.config.mainclass.should.equal("")
        # properties
        x.should.have.property("id").should.be.a(str)
        x.name.should.equal(y.name)
        x.status.state.should.be.within(["RUNNING", "PENDING"])
        # x.status.statechangereason
        x.status.timeline.creationdatetime.should.be.a(str)
        # x.status.timeline.enddatetime.should.be.a(str)
        # x.status.timeline.startdatetime.should.be.a(str)

        # describe_step on each id must return the same configuration.
        x = conn.describe_step(cluster_id, x.id)
        list(arg.value for arg in x.config.args).should.equal([
            "-mapper",
            y.mapper,
            "-reducer",
            y.reducer,
            "-input",
            y.input,
            "-output",
            y.output,
        ])
        x.config.jar.should.equal(
            "/home/hadoop/contrib/streaming/hadoop-streaming.jar")
        x.config.mainclass.should.equal("")
        # properties
        x.should.have.property("id").should.be.a(str)
        x.name.should.equal(y.name)
        x.status.state.should.be.within(["RUNNING", "PENDING"])
        # x.status.statechangereason
        x.status.timeline.creationdatetime.should.be.a(str)
        # x.status.timeline.enddatetime.should.be.a(str)
        # x.status.timeline.startdatetime.should.be.a(str)

    # Nested so it only runs on boto >= 2.39, whose list_steps honours the
    # step_states filter; invoked immediately below.
    @requires_boto_gte("2.39")
    def test_list_steps_with_states():
        # boto's list_steps prior to 2.39 has a bug that ignores
        # step_states argument.
        steps = conn.list_steps(cluster_id).steps
        step_id = steps[0].id
        steps = conn.list_steps(cluster_id, step_states=["RUNNING"]).steps
        steps.should.have.length_of(1)
        steps[0].id.should.equal(step_id)

    test_list_steps_with_states()