def test_allow_string_cluster(self):
    """Build an emr.Cluster from plain-string property values (counts,
    capacities, booleans) mixed with Refs and Fn::If conditionals, then
    serialize it with to_dict()."""
    spot_param_name = "2"
    spot_condition = "WithSpotPrice"

    # Spot bid/market are resolved by the WithSpotPrice condition at
    # deploy time; AWS::NoValue drops BidPrice entirely when on-demand.
    conditional_bid = If(spot_condition, Ref(spot_param_name),
                         Ref("AWS::NoValue"))
    conditional_market = If(spot_condition, "SPOT", "ON_DEMAND")

    master_group = emr.InstanceGroupConfigProperty(
        InstanceCount="1",
        InstanceType=M4_LARGE,
        AutoScalingPolicy=emr.AutoScalingPolicy(
            Constraints=emr.ScalingConstraints(MinCapacity="1",
                                               MaxCapacity="3"),
            Rules=self.generate_rules("MasterAutoScalingPolicy"),
        ),
    )

    core_group = emr.InstanceGroupConfigProperty(
        Name="Core Instance",
        BidPrice=conditional_bid,
        Market=conditional_market,
        InstanceCount="1",
        InstanceType=M4_LARGE,
        AutoScalingPolicy=emr.AutoScalingPolicy(
            Constraints=emr.ScalingConstraints(MinCapacity="1",
                                               MaxCapacity="3"),
            Rules=self.generate_rules("CoreAutoScalingPolicy"),
        ),
    )

    cluster = emr.Cluster(
        'Cluster',
        # AdditionalInfo="Additional Info",
        Applications=[
            emr.Application(Name="Hadoop"),
            emr.Application(Name="Hive"),
            emr.Application(Name="Mahout"),
            emr.Application(Name="Pig"),
            emr.Application(Name="Spark"),
        ],
        BootstrapActions=[
            emr.BootstrapActionConfig(
                Name='Dummy bootstrap action',
                ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                    Path='file:/usr/share/aws/emr/scripts/install-hue',
                    Args=["dummy", "parameter"],
                ),
            ),
        ],
        Configurations=[
            emr.Configuration(
                Classification="core-site",
                ConfigurationProperties={
                    'hadoop.security.groups.cache.secs': '250',
                },
            ),
        ],
        Instances=emr.JobFlowInstancesConfig(
            Ec2KeyName="KeyName",
            Ec2SubnetId="SubnetId",
            MasterInstanceGroup=master_group,
            CoreInstanceGroup=core_group,
        ),
        JobFlowRole="EMRJobFlowRole",
        LogUri="s3://cluster-logs",
        Name="EMR Cluster",
        ReleaseLabel="emr-5.5.0",
        ServiceRole="EMRServiceRole",
        AutoScalingRole="EMR_AutoScaling_DefaultRole",
        VisibleToAllUsers="true",
        Tags=Tags(Name="EMR Sample Cluster"),
    )

    # Serializing the resource exercises troposphere's property handling;
    # the resulting dict is intentionally discarded.
    cluster.to_dict()
# NOTE(review): orphaned fragment — this is the middle of a
# template.add_resource(emr.Cluster(...)) call (its opening is not in this
# chunk) followed by the start of a truncated emr.Step resource. It is not
# valid standalone Python; left byte-identical pending recovery of the
# surrounding chunk.
JobFlowRole=Ref(emr_instance_profile), ServiceRole=Ref(emr_service_role), Instances=emr.JobFlowInstancesConfig( MasterInstanceGroup=emr.InstanceGroupConfigProperty( Name="Master Instance", InstanceCount="1", InstanceType="m3.xlarge", Market="ON_DEMAND"), CoreInstanceGroup=emr.InstanceGroupConfigProperty( Name="Core Instance", BidPrice="20", InstanceCount="1", InstanceType="m3.xlarge", Market="SPOT")), Applications=[ emr.Application(Name="Hadoop"), emr.Application(Name="Hive"), emr.Application(Name="Mahout"), emr.Application(Name="Pig"), emr.Application(Name="Spark") ], VisibleToAllUsers="true", Tags=Tags(Name="EMR Sample Cluster"))) step = template.add_resource( emr.Step('TestStep', Name="TestStep", ActionOnFailure='CONTINUE', HadoopJarStep=emr.HadoopJarStepConfig( Args=["5", "10"], Jar="s3://emr-cfn-test/hadoop-mapreduce-examples-2.6.0.jar",
def test_allow_string_cluster(self):
    """String-typed property values are accepted alongside Refs and Fn::If
    conditionals when building an EMR cluster.

    NOTE(review): this variant additionally wires in a SecurityConfiguration
    resource, a stand-alone AutoScalingPolicy, and a TASK instance group.
    """
    # `security_configuration` is defined elsewhere in the module (not
    # visible in this chunk) — presumably the security-configuration JSON;
    # verify at module scope.
    cluster_security_configuration = emr.SecurityConfiguration(
        'emrsecurityconfiguration',
        Name="EMRSecurityConfiguration",
        SecurityConfiguration=security_configuration)

    # Parameter name ("2") and condition name consumed by the Fn::If below.
    spot = "2"
    withSpotPrice = "WithSpotPrice"

    cluster = emr.Cluster(
        'Cluster',
        # AdditionalInfo="Additional Info",
        Applications=[
            emr.Application(Name="Hadoop"),
            emr.Application(Name="Hive"),
            emr.Application(Name="Mahout"),
            emr.Application(Name="Pig"),
            emr.Application(Name="Spark")
        ],
        BootstrapActions=[
            emr.BootstrapActionConfig(
                Name='Dummy bootstrap action',
                ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                    Path='file:/usr/share/aws/emr/scripts/install-hue',
                    Args=["dummy", "parameter"]))
        ],
        Configurations=[
            emr.Configuration(
                Classification="core-site",
                ConfigurationProperties={
                    'hadoop.security.groups.cache.secs': '250'
                })
        ],
        Instances=emr.JobFlowInstancesConfig(
            Ec2KeyName="KeyName",
            Ec2SubnetId="SubnetId",
            MasterInstanceGroup=emr.InstanceGroupConfigProperty(
                # Counts/capacities deliberately given as strings.
                InstanceCount="1",
                InstanceType=M4_LARGE,
                AutoScalingPolicy=emr.AutoScalingPolicy(
                    Constraints=emr.ScalingConstraints(MinCapacity="1",
                                                       MaxCapacity="3"),
                    Rules=self.generate_rules("MasterAutoScalingPolicy")),
            ),
            CoreInstanceGroup=emr.InstanceGroupConfigProperty(
                Name="Core Instance",
                # BidPrice only materializes when WithSpotPrice holds;
                # AWS::NoValue removes the property otherwise.
                BidPrice=If(withSpotPrice, Ref(spot), Ref("AWS::NoValue")),
                Market=If(withSpotPrice, "SPOT", "ON_DEMAND"),
                InstanceCount="1",
                InstanceType=M4_LARGE,
                AutoScalingPolicy=emr.AutoScalingPolicy(
                    Constraints=emr.ScalingConstraints(MinCapacity="1",
                                                       MaxCapacity="3"),
                    Rules=self.generate_rules("CoreAutoScalingPolicy"),
                )),
        ),
        JobFlowRole="EMRJobFlowRole",
        LogUri="s3://cluster-logs",
        Name="EMR Cluster",
        ReleaseLabel="emr-5.5.0",
        SecurityConfiguration=Ref(cluster_security_configuration),
        ServiceRole="EMRServiceRole",
        AutoScalingRole="EMR_AutoScaling_DefaultRole",
        VisibleToAllUsers="true",
        Tags=Tags(Name="EMR Sample Cluster"))
    # Serialize the resource; the resulting dict is discarded.
    cluster.to_dict()

    # Stand-alone scaling policy. Note one rule uses the
    # emr.CHANGE_IN_CAPACITY constant while another uses the raw string
    # 'CHANGE_IN_CAPACITY' — both forms appear here.
    autoscale_policy = emr.AutoScalingPolicy(
        Constraints=emr.ScalingConstraints(MinCapacity=0, MaxCapacity=5),
        Rules=[
            emr.ScalingRule(
                Name='ScaleUpContainerPending',
                Description='Scale up on over-provisioned ' 'containers',
                Action=emr.ScalingAction(
                    SimpleScalingPolicyConfiguration=emr.SimpleScalingPolicyConfiguration(
                        AdjustmentType=emr.CHANGE_IN_CAPACITY,
                        CoolDown=300,
                        ScalingAdjustment=1)),
                Trigger=emr.ScalingTrigger(
                    CloudWatchAlarmDefinition=emr.CloudWatchAlarmDefinition(
                        ComparisonOperator='GREATER_THAN',
                        MetricName='ContainerPendingRatio',
                        Period=300,
                        Threshold=0.75,
                        Dimensions=[
                            emr.MetricDimension(Key='JobFlowId',
                                                Value='${emr.clusterId}')
                        ]))),
            emr.ScalingRule(
                Name='ScaleUpMemory',
                Description='Scale up on low memory',
                Action=emr.ScalingAction(
                    SimpleScalingPolicyConfiguration=emr.SimpleScalingPolicyConfiguration(
                        AdjustmentType='CHANGE_IN_CAPACITY',
                        CoolDown=300,
                        ScalingAdjustment=1)),
                Trigger=emr.ScalingTrigger(
                    CloudWatchAlarmDefinition=emr.CloudWatchAlarmDefinition(
                        ComparisonOperator='LESS_THAN',
                        MetricName='YARNMemoryAvailablePercentage',
                        Period=300,
                        Threshold=15,
                        Dimensions=[
                            emr.MetricDimension(Key='JobFlowId',
                                                Value='${emr.clusterId}')
                        ]))),
            emr.ScalingRule(
                Name='ScaleDownMemory',
                Description='Scale down on high memory',
                Action=emr.ScalingAction(
                    SimpleScalingPolicyConfiguration=emr.SimpleScalingPolicyConfiguration(
                        AdjustmentType=emr.CHANGE_IN_CAPACITY,
                        CoolDown=300,
                        ScalingAdjustment=-1)),
                Trigger=emr.ScalingTrigger(
                    CloudWatchAlarmDefinition=emr.CloudWatchAlarmDefinition(
                        ComparisonOperator='GREATER_THAN',
                        MetricName='YARNMemoryAvailablePercentage',
                        Period=300,
                        Threshold=75,
                        Dimensions=[
                            emr.MetricDimension(Key='JobFlowId',
                                                Value='${emr.clusterId}')
                        ])))
        ])

    # TASK instance group referencing the cluster above; constructed but
    # never added to a template — construction alone is the exercise here.
    emr.InstanceGroupConfig('TaskInstanceGroup',
                            AutoScalingPolicy=autoscale_policy,
                            InstanceCount=0,
                            InstanceType=M4_LARGE,
                            InstanceRole='TASK',
                            Market='ON_DEMAND',
                            Name='Task Instance',
                            JobFlowId=Ref(cluster))
"PYTHONPATH": os.path.join('/home/hadoop/miniconda', 'bin/python') + ":/usr/lib/spark/python/:$PYTHONPATH", "PYSPARK_DRIVER_PYTHON": os.path.join('/home/hadoop/miniconda', 'bin/python'), "SPARK_HOME": "/usr/lib/spark", "PYTHONHASHSEED": "123" }) ]), ], Applications=[ emr.Application(Name=app) for app in cfg['applications'] ], VisibleToAllUsers='true', Tags=Tags(Name='jupyter-cluster'))) # Outputs template.add_output([ Output("EMRCluster", Description="EMRCluster", Value=Ref(cluster)), Output("EMRClusterMasterDNS", Description="EMRCluster", Value=GetAtt(cluster, 'MasterPublicDNS')), ]) template_json = template.to_json(indent=4) print(template_json)
def build(ssh_keypair_name):
    """Return a CloudFormation Template for a minimal EMR Spark cluster.

    Creates a dedicated VPC (subnet, internet gateway, routing, network
    ACLs), an SSH security group, the EMR service/job-flow IAM roles, and
    an emr-6.0.0 cluster with one master and one core instance, outputting
    the master node's public DNS.

    :param ssh_keypair_name: default for the KeyName parameter (name of an
        existing EC2 key pair).
    """
    template = Template()
    template.set_version("2010-09-09")

    # --- Parameters -------------------------------------------------------
    keyname_param = template.add_parameter(
        Parameter(
            "KeyName",
            ConstraintDescription=
            "must be the name of an existing EC2 KeyPair.",
            Description=
            "Name of an existing EC2 KeyPair to enable SSH access to \
the instance",
            Type="AWS::EC2::KeyPair::KeyName",
            Default=ssh_keypair_name,
        ))
    sshlocation_param = template.add_parameter(
        Parameter(
            "SSHLocation",
            Description=
            " The IP address range that can be used to SSH to the EC2 \
instances",
            Type="String",
            MinLength="9",
            MaxLength="18",
            Default="0.0.0.0/0",
            AllowedPattern=
            r"(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2})",
            ConstraintDescription=(
                "must be a valid IP CIDR range of the form x.x.x.x/x."),
        ))

    # --- Networking: VPC, subnet, internet gateway, routing ---------------
    vpc = template.add_resource(VPC("VPC", CidrBlock="10.0.0.0/16"))
    subnet = template.add_resource(
        Subnet(
            "Subnet",
            CidrBlock="10.0.0.0/24",
            VpcId=Ref(vpc),
        ))
    internet_gateway = template.add_resource(
        InternetGateway("InternetGateway"))
    attach_gateway = template.add_resource(
        VPCGatewayAttachment("AttachGateway",
                             VpcId=Ref(vpc),
                             InternetGatewayId=Ref(internet_gateway)))
    route_table = template.add_resource(
        RouteTable("RouteTable", VpcId=Ref(vpc)))
    # The default route must wait for the gateway attachment (DependsOn).
    template.add_resource(
        Route(
            "Route",
            DependsOn=attach_gateway,
            GatewayId=Ref(internet_gateway),
            DestinationCidrBlock="0.0.0.0/0",
            RouteTableId=Ref(route_table),
        ))
    template.add_resource(
        SubnetRouteTableAssociation(
            "SubnetRouteTableAssociation",
            SubnetId=Ref(subnet),
            RouteTableId=Ref(route_table),
        ))

    # --- Network ACLs: SSH + ephemeral in; response/HTTP(S)/SSH out -------
    network_acl = template.add_resource(
        NetworkAcl("NetworkAcl", VpcId=Ref(vpc)))
    # Inbound SSH (22).
    template.add_resource(
        NetworkAclEntry(
            "InboundSSHNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="101",
            Protocol="6",
            PortRange=PortRange(To="22", From="22"),
            Egress="false",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))
    # Inbound ephemeral/response ports (1024-65535).
    template.add_resource(
        NetworkAclEntry(
            "InboundResponsePortsNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="102",
            Protocol="6",
            PortRange=PortRange(To="65535", From="1024"),
            Egress="false",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))
    # Outbound ephemeral/response ports (1024-65535).
    template.add_resource(
        NetworkAclEntry(
            "OutBoundResponsePortsNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="103",
            Protocol="6",
            PortRange=PortRange(To="65535", From="1024"),
            Egress="true",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))
    # Outbound HTTP (80).
    template.add_resource(
        NetworkAclEntry(
            "OutBoundHTTPPortsNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="104",
            Protocol="6",
            PortRange=PortRange(To="80", From="80"),
            Egress="true",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))
    # Outbound HTTPS (443).
    template.add_resource(
        NetworkAclEntry(
            "OutBoundHTTPSPortsNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="105",
            Protocol="6",
            PortRange=PortRange(To="443", From="443"),
            Egress="true",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))
    # Outbound SSH (22).
    template.add_resource(
        NetworkAclEntry(
            "OutBoundSSHPortsNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="106",
            Protocol="6",
            PortRange=PortRange(To="22", From="22"),
            Egress="true",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))
    template.add_resource(
        SubnetNetworkAclAssociation("SubnetNetworkAclAssociation",
                                    SubnetId=Ref(subnet),
                                    NetworkAclId=Ref(network_acl)))

    # --- Security group + IAM roles ---------------------------------------
    # SSH ingress restricted to the SSHLocation parameter's CIDR.
    emr_security_group = template.add_resource(
        SecurityGroup(
            "EMRSecurityGroup",
            GroupDescription="Enable SSH access via port 22",
            SecurityGroupIngress=[
                SecurityGroupRule(IpProtocol="tcp",
                                  FromPort="22",
                                  ToPort="22",
                                  CidrIp=Ref(sshlocation_param)),
            ],
            VpcId=Ref(vpc),
        ))
    # Service role assumed by the EMR service itself.
    emr_service_role = template.add_resource(
        iam.Role(
            "EMRServiceRole",
            AssumeRolePolicyDocument={
                "Statement": [{
                    "Effect": "Allow",
                    "Principal": {
                        "Service": ["elasticmapreduce.amazonaws.com"]
                    },
                    "Action": ["sts:AssumeRole"],
                }]
            },
            ManagedPolicyArns=[
                "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole"
            ],
        ))
    # Role assumed by the cluster's EC2 instances.
    emr_job_flow_role = template.add_resource(
        iam.Role(
            "EMRJobFlowRole",
            AssumeRolePolicyDocument={
                "Statement": [{
                    "Effect": "Allow",
                    "Principal": {
                        "Service": ["ec2.amazonaws.com"]
                    },
                    "Action": ["sts:AssumeRole"]
                }]
            },
            ManagedPolicyArns=[
                "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role"
            ],
        ))
    # Instances pick up the job-flow role through this instance profile.
    emr_instance_profile = template.add_resource(
        iam.InstanceProfile("EMRInstanceProfile",
                            Roles=[Ref(emr_job_flow_role)]))

    # --- EMR cluster ------------------------------------------------------
    cluster = template.add_resource(
        emr.Cluster(
            "EMRCluster",
            ReleaseLabel="emr-6.0.0",
            JobFlowRole=Ref(emr_instance_profile),
            ServiceRole=Ref(emr_service_role),
            Instances=emr.JobFlowInstancesConfig(
                Ec2KeyName=Ref(keyname_param),
                Ec2SubnetId=Ref(subnet),
                # The same security group serves both master and workers.
                EmrManagedMasterSecurityGroup=Ref(emr_security_group),
                EmrManagedSlaveSecurityGroup=Ref(emr_security_group),
                MasterInstanceGroup=emr.InstanceGroupConfigProperty(
                    Name="Master Instance",
                    InstanceCount="1",
                    InstanceType=M4_LARGE,
                    Market="ON_DEMAND",
                ),
                CoreInstanceGroup=emr.InstanceGroupConfigProperty(
                    Name="Core Instance",
                    # One 10 GiB gp2 EBS volume per core instance.
                    EbsConfiguration=emr.EbsConfiguration(
                        EbsBlockDeviceConfigs=[
                            emr.EbsBlockDeviceConfigs(
                                VolumeSpecification=emr.VolumeSpecification(
                                    SizeInGB="10", VolumeType="gp2"),
                                VolumesPerInstance="1",
                            )
                        ],
                        EbsOptimized="true",
                    ),
                    InstanceCount="1",
                    InstanceType=M4_LARGE,
                ),
            ),
            Applications=[emr.Application(Name="Spark")],
            VisibleToAllUsers="true",
        ))

    # Expose the master node's public DNS name as a stack output.
    template.add_output(
        [Output("MasterPublicDNS", Value=GetAtt(cluster, "MasterPublicDNS"))])
    return template