Exemplo n.º 1
0
    def test_allow_string_cluster(self):
        spot = "2"
        withSpotPrice = "WithSpotPrice"
        cluster = emr.Cluster(
            'Cluster',
            # AdditionalInfo="Additional Info",
            Applications=[
                emr.Application(Name="Hadoop"),
                emr.Application(Name="Hive"),
                emr.Application(Name="Mahout"),
                emr.Application(Name="Pig"),
                emr.Application(Name="Spark")
            ],
            BootstrapActions=[
                emr.BootstrapActionConfig(
                    Name='Dummy bootstrap action',
                    ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                        Path='file:/usr/share/aws/emr/scripts/install-hue',
                        Args=["dummy", "parameter"]))
            ],
            Configurations=[
                emr.Configuration(Classification="core-site",
                                  ConfigurationProperties={
                                      'hadoop.security.groups.cache.secs':
                                      '250'
                                  })
            ],
            Instances=emr.JobFlowInstancesConfig(
                Ec2KeyName="KeyName",
                Ec2SubnetId="SubnetId",
                MasterInstanceGroup=emr.InstanceGroupConfigProperty(
                    InstanceCount="1",
                    InstanceType=M4_LARGE,
                    AutoScalingPolicy=emr.AutoScalingPolicy(
                        Constraints=emr.ScalingConstraints(MinCapacity="1",
                                                           MaxCapacity="3"),
                        Rules=self.generate_rules("MasterAutoScalingPolicy")),
                ),
                CoreInstanceGroup=emr.InstanceGroupConfigProperty(
                    Name="Core Instance",
                    BidPrice=If(withSpotPrice, Ref(spot), Ref("AWS::NoValue")),
                    Market=If(withSpotPrice, "SPOT", "ON_DEMAND"),
                    InstanceCount="1",
                    InstanceType=M4_LARGE,
                    AutoScalingPolicy=emr.AutoScalingPolicy(
                        Constraints=emr.ScalingConstraints(MinCapacity="1",
                                                           MaxCapacity="3"),
                        Rules=self.generate_rules("CoreAutoScalingPolicy"),
                    )),
            ),
            JobFlowRole="EMRJobFlowRole",
            LogUri="s3://cluster-logs",
            Name="EMR Cluster",
            ReleaseLabel="emr-5.5.0",
            ServiceRole="EMRServiceRole",
            AutoScalingRole="EMR_AutoScaling_DefaultRole",
            VisibleToAllUsers="true",
            Tags=Tags(Name="EMR Sample Cluster"))

        cluster.to_dict()
Exemplo n.º 2
0
                               })
                       ])
 ],
 EbsConfiguration=emr.EbsConfiguration(EbsBlockDeviceConfig=[
     emr.EbsBlockDeviceConfig(
         VolumeSpecification=emr.VolumeSpecification(
             SizeInGB="100", VolumeType="standard"),
         VolumesPerInstance="1")
 ],
                                       EbsOptimized="true"),
 JobFlowRole=Ref(emr_instance_profile),
 ServiceRole=Ref(emr_service_role),
 Instances=emr.JobFlowInstancesConfig(
     MasterInstanceGroup=emr.InstanceGroupConfigProperty(
         Name="Master Instance",
         InstanceCount="1",
         InstanceType="m3.xlarge",
         Market="ON_DEMAND"),
     CoreInstanceGroup=emr.InstanceGroupConfigProperty(
         Name="Core Instance",
         BidPrice="20",
         InstanceCount="1",
         InstanceType="m3.xlarge",
         Market="SPOT")),
 Applications=[
     emr.Application(Name="Hadoop"),
     emr.Application(Name="Hive"),
     emr.Application(Name="Mahout"),
     emr.Application(Name="Pig"),
     emr.Application(Name="Spark")
 ],
Exemplo n.º 3
0
                     Join("", ["-XX:GCTimeRatio=",
                               Ref(gcTimeRatio)])
                 })
         ])
 ],
 JobFlowRole=Ref(emr_instance_profile),
 ServiceRole=Ref(emr_service_role),
 AutoScalingRole=emr_autoscaling_role,
 Instances=emr.JobFlowInstancesConfig(
     Ec2KeyName=Ref(keyname),
     Ec2SubnetId=Ref(subnet),
     MasterInstanceGroup=emr.InstanceGroupConfigProperty(
         Name="Master Instance",
         InstanceCount="1",
         InstanceType=M4_LARGE,
         Market="ON_DEMAND",
         AutoScalingPolicy=emr.AutoScalingPolicy(
             Constraints=emr.ScalingConstraints(MinCapacity="1",
                                                MaxCapacity="3"),
             Rules=generate_rules("MasterAutoScalingPolicy"))),
     CoreInstanceGroup=emr.InstanceGroupConfigProperty(
         Name="Core Instance",
         BidPrice=If(withSpotPrice, Ref(spot), Ref("AWS::NoValue")),
         Market=If(withSpotPrice, "SPOT", "ON_DEMAND"),
         AutoScalingPolicy=emr.AutoScalingPolicy(
             Constraints=emr.ScalingConstraints(MinCapacity="1",
                                                MaxCapacity="3"),
             Rules=generate_rules("CoreAutoScalingPolicy"),
         ),
         EbsConfiguration=emr.EbsConfiguration(EbsBlockDeviceConfigs=[
             emr.EbsBlockDeviceConfigs(
Exemplo n.º 4
0
    def test_allow_string_cluster(self):
        cluster_security_configuration = emr.SecurityConfiguration(
            'emrsecurityconfiguration',
            Name="EMRSecurityConfiguration",
            SecurityConfiguration=security_configuration)

        spot = "2"
        withSpotPrice = "WithSpotPrice"
        cluster = emr.Cluster(
            'Cluster',
            # AdditionalInfo="Additional Info",
            Applications=[
                emr.Application(Name="Hadoop"),
                emr.Application(Name="Hive"),
                emr.Application(Name="Mahout"),
                emr.Application(Name="Pig"),
                emr.Application(Name="Spark")
            ],
            BootstrapActions=[
                emr.BootstrapActionConfig(
                    Name='Dummy bootstrap action',
                    ScriptBootstrapAction=emr.ScriptBootstrapActionConfig(
                        Path='file:/usr/share/aws/emr/scripts/install-hue',
                        Args=["dummy", "parameter"]))
            ],
            Configurations=[
                emr.Configuration(Classification="core-site",
                                  ConfigurationProperties={
                                      'hadoop.security.groups.cache.secs':
                                      '250'
                                  })
            ],
            Instances=emr.JobFlowInstancesConfig(
                Ec2KeyName="KeyName",
                Ec2SubnetId="SubnetId",
                MasterInstanceGroup=emr.InstanceGroupConfigProperty(
                    InstanceCount="1",
                    InstanceType=M4_LARGE,
                    AutoScalingPolicy=emr.AutoScalingPolicy(
                        Constraints=emr.ScalingConstraints(MinCapacity="1",
                                                           MaxCapacity="3"),
                        Rules=self.generate_rules("MasterAutoScalingPolicy")),
                ),
                CoreInstanceGroup=emr.InstanceGroupConfigProperty(
                    Name="Core Instance",
                    BidPrice=If(withSpotPrice, Ref(spot), Ref("AWS::NoValue")),
                    Market=If(withSpotPrice, "SPOT", "ON_DEMAND"),
                    InstanceCount="1",
                    InstanceType=M4_LARGE,
                    AutoScalingPolicy=emr.AutoScalingPolicy(
                        Constraints=emr.ScalingConstraints(MinCapacity="1",
                                                           MaxCapacity="3"),
                        Rules=self.generate_rules("CoreAutoScalingPolicy"),
                    )),
            ),
            JobFlowRole="EMRJobFlowRole",
            LogUri="s3://cluster-logs",
            Name="EMR Cluster",
            ReleaseLabel="emr-5.5.0",
            SecurityConfiguration=Ref(cluster_security_configuration),
            ServiceRole="EMRServiceRole",
            AutoScalingRole="EMR_AutoScaling_DefaultRole",
            VisibleToAllUsers="true",
            Tags=Tags(Name="EMR Sample Cluster"))

        cluster.to_dict()

        autoscale_policy = emr.AutoScalingPolicy(
            Constraints=emr.ScalingConstraints(MinCapacity=0, MaxCapacity=5),
            Rules=[
                emr.ScalingRule(
                    Name='ScaleUpContainerPending',
                    Description='Scale up on over-provisioned '
                    'containers',
                    Action=emr.ScalingAction(
                        SimpleScalingPolicyConfiguration=emr.
                        SimpleScalingPolicyConfiguration(
                            AdjustmentType=emr.CHANGE_IN_CAPACITY,
                            CoolDown=300,
                            ScalingAdjustment=1)),
                    Trigger=emr.ScalingTrigger(
                        CloudWatchAlarmDefinition=emr.
                        CloudWatchAlarmDefinition(
                            ComparisonOperator='GREATER_THAN',
                            MetricName='ContainerPendingRatio',
                            Period=300,
                            Threshold=0.75,
                            Dimensions=[
                                emr.MetricDimension(Key='JobFlowId',
                                                    Value='${emr.clusterId}')
                            ]))),
                emr.ScalingRule(
                    Name='ScaleUpMemory',
                    Description='Scale up on low memory',
                    Action=emr.ScalingAction(
                        SimpleScalingPolicyConfiguration=emr.
                        SimpleScalingPolicyConfiguration(
                            AdjustmentType='CHANGE_IN_CAPACITY',
                            CoolDown=300,
                            ScalingAdjustment=1)),
                    Trigger=emr.ScalingTrigger(
                        CloudWatchAlarmDefinition=emr.
                        CloudWatchAlarmDefinition(
                            ComparisonOperator='LESS_THAN',
                            MetricName='YARNMemoryAvailablePercentage',
                            Period=300,
                            Threshold=15,
                            Dimensions=[
                                emr.MetricDimension(Key='JobFlowId',
                                                    Value='${emr.clusterId}')
                            ]))),
                emr.ScalingRule(
                    Name='ScaleDownMemory',
                    Description='Scale down on high memory',
                    Action=emr.ScalingAction(
                        SimpleScalingPolicyConfiguration=emr.
                        SimpleScalingPolicyConfiguration(
                            AdjustmentType=emr.CHANGE_IN_CAPACITY,
                            CoolDown=300,
                            ScalingAdjustment=-1)),
                    Trigger=emr.ScalingTrigger(
                        CloudWatchAlarmDefinition=emr.
                        CloudWatchAlarmDefinition(
                            ComparisonOperator='GREATER_THAN',
                            MetricName='YARNMemoryAvailablePercentage',
                            Period=300,
                            Threshold=75,
                            Dimensions=[
                                emr.MetricDimension(Key='JobFlowId',
                                                    Value='${emr.clusterId}')
                            ])))
            ])

        emr.InstanceGroupConfig('TaskInstanceGroup',
                                AutoScalingPolicy=autoscale_policy,
                                InstanceCount=0,
                                InstanceType=M4_LARGE,
                                InstanceRole='TASK',
                                Market='ON_DEMAND',
                                Name='Task Instance',
                                JobFlowId=Ref(cluster))
Exemplo n.º 5
0
                     "HADOOP_DATANODE_HEAPSIZE":
                     "2048",
                     "HADOOP_NAMENODE_OPTS":
                     Join("", ["-XX:GCTimeRatio=",
                               Ref(gcTimeRatio)])
                 })
         ])
 ],
 JobFlowRole=Ref(emr_instance_profile),
 ServiceRole=Ref(emr_service_role),
 Instances=emr.JobFlowInstancesConfig(
     Ec2KeyName=Ref(keyname),
     Ec2SubnetId=Ref(subnet),
     MasterInstanceGroup=emr.InstanceGroupConfigProperty(
         Name="Master Instance",
         InstanceCount="1",
         InstanceType=M4_LARGE,
         Market="ON_DEMAND"),
     CoreInstanceGroup=emr.InstanceGroupConfigProperty(
         Name="Core Instance",
         BidPrice=If(withSpotPrice, Ref(spot), Ref("AWS::NoValue")),
         Market=If(withSpotPrice, "SPOT", "ON_DEMAND"),
         EbsConfiguration=emr.EbsConfiguration(EbsBlockDeviceConfigs=[
             emr.EbsBlockDeviceConfigs(
                 VolumeSpecification=emr.VolumeSpecification(
                     SizeInGB="10", VolumeType="gp2"),
                 VolumesPerInstance="1")
         ],
                                               EbsOptimized="true"),
         InstanceCount="1",
         InstanceType=M4_LARGE,
Exemplo n.º 6
0
              Description='Number of core instances',
              MaxValue='10'))

cluster = template.add_resource(
    emr.Cluster(
        'Cluster',
        Name='Jupyter Cluster',
        ReleaseLabel=cfg['version'],
        JobFlowRole='GenericEMRInstanceProfile',
        ServiceRole='GenericEMRServiceRole',
        Instances=emr.JobFlowInstancesConfig(
            Ec2KeyName=cfg['ssh_key'],
            Ec2SubnetId=networking_resources['JupyterEMRSubnet'],
            MasterInstanceGroup=emr.InstanceGroupConfigProperty(
                Name='Master Instance',
                InstanceCount='1',
                InstanceType='m4.xlarge',
                Market='ON_DEMAND'),
            CoreInstanceGroup=emr.InstanceGroupConfigProperty(
                Name='Core Instance',
                InstanceCount=Ref(instances),
                InstanceType='m4.xlarge',
                Market='SPOT',
                BidPrice='0.1'),
            AdditionalMasterSecurityGroups=[
                networking_resources['EMRMasterSecurityGroup']
            ],
            # AdditionalSlaveSecurityGroups=[Ref(emr_additional_slave_sg_param)]
        ),
        LogUri='s3://nicor-dev/logs/emr/jupyter',
        BootstrapActions=[
Exemplo n.º 7
0
def build(ssh_keypair_name):
    template = Template()
    template.set_version("2010-09-09")

    keyname_param = template.add_parameter(
        Parameter(
            "KeyName",
            ConstraintDescription=
            "must be the name of an existing EC2 KeyPair.",
            Description=
            "Name of an existing EC2 KeyPair to enable SSH access to \
    the instance",
            Type="AWS::EC2::KeyPair::KeyName",
            Default=ssh_keypair_name,
        ))

    sshlocation_param = template.add_parameter(
        Parameter(
            "SSHLocation",
            Description=
            " The IP address range that can be used to SSH to the EC2 \
    instances",
            Type="String",
            MinLength="9",
            MaxLength="18",
            Default="0.0.0.0/0",
            AllowedPattern=
            r"(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2})",
            ConstraintDescription=(
                "must be a valid IP CIDR range of the form x.x.x.x/x."),
        ))

    vpc = template.add_resource(VPC("VPC", CidrBlock="10.0.0.0/16"))

    subnet = template.add_resource(
        Subnet(
            "Subnet",
            CidrBlock="10.0.0.0/24",
            VpcId=Ref(vpc),
        ))

    internet_gateway = template.add_resource(
        InternetGateway("InternetGateway"))
    attach_gateway = template.add_resource(
        VPCGatewayAttachment("AttachGateway",
                             VpcId=Ref(vpc),
                             InternetGatewayId=Ref(internet_gateway)))
    route_table = template.add_resource(
        RouteTable("RouteTable", VpcId=Ref(vpc)))

    template.add_resource(
        Route(
            "Route",
            DependsOn=attach_gateway,
            GatewayId=Ref(internet_gateway),
            DestinationCidrBlock="0.0.0.0/0",
            RouteTableId=Ref(route_table),
        ))

    template.add_resource(
        SubnetRouteTableAssociation(
            "SubnetRouteTableAssociation",
            SubnetId=Ref(subnet),
            RouteTableId=Ref(route_table),
        ))

    network_acl = template.add_resource(
        NetworkAcl("NetworkAcl", VpcId=Ref(vpc)))

    template.add_resource(
        NetworkAclEntry(
            "InboundSSHNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="101",
            Protocol="6",
            PortRange=PortRange(To="22", From="22"),
            Egress="false",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))

    template.add_resource(
        NetworkAclEntry(
            "InboundResponsePortsNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="102",
            Protocol="6",
            PortRange=PortRange(To="65535", From="1024"),
            Egress="false",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))

    template.add_resource(
        NetworkAclEntry(
            "OutBoundResponsePortsNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="103",
            Protocol="6",
            PortRange=PortRange(To="65535", From="1024"),
            Egress="true",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))

    template.add_resource(
        NetworkAclEntry(
            "OutBoundHTTPPortsNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="104",
            Protocol="6",
            PortRange=PortRange(To="80", From="80"),
            Egress="true",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))

    template.add_resource(
        NetworkAclEntry(
            "OutBoundHTTPSPortsNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="105",
            Protocol="6",
            PortRange=PortRange(To="443", From="443"),
            Egress="true",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))

    template.add_resource(
        NetworkAclEntry(
            "OutBoundSSHPortsNetworkAclEntry",
            NetworkAclId=Ref(network_acl),
            RuleNumber="106",
            Protocol="6",
            PortRange=PortRange(To="22", From="22"),
            Egress="true",
            RuleAction="allow",
            CidrBlock="0.0.0.0/0",
        ))

    template.add_resource(
        SubnetNetworkAclAssociation("SubnetNetworkAclAssociation",
                                    SubnetId=Ref(subnet),
                                    NetworkAclId=Ref(network_acl)))

    emr_security_group = template.add_resource(
        SecurityGroup(
            "EMRSecurityGroup",
            GroupDescription="Enable SSH access via port 22",
            SecurityGroupIngress=[
                SecurityGroupRule(IpProtocol="tcp",
                                  FromPort="22",
                                  ToPort="22",
                                  CidrIp=Ref(sshlocation_param)),
            ],
            VpcId=Ref(vpc),
        ))

    emr_service_role = template.add_resource(
        iam.Role(
            "EMRServiceRole",
            AssumeRolePolicyDocument={
                "Statement": [{
                    "Effect": "Allow",
                    "Principal": {
                        "Service": ["elasticmapreduce.amazonaws.com"]
                    },
                    "Action": ["sts:AssumeRole"],
                }]
            },
            ManagedPolicyArns=[
                "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole"
            ],
        ))

    emr_job_flow_role = template.add_resource(
        iam.Role(
            "EMRJobFlowRole",
            AssumeRolePolicyDocument={
                "Statement": [{
                    "Effect": "Allow",
                    "Principal": {
                        "Service": ["ec2.amazonaws.com"]
                    },
                    "Action": ["sts:AssumeRole"]
                }]
            },
            ManagedPolicyArns=[
                "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role"
            ],
        ))

    emr_instance_profile = template.add_resource(
        iam.InstanceProfile("EMRInstanceProfile",
                            Roles=[Ref(emr_job_flow_role)]))

    cluster = template.add_resource(
        emr.Cluster(
            "EMRCluster",
            ReleaseLabel="emr-6.0.0",
            JobFlowRole=Ref(emr_instance_profile),
            ServiceRole=Ref(emr_service_role),
            Instances=emr.JobFlowInstancesConfig(
                Ec2KeyName=Ref(keyname_param),
                Ec2SubnetId=Ref(subnet),
                EmrManagedMasterSecurityGroup=Ref(emr_security_group),
                EmrManagedSlaveSecurityGroup=Ref(emr_security_group),
                MasterInstanceGroup=emr.InstanceGroupConfigProperty(
                    Name="Master Instance",
                    InstanceCount="1",
                    InstanceType=M4_LARGE,
                    Market="ON_DEMAND",
                ),
                CoreInstanceGroup=emr.InstanceGroupConfigProperty(
                    Name="Core Instance",
                    EbsConfiguration=emr.EbsConfiguration(
                        EbsBlockDeviceConfigs=[
                            emr.EbsBlockDeviceConfigs(
                                VolumeSpecification=emr.VolumeSpecification(
                                    SizeInGB="10", VolumeType="gp2"),
                                VolumesPerInstance="1",
                            )
                        ],
                        EbsOptimized="true",
                    ),
                    InstanceCount="1",
                    InstanceType=M4_LARGE,
                ),
            ),
            Applications=[emr.Application(Name="Spark")],
            VisibleToAllUsers="true",
        ))

    template.add_output(
        [Output("MasterPublicDNS", Value=GetAtt(cluster, "MasterPublicDNS"))])
    return template
Exemplo n.º 8
0
                               ConfigurationProperties={
                                   "HADOOP_DATANODE_HEAPSIZE":
                                   "2048",
                                   "HADOOP_NAMENODE_OPTS":
                                   "-XX:GCTimeRatio=19"
                               })
                       ])
 ],
 JobFlowRole=Ref(emr_instance_profile),
 ServiceRole=Ref(emr_service_role),
 Instances=emr.JobFlowInstancesConfig(
     Ec2KeyName=Ref(keyname),
     Ec2SubnetId=Ref(subnet),
     MasterInstanceGroup=emr.InstanceGroupConfigProperty(
         Name="Master Instance",
         InstanceCount="1",
         InstanceType=M4_LARGE,
         Market="ON_DEMAND"),
     CoreInstanceGroup=emr.InstanceGroupConfigProperty(
         Name="Core Instance",
         BidPrice="0.1",
         EbsConfiguration=emr.EbsConfiguration(EbsBlockDeviceConfigs=[
             emr.EbsBlockDeviceConfigs(
                 VolumeSpecification=emr.VolumeSpecification(
                     SizeInGB="10", VolumeType="gp2"),
                 VolumesPerInstance="1")
         ],
                                               EbsOptimized="true"),
         InstanceCount="1",
         InstanceType=M4_LARGE,
         Market="SPOT")),