def addSearchGuard(template, role, subnet, keyname, secgroup, profilename):
    profile = InstanceProfile("sgprofile" + profilename, Path="/", Roles=[Ref(role)])
    template.add_resource(profile)
    instance = Instance(
        "sg" + profilename,
        InstanceType="m4.xlarge",
        ImageId=FindInMap("RegionToAmi", Ref("AWS::Region"), "stable"),
        DisableApiTermination=False,
        IamInstanceProfile=Ref(profile),
        KeyName=Ref(keyname),
        Monitoring=False,
        InstanceInitiatedShutdownBehavior="stop",
        UserData=userdata.from_file("src/bootstrap.sh"),
        NetworkInterfaces=[
            NetworkInterfaceProperty(DeviceIndex=0,
                                     Description="Primary network interface",
                                     SubnetId=Ref(subnet),
                                     DeleteOnTermination=True,
                                     AssociatePublicIpAddress=True,
                                     GroupSet=[Ref(secgroup)])
        ],
        Tags=[
            Tag("Name", "Search Guard " + profilename),
            Tag("sgnodetag", profilename)
        ],
        EbsOptimized=False,
        BlockDeviceMappings=[
            BlockDeviceMapping(DeviceName="/dev/sda1",
                               Ebs=EBSBlockDevice(VolumeSize=25))
        ])
    template.add_resource(instance)
    return instance
def add_launch_config(self):
    ''' Add autoscaling launch configuration '''
    self.cfn_template.add_resource(
        LaunchConfiguration(
            title=constants.INST_LC,
            AssociatePublicIpAddress=False,
            BlockDeviceMappings=[
                BlockDeviceMapping(
                    DeviceName='/dev/sda1',
                    Ebs=EBSBlockDevice(
                        DeleteOnTermination=True,
                        VolumeSize=100,
                        VolumeType='gp2'
                    )
                )
            ],
            IamInstanceProfile=Ref(constants.INST_PROFILE),
            ImageId=Ref('AmiId'),
            InstanceType=Ref('InstanceType'),
            SecurityGroups=[
                Ref(constants.SSH_SG),
                ImportValue(Sub('${Environment}-AppSecurityGroup')),
            ],
            UserData=Base64(
                Sub(constants.USERDATA)
            )
        )
    )
    return self.cfn_template
def add_vpnInstance(self): t = self.template self.vpnInstance = t.add_resource( Instance( "OpenVPNInstance", ImageId=Ref(self.amiParam), SecurityGroupIds=[Ref(self.openVPNSecurityGroup)], SubnetId=Ref(self.vpnSubnetParam), KeyName=Ref(self.keyPairParam), InstanceType=Ref(self.instanceTypeParam), BlockDeviceMappings=[ BlockDeviceMapping( DeviceName="/dev/sda1", Ebs=EBSBlockDevice( VolumeSize=Ref(self.volumeSizeParam))) ], UserData=Base64( Join("", [ "admin_user="******"\n", "admin_pw=", self.sceptreUserData['vpnAdminPw'], "\n", "reroute_gw=1\n", "reroute_dns=1\n" ])), Tags=self.defaultTags + [Tag('Name', Join("", [self.namePrefix, 'OpenVPNInstance']))]))
def test_using_ephemeral_instance_store(self):
    '''
    The EC2 instance is launched from an EBS-backed root volume (the default).
    Ephemeral instance stores aren't standalone resources like EBS volumes;
    they are part of the EC2 instance, and the ephemeral storage available is
    predefined by the instance type.
    '''
    test_stack_name = 'TestInstanceStore'
    init_cf_env(test_stack_name)
    ###
    t = Template()
    sg = ts_add_security_group(t)
    instance = ts_add_instance_with_public_ip(t, Ref(sg))
    instance.InstanceType = 'm5ad.large'  # ephemeral storage is predefined by instance type
    instance.BlockDeviceMappings = [      # use block device mapping to specify device
        BlockDeviceMapping(
            DeviceName='/dev/xvda',       # EBS root volume (OS lives here)
            Ebs=EBSBlockDevice(VolumeSize=20, VolumeType='gp2')),
    ]
    t.add_output([
        Output(
            "PublicIP",
            Value=GetAtt(instance, "PublicIp"),
        ),
    ])
    dump_template(t, True)
    create_stack(test_stack_name, t)
    outputs = get_stack_outputs(test_stack_name)
    public_ip = get_output_value(outputs, 'PublicIP')
    stdout = run(f'ssh {SSH_OPTIONS} ec2-user@{public_ip} sudo fdisk -l')
    stdout = run(f'ssh {SSH_OPTIONS} ec2-user@{public_ip} lsblk')
    run(f'ssh {SSH_OPTIONS} ec2-user@{public_ip} sudo mkfs -t ext4 /dev/nvme1n1')
    run(f'ssh {SSH_OPTIONS} ec2-user@{public_ip} sudo mkdir /mnt/volume/')
    run(f'ssh {SSH_OPTIONS} ec2-user@{public_ip} sudo mount /dev/nvme1n1 /mnt/volume/')
    # write performance comparison
    run(f'ssh {SSH_OPTIONS} ec2-user@{public_ip} sudo dd if=/dev/zero of=/mnt/volume/tempfile bs=1M count=1024')
    run(f'ssh {SSH_OPTIONS} ec2-user@{public_ip} "echo 3 | sudo tee /proc/sys/vm/drop_caches"', True)
    run(f'ssh {SSH_OPTIONS} ec2-user@{public_ip} sudo dd if=/dev/zero of=/tempfile bs=1M count=1024')  # write to ebs
    # read performance comparison
    run(f'ssh {SSH_OPTIONS} ec2-user@{public_ip} sudo dd if=/mnt/volume/tempfile of=/dev/null bs=1M count=1024')
    run(f'ssh {SSH_OPTIONS} ec2-user@{public_ip} "echo 3 | sudo tee /proc/sys/vm/drop_caches"', True)
    run(f'ssh {SSH_OPTIONS} ec2-user@{public_ip} sudo dd if=/tempfile of=/dev/null bs=1M count=1024')
def my_block_device_mappings_ebs(count, devicenamebase, volumesize, volumetype):
    block_device_mappings_ebs = []
    block_device_mappings_ebs.append(my_block_device_mappings_root("/dev/sd", "100", "gp2"))
    for i in range(count):  # range() instead of Python 2's xrange()
        block_device_mappings_ebs.append(
            BlockDeviceMapping(
                DeviceName=devicenamebase + chr(i + 98),  # /dev/sdb, /dev/sdc, ...
                Ebs=EBSBlockDevice(
                    VolumeSize=volumesize,
                    VolumeType=volumetype,
                    DeleteOnTermination=True,
                )))
    return block_device_mappings_ebs
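# Illustrative usage sketch (not from the original source): attaching the
# mappings built by my_block_device_mappings_ebs() to a troposphere Instance.
# The AMI ID and logical resource name below are hypothetical placeholders.
from troposphere import Template
from troposphere.ec2 import Instance

t = Template()
t.add_resource(Instance(
    "ExampleInstance",
    ImageId="ami-12345678",        # placeholder AMI
    InstanceType="m4.xlarge",
    # root mapping plus two extra volumes: /dev/sdb and /dev/sdc
    BlockDeviceMappings=my_block_device_mappings_ebs(2, "/dev/sd", "200", "gp2"),
))
print(t.to_json())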
def block_devices(self):
    """ Get block devices for the EC2 instances """
    block_devices = []
    # default to an empty list so a missing 'ebs_mounts' key doesn't raise TypeError
    for block in self.instance_base.get('ebs_mounts', []):
        block_devices.append(BlockDeviceMapping(
            DeviceName=block.get('device_name'),
            Ebs=EBSBlockDevice(
                VolumeSize=block.get('size', 8),
                DeleteOnTermination=True,
                VolumeType='gp2'
            )
        ))
    return block_devices
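# Illustrative only (keys inferred from block_devices() above): the shape of
# instance_base['ebs_mounts'] the method expects -- a list of dicts with
# 'device_name' and an optional 'size' that defaults to 8 GiB.
instance_base = {
    'ebs_mounts': [
        {'device_name': '/dev/xvdb', 'size': 50},   # 50 GiB gp2 volume
        {'device_name': '/dev/xvdc'},               # falls back to 8 GiB
    ]
}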
def main(**params): try: # Metadata t = Template() t.set_version("2010-09-09") t.set_description( "(SOCA) - Base template to deploy compute nodes. Version 2.6.0") allow_anonymous_data_collection = params["MetricCollectionAnonymous"] debug = False mip_usage = False instances_list = params[ "InstanceType"] # list of instance type. Use + to specify more than one type asg_lt = asg_LaunchTemplate() ltd = LaunchTemplateData("NodeLaunchTemplateData") mip = MixedInstancesPolicy() stack_name = Ref("AWS::StackName") # Begin LaunchTemplateData UserData = '''#!/bin/bash -ex # Configure the proxy value="''' + params['ProxyCACert'] + '''" echo $value > /etc/pki/ca-trust/source/anchors/proxyCA.pem update-ca-trust cat <<EOF > /etc/profile.d/proxy.sh proxy_url="http://''' + params['ProxyPrivateDnsName'] + ''':3128/" export HTTP_PROXY=\$proxy_url export HTTPS_PROXY=\$proxy_url export http_proxy=\$proxy_url export https_proxy=\$proxy_url # No proxy: # Comma separated list of destinations that shouldn't go to the proxy. # - EC2 metadata service # - Private IP address ranges (VPC local) export NO_PROXY="''' + params['NoProxy'] + '''" export no_proxy=\$NO_PROXY export REQUESTS_CA_BUNDLE=/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem EOF source /etc/profile.d/proxy.sh cat <<EOF > /etc/yum.repos.d/10_proxy.conf [main] proxy=http://''' + params['ProxyPrivateDnsName'] + ''':3128/ EOF if grep -q 'Amazon Linux release 2' /etc/system-release; then BASE_OS=amazonlinux2 elif grep -q 'CentOS Linux release 7' /etc/system-release; then BASE_OS=centos7 else BASE_OS=rhel7 fi # Install pip and awscli export PATH=$PATH:/usr/local/bin if [[ "$BASE_OS" == "centos7" ]] || [[ "$BASE_OS" == "rhel7" ]]; then yum install -y python3-pip PIP=$(which pip3) $PIP install awscli else yum install -y python3-pip PIP=$(which pip3) $PIP install awscli fi # Configure using ansible # If not amazon linux then the proxy needs to be set up before ansible can be installed. # The playbooks are downloaded from S3 using the S3 VPC endpoint so don't require the proxy. if ! 
yum list installed ansible &> /dev/null; then if [ $BASE_OS == "amazonlinux2" ]; then amazon-linux-extras install -y ansible2 else yum -y install ansible fi fi aws s3 cp --recursive s3://''' + params['S3Bucket'] + '''/''' + params[ 'S3InstallFolder'] + '''/playbooks/ /root/playbooks/ cd /root/playbooks ansible-playbook computeNode.yml -e Region=''' + params[ 'Region'] + ''' -e Domain=''' + params[ 'SocaDomain'] + ''' -e S3InstallBucket=''' + params[ 'S3Bucket'] + ''' -e S3InstallFolder=''' + params[ 'S3InstallFolder'] + ''' -e ClusterId=''' + params[ 'ClusterId'] + ''' -e NoProxy=''' + params[ 'NoProxy'] + ''' -e NodeType=''' + params[ 'NodeType'] + ''' >> /root/ansible.log 2>&1 if [[ "$BASE_OS" == "centos7" ]] || [[ "$BASE_OS" == "rhel7" ]]; then yum install -y nfs-utils # enforce install of nfs-utils fi if [[ "$BASE_OS" == "amazonlinux2" ]]; then /usr/sbin/update-motd --disable fi GET_INSTANCE_TYPE=$(curl http://169.254.169.254/latest/meta-data/instance-type) echo export "SOCA_CONFIGURATION="''' + str(params['ClusterId'] ) + '''"" >> /etc/environment echo export "SOCA_BASE_OS="$BASE_OS"" >> /etc/environment echo export "SOCA_JOB_QUEUE="''' + str(params['JobQueue'] ) + '''"" >> /etc/environment echo export "SOCA_JOB_OWNER="''' + str(params['JobOwner'] ) + '''"" >> /etc/environment echo export "SOCA_JOB_NAME="''' + str(params['JobName'] ) + '''"" >> /etc/environment echo export "SOCA_JOB_PROJECT="''' + str(params['JobProject'] ) + '''"" >> /etc/environment echo export "SOCA_VERSION="''' + str(params['Version'] ) + '''"" >> /etc/environment echo export "SOCA_JOB_EFA="''' + str(params['Efa']).lower( ) + '''"" >> /etc/environment echo export "SOCA_JOB_ID="''' + str(params['JobId'] ) + '''"" >> /etc/environment echo export "SOCA_SCRATCH_SIZE=''' + str( params['ScratchSize'] ) + '''" >> /etc/environment echo export "SOCA_INSTALL_BUCKET="''' + str( params['S3Bucket'] ) + '''"" >> /etc/environment echo export "SOCA_INSTALL_BUCKET_FOLDER="''' + str( params['S3InstallFolder'] ) + '''"" >> /etc/environment echo export "SOCA_FSX_LUSTRE_BUCKET="''' + str( params['FSxLustreConfiguration'] ['fsx_lustre'] ).lower() + '''"" >> /etc/environment echo export "SOCA_FSX_LUSTRE_DNS="''' + str( params['FSxLustreConfiguration'] ['existing_fsx'] ).lower() + '''"" >> /etc/environment echo export "SOCA_INSTANCE_TYPE=$GET_INSTANCE_TYPE" >> /etc/environment echo export "SOCA_INSTANCE_HYPERTHREADING="''' + str( params['ThreadsPerCore'] ).lower() + '''"" >> /etc/environment echo export "SOCA_SYSTEM_METRICS="''' + str(params['SystemMetrics']).lower( ) + '''"" >> /etc/environment echo export "SOCA_ESDOMAIN_ENDPOINT="''' + str( params['ESDomainEndpoint'] ).lower() + '''"" >> /etc/environment echo export "SOCA_HOST_SYSTEM_LOG="/apps/soca/''' + str( params['ClusterId'] ) + '''/cluster_node_bootstrap/logs/''' + str( params['JobId'] ) + '''/$(hostname -s)"" >> /etc/environment echo export "AWS_STACK_ID=${AWS::StackName}" >> /etc/environment echo export "AWS_DEFAULT_REGION=''' + params[ 'Region'] + '''" >> /etc/environment source /etc/environment AWS=$(which aws) # Give yum permission to the user on this specific machine echo "''' + params['JobOwner'] + ''' ALL=(ALL) /bin/yum" >> /etc/sudoers mkdir -p /apps mkdir -p /data # Mount EFS echo "''' + params['EFSDataDns'] + ''':/ /data nfs4 nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport 0 0" >> /etc/fstab echo "''' + params['EFSAppsDns'] + ''':/ /apps nfs4 nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport 0 0" >> /etc/fstab 
EFS_MOUNT=0 mount -a while [[ $? -ne 0 ]] && [[ $EFS_MOUNT -lt 5 ]] do SLEEP_TIME=$(( RANDOM % 60 )) echo "Failed to mount EFS, retrying in $SLEEP_TIME seconds and Loop $EFS_MOUNT/5..." sleep $SLEEP_TIME ((EFS_MOUNT++)) mount -a done # Configure Chrony yum remove -y ntp yum install -y chrony mv /etc/chrony.conf /etc/chrony.conf.original echo -e """ # use the local instance NTP service, if available server 169.254.169.123 prefer iburst minpoll 4 maxpoll 4 # Use public servers from the pool.ntp.org project. # Please consider joining the pool (http://www.pool.ntp.org/join.html). # !!! [BEGIN] SOCA REQUIREMENT # You will need to open UDP egress traffic on your security group if you want to enable public pool #pool 2.amazon.pool.ntp.org iburst # !!! [END] SOCA REQUIREMENT # Record the rate at which the system clock gains/losses time. driftfile /var/lib/chrony/drift # Allow the system clock to be stepped in the first three updates # if its offset is larger than 1 second. makestep 1.0 3 # Specify file containing keys for NTP authentication. keyfile /etc/chrony.keys # Specify directory for log files. logdir /var/log/chrony # save data between restarts for fast re-load dumponexit dumpdir /var/run/chrony """ > /etc/chrony.conf systemctl enable chronyd # Prepare Log folder mkdir -p $SOCA_HOST_SYSTEM_LOG chmod +x /apps/soca/$SOCA_CONFIGURATION/cluster_node_bootstrap/ComputeNodePostReboot.sh echo "@reboot /apps/soca/$SOCA_CONFIGURATION/cluster_node_bootstrap/ComputeNodePostReboot.sh >> $SOCA_HOST_SYSTEM_LOG/ComputeNodePostReboot.log 2>&1" | crontab - $AWS s3 cp s3://$SOCA_INSTALL_BUCKET/$SOCA_INSTALL_BUCKET_FOLDER/scripts/config.cfg /root/ chmod +x /apps/soca/$SOCA_CONFIGURATION/cluster_node_bootstrap/ComputeNode.sh /apps/soca/$SOCA_CONFIGURATION/cluster_node_bootstrap/ComputeNode.sh ''' + params[ 'SchedulerHostname'] + ''' >> $SOCA_HOST_SYSTEM_LOG/ComputeNode.sh.log 2>&1''' SpotFleet = True if ((params["SpotPrice"] is not False) and (int(params["DesiredCapacity"]) > 1 or len(instances_list) > 1)) else False ltd.EbsOptimized = True for instance in instances_list: if "t2." 
in instance: ltd.EbsOptimized = False # metal + t2 does not support CpuOptions unsupported = ["t2.", "metal"] if all(itype not in instance for itype in unsupported) and (SpotFleet is False or len(instances_list) == 1): # Spotfleet with multiple instance types doesn't support CpuOptions # So we can't add CpuOptions if SpotPrice is specified and when multiple instances are specified ltd.CpuOptions = CpuOptions( CoreCount=int(params["CoreCount"]), ThreadsPerCore=1 if params["ThreadsPerCore"] is False else 2) ltd.IamInstanceProfile = IamInstanceProfile( Arn=params["ComputeNodeInstanceProfileArn"]) ltd.KeyName = params["SSHKeyPair"] ltd.ImageId = params["ImageId"] if params["SpotPrice"] is not False and params[ "SpotAllocationCount"] is False: ltd.InstanceMarketOptions = InstanceMarketOptions( MarketType="spot", SpotOptions=SpotOptions( MaxPrice=Ref("AWS::NoValue") if params["SpotPrice"] == "auto" else str(params["SpotPrice"]) # auto -> cap at OD price )) ltd.InstanceType = instances_list[0] ltd.NetworkInterfaces = [ NetworkInterfaces(InterfaceType="efa" if params["Efa"] is not False else Ref("AWS::NoValue"), DeleteOnTermination=True, DeviceIndex=0, Groups=[params["SecurityGroupId"]]) ] ltd.UserData = Base64(Sub(UserData)) ltd.BlockDeviceMappings = [ LaunchTemplateBlockDeviceMapping( DeviceName="/dev/xvda" if params["BaseOS"] == "amazonlinux2" else "/dev/sda1", Ebs=EBSBlockDevice(VolumeSize=params["RootSize"], VolumeType="gp2", DeleteOnTermination="false" if params["KeepEbs"] is True else "true", Encrypted=True)) ] if int(params["ScratchSize"]) > 0: ltd.BlockDeviceMappings.append( BlockDeviceMapping( DeviceName="/dev/xvdbx", Ebs=EBSBlockDevice( VolumeSize=params["ScratchSize"], VolumeType="io1" if int(params["VolumeTypeIops"]) > 0 else "gp2", Iops=params["VolumeTypeIops"] if int(params["VolumeTypeIops"]) > 0 else Ref("AWS::NoValue"), DeleteOnTermination="false" if params["KeepEbs"] is True else "true", Encrypted=True))) ltd.TagSpecifications = [ ec2.TagSpecifications( ResourceType="instance", Tags=base_Tags( Name=str(params["ClusterId"]) + "-compute-job-" + str(params["JobId"]), _soca_JobId=str(params["JobId"]), _soca_JobName=str(params["JobName"]), _soca_JobQueue=str(params["JobQueue"]), _soca_StackId=stack_name, _soca_JobOwner=str(params["JobOwner"]), _soca_JobProject=str(params["JobProject"]), _soca_TerminateWhenIdle=str(params["TerminateWhenIdle"]), _soca_KeepForever=str(params["KeepForever"]).lower(), _soca_ClusterId=str(params["ClusterId"]), _soca_NodeType="soca-compute-node")) ] # End LaunchTemplateData # Begin Launch Template Resource lt = LaunchTemplate("NodeLaunchTemplate") lt.LaunchTemplateName = params["ClusterId"] + "-" + str( params["JobId"]) lt.LaunchTemplateData = ltd t.add_resource(lt) # End Launch Template Resource if SpotFleet is True: # SpotPrice is defined and DesiredCapacity > 1 or need to try more than 1 instance_type # Create SpotFleet # Begin SpotFleetRequestConfigData Resource sfrcd = ec2.SpotFleetRequestConfigData() sfrcd.AllocationStrategy = params["SpotAllocationStrategy"] sfrcd.ExcessCapacityTerminationPolicy = "noTermination" sfrcd.IamFleetRole = params["SpotFleetIAMRoleArn"] sfrcd.InstanceInterruptionBehavior = "terminate" if params["SpotPrice"] != "auto": sfrcd.SpotPrice = str(params["SpotPrice"]) sfrcd.TargetCapacity = params["DesiredCapacity"] sfrcd.Type = "maintain" sfltc = ec2.LaunchTemplateConfigs() sflts = ec2.LaunchTemplateSpecification(LaunchTemplateId=Ref(lt), Version=GetAtt( lt, "LatestVersionNumber")) sfltc.LaunchTemplateSpecification = sflts 
sfltc.Overrides = [] for subnet in params["SubnetId"]: for instance in instances_list: sfltc.Overrides.append( ec2.LaunchTemplateOverrides(InstanceType=instance, SubnetId=subnet)) sfrcd.LaunchTemplateConfigs = [sfltc] TagSpecifications = ec2.SpotFleetTagSpecification( ResourceType="spot-fleet-request", Tags=base_Tags( Name=str(params["ClusterId"]) + "-compute-job-" + str(params["JobId"]), _soca_JobId=str(params["JobId"]), _soca_JobName=str(params["JobName"]), _soca_JobQueue=str(params["JobQueue"]), _soca_StackId=stack_name, _soca_JobOwner=str(params["JobOwner"]), _soca_JobProject=str(params["JobProject"]), _soca_TerminateWhenIdle=str(params["TerminateWhenIdle"]), _soca_KeepForever=str(params["KeepForever"]).lower(), _soca_ClusterId=str(params["ClusterId"]), _soca_NodeType="soca-compute-node")) # End SpotFleetRequestConfigData Resource # Begin SpotFleet Resource spotfleet = ec2.SpotFleet("SpotFleet") spotfleet.SpotFleetRequestConfigData = sfrcd t.add_resource(spotfleet) # End SpotFleet Resource else: asg_lt.LaunchTemplateSpecification = LaunchTemplateSpecification( LaunchTemplateId=Ref(lt), Version=GetAtt(lt, "LatestVersionNumber")) asg_lt.Overrides = [] for instance in instances_list: asg_lt.Overrides.append( LaunchTemplateOverrides(InstanceType=instance)) # Begin InstancesDistribution if params["SpotPrice"] is not False and \ params["SpotAllocationCount"] is not False and \ (int(params["DesiredCapacity"]) - int(params["SpotAllocationCount"])) > 0: mip_usage = True idistribution = InstancesDistribution() idistribution.OnDemandAllocationStrategy = "prioritized" # only supported value idistribution.OnDemandBaseCapacity = params[ "DesiredCapacity"] - params["SpotAllocationCount"] idistribution.OnDemandPercentageAboveBaseCapacity = "0" # force the other instances to be SPOT idistribution.SpotMaxPrice = Ref( "AWS::NoValue") if params["SpotPrice"] == "auto" else str( params["SpotPrice"]) idistribution.SpotAllocationStrategy = params[ 'SpotAllocationStrategy'] mip.InstancesDistribution = idistribution # End MixedPolicyInstance # Begin AutoScalingGroup Resource asg = AutoScalingGroup("AutoScalingComputeGroup") asg.DependsOn = "NodeLaunchTemplate" if mip_usage is True or instances_list.__len__() > 1: mip.LaunchTemplate = asg_lt asg.MixedInstancesPolicy = mip else: asg.LaunchTemplate = LaunchTemplateSpecification( LaunchTemplateId=Ref(lt), Version=GetAtt(lt, "LatestVersionNumber")) asg.MinSize = int(params["DesiredCapacity"]) asg.MaxSize = int(params["DesiredCapacity"]) asg.VPCZoneIdentifier = params["SubnetId"] if params["PlacementGroup"] is True: pg = PlacementGroup("ComputeNodePlacementGroup") pg.Strategy = "cluster" t.add_resource(pg) asg.PlacementGroup = Ref(pg) asg.Tags = Tags( Name=str(params["ClusterId"]) + "-compute-job-" + str(params["JobId"]), _soca_JobId=str(params["JobId"]), _soca_JobName=str(params["JobName"]), _soca_JobQueue=str(params["JobQueue"]), _soca_StackId=stack_name, _soca_JobOwner=str(params["JobOwner"]), _soca_JobProject=str(params["JobProject"]), _soca_TerminateWhenIdle=str(params["TerminateWhenIdle"]), _soca_KeepForever=str(params["KeepForever"]).lower(), _soca_ClusterId=str(params["ClusterId"]), _soca_NodeType="soca-compute-node") t.add_resource(asg) # End AutoScalingGroup Resource # Begin FSx for Lustre if params["FSxLustreConfiguration"]["fsx_lustre"] is not False: if params["FSxLustreConfiguration"]["existing_fsx"] is False: fsx_lustre = FileSystem("FSxForLustre") fsx_lustre.FileSystemType = "LUSTRE" fsx_lustre.StorageCapacity = params["FSxLustreConfiguration"][ 
"capacity"] fsx_lustre.SecurityGroupIds = [params["SecurityGroupId"]] fsx_lustre.SubnetIds = params["SubnetId"] fsx_lustre_configuration = LustreConfiguration() fsx_lustre_configuration.DeploymentType = params[ "FSxLustreConfiguration"]["deployment_type"].upper() if params["FSxLustreConfiguration"]["deployment_type"].upper( ) == "PERSISTENT_1": fsx_lustre_configuration.PerUnitStorageThroughput = params[ "FSxLustreConfiguration"]["per_unit_throughput"] if params["FSxLustreConfiguration"]["s3_backend"] is not False: fsx_lustre_configuration.ImportPath = params[ "FSxLustreConfiguration"]["import_path"] if params[ "FSxLustreConfiguration"][ "import_path"] is not False else params[ "FSxLustreConfiguration"]["s3_backend"] fsx_lustre_configuration.ExportPath = params[ "FSxLustreConfiguration"]["import_path"] if params[ "FSxLustreConfiguration"][ "import_path"] is not False else params[ "FSxLustreConfiguration"][ "s3_backend"] + "/" + params[ "ClusterId"] + "-fsxoutput/job-" + params[ "JobId"] + "/" fsx_lustre.LustreConfiguration = fsx_lustre_configuration fsx_lustre.Tags = base_Tags( # False disable PropagateAtLaunch Name=str(params["ClusterId"] + "-compute-job-" + params["JobId"]), _soca_JobId=str(params["JobId"]), _soca_JobName=str(params["JobName"]), _soca_JobQueue=str(params["JobQueue"]), _soca_TerminateWhenIdle=str(params["TerminateWhenIdle"]), _soca_StackId=stack_name, _soca_JobOwner=str(params["JobOwner"]), _soca_JobProject=str(params["JobProject"]), _soca_KeepForever=str(params["KeepForever"]).lower(), _soca_FSx="true", _soca_ClusterId=str(params["ClusterId"]), ) t.add_resource(fsx_lustre) # End FSx For Lustre # Begin Custom Resource # Change Mapping to No if you want to disable this if allow_anonymous_data_collection is True: metrics = CustomResourceSendAnonymousMetrics("SendAnonymousData") metrics.ServiceToken = params["SolutionMetricLambda"] metrics.DesiredCapacity = str(params["DesiredCapacity"]) metrics.InstanceType = str(params["InstanceType"]) metrics.Efa = str(params["Efa"]) metrics.ScratchSize = str(params["ScratchSize"]) metrics.RootSize = str(params["RootSize"]) metrics.SpotPrice = str(params["SpotPrice"]) metrics.BaseOS = str(params["BaseOS"]) metrics.StackUUID = str(params["StackUUID"]) metrics.KeepForever = str(params["KeepForever"]) metrics.FsxLustre = str(params["FSxLustreConfiguration"]) metrics.TerminateWhenIdle = str(params["TerminateWhenIdle"]) metrics.Dcv = "false" t.add_resource(metrics) # End Custom Resource if debug is True: print(t.to_json()) # Tags must use "soca:<Key>" syntax template_output = t.to_yaml().replace("_soca_", "soca:") return {'success': True, 'output': template_output} except Exception as e: exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] return { 'success': False, 'output': 'cloudformation_builder.py: ' + (str(e) + ': error :' + str(exc_type) + ' ' + str(fname) + ' ' + str(exc_tb.tb_lineno)) }
def create_template(num_masters, num_agents, num_publicAgents): #outfilename = "test.json" outfilename = "cf_" + str(num_masters) + "." + str(num_agents) + "." + str( num_publicAgents) + ".json" # Create the Template t = Template() t.add_version('2010-09-09') t.add_description('Creates a set of Servers for DC/OS using CentOS 7.3 AMI. Creates a boot server to host the DC/OS installer and a NAT Instance for outbound connections from private agents. Creates ' + str(num_masters) + ' Master(s), ' \ + str(num_agents) + ' Private Agent(s), and ' + str(num_publicAgents) + ' Public Agent(s). After creating the Stack; Log into the boot server and run the DCOS Bash Script installer for AWS') # Amazon Linux AMI 2016.09.1.20170119 x86_64 VPC NAT HVM EBS # amzn-ami-vpc-nat-hvm-2016.09.1.20170119-x86_64-ebs - # ami-dd3dd7cb us-east-1 (N. Virginia) # ami-564b6e33 us-east-2 (Ohio) # ami-7d54061d us-west-1 (N. Cal) # ami-3b6fd05b us-west-2 (Oregon) t.add_mapping( 'NATAmi', { 'us-east-1': { 'default': 'ami-dd3dd7cb' }, 'us-east-2': { 'default': 'ami-564b6e33' }, 'us-west-1': { 'default': 'ami-7d54061d' }, 'us-west-2': { 'default': 'ami-3b6fd05b' }, }) # The c73 AMI pre created and deployed on each region t.add_mapping( 'c73Ami', { 'us-east-1': { 'default': 'ami-46c1b650' }, 'us-east-2': { 'default': 'ami-18f8df7d' }, 'us-west-1': { 'default': 'ami-f5d7f195' }, 'us-west-2': { 'default': 'ami-f4533694' }, }) # CloudFormation Parameters # Sometimes when I deployed stack on us-east-1; it would fail on av zone us-east-1c with error messages instance type not support on this AZ. I added this parameter to fix all of the components in on AZ for now avzone_param = t.add_parameter( Parameter( "AVZoneName", ConstraintDescription='Must be the name of an an Availability Zone', Description='Name of an Availability Zone', Type='AWS::EC2::AvailabilityZone::Name', )) # Every agent will get a data drive of this size dataDriveSizeGB_param = t.add_parameter( Parameter( "dataDriveSizeGB", Default="100", MinValue=20, MaxValue=1000, Description= 'Size of data drive to add to private agents from 20 to 1000GB', Type='Number')) # The key will be added to the centos user so you can login to centos using the key keyname_param = t.add_parameter( Parameter( "KeyName", ConstraintDescription= 'Must be the name of an existing EC2 KeyPair.', Description= 'Name of an existing EC2 KeyPair to enable SSH access to the instance', Type='AWS::EC2::KeyPair::KeyName', )) # While you can allow everyone it's more secure to just allow a single machine or subnet of machines; web port will also be opened to this CIDR sshlocation_param = t.add_parameter( Parameter( "sshlocation", Type="String", Description= "Subnet allowed to ssh to these servers. 
0.0.0.0/0 to allow all.")) # Instance type for Master instanceTypeMaster_param = t.add_parameter( Parameter( 'InstanceTypeMaster', Type='String', Description='EC2 instance type for ' + str(num_masters) + ' Masters(s)', Default='m4.xlarge', AllowedValues=[ 't2.xlarge', 't2.2xlarge', 'm4.xlarge', 'm4.2xlarge', 'm4.4xlarge', 'm4.10xlarge', 'c4.xlarge', 'c4.2xlarge', 'c4.4xlarge', 'c4.8xlarge', ], ConstraintDescription='Must be a valid EC2 instance type.', )) # Instance type for Agents instanceTypeAgent_param = t.add_parameter( Parameter( 'InstanceTypeAgent', Type='String', Description='EC2 instance type for ' + str(num_agents) + ' Private Agent(s)', Default='m4.2xlarge', AllowedValues=[ 't2.xlarge', 't2.2xlarge', 'm4.xlarge', 'm4.2xlarge', 'm4.4xlarge', 'm4.10xlarge', 'c4.xlarge', 'c4.2xlarge', 'c4.4xlarge', 'c4.8xlarge', ], ConstraintDescription='Must be a valid EC2 instance type.', )) # Instance type for Public Agents instanceTypePublicAgent_param = t.add_parameter( Parameter( 'InstanceTypePublicAgent', Type='String', Description='EC2 instance type for ' + str(num_publicAgents) + ' Public Agent(s)', Default='m4.xlarge', AllowedValues=[ 't2.xlarge', 't2.2xlarge', 'm4.xlarge', 'm4.2xlarge', 'm4.4xlarge', 'm4.10xlarge', 'c4.xlarge', 'c4.2xlarge', 'c4.4xlarge', 'c4.8xlarge', ], ConstraintDescription='Must be a valid EC2 instance type.', )) # Adding Resources ref_stack_id = Ref('AWS::StackId') ref_region = Ref('AWS::Region') ref_stack_name = Ref('AWS::StackName') # Create VPC nm = 'vpc' vpc = t.add_resource( VPC(nm, CidrBlock='10.10.0.0/16', EnableDnsSupport=True, EnableDnsHostnames=True, Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) # Create Subnet for Masters nm = 'mastersSubnet' subnetMasters = t.add_resource( Subnet(nm, AvailabilityZone=Ref(avzone_param), CidrBlock='10.10.0.0/24', VpcId=Ref(vpc), Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) # Create Subnet for Agents nm = 'agentsSubnet' subnetAgents = t.add_resource( Subnet(nm, AvailabilityZone=Ref(avzone_param), CidrBlock='10.10.16.0/24', VpcId=Ref(vpc), Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) # Create Subnet for Public Agents nm = 'publicAgentsSubnet' subnetPublicAgents = t.add_resource( Subnet(nm, AvailabilityZone=Ref(avzone_param), CidrBlock='10.10.32.0/24', VpcId=Ref(vpc), Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) # Create Gateway; route to the outside world (Internet) nm = 'ig' internetGateway = t.add_resource( InternetGateway(nm, Tags=Tags(Application=ref_stack_id, Name=Join( "", [Ref('AWS::StackName'), "-", nm])))) # Attach Gateway to VPC nm = 'gatewayAttachment' gatewayAttachment = t.add_resource( VPCGatewayAttachment(nm, VpcId=Ref(vpc), InternetGatewayId=Ref(internetGateway))) # Create Route Table nm = 'routeTable' routeTable = t.add_resource( RouteTable(nm, VpcId=Ref(vpc), Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) # Add Routes # Allow all outbound traffic nm = 'route' route = t.add_resource( Route( nm, DependsOn=gatewayAttachment.title, GatewayId=Ref(internetGateway), DestinationCidrBlock='0.0.0.0/0', RouteTableId=Ref(routeTable), )) # Associate RouteTable to Master and Public Subnets nm = 'subnetRTAMasters' subnetRouteTableAssociation = t.add_resource( SubnetRouteTableAssociation( nm, SubnetId=Ref(subnetMasters), RouteTableId=Ref(routeTable), )) nm = 'subnetRTAPublicAgents' subnetRouteTableAssociation = 
t.add_resource( SubnetRouteTableAssociation( nm, SubnetId=Ref(subnetPublicAgents), RouteTableId=Ref(routeTable), )) # Create Security Group (General access to ssh and internal connectionsn between masters, agents, and public agents) nm = 'securityGroup' securityGroup = t.add_resource( SecurityGroup(nm, GroupDescription='Security Group', SecurityGroupIngress=[ SecurityGroupRule(IpProtocol='tcp', FromPort='22', ToPort='22', CidrIp=Ref(sshlocation_param)), SecurityGroupRule(IpProtocol='-1', CidrIp='10.10.0.0/16') ], VpcId=Ref(vpc), Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) # Create Security Group Public Agents nm = 'securityGroupPublicAgents' publicAgentsSG = t.add_resource( SecurityGroup(nm, GroupDescription='Security Group Public Agents', SecurityGroupIngress=[ SecurityGroupRule(IpProtocol='tcp', FromPort='80', ToPort='80', CidrIp='0.0.0.0/0'), SecurityGroupRule(IpProtocol='tcp', FromPort='443', ToPort='443', CidrIp='0.0.0.0/0'), SecurityGroupRule(IpProtocol='tcp', FromPort='10000', ToPort='10010', CidrIp='0.0.0.0/0'), SecurityGroupRule(IpProtocol='tcp', FromPort='9090', ToPort='9090', CidrIp='0.0.0.0/0') ], VpcId=Ref(vpc), Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) # Create Security Group Masters Allow Access from sshlocation param as test nm = 'securityGroupMasters' mastersSG = t.add_resource( SecurityGroup(nm, GroupDescription='Security Group Masters', SecurityGroupIngress=[ SecurityGroupRule(IpProtocol='tcp', FromPort='80', ToPort='80', CidrIp=Ref(sshlocation_param)), SecurityGroupRule(IpProtocol='tcp', FromPort='443', ToPort='443', CidrIp=Ref(sshlocation_param)) ], VpcId=Ref(vpc), Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) if useNatInstance: # **** Also change in natRoute **** # Create NAT instance; This allows private agents to get out to the Internet nm = 'nat' nat = t.add_resource( Instance( nm, SourceDestCheck="false", ImageId=FindInMap("NATAmi", Ref("AWS::Region"), "default"), InstanceType="m4.large", AvailabilityZone=Ref(avzone_param), KeyName=Ref(keyname_param), DependsOn=internetGateway.title, NetworkInterfaces=[ NetworkInterfaceProperty(GroupSet=[Ref(securityGroup)], AssociatePublicIpAddress='true', DeviceIndex='0', DeleteOnTermination='true', SubnetId=Ref(subnetMasters), PrivateIpAddress='10.10.0.9') ], BlockDeviceMappings=[ BlockDeviceMapping(DeviceName="/dev/xvda", Ebs=EBSBlockDevice( DeleteOnTermination='true', )) ], Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) else: # Create Elastic IP for NatGateay nm = 'natIP' nat_eip = t.add_resource(EIP( nm, Domain="vpc", )) # Create NAT Gateway nm = 'natGateway' nat = t.add_resource( NatGateway( nm, AllocationId=GetAtt(nat_eip, 'AllocationId'), SubnetId=Ref(subnetMasters), )) # Create Route Table for NAT nm = 'natRouteTable' routeTableNAT = t.add_resource( RouteTable(nm, VpcId=Ref(vpc), Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) # Associate Agent Subnet to NAT nm = 'subnetRTAAgents' subnetRouteTableAssociation = t.add_resource( SubnetRouteTableAssociation( nm, SubnetId=Ref(subnetAgents), RouteTableId=Ref(routeTableNAT), )) # Add Routes (Agents can reach out anywhere) nm = 'natRoute' if useNatInstance: route = t.add_resource( Route( nm, RouteTableId=Ref(routeTableNAT), DestinationCidrBlock='0.0.0.0/0', InstanceId=Ref(nat), )) else: route = t.add_resource( Route( nm, RouteTableId=Ref(routeTableNAT), 
DestinationCidrBlock='0.0.0.0/0', NatGatewayId=Ref(nat), )) # **************************************** # NOTE: I am using static PrivateIPAddresses; this may not be a good choice; however, it simplified the install script. The range of IP's for the master and agents are limited to 24 subnet and I start at 11 # With this configuration the max number of agents is around 240. # **************************************** # Create boot instance # Installs on AWS so far have taken longer than on Azure. Takes about 10 minutes for the boot server to configure. # Tried several InstanceType from t2.micro to m4.large; all take about 10 minutes for boot to load. The docker start of mesosphere/dcos-genconf seems to be taking longer than it did on azure. nm = 'boot' boot = t.add_resource( Instance(nm, ImageId=FindInMap("c73Ami", Ref("AWS::Region"), "default"), InstanceType="m4.xlarge", AvailabilityZone=Ref(avzone_param), KeyName=Ref(keyname_param), NetworkInterfaces=[ NetworkInterfaceProperty(GroupSet=[Ref(securityGroup)], AssociatePublicIpAddress='true', DeviceIndex='0', DeleteOnTermination='true', SubnetId=Ref(subnetMasters), PrivateIpAddress='10.10.0.10') ], BlockDeviceMappings=[ BlockDeviceMapping(DeviceName="/dev/sda1", Ebs=EBSBlockDevice( VolumeSize="100", DeleteOnTermination='true', )) ], Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) # Create master instance(s) masters = [] i = 1 while i <= num_masters: nm = 'm' + str(i) private_ip = "10.10.0." + str(i + 10) instance = t.add_resource( Instance(nm, ImageId=FindInMap("c73Ami", Ref("AWS::Region"), "default"), InstanceType=Ref(instanceTypeMaster_param), AvailabilityZone=Ref(avzone_param), KeyName=Ref(keyname_param), NetworkInterfaces=[ NetworkInterfaceProperty( GroupSet=[Ref(securityGroup), Ref(mastersSG)], AssociatePublicIpAddress='true', DeviceIndex='0', DeleteOnTermination='true', SubnetId=Ref(subnetMasters), PrivateIpAddress=private_ip) ], BlockDeviceMappings=[ BlockDeviceMapping(DeviceName="/dev/sda1", Ebs=EBSBlockDevice( VolumeSize="100", DeleteOnTermination='true', )) ], Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) masters.append(instance) i += 1 # Create agent instance(s) i = 1 while i <= num_agents: nm = 'a' + str(i) private_ip = "10.10.16." + str(i + 10) instance = t.add_resource( Instance( nm, ImageId=FindInMap("c73Ami", Ref("AWS::Region"), "default"), InstanceType=Ref(instanceTypeAgent_param), AvailabilityZone=Ref(avzone_param), KeyName=Ref(keyname_param), NetworkInterfaces=[ NetworkInterfaceProperty(GroupSet=[Ref(securityGroup)], AssociatePublicIpAddress='false', DeviceIndex='0', DeleteOnTermination='true', SubnetId=Ref(subnetAgents), PrivateIpAddress=private_ip) ], BlockDeviceMappings=[ BlockDeviceMapping(DeviceName="/dev/sda1", Ebs=EBSBlockDevice( VolumeSize="100", DeleteOnTermination='true', )) ], Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) volume = t.add_resource( Volume(nm + "data", AvailabilityZone=Ref(avzone_param), Size=Ref(dataDriveSizeGB_param), Tags=Tags( Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm + "data"])))) volattach = t.add_resource( VolumeAttachment(nm + "dataattach", InstanceId=Ref(instance), VolumeId=Ref(volume), Device="/dev/sdc")) i += 1 # Create public agent instance(s) publicAgents = [] i = 1 nm = "p1" while i <= num_publicAgents: nm = 'p' + str(i) private_ip = "10.10.32." 
+ str(i + 10) instance = t.add_resource( Instance( nm, ImageId=FindInMap("c73Ami", Ref("AWS::Region"), "default"), InstanceType=Ref(instanceTypePublicAgent_param), AvailabilityZone=Ref(avzone_param), KeyName=Ref(keyname_param), NetworkInterfaces=[ NetworkInterfaceProperty( GroupSet=[Ref(securityGroup), Ref(publicAgentsSG)], AssociatePublicIpAddress='true', DeviceIndex='0', DeleteOnTermination='true', SubnetId=Ref(subnetPublicAgents), PrivateIpAddress=private_ip) ], BlockDeviceMappings=[ BlockDeviceMapping(DeviceName="/dev/sda1", Ebs=EBSBlockDevice( VolumeSize="100", DeleteOnTermination='true', )) ], Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) publicAgents.append(instance) i += 1 # Load Balancer Masters nm = "masters" elasticLBMasters = t.add_resource( elb.LoadBalancer( nm, Instances=[Ref(r) for r in masters], Subnets=[Ref(subnetMasters)], SecurityGroups=[Ref(mastersSG)], CrossZone=False, Listeners=[ elb.Listener( LoadBalancerPort="80", InstancePort="80", Protocol="TCP", ), elb.Listener( LoadBalancerPort="443", InstancePort="443", Protocol="TCP", ), ], # Health Checking on port 80 which should be there after DCOS has been installed. HealthCheck=elb.HealthCheck( Target="TCP:80", HealthyThreshold="2", UnhealthyThreshold="2", Interval="30", Timeout="5", ), Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) # Load Balancer Public Agents nm = "publicagents" elasticLBPublicAgents = t.add_resource( elb.LoadBalancer( nm, #AvailabilityZones=GetAZs(""), Instances=[Ref(r) for r in publicAgents], Subnets=[Ref(subnetPublicAgents)], SecurityGroups=[Ref(publicAgentsSG)], CrossZone=False, Listeners=[ elb.Listener( LoadBalancerPort="10000", InstancePort="10000", Protocol="TCP", ), elb.Listener( LoadBalancerPort="10001", InstancePort="10001", Protocol="TCP", ), elb.Listener( LoadBalancerPort="10002", InstancePort="10002", Protocol="TCP", ), elb.Listener( LoadBalancerPort="10003", InstancePort="10003", Protocol="TCP", ), elb.Listener( LoadBalancerPort="10004", InstancePort="10004", Protocol="TCP", ), elb.Listener( LoadBalancerPort="10005", InstancePort="10005", Protocol="TCP", ), elb.Listener( LoadBalancerPort="10006", InstancePort="10006", Protocol="TCP", ), elb.Listener( LoadBalancerPort="10007", InstancePort="10007", Protocol="TCP", ), elb.Listener( LoadBalancerPort="10008", InstancePort="10008", Protocol="TCP", ), elb.Listener( LoadBalancerPort="10009", InstancePort="10009", Protocol="TCP", ), elb.Listener( LoadBalancerPort="10010", InstancePort="10010", Protocol="TCP", ), elb.Listener( LoadBalancerPort="9090", InstancePort="9090", Protocol="TCP", ), elb.Listener( LoadBalancerPort="80", InstancePort="80", Protocol="TCP", ), elb.Listener( LoadBalancerPort="443", InstancePort="443", Protocol="TCP", ) ], # I've added health check for port 9090; becomes healthy after Marathon-LB is installed. 
HealthCheck=elb.HealthCheck( Target="TCP:9090", HealthyThreshold="2", UnhealthyThreshold="2", Interval="30", Timeout="5", ), Tags=Tags(Application=ref_stack_id, Name=Join("", [Ref('AWS::StackName'), "-", nm])))) # Outputs t.add_output( Output("BootServer", Description="Name/IP of Boot Server", Value=Join( "/", [GetAtt(boot, "PublicDnsName"), GetAtt(boot, "PublicIp")]))) t.add_output( Output("MastersURL", Description="URL of the Masters", Value=Join( "", ["http://", GetAtt(elasticLBMasters, "DNSName")]))) t.add_output( Output( "PublicAgentsURL", Description="URL of the Public Agents haproxy stats.", Value=Join("", [ "http://", GetAtt(elasticLBPublicAgents, "DNSName"), ":9090/haproxy?stats" ]))) # Write json to file jsonStr = t.to_json() fout = open(outfilename, "w") fout.write(jsonStr) fout.close() # Print the json to screen print(jsonStr)
def main(**params): try: # Metadata t = Template() t.set_version("2010-09-09") t.set_description("(SOCA) - Base template to deploy compute nodes. Version 2.7.2") allow_anonymous_data_collection = params["MetricCollectionAnonymous"] debug = False mip_usage = False instances_list = params["InstanceType"] # list of instance type. Use + to specify more than one type asg_lt = asg_LaunchTemplate() ltd = LaunchTemplateData("NodeLaunchTemplateData") mip = MixedInstancesPolicy() stack_name = Ref("AWS::StackName") # Begin LaunchTemplateData UserData = '''#!/bin/bash -x export PATH=$PATH:/usr/local/bin if [[ "''' + params['BaseOS'] + '''" == "centos7" ]] || [[ "''' + params['BaseOS'] + '''" == "rhel7" ]]; then yum install -y python3-pip PIP=$(which pip3) $PIP install awscli yum install -y nfs-utils # enforce install of nfs-utils else yum install -y python3-pip PIP=$(which pip3) $PIP install awscli fi if [[ "''' + params['BaseOS'] + '''" == "amazonlinux2" ]]; then /usr/sbin/update-motd --disable fi GET_INSTANCE_TYPE=$(curl http://169.254.169.254/latest/meta-data/instance-type) echo export "SOCA_CONFIGURATION="''' + str(params['ClusterId']) + '''"" >> /etc/environment echo export "SOCA_BASE_OS="''' + str(params['BaseOS']) + '''"" >> /etc/environment echo export "SOCA_JOB_QUEUE="''' + str(params['JobQueue']) + '''"" >> /etc/environment echo export "SOCA_JOB_OWNER="''' + str(params['JobOwner']) + '''"" >> /etc/environment echo export "SOCA_JOB_NAME="''' + str(params['JobName']) + '''"" >> /etc/environment echo export "SOCA_JOB_PROJECT="''' + str(params['JobProject']) + '''"" >> /etc/environment echo export "SOCA_VERSION="''' + str(params['Version']) + '''"" >> /etc/environment echo export "SOCA_JOB_EFA="''' + str(params['Efa']).lower() + '''"" >> /etc/environment echo export "SOCA_JOB_ID="''' + str(params['JobId']) + '''"" >> /etc/environment echo export "SOCA_SCRATCH_SIZE=''' + str(params['ScratchSize']) + '''" >> /etc/environment echo export "SOCA_INSTALL_BUCKET="''' + str(params['S3Bucket']) + '''"" >> /etc/environment echo export "SOCA_INSTALL_BUCKET_FOLDER="''' + str(params['S3InstallFolder']) + '''"" >> /etc/environment echo export "SOCA_FSX_LUSTRE_BUCKET="''' + str(params['FSxLustreConfiguration']['fsx_lustre']).lower() + '''"" >> /etc/environment echo export "SOCA_FSX_LUSTRE_DNS="''' + str(params['FSxLustreConfiguration']['existing_fsx']).lower() + '''"" >> /etc/environment echo export "SOCA_INSTANCE_TYPE=$GET_INSTANCE_TYPE" >> /etc/environment echo export "SOCA_INSTANCE_HYPERTHREADING="''' + str(params['ThreadsPerCore']).lower() + '''"" >> /etc/environment echo export "SOCA_SYSTEM_METRICS="''' + str(params['SystemMetrics']).lower() + '''"" >> /etc/environment echo export "SOCA_ESDOMAIN_ENDPOINT="''' + str(params['ESDomainEndpoint']).lower() + '''"" >> /etc/environment echo export "SOCA_AUTH_PROVIDER="''' + str(params['AuthProvider']).lower() + '''"" >> /etc/environment echo export "SOCA_HOST_SYSTEM_LOG="/apps/soca/''' + str(params['ClusterId']) + '''/cluster_node_bootstrap/logs/''' + str(params['JobId']) + '''/$(hostname -s)"" >> /etc/environment echo export "AWS_STACK_ID=${AWS::StackName}" >> /etc/environment echo export "AWS_DEFAULT_REGION=${AWS::Region}" >> /etc/environment source /etc/environment AWS=$(command -v aws) # Give yum permission to the user on this specific machine echo "''' + params['JobOwner'] + ''' ALL=(ALL) /bin/yum" >> /etc/sudoers # Mount File system mkdir -p /apps mkdir -p /data FS_DATA_PROVIDER='''+params['FileSystemDataProvider']+''' 
FS_DATA='''+params['FileSystemData']+''' FS_APPS_PROVIDER='''+params['FileSystemAppsProvider']+''' FS_APPS='''+params['FileSystemApps']+''' if [[ "$FS_DATA_PROVIDER" == "fsx_lustre" ]] || [[ "$FS_APPS_PROVIDER" == "fsx_lustre" ]]; then if [[ -z "$(rpm -qa lustre-client)" ]]; then # Install FSx for Lustre Client if [[ "$SOCA_BASE_OS" == "amazonlinux2" ]]; then amazon-linux-extras install -y lustre2.10 else kernel=$(uname -r) machine=$(uname -m) echo "Found kernel version: $kernel running on: $machine" yum -y install wget if [[ $kernel == *"3.10.0-957"*$machine ]]; then yum -y install https://downloads.whamcloud.com/public/lustre/lustre-2.10.8/el7/client/RPMS/x86_64/kmod-lustre-client-2.10.8-1.el7.x86_64.rpm yum -y install https://downloads.whamcloud.com/public/lustre/lustre-2.10.8/el7/client/RPMS/x86_64/lustre-client-2.10.8-1.el7.x86_64.rpm elif [[ $kernel == *"3.10.0-1062"*$machine ]]; then wget https://fsx-lustre-client-repo-public-keys.s3.amazonaws.com/fsx-rpm-public-key.asc -O /tmp/fsx-rpm-public-key.asc rpm --import /tmp/fsx-rpm-public-key.asc wget https://fsx-lustre-client-repo.s3.amazonaws.com/el/7/fsx-lustre-client.repo -O /etc/yum.repos.d/aws-fsx.repo sed -i 's#7#7.7#' /etc/yum.repos.d/aws-fsx.repo yum clean all yum install -y kmod-lustre-client lustre-client elif [[ $kernel == *"3.10.0-1127"*$machine ]]; then wget https://fsx-lustre-client-repo-public-keys.s3.amazonaws.com/fsx-rpm-public-key.asc -O /tmp/fsx-rpm-public-key.asc rpm --import /tmp/fsx-rpm-public-key.asc wget https://fsx-lustre-client-repo.s3.amazonaws.com/el/7/fsx-lustre-client.repo -O /etc/yum.repos.d/aws-fsx.repo sed -i 's#7#7.8#' /etc/yum.repos.d/aws-fsx.repo yum clean all yum install -y kmod-lustre-client lustre-client elif [[ $kernel == *"3.10.0-1160"*$machine ]]; then wget https://fsx-lustre-client-repo-public-keys.s3.amazonaws.com/fsx-rpm-public-key.asc -O /tmp/fsx-rpm-public-key.asc rpm --import /tmp/fsx-rpm-public-key.asc wget https://fsx-lustre-client-repo.s3.amazonaws.com/el/7/fsx-lustre-client.repo -O /etc/yum.repos.d/aws-fsx.repo yum clean all yum install -y kmod-lustre-client lustre-client elif [[ $kernel == *"4.18.0-193"*$machine ]]; then # FSX for Lustre on aarch64 is supported only on 4.18.0-193 wget https://fsx-lustre-client-repo-public-keys.s3.amazonaws.com/fsx-rpm-public-key.asc -O /tmp/fsx-rpm-public-key.asc rpm --import /tmp/fsx-rpm-public-key.asc wget https://fsx-lustre-client-repo.s3.amazonaws.com/centos/7/fsx-lustre-client.repo -O /etc/yum.repos.d/aws-fsx.repo yum clean all yum install -y kmod-lustre-client lustre-client else echo "ERROR: Can't install FSx for Lustre client as kernel version: $kernel isn't matching expected versions: (x86_64: 3.10.0-957, -1062, -1127, -1160, aarch64: 4.18.0-193)!" fi fi fi fi if [[ "$FS_DATA_PROVIDER" == "efs" ]]; then echo "$FS_DATA:/ /data nfs4 nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport 0 0" >> /etc/fstab elif [[ "$FS_DATA_PROVIDER" == "fsx_lustre" ]]; then FSX_ID=$(echo $FS_DATA | cut -d. -f1) FSX_DATA_MOUNT_NAME=$($AWS fsx describe-file-systems --file-system-ids $FSX_ID --query FileSystems[].LustreConfiguration.MountName --output text) echo "$FS_DATA@tcp:/$FSX_DATA_MOUNT_NAME /data lustre defaults,noatime,flock,_netdev 0 0" >> /etc/fstab fi if [[ "$FS_APPS_PROVIDER" == "efs" ]]; then echo "$FS_APPS:/ /apps nfs4 nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport 0 0" >> /etc/fstab elif [[ "$FS_APPS_PROVIDER" == "fsx_lustre" ]]; then FSX_ID=$(echo $FS_APPS | cut -d. 
-f1) FSX_APPS_MOUNT_NAME=$($AWS fsx describe-file-systems --file-system-ids $FSX_ID --query FileSystems[].LustreConfiguration.MountName --output text) echo "$FS_APPS@tcp:/$FSX_APPS_MOUNT_NAME /apps lustre defaults,noatime,flock,_netdev 0 0" >> /etc/fstab fi FS_MOUNT=0 mount -a while [[ $? -ne 0 ]] && [[ $FS_MOUNT -lt 5 ]] do SLEEP_TIME=$(( RANDOM % 60 )) echo "Failed to mount FS, retrying in $SLEEP_TIME seconds and Loop $FS_MOUNT/5..." sleep $SLEEP_TIME ((FS_MOUNT++)) mount -a done # Configure Chrony yum remove -y ntp yum install -y chrony mv /etc/chrony.conf /etc/chrony.conf.original echo -e """ # use the local instance NTP service, if available server 169.254.169.123 prefer iburst minpoll 4 maxpoll 4 # Use public servers from the pool.ntp.org project. # Please consider joining the pool (http://www.pool.ntp.org/join.html). # !!! [BEGIN] SOCA REQUIREMENT # You will need to open UDP egress traffic on your security group if you want to enable public pool #pool 2.amazon.pool.ntp.org iburst # !!! [END] SOCA REQUIREMENT # Record the rate at which the system clock gains/losses time. driftfile /var/lib/chrony/drift # Allow the system clock to be stepped in the first three updates # if its offset is larger than 1 second. makestep 1.0 3 # Specify file containing keys for NTP authentication. keyfile /etc/chrony.keys # Specify directory for log files. logdir /var/log/chrony # save data between restarts for fast re-load dumponexit dumpdir /var/run/chrony """ > /etc/chrony.conf systemctl enable chronyd # Prepare Log folder mkdir -p $SOCA_HOST_SYSTEM_LOG echo "@reboot /bin/bash /apps/soca/$SOCA_CONFIGURATION/cluster_node_bootstrap/ComputeNodePostReboot.sh >> $SOCA_HOST_SYSTEM_LOG/ComputeNodePostReboot.log 2>&1" | crontab - cp /apps/soca/$SOCA_CONFIGURATION/cluster_node_bootstrap/config.cfg /root/ /bin/bash /apps/soca/$SOCA_CONFIGURATION/cluster_node_bootstrap/ComputeNode.sh ''' + params['SchedulerHostname'] + ''' >> $SOCA_HOST_SYSTEM_LOG/ComputeNode.sh.log 2>&1''' # Specify the security groups to assign to the compute nodes. Max 5 per instance security_groups = [params["SecurityGroupId"]] if params["AdditionalSecurityGroupIds"]: for sg_id in params["AdditionalSecurityGroupIds"]: security_groups.append(sg_id) # Specify the IAM instance profile to use instance_profile = params["ComputeNodeInstanceProfileArn"] if params["CustomIamInstanceProfile"] is False else params["CustomIamInstanceProfile"] SpotFleet = True if ((params["SpotPrice"] is not False) and (params["SpotAllocationCount"] is False) and (int(params["DesiredCapacity"]) > 1 or len(instances_list)>1)) else False ltd.EbsOptimized = True for instance in instances_list: if "t2." 
in instance: ltd.EbsOptimized = False # metal + t2 does not support CpuOptions unsupported = ["t2.", "metal"] if all(itype not in instance for itype in unsupported) and (SpotFleet is False or len(instances_list) == 1): # Spotfleet with multiple instance types doesn't support CpuOptions # So we can't add CpuOptions if SpotPrice is specified and when multiple instances are specified ltd.CpuOptions = CpuOptions( CoreCount=int(params["CoreCount"]), ThreadsPerCore=1 if params["ThreadsPerCore"] is False else 2) ltd.IamInstanceProfile = IamInstanceProfile(Arn=instance_profile) ltd.KeyName = params["SSHKeyPair"] ltd.ImageId = params["ImageId"] if params["SpotPrice"] is not False and params["SpotAllocationCount"] is False: ltd.InstanceMarketOptions = InstanceMarketOptions( MarketType="spot", SpotOptions=SpotOptions( MaxPrice=Ref("AWS::NoValue") if params["SpotPrice"] == "auto" else str(params["SpotPrice"]) # auto -> cap at OD price ) ) ltd.InstanceType = instances_list[0] ltd.NetworkInterfaces = [NetworkInterfaces( InterfaceType="efa" if params["Efa"] is not False else Ref("AWS::NoValue"), DeleteOnTermination=True, DeviceIndex=0, Groups=security_groups )] ltd.UserData = Base64(Sub(UserData)) ltd.BlockDeviceMappings = [ LaunchTemplateBlockDeviceMapping( DeviceName="/dev/xvda" if params["BaseOS"] == "amazonlinux2" else "/dev/sda1", Ebs=EBSBlockDevice( VolumeSize=params["RootSize"], VolumeType="gp3", DeleteOnTermination="false" if params["KeepEbs"] is True else "true", Encrypted=True)) ] if int(params["ScratchSize"]) > 0: ltd.BlockDeviceMappings.append( BlockDeviceMapping( DeviceName="/dev/xvdbx", Ebs=EBSBlockDevice( VolumeSize=params["ScratchSize"], VolumeType="io1" if int(params["VolumeTypeIops"]) > 0 else "gp3", Iops=params["VolumeTypeIops"] if int(params["VolumeTypeIops"]) > 0 else Ref("AWS::NoValue"), DeleteOnTermination="false" if params["KeepEbs"] is True else "true", Encrypted=True)) ) ltd.TagSpecifications = [ec2.TagSpecifications( ResourceType="instance", Tags = base_Tags( Name=str(params["ClusterId"]) + "-compute-job-" + str(params["JobId"]), _soca_JobId=str(params["JobId"]), _soca_JobName=str(params["JobName"]), _soca_JobQueue=str(params["JobQueue"]), _soca_StackId=stack_name, _soca_JobOwner=str(params["JobOwner"]), _soca_JobProject=str(params["JobProject"]), _soca_TerminateWhenIdle=str(params["TerminateWhenIdle"]), _soca_KeepForever=str(params["KeepForever"]).lower(), _soca_ClusterId=str(params["ClusterId"]), _soca_NodeType="soca-compute-node"))] # End LaunchTemplateData # Begin Launch Template Resource lt = LaunchTemplate("NodeLaunchTemplate") lt.LaunchTemplateName = params["ClusterId"] + "-" + str(params["JobId"]) lt.LaunchTemplateData = ltd t.add_resource(lt) # End Launch Template Resource if SpotFleet is True: # SpotPrice is defined and DesiredCapacity > 1 or need to try more than 1 instance_type # Create SpotFleet # Begin SpotFleetRequestConfigData Resource sfrcd = ec2.SpotFleetRequestConfigData() sfrcd.AllocationStrategy = params["SpotAllocationStrategy"] sfrcd.ExcessCapacityTerminationPolicy = "noTermination" sfrcd.IamFleetRole = params["SpotFleetIAMRoleArn"] sfrcd.InstanceInterruptionBehavior = "terminate" if params["SpotPrice"] != "auto": sfrcd.SpotPrice = str(params["SpotPrice"]) sfrcd.SpotMaintenanceStrategies = ec2.SpotMaintenanceStrategies( CapacityRebalance=ec2.SpotCapacityRebalance(ReplacementStrategy="launch")) sfrcd.TargetCapacity = params["DesiredCapacity"] sfrcd.Type = "maintain" sfltc = ec2.LaunchTemplateConfigs() sflts = ec2.LaunchTemplateSpecification( 
LaunchTemplateId=Ref(lt), Version=GetAtt(lt, "LatestVersionNumber")) sfltc.LaunchTemplateSpecification = sflts sfltc.Overrides = [] for subnet in params["SubnetId"]: for index, instance in enumerate(instances_list): if params["WeightedCapacity"] is not False: sfltc.Overrides.append(ec2.LaunchTemplateOverrides( InstanceType = instance, SubnetId = subnet, WeightedCapacity = params["WeightedCapacity"][index])) else: sfltc.Overrides.append(ec2.LaunchTemplateOverrides( InstanceType = instance, SubnetId = subnet)) sfrcd.LaunchTemplateConfigs = [sfltc] TagSpecifications = ec2.SpotFleetTagSpecification( ResourceType="spot-fleet-request", Tags=base_Tags( Name=str(params["ClusterId"]) + "-compute-job-" + str(params["JobId"]), _soca_JobId=str(params["JobId"]), _soca_JobName=str(params["JobName"]), _soca_JobQueue=str(params["JobQueue"]), _soca_StackId=stack_name, _soca_JobOwner=str(params["JobOwner"]), _soca_JobProject=str(params["JobProject"]), _soca_TerminateWhenIdle=str(params["TerminateWhenIdle"]), _soca_KeepForever=str(params["KeepForever"]).lower(), _soca_ClusterId=str(params["ClusterId"]), _soca_NodeType="soca-compute-node")) # End SpotFleetRequestConfigData Resource # Begin SpotFleet Resource spotfleet = ec2.SpotFleet("SpotFleet") spotfleet.SpotFleetRequestConfigData = sfrcd t.add_resource(spotfleet) # End SpotFleet Resource else: asg_lt.LaunchTemplateSpecification = LaunchTemplateSpecification( LaunchTemplateId=Ref(lt), Version=GetAtt(lt, "LatestVersionNumber") ) asg_lt.Overrides = [] for index, instance in enumerate(instances_list): if params["WeightedCapacity"] is not False: mip_usage = True asg_lt.Overrides.append(LaunchTemplateOverrides( InstanceType=instance, WeightedCapacity=str(params["WeightedCapacity"][index]))) else: asg_lt.Overrides.append(LaunchTemplateOverrides( InstanceType=instance)) # Begin InstancesDistribution if params["SpotPrice"] is not False and \ params["SpotAllocationCount"] is not False and \ (int(params["DesiredCapacity"]) - int(params["SpotAllocationCount"])) > 0: mip_usage = True idistribution = InstancesDistribution() idistribution.OnDemandAllocationStrategy = "prioritized" # only supported value idistribution.OnDemandBaseCapacity = params["DesiredCapacity"] - params["SpotAllocationCount"] idistribution.OnDemandPercentageAboveBaseCapacity = "0" # force the other instances to be SPOT idistribution.SpotMaxPrice = Ref("AWS::NoValue") if params["SpotPrice"] == "auto" else str( params["SpotPrice"]) idistribution.SpotAllocationStrategy = params['SpotAllocationStrategy'] mip.InstancesDistribution = idistribution # End MixedPolicyInstance # Begin AutoScalingGroup Resource asg = AutoScalingGroup("AutoScalingComputeGroup") asg.DependsOn = "NodeLaunchTemplate" if mip_usage is True or instances_list.__len__() > 1: mip.LaunchTemplate = asg_lt asg.MixedInstancesPolicy = mip else: asg.LaunchTemplate = LaunchTemplateSpecification( LaunchTemplateId=Ref(lt), Version=GetAtt(lt, "LatestVersionNumber")) asg.MinSize = int(params["DesiredCapacity"]) asg.MaxSize = int(params["DesiredCapacity"]) asg.VPCZoneIdentifier = params["SubnetId"] asg.CapacityRebalance = False if params["PlacementGroup"] is True: pg = PlacementGroup("ComputeNodePlacementGroup") pg.Strategy = "cluster" t.add_resource(pg) asg.PlacementGroup = Ref(pg) asg.Tags = Tags( Name=str(params["ClusterId"]) + "-compute-job-" + str(params["JobId"]), _soca_JobId=str(params["JobId"]), _soca_JobName=str(params["JobName"]), _soca_JobQueue=str(params["JobQueue"]), _soca_StackId=stack_name, _soca_JobOwner=str(params["JobOwner"]), 
_soca_JobProject=str(params["JobProject"]), _soca_TerminateWhenIdle=str(params["TerminateWhenIdle"]), _soca_KeepForever=str(params["KeepForever"]).lower(), _soca_ClusterId=str(params["ClusterId"]), _soca_NodeType="soca-compute-node") t.add_resource(asg) # End AutoScalingGroup Resource # Begin FSx for Lustre if params["FSxLustreConfiguration"]["fsx_lustre"] is not False: if params["FSxLustreConfiguration"]["existing_fsx"] is False: fsx_lustre = FileSystem("FSxForLustre") fsx_lustre.FileSystemType = "LUSTRE" fsx_lustre.StorageCapacity = params["FSxLustreConfiguration"]["capacity"] fsx_lustre.SecurityGroupIds = security_groups fsx_lustre.SubnetIds = params["SubnetId"] fsx_lustre_configuration = LustreConfiguration() fsx_lustre_configuration.DeploymentType = params["FSxLustreConfiguration"]["deployment_type"].upper() if params["FSxLustreConfiguration"]["deployment_type"].upper() == "PERSISTENT_1": fsx_lustre_configuration.PerUnitStorageThroughput = params["FSxLustreConfiguration"]["per_unit_throughput"] if params["FSxLustreConfiguration"]["s3_backend"] is not False: fsx_lustre_configuration.ImportPath = params["FSxLustreConfiguration"]["import_path"] if params["FSxLustreConfiguration"]["import_path"] is not False else params["FSxLustreConfiguration"]["s3_backend"] fsx_lustre_configuration.ExportPath = params["FSxLustreConfiguration"]["import_path"] if params["FSxLustreConfiguration"]["import_path"] is not False else params["FSxLustreConfiguration"]["s3_backend"] + "/" + params["ClusterId"] + "-fsxoutput/job-" + params["JobId"] + "/" fsx_lustre.LustreConfiguration = fsx_lustre_configuration fsx_lustre.Tags = base_Tags( # False disable PropagateAtLaunch Name=str(params["ClusterId"] + "-compute-job-" + params["JobId"]), _soca_JobId=str(params["JobId"]), _soca_JobName=str(params["JobName"]), _soca_JobQueue=str(params["JobQueue"]), _soca_TerminateWhenIdle=str(params["TerminateWhenIdle"]), _soca_StackId=stack_name, _soca_JobOwner=str(params["JobOwner"]), _soca_JobProject=str(params["JobProject"]), _soca_KeepForever=str(params["KeepForever"]).lower(), _soca_FSx="true", _soca_ClusterId=str(params["ClusterId"]), ) t.add_resource(fsx_lustre) # End FSx For Lustre # Begin Custom Resource # Change Mapping to No if you want to disable this if allow_anonymous_data_collection is True: metrics = CustomResourceSendAnonymousMetrics("SendAnonymousData") metrics.ServiceToken = params["SolutionMetricsLambda"] metrics.DesiredCapacity = str(params["DesiredCapacity"]) metrics.InstanceType = str(params["InstanceType"]) metrics.Efa = str(params["Efa"]) metrics.ScratchSize = str(params["ScratchSize"]) metrics.RootSize = str(params["RootSize"]) metrics.SpotPrice = str(params["SpotPrice"]) metrics.BaseOS = str(params["BaseOS"]) metrics.StackUUID = str(params["StackUUID"]) metrics.KeepForever = str(params["KeepForever"]) metrics.FsxLustre = str(params["FSxLustreConfiguration"]) metrics.TerminateWhenIdle = str(params["TerminateWhenIdle"]) metrics.Dcv = "false" t.add_resource(metrics) # End Custom Resource if debug is True: print(t.to_json()) # Tags must use "soca:<Key>" syntax template_output = t.to_yaml().replace("_soca_", "soca:") return {'success': True, 'output': template_output} except Exception as e: exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] return {'success': False, 'output': 'cloudformation_builder.py: ' + ( str(e) + ': error :' + str(exc_type) + ' ' + str(fname) + ' ' + str(exc_tb.tb_lineno))}
for role in roles:
    # Don't name this resource, since named resources need a full stack destroy.
    launchConfig = t.add_resource(
        LaunchConfiguration(
            "launchconfig" + role.upper(),
            ImageId=rolemap[role]["instance"]["ami"],
            SecurityGroups=[Ref("defaultSG"), Ref(serverSecurityGroup)],
            InstanceType=rolemap[role]["instance"]["type"],
            IamInstanceProfile=Ref("iamCodeDeploy"),
            AssociatePublicIpAddress=True,
            KeyName=Ref("keyName"),
            BlockDeviceMappings=[
                BlockDeviceMapping(
                    DeviceName="/dev/xvda",
                    Ebs=EBSBlockDevice(DeleteOnTermination=True,
                                       VolumeType="gp2",
                                       VolumeSize=10))
            ],
            UserData=Base64(
                Join('', [
                    '#!/bin/bash\n',
                    'sudo apt-get install wget\n',
                    'wget https://aws-codedeploy-', awsRegion,
                    '.s3.amazonaws.com/latest/install\n',
                    'chmod +x ./install\n',
                    'sudo ./install auto\n',
                ]))))

loadbalancer = []
targetgroup = []
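# The loop above assumes a `rolemap` dictionary keyed by role name, plus the
# `roles`, `awsRegion` and `serverSecurityGroup` names defined elsewhere in the
# stack. A hedged sketch of the shape `rolemap` would need for the loop to run;
# the role names and AMI IDs are placeholders, not values from the original stack:
rolemap = {
    "web":    {"instance": {"ami": "ami-0123456789abcdef0", "type": "t3.small"}},
    "worker": {"instance": {"ami": "ami-0123456789abcdef0", "type": "t3.medium"}},
}
roles = list(rolemap)  # one LaunchConfiguration per role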
            ToPort='8898',
            CidrIp='0.0.0.0/0')
    ],
))

ec2Instance = t.add_resource(
    Instance(
        "Ec2Instance",
        InstanceType=Ref(instanceType_param),
        SecurityGroupIds=[Ref(ec2SecurityGroup)],
        ImageId=Ref(imageId_param),
        KeyName=Ref(keyname_param),
        BlockDeviceMappings=[
            BlockDeviceMapping(
                DeviceName="/dev/sda1",
                Ebs=EBSBlockDevice(
                    VolumeSize="128",
                    VolumeType="gp2",
                )),
        ],
        Tags=Tags(Application=ref_stack_id, Network="Public", Rev="0.0.3"),
    ))

ec2EIP = t.add_resource(
    EIP(
        "ec2EIP",
        InstanceId=Ref(ec2Instance),
        Domain='vpc',
    ))

hostRecordSet = t.add_resource(
    RecordSetType(
        "hostRecordSet",
"write_files:\n", " - path: /etc/environment\n", " content: |\n", " REGION_NAME=", Ref("AWS::Region"), "\n", " CLUSTER_NAME=", Ref(ClusterName), "\n", " PRIVATEIP=$private_ipv4\n", " PUBLICIP=$public_ipv4\n" ] )), KeyName=Ref(SecurityKeyName), SecurityGroups=[Ref(CoreOSSecurityGroup)], InstanceType=Ref(AutoScalingGroupInstanceType), BlockDeviceMappings=[ BlockDeviceMapping( DeviceName="/dev/xvda", Ebs=EBSBlockDevice(VolumeSize="150") ) ], ImageId=FindInMap("CoreOSImageRegionMap", Ref("AWS::Region"), "AMI") )) CoreOSServerAutoScale = t.add_resource(AutoScalingGroup( "CoreOSServerAutoScale", DesiredCapacity= Ref(AutoScalingSize), LoadBalancerNames=[Ref(ElasticLoadBalancer)], MinSize=Ref(AutoScalingSize), MaxSize=Ref(AutoScalingSize), VPCZoneIdentifier=[Ref(privateBSubnet),Ref(privateASubnet)], LaunchConfigurationName=Ref(CoreOSLaunchConfig) ))
def add_resources(self): self.runner_ssm_role = self.template.add_resource( Role( "RunnerSsmRole", Path="/", ManagedPolicyArns=[ "arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM" ], AssumeRolePolicyDocument=aws.Policy(Statement=[ Statement( Action=[sts.AssumeRole], Effect=Allow, Principal=Principal("Service", ["ec2.amazonaws.com"]), ) ]), )) self.runner_ssm_instanceprofile = self.template.add_resource( InstanceProfile("RunnerSsmInstanceProfile", Path="/", Roles=[Ref(self.runner_ssm_role)])) self.runner_launch_config = self.template.add_resource( LaunchConfiguration( "RunnerLaunchConfiguration", UserData=Base64( Join( "", [ "#!/bin/bash\n", "#####install ssm######\n", "yum install -y amazon-ssm-agent\n", "systemctl enable amazon-ssm-agent\n", "systemctl start amazon-ssm-agent\n", "####install docker####\n", "yum install -y docker\n", "systemctl enable docker\n", "systemctl start docker\n", "####install runner####\n", "yum install -y wget\n", "wget -O /usr/local/bin/gitlab-runner ", "https://gitlab-runner-downloads.s3.amazonaws.com/v", Ref(self.runner_version), "/binaries/gitlab-runner-linux-amd64\n", "ln -s /usr/local/bin/gitlab-runner ", "/usr/bin/gitlab-runner\n", "chmod +x /usr/local/bin/gitlab-runner\n", "useradd --comment 'GitLab Runner' ", "--create-home gitlab-runner --shell /bin/bash\n", "/usr/local/bin/gitlab-runner install ", "--user=gitlab-runner " "--working-directory=/home/gitlab-runner\n", "systemctl enable gitlab-runner\n", "systemctl start gitlab-runner\n", "####register runner####\n", "gitlab-runner register ", "--config=/etc/gitlab-runner/config.toml ", "--request-concurrency=", Ref(self.runner_job_concurrency), " ", "--tag-list=", Ref(self.runner_tag_list), " ", "--non-interactive ", "--registration-token=", Ref(self.runner_register_token), " ", "--run-untagged=true ", "--locked=false ", "--url=", Ref(self.runner_gitlab_url), " ", "--executor=docker ", "--docker-image=alpine:latest ", "--docker-privileged=true\n", "####create unregister script####\n", "TOKEN=$(gitlab-runner list 2>&1 | grep Executor | ", "awk '{ print $4 }' | awk -F= '{ print $2 }')\n", "URL=$(gitlab-runner list 2>&1 | grep Executor | ", "awk '{ print $5 }' | awk -F= '{ print $2 }')\n", "echo gitlab-runner unregister ", "--url $URL --token $TOKEN > /unregister.sh\n", "chmod +x /unregister.sh", ], )), ImageId=Ref(self.runner_ami_id), KeyName=Ref(self.runner_key_pair), BlockDeviceMappings=[ BlockDeviceMapping( DeviceName="/dev/xvda", Ebs=EBSBlockDevice( VolumeSize=Ref(self.runner_volume_size)), ) ], SecurityGroups=[Ref(self.runner_security_group)], InstanceType=Ref(self.runner_server_instance_type), IamInstanceProfile=GetAtt(self.runner_ssm_instanceprofile, "Arn"), )) self.runner_autoscaling_group = self.template.add_resource( AutoScalingGroup( "RunnerAutoscalingGroup", DesiredCapacity=Ref(self.runner_desired_count), LaunchConfigurationName=Ref(self.runner_launch_config), MinSize=Ref(self.runner_min_count), MaxSize=Ref(self.runner_max_count), VPCZoneIdentifier=Split(",", Ref(self.runner_subnets)), Tags=[Tag("Name", "gitlab-runner-created-by-asg", True)], ))
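# The autoscaling group above passes Tag("Name", "gitlab-runner-created-by-asg", True);
# the third positional argument of troposphere.autoscaling.Tag is PropagateAtLaunch,
# which copies the tag onto every instance the group launches. An illustrative sketch
# (the second tag key is a made-up example, not part of the original template):
name_tag = Tag("Name", "gitlab-runner-created-by-asg", True)  # propagated to instances
group_only_tag = Tag("ManagedBy", "cloudformation", False)    # stays on the ASG itself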
    ]
)
template.add_resource(ecs_instance_profile)

launch_template = LaunchTemplate(
    region.replace("-", "") + "ecslivelaunchtemplate",
    LaunchTemplateName="ecs-live-launch-template",
    LaunchTemplateData=LaunchTemplateData(
        ImageId=image_id,
        BlockDeviceMappings=[
            LaunchTemplateBlockDeviceMapping(
                DeviceName="/dev/xvda",
                Ebs=EBSBlockDevice(
                    "ecsliveblockdevice",
                    VolumeSize=30
                )
            )
        ],
        CreditSpecification=LaunchTemplateCreditSpecification(
            CpuCredits="unlimited"  # valid values are "standard" or "unlimited" (lowercase)
        ),
        InstanceType="t3.micro",
        IamInstanceProfile=IamInstanceProfile(
            region.replace("-", "") + "ecsliveiaminstanceprofile",
            Arn=GetAtt(ecs_instance_profile, "Arn")
        ),
        KeyName="live-eu-west-1",
        SecurityGroupIds=[
            GetAtt(security_group, "GroupId")
        ],
            files=InitFiles({
                '/etc/cfn/cfn-hup.conf': cfn_hup,
                '/etc/cfn/hooks.d/cfn-auto-reloader.conf': reloader,
                '/etc/docker/daemon.json': docker,
            })),
        'services': {
            'sysvinit': {
                'cfn': cfn_service,
                'docker': docker_service,
            }
        },
    })),
    BlockDeviceMappings=[
        BlockDeviceMapping(
            DeviceName='/dev/xvdh',
            Ebs=EBSBlockDevice(
                VolumeSize='50',
                VolumeType='standard',
            ))
    ])
t.add_resource(netkan_instance)
t.add_resource(
    RecordSetType(
        "NetKANDns",
        HostedZoneId=ZONE_ID,
        Comment="NetKAN Bot DNS",
        Name=BOT_FQDN,
        Type="A",
        TTL="900",
        ResourceRecords=[GetAtt('NetKANCompute', "PublicIp")],
    ))
def my_block_device_mappings_root(devicenamebase, volumesize, volumetype):
    block_device_mappings_root = BlockDeviceMapping(
        DeviceName=devicenamebase + "a1",
        Ebs=EBSBlockDevice(VolumeSize=volumesize, VolumeType=volumetype)
    )
    return block_device_mappings_root
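# Usage sketch for the helper above: build the root-volume mapping once and pass it
# to an instance. The device base name, size, type and AMI ID are illustrative only.
root_mapping = my_block_device_mappings_root("/dev/sd", 50, "gp2")  # -> DeviceName "/dev/sda1"
example_instance = Instance(
    "ExampleInstance",
    ImageId="ami-0123456789abcdef0",  # placeholder AMI
    InstanceType="t3.micro",
    BlockDeviceMappings=[root_mapping],
)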
def add_launch_template(template, hosts_sg): """Function to create a launch template. :param template: ECS Cluster template :type template: troposphere.Template :param hosts_sg: security group for the EC2 hosts :type hosts_sg: troposphere.ec2.SecurityGroup :return: launch_template :rtype: troposphere.ec2.LaunchTemplate """ # from troposphere.cloudformation import ( # WaitCondition, WaitConditionHandle # ) # Deactivated conditions given you could run with no EC2 at all. # Tricky condition to do as the WaitCondition and Handler cannot be created on a CFN Update, but only at the # very creation of the stack. # wait_handle = WaitConditionHandle( # 'BootstrapHandle', # template=template # ) # WaitCondition( # 'BootStrapCondition', # template=template, # DependsOn=[hosts_role], # Handle=Ref(wait_handle), # Timeout='900' # ) launch_template = LaunchTemplate( "LaunchTemplate", template=template, Metadata=cloudformation.Metadata( cloudformation.Init( cloudformation.InitConfigSets( default=["awspackages", "dockerconfig", "ecsconfig", "awsservices"] ), awspackages=cloudformation.InitConfig( packages={"yum": {"awslogs": [], "amazon-ssm-agent": []}}, commands={ "001-check-packages": {"command": "rpm -qa | grep amazon"}, "002-check-packages": {"command": "rpm -qa | grep aws"}, }, ), awsservices=cloudformation.InitConfig( services={ "sysvinit": { "amazon-ssm-agent": {"enabled": True, "ensureRunning": True} } } ), dockerconfig=cloudformation.InitConfig( commands={ "001-stop-docker": {"command": "systemctl stop docker"}, "098-reload-systemd": {"command": "systemctl daemon-reload"}, }, files={ "/etc/sysconfig/docker": { "owner": "root", "group": "root", "mode": "644", "content": Join( "\n", [ "DAEMON_MAXFILES=1048576", Join( " ", ["OPTIONS=--default-ulimit nofile=1024:4096"], ), "DAEMON_PIDFILE_TIMEOUT=10", "#EOF", "", ], ), } }, services={ "sysvinit": { "docker": { "enabled": True, "ensureRunning": True, "files": ["/etc/sysconfig/docker"], "commands": ["098-reload-systemd"], } } }, ), ecsconfig=cloudformation.InitConfig( files={ "/etc/ecs/ecs.config": { "owner": "root", "group": "root", "mode": "644", "content": Join( "\n", [ Sub(f"ECS_CLUSTER=${{{CLUSTER_NAME_T}}}"), "ECS_ENABLE_TASK_IAM_ROLE=true", "ECS_ENABLE_SPOT_INSTANCE_DRAINING=true", "ECS_ENABLE_TASK_IAM_ROLE_NETWORK_HOST=true", "ECS_ENABLE_CONTAINER_METADATA=true", "ECS_ENABLE_UNTRACKED_IMAGE_CLEANUP=true", "ECS_UPDATES_ENABLED=true", "ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION=15m", "ECS_IMAGE_CLEANUP_INTERVAL=10m", "ECS_NUM_IMAGES_DELETE_PER_CYCLE=100", "ECS_ENABLE_TASK_ENI=true", "ECS_AWSVPC_BLOCK_IMDS=true", "ECS_TASK_METADATA_RPS_LIMIT=300,400", "ECS_ENABLE_AWSLOGS_EXECUTIONROLE_OVERRIDE=true", 'ECS_AVAILABLE_LOGGING_DRIVERS=["awslogs", "json-file"]', "#EOF", ], ), } }, commands={ "0001-restartecs": { "command": "systemctl --no-block restart ecs" } }, ), ) ), LaunchTemplateData=LaunchTemplateData( BlockDeviceMappings=[ LaunchTemplateBlockDeviceMapping( DeviceName="/dev/xvda", Ebs=EBSBlockDevice(DeleteOnTermination=True, Encrypted=True), ) ], ImageId=Ref(compute_params.ECS_AMI_ID), InstanceInitiatedShutdownBehavior="terminate", IamInstanceProfile=IamInstanceProfile( Arn=Sub(f"${{{HOST_PROFILE_T}.Arn}}") ), TagSpecifications=[ TagSpecifications( ResourceType="instance", Tags=Tags( Name=Sub(f"EcsNodes-${{{CLUSTER_NAME_T}}}"), StackName=Ref("AWS::StackName"), StackId=Ref("AWS::StackId"), ), ) ], InstanceType="m5a.large", Monitoring=Monitoring(Enabled=True), SecurityGroupIds=[GetAtt(hosts_sg, "GroupId")], UserData=Base64( Join( "\n", [ "#!/usr/bin/env 
bash", "export PATH=$PATH:/opt/aws/bin", "cfn-init -v || yum install aws-cfn-bootstrap -y", Sub( f"cfn-init --region ${{AWS::Region}} -r LaunchTemplate -s ${{AWS::StackName}}" ), # 'if [ $? -ne 0 ]; then', # Sub(f'cfn-signal -e 1 -r "Failed to bootstrap" \'${{{wait_handle.title}}}\''), # 'halt', # 'else', # Sub(f'cfn-signal -e 0 -r "Successfully bootstrapped" \'${{{wait_handle.title}}}\''), # 'fi', "# EOF", ], ) ), ), LaunchTemplateName=Ref(CLUSTER_NAME_T), ) return launch_template
def emit_configuration(): vpc = cfn.vpcs[0] region = Ref("AWS::Region") chefserver_instance_class = template.add_parameter( Parameter( 'ChefServerInstanceType', Type='String', Default='t2.medium', Description='Chef Server instance type', AllowedValues=cfn.usable_instances(), ConstraintDescription='Instance size must be a valid instance type' )) # Create IAM role for the chefserver instance # load the policies default_policy = json.loads( cfn.load_template("default_policy.json.j2", { "env": CLOUDENV, "cloud": CLOUDNAME, "region": "us-east-1" })) chefserver_role_name = '.'.join(['chefserver', CLOUDNAME, CLOUDENV]) chefserver_iam_role = template.add_resource( Role("ChefServerIamRole", AssumeRolePolicyDocument=ASSUME_ROLE_POLICY, Path="/", Policies=[ Policy(PolicyName="ChefServerPolicy", PolicyDocument=json.loads( cfn.load_template( "chefserver_policy.json.j2", { "env": CLOUDENV, "cloud": CLOUDNAME, "region": "us-east-1" }))), Policy(PolicyName="ChefserverDefaultPolicy", PolicyDocument=default_policy) ], DependsOn=vpc.title)) chefserver_instance_profile = template.add_resource( InstanceProfile("chefserverInstanceProfile", Path="/", Roles=[Ref(chefserver_iam_role)], DependsOn=chefserver_iam_role.title)) chefserver_user_data = cfn.load_template("chefserver-init.bash.j2", { "env": CLOUDENV, "cloud": CLOUDNAME, "deploy": "chefserver" }) chefserver_ingress_rules = [ SecurityGroupRule(IpProtocol=p[0], CidrIp='{0}.0.0/16'.format(CIDR_PREFIX), FromPort=p[1], ToPort=p[1]) for p in [('tcp', 80), ('tcp', 443)] ] chefserver_sg = template.add_resource( SecurityGroup("ChefServer", GroupDescription="Security Group for the Chef server", VpcId=Ref(vpc), SecurityGroupIngress=chefserver_ingress_rules, DependsOn=vpc.title)) chefserver_name = cfn.sanitize_id("ChefServer", CLOUDNAME, CLOUDENV) chefserver_instance = template.add_resource( Instance(chefserver_name, DependsOn=vpc.title, InstanceType=Ref(chefserver_instance_class), KeyName=Ref(cfn.keyname), SourceDestCheck=False, ImageId=FindInMap('RegionMap', region, int(cfn.Amis.EBS)), NetworkInterfaces=[ NetworkInterfaceProperty( Description='Network interface for {0}'.format( chefserver_name), GroupSet=[Ref(chefserver_sg)], SubnetId=Ref( cfn.get_vpc_subnets(vpc, cfn.SubnetTypes.PLATFORM)[0]), AssociatePublicIpAddress=True, DeviceIndex=0, DeleteOnTermination=True) ], BlockDeviceMappings=[ BlockDeviceMapping(DeviceName="/dev/sda1", Ebs=EBSBlockDevice( VolumeSize=50, DeleteOnTermination=False)) ]))
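# The Role above references an ASSUME_ROLE_POLICY constant defined elsewhere in the
# project. For an EC2 instance role this has to be a trust policy that lets
# ec2.amazonaws.com assume the role; a hedged sketch of what that document
# typically looks like (not necessarily the project's exact constant):
ASSUME_ROLE_POLICY = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Principal": {"Service": ["ec2.amazonaws.com"]},
        "Action": ["sts:AssumeRole"],
    }],
}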
def main(**params): try: # Metadata t = Template() t.set_version("2010-09-09") t.set_description("(SOCA) - Base template to deploy compute nodes.") allow_anonymous_data_collection = params["MetricCollectionAnonymous"] debug = False mip_usage = False instances_list = params["InstanceType"].split("+") asg_lt = asg_LaunchTemplate() ltd = LaunchTemplateData("NodeLaunchTemplateData") mip = MixedInstancesPolicy() stack_name = Ref("AWS::StackName") # Begin LaunchTemplateData UserData = '''#!/bin/bash -xe export PATH=$PATH:/usr/local/bin if [[ "''' + params['BaseOS'] + '''" == "centos7" ]] || [[ "''' + params['BaseOS'] + '''" == "rhel7" ]]; then EASY_INSTALL=$(which easy_install-2.7) $EASY_INSTALL pip PIP=$(which pip2.7) $PIP install awscli yum install -y nfs-utils # enforce install of nfs-utils else # Upgrade awscli on ALI (do not use yum) EASY_INSTALL=$(which easy_install-2.7) $EASY_INSTALL pip PIP=$(which pip) $PIP install awscli --upgrade fi if [[ "''' + params['BaseOS'] + '''" == "amazonlinux2" ]]; then /usr/sbin/update-motd --disable fi GET_INSTANCE_TYPE=$(curl http://169.254.169.254/latest/meta-data/instance-type) echo export "SOCA_CONFIGURATION="''' + str(params['ClusterId']) + '''"" >> /etc/environment echo export "SOCA_BASE_OS="''' + str(params['BaseOS']) + '''"" >> /etc/environment echo export "SOCA_JOB_QUEUE="''' + str(params['JobQueue']) + '''"" >> /etc/environment echo export "SOCA_JOB_OWNER="''' + str(params['JobOwner']) + '''"" >> /etc/environment echo export "SOCA_JOB_NAME="''' + str(params['JobName']) + '''"" >> /etc/environment echo export "SOCA_JOB_PROJECT="''' + str(params['JobProject']) + '''"" >> /etc/environment echo export "SOCA_VERSION="''' + str(params['Version']) + '''"" >> /etc/environment echo export "SOCA_JOB_EFA="''' + str(params['Efa']).lower() + '''"" >> /etc/environment echo export "SOCA_JOB_ID="''' + str(params['JobId']) + '''"" >> /etc/environment echo export "SOCA_SCRATCH_SIZE=''' + str(params['ScratchSize']) + '''" >> /etc/environment echo export "SOCA_INSTALL_BUCKET="''' + str(params['S3Bucket']) + '''"" >> /etc/environment echo export "SOCA_INSTALL_BUCKET_FOLDER="''' + str(params['S3InstallFolder']) + '''"" >> /etc/environment echo export "SOCA_FSX_LUSTRE_BUCKET="''' + str(params['FSxLustreConfiguration']['fsx_lustre']).lower() + '''"" >> /etc/environment echo export "SOCA_FSX_LUSTRE_DNS="''' + str(params['FSxLustreConfiguration']['existing_fsx']).lower() + '''"" >> /etc/environment echo export "SOCA_INSTANCE_TYPE=$GET_INSTANCE_TYPE" >> /etc/environment echo export "SOCA_INSTANCE_HYPERTHREADING="''' + str(params['ThreadsPerCore']).lower() + '''"" >> /etc/environment echo export "SOCA_HOST_SYSTEM_LOG="/apps/soca/''' + str(params['ClusterId']) + '''/cluster_node_bootstrap/logs/''' + str(params['JobId']) + '''/$(hostname -s)"" >> /etc/environment echo export "AWS_STACK_ID=${AWS::StackName}" >> /etc/environment echo export "AWS_DEFAULT_REGION=${AWS::Region}" >> /etc/environment source /etc/environment AWS=$(which aws) # Give yum permission to the user on this specific machine echo "''' + params['JobOwner'] + ''' ALL=(ALL) /bin/yum" >> /etc/sudoers mkdir -p /apps mkdir -p /data # Mount EFS echo "''' + params['EFSDataDns'] + ''':/ /data nfs4 nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 0 0" >> /etc/fstab echo "''' + params['EFSAppsDns'] + ''':/ /apps nfs4 nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 0 0" >> /etc/fstab mount -a # Configure NTP yum remove -y ntp yum install -y chrony mv /etc/chrony.conf 
/etc/chrony.conf.original echo -e """ # use the local instance NTP service, if available server 169.254.169.123 prefer iburst minpoll 4 maxpoll 4 # Use public servers from the pool.ntp.org project. # Please consider joining the pool (http://www.pool.ntp.org/join.html). # !!! [BEGIN] SOCA REQUIREMENT # You will need to open UDP egress traffic on your security group if you want to enable public pool #pool 2.amazon.pool.ntp.org iburst # !!! [END] SOCA REQUIREMENT # Record the rate at which the system clock gains/losses time. driftfile /var/lib/chrony/drift # Allow the system clock to be stepped in the first three updates # if its offset is larger than 1 second. makestep 1.0 3 # Specify file containing keys for NTP authentication. keyfile /etc/chrony.keys # Specify directory for log files. logdir /var/log/chrony # save data between restarts for fast re-load dumponexit dumpdir /var/run/chrony """ > /etc/chrony.conf systemctl enable chronyd # Prepare Log folder mkdir -p $SOCA_HOST_SYSTEM_LOG echo "@reboot /bin/bash /apps/soca/$SOCA_CONFIGURATION/cluster_node_bootstrap/ComputeNodePostReboot.sh >> $SOCA_HOST_SYSTEM_LOG/ComputeNodePostInstall.log 2>&1" | crontab - $AWS s3 cp s3://$SOCA_INSTALL_BUCKET/$SOCA_INSTALL_BUCKET_FOLDER/scripts/config.cfg /root/ /bin/bash /apps/soca/$SOCA_CONFIGURATION/cluster_node_bootstrap/ComputeNode.sh ''' + params['SchedulerHostname'] + ''' >> $SOCA_HOST_SYSTEM_LOG/ComputeNode.sh.log 2>&1''' ltd.EbsOptimized = True for instance in instances_list: if "t2." in instance: ltd.EbsOptimized = False else: # t2 does not support CpuOptions ltd.CpuOptions = CpuOptions( CoreCount=int(params["CoreCount"]), ThreadsPerCore=1 if params["ThreadsPerCore"] is False else 2) ltd.IamInstanceProfile = IamInstanceProfile(Arn=params["ComputeNodeInstanceProfileArn"]) ltd.KeyName = params["SSHKeyPair"] ltd.ImageId = params["ImageId"] if params["SpotPrice"] is not False and params["SpotAllocationCount"] is False: ltd.InstanceMarketOptions = InstanceMarketOptions( MarketType="spot", SpotOptions=SpotOptions( MaxPrice=Ref("AWS::NoValue") if params["SpotPrice"] == "auto" else str(params["SpotPrice"]) # auto -> cap at OD price ) ) ltd.InstanceType = instances_list[0] ltd.NetworkInterfaces = [NetworkInterfaces( InterfaceType="efa" if params["Efa"] is not False else Ref("AWS::NoValue"), DeleteOnTermination=True, DeviceIndex=0, Groups=[params["SecurityGroupId"]] )] ltd.UserData = Base64(Sub(UserData)) ltd.BlockDeviceMappings = [ BlockDeviceMapping( DeviceName="/dev/xvda" if params["BaseOS"] == "amazonlinux2" else "/dev/sda1", Ebs=EBSBlockDevice( VolumeSize=params["RootSize"], VolumeType="gp2", DeleteOnTermination="false" if params["KeepEbs"] is True else "true", Encrypted=True)) ] if int(params["ScratchSize"]) > 0: ltd.BlockDeviceMappings.append( BlockDeviceMapping( DeviceName="/dev/xvdbx", Ebs=EBSBlockDevice( VolumeSize=params["ScratchSize"], VolumeType="io1" if int(params["VolumeTypeIops"]) > 0 else "gp2", Iops=params["VolumeTypeIops"] if int(params["VolumeTypeIops"]) > 0 else Ref("AWS::NoValue"), DeleteOnTermination="false" if params["KeepEbs"] is True else "true", Encrypted=True)) ) # End LaunchTemplateData # Begin Launch Template Resource lt = LaunchTemplate("NodeLaunchTemplate") lt.LaunchTemplateName = params["ClusterId"] + "-" + str(params["JobId"]) lt.LaunchTemplateData = ltd t.add_resource(lt) # End Launch Template Resource asg_lt.LaunchTemplateSpecification = LaunchTemplateSpecification( LaunchTemplateId=Ref(lt), Version=GetAtt(lt, "LatestVersionNumber") ) asg_lt.Overrides = [] for instance in 
instances_list: asg_lt.Overrides.append(LaunchTemplateOverrides( InstanceType=instance)) # Begin InstancesDistribution if params["SpotPrice"] is not False and \ params["SpotAllocationCount"] is not False and \ (params["DesiredCapacity"] - params["SpotAllocationCount"]) > 0: mip_usage = True idistribution = InstancesDistribution() idistribution.OnDemandAllocationStrategy = "prioritized" # only supported value idistribution.OnDemandBaseCapacity = params["DesiredCapacity"] - params["SpotAllocationCount"] idistribution.OnDemandPercentageAboveBaseCapacity = "0" # force the other instances to be SPOT idistribution.SpotMaxPrice = Ref("AWS::NoValue") if params["SpotPrice"] == "auto" else str( params["SpotPrice"]) idistribution.SpotAllocationStrategy = params['SpotAllocationStrategy'] mip.InstancesDistribution = idistribution # End MixedPolicyInstance # Begin FSx for Lustre if params["FSxLustreConfiguration"]["fsx_lustre"] is not False: if params["FSxLustreConfiguration"]["existing_fsx"] is False: fsx_lustre = FileSystem("FSxForLustre") fsx_lustre.FileSystemType = "LUSTRE" fsx_lustre.StorageCapacity = params["FSxLustreConfiguration"]["capacity"] fsx_lustre.SecurityGroupIds = [params["SecurityGroupId"]] fsx_lustre.SubnetIds = params["SubnetId"] if params["FSxLustreConfiguration"]["s3_backend"] is not False: fsx_lustre_configuration = LustreConfiguration() fsx_lustre_configuration.ImportPath = params["FSxLustreConfiguration"]["import_path"] if params["FSxLustreConfiguration"]["import_path"] is not False else params["FSxLustreConfiguration"]["s3_backend"] fsx_lustre_configuration.ExportPath = params["FSxLustreConfiguration"]["import_path"] if params["FSxLustreConfiguration"]["import_path"] is not False else params["FSxLustreConfiguration"]["s3_backend"] + "/" + params["ClusterId"] + "-fsxoutput/job-" + params["JobId"] + "/" fsx_lustre.LustreConfiguration = fsx_lustre_configuration fsx_lustre.Tags = base_Tags( # False disable PropagateAtLaunch Name=str(params["ClusterId"] + "-compute-job-" + params["JobId"]), _soca_JobId=str(params["JobId"]), _soca_JobName=str(params["JobName"]), _soca_JobQueue=str(params["JobQueue"]), _soca_StackId=stack_name, _soca_JobOwner=str(params["JobOwner"]), _soca_JobProject=str(params["JobProject"]), _soca_KeepForever=str(params["KeepForever"]).lower(), _soca_FSx="true", _soca_ClusterId=str(params["ClusterId"]), ) t.add_resource(fsx_lustre) # End FSx For Lustre # Begin AutoScalingGroup Resource asg = AutoScalingGroup("AutoScalingComputeGroup") asg.DependsOn = "NodeLaunchTemplate" if mip_usage is True or instances_list.__len__() > 1: mip.LaunchTemplate = asg_lt asg.MixedInstancesPolicy = mip else: asg.LaunchTemplate = LaunchTemplateSpecification( LaunchTemplateId=Ref(lt), Version=GetAtt(lt, "LatestVersionNumber")) asg.MinSize = int(params["DesiredCapacity"]) asg.MaxSize = int(params["DesiredCapacity"]) asg.VPCZoneIdentifier = params["SubnetId"] if params["PlacementGroup"] is True: pg = PlacementGroup("ComputeNodePlacementGroup") pg.Strategy = "cluster" t.add_resource(pg) asg.PlacementGroup = Ref(pg) asg.Tags = Tags( Name=str(params["ClusterId"]) + "-compute-job-" + str(params["JobId"]), _soca_JobId=str(params["JobId"]), _soca_JobName=str(params["JobName"]), _soca_JobQueue=str(params["JobQueue"]), _soca_StackId=stack_name, _soca_JobOwner=str(params["JobOwner"]), _soca_JobProject=str(params["JobProject"]), _soca_KeepForever=str(params["KeepForever"]).lower(), _soca_ClusterId=str(params["ClusterId"]), _soca_NodeType="soca-compute-node") t.add_resource(asg) # End AutoScalingGroup 
Resource # Begin Custom Resource # Change Mapping to No if you want to disable this if allow_anonymous_data_collection is True: metrics = CustomResourceSendAnonymousMetrics("SendAnonymousData") metrics.ServiceToken = params["SolutionMetricLambda"] metrics.DesiredCapacity = str(params["DesiredCapacity"]) metrics.InstanceType = str(params["InstanceType"]) metrics.Efa = str(params["Efa"]) metrics.ScratchSize = str(params["ScratchSize"]) metrics.RootSize = str(params["RootSize"]) metrics.SpotPrice = str(params["SpotPrice"]) metrics.BaseOS = str(params["BaseOS"]) metrics.StackUUID = str(params["StackUUID"]) metrics.KeepForever = str(params["KeepForever"]) metrics.FsxLustre = str(params["FSxLustreConfiguration"]) t.add_resource(metrics) # End Custom Resource if debug is True: print(t.to_json()) # Tags must use "soca:<Key>" syntax template_output = t.to_yaml().replace("_soca_", "soca:") return {'success': True, 'output': template_output} except Exception as e: exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] return {'success': False, 'output': 'cloudformation_builder.py: ' + ( str(e) + ': error :' + str(exc_type) + ' ' + str(fname) + ' ' + str(exc_tb.tb_lineno))}
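# CloudFormation tag keys such as "soca:JobId" contain a colon, which cannot appear
# in a Python keyword argument, so the builder writes "_soca_" keys and rewrites
# them after rendering. A minimal sketch of that post-processing step (the
# Tags/base_Tags calls above feed keys like _soca_JobId into the template):
rendered = t.to_yaml().replace("_soca_", "soca:")  # _soca_JobId -> soca:JobId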
# load balancer
elb_listener_80 = Listener(config['name'] + 'Ssl')
elb_listener_80.InstancePort = 80
elb_listener_80.LoadBalancerPort = 80
elb_listener_80.Protocol = 'HTTP'
elb_listener_80.InstanceProtocol = 'HTTP'

load_balancer = LoadBalancer(config['name'] + "Elb")
load_balancer.CrossZone = True
load_balancer.Listeners = [elb_listener_80]
load_balancer.Subnets = [Ref(subnet.title) for subnet in app_subnets]
load_balancer.SecurityGroups = [Ref(elb_sg)]
t.add_resource(load_balancer)

# launch configuration for consul server
consul_block_device = EBSBlockDevice(config['name'] + 'Ebs')
consul_block_device.DeleteOnTermination = config['consul_launch_config']['block_device']['delete_on_termination']

consul_block_device_mapping = BlockDeviceMapping(config['name'] + 'ConsulBlockDeviceMapping')
consul_block_device_mapping.DeviceName = '/dev/sda1'
consul_block_device_mapping.Ebs = consul_block_device

consul_launch_config = LaunchConfiguration(config['name'] + 'ConsulLaunchConfig')
consul_launch_config.AssociatePublicIpAddress = True
consul_launch_config.EbsOptimized = config['consul_launch_config']['ebs_optimized']
consul_launch_config.ImageId = config['consul_launch_config']['image_id']
consul_launch_config.KeyName = config['consul_launch_config']['key_name']
consul_launch_config.InstanceType = config['consul_launch_config']['instance_type']
consul_launch_config.BlockDeviceMappings = [consul_block_device_mapping]
consul_launch_config.SecurityGroups = [Ref(config['name'] + 'homeSsh'), Ref(consul_sg)]
t.add_resource(consul_launch_config)
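# The launch configuration and load balancer above are typically tied together by
# an AutoScalingGroup, which this fragment does not show. A hedged sketch of that
# wiring, reusing the names defined above; the group size is an illustrative
# assumption, not a value from the original configuration:
consul_asg = AutoScalingGroup(
    config['name'] + 'ConsulAsg',
    LaunchConfigurationName=Ref(consul_launch_config),
    LoadBalancerNames=[Ref(load_balancer)],
    VPCZoneIdentifier=[Ref(subnet.title) for subnet in app_subnets],
    MinSize=3,
    MaxSize=3,
)
t.add_resource(consul_asg)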