Example #1
    def _test(self, spotInstances=False):
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        self.createClusterUtil()
        # get the leader so we know the IP address - we don't need to wait since cluster creation
        # already ensures the leader is running
        leader = AWSProvisioner._getLeader(wait=False, clusterName=self.clusterName)

        assert len(self.getMatchingRoles(self.clusterName)) == 1
        # --never-download prevents silent upgrades to pip, wheel and setuptools
        venv_command = ['virtualenv', '--system-site-packages', '--never-download',
                        '/home/venv']
        self.sshUtil(venv_command)

        upgrade_command = ['/home/venv/bin/pip', 'install', 'setuptools==28.7.1']
        self.sshUtil(upgrade_command)

        yaml_command = ['/home/venv/bin/pip', 'install', 'pyyaml==3.12']
        self.sshUtil(yaml_command)

        # install toil scripts
        install_command = ['/home/venv/bin/pip', 'install', 'toil-scripts==%s' % self.toilScripts]
        self.sshUtil(install_command)

        toilOptions = ['--batchSystem=mesos',
                       '--workDir=/var/lib/toil',
                       '--mesosMaster=%s:5050' % leader.private_ip_address,
                       '--clean=always',
                       '--retryCount=2']

        toilOptions.extend(['--provisioner=aws',
                            '--nodeType=' + self.instanceType,
                            '--maxNodes=%s' % self.numWorkers,
                            '--logDebug'])
        if spotInstances:
            toilOptions.extend([
                '--preemptableNodeType=%s:%s' % (self.instanceType, self.spotBid),
                # The RNASeq pipeline does not specify a preemptability requirement, so we
                # need to specify a default; otherwise jobs would never get scheduled.
                '--defaultPreemptable',
                '--maxPreemptableNodes=%s' % self.numWorkers])

        toilOptions = ' '.join(toilOptions)

        # TOIL_AWS_NODE_DEBUG prevents the provisioner from killing nodes that
        # fail a status check. This allows for easier debugging of
        # https://github.com/BD2KGenomics/toil/issues/1141
        runCommand = ['bash', '-c',
                      'PATH=/home/venv/bin/:$PATH '
                      'TOIL_AWS_NODE_DEBUG=True '
                      'TOIL_SCRIPTS_TEST_NUM_SAMPLES='+str(self.numSamples)+
                      ' TOIL_SCRIPTS_TEST_TOIL_OPTIONS=' + pipes.quote(toilOptions) +
                      ' TOIL_SCRIPTS_TEST_JOBSTORE=' + self.jobStore +
                      ' /home/venv/bin/python -m unittest -v' +
                      ' toil_scripts.rnaseq_cgl.test.test_rnaseq_cgl.RNASeqCGLTest.test_manifest']

        self.sshUtil(runCommand)
        assert len(self.getMatchingRoles(self.clusterName)) == 1

        AWSProvisioner.destroyCluster(self.clusterName)
        assert len(self.getMatchingRoles(self.clusterName)) == 0
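
The sshUtil helper called throughout these examples is not shown in the snippets. A minimal sketch of one plausible implementation, assuming it simply forwards the command list to AWSProvisioner.sshLeader (which Examples #4 and #6 show is a real entry point):

    # Hypothetical helper, not part of the snippet above: run a command on the
    # cluster leader over SSH by forwarding to AWSProvisioner.sshLeader.
    def sshUtil(self, command):
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        AWSProvisioner.sshLeader(clusterName=self.clusterName, args=command)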
Example #2
    def _test(self, spotInstances=False, fulfillableBid=True):
        """
        Does the work of the testing. Many features' tests are thrown in here in no particular
        order.

        :param spotInstances: Specify if you want to use spotInstances
        :param fulfillableBid: If false, the bid will never succeed. Used to test bid failure
        """
        if not fulfillableBid:
            self.spotBid = '0.01'
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        self.launchCluster()
        # get the leader so we know the IP address - we don't need to wait since cluster creation
        # already ensures the leader is running
        self.leader = AWSProvisioner._getLeader(wait=False, clusterName=self.clusterName)
        ctx = AWSProvisioner._buildContext(self.clusterName)

        assert len(self.getMatchingRoles(self.clusterName)) == 1
        # --never-download prevents silent upgrades to pip, wheel and setuptools
        venv_command = ['virtualenv', '--system-site-packages', '--never-download',
                        '/home/venv']
        self.sshUtil(venv_command)

        upgrade_command = ['/home/venv/bin/pip', 'install', 'setuptools==28.7.1']
        self.sshUtil(upgrade_command)

        yaml_command = ['/home/venv/bin/pip', 'install', 'pyyaml==3.12']
        self.sshUtil(yaml_command)

        self._getScript()

        toilOptions = [self.jobStore,
                       '--batchSystem=mesos',
                       '--workDir=/var/lib/toil',
                       '--clean=always',
                       '--retryCount=2',
                       '--clusterStats=/home/',
                       '--logDebug',
                       '--logFile=/home/sort.log',
                       '--provisioner=aws']

        if spotInstances:
            toilOptions.extend([
                '--preemptableNodeType=%s:%s' % (self.instanceType, self.spotBid),
                # The RNASeq pipeline does not specify a preemptability requirement, so we
                # need to specify a default; otherwise jobs would never get scheduled.
                '--defaultPreemptable',
                '--maxPreemptableNodes=%s' % self.numWorkers])
        else:
            toilOptions.extend(['--nodeType=' + self.instanceType,
                                '--maxNodes=%s' % self.numWorkers])

        self._runScript(toilOptions)

        assert len(self.getMatchingRoles(self.clusterName)) == 1

        checkStatsCommand = ['/home/venv/bin/python', '-c',
                             'import json; import os; '
                             'json.load(open("/home/" + [f for f in os.listdir("/home/") '
                             'if f.endswith(".json")].pop()))'
                             ]

        self.sshUtil(checkStatsCommand)

        from boto.exception import EC2ResponseError
        volumeID = self.getRootVolID()
        ctx = AWSProvisioner._buildContext(self.clusterName)
        AWSProvisioner.destroyCluster(self.clusterName)
        self.leader.update()
        for attempt in range(6):
            # https://github.com/BD2KGenomics/toil/issues/1567
            # retry this for up to 1 minute until the volume disappears
            try:
                ctx.ec2.get_all_volumes(volume_ids=[volumeID])
                time.sleep(10)
            except EC2ResponseError as e:
                if e.status == 400 and 'InvalidVolume.NotFound' in e.code:
                    break
                else:
                    raise
        else:
            self.fail('Volume with ID %s was not cleaned up properly' % volumeID)

        assert len(self.getMatchingRoles(self.clusterName)) == 0
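
The destroy-then-poll sequence above (retry get_all_volumes until InvalidVolume.NotFound) could be factored into a small helper. A sketch under the assumption that the same boto context object and a module-level import of time are available, as in the snippet; the helper name is hypothetical:

    def _waitForVolumeDeletion(self, ctx, volumeID, attempts=6, delay=10):
        # Poll until the leader's root volume disappears, failing the test if
        # it is still around after attempts * delay seconds.
        from boto.exception import EC2ResponseError
        for _ in range(attempts):
            try:
                ctx.ec2.get_all_volumes(volume_ids=[volumeID])
                time.sleep(delay)  # volume still exists; keep polling
            except EC2ResponseError as e:
                if e.status == 400 and 'InvalidVolume.NotFound' in e.code:
                    return  # gone, as expected after destroyCluster
                raise
        self.fail('Volume with ID %s was not cleaned up properly' % volumeID)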
Example #3
    def _test(self, spotInstances=False, fulfillableBid=True):
        """
        Does the work of the testing. Many features' tests are thrown in here in no particular
        order.

        :param spotInstances: Specify if you want to use spotInstances
        :param fulfillableBid: If false, the bid will never succeed. Used to test bid failure
        """
        if not fulfillableBid:
            self.spotBid = '0.01'
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        self.launchCluster()
        # get the leader so we know the IP address - we don't need to wait since cluster creation
        # already ensures the leader is running
        self.leader = AWSProvisioner._getLeader(wait=False,
                                                clusterName=self.clusterName)
        ctx = AWSProvisioner._buildContext(self.clusterName)

        assert len(self.getMatchingRoles(self.clusterName)) == 1
        # --never-download prevents silent upgrades to pip, wheel and setuptools
        venv_command = [
            'virtualenv', '--system-site-packages', '--never-download',
            '/home/venv'
        ]
        self.sshUtil(venv_command)

        upgrade_command = [
            '/home/venv/bin/pip', 'install', 'setuptools==28.7.1'
        ]
        self.sshUtil(upgrade_command)

        yaml_command = ['/home/venv/bin/pip', 'install', 'pyyaml==3.12']
        self.sshUtil(yaml_command)

        self._getScript()

        toilOptions = [
            self.jobStore, '--batchSystem=mesos', '--workDir=/var/lib/toil',
            '--clean=always', '--retryCount=2', '--clusterStats=/home/',
            '--logDebug', '--logFile=/home/sort.log', '--provisioner=aws'
        ]

        if spotInstances:
            toilOptions.extend([
                '--preemptableNodeType=%s:%s' %
                (self.instanceType, self.spotBid),
                # The RNASeq pipeline does not specify a preemptability requirement, so we
                # need to specify a default; otherwise jobs would never get scheduled.
                '--defaultPreemptable',
                '--maxPreemptableNodes=%s' % self.numWorkers
            ])
        else:
            toilOptions.extend([
                '--nodeType=' + self.instanceType,
                '--maxNodes=%s' % self.numWorkers
            ])

        self._runScript(toilOptions)

        assert len(self.getMatchingRoles(self.clusterName)) == 1

        checkStatsCommand = [
            '/home/venv/bin/python', '-c', 'import json; import os; '
            'json.load(open("/home/" + [f for f in os.listdir("/home/") '
            'if f.endswith(".json")].pop()))'
        ]

        self.sshUtil(checkStatsCommand)

        from boto.exception import EC2ResponseError
        volumeID = self.getRootVolID()
        ctx = AWSProvisioner._buildContext(self.clusterName)
        AWSProvisioner.destroyCluster(self.clusterName)
        self.leader.update()
        for attempt in range(6):
            # https://github.com/BD2KGenomics/toil/issues/1567
            # retry this for up to 1 minute until the volume disappears
            try:
                ctx.ec2.get_all_volumes(volume_ids=[volumeID])
                time.sleep(10)
            except EC2ResponseError as e:
                if e.status == 400 and 'InvalidVolume.NotFound' in e.code:
                    break
                else:
                    raise
        else:
            self.fail('Volume with ID %s was not cleaned up properly' %
                      volumeID)

        assert len(self.getMatchingRoles(self.clusterName)) == 0
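
The checkStatsCommand one-liner is dense. Unrolled, it just locates the JSON file that --clusterStats=/home/ wrote on the leader and confirms it parses; an equivalent, more readable version of what runs remotely:

# What the checkStatsCommand one-liner executes on the leader, unrolled.
import json
import os

# --clusterStats=/home/ makes Toil write a stats file into /home/; pick the
# .json file found there and make sure it is valid JSON.
statsFiles = [f for f in os.listdir('/home/') if f.endswith('.json')]
json.load(open('/home/' + statsFiles.pop()))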
Example #4
    def testAWSProvisionerUtils(self):
        clusterName = 'cluster-utils-test' + str(uuid.uuid4())
        keyName = os.getenv('TOIL_AWS_KEYNAME')

        try:
            # --provisioner flag should default to aws, so we're not explicitly
            # specifying that here
            system([self.toilMain, 'launch-cluster', '--nodeType=t2.micro',
                    '--keyPairName=' + keyName, clusterName])
        finally:
            system([self.toilMain, 'destroy-cluster', '--provisioner=aws', clusterName])
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner

            userTags = {'key1': 'value1', 'key2': 'value2', 'key3': 'value3'}
            tags = {'Name': clusterName, 'Owner': keyName}
            tags.update(userTags)

            # launch a preemptable leader with the same cluster name
            system([self.toilMain, 'launch-cluster', '-t', 'key1=value1', '-t', 'key2=value2', '--tag', 'key3=value3',
                    '--nodeType=m3.medium:0.2', '--keyPairName=' + keyName, clusterName,
                    '--provisioner=aws', '--logLevel=DEBUG'])

            # test leader tags
            leaderTags = AWSProvisioner._getLeader(clusterName).tags
            self.assertEqual(tags, leaderTags)

            # Test strict host key checking
            # Doesn't work when run locally.
            if keyName == 'jenkins@jenkins-master':
                try:
                    AWSProvisioner.sshLeader(clusterName=clusterName, strict=True)
                except RuntimeError:
                    pass
                else:
                    self.fail("Host key verification passed where it should have failed")

            # Add the host key to known_hosts so that the rest of the tests can
            # pass without choking on the verification prompt.
            AWSProvisioner.sshLeader(clusterName=clusterName, strict=True, sshOptions=['-oStrictHostKeyChecking=no'])

            system([self.toilMain, 'ssh-cluster', '--provisioner=aws', clusterName])

            testStrings = ["'foo'",
                           '"foo"',
                           '  foo',
                           '$PATH',
                           '"',
                           "'",
                           '\\',
                           '| cat',
                           '&& cat',
                           '; cat'
                           ]
            for test in testStrings:
                logger.info('Testing SSH with special string: %s', test)
                compareTo = "import sys; assert sys.argv[1]==%r" % test
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['python', '-', test],
                                         input=compareTo)

            try:
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['nonsenseShouldFail'])
            except RuntimeError:
                pass
            else:
                self.fail('The remote command failed silently where it should have '
                          'raised an error')

            AWSProvisioner.sshLeader(clusterName=clusterName,
                                     args=['python', '-c', "import os; assert os.environ['TOIL_WORKDIR']=='/var/lib/toil'"])

            # `toil rsync-cluster`
            # Testing special characters - string.punctuation
            fname = '!"#$%&\'()*+,-.;<=>:\ ?@[\\\\]^_`{|}~'
            testData = os.urandom(3 * (10**6))
            with tempfile.NamedTemporaryFile(suffix=fname) as tmpFile:
                relpath = os.path.basename(tmpFile.name)
                tmpFile.write(testData)
                tmpFile.flush()
                # Upload file to leader
                AWSProvisioner.rsyncLeader(clusterName=clusterName, args=[tmpFile.name, ":"])
                # Ensure file exists
                AWSProvisioner.sshLeader(clusterName=clusterName, args=["test", "-e", relpath])
            tmpDir = tempfile.mkdtemp()
            # Download the file again and make sure it's the same file
            # `--protect-args` needed because remote bash chokes on special characters
            AWSProvisioner.rsyncLeader(clusterName=clusterName, args=["--protect-args", ":" + relpath, tmpDir])
            with open(os.path.join(tmpDir, relpath), "r") as f:
                self.assertEqual(f.read(), testData, "Downloaded file does not match original file")
        finally:
            system([self.toilMain, 'destroy-cluster', '--provisioner=aws', clusterName])
            try:
                shutil.rmtree(tmpDir)
            except NameError:
                pass
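
The special-string loop above works because the Python payload is delivered on stdin (input=compareTo) rather than interpolated into the remote shell command, so sys.argv[1] must come back byte-for-byte intact. A quick illustration of the escaping a naive shell command line would need for those same strings, using pipes.quote (the same helper Example #1 relies on for TOIL_SCRIPTS_TEST_TOIL_OPTIONS):

import pipes

# Each of these strings would be mangled or even executed if spliced into a
# shell command unquoted; pipes.quote shows the escaping required.
for s in ["'foo'", '$PATH', '| cat', '&& cat', '; cat']:
    print('raw: %-8r  quoted: %s' % (s, pipes.quote(s)))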
Example #5
    def _test(self, preemptableJobs=False):
        """
        Does the work of the testing. Many features' tests are thrown in here in no particular
        order.
        """
        from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        self.launchCluster()
        # get the leader so we know the IP address - we don't need to wait since cluster creation
        # already ensures the leader is running
        self.leader = AWSProvisioner._getLeader(wait=False,
                                                clusterName=self.clusterName)
        ctx = AWSProvisioner._buildContext(self.clusterName)

        assert len(self.getMatchingRoles(self.clusterName)) == 1
        # --never-download prevents silent upgrades to pip, wheel and setuptools
        venv_command = [
            'virtualenv', '--system-site-packages', '--never-download',
            '/home/venv'
        ]
        self.sshUtil(venv_command)

        upgrade_command = [
            '/home/venv/bin/pip', 'install', 'setuptools==28.7.1'
        ]
        self.sshUtil(upgrade_command)

        yaml_command = ['/home/venv/bin/pip', 'install', 'pyyaml==3.12']
        self.sshUtil(yaml_command)

        self._getScript()

        toilOptions = [
            self.jobStore, '--batchSystem=mesos', '--workDir=/var/lib/toil',
            '--clean=always', '--retryCount=2', '--clusterStats=/home/',
            '--logDebug', '--logFile=/home/sort.log', '--provisioner=aws'
        ]

        toilOptions.extend([
            '--nodeTypes=' + ",".join(self.instanceTypes),
            '--maxNodes=%s' % ",".join(self.numWorkers)
        ])
        if preemptableJobs:
            toilOptions.extend(['--defaultPreemptable'])

        self._runScript(toilOptions)

        assert len(self.getMatchingRoles(self.clusterName)) == 1

        checkStatsCommand = [
            '/home/venv/bin/python', '-c', 'import json; import os; '
            'json.load(open("/home/" + [f for f in os.listdir("/home/") '
            'if f.endswith(".json")].pop()))'
        ]

        self.sshUtil(checkStatsCommand)

        from boto.exception import EC2ResponseError
        volumeID = self.getRootVolID()
        ctx = AWSProvisioner._buildContext(self.clusterName)
        AWSProvisioner.destroyCluster(self.clusterName)
        self.leader.update()
        for attempt in range(6):
            # https://github.com/BD2KGenomics/toil/issues/1567
            # retry this for up to 1 minute until the volume disappears
            try:
                ctx.ec2.get_all_volumes(volume_ids=[volumeID])
                time.sleep(10)
            except EC2ResponseError as e:
                if e.status == 400 and 'InvalidVolume.NotFound' in e.code:
                    break
                else:
                    raise
        else:
            self.fail('Volume with ID %s was not cleaned up properly' %
                      volumeID)

        assert len(self.getMatchingRoles(self.clusterName)) == 0
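
Unlike the single-node-type examples above, this test drives autoscaling with parallel lists of instance types and worker counts. Assuming instanceTypes and numWorkers hold lists like the ones below (illustrative values only, not the test's real fixtures), the joined flags come out as comma-separated values:

# Illustrative values; the real test fixtures set these attributes elsewhere.
instanceTypes = ['t2.medium', 'm3.large:0.2']  # type:bid marks a spot (preemptable) type
numWorkers = ['2', '3']                        # max nodes for each corresponding type

print('--nodeTypes=' + ','.join(instanceTypes))  # --nodeTypes=t2.medium,m3.large:0.2
print('--maxNodes=%s' % ','.join(numWorkers))    # --maxNodes=2,3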
Example #6
    def testAWSProvisionerUtils(self):
        clusterName = 'cluster-utils-test' + str(uuid.uuid4())
        keyName = os.getenv('TOIL_AWS_KEYNAME')

        try:
            # --provisioner flag should default to aws, so we're not explicitly
            # specifying that here
            system([
                self.toilMain, 'launch-cluster', '--leaderNodeType=t2.micro',
                '--keyPairName=' + keyName, clusterName
            ])
        finally:
            system([
                self.toilMain, 'destroy-cluster', '--provisioner=aws',
                clusterName
            ])
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner

            userTags = {'key1': 'value1', 'key2': 'value2', 'key3': 'value3'}
            tags = {'Name': clusterName, 'Owner': keyName}
            tags.update(userTags)

            # launch a preemptable leader with the same cluster name
            system([
                self.toilMain, 'launch-cluster', '-t', 'key1=value1', '-t',
                'key2=value2', '--tag', 'key3=value3',
                '--leaderNodeType=m3.medium:0.2', '--keyPairName=' + keyName,
                clusterName, '--provisioner=aws', '--logLevel=DEBUG'
            ])

            # test leader tags
            leaderTags = AWSProvisioner._getLeader(clusterName).tags
            self.assertEqual(tags, leaderTags)

            # Test strict host key checking
            # Doesn't work when run locally.
            if keyName == 'jenkins@jenkins-master':
                try:
                    AWSProvisioner.sshLeader(clusterName=clusterName,
                                             strict=True)
                except RuntimeError:
                    pass
                else:
                    self.fail(
                        "Host key verification passed where it should have failed"
                    )

            # Add the host key to known_hosts so that the rest of the tests can
            # pass without choking on the verification prompt.
            AWSProvisioner.sshLeader(clusterName=clusterName,
                                     strict=True,
                                     sshOptions=['-oStrictHostKeyChecking=no'])

            system([
                self.toilMain, 'ssh-cluster', '--provisioner=aws', clusterName
            ])

            testStrings = [
                "'foo'", '"foo"', '  foo', '$PATH', '"', "'", '\\', '| cat',
                '&& cat', '; cat'
            ]
            for test in testStrings:
                logger.info('Testing SSH with special string: %s', test)
                compareTo = "import sys; assert sys.argv[1]==%r" % test
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['python', '-', test],
                                         input=compareTo)

            try:
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=['nonsenseShouldFail'])
            except RuntimeError:
                pass
            else:
                self.fail(
                    'The remote command failed silently where it should have '
                    'raised an error')

            AWSProvisioner.sshLeader(
                clusterName=clusterName,
                args=[
                    'python', '-c',
                    "import os; assert os.environ['TOIL_WORKDIR']=='/var/lib/toil'"
                ])

            # `toil rsync-cluster`
            # Testing special characters - string.punctuation
            fname = '!"#$%&\'()*+,-.;<=>:\ ?@[\\\\]^_`{|}~'
            testData = os.urandom(3 * (10**6))
            with tempfile.NamedTemporaryFile(suffix=fname) as tmpFile:
                relpath = os.path.basename(tmpFile.name)
                tmpFile.write(testData)
                tmpFile.flush()
                # Upload file to leader
                AWSProvisioner.rsyncLeader(clusterName=clusterName,
                                           args=[tmpFile.name, ":"])
                # Ensure file exists
                AWSProvisioner.sshLeader(clusterName=clusterName,
                                         args=["test", "-e", relpath])
            tmpDir = tempfile.mkdtemp()
            # Download the file again and make sure it's the same file
            # `--protect-args` needed because remote bash chokes on special characters
            AWSProvisioner.rsyncLeader(
                clusterName=clusterName,
                args=["--protect-args", ":" + relpath, tmpDir])
            with open(os.path.join(tmpDir, relpath), "r") as f:
                self.assertEqual(
                    f.read(), testData,
                    "Downloaded file does not match original file")
        finally:
            system([
                self.toilMain, 'destroy-cluster', '--provisioner=aws',
                clusterName
            ])
            try:
                shutil.rmtree(tmpDir)
            except NameError:
                pass
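
The rsync round trip above verifies the download by comparing the full file contents against the original bytes held in memory. An equivalent check by checksum (a sketch of an alternative, not what the test actually does) avoids keeping both multi-megabyte copies around:

import hashlib
import os

def md5OfFile(path, chunkSize=1024 * 1024):
    # Stream the file so large payloads never have to fit in memory twice.
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunkSize), b''):
            digest.update(chunk)
    return digest.hexdigest()

# e.g. compare the uploaded source file against the re-downloaded copy:
# assert md5OfFile(tmpFile.name) == md5OfFile(os.path.join(tmpDir, relpath))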