예제 #1
0
    def get_hostnames(self, name, size):
        """
        Waits until the asg has at least <size> instances in "InService"
        state and returns their instance ids and public dns names.

        Arguments:
            name: name of the autoscaling group to inspect
            size: minimum number of "InService" instances to wait for

        Returns:
            A tuple ``(instance_ids, hostnames)`` of two lists filled in the
            same order. Both lists are empty when ``describe_asg`` returns
            nothing for ``name``.
        """
        for _ in wait_for(
                f"autoscaling group: {name} to reach size >= {size}"):
            asg_desc = self.describe_asg(name)
            if not asg_desc:
                # keep the same (ids, hostnames) shape as the success path so
                # callers that unpack the result do not blow up
                return [], []

            ready_instance_ids = [
                e["InstanceId"] for e in asg_desc["Instances"]
                if e["LifecycleState"] == "InService"
            ]
            if len(ready_instance_ids) < size:
                # not enough instances in service yet, poll again
                continue

            paginator = self._ec2.get_paginator("describe_instances")

            hostnames = []
            instance_ids = []
            for page in paginator.paginate(InstanceIds=ready_instance_ids):
                for r in page["Reservations"]:
                    for i in r["Instances"]:
                        hostnames.append(i["PublicDnsName"])
                        instance_ids.append(i["InstanceId"])
            return instance_ids, hostnames
예제 #2
0
    def delete_asg(self, name):
        """
        Force-deletes the autoscaling group <name> if describe_asg finds it,
        waits until it can no longer be described, and then deletes the
        launch config with the same name.
        """
        asg_exists = self.describe_asg(name)
        if asg_exists:
            log.info(f"Deleting autoscaling group: {name}")
            self._asg.delete_auto_scaling_group(
                AutoScalingGroupName=name,
                ForceDelete=True,
            )

            # poll until describe_asg comes back empty
            for _ in wait_for(f"instances in {name} to terminate"):
                if self.describe_asg(name):
                    continue
                log.info(f"Deleted autoscaling group: {name}")
                break

        # the launch config has to be deleted after the asg
        self.delete_launch_config(name)
예제 #3
0
def test_invalidate_entry(testfs):
    '''Stat the 'message' entry and check when lookup is (not) called.

    The first stat must set ``lookup_called``, the second one must not, and
    after sending the 'forget_entry' command a stat must set it again.
    '''
    mnt_dir, fs_state = testfs
    target = os.path.join(mnt_dir, 'message')

    os.stat(target)
    assert fs_state.lookup_called

    fs_state.lookup_called = False
    os.stat(target)
    assert not fs_state.lookup_called

    # Unfortunately there's no way to determine when the
    # kernel has processed the forget() request, so we
    # wait longer and longer until it works.
    # One-element list so that check() can update the delay between calls.
    delay = [0.01]

    def check():
        llfuse.setxattr(mnt_dir, 'command', b'forget_entry')
        time.sleep(delay[0])
        fs_state.lookup_called = False
        os.stat(target)
        delay[0] += max(1, delay[0])
        return fs_state.lookup_called
    assert wait_for(check)
예제 #4
0
def test_invalidate_entry(testfs):
    '''Check lookup behaviour for the 'message' entry around 'forget_entry'.

    A first stat has to set ``lookup_called``, a repeated stat must leave it
    unset, and once 'forget_entry' has been sent a stat has to set it again.
    '''
    mnt_dir, fs_state = testfs
    entry_path = os.path.join(mnt_dir, 'message')

    os.stat(entry_path)
    assert fs_state.lookup_called

    fs_state.lookup_called = False
    os.stat(entry_path)
    assert not fs_state.lookup_called

    # Unfortunately there's no way to determine when the
    # kernel has processed the forget() request, so we
    # wait longer and longer until it works.
    # Kept in a list so the nested function can grow it on every attempt.
    pause = [0.01]

    def check():
        llfuse.setxattr(mnt_dir, 'command', b'forget_entry')
        time.sleep(pause[0])
        fs_state.lookup_called = False
        os.stat(entry_path)
        pause[0] += max(1, pause[0])
        return fs_state.lookup_called

    assert wait_for(check)
예제 #5
0
def test_invalidate_inode(testfs):
    '''Read the 'message' file and check when read is (not) called.

    The first read must set ``read_called``, a re-read after seek(0) must
    not, and after sending the 'forget_inode' command a re-read must set it
    again.
    '''
    mnt_dir, fs_state = testfs
    expected = 'hello world\n'
    msg_path = os.path.join(mnt_dir, 'message')

    with open(msg_path, 'r') as fh:
        assert fh.read() == expected
        assert fs_state.read_called

        fs_state.read_called = False
        fh.seek(0)
        assert fh.read() == expected
        assert not fs_state.read_called

        # Unfortunately there's no way to determine when the
        # kernel has processed the forget() request, so we
        # wait longer and longer until it works.
        # One-element list so that check() can update the delay between calls.
        delay = [0.01]

        def check():
            llfuse.setxattr(mnt_dir, 'command', b'forget_inode')
            time.sleep(delay[0])
            fs_state.read_called = False
            fh.seek(0)
            assert fh.read() == expected
            delay[0] += max(1, delay[0])
            return fs_state.read_called
        assert wait_for(check)
예제 #6
0
    def create_specs_file(self, specs_file, s3_bucket_name, efs_id):
        """
        Creates a cfn stack from ``cfn/setup.yml`` (relative to this file),
        waits for it to reach ``CREATE_COMPLETE`` and renders
        ``config/sample_specs.json`` with the stack outputs into <specs_file>.

        Arguments:
            specs_file: path the rendered specs file is written to
            s3_bucket_name: passed as the ``S3BucketName`` stack parameter
                when truthy
            efs_id: passed as the ``EFSFileSystemId`` stack parameter when
                truthy

        Raises:
            RuntimeError: if the stack reports ``CREATE_FAILED`` or a
                ``ROLLBACK_*`` status, or if waiting ends before the stack
                reaches ``CREATE_COMPLETE``.
        """
        username = getpass.getuser()
        rand = "".join(random.choices(string.ascii_uppercase + string.digits, k=5))
        # user name plus random tag, appended to the stack and role names
        # (not called ``hash`` so the builtin is not shadowed)
        suffix = f"{username}-{rand}"
        stack_name = f"torchelastic-{suffix}"
        this_dir = os.path.dirname(__file__)
        cfn_template = os.path.join(this_dir, "cfn/setup.yml")
        sample_specs = os.path.join(this_dir, "config/sample_specs.json")

        params = {
            "WorkerRoleName": f"torchelastic_worker_role-{suffix}",
            "RendezvousRoleName": f"torchelastic_rendezvous_role-{suffix}",
        }

        if s3_bucket_name:
            params["S3BucketName"] = s3_bucket_name
        if efs_id:
            params["EFSFileSystemId"] = efs_id

        self.create_stack(stack_name, cfn_template, **params)

        for _ in wait_for(
            f"cfn stack: {stack_name} to create", timeout=600, interval=2
        ):
            status, outputs = self.describe_stack(stack_name)
            if status == "CREATE_COMPLETE":
                break
            elif status == "CREATE_FAILED" or status.startswith("ROLLBACK_"):
                # when stack creation fails cfn starts rolling the stack back
                raise RuntimeError(
                    f"Error creating stack {stack_name}, status = {status}"
                )
        else:
            # the wait ended without ever seeing CREATE_COMPLETE; ``outputs``
            # would be unbound or belong to an unfinished stack, so stop here
            # instead of writing a specs file from it
            raise RuntimeError(
                f"Stack {stack_name} did not reach CREATE_COMPLETE"
            )

        outputs["User"] = username

        log.info(f"Writing specs file to: {specs_file}")
        with open(sample_specs) as f:
            specs_template = Template(f.read())
            specs_template.stream(**outputs).dump(specs_file)
예제 #7
0
def test_invalidate_inode(testfs):
    '''Check read behaviour for the 'message' file around 'forget_inode'.

    A first read has to set ``read_called``, a re-read after seek(0) must
    leave it unset, and once 'forget_inode' has been sent a re-read has to
    set it again.
    '''
    mnt_dir, fs_state = testfs
    content = 'hello world\n'
    file_path = os.path.join(mnt_dir, 'message')

    with open(file_path, 'r') as fh:
        assert fh.read() == content
        assert fs_state.read_called

        fs_state.read_called = False
        fh.seek(0)
        assert fh.read() == content
        assert not fs_state.read_called

        # Unfortunately there's no way to determine when the
        # kernel has processed the forget() request, so we
        # wait longer and longer until it works.
        # Kept in a list so the nested function can grow it on every attempt.
        pause = [0.01]

        def check():
            llfuse.setxattr(mnt_dir, 'command', b'forget_inode')
            time.sleep(pause[0])
            fs_state.read_called = False
            fh.seek(0)
            assert fh.read() == content
            pause[0] += max(1, pause[0])
            return fs_state.read_called

        assert wait_for(check)