def create(self, defn, check, allow_reboot, allow_recreate):
    """Create or update this Hetzner bare-metal machine.

    :param defn: the ``HetznerDefinition`` describing the desired machine.
    :param check: when True, force a state check even if the machine
        appears to be up.
    :param allow_reboot: accepted for the generic backend interface; not
        consulted in this code path.
    :param allow_recreate: accepted for the generic backend interface; not
        consulted in this code path.
    """
    assert isinstance(defn, HetznerDefinition)
    # Re-verify the real machine state unless it is already known to be up
    # or in rescue mode and no explicit check was requested.
    if self.state not in (self.RESCUE, self.UP) or check:
        self.check()

    self.set_common_state(defn)
    self.main_ipv4 = defn.main_ipv4

    if not self.robot_admin_user or not self.robot_admin_pass:
        self.log_start("creating an exclusive robot admin account for "
                       "‘{0}’...".format(self.name))
        # Create a new Admin account exclusively for this machine.
        server = self._get_server_from_main_robot(self.main_ipv4, defn)
        # Persist the new credentials atomically in the deployment DB.
        with self.depl._db:
            (self.robot_admin_user,
             self.robot_admin_pass) = server.admin.create()
        self.log_end("done. ({0})".format(self.robot_admin_user))

    if not self.vm_id:
        # First deployment: install the base system from rescue mode.
        self.log("installing machine...")
        self.reboot_rescue(install=True, partitions=defn.partitions)
        self._install_base_system()
        self._detect_hardware()
        server = self._get_server_by_ip(self.main_ipv4)
        vm_id = "nixops-{0}-{1}".format(self.depl.uuid, self.name)
        # NOTE(review): name is truncated to 100 chars here, while a sibling
        # variant truncates to 50 citing a Robot limit — confirm the limit.
        server.set_name(vm_id[:100])
        self.vm_id = vm_id
        # The reinstall generated a new SSH host key; drop the stale
        # known_hosts entry for this IP.
        known_hosts.remove(self.main_ipv4)
        self.just_installed = True
def create(self, defn, check, allow_reboot, allow_recreate):
    """Bring this Hetzner bare-metal machine into the state described by
    ``defn``: verify its status, ensure a dedicated robot admin account
    exists, and perform the initial installation if it has never been
    deployed before.
    """
    assert isinstance(defn, HetznerDefinition)

    # Refresh our view of the machine unless it is already known to be
    # reachable (up or in rescue mode) and no explicit check was requested.
    if check or self.state not in (self.RESCUE, self.UP):
        self.check()

    self.set_common_state(defn)
    self.main_ipv4 = defn.main_ipv4

    # Lazily provision a robot admin account dedicated to this machine.
    have_credentials = self.robot_admin_user and self.robot_admin_pass
    if not have_credentials:
        self.log_start(
            f"creating an exclusive robot admin account for ‘{self.name}’... "
        )
        robot_server = self._get_server_from_main_robot(self.main_ipv4, defn)
        # Store the freshly created credentials atomically.
        with self.depl._db:
            credentials = robot_server.admin.create()
            (self.robot_admin_user, self.robot_admin_pass) = credentials
        self.log_end(f"done. ({self.robot_admin_user})")

    # No recorded VM id means the machine was never installed.
    if not self.vm_id:
        self.log("installing machine...")
        self.reboot_rescue(install=True, partitions=defn.partitions)
        self._install_base_system()
        self._detect_hardware()
        robot_server = self._get_server_by_ip(self.main_ipv4)
        machine_id = f"nixops-{self.depl.uuid}-{self.name}"
        robot_server.set_name(machine_id[:100])
        self.vm_id = machine_id
        # A fresh install means a fresh host key; forget the old one.
        known_hosts.remove(self.main_ipv4, None)
        self.just_installed = True
def create(self, defn, check, allow_reboot, allow_recreate):
    """Create or update this Hetzner bare-metal machine.

    :param defn: the ``HetznerDefinition`` describing the desired machine.
    :param check: when True, force a state check even if the machine
        appears to be up.
    :param allow_reboot: accepted for the generic backend interface; not
        consulted in this code path.
    :param allow_recreate: accepted for the generic backend interface; not
        consulted in this code path.
    """
    assert isinstance(defn, HetznerDefinition)
    # Re-verify the real machine state unless it is already known to be up
    # or in rescue mode and no explicit check was requested.
    if self.state not in (self.RESCUE, self.UP) or check:
        self.check()

    self.set_common_state(defn)
    self.main_ipv4 = defn.main_ipv4

    if not self.robot_admin_user or not self.robot_admin_pass:
        self.log_start("creating an exclusive robot admin account for "
                       "‘{0}’...".format(self.name))
        # Create a new Admin account exclusively for this machine.
        server = self._get_server_from_main_robot(self.main_ipv4, defn)
        # Persist the new credentials atomically in the deployment DB.
        with self.depl._db:
            (self.robot_admin_user,
             self.robot_admin_pass) = server.admin.create()
        self.log_end("done. ({0})".format(self.robot_admin_user))

    if not self.vm_id:
        # First deployment: install the base system from rescue mode.
        self.log("installing machine...")
        self.reboot_rescue(install=True, partitions=defn.partitions)
        self._install_base_system()
        self._detect_hardware()
        server = self._get_server_by_ip(self.main_ipv4)
        vm_id = "nixops-{0}-{1}".format(self.depl.uuid, self.name)
        # XXX: Truncated to 50 chars until the Robot allows more.
        # And this also means, that this field is unreliable so we
        # can't use it for uniquely identifying machine UUIDs.
        server.set_name(vm_id[:50])
        self.vm_id = vm_id
        # The reinstall generated a new SSH host key; drop the stale
        # known_hosts entry for this IP.
        known_hosts.remove(self.main_ipv4)
        self.just_installed = True
def destroy(self, wipe=False):
    """Interactively destroy the GCE machine plus any volumes marked for
    deletion on termination. Returns True on success (or when there is
    nothing to do), False if the user declined the confirmation.
    """
    if wipe:
        self.depl.logger.warn("wipe is not supported")

    # Without a project there is nothing to talk to.
    if not self.project:
        return True

    # Machine already gone: nothing to tear down.
    if self.state == self.MISSING:
        return True

    try:
        instance = self.node()
        prompt = "are you sure you want to destroy {0}?".format(self.full_name)
        if not self.depl.logger.confirm(prompt):
            return False
        known_hosts.remove(self.public_ipv4, self.public_host_key)
        self.log("destroying the GCE machine...")
        instance.destroy()
    except libcloud.common.google.ResourceNotFoundError:
        self.warn("seems to have been destroyed already")
    self._node_deleted()

    # Drop the volumes whose lifetime is tied to this instance.
    for mapping_key, volume in self.block_device_mapping.items():
        if not volume.get('deleteOnTermination', False):
            continue
        self._delete_volume(volume['disk_name'], volume['region'], True)
        self.update_block_device_mapping(mapping_key, None)

    return True
def destroy(self, wipe=False):
    """Destroy the GCE machine backing this state, after user confirmation,
    then delete any volumes flagged ``deleteOnTermination``.

    :param wipe: not supported for GCE; only logs a warning.
    :return: True when destroyed (or nothing to do), False if the user
        declined the confirmation prompt.
    """
    if wipe:
        self.depl.logger.warn("wipe is not supported")
    if not self.project:
        return True
    # Bug fix: bail out early for machines already known to be missing —
    # the sibling implementation has this guard; without it we needlessly
    # prompt the user and call self.node() for a machine that is gone.
    if self.state == self.MISSING:
        # The machine is down, we have nothing to do.
        return True
    try:
        node = self.node()
        question = "are you sure you want to destroy {0}?"
        if not self.depl.logger.confirm(question.format(self.full_name)):
            return False
        # Forget the host key before the machine (and its key) disappears.
        known_hosts.remove(self.public_ipv4, self.public_host_key)
        self.log("destroying the GCE machine...")
        node.destroy()
    except libcloud.common.google.ResourceNotFoundError:
        # Best-effort: someone may have destroyed it behind our backs.
        self.warn("seems to have been destroyed already")
    self._node_deleted()

    # Destroy volumes created for this instance.
    for k, v in self.block_device_mapping.items():
        if v.get('deleteOnTermination', False):
            self._delete_volume(v['disk_name'], v['region'], True)
            self.update_block_device_mapping(k, None)

    return True
def create(self, defn, check, allow_reboot, allow_recreate):
    """Create or update this Hetzner bare-metal machine.

    Depending on ``defn.create_sub_account`` either provisions a dedicated
    robot admin sub-account or reuses externally supplied credentials, then
    performs the initial installation if the machine was never deployed.

    :param defn: the ``HetznerDefinition`` describing the desired machine.
    :param check: when True, force a state check even if the machine
        appears to be up.
    :param allow_reboot: accepted for the generic backend interface; not
        consulted in this code path.
    :param allow_recreate: accepted for the generic backend interface; not
        consulted in this code path.
    """
    assert isinstance(defn, HetznerDefinition)
    # Re-verify the real machine state unless it is already known to be up
    # or in rescue mode and no explicit check was requested.
    if self.state not in (self.RESCUE, self.UP) or check:
        self.check()

    self.set_common_state(defn)
    self.main_ipv4 = defn.main_ipv4

    if defn.create_sub_account:
        # Lazily create a robot admin sub-account dedicated to this machine.
        if not self.robot_admin_user or not self.robot_admin_pass:
            self.log_start(
                "creating an exclusive robot admin sub-account "
                "for ‘{0}’... ".format(self.name)
            )
            server = self._get_server_from_main_robot(self.main_ipv4, defn)
            # Persist the new credentials atomically in the deployment DB.
            with self.depl._db:
                (
                    self.robot_admin_user,
                    self.robot_admin_pass,
                ) = server.admin.create()
            self.log_end("done. ({0})".format(self.robot_admin_user))
    else:
        # If available, assign user and password even if they are already
        # in the DB, so that changes to them are immediately reflected.
        # If not available, we use the ones from the DB.
        (robot_user, robot_pass) = self._get_robot_user_and_pass(
            defn=defn,
            default_user=self.robot_admin_user,
            default_pass=self.robot_admin_pass,
        )
        if (
            robot_user != self.robot_admin_user
            or robot_pass != self.robot_admin_pass
        ):
            with self.depl._db:
                (self.robot_admin_user, self.robot_admin_pass) = (
                    robot_user,
                    robot_pass,
                )

    if not self.vm_id:
        # First deployment: install the base system from rescue mode.
        self.log("installing machine...")
        self.reboot_rescue(install=True, partitions=defn.partitions)
        self._install_base_system()
        self._detect_hardware()
        server = self._get_server_by_ip(self.main_ipv4)
        vm_id = "nixops-{0}-{1}".format(self.depl.uuid, self.name)
        server.set_name(vm_id[:100])
        self.vm_id = vm_id
        # The reinstall generated a new SSH host key; drop the stale
        # known_hosts entry for this IP.
        known_hosts.remove(self.main_ipv4, None)
        self.just_installed = True
        self.state_version = defn.config.nixosRelease
def _reset(self) -> None: assert self.depl if all((self.public_ipv4, self._public_host_key)): known_hosts.remove(self.public_ipv4, self._public_host_key) with self.depl._db: self.state = self.MISSING self.vm_id = None self.image_id = None self.location = None self.public_ipv4 = None self.server_type = None self.hw_info = None self._ssh_public_key = None self._ssh_private_key = None self._public_host_key = None
def create(self, defn, check, allow_reboot, allow_recreate):
    """Create or update this Hetzner bare-metal machine.

    Depending on ``defn.create_sub_account`` either provisions a dedicated
    robot admin sub-account or reuses externally supplied credentials, then
    performs the initial installation if the machine was never deployed.

    :param defn: the ``HetznerDefinition`` describing the desired machine.
    :param check: when True, force a state check even if the machine
        appears to be up.
    :param allow_reboot: accepted for the generic backend interface; not
        consulted in this code path.
    :param allow_recreate: accepted for the generic backend interface; not
        consulted in this code path.
    """
    assert isinstance(defn, HetznerDefinition)
    # Re-verify the real machine state unless it is already known to be up
    # or in rescue mode and no explicit check was requested.
    if self.state not in (self.RESCUE, self.UP) or check:
        self.check()

    self.set_common_state(defn)
    self.main_ipv4 = defn.main_ipv4

    if defn.create_sub_account:
        # Lazily create a robot admin sub-account dedicated to this machine.
        if not self.robot_admin_user or not self.robot_admin_pass:
            self.log_start("creating an exclusive robot admin sub-account "
                           "for ‘{0}’... ".format(self.name))
            server = self._get_server_from_main_robot(self.main_ipv4, defn)
            # Persist the new credentials atomically in the deployment DB.
            with self.depl._db:
                (self.robot_admin_user,
                 self.robot_admin_pass) = server.admin.create()
            self.log_end("done. ({0})".format(self.robot_admin_user))
    else:
        # If available, assign user and password even if they are already
        # in the DB, so that changes to them are immediately reflected.
        # If not available, we use the ones from the DB.
        (robot_user, robot_pass) = self._get_robot_user_and_pass(
            defn=defn,
            default_user=self.robot_admin_user,
            default_pass=self.robot_admin_pass,
        )
        if robot_user != self.robot_admin_user or \
           robot_pass != self.robot_admin_pass:
            with self.depl._db:
                (self.robot_admin_user,
                 self.robot_admin_pass) = (robot_user, robot_pass)

    if not self.vm_id:
        # First deployment: install the base system from rescue mode.
        self.log("installing machine...")
        self.reboot_rescue(install=True, partitions=defn.partitions)
        self._install_base_system()
        self._detect_hardware()
        server = self._get_server_by_ip(self.main_ipv4)
        vm_id = "nixops-{0}-{1}".format(self.depl.uuid, self.name)
        server.set_name(vm_id[:100])
        self.vm_id = vm_id
        # The reinstall generated a new SSH host key; drop the stale
        # known_hosts entry for this IP.
        known_hosts.remove(self.main_ipv4, None)
        self.just_installed = True
        self.state_version = defn.config['nixosRelease']
class HetznerCloudState(MachineState[HetznerCloudDefinition]):
    """
    State of a Hetzner Cloud machine.
    """

    @classmethod
    def get_type(cls):
        return "hetznercloud"

    state = attr_property("state", MachineState.MISSING, int)  # override
    vm_id = attr_property("vmId", None, int)  # override type
    api_token = attr_property("hetznerCloud.apiToken", None)
    public_ipv4 = attr_property("publicIpv4", None)
    public_ipv6 = attr_property("publicIpv6", None)
    private_ipv4 = attr_property("privateIpv4", None)
    public_client_key = attr_property("hetznerCloud.publicClientKey", None)
    private_client_key = attr_property("hetznerCloud.privateClientKey", None)
    public_host_key = attr_property("hetznerCloud.publicHostKey", None)
    private_host_key = attr_property("hetznerCloud.privateHostKey", None)
    legacy_if_scheme = attr_property("legacyIfScheme", None, bool)
    labels = attr_property("hetznerCloud.labels", {}, "json")
    location = attr_property("hetznerCloud.location", None)
    server_name = attr_property("hetznerCloud.serverName", None)
    server_type = attr_property("hetznerCloud.serverType", None)
    server_networks = attr_property("hetznerCloud.serverNetworks", {}, "json")
    volumes = attr_property("hetznerCloud.volumes", {}, "json")
    ip_addresses = attr_property("hetznerCloud.ipAddresses", {}, "json")

    def __init__(self, depl: Deployment, name: str, id):
        MachineState.__init__(self, depl, name, id)
        # Lazily created Hetzner Cloud API client; see get_client().
        self._client = None

    def cleanup_state(self) -> None:
        """ Discard all state pertaining to an instance. """
        with self.depl._db:
            self.vm_id = None
            self.public_ipv4 = None
            self.public_ipv6 = None
            self.private_client_key = None
            self.public_client_key = None
            self.private_host_key = None
            self.public_host_key = None
            self.legacy_if_scheme = None
            self.location = None
            self.server_name = None
            self.server_type = None
            self.server_networks = {}
            self.labels = {}
            self.volumes = {}
            self.ip_addresses = {}

    def show_type(self):
        s = f"{super(HetznerCloudState, self).show_type()}"
        if self.location:
            s += f" [{self.location}; {self.server_type}]"
        return s

    @property
    def full_name(self) -> str:
        return f"Hetzner Cloud Server ‘{self.name}’"

    def get_instance(self) -> Optional[BoundServer]:
        """Fetch the backing server, or None if it was deleted externally."""
        # Bug fix (annotation only): this method returns None when the API
        # reports not_found, so the return type is Optional[BoundServer].
        try:
            return self.get_client().servers.get_by_id(self.vm_id)
        except APIException as e:
            if e.code == "not_found":
                self.logger.warn(
                    f"{self.full_name} was deleted from outside of nixops")
                return None
            else:
                raise

    def get_client(self) -> Client:
        """
        Generic method to get or create a Hetzner Cloud client.
        """
        if self._client:
            return self._client
        # Fall back to the environment when no token is stored yet.
        new_api_token = self.api_token or os.environ.get("HCLOUD_API_TOKEN")
        if new_api_token is not None:
            self.api_token = new_api_token
        if self.api_token is None:
            raise Exception("please set ‘apiToken’ or $HCLOUD_API_TOKEN")
        self._client = Client(token=self.api_token)
        return self._client

    def get_common_labels(self) -> Dict[str, str]:
        """Labels identifying this deployment, attached to every resource."""
        labels = {
            "CharonNetworkUUID": self.depl.uuid,
            "CharonInstanceName": self.name,
            "CharonStateFileHost": socket.gethostname(),
            "CharonStateFileUser": getpass.getuser(),
        }
        # Only include the state file name when it is a valid label value.
        pattern = "^$|(?i)((?=^[a-z0-9])[a-z0-9._-]{0,63}[a-z0-9]$)"
        file_name = os.path.basename(self.depl._db.db_file)
        if re.match(pattern, file_name):
            labels["CharonStateFileName"] = file_name
        if self.depl.name:
            labels["CharonNetworkName"] = self.depl.name
        return labels

    def get_ssh_name(self) -> str:
        if not self.public_ipv4:
            raise Exception(
                f"{self.full_name} does not have a public IP address (yet)")
        return self.public_ipv4

    def get_ssh_private_key_file(self) -> str:
        return self._ssh_private_key_file or self.write_ssh_private_key(
            self.private_client_key)

    def get_ssh_flags(self, *args, **kwargs) -> List[str]:
        super_flags = super(HetznerCloudState,
                            self).get_ssh_flags(*args, **kwargs)
        return super_flags + ["-i", self.get_ssh_private_key_file()]

    def get_udev_name(self, volume_id: str) -> str:
        """Stable udev device path for a Hetzner Cloud volume id."""
        return f"/dev/disk/by-id/scsi-0HC_Volume_{volume_id}"

    def get_physical_spec(self) -> Dict[Any, Any]:
        """Build the NixOS physical specification for this server."""
        ipv4 = [{"address": self.public_ipv4, "prefixLength": 32}]
        ipv6 = [{"address": self.public_ipv6[:-3], "prefixLength": 64}]
        # Classify each floating IP as v4 or v6 by attempting to parse it.
        for addr in self.ip_addresses.values():
            try:
                socket.inet_pton(socket.AF_INET, addr)
                ipv4.append({"address": addr, "prefixLength": 32})
            except socket.error:
                # Not a valid IPv4 address, so treat it as IPv6.
                ipv6.append({"address": addr, "prefixLength": 64})

        def get_interface_name(i: int) -> str:
            # Older "cx" server types expose ensN names; newer use enpNs0.
            return f"ens{10+i}" if self.legacy_if_scheme else f"enp{7+i}s0"

        spec = {
            "imports":
            [RawValue("<nixpkgs/nixos/modules/profiles/qemu-guest.nix>")],
            ("boot", "loader", "grub", "device"): "nodev",
            ("fileSystems", "/"): {
                "device": "/dev/sda1",
                "fsType": "ext4"
            },
            **{("fileSystems", v["mountPoint"]): {
                "fsType": v["fsType"],
                "device": v["device"],
            }
               for v in self.volumes.values() if v["mountPoint"]},
            # Hetzner Cloud networking defaults
            ("networking", "defaultGateway"): "172.31.1.1",
            ("networking", "nameservers"): [
                "213.133.98.98",
                "213.133.99.99",
                "213.133.100.100",
            ],
            (
                "networking",
                "interfaces",
                "ens3" if self.legacy_if_scheme else "enp1s0",
            ): {
                ("ipv4", "addresses"): ipv4,
                ("ipv6", "addresses"): ipv6,
                "useDHCP": True,
            },
            ("users", "extraUsers", "root", "openssh", "authorizedKeys",
             "keys"): [self.public_client_key],
        }
        for i, v in enumerate(self.server_networks.values()):
            private_ipv4_addresses = [{
                "address": addr,
                "prefixLength": 32
            } for addr in [v["privateIpAddress"]] + v["aliasIpAddresses"]]
            spec[("networking", "interfaces", get_interface_name(i))] = {
                ("ipv4", "addresses"): private_ipv4_addresses,
                "useDHCP": True,
            }
        # Load the xfs kernel module when any attached volume needs it.
        for v in self.volumes.values():
            if v["fsType"] == "xfs":
                spec[("boot", "kernelModules")] = ["xfs"]
                break
        return spec

    def _update_attr(self, attr: str, k: str,
                     v: Optional[Dict[str, Any]]) -> None:
        """Set (or, when v is None, remove) key k of the json state attr."""
        x = getattr(self, attr)
        if v is None:
            x.pop(k, None)
        else:
            x[k] = v
        # Reassign so the attr_property machinery persists the change.
        setattr(self, attr, x)

    def _handle_changed_server_networks(self, defn: HetznerCloudDefinition,
                                        allow_recreate: bool) -> None:
        """
        Detects and corrects any virtual network state desynchronisation.
        """
        attached: Set[str] = {
            x.network.id
            for x in self.get_instance().private_net
        }
        # Detach server from networks
        for name in self.server_networks.keys():
            nw: Optional[BoundNetwork] = self.get_client(
            ).networks.get_by_name(name)
            # Detect destroyed networks
            if nw is None:
                if name not in defn.server_networks:  # we dont need it
                    self.logger.warn(
                        f"forgetting about network ‘{name}’ that no longer exists"
                        " and is no longer needed by the deployment specification"
                    )
                    self._update_attr("server_networks", name, None)
                else:  # we do need it
                    raise Exception(
                        f"network ‘{name}’ (used by {self.full_name}) no longer exists;"
                        " run ‘nixops deploy --check’ to update resource state"
                    )
            # Detect network detachment
            elif nw.id not in attached:
                self.logger.warn(
                    f"instance was manually detached from network ‘{name}’ [{nw.id}]"
                )
                if name in defn.server_networks:
                    self._update_attr("server_networks", name, None)
            # Detach from existing networks if required.
            elif name not in defn.server_networks:
                self.logger.log(f"detaching from network ‘{name}’ [{nw.id}]")
                self.get_client().servers.detach_from_network(
                    server=Server(self.vm_id),
                    network=nw).wait_until_finished()
                self._update_attr("server_networks", name, None)

        # Attach server to networks
        for name, x in defn.server_networks.items():
            if name not in self.server_networks:
                nw = self.get_client().networks.get_by_name(name)
                if nw is None:
                    raise Exception(
                        f"tried to attach instance to network ‘{name}’"
                        " but it doesn't exist...")

                # NixOps will update machines in parallel, so retry
                # network attachment to deal with resource conflict.
                def attach_to_network() -> bool:
                    try:
                        self.wait_on_action(
                            self.get_client().servers.attach_to_network(
                                server=Server(self.vm_id),
                                network=nw,
                                ip=x["privateIpAddress"],
                                alias_ips=x["aliasIpAddresses"],
                            ))
                    except APIException as e:
                        if e.code == "conflict":
                            return False
                        else:
                            raise
                    else:
                        # NOTE(review): keyed by x["network"] while the rest of
                        # this method keys server_networks by name — confirm
                        # these are always equal.
                        self._update_attr("server_networks", x["network"], x)
                        return True

                self.logger.log(
                    f"attaching instance to network ‘{name}’ [{nw.id}]...")
                check_wait(attach_to_network)

    def _handle_changed_floating_ips(self, defn: HetznerCloudDefinition,
                                     allow_recreate: bool) -> None:
        """
        Detects and corrects any floating IP state desynchronisation.
        """
        assigned: Set[str] = {
            x.name
            for x in self.get_instance().public_net.floating_ips
        }
        for name in self.ip_addresses.keys():
            fip: Optional[BoundFloatingIP] = self.get_client(
            ).floating_ips.get_by_name(name)
            # Detect manually destroyed floating IPs
            if fip is None:
                if name not in defn.ip_addresses:  # we dont need it
                    self.logger.warn(
                        f"forgetting about floating IP ‘{name}’ that no longer"
                        " exists and is no longer needed by the deployment"
                        " specification")
                    self._update_attr("ip_addresses", name, None)
                else:
                    if name.startswith("nixops-" + self.depl.uuid):
                        raise Exception(
                            f"floating IP ‘{name}’ (used by {self.full_name})"
                            " no longer exists; run ‘nixops deploy --check’"
                            " to update resource state")
                    else:
                        raise Exception(
                            f"floating IP ‘{name}’ (used by {self.full_name})"
                            " was manually destroyed")
            # Detect unassigned floating IPs
            elif name not in assigned:
                if name not in defn.ip_addresses:  # we dont need it
                    self.logger.warn(
                        f"forgetting about unassigned floating IP ‘{name}’ [{fip.id}]"
                        " that is no longer needed by the deployment specification"
                    )
                else:  # we do need it
                    self.logger.warn(
                        f"floating IP ‘{name}’ [{fip.id}] was manually unassigned;"
                        " will reassign it.")
                self._update_attr("ip_addresses", name, None)

        # Assign missing floating IPs.
        for name in defn.ip_addresses:
            if name not in self.ip_addresses:
                fip = self.get_client().floating_ips.get_by_name(name)
                if fip is None:
                    raise Exception(f"tried to assign floating IP ‘{name}’"
                                    " but it doesn't exist...")
                self.logger.log(
                    f"assigning floating IP ‘{name}’ [{fip.id}]...")
                self.wait_on_action(fip.assign(Server(self.vm_id)))
                self._update_attr("ip_addresses", name, fip.ip)

    def _handle_changed_volumes(self, defn: HetznerCloudDefinition,
                                allow_recreate: bool) -> None:
        """
        Detects and corrects any volume state desynchronisation.
        """
        attached: Set[str] = {x.name for x in self.get_instance().volumes}
        for name in self.volumes.keys():
            volume: Optional[BoundVolume] = self.get_client(
            ).volumes.get_by_name(name)
            # Detect destroyed volumes.
            if volume is None:
                if name not in defn.volumes:  # we dont need it
                    self.logger.warn(
                        f"forgetting about volume ‘{name}’ that no longer exists"
                        " and is no longer needed by the deployment specification"
                    )
                else:
                    if name.startswith("nixops-" + self.depl.uuid):
                        raise Exception(
                            f"volume ‘{name}’ (used by {self.full_name}) no longer exists;"
                            " run ‘nixops deploy --check’ to update resource state"
                        )
                    else:
                        raise Exception(
                            f"volume ‘{name}’ (used by {self.full_name}) was"
                            " manually destroyed")
            # Detect detached volumes.
            elif name not in attached:
                if name not in defn.volumes:  # we dont need it
                    self.logger.warn(
                        f"forgetting about detached volume ‘{name}’ [{volume.id}]"
                        " that is no longer needed by the deployment specification"
                    )
                else:  # we do need it
                    self.logger.warn(
                        f"volume ‘{name}’ [{volume.id}] was manually detached;"
                        " will reattach it")
                self._update_attr("volumes", name, None)
            # Detach existing attached volumes if required.
            elif name not in defn.volumes:
                self.logger.warn(
                    f"detaching volume ‘{name}’ [{volume.id}] that is no longer"
                    " needed by the deployment specification")
                volume.detach().wait_until_finished()
                self._update_attr("volumes", name, None)

        # Attach missing volumes. resize filesystems if required, before mounting.
        for name, v in defn.volumes.items():
            if name not in self.volumes:
                # Check if it exists. resources will have been created if user ran check,
                # but prexisting vols which got deleted may be gone (detected in code above)
                volume = self.get_client().volumes.get_by_name(name)
                if volume is None:
                    self.logger.warn(
                        f"tried to attach non-NixOps managed volume ‘{name}’,"
                        " but it doesn't exist... skipping")
                    continue
                elif volume.location.name != self.location:
                    # Bug fix: the second literal was missing its f-prefix and
                    # printed "{self.full_name}" verbatim in the message.
                    raise Exception(
                        f"volume ‘{name}’ [{volume.id}] is in a different location"
                        f" to {self.full_name}; attempting to attach it will fail."
                    )
                elif (volume.server and volume.server.id != self.vm_id
                      and self.depl.logger.confirm(
                          f"volume ‘{name}’ is in use by instance ‘{volume.server.id}’,"
                          " are you sure you want to attach this volume?")
                      ):  # noqa: E124
                    self.logger.log(
                        f"detaching volume ‘{name}’ from instance ‘{volume.server.id}’..."
                    )
                    volume.detach().wait_until_finished()
                    volume.server = None

                # Attach volume.
                self.logger.log(f"attaching volume ‘{name}’ [{volume.id}]... ")
                volume.attach(Server(self.vm_id)).wait_until_finished()

                # Wait until the device is visible in the instance.
                v["device"] = self.get_udev_name(volume.id)

                def check_device() -> bool:
                    return 0 == self.run_command(f"test -e {v['device']}",
                                                 check=False)

                if not check_wait(
                        check_device, initial=1, max_tries=10,
                        exception=False):
                    # If stopping times out, then do an unclean shutdown.
                    self.logger.log_end("(timed out)")
                    self.logger.log(f"can't find device ‘{v['device']}’...")
                    self.logger.log("available devices:")
                    self.run_command("lsblk")
                    raise Exception("operation timed out")
                else:
                    self._update_attr("volumes", name, v)
                self.logger.log_end("")

            # Grow filesystems on resource based volumes.
            # We want to grow the fs when its volume gets resized, but if the
            # volume isn't attached to any server at the time, thats not possible.
            # Blindly trying to grow all volumes when mounting them just in case
            # they got resized while they were orphaned is bad. Workaround:
            # the needsFSResize attribute of VolumeState is set when the volume
            # gets resized by NixOps. When attaching a volume NixOps will use this
            # flag to decide whether to grow the filesystem.
            if name.startswith("nixops-" + self.depl.uuid):
                res = self.depl.get_typed_resource(name[44:],
                                                   "hetznercloud-volume",
                                                   VolumeState)
                # get correct option definitions for volume resources
                v["size"] = res._state["size"]
                v["fsType"] = res._state["fsType"]
                v["device"] = self.get_udev_name(res._state["resourceId"])
                question = (
                    f"volume {name} was resized, do you wish to grow its"
                    " filesystem to fill the space?")
                op = (f"umount {v['device']} ;"
                      f"e2fsck -fy {v['device']} &&"
                      f"resize2fs {v['device']}")
                if (v["fsType"] == "ext4" and res.needsFSResize
                        and self.depl.logger.confirm(question)
                        and self.run_command(op, check=False) == 0):
                    with res.depl._db:
                        res.needsFSResize = False
                self._update_attr("volumes", name, v)

            if v["mountPoint"]:
                volume = self.get_client().volumes.get_by_name(name)
                v["device"] = self.get_udev_name(volume.id)
                self._update_attr("volumes", name, v)

    def after_activation(self, defn: HetznerCloudDefinition) -> None:
        # Unlike ext4, xfs filesystems must be resized while the underlying drive is mounted.
        # Thus this operation is delayed until after activation.
        for name, v in self.volumes.items():
            if (name.startswith("nixops-" + self.depl.uuid) and v["mountPoint"]
                    and v["fsType"] == "xfs"):
                res = self.depl.get_typed_resource(name[44:],
                                                   "hetznercloud-volume",
                                                   VolumeState)
                question = (
                    f"volume {name} was resized, do you wish to grow its"
                    " filesystem to fill the space?")
                if (res.needsFSResize and self.depl.logger.confirm(question)
                        and 0 == self.run_command(
                            f"xfs_growfs {v['mountPoint']}", check=False)):
                    with res.depl._db:
                        res.needsFSResize = False

    def create_after(
            self, resources,
            defn: HetznerCloudDefinition) -> Set[HetznerCloudResourceState]:
        # Resources this machine depends on must be created first.
        return {
            r
            for r in resources
            if isinstance(r, FloatingIPState) or isinstance(r, NetworkState)
            or isinstance(r, VolumeState)
        }

    def _create_ssh_key(self, public_key: str) -> BoundSSHKey:
        """Create or get a hetzner cloud ssh key."""
        public_key = public_key.strip()
        hetzner_ssh_keys: List[BoundSSHKey] = self.get_client(
        ).ssh_keys.get_all()
        name: str = f"nixops-{self.depl.uuid}-{self.name}"
        for key in hetzner_ssh_keys:
            if key.public_key.strip() == public_key:
                # Reuse an existing key with matching material.
                return key
            elif key.name == name:
                # Stale key from a previous deployment: replace it.
                self.get_client().ssh_keys.delete(key)
        ssh_key: BoundSSHKey = self.get_client().ssh_keys.create(
            name=name,
            public_key=public_key,
        )
        return ssh_key

    def _create_instance(self, defn) -> None:
        """Provision a brand new server and record its state."""
        if not self.public_client_key:
            (private, public) = create_key_pair(type="ed25519")
            self.public_client_key = public
            self.private_client_key = private
        if not self.public_host_key:
            (private, public) = create_key_pair(type="ed25519")
            self.public_host_key = public
            self.private_host_key = private
        location: BoundLocation = self.get_client().locations.get_by_name(
            defn.location)
        ssh_keys: List[BoundSSHKey] = [
            self._create_ssh_key(self.public_client_key)
        ]

        # Ensure host keys get injected into the base OS
        user_data = ("#cloud-config\n"
                     "ssh_keys:\n"
                     " ed25519_public: {0}\n"
                     " ed25519_private: |\n"
                     " {1}").format(
                         self.public_host_key,
                         self.private_host_key.replace("\n", "\n "))

        self.logger.log_start(
            f"creating {defn.server_type} server at {location.description}...")
        response = self.get_client().servers.create(
            name=defn.server_name,
            labels={
                **self.get_common_labels(),
                **dict(defn.labels)
            },
            location=location,
            server_type=ServerType(defn.server_type),
            ssh_keys=ssh_keys,
            user_data=user_data,
            image=Image(name="ubuntu-20.04"),  # for lustration
            start_after_create=True,
        )

        self.state = self.STARTING
        self.wait_on_action(response.action)

        with self.depl._db:
            self.vm_id = response.server.id
            self.public_ipv4 = response.server.public_net.ipv4.ip
            self.public_ipv6 = response.server.public_net.ipv6.ip
            self.server_name = defn.server_name
            self.server_type = defn.server_type
            self.legacy_if_scheme = defn.server_type.startswith("cx")
            self.location = defn.location
            self.labels = dict(defn.labels)
            # The host key now lives on the server; no need to keep it.
            self.private_host_key = None

        known_hosts.add(self.public_ipv4, self.public_host_key)
        self.logger.log_end(f"{self.public_ipv4}")

    def create(  # noqa: C901
        self,
        defn: HetznerCloudDefinition,
        check: bool,
        allow_reboot: bool,
        allow_recreate: bool,
    ) -> None:
        """Create or reconcile this Hetzner Cloud server against ``defn``."""
        self.api_token = defn.api_token

        if self.state != self.UP:
            check = True

        self.set_common_state(defn)

        # NOTE(review): this guard can never fire because api_token was just
        # overwritten with defn.api_token above — confirm intent before
        # reordering; left unchanged to preserve behaviour.
        if self.api_token and self.api_token != defn.api_token:
            raise Exception("cannot change api token of an existing instance")

        # Destroy the instance (if allowed) to handle attribute changes which
        # require recreating i.e. location
        if (self.vm_id and allow_recreate and self.location != defn.location
                and self.depl.logger.confirm(
                    "changing server location requires recreate, are you sure?"
                )):
            self._destroy()

        # Stop the instance (if allowed) to handle attribute changes which
        # require rebooting i.e. server_type
        if self.vm_id and allow_reboot and self.server_type != defn.server_type:
            self.stop()
            check = True

        # Check whether the instance hasn't been killed behind our backs.
        # Handle changed server type. Restart stopped instances.
        if self.vm_id and check:
            instance = self.get_instance()
            if instance is None or instance.status in {"deleting"}:
                if not allow_recreate:
                    raise Exception(
                        f"{self.full_name} went away;"
                        " use ‘--allow-recreate’ to create a new one")
                status = instance.status if instance else "gone"
                self.logger.log(
                    f"{self.full_name} went away (state ‘{status}’), will recreate"
                )
                self.cleanup_state()

            # Modify the server type, if desired. TODO store disk size
            # in state to enable option to later downsize server type.
            # Bug fix: guard against instance being None — if the server
            # vanished and state was cleaned up above, dereferencing
            # instance.status here raised AttributeError.
            if (instance is not None and instance.status == "off"
                    and self.server_type != defn.server_type):
                self.logger.log_start(
                    f"changing server type from ‘{self.server_type}’ to"
                    f" ‘{defn.server_type}’; may take a few minutes...")
                instance.change_type(ServerType(defn.server_type),
                                     upgrade_disk=True).wait_until_finished()
                self.logger.log_end("done!")
                with self.depl._db:
                    self.server_type = defn.server_type
                self.logger.log("instance was stopped, restarting...")
                self.start()

        # Provision the instance.
        if not self.vm_id:
            self._create_instance(defn)
            self.wait_for_ssh()
            self.state = self.RESCUE
            self.logger.log_start("running nixos-infect")
            # Bug fix: close the infect script handle after use.
            with open(INFECT_PATH) as infect_script:
                self.run_command("bash </dev/stdin 2>&1",
                                 stdin=infect_script)
            self.logger.log("rebooting into NixOS 😎")
            self.reboot_sync()
            self.state = self.UP

        if self.location != defn.location:
            raise Exception("cannot change location of an existing instance"
                            f" (from ‘{self.location}‘ to ‘{defn.location}‘);"
                            " use ‘--allow-recreate’")

        if self.server_type != defn.server_type:
            raise Exception(
                "cannot change server type of a running instance"
                f" (from ‘{self.server_type}‘ to ‘{defn.server_type}‘);"
                " use ‘--allow-reboot’")

        # Update name or labels if they have changed.
        if self.server_name != defn.server_name or self.labels != defn.labels:
            self.logger.log("updating trivial modified attributes")
            self.get_instance().update(defn.server_name, {
                **self.get_common_labels(),
                **dict(defn.labels)
            })

        self._handle_changed_floating_ips(defn, allow_recreate)
        self._handle_changed_volumes(defn, allow_recreate)
        self._handle_changed_server_networks(defn, allow_recreate)

    def _destroy(self) -> None:
        """Delete the server, its host SSH key, and all local state."""
        if self.state != self.UP:
            return
        self.logger.log(f"destroying {self.full_name}")
        # Detach volumes
        for name in self.volumes:
            self.logger.log(f"detaching volume {name}...")
            self.get_client().volumes.get_by_name(
                name).detach().wait_until_finished()
        if (instance := self.get_instance()) is not None:
            instance.delete()
        # Remove host ssh key.
        self.get_client().ssh_keys.get_by_name(
            f"nixops-{self.depl.uuid}-{self.name}").delete()
        known_hosts.remove(self.public_ipv4, self.public_host_key)
        self.cleanup_state()