Пример #1
0
class ReplicationService(Service):
    @item_method
    @accepts(Int("id"),
             Dict(
                 "replication_restore",
                 Str("name", required=True),
                 Path("target_dataset", required=True, empty=False),
                 strict=True,
             ))
    async def restore(self, id, data):
        """
        Create the opposite of replication task `id` (PULL if it was PUSH and vice versa).
        """
        replication_task = await self.middleware.call("replication.query",
                                                      [["id", "=", id]],
                                                      {"get": True})

        if replication_task["direction"] == "PUSH":
            data["direction"] = "PULL"
            data["naming_schema"] = list({
                pst["naming_schema"]
                for pst in replication_task["periodic_snapshot_tasks"]
            } | set(replication_task["also_include_naming_schema"]))
        else:
            data["direction"] = "PUSH"
            data["also_include_naming_schema"] = replication_task[
                "naming_schema"]

        data["source_datasets"], _ = (await self.middleware.call(
            "zettarepl.reverse_source_target_datasets",
            replication_task["source_datasets"],
            replication_task["target_dataset"]))

        for k in [
                "transport", "ssh_credentials", "netcat_active_side",
                "netcat_active_side_listen_address",
                "netcat_active_side_port_min", "netcat_active_side_port_max",
                "netcat_passive_side_connect_address", "recursive",
                "properties", "replicate", "compression", "large_block",
                "embed", "compressed", "retries"
        ]:
            data[k] = replication_task[k]

        if data["ssh_credentials"] is not None:
            data["ssh_credentials"] = data["ssh_credentials"]["id"]

        data["retention_policy"] = "NONE"
        data["auto"] = False
        data["enabled"] = False  # Do not run it automatically

        return await self.middleware.call("replication.create", data)
Пример #2
0
class ReplicationService(CRUDService):

    ZFS_SEND_REGEX = re.compile(r"zfs: sending (?P<snapshot>.+) \([0-9]+%: (?P<current>[0-9]+)/(?P<total>[0-9]+)\)")

    class Config:
        datastore = "storage.replication"
        datastore_prefix = "repl_"
        datastore_extend = "replication.extend"
        datastore_extend_context = "replication.extend_context"

    @private
    async def extend_context(self):
        legacy_result, legacy_result_datetime = await self.middleware.run_in_thread(self._legacy_extend_context)

        return {
            "state": await self.middleware.call("zettarepl.get_state"),
            "legacy_result": legacy_result,
            "legacy_result_datetime": legacy_result_datetime,
        }

    def _legacy_extend_context(self):
        try:
            with open("/tmp/.repl-result", "rb") as f:
                data = f.read()
                legacy_result = pickle.loads(data)
                legacy_result_datetime = datetime.fromtimestamp(os.stat("/tmp/.repl-result").st_mtime)
        except Exception:
            legacy_result = defaultdict(dict)
            legacy_result_datetime = None

        return legacy_result, legacy_result_datetime

    @private
    async def extend(self, data, context):
        data["periodic_snapshot_tasks"] = [
            {k.replace("task_", ""): v for k, v in task.items()}
            for task in data["periodic_snapshot_tasks"]
        ]

        for task in data["periodic_snapshot_tasks"]:
            Cron.convert_db_format_to_schedule(task, begin_end=True)

        if data["direction"] == "PUSH":
            data["also_include_naming_schema"] = data["naming_schema"]
            data["naming_schema"] = []
        if data["direction"] == "PULL":
            data["also_include_naming_schema"] = []

        Cron.convert_db_format_to_schedule(data, "schedule", key_prefix="schedule_", begin_end=True)
        Cron.convert_db_format_to_schedule(data, "restrict_schedule", key_prefix="restrict_schedule_", begin_end=True)

        if data["transport"] == "LEGACY":
            if data["id"] in context["legacy_result"]:
                legacy_result = context["legacy_result"][data["id"]]

                msg = legacy_result.get("msg")
                if msg == "Running":
                    state = "RUNNING"
                elif msg in ["Succeeded", "Up to date"]:
                    state = "FINISHED"
                else:
                    state = "ERROR"

                data["state"] = {
                    "datetime": context["legacy_result_datetime"],
                    "state": state,
                    "last_snapshot": legacy_result.get("last_snapshot"),
                }

                if state == "ERROR":
                    data["state"]["error"] = msg
            else:
                data["state"] = {
                    "state": "PENDING",
                }

            progressfile = f"/tmp/.repl_progress_{data['id']}"
            if os.path.exists(progressfile):
                with open(progressfile, "r") as f:
                    pid = f.read().strip()

                try:
                    pid = int(pid)
                except ValueError:
                    pass
                else:
                    try:
                        title = " ".join(await self.middleware.run_in_thread(
                            lambda: psutil.Process(pid).cmdline()
                        ))
                    except psutil.NoSuchProcess:
                        pass
                    else:
                        m = self.ZFS_SEND_REGEX.match(title)
                        if m:
                            dataset, snapshot = m.group("snapshot").split("@")
                            data["state"]["progress"] = {
                                "dataset": dataset,
                                "snapshot": snapshot,
                                "current": int(m.group("current")),
                                "total": int(m.group("total")),
                            }
        else:
            data["state"] = context["state"].get(f"replication_task_{data['id']}", {
                "state": "PENDING",
            })

        data["job"] = data["state"].pop("job", None)

        return data

    @private
    async def compress(self, data):
        if data["direction"] == "PUSH":
            data["naming_schema"] = data["also_include_naming_schema"]
        del data["also_include_naming_schema"]

        Cron.convert_schedule_to_db_format(data, "schedule", key_prefix="schedule_", begin_end=True)
        Cron.convert_schedule_to_db_format(data, "restrict_schedule", key_prefix="restrict_schedule_", begin_end=True)

        del data["periodic_snapshot_tasks"]

        return data

    @accepts(
        Dict(
            "replication_create",
            Str("name", required=True),
            Str("direction", enum=["PUSH", "PULL"], required=True),
            Str("transport", enum=["SSH", "SSH+NETCAT", "LOCAL", "LEGACY"], required=True),
            Int("ssh_credentials", null=True, default=None),
            Str("netcat_active_side", enum=["LOCAL", "REMOTE"], null=True, default=None),
            Str("netcat_active_side_listen_address", null=True, default=None),
            Int("netcat_active_side_port_min", null=True, default=None, validators=[Port()]),
            Int("netcat_active_side_port_max", null=True, default=None, validators=[Port()]),
            Str("netcat_passive_side_connect_address", null=True, default=None),
            List("source_datasets", items=[Path("dataset", empty=False)], required=True, empty=False),
            Path("target_dataset", required=True, empty=False),
            Bool("recursive", required=True),
            List("exclude", items=[Path("dataset", empty=False)], default=[]),
            Bool('properties', default=True),
            List("periodic_snapshot_tasks", items=[Int("periodic_snapshot_task")], default=[],
                 validators=[Unique()]),
            List("naming_schema", items=[
                Str("naming_schema", validators=[ReplicationSnapshotNamingSchema()])], default=[]),
            List("also_include_naming_schema", items=[
                Str("naming_schema", validators=[ReplicationSnapshotNamingSchema()])], default=[]),
            Bool("auto", required=True),
            Cron(
                "schedule",
                defaults={"minute": "00"},
                begin_end=True,
                null=True,
                default=None
            ),
            Cron(
                "restrict_schedule",
                defaults={"minute": "00"},
                begin_end=True,
                null=True,
                default=None
            ),
            Bool("only_matching_schedule", default=False),
            Bool("allow_from_scratch", default=False),
            Bool("hold_pending_snapshots", default=False),
            Str("retention_policy", enum=["SOURCE", "CUSTOM", "NONE"], required=True),
            Int("lifetime_value", null=True, default=None, validators=[Range(min=1)]),
            Str("lifetime_unit", null=True, default=None, enum=["HOUR", "DAY", "WEEK", "MONTH", "YEAR"]),
            Str("compression", enum=["LZ4", "PIGZ", "PLZIP"], null=True, default=None),
            Int("speed_limit", null=True, default=None, validators=[Range(min=1)]),
            Bool("dedup", default=False),
            Bool("large_block", default=True),
            Bool("embed", default=False),
            Bool("compressed", default=True),
            Int("retries", default=5, validators=[Range(min=1)]),
            Str("logging_level", enum=["DEBUG", "INFO", "WARNING", "ERROR"], null=True, default=None),
            Bool("enabled", default=True),
            register=True,
            strict=True,
        )
    )
    async def do_create(self, data):
        """
        Create a Replication Task

        Create a Replication Task that will push or pull ZFS snapshots to or from remote host..

        * `name` specifies a name for replication task
        * `direction` specifies whether task will `PUSH` or `PULL` snapshots
        * `transport` is a method of snapshots transfer:
          * `SSH` transfers snapshots via SSH connection. This method is supported everywhere but does not achieve
            great performance
            `ssh_credentials` is a required field for this transport (Keychain Credential ID of type `SSH_CREDENTIALS`)
          * `SSH+NETCAT` uses unencrypted connection for data transfer. This can only be used in trusted networks
            and requires a port (specified by range from `netcat_active_side_port_min` to `netcat_active_side_port_max`)
            to be open on `netcat_active_side`
            `ssh_credentials` is also required for control connection
          * `LOCAL` replicates to or from localhost
          * `LEGACY` uses legacy replication engine prior to FreeNAS 11.3
        * `source_datasets` is a non-empty list of datasets to replicate snapshots from
        * `target_dataset` is a dataset to put snapshots into. It must exist on target side
        * `recursive` and `exclude` have the same meaning as for Periodic Snapshot Task
        * `properties` control whether we should send dataset properties along with snapshots
        * `periodic_snapshot_tasks` is a list of periodic snapshot task IDs that are sources of snapshots for this
          replication task. Only push replication tasks can be bound to periodic snapshot tasks.
        * `naming_schema` is a list of naming schemas for pull replication
        * `also_include_naming_schema` is a list of naming schemas for push replication
        * `auto` allows replication to run automatically on schedule or after bound periodic snapshot task
        * `schedule` is a schedule to run replication task. Only `auto` replication tasks without bound periodic
          snapshot tasks can have a schedule
        * `restrict_schedule` restricts when replication task with bound periodic snapshot tasks runs. For example,
          you can have periodic snapshot tasks that run every 15 minutes, but only run replication task every hour.
        * Enabling `only_matching_schedule` will only replicate snapshots that match `schedule` or
          `restrict_schedule`
        * `allow_from_scratch` will destroy all snapshots on target side and replicate everything from scratch if none
          of the snapshots on target side matches source snapshots
        * `hold_pending_snapshots` will prevent source snapshots from being deleted by retention of replication fails
          for some reason
        * `retention_policy` specifies how to delete old snapshots on target side:
          * `SOURCE` deletes snapshots that are absent on source side
          * `CUSTOM` deletes snapshots that are older than `lifetime_value` and `lifetime_unit`
          * `NONE` does not delete any snapshots
        * `compression` compresses SSH stream. Available only for SSH transport
        * `speed_limit` limits speed of SSH stream. Available only for SSH transport
        * `dedup`, `large_block`, `embed` and `compressed` are various ZFS stream flag documented in `man zfs send`
        * `retries` specifies number of retries before considering replication failed

        .. examples(websocket)::

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "replication.create",
                "params": [{
                    "name": "Work Backup",
                    "direction": "PUSH",
                    "transport": "SSH",
                    "ssh_credentials": [12],
                    "source_datasets", ["data/work"],
                    "target_dataset": "repl/work",
                    "recursive": true,
                    "periodic_snapshot_tasks": [5],
                    "auto": true,
                    "restrict_schedule": {
                        "minute": "0",
                        "hour": "*/2",
                        "dom": "*",
                        "month": "*",
                        "dow": "1,2,3,4,5",
                        "begin": "09:00",
                        "end": "18:00"
                    },
                    "only_matching_schedule": true,
                    "retention_policy": "CUSTOM",
                    "lifetime_value": 1,
                    "lifetime_unit": "WEEK",
                }]
            }
        """

        verrors = ValidationErrors()
        verrors.add_child("replication_create", await self._validate(data))

        if verrors:
            raise verrors

        periodic_snapshot_tasks = data["periodic_snapshot_tasks"]
        await self.compress(data)

        id = await self.middleware.call(
            "datastore.insert",
            self._config.datastore,
            data,
            {"prefix": self._config.datastore_prefix}
        )

        await self._set_periodic_snapshot_tasks(id, periodic_snapshot_tasks)

        await self.middleware.call("service.restart", "cron")
        await self.middleware.call("zettarepl.update_tasks")

        return await self._get_instance(id)

    @accepts(Int("id"), Patch(
        "replication_create",
        "replication_update",
        ("attr", {"update": True}),
    ))
    async def do_update(self, id, data):
        """
        Update a Replication Task with specific `id`

        See the documentation for `create` method for information on payload contents

        .. examples(websocket)::

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "replication.update",
                "params": [
                    7,
                    {
                        "name": "Work Backup",
                        "direction": "PUSH",
                        "transport": "SSH",
                        "ssh_credentials": [12],
                        "source_datasets", ["data/work"],
                        "target_dataset": "repl/work",
                        "recursive": true,
                        "periodic_snapshot_tasks": [5],
                        "auto": true,
                        "restrict_schedule": {
                            "minute": "0",
                            "hour": "*/2",
                            "dom": "*",
                            "month": "*",
                            "dow": "1,2,3,4,5",
                            "begin": "09:00",
                            "end": "18:00"
                        },
                        "only_matching_schedule": true,
                        "retention_policy": "CUSTOM",
                        "lifetime_value": 1,
                        "lifetime_unit": "WEEK",
                    }
                ]
            }
        """

        old = await self._get_instance(id)

        new = old.copy()
        if new["ssh_credentials"]:
            new["ssh_credentials"] = new["ssh_credentials"]["id"]
        new["periodic_snapshot_tasks"] = [task["id"] for task in new["periodic_snapshot_tasks"]]
        new.update(data)

        verrors = ValidationErrors()
        verrors.add_child("replication_update", await self._validate(new, id))

        if verrors:
            raise verrors

        periodic_snapshot_tasks = new["periodic_snapshot_tasks"]
        await self.compress(new)

        new.pop("state", None)
        new.pop("job", None)

        await self.middleware.call(
            "datastore.update",
            self._config.datastore,
            id,
            new,
            {'prefix': self._config.datastore_prefix}
        )

        await self._set_periodic_snapshot_tasks(id, periodic_snapshot_tasks)

        await self.middleware.call("service.restart", "cron")
        await self.middleware.call("zettarepl.update_tasks")

        return await self._get_instance(id)

    @accepts(
        Int("id")
    )
    async def do_delete(self, id):
        """
        Delete a Replication Task with specific `id`

        .. examples(websocket)::

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "replication.delete",
                "params": [
                    1
                ]
            }
        """

        response = await self.middleware.call(
            "datastore.delete",
            self._config.datastore,
            id
        )

        await self.middleware.call("service.restart", "cron")
        await self.middleware.call("zettarepl.update_tasks")

        return response

    @item_method
    @accepts(Int("id"), Bool("really_run", default=True, hidden=True))
    @job(logs=True)
    async def run(self, job, id, really_run):
        """
        Run Replication Task of `id`.
        """
        if really_run:
            task = await self._get_instance(id)

            if not task["enabled"]:
                raise CallError("Task is not enabled")

            if task["transport"] == "LEGACY":
                raise CallError("You can't run legacy replication manually")

        await self.middleware.call("zettarepl.run_replication_task", id, really_run, job)

    async def _validate(self, data, id=None):
        verrors = ValidationErrors()

        await self._ensure_unique(verrors, "", "name", data["name"], id)

        # Direction

        snapshot_tasks = []

        if data["direction"] == "PUSH":
            e, snapshot_tasks = await self._query_periodic_snapshot_tasks(data["periodic_snapshot_tasks"])
            verrors.add_child("periodic_snapshot_tasks", e)

            if data["naming_schema"]:
                verrors.add("naming_schema", "This field has no sense for push replication")

            if data["transport"] != "LEGACY" and not snapshot_tasks and not data["also_include_naming_schema"]:
                verrors.add(
                    "periodic_snapshot_tasks", "You must at least either bind a periodic snapshot task or provide "
                                               "\"Also Include Naming Schema\" for push replication task"
                )

            if data["schedule"]:
                if data["periodic_snapshot_tasks"]:
                    verrors.add("schedule", "Push replication can't be bound to periodic snapshot task and have "
                                            "schedule at the same time")
            else:
                if data["auto"] and not data["periodic_snapshot_tasks"] and data["transport"] != "LEGACY":
                    verrors.add("auto", "Push replication that runs automatically must be either "
                                        "bound to periodic snapshot task or have schedule")

        if data["direction"] == "PULL":
            if data["schedule"]:
                pass
            else:
                if data["auto"]:
                    verrors.add("auto", "Pull replication that runs automatically must have schedule")

            if data["periodic_snapshot_tasks"]:
                verrors.add("periodic_snapshot_tasks", "Pull replication can't be bound to periodic snapshot task")

            if not data["naming_schema"]:
                verrors.add("naming_schema", "Naming schema is required for pull replication")

            if data["also_include_naming_schema"]:
                verrors.add("also_include_naming_schema", "This field has no sense for pull replication")

            if data["hold_pending_snapshots"]:
                verrors.add("hold_pending_snapshots", "Pull replication tasks can't hold pending snapshots because "
                                                      "they don't do source retention")

        # Transport

        if data["transport"] == "SSH+NETCAT":
            if data["netcat_active_side"] is None:
                verrors.add("netcat_active_side", "You must choose active side for SSH+netcat replication")

            if data["netcat_active_side_port_min"] is not None and data["netcat_active_side_port_max"] is not None:
                if data["netcat_active_side_port_min"] > data["netcat_active_side_port_max"]:
                    verrors.add("netcat_active_side_port_max",
                                "Please specify value greater or equal than netcat_active_side_port_min")

            if data["compression"] is not None:
                verrors.add("compression", "Compression is not supported for SSH+netcat replication")

            if data["speed_limit"] is not None:
                verrors.add("speed_limit", "Speed limit is not supported for SSH+netcat replication")
        else:
            if data["netcat_active_side"] is not None:
                verrors.add("netcat_active_side", "This field only has sense for SSH+netcat replication")

            for k in ["netcat_active_side_listen_address", "netcat_active_side_port_min", "netcat_active_side_port_max",
                      "netcat_passive_side_connect_address"]:
                if data[k] is not None:
                    verrors.add(k, "This field only has sense for SSH+netcat replication")

        if data["transport"] == "LOCAL":
            if data["ssh_credentials"] is not None:
                verrors.add("ssh_credentials", "Remote credentials have no sense for local replication")

            if data["compression"] is not None:
                verrors.add("compression", "Compression has no sense for local replication")

            if data["speed_limit"] is not None:
                verrors.add("speed_limit", "Speed limit has no sense for local replication")
        else:
            if data["ssh_credentials"] is None:
                verrors.add("ssh_credentials", "SSH Credentials are required for non-local replication")
            else:
                try:
                    await self.middleware.call("keychaincredential.get_of_type", data["ssh_credentials"],
                                               "SSH_CREDENTIALS")
                except CallError as e:
                    verrors.add("ssh_credentials", str(e))

        if data["transport"] == "LEGACY":
            for should_be_true in ["auto", "allow_from_scratch"]:
                if not data[should_be_true]:
                    verrors.add(should_be_true, "Legacy replication does not support disabling this option")

            for should_be_false in ["exclude", "periodic_snapshot_tasks", "naming_schema", "also_include_naming_schema",
                                    "only_matching_schedule", "dedup", "large_block", "embed", "compressed"]:
                if data[should_be_false]:
                    verrors.add(should_be_false, "Legacy replication does not support this option")

            if data["direction"] != "PUSH":
                verrors.add("direction", "Only push application is allowed for Legacy transport")

            if len(data["source_datasets"]) != 1:
                verrors.add("source_datasets", "You can only have one source dataset for legacy replication")

            if os.path.basename(data["target_dataset"]) != os.path.basename(data["source_datasets"][0]):
                verrors.add(
                    "target_dataset",
                    "Target dataset basename should be same as source dataset basename for Legacy transport",
                )

            if data["retention_policy"] not in ["SOURCE", "NONE"]:
                verrors.add("retention_policy", "Only \"source\" and \"none\" retention policies are supported by "
                                                "legacy replication")

            if data["retries"] != 1:
                verrors.add("retries", "This value should be 1 for legacy replication")

        # Common for all directions and transports

        for i, source_dataset in enumerate(data["source_datasets"]):
            for snapshot_task in snapshot_tasks:
                if is_child(source_dataset, snapshot_task["dataset"]):
                    if data["recursive"]:
                        for exclude in snapshot_task["exclude"]:
                            if exclude not in data["exclude"]:
                                verrors.add("exclude", f"You should exclude {exclude!r} as bound periodic snapshot "
                                                       f"task dataset {snapshot_task['dataset']!r} does")
                    else:
                        if source_dataset in snapshot_task["exclude"]:
                            verrors.add(f"source_datasets.{i}", f"Dataset {source_dataset!r} is excluded by bound "
                                                                f"periodic snapshot task for dataset "
                                                                f"{snapshot_task['dataset']!r}")

        if not data["recursive"] and data["exclude"]:
            verrors.add("exclude", "Excluding child datasets is only supported for recursive replication")

        for i, v in enumerate(data["exclude"]):
            if not any(v.startswith(ds + "/") for ds in data["source_datasets"]):
                verrors.add(f"exclude.{i}", "This dataset is not a child of any of source datasets")

        if data["schedule"]:
            if not data["auto"]:
                verrors.add("schedule", "You can't have schedule for replication that does not run automatically")
        else:
            if data["only_matching_schedule"]:
                verrors.add("only_matching_schedule", "You can't have only-matching-schedule without schedule")

        if data["retention_policy"] == "CUSTOM":
            if data["lifetime_value"] is None:
                verrors.add("lifetime_value", "This field is required for custom retention policy")
            if data["lifetime_unit"] is None:
                verrors.add("lifetime_value", "This field is required for custom retention policy")
        else:
            if data["lifetime_value"] is not None:
                verrors.add("lifetime_value", "This field has no sense for specified retention policy")
            if data["lifetime_unit"] is not None:
                verrors.add("lifetime_unit", "This field has no sense for specified retention policy")

        if data["enabled"]:
            for i, snapshot_task in enumerate(snapshot_tasks):
                if not snapshot_task["enabled"]:
                    verrors.add(
                        f"periodic_snapshot_tasks.{i}",
                        "You can't bind disabled periodic snapshot task to enabled replication task"
                    )

        return verrors

    async def _set_periodic_snapshot_tasks(self, replication_task_id, periodic_snapshot_tasks_ids):
        await self.middleware.call("datastore.delete", "storage.replication_repl_periodic_snapshot_tasks",
                                   [["replication_id", "=", replication_task_id]])
        for periodic_snapshot_task_id in periodic_snapshot_tasks_ids:
            await self.middleware.call(
                "datastore.insert", "storage.replication_repl_periodic_snapshot_tasks",
                {
                    "replication_id": replication_task_id,
                    "task_id": periodic_snapshot_task_id,
                },
            )

    async def _query_periodic_snapshot_tasks(self, ids):
        verrors = ValidationErrors()

        query_result = await self.middleware.call("pool.snapshottask.query", [["id", "in", ids]])

        snapshot_tasks = []
        for i, task_id in enumerate(ids):
            for task in query_result:
                if task["id"] == task_id:
                    snapshot_tasks.append(task)
                    break
            else:
                verrors.add(str(i), "This snapshot task does not exist")

        return verrors, snapshot_tasks

    @accepts(Str("transport", enum=["SSH", "SSH+NETCAT", "LOCAL", "LEGACY"], required=True),
             Int("ssh_credentials", null=True, default=None))
    async def list_datasets(self, transport, ssh_credentials=None):
        """
        List datasets on remote side

        Accepts `transport` and SSH credentials ID (for non-local transport)

        .. examples(websocket)::

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "replication.list_datasets",
                "params": [
                    "SSH",
                    7
                ]
            }
        """

        return await self.middleware.call("zettarepl.list_datasets", transport, ssh_credentials)

    @accepts(Str("dataset", required=True),
             Str("transport", enum=["SSH", "SSH+NETCAT", "LOCAL", "LEGACY"], required=True),
             Int("ssh_credentials", null=True, default=None))
    async def create_dataset(self, dataset, transport, ssh_credentials=None):
        """
        Creates dataset on remote side

        Accepts `dataset` name, `transport` and SSH credentials ID (for non-local transport)

        .. examples(websocket)::

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "replication.create_dataset",
                "params": [
                    "repl/work",
                    "SSH",
                    7
                ]
            }
        """

        return await self.middleware.call("zettarepl.create_dataset", dataset, transport, ssh_credentials)

    @accepts()
    async def list_naming_schemas(self):
        """
        List all naming schemas used in periodic snapshot and replication tasks.
        """
        naming_schemas = []
        for snapshottask in await self.middleware.call("pool.snapshottask.query"):
            naming_schemas.append(snapshottask["naming_schema"])
        for replication in await self.middleware.call("replication.query"):
            naming_schemas.extend(replication["naming_schema"])
            naming_schemas.extend(replication["also_include_naming_schema"])
        return sorted(set(naming_schemas))

    @accepts(
        List("datasets", empty=False, items=[
            Path("dataset", empty=False),
        ]),
        List("naming_schema", empty=False, items=[
            Str("naming_schema", validators=[ReplicationSnapshotNamingSchema()])
        ]),
        Str("transport", enum=["SSH", "SSH+NETCAT", "LOCAL", "LEGACY"], required=True),
        Int("ssh_credentials", null=True, default=None),
    )
    async def count_eligible_manual_snapshots(self, datasets, naming_schema, transport, ssh_credentials):
        """
        Count how many existing snapshots of `dataset` match `naming_schema`.

        .. examples(websocket)::

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "replication.count_eligible_manual_snapshots",
                "params": [
                    "repl/work",
                    ["auto-%Y-%m-%d_%H-%M"],
                    "SSH",
                    4
                ]
            }
        """
        return await self.middleware.call("zettarepl.count_eligible_manual_snapshots", datasets, naming_schema,
                                          transport, ssh_credentials)

    @accepts(
        Str("direction", enum=["PUSH", "PULL"], required=True),
        List("source_datasets", items=[Path("dataset", empty=False)], required=True, empty=False),
        Path("target_dataset", required=True, empty=False),
        Str("transport", enum=["SSH", "SSH+NETCAT", "LOCAL", "LEGACY"], required=True),
        Int("ssh_credentials", null=True, default=None),
    )
    async def target_unmatched_snapshots(self, direction, source_datasets, target_dataset, transport, ssh_credentials):
        """
        Check if target has any snapshots that do not exist on source.

        .. examples(websocket)::

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "replication.target_unmatched_snapshots",
                "params": [
                    "PUSH",
                    ["repl/work", "repl/games"],
                    "backup",
                    "SSH",
                    4
                ]
            }

        Returns

            {
                "backup/work": ["auto-2019-10-15_13-00", "auto-2019-10-15_09-00"],
                "backup/games": ["auto-2019-10-15_13-00"],
            }
        """
        return await self.middleware.call("zettarepl.target_unmatched_snapshots", direction, source_datasets,
                                          target_dataset, transport, ssh_credentials)

    # Legacy pair support
    @private
    @accepts(Dict(
        "replication-pair-data",
        Str("hostname", required=True),
        Str("public-key", required=True),
        Str("user", null=True),
    ))
    async def pair(self, data):
        result = await self.middleware.call("keychaincredential.ssh_pair", {
            "remote_hostname": data["hostname"],
            "username": data["user"] or "root",
            "public_key": data["public-key"],
        })
        return {
            "ssh_port": result["port"],
            "ssh_hostkey": result["host_key"],
        }
Пример #3
0
class PeriodicSnapshotTaskService(CRUDService):
    class Config:
        datastore = 'storage.task'
        datastore_prefix = 'task_'
        datastore_extend = 'pool.snapshottask.extend'
        datastore_extend_context = 'pool.snapshottask.extend_context'
        namespace = 'pool.snapshottask'

    @private
    async def extend_context(self):
        return {
            'legacy_replication_tasks': await self._legacy_replication_tasks(),
            'state': await self.middleware.call('zettarepl.get_state'),
            'vmware': await self.middleware.call('vmware.query'),
        }

    @private
    async def extend(self, data, context):
        Cron.convert_db_format_to_schedule(data, begin_end=True)

        data['legacy'] = self._is_legacy(data,
                                         context['legacy_replication_tasks'])

        data['vmware_sync'] = any((vmware['filesystem'] == data['dataset'] or (
            data['recursive']
            and is_child(vmware['filesystem'], data['dataset'])))
                                  for vmware in context['vmware'])

        data['state'] = context['state'].get(
            f'periodic_snapshot_task_{data["id"]}', {
                'state': 'UNKNOWN',
            })

        return data

    @accepts(
        Dict('periodic_snapshot_create',
             Path('dataset', required=True),
             Bool('recursive', required=True),
             List('exclude', items=[Path('item', empty=False)], default=[]),
             Int('lifetime_value', required=True),
             Str('lifetime_unit',
                 enum=['HOUR', 'DAY', 'WEEK', 'MONTH', 'YEAR'],
                 required=True),
             Str('naming_schema',
                 required=True,
                 validators=[ReplicationSnapshotNamingSchema()]),
             Cron('schedule', required=True, begin_end=True),
             Bool('enabled', default=True),
             register=True))
    async def do_create(self, data):
        """
        Create a Periodic Snapshot Task

        Create a Periodic Snapshot Task that will take snapshots of specified `dataset` at specified `schedule`.
        Recursive snapshots can be created if `recursive` flag is enabled. You can `exclude` specific child datasets
        from snapshot.
        Snapshots will be automatically destroyed after a certain amount of time, specified by
        `lifetime_value` and `lifetime_unit`.
        Snapshots will be named according to `naming_schema` which is a `strftime`-like template for snapshot name
        and must contain `%Y`, `%m`, `%d`, `%H` and `%M`.

        .. examples(websocket)::

          Create a recursive Periodic Snapshot Task for dataset `data/work` excluding `data/work/temp`. Snapshots
          will be created on weekdays every hour from 09:00 to 18:00 and will be stored for two weeks.

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "pool.snapshottask.create",
                "params": [{
                    "dataset": "data/work",
                    "recursive": true,
                    "exclude": ["data/work/temp"],
                    "lifetime_value": 2,
                    "lifetime_unit": "WEEK",
                    "naming_schema": "auto_%Y-%m-%d_%H-%M",
                    "schedule": {
                        "minute": "0",
                        "hour": "*",
                        "dom": "*",
                        "month": "*",
                        "dow": "1,2,3,4,5",
                        "begin": "09:00",
                        "end": "18:00"
                    }
                }]
            }
        """

        verrors = ValidationErrors()

        verrors.add_child('periodic_snapshot_create', await
                          self._validate(data))

        if verrors:
            raise verrors

        if self._is_legacy(data, await self._legacy_replication_tasks()):
            verrors.add_child('periodic_snapshot_create',
                              self._validate_legacy(data))

        if verrors:
            raise verrors

        Cron.convert_schedule_to_db_format(data, begin_end=True)

        data['id'] = await self.middleware.call(
            'datastore.insert', self._config.datastore, data,
            {'prefix': self._config.datastore_prefix})

        await self.middleware.call('service.restart', 'cron')
        await self.middleware.call('zettarepl.update_tasks')

        return await self._get_instance(data['id'])

    @accepts(Int('id', required=True),
             Patch('periodic_snapshot_create', 'periodic_snapshot_update',
                   ('attr', {
                       'update': True
                   })))
    async def do_update(self, id, data):
        """
        Update a Periodic Snapshot Task with specific `id`

        See the documentation for `create` method for information on payload contents

        .. examples(websocket)::

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "pool.snapshottask.update",
                "params": [
                    1,
                    {
                        "dataset": "data/work",
                        "recursive": true,
                        "exclude": ["data/work/temp"],
                        "lifetime_value": 2,
                        "lifetime_unit": "WEEK",
                        "naming_schema": "auto_%Y-%m-%d_%H-%M",
                        "schedule": {
                            "minute": "0",
                            "hour": "*",
                            "dom": "*",
                            "month": "*",
                            "dow": "1,2,3,4,5",
                            "begin": "09:00",
                            "end": "18:00"
                        }
                    }
                ]
            }
        """

        old = await self._get_instance(id)
        new = old.copy()
        new.update(data)

        verrors = ValidationErrors()

        verrors.add_child('periodic_snapshot_update', await
                          self._validate(new))

        if not data['enabled']:
            for replication_task in await self.middleware.call(
                    'replication.query', [['enabled', '=', True]]):
                if any(periodic_snapshot_task['id'] == id
                       for periodic_snapshot_task in
                       replication_task['periodic_snapshot_tasks']):
                    verrors.add('periodic_snapshot_update.enabled', (
                        f'You can\'t disable this periodic snapshot task because it is bound to enabled replication '
                        f'task {replication_task["id"]!r}'))
                    break

        if verrors:
            raise verrors

        legacy_replication_tasks = await self._legacy_replication_tasks()
        if self._is_legacy(new, legacy_replication_tasks):
            verrors.add_child(f'periodic_snapshot_update',
                              self._validate_legacy(new))
        else:
            if self._is_legacy(old, legacy_replication_tasks):
                verrors.add('periodic_snapshot_update.naming_schema', (
                    'This snapshot task is being used in legacy replication task. You must use naming schema '
                    f'{self._legacy_naming_schema(new)!r}. Please upgrade your replication tasks to edit this field.'
                ))

        if verrors:
            raise verrors

        Cron.convert_schedule_to_db_format(new, begin_end=True)

        for key in ('legacy', 'vmware_sync', 'state'):
            new.pop(key, None)

        await self.middleware.call('datastore.update', self._config.datastore,
                                   id, new,
                                   {'prefix': self._config.datastore_prefix})

        await self.middleware.call('service.restart', 'cron')
        await self.middleware.call('zettarepl.update_tasks')

        return await self._get_instance(id)

    @accepts(Int('id'))
    async def do_delete(self, id):
        """
        Delete a Periodic Snapshot Task with specific `id`

        .. examples(websocket)::

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "pool.snapshottask.delete",
                "params": [
                    1
                ]
            }
        """

        response = await self.middleware.call('datastore.delete',
                                              self._config.datastore, id)

        await self.middleware.call('service.restart', 'cron')

        return response

    @item_method
    @accepts(Int("id"))
    async def run(self, id):
        task = await self._get_instance(id)

        if not task["enabled"]:
            raise CallError("Task is not enabled")

        await self.middleware.call("zettarepl.run_periodic_snapshot_task",
                                   task["id"])

    async def _validate(self, data):
        verrors = ValidationErrors()

        if data['dataset'] not in (
                await self.middleware.call('pool.filesystem_choices')):
            verrors.add('dataset', 'Invalid ZFS dataset')

        if not data['recursive'] and data['exclude']:
            verrors.add(
                'exclude',
                'Excluding datasets has no sense for non-recursive periodic snapshot tasks'
            )

        for i, v in enumerate(data['exclude']):
            if not v.startswith(f'{data["dataset"]}/'):
                verrors.add(
                    f'exclude.{i}',
                    'Excluded dataset should be a child of selected dataset')

        return verrors

    def _validate_legacy(self, data):
        verrors = ValidationErrors()

        if data['exclude']:
            verrors.add(
                'exclude',
                ('Excluding child datasets is not available because this snapshot task is being used in '
                 'legacy replication task. Please upgrade your replication tasks to edit this field.'
                 ),
            )

        return verrors

    def _is_legacy(self, data, legacy_replication_tasks):
        if data['naming_schema'] == self._legacy_naming_schema(data):
            for replication_task in legacy_replication_tasks:
                if (data['dataset'] == replication_task['source_datasets'][0]
                        or
                    (data['recursive']
                     and is_child(replication_task['source_datasets'][0],
                                  data['dataset'])) or
                    (replication_task['recursive']
                     and is_child(data['dataset'],
                                  replication_task['source_datasets'][0]))):
                    return True

        return False

    def _legacy_naming_schema(self, data):
        return f'auto-%Y%m%d.%H%M%S-{data["lifetime_value"]}{data["lifetime_unit"].lower()[0]}'

    async def _legacy_replication_tasks(self):
        return await self.middleware.call('replication.query',
                                          [['transport', '=', 'LEGACY']])
Пример #4
0
class FilesystemService(Service):
    class Config:
        cli_namespace = 'storage.filesystem'

    @private
    def resolve_cluster_path(self, path, ignore_ctdb=False):
        """
        Convert a "CLUSTER:"-prefixed path to an absolute path
        on the server.
        """
        if not path.startswith(FuseConfig.FUSE_PATH_SUBST.value):
            return path

        gluster_volume = path[8:].split("/")[0]
        if gluster_volume == CTDBConfig.CTDB_VOL_NAME.value and not ignore_ctdb:
            raise CallError('access to ctdb volume is not permitted.',
                            errno.EPERM)
        elif not gluster_volume:
            raise CallError(
                f'More than the prefix "{FuseConfig.FUSE_PATH_SUBST.value}" must be provided'
            )

        is_mounted = self.middleware.call_sync('gluster.fuse.is_mounted',
                                               {'name': gluster_volume})
        if not is_mounted:
            raise CallError(
                f'{gluster_volume}: cluster volume is not mounted.',
                errno.ENXIO)

        cluster_path = path.replace(FuseConfig.FUSE_PATH_SUBST.value,
                                    f'{FuseConfig.FUSE_PATH_BASE.value}/')
        return cluster_path

    @accepts(Str('path'))
    @returns(Ref('path_entry'))
    def mkdir(self, path):
        """
        Create a directory at the specified path.
        """
        path = self.resolve_cluster_path(path)
        is_clustered = path.startswith("/cluster")

        p = pathlib.Path(path)
        if not p.is_absolute():
            raise CallError(f'{path}: not an absolute path.', errno.EINVAL)

        if p.exists():
            raise CallError(f'{path}: path already exists.', errno.EEXIST)

        realpath = os.path.realpath(path)
        if not is_clustered and not realpath.startswith('/mnt/'):
            raise CallError(f'{path}: path not permitted', errno.EPERM)

        os.mkdir(path)
        stat = p.stat()
        data = {
            'name': p.parts[-1],
            'path': path,
            'realpath': realpath,
            'type': 'DIRECTORY',
            'size': stat.st_size,
            'mode': stat.st_mode,
            'acl': False if self.acl_is_trivial(path) else True,
            'uid': stat.st_uid,
            'gid': stat.st_gid,
        }

        return data

    @accepts(Str('path', required=True), Ref('query-filters'),
             Ref('query-options'))
    @filterable_returns(
        Dict('path_entry',
             Str('name', required=True),
             Path('path', required=True),
             Path('realpath', required=True),
             Str('type',
                 required=True,
                 enum=['DIRECTORY', 'FILESYSTEM', 'SYMLINK', 'OTHER']),
             Int('size', required=True, null=True),
             Int('mode', required=True, null=True),
             Bool('acl', required=True, null=True),
             Int('uid', required=True, null=True),
             Int('gid', required=True, null=True),
             register=True))
    def listdir(self, path, filters, options):
        """
        Get the contents of a directory.

        Paths on clustered volumes may be specifed with the path prefix
        `CLUSTER:<volume name>`. For example, to list directories
        in the directory 'data' in the clustered volume `smb01`, the
        path should be specified as `CLUSTER:smb01/data`.

        Each entry of the list consists of:
          name(str): name of the file
          path(str): absolute path of the entry
          realpath(str): absolute real path of the entry (if SYMLINK)
          type(str): DIRECTORY | FILESYSTEM | SYMLINK | OTHER
          size(int): size of the entry
          mode(int): file mode/permission
          uid(int): user id of entry owner
          gid(int): group id of entry onwer
          acl(bool): extended ACL is present on file
        """
        path = self.resolve_cluster_path(path)
        if not os.path.exists(path):
            raise CallError(f'Directory {path} does not exist', errno.ENOENT)

        if not os.path.isdir(path):
            raise CallError(f'Path {path} is not a directory', errno.ENOTDIR)

        rv = []
        for entry in os.scandir(path):
            if entry.is_symlink():
                etype = 'SYMLINK'
            elif entry.is_dir():
                etype = 'DIRECTORY'
            elif entry.is_file():
                etype = 'FILE'
            else:
                etype = 'OTHER'

            data = {
                'name':
                entry.name,
                'path':
                entry.path.replace(f'{FuseConfig.FUSE_PATH_BASE.value}/',
                                   FuseConfig.FUSE_PATH_SUBST.value),
                'realpath':
                os.path.realpath(entry.path)
                if etype == 'SYMLINK' else entry.path,
                'type':
                etype,
            }
            try:
                stat = entry.stat()
                data.update({
                    'size':
                    stat.st_size,
                    'mode':
                    stat.st_mode,
                    'acl':
                    False if self.acl_is_trivial(data["path"]) else True,
                    'uid':
                    stat.st_uid,
                    'gid':
                    stat.st_gid,
                })
            except FileNotFoundError:
                data.update({
                    'size': None,
                    'mode': None,
                    'acl': None,
                    'uid': None,
                    'gid': None
                })
            rv.append(data)
        return filter_list(rv, filters=filters or [], options=options or {})

    @accepts(Str('path'))
    @returns(
        Dict(
            'path_stats',
            Int('size', required=True),
            Int('mode', required=True),
            Int('uid', required=True),
            Int('gid', required=True),
            Float('atime', required=True),
            Float('mtime', required=True),
            Float('ctime', required=True),
            Int('dev', required=True),
            Int('inode', required=True),
            Int('nlink', required=True),
            Str('user', null=True, required=True),
            Str('group', null=True, required=True),
            Bool('acl', required=True),
        ))
    def stat(self, path):
        """
        Return the filesystem stat(2) for a given `path`.

        Paths on clustered volumes may be specifed with the path prefix
        `CLUSTER:<volume name>`. For example, to list directories
        in the directory 'data' in the clustered volume `smb01`, the
        path should be specified as `CLUSTER:smb01/data`.
        """
        path = self.resolve_cluster_path(path)
        try:
            stat = os.stat(path, follow_symlinks=False)
        except FileNotFoundError:
            raise CallError(f'Path {path} not found', errno.ENOENT)

        stat = {
            'size': stat.st_size,
            'mode': stat.st_mode,
            'uid': stat.st_uid,
            'gid': stat.st_gid,
            'atime': stat.st_atime,
            'mtime': stat.st_mtime,
            'ctime': stat.st_ctime,
            'dev': stat.st_dev,
            'inode': stat.st_ino,
            'nlink': stat.st_nlink,
        }

        try:
            stat['user'] = pwd.getpwuid(stat['uid']).pw_name
        except KeyError:
            stat['user'] = None

        try:
            stat['group'] = grp.getgrgid(stat['gid']).gr_name
        except KeyError:
            stat['group'] = None

        stat['acl'] = False if self.acl_is_trivial(path) else True

        return stat

    @private
    @accepts(
        Str('path'),
        Str('content', max_length=2048000),
        Dict(
            'options',
            Bool('append', default=False),
            Int('mode'),
        ),
    )
    def file_receive(self, path, content, options):
        """
        Simplified file receiving method for small files.

        `content` must be a base 64 encoded file content.
        """
        dirname = os.path.dirname(path)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        if options.get('append'):
            openmode = 'ab'
        else:
            openmode = 'wb+'
        with open(path, openmode) as f:
            f.write(binascii.a2b_base64(content))
        mode = options.get('mode')
        if mode:
            os.chmod(path, mode)
        if path == PWENC_FILE_SECRET:
            self.middleware.call_sync('pwenc.reset_secret_cache')
        return True

    @private
    @accepts(
        Str('path'),
        Dict(
            'options',
            Int('offset'),
            Int('maxlen'),
        ),
    )
    def file_get_contents(self, path, options):
        """
        Get contents of a file `path` in base64 encode.

        DISCLAIMER: DO NOT USE THIS FOR BIG FILES (> 500KB).
        """
        if not os.path.exists(path):
            return None
        with open(path, 'rb') as f:
            if options.get('offset'):
                f.seek(options['offset'])
            data = binascii.b2a_base64(f.read(
                options.get('maxlen'))).decode().strip()
        return data

    @accepts(Str('path'))
    @returns()
    @job(pipes=["output"])
    async def get(self, job, path):
        """
        Job to get contents of `path`.
        """

        if not os.path.isfile(path):
            raise CallError(f'{path} is not a file')

        with open(path, 'rb') as f:
            await self.middleware.run_in_thread(shutil.copyfileobj, f,
                                                job.pipes.output.w)

    @accepts(
        Str('path'),
        Dict(
            'options',
            Bool('append', default=False),
            Int('mode'),
        ),
    )
    @returns(Bool('successful_put'))
    @job(pipes=["input"])
    async def put(self, job, path, options):
        """
        Job to put contents to `path`.
        """
        dirname = os.path.dirname(path)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        if options.get('append'):
            openmode = 'ab'
        else:
            openmode = 'wb+'

        with open(path, openmode) as f:
            await self.middleware.run_in_thread(shutil.copyfileobj,
                                                job.pipes.input.r, f)

        mode = options.get('mode')
        if mode:
            os.chmod(path, mode)
        return True

    @accepts(Str('path'))
    @returns(
        Dict(
            'path_statfs',
            List('flags', required=True),
            List('fsid', required=True),
            Str('fstype', required=True),
            Str('source', required=True),
            Str('dest', required=True),
            Int('blocksize', required=True),
            Int('total_blocks', required=True),
            Int('free_blocks', required=True),
            Int('avail_blocks', required=True),
            Int('files', required=True),
            Int('free_files', required=True),
            Int('name_max', required=True),
            Int('total_bytes', required=True),
            Int('free_bytes', required=True),
            Int('avail_bytes', required=True),
        ))
    def statfs(self, path):
        """
        Return stats from the filesystem of a given path.

        Paths on clustered volumes may be specifed with the path prefix
        `CLUSTER:<volume name>`. For example, to list directories
        in the directory 'data' in the clustered volume `smb01`, the
        path should be specified as `CLUSTER:smb01/data`.

        Raises:
            CallError(ENOENT) - Path not found
        """
        # check to see if this is a clustered path and if it is
        # resolve it to an absolute path
        # NOTE: this converts path prefixed with 'CLUSTER:' to '/cluster/...'
        path = self.resolve_cluster_path(path, ignore_ctdb=True)

        allowed_prefixes = ('/mnt/', FuseConfig.FUSE_PATH_BASE.value)
        if not path.startswith(allowed_prefixes):
            # if path doesn't start with '/mnt/' bail early
            raise CallError(
                f'Path must start with {" or ".join(allowed_prefixes)}')
        elif path == '/mnt/':
            # means the path given to us was a literal '/mnt/' which is incorrect.
            # NOTE: if the user provided 'CLUSTER:' as the literal path then
            # self.resolve_cluster_path() will raise a similar error
            raise CallError('Path must include more than "/mnt/"')

        try:
            st = os.statvfs(path)
        except FileNotFoundError:
            raise CallError('Path not found.', errno.ENOENT)

        # get the closest mountpoint to the path provided
        mountpoint = pathlib.Path(path)
        while not mountpoint.is_mount():
            mountpoint = mountpoint.parent.absolute()

        # strip the `/mnt/` or `/cluster/` prefix from the mountpoint
        device = mountpoint.as_posix().removeprefix('/mnt/')
        device = device.removeprefix('/cluster/')

        # we only look for /mnt/ or /cluster/ paths and, currently,
        # those 2 paths are limited to zfs and/or fuse.glusterfs
        fstype = 'zfs' if path.startswith('/mnt/') else 'fuse.glusterfs'

        return {
            'flags': [],
            'fstype': fstype,
            'source': device,
            'dest': mountpoint.as_posix(),
            'blocksize': st.f_frsize,
            'total_blocks': st.f_blocks,
            'free_blocks': st.f_bfree,
            'avail_blocks': st.f_bavail,
            'files': st.f_files,
            'free_files': st.f_ffree,
            'name_max': st.f_namemax,
            'fsid': [],
            'total_bytes': st.f_blocks * st.f_frsize,
            'free_bytes': st.f_bfree * st.f_frsize,
            'avail_bytes': st.f_bavail * st.f_frsize,
        }

    @accepts(Str('path'))
    @returns(Bool('paths_acl_is_trivial'))
    def acl_is_trivial(self, path):
        """
        Returns True if the ACL can be fully expressed as a file mode without losing
        any access rules.

        Paths on clustered volumes may be specifed with the path prefix
        `CLUSTER:<volume name>`. For example, to list directories
        in the directory 'data' in the clustered volume `smb01`, the
        path should be specified as `CLUSTER:smb01/data`.
        """
        path = self.resolve_cluster_path(path)
        if not os.path.exists(path):
            raise CallError(f'Path not found [{path}].', errno.ENOENT)

        acl = self.middleware.call_sync('filesystem.getacl', path, True)
        return acl['trivial']
Пример #5
0
class FilesystemService(Service):
    class Config:
        cli_namespace = 'storage.filesystem'

    @private
    def resolve_cluster_path(self, path):
        """
        Convert a "CLUSTER:"-prefixed path to an absolute path
        on the server.
        """
        if not path.startswith(FuseConfig.FUSE_PATH_SUBST.value):
            return path

        gluster_volume = path[8:].split("/")[0]
        if gluster_volume == CTDBConfig.CTDB_VOL_NAME.value:
            raise CallError('access to ctdb volume is not permitted.',
                            errno.EPERM)

        is_mounted = self.middleware.call_sync('gluster.fuse.is_mounted',
                                               {'name': gluster_volume})
        if not is_mounted:
            raise CallError(
                f'{gluster_volume}: cluster volume is not mounted.',
                errno.ENXIO)

        cluster_path = path.replace(FuseConfig.FUSE_PATH_SUBST.value,
                                    f'{FuseConfig.FUSE_PATH_BASE.value}/')
        return cluster_path

    @accepts(Str('path', required=True), Ref('query-filters'),
             Ref('query-options'))
    @filterable_returns(
        Dict(
            'path_entry',
            Str('name', required=True),
            Path('path', required=True),
            Path('realpath', required=True),
            Str('type',
                required=True,
                enum=['DIRECTORY', 'FILESYSTEM', 'SYMLINK', 'OTHER']),
            Int('size', required=True, null=True),
            Int('mode', required=True, null=True),
            Bool('acl', required=True, null=True),
            Int('uid', required=True, null=True),
            Int('gid', required=True, null=True),
        ))
    def listdir(self, path, filters, options):
        """
        Get the contents of a directory.

        Paths on clustered volumes may be specifed with the path prefix
        `CLUSTER:<volume name>`. For example, to list directories
        in the directory 'data' in the clustered volume `smb01`, the
        path should be specified as `CLUSTER:smb01/data`.

        Each entry of the list consists of:
          name(str): name of the file
          path(str): absolute path of the entry
          realpath(str): absolute real path of the entry (if SYMLINK)
          type(str): DIRECTORY | FILESYSTEM | SYMLINK | OTHER
          size(int): size of the entry
          mode(int): file mode/permission
          uid(int): user id of entry owner
          gid(int): group id of entry onwer
          acl(bool): extended ACL is present on file
        """
        path = self.resolve_cluster_path(path)
        if not os.path.exists(path):
            raise CallError(f'Directory {path} does not exist', errno.ENOENT)

        if not os.path.isdir(path):
            raise CallError(f'Path {path} is not a directory', errno.ENOTDIR)

        rv = []
        for entry in os.scandir(path):
            if entry.is_symlink():
                etype = 'SYMLINK'
            elif entry.is_dir():
                etype = 'DIRECTORY'
            elif entry.is_file():
                etype = 'FILE'
            else:
                etype = 'OTHER'

            data = {
                'name':
                entry.name,
                'path':
                entry.path.replace(f'{FuseConfig.FUSE_PATH_BASE.value}/',
                                   FuseConfig.FUSE_PATH_SUBST.value),
                'realpath':
                os.path.realpath(entry.path)
                if etype == 'SYMLINK' else entry.path,
                'type':
                etype,
            }
            try:
                stat = entry.stat()
                data.update({
                    'size':
                    stat.st_size,
                    'mode':
                    stat.st_mode,
                    'acl':
                    False if self.acl_is_trivial(data["path"]) else True,
                    'uid':
                    stat.st_uid,
                    'gid':
                    stat.st_gid,
                })
            except FileNotFoundError:
                data.update({
                    'size': None,
                    'mode': None,
                    'acl': None,
                    'uid': None,
                    'gid': None
                })
            rv.append(data)
        return filter_list(rv, filters=filters or [], options=options or {})

    @accepts(Str('path'))
    @returns(
        Dict(
            'path_stats',
            Int('size', required=True),
            Int('mode', required=True),
            Int('uid', required=True),
            Int('gid', required=True),
            Float('atime', required=True),
            Float('mtime', required=True),
            Float('ctime', required=True),
            Int('dev', required=True),
            Int('inode', required=True),
            Int('nlink', required=True),
            Str('user', null=True, required=True),
            Str('group', null=True, required=True),
            Bool('acl', required=True),
        ))
    def stat(self, path):
        """
        Return the filesystem stat(2) for a given `path`.

        Paths on clustered volumes may be specifed with the path prefix
        `CLUSTER:<volume name>`. For example, to list directories
        in the directory 'data' in the clustered volume `smb01`, the
        path should be specified as `CLUSTER:smb01/data`.
        """
        path = self.resolve_cluster_path(path)
        try:
            stat = os.stat(path, follow_symlinks=False)
        except FileNotFoundError:
            raise CallError(f'Path {path} not found', errno.ENOENT)

        stat = {
            'size': stat.st_size,
            'mode': stat.st_mode,
            'uid': stat.st_uid,
            'gid': stat.st_gid,
            'atime': stat.st_atime,
            'mtime': stat.st_mtime,
            'ctime': stat.st_ctime,
            'dev': stat.st_dev,
            'inode': stat.st_ino,
            'nlink': stat.st_nlink,
        }

        try:
            stat['user'] = pwd.getpwuid(stat['uid']).pw_name
        except KeyError:
            stat['user'] = None

        try:
            stat['group'] = grp.getgrgid(stat['gid']).gr_name
        except KeyError:
            stat['group'] = None

        stat['acl'] = False if self.acl_is_trivial(path) else True

        return stat

    @private
    @accepts(
        Str('path'),
        Str('content', max_length=2048000),
        Dict(
            'options',
            Bool('append', default=False),
            Int('mode'),
        ),
    )
    def file_receive(self, path, content, options):
        """
        Simplified file receiving method for small files.

        `content` must be a base 64 encoded file content.
        """
        dirname = os.path.dirname(path)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        if options.get('append'):
            openmode = 'ab'
        else:
            openmode = 'wb+'
        with open(path, openmode) as f:
            f.write(binascii.a2b_base64(content))
        mode = options.get('mode')
        if mode:
            os.chmod(path, mode)
        if path == PWENC_FILE_SECRET:
            self.middleware.call_sync('pwenc.reset_secret_cache')
        return True

    @private
    @accepts(
        Str('path'),
        Dict(
            'options',
            Int('offset'),
            Int('maxlen'),
        ),
    )
    def file_get_contents(self, path, options):
        """
        Get contents of a file `path` in base64 encode.

        DISCLAIMER: DO NOT USE THIS FOR BIG FILES (> 500KB).
        """
        if not os.path.exists(path):
            return None
        with open(path, 'rb') as f:
            if options.get('offset'):
                f.seek(options['offset'])
            data = binascii.b2a_base64(f.read(
                options.get('maxlen'))).decode().strip()
        return data

    @accepts(Str('path'))
    @returns()
    @job(pipes=["output"])
    async def get(self, job, path):
        """
        Job to get contents of `path`.
        """

        if not os.path.isfile(path):
            raise CallError(f'{path} is not a file')

        with open(path, 'rb') as f:
            await self.middleware.run_in_thread(shutil.copyfileobj, f,
                                                job.pipes.output.w)

    @accepts(
        Str('path'),
        Dict(
            'options',
            Bool('append', default=False),
            Int('mode'),
        ),
    )
    @returns(Bool('successful_put'))
    @job(pipes=["input"])
    async def put(self, job, path, options):
        """
        Job to put contents to `path`.
        """
        dirname = os.path.dirname(path)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        if options.get('append'):
            openmode = 'ab'
        else:
            openmode = 'wb+'

        with open(path, openmode) as f:
            await self.middleware.run_in_thread(shutil.copyfileobj,
                                                job.pipes.input.r, f)

        mode = options.get('mode')
        if mode:
            os.chmod(path, mode)
        return True

    @accepts(Str('path'))
    @returns(
        Dict(
            'path_statfs',
            List('flags', required=True),
            List('fsid', required=True),
            Str('fstype', required=True),
            Str('source', required=True),
            Str('dest', required=True),
            Int('blocksize', required=True),
            Int('total_blocks', required=True),
            Int('free_blocks', required=True),
            Int('avail_blocks', required=True),
            Int('files', required=True),
            Int('free_files', required=True),
            Int('name_max', required=True),
            Int('total_bytes', required=True),
            Int('free_bytes', required=True),
            Int('avail_bytes', required=True),
        ))
    def statfs(self, path):
        """
        Return stats from the filesystem of a given path.

        Raises:
            CallError(ENOENT) - Path not found
        """
        try:
            st = os.statvfs(path)
        except FileNotFoundError:
            raise CallError('Path not found.', errno.ENOENT)

        for partition in sorted(psutil.disk_partitions(),
                                key=lambda p: len(p.mountpoint),
                                reverse=True):
            if is_child(os.path.realpath(path), partition.mountpoint):
                break
        else:
            raise CallError('Unable to find mountpoint.')

        return {
            'flags': [],
            'fstype': partition.fstype,
            'source': partition.device,
            'dest': partition.mountpoint,
            'blocksize': st.f_frsize,
            'total_blocks': st.f_blocks,
            'free_blocks': st.f_bfree,
            'avail_blocks': st.f_bavail,
            'files': st.f_files,
            'free_files': st.f_ffree,
            'name_max': st.f_namemax,
            'fsid': [],
            'total_bytes': st.f_blocks * st.f_frsize,
            'free_bytes': st.f_bfree * st.f_frsize,
            'avail_bytes': st.f_bavail * st.f_frsize,
        }

    @accepts(Str('path'))
    @returns(Bool('paths_acl_is_trivial'))
    def acl_is_trivial(self, path):
        """
        Returns True if the ACL can be fully expressed as a file mode without losing
        any access rules.

        Paths on clustered volumes may be specifed with the path prefix
        `CLUSTER:<volume name>`. For example, to list directories
        in the directory 'data' in the clustered volume `smb01`, the
        path should be specified as `CLUSTER:smb01/data`.
        """
        path = self.resolve_cluster_path(path)
        if not os.path.exists(path):
            raise CallError(f'Path not found [{path}].', errno.ENOENT)

        acl = self.middleware.call_sync('filesystem.getacl', path, True)
        return acl['trivial']
Пример #6
0
class PeriodicSnapshotTaskService(CRUDService):
    class Config:
        datastore = 'storage.task'
        datastore_prefix = 'task_'
        datastore_extend = 'pool.snapshottask.extend'
        datastore_extend_context = 'pool.snapshottask.extend_context'
        namespace = 'pool.snapshottask'

    @private
    async def extend_context(self, extra):
        return {
            'state': await self.middleware.call('zettarepl.get_state'),
            'vmware': await self.middleware.call('vmware.query'),
        }

    @private
    async def extend(self, data, context):
        Cron.convert_db_format_to_schedule(data, begin_end=True)

        data['vmware_sync'] = any((vmware['filesystem'] == data['dataset'] or (
            data['recursive']
            and is_child(vmware['filesystem'], data['dataset'])))
                                  for vmware in context['vmware'])

        if 'error' in context['state']:
            data['state'] = context['state']['error']
        else:
            data['state'] = context['state']['tasks'].get(
                f'periodic_snapshot_task_{data["id"]}', {
                    'state': 'PENDING',
                })

        return data

    @accepts(
        Dict('periodic_snapshot_create',
             Path('dataset', required=True),
             Bool('recursive', required=True),
             List('exclude', items=[Path('item', empty=False)], default=[]),
             Int('lifetime_value', required=True),
             Str('lifetime_unit',
                 enum=['HOUR', 'DAY', 'WEEK', 'MONTH', 'YEAR'],
                 required=True),
             Str('naming_schema',
                 required=True,
                 validators=[ReplicationSnapshotNamingSchema()]),
             Cron('schedule',
                  defaults={
                      'minute': '00',
                      'begin': '00:00',
                      'end': '23:59',
                  },
                  required=True,
                  begin_end=True),
             Bool('allow_empty', default=True),
             Bool('enabled', default=True),
             register=True))
    async def do_create(self, data):
        """
        Create a Periodic Snapshot Task

        Create a Periodic Snapshot Task that will take snapshots of specified `dataset` at specified `schedule`.
        Recursive snapshots can be created if `recursive` flag is enabled. You can `exclude` specific child datasets
        or zvols from the snapshot.
        Snapshots will be automatically destroyed after a certain amount of time, specified by
        `lifetime_value` and `lifetime_unit`.
        If multiple periodic tasks create snapshots at the same time (for example hourly and daily at 00:00) the snapshot
        will be kept until the last of these tasks reaches its expiry time.
        Snapshots will be named according to `naming_schema` which is a `strftime`-like template for snapshot name
        and must contain `%Y`, `%m`, `%d`, `%H` and `%M`.

        .. examples(websocket)::

          Create a recursive Periodic Snapshot Task for dataset `data/work` excluding `data/work/temp`. Snapshots
          will be created on weekdays every hour from 09:00 to 18:00 and will be stored for two weeks.

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "pool.snapshottask.create",
                "params": [{
                    "dataset": "data/work",
                    "recursive": true,
                    "exclude": ["data/work/temp"],
                    "lifetime_value": 2,
                    "lifetime_unit": "WEEK",
                    "naming_schema": "auto_%Y-%m-%d_%H-%M",
                    "schedule": {
                        "minute": "0",
                        "hour": "*",
                        "dom": "*",
                        "month": "*",
                        "dow": "1,2,3,4,5",
                        "begin": "09:00",
                        "end": "18:00"
                    }
                }]
            }
        """

        verrors = ValidationErrors()

        verrors.add_child('periodic_snapshot_create', await
                          self._validate(data))

        if verrors:
            raise verrors

        Cron.convert_schedule_to_db_format(data, begin_end=True)

        data['id'] = await self.middleware.call(
            'datastore.insert', self._config.datastore, data,
            {'prefix': self._config.datastore_prefix})

        await self.middleware.call('zettarepl.update_tasks')

        return await self._get_instance(data['id'])

    @accepts(Int('id', required=True),
             Patch('periodic_snapshot_create', 'periodic_snapshot_update',
                   ('attr', {
                       'update': True
                   })))
    async def do_update(self, id, data):
        """
        Update a Periodic Snapshot Task with specific `id`

        See the documentation for `create` method for information on payload contents

        .. examples(websocket)::

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "pool.snapshottask.update",
                "params": [
                    1,
                    {
                        "dataset": "data/work",
                        "recursive": true,
                        "exclude": ["data/work/temp"],
                        "lifetime_value": 2,
                        "lifetime_unit": "WEEK",
                        "naming_schema": "auto_%Y-%m-%d_%H-%M",
                        "schedule": {
                            "minute": "0",
                            "hour": "*",
                            "dom": "*",
                            "month": "*",
                            "dow": "1,2,3,4,5",
                            "begin": "09:00",
                            "end": "18:00"
                        }
                    }
                ]
            }
        """

        old = await self._get_instance(id)
        new = old.copy()
        new.update(data)

        verrors = ValidationErrors()

        verrors.add_child('periodic_snapshot_update', await
                          self._validate(new))

        if not new['enabled']:
            for replication_task in await self.middleware.call(
                    'replication.query', [['enabled', '=', True]]):
                if any(periodic_snapshot_task['id'] == id
                       for periodic_snapshot_task in
                       replication_task['periodic_snapshot_tasks']):
                    verrors.add('periodic_snapshot_update.enabled', (
                        f'You can\'t disable this periodic snapshot task because it is bound to enabled replication '
                        f'task {replication_task["id"]!r}'))
                    break

        if verrors:
            raise verrors

        Cron.convert_schedule_to_db_format(new, begin_end=True)

        for key in ('vmware_sync', 'state'):
            new.pop(key, None)

        await self.middleware.call('datastore.update', self._config.datastore,
                                   id, new,
                                   {'prefix': self._config.datastore_prefix})

        await self.middleware.call('zettarepl.update_tasks')

        return await self._get_instance(id)

    @accepts(Int('id'))
    async def do_delete(self, id):
        """
        Delete a Periodic Snapshot Task with specific `id`

        .. examples(websocket)::

            :::javascript
            {
                "id": "6841f242-840a-11e6-a437-00e04d680384",
                "msg": "method",
                "method": "pool.snapshottask.delete",
                "params": [
                    1
                ]
            }
        """

        for replication_task in await self.middleware.call(
                'replication.query', [
                    ['direction', '=', 'PUSH'],
                    ['also_include_naming_schema', '=', []],
                    ['enabled', '=', True],
                ]):
            if len(replication_task['periodic_snapshot_tasks']) == 1:
                if replication_task['periodic_snapshot_tasks'][0]['id'] == id:
                    raise CallError(
                        f'You are deleting the last periodic snapshot task bound to enabled replication task '
                        f'{replication_task["name"]!r} which will break it. Please, disable that replication task '
                        f'first.', )

        response = await self.middleware.call('datastore.delete',
                                              self._config.datastore, id)

        await self.middleware.call('zettarepl.update_tasks')

        return response

    @item_method
    @accepts(Int("id"))
    async def run(self, id):
        """
        Execute a Periodic Snapshot Task of `id`.
        """
        task = await self._get_instance(id)

        if not task["enabled"]:
            raise CallError("Task is not enabled")

        await self.middleware.call("zettarepl.run_periodic_snapshot_task",
                                   task["id"])

    async def _validate(self, data):
        verrors = ValidationErrors()

        if data['dataset'] not in (
                await self.middleware.call('pool.filesystem_choices')):
            verrors.add('dataset', 'ZFS dataset or zvol not found')

        if not data['recursive'] and data['exclude']:
            verrors.add(
                'exclude',
                'Excluding datasets or zvols is not necessary for non-recursive periodic snapshot tasks'
            )

        for i, v in enumerate(data['exclude']):
            if not v.startswith(f'{data["dataset"]}/'):
                verrors.add(
                    f'exclude.{i}',
                    'Excluded dataset or zvol should be a child or other descendant of selected dataset'
                )

        return verrors
Пример #7
0
class FilesystemService(Service):
    class Config:
        cli_namespace = 'storage.filesystem'

    @accepts(Str('path'))
    @returns(Bool())
    def is_immutable(self, path):
        """
        Retrieves boolean which is set when immutable flag is set on `path`.
        """
        return chflags.is_immutable_set(path)

    @accepts(Bool('set_flag'), Str('path'))
    @returns()
    def set_immutable(self, set_flag, path):
        """
        Set/Unset immutable flag at `path`.

        `set_flag` when set will set immutable flag and when unset will unset immutable flag at `path`.
        """
        chflags.set_immutable(path, set_flag)

    @accepts(
        Dict(
            'set_dosmode',
            Path('path', required=True),
            Dict('dosmode',
                 Bool('readonly'),
                 Bool('hidden'),
                 Bool('system'),
                 Bool('archive'),
                 Bool('reparse'),
                 Bool('offline'),
                 Bool('sparse'),
                 register=True),
        ))
    @returns()
    def set_dosmode(self, data):
        return dosmode.set_dosflags(data['path'], data['dosmode'])

    @accepts(Str('path'))
    @returns(Ref('dosmode'))
    def get_dosmode(self, path):
        return dosmode.get_dosflags(path)

    @private
    def is_cluster_path(self, path):
        return path.startswith(FuseConfig.FUSE_PATH_SUBST.value)

    @private
    def resolve_cluster_path(self, path, ignore_ctdb=False):
        """
        Convert a "CLUSTER:"-prefixed path to an absolute path
        on the server.
        """
        if not path.startswith(FuseConfig.FUSE_PATH_SUBST.value):
            return path

        gluster_volume = path[8:].split("/")[0]
        if gluster_volume == CTDBConfig.CTDB_VOL_NAME.value and not ignore_ctdb:
            raise CallError('access to ctdb volume is not permitted.',
                            errno.EPERM)
        elif not gluster_volume:
            raise CallError(
                f'More than the prefix "{FuseConfig.FUSE_PATH_SUBST.value}" must be provided'
            )

        is_mounted = self.middleware.call_sync('gluster.fuse.is_mounted',
                                               {'name': gluster_volume})
        if not is_mounted:
            raise CallError(
                f'{gluster_volume}: cluster volume is not mounted.',
                errno.ENXIO)

        cluster_path = path.replace(FuseConfig.FUSE_PATH_SUBST.value,
                                    f'{FuseConfig.FUSE_PATH_BASE.value}/')
        return cluster_path

    @accepts(Str('path'))
    @returns(Ref('path_entry'))
    def mkdir(self, path):
        """
        Create a directory at the specified path.
        """
        path = self.resolve_cluster_path(path)
        is_clustered = path.startswith("/cluster")

        p = pathlib.Path(path)
        if not p.is_absolute():
            raise CallError(f'{path}: not an absolute path.', errno.EINVAL)

        if p.exists():
            raise CallError(f'{path}: path already exists.', errno.EEXIST)

        realpath = os.path.realpath(path)
        if not is_clustered and not realpath.startswith('/mnt/'):
            raise CallError(f'{path}: path not permitted', errno.EPERM)

        os.mkdir(path)
        stat = p.stat()
        data = {
            'name': p.parts[-1],
            'path': path,
            'realpath': realpath,
            'type': 'DIRECTORY',
            'size': stat.st_size,
            'mode': stat.st_mode,
            'acl': False if self.acl_is_trivial(path) else True,
            'uid': stat.st_uid,
            'gid': stat.st_gid,
        }

        return data

    @private
    def stat_entry_impl(self, entry, options=None):
        out = {'st': None, 'etype': None, 'is_ctldir': False}
        opts = options or {"dir_only": False, "file_only": False}
        path = entry.absolute()

        try:
            out['st'] = entry.lstat()
        except FileNotFoundError:
            return None

        if statlib.S_ISDIR(out['st'].st_mode):
            out['etype'] = 'DIRECTORY'

        elif statlib.S_ISLNK(out['st'].st_mode):
            out['etype'] = 'SYMLINK'
            try:
                out['st'] = entry.stat()
            except FileNotFoundError:
                return None

        elif statlib.S_ISREG(out['st'].st_mode):
            out['etype'] = 'FILE'

        else:
            out['etype'] = 'OTHER'

        while str(path) != '/':
            if not path.name == '.zfs':
                path = path.parent
                continue

            if path.stat().st_ino == ZFSCTL.INO_ROOT:
                out['is_ctldir'] = True
                break

            path = path.parent

        if opts['dir_only'] and out['etype'] != 'DIRECTORY':
            return None

        elif opts['file_only'] and out['etype'] != 'FILE':
            return None

        return out

    @accepts(Str('path', required=True), Ref('query-filters'),
             Ref('query-options'))
    @filterable_returns(
        Dict('path_entry',
             Str('name', required=True),
             Path('path', required=True),
             Path('realpath', required=True),
             Str('type',
                 required=True,
                 enum=['DIRECTORY', 'FILE', 'SYMLINK', 'OTHER']),
             Int('size', required=True, null=True),
             Int('mode', required=True, null=True),
             Bool('acl', required=True, null=True),
             Int('uid', required=True, null=True),
             Int('gid', required=True, null=True),
             Bool('is_mountpoint', required=True),
             Bool('is_ctldir', required=True),
             register=True))
    def listdir(self, path, filters, options):
        """
        Get the contents of a directory.

        Paths on clustered volumes may be specifed with the path prefix
        `CLUSTER:<volume name>`. For example, to list directories
        in the directory 'data' in the clustered volume `smb01`, the
        path should be specified as `CLUSTER:smb01/data`.

        Each entry of the list consists of:
          name(str): name of the file
          path(str): absolute path of the entry
          realpath(str): absolute real path of the entry (if SYMLINK)
          type(str): DIRECTORY | FILE | SYMLINK | OTHER
          size(int): size of the entry
          mode(int): file mode/permission
          uid(int): user id of entry owner
          gid(int): group id of entry onwer
          acl(bool): extended ACL is present on file
          is_mountpoint(bool): path is a mountpoint
          is_ctldir(bool): path is within special .zfs directory
        """

        path = self.resolve_cluster_path(path)
        path = pathlib.Path(path)
        if not path.exists():
            raise CallError(f'Directory {path} does not exist', errno.ENOENT)

        if not path.is_dir():
            raise CallError(f'Path {path} is not a directory', errno.ENOTDIR)

        if 'ix-applications' in path.parts:
            raise CallError(
                'Ix-applications is a system managed dataset and its contents cannot be listed'
            )

        stat_opts = {"file_only": False, "dir_only": False}
        for filter in filters:
            if filter[0] not in ['type']:
                continue

            if filter[1] != '=' and filter[2] not in ['DIRECTORY', 'FILE']:
                continue

            if filter[2] == 'DIRECTORY':
                stat_opts["dir_only"] = True
            else:
                stat_opts["file_only"] = True

        rv = []
        if stat_opts["dir_only"] and stat_opts["file_only"]:
            return rv

        only_top_level = path.absolute() == pathlib.Path('/mnt')
        for entry in path.iterdir():
            st = self.stat_entry_impl(entry, stat_opts)
            if st is None:
                continue

            if only_top_level and not entry.is_mount():
                # sometimes (on failures) the top-level directory
                # where the zpool is mounted does not get removed
                # after the zpool is exported. WebUI calls this
                # specifying `/mnt` as the path. This is used when
                # configuring shares in the "Path" drop-down. To
                # prevent shares from being configured to point to
                # a path that doesn't exist on a zpool, we'll
                # filter these here.
                continue
            if 'ix-applications' in entry.parts:
                continue

            etype = st['etype']
            stat = st['st']
            realpath = entry.resolve().as_posix(
            ) if etype == 'SYMLINK' else entry.absolute().as_posix()

            data = {
                'name':
                entry.name,
                'path':
                entry.as_posix().replace(f'{FuseConfig.FUSE_PATH_BASE.value}/',
                                         FuseConfig.FUSE_PATH_SUBST.value),
                'realpath':
                realpath,
                'type':
                etype,
                'size':
                stat.st_size,
                'mode':
                stat.st_mode,
                'acl':
                False if self.acl_is_trivial(realpath) else True,
                'uid':
                stat.st_uid,
                'gid':
                stat.st_gid,
                'is_mountpoint':
                entry.is_mount(),
                'is_ctldir':
                st['is_ctldir'],
            }

            rv.append(data)

        return filter_list(rv, filters=filters or [], options=options or {})

    @accepts(Str('path'))
    @returns(
        Dict(
            'path_stats',
            Str('realpath', required=True),
            Int('size', required=True),
            Int('mode', required=True),
            Int('uid', required=True),
            Int('gid', required=True),
            Float('atime', required=True),
            Float('mtime', required=True),
            Float('ctime', required=True),
            Int('dev', required=True),
            Int('inode', required=True),
            Int('nlink', required=True),
            Bool('is_mountpoint', required=True),
            Bool('is_ctldir', required=True),
            Str('user', null=True, required=True),
            Str('group', null=True, required=True),
            Bool('acl', required=True),
        ))
    def stat(self, _path):
        """
        Return the filesystem stat(2) for a given `path`.

        Paths on clustered volumes may be specifed with the path prefix
        `CLUSTER:<volume name>`. For example, to list directories
        in the directory 'data' in the clustered volume `smb01`, the
        path should be specified as `CLUSTER:smb01/data`.
        """
        path = pathlib.Path(self.resolve_cluster_path(_path))
        if not path.is_absolute():
            raise CallError(f'{_path}: path must be absolute', errno.EINVAL)

        st = self.stat_entry_impl(path, None)
        if st is None:
            raise CallError(f'Path {_path} not found', errno.ENOENT)

        realpath = path.resolve().as_posix(
        ) if st['etype'] == 'SYMLINK' else path.absolute().as_posix()

        stat = {
            'realpath': realpath,
            'type': st['etype'],
            'size': st['st'].st_size,
            'mode': st['st'].st_mode,
            'uid': st['st'].st_uid,
            'gid': st['st'].st_gid,
            'atime': st['st'].st_atime,
            'mtime': st['st'].st_mtime,
            'ctime': st['st'].st_ctime,
            'dev': st['st'].st_dev,
            'inode': st['st'].st_ino,
            'nlink': st['st'].st_nlink,
            'is_mountpoint': path.is_mount(),
            'is_ctldir': st['is_ctldir'],
        }

        try:
            stat['user'] = pwd.getpwuid(stat['uid']).pw_name
        except KeyError:
            stat['user'] = None

        try:
            stat['group'] = grp.getgrgid(stat['gid']).gr_name
        except KeyError:
            stat['group'] = None

        stat['acl'] = False if self.acl_is_trivial(_path) else True

        return stat

    @private
    @accepts(
        Str('path'),
        Str('content', max_length=2048000),
        Dict(
            'options',
            Bool('append', default=False),
            Int('mode'),
        ),
    )
    def file_receive(self, path, content, options):
        """
        Simplified file receiving method for small files.

        `content` must be a base 64 encoded file content.
        """
        dirname = os.path.dirname(path)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        if options.get('append'):
            openmode = 'ab'
        else:
            openmode = 'wb+'
        with open(path, openmode) as f:
            f.write(binascii.a2b_base64(content))
        mode = options.get('mode')
        if mode:
            os.chmod(path, mode)
        if path == PWENC_FILE_SECRET:
            self.middleware.call_sync('pwenc.reset_secret_cache')
        return True

    @private
    @accepts(
        Str('path'),
        Dict(
            'options',
            Int('offset'),
            Int('maxlen'),
        ),
    )
    def file_get_contents(self, path, options):
        """
        Get contents of a file `path` in base64 encode.

        DISCLAIMER: DO NOT USE THIS FOR BIG FILES (> 500KB).
        """
        if not os.path.exists(path):
            return None
        with open(path, 'rb') as f:
            if options.get('offset'):
                f.seek(options['offset'])
            data = binascii.b2a_base64(f.read(
                options.get('maxlen'))).decode().strip()
        return data

    @accepts(Str('path'))
    @returns()
    @job(pipes=["output"])
    async def get(self, job, path):
        """
        Job to get contents of `path`.
        """

        if not os.path.isfile(path):
            raise CallError(f'{path} is not a file')

        with open(path, 'rb') as f:
            await self.middleware.run_in_thread(shutil.copyfileobj, f,
                                                job.pipes.output.w)

    @accepts(
        Str('path'),
        Dict(
            'options',
            Bool('append', default=False),
            Int('mode'),
        ),
    )
    @returns(Bool('successful_put'))
    @job(pipes=["input"])
    async def put(self, job, path, options):
        """
        Job to put contents to `path`.
        """
        dirname = os.path.dirname(path)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        if options.get('append'):
            openmode = 'ab'
        else:
            openmode = 'wb+'

        with open(path, openmode) as f:
            await self.middleware.run_in_thread(shutil.copyfileobj,
                                                job.pipes.input.r, f)

        mode = options.get('mode')
        if mode:
            os.chmod(path, mode)
        return True

    @accepts(Str('path'))
    @returns(
        Dict(
            'path_statfs',
            List('flags', required=True),
            List('fsid', required=True),
            Str('fstype', required=True),
            Str('source', required=True),
            Str('dest', required=True),
            Int('blocksize', required=True),
            Int('total_blocks', required=True),
            Int('free_blocks', required=True),
            Int('avail_blocks', required=True),
            Int('files', required=True),
            Int('free_files', required=True),
            Int('name_max', required=True),
            Int('total_bytes', required=True),
            Int('free_bytes', required=True),
            Int('avail_bytes', required=True),
        ))
    def statfs(self, path):
        """
        Return stats from the filesystem of a given path.

        Paths on clustered volumes may be specifed with the path prefix
        `CLUSTER:<volume name>`. For example, to list directories
        in the directory 'data' in the clustered volume `smb01`, the
        path should be specified as `CLUSTER:smb01/data`.

        Raises:
            CallError(ENOENT) - Path not found
        """
        # check to see if this is a clustered path and if it is
        # resolve it to an absolute path
        # NOTE: this converts path prefixed with 'CLUSTER:' to '/cluster/...'
        path = self.resolve_cluster_path(path, ignore_ctdb=True)

        allowed_prefixes = ('/mnt/', FuseConfig.FUSE_PATH_BASE.value)
        if not path.startswith(allowed_prefixes):
            # if path doesn't start with '/mnt/' bail early
            raise CallError(
                f'Path must start with {" or ".join(allowed_prefixes)}')
        elif path == '/mnt/':
            # means the path given to us was a literal '/mnt/' which is incorrect.
            # NOTE: if the user provided 'CLUSTER:' as the literal path then
            # self.resolve_cluster_path() will raise a similar error
            raise CallError('Path must include more than "/mnt/"')

        try:
            st = os.statvfs(path)
        except FileNotFoundError:
            raise CallError('Path not found.', errno.ENOENT)

        # get the closest mountpoint to the path provided
        mountpoint = pathlib.Path(path)
        while not mountpoint.is_mount():
            mountpoint = mountpoint.parent.absolute()

        # strip the `/mnt/` or `/cluster/` prefix from the mountpoint
        device = mountpoint.as_posix().removeprefix('/mnt/')
        device = device.removeprefix('/cluster/')

        # get fstype for given path based on major:minor entry in mountinfo
        stx = stat_x.statx(path)
        maj_min = f'{stx.stx_dev_major}:{stx.stx_dev_minor}'
        fstype = None
        flags = []
        with open('/proc/self/mountinfo') as f:
            # example lines look like this. We use `find()` to keep the `.split()` calls to only 2 (instead of 3)
            # (minor optimization, but still one nonetheless)
            # "26 1 0:23 / / rw,relatime shared:1 - zfs boot-pool/ROOT/22.02-MASTER-20211129-015838 rw,xattr,posixacl"
            # OR
            # "129 26 0:50 / /mnt/data rw,noatime shared:72 - zfs data rw,xattr,posixacl"
            for line in f:
                if line.find(maj_min) != -1:
                    fstype = line.rsplit(' - ')[1].split()[0]
                    """
                    Following gets mount flags. For filesystems, there are two
                    varieties. First are flags returned by statfs(2) on Linux which
                    are defined in manpage. These are located in middle of mountinfo line.
                    The second info group is at end of mountinfo string and contains
                    superblock info returned from FS. We attempt to consilidate this
                    disparate info here.
                    """
                    unsorted_info, mount_flags = line.rsplit(' ', 1)
                    flags = mount_flags.strip().upper().split(',')

                    offset = unsorted_info.find(flags[0].lower())
                    other_flags = unsorted_info[offset:].split()[0]

                    for f in other_flags.split(','):
                        flag = f.upper()
                        if flag in flags:
                            continue

                        flags.append(flag)
                    break

        return {
            'flags': flags,
            'fstype': fstype,
            'source': device,
            'dest': mountpoint.as_posix(),
            'blocksize': st.f_frsize,
            'total_blocks': st.f_blocks,
            'free_blocks': st.f_bfree,
            'avail_blocks': st.f_bavail,
            'files': st.f_files,
            'free_files': st.f_ffree,
            'name_max': st.f_namemax,
            'fsid': [],
            'total_bytes': st.f_blocks * st.f_frsize,
            'free_bytes': st.f_bfree * st.f_frsize,
            'avail_bytes': st.f_bavail * st.f_frsize,
        }

    @accepts(Str('path'))
    @returns(Bool('paths_acl_is_trivial'))
    def acl_is_trivial(self, path):
        """
        Returns True if the ACL can be fully expressed as a file mode without losing
        any access rules.

        Paths on clustered volumes may be specifed with the path prefix
        `CLUSTER:<volume name>`. For example, to list directories
        in the directory 'data' in the clustered volume `smb01`, the
        path should be specified as `CLUSTER:smb01/data`.
        """
        path = self.resolve_cluster_path(path)
        if not os.path.exists(path):
            raise CallError(f'Path not found [{path}].', errno.ENOENT)

        acl_xattrs = ACLType.xattr_names()
        xattrs_present = set(os.listxattr(path))

        return False if (xattrs_present & acl_xattrs) else True