Пример #1
    def _refresh_rt(self):
        """Pick the ReplicaTrail whose snapshot will parent the next send.

        For incremental sends, the receiver tells us the latest successful
        snapshot it holds (``self.rlatest_snap``). This should match
        ``self.rt`` in most cases. Sometimes it is not the one referred to by
        ``self.rt`` (the latest) but a previous one. We need to make sure to
        *only* send the incremental send that the receiver expects.

        :return: the ReplicaTrail to use as the incremental parent, or None
            when a full replication has to be sent instead.
        :raises Exception: when the receiver names a snapshot that has no
            succeeded trail, or whose subvolume does not exist on this system.
        """
        self.msg = ('Failed to validate/refresh ReplicaTrail.')

        def snap_path_of(snap_name):
            # Path of a replication snapshot belonging to this replica's share.
            return ('%s%s/.snapshots/%s/%s'
                    % (settings.MNT_PT, self.replica.pool,
                       self.replica.share, snap_name))

        if (self.rlatest_snap is None):
            # Validate/update self.rt to the one that has the expected Snapshot
            # on the system.
            for rt in ReplicaTrail.objects.filter(
                    replica=self.replica, status='succeeded').order_by('-id'):
                # Test the candidate trail's own snapshot. Testing
                # self.rt.snap_name here would check the same path on every
                # iteration and could return a trail whose snapshot is gone.
                if (is_subvol(snap_path_of(rt.snap_name))):
                    return rt
            # Snapshots from previous succeeded ReplicaTrails don't actually
            # exist on the system. So we send a Full replication instead of
            # incremental.
            return None

        if (len(self.rlatest_snap) == 0):
            # Receiver sends empty string when it fails to reply back to an
            # incremental send request with an appropriate parent snapshot
            # name.
            return None

        if (self.rt.snap_name != self.rlatest_snap):
            self.msg = ('Mismatch on starting snapshot for '
                        'btrfs-send. Sender picked %s but Receiver wants '
                        '%s, which takes precedence.'
                        % (self.rt.snap_name, self.rlatest_snap))
            for rt in ReplicaTrail.objects.filter(
                    replica=self.replica, status='succeeded').order_by('-id'):
                if (rt.snap_name == self.rlatest_snap):
                    self.msg = ('%s. successful trail found for %s'
                                % (self.msg, self.rlatest_snap))
                    snap_path = snap_path_of(self.rlatest_snap)
                    if (is_subvol(snap_path)):
                        self.msg = ('Snapshot(%s) exists in the system and '
                                    'will be used as the parent' % snap_path)
                        logger.debug('Id: %s. %s' % (self.identity, self.msg))
                        return rt
                    self.msg = ('Snapshot(%s) does not exist on the system. '
                                'So cannot use it.' % snap_path)
                    raise Exception(self.msg)
            raise Exception('%s. No succeeded trail found for %s.'
                            % (self.msg, self.rlatest_snap))

        # Sender and receiver agree on the parent; it still has to exist here.
        snap_path = snap_path_of(self.rlatest_snap)
        if (is_subvol(snap_path)):
            return self.rt
        raise Exception('Parent Snapshot(%s) to use in btrfs-send does not '
                        'exist in the system.' % snap_path)
Пример #5
 def _latest_snap(self, rso):
     """Return the name of the newest succeeded receive snapshot on disk.

     Walks the succeeded ReceiveTrail rows for ``rso`` from the highest id
     downwards and returns the first ``snap_name`` whose path under
     ``self.snap_dir`` is a subvolume. When none qualifies an error is
     logged and None is returned, meaning a full transfer is required.
     """
     succeeded_trails = ReceiveTrail.objects.filter(
         rshare=rso, status='succeeded').order_by('-id')
     for trail in succeeded_trails:
         candidate_path = '%s/%s' % (self.snap_dir, trail.snap_name)
         if not is_subvol(candidate_path):
             continue
         # cannot be unicode for zmq message
         return str(trail.snap_name)
     error_text = ('Id: %s. There are no replication snapshots on the '
                   'system for '
                   'Share(%s).' % (self.identity, rso.share))
     logger.error(error_text)
     # No usable snapshot: the caller has to fall back to a full backup.
     return None
Пример #6
 def test_is_subvol_exists(self):
     """is_subvol() must report True when 'btrfs subvol show' returns rc 0."""
     mount_point = '/mnt2/test-pool/test-share'
     o = ['/mnt2/test-pool/test-share', '\tName: \t\t\ttest-share',
          '\tUUID: \t\t\t80c240a2-c353-7540-bb5e-b6a71a50a02e',
          '\tParent UUID: \t\t-', '\tReceived UUID: \t\t-',
          '\tCreation time: \t\t2016-07-27 17:01:09 +0100',
          '\tSubvolume ID: \t\t258', '\tGeneration: \t\t13',
          '\tGen at creation: \t13', '\tParent ID: \t\t5',
          '\tTop level ID: \t\t5', '\tFlags: \t\t\t-', '\tSnapshot(s):', '']
     e = ['']
     rc = 0
     # btrfs subvol show has return code of 0 (no errors) when subvol exists
     self.mock_run_command.return_value = (o, e, rc)
     # The assertion opener was missing, leaving an orphaned 'msg=' line and
     # an unused mount_point; restored so the test actually checks is_subvol.
     self.assertTrue(is_subvol(mount_point),
                     msg='Did NOT return True for existing subvol')
Пример #8
def create_repclone(share, request, logger, snapshot):
    """
    Variant of create_clone but where the share already exists and is to be
    supplanted by a snapshot which is effectively moved into the shares prior
    position, both in the db and on the file system. This is achieved thus:
    Unmount target share - (via remove_share()).
    Btrfs subvol delete target share (via remove_share()).
    Remove prior target share mount point (dir).
    Move snap source to target share's former location (becomes share on disk).
    Update existing target share db entry with source snap's qgroup / usage.
    Remove source snap's db entry: updated share db entry makes it redundant.
    Remount share (which now represents the prior snap's subvol relocated).
    :param share: Share object to be supplanted
    :param request:
    :param logger: Logger object to reference
    :param snapshot: Source snapshot/quirk share object to supplant target.
    :return: response of serialized share (in its updated form)
    """
    try:
        logger.info("Supplanting share ({}) with "
                    "snapshot ({}).".format(share.name, snapshot.name))
        # We first strip our snapshot.name of any path as when we encounter the
        # initially created receive subvol it is identified as a share with a
        # snapshots location as it's subvol name (current quirk of import sys).
        # E.g. first receive subvol/share-in-snapdir name example:
        # ".snapshots/C583C37F-...1712B_sharename/sharename_19_replication_1".
        # Subsequent more regular snapshots (in db as such) are named thus:
        # "sharename_19_replication_2" or "sharename_19_replication_3" and on.
        # The 19 in the above names is the generation of the replication task.
        # Normalise source name across initial quirk share & subsequent snaps.
        source_name = snapshot.name.split("/")[-1]
        # Note in the above we have to use Object.name for polymorphism, but
        # our share is passed by it's subvol (potential fragility point).
        snap_path = "{}/.snapshots/{}/{}".format(
            share.pool.mnt_pt, share.name, source_name).replace("//", "/")
        # e.g. for above: /mnt2/poolname/.snapshots/sharename/snapname
        # or /.snapshots/sharename/snapname for system pool shares
        share_path = ("{}/{}".format(share.pool.mnt_pt,
                                     share.name)).replace("//", "/")
        # e.g. for above: /mnt2/poolname/sharename or /sharename for system
        # pool shares
        # Passed db snap assured by caller but this does not guarantee on disk.
        if not is_subvol(snap_path):
            raise Exception("Subvol with path ({}) does not exist. Aborting "
                            "replacement of share with path ({}).".format(
                                snap_path, share_path))
        # unmounts and then subvol deletes our on disk share
        remove_share(share.pool, share.name, PQGROUP_DEFAULT)
        # Remove read only flag on our snapshot subvol
        set_property(snap_path, "ro", "false", mount=False)
        # Ensure removed share path is clean, ie remove mount point.
        run_command(["/usr/bin/rm", "-rf", share_path], throw=False)
        # Now move snapshot to prior shares location. Given both a share and
        # a snapshot are subvols, we effectively promote the snap to a share.
        logger.info(
            "Moving snapshot ({}) to prior share's pool location ({})".format(
                snap_path, share_path))
        shutil.move(snap_path, share_path)
        # This should have re-established our just removed subvol.
        # Supplant share db info with snap info to reflect new on disk state.
        share.qgroup = snapshot.qgroup
        share.rusage = snapshot.rusage
        share.eusage = snapshot.eusage
        # Persist the supplanted values; without this the db entry would not
        # be updated as the docstring above promises.
        share.save()
        # delete our now redundant snapshot/quirky share db entry
        snapshot.delete()
        # update our share's quota
        update_quota(share.pool, share.pqgroup, share.size * 1024)
        # mount our newly supplanted share
        # We independently mount all shares, data pool or system pool, in
        # /mnt2/name
        mnt_pt = "{}{}".format(settings.MNT_PT, share.name)
        mount_share(share, mnt_pt)
        return Response(ShareSerializer(share).data)
    except Exception as e:
        handle_exception(e, request)
Пример #10
def create_repclone(share, request, logger, snapshot):
    """
    Variant of create_clone but where the share already exists and is to be
    supplanted by a snapshot which is effectively moved into the shares prior
    position, both in the db and on the file system. This is achieved thus:
    Unmount target share - (via remove_share()).
    Btrfs subvol delete target share (via remove_share()).
    Remove prior target share mount point (dir).
    Move snap source to target share's former location (becomes share on disk).
    Update existing target share db entry with source snap's qgroup / usage.
    Remove source snap's db entry: updated share db entry makes it redundant.
    Remount share (which now represents the prior snap's subvol relocated).
    :param share: Share object to be supplanted
    :param request:
    :param logger: Logger object to reference
    :param snapshot: Source snapshot/quirk share object to supplant target.
    :return: response of serialized share (in its updated form)
    """
    try:
        logger.info('Supplanting share ({}) with '
                    'snapshot ({}).'.format(share.name, snapshot.name))
        # We first strip our snapshot.name of any path as when we encounter the
        # initially created receive subvol it is identified as a share with a
        # snapshots location as it's subvol name (current quirk of import sys).
        # E.g. first receive subvol/share-in-snapdir name example:
        # ".snapshots/C583C37F-...1712B_sharename/sharename_19_replication_1".
        # Subsequent more regular snapshots (in db as such) are named thus:
        # "sharename_19_replication_2" or "sharename_19_replication_3" and on.
        # The 19 in the above names is the generation of the replication task.
        # Normalise source name across initial quirk share & subsequent snaps.
        source_name = snapshot.name.split('/')[-1]
        # Note in the above we have to use Object.name for polymorphism, but
        # our share is passed by it's subvol (potential fragility point).
        snap_path = '{}{}/.snapshots/{}/{}'.format(settings.MNT_PT,
                                                   share.pool.name, share.name,
                                                   source_name)
        # eg /mnt2/poolname/.snapshots/sharename/snapname
        share_path = ('{}{}/{}'.format(settings.MNT_PT, share.pool.name,
                                       share.name))
        # eg /mnt2/poolname/sharename
        # Passed db snap assured by caller but this does not guarantee on disk.
        if not is_subvol(snap_path):
            raise Exception('Subvol with path ({}) does not exist. Aborting '
                            'replacement of share ({}).'.format(snap_path,
                                                                share.name))
        # unmounts and then subvol deletes our on disk share
        remove_share(share.pool, share.name, PQGROUP_DEFAULT)
        # Remove read only flag on our snapshot subvol
        set_property(snap_path, 'ro', 'false', mount=False)
        # Ensure removed share path is clean, ie remove mount point.
        run_command(['/usr/bin/rm', '-rf', share_path], throw=False)
        # Now move snapshot to prior shares location. Given both a share and
        # a snapshot are subvols, we effectively promote the snap to a share.
        shutil.move(snap_path, share_path)
        # This should have re-established our just removed subvol.
        # Supplant share db info with snap info to reflect new on disk state.
        share.qgroup = snapshot.qgroup
        share.rusage = snapshot.rusage
        share.eusage = snapshot.eusage
        # Persist the supplanted values; without this the db entry would not
        # be updated as the docstring above promises.
        share.save()
        # delete our now redundant snapshot/quirky share db entry
        snapshot.delete()
        # update our share's quota
        update_quota(share.pool, share.pqgroup, share.size * 1024)
        # mount our newly supplanted share
        mnt_pt = '{}{}'.format(settings.MNT_PT, share.name)
        mount_share(share, mnt_pt)
        return Response(ShareSerializer(share).data)
    except Exception as e:
        handle_exception(e, request)
Пример #12
    def run(self):
        # Receiver side of one replication transfer, as far as this listing
        # shows: connect a zmq DEALER socket, record replica-share and
        # receive-trail metadata, promote/prune old snapshots, start a
        # 'btrfs receive' process, then loop on broker messages until the
        # stream is reported finished or an error is raised.
        # NOTE(review): many statements below appear truncated in this listing
        # (unterminated calls and dict literals, missing else branches).
        # Confirm against the complete source before relying on this flow.
        logger.debug('Id: %s. Starting a new Receiver for meta: %s' %
                     (self.identity, self.meta))
        # self.msg is refreshed before each step with a description of what
        # would have failed; presumably _clean_exit_handler reports it - TODO
        # confirm.
        self.msg = ('Top level exception in receiver')
        latest_snap = None
        with self._clean_exit_handler():
            self.law = APIWrapper()
            self.poll = zmq.Poller()
            self.dealer = self.ctx.socket(zmq.DEALER)
            self.dealer.setsockopt_string(zmq.IDENTITY, u'%s' % self.identity)
            self.dealer.connect('ipc://%s' %
            self.poll.register(self.dealer, zmq.POLLIN)

            self.ack = True
            self.msg = ('Failed to get the sender ip for appliance: %s' %
            self.sender_ip = Appliance.objects.get(uuid=self.sender_id).ip

            # Full (non-incremental) transfer: make sure the share and its
            # replica metadata exist.
            if (not self.incremental):
                self.msg = ('Failed to verify/create share: %s.' % self.sname)
                self.create_share(self.sname, self.dest_pool)

                self.msg = ('Failed to create the replica metadata object '
                            'for share: %s.' % self.sname)
                data = {
                    'share': self.sname,
                    'appliance': self.sender_ip,
                    'src_share': self.src_share,
                self.rid = self.create_rshare(data)
                # NOTE(review): the lines from here to _latest_snap() read like
                # the incremental branch; an 'else:' looks to be missing here.
                self.msg = ('Failed to retreive the replica metadata '
                            'object for share: %s.' % self.sname)
                rso = ReplicaShare.objects.get(share=self.sname)
                self.rid = rso.id
                # Find and send the current snapshot to the sender. This will
                # be used as the start by btrfs-send diff.
                self.msg = ('Failed to verify latest replication snapshot '
                            'on the system.')
                latest_snap = self._latest_snap(rso)

            self.msg = ('Failed to create receive trail for rid: %d' %
            data = {
                'snap_name': self.snap_name,
            self.rtid = self.create_receive_trail(self.rid, data)

            # delete the share, move the oldest snap to share
            self.msg = ('Failed to promote the oldest Snapshot to Share.')
            oldest_snap = get_oldest_snap(self.snap_dir,
            if (oldest_snap is not None):
                self.update_repclone(self.sname, oldest_snap)

            self.msg = ('Failed to prune old Snapshots')
            self._delete_old_snaps(self.sname, self.snap_dir,
                                   self.num_retain_snaps + 1)

            # TODO: The following should be re-instantiated once we have a
            # TODO: working method for doing so. see validate_src_share.
            # self.msg = ('Failed to validate the source share(%s) on '
            #             'sender(uuid: %s '
            #             ') Did the ip of the sender change?' %
            #             (self.src_share, self.sender_id))
            # self.validate_src_share(self.sender_id, self.src_share)

            # Ensure the destination share subvolume exists before receiving.
            sub_vol = ('%s%s/%s' %
                       (settings.MNT_PT, self.dest_pool, self.sname))
            if (not is_subvol(sub_vol)):
                self.msg = ('Failed to create parent subvolume %s' % sub_vol)
                run_command([BTRFS, 'subvolume', 'create', sub_vol])

            self.msg = ('Failed to create snapshot directory: %s' %
            run_command(['/usr/bin/mkdir', '-p', self.snap_dir])
            snap_fp = ('%s/%s' % (self.snap_dir, self.snap_name))

            # If the snapshot already exists, presumably from the previous
            # attempt and the sender tries to send the same, reply back with
            # snap_exists and do not start the btrfs-receive
            # NOTE(review): only the debug log is visible in this branch; the
            # reply and early exit that the comment above describes are not
            # shown in this listing.
            if (is_subvol(snap_fp)):
                logger.debug('Id: %s. Snapshot to be sent(%s) already '
                             'exists. Not starting a new receive process' %
                             (self.identity, snap_fp))

            cmd = [BTRFS, 'receive', self.snap_dir]
            self.msg = ('Failed to start the low level btrfs receive '
                        'command(%s). Aborting.' % cmd)
            self.rp = subprocess.Popen(cmd,

            # Tell the sender we are ready; latest_snap (or '') names the
            # snapshot found above for incremental transfers.
            self.msg = ('Failed to send receiver-ready')
            rcommand, rmsg = self._send_recv('receiver-ready', latest_snap
                                             or '')
            if (rcommand is None):
                logger.error('Id: %s. No response from the broker for '
                             'receiver-ready command. Aborting.' %

            term_commands = (
            num_tries = 10
            poll_interval = 6000  # 6 seconds
            num_msgs = 0
            t0 = time.time()
            while (True):
                socks = dict(self.poll.poll(poll_interval))
                if (socks.get(self.dealer) == zmq.POLLIN):
                    # reset to wait up to 60 seconds (poll_interval x
                    # num_tries, in milliseconds) for every message
                    num_tries = 10
                    command, message = self.dealer.recv_multipart()
                    # NOTE(review): recv_multipart() yields bytes frames on
                    # Python 3, so comparing against str literals here would
                    # never match there - verify the targeted Python version.
                    if (command == 'btrfs-send-stream-finished'):
                        # this command concludes fsdata transfer. After this,
                        # btrfs-recv process should be
                        # terminated(.communicate).
                        if (self.rp.poll() is None):
                            self.msg = ('Failed to terminate btrfs-recv '
                            out, err = self.rp.communicate()
                            out = out.split('\n')
                            err = err.split('\n')
                            logger.debug('Id: %s. Terminated btrfs-recv. '
                                         'cmd = %s out = %s err: %s rc: %s' %
                                         (self.identity, cmd, out, err,
                        if (self.rp.returncode != 0):
                            self.msg = ('btrfs-recv exited with unexpected '
                                        'exitcode(%s). ' % self.rp.returncode)
                            raise Exception(self.msg)
                        data = {
                            'status': 'succeeded',
                            'kb_received': self.total_bytes_received / 1024,
                        self.msg = ('Failed to update receive trail for '
                                    'rtid: %d' % self.rtid)
                        self.update_receive_trail(self.rtid, data)


                        dsize, drate = self.size_report(
                            self.total_bytes_received, t0)
                        logger.debug('Id: %s. Receive complete. Total data '
                                     'transferred: %s. Rate: %s/sec.' %
                                     (self.identity, dsize, drate))

                    if (command in term_commands):
                        self.msg = ('Terminal command(%s) received from the '
                                    'sender. Aborting.' % command)
                        raise Exception(self.msg)

                    # btrfs receive still running: ask for more data and
                    # account for what has arrived so far.
                    if (self.rp.poll() is None):
                        # @todo: implement advanced credit request system.
                        self.dealer.send_multipart([b'send-more', ''])
                        num_msgs += 1
                        self.total_bytes_received += len(message)
                        # Update the receive trail once every 1000 messages.
                        if (num_msgs == 1000):
                            num_msgs = 0
                            data = {
                                'status': 'pending',
                                self.total_bytes_received / 1024,
                            self.update_receive_trail(self.rtid, data)

                            dsize, drate = self.size_report(
                                self.total_bytes_received, t0)
                            logger.debug('Id: %s. Receiver alive. Data '
                                         'transferred: %s. Rate: %s/sec.' %
                                         (self.identity, dsize, drate))
                        # NOTE(review): the statements below handle btrfs-recv
                        # having died; an 'else:' for the poll() check above
                        # looks to be missing from this listing.
                        out, err = self.rp.communicate()
                        out = out.split('\n')
                        err = err.split('\n')
                        logger.error('Id: %s. btrfs-recv died unexpectedly. '
                                     'cmd: %s out: %s. err: %s' %
                                     (self.identity, cmd, out, err))
                        msg = (
                            'Low level system error from btrfs receive '
                            'command. cmd: %s out: %s err: %s for rtid: %s' %
                            (cmd, out, err, self.rtid))
                        data = {
                            'status': 'failed',
                            'error': msg,
                        self.msg = ('Failed to update receive trail for '
                                    'rtid: %d.' % self.rtid)
                        self.update_receive_trail(self.rtid, data)
                        self.msg = msg
                        raise Exception(self.msg)
                    # NOTE(review): the countdown below reads as the "nothing
                    # arrived within poll_interval" path; an 'else:' for the
                    # POLLIN check looks to be missing from this listing.
                    num_tries -= 1
                    msg = ('No response received from the broker. '
                           'remaining tries: %d' % num_tries)
                    logger.error('Id: %s. %s' % (self.identity, msg))
                    if (num_tries == 0):
                        self.msg = ('%s. Terminating the receiver.' % msg)
                        raise Exception(self.msg)
