Пример #1
0
    def start_instances(self, request, queryset):
        """
        Start all transmitted PostgreSQL instances

        This function assumes we're running against a bunch of Debian-based
        systems so we can use pg_ctlcluster. Thus far, that's the case. Later
        revisions may change that assumption.

        Instances whose is_online flag is already set are skipped with a
        warning. Every outcome is reported through message_user, and a
        failure on one instance does not stop the remaining ones from being
        attempted.

        :param request: current admin request, used to attach user messages.
        :param queryset: iterable of the instances selected for this action.
        """

        for inst in queryset:
            if inst.is_online:
                self.message_user(request, "%s is already running." % inst,
                                  messages.WARNING)
                continue

            try:
                PGUtility(inst).start()

            except Exception as e:
                # Report the failure and carry on with the next instance.
                self.message_user(request, "%s : %s" % (e, inst),
                                  messages.ERROR)
                continue

            self.message_user(request, "%s started!" % inst)
Пример #2
0
    def save_model(self, request, obj, form, change):
        """
        Automatically detect/populate several fields before saving instance

        Since we're defining what is (hopefully) an existing structure,
        we should be able to auto-detect several elements from the database
        itself. There can also be a backend monitor on each server that will
        keep these values updated, but bootstrapping is always best.

        Autodetected fields:
          * is_online
          * master
          * version

        :param request: current admin request, used to attach user messages.
        :param obj: the instance being saved; it is modified in place.
        :param form: the submitted admin form (not used here).
        :param change: admin "change" flag (not used here).
        """

        # First, check the online status. We want this to be as fresh as
        # possible, so we might as well grab it now. connect_ex returns 0
        # when the TCP connection succeeds.

        obj.is_online = False

        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        try:
            check = sock.connect_ex((obj.server.hostname, obj.herd.db_port))
        finally:
            # The socket is only a probe; always release it, even when
            # connect_ex raises, so descriptors are not leaked per save.
            sock.close()

        if check == 0:
            obj.is_online = True

        # Then, since herds are organized such that each herd follows a single
        # primary node, we can auto-declare that this is a replica or not.
        # If we search and find a primary for this herd, that instance will
        # become our master.

        util = PGUtility(obj)
        obj.master = util.get_herd_primary()
        obj.version = util.get_version()

        # Fall back to the master's version when none could be detected.

        if obj.master and not obj.version:
            obj.version = obj.master.version

        # Save now that we've hijacked everything.

        obj.save()

        # Attempt to initialize the instance. This only works if the instance
        # doesn't already exist. It's also optional, so don't derail the save
        # just because it didn't fully work.

        try:
            util.init_missing()
        except Exception as e:
            self.message_user(request, "Instance init: %s" % str(e),
                              messages.WARNING)
Пример #3
0
    def reload_instances(self, request, queryset):
        """
        Reload all transmitted PostgreSQL instances

        This is provided as a way of reloading configuration files. Each
        outcome is reported through message_user; a failure on one instance
        does not stop the remaining ones from being attempted.

        :param request: current admin request, used to attach user messages.
        :param queryset: iterable of the instances selected for this action.
        """

        for inst in queryset:
            try:
                PGUtility(inst).reload()

            except Exception as e:
                # Report the failure and carry on with the next instance.
                self.message_user(request, "%s : %s" % (e, inst),
                                  messages.ERROR)
                continue

            self.message_user(request, "%s config files reloaded!" % inst)
Пример #4
0
    def reload_instances(self, request, queryset):
        """
        Reload all transmitted PostgreSQL instances

        This is provided as a way of reloading configuration files. Results
        are reported per instance through message_user, and an error on one
        instance only skips that instance.

        :param request: current admin request, used to attach user messages.
        :param queryset: iterable of the instances selected for this action.
        """

        for inst in queryset:
            try:
                util = PGUtility(inst)
                util.reload()

            except Exception as e:
                # Surface the error to the user, then try the next instance.
                self.message_user(request, "%s : %s" % (e, inst),
                                  messages.ERROR)
                continue

            self.message_user(request, "%s config files reloaded!" % inst)
Пример #5
0
    def promote_instances(self, request, queryset):
        """
        Promote transmitted PostgreSQL replication instances to master state

        Nothing is done unless the POST data contains post=yes (presumably
        set by a confirmation form that is not visible in this method). The
        instances are looked up from the submitted checkbox ids rather than
        from queryset, and every outcome is reported through message_user.

        :param request: current admin request; its POST data drives the work.
        :param queryset: selected instances (not used by this method body).
        """

        if request.POST.get('post') == 'yes':

            for inst_id in request.POST.getlist(admin.ACTION_CHECKBOX_NAME):

                # Guard the lookup as well: a stale or malformed id should be
                # reported for that entry only, not abort the whole batch.
                try:
                    inst = Instance.objects.get(pk=inst_id)
                except (Instance.DoesNotExist, ValueError) as e:
                    self.message_user(request, "%s : %s" % (e, inst_id),
                                      messages.ERROR)
                    continue

                try:
                    PGUtility(inst).promote()

                except Exception as e:
                    self.message_user(request, "%s : %s" % (e, inst),
                                      messages.ERROR)
                    continue

                self.message_user(request, "%s promoted to read/write!" % inst)
            return
Пример #6
0
    def restart_instances(self, request, queryset):
        """
        Restart all transmitted PostgreSQL instances

        Basically we just call for a fast stop followed by a start. Nothing
        complicated here. Unlike stop, we don't skip stopped instances, and
        unlike start, we don't skip running ones.

        Each outcome is reported through message_user; if either the stop or
        the start raises, that instance is reported as an error and the loop
        moves on to the next one.

        :param request: current admin request, used to attach user messages.
        :param queryset: iterable of the instances selected for this action.
        """

        for inst in queryset:
            try:
                util = PGUtility(inst)
                util.stop()
                util.start()

            except Exception as e:
                # Report the failure and carry on with the next instance.
                self.message_user(request, "%s : %s" % (e, inst),
                                  messages.ERROR)
                continue

            self.message_user(request, "%s restarted!" % inst)
Пример #7
0
    def promote_instances(self, request, queryset):
        """
        Promote transmitted PostgreSQL replication instances to master state

        The work only runs when the POST data contains post=yes (presumably
        supplied by a confirmation form; that form is not visible here).
        Instances are fetched by the submitted checkbox ids, not taken from
        queryset, and each result is reported through message_user.

        :param request: current admin request; its POST data drives the work.
        :param queryset: selected instances (not used by this method body).
        """

        if request.POST.get('post') == 'yes':

            for inst_id in request.POST.getlist(admin.ACTION_CHECKBOX_NAME):

                # A missing or malformed id is reported for that entry only
                # instead of raising out of the action mid-batch.
                try:
                    inst = Instance.objects.get(pk=inst_id)
                except (Instance.DoesNotExist, ValueError) as e:
                    self.message_user(request, "%s : %s" % (e, inst_id),
                                      messages.ERROR)
                    continue

                try:
                    util = PGUtility(inst)
                    util.promote()

                except Exception as e:
                    self.message_user(request, "%s : %s" % (e, inst),
                                      messages.ERROR)
                    continue

                self.message_user(request, "%s promoted to read/write!" % inst)
            return
Пример #8
0
    def save_model(self, request, obj, form, change):
        """
        Automatically detect/populate several fields before saving instance

        Since we're defining what is (hopefully) an existing structure,
        we should be able to auto-detect several elements from the database
        itself. There can also be a backend monitor on each server that will
        keep these values updated, but bootstrapping is always best.

        Autodetected fields:
          * is_online
          * master
          * version

        Detection of master and version is best-effort: if it fails, the
        object is still saved with whatever values it already carries, and
        the user is told about the failure.

        :param request: current admin request, used to attach user messages.
        :param obj: the instance being saved; it is modified in place.
        :param form: the submitted admin form (not used here).
        :param change: admin "change" flag (not used here).
        """

        # First, check the online status. We want this to be as fresh as
        # possible, so we might as well grab it now. connect_ex returns 0
        # when the TCP connection succeeds.

        obj.is_online = False

        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        try:
            check = sock.connect_ex((obj.server.hostname, obj.herd.db_port))
        finally:
            # The socket is only a probe; always release it, even when
            # connect_ex raises, so descriptors are not leaked per save.
            sock.close()

        if check == 0:
            obj.is_online = True

        # Then, since herds are organized such that each herd follows a single
        # primary node, we can auto-declare that this is a replica or not.
        # If we search and find a primary for this herd, that instance will
        # become our master.

        try:
            util = PGUtility(obj)
            obj.master = util.get_herd_primary()
            obj.version = util.get_version()
        except Exception as e:
            # Still best-effort, but no longer silent, and no longer
            # trapping SystemExit / KeyboardInterrupt like a bare except.
            self.message_user(request, "Instance detection: %s" % e)

        # Finally, save now that we've hijacked everything.

        obj.save()
Пример #9
0
    def demote_instances(self, request, queryset):
        """
        Demote selected instances back into streaming herd replicas

        Given a node is a primary, meaning at one point it was promoted,
        we probably eventually want to convert it back. This encapsulates
        that process and works for several selected primaries.

        Per the original notes, instances which are the only primary in the
        herd are pruned both before *and* after the confirmation form. That
        pruning is not visible in this method; it is presumably enforced by
        the utility's demote() call.

        Nothing is done unless the POST data contains post=yes. Instances
        are looked up from the submitted checkbox ids rather than queryset.

        :param request: current admin request; its POST data drives the work.
        :param queryset: selected instances (not used by this method body).
        """

        if request.POST.get('post') == 'yes':

            # Iterate through every submitted instance and call the utility
            # to demote each. It should perform the check logic that ensures
            # we always have at least one remaining master in the herd.

            for inst_id in request.POST.getlist(admin.ACTION_CHECKBOX_NAME):

                # Guard the lookup too, so a stale or malformed id is
                # reported for that entry rather than aborting the batch.
                try:
                    inst = Instance.objects.get(pk=inst_id)
                except (Instance.DoesNotExist, ValueError) as e:
                    self.message_user(request, "%s : %s" % (e, inst_id),
                                      messages.ERROR)
                    continue

                try:
                    PGUtility(inst).demote()

                except Exception as e:
                    self.message_user(request, "%s : %s" % (e, inst),
                                      messages.ERROR)
                    continue

                host = inst.server.hostname
                herd = inst.herd
                self.message_user(request,
                                  "%s demoted to %s replica!" % (host, herd))
            return
Пример #10
0
    def demote_instances(self, request, queryset):
        """
        Demote selected instances back into streaming herd replicas

        Given a node is a primary, meaning at one point it was promoted,
        we probably eventually want to convert it back. This encapsulates
        that process and works for several selected primaries.

        The original notes state that instances which are the only primary
        in the herd are pruned both before *and* after the confirmation
        form. No such pruning appears in this method, so it is presumably
        handled inside the utility's demote() call.

        The loop only runs when the POST data contains post=yes, and it
        fetches instances by the submitted checkbox ids, not from queryset.

        :param request: current admin request; its POST data drives the work.
        :param queryset: selected instances (not used by this method body).
        """

        if request.POST.get('post') == 'yes':

            # Iterate through every submitted instance and call the utility
            # to demote each. It should perform the check logic that ensures
            # we always have at least one remaining master in the herd.

            for inst_id in request.POST.getlist(admin.ACTION_CHECKBOX_NAME):

                # A missing or malformed id only skips that entry; the rest
                # of the submitted instances are still processed.
                try:
                    inst = Instance.objects.get(pk=inst_id)
                except (Instance.DoesNotExist, ValueError) as e:
                    self.message_user(request, "%s : %s" % (e, inst_id),
                                      messages.ERROR)
                    continue

                try:
                    util = PGUtility(inst)
                    util.demote()

                except Exception as e:
                    self.message_user(request, "%s : %s" % (e, inst),
                                      messages.ERROR)
                    continue

                host = inst.server.hostname
                herd = inst.herd
                self.message_user(request,
                                  "%s demoted to %s replica!" % (host, herd))
            return
Пример #11
0
    def stop_instances(self, request, queryset):
        """
        Stop all transmitted PostgreSQL instances

        Instances whose is_online flag is not set are skipped with a
        warning. Every outcome is reported through message_user, and a
        failure on one instance does not stop the remaining ones from being
        attempted.

        :param request: current admin request, used to attach user messages.
        :param queryset: iterable of the instances selected for this action.
        """

        for inst in queryset:
            if not inst.is_online:
                self.message_user(request, "%s is already stopped." % inst,
                                  messages.WARNING)
                continue

            try:
                PGUtility(inst).stop()

            except Exception as e:
                # Report the failure and carry on with the next instance.
                self.message_user(request, "%s : %s" % (e, inst),
                                  messages.ERROR)
                continue

            self.message_user(request, "%s stopped!" % inst)
Пример #12
0
    def stop_instances(self, request, queryset):
        """
        Stop all transmitted PostgreSQL instances

        An instance whose is_online flag is false is treated as already
        stopped and only produces a warning. All results are reported per
        instance through message_user; an error skips just that instance.

        :param request: current admin request, used to attach user messages.
        :param queryset: iterable of the instances selected for this action.
        """

        for inst in queryset:
            if not inst.is_online:
                self.message_user(request, "%s is already stopped." % inst,
                                  messages.WARNING)
                continue

            try:
                util = PGUtility(inst)
                util.stop()

            except Exception as e:
                # Surface the error to the user, then try the next instance.
                self.message_user(request, "%s : %s" % (e, inst),
                                  messages.ERROR)
                continue

            self.message_user(request, "%s stopped!" % inst)
Пример #13
0
    def rebuild_instances(self, request, queryset):
        """
        Rebuild all transmitted PostgreSQL replication instances from master

        Nothing is done unless the POST data contains post=yes (presumably
        set by a confirmation form that is not visible in this method). The
        instances are looked up from the submitted checkbox ids rather than
        from queryset, and every outcome is reported through message_user.

        :param request: current admin request; its POST data drives the work.
        :param queryset: selected instances (not used by this method body).
        """

        # If we should be rebuilding an instance, connect to the host,
        # ensure the instance is stopped, and sync the data directories
        # through rsync + ssh. All of that is delegated to master_sync().

        if request.POST.get('post') == 'yes':

            for inst_id in request.POST.getlist(admin.ACTION_CHECKBOX_NAME):

                # Guard the lookup as well: a stale or malformed id should be
                # reported for that entry only, not abort the whole batch.
                try:
                    inst = Instance.objects.get(pk=inst_id)
                except (Instance.DoesNotExist, ValueError) as e:
                    self.message_user(request, "%s : %s" % (e, inst_id),
                                      messages.ERROR)
                    continue

                try:
                    PGUtility(inst).master_sync()

                except Exception as e:
                    self.message_user(request, "%s : %s" % (e, inst),
                                      messages.ERROR)
                    continue

                self.message_user(request, "%s rebuilt!" % inst)
            return
Пример #14
0
    def save_model(self, request, obj, form, change):
        """
        Automatically detect/populate several fields before saving instance

        Since we're defining what is (hopefully) an existing structure,
        we should be able to auto-detect several elements from the database
        itself. There can also be a backend monitor on each server that will
        keep these values updated, but bootstrapping is always best.

        Autodetected fields:
          * is_online
          * master
          * version

        After saving, an optional init_missing() is attempted; a failure
        there is reported as a warning and does not undo the save.

        :param request: current admin request, used to attach user messages.
        :param obj: the instance being saved; it is modified in place.
        :param form: the submitted admin form (not used here).
        :param change: admin "change" flag (not used here).
        """

        # First, check the online status. We want this to be as fresh as
        # possible, so we might as well grab it now. A zero return from
        # connect_ex means the TCP connection was established.

        obj.is_online = False

        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        try:
            check = sock.connect_ex((obj.server.hostname, obj.herd.db_port))
        finally:
            # Close the probe socket on every path so repeated saves do not
            # accumulate open descriptors.
            sock.close()

        if check == 0:
            obj.is_online = True

        # Then, since herds are organized such that each herd follows a single
        # primary node, we can auto-declare that this is a replica or not.
        # If we search and find a primary for this herd, that instance will
        # become our master.

        util = PGUtility(obj)
        obj.master = util.get_herd_primary()
        obj.version = util.get_version()

        # Fall back to the master's version when none could be detected.

        if obj.master and not obj.version:
            obj.version = obj.master.version

        # Save now that we've hijacked everything.

        obj.save()

        # Attempt to initialize the instance. This only works if the instance
        # doesn't already exist. It's also optional, so don't derail the save
        # just because it didn't fully work.

        try:
            util.init_missing()
        except Exception as e:
            self.message_user(request, "Instance init: %s" % str(e),
                              messages.WARNING)
Пример #15
0
    def restart_instances(self, request, queryset):
        """
        Restart all transmitted PostgreSQL instances

        Basically we just call for a fast stop followed by a start. Nothing
        complicated here. Unlike stop, we don't skip stopped instances, and
        unlike start, we don't skip running ones.

        Results are reported per instance through message_user. When the
        stop or the start raises, the instance is reported as an error and
        the loop continues with the next one.

        :param request: current admin request, used to attach user messages.
        :param queryset: iterable of the instances selected for this action.
        """

        for inst in queryset:
            try:
                util = PGUtility(inst)
                util.stop()
                util.start()

            except Exception as e:
                # Surface the error to the user, then try the next instance.
                self.message_user(request, "%s : %s" % (e, inst),
                                  messages.ERROR)
                continue

            self.message_user(request, "%s restarted!" % inst)
Пример #16
0
    def failover_pair(self, request, queryset):
        """
        Promote a Herd Follower to Leader Status

        This process is fairly complicated, and comes in several parts:

        1. Stop the current primary node. This ensures only the secondary
           can accept new data.
        2. Promote the top follower to read/write status. This essentially
           makes it the new leader of the herd.
        3. Assign the follower as the new stream source to the old primary.
           This officially swaps the roles of the two nodes. Note that the
           new follower is still out of sync with the new leader. This will
           require a separate node rebuild step to rectify.
        4. Move the declared virtual host to the new leader.
        5. Reassign all replicas to follow the new leader. We do this last
           because it relies on DNS propagation, and pushing a reload after
           that step implies a reconnection.

        Note that step 5 is not performed anywhere in this method body;
        only steps 1-4 are carried out here.

        :param request: current admin request; its POST data drives the work.
        :param queryset: selected instances, passed to the confirmation page.
        :returns: the rendered confirmation page until the POST data
            contains post=yes; None after the failover loop has run.
        """

        # Go to the confirmation form. As usual, this is fairly important,
        # so make sure the template is extremely descriptive regarding the
        # failover process.

        if request.POST.get('post') != 'yes':

            return render(
                request, 'admin/haas/disasterrecovery/failover.html', {
                    'queryset': queryset,
                    'opts': self.model._meta,
                    'action_checkbox_name': admin.ACTION_CHECKBOX_NAME,
                })

        # Since the form has been submitted, start swapping DR pairs.

        for dr_id in request.POST.getlist(admin.ACTION_CHECKBOX_NAME):

            # Guard the lookup: a stale or malformed id is reported for that
            # entry only, so the remaining pairs are still processed.
            try:
                newb = Instance.objects.get(pk=dr_id)
            except (Instance.DoesNotExist, ValueError) as e:
                self.message_user(request, "%s : %s" % (e, dr_id),
                                  messages.ERROR)
                continue

            sage = newb.master

            # Start with the transfer: stop -> promote -> alter.
            # Add in a short pause between to allow xlog propagation.

            try:
                sage_util = PGUtility(sage)
                newb_util = PGUtility(newb)

                sage_util.stop()
                sleep(5)
                newb_util.promote()

                sage.master = newb
                sage.save()

            except Exception as e:
                self.message_user(request, "%s : %s" % (e, newb),
                                  messages.ERROR)
                continue

            # Now update the DNS. We'll just use the basic dnspython
            # module and load it with nameserver defaults. That should
            # be more than enough to propagate this change.

            try:
                def_dns = dns.resolver.get_default_resolver()

                # Replace the vhost CNAME so it points at the new leader.
                new_dns = dns.update.Update(str(def_dns.domain).rstrip('.'))
                new_dns.delete(str(newb.herd.vhost), 'cname')
                new_dns.add(str(newb.herd.vhost), '300', 'cname',
                            str(newb.server.hostname))

                # Push the same update to every default nameserver.
                for ns in def_dns.nameservers:
                    dns.query.tcp(new_dns, ns)

            except Exception as e:
                self.message_user(request, "%s : %s" % (e, newb),
                                  messages.ERROR)
                continue
Пример #17
0
    def failover_pair(self, request, queryset):
        """
        Promote a Herd Follower to Leader Status

        This process is fairly complicated, and comes in several parts:

        1. Stop the current primary node. This ensures only the secondary
           can accept new data.
        2. Promote the top follower to read/write status. This essentially
           makes it the new leader of the herd.
        3. Assign the follower as the new stream source to the old primary.
           This officially swaps the roles of the two nodes. Note that the
           new follower is still out of sync with the new leader. This will
           require a separate node rebuild step to rectify.
        4. Move the declared virtual host to the new leader.
        5. Reassign all replicas to follow the new leader. We do this last
           because it relies on DNS propagation, and pushing a reload after
           that step implies a reconnection.

        Only steps 1-4 appear in this method body; the replica reassignment
        of step 5 is not performed here.

        :param request: current admin request; its POST data drives the work.
        :param queryset: selected instances, passed to the confirmation page.
        :returns: the rendered confirmation page until the POST data
            contains post=yes; None after the failover loop has run.
        """

        # Go to the confirmation form. As usual, this is fairly important,
        # so make sure the template is extremely descriptive regarding the
        # failover process.

        if request.POST.get('post') != 'yes':

            context = {
                'queryset': queryset,
                'opts': self.model._meta,
                'action_checkbox_name': admin.ACTION_CHECKBOX_NAME,
            }
            return render(
                request, 'admin/haas/disasterrecovery/failover.html', context)

        # Since the form has been submitted, start swapping DR pairs.

        for dr_id in request.POST.getlist(admin.ACTION_CHECKBOX_NAME):

            # A missing or malformed id only skips that entry instead of
            # raising out of the action halfway through the batch.
            try:
                newb = Instance.objects.get(pk=dr_id)
            except (Instance.DoesNotExist, ValueError) as e:
                self.message_user(request, "%s : %s" % (e, dr_id),
                                  messages.ERROR)
                continue

            sage = newb.master

            # Start with the transfer: stop -> promote -> alter.
            # Add in a short pause between to allow xlog propagation.

            try:
                sage_util = PGUtility(sage)
                newb_util = PGUtility(newb)

                sage_util.stop()
                sleep(5)
                newb_util.promote()

                sage.master = newb
                sage.save()

            except Exception as e:
                self.message_user(request, "%s : %s" % (e, newb),
                                  messages.ERROR)
                continue

            # Now update the DNS. We'll just use the basic dnspython
            # module and load it with nameserver defaults. That should
            # be more than enough to propagate this change.

            try:
                def_dns = dns.resolver.get_default_resolver()

                # Swap the vhost CNAME over to the new leader's hostname.
                new_dns = dns.update.Update(str(def_dns.domain).rstrip('.'))
                new_dns.delete(str(newb.herd.vhost), 'cname')
                new_dns.add(str(newb.herd.vhost), '300', 'cname',
                            str(newb.server.hostname))

                # Send the update to each of the default nameservers.
                for ns in def_dns.nameservers:
                    dns.query.tcp(new_dns, ns)

            except Exception as e:
                self.message_user(request, "%s : %s" % (e, newb),
                                  messages.ERROR)
                continue
Пример #18
0
class DRAdmin(SharedInstanceAdmin):
    """
    Admin view for disaster-recovery instances.

    Builds on SharedInstanceAdmin but exposes only the failover and rebuild
    actions, and disallows adding entries through this view.
    """

    actions = ['failover_pair', 'rebuild_instances']
    list_display = ('herd', 'container', 'mb_lag', 'vhost')
    list_filter = ('herd__environment', )
    search_fields = ('herd__herd_name', 'server__hostname', 'vhost')

    list_display_links = None
    can_delete = False

    def has_add_permission(self, request):
        """Never allow adding entries through this admin view."""
        return False

    def get_actions(self, request):
        """
        Remove Unused Actions From Master Class

        Though we inherit quite a lot from the Instance admin menu, we don't
        need most of the actions. So we'll throw away the ones we didn't
        explicitly include.

        :param request: current admin request, passed to the parent class.
        :returns: the parent's action mapping, reduced to the names listed
            in self.actions.
        """
        actions = super(DRAdmin, self).get_actions(request)

        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating its live key view raises RuntimeError on Python 3.
        for key in list(actions):
            if key not in self.actions:
                del actions[key]

        return actions

    def container(self, instance):
        """Return the hostname of the server holding this instance."""
        return instance.server.hostname

    container.short_description = 'DR Container'
    container.admin_order_field = 'server__hostname'

    def failover_pair(self, request, queryset):
        """
        Promote a Herd Follower to Leader Status

        This process is fairly complicated, and comes in several parts:

        1. Stop the current primary node. This ensures only the secondary
           can accept new data.
        2. Promote the top follower to read/write status. This essentially
           makes it the new leader of the herd.
        3. Assign the follower as the new stream source to the old primary.
           This officially swaps the roles of the two nodes. Note that the
           new follower is still out of sync with the new leader. This will
           require a separate node rebuild step to rectify.
        4. Move the declared virtual host to the new leader.
        5. Reassign all replicas to follow the new leader. We do this last
           because it relies on DNS propagation, and pushing a reload after
           that step implies a reconnection.

        A failure in any stage is reported through message_user and skips
        the remaining stages for that pair only.

        :param request: current admin request; its POST data drives the work.
        :param queryset: selected instances, passed to the confirmation page.
        :returns: the rendered confirmation page until the POST data
            contains post=yes; None after the failover loop has run.
        """

        # Go to the confirmation form. As usual, this is fairly important,
        # so make sure the template is extremely descriptive regarding the
        # failover process.

        if request.POST.get('post') != 'yes':

            return render(
                request, 'admin/haas/disasterrecovery/failover.html', {
                    'queryset': queryset,
                    'opts': self.model._meta,
                    'action_checkbox_name': admin.ACTION_CHECKBOX_NAME,
                })

        # Since the form has been submitted, start swapping DR pairs.

        for dr_id in request.POST.getlist(admin.ACTION_CHECKBOX_NAME):

            # Guard the lookup: a stale or malformed id is reported for that
            # entry only, so the remaining pairs are still processed.
            try:
                newb = Instance.objects.get(pk=dr_id)
            except (Instance.DoesNotExist, ValueError) as e:
                self.message_user(request, "%s : %s" % (e, dr_id),
                                  messages.ERROR)
                continue

            sage = newb.master

            # Start with the transfer: stop -> promote -> alter.
            # Add in a short pause between to allow xlog propagation.

            try:
                sage_util = PGUtility(sage)
                newb_util = PGUtility(newb)

                sage_util.stop()
                sleep(5)
                newb_util.promote()

                sage.master = newb
                sage.save()

            except Exception as e:
                self.message_user(request, "%s : %s" % (e, newb),
                                  messages.ERROR)
                continue

            # Now update the DNS. We'll just use the basic dnspython
            # module and load it with nameserver defaults. That should
            # be more than enough to propagate this change.

            try:
                def_dns = dns.resolver.get_default_resolver()

                # Replace the vhost CNAME so it points at the new leader.
                new_dns = dns.update.Update(str(def_dns.domain).rstrip('.'))
                new_dns.delete(str(newb.herd.vhost), 'cname')
                new_dns.add(str(newb.herd.vhost), '300', 'cname',
                            str(newb.server.hostname))

                # Push the same update to every default nameserver.
                for ns in def_dns.nameservers:
                    dns.query.tcp(new_dns, ns)

            except Exception as e:
                self.message_user(request, "%s : %s" % (e, newb),
                                  messages.ERROR)
                continue

            # Now we should get the list of all replica instances in this
            # herd, which should include the old primary. We just need to
            # update the recovery.conf file and reload the instance.

            try:
                herd = Instance.objects.filter(master_id__isnull=False,
                                               herd_id=newb.herd_id)

                for member in herd:
                    member.master = newb
                    member.save()

                    util = PGUtility(member)
                    util.update_stream_config()
                    util.reload()

            except Exception as e:
                self.message_user(request, "%s : %s" % (e, newb),
                                  messages.ERROR)
                continue