Example #1
class TestHa(unittest.TestCase):
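    # Note: `true` and `false` used throughout are assumed to be module-level
    # helper callables that always return True/False, and helpers such as
    # get_node_status() and get_cluster_initialized_with_leader() are assumed
    # to be defined elsewhere in this test module; the module-level imports
    # and fixtures are elided from this listing.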
    @patch('socket.getaddrinfo', socket_getaddrinfo)
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('patroni.dcs.dcs_modules',
           Mock(return_value=['patroni.dcs.foo', 'patroni.dcs.etcd']))
    @patch.object(etcd.Client, 'read', etcd_read)
    def setUp(self):
        with patch.object(Client, 'machines') as mock_machines:
            mock_machines.__get__ = Mock(
                return_value=['http://*****:*****'])
            # ... (remainder of setUp elided)

    @patch.object(Postgresql, 'fix_cluster_state', Mock())
    def test_crash_recovery(self):
        self.p.is_running = false
        self.p.controldata = lambda: {
            'Database cluster state': 'in production'
        }
        self.assertEquals(self.ha.run_cycle(),
                          'doing crash recovery in a single user mode')

    @patch.object(Postgresql, 'rewind_needed_and_possible',
                  Mock(return_value=True))
    def test_recover_with_rewind(self):
        self.p.is_running = false
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.run_cycle(), 'running pg_rewind from leader')

    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    @patch.object(Postgresql, 'fix_cluster_state', Mock())
    def test_single_user_after_recover_failed(self):
        self.p.controldata = lambda: {'Database cluster state': 'in recovery'}
        self.p.is_running = false
        self.p.follow = false
        self.assertEquals(self.ha.run_cycle(), 'starting as a secondary')
        self.assertEquals(self.ha.run_cycle(),
                          'fixing cluster state in a single user mode')

    @patch('sys.exit', return_value=1)
    @patch('patroni.ha.Ha.sysid_valid', MagicMock(return_value=True))
    def test_sysid_no_match(self, exit_mock):
        self.ha.run_cycle()
        exit_mock.assert_called_once_with(1)

    @patch.object(Cluster, 'is_unlocked', Mock(return_value=False))
    def test_start_as_readonly(self):
        self.p.is_leader = false
        self.p.is_healthy = true
        self.ha.has_lock = true
        self.assertEquals(
            self.ha.run_cycle(),
            'promoted self to leader because i had the session lock')

    def test_acquire_lock_as_master(self):
        self.assertEquals(self.ha.run_cycle(),
                          'acquired session lock as a leader')

    def test_promoted_by_acquiring_lock(self):
        self.ha.is_healthiest_node = true
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')

    def test_demote_after_failing_to_obtain_lock(self):
        self.ha.acquire_lock = false
        self.assertEquals(
            self.ha.run_cycle(),
            'demoted self after trying and failing to obtain lock')

    def test_follow_new_leader_after_failing_to_obtain_lock(self):
        self.ha.is_healthiest_node = true
        self.ha.acquire_lock = false
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'following new leader after trying and failing to obtain lock')

    def test_demote_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.assertEquals(
            self.ha.run_cycle(),
            'demoting self because i am not the healthiest node')

    def test_follow_new_leader_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because i am not the healthiest node'
        )

    def test_promote_because_have_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'promoted self to leader because i had the session lock')

    def test_promote_without_watchdog(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = true
        with patch.object(Watchdog, 'activate', Mock(return_value=False)):
            self.assertEquals(
                self.ha.run_cycle(),
                'Demoting self because watchdog could not be activated')
            self.p.is_leader = false
            self.assertEquals(
                self.ha.run_cycle(),
                'Not promoting self because watchdog could not be activated')

    def test_leader_with_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')

    def test_demote_because_not_having_lock(self):
        self.ha.cluster.is_unlocked = false
        with patch.object(Watchdog, 'is_running',
                          PropertyMock(return_value=True)):
            self.assertEquals(
                self.ha.run_cycle(),
                'demoting self because i do not have the lock and i was a leader'
            )

    def test_demote_because_update_lock_failed(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.ha.update_lock = false
        self.assertEquals(
            self.ha.run_cycle(),
            'demoted self because failed to update leader lock in DCS')
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'not promoting because failed to update leader lock in DCS')

    def test_follow(self):
        self.ha.cluster.is_unlocked = false
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'no action.  i am a secondary and i am following a leader')
        self.ha.patroni.replicatefrom = "foo"
        self.assertEquals(
            self.ha.run_cycle(),
            'no action.  i am a secondary and i am following a leader')

    def test_follow_in_pause(self):
        self.ha.cluster.is_unlocked = false
        self.ha.is_paused = true
        self.assertEquals(self.ha.run_cycle(),
                          'PAUSE: continue to run as master without lock')
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: no action')

    @patch.object(Postgresql, 'rewind_needed_and_possible',
                  Mock(return_value=True))
    def test_follow_triggers_rewind(self):
        self.p.is_leader = false
        self.p.trigger_check_diverged_lsn()
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.run_cycle(), 'running pg_rewind from leader')

    def test_no_etcd_connection_master_demote(self):
        self.ha.load_cluster_from_dcs = Mock(
            side_effect=DCSError('Etcd is not responding properly'))
        self.assertEquals(
            self.ha.run_cycle(),
            'demoted self because DCS is not accessible and i was a leader')

    @patch('time.sleep', Mock())
    def test_bootstrap_from_another_member(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.bootstrap(),
                          'trying to bootstrap from replica \'other\'')

    def test_bootstrap_waiting_for_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(),
                          'waiting for leader to bootstrap')

    def test_bootstrap_without_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.p.can_create_replica_without_replication_connection = MagicMock(
            return_value=True)
        self.assertEquals(self.ha.bootstrap(),
                          'trying to bootstrap (without leader)')

    def test_bootstrap_initialize_lock_failed(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(),
                          'failed to acquire initialize lock')

    def test_bootstrap_initialized_new_cluster(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.assertEquals(self.ha.bootstrap(),
                          'trying to bootstrap a new cluster')
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(),
                          'waiting for end of recovery after bootstrap')
        self.p.is_leader = true
        self.assertEquals(self.ha.run_cycle(), 'running post_bootstrap')
        self.assertEquals(self.ha.run_cycle(), 'initialized a new cluster')

    def test_bootstrap_release_initialize_key_on_failure(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.ha.bootstrap()
        self.p.is_running = false
        self.assertRaises(PatroniException, self.ha.post_bootstrap)

    def test_bootstrap_release_initialize_key_on_watchdog_failure(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.ha.bootstrap()
        self.p.is_running.return_value = MockPostmaster()
        self.p.is_leader = true
        with patch.object(Watchdog, 'activate', Mock(return_value=False)):
            self.assertEquals(self.ha.post_bootstrap(),
                              'running post_bootstrap')
            self.assertRaises(PatroniException, self.ha.post_bootstrap)

    @patch('psycopg2.connect', psycopg2_connect)
    def test_reinitialize(self):
        self.assertIsNotNone(self.ha.reinitialize())

        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertIsNone(self.ha.reinitialize())

        self.assertIsNotNone(self.ha.reinitialize())

        self.ha.state_handler.name = self.ha.cluster.leader.name
        self.assertIsNotNone(self.ha.reinitialize())

    @patch('time.sleep', Mock())
    def test_restart(self):
        self.assertEquals(self.ha.restart({}),
                          (True, 'restarted successfully'))
        self.p.restart = Mock(return_value=None)
        self.assertEquals(self.ha.restart({}),
                          (False, 'postgres is still starting'))
        self.p.restart = false
        self.assertEquals(self.ha.restart({}), (False, 'restart failed'))
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.reinitialize()
        self.assertEquals(self.ha.restart({}),
                          (False, 'reinitialize already in progress'))
        with patch.object(self.ha, "restart_matches", return_value=False):
            self.assertEquals(self.ha.restart({'foo': 'bar'}),
                              (False, "restart conditions are not satisfied"))

    @patch('os.kill', Mock())
    def test_restart_in_progress(self):
        with patch('patroni.async_executor.AsyncExecutor.busy',
                   PropertyMock(return_value=True)):
            self.ha.restart({}, run_async=True)
            self.assertTrue(self.ha.restart_scheduled())
            self.assertEquals(self.ha.run_cycle(), 'restart in progress')

            self.ha.cluster = get_cluster_initialized_with_leader()
            self.assertEquals(self.ha.run_cycle(), 'restart in progress')

            self.ha.has_lock = true
            self.assertEquals(self.ha.run_cycle(),
                              'updated leader lock during restart')

            self.ha.update_lock = false
            self.p.set_role('master')
            with patch('patroni.async_executor.CriticalTask.cancel',
                       Mock(return_value=False)):
                with patch(
                        'patroni.postgresql.Postgresql.terminate_starting_postmaster'
                ) as mock_terminate:
                    self.assertEquals(self.ha.run_cycle(),
                                      'lost leader lock during restart')
                    mock_terminate.assert_called()

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader(self):
        self.ha.fetch_node_status = get_node_status()
        self.ha.has_lock = true
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', '', None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, '', 'blabla', None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        f = Failover(0, self.p.name, '', None)
        self.ha.cluster = get_cluster_initialized_with_leader(f)
        self.assertEquals(self.ha.run_cycle(),
                          'manual failover: demoting myself')
        self.p.rewind_needed_and_possible = true
        self.assertEquals(self.ha.run_cycle(),
                          'manual failover: demoting myself')
        self.ha.fetch_node_status = get_node_status(nofailover=True)
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        self.ha.fetch_node_status = get_node_status(watchdog_failed=True)
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        self.ha.fetch_node_status = get_node_status(wal_position=1)
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        # manual failover from the previous leader to us won't happen if we hold the nofailover flag
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')

        # Failover scheduled time must include timezone
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.ha.run_cycle()

        scheduled = datetime.datetime.utcnow().replace(tzinfo=tzutc)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=30)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=-600)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

        scheduled = None
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader_in_pause(self):
        self.ha.has_lock = true
        self.ha.is_paused = true
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('PAUSE: no action.  i am the leader with the lock',
                          self.ha.run_cycle())
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, self.p.name, '', None))
        self.assertEquals('PAUSE: no action.  i am the leader with the lock',
                          self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader_in_synchronous_mode(self):
        self.p.is_leader = true
        self.ha.has_lock = true
        self.ha.is_synchronous_mode = true
        self.ha.is_failover_possible = false
        self.ha.process_sync_replication = Mock()
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, self.p.name, 'a', None), (self.p.name, None))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, self.p.name, 'a', None), (self.p.name, 'a'))
        self.ha.is_failover_possible = true
        self.assertEquals('manual failover: demoting myself',
                          self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_process_no_leader(self):
        self.p.is_leader = false
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', 'leader', None))
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        self.ha.fetch_node_status = get_node_status(
        )  # accessible, in_recovery
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because i am not the healthiest node'
        )
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, self.p.name, '', None))
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because i am not the healthiest node'
        )
        self.ha.fetch_node_status = get_node_status(
            reachable=False)  # inaccessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        # set failover flag to True for all members of the cluster
        # this should elect the current member, as we are not going to call the API for it.
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', 'other', None))
        self.ha.fetch_node_status = get_node_status(
            nofailover=True)  # accessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        # same as previous, but with the current member set to nofailover; it must never be elected leader
        self.ha.patroni.nofailover = True
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because I am not allowed to promote')

    def test_manual_failover_process_no_leader_in_pause(self):
        self.ha.is_paused = true
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', 'other', None))
        self.assertEquals(self.ha.run_cycle(),
                          'PAUSE: continue to run as master without lock')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, 'leader', '', None))
        self.assertEquals(self.ha.run_cycle(),
                          'PAUSE: continue to run as master without lock')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, 'leader', 'blabla', None))
        self.assertEquals('PAUSE: acquired session lock as a leader',
                          self.ha.run_cycle())
        self.p.is_leader = false
        self.p.set_role('replica')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, 'leader', self.p.name, None))
        self.assertEquals(
            self.ha.run_cycle(),
            'PAUSE: promoted self to leader by acquiring session lock')

    def test_is_healthiest_node(self):
        self.ha.state_handler.is_leader = false
        self.ha.patroni.nofailover = False
        self.ha.fetch_node_status = get_node_status()
        self.assertTrue(self.ha.is_healthiest_node())
        with patch.object(Watchdog, 'is_healthy',
                          PropertyMock(return_value=False)):
            self.assertFalse(self.ha.is_healthiest_node())
        with patch('patroni.postgresql.Postgresql.is_starting',
                   return_value=True):
            self.assertFalse(self.ha.is_healthiest_node())
        self.ha.is_paused = true
        self.assertFalse(self.ha.is_healthiest_node())

    def test__is_healthiest_node(self):
        self.assertTrue(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.p.is_leader = false
        self.ha.fetch_node_status = get_node_status(
        )  # accessible, in_recovery
        self.assertTrue(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = get_node_status(
            in_recovery=False)  # accessible, not in_recovery
        self.assertFalse(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = get_node_status(
            wal_position=11)  # accessible, in_recovery, wal position ahead
        self.assertFalse(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        with patch('patroni.postgresql.Postgresql.wal_position',
                   return_value=1):
            self.assertFalse(
                self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = True
        self.assertFalse(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = False

    @patch('requests.get', requests_get)
    def test_fetch_node_status(self):
        member = Member(0, 'test', 1,
                        {'api_url': 'http://127.0.0.1:8011/patroni'})
        self.ha.fetch_node_status(member)
        member = Member(0, 'test', 1,
                        {'api_url': 'http://*****:*****'})
        self.ha.fetch_node_status(member)

    @patch('patroni.ha.Ha.update_lock', return_value=True)
    @patch('patroni.ha.Ha.demote')
    def test_starting_timeout(self, demote, update_lock):
        def check_calls(seq):
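            # Assert that each mock in `seq` was called (or not) exactly as
            # expected for the preceding run_cycle(), then reset it.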
            for mock, called in seq:
                if called:
                    mock.assert_called_once()
                else:
                    mock.assert_not_called()
                mock.reset_mock()

        self.ha.has_lock = true
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.p.check_for_startup = true
        self.p.time_in_state = lambda: 30
        self.assertEquals(
            self.ha.run_cycle(),
            'PostgreSQL is still starting up, 270 seconds until timeout')
        check_calls([(update_lock, True), (demote, False)])

        self.p.time_in_state = lambda: 350
        self.ha.fetch_node_status = get_node_status(
            reachable=False)  # inaccessible, in_recovery
        self.assertEquals(
            self.ha.run_cycle(),
            'master start has timed out, but continuing to wait because failover is not possible'
        )
        check_calls([(update_lock, True), (demote, False)])

        self.ha.fetch_node_status = get_node_status(
        )  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(),
                          'stopped PostgreSQL because of startup timeout')
        check_calls([(update_lock, True), (demote, True)])

        update_lock.return_value = False
        self.assertEquals(
            self.ha.run_cycle(),
            'stopped PostgreSQL while starting up because leader key was lost')
        check_calls([(update_lock, True), (demote, True)])

        self.ha.has_lock = false
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'no action.  i am a secondary and i am following a leader')
        check_calls([(update_lock, False), (demote, False)])

    def test_manual_failover_while_starting(self):
        self.ha.has_lock = true
        self.p.check_for_startup = true
        f = Failover(0, self.p.name, '', None)
        self.ha.cluster = get_cluster_initialized_with_leader(f)
        self.ha.fetch_node_status = get_node_status(
        )  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(),
                          'manual failover: demoting myself')

    @patch('patroni.ha.Ha.demote')
    def test_failover_immediately_on_zero_master_start_timeout(self, demote):
        self.p.is_running = false
        self.ha.cluster = get_cluster_initialized_with_leader(
            sync=(self.p.name, 'other'))
        self.ha.cluster.config.data['synchronous_mode'] = True
        self.ha.patroni.config.set_dynamic_configuration(
            {'master_start_timeout': 0})
        self.ha.has_lock = true
        self.ha.update_lock = true
        self.ha.fetch_node_status = get_node_status(
        )  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(),
                          'stopped PostgreSQL to fail over after a crash')
        demote.assert_called_once()

    @patch('patroni.postgresql.Postgresql.follow')
    def test_demote_immediate(self, follow):
        self.ha.has_lock = true
        self.e.get_cluster = Mock(
            return_value=get_cluster_initialized_without_leader())
        self.ha.demote('immediate')
        follow.assert_called_once_with(None)

    def test_process_sync_replication(self):
        self.ha.has_lock = true
        mock_set_sync = self.p.set_synchronous_standby = Mock()
        self.p.name = 'leader'

        # Test sync key removed when sync mode disabled
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader',
                                                                    'other'))
        with patch.object(self.ha.dcs,
                          'delete_sync_state') as mock_delete_sync:
            self.ha.run_cycle()
            mock_delete_sync.assert_called_once()
            mock_set_sync.assert_called_once_with(None)

        mock_set_sync.reset_mock()
        # Test sync key not touched when not there
        self.ha.cluster = get_cluster_initialized_with_leader()
        with patch.object(self.ha.dcs,
                          'delete_sync_state') as mock_delete_sync:
            self.ha.run_cycle()
            mock_delete_sync.assert_not_called()
            mock_set_sync.assert_called_once_with(None)

        mock_set_sync.reset_mock()

        self.ha.is_synchronous_mode = true

        # Test sync standby not touched when picking the same node
        self.p.pick_synchronous_standby = Mock(return_value=('other', True))
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader',
                                                                    'other'))
        self.ha.run_cycle()
        mock_set_sync.assert_not_called()

        mock_set_sync.reset_mock()

        # Test sync standby is replaced when switching standbys
        self.p.pick_synchronous_standby = Mock(return_value=('other2', False))
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.run_cycle()
        mock_set_sync.assert_called_once_with('other2')

        mock_set_sync.reset_mock()
        # Test sync standby is not disabled when updating dcs fails
        self.ha.dcs.write_sync_state = Mock(return_value=False)
        self.ha.run_cycle()
        mock_set_sync.assert_not_called()

        mock_set_sync.reset_mock()
        # Test changing sync standby
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(
            return_value=get_cluster_initialized_with_leader(sync=('leader',
                                                                   'other')))
        # self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader', 'other'))
        self.p.pick_synchronous_standby = Mock(return_value=('other2', True))
        self.ha.run_cycle()
        self.ha.dcs.get_cluster.assert_called_once()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 2)

        # Test updating sync standby key failed due to race
        self.ha.dcs.write_sync_state = Mock(side_effect=[True, False])
        self.ha.run_cycle()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 2)

        # Test changing sync standby failed due to race
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(
            return_value=get_cluster_initialized_with_leader(
                sync=('somebodyelse', None)))
        self.ha.run_cycle()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 1)

        # Test sync set to '*' when synchronous_mode_strict is enabled
        mock_set_sync.reset_mock()
        self.ha.is_synchronous_mode_strict = true
        self.p.pick_synchronous_standby = Mock(return_value=(None, False))
        self.ha.run_cycle()
        mock_set_sync.assert_called_once_with('*')

    def test_sync_replication_become_master(self):
        self.ha.is_synchronous_mode = true

        mock_set_sync = self.p.set_synchronous_standby = Mock()
        self.p.is_leader = false
        self.p.set_role('replica')
        self.ha.has_lock = true
        mock_write_sync = self.ha.dcs.write_sync_state = Mock(
            return_value=True)
        self.p.name = 'leader'
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('other',
                                                                    None))

        # When we just became master nobody is sync
        self.assertEquals(self.ha.enforce_master_role('msg', 'promote msg'),
                          'promote msg')
        mock_set_sync.assert_called_once_with(None)
        mock_write_sync.assert_called_once_with('leader', None, index=0)

        mock_set_sync.reset_mock()

        # If the sync state cannot be written to the DCS, promotion must not happen
        self.p.set_role('replica')
        mock_write_sync.return_value = False
        self.assertTrue(
            self.ha.enforce_master_role('msg', 'promote msg') != 'promote msg')
        mock_set_sync.assert_not_called()

    def test_unhealthy_sync_mode(self):
        self.ha.is_synchronous_mode = true

        self.p.is_leader = false
        self.p.set_role('replica')
        self.p.name = 'other'
        self.ha.cluster = get_cluster_initialized_without_leader(
            sync=('leader', 'other2'))
        mock_write_sync = self.ha.dcs.write_sync_state = Mock(
            return_value=True)
        mock_acquire = self.ha.acquire_lock = Mock(return_value=True)
        mock_follow = self.p.follow = Mock()
        mock_promote = self.p.promote = Mock()

        # If we don't match the sync replica we are not allowed to acquire lock
        self.ha.run_cycle()
        mock_acquire.assert_not_called()
        mock_follow.assert_called_once()
        self.assertEquals(mock_follow.call_args[0][0], None)
        mock_write_sync.assert_not_called()

        mock_follow.reset_mock()
        # If we do match we will try to promote
        self.ha._is_healthiest_node = true

        self.ha.cluster = get_cluster_initialized_without_leader(
            sync=('leader', 'other'))
        self.ha.run_cycle()
        mock_acquire.assert_called_once()
        mock_follow.assert_not_called()
        mock_promote.assert_called_once()
        mock_write_sync.assert_called_once_with('other', None, index=0)

    def test_disable_sync_when_restarting(self):
        self.ha.is_synchronous_mode = true

        self.p.name = 'other'
        self.p.is_leader = false
        self.p.set_role('replica')
        mock_restart = self.p.restart = Mock(return_value=True)
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader',
                                                                    'other'))
        self.ha.touch_member = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(side_effect=[
            get_cluster_initialized_with_leader(sync=('leader', syncstandby))
            for syncstandby in ['other', None]
        ])

        with patch('time.sleep') as mock_sleep:
            self.ha.restart({})
            mock_restart.assert_called_once()
            mock_sleep.assert_called()

        # Restart is still called when DCS connection fails
        mock_restart.reset_mock()
        self.ha.dcs.get_cluster = Mock(side_effect=DCSError("foo"))
        self.ha.restart({})

        mock_restart.assert_called_once()

        # We don't try to fetch the cluster state when touch_member fails
        mock_restart.reset_mock()
        self.ha.dcs.get_cluster.reset_mock()
        self.ha.touch_member = Mock(return_value=False)

        self.ha.restart({})

        mock_restart.assert_called_once()
        self.ha.dcs.get_cluster.assert_not_called()

    def test_effective_tags(self):
        self.ha._disable_sync = True
        self.assertEquals(self.ha.get_effective_tags(), {
            'foo': 'bar',
            'nosync': True
        })
        self.ha._disable_sync = False
        self.assertEquals(self.ha.get_effective_tags(), {'foo': 'bar'})

    def test_restore_cluster_config(self):
        self.ha.cluster.config.data.clear()
        self.ha.has_lock = true
        self.ha.cluster.is_unlocked = false
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')

    def test_watch(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.watch(0)

    def test_wakeup(self):
        self.ha.wakeup()

    def test_shutdown(self):
        self.p.is_running = false
        self.ha.shutdown()

    @patch('time.sleep', Mock())
    def test_leader_with_empty_directory(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.has_lock = true
        self.p.data_directory_empty = true
        self.assertEquals(
            self.ha.run_cycle(),
            'released leader key voluntarily as data dir empty and currently leader'
        )
        self.assertEquals(self.p.role, 'uninitialized')

        # as has_lock is mocked out, we need to fake the leader key release
        self.ha.has_lock = false
        # will not say bootstrap from leader as replica can't self elect
        self.assertEquals(self.ha.run_cycle(),
                          "trying to bootstrap from replica 'other'")
Example #2

import logging
import signal
import sys
import time

logger = logging.getLogger(__name__)

class Patroni(object):
    def __init__(self):
        from patroni.api import RestApiServer
        from patroni.config import Config
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.version import __version__
        from patroni.watchdog import Watchdog

        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
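        # Retry until the DCS responds: dynamic settings come either from
        # the cluster config already stored in the DCS or, for a fresh
        # cluster, from the bootstrap section of the local configuration.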
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config and cluster.config.data:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                        self.watchdog.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(
                            self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
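        # Boolean tags are kept only when truthy, so e.g. 'nofailover: false'
        # behaves the same as not setting the tag at all.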
        return {
            tag: value
            for tag, value in self.config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance',
                           'nosync') or value
        }

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self):
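        # A failed reload is logged but deliberately non-fatal: Patroni keeps
        # running with the previously applied configuration.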
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.watchdog.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s',
                             self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
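        # Act only on the first SIGTERM; repeated signals arriving while the
        # shutdown is already in progress are ignored.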
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
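        # Aim for a fixed cadence of one HA cycle per loop_wait seconds,
        # measured from the previous deadline rather than from "now".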
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()

    def run(self):
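        # Start the REST API, then run one HA cycle per iteration until a
        # SIGTERM is received.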
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \
                    and self.config.set_dynamic_configuration(self.dcs.cluster.config):
                self.reload_config()

            if self.postgresql.role != 'uninitialized':
                self.config.save_cache()

            self.schedule_next_run()

    def setup_signal_handlers(self):
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)

    def shutdown(self):
        self.api.shutdown()
        self.ha.shutdown()
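
For orientation, here is a minimal sketch of how a class like this is typically driven; the `main` function below and its exception handling are illustrative assumptions, not the actual patroni entry point:

def main():
    # Instantiate Patroni (loads config, connects to the DCS) and run the
    # HA loop until SIGTERM or Ctrl+C, always releasing resources on exit.
    patroni = Patroni()
    try:
        patroni.run()
    except KeyboardInterrupt:
        pass
    finally:
        patroni.shutdown()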
Example #3
class TestHa(unittest.TestCase):

    @patch('socket.getaddrinfo', socket_getaddrinfo)
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('patroni.dcs.dcs_modules', Mock(return_value=['patroni.dcs.foo', 'patroni.dcs.etcd']))
    @patch.object(etcd.Client, 'read', etcd_read)
    def setUp(self):
        with patch.object(Client, 'machines') as mock_machines:
            mock_machines.__get__ = Mock(return_value=['http://*****:*****'])
            # ... (remainder of setUp elided)

    @patch.object(Postgresql, 'fix_cluster_state', Mock())
    def test_crash_recovery(self):
        self.p.is_running = false
        self.p.controldata = lambda: {'Database cluster state': 'in production'}
        self.assertEquals(self.ha.run_cycle(), 'doing crash recovery in a single user mode')

    @patch.object(Postgresql, 'rewind_needed_and_possible', Mock(return_value=True))
    def test_recover_with_rewind(self):
        self.p.is_running = false
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.run_cycle(), 'running pg_rewind from leader')

    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    @patch.object(Postgresql, 'fix_cluster_state', Mock())
    def test_single_user_after_recover_failed(self):
        self.p.controldata = lambda: {'Database cluster state': 'in recovery'}
        self.p.is_running = false
        self.p.follow = false
        self.assertEquals(self.ha.run_cycle(), 'starting as a secondary')
        self.assertEquals(self.ha.run_cycle(), 'fixing cluster state in a single user mode')

    @patch('sys.exit', return_value=1)
    @patch('patroni.ha.Ha.sysid_valid', MagicMock(return_value=True))
    def test_sysid_no_match(self, exit_mock):
        self.ha.run_cycle()
        exit_mock.assert_called_once_with(1)

    @patch.object(Cluster, 'is_unlocked', Mock(return_value=False))
    def test_start_as_readonly(self):
        self.p.is_leader = false
        self.p.is_healthy = true
        self.ha.has_lock = true
        self.p.controldata = lambda: {'Database cluster state': 'in production'}
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader because i had the session lock')

    @patch('psycopg2.connect', psycopg2_connect)
    def test_acquire_lock_as_master(self):
        self.assertEquals(self.ha.run_cycle(), 'acquired session lock as a leader')

    def test_promoted_by_acquiring_lock(self):
        self.ha.is_healthiest_node = true
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')

    def test_long_promote(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = false
        self.p.set_role('master')
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')

    def test_demote_after_failing_to_obtain_lock(self):
        self.ha.acquire_lock = false
        self.assertEquals(self.ha.run_cycle(), 'demoted self after trying and failing to obtain lock')

    def test_follow_new_leader_after_failing_to_obtain_lock(self):
        self.ha.is_healthiest_node = true
        self.ha.acquire_lock = false
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'following new leader after trying and failing to obtain lock')

    def test_demote_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.assertEquals(self.ha.run_cycle(), 'demoting self because i am not the healthiest node')

    def test_follow_new_leader_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because i am not the healthiest node')

    def test_promote_because_have_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader because i had the session lock')

    def test_promote_without_watchdog(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = true
        with patch.object(Watchdog, 'activate', Mock(return_value=False)):
            self.assertEquals(self.ha.run_cycle(), 'Demoting self because watchdog could not be activated')
            self.p.is_leader = false
            self.assertEquals(self.ha.run_cycle(), 'Not promoting self because watchdog could not be activated')

    def test_leader_with_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')

    def test_demote_because_not_having_lock(self):
        self.ha.cluster.is_unlocked = false
        with patch.object(Watchdog, 'is_running', PropertyMock(return_value=True)):
            self.assertEquals(self.ha.run_cycle(), 'demoting self because i do not have the lock and i was a leader')

    def test_demote_because_update_lock_failed(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.ha.update_lock = false
        self.assertEquals(self.ha.run_cycle(), 'demoted self because failed to update leader lock in DCS')
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'not promoting because failed to update leader lock in DCS')

    def test_follow(self):
        self.ha.cluster.is_unlocked = false
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am a secondary and i am following a leader')
        self.ha.patroni.replicatefrom = "foo"
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am a secondary and i am following a leader')

    def test_follow_in_pause(self):
        self.ha.cluster.is_unlocked = false
        self.ha.is_paused = true
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: continue to run as master without lock')
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: no action')

    @patch.object(Postgresql, 'rewind_needed_and_possible', Mock(return_value=True))
    def test_follow_triggers_rewind(self):
        self.p.is_leader = false
        self.p.trigger_check_diverged_lsn()
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.run_cycle(), 'running pg_rewind from leader')

    def test_no_etcd_connection_master_demote(self):
        self.ha.load_cluster_from_dcs = Mock(side_effect=DCSError('Etcd is not responding properly'))
        self.assertEquals(self.ha.run_cycle(), 'demoted self because DCS is not accessible and i was a leader')

    @patch('time.sleep', Mock())
    def test_bootstrap_from_another_member(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.bootstrap(), 'trying to bootstrap from replica \'other\'')

    def test_bootstrap_waiting_for_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(), 'waiting for leader to bootstrap')

    def test_bootstrap_without_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.p.can_create_replica_without_replication_connection = MagicMock(return_value=True)
        self.assertEquals(self.ha.bootstrap(), 'trying to bootstrap (without leader)')

    def test_bootstrap_initialize_lock_failed(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(), 'failed to acquire initialize lock')

    def test_bootstrap_initialized_new_cluster(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.assertEquals(self.ha.bootstrap(), 'trying to bootstrap a new cluster')
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'waiting for end of recovery after bootstrap')
        self.p.is_leader = true
        self.assertEquals(self.ha.run_cycle(), 'running post_bootstrap')
        self.assertEquals(self.ha.run_cycle(), 'initialized a new cluster')

    def test_bootstrap_release_initialize_key_on_failure(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.ha.bootstrap()
        self.p.is_running = false
        self.assertRaises(PatroniException, self.ha.post_bootstrap)

    def test_bootstrap_release_initialize_key_on_watchdog_failure(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.ha.bootstrap()
        self.p.is_running.return_value = MockPostmaster()
        self.p.is_leader = true
        with patch.object(Watchdog, 'activate', Mock(return_value=False)):
            self.assertEquals(self.ha.post_bootstrap(), 'running post_bootstrap')
            self.assertRaises(PatroniException, self.ha.post_bootstrap)

    @patch('psycopg2.connect', psycopg2_connect)
    def test_reinitialize(self):
        self.assertIsNotNone(self.ha.reinitialize())

        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertIsNone(self.ha.reinitialize(True))

        self.assertIsNotNone(self.ha.reinitialize())

        self.ha.state_handler.name = self.ha.cluster.leader.name
        self.assertIsNotNone(self.ha.reinitialize())

    @patch('time.sleep', Mock())
    def test_restart(self):
        self.assertEquals(self.ha.restart({}), (True, 'restarted successfully'))
        self.p.restart = Mock(return_value=None)
        self.assertEquals(self.ha.restart({}), (False, 'postgres is still starting'))
        self.p.restart = false
        self.assertEquals(self.ha.restart({}), (False, 'restart failed'))
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.reinitialize()
        self.assertEquals(self.ha.restart({}), (False, 'reinitialize already in progress'))
        with patch.object(self.ha, "restart_matches", return_value=False):
            self.assertEquals(self.ha.restart({'foo': 'bar'}), (False, "restart conditions are not satisfied"))

    @patch('os.kill', Mock())
    def test_restart_in_progress(self):
        with patch('patroni.async_executor.AsyncExecutor.busy', PropertyMock(return_value=True)):
            self.ha.restart({}, run_async=True)
            self.assertTrue(self.ha.restart_scheduled())
            self.assertEquals(self.ha.run_cycle(), 'restart in progress')

            self.ha.cluster = get_cluster_initialized_with_leader()
            self.assertEquals(self.ha.run_cycle(), 'restart in progress')

            self.ha.has_lock = true
            self.assertEquals(self.ha.run_cycle(), 'updated leader lock during restart')

            self.ha.update_lock = false
            self.p.set_role('master')
            with patch('patroni.async_executor.CriticalTask.cancel', Mock(return_value=False)):
                with patch('patroni.postgresql.Postgresql.terminate_starting_postmaster') as mock_terminate:
                    self.assertEquals(self.ha.run_cycle(), 'lost leader lock during restart')
                    mock_terminate.assert_called()

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader(self):
        self.ha.fetch_node_status = get_node_status()
        self.ha.has_lock = true
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', '', None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, '', 'blabla', None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        f = Failover(0, self.p.name, '', None)
        self.ha.cluster = get_cluster_initialized_with_leader(f)
        self.assertEquals(self.ha.run_cycle(), 'manual failover: demoting myself')
        self.p.rewind_needed_and_possible = true
        self.assertEquals(self.ha.run_cycle(), 'manual failover: demoting myself')
        self.ha.fetch_node_status = get_node_status(nofailover=True)
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        self.ha.fetch_node_status = get_node_status(watchdog_failed=True)
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        self.ha.fetch_node_status = get_node_status(wal_position=1)
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        # manual failover from the previous leader to us won't happen if we hold the nofailover flag
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')

        # Failover scheduled time must include timezone
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.ha.run_cycle()

        scheduled = datetime.datetime.utcnow().replace(tzinfo=tzutc)
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=30)
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=-600)
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

        scheduled = None
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader_in_pause(self):
        self.ha.has_lock = true
        self.ha.is_paused = true
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('PAUSE: no action.  i am the leader with the lock', self.ha.run_cycle())
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, '', None))
        self.assertEquals('PAUSE: no action.  i am the leader with the lock', self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader_in_synchronous_mode(self):
        self.p.is_leader = true
        self.ha.has_lock = true
        self.ha.is_synchronous_mode = true
        self.ha.is_failover_possible = false
        self.ha.process_sync_replication = Mock()
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, 'a', None), (self.p.name, None))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, 'a', None), (self.p.name, 'a'))
        self.ha.is_failover_possible = true
        self.assertEquals('manual failover: demoting myself', self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_process_no_leader(self):
        self.p.is_leader = false
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', 'leader', None))
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        self.ha.fetch_node_status = get_node_status()  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because i am not the healthiest node')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, self.p.name, '', None))
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because i am not the healthiest node')
        self.ha.fetch_node_status = get_node_status(reachable=False)  # inaccessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        # set failover flag to True for all members of the cluster
        # this should elect the current member, as we are not going to call the API for it.
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', 'other', None))
        self.ha.fetch_node_status = get_node_status(nofailover=True)  # accessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        # same as previous, but mark the current member as nofailover; in no case should it be elected leader
        self.ha.patroni.nofailover = True
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because I am not allowed to promote')
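
get_node_status is a test helper defined outside this excerpt; judging from its call sites and keyword arguments, it is presumably a small factory along these lines (the names and fields below are assumptions, not the actual definition):

# Assumed sketch: build a stand-in for Ha.fetch_node_status that always
# reports the given fixed state for whichever member it is asked about.
from collections import namedtuple

MemberStatus = namedtuple('MemberStatus',
                          'member reachable in_recovery wal_position tags')

def get_node_status(reachable=True, in_recovery=True,
                    wal_position=10, nofailover=False):
    def fetch_node_status(member):
        tags = {'nofailover': True} if nofailover else {}
        return MemberStatus(member, reachable, in_recovery, wal_position, tags)
    return fetch_node_status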

    def test_manual_failover_process_no_leader_in_pause(self):
        self.ha.is_paused = true
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', 'other', None))
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: continue to run as master without lock')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', '', None))
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: continue to run as master without lock')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', 'blabla', None))
        self.assertEquals('PAUSE: acquired session lock as a leader', self.ha.run_cycle())
        self.p.is_leader = false
        self.p.set_role('replica')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: promoted self to leader by acquiring session lock')

    def test_is_healthiest_node(self):
        self.ha.state_handler.is_leader = false
        self.ha.patroni.nofailover = False
        self.ha.fetch_node_status = get_node_status()
        self.assertTrue(self.ha.is_healthiest_node())
        with patch.object(Watchdog, 'is_healthy', PropertyMock(return_value=False)):
            self.assertFalse(self.ha.is_healthiest_node())
        with patch('patroni.postgresql.Postgresql.is_starting', return_value=True):
            self.assertFalse(self.ha.is_healthiest_node())
        self.ha.is_paused = true
        self.assertFalse(self.ha.is_healthiest_node())

    def test__is_healthiest_node(self):
        self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.p.is_leader = false
        self.ha.fetch_node_status = get_node_status()  # accessible, in_recovery
        self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = get_node_status(in_recovery=False)  # accessible, not in_recovery
        self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = get_node_status(wal_position=11)  # accessible, in_recovery, wal position ahead
        self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        with patch('patroni.postgresql.Postgresql.timeline_wal_position', return_value=(1, 1)):
            self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = True
        self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = False

    @patch('requests.get', requests_get)
    def test_fetch_node_status(self):
        member = Member(0, 'test', 1, {'api_url': 'http://127.0.0.1:8011/patroni'})
        self.ha.fetch_node_status(member)
        member = Member(0, 'test', 1, {'api_url': 'http://*****:*****'})
        self.ha.fetch_node_status(member)

    @patch('patroni.ha.Ha.update_lock', return_value=True)
    @patch('patroni.ha.Ha.demote')
    def test_starting_timeout(self, demote, update_lock):
        def check_calls(seq):
            for mock, called in seq:
                if called:
                    mock.assert_called_once()
                else:
                    mock.assert_not_called()
                mock.reset_mock()
        self.ha.has_lock = true
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.p.check_for_startup = true
        self.p.time_in_state = lambda: 30
        self.assertEquals(self.ha.run_cycle(), 'PostgreSQL is still starting up, 270 seconds until timeout')
        check_calls([(update_lock, True), (demote, False)])

        self.p.time_in_state = lambda: 350
        self.ha.fetch_node_status = get_node_status(reachable=False)  # inaccessible, in_recovery
        self.assertEquals(self.ha.run_cycle(),
                          'master start has timed out, but continuing to wait because failover is not possible')
        check_calls([(update_lock, True), (demote, False)])

        self.ha.fetch_node_status = get_node_status()  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(), 'stopped PostgreSQL because of startup timeout')
        check_calls([(update_lock, True), (demote, True)])

        update_lock.return_value = False
        self.assertEquals(self.ha.run_cycle(), 'stopped PostgreSQL while starting up because leader key was lost')
        check_calls([(update_lock, True), (demote, True)])

        self.ha.has_lock = false
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am a secondary and i am following a leader')
        check_calls([(update_lock, False), (demote, False)])

    def test_manual_failover_while_starting(self):
        self.ha.has_lock = true
        self.p.check_for_startup = true
        f = Failover(0, self.p.name, '', None)
        self.ha.cluster = get_cluster_initialized_with_leader(f)
        self.ha.fetch_node_status = get_node_status()  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(), 'manual failover: demoting myself')

    @patch('patroni.ha.Ha.demote')
    def test_failover_immediately_on_zero_master_start_timeout(self, demote):
        self.p.is_running = false
        self.ha.cluster = get_cluster_initialized_with_leader(sync=(self.p.name, 'other'))
        self.ha.cluster.config.data['synchronous_mode'] = True
        self.ha.patroni.config.set_dynamic_configuration({'master_start_timeout': 0})
        self.ha.has_lock = true
        self.ha.update_lock = true
        self.ha.fetch_node_status = get_node_status()  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(), 'stopped PostgreSQL to fail over after a crash')
        demote.assert_called_once()

    @patch('patroni.postgresql.Postgresql.follow')
    def test_demote_immediate(self, follow):
        self.ha.has_lock = true
        self.e.get_cluster = Mock(return_value=get_cluster_initialized_without_leader())
        self.ha.demote('immediate')
        follow.assert_called_once_with(None)

    def test_process_sync_replication(self):
        self.ha.has_lock = true
        mock_set_sync = self.p.set_synchronous_standby = Mock()
        self.p.name = 'leader'

        # Test sync key removed when sync mode disabled
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader', 'other'))
        with patch.object(self.ha.dcs, 'delete_sync_state') as mock_delete_sync:
            self.ha.run_cycle()
            mock_delete_sync.assert_called_once()
            mock_set_sync.assert_called_once_with(None)

        mock_set_sync.reset_mock()
        # Test sync key not touched when not there
        self.ha.cluster = get_cluster_initialized_with_leader()
        with patch.object(self.ha.dcs, 'delete_sync_state') as mock_delete_sync:
            self.ha.run_cycle()
            mock_delete_sync.assert_not_called()
            mock_set_sync.assert_called_once_with(None)

        mock_set_sync.reset_mock()

        self.ha.is_synchronous_mode = true

        # Test sync standby not touched when picking the same node
        self.p.pick_synchronous_standby = Mock(return_value=('other', True))
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader', 'other'))
        self.ha.run_cycle()
        mock_set_sync.assert_not_called()

        mock_set_sync.reset_mock()

        # Test sync standby is replaced when switching standbys
        self.p.pick_synchronous_standby = Mock(return_value=('other2', False))
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.run_cycle()
        mock_set_sync.assert_called_once_with('other2')

        mock_set_sync.reset_mock()
        # Test sync standby is not disabled when updating dcs fails
        self.ha.dcs.write_sync_state = Mock(return_value=False)
        self.ha.run_cycle()
        mock_set_sync.assert_not_called()

        mock_set_sync.reset_mock()
        # Test changing sync standby
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(return_value=get_cluster_initialized_with_leader(sync=('leader', 'other')))
        # self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader', 'other'))
        self.p.pick_synchronous_standby = Mock(return_value=('other2', True))
        self.ha.run_cycle()
        self.ha.dcs.get_cluster.assert_called_once()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 2)

        # Test updating sync standby key failed due to race
        self.ha.dcs.write_sync_state = Mock(side_effect=[True, False])
        self.ha.run_cycle()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 2)

        # Test changing sync standby failed due to race
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(return_value=get_cluster_initialized_with_leader(sync=('somebodyelse', None)))
        self.ha.run_cycle()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 1)

        # Test sync set to '*' when synchronous_mode_strict is enabled
        mock_set_sync.reset_mock()
        self.ha.is_synchronous_mode_strict = true
        self.p.pick_synchronous_standby = Mock(return_value=(None, False))
        self.ha.run_cycle()
        mock_set_sync.assert_called_once_with('*')

    def test_sync_replication_become_master(self):
        self.ha.is_synchronous_mode = true

        mock_set_sync = self.p.set_synchronous_standby = Mock()
        self.p.is_leader = false
        self.p.set_role('replica')
        self.ha.has_lock = true
        mock_write_sync = self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.p.name = 'leader'
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('other', None))

        # When we just became master nobody is sync
        self.assertEquals(self.ha.enforce_master_role('msg', 'promote msg'), 'promote msg')
        mock_set_sync.assert_called_once_with(None)
        mock_write_sync.assert_called_once_with('leader', None, index=0)

        mock_set_sync.reset_mock()

        # If writing the sync state to the DCS fails, promotion must not proceed
        self.p.set_role('replica')
        mock_write_sync.return_value = False
        self.assertTrue(self.ha.enforce_master_role('msg', 'promote msg') != 'promote msg')
        mock_set_sync.assert_not_called()

    def test_unhealthy_sync_mode(self):
        self.ha.is_synchronous_mode = true

        self.p.is_leader = false
        self.p.set_role('replica')
        self.p.name = 'other'
        self.ha.cluster = get_cluster_initialized_without_leader(sync=('leader', 'other2'))
        mock_write_sync = self.ha.dcs.write_sync_state = Mock(return_value=True)
        mock_acquire = self.ha.acquire_lock = Mock(return_value=True)
        mock_follow = self.p.follow = Mock()
        mock_promote = self.p.promote = Mock()

        # If we don't match the sync replica we are not allowed to acquire lock
        self.ha.run_cycle()
        mock_acquire.assert_not_called()
        mock_follow.assert_called_once()
        self.assertEquals(mock_follow.call_args[0][0], None)
        mock_write_sync.assert_not_called()

        mock_follow.reset_mock()
        # If we do match we will try to promote
        self.ha._is_healthiest_node = true

        self.ha.cluster = get_cluster_initialized_without_leader(sync=('leader', 'other'))
        self.ha.run_cycle()
        mock_acquire.assert_called_once()
        mock_follow.assert_not_called()
        mock_promote.assert_called_once()
        mock_write_sync.assert_called_once_with('other', None, index=0)

    def test_disable_sync_when_restarting(self):
        self.ha.is_synchronous_mode = true

        self.p.name = 'other'
        self.p.is_leader = false
        self.p.set_role('replica')
        mock_restart = self.p.restart = Mock(return_value=True)
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader', 'other'))
        self.ha.touch_member = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(side_effect=[
            get_cluster_initialized_with_leader(sync=('leader', syncstandby))
            for syncstandby in ['other', None]])

        with patch('time.sleep') as mock_sleep:
            self.ha.restart({})
            mock_restart.assert_called_once()
            mock_sleep.assert_called()

        # Restart is still called when DCS connection fails
        mock_restart.reset_mock()
        self.ha.dcs.get_cluster = Mock(side_effect=DCSError("foo"))
        self.ha.restart({})

        mock_restart.assert_called_once()

        # We don't try to fetch the cluster state when touch_member fails
        mock_restart.reset_mock()
        self.ha.dcs.get_cluster.reset_mock()
        self.ha.touch_member = Mock(return_value=False)

        self.ha.restart({})

        mock_restart.assert_called_once()
        self.ha.dcs.get_cluster.assert_not_called()

    def test_effective_tags(self):
        self.ha._disable_sync = True
        self.assertEquals(self.ha.get_effective_tags(), {'foo': 'bar', 'nosync': True})
        self.ha._disable_sync = False
        self.assertEquals(self.ha.get_effective_tags(), {'foo': 'bar'})

    def test_restore_cluster_config(self):
        self.ha.cluster.config.data.clear()
        self.ha.has_lock = true
        self.ha.cluster.is_unlocked = false
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')

    def test_watch(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.watch(0)

    def test_wakeup(self):
        self.ha.wakeup()

    def test_shutdown(self):
        self.p.is_running = false
        self.ha.has_lock = true
        self.ha.shutdown()

    @patch('time.sleep', Mock())
    def test_leader_with_empty_directory(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.has_lock = true
        self.p.data_directory_empty = true
        self.assertEquals(self.ha.run_cycle(), 'released leader key voluntarily as data dir empty and currently leader')
        self.assertEquals(self.p.role, 'uninitialized')

        # as has_lock is mocked out, we need to fake the leader key release
        self.ha.has_lock = false
        # the message will not be 'bootstrap from leader', since a replica cannot elect itself
        self.assertEquals(self.ha.run_cycle(), "trying to bootstrap from replica 'other'")

    def test_update_cluster_history(self):
        self.p.get_master_timeline = Mock(return_value=1)
        self.ha.has_lock = true
        self.ha.cluster.is_unlocked = false
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
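
Throughout these tests, attributes such as self.ha.has_lock and self.p.is_leader are assigned module-level true/false helpers whose definitions fall outside this excerpt; presumably they are constant-returning callables, roughly:

# Assumed sketch of the true/false test helpers: callables that ignore
# their arguments, so they can stand in for any patched method.
def true(*args, **kwargs):
    return True

def false(*args, **kwargs):
    return False
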
Example #4
class Patroni(AbstractPatroniDaemon):
    def __init__(self, config):
        from patroni.api import RestApiServer
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.request import PatroniRequest
        from patroni.watchdog import Watchdog

        super(Patroni, self).__init__(config)

        self.version = __version__
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.request = PatroniRequest(self.config, True)
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        from patroni.exceptions import DCSError
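        # Block until the DCS can be read: prefer the cluster-wide dynamic
        # configuration, fall back to the local bootstrap.dcs section on a
        # brand-new cluster, and retry every 5 seconds on DCSError.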
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config and cluster.config.data:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                        self.watchdog.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(
                            self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')
                time.sleep(5)

    def get_tags(self):
        return {
            tag: value
            for tag, value in self.config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance',
                           'nosync') or value
        }
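
As an aside (not part of the original listing), the effect of the tag filter above can be shown with hypothetical values: the four boolean tags are dropped when falsy, while everything else passes through.

# Hypothetical input illustrating get_tags; 'foo' is an arbitrary tag.
config_tags = {'nofailover': False, 'nosync': True, 'foo': 'bar'}
effective = {tag: value for tag, value in config_tags.items()
             if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value}
assert effective == {'nosync': True, 'foo': 'bar'}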

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self, sighup=False, local=False):
        try:
            super(Patroni, self).reload_config(sighup, local)
            if local:
                self.tags = self.get_tags()
                self.request.reload_config(self.config)
                self.api.reload_config(self.config['restapi'])
            self.watchdog.reload_config(self.config)
            self.postgresql.reload_config(self.config['postgresql'], sighup)
            self.dcs.reload_config(self.config)
        except Exception:
            logger.exception('Failed to reload config_file=%s',
                             self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()
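
The pacing above is drift-free: the wake-up target advances by a fixed loop_wait, so fast cycles sleep longer while an overrunning cycle reschedules immediately. A minimal standalone sketch (LOOP_WAIT and the plain sleep standing in for ha.watch are illustrative assumptions):

import time

next_run = time.time()
LOOP_WAIT = 10  # assumed value; Patroni takes this from dcs.loop_wait

def schedule_next_run():
    global next_run
    next_run += LOOP_WAIT
    nap_time = next_run - time.time()
    if nap_time <= 0:
        next_run = time.time()  # fell behind: reschedule immediately
    else:
        time.sleep(nap_time)    # stand-in for self.ha.watch(nap_time)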

    def run(self):
        self.api.start()
        self.next_run = time.time()
        super(Patroni, self).run()

    def _run_cycle(self):
        logger.info(self.ha.run_cycle())

        if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \
                and self.config.set_dynamic_configuration(self.dcs.cluster.config):
            self.reload_config()

        if self.postgresql.role != 'uninitialized':
            self.config.save_cache()

        self.schedule_next_run()

    def _shutdown(self):
        try:
            self.api.shutdown()
        except Exception:
            logger.exception('Exception during RestApi.shutdown')
        try:
            self.ha.shutdown()
        except Exception:
            logger.exception('Exception during Ha.shutdown')
Example #5
class Patroni(object):
    def __init__(self, conf):
        from patroni.api import RestApiServer
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.log import PatroniLogger
        from patroni.postgresql import Postgresql
        from patroni.request import PatroniRequest
        from patroni.watchdog import Watchdog

        self.setup_signal_handlers()

        self.version = __version__
        self.logger = PatroniLogger()
        self.config = conf
        self.logger.reload_config(self.config.get('log', {}))
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.request = PatroniRequest(self.config, True)
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                # TODO: which dynamic parameters does cluster.config contain, and how are they set?
                if cluster and cluster.config and cluster.config.data:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                        self.watchdog.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(
                            self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')
                time.sleep(5)

    def get_tags(self):
        return {
            tag: value
            for tag, value in self.config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance',
                           'nosync') or value
        }

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self, sighup=False):
        try:
            self.tags = self.get_tags()
            self.logger.reload_config(self.config.get('log', {}))
            self.watchdog.reload_config(self.config)
            if sighup:
                self.request.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'], sighup)
            self.dcs.reload_config(self.config)
        except Exception:
            logger.exception('Failed to reload config_file=%s',
                             self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        with self._sigterm_lock:
            if not self._received_sigterm:
                self._received_sigterm = True
                sys.exit()

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()

    @property
    def received_sigterm(self):
        with self._sigterm_lock:
            return self._received_sigterm

    def run(self):
        # SGS: start the REST API server
        self.api.start()
        # SGS: start logging
        self.logger.start()
        self.next_run = time.time()

        while not self.received_sigterm:
            # SGS: has a SIGHUP signal been received? How is it triggered, simply sent with the kill command?
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config(True)
                else:
                    self.postgresql.config.reload_config(
                        self.config['postgresql'], True)
            # SGS: periodically check the state of the member managed by this Patroni instance and react accordingly; see the comments in the run_cycle function for details
            logger.info(self.ha.run_cycle())

            if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \
                    and self.config.set_dynamic_configuration(self.dcs.cluster.config):
                self.reload_config()

            if self.postgresql.role != 'uninitialized':
                self.config.save_cache()

            self.schedule_next_run()

    def setup_signal_handlers(self):
        from threading import Lock

        self._received_sighup = False
        self._sigterm_lock = Lock()
        self._received_sigterm = False
        if os.name != 'nt':
            signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
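
To answer the SGS question above: yes, a plain kill is enough. A hypothetical snippet (the PID is a placeholder; SIGHUP is POSIX-only, matching the os.name guard above):

import os
import signal

# Sending SIGHUP invokes sighup_handler, and the main loop reloads the
# configuration on its next pass; patroni_pid is a placeholder value.
patroni_pid = 12345
os.kill(patroni_pid, signal.SIGHUP)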

    def shutdown(self):
        with self._sigterm_lock:
            self._received_sigterm = True
        try:
            self.api.shutdown()
        except Exception:
            logger.exception('Exception during RestApi.shutdown')
        try:
            self.ha.shutdown()
        except Exception:
            logger.exception('Exception during Ha.shutdown')
        self.logger.shutdown()
Example #6
class Patroni(object):

    def __init__(self):
        from patroni.api import RestApiServer
        from patroni.config import Config
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.version import __version__
        from patroni.watchdog import Watchdog

        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config and cluster.config.data:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                        self.watchdog.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')
                time.sleep(5)

    def get_tags(self):
        return {tag: value for tag, value in self.config.get('tags', {}).items()
                if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value}

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self):
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.watchdog.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s', self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \
                    and self.config.set_dynamic_configuration(self.dcs.cluster.config):
                self.reload_config()

            if self.postgresql.role != 'uninitialized':
                self.config.save_cache()

            self.schedule_next_run()

    def setup_signal_handlers(self):
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)

    def shutdown(self):
        self.api.shutdown()
        self.ha.shutdown()