def test_replica_start(app):
    """Check startup of a replica whose database identifier matches the DCS.

    A fake ``start_monitoring`` plugin marks the app unhealthy during startup,
    so the first state pushed to the DCS must carry that health problem.  Once
    the monitor reports healthy, the test expects the state to be re-published
    without problems and the connection info to be set in the DCS.
    """
    plugins = setup_plugins(
        app,
        dcs_get_database_identifier='1234',
        dcs_lock=True,
        pg_am_i_replica=True,
        pg_get_database_identifier='1234',
    )
    app._conn_info['a'] = 'b'

    def start_monitoring():
        # Stand-in for a monitoring plugin: report a problem as soon as
        # monitoring starts.
        app.unhealthy('test_monitor', 'Waiting for first check')

    plugins.start_monitoring.side_effect = start_monitoring
    # sync startup
    timeout = app.initialize()
    assert plugins.mock_calls == [
        call.initialize(),
        call.get_my_id(),
        # compare our id with the id in the DCS
        call.dcs_get_database_identifier(),
        call.pg_get_database_identifier(),
        # check if I am a replica
        call.pg_am_i_replica(),
        # not master, so make sure the DB is running
        call.pg_start(),
        # start monitoring
        call.start_monitoring(),
        call.dcs_watch(conn_info=None, state=None),
        # setup our connection info
        call.get_conn_info(),
        # set our first state
        call.dcs_set_state({
            'a': 'b',
            'host': '127.0.0.1',
            'health_problems': {
                'test_monitor': {
                    'can_be_replica': False,
                    'reason': 'Waiting for first check',
                },
            },
        }),
    ]
    # Carry on running afterwards: no restart timeout is returned.
    # ``is None`` is the identity check recommended for singletons.
    assert timeout is None
    assert app.health_problems == {
        'test_monitor': {
            'can_be_replica': False,
            'reason': 'Waiting for first check',
        },
    }
    # Our test monitor becomes healthy
    plugins.reset_mock()
    app.healthy('test_monitor')
    assert plugins.mock_calls == [
        call.dcs_set_state({
            'health_problems': {},
            'a': 'b',
            'host': '127.0.0.1',
        }),
        call.pg_am_i_replica(),
        call.dcs_set_conn_info({'a': 'b', 'host': '127.0.0.1'}),
    ]
def test_failed_over_master_start(app):
    """Start a restarted ex-master after another master has advanced.

    The plugins are set up with ``dcs_lock=False``, a DCS timeline of 2 and a
    local timeline of 1.  The test expects initialization to finish with
    ``pg_reset`` and to return a timeout of 5.
    """
    plugins = setup_plugins(
        app,
        dcs_lock=False,
        dcs_get_timeline=2,
        pg_get_timeline=1,
        pg_am_i_replica=False,
    )
    expected_calls = [
        call.initialize(),
        call.get_my_id(),
        # database identifier comparison: DCS value, then our own
        call.dcs_get_database_identifier(),
        call.pg_get_database_identifier(),
        # replica check
        call.pg_am_i_replica(),
        # not a replica, so look for an existing master
        call.dcs_lock('master'),
        call.pg_stop(),
        # timeline comparison: our own value, then the DCS value
        call.pg_get_timeline(),
        call.dcs_get_timeline(),
        # our timeline is the older one, so reset
        call.pg_reset(),
    ]

    # synchronous startup
    startup_timeout = app.initialize()

    assert plugins.mock_calls == expected_calls
    # Carry on running afterwards
    assert startup_timeout == 5
def test_replica_slightly_sick(app):
    """Report a health problem that still allows the node to be a replica.

    After initialization the test expects ``unhealthy`` to publish the new
    state and then check the replica status, and nothing more.
    """
    plugins = setup_plugins(app, pg_am_i_replica=True)
    app.initialize()
    plugins.reset_mock()

    problem = {'reason': 'It went Boom', 'can_be_replica': True}
    expected_state = {
        'host': '127.0.0.1',
        'health_problems': {'boom': problem},
    }

    app.unhealthy('boom', 'It went Boom', can_be_replica=True)

    assert plugins.mock_calls == [
        call.dcs_set_state(expected_state),
        call.pg_am_i_replica(),
    ]
async def test_master_lock_broken(app):
    """Check how a master reacts to changes of the master lock owner.

    Three cases are exercised with ``time.sleep`` and ``sys.exit`` patched:

    * the lock disappears (owner ``None``),
    * the lock moves to another node,
    * the lock is owned by this node.

    In the first two cases the test expects PostgreSQL to be stopped, the DCS
    to be disconnected, a sleep of 10 and an exit with status 0.  In the last
    case only the replica check is expected.
    """
    plugins = setup_plugins(app, pg_am_i_replica=False)
    assert app.initialize() is None
    plugins.reset_mock()

    # if the lock is broken, shut down postgresql and exit
    with patch('time.sleep') as mock_sleep, patch('sys.exit') as mock_exit:
        app.master_lock_changed(None)
    # ``mock.called_once_with(...)`` is not an assertion (it is an
    # auto-created, always-truthy attribute), so use the real assert_* API.
    mock_exit.assert_called_once_with(0)
    mock_sleep.assert_called_once_with(10)
    assert app._plugins.mock_calls == [
        call.pg_am_i_replica(),
        call.pg_am_i_replica(),
        call.pg_stop(),
        call.dcs_disconnect(),
    ]
    assert app._master_lock_owner is None

    # if the lock changes owner to someone else, shut down postgresql and exit
    plugins.reset_mock()
    with patch('time.sleep') as mock_sleep, patch('sys.exit') as mock_exit:
        app.master_lock_changed('someone else')
    mock_exit.assert_called_once_with(0)
    mock_sleep.assert_called_once_with(10)
    assert app._plugins.mock_calls == [
        call.pg_am_i_replica(),
        call.pg_am_i_replica(),
        call.pg_stop(),
        call.dcs_disconnect(),
    ]
    assert app._master_lock_owner == 'someone else'

    # if the lock is owned by us, carry on trucking
    plugins.reset_mock()
    with patch('time.sleep') as mock_sleep, patch('sys.exit') as mock_exit:
        app.master_lock_changed(app.my_id)
    # NOTE(review): no pg_stop/dcs_disconnect is expected below, so no
    # restart should happen here -- confirm against the app implementation.
    mock_exit.assert_not_called()
    mock_sleep.assert_not_called()
    assert app._plugins.mock_calls == [
        call.pg_am_i_replica(),
    ]
    assert app._master_lock_owner == app.my_id
async def test_master_unhealthy(app):
    """Report a health problem on a node that is not a replica.

    The test expects the new state to be published, the replica status to be
    checked, and the connection info to be deleted from the DCS.
    """
    plugins = setup_plugins(app, pg_am_i_replica=False)
    app.initialize()
    plugins.reset_mock()

    problem = {'reason': 'It went Boom', 'can_be_replica': True}
    expected_calls = [
        call.dcs_set_state({
            'host': '127.0.0.1',
            'health_problems': {'boom': problem},
        }),
        call.pg_am_i_replica(),
        call.dcs_delete_conn_info(),
    ]

    app.unhealthy('boom', 'It went Boom', can_be_replica=True)

    assert plugins.mock_calls == expected_calls
async def test_plugin_subscribes_to_master_lock_change(app):
    """Check that a ``master_lock_changed`` plugin hook is invoked on a change.

    The plugins are set up with a ``master_lock_changed`` entry for
    ``'pluginA'``.  After the lock owner changes, the test expects the replica
    check followed by the plugin hook being called with the new owner.
    """
    plugins = setup_plugins(
        app,
        pg_get_timeline=42,
        master_lock_changed=[('pluginA', None)],
        pg_am_i_replica=True,
    )
    # Identity check: ``None`` is a singleton.
    assert app.initialize() is None
    plugins.reset_mock()
    app.master_lock_changed('someone else')
    assert app._plugins.mock_calls == [
        call.pg_am_i_replica(),
        call.master_lock_changed('someone else'),
    ]
def test_restart_replica(app):
    """Check that ``restart(10)`` on a replica disconnects, sleeps and exits.

    ``time.sleep`` and ``sys.exit`` are patched so the test neither waits nor
    terminates the interpreter.
    """
    plugins = setup_plugins(app, pg_am_i_replica=True)
    app.initialize()
    plugins.reset_mock()
    with patch('time.sleep') as mock_sleep, patch('sys.exit') as mock_exit:
        app.restart(10)
    # ``mock.called_once_with(...)`` is not an assertion (it is an
    # auto-created, always-truthy attribute), so use the real assert_* API.
    mock_exit.assert_called_once_with(0)
    mock_sleep.assert_called_once_with(10)
    assert app._plugins.mock_calls == [
        call.pg_am_i_replica(),
        call.dcs_disconnect(),
    ]
async def test_replica_reaction_to_master_lock_change(app):
    """Check how a replica reacts to changes of the master lock owner.

    When another node owns the lock, only the replica check is expected.
    When this node owns the lock, the test expects replication to be stopped
    and the local timeline (42) to be stored in the DCS.
    """
    plugins = setup_plugins(app, pg_get_timeline=42, pg_am_i_replica=True)
    assert app.initialize() is None

    # if the lock changes owner to someone else, carry on trucking
    plugins.reset_mock()
    app.master_lock_changed('someone else')
    assert app._plugins.mock_calls == [
        call.pg_am_i_replica(),
    ]
    assert app._master_lock_owner == 'someone else'

    # if the lock is owned by us, er, we stop replication and become the master
    plugins.reset_mock()
    app.master_lock_changed(app.my_id)
    assert app._plugins.mock_calls == [
        call.pg_am_i_replica(),
        call.pg_stop_replication(),
        call.pg_get_timeline(),
        call.dcs_set_timeline(42),
    ]
    assert app._master_lock_owner == app.my_id
def test_replica_bootstrap(app):
    """Bootstrap a node whose database identifier differs from the DCS one.

    The DCS reports identifier ``'1234'`` and the local database ``'42'``.
    The test expects a stop, a restore, replication setup and a replica check,
    and a returned timeout of 0.
    """
    plugins = setup_plugins(
        app,
        dcs_get_database_identifier='1234',
        pg_get_database_identifier='42',
    )
    expected_calls = [
        call.initialize(),
        call.get_my_id(),
        # database identifier comparison: DCS value, then our own
        call.dcs_get_database_identifier(),
        call.pg_get_database_identifier(),
        # postgresql must be stopped first
        call.pg_stop(),
        # restore postgresql and configure replication
        call.pg_restore(),
        call.pg_setup_replication(),
        call.pg_am_i_replica(),
    ]

    shutdown_timeout = app.initialize()

    assert app._plugins.mock_calls == expected_calls
    # shut down cleanly and immediately
    assert shutdown_timeout == 0
async def test_replica_tries_to_take_over(app):
    """Check that a replica tries to take the master lock when it is unowned.

    ``asyncio.sleep`` is patched with a ``FakeSleeper`` so the test can step
    through the retry loop.  The test expects a sleep of 3 before the first
    attempt, and then a read of all DCS state followed by an attempt to take
    the ``'master'`` lock.
    """
    plugins = setup_plugins(app, pg_am_i_replica=True)
    assert app.initialize() is None
    plugins.reset_mock()
    # if there is no lock owner, we start looping trying to become master
    app.master_lock_changed(None)
    assert app._plugins.mock_calls == [call.pg_am_i_replica()]
    plugins.reset_mock()
    with patch('asyncio.sleep') as mock_sleep:
        sleeper = FakeSleeper()
        mock_sleep.side_effect = sleeper
        # the first thing is to sleep a bit
        await sleeper.next()
        assert sleeper.log == [3]
        # takeover attempted
        await sleeper.next()
        assert sleeper.log == [3, 3]
        assert app._plugins.mock_calls == [
            call.dcs_get_all_state(),
            call.dcs_lock('master'),
        ]
def test_replica_bootstrap_fails_sanity_test(app):
    """Bootstrap a node that does not report as a replica after the restore.

    The database identifiers differ (``'1234'`` in the DCS, ``'42'`` locally)
    and ``pg_am_i_replica`` is configured as ``False``.  The test expects the
    restore sequence to be followed by ``pg_reset`` and a returned timeout
    of 5.
    """
    plugins = setup_plugins(
        app,
        pg_am_i_replica=False,
        dcs_get_database_identifier='1234',
        pg_get_database_identifier='42',
    )
    expected_calls = [
        call.initialize(),
        call.get_my_id(),
        # database identifier comparison: DCS value, then our own
        call.dcs_get_database_identifier(),
        call.pg_get_database_identifier(),
        # postgresql must be stopped first
        call.pg_stop(),
        # restore postgresql and configure replication
        call.pg_restore(),
        call.pg_setup_replication(),
        call.pg_am_i_replica(),
        call.pg_reset(),
    ]

    retry_timeout = app.initialize()

    assert app._plugins.mock_calls == expected_calls
    # shut down after 5 seconds to try again
    assert retry_timeout == 5