Example #1
    def __init__(self, conf):
        from patroni.api import RestApiServer
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.log import PatroniLogger
        from patroni.postgresql import Postgresql
        from patroni.request import PatroniRequest
        from patroni.watchdog import Watchdog

        self.setup_signal_handlers()

        self.version = __version__
        self.logger = PatroniLogger()
        self.config = conf
        self.logger.reload_config(self.config.get('log', {}))
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.request = PatroniRequest(self.config, True)
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}
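
The constructor above only wires the components together; it does not start the HA loop itself. Below is a hedged sketch of how an instance built this way is typically driven. The Config-from-YAML call and the run()/shutdown() method names are assumptions based on other Patroni versions, not something shown in this example.

import sys
from patroni.config import Config

config = Config(sys.argv[1])   # assumption: Config loads the YAML file given on the command line
patroni = Patroni(config)      # wires up logger, DCS, watchdog, Postgresql, REST API and Ha (as above)
try:
    patroni.run()              # main loop: roughly one HA cycle per loop_wait seconds
finally:
    patroni.shutdown()         # assumption: stops the REST API and background threads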
Example #2
 def setUp(self):
     with patch.object(Client, 'machines') as mock_machines:
         mock_machines.__get__ = Mock(
             return_value=['http://remotehost:2379'])
         self.p = Postgresql({
             'name': 'postgresql0',
             'scope': 'dummy',
             'listen': '127.0.0.1:5432',
             'data_dir': 'data/postgresql0',
             'superuser': {},
             'admin': {},
             'replication': {
                 'username': '',
                 'password': '',
                 'network': ''
             }
         })
         self.p.set_state('running')
         self.p.set_role('replica')
         self.p.check_replication_lag = true
         self.p.can_create_replica_without_replication_connection = MagicMock(
             return_value=False)
         self.e = Etcd('foo', {
             'ttl': 30,
             'host': 'ok:2379',
             'scope': 'test'
         })
         self.ha = Ha(MockPatroni(self.p, self.e))
         self.ha._async_executor.run_async = run_async
         self.ha.old_cluster = self.e.get_cluster()
         self.ha.cluster = get_cluster_not_initialized_without_leader()
         self.ha.load_cluster_from_dcs = Mock()
Example #3
    def do_POST_restart(self):
        status_code = 500
        data = 'restart failed'
        request = self._read_json_content(body_is_optional=True)
        cluster = self.server.patroni.dcs.get_cluster()
        if request is None:
            # failed to parse the json
            return
        if request:
            logger.debug("received restart request: {0}".format(request))

        if cluster.is_paused() and 'schedule' in request:
            self._write_response(status_code, "Can't schedule restart in the paused state")
            return

        for k in request:
            if k == 'schedule':
                (_, data, request[k]) = self.parse_schedule(request[k], "restart")
                if _:
                    status_code = _
                    break
            elif k == 'role':
                if request[k] not in ('master', 'replica'):
                    status_code = 400
                    data = "PostgreSQL role should be either master or replica"
                    break
            elif k == 'postgres_version':
                try:
                    Postgresql.postgres_version_to_int(request[k])
                except PostgresException as e:
                    status_code = 400
                    data = e.value
                    break
            elif k == 'timeout':
                request[k] = parse_int(request[k], 's')
                if request[k] is None or request[k] <= 0:
                    status_code = 400
                    data = "Timeout should be a positive number of seconds"
                    break
            elif k != 'restart_pending':
                status_code = 400
                data = "Unknown filter for the scheduled restart: {0}".format(k)
                break
        else:
            if 'schedule' not in request:
                try:
                    status, data = self.server.patroni.ha.restart(request)
                    status_code = 200 if status else 503
                except Exception:
                    logger.exception('Exception during restart')
                    status_code = 400
            else:
                if self.server.patroni.ha.schedule_future_restart(request):
                    data = "Restart scheduled"
                    status_code = 202
                else:
                    data = "Another restart is already scheduled"
                    status_code = 409
        self._write_response(status_code, data)
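
For reference, a hedged client-side sketch of the kind of request this handler accepts. The host/port and the use of the requests library are assumptions made for illustration; the payload keys and the status codes are taken directly from the handler above.

import requests

payload = {
    'role': 'replica',                        # restart only members that currently have this role
    'postgres_version': '9.5.2',              # restart only if the running version is older than this
    'timeout': '60s',                         # parsed with parse_int(..., 's'); must be positive
    'restart_pending': True,                  # restart only if a restart is already pending
    'schedule': '2015-10-01T14:30:00+00:00',  # optional: schedule the restart instead of running it now
}
r = requests.post('http://127.0.0.1:8008/restart', json=payload)
# 200 restarted, 202 restart scheduled, 409 another restart already scheduled,
# 400 invalid filter or value, 503 restart failed
print(r.status_code, r.text)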
Example #5
 def setUp(self):
     self.data_dir = 'data/test0'
     self.config_dir = self.data_dir
     if not os.path.exists(self.data_dir):
         os.makedirs(self.data_dir)
     self.p = Postgresql({
         'name': 'test0',
         'scope': 'batman',
         'data_dir': self.data_dir,
         'config_dir': self.config_dir,
         'retry_timeout': 10,
         'pgpass': '******',
         'listen': '127.0.0.2, 127.0.0.3:5432',
         'connect_address': '127.0.0.2:5432',
         'authentication': {
             'superuser': {
                 'username': '******',
                 'password': '******'
             },
             'replication': {
                 'username': '******',
                 'password': '******'
             }
         },
         'remove_data_directory_on_rewind_failure': True,
         'use_pg_rewind': True,
         'pg_ctl_timeout': 'bla',
         'parameters': self._PARAMETERS,
         'recovery_conf': {
             'foo': 'bar'
         },
         'pg_hba': ['host all all 0.0.0.0/0 md5'],
         'callbacks': {
             'on_start': 'true',
             'on_stop': 'true',
             'on_reload': 'true',
             'on_restart': 'true',
             'on_role_change': 'true'
         }
     })
     self.p._callback_executor = Mock()
     self.leadermem = Member(0, 'leader', 28, {
         'conn_url':
         'postgres://*****:*****@127.0.0.1:5435/postgres'
     })
     self.leader = Leader(-1, 28, self.leadermem)
     self.other = Member(
         0, 'test-1', 28, {
             'conn_url':
             'postgres://*****:*****@127.0.0.1:5433/postgres',
             'tags': {
                 'replicatefrom': 'leader'
             }
         })
     self.me = Member(0, 'test0', 28, {
         'conn_url':
         'postgres://*****:*****@127.0.0.1:5434/postgres'
     })
Example #6
def restart(obj, cluster_name, member_names, force, role, p_any, scheduled, version, pending, timeout):
    cluster = get_dcs(obj, cluster_name).get_cluster()

    members = get_members(cluster, cluster_name, member_names, role, force, 'restart')
    if p_any:
        random.shuffle(members)
        members = members[:1]

    if version is None and not force:
        version = click.prompt('Restart if the PostgreSQL version is less than provided (e.g. 9.5.2) ',
                               type=str, default='')

    content = {}
    if pending:
        content['restart_pending'] = True

    if version:
        try:
            Postgresql.postgres_version_to_int(version)
        except PatroniException as e:
            raise PatroniCtlException(e.value)

        content['postgres_version'] = version

    if scheduled is None and not force:
        scheduled = click.prompt('When should the restart take place (e.g. 2015-10-01T14:30) ', type=str, default='now')

    scheduled_at = parse_scheduled(scheduled)
    if scheduled_at:
        if cluster.is_paused():
            raise PatroniCtlException("Can't schedule restart in the paused state")
        content['schedule'] = scheduled_at.isoformat()

    if timeout is not None:
        content['timeout'] = timeout

    for member in members:
        if 'schedule' in content:
            if force and member.data.get('scheduled_restart'):
                r = request_patroni(member, 'delete', 'restart', headers=auth_header(obj))
                check_response(r, member.name, 'flush scheduled restart', True)

        r = request_patroni(member, 'post', 'restart', content, auth_header(obj))
        if r.status_code == 200:
            click.echo('Success: restart on member {0}'.format(member.name))
        elif r.status_code == 202:
            click.echo('Success: restart scheduled on member {0}'.format(member.name))
        elif r.status_code == 409:
            click.echo('Failed: another restart is already scheduled on member {0}'.format(member.name))
        else:
            click.echo('Failed: restart for member {0}, status code={1}, ({2})'.format(
                member.name, r.status_code, r.text)
            )
Example #8
 def setUp(self):
     with patch.object(Client, 'machines') as mock_machines:
         mock_machines.__get__ = Mock(
             return_value=['http://remotehost:2379'])
         self.p = Postgresql({
             'name': 'postgresql0',
             'scope': 'dummy',
             'listen': '127.0.0.1:5432',
             'data_dir': 'data/postgresql0',
             'retry_timeout': 10,
             'authentication': {
                 'superuser': {
                     'username': '******',
                     'password': '******'
                 },
                 'replication': {
                     'username': '',
                     'password': ''
                 }
             },
             'parameters': {
                 'wal_level': 'hot_standby',
                 'max_replication_slots': 5,
                 'foo': 'bar',
                 'hot_standby': 'on',
                 'max_wal_senders': 5,
                 'wal_keep_segments': 8
             }
         })
         self.p.set_state('running')
         self.p.set_role('replica')
         self.p.postmaster_start_time = MagicMock(
             return_value=str(postmaster_start_time))
         self.p.check_replication_lag = true
         self.p.can_create_replica_without_replication_connection = MagicMock(
             return_value=False)
         self.e = get_dcs({
             'etcd': {
                 'ttl': 30,
                 'host': 'ok:2379',
                 'scope': 'test',
                 'name': 'foo',
                 'retry_timeout': 10
             }
         })
         self.ha = Ha(MockPatroni(self.p, self.e))
         self.ha.old_cluster = self.e.get_cluster()
         self.ha.cluster = get_cluster_not_initialized_without_leader()
         self.ha.load_cluster_from_dcs = Mock()
         self.ha.is_synchronous_mode = false
Example #9
    def __init__(self):
        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}
Example #10
 def setUp(self):
     self.data_dir = 'data/test0'
     if not os.path.exists(self.data_dir):
         os.makedirs(self.data_dir)
     self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': self.data_dir,
                          'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                          'pg_hba': ['host replication replicator 127.0.0.1/32 md5',
                                     'hostssl all all 0.0.0.0/0 md5',
                                     'host all all 0.0.0.0/0 md5'],
                          'superuser': {'username': '******', 'password': '******'},
                          'admin': {'username': '******', 'password': '******'},
                          'pg_rewind': {'username': '******', 'password': '******'},
                          'replication': {'username': '******',
                                          'password': '******'},
                          'parameters': {'foo': 'bar'}, 'recovery_conf': {'foo': 'bar'},
                          'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                        'on_restart': 'true', 'on_role_change': 'true',
                                        'on_reload': 'true'
                                        },
                          'restore': 'true'})
     self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
     self.leader = Leader(-1, 28, self.leadermem)
     self.other = Member(0, 'test1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres',
                         'tags': {'replicatefrom': 'leader'}})
     self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})
Example #11
 def __init__(self, config):
     self.nap_time = config['loop_wait']
     self.postgresql = Postgresql(config['postgresql'])
     self.ha = Ha(self.postgresql, self.get_dcs(self.postgresql.name, config))
     host, port = config['restapi']['listen'].split(':')
     self.api = RestApiServer(self, config['restapi'])
     self.next_run = time.time()
     self.shutdown_member_ttl = 300
Example #12
 def __init__(self, config):
     self.nap_time = config['loop_wait']
     self.tags = config.get('tags', dict())
     self.postgresql = Postgresql(config['postgresql'])
     self.dcs = self.get_dcs(self.postgresql.name, config)
     self.api = RestApiServer(self, config['restapi'])
     self.ha = Ha(self)
     self.next_run = time.time()
Example #13
    def __init__(self):
        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.nap_time = self.config['loop_wait']
        self.next_run = time.time()

        self._reload_config_scheduled = False
        self._received_sighup = False
        self._received_sigterm = False
Example #14
def touch_member(config, dcs):
    ''' Rip-off of the ha.touch_member without inter-class dependencies '''
    p = Postgresql(config['postgresql'])
    p.set_state('running')
    p.set_role('master')

    def restapi_connection_string(config):
        protocol = 'https' if config.get('certfile') else 'http'
        connect_address = config.get('connect_address')
        listen = config['listen']
        return '{0}://{1}/patroni'.format(protocol, connect_address or listen)

    data = {
        'conn_url': p.connection_string,
        'api_url': restapi_connection_string(config['restapi']),
        'state': p.state,
        'role': p.role
    }

    return dcs.touch_member(json.dumps(data, separators=(',', ':')), permanent=True)
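
A hypothetical invocation sketch for the helper above. The concrete configuration values, the choice of etcd as the DCS, and the get_dcs call are illustrative assumptions; the helper itself only needs a config dict with 'postgresql' and 'restapi' sections plus a DCS handle.

from patroni.dcs import get_dcs

config = {
    'scope': 'demo',
    'postgresql': {'name': 'node1', 'scope': 'demo', 'data_dir': 'data/node1',
                   'listen': '127.0.0.1:5432', 'connect_address': '127.0.0.1:5432',
                   'authentication': {'superuser': {}, 'replication': {}}},
    'restapi': {'listen': '127.0.0.1:8008', 'connect_address': '127.0.0.1:8008'},
    'etcd': {'host': '127.0.0.1:2379'},  # any supported DCS section would work here
}
dcs = get_dcs(config)        # assumption: the backend is picked from the config keys ('etcd' here)
touch_member(config, dcs)    # publishes conn_url/api_url/state/role as a permanent member key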
Example #15
 def setUp(self):
     self.data_dir = 'data/test0'
     if not os.path.exists(self.data_dir):
         os.makedirs(self.data_dir)
     self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': self.data_dir, 'retry_timeout': 10,
                          'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                          'authentication': {'superuser': {'username': '******', 'password': '******'},
                                             'replication': {'username': '******', 'password': '******'}},
                          'use_pg_rewind': True,
                          'parameters': self._PARAMETERS,
                          'recovery_conf': {'foo': 'bar'},
                          'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                        'on_restart': 'true', 'on_role_change': 'true',
                                        'on_reload': 'true'
                                        },
                          'restore': 'true'})
     self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
     self.leader = Leader(-1, 28, self.leadermem)
     self.other = Member(0, 'test1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres',
                         'tags': {'replicatefrom': 'leader'}})
     self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})
Example #16
    def __init__(self):
        from patroni.api import RestApiServer
        from patroni.config import Config
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.version import __version__

        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}
Example #17
 def setUp(self):
     self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': 'data/test0',
                          'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                          'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                          'superuser': {'password': '******'},
                          'admin': {'username': '******', 'password': '******'},
                          'pg_rewind': {'username': '******', 'password': '******'},
                          'replication': {'username': '******',
                                          'password': '******',
                                          'network': '127.0.0.1/32'},
                          'parameters': {'foo': 'bar'}, 'recovery_conf': {'foo': 'bar'},
                          'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                        'on_restart': 'true', 'on_role_change': 'true',
                                        'on_reload': 'true'
                                        },
                          'restore': 'true'})
     if not os.path.exists(self.p.data_dir):
         os.makedirs(self.p.data_dir)
     self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
     self.leader = Leader(-1, 28, self.leadermem)
     self.other = Member(0, 'test1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres'})
     self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})
Example #18
 def __init__(self, config):
     self.nap_time = config['loop_wait']
     self.tags = {
         tag: value
         for tag, value in config.get('tags', {}).items()
         if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value
     }
     self.postgresql = Postgresql(config['postgresql'])
     self.dcs = self.get_dcs(self.postgresql.name, config)
     self.version = __version__
     self.api = RestApiServer(self, config['restapi'])
     self.ha = Ha(self)
     self.next_run = time.time()
Example #19
    def __init__(self, config):
        from patroni.api import RestApiServer
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.request import PatroniRequest
        from patroni.watchdog import Watchdog

        super(Patroni, self).__init__(config)

        self.version = __version__
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.request = PatroniRequest(self.config, True)
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}
Example #20
    def __init__(self):
        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}
Example #21
 def setUp(self):
     data_dir = os.path.join('data', 'test0')
     self.p = Postgresql({'name': 'postgresql0', 'scope': 'batman', 'data_dir': data_dir,
                          'config_dir': data_dir, 'retry_timeout': 10,
                          'krbsrvname': 'postgres', 'pgpass': os.path.join(data_dir, 'pgpass0'),
                          'listen': '127.0.0.2, 127.0.0.3:5432', 'connect_address': '127.0.0.2:5432',
                          'authentication': {'superuser': {'username': '******', 'password': '******'},
                                             'replication': {'username': '', 'password': '******'}},
                          'remove_data_directory_on_rewind_failure': True,
                          'use_pg_rewind': True, 'pg_ctl_timeout': 'bla',
                          'parameters': self._PARAMETERS,
                          'recovery_conf': {'foo': 'bar'},
                          'pg_hba': ['host all all 0.0.0.0/0 md5'],
                          'pg_ident': ['krb realm postgres'],
                          'callbacks': {'on_start': 'true', 'on_stop': 'true', 'on_reload': 'true',
                                        'on_restart': 'true', 'on_role_change': 'true'}})
Example #22
 def setUp(self):
     with patch.object(etcd.Client, 'machines') as mock_machines:
         mock_machines.__get__ = Mock(return_value=['http://remotehost:2379'])
         self.p = Postgresql({'name': 'postgresql0', 'scope': 'dummy', 'listen': '127.0.0.1:5432',
                              'data_dir': 'data/postgresql0', 'superuser': {}, 'admin': {},
                              'replication': {'username': '', 'password': '', 'network': ''}})
         self.p.set_state('running')
         self.p.set_role('replica')
         self.p.check_replication_lag = true
         self.p.can_create_replica_without_replication_connection = MagicMock(return_value=False)
         self.e = get_dcs('foo', {'etcd': {'ttl': 30, 'host': 'ok:2379', 'scope': 'test'}})
         self.ha = Ha(MockPatroni(self.p, self.e))
         self.ha._async_executor.run_async = run_async
         self.ha.old_cluster = self.e.get_cluster()
         self.ha.cluster = get_cluster_not_initialized_without_leader()
         self.ha.load_cluster_from_dcs = Mock()
Example #23
 def setUp(self):
     with patch.object(Client, 'machines') as mock_machines:
         mock_machines.__get__ = Mock(return_value=['http://remotehost:2379'])
         self.p = Postgresql({'name': 'postgresql0', 'scope': 'dummy', 'listen': '127.0.0.1:5432',
                              'data_dir': 'data/postgresql0', 'retry_timeout': 10,
                              'maximum_lag_on_failover': 5,
                              'authentication': {'superuser': {'username': '******', 'password': '******'},
                                                 'replication': {'username': '', 'password': ''}},
                              'parameters': {'wal_level': 'hot_standby', 'max_replication_slots': 5, 'foo': 'bar',
                                             'hot_standby': 'on', 'max_wal_senders': 5, 'wal_keep_segments': 8}})
         self.p.set_state('running')
         self.p.set_role('replica')
         self.p.postmaster_start_time = MagicMock(return_value=str(postmaster_start_time))
         self.p.can_create_replica_without_replication_connection = MagicMock(return_value=False)
         self.e = get_dcs({'etcd': {'ttl': 30, 'host': 'ok:2379', 'scope': 'test',
                                    'name': 'foo', 'retry_timeout': 10}})
         self.ha = Ha(MockPatroni(self.p, self.e))
         self.ha.old_cluster = self.e.get_cluster()
         self.ha.cluster = get_cluster_not_initialized_without_leader()
         self.ha.load_cluster_from_dcs = Mock()
Example #24
 def setUp(self):
     self.data_dir = 'data/test0'
     if not os.path.exists(self.data_dir):
         os.makedirs(self.data_dir)
     self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': self.data_dir, 'retry_timeout': 10,
                          'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                          'authentication': {'superuser': {'username': '******', 'password': '******'},
                                             'replication': {'username': '******', 'password': '******'}},
                          'remove_data_directory_on_rewind_failure': True,
                          'use_pg_rewind': True, 'pg_ctl_timeout': 'bla',
                          'parameters': self._PARAMETERS,
                          'recovery_conf': {'foo': 'bar'},
                          'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                        'on_restart': 'true', 'on_role_change': 'true',
                                        'on_reload': 'true'
                                        },
                          'restore': 'true'})
     self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
     self.leader = Leader(-1, 28, self.leadermem)
     self.other = Member(0, 'test1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres',
                         'tags': {'replicatefrom': 'leader'}})
     self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})
Example #25
    def __init__(self):
        from patroni.api import RestApiServer
        from patroni.config import Config
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.version import __version__

        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}
Example #26
 def set_up(self):
     subprocess.call = subprocess_call
     shutil.copy = nop
     self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': 'data/test0',
                          'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                          'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                          'superuser': {'password': ''},
                          'admin': {'username': '******', 'password': '******'},
                          'replication': {'username': '******',
                                          'password': '******',
                                          'network': '127.0.0.1/32'},
                          'parameters': {'foo': 'bar'}, 'recovery_conf': {'foo': 'bar'},
                          'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                        'on_restart': 'true', 'on_role_change': 'true',
                                        'on_reload': 'true'
                                        },
                          'restore': 'true'})
     psycopg2.connect = psycopg2_connect
     if not os.path.exists(self.p.data_dir):
         os.makedirs(self.p.data_dir)
     self.leadermem = Member(0, 'leader', 'postgres://*****:*****@127.0.0.1:5435/postgres', None, None, 28)
     self.leader = Leader(-1, None, 28, self.leadermem)
     self.other = Member(0, 'test1', 'postgres://*****:*****@127.0.0.1:5433/postgres', None, None, 28)
     self.me = Member(0, 'test0', 'postgres://*****:*****@127.0.0.1:5434/postgres', None, None, 28)
Example #27
def touch_member(config, dcs):
    ''' Rip-off of the ha.touch_member without inter-class dependencies '''
    p = Postgresql(config['postgresql'])
    p.set_state('running')
    p.set_role('master')

    def restapi_connection_string(config):
        protocol = 'https' if config.get('certfile') else 'http'
        connect_address = config.get('connect_address')
        listen = config['listen']
        return '{0}://{1}/patroni'.format(protocol, connect_address or listen)

    data = {
        'conn_url': p.connection_string,
        'api_url': restapi_connection_string(config['restapi']),
        'state': p.state,
        'role': p.role
    }

    return dcs.touch_member(data, permanent=True)
Example #28
class TestPostgresql(unittest.TestCase):
    _PARAMETERS = {
        'wal_level': 'hot_standby',
        'max_replication_slots': 5,
        'f.oo': 'bar',
        'search_path': 'public',
        'hot_standby': 'on',
        'max_wal_senders': 5,
        'wal_keep_segments': 8,
        'wal_log_hints': 'on',
        'max_locks_per_transaction': 64,
        'max_worker_processes': 8,
        'max_connections': 100,
        'max_prepared_transactions': 0,
        'track_commit_timestamp': 'off',
        'unix_socket_directories': '/tmp'
    }

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('os.rename', Mock())
    @patch.object(Postgresql, 'get_major_version', Mock(return_value=90600))
    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def setUp(self):
        self.data_dir = 'data/test0'
        self.config_dir = self.data_dir
        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.p = Postgresql({
            'name': 'test0',
            'scope': 'batman',
            'data_dir': self.data_dir,
            'config_dir': self.config_dir,
            'retry_timeout': 10,
            'listen': '127.0.0.2, 127.0.0.3:5432',
            'connect_address': '127.0.0.2:5432',
            'authentication': {
                'superuser': {
                    'username': '******',
                    'password': '******'
                },
                'replication': {
                    'username': '******',
                    'password': '******'
                }
            },
            'remove_data_directory_on_rewind_failure': True,
            'use_pg_rewind': True,
            'pg_ctl_timeout': 'bla',
            'parameters': self._PARAMETERS,
            'recovery_conf': {
                'foo': 'bar'
            },
            'pg_hba': ['host all all 0.0.0.0/0 md5'],
            'callbacks': {
                'on_start': 'true',
                'on_stop': 'true',
                'on_reload': 'true',
                'on_restart': 'true',
                'on_role_change': 'true'
            }
        })
        self.p._callback_executor = Mock()
        self.leadermem = Member(0, 'leader', 28, {
            'conn_url':
            'postgres://*****:*****@127.0.0.1:5435/postgres'
        })
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(
            0, 'test-1', 28, {
                'conn_url':
                'postgres://*****:*****@127.0.0.1:5433/postgres',
                'tags': {
                    'replicatefrom': 'leader'
                }
            })
        self.me = Member(0, 'test0', 28, {
            'conn_url':
            'postgres://*****:*****@127.0.0.1:5434/postgres'
        })

    def tearDown(self):
        shutil.rmtree('data')

    def test_get_initdb_options(self):
        self.assertEquals(
            self.p.get_initdb_options([{
                'encoding': 'UTF8'
            }, 'data-checksums']), ['--encoding=UTF8', '--data-checksums'])
        self.assertRaises(Exception, self.p.get_initdb_options,
                          [{
                              'pgdata': 'bar'
                          }])
        self.assertRaises(Exception, self.p.get_initdb_options, [{
            'foo': 'bar',
            1: 2
        }])
        self.assertRaises(Exception, self.p.get_initdb_options, [1])

    @patch('os.path.exists', Mock(return_value=True))
    @patch('os.unlink', Mock())
    def test_delete_trigger_file(self):
        self.p.delete_trigger_file()

    @patch('subprocess.Popen')
    @patch.object(Postgresql, 'wait_for_startup')
    @patch.object(Postgresql, 'wait_for_port_open')
    @patch.object(Postgresql, 'is_running')
    def test_start(self, mock_is_running, mock_wait_for_port_open,
                   mock_wait_for_startup, mock_popen):
        mock_is_running.return_value = True
        mock_wait_for_port_open.return_value = True
        mock_wait_for_startup.return_value = False
        mock_popen.return_value.stdout.readline.return_value = '123'
        self.assertTrue(self.p.start())
        mock_is_running.return_value = False
        open(os.path.join(self.data_dir, 'postmaster.pid'), 'w').close()
        pg_conf = os.path.join(self.data_dir, 'postgresql.conf')
        open(pg_conf, 'w').close()
        self.assertFalse(self.p.start(task=CriticalTask()))
        with open(pg_conf) as f:
            lines = f.readlines()
            self.assertTrue("f.oo = 'bar'\n" in lines)

        mock_wait_for_startup.return_value = None
        self.assertFalse(self.p.start(10))
        self.assertIsNone(self.p.start())

        mock_wait_for_port_open.return_value = False
        self.assertFalse(self.p.start())
        task = CriticalTask()
        task.cancel()
        self.assertFalse(self.p.start(task=task))

    @patch.object(Postgresql, 'pg_isready')
    @patch.object(Postgresql, 'read_pid_file')
    @patch.object(Postgresql, 'is_pid_running')
    @patch('patroni.postgresql.polling_loop', Mock(return_value=range(1)))
    def test_wait_for_port_open(self, mock_is_pid_running, mock_read_pid_file,
                                mock_pg_isready):
        mock_is_pid_running.return_value = False
        mock_pg_isready.return_value = STATE_NO_RESPONSE

        # No pid file and postmaster death
        mock_read_pid_file.return_value = {}
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        mock_is_pid_running.return_value = True

        # timeout
        mock_read_pid_file.return_value = {'pid', 1}
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        # Garbage pid
        mock_read_pid_file.return_value = {
            'pid': 'garbage',
            'start_time': '101',
            'data_dir': '',
            'socket_dir': '',
            'port': '',
            'listen_addr': ''
        }
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        # Not ready
        mock_read_pid_file.return_value = {
            'pid': '42',
            'start_time': '101',
            'data_dir': '',
            'socket_dir': '',
            'port': '',
            'listen_addr': ''
        }
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        # pg_isready failure
        mock_pg_isready.return_value = 'garbage'
        self.assertTrue(self.p.wait_for_port_open(42, 100., 1))

    @patch('time.sleep', Mock())
    @patch.object(Postgresql, 'is_running')
    @patch.object(Postgresql, 'get_pid')
    def test_stop(self, mock_get_pid, mock_is_running):
        mock_is_running.return_value = True
        mock_get_pid.return_value = 0
        mock_callback = Mock()
        self.assertTrue(self.p.stop(on_safepoint=mock_callback))
        mock_callback.assert_called()
        mock_get_pid.return_value = -1
        self.assertFalse(self.p.stop())
        mock_get_pid.return_value = 123
        with patch('os.kill', Mock(side_effect=[OSError(errno.ESRCH, ''), OSError, None])),\
                patch('psutil.Process', Mock(side_effect=psutil.NoSuchProcess(123))):
            self.assertTrue(self.p.stop())
        with patch.object(Postgresql, '_signal_postmaster_stop',
                          Mock(return_value=(123, None))):
            with patch.object(Postgresql, 'is_pid_running',
                              Mock(side_effect=[True, False, False])):
                self.assertTrue(self.p.stop())

    def test_restart(self):
        self.p.start = Mock(return_value=False)
        self.assertFalse(self.p.restart())
        self.assertEquals(self.p.state, 'restart failed (restarting)')

    @patch.object(builtins, 'open', MagicMock())
    def test_write_pgpass(self):
        self.p.write_pgpass({
            'host': 'localhost',
            'port': '5432',
            'user': '******'
        })
        self.p.write_pgpass({
            'host': 'localhost',
            'port': '5432',
            'user': '******',
            'password': '******'
        })

    def test_checkpoint(self):
        with patch.object(MockCursor, 'fetchone', Mock(return_value=(True, ))):
            self.assertEquals(self.p.checkpoint({'user': '******'}),
                              'is_in_recovery=true')
        with patch.object(MockCursor, 'execute', Mock(return_value=None)):
            self.assertIsNone(self.p.checkpoint())
        self.assertEquals(self.p.checkpoint(), 'not accessible or not healty')

    @patch('subprocess.call', side_effect=OSError)
    @patch('patroni.postgresql.Postgresql.write_pgpass',
           MagicMock(return_value=dict()))
    def test_pg_rewind(self, mock_call):
        r = {
            'user': '',
            'host': '',
            'port': '',
            'database': '',
            'password': ''
        }
        self.assertTrue(self.p.pg_rewind(r))
        subprocess.call = mock_call
        self.assertFalse(self.p.pg_rewind(r))

    def test_check_recovery_conf(self):
        self.p.write_recovery_conf({'primary_conninfo': 'foo'})
        self.assertFalse(self.p.check_recovery_conf(None))
        self.p.write_recovery_conf({})
        self.assertTrue(self.p.check_recovery_conf(None))

    @patch.object(Postgresql, 'start', Mock())
    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    def test__get_local_timeline_lsn(self):
        self.p.trigger_check_diverged_lsn()
        with patch.object(
                Postgresql, 'controldata',
                Mock(return_value={'Database cluster state': 'shut down'})):
            self.p.rewind_needed_and_possible(self.leader)
        with patch.object(
                Postgresql, 'controldata',
                Mock(return_value={
                    'Database cluster state': 'shut down in recovery'
                })):
            self.p.rewind_needed_and_possible(self.leader)
        with patch.object(Postgresql, 'is_running', Mock(return_value=True)):
            with patch.object(MockCursor, 'fetchone',
                              Mock(side_effect=[(False, ), Exception])):
                self.p.rewind_needed_and_possible(self.leader)

    @patch.object(Postgresql, 'start', Mock())
    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    @patch.object(Postgresql, '_get_local_timeline_lsn',
                  Mock(return_value=(2, '0/40159C1')))
    @patch.object(Postgresql, 'check_leader_is_not_in_recovery')
    def test__check_timeline_and_lsn(self,
                                     mock_check_leader_is_not_in_recovery):
        mock_check_leader_is_not_in_recovery.return_value = False
        self.p.trigger_check_diverged_lsn()
        self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        mock_check_leader_is_not_in_recovery.return_value = True
        self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        self.p.trigger_check_diverged_lsn()
        with patch('psycopg2.connect', Mock(side_effect=Exception)):
            self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        with patch.object(
                MockCursor, 'fetchone',
                Mock(side_effect=[(
                    '', 2, '0/0'
                ), ('', b'2\tG/40159C0\tno recovery target specified\n\n')])):
            self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        self.p.trigger_check_diverged_lsn()
        with patch.object(
                MockCursor, 'fetchone',
                Mock(side_effect=[(
                    '', 2,
                    '0/0'), (
                        '', b'3\t040159C0\tno recovery target specified\n')])):
            self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        self.p.trigger_check_diverged_lsn()
        with patch.object(MockCursor, 'fetchone',
                          Mock(return_value=('', 1, '0/0'))):
            with patch.object(Postgresql, '_get_local_timeline_lsn',
                              Mock(return_value=(1, '0/0'))):
                self.assertFalse(self.p.rewind_needed_and_possible(
                    self.leader))
            self.p.trigger_check_diverged_lsn()
            self.assertTrue(self.p.rewind_needed_and_possible(self.leader))

    @patch.object(MockCursor, 'fetchone',
                  Mock(side_effect=[(True, ), Exception]))
    def test_check_leader_is_not_in_recovery(self):
        self.p.check_leader_is_not_in_recovery()
        self.p.check_leader_is_not_in_recovery()

    @patch.object(Postgresql, 'checkpoint', side_effect=['', '1'])
    @patch.object(Postgresql, 'stop', Mock(return_value=False))
    @patch.object(Postgresql, 'start', Mock())
    def test_rewind(self, mock_checkpoint):
        self.p.rewind(self.leader)
        with patch.object(Postgresql, 'pg_rewind', Mock(return_value=False)):
            mock_checkpoint.side_effect = ['1', '', '', '']
            self.p.rewind(self.leader)
            self.p.rewind(self.leader)
            with patch.object(Postgresql, 'check_leader_is_not_in_recovery',
                              Mock(return_value=False)):
                self.p.rewind(self.leader)
            self.p.config['remove_data_directory_on_rewind_failure'] = False
            self.p.trigger_check_diverged_lsn()
            self.p.rewind(self.leader)
        with patch.object(Postgresql, 'is_running', Mock(return_value=True)):
            self.p.rewind(self.leader)
            self.p.is_leader = Mock(return_value=False)
            self.p.rewind(self.leader)

    @patch.object(Postgresql, 'is_running', Mock(return_value=False))
    @patch.object(Postgresql, 'start', Mock())
    def test_follow(self):
        self.p.follow(None)

    @patch('subprocess.check_output',
           Mock(return_value=0, side_effect=pg_controldata_string))
    def test_can_rewind(self):
        with patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with patch('subprocess.call', side_effect=OSError):
            self.assertFalse(self.p.can_rewind)
        with patch.object(Postgresql, 'controldata',
                          Mock(return_value={'wal_log_hints setting': 'on'})):
            self.assertTrue(self.p.can_rewind)
        self.p.config['use_pg_rewind'] = False
        self.assertFalse(self.p.can_rewind)

    @patch('time.sleep', Mock())
    @patch.object(Postgresql, 'remove_data_directory', Mock(return_value=True))
    def test_create_replica(self):
        self.p.delete_trigger_file = Mock(side_effect=OSError)
        with patch('subprocess.call', Mock(side_effect=[1, 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)
        with patch('subprocess.call', Mock(side_effect=[Exception(), 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo'}
        with patch('subprocess.call', Mock(return_value=0)):
            self.assertEquals(self.p.create_replica(self.leader), 0)
            del self.p.config['wale']
            self.assertEquals(self.p.create_replica(self.leader), 0)

        with patch('subprocess.call', Mock(side_effect=Exception("foo"))):
            self.assertEquals(self.p.create_replica(self.leader), 1)

        with patch('subprocess.call', Mock(return_value=1)):
            self.assertEquals(self.p.create_replica(self.leader), 1)

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, None, self.leader, 0,
                          [self.me, self.other, self.leadermem], None, None)
        with mock.patch('patroni.postgresql.Postgresql._query',
                        Mock(side_effect=psycopg2.OperationalError)):
            self.p.sync_replication_slots(cluster)
        self.p.sync_replication_slots(cluster)
        with mock.patch('patroni.postgresql.Postgresql.role',
                        new_callable=PropertyMock(return_value='replica')):
            self.p.sync_replication_slots(cluster)
        with mock.patch('patroni.postgresql.logger.error',
                        new_callable=Mock()) as errorlog_mock:
            self.p.query = Mock()
            alias1 = Member(
                0, 'test-3', 28, {
                    'conn_url':
                    'postgres://*****:*****@127.0.0.1:5436/postgres'
                })
            alias2 = Member(
                0, 'test.3', 28, {
                    'conn_url':
                    'postgres://*****:*****@127.0.0.1:5436/postgres'
                })
            cluster.members.extend([alias1, alias2])
            self.p.sync_replication_slots(cluster)
            errorlog_mock.assert_called_once()
            assert "test-3" in errorlog_mock.call_args[0][1]
            assert "test.3" in errorlog_mock.call_args[0][1]

    @patch.object(MockCursor, 'execute',
                  Mock(side_effect=psycopg2.OperationalError))
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query,
                          'RetryFailedError')
        self.assertRaises(psycopg2.ProgrammingError, self.p.query, 'blabla')

    @patch.object(Postgresql, 'pg_isready', Mock(return_value=STATE_REJECT))
    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())
        with patch.object(Postgresql, '_query',
                          Mock(side_effect=RetryFailedError(''))):
            self.assertRaises(PostgresConnectionException, self.p.is_leader)

    def test_reload(self):
        self.assertTrue(self.p.reload())

    @patch.object(Postgresql, 'is_running')
    def test_is_healthy(self, mock_is_running):
        mock_is_running.return_value = True
        self.assertTrue(self.p.is_healthy())
        mock_is_running.return_value = False
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p.set_role('replica')
        self.assertTrue(self.p.promote())
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEquals(self.p.last_operation(), '2')
        Thread(target=self.p.last_operation).start()

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('os.kill', Mock(side_effect=Exception))
    @patch('os.getpid', Mock(return_value=2))
    @patch('os.getppid', Mock(return_value=2))
    @patch.object(builtins, 'open', mock_open(read_data='-1'))
    @patch.object(Postgresql, '_version_file_exists', Mock(return_value=True))
    def test_is_running(self):
        self.assertFalse(self.p.is_running())

    @patch('shlex.split', Mock(side_effect=OSError))
    def test_call_nowait(self):
        self.p.set_role('replica')
        self.assertIsNone(self.p.call_nowait('on_start'))
        self.p.bootstrapping = True
        self.assertIsNone(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(
            side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.move_data_directory()
        with patch('os.rename', Mock(side_effect=OSError)):
            self.p.move_data_directory()

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertFalse(self.p.bootstrap({}))

        config = {
            'users': {
                'replicator': {
                    'password': '******',
                    'options': ['replication']
                }
            }
        }

        self.p.bootstrap(config)
        with open(os.path.join(self.config_dir, 'pg_hba.conf')) as f:
            lines = f.readlines()
            self.assertTrue('host all all 0.0.0.0/0 md5\n' in lines)

        self.p.config.pop('pg_hba')
        config.update({
            'post_init':
            '/bin/false',
            'pg_hba': [
                'host replication replicator 127.0.0.1/32 md5',
                'hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'
            ]
        })
        self.p.bootstrap(config)
        with open(os.path.join(self.data_dir, 'pg_hba.conf')) as f:
            lines = f.readlines()
            self.assertTrue(
                'host replication replicator 127.0.0.1/32 md5\n' in lines)

    def test_custom_bootstrap(self):
        config = {'method': 'foo', 'foo': {'command': 'bar'}}
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertFalse(self.p.bootstrap(config))
        with patch('subprocess.call', Mock(side_effect=Exception)):
            self.assertFalse(self.p.bootstrap(config))
        with patch('subprocess.call', Mock(return_value=0)),\
                patch('subprocess.Popen', Mock(side_effect=Exception("42"))),\
                patch('os.path.isfile', Mock(return_value=True)),\
                patch('os.unlink', Mock()),\
                patch.object(Postgresql, 'save_configuration_files', Mock()),\
                patch.object(Postgresql, 'restore_configuration_files', Mock()),\
                patch.object(Postgresql, 'write_recovery_conf', Mock()):
            with self.assertRaises(Exception) as e:
                self.p.bootstrap(config)
            self.assertEqual(str(e.exception), '42')

            config['foo']['recovery_conf'] = {'foo': 'bar'}

            with self.assertRaises(Exception) as e:
                self.p.bootstrap(config)
            self.assertEqual(str(e.exception), '42')

    @patch('time.sleep', Mock())
    @patch('os.unlink', Mock())
    @patch.object(Postgresql, 'run_bootstrap_post_init',
                  Mock(return_value=True))
    @patch.object(Postgresql, '_custom_bootstrap', Mock(return_value=True))
    @patch.object(Postgresql, 'start', Mock(return_value=True))
    def test_post_bootstrap(self):
        config = {'method': 'foo', 'foo': {'command': 'bar'}}
        self.p.bootstrap(config)

        task = CriticalTask()
        with patch.object(Postgresql, 'create_or_update_role',
                          Mock(side_effect=Exception)):
            self.p.post_bootstrap({}, task)
            self.assertFalse(task.result)

        self.p.config.pop('pg_hba')
        self.p.post_bootstrap({}, task)
        self.assertTrue(task.result)

        self.p.bootstrap(config)
        self.p.set_state('stopped')
        self.p.reload_config({
            'authentication': {
                'superuser': {
                    'username': '******',
                    'password': '******'
                },
                'replication': {
                    'username': '******',
                    'password': '******'
                }
            },
            'listen': '*',
            'retry_timeout': 10,
            'parameters': {
                'hba_file': 'foo'
            }
        })
        with patch.object(Postgresql, 'restart', Mock()) as mock_restart:
            self.p.post_bootstrap({}, task)
            mock_restart.assert_called_once()

    def test_run_bootstrap_post_init(self):
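        # The assertions below rely on the post_init hook being invoked with the
        # local connection URL as its only extra argument, with the superuser
        # credentials supplied through a PGPASSFILE environment variable rather
        # than on the command line.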
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertFalse(
                self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

        with patch('subprocess.call', Mock(side_effect=OSError)):
            self.assertFalse(
                self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

        with patch('subprocess.call', Mock(return_value=0)) as mock_method:
            self.p._superuser.pop('username')
            self.assertTrue(
                self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))
            mock_method.assert_called()
            args, kwargs = mock_method.call_args
            self.assertTrue('PGPASSFILE' in kwargs['env'])
            self.assertEquals(
                args[0], ['/bin/false', 'postgres://127.0.0.2:5432/postgres'])

            mock_method.reset_mock()
            self.p._local_address.pop('host')
            self.assertTrue(
                self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))
            mock_method.assert_called()
            self.assertEquals(mock_method.call_args[0][0],
                              ['/bin/false', 'postgres://:5432/postgres'])

    @patch('patroni.postgresql.Postgresql.create_replica',
           Mock(return_value=0))
    def test_clone(self):
        self.p.clone(self.leader)

    @patch('os.listdir', Mock(return_value=['recovery.conf']))
    @patch('os.path.exists', Mock(return_value=True))
    def test_get_postgres_role_from_data_directory(self):
        self.assertEquals(self.p.get_postgres_role_from_data_directory(),
                          'replica')

    def test_remove_data_directory(self):
        self.p.remove_data_directory()
        open(self.data_dir, 'w').close()
        self.p.remove_data_directory()
        os.symlink('unexisting', self.data_dir)
        with patch('os.unlink', Mock(side_effect=OSError)):
            self.p.remove_data_directory()
        self.p.remove_data_directory()

    @patch('patroni.postgresql.Postgresql._version_file_exists',
           Mock(return_value=True))
    def test_controldata(self):
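        # controldata() is expected to parse `pg_controldata` output into a dict
        # of "Label: value" pairs (50 entries for the sample output used here)
        # and to return an empty dict when the call fails.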
        with patch('subprocess.check_output',
                   Mock(return_value=0, side_effect=pg_controldata_string)):
            data = self.p.controldata()
            self.assertEquals(len(data), 50)
            self.assertEquals(data['Database cluster state'],
                              'shut down in recovery')
            self.assertEquals(data['wal_log_hints setting'], 'on')
            self.assertEquals(int(data['Database block size']), 8192)

        with patch('subprocess.check_output',
                   Mock(side_effect=subprocess.CalledProcessError(1, ''))):
            self.assertEquals(self.p.controldata(), {})

    @patch('patroni.postgresql.Postgresql._version_file_exists',
           Mock(return_value=True))
    @patch('subprocess.check_output',
           MagicMock(return_value=0, side_effect=pg_controldata_string))
    def test_sysid(self):
        self.assertEqual(self.p.sysid, "6200971513092291716")

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_save_configuration_files(self):
        self.p.save_configuration_files()

    @patch('os.path.isfile', Mock(side_effect=[False, True]))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_restore_configuration_files(self):
        self.p.restore_configuration_files()

    def test_can_create_replica_without_replication_connection(self):
        self.p.config['create_replica_method'] = []
        self.assertFalse(
            self.p.can_create_replica_without_replication_connection())
        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo', 'no_master': 1}
        self.assertTrue(
            self.p.can_create_replica_without_replication_connection())

    def test_replica_method_can_work_without_replication_connection(self):
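        # A create_replica_method can run without a replication connection only
        # when its config section sets a truthy 'no_master' flag; 'basebackup'
        # always needs a reachable master.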
        self.assertFalse(
            self.p.replica_method_can_work_without_replication_connection(
                'basebackup'))
        self.assertFalse(
            self.p.replica_method_can_work_without_replication_connection(
                'foobar'))
        self.p.config['foo'] = {'command': 'bar', 'no_master': 1}
        self.assertTrue(
            self.p.replica_method_can_work_without_replication_connection(
                'foo'))
        self.p.config['foo'] = {'command': 'bar'}
        self.assertFalse(
            self.p.replica_method_can_work_without_replication_connection(
                'foo'))

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_reload_config(self):
        parameters = self._PARAMETERS.copy()
        parameters.pop('f.oo')
        config = {
            'pg_hba': [''],
            'use_unix_socket': True,
            'authentication': {},
            'retry_timeout': 10,
            'listen': '*',
            'parameters': parameters
        }
        self.p.reload_config(config)
        parameters['b.ar'] = 'bar'
        self.p.reload_config(config)
        parameters['autovacuum'] = 'on'
        self.p.reload_config(config)
        parameters['autovacuum'] = 'off'
        parameters.pop('search_path')
        config['listen'] = '*:5433'
        self.p.reload_config(config)
        parameters['unix_socket_directories'] = '.'
        self.p.reload_config(config)
        self.p.resolve_connection_addresses()

    @patch.object(Postgresql, '_version_file_exists', Mock(return_value=True))
    def test_get_major_version(self):
        with patch.object(builtins, 'open', mock_open(read_data='9.4')):
            self.assertEquals(self.p.get_major_version(), 90400)
        with patch.object(builtins, 'open', Mock(side_effect=Exception)):
            self.assertEquals(self.p.get_major_version(), 0)

    def test_postmaster_start_time(self):
        with patch.object(
                MockCursor, "fetchone",
                Mock(return_value=('foo', True, '', '', '', '', False))):
            self.assertEqual(self.p.postmaster_start_time(), 'foo')
        with patch.object(MockCursor, "execute", side_effect=psycopg2.Error):
            self.assertIsNone(self.p.postmaster_start_time())

    def test_check_for_startup(self):
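        # The mocked subprocess.call below appears to stand in for a
        # pg_isready-style probe: exit code 0 means the server accepts
        # connections (state becomes 'running'), 1 means it is still rejecting
        # connections (stay 'starting'), 2 means no response ('start failed'),
        # and any other code while starting is treated as 'running'.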
        with patch('subprocess.call', return_value=0):
            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

        with patch('subprocess.call', return_value=1):
            self.p._state = 'starting'
            self.assertTrue(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'starting')

        with patch('subprocess.call', return_value=2):
            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'start failed')

        with patch('subprocess.call', return_value=0):
            self.p._state = 'running'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

        with patch('subprocess.call', return_value=127):
            self.p._state = 'running'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

    def test_wait_for_startup(self):
        state = {'sleeps': 0, 'num_rejects': 0, 'final_return': 0}

        def increment_sleeps(*args):
            print("Sleep")
            state['sleeps'] += 1

        def isready_return(*args):
            ret = 1 if state['sleeps'] < state['num_rejects'] else state[
                'final_return']
            print("Isready {0} {1}".format(ret, state))
            return ret

        def time_in_state(*args):
            return state['sleeps']
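        # Simulation mechanics: every fake time.sleep() bumps the 'sleeps'
        # counter, isready_return() keeps reporting 1 (rejecting connections)
        # until 'num_rejects' sleeps have elapsed and then returns
        # 'final_return', and time_in_state() reuses the sleep counter so the
        # timeout branch can be exercised.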

        with patch('subprocess.call', side_effect=isready_return):
            with patch('time.sleep', side_effect=increment_sleeps):
                self.p.time_in_state = Mock(side_effect=time_in_state)

                self.p._state = 'stopped'
                self.assertTrue(self.p.wait_for_startup())
                self.assertEquals(state['sleeps'], 0)

                self.p._state = 'starting'
                state['num_rejects'] = 5
                self.assertTrue(self.p.wait_for_startup())
                self.assertEquals(state['sleeps'], 5)

                self.p._state = 'starting'
                state['sleeps'] = 0
                state['final_return'] = 2
                self.assertFalse(self.p.wait_for_startup())

                self.p._state = 'starting'
                state['sleeps'] = 0
                state['final_return'] = 0
                self.assertFalse(self.p.wait_for_startup(timeout=2))
                self.assertEquals(state['sleeps'], 3)

    def test_read_pid_file(self):
        pidfile = os.path.join(self.data_dir, 'postmaster.pid')
        if os.path.exists(pidfile):
            os.remove(pidfile)
        self.assertEquals(self.p.read_pid_file(), {})

    @patch('os.kill')
    def test_is_pid_running(self, mock_kill):
        mock_kill.return_value = True
        self.assertTrue(self.p.is_pid_running(-100))
        self.assertFalse(self.p.is_pid_running(0))
        self.assertFalse(self.p.is_pid_running(None))

    def test_pick_sync_standby(self):
        cluster = Cluster(True, None, self.leader, 0,
                          [self.me, self.other, self.leadermem], None,
                          SyncState(0, self.me.name, self.leadermem.name))
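
        # The mocked query rows mimic (application_name, state, sync_state)
        # tuples from pg_stat_replication. The cases below check that the member
        # already reported as 'sync' wins, that a 'potential' standby is returned
        # with False (not yet synchronous), that otherwise the first streaming
        # member known to the cluster is picked, and that an empty result yields
        # (None, False).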

        with patch.object(Postgresql,
                          "query",
                          return_value=[
                              (self.leadermem.name, 'streaming', 'sync'),
                              (self.me.name, 'streaming', 'async'),
                              (self.other.name, 'streaming', 'async'),
                          ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster),
                              (self.leadermem.name, True))

        with patch.object(Postgresql,
                          "query",
                          return_value=[
                              (self.me.name, 'streaming', 'async'),
                              (self.leadermem.name, 'streaming', 'potential'),
                              (self.other.name, 'streaming', 'async'),
                          ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster),
                              (self.leadermem.name, False))

        with patch.object(Postgresql,
                          "query",
                          return_value=[
                              (self.me.name, 'streaming', 'async'),
                              (self.other.name, 'streaming', 'async'),
                          ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster),
                              (self.me.name, False))

        with patch.object(Postgresql,
                          "query",
                          return_value=[
                              ('missing', 'streaming', 'sync'),
                              (self.me.name, 'streaming', 'async'),
                              (self.other.name, 'streaming', 'async'),
                          ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster),
                              (self.me.name, False))

        with patch.object(Postgresql, "query", return_value=[]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster),
                              (None, False))

    def test_set_sync_standby(self):
        def value_in_conf():
            with open(os.path.join(self.data_dir, 'postgresql.conf')) as f:
                for line in f:
                    if line.startswith('synchronous_standby_names'):
                        return line.strip()
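
        # set_synchronous_standby() is expected to rewrite
        # synchronous_standby_names in postgresql.conf and to trigger a reload
        # only when the value actually changes, which is what the
        # reset_mock()/assert_not_called() sequence below verifies.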

        mock_reload = self.p.reload = Mock()
        self.p.set_synchronous_standby('n1')
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n1'")
        mock_reload.assert_called()

        mock_reload.reset_mock()
        self.p.set_synchronous_standby('n1')
        mock_reload.assert_not_called()
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n1'")

        self.p.set_synchronous_standby('n2')
        mock_reload.assert_called()
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n2'")

        mock_reload.reset_mock()
        self.p.set_synchronous_standby(None)
        mock_reload.assert_called()
        self.assertEquals(value_in_conf(), None)

    def test_get_server_parameters(self):
        config = {
            'synchronous_mode': True,
            'parameters': {
                'wal_level': 'hot_standby'
            },
            'listen': '0'
        }
        self.p.get_server_parameters(config)
        config['synchronous_mode_strict'] = True
        self.p.get_server_parameters(config)
        self.p.set_synchronous_standby('foo')
        self.p.get_server_parameters(config)

    @patch.object(Postgresql, 'read_pid_file', Mock(return_value={'pid': 'z'}))
    def test_get_pid(self):
        self.p.get_pid()

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    @patch.object(Postgresql, '_signal_postmaster_stop',
                  Mock(return_value=(123, None)))
    @patch.object(Postgresql, 'get_pid', Mock(return_value=123))
    @patch('time.sleep', Mock())
    @patch.object(Postgresql, 'is_pid_running')
    def test__wait_for_connection_close(self, mock_is_pid_running):
        mock_is_pid_running.side_effect = [True, False, False]
        mock_callback = Mock()
        self.p.stop(on_safepoint=mock_callback)

        mock_is_pid_running.side_effect = [True, False, False]
        with patch.object(MockCursor, "execute",
                          Mock(side_effect=psycopg2.Error)):
            self.p.stop(on_safepoint=mock_callback)

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    @patch.object(Postgresql, '_signal_postmaster_stop',
                  Mock(return_value=(123, None)))
    @patch.object(Postgresql, 'get_pid', Mock(return_value=123))
    @patch.object(Postgresql, 'is_pid_running', Mock(return_value=False))
    @patch('psutil.Process')
    def test__wait_for_user_backends_to_close(self, mock_psutil):
        child = Mock()
        child.cmdline.return_value = ['foo']
        mock_psutil.return_value.children.return_value = [child]
        mock_callback = Mock()
        self.p.stop(on_safepoint=mock_callback)

    @patch('os.kill', Mock(side_effect=[OSError(errno.ESRCH, ''), OSError]))
    @patch('time.sleep', Mock())
    @patch.object(Postgresql, 'is_pid_running',
                  Mock(side_effect=[True, False]))
    def test_terminate_starting_postmaster(self):
        self.p.terminate_starting_postmaster(123)
        self.p.terminate_starting_postmaster(123)
Пример #29
0
class TestPostgresql(unittest.TestCase):
    _PARAMETERS = {'wal_level': 'hot_standby', 'max_replication_slots': 5, 'f.oo': 'bar',
                   'search_path': 'public', 'hot_standby': 'on', 'max_wal_senders': 5,
                   'wal_keep_segments': 8, 'wal_log_hints': 'on', 'max_locks_per_transaction': 64,
                   'max_worker_processes': 8, 'max_connections': 100, 'max_prepared_transactions': 0}

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('os.rename', Mock())
    @patch.object(Postgresql, 'get_major_version', Mock(return_value=9.4))
    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def setUp(self):
        self.data_dir = 'data/test0'
        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': self.data_dir, 'retry_timeout': 10,
                             'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                             'authentication': {'superuser': {'username': '******', 'password': '******'},
                                                'replication': {'username': '******', 'password': '******'}},
                             'remove_data_directory_on_rewind_failure': True,
                             'use_pg_rewind': True, 'pg_ctl_timeout': 'bla',
                             'parameters': self._PARAMETERS,
                             'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                           'on_restart': 'true', 'on_role_change': 'true',
                                           'on_reload': 'true'
                                           },
                             'restore': 'true'})
        self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(0, 'test1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres',
                            'tags': {'replicatefrom': 'leader'}})
        self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})

    def tearDown(self):
        shutil.rmtree('data')

    def test_get_initdb_options(self):
        self.assertEquals(self.p.get_initdb_options([{'encoding': 'UTF8'}, 'data-checksums']),
                          ['--encoding=UTF8', '--data-checksums'])
        self.assertRaises(Exception, self.p.get_initdb_options, [{'pgdata': 'bar'}])
        self.assertRaises(Exception, self.p.get_initdb_options, [{'foo': 'bar', 1: 2}])
        self.assertRaises(Exception, self.p.get_initdb_options, [1])

    @patch('os.path.exists', Mock(return_value=True))
    @patch('os.unlink', Mock())
    def test_delete_trigger_file(self):
        self.p.delete_trigger_file()

    @patch.object(Postgresql, 'is_running')
    def test_start(self, mock_is_running):
        mock_is_running.return_value = True
        self.assertTrue(self.p.start())
        mock_is_running.return_value = False
        open(os.path.join(self.data_dir, 'postmaster.pid'), 'w').close()
        pg_conf = os.path.join(self.data_dir, 'postgresql.conf')
        open(pg_conf, 'w').close()
        self.assertTrue(self.p.start())
        with open(pg_conf) as f:
            lines = f.readlines()
            self.assertTrue("f.oo = 'bar'\n" in lines)

    @patch.object(Postgresql, 'is_running')
    def test_stop(self, mock_is_running):
        mock_is_running.return_value = True
        self.assertTrue(self.p.stop())
        with patch('subprocess.call', Mock(return_value=1)):
            mock_is_running.return_value = False
            self.assertTrue(self.p.stop())

    def test_restart(self):
        self.p.start = false
        self.assertFalse(self.p.restart())
        self.assertEquals(self.p.state, 'restart failed (restarting)')

    @patch.object(builtins, 'open', MagicMock())
    def test_write_pgpass(self):
        self.p.write_pgpass({'host': 'localhost', 'port': '5432', 'user': '******', 'password': '******'})

    def test_checkpoint(self):
        with patch.object(MockCursor, 'fetchone', Mock(return_value=(True, ))):
            self.assertEquals(self.p.checkpoint({'user': '******'}), 'is_in_recovery=true')
        with patch.object(MockCursor, 'execute', Mock(return_value=None)):
            self.assertIsNone(self.p.checkpoint())
        self.assertEquals(self.p.checkpoint(), 'not accessible or not healty')

    @patch('subprocess.call', side_effect=OSError)
    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_pg_rewind(self, mock_call):
        r = {'user': '', 'host': '', 'port': '', 'database': '', 'password': ''}
        self.assertTrue(self.p.rewind(r))
        subprocess.call = mock_call
        self.assertFalse(self.p.rewind(r))

    @patch('os.unlink', Mock(return_value=True))
    @patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string))
    @patch.object(Postgresql, 'remove_data_directory', Mock(return_value=True))
    @patch.object(Postgresql, 'single_user_mode', Mock(return_value=1))
    @patch.object(Postgresql, 'write_pgpass', Mock(return_value={}))
    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    @patch.object(Postgresql, 'rewind', return_value=False)
    def test_follow(self, mock_pg_rewind):
        with patch.object(Postgresql, 'check_recovery_conf', Mock(return_value=True)):
            self.assertTrue(self.p.follow(None, None))  # nothing to do, recovery.conf has good primary_conninfo

        self.p.follow(self.me, self.me)  # follow is called when the node is holding leader lock

        with patch.object(Postgresql, 'restart', Mock(return_value=False)):
            self.p.set_role('replica')
            self.p.follow(None, None)  # restart without rewind
            self.p.set_role('master')

        with patch.object(Postgresql, 'stop', Mock(return_value=False)):
            self.p.follow(self.leader, self.leader)  # failed to stop postgres

        self.p.follow(self.leader, None)  # Leader unknown, can not rewind

        self.p.follow(self.leader, self.leader)  # "leader" is not accessible or is_in_recovery

        with patch.object(Postgresql, 'checkpoint', Mock(return_value=None)):
            self.p.follow(self.leader, self.leader)
            self.p.set_role('master')
            mock_pg_rewind.return_value = True
            self.p.follow(self.leader, self.leader)

        self.p.follow(None, None)  # check_recovery_conf...

    @patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string))
    def test_can_rewind(self):
        with patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with patch('subprocess.call', side_effect=OSError):
            self.assertFalse(self.p.can_rewind)
        with patch.object(Postgresql, 'controldata', Mock(return_value={'wal_log_hints setting': 'on'})):
            self.assertTrue(self.p.can_rewind)
        self.p.config['use_pg_rewind'] = False
        self.assertFalse(self.p.can_rewind)

    @patch('time.sleep', Mock())
    def test_create_replica(self):
        self.p.delete_trigger_file = Mock(side_effect=OSError)
        with patch('subprocess.call', Mock(side_effect=[1, 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)
        with patch('subprocess.call', Mock(side_effect=[Exception(), 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo'}
        with patch('subprocess.call', Mock(return_value=0)):
            self.assertEquals(self.p.create_replica(self.leader), 0)
            del self.p.config['wale']
            self.assertEquals(self.p.create_replica(self.leader), 0)

        with patch('subprocess.call', Mock(side_effect=Exception("foo"))):
            self.assertEquals(self.p.create_replica(self.leader), 1)

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, None, self.leader, 0, [self.me, self.other, self.leadermem], None)
        self.p.sync_replication_slots(cluster)
        self.p.query = Mock(side_effect=psycopg2.OperationalError)
        self.p.schedule_load_slots = True
        self.p.sync_replication_slots(cluster)
        self.p.schedule_load_slots = False
        with mock.patch('patroni.postgresql.Postgresql.role', new_callable=PropertyMock(return_value='replica')):
            self.p.sync_replication_slots(cluster)

    @patch.object(MockConnect, 'closed', 2)
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query, 'RetryFailedError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())

    def test_reload(self):
        self.assertTrue(self.p.reload())

    @patch.object(Postgresql, 'is_running')
    def test_is_healthy(self, mock_is_running):
        mock_is_running.return_value = True
        self.assertTrue(self.p.is_healthy())
        mock_is_running.return_value = False
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p._role = 'replica'
        self.assertTrue(self.p.promote())
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEquals(self.p.last_operation(), '0')

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('os.kill', Mock(side_effect=Exception))
    @patch('os.getpid', Mock(return_value=2))
    @patch('os.getppid', Mock(return_value=2))
    @patch.object(builtins, 'open', mock_open(read_data='-1'))
    @patch.object(Postgresql, '_version_file_exists', Mock(return_value=True))
    def test_is_running(self):
        self.assertFalse(self.p.is_running())

    @patch('subprocess.Popen', Mock(side_effect=OSError))
    def test_call_nowait(self):
        self.assertFalse(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    def test_check_replication_lag(self):
        self.assertTrue(self.p.check_replication_lag(0))

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.move_data_directory()
        with patch('os.rename', Mock(side_effect=OSError)):
            self.p.move_data_directory()

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertRaises(PostgresException, self.p.bootstrap, {})

        self.p.bootstrap({'users': {'replicator': {'password': '******', 'options': ['replication']}},
                          'pg_hba': ['host replication replicator 127.0.0.1/32 md5',
                                     'hostssl all all 0.0.0.0/0 md5',
                                     'host all all 0.0.0.0/0 md5']})
        with open(os.path.join(self.data_dir, 'pg_hba.conf')) as f:
            lines = f.readlines()
            assert 'host replication replicator 127.0.0.1/32 md5\n' in lines
            assert 'host all all 0.0.0.0/0 md5\n' in lines

    @patch('patroni.postgresql.Postgresql.create_replica', Mock(return_value=0))
    def test_clone(self):
        self.p.clone(self.leader)

    @patch('os.listdir', Mock(return_value=['recovery.conf']))
    @patch('os.path.exists', Mock(return_value=True))
    def test_get_postgres_role_from_data_directory(self):
        self.assertEquals(self.p.get_postgres_role_from_data_directory(), 'replica')

    def test_remove_data_directory(self):
        self.p.remove_data_directory()
        open(self.data_dir, 'w').close()
        self.p.remove_data_directory()
        os.symlink('unexisting', self.data_dir)
        with patch('os.unlink', Mock(side_effect=OSError)):
            self.p.remove_data_directory()
        self.p.remove_data_directory()

    @patch('patroni.postgresql.Postgresql._version_file_exists', Mock(return_value=True))
    def test_controldata(self):
        with patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string)):
            data = self.p.controldata()
            self.assertEquals(len(data), 50)
            self.assertEquals(data['Database cluster state'], 'shut down in recovery')
            self.assertEquals(data['wal_log_hints setting'], 'on')
            self.assertEquals(int(data['Database block size']), 8192)

        with patch('subprocess.check_output', Mock(side_effect=subprocess.CalledProcessError(1, ''))):
            self.assertEquals(self.p.controldata(), {})

    def test_read_postmaster_opts(self):
        m = mock_open(read_data=postmaster_opts_string())
        with patch.object(builtins, 'open', m):
            data = self.p.read_postmaster_opts()
            self.assertEquals(data['wal_level'], 'hot_standby')
            self.assertEquals(int(data['max_replication_slots']), 5)
            self.assertEqual(data.get('D'), None)

            m.side_effect = IOError
            data = self.p.read_postmaster_opts()
            self.assertEqual(data, dict())

    @patch('subprocess.Popen')
    @patch.object(builtins, 'open', MagicMock(return_value=42))
    def test_single_user_mode(self, subprocess_popen_mock):
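        # single_user_mode() is expected to spawn
        # `postgres --single -D <data_dir> [-c name=value ...] postgres`
        # (the trailing 'postgres' being the database name), presumably feeding
        # the optional command via stdin, and to return the process exit status;
        # the Popen assertions below pin down that exact argument list.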
        subprocess_popen_mock.return_value.wait.return_value = 0
        self.assertEquals(self.p.single_user_mode(options=dict(archive_mode='on', archive_command='false')), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.data_dir,
                                                      '-c', 'archive_command=false', '-c', 'archive_mode=on',
                                                       'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.reset_mock()
        self.assertEquals(self.p.single_user_mode(command="CHECKPOINT"), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.data_dir,
                                                      'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.return_value = None
        self.assertEquals(self.p.single_user_mode(), 1)

    @patch('os.listdir', MagicMock(side_effect=fake_listdir))
    @patch('os.unlink', return_value=True)
    @patch('os.remove', return_value=True)
    @patch('os.path.islink', return_value=False)
    @patch('os.path.isfile', return_value=True)
    def test_cleanup_archive_status(self, mock_file, mock_link, mock_remove, mock_unlink):
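        # cleanup_archive_status() is expected to walk
        # <data_dir>/pg_xlog/archive_status and delete every entry, using
        # os.remove() for regular files and os.unlink() for symlinks, while
        # swallowing OSError from the isfile/islink checks; the three scenarios
        # below cover each branch.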
        ap = os.path.join(self.data_dir, 'pg_xlog', 'archive_status/')
        self.p.cleanup_archive_status()
        mock_remove.assert_has_calls([mock.call(ap + 'a'), mock.call(ap + 'b'), mock.call(ap + 'c')])
        mock_unlink.assert_not_called()

        mock_remove.reset_mock()

        mock_file.return_value = False
        mock_link.return_value = True
        self.p.cleanup_archive_status()
        mock_unlink.assert_has_calls([mock.call(ap + 'a'), mock.call(ap + 'b'), mock.call(ap + 'c')])
        mock_remove.assert_not_called()

        mock_unlink.reset_mock()
        mock_remove.reset_mock()

        mock_file.side_effect = OSError
        mock_link.side_effect = OSError
        self.p.cleanup_archive_status()
        mock_unlink.assert_not_called()
        mock_remove.assert_not_called()

    @patch('patroni.postgresql.Postgresql._version_file_exists', Mock(return_value=True))
    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    def test_sysid(self):
        self.assertEqual(self.p.sysid, "6200971513092291716")

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_save_configuration_files(self):
        self.p.save_configuration_files()

    @patch('os.path.isfile', Mock(side_effect=[False, True]))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_restore_configuration_files(self):
        self.p.restore_configuration_files()

    def test_can_create_replica_without_replication_connection(self):
        self.p.config['create_replica_method'] = []
        self.assertFalse(self.p.can_create_replica_without_replication_connection())
        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo', 'no_master': 1}
        self.assertTrue(self.p.can_create_replica_without_replication_connection())

    def test_replica_method_can_work_without_replication_connection(self):
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('basebackup'))
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('foobar'))
        self.p.config['foo'] = {'command': 'bar', 'no_master': 1}
        self.assertTrue(self.p.replica_method_can_work_without_replication_connection('foo'))
        self.p.config['foo'] = {'command': 'bar'}
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('foo'))

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_reload_config(self):
        parameters = self._PARAMETERS.copy()
        parameters.pop('f.oo')
        self.p.reload_config({'retry_timeout': 10, 'listen': '*', 'parameters': parameters})
        parameters['b.ar'] = 'bar'
        self.p.reload_config({'retry_timeout': 10, 'listen': '*', 'parameters': parameters})
        parameters['autovacuum'] = 'on'
        self.p.reload_config({'retry_timeout': 10, 'listen': '*', 'parameters': parameters})
        parameters['autovacuum'] = 'off'
        parameters.pop('search_path')
        self.p.reload_config({'retry_timeout': 10, 'listen': '*:5433', 'parameters': parameters})

    @patch.object(Postgresql, '_version_file_exists', Mock(return_value=True))
    def test_get_major_version(self):
        with patch.object(builtins, 'open', mock_open(read_data='9.4')):
            self.assertEquals(self.p.get_major_version(), 9.4)
        with patch.object(builtins, 'open', Mock(side_effect=Exception)):
            self.assertEquals(self.p.get_major_version(), 0.0)

    def test_postmaster_start_time(self):
        with patch.object(MockCursor, "fetchone", Mock(return_value=('foo', True, '', '', '', '', False))):
            self.assertEqual(self.p.postmaster_start_time(), 'foo')
        with patch.object(MockCursor, "execute", side_effect=psycopg2.Error):
            self.assertIsNone(self.p.postmaster_start_time())
Пример #30
0
class Patroni(object):
    def __init__(self, conf):
        from patroni.api import RestApiServer
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.log import PatroniLogger
        from patroni.postgresql import Postgresql
        from patroni.request import PatroniRequest
        from patroni.watchdog import Watchdog

        self.setup_signal_handlers()

        self.version = __version__
        self.logger = PatroniLogger()
        self.config = conf
        self.logger.reload_config(self.config.get('log', {}))
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.request = PatroniRequest(self.config, True)
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                # TODO: which dynamic parameters does cluster.config contain, and how are they set?
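                # Dynamic configuration typically holds cluster-wide settings such as
                # ttl, loop_wait, retry_timeout, maximum_lag_on_failover and
                # postgresql.parameters, and is normally changed with
                # `patronictl edit-config` or through the REST API /config endpoint
                # (background note based on upstream Patroni documentation, not shown here).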
                if cluster and cluster.config and cluster.config.data:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                        self.watchdog.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(
                            self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')
                time.sleep(5)

    def get_tags(self):
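        # Boolean-style tags (clonefrom, nofailover, noloadbalance, nosync) are
        # dropped when falsy so that only explicitly enabled flags survive;
        # other tags such as replicatefrom are kept regardless of their value.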
        return {
            tag: value
            for tag, value in self.config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance',
                           'nosync') or value
        }

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self, sighup=False):
        try:
            self.tags = self.get_tags()
            self.logger.reload_config(self.config.get('log', {}))
            self.watchdog.reload_config(self.config)
            if sighup:
                self.request.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'], sighup)
            self.dcs.reload_config(self.config)
        except Exception:
            logger.exception('Failed to reload config_file=%s',
                             self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        with self._sigterm_lock:
            if not self._received_sigterm:
                self._received_sigterm = True
                sys.exit()

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()

    @property
    def received_sigterm(self):
        with self._sigterm_lock:
            return self._received_sigterm

    def run(self):
        # SGS: start the REST API server
        self.api.start()
        # SGS: start logging
        self.logger.start()
        self.next_run = time.time()

        while not self.received_sigterm:
            # SGS: was a SIGHUP signal received? How is that signal triggered, simply by sending it with the kill command?
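            # A reload is triggered by sending SIGHUP to the Patroni process
            # (for example `kill -HUP <pid>`); upstream Patroni also exposes
            # `patronictl reload` and a POST /reload REST endpoint for the same
            # purpose (background note, not exercised in this listing).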
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config(True)
                else:
                    self.postgresql.config.reload_config(
                        self.config['postgresql'], True)
            # SGS: periodically check the state of the member managed by this Patroni instance and react
            # accordingly; see the comments inside run_cycle for details
            logger.info(self.ha.run_cycle())

            if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \
                    and self.config.set_dynamic_configuration(self.dcs.cluster.config):
                self.reload_config()

            if self.postgresql.role != 'uninitialized':
                self.config.save_cache()

            self.schedule_next_run()

    def setup_signal_handlers(self):
        from threading import Lock

        self._received_sighup = False
        self._sigterm_lock = Lock()
        self._received_sigterm = False
        if os.name != 'nt':
            signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)

    def shutdown(self):
        with self._sigterm_lock:
            self._received_sigterm = True
        try:
            self.api.shutdown()
        except Exception:
            logger.exception('Exception during RestApi.shutdown')
        try:
            self.ha.shutdown()
        except Exception:
            logger.exception('Exception during Ha.shutdown')
        self.logger.shutdown()
Пример #31
0
class TestHa(unittest.TestCase):

    @patch('socket.getaddrinfo', socket_getaddrinfo)
    @patch.object(etcd.Client, 'read', etcd_read)
    def setUp(self):
        with patch.object(etcd.Client, 'machines') as mock_machines:
            mock_machines.__get__ = Mock(return_value=['http://*****:*****'])

    @patch('sys.exit', return_value=1)
    @patch('patroni.ha.Ha.sysid_valid', MagicMock(return_value=True))
    def test_sysid_no_match(self, exit_mock):
        self.ha.run_cycle()
        exit_mock.assert_called_once_with(1)

    @patch.object(Cluster, 'is_unlocked', Mock(return_value=False))
    def test_start_as_readonly(self):
        self.p.is_leader = false
        self.p.is_healthy = true
        self.ha.has_lock = true
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader because i had the session lock')

    def test_acquire_lock_as_master(self):
        self.assertEquals(self.ha.run_cycle(), 'acquired session lock as a leader')

    def test_promoted_by_acquiring_lock(self):
        self.ha.is_healthiest_node = true
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')

    def test_demote_after_failing_to_obtain_lock(self):
        self.ha.acquire_lock = false
        self.assertEquals(self.ha.run_cycle(), 'demoted self after trying and failing to obtain lock')

    def test_follow_new_leader_after_failing_to_obtain_lock(self):
        self.ha.is_healthiest_node = true
        self.ha.acquire_lock = false
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'following new leader after trying and failing to obtain lock')

    def test_demote_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.assertEquals(self.ha.run_cycle(), 'demoting self because i am not the healthiest node')

    def test_follow_new_leader_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because i am not the healthiest node')

    def test_promote_because_have_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader because i had the session lock')

    def test_leader_with_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')

    def test_demote_because_not_having_lock(self):
        self.ha.cluster.is_unlocked = false
        self.assertEquals(self.ha.run_cycle(), 'demoting self because i do not have the lock and i was a leader')

    def test_demote_because_update_lock_failed(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.ha.update_lock = false
        self.assertEquals(self.ha.run_cycle(), 'demoting self because i do not have the lock and i was a leader')

    def test_follow(self):
        self.ha.cluster.is_unlocked = false
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am a secondary and i am following a leader')
        self.ha.patroni.replicatefrom = "foo"
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am a secondary and i am following a leader')

    def test_no_etcd_connection_master_demote(self):
        self.ha.load_cluster_from_dcs = Mock(side_effect=DCSError('Etcd is not responding properly'))
        self.assertEquals(self.ha.run_cycle(), 'demoted self because DCS is not accessible and i was a leader')

    def test_bootstrap_from_another_member(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.bootstrap(), 'trying to bootstrap from replica \'other\'')

    def test_bootstrap_waiting_for_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(), 'waiting for leader to bootstrap')

    def test_bootstrap_without_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.p.can_create_replica_without_replication_connection = MagicMock(return_value=True)
        self.assertEquals(self.ha.bootstrap(), 'trying to bootstrap (without leader)')

    def test_bootstrap_initialize_lock_failed(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(), 'failed to acquire initialize lock')

    def test_bootstrap_initialized_new_cluster(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.assertEquals(self.ha.bootstrap(), 'initialized a new cluster')

    def test_bootstrap_release_initialize_key_on_failure(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.p.bootstrap = Mock(side_effect=PostgresException("Could not bootstrap master PostgreSQL"))
        self.assertRaises(PostgresException, self.ha.bootstrap)

    def test_reinitialize(self):
        self.ha.schedule_reinitialize()
        self.ha.schedule_reinitialize()
        self.ha.run_cycle()
        self.assertIsNone(self.ha._async_executor.scheduled_action)

        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.has_lock = true
        self.ha.schedule_reinitialize()
        self.ha.run_cycle()
        self.assertIsNone(self.ha._async_executor.scheduled_action)

        self.ha.has_lock = false
        self.ha.schedule_reinitialize()
        self.ha.run_cycle()

    def test_restart(self):
        self.assertEquals(self.ha.restart(), (True, 'restarted successfully'))
        self.p.restart = false
        self.assertEquals(self.ha.restart(), (False, 'restart failed'))
        self.ha.schedule_reinitialize()
        self.assertEquals(self.ha.restart(), (False, 'reinitialize already in progress'))

    def test_restart_in_progress(self):
        self.ha._async_executor.schedule('restart', True)
        self.assertTrue(self.ha.restart_scheduled())
        self.assertEquals(self.ha.run_cycle(), 'not healthy enough for leader race')

        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.run_cycle(), 'restart in progress')

        self.ha.has_lock = true
        self.assertEquals(self.ha.run_cycle(), 'updated leader lock during restart')

        self.ha.update_lock = false
        self.assertEquals(self.ha.run_cycle(), 'failed to update leader lock during restart')

    @patch('requests.get', requests_get)
    @patch('time.sleep', Mock())
    def test_manual_failover_from_leader(self):
        self.ha.has_lock = true
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', '', None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, '', 'blabla', None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        f = Failover(0, self.p.name, '', None)
        self.ha.cluster = get_cluster_initialized_with_leader(f)
        self.assertEquals(self.ha.run_cycle(), 'manual failover: demoting myself')
        self.ha.fetch_node_status = lambda e: (e, True, True, 0, {'nofailover': 'True'})
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        # manual failover from the previous leader to us won't happen if we hold the nofailover flag
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')

        # Failover scheduled time must include timezone
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.ha.run_cycle()

        scheduled = datetime.datetime.utcnow().replace(tzinfo=pytz.UTC)
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=30)
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=-600)
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

        scheduled = None
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_process_no_leader(self):
        self.p.is_leader = false
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', 'leader', None))
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        self.ha.fetch_node_status = lambda e: (e, True, True, 0, {})  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because i am not the healthiest node')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, self.p.name, '', None))
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because i am not the healthiest node')
        self.ha.fetch_node_status = lambda e: (e, False, True, 0, {})  # inaccessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        # set failover flag to True for all members of the cluster
        # this should elect the current member, as we are not going to call the API for it.
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', 'other', None))
        self.ha.fetch_node_status = lambda e: (e, True, True, 0, {'nofailover': 'True'})  # accessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        # same as previous, but set the current member to nofailover. It must never be elected leader in that case
        self.ha.patroni.nofailover = True
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because I am not allowed to promote')

    def test_is_healthiest_node(self):
        self.ha.state_handler.is_leader = false
        self.ha.patroni.nofailover = False
        self.ha.fetch_node_status = lambda e: (e, True, True, 0, {})
        self.assertTrue(self.ha.is_healthiest_node())

    def test__is_healthiest_node(self):
        self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.p.is_leader = false
        self.ha.fetch_node_status = lambda e: (e, True, True, 0, {})  # accessible, in_recovery
        self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = lambda e: (e, True, False, 0, {})  # accessible, not in_recovery
        self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = lambda e: (e, True, True, 1, {})  # accessible, in_recovery, xlog location ahead
        self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.p.check_replication_lag = false
        self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = True
        self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = False

    @patch('requests.get', requests_get)
    def test_fetch_node_status(self):
        member = Member(0, 'test', 1, {'api_url': 'http://127.0.0.1:8011/patroni'})
        self.ha.fetch_node_status(member)
        member = Member(0, 'test', 1, {'api_url': 'http://localhost:8011/patroni'})
        self.ha.fetch_node_status(member)

    def test_post_recover(self):
        self.p.is_running = false
        self.ha.has_lock = true
        self.assertEqual(self.ha.post_recover(), 'removed leader key after trying and failing to start postgres')
        self.ha.has_lock = false
        self.assertEqual(self.ha.post_recover(), 'failed to start postgres')
        self.p.is_running = true
        self.assertIsNone(self.ha.post_recover())
Пример #32
0
    def test_create_connection_users(self):
        cfg = self.p.config
        cfg['superuser']['username'] = '******'
        p = Postgresql(cfg)
        p.create_connection_users()
Пример #33
0
class Patroni(object):

    def __init__(self):
        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    self.config.set_dynamic_configuration(cluster.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    self.config.set_dynamic_configuration(self.config['bootstrap']['dcs'])
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
        return {tag: value for tag, value in self.config.get('tags', {}).items()
                if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value}

    @property
    def nofailover(self):
        return self.tags.get('nofailover', False)

    def reload_config(self):
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s', self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return self.tags.get('noloadbalance', False)

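    # schedule_next_run keeps the HA loop on a fixed cadence: the deadline advances by
    # dcs.loop_wait each cycle; if we are already late it is reset to "now", and if the
    # DCS watch wakes up early because of an event the deadline is reset as well so the
    # next cycle runs immediately.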
    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

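    # run() is the main daemon loop: start the REST API, then on every cycle handle a
    # pending SIGHUP (reload the local configuration), execute one HA cycle, pick up
    # dynamic configuration changes from the DCS, cache the configuration on disk while
    # a data directory exists, reap finished child processes and sleep until the next
    # deadline.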
    def run(self):
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            cluster = self.dcs.cluster
            if cluster and cluster.config and self.config.set_dynamic_configuration(cluster.config):
                self.reload_config()

            if not self.postgresql.data_directory_empty():
                self.config.save_cache()

            reap_children()
            self.schedule_next_run()

    def setup_signal_handlers(self):
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
        signal.signal(signal.SIGCHLD, sigchld_handler)
Example #34
0
class TestPostgresql(unittest.TestCase):
    _PARAMETERS = {'wal_level': 'hot_standby', 'max_replication_slots': 5, 'f.oo': 'bar',
                   'search_path': 'public', 'hot_standby': 'on', 'max_wal_senders': 5,
                   'wal_keep_segments': 8, 'wal_log_hints': 'on', 'max_locks_per_transaction': 64,
                   'max_worker_processes': 8, 'max_connections': 100, 'max_prepared_transactions': 0,
                   'track_commit_timestamp': 'off'}

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('os.rename', Mock())
    @patch.object(Postgresql, 'get_major_version', Mock(return_value=9.6))
    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def setUp(self):
        self.data_dir = 'data/test0'
        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': self.data_dir, 'retry_timeout': 10,
                             'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                             'authentication': {'superuser': {'username': '******', 'password': '******'},
                                                'replication': {'username': '******', 'password': '******'}},
                             'remove_data_directory_on_rewind_failure': True,
                             'use_pg_rewind': True, 'pg_ctl_timeout': 'bla',
                             'parameters': self._PARAMETERS,
                             'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                           'on_restart': 'true', 'on_role_change': 'true',
                                           'on_reload': 'true'
                                           },
                             'restore': 'true'})
        self.p._callback_executor = Mock()
        self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(0, 'test-1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres',
                            'tags': {'replicatefrom': 'leader'}})
        self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})

    def tearDown(self):
        shutil.rmtree('data')

    def test_get_initdb_options(self):
        self.assertEquals(self.p.get_initdb_options([{'encoding': 'UTF8'}, 'data-checksums']),
                          ['--encoding=UTF8', '--data-checksums'])
        self.assertRaises(Exception, self.p.get_initdb_options, [{'pgdata': 'bar'}])
        self.assertRaises(Exception, self.p.get_initdb_options, [{'foo': 'bar', 1: 2}])
        self.assertRaises(Exception, self.p.get_initdb_options, [1])

    @patch('os.path.exists', Mock(return_value=True))
    @patch('os.unlink', Mock())
    def test_delete_trigger_file(self):
        self.p.delete_trigger_file()

    @patch('subprocess.Popen')
    @patch.object(Postgresql, 'wait_for_startup')
    @patch.object(Postgresql, 'wait_for_port_open')
    @patch.object(Postgresql, 'is_running')
    def test_start(self, mock_is_running, mock_wait_for_port_open, mock_wait_for_startup, mock_popen):
        mock_is_running.return_value = True
        mock_wait_for_port_open.return_value = True
        mock_wait_for_startup.return_value = False
        mock_popen.stdout.readline.return_value = '123'
        self.assertTrue(self.p.start())
        mock_is_running.return_value = False
        open(os.path.join(self.data_dir, 'postmaster.pid'), 'w').close()
        pg_conf = os.path.join(self.data_dir, 'postgresql.conf')
        open(pg_conf, 'w').close()
        self.assertFalse(self.p.start())
        with open(pg_conf) as f:
            lines = f.readlines()
            self.assertTrue("f.oo = 'bar'\n" in lines)

        mock_wait_for_startup.return_value = None
        self.assertFalse(self.p.start(10))
        self.assertIsNone(self.p.start())

        mock_wait_for_port_open.return_value = False
        self.assertFalse(self.p.start())

    @patch.object(Postgresql, 'pg_isready')
    @patch.object(Postgresql, 'read_pid_file')
    @patch.object(Postgresql, 'is_pid_running')
    @patch('patroni.postgresql.polling_loop', Mock(return_value=range(1)))
    def test_wait_for_port_open(self, mock_is_pid_running, mock_read_pid_file, mock_pg_isready):
        mock_is_pid_running.return_value = False
        mock_pg_isready.return_value = STATE_NO_RESPONSE

        # No pid file and postmaster death
        mock_read_pid_file.return_value = {}
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        mock_is_pid_running.return_value = True

        # timeout
        mock_read_pid_file.return_value = {'pid': 1}
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        # Garbage pid
        mock_read_pid_file.return_value = {'pid': 'garbage', 'start_time': '101', 'data_dir': '',
                                           'socket_dir': '', 'port': '', 'listen_addr': ''}
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        # Not ready
        mock_read_pid_file.return_value = {'pid': '42', 'start_time': '101', 'data_dir': '',
                                           'socket_dir': '', 'port': '', 'listen_addr': ''}
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        # pg_isready failure
        mock_pg_isready.return_value = 'garbage'
        self.assertTrue(self.p.wait_for_port_open(42, 100., 1))

    @patch.object(Postgresql, 'is_running')
    def test_stop(self, mock_is_running):
        mock_is_running.return_value = True
        self.assertTrue(self.p.stop())
        with patch('subprocess.call', Mock(return_value=1)):
            mock_is_running.return_value = False
            self.assertTrue(self.p.stop())

    def test_restart(self):
        self.p.start = Mock(return_value=False)
        self.assertFalse(self.p.restart())
        self.assertEquals(self.p.state, 'restart failed (restarting)')

    @patch.object(builtins, 'open', MagicMock())
    def test_write_pgpass(self):
        self.p.write_pgpass({'host': 'localhost', 'port': '5432', 'user': '******'})
        self.p.write_pgpass({'host': 'localhost', 'port': '5432', 'user': '******', 'password': '******'})

    def test_checkpoint(self):
        with patch.object(MockCursor, 'fetchone', Mock(return_value=(True, ))):
            self.assertEquals(self.p.checkpoint({'user': '******'}), 'is_in_recovery=true')
        with patch.object(MockCursor, 'execute', Mock(return_value=None)):
            self.assertIsNone(self.p.checkpoint())
        self.assertEquals(self.p.checkpoint(), 'not accessible or not healty')

    @patch('subprocess.call', side_effect=OSError)
    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_pg_rewind(self, mock_call):
        r = {'user': '', 'host': '', 'port': '', 'database': '', 'password': ''}
        self.assertTrue(self.p.rewind(r))
        subprocess.call = mock_call
        self.assertFalse(self.p.rewind(r))

    @patch('os.unlink', Mock(return_value=True))
    @patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string))
    @patch.object(Postgresql, 'remove_data_directory', Mock(return_value=True))
    @patch.object(Postgresql, 'single_user_mode', Mock(return_value=1))
    @patch.object(Postgresql, 'write_pgpass', Mock(return_value={}))
    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    @patch.object(Postgresql, 'rewind', return_value=False)
    def test_follow(self, mock_pg_rewind):
        with patch.object(Postgresql, 'check_recovery_conf', Mock(return_value=True)):
            self.assertTrue(self.p.follow(None, None))  # nothing to do, recovery.conf has good primary_conninfo

        self.p.follow(self.me, self.me)  # follow is called when the node is holding leader lock

        with patch.object(Postgresql, 'restart', Mock(return_value=False)):
            self.p.set_role('replica')
            self.p.follow(None, None)  # restart without rewind

        with patch.object(Postgresql, 'stop', Mock(return_value=False)):
            self.p.follow(self.leader, self.leader, need_rewind=True)  # failed to stop postgres

        self.p.follow(self.leader, self.leader)  # "leader" is not accessible or is_in_recovery

        with patch.object(Postgresql, 'checkpoint', Mock(return_value=None)):
            self.p.follow(self.leader, self.leader)
            mock_pg_rewind.return_value = True
            self.p.follow(self.leader, self.leader, need_rewind=True)

        self.p.follow(None, None)  # check_recovery_conf...

    @patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string))
    def test_can_rewind(self):
        with patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with patch('subprocess.call', side_effect=OSError):
            self.assertFalse(self.p.can_rewind)
        with patch.object(Postgresql, 'controldata', Mock(return_value={'wal_log_hints setting': 'on'})):
            self.assertTrue(self.p.can_rewind)
        self.p.config['use_pg_rewind'] = False
        self.assertFalse(self.p.can_rewind)

    @patch('time.sleep', Mock())
    @patch.object(Postgresql, 'remove_data_directory', Mock(return_value=True))
    def test_create_replica(self):
        self.p.delete_trigger_file = Mock(side_effect=OSError)
        with patch('subprocess.call', Mock(side_effect=[1, 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)
        with patch('subprocess.call', Mock(side_effect=[Exception(), 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo'}
        with patch('subprocess.call', Mock(return_value=0)):
            self.assertEquals(self.p.create_replica(self.leader), 0)
            del self.p.config['wale']
            self.assertEquals(self.p.create_replica(self.leader), 0)

        with patch('subprocess.call', Mock(side_effect=Exception("foo"))):
            self.assertEquals(self.p.create_replica(self.leader), 1)

        with patch('subprocess.call', Mock(return_value=1)):
            self.assertEquals(self.p.create_replica(self.leader), 1)

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, None, self.leader, 0, [self.me, self.other, self.leadermem], None, None)
        with mock.patch('patroni.postgresql.Postgresql._query', Mock(side_effect=psycopg2.OperationalError)):
            self.p.sync_replication_slots(cluster)
        self.p.sync_replication_slots(cluster)
        with mock.patch('patroni.postgresql.Postgresql.role', new_callable=PropertyMock(return_value='replica')):
            self.p.sync_replication_slots(cluster)
        with mock.patch('patroni.postgresql.logger.error', new_callable=Mock()) as errorlog_mock:
            self.p.query = Mock()
            alias1 = Member(0, 'test-3', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5436/postgres'})
            alias2 = Member(0, 'test.3', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5436/postgres'})
            cluster.members.extend([alias1, alias2])
            self.p.sync_replication_slots(cluster)
            errorlog_mock.assert_called_once()
            assert "test-3" in errorlog_mock.call_args[0][1]
            assert "test.3" in errorlog_mock.call_args[0][1]

    @patch.object(MockConnect, 'closed', 2)
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query, 'RetryFailedError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    @patch.object(Postgresql, 'pg_isready', Mock(return_value=STATE_REJECT))
    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())
        with patch.object(Postgresql, '_query', Mock(side_effect=RetryFailedError(''))):
            self.assertRaises(PostgresConnectionException, self.p.is_leader)

    def test_reload(self):
        self.assertTrue(self.p.reload())

    @patch.object(Postgresql, 'is_running')
    def test_is_healthy(self, mock_is_running):
        mock_is_running.return_value = True
        self.assertTrue(self.p.is_healthy())
        mock_is_running.return_value = False
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p._role = 'replica'
        self.assertTrue(self.p.promote())
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEquals(self.p.last_operation(), '0')
        Thread(target=self.p.last_operation).start()

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('os.kill', Mock(side_effect=Exception))
    @patch('os.getpid', Mock(return_value=2))
    @patch('os.getppid', Mock(return_value=2))
    @patch.object(builtins, 'open', mock_open(read_data='-1'))
    @patch.object(Postgresql, '_version_file_exists', Mock(return_value=True))
    def test_is_running(self):
        self.assertFalse(self.p.is_running())

    @patch('shlex.split', Mock(side_effect=OSError))
    def test_call_nowait(self):
        self.assertIsNone(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.move_data_directory()
        with patch('os.rename', Mock(side_effect=OSError)):
            self.p.move_data_directory()

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertRaises(PostgresException, self.p.bootstrap, {})

        with patch.object(Postgresql, 'run_bootstrap_post_init', Mock(return_value=False)):
            self.assertRaises(PostgresException, self.p.bootstrap, {})

        self.p.bootstrap({'users': {'replicator': {'password': '******', 'options': ['replication']}},
                          'pg_hba': ['host replication replicator 127.0.0.1/32 md5',
                                     'hostssl all all 0.0.0.0/0 md5',
                                     'host all all 0.0.0.0/0 md5'],
                          'post_init': '/bin/false'})
        with open(os.path.join(self.data_dir, 'pg_hba.conf')) as f:
            lines = f.readlines()
            assert 'host replication replicator 127.0.0.1/32 md5\n' in lines
            assert 'host all all 0.0.0.0/0 md5\n' in lines

    def test_run_bootstrap_post_init(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertFalse(self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

        with patch('subprocess.call', Mock(side_effect=OSError)):
            self.assertFalse(self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

        with patch('subprocess.call', Mock(return_value=0)) as mock_method:
            self.p._superuser.pop('username')
            self.assertTrue(self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

        mock_method.assert_called()
        args, kwargs = mock_method.call_args
        assert 'PGPASSFILE' in kwargs['env'].keys()
        self.assertEquals(args[0], ['/bin/false', 'postgres://*****:*****@127.0.0.2:5432/postgres'])

    @patch('patroni.postgresql.Postgresql.create_replica', Mock(return_value=0))
    def test_clone(self):
        self.p.clone(self.leader)

    @patch('os.listdir', Mock(return_value=['recovery.conf']))
    @patch('os.path.exists', Mock(return_value=True))
    def test_get_postgres_role_from_data_directory(self):
        self.assertEquals(self.p.get_postgres_role_from_data_directory(), 'replica')

    def test_remove_data_directory(self):
        self.p.remove_data_directory()
        open(self.data_dir, 'w').close()
        self.p.remove_data_directory()
        os.symlink('unexisting', self.data_dir)
        with patch('os.unlink', Mock(side_effect=OSError)):
            self.p.remove_data_directory()
        self.p.remove_data_directory()

    @patch('patroni.postgresql.Postgresql._version_file_exists', Mock(return_value=True))
    def test_controldata(self):
        with patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string)):
            data = self.p.controldata()
            self.assertEquals(len(data), 50)
            self.assertEquals(data['Database cluster state'], 'shut down in recovery')
            self.assertEquals(data['wal_log_hints setting'], 'on')
            self.assertEquals(int(data['Database block size']), 8192)

        with patch('subprocess.check_output', Mock(side_effect=subprocess.CalledProcessError(1, ''))):
            self.assertEquals(self.p.controldata(), {})

    def test_read_postmaster_opts(self):
        m = mock_open(read_data=postmaster_opts_string())
        with patch.object(builtins, 'open', m):
            data = self.p.read_postmaster_opts()
            self.assertEquals(data['wal_level'], 'hot_standby')
            self.assertEquals(int(data['max_replication_slots']), 5)
            self.assertEqual(data.get('D'), None)

            m.side_effect = IOError
            data = self.p.read_postmaster_opts()
            self.assertEqual(data, dict())

    @patch('subprocess.Popen')
    @patch.object(builtins, 'open', MagicMock(return_value=42))
    def test_single_user_mode(self, subprocess_popen_mock):
        subprocess_popen_mock.return_value.wait.return_value = 0
        self.assertEquals(self.p.single_user_mode(options=dict(archive_mode='on', archive_command='false')), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.data_dir,
                                                      '-c', 'archive_command=false', '-c', 'archive_mode=on',
                                                       'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.reset_mock()
        self.assertEquals(self.p.single_user_mode(command="CHECKPOINT"), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.data_dir,
                                                      'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.return_value = None
        self.assertEquals(self.p.single_user_mode(), 1)

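    # cleanup_archive_status() walks pg_xlog/archive_status and removes stale entries:
    # regular files via os.remove, symlinks via os.unlink, and an OSError raised by the
    # type checks means nothing is deleted at all (editorial note: inferred from the
    # mock expectations below).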
    @patch('os.listdir', MagicMock(side_effect=fake_listdir))
    @patch('os.unlink', return_value=True)
    @patch('os.remove', return_value=True)
    @patch('os.path.islink', return_value=False)
    @patch('os.path.isfile', return_value=True)
    def test_cleanup_archive_status(self, mock_file, mock_link, mock_remove, mock_unlink):
        ap = os.path.join(self.data_dir, 'pg_xlog', 'archive_status/')
        self.p.cleanup_archive_status()
        mock_remove.assert_has_calls([mock.call(ap + 'a'), mock.call(ap + 'b'), mock.call(ap + 'c')])
        mock_unlink.assert_not_called()

        mock_remove.reset_mock()

        mock_file.return_value = False
        mock_link.return_value = True
        self.p.cleanup_archive_status()
        mock_unlink.assert_has_calls([mock.call(ap + 'a'), mock.call(ap + 'b'), mock.call(ap + 'c')])
        mock_remove.assert_not_called()

        mock_unlink.reset_mock()
        mock_remove.reset_mock()

        mock_file.side_effect = OSError
        mock_link.side_effect = OSError
        self.p.cleanup_archive_status()
        mock_unlink.assert_not_called()
        mock_remove.assert_not_called()

    @patch('patroni.postgresql.Postgresql._version_file_exists', Mock(return_value=True))
    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    def test_sysid(self):
        self.assertEqual(self.p.sysid, "6200971513092291716")

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_save_configuration_files(self):
        self.p.save_configuration_files()

    @patch('os.path.isfile', Mock(side_effect=[False, True]))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_restore_configuration_files(self):
        self.p.restore_configuration_files()

    def test_can_create_replica_without_replication_connection(self):
        self.p.config['create_replica_method'] = []
        self.assertFalse(self.p.can_create_replica_without_replication_connection())
        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo', 'no_master': 1}
        self.assertTrue(self.p.can_create_replica_without_replication_connection())

    def test_replica_method_can_work_without_replication_connection(self):
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('basebackup'))
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('foobar'))
        self.p.config['foo'] = {'command': 'bar', 'no_master': 1}
        self.assertTrue(self.p.replica_method_can_work_without_replication_connection('foo'))
        self.p.config['foo'] = {'command': 'bar'}
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('foo'))

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_reload_config(self):
        parameters = self._PARAMETERS.copy()
        parameters.pop('f.oo')
        self.p.reload_config({'retry_timeout': 10, 'listen': '*', 'parameters': parameters})
        parameters['b.ar'] = 'bar'
        self.p.reload_config({'retry_timeout': 10, 'listen': '*', 'parameters': parameters})
        parameters['autovacuum'] = 'on'
        self.p.reload_config({'retry_timeout': 10, 'listen': '*', 'parameters': parameters})
        parameters['autovacuum'] = 'off'
        parameters.pop('search_path')
        self.p.reload_config({'retry_timeout': 10, 'listen': '*:5433', 'parameters': parameters})

    @patch.object(Postgresql, '_version_file_exists', Mock(return_value=True))
    def test_get_major_version(self):
        with patch.object(builtins, 'open', mock_open(read_data='9.4')):
            self.assertEquals(self.p.get_major_version(), 9.4)
        with patch.object(builtins, 'open', Mock(side_effect=Exception)):
            self.assertEquals(self.p.get_major_version(), 0.0)

    def test_postmaster_start_time(self):
        with patch.object(MockCursor, "fetchone", Mock(return_value=('foo', True, '', '', '', '', False))):
            self.assertEqual(self.p.postmaster_start_time(), 'foo')
        with patch.object(MockCursor, "execute", side_effect=psycopg2.Error):
            self.assertIsNone(self.p.postmaster_start_time())

    def test_check_for_startup(self):
        with patch('subprocess.call', return_value=0):
            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

        with patch('subprocess.call', return_value=1):
            self.p._state = 'starting'
            self.assertTrue(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'starting')

        with patch('subprocess.call', return_value=2):
            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'start failed')

        with patch('subprocess.call', return_value=0):
            self.p._state = 'running'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

        with patch('subprocess.call', return_value=127):
            self.p._state = 'running'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

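    # The helpers below drive wait_for_startup(): the patched subprocess.call stands in
    # for pg_isready and keeps returning 1 until 'num_rejects' sleeps have happened,
    # then returns 'final_return'; time.sleep is counted so the assertions can check
    # how many polls were needed (editorial note: inferred from the assertions below).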
    def test_wait_for_startup(self):
        state = {'sleeps': 0, 'num_rejects': 0, 'final_return': 0}

        def increment_sleeps(*args):
            print("Sleep")
            state['sleeps'] += 1

        def isready_return(*args):
            ret = 1 if state['sleeps'] < state['num_rejects'] else state['final_return']
            print("Isready {0} {1}".format(ret, state))
            return ret

        def time_in_state(*args):
            return state['sleeps']

        with patch('subprocess.call', side_effect=isready_return):
            with patch('time.sleep', side_effect=increment_sleeps):
                self.p.time_in_state = Mock(side_effect=time_in_state)

                self.p._state = 'stopped'
                self.assertTrue(self.p.wait_for_startup())
                self.assertEquals(state['sleeps'], 0)

                self.p._state = 'starting'
                state['num_rejects'] = 5
                self.assertTrue(self.p.wait_for_startup())
                self.assertEquals(state['sleeps'], 5)

                self.p._state = 'starting'
                state['sleeps'] = 0
                state['final_return'] = 2
                self.assertFalse(self.p.wait_for_startup())

                self.p._state = 'starting'
                state['sleeps'] = 0
                state['final_return'] = 0
                self.assertFalse(self.p.wait_for_startup(timeout=2))
                self.assertEquals(state['sleeps'], 3)

    def test_read_pid_file(self):
        pidfile = os.path.join(self.data_dir, 'postmaster.pid')
        if os.path.exists(pidfile):
            os.remove(pidfile)
        self.assertEquals(self.p.read_pid_file(), {})

    @patch('os.kill')
    def test_is_pid_running(self, mock_kill):
        mock_kill.return_value = True
        self.assertTrue(self.p.is_pid_running(-100))
        self.assertFalse(self.p.is_pid_running(0))
        self.assertFalse(self.p.is_pid_running(None))

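    # pick_synchronous_standby() is expected to return a (member_name, is_synchronous)
    # tuple: a standby already listed as 'sync' wins, otherwise the first streaming
    # candidate is returned with False, and (None, False) means there is no candidate
    # at all (editorial note: inferred from the assertions below).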
    def test_pick_sync_standby(self):
        cluster = Cluster(True, None, self.leader, 0, [self.me, self.other, self.leadermem], None,
                          SyncState(0, self.me.name, self.leadermem.name))

        with patch.object(Postgresql, "query", return_value=[
                    (self.leadermem.name, 'streaming', 'sync'),
                    (self.me.name, 'streaming', 'async'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.leadermem.name, True))

        with patch.object(Postgresql, "query", return_value=[
                    (self.me.name, 'streaming', 'async'),
                    (self.leadermem.name, 'streaming', 'potential'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.leadermem.name, False))

        with patch.object(Postgresql, "query", return_value=[
                    (self.me.name, 'streaming', 'async'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.me.name, False))

        with patch.object(Postgresql, "query", return_value=[
                    ('missing', 'streaming', 'sync'),
                    (self.me.name, 'streaming', 'async'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.me.name, False))

        with patch.object(Postgresql, "query", return_value=[]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (None, False))

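    # set_synchronous_standby() is expected to rewrite synchronous_standby_names in
    # postgresql.conf and trigger a reload only when the value actually changes; passing
    # None removes the setting (editorial note: inferred from the assertions below).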
    def test_set_sync_standby(self):
        def value_in_conf():
            with open(os.path.join(self.data_dir, 'postgresql.conf')) as f:
                for line in f:
                    if line.startswith('synchronous_standby_names'):
                        return line.strip()

        mock_reload = self.p.reload = Mock()
        self.p.set_synchronous_standby('n1')
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n1'")
        mock_reload.assert_called()

        mock_reload.reset_mock()
        self.p.set_synchronous_standby('n1')
        mock_reload.assert_not_called()
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n1'")

        self.p.set_synchronous_standby('n2')
        mock_reload.assert_called()
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n2'")

        mock_reload.reset_mock()
        self.p.set_synchronous_standby(None)
        mock_reload.assert_called()
        self.assertEquals(value_in_conf(), None)

    def test_get_server_parameters(self):
        config = {'synchronous_mode': True, 'parameters': {'wal_level': 'hot_standby'}, 'listen': '0'}
        self.p.get_server_parameters(config)
        self.p.set_synchronous_standby('foo')
        self.p.get_server_parameters(config)
Example #35
0
class TestPostgresql(unittest.TestCase):
    _PARAMETERS = {
        'wal_level': 'hot_standby',
        'max_replication_slots': 5,
        'f.oo': 'bar',
        'search_path': 'public',
        'hot_standby': 'on',
        'max_wal_senders': 5,
        'wal_keep_segments': 8,
        'wal_log_hints': 'on',
        'max_locks_per_transaction': 64,
        'max_worker_processes': 8,
        'max_connections': 100,
        'max_prepared_transactions': 0,
        'track_commit_timestamp': 'off',
        'unix_socket_directories': '/tmp'
    }

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('os.rename', Mock())
    @patch.object(Postgresql, 'get_major_version', Mock(return_value=90600))
    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def setUp(self):
        self.data_dir = 'data/test0'
        self.config_dir = self.data_dir
        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.p = Postgresql({
            'name': 'test0',
            'scope': 'batman',
            'data_dir': self.data_dir,
            'config_dir': self.config_dir,
            'retry_timeout': 10,
            'pgpass': '******',
            'listen': '127.0.0.2, 127.0.0.3:5432',
            'connect_address': '127.0.0.2:5432',
            'authentication': {
                'superuser': {
                    'username': '******',
                    'password': '******'
                },
                'replication': {
                    'username': '******',
                    'password': '******'
                }
            },
            'remove_data_directory_on_rewind_failure': True,
            'use_pg_rewind': True,
            'pg_ctl_timeout': 'bla',
            'parameters': self._PARAMETERS,
            'recovery_conf': {
                'foo': 'bar'
            },
            'pg_hba': ['host all all 0.0.0.0/0 md5'],
            'callbacks': {
                'on_start': 'true',
                'on_stop': 'true',
                'on_reload': 'true',
                'on_restart': 'true',
                'on_role_change': 'true'
            }
        })
        self.p._callback_executor = Mock()
        self.leadermem = Member(0, 'leader', 28, {
            'conn_url':
            'postgres://*****:*****@127.0.0.1:5435/postgres'
        })
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(
            0, 'test-1', 28, {
                'conn_url':
                'postgres://*****:*****@127.0.0.1:5433/postgres',
                'tags': {
                    'replicatefrom': 'leader'
                }
            })
        self.me = Member(0, 'test0', 28, {
            'conn_url':
            'postgres://*****:*****@127.0.0.1:5434/postgres'
        })

    def tearDown(self):
        shutil.rmtree('data')

    def test__initdb(self):
        self.assertRaises(Exception, self.p.bootstrap,
                          {'initdb': [{
                              'pgdata': 'bar'
                          }]})
        self.assertRaises(Exception, self.p.bootstrap,
                          {'initdb': [{
                              'foo': 'bar',
                              1: 2
                          }]})
        self.assertRaises(Exception, self.p.bootstrap, {'initdb': [1]})
        self.assertRaises(Exception, self.p.bootstrap, {'initdb': 1})

    @patch('os.path.exists', Mock(return_value=True))
    @patch('os.unlink', Mock())
    def test_delete_trigger_file(self):
        self.p.delete_trigger_file()

    @patch('subprocess.Popen')
    @patch.object(Postgresql, 'wait_for_startup')
    @patch.object(Postgresql, 'wait_for_port_open')
    @patch.object(Postgresql, 'is_running')
    def test_start(self, mock_is_running, mock_wait_for_port_open,
                   mock_wait_for_startup, mock_popen):
        mock_is_running.return_value = MockPostmaster()
        mock_wait_for_port_open.return_value = True
        mock_wait_for_startup.return_value = False
        mock_popen.return_value.stdout.readline.return_value = '123'
        self.assertTrue(self.p.start())
        mock_is_running.return_value = None

        mock_postmaster = MockPostmaster()
        with patch.object(PostmasterProcess,
                          'start',
                          return_value=mock_postmaster):
            pg_conf = os.path.join(self.data_dir, 'postgresql.conf')
            open(pg_conf, 'w').close()
            self.assertFalse(self.p.start(task=CriticalTask()))

            with open(pg_conf) as f:
                lines = f.readlines()
                self.assertTrue("f.oo = 'bar'\n" in lines)

            mock_wait_for_startup.return_value = None
            self.assertFalse(self.p.start(10))
            self.assertIsNone(self.p.start())

            mock_wait_for_port_open.return_value = False
            self.assertFalse(self.p.start())
            task = CriticalTask()
            task.cancel()
            self.assertFalse(self.p.start(task=task))

        self.p.cancel()
        self.assertFalse(self.p.start())

    @patch.object(Postgresql, 'pg_isready')
    @patch('patroni.postgresql.polling_loop', Mock(return_value=range(1)))
    def test_wait_for_port_open(self, mock_pg_isready):
        mock_pg_isready.return_value = STATE_NO_RESPONSE
        mock_postmaster = MockPostmaster(is_running=False)

        # No pid file and postmaster death
        self.assertFalse(self.p.wait_for_port_open(mock_postmaster, 1))

        mock_postmaster.is_running.return_value = True

        # timeout
        self.assertFalse(self.p.wait_for_port_open(mock_postmaster, 1))

        # pg_isready failure
        mock_pg_isready.return_value = 'garbage'
        self.assertTrue(self.p.wait_for_port_open(mock_postmaster, 1))

        # cancelled
        self.p.cancel()
        self.assertFalse(self.p.wait_for_port_open(mock_postmaster, 1))

    @patch('time.sleep', Mock())
    @patch.object(Postgresql, 'is_running')
    @patch.object(Postgresql, '_wait_for_connection_close', Mock())
    def test_stop(self, mock_is_running):
        # Postmaster is not running
        mock_callback = Mock()
        mock_is_running.return_value = None
        self.assertTrue(self.p.stop(on_safepoint=mock_callback))
        mock_callback.assert_called()

        # Is running, stopped successfully
        mock_is_running.return_value = mock_postmaster = MockPostmaster()
        mock_callback.reset_mock()
        self.assertTrue(self.p.stop(on_safepoint=mock_callback))
        mock_callback.assert_called()
        mock_postmaster.signal_stop.assert_called()

        # Stop signal failed
        mock_postmaster.signal_stop.return_value = False
        self.assertFalse(self.p.stop())

        # Stop signal failed to find process
        mock_postmaster.signal_stop.return_value = True
        mock_callback.reset_mock()
        self.assertTrue(self.p.stop(on_safepoint=mock_callback))
        mock_callback.assert_called()

    def test_restart(self):
        self.p.start = Mock(return_value=False)
        self.assertFalse(self.p.restart())
        self.assertEquals(self.p.state, 'restart failed (restarting)')

    @patch.object(builtins, 'open', MagicMock())
    def test_write_pgpass(self):
        self.p.write_pgpass({
            'host': 'localhost',
            'port': '5432',
            'user': '******'
        })
        self.p.write_pgpass({
            'host': 'localhost',
            'port': '5432',
            'user': '******',
            'password': '******'
        })

    def test_checkpoint(self):
        with patch.object(MockCursor, 'fetchone', Mock(return_value=(True, ))):
            self.assertEquals(self.p.checkpoint({'user': '******'}),
                              'is_in_recovery=true')
        with patch.object(MockCursor, 'execute', Mock(return_value=None)):
            self.assertIsNone(self.p.checkpoint())
        self.assertEquals(self.p.checkpoint(), 'not accessible or not healty')

    @patch.object(Postgresql, 'cancellable_subprocess_call')
    @patch('patroni.postgresql.Postgresql.write_pgpass',
           MagicMock(return_value=dict()))
    def test_pg_rewind(self, mock_cancellable_subprocess_call):
        r = {
            'user': '',
            'host': '',
            'port': '',
            'database': '',
            'password': ''
        }
        mock_cancellable_subprocess_call.return_value = 0
        self.assertTrue(self.p.pg_rewind(r))
        mock_cancellable_subprocess_call.side_effect = OSError
        self.assertFalse(self.p.pg_rewind(r))

    def test_check_recovery_conf(self):
        self.p.write_recovery_conf({'primary_conninfo': 'foo'})
        self.assertFalse(self.p.check_recovery_conf(None))
        self.p.write_recovery_conf({})
        self.assertTrue(self.p.check_recovery_conf(None))

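    # _get_local_timeline_lsn() determines the local timeline and LSN either from
    # pg_controldata output while postgres is shut down, or through a live connection
    # while it is running; rewind_needed_and_possible() uses that to decide whether the
    # local history diverged from the leader (editorial note: inferred from the
    # scenarios exercised below).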
    @patch.object(Postgresql, 'start', Mock())
    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    def test__get_local_timeline_lsn(self):
        self.p.trigger_check_diverged_lsn()
        with patch.object(
                Postgresql, 'controldata',
                Mock(
                    return_value={
                        'Database cluster state': 'shut down in recovery',
                        'Minimum recovery ending location': '0/0',
                        "Min recovery ending loc's timeline": '0'
                    })):
            self.p.rewind_needed_and_possible(self.leader)
        with patch.object(Postgresql, 'is_running', Mock(return_value=True)):
            with patch.object(MockCursor, 'fetchone',
                              Mock(side_effect=[(False, ), Exception])):
                self.p.rewind_needed_and_possible(self.leader)

    @patch.object(Postgresql, 'start', Mock())
    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    @patch.object(Postgresql, '_get_local_timeline_lsn',
                  Mock(return_value=(2, '40159C1')))
    @patch.object(Postgresql, 'check_leader_is_not_in_recovery')
    def test__check_timeline_and_lsn(self,
                                     mock_check_leader_is_not_in_recovery):
        mock_check_leader_is_not_in_recovery.return_value = False
        self.p.trigger_check_diverged_lsn()
        self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        mock_check_leader_is_not_in_recovery.return_value = True
        self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        self.p.trigger_check_diverged_lsn()
        with patch('psycopg2.connect', Mock(side_effect=Exception)):
            self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        self.p.trigger_check_diverged_lsn()
        with patch.object(
                MockCursor, 'fetchone',
                Mock(side_effect=[('', 2, '0/0'), ('',
                                                   b'3\t0/40159C0\tn\n')])):
            self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        self.p.trigger_check_diverged_lsn()
        with patch.object(MockCursor, 'fetchone',
                          Mock(return_value=('', 1, '0/0'))):
            with patch.object(Postgresql, '_get_local_timeline_lsn',
                              Mock(return_value=(1, '0/0'))):
                self.assertFalse(self.p.rewind_needed_and_possible(
                    self.leader))
            self.p.trigger_check_diverged_lsn()
            self.assertTrue(self.p.rewind_needed_and_possible(self.leader))

    @patch.object(MockCursor, 'fetchone',
                  Mock(side_effect=[(True, ), Exception]))
    def test_check_leader_is_not_in_recovery(self):
        self.p.check_leader_is_not_in_recovery()
        self.p.check_leader_is_not_in_recovery()

    @patch.object(Postgresql, 'cancellable_subprocess_call',
                  Mock(return_value=0))
    @patch.object(Postgresql, 'checkpoint', side_effect=['', '1'])
    @patch.object(Postgresql, 'stop', Mock(return_value=False))
    @patch.object(Postgresql, 'start', Mock())
    def test_rewind(self, mock_checkpoint):
        self.p.rewind(self.leader)
        with patch.object(Postgresql, 'pg_rewind', Mock(return_value=False)):
            mock_checkpoint.side_effect = ['1', '', '', '']
            self.p.rewind(self.leader)
            self.p.rewind(self.leader)
            with patch.object(Postgresql, 'check_leader_is_not_in_recovery',
                              Mock(return_value=False)):
                self.p.rewind(self.leader)
            self.p.config['remove_data_directory_on_rewind_failure'] = False
            self.p.trigger_check_diverged_lsn()
            self.p.rewind(self.leader)
        with patch.object(Postgresql, 'is_running', Mock(return_value=True)):
            self.p.rewind(self.leader)
            self.p.is_leader = Mock(return_value=False)
            self.p.rewind(self.leader)

    @patch.object(Postgresql, 'is_running', Mock(return_value=False))
    @patch.object(Postgresql, 'start', Mock())
    def test_follow(self):
        self.p.follow(None)

    @patch('subprocess.check_output',
           Mock(return_value=0, side_effect=pg_controldata_string))
    def test_can_rewind(self):
        with patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with patch('subprocess.call', side_effect=OSError):
            self.assertFalse(self.p.can_rewind)
        with patch.object(Postgresql, 'controldata',
                          Mock(return_value={'wal_log_hints setting': 'on'})):
            self.assertTrue(self.p.can_rewind)
        self.p.config['use_pg_rewind'] = False
        self.assertFalse(self.p.can_rewind)

    @patch('time.sleep', Mock())
    @patch.object(Postgresql, 'cancellable_subprocess_call')
    @patch.object(Postgresql, 'remove_data_directory', Mock(return_value=True))
    def test_create_replica(self, mock_cancellable_subprocess_call):
        self.p.delete_trigger_file = Mock(side_effect=OSError)

        self.p.config['create_replica_methods'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo'}
        mock_cancellable_subprocess_call.return_value = 0
        self.assertEquals(self.p.create_replica(self.leader), 0)
        del self.p.config['wale']
        self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.config['create_replica_methods'] = ['basebackup']
        self.p.config['basebackup'] = [{'max_rate': '100M'}, 'no-sync']
        self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.config['basebackup'] = [{'max_rate': '100M', 'compress': '9'}]
        with mock.patch('patroni.postgresql.logger.error',
                        new_callable=Mock()) as mock_logger:
            self.p.create_replica(self.leader)
            mock_logger.assert_called_once()
            self.assertTrue(
                "only one key-value is allowed and value should be a string"
                in mock_logger.call_args[0][0],
                "not matching {0}".format(mock_logger.call_args[0][0]))

        self.p.config['basebackup'] = [42]
        with mock.patch('patroni.postgresql.logger.error',
                        new_callable=Mock()) as mock_logger:
            self.p.create_replica(self.leader)
            mock_logger.assert_called_once()
            self.assertTrue(
                "value should be string value or a single key-value pair"
                in mock_logger.call_args[0][0],
                "not matching {0}".format(mock_logger.call_args[0][0]))

        self.p.config['basebackup'] = {"foo": "bar"}
        self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.config['create_replica_methods'] = ['wale', 'basebackup']
        del self.p.config['basebackup']
        mock_cancellable_subprocess_call.return_value = 1
        self.assertEquals(self.p.create_replica(self.leader), 1)

        mock_cancellable_subprocess_call.side_effect = Exception('foo')
        self.assertEquals(self.p.create_replica(self.leader), 1)

        mock_cancellable_subprocess_call.side_effect = [1, 0]
        self.assertEquals(self.p.create_replica(self.leader), 0)

        mock_cancellable_subprocess_call.side_effect = [Exception(), 0]
        self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.cancel()
        self.assertEquals(self.p.create_replica(self.leader), 1)

    @patch('time.sleep', Mock())
    @patch.object(Postgresql, 'cancellable_subprocess_call')
    @patch.object(Postgresql, 'remove_data_directory', Mock(return_value=True))
    def test_create_replica_old_format(self, mock_cancellable_subprocess_call):
        """ The same test as before but with old 'create_replica_method'
            to test backward compatibility
        """
        self.p.delete_trigger_file = Mock(side_effect=OSError)

        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo'}
        mock_cancellable_subprocess_call.return_value = 0
        self.assertEquals(self.p.create_replica(self.leader), 0)
        del self.p.config['wale']
        self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.config['create_replica_method'] = ['basebackup']
        self.p.config['basebackup'] = [{'max_rate': '100M'}, 'no-sync']
        self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        del self.p.config['basebackup']
        mock_cancellable_subprocess_call.return_value = 1
        self.assertEquals(self.p.create_replica(self.leader), 1)

    def test_basebackup(self):
        self.p.cancel()
        self.p.basebackup(None, None, {'foo': 'bar'})

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, None, self.leader, 0,
                          [self.me, self.other, self.leadermem], None, None,
                          None)
        with mock.patch('patroni.postgresql.Postgresql._query',
                        Mock(side_effect=psycopg2.OperationalError)):
            self.p.sync_replication_slots(cluster)
        self.p.sync_replication_slots(cluster)
        with mock.patch('patroni.postgresql.Postgresql.role',
                        new_callable=PropertyMock(return_value='replica')):
            self.p.sync_replication_slots(cluster)
        with mock.patch('patroni.postgresql.logger.error',
                        new_callable=Mock()) as errorlog_mock:
            self.p.query = Mock()
            alias1 = Member(
                0, 'test-3', 28, {
                    'conn_url':
                    'postgres://*****:*****@127.0.0.1:5436/postgres'
                })
            alias2 = Member(
                0, 'test.3', 28, {
                    'conn_url':
                    'postgres://*****:*****@127.0.0.1:5436/postgres'
                })
            cluster.members.extend([alias1, alias2])
            self.p.sync_replication_slots(cluster)
            errorlog_mock.assert_called_once()
            self.assertTrue(
                "test-3" in errorlog_mock.call_args[0][1],
                "non matching {0}".format(errorlog_mock.call_args[0][1]))
            self.assertTrue(
                "test.3" in errorlog_mock.call_args[0][1],
                "non matching {0}".format(errorlog_mock.call_args[0][1]))

    @patch.object(MockCursor, 'execute',
                  Mock(side_effect=psycopg2.OperationalError))
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query,
                          'RetryFailedError')
        self.assertRaises(psycopg2.ProgrammingError, self.p.query, 'blabla')

    @patch.object(Postgresql, 'pg_isready', Mock(return_value=STATE_REJECT))
    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())
        self.p.reset_cluster_info_state()
        with patch.object(Postgresql, '_query',
                          Mock(side_effect=RetryFailedError(''))):
            self.assertRaises(PostgresConnectionException, self.p.is_leader)

    def test_reload(self):
        self.assertTrue(self.p.reload())

    @patch.object(Postgresql, 'is_running')
    def test_is_healthy(self, mock_is_running):
        mock_is_running.return_value = True
        self.assertTrue(self.p.is_healthy())
        mock_is_running.return_value = False
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p.set_role('replica')
        self.assertIsNone(self.p.promote(0))
        self.assertTrue(self.p.promote(0))

    def test_timeline_wal_position(self):
        self.assertEquals(self.p.timeline_wal_position(), (1, 2))
        Thread(target=self.p.timeline_wal_position).start()

    @patch.object(PostmasterProcess, 'from_pidfile')
    def test_is_running(self, mock_frompidfile):
        # Cached postmaster running
        mock_postmaster = self.p._postmaster_proc = MockPostmaster()
        self.assertEquals(self.p.is_running(), mock_postmaster)

        # Cached postmaster not running, no postmaster running
        mock_postmaster.is_running.return_value = False
        mock_frompidfile.return_value = None
        self.assertEquals(self.p.is_running(), None)
        self.assertEquals(self.p._postmaster_proc, None)

        # No cached postmaster, postmaster running
        mock_frompidfile.return_value = mock_postmaster2 = MockPostmaster()
        self.assertEquals(self.p.is_running(), mock_postmaster2)
        self.assertEquals(self.p._postmaster_proc, mock_postmaster2)

    @patch('shlex.split', Mock(side_effect=OSError))
    def test_call_nowait(self):
        self.p.set_role('replica')
        self.assertIsNone(self.p.call_nowait('on_start'))
        self.p.bootstrapping = True
        self.assertIsNone(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    @patch.object(Postgresql, 'is_running',
                  Mock(return_value=MockPostmaster()))
    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(
            side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.move_data_directory()
        with patch('os.rename', Mock(side_effect=OSError)):
            self.p.move_data_directory()

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertFalse(self.p.bootstrap({}))

        config = {
            'users': {
                'replicator': {
                    'password': '******',
                    'options': ['replication']
                }
            }
        }

        self.p.bootstrap(config)
        with open(os.path.join(self.config_dir, 'pg_hba.conf')) as f:
            lines = f.readlines()
            self.assertTrue('host all all 0.0.0.0/0 md5\n' in lines)

        self.p.config.pop('pg_hba')
        config.update({
            'post_init':
            '/bin/false',
            'pg_hba': [
                'host replication replicator 127.0.0.1/32 md5',
                'hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'
            ]
        })
        self.p.bootstrap(config)
        with open(os.path.join(self.data_dir, 'pg_hba.conf')) as f:
            lines = f.readlines()
            self.assertTrue(
                'host replication replicator 127.0.0.1/32 md5\n' in lines)

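    # A custom bootstrap replaces initdb: 'method' names a config section whose
    # 'command' is executed instead; a non-zero exit code or an exception makes
    # bootstrap() return False, while failures during the subsequent postgres start are
    # re-raised (editorial note: inferred from the assertions below).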
    @patch.object(Postgresql, 'cancellable_subprocess_call')
    def test_custom_bootstrap(self, mock_cancellable_subprocess_call):
        self.p.config.pop('pg_hba')
        config = {'method': 'foo', 'foo': {'command': 'bar'}}

        mock_cancellable_subprocess_call.return_value = 1
        self.assertFalse(self.p.bootstrap(config))

        mock_cancellable_subprocess_call.return_value = 0
        with patch('subprocess.Popen', Mock(side_effect=Exception("42"))),\
                patch('os.path.isfile', Mock(return_value=True)),\
                patch('os.unlink', Mock()),\
                patch.object(Postgresql, 'save_configuration_files', Mock()),\
                patch.object(Postgresql, 'restore_configuration_files', Mock()),\
                patch.object(Postgresql, 'write_recovery_conf', Mock()):
            with self.assertRaises(Exception) as e:
                self.p.bootstrap(config)
            self.assertEqual(str(e.exception), '42')

            config['foo']['recovery_conf'] = {'foo': 'bar'}

            with self.assertRaises(Exception) as e:
                self.p.bootstrap(config)
            self.assertEqual(str(e.exception), '42')

        mock_cancellable_subprocess_call.side_effect = Exception
        self.assertFalse(self.p.bootstrap(config))

    @patch('time.sleep', Mock())
    @patch('os.unlink', Mock())
    @patch.object(Postgresql, 'run_bootstrap_post_init',
                  Mock(return_value=True))
    @patch.object(Postgresql, '_custom_bootstrap', Mock(return_value=True))
    @patch.object(Postgresql, 'start', Mock(return_value=True))
    def test_post_bootstrap(self):
        config = {'method': 'foo', 'foo': {'command': 'bar'}}
        self.p.bootstrap(config)

        task = CriticalTask()
        with patch.object(Postgresql, 'create_or_update_role',
                          Mock(side_effect=Exception)):
            self.p.post_bootstrap({}, task)
            self.assertFalse(task.result)

        self.p.config.pop('pg_hba')
        self.p.post_bootstrap({}, task)
        self.assertTrue(task.result)

        self.p.bootstrap(config)
        with patch.object(Postgresql, 'pending_restart', PropertyMock(return_value=True)), \
                patch.object(Postgresql, 'restart', Mock()) as mock_restart:
            self.p.post_bootstrap({}, task)
            mock_restart.assert_called_once()

        self.p.bootstrap(config)
        self.p.set_state('stopped')
        self.p.reload_config({
            'authentication': {
                'superuser': {
                    'username': '******',
                    'password': '******'
                },
                'replication': {
                    'username': '******',
                    'password': '******'
                }
            },
            'listen': '*',
            'retry_timeout': 10,
            'parameters': {
                'hba_file': 'foo'
            }
        })
        with patch.object(Postgresql, 'restart', Mock()) as mock_restart:
            self.p.post_bootstrap({}, task)
            mock_restart.assert_called_once()

    @patch.object(Postgresql, 'cancellable_subprocess_call')
    def test_run_bootstrap_post_init(self, mock_cancellable_subprocess_call):
        mock_cancellable_subprocess_call.return_value = 1
        self.assertFalse(
            self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

        mock_cancellable_subprocess_call.return_value = 0
        self.p._superuser.pop('username')
        self.assertTrue(
            self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))
        mock_cancellable_subprocess_call.assert_called()
        args, kwargs = mock_cancellable_subprocess_call.call_args
        self.assertTrue('PGPASSFILE' in kwargs['env'])
        self.assertEquals(args[0],
                          ['/bin/false', 'postgres://127.0.0.2:5432/postgres'])

        mock_cancellable_subprocess_call.reset_mock()
        self.p._local_address.pop('host')
        self.assertTrue(
            self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))
        mock_cancellable_subprocess_call.assert_called()
        self.assertEquals(mock_cancellable_subprocess_call.call_args[0][0],
                          ['/bin/false', 'postgres://:5432/postgres'])

        mock_cancellable_subprocess_call.side_effect = OSError
        self.assertFalse(
            self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

    @patch('patroni.postgresql.Postgresql.create_replica',
           Mock(return_value=0))
    def test_clone(self):
        self.p.clone(self.leader)

    @patch('os.listdir', Mock(return_value=['recovery.conf']))
    @patch('os.path.exists', Mock(return_value=True))
    def test_get_postgres_role_from_data_directory(self):
        self.assertEquals(self.p.get_postgres_role_from_data_directory(),
                          'replica')

    def test_remove_data_directory(self):
        self.p.remove_data_directory()
        open(self.data_dir, 'w').close()
        self.p.remove_data_directory()
        os.symlink('unexisting', self.data_dir)
        with patch('os.unlink', Mock(side_effect=OSError)):
            self.p.remove_data_directory()
        self.p.remove_data_directory()

    @patch('patroni.postgresql.Postgresql._version_file_exists',
           Mock(return_value=True))
    def test_controldata(self):
        with patch('subprocess.check_output',
                   Mock(return_value=0, side_effect=pg_controldata_string)):
            data = self.p.controldata()
            self.assertEquals(len(data), 50)
            self.assertEquals(data['Database cluster state'],
                              'shut down in recovery')
            self.assertEquals(data['wal_log_hints setting'], 'on')
            self.assertEquals(int(data['Database block size']), 8192)

        with patch('subprocess.check_output',
                   Mock(side_effect=subprocess.CalledProcessError(1, ''))):
            self.assertEquals(self.p.controldata(), {})

    @patch('patroni.postgresql.Postgresql._version_file_exists',
           Mock(return_value=True))
    @patch('subprocess.check_output',
           MagicMock(return_value=0, side_effect=pg_controldata_string))
    def test_sysid(self):
        self.assertEqual(self.p.sysid, "6200971513092291716")

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_save_configuration_files(self):
        self.p.save_configuration_files()

    @patch('os.path.isfile', Mock(side_effect=[False, True]))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_restore_configuration_files(self):
        self.p.restore_configuration_files()

    def test_can_create_replica_without_replication_connection(self):
        self.p.config['create_replica_method'] = []
        self.assertFalse(
            self.p.can_create_replica_without_replication_connection())
        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo', 'no_master': 1}
        self.assertTrue(
            self.p.can_create_replica_without_replication_connection())

    def test_replica_method_can_work_without_replication_connection(self):
        self.assertFalse(
            self.p.replica_method_can_work_without_replication_connection(
                'basebackup'))
        self.assertFalse(
            self.p.replica_method_can_work_without_replication_connection(
                'foobar'))
        self.p.config['foo'] = {'command': 'bar', 'no_master': 1}
        self.assertTrue(
            self.p.replica_method_can_work_without_replication_connection(
                'foo'))
        self.p.config['foo'] = {'command': 'bar'}
        self.assertFalse(
            self.p.replica_method_can_work_without_replication_connection(
                'foo'))

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_reload_config(self):
        parameters = self._PARAMETERS.copy()
        parameters.pop('f.oo')
        config = {
            'pg_hba': [''],
            'use_unix_socket': True,
            'authentication': {},
            'retry_timeout': 10,
            'listen': '*',
            'parameters': parameters
        }
        self.p.reload_config(config)
        parameters['b.ar'] = 'bar'
        self.p.reload_config(config)
        parameters['autovacuum'] = 'on'
        self.p.reload_config(config)
        parameters['autovacuum'] = 'off'
        parameters.pop('search_path')
        config['listen'] = '*:5433'
        self.p.reload_config(config)
        parameters['unix_socket_directories'] = '.'
        self.p.reload_config(config)
        self.p.resolve_connection_addresses()

    @patch.object(Postgresql, '_version_file_exists', Mock(return_value=True))
    def test_get_major_version(self):
        with patch.object(builtins, 'open', mock_open(read_data='9.4')):
            self.assertEquals(self.p.get_major_version(), 90400)
        with patch.object(builtins, 'open', Mock(side_effect=Exception)):
            self.assertEquals(self.p.get_major_version(), 0)

    def test_postmaster_start_time(self):
        with patch.object(
                MockCursor, "fetchone",
                Mock(return_value=('foo', True, '', '', '', '', False))):
            self.assertEqual(self.p.postmaster_start_time(), 'foo')
        with patch.object(MockCursor, "execute", side_effect=psycopg2.Error):
            self.assertIsNone(self.p.postmaster_start_time())

    def test_check_for_startup(self):
        with patch('subprocess.call', return_value=0):
            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

        with patch('subprocess.call', return_value=1):
            self.p._state = 'starting'
            self.assertTrue(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'starting')

        with patch('subprocess.call', return_value=2):
            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'start failed')

        with patch('subprocess.call', return_value=0):
            self.p._state = 'running'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

        with patch('subprocess.call', return_value=127):
            self.p._state = 'running'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

    def test_wait_for_startup(self):
        state = {'sleeps': 0, 'num_rejects': 0, 'final_return': 0}
        self.__thread_ident = current_thread().ident

        def increment_sleeps(*args):
            if current_thread().ident == self.__thread_ident:
                print("Sleep")
                state['sleeps'] += 1

        def isready_return(*args):
            ret = 1 if state['sleeps'] < state['num_rejects'] else state[
                'final_return']
            print("Isready {0} {1}".format(ret, state))
            return ret

        def time_in_state(*args):
            return state['sleeps']

        with patch('subprocess.call', side_effect=isready_return):
            with patch('time.sleep', side_effect=increment_sleeps):
                self.p.time_in_state = Mock(side_effect=time_in_state)

                self.p._state = 'stopped'
                self.assertTrue(self.p.wait_for_startup())
                self.assertEquals(state['sleeps'], 0)

                self.p._state = 'starting'
                state['num_rejects'] = 5
                self.assertTrue(self.p.wait_for_startup())
                self.assertEquals(state['sleeps'], 5)

                self.p._state = 'starting'
                state['sleeps'] = 0
                state['final_return'] = 2
                self.assertFalse(self.p.wait_for_startup())

                self.p._state = 'starting'
                state['sleeps'] = 0
                state['final_return'] = 0
                self.assertFalse(self.p.wait_for_startup(timeout=2))
                self.assertEquals(state['sleeps'], 3)

        with patch.object(Postgresql, 'check_startup_state_changed',
                          Mock(return_value=False)):
            self.p.cancel()
            self.p._state = 'starting'
            self.assertIsNone(self.p.wait_for_startup())

    def test_pick_sync_standby(self):
        cluster = Cluster(True, None, self.leader, 0,
                          [self.me, self.other, self.leadermem], None,
                          SyncState(0, self.me.name,
                                    self.leadermem.name), None)

        with patch.object(Postgresql,
                          "query",
                          return_value=[
                              (self.leadermem.name, 'streaming', 'sync'),
                              (self.me.name, 'streaming', 'async'),
                              (self.other.name, 'streaming', 'async'),
                          ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster),
                              (self.leadermem.name, True))

        with patch.object(Postgresql,
                          "query",
                          return_value=[
                              (self.me.name, 'streaming', 'async'),
                              (self.leadermem.name, 'streaming', 'potential'),
                              (self.other.name, 'streaming', 'async'),
                          ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster),
                              (self.leadermem.name, False))

        with patch.object(Postgresql,
                          "query",
                          return_value=[
                              (self.me.name, 'streaming', 'async'),
                              (self.other.name, 'streaming', 'async'),
                          ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster),
                              (self.me.name, False))

        with patch.object(Postgresql,
                          "query",
                          return_value=[
                              ('missing', 'streaming', 'sync'),
                              (self.me.name, 'streaming', 'async'),
                              (self.other.name, 'streaming', 'async'),
                          ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster),
                              (self.me.name, False))

        with patch.object(Postgresql, "query", return_value=[]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster),
                              (None, False))

    def test_set_sync_standby(self):
        def value_in_conf():
            with open(os.path.join(self.data_dir, 'postgresql.conf')) as f:
                for line in f:
                    if line.startswith('synchronous_standby_names'):
                        return line.strip()

        mock_reload = self.p.reload = Mock()
        self.p.set_synchronous_standby('n1')
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n1'")
        mock_reload.assert_called()

        mock_reload.reset_mock()
        self.p.set_synchronous_standby('n1')
        mock_reload.assert_not_called()
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n1'")

        self.p.set_synchronous_standby('n2')
        mock_reload.assert_called()
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n2'")

        mock_reload.reset_mock()
        self.p.set_synchronous_standby(None)
        mock_reload.assert_called()
        self.assertEquals(value_in_conf(), None)

    def test_get_server_parameters(self):
        config = {
            'synchronous_mode': True,
            'parameters': {
                'wal_level': 'hot_standby'
            },
            'listen': '0'
        }
        self.p.get_server_parameters(config)
        config['synchronous_mode_strict'] = True
        self.p.get_server_parameters(config)
        self.p.set_synchronous_standby('foo')
        self.p.get_server_parameters(config)

    @patch('time.sleep', Mock())
    def test__wait_for_connection_close(self):
        mock_postmaster = MockPostmaster()
        with patch.object(Postgresql, 'is_running',
                          Mock(return_value=mock_postmaster)):
            mock_postmaster.is_running.side_effect = [True, False, False]
            mock_callback = Mock()
            self.p.stop(on_safepoint=mock_callback)

            mock_postmaster.is_running.side_effect = [True, False, False]
            with patch.object(MockCursor, "execute",
                              Mock(side_effect=psycopg2.Error)):
                self.p.stop(on_safepoint=mock_callback)

    def test_terminate_starting_postmaster(self):
        mock_postmaster = MockPostmaster()
        self.p.terminate_starting_postmaster(mock_postmaster)
        mock_postmaster.signal_stop.assert_called()
        mock_postmaster.wait.assert_called()

    def test_read_postmaster_opts(self):
        m = mock_open(
            read_data=
            '/usr/lib/postgres/9.6/bin/postgres "-D" "data/postgresql0" \
"--listen_addresses=127.0.0.1" "--port=5432" "--hot_standby=on" "--wal_level=hot_standby" \
"--wal_log_hints=on" "--max_wal_senders=5" "--max_replication_slots=5"\n')
        with patch.object(builtins, 'open', m):
            data = self.p.read_postmaster_opts()
            self.assertEquals(data['wal_level'], 'hot_standby')
            self.assertEquals(int(data['max_replication_slots']), 5)
            self.assertEqual(data.get('D'), None)

            m.side_effect = IOError
            data = self.p.read_postmaster_opts()
            self.assertEqual(data, dict())

    @patch('subprocess.Popen')
    def test_single_user_mode(self, subprocess_popen_mock):
        subprocess_popen_mock.return_value.wait.return_value = 0
        self.assertEquals(
            self.p.single_user_mode('CHECKPOINT', {'archive_mode': 'on'}), 0)

    @patch('os.listdir', Mock(side_effect=[OSError, ['a', 'b']]))
    @patch('os.unlink', Mock(side_effect=OSError))
    @patch('os.remove', Mock())
    @patch('os.path.islink', Mock(side_effect=[True, False]))
    @patch('os.path.isfile', Mock(return_value=True))
    def test_cleanup_archive_status(self):
        self.p.cleanup_archive_status()
        self.p.cleanup_archive_status()

    @patch('os.unlink', Mock())
    @patch('os.path.isfile', Mock(return_value=True))
    @patch.object(Postgresql, 'single_user_mode', Mock(return_value=0))
    def test_fix_cluster_state(self):
        self.assertTrue(self.p.fix_cluster_state())

    def test_replica_cached_timeline(self):
        self.assertEquals(self.p.replica_cached_timeline(1), 2)

    def test_get_master_timeline(self):
        self.assertEquals(self.p.get_master_timeline(), 1)

    def test_cancellable_subprocess_call(self):
        self.p.cancel()
        self.assertRaises(PostgresException,
                          self.p.cancellable_subprocess_call,
                          communicate_input=None)

    @patch('patroni.postgresql.polling_loop', Mock(return_value=[0, 0]))
    def test_cancel(self):
        self.p._cancellable = Mock()
        self.p._cancellable.returncode = None
        self.p.cancel()
        type(self.p._cancellable).returncode = PropertyMock(
            side_effect=[None, -15])
        self.p.cancel()

    @patch.object(Postgresql, 'get_postgres_role_from_data_directory',
                  Mock(return_value='replica'))
    def test__build_effective_configuration(self):
        with patch.object(
                Postgresql, 'controldata',
                Mock(
                    return_value={
                        'max_connections setting': '200',
                        'max_worker_processes setting': '20',
                        'max_prepared_xacts setting': '100',
                        'max_locks_per_xact setting': '100'
                    })):
            self.p.cancel()
            self.assertFalse(self.p.start())
            self.assertTrue(self.p.pending_restart)
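
These tests repeatedly construct MockPostmaster() and then drive its is_running, signal_stop and wait attributes as Mock objects. The fixture itself is not part of this excerpt; a minimal sketch of what such a test double could look like (the name is taken from the tests, the defaults are assumptions) is:

# Hypothetical sketch of the MockPostmaster test double used in these tests;
# the real fixture in the test suite may differ. Every attribute is a Mock so
# individual tests can override return_value/side_effect and assert on calls.
from mock import Mock  # or: from unittest.mock import Mock


class MockPostmaster(object):
    def __init__(self, is_running=True):
        # supports MockPostmaster(is_running=False) as well as later
        # mock_postmaster.is_running.return_value / .side_effect overrides
        self.is_running = Mock(return_value=is_running)
        # tests set signal_stop.return_value = False to simulate a failed stop
        self.signal_stop = Mock(return_value=True)
        # asserted with wait.assert_called() in test_terminate_starting_postmaster
        self.wait = Mock()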
Example #36
0
class TestPostgresql(unittest.TestCase):
    _PARAMETERS = {'wal_level': 'hot_standby', 'max_replication_slots': 5, 'f.oo': 'bar',
                   'search_path': 'public', 'hot_standby': 'on', 'max_wal_senders': 5,
                   'wal_keep_segments': 8, 'wal_log_hints': 'on', 'max_locks_per_transaction': 64,
                   'max_worker_processes': 8, 'max_connections': 100, 'max_prepared_transactions': 0,
                   'track_commit_timestamp': 'off', 'unix_socket_directories': '/tmp'}

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('os.rename', Mock())
    @patch.object(Postgresql, 'get_major_version', Mock(return_value=90600))
    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def setUp(self):
        self.data_dir = 'data/test0'
        self.config_dir = self.data_dir
        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': self.data_dir,
                             'config_dir': self.config_dir, 'retry_timeout': 10, 'pgpass': '******',
                             'listen': '127.0.0.2, 127.0.0.3:5432', 'connect_address': '127.0.0.2:5432',
                             'authentication': {'superuser': {'username': '******', 'password': '******'},
                                                'replication': {'username': '******', 'password': '******'}},
                             'remove_data_directory_on_rewind_failure': True,
                             'use_pg_rewind': True, 'pg_ctl_timeout': 'bla',
                             'parameters': self._PARAMETERS,
                             'recovery_conf': {'foo': 'bar'},
                             'pg_hba': ['host all all 0.0.0.0/0 md5'],
                             'callbacks': {'on_start': 'true', 'on_stop': 'true', 'on_reload': 'true',
                                           'on_restart': 'true', 'on_role_change': 'true'}})
        self.p._callback_executor = Mock()
        self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(0, 'test-1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres',
                            'tags': {'replicatefrom': 'leader'}})
        self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})

    def tearDown(self):
        shutil.rmtree('data')

    def test_get_initdb_options(self):
        self.assertEquals(self.p.get_initdb_options([{'encoding': 'UTF8'}, 'data-checksums']),
                          ['--encoding=UTF8', '--data-checksums'])
        self.assertRaises(Exception, self.p.get_initdb_options, [{'pgdata': 'bar'}])
        self.assertRaises(Exception, self.p.get_initdb_options, [{'foo': 'bar', 1: 2}])
        self.assertRaises(Exception, self.p.get_initdb_options, [1])

    @patch('os.path.exists', Mock(return_value=True))
    @patch('os.unlink', Mock())
    def test_delete_trigger_file(self):
        self.p.delete_trigger_file()

    @patch('subprocess.Popen')
    @patch.object(Postgresql, 'wait_for_startup')
    @patch.object(Postgresql, 'wait_for_port_open')
    @patch.object(Postgresql, 'is_running')
    def test_start(self, mock_is_running, mock_wait_for_port_open, mock_wait_for_startup, mock_popen):
        mock_is_running.return_value = MockPostmaster()
        mock_wait_for_port_open.return_value = True
        mock_wait_for_startup.return_value = False
        mock_popen.return_value.stdout.readline.return_value = '123'
        self.assertTrue(self.p.start())
        mock_is_running.return_value = None

        mock_postmaster = MockPostmaster()
        with patch.object(PostmasterProcess, 'start', return_value=mock_postmaster):
            pg_conf = os.path.join(self.data_dir, 'postgresql.conf')
            open(pg_conf, 'w').close()
            self.assertFalse(self.p.start(task=CriticalTask()))

            with open(pg_conf) as f:
                lines = f.readlines()
                self.assertTrue("f.oo = 'bar'\n" in lines)

            mock_wait_for_startup.return_value = None
            self.assertFalse(self.p.start(10))
            self.assertIsNone(self.p.start())

            mock_wait_for_port_open.return_value = False
            self.assertFalse(self.p.start())
            task = CriticalTask()
            task.cancel()
            self.assertFalse(self.p.start(task=task))

        self.p.cancel()
        self.assertFalse(self.p.start())

    @patch.object(Postgresql, 'pg_isready')
    @patch('patroni.postgresql.polling_loop', Mock(return_value=range(1)))
    def test_wait_for_port_open(self, mock_pg_isready):
        mock_pg_isready.return_value = STATE_NO_RESPONSE
        mock_postmaster = MockPostmaster(is_running=False)

        # No pid file and postmaster death
        self.assertFalse(self.p.wait_for_port_open(mock_postmaster, 1))

        mock_postmaster.is_running.return_value = True

        # timeout
        self.assertFalse(self.p.wait_for_port_open(mock_postmaster, 1))

        # pg_isready failure
        mock_pg_isready.return_value = 'garbage'
        self.assertTrue(self.p.wait_for_port_open(mock_postmaster, 1))

        # cancelled
        self.p.cancel()
        self.assertFalse(self.p.wait_for_port_open(mock_postmaster, 1))

    @patch('time.sleep', Mock())
    @patch.object(Postgresql, 'is_running')
    @patch.object(Postgresql, '_wait_for_connection_close', Mock())
    def test_stop(self, mock_is_running):
        # Postmaster is not running
        mock_callback = Mock()
        mock_is_running.return_value = None
        self.assertTrue(self.p.stop(on_safepoint=mock_callback))
        mock_callback.assert_called()

        # Is running, stopped successfully
        mock_is_running.return_value = mock_postmaster = MockPostmaster()
        mock_callback.reset_mock()
        self.assertTrue(self.p.stop(on_safepoint=mock_callback))
        mock_callback.assert_called()
        mock_postmaster.signal_stop.assert_called()

        # Stop signal failed
        mock_postmaster.signal_stop.return_value = False
        self.assertFalse(self.p.stop())

        # Stop signal failed to find process
        mock_postmaster.signal_stop.return_value = True
        mock_callback.reset_mock()
        self.assertTrue(self.p.stop(on_safepoint=mock_callback))
        mock_callback.assert_called()

    def test_restart(self):
        self.p.start = Mock(return_value=False)
        self.assertFalse(self.p.restart())
        self.assertEquals(self.p.state, 'restart failed (restarting)')

    @patch.object(builtins, 'open', MagicMock())
    def test_write_pgpass(self):
        self.p.write_pgpass({'host': 'localhost', 'port': '5432', 'user': '******'})
        self.p.write_pgpass({'host': 'localhost', 'port': '5432', 'user': '******', 'password': '******'})

    def test_checkpoint(self):
        with patch.object(MockCursor, 'fetchone', Mock(return_value=(True, ))):
            self.assertEquals(self.p.checkpoint({'user': '******'}), 'is_in_recovery=true')
        with patch.object(MockCursor, 'execute', Mock(return_value=None)):
            self.assertIsNone(self.p.checkpoint())
        self.assertEquals(self.p.checkpoint(), 'not accessible or not healty')

    @patch.object(Postgresql, 'cancellable_subprocess_call')
    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_pg_rewind(self, mock_cancellable_subprocess_call):
        r = {'user': '', 'host': '', 'port': '', 'database': '', 'password': ''}
        mock_cancellable_subprocess_call.return_value = 0
        self.assertTrue(self.p.pg_rewind(r))
        mock_cancellable_subprocess_call.side_effect = OSError
        self.assertFalse(self.p.pg_rewind(r))

    def test_check_recovery_conf(self):
        self.p.write_recovery_conf({'primary_conninfo': 'foo'})
        self.assertFalse(self.p.check_recovery_conf(None))
        self.p.write_recovery_conf({})
        self.assertTrue(self.p.check_recovery_conf(None))

    @patch.object(Postgresql, 'start', Mock())
    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    def test__get_local_timeline_lsn(self):
        self.p.trigger_check_diverged_lsn()
        with patch.object(Postgresql, 'controldata',
                          Mock(return_value={'Database cluster state': 'shut down in recovery',
                                             'Minimum recovery ending location': '0/0',
                                             "Min recovery ending loc's timeline": '0'})):
            self.p.rewind_needed_and_possible(self.leader)
        with patch.object(Postgresql, 'is_running', Mock(return_value=True)):
            with patch.object(MockCursor, 'fetchone', Mock(side_effect=[(False, ), Exception])):
                self.p.rewind_needed_and_possible(self.leader)

    @patch.object(Postgresql, 'start', Mock())
    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    @patch.object(Postgresql, '_get_local_timeline_lsn', Mock(return_value=(2, '40159C1')))
    @patch.object(Postgresql, 'check_leader_is_not_in_recovery')
    def test__check_timeline_and_lsn(self, mock_check_leader_is_not_in_recovery):
        mock_check_leader_is_not_in_recovery.return_value = False
        self.p.trigger_check_diverged_lsn()
        self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        mock_check_leader_is_not_in_recovery.return_value = True
        self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        self.p.trigger_check_diverged_lsn()
        with patch('psycopg2.connect', Mock(side_effect=Exception)):
            self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        self.p.trigger_check_diverged_lsn()
        with patch.object(MockCursor, 'fetchone', Mock(side_effect=[('', 2, '0/0'), ('', b'3\t0/40159C0\tn\n')])):
            self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
        self.p.trigger_check_diverged_lsn()
        with patch.object(MockCursor, 'fetchone', Mock(return_value=('', 1, '0/0'))):
            with patch.object(Postgresql, '_get_local_timeline_lsn', Mock(return_value=(1, '0/0'))):
                self.assertFalse(self.p.rewind_needed_and_possible(self.leader))
            self.p.trigger_check_diverged_lsn()
            self.assertTrue(self.p.rewind_needed_and_possible(self.leader))

    @patch.object(MockCursor, 'fetchone', Mock(side_effect=[(True,), Exception]))
    def test_check_leader_is_not_in_recovery(self):
        self.p.check_leader_is_not_in_recovery()
        self.p.check_leader_is_not_in_recovery()

    @patch.object(Postgresql, 'cancellable_subprocess_call', Mock(return_value=0))
    @patch.object(Postgresql, 'checkpoint', side_effect=['', '1'])
    @patch.object(Postgresql, 'stop', Mock(return_value=False))
    @patch.object(Postgresql, 'start', Mock())
    def test_rewind(self, mock_checkpoint):
        self.p.rewind(self.leader)
        with patch.object(Postgresql, 'pg_rewind', Mock(return_value=False)):
            mock_checkpoint.side_effect = ['1', '', '', '']
            self.p.rewind(self.leader)
            self.p.rewind(self.leader)
            with patch.object(Postgresql, 'check_leader_is_not_in_recovery', Mock(return_value=False)):
                self.p.rewind(self.leader)
            self.p.config['remove_data_directory_on_rewind_failure'] = False
            self.p.trigger_check_diverged_lsn()
            self.p.rewind(self.leader)
        with patch.object(Postgresql, 'is_running', Mock(return_value=True)):
            self.p.rewind(self.leader)
            self.p.is_leader = Mock(return_value=False)
            self.p.rewind(self.leader)

    @patch.object(Postgresql, 'is_running', Mock(return_value=False))
    @patch.object(Postgresql, 'start', Mock())
    def test_follow(self):
        self.p.follow(None)

    @patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string))
    def test_can_rewind(self):
        with patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with patch('subprocess.call', side_effect=OSError):
            self.assertFalse(self.p.can_rewind)
        with patch.object(Postgresql, 'controldata', Mock(return_value={'wal_log_hints setting': 'on'})):
            self.assertTrue(self.p.can_rewind)
        self.p.config['use_pg_rewind'] = False
        self.assertFalse(self.p.can_rewind)

    @patch('time.sleep', Mock())
    @patch.object(Postgresql, 'cancellable_subprocess_call')
    @patch.object(Postgresql, 'remove_data_directory', Mock(return_value=True))
    def test_create_replica(self, mock_cancellable_subprocess_call):
        self.p.delete_trigger_file = Mock(side_effect=OSError)

        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo'}
        mock_cancellable_subprocess_call.return_value = 0
        self.assertEquals(self.p.create_replica(self.leader), 0)
        del self.p.config['wale']
        self.assertEquals(self.p.create_replica(self.leader), 0)

        mock_cancellable_subprocess_call.return_value = 1
        self.assertEquals(self.p.create_replica(self.leader), 1)

        mock_cancellable_subprocess_call.side_effect = Exception('foo')
        self.assertEquals(self.p.create_replica(self.leader), 1)

        mock_cancellable_subprocess_call.side_effect = [1, 0]
        self.assertEquals(self.p.create_replica(self.leader), 0)

        mock_cancellable_subprocess_call.side_effect = [Exception(), 0]
        self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.cancel()
        self.assertEquals(self.p.create_replica(self.leader), 1)

    def test_basebackup(self):
        self.p.cancel()
        self.p.basebackup(None, None)

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, None, self.leader, 0, [self.me, self.other, self.leadermem], None, None, None)
        with mock.patch('patroni.postgresql.Postgresql._query', Mock(side_effect=psycopg2.OperationalError)):
            self.p.sync_replication_slots(cluster)
        self.p.sync_replication_slots(cluster)
        with mock.patch('patroni.postgresql.Postgresql.role', new_callable=PropertyMock(return_value='replica')):
            self.p.sync_replication_slots(cluster)
        with mock.patch('patroni.postgresql.logger.error', new_callable=Mock()) as errorlog_mock:
            self.p.query = Mock()
            alias1 = Member(0, 'test-3', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5436/postgres'})
            alias2 = Member(0, 'test.3', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5436/postgres'})
            cluster.members.extend([alias1, alias2])
            self.p.sync_replication_slots(cluster)
            errorlog_mock.assert_called_once()
            assert "test-3" in errorlog_mock.call_args[0][1]
            assert "test.3" in errorlog_mock.call_args[0][1]

    @patch.object(MockCursor, 'execute', Mock(side_effect=psycopg2.OperationalError))
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query, 'RetryFailedError')
        self.assertRaises(psycopg2.ProgrammingError, self.p.query, 'blabla')

    @patch.object(Postgresql, 'pg_isready', Mock(return_value=STATE_REJECT))
    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())
        self.p.reset_cluster_info_state()
        with patch.object(Postgresql, '_query', Mock(side_effect=RetryFailedError(''))):
            self.assertRaises(PostgresConnectionException, self.p.is_leader)

    def test_reload(self):
        self.assertTrue(self.p.reload())

    @patch.object(Postgresql, 'is_running')
    def test_is_healthy(self, mock_is_running):
        mock_is_running.return_value = True
        self.assertTrue(self.p.is_healthy())
        mock_is_running.return_value = False
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p.set_role('replica')
        self.assertIsNone(self.p.promote(0))
        self.assertTrue(self.p.promote(0))

    def test_timeline_wal_position(self):
        self.assertEquals(self.p.timeline_wal_position(), (1, 2))
        Thread(target=self.p.timeline_wal_position).start()

    @patch.object(PostmasterProcess, 'from_pidfile')
    def test_is_running(self, mock_frompidfile):
        # Cached postmaster running
        mock_postmaster = self.p._postmaster_proc = MockPostmaster()
        self.assertEquals(self.p.is_running(), mock_postmaster)

        # Cached postmaster not running, no postmaster running
        mock_postmaster.is_running.return_value = False
        mock_frompidfile.return_value = None
        self.assertEquals(self.p.is_running(), None)
        self.assertEquals(self.p._postmaster_proc, None)

        # No cached postmaster, postmaster running
        mock_frompidfile.return_value = mock_postmaster2 = MockPostmaster()
        self.assertEquals(self.p.is_running(), mock_postmaster2)
        self.assertEquals(self.p._postmaster_proc, mock_postmaster2)

    @patch('shlex.split', Mock(side_effect=OSError))
    def test_call_nowait(self):
        self.p.set_role('replica')
        self.assertIsNone(self.p.call_nowait('on_start'))
        self.p.bootstrapping = True
        self.assertIsNone(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    @patch.object(Postgresql, 'is_running', Mock(return_value=MockPostmaster()))
    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.move_data_directory()
        with patch('os.rename', Mock(side_effect=OSError)):
            self.p.move_data_directory()

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertFalse(self.p.bootstrap({}))

        config = {'users': {'replicator': {'password': '******', 'options': ['replication']}}}

        self.p.bootstrap(config)
        with open(os.path.join(self.config_dir, 'pg_hba.conf')) as f:
            lines = f.readlines()
            self.assertTrue('host all all 0.0.0.0/0 md5\n' in lines)

        self.p.config.pop('pg_hba')
        config.update({'post_init': '/bin/false',
                       'pg_hba': ['host replication replicator 127.0.0.1/32 md5',
                                  'hostssl all all 0.0.0.0/0 md5',
                                  'host all all 0.0.0.0/0 md5']})
        self.p.bootstrap(config)
        with open(os.path.join(self.data_dir, 'pg_hba.conf')) as f:
            lines = f.readlines()
            self.assertTrue('host replication replicator 127.0.0.1/32 md5\n' in lines)

    @patch.object(Postgresql, 'cancellable_subprocess_call')
    def test_custom_bootstrap(self, mock_cancellable_subprocess_call):
        self.p.config.pop('pg_hba')
        config = {'method': 'foo', 'foo': {'command': 'bar'}}

        mock_cancellable_subprocess_call.return_value = 1
        self.assertFalse(self.p.bootstrap(config))

        mock_cancellable_subprocess_call.return_value = 0
        with patch('subprocess.Popen', Mock(side_effect=Exception("42"))),\
                patch('os.path.isfile', Mock(return_value=True)),\
                patch('os.unlink', Mock()),\
                patch.object(Postgresql, 'save_configuration_files', Mock()),\
                patch.object(Postgresql, 'restore_configuration_files', Mock()),\
                patch.object(Postgresql, 'write_recovery_conf', Mock()):
            with self.assertRaises(Exception) as e:
                self.p.bootstrap(config)
            self.assertEqual(str(e.exception), '42')

            config['foo']['recovery_conf'] = {'foo': 'bar'}

            with self.assertRaises(Exception) as e:
                self.p.bootstrap(config)
            self.assertEqual(str(e.exception), '42')

        mock_cancellable_subprocess_call.side_effect = Exception
        self.assertFalse(self.p.bootstrap(config))

    @patch('time.sleep', Mock())
    @patch('os.unlink', Mock())
    @patch.object(Postgresql, 'run_bootstrap_post_init', Mock(return_value=True))
    @patch.object(Postgresql, '_custom_bootstrap', Mock(return_value=True))
    @patch.object(Postgresql, 'start', Mock(return_value=True))
    def test_post_bootstrap(self):
        config = {'method': 'foo', 'foo': {'command': 'bar'}}
        self.p.bootstrap(config)

        task = CriticalTask()
        with patch.object(Postgresql, 'create_or_update_role', Mock(side_effect=Exception)):
            self.p.post_bootstrap({}, task)
            self.assertFalse(task.result)

        self.p.config.pop('pg_hba')
        self.p.post_bootstrap({}, task)
        self.assertTrue(task.result)

        self.p.bootstrap(config)
        self.p.set_state('stopped')
        self.p.reload_config({'authentication': {'superuser': {'username': '******', 'password': '******'},
                                                 'replication': {'username': '******', 'password': '******'}},
                              'listen': '*', 'retry_timeout': 10, 'parameters': {'hba_file': 'foo'}})
        with patch.object(Postgresql, 'restart', Mock()) as mock_restart:
            self.p.post_bootstrap({}, task)
            mock_restart.assert_called_once()

    @patch.object(Postgresql, 'cancellable_subprocess_call')
    def test_run_bootstrap_post_init(self, mock_cancellable_subprocess_call):
        mock_cancellable_subprocess_call.return_value = 1
        self.assertFalse(self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

        mock_cancellable_subprocess_call.return_value = 0
        self.p._superuser.pop('username')
        self.assertTrue(self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))
        mock_cancellable_subprocess_call.assert_called()
        args, kwargs = mock_cancellable_subprocess_call.call_args
        self.assertTrue('PGPASSFILE' in kwargs['env'])
        self.assertEquals(args[0], ['/bin/false', 'postgres://127.0.0.2:5432/postgres'])

        mock_cancellable_subprocess_call.reset_mock()
        self.p._local_address.pop('host')
        self.assertTrue(self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))
        mock_cancellable_subprocess_call.assert_called()
        self.assertEquals(mock_cancellable_subprocess_call.call_args[0][0], ['/bin/false', 'postgres://:5432/postgres'])

        mock_cancellable_subprocess_call.side_effect = OSError
        self.assertFalse(self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

    @patch('patroni.postgresql.Postgresql.create_replica', Mock(return_value=0))
    def test_clone(self):
        self.p.clone(self.leader)

    @patch('os.listdir', Mock(return_value=['recovery.conf']))
    @patch('os.path.exists', Mock(return_value=True))
    def test_get_postgres_role_from_data_directory(self):
        self.assertEquals(self.p.get_postgres_role_from_data_directory(), 'replica')

    def test_remove_data_directory(self):
        self.p.remove_data_directory()
        open(self.data_dir, 'w').close()
        self.p.remove_data_directory()
        os.symlink('unexisting', self.data_dir)
        with patch('os.unlink', Mock(side_effect=OSError)):
            self.p.remove_data_directory()
        self.p.remove_data_directory()

    @patch('patroni.postgresql.Postgresql._version_file_exists', Mock(return_value=True))
    def test_controldata(self):
        with patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string)):
            data = self.p.controldata()
            self.assertEquals(len(data), 50)
            self.assertEquals(data['Database cluster state'], 'shut down in recovery')
            self.assertEquals(data['wal_log_hints setting'], 'on')
            self.assertEquals(int(data['Database block size']), 8192)

        with patch('subprocess.check_output', Mock(side_effect=subprocess.CalledProcessError(1, ''))):
            self.assertEquals(self.p.controldata(), {})

    @patch('patroni.postgresql.Postgresql._version_file_exists', Mock(return_value=True))
    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    def test_sysid(self):
        self.assertEqual(self.p.sysid, "6200971513092291716")

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_save_configuration_files(self):
        self.p.save_configuration_files()

    @patch('os.path.isfile', Mock(side_effect=[False, True]))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_restore_configuration_files(self):
        self.p.restore_configuration_files()

    def test_can_create_replica_without_replication_connection(self):
        self.p.config['create_replica_method'] = []
        self.assertFalse(self.p.can_create_replica_without_replication_connection())
        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo', 'no_master': 1}
        self.assertTrue(self.p.can_create_replica_without_replication_connection())

    def test_replica_method_can_work_without_replication_connection(self):
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('basebackup'))
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('foobar'))
        self.p.config['foo'] = {'command': 'bar', 'no_master': 1}
        self.assertTrue(self.p.replica_method_can_work_without_replication_connection('foo'))
        self.p.config['foo'] = {'command': 'bar'}
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('foo'))

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_reload_config(self):
        parameters = self._PARAMETERS.copy()
        parameters.pop('f.oo')
        config = {'pg_hba': [''], 'use_unix_socket': True, 'authentication': {},
                  'retry_timeout': 10, 'listen': '*', 'parameters': parameters}
        self.p.reload_config(config)
        parameters['b.ar'] = 'bar'
        self.p.reload_config(config)
        parameters['autovacuum'] = 'on'
        self.p.reload_config(config)
        parameters['autovacuum'] = 'off'
        parameters.pop('search_path')
        config['listen'] = '*:5433'
        self.p.reload_config(config)
        parameters['unix_socket_directories'] = '.'
        self.p.reload_config(config)
        self.p.resolve_connection_addresses()

    @patch.object(Postgresql, '_version_file_exists', Mock(return_value=True))
    def test_get_major_version(self):
        with patch.object(builtins, 'open', mock_open(read_data='9.4')):
            self.assertEquals(self.p.get_major_version(), 90400)
        with patch.object(builtins, 'open', Mock(side_effect=Exception)):
            self.assertEquals(self.p.get_major_version(), 0)

    def test_postmaster_start_time(self):
        with patch.object(MockCursor, "fetchone", Mock(return_value=('foo', True, '', '', '', '', False))):
            self.assertEqual(self.p.postmaster_start_time(), 'foo')
        with patch.object(MockCursor, "execute", side_effect=psycopg2.Error):
            self.assertIsNone(self.p.postmaster_start_time())

    def test_check_for_startup(self):
        with patch('subprocess.call', return_value=0):
            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

        with patch('subprocess.call', return_value=1):
            self.p._state = 'starting'
            self.assertTrue(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'starting')

        with patch('subprocess.call', return_value=2):
            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'start failed')

        with patch('subprocess.call', return_value=0):
            self.p._state = 'running'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

        with patch('subprocess.call', return_value=127):
            self.p._state = 'running'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

    def test_wait_for_startup(self):
        state = {'sleeps': 0, 'num_rejects': 0, 'final_return': 0}

        def increment_sleeps(*args):
            print("Sleep")
            state['sleeps'] += 1

        def isready_return(*args):
            ret = 1 if state['sleeps'] < state['num_rejects'] else state['final_return']
            print("Isready {0} {1}".format(ret, state))
            return ret

        def time_in_state(*args):
            return state['sleeps']

        with patch('subprocess.call', side_effect=isready_return):
            with patch('time.sleep', side_effect=increment_sleeps):
                self.p.time_in_state = Mock(side_effect=time_in_state)

                self.p._state = 'stopped'
                self.assertTrue(self.p.wait_for_startup())
                self.assertEquals(state['sleeps'], 0)

                self.p._state = 'starting'
                state['num_rejects'] = 5
                self.assertTrue(self.p.wait_for_startup())
                self.assertEquals(state['sleeps'], 5)

                self.p._state = 'starting'
                state['sleeps'] = 0
                state['final_return'] = 2
                self.assertFalse(self.p.wait_for_startup())

                self.p._state = 'starting'
                state['sleeps'] = 0
                state['final_return'] = 0
                self.assertFalse(self.p.wait_for_startup(timeout=2))
                self.assertEquals(state['sleeps'], 3)

        with patch.object(Postgresql, 'check_startup_state_changed', Mock(return_value=False)):
            self.p.cancel()
            self.p._state = 'starting'
            self.assertIsNone(self.p.wait_for_startup())

    def test_read_pid_file(self):
        pidfile = os.path.join(self.data_dir, 'postmaster.pid')
        if os.path.exists(pidfile):
            os.remove(pidfile)
        self.assertEquals(self.p._read_pid_file(), {})
        with open(pidfile, 'w') as fd:
            fd.write("123\n/foo/bar\n123456789\n5432")
        self.assertEquals(self.p._read_pid_file(), {"pid": "123", "data_dir": "/foo/bar",
                                                    "start_time": "123456789", "port": "5432"})

    def test_pick_sync_standby(self):
        cluster = Cluster(True, None, self.leader, 0, [self.me, self.other, self.leadermem], None,
                          SyncState(0, self.me.name, self.leadermem.name), None)

        with patch.object(Postgresql, "query", return_value=[
                    (self.leadermem.name, 'streaming', 'sync'),
                    (self.me.name, 'streaming', 'async'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.leadermem.name, True))

        with patch.object(Postgresql, "query", return_value=[
                    (self.me.name, 'streaming', 'async'),
                    (self.leadermem.name, 'streaming', 'potential'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.leadermem.name, False))

        with patch.object(Postgresql, "query", return_value=[
                    (self.me.name, 'streaming', 'async'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.me.name, False))

        with patch.object(Postgresql, "query", return_value=[
                    ('missing', 'streaming', 'sync'),
                    (self.me.name, 'streaming', 'async'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.me.name, False))

        with patch.object(Postgresql, "query", return_value=[]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (None, False))

    def test_set_sync_standby(self):
        def value_in_conf():
            with open(os.path.join(self.data_dir, 'postgresql.conf')) as f:
                for line in f:
                    if line.startswith('synchronous_standby_names'):
                        return line.strip()

        mock_reload = self.p.reload = Mock()
        self.p.set_synchronous_standby('n1')
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n1'")
        mock_reload.assert_called()

        mock_reload.reset_mock()
        self.p.set_synchronous_standby('n1')
        mock_reload.assert_not_called()
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n1'")

        self.p.set_synchronous_standby('n2')
        mock_reload.assert_called()
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n2'")

        mock_reload.reset_mock()
        self.p.set_synchronous_standby(None)
        mock_reload.assert_called()
        self.assertEquals(value_in_conf(), None)

    def test_get_server_parameters(self):
        config = {'synchronous_mode': True, 'parameters': {'wal_level': 'hot_standby'}, 'listen': '0'}
        self.p.get_server_parameters(config)
        config['synchronous_mode_strict'] = True
        self.p.get_server_parameters(config)
        self.p.set_synchronous_standby('foo')
        self.p.get_server_parameters(config)

    @patch('time.sleep', Mock())
    def test__wait_for_connection_close(self):
        mock_postmaster = MockPostmaster()
        with patch.object(Postgresql, 'is_running', Mock(return_value=mock_postmaster)):
            mock_postmaster.is_running.side_effect = [True, False, False]
            mock_callback = Mock()
            self.p.stop(on_safepoint=mock_callback)

            mock_postmaster.is_running.side_effect = [True, False, False]
            with patch.object(MockCursor, "execute", Mock(side_effect=psycopg2.Error)):
                self.p.stop(on_safepoint=mock_callback)

    def test_terminate_starting_postmaster(self):
        mock_postmaster = MockPostmaster()
        self.p.terminate_starting_postmaster(mock_postmaster)
        mock_postmaster.signal_stop.assert_called()
        mock_postmaster.wait.assert_called()

    def test_read_postmaster_opts(self):
        m = mock_open(read_data='/usr/lib/postgres/9.6/bin/postgres "-D" "data/postgresql0" \
"--listen_addresses=127.0.0.1" "--port=5432" "--hot_standby=on" "--wal_level=hot_standby" \
"--wal_log_hints=on" "--max_wal_senders=5" "--max_replication_slots=5"\n')
        with patch.object(builtins, 'open', m):
            data = self.p.read_postmaster_opts()
            self.assertEquals(data['wal_level'], 'hot_standby')
            self.assertEquals(int(data['max_replication_slots']), 5)
            self.assertEqual(data.get('D'), None)

            m.side_effect = IOError
            data = self.p.read_postmaster_opts()
            self.assertEqual(data, dict())

    @patch('subprocess.Popen')
    def test_single_user_mode(self, subprocess_popen_mock):
        subprocess_popen_mock.return_value.wait.return_value = 0
        self.assertEquals(self.p.single_user_mode('CHECKPOINT', {'archive_mode': 'on'}), 0)

    @patch('os.listdir', Mock(side_effect=[OSError, ['a', 'b']]))
    @patch('os.unlink', Mock(side_effect=OSError))
    @patch('os.remove', Mock())
    @patch('os.path.islink', Mock(side_effect=[True, False]))
    @patch('os.path.isfile', Mock(return_value=True))
    def test_cleanup_archive_status(self):
        self.p.cleanup_archive_status()
        self.p.cleanup_archive_status()

    @patch('os.unlink', Mock())
    @patch('os.path.isfile', Mock(return_value=True))
    @patch.object(Postgresql, 'single_user_mode', Mock(return_value=0))
    def test_fix_cluster_state(self):
        self.assertTrue(self.p.fix_cluster_state())

    def test_replica_cached_timeline(self):
        self.assertEquals(self.p.replica_cached_timeline(1), 2)

    def test_get_master_timeline(self):
        self.assertEquals(self.p.get_master_timeline(), 1)

    def test_cancellable_subprocess_call(self):
        self.p.cancel()
        self.assertRaises(PostgresException, self.p.cancellable_subprocess_call, communicate_input=None)

    @patch('patroni.postgresql.polling_loop', Mock(return_value=[0, 0]))
    def test_cancel(self):
        self.p._cancellable = Mock()
        self.p._cancellable.returncode = None
        self.p.cancel()
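        # simulate the child terminating between polls: returncode is None on the
        # first check and -15 (terminated by SIGTERM) on the next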
        type(self.p._cancellable).returncode = PropertyMock(side_effect=[None, -15])
        self.p.cancel()
Пример #37
0
class TestHa(unittest.TestCase):

    @patch('socket.getaddrinfo', socket_getaddrinfo)
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('patroni.dcs.dcs_modules', Mock(return_value=['patroni.dcs.foo', 'patroni.dcs.etcd']))
    @patch.object(etcd.Client, 'read', etcd_read)
    def setUp(self):
        with patch.object(Client, 'machines') as mock_machines:
            mock_machines.__get__ = Mock(return_value=['http://*****:*****'])

    @patch.object(Postgresql, 'fix_cluster_state', Mock())
    def test_crash_recovery(self):
        self.p.is_running = false
        self.p.controldata = lambda: {'Database cluster state': 'in production'}
        self.assertEquals(self.ha.run_cycle(), 'doing crash recovery in a single user mode')

    @patch.object(Postgresql, 'rewind_needed_and_possible', Mock(return_value=True))
    def test_recover_with_rewind(self):
        self.p.is_running = false
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.run_cycle(), 'running pg_rewind from leader')

    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    @patch.object(Postgresql, 'fix_cluster_state', Mock())
    def test_single_user_after_recover_failed(self):
        self.p.controldata = lambda: {'Database cluster state': 'in recovery'}
        self.p.is_running = false
        self.p.follow = false
        self.assertEquals(self.ha.run_cycle(), 'starting as a secondary')
        self.assertEquals(self.ha.run_cycle(), 'fixing cluster state in a single user mode')

    @patch('sys.exit', return_value=1)
    @patch('patroni.ha.Ha.sysid_valid', MagicMock(return_value=True))
    def test_sysid_no_match(self, exit_mock):
        self.ha.run_cycle()
        exit_mock.assert_called_once_with(1)

    @patch.object(Cluster, 'is_unlocked', Mock(return_value=False))
    def test_start_as_readonly(self):
        self.p.is_leader = false
        self.p.is_healthy = true
        self.ha.has_lock = true
        self.p.controldata = lambda: {'Database cluster state': 'in production'}
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader because i had the session lock')

    @patch('psycopg2.connect', psycopg2_connect)
    def test_acquire_lock_as_master(self):
        self.assertEquals(self.ha.run_cycle(), 'acquired session lock as a leader')

    def test_promoted_by_acquiring_lock(self):
        self.ha.is_healthiest_node = true
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')

    def test_long_promote(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = false
        self.p.set_role('master')
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')

    def test_demote_after_failing_to_obtain_lock(self):
        self.ha.acquire_lock = false
        self.assertEquals(self.ha.run_cycle(), 'demoted self after trying and failing to obtain lock')

    def test_follow_new_leader_after_failing_to_obtain_lock(self):
        self.ha.is_healthiest_node = true
        self.ha.acquire_lock = false
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'following new leader after trying and failing to obtain lock')

    def test_demote_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.assertEquals(self.ha.run_cycle(), 'demoting self because i am not the healthiest node')

    def test_follow_new_leader_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because i am not the healthiest node')

    def test_promote_because_have_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader because i had the session lock')

    def test_promote_without_watchdog(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = true
        with patch.object(Watchdog, 'activate', Mock(return_value=False)):
            self.assertEquals(self.ha.run_cycle(), 'Demoting self because watchdog could not be activated')
            self.p.is_leader = false
            self.assertEquals(self.ha.run_cycle(), 'Not promoting self because watchdog could not be activated')

    def test_leader_with_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')

    def test_demote_because_not_having_lock(self):
        self.ha.cluster.is_unlocked = false
        with patch.object(Watchdog, 'is_running', PropertyMock(return_value=True)):
            self.assertEquals(self.ha.run_cycle(), 'demoting self because i do not have the lock and i was a leader')

    def test_demote_because_update_lock_failed(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.ha.update_lock = false
        self.assertEquals(self.ha.run_cycle(), 'demoted self because failed to update leader lock in DCS')
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'not promoting because failed to update leader lock in DCS')

    def test_follow(self):
        self.ha.cluster.is_unlocked = false
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am a secondary and i am following a leader')
        self.ha.patroni.replicatefrom = "foo"
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am a secondary and i am following a leader')

    def test_follow_in_pause(self):
        self.ha.cluster.is_unlocked = false
        self.ha.is_paused = true
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: continue to run as master without lock')
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: no action')

    @patch.object(Postgresql, 'rewind_needed_and_possible', Mock(return_value=True))
    def test_follow_triggers_rewind(self):
        self.p.is_leader = false
        self.p.trigger_check_diverged_lsn()
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.run_cycle(), 'running pg_rewind from leader')

    def test_no_etcd_connection_master_demote(self):
        self.ha.load_cluster_from_dcs = Mock(side_effect=DCSError('Etcd is not responding properly'))
        self.assertEquals(self.ha.run_cycle(), 'demoted self because DCS is not accessible and i was a leader')

    @patch('time.sleep', Mock())
    def test_bootstrap_from_another_member(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.bootstrap(), 'trying to bootstrap from replica \'other\'')

    def test_bootstrap_waiting_for_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(), 'waiting for leader to bootstrap')

    def test_bootstrap_without_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.p.can_create_replica_without_replication_connection = MagicMock(return_value=True)
        self.assertEquals(self.ha.bootstrap(), 'trying to bootstrap (without leader)')

    def test_bootstrap_initialize_lock_failed(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(), 'failed to acquire initialize lock')

    def test_bootstrap_initialized_new_cluster(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.assertEquals(self.ha.bootstrap(), 'trying to bootstrap a new cluster')
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'waiting for end of recovery after bootstrap')
        self.p.is_leader = true
        self.assertEquals(self.ha.run_cycle(), 'running post_bootstrap')
        self.assertEquals(self.ha.run_cycle(), 'initialized a new cluster')

    def test_bootstrap_release_initialize_key_on_failure(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.ha.bootstrap()
        self.p.is_running = false
        self.assertRaises(PatroniException, self.ha.post_bootstrap)

    def test_bootstrap_release_initialize_key_on_watchdog_failure(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.ha.bootstrap()
        self.p.is_running.return_value = MockPostmaster()
        self.p.is_leader = true
        with patch.object(Watchdog, 'activate', Mock(return_value=False)):
            self.assertEquals(self.ha.post_bootstrap(), 'running post_bootstrap')
            self.assertRaises(PatroniException, self.ha.post_bootstrap)

    @patch('psycopg2.connect', psycopg2_connect)
    def test_reinitialize(self):
        self.assertIsNotNone(self.ha.reinitialize())

        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertIsNone(self.ha.reinitialize(True))

        self.assertIsNotNone(self.ha.reinitialize())

        self.ha.state_handler.name = self.ha.cluster.leader.name
        self.assertIsNotNone(self.ha.reinitialize())

    @patch('time.sleep', Mock())
    def test_restart(self):
        self.assertEquals(self.ha.restart({}), (True, 'restarted successfully'))
        self.p.restart = Mock(return_value=None)
        self.assertEquals(self.ha.restart({}), (False, 'postgres is still starting'))
        self.p.restart = false
        self.assertEquals(self.ha.restart({}), (False, 'restart failed'))
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.reinitialize()
        self.assertEquals(self.ha.restart({}), (False, 'reinitialize already in progress'))
        with patch.object(self.ha, "restart_matches", return_value=False):
            self.assertEquals(self.ha.restart({'foo': 'bar'}), (False, "restart conditions are not satisfied"))

    @patch('os.kill', Mock())
    def test_restart_in_progress(self):
        with patch('patroni.async_executor.AsyncExecutor.busy', PropertyMock(return_value=True)):
            self.ha.restart({}, run_async=True)
            self.assertTrue(self.ha.restart_scheduled())
            self.assertEquals(self.ha.run_cycle(), 'restart in progress')

            self.ha.cluster = get_cluster_initialized_with_leader()
            self.assertEquals(self.ha.run_cycle(), 'restart in progress')

            self.ha.has_lock = true
            self.assertEquals(self.ha.run_cycle(), 'updated leader lock during restart')

            self.ha.update_lock = false
            self.p.set_role('master')
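            # losing the leader lock while a restart is still in flight: if the critical
            # task cannot be cancelled, the starting postmaster has to be terminated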
            with patch('patroni.async_executor.CriticalTask.cancel', Mock(return_value=False)):
                with patch('patroni.postgresql.Postgresql.terminate_starting_postmaster') as mock_terminate:
                    self.assertEquals(self.ha.run_cycle(), 'lost leader lock during restart')
                    mock_terminate.assert_called()

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader(self):
        self.ha.fetch_node_status = get_node_status()
        self.ha.has_lock = true
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', '', None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, '', 'blabla', None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        f = Failover(0, self.p.name, '', None)
        self.ha.cluster = get_cluster_initialized_with_leader(f)
        self.assertEquals(self.ha.run_cycle(), 'manual failover: demoting myself')
        self.p.rewind_needed_and_possible = true
        self.assertEquals(self.ha.run_cycle(), 'manual failover: demoting myself')
        self.ha.fetch_node_status = get_node_status(nofailover=True)
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        self.ha.fetch_node_status = get_node_status(watchdog_failed=True)
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        self.ha.fetch_node_status = get_node_status(wal_position=1)
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
        # manual failover from the previous leader to us won't happen if we hold the nofailover flag
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')

        # Failover scheduled time must include timezone
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.ha.run_cycle()

        scheduled = datetime.datetime.utcnow().replace(tzinfo=tzutc)
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=30)
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=-600)
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

        scheduled = None
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader_in_pause(self):
        self.ha.has_lock = true
        self.ha.is_paused = true
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('PAUSE: no action.  i am the leader with the lock', self.ha.run_cycle())
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, '', None))
        self.assertEquals('PAUSE: no action.  i am the leader with the lock', self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader_in_synchronous_mode(self):
        self.p.is_leader = true
        self.ha.has_lock = true
        self.ha.is_synchronous_mode = true
        self.ha.is_failover_possible = false
        self.ha.process_sync_replication = Mock()
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, 'a', None), (self.p.name, None))
        self.assertEquals('no action.  i am the leader with the lock', self.ha.run_cycle())
        self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, 'a', None), (self.p.name, 'a'))
        self.ha.is_failover_possible = true
        self.assertEquals('manual failover: demoting myself', self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_process_no_leader(self):
        self.p.is_leader = false
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', 'leader', None))
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        self.ha.fetch_node_status = get_node_status()  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because i am not the healthiest node')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, self.p.name, '', None))
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because i am not the healthiest node')
        self.ha.fetch_node_status = get_node_status(reachable=False)  # inaccessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        # set the nofailover flag to True for all members of the cluster
        # this should elect the current member, as we are not going to call the API for it.
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', 'other', None))
        self.ha.fetch_node_status = get_node_status(nofailover=True)  # accessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')
        # same as previous, but set the current member to nofailover. In no case should it be elected as the leader
        self.ha.patroni.nofailover = True
        self.assertEquals(self.ha.run_cycle(), 'following a different leader because I am not allowed to promote')

    def test_manual_failover_process_no_leader_in_pause(self):
        self.ha.is_paused = true
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', 'other', None))
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: continue to run as master without lock')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', '', None))
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: continue to run as master without lock')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', 'blabla', None))
        self.assertEquals('PAUSE: acquired session lock as a leader', self.ha.run_cycle())
        self.p.is_leader = false
        self.p.set_role('replica')
        self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: promoted self to leader by acquiring session lock')

    def test_is_healthiest_node(self):
        self.ha.state_handler.is_leader = false
        self.ha.patroni.nofailover = False
        self.ha.fetch_node_status = get_node_status()
        self.assertTrue(self.ha.is_healthiest_node())
        with patch.object(Watchdog, 'is_healthy', PropertyMock(return_value=False)):
            self.assertFalse(self.ha.is_healthiest_node())
        with patch('patroni.postgresql.Postgresql.is_starting', return_value=True):
            self.assertFalse(self.ha.is_healthiest_node())
        self.ha.is_paused = true
        self.assertFalse(self.ha.is_healthiest_node())

    def test__is_healthiest_node(self):
        self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.p.is_leader = false
        self.ha.fetch_node_status = get_node_status()  # accessible, in_recovery
        self.assertTrue(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = get_node_status(in_recovery=False)  # accessible, not in_recovery
        self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = get_node_status(wal_position=11)  # accessible, in_recovery, wal position ahead
        self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        with patch('patroni.postgresql.Postgresql.timeline_wal_position', return_value=(1, 1)):
            self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = True
        self.assertFalse(self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = False

    @patch('requests.get', requests_get)
    def test_fetch_node_status(self):
        member = Member(0, 'test', 1, {'api_url': 'http://127.0.0.1:8011/patroni'})
        self.ha.fetch_node_status(member)
        member = Member(0, 'test', 1, {'api_url': 'http://*****:*****'})
        self.ha.fetch_node_status(member)

    @patch('patroni.ha.Ha.update_lock', return_value=True)
    @patch('patroni.ha.Ha.demote')
    def test_starting_timeout(self, demote, update_lock):
        def check_calls(seq):
            for mock, called in seq:
                if called:
                    mock.assert_called_once()
                else:
                    mock.assert_not_called()
                mock.reset_mock()
        self.ha.has_lock = true
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.p.check_for_startup = true
        self.p.time_in_state = lambda: 30
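        # time_in_state reports 30 seconds; with the default master_start_timeout of 300
        # seconds that leaves 270 seconds before the startup timeout fires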
        self.assertEquals(self.ha.run_cycle(), 'PostgreSQL is still starting up, 270 seconds until timeout')
        check_calls([(update_lock, True), (demote, False)])

        self.p.time_in_state = lambda: 350
        self.ha.fetch_node_status = get_node_status(reachable=False)  # inaccessible, in_recovery
        self.assertEquals(self.ha.run_cycle(),
                          'master start has timed out, but continuing to wait because failover is not possible')
        check_calls([(update_lock, True), (demote, False)])

        self.ha.fetch_node_status = get_node_status()  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(), 'stopped PostgreSQL because of startup timeout')
        check_calls([(update_lock, True), (demote, True)])

        update_lock.return_value = False
        self.assertEquals(self.ha.run_cycle(), 'stopped PostgreSQL while starting up because leader key was lost')
        check_calls([(update_lock, True), (demote, True)])

        self.ha.has_lock = false
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am a secondary and i am following a leader')
        check_calls([(update_lock, False), (demote, False)])

    def test_manual_failover_while_starting(self):
        self.ha.has_lock = true
        self.p.check_for_startup = true
        f = Failover(0, self.p.name, '', None)
        self.ha.cluster = get_cluster_initialized_with_leader(f)
        self.ha.fetch_node_status = get_node_status()  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(), 'manual failover: demoting myself')

    @patch('patroni.ha.Ha.demote')
    def test_failover_immediately_on_zero_master_start_timeout(self, demote):
        self.p.is_running = false
        self.ha.cluster = get_cluster_initialized_with_leader(sync=(self.p.name, 'other'))
        self.ha.cluster.config.data['synchronous_mode'] = True
        self.ha.patroni.config.set_dynamic_configuration({'master_start_timeout': 0})
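        # with master_start_timeout set to 0 there is no grace period after a crash:
        # PostgreSQL is stopped immediately so the sync standby can take over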
        self.ha.has_lock = true
        self.ha.update_lock = true
        self.ha.fetch_node_status = get_node_status()  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(), 'stopped PostgreSQL to fail over after a crash')
        demote.assert_called_once()

    @patch('patroni.postgresql.Postgresql.follow')
    def test_demote_immediate(self, follow):
        self.ha.has_lock = true
        self.e.get_cluster = Mock(return_value=get_cluster_initialized_without_leader())
        self.ha.demote('immediate')
        follow.assert_called_once_with(None)

    def test_process_sync_replication(self):
        self.ha.has_lock = true
        mock_set_sync = self.p.set_synchronous_standby = Mock()
        self.p.name = 'leader'

        # Test sync key removed when sync mode disabled
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader', 'other'))
        with patch.object(self.ha.dcs, 'delete_sync_state') as mock_delete_sync:
            self.ha.run_cycle()
            mock_delete_sync.assert_called_once()
            mock_set_sync.assert_called_once_with(None)

        mock_set_sync.reset_mock()
        # Test sync key not touched when not there
        self.ha.cluster = get_cluster_initialized_with_leader()
        with patch.object(self.ha.dcs, 'delete_sync_state') as mock_delete_sync:
            self.ha.run_cycle()
            mock_delete_sync.assert_not_called()
            mock_set_sync.assert_called_once_with(None)

        mock_set_sync.reset_mock()

        self.ha.is_synchronous_mode = true

        # Test sync standby not touched when picking the same node
        self.p.pick_synchronous_standby = Mock(return_value=('other', True))
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader', 'other'))
        self.ha.run_cycle()
        mock_set_sync.assert_not_called()

        mock_set_sync.reset_mock()

        # Test sync standby is replaced when switching standbys
        self.p.pick_synchronous_standby = Mock(return_value=('other2', False))
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.run_cycle()
        mock_set_sync.assert_called_once_with('other2')

        mock_set_sync.reset_mock()
        # Test sync standby is not disabled when updating dcs fails
        self.ha.dcs.write_sync_state = Mock(return_value=False)
        self.ha.run_cycle()
        mock_set_sync.assert_not_called()

        mock_set_sync.reset_mock()
        # Test changing sync standby
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(return_value=get_cluster_initialized_with_leader(sync=('leader', 'other')))
        # self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader', 'other'))
        self.p.pick_synchronous_standby = Mock(return_value=('other2', True))
        self.ha.run_cycle()
        self.ha.dcs.get_cluster.assert_called_once()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 2)

        # Test updating sync standby key failed due to race
        self.ha.dcs.write_sync_state = Mock(side_effect=[True, False])
        self.ha.run_cycle()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 2)

        # Test changing sync standby failed due to race
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(return_value=get_cluster_initialized_with_leader(sync=('somebodyelse', None)))
        self.ha.run_cycle()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 1)

        # Test sync set to '*' when synchronous_mode_strict is enabled
        mock_set_sync.reset_mock()
        self.ha.is_synchronous_mode_strict = true
        self.p.pick_synchronous_standby = Mock(return_value=(None, False))
        self.ha.run_cycle()
        mock_set_sync.assert_called_once_with('*')

    def test_sync_replication_become_master(self):
        self.ha.is_synchronous_mode = true

        mock_set_sync = self.p.set_synchronous_standby = Mock()
        self.p.is_leader = false
        self.p.set_role('replica')
        self.ha.has_lock = true
        mock_write_sync = self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.p.name = 'leader'
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('other', None))

        # When we just became master nobody is sync
        self.assertEquals(self.ha.enforce_master_role('msg', 'promote msg'), 'promote msg')
        mock_set_sync.assert_called_once_with(None)
        mock_write_sync.assert_called_once_with('leader', None, index=0)

        mock_set_sync.reset_mock()

        # If the sync state cannot be written to the DCS we must not promote
        self.p.set_role('replica')
        mock_write_sync.return_value = False
        self.assertTrue(self.ha.enforce_master_role('msg', 'promote msg') != 'promote msg')
        mock_set_sync.assert_not_called()

    def test_unhealthy_sync_mode(self):
        self.ha.is_synchronous_mode = true

        self.p.is_leader = false
        self.p.set_role('replica')
        self.p.name = 'other'
        self.ha.cluster = get_cluster_initialized_without_leader(sync=('leader', 'other2'))
        mock_write_sync = self.ha.dcs.write_sync_state = Mock(return_value=True)
        mock_acquire = self.ha.acquire_lock = Mock(return_value=True)
        mock_follow = self.p.follow = Mock()
        mock_promote = self.p.promote = Mock()

        # If we don't match the sync replica we are not allowed to acquire lock
        self.ha.run_cycle()
        mock_acquire.assert_not_called()
        mock_follow.assert_called_once()
        self.assertEquals(mock_follow.call_args[0][0], None)
        mock_write_sync.assert_not_called()

        mock_follow.reset_mock()
        # If we do match we will try to promote
        self.ha._is_healthiest_node = true

        self.ha.cluster = get_cluster_initialized_without_leader(sync=('leader', 'other'))
        self.ha.run_cycle()
        mock_acquire.assert_called_once()
        mock_follow.assert_not_called()
        mock_promote.assert_called_once()
        mock_write_sync.assert_called_once_with('other', None, index=0)

    def test_disable_sync_when_restarting(self):
        self.ha.is_synchronous_mode = true

        self.p.name = 'other'
        self.p.is_leader = false
        self.p.set_role('replica')
        mock_restart = self.p.restart = Mock(return_value=True)
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader', 'other'))
        self.ha.touch_member = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(side_effect=[
            get_cluster_initialized_with_leader(sync=('leader', syncstandby))
            for syncstandby in ['other', None]])
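        # the DCS reports this node as the sync standby on the first poll and no sync
        # standby on the second, so restart() has to wait (hence the sleep) before restarting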

        with patch('time.sleep') as mock_sleep:
            self.ha.restart({})
            mock_restart.assert_called_once()
            mock_sleep.assert_called()

        # Restart is still called when DCS connection fails
        mock_restart.reset_mock()
        self.ha.dcs.get_cluster = Mock(side_effect=DCSError("foo"))
        self.ha.restart({})

        mock_restart.assert_called_once()

        # We don't try to fetch the cluster state when touch_member fails
        mock_restart.reset_mock()
        self.ha.dcs.get_cluster.reset_mock()
        self.ha.touch_member = Mock(return_value=False)

        self.ha.restart({})

        mock_restart.assert_called_once()
        self.ha.dcs.get_cluster.assert_not_called()

    def test_effective_tags(self):
        self.ha._disable_sync = True
        self.assertEquals(self.ha.get_effective_tags(), {'foo': 'bar', 'nosync': True})
        self.ha._disable_sync = False
        self.assertEquals(self.ha.get_effective_tags(), {'foo': 'bar'})

    def test_restore_cluster_config(self):
        self.ha.cluster.config.data.clear()
        self.ha.has_lock = true
        self.ha.cluster.is_unlocked = false
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')

    def test_watch(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.watch(0)

    def test_wakup(self):
        self.ha.wakeup()

    def test_shutdown(self):
        self.p.is_running = false
        self.ha.has_lock = true
        self.ha.shutdown()

    @patch('time.sleep', Mock())
    def test_leader_with_empty_directory(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.has_lock = true
        self.p.data_directory_empty = true
        self.assertEquals(self.ha.run_cycle(), 'released leader key voluntarily as data dir empty and currently leader')
        self.assertEquals(self.p.role, 'uninitialized')

        # as has_lock is mocked out, we need to fake the leader key release
        self.ha.has_lock = false
        # will not say bootstrap from leader as replica can't self elect
        self.assertEquals(self.ha.run_cycle(), "trying to bootstrap from replica 'other'")

    def test_update_cluster_history(self):
        self.p.get_master_timeline = Mock(return_value=1)
        self.ha.has_lock = true
        self.ha.cluster.is_unlocked = false
        self.assertEquals(self.ha.run_cycle(), 'no action.  i am the leader with the lock')
Пример #38
0
class TestPostgresql(unittest.TestCase):
    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('os.rename', Mock())
    def setUp(self):
        self.data_dir = 'data/test0'
        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.p = Postgresql({
            'name': 'test0',
            'scope': 'batman',
            'data_dir': self.data_dir,
            'listen': '127.0.0.1, *:5432',
            'connect_address': '127.0.0.2:5432',
            'pg_hba': ['host replication replicator 127.0.0.1/32 md5',
                       'hostssl all all 0.0.0.0/0 md5',
                       'host all all 0.0.0.0/0 md5'],
            'superuser': {'username': '******', 'password': '******'},
            'admin': {'username': '******', 'password': '******'},
            'pg_rewind': {'username': '******', 'password': '******'},
            'replication': {'username': '******', 'password': '******'},
            'parameters': {'foo': 'bar'},
            'recovery_conf': {'foo': 'bar'},
            'callbacks': {'on_start': 'true', 'on_stop': 'true', 'on_restart': 'true',
                          'on_role_change': 'true', 'on_reload': 'true'},
            'restore': 'true'
        })
        self.leadermem = Member(0, 'leader', 28,
                                {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(0, 'test1', 28,
                            {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres',
                             'tags': {'replicatefrom': 'leader'}})
        self.me = Member(0, 'test0', 28,
                         {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})

    def tearDown(self):
        shutil.rmtree('data')

    def test_get_initdb_options(self):
        self.p.initdb_options = [{'encoding': 'UTF8'}, 'data-checksums']
        self.assertEquals(self.p.get_initdb_options(),
                          ['--encoding=UTF8', '--data-checksums'])
        self.p.initdb_options = [{'pgdata': 'bar'}]
        self.assertRaises(Exception, self.p.get_initdb_options)
        self.p.initdb_options = [{'foo': 'bar', 1: 2}]
        self.assertRaises(Exception, self.p.get_initdb_options)
        self.p.initdb_options = [1]
        self.assertRaises(Exception, self.p.get_initdb_options)

    def test_initialize(self):
        self.assertTrue(self.p.initialize())

        with open(os.path.join(self.data_dir, 'pg_hba.conf')) as f:
            lines = f.readlines()
            assert 'host replication replicator 127.0.0.1/32 md5\n' in lines
            assert 'host all all 0.0.0.0/0 md5\n' in lines

    @patch('os.path.exists', Mock(return_value=True))
    @patch('os.unlink', Mock())
    def test_delete_trigger_file(self):
        self.p.delete_trigger_file()

    def test_start(self):
        self.assertTrue(self.p.start())
        self.p.is_running = false
        open(os.path.join(self.data_dir, 'postmaster.pid'), 'w').close()
        pg_conf = os.path.join(self.data_dir, 'postgresql.conf')
        open(pg_conf, 'w').close()
        self.assertTrue(self.p.start())
        with open(pg_conf) as f:
            lines = f.readlines()
            self.assertTrue("foo = 'bar'\n" in lines)

    def test_stop(self):
        self.assertTrue(self.p.stop())
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertTrue(self.p.stop())
            self.p.is_running = Mock(return_value=True)
            self.assertFalse(self.p.stop())

    def test_restart(self):
        self.p.start = false
        self.p.is_running = false
        self.assertFalse(self.p.restart())
        self.assertEquals(self.p.state, 'restart failed (restarting)')

    @patch.object(builtins, 'open', MagicMock())
    def test_write_pgpass(self):
        self.p.write_pgpass({
            'host': 'localhost',
            'port': '5432',
            'user': '******',
            'password': '******'
        })

    @patch('subprocess.call', side_effect=OSError)
    @patch('patroni.postgresql.Postgresql.write_pgpass',
           MagicMock(return_value=dict()))
    def test_pg_rewind(self, mock_call):
        self.assertTrue(self.p.rewind(self.leader))
        subprocess.call = mock_call
        self.assertFalse(self.p.rewind(self.leader))

    @patch('patroni.postgresql.Postgresql.rewind', return_value=False)
    @patch('patroni.postgresql.Postgresql.remove_data_directory',
           MagicMock(return_value=True))
    @patch('patroni.postgresql.Postgresql.single_user_mode',
           MagicMock(return_value=1))
    @patch('patroni.postgresql.Postgresql.write_pgpass',
           MagicMock(return_value=dict()))
    @patch('subprocess.check_output',
           Mock(return_value=0, side_effect=pg_controldata_string))
    def test_follow(self, mock_pg_rewind):
        self.p.follow(None)
        self.p.follow(self.leader)
        self.p.follow(Leader(-1, 28, self.other))
        self.p.rewind = mock_pg_rewind
        self.p.follow(self.leader)
        with mock.patch('os.path.islink', MagicMock(return_value=True)):
            with mock.patch('patroni.postgresql.Postgresql.can_rewind',
                            new_callable=PropertyMock(return_value=True)):
                with mock.patch('os.unlink', MagicMock(return_value=True)):
                    self.p.follow(self.leader, recovery=True)
        with mock.patch('patroni.postgresql.Postgresql.can_rewind',
                        new_callable=PropertyMock(return_value=True)):
            self.p.rewind.return_value = True
            self.p.follow(self.leader, recovery=True)
            self.p.rewind.return_value = False
            self.p.follow(self.leader, recovery=True)
        with mock.patch('patroni.postgresql.Postgresql.check_recovery_conf',
                        MagicMock(return_value=True)):
            self.assertTrue(self.p.follow(None))

    @patch('subprocess.check_output',
           Mock(return_value=0, side_effect=pg_controldata_string))
    def test_can_rewind(self):
        tmp = self.p.pg_rewind
        self.p.pg_rewind = None
        self.assertFalse(self.p.can_rewind)
        self.p.pg_rewind = tmp
        with mock.patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with mock.patch('subprocess.call', side_effect=OSError):
            self.assertFalse(self.p.can_rewind)
        tmp = self.p.controldata
        self.p.controldata = lambda: {'wal_log_hints setting': 'on'}
        self.assertTrue(self.p.can_rewind)
        self.p.controldata = tmp

    @patch('time.sleep', Mock())
    def test_create_replica(self):
        self.p.delete_trigger_file = Mock(side_effect=OSError)
        with patch('subprocess.call', Mock(side_effect=[1, 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)
        with patch('subprocess.call', Mock(side_effect=[Exception(), 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo'}
        with patch('subprocess.call', Mock(return_value=0)):
            self.assertEquals(self.p.create_replica(self.leader), 0)
            del self.p.config['wale']
            self.assertEquals(self.p.create_replica(self.leader), 0)

        with patch('subprocess.call', Mock(side_effect=Exception("foo"))):
            self.assertEquals(self.p.create_replica(self.leader), 1)

    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, self.leader, 0,
                          [self.me, self.other, self.leadermem], None)
        self.p.sync_replication_slots(cluster)
        self.p.query = Mock(side_effect=psycopg2.OperationalError)
        self.p.schedule_load_slots = True
        self.p.sync_replication_slots(cluster)
        self.p.schedule_load_slots = False
        with mock.patch('patroni.postgresql.Postgresql.role',
                        new_callable=PropertyMock(return_value='replica')):
            self.p.sync_replication_slots(cluster)

    @patch.object(MockConnect, 'closed', 2)
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query,
                          'RetryFailedError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = false
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p._role = 'replica'
        self.assertTrue(self.p.promote())
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEquals(self.p.last_operation(), '0')

    @patch('subprocess.Popen', Mock(side_effect=OSError))
    def test_call_nowait(self):
        self.assertFalse(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(
            side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    def test_check_replication_lag(self):
        self.assertTrue(self.p.check_replication_lag(0))

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.is_running = false
        self.p.move_data_directory()
        with patch('os.rename', Mock(side_effect=OSError)):
            self.p.move_data_directory()

    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertRaises(PostgresException, self.p.bootstrap)
        self.p.bootstrap()

    @patch('patroni.postgresql.Postgresql.create_replica',
           Mock(return_value=0))
    def test_clone(self):
        self.p.clone(self.leader)

    @patch('os.listdir', Mock(return_value=['recovery.conf']))
    @patch('os.path.exists', Mock(return_value=True))
    def test_get_postgres_role_from_data_directory(self):
        self.assertEquals(self.p.get_postgres_role_from_data_directory(),
                          'replica')

    def test_remove_data_directory(self):
        self.p.remove_data_directory()
        open(self.data_dir, 'w').close()
        self.p.remove_data_directory()
        os.symlink('unexisting', self.data_dir)
        with patch('os.unlink', Mock(side_effect=OSError)):
            self.p.remove_data_directory()
        self.p.remove_data_directory()

    def test_controldata(self):
        with patch('subprocess.check_output',
                   Mock(return_value=0, side_effect=pg_controldata_string)):
            data = self.p.controldata()
            self.assertEquals(len(data), 50)
            self.assertEquals(data['Database cluster state'],
                              'shut down in recovery')
            self.assertEquals(data['wal_log_hints setting'], 'on')
            self.assertEquals(int(data['Database block size']), 8192)

        with patch('subprocess.check_output',
                   Mock(side_effect=subprocess.CalledProcessError(1, ''))):
            self.assertEquals(self.p.controldata(), {})

    @patch('subprocess.Popen')
    @patch.object(builtins, 'open', MagicMock(return_value=42))
    def test_single_user_mode(self, subprocess_popen_mock):
        subprocess_popen_mock.return_value.wait.return_value = 0
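        # builtins.open is mocked to return 42, so the single-user postgres process
        # is expected to be spawned with stdout=42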
        self.assertEquals(
            self.p.single_user_mode(
                options=dict(archive_mode='on', archive_command='false')), 0)
        subprocess_popen_mock.assert_called_once_with([
            'postgres', '--single', '-D', self.data_dir, '-c',
            'archive_command=false', '-c', 'archive_mode=on', 'postgres'
        ],
                                                      stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.reset_mock()
        self.assertEquals(self.p.single_user_mode(command="CHECKPOINT"), 0)
        subprocess_popen_mock.assert_called_once_with(
            ['postgres', '--single', '-D', self.data_dir, 'postgres'],
            stdin=subprocess.PIPE,
            stdout=42,
            stderr=subprocess.STDOUT)
        subprocess_popen_mock.return_value = None
        self.assertEquals(self.p.single_user_mode(), 1)

    @patch('os.listdir', MagicMock(side_effect=fake_listdir))
    @patch('os.unlink', return_value=True)
    @patch('os.remove', return_value=True)
    @patch('os.path.islink', return_value=False)
    @patch('os.path.isfile', return_value=True)
    def test_cleanup_archive_status(self, mock_file, mock_link, mock_remove,
                                    mock_unlink):
        ap = os.path.join(self.data_dir, 'pg_xlog', 'archive_status/')
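        # fake_listdir is expected to return the entries 'a', 'b' and 'c' under
        # pg_xlog/archive_status; they look like plain files on the first pass and
        # like symlinks on the second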
        self.p.cleanup_archive_status()
        mock_remove.assert_has_calls(
            [mock.call(ap + 'a'),
             mock.call(ap + 'b'),
             mock.call(ap + 'c')])
        mock_unlink.assert_not_called()

        mock_remove.reset_mock()

        mock_file.return_value = False
        mock_link.return_value = True
        self.p.cleanup_archive_status()
        mock_unlink.assert_has_calls(
            [mock.call(ap + 'a'),
             mock.call(ap + 'b'),
             mock.call(ap + 'c')])
        mock_remove.assert_not_called()

        mock_unlink.reset_mock()
        mock_remove.reset_mock()

        mock_file.side_effect = OSError
        mock_link.side_effect = OSError
        self.p.cleanup_archive_status()
        mock_unlink.assert_not_called()
        mock_remove.assert_not_called()

    @patch('subprocess.check_output',
           MagicMock(return_value=0, side_effect=pg_controldata_string))
    def test_sysid(self):
        self.assertEqual(self.p.sysid, "6200971513092291716")

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_save_configuration_files(self):
        self.p.save_configuration_files()

    @patch('os.path.isfile', Mock(side_effect=[False, True]))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_restore_configuration_files(self):
        self.p.restore_configuration_files()

    def test_can_create_replica_without_replication_connection(self):
        self.p.config['create_replica_method'] = []
        self.assertFalse(
            self.p.can_create_replica_without_replication_connection())
        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo', 'no_master': 1}
        self.assertTrue(
            self.p.can_create_replica_without_replication_connection())

    def test_replica_method_can_work_without_replication_connection(self):
        self.assertFalse(
            self.p.replica_method_can_work_without_replication_connection(
                'basebackup'))
        self.assertFalse(
            self.p.replica_method_can_work_without_replication_connection(
                'foobar'))
        self.p.config['foo'] = {'command': 'bar', 'no_master': 1}
        self.assertTrue(
            self.p.replica_method_can_work_without_replication_connection(
                'foo'))
        self.p.config['foo'] = {'command': 'bar'}
        self.assertFalse(
            self.p.replica_method_can_work_without_replication_connection(
                'foo'))
Пример #39
0
class TestPostgresql(unittest.TestCase):

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    def setUp(self):
        self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': 'data/test0',
                             'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                             'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                             'superuser': {'password': ''},
                             'admin': {'username': '******', 'password': '******'},
                             'pg_rewind': {'username': '******', 'password': '******'},
                             'replication': {'username': '******',
                                             'password': '******',
                                             'network': '127.0.0.1/32'},
                             'parameters': {'foo': 'bar'}, 'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                           'on_restart': 'true', 'on_role_change': 'true',
                                           'on_reload': 'true'
                                           },
                             'restore': 'true'})
        if not os.path.exists(self.p.data_dir):
            os.makedirs(self.p.data_dir)
        self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(0, 'test1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres'})
        self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})

    def tearDown(self):
        shutil.rmtree('data')

    def test_data_directory_empty(self):
        self.assertTrue(self.p.data_directory_empty())

    def test_initialize(self):
        self.assertTrue(self.p.initialize())
        self.assertTrue(os.path.exists(os.path.join(self.p.data_dir, 'pg_hba.conf')))

    def test_start(self):
        self.assertTrue(self.p.start())
        self.p.is_running = false
        open(os.path.join(self.p.data_dir, 'postmaster.pid'), 'w').close()
        self.assertTrue(self.p.start())

    def test_stop(self):
        self.assertTrue(self.p.stop())
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertTrue(self.p.stop())
            self.p.is_running = Mock(return_value=True)
            self.assertFalse(self.p.stop())

    def test_restart(self):
        self.p.start = false
        self.p.is_running = false
        self.assertFalse(self.p.restart())
        self.assertEquals(self.p.state, 'restart failed (restarting)')

    def test_sync_from_leader(self):
        self.assertTrue(self.p.sync_from_leader(self.leader))

    @patch('subprocess.call', side_effect=Exception("Test"))
    def test_pg_rewind(self, mock_call):
        self.assertTrue(self.p.rewind(self.leader))
        subprocess.call = mock_call
        self.assertFalse(self.p.rewind(self.leader))

    @patch('patroni.postgresql.Postgresql.rewind', return_value=False)
    @patch('patroni.postgresql.Postgresql.remove_data_directory', MagicMock(return_value=True))
    @patch('patroni.postgresql.Postgresql.single_user_mode', MagicMock(return_value=1))
    def test_follow_the_leader(self, mock_pg_rewind):
        self.p.demote()
        self.p.follow_the_leader(None)
        self.p.demote()
        self.p.follow_the_leader(self.leader)
        self.p.follow_the_leader(Leader(-1, 28, self.other))
        self.p.rewind = mock_pg_rewind
        self.p.follow_the_leader(self.leader)
        self.p.require_rewind()
        with mock.patch('os.path.islink', MagicMock(return_value=True)):
            with mock.patch('os.unlink', MagicMock(return_value=True)):
                with mock.patch('patroni.postgresql.Postgresql.can_rewind', new_callable=PropertyMock(return_value=True)):
                    self.p.follow_the_leader(self.leader, recovery=True)
        self.p.require_rewind()
        with mock.patch('patroni.postgresql.Postgresql.can_rewind', new_callable=PropertyMock(return_value=True)):
            self.p.rewind.return_value = True
            self.p.follow_the_leader(self.leader, recovery=True)
            self.p.rewind.return_value = False
            self.p.follow_the_leader(self.leader, recovery=True)

    def test_can_rewind(self):
        tmp = self.p.pg_rewind
        self.p.pg_rewind = None
        self.assertFalse(self.p.can_rewind)
        self.p.pg_rewind = tmp
        with mock.patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with mock.patch('subprocess.call', side_effect=OSError("foo")):
            self.assertFalse(self.p.can_rewind)
        tmp = self.p.controldata()
        self.p.controldata = lambda: {'wal_log_hints setting': 'on'}
        self.assertTrue(self.p.can_rewind)
        self.p.controldata = tmp

    def test_create_replica(self):
        self.p.delete_trigger_file = Mock(side_effect=OSError())
        self.assertEquals(self.p.create_replica({'host': '', 'port': '', 'user': ''}, ''), 1)

    def test_create_connection_users(self):
        cfg = self.p.config
        cfg['superuser']['username'] = '******'
        p = Postgresql(cfg)
        p.create_connection_users()

    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem], None)
        self.p.sync_replication_slots(cluster)
        self.p.query = Mock(side_effect=psycopg2.OperationalError)
        self.p.schedule_load_slots = True
        self.p.sync_replication_slots(cluster)

    @patch.object(MockConnect, 'closed', 2)
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query, 'RetryFailedError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = false
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p._role = 'replica'
        self.assertTrue(self.p.promote())
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEquals(self.p.last_operation(), '0')

    @patch('subprocess.Popen', Mock(side_effect=OSError()))
    def test_call_nowait(self):
        self.assertFalse(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    def test_check_replication_lag(self):
        self.assertTrue(self.p.check_replication_lag(0))

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.is_running = false
        self.p.move_data_directory()
        with patch('os.rename', Mock(side_effect=OSError())):
            self.p.move_data_directory()

    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertRaises(PostgresException, self.p.bootstrap)
        self.p.bootstrap()
        self.p.bootstrap(self.leader)

    def test_remove_data_directory(self):
        self.p.data_dir = 'data_dir'
        self.p.remove_data_directory()
        os.mkdir(self.p.data_dir)
        self.p.remove_data_directory()
        open(self.p.data_dir, 'w').close()
        self.p.remove_data_directory()
        os.symlink('unexisting', self.p.data_dir)
        with patch('os.unlink', Mock(side_effect=Exception)):
            self.p.remove_data_directory()
        self.p.remove_data_directory()

    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    @patch('subprocess.check_output', side_effect=subprocess.CalledProcessError)
    @patch('subprocess.check_output', side_effect=Exception('Failed'))
    def test_controldata(self, check_output_call_error, check_output_generic_exception):
        data = self.p.controldata()
        self.assertEquals(len(data), 50)
        self.assertEquals(data['Database cluster state'], 'shut down in recovery')
        self.assertEquals(data['wal_log_hints setting'], 'on')
        self.assertEquals(int(data['Database block size']), 8192)

        subprocess.check_output = check_output_call_error
        data = self.p.controldata()
        self.assertEquals(data, dict())

        subprocess.check_output = check_output_generic_exception
        self.assertRaises(Exception, self.p.controldata)

    def test_read_postmaster_opts(self):
        m = mock_open(read_data=postmaster_opts_string())
        with patch.object(builtins, 'open', m):
            data = self.p.read_postmaster_opts()
            self.assertEquals(data['wal_level'], 'hot_standby')
            self.assertEquals(int(data['max_replication_slots']), 5)
            self.assertEqual(data.get('D'), None)

            m.side_effect = IOError("foo")
            data = self.p.read_postmaster_opts()
            self.assertEqual(data, dict())

            m.side_effect = Exception("foo")
            self.assertRaises(Exception, self.p.read_postmaster_opts)

    @patch('subprocess.Popen')
    @patch.object(builtins, 'open', MagicMock(return_value=42))
    def test_single_user_mode(self, subprocess_popen_mock):
        subprocess_popen_mock.return_value.wait.return_value = 0
        self.assertEquals(self.p.single_user_mode(options=dict(archive_mode='on', archive_command='false')), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.p.data_dir,
                                                      '-c', 'archive_command=false', '-c', 'archive_mode=on',
                                                       'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.reset_mock()
        self.assertEquals(self.p.single_user_mode(command="CHECKPOINT"), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.p.data_dir,
                                                      'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.return_value = None
        self.assertEquals(self.p.single_user_mode(), 1)

    def fake_listdir(path):
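        # Pretend the pg_xlog/archive_status directory contains three entries; any other path looks empty.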
        if path.endswith(os.path.join('pg_xlog', 'archive_status')):
            return ["a", "b", "c"]
        return []

    @patch('os.listdir', MagicMock(side_effect=fake_listdir))
    @patch('os.path.isdir', MagicMock(return_value=True))
    @patch('os.unlink', return_value=True)
    @patch('os.remove', return_value=True)
    @patch('os.path.islink', return_value=False)
    @patch('os.path.isfile', return_value=True)
    def test_cleanup_archive_status(self, mock_file, mock_link, mock_remove, mock_unlink):
        ap = os.path.join(self.p.data_dir, 'pg_xlog', 'archive_status/')
        self.p.cleanup_archive_status()
        mock_remove.assert_has_calls([mock.call(ap+'a'), mock.call(ap+'b'), mock.call(ap+'c')])
        mock_unlink.assert_not_called()

        mock_remove.reset_mock()

        mock_file.return_value = False
        mock_link.return_value = True
        self.p.cleanup_archive_status()
        mock_unlink.assert_has_calls([mock.call(ap+'a'), mock.call(ap+'b'), mock.call(ap+'c')])
        mock_remove.assert_not_called()

        mock_unlink.reset_mock()
        mock_remove.reset_mock()

        mock_file.side_effect = Exception("foo")
        mock_link.side_effect = Exception("foo")
        self.p.cleanup_archive_status()
        mock_unlink.assert_not_called()
        mock_remove.assert_not_called()
Example #40
class Patroni(AbstractPatroniDaemon):
    def __init__(self, config):
        from patroni.api import RestApiServer
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.request import PatroniRequest
        from patroni.watchdog import Watchdog

        super(Patroni, self).__init__(config)

        self.version = __version__
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.request = PatroniRequest(self.config, True)
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        from patroni.exceptions import DCSError
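        # Keep retrying until the DCS is reachable; prefer the dynamic configuration already stored
        # in the cluster, otherwise fall back to the bootstrap.dcs section of the local configuration.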
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config and cluster.config.data:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                        self.watchdog.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(
                            self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')
                time.sleep(5)

    def get_tags(self):
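        # Drop the boolean tags (clonefrom, nofailover, noloadbalance, nosync) when their value is falsy;
        # keep every other tag as-is.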
        return {
            tag: value
            for tag, value in self.config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance',
                           'nosync') or value
        }

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self, sighup=False, local=False):
        try:
            super(Patroni, self).reload_config(sighup, local)
            if local:
                self.tags = self.get_tags()
                self.request.reload_config(self.config)
                self.api.reload_config(self.config['restapi'])
            self.watchdog.reload_config(self.config)
            self.postgresql.reload_config(self.config['postgresql'], sighup)
            self.dcs.reload_config(self.config)
        except Exception:
            logger.exception('Failed to reload config_file=%s',
                             self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()
        super(Patroni, self).run()

    def _run_cycle(self):
        logger.info(self.ha.run_cycle())

        if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \
                and self.config.set_dynamic_configuration(self.dcs.cluster.config):
            self.reload_config()

        if self.postgresql.role != 'uninitialized':
            self.config.save_cache()

        self.schedule_next_run()

    def _shutdown(self):
        try:
            self.api.shutdown()
        except Exception:
            logger.exception('Exception during RestApi.shutdown')
        try:
            self.ha.shutdown()
        except Exception:
            logger.exception('Exception during Ha.shutdown')
Example #41
class TestPostgresql(unittest.TestCase):
    _PARAMETERS = {'wal_level': 'hot_standby', 'max_replication_slots': 5, 'f.oo': 'bar',
                   'search_path': 'public', 'hot_standby': 'on', 'max_wal_senders': 5,
                   'wal_keep_segments': 8, 'wal_log_hints': 'on', 'max_locks_per_transaction': 64,
                   'max_worker_processes': 8, 'max_connections': 100, 'max_prepared_transactions': 0,
                   'track_commit_timestamp': 'off'}

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('os.rename', Mock())
    @patch.object(Postgresql, 'get_major_version', Mock(return_value=9.6))
    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def setUp(self):
        self.data_dir = 'data/test0'
        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': self.data_dir, 'retry_timeout': 10,
                             'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                             'authentication': {'superuser': {'username': '******', 'password': '******'},
                                                'replication': {'username': '******', 'password': '******'}},
                             'remove_data_directory_on_rewind_failure': True,
                             'use_pg_rewind': True, 'pg_ctl_timeout': 'bla',
                             'parameters': self._PARAMETERS,
                             'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                           'on_restart': 'true', 'on_role_change': 'true',
                                           'on_reload': 'true'
                                           },
                             'restore': 'true'})
        self.p._callback_executor = Mock()
        self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(0, 'test-1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres',
                            'tags': {'replicatefrom': 'leader'}})
        self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})

    def tearDown(self):
        shutil.rmtree('data')

    def test_get_initdb_options(self):
        self.assertEquals(self.p.get_initdb_options([{'encoding': 'UTF8'}, 'data-checksums']),
                          ['--encoding=UTF8', '--data-checksums'])
        self.assertRaises(Exception, self.p.get_initdb_options, [{'pgdata': 'bar'}])
        self.assertRaises(Exception, self.p.get_initdb_options, [{'foo': 'bar', 1: 2}])
        self.assertRaises(Exception, self.p.get_initdb_options, [1])

    @patch('os.path.exists', Mock(return_value=True))
    @patch('os.unlink', Mock())
    def test_delete_trigger_file(self):
        self.p.delete_trigger_file()

    @patch('subprocess.Popen')
    @patch.object(Postgresql, 'wait_for_startup')
    @patch.object(Postgresql, 'wait_for_port_open')
    @patch.object(Postgresql, 'is_running')
    def test_start(self, mock_is_running, mock_wait_for_port_open, mock_wait_for_startup, mock_popen):
        mock_is_running.return_value = True
        mock_wait_for_port_open.return_value = True
        mock_wait_for_startup.return_value = False
        mock_popen.stdout.readline.return_value = '123'
        self.assertTrue(self.p.start())
        mock_is_running.return_value = False
        open(os.path.join(self.data_dir, 'postmaster.pid'), 'w').close()
        pg_conf = os.path.join(self.data_dir, 'postgresql.conf')
        open(pg_conf, 'w').close()
        self.assertFalse(self.p.start())
        with open(pg_conf) as f:
            lines = f.readlines()
            self.assertTrue("f.oo = 'bar'\n" in lines)

        mock_wait_for_startup.return_value = None
        self.assertFalse(self.p.start(10))
        self.assertIsNone(self.p.start())

        mock_wait_for_port_open.return_value = False
        self.assertFalse(self.p.start())

    @patch.object(Postgresql, 'pg_isready')
    @patch.object(Postgresql, 'read_pid_file')
    @patch.object(Postgresql, 'is_pid_running')
    @patch('patroni.postgresql.polling_loop', Mock(return_value=range(1)))
    def test_wait_for_port_open(self, mock_is_pid_running, mock_read_pid_file, mock_pg_isready):
        mock_is_pid_running.return_value = False
        mock_pg_isready.return_value = STATE_NO_RESPONSE

        # No pid file and postmaster death
        mock_read_pid_file.return_value = {}
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        mock_is_pid_running.return_value = True

        # timeout
        mock_read_pid_file.return_value = {'pid', 1}
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        # Garbage pid
        mock_read_pid_file.return_value = {'pid': 'garbage', 'start_time': '101', 'data_dir': '',
                                           'socket_dir': '', 'port': '', 'listen_addr': ''}
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        # Not ready
        mock_read_pid_file.return_value = {'pid': '42', 'start_time': '101', 'data_dir': '',
                                           'socket_dir': '', 'port': '', 'listen_addr': ''}
        self.assertFalse(self.p.wait_for_port_open(42, 100., 1))

        # pg_isready failure
        mock_pg_isready.return_value = 'garbage'
        self.assertTrue(self.p.wait_for_port_open(42, 100., 1))

    @patch.object(Postgresql, 'is_running')
    def test_stop(self, mock_is_running):
        mock_is_running.return_value = True
        self.assertTrue(self.p.stop())
        with patch('subprocess.call', Mock(return_value=1)):
            mock_is_running.return_value = False
            self.assertTrue(self.p.stop())

    def test_restart(self):
        self.p.start = Mock(return_value=False)
        self.assertFalse(self.p.restart())
        self.assertEquals(self.p.state, 'restart failed (restarting)')

    @patch.object(builtins, 'open', MagicMock())
    def test_write_pgpass(self):
        self.p.write_pgpass({'host': 'localhost', 'port': '5432', 'user': '******'})
        self.p.write_pgpass({'host': 'localhost', 'port': '5432', 'user': '******', 'password': '******'})

    def test_checkpoint(self):
        with patch.object(MockCursor, 'fetchone', Mock(return_value=(True, ))):
            self.assertEquals(self.p.checkpoint({'user': '******'}), 'is_in_recovery=true')
        with patch.object(MockCursor, 'execute', Mock(return_value=None)):
            self.assertIsNone(self.p.checkpoint())
        self.assertEquals(self.p.checkpoint(), 'not accessible or not healty')

    @patch('subprocess.call', side_effect=OSError)
    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_pg_rewind(self, mock_call):
        r = {'user': '', 'host': '', 'port': '', 'database': '', 'password': ''}
        self.assertTrue(self.p.rewind(r))
        subprocess.call = mock_call
        self.assertFalse(self.p.rewind(r))

    @patch('os.unlink', Mock(return_value=True))
    @patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string))
    @patch.object(Postgresql, 'remove_data_directory', Mock(return_value=True))
    @patch.object(Postgresql, 'single_user_mode', Mock(return_value=1))
    @patch.object(Postgresql, 'write_pgpass', Mock(return_value={}))
    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    @patch.object(Postgresql, 'rewind', return_value=False)
    def test_follow(self, mock_pg_rewind):
        with patch.object(Postgresql, 'check_recovery_conf', Mock(return_value=True)):
            self.assertTrue(self.p.follow(None, None))  # nothing to do, recovery.conf has good primary_conninfo

        self.p.follow(self.me, self.me)  # follow is called when the node is holding leader lock

        with patch.object(Postgresql, 'restart', Mock(return_value=False)):
            self.p.set_role('replica')
            self.p.follow(None, None)  # restart without rewind

        with patch.object(Postgresql, 'stop', Mock(return_value=False)):
            self.p.follow(self.leader, self.leader, need_rewind=True)  # failed to stop postgres

        self.p.follow(self.leader, self.leader)  # "leader" is not accessible or is_in_recovery

        with patch.object(Postgresql, 'checkpoint', Mock(return_value=None)):
            self.p.follow(self.leader, self.leader)
            mock_pg_rewind.return_value = True
            self.p.follow(self.leader, self.leader, need_rewind=True)

        self.p.follow(None, None)  # check_recovery_conf...

    @patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string))
    def test_can_rewind(self):
        with patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with patch('subprocess.call', side_effect=OSError):
            self.assertFalse(self.p.can_rewind)
        with patch.object(Postgresql, 'controldata', Mock(return_value={'wal_log_hints setting': 'on'})):
            self.assertTrue(self.p.can_rewind)
        self.p.config['use_pg_rewind'] = False
        self.assertFalse(self.p.can_rewind)

    @patch('time.sleep', Mock())
    @patch.object(Postgresql, 'remove_data_directory', Mock(return_value=True))
    def test_create_replica(self):
        self.p.delete_trigger_file = Mock(side_effect=OSError)
        with patch('subprocess.call', Mock(side_effect=[1, 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)
        with patch('subprocess.call', Mock(side_effect=[Exception(), 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo'}
        with patch('subprocess.call', Mock(return_value=0)):
            self.assertEquals(self.p.create_replica(self.leader), 0)
            del self.p.config['wale']
            self.assertEquals(self.p.create_replica(self.leader), 0)

        with patch('subprocess.call', Mock(side_effect=Exception("foo"))):
            self.assertEquals(self.p.create_replica(self.leader), 1)

        with patch('subprocess.call', Mock(return_value=1)):
            self.assertEquals(self.p.create_replica(self.leader), 1)

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, None, self.leader, 0, [self.me, self.other, self.leadermem], None, None)
        with mock.patch('patroni.postgresql.Postgresql._query', Mock(side_effect=psycopg2.OperationalError)):
            self.p.sync_replication_slots(cluster)
        self.p.sync_replication_slots(cluster)
        with mock.patch('patroni.postgresql.Postgresql.role', new_callable=PropertyMock(return_value='replica')):
            self.p.sync_replication_slots(cluster)
        with mock.patch('patroni.postgresql.logger.error', new_callable=Mock()) as errorlog_mock:
            self.p.query = Mock()
            alias1 = Member(0, 'test-3', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5436/postgres'})
            alias2 = Member(0, 'test.3', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5436/postgres'})
            cluster.members.extend([alias1, alias2])
            self.p.sync_replication_slots(cluster)
            errorlog_mock.assert_called_once()
            assert "test-3" in errorlog_mock.call_args[0][1]
            assert "test.3" in errorlog_mock.call_args[0][1]

    @patch.object(MockConnect, 'closed', 2)
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query, 'RetryFailedError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    @patch.object(Postgresql, 'pg_isready', Mock(return_value=STATE_REJECT))
    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())
        with patch.object(Postgresql, '_query', Mock(side_effect=RetryFailedError(''))):
            self.assertRaises(PostgresConnectionException, self.p.is_leader)

    def test_reload(self):
        self.assertTrue(self.p.reload())

    @patch.object(Postgresql, 'is_running')
    def test_is_healthy(self, mock_is_running):
        mock_is_running.return_value = True
        self.assertTrue(self.p.is_healthy())
        mock_is_running.return_value = False
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p._role = 'replica'
        self.assertTrue(self.p.promote())
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEquals(self.p.last_operation(), '0')
        Thread(target=self.p.last_operation).start()

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('os.kill', Mock(side_effect=Exception))
    @patch('os.getpid', Mock(return_value=2))
    @patch('os.getppid', Mock(return_value=2))
    @patch.object(builtins, 'open', mock_open(read_data='-1'))
    @patch.object(Postgresql, '_version_file_exists', Mock(return_value=True))
    def test_is_running(self):
        self.assertFalse(self.p.is_running())

    @patch('shlex.split', Mock(side_effect=OSError))
    def test_call_nowait(self):
        self.assertIsNone(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.move_data_directory()
        with patch('os.rename', Mock(side_effect=OSError)):
            self.p.move_data_directory()

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertRaises(PostgresException, self.p.bootstrap, {})

        with patch.object(Postgresql, 'run_bootstrap_post_init', Mock(return_value=False)):
            self.assertRaises(PostgresException, self.p.bootstrap, {})

        self.p.bootstrap({'users': {'replicator': {'password': '******', 'options': ['replication']}},
                          'pg_hba': ['host replication replicator 127.0.0.1/32 md5',
                                     'hostssl all all 0.0.0.0/0 md5',
                                     'host all all 0.0.0.0/0 md5'],
                          'post_init': '/bin/false'})
        with open(os.path.join(self.data_dir, 'pg_hba.conf')) as f:
            lines = f.readlines()
            assert 'host replication replicator 127.0.0.1/32 md5\n' in lines
            assert 'host all all 0.0.0.0/0 md5\n' in lines

    def test_run_bootstrap_post_init(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertFalse(self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

        with patch('subprocess.call', Mock(side_effect=OSError)):
            self.assertFalse(self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

        with patch('subprocess.call', Mock(return_value=0)) as mock_method:
            self.p._superuser.pop('username')
            self.assertTrue(self.p.run_bootstrap_post_init({'post_init': '/bin/false'}))

        mock_method.assert_called()
        args, kwargs = mock_method.call_args
        assert 'PGPASSFILE' in kwargs['env'].keys()
        self.assertEquals(args[0], ['/bin/false', 'postgres://*****:*****@...'])

    @patch('patroni.postgresql.Postgresql.create_replica', Mock(return_value=0))
    def test_clone(self):
        self.p.clone(self.leader)

    @patch('os.listdir', Mock(return_value=['recovery.conf']))
    @patch('os.path.exists', Mock(return_value=True))
    def test_get_postgres_role_from_data_directory(self):
        self.assertEquals(self.p.get_postgres_role_from_data_directory(), 'replica')

    def test_remove_data_directory(self):
        self.p.remove_data_directory()
        open(self.data_dir, 'w').close()
        self.p.remove_data_directory()
        os.symlink('unexisting', self.data_dir)
        with patch('os.unlink', Mock(side_effect=OSError)):
            self.p.remove_data_directory()
        self.p.remove_data_directory()

    @patch('patroni.postgresql.Postgresql._version_file_exists', Mock(return_value=True))
    def test_controldata(self):
        with patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string)):
            data = self.p.controldata()
            self.assertEquals(len(data), 50)
            self.assertEquals(data['Database cluster state'], 'shut down in recovery')
            self.assertEquals(data['wal_log_hints setting'], 'on')
            self.assertEquals(int(data['Database block size']), 8192)

        with patch('subprocess.check_output', Mock(side_effect=subprocess.CalledProcessError(1, ''))):
            self.assertEquals(self.p.controldata(), {})

    def test_read_postmaster_opts(self):
        m = mock_open(read_data=postmaster_opts_string())
        with patch.object(builtins, 'open', m):
            data = self.p.read_postmaster_opts()
            self.assertEquals(data['wal_level'], 'hot_standby')
            self.assertEquals(int(data['max_replication_slots']), 5)
            self.assertEqual(data.get('D'), None)

            m.side_effect = IOError
            data = self.p.read_postmaster_opts()
            self.assertEqual(data, dict())

    @patch('subprocess.Popen')
    @patch.object(builtins, 'open', MagicMock(return_value=42))
    def test_single_user_mode(self, subprocess_popen_mock):
        subprocess_popen_mock.return_value.wait.return_value = 0
        self.assertEquals(self.p.single_user_mode(options=dict(archive_mode='on', archive_command='false')), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.data_dir,
                                                      '-c', 'archive_command=false', '-c', 'archive_mode=on',
                                                       'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.reset_mock()
        self.assertEquals(self.p.single_user_mode(command="CHECKPOINT"), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.data_dir,
                                                      'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.return_value = None
        self.assertEquals(self.p.single_user_mode(), 1)

    @patch('os.listdir', MagicMock(side_effect=fake_listdir))
    @patch('os.unlink', return_value=True)
    @patch('os.remove', return_value=True)
    @patch('os.path.islink', return_value=False)
    @patch('os.path.isfile', return_value=True)
    def test_cleanup_archive_status(self, mock_file, mock_link, mock_remove, mock_unlink):
        ap = os.path.join(self.data_dir, 'pg_xlog', 'archive_status/')
        self.p.cleanup_archive_status()
        mock_remove.assert_has_calls([mock.call(ap + 'a'), mock.call(ap + 'b'), mock.call(ap + 'c')])
        mock_unlink.assert_not_called()

        mock_remove.reset_mock()

        mock_file.return_value = False
        mock_link.return_value = True
        self.p.cleanup_archive_status()
        mock_unlink.assert_has_calls([mock.call(ap + 'a'), mock.call(ap + 'b'), mock.call(ap + 'c')])
        mock_remove.assert_not_called()

        mock_unlink.reset_mock()
        mock_remove.reset_mock()

        mock_file.side_effect = OSError
        mock_link.side_effect = OSError
        self.p.cleanup_archive_status()
        mock_unlink.assert_not_called()
        mock_remove.assert_not_called()

    @patch('patroni.postgresql.Postgresql._version_file_exists', Mock(return_value=True))
    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    def test_sysid(self):
        self.assertEqual(self.p.sysid, "6200971513092291716")

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_save_configuration_files(self):
        self.p.save_configuration_files()

    @patch('os.path.isfile', Mock(side_effect=[False, True]))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_restore_configuration_files(self):
        self.p.restore_configuration_files()

    def test_can_create_replica_without_replication_connection(self):
        self.p.config['create_replica_method'] = []
        self.assertFalse(self.p.can_create_replica_without_replication_connection())
        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo', 'no_master': 1}
        self.assertTrue(self.p.can_create_replica_without_replication_connection())

    def test_replica_method_can_work_without_replication_connection(self):
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('basebackup'))
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('foobar'))
        self.p.config['foo'] = {'command': 'bar', 'no_master': 1}
        self.assertTrue(self.p.replica_method_can_work_without_replication_connection('foo'))
        self.p.config['foo'] = {'command': 'bar'}
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('foo'))

    @patch.object(Postgresql, 'is_running', Mock(return_value=True))
    def test_reload_config(self):
        parameters = self._PARAMETERS.copy()
        parameters.pop('f.oo')
        self.p.reload_config({'retry_timeout': 10, 'listen': '*', 'parameters': parameters})
        parameters['b.ar'] = 'bar'
        self.p.reload_config({'retry_timeout': 10, 'listen': '*', 'parameters': parameters})
        parameters['autovacuum'] = 'on'
        self.p.reload_config({'retry_timeout': 10, 'listen': '*', 'parameters': parameters})
        parameters['autovacuum'] = 'off'
        parameters.pop('search_path')
        self.p.reload_config({'retry_timeout': 10, 'listen': '*:5433', 'parameters': parameters})

    @patch.object(Postgresql, '_version_file_exists', Mock(return_value=True))
    def test_get_major_version(self):
        with patch.object(builtins, 'open', mock_open(read_data='9.4')):
            self.assertEquals(self.p.get_major_version(), 9.4)
        with patch.object(builtins, 'open', Mock(side_effect=Exception)):
            self.assertEquals(self.p.get_major_version(), 0.0)

    def test_postmaster_start_time(self):
        with patch.object(MockCursor, "fetchone", Mock(return_value=('foo', True, '', '', '', '', False))):
            self.assertEqual(self.p.postmaster_start_time(), 'foo')
        with patch.object(MockCursor, "execute", side_effect=psycopg2.Error):
            self.assertIsNone(self.p.postmaster_start_time())

    def test_check_for_startup(self):
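        # The mocked subprocess.call stands in for pg_isready: 0 = accepting connections,
        # 1 = rejecting (still starting up), 2 = no response, 127 = binary not found.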
        with patch('subprocess.call', return_value=0):
            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

        with patch('subprocess.call', return_value=1):
            self.p._state = 'starting'
            self.assertTrue(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'starting')

        with patch('subprocess.call', return_value=2):
            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'start failed')

        with patch('subprocess.call', return_value=0):
            self.p._state = 'running'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

        with patch('subprocess.call', return_value=127):
            self.p._state = 'running'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

            self.p._state = 'starting'
            self.assertFalse(self.p.check_for_startup())
            self.assertEquals(self.p.state, 'running')

    def test_wait_for_startup(self):
        state = {'sleeps': 0, 'num_rejects': 0, 'final_return': 0}
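        # 'sleeps' counts the simulated time.sleep calls, 'num_rejects' is how many pg_isready probes
        # should be rejected first, and 'final_return' is the exit code returned afterwards.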

        def increment_sleeps(*args):
            print("Sleep")
            state['sleeps'] += 1

        def isready_return(*args):
            ret = 1 if state['sleeps'] < state['num_rejects'] else state['final_return']
            print("Isready {0} {1}".format(ret, state))
            return ret

        def time_in_state(*args):
            return state['sleeps']

        with patch('subprocess.call', side_effect=isready_return):
            with patch('time.sleep', side_effect=increment_sleeps):
                self.p.time_in_state = Mock(side_effect=time_in_state)

                self.p._state = 'stopped'
                self.assertTrue(self.p.wait_for_startup())
                self.assertEquals(state['sleeps'], 0)

                self.p._state = 'starting'
                state['num_rejects'] = 5
                self.assertTrue(self.p.wait_for_startup())
                self.assertEquals(state['sleeps'], 5)

                self.p._state = 'starting'
                state['sleeps'] = 0
                state['final_return'] = 2
                self.assertFalse(self.p.wait_for_startup())

                self.p._state = 'starting'
                state['sleeps'] = 0
                state['final_return'] = 0
                self.assertFalse(self.p.wait_for_startup(timeout=2))
                self.assertEquals(state['sleeps'], 3)

    def test_read_pid_file(self):
        pidfile = os.path.join(self.data_dir, 'postmaster.pid')
        if os.path.exists(pidfile):
            os.remove(pidfile)
        self.assertEquals(self.p.read_pid_file(), {})

    @patch('os.kill')
    def test_is_pid_running(self, mock_kill):
        mock_kill.return_value = True
        self.assertTrue(self.p.is_pid_running(-100))
        self.assertFalse(self.p.is_pid_running(0))
        self.assertFalse(self.p.is_pid_running(None))

    def test_pick_sync_standby(self):
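        # pick_synchronous_standby should prefer a standby already reported as 'sync', then a 'potential'
        # one, and otherwise fall back to the first streaming member (or None when nothing is streaming).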
        cluster = Cluster(True, None, self.leader, 0, [self.me, self.other, self.leadermem], None,
                          SyncState(0, self.me.name, self.leadermem.name))

        with patch.object(Postgresql, "query", return_value=[
                    (self.leadermem.name, 'streaming', 'sync'),
                    (self.me.name, 'streaming', 'async'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.leadermem.name, True))

        with patch.object(Postgresql, "query", return_value=[
                    (self.me.name, 'streaming', 'async'),
                    (self.leadermem.name, 'streaming', 'potential'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.leadermem.name, False))

        with patch.object(Postgresql, "query", return_value=[
                    (self.me.name, 'streaming', 'async'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.me.name, False))

        with patch.object(Postgresql, "query", return_value=[
                    ('missing', 'streaming', 'sync'),
                    (self.me.name, 'streaming', 'async'),
                    (self.other.name, 'streaming', 'async'),
                ]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (self.me.name, False))

        with patch.object(Postgresql, "query", return_value=[]):
            self.assertEquals(self.p.pick_synchronous_standby(cluster), (None, False))

    def test_set_sync_standby(self):
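        # value_in_conf() reads synchronous_standby_names back from the postgresql.conf written by
        # set_synchronous_standby.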
        def value_in_conf():
            with open(os.path.join(self.data_dir, 'postgresql.conf')) as f:
                for line in f:
                    if line.startswith('synchronous_standby_names'):
                        return line.strip()

        mock_reload = self.p.reload = Mock()
        self.p.set_synchronous_standby('n1')
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n1'")
        mock_reload.assert_called()

        mock_reload.reset_mock()
        self.p.set_synchronous_standby('n1')
        mock_reload.assert_not_called()
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n1'")

        self.p.set_synchronous_standby('n2')
        mock_reload.assert_called()
        self.assertEquals(value_in_conf(), "synchronous_standby_names = 'n2'")

        mock_reload.reset_mock()
        self.p.set_synchronous_standby(None)
        mock_reload.assert_called()
        self.assertEquals(value_in_conf(), None)

    def test_get_server_parameters(self):
        config = {'synchronous_mode': True, 'parameters': {}, 'listen': '0'}
        self.p.get_server_parameters(config)
        self.p.set_synchronous_standby('foo')
        self.p.get_server_parameters(config)
Example #42
class Patroni(object):

    def __init__(self):
        from patroni.api import RestApiServer
        from patroni.config import Config
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.version import __version__

        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
        return {tag: value for tag, value in self.config.get('tags', {}).items()
                if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value}

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self):
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s', self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            cluster = self.dcs.cluster
            if cluster and cluster.config and self.config.set_dynamic_configuration(cluster.config):
                self.reload_config()

            if not self.postgresql.data_directory_empty():
                self.config.save_cache()

            self.schedule_next_run()

    def setup_signal_handlers(self):
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
Example #43
class Patroni(object):
    def __init__(self):
        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.nap_time = self.config['loop_wait']
        self.next_run = time.time()

        self._reload_config_scheduled = False
        self._received_sighup = False
        self._received_sigterm = False

    def load_dynamic_configuration(self):
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    self.config.set_dynamic_configuration(cluster.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    self.config.set_dynamic_configuration(
                        self.config['bootstrap']['dcs'])
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
        return {
            tag: value
            for tag, value in self.config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value
        }

    def reload_config(self):
        try:
            self.tags = self.get_tags()
            self.nap_time = self.config['loop_wait']
            self.dcs.set_ttl(self.config.get('ttl') or 30)
            self.dcs.set_retry_timeout(
                self.config.get('retry_timeout') or self.nap_time)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s',
                             self.config.config_file)

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return self.tags.get('noloadbalance', False)

    @property
    def nofailover(self):
        return self.tags.get('nofailover', False)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def schedule_next_run(self):
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            cluster = self.dcs.cluster
            if cluster and cluster.config and self.config.set_dynamic_configuration(
                    cluster.config):
                self.reload_config()

            if not self.postgresql.data_directory_empty():
                self.config.save_cache()

            reap_children()
            self.schedule_next_run()

    def setup_signal_handlers(self):
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
        signal.signal(signal.SIGCHLD, sigchld_handler)
Example #44
class TestPostgresql(unittest.TestCase):

    def __init__(self, method_name='runTest'):
        self.setUp = self.set_up
        self.tearDown = self.tear_down
        super(TestPostgresql, self).__init__(method_name)

    def set_up(self):
        subprocess.call = subprocess_call
        shutil.copy = nop
        self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': 'data/test0',
                             'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                             'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                             'superuser': {'password': ''},
                             'admin': {'username': '******', 'password': '******'},
                             'replication': {'username': '******',
                                             'password': '******',
                                             'network': '127.0.0.1/32'},
                             'parameters': {'foo': 'bar'}, 'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                           'on_restart': 'true', 'on_role_change': 'true',
                                           'on_reload': 'true'
                                           },
                             'restore': 'true'})
        psycopg2.connect = psycopg2_connect
        if not os.path.exists(self.p.data_dir):
            os.makedirs(self.p.data_dir)
        self.leadermem = Member(0, 'leader', 'postgres://*****:*****@127.0.0.1:5435/postgres', None, None, 28)
        self.leader = Leader(-1, None, 28, self.leadermem)
        self.other = Member(0, 'test1', 'postgres://*****:*****@127.0.0.1:5433/postgres', None, None, 28)
        self.me = Member(0, 'test0', 'postgres://*****:*****@127.0.0.1:5434/postgres', None, None, 28)

    def tear_down(self):
        shutil.rmtree('data')

    def mock_query(self, p):
        raise psycopg2.OperationalError("not supported")

    def test_data_directory_empty(self):
        self.assertTrue(self.p.data_directory_empty())

    def test_initialize(self):
        self.assertTrue(self.p.initialize())
        self.assertTrue(os.path.exists(os.path.join(self.p.data_dir, 'pg_hba.conf')))

    def test_start_stop(self):
        self.assertFalse(self.p.start())
        self.p.is_running = false
        with open(os.path.join(self.p.data_dir, 'postmaster.pid'), 'w'):
            pass
        self.assertTrue(self.p.start())
        self.assertTrue(self.p.stop())

    def test_sync_from_leader(self):
        self.assertTrue(self.p.sync_from_leader(self.leader))

    def test_follow_the_leader(self):
        self.p.demote(self.leader)
        self.p.follow_the_leader(None)
        self.p.demote(self.leader)
        self.p.follow_the_leader(self.leader)
        self.p.follow_the_leader(Leader(-1, None, 28, self.other))

    def test_create_replica(self):
        self.p.delete_trigger_file = raise_exception
        self.assertEquals(self.p.create_replica({'host': '', 'port': '', 'user': ''}, ''), 1)

    def test_create_connection_users(self):
        cfg = self.p.config
        cfg['superuser']['username'] = '******'
        p = Postgresql(cfg)
        p.create_connection_users()

    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem])
        self.p.sync_replication_slots(cluster)

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(psycopg2.InterfaceError, self.p.query, 'InterfaceError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')
        self.p._connection.closed = 2
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')
        self.p._connection.closed = 2
        self.p.disconnect = false
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_healthiest_node(self):
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem])
        self.assertTrue(self.p.is_healthiest_node(cluster))
        self.p.is_leader = false
        self.assertFalse(self.p.is_healthiest_node(cluster))
        self.p.xlog_position = lambda: 1
        self.assertTrue(self.p.is_healthiest_node(cluster))
        self.p.xlog_position = lambda: 2
        self.assertFalse(self.p.is_healthiest_node(cluster))
        self.p.config['maximum_lag_on_failover'] = -3
        self.assertFalse(self.p.is_healthiest_node(cluster))

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = false
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.assertTrue(self.p.promote())
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEquals(self.p.last_operation(), '0')

    def test_call_nowait(self):
        popen = subprocess.Popen
        subprocess.Popen = raise_exception
        self.assertFalse(self.p.call_nowait('on_start'))
        subprocess.Popen = popen

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = self.mock_query
        self.assertTrue(self.p.stop())

    def test_move_data_directory(self):
        self.p.is_running = false
        os.rename = nop
        os.path.isdir = true
        self.p.move_data_directory()
        os.rename = raise_exception
        self.p.move_data_directory()
Example #45
 def test_create_connection_users(self):
     cfg = self.p.config
     cfg['superuser']['username'] = '******'
     p = Postgresql(cfg)
     p.create_connection_users()
Example #46
class Patroni:

    def __init__(self, config):
        self.nap_time = config['loop_wait']
        self.postgresql = Postgresql(config['postgresql'])
        self.ha = Ha(self.postgresql, self.get_dcs(self.postgresql.name, config))
        host, port = config['restapi']['listen'].split(':')
        self.api = RestApiServer(self, config['restapi'])
        self.next_run = time.time()
        self.shutdown_member_ttl = 300

    @staticmethod
    def get_dcs(name, config):
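        # Pick the DCS implementation based on which section is present in the configuration.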
        if 'etcd' in config:
            return Etcd(name, config['etcd'])
        if 'zookeeper' in config:
            return ZooKeeper(name, config['zookeeper'])
        raise Exception('Can not find suitable configuration of distributed configuration store')

    def touch_member(self, ttl=None):
        connection_string = self.postgresql.connection_string + '?application_name=' + self.api.connection_string
        if self.ha.cluster:
            for m in self.ha.cluster.members:
                # Do not update member TTL when it is far from being expired
                if m.name == self.postgresql.name and m.real_ttl() > self.shutdown_member_ttl:
                    return True
        return self.ha.dcs.touch_member(connection_string, ttl)

    def cleanup_on_failed_initialization(self):
        """ cleanup the DCS if initialization was not successfull """
        logger.info("removing initialize key after failed attempt to initialize the cluster")
        self.ha.dcs.cancel_initialization()
        self.touch_member(self.shutdown_member_ttl)
        self.postgresql.stop()
        self.postgresql.move_data_directory()

    def initialize(self):
        # wait for etcd to be available
        while not self.touch_member():
            logger.info('waiting on DCS')
            sleep(5)

        # is data directory empty?
        if self.postgresql.data_directory_empty():
            while True:
                try:
                    cluster = self.ha.dcs.get_cluster()
                    if not cluster.is_unlocked():  # the leader already exists
                        if not cluster.initialize:
                            self.ha.dcs.initialize()
                        self.postgresql.bootstrap(cluster.leader)
                        break
                    # racing to initialize
                    elif not cluster.initialize and self.ha.dcs.initialize():
                        try:
                            self.postgresql.bootstrap()
                        except:
                            # bail out and clean the initialize flag.
                            self.cleanup_on_failed_initialization()
                            raise
                        self.ha.dcs.take_leader()
                        break
                except DCSError:
                    logger.info('waiting on DCS')
                sleep(5)
        elif self.postgresql.is_running():
            self.postgresql.load_replication_slots()

    def schedule_next_run(self):
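        # After a promotion, restart the schedule from the current time instead of the previous tick.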
        if self.postgresql.is_promoted:
            self.next_run = time.time()
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        else:
            self.ha.dcs.watch(nap_time)

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while True:
            self.touch_member()
            logger.info(self.ha.run_cycle())
            try:
                if self.ha.state_handler.is_leader():
                    self.ha.cluster and self.ha.state_handler.create_replication_slots(self.ha.cluster)
                else:
                    self.ha.state_handler.drop_replication_slots()
            except Exception:
                logger.exception('Exception when changing replication slots')
            reap_children()
            self.schedule_next_run()
Example #47
0
class Patroni(object):
    def __init__(self):
        from patroni.api import RestApiServer
        from patroni.config import Config
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.version import __version__
        from patroni.watchdog import Watchdog

        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
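        # keep retrying until the DCS is reachable; apply the dynamic configuration stored in the cluster,
        # falling back to the bootstrap section of the local configuration when none exists yet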
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config and cluster.config.data:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                        self.watchdog.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(
                            self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
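        # boolean-only tags are dropped when their value is falsy; all other tags are kept as-is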
        return {
            tag: value
            for tag, value in self.config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance',
                           'nosync') or value
        }

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self):
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.watchdog.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s',
                             self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
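        # sleep until the next cycle is due or a DCS watch event fires; warn if the previous cycle overran loop_wait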
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
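            # a SIGHUP asks us to re-read the local configuration file and apply any changes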
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \
                    and self.config.set_dynamic_configuration(self.dcs.cluster.config):
                self.reload_config()

            if self.postgresql.role != 'uninitialized':
                self.config.save_cache()

            self.schedule_next_run()

    def setup_signal_handlers(self):
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)

    def shutdown(self):
        self.api.shutdown()
        self.ha.shutdown()
Example #48
0
class TestHa(unittest.TestCase):
    @patch('socket.getaddrinfo', socket_getaddrinfo)
    @patch('psycopg2.connect', psycopg2_connect)
    @patch.object(etcd.Client, 'read', etcd_read)
    def setUp(self):
        with patch.object(Client, 'machines') as mock_machines:
            mock_machines.__get__ = Mock(
                return_value=['http://*****:*****'])

    @patch('sys.exit', return_value=1)
    @patch('patroni.ha.Ha.sysid_valid', MagicMock(return_value=True))
    def test_sysid_no_match(self, exit_mock):
        self.ha.run_cycle()
        exit_mock.assert_called_once_with(1)

    @patch.object(Cluster, 'is_unlocked', Mock(return_value=False))
    def test_start_as_readonly(self):
        self.p.is_leader = false
        self.p.is_healthy = true
        self.ha.has_lock = true
        self.assertEquals(
            self.ha.run_cycle(),
            'promoted self to leader because i had the session lock')

    def test_acquire_lock_as_master(self):
        self.assertEquals(self.ha.run_cycle(),
                          'acquired session lock as a leader')

    def test_promoted_by_acquiring_lock(self):
        self.ha.is_healthiest_node = true
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')

    def test_demote_after_failing_to_obtain_lock(self):
        self.ha.acquire_lock = false
        self.assertEquals(
            self.ha.run_cycle(),
            'demoted self after trying and failing to obtain lock')

    def test_follow_new_leader_after_failing_to_obtain_lock(self):
        self.ha.is_healthiest_node = true
        self.ha.acquire_lock = false
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'following new leader after trying and failing to obtain lock')

    def test_demote_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.assertEquals(
            self.ha.run_cycle(),
            'demoting self because i am not the healthiest node')

    def test_follow_new_leader_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because i am not the healthiest node'
        )

    def test_promote_because_have_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'promoted self to leader because i had the session lock')

    def test_leader_with_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')

    def test_demote_because_not_having_lock(self):
        self.ha.cluster.is_unlocked = false
        self.assertEquals(
            self.ha.run_cycle(),
            'demoting self because i do not have the lock and i was a leader')

    def test_demote_because_update_lock_failed(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.ha.update_lock = false
        self.assertEquals(
            self.ha.run_cycle(),
            'demoted self because failed to update leader lock in DCS')

    def test_follow(self):
        self.ha.cluster.is_unlocked = false
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'no action.  i am a secondary and i am following a leader')
        self.ha.patroni.replicatefrom = "foo"
        self.assertEquals(
            self.ha.run_cycle(),
            'no action.  i am a secondary and i am following a leader')

    def test_follow_in_pause(self):
        self.ha.cluster.is_unlocked = false
        self.ha.is_paused = true
        self.assertEquals(self.ha.run_cycle(),
                          'PAUSE: continue to run as master without lock')
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: no action')

    def test_no_etcd_connection_master_demote(self):
        self.ha.load_cluster_from_dcs = Mock(
            side_effect=DCSError('Etcd is not responding properly'))
        self.assertEquals(
            self.ha.run_cycle(),
            'demoted self because DCS is not accessible and i was a leader')

    def test_bootstrap_from_another_member(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.bootstrap(),
                          'trying to bootstrap from replica \'other\'')

    def test_bootstrap_waiting_for_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(),
                          'waiting for leader to bootstrap')

    def test_bootstrap_without_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.p.can_create_replica_without_replication_connection = MagicMock(
            return_value=True)
        self.assertEquals(self.ha.bootstrap(),
                          'trying to bootstrap (without leader)')

    def test_bootstrap_initialize_lock_failed(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(),
                          'failed to acquire initialize lock')

    def test_bootstrap_initialized_new_cluster(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.assertEquals(self.ha.bootstrap(), 'initialized a new cluster')

    def test_bootstrap_release_initialize_key_on_failure(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.p.bootstrap = Mock(side_effect=PostgresException(
            "Could not bootstrap master PostgreSQL"))
        self.assertRaises(PostgresException, self.ha.bootstrap)

    def test_reinitialize(self):
        self.assertIsNotNone(self.ha.reinitialize())

        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertIsNone(self.ha.reinitialize())

        self.assertIsNotNone(self.ha.reinitialize())

        self.ha.state_handler.name = self.ha.cluster.leader.name
        self.assertIsNotNone(self.ha.reinitialize())

    def test_restart(self):
        self.assertEquals(self.ha.restart(), (True, 'restarted successfully'))
        self.p.restart = false
        self.assertEquals(self.ha.restart(), (False, 'restart failed'))
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.reinitialize()
        self.assertEquals(self.ha.restart(),
                          (False, 'reinitialize already in progress'))
        with patch.object(self.ha, "restart_matches", return_value=False):
            self.assertEquals(self.ha.restart({'foo': 'bar'}),
                              (False, "restart conditions are not satisfied"))

    def test_restart_in_progress(self):
        with patch('patroni.async_executor.AsyncExecutor.busy',
                   PropertyMock(return_value=True)):
            self.ha.restart(run_async=True)
            self.assertTrue(self.ha.restart_scheduled())
            self.assertEquals(self.ha.run_cycle(),
                              'not healthy enough for leader race')

            self.ha.cluster = get_cluster_initialized_with_leader()
            self.assertEquals(self.ha.run_cycle(), 'restart in progress')

            self.ha.has_lock = true
            self.assertEquals(self.ha.run_cycle(),
                              'updated leader lock during restart')

            self.ha.update_lock = false
            self.assertEquals(self.ha.run_cycle(),
                              'failed to update leader lock during restart')

    @patch('requests.get', requests_get)
    @patch('time.sleep', Mock())
    def test_manual_failover_from_leader(self):
        self.ha.has_lock = true
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', '', None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, '', 'blabla', None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        f = Failover(0, self.p.name, '', None)
        self.ha.cluster = get_cluster_initialized_with_leader(f)
        self.assertEquals(self.ha.run_cycle(),
                          'manual failover: demoting myself')
        self.ha.fetch_node_status = lambda e: (e, True, True, 0, {
            'nofailover': 'True'
        })
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        # manual failover from the previous leader to us won't happen if we hold the nofailover flag
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')

        # Failover scheduled time must include timezone
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.ha.run_cycle()

        scheduled = datetime.datetime.utcnow().replace(tzinfo=pytz.UTC)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=30)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=-600)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

        scheduled = None
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader_in_pause(self):
        self.ha.has_lock = true
        self.ha.is_paused = true
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('PAUSE: no action.  i am the leader with the lock',
                          self.ha.run_cycle())
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, self.p.name, '', None))
        self.assertEquals('PAUSE: no action.  i am the leader with the lock',
                          self.ha.run_cycle())

    @patch('requests.get', requests_get)
    @patch('time.sleep', Mock())
    def test_manual_failover_process_no_leader(self):
        self.p.is_leader = false
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', 'leader', None))
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        self.ha.fetch_node_status = lambda e: (e, True, True, 0, {}
                                               )  # accessible, in_recovery
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because i am not the healthiest node'
        )
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, self.p.name, '', None))
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because i am not the healthiest node'
        )
        self.ha.fetch_node_status = lambda e: (e, False, True, 0, {}
                                               )  # inaccessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        # set failover flag to True for all members of the cluster
        # this should elect the current member, as we are not going to call the API for it.
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', 'other', None))
        self.ha.fetch_node_status = lambda e: (e, True, True, 0, {
            'nofailover': 'True'
        })  # accessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        # same as previous, but the current member now carries the nofailover tag; it must never be elected as leader
        self.ha.patroni.nofailover = True
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because I am not allowed to promote')

    @patch('time.sleep', Mock())
    def test_manual_failover_process_no_leader_in_pause(self):
        self.ha.is_paused = true
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', 'other', None))
        self.assertEquals(self.ha.run_cycle(),
                          'PAUSE: continue to run as master without lock')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, 'leader', '', None))
        self.assertEquals(self.ha.run_cycle(),
                          'PAUSE: continue to run as master without lock')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, 'leader', 'blabla', None))
        self.assertEquals('PAUSE: acquired session lock as a leader',
                          self.ha.run_cycle())
        self.p.is_leader = false
        self.p.set_role('replica')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, 'leader', self.p.name, None))
        self.assertEquals(
            self.ha.run_cycle(),
            'PAUSE: promoted self to leader by acquiring session lock')

    def test_is_healthiest_node(self):
        self.ha.state_handler.is_leader = false
        self.ha.patroni.nofailover = False
        self.ha.fetch_node_status = lambda e: (e, True, True, 0, {})
        self.assertTrue(self.ha.is_healthiest_node())
        self.ha.is_paused = true
        self.assertFalse(self.ha.is_healthiest_node())

    def test__is_healthiest_node(self):
        self.assertTrue(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.p.is_leader = false
        self.ha.fetch_node_status = lambda e: (e, True, True, 0, {}
                                               )  # accessible, in_recovery
        self.assertTrue(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = lambda e: (e, True, False, 0, {}
                                               )  # accessible, not in_recovery
        self.assertFalse(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = lambda e: (e, True, True, 1, {
        })  # accessible, in_recovery, xlog location ahead
        self.assertFalse(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.p.check_replication_lag = false
        self.assertFalse(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = True
        self.assertFalse(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = False

    @patch('requests.get', requests_get)
    def test_fetch_node_status(self):
        member = Member(0, 'test', 1,
                        {'api_url': 'http://127.0.0.1:8011/patroni'})
        self.ha.fetch_node_status(member)
        member = Member(0, 'test', 1,
                        {'api_url': 'http://localhost:8011/patroni'})
        self.ha.fetch_node_status(member)

    def test_post_recover(self):
        self.p.is_running = false
        self.ha.has_lock = true
        self.assertEqual(
            self.ha.post_recover(),
            'removed leader key after trying and failing to start postgres')
        self.ha.has_lock = false
        self.assertEqual(self.ha.post_recover(), 'failed to start postgres')
        self.p.is_running = true
        self.assertIsNone(self.ha.post_recover())

    def test_schedule_future_restart(self):
        self.ha.patroni.scheduled_restart = {}
        # do the restart 2 times. The first one should succeed, the second one should fail
        self.assertTrue(
            self.ha.schedule_future_restart({'schedule': future_restart_time}))
        self.assertFalse(
            self.ha.schedule_future_restart({'schedule': future_restart_time}))

    def test_delete_future_restarts(self):
        self.ha.delete_future_restart()

    def test_evaluate_scheduled_restart(self):
        self.p.postmaster_start_time = Mock(
            return_value=str(postmaster_start_time))
        # restart while the postmaster has been already restarted, fails
        with patch.object(
                self.ha, 'future_restart_scheduled',
                Mock(
                    return_value={
                        'postmaster_start_time':
                        str(postmaster_start_time -
                            datetime.timedelta(days=1)),
                        'schedule':
                        str(future_restart_time)
                    })):
            self.assertIsNone(self.ha.evaluate_scheduled_restart())
        with patch.object(
                self.ha, 'future_restart_scheduled',
                Mock(
                    return_value={
                        'postmaster_start_time': str(postmaster_start_time),
                        'schedule': str(future_restart_time)
                    })):
            with patch.object(self.ha, 'should_run_scheduled_action',
                              Mock(return_value=True)):
                # restart in the future, ok
                self.assertIsNotNone(self.ha.evaluate_scheduled_restart())
                with patch.object(self.ha, 'restart',
                                  Mock(return_value=(False, "Test"))):
                    # restart in the future, but the actual restart failed
                    self.assertIsNone(self.ha.evaluate_scheduled_restart())

    def test_scheduled_restart(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        with patch.object(self.ha, "evaluate_scheduled_restart",
                          Mock(return_value="restart scheduled")):
            self.assertEquals(self.ha.run_cycle(), "restart scheduled")

    def test_restart_matches(self):
        self.p._role = 'replica'
        self.p.server_version = 90500
        self.p._pending_restart = True
        self.assertFalse(self.ha.restart_matches("master", "9.5.0", True))
        self.assertFalse(self.ha.restart_matches("replica", "9.4.3", True))
        self.p._pending_restart = False
        self.assertFalse(self.ha.restart_matches("replica", "9.5.2", True))
        self.assertTrue(self.ha.restart_matches("replica", "9.5.2", False))

    def test_process_healthy_cluster_in_pause(self):
        self.p.is_leader = false
        self.ha.is_paused = true
        self.p.name = 'leader'
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(
            self.ha.run_cycle(),
            'PAUSE: removed leader lock because postgres is not running as master'
        )
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(),
                          'PAUSE: waiting to become master after promote...')

    def test_postgres_unhealthy_in_pause(self):
        self.ha.is_paused = true
        self.p.is_healthy = false
        self.assertEquals(self.ha.run_cycle(),
                          'PAUSE: postgres is not running')
        self.ha.has_lock = true
        self.assertEquals(
            self.ha.run_cycle(),
            'PAUSE: removed leader lock because postgres is not running')

    def test_no_etcd_connection_in_pause(self):
        self.ha.is_paused = true
        self.ha.load_cluster_from_dcs = Mock(
            side_effect=DCSError('Etcd is not responding properly'))
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: DCS is not accessible')
Example #49
0
class TestHa(unittest.TestCase):
    @patch('socket.getaddrinfo', socket_getaddrinfo)
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('patroni.dcs.dcs_modules',
           Mock(return_value=['patroni.dcs.foo', 'patroni.dcs.etcd']))
    @patch.object(etcd.Client, 'read', etcd_read)
    def setUp(self):
        with patch.object(Client, 'machines') as mock_machines:
            mock_machines.__get__ = Mock(
                return_value=['http://*****:*****'])

    @patch.object(Postgresql, 'fix_cluster_state', Mock())
    def test_crash_recovery(self):
        self.p.is_running = false
        self.p.controldata = lambda: {
            'Database cluster state': 'in production'
        }
        self.assertEquals(self.ha.run_cycle(),
                          'doing crash recovery in a single user mode')

    @patch.object(Postgresql, 'rewind_needed_and_possible',
                  Mock(return_value=True))
    def test_recover_with_rewind(self):
        self.p.is_running = false
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.run_cycle(), 'running pg_rewind from leader')

    @patch.object(Postgresql, 'can_rewind', PropertyMock(return_value=True))
    @patch.object(Postgresql, 'fix_cluster_state', Mock())
    def test_single_user_after_recover_failed(self):
        self.p.controldata = lambda: {'Database cluster state': 'in recovery'}
        self.p.is_running = false
        self.p.follow = false
        self.assertEquals(self.ha.run_cycle(), 'starting as a secondary')
        self.assertEquals(self.ha.run_cycle(),
                          'fixing cluster state in a single user mode')

    @patch('sys.exit', return_value=1)
    @patch('patroni.ha.Ha.sysid_valid', MagicMock(return_value=True))
    def test_sysid_no_match(self, exit_mock):
        self.ha.run_cycle()
        exit_mock.assert_called_once_with(1)

    @patch.object(Cluster, 'is_unlocked', Mock(return_value=False))
    def test_start_as_readonly(self):
        self.p.is_leader = false
        self.p.is_healthy = true
        self.ha.has_lock = true
        self.assertEquals(
            self.ha.run_cycle(),
            'promoted self to leader because i had the session lock')

    def test_acquire_lock_as_master(self):
        self.assertEquals(self.ha.run_cycle(),
                          'acquired session lock as a leader')

    def test_promoted_by_acquiring_lock(self):
        self.ha.is_healthiest_node = true
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')

    def test_demote_after_failing_to_obtain_lock(self):
        self.ha.acquire_lock = false
        self.assertEquals(
            self.ha.run_cycle(),
            'demoted self after trying and failing to obtain lock')

    def test_follow_new_leader_after_failing_to_obtain_lock(self):
        self.ha.is_healthiest_node = true
        self.ha.acquire_lock = false
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'following new leader after trying and failing to obtain lock')

    def test_demote_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.assertEquals(
            self.ha.run_cycle(),
            'demoting self because i am not the healthiest node')

    def test_follow_new_leader_because_not_healthiest(self):
        self.ha.is_healthiest_node = false
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because i am not the healthiest node'
        )

    def test_promote_because_have_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'promoted self to leader because i had the session lock')

    def test_promote_without_watchdog(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = true
        with patch.object(Watchdog, 'activate', Mock(return_value=False)):
            self.assertEquals(
                self.ha.run_cycle(),
                'Demoting self because watchdog could not be activated')
            self.p.is_leader = false
            self.assertEquals(
                self.ha.run_cycle(),
                'Not promoting self because watchdog could not be activated')

    def test_leader_with_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')

    def test_demote_because_not_having_lock(self):
        self.ha.cluster.is_unlocked = false
        with patch.object(Watchdog, 'is_running',
                          PropertyMock(return_value=True)):
            self.assertEquals(
                self.ha.run_cycle(),
                'demoting self because i do not have the lock and i was a leader'
            )

    def test_demote_because_update_lock_failed(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.ha.update_lock = false
        self.assertEquals(
            self.ha.run_cycle(),
            'demoted self because failed to update leader lock in DCS')
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'not promoting because failed to update leader lock in DCS')

    def test_follow(self):
        self.ha.cluster.is_unlocked = false
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'no action.  i am a secondary and i am following a leader')
        self.ha.patroni.replicatefrom = "foo"
        self.assertEquals(
            self.ha.run_cycle(),
            'no action.  i am a secondary and i am following a leader')

    def test_follow_in_pause(self):
        self.ha.cluster.is_unlocked = false
        self.ha.is_paused = true
        self.assertEquals(self.ha.run_cycle(),
                          'PAUSE: continue to run as master without lock')
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(), 'PAUSE: no action')

    @patch.object(Postgresql, 'rewind_needed_and_possible',
                  Mock(return_value=True))
    def test_follow_triggers_rewind(self):
        self.p.is_leader = false
        self.p.trigger_check_diverged_lsn()
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.run_cycle(), 'running pg_rewind from leader')

    def test_no_etcd_connection_master_demote(self):
        self.ha.load_cluster_from_dcs = Mock(
            side_effect=DCSError('Etcd is not responding properly'))
        self.assertEquals(
            self.ha.run_cycle(),
            'demoted self because DCS is not accessible and i was a leader')

    @patch('time.sleep', Mock())
    def test_bootstrap_from_another_member(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertEquals(self.ha.bootstrap(),
                          'trying to bootstrap from replica \'other\'')

    def test_bootstrap_waiting_for_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(),
                          'waiting for leader to bootstrap')

    def test_bootstrap_without_leader(self):
        self.ha.cluster = get_cluster_initialized_without_leader()
        self.p.can_create_replica_without_replication_connection = MagicMock(
            return_value=True)
        self.assertEquals(self.ha.bootstrap(),
                          'trying to bootstrap (without leader)')

    def test_bootstrap_initialize_lock_failed(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.assertEquals(self.ha.bootstrap(),
                          'failed to acquire initialize lock')

    def test_bootstrap_initialized_new_cluster(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.assertEquals(self.ha.bootstrap(),
                          'trying to bootstrap a new cluster')
        self.p.is_leader = false
        self.assertEquals(self.ha.run_cycle(),
                          'waiting for end of recovery after bootstrap')
        self.p.is_leader = true
        self.assertEquals(self.ha.run_cycle(), 'running post_bootstrap')
        self.assertEquals(self.ha.run_cycle(), 'initialized a new cluster')

    def test_bootstrap_release_initialize_key_on_failure(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.ha.bootstrap()
        self.p.is_running = false
        self.assertRaises(PatroniException, self.ha.post_bootstrap)

    def test_bootstrap_release_initialize_key_on_watchdog_failure(self):
        self.ha.cluster = get_cluster_not_initialized_without_leader()
        self.e.initialize = true
        self.ha.bootstrap()
        self.p.is_running.return_value = MockPostmaster()
        self.p.is_leader = true
        with patch.object(Watchdog, 'activate', Mock(return_value=False)):
            self.assertEquals(self.ha.post_bootstrap(),
                              'running post_bootstrap')
            self.assertRaises(PatroniException, self.ha.post_bootstrap)

    @patch('psycopg2.connect', psycopg2_connect)
    def test_reinitialize(self):
        self.assertIsNotNone(self.ha.reinitialize())

        self.ha.cluster = get_cluster_initialized_with_leader()
        self.assertIsNone(self.ha.reinitialize())

        self.assertIsNotNone(self.ha.reinitialize())

        self.ha.state_handler.name = self.ha.cluster.leader.name
        self.assertIsNotNone(self.ha.reinitialize())

    @patch('time.sleep', Mock())
    def test_restart(self):
        self.assertEquals(self.ha.restart({}),
                          (True, 'restarted successfully'))
        self.p.restart = Mock(return_value=None)
        self.assertEquals(self.ha.restart({}),
                          (False, 'postgres is still starting'))
        self.p.restart = false
        self.assertEquals(self.ha.restart({}), (False, 'restart failed'))
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.reinitialize()
        self.assertEquals(self.ha.restart({}),
                          (False, 'reinitialize already in progress'))
        with patch.object(self.ha, "restart_matches", return_value=False):
            self.assertEquals(self.ha.restart({'foo': 'bar'}),
                              (False, "restart conditions are not satisfied"))

    @patch('os.kill', Mock())
    def test_restart_in_progress(self):
        with patch('patroni.async_executor.AsyncExecutor.busy',
                   PropertyMock(return_value=True)):
            self.ha.restart({}, run_async=True)
            self.assertTrue(self.ha.restart_scheduled())
            self.assertEquals(self.ha.run_cycle(), 'restart in progress')

            self.ha.cluster = get_cluster_initialized_with_leader()
            self.assertEquals(self.ha.run_cycle(), 'restart in progress')

            self.ha.has_lock = true
            self.assertEquals(self.ha.run_cycle(),
                              'updated leader lock during restart')

            self.ha.update_lock = false
            self.p.set_role('master')
            with patch('patroni.async_executor.CriticalTask.cancel',
                       Mock(return_value=False)):
                with patch(
                        'patroni.postgresql.Postgresql.terminate_starting_postmaster'
                ) as mock_terminate:
                    self.assertEquals(self.ha.run_cycle(),
                                      'lost leader lock during restart')
                    mock_terminate.assert_called()

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader(self):
        self.ha.fetch_node_status = get_node_status()
        self.ha.has_lock = true
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', '', None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, '', 'blabla', None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        f = Failover(0, self.p.name, '', None)
        self.ha.cluster = get_cluster_initialized_with_leader(f)
        self.assertEquals(self.ha.run_cycle(),
                          'manual failover: demoting myself')
        self.p.rewind_needed_and_possible = true
        self.assertEquals(self.ha.run_cycle(),
                          'manual failover: demoting myself')
        self.ha.fetch_node_status = get_node_status(nofailover=True)
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        self.ha.fetch_node_status = get_node_status(watchdog_failed=True)
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        self.ha.fetch_node_status = get_node_status(wal_position=1)
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')
        # manual failover from the previous leader to us won't happen if we hold the nofailover flag
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')

        # Failover scheduled time must include timezone
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.ha.run_cycle()

        scheduled = datetime.datetime.utcnow().replace(tzinfo=tzutc)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=30)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

        scheduled = scheduled + datetime.timedelta(seconds=-600)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

        scheduled = None
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader_in_pause(self):
        self.ha.has_lock = true
        self.ha.is_paused = true
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEquals('PAUSE: no action.  i am the leader with the lock',
                          self.ha.run_cycle())
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, self.p.name, '', None))
        self.assertEquals('PAUSE: no action.  i am the leader with the lock',
                          self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_from_leader_in_synchronous_mode(self):
        self.p.is_leader = true
        self.ha.has_lock = true
        self.ha.is_synchronous_mode = true
        self.ha.is_failover_possible = false
        self.ha.process_sync_replication = Mock()
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, self.p.name, 'a', None), (self.p.name, None))
        self.assertEquals('no action.  i am the leader with the lock',
                          self.ha.run_cycle())
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, self.p.name, 'a', None), (self.p.name, 'a'))
        self.ha.is_failover_possible = true
        self.assertEquals('manual failover: demoting myself',
                          self.ha.run_cycle())

    @patch('requests.get', requests_get)
    def test_manual_failover_process_no_leader(self):
        self.p.is_leader = false
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', self.p.name, None))
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', 'leader', None))
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        self.ha.fetch_node_status = get_node_status(
        )  # accessible, in_recovery
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because i am not the healthiest node'
        )
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, self.p.name, '', None))
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because i am not the healthiest node'
        )
        self.ha.fetch_node_status = get_node_status(
            reachable=False)  # inaccessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        # set failover flag to True for all members of the cluster
        # this should elect the current member, as we are not going to call the API for it.
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', 'other', None))
        self.ha.fetch_node_status = get_node_status(
            nofailover=True)  # accessible, in_recovery
        self.p.set_role('replica')
        self.assertEquals(self.ha.run_cycle(),
                          'promoted self to leader by acquiring session lock')
        # same as previous, but the current member now carries the nofailover tag; it must never be elected as leader
        self.ha.patroni.nofailover = True
        self.assertEquals(
            self.ha.run_cycle(),
            'following a different leader because I am not allowed to promote')

    def test_manual_failover_process_no_leader_in_pause(self):
        self.ha.is_paused = true
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, '', 'other', None))
        self.assertEquals(self.ha.run_cycle(),
                          'PAUSE: continue to run as master without lock')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, 'leader', '', None))
        self.assertEquals(self.ha.run_cycle(),
                          'PAUSE: continue to run as master without lock')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, 'leader', 'blabla', None))
        self.assertEquals('PAUSE: acquired session lock as a leader',
                          self.ha.run_cycle())
        self.p.is_leader = false
        self.p.set_role('replica')
        self.ha.cluster = get_cluster_initialized_without_leader(
            failover=Failover(0, 'leader', self.p.name, None))
        self.assertEquals(
            self.ha.run_cycle(),
            'PAUSE: promoted self to leader by acquiring session lock')

    def test_is_healthiest_node(self):
        self.ha.state_handler.is_leader = false
        self.ha.patroni.nofailover = False
        self.ha.fetch_node_status = get_node_status()
        self.assertTrue(self.ha.is_healthiest_node())
        with patch.object(Watchdog, 'is_healthy',
                          PropertyMock(return_value=False)):
            self.assertFalse(self.ha.is_healthiest_node())
        with patch('patroni.postgresql.Postgresql.is_starting',
                   return_value=True):
            self.assertFalse(self.ha.is_healthiest_node())
        self.ha.is_paused = true
        self.assertFalse(self.ha.is_healthiest_node())

    def test__is_healthiest_node(self):
        self.assertTrue(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.p.is_leader = false
        self.ha.fetch_node_status = get_node_status(
        )  # accessible, in_recovery
        self.assertTrue(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = get_node_status(
            in_recovery=False)  # accessible, not in_recovery
        self.assertFalse(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.fetch_node_status = get_node_status(
            wal_position=11)  # accessible, in_recovery, wal position ahead
        self.assertFalse(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        with patch('patroni.postgresql.Postgresql.wal_position',
                   return_value=1):
            self.assertFalse(
                self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = True
        self.assertFalse(
            self.ha._is_healthiest_node(self.ha.old_cluster.members))
        self.ha.patroni.nofailover = False

    @patch('requests.get', requests_get)
    def test_fetch_node_status(self):
        member = Member(0, 'test', 1,
                        {'api_url': 'http://127.0.0.1:8011/patroni'})
        self.ha.fetch_node_status(member)
        member = Member(0, 'test', 1,
                        {'api_url': 'http://*****:*****'})
        self.ha.fetch_node_status(member)

    @patch('patroni.ha.Ha.update_lock', return_value=True)
    @patch('patroni.ha.Ha.demote')
    def test_starting_timeout(self, demote, update_lock):
        def check_calls(seq):
            for mock, called in seq:
                if called:
                    mock.assert_called_once()
                else:
                    mock.assert_not_called()
                mock.reset_mock()

        self.ha.has_lock = true
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.p.check_for_startup = true
        self.p.time_in_state = lambda: 30
        self.assertEquals(
            self.ha.run_cycle(),
            'PostgreSQL is still starting up, 270 seconds until timeout')
        check_calls([(update_lock, True), (demote, False)])

        self.p.time_in_state = lambda: 350
        self.ha.fetch_node_status = get_node_status(
            reachable=False)  # inaccessible, in_recovery
        self.assertEquals(
            self.ha.run_cycle(),
            'master start has timed out, but continuing to wait because failover is not possible'
        )
        check_calls([(update_lock, True), (demote, False)])

        self.ha.fetch_node_status = get_node_status(
        )  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(),
                          'stopped PostgreSQL because of startup timeout')
        check_calls([(update_lock, True), (demote, True)])

        update_lock.return_value = False
        self.assertEquals(
            self.ha.run_cycle(),
            'stopped PostgreSQL while starting up because leader key was lost')
        check_calls([(update_lock, True), (demote, True)])

        self.ha.has_lock = false
        self.p.is_leader = false
        self.assertEquals(
            self.ha.run_cycle(),
            'no action.  i am a secondary and i am following a leader')
        check_calls([(update_lock, False), (demote, False)])

    def test_manual_failover_while_starting(self):
        self.ha.has_lock = true
        self.p.check_for_startup = true
        f = Failover(0, self.p.name, '', None)
        self.ha.cluster = get_cluster_initialized_with_leader(f)
        self.ha.fetch_node_status = get_node_status(
        )  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(),
                          'manual failover: demoting myself')

    @patch('patroni.ha.Ha.demote')
    def test_failover_immediately_on_zero_master_start_timeout(self, demote):
        self.p.is_running = false
        self.ha.cluster = get_cluster_initialized_with_leader(
            sync=(self.p.name, 'other'))
        self.ha.cluster.config.data['synchronous_mode'] = True
        self.ha.patroni.config.set_dynamic_configuration(
            {'master_start_timeout': 0})
        self.ha.has_lock = true
        self.ha.update_lock = true
        self.ha.fetch_node_status = get_node_status(
        )  # accessible, in_recovery
        self.assertEquals(self.ha.run_cycle(),
                          'stopped PostgreSQL to fail over after a crash')
        demote.assert_called_once()

    @patch('patroni.postgresql.Postgresql.follow')
    def test_demote_immediate(self, follow):
        self.ha.has_lock = true
        self.e.get_cluster = Mock(
            return_value=get_cluster_initialized_without_leader())
        self.ha.demote('immediate')
        follow.assert_called_once_with(None)

    def test_process_sync_replication(self):
        self.ha.has_lock = true
        mock_set_sync = self.p.set_synchronous_standby = Mock()
        self.p.name = 'leader'

        # Test sync key removed when sync mode disabled
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader',
                                                                    'other'))
        with patch.object(self.ha.dcs,
                          'delete_sync_state') as mock_delete_sync:
            self.ha.run_cycle()
            mock_delete_sync.assert_called_once()
            mock_set_sync.assert_called_once_with(None)

        mock_set_sync.reset_mock()
        # Test sync key not touched when not there
        self.ha.cluster = get_cluster_initialized_with_leader()
        with patch.object(self.ha.dcs,
                          'delete_sync_state') as mock_delete_sync:
            self.ha.run_cycle()
            mock_delete_sync.assert_not_called()
            mock_set_sync.assert_called_once_with(None)

        mock_set_sync.reset_mock()

        self.ha.is_synchronous_mode = true

        # Test sync standby not touched when picking the same node
        self.p.pick_synchronous_standby = Mock(return_value=('other', True))
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader',
                                                                    'other'))
        self.ha.run_cycle()
        mock_set_sync.assert_not_called()

        mock_set_sync.reset_mock()

        # Test sync standby is replaced when switching standbys
        self.p.pick_synchronous_standby = Mock(return_value=('other2', False))
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.run_cycle()
        mock_set_sync.assert_called_once_with('other2')

        mock_set_sync.reset_mock()
        # Test sync standby is not disabled when updating dcs fails
        self.ha.dcs.write_sync_state = Mock(return_value=False)
        self.ha.run_cycle()
        mock_set_sync.assert_not_called()

        mock_set_sync.reset_mock()
        # Test changing sync standby
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(
            return_value=get_cluster_initialized_with_leader(sync=('leader',
                                                                   'other')))
        # self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader', 'other'))
        self.p.pick_synchronous_standby = Mock(return_value=('other2', True))
        self.ha.run_cycle()
        self.ha.dcs.get_cluster.assert_called_once()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 2)

        # Test updating sync standby key failed due to race
        self.ha.dcs.write_sync_state = Mock(side_effect=[True, False])
        self.ha.run_cycle()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 2)

        # Test changing sync standby failed due to race
        self.ha.dcs.write_sync_state = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(
            return_value=get_cluster_initialized_with_leader(
                sync=('somebodyelse', None)))
        self.ha.run_cycle()
        self.assertEquals(self.ha.dcs.write_sync_state.call_count, 1)

        # Test sync set to '*' when synchronous_mode_strict is enabled
        mock_set_sync.reset_mock()
        self.ha.is_synchronous_mode_strict = true
        self.p.pick_synchronous_standby = Mock(return_value=(None, False))
        self.ha.run_cycle()
        mock_set_sync.assert_called_once_with('*')

    def test_sync_replication_become_master(self):
        self.ha.is_synchronous_mode = true

        mock_set_sync = self.p.set_synchronous_standby = Mock()
        self.p.is_leader = false
        self.p.set_role('replica')
        self.ha.has_lock = true
        mock_write_sync = self.ha.dcs.write_sync_state = Mock(
            return_value=True)
        self.p.name = 'leader'
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('other',
                                                                    None))

        # When we just became master nobody is sync
        self.assertEquals(self.ha.enforce_master_role('msg', 'promote msg'),
                          'promote msg')
        mock_set_sync.assert_called_once_with(None)
        mock_write_sync.assert_called_once_with('leader', None, index=0)

        mock_set_sync.reset_mock()

        # If writing the sync state to the DCS fails we must not promote
        self.p.set_role('replica')
        mock_write_sync.return_value = False
        self.assertTrue(
            self.ha.enforce_master_role('msg', 'promote msg') != 'promote msg')
        mock_set_sync.assert_not_called()
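
        # The two cases above show that promotion is gated on the DCS write: the
        # 'promote msg' path is taken only after write_sync_state() records this node
        # as leader with no sync standby (index=0), and a failed write blocks it.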

    def test_unhealthy_sync_mode(self):
        self.ha.is_synchronous_mode = true

        self.p.is_leader = false
        self.p.set_role('replica')
        self.p.name = 'other'
        self.ha.cluster = get_cluster_initialized_without_leader(
            sync=('leader', 'other2'))
        mock_write_sync = self.ha.dcs.write_sync_state = Mock(
            return_value=True)
        mock_acquire = self.ha.acquire_lock = Mock(return_value=True)
        mock_follow = self.p.follow = Mock()
        mock_promote = self.p.promote = Mock()

        # If we don't match the sync replica we are not allowed to acquire lock
        self.ha.run_cycle()
        mock_acquire.assert_not_called()
        mock_follow.assert_called_once()
        self.assertEquals(mock_follow.call_args[0][0], None)
        mock_write_sync.assert_not_called()

        mock_follow.reset_mock()
        # If we do match we will try to promote
        self.ha._is_healthiest_node = true

        self.ha.cluster = get_cluster_initialized_without_leader(
            sync=('leader', 'other'))
        self.ha.run_cycle()
        mock_acquire.assert_called_once()
        mock_follow.assert_not_called()
        mock_promote.assert_called_once()
        mock_write_sync.assert_called_once_with('other', None, index=0)
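
        # write_sync_state(leader, sync_standby, index=...) is expected to record the
        # promoting node ('other') as leader with no sync standby yet; a replica that
        # is not the registered sync standby never even attempts to acquire the lock.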

    def test_disable_sync_when_restarting(self):
        self.ha.is_synchronous_mode = true

        self.p.name = 'other'
        self.p.is_leader = false
        self.p.set_role('replica')
        mock_restart = self.p.restart = Mock(return_value=True)
        self.ha.cluster = get_cluster_initialized_with_leader(sync=('leader',
                                                                    'other'))
        self.ha.touch_member = Mock(return_value=True)
        self.ha.dcs.get_cluster = Mock(side_effect=[
            get_cluster_initialized_with_leader(sync=('leader', syncstandby))
            for syncstandby in ['other', None]
        ])

        with patch('time.sleep') as mock_sleep:
            self.ha.restart({})
            mock_restart.assert_called_once()
            mock_sleep.assert_called()

        # Restart is still called when DCS connection fails
        mock_restart.reset_mock()
        self.ha.dcs.get_cluster = Mock(side_effect=DCSError("foo"))
        self.ha.restart({})

        mock_restart.assert_called_once()

        # We don't try to fetch the cluster state when touch_member fails
        mock_restart.reset_mock()
        self.ha.dcs.get_cluster.reset_mock()
        self.ha.touch_member = Mock(return_value=False)

        self.ha.restart({})

        mock_restart.assert_called_once()
        self.ha.dcs.get_cluster.assert_not_called()
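
        # Flow exercised above: before restarting a synchronous standby, Patroni marks
        # itself nosync via touch_member() and polls the DCS (sleeping in between)
        # until the leader has chosen another sync standby; a DCS error or a failed
        # touch_member() does not prevent the restart itself.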

    def test_effective_tags(self):
        self.ha._disable_sync = True
        self.assertEquals(self.ha.get_effective_tags(), {
            'foo': 'bar',
            'nosync': True
        })
        self.ha._disable_sync = False
        self.assertEquals(self.ha.get_effective_tags(), {'foo': 'bar'})
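
        # get_effective_tags() apparently merges the configured tags with a transient
        # 'nosync': True entry while _disable_sync is set, which is how the restart
        # flow above temporarily opts this node out of synchronous replication.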

    def test_restore_cluster_config(self):
        self.ha.cluster.config.data.clear()
        self.ha.has_lock = true
        self.ha.cluster.is_unlocked = false
        self.assertEquals(self.ha.run_cycle(),
                          'no action.  i am the leader with the lock')

    def test_watch(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.watch(0)

    def test_wakeup(self):
        self.ha.wakeup()

    def test_shutdown(self):
        self.p.is_running = false
        self.ha.shutdown()

    @patch('time.sleep', Mock())
    def test_leader_with_empty_directory(self):
        self.ha.cluster = get_cluster_initialized_with_leader()
        self.ha.has_lock = true
        self.p.data_directory_empty = true
        self.assertEquals(
            self.ha.run_cycle(),
            'released leader key voluntarily as data dir empty and currently leader'
        )
        self.assertEquals(self.p.role, 'uninitialized')

        # as has_lock is mocked out, we need to fake the leader key release
        self.ha.has_lock = false
        # will not say bootstrap from leader as replica can't self elect
        self.assertEquals(self.ha.run_cycle(),
                          "trying to bootstrap from replica 'other'")


class TestPostgresql(unittest.TestCase):

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    @patch('os.rename', Mock())
    def setUp(self):
        self.data_dir = 'data/test0'
        if not os.path.exists(self.data_dir):
            os.makedirs(self.data_dir)
        self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': self.data_dir,
                             'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                             'pg_hba': ['host replication replicator 127.0.0.1/32 md5',
                                        'hostssl all all 0.0.0.0/0 md5',
                                        'host all all 0.0.0.0/0 md5'],
                             'superuser': {'username': '******', 'password': '******'},
                             'admin': {'username': '******', 'password': '******'},
                             'pg_rewind': {'username': '******', 'password': '******'},
                             'replication': {'username': '******',
                                             'password': '******'},
                             'parameters': {'foo': 'bar'}, 'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                           'on_restart': 'true', 'on_role_change': 'true',
                                           'on_reload': 'true'
                                           },
                             'restore': 'true'})
        self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(0, 'test1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres',
                            'tags': {'replicatefrom': 'leader'}})
        self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})

    def tearDown(self):
        shutil.rmtree('data')

    def test_get_initdb_options(self):
        self.p.initdb_options = [{'encoding': 'UTF8'}, 'data-checksums']
        self.assertEquals(self.p.get_initdb_options(), ['--encoding=UTF8', '--data-checksums'])
        self.p.initdb_options = [{'pgdata': 'bar'}]
        self.assertRaises(Exception, self.p.get_initdb_options)
        self.p.initdb_options = [{'foo': 'bar', 1: 2}]
        self.assertRaises(Exception, self.p.get_initdb_options)
        self.p.initdb_options = [1]
        self.assertRaises(Exception, self.p.get_initdb_options)

    def test_initialize(self):
        self.assertTrue(self.p.initialize())

        with open(os.path.join(self.data_dir, 'pg_hba.conf')) as f:
            lines = f.readlines()
            assert 'host replication replicator 127.0.0.1/32 md5\n' in lines
            assert 'host all all 0.0.0.0/0 md5\n' in lines

    @patch('os.path.exists', Mock(return_value=True))
    @patch('os.unlink', Mock())
    def test_delete_trigger_file(self):
        self.p.delete_trigger_file()

    def test_start(self):
        self.assertTrue(self.p.start())
        self.p.is_running = false
        open(os.path.join(self.data_dir, 'postmaster.pid'), 'w').close()
        pg_conf = os.path.join(self.data_dir, 'postgresql.conf')
        open(pg_conf, 'w').close()
        self.assertTrue(self.p.start())
        with open(pg_conf) as f:
            lines = f.readlines()
            self.assertTrue("foo = 'bar'\n" in lines)

    def test_stop(self):
        self.assertTrue(self.p.stop())
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertTrue(self.p.stop())
            self.p.is_running = Mock(return_value=True)
            self.assertFalse(self.p.stop())

    def test_restart(self):
        self.p.start = false
        self.p.is_running = false
        self.assertFalse(self.p.restart())
        self.assertEquals(self.p.state, 'restart failed (restarting)')

    @patch.object(builtins, 'open', MagicMock())
    def test_write_pgpass(self):
        self.p.write_pgpass({'host': 'localhost', 'port': '5432', 'user': '******', 'password': '******'})

    @patch('subprocess.call', side_effect=OSError)
    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_pg_rewind(self, mock_call):
        self.assertTrue(self.p.rewind(self.leader))
        subprocess.call = mock_call
        self.assertFalse(self.p.rewind(self.leader))

    @patch('patroni.postgresql.Postgresql.rewind', return_value=False)
    @patch('patroni.postgresql.Postgresql.remove_data_directory', MagicMock(return_value=True))
    @patch('patroni.postgresql.Postgresql.single_user_mode', MagicMock(return_value=1))
    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    @patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string))
    def test_follow(self, mock_pg_rewind):
        self.p.follow(None)
        self.p.follow(self.leader)
        self.p.follow(Leader(-1, 28, self.other))
        self.p.rewind = mock_pg_rewind
        self.p.follow(self.leader)
        with mock.patch('os.path.islink', MagicMock(return_value=True)):
            with mock.patch('patroni.postgresql.Postgresql.can_rewind', new_callable=PropertyMock(return_value=True)):
                with mock.patch('os.unlink', MagicMock(return_value=True)):
                    self.p.follow(self.leader, recovery=True)
        with mock.patch('patroni.postgresql.Postgresql.can_rewind', new_callable=PropertyMock(return_value=True)):
            self.p.rewind.return_value = True
            self.p.follow(self.leader, recovery=True)
            self.p.rewind.return_value = False
            self.p.follow(self.leader, recovery=True)
        with mock.patch('patroni.postgresql.Postgresql.check_recovery_conf', MagicMock(return_value=True)):
            self.assertTrue(self.p.follow(None))
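
        # follow() is exercised across its main branches: no leader, the cluster
        # leader, another member as leader, and the recovery=True paths where
        # pg_rewind is attempted or skipped depending on can_rewind and its outcome.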

    @patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string))
    def test_can_rewind(self):
        tmp = self.p.pg_rewind
        self.p.pg_rewind = None
        self.assertFalse(self.p.can_rewind)
        self.p.pg_rewind = tmp
        with mock.patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with mock.patch('subprocess.call', side_effect=OSError):
            self.assertFalse(self.p.can_rewind)
        tmp = self.p.controldata
        self.p.controldata = lambda: {'wal_log_hints setting': 'on'}
        self.assertTrue(self.p.can_rewind)
        self.p.controldata = tmp
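
        # can_rewind evidently requires pg_rewind credentials to be configured, the
        # pg_rewind binary to be callable, and controldata() to report the
        # 'wal_log_hints setting' as on for the data directory.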

    @patch('time.sleep', Mock())
    def test_create_replica(self):
        self.p.delete_trigger_file = Mock(side_effect=OSError)
        with patch('subprocess.call', Mock(side_effect=[1, 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)
        with patch('subprocess.call', Mock(side_effect=[Exception(), 0])):
            self.assertEquals(self.p.create_replica(self.leader), 0)

        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo'}
        with patch('subprocess.call', Mock(return_value=0)):
            self.assertEquals(self.p.create_replica(self.leader), 0)
            del self.p.config['wale']
            self.assertEquals(self.p.create_replica(self.leader), 0)

        with patch('subprocess.call', Mock(side_effect=Exception("foo"))):
            self.assertEquals(self.p.create_replica(self.leader), 1)
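
        # create_replica() tries each configured create_replica_method in turn
        # ('wale' before 'basebackup' here), falling through to the next method when
        # one raises, and returns 0 as soon as a method succeeds or a non-zero code
        # once they have all failed.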

    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem], None)
        self.p.sync_replication_slots(cluster)
        self.p.query = Mock(side_effect=psycopg2.OperationalError)
        self.p.schedule_load_slots = True
        self.p.sync_replication_slots(cluster)
        self.p.schedule_load_slots = False
        with mock.patch('patroni.postgresql.Postgresql.role', new_callable=PropertyMock(return_value='replica')):
            self.p.sync_replication_slots(cluster)

    @patch.object(MockConnect, 'closed', 2)
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query, 'RetryFailedError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = false
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p._role = 'replica'
        self.assertTrue(self.p.promote())
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEquals(self.p.last_operation(), '0')

    @patch('subprocess.Popen', Mock(side_effect=OSError))
    def test_call_nowait(self):
        self.assertFalse(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    def test_check_replication_lag(self):
        self.assertTrue(self.p.check_replication_lag(0))

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.is_running = false
        self.p.move_data_directory()
        with patch('os.rename', Mock(side_effect=OSError)):
            self.p.move_data_directory()

    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertRaises(PostgresException, self.p.bootstrap)
        self.p.bootstrap()

    @patch('patroni.postgresql.Postgresql.create_replica', Mock(return_value=0))
    def test_clone(self):
        self.p.clone(self.leader)

    @patch('os.listdir', Mock(return_value=['recovery.conf']))
    @patch('os.path.exists', Mock(return_value=True))
    def test_get_postgres_role_from_data_directory(self):
        self.assertEquals(self.p.get_postgres_role_from_data_directory(), 'replica')

    def test_remove_data_directory(self):
        self.p.remove_data_directory()
        open(self.data_dir, 'w').close()
        self.p.remove_data_directory()
        os.symlink('unexisting', self.data_dir)
        with patch('os.unlink', Mock(side_effect=OSError)):
            self.p.remove_data_directory()
        self.p.remove_data_directory()

    def test_controldata(self):
        with patch('subprocess.check_output', Mock(return_value=0, side_effect=pg_controldata_string)):
            data = self.p.controldata()
            self.assertEquals(len(data), 50)
            self.assertEquals(data['Database cluster state'], 'shut down in recovery')
            self.assertEquals(data['wal_log_hints setting'], 'on')
            self.assertEquals(int(data['Database block size']), 8192)

        with patch('subprocess.check_output', Mock(side_effect=subprocess.CalledProcessError(1, ''))):
            self.assertEquals(self.p.controldata(), {})
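
        # controldata() parses the key/value output of pg_controldata into a plain
        # dict (50 entries for the sample output used here) and degrades to an empty
        # dict when the pg_controldata call fails.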

    @patch('subprocess.Popen')
    @patch.object(builtins, 'open', MagicMock(return_value=42))
    def test_single_user_mode(self, subprocess_popen_mock):
        subprocess_popen_mock.return_value.wait.return_value = 0
        self.assertEquals(self.p.single_user_mode(options=dict(archive_mode='on', archive_command='false')), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.data_dir,
                                                      '-c', 'archive_command=false', '-c', 'archive_mode=on',
                                                       'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.reset_mock()
        self.assertEquals(self.p.single_user_mode(command="CHECKPOINT"), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.data_dir,
                                                      'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.return_value = None
        self.assertEquals(self.p.single_user_mode(), 1)
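
        # The asserted command lines show how single_user_mode() is built: postgres
        # --single -D <data_dir>, one '-c name=value' pair per override option, and
        # the database name; the child's exit status is returned, with 1 when the
        # process could not be started at all.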

    @patch('os.listdir', MagicMock(side_effect=fake_listdir))
    @patch('os.unlink', return_value=True)
    @patch('os.remove', return_value=True)
    @patch('os.path.islink', return_value=False)
    @patch('os.path.isfile', return_value=True)
    def test_cleanup_archive_status(self, mock_file, mock_link, mock_remove, mock_unlink):
        ap = os.path.join(self.data_dir, 'pg_xlog', 'archive_status/')
        self.p.cleanup_archive_status()
        mock_remove.assert_has_calls([mock.call(ap+'a'), mock.call(ap+'b'), mock.call(ap+'c')])
        mock_unlink.assert_not_called()

        mock_remove.reset_mock()

        mock_file.return_value = False
        mock_link.return_value = True
        self.p.cleanup_archive_status()
        mock_unlink.assert_has_calls([mock.call(ap+'a'), mock.call(ap+'b'), mock.call(ap+'c')])
        mock_remove.assert_not_called()

        mock_unlink.reset_mock()
        mock_remove.reset_mock()

        mock_file.side_effect = OSError
        mock_link.side_effect = OSError
        self.p.cleanup_archive_status()
        mock_unlink.assert_not_called()
        mock_remove.assert_not_called()
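
        # cleanup_archive_status() removes regular files under pg_xlog/archive_status
        # with os.remove() and unlinks symlinks with os.unlink(); when the per-entry
        # isfile/islink checks raise OSError, neither call is made and no exception
        # escapes, so a broken entry apparently does not abort the cleanup.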

    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    def test_sysid(self):
        self.assertEqual(self.p.sysid, "6200971513092291716")

    @patch('os.path.isfile', Mock(return_value=True))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_save_configuration_files(self):
        self.p.save_configuration_files()

    @patch('os.path.isfile', Mock(side_effect=[False, True]))
    @patch('shutil.copy', Mock(side_effect=IOError))
    def test_restore_configuration_files(self):
        self.p.restore_configuration_files()

    def test_can_create_replica_without_replication_connection(self):
        self.p.config['create_replica_method'] = []
        self.assertFalse(self.p.can_create_replica_without_replication_connection())
        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo', 'no_master': 1}
        self.assertTrue(self.p.can_create_replica_without_replication_connection())
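
        # A replica creation method is considered usable without a replication
        # connection when its configuration carries a truthy 'no_master' entry, as
        # the 'wale' section above does; with no methods configured the answer is
        # False.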

    def test_replica_method_can_work_without_replication_connection(self):
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('basebackup'))
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('foobar'))
        self.p.config['foo'] = {'command': 'bar', 'no_master': 1}
        self.assertTrue(self.p.replica_method_can_work_without_replication_connection('foo'))
        self.p.config['foo'] = {'command': 'bar'}
        self.assertFalse(self.p.replica_method_can_work_without_replication_connection('foo'))