def __init__(self, config):
    """Initialize the PostgreSQL handler from the ``postgresql`` config section.

    Sets up paths under ``data_dir``, the connection wrapper, the
    configuration/bootstrap/slots/callback helpers and two retry policies,
    then probes for an already running postmaster to derive the initial
    state and role.

    :param config: dict with at least ``name``, ``scope``, ``data_dir`` and
        ``retry_timeout``; other keys are optional and have defaults.
    """
    self.name = config['name']
    self.scope = config['scope']
    self._data_dir = config['data_dir']
    self._database = config.get('database', 'postgres')
    # Well-known files inside the data directory.
    self._version_file = os.path.join(self._data_dir, 'PG_VERSION')
    self._pg_control = os.path.join(self._data_dir, 'global', 'pg_control')
    self._major_version = self.get_major_version()
    # The state lock must exist before the set_state() call below.
    self._state_lock = Lock()
    self.set_state('stopped')
    self._pending_restart = False
    self._connection = Connection()
    # ConfigHandler is created before check_directories() because the latter
    # is a method of the handler itself.
    self.config = ConfigHandler(self, config)
    self.config.check_directories()
    self._bin_dir = config.get('bin_dir') or ''
    self.bootstrap = Bootstrap(self)
    self.bootstrapping = False
    # Remember the creating thread's ident; presumably used elsewhere to
    # detect cross-thread access -- TODO confirm against the full class.
    self.__thread_ident = current_thread().ident
    self.slots_handler = SlotsHandler(self)
    self._callback_executor = CallbackExecutor()
    self.__cb_called = False
    self.__cb_pending = None
    self.cancellable = CancellableSubprocess()
    self._sysid = None
    # Unlimited attempts (max_tries=-1), bounded by half of retry_timeout.
    self.retry = Retry(max_tries=-1, deadline=config['retry_timeout']/2.0, max_delay=1,
                       retry_exceptions=PostgresConnectionException)
    # Retry 'pg_is_in_recovery()' only once
    self._is_leader_retry = Retry(max_tries=1, deadline=config['retry_timeout']/2.0, max_delay=1,
                                  retry_exceptions=PostgresConnectionException)
    # The role lock must exist before the set_role() call below.
    self._role_lock = Lock()
    self.set_role(self.get_postgres_role_from_data_directory())
    self._state_entry_timestamp = None
    self._cluster_info_state = {}
    self._cached_replica_timeline = None
    # Last known running process
    self._postmaster_proc = None
    if self.is_running():
        # Attach to an already running postmaster: adopt its state/role and
        # rewrite the configuration files we manage.
        self.set_state('running')
        self.set_role('master' if self.is_leader() else 'replica')
        self.config.write_postgresql_conf()  # we are "joining" already running postgres
        hba_saved = self.config.replace_pg_hba()
        ident_saved = self.config.replace_pg_ident()
        if hba_saved or ident_saved:
            # pg_hba.conf and/or pg_ident.conf were replaced; reload so the
            # running server picks them up.
            self.reload()
    elif self.role == 'master':
        # Data directory claims "master" but postgres is not running.
        self.set_role('demoted')
def __init__(self, config):
    """Initialize the PostgreSQL handler from its configuration dict.

    Derives server parameters, connection addresses, config-file paths and
    replication-slot bookkeeping, then probes for an already running
    postmaster to set the initial state and role.

    :param config: dict with at least ``name``, ``scope``, ``data_dir``,
        ``authentication`` (with a ``replication`` entry) and
        ``retry_timeout``.
    """
    self.config = config
    self.name = config['name']
    self.scope = config['scope']
    self._database = config.get('database', 'postgres')
    self._data_dir = config['data_dir']
    self._pending_restart = False
    self._server_parameters = self.get_server_parameters(config)
    self._connect_address = config.get('connect_address')
    # Replication credentials are mandatory; superuser credentials are not.
    self._superuser = config['authentication'].get('superuser', {})
    self._replication = config['authentication']['replication']
    self.resolve_connection_addresses()
    self._need_rewind = False
    self._use_slots = config.get('use_slots', True)
    self._version_file = os.path.join(self._data_dir, 'PG_VERSION')
    self._major_version = self.get_major_version()
    self._schedule_load_slots = self.use_slots
    self._pgpass = config.get('pgpass') or os.path.join(os.path.expanduser('~'), 'pgpass')
    self.callback = config.get('callbacks') or {}
    # Paths of the configuration files managed inside the data directory.
    config_base_name = config.get('config_base_name', 'postgresql')
    self._postgresql_conf = os.path.join(self._data_dir, config_base_name + '.conf')
    self._postgresql_base_conf_name = config_base_name + '.base.conf'
    self._postgresql_base_conf = os.path.join(self._data_dir, self._postgresql_base_conf_name)
    self._recovery_conf = os.path.join(self._data_dir, 'recovery.conf')
    self._configuration_to_save = (self._postgresql_conf, self._postgresql_base_conf,
                                   os.path.join(self._data_dir, 'pg_hba.conf'))
    self._postmaster_pid = os.path.join(self._data_dir, 'postmaster.pid')
    # Trigger file defaults to 'promote' and is resolved relative to data_dir.
    self._trigger_file = config.get('recovery_conf', {}).get('trigger_file') or 'promote'
    self._trigger_file = os.path.abspath(os.path.join(self._data_dir, self._trigger_file))
    self._connection = None
    self._cursor_holder = None
    self._sysid = None
    self._replication_slots = []  # list of already existing replication slots
    self.retry = Retry(max_tries=-1, deadline=config['retry_timeout'] / 2.0, max_delay=1,
                       retry_exceptions=PostgresConnectionException)
    # Locks must exist before the set_state()/set_role() calls below.
    self._state_lock = Lock()
    self.set_state('stopped')
    self._role_lock = Lock()
    self.set_role(self.get_postgres_role_from_data_directory())
    if self.is_running():
        # Adopt an already running postmaster.
        self.set_state('running')
        self.set_role('master' if self.is_leader() else 'replica')
        self._write_postgresql_conf()  # we are "joining" already running postgres
def __init__(self, cluster_name):
    """Best-effort discovery of AWS EC2 instance identity.

    Queries the EC2 instance-metadata endpoint for the instance id and
    region; ``self.available`` remains False on any failure so callers can
    treat AWS integration as optional.

    :param cluster_name: cluster name, or None (stored as 'unknown').
    """
    self.available = False
    self.cluster_name = cluster_name if cluster_name is not None else 'unknown'
    self._retry = Retry(deadline=300, max_delay=30, max_tries=-1,
                        retry_exceptions=(boto.exception.StandardError, ))
    try:
        # get the instance id
        # 169.254.169.254 is the link-local EC2 metadata service; the short
        # timeout keeps startup fast when not running on EC2.
        r = requests.get('http://169.254.169.254/latest/dynamic/instance-identity/document', timeout=2.1)
    except RequestException:
        logger.error('cannot query AWS meta-data')
        return
    if r.ok:
        try:
            content = r.json()
            self.instance_id = content['instanceId']
            self.region = content['region']
        except Exception:
            # Malformed/unexpected metadata document -- log and stay unavailable.
            logger.exception('unable to fetch instance id and region from AWS meta-data')
            return
        self.available = True
def __init__(self, config):
    """Initialize the Kubernetes DCS implementation.

    Builds the label selector used to find cluster objects, loads the
    in-cluster (or local) kube config, optionally prepares Endpoints
    subsets for this pod, and creates the Core V1 API proxy.

    :param config: dict with at least ``labels``, ``scope`` and
        ``retry_timeout``; ``namespace``, ``use_endpoints``, ``pod_ip``,
        ``ports`` etc. are optional.
    """
    self._labels = config['labels']
    # The scope label (default 'cluster-name') is forced to the scope value.
    self._labels[config.get('scope_label', 'cluster-name')] = config['scope']
    self._label_selector = ','.join('{0}={1}'.format(k, v) for k, v in self._labels.items())
    self._namespace = config.get('namespace') or 'default'
    self._role_label = config.get('role_label', 'role')
    # Blank the namespace before calling the base class -- presumably so the
    # base does not build namespace-qualified paths; TODO confirm.
    config['namespace'] = ''
    super(Kubernetes, self).__init__(config)
    self._retry = Retry(deadline=config['retry_timeout'], max_delay=1, max_tries=-1,
                        retry_exceptions=(KubernetesRetriableException, HTTPException,
                                          HTTPError, socket.error, socket.timeout))
    self._ttl = None
    try:
        # Prefer in-cluster service-account credentials; fall back to a
        # kubeconfig context when running outside the cluster.
        k8s_config.load_incluster_config()
    except k8s_config.ConfigException:
        k8s_config.load_kube_config(context=config.get('context', 'local'))
    self.__subsets = None
    # Endpoints mode needs a pod IP (or patronictl, which does not publish one).
    use_endpoints = config.get('use_endpoints') and (config.get('patronictl') or 'pod_ip' in config)
    if use_endpoints:
        addresses = [k8s_client.V1EndpointAddress(ip=config['pod_ip'])]
        ports = []
        for p in config.get('ports', [{}]):
            port = {'port': int(p.get('port', '5432'))}
            port.update({n: p[n] for n in ('name', 'protocol') if p.get(n)})
            ports.append(k8s_client.V1EndpointPort(**port))
        self.__subsets = [k8s_client.V1EndpointSubset(addresses=addresses, ports=ports)]
    self._api = CoreV1ApiProxy(use_endpoints)
    self.set_retry_timeout(config['retry_timeout'])
    self.set_ttl(config.get('ttl') or 30)
    # Cached view of the leader object as last observed via the API.
    self._leader_observed_record = {}
    self._leader_observed_time = None
    self._leader_resource_version = None
    self._leader_observed_subsets = []
    self.__do_not_watch = False
def test_api_execute(self, mock_machines):
    """Exercise EtcdClient.api_execute across success, timeout and failure paths.

    The machines property is mocked to return two endpoints; the test then
    drives api_execute through: an immediate EtcdException (timeout=0), a
    retried call via Retry, a machines-cache refresh, watch timeouts, and
    failures of the machines-cache reload itself.
    """
    mock_machines.__get__ = Mock(return_value=['http://localhost:4001', 'http://localhost:2379'])
    self.client._base_uri = 'http://localhost:4001'
    # timeout=0 makes the request fail immediately.
    self.assertRaises(etcd.EtcdException, self.client.api_execute, '/', 'POST', timeout=0)
    self.client._base_uri = 'http://localhost:4001'
    rtry = Retry(deadline=10, max_delay=1, max_tries=-1,
                 retry_exceptions=(etcd.EtcdLeaderElectionInProgress,))
    # Passing the Retry object through params lets api_execute cooperate
    # with the retry loop.
    rtry(self.client.api_execute, '/', 'POST', timeout=0, params={'retry': rtry})
    # Force a machines-cache refresh on the next call.
    self.client._machines_cache_updated = 0
    self.client.api_execute('/', 'POST', timeout=0)
    self.client._machines_cache = [self.client._base_uri]
    # A 'wait' (watch) request that times out raises EtcdWatchTimedOut.
    self.assertRaises(etcd.EtcdWatchTimedOut, self.client.api_execute,
                      '/timeout', 'POST', params={'wait': 'true'})
    self.assertRaises(etcd.EtcdWatchTimedOut, self.client.api_execute,
                      '/timeout', 'POST', params={'wait': 'true'})
    # Machines-cache reload raising: socket errors must surface as EtcdException.
    with patch.object(EtcdClient, '_calculate_timeouts', Mock(side_effect=[(1, 1, 0), (1, 1, 0), (0, 1, 0)])),\
            patch.object(EtcdClient, '_load_machines_cache', Mock(side_effect=Exception)):
        self.client.http.request = Mock(side_effect=socket.error)
        self.assertRaises(etcd.EtcdException, rtry, self.client.api_execute,
                          '/', 'GET', params={'retry': rtry})
    # Machines-cache reload succeeding but timeouts exhausted: still EtcdException.
    with patch.object(EtcdClient, '_calculate_timeouts', Mock(side_effect=[(1, 1, 0), (1, 1, 0), (0, 1, 0)])),\
            patch.object(EtcdClient, '_load_machines_cache', Mock(return_value=True)):
        self.assertRaises(etcd.EtcdException, rtry, self.client.api_execute,
                          '/', 'GET', params={'retry': rtry})
    # The low-level request helper raising EtcdException propagates as-is.
    with patch.object(EtcdClient, '_do_http_request', Mock(side_effect=etcd.EtcdException)):
        self.client._read_timeout = 0.01
        self.assertRaises(etcd.EtcdException, self.client.api_execute, '/', 'GET')
def test_copy(self):
    """A copied Retry must share the original's sleep function."""
    def noop_sleep(seconds):
        pass
    original = Retry(sleep_func=noop_sleep)
    duplicate = original.copy()
    self.assertTrue(duplicate.sleep_func is noop_sleep)
def __init__(self, config):
    """Initialize the Consul DCS implementation.

    Normalizes the endpoint configuration (``url`` or ``host``), TLS options
    and ``verify`` flag into ConsulClient keyword arguments, then creates a
    session unless running as patronictl.

    :param config: dict with at least ``scope`` and ``retry_timeout``.
    """
    super(Consul, self).__init__(config)
    self._scope = config['scope']
    self._session = None
    self.__do_not_watch = False
    self._retry = Retry(deadline=config['retry_timeout'], max_delay=1, max_tries=-1,
                        retry_exceptions=(ConsulInternalError, HTTPException,
                                          HTTPError, socket.error, socket.timeout))
    kwargs = {}
    if 'url' in config:
        # A full URL wins: derive scheme/host/port from it (default port 8500).
        r = urlparse(config['url'])
        config.update({'scheme': r.scheme, 'host': r.hostname, 'port': r.port or 8500})
    elif 'host' in config:
        # 'host' may be 'host' or 'host:port'; an explicit 'port' key wins.
        host, port = split_host_port(config.get('host', '127.0.0.1:8500'), 8500)
        config['host'] = host
        if 'port' not in config:
            config['port'] = int(port)
    if config.get('cacert'):
        # python-consul expects 'ca_cert', our config uses 'cacert'.
        config['ca_cert'] = config.pop('cacert')
    if config.get('key') and config.get('cert'):
        # Client cert + key are passed as a (cert, key) tuple.
        config['cert'] = (config['cert'], config['key'])
    config_keys = ('host', 'port', 'token', 'scheme', 'cert', 'ca_cert', 'dc')
    # Only forward keys that are actually set.
    kwargs = {p: config.get(p) for p in config_keys if config.get(p)}
    verify = config.get('verify')
    if not isinstance(verify, bool):
        # Accept string representations like 'on'/'off'.
        verify = parse_bool(verify)
    if isinstance(verify, bool):
        kwargs['verify'] = verify
    self._client = ConsulClient(**kwargs)
    self.set_retry_timeout(config['retry_timeout'])
    self.set_ttl(config.get('ttl') or 30)
    self._last_session_refresh = 0
    self.__session_checks = config.get('checks')
    self._register_service = config.get('register_service', False)
    if self._register_service:
        self._service_name = service_name_from_scope_name(self._scope)
        if self._scope != self._service_name:
            # The scope had to be mangled into a valid consul service name.
            logger.warning('Using %s as consul service name instead of scope name %s',
                           self._service_name, self._scope)
    self._service_check_interval = config.get('service_check_interval', '5s')
    if not self._ctl:
        # patronictl must not create (and hold) a session.
        self.create_session()
def __init__(self, name, config):
    """Set up the Etcd DCS layer: TTL, retry policy and etcd client."""
    super(Etcd, self).__init__(name, config)
    self.ttl = config.get('ttl', 30)
    # Transient etcd conditions worth retrying on.
    transient_errors = (etcd.EtcdConnectionFailed,
                        etcd.EtcdLeaderElectionInProgress,
                        etcd.EtcdWatcherCleared,
                        etcd.EtcdEventIndexCleared)
    self._retry = Retry(deadline=10, max_delay=1, max_tries=-1,
                        retry_exceptions=transient_errors)
    self._client = self.get_etcd_client(config)
def __init__(self, config):
    """Set up the Etcd DCS layer: TTL, retry policy, client and watch flag."""
    super(Etcd, self).__init__(config)
    self._ttl = int(config.get('ttl') or 30)
    # Transient etcd conditions worth retrying on.
    recoverable = (etcd.EtcdLeaderElectionInProgress,
                   etcd.EtcdWatcherCleared,
                   etcd.EtcdEventIndexCleared)
    self._retry = Retry(deadline=config['retry_timeout'], max_delay=1,
                        max_tries=-1, retry_exceptions=recoverable)
    self._client = self.get_etcd_client(config)
    self.__do_not_watch = False
def __init__(self, config):
    """Initialize the PostgreSQL handler (early Patroni variant).

    Derives listen address/port, file paths under ``data_dir``, the
    replication connection string and retry policy, then probes for an
    already running postmaster to set the initial state and role.

    :param config: dict with at least ``name``, ``scope``, ``listen``,
        ``data_dir``, ``replication``, ``superuser`` and ``admin``.
    """
    self.config = config
    self.name = config['name']
    self.server_parameters = config.get('parameters', {})
    self.scope = config['scope']
    # 'listen' is 'addresses:port'; raises ValueError if no port is given.
    self.listen_addresses, self.port = config['listen'].split(':')
    self.data_dir = config['data_dir']
    self.replication = config['replication']
    self.superuser = config['superuser']
    self.admin = config['admin']
    self.initdb_options = config.get('initdb', [])
    self.pgpass = config.get('pgpass', None) or os.path.join(os.path.expanduser('~'), 'pgpass')
    self.pg_rewind = config.get('pg_rewind', {})
    self.callback = config.get('callbacks', {})
    self.use_slots = config.get('use_slots', True)
    self.schedule_load_slots = self.use_slots
    # Files inside the data directory this class manages or inspects.
    self.recovery_conf = os.path.join(self.data_dir, 'recovery.conf')
    self.configuration_to_save = (os.path.join(self.data_dir, 'pg_hba.conf'),
                                  os.path.join(self.data_dir, 'postgresql.conf'))
    self.postmaster_pid = os.path.join(self.data_dir, 'postmaster.pid')
    # Trigger file defaults to 'promote', resolved relative to data_dir.
    self.trigger_file = config.get('recovery_conf', {}).get('trigger_file', None) or 'promote'
    self.trigger_file = os.path.abspath(os.path.join(self.data_dir, self.trigger_file))
    self._pg_ctl = ['pg_ctl', '-w', '-D', self.data_dir]
    self.local_address = self.get_local_address()
    connect_address = config.get('connect_address', None) or self.local_address
    # Replication credentials are embedded into the member connection string.
    self.connection_string = 'postgres://{username}:{password}@{connect_address}/postgres'.format(
        connect_address=connect_address, **self.replication)
    self._connection = None
    self._cursor_holder = None
    self._need_rewind = False
    self._sysid = None
    self.replication_slots = []  # list of already existing replication slots
    self.retry = Retry(max_tries=-1, deadline=5, max_delay=1,
                       retry_exceptions=PostgresConnectionException)
    # State/role are plain attributes here, each guarded by its own lock.
    self._state = 'stopped'
    self._state_lock = Lock()
    self._role = 'replica'
    self._role_lock = Lock()
    if self.is_running():
        # Adopt an already running postmaster.
        self._state = 'running'
        self._role = 'master' if self.is_leader() else 'replica'
def __init__(self, config):
    """Set up the Etcd DCS layer: TTL, retry policy, client and flags."""
    super(Etcd, self).__init__(config)
    self._ttl = int(config.get('ttl') or 30)
    # Transient etcd conditions worth retrying on.
    retriable = (etcd.EtcdLeaderElectionInProgress, EtcdRaftInternal)
    self._retry = Retry(deadline=config['retry_timeout'], max_delay=1,
                        max_tries=-1, retry_exceptions=retriable)
    self._client = self.get_etcd_client(config)
    self.__do_not_watch = False
    self._has_failed = False
def __init__(self, config):
    """Initialize the PostgreSQL handler (intermediate Patroni variant).

    Derives listen address/port (defaulting the port to 5432), config-file
    paths, the replication connection string and retry policy, then probes
    for an already running postmaster to set the initial state and role.

    :param config: dict with at least ``name``, ``scope``, ``listen``,
        ``data_dir`` and ``replication``.
    """
    self.config = config
    self.name = config['name']
    self.database = config.get('database', 'postgres')
    self._server_parameters = self.get_server_parameters(config)
    # Appending ':5432' and slicing makes a bare address work without a port.
    self._listen_addresses, self._port = (config['listen'] + ':5432').split(':')[:2]
    self.scope = config['scope']
    self._data_dir = config['data_dir']
    self.replication = config['replication']
    self.superuser = config.get('superuser') or {}
    self.admin = config.get('admin') or {}
    self.initdb_options = config.get('initdb') or []
    self.pgpass = config.get('pgpass') or os.path.join(os.path.expanduser('~'), 'pgpass')
    self.pg_rewind = config.get('pg_rewind') or {}
    self.callback = config.get('callbacks') or {}
    self.use_slots = config.get('use_slots', True)
    self._schedule_load_slots = self.use_slots
    # Paths of the configuration files managed inside the data directory.
    config_base_name = config.get('config_base_name', 'postgresql')
    self._postgresql_conf = os.path.join(self._data_dir, config_base_name + '.conf')
    self._postgresql_base_conf_name = config_base_name + '.base.conf'
    self._postgresql_base_conf = os.path.join(self._data_dir, self._postgresql_base_conf_name)
    self._recovery_conf = os.path.join(self._data_dir, 'recovery.conf')
    self._configuration_to_save = (self._postgresql_conf, self._postgresql_base_conf,
                                   os.path.join(self._data_dir, 'pg_hba.conf'))
    self._postmaster_pid = os.path.join(self._data_dir, 'postmaster.pid')
    # Trigger file defaults to 'promote', resolved relative to data_dir.
    self._trigger_file = config.get('recovery_conf', {}).get('trigger_file') or 'promote'
    self._trigger_file = os.path.abspath(os.path.join(self._data_dir, self._trigger_file))
    self._pg_ctl = ['pg_ctl', '-w', '-D', self._data_dir]
    self.local_address = self.get_local_address()
    connect_address = config.get('connect_address') or self.local_address
    # Replication credentials are embedded into the member connection string.
    self.connection_string = 'postgres://{username}:{password}@{connect_address}/{database}'.format(
        connect_address=connect_address, database=self.database, **self.replication)
    self._connection = None
    self._cursor_holder = None
    self._sysid = None
    self._replication_slots = []  # list of already existing replication slots
    self.retry = Retry(max_tries=-1, deadline=5, max_delay=1,
                       retry_exceptions=PostgresConnectionException)
    # Locks must exist before the set_state()/set_role() calls below.
    self._state_lock = Lock()
    self.set_state('stopped')
    self._role_lock = Lock()
    self.set_role(self.get_postgres_role_from_data_directory())
    if self.is_running():
        # Adopt an already running postmaster.
        self.set_state('running')
        self.set_role('master' if self.is_leader() else 'replica')
        self._write_postgresql_conf()  # we are "joining" already running postgres
def __init__(self, config):
    """Initialize the Consul DCS implementation (variant with member-data cache).

    Normalizes the endpoint configuration (``url`` or ``host``), TLS options
    and ``verify`` flag into ConsulClient keyword arguments, then creates a
    session unless running as patronictl.

    :param config: dict with at least ``scope`` and ``retry_timeout``.
    """
    super(Consul, self).__init__(config)
    self._scope = config['scope']
    self._session = None
    self.__do_not_watch = False
    self._retry = Retry(deadline=config['retry_timeout'], max_delay=1, max_tries=-1,
                        retry_exceptions=(ConsulInternalError, HTTPException,
                                          HTTPError, socket.error, socket.timeout))
    self._my_member_data = None
    kwargs = {}
    if 'url' in config:
        # A full URL wins: derive scheme/host/port from it (default port 8500).
        r = urlparse(config['url'])
        config.update({'scheme': r.scheme, 'host': r.hostname, 'port': r.port or 8500})
    elif 'host' in config:
        # Appending ':8500' and slicing makes a bare hostname work too;
        # an explicit 'port' key still wins.
        host, port = (config.get('host', '127.0.0.1:8500') + ':8500').split(':')[:2]
        config['host'] = host
        if 'port' not in config:
            config['port'] = int(port)
    if config.get('cacert'):
        # python-consul expects 'ca_cert', our config uses 'cacert'.
        config['ca_cert'] = config.pop('cacert')
    if config.get('key') and config.get('cert'):
        # Client cert + key are passed as a (cert, key) tuple.
        config['cert'] = (config['cert'], config['key'])
    config_keys = ('host', 'port', 'token', 'scheme', 'cert', 'ca_cert', 'dc')
    # Only forward keys that are actually set.
    kwargs = {p: config.get(p) for p in config_keys if config.get(p)}
    verify = config.get('verify')
    if not isinstance(verify, bool):
        # Accept string representations like 'on'/'off'.
        verify = parse_bool(verify)
    if isinstance(verify, bool):
        kwargs['verify'] = verify
    self._client = ConsulClient(**kwargs)
    self.set_retry_timeout(config['retry_timeout'])
    self.set_ttl(config.get('ttl') or 30)
    self._last_session_refresh = 0
    self.__session_checks = config.get('checks')
    if not self._ctl:
        # patronictl must not create (and hold) a session.
        self.create_session()
def __init__(self, config):
    """Initialize the Consul DCS implementation (early variant).

    Splits the configured ``host`` into host and port, creates the Consul
    client, and opens a session unless running as patronictl.

    :param config: dict with at least ``scope`` and ``retry_timeout``;
        ``host`` defaults to '127.0.0.1:8500' and ``ttl`` to 30 seconds.
    """
    super(Consul, self).__init__(config)
    self._scope = config['scope']
    self._session = None
    self.__do_not_watch = False
    self._retry = Retry(deadline=config['retry_timeout'], max_delay=1, max_tries=-1,
                        retry_exceptions=(ConsulInternalError, HTTPException,
                                          HTTPError, socket.error, socket.timeout))
    self._my_member_data = None
    # Appending ':8500' before splitting makes a bare hostname (no ':port')
    # work instead of raising ValueError on unpacking; for 'host:port' input
    # the extra default is dropped by the [:2] slice. This matches the
    # normalization used by the other Consul initializer in this project.
    host, port = (config.get('host', '127.0.0.1:8500') + ':8500').split(':')[:2]
    self._client = ConsulClient(host=host, port=port)
    self.set_retry_timeout(config['retry_timeout'])
    self.set_ttl(config.get('ttl') or 30)
    self._last_session_refresh = 0
    if not self._ctl:
        # patronictl must not create (and hold) a session.
        self.create_session()
def test_deadline(self):
    """Retry must give up with RetryFailedError once the deadline expires."""
    expired = Retry(deadline=0.0001)
    always_failing = self._fail(times=100)
    self.assertRaises(RetryFailedError, expired, always_failing)
def test_maximum_delay(self):
    """The exponential back-off delay must stay bounded and remain a float.

    Uses assertEqual instead of the deprecated assertEquals alias, which
    was removed in Python 3.12.
    """
    retry = Retry(delay=10, max_tries=100)
    retry(self._fail(times=10))
    self.assertTrue(retry._cur_delay < 4000, retry._cur_delay)
    # gevent's sleep function is picky about the type; check the exact type
    # (not isinstance) on purpose.
    self.assertEqual(type(retry._cur_delay), float)
def test_too_many_tries(self):
    """Retry must fail once max_tries is exhausted, after one recorded attempt.

    Uses assertEqual instead of the deprecated assertEquals alias, which
    was removed in Python 3.12.
    """
    retry = Retry(delay=0)
    self.assertRaises(RetryFailedError, retry, self._fail(times=999))
    self.assertEqual(retry._attempts, 1)
def test_reset(self):
    """reset() must clear the attempt counter after a successful retry run.

    Uses assertEqual instead of the deprecated assertEquals alias, which
    was removed in Python 3.12.
    """
    retry = Retry(delay=0, max_tries=2)
    retry(self._fail())
    self.assertEqual(retry._attempts, 1)
    retry.reset()
    self.assertEqual(retry._attempts, 0)
def query(self, sql, *params, **kwargs):
    """Run *sql* with *params* against the server.

    When the keyword argument ``retry`` is truthy, the query is wrapped in
    a Retry policy that retries on PostgresConnectionException; otherwise
    it is executed exactly once.
    """
    if kwargs.get('retry', False):
        retry_policy = Retry(delay=1, retry_exceptions=PostgresConnectionException)
        return retry_policy(self.server.query, sql, *params)
    return self.server.query(sql, *params)
def _makeOne(self, *args, **kwargs):
    """Construct the Retry instance under test, forwarding all arguments."""
    instance = Retry(*args, **kwargs)
    return instance