def get_output_format_and_apply_config_func_for_lua(console_sock):
    assert console_sock is not None, "Console socket is required for Lua mode"

    control_console = helpers.get_control_console(console_sock)
    return FILE_OUTPUT_FORMAT, lambda path: patch_file_clusterwide(control_console, path)
def manage_failover(params):
    failover_params = params.get('failover_params')

    if isinstance(failover_params, bool):
        failover_params = {
            'mode': 'eventual' if failover_params is True else 'disabled'
        }

    rename_dict_key_if_exists(failover_params, 'stateboard_params', 'tarantool_params')
    if failover_params.get('state_provider') == 'stateboard':
        failover_params['state_provider'] = 'tarantool'

    control_console = helpers.get_control_console(params['console_sock'])
    helpers.set_twophase_options_from_params(control_console, params)
    version = get_tarantool_version(control_console)

    if version is not None and version >= NEW_FAILOVER_API_CARTRIDGE_VERSION:
        return manage_failover_new(control_console, failover_params)
    else:
        if failover_params['mode'] == 'stateful':
            errmsg = 'Stateful failover is supported since cartridge {}'.format(
                NEW_FAILOVER_API_CARTRIDGE_VERSION)
            return helpers.ModuleRes(failed=True, msg=errmsg)
        return manage_failover_old(control_console, failover_params)
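# Illustrative note (derived from the normalization above, not part of the module):
# a bare boolean `failover_params` is expanded before use, e.g.
#
#   failover_params = True   -> {'mode': 'eventual'}
#   failover_params = False  -> {'mode': 'disabled'}
#
# and the legacy `stateboard_params` key / `stateboard` state provider are renamed
# to `tarantool_params` / `tarantool` before the failover API is called.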
def get_control_instance(params):
    module_hostvars = params['module_hostvars']
    play_hosts = params['play_hosts']
    console_sock = params['console_sock']
    app_name = params['app_name']

    control_console = helpers.get_control_console(console_sock)

    control_instance_name, err = get_control_instance_name(
        module_hostvars, play_hosts, control_console)
    if err is not None:
        return helpers.ModuleRes(failed=True, msg=err)

    # in the ideal imagined world we could just use
    # instance_vars['instance_info'], but if control instance is not
    # in play_hosts, instance_info isn't computed for it
    instance_vars = module_hostvars[control_instance_name]

    run_dir = instance_vars.get('cartridge_run_dir')
    control_instance_console_sock = helpers.get_instance_console_sock(
        run_dir, app_name, control_instance_name,
    )

    http_port = instance_vars.get('config', {}).get('http_port')

    return helpers.ModuleRes(changed=False, fact={
        'name': control_instance_name,
        'console_sock': control_instance_console_sock,
        'http_port': http_port,
    })
def get_tdg_http_headers(console_sock, tdg_token):
    if tdg_token is None:
        return {}

    assert console_sock is not None, "Console socket is required for TDG mode"

    control_console = helpers.get_control_console(console_sock)
    tdg_version = control_console.eval_res_err('''
        local ok, app_version = pcall(require, 'common.app_version')
        if not ok then
            return '1.6.0-0-0'
        end
        return app_version.get()
    ''')[0].split('.')

    if tdg_version[0] == 'scm-1':
        return {
            'auth-token': tdg_token,  # TDG <= 1.6
            'Authorization': 'Bearer ' + tdg_token,  # TDG >= 1.7
        }

    major = int(tdg_version[0])
    minor = int(tdg_version[1])
    if major < 1 or major == 1 and minor <= 6:
        return {'auth-token': tdg_token}
    return {'Authorization': 'Bearer ' + tdg_token}
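# Illustrative note (derived from the version check above): examples of the headers
# returned for different TDG versions reported by `common.app_version`:
#
#   '1.6.5-0-0' -> {'auth-token': tdg_token}
#   '1.7.2-0-0' -> {'Authorization': 'Bearer ' + tdg_token}
#   'scm-1'     -> both headers, since the actual version can't be determined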
def connect_to_membership(params):
    control_console = helpers.get_control_console(params['console_sock'])
    module_hostvars = params['module_hostvars']
    play_hosts = params['play_hosts']

    changed = False

    for instance_name, instance_vars in module_hostvars.items():
        if helpers.is_expelled(instance_vars) or helpers.is_stateboard(instance_vars):
            continue

        if 'config' not in instance_vars or 'advertise_uri' not in instance_vars['config']:
            continue

        connected, err = probe_server(control_console, instance_vars['config']['advertise_uri'])

        if err is not None and instance_name in play_hosts:
            return helpers.ModuleRes(failed=True, msg=err)

        if connected:
            changed = True

    return helpers.ModuleRes(changed=changed)
def check_members_alive(params):
    console_sock = params['console_sock']
    allowed_states = params['allowed_states']

    control_console = helpers.get_control_console(console_sock)

    bad_members, err = control_console.eval_res_err('''
        local fun = require('fun')
        local membership = require('membership')
        local cartridge_topology = require('cartridge.topology')
        local confapplier = require('cartridge.confapplier')

        local topology_cfg = confapplier.get_readonly('topology')
        if topology_cfg == nil then
            return nil, "Instances aren't joined to cluster yet"
        end

        local allowed_states = ...
        local bad_members = {}

        for _it, instance_uuid, server in fun.filter(cartridge_topology.not_disabled, topology_cfg.servers) do
            local member = membership.get_member(server.uri) or {}

            if (member.payload.uuid ~= instance_uuid) then
                table.insert(bad_members, string.format(
                    '%s uuid mismatch: expected %s, have %s',
                    server.uri, instance_uuid, member.payload.uuid
                ))
            elseif (member.status ~= 'alive') then
                table.insert(bad_members, string.format(
                    '%s status is %s',
                    server.uri, member.status
                ))
            elseif allowed_states ~= nil and next(allowed_states) ~= nil then
                local member_state = member.payload.state
                if fun.index(member_state, allowed_states) == nil then
                    table.insert(bad_members, string.format(
                        '%s state is %s',
                        server.uri, member_state
                    ))
                end
            end
        end

        return bad_members
    ''', allowed_states)

    if err is not None:
        return helpers.ModuleRes(failed=True, msg=err)

    if bad_members:
        return helpers.ModuleRes(
            failed=True,
            msg="Some instances aren't alive: %s" % ', '.join(sorted(bad_members)))

    return helpers.ModuleRes(changed=False)
def eval_code(params):
    body = params['body']
    args = params['args']

    control_console = helpers.get_control_console(params['console_sock'])
    eval_res = control_console.eval(body, *args)

    return helpers.ModuleRes(changed=False, fact=eval_res)
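# A minimal usage sketch (the socket path and Lua body are hypothetical):
#
#   res = eval_code({
#       'console_sock': '/var/run/tarantool/myapp.instance-1.control',
#       'body': 'local a, b = ... return a + b',
#       'args': [1, 2],
#   })
#   # res.fact holds whatever the evaluated Lua code returned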
def apply_tdg_config(console_sock, path):
    control_console = helpers.get_control_console(console_sock)

    _, err = control_console.eval_res_err('''
        return admin.upload_config_api(...)
    ''', path)
    assert err is None, err

    return True
def get_tdg_upload_mode(console_sock):
    if console_sock is None:
        return 'http'

    control_console = helpers.get_control_console(console_sock)
    return control_console.eval_res_err('''
        if rawget(_G, 'admin') ~= nil and rawget(_G.admin, 'upload_config_api') ~= nil then
            return 'lua'
        end
        return 'http'
    ''')[0]
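# A minimal dispatch sketch (illustrative only; `upload_tdg_config_via_http` is a
# hypothetical name for the HTTP fallback, not a function defined here):
#
#   mode = get_tdg_upload_mode(console_sock)
#   if mode == 'lua':
#       # admin.upload_config_api is available, apply the config through the console
#       apply_tdg_config(console_sock, path)
#   else:
#       upload_tdg_config_via_http(path, get_tdg_http_headers(console_sock, tdg_token))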
def check_state(params):
    try:
        control_console = helpers.get_control_console(params['console_sock'])

        if params['stateboard']:
            return check_stateboard_state(control_console)
        else:
            return check_instance_state(
                control_console,
                params['expected_states'],
                params['check_buckets_are_discovered'],
            )
    except helpers.CartridgeException as e:
        return helpers.ModuleRes(exception=e)
def config_app(params):
    control_console = helpers.get_control_console(params['console_sock'])
    config = params['app_config']

    new_sections = {}
    for section_name, section in config.items():
        if section_is_deleted(section):
            new_sections[section_name] = None
        else:
            new_sections[section_name] = section.get('body')

    helpers.set_twophase_options_from_params(control_console, params)

    changed, err = helpers.patch_clusterwide_config(control_console, new_sections)
    if err is not None:
        return helpers.ModuleRes(failed=True, msg=err)

    return helpers.ModuleRes(changed=changed)
def bootstrap_vshard(params):
    control_console = helpers.get_control_console(params['console_sock'])
    helpers.set_twophase_options_from_params(control_console, params)

    can_bootstrap, _ = control_console.eval_res_err('''
        return require('cartridge.vshard-utils').can_bootstrap()
    ''')

    if not can_bootstrap:
        return helpers.ModuleRes(changed=False)

    ok, err = control_console.eval_res_err('''
        return require('cartridge.admin').bootstrap_vshard()
    ''')
    if not ok:
        errmsg = 'Vshard bootstrap failed: {}'.format(err)
        return helpers.ModuleRes(failed=True, msg=errmsg)

    return helpers.ModuleRes()
def check_cluster_issues(params):
    allow_warnings = params['allow_warnings']
    show_issues = params['show_issues']
    console_sock = params['console_sock']

    control_console = helpers.get_control_console(console_sock)

    issues, err = control_console.eval_res_err('''
        return require('cartridge.issues').list_on_cluster()
    ''')

    if err is not None:
        msg = "Received error on getting list of cluster issues: %s" % err
        if issues is None:
            return helpers.ModuleRes(failed=True, msg=msg)
        helpers.warn(msg)

    issues_by_level = {}
    for issue in issues:
        level = issue['level']
        if level not in issues_by_level:
            issues_by_level[level] = []
        issues_by_level[level].append(issue)

    if show_issues:
        messages = get_messages(issues_by_level)
        helpers.warn(*messages)

    if issues:
        if allow_warnings:
            critical_issues_num = len(issues) - len(issues_by_level.get('warning', []))
            if critical_issues_num > 0:
                return helpers.ModuleRes(
                    failed=True,
                    msg="Cluster has %s critical issues" % critical_issues_num)
        else:
            return helpers.ModuleRes(failed=True, msg="Cluster has %s issues" % len(issues))

    return helpers.ModuleRes(changed=False)
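# Illustrative note (derived from the checks above): with `allow_warnings=True` only
# issues whose level is not 'warning' fail the module, e.g. 2 warnings + 1 critical
# issue yields "Cluster has 1 critical issues"; with `allow_warnings=False` any issue
# fails it with "Cluster has 3 issues".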
def set_needs_restart(params):
    instance_info = params['instance_info']
    console_sock = instance_info['console_sock']

    # check if instance was not started yet
    if not os.path.exists(console_sock):
        return helpers.ModuleRes(changed=True, fact=True)

    try:
        control_console = helpers.get_control_console(console_sock)
    except helpers.CartridgeException as e:
        allowed_errcodes = [
            helpers.CartridgeErrorCodes.SOCKET_NOT_FOUND,
            helpers.CartridgeErrorCodes.FAILED_TO_CONNECT_TO_SOCKET,
            helpers.CartridgeErrorCodes.INSTANCE_IS_NOT_STARTED_YET,
        ]
        if e.code in allowed_errcodes:
            return helpers.ModuleRes(changed=True, fact=True)

        raise e

    if params['check_package_updated']:
        needs_restart, err = check_needs_restart_to_update_package(params)
        if err is not None:
            return helpers.ModuleRes(failed=True, msg=err)
        if needs_restart:
            return helpers.ModuleRes(changed=True, fact=True)

    if params['check_config_updated']:
        needs_restart, err = check_needs_restart_to_update_config(params, control_console)
        if err is not None:
            return helpers.ModuleRes(failed=True, msg=err)
        if needs_restart:
            return helpers.ModuleRes(changed=True, fact=True)

    return helpers.ModuleRes(changed=False, fact=False)
def edit_topology(params):
    console_sock = params['console_sock']
    module_hostvars = params['module_hostvars']
    play_hosts = params['play_hosts']
    healthy_timeout = params['healthy_timeout']
    allow_missed_instances = params['allow_missed_instances']

    replicasets = get_configured_replicasets(module_hostvars, play_hosts)
    instances = get_instances_to_configure(module_hostvars, play_hosts)

    if not replicasets and not instances:
        return helpers.ModuleRes(changed=False)

    control_console = helpers.get_control_console(console_sock)
    helpers.set_twophase_options_from_params(control_console, params)

    set_enabled_roles(replicasets, control_console)

    cluster_instances = helpers.get_cluster_instances(control_console)
    cluster_replicasets = helpers.get_cluster_replicasets(control_console)

    # Configure replicasets and instances:
    # * Create new replicasets.
    # * Edit existing replicasets and join new instances to them.
    #   In this case failover_priority isn't changed, since
    #   new instances have no UUIDs before join.
    # * Expel instances.
    # * Configure instances that are already joined.
    #   New instances aren't configured here since they don't have
    #   UUIDs before join.
    topology_params, err = get_topology_params(
        replicasets, cluster_replicasets, instances, cluster_instances, allow_missed_instances)
    if err is not None:
        return helpers.ModuleRes(
            failed=True,
            msg="Failed to collect edit topology params: %s" % err)

    topology_changed = False

    if topology_params:
        res, err = control_console.eval_res_err(edit_topology_func_body, topology_params)
        if err is not None:
            return helpers.ModuleRes(failed=True, msg="Failed to edit topology: %s" % err)

        topology_changed = True

        # Without this, a `Peer closed` error is returned on the second `edit_topology`
        # call in some cases (e.g. when a new instance is joined on the first call
        # and then configured on the second).
        # See https://github.com/tarantool/cartridge/issues/1320
        # The simplest workaround is to add a little delay between these calls,
        # so we just perform an `is_healthy` call here.
        # If everything is OK, this call doesn't take long, but it guarantees
        # that the next `edit_topology` call won't fail.
        # If the cluster isn't healthy, it's better to show an error.
        if not wait_for_cluster_is_healthy(control_console, healthy_timeout):
            return helpers.ModuleRes(
                failed=True,
                msg="Cluster isn't healthy after editing topology")

        # Now we need to get the updated instances and replicasets
        # configuration to check if we need one more call.
        # `edit_topology` returns a summary of updated instances,
        # so let's use it to update cluster_instances and cluster_replicasets.
        update_cluster_instances_and_replicasets(
            res, instances, cluster_instances, cluster_replicasets)

    # Configure failover_priority and instances that were joined on the previous call:
    # * Edit failover_priority of replicasets if needed.
    # * Configure instances that weren't configured on the first `edit_topology` call.
    topology_params, err = get_replicasets_failover_priority_and_instances_params(
        replicasets, cluster_replicasets, instances, cluster_instances, allow_missed_instances)
    if err is not None:
        return helpers.ModuleRes(
            failed=True,
            msg="Failed to collect edit topology params for changing failover_priority "
                "and configuring new instances: %s" % err)

    if topology_params:
        res, err = control_console.eval_res_err(edit_topology_func_body, topology_params)
        if err is not None:
            return helpers.ModuleRes(
                failed=True,
                msg="Failed to edit failover priority and configure instances: %s" % err)

        topology_changed = True

        if not wait_for_cluster_is_healthy(control_console, healthy_timeout):
            return helpers.ModuleRes(
                failed=True,
                msg="Cluster isn't healthy after editing failover priority and configuring instances")

    return helpers.ModuleRes(changed=topology_changed)
def failover_promote(params):
    console_sock = params['console_sock']
    control_console = helpers.get_control_console(console_sock)

    err = check_leaders_promotion_is_possible(control_console)
    if err is not None:
        return helpers.ModuleRes(failed=True, msg=err)

    failover_promote_params = params.get('failover_promote_params')
    if failover_promote_params is None:
        failover_promote_params = {}

    critical_warnings = []

    # get replicaset leaders
    if params['promote_play_hosts']:
        module_hostvars = params['module_hostvars']
        play_hosts = params['play_hosts']

        replicaset_leaders, dead_replicasets = get_replicaset_leaders_by_play_hosts(
            play_hosts, module_hostvars, control_console)

        if dead_replicasets:
            critical_warnings.append(
                'These replicasets have no alive instances across specified play hosts: %s'
                % ', '.join(sorted(dead_replicasets)))
    else:
        specified_replicaset_leaders = failover_promote_params.get('replicaset_leaders')
        replicaset_leaders, err = get_replicaset_leaders_by_aliases(
            specified_replicaset_leaders, control_console)
        if err is not None:
            return helpers.ModuleRes(failed=True, msg=err)

    if not replicaset_leaders:
        if critical_warnings:
            return helpers.ModuleRes(
                failed=True,
                msg="Failed to promote leaders: %s" % err,
                warnings=critical_warnings,
            )
        return helpers.ModuleRes(changed=False)

    force_inconsistency = failover_promote_params.get('force_inconsistency')

    # set two-phase commit opts
    helpers.set_twophase_options_from_params(control_console, params)

    active_leaders = get_active_leaders(control_console)

    _, err = call_failover_promote(control_console, replicaset_leaders, force_inconsistency)
    if err is not None:
        return helpers.ModuleRes(
            failed=True,
            msg="Failed to promote leaders: %s" % err,
            warnings=critical_warnings,
        )

    new_active_leaders = get_active_leaders(control_console)

    if critical_warnings:
        return helpers.ModuleRes(
            failed=True,
            msg="Promoted with critical warnings",
            warnings=critical_warnings)

    return helpers.ModuleRes(changed=active_leaders != new_active_leaders)