def get_delete_amphora_flow(self):
    """Creates a flow to delete an amphora.

    This should be configurable in the config file

    :returns: The flow for deleting the amphora
    :raises AmphoraNotFound: The referenced Amphora was not found
    """
    # Assemble the delete sequence: flag the amphora for deletion,
    # tear down the compute resource, then finalize the DB records.
    flow = linear_flow.Flow(constants.DELETE_AMPHORA_FLOW)
    for task in (
            lifecycle_tasks.AmphoraToErrorOnRevertTask(
                requires=constants.AMPHORA),
            database_tasks.MarkAmphoraPendingDeleteInDB(
                requires=constants.AMPHORA),
            database_tasks.MarkAmphoraHealthBusy(
                requires=constants.AMPHORA),
            compute_tasks.ComputeDelete(requires=constants.AMPHORA),
            database_tasks.DisableAmphoraHealthMonitoring(
                requires=constants.AMPHORA),
            database_tasks.MarkAmphoraDeletedInDB(
                requires=constants.AMPHORA)):
        flow.add(task)
    return flow
def cert_rotate_amphora_flow(self):
    """Implement rotation for amphora's cert.

    1. Create a new certificate
    2. Upload the cert to amphora
    3. update the newly created certificate info to amphora
    4. update the cert_busy flag to be false after rotation

    :returns: The flow for updating an amphora
    """
    flow = linear_flow.Flow(constants.CERT_ROTATE_AMPHORA_FLOW)

    # Revert the amphora to ERROR if anything below fails.
    flow.add(lifecycle_tasks.AmphoraToErrorOnRevertTask(
        requires=constants.AMPHORA))

    # Generate a fresh certificate; the task provides the new PEM.
    flow.add(cert_task.GenerateServerPEMTask(
        provides=constants.SERVER_PEM))

    # Push the new certificate to the amphora agent.
    flow.add(amphora_driver_tasks.AmphoraCertUpload(
        requires=(constants.AMPHORA, constants.SERVER_PEM)))

    # Record the new certificate expiration in the database.
    flow.add(database_tasks.UpdateAmphoraDBCertExpiration(
        requires=(constants.AMPHORA_ID, constants.SERVER_PEM)))

    # Clear the cert_busy flag now that rotation is complete.
    flow.add(database_tasks.UpdateAmphoraCertBusyToFalse(
        requires=constants.AMPHORA))

    return flow
def test_AmphoraToErrorOnRevertTask(self, mock_amp_status_error,
                                    mock_amp_health_busy):
    """Execute must be a no-op; revert must mark the amphora ERROR."""
    task = lifecycle_tasks.AmphoraToErrorOnRevertTask()

    # The execute path must not touch the amphora status.
    task.execute(self.AMPHORA)
    self.assertFalse(mock_amp_status_error.called)

    # The revert path flags the amphora as ERROR exactly once and
    # leaves the health-busy marker untouched.
    task.revert(self.AMPHORA)
    mock_amp_status_error.assert_called_once_with(self.AMPHORA_ID)
    self.assertFalse(mock_amp_health_busy.called)
def update_amphora_config_flow(self):
    """Creates a flow to update the amphora agent configuration.

    :returns: The flow for updating an amphora
    """
    flow = linear_flow.Flow(constants.UPDATE_AMPHORA_CONFIG_FLOW)
    # Revert the amphora to ERROR status if the update fails.
    flow.add(lifecycle_tasks.AmphoraToErrorOnRevertTask(
        requires=constants.AMPHORA))
    # Push the new agent configuration to the amphora.
    flow.add(amphora_driver_tasks.AmphoraConfigUpdate(
        requires=(constants.AMPHORA, constants.FLAVOR)))
    return flow
def get_failover_amphora_flow(self, failed_amphora, lb_amp_count):
    """Get a Taskflow flow to failover an amphora.

    1. Build a replacement amphora.
    2. Delete the old amphora.
    3. Update the amphorae listener configurations.
    4. Update the VRRP configurations if needed.

    :param failed_amphora: The amphora object to failover.
    :param lb_amp_count: The number of amphora on this load balancer.
    :returns: The flow that will provide the failover.
    """
    failover_amp_flow = linear_flow.Flow(constants.FAILOVER_AMPHORA_FLOW)

    # Revert amphora to status ERROR if this flow goes wrong
    failover_amp_flow.add(
        lifecycle_tasks.AmphoraToErrorOnRevertTask(
            requires=constants.AMPHORA,
            inject={constants.AMPHORA: failed_amphora}))

    # Map the amphora role to a readable string for the log line below.
    if failed_amphora.role in (constants.ROLE_MASTER,
                               constants.ROLE_BACKUP):
        amp_role = 'master_or_backup'
    elif failed_amphora.role == constants.ROLE_STANDALONE:
        amp_role = 'standalone'
    elif failed_amphora.role is None:
        amp_role = 'spare'
    else:
        amp_role = 'undefined'
    LOG.info("Performing failover for amphora: %s",
             {"id": failed_amphora.id,
              "load_balancer_id": failed_amphora.load_balancer_id,
              "lb_network_ip": failed_amphora.lb_network_ip,
              "compute_id": failed_amphora.compute_id,
              "role": amp_role})

    # Flag the failed amphora for deletion and pause health monitoring
    # on it so the health manager does not try to fail it over again
    # while this flow is running.
    failover_amp_flow.add(
        database_tasks.MarkAmphoraPendingDeleteInDB(
            requires=constants.AMPHORA,
            inject={constants.AMPHORA: failed_amphora}))

    failover_amp_flow.add(
        database_tasks.MarkAmphoraHealthBusy(
            requires=constants.AMPHORA,
            inject={constants.AMPHORA: failed_amphora}))

    failover_amp_flow.add(network_tasks.GetVIPSecurityGroupID(
        requires=constants.LOADBALANCER_ID,
        provides=constants.VIP_SG_ID))

    # An amphora with no load balancer is from the spares pool.
    is_spare = True
    is_vrrp_ipv6 = False
    if failed_amphora.load_balancer_id:
        is_spare = False
        if failed_amphora.vrrp_ip:
            is_vrrp_ipv6 = utils.is_ipv6(failed_amphora.vrrp_ip)

    # Get a replacement amphora and plug all of the networking.
    #
    # Do this early as the compute services have been observed to be
    # unreliable. The community decided the chance that deleting first
    # would open resources for an instance is less likely than the
    # compute service failing to boot an instance for other reasons.

    # TODO(johnsom) Move this back out to run for spares after
    #               delete amphora API is available.
    failover_amp_flow.add(self.get_amphora_for_lb_failover_subflow(
        prefix=constants.FAILOVER_LOADBALANCER_FLOW,
        role=failed_amphora.role,
        failed_amp_vrrp_port_id=failed_amphora.vrrp_port_id,
        is_vrrp_ipv6=is_vrrp_ipv6,
        is_spare=is_spare))

    # Delete the failed amphora only after the replacement is in place.
    failover_amp_flow.add(
        self.get_delete_amphora_flow(
            failed_amphora,
            retry_attempts=CONF.controller_worker.amphora_delete_retries,
            retry_interval=(
                CONF.controller_worker.amphora_delete_retry_interval)))
    failover_amp_flow.add(
        database_tasks.DisableAmphoraHealthMonitoring(
            requires=constants.AMPHORA,
            inject={constants.AMPHORA: failed_amphora}))

    if not failed_amphora.load_balancer_id:
        # This is an unallocated amphora (spares pool), we are done.
        return failover_amp_flow

    # Reload the load balancer and its amphora list from the database
    # so the remaining tasks see post-replacement state.
    failover_amp_flow.add(database_tasks.GetLoadBalancer(
        requires=constants.LOADBALANCER_ID,
        inject={constants.LOADBALANCER_ID:
                failed_amphora.load_balancer_id},
        provides=constants.LOADBALANCER))

    failover_amp_flow.add(database_tasks.GetAmphoraeFromLoadbalancer(
        name=constants.GET_AMPHORAE_FROM_LB,
        requires=constants.LOADBALANCER_ID,
        inject={constants.LOADBALANCER_ID:
                failed_amphora.load_balancer_id},
        provides=constants.AMPHORAE))

    # Setup timeouts for our requests to the amphorae
    # NOTE(review): 'active_connection_rety_interval' looks misspelled,
    # but it presumably matches the declared config option name —
    # confirm against the config registration before renaming.
    timeout_dict = {
        constants.CONN_MAX_RETRIES:
            CONF.haproxy_amphora.active_connection_max_retries,
        constants.CONN_RETRY_INTERVAL:
            CONF.haproxy_amphora.active_connection_rety_interval}

    # Listeners update needs to be run on all amphora to update
    # their peer configurations. So parallelize this with an
    # unordered subflow.
    update_amps_subflow = unordered_flow.Flow(
        constants.UPDATE_AMPS_SUBFLOW)

    for amp_index in range(0, lb_amp_count):
        update_amps_subflow.add(
            amphora_driver_tasks.AmphoraIndexListenerUpdate(
                name=str(amp_index) + '-' + constants.AMP_LISTENER_UPDATE,
                requires=(constants.LOADBALANCER, constants.AMPHORAE),
                inject={constants.AMPHORA_INDEX: amp_index,
                        constants.TIMEOUT_DICT: timeout_dict}))

    failover_amp_flow.add(update_amps_subflow)

    # Configure and enable keepalived in the amphora
    if lb_amp_count == 2:
        failover_amp_flow.add(
            self.get_vrrp_subflow(constants.GET_VRRP_SUBFLOW,
                                  timeout_dict,
                                  create_vrrp_group=False))

    # Reload the listener. This needs to be done here because
    # it will create the required haproxy check scripts for
    # the VRRP deployed above.
    # A "U" or newer amphora-agent will remove the need for this
    # task here.
    # TODO(johnsom) Remove this in the "W" cycle
    reload_listener_subflow = unordered_flow.Flow(
        constants.AMPHORA_LISTENER_RELOAD_SUBFLOW)

    for amp_index in range(0, lb_amp_count):
        reload_listener_subflow.add(
            amphora_driver_tasks.AmphoraIndexListenersReload(
                name=(str(amp_index) + '-' +
                      constants.AMPHORA_RELOAD_LISTENER),
                requires=(constants.LOADBALANCER, constants.AMPHORAE),
                inject={constants.AMPHORA_INDEX: amp_index,
                        constants.TIMEOUT_DICT: timeout_dict}))

    failover_amp_flow.add(reload_listener_subflow)

    # Remove any extraneous ports
    # Note: Nova sometimes fails to delete ports attached to an instance.
    #       For example, if you create an LB with a listener, then
    #       'openstack server delete' the amphora, you will see the vrrp
    #       port attached to that instance will remain after the instance
    #       is deleted.
    # TODO(johnsom) Fix this as part of
    #               https://storyboard.openstack.org/#!/story/2007077

    # Mark LB ACTIVE
    failover_amp_flow.add(
        database_tasks.MarkLBActiveInDB(mark_subobjects=True,
                                        requires=constants.LOADBALANCER))

    return failover_amp_flow
def get_delete_amphora_flow(
        self, amphora, retry_attempts=None, retry_interval=None):
    """Creates a subflow to delete an amphora and its port.

    This flow is idempotent and safe to retry.

    :param amphora: An amphora object.
    :param retry_attempts: The number of times the flow is retried.
        Defaults to [controller_worker]/amphora_delete_retries.
    :param retry_interval: The time to wait, in seconds, between retries.
        Defaults to [controller_worker]/amphora_delete_retry_interval.
    :returns: The subflow for deleting the amphora.
    :raises AmphoraNotFound: The referenced Amphora was not found.
    """
    # Resolve config-driven defaults at call time instead of at import
    # time. The previous default-argument form captured the CONF values
    # once when the module was imported, so later configuration changes
    # (e.g. a mutable config reload) were silently ignored.
    if retry_attempts is None:
        retry_attempts = CONF.controller_worker.amphora_delete_retries
    if retry_interval is None:
        retry_interval = (
            CONF.controller_worker.amphora_delete_retry_interval)

    # Wrap the whole delete in a sleeping retry controller so transient
    # compute/network failures are retried rather than failing the flow.
    delete_amphora_flow = linear_flow.Flow(
        name=constants.DELETE_AMPHORA_FLOW + '-' + amphora.id,
        retry=retry_tasks.SleepingRetryTimesController(
            name='retry-' + constants.DELETE_AMPHORA_FLOW + '-' +
                 amphora.id,
            attempts=retry_attempts, interval=retry_interval))

    # Revert the amphora to ERROR if any of the tasks below fail.
    delete_amphora_flow.add(
        lifecycle_tasks.AmphoraToErrorOnRevertTask(
            name=constants.AMPHORA_TO_ERROR_ON_REVERT + '-' + amphora.id,
            inject={constants.AMPHORA: amphora}))
    delete_amphora_flow.add(
        database_tasks.MarkAmphoraPendingDeleteInDB(
            name=constants.MARK_AMPHORA_PENDING_DELETE + '-' + amphora.id,
            inject={constants.AMPHORA: amphora}))
    # Pause health monitoring so the health manager does not act on the
    # amphora while it is being torn down.
    delete_amphora_flow.add(
        database_tasks.MarkAmphoraHealthBusy(
            name=constants.MARK_AMPHORA_HEALTH_BUSY + '-' + amphora.id,
            inject={constants.AMPHORA: amphora}))
    # PASSIVE_FAILURE: a failed compute delete is tolerated so the flow
    # stays idempotent when the instance is already gone.
    delete_amphora_flow.add(
        compute_tasks.ComputeDelete(
            name=constants.DELETE_AMPHORA + '-' + amphora.id,
            inject={constants.AMPHORA: amphora,
                    constants.PASSIVE_FAILURE: True}))
    delete_amphora_flow.add(
        database_tasks.DisableAmphoraHealthMonitoring(
            name=constants.DISABLE_AMP_HEALTH_MONITORING + '-' +
                 amphora.id,
            inject={constants.AMPHORA: amphora}))
    delete_amphora_flow.add(
        database_tasks.MarkAmphoraDeletedInDB(
            name=constants.MARK_AMPHORA_DELETED + '-' + amphora.id,
            inject={constants.AMPHORA: amphora}))
    # Clean up the VRRP port, if the amphora had one. Port delete is
    # also best-effort (PASSIVE_FAILURE) to keep the flow idempotent.
    if amphora.vrrp_port_id:
        delete_amphora_flow.add(
            network_tasks.DeletePort(
                name=(constants.DELETE_PORT + '-' + str(amphora.id) +
                      '-' + str(amphora.vrrp_port_id)),
                inject={constants.PORT_ID: amphora.vrrp_port_id,
                        constants.PASSIVE_FAILURE: True}))
        # TODO(johnsom) What about cleaning up any member ports?
        #               maybe we should get the list of attached ports
        #               prior to delete and call delete on them here.
        #               Fix this as part of
        #               https://storyboard.openstack.org/#!/story/2007077

    return delete_amphora_flow
def get_failover_flow(self, role=constants.ROLE_STANDALONE,
                      load_balancer=None):
    """Creates a flow to failover a stale amphora

    :param role: The role of the amphora being replaced (defaults to
        ROLE_STANDALONE).
    :param load_balancer: The load balancer the amphora belongs to, or
        None for an unallocated (spares pool) amphora.
    :returns: The flow for amphora failover
    """
    failover_amphora_flow = linear_flow.Flow(
        constants.FAILOVER_AMPHORA_FLOW)

    # Revert the failed amphora to ERROR if this flow goes wrong.
    failover_amphora_flow.add(
        lifecycle_tasks.AmphoraToErrorOnRevertTask(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))

    failover_amphora_flow.add(
        network_tasks.FailoverPreparationForAmphora(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))

    # Note: It seems intuitive to boot an amphora prior to deleting
    #       the old amphora, however this is a complicated issue.
    #       If the target host (due to anti-affinity) is resource
    #       constrained, this will fail where a post-delete will
    #       succeed. Since this is async with the API it would result
    #       in the LB ending in ERROR though the amps are still alive.
    #       Consider in the future making this a complicated
    #       try-on-failure-retry flow, or move upgrade failovers to be
    #       synchronous with the API. For now spares pool and act/stdby
    #       will mitigate most of this delay.

    # Delete the old amphora
    failover_amphora_flow.add(
        database_tasks.MarkAmphoraPendingDeleteInDB(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))
    failover_amphora_flow.add(
        database_tasks.MarkAmphoraHealthBusy(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))
    failover_amphora_flow.add(
        compute_tasks.ComputeDelete(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))
    failover_amphora_flow.add(
        network_tasks.WaitForPortDetach(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))
    failover_amphora_flow.add(
        database_tasks.MarkAmphoraDeletedInDB(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))

    # If this is an unallocated amp (spares pool), we're done
    if not load_balancer:
        failover_amphora_flow.add(
            database_tasks.DisableAmphoraHealthMonitoring(
                rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
                requires=constants.AMPHORA))
        return failover_amphora_flow

    # Save failed amphora details for later
    failover_amphora_flow.add(
        database_tasks.GetAmphoraDetails(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA,
            provides=constants.AMP_DATA))

    # Get a new amphora
    # Note: Role doesn't matter here.  We will update it later.
    get_amp_subflow = self.get_amphora_for_lb_subflow(
        prefix=constants.FAILOVER_AMPHORA_FLOW)
    failover_amphora_flow.add(get_amp_subflow)

    # Update the new amphora with the failed amphora details
    failover_amphora_flow.add(
        database_tasks.UpdateAmpFailoverDetails(
            requires=(constants.AMPHORA, constants.AMP_DATA)))

    # Update the data stored in the flow from the database
    failover_amphora_flow.add(
        database_tasks.ReloadLoadBalancer(
            requires=constants.LOADBALANCER_ID,
            provides=constants.LOADBALANCER))
    failover_amphora_flow.add(
        database_tasks.ReloadAmphora(requires=constants.AMPHORA_ID,
                                     provides=constants.AMPHORA))

    # Prepare to reconnect the network interface(s)
    failover_amphora_flow.add(
        network_tasks.GetAmphoraeNetworkConfigs(
            requires=constants.LOADBALANCER,
            provides=constants.AMPHORAE_NETWORK_CONFIG))
    failover_amphora_flow.add(
        database_tasks.GetListenersFromLoadbalancer(
            requires=constants.LOADBALANCER,
            provides=constants.LISTENERS))
    failover_amphora_flow.add(
        database_tasks.GetAmphoraeFromLoadbalancer(
            requires=constants.LOADBALANCER,
            provides=constants.AMPHORAE))

    # Plug the VIP ports into the new amphora
    # The reason for moving these steps here is the udp listeners want to
    # do some kernel configuration before Listener update for forbidding
    # failure during rebuild amphora.
    failover_amphora_flow.add(
        network_tasks.PlugVIPPort(
            requires=(constants.AMPHORA,
                      constants.AMPHORAE_NETWORK_CONFIG)))
    failover_amphora_flow.add(
        amphora_driver_tasks.AmphoraPostVIPPlug(
            requires=(constants.AMPHORA, constants.LOADBALANCER,
                      constants.AMPHORAE_NETWORK_CONFIG)))

    # Listeners update needs to be run on all amphora to update
    # their peer configurations. So parallelize this with an
    # unordered subflow.
    update_amps_subflow = unordered_flow.Flow(
        constants.UPDATE_AMPS_SUBFLOW)

    # NOTE(review): 'active_connection_rety_interval' looks misspelled,
    # but it presumably matches the declared config option name —
    # confirm against the config registration before renaming.
    timeout_dict = {
        constants.CONN_MAX_RETRIES:
            CONF.haproxy_amphora.active_connection_max_retries,
        constants.CONN_RETRY_INTERVAL:
            CONF.haproxy_amphora.active_connection_rety_interval}

    # Setup parallel flows for each amp. We don't know the new amp
    # details at flow creation time, so setup a subflow for each
    # amp on the LB, they let the task index into a list of amps
    # to find the amphora it should work on.
    amp_index = 0
    for amp in load_balancer.amphorae:
        # Skip amphorae already deleted; the index only counts live amps.
        if amp.status == constants.DELETED:
            continue
        update_amps_subflow.add(
            amphora_driver_tasks.AmpListenersUpdate(
                name=constants.AMP_LISTENER_UPDATE + '-' +
                     str(amp_index),
                requires=(constants.LOADBALANCER, constants.AMPHORAE),
                inject={constants.AMPHORA_INDEX: amp_index,
                        constants.TIMEOUT_DICT: timeout_dict}))
        amp_index += 1

    failover_amphora_flow.add(update_amps_subflow)

    # Plug the member networks into the new amphora
    failover_amphora_flow.add(
        network_tasks.CalculateAmphoraDelta(
            requires=(constants.LOADBALANCER, constants.AMPHORA),
            provides=constants.DELTA))
    failover_amphora_flow.add(
        network_tasks.HandleNetworkDelta(
            requires=(constants.AMPHORA, constants.DELTA),
            provides=constants.ADDED_PORTS))
    failover_amphora_flow.add(
        amphora_driver_tasks.AmphoraePostNetworkPlug(
            requires=(constants.LOADBALANCER, constants.ADDED_PORTS)))

    failover_amphora_flow.add(
        database_tasks.ReloadLoadBalancer(
            name='octavia-failover-LB-reload-2',
            requires=constants.LOADBALANCER_ID,
            provides=constants.LOADBALANCER))

    # Handle the amphora role and VRRP if necessary
    if role == constants.ROLE_MASTER:
        failover_amphora_flow.add(
            database_tasks.MarkAmphoraMasterInDB(
                name=constants.MARK_AMP_MASTER_INDB,
                requires=constants.AMPHORA))
        vrrp_subflow = self.get_vrrp_subflow(role)
        failover_amphora_flow.add(vrrp_subflow)
    elif role == constants.ROLE_BACKUP:
        failover_amphora_flow.add(
            database_tasks.MarkAmphoraBackupInDB(
                name=constants.MARK_AMP_BACKUP_INDB,
                requires=constants.AMPHORA))
        vrrp_subflow = self.get_vrrp_subflow(role)
        failover_amphora_flow.add(vrrp_subflow)
    elif role == constants.ROLE_STANDALONE:
        failover_amphora_flow.add(
            database_tasks.MarkAmphoraStandAloneInDB(
                name=constants.MARK_AMP_STANDALONE_INDB,
                requires=constants.AMPHORA))

    failover_amphora_flow.add(
        amphora_driver_tasks.ListenersStart(
            requires=(constants.LOADBALANCER, constants.AMPHORA)))
    failover_amphora_flow.add(
        database_tasks.DisableAmphoraHealthMonitoring(
            rebind={constants.AMPHORA: constants.FAILED_AMPHORA},
            requires=constants.AMPHORA))

    return failover_amphora_flow