def rebuild_device(device_id):
    """Rebuild the load balancers of a failed device onto a spare device.

    Picks a spare (OFFLINE, never-pinged) device, moves every load
    balancer and the floating IP from the failed device onto it, marks
    the failed device DELETED and submits the Gearman jobs that push
    the new configuration.

    :param device_id: id of the failed device to rebuild
    :returns: an HTTP-style ``(status, body_dict)`` tuple
    """
    new_device_id = None
    new_device_name = None
    with db_session() as session:
        # Find a spare device: not attached to any LB, OFFLINE and with
        # no failed pings.  Row-lock it so two concurrent rebuilds
        # cannot grab the same spare.
        new_device = session.query(Device).\
            filter(~Device.id.in_(
                session.query(loadbalancers_devices.c.device)
            )).\
            filter(Device.status == "OFFLINE").\
            filter(Device.pingCount == 0).\
            with_lockmode('update').\
            first()
        if new_device is None:
            session.rollback()
            LOG.error(
                'No spare devices when trying to rebuild device {0}'.format(
                    device_id))
            return (500, dict(
                faultcode="Server",
                faultstring='No spare devices when trying to rebuild '
                            'device {0}'.format(device_id)))
        new_device_id = new_device.id
        new_device_name = new_device.name
        LOG.info("Moving device {0} to device {1}".format(
            device_id, new_device_id))
        # Re-point every LB that lived on the failed device at the spare.
        lbs = session.query(LoadBalancer).\
            join(LoadBalancer.devices).\
            filter(Device.id == device_id).all()
        for lb in lbs:
            lb.devices = [new_device]
            lb.status = "ERROR(REBUILDING)"
        new_device.status = 'BUILDING'
        # NOTE(review): assumes at least one LB is attached to the failed
        # device; lbs[0] raises IndexError otherwise -- confirm callers
        # only invoke this for devices with LBs attached.
        lbid = lbs[0].id
        session.commit()
    # Push the LB configuration to the new device.
    submit_job('UPDATE', new_device_name, new_device_id, lbid)
    with db_session() as session:
        new_device = session.query(Device).\
            filter(Device.id == new_device_id).first()
        # Move the floating IP record over, if the failed device had one.
        vip = session.query(Vip).filter(Vip.device == device_id).first()
        if vip:
            vip.device = new_device_id
        device = session.query(Device).\
            filter(Device.id == device_id).first()
        device.status = 'DELETED'
        lbs = session.query(LoadBalancer).\
            join(LoadBalancer.devices).\
            filter(Device.id == new_device_id).all()
        for lb in lbs:
            lb.errmsg = "Load Balancer rebuild on new device"
        if vip:
            LOG.info(
                "Moving IP {0} and marking device {1} for deletion".format(
                    str(ipaddress.IPv4Address(vip.ip)), device_id))
            submit_vip_job('ASSIGN', new_device_name, vip.id)
        new_device.status = 'ONLINE'
        counter = session.query(Counters).\
            filter(Counters.name == 'loadbalancers_rebuild').first()
        counter.value += 1
        session.commit()
    return (200, dict(oldId=device_id, newId=new_device_id))
def send_assign(self, data): NULL = None # For pep8 with db_session() as session: device = session.query(Device).\ filter(Device.name == data).first() if device is None: self.LOG.error( "VIP assign have been given non existent device {0}". format(data)) session.rollback() return False if not self.lbid: vip = session.query(Vip).\ filter(Vip.device == NULL).\ with_lockmode('update').\ first() if vip is None: errmsg = 'Floating IP assign failed (none available)' LOG.error( "Failed to assign IP to device {0} (none available)". format(data)) self._set_error(device.id, errmsg, session) session.commit() return False else: vip = session.query(Vip).\ filter(Vip.id == self.lbid).first() if vip is None: errmsg = 'Cannot find existing floating IP' self.LOG.error( "Failed to assign IP to device {0}".format(data)) self._set_error(device.id, errmsg, session) session.commit() return False vip.device = device.id vip_id = vip.id vip_ip = vip.ip session.commit() ip_str = str(ipaddress.IPv4Address(vip_ip)) job_data = {'action': 'ASSIGN_IP', 'name': data, 'ip': ip_str} status, response = self._send_message(job_data, 'response') if status: return True elif self.lbid: self.LOG.error("Failed to assign IP {0} to device {1}".format( ip_str, data)) else: self.LOG.error("Failed to assign IP {0} to device {1}".format( ip_str, data)) # set to device 0 to make sure it won't be used again with db_session() as session: vip = session.query(Vip).filter(Vip.id == vip_id).first() vip.device = 0 session.commit() submit_vip_job('REMOVE', None, ip_str) return False
def probe_devices(self):
    """Ensure the spare-device pool holds ``node_pool_size`` devices.

    Runs only on the server whose id matches the current minute modulo
    the server count (simple time-based leader election).  Coordinates
    with other admin servers through the PoolBuilding table so the pool
    is never over-built.  Always re-arms the probe scheduler.
    """
    minute = datetime.now().minute
    if self.server_id != minute % self.number_of_servers:
        LOG.info('Not our turn to run probe check, sleeping')
        self.start_probe_sched()
        return
    LOG.info('Running device count probe check')
    try:
        with db_session() as session:
            # Double check we have no outstanding builds assigned to us
            session.query(PoolBuilding).\
                filter(PoolBuilding.server_id == self.server_id).\
                delete()
            session.flush()
            dev_count = session.query(Device).\
                filter(Device.status == 'OFFLINE').count()
            if dev_count >= self.node_pool_size:
                LOG.info("Enough devices exist, no work to do")
                session.commit()
                self.start_probe_sched()
                return
            build_count = self.node_pool_size - dev_count
            # Subtract devices other servers are already building.
            built = session.query(func.sum(PoolBuilding.qty)).first()
            if not built[0]:
                built = 0
            else:
                built = built[0]
            if build_count - built <= 0:
                LOG.info("Other servers are building enough nodes")
                session.commit()
                self.start_probe_sched()
                return
            build_count -= built
            # Record our share so other servers can see it.
            building = PoolBuilding()
            building.server_id = self.server_id
            building.qty = build_count
            session.add(building)
            session.commit()
        # Closed the DB session because we don't want it hanging around
        # for a long time locking tables
        self._build_nodes(build_count)
        with db_session() as session:
            session.query(PoolBuilding).\
                filter(PoolBuilding.server_id == self.server_id).\
                delete()
            session.commit()
    except Exception:
        # BUG FIX: was a bare ``except:`` which also swallows
        # SystemExit/KeyboardInterrupt; still a broad boundary catch.
        LOG.exception("Uncaught exception during pool expansion")
    self.start_probe_sched()
def probe_devices(self):
    """Ensure the spare-device pool holds ``node_pool_size`` devices.

    Runs only on the server whose id matches the current minute modulo
    the server count (simple time-based leader election).  Coordinates
    with other admin servers through the PoolBuilding table so the pool
    is never over-built.  Always re-arms the probe scheduler.
    """
    minute = datetime.now().minute
    if self.server_id != minute % self.number_of_servers:
        LOG.info('Not our turn to run probe check, sleeping')
        self.start_probe_sched()
        return
    LOG.info('Running device count probe check')
    try:
        with db_session() as session:
            # Double check we have no outstanding builds assigned to us
            session.query(PoolBuilding).\
                filter(PoolBuilding.server_id == self.server_id).\
                delete()
            session.flush()
            dev_count = session.query(Device).\
                filter(Device.status == 'OFFLINE').count()
            if dev_count >= self.node_pool_size:
                LOG.info("Enough devices exist, no work to do")
                session.commit()
                self.start_probe_sched()
                return
            build_count = self.node_pool_size - dev_count
            # Subtract devices other servers are already building.
            built = session.query(func.sum(PoolBuilding.qty)).first()
            if not built[0]:
                built = 0
            else:
                built = built[0]
            if build_count - built <= 0:
                LOG.info("Other servers are building enough nodes")
                session.commit()
                self.start_probe_sched()
                return
            build_count -= built
            # Record our share so other servers can see it.
            building = PoolBuilding()
            building.server_id = self.server_id
            building.qty = build_count
            session.add(building)
            session.commit()
        # Closed the DB session because we don't want it hanging around
        # for a long time locking tables
        self._build_nodes(build_count)
        with db_session() as session:
            session.query(PoolBuilding).\
                filter(PoolBuilding.server_id == self.server_id).\
                delete()
            session.commit()
    except Exception:
        # BUG FIX: was a bare ``except:`` which also swallows
        # SystemExit/KeyboardInterrupt; still a broad boundary catch.
        LOG.exception("Uncaught exception during pool expansion")
    self.start_probe_sched()
def delete_devices(self):
    """
    Searches for all devices in the DELETED state and removes them

    Runs only on the server whose id matches the current minute modulo
    the server count; always re-arms the delete scheduler.
    """
    minute = datetime.now().minute
    if self.server_id != minute % self.number_of_servers:
        LOG.info('Not our turn to run delete check, sleeping')
        self.start_delete_sched()
        return
    LOG.info('Running device delete check')
    try:
        message = []
        with db_session() as session:
            devices = session.query(Device).\
                filter(Device.status == 'DELETED').all()
            # Build one pool-manager Gearman job per deleted device.
            for device in devices:
                job_data = {'action': 'DELETE_DEVICE',
                            'name': device.name}
                message.append(dict(task='libra_pool_mgm', data=job_data))
            session.commit()
        if not message:
            LOG.info("No devices to delete")
        else:
            gear = GearmanWork()
            gear.send_delete_message(message)
    except Exception:
        # BUG FIX: was a bare ``except:`` which also swallows
        # SystemExit/KeyboardInterrupt; still a broad boundary catch.
        LOG.exception("Exception when deleting devices")
    self.start_delete_sched()
def _exec_ping(self):
    """Ping every ONLINE device and kick off recovery for failures.

    :returns: ``(pings, failed)`` -- devices pinged and devices failed.
        Aborts recovery (returning the counts) when more than
        ``self.error_limit`` devices fail simultaneously, since a mass
        failure usually indicates a local problem rather than dead
        devices.
    """
    pings = 0
    failed = 0
    node_list = []
    LOG.info('Running ping check')
    with db_session() as session:
        devices = session.query(
            Device.id, Device.name).filter(Device.status == 'ONLINE').all()
        pings = len(devices)
        if pings == 0:
            LOG.info('No LBs to ping')
            return (0, 0)
        for lb in devices:
            node_list.append(lb.name)
        gearman = GearJobs()
        failed_lbs, node_status = gearman.send_pings(node_list)
        failed = len(failed_lbs)
        if failed > self.error_limit:
            LOG.error('Too many simultaneous Load Balancer Failures.'
                      ' Aborting recovery attempt')
            return pings, failed
        if failed > 0:
            self._send_fails(failed_lbs)
        # Process node status after lb status
        self._update_nodes(node_status)
        session.commit()
    return pings, failed
def _add_vip(self, data):
    """Persist a new floating IP record in the database."""
    ip_text = data['ip']
    LOG.info('Adding vip {0} to DB'.format(ip_text))
    # Addresses are stored as integers in the Vip table.
    new_vip = Vip()
    new_vip.ip = int(ipaddress.IPv4Address(unicode(ip_text)))
    with db_session() as session:
        session.add(new_vip)
        session.commit()
def send_delete_message(self, message):
    """Submit DELETE_DEVICE jobs to Gearman and purge the corresponding
    Device rows from the DB.

    :param message: list of pre-built Gearman job dicts
    NOTE(review): the Device row is deleted even when the pool manager
    reports FAIL (the error is logged but the row is still removed) --
    confirm this best-effort cleanup is intentional.
    """
    LOG.info("Sending %d gearman messages", len(message))
    job_status = self.gearman_client.submit_multiple_jobs(
        message, background=False, wait_until_complete=True,
        max_retries=10, poll_timeout=30.0)
    delete_count = 0
    for status in job_status:
        if status.state == JOB_UNKNOWN:
            LOG.error('Gearman Job server fail')
            continue
        if status.timed_out:
            LOG.error('Gearman timeout whilst deleting device')
            continue
        if status.result['response'] == 'FAIL':
            LOG.error(
                'Pool manager failed to delete a device, removing from DB')
        delete_count += 1
        with db_session() as session:
            session.query(Device).\
                filter(Device.name == status.result['name']).delete()
            session.commit()
    LOG.info('%d freed devices delete from pool', delete_count)
def _exec_ping(self):
    """Ping every ONLINE device and kick off recovery for failures.

    :returns: ``(pings, failed)`` -- devices pinged and devices failed.
        Aborts recovery (returning the counts) when more than
        ``self.error_limit`` devices fail simultaneously, since a mass
        failure usually indicates a local problem rather than dead
        devices.
    """
    pings = 0
    failed = 0
    node_list = []
    LOG.info('Running ping check')
    with db_session() as session:
        devices = session.query(
            Device.id, Device.name
        ).filter(Device.status == 'ONLINE').all()
        pings = len(devices)
        if pings == 0:
            LOG.info('No LBs to ping')
            return (0, 0)
        for lb in devices:
            node_list.append(lb.name)
        gearman = GearJobs()
        failed_lbs, node_status = gearman.send_pings(node_list)
        failed = len(failed_lbs)
        if failed > self.error_limit:
            LOG.error(
                'Too many simultaneous Load Balancer Failures.'
                ' Aborting recovery attempt'
            )
            return pings, failed
        if failed > 0:
            self._send_fails(failed_lbs)
        # Process node status after lb status
        self._update_nodes(node_status)
        session.commit()
    return pings, failed
def send_alert(self, message, device_id, device_ip, device_name,
               device_tenant):
    """Mark a failed device ERROR, flag all of its load balancers, then
    kick off a rebuild onto a spare device.

    Only *device_id* is used here; the remaining parameters match the
    alert-driver interface.
    """
    with db_session() as session:
        device = session.query(Device).filter(Device.id == device_id).first()
        device.status = "ERROR"
        errmsg = "Load Balancer has failed, attempting rebuild"
        lbs = (
            session.query(loadbalancers_devices.c.loadbalancer)
            .filter(loadbalancers_devices.c.device == device_id)
            .all()
        )
        # TODO: make it so that we don't get stuck in LB ERROR here when
        # a rebuild fails due to something like a bad device. Maybe have
        # an attempted rebuild count?
        for lb in lbs:
            session.query(LoadBalancer).filter(LoadBalancer.id == lb[0]).update(
                {"status": "ERROR", "errmsg": errmsg},
                synchronize_session="fetch"
            )
        session.flush()
        session.commit()
    self._rebuild_device(device_id)
def _send_delete(self, failed_nodes):
    """Escalate offline devices: bump each device's failed-ping count
    and, once ``self.ping_limit`` attempts have been exceeded, ask
    every configured driver to delete the device.

    :param failed_nodes: iterable of device names that failed a ping
    """
    with db_session() as session:
        for lb in failed_nodes:
            # Get the current ping count
            data = session.query(
                Device.id, Device.pingCount).\
                filter(Device.name == lb).first()
            if not data:
                # BUG FIX: this previously logged ``data.id``, which
                # raises AttributeError when data is None; log the
                # device name we were searching for instead.
                LOG.error('Device {0} no longer exists'.format(lb))
                continue
            if data.pingCount < self.ping_limit:
                # Increment the local copy; the persistent update is
                # done by the query below.
                data.pingCount += 1
                LOG.error(
                    'Offline Device {0} has failed {1} ping attempts'.
                    format(lb, data.pingCount))
                session.query(Device).\
                    filter(Device.name == lb).\
                    update({"pingCount": data.pingCount},
                           synchronize_session='fetch')
                session.flush()
                continue
            message = (
                'Load balancer {0} unreachable and marked for deletion'.
                format(lb))
            for driver in self.drivers:
                instance = driver()
                LOG.info('Sending delete request for {0} to {1}'.format(
                    lb, instance.__class__.__name__))
                instance.send_delete(message, data.id)
        session.commit()
def run_expunge(self):
    """Expunge LB rows that have been DELETED longer than
    ``expire_days`` days, then re-arm the 24 hour timer.

    Only runs on the server whose id matches today's day-of-month
    modulo the server count.
    """
    day = datetime.now().day
    if self.server_id != day % self.number_of_servers:
        LOG.info('Not our turn to run expunge check, sleeping')
        self.expunge_timer = threading.Timer(
            24 * 60 * 60, self.run_expunge, ()
        )
        # BUG FIX: without this return, the expunge body ran on every
        # server instead of only the elected one.
        return
    with db_session() as session:
        try:
            exp = datetime.now() - timedelta(
                days=int(self.expire_days)
            )
            exp_time = exp.strftime('%Y-%m-%d %H:%M:%S')
            LOG.info(
                'Expunging deleted loadbalancers older than {0}'
                .format(exp_time)
            )
            count = session.query(
                LoadBalancer.status
            ).filter(LoadBalancer.updated < exp_time).\
                filter(LoadBalancer.status == 'DELETED').delete()
            counter = session.query(Counters).\
                filter(Counters.name == 'loadbalancers_expunged').first()
            counter.value += count
            session.commit()
            LOG.info(
                '{0} deleted load balancers expunged'.format(count)
            )
        except Exception:
            # Was a bare ``except:``; keep the boundary catch but do
            # not swallow SystemExit/KeyboardInterrupt.
            LOG.exception('Exception occurred during expunge')
    LOG.info('Expunge thread sleeping for 24 hours')
    # NOTE(review): the Timer objects created here are never
    # .start()ed in this method -- confirm the caller starts
    # self.expunge_timer, otherwise rescheduling never fires.
    self.expunge_timer = threading.Timer(
        24 * 60 * 60, self.run_expunge, ())
def _send_fails(self, failed_lbs):
    """Notify every configured alert driver about each failed device.

    :param failed_lbs: iterable of device names that failed
    """
    with db_session() as session:
        for lb in failed_lbs:
            data = self._get_lb(lb, session)
            if not data:
                LOG.error(
                    'Device {0} has no Loadbalancer attached'.
                    format(lb)
                )
                continue
            message = (
                'Load balancer failed\n'
                'ID: {0}\n'
                'IP: {1}\n'
                'tenant: {2}\n'.format(
                    data.id, data.floatingIpAddr,
                    data.tenantid
                )
            )
            for driver in self.drivers:
                instance = driver()
                LOG.info(
                    'Sending failure of {0} to {1}'.format(
                        lb, instance.__class__.__name__
                    )
                )
                instance.send_alert(message, data.id)
        session.commit()
def send_remove(self, data=None):
    """Delete the floating IP held in ``self.lbid`` via Gearman.

    Retries the DELETE_IP job up to five times.  On success the Vip
    row is removed and the deleted-vips counter bumped; on failure the
    Vip is parked on device 0 so it is cleaned up later but never
    reused.

    :param data: unused; kept for the common job interface
    """
    job_data = {
        'action': 'DELETE_IP',
        'ip': self.lbid
    }
    ip_int = int(ipaddress.IPv4Address(unicode(self.lbid)))
    for x in xrange(0, 5):
        LOG.info(
            'Attempt to delete IP {0} #{1}'
            .format(self.lbid, x)
        )
        status, response = self._send_message(job_data, 'response')
        if status:
            break
    with db_session() as session:
        if not status:
            LOG.error(
                "Failed to delete IP {0}"
                .format(self.lbid)
            )
            # Set to 0 to mark as something that needs cleaning up
            # but cannot be used again
            vip = session.query(Vip).\
                filter(Vip.ip == ip_int).first()
            vip.device = 0
        else:
            session.query(Vip).\
                filter(Vip.ip == ip_int).delete()
            counter = session.query(Counters).\
                filter(Counters.name == 'vips_deleted').first()
            counter.value += 1
        session.commit()
def get_one(self, tenant_id=None):
    """
    Get a single Admin API user or details about self

    :param tenant_id: tenant id of the user to look up
    :returns: dict with the user's tenant and level, or a fault dict
        (401 when the caller is not an Admin API user, 404 when the
        tenant is unknown)
    """
    if not tenant_is_user(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function"
        )
    with db_session() as session:
        user = session.query(AdminAuth).\
            filter(AdminAuth.tenant_id == tenant_id).first()
        if user is None:
            response.status = 404
            # BUG FIX: this fault dict used the misspelled key
            # "faultstatus"; every other fault response in this API
            # uses "faultstring".
            return dict(
                faultcode="Client",
                faultstring="User not found"
            )
        ret = {
            "tenant": user.tenant_id,
            "level": user.level
        }
        session.commit()
        return ret
def probe_vips(self):
    """Top up the pool of unassigned floating IPs to ``vip_pool_size``.

    Runs only on the server whose id matches the current minute modulo
    the server count; always re-arms the vips scheduler.
    """
    minute = datetime.now().minute
    if self.server_id != minute % self.number_of_servers:
        LOG.info('Not our turn to run vips check, sleeping')
        self.start_vips_sched()
        return
    LOG.info('Running vips count probe check')
    try:
        with db_session() as session:
            NULL = None  # For pep8
            # Count IPs not yet attached to any device.
            vip_count = session.query(Vip).\
                filter(Vip.device == NULL).count()
            if vip_count >= self.vip_pool_size:
                LOG.info("Enough vips exist, no work to do")
                session.commit()
                self.start_vips_sched()
                return
            build_count = self.vip_pool_size - vip_count
            self._build_vips(build_count)
    except Exception:
        # BUG FIX: was a bare ``except:`` which also swallows
        # SystemExit/KeyboardInterrupt; still a broad boundary catch.
        LOG.exception(
            "Uncaught exception during vip pool expansion"
        )
    self.start_vips_sched()
def client_job(job_type, host, data, lbid):
    """Dispatch a Gearman job of *job_type* to *host* for LB *lbid*.

    UPDATE/DELETE/ARCHIVE/REMOVE are simple pass-throughs to the
    GearmanClientThread.  ASSIGN additionally retries up to five times,
    promotes the LB(s) to ACTIVE and the device to ONLINE on success,
    and emits a billing (MnB) create event for LBs leaving BUILD.
    """
    try:
        client = GearmanClientThread(host, lbid)
        LOG.info("Sending Gearman job {0} to {1} for loadbalancer {2}".format(
            job_type, host, lbid))
        if job_type == 'UPDATE':
            client.send_update(data)
        if job_type == 'DELETE':
            client.send_delete(data)
        if job_type == 'ARCHIVE':
            client.send_archive(data)
        if job_type == 'ASSIGN':
            # Try the assign 5 times
            for x in xrange(0, 5):
                status = client.send_assign(data)
                if status:
                    break
            with db_session() as session:
                device = session.query(Device).\
                    filter(Device.name == data).first()
                if device is None:
                    LOG.error(
                        "Device {0} not found in ASSIGN, this shouldn't happen"
                        .format(data))
                    return
                mnb_data = {}
                if not status:
                    LOG.error(
                        "Giving up vip assign for device {0}".format(data))
                    errmsg = 'Floating IP assign failed'
                    client._set_error(device.id, errmsg, session)
                else:
                    lbs = session.query(
                        LoadBalancer
                    ).join(LoadBalancer.nodes).\
                        join(LoadBalancer.devices).\
                        filter(Device.id == device.id).\
                        filter(LoadBalancer.status != 'DELETED').\
                        all()
                    for lb in lbs:
                        if lb.status == 'BUILD':
                            # Only send a create message to MnB if we
                            # are going from BUILD to ACTIVE. After the
                            # DB is updated.
                            mnb_data["lbid"] = lb.id
                            mnb_data["tenantid"] = lb.tenantid
                        lb.status = 'ACTIVE'
                    device.status = 'ONLINE'
                session.commit()
                # Send the MnB create if needed
                if "lbid" in mnb_data:
                    update_mnb('lbaas.instance.create',
                               mnb_data["lbid"],
                               mnb_data["tenantid"])
        if job_type == 'REMOVE':
            client.send_remove(data)
        return
    except:
        LOG.exception("Gearman thread unhandled exception")
def discover(self, device_id):
    """
    Discovers information about a given libra worker based on device ID

    :param device_id: id of the device to query
    :returns: dict with the worker's id, version and release, or a
        fault dict (401 unauthorized, 404 unknown device, 500 on
        Gearman discovery failure)
    """
    if not tenant_is_user(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function")
    with db_session() as session:
        device = session.query(Device.name).\
            filter(Device.id == device_id).scalar()
        session.commit()
    # BUG FIX: the None check used to run on str(device), which turns a
    # missing device into the string 'None' so the 404 was never
    # returned and discovery was attempted against 'None'.
    if device is None:
        response.status = 404
        return dict(faultcode="Client",
                    faultstring="Device " + device_id + " not found")
    device_name = str(device)
    gearman = GearJobs()
    discover = gearman.get_discover(device_name)
    if discover is None:
        response.status = 500
        return dict(faultcode="Server",
                    faultstring="Could not discover device")
    return dict(id=device_id, version=discover['version'],
                release=discover['release'])
def send_archive(self, data):
    """Ask the worker to archive this LB's logs to object storage.

    Records success or failure on the device's errmsg, always returns
    the LB to ACTIVE and bumps the log_archives counter.

    :param data: dict with object-store parameters and 'deviceid'
    """
    with db_session() as session:
        lb = session.query(LoadBalancer).\
            filter(LoadBalancer.id == self.lbid).\
            first()
        job_data = {
            'hpcs_action': 'ARCHIVE',
            'hpcs_object_store_basepath': data['objectStoreBasePath'],
            'hpcs_object_store_endpoint': data['objectStoreEndpoint'],
            'hpcs_object_store_token': data['authToken'],
            'hpcs_object_store_type': data['objectStoreType'],
            'loadBalancers': [{
                'id': str(lb.id),
                'name': lb.name,
                'protocol': lb.protocol
            }]
        }
        status, response = self._send_message(job_data, 'hpcs_response')
        device = session.query(Device).\
            filter(Device.id == data['deviceid']).\
            first()
        if status:
            device.errmsg = 'Log archive successful'
        else:
            device.errmsg = 'Log archive failed: {0}'.format(response)
        lb.status = 'ACTIVE'
        counter = session.query(Counters).\
            filter(Counters.name == 'log_archives').first()
        counter.value += 1
        session.commit()
def send_delete_message(self, message):
    """Submit DELETE_DEVICE jobs to Gearman and purge the corresponding
    Device rows from the DB.

    :param message: list of pre-built Gearman job dicts
    NOTE(review): the Device row is deleted even when the pool manager
    reports FAIL (the error is logged but the row is still removed) --
    confirm this best-effort cleanup is intentional.
    """
    LOG.info("Sending %d gearman messages", len(message))
    job_status = self.gearman_client.submit_multiple_jobs(
        message, background=False, wait_until_complete=True,
        max_retries=10, poll_timeout=30.0
    )
    delete_count = 0
    for status in job_status:
        if status.state == JOB_UNKNOWN:
            LOG.error('Gearman Job server fail')
            continue
        if status.timed_out:
            LOG.error('Gearman timeout whilst deleting device')
            continue
        if status.result['response'] == 'FAIL':
            LOG.error(
                'Pool manager failed to delete a device, removing from DB'
            )
        delete_count += 1
        with db_session() as session:
            session.query(Device).\
                filter(Device.name == status.result['name']).delete()
            session.commit()
    LOG.info('%d freed devices delete from pool', delete_count)
def get(self):
    """Returns a list of virtual ips attached to a specific Load Balancer.

    :param load_balancer_id: id of lb

    Url:
       GET /loadbalancers/{load_balancer_id}/virtualips

    Returns: dict
    """
    tenant_id = get_limited_to_project(request.headers)
    if not self.lbid:
        response.status = 400
        return dict(message="Bad Request",
                    details="Load Balancer ID not provided")
    with db_session() as session:
        vip = (
            session.query(Vip.id, Vip.ip)
            .join(LoadBalancer.devices)
            .join(Device.vip)
            .filter(LoadBalancer.id == self.lbid)
            .filter(LoadBalancer.tenantid == tenant_id)
            .first()
        )
        if not vip:
            session.rollback()
            response.status = 404
            return dict(message="Not Found",
                        details="Load Balancer ID not valid")
        resp = {
            "virtualIps": [
                {"id": vip.id,
                 "address": str(ipaddress.IPv4Address(vip.ip)),
                 "type": "PUBLIC",
                 "ipVersion": "IPV4"}
            ]
        }
        # Read-only request: roll back instead of committing.
        session.rollback()
        return resp
def discover(self, device_id):
    """
    Discovers information about a given libra worker based on device ID

    :param device_id: id of the device to query
    :returns: dict with the worker's id, version and release, or a
        fault dict (401 unauthorized, 404 unknown device, 500 on
        Gearman discovery failure)
    """
    if not tenant_is_user(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function"
        )
    with db_session() as session:
        device = session.query(Device.name).\
            filter(Device.id == device_id).scalar()
        session.commit()
    # BUG FIX: the None check used to run on str(device), which turns a
    # missing device into the string 'None' so the 404 was never
    # returned and discovery was attempted against 'None'.
    if device is None:
        response.status = 404
        return dict(
            faultcode="Client",
            faultstring="Device " + device_id + " not found"
        )
    device_name = str(device)
    gearman = GearJobs()
    discover = gearman.get_discover(device_name)
    if discover is None:
        response.status = 500
        return dict(
            faultcode="Server",
            faultstring="Could not discover device"
        )
    return dict(
        id=device_id,
        version=discover['version'],
        release=discover['release']
    )
def _send_fails(self, failed_lbs):
    """Notify every configured alert driver about each failed device.

    :param failed_lbs: iterable of device names that failed
    """
    with db_session() as session:
        for lb in failed_lbs:
            data = self._get_lb(lb, session)
            if not data:
                LOG.error(
                    'Device {0} has no Loadbalancer attached'.
                    format(lb)
                )
                continue
            message = (
                'Load balancer failed\n'
                'ID: {0}\n'
                'IP: {1}\n'
                'name: {2}\n'
                'tenant: {3}\n'.format(
                    data.id, data.floatingIpAddr, data.name,
                    data.tenantid
                )
            )
            for driver in self.drivers:
                instance = driver()
                LOG.info(
                    'Sending failure of {0} to {1}'.format(
                        lb, instance.__class__.__name__
                    )
                )
                instance.send_alert(message, data.id, data.floatingIpAddr,
                                    data.name, data.tenantid)
        session.commit()
def send_alert(self, message, device_id, device_ip, device_name,
               device_tenant):
    """Mark a failed device ERROR, flag all of its load balancers, then
    kick off a rebuild onto a spare device.

    Only *device_id* is used here; the remaining parameters match the
    alert-driver interface.
    """
    with db_session() as session:
        device = session.query(Device).\
            filter(Device.id == device_id).first()
        device.status = "ERROR"
        errmsg = "Load Balancer has failed, attempting rebuild"
        lbs = session.query(
            loadbalancers_devices.c.loadbalancer).\
            filter(loadbalancers_devices.c.device == device_id).\
            all()
        # TODO: make it so that we don't get stuck in LB ERROR here when
        # a rebuild fails due to something like a bad device. Maybe have
        # an attempted rebuild count?
        for lb in lbs:
            session.query(LoadBalancer).\
                filter(LoadBalancer.id == lb[0]).\
                update({"status": "ERROR", "errmsg": errmsg},
                       synchronize_session='fetch')
        session.flush()
        session.commit()
    self._rebuild_device(device_id)
def delete_devices(self):
    """
    Searches for all devices in the DELETED state and removes them

    Runs only on the server whose id matches the current minute modulo
    the server count; always re-arms the delete scheduler.
    """
    minute = datetime.now().minute
    if self.server_id != minute % self.number_of_servers:
        LOG.info('Not our turn to run delete check, sleeping')
        self.start_delete_sched()
        return
    LOG.info('Running device delete check')
    try:
        message = []
        with db_session() as session:
            devices = session.query(Device).\
                filter(Device.status == 'DELETED').all()
            # Build one pool-manager Gearman job per deleted device.
            for device in devices:
                job_data = {
                    'action': 'DELETE_DEVICE',
                    'name': device.name
                }
                message.append(dict(task='libra_pool_mgm', data=job_data))
            session.commit()
        if not message:
            LOG.info("No devices to delete")
        else:
            gear = GearmanWork()
            gear.send_delete_message(message)
    except Exception:
        # BUG FIX: was a bare ``except:`` which also swallows
        # SystemExit/KeyboardInterrupt; still a broad boundary catch.
        LOG.exception("Exception when deleting devices")
    self.start_delete_sched()
def post(self, body=None):
    """Schedule a log archive (ARCHIVE job) for this load balancer.

    Marks the LB PENDING_UPDATE and submits an ARCHIVE Gearman job.
    Object-store parameters come from *body* when supplied, otherwise
    from the service's swift configuration and the caller's token.

    NOTE(review): ``body`` defaults to None but its attributes are read
    unconditionally below -- confirm the framework always supplies it.
    """
    if self.lbid is None:
        raise ClientSideError('Load Balancer ID has not been supplied')
    tenant_id = get_limited_to_project(request.headers)
    with db_session() as session:
        load_balancer = session.query(LoadBalancer).\
            filter(LoadBalancer.tenantid == tenant_id).\
            filter(LoadBalancer.id == self.lbid).\
            filter(LoadBalancer.status != 'DELETED').\
            first()
        if load_balancer is None:
            session.rollback()
            raise NotFound('Load Balancer not found')
        if load_balancer.status in ImmutableStates:
            session.rollback()
            raise ImmutableEntity(
                'Cannot get logs from a Load Balancer in a non-ACTIVE '
                'state, current state: {0}'.format(load_balancer.status)
            )
        load_balancer.status = 'PENDING_UPDATE'
        device = session.query(
            Device.id, Device.name, Device.status
        ).join(LoadBalancer.devices).\
            filter(LoadBalancer.id == self.lbid).\
            first()
        session.commit()
    data = {
        'deviceid': device.id
    }
    # Fall back to configured swift defaults for any unset field.
    if body.objectStoreType != Unset:
        data['objectStoreType'] = body.objectStoreType.lower()
    else:
        data['objectStoreType'] = 'swift'
    if body.objectStoreBasePath != Unset:
        data['objectStoreBasePath'] = body.objectStoreBasePath
    else:
        data['objectStoreBasePath'] = conf.swift.swift_basepath
    if body.objectStoreEndpoint != Unset:
        data['objectStoreEndpoint'] = body.objectStoreEndpoint
    else:
        data['objectStoreEndpoint'] = '{0}/{1}'.\
            format(conf.swift.swift_endpoint.rstrip('/'), tenant_id)
    if body.authToken != Unset:
        data['authToken'] = body.authToken
    else:
        data['authToken'] = request.headers.get('X-Auth-Token')
    submit_job(
        'ARCHIVE', device.name, data, self.lbid
    )
    return
def delete(self):
    """Remove a load balancer from the account.

    :param load_balancer_id: id of lb

    Urls:
       DELETE /loadbalancers/{load_balancer_id}

    Notes:
       curl -i -H "Accept: application/json" -X DELETE
       http://dev.server:8080/loadbalancers/1

    Returns: None
    """
    load_balancer_id = self.lbid
    tenant_id = get_limited_to_project(request.headers)
    # grab the lb
    with db_session() as session:
        lb = session.query(LoadBalancer).\
            filter(LoadBalancer.id == load_balancer_id).\
            filter(LoadBalancer.tenantid == tenant_id).\
            filter(LoadBalancer.status != 'DELETED').first()
        if lb is None:
            session.rollback()
            raise NotFound("Load Balancer ID is not valid")
        # So we can delete ERROR, but not other Immutable states
        if lb.status in ImmutableStatesNoError:
            session.rollback()
            raise ImmutableEntity(
                'Cannot delete a Load Balancer in a non-ACTIVE state'
                ', current state: {0}'.format(lb.status))
        lb.status = 'PENDING_DELETE'
        device = session.query(
            Device.id, Device.name
        ).join(LoadBalancer.devices).\
            filter(LoadBalancer.id == load_balancer_id).\
            first()
        counter = session.query(Counters).\
            filter(Counters.name == 'api_loadbalancers_delete').first()
        counter.value += 1
        if device is None:
            # This can happen if a device was manually deleted from the DB
            lb.status = 'DELETED'
            session.execute(loadbalancers_devices.delete().where(
                loadbalancers_devices.c.loadbalancer == lb.id))
            session.query(Node).\
                filter(Node.lbid == lb.id).delete()
            session.query(HealthMonitor).\
                filter(HealthMonitor.lbid == lb.id).delete()
            session.commit()
        else:
            # Commit PENDING_DELETE first, then hand off to the worker.
            session.commit()
            submit_job('DELETE', device.name, device.id, lb.id)
        return None
def get(self):
    """Return all counter (name, value) rows; Admin API users only."""
    # Reject callers that are not recognized Admin API users.
    if not tenant_is_user(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function")
    with db_session() as session:
        return session.query(Counters.name, Counters.value).all()
def delete(self, device_id):
    """
    Deletes a given device

    :param device_id: id of device to delete

    Urls:
       DELETE /devices/{device_id}

    Returns: None
    """
    if not tenant_is_admin(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function"
        )
    with db_session() as session:
        # check for the device
        device = session.query(Device.id).\
            filter(Device.id == device_id).first()
        if device is None:
            session.rollback()
            response.status = 404
            return dict(
                faultcode="Client",
                faultstring="Device " + device_id + " not found"
            )
        # Is the device is attached to a LB
        lb = session.query(
            loadbalancers_devices.c.loadbalancer).\
            filter(loadbalancers_devices.c.device == device_id).\
            all()
        if lb:
            # Rebuild device
            resp = rebuild_device(device_id)
            response.status = resp[0]
            return resp[1]
        # If we get here there are no load balancers so delete device
        response.status = 204
        try:
            device = session.query(Device).\
                filter(Device.id == device_id).first()
            device.status = 'DELETED'
            session.commit()
            return None
        except:
            session.rollback()
            LOG.exception('Error deleting device from pool')
            response.status = 500
            return dict(
                faultcode="Server",
                faultstring="Error deleting device from pool"
            )
        # NOTE(review): unreachable -- both try and except return above.
        return None
def _update_stats(self, results, failed_list):
    """Store per-LB bandwidth stats gathered from the workers.

    :param results: dict mapping device name -> worker stats payload
    :param failed_list: device names that failed the stats poll; these
        are skipped silently (they were already reported elsewhere)
    """
    with db_session() as session:
        lbs = session.query(
            LoadBalancer.id, LoadBalancer.protocol, LoadBalancer.status,
            Device.name
        ).join(LoadBalancer.devices).\
            filter(Device.status == 'ONLINE').all()
        if lbs is None:
            session.rollback()
            LOG.error('No Loadbalancers found when updating stats')
            return
        total = len(lbs)
        added = 0
        for lb in lbs:
            if lb.name not in results:
                if lb.name not in failed_list:
                    LOG.error(
                        'No stats results found for Device {0}, LBID {1}'
                        .format(lb.name, lb.id))
                continue
            result = results[lb.name]
            protocol = lb.protocol.lower()
            if protocol != "http":
                # GALERA or TCP = TCP at the worker
                protocol = "tcp"
            bytes_out = -1
            for data in result["loadBalancers"]:
                if data["protocol"] == protocol:
                    bytes_out = data["bytes_out"]
            if bytes_out == -1:
                LOG.error(
                    'No stats found for Device {0}, '
                    'LBID {1}, protocol {2}'
                    .format(lb.name, lb.id, protocol))
                continue
            new_entry = Stats()
            new_entry.lbid = lb.id
            new_entry.period_start = result["utc_start"]
            new_entry.period_end = result["utc_end"]
            new_entry.bytes_out = bytes_out
            new_entry.status = lb.status
            session.add(new_entry)
            # BUG FIX: was ``session.flush`` without parentheses -- a
            # no-op attribute access instead of an actual flush call.
            session.flush()
            added += 1
        session.commit()
        LOG.info(
            '{total} loadbalancers stats queried, {fail} failed'
            .format(total=total, fail=total - added))
def _add_vip(self, data):
    """Persist a new floating IP and bump the vips_built counter."""
    ip_text = data['ip']
    LOG.info('Adding vip {0} to DB'.format(ip_text))
    # Addresses are stored as integers in the Vip table.
    new_vip = Vip()
    new_vip.ip = int(ipaddress.IPv4Address(unicode(ip_text)))
    with db_session() as session:
        session.add(new_vip)
        built_counter = session.query(Counters).\
            filter(Counters.name == 'vips_built').first()
        built_counter.value += 1
        session.commit()
def get(self):
    """Return all counter (name, value) rows; Admin API users only."""
    # Reject callers that are not recognized Admin API users.
    if not tenant_is_user(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function"
        )
    with db_session() as session:
        return session.query(Counters.name, Counters.value).all()
def _exec_stats(self):
    """Collect usage stats from all ONLINE devices via Gearman.

    Uses the Billing 'stats' row as a cluster-wide rate limiter so the
    poll runs at most once per ``stats_freq`` minutes.

    :returns: ``(failed, total)`` device counts
    """
    failed = 0
    node_list = []
    with db_session() as session:
        # NOTE(review): ``datetime.timedelta`` requires this module to
        # do ``import datetime`` (module, not the class); confirm it
        # does not use ``from datetime import datetime`` like the
        # scheduler code does, or this line raises AttributeError.
        delta = datetime.timedelta(minutes=self.stats_freq)
        exp = timeutils.utcnow() - delta
        exp_time = exp.strftime('%Y-%m-%d %H:%M:%S')
        updated = session.query(
            Billing.last_update
        ).filter(Billing.name == "stats").\
            filter(Billing.last_update > exp_time).\
            first()
        if updated is not None:
            # Not time yet
            LOG.info('Not time to gather stats yet {0}'.format(exp_time))
            session.rollback()
            return 0, 0
        # Update the stats timestamp
        session.query(Billing).\
            filter(Billing.name == "stats").\
            update({"last_update": func.now()},
                   synchronize_session='fetch')
        # Get all the online devices to query for stats
        devices = session.query(
            Device.id, Device.name).filter(Device.status == 'ONLINE').all()
        if devices is None or len(devices) == 0:
            LOG.error('No ONLINE devices to gather usage stats from')
            session.rollback()
            return 0, 0
        total = len(devices)
        for device in devices:
            node_list.append(device.name)
        gearman = GearJobs()
        failed_list, results = gearman.get_stats(node_list)
        failed = len(failed_list)
        if failed > 0:
            self._send_fails(failed_list)
        if total > failed:
            # We have some success
            self._update_stats(results, failed_list)
            session.commit()
        else:
            # Everything failed. Retry these on the next timer firing
            session.rollback()
    return failed, total
def _update_stats(self, results, failed_list):
    """Store per-LB bandwidth stats gathered from the workers.

    :param results: dict mapping device name -> worker stats payload
    :param failed_list: device names that failed the stats poll; these
        are skipped silently (they were already reported elsewhere)
    """
    with db_session() as session:
        lbs = session.query(
            LoadBalancer.id, LoadBalancer.protocol, LoadBalancer.status,
            Device.name
        ).join(LoadBalancer.devices).\
            filter(Device.status == 'ONLINE').all()
        if lbs is None:
            session.rollback()
            LOG.error('No Loadbalancers found when updating stats')
            return
        total = len(lbs)
        added = 0
        for lb in lbs:
            if lb.name not in results:
                if lb.name not in failed_list:
                    LOG.error(
                        'No stats results found for Device {0}, LBID {1}'.
                        format(lb.name, lb.id))
                continue
            result = results[lb.name]
            protocol = lb.protocol.lower()
            if protocol != "http":
                # GALERA or TCP = TCP at the worker
                protocol = "tcp"
            bytes_out = -1
            for data in result["loadBalancers"]:
                if data["protocol"] == protocol:
                    bytes_out = data["bytes_out"]
            if bytes_out == -1:
                LOG.error('No stats found for Device {0}, '
                          'LBID {1}, protocol {2}'.format(
                              lb.name, lb.id, protocol))
                continue
            new_entry = Stats()
            new_entry.lbid = lb.id
            new_entry.period_start = result["utc_start"]
            new_entry.period_end = result["utc_end"]
            new_entry.bytes_out = bytes_out
            new_entry.status = lb.status
            session.add(new_entry)
            # BUG FIX: was ``session.flush`` without parentheses -- a
            # no-op attribute access instead of an actual flush call.
            session.flush()
            added += 1
        session.commit()
        LOG.info(
            '{total} loadbalancers stats queried, {fail} failed'.format(
                total=total, fail=total - added))
def _exec_stats(self):
    """Collect usage stats from all ONLINE devices via Gearman.

    Uses the Billing 'stats' row as a cluster-wide rate limiter so the
    poll runs at most once per ``stats_freq`` minutes.

    :returns: ``(failed, total)`` device counts
    """
    failed = 0
    node_list = []
    with db_session() as session:
        # NOTE(review): ``datetime.timedelta`` requires this module to
        # do ``import datetime`` (module, not the class); confirm it
        # does not use ``from datetime import datetime`` like the
        # scheduler code does, or this line raises AttributeError.
        delta = datetime.timedelta(minutes=self.stats_freq)
        exp = timeutils.utcnow() - delta
        exp_time = exp.strftime('%Y-%m-%d %H:%M:%S')
        updated = session.query(
            Billing.last_update
        ).filter(Billing.name == "stats").\
            filter(Billing.last_update > exp_time).\
            first()
        if updated is not None:
            # Not time yet
            LOG.info('Not time to gather stats yet {0}'.format(exp_time))
            session.rollback()
            return 0, 0
        # Update the stats timestamp
        session.query(Billing).\
            filter(Billing.name == "stats").\
            update({"last_update": func.now()},
                   synchronize_session='fetch')
        # Get all the online devices to query for stats
        devices = session.query(
            Device.id, Device.name
        ).filter(Device.status == 'ONLINE').all()
        if devices is None or len(devices) == 0:
            LOG.error('No ONLINE devices to gather usage stats from')
            session.rollback()
            return 0, 0
        total = len(devices)
        for device in devices:
            node_list.append(device.name)
        gearman = GearJobs()
        failed_list, results = gearman.get_stats(node_list)
        failed = len(failed_list)
        if failed > 0:
            self._send_fails(failed_list)
        if total > failed:
            # We have some success
            self._update_stats(results, failed_list)
            session.commit()
        else:
            # Everything failed. Retry these on the next timer firing
            session.rollback()
    return failed, total
def delete(self):
    """Remove a load balancer from the account.

    :param load_balancer_id: id of lb

    Urls:
       DELETE /loadbalancers/{load_balancer_id}

    Notes:
       curl -i -H "Accept: application/json" -X DELETE
       http://dev.server:8080/loadbalancers/1

    Returns: None
    """
    load_balancer_id = self.lbid
    tenant_id = get_limited_to_project(request.headers)
    with db_session() as session:
        lb = session.query(LoadBalancer).\
            filter(LoadBalancer.id == load_balancer_id).\
            filter(LoadBalancer.tenantid == tenant_id).\
            filter(LoadBalancer.status != "DELETED").\
            first()
        if lb is None:
            session.rollback()
            raise NotFound("Load Balancer ID is not valid")
        # Deleting from ERROR is allowed; other immutable states are not
        if lb.status in ImmutableStatesNoError:
            session.rollback()
            raise ImmutableEntity(
                "Cannot delete a Load Balancer in a non-ACTIVE state"
                ", current state: {0}".format(lb.status))
        lb.status = "PENDING_DELETE"
        device = session.query(Device.id, Device.name).\
            join(LoadBalancer.devices).\
            filter(LoadBalancer.id == load_balancer_id).\
            first()
        if device is None:
            # Device was manually removed from the DB; clean up the LB's
            # rows locally since there is no worker to hand the job to.
            lb.status = "DELETED"
            session.execute(loadbalancers_devices.delete().where(
                loadbalancers_devices.c.loadbalancer == lb.id))
            session.query(Node).filter(Node.lbid == lb.id).delete()
            session.query(HealthMonitor).\
                filter(HealthMonitor.lbid == lb.id).delete()
            session.commit()
        else:
            session.commit()
            submit_job("DELETE", device.name, device.id, lb.id)
    return None
def delete(self, device_id):
    """Delete a given device, or rebuild it if load balancers use it.

    :param device_id: id of device to delete

    Urls:
       DELETE /devices/{device_id}

    Returns: None
    """
    if not tenant_is_admin(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function")
    with db_session() as session:
        # check for the device
        device = session.query(Device.id).\
            filter(Device.id == device_id).first()
        if device is None:
            session.rollback()
            response.status = 404
            # BUG FIX: use format() instead of string concatenation, which
            # raised TypeError whenever device_id was not already a string
            return dict(faultcode="Client",
                        faultstring="Device {0} not found".format(
                            device_id))
        # Is the device attached to a LB?
        lb = session.query(
            loadbalancers_devices.c.loadbalancer).\
            filter(loadbalancers_devices.c.device == device_id).\
            all()
        if lb:
            # In-use devices are rebuilt onto a spare rather than deleted
            resp = rebuild_device(device_id)
            response.status = resp[0]
            return resp[1]
        # If we get here there are no load balancers so delete device
        response.status = 204
        try:
            device = session.query(Device).\
                filter(Device.id == device_id).first()
            device.status = 'DELETED'
            session.commit()
            return None
        except Exception:
            # BUG FIX: narrowed from a bare "except:" that also swallowed
            # SystemExit/KeyboardInterrupt
            session.rollback()
            LOG.exception('Error deleting device from pool')
            response.status = 500
            return dict(faultcode="Server",
                        faultstring="Error deleting device from pool")
    return None
def send_node_change(self, message, lbid, degraded):
    """Adjust a load balancer's status/errmsg after a node state change.

    :param message: node-change message (not consulted here)
    :param lbid: id of the affected load balancer
    :param degraded: True when at least one node is currently failed
    """
    with db_session() as session:
        lb = session.query(LoadBalancer).\
            filter(LoadBalancer.id == lbid).first()
        if lb.status == "ERROR":
            lb.errmsg = "Load balancer has failed"
        elif lb.status == "ACTIVE":
            if degraded:
                # A healthy LB with a failed node drops to DEGRADED
                lb.errmsg = "A node on the load balancer has failed"
                lb.status = "DEGRADED"
        elif lb.status == "DEGRADED":
            if not degraded:
                # All nodes healthy again -- promote back to ACTIVE
                lb.errmsg = "A node on the load balancer has recovered"
                lb.status = "ACTIVE"
        session.commit()
def delete(self):
    """Remove the health monitor.

    :param load_balancer_id: id of lb

    Url:
       DELETE /loadbalancers/{load_balancer_id}/healthmonitor

    Returns: void
    """
    if not self.lbid:
        raise ClientSideError('Load Balancer ID has not been supplied')
    tenant_id = get_limited_to_project(request.headers)
    with db_session() as session:
        row = session.query(
            LoadBalancer, HealthMonitor
        ).outerjoin(LoadBalancer.monitors).\
            filter(LoadBalancer.tenantid == tenant_id).\
            filter(LoadBalancer.id == self.lbid).\
            filter(LoadBalancer.status != 'DELETED').\
            first()
        if row is None:
            session.rollback()
            raise NotFound("Load Balancer not found")
        lb, monitor = row
        if lb is None:
            session.rollback()
            raise NotFound("Load Balancer not found")
        if monitor is not None:
            # Drop the monitor row before notifying the device
            session.delete(monitor)
            session.flush()
        device = session.query(
            Device.id, Device.name
        ).join(LoadBalancer.devices).\
            filter(LoadBalancer.id == self.lbid).\
            first()
        # Track API usage for metrics
        counter = session.query(Counters).\
            filter(Counters.name == 'api_healthmonitor.delete').first()
        counter.value += 1
        session.commit()
        # Push the updated (monitor-less) config to the worker
        submit_job('UPDATE', device.name, device.id, self.lbid)
    return None
def _send_fails(self, failed_list):
    """Log an error entry for every device whose stats request failed.

    :param failed_list: device names that did not answer the stats query
    """
    with db_session() as session:
        for name in failed_list:
            lb = self._get_lb(name, session)
            if lb:
                LOG.error('Load balancer failed statistics gathering '
                          'request '
                          'ID: {0}\n'
                          'IP: {1}\n'
                          'tenant: {2}\n'.format(lb.id, lb.floatingIpAddr,
                                                 lb.tenantid))
            else:
                LOG.error('Device {0} has no Loadbalancer attached during '
                          'statistics gathering'.format(name))
def get_one(self, tenant_id):
    """Return the load balancer limit for a single tenant.

    :param tenant_id: tenant whose limit is requested
    :returns: dict with key ``maxLoadBalancers``
    """
    if not tenant_is_user(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function")
    with db_session() as session:
        max_lbs = session.query(TenantLimits.loadbalancers).\
            filter(TenantLimits.tenantid == tenant_id).scalar()
        session.commit()
    return {'maxLoadBalancers': max_lbs}
def put(self, body=None):
    """
    Updates a device entry in devices table with new status.
    Also, updates status of loadbalancers using this device
    with ERROR or ACTIVE and the errmsg field

    :param - NOTE the _lookup() hack used to get the device id

    Url:
        PUT /devices/<device ID>

    JSON Request Body
    {
        "status": <ERROR | ONLINE>
        "statusDescription": "Error Description"
    }

    Returns: None
    """
    if not self.devid:
        raise ClientSideError('Device ID is required')
    with db_session() as session:
        device = session.query(Device).\
            filter(Device.id == self.devid).first()
        if not device:
            session.rollback()
            raise ClientSideError('Device ID is not valid')
        device.status = body.status
        session.flush()
        # An ONLINE device means its LBs are ACTIVE; any other device
        # status is mirrored straight onto the LBs.
        if body.status == 'ONLINE':
            lb_status = 'ACTIVE'
        else:
            lb_status = body.status
        lb_descr = body.statusDescription
        # Now find LB's associated with this Device and update their status
        lb_rows = session.query(
            loadbalancers_devices.c.loadbalancer).\
            filter(loadbalancers_devices.c.device == self.devid).\
            all()
        for (lb_id,) in lb_rows:
            session.query(LoadBalancer).\
                filter(LoadBalancer.id == lb_id).\
                update({"status": lb_status, "errmsg": lb_descr},
                       synchronize_session='fetch')
        session.flush()
        session.commit()
    return
def _add_node(self, data):
    """Persist a freshly built device, starting it in OFFLINE state.

    :param data: dict with 'name', 'addr', 'az' and 'type' keys
    """
    LOG.info('Adding device {0} to DB'.format(data['name']))
    device = Device()
    device.name = data['name']
    # TODO: kill floatingIpAddr, make things use publicIpAddr instead
    device.publicIpAddr = device.floatingIpAddr = data['addr']
    device.az = data['az']
    device.type = data['type']
    device.pingCount = 0
    device.status = 'OFFLINE'
    device.created = None
    with db_session() as session:
        session.add(device)
        session.commit()
def get_one(self, tenant_id):
    """Fetch the per-tenant load balancer limit.

    :param tenant_id: tenant whose limit is requested
    :returns: dict with key ``maxLoadBalancers``
    """
    if not tenant_is_user(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function"
        )
    result = {}
    with db_session() as session:
        result['maxLoadBalancers'] = session.query(
            TenantLimits.loadbalancers
        ).filter(TenantLimits.tenantid == tenant_id).scalar()
        session.commit()
    return result
def send_node_change(self, message, lbid, degraded):
    """Update an LB's status and error message after a node event.

    :param message: node-change message (not consulted here)
    :param lbid: id of the affected load balancer
    :param degraded: True when at least one node is currently failed
    """
    with db_session() as session:
        lb = session.query(LoadBalancer).\
            filter(LoadBalancer.id == lbid).first()
        current = lb.status
        if current == 'ERROR':
            lb.errmsg = "Load balancer has failed"
        elif current == 'ACTIVE' and degraded:
            # Healthy LB lost a node -- drop to DEGRADED
            lb.errmsg = "A node on the load balancer has failed"
            lb.status = 'DEGRADED'
        elif current == 'DEGRADED' and not degraded:
            # All nodes back -- promote to ACTIVE again
            lb.errmsg = "A node on the load balancer has recovered"
            lb.status = 'ACTIVE'
        session.commit()
def usage(self):
    """Reports the device usage statistics for total, taken, and free

    :param None

    Url:
       GET /devices/usage

    Returns: dict
    """
    with db_session() as session:
        total = session.query(Device).count()
        # OFFLINE devices are the spare (free) pool
        free = session.query(Device).\
            filter(Device.status == 'OFFLINE').count()
        session.commit()
    taken = total - free
    response.status = 200
    return dict(total=total, free=free, taken=taken)
def get(self):
    """List every enabled protocol together with its port number.

    :returns: dict of the form {"protocols": [{"name": ..., "port": ...}]}
    """
    with db_session() as session:
        rows = session.query(Ports.protocol, Ports.portnum).\
            filter(Ports.enabled == 1).all()
        protocols = [
            {"name": row.protocol, "port": row.portnum}
            for row in rows
        ]
        # Read-only endpoint: release the transaction without committing
        session.rollback()
    return {"protocols": protocols}
def delete(self):
    """Remove the health monitor.

    :param load_balancer_id: id of lb

    Url:
       DELETE /loadbalancers/{load_balancer_id}/healthmonitor

    Returns: void
    """
    if not self.lbid:
        raise ClientSideError('Load Balancer ID has not been supplied')
    tenant_id = get_limited_to_project(request.headers)
    with db_session() as session:
        found = session.query(
            LoadBalancer, HealthMonitor
        ).outerjoin(LoadBalancer.monitors).\
            filter(LoadBalancer.tenantid == tenant_id).\
            filter(LoadBalancer.id == self.lbid).\
            filter(LoadBalancer.status != 'DELETED').\
            first()
        if found is None:
            session.rollback()
            raise NotFound("Load Balancer not found")
        lb, monitor = found
        if lb is None:
            session.rollback()
            raise NotFound("Load Balancer not found")
        if monitor is not None:
            # Remove the monitor row before telling the worker
            session.delete(monitor)
            session.flush()
        device = session.query(
            Device.id, Device.name
        ).join(LoadBalancer.devices).\
            filter(LoadBalancer.id == self.lbid).\
            first()
        # Track API usage for metrics
        counter = session.query(Counters).\
            filter(Counters.name == 'api_healthmonitor.delete').first()
        counter.value += 1
        session.commit()
        # Ship the monitor-less config to the device
        submit_job('UPDATE', device.name, device.id, self.lbid)
    return None
def tenant_is_type(headers, tenant_types):
    """ Check the tenant ID is a user of the Admin API and
        allowed to use the API command specified
    """
    tenant_id = get_limited_to_project(headers)
    if not tenant_id:
        return False
    with db_session() as session:
        matches = session.query(AdminAuth).\
            filter(AdminAuth.tenant_id == tenant_id).\
            filter(AdminAuth.level.in_(tenant_types)).count()
        session.commit()
    # Any matching auth row grants access
    return matches > 0
def post(self):
    """Add a new user to the Admin API.

    JSON body: {"tenant": <tenant id>, "level": <"USER"|"ADMIN">}
    ("level" defaults to "USER" when omitted)
    """
    if not tenant_is_admin(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function"
        )
    try:
        data = json.loads(request.body)
    except (ValueError, TypeError):
        # BUG FIX: narrowed from a bare "except:" which also swallowed
        # SystemExit/KeyboardInterrupt; json.loads signals bad input
        # with ValueError (TypeError for a non-string body)
        response.status = 400
        return dict(
            faultcode="Client",
            faultstring="Invalid JSON received"
        )
    # BUG FIX: original used data['tenant'], which raised an unhandled
    # KeyError (HTTP 500) when the key was missing entirely
    if data.get('tenant') is None:
        response.status = 400
        return dict(
            faultcode="Client",
            faultstring="Tenant ID required"
        )
    tenant_id = data['tenant']
    if 'level' not in data:
        level = 'USER'
    elif data['level'] not in ['USER', 'ADMIN']:
        response.status = 400
        return dict(
            faultcode="Client",
            faultstring="Only USER or ADMIN levels allowed"
        )
    else:
        level = data['level']
    with db_session() as session:
        # Reject duplicate accounts for the same tenant
        user_test = session.query(AdminAuth).\
            filter(AdminAuth.tenant_id == tenant_id).count()
        if user_test > 0:
            response.status = 400
            return dict(
                faultcode="Client",
                faultstring="Tenant already has an account"
            )
        user = AdminAuth()
        user.tenant_id = tenant_id
        user.level = level
        session.add(user)
        session.commit()
def put(self, tenant_id=None):
    """Update API limits, globally or for a single tenant.

    With no tenant_id the JSON body maps global limit names to values;
    with a tenant_id only 'maxLoadBalancers' may be set.

    :param tenant_id: optional tenant to scope the update to
    """
    if not tenant_is_admin(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function"
        )
    try:
        data = json.loads(request.body)
    except (ValueError, TypeError):
        # BUG FIX: narrowed from a bare "except:" which also swallowed
        # SystemExit/KeyboardInterrupt; json.loads signals bad input
        # with ValueError (TypeError for a non-string body)
        response.status = 400
        return dict(
            faultcode="Client",
            faultstring="Invalid JSON received"
        )
    with db_session() as session:
        if tenant_id is None:
            # Global limits: every key must name an existing limit row
            for key, value in data.iteritems():
                limit = session.query(Limits).filter(Limits.name == key).\
                    first()
                if limit is None:
                    session.rollback()
                    response.status = 400
                    return dict(
                        faultcode="Client",
                        faultstring="Limit not found: {0}".format(key)
                    )
                limit.value = value
        else:
            if 'maxLoadBalancers' in data:
                limit = session.query(TenantLimits).\
                    filter(TenantLimits.tenantid == tenant_id).first()
                if limit is not None:
                    limit.loadbalancers = data['maxLoadBalancers']
                else:
                    # First per-tenant override for this tenant
                    new_limit = TenantLimits()
                    new_limit.tenantid = tenant_id
                    new_limit.loadbalancers = data['maxLoadBalancers']
                    session.add(new_limit)
            else:
                session.rollback()
                response.status = 400
                return dict(
                    faultcode="Client",
                    faultstring="No user settable limit in json"
                )
        session.commit()
def get_all(self):
    """Return every globally defined API limit as a {name: value} dict."""
    if not tenant_is_user(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function")
    with db_session() as session:
        limits = session.query(Limits.name, Limits.value).all()
        if limits is None:
            response.status = 500
            return dict(faultcode="Server",
                        faultstring="Error obtaining limits")
        ret = dict((limit.name, limit.value) for limit in limits)
        session.commit()
    return ret