def _exec_ping(self):
    """
    Ping every ONLINE device and count how many were pinged and failed.

    Returns a ``(pings, failed)`` tuple, or ``(0, 0)`` when no device has
    status ONLINE. When the number of failures exceeds ``self.error_limit``
    the method logs an error and returns early: the failures are not passed
    to ``self._send_fails``, node statuses are not processed and the
    session is not committed.
    """
    pings, failed = 0, 0
    LOG.info('Running ping check')
    with db_session() as session:
        online = session.query(Device.id, Device.name)
        online = online.filter(Device.status == 'ONLINE').all()
        pings = len(online)
        if not online:
            LOG.info('No LBs to ping')
            return (0, 0)

        names = [row.name for row in online]
        failed_lbs, node_status = GearJobs().send_pings(names)
        failed = len(failed_lbs)

        # Bail out before any recovery work when too much failed at once
        if failed > self.error_limit:
            LOG.error(
                'Too many simultaneous Load Balancer Failures.'
                ' Aborting recovery attempt'
            )
            return pings, failed

        if failed > 0:
            self._send_fails(failed_lbs)

        # Node status is handled only after the device failures
        self._update_nodes(node_status)
        session.commit()

    return pings, failed
def discover(self, device_id):
    """
    Discovers information about a given libra worker based on device ID

    :param device_id: ID used to look up the device name in the database.
    :returns: a dict with ``id``, ``version`` and ``release`` on success.
        On failure a dict with ``faultcode`` and ``faultstring`` is
        returned and ``response.status`` is set to 401 (caller is not
        authorized), 404 (no device with that ID) or 500 (the discover
        call returned nothing).
    """
    if not tenant_is_user(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function"
        )

    with db_session() as session:
        device = session.query(Device.name).\
            filter(Device.id == device_id).scalar()
        session.commit()

    # Check the raw query result: str(None) is the string 'None', which
    # would never compare as None and would hide the not-found case.
    if device is None:
        response.status = 404
        return dict(
            faultcode="Client",
            # format() also copes with a non-string device_id
            faultstring="Device {0} not found".format(device_id)
        )
    device_name = str(device)

    gearman = GearJobs()
    discover = gearman.get_discover(device_name)
    if discover is None:
        response.status = 500
        return dict(
            faultcode="Server",
            faultstring="Could not discover device"
        )

    return dict(
        id=device_id, version=discover['version'],
        release=discover['release']
    )
def _exec_ping(self):
    """
    Ping every ONLINE device and count how many were pinged and failed.

    Returns a ``(pings, failed)`` tuple, or ``(0, 0)`` when no device has
    status ONLINE. When the number of failures exceeds ``self.error_limit``
    the method logs an error and returns early: the failures are not passed
    to ``self._send_fails``, node statuses are not processed and the
    session is not committed.
    """
    pings, failed = 0, 0
    LOG.info('Running ping check')
    with db_session() as session:
        online = session.query(Device.id, Device.name)
        online = online.filter(Device.status == 'ONLINE').all()
        pings = len(online)
        if not online:
            LOG.info('No LBs to ping')
            return (0, 0)

        names = [row.name for row in online]
        failed_lbs, node_status = GearJobs().send_pings(names)
        failed = len(failed_lbs)

        # Bail out before any recovery work when too much failed at once
        if failed > self.error_limit:
            LOG.error('Too many simultaneous Load Balancer Failures.'
                      ' Aborting recovery attempt')
            return pings, failed

        if failed > 0:
            self._send_fails(failed_lbs)

        # Node status is handled only after the device failures
        self._update_nodes(node_status)
        session.commit()

    return pings, failed
def discover(self, device_id):
    """
    Discovers information about a given libra worker based on device ID

    :param device_id: ID used to look up the device name in the database.
    :returns: a dict with ``id``, ``version`` and ``release`` on success.
        On failure a dict with ``faultcode`` and ``faultstring`` is
        returned and ``response.status`` is set to 401 (caller is not
        authorized), 404 (no device with that ID) or 500 (the discover
        call returned nothing).
    """
    if not tenant_is_user(request.headers):
        response.status = 401
        return dict(
            faultcode="Client",
            faultstring="Client not authorized to access this function")

    with db_session() as session:
        device = session.query(Device.name).\
            filter(Device.id == device_id).scalar()
        session.commit()

    # Check the raw query result: str(None) is the string 'None', which
    # would never compare as None and would hide the not-found case.
    if device is None:
        response.status = 404
        # format() also copes with a non-string device_id
        return dict(faultcode="Client",
                    faultstring="Device {0} not found".format(device_id))
    device_name = str(device)

    gearman = GearJobs()
    discover = gearman.get_discover(device_name)
    if discover is None:
        response.status = 500
        return dict(faultcode="Server",
                    faultstring="Could not discover device")

    return dict(id=device_id, version=discover['version'],
                release=discover['release'])
def _exec_stats(self):
    """
    Collect usage stats from every ONLINE device once the interval is up.

    Returns a ``(failed, total)`` tuple. ``(0, 0)`` is returned, after a
    rollback, when the "stats" Billing row was updated within the last
    ``self.stats_freq`` minutes or when no device has status ONLINE.
    The session is committed only if at least one device did not fail;
    otherwise it is rolled back.
    """
    failed = 0
    with db_session() as session:
        window = datetime.timedelta(minutes=self.stats_freq)
        cutoff = timeutils.utcnow() - window
        exp_time = cutoff.strftime('%Y-%m-%d %H:%M:%S')

        stats_row = session.query(Billing.last_update).\
            filter(Billing.name == "stats")
        recent = stats_row.filter(Billing.last_update > exp_time).first()
        if recent is not None:
            # The last run is newer than the cutoff, so skip this round
            LOG.info('Not time to gather stats yet {0}'.format(exp_time))
            session.rollback()
            return 0, 0

        # Stamp the stats row with the current database time
        session.query(Billing).filter(Billing.name == "stats").update(
            {"last_update": func.now()}, synchronize_session='fetch'
        )

        # Only ONLINE devices are asked for stats
        devices = session.query(Device.id, Device.name).\
            filter(Device.status == 'ONLINE').all()
        if not devices:
            LOG.error('No ONLINE devices to gather usage stats from')
            session.rollback()
            return 0, 0

        total = len(devices)
        names = [device.name for device in devices]
        failed_list, results = GearJobs().get_stats(names)
        failed = len(failed_list)

        if failed > 0:
            self._send_fails(failed_list)

        if total > failed:
            # At least one device answered
            self._update_stats(results, failed_list)
            session.commit()
        else:
            # Nothing succeeded; rolling back lets the next timer retry
            session.rollback()

    return failed, total
def _exec_stats(self):
    """
    Collect usage stats from every ONLINE device once the interval is up.

    Returns a ``(failed, total)`` tuple. ``(0, 0)`` is returned, after a
    rollback, when the "stats" Billing row was updated within the last
    ``self.stats_freq`` minutes or when no device has status ONLINE.
    The session is committed only if at least one device did not fail;
    otherwise it is rolled back.
    """
    failed = 0
    with db_session() as session:
        window = datetime.timedelta(minutes=self.stats_freq)
        cutoff = timeutils.utcnow() - window
        exp_time = cutoff.strftime('%Y-%m-%d %H:%M:%S')

        stats_row = session.query(Billing.last_update).\
            filter(Billing.name == "stats")
        recent = stats_row.filter(Billing.last_update > exp_time).first()
        if recent is not None:
            # The last run is newer than the cutoff, so skip this round
            LOG.info('Not time to gather stats yet {0}'.format(exp_time))
            session.rollback()
            return 0, 0

        # Stamp the stats row with the current database time
        session.query(Billing).filter(Billing.name == "stats").update(
            {"last_update": func.now()}, synchronize_session='fetch')

        # Only ONLINE devices are asked for stats
        devices = session.query(Device.id, Device.name).\
            filter(Device.status == 'ONLINE').all()
        if not devices:
            LOG.error('No ONLINE devices to gather usage stats from')
            session.rollback()
            return 0, 0

        total = len(devices)
        names = [device.name for device in devices]
        failed_list, results = GearJobs().get_stats(names)
        failed = len(failed_list)

        if failed > 0:
            self._send_fails(failed_list)

        if total > failed:
            # At least one device answered
            self._update_stats(results, failed_list)
            session.commit()
        else:
            # Nothing succeeded; rolling back lets the next timer retry
            session.rollback()

    return failed, total
def _exec_offline_check(self):
    """
    Check every OFFLINE device and reset ping counts for those that pass.

    Returns a ``(tested, failed)`` tuple, or ``(0, 0)`` when no device has
    status OFFLINE. When the number of failures exceeds
    ``self.error_limit`` the method logs an error and returns early:
    nothing is passed to ``self._send_delete``, no ping count is reset and
    the session is not committed.
    """
    tested, failed = 0, 0
    LOG.info('Running OFFLINE check')
    with db_session() as session:
        # Plain status filter; no join against other tables is done here
        offline = session.query(Device.id, Device.name).\
            filter(Device.status == 'OFFLINE').all()
        tested = len(offline)
        if not offline:
            LOG.info('No OFFLINE Load Balancers to check')
            return (0, 0)

        node_list = [lb.name for lb in offline]
        failed_lbs = GearJobs().offline_check(node_list)
        failed = len(failed_lbs)

        # Bail out before any deletion work when too much failed at once
        if failed > self.error_limit:
            LOG.error(
                'Too many simultaneous Load Balancer Failures.'
                ' Aborting deletion attempt'
            )
            return tested, failed

        if failed > 0:
            self._send_delete(failed_lbs)

        # Every checked device that is not in the failed list gets its
        # ping count set back to zero
        succeeded = list(set(node_list) - set(failed_lbs))
        ping_counts = session.query(Device.name, Device.pingCount)
        ping_counts.filter(Device.name.in_(succeeded)).update(
            {"pingCount": 0}, synchronize_session='fetch'
        )
        session.commit()

    return tested, failed