def post(self, hostname: str):
    """Restore configuration to previous version

    Reads 'job_id' from the JSON body, fetches the configuration stored for
    that job through Job.get_previous_config() and schedules an apply_config
    job for the device. The scheduled job is a dry run unless the body
    contains 'dry_run' set to the boolean false.

    Args:
        hostname: Hostname of the device to restore

    Returns:
        Tuple of (result dict, HTTP status code)
    """
    body = request.get_json()
    apply_kwargs = {'hostname': hostname}
    config = None

    if not Device.valid_hostname(hostname):
        return empty_result(status='error', data="Invalid hostname specified"), 400

    if 'job_id' not in body:
        return empty_result('error', "job_id must be specified"), 400
    try:
        job_id = int(body['job_id'])
    except Exception:
        return empty_result('error', "job_id must be an integer"), 400

    with sqla_session() as session:
        try:
            previous = Job.get_previous_config(session, hostname, job_id=job_id)
            failed = previous['failed']
            if not failed and 'config' in previous:
                config = previous['config']
        except JobNotFoundError as e:
            return empty_result('error', str(e)), 404
        except InvalidJobError as e:
            return empty_result('error', str(e)), 500
        except Exception as e:
            return empty_result('error', f"Unhandled exception: {e}"), 500

    if failed:
        return empty_result('error', "The specified job_id has a failed status"), 400
    if not config:
        return empty_result('error', "No config found in this job"), 500

    # Only an explicit boolean false disables dry run
    live_run = 'dry_run' in body and body['dry_run'] is False
    apply_kwargs['dry_run'] = not live_run
    apply_kwargs['config'] = config

    restore_job_id = Scheduler().add_onetime_job(
        'cnaas_nms.confpush.sync_devices:apply_config',
        when=1,
        scheduled_by=get_jwt_identity(),
        kwargs=apply_kwargs,
    )

    res = empty_result(data=f"Scheduled job to restore {hostname}")
    res['job_id'] = restore_job_id
    return res, 200
def setUp(self):
    """Load testdata.yml from the package data directory and start the scheduler."""
    testdata_path = os.path.join(
        pkg_resources.resource_filename(__name__, 'data'), 'testdata.yml')
    with open(testdata_path, 'r') as testdata_file:
        self.testdata = yaml.safe_load(testdata_file)
    Scheduler().start()
def post(self) -> tuple:
    """ Download new firmware

    Requires 'url', 'sha1' and 'verify_tls' in the JSON body and passes them
    unchanged to a scheduled get_firmware job.
    """
    payload = request.get_json()
    required_params = ('url', 'sha1', 'verify_tls')

    # Report the first missing parameter, checked in the order listed above
    for param in required_params:
        if param not in payload:
            return empty_result(status='error', data='Missing parameter ' + param)

    job_kwargs = {param: payload[param] for param in required_params}

    job_id = Scheduler().add_onetime_job(
        'cnaas_nms.api.firmware:get_firmware',
        when=1,
        scheduled_by=get_jwt_identity(),
        kwargs=job_kwargs)

    res = empty_result(data='Scheduled job to download firmware')
    res['job_id'] = job_id
    return res
def delete(self, device_id):
    """ Delete device from ID

    If the JSON body contains 'factory_default' set to true, a device_erase
    job is scheduled instead of removing the device from the database.
    Otherwise the device row is deleted directly.

    Args:
        device_id: ID of the device to delete

    Returns:
        Result dict, optionally as a tuple with an HTTP status code
    """
    json_data = request.get_json()

    factory_default = False
    if isinstance(json_data, dict) and 'factory_default' in json_data:
        factory_default = json_data['factory_default']
        # Reject non-boolean values explicitly; previously such a request
        # matched no branch that returned a response.
        if not isinstance(factory_default, bool):
            return empty_result(
                status='error',
                data="'factory_default' must be a boolean"), 400

    if factory_default:
        scheduler = Scheduler()
        job_id = scheduler.add_onetime_job(
            'cnaas_nms.confpush.erase:device_erase',
            when=1,
            scheduled_by=get_jwt_identity(),
            kwargs={'device_id': device_id})
        return empty_result(
            data='Scheduled job {} to factory default device'.format(job_id))

    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.id == device_id).one_or_none()
        if not dev:
            return empty_result('error', "Device not found"), 404
        try:
            session.delete(dev)
            session.commit()
        except IntegrityError as e:
            session.rollback()
            return empty_result(
                status='error',
                data="Could not remove device because existing references: {}".format(e))
        except Exception as e:
            session.rollback()
            return empty_result(
                status='error',
                data="Could not remove device: {}".format(e))
        return empty_result(status="success",
                            data={"deleted_device": dev.as_dict()}), 200
def get_app():
    """Prepare and return the application object.

    Starts a local scheduler when the uwsgi module cannot be imported, loads
    plugins and then tries to clear old job locks and jobs with invalid
    states. A failure in either cleanup step is only printed.
    """
    from cnaas_nms.scheduler.scheduler import Scheduler
    from cnaas_nms.plugins.pluginmanager import PluginManagerHandler
    from cnaas_nms.db.session import sqla_session
    from cnaas_nms.db.joblock import Joblock
    from cnaas_nms.db.job import Job

    try:
        import uwsgi
    except ImportError:
        # Outside uwsgi there is no separate "mule" running the scheduler
        Scheduler().start()
    else:
        print("Running inside uwsgi")

    plugin_handler = PluginManagerHandler()
    plugin_handler.load_plugins()

    try:
        with sqla_session() as session:
            Joblock.clear_locks(session)
    except Exception as e:
        print(f"Unable to clear old locks from database at startup: {e}")

    try:
        with sqla_session() as session:
            Job.clear_jobs(session)
    except Exception as e:
        print(f"Unable to clear jobs with invalid states: {e}")

    return app.app
def get_app():
    """Return the application object, starting a scheduler if not under uwsgi."""
    try:
        import uwsgi
    except ImportError:
        # Outside uwsgi there is no separate "mule" running the scheduler
        Scheduler().start()
    else:
        print("Running inside uwsgi")
    return app.app
def init_access_device(self):
    """Schedule init_access_device_step1 with the IDs found in the test data."""
    scheduler = Scheduler()
    step1_func = cnaas_nms.confpush.init_device.init_access_device_step1
    step1_kwargs = {
        'device_id': self.testdata['init_access_device_id'],
        'new_hostname': self.testdata['init_access_new_hostname'],
    }
    step1_job_id = scheduler.add_onetime_job(step1_func, when=0, kwargs=step1_kwargs)
    print(f"Step1 scheduled as ID { step1_job_id }")
def post(self):
    """Schedule a sync_devices job.

    The target is selected from the JSON body: 'hostname' (must be a managed
    device), 'device_type' (must be a DeviceType name) or 'all' set to true.
    'dry_run' set to false and a boolean 'force' are passed on to the job.
    """
    json_data = request.get_json()
    kwargs: dict = {}

    if 'hostname' in json_data:
        hostname = str(json_data['hostname'])
        if not Device.valid_hostname(hostname):
            return empty_result(
                status='error',
                data=f"Hostname '{hostname}' is not a valid hostname"), 400
        with sqla_session() as session:
            query = session.query(Device).filter(Device.hostname == hostname)
            dev: Device = query.one_or_none()
            if not dev or dev.state != DeviceState.MANAGED:
                return empty_result(
                    status='error',
                    data=f"Hostname '{hostname}' not found or is not a managed device"
                ), 400
        kwargs['hostname'] = hostname
        what = hostname
    elif 'device_type' in json_data:
        devtype_name = str(json_data['device_type']).upper()
        if not DeviceType.has_name(devtype_name):
            return empty_result(
                status='error',
                data=f"Invalid device type '{json_data['device_type']}' specified"
            ), 400
        kwargs['device_type'] = devtype_name
        what = f"{json_data['device_type']} devices"
    elif 'all' in json_data and json_data['all'] is True:
        what = "all devices"
    else:
        return empty_result(
            status='error',
            data="No devices to synchronize was specified"), 400

    # dry_run is only passed on when explicitly disabled
    if 'dry_run' in json_data and json_data['dry_run'] is False:
        kwargs['dry_run'] = False
    if 'force' in json_data and isinstance(json_data['force'], bool):
        kwargs['force'] = json_data['force']

    job_id = Scheduler().add_onetime_job(
        'cnaas_nms.confpush.sync_devices:sync_devices',
        when=1,
        kwargs=kwargs)

    res = empty_result(data=f"Scheduled job to synchronize {what}")
    res['job_id'] = job_id
    return res
def test_add_schedule(self):
    """Schedule one succeeding and one failing test function; expect Job objects back."""
    scheduler = Scheduler()
    success_job = scheduler.add_onetime_job(
        testfunc_success, when=1, kwargs={'text': 'success'})
    exception_job = scheduler.add_onetime_job(
        testfunc_exception, when=1, kwargs={'text': 'exception'})

    assert isinstance(success_job, Job)
    assert isinstance(exception_job, Job)

    print(f"Job1 scheduled as ID { success_job.id }")
    print(f"Job2 scheduled as ID { exception_job.id }")
def delete(self, filename: str) -> dict:
    """ Remove firmware

    Schedules a remove_file job for the given filename and returns the
    result dict with the scheduled job ID added.
    """
    remove_job_id = Scheduler().add_onetime_job(
        'cnaas_nms.api.firmware:remove_file',
        when=1,
        scheduled_by=get_jwt_identity(),
        kwargs={'filename': filename})

    result = empty_result(data='Scheduled job to remove firmware')
    result['job_id'] = remove_job_id
    return result
def get(self, filename: str) -> dict:
    """ Get information about a single firmware

    Schedules a get_firmware_chksum job for the given filename and returns
    the result dict with the scheduled job ID added.
    """
    chksum_job_id = Scheduler().add_onetime_job(
        'cnaas_nms.api.firmware:get_firmware_chksum',
        when=1,
        scheduled_by=get_jwt_identity(),
        kwargs={'filename': filename})

    result = empty_result(data='Scheduled job get firmware information')
    result['job_id'] = chksum_job_id
    return result
def test_add_schedule(self):
    """Schedule one succeeding and one failing test function; expect string job IDs."""
    scheduler = Scheduler()
    success_job_id = scheduler.add_onetime_job(
        testfunc_success,
        when=1,
        scheduled_by='test_user',
        kwargs={'text': 'success'})
    exception_job_id = scheduler.add_onetime_job(
        testfunc_exception,
        when=1,
        scheduled_by='test_user',
        kwargs={'text': 'exception'})

    assert isinstance(success_job_id, str)
    assert isinstance(exception_job_id, str)

    print(f"Job1 scheduled as ID { success_job_id }")
    print(f"Job2 scheduled as ID { exception_job_id }")
def test_abort_schedule(self):
    """Schedule a job far in the future, remove it and verify it ends up ABORTED."""
    scheduler = Scheduler()
    abort_job_id = scheduler.add_onetime_job(
        testfunc_success,
        when=600,
        scheduled_by='test_user',
        kwargs={'text': 'abort'})
    assert isinstance(abort_job_id, int)
    print(f"Test job 3 scheduled as ID { abort_job_id }")

    scheduler.remove_scheduled_job(abort_job_id)
    # Wait before reading the job status back from the database
    time.sleep(3)

    with sqla_session() as session:
        aborted_job = session.query(Job).filter(Job.id == abort_job_id).one_or_none()
        self.assertIsInstance(aborted_job, Job, "Test job 3 could not be found")
        self.assertEqual(aborted_job.status, JobStatus.ABORTED,
                         "Test job 3 did not abort")
        self.assertEqual(aborted_job.result, {'message': 'removed'},
                         "Test job 3 returned bad status")
def schedule_init_access_device_step2(device_id: int, iteration: int) -> Optional[Job]:
    """Schedule the next iteration of init_access_device_step2.

    Args:
        device_id: Device ID passed on to the scheduled job
        iteration: Current iteration; the job is scheduled with iteration + 1

    Returns:
        The value returned by Scheduler.add_onetime_job(), or None when
        iteration is outside the range 1 .. max_iterations - 1.
    """
    max_iterations = 2
    if not 0 < iteration < max_iterations:
        return None

    # Delay grows with the iteration number: 30 seconds per iteration
    return Scheduler().add_onetime_job(
        'cnaas_nms.confpush.init_device:init_access_device_step2',
        when=(30 * iteration),
        kwargs={'device_id': device_id, 'iteration': iteration + 1})
def put(self, job_id):
    """Perform an action on a job; only 'ABORT' is handled.

    A scheduled job is removed from the scheduler, a running job has its
    status set to ABORTING. The job is then read back and returned.

    Args:
        job_id: ID of the job to act on

    Returns:
        Result dict, as a tuple with status 400 on errors
    """
    json_data = request.get_json()
    if 'action' not in json_data:
        return empty_result(status='error', data="Action must be specified"), 400

    with sqla_session() as session:
        job = session.query(Job).filter(Job.id == job_id).one_or_none()
        if not job:
            return empty_result(
                status='error',
                data="No job with id {} found".format(job_id)), 400
        job_status = job.status

    action = str(json_data['action']).upper()
    if action != 'ABORT':
        return empty_result(status='error',
                            data="Unknown action: {}".format(action)), 400

    allowed_jobstates = [JobStatus.SCHEDULED, JobStatus.RUNNING]
    if job_status not in allowed_jobstates:
        allowed_names = " or ".join([state.name for state in allowed_jobstates])
        return empty_result(
            status='error',
            data="Job id {} is in state {}, must be {} to abort".format(
                job_id, job_status, allowed_names)), 400

    abort_reason = "Aborted via API call"
    if 'abort_reason' in json_data and isinstance(json_data['abort_reason'], str):
        abort_reason = json_data['abort_reason'][:255]
    abort_reason += " (aborted by {})".format(get_jwt_identity())

    if job_status == JobStatus.SCHEDULED:
        Scheduler().remove_scheduled_job(job_id=job_id, abort_message=abort_reason)
        # Pause before the job is read back below
        time.sleep(2)
    elif job_status == JobStatus.RUNNING:
        with sqla_session() as session:
            running_job = session.query(Job).filter(Job.id == job_id).one_or_none()
            running_job.status = JobStatus.ABORTING

    with sqla_session() as session:
        current_job = session.query(Job).filter(Job.id == job_id).one_or_none()
        return empty_result(data={"jobs": [current_job.as_dict()]})
def post(self, device_id: int):
    """Schedule initialization of a device.

    The JSON body must contain a valid new 'hostname' and a 'device_type'
    that is a DeviceType name. Only device type ACCESS schedules an init
    job here; any other valid device type is rejected with an error.

    Args:
        device_id: ID of the device to initialize

    Returns:
        Result dict with 'job_id' on success, or a tuple of
        (result dict, 400) on invalid input
    """
    if not isinstance(device_id, int):
        return empty_result(status='error',
                            data="'device_id' must be an integer"), 400

    json_data = request.get_json()

    if 'hostname' not in json_data:
        return empty_result(
            status='error',
            data="POST data must include new 'hostname'"), 400
    if not Device.valid_hostname(json_data['hostname']):
        return empty_result(status='error',
                            data='Provided hostname is not valid'), 400
    new_hostname = json_data['hostname']

    if 'device_type' not in json_data:
        return empty_result(
            status='error',
            data="POST data must include 'device_type'"), 400
    try:
        device_type = str(json_data['device_type']).upper()
    except Exception:
        return empty_result(status='error',
                            data="'device_type' must be a string"), 400

    if not DeviceType.has_name(device_type):
        return empty_result(status='error',
                            data="Invalid 'device_type' provided"), 400

    # Without this guard a valid non-ACCESS type scheduled nothing and the
    # handler ended without a proper response.
    if device_type != DeviceType.ACCESS.name:
        return empty_result(
            status='error',
            data=f"Init of device type '{device_type}' is not supported"), 400

    scheduler = Scheduler()
    job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_access_device_step1',
        when=1,
        kwargs={
            'device_id': device_id,
            'new_hostname': new_hostname
        })

    res = empty_result(
        data=f"Scheduled job to initialize device_id { device_id }")
    res['job_id'] = job_id
    return res
def schedule_discover_device(ztp_mac: str, dhcp_ip: str, iteration: int,
                             scheduled_by: str) -> Optional[Job]:
    """Schedule a discover_device job for the given iteration.

    Args:
        ztp_mac: Passed on to the job as 'ztp_mac'
        dhcp_ip: Passed on to the job as 'dhcp_ip'
        iteration: Iteration number, also used to scale the delay
        scheduled_by: Passed to the scheduler as the scheduling user

    Returns:
        The value returned by Scheduler.add_onetime_job(), or None when
        iteration is outside the range 1 .. max_iterations.
    """
    max_iterations = 3
    if iteration <= 0 or iteration > max_iterations:
        return None

    discover_kwargs = {
        'ztp_mac': ztp_mac,
        'dhcp_ip': dhcp_ip,
        'iteration': iteration,
    }
    # Delay grows with the iteration number: 60 seconds per iteration
    return Scheduler().add_onetime_job(
        'cnaas_nms.confpush.init_device:discover_device',
        when=(60 * iteration),
        scheduled_by=scheduled_by,
        kwargs=discover_kwargs)
def post(self):
    """ Start update facts of device(s)

    Requires 'hostname' in the JSON body. The device must exist and be in
    state MANAGED or UNMANAGED. Schedules an update_facts job and returns
    a JSON response with the job ID and an X-Total-Count header.
    """
    json_data = request.get_json()
    kwargs: dict = {}
    total_count: Optional[int] = None

    if 'hostname' not in json_data:
        return empty_result(
            status='error',
            data="No target to be updated was specified"), 400

    hostname = str(json_data['hostname'])
    if not Device.valid_hostname(hostname):
        return empty_result(
            status='error',
            data=f"Hostname '{hostname}' is not a valid hostname"), 400

    with sqla_session() as session:
        query = session.query(Device).filter(Device.hostname == hostname)
        dev: Device = query.one_or_none()
        allowed_states = (DeviceState.MANAGED, DeviceState.UNMANAGED)
        if not dev or dev.state not in allowed_states:
            return empty_result(
                status='error',
                data=f"Hostname '{hostname}' not found or is in invalid state"
            ), 400
    kwargs['hostname'] = hostname
    total_count = 1

    job_id = Scheduler().add_onetime_job(
        'cnaas_nms.confpush.update:update_facts',
        when=1,
        scheduled_by=get_jwt_identity(),
        kwargs=kwargs)

    res = empty_result(data=f"Scheduled job to update facts for {hostname}")
    res['job_id'] = job_id

    resp = make_response(json.dumps(res), 200)
    if total_count:
        resp.headers['X-Total-Count'] = total_count
    resp.headers['Content-Type'] = "application/json"
    return resp
def test_add_schedule(self):
    """Schedule a succeeding and a failing job and verify their stored outcome."""
    scheduler = Scheduler()
    success_job_id = scheduler.add_onetime_job(
        testfunc_success,
        when=1,
        scheduled_by='test_user',
        kwargs={'text': 'success'})
    exception_job_id = scheduler.add_onetime_job(
        testfunc_exception,
        when=1,
        scheduled_by='test_user',
        kwargs={'text': 'exception'})

    assert isinstance(success_job_id, int)
    assert isinstance(exception_job_id, int)
    print(f"Test job 1 scheduled as ID { success_job_id }")
    print(f"Test job 2 scheduled as ID { exception_job_id }")

    # Wait before reading the job results back from the database
    time.sleep(3)

    with sqla_session() as session:
        success_job = session.query(Job).filter(Job.id == success_job_id).one_or_none()
        self.assertIsInstance(success_job, Job, "Test job 1 could not be found")
        self.assertEqual(success_job.status, JobStatus.FINISHED,
                         "Test job 1 did not finish")
        self.assertEqual(success_job.result, {'status': 'success'},
                         "Test job 1 returned bad status")

        exception_job = session.query(Job).filter(Job.id == exception_job_id).one_or_none()
        self.assertIsInstance(exception_job, Job, "Test job 2 could not be found")
        self.assertEqual(exception_job.status, JobStatus.EXCEPTION,
                         "Test job 2 did not make exception")
        self.assertIn("message", exception_job.exception,
                      "Test job 2 did not contain message in exception")
def post(self, hostname: str):
    """Apply exact specified configuration to device without using templates

    Requires 'full_config' in the JSON body. The scheduled apply_config job
    is a dry run unless 'dry_run' is the boolean false, which is only
    accepted when the 'allow_apply_config_liverun' API setting is enabled.

    Args:
        hostname: Hostname of the device to configure

    Returns:
        Tuple of (result dict, HTTP status code)
    """
    json_data = request.get_json()
    apply_kwargs = {'hostname': hostname}
    allow_live_run = get_apidata()['allow_apply_config_liverun']

    if not Device.valid_hostname(hostname):
        return empty_result(status='error', data="Invalid hostname specified"), 400

    if 'full_config' not in json_data:
        return empty_result('error', "full_config must be specified"), 400

    live_run_requested = 'dry_run' in json_data and json_data['dry_run'] is False
    if live_run_requested and not allow_live_run:
        return empty_result('error', "Apply config live_run is not allowed"), 400
    apply_kwargs['dry_run'] = not live_run_requested

    apply_kwargs['config'] = json_data['full_config']

    apply_job_id = Scheduler().add_onetime_job(
        'cnaas_nms.confpush.sync_devices:apply_config',
        when=1,
        scheduled_by=get_jwt_identity(),
        kwargs=apply_kwargs,
    )

    res = empty_result(data=f"Scheduled job to apply config {hostname}")
    res['job_id'] = apply_job_id
    return res, 200
def main_loop():
    """Run the scheduler inside a uwsgi mule and process mule messages forever.

    Each message is a JSON object. 'scheduler_action' set to "remove"
    removes the local job with the given 'id'; anything else adds a local
    job, provided the pre-schedule checks do not reject it. An integer
    'when' is converted to a 'run_date' that many seconds from now (UTC).
    """
    try:
        import uwsgi
    except Exception as e:
        logger.exception(f"Mule not running in uwsgi, exiting: {e}")
        print("Error, not running in uwsgi")
        return

    print("Running scheduler in uwsgi mule")
    scheduler = Scheduler()
    scheduler.start()

    plugin_handler = PluginManagerHandler()
    plugin_handler.load_plugins()

    try:
        with sqla_session() as session:
            Joblock.clear_locks(session)
    except Exception as e:
        logger.exception(f"Unable to clear old locks from database at startup: {e}")

    # Message keys consumed by the scheduler itself, not passed to the job
    scheduler_keys = ['func', 'trigger', 'id', 'run_date', 'scheduler_action']

    while True:
        data: dict = json.loads(uwsgi.mule_get_msg())

        remove_requested = ('scheduler_action' in data
                            and data['scheduler_action'] == "remove")
        action = "remove" if remove_requested else "add"

        if 'when' in data and isinstance(data['when'], int):
            delay = datetime.timedelta(seconds=data['when'])
            data['run_date'] = datetime.datetime.utcnow() + delay
            del data['when']

        kwargs = {key: value for key, value in data.items()
                  if key not in scheduler_keys}

        # Perform pre-schedule job checks; a failing check skips the job,
        # an exception in the check itself is logged and the job proceeds
        try:
            if action == "add" and not pre_schedule_checks(scheduler, kwargs):
                continue
        except Exception as e:
            logger.exception(f"Unable to perform pre-schedule job checks: {e}")

        if action == "add":
            scheduler.add_local_job(data['func'],
                                    trigger=data['trigger'],
                                    kwargs=kwargs,
                                    id=data['id'],
                                    run_date=data['run_date'],
                                    name=data['func'])
        elif action == "remove":
            scheduler.remove_local_job(data['id'])
def tearDown(self):
    """Wait for scheduled and running jobs to drain, then shut down the scheduler.

    Polls up to 10 times with 10 seconds between attempts; the scheduler is
    shut down as soon as no jobs remain, or after the last attempt.
    """
    scheduler = Scheduler()
    ap_scheduler = scheduler.get_scheduler()
    time.sleep(1)

    for _ in range(10):
        scheduled_count = len(ap_scheduler.get_jobs())
        running_count = len(Jobtracker.get_running_jobs())
        print(f"Number of jobs scheduled: {scheduled_count}, "
              f"number of jobs running: {running_count}")
        if scheduled_count == 0 and running_count == 0:
            print("Shutting down scheduler")
            break
        print("Scheduled jobs still in queue: ")
        ap_scheduler.print_jobs()
        print("Sleeping 10 seconds")
        time.sleep(10)

    scheduler.shutdown()
def main_loop():
    """Run the scheduler inside a uwsgi mule and add a job for every mule message.

    Each message is a JSON object with 'func', 'trigger', 'id' and either
    'run_date' or an integer 'when' (seconds from now, UTC). Remaining keys
    are passed to the job as keyword arguments.
    """
    try:
        import uwsgi
    except Exception:
        print("Error, not running in uwsgi")
        return

    print("Running scheduler in uwsgi mule")
    scheduler = Scheduler()
    scheduler.start()

    # Message keys consumed by the scheduler itself, not passed to the job
    scheduler_keys = ('func', 'trigger', 'id', 'run_date')

    while True:
        mule_data = uwsgi.mule_get_msg()
        data: dict = json.loads(mule_data)

        # Membership test instead of truthiness: when=0 is a valid delay and
        # a message without 'when' must not raise KeyError here.
        if 'when' in data and isinstance(data['when'], int):
            data['run_date'] = (datetime.datetime.utcnow()
                                + datetime.timedelta(seconds=data['when']))
            del data['when']

        kwargs = {k: v for k, v in data.items() if k not in scheduler_keys}

        scheduler.add_job(data['func'],
                          trigger=data['trigger'],
                          kwargs=kwargs,
                          id=data['id'],
                          run_date=data['run_date'])
def post(self):
    """ Start sync of device(s)

    The target is selected from the JSON body, in order of precedence:
    'hostname', 'device_type', 'group' or 'all' set to true. Boolean
    'dry_run', 'force', 'auto_push' and 'resync' and string 'comment' and
    'ticket_ref' are passed on to the scheduled sync_devices job.

    Returns:
        A JSON response with the job ID and, when the number of selected
        devices is non-zero, an X-Total-Count header; or a tuple of
        (result dict, 400) on invalid input.
    """
    json_data = request.get_json()
    # default args
    kwargs: dict = {
        'dry_run': True,
        'auto_push': False,
        'force': False,
        'resync': False
    }

    # dry run is only disabled by an explicit boolean false
    if 'dry_run' in json_data and json_data['dry_run'] is False:
        kwargs['dry_run'] = False
    for flag in ('force', 'auto_push', 'resync'):
        if flag in json_data and isinstance(json_data[flag], bool):
            kwargs[flag] = json_data[flag]
    if 'comment' in json_data and isinstance(json_data['comment'], str):
        kwargs['job_comment'] = json_data['comment']
    if 'ticket_ref' in json_data and isinstance(json_data['ticket_ref'], str):
        kwargs['job_ticket_ref'] = json_data['ticket_ref']

    total_count: Optional[int] = None
    nr = cnaas_init()

    if 'hostname' in json_data:
        hostname = str(json_data['hostname'])
        if not Device.valid_hostname(hostname):
            return empty_result(
                status='error',
                data=f"Hostname '{hostname}' is not a valid hostname"), 400
        _, total_count, _ = inventory_selector(nr, hostname=hostname)
        if total_count != 1:
            return empty_result(
                status='error',
                data=f"Hostname '{hostname}' not found or is not a managed device"
            ), 400
        kwargs['hostnames'] = [hostname]
        what = hostname
    elif 'device_type' in json_data:
        devtype_str = str(json_data['device_type']).upper()
        if DeviceType.has_name(devtype_str):
            kwargs['device_type'] = devtype_str
        else:
            return empty_result(
                status='error',
                data=f"Invalid device type '{json_data['device_type']}' specified"
            ), 400
        what = f"{json_data['device_type']} devices"
        _, total_count, _ = inventory_selector(nr,
                                               resync=kwargs['resync'],
                                               device_type=devtype_str)
    elif 'group' in json_data:
        group_name = str(json_data['group'])
        if group_name not in get_groups():
            # Return 400 like every other validation error in this handler
            return empty_result(
                status='error',
                data='Could not find a group with name {}'.format(
                    group_name)), 400
        kwargs['group'] = group_name
        what = 'group {}'.format(group_name)
        _, total_count, _ = inventory_selector(nr,
                                               resync=kwargs['resync'],
                                               group=group_name)
    elif 'all' in json_data and json_data['all'] is True:
        what = "all devices"
        _, total_count, _ = inventory_selector(nr, resync=kwargs['resync'])
    else:
        return empty_result(
            status='error',
            data="No devices to synchronize were specified"), 400

    scheduler = Scheduler()
    job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.sync_devices:sync_devices',
        when=1,
        scheduled_by=get_jwt_identity(),
        kwargs=kwargs)

    res = empty_result(data=f"Scheduled job to synchronize {what}")
    res['job_id'] = job_id

    resp = make_response(json.dumps(res), 200)
    if total_count:
        resp.headers['X-Total-Count'] = total_count
    resp.headers['Content-Type'] = "application/json"
    return resp
def tearDown(self):
    """Pause, print the remaining scheduler jobs and shut the scheduler down."""
    scheduler = Scheduler()
    time.sleep(3)
    ap_scheduler = scheduler.get_scheduler()
    ap_scheduler.print_jobs()
    print_jobs(2)
    scheduler.shutdown()
def init_fabric_device_step1(
        device_id: int,
        new_hostname: str,
        device_type: str,
        neighbors: Optional[List[str]] = [],
        job_id: Optional[str] = None,
        scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Initialize fabric (CORE/DIST) device for management by CNaaS-NMS.

    Verifies the neighbors, commits new linknets, reserves a management and
    an infra IP, pushes the base management config, then schedules a resync
    of the verified neighbors and init step 2 for this device.

    Args:
        device_id: Device to select for initialization
        new_hostname: Hostname to configure on this device
        device_type: String representing DeviceType
        neighbors: Optional list of hostnames of peer devices
        job_id: job_id provided by scheduler when adding job
        scheduled_by: Username from JWT.

    Returns:
        Nornir result object

    Raises:
        DeviceStateException
        ValueError
    """
    # NOTE(review): the default list for 'neighbors' is shared between calls;
    # confirm pre_init_check_neighbors never mutates it.
    logger = get_logger()
    if DeviceType.has_name(device_type):
        devtype = DeviceType[device_type]
    else:
        raise ValueError("Invalid 'device_type' provided")

    if devtype not in [DeviceType.CORE, DeviceType.DIST]:
        raise ValueError(
            "Init fabric device requires device type DIST or CORE")

    with sqla_session() as session:
        dev = pre_init_checks(session, device_id)

        # Test update of linknets using LLDP data
        linknets = update_linknets(session,
                                   dev.hostname,
                                   devtype,
                                   ztp_hostname=new_hostname,
                                   dry_run=True)

        # Any exception raised by the neighbor checks propagates to the
        # caller before the device state is touched.
        verified_neighbors = pre_init_check_neighbors(
            session, dev, devtype, linknets, neighbors)
        logger.debug("Found valid neighbors for INIT of {}: {}".format(
            new_hostname, ", ".join(verified_neighbors)))
        check_neighbor_sync(session, verified_neighbors)

        dev.state = DeviceState.INIT
        dev.device_type = devtype
        session.commit()

        # If neighbor check works, commit new linknets
        # This will also mark neighbors as unsynced
        linknets = update_linknets(session,
                                   dev.hostname,
                                   devtype,
                                   ztp_hostname=new_hostname,
                                   dry_run=False)
        logger.debug("New linknets for INIT of {} created: {}".format(
            new_hostname, linknets))

        # Select and reserve a new management and infra IP for the device
        ReservedIP.clean_reservations(session, device=dev)
        session.commit()

        mgmt_ip = cnaas_nms.confpush.underlay.find_free_mgmt_lo_ip(session)
        infra_ip = cnaas_nms.confpush.underlay.find_free_infra_ip(session)

        reserved_ip = ReservedIP(device=dev, ip=mgmt_ip)
        session.add(reserved_ip)
        dev.infra_ip = infra_ip
        session.commit()

        mgmt_variables = {
            'mgmt_ipif': str(IPv4Interface('{}/32'.format(mgmt_ip))),
            'mgmt_prefixlen': 32,
            'infra_ipif': str(IPv4Interface('{}/32'.format(infra_ip))),
            'infra_ip': str(infra_ip),
        }

        device_variables = populate_device_vars(session, dev, new_hostname,
                                                devtype)
        device_variables = {**device_variables, **mgmt_variables}

        # Update device state
        dev.hostname = new_hostname
        session.commit()
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    # TODO: certificate

    # step2. push management config
    nrresult = nr_filtered.run(task=push_base_management,
                               device_variables=device_variables,
                               devtype=devtype,
                               job_id=job_id)

    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.management_ip = mgmt_ip
        # Remove the reserved IP since it's now saved in the device database instead
        reserved_ip = session.query(ReservedIP).filter(
            ReservedIP.device == dev).one_or_none()
        if reserved_ip:
            session.delete(reserved_ip)

    # Plugin hook, allocated IP; a failing plugin is logged, not fatal
    try:
        pmh = PluginManagerHandler()
        pmh.pm.hook.allocated_ipv4(vrf='mgmt',
                                   ipv4_address=str(mgmt_ip),
                                   ipv4_network=None,
                                   hostname=hostname)
    except Exception as e:
        logger.exception(
            "Error while running plugin hooks for allocated_ipv4: {}".format(
                str(e)))

    scheduler = Scheduler()

    # step3. resync neighbors
    sync_nei_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.sync_devices:sync_devices',
        when=1,
        scheduled_by=scheduled_by,
        kwargs={
            'hostnames': verified_neighbors,
            'dry_run': False
        })
    logger.info(f"Scheduled job {sync_nei_job_id} to resynchronize neighbors")

    # step4. register apscheduler job that continues steps
    next_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_device_step2',
        when=60,
        scheduled_by=scheduled_by,
        kwargs={
            'device_id': device_id,
            'iteration': 1
        })

    logger.info("Init step 2 for {} scheduled as job # {}".format(
        new_hostname, next_job_id))

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
def init_access_device_step1(
        device_id: int,
        new_hostname: str,
        mlag_peer_id: Optional[int] = None,
        mlag_peer_new_hostname: Optional[str] = None,
        uplink_hostnames_arg: Optional[List[str]] = [],
        job_id: Optional[str] = None,
        scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Initialize access device for management by CNaaS-NMS.

    If a MLAG/MC-LAG pair is to be configured both mlag_peer_id and
    mlag_peer_new_hostname must be set.

    Args:
        device_id: Device to select for initialization
        new_hostname: Hostname to configure on this device
        mlag_peer_id: Device ID of MLAG peer device (optional)
        mlag_peer_new_hostname: Hostname to configure on peer device (optional)
        uplink_hostnames_arg: List of hostnames of uplink peer devices (optional)
                              Used when initializing MLAG peer device
        job_id: job_id provided by scheduler when adding job
        scheduled_by: Username from JWT.

    Returns:
        Nornir result object

    Raises:
        DeviceStateException
        ValueError
    """
    logger = get_logger()
    with sqla_session() as session:
        dev = pre_init_checks(session, device_id)

        # update linknets using LLDP data
        update_linknets(session, dev.hostname, DeviceType.ACCESS)

        # If this is the first device in an MLAG pair
        if mlag_peer_id and mlag_peer_new_hostname:
            mlag_peer_dev = pre_init_checks(session, mlag_peer_id)
            update_linknets(session, mlag_peer_dev.hostname,
                            DeviceType.ACCESS)
            update_interfacedb_worker(
                session,
                dev,
                replace=True,
                delete_all=False,
                mlag_peer_hostname=mlag_peer_dev.hostname)
            update_interfacedb_worker(session,
                                      mlag_peer_dev,
                                      replace=True,
                                      delete_all=False,
                                      mlag_peer_hostname=dev.hostname)
            uplink_hostnames = dev.get_uplink_peer_hostnames(session)
            uplink_hostnames += mlag_peer_dev.get_uplink_peer_hostnames(
                session)
            # check that both devices see the correct MLAG peer
            pre_init_check_mlag(session, dev, mlag_peer_dev)
            pre_init_check_mlag(session, mlag_peer_dev, dev)
        # If this is the second device in an MLAG pair
        elif uplink_hostnames_arg:
            uplink_hostnames = uplink_hostnames_arg
        elif mlag_peer_id or mlag_peer_new_hostname:
            raise ValueError(
                "mlag_peer_id and mlag_peer_new_hostname must be specified together"
            )
        # If this device is not part of an MLAG pair
        else:
            update_interfacedb_worker(session,
                                      dev,
                                      replace=True,
                                      delete_all=False)
            uplink_hostnames = dev.get_uplink_peer_hostnames(session)

        # TODO: check compatibility, same dist pair and same ports on dists
        mgmtdomain = cnaas_nms.db.helper.find_mgmtdomain(
            session, uplink_hostnames)
        if not mgmtdomain:
            raise Exception(
                "Could not find appropriate management domain for uplink peer devices: {}"
                .format(uplink_hostnames))

        # Select a new management IP for the device
        ReservedIP.clean_reservations(session, device=dev)
        session.commit()
        mgmt_ip = mgmtdomain.find_free_mgmt_ip(session)
        if not mgmt_ip:
            raise Exception(
                "Could not find free management IP for management domain {}/{}"
                .format(mgmtdomain.id, mgmtdomain.description))
        reserved_ip = ReservedIP(device=dev, ip=mgmt_ip)
        session.add(reserved_ip)

        # Populate variables for template rendering
        mgmt_gw_ipif = IPv4Interface(mgmtdomain.ipv4_gw)
        mgmt_variables = {
            'mgmt_ipif':
            str(
                IPv4Interface('{}/{}'.format(mgmt_ip,
                                             mgmt_gw_ipif.network.prefixlen))),
            'mgmt_ip':
            str(mgmt_ip),
            'mgmt_prefixlen':
            int(mgmt_gw_ipif.network.prefixlen),
            'mgmt_vlan_id':
            mgmtdomain.vlan,
            'mgmt_gw':
            mgmt_gw_ipif.ip,
        }
        device_variables = populate_device_vars(session, dev, new_hostname,
                                                DeviceType.ACCESS)
        device_variables = {**device_variables, **mgmt_variables}

        # Update device state
        dev.hostname = new_hostname
        session.commit()
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    # step2. push management config
    nrresult = nr_filtered.run(task=push_base_management,
                               device_variables=device_variables,
                               devtype=DeviceType.ACCESS,
                               job_id=job_id)

    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.management_ip = device_variables['mgmt_ip']
        dev.state = DeviceState.INIT
        dev.device_type = DeviceType.ACCESS
        # Remove the reserved IP since it's now saved in the device database instead
        reserved_ip = session.query(ReservedIP).filter(
            ReservedIP.device == dev).one_or_none()
        if reserved_ip:
            session.delete(reserved_ip)

    # Plugin hook, allocated IP; a failing plugin is logged, not fatal
    try:
        pmh = PluginManagerHandler()
        pmh.pm.hook.allocated_ipv4(vrf='mgmt',
                                   ipv4_address=str(mgmt_ip),
                                   ipv4_network=str(mgmt_gw_ipif.network),
                                   hostname=hostname)
    except Exception as e:
        logger.exception(
            "Error while running plugin hooks for allocated_ipv4: {}".format(
                str(e)))

    # step3. register apscheduler job that continues steps
    if mlag_peer_id and mlag_peer_new_hostname:
        step2_delay = 30 + 60 + 30  # account for delayed start of peer device plus mgmt timeout
    else:
        step2_delay = 30
    scheduler = Scheduler()
    next_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_device_step2',
        when=step2_delay,
        scheduled_by=scheduled_by,
        kwargs={
            'device_id': device_id,
            'iteration': 1
        })

    logger.info("Init step 2 for {} scheduled as job # {}".format(
        new_hostname, next_job_id))

    if mlag_peer_id and mlag_peer_new_hostname:
        # The peer is initialized by a second run of this function, which
        # receives the uplink hostnames collected above
        mlag_peer_job_id = scheduler.add_onetime_job(
            'cnaas_nms.confpush.init_device:init_access_device_step1',
            when=60,
            scheduled_by=scheduled_by,
            kwargs={
                'device_id': mlag_peer_id,
                'new_hostname': mlag_peer_new_hostname,
                'uplink_hostnames_arg': uplink_hostnames,
                'scheduled_by': scheduled_by
            })
        logger.info("MLAG peer (id {}) init scheduled as job # {}".format(
            mlag_peer_id, mlag_peer_job_id))

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
def post(self):
    """Update/scan interfaces of device.

    Reads the JSON request body and schedules a one-time
    'update_interfacedb' job for the requested device.

    Recognized JSON keys:
        hostname: Required. Must be a valid hostname of a device that is
            in state MANAGED or UNMANAGED and of type ACCESS.
        mlag_peer_hostname: Optional. Validated the same way as hostname.
        replace: Optional. Only the boolean value true enables it.
        delete_all: Optional. Only the boolean value true enables it.

    Returns:
        On success a 200 response whose JSON body contains the scheduled
        job_id and which carries an X-Total-Count header. On any
        validation error a tuple of (empty_result error dict, 400).
    """
    json_data = request.get_json()
    # get_json() can yield None (or a non-object JSON value); the membership
    # tests and item lookups below would then raise instead of returning a
    # proper client error.
    if not isinstance(json_data, dict):
        return empty_result(
            status='error',
            data="No target to be updated was specified"), 400

    def access_device_error(name: str):
        """Return an (error, 400) tuple if name is not an updatable ACCESS
        device, otherwise None."""
        if not Device.valid_hostname(name):
            return empty_result(
                status='error',
                data=f"Hostname '{name}' is not a valid hostname"), 400
        with sqla_session() as session:
            device: Device = session.query(Device). \
                filter(Device.hostname == name).one_or_none()
            if not device or device.state not in (DeviceState.MANAGED,
                                                  DeviceState.UNMANAGED):
                return empty_result(
                    status='error',
                    data=f"Hostname '{name}' not found or is in invalid state"
                ), 400
            if device.device_type != DeviceType.ACCESS:
                return empty_result(
                    status='error',
                    data="Only devices of type ACCESS has interface database to update"
                ), 400
        return None

    kwargs: dict = {
        "replace": False,
        "delete_all": False,
        "mlag_peer_hostname": None
    }
    total_count: Optional[int] = None

    if 'hostname' not in json_data:
        return empty_result(
            status='error',
            data="No target to be updated was specified"), 400
    hostname = str(json_data['hostname'])
    error = access_device_error(hostname)
    if error:
        return error
    kwargs['hostname'] = hostname
    total_count = 1

    if 'mlag_peer_hostname' in json_data:
        mlag_peer_hostname = str(json_data['mlag_peer_hostname'])
        error = access_device_error(mlag_peer_hostname)
        if error:
            return error
        kwargs['mlag_peer_hostname'] = mlag_peer_hostname

    # Only a literal JSON true enables these flags; any other value
    # (including truthy non-booleans) leaves them disabled.
    kwargs['replace'] = json_data.get('replace') is True
    kwargs['delete_all'] = json_data.get('delete_all') is True

    scheduler = Scheduler()
    job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.update:update_interfacedb',
        when=1,
        scheduled_by=get_jwt_identity(),
        kwargs=kwargs)

    res = empty_result(
        data=f"Scheduled job to update interfaces for {hostname}")
    res['job_id'] = job_id

    resp = make_response(json.dumps(res), 200)
    if total_count:
        resp.headers['X-Total-Count'] = total_count
    resp.headers['Content-Type'] = "application/json"
    return resp
def init_access_device_step1(device_id: int, new_hostname: str) -> NornirJobResult:
    """Initialize access device for management by CNaaS-NMS

    Checks connectivity to the device, detects its uplinks towards DIST
    devices, reserves a management IP from the matching management domain,
    pushes the base management config and schedules step 2 of the init.

    Args:
        device_id: Database ID of the device to initialize
        new_hostname: Hostname to assign to the device

    Returns:
        Nornir result object

    Raises:
        DeviceStateException: Device is not in state DISCOVERED
        ConnectionCheckError: The initial connectivity check failed
        Exception: No management domain or no free management IP was found
        RuntimeError: The management config push reported success even
            though loss of connectivity was expected
    """
    # Check that we can find device and that it's in the correct state to start init
    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.id == device_id).one()
        if dev.state != DeviceState.DISCOVERED:
            raise DeviceStateException(
                "Device must be in state DISCOVERED to begin init")
        old_hostname = dev.hostname

    # Perform connectivity check
    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_old_filtered = nr.filter(name=old_hostname)
    try:
        nrresult_old = nr_old_filtered.run(task=networking.napalm_get,
                                           getters=["facts"])
    except Exception as e:
        raise ConnectionCheckError(
            f"Failed to connect to device_id {device_id}: {str(e)}") from e
    if nrresult_old.failed:
        raise ConnectionCheckError(
            f"Failed to connect to device_id {device_id}")

    cnaas_nms.confpush.get.update_linknets(old_hostname)
    uplinks = []
    neighbor_hostnames = []
    with sqla_session() as session:
        dev = session.query(Device).filter(
            Device.hostname == old_hostname).one()
        for neighbor_d in dev.get_neighbors(session):
            if neighbor_d.device_type == DeviceType.DIST:
                local_if = dev.get_link_to_local_ifname(session, neighbor_d)
                if local_if:
                    uplinks.append({'ifname': local_if})
                    neighbor_hostnames.append(neighbor_d.hostname)
        logger.debug("Uplinks for device {} detected: {} neighbor_hostnames: {}".
                     format(device_id, uplinks, neighbor_hostnames))
        # TODO: check compatibility, same dist pair and same ports on dists
        mgmtdomain = cnaas_nms.db.helper.find_mgmtdomain(
            session, neighbor_hostnames)
        if not mgmtdomain:
            raise Exception(
                "Could not find appropriate management domain for uplink peer devices: {}"
                .format(neighbor_hostnames))
        mgmt_ip = mgmtdomain.find_free_mgmt_ip(session)
        if not mgmt_ip:
            raise Exception(
                "Could not find free management IP for management domain {}".
                format(mgmtdomain.id))
        mgmt_gw_ipif = IPv4Interface(mgmtdomain.ipv4_gw)
        device_variables = {
            'mgmt_ipif':
            IPv4Interface('{}/{}'.format(mgmt_ip,
                                         mgmt_gw_ipif.network.prefixlen)),
            'uplinks': uplinks,
            'mgmt_vlan_id': mgmtdomain.vlan,
            'mgmt_gw': mgmt_gw_ipif.ip
        }
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.state = DeviceState.INIT
        dev.hostname = new_hostname
        session.commit()
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    # step2. push management config
    # Pre-bind the result so the code below is well-defined when run() raises.
    nrresult = None
    try:
        nrresult = nr_filtered.run(task=push_base_management,
                                   device_variables=device_variables)
    except Exception as e:
        # Ignore the exception, we expect to lose connectivity here.
        # Sometimes no exception is raised and the failure is only saved in
        # the result; other times socket.timeout,
        # pyeapi.eapilib.ConnectionError or
        # napalm.base.exceptions.ConnectionException is raised.
        logger.debug(
            "Ignoring expected exception while pushing management config "
            "to device_id {}: {}".format(device_id, e))
    if nrresult is not None:
        if not nrresult.failed:
            # We don't expect success here. The previous bare `raise` (with no
            # active exception) also surfaced as RuntimeError, so the
            # exception type seen by callers is unchanged.
            raise RuntimeError(
                "Push of base management config to device_id {} did not fail "
                "as expected".format(device_id))
        print_result(nrresult)

    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.management_ip = device_variables['mgmt_ipif'].ip

    # step3. register apscheduler job that continues steps
    scheduler = Scheduler()
    next_job = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_access_device_step2',
        when=0,
        kwargs={
            'device_id': device_id,
            'iteration': 1
        })

    logger.debug(f"Step 2 scheduled as ID {next_job.id}")

    # NOTE(review): nrresult is None when run() raised the expected
    # exception; assumes NornirJobResult accepts nrresult=None - confirm.
    return NornirJobResult(nrresult=nrresult, next_job_id=next_job.id)
def sync_devices(hostnames: Optional[List[str]] = None, device_type: Optional[str] = None,
                 group: Optional[str] = None, dry_run: bool = True, force: bool = False,
                 auto_push: bool = False, job_id: Optional[int] = None,
                 scheduled_by: Optional[str] = None, resync: bool = False) -> NornirJobResult:
    """Synchronize devices to their respective templates. If no arguments
    are specified then synchronize all devices that are currently out
    of sync.

    Args:
        hostnames: Specify a list of hostnames to synchronize
        device_type: Specify a device type to synchronize
        group: Specify a group of devices to synchronize
        dry_run: Don't commit generated config to device
        force: Commit config even if changes made outside CNaaS will get
               overwritten
        auto_push: Automatically do live-run after dry-run if change score is low
        job_id: job_id provided by scheduler when adding a new job
        scheduled_by: Username from JWT
        resync: Re-synchronize a device even if it's marked as synced in the
                database, a device selected by hostname is always re-synced

    Returns:
        NornirJobResult

    Raises:
        JoblockError: The devices lock could not be acquired for a live run
        Exception: The config hash check raised or failed for some device
    """
    logger = get_logger()
    nr = cnaas_init()

    # Select target devices; hostnames take precedence over device_type,
    # which takes precedence over group.
    if hostnames:
        nr_filtered, dev_count, skipped_hostnames = \
            inventory_selector(nr, hostname=hostnames)
    elif device_type:
        nr_filtered, dev_count, skipped_hostnames = \
            inventory_selector(nr, resync=resync, device_type=device_type)
    elif group:
        nr_filtered, dev_count, skipped_hostnames = \
            inventory_selector(nr, resync=resync, group=group)
    else:
        # all devices
        nr_filtered, dev_count, skipped_hostnames = \
            inventory_selector(nr, resync=resync)

    if skipped_hostnames:
        logger.info("Device(s) already synchronized, skipping ({}): {}".format(
            len(skipped_hostnames), ", ".join(skipped_hostnames)
        ))

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info("Device(s) selected for synchronization ({}): {}".format(
        dev_count, ", ".join(device_list)
    ))

    try:
        nrresult = nr_filtered.run(task=sync_check_hash,
                                   force=force,
                                   job_id=job_id)
    except Exception as e:
        logger.exception("Exception while checking config hash: {}".format(str(e)))
        raise
    else:
        if nrresult.failed:
            # Mark devices as unsynchronized if config hash check failed
            with sqla_session() as session:
                session.query(Device).filter(Device.hostname.in_(nrresult.failed_hosts.keys())).\
                    update({Device.synchronized: False}, synchronize_session=False)
            raise Exception('Configuration hash check failed for {}'.format(
                ' '.join(nrresult.failed_hosts.keys())))

    if not dry_run:
        with sqla_session() as session:
            logger.info("Trying to acquire lock for devices to run syncto job: {}".format(job_id))
            if not Joblock.acquire_lock(session, name='devices', job_id=job_id):
                raise JoblockError("Unable to acquire lock for configuring devices")
        # NOTE(review): from here on the lock is only released in the push
        # exception handler and in the final database update below; an
        # exception anywhere else is not handled in this function - confirm
        # the lock is cleaned up elsewhere in that case.

    try:
        nrresult = nr_filtered.run(task=push_sync_device, dry_run=dry_run,
                                   job_id=job_id)
    except Exception as e:
        logger.exception("Exception while synchronizing devices: {}".format(str(e)))
        try:
            if not dry_run:
                with sqla_session() as session:
                    logger.info("Releasing lock for devices from syncto job: {}".format(job_id))
                    Joblock.release_lock(session, job_id=job_id)
        except Exception:
            logger.error("Unable to release devices lock after syncto job")
        # nrresult still refers to the config hash check result here
        return NornirJobResult(nrresult=nrresult)

    failed_hosts = list(nrresult.failed_hosts.keys())
    for hostname in failed_hosts:
        logger.error("Synchronization of device '{}' failed".format(hostname))

    if nrresult.failed:
        logger.error("Not all devices were successfully synchronized")

    change_scores = []
    changed_hosts = []
    unchanged_hosts = []
    # calculate change impact score
    for host, results in nrresult.items():
        if len(results) != 3:
            logger.debug("Unable to calculate change score for failed device {}".format(host))
        elif results[2].diff:
            changed_hosts.append(host)
            if "change_score" in results[0].host:
                change_scores.append(results[0].host["change_score"])
                logger.debug("Change score for host {}: {:.1f}".format(
                    host, results[0].host["change_score"]))
        else:
            unchanged_hosts.append(host)
            change_scores.append(0)
            logger.debug("Empty diff for host {}, 0 change score".format(
                host))

    nr_confighash = None
    if dry_run and force:
        # update config hash for devices that had an empty diff because local
        # changes on a device can cause reordering of CLI commands that results
        # in config hash mismatch even if the calculated diff was empty
        def include_filter(host, include_list=unchanged_hosts):
            return host.name in include_list
        nr_confighash = nr_filtered.filter(filter_func=include_filter)
    elif not dry_run:
        # set new config hash for devices that was successfully updated
        def exclude_filter(host, exclude_list=failed_hosts+unchanged_hosts):
            return host.name not in exclude_list
        nr_confighash = nr_filtered.filter(filter_func=exclude_filter)

    if nr_confighash:
        try:
            nrresult_confighash = nr_confighash.run(task=update_config_hash)
        except Exception as e:
            logger.exception("Exception while updating config hashes: {}".format(str(e)))
        else:
            if nrresult_confighash.failed:
                logger.error("Unable to update some config hashes: {}".format(
                    list(nrresult_confighash.failed_hosts.keys())))

    # set devices as synchronized if needed
    with sqla_session() as session:
        for hostname in changed_hosts:
            dev: Device = session.query(Device).filter(Device.hostname == hostname).one()
            # a dry run that found a diff means the device is out of sync,
            # a live run means the diff has now been applied
            dev.synchronized = not dry_run
        for hostname in unchanged_hosts:
            dev: Device = session.query(Device).filter(Device.hostname == hostname).one()
            dev.synchronized = True
        if not dry_run:
            logger.info("Releasing lock for devices from syncto job: {}".format(job_id))
            Joblock.release_lock(session, job_id=job_id)

    if not device_list:
        total_change_score = 0
    elif not change_scores or failed_hosts:
        # no score could be calculated or some device failed: assume worst case
        total_change_score = 100
    else:
        # use individual max as total_change_score, range 1-100
        total_change_score = max(min(int(max(change_scores) + 0.5), 100), 1)
    logger.info(
        "Change impact score: {:.1f} (dry_run: {}, selected devices: {}, changed devices: {})".
        format(total_change_score, dry_run, len(device_list), len(changed_hosts)))

    next_job_id = None
    # auto-push is only considered for a dry run of exactly one device that
    # was explicitly selected by hostname
    if auto_push and len(device_list) == 1 and hostnames and dry_run:
        if not changed_hosts:
            logger.info("None of the selected host has any changes (diff), skipping auto-push")
        elif total_change_score < AUTOPUSH_MAX_SCORE:
            scheduler = Scheduler()
            next_job_id = scheduler.add_onetime_job(
                'cnaas_nms.confpush.sync_devices:sync_devices',
                when=0,
                scheduled_by=scheduled_by,
                kwargs={'hostnames': hostnames, 'dry_run': False, 'force': force})
            logger.info(f"Auto-push scheduled live-run of commit as job id {next_job_id}")
        else:
            logger.info(
                f"Auto-push of config to device {hostnames} failed because change score of "
                f"{total_change_score} is higher than auto-push limit {AUTOPUSH_MAX_SCORE}"
            )

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id, change_score=total_change_score)