def get_free_ip_addrs(self):
    """Yield unused host IP addresses within this query's networks.

    First gathers every network the query matches, then indexes all
    addresses already assigned inside them (plain hosts and whole
    sub-networks separately), and finally yields each host address
    that is neither assigned directly nor covered by a sub-network.

    :raises DatasetError: when the query matches no networks
    """
    networks = list(self.get_network_ip_addrs())
    if not networks:
        raise DatasetError('No networks')

    # Index host and network addresses separately
    used_hosts = set()
    used_networks = []
    containment_query = type(self)(
        {'intern_ip': Any(*(ContainedOnlyBy(n) for n in networks))},
        ['intern_ip'],
    )
    for obj in containment_query:
        addr = obj['intern_ip']
        if isinstance(addr, (IPv4Address, IPv6Address)):
            used_hosts.add(addr)
        else:
            assert isinstance(addr, (IPv4Network, IPv6Network))
            used_networks.append(addr)

    # Now, we are ready to return.
    for network in networks:
        for candidate in network.hosts():
            # Skip addresses claimed by a sub-network or a host object.
            if any(candidate in sub for sub in used_networks):
                continue
            if candidate not in used_hosts:
                yield candidate
def choose_ip_addr(request):
    """Interactive drill-down to pick a free IP address.

    Without a ``network`` parameter, offers the top-level route networks.
    With one, shows its contained sub-networks, or — for a leaf network —
    up to 1000 free addresses inside it.
    """
    if 'network' not in request.GET:
        # No network chosen yet: list the route networks to start from.
        servers = list(Query(
            {'servertype': 'route_network'},
            ['hostname', 'intern_ip'],
            ['hostname'],
        ))
        return TemplateResponse(
            request, 'servershell/choose_ip_addr.html', {'servers': servers}
        )

    network = request.GET['network']

    # Sub-networks directly contained in the chosen network.
    network_servertypes = Servertype.objects.filter(ip_addr_type='network')
    servers = list(Query(
        {
            'servertype': Any(
                *(s.servertype_id for s in network_servertypes)),
            'intern_ip': ContainedOnlyBy(network),
        },
        ['hostname', 'intern_ip'],
        ['hostname'],
    ))
    if servers:
        return TemplateResponse(
            request, 'servershell/choose_ip_addr.html', {'servers': servers}
        )

    # Leaf network: present a bounded slice of its free addresses.
    network_query = Query({'intern_ip': network}, ['intern_ip'])
    return TemplateResponse(
        request,
        'servershell/choose_ip_addr.html',
        {'ip_addrs': islice(network_query.get_free_ip_addrs(), 1000)},
    )
def setUp(self):
    """Collect the hardware models of all usable ndco hypervisors."""
    # A set comprehension avoids building a throwaway list first
    # (the original wrapped a comprehension in set([...])).
    self.hardware_models = {
        host['hardware_model']
        for host in Query({
            'servertype': 'hypervisor',
            'project': 'ndco',
            'state': Any('online', 'online_reserved'),
        }, ['hardware_model'])
    }
def test_filter_any(self):
    """Any() must match exactly the listed hostnames."""
    # Set comprehension instead of a manual loop with .add().
    hostnames = {
        s['hostname'] for s in Query({'hostname': Any('test1', 'test3')})
    }
    self.assertNotIn('test0', hostnames)
    self.assertIn('test1', hostnames)
    self.assertNotIn('test2', hostnames)
    self.assertIn('test3', hostnames)
def _get_vm(hostname, unlock=True, allow_retired=False):
    """Get a server from Serveradmin by hostname to return VM object

    The function is accepting hostnames in any length as long as it resolves
    to a single server on Serveradmin.

    Generator/context-manager style: yields the locked VM and releases the
    lock afterwards (on error always; on success only if ``unlock``).

    :param hostname: hostname or unique hostname prefix of the VM
    :param unlock: release the lock after successful completion
    :param allow_retired: also operate on VMs in state "retired"
    :raises InvalidStateError: when the VM is retired and not allowed
    """
    # Resolve the (possibly abbreviated) hostname to exactly one object id.
    object_id = Query({
        'hostname': Any(hostname, StartsWith(hostname + '.')),
        'servertype': 'vm',
    }, ['object_id']).get()['object_id']

    def vm_query():
        # Re-fetch helper: always returns the current Serveradmin state.
        return Query({
            'object_id': object_id,
        }, VM_ATTRIBUTES).get()

    dataset_obj = vm_query()

    hypervisor = None
    if dataset_obj['hypervisor']:
        hypervisor = Hypervisor(dataset_obj['hypervisor'])

        # XXX: Ugly hack until adminapi supports modifying joined objects
        dict.__setitem__(
            dataset_obj, 'hypervisor', dataset_obj['hypervisor']['hostname']
        )

    vm = VM(dataset_obj, hypervisor)
    vm.acquire_lock()

    try:
        if not allow_retired and dataset_obj['state'] == 'retired':
            raise InvalidStateError(
                'VM {} is in state retired, I refuse to work on it!'.format(
                    hostname,
                )
            )
        yield vm
    except (Exception, KeyboardInterrupt):
        # Any failure (including Ctrl-C) must still release the lock,
        # then propagate to the caller.
        VM(vm_query(), hypervisor).release_lock()
        raise
    else:
        # We re-fetch the VM because we can't risk commiting any other changes
        # to the VM than unlocking. There can be changes from failed things,
        # like setting memory.
        # Most operations require unlocking, the only exception is deleting of
        # a VM. After object is deleted, it can't be unlocked.
        if unlock:
            VM(vm_query(), hypervisor).release_lock()
def diff(request: HttpRequest) -> HttpResponse:
    """Render a side-by-side attribute diff for the requested objects.

    Query parameters:
        object: object_ids to compare (required, must be numeric)
        attr:   attributes to restrict the diff to (optional; all if absent)

    :returns: rendered diff page, or 400 on missing/non-numeric object ids
    """
    attrs = request.GET.getlist('attr')
    objects = request.GET.getlist('object')

    if not objects or not all(o.isnumeric() for o in objects):
        return bad_request(request, HttpResponseBadRequest)

    # Can raise ApiError for unknown attributes - let it flow ...
    qs = Query({'object_id': Any(*objects)}, attrs if attrs else None)

    diff_data = []
    # chain.from_iterable avoids materializing the per-object key lists;
    # the sorted set is the union of all attributes over all objects.
    for attribute in sorted(set(chain.from_iterable(o.keys() for o in qs))):
        # object_id is always different and special
        if attribute == 'object_id':
            continue

        # Show hostname only if request by user
        if attribute == 'hostname' and attrs and attribute not in attrs:
            continue

        # The attribute set is a union over objects of possibly different
        # servertypes, so an individual object may lack it - use .get()
        # instead of risking a KeyError.
        values = [obj.get(attribute) for obj in qs]
        diff_data.append([attribute, values])

    # Fetch hostnames if not requested by user to display as header in result.
    if 'hostname' in attrs:
        hosts = qs
    else:
        hosts = Query({'object_id': Any(*objects)}, ['hostname'])

    context = {
        'hosts': hosts,
        'diff_data': diff_data,
    }

    return render(request, 'servershell/diff.html', context)
def _override_servertype_filter(filters, servertype_ids):
    """Override the servertype filter using the possible servertypes"""
    # If the caller already filters on servertype, intersect the possible
    # servertypes with it up front. This is purely an optimization - we
    # could skip it, but then we could not replace the filter below.
    if 'servertype' in filters:
        existing_filter = filters['servertype']
        servertype_ids = [
            servertype_id
            for servertype_id in servertype_ids
            if existing_filter.matches(servertype_id)
        ]

    # Replace (or add) the servertype filter with the narrowed set.
    filters['servertype'] = Any(*servertype_ids)

    return servertype_ids
def _get_best_hypervisor(vm, hypervisor_states, offline=False):
    """Yield the first suitable hypervisor for *vm*, locked.

    Candidates share the VM's route network and are in one of
    ``hypervisor_states``; they are tried in preference order. The chosen
    hypervisor is yielded while holding its lock and released afterwards.

    :raises IGVMError: when no candidate passes the checks
    """
    # Candidate hypervisors: matching environment, network and state.
    hypervisors = (Hypervisor(o) for o in Query(
        {
            'servertype': 'hypervisor',
            'environment': environ.get('IGVM_MODE', 'production'),
            'vlan_networks': vm.dataset_obj['route_network'],
            'state': Any(*hypervisor_states),
        },
        HYPERVISOR_ATTRIBUTES))

    for hypervisor in sorted_hypervisors(
            HYPERVISOR_PREFERENCES, vm, hypervisors):
        # The actual resources are not checked during sorting for performance.
        # We need to validate the hypervisor using the actual values before
        # the final decision.
        try:
            hypervisor.acquire_lock()
        except InvalidStateError as error:
            log.warning(error)
            continue

        try:
            hypervisor.check_vm(vm, offline)
        except libvirtError as error:
            hypervisor.release_lock()
            log.warning('Preferred hypervisor "{}" is skipped: {}'.format(
                hypervisor, error))
            continue
        except HypervisorError as error:
            hypervisor.release_lock()
            log.warning('Preferred hypervisor "{}" is skipped: {}'.format(
                hypervisor, error))
            continue

        # Hand the locked hypervisor to the caller; the lock is always
        # released when control returns here, then stop searching.
        try:
            yield hypervisor
        finally:
            hypervisor.release_lock()
        break
    else:
        # Loop exhausted without a break: nothing usable was found.
        raise IGVMError('Cannot find a hypervisor')
def clean_all(route_network, datacenter_type, vm_hostname=None):
    """Destroy leftover test VMs on the HVs and in Serveradmin.

    Cancelled builds are forcefully killed by Jenkins. They did not have the
    opportunity to clean up so we forcibly destroy everything found on any HV
    which would interrupt our work in the current JENKINS_EXECUTOR.
    """
    hypervisors = [
        Hypervisor(obj) for obj in Query({
            'servertype': 'hypervisor',
            'environment': 'testing',
            'vlan_networks': route_network,
            'state': 'online',
        }, HYPERVISOR_ATTRIBUTES)
    ]

    # If a VM hostname is given, only that will be cleaned from HVs.
    if vm_hostname is None:
        target = VM_HOSTNAME_PATTERN.format(JENKINS_EXECUTOR, '[0-9]+')
    else:
        target = vm_hostname
    pattern = '^([0-9]+_)?(vm-rename-)?{}$'.format(target)

    # Clean HVs one by one.
    if datacenter_type == 'kvm.dct':
        for hypervisor in hypervisors:
            clean_hv(hypervisor, pattern)

    if datacenter_type == 'aws.dct':
        clean_aws(vm_hostname)

    # Remove all connected Serveradmin objects.
    clean_serveradmin({'hostname': Regexp(pattern)})

    # Try to remove VMs with the same IP in any case because we use custom
    # logic to assign them and we want to avoid IP address conflicts.
    # Index 1 is usually used for the test's subject VM,
    # 2 might be used for testing IP change.
    ips = [get_next_address(VM_NET, index) for index in (1, 2)]
    clean_serveradmin({'intern_ip': Any(*ips)})
def index(request):
    """The hardware resources page

    Renders the overview of hosts with their graph/numeric/relation columns
    for the selected graph collection, optionally filtered by a search term.
    """
    term = request.GET.get('term', request.session.get('term', ''))
    collections = list(Collection.objects.filter(overview=True))

    # If a graph collection was specified, use it. Otherwise use the first
    # one.
    for collection in collections:
        if request.GET.get('current_collection'):
            if str(collection.id) != request.GET['current_collection']:
                continue
        current_collection = collection
        break
    else:
        return HttpResponseBadRequest('No matching current collection')

    template_info = {
        'search_term': term,
        'collections': collections,
        'current_collection': current_collection.id,
    }

    # TODO: Generalize this part using the relations
    hostnames = []
    matched_hostnames = []
    if term:
        try:
            query_args = parse_query(term)
            host_query = Query(query_args, ['hostname', 'hypervisor'])
            for host in host_query:
                matched_hostnames.append(host['hostname'])
                if host.get('hypervisor'):
                    hostnames.append(host['hypervisor'])
                else:
                    # If it's not guest, it might be a server, so we add it
                    hostnames.append(host['hostname'])
            understood = repr(host_query)
            request.session['term'] = term

            if len(hostnames) == 0:
                # Nothing matched: render early with just the parsed query.
                template_info.update({
                    'understood': understood,
                })
                return TemplateResponse(request, 'resources/index.html',
                                        template_info)
        except (DatatypeError, ValidationError) as error:
            template_info.update({'error': str(error)})
            return TemplateResponse(request, 'resources/index.html',
                                    template_info)
    else:
        understood = repr(Query({}))

    # Build the column descriptions: one graph column per template/variation
    # pair, plus numeric and relation columns of the collection.
    variations = list(current_collection.variation_set.all())
    columns = []
    attribute_ids = ['hostname', 'servertype']
    graph_index = 0
    sprite_width = settings.GRAPHITE_SPRITE_WIDTH
    for template in current_collection.template_set.all():
        for variation in variations:
            columns.append({
                'name': str(template) + ' ' + str(variation),
                'type': 'graph',
                'graph_index': graph_index,
                # Each graph occupies one fixed-width slot in the sprite.
                'sprite_offset': graph_index * sprite_width,
            })
            graph_index += 1
    for numeric in current_collection.numeric_set.all():
        columns.append({
            'name': str(numeric),
            'type': 'numeric',
        })
        attribute_ids.append(numeric.attribute_id)
    for relation in current_collection.relation_set.all():
        columns.append({
            'name': str(relation),
            'type': 'relation',
        })
        attribute_ids.append(relation.attribute_id)

    # NOTE(review): `collection` below is the loop variable from the
    # selection loop above; after its break it aliases current_collection.
    hosts = OrderedDict()
    filters = {GRAPHITE_ATTRIBUTE_ID: collection.name}
    if len(hostnames) > 0:
        filters['hostname'] = Any(*hostnames)
    for server in Query(filters, attribute_ids):
        hosts[server['hostname']] = dict(server)

    sprite_url = settings.MEDIA_URL + 'graph_sprite/' + collection.name
    template_info.update({
        'columns': columns,
        'hosts': hosts.values(),
        'matched_hostnames': matched_hostnames,
        'understood': understood,
        'error': None,
        'sprite_url': sprite_url,
    })
    return TemplateResponse(request, 'resources/index.html', template_info)
def test_not_filter(self):
    """Not(Any(...)) must exclude every listed value."""
    server = Query({'os': Not(Any('squeeze', 'lenny'))}).get()
    self.assertEqual(server['hostname'], 'test0')
def graph_table(request):
    """Generate graph table page

    Looks up the requested servers, finds every graph collection that
    matches all of them, and renders one graph table per server (zipped
    together when multiple servers are shown).

    :param request: HTTP request with ``hostname``/``object_id`` params
    :return: rendered graph table page or 400 when nothing was requested
    """
    hostnames = [h.strip() for h in request.GET.getlist('hostname', []) if h]
    object_ids = [o.strip() for o in request.GET.getlist('object_id', []) if o]
    if len(hostnames) == 0 and len(object_ids) == 0:
        return HttpResponseBadRequest('No hostname or object_id provided')

    # For convenience we will cache the servers in a dictionary.
    servers = {
        s['hostname']: s for s in Query({'hostname': Any(*hostnames)}, None)
    }
    servers.update({
        s['hostname']: s for s in Query({'object_id': Any(*object_ids)}, None)
    })
    if len(servers) != len(hostnames) + len(object_ids):
        messages.error(
            request,
            'One or more objects with hostname: {} or object_ids: {} does not '
            'exist'.format(','.join(hostnames), ','.join(object_ids)))

    # Find the collections which are related with all of the hostnames.
    # If there are two collections with same match, use only the one which
    # is not an overview.
    collections = []
    for collection in Collection.objects.order_by('overview', 'sort_order'):
        if any(collection.name == c.name for c in collections):
            continue
        for hostname in servers.keys():
            if GRAPHITE_ATTRIBUTE_ID not in servers[hostname]:
                break   # The server hasn't got this attribute at all.
            value = servers[hostname][GRAPHITE_ATTRIBUTE_ID]
            assert isinstance(value, MultiAttr)
            if not any(collection.name == v for v in value):
                break   # The server hasn't got this attribute value.
        else:
            # Every server carries this collection's attribute value.
            collections.append(collection)

    # Prepare the graph descriptions
    descriptions = []
    for collection in collections:
        for template in collection.template_set.all():
            descriptions += (
                [(template.name, template.description)] * len(servers.keys()))

    # Prepare the graph tables for all hosts
    graph_tables = []
    for hostname in servers.keys():
        # NOTE: this local shadows the function name inside the body.
        graph_table = []
        if request.GET.get('action') == 'Submit':
            # Custom time range etc. was submitted: pass it through.
            custom_params = request.GET.urlencode()
            for collection in collections:
                column = collection.graph_column(
                    servers[hostname], custom_params)
                graph_table += [(k, [('Custom', v)]) for k, v in column]
        else:
            for collection in collections:
                graph_table += collection.graph_table(servers[hostname])
        graph_tables.append(graph_table)

    if len(servers) > 1:
        # Add hostname to the titles
        for order, hostname in enumerate(servers.keys()):
            graph_tables[order] = [
                (k + ' on ' + hostname, v) for k, v in graph_tables[order]]

        # Combine them: interleave the per-host tables row by row.
        graph_table = []
        for combined_tables in zip(*graph_tables):
            graph_table += list(combined_tables)

    # One can optionally specify a Grafana dashboard which has a parameter
    # called SERVER that receives a coded hostname as alternative to the
    # builtin graphs.
    grafana_links = []
    if hasattr(settings, 'GRAFANA_DASHBOARD'):
        def _get_grafana_link(hostname):
            return settings.GRAFANA_DASHBOARD + '?' + urlencode(
                {'var-SERVER': format_attribute_value(hostname)})
        for hostname in servers.keys():
            grafana_links.append((hostname, _get_grafana_link(hostname)))

    return TemplateResponse(
        request, 'graphite/graph_table.html', {
            'hostnames': servers.keys(),
            'descriptions': descriptions,
            'graph_table': graph_table,
            'grafana_links': grafana_links,
            'link': request.get_full_path(),
            'from': request.GET.get('from', '-24h'),
            'until': request.GET.get('until', 'now'),
        })
def parse_query(term, hostname=None):  # NOQA: C901
    """Parse a servershell search term into query filter arguments.

    A leading non-key token is treated as a hostname (regexp when it
    contains regex trigger characters); the rest is parsed recursively.
    Otherwise the token stream is folded into ``{attribute: filter}``
    pairs using a small stack machine.

    :param term: the raw search string
    :param hostname: hostname already consumed by a recursive call
    :raises DatatypeError: on malformed terms
    :return: dict of query arguments
    """
    parsed_args = parse_function_string(term, strict=True)
    if not parsed_args:
        return {}

    # If first token is not a key, we assume that a hostname is meant
    token, value = parsed_args[0]
    if token != 'key':
        if hostname:
            # We already parsed a hostname, so we don't expect another one
            raise DatatypeError("Garbled hostname: {0}".format(hostname))
        term_parts = term.split(None, 1)
        if len(term_parts) == 2:
            # Recurse on the remainder after stripping the hostname part.
            hostname_part, remaining_part = term_parts
            query_args = parse_query(remaining_part, hostname_part)
        else:
            hostname_part = term
            query_args = {}
        if any(x in hostname_part for x in _trigger_re_chars):
            hostname = Regexp(hostname_part)
        else:
            hostname = BaseFilter(hostname_part)
        if 'hostname' in query_args:
            # Combine with a hostname filter produced by the recursion.
            query_args['hostname'] = Any(query_args['hostname'], hostname)
        else:
            query_args['hostname'] = hostname
        return query_args

    # Otherwise just parse all attributes
    query_args = {}
    stack = []
    call_depth = 0
    for arg in parsed_args:
        token, value = arg
        if token == 'key':
            # A new attribute starts: flush the previous key/value pair.
            # NOTE(review): assumes the previous key got exactly one value;
            # a key with no value would raise IndexError here - confirm
            # parse_function_string can't produce that sequence.
            if stack:
                query_args[stack[0][1]] = stack[1][1]
                stack = []
            stack.append(arg)
        elif token == 'func':
            # Do not allow functions without preceding key
            # if they are on top level (e.g. call_depth = 0)
            if not stack or (call_depth == 0 and stack[-1][0] != 'key'):
                raise DatatypeError(
                    'Invalid term: top level function requires '
                    'preceding attribute'
                )
            call_depth += 1
            stack.append(arg)
        elif token == 'endfunc':
            call_depth -= 1
            # Pop arguments back to the matching 'func' token.
            fn_args = []
            while True:
                s_token, s_value = stack.pop()
                if s_token == 'func':
                    break
                else:
                    fn_args.append(s_value)
            fn_args.reverse()
            # Resolve the function name to a filter class (case-insensitive).
            for filter_class in filter_classes:
                if filter_class.__name__.lower() == s_value.lower():
                    try:
                        instance = filter_class(*fn_args)
                    except TypeError:
                        raise DatatypeError(
                            'Invalid function args ' + filter_class.__name__
                        )
                    break
            else:
                raise DatatypeError('Invalid function ' + s_value)
            stack.append(('instance', instance))
        elif token == 'literal':
            # Do not allow literals without key or function context
            if not stack or (call_depth == 0 and stack[-1][0] != 'key'):
                raise DatatypeError(
                    'Invalid term: Top level literals are not '
                    'allowed when attributes are used'
                )
            if call_depth == 0:
                # Bare value for an attribute: wrap in the base filter.
                stack.append(('literal', BaseFilter(value)))
            else:
                # Inside a function call: pass the raw value through.
                stack.append(('literal', value))

    # Flush the trailing key/value pair, validating its arity.
    if stack and stack[0][0] == 'key':
        if len(stack) != 2:
            raise DatatypeError(
                'Invalid term: Attribute requires one argument'
            )
        query_args[stack[0][1]] = stack[1][1]

    return query_args
def main():
    """Nagios check reporting hosts whose igvm_locked is stale.

    Queries all monitored hypervisors and VMs, classifies them by how long
    they have been igvm_locked, and submits passive check results to the
    monitoring master via NSCA in <5000-byte chunks.
    """
    parser = argparse.ArgumentParser(
        description='nagios check for long running igvm_locked attribute',
    )
    parser.add_argument(
        'monitoring_master', type=str,
        help='Server which will receive the passive check results')
    parser.add_argument(
        '--time-in-minutes', type=int, default=480,
        help='Time in minutes that a machine can be igvm_locked before being '
             'considered stale')
    parser.add_argument('-v', action='store_true', dest='verbose',
                        help='Run the check in verbose mode')
    args = parser.parse_args()

    if args.verbose:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.ERROR)

    master = args.monitoring_master
    max_time = args.time_in_minutes
    # Split the threshold into hours + remaining minutes for display.
    max_hours = int(max_time / 60)
    max_minutes = int(max_time - max_hours * 60)

    hosts = Query(
        {
            'servertype': Any('hypervisor', 'vm'),
            'no_monitoring': False,
            'state': Not('retired')
        },
        ['igvm_locked', 'hostname'])

    # Classify hosts by whether their lock is older than the threshold.
    hosts_not_locked = []
    hosts_locked = []
    for host in hosts:
        if not host['igvm_locked']:
            hosts_not_locked.append(host)
            continue
        locked_time = datetime.now(timezone.utc) - host['igvm_locked']
        if locked_time >= timedelta(minutes=max_time):
            hosts_locked.append(host)
        else:
            hosts_not_locked.append(host)

    console_out(hosts_locked, max_minutes, max_hours)
    results = nagios_create(hosts_locked, hosts_not_locked, max_minutes,
                            max_hours)

    nsca_output = ""
    exit_code = 0
    for result in results:
        # send_nsca has a maximum input buffer of ~5100 bytes.
        # We need to split our data in chunks no bigger than 5000 characters,
        # otherwise Nagios will get partial output and a lot of services won't
        # get new data
        if (len(nsca_output) + len(result)) >= 5000:
            ret = nagios_send(master, nsca_output)
            if not ret:
                exit_code = 1
            nsca_output = result
        else:
            nsca_output += result

    # This last nagios sending covers the remaining output that wasn't sent
    # inside the loop.
    ret = nagios_send(master, nsca_output)
    if not ret:
        exit_code = 1

    if exit_code:
        logger.error('Failed to submit NSCA results')
    exit(exit_code)
def _get_best_hypervisor(vm, hypervisor_states, offline=False): hv_env = environ.get('IGVM_MODE', 'production') # Get all (theoretically) possible HVs sorted by HV preferences hypervisors = (Hypervisor(o) for o in Query( { 'servertype': 'hypervisor', 'environment': hv_env, 'vlan_networks': vm.route_network, 'state': Any(*hypervisor_states), }, HYPERVISOR_ATTRIBUTES)) hypervisors = sorted_hypervisors(HYPERVISOR_PREFERENCES, vm, hypervisors) possible_hvs = OrderedDict() for possible_hv in hypervisors: possible_hvs[str(possible_hv)] = possible_hv # Check all HVs in parallel. This will check live data on those HVs # but without locking them. This allows us to do a real quick first # filtering round. Below follows another one on the filtered HVs only. results = parallel( _check_vm, identifiers=list(possible_hvs.keys()), args=[[possible_hv, vm, offline] for possible_hv in possible_hvs.values()], ) # Remove unsupported HVs from the list for checked_hv, success in results.items(): if not success: possible_hvs.pop(checked_hv) # No supported HV was found not_found_err = IGVMError( 'Cannot find hypervisor matching environment: {}, ' 'states: {}, vlan_network: {}, offline: {}'.format( hv_env, ', '.join(hypervisor_states), vm.route_network, offline, )) if len(possible_hvs) == 0: raise not_found_err # Do another checking iteration, this time with HV locking for possible_hv in possible_hvs.values(): try: possible_hv.acquire_lock() except InvalidStateError as e: log.warning(e) continue if not _check_vm(possible_hv, vm, offline): possible_hv.release_lock() continue try: yield possible_hv break finally: possible_hv.release_lock() else: raise not_found_err
def _get_best_hypervisor(
    vm,
    hypervisor_states,
    offline=False,
    enforce_vm_env=False,
    soft_preferences=False,
):
    """Yield the best suitable hypervisor for *vm*, locked.

    Candidates are checked in preference order, in chunks of 10: a
    parallel unlocked pre-check first, then a locked re-check per HV.
    The first HV that passes both is yielded locked and released when
    the caller is done.

    :param vm: the VM to place
    :param hypervisor_states: acceptable HV states
    :param offline: passed through to the VM checks
    :param enforce_vm_env: restrict HVs to the VM's environment
        (ignored when IGVM_MODE is set)
    :param soft_preferences: passed through to preference sorting
    :raises IGVMError: when no candidate passes the checks
    """
    hv_filter = {
        'servertype': 'hypervisor',
        'vlan_networks': vm.route_network,
        'state': Any(*hypervisor_states),
    }

    # Enforce IGVM_MODE used for tests
    if 'IGVM_MODE' in environ:
        hv_filter['environment'] = environ.get('IGVM_MODE')
    else:
        if enforce_vm_env:
            hv_filter['environment'] = vm.dataset_obj['environment']

    # Get all (theoretically) possible HVs sorted by HV preferences
    hypervisors = (
        Hypervisor(o) for o in Query(hv_filter, HYPERVISOR_ATTRIBUTES)
    )
    hypervisors = sort_by_preference(
        vm,
        HYPERVISOR_PREFERENCES,
        hypervisors,
        soft_preferences,
    )

    # Keep preference order while allowing lookups by name.
    possible_hvs = OrderedDict()
    for possible_hv in hypervisors:
        possible_hvs[str(possible_hv)] = possible_hv

    # Check all HVs in parallel. This will check live data on those HVs
    # but without locking them. This allows us to do a real quick first
    # filtering round. Below follows another one on the filtered HVs only.
    chunk_size = 10
    iterations = math.ceil(len(possible_hvs) / chunk_size)
    found_hv = None

    # We are checking HVs in chunks. This will enable us to select HVs early
    # without looping through all of them if unnecessary.
    for i in range(iterations):
        start_idx = i * chunk_size
        end_idx = start_idx + chunk_size
        hv_chunk = dict(list(possible_hvs.items())[start_idx:end_idx])

        results = parallel(
            _check_vm,
            identifiers=list(hv_chunk.keys()),
            args=[
                [possible_hv, vm, offline]
                for possible_hv in hv_chunk.values()
            ],
            workers=chunk_size,
        )

        # Remove unsupported HVs from the list
        for checked_hv, success in results.items():
            if not success:
                hv_chunk.pop(checked_hv)

        # Do another checking iteration, this time with HV locking
        for possible_hv in hv_chunk.values():
            try:
                possible_hv.acquire_lock()
            except InvalidStateError as e:
                log.warning(e)
                continue

            if not _check_vm(possible_hv, vm, offline):
                possible_hv.release_lock()
                continue

            # HV found; it stays locked until the final yield's cleanup.
            found_hv = possible_hv

            break

        if found_hv:
            break

    if not found_hv:
        # No supported HV was found
        raise IGVMError(
            'Automatically finding the best Hypervisor failed! '
            'Can not find a suitable hypervisor with the preferences and '
            'the Query: {}'.format(hv_filter))

    # Yield the hypervisor locked for working on it
    try:
        log.info('Picked {} as destination Hypervisor'.format(str(found_hv)))
        yield found_hv
    finally:
        found_hv.release_lock()
def index(request):
    """The hardware resources page

    Renders a paginated overview of hosts with configurable graph,
    numeric and relation columns for the selected graph collection,
    optionally filtered by a search term.
    """
    term = request.GET.get('term', request.session.get('term', ''))
    collections = list(Collection.objects.filter(overview=True))

    # If a graph collection was specified, use it. Otherwise use the first one
    for collection in collections:
        if request.GET.get('current_collection'):
            if str(collection.id) != request.GET['current_collection']:
                continue
        current_collection = collection
        break
    else:
        return HttpResponseBadRequest('No matching current collection')

    template_info = {
        'search_term': term,
        'collections': collections,
        'current_collection': current_collection.id,
    }

    # TODO: Generalize this part using the relations
    hostnames = []
    matched_hostnames = []
    if term:
        # NOTE(review): parse errors propagate here; unlike the other index
        # view there is no DatatypeError handling - confirm intended.
        query_args = parse_query(term)
        # @TODO: This is the slowest part here unfortunately the Query object
        # does not support pagination yet so there is nothing to speed this
        # up right now.
        host_query = Query(query_args, ['hostname', 'hypervisor'])
        for host in host_query:
            matched_hostnames.append(host['hostname'])
            if host.get('hypervisor'):
                hostnames.append(host['hypervisor'])
            else:
                # If it's not guest, it might be a server, so we add it
                hostnames.append(host['hostname'])
        understood = repr(host_query)
        request.session['term'] = term
    else:
        understood = repr(Query({}))

    # Build the column descriptions; visibility comes from the user's
    # selection kept in the session.
    variations = list(current_collection.variation_set.all())
    columns = []
    columns_selected = request.GET.getlist(
        'columns', request.session.get('resources_columns', []))
    request.session['resources_columns'] = columns_selected
    attribute_ids = ['hostname', 'servertype']
    graph_index = 0
    sprite_width = settings.GRAPHITE_SPRITE_WIDTH
    for template in current_collection.template_set.all():
        for variation in variations:
            name = str(template) + ' ' + str(variation)
            columns.append({
                'name': name,
                'type': 'graph',
                'graph_index': graph_index,
                # Each graph occupies one fixed-width slot in the sprite.
                'sprite_offset': graph_index * sprite_width,
                'visible': slugify(name) in columns_selected,
            })
            graph_index += 1
    for numeric in current_collection.numeric_set.all():
        columns.append({
            'name': str(numeric),
            'type': 'numeric',
            'visible': slugify(numeric) in columns_selected,
        })
        attribute_ids.append(numeric.attribute_id)
    for relation in current_collection.relation_set.all():
        columns.append({
            'name': str(relation),
            'type': 'relation',
            'visible': slugify(relation) in columns_selected,
        })
        attribute_ids.append(relation.attribute_id)

    # NOTE(review): `collection` below is the loop variable from the
    # selection loop above; after its break it aliases current_collection.
    hosts = OrderedDict()
    filters = {GRAPHITE_ATTRIBUTE_ID: collection.name}
    if len(hostnames) > 0:
        filters['hostname'] = Any(*hostnames)
    for server in Query(filters, attribute_ids):
        hosts[server['hostname']] = dict(server)

    page = abs(int(request.GET.get('page', 1)))
    per_page = int(request.GET.get(
        'per_page', request.session.get('resources_per_page', 8)))

    # Save settings in session
    request.session['resources_per_page'] = per_page

    try:
        hosts_pager = Paginator(list(hosts.values()), per_page)

        # Term or data in DB has changed
        if page > hosts_pager.num_pages:
            page = 1

        hosts_pager = hosts_pager.page(page)
    except (PageNotAnInteger, EmptyPage):
        raise SuspiciousOperation('{} is not a valid!'.format(page))

    sprite_url = settings.MEDIA_URL + 'graph_sprite/' + collection.name

    template_info.update({
        'columns': columns,
        'hosts': hosts_pager,
        'page': page,
        'per_page': per_page,
        'matched_hostnames': matched_hostnames,
        'understood': understood,
        'error': None,
        'sprite_url': sprite_url,
    })
    return TemplateResponse(request, 'resources/index.html', template_info)