def check_network_status(): """ Performs the network status check. """ # Initialize the state of nodes and subnets, remove out of date ap clients and graph items Node.objects.all().update(visible = False) Subnet.objects.all().update(visible = False) Link.objects.all().update(visible = False) APClient.objects.filter(last_update__lt = datetime.now() - timedelta(minutes = 11)).delete() # Reset some states NodeWarning.objects.all().update(source = EventSource.Monitor, dirty = False) Node.objects.all().update(warnings = False, conflicting_subnets = False) # Fetch routing tables from OLSR try: nodes, hna = wifi_utils.get_tables(settings.MONITOR_OLSR_HOST) except TypeError: logging.error("Unable to fetch routing tables from '%s'!" % settings.MONITOR_OLSR_HOST) return # Ping nodes present in the database and visible in OLSR dbNodes = {} nodesToPing = [] for nodeIp in nodes.keys(): try: # Try to get the node from the database n = Node.get_exclusive(ip = nodeIp) n.visible = True n.peers = len(nodes[nodeIp].links) # If we have succeeded, add to list (if not invalid) if not n.is_invalid(): if n.awaiting_renumber: # Reset any status from awaiting renumber to invalid for notice in n.renumber_notices.all(): try: rn = Node.objects.get(ip = notice.original_ip) if rn.status == NodeStatus.AwaitingRenumber: rn.status = NodeStatus.Invalid rn.node_type = NodeType.Unknown rn.awaiting_renumber = False rn.save() except Node.DoesNotExist: pass notice.delete() n.awaiting_renumber = False n.save() nodesToPing.append(nodeIp) else: n.last_seen = datetime.now() n.peers = len(nodes[nodeIp].links) # Create a warning since node is not registered NodeWarning.create(n, WarningCode.UnregisteredNode, EventSource.Monitor) n.save() dbNodes[nodeIp] = n except Node.DoesNotExist: # Node does not exist, create an invalid entry for it n = Node(ip = nodeIp, status = NodeStatus.Invalid, last_seen = datetime.now()) n.visible = True n.node_type = NodeType.Unknown n.peers = len(nodes[nodeIp].links) # Check if there are any renumber notices for this IP address try: notice = RenumberNotice.objects.get(original_ip = nodeIp) n.status = NodeStatus.AwaitingRenumber n.node_type = notice.node.node_type n.awaiting_renumber = True except RenumberNotice.DoesNotExist: pass n.save(force_insert = True) dbNodes[nodeIp] = n # Create an event and append a warning since an unknown node has appeared NodeWarning.create(n, WarningCode.UnregisteredNode, EventSource.Monitor) Event.create_event(n, EventCode.UnknownNodeAppeared, '', EventSource.Monitor) # Add a warning to all nodes that have been stuck in renumbering state for over a week for node in Node.objects.filter(renumber_notices__renumbered_at__lt = datetime.now() - timedelta(days = 7)): NodeWarning.create(node, WarningCode.LongRenumber, EventSource.Monitor) node.save() # Mark invisible nodes as down for node in Node.objects.exclude(status__in = (NodeStatus.Invalid, NodeStatus.AwaitingRenumber)): oldStatus = node.status if node.ip not in dbNodes: if node.status == NodeStatus.New: node.status = NodeStatus.Pending elif node.status != NodeStatus.Pending: node.status = NodeStatus.Down node.save() if oldStatus in (NodeStatus.Up, NodeStatus.Visible, NodeStatus.Duped) and node.status == NodeStatus.Down: Event.create_event(node, EventCode.NodeDown, '', EventSource.Monitor) # Invalidate uptime credit for this node node.uptime_last = None node.save() # Generate timestamp and snapshot identifier timestamp = datetime.now() snapshot_id = int(time.time()) # Setup all node peerings for nodeIp, node in nodes.iteritems(): n = dbNodes[nodeIp] n.redundancy_link = False links = [] # Find old VPN server peers old_vpn_peers = set([p.dst for p in n.get_peers().filter(dst__vpn_server = True)]) for peerIp, lq, ilq, etx, vtime in node.links: try: l = Link.objects.get(src = n, dst = dbNodes[peerIp]) except Link.DoesNotExist: l = Link(src = n, dst = dbNodes[peerIp]) l.lq = float(lq) l.ilq = float(ilq) l.etx = float(etx) l.vtime = vtime l.visible = True l.save() links.append(l) # Check if any of the peers has never peered with us before if n.is_adjacency_important() and l.dst.is_adjacency_important() and not n.peer_history.filter(pk = l.dst.pk).count(): n.peer_history.add(l.dst) Event.create_event(n, EventCode.AdjacencyEstablished, '', EventSource.Monitor, data = 'Peer node: %s' % l.dst, aggregate = False) Event.create_event(l.dst, EventCode.AdjacencyEstablished, '', EventSource.Monitor, data = 'Peer node: %s' % n, aggregate = False) # Check if we have a peering with any VPN servers if l.dst.vpn_server: n.redundancy_link = True if not n.is_invalid(): # Determine new VPN server peers new_vpn_peers = set([p.dst for p in n.get_peers().filter(visible = True, dst__vpn_server = True)]) if old_vpn_peers != new_vpn_peers: for p in old_vpn_peers: if p not in new_vpn_peers: # Redundancy loss has ocurred Event.create_event(n, EventCode.RedundancyLoss, '', EventSource.Monitor, data = 'VPN server: %s' % p) for p in new_vpn_peers: if p not in old_vpn_peers: # Redundancy restoration has ocurred Event.create_event(n, EventCode.RedundancyRestored, '', EventSource.Monitor, data = 'VPN server: %s' % p) # Issue a warning when node requires peering but has none if n.redundancy_req and not n.redundancy_link: NodeWarning.create(n, WarningCode.NoRedundancy, EventSource.Monitor) n.save() # Archive topology information data_archive.record_topology_entry(snapshot_id, timestamp, n, links) # Update valid subnet status in the database for nodeIp, subnets in hna.iteritems(): if nodeIp not in dbNodes: continue for subnet in subnets: subnet, cidr = subnet.split("/") try: s = Subnet.objects.get(node__ip = nodeIp, subnet = subnet, cidr = int(cidr)) s.last_seen = datetime.now() s.visible = True except Subnet.DoesNotExist: s = Subnet(node = dbNodes[nodeIp], subnet = subnet, cidr = int(cidr), last_seen = datetime.now()) s.visible = True s.allocated = False # Save previous subnet status for later use old_status = s.status # Set status accoording to allocation flag if s.allocated: s.status = SubnetStatus.AnnouncedOk else: s.status = SubnetStatus.NotAllocated # Check if this is a more specific prefix announce for an allocated prefix if s.is_more_specific() and not s.allocated: s.status = SubnetStatus.Subset # Check if this is a hijack try: origin = Subnet.objects.ip_filter( # Subnet overlaps with another one ip_subnet__contains = '%s/%s' % (subnet, cidr) ).exclude( # Of another node (= filter all subnets belonging to current node) node = s.node ).get( # That is allocated and visible allocated = True, visible = True ) s.status = SubnetStatus.Hijacked except Subnet.DoesNotExist: pass # Generate an event if status has changed if old_status != s.status and s.status == SubnetStatus.Hijacked: Event.create_event(n, EventCode.SubnetHijacked, '', EventSource.Monitor, data = 'Subnet: %s/%s\n Allocated to: %s' % (s.subnet, s.cidr, origin.node)) # Flag node entry with warnings flag for unregistered announces if not s.is_properly_announced(): if s.node.border_router and not s.is_from_known_pool(): # TODO when we have peering announce registration this should first check if # the subnet is registered as a peering s.status = SubnetStatus.Peering if not s.node.border_router or s.status == SubnetStatus.Hijacked or s.is_from_known_pool(): # Add a warning message for unregistered announced subnets NodeWarning.create(s.node, WarningCode.UnregisteredAnnounce, EventSource.Monitor) s.node.save() s.save() # Detect subnets that cause conflicts and raise warning flags for all involved # nodes if s.is_conflicting(): NodeWarning.create(s.node, WarningCode.AnnounceConflict, EventSource.Monitor) s.node.conflicting_subnets = True s.node.save() for cs in s.get_conflicting_subnets(): NodeWarning.create(cs.node, WarningCode.AnnounceConflict, EventSource.Monitor) cs.node.conflicting_subnets = True cs.node.save() # Remove subnets that were hijacked but are not visible anymore for s in Subnet.objects.filter(status = SubnetStatus.Hijacked, visible = False): Event.create_event(s.node, EventCode.SubnetRestored, '', EventSource.Monitor, data = 'Subnet: %s/%s' % (s.subnet, s.cidr)) s.delete() # Remove (or change their status) subnets that are not visible Subnet.objects.filter(allocated = False, visible = False).delete() Subnet.objects.filter(allocated = True, visible = False).update(status = SubnetStatus.NotAnnounced) for subnet in Subnet.objects.filter(status = SubnetStatus.NotAnnounced, node__visible = True): NodeWarning.create(subnet.node, WarningCode.OwnNotAnnounced, EventSource.Monitor) subnet.node.save() # Remove invisible unknown nodes for node in Node.objects.filter(status = NodeStatus.Invalid, visible = False).all(): # Create an event since an unknown node has disappeared Event.create_event(node, EventCode.UnknownNodeDisappeared, '', EventSource.Monitor) Node.objects.filter(status__in = (NodeStatus.Invalid, NodeStatus.AwaitingRenumber), visible = False).delete() # Remove invisible links Link.objects.filter(visible = False).delete() # Add nodes to topology map and generate output if not getattr(settings, 'MONITOR_DISABLE_GRAPHS', None): # Only generate topology when graphing is not disabled topology = DotTopologyPlotter() for node in dbNodes.values(): topology.addNode(node) topology.save(os.path.join(settings.GRAPH_DIR, 'network_topology.png'), os.path.join(settings.GRAPH_DIR, 'network_topology.dot')) # Ping the nodes to prepare information for later node processing varsize_results = {} results, dupes = wifi_utils.ping_hosts(10, nodesToPing) for packet_size in (100, 500, 1000, 1480): r, d = wifi_utils.ping_hosts(10, nodesToPing, packet_size - 8) for node_ip in nodesToPing: varsize_results.setdefault(node_ip, []).append(r[node_ip][3] if node_ip in r else None) if getattr(settings, 'MONITOR_DISABLE_MULTIPROCESSING', None): # Multiprocessing is disabled (the MONITOR_DISABLE_MULTIPROCESSING option is usually # used for debug purpuses where a single process is prefered) for node_ip in nodesToPing: process_node(node_ip, results.get(node_ip), node_ip in dupes, nodes[node_ip].links, varsize_results.get(node_ip)) # Commit the transaction here since we do everything in the same session transaction.commit() else: # We MUST commit the current transaction here, because we will be processing # some transactions in parallel and must ensure that this transaction that has # modified the nodes is commited. Otherwise this will deadlock! transaction.commit() worker_results = [] for node_ip in nodesToPing: worker_results.append( WORKER_POOL.apply_async(process_node, (node_ip, results.get(node_ip), node_ip in dupes, nodes[node_ip].links, varsize_results.get(node_ip))) ) # Wait for all workers to finish processing objects = {} for result in worker_results: try: k, v = result.get() objects[k] = v except Exception, e: logging.warning(format_exc()) # When GC debugging is enabled make some additional computations if getattr(settings, 'MONITOR_ENABLE_GC_DEBUG', None): global _MAX_GC_OBJCOUNT objcount = sum(objects.values()) if '_MAX_GC_OBJCOUNT' not in globals(): _MAX_GC_OBJCOUNT = objcount logging.debug("GC object count: %d %s" % (objcount, "!M" if objcount > _MAX_GC_OBJCOUNT else "")) _MAX_GC_OBJCOUNT = max(_MAX_GC_OBJCOUNT, objcount)