def find_port_name(devices, name):
    """Look up an end port in *devices* by its textual name.

    *name* is either a bare device name, which selects that device's first
    end port, or ``device/port`` where ``port`` is an integer port number.

    :rtype: :class:`EndPort`
    :raises rdma.RDMAError: If the device or port does not exist, or if
        *name* is not a valid end port specification."""
    fields = name.split('/')
    dev_name = fields[0]
    try:
        device = devices[dev_name]
    except KeyError:
        raise rdma.RDMAError("RDMA device %r not found." % (name))

    # A bare device name selects the first end port of that device.
    if len(fields) == 1:
        return device.end_ports.first()

    # Anything other than exactly "device/port" is malformed.
    if len(fields) != 2:
        raise rdma.RDMAError("Invalid end port specification %r" % (name))
    try:
        port_num = int(fields[1])
    except ValueError:
        raise rdma.RDMAError("Invalid end port specification %r" % (name))

    try:
        return device.end_ports[port_num]
    except KeyError:
        raise rdma.RDMAError("RDMA device %r port %u not found." %
                             (dev_name, port_num))
def __init__(self, parent):
    """Open the umad device associated with *parent* and prepare it for
    non-blocking use.

    *parent* is the owning :class:`rdma.devices.EndPort`.

    :raises rdma.RDMAError: If no umad service entry is found for *parent*,
        if the UMAD ABI version read from sysfs is below 5, or if
        :meth:`_ioctl_enable_pkey` reports failure."""
    # Local import: used to turn random bytes into an integer in a way that
    # works on both Python 2 and Python 3 (bytes.encode("hex") is 2-only).
    import binascii

    rdma.madtransactor.MADTransactor.__init__(self)

    # Initialise the SysFSDevice base from the first matching umad entry.
    # The pattern is a raw string so that \d is a regex escape rather than
    # an (invalid) string escape.
    for I in parent._iterate_services_end_port(SYS_INFINIBAND_MAD, r"umad\d+"):
        rdma.tools.SysFSDevice.__init__(self, parent, I)
        break
    else:
        raise rdma.RDMAError("Unable to open umad device for %s" % (repr(parent)))

    with open(SYS_INFINIBAND_MAD + "abi_version") as F:
        self.abi_version = int(F.read().strip())
    if self.abi_version < 5:
        raise rdma.RDMAError(
            "UMAD ABI version is %u but we need at least 5." % (self.abi_version))
    if not self._ioctl_enable_pkey():
        raise rdma.RDMAError(
            "UMAD ABI is not compatible, we need PKey support.")

    self.sbuf = bytearray(320)

    # Add O_NONBLOCK to the device's file status flags and register it for
    # readability polling.
    # NOTE(review): self.dev is presumably set by SysFSDevice.__init__ - confirm.
    fcntl.fcntl(
        self.dev.fileno(), fcntl.F_SETFL,
        fcntl.fcntl(self.dev.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK)
    self._poll = select.poll()
    self._poll.register(self.dev.fileno(), select.POLLIN)

    self._agent_cache = {}
    self._agent_id_dqpn = {}

    # Random 32 bit value; presumably the initial MAD transaction ID.
    self._tid = int(binascii.hexlify(os.urandom(4)), 16)
    self.end_port = parent
def _open_dev_from_sysfs(self, sysfs): '''Given the sysfs path to a device node descriptor find that device in /dev/''' name = os.path.basename(sysfs) try: with open(sysfs + "/dev") as F: dev = F.read().strip().split(':') dev = os.makedev(int(dev[0]), int(dev[1])) except IOError: raise rdma.RDMAError( "Unable to open device node for %s, bad dev file?" % (repr(name))) F = self._try_open("/dev/infiniband/%s" % (name), dev) if F: return F F = self._try_open("/dev/%s" % (name), dev) if F: return F for root, dirs, files in os.walk("/dev/"): for I in files: F = self._try_open("%s/%s" % (root, I), dev) if F: return F raise rdma.RDMAError("Unable to open device node for %s" % (repr(name)))
def get_path_smp(self, sched, end_port):
    """Return a VL15 SMP path to *end_port*.

    If directed routing is being used then this must be used to get paths.
    *sched* is an object with an `end_port` attribute. When ``self.paths``
    is not :class:`None` it is used as a cache keyed by *end_port*.

    :raises rdma.RDMAError: If *end_port* has no usable unicast LID."""
    assert (end_port == end_port.to_end_port())

    cache = self.paths
    if cache is not None:
        cached = cache.get(end_port)
        if cached is not None:
            return cached

    # A LID routed path needs a real unicast LID on the target.
    dlid = end_port.LID
    if dlid is None or dlid == 0 or dlid >= IBA.LID_MULTICAST:
        raise rdma.RDMAError(
            "Cannot setup a LID routed path to end port %s" % (end_port.portGUID))

    local_port = sched.end_port
    smp_path = rdma.path.IBPath(local_port,
                                SLID=local_port.lid,
                                DLID=dlid,
                                dqpn=0,
                                sqpn=0,
                                qkey=IBA.IB_DEFAULT_QP0_QKEY)
    smp_path._cached_subnet_end_port = end_port
    if cache is not None:
        cache[end_port] = smp_path
    return smp_path
def sa_path(self):
    """The path to the SA. This path should only be used for GMPs of
    class :data:`~rdma.IBA.MAD_SUBNET_ADMIN` and it should never be
    changed. See IBA 15.4.2.

    The path is built on first use and cached on the instance.

    :raises rdma.RDMAError: If neither the full nor the partial default
        PKey is present."""
    try:
        return self._cached_sa_path
    except AttributeError:
        pass

    # Prefer the full default PKey, fall back to the partial one.
    for candidate in (IBA.PKEY_DEFAULT, IBA.PKEY_PARTIAL_DEFAULT):
        try:
            pkey_idx = self.pkey_index(candidate)
        except ValueError:
            continue
        break
    else:
        raise rdma.RDMAError("Could not find the SA default PKey")

    sa = rdma.path.IBPath(self,
                          DLID=self.sm_lid,
                          SLID=self.lid,
                          SL=self.sm_sl,
                          dqpn=1,
                          sqpn=1,
                          qkey=IBA.IB_DEFAULT_QP1_QKEY,
                          pkey_index=pkey_idx,
                          packet_life_time=self.subnet_timeout)
    self._cached_sa_path = sa
    return self._cached_sa_path
def __init__(self, parent, path, depth=16):
    """Create a UD QP based MAD interface.

    *parent* is either a :class:`rdma.devices.EndPort`, in which case a
    verbs context is obtained with :func:`rdma.get_verbs`, or an existing
    :class:`ibv.Context`. *path* is used to set the PKey and QKey for all
    MADs sent through this interface. *depth* is the send and receive queue
    depth requested for the QP; the CQ and the buffer pool are created with
    ``2*depth`` entries.

    :raises rdma.RDMAError: If *path* belongs to a different device than
        the verbs context."""
    # Local import: portable (Python 2 and 3) bytes -> hex conversion,
    # bytes.encode("hex") only exists on Python 2.
    import binascii

    rdma.madtransactor.MADTransactor.__init__(self)
    self._tid = int(binascii.hexlify(os.urandom(8)), 16)

    if isinstance(parent, rdma.devices.EndPort):
        self._ctx = rdma.get_verbs(parent)
        self._allocated_ctx = True
    elif isinstance(parent, ibv.Context):
        self._ctx = parent
    # NOTE(review): any other *parent* type leaves self._ctx unassigned here;
    # presumably a class level default exists - confirm.

    self.end_port = path.end_port
    if path.end_port.parent != self._ctx.node:
        # Arguments are ordered to match the message text (path, then verbs).
        raise rdma.RDMAError("Cannot connect path %r to verbs %r" %
                             (path, self._ctx))

    self._cc = self._ctx.comp_channel()
    self._cq = self._ctx.cq(2 * depth, self._cc)
    self._poller = rdma.vtools.CQPoller(self._cq)

    self._pd = self._ctx.pd()
    # NOTE(review): 256 + 40 is presumably a 256 byte MAD plus a 40 byte
    # GRH - confirm.
    self._pool = rdma.vtools.BufferPool(self._pd, 2 * depth, 256 + 40)
    self._qp = self._pd.qp(ibv.IBV_QPT_UD, depth, self._cq,
                           depth, self._cq)
    self._pool.post_recvs(self._qp, min(self._qp.max_recv_wr, depth))
    self._recvs = collections.deque()

    # Adjust the number of buffers so that we can't exceed the send q depth
    while len(self._pool._buffers) > self._qp.max_send_wr:
        self._pool._buffers.pop()

    # The low 24 bits of the random TID are used as the send PSN.
    path = path.copy(sqpn=self._qp.qp_num, sqpsn=self._tid & 0xFFFFFF)
    self._qp.establish(path)
    self.qkey = path.qkey
    self.pkey = path.pkey
def get_node(self, type_, **kwargs):
    """Return an existing or new :class:`Node` and :class:`Port` instance
    associated with the end port described by *kwargs*.

    *type_* is the node class to find or create. *kwargs* is passed to
    :meth:`search_end_port` to locate an existing port and to
    :meth:`link_end_port` to link the resulting port, so it must include
    enough information to link a :class:`Port` to the :class:`Node`.

    :rtype: tuple(:class:`Node`, :class:`Port`)
    :raises rdma.RDMAError: If an existing node has a concrete class that
        is not an instance of *type_*."""
    port = self.search_end_port(**kwargs)
    if port is None:
        # Nothing known about this end port yet, start a fresh node.
        node = type_()
        self.all_nodes.add(node)
    else:
        node = port.parent
    if not isinstance(node, type_):
        if node.__class__ == Node:
            # This was a temporary node, re-type it appropriately.
            node.__class__ = type_
            if isinstance(node, Switch):
                port = self._fixup_change_to_switch(node)
        else:
            # FIXME: This can happen if the network is messed up, make a
            # better message.
            raise rdma.RDMAError("Node changed type.")
    if isinstance(node, Switch):
        # For a switch the port is always taken from index 0 and the link
        # information is forced to portIdx 0 as well.
        kwargs["portIdx"] = 0
        port = node.get_port(0)
    if port is None:
        port = Port(node)
    self.link_end_port(port, **kwargs)
    return (node, port)
def enable_sa_capability(self):
    """Enable the SA capability mask. This returns an instance that
    supports the context manager protocol that should be closed once
    the SA is finished.

    The returned object is a :class:`rdma.tools.SysFSDevice` built from the
    first ``issm`` service entry found for this end port.

    :raises rdma.RDMAError: If no issm service entry is found."""
    # Raw string so that \d is a regex escape, not a string escape.
    for I in self._iterate_services_end_port(SYS_INFINIBAND_MAD, r"issm\d+"):
        return rdma.tools.SysFSDevice(self, I)
    # Only reached when the iteration produced nothing. The message must
    # refer to self, there is no 'parent' name in this scope.
    raise rdma.RDMAError("Unable to open issm device for %s" % (repr(self)))
def find_node_guid(devices, guid):
    """Return the first device in *devices* whose ``node_guid`` equals
    *guid*.

    :rtype: :class:`Device`
    :raises rdma.RDMAError: If no matching device is found."""
    matching = (dev for dev in devices if dev.node_guid == guid)
    for dev in matching:
        return dev
    raise rdma.RDMAError("RDMA device %r not found." % (guid))
def set_end_port(self, node):
    """Set ``self.end_port`` to the first end port of *node* that has a
    GID equal to this path's ``SGID``.

    :raises rdma.RDMAError: If no end port of *node* carries the SGID."""
    for port in node.end_ports:
        if any(gid == self.SGID for gid in port.gids):
            self.end_port = port
            return
    raise rdma.RDMAError("RDMA end port %r not found." % (self.SGID))
def find_port_guid(devices, guid):
    """Return the first end port of any device in *devices* whose
    ``port_guid`` equals *guid*.

    :rtype: :class:`EndPort`
    :raises rdma.RDMAError: If no matching end port is found."""
    # Lazily flatten devices -> end ports, preserving the search order.
    all_ports = (port for dev in devices for port in dev.end_ports)
    for port in all_ports:
        if port.port_guid == guid:
            return port
    raise rdma.RDMAError("RDMA end port %r not found." % (guid))
def find_port_gid(devices, gid):
    """Search the list *devices* for the end port with *gid*.

    A GID with the default (link local) prefix is resolved through its GUID
    with :func:`find_port_guid` and reported with GID index 0. Any other
    GID must appear in an end port's ``gids`` table.

    :returns: (:class:`EndPort`,gid_index)
    :raises rdma.RDMAError: If no matching device is found."""
    # The link local prefix should always be valid
    if gid.prefix() == IBA.GUID(IBA.GID_DEFAULT_PREFIX):
        # Return an index, as documented, rather than the GID itself.
        # NOTE(review): index 0 assumes GID table entry 0 is the one formed
        # from the port GUID - confirm against IBA.
        return (find_port_guid(devices, gid.guid()), 0)

    if gid.guid() == IBA.GUID(0):
        raise rdma.RDMAError("RDMA end port %r not found." % (gid))

    for I in devices:
        for J in I.end_ports:
            try:
                return (J, J.gids.index(gid))
            except ValueError:
                continue
    raise rdma.RDMAError("RDMA end port %r not found." % (gid))
def SGID_index(self):
    """Cache and return the index of the SGID for the associated
    :class:`~rdma.devices.EndPort`. Assignment updates the :attr:`SGID`
    value.

    :raises rdma.RDMAError: If the SGID is not in the end port's GID table."""
    try:
        return self._cached_SGID_index
    except AttributeError:
        pass

    # Not cached yet, look the SGID up in the end port's GID table.
    try:
        index = self.end_port.gids.index(self.SGID)
    except ValueError:
        raise rdma.RDMAError("GID %s not available on %s" %
                             (self.SGID, self.end_port))
    self._cached_SGID_index = index
    return self._cached_SGID_index
def pkey_index(self):
    """Cache and return the index of the PKey for the associated
    :class:`~rdma.devices.EndPort`. Assignment updates the :attr:`pkey`
    value.

    :raises rdma.RDMAError: If the PKey is not in the end port's PKey table."""
    try:
        return self._cached_pkey_index
    except AttributeError:
        pass

    # Not cached yet, look the PKey up in the end port's PKey table.
    try:
        index = self.end_port.pkeys.index(self.pkey)
    except ValueError:
        raise rdma.RDMAError("PKey 0x%x not available on %s" %
                             (self.pkey, self.end_port))
    self._cached_pkey_index = index
    return self._cached_pkey_index
def sendto(self, buf, path):
    '''Send a MAD packet. *buf* is the raw MAD to send, starting with the
    first byte of :class:`rdma.IBA.MADHeader`. *path* is the destination.

    Waits until the buffer pool has a free buffer before validating *path*
    and posting the send.

    :raises rdma.RDMAError: If the qkey or pkey of *path* differs from the
        values this instance was created with.'''
    pool = self._pool

    # Reap completions until a send buffer is free, sleeping on the CQ
    # whenever a drain did not release one.
    while not pool._buffers:
        self._cq_drain()
        if pool._buffers:
            break
        self._cq_sleep(None)

    if path.qkey != self.qkey or path.pkey != self.pkey:
        raise rdma.RDMAError(
            "Destination %r does not match the qkey or pkey of this VMAD instance." % (path))

    slot = pool.pop()
    pool.copy_to(buf, slot)
    work_request = pool.make_send_wr(slot, len(buf), path)
    self._qp.post_send(work_request)
def recvfrom(self, wakeat):
    '''Receive a MAD packet. If the value of
    :func:`rdma.tools.clock_monotonic()` exceeds *wakeat* then :class:`None`
    is returned. A *wakeat* of :class:`None` polls without a timeout.

    Packets whose umad status is ``ETIMEDOUT`` are skipped and the read is
    retried.

    :returns: tuple(buf,path), or :class:`None` if nothing became readable
    :raises IOError: If the read fails for a reason other than ``ENOSPC``,
        or if a read after a successful poll still returns no data.
    :raises rdma.RDMAError: If the received packet carries a non-zero status
        other than ``ETIMEDOUT``.'''
    buf = bytearray(320)
    # 'first' is True while poll has not yet been tried for this read.
    first = True
    while True:
        try:
            rc = self.dev.readinto(buf)
        except IOError as err:
            if err.errno == errno.ENOSPC:
                # Hmm.. Must be RMPP.. Resize the buffer accordingly.
                # NOTE(review): struct.unpack_from returns a tuple, so the
                # min() below compares an int with a tuple (a TypeError on
                # Python 3). Confirm the intended value (probably element
                # [0]) and that offset 32 is the right field.
                rmpp_data2 = struct.unpack_from(">L", bytes(buf), 32)
                buf = bytearray(min(len(buf) * 2, rmpp_data2))
                continue
            raise
        if rc is None:
            # Nothing available on the non-blocking read: poll once, then
            # retry the read.
            if not first:
                raise IOError(errno.EAGAIN, "Invalid read after poll")
            if wakeat is None:
                if not self._poll.poll(-1):
                    return None
            else:
                timeout = wakeat - rdma.tools.clock_monotonic()
                # poll() takes milliseconds.
                if timeout <= 0 or not self._poll.poll(timeout * 1000):
                    return None
            first = False
            continue

        # Decode the header at the start of the buffer into a new path.
        path = rdma.path.IBPath(self.parent)
        (path.umad_agent_id, status, timeout_ms, retries, length,
         path._cached_umad_ah) = self.ib_user_mad_t.unpack_from(
             bytes(buf), 0)
        path.dqpn = self._agent_id_dqpn.get(path.umad_agent_id, 0)
        path.__class__ = LazyIBPath

        if status != 0:
            if status == errno.ETIMEDOUT:
                # Skip this packet and start over with a fresh read/poll.
                first = True
                continue
            raise rdma.RDMAError("umad send failure code=%d for %s" % (status, repr(buf)))
        # The payload returned to the caller starts at byte 64 of the buffer.
        return (buf[64:rc], path)
def advance_dr(self, path, portIdx): """Create a new :class:`~rdma.path.IBDRPath` that goes to the device connected to *port_idx* of *path*.""" # LID route to a HCA followed by DR route after does not work, in the local # host case I think this is a kernel bug, but other cases seem to be as the # spec intends. drPath = getattr(path, "drPath", "\0") + chr(portIdx) if len(drPath) > 64: raise rdma.RDMAError("DR path length limit exceeded, %r" % (drPath)) if (path.DLID == path.end_port.lid and path.DLID != IBA.LID_PERMISSIVE and path.DLID != 0): # Local loopback return rdma.path.IBDRPath(path.end_port, drPath=drPath) else: if isinstance(path, rdma.path.IBDRPath): ret = path.copy(drPath=drPath) else: ret = rdma.path.IBDRPath(path.end_port, SLID=path.SLID, drSLID=path.SLID, DLID=path.DLID, drPath=drPath, retries=path.retries) ep = self.path_to_port(path) if ep is not None and not isinstance(ep.parent, Switch): # If we are DR'ing from a non-CA then the only possible legal # thing is to go back out the same port. Dropping the last entry # from the DR list is the same thing. if len(drPath) >= 3 and ep.port_id == portIdx: ret.drPath = drPath[:-2] else: # Hum, we know this will fail, try and fix it up with our topology # database.. np = self.topology.get(ep.parent.get_port(portIdx)) if np is not None: ret = self.get_path_smp(path, np.to_end_port()) # When we eat the DR path like this it breaks # localPortNum, but since we are going in and out of the # same port we can just record what it should have been # here. ret._cached_subnet_localPortNum = ord(drPath[-2]) return ret
def second(self):
    """Increment ``self.count`` by one and then unconditionally raise
    :class:`rdma.RDMAError` with the message ``"moo"``."""
    self.count += 1
    raise rdma.RDMAError("moo")