def _http_body_and_headers(data: bytes) -> Tuple[bytes, Dict[str, str]]:
    """
    Parse the headers and decompressed body from a HTTP response, such as...

    ::

      HTTP/1.0 200 OK
      Date: Mon, 23 Apr 2018 18:43:47 GMT
      Content-Type: text/plain
      X-Your-Address-Is: 216.161.254.25
      Content-Encoding: identity
      Expires: Wed, 25 Apr 2018 18:43:47 GMT

      router dannenberg 193.23.244.244 443 0 80
      identity-ed25519
      ... rest of the descriptor content...

    :param data: HTTP response

    :returns: **tuple** with the decompressed data and headers

    :raises:
      * **stem.ProtocolError** if response was unsuccessful or malformed
      * **ValueError** if encoding is unrecognized
      * **ImportError** if missing the decompression module
    """

    status_line, _, remainder = data.partition(b'\r\n')

    # Check the status before looking for headers so that failure responses
    # without any headers or body are still reported as unsuccessful rather
    # than tripping over the missing separator.

    if not status_line.startswith(b'HTTP/1.0 2'):
        raise stem.ProtocolError(
            "Response should begin with HTTP success, but was '%s'"
            % str_tools._to_unicode(status_line))

    header_data, separator, body_data = remainder.partition(b'\r\n\r\n')

    if not separator:
        raise stem.ProtocolError(
            'HTTP response is missing the blank line that separates its '
            'headers from its body')

    header_text = str_tools._to_unicode(header_data)
    headers = {}

    for line in header_text.splitlines():
        if ': ' not in line:
            raise stem.ProtocolError(
                "'%s' is not a HTTP header:\n\n%s" % (line, header_text))

        key, value = line.split(': ', 1)
        headers[key] = value

    encoding = headers.get('Content-Encoding')

    # 'deflate' payloads are handed to the GZIP compression's decompress().

    if encoding == 'deflate':
        return stem.descriptor.Compression.GZIP.decompress(
            body_data).rstrip(), headers

    for compression in stem.descriptor.Compression:
        if encoding == compression.encoding:
            return compression.decompress(body_data).rstrip(), headers

    raise ValueError("'%s' is an unrecognized encoding" % encoding)
def is_valid_ipv4_address(address):
    """
    Checks if a string is a valid IPv4 address.

    :param str address: string to be checked

    :returns: **True** if input is a valid IPv4 address, **False** otherwise
    """

    if isinstance(address, bytes):
        address = str_tools._to_unicode(address)
    elif not isinstance(address, str):
        return False

    # addresses consist of exactly four period separated values

    octets = address.split('.')

    if len(octets) != 4:
        return False

    for octet in octets:
        # Values between 0-255 never need more than three digits. Checking the
        # length up front also keeps int() away from absurdly long input.

        if not octet or len(octet) > 3:
            return False

        # str.isdigit() accepts characters such as superscripts that int()
        # cannot convert, so only plain ASCII digits are permitted

        if any(char not in '0123456789' for char in octet):
            return False

        if len(octet) > 1 and octet[0] == '0':
            return False  # leading zeros, for instance in '1.2.3.001'

        if int(octet) > 255:
            return False

    return True
def _match_with(lines, regexes, required=None):
    """
    Scans the given content against a series of regex matchers, providing back
    a mapping of regexes to their capture groups. This mapping is with the value
    if the regex has just a single capture group, and a tuple otherwise (an
    empty tuple for regexes without any capture groups).

    If a regex matches multiple lines the last match is the one provided.

    :param list lines: text to parse
    :param list regexes: regexes to match against
    :param list required: matches that must be in the content

    :returns: **dict** mapping matchers against their capture groups

    :raises: **ValueError** if a required match is not present
    """

    # Normalize once up front rather than for every line/regex combination.
    # This also ensures the error message below can join the content.

    text_lines = [str_tools._to_unicode(line) for line in lines]
    matches = {}

    for line in text_lines:
        for matcher in regexes:
            m = matcher.search(line)

            if m:
                match_groups = m.groups()

                # regexes without capture groups have nothing to unwrap
                matches[matcher] = match_groups[0] if len(match_groups) == 1 else match_groups

    if required:
        for required_matcher in required:
            if required_matcher not in matches:
                raise ValueError(
                    'Failed to parse mandatory data from:\n\n%s' % '\n'.join(text_lines))

    return matches
def from_remote(timeout = 60):
    """
    Reads and parses tor's latest fallback directories `from
    gitweb.torproject.org
    <https://gitweb.torproject.org/tor.git/plain/src/or/fallback_dirs.inc>`_.
    Note that while convenient, this reliance on GitWeb means you should
    always call with a fallback, such as...

    ::

      try:
        fallback_directories = stem.descriptor.remote.from_remote()
      except IOError:
        fallback_directories = stem.descriptor.remote.from_cache()

    :param int timeout: seconds to wait before timing out the request

    :returns: **dict** of **str** fingerprints to their
      :class:`~stem.descriptor.remote.FallbackDirectory`

    :raises: **IOError** if unable to retrieve the fallback directories
    """

    # Only wrap genuine errors. Interrupts and interpreter exits should
    # propagate rather than being reported as a download failure.

    try:
        fallback_dir_page = str_tools._to_unicode(urllib.urlopen(GITWEB_FALLBACK_DIR_URL, timeout = timeout).read())
    except Exception as exc:
        raise IOError("Unable to download tor's fallback directories from %s: %s" % (GITWEB_FALLBACK_DIR_URL, exc))

    # Example of an entry...
    #
    #   "5.175.233.86:80 orport=443 id=5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33"
    #   " weight=43680",

    addr_line_pattern = re.compile(r'"([\d\.]+):(\d+) orport=(\d+) id=([\dA-F]{40}).*')
    results = {}

    for line in fallback_dir_page.splitlines():
        if not line.startswith('"'):
            continue

        addr_line_match = addr_line_pattern.match(line)

        if not addr_line_match:
            continue

        address, dir_port, or_port, fingerprint = addr_line_match.groups()

        if not connection.is_valid_ipv4_address(address):
            raise IOError('%s has an invalid address: %s' % (fingerprint, address))
        elif not connection.is_valid_port(or_port):
            raise IOError('%s has an invalid or_port: %s' % (fingerprint, or_port))
        elif not connection.is_valid_port(dir_port):
            raise IOError('%s has an invalid dir_port: %s' % (fingerprint, dir_port))
        elif not tor_tools.is_valid_fingerprint(fingerprint):
            raise IOError('%s has an invalid fingerprint: %s' % (fingerprint, fingerprint))

        results[fingerprint] = FallbackDirectory(
            address = address,
            or_port = int(or_port),
            dir_port = int(dir_port),
            fingerprint = fingerprint,
        )

    return results
def from_remote(timeout = 60):
    """
    Downloads the content at GITWEB_AUTHORITY_URL and parses it into
    directory authorities.

    :param int timeout: seconds to wait before timing out the request

    :returns: **dict** of **str** nicknames to their Authority

    :raises: **IOError** if unable to download the content, it is empty, or
      a mandatory field of an entry cannot be parsed
    """

    try:
        response = urllib.urlopen(GITWEB_AUTHORITY_URL, timeout = timeout)
        lines = str_tools._to_unicode(response.read()).splitlines()
    except Exception as exc:
        raise IOError("Unable to download tor's directory authorities from %s: %s" % (GITWEB_AUTHORITY_URL, exc))

    if not lines:
        raise IOError('%s did not have any content' % GITWEB_AUTHORITY_URL)

    # Entries look like...
    #
    # "moria1 orport=9101 "
    #   "v3ident=D586D18309DED4CD6D57C18FDB97EFA96D330566 "
    #   "128.31.0.39:9131 9695 DFC3 5FFE B861 329B 9F1A B04C 4639 7020 CE31",

    matchers = (AUTHORITY_NAME, AUTHORITY_V3IDENT, AUTHORITY_IPV6, AUTHORITY_ADDR)
    mandatory = (AUTHORITY_NAME, AUTHORITY_ADDR)
    authorities = {}

    try:
        for entry in _directory_entries(lines, Authority._pop_section, matchers, required = mandatory):
            nickname, or_port = entry.get(AUTHORITY_NAME)
            address, dir_port, fingerprint = entry.get(AUTHORITY_ADDR)

            authorities[nickname] = Authority(
                address = address,
                or_port = or_port,
                dir_port = dir_port,
                fingerprint = fingerprint.replace(' ', ''),
                nickname = nickname,
                orport_v6 = entry.get(AUTHORITY_IPV6),
                v3ident = entry.get(AUTHORITY_V3IDENT),
            )
    except ValueError as exc:
        # parsing problems are surfaced with the same type as download failures
        raise IOError(str(exc))

    return authorities
def _download_from_orport(endpoint, compression, resource):
    """
    Downloads descriptors from the given orport. Payload is just like an http
    response (headers and all)...

    ::

      HTTP/1.0 200 OK
      Date: Mon, 23 Apr 2018 18:43:47 GMT
      Content-Type: text/plain
      X-Your-Address-Is: 216.161.254.25
      Content-Encoding: identity
      Expires: Wed, 25 Apr 2018 18:43:47 GMT

      router dannenberg 193.23.244.244 443 0 80
      identity-ed25519
      ... rest of the descriptor content...

    :param stem.ORPort endpoint: endpoint to download from
    :param list compression: compression methods for the request
    :param str resource: descriptor resource to download

    :returns: two value tuple of the form (data, reply_headers)

    :raises:
      * :class:`stem.ProtocolError` if not a valid descriptor response
      * :class:`stem.SocketError` if unable to establish a connection
    """

    # default to link protocol 3 when the endpoint doesn't specify any
    link_protocols = endpoint.link_protocols if endpoint.link_protocols else [3]

    with stem.client.Relay.connect(endpoint.address, endpoint.port, link_protocols) as relay:
        with relay.create_circuit() as circ:
            request = '\r\n'.join((
                'GET %s HTTP/1.0' % resource,
                'Accept-Encoding: %s' % ', '.join(compression),
                'User-Agent: %s' % stem.USER_AGENT,
            )) + '\r\n\r\n'

            circ.send(RelayCommand.BEGIN_DIR, stream_id = 1)
            response = b''.join([cell.data for cell in circ.send(RelayCommand.DATA, request, stream_id = 1)])

            status_line, _, remainder = response.partition(b'\r\n')

            # Check the status first so that failure responses lacking headers
            # or a body are reported as such rather than as a parsing error.

            if not status_line.startswith(b'HTTP/1.0 2'):
                raise stem.ProtocolError("Response should begin with HTTP success, but was '%s'" % str_tools._to_unicode(status_line))

            header_data, separator, body_data = remainder.partition(b'\r\n\r\n')

            if not separator:
                raise stem.ProtocolError('HTTP response is missing the blank line that separates its headers from its body')

            header_text = str_tools._to_unicode(header_data)
            headers = {}

            for line in header_text.splitlines():
                if ': ' not in line:
                    # the message has two placeholders: the offending line and all headers
                    raise stem.ProtocolError("'%s' is not a HTTP header:\n\n%s" % (line, header_text))

                key, value = line.split(': ', 1)
                headers[key] = value

            return _decompress(body_data, headers.get('Content-Encoding')), headers
def is_valid_ipv6_address(address, allow_brackets=False):
    """
    Checks if a string is a valid IPv6 address.

    :param str address: string to be checked
    :param bool allow_brackets: ignore brackets which form '[address]'

    :returns: **True** if input is a valid IPv6 address, **False** otherwise
    """

    if isinstance(address, bytes):
        address = str_tools._to_unicode(address)
    elif not isinstance(address, str):
        return False

    if allow_brackets and address.startswith('[') and address.endswith(']'):
        address = address[1:-1]

    if address.count('.') == 3:
        # Likely an ipv4-mapped portion. Check that it's valid, then replace
        # with a filler.

        ipv4_start = address.rfind(':', 0, address.find('.')) + 1
        ipv4_end = address.find(':', ipv4_start + 1)

        if ipv4_end == -1:
            ipv4_end = None  # don't crop the last character

        if not is_valid_ipv4_address(address[ipv4_start:ipv4_end]):
            return False

        addr_comp = [
            address[:ipv4_start - 1] if ipv4_start != 0 else None,
            'ff:ff',
            address[ipv4_end + 1:] if ipv4_end else None,
        ]

        address = ':'.join(filter(None, addr_comp))

    # addresses are made up of eight colon separated groups of four hex digits
    # with leading zeros being optional
    # https://en.wikipedia.org/wiki/IPv6#Address_format

    colon_count = address.count(':')

    if colon_count > 7:
        return False  # too many groups
    elif colon_count != 7 and '::' not in address:
        return False  # not enough groups and none are collapsed
    elif address.count('::') > 1 or ':::' in address:
        return False  # multiple groupings of zeros can't be collapsed

    # An empty group is only legitimate as part of a '::', so a lone colon at
    # either end (for instance '1:2:3:4:5:6:7:') is malformed.

    if address.startswith(':') and not address.startswith('::'):
        return False
    elif address.endswith(':') and not address.endswith('::'):
        return False

    # '\Z' rather than '$' so a trailing newline isn't tolerated
    return all(re.match(r'[0-9a-fA-F]{0,4}\Z', entry) for entry in address.split(':'))
def from_remote(timeout: int = 60) -> Dict[str, 'stem.directory.Fallback']:
    """
    Downloads the content at GITWEB_FALLBACK_URL and parses it into fallback
    directories.

    :param timeout: seconds to wait before timing out the request

    :returns: **dict** of **str** fingerprints to their Fallback

    :raises:
      * **stem.DownloadFailed** if unable to download the content or it is
        empty
      * **IOError** if the content's type line, header, or an entry is
        malformed
    """

    # Only wrap genuine errors. Interrupts and interpreter exits should
    # propagate rather than being reported as a download failure.

    try:
        lines = str_tools._to_unicode(urllib.request.urlopen(GITWEB_FALLBACK_URL, timeout = timeout).read()).splitlines()

        if not lines:
            raise IOError('no content')
    except Exception as exc:
        stacktrace = sys.exc_info()[2]
        message = "Unable to download tor's fallback directories from %s: %s" % (GITWEB_FALLBACK_URL, exc)
        raise stem.DownloadFailed(GITWEB_FALLBACK_URL, exc, stacktrace, message)

    # header metadata

    if lines[0] != '/* type=fallback */':
        raise IOError('%s does not have a type field indicating it is fallback directory metadata' % GITWEB_FALLBACK_URL)

    header = {}

    for line in Fallback._pop_section(lines):
        mapping = FALLBACK_MAPPING.match(line)

        if mapping:
            header[mapping.group(1)] = mapping.group(2)
        else:
            raise IOError('Malformed fallback directory header line: %s' % line)

    Fallback._pop_section(lines)  # skip human readable comments

    # Entries look like...
    #
    # "5.9.110.236:9030 orport=9001 id=0756B7CD4DFC8182BE23143FAC0642F515182CEB"
    # " ipv6=[2a01:4f8:162:51e2::2]:9001"
    # /* nickname=rueckgrat */
    # /* extrainfo=1 */

    try:
        results = {}

        for matches in _directory_entries(lines, Fallback._pop_section, (FALLBACK_ADDR, FALLBACK_NICKNAME, FALLBACK_EXTRAINFO, FALLBACK_IPV6), required = (FALLBACK_ADDR,)):
            address, dir_port, or_port, fingerprint = matches[FALLBACK_ADDR]

            results[fingerprint] = Fallback(
                address = address,
                or_port = int(or_port),
                dir_port = int(dir_port),
                fingerprint = fingerprint,
                nickname = matches.get(FALLBACK_NICKNAME),  # type: ignore
                has_extrainfo = matches.get(FALLBACK_EXTRAINFO) == '1',
                orport_v6 = matches.get(FALLBACK_IPV6),  # type: ignore
                header = header,
            )
    except ValueError as exc:
        raise IOError(str(exc))

    return results
def test_to_int(self):
    """
    Exercises str_tools._to_int() with empty, str, bytes, and unicode input.
    """

    expected_by_input = {
        '': 0,
        'h': 104,
        'hi': 26729,
        'hello': 448378203247,
        str_tools._to_bytes('hello'): 448378203247,
        str_tools._to_unicode('hello'): 448378203247,
    }

    for value in expected_by_input:
        self.assertEqual(expected_by_input[value], str_tools._to_int(value))
def from_remote(timeout: int = 60) -> Dict[str, 'stem.directory.Authority']:
    """
    Downloads the content at GITWEB_AUTHORITY_URL and parses it into
    directory authorities.

    :param timeout: seconds to wait before timing out the request

    :returns: **dict** of **str** nicknames to their Authority

    :raises:
      * **stem.DownloadFailed** if unable to download the content or it is
        empty
      * **OSError** if a mandatory field of an entry cannot be parsed
    """

    # Only wrap genuine errors. Interrupts and interpreter exits should
    # propagate rather than being reported as a download failure.

    try:
        lines = str_tools._to_unicode(
            urllib.request.urlopen(GITWEB_AUTHORITY_URL, timeout=timeout).read()).splitlines()

        if not lines:
            raise OSError('no content')
    except Exception as exc:
        stacktrace = sys.exc_info()[2]
        message = "Unable to download tor's directory authorities from %s: %s" % (
            GITWEB_AUTHORITY_URL, exc)
        raise stem.DownloadFailed(GITWEB_AUTHORITY_URL, exc, stacktrace, message)

    # Entries look like...
    #
    # "moria1 orport=9101 "
    #   "v3ident=D586D18309DED4CD6D57C18FDB97EFA96D330566 "
    #   "128.31.0.39:9131 9695 DFC3 5FFE B861 329B 9F1A B04C 4639 7020 CE31",

    try:
        results = {}

        for matches in _directory_entries(
                lines, Authority._pop_section,
                (AUTHORITY_NAME, AUTHORITY_V3IDENT, AUTHORITY_IPV6, AUTHORITY_ADDR),
                required=(AUTHORITY_NAME, AUTHORITY_ADDR)):
            nickname, or_port = matches.get(AUTHORITY_NAME)  # type: ignore
            address, dir_port, fingerprint = matches.get(AUTHORITY_ADDR)  # type: ignore

            results[nickname] = Authority(
                address=address,
                or_port=or_port,
                dir_port=dir_port,
                fingerprint=fingerprint.replace(' ', ''),
                nickname=nickname,
                orport_v6=matches.get(AUTHORITY_IPV6),  # type: ignore
                v3ident=matches.get(AUTHORITY_V3IDENT),  # type: ignore
            )
    except ValueError as exc:
        raise OSError(str(exc))

    return results
def from_remote(timeout=60):
    """
    Reads and parses tor's latest fallback directories `from
    gitweb.torproject.org
    <https://gitweb.torproject.org/tor.git/plain/src/or/fallback_dirs.inc>`_.
    Note that while convenient, this reliance on GitWeb means you should
    always call with a fallback, such as...

    ::

      try:
        fallback_directories = stem.descriptor.remote.from_remote()
      except IOError:
        fallback_directories = stem.descriptor.remote.from_cache()

    :param int timeout: seconds to wait before timing out the request

    :returns: **dict** of **str** fingerprints to their
      :class:`~stem.descriptor.remote.FallbackDirectory`

    :raises: **IOError** if unable to retrieve the fallback directories
    """

    # Only wrap genuine errors. Interrupts and interpreter exits should
    # propagate rather than being reported as a download failure.

    try:
        fallback_dir_page = str_tools._to_unicode(
            urllib.urlopen(GITWEB_FALLBACK_DIR_URL, timeout=timeout).read())
    except Exception as exc:
        raise IOError(
            "Unable to download tor's fallback directories from %s: %s"
            % (GITWEB_FALLBACK_DIR_URL, exc))

    # content with nickname comments is handed to the v2 parser, anything
    # else to the v1 parser

    if '/* nickname=' in fallback_dir_page:
        return FallbackDirectory._parse_v2(fallback_dir_page)
    else:
        return FallbackDirectory._parse_v1(fallback_dir_page)
def from_remote(timeout=60):
    """
    Reads and parses tor's latest fallback directories `from
    gitweb.torproject.org
    <https://gitweb.torproject.org/tor.git/plain/src/or/fallback_dirs.inc>`_.
    Note that while convenient, this reliance on GitWeb means you should
    always call with a fallback, such as...

    ::

      try:
        fallback_directories = stem.descriptor.remote.from_remote()
      except IOError:
        fallback_directories = stem.descriptor.remote.from_cache()

    :param int timeout: seconds to wait before timing out the request

    :returns: **dict** of **str** fingerprints to their
      :class:`~stem.descriptor.remote.FallbackDirectory`

    :raises: **IOError** if unable to retrieve the fallback directories
    """

    # Only wrap genuine errors. Interrupts and interpreter exits should
    # propagate rather than being reported as a download failure.

    try:
        fallback_dir_page = str_tools._to_unicode(
            urllib.urlopen(GITWEB_FALLBACK_DIR_URL, timeout=timeout).read())
    except Exception as exc:
        raise IOError(
            "Unable to download tor's fallback directories from %s: %s"
            % (GITWEB_FALLBACK_DIR_URL, exc))

    # Example of an entry...
    #
    #   "5.175.233.86:80 orport=443 id=5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33"
    #   " ipv6=[2a03:b0c0:0:1010::a4:b001]:9001"
    #   " weight=43680",

    addr_line_pattern = re.compile(
        r'"([\d\.]+):(\d+) orport=(\d+) id=([\dA-F]{40}).*')
    ipv6_line_pattern = re.compile(r'" ipv6=\[([\da-f:]+)\]:(\d+)"')

    # 'attr' accumulates the fields of the entry currently being read, and is
    # turned into a FallbackDirectory once its weight line is reached

    results, attr = {}, {}

    for line in fallback_dir_page.splitlines():
        if not line.startswith('"'):
            continue

        addr_line_match = addr_line_pattern.match(line)
        ipv6_line_match = ipv6_line_pattern.match(line)

        if addr_line_match:
            address, dir_port, or_port, fingerprint = addr_line_match.groups()

            if not connection.is_valid_ipv4_address(address):
                raise IOError('%s has an invalid IPv4 address: %s' % (fingerprint, address))
            elif not connection.is_valid_port(or_port):
                raise IOError('%s has an invalid or_port: %s' % (fingerprint, or_port))
            elif not connection.is_valid_port(dir_port):
                raise IOError('%s has an invalid dir_port: %s' % (fingerprint, dir_port))
            elif not tor_tools.is_valid_fingerprint(fingerprint):
                raise IOError('%s has an invalid fingerprint: %s' % (fingerprint, fingerprint))

            attr = {
                'address': address,
                'or_port': int(or_port),
                'dir_port': int(dir_port),
                'fingerprint': fingerprint,
            }
        elif ipv6_line_match:
            address, port = ipv6_line_match.groups()

            # Report the fingerprint of the entry being accumulated. The loop
            # variable from the address branch may be unset or belong to an
            # earlier entry.

            entry_fingerprint = attr.get('fingerprint')

            if not connection.is_valid_ipv6_address(address):
                raise IOError('%s has an invalid IPv6 address: %s' % (entry_fingerprint, address))
            elif not connection.is_valid_port(port):
                raise IOError(
                    '%s has an invalid ORPort for its IPv6 endpoint: %s'
                    % (entry_fingerprint, port))

            attr['orport_v6'] = (address, int(port))
        elif line.startswith('" weight=') and 'fingerprint' in attr:
            results[attr.get('fingerprint')] = FallbackDirectory(
                address=attr.get('address'),
                or_port=attr.get('or_port'),
                dir_port=attr.get('dir_port'),
                fingerprint=attr.get('fingerprint'),
                orport_v6=attr.get('orport_v6'),
            )

            attr = {}

    return results
def expand_ipv6_address(address: str) -> str:
    """
    Provides the full, eight group form of an IPv6 address, filling in
    collapsed groups and leading zeros. For instance...

    ::

      >>> expand_ipv6_address('2001:db8::ff00:42:8329')
      '2001:0db8:0000:0000:0000:ff00:0042:8329'

      >>> expand_ipv6_address('::')
      '0000:0000:0000:0000:0000:0000:0000:0000'

      >>> expand_ipv6_address('::ffff:5.9.158.75')
      '0000:0000:0000:0000:0000:ffff:0509:9e4b'

    :param address: IPv6 address to be expanded

    :returns: **str** with the expanded address

    :raises: **ValueError** if the address can't be expanded due to being
      malformed
    """

    if isinstance(address, bytes):
        address = str_tools._to_unicode(address)

    if not is_valid_ipv6_address(address):
        raise ValueError("'%s' isn't a valid IPv6 address" % address)

    # swap any ipv4-mapped portion for its two hex groups

    if address.count('.') == 3:
        first_dot = address.find('.')
        ipv4_start = address.rfind(':', 0, first_dot) + 1
        ipv4_end = address.find(':', ipv4_start + 1)

        if ipv4_end == -1:
            ipv4_end = None  # the ipv4 portion runs to the end of the address

        # For instance, '5.9.158.75' becomes '0509:9e4b'.

        ipv4_bits = _address_to_binary(address[ipv4_start:ipv4_end])
        hex_groups = ['%04x' % int(ipv4_bits[offset:offset + 16], 2) for offset in (0, 16)]

        pieces = []

        if ipv4_start != 0:
            pieces.append(address[:ipv4_start - 1])

        pieces.append(':'.join(hex_groups))

        if ipv4_end:
            pieces.append(address[ipv4_end + 1:])

        address = ':'.join(filter(None, pieces))

    # A valid address has at most one '::'. Pad it with however many colons
    # are needed to reach the seven that delimit eight groups.

    if '::' in address:
        padding = ':' * (7 - address.count(':'))
        address = address.replace('::', '::' + padding)

    # left-pad every group with zeros so each is four characters

    return ':'.join(group.zfill(4) for group in address.split(':'))