def get_obfs4_bridges(filename):
    """
    Extract obfs4 extra-info descriptors and return dictionary.

    The dictionary maps an obfs4 bridge's fingerprint to its extra-info
    descriptor.
    """
    fpr2obfs4desc = {}

    # We are parsing our descriptor files as type "extra-info 1.0" because
    # unredacted bridge extra-info descriptors are normal extra-info
    # descriptors.
    with open(filename, "rb") as desc_file:
        for desc in parse_file(desc_file, descriptor_type="extra-info 1.0"):
            if "obfs4" in desc.transport:
                fpr2obfs4desc[desc.fingerprint] = desc

    # Use "cached-extrainfo.new" to augment "cached-extrainfo".
    with open(filename + ".new", "rb") as desc_file:
        for desc in parse_file(desc_file, descriptor_type="extra-info 1.0"):
            if "obfs4" in desc.transport:
                fpr2obfs4desc[desc.fingerprint] = desc

    return fpr2obfs4desc
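# A minimal usage sketch for get_obfs4_bridges() above. The cached-extrainfo
# path under tor's data directory is an assumption, not part of the original
# snippet.
bridges = get_obfs4_bridges("/var/lib/tor/cached-extrainfo")
for fingerprint, desc in bridges.items():
    print("%s published %s" % (fingerprint, desc.published))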
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--ctrlport', type=int, help='default: 9051', default=9051)
    args = parser.parse_args()

    with Controller.from_port(port=args.ctrlport) as controller:
        controller.authenticate()

        for desc in parse_file('/var/lib/tor/data/cached-consensus'):
            ip = 'v4' if is_valid_ipv4_address(desc.address) else 'v6'
            desc_versions[desc.fingerprint] = [desc.address, desc.or_port, ip, desc.version]

        for desc in parse_file('/var/lib/tor/data2/cached-consensus'):
            ip = 'v4' if is_valid_ipv4_address(desc.address) else 'v6'
            desc_versions[desc.fingerprint] = [desc.address, desc.or_port, ip, desc.version]

        orconn_listener = functools.partial(orconn_event, controller)
        controller.add_event_listener(orconn_listener, EventType.ORCONN)

        while True:
            try:
                time.sleep(1)
            except KeyboardInterrupt:
                break
def main():
    REPLICAS = 2

    parser = argparse.ArgumentParser()
    parser.add_argument('onion_address', help='The hidden service address - e.g. (idnxcnkne4qt76tg.onion)')
    parser.add_argument('--consensus', help='The optional consensus file', required=False)
    args = parser.parse_args()

    if args.consensus is None:
        downloader = DescriptorDownloader()
        consensus = downloader.get_consensus(document_handler=DocumentHandler.DOCUMENT).run()[0]
        t = time()
    else:
        with open(args.consensus) as f:
            consensus = next(parse_file(f, 'network-status-consensus-3 1.0',
                                        document_handler=DocumentHandler.DOCUMENT))
        t = mktime(consensus.valid_after.timetuple())

    service_id, tld = args.onion_address.split(".")
    if tld == 'onion' and len(service_id) == 16 and service_id.isalnum():
        for replica in range(0, REPLICAS):
            descriptor_id = rend_compute_v2_desc_id(service_id, replica, t)
            print descriptor_id + '\t' + b32decode(descriptor_id, True).encode('hex')
            for router in find_responsible_HSDir(descriptor_id, consensus):
                print router['fingerprint'] + '\t' + router['nickname']
    else:
        print "[!] The onion address you provided is not valid"
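# The helper rend_compute_v2_desc_id() used above is not shown. Below is a
# minimal Python 3 sketch of the v2 onion-service descriptor-ID computation
# from rend-spec: descriptor-id = H(permanent-id | H(time-period | replica)),
# assuming no descriptor cookie. The original helper may differ in details.
import hashlib
import struct
from base64 import b32decode, b32encode

def rend_compute_v2_desc_id(service_id, replica, timestamp):
    permanent_id = b32decode(service_id, casefold=True)  # 10 bytes
    # The time period rotates daily, offset by the first identity byte.
    time_period = (int(timestamp) + (permanent_id[0] * 86400) // 256) // 86400
    secret_id_part = hashlib.sha1(struct.pack('>IB', time_period, replica)).digest()
    descriptor_id = hashlib.sha1(permanent_id + secret_id_part).digest()
    return b32encode(descriptor_id).lower().decode('ascii')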
def parseServerDescriptorsFile(filename, validate=True):
    """Parse a file which contains ``@type bridge-server-descriptor``s.

    .. note:: Set ``validate`` to ``False`` when parsing descriptors produced
        by Leekspin, the fake descriptor generator: there appears to be a bug
        which makes Stem think the fingerprint doesn't match the key…

    .. note:: We have to lie to Stem, pretending that these are
        ``@type server-descriptor``s, **not**
        ``@type bridge-server-descriptor``s. See ticket #`11257`_.

    .. _`11257`: https://bugs.torproject.org/11257

    :param str filename: The file to parse descriptors from.
    :param bool validate: Whether or not to validate descriptor
        contents. (default: ``True``)
    :rtype: list
    :returns: A list of
        :api:`stem.descriptor.server_descriptor.RelayDescriptor`s.
    """
    logging.info("Parsing server descriptors with Stem: %s" % filename)
    descriptorType = 'server-descriptor 1.0'
    document = parse_file(filename, descriptorType, validate=validate)
    routers = list(document)
    return routers
def load(cls, file_path):
    logger.info("Parsing content of %s.", file_path)
    document = descriptor.parse_file(file_path)
    bwfiles = list(document)
    if bwfiles:
        bwfile = bwfiles[0]
        return cls(bwfile.header)
def parse_serverdesc(path):
    relay = next(parse_file(path, document_handler='DOCUMENT',
                            descriptor_type='server-descriptor 1.0', validate=False))
    if relay is None:
        return None
    if relay.observed_bandwidth is None:
        return None

    advertised_bw = relay.observed_bandwidth
    avg_bw = relay.average_bandwidth
    bst_bw = relay.burst_bandwidth
    if avg_bw is not None and avg_bw < advertised_bw:
        advertised_bw = avg_bw
    if bst_bw is not None and bst_bw < advertised_bw:
        advertised_bw = bst_bw

    result = {
        'type': 'serverdesc',
        'pub_dt': relay.published,
        'fprint': relay.fingerprint,
        'address': relay.address,
        'bw_obs': relay.observed_bandwidth,
        'bw_rate': avg_bw if avg_bw is not None else 0,
        'bw_burst': bst_bw if bst_bw is not None else 0,
        'bw_adv': advertised_bw,
    }
    return result
def parse_consensuses_naive_way(consensus_dir):
    """
    Parses a bunch of consensuses in a naive manner, and marks how many times
    each guard has appeared in those consensuses.
    """
    # A dictionary mapping <guard fingerprints> to <times seen in consensus>
    guards_dict = {}

    # Walk all files in the consensus directory and try to parse them
    # and import them to our database.
    dir_listing = os.listdir(consensus_dir)
    for f in dir_listing:
        consensus_f = os.path.join(consensus_dir, f)
        if not os.path.isfile(consensus_f):  # skip non-files
            continue

        # Parse consensus
        consensus = next(parse_file(consensus_f,
                                    'network-status-microdesc-consensus-3 1.0',
                                    document_handler=DocumentHandler.DOCUMENT))

        # For every guard:
        # * If we see it for the first time, initialize its counter to 1.
        # * If we've seen it before, increment its counter by one.
        for router in consensus.routers.values():
            if stem.Flag.GUARD in router.flags:  # It's a guard
                if router.fingerprint not in guards_dict:
                    # First time we see this guard.
                    guards_dict[router.fingerprint] = 1
                else:
                    # Seen this guard before, increment counter by one
                    guards_dict[router.fingerprint] += 1

    return guards_dict
def parseExtraInfoFiles(*filenames, **kwargs):
    """Open **filenames** and parse any ``@type bridge-extrainfo-descriptor``
    contained within.

    .. warning:: This function will *not* check that the ``router-signature``
        at the end of the extrainfo descriptor is valid. See
        ``bridgedb.bridges.Bridge._verifyExtraInfoSignature`` for a method for
        checking the signature. The signature cannot be checked here, because
        to do so, we would need the latest, valid, corresponding
        ``signing-key`` for the Bridge.

    .. note:: This function will call :func:`deduplicate` to deduplicate the
        extrainfo descriptors parsed from all **filenames**.

    :kwargs validate: If there is a ``'validate'`` keyword argument, its value
        will be passed along as the ``'validate'`` argument to
        :class:`stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor`.
        The ``'validate'`` keyword argument defaults to ``True``, meaning that
        the hash digest stored in the ``router-digest`` line will be checked
        against the actual contents of the descriptor and the extrainfo
        document's signature will be verified.
    :rtype: dict
    :returns: A dictionary mapping bridge fingerprints to their corresponding,
        deduplicated
        :class:`stem.descriptor.extrainfo_descriptor.RelayExtraInfoDescriptor`.
    """
    descriptors = []

    # The ``stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor``
    # class (with ``descriptorType = 'bridge-extra-info 1.1'``) is unsuitable
    # for our purposes for the following reasons:
    #
    #   1. It expects a ``router-digest`` line, which is only present in
    #      sanitised bridge extrainfo descriptors.
    #
    #   2. It doesn't check the ``router-signature`` (nor does it expect there
    #      to be a signature).
    descriptorType = 'extra-info 1.0'

    validate = True
    if ('validate' in kwargs) and (kwargs['validate'] is False):
        validate = False

    for filename in filenames:
        logging.info("Parsing %s descriptors in %s..."
                     % (descriptorType, filename))
        document = parse_file(filename, descriptorType, validate=validate)
        try:
            for router in document:
                descriptors.append(router)
        except (ValueError, ProtocolError) as error:
            logging.error(
                ("Stem exception while parsing extrainfo descriptor from "
                 "file '%s':\n%s") % (filename, str(error)))
            _copyUnparseableDescriptorFile(filename)

    routers = deduplicate(descriptors)
    return routers
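# The deduplicate() helper called above is referenced but not defined in this
# snippet. A plausible sketch, assuming it keeps the most recently published
# descriptor per fingerprint; BridgeDB's real implementation may differ.
def deduplicate(descriptors):
    newest = {}
    for desc in descriptors:
        previous = newest.get(desc.fingerprint)
        if previous is None or desc.published > previous.published:
            newest[desc.fingerprint] = desc
    return newest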
def find_flags(consensus_paths, fingerprint, state_obj):
    """
    Finds flags of a given relay by fingerprint and consensus files. Creates
    an empty flag list and looks in the current state object for cached
    consensuses. If the required consensus files are not cached, it parses the
    corresponding files. It then looks for the relay in the loaded
    consensuses, populating the flags list upon finding it.

    :param consensus_paths: list
    :param fingerprint: string
    :param state_obj: StateObject
    :returns: list
    """
    flags = []

    if consensus_paths == state_obj.filenames:
        consensuses = state_obj.consensuses
    else:
        consensuses = [list(parse_file(consensus_path))
                       for consensus_path in consensus_paths]
        state_obj.update(consensus_paths, consensuses)

    for consensus in consensuses:
        for desc in consensus:
            if desc.fingerprint == fingerprint:
                flags.extend(desc.flags)

    return flags
def parse_extrainfo(path):
    # unused right now, but might be useful
    xinfo = next(parse_file(path, document_handler='DOCUMENT',
                            descriptor_type='extra-info 1.0', validate=False))

    read_max_rate, read_avg_rate = 0, 0
    if xinfo.read_history_values is not None and xinfo.read_history_interval is not None:
        read_max_rate = int(max(xinfo.read_history_values) /
                            xinfo.read_history_interval)
        read_avg_rate = int((sum(xinfo.read_history_values) /
                             len(xinfo.read_history_values)) /
                            xinfo.read_history_interval)

    write_max_rate, write_avg_rate = 0, 0
    if xinfo.write_history_values is not None and xinfo.write_history_interval is not None:
        write_max_rate = int(max(xinfo.write_history_values) /
                             xinfo.write_history_interval)
        write_avg_rate = int((sum(xinfo.write_history_values) /
                              len(xinfo.write_history_values)) /
                             xinfo.write_history_interval)

    result = {
        'type': 'extrainfo',
        'pub_dt': xinfo.published,
        'fprint': xinfo.fingerprint,
        'nickname': xinfo.nickname,
        'bytes_read_max': read_max_rate,
        'bytes_read_avg': read_avg_rate,
        'bytes_write_max': write_max_rate,
        'bytes_write_avg': write_avg_rate,
    }
    return result
def parseServerDescriptorsFile(filename, validate=True):
    """Open and parse **filename**, which should contain
    ``@type bridge-server-descriptor``.

    .. note:: We have to lie to Stem, pretending that these are
        ``@type server-descriptor``, **not**
        ``@type bridge-server-descriptor``. See :trac:`11257`.

    :param str filename: The file to parse descriptors from.
    :param bool validate: Whether or not to validate descriptor
        contents. (default: ``True``)
    :rtype: list
    :returns: A list of
        :class:`stem.descriptor.server_descriptor.RelayDescriptor`s.
    """
    logging.info("Parsing server descriptors with Stem: %s" % filename)
    descriptorType = 'server-descriptor 1.0'
    document = parse_file(filename, descriptorType, validate=validate)
    routers = list()

    # Work around https://bugs.torproject.org/26023 by parsing one descriptor
    # at a time and catching any errors not handled in stem:
    while True:
        try:
            routers.append(next(document))
        except StopIteration:
            break
        except Exception as error:
            logging.debug("Error while parsing a bridge server descriptor: %s"
                          % error)

    return routers
def parse_consensus(path):
    net_status = next(parse_file(path, document_handler='DOCUMENT', validate=False))

    relays = {}
    weights = {"total": 0, "exit": 0, "guard": 0, "exitguard": 0, "middle": 0}
    counts = {"total": 0, "exit": 0, "guard": 0, "exitguard": 0, "middle": 0}

    for (fingerprint, router_entry) in net_status.routers.items():
        if Flag.BADEXIT in router_entry.flags or \
           Flag.RUNNING not in router_entry.flags or \
           Flag.VALID not in router_entry.flags:
            continue

        relays.setdefault(fingerprint, {})
        relays[fingerprint]['address'] = router_entry.address
        relays[fingerprint]['weight'] = router_entry.bandwidth

        if Flag.GUARD in router_entry.flags and \
           Flag.FAST in router_entry.flags and \
           Flag.STABLE in router_entry.flags:
            relays[fingerprint]['is_guard'] = True
        else:
            relays[fingerprint]['is_guard'] = False

        if Flag.EXIT in router_entry.flags and \
           router_entry.exit_policy.is_exiting_allowed():
            relays[fingerprint]['is_exit'] = True
        else:
            relays[fingerprint]['is_exit'] = False

        # fill in the weights
        bw_weight = float(router_entry.bandwidth)
        weights["total"] += bw_weight
        counts["total"] += 1
        if relays[fingerprint]['is_guard'] and relays[fingerprint]['is_exit']:
            weights["exitguard"] += bw_weight
            counts["exitguard"] += 1
        elif relays[fingerprint]['is_guard']:
            weights["guard"] += bw_weight
            counts["guard"] += 1
        elif relays[fingerprint]['is_exit']:
            weights["exit"] += bw_weight
            counts["exit"] += 1
        else:
            weights["middle"] += bw_weight
            counts["middle"] += 1

    # weights are normalized on a per-consensus basis
    for fingerprint in relays:
        relays[fingerprint]['weight'] /= weights["total"]
    for position_type in weights:
        if position_type == "total":
            continue
        weights[position_type] /= weights["total"]

    result = {
        'type': 'consensus',
        # valid_after is for V3 descriptors, V2 use net_status.published
        'pub_dt': net_status.valid_after,
        'relays': relays,
        'weights': weights,
        'counts': counts,
    }
    return result
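# Hypothetical usage of parse_consensus() above; the consensus path is an
# assumption. Prints the normalized weight fraction of each relay position.
stats = parse_consensus('/var/lib/tor/cached-consensus')
for position in ('exitguard', 'guard', 'exit', 'middle'):
    print('%-10s %6d relays, %.2f%% of consensus weight' % (
        position, stats['counts'][position], stats['weights'][position] * 100))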
def find_responsible_HSDir(descriptor_id, cached_consensus):
    with open(cached_consensus, 'rb') as consensus_file:
        # Processes the routers as we read them in. The routers refer to a
        # document with an unset 'routers' attribute.
        router_list = []
        fingerprint_list = []
        responsible_HSDirs = []

        for router in parse_file(consensus_file,
                                 'network-status-consensus-3 1.0',
                                 document_handler=DocumentHandler.ENTRIES):
            if "HSDir" in router.flags:
                # Inefficient but should be fine for the current number of routers
                fingerprint_base32 = b32encode(
                    router.fingerprint.decode("hex")).lower()
                fingerprint_list.append(router.fingerprint.decode("hex"))
                router_list.append({
                    'nickname': router.nickname,
                    'fingerprint_base32': fingerprint_base32,
                    'fingerprint': router.fingerprint,
                    'address': router.address,
                    'dir_port': router.dir_port,
                    'descriptor_id': descriptor_id
                })

        # Get the location the descriptor id would have in the router list.
        descriptor_position = bisect_left(fingerprint_list,
                                          b32decode(descriptor_id, 1))
        # Wrap around the end of the hash ring so we don't run off the list.
        for i in range(0, 3):
            responsible_HSDirs.append(
                router_list[(descriptor_position + i) % len(router_list)])

        return responsible_HSDirs
def parse_consensus(relays, filename):
    for desc in parse_file(filename):
        relays.setdefault(desc.address, []).append(desc.or_port)
        for address, port, is_ipv6 in desc.or_addresses:
            if is_ipv6:
                address = ipaddress.IPv6Address(address).exploded
            relays.setdefault(address, []).append(port)

    return relays
def load(cls, file_path):
    logger.info("Parsing content of %s.", file_path)
    document = descriptor.parse_file(file_path)
    bwfiles = list(document)
    if bwfiles:
        # When parsing one file, there is only 1 bwfile
        bwfile = bwfiles[0]
        return cls(bwfile.header, bwfile.measurements.values())
def initial_consensus():
    global c, consensus, consensus_nickname
    with open('/home/ohyee/.tor/cached-consensus', 'rb') as consensus_file:
        routers = parse_file(consensus_file,
                             'network-status-consensus-3 1.0',
                             document_handler=DocumentHandler.ENTRIES)
        for router in routers:
            consensus_nickname[router.nickname] = router
            consensus[router.fingerprint] = router
def tutorial_example_2():
    from stem.descriptor import DocumentHandler, parse_file

    consensus = next(parse_file(
        '/tmp/descriptor_dump',
        descriptor_type = 'network-status-consensus-3 1.0',
        document_handler = DocumentHandler.DOCUMENT,
    ))

    for fingerprint, relay in consensus.routers.items():
        print('%s: %s' % (fingerprint, relay.nickname))
def get_consensus(self, da, consensus_file=None):
    path = 'tor/status-vote/current/consensus/'
    if consensus_file is None:
        consensus_file = tempfile.NamedTemporaryFile()
    urllib.urlretrieve(da + path, consensus_file.name)
    consensus = next(parse_file(consensus_file.name,
                                descriptor_type=self.descriptor_type,
                                document_handler=DocumentHandler.DOCUMENT))
    return consensus
def parse_consensus(fname, desc_fname=None):
    guards = Guards()

    with open(fname, 'rb') as consensus_file:
        consensus = next(parse_file(consensus_file,
                                    'network-status-consensus-3 1.0',
                                    document_handler=DocumentHandler.DOCUMENT))

        for router in consensus.routers.values():
            if 'Guard' in router.flags:
                guard = Guard(router.nickname, router.fingerprint)
                # XXX These should just be arguments to the __init__ of Guard:
                guard.set_flags(router.flags)
                guard.set_bandwidth(router.bandwidth)
                guards.add(guard)
                logging.debug("Saw %s with %s (%s)",
                              router.nickname, router.flags, router.bandwidth)

        Wgd = consensus.bandwidth_weights['Wgd'] / float(10000)
        Wgg = consensus.bandwidth_weights['Wgg'] / float(10000)
        guards.set_consensus_parameters(Wgd, Wgg)
        logging.debug("Wgd: %s, Wgg: %s", Wgd, Wgg)

    assert len(guards) > 0

    # If no descriptor files were provided, we are done!
    if not desc_fname:
        return guards

    logging.warning("Using descriptor bandwidths!")

    # If a descriptor file was provided, parse it and use those bandwidths instead.
    with open(desc_fname, 'rb') as desc_file:
        descs = parse_file(desc_file, 'server-descriptor 1.0')
        # XXX most inefficient method ever.
        for desc in descs:
            bandwidth = min(desc.average_bandwidth,
                            desc.burst_bandwidth,
                            desc.observed_bandwidth)
            bandwidth = bandwidth >> 10  # from (bytes per second) to (kilobytes per second)
            guards.update_bw_of_guard_by_fpr(desc.fingerprint, bandwidth)

    return guards
def get_consensus():
    consensus = None
    try:
        print('Reading cached-consensus')
        consensus = next(parse_file(CONSENSUS_PATH,
                                    document_handler=DocumentHandler.DOCUMENT,
                                    validate=True))
        if datetime.utcnow() > consensus.valid_until:
            print('Cached consensus is stale')
            consensus = None
    except FileNotFoundError:
        # TODO: add logging here
        print('Consensus not found')
    except ValueError:
        print('Consensus is invalid')
    except Exception:
        pass

    if not consensus:
        print('Downloading consensus...')
        download_consensus()
        print('Trying to read cached-consensus again')
        # This time, let any error propagate to the caller.
        consensus = next(parse_file(CONSENSUS_PATH,
                                    document_handler=DocumentHandler.DOCUMENT,
                                    validate=True))

    return consensus
def make_monthly_csv(year, month, day):
    """
    Create the CSV files for the given year/month. If day is nonzero, only
    create the file for that day; otherwise create files for every day of
    the month at midnight.
    """
    match_found = False

    str_month = str(month)
    if month < 10:
        str_month = "0%d" % (month)

    consensus_path = dl_consensus(year, str_month)
    if consensus_path is None:
        return None
    sd_path = dl_server_descriptors(year, str_month)
    ei_path = dl_extra_infos(year, str_month)
    if sd_path is None or ei_path is None:
        print("Unable to create CSV files for %s-%s" % (year, str_month))
        return None

    prev_sd_path, prev_ei_path = get_previous_data(year, month, day)
    if prev_sd_path is not None:
        uncompress(prev_sd_path, './server-descriptors')
        uncompress(prev_ei_path, './extra-infos')

    uncompress(consensus_path, './consensuses')
    uncompress(sd_path, './server-descriptors')
    uncompress(ei_path, './extra-infos')

    # We have the data, let's create the csv files for the requested date.
    for dir_day in os.listdir('./%s' % (consensus_path[:-7])):
        str_day = str(day)
        if day < 10:
            str_day = "0%d" % (day)
        if day != 0 and str_day != dir_day:
            continue
        match_found = True

        consensus_pathname = \
            "./consensuses/consensuses-%s-%s/%s/%s-%s-%s-00-00-00-consensus" % \
            (year, str_month, dir_day, year, str_month, dir_day)
        print(" [+] Reading consensus %s" % (consensus_pathname))
        try:
            consensus = next(parse_file(consensus_pathname,
                                        document_handler=DocumentHandler.DOCUMENT))
        except Exception as e:
            print(" [-] Consensus %s not found. Skipping!" % (consensus_pathname))
            continue

        # Nullify the previous path if we aren't the first of the month.
        if dir_day != "01":
            prev_ei_path = None
            prev_sd_path = None

        write_csv_data(consensus, sd_path, prev_sd_path, ei_path, prev_ei_path,
                       str(year), str_month, dir_day)

    if match_found is False:
        print(" [-] Date not found in consensus")
async def cached_consensus():
    if not os.path.isfile(CONS_FILE):
        logger.info('not found, downloading')
        with open(CONS_FILE, 'w') as f:
            cons = await try_all_for_url(CONS_URL)
            f.write(cons)
    else:
        logger.info('found, loading from disk')

    return next(parse_file(
        CONS_FILE,
        descriptor_type='network-status-consensus-3 1.0',
        document_handler=DocumentHandler.DOCUMENT,
    ))
def get_contact_info(filename):
    """
    Extract bridge contact information and return dictionary.

    The dictionary maps a bridge's fingerprint to its contact information.
    """
    fpr2contact = {}
    with open(filename, "rb") as desc_file:
        for desc in parse_file(desc_file, descriptor_type="server-descriptor 1.0"):
            if desc.contact:
                fpr2contact[desc.fingerprint] = desc.contact
    return fpr2contact
def loop_through_all_files(root_str, con_files_list, guard_ip_set, exit_ip_set):
    for consensus_fname in con_files_list:
        consensus_file = open(root_str + os.path.sep + consensus_fname, 'rb')
        consensus = next(parse_file(
            consensus_file,
            descriptor_type='network-status-consensus-3 1.0',
            document_handler=DocumentHandler.DOCUMENT,
        ))
        consensus_file.close()
        if not consensus.is_consensus:
            print 'not a consensus file'
        else:
            update_guard_and_exit_ip_sets(consensus, guard_ip_set, exit_ip_set)
def main():
    bridges = Bridges()

    if len(sys.argv) < 2:
        usage()
        sys.exit(1)

    for desc in parse_file(sys.argv[1], "bridge-extra-info 1.2"):
        if not desc.read_history_values:
            # XXX is this a bug? Why do some descs not have read_history_values?
            print "[!] %s %s" % (desc.fingerprint, desc.read_history_values)
            continue
        bridges.add_bridge(desc.fingerprint,
                           desc.read_history_values,
                           desc.write_history_values)

    bridges.report_statistics()
def get_router_list_from_consensus(tor_state, consensus):
    """
    Arguments:
        tor_state -- a txtorcon TorState object
        consensus -- path to a file containing a tor
                     network-status-consensus-3 document

    Returns a list of routers (txtorcon Router objects).
    """
    routers = []
    with open(consensus, 'rb') as consensus_file:
        for relay in parse_file(consensus_file):
            if relay is not None and relay.fingerprint is not None:
                router = tor_state.router_from_id("$" + relay.fingerprint)
                routers.append(router)
    if len(routers) == 0:
        print "failed to parse consensus file"
        sys.exit(1)
    return routers
def _parse_and_import_consensus(self, consensus_fd, db_cursor):
    """Friend of parse_and_import_consensus()."""
    # Use stem to parse the consensus.
    consensus = next(parse_file(consensus_fd,
                                'network-status-microdesc-consensus-3 1.0',
                                document_handler=DocumentHandler.DOCUMENT))

    # Insert the consensus to the database
    try:
        db_cursor.execute(
            "INSERT INTO consensus (consensus_date) VALUES (?)",
            (consensus.valid_after, ))
    except sqlite3.IntegrityError as err:
        logging.info("Didn't add duplicate consensus (%s) (%s).",
                     consensus.valid_after, err)
        return
def analyzeHSDirs(entry, digest):
    digests = digest[1]
    digest_one = digests[0]
    digest_two = digests[1]

    cons = next(parse_file(entry, document_handler=DocumentHandler.DOCUMENT))
    descriptors = cons.routers.items()
    hsdirs = [desc[1] for desc in descriptors if Flag.HSDIR in desc[1].flags]
    hsdirs_sorted = sorted(
        hsdirs,
        key=lambda descriptor: binascii.unhexlify(descriptor.fingerprint))
    hsdirs_keys = [binascii.unhexlify(descriptor.fingerprint)
                   for descriptor in hsdirs_sorted]

    onelist = getDirs(digest_one, hsdirs_sorted, hsdirs_keys)
    twolist = getDirs(digest_two, hsdirs_sorted, hsdirs_keys)
    dirlist = onelist + twolist

    return createDict(dirlist)
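# getDirs() is referenced above but not defined in this snippet. A plausible
# sketch, assuming it returns the three HSDirs whose fingerprints follow the
# descriptor digest on the hash ring, wrapping around the end; the original
# helper (and the digest's exact encoding) may differ.
from bisect import bisect_left

def getDirs(digest, hsdirs_sorted, hsdirs_keys):
    position = bisect_left(hsdirs_keys, digest)
    return [hsdirs_sorted[(position + i) % len(hsdirs_sorted)] for i in range(3)]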
def getConsensusDataFrame(path, fp_column=fingerprint_default,
                          name=name_default, bandwidth=bandwidth_default):
    consensus = next(parse_file(path,
                                descriptor_type='network-status-consensus-3 1.0',
                                document_handler=DocumentHandler.DOCUMENT))

    # Get list of relays
    routers = consensus.routers.items()
    relays = []
    for fingerprint, relay in routers:
        relays.append(relay)

    # Return DF containing name and bandwidth
    df = pd.DataFrame()
    df[fp_column] = [r.nickname + ' ' + r.fingerprint for r in relays]
    df[name] = [r.nickname for r in relays]
    df[bandwidth] = [r.bandwidth for r in relays]
    return df
def parseServerDescriptorsFile(filename, validate=True):
    """Open and parse **filename**, which should contain
    ``@type bridge-server-descriptor``.

    .. note:: We have to lie to Stem, pretending that these are
        ``@type server-descriptor``, **not**
        ``@type bridge-server-descriptor``. See :trac:`11257`.

    :param str filename: The file to parse descriptors from.
    :param bool validate: Whether or not to validate descriptor
        contents. (default: ``True``)
    :rtype: list
    :returns: A list of
        :class:`stem.descriptor.server_descriptor.RelayDescriptor`s.
    """
    logging.info("Parsing server descriptors with Stem: %s" % filename)
    descriptorType = 'server-descriptor 1.0'
    document = parse_file(filename, descriptorType, validate=validate)
    return list(document)
def get_or_consensus(self):
    self._or = {}
    self._relayBW = {}
    self._exit_or = set()
    data_dir = self._controller.get_conf("DataDirectory")
    try:
        for desc in parse_file(os.path.join(data_dir, "cached-descriptors")):
            self._or[desc.fingerprint] = desc.nickname
            self._relayBW[desc.fingerprint] = desc.observed_bandwidth
            if desc.exit_policy.can_exit_to(address=self.ip, port=self._port) \
                    and self._num_hops > 1:
                self._exit_or.add(desc.fingerprint)
            if (desc.exit_policy.can_exit_to(address=self.ip, port=self._port)
                    and self._num_hops == 1
                    and desc.allow_single_hop_exits):
                self._exit_or.add(desc.fingerprint)
    except Exception as exc:
        self.get_or_from_network()
def _processDescriptors(self, raw):
    '''Decompress and parse descriptors, then build a dict mapping
    fingerprint -> RelayDescriptor for all relays found in both the network
    consensus and the server descriptor set. We throw away any relays that
    are not found in the network consensus.

    We also add a new attribute 'flags' to each RelayDescriptor. 'flags' is
    an attribute of the RouterStatusEntry's found in the consensus, and
    adding them here simplifies path selection. 'flags' is a set of unicode
    strings.

    .. note: This runs in a separate work thread using
        twisted.internet.threads.deferToThread() because parsing tends to
        take a while.

    :param str raw: compressed server descriptor bytes
    :returns: **dict** mapping fingerprint -> RelayDescriptor for every
        relay found in both the current network consensus and the set of
        server descriptors.
    '''
    raw = zlib.decompress(raw)
    gen = parse_file(
        io.BytesIO(raw),
        DEF.STEM_DESCRIPTORS_TYPE,
        validate=True,
        document_handler=DocumentHandler.DOCUMENT,
    )
    descriptors = {}

    # only use descriptors that are also found in the consensus, and
    # also add the 'flags' attribute, a set of unicode strings describing
    # the flags a given RelayDescriptor has
    for relay in gen:
        try:
            flags = set(self._consensus.routers[relay.fingerprint].flags)
            relay.flags = flags
            descriptors[relay.fingerprint] = relay
        # skip any relays not found in the consensus
        except KeyError:
            pass

    return descriptors
from stem.descriptor import parse_file

server_descriptors = parse_file('/tmp/descriptor_dump',
                                descriptor_type = 'server-descriptor 1.0')

for relay in server_descriptors:
    print(relay.fingerprint)
from stem.descriptor import parse_file

for desc in parse_file('/home/atagar/.tor/cached-consensus'):
    print('found relay %s (%s)' % (desc.nickname, desc.fingerprint))
from stem.descriptor import parse_file
import sys

try:
    path = sys.argv[1]
    for desc in parse_file(path):
        print('found relay %s (%s)' % (desc.nickname, desc.fingerprint))
except IOError:
    print("File not found. Make sure you supply the location of a cached "
          "consensus file: %s" % path)
def tutorial_example():
    from stem.descriptor import parse_file

    for desc in parse_file(open('/home/atagar/.tor/cached-consensus')):
        print('found relay %s (%s)' % (desc.nickname, desc.fingerprint))
def write_csv_data(consensus, sd_path, prev_sd_path, ei_path, prev_ei_path,
                   year, month, day):
    """ Write data from consensus to CSV file """
    csv_fp = create_csv_file(year, month, day)
    if csv_fp is None:
        # CSV file already exists.
        return None

    for desc in consensus.routers.values():
        # Check for longitude and latitude. Without this, the entry is useless.
        lon, lat = geo_ip_lookup(desc.address)
        if lon is False and lat is False:
            continue

        fp = desc.fingerprint
        digest = desc.digest.lower()
        sd_filename = "%s/%s/%s/%s" % (sd_path[:-7], digest[0], digest[1], digest)
        try:
            sd = next(parse_file(sd_filename))
        except Exception as e:
            if prev_sd_path is None:
                continue
            sd_filename = "%s/%s/%s/%s" % (prev_sd_path[:-7], digest[0],
                                           digest[1], digest)
            try:
                sd = next(parse_file(sd_filename))
            except Exception as e:
                print(" [-] Server descriptor %s not found" % (digest))
                continue

        # Open extra info.
        entry_ips = ""
        dir_ips = ""
        if sd.extra_info_digest is not None:
            digest = sd.extra_info_digest.lower()
            ei_filename = "%s/%s/%s/%s" % (ei_path[:-7], digest[0], digest[1],
                                           digest)
            try:
                ei = next(parse_file(ei_filename))
            except Exception as e:
                if prev_ei_path is None:
                    continue
                ei_filename = "%s/%s/%s/%s" % (prev_ei_path[:-7], digest[0],
                                               digest[1], digest)
                try:
                    ei = next(parse_file(ei_filename))
                except Exception as e:
                    print(" [-] Extra info %s not found" % (ei_filename))
                    continue

            try:
                # Any Guard client ips?
                if ei.entry_ips is not None and len(ei.entry_ips) != 0:
                    entry_ips = client_ips_to_string(ei.entry_ips, "|")
            except Exception as e:
                pass
            try:
                # Any Directory client ips?
                if ei.dir_v3_requests is not None and len(ei.dir_v3_requests) != 0:
                    dir_ips = client_ips_to_string(ei.dir_v3_requests, "|")
            except Exception as e:
                pass

        # Get relay flags.
        flag = "M"
        if stem.Flag.GUARD in desc.flags:
            flag += "G"
        if stem.Flag.EXIT in desc.flags:
            flag += "E"
        if stem.Flag.HSDIR in desc.flags:
            flag += "H"

        csv_fp.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" %
                     (desc.nickname, desc.fingerprint, flag, desc.address,
                      desc.or_port, float(sd.observed_bandwidth / 1000.0 / 1000.0),
                      entry_ips, dir_ips, sd.uptime, lon, lat))

    csv_fp.close()
from stem.descriptor import parse_file

for desc in parse_file('/home/atagar/.tor/cached-consensus', validate = True):
    print('found relay %s (%s)' % (desc.nickname, desc.fingerprint))
##
##
CHECK_PORT = 80    # exit == allows port 80
TOP_PERCENT = 80   # limit analysis to 80% of total observed bw

from stem.descriptor.remote import DescriptorDownloader  # to fetch descriptors
from stem.descriptor import parse_file                   # alternatively, for local parsing
import os
import collections
from difflib import SequenceMatcher

print "Fetching latest descriptors, can take a while..."
if os.path.exists('cached-consensus'):
    # materialize the generator so len() works below
    descriptors = list(parse_file('cached-consensus'))
else:
    downloader = DescriptorDownloader(use_mirrors=True, timeout=10)
    query = downloader.get_server_descriptors()
    descriptors = query.run()
print ""

#exits_only = filter(lambda descriptor: descriptor.exit_policy.is_exiting_allowed(), descriptors)
exits_only = filter(lambda descriptor: descriptor.exit_policy.can_exit_to(port=CHECK_PORT), descriptors)
exits_sorted = sorted(exits_only, key=lambda descriptor: descriptor.observed_bandwidth, reverse=True)

print "%s relays (%s exits)" % (len(descriptors), len(exits_sorted))

total_bw = 0
total_exit_bw = 0
with open("db/notbleeding.db", "a") as text_file: text_file.write(desc.fingerprint+"\r\n") else: print result # create cache files / verify their existence touch('db/notbleeding.db') touch('db/bleedingexit.db') touch('db/bleedingguard.db') # get guard and exit bandwidth weights # verify /var/lib/tor/cached-consensus exists if os.path.exists('/var/lib/tor/cached-consensus'): for desc in parse_file(open("/var/lib/tor/cached-consensus")): for d in desc.flags: if d == "Guard": total_guard_w = total_guard_w + desc.bandwidth guard_count = guard_count + 1; elif d =="Exit": total_exit_w = total_exit_w + desc.bandwidth exit_count = exit_count + 1 print "Total # of Guards: " + str(guard_count) print "Total Bandwidth of Guards: " + str(total_guard_w) print "Total # of Exits: " + str(exit_count) print "Total Bandwidth of Exits: " + str(total_exit_w) # loop thourgh guards and check the ones we have not marked as # notbleeding by appending their fingerprint to notbleeding.db
final_time_info_bound = datetime(2013, 1, 1)  # exclusive

# data range for consensuses
initial_time_data_bound = datetime(2011, 12, 1)  # inclusive
final_time_data_bound = datetime(2013, 1, 1)  # exclusive

# load information
cur_datetime = initial_time_data_bound
while cur_datetime < final_time_data_bound:
    cur_filepath = filepath_from_time(cur_datetime)
    cur_filename = os.path.basename(cur_filepath)
    try:
        with open(cur_filepath) as consensus_file:
            router_data[cur_filename] = dict(
                [(r.fingerprint, r.bandwidth) for r in parse_file(consensus_file)])
    except IOError:
        pass  # file does not exist (possible situation)
    # iterate to the next file to read
    cur_datetime += time_interval

# iterate over base consensuses for frac_relays, frac_cw
cur_datetime = initial_time_info_bound
while cur_datetime < final_time_info_bound:
    cur_filepath = filepath_from_time(cur_datetime)  # current
    cur_filename = os.path.basename(cur_filepath)  # current
    # find base data, if data exists
    if cur_filename in router_data:
        base_routers = router_data[cur_filename]
def main(consensuses, exit_lists):
    exits = {}
    now = datetime.now(tzlocal())

    for f in consensuses:
        # strip -consensus
        d = f[:-10]
        # consensus from t hours ago
        p = parse(d).replace(tzinfo=tzutc())
        t = get_hours(now - p)

        # read in consensus and store routes in exits
        for router in parse_file("data/consensuses/" + f,
                                 "network-status-consensus-3 1.0",
                                 validate=False):
            if router.fingerprint in exits:
                continue
            r = Router(router, t)
            if r.IsAllowed:
                for x in router.exit_policy._get_rules():
                    r.Rules.append({
                        "IsAddressWildcard": True,
                        "Address": "",
                        "Mask": "",
                        "IsAccept": x.is_accept,
                        "MinPort": x.min_port,
                        "MaxPort": x.max_port
                    })
            exits[router.fingerprint] = r

        # get a corresponding exit list
        m = [x for x in exit_lists if x.startswith(d[:-5])]
        if len(m) == 0:
            continue

        # update exit addresses with data from TorDNSEL
        for descriptor in parse_file("data/exit-lists/" + m[0],
                                     "tordnsel 1.0", validate=False):
            e = exits.get(descriptor.fingerprint, None)
            if e is not None:
                if e.Tminus == t:
                    e.Address = []
                for a in descriptor.exit_addresses:
                    if a[0] not in e.Address:
                        e.Address.append(a[0])

    # update all with server descriptor info
    for descriptor in parse_file("data/cached-descriptors",
                                 "server-descriptor 1.0", validate=False):
        if descriptor.fingerprint in exits:
            r = exits[descriptor.fingerprint]
            r.IsAllowed = descriptor.exit_policy.is_exiting_allowed()
            if r.IsAllowed:
                rules = []
                for x in descriptor.exit_policy._get_rules():
                    is_address_wildcard = x.is_address_wildcard()
                    mask = ""
                    if not is_address_wildcard:
                        address_type = x.get_address_type()
                        if (address_type == AddressType.IPv4 and x._masked_bits != 32) or \
                           (address_type == AddressType.IPv6 and x._masked_bits != 128):
                            mask = x.get_mask()
                    rules.append({
                        "IsAddressWildcard": is_address_wildcard,
                        "Address": "" if x.address is None else x.address,
                        "Mask": "" if mask is None else mask,
                        "IsAccept": x.is_accept,
                        "MinPort": x.min_port,
                        "MaxPort": x.max_port
                    })
                r.Rules = rules

    # output exits to file
    with open("data/exit-policies", "w") as exit_file:
        for e in exits:
            if exits[e].IsAllowed:
                exit_file.write(json.dumps(exits[e].__dict__) + "\n")
for relay in relays:
    relay_ips[relay] = {}

pathnames = []
for dirpath, dirnames, fnames in os.walk(in_consensuses_dir, followlinks=True):
    for fname in fnames:
        pathnames.append(os.path.join(dirpath, fname))
pathnames.sort()

for pathname in pathnames:
    filename = os.path.basename(pathname)
    if (filename[0] == '.'):
        continue
    print('Processing consensus file {0}'.format(filename))

    cons_valid_after = None
    cons_f = open(pathname, 'rb')
    for r_stat in sd.parse_file(cons_f, validate=False):
        if (cons_valid_after == None):
            cons_valid_after = r_stat.document.valid_after
            # compute timestamp version once here
            valid_after_ts = pathsim.timestamp(cons_valid_after)
        if (r_stat.fingerprint in relay_ips):
            if (r_stat.address not in relay_ips[r_stat.fingerprint]):
                relay_ips[r_stat.fingerprint][r_stat.address] = [valid_after_ts]
            else:
                relay_ips[r_stat.fingerprint][r_stat.address].append(valid_after_ts)

for relay in relays:
    print('{0}:'.format(relay), end='')
    for ip in relay_ips[relay]:
        print('\t{0}'.format(ip), end='')
""" Parses the cached consensus of a running Tor instance from file and outputs the hidden service directories as a CSV file. Useful for determining the responsible hidden services directories at a particular time - Donncha O' Cearbhaill - [email protected] PGP: 0xAEC10762 """ from stem.descriptor import parse_file, DocumentHandler from stem.descriptor.router_status_entry import RouterStatusEntryV3, _decode_fingerprint import base64 with open('/var/lib/tor/cached-consensus', 'rb') as consensus_file: # Processes the routers as we read them in. The routers refer to a document # with an unset 'routers' attribute. for router in parse_file(consensus_file, 'network-status-consensus-3 1.0', document_handler = DocumentHandler.ENTRIES): if "HSDir" in router.flags: fingerprint_base32 = base64.b32encode(router.fingerprint.decode("hex")) print "'%s','%s','%s','%s'" % (fingerprint_base32, router.nickname, router.fingerprint, router.address)
from stem.descriptor import DocumentHandler, parse_file

consensus = next(parse_file(
    '/tmp/descriptor_dump',
    descriptor_type = 'network-status-consensus-3 1.0',
    document_handler = DocumentHandler.DOCUMENT,
))

for fingerprint, relay in consensus.routers.items():
    print("%s: %s" % (fingerprint, relay.nickname))
#!/usr/bin/env python
import json
import operator

from stem.descriptor import parse_file


class Router():
    def __init__(self, router):
        self.Address = router.address
        self.IsAllowedDefault = router.exit_policy._is_allowed_default
        self.Rules = []


exits = {}
for descriptor in parse_file("public/exit-addresses", "tordnsel 1.0"):
    descriptor.exit_addresses.sort(key=operator.itemgetter(1), reverse=True)
    exits[descriptor.fingerprint] = descriptor.exit_addresses[0][0]

with open("data/exit-policies", "w") as exit_file:
    for router in parse_file("data/consensus", "network-status-consensus-3 1.0"):
        if router.exit_policy.is_exiting_allowed():
            r = Router(router)
            if router.fingerprint in exits:
                r.Address = exits[router.fingerprint]
            for x in router.exit_policy._get_rules():
                r.Rules.append({
                    "Address": x.address,
                    "IsAccept": x.is_accept,
                    "MinPort": x.min_port,