def save_ip_mapping():
    account_ips = get_transfers_to_validation_account()
    script_dir = Path(__file__).parent.absolute()
    data_folder = script_dir / "data"
    validation_file = data_folder / "validate_ip.pbz2"
    save_bz2_pickle(account_ips, validation_file)
def get_all_deploys():
    """
    Retrieves all deploys on chain and caches them.

    Will be REALLY slow with large downloads as calls are throttled.
    Key "last_height" stores the height of the last block deploys have
    been synced up to.
    """
    ANNOUNCE_INTERVAL = 100
    cached_deploys_file = DATA_PATH / "deploy_cache.pbz2"
    if cached_deploys_file.exists():
        deploys = load_bz2_pickle(cached_deploys_file)
    else:
        deploys = {}
    cur_height = 0
    cache_height = deploys.get("last_height", 0)
    blocks = get_all_blocks()
    print(f"Downloading deploys from block height {cache_height} "
          f"to {blocks[-1]['header']['height']}")
    announce_height = cache_height + ANNOUNCE_INTERVAL
    for block in blocks[cache_height:]:
        cur_height = block["header"]["height"]
        if cur_height < cache_height:
            continue
        for deploy_hash in block["header"]["deploy_hashes"]:
            if deploy_hash not in deploys:
                deploys[deploy_hash] = get_deploy(deploy_hash)
        if block["header"]["height"] == announce_height:
            print(f"At block {announce_height}")
            announce_height += ANNOUNCE_INTERVAL
    deploys["last_height"] = cur_height
    save_bz2_pickle(deploys, cached_deploys_file)
    return deploys
def get_all_blocks():
    """
    Retrieves all blocks on chain and caches them when possible.

    Will be REALLY slow with large block downloads as calls are throttled.
    """
    cached_blocks_file = DATA_PATH / "block_cache.pbz2"
    if cached_blocks_file.exists():
        blocks = load_bz2_pickle(cached_blocks_file)
        last_height = blocks[-1]["header"]["height"]
    else:
        blocks = []
        last_height = -1
    block = get_block()["result"]["block"]
    new_blocks = []
    cur_height = block["header"]["height"]
    print(f"Downloading blocks from cur height: {cur_height} "
          f"down to cached height: {last_height}.")
    # Walk backwards from the chain tip via parent_hash until we reach
    # the cached height.
    for _ in range(cur_height - last_height):
        new_blocks.append(block)
        time.sleep(0.1)
        parent_hash = block["header"]["parent_hash"]
        # The all-zero parent hash marks the genesis block.
        if parent_hash != '0000000000000000000000000000000000000000000000000000000000000000':
            block = get_block(parent_hash)["result"]["block"]
    new_blocks.reverse()
    blocks.extend(new_blocks)
    save_bz2_pickle(blocks, cached_blocks_file)
    return blocks
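# Usage sketch (illustrative, not part of the original module): the cache is
# built oldest-to-newest, so a block's height should match its list index,
# which is what lets the other cache functions slice with blocks[cache_height:].
#
#     blocks = get_all_blocks()
#     assert blocks[-1]["header"]["height"] == len(blocks) - 1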
def get_all_transfers():
    """
    Retrieves all transfers on chain and caches them.

    Will be REALLY slow with large downloads as calls are throttled.
    Key "last_height" stores the height of the last block transfers have
    been synced up to.
    """
    cached_transfers_file = DATA_PATH / "transfer_cache.pbz2"
    if cached_transfers_file.exists():
        transfers = load_bz2_pickle(cached_transfers_file)
    else:
        transfers = {}
    cur_height = 0
    cache_height = transfers.get("last_height", 0)
    blocks = get_all_blocks()
    print(f"Downloading transfers from block height {cache_height} "
          f"to {blocks[-1]['header']['height']}")
    for block in blocks[cache_height:]:
        cur_height = block["header"]["height"]
        if cur_height < cache_height:
            continue
        for transfer_hash in block["header"]["transfer_hashes"]:
            if transfer_hash not in transfers:
                transfers[transfer_hash] = get_deploy(transfer_hash)
    transfers["last_height"] = cur_height
    save_bz2_pickle(transfers, cached_transfers_file)
    return transfers
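# get_all_deploys and get_all_transfers differ only in the header key they
# walk and the cache file they write. A possible shared helper (a sketch, not
# part of the original code; the name and signature are illustrative):
def _cache_hashes_by_block(hash_key: str, cache_name: str) -> dict:
    """Incrementally cache deploys listed under block["header"][hash_key]
    ("deploy_hashes" or "transfer_hashes") into DATA_PATH / cache_name."""
    cache_file = DATA_PATH / cache_name
    cache = load_bz2_pickle(cache_file) if cache_file.exists() else {}
    cur_height = cache_height = cache.get("last_height", 0)
    for block in get_all_blocks()[cache_height:]:
        cur_height = block["header"]["height"]
        for deploy_hash in block["header"][hash_key]:
            if deploy_hash not in cache:
                cache[deploy_hash] = get_deploy(deploy_hash)
    cache["last_height"] = cur_height
    save_bz2_pickle(cache, cache_file)
    return cache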
def save_data(network: Network):
    print(f"Network {network.name}")
    network_folder = DATA_FOLDER / network.name
    nodes_file = network_folder / "nodes" / f"nodes_{int(datetime.now().timestamp())}.pbz2"
    latest_file = network_folder / "nodes_latest.pbz2"
    graph_file = network_folder / "graph_latest.png"
    graph_ips_latest = network_folder / "ips_latest.csv"
    network_info_file_path = network_folder / "network_info.pbz2"
    (network_folder / "nodes").mkdir(parents=True, exist_ok=True)

    print("Getting key weight")
    key_weight = get_last_auction_era_key_weight(f"http://{network.ips[0]}:7777")
    total_weight = sum(key_weight.values())

    print("Getting nodes from spider")
    spider = Spider(network.ips, network.name)
    spider.get_all_nodes()
    for node in spider.nodes.values():
        key = node["our_public_signing_key"]
        if key in key_weight:
            node["weight"] = key_weight.get(key, 0)
            node["weight_percent"] = round(node["weight"] / total_weight * 100, 3)
    save_bz2_pickle(spider.nodes, nodes_file)
    save_bz2_pickle(spider.nodes, latest_file)

    print("Graphing nodes")
    ip_list = graph_nodes(spider.nodes, graph_file, network_info_file_path)
    graph_ips_latest.write_text('\n'.join(
        f"{ip},{index}" for index, ip in ip_list))
def geo_data_for_ip(ip) -> dict:
    geo_data = load_bz2_pickle(GEO_DATA) if GEO_DATA.exists() else {}
    if ip not in geo_data:
        geo_response = requests.get(
            f"http://api.ipstack.com/{ip}?access_key={ACCESS_KEY}")
        if geo_response.status_code == 200:
            geo_data[ip] = geo_response.json()
            save_bz2_pickle(geo_data, GEO_DATA)
    # Return an empty dict rather than raising KeyError when the lookup failed.
    return geo_data.get(ip, {})
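# Usage sketch (the IP is illustrative): the first call hits the ipstack API
# and caches the JSON; later calls for the same address are served from the
# GEO_DATA pickle.
#
#     info = geo_data_for_ip("8.8.8.8")
#     print(info.get("country_name"), info.get("city"))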
def save_network_info(g, ip_index, nodes, network_info_file_path):
    peer_count = defaultdict(int)
    two_way_count = defaultdict(int)
    for node in nodes.values():
        # Filter out peers from other networks.
        peers = len(set(node["peers"]).intersection(set(nodes.keys())))
        two_ways = len(node["two_way_peers"])
        peer_count[peers] += 1
        two_way_count[two_ways] += 1
    combined_count = []
    full_counts = set(peer_count.keys()).union(two_way_count.keys())
    for count in sorted(full_counts, reverse=True):
        combined_count.append(
            (count, peer_count.get(count, 0), two_way_count.get(count, 0)))

    # Histogram of shortest-path lengths between every ordered pair of nodes;
    # each unordered pair is therefore counted in both directions.
    path_len = defaultdict(int)
    for id_a in ip_index.values():
        for id_b in ip_index.values():
            if id_a == id_b:
                continue
            if id_a not in g or id_b not in g:
                continue
            # This is really slow.
            path_len[nx.shortest_path_length(g, id_a, id_b)] += 1
    path_count = sorted(path_len.items(), key=lambda d: d[0])
    save_bz2_pickle(
        {
            "node_count": len(nodes),
            "peer_count": combined_count,
            "path_count": path_count,
        },
        network_info_file_path)
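# The nested loop above runs one BFS per ordered pair of nodes. If every graph
# node corresponds to an ip_index entry, an equivalent histogram can be built
# with one BFS per source node via networkx's all-pairs helper (a sketch under
# that assumption, not the original implementation):
def _path_length_histogram(g) -> dict:
    hist = defaultdict(int)
    for _source, lengths in nx.all_pairs_shortest_path_length(g):
        for dist in lengths.values():
            if dist > 0:  # skip the zero-length path to self, as above
                hist[dist] += 1
    return hist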
def get_all_era_info():
    cached_era_info_file = DATA_PATH / "era_info.pbz2"
    if cached_era_info_file.exists():
        era_info = load_bz2_pickle(cached_era_info_file)
        last_era = max(era_info.keys())
    else:
        era_info = {}
        last_era = -1
    blocks = get_all_blocks()
    print(f"Downloading era data from {last_era} "
          f"to {blocks[-1]['header']['era_id']}")
    last_block_hash = blocks[0]["hash"]
    for block in blocks:
        cur_era = block["header"]["era_id"]
        if last_era < cur_era:
            last_era = cur_era
            # The previous block is the switch block that closed the prior era.
            era_info_by_switch = get_era_info_by_switch_block(last_block_hash)
            era_info[cur_era] = era_info_by_switch["result"]["era_summary"]
        last_block_hash = block["hash"]
    save_bz2_pickle(era_info, cached_era_info_file)
    return era_info
if not DATA_FOLDER.exists():
    raise Exception(f"{DATA_FOLDER} does not exist.")


def load_ips():
    try:
        if IPS_FILE.exists():
            return load_bz2_pickle(IPS_FILE)
        return INTERNAL_NODES
    except Exception:
        # Should overwrite bad file after generating new ip list
        return INTERNAL_NODES


ips = load_ips()
spider = Spider(ips)
spider.get_all_nodes()

save_bz2_pickle(list(spider.nodes.keys()), IPS_FILE)
save_bz2_pickle(spider.nodes, NODES_FILE)
save_bz2_pickle(spider.nodes, LATEST_FILE)

ip_list = graph_nodes(spider.nodes, GRAPH_FILE)
GRAPH_IPS_LATEST.write_text('\n'.join(
    f"{ip},{index}" for index, ip in ip_list))

# Cache blocks and deploys
cache_all()
from pathlib import Path

from pickle_util import load_pickle, save_bz2_pickle

SCRIPT_DIR = Path(__file__).parent.absolute()
DATA_DIR = SCRIPT_DIR / "data"

# One-off migration: re-save every plain pickle as a bz2-compressed .pbz2.
for file in DATA_DIR.glob('*.pickle'):
    data = load_pickle(file)
    save_bz2_pickle(data, file.with_suffix(".pbz2"))
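# Note: the original .pickle files are left in place; they can be removed
# manually once the .pbz2 copies are verified.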