def inner_loop(artifact_name):
    """Index one artifact's symbols into the per-top-level symbol tables.

    For each top-level import name appearing in the artifact's symbols, this
    downloads the existing symbol table from the web DB, records
    ``artifact_name`` as a supplier of each symbol in that group, updates the
    table's metadata, and pushes the table back to the server.

    Parameters
    ----------
    artifact_name : str
        Name of the artifact whose symbols are being indexed.

    Returns
    -------
    dict
        Mapping of top-level import name -> updated symbol table.
    """
    web_interface = WebDB()
    symbols = web_interface.get_artifact_symbols(artifact_name)
    all_symbol_tables = {}
    # groupby only groups adjacent items, so the input must be sorted by the
    # same key (the lowercased first dotted component).
    for top_level_name, keys in groupby(
        sorted(symbols), lambda x: x.partition(".")[0].lower()
    ):
        print(top_level_name)
        # carve out for star imports which don't have dots
        if top_level_name == "*":
            continue
        # download the existing symbol table
        symbol_table_with_metadata = web_interface.get_symbol_table(top_level_name)
        symbol_table = symbol_table_with_metadata.get("symbol table", {})
        metadata = symbol_table_with_metadata.get("metadata", {})
        # update the symbol table
        # BUG FIX: iterate only this group's symbols (``keys``), not every
        # symbol of the artifact (``symbols``) -- the old code added all of
        # the artifact's symbols to every top-level table, polluting each
        # table with symbols belonging to other top-level names.  The list()
        # also materializes the groupby sub-iterator before the outer loop
        # advances (which would otherwise invalidate it).
        for k in list(keys):
            symbol_table.setdefault(k, []).append(artifact_name)
        # add artifacts to metadata
        # NOTE(review): ``version`` is a free name here -- presumably a
        # module-level constant; confirm it is defined in the full file.
        metadata["version"] = version
        metadata.setdefault("indexed artifacts", []).append(artifact_name)
        # push back to server
        web_interface.push_symbol_table(
            top_level_name,
            {"symbol table": symbol_table, "metadata": metadata},
        )
        all_symbol_tables[top_level_name] = symbol_table
    return all_symbol_tables
def inner_loop(artifact_name):
    """Index one artifact's symbols into the per-top-level symbol tables.

    For each top-level import name found in the artifact's symbols, this
    skips tables that already list the artifact, otherwise downloads the
    table, appends an entry per symbol (carrying any ``shadows`` data), and
    pushes the updated table and metadata back to the server.  Push failures
    are reported but do not abort the remaining groups.

    Parameters
    ----------
    artifact_name : str
        Name of the artifact whose symbols are being indexed.

    Returns
    -------
    dict
        Mapping of top-level import name -> updated symbol table (only for
        groups that were actually processed this call).
    """
    web_interface = WebDB()
    symbols = web_interface.get_artifact_symbols(artifact_name)
    all_symbol_tables = {}
    # groupby only groups adjacent items, so the input is sorted by the same
    # key (the lowercased first dotted component of the symbol name).
    for top_level_name, keys in groupby(
        sorted(symbols), lambda x: x.partition(".")[0].lower()
    ):
        print(top_level_name)
        # carve out for star imports which don't have dots
        if top_level_name == "*":
            continue
        # download the existing symbol table metadata
        metadata = web_interface.get_symbol_table_metadata(
            top_level_name=top_level_name
        )
        # skip work (and a redundant push) if this artifact was already indexed
        if artifact_name in metadata.get("indexed artifacts", []):
            continue
        # download the existing symbol table
        symbol_table_with_metadata = web_interface.get_symbol_table(top_level_name)
        symbol_table = symbol_table_with_metadata.get("symbol table", {})
        # update the symbol table
        # list() materializes the groupby sub-iterator before the outer loop
        # advances (which would otherwise invalidate it).
        for k in list(keys):
            symbol_table_entry_value = {"artifact name": artifact_name}
            # carry through any shadowing info attached to this symbol
            # NOTE(review): assumes ``symbols[k]`` is a dict with an optional
            # "data" sub-dict -- confirm against get_artifact_symbols.
            shadows = symbols[k].get("data", {}).get("shadows")
            if shadows:
                symbol_table_entry_value.update(shadows=shadows)
            symbol_table.setdefault(k, []).append(symbol_table_entry_value)
        # add artifacts to metadata
        # NOTE(review): ``version`` is a free name here -- presumably a
        # module-level constant; confirm it is defined in the full file.
        metadata["version"] = version
        metadata.setdefault("indexed artifacts", []).append(artifact_name)
        # push back to server; a failed push is logged and skipped so one bad
        # table does not stop the rest of the groups
        try:
            web_interface.push_symbol_table(
                top_level_name,
                {"symbol table": symbol_table, "metadata": metadata},
            )
        except requests.RequestException as e:
            print(e)
        all_symbol_tables[top_level_name] = symbol_table
    return all_symbol_tables
"""tools for matching the volumes with artifacts that supply the symbols""" from concurrent.futures._base import as_completed from concurrent.futures.thread import ThreadPoolExecutor from itertools import groupby from symbol_exporter.ast_symbol_extractor import builtin_symbols from symbol_exporter.db_access_model import WebDB web_interface = WebDB() def get_supply(top_level_import, v_symbols, get_symbol_table_func=web_interface.get_symbol_table): supplies = None bad_symbols = set() symbol_table = get_symbol_table_func(top_level_import) # TODO: handle star imports recursion here? for v_symbol in v_symbols: supply = symbol_table.get(v_symbol) if not supply: bad_symbols.add(v_symbol) continue if supplies is None: supplies = set(supply) else: supplies &= set(supply) return supplies or set(), bad_symbols def find_supplying_version_set(
for k in list(symbols): symbol_table.setdefault(k, []).append(artifact_name) # add artifacts to metadata metadata["version"] = version metadata.setdefault("indexed artifacts", []).append(artifact_name) # push back to server web_interface.push_symbol_table(top_level_name, { "symbol table": symbol_table, "metadata": metadata }) all_symbol_tables[top_level_name] = symbol_table return all_symbol_tables if __name__ == "__main__": web_interface = WebDB() extracted_artifacts = web_interface.get_current_symbol_table_artifacts() all_artifacts = web_interface.get_current_extracted_pkgs().values() artifacts_to_index = list(set(all_artifacts) - set(extracted_artifacts)) print(f"Number of artifacts to index: {len(artifacts_to_index)}") # The shuffle here is to try to not have two threads running on the same symbol table json at once if possible shuffle(artifacts_to_index) pool = ThreadPoolExecutor() # Note that this is a race condition here, two threads could try to write to the same symbol table # however one of those will win so next round there will be one added safely and this continues # until none are left to be added print("issuing futures") futures = { pool.submit(inner_loop, artifact_name): artifact_name
# NOTE(review): fragment -- this chunk begins inside a ``try`` block of an
# inner_loop-style function whose opening ``try`` and ``def`` are outside
# this view; indentation is reconstructed.
    except requests.RequestException as e:
        # a failed push is logged and skipped so one bad table does not
        # abort the remaining groups
        print(e)
    all_symbol_tables[top_level_name] = symbol_table
    return all_symbol_tables


def invert_dict(d: dict) -> dict:
    """Invert a mapping of key -> iterable of values into value -> set of keys.

    Every value ``vv`` appearing under key ``k`` in ``d`` becomes a key of
    the result whose set contains ``k``.
    """
    return_dict = defaultdict(set)
    for k, v in d.items():
        for vv in v:
            return_dict[vv].add(k)
    return dict(return_dict)


if __name__ == "__main__":
    web_interface = WebDB()
    indexed_artifacts_by_top_symbol = web_interface.get_current_symbol_table_artifacts_by_top_level()
    all_artifacts = web_interface.get_all_extracted_artifacts()
    # NOTE(review): ``Client`` / ``db`` look like dask.distributed / dask.bag
    # names imported elsewhere in the full file -- confirm.  Fetch each
    # artifact's top-level symbols in parallel.
    with Client(threads_per_worker=100):
        compute = db.from_sequence(all_artifacts).map(
            web_interface.get_top_level_symbols).compute()
    all_symbols_by_artifact = {k: v for k, v in zip(all_artifacts, compute)}
    all_artifacts_by_symbol = invert_dict(all_symbols_by_artifact)
    # an artifact needs indexing if any of its top-level symbols' tables do
    # not list it yet
    artifacts_to_index = set()
    for symbol, artifacts_set in all_artifacts_by_symbol.items():
        artifacts_to_index.update(
            artifacts_set - indexed_artifacts_by_top_symbol.get(symbol.lower(), set()))
    # NOTE(review): the driver appears to continue past this chunk boundary.
    artifacts_to_index = list(artifacts_to_index)