def getdefisandtrefis():
    """Harvest every displayed file, transitively following mhinputrefs,
    and return the pair (defis, trefis) collected by the gatherer.

    Uses the module-level globals graph, harvest, os, mathhub_dir,
    root_repo, root_doc and ONLY_COVERED_PART.
    """
    shown_graph = graph.Graph()
    shown_files = graph.add_omgroup_data(mathhub_dir, root_repo, root_doc, shown_graph, ONLY_COVERED_PART)
    # module_nodes iterates as node paths, extending the worklist
    shown_files += shown_graph.module_nodes
    ctx = harvest.HarvestContext(harvest.SimpleLogger(2), harvest.DataGatherer(), mathhub_dir)
    frontier = shown_files[:]
    while frontier:
        for path in frontier:
            if not os.path.isfile(path):
                print("File " + path + " doesn't exist")
                continue
            ctx.repo = harvest.split_path_repo_doc(path)[1]
            ctx.gatherer.push_repo(ctx.repo, ctx)
            directory, basename = os.path.split(path)
            harvest.harvest_file(directory, basename, ctx)
        # Collect files referenced via \mhinputref that we haven't shown yet.
        # Note: the gatherer accumulates across rounds, so already-seen refs
        # are re-checked here — the membership test keeps this terminating.
        frontier = []
        for ref in ctx.gatherer.mhinputrefs:
            target = ref["dest_path"]
            if target not in shown_files:
                frontier.append(target)
                shown_files.append(target)
    return (ctx.gatherer.defis, ctx.gatherer.trefis)
def getdefisandtrefis():
    """Collect defis and trefis from all displayed files.

    Starts from the files in the omgroup graph and transitively follows
    \\mhinputref inclusions; if the global BIG is set, it additionally
    follows importmhmodules and gimports (both the language binding and
    the signature file of each gimport).

    Returns:
        tuple: (defis, trefis) as accumulated by the data gatherer.

    Relies on module-level globals: graph, harvest, os, mathhub_dir,
    root_repo, root_doc, ONLY_COVERED_PART, BIG, args.
    """
    displayedgraph = graph.Graph()
    displayedfiles = graph.add_omgroup_data(mathhub_dir, root_repo, root_doc, displayedgraph, ONLY_COVERED_PART)
    # module_nodes iterates as node paths, so this extends the worklist
    displayedfiles += displayedgraph.module_nodes
    logger = harvest.SimpleLogger(2)
    ctx = harvest.HarvestContext(logger, harvest.DataGatherer(), mathhub_dir)
    newfiles = displayedfiles[:]
    while newfiles:
        for filename in newfiles:
            if not os.path.isfile(filename):
                print("File " + filename + " doesn't exist")
                continue
            # absolute repo directory: mathhub_dir + repo part of the path
            ctx.repo = os.path.join(mathhub_dir, harvest.split_path_repo_doc(filename)[0])
            ctx.gatherer.push_repo(ctx.repo, ctx)
            root, name = os.path.split(filename)
            harvest.harvest_file(root, name, ctx)
        newfiles = []
        # fringe: candidate files referenced from everything harvested so far
        # (the gatherer accumulates, so earlier refs are re-checked; the
        # displayedfiles membership test below keeps the loop terminating)
        fringe = [m["dest_path"] for m in ctx.gatherer.mhinputrefs]
        if BIG:
            fringe += [m["dest_path"] for m in ctx.gatherer.importmhmodules]
            # both signature and language bindings?
            fringe += [
                os.path.join(mathhub_dir, m["dest_repo"], "source",
                             m["dest_mod"] + "." + args.LANGUAGE + ".tex")
                for m in ctx.gatherer.gimports
            ]
            fringe += [
                os.path.join(mathhub_dir, m["dest_repo"], "source",
                             m["dest_mod"] + ".tex")
                for m in ctx.gatherer.gimports
            ]
        for destfile in fringe:
            if destfile not in displayedfiles:
                newfiles.append(destfile)
                displayedfiles.append(destfile)
    return (ctx.gatherer.defis, ctx.gatherer.trefis)
help="git repo or higher level directory which is debugged") args = parser.parse_args() verbosity = args.verbosity if args.emacs: import datetime emacs_bufferpath = "/tmp/lmh_debug-" + str( datetime.datetime.now()).replace(" ", "T") + ".log" logger = EmacsLogger(verbosity, emacs_bufferpath) else: logger = harvest.SimpleLogger(verbosity) logger.log("GATHERING DATA\n", minverbosity=2) mathhub_dir = harvest.get_mathhub_dir(args.DIRECTORY[0]) ctx = harvest.HarvestContext(logger, harvest.DataGatherer(), mathhub_dir) for directory in args.DIRECTORY: harvest.gather_data_for_all_repos(directory, ctx) logger.log("\n\nCHECKING DATA\n", minverbosity=2) check_data(ctx.gatherer, verbosity, logger) if args.incomplete_mhmodnl: logger.log("\n\nLOOKING FOR MISSING VERBALIZATIONS IN MHMODNLs\n", minverbosity=2) check_mvx(ctx.gatherer, logger) mv_langs = args.missing_verbalizations if not mv_langs: mv_langs = [] all_langs = sorted(list(set([e["lang"] for e in ctx.gatherer.langfiles]))) if "all" in mv_langs:
"""Scan repositories under a given path and report every symi carrying a
'gfc' parameter and every defi carrying a 'gfa' parameter."""
import lmh_harvest as harvest
import sys

PATH = sys.argv[1]
VERBOSITY = 1

gatherer = harvest.DataGatherer()
logger = harvest.SimpleLogger(VERBOSITY)
# NOTE(review): other scripts in this tool set pass a mathhub_dir as a
# third HarvestContext argument — confirm the two-argument form is intended.
harvest.gather_data_for_all_repos(PATH, harvest.HarvestContext(logger, gatherer))

# Report symis that declare a 'gfc' parameter.
for symi in gatherer.symis:
    if "gfc" not in symi["params"]:
        continue
    print("I found a symi with gfc:")
    print(" ", symi)

# Report defis that declare a 'gfa' parameter.
for defi in gatherer.defis:
    if "gfa" not in defi["params"]:
        continue
    print("I found a defi with gfa:")
    print(" ", defi)
""" Very experimental tool to generate a GF (Grammatical Framework) Lexicon from SMGloM """ import lmh_harvest as harvest import sys directories = sys.argv[1:] mathhub_dir = harvest.get_mathhub_dir(directories[0]) ctx = harvest.HarvestContext(harvest.SimpleLogger(2), harvest.DataGatherer(), mathhub_dir) for directory in directories: harvest.gather_data_for_all_repos(directory, ctx) symi_dict = {} for symi in ctx.gatherer.symis: symb = symi["mod_name"].replace("-", "_") + "_" + symi["name"].replace( "-", "_") if "gfc" in symi["params"]: symi_dict[symb] = symi["params"]["gfc"] def umlautSubst(s): return s.replace("\"a", "ä")\ .replace("\"o", "ö")\ .replace("\"u", "ü")\ .replace("\"A", "Ä")\ .replace("\"O", "Ö")\ .replace("\"U", "Ü")\ .replace("\"s", "ß")
def fill_graph(mathhub, root_repo, root_doc, graph, onlycovered: bool = False):
    """Populate *graph* with module/langfile/text nodes and edges.

    Phase 1: starting from the omgroup data for (root_repo, root_doc),
    repeatedly harvests every newly discovered file reached via
    \\mhinputref, recording nodes and module edges; import/use relations
    (importmhmodules and gimports) are queued for phase 2.
    Phase 2: harvests the queued import targets and records them as
    g_nodes/g_edges until no new targets appear.

    Mutates graph.module_nodes, graph.module_edges, graph.g_nodes and
    graph.g_edges in place; returns None.
    """
    potential_modules = add_omgroup_data(mathhub, root_repo, root_doc, graph, onlycovered)
    # blocked_nodes tracks everything ever queued so no file is harvested twice
    blocked_nodes = potential_modules[:]
    logger = harvest.SimpleLogger(2)
    potential_nodes = {}
    potential_edges = []
    gimports = []
    while potential_modules:
        # fresh gatherer per round: only newly harvested data is processed below
        gatherer = harvest.DataGatherer()
        context = harvest.HarvestContext(logger, gatherer, mathhub)
        for pm in potential_modules:
            # repo path = mathhub dir + 3 path components
            context.repo = "/".join(pm.split("/")[:mathhub.count("/")+3])   # TODO: platform independence
            path = pm
            root, filename = os.path.split(path)
            try:
                harvest.harvest_file(root, filename, context)
            except FileNotFoundError:
                print("couldn't find '" + path + "'")
        for mod in gatherer.modules:
            node = mod["path"]
            if node not in potential_nodes.keys():
                name = mod["mod_name"]
                if not name:
                    # fall back to the file name without the ".tex" suffix
                    name = os.path.split(node)[1][:-4]
                potential_nodes[node] = {"label": name, "type": "module"}
        for mod in gatherer.langfiles:
            node = mod["path"]
            if node not in potential_nodes.keys():
                name = mod["mod_name"]
                if not name:
                    name = os.path.split(node)[1][:-4]
                potential_nodes[node] = {"label": name, "type": "langfile"}
        for file_ in gatherer.textfiles:
            node = file_["path"]
            if node not in potential_nodes.keys():
                potential_nodes[node] = {"label": os.path.split(node)[1], "type": "text"}
        assert not gatherer.sigfiles
        potential_modules = []   # includes text files
        for inp in gatherer.mhinputrefs:
            destnode = inp["dest_path"]
            if destnode not in blocked_nodes:
                blocked_nodes.append(destnode)
                potential_modules.append(destnode)
            # NOTE(review): edge recorded for every reference, even to
            # already-seen nodes — original indentation was ambiguous; confirm.
            potential_edges.append((inp["path"], destnode))
        # import/use relations are deferred to the g-nodes phase below
        for imp in gatherer.importmhmodules:
            gimports.append((imp["path"], imp["dest_path"]))
            graph.g_edges[gimports[-1]] = {"type": {"importmhmodule": "import", "usemhmodule": "use"}[imp["type"]]}
        for gimport in gatherer.gimports:
            gimports.append((gimport["path"], os.path.join(gimport["dest_repo"], "source", gimport["dest_mod"]) + ".tex"))
            graph.g_edges[gimports[-1]] = {"type": {"gimport": "import", "guse": "use"}[gimport["type"]]}
    # commit the collected nodes; edges only between nodes that both exist
    for node in potential_nodes.keys():
        graph.module_nodes[node] = {
            "label": potential_nodes[node]["label"],
            "type": potential_nodes[node]["type"],
        }
    for start, end in potential_edges:
        if start in potential_nodes.keys() and end in potential_nodes.keys():
            graph.module_edges[(start, end)] = {}
    ## handle gimports
    assert graph.g_nodes == {}
    while gimports:
        gatherer = harvest.DataGatherer()
        context = harvest.HarvestContext(logger, gatherer, mathhub)
        for source, dest in gimports:
            # only harvest targets not already known as g-nodes or module nodes
            if dest not in graph.g_nodes.keys() and dest not in potential_nodes.keys():
                context.repo = "/".join(dest.split("/")[:mathhub.count("/")+3])   # TODO: platform independence
                root, filename = os.path.split(dest)
                try:
                    harvest.harvest_file(root, filename, context)
                except FileNotFoundError:
                    print("couldn't find '" + dest + "'")
        assert not gatherer.langfiles
        assert not gatherer.textfiles
        for mod in gatherer.modules + gatherer.sigfiles:
            node = mod["path"]
            if node not in potential_nodes.keys() and node not in graph.g_nodes.keys():
                name = mod["mod_name"]
                if not name:
                    name = os.path.split(node)[1][:-4]
                graph.g_nodes[node] = {"label": name, "type": "module"}
        # queue the next round: only edges whose target is still unknown
        gimports = []
        for gimport in gatherer.gimports:
            pair = (gimport["path"], os.path.join(gimport["dest_repo"], "source", gimport["dest_mod"]) + ".tex")
            graph.g_edges[pair] = {"type": {"gimport": "import", "guse": "use"}[gimport["type"]]}
            if pair[1] not in graph.g_nodes.keys() and pair[1] not in potential_nodes.keys():
                gimports.append(pair)
        for imp in gatherer.importmhmodules:
            pair = (imp["path"], imp["dest_path"])
            graph.g_edges[pair] = {"type": {"importmhmodule": "import", "usemhmodule": "use"}[imp["type"]]}
            if pair[1] not in graph.g_nodes.keys() and pair[1] not in potential_nodes.keys():
                gimports.append(pair)