示例#1
0
def get_url(entry: Entry):
    """Resolve *entry* to a wiki URL: a known top-level page plus an anchor."""
    segments = entry.get_path()
    page = Entry.normalize(segments[1])  # segments[0] is the wiki root, skip it
    assert (page in wiki_pages)
    anchor = segments[-1]
    return create_wiki_link(page, anchor)
示例#2
0
    def switch_context_by_level(self, title):
        """Climb upward until *title* fits as a child of working_node,
        judging by heading levels."""
        while (Entry.level_of_title(self.working_node.print_name)
               >= Entry.level_of_title(title)):
            self.working_node = self.working_node.parent
示例#3
0
    def extract_and_replace_links(text, wiki):
        """Find ``[[target|label]]`` wiki links in *text*, resolve them
        against *wiki*, and replace each resolvable one with repr() of its
        Link.

        Returns a (new_text, links) tuple, where links is the list of
        resolved Link objects; unresolvable links are left in place.
        """
        links = []

        # BUG FIX: re.findall returns plain tuples when the pattern has
        # groups, so the original m.group(...) calls raised AttributeError
        # on the first match; finditer yields real Match objects.
        for m in re.finditer(r"\[\[(.+?)(\|(.+?))?\]\]", text):
            full = m.group(0)
            url = m.group(1)
            title = m.group(3) or url  # label after "|" wins over the target
            links.append((title, url, full))

        output = []
        for title, url, full in links:
            alias = Entry.normalize(url)
            entry = wiki.find_child(alias)
            if entry is None:  # couldn't resolve
                continue
            link = Link.from_entry(entry)
            output.append(link)
            text = text.replace(full, repr(link))
        return text, output
示例#4
0
def parse_comments():
    """Parse comment + wiki markdown files from the "comments" branch into
    one wiki tree, write the split pages to disk, and commit the result.

    Returns the assembled wiki root Entry.
    """
    repo.git.checkout("comments")

    files_comm = glob.glob(f"./{reddit_crawl.submodule_repo}/{thread_id}/*.md")
    files_wiki = glob.glob(f"./{reddit_crawl.submodule_repo}/wiki/*.md")

    # Insert wiki pages first, then comment entries.
    wiki = Entry.create_wiki_root()
    wiki_parser.parse_entries_and_insert_with_overwrite(wiki, files_wiki,
                                                        "wiki")
    wiki_parser.parse_entries_and_insert_with_overwrite(wiki, files_comm,
                                                        "comment")
    wiki.sort_children_recursive()

    # Write one file per top-level page; missing pages become empty files.
    for page, node in wiki_parser.split_into_files(wiki).items():
        text = node.to_string(short=False) if node is not None else ""
        with open(wiki_reader.file_wiki_page(page), "w") as file:
            file.write(text)

    repo.git.add('./wiki')
    repo.git.commit("--allow-empty", m=f"Parsed-{commit_msg}")
    print(repo.git.status())

    return wiki
示例#5
0
    def parse_tree(self):
        """Drain parse_entry() and arrange the entries into a tree by
        heading level, under a synthetic level-0 "root" Entry."""
        # iter(callable, sentinel) keeps calling until it returns None.
        flat = list(iter(self.parse_entry, None))

        root = Entry("root")  # level 0 root
        branch = [root]  # chain of currently-open ancestors
        for node in flat:
            # Trim the branch back until its tip can be node's parent.
            while len(branch) > 1 and node.level() <= branch[-1].level():
                branch.pop()
            branch[-1].add_child(node)
            branch.append(node)

        return root
示例#6
0
def parse_wiki():
    """Fetch every known wiki page and parse all of them into one tree."""
    root = Entry.create_wiki_root()
    for page in wiki_pages:
        lines = fetch_wiki_page(page).splitlines()
        WikiParser.take_and_parse(lines, None, root)
    root.sort_children_recursive()
    return root
示例#7
0
def split_into_files(wiki: Entry):
    """Split the wiki tree into one subtree per known top-level page.

    Returns a dict mapping page name -> Entry (None when no matching child
    exists); whatever children remain land under the 'misc' key.
    """
    topcats = {}

    wiki = wiki.copy()  # detach from the caller's tree before pruning

    for page in RedditWikiReader.wiki_pages:
        node = wiki.find_child(page)
        topcats[page] = node
        if node is not None:
            wiki.children.remove(node)

    # Leftover children belong to no named page.
    topcats['misc'] = wiki if len(wiki.children) > 0 else None

    return topcats
示例#8
0
    def end_entry(self):
        """Finish the entry being collected: build an Entry from the
        buffered fields and merge it into the working node."""
        entry = self.entry
        if entry is None or entry['print_name'] is None:
            # shouldn't happen thanks to prev. null-checks
            raise RuntimeError("Don't have an entry to end!", self)

        body = Entry.format_content("\n".join(entry['content']))
        child = Entry(print_name=entry['print_name'],
                      aliases=None,
                      info=entry['info'],
                      content=body)

        self.working_node.merge_with(child, replace=True)

        # Reset collection state for the next entry.
        self.mode = "entry-title"
        self.entry = None
示例#9
0
    def start_entry(self, title: str):
        """Open a new entry named *title*, reusing a matching tree node or
        creating a fresh one under the working node."""
        node = self.tree.find_child(title)

        if node is not None:
            # Reuse the existing node, but only if its depth matches.
            if Entry.level_of_title(title) != node.level():
                raise RuntimeError(
                    "Existing Node and New Node collide with different levels!",
                    self)
        else:
            node = Entry(print_name=title,
                         info=None,
                         content=None,
                         aliases=None)
            self.working_node.add_child(node)

        self.working_node = node

        # Start buffering the entry's fields.
        self.mode = "entry-info"
        self.entry = self.new_entry()
        self.entry['print_name'] = title
        self.entry['node'] = node
示例#10
0
 def attention_filter(lines):
     """Filter *lines* down to the contents of attention sections, each
     introduced as a top-level "# <category>" heading."""
     filtered = []
     active = False
     for line in lines:
         start_cat = WikiParser.line_attention_start(line)
         is_end = WikiParser.line_attention_end(line)
         if start_cat is not None:
             active = True
             # top-level context switch
             filtered.append("# " + Entry.normalize(start_cat))
         elif is_end is True:
             assert (active is True)
             active = False
         elif active:
             filtered.append(line)
     return filtered
示例#11
0
    def parse_entry(self):
        """Try to parse one entry (title, optional info line, content).

        Returns the new Entry, or None — with the read pointer rolled
        back — when no entry starts at the current position.
        """
        saved = self.pointer

        self.skip_empty_lines()
        name = None if self.ended() else self.line_title(self.ptr())
        if name is None:
            self.pointer = saved  # can't parse, roll back
            return None
        self.inc()

        # An info line may follow the title; otherwise fall back to default.
        self.skip_empty_lines()
        info = None
        if not self.ended():
            info = self.line_entry_info(self.ptr())
            if info is not None:
                self.inc()
        if info is None:
            info = self.default_info(name)

        # Collect consecutive content lines, newline-terminated.
        parts = []
        while not self.ended() and self.line_content(self.ptr()):
            parts.append(self.ptr())
            self.inc()
        content = "".join(part + "\n" for part in parts)

        return Entry(print_name=name,
                     aliases=None,
                     parent=None,
                     children=None,
                     info=info,
                     content=content)
示例#12
0
 def merge_into(dst: Entry,
                src: Entry):  # jump over missing layers wherever possible
     """Merge *src* (and its subtree) into *dst*, descending past missing
     intermediate layers when an unambiguous matching child exists.

     Raises RuntimeError when several of dst's children match src's name
     (ambiguous destination).
     """
     opt = dst.find_children(src.print_name)
     if len(opt) == 1 and opt[0] != dst:  # jump downwards the branch
         WikiParser.merge_into(opt[0], src)
         return
     elif len(opt) == 1 and opt[0] == dst:  # we arrived
         dst.merge_with_no_recursion(src)
         for c in src.children:
             WikiParser.merge_into(dst, c)
         return
     elif len(opt) >= 2:  # dst was illegal
         # BUG FIX: the original did `return RuntimeError`, handing back the
         # exception class (which every recursive caller ignored) instead of
         # raising it — cf. start_entry, which raises on a collision.
         raise RuntimeError(
             "Ambiguous destination: multiple children match!", dst, src)
     else:  # nothing found, let's make a new destination and use it
         trg = src.copy_no_children()
         dst.add_child(trg)
         WikiParser.merge_into(trg, src)
         return
示例#13
0
from wiki import Entry
import wiki_parser
import reddit_crawl
import glob

# files = reddit_crawl.crawl_all(reddit_crawl.test_url, 8)
# Parse a fixed local crawl (thread fhftm0) instead of re-crawling.
files_comm = glob.glob("./data/fhftm0/*.md")
files_wiki = glob.glob("./data/wiki/*.md")

# Build one tree from wiki pages first, then comment files; presumably the
# second pass overwrites colliding entries (per the helper's name) — confirm.
wiki = Entry.create_wiki_root()
wiki_parser.parse_entries_and_insert_with_overwrite(wiki, files_wiki, "wiki")
wiki_parser.parse_entries_and_insert_with_overwrite(wiki, files_comm,
                                                    "comment")

# Split the merged tree into per-page subtrees.
wiki_pages = wiki_parser.split_into_files(wiki)
示例#14
0
def write_wiki(wiki: Entry):
    """Serialize each known top-level page found in *wiki* to disk;
    pages with no matching child are skipped."""
    for page in wiki_pages:
        node = wiki.find_child(page)
        if node is not None:
            write_wiki_page(page, repr(node))
示例#15
0
 def default_info(self, title):
     """Fallback info for *title*: None when the entry belongs to the
     standard tree (level <= 2), the default info string otherwise."""
     level = Entry.level_of_title(title)
     return None if level <= 2 else self.default_info_string
示例#16
0
 def get_player(self, name: str):
     """Look up a player whose normalized name equals the normalized
     *name*; returns None when there is no match."""
     # Hoist the loop-invariant normalization of the query: the original
     # re-normalized *name* once per player.
     target = Entry.normalize(name)
     for player in self.players:
         if Entry.normalize(player.name) == target:
             return player
     return None
示例#17
0
 def __init__(self, file):
     """Build the player list from a JSON document, sorted by normalized name.

     NOTE(review): *file* is passed straight to json.loads, so despite the
     name it is a JSON string/bytes, not a file object — confirm at callers.
     """
     self.players = sorted((Player(obj) for obj in json.loads(file)),
                           key=lambda p: Entry.normalize(p.name))
示例#18
0
 def from_entry(entry: Entry):
     """Build a Link pointing at *entry*: display name, resolved wiki URL,
     and a tooltip simplified from the entry's content."""
     return Link(entry.reference_name(),
                 reddit_utils.get_url(entry),
                 Link.simplify_tooltip(entry.content))