Exemplo n.º 1
0
 def __init__(self, article):
     """Prepare the parser state for one template and run the state machine.

     Stores ``article``, resets the parse results (``name``, ``values``,
     ``subtemplates``), compiles the marker patterns, registers the state
     handlers and finally calls ``self.fsm.run()``; the state machine is
     therefore started as part of construction.
     """
     # Parse results.
     self.article = article
     self.name = None
     self.values = {}
     # One dict per nested template (cursor positions plus the nested
     # Template instance).
     self.subtemplates = []
     # Bookkeeping for "anonymous" values such as {{Template|value}}.
     self.anonymous = 0

     # Marker patterns used while scanning the article text.
     open_braces = re.compile(r"\{\{")
     close_braces = re.compile(r"\}\}")
     link_open = re.compile(r"\[\[")
     pipe = re.compile(r"\|")
     self.p_start = open_braces
     self.p_end = close_braces
     self.p_val = re.compile(r"\s*\|\s*([^=|}]*)\s*=?\s*([^|}]*)")
     self.p_linkstart = link_open
     self.p_link = re.compile(r"\[\[(.*?)\]\]")
     self.p_slicer = pipe

     # Markers that start the next section (end of the template or the
     # next value), mapped to the state they lead to. The insertion order
     # is the order in which the patterns get tried.
     self.slicers = {
         close_braces: "end",
         pipe: "value",
         open_braces: "start",
         link_open: "link",
     }

     # Wire up the state machine: "start" is the entry state and "end" is
     # the terminal state, which has no handler.
     machine = StateMachine()
     machine.addState("start", self.start_state)
     machine.setStart("start")
     machine.addState("name", self.name_state)
     machine.addState("value", self.value_state)
     machine.addState("end", None, end=True)
     machine.addState("link", self.link_state)
     self.fsm = machine

     machine.run()
Exemplo n.º 2
0
class Template:
    """Parser for one wiki-style template (``{{Name|key=value|...}}``).

    Creating an instance immediately runs a state machine over ``article``.
    The methods below use these members of ``article``: ``line``,
    ``cursor`` (read as a dict with ``"line"`` and ``"char"`` keys),
    ``__next__()`` and ``extract(start, end)``.

    Results are left on the instance:

    * ``name`` - the stripped template name,
    * ``values`` - named values keyed by the text before the first ``=``
      (not stripped), anonymous values keyed by the integers 1, 2, ...,
    * ``subtemplates`` - one dict per nested template.
    """

    def __init__(self, article):
        """Set up patterns and state handlers, then run the state machine."""
        self.article = article
        self.name = None
        self.values = {}

        # Nested templates; each entry is a dict of the form
        # {"startcursor": cursor, "template": Template(), "endcursor": cursor}
        self.subtemplates = []

        # "Anonymous" values in templates ({{Template|value}})
        self.anonymous = 0

        # Set up the state machine
        self.fsm = StateMachine()
        self.fsm.addState("start", self.start_state)
        self.fsm.setStart("start")
        self.fsm.addState("name", self.name_state)
        self.fsm.addState("value", self.value_state)
        self.fsm.addState("end", None, end=True)
        self.fsm.addState("link", self.link_state)

        self.p_start = re.compile(r"\{\{")
        self.p_end = re.compile(r"\}\}")
        self.p_val = re.compile(r"\s*\|\s*([^=|}]*)\s*=?\s*([^|}]*)")
        self.p_linkstart = re.compile(r"\[\[")
        self.p_link = re.compile(r"\[\[(.*?)\]\]")
        self.p_slicer = re.compile(r"\|")
        # Markers for the next section, i.e. the end of the template or the
        # next value, mapped to the state they lead to. Insertion order is
        # the order in which the handlers try the patterns.
        self.slicers = {
            self.p_end: "end",
            self.p_slicer: "value",
            self.p_start: "start",
            self.p_linkstart: "link",
        }

        self.fsm.run()

    # ------------------------------------------------------------------
    # State machine handlers
    # ------------------------------------------------------------------

    def link_state(self):
        """Handler for the "link" state; not implemented.

        Prints the current article cursor and always raises ``Exception``.
        ``name_state`` can hand over to this state when ``[[`` is the first
        marker after the template name; links inside values are handled
        directly in ``value_state``.
        """
        print("cursor: "+str(self.article.cursor))
        raise Exception("link state")

    def start_state(self):
        """Look for the opening ``{{``; no template has been found so far.

        Returns:
            ``"name"`` when ``{{`` occurs in the current line (the cursor is
            moved just past it), otherwise ``"start"`` after advancing the
            article by one line.

        Raises:
            NoTemplate: the article is exhausted before any ``{{`` is found.
        """
        start = self.p_start.search(self.article.line)
        if not start:
            try:
                self.article.__next__()
            except StopIteration:
                raise NoTemplate()
            return "start"

        # The match offset is added to the current "char" position, so the
        # new cursor sits directly behind the "{{".
        current = self.article.cursor
        self.article.cursor = {
            "line": current["line"],
            "char": start.end() + current["char"],
        }
        return "name"

    def name_state(self):
        """Read the template name and decide which state follows.

        The name is the stripped text in front of the earliest marker from
        ``self.slicers`` on the current line. If the line holds no marker,
        following lines are read until one is found.

        Returns:
            The state belonging to the marker that ends the name, or
            ``"name"`` again when the text in front of the marker is blank.

        Raises:
            NoTemplate: the article ends before a marker is found.
        """
        line = self.article.line
        new_state = None

        # Chop off everything from the earliest marker onwards. Each hit
        # shortens ``line``, so later patterns can only match further left.
        start_cursor = self.article.cursor
        for pattern, state in self.slicers.items():
            match = pattern.search(line)
            if not match:
                continue

            line = line[:match.start()]
            # NOTE(review): the cursor is assigned a plain int here although
            # it is read as a dict elsewhere - presumably the cursor setter
            # accepts a bare char position; confirm against the article
            # class.
            self.article.cursor = match.end() + start_cursor["char"]
            new_state = state

        # TODO: reject a nested template inside the template name.

        line = line.strip()
        if not line:
            # NOTE(review): when no marker matched, nothing has advanced at
            # this point - verify that this cannot repeat forever on a blank
            # remainder.
            return "name"

        self.name = line

        if new_state:
            return new_state

        # No marker on the name's own line: look for the next state in the
        # following lines.
        while True:
            try:
                line = self.article.__next__()
            except StopIteration:
                raise NoTemplate()

            # Start beyond the end of the line so that any match is earlier.
            span = [len(line) + 1, len(line) + 1]
            for pattern, state in self.slicers.items():
                match = pattern.search(line)
                if not match:
                    continue
                if not match.start() < span[0]:
                    continue

                span = match.span()
                new_state = state

                # TODO: template name spanning multiple lines

            if new_state:
                self.article.cursor = self.article.cursor["char"] + span[1]
                return new_state

    def value_state(self):
        """Read one template entry/value, possibly spanning several lines.

        Text is collected up to the next marker. ``[[...]]`` links and
        nested ``{{...}}`` templates are copied into the value as text;
        nested templates are additionally parsed and recorded in
        ``self.subtemplates``. The collected text is then stored in
        ``self.values``: under the text before the first ``=`` if there is
        one, otherwise under the lowest unused integer key starting at 1.

        Returns:
            The state belonging to the marker that ended the value.

        Raises:
            NoTemplate: the article ends before the value is terminated.
            Exception: the key part of the value contains ``{{``.
        """
        # Chop off the trailing part; gather multiple lines.
        new_state = "continue"
        value = ""
        while True:
            line = self.article.line
            span = None
            for pattern, state in self.slicers.items():
                match = pattern.search(line)
                if not match:
                    continue

                new_state = state
                span = match.span()
                line = line[:span[0]]

            value += line

            # Parse a link [[ ... ]]
            if new_state == "link":
                cursor = self.article.cursor
                self.article.cursor = cursor["char"] + span[0]
                m = self.p_link.match(self.article.line)

                # NOTE(review): the code below relies on every read of
                # ``self.article.cursor`` yielding an independent dict;
                # confirm against the article class.
                end_cursor = self.article.cursor
                if m is None:
                    # A "[[" that does not open a real link.
                    end_cursor["char"] += 2
                    as_string = "[["
                else:
                    # A real link: copy it verbatim.
                    end_cursor["char"] += m.end()
                    as_string = self.article.extract(self.article.cursor,
                                                     end_cursor)

                value += as_string
                self.article.cursor = end_cursor

                new_state = "continue"
                continue

            # Nested template
            elif new_state == "start":
                cursor = self.article.cursor
                cursor["char"] += span[0]
                self.article.cursor = cursor

                # Constructing the Template parses it right away, which
                # moves the article cursor.
                template = Template(self.article)
                subt = {"startcursor": cursor,
                        "template": template,
                        "endcursor": self.article.cursor}
                self.subtemplates.append(subt)

                value += self.article.extract(cursor, subt["endcursor"])
                self.article.cursor = subt["endcursor"]

                new_state = "continue"
                continue

            # A terminating marker was found: mainly set the cursor.
            elif new_state != "continue":
                self.article.cursor = span[1] + self.article.cursor["char"]
                break

            try:
                self.article.__next__()
                value += "\n"
            except StopIteration:
                raise NoTemplate()

        # Parse the value: only the first "=" separates key from value, any
        # further "=" stays part of the value.
        key, separator, rest = value.partition("=")
        if separator:
            if "{{" in key:
                raise Exception("template in key: "+key)
            self.values[key] = rest.strip()

        # Anonymous values
        else:
            index = 1
            while index in self.values:
                index += 1
            self.values[index] = key.strip()

        return new_state