Exemplo n.º 1
0
def traverseInOrder(item):
    """Return ``item`` and all its descendants in in-order.

    The first child's subtree is visited first, then ``item`` itself, then
    the subtrees of the remaining children from left to right.

    Args:
        item: node exposing a ``childlist`` sequence of child nodes.

    Returns:
        A ``TypeCheckList(Tree)`` holding the visited nodes in order.
    """
    result = TypeCheckList(Tree)
    children = item.childlist
    # A leaf has no first child; without this guard the recursion never
    # reaches a base case and fails with IndexError.
    if len(children) > 0:
        # Extend (not append) so the result stays a flat list of nodes,
        # consistent with how the remaining children are merged below.
        result += traverseInOrder(children[0])
    result.append(item)
    for childindex in range(1, len(children)):
        result += traverseInOrder(children[childindex])
    return result
Exemplo n.º 2
0
def traversePreOrder(item):
    """Return ``item`` followed by its descendants in pre-order.

    Each node is emitted before the subtrees of its children, which are
    visited in ``childlist`` order.

    Args:
        item: node exposing a ``childlist`` iterable of child nodes.

    Returns:
        A ``TypeCheckList(Tree)`` holding the visited nodes in order.
    """
    from pydsl.Abstract import TypeCheckList
    visited = TypeCheckList(Tree)
    # The node itself comes first, then every subtree in turn.
    visited.append(item)
    for subtree in item.childlist:
        visited += traversePreOrder(subtree)
    return visited
Exemplo n.º 3
0
 def split(self):
     """Split this result into one child result per symbol.

     For every symbol in ``self.symbollist`` the children are searched,
     descending through ``childlist``, for the node whose first symbol is
     that symbol.

     Returns:
         A ``TypeCheckList(ParseTree)`` with one node per symbol, in
         ``self.symbollist`` order.

     Raises:
         Exception: if no node can be located for one of the symbols.
     """
     result = TypeCheckList(ParseTree)
     for symbol in self.symbollist:
         currentlist = self.childlist
         while len(currentlist) > 1 and len(currentlist[0].symbollist) > 1 and currentlist[0].symbollist[0] != symbol:
             for dpr in currentlist:
                 if symbol in dpr.symbollist:
                     # Descend into the child that contains the symbol.
                     currentlist = dpr.childlist
                     break
             else:
                 # Not found in any child: stop descending.
                 break
         # The length must be taken before comparing; comparing the list
         # itself with an int raises TypeError.
         assert(len(currentlist) < 2)
         if len(currentlist) == 0:
             raise Exception("split: no child found for symbol " + str(symbol))
         dpr = currentlist[0]
         currentsymbol = dpr.symbollist[0]
         # TODO: keep removing the surrounding symbols until only the one we
         # are interested in remains, together with its word.
         if currentsymbol != symbol:
             # Not found
             raise Exception("split: symbol not found: " + str(symbol))
         result.append(dpr)
     return result
Exemplo n.º 4
0
def terminal_symbol_reducer(symbol, word, production):
    """Find the slices of ``word`` accepted by a terminal symbol.

    The search strategy is chosen by ``symbol.boundariesrules.policy``:

    * ``"min"``: for each start index, the first (shortest) slice accepted
      by ``symbol.check`` is recorded.
    * ``"max"``: for each start index, the slice ending at the last accepted
      end index is recorded, provided that end index is greater than 0.
    * ``"fixed"``: for each start index, the slice of
      ``symbol.boundariesrules.size`` characters is recorded if accepted.

    Args:
        symbol: terminal symbol providing ``check`` and ``boundariesrules``.
        word: input to scan; converted with ``str()`` when not a string.
        production: production stored on every ``ParseTree`` created.

    Returns:
        A ``TypeCheckList(ParseTree)`` with the matches found; an empty one
        (after logging a warning) when the policy is unknown.
    """
    from pydsl.Abstract import TypeCheckList
    from pydsl.Grammar.Tree import ParseTree
    if not isinstance(word, str):
        word = str(word)
    validresults = TypeCheckList(ParseTree)
    policy = symbol.boundariesrules.policy
    wordlength = len(word)

    if policy == "min":
        LOG.debug("terminal_symbol_reducer: policy: min")
        for start in range(wordlength):
            for stop in range(start, wordlength + 1):
                chunk = word[start:stop]
                if not symbol.check(chunk):
                    continue
                LOG.debug("terminal_symbol_reducer: parsed:"+ str(chunk))
                validresults.append(ParseTree(start, stop, [symbol], chunk, production))
                break  # shortest accepted slice for this start index
        return validresults

    if policy == "max":
        LOG.debug("terminal_symbol_reducer: policy: max")
        for start in range(wordlength):
            furthest = 0
            for stop in range(start, wordlength + 1):
                chunk = word[start:stop]
                if symbol.check(chunk):
                    LOG.debug("terminal_symbol_reducer: parsed:"+ str(chunk))
                    furthest = stop
            if furthest > 0:
                validresults.append(ParseTree(start, furthest, [symbol], word[start:furthest], production))
        return validresults

    if policy == "fixed":
        LOG.debug("terminal_symbol_reducer: policy: fixed")
        size = symbol.boundariesrules.size
        for start in range(wordlength):
            stop = start + size
            chunk = word[start:stop]
            if symbol.check(chunk):
                LOG.debug("__auxReducer: parsed:"+ str(chunk))
                validresults.append(ParseTree(start, stop, [symbol], chunk, production))
        return validresults

    LOG.warning("terminal_symbol_reducer: Unknown size policy")
    return TypeCheckList(ParseTree)
Exemplo n.º 5
0
    def __recursive_parser(self, onlysymbol, data, production, showerrors:bool = False, recurssions:int = 0):
        """Recursively parse ``data`` as ``onlysymbol``; helper for check_word.

        Args:
            onlysymbol: symbol to recognise; a TerminalSymbol, a
                NonTerminalSymbol or a NullSymbol.
            data: input to parse; converted with ``str()`` when not a string.
            production: production stored on the ParseTree nodes built for
                non-terminal and null symbols.
            showerrors: when true, a failed parse returns a single ParseTree
                built with ``valid = False`` instead of an empty list.
            recurssions: current recursion depth; parsing gives up once it
                exceeds ``16 * len(data)``.

        Returns:
            A ``TypeCheckList(ParseTree)``. It is empty when ``data`` is
            empty, when the recursion limit is hit, or when nothing matched
            and ``showerrors`` is false.

        Raises:
            Exception: if ``onlysymbol`` is none of the supported kinds.
        """
        LOG.debug("__recursive_parser: Begin ")
        if not isinstance(data, str):
            data = str(data)
        if recurssions > 16*len(data):
            LOG.debug("RECURSSION LIMIT")
            return TypeCheckList(ParseTree)
        if len(data) == 0:
            return TypeCheckList(ParseTree)
        from ..Symbol import TerminalSymbol, NullSymbol, NonTerminalSymbol
        if isinstance(onlysymbol, TerminalSymbol):
            #Locate every ocurrence of word and return a set of results. Follow boundariesrules
            LOG.debug("Iteration: terminalsymbol")
            sproduction = self._productionset.getProductionsBySide([onlysymbol])[0]
            result = terminal_symbol_consume(onlysymbol, data, sproduction )
            if showerrors and not result:
                LOG.debug("error symbolo: " + str(onlysymbol))
                return TypeCheckList(ParseTree, [ParseTree(0,len(data), [onlysymbol] , data, sproduction, valid = False)])
            return result
        elif isinstance(onlysymbol, NonTerminalSymbol):
            result = TypeCheckList(ParseTree)
            tmpresults = []
            invalidlist = []
            # Phase 1: for every alternative of the symbol, try to match its
            # right side symbol by symbol, collecting the candidate sequences.
            for alternative in self._productionset.getProductionsBySide([onlysymbol]):
                alternativetree = RecursiveDescentResultTree(None)
                alternativesuccess = True
                alternativeinvalidlist = []
                for symbolindex, symbol in enumerate(alternative.rightside):
                    symbolsuccess = False
                    for totalpos in alternativetree.last_poss():
                        if totalpos >= len(data):
                            LOG.debug("Alternative length problem:" + str(alternative) + " Recursion: "+ str(recurssions) + " INTENTO SIMBOLO " + str(symbolindex) + ":" + str(symbol) +  " Input: "+ str(data) +" SHIFT " + str(totalpos))
                            continue
                        LOG.debug("Alternative:" + str(alternative) + " Recursion: "+ str(recurssions) + " INTENTO SIMBOLO " + str(symbolindex) + ":" + str(symbol) +  " Input: "+ str(data) +" SHIFT " + str(totalpos))
                        thisresult =  self.__recursive_parser(symbol, data[totalpos:], alternative, showerrors, recurssions+1)
                        allvalids = all(x.valid for x in thisresult)
                        if thisresult and allvalids:
                            LOG.debug("Alternative:" + str(alternative) + " Recursion: "+ str(recurssions) + " Trying Symbol " + str(symbolindex) + ":" + str(symbol) +  " Input: "+ str(data) +" SHIFT " + str(totalpos)+  " results: " + str(thisresult) + "leftpos: " + str(thisresult[0].leftpos) + " rightpos: " + str(thisresult[-1].rightpos))
                            symbolsuccess = True
                            for x in thisresult:
                                # Sub-results are relative to data[totalpos:];
                                # move them back into this call's coordinates.
                                x.shift(totalpos)
                                exito = alternativetree.append(x, totalpos)
                                if not exito:
                                    #TODO: add it as an error to the tree or somewhere else
                                    LOG.debug("Discarded symbol :" + str(symbol) + " position:" + str(totalpos))
                                else:
                                    LOG.debug("Added symbol :" + str(symbol) + " position:" + str(totalpos))
                        else:
                            for x in thisresult:
                                if not x.valid:
                                    alternativeinvalidlist.append(x)
                    if not symbolsuccess:
                        LOG.debug("Symbol doesn't work" + str(symbol))
                        alternativesuccess = False
                        break #Try another alternative
                    else:
                        LOG.debug("Symbol work at recursion" + str(recurssions) + ": " + str(symbol))
                if not alternativesuccess:
                    continue #A symbol doesn't work, this alternative doesn't neither
                else:
                    invalidlist += alternativeinvalidlist

                for x in alternativetree.get_lists():
                    # The list is concatenated into the message: passing it as
                    # a separate argument makes logging try to %-format a
                    # message that has no placeholders.
                    LOG.debug("Recurssion" + str(recurssions) + " Adding..." + str([y.content for y in x]))
                    tmpresults.append(x)

            LOG.debug("La iteracion " + str(recurssions) + "result collection finished:" + str(tmpresults))
            # Phase 2: turn every collected sequence that fits an alternative
            # into a ParseTree for onlysymbol.
            for alternative in self._productionset.getProductionsBySide([onlysymbol]):
                for results in tmpresults:
                    LOG.debug("result: " + str([x.content for x in results]) + ", alternative:" + str(alternative))
                    nullcounter = alternative.rightside.count(NullSymbol())
                    nnullresults = 0
                    for y in [x.symbollist for x in results]:
                        nnullresults += y.count(NullSymbol())
                    # Null symbols are ignored when comparing element counts.
                    if len(results) - nnullresults != len(alternative.rightside) - nullcounter:
                        LOG.debug("Discarded: Bad result number")
                        continue
                    if results[-1].rightpos > len(data):
                        LOG.debug("Discarded: Bad rightpos")
                        continue
                    # NOTE(review): a per-position comparison of
                    # results[x].content against alternative.rightside[x] used
                    # to sit here, but its `continue` only affected its own
                    # loop and never discarded anything, so it was removed.
                    newresult = ParseTree(0, results[-1].rightpos - results[0].leftpos, [onlysymbol], data[results[0].leftpos:results[-1].rightpos], production)
                    for child in results:
                        newresult.append_child(child)
                        if not child.valid:
                            newresult.valid = False
                    if newresult.valid:
                        result.append(newresult)
            if showerrors and not result:
                erroresult = TypeCheckList(ParseTree, [ParseTree(0,len(data), [onlysymbol] , data, production, valid = False)])
                for invalid in invalidlist:
                    if invalid.production.leftside[0] in production.rightside:
                        erroresult[0].append_child(invalid)
                return erroresult
            return result
        elif isinstance(onlysymbol, NullSymbol):
            return TypeCheckList(ParseTree,[ParseTree(0, 0, [onlysymbol], "", production)])
        else:
            raise Exception("__recursive_parser: unsupported symbol: " + str(onlysymbol))
Exemplo n.º 6
0
 def __init__(self, leftpos, rightpos, content, valid = True):
     """Store the node's span, content and validity; start with no children.

     Args:
         leftpos: left position stored on the node.
         rightpos: right position stored on the node.
         content: content stored on the node.
         valid: validity flag stored on the node (defaults to True).
     """
     self.leftpos, self.rightpos = leftpos, rightpos
     self.content, self.valid = content, valid
     # Children are kept in a list restricted to Tree instances.
     self.childlist = TypeCheckList(Tree)
Exemplo n.º 7
0
class Tree(metaclass = ABCMeta):
    """Tree node covering the span ``leftpos``..``rightpos`` of some input.

    Every node stores its content, a validity flag and a list of children.
    """

    def __init__(self, leftpos, rightpos, content, valid = True):
        """Store the span, content and validity; start with no children."""
        self.leftpos = leftpos
        self.rightpos = rightpos
        self.childlist = TypeCheckList(Tree)
        self.content = content
        self.valid = valid

    def __bool__(self):
        """Return the validity flag, so an invalid node is falsy."""
        return bool(self.valid)

    def __getitem__(self, key, order = "preorder"): #FIXME: getitem and optional argument??
        """Return the list of nodes whose ``content`` equals ``key``.

        Nodes are listed in the traversal order named by ``order``; the
        subscript syntax ``tree[key]`` always uses the default order.
        """
        return [element for element in self.getAllByOrder(order)
                if element.content == key]

    def getItemByOrder(self, key, order):
        """Return every node of the tree in the given traversal order.

        NOTE(review): ``key`` is accepted but not used, so this returns the
        same value as ``getAllByOrder(order)``; confirm whether filtering by
        key was intended before relying on it.

        Raises:
            KeyError: if ``order`` is not a known traversal order.
        """
        return self.getAllByOrder(order)

    def getAllByOrder(self, order = "preorder"):
        """Return every node of the tree in the given traversal order.

        Args:
            order: ``"preorder"``, ``"inorder"`` or ``"postorder"``.

        Raises:
            KeyError: if ``order`` is none of the above.
        """
        if order == "preorder":
            return traversePreOrder(self)
        elif order == "inorder":
            return traverseInOrder(self)
        elif order == "postorder":
            return traversePostOrder(self)
        else:
            raise KeyError(order)

    def shift(self, amount):
        """Move this node's span by ``amount`` (children are not touched)."""
        self.leftpos += amount
        self.rightpos += amount

    def append_child(self, dpr):
        """Append ``dpr`` to this node's list of children."""
        self.childlist.append(dpr)

    def __len__(self):
        """Return the width of the span, ``rightpos - leftpos``."""
        return self.rightpos - self.leftpos

    def coverage(self):
        """Return ``(covered, total)`` lengths for this subtree.

        An invalid node covers nothing of its own length. A valid leaf
        covers its whole length. A valid inner node sums the values
        reported by its children.
        """
        if not self:
            return 0, len(self)
        # The children live on the instance; a bare ``childlist`` name does
        # not exist in this scope.
        if self.childlist:
            childtotal = 0
            childcoverage = 0
            for child in self.childlist:
                newcoverage, newtotal = child.coverage()
                childcoverage += newcoverage
                childtotal += newtotal
            # The children are expected to span exactly this node.
            assert(childtotal == len(self))
            return childcoverage, childtotal
        else:
            return len(self), len(self)

    def first_leaf(self):
        """Return the leftmost leaf of this subtree."""
        if self.childlist:
            return self.childlist[0].first_leaf()
        else:
            return self
Exemplo n.º 8
0
def traversePostOrder(item):
    """Return the descendants of ``item`` followed by ``item``, in post-order.

    The subtrees of the children are visited in ``childlist`` order and the
    node itself is emitted last.

    Args:
        item: node exposing a ``childlist`` iterable of child nodes.

    Returns:
        A ``TypeCheckList(Tree)`` holding the visited nodes in order.
    """
    visited = TypeCheckList(Tree)
    for subtree in item.childlist:
        visited += traversePostOrder(subtree)
    # The node comes after all of its subtrees.
    visited.append(item)
    return visited
Exemplo n.º 9
0
def mix_results(resultll:list, productionset):
    """Mix n sets of results into combined results.

    Starting from the results of the first non-empty set whose ``leftpos``
    is 0, each later non-empty set is chained onto the partial sequences
    whose last ``rightpos`` equals the new result's ``leftpos``. Every
    complete sequence is then joined into one ParseTree that has the
    original results as children.

    Args:
        resultll: list of ``TypeCheckList(ParseTree)``; empty sets are
            skipped.
        productionset: used to look up the production of each joined node
            from the left sides of its children's productions.

    Returns:
        A ``TypeCheckList(ParseTree)`` with the combined results, or a plain
        empty list when every input set is empty.

    Raises:
        TypeError: if the first non-empty set holds a non-ParseTree element.
        Exception: if a position needed for mixing is ``None``.
    """
    from pydsl.Grammar.Tree import ParseTree
    for resultl in resultll:
        assert(isinstance(resultl, TypeCheckList) and resultl.instancetype == ParseTree)
    midlist = [] #All blocks combinations are stored here
    firstindex = 0
    while firstindex < len(resultll) and len(resultll[firstindex]) == 0:
        firstindex += 1
    if firstindex == len(resultll):
        return []
    validsets = 1

    #Processing head set
    for result in resultll[firstindex]:
        if not isinstance(result, ParseTree):
            raise TypeError
        if result.leftpos == 0:
            midlist.append([result])
        elif result.leftpos is None:
            raise Exception("mix_results: head result without leftpos") #FIXME:What's the right thing to do here?

    #Processing Tail sets
    for resultl in resultll[firstindex + 1:]:
        if len(resultl) == 0:
            continue
        for result in resultl:
            tmp = []
            for middleresult in midlist:
                #Here we mix every result with intermediate results list
                lastresult = middleresult[-1]
                if result.rightpos is None:
                    # A result without position is placed right after the
                    # previous one, with zero width.
                    result.rightpos = lastresult.rightpos
                    result.leftpos = lastresult.rightpos
                if lastresult.rightpos is None or result.leftpos is None \
                        or lastresult.rightpos == result.leftpos:
                    tmp.append(middleresult + [ParseTree(result.leftpos, result.rightpos, \
                            result.symbollist, result.content, result.production, \
                            TypeCheckList(ParseTree, result.childlist), result.valid)])
            midlist += tmp
        validsets += 1

        #Removes all results that have less elements than the number of valid sets,
        #i.e. those for which no valid combination was found in the last mix
        midlist = [element for element in midlist if len(element) == validsets]

    #We mix all results into final result
    finallist = TypeCheckList(ParseTree)
    for middleresult in midlist:
        if len(middleresult) == 1:
            finallist.append(middleresult[0])
        elif middleresult[0].leftpos is not None and middleresult[-1].rightpos is not None:
            #Creates a node with all elements, and originals nodes are the childs of the new node
            compoundword = "".join(element.content for element in middleresult)
            symbollist = []
            for element in middleresult:
                symbollist += element.symbollist
            # The node starts valid; the children decide below. Reading the
            # flag from a loop variable left over from the mixing phase
            # would tie it to an unrelated result.
            finalresult = ParseTree(middleresult[0].leftpos, middleresult[-1].rightpos, symbollist, compoundword, middleresult[0].production, valid = True)
            #Add childs to result. FIXME: the already created elements are being added as children of the new one
            rightside = []
            for child in middleresult:
                assert(child != finalresult)
                finalresult.append_child(child)
                if child.production:
                    rightside += child.production.leftside
                if not child.valid:
                    finalresult.valid = False #valid status propagates upwards
            # NOTE(review): with a falsy productionset the joined node is
            # dropped without being added anywhere; confirm this is intended.
            if productionset:
                try:
                    finalresult.production = productionset.getProductionsBySide(rightside, "right")
                except IndexError:
                    finallist += middleresult #rule not found: we add it unprocessed (non joined version)
                else:
                    finallist.append(finalresult) #rule found; we add binded together version
        else:
            raise Exception("mix_results: combined result without position")
    return finallist