Пример #1
0
 def getCost(config, transType=None):
     """Return the number of VMWEs of ``config.sent`` penalised in this configuration.

     A VMWE adds 1 to the result when all of the following hold:

     * the first buffer element belongs to it,
     * the top stack element is not made up exclusively of its tokens,
     * at least one element below the top of the stack holds one of its tokens.

     VMWEs flagged ``isInterleaving`` or already listed in
     ``sent.identifiedVMWEs`` are ignored.

     :param config: configuration exposing ``sent``, ``stack`` and ``buffer``.
     :param transType: not used by this function.
     :return: the accumulated integer cost.
     """
     sentence = config.sent
     total = 0
     for vmwe in sentence.vMWEs:
         if vmwe.isInterleaving or vmwe.In(sentence.identifiedVMWEs):
             continue
         if not config.buffer[0].In(vmwe):
             continue
         if len(config.stack) == 0:
             # With an empty stack nothing can be "left behind", so no penalty.
             continue
         topInside = all(tok.In(vmwe) for tok in Sentence.getTokens(config.stack[-1]))
         deeperInside = any(
             tok.In(vmwe)
             for element in config.stack[:-1]
             for tok in Sentence.getTokens(element)
         )
         if deeperInside and not topInside:
             total += 1
     return total
Пример #2
0
 def getCost(config, transType=None):
     """Compute an integer cost from the top of the stack and the sentence's VMWEs.

     VMWEs flagged ``isInterleaving`` or already in ``sent.identifiedVMWEs`` are
     skipped.  For every other VMWE whose tokens cover the whole top stack
     element:

     * a one-token VMWE with a plain ``Token`` on top costs 1;
     * a one-token VMWE with a one-element list on top makes the function
       return 0 immediately when that element has exactly one parent MWE,
       otherwise it costs 1 and the checks below still run;
     * a further 1 is added when a token of the VMWE sits deeper in the stack
       or, failing that, anywhere in the buffer.

     :param config: configuration exposing ``sent``, ``stack`` and ``buffer``.
     :param transType: not used by this function.
     :return: the accumulated integer cost (or 0 on the early exit above).
     """
     sentence = config.sent
     total = 0
     for vmwe in sentence.vMWEs:
         if vmwe.isInterleaving or vmwe.In(sentence.identifiedVMWEs):
             continue
         # The stack is only touched once a VMWE is actually examined.
         top = config.stack[-1]
         topInside = all(tok.In(vmwe) for tok in Sentence.getTokens(top))
         if len(vmwe.tokens) == 1 and topInside:
             if isinstance(top, Token):
                 total += 1
                 continue
             if isinstance(top, list) and len(top) == 1:
                 if len(top[0].parentMWEs) == 1:
                     return 0
                 total += 1
         if not topInside:
             continue
         foundDeeper = any(
             tok.In(vmwe)
             for element in config.stack[:-1]
             for tok in Sentence.getTokens(element)
         )
         # The buffer is only consulted when nothing was found in the stack.
         if foundDeeper or any(b.In(vmwe) for b in config.buffer):
             total += 1
     return total
Пример #3
0
    def apply(self, parent, sent, vMWEId=None, parse=False, vMWEType=None, mwtMerge=False):
        """Build the configuration that results from merging the top of the stack.

        With ``mwtMerge`` the top stack element is wrapped in a one-element
        list; otherwise the two topmost elements are replaced by a list holding
        both.  When the merged element yields more than one token (or exactly
        one token under ``mwtMerge``) a ``VMWE`` is created from those tokens
        and appended to the copied token list; a single token without
        ``mwtMerge`` is appended as is.

        :param parent: transition whose ``configuration`` is the starting point.
        :param sent: sentence object; its ``blackMergeNum`` is incremented when
            it is truthy and ``parse`` is false.
        :param vMWEId: id for the new VMWE; defaults to
            ``VMWE.getVMWENumber(newTokens) + 1``.
        :param parse: when true the new VMWE is also appended to
            ``sent.identifiedVMWEs``.
        :param vMWEType: optional value stored in the new VMWE's ``type``.
        :param mwtMerge: merge only the top element instead of the top two.
        """
        Counters.blackMergeNum += 1
        if sent and not parse:
            sent.blackMergeNum += 1

        config = parent.configuration
        newBuffer = list(config.buffer)

        if mwtMerge:
            consumed = 1
            mergedElement = [config.stack[-1]]
        else:
            consumed = 2
            mergedElement = [config.stack[-2], config.stack[-1]]
        newStack = list(config.stack)[:-consumed]
        newStack.append(mergedElement)

        newTokens = list(config.tokens)
        vMWETokens = Sentence.getTokens(newStack[-1])
        tokenCount = len(vMWETokens)
        if tokenCount > 1 or (tokenCount == 1 and mwtMerge):
            if vMWEId is None:
                vMWEId = VMWE.getVMWENumber(newTokens) + 1
            vMWE = VMWE(vMWEId, vMWETokens[0])
            if parse:
                sent.identifiedVMWEs.append(vMWE)
            vMWE.tokens = vMWETokens
            if vMWEType is not None:
                vMWE.type = vMWEType
            newTokens.append(vMWE)
        elif tokenCount == 1:
            newTokens.append(vMWETokens[0])

        super(BlackMerge, self).__init__(
            config=Configuration(stack=newStack, buffer=newBuffer, tokens=newTokens, sent=sent, transition=self),
            previous=parent,
            sent=sent)
Пример #4
0
    def getCost(config, transType=None):
        """Compute an integer cost from the top stack element alone.

        * Top is a list: for every VMWE that is neither ``isInterleaving`` nor
          already in ``sent.identifiedVMWEs``, return 0 at once if the list's
          tokens all belong to the VMWE and their count equals the VMWE's
          token count; otherwise add 1 when at least one token belongs to it.
        * Top is a ``Token``: return 0 when it has no parent MWEs, else the
          number of parent MWEs containing it that have more than one token.
        * Anything else: 0.

        :param config: configuration exposing ``sent`` and ``stack``.
        :param transType: not used by this function.
        :return: the integer cost.
        """
        sent = config.sent
        top = config.stack[-1]

        if isinstance(top, list):
            total = 0
            for vmwe in sent.vMWEs:
                if vmwe.isInterleaving or vmwe.In(sent.identifiedVMWEs):
                    continue
                stackTokens = Sentence.getTokens(top)
                membership = [tok.In(vmwe) for tok in stackTokens]
                if len(stackTokens) == len(vmwe.tokens) and all(membership):
                    return 0
                if any(membership):
                    total += 1
            return total

        if isinstance(top, Token):
            parents = top.parentMWEs
            if parents is None or len(parents) == 0:
                return 0
            return sum(1 for vmwe in parents if top.In(vmwe) and len(vmwe.tokens) > 1)

        return 0
Пример #5
0
 def checkForVMWE(transition):
     """Return an applied ``Complete`` transition if a MERGE just finished a VMWE.

     The check only runs when ``transition.type`` is ``TransitionType.MERGE``
     and the stack consists of a single list element.  The VMWE to complete is
     chosen as follows:

     1. the sole parent of the first token that has exactly one parent MWE;
     2. otherwise the first parent shared by every token, after discarding
        interleaving/embedded parents when more than one is shared.

     ``Complete`` is applied only when the chosen VMWE has as many tokens as
     the stack element.

     :param transition: transition exposing ``type`` and ``configuration``.
     :return: the applied ``Complete`` instance, or ``None``.
     """
     config = transition.configuration
     sent = config.sent
     if transition.type != TransitionType.MERGE:
         return None
     if len(config.stack) != 1 or not isinstance(config.stack[0], list):
         return None

     tokens = Sentence.getTokens(config.stack[0])
     vMWE = None
     candidates = []
     for token in tokens:
         # Treat a missing parent list like an empty one.
         tokenParents = token.parentMWEs or []
         if len(tokenParents) == 1:
             vMWE = tokenParents[0]
             break
         for parent in tokenParents:
             if parent not in candidates:
                 candidates.append(parent)

     if vMWE is None:
         # Build filtered lists instead of calling remove() on the list being
         # iterated: that skipped elements and raised ValueError as soon as a
         # parent was missing from two different tokens.
         shared = [
             parent for parent in candidates
             if all(parent in (token.parentMWEs or []) for token in tokens)
         ]
         if len(shared) > 1:
             shared = [
                 parent for parent in shared
                 if not (parent.isInterleaving or parent.isEmbedded)
             ]
         # No surviving candidate means there is nothing to complete.
         vMWE = shared[0] if shared else None

     if vMWE is not None and len(vMWE.tokens) == len(tokens):
         complete = Complete(sent=sent)
         complete.apply(transition, sent, vMWEId=vMWE.id)
         return complete
     return None
Пример #6
0
    def getCost(config, transType=None):
        """Compute an integer cost from the two topmost stack elements.

        VMWEs flagged ``isInterleaving`` or already in ``sent.identifiedVMWEs``
        are skipped.  For each remaining VMWE:

        * a one-token VMWE matching a plain ``Token`` on top costs 1;
        * if both top elements consist solely of its tokens, it costs 1 when
          together they hold as many tokens as the VMWE, and nothing otherwise;
        * if neither does, it costs nothing;
        * if exactly one does, it costs 1 when another token of the VMWE is
          found below the top two stack elements or, failing that, in the
          buffer.

        NOTE(review): ``config.stack[-2]`` is read before the
        ``len(config.stack) > 1`` test, so a one-element stack raises
        IndexError -- confirm callers guarantee at least two elements.

        :param config: configuration exposing ``sent``, ``stack`` and ``buffer``.
        :param transType: not used by this function.
        :return: the accumulated integer cost.
        """
        sent = config.sent
        total = 0
        for vmwe in sent.vMWEs:
            if vmwe.isInterleaving or vmwe.In(sent.identifiedVMWEs):
                continue

            topInside = all(tok.In(vmwe) for tok in Sentence.getTokens(config.stack[-1]))
            if len(vmwe.tokens) == 1 and topInside and isinstance(config.stack[-1], Token):
                total += 1
                continue

            secondInside = all(tok.In(vmwe) for tok in Sentence.getTokens(config.stack[-2]))

            if topInside and secondInside:
                if len(config.stack) > 1 and len(vmwe.tokens) == len(
                        Sentence.getTokens(config.stack[-2:])):
                    total += 1
                continue
            if not topInside and not secondInside:
                continue

            # Exactly one of the two top elements belongs to the VMWE.
            foundDeeper = any(
                tok.In(vmwe)
                for element in config.stack[:-2]
                for tok in Sentence.getTokens(element)
            )
            if foundDeeper or any(b.In(vmwe) for b in config.buffer):
                total += 1
        return total
Пример #7
0
    def check(transition):
        """Return an applied merge transition for the two topmost stack elements, if any.

        The tokens of the second-from-top and top stack elements are combined.
        When ``VMWE.getParents`` yields exactly one parent, a typed merge is
        chosen from that parent's ``type`` ('id', 'ireflv', 'lvc', 'vpc',
        anything else or no type -> ``MergeAsOTH``), applied and returned.
        When it yields several parents the sentence is appended to
        ``reports.annotationReport``.  Otherwise, if ``VMWE.haveSameParents``
        yields exactly one parent whose last token is the last combined token,
        a ``WhiteMerge`` is applied and returned.

        :param transition: transition exposing ``configuration``.
        :return: the applied merge transition, or ``None``.
        """
        config = transition.configuration
        sent = config.sent
        if len(config.stack) <= 1:
            return None

        s0Tokens = Sentence.getTokens(config.stack[-1])
        s1Tokens = Sentence.getTokens(config.stack[-2])
        # TODO: get the parent MWE for the white merge.
        tokens = s1Tokens + s0Tokens

        parents = VMWE.getParents(tokens)
        if parents and len(parents) > 1:
            reports.annotationReport += str(sent)
        if parents and len(parents) == 1:
            parentType = parents[0].type
            mergeClass = MergeAsOTH
            if parentType is not None and parentType != '':
                typedMerges = {
                    'id': MergeAsID,
                    'ireflv': MergeAsIReflV,
                    'lvc': MergeAsLVC,
                    'vpc': MergeAsVPC,
                }
                mergeClass = typedMerges.get(parentType.lower(), MergeAsOTH)
            merge = mergeClass(sent=sent)
            merge.apply(transition, sent=sent)
            return merge

        sharedParents = VMWE.haveSameParents(tokens)
        if sharedParents and len(sharedParents) == 1 \
                and sharedParents[0].tokens[-1] == tokens[-1]:
            merge = WhiteMerge(sent=sent)
            merge.apply(transition, sent)
            return merge

        return None
Пример #8
0
 def check(transition):
     """Return an applied ``Merge`` when the whole stack maps to one plain VMWE.

     A merge is produced only if the stack holds more than one element and
     ``VMWE.getParents`` over all stack tokens yields exactly one parent that
     is neither embedded nor interleaving.

     :param transition: transition exposing ``configuration``.
     :return: the applied ``Merge`` instance, or ``None``.
     """
     config = transition.configuration
     sent = config.sent
     if len(config.stack) <= 1:
         return None

     parents = VMWE.getParents(Sentence.getTokens(config.stack))
     if not parents or len(parents) != 1:
         return None
     onlyParent = parents[0]
     if onlyParent.isEmbedded or onlyParent.isInterleaving:
         return None

     merge = Merge(sent=sent)
     merge.apply(transition, sent)
     return merge
Пример #9
0
 def generateDisconinousFeatures(configuration, sent, transDic):
     """Add features linking the top stack element to nearby buffer elements.

     The lemma text of the top stack element is looked up as a proper
     substring of the keys of ``Corpus.mweDictionary``.  For the first such
     key only, each of the first five buffer elements whose lemma, joined to
     that text with a space in either order, occurs in the key gets two
     entries in ``transDic``: a boolean flag and the distance between the two
     positions in ``sent.tokens``.

     :param configuration: configuration exposing ``stack`` and ``buffer``.
     :param sent: sentence whose ``tokens`` list is used to measure distance.
     :param transDic: feature dictionary, updated in place.
     """
     tokens = Sentence.getTokens([configuration.stack[-1]])
     tokenTxt = Sentence.getTokenLemmas(tokens)
     for key in Corpus.mweDictionary.keys():
         if tokenTxt not in key or tokenTxt == key:
             continue
         for bufidx, bufElem in enumerate(configuration.buffer[:5]):
             lemma = bufElem.lemma
             if lemma == '':
                 continue
             if (tokenTxt + ' ' + lemma) in key or (lemma + ' ' + tokenTxt) in key:
                 featurePrefix = 'S0B' + str(bufidx)
                 transDic[featurePrefix + 'ArePartsOfMWE'] = True
                 transDic[featurePrefix + 'ArePartsOfMWEDistance'] = (
                     sent.tokens.index(bufElem) - sent.tokens.index(tokens[-1]))
         # Only the first matching dictionary key is inspected.
         break
Пример #10
0
    def getCost(config, transType=None):
        """Compute an integer cost from the two topmost stack elements.

        VMWEs flagged ``isInterleaving`` or already in ``sent.identifiedVMWEs``
        are skipped.  As soon as one remaining VMWE contains every token of
        both top stack elements the function returns 0; every VMWE examined
        before that adds 1.

        :param config: configuration exposing ``sent`` and ``stack``.
        :param transType: not used by this function.
        :return: 0 on a full match, otherwise the number of VMWEs examined.
        """
        sent = config.sent
        total = 0
        for vmwe in sent.vMWEs:
            if vmwe.isInterleaving or vmwe.In(sent.identifiedVMWEs):
                continue
            # Both elements are always inspected, whatever the first result is.
            topInside = all(tok.In(vmwe) for tok in Sentence.getTokens(config.stack[-1]))
            secondInside = all(tok.In(vmwe) for tok in Sentence.getTokens(config.stack[-2]))
            if topInside and secondInside:
                return 0
            total += 1
        return total
Пример #11
0
    def getCost(config, transType=None, type=None):
        """Compute an integer cost for the top stack element and a VMWE type.

        VMWEs flagged ``isInterleaving`` or already in ``sent.identifiedVMWEs``
        are skipped.  For a remaining VMWE that contains the top stack element:
        a one-token VMWE whose type equals ``type`` (case-insensitively) makes
        the function return 0; a VMWE with more than one token adds 1.
        Finally 1 is added when ``VMWE.getParents`` finds no parent for the
        top element's tokens with the lower-cased ``type``.

        NOTE(review): ``type.lower()`` is called unconditionally at the end,
        so the default ``type=None`` raises AttributeError -- callers
        presumably always pass a string; confirm.

        :param config: configuration exposing ``sent`` and ``stack``.
        :param transType: not used by this function.
        :param type: VMWE type name to compare against.
        :return: the integer cost.
        """
        sent = config.sent
        total = 0
        for vmwe in sent.vMWEs:
            if vmwe.isInterleaving or vmwe.In(sent.identifiedVMWEs):
                continue
            if not (config.stack[-1]).In(vmwe):
                continue
            size = len(vmwe.tokens)
            if size == 1 and vmwe.type.lower() == type.lower():
                return 0
            if size > 1:
                total += 1

        # Precision score:
        topTokens = Sentence.getTokens(config.stack[-1])
        if not VMWE.getParents(topTokens, type.lower()):
            total += 1
        return total
Пример #12
0
 def concatenateTokens(tokens):
     """Flatten a mixed sequence of tokens and token lists into new ``Token`` objects.

     A ``Token`` is copied (id -1, same text, lemma and POS tag).  A list is
     turned into one ``Token`` whose text, lemma and POS tag are those of its
     sub-tokens joined with underscores.  Any other element is ignored.

     :param tokens: iterable of ``Token`` instances and/or lists.
     :return: list of newly created ``Token`` objects, in input order.
     """
     merged = []
     for item in tokens:
         if isinstance(item, Token):
             merged.append(Token(-1, item.text, item.lemma, item.posTag))
         elif isinstance(item, list):
             parts = list(Sentence.getTokens(item))
             combined = Token(-1, '', '', '')
             combined.text = '_'.join(part.text for part in parts)
             combined.lemma = '_'.join(part.lemma for part in parts)
             combined.posTag = '_'.join(part.posTag for part in parts)
             merged.append(combined)
     return merged
Пример #13
0
    def check(parent):
        """Return an applied ``Reduce`` transition when the top of the stack can be dropped.

        A reduce is applied when the stack is non-empty and any of these holds,
        tested in this order:

        1. the top is a ``Token`` without parent MWEs;
        2. the buffer is empty;
        3. the top is a one-element list whose element has exactly one parent
           MWE;
        4. the top is a list whose tokens have exactly one parent according to
           ``VMWE.getParents`` and that parent is not embedded;
        5. the sentence contains interleaving MWEs and the top is a ``Token``
           whose single parent MWE is interleaving.

        :param parent: transition exposing ``configuration``.
        :return: the applied ``Reduce`` instance, or ``None``.
        """
        config = parent.configuration
        sent = config.sent

        reduce = Reduce(sent=sent)

        def applyReduce():
            reduce.apply(parent, sent)
            return reduce

        if not config.stack:
            return None
        top = config.stack[-1]

        # 1. plain token that belongs to no MWE
        if isinstance(top, Token) and not top.parentMWEs:
            return applyReduce()

        # 2. nothing left to shift
        if not config.buffer:
            return applyReduce()

        if isinstance(top, list):
            # 3. The previous test compared the parent collection itself with
            # the integer 1, which is never true; compare its length instead.
            if len(top) == 1:
                mwtParents = top[0].parentMWEs
                if mwtParents is not None and len(mwtParents) == 1:
                    return applyReduce()

            # 4. Look the parents up once and tolerate an empty/None result.
            parents = VMWE.getParents(Sentence.getTokens(top))
            if parents and len(parents) == 1 and not parents[0].isEmbedded:
                return applyReduce()

        # 5. token of an interleaving MWE
        if sent.containsInterleaving and isinstance(top, Token) and (
                top.parentMWEs and len(top.parentMWEs) == 1 and
                top.parentMWEs[0].isInterleaving):
            return applyReduce()

        return None
Пример #14
0
 def getCost(config, transType=None):
     """Compute an integer cost for a typed merge of the two topmost stack elements.

     VMWEs flagged ``isInterleaving`` or already in ``sent.identifiedVMWEs``
     are skipped.  For each remaining VMWE:

     * a one-token VMWE matching a plain ``Token`` on top costs 1;
     * if exactly one of the two top elements consists solely of its tokens,
       it costs 1;
     * if both do and together they hold as many tokens as the VMWE while
       ``transType.name[7:]`` equals the VMWE type (case-insensitively), the
       function returns 0 immediately;
     * if both do otherwise, it costs 1 when another token of the VMWE is
       found deeper in the stack or, failing that, in the buffer.

     Afterwards 1 more is added unless some parent VMWE of the top two
     elements has a type that occurs in ``transType.name``.

     NOTE(review): ``transType.name`` is read unconditionally, so the default
     ``transType=None`` raises AttributeError; ``name[7:]`` presumably strips
     a fixed seven-character prefix -- confirm against the transition enum.

     :param config: configuration exposing ``sent``, ``stack`` and ``buffer``.
     :param transType: object whose ``name`` encodes the merge type.
     :return: the integer cost.
     """
     sent = config.sent
     total = 0
     for vmwe in sent.vMWEs:
         if vmwe.isInterleaving or vmwe.In(sent.identifiedVMWEs):
             continue
         topTokens = Sentence.getTokens(config.stack[-1])
         topInside = all(tok.In(vmwe) for tok in topTokens)
         if len(vmwe.tokens) == 1 and topInside and isinstance(config.stack[-1], Token):
             total += 1
             continue
         secondTokens = Sentence.getTokens(config.stack[-2])
         secondInside = all(tok.In(vmwe) for tok in secondTokens)
         if bool(topInside) != bool(secondInside):
             total += 1
             continue
         if not topInside:
             # Neither element belongs to this VMWE.
             continue
         if len(topTokens) + len(secondTokens) == len(vmwe.tokens) \
                 and transType.name[7:].lower() == vmwe.type.lower():
             return 0
         foundDeeper = any(
             tok.In(vmwe)
             for element in config.stack[:-2]
             for tok in Sentence.getTokens(element)
         )
         if foundDeeper or any(b.In(vmwe) for b in config.buffer):
             total += 1
     # Precision score:
     vmwes = VMWE.getParents(Sentence.getTokens(config.stack[-2:]))
     matching = []
     if vmwes:
         matching = [v for v in vmwes if v.type.lower() in str(transType.name).lower()]
     if not matching:
         total += 1
     return total
Пример #15
0
    def apply(self, parent, sent, vMWEId=None, parse=False):
        """Build the configuration that results from completing a VMWE.

        The tokens of the bottom stack element (``stack[0]``) are collected and
        the last stack element is dropped.  More than one token produces a new
        ``VMWE`` appended to the copied token list; exactly one token is
        appended as is.

        NOTE(review): tokens come from ``stack[0]`` while ``stack[:-1]``
        removes the last element; the two coincide only for a one-element
        stack -- confirm that is the only case callers rely on.

        :param parent: transition whose ``configuration`` is the starting point.
        :param sent: sentence object; its ``blackMergeNum`` is incremented when
            a VMWE is built, ``sent`` is not ``None`` and ``parse`` is false.
        :param vMWEId: id for the new VMWE; defaults to
            ``VMWE.getVMWENumber(newTokens) + 1``.
        :param parse: when true the new VMWE is also appended to
            ``sent.identifiedVMWEs``.
        """
        Counters.completeNum += 1
        config = parent.configuration
        newBuffer = list(config.buffer)
        stackCopy = list(config.stack)
        vMWETokens = Sentence.getTokens(stackCopy[0])
        newStack = stackCopy[:-1]
        newTokens = list(config.tokens)

        tokenCount = len(vMWETokens)
        if tokenCount > 1:
            if sent is not None and not parse:
                sent.blackMergeNum += 1

            newId = vMWEId if vMWEId is not None else VMWE.getVMWENumber(newTokens) + 1
            vMWE = VMWE(newId, vMWETokens[0])
            if parse:
                sent.identifiedVMWEs.append(vMWE)
            vMWE.tokens = vMWETokens
            newTokens.append(vMWE)
        elif tokenCount == 1:
            newTokens.append(vMWETokens[0])

        super(Complete, self).__init__(
            config=Configuration(stack=newStack, buffer=newBuffer, tokens=newTokens, sent=sent, transition=self),
            previous=parent,
            sent=sent)