Пример #1
0
 def parseOffer(mo, i, j, start, end, unit, root):
     """Annotate an offer event and the resources it mentions.

     The unit is re-typed as 'Offer' and receives a 'Surface_act'
     ('Assertion') and an 'Addressee' ('?') feature.  One 'Resource' unit
     is then appended through ``append_unit`` for each of the ``i``
     offered ('Givable') and ``j`` requested ('Receivable') resources.

     Parameters
     ----------
     mo :
         Match object for the offer.  Group 1 is the text preceding the
         resources (presumably the offering player -- confirm against
         the calling regex); groups ``2 * k`` and ``2 * k + 1`` are the
         quantity and kind of the k-th resource, offered ones first.
     i : int
         Number of offered resources.
     j : int
         Number of requested resources.
     start : int
         Offset of the event; resource spans are computed from it.
     end : int
         Unused.
     unit :
         XML element of the unit annotation to update.
     root :
         Root of the XML tree, forwarded to ``append_unit``.
     """
     X = mo.group(1)
     unit.find('characterisation/type').text = 'Offer'
     feats = unit.find('characterisation/featureSet')
     f_elm1 = ET.SubElement(feats, 'feature', {'name': 'Surface_act'})
     f_elm1.text = 'Assertion'
     f_elm2 = ET.SubElement(feats, 'feature', {'name': 'Addressee'})
     f_elm2.text = '?'

     # Both cursors are initialised before the loop so that an offer without
     # any offered resource (i == 0) no longer fails on an unbound `right`.
     # 24 is presumably len(' made an offer to trade ') -- TODO confirm.
     left = start + len(X) + 24
     right = left
     for index in range(1, i + j + 1):
         N = mo.group(2 * index)
         R = mo.group(2 * index + 1)
         if index == i + 1:
             # first requested resource: skip 5 characters
             # (presumably ' for ')
             left = right + 5
         elif index > 1:
             # next resource of the same list: skip 2 characters
             # (presumably ', ')
             left = right + 2
         # span covers "<quantity> <kind>"
         right = left + len(N) + 1 + len(R)
         status = 'Givable' if index <= i else 'Receivable'
         append_unit(root, 'Resource', [('Status', status),
                                        ('Quantity', N),
                                        ('Correctness', 'True'),
                                        ('Kind', R)], left, right)
Пример #2
0
 def parseOffer(mo, i, j, start, end, unit, root):
     """Annotate an offer event and the resources it mentions.

     Sets the unit type to 'Offer', adds the 'Surface_act' ('Assertion')
     and 'Addressee' ('?') features, then calls ``append_unit`` once per
     resource: ``i`` offered resources with status 'Givable' followed by
     ``j`` requested resources with status 'Receivable'.

     Parameters
     ----------
     mo :
         Match object for the offer.  Group 1 precedes the resource
         lists; quantity/kind pairs follow in groups 2, 3, 4, 5, ...
     i : int
         Number of offered resources.
     j : int
         Number of requested resources.
     start : int
         Offset from which the resource spans are computed.
     end : int
         Unused.
     unit :
         XML element of the unit annotation to update.
     root :
         Root of the XML tree, forwarded to ``append_unit``.
     """
     X = mo.group(1)
     unit.find('characterisation/type').text = 'Offer'
     feats = unit.find('characterisation/featureSet')
     f_elm1 = ET.SubElement(feats, 'feature', {'name': 'Surface_act'})
     f_elm1.text = 'Assertion'
     f_elm2 = ET.SubElement(feats, 'feature', {'name': 'Addressee'})
     f_elm2.text = '?'

     # `right` always holds the end of the previous span.  Seeding it here
     # (instead of inside the first loop) keeps the function from failing on
     # an unbound variable when there is no offered resource (i == 0).
     # 24 is presumably len(' made an offer to trade ') -- TODO confirm.
     right = start + len(X) + 24
     index = 1
     # (status, number of resources, gap before the first resource of the list)
     sections = (('Givable', i, 0), ('Receivable', j, 5))
     for status, count, first_gap in sections:
         gap = first_gap
         for _ in range(count):
             N = mo.group(2*index)
             R = mo.group(2*index+1)
             left = right + gap
             # span covers "<quantity> <kind>"
             right = left + len(N) + 1 + len(R)
             append_unit(root, 'Resource',
                         [('Status', status), ('Quantity', N),
                          ('Correctness', 'True'), ('Kind', R)],
                         left, right)
             # items of the same list are 2 characters apart
             # (presumably ', ')
             gap = 2
             index += 1
Пример #3
0
    def parse_offer(m, start, end, unit, root):
        """Annotate an offer and the resources it lists.

        Parameters
        ----------
        m:
            Match object for the offer; its named groups 'X', 'V' and
            'W' are read.
        start: int
            Start of the offer.
        end: int
            End of the offer (unused).
        unit:
            XML element for this unit annotation.
        root:
            Root of the XML tree, forwarded to `append_unit`.
        """
        offerer = m.group('X')

        # 1. characterise the unit: type 'Offer', surface act 'Assertion',
        # addressee '?'
        unit.find('characterisation/type').text = 'Offer'
        feature_set = unit.find('characterisation/featureSet')
        for feat_name, feat_value in (('Surface_act', 'Assertion'),
                                      ('Addressee', '?')):
            feature = ET.SubElement(feature_set, 'feature',
                                    {'name': feat_name})
            feature.text = feat_value

        # 2. one 'Resource' unit per resource: offered ones ('V') first,
        # then asked ones ('W')
        # expected position of the leftmost character of the first resource
        cursor = start + len(offerer) + 24
        for group_name, status in (('V', 'Givable'), ('W', 'Receivable')):
            # stays equal to the cursor when the group did not match
            span_end = cursor
            listing = m.group(group_name)
            if listing is not None:
                for item in listing.split(', '):
                    span_end = cursor + len(item)
                    qty, kind = item.split(' ')
                    append_unit(root,
                                'Resource', [('Status', status),
                                             ('Quantity', qty),
                                             ('Correctness', 'True'),
                                             ('Kind', kind)],
                                cursor,
                                span_end,
                                author=_AUTHOR)
                    # expected start of the next resource of this list
                    cursor = span_end + 2
            # expected start of the first resource of the next list
            cursor = span_end + 5
Пример #4
0
def add_units_annotations(tree, text):
    """Add units annotations on non-linguistic events.

    Each unit of `tree` typed 'NonplayerSegment' is re-typed according to
    the event text it spans ('Offer', 'Accept', 'Refusal' or 'Other') and
    receives a 'Surface_act' and an 'Addressee' feature.  'Resource' units
    are added through `append_unit` for the resources named in offers,
    trades, "gets" and "monopolized" events.

    Parameters
    ----------
    tree :
        XML tree extracted from the .aa file to modify
    text : string
        raw text extracted from the .ac file

    Returns
    -------
    root :
        modified XML tree with additional units annotations on
        non-linguistic events (the same object as `tree`)
    """
    root = tree

    RejectRegEx = re.compile(r'(.+) rejected trade offer\.')

    GetRegEx = re.compile(r'(.+) gets (\d+) (clay|ore|sheep|wheat|wood)\.')
    # Per the original author, a single dice roll in "Settlers of Catan"
    # cannot yield more than 2 different types of resources, so only the
    # one-resource and two-resource forms are handled.
    Get2RegEx = re.compile(
        r'(.+) gets (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood)\.'
    )

    MonopolyRegEx = re.compile(
        r'(.+) monopolized (clay|ore|sheep|wheat|wood)\.')

    # Player who made the most recent offer; used as the addressee of
    # refusals and of "You can't make that trade." events.
    Trader = ''

    def _characterise(unit, unit_type, addressee):
        """Set the type of `unit` and add its two features."""
        unit.find('characterisation/type').text = unit_type
        feats = unit.find('characterisation/featureSet')
        f_elm1 = ET.SubElement(feats, 'feature', {'name': 'Surface_act'})
        f_elm1.text = 'Assertion'
        f_elm2 = ET.SubElement(feats, 'feature', {'name': 'Addressee'})
        f_elm2.text = addressee

    def _add_resource(root, status, qty, kind, left, right):
        """Append one 'Resource' unit spanning text[left:right]."""
        append_unit(root,
                    'Resource', [('Status', status),
                                 ('Quantity', qty),
                                 ('Correctness', 'True'),
                                 ('Kind', kind)],
                    left,
                    right,
                    author=_AUTHOR)

    def _add_resource_list(root, listing, status, left):
        """Append one 'Resource' unit per item of a ', '-separated listing.

        `left` is the expected position of the first item.  Returns the
        end of the last span, or `left` when `listing` is None.
        """
        right = left
        if listing is not None:
            for resource in listing.split(', '):
                right = left + len(resource)  # end of span
                qty, kind = resource.split(' ')
                _add_resource(root, status, qty, kind, left, right)
                # expected position of the next resource (if any)
                left = right + 2
        return right

    def parse_offer(m, start, end, unit, root):
        """Reimplementation of parseOffer.

        Parameters
        ----------
        m:
            Match object for the offer (groups 'X', 'V' and 'W' are read).
        start: int
            Start of the offer.
        end: int
            End of the offer (unused).
        unit:
            XML element for this unit annotation.
        root:
            Root of the XML tree.
        """
        X = m.group('X')
        # 1. update the unit annotation: type 'Offer', addressee '?'
        _characterise(unit, 'Offer', '?')
        # 2. add 'Resource' annotations: offered resources start 24
        # characters after X, asked ones 5 characters after the offered ones
        right = _add_resource_list(root, m.group('V'), 'Givable',
                                   start + len(X) + 24)
        _add_resource_list(root, m.group('W'), 'Receivable', right + 5)
        # the eventual Y (if m comes from BANK_OFFER_PROG) is currently unused

    def parse_trade(m, start, end, unit, root):
        """Reimplementation of parseTrade.

        Parameters
        ----------
        m:
            Match object for the trade (groups 'X', 'Y', 'V' and 'W' are
            read).
        start: int
            Start of the trade event.
        end: int
            End of the trade event (unused).
        unit:
            XML element for this unit annotation.
        root:
            Root of the XML tree.
        """
        X = m.group('X')
        Y = m.group('Y')
        # 1. update the unit annotation: type 'Accept', addressee Y, or
        # 'All' if Y is 'the bank' or 'a port'
        if Y == 'the bank' or Y == 'a port':
            addressee = 'All'
        else:
            addressee = Y
        _characterise(unit, 'Accept', addressee)
        # 2. add 'Resource' annotations: offered resources start 8
        # characters after X, asked ones follow after ' for '
        right = _add_resource_list(root, m.group('V'), '?',
                                   start + len(X) + 8)
        _add_resource_list(root, m.group('W'), 'Possessed', right + 5)

    # NOTE(review): `append_unit` presumably adds children to `root` while
    # it is being iterated here -- confirm this is intended.
    for unit in root:
        if unit.findtext('characterisation/type') != 'NonplayerSegment':
            continue
        start = int(
            unit.find('positioning/start/singlePosition').get('index'))
        end = int(unit.find('positioning/end/singlePosition').get('index'))
        event = text[start:end]

        # <X> made an offer to trade <N1> <R1> for <N2> <R2>.
        # (optionally followed by "with the bank or a port")
        m = OFFER_PROG.search(event)
        if m is None:
            m = BANK_OFFER_PROG.search(event)
        if m is not None:
            parse_offer(m, start, end, unit, root)
            Trader = m.group('X')
            continue

        m = TRADE_PROG.search(event)
        if m is not None:
            parse_trade(m, start, end, unit, root)
            continue

        # <Y> rejected trade offer.
        if RejectRegEx.search(event) is not None:
            _characterise(unit, 'Refusal',
                          Trader if Trader != '' else 'All')
            continue

        if event == "You can't make that trade.":
            _characterise(unit, 'Other',
                          Trader if Trader != '' else 'All')
            continue

        # <Y> gets <N> <R>.
        mo = GetRegEx.search(event)
        if mo is not None:
            Y = mo.group(1)
            _characterise(unit, 'Other', 'All')
            left = start + len(Y) + 6
            right = end - 1  # leave out the final '.'
            _add_resource(root, 'Possessed', mo.group(2), mo.group(3),
                          left, right)
            continue

        # <Y> gets <N1> <R1>, <N2> <R2>.
        mo = Get2RegEx.search(event)
        if mo is not None:
            Y = mo.group(1)
            N1 = mo.group(2)
            R1 = mo.group(3)
            # the second resource lives in groups 4 and 5 (the previous
            # code re-read groups 2 and 3, duplicating the first resource)
            N2 = mo.group(4)
            R2 = mo.group(5)
            _characterise(unit, 'Other', 'All')
            left1 = start + len(Y) + 6
            right1 = left1 + len(N1) + 1 + len(R1)
            _add_resource(root, 'Possessed', N1, R1, left1, right1)
            left2 = right1 + 2
            right2 = left2 + len(N2) + 1 + len(R2)
            _add_resource(root, 'Possessed', N2, R2, left2, right2)
            continue

        # <X> monopolized <R>.
        mo = MonopolyRegEx.search(event)
        if mo is not None:
            R = mo.group(2)
            _characterise(unit, 'Other', 'All')
            right = end - 1  # leave out the final '.'
            left = right - len(R)
            _add_resource(root, 'Possessed', '?', R, left, right)
            continue

        # any other non-linguistic event
        _characterise(unit, 'Other', 'All')

    return root
Пример #5
0
    def parse_trade(m, start, end, unit, root):
        """Annotate a trade event and the resources it lists.

        Parameters
        ----------
        m:
            Match object for the trade; its named groups 'X', 'Y', 'V'
            and 'W' are read.
        start: int
            Start of the trade event.
        end: int
            End of the trade event (unused).
        unit:
            XML element for this unit annotation.
        root:
            Root of the XML tree, forwarded to `append_unit`.
        """
        trader = m.group('X')
        partner = m.group('Y')

        # 1. characterise the unit: type 'Accept', surface act 'Assertion'
        unit.find('characterisation/type').text = 'Accept'
        feature_set = unit.find('characterisation/featureSet')
        surface_act = ET.SubElement(feature_set, 'feature',
                                    {'name': 'Surface_act'})
        surface_act.text = 'Assertion'
        # the addressee is the trade partner, or 'All' when trading with
        # 'the bank' or 'a port'
        addressee = ET.SubElement(feature_set, 'feature',
                                  {'name': 'Addressee'})
        if partner in ('the bank', 'a port'):
            addressee.text = 'All'
        else:
            addressee.text = partner

        # 2. one 'Resource' unit per resource: group 'V' first (status '?'),
        # then group 'W' (status 'Possessed')
        # expected position of the leftmost character of the first resource
        cursor = start + len(trader) + 8
        for group_name, status in (('V', '?'), ('W', 'Possessed')):
            # stays equal to the cursor when the group did not match
            span_end = cursor
            listing = m.group(group_name)
            if listing is not None:
                for item in listing.split(', '):
                    span_end = cursor + len(item)
                    qty, kind = item.split(' ')
                    append_unit(root,
                                'Resource', [('Status', status),
                                             ('Quantity', qty),
                                             ('Correctness', 'True'),
                                             ('Kind', kind)],
                                cursor,
                                span_end,
                                author=_AUTHOR)
                    cursor = span_end + 2
            # skip ' for ' before the second list
            cursor = span_end + 5
Пример #6
0
    def parse_offer(m, start, end, unit, root):
        """Annotate an offer and the resources it lists.

        Parameters
        ----------
        m:
            Match object for the offer; its named groups 'X', 'V' and
            'W' are read.
        start: int
            Start of the offer.
        end: int
            End of the offer (unused).
        unit:
            XML element for this unit annotation.
        root:
            Root of the XML tree, forwarded to `append_unit`.
        """
        def annotate(listing, status, offset):
            """Append one 'Resource' unit per item of `listing`.

            `offset` is the expected position of the first item.  Return
            the end of the last span, or `offset` if `listing` is None.
            """
            last_end = offset
            if listing is None:
                return last_end
            for chunk in listing.split(', '):
                last_end = offset + len(chunk)
                amount, name = chunk.split(' ')
                append_unit(root, 'Resource', [('Status', status),
                                               ('Quantity', amount),
                                               ('Correctness', 'True'),
                                               ('Kind', name)],
                            offset, last_end, author=_AUTHOR)
                # expected position of the next item (if any)
                offset = last_end + 2
            return last_end

        offerer = m.group('X')

        # 1. characterise the unit: type 'Offer', surface act 'Assertion',
        # addressee '?'
        unit.find('characterisation/type').text = 'Offer'
        feature_set = unit.find('characterisation/featureSet')
        surface_act = ET.SubElement(feature_set, 'feature',
                                    {'name': 'Surface_act'})
        surface_act.text = 'Assertion'
        addressee = ET.SubElement(feature_set, 'feature',
                                  {'name': 'Addressee'})
        addressee.text = '?'

        # 2. resources offered (group 'V'), starting 24 characters after X
        offered_end = annotate(m.group('V'), 'Givable',
                               start + len(offerer) + 24)
        # 3. resources asked (group 'W'), 5 characters further
        annotate(m.group('W'), 'Receivable', offered_end + 5)
Пример #7
0
def add_units_annotations(tree, text):
    """Add units annotations on non-linguistic events.

    Parameters
    ----------
    tree :
        XML tree extracted from the .aa file to modify
    text : string
        raw text extracted from the .ac file

    Returns
    -------
    root :
        modified XML tree with additional units annotations on
        non-linguistic events
    """
    root = tree

    #That's the moment I hope I didn't make any typo...

    RejectRegEx = re.compile(r'(.+) rejected trade offer\.')

    GetRegEx = re.compile(r'(.+) gets (\d+) (clay|ore|sheep|wheat|wood)\.')
    Get2RegEx = re.compile(r'(.+) gets (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood)\.')
    # In "Settlers of Catan" it is impossible to get more than 2 different types of resources with a single dice roll.
    # That is why we don't need a more complex regular expression: there are in fact just two cases to consider. :)

    MonopolyRegEx = re.compile(r'(.+) monopolized (clay|ore|sheep|wheat|wood)\.')


    Trader = ''

    def parse_offer(m, start, end, unit, root):
        """Annotate an offer and the resources it lists.

        Parameters
        ----------
        m:
            Match object for the offer; its named groups 'X', 'V' and
            'W' are read.
        start: int
            Start of the offer.
        end: int
            End of the offer (unused).
        unit:
            XML element for this unit annotation.
        root:
            Root of the XML tree, forwarded to `append_unit`.
        """
        offerer = m.group('X')

        # 1. characterise the unit
        unit.find('characterisation/type').text = 'Offer'
        feature_set = unit.find('characterisation/featureSet')
        surface_act = ET.SubElement(feature_set, 'feature',
                                    {'name': 'Surface_act'})
        surface_act.text = 'Assertion'
        addressee = ET.SubElement(feature_set, 'feature',
                                  {'name': 'Addressee'})
        addressee.text = '?'

        # 2. resources offered: `pos` is the expected start of the next
        # resource, `last` the end of the latest annotated span
        pos = start + len(offerer) + 24
        last = pos  # kept as is when group 'V' did not match
        offered = m.group('V')
        offered_items = () if offered is None else offered.split(', ')
        for item in offered_items:
            last = pos + len(item)
            amount, name = item.split(' ')
            append_unit(root, 'Resource', [('Status', 'Givable'),
                                           ('Quantity', amount),
                                           ('Correctness', 'True'),
                                           ('Kind', name)],
                        pos, last, author=_AUTHOR)
            pos = last + 2

        # 3. resources asked, expected 5 characters after the offered ones
        pos = last + 5
        last = pos
        asked = m.group('W')
        asked_items = () if asked is None else asked.split(', ')
        for item in asked_items:
            last = pos + len(item)
            amount, name = item.split(' ')
            append_unit(root, 'Resource', [('Status', 'Receivable'),
                                           ('Quantity', amount),
                                           ('Correctness', 'True'),
                                           ('Kind', name)],
                        pos, last, author=_AUTHOR)
            pos = last + 2
        # NOTE: a possible 'Y' group (when m comes from BANK_OFFER_PROG) is
        # currently unused

    def parse_trade(m, start, end, unit, root):
        """Annotate a trade event and the resources it lists.

        Parameters
        ----------
        m:
            Match object for the trade; its named groups 'X', 'Y', 'V'
            and 'W' are read.
        start: int
            Start of the trade event.
        end: int
            End of the trade event (unused).
        unit:
            XML element for this unit annotation.
        root:
            Root of the XML tree, forwarded to `append_unit`.
        """
        def annotate(listing, status, offset):
            """Append one 'Resource' unit per item of `listing`.

            `offset` is the expected position of the first item.  Return
            the end of the last span, or `offset` if `listing` is None.
            """
            last_end = offset
            if listing is None:
                return last_end
            for chunk in listing.split(', '):
                last_end = offset + len(chunk)
                amount, name = chunk.split(' ')
                append_unit(root, 'Resource', [('Status', status),
                                               ('Quantity', amount),
                                               ('Correctness', 'True'),
                                               ('Kind', name)],
                            offset, last_end, author=_AUTHOR)
                offset = last_end + 2
            return last_end

        trader = m.group('X')
        partner = m.group('Y')

        # 1. characterise the unit: type 'Accept', surface act 'Assertion'
        unit.find('characterisation/type').text = 'Accept'
        feature_set = unit.find('characterisation/featureSet')
        surface_act = ET.SubElement(feature_set, 'feature',
                                    {'name': 'Surface_act'})
        surface_act.text = 'Assertion'
        # addressee is the partner, or 'All' for 'the bank' / 'a port'
        addressee = ET.SubElement(feature_set, 'feature',
                                  {'name': 'Addressee'})
        is_bank_or_port = partner == 'the bank' or partner == 'a port'
        addressee.text = 'All' if is_bank_or_port else partner

        # 2. first list (group 'V'), starting 8 characters after X
        first_end = annotate(m.group('V'), '?', start + len(trader) + 8)
        # 3. second list (group 'W'), after ' for '
        annotate(m.group('W'), 'Possessed', first_end + 5)

    for unit in root:
        if unit.findtext('characterisation/type') == 'NonplayerSegment':
            start = int(unit.find('positioning/start/singlePosition').get(
                'index'))
            end = int(unit.find('positioning/end/singlePosition').get(
                'index'))
            event = text[start:end]

            # WIP 2016-07-11
            if OFFER_PROG.search(event) is not None:
                # <X> made an offer to trade <N1> <R1> for <N2> <R2>.
                m = OFFER_PROG.search(event)
                parse_offer(m, start, end, unit, root)
                Trader = m.group('X')
                continue
            elif BANK_OFFER_PROG.search(event) is not None:
                # <X> made an offer to trade <N1> <R1> for <N2> <R2> with
                # the bank or a port.
                m = BANK_OFFER_PROG.search(event)
                parse_offer(m, start, end, unit, root)
                Trader = m.group('X')
                continue
            elif TRADE_PROG.search(event) is not None:
                m = TRADE_PROG.search(event)
                parse_trade(m, start, end, unit, root)
                continue
            # end WIP 2016-07-11

            elif RejectRegEx.search(event) != None:
                # <Y> rejected trade offer.
                mo = RejectRegEx.search(event)
                Y = mo.group(1)

                unit.find('characterisation/type').text = 'Refusal'
                feats = unit.find('characterisation/featureSet')
                f_elm1 = ET.SubElement(feats, 'feature',
                                       {'name': 'Surface_act'})
                f_elm1.text = 'Assertion'
                f_elm2 = ET.SubElement(feats, 'feature',
                                       {'name': 'Addressee'})
                if Trader != '':
                    f_elm2.text = Trader
                else:
                    f_elm2.text = 'All'
                continue

            elif event == "You can't make that trade.":
                unit.find('characterisation/type').text = 'Other'
                feats = unit.find('characterisation/featureSet')
                f_elm1 = ET.SubElement(feats, 'feature',
                                       {'name': 'Surface_act'})
                f_elm1.text = 'Assertion'
                f_elm2 = ET.SubElement(feats, 'feature',
                                       {'name': 'Addressee'})
                if Trader != '':
                    f_elm2.text = Trader
                else:
                    f_elm2.text = 'All'
                continue


            elif GetRegEx.search(event) != None:
                # <Y> gets <N> <R>.
                mo = GetRegEx.search(event)
                Y = mo.group(1)
                N = mo.group(2)
                R = mo.group(3)

                unit.find('characterisation/type').text = 'Other'
                feats = unit.find('characterisation/featureSet')
                f_elm1 = ET.SubElement(feats, 'feature',
                                       {'name': 'Surface_act'})
                f_elm1.text = 'Assertion'
                f_elm2 = ET.SubElement(feats, 'feature',
                                       {'name': 'Addressee'})
                f_elm2.text = 'All'

                left = start + len(Y) + 6
                right = end - 1
                append_unit(root, 'Resource', [('Status', 'Possessed'),
                                               ('Quantity', N),
                                               ('Correctness', 'True'),
                                               ('Kind', R)],
                            left, right, author=_AUTHOR)
                continue

            elif Get2RegEx.search(event) != None:
                # <Y> gets <N1> <R1>, <N2> <R2>.
                mo = Get2RegEx.search(event)
                Y = mo.group(1)
                N1 = mo.group(2)
                R1 = mo.group(3)
                N2 = mo.group(2)
                R2 = mo.group(3)

                unit.find('characterisation/type').text = 'Other'
                feats = unit.find('characterisation/featureSet')
                f_elm1 = ET.SubElement(feats, 'feature',
                                       {'name': 'Surface_act'})
                f_elm1.text = 'Assertion'
                f_elm2 = ET.SubElement(feats, 'feature',
                                       {'name': 'Addressee'})
                f_elm2.text = 'All'

                left1 = start + len(Y) + 6
                right1 = left1 + len(N1) + 1 + len(R1)
                append_unit(root, 'Resource', [('Status', 'Possessed'),
                                               ('Quantity', N1),
                                               ('Correctness', 'True'),
                                               ('Kind', R1)],
                            left1, right1, author=_AUTHOR)
                left2 = right1 + 2
                right2 = left2 + len(N2) + 1 + len(R2)
                append_unit(root, 'Resource', [('Status', 'Possessed'),
                                               ('Quantity', N2),
                                               ('Correctness', 'True'),
                                               ('Kind', R2)],
                            left2, right2, author=_AUTHOR)
                continue


            elif MonopolyRegEx.search(event) != None:
                # <X> monopolized <R>.
                mo = MonopolyRegEx.search(event)
                X = mo.group(1)
                R = mo.group(2)

                unit.find('characterisation/type').text = 'Other'
                feats = unit.find('characterisation/featureSet')
                f_elm1 = ET.SubElement(feats, 'feature',
                                       {'name': 'Surface_act'})
                f_elm1.text = 'Assertion'
                f_elm2 = ET.SubElement(feats, 'feature',
                                       {'name': 'Addressee'})
                f_elm2.text = 'All'

                right = end - 1
                left = right - len(R)
                append_unit(root, 'Resource', [('Status', 'Possessed'),
                                               ('Quantity', '?'),
                                               ('Correctness', 'True'),
                                               ('Kind', R)],
                            left, right, author=_AUTHOR)
                continue

            else:
                unit.find('characterisation/type').text = 'Other'
                feats = unit.find('characterisation/featureSet')
                f_elm1 = ET.SubElement(feats, 'feature',
                                       {'name': 'Surface_act'})
                f_elm1.text = 'Assertion'
                f_elm2 = ET.SubElement(feats, 'feature',
                                       {'name': 'Addressee'})
                f_elm2.text = 'All'
                continue

    return root
Пример #8
0
    def parse_trade(m, start, end, unit, root):
        """Annotate a completed trade event (reimplementation of parseTrade).

        The unit is re-typed as 'Accept' and receives 'Surface_act' and
        'Addressee' features; one 'Resource' unit is then appended to
        `root` for every "<quantity> <kind>" item of the trade.

        Parameters
        ----------
        m: match object
            Match for the trade event; must expose the named groups
            'X' (trader), 'Y' (trade partner), 'V' (resources given) and
            'W' (resources received).  'V' and 'W' may be None.
        start: int
            Offset of the event in the text; resource spans are computed
            relative to it.
        end: int
            End offset of the event (not used here).
        unit: XML element
            Unit annotation of the event, updated in place.
        root: XML element
            Root of the XML tree, passed on to `append_unit`.
        """
        trader = m.group('X')
        partner = m.group('Y')

        # 1. update the unit annotation itself
        unit.find('characterisation/type').text = 'Accept'
        feature_set = unit.find('characterisation/featureSet')
        surface_act = ET.SubElement(feature_set, 'feature',
                                    {'name': 'Surface_act'})
        surface_act.text = 'Assertion'
        addressee = ET.SubElement(feature_set, 'feature',
                                  {'name': 'Addressee'})
        # trades with the bank or a port are addressed to everybody
        addressee.text = ('All' if partner in ('the bank', 'a port')
                          else partner)

        # 2. add one 'Resource' annotation per resource, given then received
        # expected position of the first resource: skip '<X> traded '
        span_end = start + len(trader) + 8
        # (group name, status of its resources, gap before the list)
        resource_lists = (('V', '?', 0),
                          ('W', 'Possessed', 5))  # 5 == len(' for ')
        for group_name, status, gap in resource_lists:
            span_start = span_end + gap
            span_end = span_start
            listed = m.group(group_name)
            if listed is None:
                continue
            for item in listed.split(', '):
                span_end = span_start + len(item)
                qty, kind = item.split(' ')
                append_unit(root, 'Resource', [('Status', status),
                                               ('Quantity', qty),
                                               ('Correctness', 'True'),
                                               ('Kind', kind)],
                            span_start, span_end, author=_AUTHOR)
                span_start = span_end + 2  # skip ', '
Пример #9
0
def add_units_annotations(tree, text):
    """
    Add units annotations for non-linguistic events.

    Every unit whose type is 'NonplayerSegment' is re-typed according to
    the message it spans in `text` (offer, trade, refusal, ...), receives
    'Surface_act' and 'Addressee' features and, when the message lists
    resources, one 'Resource' unit per "<quantity> <kind>" item is added
    through `append_unit`.

    Parameters
    ----------
    tree :
        XML tree extracted from the .aa file to modify
    text : string
        raw text extracted from the .ac file

    Returns
    -------
    root :
        the same `tree` object, modified in place, with units annotations
        for non-linguistic events
    """
    root = tree

    # One "<quantity> <kind>" item; it contributes two groups to a match.
    resource = r'(\d+) (clay|ore|sheep|wheat|wood)'
    # (number of resources given, number of resources received), listed in
    # the order in which the corresponding patterns are tried.
    shapes = [(1, 1), (1, 2), (2, 1), (1, 3), (2, 2), (3, 1),
              (1, 4), (2, 3), (3, 2), (4, 1)]

    def resource_list(count):
        """Return the pattern for `count` resources separated by ', '."""
        return ', '.join([resource] * count)

    def exchange_patterns(prefix, suffix):
        """Compile '<prefix><given> for <received><suffix>' per shape."""
        return [(re.compile(prefix + resource_list(i) + ' for '
                            + resource_list(j) + suffix), i, j)
                for i, j in shapes]

    # <X> made an offer to trade <N1> <R1>[, ...] for <N2> <R2>[, ...].
    offer_patterns = exchange_patterns(r'(.+) made an offer to trade ',
                                       r'( from the bank or a port)?\.')
    # <X> traded <N1> <R1>[, ...] for <N2> <R2>[, ...] from <Y>.
    trade_patterns = exchange_patterns(r'(.+) traded ', r' from (.+)\.')

    # <Y> rejected trade offer.
    reject_regex = re.compile(r'(.+) rejected trade offer\.')
    # <Y> gets <N> <R>.
    get_regex = re.compile(r'(.+) gets (\d+) (clay|ore|sheep|wheat|wood)\.')
    # <Y> gets <N1> <R1>, <N2> <R2>.
    # Only these two "gets" shapes are handled: the original author notes
    # that a dice roll never yields more than two kinds of resources.
    get2_regex = re.compile(r'(.+) gets ' + resource_list(2) + r'\.')
    # <X> monopolized <R>.
    monopoly_regex = re.compile(
        r'(.+) monopolized (clay|ore|sheep|wheat|wood)\.')

    def first_match(patterns, event):
        """Return (match, i, j) for the first matching pattern, or None."""
        for regex, i, j in patterns:
            mo = regex.search(event)
            if mo is not None:
                return mo, i, j
        return None

    def annotate(unit, unit_type, addressee):
        """Set the type of `unit` and add its two features."""
        unit.find('characterisation/type').text = unit_type
        feats = unit.find('characterisation/featureSet')
        surface_act = ET.SubElement(feats, 'feature',
                                    {'name': 'Surface_act'})
        surface_act.text = 'Assertion'
        addressee_elm = ET.SubElement(feats, 'feature',
                                      {'name': 'Addressee'})
        addressee_elm.text = addressee

    def add_resources(mo, first, last, left, status):
        """Append 'Resource' units for resources number first..last.

        Resource number k is stored in groups 2k (quantity) and 2k + 1
        (kind) of `mo`.  `left` is the offset of the first resource;
        the end offset of the last resource is returned.
        """
        right = left
        for index in range(first, last + 1):
            qty = mo.group(2 * index)
            kind = mo.group(2 * index + 1)
            right = left + len(qty) + 1 + len(kind)
            append_unit(root, 'Resource', [('Status', status),
                                           ('Quantity', qty),
                                           ('Correctness', 'True'),
                                           ('Kind', kind)], left, right)
            left = right + 2  # skip ', '
        return right

    def add_exchanged_resources(mo, i, j, left, given, received):
        """Append the `i` given then the `j` received resources."""
        right = add_resources(mo, 1, i, left, given)
        # skip ' for ' between the two lists
        add_resources(mo, i + 1, i + j, right + 5, received)

    # Author of the most recent offer; refusals are addressed to them.
    trader = ''

    # NOTE: `append_unit` receives `root` while it is being iterated;
    # whatever it adds is not of type 'NonplayerSegment', so the test
    # below skips it if it is ever visited.
    for unit in root:
        if unit.findtext('characterisation/type') != 'NonplayerSegment':
            continue
        start = int(
            unit.find('positioning/start/singlePosition').get('index'))
        end = int(unit.find('positioning/end/singlePosition').get('index'))
        event = text[start:end]

        found = first_match(offer_patterns, event)
        if found is not None:
            mo, i, j = found
            annotate(unit, 'Offer', '?')
            # 24 == len(' made an offer to trade ')
            add_exchanged_resources(mo, i, j, start + len(mo.group(1)) + 24,
                                    'Givable', 'Receivable')
            trader = mo.group(1)
            continue

        found = first_match(trade_patterns, event)
        if found is not None:
            mo, i, j = found
            partner = mo.group(2 * (i + j + 1))  # last group: <Y>
            if partner == 'the bank' or partner == 'a port':
                annotate(unit, 'Accept', 'All')
            else:
                annotate(unit, 'Accept', partner)
            # 8 == len(' traded ')
            add_exchanged_resources(mo, i, j, start + len(mo.group(1)) + 8,
                                    '?', 'Possessed')
            continue

        if reject_regex.search(event) is not None:
            annotate(unit, 'Refusal', trader if trader != '' else 'All')
            continue

        if event == "You can't make that trade.":
            annotate(unit, 'Other', trader if trader != '' else 'All')
            continue

        mo = get_regex.search(event)
        if mo is not None:
            annotate(unit, 'Other', 'All')
            # the resource runs from after '<Y> gets ' to the final '.'
            append_unit(root, 'Resource', [('Status', 'Possessed'),
                                           ('Quantity', mo.group(2)),
                                           ('Correctness', 'True'),
                                           ('Kind', mo.group(3))],
                        start + len(mo.group(1)) + 6, end - 1)
            continue

        mo = get2_regex.search(event)
        if mo is not None:
            annotate(unit, 'Other', 'All')
            # The second resource is read from groups 4 and 5; it used to
            # be read from groups 2 and 3, duplicating the first one.
            add_resources(mo, 1, 2, start + len(mo.group(1)) + 6,
                          'Possessed')
            continue

        mo = monopoly_regex.search(event)
        if mo is not None:
            kind = mo.group(2)
            annotate(unit, 'Other', 'All')
            # the resource kind sits right before the final '.'
            right = end - 1
            append_unit(root, 'Resource', [('Status', 'Possessed'),
                                           ('Quantity', '?'),
                                           ('Correctness', 'True'),
                                           ('Kind', kind)],
                        right - len(kind), right)
            continue

        # any other server message
        annotate(unit, 'Other', 'All')

    return root
Пример #10
0
# One "<quantity> <kind>" item as it is written in the game event text.
_RESOURCE_PATTERN = r'(\d+) (clay|ore|sheep|wheat|wood)'

# (given, received) item counts recognised in offer and trade events, listed
# in the order the patterns are tried: every split with at least one item on
# each side and at most five items overall.
_EXCHANGE_SHAPES = (
    (1, 1), (1, 2), (2, 1), (1, 3), (2, 2),
    (3, 1), (1, 4), (2, 3), (3, 2), (4, 1),
)


def _exchange_regexes(verb, tail):
    """Build the ``(shape, regex)`` table for one kind of exchange event.

    Parameters
    ----------
    verb : string
        literal text between the player name and the first given item
    tail : string
        regex source appended after the last received item

    Returns
    -------
    table : list
        one ``((given, received), compiled regex)`` pair per entry of
        ``_EXCHANGE_SHAPES``, in the same order
    """
    table = []
    for given, received in _EXCHANGE_SHAPES:
        source = ('(.+) ' + verb + ' '
                  + ', '.join([_RESOURCE_PATTERN] * given)
                  + ' for '
                  + ', '.join([_RESOURCE_PATTERN] * received)
                  + tail)
        table.append(((given, received), re.compile(source)))
    return table


# <X> made an offer to trade <N1> <R1>[, ...] for <N2> <R2>[, ...].
_OFFER_REGEXES = _exchange_regexes('made an offer to trade',
                                   r'( from the bank or a port)?\.')
# <X> traded <N1> <R1>[, ...] for <N2> <R2>[, ...] from <Y>.
_TRADE_REGEXES = _exchange_regexes('traded', r' from (.+)\.')
# <Y> rejected trade offer.
_REJECT_REGEX = re.compile(r'(.+) rejected trade offer\.')
# <Y> gets <N> <R>.  /  <Y> gets <N1> <R1>, <N2> <R2>.
# Only one or two items are handled here, tried in this order.
_GET_REGEXES = (
    (1, re.compile(r'(.+) gets ' + _RESOURCE_PATTERN + r'\.')),
    (2, re.compile(r'(.+) gets ' + _RESOURCE_PATTERN + ', '
                   + _RESOURCE_PATTERN + r'\.')),
)
# <X> monopolized <R>.
_MONOPOLY_REGEX = re.compile(r'(.+) monopolized (clay|ore|sheep|wheat|wood)\.')


def _first_match(table, event):
    """Return ``(shape, match)`` for the first regex of *table* found in
    *event*, or ``None`` when no regex of the table matches."""
    for shape, regex in table:
        mo = regex.search(event)
        if mo is not None:
            return shape, mo
    return None


def _characterise(unit, unit_type, addressee):
    """Set the type of *unit* and add its two features: 'Surface_act'
    (always 'Assertion') and 'Addressee' (set to *addressee*)."""
    unit.find('characterisation/type').text = unit_type
    feats = unit.find('characterisation/featureSet')
    surface_act = ET.SubElement(feats, 'feature', {'name': 'Surface_act'})
    surface_act.text = 'Assertion'
    target = ET.SubElement(feats, 'feature', {'name': 'Addressee'})
    target.text = addressee


def _append_resources(root, mo, first, count, status, offset):
    """Add one 'Resource' unit per item for items *first* .. *first+count-1*.

    The k-th item of a match (counting from 1) has its quantity in group
    ``2*k`` and its kind in group ``2*k + 1``, because group 1 is always the
    player name. Spans are taken from the match positions, shifted by
    *offset* (the index of the event inside the whole text).
    """
    for item in range(first, first + count):
        quantity_group = 2 * item
        kind_group = quantity_group + 1
        append_unit(root, 'Resource',
                    [('Status', status),
                     ('Quantity', mo.group(quantity_group)),
                     ('Correctness', 'True'),
                     ('Kind', mo.group(kind_group))],
                    offset + mo.start(quantity_group),
                    offset + mo.end(kind_group))


def add_units_annotations(tree, text):
    """
    Add units annotations for non-linguistic events

    Every unit of type 'NonplayerSegment' is re-typed ('Offer', 'Accept',
    'Refusal' or 'Other') according to the event text it covers, receives
    'Surface_act' and 'Addressee' features, and one 'Resource' unit is
    added (through append_unit) for each resource item mentioned in an
    offer, trade, "gets" or "monopolized" event.

    Parameters
    ----------
    tree :
        XML tree extracted from the .aa file to modify
    text : string
        raw text extracted from the .ac file

    Returns
    -------
    root :
        modified XML tree with units annotations for non-linguistic events
        (the same object as `tree`, modified in place)
    """
    root = tree

    # Player who made the most recent offer seen so far; used as the
    # addressee of later refusals and "can't make that trade" events.
    trader = ''

    # Iterate over a snapshot: append_unit is handed `root` and presumably
    # adds new units to it while we are looping -- TODO confirm.
    for unit in list(root):
        if unit.findtext('characterisation/type') != 'NonplayerSegment':
            continue

        start = int(unit.find('positioning/start/singlePosition').get('index'))
        end = int(unit.find('positioning/end/singlePosition').get('index'))
        event = text[start:end]

        found = _first_match(_OFFER_REGEXES, event)
        if found is not None:
            (given, received), mo = found
            _characterise(unit, 'Offer', '?')
            _append_resources(root, mo, 1, given, 'Givable', start)
            _append_resources(root, mo, given + 1, received,
                              'Receivable', start)
            trader = mo.group(1)
            continue

        found = _first_match(_TRADE_REGEXES, event)
        if found is not None:
            (given, received), mo = found
            # The trading partner is captured right after the last item.
            partner = mo.group(2 * (given + received + 1))
            if partner == 'the bank' or partner == 'a port':
                addressee = 'All'
            else:
                addressee = partner
            _characterise(unit, 'Accept', addressee)
            _append_resources(root, mo, 1, given, '?', start)
            _append_resources(root, mo, given + 1, received,
                              'Possessed', start)
            continue

        if _REJECT_REGEX.search(event) is not None:
            _characterise(unit, 'Refusal', trader if trader != '' else 'All')
            continue

        if event == "You can't make that trade.":
            _characterise(unit, 'Other', trader if trader != '' else 'All')
            continue

        # Everything else is an 'Other' event addressed to everybody; only
        # "gets" and "monopolized" events additionally carry resources.
        _characterise(unit, 'Other', 'All')

        found = _first_match(_GET_REGEXES, event)
        if found is not None:
            count, mo = found
            _append_resources(root, mo, 1, count, 'Possessed', start)
            continue

        mo = _MONOPOLY_REGEX.search(event)
        if mo is not None:
            # No quantity is stated in the event, hence '?'.
            append_unit(root, 'Resource',
                        [('Status', 'Possessed'), ('Quantity', '?'),
                         ('Correctness', 'True'), ('Kind', mo.group(2))],
                        start + mo.start(2), start + mo.end(2))

    return root