Exemplo n.º 1
0
def __message_strkey_tupleval2xml(msg, key, val):
    """
    helper function for __message2xml(). converts those parts of a message
    to XML whose key is a string (i.e. most of them).

    A sub-element named C{key} is attached to C{msg}; nothing is returned.

    @type msg: C{etree._Element}
    @param msg: the XML element the new sub-element is attached to

    @type key: C{str}
    @param key: tag name of the sub-element to create

    @type val: C{tuple}
    @param val: a C{(value, rating)} pair. If C{value} is a C{frozenset},
    each member becomes a C{<value>} child element; otherwise C{value} is
    stored in the C{value} attribute. C{rating} is passed on unchanged as
    the C{rating} attribute.
    """
    value, rating = val
    if isinstance(value, frozenset):  # authors, keywords, proglangs etc.
        container = etree.SubElement(msg, key, rating=rating)
        for element in value:
            etree.SubElement(container, "value").text = ensure_unicode(element)
    else:  # isinstance(value, (str, int)) # title, language, year, pages etc.
        # the new element is attached to msg as a side effect; no reference
        # to it is needed afterwards
        etree.SubElement(msg, key, value=ensure_unicode(value), rating=rating)
Exemplo n.º 2
0
def __diamond_fs2xml(diamond):
    """
    Recursively converts a ``Diamond`` feature structure into an HLDS
    ``<diamond>`` XML element.

    The resulting element carries the diamond's ``mode`` as an attribute and
    contains, in this order: an optional ``<nom>``, an optional ``<prop>`` and
    one ``<diamond>`` child per nested ``Diamond`` value (ordered by sorted
    key).

    :type diamond: ``Diamond``
    :param diamond: a Diamond feature structure containing nom? prop? diamond*
    elements

    :rtype: ``etree._Element``
    :return: a Diamond in HLDS XML tree notation, represented as an etree
    element
    """
    builder = ElementMaker()
    diamond_etree = builder.diamond(
        mode=ensure_unicode(diamond[Feature("mode")]))

    if "prop" in diamond:
        diamond_etree.append(builder.prop(name=ensure_unicode(diamond["prop"])))
    if "nom" in diamond:
        # if present, nom(inal) has to be the first sub tag of a diamond,
        # so it is placed in front of a possibly existing prop
        diamond_etree.insert(0, builder.nom(name=ensure_unicode(diamond["nom"])))

    # keys need to be sorted, otherwise Diamonds within a Sentence will have a
    # different order than before. Diamond keys seem ordered, but they aren't
    # (keys beginning with numbers seem to be in descending order, those
    # beginning with letters in ascending order)
    nested = [diamond[key] for key in sorted(diamond.keys())
              if isinstance(diamond[key], Diamond)]

    # nested diamonds go after nom/prop, in sorted-key order
    diamond_etree.extend([__diamond_fs2xml(child) for child in nested])
    return diamond_etree
Exemplo n.º 3
0
def __diamond_fs2xml(diamond):
    """
    Recursively turns a C{Diamond} feature structure into the matching HLDS
    C{<diamond>} XML element.

    The element gets the diamond's C{mode} as an attribute. Its children are,
    in this order: C{<nom>} (if present), C{<prop>} (if present) and one
    C{<diamond>} per nested C{Diamond} value, ordered by sorted key.

    @type diamond: C{Diamond}
    @param diamond: a Diamond feature structure containing nom? prop? diamond*
    elements

    @rtype: C{etree._Element}
    @return: a Diamond in HLDS XML tree notation, represented as an etree
    element
    """
    make = ElementMaker()
    mode = ensure_unicode(diamond[Feature("mode")])
    diamond_etree = make.diamond(mode=mode)

    if "prop" in diamond:
        prop_name = ensure_unicode(diamond["prop"])
        diamond_etree.append(make.prop(name=prop_name))
    if "nom" in diamond:
        # if present, nom(inal) has to be the first sub tag of a diamond
        nom_name = ensure_unicode(diamond["nom"])
        diamond_etree.insert(0, make.nom(name=nom_name))

    # keys need to be sorted, otherwise Diamonds within a Sentence will have a
    # different order than before. Diamond keys seem ordered, but they aren't
    # (keys beginning with numbers seem to be in descending order, those
    # beginning with letters in ascending order)
    for key in sorted(diamond.keys()):
        candidate = diamond[key]
        if not isinstance(candidate, Diamond):
            continue
        # appending keeps nested diamonds behind nom/prop
        diamond_etree.append(__diamond_fs2xml(candidate))

    return diamond_etree
Exemplo n.º 4
0
def __sentence_fs2xml(sentence, mode="test"):
    """
    transforms a sentence (in NLTK feature structure notation) into its
    corresponding HLDS XML structure.

    In "test" mode the result is an ``<item numOfParses=... string=...>``
    element wrapping ``<xml><lf>...</lf></xml>``. For any other mode value
    the bare ``<lf>`` element is returned. In both cases ``<lf>`` contains a
    ``<satop>`` element with an optional ``<prop>`` child followed by one
    ``<diamond>`` per ``Diamond`` value of the sentence (ordered by sorted
    key).

    :type sentence: ``Sentence``
    :param sentence: a sentence in NLTK feature structure notation

    :type mode: ``str``
    :param mode: "test", if the sentence will be part of a (regression)
    testbed file (ccg-test). "realize", if the sentence will be put in a
    file on its own (ccg-realize).

    :rtype: ``etree._Element``
    :return: the input sentence in HLDS XML format (represented as an etree
    element)
    """
    # compare by value: `mode is "test"` only worked by accident of string
    # interning and fails for equal strings that are distinct objects
    if mode == "test":
        expected_parses = sentence[Feature("expected_parses")]
        text = sentence[Feature("text")]
        root = etree.Element("item",
                             numOfParses=str(expected_parses),
                             string=ensure_unicode(text))
        xml = etree.SubElement(root, "xml")
        lf = etree.SubElement(xml, "lf")
    else:  # mode == "realize"
        root = lf = etree.Element("lf")

    satop = etree.SubElement(lf, "satop", nom=sentence[Feature("root_nom")])

    if Feature("root_prop") in sentence:
        etree.SubElement(satop, "prop", name=sentence[Feature("root_prop")])

    # keys need to be sorted, otherwise Diamonds within a Sentence will have a
    # different order than before. Diamond keys seem ordered, but they aren't
    # (keys beginning with numbers seem to be in descending order, those
    # beginning with letters in ascending order)
    for key in sorted(sentence.keys()):
        if isinstance(sentence[key], Diamond):
            satop.append(__diamond_fs2xml(sentence[key]))

    return root
Exemplo n.º 5
0
def __sentence_fs2xml(sentence, mode="test"):
    """
    transforms a sentence (in NLTK feature structure notation) into its
    corresponding HLDS XML structure.

    In "test" mode the result is an C{<item numOfParses=... string=...>}
    element wrapping C{<xml><lf>...</lf></xml>}. For any other mode value the
    bare C{<lf>} element is returned. In both cases C{<lf>} contains a
    C{<satop>} element with an optional C{<prop>} child followed by one
    C{<diamond>} per C{Diamond} value of the sentence (ordered by sorted key).

    @type sentence: C{Sentence}
    @param sentence: a sentence in NLTK feature structure notation

    @type mode: C{str}
    @param mode: "test", if the sentence will be part of a (regression)
    testbed file (ccg-test). "realize", if the sentence will be put in a
    file on its own (ccg-realize).

    @rtype: C{etree._Element}
    @return: the input sentence in HLDS XML format (represented as an etree
    element)
    """
    # string equality, not identity: `is "test"` depends on interning and is
    # False for an equal string that happens to be a different object
    test_mode = (mode == "test")

    if test_mode:
        expected_parses = sentence[Feature("expected_parses")]
        text = sentence[Feature("text")]
        item = etree.Element("item", numOfParses=str(expected_parses),
                             string=ensure_unicode(text))
        xml = etree.SubElement(item, "xml")
        lf = etree.SubElement(xml, "lf")
    else:  # mode == "realize"
        lf = etree.Element("lf")

    root_nom = sentence[Feature("root_nom")]
    satop = etree.SubElement(lf, "satop", nom=root_nom)

    if Feature("root_prop") in sentence:
        root_prop = sentence[Feature("root_prop")]
        etree.SubElement(satop, "prop", name=root_prop)

    # keys need to be sorted, otherwise Diamonds within a Sentence will have a
    # different order than before. Diamond keys seem ordered, but they aren't
    # (keys beginning with numbers seem to be in descending order, those
    # beginning with letters in ascending order)
    diamonds = [sentence[key] for key in sorted(sentence.keys())
                if isinstance(sentence[key], Diamond)]

    for diamond in diamonds:
        # diamonds are placed after the optional prop element
        satop.append(__diamond_fs2xml(diamond))

    return item if test_mode else lf
Exemplo n.º 6
0
def answer_simple_qa(question, simple=True):
    """
    Sends a question to the graph QA HTTP service and returns the answer
    triples.

    The question is UTF-8 encoded and appended to the query string as-is.

    :param question: the question text
    :param simple: if true, the ``simple_qa`` endpoint is queried, otherwise
        the ``qa`` endpoint
    :return: ``None`` if the service replies with its "No answer !" marker;
        otherwise a list of ``[subject, relation, object]`` lists built from
        the ``triples`` entry of the JSON reply (empty if there is no such
        entry). Subject and object are the ``name`` fields of the first and
        third triple members, passed through ``delete_source_prefix``.
    """
    if simple:
        endpoint = 'http://10.1.1.28:8000/api/graph/simple_qa/?q='
    else:
        endpoint = 'http://10.1.1.28:8000/api/graph/qa/?q='

    data = get_data_url(endpoint + ensure_unicode(question).encode('utf-8'))
    if data == '\"No answer !\"':
        return None

    ans_dict = json.loads(data)
    return [
        [
            delete_source_prefix(triple[0]['name']),
            triple[1],
            delete_source_prefix(triple[2]['name']),
        ]
        for triple in ans_dict.get('triples', [])
    ]
Exemplo n.º 7
0
def __message_strkey_tupleval2xml(msg, key, val):
    """
    helper function for __message2xml(). converts those parts of a message
    to XML whose key is a string (i.e. most of them).

    A sub-element named ``key`` is attached to ``msg``; nothing is returned.

    :type msg: ``etree._Element``
    :param msg: the XML element the new sub-element is attached to

    :type key: ``str``
    :param key: tag name of the sub-element to create

    :type val: ``tuple``
    :param val: a ``(value, rating)`` pair. If ``value`` is a ``frozenset``,
        each member becomes a ``<value>`` child element; otherwise ``value``
        is stored in the ``value`` attribute. ``rating`` is passed on
        unchanged as the ``rating`` attribute.
    """
    value, rating = val
    if not isinstance(value, frozenset):
        # title, language, year, pages etc.: a single attribute-only element,
        # attached to msg as a side effect
        etree.SubElement(msg,
                         key,
                         value=ensure_unicode(value),
                         rating=rating)
        return

    # authors, keywords, proglangs etc.: one <value> child per set member
    msgkey = etree.SubElement(msg, key, rating=rating)
    for element in value:
        msgval = etree.SubElement(msgkey, "value")
        msgval.text = ensure_unicode(element)
Exemplo n.º 8
0
def __message_featurekey2xml(xml_msg, msg_key, msg_val):
    """
    helper function for __message2xml(). converts those parts of a message
    to XML whose key is of type C{Feature}, i.e. *reference_authors* and *reference_title*.

    A sub-element named C{msg_key.name} with the attribute C{feature="true"}
    is attached to C{xml_msg}; nothing is returned.

    @type xml_msg: C{etree._Element}
    @param xml_msg: the XML element the new sub-element is attached to

    @type msg_key: C{Feature}
    @param msg_key: the feature whose C{name} is used as the tag name

    @type msg_val: C{tuple}
    @param msg_val: a C{(value, rating)} pair, where C{value} is a
    C{frozenset} (each member becomes a C{<value>} child) or a C{str}
    (stored in the C{value} attribute). C{rating} is passed on unchanged as
    the C{rating} attribute.
    """
    value, rating = msg_val
    if isinstance(value, frozenset):  # *reference_authors*
        featkey = etree.SubElement(xml_msg, msg_key.name, feature="true",
                                   rating=rating)
        for element in value:
            featval = etree.SubElement(featkey, "value")
            # ensure_unicode instead of str(): str() fails on non-ASCII
            # unicode under Python 2, and every other text value written by
            # these helpers goes through ensure_unicode
            featval.text = ensure_unicode(element)
    else:  # isinstance(value, str) # *reference_title*
        etree.SubElement(xml_msg, msg_key.name, feature="true",
                         value=ensure_unicode(value), rating=rating)
Exemplo n.º 9
0
def add_nom_prefixes(diamond):
    """
    Rewrites, in place, the 'nom' value of every C{Diamond} found while
    walking the given C{Diamond} or C{Sentence} structure, so that it carries
    a prefix and a running index. Without this, I{ccg-realize} will only
    produce gibberish.

    Every <nom> tag has a 'name' attribute, which contains a category/type-like
    description of the corresponding <prop> tag's name attribute, e.g.::

        <diamond mode="PRÄP">
            <nom name="v1:zugehörigkeit"/>
            <prop name="von"/>
        </diamond>

    Here 'zugehörigkeit' is the name of a category that the preposition
    'von' belongs to. The prefix character is chosen by
    C{__determine_nom_prefix} (usually the first character of the prop name);
    the index counts how many diamonds with that prefix have been seen so far
    during the walk. In this example 'v1:zugehörigkeit' means that "von" is
    the first C{diamond} in the structure that starts with 'v' and belongs to
    the category 'zugehörigkeit'.

    The function returns nothing.
    """
    seen_per_prefix = defaultdict(int)

    # the walk is materialised into a list first, so the loop iterates over a
    # fixed sequence while it rewrites 'nom' values
    for e in list(diamond.walk()):
        if type(e) is not Diamond or "nom" not in e.keys():
            continue

        prefix = __determine_nom_prefix(e)
        seen_per_prefix[prefix] += 1

        plain_nom = e["nom"]
        prefixed_nom = "{0}{1}:{2}".format(ensure_utf8(prefix),
                                           seen_per_prefix[prefix],
                                           ensure_utf8(plain_nom))
        if type(plain_nom) == unicode:
            # preserve unicode, if the string was unicode encoded before
            prefixed_nom = ensure_unicode(prefixed_nom)
        e["nom"] = prefixed_nom
Exemplo n.º 10
0
def add_nom_prefixes(diamond):
    """
    Rewrites, in place, the 'nom' value of every ``Diamond`` found while
    walking the given ``Diamond`` or ``Sentence`` structure, so that it
    carries a prefix and a running index. Without this, ``ccg-realize`` will
    only produce gibberish.

    Every <nom> tag has a 'name' attribute, which contains a category/type-like
    description of the corresponding <prop> tag's name attribute, e.g.::

        <diamond mode="PRÄP">
            <nom name="v1:zugehörigkeit"/>
            <prop name="von"/>
        </diamond>

    Here 'zugehörigkeit' is the name of a category that the preposition
    'von' belongs to. The prefix character is chosen by
    ``__determine_nom_prefix`` (usually the first character of the prop
    name); the index counts how many diamonds with that prefix have been seen
    so far during the walk. In this example 'v1:zugehörigkeit' means that
    "von" is the first ``diamond`` in the structure that starts with 'v' and
    belongs to the category 'zugehörigkeit'.

    The function returns nothing.
    """
    prefix_counts = defaultdict(int)

    # take a snapshot of the walk before any 'nom' value is rewritten
    walked = list(diamond.walk())
    for e in walked:
        if type(e) is not Diamond:
            continue
        if "nom" not in e.keys():
            continue

        prefix_char = __determine_nom_prefix(e)
        prefix_counts[prefix_char] += 1
        index = prefix_counts[prefix_char]

        old_nom = e["nom"]
        was_unicode = type(old_nom) == unicode
        new_nom = "{0}{1}:{2}".format(ensure_utf8(prefix_char), index,
                                      ensure_utf8(old_nom))
        # preserve unicode, if the string was unicode encoded before
        e["nom"] = ensure_unicode(new_nom) if was_unicode else new_nom
Exemplo n.º 11
0
def answer_seq2seq_qa(question):
    """
    Answers a question by matching decoded relations against the attributes
    of the top entity found for the question.

    Steps, as performed below:

    1. ``ner_ques`` yields a segmentation plus two NER taggings (printed for
       debugging as 'crf' and 'simple').
    2. ``get_pattern`` derives a question pattern and entity name from the
       first tagging; if the pattern equals the unchanged question, the
       second tagging is tried instead.
    3. ``get_top_entity`` looks up the entity; ``decode_ques`` returns
       ``(relation, score)`` pairs for the pattern.
    4. The first relation that is a key of the entity mapping is used to
       build the answer.

    :param question: the question text
    :return: ``None`` if ``get_top_entity`` returns nothing; otherwise a list
        holding at most one ``[entity name, relation, value]`` triple (empty
        if no decoded relation is a key of the entity mapping)
    """
    seg_list, ner_list, simple_ner_list = ner_ques(question)
    print('crf: ' + ' '.join(ner_list))
    print('simple: ' + ' '.join(simple_ner_list))
    ques_pattern, entity_name = get_pattern(ner_list, seg_list)
    if ques_pattern == question:
        ques_pattern, entity_name = get_pattern(simple_ner_list, seg_list)
    print('|'.join((ques_pattern, entity_name)))

    sub_entity_dict = get_top_entity(entity_name)
    if not sub_entity_dict:
        return None

    for rel, _rel_score in decode_ques(ques_pattern, 20):
        if rel not in sub_entity_dict:
            continue
        test_rel = ensure_unicode(rel)
        if not test_rel:
            # an empty relation name yields no answer, as before
            return []
        # fetch the value with the key that passed the membership test; the
        # converted test_rel may differ in type from the stored key and miss
        return [[sub_entity_dict['name'], test_rel, sub_entity_dict[rel]]]

    return []
Exemplo n.º 12
0
def __message_featurekey2xml(xml_msg, msg_key, msg_val):
    """
    helper function for __message2xml(). converts those parts of a message
    to XML whose key is of type ``Feature``, i.e. *reference_authors* and *reference_title*.

    A sub-element named ``msg_key.name`` with the attribute
    ``feature="true"`` is attached to ``xml_msg``; nothing is returned.

    :type xml_msg: ``etree._Element``
    :param xml_msg: the XML element the new sub-element is attached to

    :type msg_key: ``Feature``
    :param msg_key: the feature whose ``name`` is used as the tag name

    :type msg_val: ``tuple``
    :param msg_val: a ``(value, rating)`` pair, where ``value`` is a
        ``frozenset`` (each member becomes a ``<value>`` child) or a ``str``
        (stored in the ``value`` attribute). ``rating`` is passed on
        unchanged as the ``rating`` attribute.
    """
    value, rating = msg_val
    if not isinstance(value, frozenset):  # *reference_title*
        # attached to xml_msg as a side effect; no reference is kept
        etree.SubElement(xml_msg,
                         msg_key.name,
                         feature="true",
                         value=ensure_unicode(value),
                         rating=rating)
        return

    # *reference_authors*: one <value> child per set member
    featkey = etree.SubElement(xml_msg,
                               msg_key.name,
                               feature="true",
                               rating=rating)
    for element in value:
        featval = etree.SubElement(featkey, "value")
        # ensure_unicode instead of str(): str() fails on non-ASCII unicode
        # under Python 2, and every other text value written by these
        # helpers goes through ensure_unicode
        featval.text = ensure_unicode(element)