示例#1
0
def output_triple(arg1, arg2, relation, raw, sources):
    arg1 = normalize(arg1).strip()
    arg2 = normalize(arg2).strip()
    relation = normalize_rel(relation).strip()
    found_relation = False
    if relation == 'be for':
        found_relation = True
        relation = 'UsedFor'
    if relation == 'be used for':
        found_relation = True
        relation = 'UsedFor'
    if relation == 'be not':
        found_relation = True
        relation = 'IsNot'
    if relation == 'be part of':
        found_relation = True
        relation = 'PartOf'
    if relation == 'be similar to':
        found_relation = True
        relation = 'SimilarTo'
    if relation.startswith('be ') and relation.endswith(
            ' of') and relation[3:-3] in TYPE_WORDS:
        found_relation = True
        relation = 'IsA'
    if found_relation:
        rel_node = GRAPH.get_or_create_relation(relation)
    else:
        rel_node = GRAPH.get_or_create_concept('en', relation)
    print '%s(%s, %s)' % \
        (relation, arg1, arg2),

    assertion = GRAPH.get_or_create_assertion(rel_node, [
        GRAPH.get_or_create_concept('en', arg1),
        GRAPH.get_or_create_concept('en', arg2)
    ], {
        'dataset': 'reverb/en',
        'license': 'CC-By-SA',
        'normalized': True
    })
    GRAPH.derive_normalized(raw, assertion)

    conjunction = GRAPH.get_or_create_conjunction([raw, reverb_triple])
    GRAPH.justify(conjunction, assertion)
    for source in sources:
        # Put in context with Wikipedia articles.
        topic = article_url_to_topic(source)
        context = GRAPH.get_or_create_concept('en', topic)
        context_normal = GRAPH.get_or_create_concept('en',
                                                     *normalize_topic(topic))
        GRAPH.add_context(assertion, context_normal)
        GRAPH.get_or_create_edge('normalized', context, context_normal)
        print "in", context_normal
    return assertion
示例#2
0
def output_triple(arg1, arg2, relation, raw, sources):
    arg1 = normalize(arg1).strip()
    arg2 = normalize(arg2).strip()
    relation = normalize_rel(relation).strip()
    found_relation = False
    if relation == 'be for':
        found_relation = True
        relation = 'UsedFor'
    if relation == 'be used for':
        found_relation = True
        relation = 'UsedFor'
    if relation == 'be not':
        found_relation = True
        relation = 'IsNot'
    if relation == 'be part of':
        found_relation = True
        relation = 'PartOf'
    if relation == 'be similar to':
        found_relation = True
        relation = 'SimilarTo'
    if relation.startswith('be ') and relation.endswith(' of') and relation[3:-3] in TYPE_WORDS:
        found_relation = True
        relation = 'IsA'
    if found_relation:
        rel_node = GRAPH.get_or_create_relation(relation)
    else:
        rel_node = GRAPH.get_or_create_concept('en', relation)
    print '%s(%s, %s)' % \
        (relation, arg1, arg2),

    assertion = GRAPH.get_or_create_assertion(
        rel_node,
        [GRAPH.get_or_create_concept('en', arg1),
         GRAPH.get_or_create_concept('en', arg2)],
        {'dataset': 'reverb/en', 'license': 'CC-By-SA',
         'normalized': True}
    )
    GRAPH.derive_normalized(raw, assertion)
    
    conjunction = GRAPH.get_or_create_conjunction([raw, reverb_triple])
    GRAPH.justify(conjunction, assertion)
    for source in sources:
        # Put in context with Wikipedia articles.
        topic = article_url_to_topic(source)
        context = GRAPH.get_or_create_concept('en', topic)
        context_normal = GRAPH.get_or_create_concept('en', *normalize_topic(topic))
        GRAPH.add_context(assertion, context_normal)
        GRAPH.get_or_create_edge('normalized', context, context_normal)
        print "in", context_normal
    return assertion
示例#3
0
def output_sentence(arg1, arg2, arg3, relation, raw, sources, prep=None):
    # arg3 is vestigial; we weren't getting sensible statements from it.
    if arg2.strip() == "":  # Remove "A is for B" sentence
        return
    arg1 = normalize(arg1).strip()
    arg2 = normalize(arg2).strip()
    assertion = None
    if arg3 == None:
        print '%s(%s, %s)' % (relation, arg1, arg2)
        assertion = GRAPH.get_or_create_assertion('/relation/' + relation, [
            GRAPH.get_or_create_concept('en', arg1),
            GRAPH.get_or_create_concept('en', arg2)
        ], {
            'dataset': 'reverb/en',
            'license': 'CC-By-SA',
            'normalized': True
        })
        assertions = (assertion, )
    else:
        print '%s(%s, %s)' % \
            (relation, arg1, arg2)
        assertion1 = GRAPH.get_or_create_assertion('/relation/' + relation, [
            GRAPH.get_or_create_concept('en', arg1),
            GRAPH.get_or_create_concept('en', arg2)
        ], {
            'dataset': 'reverb/en',
            'license': 'CC-By-SA',
            'normalized': True
        })
        #arg3 = normalize(arg3).strip()
        #assertion2 = GRAPH.get_or_create_assertion(
        #    GRAPH.get_or_create_concept('en', prep, 'p'),
        #    [GRAPH.get_or_create_concept('en', arg2),
        #     GRAPH.get_or_create_concept('en', arg3)],
        #    {'dataset': 'reverb/en', 'license': 'CC-By-SA',
        #     'normalized': True}
        #)
        assertions = (assertion1, )

    for assertion in assertions:
        conjunction = GRAPH.get_or_create_conjunction([raw, reverb_object])
        GRAPH.justify(conjunction, assertion)
        for source in sources:
            # Put in context with Wikipedia articles.
            topic = article_url_to_topic(source)
            context = GRAPH.get_or_create_concept('en',
                                                  *normalize_topic(topic))
            GRAPH.add_context(assertion, context)

    return assertion
示例#4
0
def output_sentence(arg1, arg2, arg3, relation, raw, sources, prep=None):
    # arg3 is vestigial; we weren't getting sensible statements from it.
    if arg2.strip() == "": # Remove "A is for B" sentence
        return
    arg1 = normalize(arg1).strip()
    arg2 = normalize(arg2).strip()
    assertion = None
    if arg3 == None:
        print '%s(%s, %s)' % (relation, arg1, arg2)
        assertion = GRAPH.get_or_create_assertion(
            '/relation/'+relation,
            [GRAPH.get_or_create_concept('en', arg1),
             GRAPH.get_or_create_concept('en', arg2)],
            {'dataset': 'reverb/en', 'license': 'CC-By-SA',
             'normalized': True}
        )
        assertions = (assertion,)
    else:
        print '%s(%s, %s)' % \
            (relation, arg1, arg2)
        assertion1 = GRAPH.get_or_create_assertion(
            '/relation/'+relation,
            [GRAPH.get_or_create_concept('en', arg1),
             GRAPH.get_or_create_concept('en', arg2)],
            {'dataset': 'reverb/en', 'license': 'CC-By-SA',
             'normalized': True}
        )
        #arg3 = normalize(arg3).strip()
        #assertion2 = GRAPH.get_or_create_assertion(
        #    GRAPH.get_or_create_concept('en', prep, 'p'),
        #    [GRAPH.get_or_create_concept('en', arg2),
        #     GRAPH.get_or_create_concept('en', arg3)],
        #    {'dataset': 'reverb/en', 'license': 'CC-By-SA',
        #     'normalized': True}
        #)
        assertions = (assertion1,)
    
    for assertion in assertions:
        conjunction = GRAPH.get_or_create_conjunction(
            [raw, reverb_object]
        )
        GRAPH.justify(conjunction, assertion)
        for source in sources:
            # Put in context with Wikipedia articles.
            topic = article_url_to_topic(source)
            context = GRAPH.get_or_create_concept('en', *normalize_topic(topic))
            GRAPH.add_context(assertion, context)

    return assertion
示例#5
0
def output_steps(goal, steps, source):
    """Record that `goal` HasSteps the given Sequence of steps.

    Builds the raw (unnormalized) assertions first, then the normalized
    ones, links them via derive_normalized, and justifies the normalized
    pair according to which corpus `source` names.  Returns the
    normalized HasSteps assertion.
    """
    def build_pair(goal_text, step_texts):
        # Make a Sequence assertion over the steps and a HasSteps
        # assertion tying the goal to that sequence.
        step_nodes = []
        for step in step_texts:
            step_nodes.append(GRAPH.get_or_create_concept('en', step))
        seq = GRAPH.get_or_create_assertion(
            '/relation/Sequence', step_nodes,
            {'dataset': 'goalnet/en', 'license': 'CC-By-SA'}
        )
        has_steps = GRAPH.get_or_create_assertion(
            '/relation/HasSteps',
            [GRAPH.get_or_create_concept('en', goal_text), seq],
            {'dataset': 'goalnet/en', 'license': 'CC-By-SA'}
        )
        return seq, has_steps

    # Raw assertions, straight from the input text.
    raw_sequence, raw_assertion = build_pair(goal, steps)

    # Normalized assertions.
    goal = normalize(goal).strip().lower()
    steps = [normalize(step).strip().lower() for step in steps]
    sequence, assertion = build_pair(goal, steps)
    GRAPH.derive_normalized(raw_sequence, sequence)
    GRAPH.derive_normalized(raw_assertion, assertion)

    # Justification, per source corpus.  The wikihow justification is
    # deliberately disabled (kept as-is from the original).
    if source == 'wikihow':
        pass
    elif source == 'omics':
        conjunction = GRAPH.get_or_create_conjunction([omics, goalnet, raw_sequence])
        GRAPH.justify(conjunction, sequence)
        conjunction = GRAPH.get_or_create_conjunction([omics, goalnet, raw_assertion])
        GRAPH.justify(conjunction, assertion)
    return assertion
示例#6
0
def output_steps(goal, steps, source):
    """Assert that `goal` HasSteps `steps`, justified by its source corpus."""
    goal = normalize(goal).strip()
    cleaned_steps = [normalize(step).strip() for step in steps]
    nodes = [GRAPH.get_or_create_concept('en', goal)]
    for step in cleaned_steps:
        nodes.append(GRAPH.get_or_create_concept('en', step))
    assertion = GRAPH.get_or_create_assertion(
        '/relation/HasSteps', nodes,
        {'dataset': 'goalnet/en', 'license': 'CC-By-SA'}
    )
    # wikihow-derived assertions get a reduced confidence of 0.8.
    if source == 'wikihow':
        conjunction = GRAPH.get_or_create_conjunction([wikihow, goalnet])
        GRAPH.justify(conjunction, assertion, 0.8)
    elif source == 'omics':
        conjunction = GRAPH.get_or_create_conjunction([omics, goalnet])
        GRAPH.justify(conjunction, assertion)
    return assertion
示例#7
0
def output_triple(arg1, arg2, relation, raw):
    arg1 = normalize(arg1).strip()
    arg2 = normalize(arg2).strip()
    relation = normalize(relation).strip()
    print '%s(%s, %s)' % \
        (relation, arg1, arg2)
    found_relation = False
    if relation == 'be for':
        found_relation = True
        relation = 'UsedFor'
    if relation == 'be used for':
        found_relation = True
        relation = 'UsedFor'
    if relation == 'be not':
        found_relation = True
        relation = 'IsNot'
    if relation == 'be part of':
        found_relation = True
        relation = 'PartOf'
    if relation == 'be similar to':
        found_relation = True
        relation = 'SimilarTo'
    if found_relation:
        rel_node = GRAPH.get_or_create_relation(relation)
    else:
        rel_node = GRAPH.get_or_create_concept('en', relation)

    assertion = GRAPH.get_or_create_assertion(
        rel_node,
        [GRAPH.get_or_create_concept('en', arg1),
         GRAPH.get_or_create_concept('en', arg2)],
        {'dataset': 'reverb/en', 'license': 'CC-By-SA'}
    )
    GRAPH.derive_normalized(raw, assertion)
    
    conjunction = GRAPH.get_or_create_conjunction([raw, reverb_triple])
    GRAPH.justify(conjunction, assertion)
    return assertion
def test_normalize():
    """Spot-check normalize() against known input/output pairs."""
    cases = [
        ('this is a test', 'this be test'),
        # If we're using simplenlp, this will give "catherine havasus"; this
        # is one of the reasons to switch to using Morphy.
        ('Catherine Havasi', 'catherine havasi'),
    ]
    for text, expected in cases:
        assert normalize(text) == expected
def english_normalize(text):
    """NFKC-normalize English text after dropping a leading "to "."""
    # Strip the infinitive marker so "to run" normalizes like "run".
    prefix = 'to '
    if text.startswith(prefix):
        text = text[len(prefix):]
    return normalize(unicodedata.normalize('NFKC', text))
示例#10
0
    if right.startswith("not "):
        right = right[4:]
        relation = "it is not"
    if relation == "it is the opposite of":
        relation = "it is not"

    freq = int(freq)
    orderscore = int(orderscore)
    if relation == "about the same size as":
        relation = "it is about the same size as"
    elif relation == "it looks like":
        relation = "it is related to"
    rel = mapping.get(relation)
    if rel is None:
        rel = "/concept/en/" + normalize(relation[3:]).replace(" ", "_")

    if relation == "it is" and (right.startswith("a ") or right.startswith("an ") or right.startswith("the ")):
        rel = "/relation/IsA"

    sls = sounds_like_score(left, right)
    text_similarities.append(sls)
    if sls > 0.35:
        # print "* %s sounds like %s (%4.4f)" % (left, right, sls)
        counts["text similarity"] += 1
        similar_out.write("%4.4d\t%s" % (sls, line))
        continue

    score = (freq * 2 - 1) * (1000 - orderscore) * (1 - sls) / 1000
    if score <= 0:
        counts["low score"] += 1
示例#11
0
def normalize_rel(text):
    """Normalize a relation phrase, trimming auxiliary noise words."""
    words = normalize(text).split()
    # Collapse "be be ..." / "have be ..." down to just "be ...".
    if len(words) >= 2 and words[0] in ('be', 'have') and words[1] == 'be':
        words = words[1:]
    # "also" carries no relational meaning; drop it.
    return ' '.join(w for w in words if w != 'also')
示例#12
0
def probably_present_tense(text):
    """Guess whether `text` is a present-tense verb form.

    True for "is"/"are", or when normalization leaves the text unchanged
    (i.e. it is already in its lemma form).
    """
    if text in ('is', 'are'):
        return True
    return normalize(text) == text
示例#13
0
def probably_present_tense(text):
    """Heuristic: "is"/"are", or text that normalization doesn't change,
    is treated as present tense."""
    is_copula = text == 'is' or text == 'are'
    return is_copula or normalize(text) == text
示例#14
0
def check_line(line):
    parts = line.strip().split()
    norm = normalize(parts[0])
    if norm != parts[1]:
        print "Original: %s / WordNet: %s / conceptnet: %s" %\
            (parts[0], parts[1], norm)
def check_line(line):
    parts = line.strip().split()
    norm = normalize(parts[0])
    if norm != parts[1]:
        print "Original: %s / WordNet: %s / conceptnet: %s" %\
            (parts[0], parts[1], norm)
示例#16
0
    if right.startswith('not '):
        right = right[4:]
        relation = 'it is not'
    if relation == 'it is the opposite of':
        relation = 'it is not'

    freq = int(freq)
    orderscore = int(orderscore)
    if relation == 'about the same size as':
        relation = 'it is about the same size as'
    elif relation == 'it looks like':
        relation = 'it is related to'
    rel = mapping.get(relation)
    if rel is None:
        rel = '/concept/en/'+normalize(relation[3:]).replace(' ', '_')
    
    if relation == 'it is' and\
       (right.startswith('a ') or right.startswith('an ')
        or right.startswith('the ')):
        rel = '/relation/IsA'
    
    sls = sounds_like_score(left, right)
    text_similarities.append(sls)
    if sls > 0.35:
        #print "* %s sounds like %s (%4.4f)" % (left, right, sls)
        counts['text similarity'] += 1
        similar_out.write('%4.4d\t%s' % (sls, line))
        continue
    
    score = (freq*2-1) * (1000-orderscore) * (1-sls) / 1000
示例#17
0
def english_normalize(text):
    """Normalize English text, first removing a leading "to " marker."""
    # Infinitives should normalize the same as their bare verb.
    stripped = text[3:] if text.startswith("to ") else text
    return normalize(unicodedata.normalize("NFKC", stripped))
示例#18
0
def test_normalize():
    """Sanity checks for normalize()."""
    simple = normalize('this is a test')
    assert simple == 'this be test'

    # If we're using simplenlp, this will give "catherine havasus"; this is
    # one of the reasons to switch to using Morphy.
    name = normalize('Catherine Havasi')
    assert name == 'catherine havasi'
示例#19
0
def normalize_rel(text):
    """Clean up a normalized relation phrase.

    Drops a redundant leading "be"/"have" before "be", and removes the
    filler word "also".
    """
    parts = normalize(text).split()
    redundant_leading_verb = (len(parts) >= 2
                              and parts[1] == 'be'
                              and parts[0] in ('be', 'have'))
    if redundant_leading_verb:
        parts = parts[1:]
    return ' '.join([p for p in parts if p != 'also'])