def read_logmap_mapping(filename):
    """Load a '|'-separated mapping file into a one-to-many dictionary.

    Every line is stripped and split on '|'. The first two fields are each
    passed through ``uri_prefix`` and recorded as a left -> right pair.

    :param filename: path of the mapping file to read.
    :return: dict whose keys are the left entries and whose values are the
        lists of right entries seen with them, in file order (repeated
        pairs are kept).
    """
    mappings_dict = {}
    with open(filename) as f:
        for raw_line in f:
            fields = raw_line.strip().split('|')
            source = uri_prefix(fields[0])
            target = uri_prefix(fields[1])
            # setdefault creates the list on first sight of this left entry.
            mappings_dict.setdefault(source, []).append(target)
    return mappings_dict
# Example #2
# 0
def append_super_class(c, p):
    """Append the prefixed IRI of ``c`` and of its chain of first super classes to ``p``.

    Starting at ``c``, the value ``uri_prefix(uri=c.iri)`` is appended to
    ``p``; the walk then continues with the first element returned by
    ``super_classes`` and stops at a class that has no super classes or
    whose super classes contain ``owl.Thing`` (``owl.Thing`` itself is not
    appended).

    :param c: class object exposing an ``iri`` attribute.
    :param p: list that receives the path; it is modified in place.
    :return: the same list ``p``.
    """
    p.append(uri_prefix(uri=c.iri))
    supclasses = super_classes(c=c)
    # Test for None / empty first: the membership test and len() would
    # raise TypeError on None before the None guard could ever be reached.
    if supclasses is None or len(supclasses) == 0 or owl.Thing in supclasses:
        return p
    return append_super_class(c=supclasses[0], p=p)
# Example #3
# 0
def get_class_name(o):
    """Collect the name and the first label of every class of ``o``.

    :param o: object exposing ``classes()``; each yielded class must
        provide ``name``, ``iri`` and ``label`` (with an ``en`` attribute).
    :return: dict keyed by ``uri_prefix(uri=iri)`` whose values are
        ``[name, first_label]``. Labels from ``label.en`` are considered
        before the remaining ones; ``first_label`` is ``None`` when the
        class has no label at all.
    """
    c_name = {}
    for cls in o.classes():
        short_name = cls.name
        # English labels first, then every label.
        candidates = cls.label.en + cls.label
        first_label = candidates[0] if len(candidates) > 0 else None
        c_name[uri_prefix(uri=cls.iri)] = [short_name, first_label]
    return c_name
def read_oaei_mappings(file_name):
    """Parse an alignment XML file into lists of 'left|right' strings.

    The tree is walked as root -> child -> grandchild; every grandchild
    whose tag contains ``'map'`` is expected to hold cell elements. For
    each cell, the first attribute value of each of its first two children
    is passed through ``uri_prefix``, lower-cased and joined with '|'. The
    text of the cell's child at index 3 is compared with ``'?'``
    (presumably the relation element -- confirm against the input format).

    :param file_name: path (or file object) accepted by ``ET.parse``.
    :return: tuple ``(mappings_str, all_mappings_str)`` where
        ``all_mappings_str`` holds one string per cell and ``mappings_str``
        only those cells whose index-3 child text is not ``'?'``.
    """
    tree = ET.parse(file_name)
    mappings_str = list()
    all_mappings_str = list()
    # Element.getchildren() was removed in Python 3.9; iterating an element
    # yields the same direct children on every version.
    for t in tree.getroot():
        for m in t:
            if 'map' not in m.tag:
                continue
            for c in m:
                mapping = list()
                mv = '?'
                for i, v in enumerate(c):
                    if i < 2:
                        # Only the first attribute value of the entity is used.
                        for value in v.attrib.values():
                            mapping.append(uri_prefix(value).lower())
                            break
                    if i == 3:
                        mv = v.text
                joined = '|'.join(mapping)
                all_mappings_str.append(joined)
                if mv != '?':
                    mappings_str.append(joined)
    return mappings_str, all_mappings_str
# Example #5
# 0
# Load the inputs inside `with` blocks so every file handle is closed as
# soon as it has been read (the bare open(...) calls leaked them).
# Each line of a path file becomes one list of comma-separated entries.
with open(FLAGS.left_path_file) as path_file:
    left_paths = [line.strip().split(',') for line in path_file]
with open(FLAGS.right_path_file) as path_file:
    right_paths = [line.strip().split(',') for line in path_file]
# The class-name files are JSON documents.
with open(FLAGS.left_class_name_file) as name_file:
    left_names = json.load(name_file)
with open(FLAGS.right_class_name_file) as name_file:
    right_names = json.load(name_file)

mappings, mappings_n = [], []
with open(FLAGS.candidate_file) as f:
    for i, line in enumerate(f):
        # When the line contains ', ', only its second ', '-separated field
        # is the mapping; otherwise the whole stripped line is.
        if ', ' in line:
            m = line.strip().split(', ')[1]
        else:
            m = line.strip()
        m_split = m.split('|')
        c1 = uri_prefix(uri=m_split[0])
        c2 = uri_prefix(uri=m_split[1])

        # Both labels are built the same way, each from its own side's data.
        keep_uri = FLAGS.keep_uri == 'yes'
        n1 = get_label(cls=c1, paths=left_paths, names=left_names,
                       label_type='path', keep_uri=keep_uri)
        n2 = get_label(cls=c2, paths=right_paths, names=right_names,
                       label_type='path', keep_uri=keep_uri)

        # `origin` carries the 1-based line number and the two classes.
        origin = 'i=%d|%s|%s' % (i + 1, c1, c2)
        # NOTE(review): `name` is not used in the visible part of this
        # loop -- presumably consumed by code that is not shown here.
        name = '%s|%s' % (n1, n2)
        mappings.append(origin)
                    all_mappings_str.append('|'.join(mapping))
                    if not mv == '?':
                        mappings_str.append('|'.join(mapping))
    return mappings_str, all_mappings_str


if __name__ == "__main__":

    # Reference alignment: the first list is the subset of cells kept by
    # read_oaei_mappings, the second list contains every cell.
    ref_mappings_str, ref_all_mappings_str = read_oaei_mappings(file_name=FLAGS.oaei_GS_file)
    # Reference cells that appear only in the "all" list.
    ref_excluded_mappings_str = set(ref_all_mappings_str) - set(ref_mappings_str)

    # Anchor mappings: one 'left|right' pair per line, normalised with
    # uri_prefix and lower-cased.
    anchor_mappings_str = list()
    with open(FLAGS.anchor_mapping_file) as f:
        for line in f:
            tmp = line.strip().split('|')
            anchor_mappings_str.append('%s|%s' % (uri_prefix(tmp[0]).lower(), uri_prefix(tmp[1]).lower()))

    # Predictions: only every third line is read; its '|'-separated fields
    # 1 and 2 form the pair and field 3 is the score compared with the
    # threshold.
    pred_mappings_str = list()
    with open(FLAGS.prediction_out_file) as f:
        lines = f.readlines()
        for j in range(0, len(lines), 3):
            tmp = lines[j].split('|')
            if float(tmp[3]) >= FLAGS.threshold:
                pred_mappings_str.append('%s|%s' % (tmp[1].lower(), tmp[2].lower()))

    # Add the anchors that were not predicted. Membership is tracked in a
    # set so the merge is linear instead of rescanning the growing list;
    # the resulting list and its order are unchanged.
    pred_seen = set(pred_mappings_str)
    for a in anchor_mappings_str:
        if a not in pred_seen:
            pred_seen.add(a)
            pred_mappings_str.append(a)

    recall_num = 0
    for s in ref_mappings_str:
# Count the samples whose 'left|right' pair is not in GS (s_n) and, among
# those, the ones whose fourth field is 'true' (sv_n).
# NOTE(review): sv_n is only incremented here; it must be initialised by
# code that precedes this fragment.
s_n = 0
for sample in samples:
    tmp = sample.split('|')
    if '%s|%s' % (tmp[0], tmp[1]) in GS:
        # Pairs already in GS do not contribute to either counter.
        continue
    s_n += 1
    if len(tmp) >= 4 and tmp[3] == 'true':
        sv_n += 1

summary = (
    'All three systems: sampled mappings not in GS: %d, correct samples: %d, sampled precision: %f'
    % (s_n, sv_n, sv_n / s_n))
print(summary)

from lib.Label import uri_prefix

# Union of the mappings produced by the three systems, as 'left|right'
# strings. Files are read inside `with` blocks so the handles are closed.
mappings = set()
with open(LogMap_output_mapping_file) as f:
    for line in f:
        tmp = line.strip().split('|')
        c1 = uri_prefix(uri=tmp[0])
        c2 = uri_prefix(uri=tmp[1])
        # Pair the left class with the right class; the previous code
        # stored 'c2|c2', which dropped c1 and could not match a real pair.
        mappings.add('%s|%s' % (c1, c2))
with open(LogMap_ML_output_mapping_file) as f:
    # These lines are added as they are (stripped), without uri_prefix.
    for line in f:
        mappings.add(line.strip())
with open(AML_output_mapping_file) as f:
    for line in f:
        mappings.add(line.strip())
G_M_n = len(mappings - GS)
print('mappings out of GS: %d' % G_M_n)

# Estimated size of the full gold standard: GS plus the out-of-GS mappings
# scaled by the sampled precision sv_n / s_n.
num = len(GS) + sv_n / s_n * G_M_n
print('approximate GS size: %d' % num)
print('approximate recall: %.3f' % (app_TP / num))
# Example #8
# 0
    # Read the path files inside `with` blocks so the handles are closed
    # once consumed (the bare open(...) calls leaked them). Each line
    # becomes one list of comma-separated entries.
    with open(FLAGS.left_path_file) as path_file:
        left_paths = [line.strip().split(',') for line in path_file]
    with open(FLAGS.right_path_file) as path_file:
        right_paths = [line.strip().split(',') for line in path_file]

    mappings = list()
    rule_violated_mappings = list()
    with open(FLAGS.anchor_mapping_file) as f:
        for i, line in enumerate(f.readlines()):
            tmp = line.strip().split(', ')[1] if ', ' in line else line.strip()
            tmp2 = tmp.split('|')
            c1 = uri_prefix(uri=tmp2[0])
            c2 = uri_prefix(uri=tmp2[1])
            n1 = get_label(cls=c1,
                           paths=left_paths,
                           names=left_names,
                           label_type='path',
                           keep_uri=(FLAGS.keep_uri == 'yes'))
            n2 = get_label(cls=c2,
                           paths=right_paths,
                           names=right_names,
                           label_type='path',
                           keep_uri=(FLAGS.keep_uri == 'yes'))

            if not n1 == '""' and not n2 == '""':
                if violate_rules(p1_str=n1,
                                 p2_str=n2,