Пример #1
0
 def test_shared_ic(self):
     """Check shared-IC values with and without the disjointness correction.

     For every IC measure in ``IC_NAMES`` and every
     ``(one, two, mica, z)`` row of ``SHARED_IC_RESULTS``:

     * without disjoints, the shared IC must equal the IC of ``mica``;
     * with disjoints, it must equal
       ``IC(mica) - factor * (IC(mica) - IC(z))``, or 0 when ``z`` is
       ``None``.
     """
     disjoint_factor = plugin.DisjointFactor()
     for ic_name in IC_NAMES:
         plain_ic = plugin.ICCalculator(ic_name)
         shared_plain = plugin.SharedICCalculator(
             ic_name, use_disjoints=False)
         shared_disjoint = plugin.SharedICCalculator(
             ic_name, use_disjoints=True)

         for first, second, mica, z in SHARED_IC_RESULTS:
             first_id = utils.get_id(first)
             second_id = utils.get_id(second)
             ic_mica = plain_ic.get(utils.get_id(mica))

             # Without disjoints the shared IC is just the IC of `mica`.
             observed = shared_plain.get(first_id, second_id)
             assert abs(observed - ic_mica) < EPSILON

             if z is not None:
                 ic_z = plain_ic.get(utils.get_id(z))
                 factor = disjoint_factor.get(first_id, second_id)
                 expected = ic_mica - factor * (ic_mica - ic_z)
             else:
                 # Rows without a `z` entry expect a shared IC of 0.
                 expected = 0

             observed = shared_disjoint.get(first_id, second_id)
             assert abs(observed - expected) < EPSILON
Пример #2
0
    def test_shared_ic(self):
        """Check shared-IC values with and without the disjointness correction.

        For every IC measure in ``IC_NAMES`` and every
        ``(one, two, mica, z)`` row of ``SHARED_IC_RESULTS``:

        * without disjoints, the shared IC must equal the IC of ``mica``;
        * with disjoints, it must equal
          ``IC(mica) - factor * (IC(mica) - IC(z))``, or 0 when ``z`` is
          ``None``.
        """
        disjoint_factor = plugin.DisjointFactor()
        for ic_name in IC_NAMES:
            plain_ic = plugin.ICCalculator(ic_name)
            shared_plain = plugin.SharedICCalculator(
                ic_name, use_disjoints=False)
            shared_disjoint = plugin.SharedICCalculator(
                ic_name, use_disjoints=True)

            for first, second, mica, z in SHARED_IC_RESULTS:
                first_id = utils.get_id(first)
                second_id = utils.get_id(second)
                ic_mica = plain_ic.get(utils.get_id(mica))

                # Without disjoints the shared IC is just the IC of `mica`.
                observed = shared_plain.get(first_id, second_id)
                assert abs(observed - ic_mica) < EPSILON

                if z is not None:
                    ic_z = plain_ic.get(utils.get_id(z))
                    factor = disjoint_factor.get(first_id, second_id)
                    expected = ic_mica - factor * (ic_mica - ic_z)
                else:
                    # Rows without a `z` entry expect a shared IC of 0.
                    expected = 0

                observed = shared_disjoint.get(first_id, second_id)
                assert abs(observed - expected) < EPSILON
Пример #3
0
def chain_to_ids(chain):
    """Translate a chain of names into the corresponding list of ids.

    Every element except the last is resolved with
    ``utils.get_id(..., "ObjectProperty")``; the last element is resolved
    with a plain ``utils.get_id`` call. The result is a new list in the
    same order as ``chain``.
    """
    leading = chain[:-1]
    last = chain[-1]

    ids = []
    for prop in leading:
        ids.append(utils.get_id(prop, "ObjectProperty"))
    ids.append(utils.get_id(last))
    return ids
Пример #4
0
def chain_to_ids(chain):
    """Translate a chain of names into the corresponding list of ids.

    Every element except the last is resolved with
    ``utils.get_id(..., "ObjectProperty")``; the last element is resolved
    with a plain ``utils.get_id`` call. The result is a new list in the
    same order as ``chain``.
    """
    leading = chain[:-1]
    last = chain[-1]

    ids = []
    for prop in leading:
        ids.append(utils.get_id(prop, "ObjectProperty"))
    ids.append(utils.get_id(last))
    return ids
Пример #5
0
def test_utils_get_id():
    """Check that ``utils.get_id`` and ``utils.get_entity`` round-trip.

    Every ``(iri, entity_type)`` pair from ``KNOWN_ENTITIES`` and
    ``UNKNOWN_ENTITIES`` is turned into an id and fetched back; the
    fetched pair must match the original. Finally, looking up id 1000
    must yield ``None``.
    """
    for iri, entity_type in KNOWN_ENTITIES + UNKNOWN_ENTITIES:
        entity_id = utils.get_id(iri, entity_type=entity_type)
        fetched_iri, fetched_type = utils.get_entity(entity_id)

        assert iri == fetched_iri and entity_type == fetched_type

    # Identity, not equality: `== None` can be fooled by a custom __eq__.
    # NOTE(review): 1000 is presumably an id no entity has been given;
    # confirm against the fixture size.
    assert utils.get_entity(1000) is None
Пример #6
0
def test_utils_get_id():
    """Check that ``utils.get_id`` and ``utils.get_entity`` round-trip.

    Every ``(iri, entity_type)`` pair from ``KNOWN_ENTITIES`` and
    ``UNKNOWN_ENTITIES`` is turned into an id and fetched back; the
    fetched pair must match the original. Finally, looking up id 1000
    must yield ``None``.
    """
    for iri, entity_type in KNOWN_ENTITIES + UNKNOWN_ENTITIES:
        entity_id = utils.get_id(iri, entity_type=entity_type)
        fetched_iri, fetched_type = utils.get_entity(entity_id)

        assert iri == fetched_iri and entity_type == fetched_type

    # Identity, not equality: `== None` can be fooled by a custom __eq__.
    # NOTE(review): 1000 is presumably an id no entity has been given;
    # confirm against the fixture size.
    assert utils.get_entity(1000) is None
Пример #7
0
 def test_ic_values(self):
     """Compare computed IC values against the ``IC_TESTS`` table.

     ``IC_TESTS`` maps an IRI to a sequence of expected values, indexed
     in the same order as ``IC_NAMES``. Each computed value must be
     within ``EPSILON`` of its expectation.
     """
     for column, measure in enumerate(IC_NAMES):
         calculator = plugin.ICCalculator(measure)
         for iri, expected_by_measure in IC_TESTS.items():
             ic = calculator.get(utils.get_id(iri))
             expected = expected_by_measure[column]
             # Printed before asserting so the values are on stdout
             # when the assertion fails.
             print(iri, ic, expected)
             assert abs(ic - expected) < EPSILON
Пример #8
0
 def test_ic_values(self):
     """Compare computed IC values against the ``IC_TESTS`` table.

     ``IC_TESTS`` maps an IRI to a sequence of expected values, indexed
     in the same order as ``IC_NAMES``. Each computed value must be
     within ``EPSILON`` of its expectation.
     """
     for column, measure in enumerate(IC_NAMES):
         calculator = plugin.ICCalculator(measure)
         for iri, expected_by_measure in IC_TESTS.items():
             ic = calculator.get(utils.get_id(iri))
             expected = expected_by_measure[column]
             # Printed before asserting so the values are on stdout
             # when the assertion fails.
             print(iri, ic, expected)
             assert abs(ic - expected) < EPSILON
Пример #9
0
    def __init__(self, *,
                 ic=None,
                 distance_threshold=3,
                 weight_threshold=0.3,
                 property_weights=None,
                 default_weight=0.7,
                 hierarchy_weight=0.8,
                 discover_subclasses=False):
        """Store the configuration and prepare the SQL query templates.

        All parameters are keyword-only.

        ic: when truthy, passed to ``ICCalculator`` and kept as
            ``self.ic_calculator``; otherwise that attribute is ``None``.
        distance_threshold, weight_threshold: stored unchanged.
        property_weights: optional mapping from property name to weight;
            each key is converted with
            ``utils.get_id(key, "ObjectProperty")``.
        default_weight: either a plain value stored as
            ``self.default_weight``, or a ``LogScale`` instance that
            supplies the initial weight table (the default then becomes 0).
        hierarchy_weight: weight stored under the ``None`` key.
        discover_subclasses: selects which hierarchy query is prepared.
        """
        self.distance_threshold = distance_threshold
        self.weight_threshold = weight_threshold
        self.discover_subclasses = discover_subclasses

        # With a LogScale, each property gets a weight based on how often
        # it is used in the database, so no flat default is needed.
        log_scaled = isinstance(default_weight, LogScale)
        if log_scaled:
            self.property_weights = default_weight.get_weights()
        else:
            self.property_weights = {}
        self.default_weight = 0 if log_scaled else default_weight

        # Explicit weights are written on top of the initial table.
        if property_weights is not None:
            for name, weight in property_weights.items():
                prop_id = utils.get_id(name, "ObjectProperty")
                self.property_weights[prop_id] = weight

        # The class-subclass property is represented in this code
        # by the None object.
        self.property_weights[None] = hierarchy_weight

        self.ic_calculator = ICCalculator(ic) if ic else None

        self.get_relations_query = (
            "SELECT chain, end, distance "
            "FROM existential_relations "
            "WHERE start = %s AND distance <= %s")

        if not discover_subclasses:
            self.get_hierarchy_query = (
                "SELECT superclass, distance "
                "FROM hierarchy "
                "WHERE subclass = %s AND distance <= %s")
        else:
            # Direct superclasses (distance 1) together with subclasses
            # up to a distance bound.
            self.get_hierarchy_query = (
                "SELECT superclass, distance "
                "FROM hierarchy "
                "WHERE subclass = %s AND distance = 1 "
                "UNION "
                "SELECT subclass, distance "
                "FROM hierarchy "
                "WHERE superclass = %s AND distance <= %s")
Пример #10
0
 def __init__(self,
              *,
              ic=None,
              distance_threshold=3,
              weight_threshold=0.3,
              property_weights=None,
              default_weight=0.7,
              hierarchy_weight=0.8,
              discover_subclasses=False):
     """Store the configuration and prepare the SQL query templates.

     All parameters are keyword-only.

     ic: when truthy, passed to ``ICCalculator`` and kept as
         ``self.ic_calculator``; otherwise that attribute is ``None``.
     distance_threshold, weight_threshold: stored unchanged.
     property_weights: optional mapping from property name to weight;
         each key is converted with
         ``utils.get_id(key, "ObjectProperty")``.
     default_weight: either a plain value stored as
         ``self.default_weight``, or a ``LogScale`` instance that
         supplies the initial weight table (the default then becomes 0).
     hierarchy_weight: weight stored under the ``None`` key.
     discover_subclasses: selects which hierarchy query is prepared.
     """
     self.distance_threshold = distance_threshold
     self.weight_threshold = weight_threshold
     self.discover_subclasses = discover_subclasses

     # With a LogScale, each property gets a weight based on how often
     # it is used in the database, so no flat default is needed.
     log_scaled = isinstance(default_weight, LogScale)
     if log_scaled:
         self.property_weights = default_weight.get_weights()
     else:
         self.property_weights = {}
     self.default_weight = 0 if log_scaled else default_weight

     # Explicit weights are written on top of the initial table.
     if property_weights is not None:
         for name, weight in property_weights.items():
             prop_id = utils.get_id(name, "ObjectProperty")
             self.property_weights[prop_id] = weight

     # The class-subclass property is represented in this code
     # by the None object.
     self.property_weights[None] = hierarchy_weight

     self.ic_calculator = ICCalculator(ic) if ic else None

     self.get_relations_query = ("SELECT chain, end, distance "
                                 "FROM existential_relations "
                                 "WHERE start = %s AND distance <= %s")

     if not discover_subclasses:
         self.get_hierarchy_query = (
             "SELECT superclass, distance "
             "FROM hierarchy "
             "WHERE subclass = %s AND distance <= %s")
     else:
         # Direct superclasses (distance 1) together with subclasses
         # up to a distance bound.
         self.get_hierarchy_query = (
             "SELECT superclass, distance "
             "FROM hierarchy "
             "WHERE subclass = %s AND distance = 1 "
             "UNION "
             "SELECT subclass, distance "
             "FROM hierarchy "
             "WHERE superclass = %s AND distance <= %s")
Пример #11
0
 def compare(self, one, two):
     """Return an IC-based distance between concepts ``one`` and ``two``.

     Equal arguments give 0 without any lookup. Otherwise both are
     resolved with ``utils.get_id`` and the result is
     ``(IC(one) + IC(two) - 2 * shared_IC(one, two)) / 2``, except in
     two special cases that both give a distance of 1: either IC is -1,
     or the two ICs sum to 0.
     """
     if one == two:
         return 0

     id_one = utils.get_id(one)
     id_two = utils.get_id(two)

     ic_one = self.ic_calculator.get(id_one)
     ic_two = self.ic_calculator.get(id_two)

     # An IC of -1 means the concept has no IC; treat the pair as
     # maximally distant.
     missing_ic = ic_one == -1 or ic_two == -1
     # If both have IC = 0, then IC(MICA) = 0; the distance is defined
     # to be 1 in that case as well.
     if missing_ic or ic_one + ic_two == 0:
         return 1

     shared = self.shared_ic_calculator.get(id_one, id_two)
     return (ic_one + ic_two - 2 * shared) / 2
Пример #12
0
 def compare(self, one, two):
     """Return an IC-based similarity between concepts ``one`` and ``two``.

     Equal arguments give 1 without any lookup. Otherwise both are
     resolved with ``utils.get_id`` and the result is
     ``2 * shared_IC(one, two) / (IC(one) + IC(two))``, except in two
     special cases that both give a similarity of 0: either IC is -1,
     or the two ICs sum to 0.
     """
     if one == two:
         return 1

     id_one = utils.get_id(one)
     id_two = utils.get_id(two)

     ic_one = self.ic_calculator.get(id_one)
     ic_two = self.ic_calculator.get(id_two)

     # An IC of -1 means the concept has no IC, so similarity is 0.
     missing_ic = ic_one == -1 or ic_two == -1
     # If both have IC = 0, then IC(MICA) = 0; similarity is defined
     # to be 0 in that case, which also avoids dividing by zero.
     if missing_ic or ic_one + ic_two == 0:
         return 0

     shared = self.shared_ic_calculator.get(id_one, id_two)
     return 2 * shared / (ic_one + ic_two)
Пример #13
0
    def compare(self, one, two):
        """Compare concept ``one`` against a collection of concepts ``two``.

        ``one`` is resolved with ``utils.get_id`` and ``two`` with
        ``utils.seq_to_ids``. The method returns 1 when any row fetched
        by ``self.get_super_query`` for ``one`` names a member of ``two``;
        otherwise it returns the largest ``self.inner.compare`` value
        over the members of ``two`` (never less than 0).
        """
        concept = utils.get_id(one)
        candidates = utils.seq_to_ids(two)

        # The query takes the concept id once or twice, depending on
        # the `two_args` flag.
        params = (concept, concept) if self.two_args else (concept,)

        with sql.lock:
            sql.cursor.execute(self.get_super_query, params)
            superclasses = set()
            for row in sql.cursor:
                superclasses.add(row[0])

        # If one of the candidates is a superclass of the concept,
        # the answer is 1 and no pairwise comparison is needed.
        if any(parent in candidates for parent in superclasses):
            return 1

        # Otherwise keep the best pairwise score, starting from 0.
        best = 0
        for candidate in candidates:
            score = self.inner.compare(concept, candidate)
            if score > best:
                best = score
        return best
Пример #14
0
 def test_disjoint_factor(self):
     """Check ``plugin.DisjointFactor`` against the expected values.

     Each ``(one, two, result)`` row of ``DISJOINT_FACTOR_RESULTS``
     must yield a factor within ``EPSILON`` of ``result``.
     """
     factor_calculator = plugin.DisjointFactor()
     for first, second, expected in DISJOINT_FACTOR_RESULTS:
         first_id = utils.get_id(first)
         second_id = utils.get_id(second)
         observed = factor_calculator.get(first_id, second_id)
         assert abs(observed - expected) < EPSILON
Пример #15
0
 def test_disjoint_factor(self):
     """Check ``plugin.DisjointFactor`` against the expected values.

     Each ``(one, two, result)`` row of ``DISJOINT_FACTOR_RESULTS``
     must yield a factor within ``EPSILON`` of ``result``.
     """
     factor_calculator = plugin.DisjointFactor()
     for first, second, expected in DISJOINT_FACTOR_RESULTS:
         first_id = utils.get_id(first)
         second_id = utils.get_id(second)
         observed = factor_calculator.get(first_id, second_id)
         assert abs(observed - expected) < EPSILON
Пример #16
0
 def compare(self, one, two):
     """Return the shared IC of concepts ``one`` and ``two``.

     Both arguments are resolved with ``utils.get_id`` and the pair of
     ids is handed to ``self.shared_ic_calculator.get``.
     """
     id_one = utils.get_id(one)
     id_two = utils.get_id(two)
     shared_ic = self.shared_ic_calculator.get(id_one, id_two)
     return shared_ic