def __init__(self, source_agregated_scores, out_file=None,
             n_desirable_complete_classes=1000,
             n_instances_already_counted=False):
        """Keep the command configuration and create the API reader.

        :param source_agregated_scores: stored as ``_in_ag_scores``.
        :param out_file: stored as ``_out_file`` (defaults to ``None``).
        :param n_desirable_complete_classes: stored as ``_n_desirable``
            (defaults to 1000).
        :param n_instances_already_counted: stored as
            ``_n_instances_already_counted`` (defaults to ``False``).
        """
        # Results are accumulated here; the list starts out empty.
        self._summary_list = []

        # Plain configuration, kept exactly as received.
        self._n_instances_already_counted = n_instances_already_counted
        self._n_desirable = n_desirable_complete_classes
        self._out_file = out_file
        self._in_ag_scores = source_agregated_scores

        # Communications
        self._api_reader = WikidataApiReader()
class AliasesPropertiesCommand(object):
    """Command that completes a set of properties and dumps them as JSON.

    Target properties are parsed from ``source_file`` (with
    ``Json05PropertiesParser`` when ``json_input`` is true, otherwise with
    ``Ccv01PropertiesParser``). Each one is fetched again through the API
    reader and the collected results are handed to ``JsonPropertyDumper``.
    """

    def __init__(self, source_file, out_file, json_input=False):
        """Store the configuration and create the API reader.

        :param source_file: passed as ``source_file`` to the chosen parser.
        :param out_file: passed as ``out_file`` to ``JsonPropertyDumper``.
        :param json_input: selects ``Json05PropertiesParser`` when true and
            ``Ccv01PropertiesParser`` otherwise.
        """
        self._in_file = source_file
        self._out_file = out_file
        self._is_json_input = json_input
        self._api_reader = WikidataApiReader()

    def exec_command(self, string_return=False):
        """Fetch every target property and persist the ones that succeeded.

        A property whose retrieval raises is reported on stdout and skipped;
        the remaining properties are still processed.

        :param string_return: forwarded unchanged to ``JsonPropertyDumper``.
        :return: the value returned by
            ``JsonPropertyDumper.persist_properties``.
        """
        complete_properties = []
        for a_property in self._read_target_properties():
            try:
                complete_properties.append(
                    self._get_complete_property(a_property))
            except Exception:
                # Best-effort: one failing property must not abort the run.
                # ``Exception`` (instead of a bare ``except``) lets
                # KeyboardInterrupt and SystemExit propagate, so the loop
                # can still be interrupted.
                print("Error with property " + str(a_property))
        dumper = JsonPropertyDumper(
            out_file=self._out_file,
            indent=4,
            strict_mode=True,
            string_return=string_return,
            needed_fields=[P_ID, P_LABEL, P_APPEARANCES, P_DESC],
        )
        return dumper.persist_properties(complete_properties)

    def _read_target_properties(self):
        """Yield the properties produced by the parser matching the input."""
        if self._is_json_input:
            parser = Json05PropertiesParser(source_file=self._in_file)
        else:
            parser = Ccv01PropertiesParser(source_file=self._in_file)
        for a_prop in parser.yield_properties():
            yield a_prop

    def _get_complete_property(self, anemic_property):
        """Return the API version of a property, keeping its appearances.

        :param anemic_property: object exposing ``id`` and ``n_appearances``.
        :return: the object returned by ``get_property`` for that id, with
            ``n_appearances`` overwritten by the value of ``anemic_property``.
        """
        complete = self._api_reader.get_property(anemic_property.id)
        complete.n_appearances = anemic_property.n_appearances
        return complete
    def __init__(self,
                 source_agregated_scores,
                 out_file=None,
                 n_desirable_complete_classes=1000,
                 n_instances_already_counted=False):
        """Keep the command configuration and create the API reader.

        :param source_agregated_scores: stored as ``_in_ag_scores``.
        :param out_file: stored as ``_out_file`` (defaults to ``None``).
        :param n_desirable_complete_classes: stored as ``_n_desirable``
            (defaults to 1000).
        :param n_instances_already_counted: stored as
            ``_n_instances_already_counted`` (defaults to ``False``).
        """
        # Results are accumulated here; the list starts out empty.
        self._summary_list = []

        # Plain configuration, kept exactly as received.
        self._n_instances_already_counted = n_instances_already_counted
        self._n_desirable = n_desirable_complete_classes
        self._out_file = out_file
        self._in_ag_scores = source_agregated_scores

        # Communications
        self._api_reader = WikidataApiReader()
# Example #4
# 0
class AliasesPropertiesCommand(object):
    """Command that completes a set of properties and dumps them as JSON.

    Target properties are parsed from ``source_file`` (with
    ``Json05PropertiesParser`` when ``json_input`` is true, otherwise with
    ``Ccv01PropertiesParser``). Each one is fetched again through the API
    reader and the collected results are handed to ``JsonPropertyDumper``.
    """

    def __init__(self, source_file, out_file, json_input=False):
        """Store the configuration and create the API reader.

        :param source_file: passed as ``source_file`` to the chosen parser.
        :param out_file: passed as ``out_file`` to ``JsonPropertyDumper``.
        :param json_input: selects ``Json05PropertiesParser`` when true and
            ``Ccv01PropertiesParser`` otherwise.
        """
        self._in_file = source_file
        self._out_file = out_file
        self._is_json_input = json_input
        self._api_reader = WikidataApiReader()

    def exec_command(self, string_return=False):
        """Fetch every target property and persist the ones that succeeded.

        A property whose retrieval raises is reported on stdout and skipped;
        the remaining properties are still processed.

        :param string_return: forwarded unchanged to ``JsonPropertyDumper``.
        :return: the value returned by
            ``JsonPropertyDumper.persist_properties``.
        """
        fetched = []
        for a_property in self._read_target_properties():
            try:
                fetched.append(self._get_complete_property(a_property))
            except Exception:
                # Deliberately best-effort, but narrowed from a bare
                # ``except`` so that KeyboardInterrupt and SystemExit are
                # no longer swallowed by the loop.
                print("Error with property " + str(a_property))
        return JsonPropertyDumper(
            out_file=self._out_file,
            indent=4,
            strict_mode=True,
            string_return=string_return,
            needed_fields=[P_ID, P_LABEL, P_APPEARANCES, P_DESC],
        ).persist_properties(fetched)

    def _read_target_properties(self):
        """Yield the properties produced by the parser matching the input."""
        # Only the parser class differs between the two input formats.
        if self._is_json_input:
            parser_class = Json05PropertiesParser
        else:
            parser_class = Ccv01PropertiesParser
        for a_prop in parser_class(source_file=self._in_file).yield_properties():
            yield a_prop

    def _get_complete_property(self, anemic_property):
        """Return the API version of a property, keeping its appearances.

        :param anemic_property: object exposing ``id`` and ``n_appearances``.
        :return: the object returned by ``get_property`` for that id, with
            ``n_appearances`` overwritten by the value of ``anemic_property``.
        """
        complete = self._api_reader.get_property(anemic_property.id)
        complete.n_appearances = anemic_property.n_appearances
        return complete
# Example #5
# 0
 def __init__(self, source_file, out_file, json_input=False):
     """Store the command configuration and create the API reader.

     :param source_file: stored as ``_in_file``.
     :param out_file: stored as ``_out_file``.
     :param json_input: stored as ``_is_json_input`` (defaults to ``False``).
     """
     # Input format flag first, then the two file references.
     self._is_json_input = json_input
     self._out_file = out_file
     self._in_file = source_file

     # Reader used for API lookups.
     self._api_reader = WikidataApiReader()
 def __init__(self, source_file, out_file, json_input=False):
     """Store the command configuration and create the API reader.

     :param source_file: stored as ``_in_file``.
     :param out_file: stored as ``_out_file``.
     :param json_input: stored as ``_is_json_input`` (defaults to ``False``).
     """
     # Input format flag first, then the two file references.
     self._is_json_input = json_input
     self._out_file = out_file
     self._in_file = source_file

     # Reader used for API lookups.
     self._api_reader = WikidataApiReader()
class AgregatedClassSummaryCommand(object):
    """Command that builds one summary dict per class and writes them out.

    The input is loaded with ``read_json_object`` and iterated as a sequence
    of class dicts. The resulting summaries are accumulated in an internal
    list that is finally written with ``write_json_object``.
    """

    def __init__(self,
                 source_agregated_scores,
                 out_file=None,
                 n_desirable_complete_classes=1000,
                 n_instances_already_counted=False):
        """Store the configuration and create the API reader.

        :param source_agregated_scores: input handed to ``read_json_object``.
        :param out_file: handed to ``write_json_object`` as ``path``.
        :param n_desirable_complete_classes: number of leading classes (by
            position in the input) for which label and description are
            requested from the API reader.
        :param n_instances_already_counted: when true, the instance count is
            copied from each input dict instead of being computed.
        """
        self._in_ag_scores = source_agregated_scores
        self._out_file = out_file
        self._n_desirable = n_desirable_complete_classes
        self._summary_list = []
        self._n_instances_already_counted = n_instances_already_counted

        # Communications
        self._api_reader = WikidataApiReader()

    def exec_command(self, string_return=False):
        """Summarize every input class, print the total and serialize.

        :param string_return: forwarded to ``_serialize_results``, which
            currently ignores it.
        """
        tracked_counter = 0
        for a_class_dict in self._read_raw_classes():
            self._summary_list.append(
                self._get_summary_dict(a_class_dict, tracked_counter))
            tracked_counter += 1
        # The two spaces after the colon reproduce exactly what the former
        # ``print "Total: ", n`` statement wrote; this form is valid on both
        # Python 2 and Python 3.
        print("Total:  %s" % (tracked_counter,))

        self._serialize_results(string_return)

    def _get_summary_dict(self, raw_class_dict, counter):
        """Build the summary dict of a single class.

        :param raw_class_dict: input dict of the class; must contain
            ``KEY_ID`` and ``KEY_ACCUMULATED``, plus ``KEY_INSTANCES`` or
            ``KEY_N_INSTANCES`` depending on the counting mode.
        :param counter: zero-based position of the class in the input; label
            and description are only requested while it is below the
            configured limit.
        :return: dict with id, accumulated score, label, description,
            instance count and, when present in the input, the two optional
            position keys.
        """
        result = {
            KEY_ID: raw_class_dict[KEY_ID],
            KEY_ACCUMULATED: raw_class_dict[KEY_ACCUMULATED],
            KEY_LABEL: None,
            KEY_DESC: None,
        }
        # Optional ranking positions are copied through only when present.
        for optional_key in (KEY_POS_INSTANCE_COUNTING, KEY_POS_CLASSRANK):
            if optional_key in raw_class_dict:
                result[optional_key] = raw_class_dict[optional_key]

        if self._n_instances_already_counted:
            result[KEY_N_INSTANCES] = raw_class_dict[KEY_N_INSTANCES]
        else:
            result[KEY_N_INSTANCES] = self._count_dict_instances(
                raw_class_dict)

        if counter < self._n_desirable:
            try:
                tracked_entity = self._api_reader.get_entity(
                    raw_class_dict[KEY_ID])
                result[KEY_LABEL] = tracked_entity.label
                result[KEY_DESC] = tracked_entity.description
            except Exception:
                # Best-effort lookup: label/description stay as they are and
                # the run continues. ``Exception`` (instead of a bare
                # ``except``) lets KeyboardInterrupt and SystemExit through.
                # The double space matches the output of the former print
                # statement.
                print("Hubo problemas con  %s" % (raw_class_dict[KEY_ID],))
            # Progress trace for the classes that trigger an API lookup.
            print(counter)
        return result

    def _count_dict_instances(self, raw_class_dict):
        """Return the number of distinct ids found under ``KEY_INSTANCES``.

        An id listed under several property keys is counted once.
        """
        return len(set().union(*raw_class_dict[KEY_INSTANCES].values()))

    def _read_raw_classes(self):
        """Load and return the raw classes from the configured input.

        Sample of the expected content, taken from the original author's
        notes::

            [{"id": "Q31",
              KEY_ACCUMULATED: 8,
              KEY_INSTANCES: {"P1": ["Q5", "Q3"]}},          # 2 instances
             {"id": "Q35",
              KEY_ACCUMULATED: 8,
              KEY_INSTANCES: {"P1": ["Q5", "Q3"],
                              "P2": ["Q3", "Q2", "Q1"]}}]    # 4 (Q3 repeated)
        """
        return read_json_object(self._in_ag_scores)

    def _serialize_results(self, string_return):
        """Write the accumulated summaries to the output file.

        :param string_return: currently unused.
        """
        # TODO: implement string_return mode. Now, we are assuming string_return=False
        write_json_object(json_object=self._summary_list,
                          path=self._out_file,
                          indent=4)
class AgregatedClassSummaryCommand(object):
    """Command that builds one summary dict per class and writes them out.

    The input is loaded with ``read_json_object`` and iterated as a sequence
    of class dicts. The resulting summaries are accumulated in an internal
    list that is finally written with ``write_json_object``.
    """

    def __init__(self, source_agregated_scores, out_file=None, n_desirable_complete_classes=1000,
                 n_instances_already_counted=False):
        """Store the configuration and create the API reader.

        :param source_agregated_scores: input handed to ``read_json_object``.
        :param out_file: handed to ``write_json_object`` as ``path``.
        :param n_desirable_complete_classes: number of leading classes (by
            position in the input) for which label and description are
            requested from the API reader.
        :param n_instances_already_counted: when true, the instance count is
            copied from each input dict instead of being computed.
        """
        self._in_ag_scores = source_agregated_scores
        self._out_file = out_file
        self._n_desirable = n_desirable_complete_classes
        self._summary_list = []
        self._n_instances_already_counted = n_instances_already_counted

        # Communications
        self._api_reader = WikidataApiReader()

    def exec_command(self, string_return=False):
        """Summarize every input class, print the total and serialize.

        :param string_return: forwarded to ``_serialize_results``, which
            currently ignores it.
        """
        total = 0
        for position, a_class_dict in enumerate(self._read_raw_classes()):
            self._summary_list.append(self._get_summary_dict(a_class_dict, position))
            total = position + 1
        # Valid on both Python 2 and 3; the two spaces after the colon keep
        # the output identical to the former ``print "Total: ", n``.
        print("Total:  %s" % (total,))

        self._serialize_results(string_return)

    def _get_summary_dict(self, raw_class_dict, counter):
        """Build the summary dict of a single class.

        :param raw_class_dict: input dict of the class; must contain
            ``KEY_ID`` and ``KEY_ACCUMULATED``, plus ``KEY_INSTANCES`` or
            ``KEY_N_INSTANCES`` depending on the counting mode.
        :param counter: zero-based position of the class in the input; label
            and description are only requested while it is below the
            configured limit.
        :return: dict with id, accumulated score, label, description,
            instance count and, when present in the input, the two optional
            position keys.
        """
        class_id = raw_class_dict[KEY_ID]
        result = {KEY_ID: class_id,
                  KEY_ACCUMULATED: raw_class_dict[KEY_ACCUMULATED],
                  KEY_LABEL: None,
                  KEY_DESC: None}
        # Optional ranking positions are copied through only when present.
        for optional_key in (KEY_POS_INSTANCE_COUNTING, KEY_POS_CLASSRANK):
            if optional_key in raw_class_dict:
                result[optional_key] = raw_class_dict[optional_key]

        if self._n_instances_already_counted:
            result[KEY_N_INSTANCES] = raw_class_dict[KEY_N_INSTANCES]
        else:
            result[KEY_N_INSTANCES] = self._count_dict_instances(raw_class_dict)

        if counter < self._n_desirable:
            try:
                tracked_entity = self._api_reader.get_entity(class_id)
                result[KEY_LABEL] = tracked_entity.label
                result[KEY_DESC] = tracked_entity.description
            except Exception:
                # Best-effort lookup, narrowed from a bare ``except`` so that
                # KeyboardInterrupt and SystemExit are not swallowed. The
                # double space matches the former print statement's output.
                print("Hubo problemas con  %s" % (class_id,))
            # Progress trace for the classes that trigger an API lookup.
            print(counter)
        return result

    def _count_dict_instances(self, raw_class_dict):
        """Return the number of distinct ids found under ``KEY_INSTANCES``.

        An id listed under several property keys is counted once.
        """
        instances_by_property = raw_class_dict[KEY_INSTANCES]
        distinct_ids = {an_entity_id
                        for a_prop_key in instances_by_property
                        for an_entity_id in instances_by_property[a_prop_key]}
        return len(distinct_ids)

    def _read_raw_classes(self):
        """Load and return the raw classes from the configured input.

        Sample of the expected content, taken from the original author's
        notes::

            [{"id": "Q31",
              KEY_ACCUMULATED: 8,
              KEY_INSTANCES: {"P1": ["Q5", "Q3"]}},          # 2 instances
             {"id": "Q35",
              KEY_ACCUMULATED: 8,
              KEY_INSTANCES: {"P1": ["Q5", "Q3"],
                              "P2": ["Q3", "Q2", "Q1"]}}]    # 4 (Q3 repeated)
        """
        return read_json_object(self._in_ag_scores)

    def _serialize_results(self, string_return):
        """Write the accumulated summaries to the output file.

        :param string_return: currently unused.
        """
        # TODO: implement string_return mode. Now, we are assuming string_return=False
        write_json_object(json_object=self._summary_list,
                          path=self._out_file,
                          indent=4)