Пример #1
0
 def entities_and_features(self, class_name, features, new_dataset_name='dataset', entities_col_name='entity'):
     """
     Build a dataset of the entities typed as ``class_name`` together with
     the requested features.

     Sketch of the equivalent query:
     select ?e ?o1 ?o2 ..
     where {
             ?e  rdf:type ?class
             ?e ?p1 ?o1
             ?e ?p2 ?o2
             ..
     }
     An entity holding two values for one feature is expected to show up
     as two rows.

     :param class_name: class whose entities are retrieved; it is passed
         both as the seed value and as the seed column name
     :type class_name: string
     :param features: list of 2-tuples (feature_uri, new_col_name), one
         tuple per feature
     :type features: a list of tuples of strings
     :param new_dataset_name: name given to the created dataset
     :type new_dataset_name: string
     :param entities_col_name: name of the entities column
     :type entities_col_name: string
     :return: the dataset obtained from the ``rdf:type`` expansion, after the
         feature expansion has been applied to it
     :rtype: Dataset
     """
     seed = ExpandableDataset(self, new_dataset_name, class_name, class_name)
     # Incoming rdf:type edge: from the class to the entities typed with it.
     type_edge = RDFPredicate('rdf:type', entities_col_name, False, PredicateDirection.INCOMING)
     typed_entities = seed.expand(class_name, [type_edge])
     feature_edges = [
         RDFPredicate(feature_uri, column, False, PredicateDirection.OUTGOING)
         for feature_uri, column in features
     ]
     # The value returned by this second expand is deliberately not used;
     # the dataset from the first expansion is what gets returned.
     typed_entities.expand(entities_col_name, feature_edges)
     return typed_entities
Пример #2
0
 def features_and_freq(self, class_name, new_dataset_name='dataset',
                       features_col_name="feature", frequency_col_name='frequency'):
     """
     Build a dataset listing the features of ``class_name`` with a frequency
     per feature.

     Sketch of the intended query:
     select ?class ?p count(distinct ?e)
     where {
         ?e  type ?class.
         ?e ?p ?o
     }
         group by ?class, ?p

     :param class_name: class whose features are retrieved; passed both as
         the seed value and as the seed column name
     :type class_name: string
     :param new_dataset_name: name given to the created dataset
     :type new_dataset_name: string
     :param features_col_name: name of the features column; it is used in
         the predicate position of the second expansion and as the
         grouping column
     :type features_col_name: string
     :param frequency_col_name: name of the frequency column
     :type frequency_col_name: string
     :return: result of the grouped count
     :rtype: Dataset
     """
     instance_col = 'instance'
     value_col = 'feature_value'
     seed = ExpandableDataset(self, new_dataset_name, class_name, class_name)
     type_edge = RDFPredicate('rdf:type', instance_col, False, PredicateDirection.INCOMING)
     instances = seed.expand(class_name, [type_edge])
     feature_edge = RDFPredicate(features_col_name, value_col, False, PredicateDirection.OUTGOING)
     instance_features = instances.expand(instance_col, [feature_edge])
     per_feature = instance_features.group_by([features_col_name])
     # NOTE(review): the count is taken over distinct 'feature_value' entries,
     # whereas the query sketch counts distinct entities - confirm intent.
     return per_feature.count(value_col, frequency_col_name, unique=True)
Пример #3
0
 def describe_entity(self, entity, new_dataset_name='dataset', class_col_name='class',
                     feature_col_name='feature'):
     """
     Build a dataset describing a single entity: its class and its features.

     Sketch of the equivalent query:
     select ?class ?p
     where {
         ?e  rdf:type ?class
         ?e ?p ?o
     }
     Side effect: the ``rdf`` prefix is added to every entry of
     ``self.graph_prefixes`` that does not define it yet.

     :param entity: entity uri, used as the seed of the 'instance' column
     :type entity: string
     :param new_dataset_name: name given to the created dataset
     :type new_dataset_name: string
     :param class_col_name: name of the class column
     :type class_col_name: string
     :param feature_col_name: name of the feature column; it is passed in
         the predicate position of the second expansion
     :type feature_col_name: string
     :return: result of the two chained expansions on the 'instance' column
     :rtype: Dataset (whatever ``expand`` returns)
     """
     rdf_namespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     for graph_key in self.graph_prefixes:
         prefixes = self.graph_prefixes[graph_key]
         if "rdf" not in prefixes:
             prefixes['rdf'] = rdf_namespace

     instance_col = 'instance'
     seed = ExpandableDataset(self, new_dataset_name, entity, instance_col)
     with_class = seed.expand(
         instance_col, [('rdf:type', class_col_name, False, PredicateDirection.OUTGOING)])
     return with_class.expand(
         instance_col, [(feature_col_name, "feature_value", False, PredicateDirection.OUTGOING)])
Пример #4
0
 def feature_domain_range(self, feature, domain_col_name="domain", range_col_name="range",
                          new_dataset_name='dataset'):
     """
     Build a dataset of the subjects and objects connected by ``feature``.

     Sketch of the equivalent query:
         select ?s ?o
         where {
             ?s  feature ?o
         }

     :param feature: predicate whose domain and range are looked up
     :type feature: string
     :param domain_col_name: name of the domain (subject) column; passed
         both as the seed value and as the seed column name
     :type domain_col_name: string
     :param range_col_name: name of the range (object) column
     :type range_col_name: string
     :param new_dataset_name: name given to the created dataset
     :type new_dataset_name: string
     :return: result of expanding the domain column through ``feature``
     :rtype: Dataset
     """
     seed = ExpandableDataset(self, new_dataset_name, domain_col_name, domain_col_name)
     outgoing_edge = (feature, range_col_name, False, PredicateDirection.OUTGOING)
     return seed.expand(domain_col_name, [outgoing_edge])
Пример #5
0
 def num_entities(self, class_name, new_dataset_name='dataset',
                  num_entities_col_name='num_entities'):
     """
     Build a dataset holding the number of distinct entities typed as
     ``class_name``.

     Sketch of the equivalent query:
         select ?class count(distinct ?e)
         where {
             ?e  type ?class
         }
     Side effect: the ``rdf`` prefix is added to every entry of
     ``self.graph_prefixes`` that does not define it yet.

     :param class_name: class whose entities are counted; passed both as
         the seed value and as the seed column name
     :type class_name: string
     :param new_dataset_name: name given to the created dataset
     :type new_dataset_name: string
     :param num_entities_col_name: name of the column holding the count
     :type num_entities_col_name: string
     :return: result of the unique count over the 'instance' column
     :rtype: Dataset
     """
     rdf_namespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     for graph_key in self.graph_prefixes:
         prefixes = self.graph_prefixes[graph_key]
         if "rdf" not in prefixes:
             prefixes['rdf'] = rdf_namespace

     instance_col = 'instance'
     seed = ExpandableDataset(self, new_dataset_name, class_name, class_name)
     instances = seed.expand(
         class_name, [('rdf:type', instance_col, False, PredicateDirection.INCOMING)])
     return instances.count(instance_col, num_entities_col_name, unique=True)
Пример #6
0
 def classes_and_freq(self, new_dataset_name='dataset', classes_col_name='class',
                      frequency_col_name='frequency'):
     """
     Build a dataset listing the classes of the graph with their number of
     instances.

     Sketch of the intended query:
     select ?class count(distinct ?e)
         where {
             ?e  type ?class.
         }
         group by ?class
     Side effect: the ``rdf`` prefix is added to every entry of
     ``self.graph_prefixes`` that does not define it yet.
     Note that ``count`` is called here without ``unique=True``.

     :param new_dataset_name: name given to the created dataset
     :type new_dataset_name: string
     :param classes_col_name: name of the class column (also the grouping
         column)
     :type classes_col_name: string
     :param frequency_col_name: name of the frequency column
     :type frequency_col_name: string
     :return: result of the grouped count over the "instance" column
     :rtype: Dataset
     """
     rdf_namespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     for graph_key in self.graph_prefixes:
         prefixes = self.graph_prefixes[graph_key]
         if "rdf" not in prefixes:
             prefixes['rdf'] = rdf_namespace

     instance_col = "instance"
     seed = ExpandableDataset(self, new_dataset_name, instance_col, instance_col)
     typed = seed.expand(
         instance_col, [('rdf:type', classes_col_name, False, PredicateDirection.OUTGOING)])
     per_class = typed.group_by([classes_col_name])
     return per_class.count(instance_col, frequency_col_name)
Пример #7
0
 def features(self, class_name, new_dataset_name='dataset', features_col_name='feature_uri'):
     """
     Build a dataset of the features found on the entities typed as
     ``class_name``.

     Sketch of the query:
     select ?p
         where {
             ?e  type ?class.
             ?e  ?p   ?o.
         }
     No de-duplication or grouping step is applied in this method.
     Side effect: the ``rdf`` prefix is added to every entry of
     ``self.graph_prefixes`` that does not define it yet.

     :param class_name: class whose features are retrieved; passed both as
         the seed value and as the seed column name
     :type class_name: string
     :param new_dataset_name: name given to the created dataset
     :type new_dataset_name: string
     :param features_col_name: name of the features column; it is passed in
         the predicate position of the second expansion
     :type features_col_name: string
     :return: result of the two chained expansions
     :rtype: Dataset
     """
     rdf_namespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     for graph_key in self.graph_prefixes:
         prefixes = self.graph_prefixes[graph_key]
         if "rdf" not in prefixes:
             prefixes['rdf'] = rdf_namespace

     entity_col = "entity"
     seed = ExpandableDataset(self, new_dataset_name, class_name, class_name)
     entities_of_class = seed.expand(
         class_name, [('rdf:type', entity_col, False, PredicateDirection.INCOMING)])
     return entities_of_class.expand(
         entity_col, [(features_col_name, "feature_value", False, PredicateDirection.OUTGOING)])
Пример #8
0
 def entities(self, class_name, new_dataset_name='dataset', entities_col_name='entity'):
     """
     Build a dataset of the entities whose type is ``class_name``.

     Sketch of the equivalent sparql query:
         select distinct ?e
         where {
             ?e  rdf:type ?class_class
         }
     Side effect: the ``rdf`` prefix is added to every entry of
     ``self.graph_prefixes`` that does not define it yet.

     :param class_name: the name of the class; passed both as the seed
         value and as the seed column name
     :type class_name: string
     :param new_dataset_name: name given to the created dataset
     :type new_dataset_name: string
     :param entities_col_name: name of the entities column
     :type entities_col_name: string
     :return: result of expanding the class through incoming ``rdf:type``
     :rtype: Dataset
     """
     rdf_namespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     for graph_key in self.graph_prefixes:
         prefixes = self.graph_prefixes[graph_key]
         if "rdf" not in prefixes:
             prefixes['rdf'] = rdf_namespace

     seed = ExpandableDataset(self, new_dataset_name, class_name, class_name)
     incoming_type = ('rdf:type', entities_col_name, False, PredicateDirection.INCOMING)
     return seed.expand(class_name, [incoming_type])
Пример #9
0
 def dataset_with_entities(self, entities, new_dataset_name='dataset', entities_col_name='entities'):
     """
     Wrap the passed entities in a new single-column dataset.

     :param entities: list of entities URIs used as the seed of the dataset
     :type entities: list of strings
     :param new_dataset_name: name given to the created dataset
     :type new_dataset_name: string
     :param entities_col_name: name of the entities column
     :type entities_col_name: string
     :return: a new ExpandableDataset bound to this object
     :rtype: Dataset
     """
     seeded_dataset = ExpandableDataset(self, new_dataset_name, entities, entities_col_name)
     return seeded_dataset
Пример #10
0
 def describe_entity(self, entity, new_dataset_name='dataset', class_col_name='class', feature_col_name='feature'):
     """
     Build a dataset describing a single entity: its class and its features.

     Sketch of the equivalent query:
     select ?class ?p
     where {
         ?e  type ?class
         ?e ?p ?o
     }

     :param entity: entity uri, used as the seed of the 'instance' column
     :type entity: string
     :param new_dataset_name: name given to the created dataset
     :type new_dataset_name: string
     :param class_col_name: name of the class column
     :type class_col_name: string
     :param feature_col_name: name of the feature column; it is passed in
         the predicate position of the second expansion
     :type feature_col_name: string
     :return: result of the two chained expansions on the 'instance' column
     :rtype: Dataset (whatever ``expand`` returns)
     """
     instance_col = 'instance'
     seed = ExpandableDataset(self, new_dataset_name, entity, instance_col)
     class_edge = RDFPredicate('rdf:type', class_col_name, False, PredicateDirection.OUTGOING)
     with_class = seed.expand(instance_col, [class_edge])
     feature_edge = RDFPredicate(feature_col_name, "feature_value", False, PredicateDirection.OUTGOING)
     return with_class.expand(instance_col, [feature_edge])
Пример #11
0
 def entities(self, class_name, new_dataset_name='dataset', entities_col_name='entity'):
     """
     Build a dataset of the entities whose type is ``class_name``.

     Sketch of the equivalent sparql query:
         select distinct ?e
         where {
             ?e  type ?class_class
         }

     :param class_name: the name of the class; passed both as the seed
         value and as the seed column name
     :type class_name: string
     :param new_dataset_name: name given to the created dataset
     :type new_dataset_name: string
     :param entities_col_name: name of the entities column
     :type entities_col_name: string
     :return: result of expanding the class through incoming ``rdf:type``
     :rtype: Dataset
     """
     seed = ExpandableDataset(self, new_dataset_name, class_name, class_name)
     incoming_type = RDFPredicate('rdf:type', entities_col_name, False, PredicateDirection.INCOMING)
     return seed.expand(class_name, [incoming_type])