def random_edges(self, limit=20):
    """
    Return up to `limit` randomly-selected edges as linked-data dicts.
    """
    # Open the database connection lazily, on first use.
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)
    cursor = self.connection.cursor()
    cursor.execute(RANDOM_QUERY, {'limit': limit})
    rows = cursor.fetchall()
    return [transform_for_linked_data(edge_data) for _uri, edge_data in rows]
def random_edges(self, limit=20):
    """
    Get a collection of distinct, randomly-selected edges.

    TABLESAMPLE SYSTEM samples a fraction of the table's pages before
    ordering, which keeps the query fast on a large table. The fraction
    depends on which database we're talking to: the tiny test database
    needs a much larger sample to reliably return any rows at all.
    """
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)

    if self.dbname == 'conceptnet-test':
        # Sample 10% of edges so we get matches in the test database,
        # where there isn't much data.
        sample_percent = 10
    else:
        # In the real database, sampling 0.01% of edges is plenty.
        sample_percent = 0.01

    # The two variants of this query differed only in the sampled
    # percentage, so build them from one template. The percentage is a
    # trusted constant chosen above, not user input, so interpolating it
    # into the SQL text is safe; `limit` remains a bound parameter.
    random_query = """
        SELECT uri, data, weight FROM edges
        TABLESAMPLE SYSTEM({percent})
        ORDER BY random() LIMIT %(limit)s
    """.format(percent=sample_percent)

    cursor = self.connection.cursor()
    cursor.execute(random_query, {'limit': limit})
    results = [
        transform_for_linked_data(data)
        for uri, data, weight in cursor.fetchall()
    ]
    return results
def lookup_assertion(self, uri):
    """
    Get a single assertion, given its URI starting with /a/.
    """
    # Sanitize the URI to remove control characters such as \x00, as the
    # sibling lookup methods in this class do. The postgres driver would
    # remove \x00 anyway, but this avoids reporting a server error when
    # that happens.
    uri = remove_control_chars(uri)
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)
    cursor = self.connection.cursor()
    cursor.execute("SELECT data FROM edges WHERE uri=%(uri)s", {'uri': uri})
    results = [transform_for_linked_data(data) for (data,) in cursor.fetchall()]
    return results
def lookup_grouped_by_feature(self, uri, limit=20):
    """
    The query used by the browseable interface, which groups its results
    by what 'feature' they describe of the queried node. A feature is
    defined by the relation, the queried node, and the direction
    (incoming or outgoing).
    """
    # Sanitize the URI to remove control characters such as \x00, as the
    # other lookup methods in this class do; otherwise the postgres
    # driver can report a server error on such input.
    uri = remove_control_chars(uri)
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)

    def extract_feature(row):
        # The first two columns of each row identify the feature.
        return tuple(row[:2])

    def feature_data(row):
        direction, _, data = row

        # Hacky way to figure out what the 'other' node is, the one that
        # (in most cases) didn't match the URI. If both start with our
        # given URI, take the longer one, which is either a more specific
        # sense or a different, longer word.
        shorter, longer = sorted([data['start'], data['end']], key=len)
        if shorter.startswith(uri):
            data['other'] = longer
        else:
            data['other'] = shorter
        return data

    cursor = self.connection.cursor()
    cursor.execute(NODE_TO_FEATURE_QUERY, {'node': uri, 'limit': limit})
    results = {}
    # groupby only groups adjacent rows, so this relies on the query
    # returning rows already ordered by feature.
    for feature, rows in itertools.groupby(cursor.fetchall(), extract_feature):
        results[feature] = [
            transform_for_linked_data(feature_data(row)) for row in rows
        ]
    return results
def lookup_assertion(self, uri):
    """
    Get a single assertion, given its URI starting with /a/.
    """
    # Sanitize the URI to remove control characters such as \x00, matching
    # the other lookup methods here.
    uri = remove_control_chars(uri)
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)
    cursor = self.connection.cursor()
    # Fixed: this query used the ':uri' named-placeholder style, but every
    # other query in this file binds parameters with the pyformat style
    # ('%(name)s') that the driver expects for dict parameters; ':uri'
    # would not have been substituted.
    cursor.execute("SELECT data FROM edges WHERE uri=%(uri)s", {'uri': uri})
    results = [transform_for_linked_data(data) for (data,) in cursor.fetchall()]
    return results
def query(self, criteria, limit=20, offset=0):
    """
    The most general way to query based on a set of criteria.
    """
    # Fixed: lazily open the database connection on first use, as every
    # other query method in this class does. Without this guard, calling
    # query() first on a fresh instance would fail because
    # self.connection is still None.
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)
    cursor = self.connection.cursor()
    if 'node' in criteria:
        # A 'node' criterion can match either end of an edge, so the
        # criteria are translated into both directions and combined by
        # the two-way query.
        query_forward = gin_jsonb_value(criteria, node_forward=True)
        query_backward = gin_jsonb_value(criteria, node_forward=False)
        cursor.execute(
            GIN_QUERY_2WAY,
            {
                'query_forward': jsonify(query_forward),
                'query_backward': jsonify(query_backward),
                'limit': limit,
                'offset': offset,
            },
        )
    else:
        query = gin_jsonb_value(criteria)
        cursor.execute(
            GIN_QUERY_1WAY,
            {'query': jsonify(query), 'limit': limit, 'offset': offset},
        )
    results = [
        transform_for_linked_data(data)
        for uri, data, weight in cursor.fetchall()
    ]
    return results
def lookup_grouped_by_feature(self, uri, limit=20):
    """
    The query used by the browseable interface, which groups its results
    by what 'feature' they describe of the queried node. A feature is
    defined by the relation, the queried node, and the direction
    (incoming or outgoing).
    """
    # Fixed for consistency: sanitize the URI to remove control characters
    # such as \x00, as the other lookup methods in this class do, so the
    # postgres driver doesn't report a server error on such input.
    uri = remove_control_chars(uri)
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)

    def extract_feature(row):
        # The feature is identified by the row's first two columns.
        return tuple(row[:2])

    def feature_data(row):
        direction, _, data = row
        # Hacky way to figure out what the 'other' node is, the one that
        # (in most cases) didn't match the URI. If both start with our
        # given URI, take the longer one, which is either a more specific
        # sense or a different, longer word.
        shorter, longer = sorted([data['start'], data['end']], key=len)
        if shorter.startswith(uri):
            data['other'] = longer
        else:
            data['other'] = shorter
        return data

    cursor = self.connection.cursor()
    cursor.execute(NODE_TO_FEATURE_QUERY, {'node': uri, 'limit': limit})
    results = {}
    # Note: groupby groups only adjacent rows, so equal features must be
    # adjacent in the query's result order.
    for feature, rows in itertools.groupby(cursor.fetchall(), extract_feature):
        results[feature] = [
            transform_for_linked_data(feature_data(row)) for row in rows
        ]
    return results
def query(self, criteria, limit=20, offset=0):
    """
    The most general way to query based on a set of criteria.
    """
    # Work on a copy so the caller's criteria dict is never mutated.
    criteria = criteria.copy()
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)

    # When a criterion names an overly-common prefix, add a companion
    # 'filter_*' criterion carrying a SQL LIKE pattern, so the generated
    # query can handle the match differently.
    for key in ['node', 'other', 'start', 'end']:
        if key in criteria and criteria[key] in TOO_BIG_PREFIXES:
            criteria['filter_' + key] = criteria[key] + '%'

    # Sanitize every criterion value before binding it as a parameter.
    params = {
        name: remove_control_chars(value)
        for name, value in criteria.items()
    }
    params['limit'] = limit
    params['offset'] = offset

    cursor = self.connection.cursor()
    cursor.execute(make_list_query(criteria), params)
    return [
        transform_for_linked_data(data) for uri, data in cursor.fetchall()
    ]
def query(self, criteria, limit=20, offset=0):
    """
    The most general way to query based on a set of criteria.
    """
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)
    cursor = self.connection.cursor()

    if 'node' in criteria:
        # A 'node' criterion can match either end of an edge, so build
        # the criteria in both directions and run the two-way query.
        params = {
            'query_forward': jsonify(
                gin_jsonb_value(criteria, node_forward=True)
            ),
            'query_backward': jsonify(
                gin_jsonb_value(criteria, node_forward=False)
            ),
            'limit': limit,
            'offset': offset,
        }
        cursor.execute(GIN_QUERY_2WAY, params)
    else:
        params = {
            'query': jsonify(gin_jsonb_value(criteria)),
            'limit': limit,
            'offset': offset,
        }
        cursor.execute(GIN_QUERY_1WAY, params)

    return [
        transform_for_linked_data(data)
        for uri, data, weight in cursor.fetchall()
    ]
def sample_dataset(self, uri, limit=50, offset=0):
    """
    Get a subsample of edges matching a particular dataset.
    """
    # Fixed for consistency: sanitize the URI to remove control characters
    # such as \x00, as the sibling implementation of this method does, so
    # the postgres driver doesn't report a server error on such input.
    uri = remove_control_chars(uri)
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)
    cursor = self.connection.cursor()
    # The dataset is matched as a JSON value, so encode the URI as JSON.
    dataset_json = json.dumps(uri)
    cursor.execute(
        DATASET_QUERY,
        {'dataset': dataset_json, 'limit': limit, 'offset': offset},
    )
    results = [
        transform_for_linked_data(data) for uri, data in cursor.fetchall()
    ]
    return results
def query(self, criteria, limit=20, offset=0):
    """
    Query for edges matching a set of criteria, with paging.
    """
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)
    # Bind every criterion as a parameter, plus the paging values.
    params = dict(criteria, limit=limit, offset=offset)
    query_string = make_list_query(criteria)
    cursor = self.connection.cursor()
    cursor.execute(query_string, params)
    return [
        transform_for_linked_data(data) for uri, data in cursor.fetchall()
    ]
def random_edges(self, limit=20):
    """
    Get a collection of distinct, randomly-selected edges.
    """
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)
    cursor = self.connection.cursor()
    cursor.execute(RANDOM_QUERY, {'limit': limit})
    # Only the 'data' column is returned; uri and weight are unused here.
    edges = []
    for _uri, data, _weight in cursor.fetchall():
        edges.append(transform_for_linked_data(data))
    return edges
def lookup_assertion(self, uri):
    """
    Get a single assertion, given its URI starting with /a/.
    """
    # Strip control characters such as \x00 before querying. The postgres
    # driver would remove \x00 anyway, but sanitizing here avoids
    # reporting a server error when that happens.
    uri = remove_control_chars(uri)
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)
    cursor = self.connection.cursor()
    cursor.execute("SELECT data FROM edges WHERE uri=%(uri)s", {'uri': uri})
    return [
        transform_for_linked_data(row[0]) for row in cursor.fetchall()
    ]
def sample_dataset(self, uri, limit=50, offset=0):
    """
    Get a subsample of edges matching a particular dataset.
    """
    uri = remove_control_chars(uri)
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)
    # The dataset URI is matched as a JSON value, so encode it as JSON.
    params = {
        'dataset': json.dumps(uri),
        'limit': limit,
        'offset': offset,
    }
    cursor = self.connection.cursor()
    cursor.execute(DATASET_QUERY, params)
    return [
        transform_for_linked_data(data) for _uri, data in cursor.fetchall()
    ]
def lookup_grouped_by_feature(self, uri, limit=20):
    """
    The query used by the browseable interface, which groups its results
    by what 'feature' they describe of the queried node. A feature is
    defined by the relation, the queried node, and the direction (incoming
    or outgoing).
    """
    uri = remove_control_chars(uri)
    if self.connection is None:
        self.connection = get_db_connection(self.dbname)

    def feature_key(row):
        # The feature is identified by the row's first two columns.
        return tuple(row[:2])

    def annotate_other(row):
        _direction, _, data = row
        # Hacky way to figure out what the 'other' node is, the one that
        # (in most cases) didn't match the URI. If both start with our
        # given URI, take the longer one, which is either a more specific
        # sense or a different, longer word.
        shorter, longer = sorted([data['start'], data['end']], key=len)
        data['other'] = longer if shorter.startswith(uri) else shorter
        return data

    cursor = self.connection.cursor()
    cursor.execute(NODE_TO_FEATURE_QUERY, {'node': uri, 'limit': limit})
    grouped = {}
    for feature, rows in itertools.groupby(cursor.fetchall(), feature_key):
        grouped[feature] = [
            transform_for_linked_data(annotate_other(row)) for row in rows
        ]
    return grouped