def recommendation_candidates(self, bucket_id, document_id, weight):
    bucket_id = sqlpie.Bucket(bucket_id).bucket_id
    document_id = sqlpie.Util.to_sha1(unicode(document_id))
    # Score every reference-side document observed for the given target
    # document, excluding references this instance's target already has.
    sql = "SELECT HEX(o2.%s_id) AS %s_id2, " % (self.reference, self.reference)
    sql += " SUM(o2.predicate_value * %s) AS score " % (weight)
    sql += "FROM observations o2 WHERE "
    sql += "o2.%s_bucket_id = UNHEX('%s') " % (self.target, bucket_id)
    sql += "AND o2.%s_id = UNHEX('%s') " % (self.target, document_id)
    sql += "AND o2.predicate_id = UNHEX('%s') " % (self.predicate_id)
    sql += "AND NOT EXISTS (SELECT 1 FROM observations o1 "
    sql += "    WHERE o1.%s_id = UNHEX('%s') " % (self.target, self.target_id)
    sql += "    AND o1.%s_bucket_id = UNHEX('%s') " % (self.target, self.target_bucket_id)
    sql += "    AND o1.predicate_id = UNHEX('%s') " % (self.predicate_id)
    sql += "    AND o1.%s_id = o2.%s_id " % (self.reference, self.reference)
    sql += "    AND o1.%s_bucket_id = o2.%s_bucket_id) " % (self.reference, self.reference)
    sql += "GROUP BY o2.%s_id " % (self.reference)
    sql += "ORDER BY score DESC "
    g.cursor.execute(sql)
    if sqlpie.Util.is_debug():
        print g.cursor._executed
    data = g.cursor.fetchall()
    return data
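# Usage sketch (hypothetical class and bucket names; assumes an instance
# built by the __init__ below, with a live MySQL cursor on flask.g).
# Recommends objects liked by "user-7" that "user-42" has not seen yet:
#
#     rec = Recommendation("users", "movies", "user-42", None, "likes")
#     for hex_doc_id, score in rec.recommendation_candidates("users", "user-7", 1.0):
#         print hex_doc_id, score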
def __init__(self, bucket, document_ids, documents):
    if bucket is not None and document_ids is not None:
        # A bucket plus document ids wins over any passed-in document list:
        # fetch fresh copies from storage instead.
        documents = []
        bucket_id = sqlpie.Bucket(bucket).bucket_id
        for doc_id in document_ids:
            hexed_doc_id = sqlpie.Util.to_sha1(unicode(doc_id))
            documents.append(sqlpie.Document.get(bucket_id, hexed_doc_id).document)
    self.documents = documents
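# Usage sketch (the class name here is hypothetical; the constructor either
# fetches documents by id or wraps an already-loaded list):
#
#     ds = DocumentSet("articles", ["doc-1", "doc-2"], None)
#     ds = DocumentSet(None, None, preloaded_documents)
#     print ds.documents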
def __init__(self, subject_bucket, object_bucket, subject_id, object_id, predicate):
    self.subject_bucket_id = sqlpie.Bucket(subject_bucket).bucket_id
    self.object_bucket_id = sqlpie.Bucket(object_bucket).bucket_id
    self.predicate_id = sqlpie.Predicate(predicate).predicate_id
    if object_id is None:
        # Recommend objects for a known subject.
        self.subject_id = sqlpie.Util.to_sha1(unicode(subject_id))
        self.target = "subject"
        self.target_bucket_id = self.subject_bucket_id
        self.target_id = self.subject_id
        self.reference = "object"
        self.reference_bucket_id = self.object_bucket_id
    else:
        # Recommend subjects for a known object.
        self.object_id = sqlpie.Util.to_sha1(unicode(object_id))
        self.target = "object"
        self.target_bucket_id = self.object_bucket_id
        self.target_id = self.object_id
        self.reference = "subject"
        self.reference_bucket_id = self.subject_bucket_id
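# Orientation sketch (hypothetical names, same class as above): passing
# object_id=None targets the subject side, so candidates come from the
# object bucket; supplying an object_id flips the direction:
#
#     Recommendation("users", "movies", "user-42", None, "likes")   # recommend movies
#     Recommendation("users", "movies", None, "movie-7", "likes")   # recommend users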
def __init__(self, bucket, key, value=None, expire_at=None):
    self.bucket_name = bucket
    b = sqlpie.Bucket(bucket)
    self.bucket_id = b.bucket_id
    self.key_id = sqlpie.Cache.convert_to_hash_key(key)
    self.key = key      # key[0:2047] if len(key) > 2048 else key
    self.value = value  # value[0:2047] if len(value) > 2048 else value
    if expire_at:
        self.expire_at = sqlpie.Util.get_current_utc_from_timestamp(expire_at)
    else:
        self.expire_at = sqlpie.Util.get_current_utc_timestamp()
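# Usage sketch (hypothetical bucket, key and value; expire_at is read as a
# UNIX timestamp by sqlpie.Util.get_current_utc_from_timestamp):
#
#     entry = Cache("session-cache", "user-42", value='{"theme": "dark"}',
#                   expire_at=1500000000)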
@staticmethod
def remove(bucket, key_id=None):
    b = sqlpie.Bucket(bucket)
    bucket_id = b.bucket_id
    sql = "DELETE FROM "
    sql += Cache.__tablename + " WHERE bucket_id = UNHEX(%s)"
    params = (bucket_id,)
    if key_id:
        # Narrow the delete to a single cache entry.
        sql += " and key_id = UNHEX(%s) LIMIT 1"
        params = (bucket_id, key_id,)
    g.cursor.execute(sql, params)
    if sqlpie.Util.is_debug():
        print g.cursor._executed
    # Report how many rows the DELETE touched.
    sql = "SELECT ROW_COUNT() "
    g.cursor.execute(sql)
    data = g.cursor.fetchone()
    return data[0]
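# Usage sketch (pairs with the constructor above; key_id is the hashed key):
#
#     removed = Cache.remove("session-cache")   # clear the whole bucket
#     removed = Cache.remove("session-cache",
#                            key_id=Cache.convert_to_hash_key("user-42"))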
def _prepare_doc_for_put_action(self):
    b = sqlpie.Bucket(self.bucket)
    b.increment()
    self.bucket_id = b.bucket_id
    self.created_at = sqlpie.Util.get_current_utc_timestamp()
    self.document["_bucket"] = self.bucket
    # Run every registered parser over the document before serializing it.
    for parser in self.parsers.keys():
        self.document = self._handle_parser(parser, self.document)
    raw_data = json.dumps(self.document)
    self.compressed_data = sqlpie.Util.compress(raw_data)
    # Store the compressed payload unless compression inflates the raw JSON
    # by more than 10%.
    if len(self.compressed_data) < len(raw_data) + (len(raw_data) * .1):
        self.data = self.compressed_data
        self.is_compressed = True
    else:
        self.data = raw_data
        self.is_compressed = False
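# The size check above in plain zlib (a sketch, assuming sqlpie.Util.compress
# wraps a deflate-style codec):
#
#     import zlib
#     raw = json.dumps({"title": "example"})
#     packed = zlib.compress(raw)
#     use_compressed = len(packed) < len(raw) * 1.1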
@staticmethod
def get(bucket, key=None):
    b = sqlpie.Bucket(bucket)
    bucket_id = b.bucket_id
    sql = "SELECT HEX(bucket_id), HEX(key_id), expire_at, cache_key, value FROM "
    if key:
        # Fetch a single entry by its hashed key.
        key_id = sqlpie.Cache.convert_to_hash_key(key)
        sql += Cache.__tablename + " WHERE bucket_id = UNHEX(%s) and key_id = UNHEX(%s) LIMIT 1"
        g.cursor.execute(sql, (bucket_id, key_id,))
        if sqlpie.Util.is_debug():
            print g.cursor._executed
        r = g.cursor.fetchone()
    else:
        # No key given: return every entry in the bucket.
        sql += Cache.__tablename + " WHERE bucket_id = UNHEX(%s)"
        g.cursor.execute(sql, (bucket_id,))
        if sqlpie.Util.is_debug():
            print g.cursor._executed
        r = g.cursor.fetchall()
    return r
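# Usage sketch (hypothetical key; complements the constructor and remove above):
#
#     row = Cache.get("session-cache", "user-42")   # one row tuple, or None
#     rows = Cache.get("session-cache")             # every row in the bucket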
def match_all(source_bucket, search_bucket, max_matches, filter_query, output_predicate=None):
    engine = sqlpie.Searcher(filter_query)
    num_observations = 0
    if output_predicate is None:
        output_predicate = "match_" + source_bucket.lower().strip() + "_" + search_bucket.lower().strip()
    # Delete prior observations for this predicate (match_<source_bucket>_<search_bucket>).
    sqlpie.Observation.remove({"predicate": output_predicate})
    sb = sqlpie.Bucket(source_bucket)
    sql = ["bucket_id = UNHEX(%s)", sb.bucket_id]
    docs = sqlpie.Document.select(sql)
    is_encoded_document_id = True
    # Match every document in the source bucket against the search bucket.
    for d in docs:
        document_id = d[1]
        # Get the scored best matches for this document.
        results = engine.run_docmatching(source_bucket, document_id, search_bucket,
                                         max_matches, is_encoded_document_id)
        observations = []
        for r in results:
            # Persist each scored match as an observation.
            num_observations = num_observations + 1
            observation = {"subject_bucket": source_bucket, "object_bucket": search_bucket,
                           "subject_id": document_id, "predicate": output_predicate,
                           "object_id": r[sqlpie.Document.ID_FIELD],
                           "value": r[sqlpie.Document.SCORE_FIELD]}
            observations.append(sqlpie.Observation(observation))
        if len(observations) > 0:
            sqlpie.Observation.add_multiple(observations)
    return (num_observations, output_predicate)
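# Usage sketch (hypothetical bucket names; filter_query takes whatever query
# structure sqlpie.Searcher accepts):
#
#     total, predicate = match_all("resumes", "jobs", 10, filter_query)
#     # scored matches are now stored under predicate "match_resumes_jobs"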
def _conditions_to_sql(conditions):
    sql_statements = []
    sql_replacement = []
    tokens_requiring_encoding = ["subject_bucket", "subject_id", "object_bucket",
                                 "object_id", "predicate"]
    bucket_tokens = ["subject_bucket", "object_bucket"]
    predicate_token = ["predicate"]
    timestamp_token = ["created_at"]
    field_replacements = {"subject_bucket": "subject_bucket_id",
                          "object_bucket": "object_bucket_id",
                          "subject_id": "subject_id",
                          "object_id": "object_id",
                          "predicate": "predicate_id",
                          "value": "predicate_value",
                          "timestamp": "created_at",
                          "options": "options"}
    valid_tokens = field_replacements.keys()
    options = {"limit": 10, "offset": 0}
    if "options" in conditions:
        if "limit" in conditions["options"]:
            options["limit"] = conditions["options"]["limit"]
        if "offset" in conditions["options"]:
            options["offset"] = conditions["options"]["offset"]
    for key in conditions.keys():
        # Match keys case-insensitively, but look values up with the
        # original key so mixed-case keys do not raise a KeyError.
        k = key.lower()
        if k in valid_tokens:
            if k == "options":
                # Pagination hints were consumed above; not a filter column.
                continue
            v = conditions[key]
            v_type = type(v).__name__
            if v_type == "list":
                if len(v) > 0:
                    if k in tokens_requiring_encoding:
                        sql_string_list = []
                        for item in v:
                            # Buckets, predicates and ids are stored as hashes.
                            if k in bucket_tokens:
                                lv = sqlpie.Bucket(item).bucket_id
                            elif k in predicate_token:
                                lv = sqlpie.Predicate(item).predicate_id
                            else:
                                lv = sqlpie.Term.get_key(item)
                            sql_string_list.append("UNHEX(%s)")
                            sql_replacement.append(lv)
                    else:
                        sql_string_list = []
                        for item in v:
                            sql_string_list.append("%s")
                            sql_replacement.append(item)
                    k = field_replacements[k]
                    sql_statements.append(k + " in (" + ",".join(sql_string_list) + ")")
            else:
                if k in tokens_requiring_encoding:
                    if k in bucket_tokens:
                        v = sqlpie.Bucket(v).bucket_id
                    elif k in predicate_token:
                        v = sqlpie.Predicate(v).predicate_id
                    else:
                        v = sqlpie.Term.get_key(v)
                    k = field_replacements[k]
                    sql_statements.append(k + " = UNHEX(%s)")
                    sql_replacement.append(v)
                else:
                    k = field_replacements[k]
                    if type(v).__name__ == "dict":
                        # Range filter: {"start": ..., "end": ...}
                        if "start" in v or "end" in v:
                            if "start" in v:
                                if k in timestamp_token:
                                    condition = k + " >= FROM_UNIXTIME(%s)"
                                else:
                                    condition = k + " >= %s"
                                sql_statements.append(condition)
                                sql_replacement.append(v["start"])
                            if "end" in v:
                                if k in timestamp_token:
                                    condition = k + " <= FROM_UNIXTIME(%s)"
                                else:
                                    condition = k + " <= %s"
                                sql_statements.append(condition)
                                sql_replacement.append(v["end"])
                    else:
                        sql_statements.append(k + " = %s")
                        sql_replacement.append(v)
        else:
            raise sqlpie.CustomException(sqlpie.CustomException.INVALID_ARGUMENTS)
    return (sql_statements, sql_replacement, options)
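# Usage sketch (hypothetical condition values) of the shapes this helper accepts:
#
#     statements, params, opts = _conditions_to_sql({
#         "subject_bucket": "users",
#         "predicate": ["likes", "watched"],
#         "timestamp": {"start": 1400000000, "end": 1500000000},
#         "options": {"limit": 25, "offset": 0},
#     })
#     # statements -> ["subject_bucket_id = UNHEX(%s)",
#     #                "predicate_id in (UNHEX(%s),UNHEX(%s))",
#     #                "created_at >= FROM_UNIXTIME(%s)",
#     #                "created_at <= FROM_UNIXTIME(%s)"]
#     # (statement order follows dict iteration and may vary)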