Example #1
0
    def recommendation_candidates(self, bucket_id, document_id, weight):
        bucket_id = sqlpie.Bucket(bucket_id).bucket_id
        document_id = sqlpie.Util.to_sha1(unicode(document_id))

        sql = "SELECT HEX(o2.%s_id) AS %s_id2, " % (self.reference,
                                                    self.reference)
        sql += "       SUM(o2.predicate_value * %s) AS score " % (weight)
        sql += "FROM observations o2 WHERE "
        sql += "o2.%s_bucket_id = UNHEX('%s') " % (self.target, bucket_id)
        sql += "AND o2.%s_id = UNHEX('%s') " % (self.target, document_id)
        sql += "AND o2.predicate_id = UNHEX('%s') " % (self.predicate_id)
        sql += "AND NOT EXISTS (SELECT 1 FROM observations o1 "
        sql += "    WHERE o1.%s_id = UNHEX('%s') " % (self.target,
                                                      self.target_id)
        sql += "    AND o1.%s_bucket_id = UNHEX('%s') " % (
            self.target, self.target_bucket_id)
        sql += "    AND o1.predicate_id = UNHEX('%s') " % (self.predicate_id)
        sql += "    AND o1.%s_id = o2.%s_id " % (self.reference,
                                                 self.reference)
        sql += "    AND o1.%s_bucket_id = o2.%s_bucket_id) " % (self.reference,
                                                                self.reference)
        sql += "GROUP BY o2.%s_id " % (self.reference)
        sql += "ORDER BY score DESC "
        g.cursor.execute(sql)
        if sqlpie.Util.is_debug():
            print g.cursor._executed
        data = g.cursor.fetchall()
        return data
Example #2
0
 def __init__(self, bucket, document_ids, documents):
     """Hold a collection of documents.

     When both *bucket* and *document_ids* are given, the *documents*
     argument is ignored: each id is SHA-1 hashed and the matching
     document is loaded from the bucket instead.
     """
     if bucket is not None and document_ids is not None:
         bucket_id = sqlpie.Bucket(bucket).bucket_id
         documents = [
             sqlpie.Document.get(
                 bucket_id, sqlpie.Util.to_sha1(unicode(doc_id))).document
             for doc_id in document_ids
         ]
     self.documents = documents
Example #3
0
 def __init__(self, subject_bucket, object_bucket, subject_id, object_id,
              predicate):
     """Resolve bucket/predicate ids and fix the query direction.

     Exactly one side is the "target" (the document we start from) and
     the other the "reference" (the side candidates come from): when
     *object_id* is None the subject is the target, otherwise the object.
     """
     self.subject_bucket_id = sqlpie.Bucket(subject_bucket).bucket_id
     self.object_bucket_id = sqlpie.Bucket(object_bucket).bucket_id
     self.predicate_id = sqlpie.Predicate(predicate).predicate_id
     if object_id is None:
         # Subject is the target; candidates are drawn from the object side.
         self.subject_id = sqlpie.Util.to_sha1(unicode(subject_id))
         self.target, self.reference = "subject", "object"
         self.target_bucket_id = self.subject_bucket_id
         self.target_id = self.subject_id
         self.reference_bucket_id = self.object_bucket_id
     else:
         # Object is the target; candidates are drawn from the subject side.
         self.object_id = sqlpie.Util.to_sha1(unicode(object_id))
         self.target, self.reference = "object", "subject"
         self.target_bucket_id = self.object_bucket_id
         self.target_id = self.object_id
         self.reference_bucket_id = self.subject_bucket_id
Example #4
0
 def __init__(self, bucket, key, value=None, expire_at=None):
     """Build a cache entry for *key* in *bucket*.

     The key is hashed into a fixed-size key id. *expire_at*, when given,
     is converted from a timestamp; otherwise the expiry defaults to the
     current UTC time.
     """
     self.bucket_name = bucket
     self.bucket_id = sqlpie.Bucket(bucket).bucket_id
     self.key_id = sqlpie.Cache.convert_to_hash_key(key)
     self.key = key
     self.value = value
     if expire_at:
         self.expire_at = sqlpie.Util.get_current_utc_from_timestamp(expire_at)
     else:
         self.expire_at = sqlpie.Util.get_current_utc_timestamp()
Example #5
0
 def remove(bucket, key_id=None):
     b = sqlpie.Bucket(bucket)
     bucket_id = b.bucket_id
     sql = "DELETE FROM "
     sql += Cache.__tablename + " WHERE bucket_id = UNHEX(%s)"
     params = (bucket_id,)
     if key_id:
         sql += " and key_id = UNHEX(%s) LIMIT 1"
         params = (bucket_id, key_id,)
     g.cursor.execute(sql, params)
     if sqlpie.Util.is_debug():
         print g.cursor._executed
     sql = "SELECT ROW_COUNT() "
     g.cursor.execute(sql)
     data = g.cursor.fetchone()
     return data[0]
Example #6
0
    def _prepare_doc_for_put_action(self):
        """Prepare this document for storage.

        Bumps the bucket's document counter, stamps creation metadata,
        runs every registered parser over the document, then chooses
        between the raw JSON and its compressed form as the payload
        (self.data / self.is_compressed).
        """
        bucket = sqlpie.Bucket(self.bucket)
        bucket.increment()
        self.bucket_id = bucket.bucket_id
        self.created_at = sqlpie.Util.get_current_utc_timestamp()
        self.document["_bucket"] = self.bucket

        # Parsers may transform the document; each sees the previous result.
        for parser in self.parsers.keys():
            self.document = self._handle_parser(parser, self.document)

        raw_data = json.dumps(self.document)
        self.compressed_data = sqlpie.Util.compress(raw_data)
        # NOTE(review): the cutoff is raw length *plus* 10%, so the
        # compressed payload wins even when up to 10% LARGER than the raw
        # JSON — confirm this tolerance is intentional (a minus sign would
        # require a 10% saving instead).
        if len(self.compressed_data) < len(raw_data) + (len(raw_data) * .1):
            self.data = self.compressed_data
            self.is_compressed = True
        else:
            self.data = raw_data
            self.is_compressed = False
Example #7
0
 def get(bucket, key=None):
     b = sqlpie.Bucket(bucket)
     bucket_id = b.bucket_id
     sql = "SELECT HEX(bucket_id), HEX(key_id), expire_at, cache_key, value FROM "
     if key:
         key_id = sqlpie.Cache.convert_to_hash_key(key)
         sql += Cache.__tablename + " WHERE bucket_id = UNHEX(%s) and key_id = UNHEX(%s) LIMIT 1"
         g.cursor.execute(sql, (bucket_id, key_id,))
         if sqlpie.Util.is_debug():
             print g.cursor._executed
         r = g.cursor.fetchone()
     else:
         sql += Cache.__tablename + " WHERE bucket_id = UNHEX(%s)"
         g.cursor.execute(sql, (bucket_id,))
         if sqlpie.Util.is_debug():
             print g.cursor._executed
         r = g.cursor.fetchall()
     return r
Example #8
0
    def match_all(source_bucket,
                  search_bucket,
                  max_matches,
                  filter_query,
                  output_predicate=None):
        """Match every document in *source_bucket* against *search_bucket*
        and persist the scored matches as observations.

        Previous observations under the output predicate are removed
        first. Returns (observations stored, predicate name used).
        """
        engine = sqlpie.Searcher(filter_query)

        total = 0
        if output_predicate is None:
            output_predicate = "match_" + source_bucket.lower().strip() \
                + "_" + search_bucket.lower().strip()

        # Delete observations from the specific predicate
        # (match_<bucket>_<search_bucket>) before re-matching.
        sqlpie.Observation.remove({"predicate": output_predicate})

        source = sqlpie.Bucket(source_bucket)
        docs = sqlpie.Document.select(
            ["bucket_id = UNHEX(%s)", source.bucket_id])
        for row in docs:
            document_id = row[1]
            # Scored best matches for this document; ids are hex-encoded.
            results = engine.run_docmatching(source_bucket, document_id,
                                             search_bucket, max_matches,
                                             True)
            batch = []
            for match in results:
                total += 1
                # Store each scored match/result as an observation.
                batch.append(sqlpie.Observation({
                    "subject_bucket": source_bucket,
                    "object_bucket": search_bucket,
                    "subject_id": document_id,
                    "predicate": output_predicate,
                    "object_id": match[sqlpie.Document.ID_FIELD],
                    "value": match[sqlpie.Document.SCORE_FIELD],
                }))
            if batch:
                sqlpie.Observation.add_multiple(batch)
        return (total, output_predicate)
Example #9
0
    def _conditions_to_sql(conditions):
        sql_statements = []
        sql_replacement = []
        tokens_requiring_encoding = [
            "subject_bucket", "subject_id", "object_bucket", "object_id",
            "predicate"
        ]
        bucket_tokens = ["subject_bucket", "object_bucket"]
        predicate_token = ["predicate"]
        timestamp_token = ["created_at"]
        field_replacements = {
            "subject_bucket": "subject_bucket_id",
            "object_bucket": "object_bucket_id",
            "subject_id": "subject_id",
            "object_id": "object_id",
            "predicate": "predicate_id",
            "value": "predicate_value",
            "timestamp": "created_at",
            "options": "options"
        }
        valid_tokens = field_replacements.keys()

        options = {"limit": 10, "offset": 0}
        if "options" in conditions.keys():
            if "limit" in conditions["options"]:
                options["limit"] = conditions["options"]["limit"]
            if "offset" in conditions["options"]:
                options["offset"] = conditions["options"]["offset"]

        for k in conditions.keys():
            k = k.lower()
            if k in valid_tokens:
                v = conditions[k]
                v_type = type(v).__name__
                if v_type == "list":
                    if len(v) > 0:
                        if k in tokens_requiring_encoding:
                            sql_string_list = []
                            for item in v:
                                if k in bucket_tokens:
                                    b = sqlpie.Bucket(item)
                                    lv = b.bucket_id
                                elif k in predicate_token:
                                    p = sqlpie.Predicate(item)
                                    lv = p.predicate_id
                                else:
                                    lv = sqlpie.Term.get_key(item)
                                sql_string_list.append("UNHEX(%s)")
                                sql_replacement.append(lv)
                        else:
                            sql_string_list = []
                            for item in v:
                                sql_string_list.append("%s")
                                sql_replacement.append(item)
                        k = field_replacements[k]
                        sql_statements.append(k + " in (" +
                                              ",".join(sql_string_list) + ")")
                else:
                    if k in tokens_requiring_encoding:
                        if k in bucket_tokens:
                            b = sqlpie.Bucket(v)
                            v = b.bucket_id
                        elif k in predicate_token:
                            p = sqlpie.Predicate(v)
                            v = p.predicate_id
                        else:
                            v = sqlpie.Term.get_key(v)
                        k = field_replacements[k]
                        sql_statements.append(k + " = UNHEX(%s)")
                        sql_replacement.append(v)
                    else:
                        k = field_replacements[k]
                        if type(v).__name__ == "dict":
                            if v.has_key("start") or v.has_key("end"):
                                if v.has_key("start"):
                                    if k in timestamp_token:
                                        condition = k + " >= FROM_UNIXTIME(%s)"
                                    else:
                                        condition = k + " >= %s"
                                    sql_statements.append(condition)
                                    sql_replacement.append(v["start"])

                                if v.has_key("end"):
                                    if k in timestamp_token:
                                        condition = k + " <= FROM_UNIXTIME(%s)"
                                    else:
                                        condition = k + " <= %s"
                                    sql_statements.append(condition)
                                    sql_replacement.append(v["end"])
                        else:
                            sql_statements.append(k + " = %s")
                            sql_replacement.append(v)
            else:
                raise sqlpie.CustomException(
                    sqlpie.CustomException.INVALID_ARGUMENTS)
        return (sql_statements, sql_replacement, options)