def map(self, db, obj_id, obj):
    """Maps a node's outbound links into this index table.

    For each link URI on the node, inserts one row of
    (entity id, link target hash, link owner hash). Links whose target
    is missing from the model are skipped.
    """
    # Only node entities that carry links are indexed
    if 'links' not in obj or 'type' not in obj or obj['type'] != 'node':
        return
    for link_uri in obj.links:
        link_hash = scarecrow.ident(model.node_key(link_uri))
        link_obj = self.model[link_hash]
        # Skip stale links whose target node no longer exists
        if link_obj is None:
            continue
        link_owner = scarecrow.ident(model.account_key(link_obj.owner))
        # Table name comes from trusted config (self.name); the values
        # are passed as query parameters
        db.execute("INSERT INTO " + self.name + " VALUES (%s, %s, %s)",
                   obj_id, link_hash, link_owner)
def __contains__(self, obj_id):
    """Returns True if an entity with the given id exists in the store."""
    obj_id = scarecrow.ident(obj_id)
    db = self._connect()
    try:
        result = db.get(
            "SELECT COUNT(id) AS count FROM entities WHERE id=%s LIMIT 1",
            obj_id)
        return result.count > 0
    finally:
        # Release the connection even if the query raises
        db.close()
def get_ids(self, db, to_link, owner=None):
    """Yields the ids of entities that link to to_link.

    When owner is given, the results are restricted to links owned by
    that account.
    """
    to_link = scarecrow.ident(to_link)
    # Table name comes from trusted config (self.name); the values are
    # passed as query parameters
    query = ("SELECT entity_id FROM entities JOIN %s ON "
             "entities.id=%s.entity_id WHERE to_link=%s"
             % (self.name, self.name, '%s'))
    params = [to_link]
    if owner:
        query += " AND owner=%s"
        params.append(scarecrow.ident(owner))
    results = db.query(query, *params)
    if results is None:
        return
    for row in results:
        yield scarecrow.ScarecrowIdent(row.entity_id)
def put(self, from_node, to_node):
    """Creates or updates the link from from_node to to_node.

    The 'weight' argument is required when the link does not yet
    exist; 'tags' is optional. Both nodes must exist and the current
    user must own the from node.
    """
    from_hash = scarecrow.ident(model.node_key(from_node))
    to_hash = scarecrow.ident(model.node_key(to_node))
    weight = util.check_weight(self.get_argument('weight', None))
    tags = util.check_tags(self.get_argument('tags', None))
    try:
        node = self.db[from_hash]
    except KeyError:
        # Return a not found if the from node doesn't exist
        raise web.HTTPError(404, 'could not find from node')
    if to_hash not in self.db:
        # Return a not found if the to node doesn't exist
        raise web.HTTPError(404, 'could not find to node')
    # Return a forbidden if the current user doesn't own the node
    if node.owner != self.current_user:
        raise web.HTTPError(403, 'you do not own the from node')
    if to_node in node.links:
        # Update the link if it already exists
        link = node.links[to_node]
        if weight is not None:
            link.weight = weight
        if tags:
            link.tags = tags
    else:
        # Require the weight parameter if the link doesn't exist yet
        if weight is None:
            raise web.HTTPError(400, "requires 'weight' parameter")
        # Create a new link if it doesn't exist yet
        link = model.Storage()
        node.links[to_node] = link
        link.weight = weight
        link.tags = tags if tags else set()
    link.update_date = datetime.now()
    self.db[from_hash] = node
    serialize(self, link)
def create_account(db, name, password):
    """Creates a new account with the given name and password.

    Returns True on success, or False when an account with that name
    already exists.
    """
    account_id = scarecrow.ident(account_key(name))
    # Refuse to overwrite an existing account
    if account_id in db:
        return False
    new_account = Entity(name, 'account')
    new_account.password_hash = account_pass(password)
    db[account_id] = new_account
    return True
def get(self, uri):
    """Serializes the tags of the node identified by the given URI."""
    hash = scarecrow.ident(model.node_key(uri))
    # The original fetched request.db[hash] before the try block:
    # 'request' is undefined in this method and the lookup would have
    # raised before the KeyError -> 404 handling could run
    try:
        node = self.db[hash]
    except KeyError:
        # Return a not found if the node doesn't exist
        raise web.HTTPError(404)
    serialize(self, node.tags)
def save_dynamic_setting(db, name, value):
    """Sets a dynamic setting (i.e. a non-user defined setting)."""
    key = scarecrow.ident(model.settings_key())
    try:
        settings = db[key]
    except KeyError:
        # No settings entity stored yet; start a fresh one
        settings = model.Storage()
        settings.type = 'settings'
    settings[name] = value
    db[key] = settings
def __getitem__(self, obj_id):
    """Loads and unpickles the entity stored under the given id.

    Raises KeyError if no such entity exists.
    """
    obj_id = scarecrow.ident(obj_id)
    db = self._connect()
    try:
        result = db.get(
            "SELECT body FROM entities WHERE id=%s LIMIT 1", obj_id)
        # Previously the connection leaked on this path: KeyError was
        # raised before db.close() could run
        if result is None:
            raise KeyError()
        # NOTE: bodies are written by this store's __setitem__, so the
        # pickle payload is trusted local data
        return pickle.loads(result.body)
    finally:
        db.close()
def __delitem__(self, obj_id):
    """Deletes the entity with the given id along with its index data.

    Raises KeyError if no such entity exists.
    """
    obj_id = scarecrow.ident(obj_id)
    db = self._connect()
    try:
        # Previously the connection leaked on this path: KeyError was
        # raised before db.close() could run
        if obj_id not in self:
            raise KeyError()
        # Delete any index data for the object
        for index in self.indexes:
            db.execute("DELETE FROM " + index + " WHERE entity_id=%s",
                       obj_id)
        # Delete the object
        db.execute("DELETE FROM entities WHERE id=%s", obj_id)
    finally:
        db.close()
def delete_from(self, uri):
    """Removes every outbound link from the node identified by uri."""
    hash = scarecrow.ident(model.node_key(uri))
    try:
        node = self.db[hash]
    except KeyError:
        # Return a not found if the node doesn't exist
        raise web.HTTPError(404, 'could not find node')
    # Return a forbidden if the current user doesn't own the node
    if node.owner != self.current_user:
        raise web.HTTPError(403, 'you do not own the node')
    # Drop all outbound links and persist the node
    node.links = {}
    self.db[hash] = node
def get_last_update(self, obj_id):
    """
    Gets the time in which the object identified by obj_id was last
    updated.

    Raises KeyError if no such entity exists.
    """
    obj_id = scarecrow.ident(obj_id)
    db = self._connect()
    try:
        result = db.get(
            "SELECT updated FROM entities WHERE id=%s LIMIT 1", obj_id)
        # Previously the connection leaked on this path: KeyError was
        # raised before db.close() could run
        if result is None:
            raise KeyError()
        return result.updated
    finally:
        db.close()
def __setitem__(self, obj_id, obj):
    """Pickles and stores an entity under the given id, rebuilding all
    of its index data.
    """
    obj_id = scarecrow.ident(obj_id)
    obj_body = pickle.dumps(obj)
    # The original called self._connect() a second time mid-function,
    # leaking the first connection; a single connection is enough
    db = self._connect()
    try:
        # Reset any existing index data for the object
        for index in self.indexes:
            db.execute("DELETE FROM " + index + " WHERE entity_id=%s",
                       obj_id)
        # Insert the new object, or update it in place if it exists
        db.execute("""INSERT INTO entities (id, updated, body)
            VALUES (%s, NOW(), %s)
            ON DUPLICATE KEY UPDATE updated=NOW(), body=%s""",
            obj_id, obj_body, obj_body)
        # Run a map operation for the object for each index
        for index in self.indexes:
            self.indexes[index].map(db, obj_id, obj)
    finally:
        db.close()
def delete_to(self, uri):
    """Deletes every link owned by the current user that points at the
    node identified by uri.
    """
    hash = scarecrow.ident(model.node_key(uri))
    changed = False
    # Iterate through all the linked nodes and delete the link if it
    # still exists (the index could be stale)
    for node in self.db.index('links_index', 'get', hash,
                              model.account_key(self.current_user)):
        if uri not in node.links:
            continue
        changed = True
        del node.links[uri]
        self.db[model.node_key(node.id)] = node
    # If no changes were made, the node might not exist; throw a not
    # found if it doesn't. The original tested 'node in self.db', which
    # raised NameError when the index returned no rows and checked the
    # wrong value; test the node's hash instead (as get_to does).
    if not changed and hash not in self.db:
        raise web.HTTPError(404, 'could not find node')
def delete(self, from_node, to_node):
    """Deletes the link from from_node to to_node."""
    from_hash = scarecrow.ident(model.node_key(from_node))
    try:
        node = self.db[from_hash]
    except KeyError:
        # Return a not found if the node doesn't exist
        raise web.HTTPError(404, 'could not find node')
    # Return a forbidden if the current user doesn't own the node
    if node.owner != self.current_user:
        raise web.HTTPError(403, 'you do not own the from node')
    # Return a not found if the link doesn't exist
    if to_node not in node.links:
        raise web.HTTPError(404, 'could not find link')
    del node.links[to_node]
    self.db[from_hash] = node
def get_to(self, uri):
    """Serializes all links that point at the node identified by uri."""
    hash = scarecrow.ident(model.node_key(uri))
    nodes = self.db.index('links_index', 'get', hash)
    links = {}
    # Iterate through all the linked nodes and ensure the link still
    # exists since the index could be stale
    for node in nodes:
        try:
            links[node.id] = node.links[uri]
        except KeyError:
            # Stale index row: the link has since been removed. The
            # original used a bare except, which would also have hidden
            # unrelated errors.
            pass
    # If there were no results, check to see that the node exists; if
    # not, return a not found
    if not links and hash not in self.db:
        raise web.HTTPError(404, 'could not find node')
    serialize(self, links)
def delete(self, uri):
    """Deletes the node identified by uri and every inbound link to it."""
    hash = scarecrow.ident(model.node_key(uri))
    try:
        node = self.db[hash]
    except KeyError:
        # Return a not found if the node doesn't exist
        raise web.HTTPError(404, 'could not find node')
    # Return a forbidden if the current user doesn't own the node
    if node.owner != self.current_user:
        raise web.HTTPError(403, 'you do not own the node')
    # Iterate through each linked node and delete the link
    for link_node in self.db.index('links_index', 'get', hash):
        if uri not in link_node.links:
            continue
        del link_node.links[uri]
        self.db[model.node_key(link_node.id)] = link_node
    del self.db[hash]
def put(self, uri):
    """Adds the given tags to the node identified by uri."""
    hash = scarecrow.ident(model.node_key(uri))
    tags = util.check_tags(self.get_argument('tags', None))
    if not tags:
        raise web.HTTPError(400, "requires 'tags' parameter")
    try:
        node = self.db[hash]
    except KeyError:
        # return a not found if the node doesn't exist
        raise web.HTTPError(404)
    # return a forbidden if the current user doesn't own the node
    if node.owner != self.current_user:
        raise web.HTTPError(403)
    # Merge the new tags into the node's existing tag set
    node.tags.update(tags)
    self.db[hash] = node
    serialize(self, tags)
def put_node(request, uri):
    """Updates an existing or creates a new node identified by the given URI"""
    hash = scarecrow.ident(model.node_key(uri))
    tags = util.check_tags(request.get_argument('tags', None))
    date = util.check_datetime(request.get_argument('creation_date', None))
    try:
        node = request.db[hash]
    except KeyError:
        # Create a new node if it doesn't exist, falling back to
        # default tag/date values when none were supplied
        node = model.Entity(uri, 'node')
        node.owner = request.current_user
        node.creation_date = date if date else datetime.now()
        node.tags = tags if tags else set()
        node.links = {}
        node._cache = model.Storage()
        node._cache.candidates = model.Storage()
        node._cache.expired = False
    else:
        # Update an existing node; only its owner may modify it
        if node.owner != request.current_user:
            raise web.HTTPError(403, 'you do not own the node')
        if tags:
            node.tags = tags
        if date:
            node.creation_date = date
    node.update_date = datetime.now()
    request.db[hash] = node
    serialize(request, node)
def delete(self, uri):
    """Deletes tags from the node identified by uri.

    With no 'tags' argument all tags are cleared; otherwise only the
    given tags are removed. Returns a not found if the node, or any tag
    to remove, does not exist.
    """
    hash = scarecrow.ident(model.node_key(uri))
    delete_tags = util.check_tags(self.get_argument('tags', None))
    try:
        node = self.db[hash]
    except KeyError:
        # return a not found if the node doesn't exist
        raise web.HTTPError(404)
    # return a forbidden if the current user doesn't own the node
    # (attribute access, consistent with the other handlers; the
    # original used node['owner'])
    if node.owner != self.current_user:
        raise web.HTTPError(403)
    if delete_tags is None:
        # Tags are sets everywhere else in the model; the original
        # assigned a list literal ([]) here
        node.tags = set()
    else:
        try:
            for tag in delete_tags:
                node.tags.remove(tag)
        except KeyError:
            # return a not found if a tag to remove doesn't exist
            raise web.HTTPError(404)
    self.db[hash] = node
def candidates(node_store, root, max_visit):
    """
    Returns a set of candidates that could be used by recommendation
    algorithms for a given node. It is a list of sub-lists, where each
    sub-list contains the uri and resized weight.
    """
    owner = scarecrow.ident(model.account_key(root.owner))
    candidates = {}
    # Store a list of already visited links so we don't revisit them
    visited_links = set(root.links)
    visited_links.add(root.id)
    # Store a list of already visited nodes so we don't revisit them.
    # Must be a one-element set: the original's set(root.id) produced
    # the set of the id string's individual characters.
    visited_nodes = {root.id}
    # A queue of nodes to process
    queue = [[uri, root.links[uri].weight, 1] for uri in root.links]
    # Keep processing all the items in the queue until we reach
    # max_visit to ensure that the recommendations are returned quickly
    # enough if there are a lot of candidates
    while max_visit > 0:
        next_queue = []
        next_visited_links = set()
        # Process all nodes in the current queue
        for uri, weight, count in queue:
            if max_visit <= 0:
                break
            if uri in visited_nodes:
                continue
            hash = model.node_key(uri)
            node = node_store[hash]
            # Visit each outbound link in the currently processed node
            for link_uri in node.links:
                link_weight = node.links[link_uri].weight
                _visit(candidates, visited_links, next_visited_links,
                       next_queue, link_uri, weight, link_weight, count)
            # Visit each inbound link to the currently processed node
            for link_node in node_store.db.index("links_index", "get",
                                                 hash, owner):
                if uri in link_node.links:
                    link_uri = link_node.id
                    link_weight = weight + link_node.links[uri].weight
                    _visit(candidates, visited_links,
                           next_visited_links, next_queue, link_uri,
                           weight, link_weight, count)
            max_visit -= 1
            visited_nodes.add(uri)
        # Skip any further logic if we've processed the maximum number
        # of nodes
        if max_visit <= 0 or len(queue) == 0:
            break
        queue = next_queue
        visited_links.update(next_visited_links)
    # Each node has been potentially visited multiple times. Average
    # out the scores to create an overall weight
    for uri in candidates:
        weight, count = candidates[uri]
        candidates[uri] = weight / count
    # Store the results in the cache
    root._cache.candidates = candidates
    node_store.db[model.node_key(root.id)] = root
    return candidates