Пример #1
0
 def index_document_by_node(self, node):
     """Index a single node in ES.

     The document is PUT to ``{idx_name}/node/{id}``, where the id is the
     node's UTF-8 encoded path passed through ``uenc`` twice. The document
     body holds the node's name, parent, modification date, size and type.
     The outcome is only logged; nothing is returned.

     Args:
         node: the ``Node`` instance to index.

     Raises:
         TypeError: if ``node`` is not a ``Node`` instance.
     """
     if not isinstance(node, Node):
         raise TypeError(u'node is not of type domain.node.Node')

     # Only file nodes report a size; every other type stores an empty string.
     size = node.get_size() if node.type == Node.FILE_TYPE else ''
     put_url = '{idx_name}/node/{id}'.format(
         idx_name=self.idx_name,
         # dual url encoding as ES decodes it
         id=uenc(uenc(node.path.encode('utf-8'))))
     data = {
         'name': node.name,
         'parent': uenc(node.get_parent()),
         'date_modified': node.date_modified,
         'size': size,
         'type': node.type
     }
     result = self.es_service.conn.put(put_url, data=data)
     # Assuming result['status'] carries the HTTP status code: ES answers
     # 201 when the document is created and 200 when a document with the
     # same id is overwritten. Both are successful index operations, so a
     # re-index must not be reported as a failure.
     if result['status'] not in (200, 201):
         error_msg = u'Couldn\'t index document: {doc} to ES'.format(
             doc=node.path
         )
         self.app.logger.error(error_msg)
     else:
         self.app.logger.debug(u'Indexed {name}'.format(name=node.name))
Пример #2
0
 def images(self, **kwargs):
     """BrooklynMuseumAPI.images: perform a collection.getImages request.

     Both ``item_type`` and ``item_id`` must be given as keyword arguments.
     All keyword arguments are encoded with ``uenc`` and appended to the
     request URL after the instance's own ``self._params``.

     Returns:
         Whatever ``self.parse_response`` produces for the response body.

     Raises:
         TypeError: if ``item_type`` or ``item_id`` is missing.
     """
     if 'item_type' not in kwargs or 'item_id' not in kwargs:
         # A bare ``raise`` has no active exception to re-raise here and
         # only produces an unrelated interpreter error; say what is wrong.
         raise TypeError(
             'images() requires both item_type and item_id keyword '
             'arguments')
     req_url = '%s?%s&method=collection.getImages&%s' % \
         (self.url, uenc(self._params), uenc(kwargs))
     rsp = urllib2.urlopen(req_url).read()
     return self.parse_response(rsp)
Пример #3
0
 def do_folder_sync(self, node_id):
     """
     Bring the ES index in line with the on-disk content of one folder.

     The folder is looked up by ``node_id``. The ids of all ES documents
     whose ``parent`` is this folder are compared with the folders and
     files found on disk: ids that only exist in ES are deleted, nodes that
     only exist on disk are indexed, and ``self.flush_index()`` is called
     afterwards. If no folder is found an error is logged instead.

     Two queries are issued: the first only fetches the total number of
     hits, the second uses that total as its ``size``. Requesting a huge
     fixed size instead degrades Elasticsearch query performance badly.
     """
     folder = Folder.get_instance(node_id, decode=True)
     if not folder:
         self.app.logger.error(
             'No folder found by passing node id: {node_id}'.format(
                 node_id=node_id
             ))
         return

     index_id = uenc(folder.path.encode('utf-8'))

     def children_query():
         # Built fresh for every request: match documents whose parent is
         # this folder.
         return {"bool": {"must": [{"term": {"parent": index_id}}]}}

     count_response = self.es_service.conn.get(
         self.count_url, data={"query": children_query()})
     max_size = int(count_response['hits']['total'])

     search_url = u'{idx_name}/node/_search'.format(
         idx_name=self.idx_name)
     search_body = {
         "from": 0,
         "size": max_size,
         "fields": [],
         "query": children_query()
     }
     results = self.es_service.conn.get(search_url, data=search_body)

     es_node_ids = set()
     for hit in results['hits']['hits']:
         es_node_ids.add(hit['_id'])

     disk_nodes = {}
     for child in folder.folders + folder.files:
         disk_nodes[child.index_id] = child
     disk_node_ids = set(disk_nodes)

     # In ES but no longer on disk.
     stale_ids = es_node_ids - disk_node_ids
     # On disk but not yet in ES.
     missing_ids = disk_node_ids - es_node_ids

     for stale_id in stale_ids:
         self.delete_document_by_id(stale_id)
     for missing_id in missing_ids:
         self.index_document_by_node(disk_nodes[missing_id])
     self.flush_index()
Пример #4
0
 def search(self, **kwargs):
     """Query the records endpoint and wrap the raw response.

     The keyword arguments are encoded with ``uenc`` and appended to the
     request URL after the API key. The response body is handed, together
     with this client, to ``DigitalNZResponse``.
     """
     url_parts = (
         self.base_url,
         self.version,
         self.format,
         self.api_key,
         uenc(kwargs),
     )
     req_url = '%s/records/v%s.%s?api_key=%s&%s' % url_parts
     connection = urllib2.urlopen(req_url)
     rsp = connection.read()
     return DigitalNZResponse(self, rsp)
Пример #5
0
 def delete_document_by_id(self, document_id):
     """Remove the document with the given id string from ES.

     Returns ``document_id`` when the delete request reports status 200.
     Otherwise an error is logged and ``None`` is returned.
     """
     # The id is passed through uenc once more here: dual url encoding, as
     # ES decodes it.
     del_url = '{idx_name}/node/{id}'.format(
         idx_name=self.idx_name, id=uenc(document_id))
     result = self.es_service.conn.delete(del_url)
     if result['status'] == 200:
         return document_id
     self.app.logger.error(
         u'Couldn\'t delete document: {doc} from ES'.format(doc=del_url))
Пример #6
0
 def custom_search(self, title=None, **kwargs):
     """Run a named custom search and wrap the raw response.

     Args:
         title: name of the custom search; it becomes a path segment of
             the request URL and is required despite the ``None`` default.
         **kwargs: extra query parameters, encoded with ``uenc`` and
             appended after the API key.

     Returns:
         A ``DigitalNZResponse`` built from this client and the response
         body.

     Raises:
         TypeError: if ``title`` is ``None``.
     """
     if title is None:
         # A bare ``raise`` has no active exception to re-raise here and
         # only produces an unrelated interpreter error; say what is wrong.
         raise TypeError('custom_search() requires a title')
     req_url = '%s/custom_searches/v%s/%s.%s?api_key=%s&%s' % (
         self.base_url,
         self.version,
         title,
         self.format,
         self.api_key,
         uenc(kwargs))
     rsp = urllib2.urlopen(req_url).read()
     return DigitalNZResponse(self, rsp)
Пример #7
0
 def index_folders_and_files(self, folder=None):
     """
     Index all folders returned by ``self.find_all_folders(folder)`` and
     the files inside them.

     Each folder's UTF-8 encoded path, passed through ``uenc``, becomes the
     id of its document in the ElasticSearch index. The folder documents
     are collected into one bulk request; files are handled per folder by
     ``self.index_files``.

     Returns the sum of the values returned by ``self.index_files`` plus
     the number of folders.
     """
     folders = self.find_all_folders(folder)
     bulk_payload = []
     files_indexed = 0
     for entry in folders:
         doc_id = uenc(entry['path'].encode('utf-8'))
         parent_id = uenc(entry['parent'].encode('utf-8'))
         document = {
             'name': entry['name'],
             'parent': parent_id,
             'date_modified': entry['date_modified'],
             'type': Node.FOLDER_TYPE,
             'size': ''
         }
         # A bulk request alternates an action line with its document.
         bulk_payload.extend([{'index': {'_id': doc_id}}, document])
         files_indexed += self.index_files(entry)
     self.es_service.bulk_insert(self.bulk_insert_url, bulk_payload)
     return files_indexed + len(folders)
Пример #8
0
 def search(self, **kwargs):
     """BrooklynMuseumAPI.search: perform a collection.search request.

     The instance's ``self._params`` and the given keyword arguments are
     each encoded with ``uenc`` and placed in the query string. The
     response body is returned through ``self.parse_response``.
     """
     url_parts = (self.url, uenc(self._params), uenc(kwargs))
     req_url = '%s?%s&method=collection.search&%s' % url_parts
     connection = urllib2.urlopen(req_url)
     rsp = connection.read()
     return self.parse_response(rsp)