def delete(self, unique_id: str) -> BaseResponse:
    """Delete the document with the given id through the index client.

    Args:
        unique_id: Identifier of the document to delete.

    Returns:
        ``BaseResponse(True)`` when the client reports ``result == 'deleted'``.
        Otherwise a ``BaseResponse`` carrying an ``Error``:
        ``IntegrationError`` (500) when the result field is missing or has
        another value, ``ObjectNotFound`` when the client raises a
        ``TransportError`` with status 404, ``IntegrationError`` for any
        other ``TransportError``, and ``InternalServerError`` (500) for a
        ``BasicException`` or any other exception.
    """
    response = BaseResponse()
    try:
        elastic_response = self.__client.delete(self.__index, 'index',
                                                unique_id)
        if ('result' not in elastic_response
                or elastic_response['result'] != 'deleted'):
            # Return the response object itself, like every other path does,
            # rather than whatever set_error() happens to return.
            response.set_error(
                Error("IntegrationError", 500,
                      "Index failed to delete index!"))
            return response
        response = BaseResponse(True)
    except TransportError as e:
        # The client library may report a non-numeric status (e.g. 'N/A')
        # when no HTTP response was received; fall back to 500 so the error
        # always carries a usable numeric code.
        status = e.status_code if isinstance(e.status_code, int) else 500
        if status == 404:
            response.set_error(
                Error("ObjectNotFound", status,
                      'Document does not exist!'))
        else:
            response.set_error(
                Error("IntegrationError", status,
                      'Unknown integration error!'))
    except BasicException as e:
        response.set_error(Error("InternalServerError", 500, e.message))
    except Exception as e:
        print(e)
        response.set_error(
            Error("InternalServerError", 500, 'Unknown error occurred!'))
    return response
Пример #2
0
 def finish_job(self, crawler_id: UUID, result: JobResult) -> BaseResponse:
     """Record a finished job and register the jobs reported with it.

     Inside one transaction this registers the crawler call, marks
     ``result.job_id`` as finished, and then walks ``result.job_list``:
     a job whose target hash is already known is unlocked when it is
     locked and was added more than seven days ago; when the lookup raises
     ``EntityNotFoundException`` a new ``JobData`` is added instead.

     Returns:
         ``BaseResponse(True)`` after a successful commit. On failure the
         transaction is rolled back and the response carries an
         ``ObjectNotFound`` (404) or ``InternalServerError`` (500) error.
     """
     outcome = BaseResponse()
     self.context.start_transaction()
     try:
         self.context.crawler_set().register_call(crawler_id)
         self.repository.finish_job(result.job_id, crawler_id)
         for found in result.job_list:
             target_hash = hash_generator().generate_target_hash(found.target)
             try:
                 known = self.context.job_set().get_by_hash(target_hash)
                 expires_at = known.date_added + timedelta(days=7)
                 if expires_at < datetime.now():
                     if known.locked:
                         self.context.job_set().unlock(known.job_id)
             except EntityNotFoundException:
                 # Lookup (or unlock) reported no such job: queue a new one.
                 queued = JobData(found.job_type, found.target, False,
                                  crawler_id, found.plugin_type)
                 self.context.job_set().add(queued)
         self.context.save()
         self.context.commit()
         outcome = BaseResponse(True)
     except EntityNotFoundException as missing:
         self.context.rollback()
         outcome.set_error(Error('ObjectNotFound', 404, missing.message))
     except BasicException as known_failure:
         self.context.rollback()
         outcome.set_error(
             Error("InternalServerError", 500, known_failure.message))
         print(known_failure)
     except Exception as unexpected:
         self.context.rollback()
         outcome.set_error(
             Error("InternalServerError", 500, str(unexpected)))
     return outcome
Пример #3
0
 def get_next_job(self, crawler_id: UUID,
                  available_plugins: List[str]) -> JobInformation:
     """Fetch and lock the next free job for one of the given plugins.

     Inside one transaction this registers the crawler call, asks the job
     set for the next free job matching ``available_plugins`` and locks it.

     Returns:
         A ``JobInformation`` wrapping an ``ExtendedJobDescription`` of the
         locked job. When no free job is found the transaction is still
         committed and an ``ObjectNotFound`` (404) error is returned. Any
         other failure rolls the transaction back and yields an
         ``ObjectNotFound`` (404) or ``InternalServerError`` (500) error.
     """
     info = JobInformation()
     self.context.start_transaction()
     try:
         self.context.crawler_set().register_call(crawler_id)
         try:
             next_job = self.context.job_set().get_next_free_in_plugin_list(
                 available_plugins)
         except EntityNotFoundException:
             # Nothing to hand out; persist the registered call anyway.
             self.context.save()
             self.context.commit()
             no_jobs = Error('ObjectNotFound', 404,
                             'No free jobs at the moment!')
             info.set_error(no_jobs)
             return info
         self.context.job_set().lock(next_job.job_id)
         self.context.save()
         self.context.commit()
         description = ExtendedJobDescription(next_job.job_id, next_job.type,
                                              next_job.target,
                                              next_job.plugin_type)
         info = JobInformation(description)
     except EntityNotFoundException as missing:
         self.context.rollback()
         info.set_error(Error('ObjectNotFound', 404, missing.message))
     except BasicException as known_failure:
         self.context.rollback()
         info.set_error(
             Error("InternalServerError", 500, known_failure.message))
     except Exception:
         self.context.rollback()
         info.set_error(
             Error("InternalServerError", 500, 'Unknown error occurred!'))
     return info
Пример #4
0
 def search(self, query: SearchQuery) -> SearchResult:
     """Run ``query`` against the index and collect the matching documents.

     The request is built from ``query.searchCriteria`` (lower-cased terms
     joined into ``q``, ``field^weight`` pairs joined into ``qf``) and
     ``query.rangeCriteria`` (one ``field:[min TO max]`` filter each), with
     paging taken from ``query.page`` and ``query.items``.

     Returns:
         A ``SearchResult`` holding the reported hit count and the
         deserialized documents. If anything raises while querying or
         deserializing, the result carries an ``InternalServerError`` (500).
     """
     term_parts = []
     weighted_fields = []
     for criterion in query.searchCriteria:
         weighted_fields.append(
             criterion.field + '^' + str(criterion.weight) + " ")
         term_parts.extend(
             " " + piece.lower() for piece in criterion.term.split(" "))
     range_filters = [
         "".join((bounds.field, ":[", str(bounds.minimum), " TO ",
                  str(bounds.maximum), "]"))
         for bounds in query.rangeCriteria
     ]
     payload = {
         "q": "".join(term_parts).strip(),
         "offset": query.page * query.items,
         "limit": query.items,
         "filter": range_filters,
         "defType": "edismax",
         "qf": "".join(weighted_fields)
     }
     found = SearchResult(0, False)
     try:
         raw = self.client.query_raw(self.index, payload)
         found = SearchResult(raw['response']['numFound'], True)
         for doc in raw['response']['docs']:
             item = self._serializer.deserialize(doc, self.index_object_type)
             found.add_result(item)
     except Exception:
         found.set_error(
             Error("InternalServerError", 500, 'Unknown error occurred!'))
     return found
 def add(self, document: IndexDocument) -> BaseResponse:
     """Index ``document`` under its ``unique_id`` through the index client.

     Args:
         document: Document to serialize and send to the index.

     Returns:
         ``BaseResponse(True)`` when the client reports a ``result`` of
         ``'created'`` or ``'updated'``. Otherwise a ``BaseResponse``
         carrying an ``IntegrationError`` (500) for an unexpected result, or
         an ``InternalServerError`` (500) when an exception is raised.
     """
     response = BaseResponse()
     try:
         elastic_response = self.__client.index(
             self.__index, 'index', self.__serializer.serialize(document),
             document.unique_id)
         if ('result' not in elastic_response
                 or elastic_response['result'] not in ('created', 'updated')):
             # Return the response object itself, like every other path
             # does, rather than whatever set_error() happens to return.
             response.set_error(
                 Error("IntegrationError", 500,
                       "Index failed to add index!"))
             return response
         response = BaseResponse(True)
     except BasicException as e:
         response.set_error(Error("InternalServerError", 500, e.message))
     except Exception:
         response.set_error(
             Error("InternalServerError", 500, 'Unknown error occurred!'))
     return response
 def delete(self, unique_id: str) -> BaseResponse:
     """Delete the document whose ``unique_id`` field matches the given id.

     Args:
         unique_id: Identifier of the document; it is converted to ``str``
             before being used in the filter.

     Returns:
         ``BaseResponse(True)`` when the delete call completes without
         raising, otherwise a ``BaseResponse`` carrying an
         ``InternalServerError`` (500).
     """
     response = BaseResponse()
     try:
         self.get_collection().find_one_and_delete(
             {"unique_id": str(unique_id)})
         # Flag success explicitly, as the sibling add/delete methods do;
         # previously the default (non-success) response was returned even
         # when the delete went through.
         response = BaseResponse(True)
     except Exception:
         response.set_error(
             Error('InternalServerError', 500,
                   'Failed to delete document to index'))
     return response
 def search(self, query: SearchQuery) -> SearchResult:
     """Run ``query`` against the index and collect the returned hits.

     The query is first converted with ``__transform_query`` and then sent
     through the client.

     Returns:
         A ``SearchResult`` built from ``hits.total`` with every hit's
         ``_source`` deserialized into it. When the reply has no ``hits``
         key, or anything raises during the search, the result carries an
         ``InternalServerError`` (500).
     """
     payload = self.__transform_query(query)
     found = SearchResult(0, False)
     try:
         reply = self.__client.search(self.__index, 'index', payload)
         if 'hits' in reply:
             found = SearchResult(reply['hits']['total'], True)
             for entry in reply['hits']['hits']:
                 item = self.__serializer.deserialize(
                     entry['_source'], self.index_object_type)
                 found.add_result(item)
         else:
             found.set_error(
                 Error("InternalServerError", 500,
                       'Index did not return proper response!'))
     except Exception:
         found.set_error(
             Error("InternalServerError", 500, 'Unknown error occurred!'))
     return found
Пример #8
0
 def delete(self, unique_id: str) -> BaseResponse:
     """Delete the document with the given id from the index and commit.

     Args:
         unique_id: Identifier of the document to delete.

     Returns:
         ``BaseResponse(True)`` once the delete returned a truthy reply and
         the commit was issued. Otherwise a ``BaseResponse`` carrying an
         ``IntegrationError`` (500) for a falsy reply, or an
         ``InternalServerError`` (500) when an exception is raised.
     """
     response = BaseResponse()
     try:
         solr_response = self.client.delete_doc_by_id(self.index, unique_id)
         if not solr_response:
             # Return the response object itself, like every other path
             # does, rather than whatever set_error() happens to return.
             response.set_error(
                 Error("IntegrationError", 500,
                       "Index failed to delete index!"))
             return response
         self.client.commit(self.index,
                            openSearcher=True,
                            waitSearcher=False)
         response = BaseResponse(True)
     except BasicException as e:
         response.set_error(Error("InternalServerError", 500, e.message))
     except Exception as e:
         print(e)
         response.set_error(
             Error("InternalServerError", 500, 'Unknown error occurred!'))
     return response
 def get(self, unique_id: str) -> DocumentResponse:
     """Look up the document whose ``unique_id`` field matches the given id.

     Returns:
         ``DocumentResponse(True, entity)`` when a document is found,
         otherwise a ``DocumentResponse`` carrying a ``NotFound`` (404)
         error. Exceptions raised by the lookup are not handled here.
     """
     lookup = {"unique_id": str(unique_id)}
     entity = self.get_collection().find_one(lookup)
     if entity is not None:
         return DocumentResponse(True, entity)
     not_found = DocumentResponse()
     not_found.set_error(
         Error('NotFound', 404,
               'Index document not found with given id'))
     return not_found
Пример #10
0
 def add(self, document: IndexDocument) -> BaseResponse:
     """Send ``document`` to the index as JSON and commit.

     The document's ``id`` attribute is set to its ``unique_id`` before it
     is serialized (as a one-element list).

     Args:
         document: Document to index; its ``id`` attribute is overwritten.

     Returns:
         ``BaseResponse(True)`` once indexing returned a truthy reply and
         the commit was issued. Otherwise a ``BaseResponse`` carrying an
         ``IntegrationError`` (500) for a falsy reply, or an
         ``InternalServerError`` (500) when an exception is raised.
     """
     response = BaseResponse()
     try:
         document.id = document.unique_id
         doc_body = self._serializer.serialize([document])
         solr_response = self.client.index_json(self.index, doc_body)
         if not solr_response:
             # Return the response object itself, like every other path
             # does, rather than whatever set_error() happens to return.
             response.set_error(
                 Error("IntegrationError", 500,
                       "Index failed to add index!"))
             return response
         self.client.commit(self.index,
                            openSearcher=True,
                            waitSearcher=False)
         response = BaseResponse(True)
     except BasicException as e:
         response.set_error(Error("InternalServerError", 500, e.message))
     except Exception:
         response.set_error(
             Error("InternalServerError", 500, 'Unknown error occurred!'))
     return response
    def add(self, document: IndexDocument) -> BaseResponse:
        """Insert ``document`` into the collection, or replace the stored copy.

        The document is matched on its ``unique_id`` (converted to ``str``)
        and written as ``document.__dict__``.

        Args:
            document: Document to store.

        Returns:
            ``BaseResponse(True)`` when the write completes without raising,
            otherwise a ``BaseResponse`` carrying an ``InternalServerError``
            (500).
        """
        response = BaseResponse()
        try:
            # A single upsert replaces the earlier find-then-insert/replace
            # sequence: it saves a round trip and closes the window in which
            # two concurrent adds could both see "not found" and both insert.
            self.get_collection().replace_one(
                {"unique_id": str(document.unique_id)},
                document.__dict__,
                upsert=True)
            response = BaseResponse(True)
        except Exception:
            response.set_error(
                Error('InternalServerError', 500,
                      'Failed to add document to index'))
        return response