def put(self, corpusId, bucketId):
    """
    Update an existing annotation of a bucket from the JSON request body.

    The body must contain ``annotationId`` (the annotation to update) and
    ``schemaType`` (the schema the annotation is linked to). If the body
    also carries ``bucketId`` it must be equal to the one given in the path.

    Responses written through ``write_and_set_status``:

    * ``NO_CONTENT`` - the annotation was updated.
    * ``BAD_REQUEST`` - the request body is not valid JSON.
    * ``UNPROCESSABLE_ENTITY`` - a required field is missing, the body
      ``bucketId`` differs from the path, or the ``schemaType`` differs
      from the one of the stored annotation.
    * ``NOT_FOUND`` - the bucket or the annotation does not exist.
    * ``INTERNAL_SERVER_ERROR`` - any other error; the traceback lines are
      included in the response.

    :param corpusId: corpus identifier, forwarded to ``get_bucket``.
    :param bucketId: bucket identifier coming from the request path.
    :return: None
    """
    try:
        body = json.loads(self.request.body.decode("utf-8"))
        envId = get_env_id()
        authorization = get_autorisation(envId, None, None)
        sett = get_settings()
        shouldValidate = sett['USE_ANNOTATION_AND_SCHEMA_VALIDATOR']

        if "annotationId" not in body:
            self.write_and_set_status(
                {MESSAGE: "Missing annotationId field required to find an annotation to update."},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        # The id is handed to update_annotation separately, so it is
        # stripped from the body before the body is passed along.
        annotationId = body["annotationId"]
        del body["annotationId"]

        if "schemaType" not in body:
            self.write_and_set_status(
                {MESSAGE: "Missing schemaType field, which links the annotation to its schema."},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        docType = body["schemaType"]

        # bucketId is optional in the body, but may not contradict the path.
        if "bucketId" in body and body["bucketId"] != bucketId:
            self.write_and_set_status(
                {MESSAGE: "bucketId from the path is different than bucketId in the body."},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        bucket = get_master_bucket_list(envId, authorization).get_bucket(corpusId, bucketId)
        storedAnnotation = bucket.get_annotation(id=annotationId, docType=docType)
        if storedAnnotation["schemaType"] != docType:
            self.write_and_set_status(
                {MESSAGE: "You cannot change the schemaType of an annotation."},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return

        bucket.update_annotation(body, docType, annotationId, shouldValidate)
        self.write_and_set_status(None, HTTPStatus.NO_CONTENT)
    except BucketNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified bucket not found"}, HTTPStatus.NOT_FOUND)
    except DocumentNotFoundException:
        self.write_and_set_status({MESSAGE: "Annotation with provided id does not exist"},
                                  HTTPStatus.NOT_FOUND)
    except json.JSONDecodeError:
        # A malformed body is a client error, not a server failure: answer
        # the same way the POST handler does instead of a 500 with a trace.
        self.write_and_set_status({MESSAGE: "Invalid JSON format for annotation"},
                                  HTTPStatus.BAD_REQUEST)
    except Exception:
        trace = traceback.format_exc().splitlines()
        self.write_and_set_status({MESSAGE: "Internal server error", TRACE: trace},
                                  HTTPStatus.INTERNAL_SERVER_ERROR)
def initialize_es():
    """
    Prepare Elasticsearch for use.

    Loads the settings, calls ``es_wait_ready()`` and then initialises the
    environment list with the ``CLASSES`` / ``ENV`` section of the settings.

    :return: None
    """
    settings = get_settings()
    es_wait_ready()
    env_settings = settings['CLASSES']['ENV']
    EnvList.initialize_env_list(env_settings)
def partial_corpora_indices(corpus_ids: List[str]) -> str:
    """
    Build the comma-separated list of index patterns for the given corpora.

    Every corpus id yields one wildcard pattern of the form
    ``<env id><class prefix><corpus id>*<index data suffix>_*``.

    :param corpus_ids: ids of the corpora whose indices should be targeted.
    :return: the patterns joined with ``,`` (an empty string when no id is
        given).
    """
    # Design note kept from the original author (Jean-François Héon):
    # the idiomatic approach would be to instantiate a corpus per corpus id
    # and search each one for the bucket with the right schema type. That
    # represents a 2n operation before the main search even starts, so the
    # index names are assembled directly instead, as a (possibly premature)
    # optimization against latency.
    directory_settings = get_settings()['CLASSES']['DOCUMENT_DIRECTORY']
    class_prefix = directory_settings['CLASS_PREFIX']
    pattern_tail = '*' + directory_settings['INDEX_DATA_SUFFIX'] + '_*'
    pattern_head = get_env_id() + class_prefix
    return ','.join(pattern_head + corpus_id + pattern_tail for corpus_id in corpus_ids)
def setUp(self):
    """
    Test fixture: make sure a freshly created "unittest_" environment exists.

    Stores the environment id, an authorization and the environment list on
    the instance. When the environment already exists it is deleted and
    created again.
    """
    # The returned settings are not used here; the call itself is kept
    # in case loading the settings has side effects - TODO confirm.
    get_settings()

    env_id = "unittest_"
    self.envId = env_id
    self.authorization = BaseAuthorization(env_id, None, None, None)
    self.envList1 = get_env_list(self.authorization)

    try:
        self.envList1.create_env(env_id)
    except EnvAlreadyExistWithSameIdException:
        # An environment with this id is already there: wait a moment,
        # remove it and start over with a new one.
        time.sleep(1)
        self.envList1.delete_env(env_id)
        self.envList1.create_env(env_id)
def setUp(self):
    """
    Test fixture: delete the "unittest_*" indices and build the list objects.

    After the deletion, an authorization, a document directory list, a
    bucket list and a document corpus list are created for the "unittest_"
    environment and stored on the instance.
    """
    # Remove every index matching the unit-test prefix.
    get_es_conn().indices.delete(index="unittest_*")
    # Presumably gives Elasticsearch time to apply the deletion - TODO confirm.
    time.sleep(0.1)

    class_settings = get_settings()['CLASSES']
    env_id = "unittest_"
    self.envId = env_id
    self.authorization = BaseAuthorization.create_authorization(env_id, None, None)
    self.masterList = DocumentDirectoryList.create(
        env_id, class_settings['DOCUMENT_DIRECTORY'], self.authorization)
    self.bucketList = BucketList.create(
        env_id, class_settings['BUCKET'], self.authorization)
    self.documentCorpusList = DocumentCorpusList.create(
        env_id, class_settings['DOCUMENT_CORPUS'], self.authorization)
def set_up_corpus(self):
    """
    Create corpus "corpus1" with bucket "bucket1" and attach two schemas.

    ``SCHEMA_NORMAL`` is bound to the "sentence" schema type and
    ``SCHEMA_OFFSETS`` (registered with ``offsets`` as a nested field) to the
    "token" schema type. The schema list is stored on ``self.schemaList``.
    The one second pauses between steps presumably let the backing store
    catch up - TODO confirm.
    """
    corpus_list = get_master_document_corpus_list(self.envId, self.authorization)
    corpus = corpus_list.create_corpus("corpus1")
    time.sleep(1)

    bucket = corpus.create_bucket("bucket1", "bucket1")
    # The returned settings are not used here; the call itself is kept
    # in case loading the settings has side effects - TODO confirm.
    get_settings()

    self.schemaList = get_schema_list(self.envId, self.authorization)
    normal_schema_id = self.schemaList.add_json_schema_as_hash(SCHEMA_NORMAL)
    offsets_schema_id = self.schemaList.add_json_schema_as_hash(
        SCHEMA_OFFSETS, False, nestedFields=["offsets"])
    time.sleep(1)

    # Same target type and empty options for both schema types.
    for schema_id, schema_type in ((normal_schema_id, "sentence"),
                                   (offsets_schema_id, "token")):
        bucket.add_or_update_schema_to_bucket(
            schema_id, schema_type, TargetType.document_surface1d, {})
    time.sleep(1)
def post(self, corpusId, bucketId):
    """
    Add an annotation to a bucket from the JSON request body.

    ``schemaType`` is mandatory in the body. ``annotationId`` is optional;
    when present it is removed from the body and passed to
    ``add_annotation`` as the requested id.

    Responses written through ``write_and_set_status``:

    * ``OK`` with ``{"id": ...}`` - the id returned by ``add_annotation``.
    * ``UNPROCESSABLE_ENTITY`` - ``schemaType`` is missing.
    * ``NOT_FOUND`` - the bucket does not exist.
    * ``CONFLICT`` - an annotation with the same id already exists.
    * ``BAD_REQUEST`` - the request body is not valid JSON.
    * ``INTERNAL_SERVER_ERROR`` - any other error; the traceback lines are
      included in the response.

    :param corpusId: corpus identifier, forwarded to ``get_bucket``.
    :param bucketId: bucket identifier, forwarded to ``get_bucket``.
    :return: None
    """
    try:
        payload = json.loads(self.request.body.decode("utf-8"))
        env_id = get_env_id()
        auth = get_autorisation(env_id, None, None)
        settings = get_settings()
        validate = settings['USE_ANNOTATION_AND_SCHEMA_VALIDATOR']

        # Optional caller-chosen id: taken out of the payload because it is
        # handed to add_annotation as a separate argument.
        requested_id = None
        if "annotationId" in payload:
            requested_id = payload["annotationId"]
            del payload["annotationId"]

        if "schemaType" not in payload:
            self.write_and_set_status(
                {MESSAGE: "Missing schemaType field, which links the annotation to its schema."},
                HTTPStatus.UNPROCESSABLE_ENTITY)
            return
        schema_type = payload["schemaType"]

        bucket_list = get_master_bucket_list(env_id, auth)
        bucket = bucket_list.get_bucket(corpusId, bucketId)
        created_id = bucket.add_annotation(payload, schema_type, requested_id, validate)
        self.write_and_set_status({"id": created_id}, HTTPStatus.OK)
    except BucketNotFoundException:
        self.write_and_set_status({MESSAGE: "Specified bucket not found"}, HTTPStatus.NOT_FOUND)
    except DocumentAlreadyExistsException:
        self.write_and_set_status({MESSAGE: "Annotation with the same id already exist"},
                                  HTTPStatus.CONFLICT)
    except JSONDecodeError:
        self.write_and_set_status({MESSAGE: "Invalid JSON format for annotation"},
                                  HTTPStatus.BAD_REQUEST)
    except Exception:
        tb_lines = traceback.format_exc().splitlines()
        self.write_and_set_status({MESSAGE: "Internal server error", TRACE: tb_lines},
                                  HTTPStatus.INTERNAL_SERVER_ERROR)