def get(last_update_time: int) -> Union[Response, ConnexionResponse]:
    """The GET method for the corpus list REST API. It provides metadata for all available texts."""
    ui_cts: UpdateInfo = DatabaseService.query(
        UpdateInfo, filter_by=dict(resource_type=ResourceType.cts_data.name), first=True)
    # the client sends milliseconds, the database stores seconds
    if ui_cts and ui_cts.last_modified_time >= last_update_time / 1000:
        corpora: List[Corpus] = DatabaseService.query(Corpus)
        return NetworkService.make_json_response([x.to_dict() for x in corpora])
    return NetworkService.make_json_response(None)


def update_exercises() -> Union[Response, ConnexionResponse]:
    """Gets all static exercises from the frontend code repository and looks for the lemmata in them."""
    # TODO: check last update of the directory before pulling the whole zip archive
    response: Response = requests.get(Config.STATIC_EXERCISES_REPOSITORY_URL, stream=True)
    if not response.ok:
        return connexion.problem(503, Config.ERROR_TITLE_SERVICE_UNAVAILABLE,
                                 Config.ERROR_MESSAGE_SERVICE_UNAVAILABLE)
    relevant_strings_dict: Dict[str, Set[str]] = get_relevant_strings(response)
    file_dict: Dict = {}
    lemma_set: Set[str] = set()
    for url in relevant_strings_dict:
        for word in relevant_strings_dict[url]:
            if word not in lemma_set:
                lemma_set.add(word)
                input_bytes = bytearray(word, encoding='utf-8', errors='strict')
                file_handler, file_path = mkstemp()
                os.write(file_handler, input_bytes)
                file_dict[file_path] = file_handler
    result_string: str = AnnotationService.get_udpipe("", False, file_dict)
    search_results: List[Tuple[str, str]] = re.findall(r"1\t([a-zA-Z]*)\t([a-zA-Z]*)", result_string)
    search_results_dict: Dict[str, int] = {item[0]: i for (i, item) in enumerate(search_results)}
    for url in relevant_strings_dict:
        # the URN points to Cicero's letters to his brother Quintus, 1.1.8-1.1.10
        NetworkService.exercises[url] = StaticExercise(
            solutions=[], urn="urn:cts:latinLit:phi0474.phi058.perseus-lat1:1.1.8-1.1.10")
        for word in relevant_strings_dict[url]:
            # UDpipe cannot handle name abbreviations, so remove the punctuation and only keep the upper case letter
            if word[-1] in string.punctuation:
                word = word[:-1]
            NetworkService.exercises[url].solutions.append(list(search_results[search_results_dict[word]]))
    NetworkService.exercises_last_update = datetime.fromtimestamp(time())
    return NetworkService.make_json_response(
        {x: NetworkService.exercises[x].to_dict() for x in NetworkService.exercises})


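# Illustrative sketch for the regex above (the sample line is an assumption, not
# real output from this pipeline): UDpipe returns CoNLL-U, where each token line
# is tab-separated as ID, FORM, LEMMA, ... . Because every word was written to
# its own temp file, its token ID is always 1, so the pattern captures exactly
# one (form, lemma) pair per word.
def _conll_regex_example() -> None:
    import re
    sample = "1\tamicis\tamicus\tNOUN\t..."
    pairs = re.findall(r"1\t([a-zA-Z]*)\t([a-zA-Z]*)", sample)
    assert pairs == [("amicis", "amicus")]

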
def post(file_data: dict) -> Response:
    """The POST method for the File REST API. It writes learning results or HTML content to the disk for later access."""
    lr_string: str = file_data.get("learning_result", None)
    if lr_string:
        lr_dict: dict = json.loads(lr_string)
        for exercise_id in lr_dict:
            xapi_statement: XapiStatement = XapiStatement(lr_dict[exercise_id])
            save_learning_result(xapi_statement)
        return NetworkService.make_json_response(str(True))
    else:
        file_type: FileType = file_data["file_type"]
        existing_file: DownloadableFile = FileService.make_tmp_file_from_html(
            file_data["urn"], file_type, file_data["html_content"])
        return NetworkService.make_json_response(existing_file.file_name)


def post(network_data: dict) -> Response:
    """The POST method for the vector network REST API. It provides sentences whose content is similar to a given word."""
    vnf: VectorNetworkForm = VectorNetworkForm.from_dict(network_data)
    nearest_neighbor_count = vnf.nearest_neighbor_count if vnf.nearest_neighbor_count else 10
    w2v: Word2Vec = Word2Vec.load(Config.PANEGYRICI_LATINI_MODEL_PATH)
    search_regex: Pattern[str] = re.compile(vnf.search_regex)
    keys: List[str] = [x for x in w2v.wv.vocab if search_regex.match(x)]
    relevant_vectors: List[ndarray] = [w2v.wv.get_vector(x) for x in keys]
    target_vector: ndarray = sum(relevant_vectors) / len(relevant_vectors)
    with open(Config.PANEGYRICI_LATINI_TEXT_PATH) as f:
        sentences: List[str] = f.readlines()
    sentence_vectors: Dict[int, ndarray] = {}
    for i in range(len(sentences)):
        toks: List[str] = sentences[i][:-1].split()
        if toks:
            vecs: List[ndarray] = [w2v.wv.get_vector(tok) for tok in toks]
            sentence_vectors[i] = sum(vecs) / len(vecs)
    sims: List[Tuple[int, ndarray]] = []
    for key in sentence_vectors.keys():
        sims.append((key, dot(matutils.unitvec(target_vector), matutils.unitvec(sentence_vectors[key]))))
    sims.sort(key=lambda x: x[1], reverse=True)
    sims = sims[:nearest_neighbor_count]
    return NetworkService.make_json_response([sentences[x[0]].split() for x in sims])


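# Minimal sketch of the ranking step above, with dummy vectors instead of the
# Panegyrici Latini model (an assumption for illustration): dot(unitvec(a), unitvec(b))
# is the cosine similarity, so sentences are ranked by the angle between their
# averaged token vector and the averaged vector of all regex-matched keys.
def _cosine_ranking_example() -> None:
    import numpy as np

    def cosine(a: np.ndarray, b: np.ndarray) -> float:
        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

    target = np.mean([np.array([1.0, 0.0]), np.array([0.8, 0.2])], axis=0)
    sentence_vectors = {0: np.array([0.9, 0.1]), 1: np.array([0.0, 1.0])}
    ranked = sorted(sentence_vectors, key=lambda i: cosine(target, sentence_vectors[i]), reverse=True)
    assert ranked == [0, 1]  # sentence 0 points in nearly the same direction as the target

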
def get(self):
    """Returns matches from ANNIS for a given CTS URN and AQL."""
    # get request arguments
    args: dict = flask.request.args
    urn: str = args["urn"]
    aql: str = args["aql"]
    return NetworkService.make_json_response(CorpusService.find_matches(urn, aql, is_csm=True))


def get() -> Union[Response, ConnexionResponse]:
    """The GET method for the StaticExercises REST API. It provides a list of static exercises and their respective URLs in the frontend."""
    # TODO: WRITE AND READ LAST UPDATE TIME FROM THE DATABASE
    if datetime.fromtimestamp(time() - Config.INTERVAL_STATIC_EXERCISES) > NetworkService.exercises_last_update \
            or len(NetworkService.exercises) == 0:
        return update_exercises()
    return NetworkService.make_json_response(
        {x: NetworkService.exercises[x].to_dict() for x in NetworkService.exercises})


def get(urn: str) -> Union[Response, ConnexionResponse]:
    """Provides the raw text for a requested text passage."""
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=urn, is_csm=False)
    if not ar.graph_data.nodes:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    ar.text_complexity = TextComplexityService.text_complexity(
        TextComplexityMeasure.all.name, urn, False, ar.graph_data).to_dict()
    return NetworkService.make_json_response(ar.to_dict())


def get(cid: int) -> Union[Response, ConnexionResponse]:
    """The GET method for the corpus REST API. It provides metadata for a specific text."""
    corpus: Corpus = DatabaseService.query(Corpus, filter_by=dict(cid=cid), first=True)
    if not corpus:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    return NetworkService.make_json_response(corpus.to_dict())


def get(self):
    """Returns graph data for a given CTS URN."""
    # get request arguments
    args: Dict = flask.request.args
    cts_urn: str = args["urn"]
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=cts_urn, is_csm=True)
    if not ar.graph_data.nodes:
        abort(404)
    return NetworkService.make_json_response(ar.to_dict())


def get(frequency_upper_bound: int, query_urn: str, vocabulary: str) -> Response:
    """Retrieves sentence ID and matching degree for each sentence in the query text."""
    vc: VocabularyCorpus = VocabularyCorpus[vocabulary]
    vocabulary_set: Set[str] = FileService.get_vocabulary_set(vc, frequency_upper_bound)
    # punctuation should count as a match because it should not be counted against vocabulary coverage
    for char in string.punctuation:
        vocabulary_set.add(char)
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=query_urn, is_csm=False)
    sentences: List[Sentence] = check_vocabulary(ar.graph_data, vocabulary_set)
    return NetworkService.make_json_response([x.to_dict() for x in sentences])


def get(lang: str, frequency_upper_bound: int, last_update_time: int, vocabulary: str = ""):
    """The GET method for the exercise list REST API. It provides metadata for all available exercises."""
    vocabulary_set: Set[str]
    ui_exercises: UpdateInfo = DatabaseService.query(
        UpdateInfo, filter_by=dict(resource_type=ResourceType.exercise_list.name), first=True)
    if ui_exercises.last_modified_time < last_update_time / 1000:
        return NetworkService.make_json_response([])
    try:
        vc: VocabularyCorpus = VocabularyCorpus[vocabulary]
        vocabulary_set = FileService.get_vocabulary_set(vc, frequency_upper_bound)
    except KeyError:
        vocabulary_set = set()
    language: Language
    try:
        language = Language(lang)
    except ValueError:
        language = Language.English
    exercises: List[Exercise] = DatabaseService.query(Exercise, filter_by=dict(language=language.value))
    matching_exercises: List[MatchingExercise] = [MatchingExercise.from_dict(x.to_dict()) for x in exercises]
    if vocabulary_set:
        for exercise in matching_exercises:
            conll: List[TokenList] = conllu.parse(exercise.conll)
            lemmata: List[str] = [tok["lemma"] for sent in conll for tok in sent.tokens]
            exercise.matching_degree = sum(1 if x in vocabulary_set else 0 for x in lemmata) / len(lemmata) * 100
    ret_val: List[dict] = [NetworkService.serialize_exercise(x, compress=True) for x in matching_exercises]
    return NetworkService.make_json_response(ret_val)


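# Worked example of the matching degree computed above, on toy data (the lemma
# list and vocabulary are assumptions): the degree is the percentage of an
# exercise's lemmata that occur in the reference vocabulary.
def _matching_degree_example() -> None:
    lemmata = ["amicus", "bonus", "amicus", "malus"]
    vocabulary_set = {"amicus", "bonus"}
    matching_degree = sum(1 if x in vocabulary_set else 0 for x in lemmata) / len(lemmata) * 100
    assert matching_degree == 75.0  # 3 of 4 lemmata are covered

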
def delete(cid: int) -> Union[Response, ConnexionResponse]:
    """The DELETE method for the corpus REST API. It deletes metadata for a specific text."""
    corpus: Corpus = DatabaseService.query(Corpus, filter_by=dict(cid=cid), first=True)
    if not corpus:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    db.session.delete(corpus)
    DatabaseService.commit()
    return NetworkService.make_json_response(True)


def get(eid: str) -> Union[Response, ConnexionResponse]:
    """The GET method for the exercise REST API. It provides the annotated text and metadata for an existing exercise."""
    exercise: TExercise = DatabaseService.query(Exercise, filter_by=dict(eid=eid), first=True)
    if not exercise:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_EXERCISE_NOT_FOUND)
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=exercise.urn, is_csm=False)
    if not ar.graph_data.nodes:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    exercise.last_access_time = datetime.utcnow().timestamp()
    DatabaseService.commit()
    exercise_type: ExerciseType = ExerciseType(exercise.exercise_type)
    ar.solutions = json.loads(exercise.solutions)
    ar.uri = NetworkService.get_exercise_uri(exercise)
    ar.exercise_id = exercise.eid
    ar.exercise_type = exercise_type.value
    return NetworkService.make_json_response(ar.to_dict())


def patch(cid: int, **kwargs) -> Union[Response, ConnexionResponse]:
    """The PATCH method for the corpus REST API. It updates metadata for a specific text."""
    corpus: Corpus = DatabaseService.query(Corpus, filter_by=dict(cid=cid), first=True)
    if not corpus:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    for k, v in kwargs.items():
        if v is not None:
            setattr(corpus, k, v)
    DatabaseService.commit()
    return NetworkService.make_json_response(corpus.to_dict())


def get(urn: str) -> Union[Response, ConnexionResponse]:
    """The GET method for the valid references REST API. It provides references for the desired text."""
    try:
        reff: List[str] = (CustomCorpusService.get_custom_corpus_reff(urn)
                           if CustomCorpusService.is_custom_corpus_urn(urn)
                           else CorpusService.get_standard_corpus_reff(urn))
    except ValueError:
        return connexion.problem(400, Config.ERROR_TITLE_BAD_REQUEST, Config.ERROR_MESSAGE_BAD_REQUEST)
    if not reff:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    return NetworkService.make_json_response(reff)


def get(urn: str):
    """Returns results for a frequency query from ANNIS for a given CTS URN and AQL."""
    fa: List[FrequencyItem] = CorpusService.get_frequency_analysis(urn, is_csm=True)
    # map the abbreviated values found by ANNIS to our own model
    skip_set: Set[Phenomenon] = {Phenomenon.LEMMA, Phenomenon.DEPENDENCY}
    for fi in fa:
        for i in range(len(fi.values)):
            if fi.phenomena[i] in skip_set:
                continue
            value_map: Dict[str, List[str]] = AnnotationService.phenomenon_map[fi.phenomena[i]]
            fi.values[i] = next((x for x in value_map if fi.values[i] in value_map[x]), None)
    return NetworkService.make_json_response([x.to_dict() for x in fa])


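# Hedged sketch of the reverse lookup above; the map contents here are
# hypothetical, the real mapping lives in AnnotationService.phenomenon_map.
# For each value that ANNIS returns, we search for the key whose abbreviation
# list contains that value.
def _phenomenon_map_example() -> None:
    value_map = {"noun": ["NN", "NE"], "verb": ["VV", "VM"]}  # assumed contents
    annis_value = "NE"
    mapped = next((x for x in value_map if annis_value in value_map[x]), None)
    assert mapped == "noun"

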
def post(exercise_data: dict) -> Union[Response, ConnexionResponse]:
    """The POST method for the exercise REST API. It creates a new exercise from the given data."""
    ef: ExerciseForm = ExerciseForm.from_dict(exercise_data)
    ef.urn = ef.urn if ef.urn else ""
    exercise_type: ExerciseType = ExerciseType(ef.type)
    search_values_list: List[str] = json.loads(ef.search_values)
    aqls: List[str] = AnnotationService.map_search_values_to_aql(
        search_values_list=search_values_list, exercise_type=exercise_type)
    search_phenomena: List[Phenomenon] = [
        Phenomenon().__getattribute__(x.split("=")[0].upper()) for x in search_values_list]
    # if there is custom text instead of a URN, annotate it immediately
    conll_string_or_urn: str = ef.urn if CorpusService.is_urn(ef.urn) else AnnotationService.get_udpipe(
        CorpusService.get_raw_text(ef.urn, False))
    try:
        # construct a graph from the CoNLL data
        response: dict = get_graph_data(
            title=ef.urn, conll_string_or_urn=conll_string_or_urn, aqls=aqls,
            exercise_type=exercise_type, search_phenomena=search_phenomena)
    except ValueError:
        return connexion.problem(500, Config.ERROR_TITLE_INTERNAL_SERVER_ERROR,
                                 Config.ERROR_MESSAGE_INTERNAL_SERVER_ERROR)
    solutions_dict_list: List[Dict] = response["solutions"]
    solutions: List[Solution] = [Solution.from_dict(x) for x in solutions_dict_list]
    ar: AnnisResponse = make_new_exercise(
        conll=response["conll"], correct_feedback=ef.correct_feedback, exercise_type=ef.type,
        general_feedback=ef.general_feedback, graph_data_raw=response["graph_data_raw"],
        incorrect_feedback=ef.incorrect_feedback, instructions=ef.instructions, language=ef.language,
        partially_correct_feedback=ef.partially_correct_feedback, search_values=ef.search_values,
        solutions=solutions, type_translation=ef.type_translation, urn=ef.urn,
        work_author=ef.work_author, work_title=ef.work_title)
    return NetworkService.make_json_response(ar.to_dict())


def post(vocabulary_data: dict):
    """Indicates for each token of a corpus whether it is covered by a reference vocabulary."""
    vf: VocabularyForm = VocabularyForm.from_dict(vocabulary_data)
    vc: VocabularyCorpus = VocabularyCorpus[vf.vocabulary]
    vocabulary_set: Set[str] = FileService.get_vocabulary_set(vc, vf.frequency_upper_bound)
    # punctuation should count as a match because it should not be counted against vocabulary coverage
    for char in string.punctuation:
        vocabulary_set.add(char)
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=vf.query_urn, is_csm=False)
    for node in ar.graph_data.nodes:
        if not is_match(target_lemma=node.udep_lemma, vocabulary_set=vocabulary_set):
            node.is_oov = True
    ar = AnnisResponse(solutions=[], uri="", exercise_id="", graph_data=ar.graph_data)
    ar.text_complexity = TextComplexityService.text_complexity(
        TextComplexityMeasure.all.name, vf.query_urn, False, ar.graph_data).to_dict()
    return NetworkService.make_json_response(ar.to_dict())


def get(eid: str, lang: str, solution_indices: List[int]) -> Union[Response, ConnexionResponse]:
    """The GET method for the H5P REST API. It provides JSON templates for client-side H5P exercise layouts."""
    language: Language = determine_language(lang)
    exercise: Exercise = DatabaseService.query(Exercise, filter_by=dict(eid=eid), first=True)
    if not exercise:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_EXERCISE_NOT_FOUND)
    text_field_content: str = get_text_field_content(exercise, solution_indices)
    if not text_field_content:
        return connexion.problem(422, Config.ERROR_TITLE_UNPROCESSABLE_ENTITY,
                                 Config.ERROR_MESSAGE_UNPROCESSABLE_ENTITY)
    response_dict: dict = TextService.json_template_mark_words
    response_dict = get_response(response_dict, language, TextService.json_template_drag_text, exercise,
                                 text_field_content, TextService.feedback_template)
    return NetworkService.make_json_response(response_dict)


def post(self):
    """Given the relevant corpus data, gives back search results as graph data."""
    args: dict = {}
    try:
        args = json.loads(flask.request.data.decode("utf-8"))
    except JSONDecodeError:
        abort(400)
    title: str = args["title"]
    annotations_or_urn: str = args["annotations"]
    aqls: List[str] = args["aqls"]
    exercise_type: ExerciseType = ExerciseType[args["exercise_type"]]
    search_phenomena: List[Phenomenon] = [
        Phenomenon().__getattribute__(x.upper()) for x in args["search_phenomena"]]
    conll: List[TokenList] = CorpusService.get_annotations_from_string(annotations_or_urn)
    ret_val: dict = CorpusService.process_corpus_data(title, conll, aqls, exercise_type, search_phenomena)
    # serialize the results to JSON
    return NetworkService.make_json_response(ret_val)


def post(kwic_data: dict) -> Response:
    """The POST method for the KWIC REST API. It provides example contexts for a given phenomenon in a given corpus."""
    kwic_form: KwicForm = KwicForm.from_dict(kwic_data)
    search_values_list: List[str] = json.loads(kwic_form.search_values)
    aqls: List[str] = AnnotationService.map_search_values_to_aql(search_values_list, ExerciseType.kwic)
    url: str = f"{Config.INTERNET_PROTOCOL}{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}" \
               f"{Config.SERVER_URI_CSM_SUBGRAPH}"
    data: str = json.dumps(dict(urn=kwic_data["urn"], aqls=aqls, ctx_left=str(kwic_form.ctx_left),
                                ctx_right=str(kwic_form.ctx_right)))
    response: requests.Response = requests.post(url, data=data)
    response_content: List[dict] = json.loads(response.text)
    exercise_data_list: List[ExerciseData] = [ExerciseData(json_dict=x) for x in response_content]
    ret_val: str = ""
    for exercise_data in exercise_data_list:
        ret_val += handle_exercise_data(exercise_data, kwic_form.ctx_left, kwic_form.ctx_right)
    return NetworkService.make_json_response(ret_val)


def get(search_regex: str, highlight_regex: str, min_count: int, nearest_neighbor_count: int) -> Response:
    """The GET method for the vector network REST API. It provides network data for the vectors in an AI model."""
    ret_val: str = get_concept_network(search_regex, min_count, highlight_regex, nearest_neighbor_count)
    return NetworkService.make_json_response(ret_val)


def get(measure: str, urn: str):
    """Gives users measures of text complexity for a given text."""
    ar: AnnisResponse = CorpusService.get_corpus(urn, is_csm=False)
    tc: TextComplexity = TextComplexityService.text_complexity(measure, urn, False, ar.graph_data)
    return NetworkService.make_json_response(tc.to_dict())


def get(urn: str):
    """Returns results for a frequency query from ANNIS for a given CTS URN and AQL."""
    url: str = f"{Config.INTERNET_PROTOCOL}{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}" + \
               Config.SERVER_URI_FREQUENCY
    response: requests.Response = requests.get(url, params=dict(urn=urn))
    return NetworkService.make_json_response(json.loads(response.text))