def patch(self, request):
    """
    Merge the lists of one corpus into the lists of another corpus.

    Same merge as POST, except that the source is another corpus already
    known to the application (referenced by its id) instead of a CSV upload.

    params in request.GET:
        onto_corpus:  id of the corpus whose lists are getting patched
        from_corpus:  id of the corpus from which we take the source lists
        todo:         comma-separated list types to merge in, among
                      "map", "main", "stop"   (ex: "map,stop")

    The GROUPLIST of the source corpus is always merged in as well.

    Returns:
        - a 401 HttpResponse if the user is not authenticated or is not
          the owner of the onto_corpus node
        - a 400 JSON response {'err': ...} if 'todo' names an unknown list
          type, or if loading/merging the lists raises
        - a 200 JSON response {'log': ...} with the merge log otherwise

    Missing or non-integer corpus ids are not handled here: the resulting
    exception propagates to the caller.
    """
    if not request.user.is_authenticated():
        res = HttpResponse("Unauthorized")
        res.status_code = 401
        return res

    params = get_parameters(request)

    # the corpus with the target lists to be patched
    corpus_id = int(params.pop("onto_corpus"))
    corpus_node = cache.Node[corpus_id]

    # only the owner of the target corpus may patch its lists
    if request.user.id != corpus_node.user_id:
        res = HttpResponse("Unauthorized")
        res.status_code = 401
        return res

    # short list names used by the API => node typenames of the lists
    list_types = {'map': 'MAPLIST', 'main': 'MAINLIST', 'stop': 'STOPLIST'}

    # the corpus providing the source lists
    # NOTE(review): only the target corpus is checked for ownership; the
    # source corpus is read without any check -- confirm this is intended.
    source_corpus_id = int(params.pop("from_corpus"))
    source_node = cache.Node[source_corpus_id]

    todo_lists = params.pop("todo").split(',')   # ex: ['map', 'stop']

    # reject unknown list types explicitly instead of letting a KeyError
    # escape from the list_types lookup below
    unknown_types = [key for key in todo_lists if key not in list_types]
    if unknown_types:
        return JsonHttpResponse({
            'err': "unknown list type(s) in 'todo': %s"
                   % ', '.join(repr(key) for key in unknown_types),
        }, 400)

    # attempt to load, merge and send response
    try:
        # internal DB retrieve of the source lists; kept inside the try so
        # that a missing list node is reported as a 400 like any other
        # merge failure
        source_lists = {}
        for key in todo_lists:
            source_lists[key] = UnweightedList(
                source_node.children(list_types[key]).first().id
            )

        # add the groupings too
        source_lists['groupings'] = Translations(
            source_node.children("GROUPLIST").first().id
        )

        # merge the source_lists onto those of the target corpus
        log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node)
        return JsonHttpResponse({
            'log': log_msg,
        }, 200)

    except Exception as e:
        return JsonHttpResponse({
            'err': str(e),
        }, 400)
def post(self, request):
    """
    Merge the lists of a corpus with the lists found in an uploaded CSV.

    params in request.GET:
        onto_corpus:  id of the corpus whose lists are getting patched
        overwrite:    optional; passed on as bool(<raw param value>)

    params in request.data:
        csvfile:      the uploaded csv file, read entirely and decoded
                      as UTF-8

    WARNING: we assume the file size was checked client-side before upload.

    The import itself is handed over to scheduled(import_and_merge_ngramlists)
    (presumably run asynchronously -- see `scheduled`), so the response only
    carries the fixed log message "Async generation".

    Returns:
        - a 401 HttpResponse if the user is not authenticated or is not
          the owner of the onto_corpus node
        - a 200 JSON response {'log': "Async generation"} otherwise
    """
    def _unauthorized():
        # same 401 answer for "not logged in" and "not the owner"
        denied = HttpResponse("Unauthorized")
        denied.status_code = 401
        return denied

    if not request.user.is_authenticated():
        return _unauthorized()

    # the corpus with the target lists to be patched
    params = get_parameters(request)
    target_id = int(params.pop("onto_corpus"))
    target_node = cache.Node[target_id]

    if request.user.id != target_node.user_id:
        return _unauthorized()

    # the request also contains the file, of type
    # django.core.files.uploadedfile.InMemoryUploadedFile
    upload = request.data['csvfile']
    csv_lines = upload.read().decode("UTF-8").split("\n")
    upload.close()
    del upload

    # only plain values (lines + node id) are handed over to the job
    target_node_id = target_node.id
    import_job = scheduled(import_and_merge_ngramlists)
    import_job(
        csv_lines,
        target_node_id,
        overwrite=bool(params.get('overwrite')),
    )

    return JsonHttpResponse({
        'log': "Async generation",
    }, 200)
def put(self, request):
    """
    Basic external access for *creating an ngram*
    ---------------------------------------------

     1 - checks user authentication before any changes

     2 - checks if the ngram is already in the Ngram table in DB
           if yes, keeps its ngram_id (and optionally looks up its
           mainform_id, see 'testgroup')
           otherwise continues with step 3

     3 - adds the ngram to the Ngram table in DB

     4 - (if corpus param is present)
         indexes the ngram through the docs of the corpus by calling
         index_new_ngrams([ngram_id], corpus_node)

     5 - returns json (status 200) with:
         'msg'   => a success msg
         'text'  => the initial text content
         'term'  => the normalized text content
         'id'    => the ngram_id (new or pre-existing)
         'count' => the value returned by index_new_ngrams, or the string
                    'no corpus provided for indexation' without corpus param
         'group' => the mainform_id if applicable, else None

    possible inline parameters
    --------------------------
    @param    text=<ngram_string>         [required]
    @param    corpus=<CORPUS_ID>          [optional]
    @param    testgroup (true if present) [optional, requires corpus]

    Raises:
        ValidationException: if 'text' is missing, or if 'testgroup' is
                             given without 'corpus'

    Other outcomes:
        - a 401 HttpResponse if the user is not authenticated or does not
          own the given corpus
        - a 400 JSON response {'msg': ..., 'text': ...} if anything raises
          during the DB additions
    """
    # 1 - check user authentication
    if not request.user.is_authenticated():
        res = HttpResponse("Unauthorized")
        res.status_code = 401
        return res

    # the params
    params = get_parameters(request)

    if 'text' not in params:
        # adjacent literals instead of backslash continuations inside the
        # string, which leaked stray characters into the message
        raise ValidationException(
            'The route PUT /api/ngrams/ is used to create a new ngram. '
            'It requires a "text" parameter, '
            'for instance /api/ngrams?text=hydrometallurgy'
        )

    original_text = str(params.pop('text'))
    ngram_str = normalize_forms(normalize_chars(original_text))

    if 'testgroup' in params and 'corpus' not in params:
        raise ValidationException("'testgroup' param requires 'corpus' param")

    # if we have a 'corpus' param (to do the indexing)...
    do_indexation = False
    if 'corpus' in params:
        # we retrieve the corpus...
        corpus_id = int(params.pop('corpus'))
        corpus_node = cache.Node[corpus_id]

        # and the user must also have rights on the corpus
        if request.user.id != corpus_node.user_id:
            res = HttpResponse("Unauthorized")
            res.status_code = 401
            return res

        do_indexation = True

    # number of "words" in the ngram (runs of spaces + 1)
    ngram_size = len(findall(r' +', ngram_str)) + 1

    # do the additions
    try:
        log_msg = ""
        ngram_id = None
        mainform_id = None

        # 2 - look for a pre-existing ngram with the same normalized terms
        preexisting = (session.query(Ngram)
                              .filter(Ngram.terms == ngram_str)
                              .first())

        if preexisting is not None:
            ngram_id = preexisting.id
            log_msg += "ngram already existed (id %i)\n" % ngram_id

            # in the context of a corpus we can also check if it has a
            # mainform ('testgroup' implies 'corpus', checked above, so
            # corpus_id is defined here)
            if 'testgroup' in params:
                grouplist_row = (session.query(Node.id)
                                        .filter(Node.parent_id == corpus_id)
                                        .filter(Node.typename == 'GROUPLIST')
                                        .first())

                # .first() yields a one-column row (or None): compare the
                # node_id column with the scalar id, not with the row
                groupings_id = None
                if grouplist_row is not None:
                    groupings_id = grouplist_row[0]

                had_mainform = (
                    session.query(NodeNgramNgram.ngram1_id)
                           .filter(NodeNgramNgram.node_id == groupings_id)
                           .filter(NodeNgramNgram.ngram2_id == preexisting.id)
                           .first()
                )

                if had_mainform:
                    mainform_id = had_mainform[0]
                    log_msg += "ngram had mainform (id %i) in this corpus" % mainform_id
                else:
                    log_msg += "ngram was not in any group for this corpus"

        else:
            # 3 - insert into Ngrams
            new_ngram = Ngram(terms=ngram_str, n=ngram_size)
            session.add(new_ngram)
            session.commit()
            ngram_id = new_ngram.id
            log_msg += "ngram was added with new id %i\n" % ngram_id

        # 4 - index the term
        # NOTE(review): read as running for new AND pre-existing ngrams
        # (the response below needs n_added whenever do_indexation is
        # true) -- confirm against the original indentation.
        if do_indexation:
            n_added = index_new_ngrams([ngram_id], corpus_node)
            log_msg += 'ngram indexed in corpus %i\n' % corpus_id

        # 5 - the json response
        return JsonHttpResponse({
            'msg': log_msg,
            'text': original_text,
            'term': ngram_str,
            'id': ngram_id,
            'group': mainform_id,
            'count': n_added if do_indexation else 'no corpus provided for indexation'
        }, 200)

    # just in case
    except Exception as e:
        # after a failed flush/commit the session refuses further work
        # until it is rolled back; don't leave it in that state
        session.rollback()
        return JsonHttpResponse({
            'msg': str(e),
            'text': original_text
        }, 400)