Пример #1
0
    def patch(self,request):
        """
        A copy of POST (merging list) but with the source == just an internal corpus_id

        params in request.GET:
            onto_corpus:  the corpus whose lists are getting patched
            from:         the corpus from which we take the source lists to merge in
            todo:         an array of the list types ("map", "main", "stop") to merge in

        """
        if not request.user.is_authenticated():
            res = HttpResponse("Unauthorized")
            res.status_code = 401
            return res

        params = get_parameters(request)
        print(params)

        # the corpus with the target lists to be patched
        corpus_id = int(params.pop("onto_corpus"))
        corpus_node = cache.Node[corpus_id]

        print(params)

        if request.user.id != corpus_node.user_id:
            res = HttpResponse("Unauthorized")
            res.status_code = 401
            return res

        list_types = {'map':'MAPLIST', 'main':'MAINLIST', 'stop':'STOPLIST'}

        # internal DB retrieve source_lists
        source_corpus_id = int(params.pop("from_corpus"))
        source_node = cache.Node[source_corpus_id]

        todo_lists = params.pop("todo").split(',')   # ex: ['map', 'stop']
        source_lists = {}
        for key in todo_lists:
            source_lists[key] = UnweightedList(
                                    source_node.children(list_types[key]).first().id
                                )

        # add the groupings too
        source_lists['groupings'] = Translations(
                                        source_node.children("GROUPLIST").first().id
                                    )

        # attempt to merge and send response
        try:
            # merge the source_lists onto those of the target corpus
            log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node)
            return JsonHttpResponse({
                'log': log_msg,
                }, 200)

        except Exception as e:
            return JsonHttpResponse({
                'err': str(e),
                }, 400)
    def post(self, request):
        """
        Merge the lists of a corpus with other lists from a CSV source
                                                 or from another corpus

        params in request.GET:
            onto_corpus:  the corpus whose lists are getting patched

        params in request.data:
            csvfile:      the csv file

        /!\ We assume we checked the file size client-side before upload
        """
        if not request.user.is_authenticated():
            res = HttpResponse("Unauthorized")
            res.status_code = 401
            return res

        # the corpus with the target lists to be patched
        params = get_parameters(request)
        corpus_id = int(params.pop("onto_corpus"))
        corpus_node = cache.Node[corpus_id]

        if request.user.id != corpus_node.user_id:
            res = HttpResponse("Unauthorized")
            res.status_code = 401
            return res

        # request also contains the file
        # csv_file has type django.core.files.uploadedfile.InMemoryUploadedFile
        #                                                 ----------------------
        csv_file = request.data['csvfile']

        csv_contents = csv_file.read().decode("UTF-8").split("\n")
        csv_file.close()
        del csv_file

        # import the csv
        # try:
        log_msg = "Async generation"

        corpus_node_id = corpus_node.id
        scheduled(import_and_merge_ngramlists)(csv_contents,
                                               corpus_node_id,
                                               overwrite=bool(
                                                   params.get('overwrite')))

        return JsonHttpResponse({
            'log': log_msg,
        }, 200)
Пример #3
0
    def put(self, request):
        """
        Basic external access for *creating an ngram*
        ---------------------------------------------

         1 - checks user authentication before any changes

         2 - checks if ngram to Ngram table in DB
              if yes returns ngram_id and optionally mainform_id
              otherwise continues

         3 - adds the ngram to Ngram table in DB

         4 - (if corpus param is present)
             adds the ngram doc counts to NodeNgram table in DB
             (aka "index the ngram" throught the docs of the corpus)

         5 - returns json with:
             'msg'   => a success msg
             'text'  => the initial text content
             'term'  => the normalized text content
             'id'    => the new ngram_id
             'count' => the number of docs with the ngram in the corpus
                        (if corpus param is present)
             'group' => the mainform_id if applicable

        possible inline parameters
        --------------------------
        @param    text=<ngram_string>         [required]
        @param    corpus=<CORPUS_ID>          [optional]
        @param    testgroup (true if present) [optional, requires corpus]
        """

        # 1 - check user authentication
        if not request.user.is_authenticated():
            res = HttpResponse("Unauthorized")
            res.status_code = 401
            return res

        # the params
        params = get_parameters(request)

        print("PARAMS", [(i,v) for (i,v) in params.items()])

        if 'text' in params:
            original_text = str(params.pop('text'))
            ngram_str = normalize_forms(normalize_chars(original_text))
        else:
            raise ValidationException('The route PUT /api/ngrams/ is used to create a new ngram\
                                        It requires a "text" parameter,\
                                        for instance /api/ngrams?text=hydrometallurgy')

        if ('testgroup' in params) and (not ('corpus' in params)):
            raise ValidationException("'testgroup' param requires 'corpus' param")

        # if we have a 'corpus' param (to do the indexing)...
        do_indexation = False
        if 'corpus' in params:
            # we retrieve the corpus...
            corpus_id = int(params.pop('corpus'))
            corpus_node = cache.Node[corpus_id]
            # and the user must also have rights on the corpus
            if request.user.id == corpus_node.user_id:
                do_indexation = True
            else:
                res = HttpResponse("Unauthorized")
                res.status_code = 401
                return res

        # number of "words" in the ngram
        ngram_size = len(findall(r' +', ngram_str)) + 1

        # do the additions
        try:
            log_msg = ""
            ngram_id = None
            mainform_id = None

            preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first()

            if preexisting is not None:
                ngram_id = preexisting.id
                log_msg += "ngram already existed (id %i)\n" % ngram_id

                # in the context of a corpus we can also check if has mainform
                # (useful for)
                if 'testgroup' in params:
                    groupings_id = (session.query(Node.id)
                                           .filter(Node.parent_id == corpus_id)
                                           .filter(Node.typename == 'GROUPLIST')
                                           .first()
                                    )
                    had_mainform = (session.query(NodeNgramNgram.ngram1_id)
                                          .filter(NodeNgramNgram.node_id == groupings_id)
                                          .filter(NodeNgramNgram.ngram2_id == preexisting.id)
                                          .first()
                                    )
                    if had_mainform:
                        mainform_id = had_mainform[0]
                        log_msg += "ngram had mainform (id %i) in this corpus" % mainform_id
                    else:
                        log_msg += "ngram was not in any group for this corpus"

            else:
                # 2 - insert into Ngrams
                new_ngram = Ngram(terms=ngram_str, n=ngram_size)
                session.add(new_ngram)
                session.commit()
                ngram_id = new_ngram.id
                log_msg += "ngram was added with new id %i\n" % ngram_id

            # 3 - index the term
            if do_indexation:
                n_added = index_new_ngrams([ngram_id], corpus_node)
                log_msg += 'ngram indexed in corpus %i\n' % corpus_id

            return JsonHttpResponse({
                'msg': log_msg,
                'text': original_text,
                'term': ngram_str,
                'id' : ngram_id,
                'group' : mainform_id,
                'count': n_added if do_indexation else 'no corpus provided for indexation'
                }, 200)

        # just in case
        except Exception as e:
            return JsonHttpResponse({
                'msg': str(e),
                'text': original_text
                }, 400)