Пример #1
0
    def get(self, request):
        """Display the list of nodes corresponding to the query.

        The output format is chosen from ``parameters['formated']`` as
        returned by ``_query_nodes(request)``:

        * ``'json'``: a JSON object holding the query parameters, the count
          and one filtered record per node;
        * ``'csv'``: a tab-separated attachment with a fixed set of hyperdata
          columns (one row per node, empty cell when a field is missing).

        Responds 401 when the user is not authenticated.
        """

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters, query, count = _query_nodes(request)

        if parameters['formated'] == 'json':
            # FIXME filter in rawsql in _query_nodes
            records_array = [
                _filter_node_fields(node, parameters) for node in query
            ]

            return JsonHttpResponse({
                'parameters': parameters,
                'count': count,
                'records': records_array
            })

        elif parameters['formated'] == 'csv':
            # TODO add support for fields and hyperdata_filter

            response = HttpResponse(content_type='text/csv')
            response[
                'Content-Disposition'] = 'attachment; filename="Gargantext_Corpus.csv"'

            # the response object is file-like: the writer appends to its body
            writer = csv.writer(response,
                                delimiter='\t',
                                quoting=csv.QUOTE_MINIMAL)

            keys = [
                'title', 'source', 'publication_year', 'publication_month',
                'publication_day', 'abstract', 'authors'
            ]

            writer.writerow(keys)

            for node in query:
                row = []
                for key in keys:
                    try:
                        row.append(node.hyperdata[key])
                    except (KeyError, TypeError, AttributeError):
                        # best effort: a missing field (or a node without
                        # usable hyperdata) yields an empty cell
                        row.append("")
                writer.writerow(row)

            return response

        # NOTE(review): any other 'formated' value falls through and returns
        # None -- presumably _query_nodes only lets 'json'/'csv' through;
        # confirm against its validation rules.
Пример #2
0
    def get(self, request, node_id):
        """Return the "statuses" entry of a node owned by the requesting user.

        Responds 401 when the user is not authenticated and 404 when no node
        with this id belongs to the user. Otherwise the payload is
        ``{"statuses": ...}``, with None when the node hyperdata has no such
        key.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        user = cache.User[request.user.id]

        # check_rights(request, node_id) is not called here: filtering on
        # user_id in the query below already restricts to the user's nodes
        owned_node = session.query(Node).filter(
            Node.id == node_id, Node.user_id == user.id).first()

        if owned_node is None:
            return Response({"detail": "Node not Found for this user"},
                            status=HTTP_404_NOT_FOUND)

        # FIXME a more generic strategy could be used instead, e.g.
        #   context = format_response(node, [n for n in node.children()])
        #   or perhaps context = format_response(None, [node])
        # for now the payload is built directly
        try:
            statuses = owned_node.hyperdata["statuses"]
        except KeyError:
            statuses = None

        context = {"statuses": statuses}
        return Response(context)
Пример #3
0
    def post(self, request, data):
        '''Create a new status for node (not implemented yet).

        Responds 401 when the user is not authenticated; otherwise raises
        NotImplementedError.
        '''
        if request.user.is_authenticated():
            raise NotImplementedError

        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)
Пример #4
0
def CsvHttpResponse(data, headers=None, status=200):
    """Build a ``text/csv`` HttpResponse from an iterable of rows.

    ``headers``, when truthy, is written as the first row; every item of
    ``data`` is then written as one comma-delimited row. ``status`` is the
    HTTP status code of the response.
    """
    response = HttpResponse(content_type="text/csv", status=status)
    # the response object is used as the file-like target of the csv writer
    csv_out = csv.writer(response, delimiter=',')
    if headers:
        csv_out.writerow(headers)
    csv_out.writerows(data)
    return response
Пример #5
0
    def get(self, request, corpus_id):
        """List the documents of a corpus that contain the given ngrams.

        Request parameters:
            ngram_ids: comma-separated integer ngram ids (required)

        Documents are the DOCUMENT children of ``corpus_id`` linked (through
        NodeNodeNgram) to the corpus' TFIDF-CORPUS node for at least one of
        the ngrams. They are ordered by the descending sum of their scores and
        truncated to DEFAULT_N_DOCS_HAVING_NGRAM records.

        Returns a JSON response ``{'count': ..., 'records': [...]}`` where
        ``count`` is the number of matching documents before the limit.

        Raises ValidationException when ``ngram_ids`` cannot be parsed, when
        the corpus does not exist or when it has no TFIDF-CORPUS node.
        Responds 401 when the user is not authenticated.
        """

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters = get_parameters(request)
        parameters = validate(parameters, {'score': str, 'ngram_ids': list})

        try:
            ngram_ids = [int(n) for n in parameters['ngram_ids'].split(',')]
        except (KeyError, ValueError, TypeError, AttributeError):
            raise ValidationException(
                '"ngram_ids" needs integers separated by comma.')

        limit = DEFAULT_N_DOCS_HAVING_NGRAM

        corpus = session.query(Node).filter(Node.id == corpus_id).first()
        if corpus is None:
            # previously crashed below with AttributeError on corpus.id
            raise ValidationException(
                '%s is not a valid corpus node id.' % corpus_id)

        tfidf_row = (session.query(Node.id).filter(
            Node.typename == "TFIDF-CORPUS",
            Node.parent_id == corpus.id).first())
        if tfidf_row is None:
            # previously crashed with TypeError when indexing None
            raise ValidationException(
                'No TFIDF-CORPUS node found for corpus %s.' % corpus_id)
        tfidf_id = tfidf_row[0]

        # request data
        nodes_query = (session.query(Node, func.sum(NodeNodeNgram.score)).join(
            NodeNodeNgram, NodeNodeNgram.node2_id == Node.id).filter(
                NodeNodeNgram.node1_id == tfidf_id).filter(
                    Node.typename == 'DOCUMENT',
                    Node.parent_id == corpus.id).filter(
                        or_(*[
                            NodeNodeNgram.ngram_id == ngram_id
                            for ngram_id in ngram_ids
                        ])).group_by(Node))

        # get the total count before applying limit
        nodes_count = nodes_query.count()

        # now the query with the limit
        nodes_results_query = (nodes_query.order_by(
            func.sum(NodeNodeNgram.score).desc()).limit(limit))

        nodes_list = []
        for node, score in nodes_results_query:
            node_dict = {
                'id': node.id,
                'score': score,
            }
            # only copy the hyperdata fields that this document really has
            for key in ('title', 'publication_date', 'source', 'authors',
                        'fields'):
                if key in node.hyperdata:
                    node_dict[key] = node.hyperdata[key]
            nodes_list.append(node_dict)

        return JsonHttpResponse({'count': nodes_count, 'records': nodes_list})
Пример #6
0
    def put(self, request, corpus_id, check_each_doc=True):
        """Add documents to the favorites of a corpus.

        Request parameters:
            docs: comma-separated integer ids of the document nodes to add

        Each id is linked (NodeNode row, score 1.0) to the favorites node
        returned by ``self._get_fav_node(corpus_id)``.

        When ``check_each_doc`` is true, every id must be a DOCUMENT node
        whose parent is ``corpus_id``; otherwise a ValidationException is
        raised and nothing is inserted.

        Returns a JSON response with ``count_added`` (the number of ids
        submitted, not the number of rows really inserted), or a ``warning``
        with ``count_added`` 0 when there is no favorites node.
        Raises ValidationException when ``docs`` is missing or malformed.
        Responds 401 when the user is not authenticated.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        # user is ok
        fav_node = self._get_fav_node(corpus_id)

        response = {}

        if fav_node is None:
            # NOTE(review): self.corpus is presumably set by _get_fav_node;
            # confirm, it is not assigned anywhere in this method
            response = {
                'warning':
                'No favorites node is defined for this corpus (\'%s\')' %
                self.corpus.name,
                'count_added':
                0
            }
        else:
            req_params = validate(get_parameters(request), {
                'docs': list,
                'default': ""
            })
            try:
                nodeids_to_add = [
                    int(did) for did in req_params['docs'].split(',')
                ]
            except (KeyError, ValueError, TypeError, AttributeError):
                raise ValidationException(
                    '"docs" needs integers separated by comma.')

            if check_each_doc:
                # check that these really are documents of the right corpus
                # (a bit slow => disable by default ?)
                known_docs_q = (session.query(
                    Node.id).filter(Node.parent_id == corpus_id).filter(
                        Node.typename == 'DOCUMENT'))
                known_ids = {known_doc.id for known_doc in known_docs_q.all()}

                rejected_list = [
                    doc_node_id for doc_node_id in nodeids_to_add
                    if doc_node_id not in known_ids
                ]
                if rejected_list:
                    raise ValidationException(
                        "Error on some requested docs: %s (Only nodes of type 'doc' AND belonging to corpus %i can be added to favorites.)"
                        % (str(rejected_list), int(corpus_id)))

            # add them
            bulk_insert(NodeNode, ('node1_id', 'node2_id', 'score'),
                        ((fav_node.id, doc_node_id, 1.0)
                         for doc_node_id in nodeids_to_add))

            # todo count really added (here: counts input param not result)
            response = {'count_added': len(nodeids_to_add)}

        return JsonHttpResponse(response)
Пример #7
0
    def get(self, request, corpus_id):
        """
        2 possibilities with/without param

        1) GET http://localhost:8000/api/nodes/2/favorites
        (returns the full list of fav docs within corpus 2)

        2) GET http://localhost:8000/api/nodes/2/favorites?docs=53,54
        (will test if docs 53 and 54 are among the favorites of corpus 2)
        (returns the intersection of fav docs with [53,54] as 'favdocs',
         and the requested ids that are not favorites as 'missing')

        When there is no favorites node, the response holds a 'warning' and
        an empty 'favdocs' list.

        Raises ValidationException when "docs" is not a comma-separated list
        of integers. Responds 401 when the user is not authenticated.
        """

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        fav_node = self._get_fav_node(corpus_id)

        req_params = validate(get_parameters(request), {
            'docs': list,
            'default': ""
        })

        response = {}

        if fav_node is None:
            # NOTE(review): self.corpus is presumably set by _get_fav_node;
            # confirm, it is not assigned anywhere in this method
            response = {
                'warning':
                'No favorites node is defined for this corpus (\'%s\')' %
                self.corpus.name,
                'favdocs': []
            }
        elif 'docs' not in req_params:
            # each docnode associated to the favnode of this corpusnode
            q = (session.query(
                NodeNode.node2_id).filter(NodeNode.node1_id == fav_node.id))
            all_doc_ids = [row.node2_id for row in q.all()]
            response = {'favdocs': all_doc_ids}
        else:
            try:
                nodeids_to_check = [
                    int(did) for did in req_params['docs'].split(',')
                ]
            except (ValueError, TypeError, AttributeError):
                raise ValidationException(
                    '"docs" needs integers separated by comma.')

            # each docnode from the input list, if it is associated to the favnode
            q = (session.query(NodeNode.node2_id).filter(
                NodeNode.node1_id == fav_node.id).filter(
                    NodeNode.node2_id.in_(nodeids_to_check)))
            present_doc_ids = [row.node2_id for row in q.all()]

            # set lookup keeps the "missing" computation linear
            present_lookup = set(present_doc_ids)
            absent_doc_ids = [
                did for did in nodeids_to_check if did not in present_lookup
            ]
            response = {'favdocs': present_doc_ids, 'missing': absent_doc_ids}

        return JsonHttpResponse(response)
Пример #8
0
    def delete(self, request, node_id):
        """Delete the node ``node_id``.

        Responds 401 when the user is not authenticated and raises Http404
        when ``_query_nodes(request, node_id)`` yields no node. On success
        the JSON response holds the number of deleted rows under 'deleted'.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters, query, count = _query_nodes(request, node_id)
        if len(query) == 0:
            raise Http404()

        removal = delete(Node).where(Node.id == node_id)
        result = session.execute(removal)
        session.commit()

        payload = {'deleted': result.rowcount}
        return JsonHttpResponse(payload)
Пример #9
0
    def get(self, request, node_id):
        """Return the node ``node_id`` as JSON.

        Responds 401 when the user is not authenticated and raises Http404
        when ``_query_nodes(request, node_id)`` yields no node. The first
        result is passed through ``_filter_node_fields`` before encoding.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters, query, count = _query_nodes(request, node_id)
        if len(query) == 0:
            raise Http404()

        first_node = query[0]
        record = _filter_node_fields(first_node, parameters)
        return JsonHttpResponse(record)
Пример #10
0
    def get(self, request):
        """Export the term lists of a corpus as a CSV attachment.

        Request parameters:
            corpus: the id of the corpus to export (integer)

        The response is handed to ``export_ngramlists`` as ``fname`` so that
        the export is written directly into its body.
        """
        # NOTE(review): unlike the other handlers visible here, this one does
        # not check authentication or corpus ownership -- confirm that the
        # export is meant to be public.
        corpus_id = int(get_parameters(request).pop("corpus"))
        corpus_node = cache.Node[corpus_id]

        # response is file-like + headers
        attachment_name = 'attachment; filename="corpus-%i_gargantext_term_list.csv"' % corpus_id
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = attachment_name

        # fill the response with the data
        export_ngramlists(corpus_node, fname=response, titles=True)
        return response
Пример #11
0
    def patch(self, request):
        """
        A copy of POST (merging list) but with the source == just an internal corpus_id

        params in request.GET:
            onto_corpus:  the corpus whose lists are getting patched
            from_corpus:  the corpus from which we take the source lists to merge in
            todo:         comma-separated list types ("map", "main", "stop") to merge in

        Returns a JSON response:
            {'log': ...} with status 200 when the merge succeeded
            {'err': ...} with status 400 when a list type is unknown, a source
                         list is missing or the merge raised an exception

        Responds 401 when the user is not authenticated or does not own the
        target corpus.
        """
        if not request.user.is_authenticated():
            return HttpResponse("Unauthorized", status=401)

        params = get_parameters(request)

        # the corpus with the target lists to be patched
        corpus_id = int(params.pop("onto_corpus"))
        corpus_node = cache.Node[corpus_id]

        if request.user.id != corpus_node.user_id:
            return HttpResponse("Unauthorized", status=401)

        list_types = {'map':'MAPLIST', 'main':'MAINLIST', 'stop':'STOPLIST'}

        # internal DB retrieve source_lists
        source_corpus_id = int(params.pop("from_corpus"))
        source_node = cache.Node[source_corpus_id]

        todo_lists = params.pop("todo").split(',')   # ex: ['map', 'stop']

        # reject unknown list types explicitly (used to be a KeyError => 500)
        unknown_types = [key for key in todo_lists if key not in list_types]
        if unknown_types:
            return JsonHttpResponse({
                'err': "unknown list type(s) in 'todo': %s" % ', '.join(unknown_types),
                }, 400)

        source_lists = {}
        for key in todo_lists:
            list_node = source_node.children(list_types[key]).first()
            if list_node is None:
                # used to be an AttributeError on None => 500
                return JsonHttpResponse({
                    'err': "source corpus %i has no %s" % (source_corpus_id, list_types[key]),
                    }, 400)
            source_lists[key] = UnweightedList(list_node.id)

        # add the groupings too
        grouplist_node = source_node.children("GROUPLIST").first()
        if grouplist_node is None:
            return JsonHttpResponse({
                'err': "source corpus %i has no GROUPLIST" % source_corpus_id,
                }, 400)
        source_lists['groupings'] = Translations(grouplist_node.id)

        # attempt to merge and send response
        try:
            # merge the source_lists onto those of the target corpus
            log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node)
            return JsonHttpResponse({
                'log': log_msg,
                }, 200)

        except Exception as e:
            return JsonHttpResponse({
                'err': str(e),
                }, 400)
Пример #12
0
    def put(self, request, data):
        '''Update status for node (not implemented yet).

        Responds 401 when the user is not authenticated; otherwise raises
        NotImplementedError.
        '''

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        # The previous draft fetched the node here using ``node_id``, a name
        # that is neither a parameter nor a local: it raised NameError before
        # reaching the intended NotImplementedError. The lookup (filter on
        # Node.id and Node.user_id, as in the sibling get) belongs to the
        # future implementation.
        raise NotImplementedError
Пример #13
0
    def get(self, request, node_id):
        """Count the documents of a corpus grouped by one hyperdata field.

        Request parameters:
            hyperfield: the hyperdata subfield to aggregate on

        The node is read from the cache and must have typename 'CORPUS',
        otherwise a ValidationException is raised. The corpus is stored on
        ``self.corpus`` before ``self._ndocs_by_facet`` is called.

        Returns a JSON response ``{'doc_count': total, 'by': {field: counts}}``.
        Responds 401 when the user is not authenticated.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        # check that the node is a corpus
        #   ? faster from cache than: corpus = session.query(Node)...
        corpus = cache.Node[node_id]
        if corpus.typename != 'CORPUS':
            raise ValidationException(
                "Only nodes of type CORPUS can accept facet queries" +
                " (but this node has type %s)..." % corpus.typename)
        self.corpus = corpus

        # check that the hyperfield parameter makes sense
        allowed_subfields = [
            'source', 'publication_year', 'rubrique', 'language_iso2',
            'language_iso3', 'language_name', 'authors'
        ]
        hyperfield_rule = {'type': str, 'range': allowed_subfields}
        schema = {'type': dict, 'items': {'hyperfield': hyperfield_rule}}

        # validate() triggers an info message if subfield not in range
        parameters = validate(get_parameters(request), schema)
        subfield = parameters['hyperfield']

        # do the aggregated sum
        xcounts, total = self._ndocs_by_facet(subfield)

        # response
        by_facet = {subfield: xcounts}
        return JsonHttpResponse({'doc_count': total, 'by': by_facet})
Пример #14
0
    def delete(self, request, node_id=None):
        '''Delete status for node.

        ``node_id`` is the id of the node whose status is reset. It was used
        by the body but missing from the signature (NameError at runtime);
        it is now an optional parameter so that existing callers keep working.

        Responds 401 when the user is not authenticated, 404 when no node with
        this id belongs to the user (or when no id is given), and 204 once the
        status has been reset and committed.
        '''

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        user = cache.User[request.user.id]
        # check_rights(request, node_id)
        node = None
        if node_id is not None:
            node = session.query(Node).filter(Node.id == node_id,
                                              Node.user_id == user.id).first()
        if node is None:
            return Response({"detail": "Node not Found"},
                            status=HTTP_404_NOT_FOUND)

        # NOTE(review): this resets the "status" key whereas the sibling get
        # handler reads "statuses" -- confirm which key is the intended one.
        node.hyperdata["status"] = []
        session.add(node)
        session.commit()
        return Response({"detail": "Deleted status for NODE #%i " % node.id},
                        status=HTTP_204_NO_CONTENT)
Пример #15
0
    def post(self, request):
        """
        Merge the lists of a corpus with other lists from a CSV source
                                                 or from another corpus

        params in request.GET:
            onto_corpus:  the corpus whose lists are getting patched
            overwrite:    (optional) passed on as a boolean to the import

        params in request.data:
            csvfile:      the csv file

        WARNING: we assume we checked the file size client-side before upload

        The import itself is scheduled (asynchronous): the JSON response only
        acknowledges the request with {'log': ...} and status 200.
        Responds 401 when the user is not authenticated or does not own the
        corpus, and 400 with {'err': ...} when no csvfile was sent.
        """
        if not request.user.is_authenticated():
            return HttpResponse("Unauthorized", status=401)

        # the corpus with the target lists to be patched
        params = get_parameters(request)
        corpus_id = int(params.pop("onto_corpus"))
        corpus_node = cache.Node[corpus_id]

        if request.user.id != corpus_node.user_id:
            return HttpResponse("Unauthorized", status=401)

        # request also contains the file
        # csv_file has type django.core.files.uploadedfile.InMemoryUploadedFile
        #                                                 ----------------------
        if 'csvfile' not in request.data:
            # used to be a KeyError => 500
            return JsonHttpResponse({
                'err': "A 'csvfile' is required in data payload",
            }, 400)
        csv_file = request.data['csvfile']

        try:
            csv_contents = csv_file.read().decode("UTF-8").split("\n")
        finally:
            # release the upload even if reading or decoding failed
            csv_file.close()

        # import the csv
        log_msg = "Async generation"

        # NOTE(review): bool() of any non-empty string is True, so
        # "overwrite=false" or "overwrite=0" also enable overwriting --
        # confirm what the client sends.
        overwrite = bool(params.get('overwrite'))

        corpus_node_id = corpus_node.id
        scheduled(import_and_merge_ngramlists)(csv_contents,
                                               corpus_node_id,
                                               overwrite=overwrite)

        return JsonHttpResponse({
            'log': log_msg,
        }, 200)
Пример #16
0
    def post(self, request, node_id):
        """
        For the moment, only used to rename a node

        params in request.GET:
            none (not allowed by _query_nodes validation)

        params in request.DATA:
            ["name": the_new_name_str]

        Responses:
            200 {'renamed': new_name} when the node was renamed
            400 when the payload has no 'name'
            401 when the user is not authenticated
            404 (Http404) when _query_nodes yields no node for this id
            409 when another node already has this name

        TODO 1 factorize with .projects.ProjectView.put and .post (thx c24b)
        TODO 2 allow other changes than name
        """

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        # contains a check on user.id (within _query_nodes)
        parameters, query, count = _query_nodes(request, node_id)

        # same guard as the sibling get/delete handlers: popping from an
        # empty result used to raise IndexError
        if not len(query):
            raise Http404()

        the_node = query.pop()

        # retrieve the name
        if 'name' not in request.data:
            return JsonHttpResponse(
                {"detail": "A 'name' parameter is required in data payload"},
                400)
        new_name = request.data['name']

        # check for conflicts
        # NOTE(review): the lookup is not restricted to the user's own nodes
        # nor to a node type -- confirm that names must be globally unique.
        other = session.query(Node).filter(Node.name == new_name).count()
        if other > 0:
            return JsonHttpResponse(
                {"detail": "A node with this name already exists"}, 409)

        # normal case: do the renaming
        the_node.name = new_name
        session.commit()
        return JsonHttpResponse({'renamed': new_name}, 200)
Пример #17
0
    def delete(self, request):
        """Remove the nodes listed in the request.

        Request parameters:
            ids: comma-separated integer node ids

        Only nodes owned by the requesting user are deleted: the ids come
        straight from the request, so without this restriction any
        authenticated user could delete any node.

        Returns a JSON response with the number of deleted rows under
        'deleted'. Raises ValidationException when "ids" cannot be parsed.
        Responds 401 when the user is not authenticated.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters = get_parameters(request)
        parameters = validate(parameters, {'ids': list})
        try:
            node_ids = [int(n) for n in parameters['ids'].split(',')]
        except (KeyError, ValueError, TypeError, AttributeError):
            raise ValidationException(
                '"ids" needs integers separated by comma.')

        result = session.execute(
            delete(Node).where(Node.id.in_(node_ids)).where(
                Node.user_id == request.user.id))
        session.commit()

        return JsonHttpResponse({'deleted': result.rowcount})
Пример #18
0
    def delete(self, request, corpus_id):
        """
        DELETE http://localhost:8000/api/nodes/2/favorites?docs=53,54
        (will delete docs 53 and 54 from the favorites of corpus 2)

        Only the favorite links (NodeNode rows) are removed, not the
        documents themselves.

        Returns a JSON response with 'count_removed', or a 'warning' with
        'count_removed' 0 when there is no favorites node.
        Raises ValidationException when "docs" is missing or is not a
        comma-separated list of integers.
        Responds 401 when the user is not authenticated.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        # user is ok
        fav_node = self._get_fav_node(corpus_id)

        response = {}

        if fav_node is None:
            # NOTE(review): self.corpus is presumably set by _get_fav_node;
            # confirm, it is not assigned anywhere in this method
            response = {
                'warning':
                'No favorites node is defined for this corpus (\'%s\')' %
                self.corpus.name,
                'count_removed':
                0
            }
        else:
            req_params = validate(get_parameters(request), {
                'docs': list,
                'default': ""
            })
            try:
                nodeids_to_delete = [
                    int(did) for did in req_params['docs'].split(',')
                ]
            except (KeyError, ValueError, TypeError, AttributeError):
                raise ValidationException(
                    '"docs" needs integers separated by comma.')

            try:
                # it deletes from favourites but not from DB
                result = session.execute(
                    delete(NodeNode).where(
                        NodeNode.node1_id == fav_node.id).where(
                            NodeNode.node2_id.in_(nodeids_to_delete)))
                session.commit()
                response = {'count_removed': result.rowcount}
            finally:
                session.close()
        return JsonHttpResponse(response)
Пример #19
0
    def patch(self, request, corpusnode_id):
        """
        PATCH triggers recount of metrics for the specified corpus.

        ex PATCH http://localhost:8000/api/metrics/14072
                                                   -----
                                                 corpus_id

        The recount is handed to ``scheduled(recount)``; 'took' in the JSON
        response is the time spent in that call.

        Raises ValidationException when the id does not resolve to a node.
        Responds 401 when the user is not authenticated.
        """
        print("==> update metrics request on ", corpusnode_id)

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        try:
            corpus = cache.Node[int(corpusnode_id)]
        except Exception:
            # any lookup/conversion failure means "no such corpus"; unlike a
            # bare except, this lets KeyboardInterrupt/SystemExit propagate
            corpus = None

        if corpus is None:
            raise ValidationException("%s is not a valid corpus node id." %
                                      corpusnode_id)

        t_before = datetime.now()
        # =============
        scheduled(recount)(corpus.id)
        # =============
        t_after = datetime.now()

        return JsonHttpResponse({
            'corpus_id':
            corpusnode_id,
            'took':
            "%f s." % (t_after - t_before).total_seconds()
        })
Пример #20
0
def DebugHttpResponse(data):
    """Return an HTML page showing ``str(data)`` in a preformatted block.

    The text is HTML-escaped first, so that data containing ``<``, ``>`` or
    ``&`` is displayed literally instead of being interpreted as markup.
    """
    # function-scope import keeps this helper self-contained
    from html import escape

    return HttpResponse(
        '<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>'
        % (escape(str(data)), ))
Пример #21
0
def JsonHttpResponse(data, status=200):
    """Return an HttpResponse whose body is ``data`` encoded as JSON.

    The body is produced by ``json_encoder.encode(data)`` and served as
    'application/json; charset=utf-8' with the given HTTP ``status``.
    """
    payload = json_encoder.encode(data)
    return HttpResponse(
        content=payload,
        content_type='application/json; charset=utf-8',
        status=status,
    )
Пример #22
0
    def put(self, request):
        """
        Basic external access for *creating an ngram*
        ---------------------------------------------

         1 - checks user authentication before any changes

         2 - checks if the ngram is already in the Ngram table in DB
              if yes returns ngram_id and optionally mainform_id
              otherwise continues

         3 - adds the ngram to Ngram table in DB

         4 - (if corpus param is present)
             adds the ngram doc counts to NodeNgram table in DB
             (aka "index the ngram" through the docs of the corpus)

         5 - returns json with:
             'msg'   => a success msg
             'text'  => the initial text content
             'term'  => the normalized text content
             'id'    => the new ngram_id
             'count' => the number of docs with the ngram in the corpus
                        (if corpus param is present)
             'group' => the mainform_id if applicable

        Any exception raised during steps 2-4 is reported as a JSON response
        with status 400 ('msg' holds the error text).

        possible inline parameters
        --------------------------
        @param    text=<ngram_string>         [required]
        @param    corpus=<CORPUS_ID>          [optional]
        @param    testgroup (true if present) [optional, requires corpus]
        """

        # 1 - check user authentication
        if not request.user.is_authenticated():
            return HttpResponse("Unauthorized", status=401)

        # the params
        params = get_parameters(request)

        print("PARAMS", list(params.items()))

        if 'text' in params:
            original_text = str(params.pop('text'))
            ngram_str = normalize_forms(normalize_chars(original_text))
        else:
            raise ValidationException('The route PUT /api/ngrams/ is used to create a new ngram\
                                        It requires a "text" parameter,\
                                        for instance /api/ngrams?text=hydrometallurgy')

        if ('testgroup' in params) and ('corpus' not in params):
            raise ValidationException("'testgroup' param requires 'corpus' param")

        # if we have a 'corpus' param (to do the indexing)...
        do_indexation = False
        if 'corpus' in params:
            # we retrieve the corpus...
            corpus_id = int(params.pop('corpus'))
            corpus_node = cache.Node[corpus_id]
            # and the user must also have rights on the corpus
            if request.user.id == corpus_node.user_id:
                do_indexation = True
            else:
                return HttpResponse("Unauthorized", status=401)

        # number of "words" in the ngram
        ngram_size = len(findall(r' +', ngram_str)) + 1

        # do the additions
        try:
            log_msg = ""
            ngram_id = None
            mainform_id = None

            preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first()

            if preexisting is not None:
                ngram_id = preexisting.id
                log_msg += "ngram already existed (id %i)\n" % ngram_id

                # in the context of a corpus we can also check if has mainform
                if 'testgroup' in params:
                    grouplist_row = (session.query(Node.id)
                                            .filter(Node.parent_id == corpus_id)
                                            .filter(Node.typename == 'GROUPLIST')
                                            .first()
                                     )
                    had_mainform = None
                    if grouplist_row is not None:
                        # .first() yields a result row (or None), not a bare
                        # id: extract the scalar before comparing with the
                        # node_id column
                        groupings_id = grouplist_row[0]
                        had_mainform = (session.query(NodeNgramNgram.ngram1_id)
                                              .filter(NodeNgramNgram.node_id == groupings_id)
                                              .filter(NodeNgramNgram.ngram2_id == preexisting.id)
                                              .first()
                                        )
                    if had_mainform:
                        mainform_id = had_mainform[0]
                        log_msg += "ngram had mainform (id %i) in this corpus" % mainform_id
                    else:
                        log_msg += "ngram was not in any group for this corpus"

            else:
                # 2 - insert into Ngrams
                new_ngram = Ngram(terms=ngram_str, n=ngram_size)
                session.add(new_ngram)
                session.commit()
                ngram_id = new_ngram.id
                log_msg += "ngram was added with new id %i\n" % ngram_id

            # 3 - index the term
            if do_indexation:
                n_added = index_new_ngrams([ngram_id], corpus_node)
                log_msg += 'ngram indexed in corpus %i\n' % corpus_id

            return JsonHttpResponse({
                'msg': log_msg,
                'text': original_text,
                'term': ngram_str,
                'id' : ngram_id,
                'group' : mainform_id,
                'count': n_added if do_indexation else 'no corpus provided for indexation'
                }, 200)

        # just in case
        except Exception as e:
            return JsonHttpResponse({
                'msg': str(e),
                'text': original_text
                }, 400)