Exemplo n.º 1
0
def get_context(request):
    """ Returns the context for lemmatised feature words as a download.

    The parameters are taken from the query string or, when the query string
    is empty, from a JSON request body. The parameters object;
    {
        containerid: int,
        lemma: list[str],
        highlight: bool,
    }
    :param request: the incoming request; ``GET`` and ``body`` are read.
    :return: an ``HttpResponse`` whose content is the serialiser's value,
        sent as an attachment named by the serialiser.
    :raises Http404: when no parameters are supplied, when the body is not
        valid JSON, or when ``containerid`` / ``lemma`` are missing or of the
        wrong type.
    """
    params = request.GET.dict()
    if not params:
        try:
            params = json.loads(request.body)
        except ValueError as err:
            # json.JSONDecodeError (and UnicodeDecodeError for a body that is
            # not valid UTF-8) subclass ValueError. An unparsable body is
            # answered the same way as missing parameters instead of
            # surfacing as an unhandled server error.
            raise Http404 from err

    if not params:
        raise Http404
    try:
        containerid = int(params['containerid'])
        container = Container.get_object(pk=containerid)
        lemma = params['lemma']
    except (ValueError, KeyError, TypeError):
        raise Http404(params)

    # NOTE(review): when the parameters come from the query string this is
    # presumably a str, so any non-empty value (even "false") is truthy --
    # confirm against the callers.
    highlight = params.get('highlight', False)

    lemma_to_words, lemma = container.get_lemma_words(lemma)

    # Expand each lemma into its mapped words; a lemma without a mapping is
    # searched for as-is.
    matchwords = []
    for item in lemma:
        mapping = next(
            (entry for entry in lemma_to_words if entry.get('lemma') == item),
            None)
        if mapping is None:
            matchwords.append(item)
        else:
            matchwords.extend(mapping.get('words'))

    data = search_texts(path=container.container_path(),
                        highlight=highlight,
                        words=matchwords)

    serialiser_cls = SerialiserFactory().get_serialiser('search_text_csv')

    # Only the container's records whose file_id appears in the search hits
    # are passed on to the serialiser.
    hit_ids = [uuid.UUID(hit['dataid']) for hit in data['data']]
    data_objs = [{
        'title': rec.title,
        'url': rec.url,
        'pk': rec.pk,
        'dataid': rec.dataid,
        'created': rec.created
    } for rec in container.data_set.filter(file_id__in=hit_ids)]
    serialiser = serialiser_cls(data={
        'docs': data_objs,
        'response': data,
        'lemma': lemma
    })
    zip_name = serialiser.get_zip_name(
        f'Feature-Context-ContainerID-{containerid}')
    resp = HttpResponse(serialiser.get_value(),
                        content_type='application/force-download')
    resp['Content-Disposition'] = 'attachment; filename="%s"' % zip_name
    return resp
Exemplo n.º 2
0
    def get_file_path(self, container: Container = None):
        """
        Build the normalised on-disk path of this object's file.

        The path is the container's directory (``container_path()``) joined
        with this object's ``dataid``.

        :param container: container to take the directory from; when it is
            falsy the container is looked up through ``Container.get_object``
            using the pk of ``self.container``.
        :return: the normalised path.
        """
        owner_pk = self.container.pk
        owner = container if container else Container.get_object(owner_pk)
        location = os.path.join(owner.container_path(), self.dataid)
        return os.path.normpath(location)
Exemplo n.º 3
0
 def delete_many(cls, data_ids: typing.List[int], containerid: int = None):
     """
     Delete the objects with the given ids that belong to one container.

     Objects whose container differs from the one looked up for
     ``containerid`` are left untouched. For every other object the file at
     ``file_path`` is removed when it exists, then the object is deleted.

     :param data_ids: primary keys of the objects to delete.
     :param containerid: key handed to ``Container.get_object``.
     :return: None
     """
     owner = Container.get_object(pk=containerid)
     candidates = cls.objects.filter(pk__in=data_ids)
     for record in candidates:
         if owner != record.container:
             # Belongs to another container: skip without deleting anything.
             continue
         stored_at = record.file_path
         if os.path.exists(stored_at):
             os.remove(stored_at)
         record.delete()
Exemplo n.º 4
0
    def create(cls,
               data: (
                   str,
                   list,
               ) = None,
               containerid: int = None,
               links: list = None,
               title: str = None,
               endpoint: str = None,
               seed: bool = False):
        """
        Create and save a Data object with all the urls that make it.

        The object's content is written to its file path first; the object is
        saved, and one Link is created per entry of ``links``, only when that
        write succeeds.

        :param data: content handed to ``write_data_to_file``.
        :param containerid: key handed to ``Container.get_object``.
        :param links: urls to record as Link objects for the new object;
            ``None`` (the default) is treated as "no links".
        :param title: stored on the object as ``title``.
        :param endpoint: stored as the object's ``url``; its hostname is
            stored as ``hostname``.
        :param seed: stored on the object as ``seed``.
        :return: the saved object, or None when writing the data raised
            ``DuplicateUrlError``.
        """
        container_obj = Container.get_object(containerid)
        url_parse = urllib.parse.urlparse(endpoint)
        obj = cls(title=title,
                  container=container_obj,
                  url=endpoint,
                  seed=seed,
                  hostname=url_parse.hostname)

        file_path = obj.get_file_path(container=container_obj)
        obj.file_path = file_path
        try:
            hash_text = obj.write_data_to_file(path=file_path, data=data)
        except DuplicateUrlError:
            return None
        obj.hash_text = hash_text
        obj.save()
        # ``links`` defaults to None; iterating it directly would raise
        # TypeError after the object had already been saved.
        for item in links or ():
            Link.create(url=item, data=obj)
        return obj
Exemplo n.º 5
0
 def prepare_data(self, containerid, data):
     """
     Enrich search items with details of the container's matching records.

     For each item, the first record of the container whose ``dataid`` equals
     the item's ``'fileid'`` is looked up. When one is found, ``'fileid'`` is
     removed from the item and ``'url'``, ``'title'``, ``'pk'`` and
     ``'created'`` are copied over from the record; items without a matching
     record are left as they are. ``data`` is modified and reversed in place.

     :param containerid: key handed to ``Container.get_object``.
     :param data: list of dict-like items carrying a ``'fileid'`` key.
     :return: the same ``data`` list, reversed.
     :raises Http404: when ``Container.get_object`` raises ``ValueError``.
     """
     try:
         container = Container.get_object(pk=containerid)
     except ValueError:
         raise Http404(containerid)
     records = list(container.data_set.all())
     copied_fields = ('url', 'title', 'pk', 'created')
     for entry in data:
         match = next(
             (rec for rec in records if rec.dataid == entry['fileid']), None)
         if match is None:
             continue
         del entry['fileid']
         for field in copied_fields:
             entry[field] = getattr(match, field)
     data.reverse()
     return data
     return data