def get_context(request):
    """
    Returns the context for lemmatised feature words.

    The parameters object:
    {
        containerid: int,
        lemma: list[str],
        highlight: bool,
    }
    :param request:
    :return:
    """
    params = request.GET.dict() or json.loads(request.body)
    if not params:
        raise Http404
    try:
        containerid = int(params['containerid'])
        container = Container.get_object(pk=containerid)
        lemma = params['lemma']
    except (ValueError, KeyError, TypeError):
        raise Http404(params)
    highlight = params.get('highlight', False)
    lemma_to_words, lemma = container.get_lemma_words(lemma)
    # Expand every lemma to the word forms it covers; fall back to the
    # lemma itself when no mapping exists.
    matchwords = []
    for i in lemma:
        try:
            mapping = next(_ for _ in lemma_to_words if _.get('lemma') == i)
            matchwords.extend(mapping.get('words'))
        except StopIteration:
            matchwords.append(i)
    data = search_texts(path=container.container_path(),
                        highlight=highlight,
                        words=matchwords)
    serialiser = SerialiserFactory().get_serialiser('search_text_csv')
    data_objs = [{
        'title': _.title,
        'url': _.url,
        'pk': _.pk,
        'dataid': _.dataid,
        'created': _.created
    } for _ in container.data_set.filter(file_id__in=list(
        uuid.UUID(_['dataid']) for _ in data['data']))]
    serialiser = serialiser(data={
        'docs': data_objs,
        'response': data,
        'lemma': lemma
    })
    zip_name = serialiser.get_zip_name(
        f'Feature-Context-ContainerID-{containerid}')
    resp = HttpResponse(serialiser.get_value(),
                        content_type='application/force-download')
    resp['Content-Disposition'] = 'attachment; filename="%s"' % zip_name
    return resp
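
# A minimal usage sketch for the view above, assuming it is routed at a
# hypothetical '/context/' URL and that container 1 exists with lemmatised
# features. The JSON body path is used because `lemma` is a list, which a
# GET query string cannot carry cleanly.
import json

from django.test import Client

client = Client()
response = client.generic(
    'GET', '/context/',
    data=json.dumps({
        'containerid': 1,
        'lemma': ['work', 'climate'],
        'highlight': True,
    }),
    content_type='application/json')
# The view answers with a zip attachment produced by the serialiser.
assert response['Content-Disposition'].startswith('attachment;')
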
def get_file_path(self, container: Container = None):
    """
    Returns the path of the file as it is saved on disk.
    :return:
    """
    if not container:
        container = Container.get_object(self.container.pk)
    return os.path.normpath(
        os.path.join(container.container_path(), self.dataid))
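
# A short sketch, assuming `obj` is a saved record of the model defining
# get_file_path, with `dataid` and `container` set. Passing the container
# explicitly skips the extra Container lookup.
path = obj.get_file_path()                         # container resolved from obj
path = obj.get_file_path(container=obj.container)  # lookup avoided
# e.g. '<container path>/<dataid>', normalised for the current OS.
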
@classmethod
def delete_many(cls, data_ids: typing.List[int], containerid: int = None):
    """
    Delete many objects for a given containerid and a list of data ids.
    :param data_ids:
    :param containerid:
    :return:
    """
    container = Container.get_object(pk=containerid)
    for obj in cls.objects.filter(pk__in=data_ids):
        # Ignore records that belong to another container.
        if container != obj.container:
            continue
        # Remove the file on disk before deleting the database record.
        _path = obj.file_path
        if os.path.exists(_path):
            os.remove(_path)
        obj.delete()
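
# Hypothetical call, assuming the classmethod lives on a `Data` model:
# delete the records with pks 3, 5 and 9 from container 1, removing their
# files on disk first. Records owned by another container are skipped.
Data.delete_many(data_ids=[3, 5, 9], containerid=1)
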
@classmethod
def create(cls,
           data: typing.Union[str, list] = None,
           containerid: int = None,
           links: list = None,
           title: str = None,
           endpoint: str = None,
           seed: bool = False):
    """
    Create and save a Data object with all the urls that make it.
    :param data:
    :param containerid:
    :param links:
    :param title:
    :param endpoint:
    :param seed:
    :return:
    """
    container_obj = Container.get_object(containerid)
    url_parse = urllib.parse.urlparse(endpoint)
    obj = cls(title=title,
              container=container_obj,
              url=endpoint,
              seed=seed,
              hostname=url_parse.hostname)
    file_path = obj.get_file_path(container=container_obj)
    obj.file_path = file_path
    try:
        hash_text = obj.write_data_to_file(path=file_path, data=data)
    except DuplicateUrlError:
        return None
    obj.hash_text = hash_text
    obj.save()
    # `links` defaults to None; guard against iterating over it.
    for item in links or []:
        Link.create(url=item, data=obj)
    return obj
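
# A hedged example of creating a record, assuming the classmethod belongs to
# a `Data` model; the endpoint, links and title are illustrative. `None`
# comes back when write_data_to_file raises DuplicateUrlError, so check the
# result before using it.
obj = Data.create(
    data='Body text scraped from the page.',
    containerid=1,
    links=['https://example.com/a', 'https://example.com/b'],
    title='Example page',
    endpoint='https://example.com/',
    seed=True)
if obj is None:
    ...  # duplicate url: nothing was saved
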
def prepare_data(self, containerid, data):
    """
    Attach the url, title, pk and created date of the matching Data record
    to every item in `data`.
    :param containerid:
    :param data:
    :return:
    """
    try:
        container = Container.get_object(pk=containerid)
    except ValueError:
        raise Http404(containerid)
    dataset = list(container.data_set.all())
    for item in data:
        try:
            rec = next(_ for _ in dataset if _.dataid == item['fileid'])
        except StopIteration:
            continue
        del item['fileid']
        item['url'] = rec.url
        item['title'] = rec.title
        item['pk'] = rec.pk
        item['created'] = rec.created
    data.reverse()
    return data
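
# Sketch of the expected input and output, with hypothetical values. Every
# search match carries a `fileid` equal to some Data record's `dataid`;
# prepare_data swaps the id for the record's url, title, pk and created
# date, then reverses the list. `view` stands for whatever instance exposes
# the method.
matches = [{'fileid': 'b0f4c1de-0000-0000-0000-000000000000',
            'text': 'matched sentence'}]
out = view.prepare_data(containerid=1, data=matches)
# out[0] now holds 'url', 'title', 'pk' and 'created' instead of 'fileid'.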