Exemplo n.º 1
0
def redact_attachment(request, slug, attachment_id):
    """Render the redaction page for an attachment or store a redacted PDF.

    The request is looked up by ``slug`` and the attachment by
    ``attachment_id`` (restricted to attachments belonging to that request);
    either lookup goes through ``get_object_or_404``.

    * Non-POST requests render ``foirequest/redact.html``.
    * POST requests expect a UTF-8 encoded JSON body with redaction
      instructions. The body is handed to ``redact_file``; the resulting
      PDF is stored on the already existing redacted attachment if there is
      one, otherwise on a newly created ``FoiAttachment``. The response is
      a ``JsonResponse`` with the anchor URL of the redacted attachment.

    Returns the ``render_403`` response when ``can_write_foirequest``
    denies access, and the ``render_400`` response when the POST body
    cannot be decoded/parsed or ``redact_file`` returns ``None``.
    """
    foirequest = get_object_or_404(FoiRequest, slug=slug)

    if not can_write_foirequest(foirequest, request):
        return render_403(request)

    attachment = get_object_or_404(FoiAttachment, pk=int(attachment_id),
            belongs_to__request=foirequest)

    # Find an existing redacted version so it is overwritten rather than
    # duplicated: either the linked one, or the attachment itself.
    already = None
    if attachment.redacted:
        already = attachment.redacted
    elif attachment.is_redacted:
        already = attachment

    if request.method == 'POST':
        try:
            # Python 2.7/3.5 requires str for json.loads
            instructions = json.loads(request.body.decode('utf-8'))
        except ValueError:
            # The body is client-controlled. Both UnicodeDecodeError and
            # JSON parse errors are ValueError subclasses; treat them as a
            # bad request instead of letting them become a server error.
            return render_400(request)

        path = redact_file(attachment.file.file, instructions)
        if path is None:
            return render_400(request)

        # Base name without extension, reduced to word chars, dots, dashes
        name = attachment.name.rsplit('.', 1)[0]
        name = re.sub(r'[^\w\.\-]', '', name)
        if already:
            att = already
        else:
            att = FoiAttachment(
                belongs_to=attachment.belongs_to,
                name=_('%s_redacted.pdf') % name,
                is_redacted=True,
                filetype='application/pdf',
                approved=True,
                can_approve=True
            )
        # Save while the handle is still open; it is closed afterwards.
        with open(path, 'rb') as f:
            pdf_file = File(f)
            att.file = pdf_file
            att.size = pdf_file.size
            att.approve_and_save()

        if not attachment.is_redacted:
            # The original is superseded by its redacted version.
            attachment.redacted = att
            attachment.can_approve = False
            attachment.approved = False
            attachment.save()
        return JsonResponse({'url': att.get_anchor_url()})

    attachment_url = get_accessible_attachment_url(foirequest, attachment)

    ctx = {
        'foirequest': foirequest,
        'attachment': attachment,
        'attachment_url': attachment_url,
        'config': json.dumps(get_redact_context(foirequest, attachment))
    }

    return render(request, 'foirequest/redact.html', ctx)
Exemplo n.º 2
0
def redact_attachment(request, slug, attachment_id):
    """Render the redaction page for an attachment or store a redacted PDF.

    Access is limited to staff users and the user of the request. Non-staff
    users are additionally refused when the attachment, or its already
    existing redacted version, has ``can_approve`` unset.

    * Non-POST requests render ``foirequest/redact.html``.
    * POST requests expect a UTF-8 encoded JSON body with redaction
      instructions. The body is handed to ``redact_file``; the resulting
      PDF is stored on the already existing redacted attachment if there is
      one, otherwise on a newly created ``FoiAttachment``. The response is
      a ``JsonResponse`` with the anchor URL of the redacted attachment.

    Returns the ``render_403`` response on a failed permission check and
    the ``render_400`` response when the POST body cannot be decoded/parsed
    or ``redact_file`` returns ``None``.
    """
    foirequest = get_object_or_404(FoiRequest, slug=slug)
    if not request.user.is_staff and not request.user == foirequest.user:
        return render_403(request)
    attachment = get_object_or_404(FoiAttachment,
                                   pk=int(attachment_id),
                                   belongs_to__request=foirequest)
    if not attachment.can_approve and not request.user.is_staff:
        return render_403(request)

    # Find an existing redacted version so it is overwritten rather than
    # duplicated: either the linked one, or the attachment itself.
    already = None
    if attachment.redacted:
        already = attachment.redacted
    elif attachment.is_redacted:
        already = attachment

    if already is not None and not already.can_approve and not request.user.is_staff:
        return render_403(request)

    if request.method == 'POST':
        try:
            # Python 2.7/3.5 requires str for json.loads
            instructions = json.loads(request.body.decode('utf-8'))
        except ValueError:
            # The body is client-controlled. Both UnicodeDecodeError and
            # JSON parse errors are ValueError subclasses; treat them as a
            # bad request instead of letting them become a server error.
            return render_400(request)

        path = redact_file(attachment.file.file, instructions)
        if path is None:
            return render_400(request)

        # Base name without extension, reduced to word chars, dots, dashes
        name = attachment.name.rsplit('.', 1)[0]
        name = re.sub(r'[^\w\.\-]', '', name)
        if already:
            att = already
        else:
            att = FoiAttachment(belongs_to=attachment.belongs_to,
                                name=_('%s_redacted.pdf') % name,
                                is_redacted=True,
                                filetype='application/pdf',
                                approved=True,
                                can_approve=True)
        # Open via a context manager so the handle is closed once the
        # attachment has been saved (previously it was never closed).
        with open(path, 'rb') as f:
            pdf_file = File(f)
            att.file = pdf_file
            att.size = pdf_file.size
            att.approve_and_save()
        if not attachment.is_redacted:
            # The original is superseded by its redacted version.
            attachment.redacted = att
            attachment.can_approve = False
            attachment.approved = False
            attachment.save()
        return JsonResponse({'url': att.get_anchor_url()})
    return render(request, 'foirequest/redact.html', {
        'foirequest': foirequest,
        'attachment': attachment
    })
Exemplo n.º 3
0
def redact_attachment_task(att_id, target_id, instructions):
    """Redact one attachment and write the result to a target attachment.

    ``att_id`` identifies the attachment whose file is redacted and
    ``target_id`` the attachment that receives the redacted PDF (they may
    be the same). The task returns silently if either one does not exist.

    When ``redact_file`` raises or returns ``None`` the source attachment
    is reset (``pending`` cleared, link to a redacted version dropped,
    re-approved if it is itself a redacted attachment) and a target
    without a file is deleted.

    On success the redacted bytes are stored on the target, OCR is
    attempted on the stored file, and the OCR output replaces the file if
    OCR produced one. The target is then approved and saved.
    """
    try:
        source = FoiAttachment.objects.get(pk=att_id)
        if att_id == target_id:
            destination = source
        else:
            destination = FoiAttachment.objects.get(pk=target_id)
    except FoiAttachment.DoesNotExist:
        return

    def store_on_destination(raw_bytes):
        # Record the size first, then write the file without saving the model
        content = ContentFile(raw_bytes)
        destination.size = content.size
        destination.file.save(destination.name, content, save=False)

    logger.info('Trying redaction of %s', source.id)

    try:
        redacted_bytes = redact_file(source.file, instructions)
    except Exception:
        logger.exception("PDF redaction error")
        redacted_bytes = None

    if redacted_bytes is None:
        logger.info('Redaction failed %s', source.id)
        # Redaction has failed, remove empty attachment
        if source.redacted:
            source.redacted = None
        if source.is_redacted:
            source.approved = True
            source.can_approve = True
        source.pending = False
        source.save()

        if not destination.file:
            destination.delete()
        return

    logger.info('Redaction successful %s', source.id)
    store_on_destination(redacted_bytes)

    logger.info('Trying OCR %s', destination.id)

    try:
        ocr_bytes = run_ocr(
            destination.file.path,
            language=settings.LANGUAGE_CODE,
            timeout=60 * 4,
        )
    except SoftTimeLimitExceeded:
        ocr_bytes = None

    if ocr_bytes is None:
        # Keep the redacted file as stored above
        logger.info('OCR failed %s', destination.id)
    else:
        logger.info('OCR successful %s', destination.id)
        store_on_destination(ocr_bytes)

    destination.can_approve = True
    destination.pending = False
    destination.approve_and_save()