Exemplo n.º 1
0
def uploaded_new_derived_document(file):
    """
    Build a `DerivedDocument` for an uploaded file, reusing the
    `DerivedBlob` with the same MD5 sum when one already exists and
    creating (and saving) a new blob otherwise.

    The returned `DerivedDocument` is not saved here; the caller still
    has to fill in `derived_from` and `index`.
    """
    kind, digest = identify_and_md5(file)

    try:
        derived_blob = DerivedBlob.objects.get(md5_sum=digest)
    except DerivedBlob.DoesNotExist:
        # First time this content is seen: store it as a new blob.
        derived_blob = DerivedBlob(md5_sum=digest, file=file, file_type=kind)
        derived_blob.upload_to_url = get_path(kind)
        derived_blob.save()

    derived = DerivedDocument()
    derived._blob = derived_blob
    return derived
Exemplo n.º 2
0
    def test_auto_derived_blob_creation(self):
        """
        Check that a derived blob is created automatically when the file
        attribute is used on a derived document.
        """
        source_doc = Document(title='New Doc', file_name='A File',
                              file=self.upload, author=self.u1)
        source_doc.save()

        derived_doc = DerivedDocument(derived_from=source_doc._blob, index=0,
                                      file=self.upload2)
        derived_doc.save()

        # Rewind the uploads so their content can be read again.
        self.upload2.seek(0)
        self.upload.seek(0)

        # The derived document must hand back the second upload's content.
        stored_content = derived_doc.file.read()
        expected_content = self.upload2.read()
        self.assertEqual(stored_content, expected_content)

        # Exactly one derived blob should exist.
        self.assertEqual(DerivedBlob.objects.count(), 1)

        self.upload2.seek(0)
        derived_doc.file.seek(0)

        # That blob must hold the same content as the second upload.
        blob_content = DerivedBlob.objects.all()[0].file.read()
        expected_content = self.upload2.read()
        self.assertEqual(blob_content, expected_content)
Exemplo n.º 3
0
def create_pdf(doc):
    """
    Convert the file behind `doc` to PDF with unoconv and store the result
    as a `DerivedDocument` derived from the document's blob.

    The original file is copied into a fresh temporary directory, unoconv
    is launched on it (guarded by a watchdog timer of `TIMEOUT`), and the
    first `.pdf` file found in that directory afterwards is saved as the
    derived document. The temporary directory is always removed before
    returning or raising.

    Returns:
        False when no conversion is needed: the blob already belongs to a
        PDF document, already has a derived PDF, or gained a derived PDF
        while the conversion was running. True when a new derived PDF was
        saved.

    Raises:
        ConversionError: when no PDF file is found after running unoconv.
    """
    log.info('PDF Conversion start')
    blob = doc._blob
    orig_file = blob.file

    log.info('Starting conversion of %s to PDF' % blob)

    # Check for derived files of PDF type
    if blob.documents.filter(file_type='pdf'):
        log.info('%s is a PDF, no need to convert' % blob)
        return False
    elif blob.derived_documents.filter(file_type='pdf'):
        log.info('%s has derived PDF, no need to convert' % blob)
        return False

    tempd_loc = tempfile.mkdtemp()
    try:
        # Copy the original into the working directory for unoconv.
        temp_fd, tempf_loc = tempfile.mkstemp(dir=tempd_loc)
        with os.fdopen(temp_fd, "wb") as temp_file:
            for chunk in orig_file.chunks():
                temp_file.write(chunk)

        # Bound before the try so the error handler can always format it.
        port = None
        try:
            port = get_free_port()
            command = UNOCONV_CALL % (port, tempd_loc, tempf_loc)

            log.info("Launching unoconv with port %s" % port)

            proc = subprocess.Popen(command, shell=True)

            # Watchdog: calls `timeout(proc, port)` if unoconv runs longer
            # than TIMEOUT. Always cancelled once waiting ends, even if the
            # wait itself is interrupted.
            watchdog = threading.Timer(TIMEOUT, timeout, [proc, port])
            watchdog.start()
            try:
                proc.wait()
            finally:
                watchdog.cancel()

            # Output is not captured (the child inherits our stdio), so
            # there is nothing to report beyond these placeholders.
            stderr, stdout = None, None

            if proc.returncode != 0:
                error = subprocess.CalledProcessError(proc.returncode,
                                                      command)
                error.output = "%s %s" % (stderr, stdout)
                raise error

            log.info("unoconv (port %s) output: %s %s" % (port, stderr, stdout))

        except subprocess.CalledProcessError as e:
            # A failed run is only logged; the directory is still searched
            # for a PDF below, and the absence of one raises there.
            log.error('unoconv (port %s) returned a non-zero exit status: %s' % (port, e.output))

        # Remove the input copy so only unoconv's output remains.
        os.unlink(tempf_loc)

        pdf_names = [name for name in os.listdir(tempd_loc)
                     if name.lower().endswith('.pdf')]
        if not pdf_names:
            raise ConversionError('Unable to find PDF file')

        pdf_path = os.path.abspath(os.path.join(tempd_loc, pdf_names[0]))
        filename = os.path.basename(orig_file.name)
        filename = os.path.splitext(filename)[0] + '.pdf'

        with open(pdf_path, 'rb') as pdf:
            derived = DerivedDocument(derived_from=blob)
            derived.file = UploadedFile(pdf, filename, 'application/pdf', 0,
                                        None)
            derived.index = type_to_priorty('pdf')

            # Do one last check before saving the blob, just in case this
            # task got fired twice in quick succession.
            if blob.derived_documents.filter(file_type='pdf'):
                return False
            derived.save()
    finally:
        # Runs on every exit path, including the early return above.
        shutil.rmtree(tempd_loc, True)

    log.info("Convertion of %s complete" % blob)

    return True
Exemplo n.º 4
0
def create_pngs(doc, type='pngs'):
    """
    Render the PDF version of `doc` to PNG slides with Ghostscript and store
    each slide as a `DerivedDocument` derived from the document's blob.

    The PDF is copied into a temporary folder, rendered to
    `slide-NNN.png` files, each image is scaled down to fit within
    1920x1200, and the scaled images are saved in page order with
    indexes starting at 0 and no gaps. The temporary folder is removed on
    every exit path.

    Args:
        doc: the document to render; its `_blob`, `file_type`, `type`,
            `file_name` and `get_derived_documents_of_type` are used.
        type: unused by this function; kept for interface compatibility.

    Returns:
        False when the document is a PNG, already has derived PNGs, or
        gained derived PNGs while rendering. None when no PDF is available
        for the document. True when the slides were saved.
    """
    blob = doc._blob
    log.info('Starting png generation of: %s', doc)

    # Check to make sure that we don't already have a pngs pack
    if doc.file_type == 'png':
        log.info('%s is a PNG, no need to convert' % blob)
        return False
    elif blob.derived_documents.filter(file_type='png'):
        log.info('%s has derived PNG, no need to convert' % blob)
        return False

    # Locate a pdf file
    # NOTE(review): this reads `doc.type` while the check above reads
    # `doc.file_type` - confirm both attributes exist and agree.
    if doc.type == 'pdf':
        pdf = doc
    else:
        pdf = doc.get_derived_documents_of_type('pdf')
        if pdf:
            pdf = pdf[0]

    if not pdf:
        log.info("No PDF avaliable for %s" % blob)
        return

    # Create a temp folder
    temp_folder = tempfile.mkdtemp()
    try:
        log.debug('working with: %s', temp_folder)

        # Copy the PDF to disk so Ghostscript can read it by path.
        with tempfile.NamedTemporaryFile(dir=temp_folder,
                                         delete=False) as pdf_copy:
            for data in pdf.file.chunks():
                pdf_copy.write(data)

        # Now call ghostscript
        return_code = subprocess.call(["gs", "-sDEVICE=png16m",
            "-sOutputFile=%s/slide-%s.png" % (temp_folder, '%03d'),
            "-r600", "-dNOPAUSE", "-dBATCH", "-dMaxBitmap=1000000000",
            "%s" % pdf_copy.name])

        if return_code != 0:
            log.error('Ghostscript error')
            # Clean up before asking for a retry.
            shutil.rmtree(temp_folder)
            create_pngs.retry()

        # Process the generated files with PIL

        # Only Ghostscript's own output is matched: the page number is
        # captured as digits so it can be safely converted with int().
        slide_pattern = re.compile(r'^slide-(\d+)\.png$')
        scaled_images = {}
        for entry in os.listdir(temp_folder):
            match = slide_pattern.match(entry)
            if not match:
                continue

            log.debug('scaling image: %s', entry)
            order = int(match.group(1))

            # Resize using PIL
            slide = Image.open(os.path.join(temp_folder, entry))
            slide.thumbnail((1920, 1200), Image.ANTIALIAS)

            new_filename = os.path.join(temp_folder,
                                        'slide-scaled-%03d.png' % order)
            slide.save(new_filename)
            scaled_images[order] = new_filename

        # Page order, so that enumerate() below yields indexes that start
        # at 0 and have no gaps.
        ordered_paths = [scaled_images[key] for key in sorted(scaled_images)]

        # Before uploading check that there are still no other pngs up there.
        if blob.derived_documents.filter(file_type='png'):
            log.info('%s has derived PNG now, canceling upload' % blob)
            return False

        # Now go through all the generated slides and upload
        try:
            for order, filename in enumerate(ordered_paths):
                slide_file = open(filename, 'rb')
                try:
                    folder, base_name = os.path.split(filename)
                    filename = os.path.join(folder, '%s_%s' % (
                        doc.file_name[0:60], base_name))

                    # The upload wrapper is pointed at the scaled image
                    # on disk instead of its own temporary file.
                    upfile = TemporaryUploadedFile(filename, 'image/png', 0,
                                                   None)
                    upfile.file = slide_file

                    derived_doc = DerivedDocument(derived_from=blob)
                    derived_doc.file = upfile
                    derived_doc.index = order

                    derived_doc.save()
                finally:
                    slide_file.close()
        except Exception:
            # Record which slide failed, then let the error propagate.
            log.error(filename)
            raise
    finally:
        # ignore_errors: the folder may already be gone after a failed
        # Ghostscript run.
        shutil.rmtree(temp_folder, True)

    return True