Exemplo n.º 1
0
def copy_pdf_id(src, target):
    """
        Give the PDF at ``target`` the same trailer ID as the PDF at ``src``.

        Both arguments are file paths. ``target`` is read, its ID is
        replaced by the one found in ``src``, and the result is written
        back to the ``target`` path.
    """
    source = PdfReader(src)
    destination = PdfReader(target)

    # Only the ID is carried over; everything else in the target trailer
    # is written back as it was read.
    destination.ID = source.ID

    writer = PdfWriter(target, trailer=destination)
    writer.write()
Exemplo n.º 2
0
def refingerprint_pdf(filename, directory, copy_count, suffix):
    """
        Write ``copy_count`` re-fingerprinted copies of a PDF.

        The source file is read from ``/tmp/<directory>/<filename>``. Each
        copy receives a random ``randomMetaData`` Info entry and a fresh
        random 32-character hexadecimal ID, and is written into the same
        directory as ``<filename>[-<suffix>]-<n>.pdf``.

        A summary of the source file and of all copies is saved as
        ``file_info.json`` in that directory, after which
        ``delete_refingerprint.delay`` is called with the directory
        (presumably a deferred clean-up task -- confirm against its
        definition).

        If anything fails, ``delete_refingerprint.delay`` is still called
        and the original exception is re-raised with its traceback intact.
    """
    # Resolved outside the ``try`` so the error handler below can always
    # refer to it; previously a failure here surfaced as an unrelated
    # UnboundLocalError raised from the handler.
    base_dir = os.path.join('/tmp/', directory)

    try:
        base_file_path = os.path.join(base_dir, filename)

        # file size in kB
        file_size = round(os.path.getsize(base_file_path) / 1024)

        content = PdfReader(base_file_path)

        if content.ID is None:
            file_id = 'No ID'
        else:
            file_id = str(content.ID[0]).replace('<', '').replace('>', '')\
                    .replace('(', '').replace(')', '')

        # Bad file ids can contain strange characters. UnicodeError is the
        # common base of UnicodeEncodeError and UnicodeDecodeError, so the
        # check works whichever of the two the interpreter raises.
        try:
            file_id.encode('utf-8')
        except UnicodeError:
            file_id = 'Unreadable'

        # Seed for the per-copy ID hash; identical for every copy.
        name_seed = filename.strip().encode('utf-8')

        processed_files = []

        for copy_index in range(copy_count):
            copy_number = str(copy_index + 1)

            if suffix:
                save_filename = filename + '-' + suffix + '-' + copy_number + '.pdf'
            else:
                save_filename = filename + '-' + copy_number + '.pdf'

            file_path = os.path.join(base_dir, save_filename)

            download_link = '/fingerprinter/download/%s?file=%s' % (directory, save_filename)

            # Re-read the source so every copy starts from pristine content.
            content = PdfReader(base_file_path)

            # add some random meta data
            # NOTE(review): assumes the PDF has an Info dictionary -- confirm
            content.Info.randomMetaData = binascii.b2a_hex(os.urandom(20)).upper()

            # change id to random id: file name + current time + random bytes
            md = md5(name_seed)
            md.update(str(time.time()).encode('utf-8'))
            md.update(os.urandom(10))

            # An MD5 hex digest is always exactly 32 characters, so no
            # trimming or padding is needed.
            new_id = md.hexdigest().upper()

            content.ID = [new_id, new_id]

            PdfWriter(file_path, trailer=content).write()

            processed_files.append({
                'filename': save_filename,
                'download_link': download_link,
                'id': content.ID[0],
            })

        file_info = {'filename': filename, 'size': file_size, 'id': file_id,
            'directory_name': directory, 'processed_files': processed_files}

        # save copy of info in file directory; ``with`` closes the file even
        # when serialisation fails
        with open(os.path.join(base_dir, 'file_info.json'), 'w') as out_file:
            json.dump(file_info, out_file, indent=4)

        # delete generated files
        delete_refingerprint.delay(base_dir)

    except Exception:
        delete_refingerprint.delay(base_dir)

        # bare ``raise`` keeps the original traceback
        raise
Exemplo n.º 3
0
def fingerprinter_upload(request):
    """
        Handle a PDF upload and produce re-fingerprinted copies of it.

        Values read from the request:
          * ``FILES['pdf-file']``   -- the PDF to copy (required)
          * ``POST['copy-count']``  -- number of copies; falls back to 1
            when missing or not an integer
          * ``POST['file-suffix']`` -- optional text inserted into the name
            of every copy

        Every copy gets a random ``randomMetaData`` Info entry and a fresh
        random 32-character hexadecimal ID. It is written into a new,
        randomly named directory under ``<STATIC_ROOT>/fingerprints`` and
        additionally copied into ``<STATIC_ROOT>/drop-pdf`` under a name
        that includes the random directory name.

        Returns the rendered ``refingerprint_results.html`` template.
        Raises ``Http404`` when no file was uploaded.
    """
    pdf_file = request.FILES.get('pdf-file')
    copy_count = request.POST.get('copy-count', 1)
    suffix = request.POST.get('file-suffix', '')

    try:
        copy_count = int(copy_count)
    except (TypeError, ValueError):
        copy_count = 1

    if pdf_file is None:
        raise Http404('file not provided')

    #make save directory
    rand_path = randomword(9)
    static_root = os.path.join(settings.BASE_DIR, settings.STATIC_ROOT)
    fingerprint_dir = os.path.join(static_root, 'fingerprints', rand_path)
    annotation_dir = os.path.join(static_root, 'drop-pdf')

    os.makedirs(fingerprint_dir)

    filename, extension = os.path.splitext(pdf_file.name)

    #handle non ascii chars in file name
    #(strangely only wsgi seems to choke on those)
    if isinstance(filename, unicode):
        try:
            filename = unidecode(filename)
        except Exception:
            filename = re.sub(r'[^\x00-\x7F]+', '.', filename)

    # Both the suffix (user input) and the transliterated file name end up
    # inside file paths, and neither is guaranteed to be free of path
    # separators. Neutralise them so a copy can never be addressed outside
    # its target directory.
    for separator in (os.sep, os.altsep):
        if separator:
            filename = filename.replace(separator, '_')
            suffix = suffix.replace(separator, '_')

    file_content = pdf_file.read()

    content = PdfReader(io.BytesIO(file_content))

    if content.ID is None:
        file_id = 'No ID'
    else:
        file_id = str(content.ID[0]).replace('<', '').replace('>', '')\
                .replace('(', '').replace(')', '')

    # Bad file ids can contain strange characters. UnicodeError is the
    # common base of UnicodeEncodeError and UnicodeDecodeError, so the check
    # keeps working whichever of the two the interpreter raises.
    try:
        file_id.encode('utf-8')
    except UnicodeError:
        file_id = 'Unreadable'

    file_info = {
        'filename': pdf_file.name,
        'size': pdf_file.size,
        'id': file_id,
        'directory_name': rand_path
    }

    processed_files = []

    for copy_index in range(copy_count):
        copy_number = str(copy_index + 1)

        if suffix:
            save_filename = filename + '-' + suffix + '-' + copy_number + extension
        else:
            save_filename = filename + '-' + copy_number + extension

        file_path = os.path.join(fingerprint_dir, save_filename)

        # Re-parse the upload so every copy starts from pristine content.
        content = PdfReader(io.BytesIO(file_content))

        #add some random meta data
        # NOTE(review): assumes the PDF has an Info dictionary -- confirm
        content.Info.randomMetaData = binascii.b2a_hex(
            os.urandom(20)).upper()

        #change id to random id: file name + current time + random bytes
        md = hashlib.md5(filename)
        md.update(str(time.time()))
        md.update(os.urandom(10))

        # An MD5 hex digest is always exactly 32 characters, so no trimming
        # or padding is needed.
        new_id = md.hexdigest().upper()

        content.ID = [new_id, new_id]

        PdfWriter(file_path, trailer=content).write()

        #copy file into online annotator with unique name
        # NOTE: with an empty suffix this name contains a double dash;
        # kept as-is so existing links keep their shape.
        annotation_name = filename + '-' + suffix + '-' \
                + copy_number + '-' + rand_path + extension

        shutil.copy(file_path, os.path.join(annotation_dir, annotation_name))

        #For some reason nested directories do not provide files from static.
        #We need to clean up double "settings" file and sanify the basic setup but
        #For now serve the file from a dedicated URL.

        processed_files.append({
            'filename': save_filename,
            'download_path': os.path.join(rand_path, save_filename),
            'docdrop_link': annotation_name,
            'id': content.ID[0]
        })

    data = {
        'processed_files': processed_files,
        'file_info': file_info,
        'archive_name': filename
    }

    return render_to_response('refingerprint_results.html', data)