def safe_filename(fn):
    '''Return the base name of ``fn`` as a CP437-encoded byte string.

    Directory components are stripped with ``basename``.  A byte string is
    decoded with the filesystem encoding first; if that fails it is decoded
    as UTF-8 with undecodable bytes replaced.  Characters CP437 cannot
    represent are replaced (``'replace'`` error handler) on encoding.
    '''
    fn = basename(fn)
    if isinstance(fn, bytes):
        # sys.getfilesystemencoding() may return None on Python 2 (meaning
        # "system default"); passing None as the codec raises TypeError, so
        # fall back to UTF-8 in that case.
        encoding = sys.getfilesystemencoding() or 'utf8'
        try:
            fn = fn.decode(encoding)
        except UnicodeDecodeError:
            fn = fn.decode('utf8', 'replace')
    return fn.encode('cp437', 'replace')
def _request_files(files=None): if files: for elt in files: yield elt return fn = None if SAVE_REQ_DIR: with _SRD_lock: fns = os.listdir(SAVE_REQ_DIR) if len(fns) > SAVE_REQ_DIR_THRESHOLD: for x in sorted(fns)[:SAVE_REQ_DIR_THRESHOLD // 2]: unlink(os.path.join(SAVE_REQ_DIR, x)) fn = os.path.join(SAVE_REQ_DIR, str(int(time.time() * 1000)) + '--' + ('%06d' % REQ_COUNTER) + urllib.quote_plus(request.url, safe='')) import gzip files = ((stor.stream, stor.mimetype, stor.name) for _, stor in request.files.iteritems(multi=True)) try: first = files.next() files = itertools.chain([first], files) except StopIteration: # nothing there LOG.warn('empty "files"') if request.data: import re rgx = re.compile('''filename=['"](.*?)["']''') files = [(StringIO(request.data), request.headers['Content-Type'], rgx.search(request.headers.get('Content-Disposition', 'filename=""')).group(1))] LOG.info('files=%r', files) i = 0 for stream, mimetype, name in files: if len(stream.read(1)) == 1: # skip empty files stream.seek(0) if fn: with gzip.open(fn + ('--file%02d' % i) + '.dmp.gz', 'wb') as fh: while 1: chunk = stream.read(65536) if not chunk: break fh.write(chunk) stream.seek(0) i += 1 yield (stream, mimetype, basename(name))
def convert_to_pdf(files=None, splitted=None, return_errors=None,
                   dest_typ=None):
    '''Yield PDF chunks converted from the given (or the request's) files.

    ``files`` is handed to ``request_files``, which supplies
    ``(stream, mimetype, name)`` triples.  ``splitted`` is passed on to
    ``pdf.to_pdf``; when ``None`` it is read from the ``splitted`` request
    argument (``'1'`` means true).  ``return_errors`` defaults likewise
    from the ``errors`` request argument.  ``dest_typ`` is accepted but not
    used by this function.

    Every input that could not be converted, produced no result, produced a
    chunk that fails ``pdf.assert_pdf`` or a chunk not larger than
    ``pdf.MIN_PDF_SIZE`` is recorded as ``(stream, mimetype, name)``.  If
    ``return_errors`` is true and anything was recorded, that list is
    yielded as the final item after all PDF chunks.
    '''
    if splitted is None:
        splitted = request.args.get('splitted', '0') == '1'
    if return_errors is None:
        return_errors = request.args.get('errors', '0') == '1'

    errors = []
    # NOTE(review): ``request_files`` (without leading underscore) is not
    # defined in the visible part of this file -- presumably a wrapper
    # around ``_request_files``; confirm.
    for stream, mimetype, name in request_files(files):
        name = basename(name)
        LOG.debug('to_pdf from=%s, stream=%r (%s)', mimetype, stream, name)
        res = None
        try:
            res = pdf.to_pdf(stream, mimetype, splitted=splitted)
            LOG.debug('to_pdf from=%s => %s', mimetype, repr(res)[:100])
        except pdf.NonConvertible:
            errors.append((stream, mimetype, name))
            LOG.warn('%r not convertible to pdf', errors[-1])
            # Already reported; skip the generic "conversion failed" log.
            continue
        except Exception:
            # Not a bare ``except:`` so that KeyboardInterrupt and
            # SystemExit still propagate.
            LOG.exception('error converting %s to pdf', repr(stream)[:100])
            errors.append((stream, mimetype, name))
            continue

        if not res:
            LOG.error('conversion of %r(%s) failed: %r', mimetype, stream, res)
            # Record the failure so callers asking for errors learn about it.
            errors.append((stream, mimetype, name))
            continue

        for i, chunk in enumerate(res):
            if not chunk:
                LOG.warn('chunk %r !?', chunk)
                continue
            if not pdf.assert_pdf(
                    chunk,
                    extra='%s(%s): %s/%d' % (stream, mimetype, name, i),
                    raise_exc=False):
                LOG.warn('part %d is NOT PDF!', i)
                errors.append((stream, mimetype, name))
            # The size check only applies when a minimum is configured and
            # the chunk has a length; -1 means "cannot check, pass through".
            if pdf.MIN_PDF_SIZE > 0 and hasattr(chunk, '__len__'):
                length = len(chunk)
            else:
                length = -1
            LOG.debug('to_pdf yields %s (%d)', repr(chunk)[:100], length)
            if length < 0 or length > pdf.MIN_PDF_SIZE:
                yield chunk
            else:
                errors.append((stream, mimetype, name))

    LOG.info('to_pdf done, errcount=%d', len(errors))
    if return_errors and errors:
        yield errors