def generic_write_rows_to_sheet(rows, sheet):
    for row, values in enumerate(rows):
        for i, val in enumerate(values):
            val = csvutils.csv_convert(val, delimiter=LIST_DELIMITER_XLS)
            if val is not None:
                # 32767 is the maximum number of characters per xlsx cell
                if len(val) > 32767:
                    logger.error(
                        'warn, row too long, %d, col: %d, len: %d',
                        row, i, len(val))
                sheet.write_string(row, i, val)
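# Usage sketch (an assumption, not from the original module): writing plain
# value lists with an xlsxwriter workbook. The file path and rows below are
# hypothetical.
def _example_generic_write_rows():
    import xlsxwriter
    workbook = xlsxwriter.Workbook(
        '/tmp/example.xlsx', {'constant_memory': True})
    sheet = workbook.add_worksheet('data')
    # each row is an iterable of cell values
    generic_write_rows_to_sheet(
        [['A01', 'compound A'], ['A02', 'compound B']], sheet)
    workbook.close()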
def write_rows_to_sheet(
        wb, sheet_rows, sheet_basename, request=None, image_keys=None,
        title_function=None, list_brackets=None):
    '''
    ***WARNING*** xlsx files load fully into memory on display - if there
    are >~ 2000 images, this will cause performance issues on the
    client ***
    @param sheet_rows iterable of dicts, one per row
    '''
    max_rows_per_sheet = 2**20
    sheet_name = sheet_basename
    sheet = wb.add_worksheet(sheet_name)
    filerow = 0
    sheets = 1
    for row, values in enumerate(sheet_rows):
        if filerow >= max_rows_per_sheet:
            sheet_name = '%s_%d' % (sheet_basename, sheets)
            logger.info(
                'rows: %d, max_rows_per_sheet: %d, adding sheet: %s',
                row, max_rows_per_sheet, sheet_name)
            sheet = wb.add_worksheet(sheet_name)
            filerow = 0
            sheets += 1
        if filerow == 0:
            # write the header row using the keys of the first dict
            for i, (key, val) in enumerate(values.items()):
                if title_function:
                    key = title_function(key)
                sheet.write_string(filerow, i, key)
            filerow += 1
        for i, (key, val) in enumerate(values.items()):
            val = csvutils.csv_convert(
                val, delimiter=LIST_DELIMITER_XLS,
                list_brackets=list_brackets)
            if val is not None:
                # 32767 is the maximum number of characters per xlsx cell
                if len(val) > 32767:
                    logger.error(
                        'warn, row too long, %d, key: %r, len: %d',
                        row, key, len(val))
                if image_keys and key in image_keys:
                    max_rows_per_sheet = MAX_IMAGE_ROWS_PER_XLS_FILE
                    # hack to speed things up:
                    if (key == 'structure_image'
                            and 'library_well_type' in values
                            and values['library_well_type'].lower()
                                == 'empty'):
                        continue
                    write_xls_image(sheet, filerow, i, val, request)
                else:
                    sheet.write_string(filerow, i, val)
        filerow += 1
        if row % 10000 == 0:
            logger.info('wrote %d rows to temp file', row)
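# Usage sketch (an assumption, not from the original module):
# write_rows_to_sheet expects an iterable of (ordered) dicts so that the
# header row, taken from the first dict, lines up with the value columns.
# The file path, rows, and title_function below are hypothetical.
def _example_write_rows_to_sheet():
    from collections import OrderedDict
    import xlsxwriter
    workbook = xlsxwriter.Workbook(
        '/tmp/example_rows.xlsx', {'constant_memory': True})
    rows = [
        OrderedDict([
            ('well_id', 'A01'), ('library_well_type', 'experimental')]),
        OrderedDict([
            ('well_id', 'A02'), ('library_well_type', 'empty')]),
    ]
    write_rows_to_sheet(
        workbook, rows, 'wells',
        title_function=lambda key: key.replace('_', ' ').title())
    workbook.close()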
def csv_generator(data, title_function=None, list_brackets=None):
    pseudo_buffer = Echo()
    # note that csv under python 2.7 doesn't allow a multibyte quote char
    quotechar = b'"'
    csvwriter = csv.writer(
        pseudo_buffer, delimiter=CSV_DELIMITER, quotechar=quotechar,
        quoting=csv.QUOTE_ALL, lineterminator="\n")
    try:
        # initialize so the final log statement is safe for empty input
        rownum = 0
        for rownum, row in enumerate(data):
            if rownum == 0:
                # write the header row using the keys of the first dict
                titles = row.keys()
                if title_function:
                    titles = [title_function(key) for key in titles]
                yield csvwriter.writerow(titles)
            yield csvwriter.writerow([
                csvutils.csv_convert(val, list_brackets=list_brackets)
                for val in row.values()])
        logger.debug('wrote %d rows to csv', rownum)
    except Exception, e:
        logger.exception('CSV streaming error')
        raise e
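# Usage sketch (an assumption, not from the original module): streaming the
# csv_generator output through Django's StreamingHttpResponse. Echo() makes
# csvwriter.writerow return each formatted line instead of buffering it, so
# rows are sent to the client as they are produced. The view name and row
# source below are hypothetical.
def _example_csv_view(request):
    from collections import OrderedDict
    rows = [OrderedDict([('well_id', 'A01'), ('volume', '10')])]
    response = StreamingHttpResponse(
        csv_generator(iter(rows)), content_type='text/csv; charset=utf-8')
    response['Content-Disposition'] = 'attachment; filename=example.csv'
    return response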
def to_csv(self, data, root='objects', options=None):
    data = to_simple(data)
    raw_data = cStringIO.StringIO()
    writer = csv.writer(raw_data)
    if 'error' in data:
        for row in dict_to_rows(data['error']):
            writer.writerow(row)
        return raw_data.getvalue()
    if 'objects' in data:
        data = data['objects']
    if len(data) == 0:
        return data
    if isinstance(data, dict):
        # usually, this happens when the data is actually an error message;
        # but also, it could be just one item being returned
        data = dict_to_rows(data)
        for item in data:
            writer.writerow(item)
    else:
        i = 0
        keys = None
        for item in data:
            if i == 0:
                # write the header row using the keys of the first item
                keys = item.keys()
                writer.writerow([smart_text(key) for key in keys])
            i += 1
            writer.writerow(
                [csvutils.csv_convert(val) for val in item.values()])
    return raw_data.getvalue()
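# Usage sketch (an assumption, not from the original module): to_csv accepts
# either an {'objects': [...]} envelope or a bare dict and returns the CSV
# text. The serializer instance and payload below are hypothetical.
def _example_to_csv(serializer):
    from collections import OrderedDict
    return serializer.to_csv(
        {'objects': [OrderedDict([('well_id', 'A01'), ('volume', '10')])]})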
def get_xls_response(
        data, output_filename, request=None, image_keys=None,
        title_function=None, list_brackets=None):
    '''
    Create an xlsx file that will be streamed through the
    StreamingHttpResponse.
    - if length exceeds MAX_ROWS_PER_XLS_FILE, create multiple files and
      zip them.
    - TODO: when using xlsx, can simply add extra sheets to the file.
    @param output_filename - for naming temp files

    FIXME: wrap cursor with cursorgenerator; pass in the image columns as arg
    FIXME: rework this using the generic_xlsx_response as a template:
    - this method is used for all xlsx serialization at this time, except
      for in testing, and in ScreenResultSerializer - 20160419.
    '''
    if not isinstance(data, dict):
        raise BadRequest(
            'unknown data for xls serialization: %r, must be a dict of '
            'sheet_row entries' % type(data))

    # create a temp dir
    temp_dir = os.path.join(
        settings.TEMP_FILE_DIR, str(time.clock()).replace('.', '_'))
    os.mkdir(temp_dir)
    try:
        # Create a new Excel file and add a worksheet.
        filename = '%s.xlsx' % output_filename
        temp_file = os.path.join(temp_dir, filename)
        file_names_to_zip = [temp_file]
        if DEBUG_STREAMING:
            logger.info('temp file: %r', temp_file)
        workbook = xlsxwriter.Workbook(temp_file, {'constant_memory': True})

        for key, sheet_rows in data.items():
            logger.info('type sheet_rows: %r', type(sheet_rows))
            if isinstance(sheet_rows, (dict, OrderedDict)):
                # a dict becomes a simple key/value sheet
                sheet_name = default_converter(key)
                logger.info('writing sheet %r...', sheet_name)
                sheet = workbook.add_worksheet(sheet_name)
                for i, row in enumerate(csvutils.dict_to_rows(sheet_rows)):
                    sheet.write_row(i, 0, row)
            elif isinstance(sheet_rows, basestring):
                # a string becomes a single-cell sheet
                sheet_name = default_converter(key)
                logger.info('writing single string sheet %r...', sheet_name)
                sheet = workbook.add_worksheet(sheet_name)
                sheet.write_string(0, 0, sheet_rows)
            else:
                # otherwise, an iterable of row dicts, one sheet per
                # max_rows_per_sheet rows
                sheet_name = default_converter(key)
                logger.info('writing sheets for base name %r...', sheet_name)
                max_rows_per_sheet = 2**20
                sheet = workbook.add_worksheet(sheet_name)
                filerow = 0
                sheets = 1
                for row, values in enumerate(sheet_rows):
                    if filerow == 0:
                        # write the header row using the keys of the first dict
                        for i, (key, val) in enumerate(values.items()):
                            title = key
                            if title_function:
                                title = title_function(key)
                            sheet.write_string(filerow, i, title)
                        filerow += 1
                    for i, (key, val) in enumerate(values.items()):
                        val = csvutils.csv_convert(
                            val, delimiter=LIST_DELIMITER_XLS,
                            list_brackets=list_brackets)
                        if val is not None:
                            # 32767 is the maximum number of characters
                            # per xlsx cell
                            if len(val) > 32767:
                                logger.error(
                                    'warn, row too long, %d, key: %r, '
                                    'len: %d', row, key, len(val))
                            if image_keys and key in image_keys:
                                max_rows_per_sheet = \
                                    MAX_IMAGE_ROWS_PER_XLS_FILE
                                if not request:
                                    raise Exception(
                                        'must specify the request parameter '
                                        'for image export')
                                # hack to speed things up:
                                if (key == 'structure_image'
                                        and 'library_well_type' in values
                                        and values['library_well_type']
                                            .lower() == 'empty'):
                                    continue
                                write_xls_image(
                                    sheet, filerow, i, val, request)
                            else:
                                sheet.write_string(filerow, i, val)
                    filerow += 1
                    if row % 10000 == 0:
                        logger.info('wrote %d rows to temp file', row)
                    if filerow > max_rows_per_sheet:
                        workbook.close()
                        logger.info('wrote file: %r', temp_file)
                        # Create a new Excel file and add a worksheet.
                        filename = '%s_%s.xlsx' % (output_filename, filerow)
                        temp_file = os.path.join(temp_dir, filename)
                        workbook = xlsxwriter.Workbook(
                            temp_file, {'constant_memory': True})
                        sheet = workbook.add_worksheet(sheet_name)
                        file_names_to_zip.append(temp_file)
                        filerow = 0
        workbook.close()
        logger.info('wrote file: %r', temp_file)

        content_type = '%s; charset=utf-8' % XLSX_MIMETYPE
        if len(file_names_to_zip) > 1:
            # more than one file written; create a temp zip file
            content_type = 'application/zip; charset=utf-8'
            temp_file = os.path.join('/tmp', str(time.clock()))
            logger.info('temp ZIP file: %r', temp_file)
            with ZipFile(temp_file, 'w') as zip_file:
                for _file in file_names_to_zip:
                    zip_file.write(_file, os.path.basename(_file))
            logger.info('wrote file %r', temp_file)
            filename = '%s.zip' % output_filename

        _file = file(temp_file)
        logger.info('download tmp file: %r, %r', temp_file, _file)
        wrapper = FileWrapper(_file)
        response = StreamingHttpResponse(wrapper, content_type=content_type)
        response['Content-Length'] = os.path.getsize(temp_file)
        response['Content-Disposition'] = \
            'attachment; filename=%s' % filename
        return response
    except Exception, e:
        logger.exception('xls streaming error')
        raise e
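# Usage sketch (an assumption, not from the original module): wiring
# get_xls_response into a Django view. Each top-level key of the data dict
# becomes one sheet (or a group of sheets and a zip download when the rows
# overflow). The view name and rows below are hypothetical.
def _example_xlsx_view(request):
    from collections import OrderedDict
    data = {
        'wells': [
            OrderedDict([
                ('well_id', 'A01'),
                ('library_well_type', 'experimental')]),
        ],
    }
    return get_xls_response(data, 'example_download', request=request)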