Exemplo n.º 1
0
 def from_xlrdcell(xlrd_cell, sheet, col, row):
     """Wrap an xlrd cell in an ``XLSCell``.

     The messytables type is looked up in ``XLS_TYPES`` by the cell's
     ``ctype``; unknown codes fall back to ``StringType()``. For date cells
     the stored number is converted to a ``datetime`` using the workbook's
     ``datemode``.

     :param xlrd_cell: the xlrd cell to wrap (``value`` and ``ctype`` are read)
     :param sheet: the sheet the cell belongs to; ``sheet.book.datemode`` is
         used for date conversion
     :param col: column index of the cell
     :param row: row index of the cell
     :returns: the new ``XLSCell`` with ``sheet``, ``xlrd_cell`` and
         ``xlrd_pos`` attached
     :raises InvalidDateError: if a date cell holds the value 0
     """
     content = xlrd_cell.value
     kind = XLS_TYPES.get(xlrd_cell.ctype, StringType())
     if kind == DateType(None):
         if content == 0:
             raise InvalidDateError
         date_parts = xlrd.xldate_as_tuple(content, sheet.book.datemode)
         content = datetime(*date_parts)
     wrapped = XLSCell(content, type=kind)
     wrapped.sheet = sheet
     wrapped.xlrd_cell = xlrd_cell
     # Needed by the cell's properties. Note the order is (row, col),
     # not (x, y).
     wrapped.xlrd_pos = (row, col)
     return wrapped
Exemplo n.º 2
0
 def raw(self, sample=False):
     """Yield every row of this sheet as a list of ``Cell`` objects.

     Each cell is tagged with a type: ``DateType(None)`` when the cell
     reports itself as a date, ``IntegerType()`` when its ``data_type`` is
     ``'n'``, and ``StringType()`` otherwise.

     :param sample: when true, stop after at most ``self.window`` rows
     """
     total_rows = self.sheet.get_highest_row()
     limit = min(self.window, total_rows) if sample else total_rows
     for index in xrange(limit):
         cells = []
         for source_cell in self.sheet.rows[index]:
             content = source_cell.value
             if source_cell.is_date():
                 cell_type = DateType(None)
             elif source_cell.data_type == 'n':
                 cell_type = IntegerType()
             else:
                 cell_type = StringType()
             cells.append(Cell(content, type=cell_type))
         yield cells
Exemplo n.º 3
0
 def raw(self, sample=False):
     """Yield every row of this sheet as a list of ``Cell`` objects.

     The type of each cell is looked up in ``XLS_TYPES`` by its ``ctype``
     (falling back to ``StringType()``). Date cells are converted to
     ``datetime`` objects using the workbook's ``datemode``.

     :param sample: when true, stop after at most ``self.window`` rows
     :raises ValueError: if a date cell holds the value 0; the message gives
         the sheet name and the 1-based column and row
     """
     total_rows = self.sheet.nrows
     limit = min(self.window, total_rows) if sample else total_rows
     for row_idx in xrange(limit):
         cells = []
         for col_idx, xl_cell in enumerate(self.sheet.row(row_idx)):
             content = xl_cell.value
             cell_type = XLS_TYPES.get(xl_cell.ctype, StringType())
             if cell_type == DateType(None):
                 if content == 0:
                     position = (self.sheet.name, col_idx + 1, row_idx + 1)
                     raise ValueError('Invalid date at "%s":%d,%d' % position)
                 date_parts = xlrd.xldate_as_tuple(
                     content, self.sheet.book.datemode)
                 content = datetime(*date_parts)
             cells.append(Cell(content, type=cell_type))
         yield cells
Exemplo n.º 4
0
from datetime import datetime
import xlrd

from messytables.core import RowSet, TableSet, Cell
from messytables.types import StringType, IntegerType, \
        DateType, FloatType

# Lookup table from an integer type code to a messytables type instance.
# NOTE(review): the keys appear to be xlrd cell ``ctype`` codes
# (1 text, 2 number, 3 date, 4 boolean) -- confirm against the xlrd docs.
XLS_TYPES = {
    1: StringType(),
    # NB: Excel does not distinguish floats from integers, so floats are used.
    # Actual float/int detection could be attempted later, or the Excel
    # format string info could be used - see
    # https://groups.google.com/forum/?fromgroups=#!topic/
    #  python-excel/cAQ1ndsCVxk
    2: FloatType(),
    3: DateType(None),
    # This code is actually boolean, but there is no boolean type yet.
    4: IntegerType()
}


class XLSTableSet(TableSet):
    """An excel workbook wrapper object.
    """
    def __init__(self,
                 fileobj=None,
                 filename=None,
                 window=None,
                 encoding=None):
        '''Initilize the tableset.
Exemplo n.º 5
0
from lxml import etree

from messytables.core import RowSet, TableSet, Cell
from messytables.types import (StringType, DecimalType, DateType)

# Pre-compiled patterns for pulling the document-content tag, whole tables,
# table names and table rows out of a string (presumably the text of
# content.xml -- confirm against the code that uses them).
#
# The patterns are raw strings so that the regex escape ``\/`` reaches the
# ``re`` module unchanged instead of being an invalid Python string escape.
# The compiled patterns are identical to the previous non-raw literals.
#
# NOTE(review): re.MULTILINE only changes how ``^`` and ``$`` match, and none
# of these patterns use either anchor. It does not let "." match newlines
# (that is re.DOTALL) -- confirm which behaviour was intended.
ODS_NAMESPACES_TAG_MATCH = re.compile(r"(<office:document-content[^>]*>)",
                                      re.MULTILINE)
ODS_TABLE_MATCH = re.compile(r".*?(<table:table.*?<\/.*?:table>).*?",
                             re.MULTILINE)
ODS_TABLE_NAME = re.compile(r'.*?table:name="(.*?)".*?')
ODS_ROW_MATCH = re.compile(r".*?(<table:table-row.*?<\/.*?:table-row>).*?",
                           re.MULTILINE)

# Lookup table from a type name to a messytables type instance.
# NOTE(review): the keys look like ODS cell value-type names; only 'float'
# and 'date' are listed, so presumably the consumer supplies a default for
# anything else -- confirm against the code that reads this table.
ODS_TYPES = {
    'float': DecimalType(),
    'date': DateType(None),
}


class ODSTableSet(TableSet):
    """
    A wrapper around ODS files. Because they are zipped and the info we want
    is in the zipped file as content.xml we must ensure that we either have
    a seekable object (local file) or that we retrieve all of the content from
    the remote URL.
    """
    def __init__(self, fileobj, window=None):
        '''Initialize the object.

        :param fileobj: may be a file path or a file-like object. Note the
        file-like object *must* be in binary mode and must be seekable (it will
Exemplo n.º 6
0
Arquivo: ods.py Projeto: bwica/dpusher
# NOTE(review): these look like ODF cell attribute names with the namespace
# prefix left off -- confirm against the code that reads them.
VALUE_TYPE = 'value-type'
COLUMN_REPEAT = 'number-columns-repeated'
# NOTE(review): presumably the value used for cells without content -- confirm.
EMPTY_CELL_VALUE = ''

# For each value-type name, the name of the token associated with it.
# 'float', 'percentage' and 'currency' all share "value".
ODS_VALUE_TOKEN = {
    "float": "value",
    "date": "date-value",
    "time": "time-value",
    "boolean": "boolean-value",
    "percentage": "value",
    "currency": "value"
}

# Lookup table from a value-type name to a messytables type instance.
# Dates are given the '%Y-%m-%d' format.
# NOTE(review): 'currency' has an entry in ODS_VALUE_TOKEN but none here --
# confirm that the consumer copes with the missing key.
ODS_TYPES = {
    'float': DecimalType(),
    'date': DateType('%Y-%m-%d'),
    'boolean': BoolType(),
    'percentage': PercentageType(),
    'time': TimeType()
}


class ODSTableSet(TableSet):
    """
    A wrapper around ODS files. Because they are zipped and the info we want
    is in the zipped file as content.xml we must ensure that we either have
    a seekable object (local file) or that we retrieve all of the content from
    the remote URL.
    """
    def __init__(self, fileobj, window=None, **kw):
        '''Initialize the object.