示例#1
0
文件: misc.py 项目: ProgVal/datautil
def pivot(table, left, top, value):
    """Pivot normalised tabular data into a cross-tab (wide) layout.

    Every distinct value found in the C{left} column becomes one output row
    and every distinct value found in the C{top} column becomes one output
    column; the cell where they meet holds the matching C{value} entry.
    For example::

        Name, Year, Value
        -----------------
        'x',  2004, 1
        'y',  2004, 2
        'x',  2005, 3
        'y',  2005, 4

    becomes::

        Year, 'x', 'y'
        --------------
        2004, 1,   2
        2005, 3,   4

    by calling C{pivot(tabulardata, 1, 0, 2)} or, when the table has a
    header, C{pivot(tabulardata, 'Year', 'Name', 'Value')}.

    @param table: simple list of lists or a L{TabularData} object.
    @param left: column (index, or header name) whose values label the rows.
    @param top: column (index, or header name) whose values become the new
        column headings.
    @param value: column (index, or header name) supplying the cell values.
    @return: a new L{TabularData}; row labels and column headings are sorted
        and combinations absent from the input are filled with ''.
    """
    def _column(spec):
        # Anything that is not an integer is treated as a header name.
        return spec if isinstance(spec, int) else table.header.index(spec)

    left, top, value = _column(left), _column(top), _column(value)

    result = TabularData()

    # cells[row_key][col_key] -> value; a repeated pair overwrites the
    # earlier entry.
    cells = {}
    row_keys = set()
    col_keys = set()
    for record in table:
        row_key = record[left]
        col_key = record[top]
        cells.setdefault(row_key, {})[col_key] = record[value]
        row_keys.add(row_key)
        col_keys.add(col_key)
    row_keys = sorted(row_keys)
    col_keys = sorted(col_keys)

    # Label the first column with the source header when one is available.
    header = getattr(table, 'header', None)
    left_label = header[left] if header else 'X'

    result.header = [left_label] + col_keys
    result.data = [
        [row_key] + [cells[row_key].get(col_key, '') for col_key in col_keys]
        for row_key in row_keys
    ]
    return result
示例#2
0
def pivot(table, left, top, value):
    """Unnormalize (pivot) a normalised set of tabular data.

    The distinct values of the C{left} column label the output rows, the
    distinct values of the C{top} column become the output column headings,
    and the C{value} column fills the cells.  Given::

        Name, Year, Value
        -----------------
        'x',  2004, 1
        'y',  2004, 2
        'x',  2005, 3
        'y',  2005, 4

    either C{pivot(tabulardata, 1, 0, 2)} or (header required)
    C{pivot(tabulardata, 'Year', 'Name', 'Value')} produces::

        Year, 'x', 'y'
        --------------
        2004, 1,   2
        2005, 3,   4

    @param table: simple list of lists or a L{TabularData} object.
    @param left: index or header name of the column used for row labels.
    @param top: index or header name of the column used for new headings.
    @param value: index or header name of the column holding cell values.
    @return: a new L{TabularData} with sorted row labels and headings; cells
        with no matching input row contain ''.
    """
    # Header names are translated to positions; integers are used as given.
    left, top, value = [
        col if isinstance(col, int) else table.header.index(col)
        for col in (left, top, value)
    ]

    pivoted = TabularData()

    # Two-level mapping: left value -> {top value -> cell value}.
    by_left = {}
    left_seen = set()
    top_seen = set()
    for row in table:
        left_val, top_val = row[left], row[top]
        if left_val not in by_left:
            by_left[left_val] = {}
        by_left[left_val][top_val] = row[value]
        left_seen.add(left_val)
        top_seen.add(top_val)
    left_order = sorted(left_seen)
    top_order = sorted(top_seen)

    # Fall back to a generic label when the input carries no header.
    if hasattr(table, 'header') and table.header:
        corner = table.header[left]
    else:
        corner = 'X'

    body = []
    for left_val in left_order:
        found = by_left[left_val]
        body.append(
            [left_val] + [found.get(top_val, '') for top_val in top_order])

    pivoted.header = [corner] + top_order
    pivoted.data = body
    return pivoted
示例#3
0
 def handle_endtag(self, tag):
     """Update the accumulated table state when a closing tag is seen.

     Closing a cell (C{td} or C{th}) appends the current text to the current
     row; closing a C{tr} appends the current row to the pending rows;
     closing a C{table} stores the pending rows as a new L{TabularData} in
     C{self.tables} and starts a fresh list of pending rows.

     @param tag: name of the tag being closed.
     """
     if tag in ('td', 'th'):
         self._row.append(self._text)
     elif tag == 'tr':
         self._rows.append(self._row)
     elif tag == 'table':
         finished = TabularData(data=self._rows)
         self.tables.append(finished)
         self._rows = []
示例#4
0
    def read(self, fileobj=None, sheet_index=0):
        """Load one sheet of an excel workbook into a L{TabularData} object.

        When C{fileobj} is supplied, the contents of C{self.fileobj} are
        opened with xlrd and the resulting workbook is kept on C{self.book};
        otherwise whatever workbook is already stored on C{self.book} is used.

        @param fileobj: excel file source; handed to the parent class's
            C{read} first.
        @param sheet_index: index of the sheet to extract (default 0).
        @return: L{TabularData} object holding the extracted sheet data.
        """
        super(XlsReader, self).read(fileobj)
        if fileobj:
            raw = self.fileobj.read()
            self.book = xlrd.open_workbook(file_contents=raw)
        result = TabularData()
        sheet = self.book.sheet_by_index(sheet_index)
        result.data = self.extract_sheet(sheet, self.book)
        return result
示例#5
0
文件: xls.py 项目: ProgVal/datautil
    def read(self, fileobj=None, sheet_index=0):
        """Return the requested sheet of an excel file as L{TabularData}.

        The parent class's C{read} is called with C{fileobj} first.  If a
        C{fileobj} was given, C{self.fileobj} is read and parsed by xlrd, and
        the workbook is stored for convenience as:

        self.book: xlrd WorkBook object

        @param fileobj: the excel file to read (optional).
        @param sheet_index: position of the sheet to return (default 0).
        @return: L{TabularData} object.
        """
        super(XlsReader, self).read(fileobj)
        if fileobj:
            contents = self.fileobj.read()
            self.book = xlrd.open_workbook(file_contents=contents)
        workbook = self.book
        table = TabularData()
        table.data = self.extract_sheet(
            workbook.sheet_by_index(sheet_index), workbook)
        return table
示例#6
0
    def read(self, sheet_name='Sheet1'):
        """Read a google spreadsheet worksheet into a L{TabularData} object.

        The header is taken from the fields of the table returned by
        C{self.load_text_db_table(sheet_name)}; every record found adds one
        data row whose values follow the header order.

        @param sheet_name: worksheet name handed to C{load_text_db_table}.
        @return: L{TabularData} object.
        """
        db_table = self.load_text_db_table(sheet_name)
        result = TabularData()
        db_table.LookupFields()
        result.header = db_table.fields
        # An empty query string seems to match every record.
        for record in db_table.FindRecords(''):
            result.data.append(
                [record.content[name] for name in result.header])
        return result
示例#7
0
文件: gdocs.py 项目: ProgVal/datautil
    def read(self, sheet_name='Sheet1'):
        """Load the named google spreadsheet worksheet as L{TabularData}.

        Field names looked up on the worksheet's text-db table become the
        header, and the content of each record is copied into a data row,
        one value per header column.

        @param sheet_name: name of the worksheet to load (default 'Sheet1').
        @return: L{TabularData} object.
        """
        source = self.load_text_db_table(sheet_name)
        table = TabularData()
        source.LookupFields()
        table.header = source.fields
        # Querying with '' appears to return all records.
        records = source.FindRecords('')
        for record in records:
            content = record.content
            table.data.append([content[column] for column in table.header])
        return table
示例#8
0
    def read(self, filepath_or_fileobj=None):
        '''Read JSON encoded data from source into a L{TabularData} object.

        JSON encoded data should either be:
            * dict (with header and data attributes)
            * list (first row assumed to be the header)

        An empty list yields a L{TabularData} with no header (C{None}) and
        no data rows instead of failing on the missing header row.

        @param filepath_or_fileobj: source to read; handed to the parent
            class's C{read}, after which C{self.fileobj} is parsed as JSON.
        @return L{TabularData}
        @raise Exception: if the decoded JSON is neither a dict nor a list.
        '''
        super(JsonReader, self).read(filepath_or_fileobj)
        jsondata = json.load(self.fileobj)
        if isinstance(jsondata, dict):
            return TabularData(header=jsondata.get('header'),
                               data=jsondata.get('data'))
        if isinstance(jsondata, list):
            if not jsondata:
                # No first row to use as header: indexing [0] would raise
                # IndexError, so pass None like the dict branch does for a
                # missing header.
                return TabularData(header=None, data=[])
            return TabularData(header=jsondata[0], data=jsondata[1:])
        raise Exception('Cannot load TabularData from %s' % jsondata)