def parse(path,
          tablenames=None,
          required=not REQUIRED,
          keycolname=None,
          tablename2key=lambda x: x,
          stream_beginTable=None,
          stream_row=None):
    """Parse a datalib file, either into Table objects or through callbacks.

    When neither ``stream_beginTable`` nor ``stream_row`` is supplied, every
    selected table is loaded into memory and returned in a dict.  When at
    least one of them is supplied, nothing is accumulated: tables and rows
    are reported through the callbacks and ``None`` is returned.

    Args:
        path: Path of the file to read.
        tablenames: Optional collection of table names to load; tables whose
            name is not in it are skipped.  A falsy value selects every
            table.
        required: When true and ``tablenames`` is given, every requested
            table must be present in the file.
        keycolname: Passed through unchanged to ``Table`` and to
            ``stream_beginTable``.
        tablename2key: Maps a table name to the key under which the table is
            stored in the returned dict (identity by default).
        stream_beginTable: Optional callback invoked at the start of each
            selected table as ``(tablename, colnames, coltypes, path,
            table_index, keycolname)``.
        stream_row: Optional callback invoked with a row object for each
            data row.  One row object per table is reused for all of that
            table's rows, so a callback must copy anything it wants to keep.

    Returns:
        A dict of ``tablename2key(tablename) -> Table`` in table mode, or
        ``None`` in streaming mode.

    Raises:
        InvalidFileError: The file is not a datalib file, its version is
            newer than ``CURRENT_VERSION``, its schema/colformat declaration
            is not recognised, a table is not terminated by its own end tag,
            or a data row does not have one field per column.
        MissingTableError: ``required`` is true and a requested table was
            not found.
    """
    # Records which of the requested tables were actually seen in the file.
    tablenames_found = dict.fromkeys(tablenames, False) if tablenames else {}

    class TableResult:
        """Sink that keeps every parsed table in a dict."""

        def __init__(self):
            self.tables = {}

        def beginTable(self, tablename, colnames, coltypes, path,
                       table_index, keycolname):
            self.table = Table(tablename, colnames, coltypes, path,
                               table_index, keycolname=keycolname)
            self.tables[tablename2key(tablename)] = self.table

        def row(self, data):
            row = self.table.createRow()
            for col in self.table.columns():
                col.set(row.index, data[col.index])

        def retval(self):
            return self.tables

    class StreamResult:
        """Sink that forwards tables and rows to the stream callbacks."""

        def __init__(self):
            pass

        def beginTable(self, tablename, colnames, coltypes, path,
                       table_index, keycolname):
            if stream_beginTable:
                stream_beginTable(tablename, colnames, coltypes, path,
                                  table_index, keycolname)

            # A private table used only to give the row callback a row
            # context; its single row is overwritten for every data line.
            self.table = Table(tablename, colnames, coltypes, path,
                               table_index, keycolname=keycolname)
            self.__row = self.table.createRow()

        def row(self, data):
            if stream_row:
                for col in self.table.columns():
                    col.set(self.__row.index, data[col.index])
                stream_row(self.__row)

        def retval(self):
            return None

    if stream_beginTable is None and stream_row is None:
        result = TableResult()
    else:
        result = StreamResult()

    # The context manager guarantees the handle is released on every exit
    # path, including the many InvalidFileError raises below.
    with open(path, 'r') as f:
        if not __is_datalib_file(f):
            raise InvalidFileError(path)

        version = common_functions.get_version(f.readline())
        if version > CURRENT_VERSION:
            raise InvalidFileError('invalid version (%s)' % version)

        if version < 3:
            # Files older than version 3 carry no declarations; these are
            # the values they are treated as having.
            schema = 'table'
            colformat = 'fixed'
        else:
            schema = common_functions.get_equals_decl(f.readline(), 'schema')
            colformat = common_functions.get_equals_decl(f.readline(),
                                                         'colformat')

        # Raised explicitly rather than asserted: these checks validate file
        # content and must survive running under ``python -O``.
        if schema not in ('table', 'single'):
            raise InvalidFileError('invalid schema (%s)' % schema)
        if colformat not in ('fixed', 'none'):
            raise InvalidFileError('invalid colformat (%s)' % colformat)

        if schema == 'single':
            # One column declaration shared by every table in the file.
            # NOTE(review): presumably __seek_meta positions the file at
            # that shared declaration -- confirm against its definition.
            __seek_meta(f, 'L')
            colnames = __parse_colnames(f)
            coltypes = __parse_coltypes(f)

        table_index = -1

        while True:
            tablename = __seek_next_tag(f)
            if not tablename:
                break

            # Counts every table encountered, including skipped ones, so the
            # index reflects the table's position in the file.
            table_index += 1

            if tablenames:
                if tablename not in tablenames:
                    __seek_end_tag(f, tablename)
                    continue
                tablenames_found[tablename] = True

            if schema == 'table':
                # Each table carries its own column declaration.
                colnames = __parse_colnames(f)
                f.readline()  # skip blank line
                coltypes = __parse_coltypes(f)
                f.readline()  # skip blank line

            # --- begin table
            result.beginTable(tablename, colnames, coltypes, path,
                              table_index, keycolname)

            # --- parse data until we reach </name>
            while True:
                line = f.readline()

                tag = __get_end_tag(line)
                if tag:
                    if tag != tablename:
                        raise InvalidFileError(
                            "Mismatched end tag %s for %s" % (tag, tablename))
                    break

                data = line.split()
                if len(data) == 0:
                    # A blank line inside the data section; readline() also
                    # yields an empty string at end of file.
                    raise InvalidFileError(
                        "Missing end tag for %s" % tablename)
                elif len(data) != len(colnames):
                    raise InvalidFileError("Missing data for %s" % tablename)

                result.row(data)

    if tablenames and required:
        for name, found in tablenames_found.items():
            if not found:
                raise MissingTableError(
                    'Failed to find %s in %s' % (name, path))

    return result.retval()
def parse(path, tablenames=None, required=not REQUIRED, keycolname=None):
    """Load the tables of a datalib file into ``Table`` objects.

    Args:
        path: Path of the file to read.
        tablenames: Optional collection of table names to load; tables whose
            name is not in it are skipped.  A falsy value selects every
            table.
        required: When true and ``tablenames`` is given, every requested
            table must be present in the file.
        keycolname: Passed through unchanged to ``Table``.

    Returns:
        A dict mapping each loaded table's name to its ``Table``.

    Raises:
        InvalidFileError: The file is not a datalib file, its version is
            newer than ``CURRENT_VERSION``, a column label/type line is
            missing or lacks its marker, a table is not terminated by its
            own end tag, or a data row does not have one field per column.
        MissingTableError: ``required`` is true and a requested table was
            not found.
    """
    # Records which of the requested tables were actually seen in the file.
    tablenames_found = dict.fromkeys(tablenames, False) if tablenames else {}

    tables = {}

    # The context manager guarantees the handle is released on every exit
    # path, including the InvalidFileError raises below.
    with open(path, 'r') as f:

        def read_marked_fields(marker, missing_message):
            """Read one marker-prefixed line plus the blank line after it.

            Returns the whitespace-separated fields with the leading marker
            removed.
            """
            fields = f.readline().split()
            if not fields:
                raise InvalidFileError(missing_message)
            if fields[0] != marker:
                raise InvalidFileError('unexpected token (%s)' % fields[0])
            f.readline()  # skip blank line
            return fields[1:]

        if not __is_datalib_file(f):
            raise InvalidFileError(path)

        version = common_functions.get_version(f.readline())
        if version > CURRENT_VERSION:
            raise InvalidFileError('invalid version (%s)' % version)

        table_index = -1

        while True:
            tablename = __seek_next_tag(f)
            if not tablename:
                break

            # Counts every table encountered, including skipped ones, so the
            # index reflects the table's position in the file.
            table_index += 1

            if tablenames:
                if tablename not in tablenames:
                    __seek_end_tag(f, tablename)
                    continue
                tablenames_found[tablename] = True

            # --- read column names, then column types
            colnames = read_marked_fields(COLUMN_LABEL_MARKER,
                                          'expecting column labels')
            coltypes = read_marked_fields(COLUMN_TYPE_MARKER,
                                          'expecting column types')

            # --- construct table object
            table = Table(tablename, colnames, coltypes, path, table_index,
                          keycolname=keycolname)
            tables[tablename] = table

            # --- parse data until we reach </name>
            while True:
                line = f.readline()

                tag = __get_end_tag(line)
                if tag:
                    # Raised rather than asserted so the check survives
                    # running under ``python -O``.
                    if tag != tablename:
                        raise InvalidFileError(
                            "Mismatched end tag %s for %s" % (tag, tablename))
                    break

                data = line.split()
                if len(data) == 0:
                    # A blank line inside the data section; readline() also
                    # yields an empty string at end of file.
                    raise InvalidFileError(
                        "Missing end tag for %s" % tablename)
                elif len(data) != len(colnames):
                    raise InvalidFileError("Missing data for %s" % tablename)

                row = table.createRow()
                for col in table.columns():
                    col.set(row.index, data[col.index])

    if tablenames and required:
        for name, found in tablenames_found.items():
            if not found:
                raise MissingTableError(
                    'Failed to find %s in %s' % (name, path))

    return tables
def parse(path, tablenames=None, required=not REQUIRED, keycolname=None):
    """Read a datalib file and return its tables keyed by table name.

    Args:
        path: Path of the file to read.
        tablenames: Optional collection of table names to load; tables whose
            name is not in it are skipped.  A falsy value selects every
            table.
        required: When true and ``tablenames`` is given, every requested
            table must be present in the file.
        keycolname: Passed through unchanged to ``Table``.

    Returns:
        A dict mapping each loaded table's name to its ``Table``.

    Raises:
        InvalidFileError: The file is not a datalib file, its version is
            newer than ``CURRENT_VERSION``, a column label/type line is
            missing or lacks its marker, a table is not terminated by its
            own end tag, or a data row does not have one field per column.
        MissingTableError: ``required`` is true and a requested table was
            not found.
    """
    # Records which of the requested tables were actually seen in the file.
    tablenames_found = dict.fromkeys(tablenames, False) if tablenames else {}

    tables = {}
    table_index = -1

    # Opened via a context manager so the handle is closed whether parsing
    # completes or bails out with an exception.
    with open(path, 'r') as f:
        if not __is_datalib_file(f):
            raise InvalidFileError(path)

        version = common_functions.get_version(f.readline())
        if version > CURRENT_VERSION:
            raise InvalidFileError('invalid version (%s)' % version)

        while True:
            tablename = __seek_next_tag(f)
            if not tablename:
                break

            # Incremented before filtering, so skipped tables still consume
            # an index.
            table_index += 1

            if tablenames:
                if tablename not in tablenames:
                    __seek_end_tag(f, tablename)
                    continue
                tablenames_found[tablename] = True

            # --- read column names
            colnames = f.readline().split()
            if not colnames:
                raise InvalidFileError('expecting column labels')
            if colnames[0] != COLUMN_LABEL_MARKER:
                raise InvalidFileError('unexpected token (%s)' % colnames[0])
            del colnames[0]  # remove marker

            f.readline()  # skip blank line

            # --- read column types
            coltypes = f.readline().split()
            if not coltypes:
                raise InvalidFileError('expecting column types')
            if coltypes[0] != COLUMN_TYPE_MARKER:
                raise InvalidFileError('unexpected token (%s)' % coltypes[0])
            del coltypes[0]  # remove marker

            f.readline()  # skip blank line

            # --- construct table object
            table = Table(tablename, colnames, coltypes, path, table_index,
                          keycolname=keycolname)
            tables[tablename] = table

            # --- parse data until we reach </name>
            while True:
                line = f.readline()

                tag = __get_end_tag(line)
                if tag:
                    # An explicit raise instead of an assert: the check
                    # validates file content and must not vanish under
                    # ``python -O``.
                    if tag != tablename:
                        raise InvalidFileError(
                            "Mismatched end tag %s for %s" % (tag, tablename))
                    break

                data = line.split()
                if len(data) == 0:
                    # A blank line inside the data section; readline() also
                    # yields an empty string at end of file.
                    raise InvalidFileError(
                        "Missing end tag for %s" % tablename)
                elif len(data) != len(colnames):
                    raise InvalidFileError("Missing data for %s" % tablename)

                row = table.createRow()
                for col in table.columns():
                    col.set(row.index, data[col.index])

    if tablenames and required:
        for name, found in tablenames_found.items():
            if not found:
                raise MissingTableError(
                    'Failed to find %s in %s' % (name, path))

    return tables