def initialize(self):
    """Initialize CSV source stream:

    #. open the resource with ``base.open_resource``
    #. perform autodetection if required:
        #. detect encoding from a sample data (if requested)
        #. detect whether CSV has headers from a sample data (if requested)
    #. create CSV reader object
    #. skip the first ``skip_rows`` rows (if set)
    #. read CSV headers if requested and initialize stream fields

    When headers are read, every header name is passed to
    ``base.fieldlist`` as a ``(name, "string", "default")`` tuple.
    """
    self.file, self.close_file = base.open_resource(self.resource)

    if self._autodetection:
        sample = self.file.read(self.sample_size)

        # Encoding test: a resource that already yields unicode text is
        # handed to the reader as UTF-8.
        if self.detect_encoding and isinstance(sample, unicode):
            self.encoding = "utf-8"

        if self.detect_header:
            # Only unicode text has to be encoded for the sniffer. Calling
            # encode() on a byte string makes Python 2 decode it as ASCII
            # first, which fails on any non-ASCII byte.
            if isinstance(sample, unicode):
                sample = sample.encode('utf-8')
            sniffer = csv.Sniffer()
            self.read_header = sniffer.has_header(sample)

        # Rewind so that the reader starts from the beginning again.
        self.file.seek(0)

    if self.dialect:
        # A dialect given by name is resolved through the csv registry;
        # anything else is passed to the reader as it is.
        if isinstance(self.dialect, str):
            dialect = csv.get_dialect(self.dialect)
        else:
            dialect = self.dialect

        self.reader_args["dialect"] = dialect

    self.reader = UnicodeReader(self.file, encoding=self.encoding,
                                **self.reader_args)

    if self.skip_rows:
        for _ in range(self.skip_rows):
            self.reader.next()

    # Initialize field list
    if self.read_header:
        field_names = self.reader.next()
        fields = [(name, "string", "default") for name in field_names]
        self._fields = base.fieldlist(fields)
def initialize(self):
    """Initialize CSV source stream:

    #. open the resource with ``base.open_resource``
    #. perform autodetection if required:
        #. detect encoding from a sample data (if requested)
        #. detect whether CSV has headers from a sample data (if requested)
    #. create CSV reader object on top of a ``UTF8Recoder`` handle
    #. read CSV headers if requested and initialize stream fields
    """
    self.file, self.close_file = base.open_resource(self.resource)

    handle = None

    if self._autodetection:
        sample = self.file.read(self.sample_size)

        # Encoding test
        if self.detect_encoding:
            if isinstance(sample, unicode):
                handle = UTF8Recoder(self.file, None)
            else:
                # The sample is cut at an arbitrary offset and may end in
                # the middle of a multi-byte character. It is used only for
                # sniffing, so undecodable bytes are replaced, not fatal.
                sample = sample.decode(self.encoding, "replace")
                handle = UTF8Recoder(self.file, self.encoding)

        if self.detect_header:
            # Only unicode text has to be encoded for the sniffer. Calling
            # encode() on a byte string makes Python 2 decode it as ASCII
            # first, which fails on any non-ASCII byte.
            if isinstance(sample, unicode):
                sample = sample.encode('utf-8')
            sniffer = csv.Sniffer()
            self.read_header = sniffer.has_header(sample)

        # Rewind so that the reader starts from the beginning again.
        self.file.seek(0)

    # No handle was created during autodetection: recode with the
    # configured encoding.
    if handle is None:
        handle = UTF8Recoder(self.file, self.encoding)

    if self.dialect:
        # A dialect given by name is resolved through the csv registry;
        # anything else is passed to the reader as it is.
        if isinstance(self.dialect, str):
            dialect = csv.get_dialect(self.dialect)
        else:
            dialect = self.dialect

        self.reader_args["dialect"] = dialect

    self.reader = csv.reader(handle, **self.reader_args)

    # Initialize field list
    if self.read_header:
        fields = self.reader.next()
        self._fields = base.fieldlist(fields)
def initialize(self):
    """Connect to the Google documents, authenticate.

    A ``text_db`` database client is created from ``self.username`` and
    ``self.password``. The spreadsheet is looked up by
    ``self.spreadsheet_key`` / ``self.spreadsheet_name`` and the worksheet
    by ``self.worksheet_id`` / ``self.worksheet_name``; the first match is
    used in both cases. Stream fields are initialized from the fields of
    the worksheet.

    :raises Exception: if no spreadsheet matches the key or the name
    """
    self.client = gdata.spreadsheet.text_db.DatabaseClient(
        username=self.username,
        password=self.password)

    dbs = self.client.GetDatabases(spreadsheet_key=self.spreadsheet_key,
                                   name=self.spreadsheet_name)

    if not dbs:
        # Report both lookup criteria: the key and the name.
        raise Exception("No spreadsheets with key '%s' or name '%s'" %
                        (self.spreadsheet_key, self.spreadsheet_name))

    db = dbs[0]
    worksheets = db.GetTables(worksheet_id=self.worksheet_id,
                              name=self.worksheet_name)

    # NOTE(review): an empty result raises IndexError here - presumably
    # GetTables returns a list; confirm and report it explicitly if needed.
    self.worksheet = worksheets[0]
    self.worksheet.LookupFields()

    # FIXME: try to determine field types from next row
    self._fields = base.fieldlist(self.worksheet.fields)
def initialize(self):
    """Connect to the Google documents, authenticate.

    A ``text_db`` database client is created from ``self.username`` and
    ``self.password``. The spreadsheet is looked up by
    ``self.spreadsheet_key`` / ``self.spreadsheet_name`` and the worksheet
    by ``self.worksheet_id`` / ``self.worksheet_name``; the first match is
    used in both cases. Stream fields are initialized from the fields of
    the worksheet.

    :raises Exception: if no spreadsheet matches the key or the name
    """
    self.client = gdata.spreadsheet.text_db.DatabaseClient(
        username=self.username,
        password=self.password)

    dbs = self.client.GetDatabases(spreadsheet_key=self.spreadsheet_key,
                                   name=self.spreadsheet_name)

    if not dbs:
        # Report both lookup criteria: the key and the name.
        raise Exception("No spreadsheets with key '%s' or name '%s'" %
                        (self.spreadsheet_key, self.spreadsheet_name))

    db = dbs[0]
    worksheets = db.GetTables(worksheet_id=self.worksheet_id,
                              name=self.worksheet_name)

    # NOTE(review): an empty result raises IndexError here - presumably
    # GetTables returns a list; confirm and report it explicitly if needed.
    self.worksheet = worksheets[0]
    self.worksheet.LookupFields()

    # FIXME: try to determine field types from next row
    self._fields = base.fieldlist(self.worksheet.fields)
def _read_fields(self):
    """Set up the stream fields from the header row of the sheet.

    Nothing happens unless ``self.read_header`` is set. Otherwise the
    values of row ``self.header_row`` are turned into the field list and
    ``self.skip_rows`` is set to the index of the row right after the
    header.
    """
    # FIXME: be more sophisticated and read field types from next row
    if not self.read_header:
        return

    header_index = self.header_row
    header_values = self.sheet.row_values(header_index)
    self._fields = base.fieldlist(header_values)
    self.skip_rows = header_index + 1