Exemplo n.º 1
0
    def initialize(self):
        """Initialize CSV source stream:

        #. open the resource
        #. perform autodetection if required (reads ``sample_size`` from the
           start of the file, then rewinds it):
            #. if encoding detection is requested and the sample is already
               unicode text, set the encoding to UTF-8
            #. detect whether CSV has headers from the sample (if requested)
        #. resolve the dialect (a name is looked up with ``csv.get_dialect``)
        #. create the CSV reader object
        #. skip the first ``skip_rows`` rows, if any
        #. read CSV headers if requested and initialize stream fields; every
           field is created as ``(name, "string", "default")``

        """

        self.file, self.close_file = base.open_resource(self.resource)

        if self._autodetection:
            sample = self.file.read(self.sample_size)

            # Encoding test
            if self.detect_encoding and isinstance(sample, unicode):
                self.encoding = "utf-8"

            if self.detect_header:
                # Only unicode text has to be encoded for the sniffer. Calling
                # .encode() on a byte string makes Python 2 decode it as ASCII
                # first, which fails on any non-ASCII byte.
                if isinstance(sample, unicode):
                    sample = sample.encode('utf-8')
                self.read_header = csv.Sniffer().has_header(sample)

            # Rewind so the reader below starts at the beginning of the data.
            # NOTE(review): requires a seekable resource.
            self.file.seek(0)

        if self.dialect:
            if isinstance(self.dialect, str):
                dialect = csv.get_dialect(self.dialect)
            else:
                dialect = self.dialect

            self.reader_args["dialect"] = dialect

        self.reader = UnicodeReader(self.file, encoding=self.encoding,
                                    **self.reader_args)

        if self.skip_rows:
            for _ in range(self.skip_rows):
                self.reader.next()

        # Initialize field list
        if self.read_header:
            field_names = self.reader.next()
            fields = [(name, "string", "default") for name in field_names]
            self._fields = base.fieldlist(fields)
Exemplo n.º 2
0
    def initialize(self):
        """Initialize CSV source stream:

        #. open the resource
        #. perform autodetection if required (reads ``sample_size`` from the
           start of the file, then rewinds it):
            #. if encoding detection is requested, choose the recoder
               according to whether the sample is unicode text or bytes
            #. detect whether CSV has headers from the sample (if requested)
        #. fall back to a recoder using ``self.encoding`` if none was chosen
        #. resolve the dialect (a name is looked up with ``csv.get_dialect``)
        #. create the CSV reader object
        #. read CSV headers if requested and initialize stream fields

        """

        self.file, self.close_file = base.open_resource(self.resource)

        handle = None

        if self._autodetection:
            sample = self.file.read(self.sample_size)

            # Encoding test
            if self.detect_encoding:
                if isinstance(sample, unicode):
                    handle = UTF8Recoder(self.file, None)
                else:
                    sample = sample.decode(self.encoding)
                    handle = UTF8Recoder(self.file, self.encoding)

            if self.detect_header:
                # Only unicode text has to be encoded for the sniffer. Calling
                # .encode() on a byte string makes Python 2 decode it as ASCII
                # first, which fails on any non-ASCII byte (reachable when
                # encoding detection is off and the file yields bytes).
                if isinstance(sample, unicode):
                    sample = sample.encode('utf-8')
                self.read_header = csv.Sniffer().has_header(sample)

            # Rewind so the reader below starts at the beginning of the data.
            # NOTE(review): requires a seekable resource, and assumes the
            # recoder created above has not consumed any input yet - confirm.
            self.file.seek(0)

        if handle is None:
            handle = UTF8Recoder(self.file, self.encoding)

        if self.dialect:
            if isinstance(self.dialect, str):
                dialect = csv.get_dialect(self.dialect)
            else:
                dialect = self.dialect

            self.reader_args["dialect"] = dialect

        self.reader = csv.reader(handle, **self.reader_args)

        # Initialize field list
        if self.read_header:
            fields = self.reader.next()
            self._fields = base.fieldlist(fields)
Exemplo n.º 3
0
    def initialize(self):
        """Initialize CSV source stream:

        #. open the resource
        #. perform autodetection if required (reads ``sample_size`` from the
           start of the file, then rewinds it):
            #. if encoding detection is requested, choose the recoder
               according to whether the sample is unicode text or bytes
            #. detect whether CSV has headers from the sample (if requested)
        #. fall back to a recoder using ``self.encoding`` if none was chosen
        #. resolve the dialect (a name is looked up with ``csv.get_dialect``)
        #. create the CSV reader object
        #. read CSV headers if requested and initialize stream fields

        """

        self.file, self.close_file = base.open_resource(self.resource)

        handle = None

        if self._autodetection:
            sample = self.file.read(self.sample_size)

            # Encoding test
            if self.detect_encoding:
                if isinstance(sample, unicode):
                    handle = UTF8Recoder(self.file, None)
                else:
                    sample = sample.decode(self.encoding)
                    handle = UTF8Recoder(self.file, self.encoding)

            if self.detect_header:
                # Only unicode text has to be encoded for the sniffer. Calling
                # .encode() on a byte string makes Python 2 decode it as ASCII
                # first, which fails on any non-ASCII byte (reachable when
                # encoding detection is off and the file yields bytes).
                if isinstance(sample, unicode):
                    sample = sample.encode('utf-8')
                self.read_header = csv.Sniffer().has_header(sample)

            # Rewind so the reader below starts at the beginning of the data.
            # NOTE(review): requires a seekable resource, and assumes the
            # recoder created above has not consumed any input yet - confirm.
            self.file.seek(0)

        if handle is None:
            handle = UTF8Recoder(self.file, self.encoding)

        if self.dialect:
            if isinstance(self.dialect, str):
                dialect = csv.get_dialect(self.dialect)
            else:
                dialect = self.dialect

            self.reader_args["dialect"] = dialect

        self.reader = csv.reader(handle, **self.reader_args)

        # Initialize field list
        if self.read_header:
            fields = self.reader.next()
            self._fields = base.fieldlist(fields)
Exemplo n.º 4
0
    def initialize(self):
        """Connect to the Google documents, authenticate and select the
        worksheet.

        Creates a ``text_db`` database client from ``self.username`` and
        ``self.password``, looks up spreadsheets by ``self.spreadsheet_key``
        and ``self.spreadsheet_name`` and uses the first match. The first
        table returned for ``self.worksheet_id`` / ``self.worksheet_name``
        becomes ``self.worksheet``, and the stream fields are built from its
        ``fields`` attribute after ``LookupFields()`` has been called.

        Raises ``Exception`` if no spreadsheet matches the key or name.
        """
        self.client = gdata.spreadsheet.text_db.DatabaseClient(
            username=self.username, password=self.password)

        dbs = self.client.GetDatabases(spreadsheet_key=self.spreadsheet_key,
                                       name=self.spreadsheet_name)

        if len(dbs) < 1:
            # Report both lookup criteria: the key and the name.
            raise Exception("No spreadsheets with key '%s' or name '%s'" %
                            (self.spreadsheet_key, self.spreadsheet_name))

        db = dbs[0]
        worksheets = db.GetTables(worksheet_id=self.worksheet_id,
                                  name=self.worksheet_name)

        # NOTE(review): presumably raises IndexError when no worksheet
        # matches - there is no explicit check as there is for spreadsheets.
        self.worksheet = worksheets[0]
        self.worksheet.LookupFields()

        # FIXME: try to determine field types from next row
        self._fields = base.fieldlist(self.worksheet.fields)
Exemplo n.º 5
0
    def initialize(self):
        """Connect to the Google documents, authenticate and select the
        worksheet.

        Creates a ``text_db`` database client from ``self.username`` and
        ``self.password``, looks up spreadsheets by ``self.spreadsheet_key``
        and ``self.spreadsheet_name`` and uses the first match. The first
        table returned for ``self.worksheet_id`` / ``self.worksheet_name``
        becomes ``self.worksheet``, and the stream fields are built from its
        ``fields`` attribute after ``LookupFields()`` has been called.

        Raises ``Exception`` if no spreadsheet matches the key or name.
        """
        self.client = gdata.spreadsheet.text_db.DatabaseClient(
            username=self.username, password=self.password)

        dbs = self.client.GetDatabases(spreadsheet_key=self.spreadsheet_key,
                                       name=self.spreadsheet_name)

        if len(dbs) < 1:
            # Report both lookup criteria: the key and the name.
            raise Exception("No spreadsheets with key '%s' or name '%s'" %
                            (self.spreadsheet_key, self.spreadsheet_name))

        db = dbs[0]
        worksheets = db.GetTables(worksheet_id=self.worksheet_id,
                                  name=self.worksheet_name)

        # NOTE(review): presumably raises IndexError when no worksheet
        # matches - there is no explicit check as there is for spreadsheets.
        self.worksheet = worksheets[0]
        self.worksheet.LookupFields()

        # FIXME: try to determine field types from next row
        self._fields = base.fieldlist(self.worksheet.fields)
Exemplo n.º 6
0
 def _read_fields(self):
     """Build the stream fields from the sheet's header row.

     Does nothing unless ``self.read_header`` is set. Otherwise the values
     of row ``self.header_row`` of ``self.sheet`` are turned into the field
     list, and ``self.skip_rows`` is set to the row after the header.
     """
     # FIXME: be more sophisticated and read field types from next row
     if not self.read_header:
         return

     header_values = self.sheet.row_values(self.header_row)
     self._fields = base.fieldlist(header_values)
     self.skip_rows = self.header_row + 1
Exemplo n.º 7
0
 def _read_fields(self):
     """Build the stream fields from the sheet's header row.

     Does nothing unless ``self.read_header`` is set. Otherwise the values
     of row ``self.header_row`` of ``self.sheet`` are turned into the field
     list, and ``self.skip_rows`` is set to the row after the header.
     """
     # FIXME: be more sophisticated and read field types from next row
     if not self.read_header:
         return

     header_values = self.sheet.row_values(self.header_row)
     self._fields = base.fieldlist(header_values)
     self.skip_rows = self.header_row + 1