def setUp(self): self.c = table.Column(0, u'test', [u'test', u'column', None]) self.c2 = table.Column(0, u'test', [0, 1, 42], normal_type=int) self.c3 = table.Column(0, u'test', [datetime.datetime(2007, 1, 1, 12, 13, 14)], normal_type=datetime.datetime)
def test_table_row_out_of_bounds(self): c = table.Column(0, u'test', [u'test', u'column', u'']) c2 = table.Column(0, u'test', [u'test', u'column', u'']) c3 = table.Column(0, u'test', [u'test', u'column', u'']) t = table.Table([c, c2, c3]) self.assertRaises(IndexError, t.row, -1) self.assertRaises(IndexError, t.row, 3)
def setUp(self): self.csv_table = table.Table([ table.Column(0, u'text', [u'Chicago Reader', u'Chicago Sun-Times', u'Chicago Tribune', u'Row with blanks']), table.Column(1, u'integer', [u'40', u'63', u'164', u'']), table.Column(2, u'datetime', [u'Jan 1, 2008 at 4:40 AM', u'2010-01-27T03:45:00', u'3/1/08 16:14:45', u'']), table.Column(3, u'empty_column', [u'', u'', u'', u''])], name='test_table')
def test_table_uneven_columns(self): c = table.Column(0, u'test', [u'test', u'column', u'']) c_short = table.Column(0, u'test', [u'test']) c_long = table.Column(0, u'test', [u'', u'', u'', u'way out here']) t = table.Table([c, c_short, c_long]) self.assertEqual(t.row(0), [u'test', u'test', None]) self.assertEqual(t.row(1), [u'column', None, None]) self.assertEqual(t.row(2), [None, None, None]) self.assertEqual(t.row(3), [None, None, u'way out here'])
def test_table_insert(self): c = table.Column(0, u'test', [u'test', u'column', u'']) c2 = table.Column(0, u'test', [u'test', u'column', u'']) t = table.Table([c]) t.insert(0, c2) self.assertEqual(len(t), 2) self.assertEqual(t[0], c2) self.assertEqual(t[1], c) self.assertEqual(t[0].order, 0) self.assertEqual(t[1].order, 1)
def test_table_remove(self): c = table.Column(0, u'test', [u'test', u'column', u'']) c2 = table.Column(0, u'test', [u'test', u'column', u'']) c3 = table.Column(0, u'test', [u'test', u'column', u'']) t = table.Table([c, c2, c3]) t.remove(c2) self.assertEqual(len(t), 2) self.assertEqual(t[0], c) self.assertEqual(t[1], c3) self.assertEqual(t[0].order, 0) self.assertEqual(t[1].order, 1)
def test_table_append_duplicate_name(self): c = table.Column(0, u'test', [u'test', u'column', u'']) c2 = table.Column(0, u'test', [u'test', u'column', u'']) c3 = table.Column(0, u'test', [u'test', u'column', u'']) t = table.Table() t.append(c) t.append(c2) t.append(c3) self.assertEqual(t[0].name, 'test') self.assertEqual(t[1].name, 'test_2') self.assertEqual(t[2].name, 'test_3')
def test_table_count_rows(self): c = table.Column(0, u'test', [u'test', u'column', u'']) c_short = table.Column(0, u'test', [u'test']) c_long = table.Column(0, u'test', [u'', u'', u'', u'']) t = table.Table() self.assertEqual(t.count_rows(), 0) t.append(c) self.assertEqual(t.count_rows(), 3) t.append(c_short) self.assertEqual(t.count_rows(), 3) t.append(c_long) self.assertEqual(t.count_rows(), 4)
def test_table_extend(self): c = table.Column(0, u'test', [u'test', u'column', u'']) c2 = table.Column(0, u'test', [u'test', u'column', u'']) c3 = table.Column(0, u'test', [u'test', u'column', u'']) t = table.Table([c]) t.extend([c2, c3]) self.assertEqual(len(t), 3) self.assertEqual(t[0], c) self.assertEqual(t[1], c2) self.assertEqual(t[2], c3) self.assertEqual(t[0].order, 0) self.assertEqual(t[1].order, 1) self.assertEqual(t[2].order, 2)
def test_make_column_string_length(self): c = sql.make_column( table.Column( 0, 'test', [u'this', u'is', u'test', u'data', u'that', u'is', u'awesome' ])) self.assertEqual(c.type.length, 7)
def xls2csv(f, **kwargs): """ Convert an Excel .xls file to csv. """ book = xlrd.open_workbook(file_contents=f.read()) if 'sheet' in kwargs: sheet = book.sheet_by_name(kwargs['sheet']) else: sheet = book.sheet_by_index(0) tab = table.Table() for i in range(sheet.ncols): # Trim headers column_name = sheet.col_values(i)[0] values = sheet.col_values(i)[1:] types = sheet.col_types(i)[1:] column_type = determine_column_type(types) t, normal_values = NORMALIZERS[column_type](values, datemode=book.datemode) column = table.Column(i, column_name, normal_values, normal_type=t) tab.append(column) o = six.StringIO() output = tab.to_csv(o) output = o.getvalue() o.close() return output
def test_make_column_datetime(self): c = sql.make_column( table.Column(0, 'test', [ u'Jan 1, 2008 at 4:40 AM', u'2010-01-27T03:45:00', u'3/1/08 16:14:45', '' ])) self.assertEqual(type(c.type), DateTime)
def dbf2csv(f, **kwargs): """ Convert a dBASE .dbf file to csv. """ with dbf.Table(f.name) as db: headers = db.field_names column_ids = range(len(headers)) data_columns = [[] for c in headers] for row in db: for i, d in enumerate(row): try: data_columns[i].append(six.text_type(row[column_ids[i]]).strip()) except IndexError: # Non-rectangular data is truncated break columns = [] for i, c in enumerate(data_columns): columns.append(table.Column(column_ids[i], headers[i], c)) tab = table.Table(columns=columns) o = six.StringIO() output = tab.to_csv(o) output = o.getvalue() o.close() return output
def xls2csv(f, **kwargs): """ Convert an Excel .xls file to csv. """ book = xlrd.open_workbook(file_contents=f.read()) sheet = book.sheet_by_index(0) tab = table.Table() for i in range(sheet.ncols): # Trim headers column_name = sheet.col_values(i)[0] # Empty column name? Truncate remaining data if not column_name: break values = sheet.col_values(i)[1:] types = sheet.col_types(i)[1:] column_type = determine_column_type(types) # This is terrible code. TKTK if column_type == xlrd.biffh.XL_CELL_DATE: t, normal_values = NORMALIZERS[column_type](values, book.datemode) else: t, normal_values = NORMALIZERS[column_type](values) column = table.Column(i, column_name, normal_values, normal_type=t) tab.append(column) o = StringIO() output = tab.to_csv(o) output = o.getvalue() o.close() return output
def test_column_not_nullable(self): c = sql.make_column(table.Column(0, 'test', ['1', '-87', '418000000'])) self.assertEqual(c.key, 'test') self.assertEqual(type(c.type), Integer) self.assertEqual(c.nullable, False)
def test_make_column_null(self): c = sql.make_column(table.Column(0, 'test', ['', '', ''])) self.assertEqual(type(c.type), String)
def test_make_column_string(self): c = sql.make_column( table.Column(0, 'test', ['this', 'is', 'test', 'data'])) self.assertEqual(type(c.type), String)
def test_make_column_date(self): c = sql.make_column( table.Column(0, 'test', ['Jan 1, 2008', '2010-01-27', '3/1/08', ''])) self.assertEqual(type(c.type), Date)
def test_make_column_time(self): c = sql.make_column( table.Column(0, 'test', ['4:40 AM', '03:45:00', '16:14:45', ''])) self.assertEqual(type(c.type), Time)
def test_make_column_float(self): c = sql.make_column( table.Column(0, 'test', ['1.01', '-87.34', '418000000.0', ''])) self.assertEqual(type(c.type), Float)
def csv_to_table2(): # -------------------------------------- msgt('(3) csvkit to table reformat') # -------------------------------------- fh = open(t4_out, 'rb') csv_args = dict(delimiter='\t',\ quotechar='"') csv_table = table.Table.from_csv(f=fh,\ name='tname',\ snifflimit=None,\ ) print [c.name for c in csv_table] last_col = csv_table[-1] last_col.type = unicode for idx, val in enumerate(last_col): last_col[idx] = '%s' % val #last_col = ['%s' % x for x in last_col] #print last_col[0] msg(csv_table.to_rows()) print [ '%s, %s' % (c.name, c.type) for c in csv_table] return print 'last_col', last_col.order col_num = len(csv_table) print 'col_num', col_num quoted_data = [u'"%s"' % val for val in last_col] print 'quoted_data', quoted_data #return new_column = table.Column(order=last_col.order,\ name=last_col.name,\ l=quoted_data,\ #normal_type=None,\ ) #normal_type=None) csv_table.pop(-1) csv_table.append(new_column) sql_table = csvkit_sql.make_table(csv_table, 'new_table') create_table_sql = csvkit_sql.make_create_table_statement(sql_table, dialect="postgresql") msg('create_table_sql: %s' % create_table_sql) msg(csv_table.to_rows()) return msgt('new_column') msg(new_column) print new_column.name for val in new_column: print val #print len(new_column) """
def test_make_column_bool(self): c = sql.make_column( table.Column(0, 'test', ['True', 'True', 'False', ''])) self.assertEqual(type(c.type), Boolean)
def test_make_column_int(self): c = sql.make_column( table.Column(0, 'test', ['1', '-87', '418000000', ''])) self.assertEqual(c.key, 'test') self.assertEqual(type(c.type), Integer)
def test_make_column_name(self): c = sql.make_column( table.Column(0, 'test', ['1', '-87', '418000000', ''])) self.assertEqual(c.key, 'test')
def test_table_row(self): c = table.Column(0, u'test', [u'test', u'column', u'']) c2 = table.Column(0, u'test', [u'test', u'column', u'']) c3 = table.Column(0, u'test', [u'test', u'column', u'']) t = table.Table([c, c2, c3]) self.assertEqual(t.row(1), [u'column', u'column', u'column'])
def test_column_has_nulls(self): c = sql.make_column(table.Column(0, 'test', [u'1', u'-87', u'418000000', u''])) self.assertEqual(c.key, 'test') self.assertEqual(type(c.type), Integer) self.assertEqual(c.nullable, True)
def test_make_column_big_int(self): c = sql.make_column(table.Column(0, 'test', [u'1', u'-87', u'418000000', u'2147483648'])) self.assertEqual(c.key, 'test') self.assertEqual(type(c.type), BigInteger)
def test_table_append(self): c = table.Column(0, u'test', [u'test', u'column', u'']) t = table.Table() t.append(c) self.assertEqual(len(t), 1) self.assertEqual(t[0], c)