Exemplo n.º 1
0
    def main(self):
        """Convert each input CSV into a SQL table: load it directly into a
        database when --db is given, otherwise write a CREATE TABLE
        statement to the output file.
        """
        # Ensure we're handling a list, even if it's just one file
        if not isinstance(self.args.files, list):
            self.args.files = [self.args.files]
        else:
            # --table names exactly one table, so it is ambiguous with many inputs.
            if self.args.table_name:
                self.argparser.error('The --table argument is only allowed when specifying a single file.')

        for f in self.args.files:
            if self.args.table_name:
                table_name = self.args.table_name
            elif f != sys.stdin:
                # Use filename as table name
                table_name = os.path.splitext(os.path.split(f.name)[1])[0]
            else:
                # argparser.error() exits, so table_name is always bound past here.
                self.argparser.error('The --table argument is required when providing data over STDIN.')

            # Option-dependency checks (re-run per file, though the answers
            # never change between iterations).
            if self.args.dialect and self.args.connection_string:
                self.argparser.error('The --dialect option is only valid when --db is not specified.')

            if self.args.insert and not self.args.connection_string:
                self.argparser.error('The --insert option is only valid when --db is also specified.')

            if self.args.no_create and not self.args.insert:
                self.argparser.error('The --no-create option is only valid --insert is also specified.')

            csv_table = table.Table.from_csv(f, name=table_name, snifflimit=self.args.snifflimit, blanks_as_nulls=(not self.args.blanks), **self.reader_kwargs)

            # The whole file has been parsed into memory; release the handle.
            f.close()

            # Direct connections to database
            if self.args.connection_string:
                try:
                    engine, metadata = sql.get_connection(self.args.connection_string)
                except ImportError:
                    raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n')

                sql_table = sql.make_table(csv_table, table_name, self.args.no_constraints, metadata)

                if not self.args.no_create:
                    sql_table.create()

                if self.args.insert:
                    insert = sql_table.insert()
                    headers = csv_table.headers()

                    # Insert all rows inside a single transaction.
                    conn = engine.connect()
                    trans = conn.begin()
                    for row in csv_table.to_rows():
                        conn.execute(insert, [dict(zip(headers, row)), ])
                    trans.commit()
                    conn.close()

            # Writing to file
            else:
                sql_table = sql.make_table(csv_table, table_name, self.args.no_constraints)
                self.output_file.write((u'%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect)).encode('utf-8'))
Exemplo n.º 2
0
    def main(self):
        """Convert the input CSV to a SQL table: load it into a database
        when --db is given, otherwise write a CREATE TABLE statement to
        the output file.
        """
        if self.args.table_name:
            table_name = self.args.table_name
        elif self.args.file != sys.stdin:
            # Use filename as table name
            table_name = os.path.splitext(
                os.path.split(self.args.file.name)[1])[0]
        else:
            self.argparser.error(
                'The --table argument is required when providing data over STDIN.'
            )

        # BUG FIX: the original only bound `loosey` when the flag was truthy,
        # raising NameError at the make_table() calls below whenever
        # --loosey was absent.
        loosey = bool(self.args.loosey)

        if self.args.dialect and self.args.connection_string:
            self.argparser.error(
                'The --dialect option is only valid when --db is not specified.'
            )

        if self.args.insert and not self.args.connection_string:
            self.argparser.error(
                'The --insert option is only valid when --db is also specified.'
            )

        csv_table = table.Table.from_csv(self.args.file,
                                         name=table_name,
                                         snifflimit=self.args.snifflimit,
                                         **self.reader_kwargs)

        # Direct connections to database
        if self.args.connection_string:
            try:
                engine, metadata = sql.get_connection(
                    self.args.connection_string)
            except ImportError:
                raise ImportError(
                    'You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n'
                )

            sql_table = sql.make_table(csv_table, table_name, loosey, metadata)
            sql_table.create()

            if self.args.insert:
                insert = sql_table.insert()
                headers = csv_table.headers()

                for row in csv_table.to_rows(serialize_dates=True):
                    engine.execute(insert, [
                        dict(zip(headers, row)),
                    ])

        # Writing to file
        else:
            sql_table = sql.make_table(csv_table, table_name, loosey)
            self.output_file.write((u'%s\n' % sql.make_create_table_statement(
                sql_table, dialect=self.args.dialect)).encode('utf-8'))
Exemplo n.º 3
0
 def make_insert_statement(self):
     """The INSERT generated for the first serialized fixture row must
     match the expected literal SQL exactly."""
     fixture_table = sql.make_table(self.csv_table, "csvsql")
     first_row = self.csv_table._prepare_rows_for_serialization()[0]
     expected = u"INSERT INTO test_table (text, integer, datetime, empty_column) VALUES ('Chicago Reader', 40, '2008-01-01T04:40:00', NULL);"
     self.assertEqual(sql.make_insert_statement(fixture_table, first_row), expected)
Exemplo n.º 4
0
def makeRawTable(contents):
    """Build an in-memory SQLite dump of *contents* (a CSV string).

    Returns a tuple ``(dump_sql, header)``: the full SQL dump of a
    ``raw_table`` (with a prepended ``record_id`` primary-key column)
    and the slugified header row.
    """
    inp = StringIO(contents)
    reader = UnicodeCSVReader(inp)
    header = reader.next()
    # Normalise header names so they are safe SQL identifiers.
    header = [slugify(h) for h in header]
    outp = StringIO()
    writer = UnicodeCSVWriter(outp)
    writer.writerow(header)
    writer.writerows([[preProcess(unicode(i)) for i in r] for r in reader])
    outp.seek(0)
    conn = sqlite3.connect(':memory:')
    # infer_types=False keeps every column as text in the raw table.
    t = Table.from_csv(outp, 
                       name='raw_table', 
                       blanks_as_nulls=False, 
                       infer_types=False)
    sql_table = make_table(t)
    create_st = make_create_table_statement(sql_table)
    # Splice an INTEGER PRIMARY KEY record_id column into the generated DDL.
    parts = create_st.split('raw_table (')
    create_st = '{0} raw_table ( record_id INTEGER PRIMARY KEY,{1}'.format(*parts)
    insert = sql_table.insert()
    curs = conn.cursor()
    curs.execute(create_st)
    rows = [dict(zip(header, row)) for row in t.to_rows()]
    for row in rows:
        curs.execute(str(insert), row)
    dump = StringIO()
    # unidecode flattens non-ASCII so the dump is plain ASCII SQL.
    for line in conn.iterdump():
        dump.write(unidecode(line))
    dump.seek(0)
    return dump.getvalue(), header
Exemplo n.º 5
0
 def load_file(self):
     """
     (1) Use csv file to Generate csvkit Table and SQL table objects.

     Reads self.fname into self.csv_table and derives self.sql_table
     from it, both named self.table_name.
     """
     # Context manager closes the handle even if parsing fails
     # (the original leaked the open file object).
     with open(self.fname, 'rb') as fh:
         self.csv_table = table.Table.from_csv(fh, self.table_name)
     self.sql_table = sql.make_table(self.csv_table, self.table_name)
Exemplo n.º 6
0
    def _get_column_types(self):
        """Infer column datatypes by running the CSV through csvkit's
        table/sql helpers, then record normalised type, raw SQL type and
        length for each column in self.columns."""
        self.tracker.forward('Inferring datatype of columns')
        # Load the csv and use csvkit's sql.make_table utility
        # to infer the datatypes of the columns.
        with open(self.path, 'r') as f:
            csv_table = table.Table.from_csv(f, delimiter=',')

        inferred = sql.make_table(csv_table)
        strip_paren = re.compile(r'\(\w+\)')
        length_pattern = re.compile(r'\((\w+)\)')

        for index, col in enumerate(inferred.columns):
            sql_type = str(col.type)
            # Base type name with any "(...)" suffix removed, computed
            # before the overrides below so it reflects the inferred type.
            base_type = strip_paren.sub('', sql_type)

            # Temporary fix for issue #19.
            if sql_type == 'BOOLEAN':
                sql_type = 'VARCHAR(10)'
            elif sql_type == 'DATETIME':
                # Dumb guess at the maximum length of a datetime field.
                # Find a better way!
                sql_type = 'VARCHAR(100)'

            length_match = length_pattern.search(sql_type)
            length = int(length_match.group(1)) if length_match else None

            self.columns[index]['datatype'] = base_type.lower()
            self.columns[index]['raw_type'] = sql_type
            self.columns[index]['length'] = length
Exemplo n.º 7
0
def load_file(file_name, db_engine):
	"""Load data/<file_name> into the database behind *db_engine*.

	Sniffs the CSV dialect, creates the table from csvkit's inferred
	schema, then bulk-loads the rows with a Postgres COPY FROM.
	"""
	# BUG FIX: rstrip('.csv') strips any trailing run of the characters
	# '.', 'c', 's', 'v' (e.g. 'stats.csv' -> 'stat'); strip the suffix.
	table_name = file_name[:-4] if file_name.endswith('.csv') else file_name

	print('-----------------')

	print('   Opening {} ({})...'.format(file_name, datetime.now() - start_time))

	f = codecs.open('data/{}'.format(file_name), 'rU')

	print('   Sniffing file dialect ({})...'.format(datetime.now() - start_time))

	dialect = sniffer.sniff_dialect(f.read())

	# Return to top of file
	f.seek(0)

	print('   Making csv Table object ({})...'.format(datetime.now() - start_time))

	from_file = Table.from_csv(f, name = table_name, encoding = 'utf-8') # Here be some overhead

	# The whole file has been consumed; release the handle
	# (the original leaked it).
	f.close()

	print('   Making SQLAlchemy Table object ({})...'.format(datetime.now() - start_time))

	sql_table = sql.make_table(from_file)

	print('   Creating db table ({})...'.format(datetime.now() - start_time))

	# BUG FIX: the original referenced a global `engine` here instead of
	# the `db_engine` parameter.
	sql_table.create(db_engine, checkfirst=True)

	print('   Loading {} ({})...'.format(file_name, datetime.now() - start_time))

	# NOTE: the template has no ESCAPE clause, so the original's unused
	# escape_character argument has been dropped.
	copy_from_sql = '''COPY "{table_name}"
					FROM '{file_w_path}'
					DELIMITER '{delimiter}'
					QUOTE '{quote_character}'
					ENCODING 'UTF8'
					CSV
					HEADER;'''.format(
							  table_name = table_name
							, file_w_path = os.getcwd() + '/data/' + file_name
							, delimiter = dialect.delimiter
							, quote_character = dialect.quotechar
						)

	conn = db_engine.connect()

	t = conn.begin()

	try:
		conn.execute(copy_from_sql)
		t.commit()
	except Exception:
		# Narrowed from a bare except so KeyboardInterrupt/SystemExit
		# still propagate.
		t.rollback()
		print(copy_from_sql.replace('\t', ''))
		print("Failed to commit.")
	finally:
		conn.close()
Exemplo n.º 8
0
 def make_insert_statement(self):
     """The generated INSERT for the first fixture row matches the
     expected SQL exactly."""
     fixture_table = sql.make_table(self.csv_table, 'csvsql')
     row = self.csv_table._prepare_rows_for_serialization()[0]
     generated = sql.make_insert_statement(fixture_table, row)
     self.assertEqual(
         generated,
         u'INSERT INTO test_table (text, integer, datetime, empty_column) VALUES (\'Chicago Reader\', 40, \'2008-01-01T04:40:00\', NULL);'
     )
Exemplo n.º 9
0
    def test_make_create_table_statement(self):
        """CREATE TABLE DDL for the fixture table carries the inferred
        types, lengths and NOT NULL constraints."""
        sql_table = sql.make_table(self.csv_table, 'csvsql')
        statement = sql.make_create_table_statement(sql_table)

        # Note: the expected literal includes a trailing space after each comma.
        self.assertEqual(
            statement, u"""CREATE TABLE test_table (
\ttext VARCHAR(17) NOT NULL, 
\tinteger INTEGER, 
\tdatetime DATETIME, 
\tempty_column VARCHAR(32)
);""")
Exemplo n.º 10
0
    def test_make_create_table_statement(self):
        """CREATE TABLE DDL for the fixture table carries the inferred
        types, lengths and NOT NULL constraints."""
        sql_table = sql.make_table(self.csv_table, 'csvsql')
        statement = sql.make_create_table_statement(sql_table)

        # Note: the expected literal includes a trailing space after each comma.
        self.assertEqual(statement,
                         u"""CREATE TABLE test_table (
\ttext VARCHAR(17) NOT NULL, 
\tinteger INTEGER, 
\tdatetime DATETIME, 
\tempty_column VARCHAR(32)
);""")
Exemplo n.º 11
0
    def test_make_create_table_statement_no_constraints(self):
        """With the no-constraints flag set, the DDL drops VARCHAR lengths
        and NOT NULL qualifiers."""
        sql_table = sql.make_table(self.csv_table, 'csvsql', True)
        statement = sql.make_create_table_statement(sql_table)

        # Note: the expected literal includes a trailing space after each comma.
        self.assertEqual(statement,
                         u"""CREATE TABLE test_table (
\ttext VARCHAR, 
\tinteger INTEGER, 
\tdatetime DATETIME, 
\tempty_column VARCHAR
);""")
Exemplo n.º 12
0
    def main(self):
        """Convert the input CSV to a SQL table: load it into a database
        when --db is given, otherwise write a CREATE TABLE statement to
        the output file.
        """
        if self.args.table_name:
            table_name = self.args.table_name
        elif self.args.file != sys.stdin:
            # Use filename as table name
            table_name = os.path.splitext(os.path.split(self.args.file.name)[1])[0]
        else:
            self.argparser.error('The --table argument is required when providing data over STDIN.')

        # BUG FIX: the original left `loosey` unbound when --loosey was not
        # passed, raising NameError at the make_table() calls below.
        loosey = bool(self.args.loosey)

        if self.args.dialect and self.args.connection_string:
            self.argparser.error('The --dialect option is only valid when --db is not specified.')

        if self.args.insert and not self.args.connection_string:
            self.argparser.error('The --insert option is only valid when --db is also specified.')

        csv_table = table.Table.from_csv(self.args.file, name=table_name, snifflimit=self.args.snifflimit, **self.reader_kwargs)

        # Direct connections to database
        if self.args.connection_string:
            try:
                engine, metadata = sql.get_connection(self.args.connection_string)
            except ImportError:
                raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n')

            sql_table = sql.make_table(csv_table, table_name, loosey, metadata)
            sql_table.create()

            if self.args.insert:
                insert = sql_table.insert()
                headers = csv_table.headers()

                for row in csv_table.to_rows(serialize_dates=True):
                    engine.execute(insert, [dict(zip(headers, row)), ])

        # Writing to file
        else:
            sql_table = sql.make_table(csv_table, table_name, loosey)
            self.output_file.write((u'%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect)).encode('utf-8'))
Exemplo n.º 13
0
def make_db(fname, tblname):
    """Load CSV file *fname* into an in-memory SQLite table *tblname*
    and return a cursor on the populated database."""
    conn = sqlite3.connect(':memory:')
    # Close the CSV handle once csvkit has consumed it
    # (the original leaked the open file object).
    with open(fname, 'rb') as f:
        t = Table.from_csv(f, name=tblname)
    sql_table = make_table(t)
    create_st = make_create_table_statement(sql_table)
    print(create_st)
    insert = sql_table.insert()
    curs = conn.cursor()
    curs.execute(create_st)
    headers = t.headers()
    print(headers)
    # Feed each row as a dict so the generated named-parameter INSERT works.
    rows = [dict(zip(headers, row)) for row in t.to_rows()]
    for row in rows:
        curs.execute(str(insert), row)
    return curs
def make_db(fname, tblname):
    """Load CSV file *fname* into an in-memory SQLite table *tblname*
    and return a cursor on the populated database.

    NOTE(review): this is a verbatim duplicate of the preceding
    make_db definition and shadows it at import time.
    """
    conn = sqlite3.connect(':memory:')
    # Close the CSV handle once csvkit has consumed it
    # (the original leaked the open file object).
    with open(fname, 'rb') as f:
        t = Table.from_csv(f, name=tblname)
    sql_table = make_table(t)
    create_st = make_create_table_statement(sql_table)
    print(create_st)
    insert = sql_table.insert()
    curs = conn.cursor()
    curs.execute(create_st)
    headers = t.headers()
    print(headers)
    # Feed each row as a dict so the generated named-parameter INSERT works.
    rows = [dict(zip(headers, row)) for row in t.to_rows()]
    for row in rows:
        curs.execute(str(insert), row)
    return curs
Exemplo n.º 15
0
 def main(self):
     """Load the input CSV into an in-memory SQLite table and run the
     user's SQL query against it, writing the result rows as CSV."""
     # Derive the table name from the lazily-opened file argument's path.
     tabname = os.path.splitext(
         os.path.basename(self.args.file._lazy_args[0]))[0]
     tab = table.Table.from_csv(self.args.file,
                                name=tabname,
                                **self.reader_kwargs)
     stmt = make_create_table_statement(make_table(tab), dialect='sqlite')
     conn = sqlite3.connect(':memory:')
     c = conn.cursor()
     c.execute(stmt)
     # Insert each row with a positional-parameter statement sized to the row.
     for row in tab.to_rows():
         vals = ','.join(['?'] * len(row))
         prepared = "INSERT INTO %s VALUES(%s)" % (tab.name, vals)
         c.execute(prepared, row)
     output = CSVKitWriter(self.output_file, **self.writer_kwargs)
     for row in c.execute(self.args.query):
         output.writerow(row)
Exemplo n.º 16
0
    def fetch(self):
        """Load this CSV input into its database table, once.

        Returns True immediately if already loaded; otherwise creates the
        table, bulk-inserts the rows inside a transaction, and returns
        self._loaded (which is only set True when the CSV had rows).
        """
        if self.isLoaded:
            tool.VERBOSE('use csv {}', self.name)
            return True

        tool.VERBOSE('fetching csv {}', self.name)

        with tool.INDENT():
            # NOTE(review): the file handle passed to from_csv is never
            # explicitly closed here.
            csv_table = table.Table.from_csv(
                open(self.inputs[0], 'rb'),
                name=self.name,
                # sniff_limit     = 0,
                blanks_as_nulls=True,
                infer_types=True,
                no_header_row=False)

            tool.VERBOSE('read {} row(s) from {}', csv_table.count_rows(),
                         self.inputs[0])

            connection = self.db.engine.connect()
            transaction = connection.begin()

            sql_table = sql.make_table(csv_table, self.name, False, None,
                                       self.db.metadata)

            sql_table.create()

            # Only insert (and mark loaded) when the CSV actually had rows.
            if csv_table.count_rows() > 0:
                insert = sql_table.insert()
                headers = csv_table.headers()
                connection.execute(
                    insert,
                    [dict(zip(headers, row)) for row in csv_table.to_rows()])

                self._count = csv_table.count_rows()
                tool.VERBOSE('wrote {} row(s) to table {}', self.count,
                             self.name)

                self._loaded = True

            transaction.commit()
            connection.close()

        return self._loaded
Exemplo n.º 17
0
def csv_to_table():
    """Read the t4 output file into a csvkit Table, report its columns,
    and print a postgresql CREATE TABLE statement for it."""
    # --------------------------------------
    msgt('(2) csvkit to table')
    # --------------------------------------
    # %-format keeps the "NAME value" output identical under Py2 and Py3
    # (the original used a Py2 two-item print statement).
    print('QUOTE_NONE %s' % QUOTE_NONE)
    # Context manager closes the handle (the original leaked it); the
    # unused csv_args dict for the commented-out kwargs has been dropped.
    with open(t4_out, 'rb') as fh:
        csv_table = table.Table.from_csv(f=fh,
                                         name='tname',
                                         snifflimit=None)
    for col in csv_table:
        msg('%s, %s' % (col.name, col.type))

    sql_table = csvkit_sql.make_table(csv_table, 'new_table')
    create_table_sql = csvkit_sql.make_create_table_statement(sql_table, dialect="postgresql")
    msg('create_table_sql: %s' % create_table_sql)
    msg(csv_table.to_rows())
Exemplo n.º 18
0
def csv2sql(file=None, db_schema=None, tablename=None, encoding='utf-8', snifflimit=512*1024):
    """Load an uploaded CSV file-like object into a new database table.

    Returns json.dumps(tablename) on success, or the caught exception on
    failure. The upload and DB connection are always closed.
    """
    conn = None
    trans = None
    try:
        conn = engine.connect()
        trans = conn.begin()

        csv_table = table.Table.from_csv(
            file.stream,
            name=tablename,
            snifflimit=snifflimit,
            blanks_as_nulls=True,
            infer_types=True,
            no_header_row=False,
            encoding=encoding
        )

        sql_table = sql.make_table(
            csv_table,
            tablename,
            False,  # self.args.no_constraints
            db_schema,  # self.args.db_schema
            metadata
        )

        sql_table.create()

        insert = sql_table.insert()
        headers = csv_table.headers()

        conn.execute(insert, [dict(zip(headers, row)) for row in csv_table.to_rows()])
        trans.commit()

    except Exception as e:
        # BUG FIX: only roll back a transaction that was actually started
        # (the original raised NameError if engine.connect() itself failed).
        if trans is not None:
            trans.rollback()
        print(e)
        return e
    finally:
        # BUG FIX: the original had `return json.dumps(tablename)` here,
        # which unconditionally swallowed the except branch's `return e`.
        # finally now does cleanup only.
        if conn is not None:
            conn.close()
        file.close()

    return json.dumps(tablename)
Exemplo n.º 19
0
    def main(self):
        """Emit a CREATE TABLE statement (and, with --inserts, INSERT
        statements) for the input CSV on the output file."""
        source = self.args.file
        if source.name == "<stdin>":
            # No filename available to derive a table name from.
            table_name = "csvsql_table"
        else:
            # Use filename (sans directory and extension) as table name.
            base_name = os.path.split(source.name)[1]
            table_name = os.path.splitext(base_name)[0]

        csv_table = table.Table.from_csv(
            source, name=table_name, snifflimit=self.args.snifflimit, **self.reader_kwargs
        )
        sql_table = sql.make_table(csv_table)

        dialect = self.args.dialect
        write = self.output_file.write
        create_stmt = sql.make_create_table_statement(sql_table, dialect=dialect)
        write((u"%s\n" % create_stmt).encode("utf-8"))

        if self.args.inserts:
            # Blank line between the DDL and the INSERT statements.
            write("\n")
            for row in csv_table.to_rows(serialize_dates=True):
                insert_stmt = sql.make_insert_statement(sql_table, row, dialect=dialect)
                write((u"%s\n" % insert_stmt).encode("utf-8"))
Exemplo n.º 20
0
 def create_table(self, cursor, schema, table, virtual_table):
     """Create *schema*.*table* in the database from the csvkit virtual
     table's inferred schema."""
     ddl = csv_sql.make_create_table_statement(
         csv_sql.make_table(virtual_table, db_schema=schema),
         dialect='postgresql')
     print("Creating table {}.{}".format(schema, table))
     cursor.execute(ddl)
Exemplo n.º 21
0
Arquivo: seeder.py Projeto: yilab/dbt
 def create_table(self, cursor, schema, table, virtual_table):
     """Create *schema*.*table* in the database from the csvkit virtual
     table's inferred schema, logging the action."""
     ddl = csv_sql.make_create_table_statement(
         csv_sql.make_table(virtual_table, db_schema=schema),
         dialect='postgresql')
     logger.info("Creating table {}.{}".format(schema, table))
     cursor.execute(ddl)
Exemplo n.º 22
0
    def main(self):
        """csvsql entry point: read one or more CSV inputs, then either
        load them into a database (--db/--insert), run --query against
        them (in-memory SQLite by default), or emit CREATE TABLE
        statements to the output file.
        """
        connection_string = self.args.connection_string
        do_insert = self.args.insert
        query = self.args.query

        self.input_files = []

        for path in self.args.input_paths:
            self.input_files.append(self._open_input_file(path))

        if self.args.table_names:
            table_names = self.args.table_names.split(',')
        else:
            table_names = []

        # Create an SQLite database in memory if no connection string is specified
        if query and not connection_string:
            connection_string = "sqlite:///:memory:"
            do_insert = True

        if self.args.dialect and connection_string:
            self.argparser.error('The --dialect option is only valid when --db is not specified.')

        if do_insert and not connection_string:
            self.argparser.error('The --insert option is only valid when --db is also specified.')

        if self.args.no_create and not do_insert:
            self.argparser.error('The --no-create option is only valid --insert is also specified.')

        # Establish database validity before reading CSV files
        if connection_string:
            try:
                engine, metadata = sql.get_connection(connection_string)
            except ImportError:
                raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n')
            conn = engine.connect()
            trans = conn.begin()

        for f in self.input_files:
            try:
                # Try to use name specified via --table
                table_name = table_names.pop(0)
            except IndexError:
                if f == sys.stdin:
                    table_name = "stdin"
                else:
                    # Use filename as table name
                    table_name = os.path.splitext(os.path.split(f.name)[1])[0]

            csv_table = table.Table.from_csv(
                f,
                name=table_name,
                sniff_limit=self.args.sniff_limit,
                blanks_as_nulls=(not self.args.blanks),
                infer_types=(not self.args.no_inference),
                no_header_row=self.args.no_header_row,
                **self.reader_kwargs
            )

            # The whole file has been parsed into memory; release the handle.
            f.close()

            if csv_table:
                if connection_string:
                    sql_table = sql.make_table(
                        csv_table,
                        table_name,
                        self.args.no_constraints,
                        self.args.db_schema,
                        metadata
                    )

                    # Create table
                    if not self.args.no_create:
                        sql_table.create()

                    # Insert data
                    if do_insert and csv_table.count_rows() > 0:
                        insert = sql_table.insert()
                        headers = csv_table.headers()
                        conn.execute(insert, [dict(zip(headers, row)) for row in csv_table.to_rows()])

                # Output SQL statements
                else:
                    sql_table = sql.make_table(csv_table, table_name, self.args.no_constraints)
                    self.output_file.write('%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect))

        if connection_string:
            if query:
                # Execute specified SQL queries
                queries = query.split(';')
                rows = None

                for q in queries:
                    if q:
                        rows = conn.execute(q)

                # Output result of last query as CSV
                try:
                    output = agate.csv.writer(self.output_file, **self.writer_kwargs)
                    if not self.args.no_header_row:
                        output.writerow(rows._metadata.keys)
                    for row in rows:
                        output.writerow(row)
                except AttributeError:
                    # Last statement produced no result rows (e.g. DDL/DML).
                    pass

            trans.commit()
            conn.close()
Exemplo n.º 23
0
    def main(self):
        """Convert each input CSV into a SQL table: load it directly into a
        database when --db is given, otherwise write a CREATE TABLE
        statement to the output file.
        """
        # Ensure we're handling a list, even if it's just one file
        if not isinstance(self.args.files, list):
            self.args.files = [self.args.files]
        else:
            # --table names exactly one table, so it is ambiguous with many inputs.
            if self.args.table_name:
                self.argparser.error(
                    'The --table argument is only allowed when specifying a single file.'
                )

        for f in self.args.files:
            if self.args.table_name:
                table_name = self.args.table_name
            elif f != sys.stdin:
                # Use filename as table name
                table_name = os.path.splitext(os.path.split(f.name)[1])[0]
            else:
                # argparser.error() exits, so table_name is always bound past here.
                self.argparser.error(
                    'The --table argument is required when providing data over STDIN.'
                )

            # Option-dependency checks (re-run per file, though the answers
            # never change between iterations).
            if self.args.dialect and self.args.connection_string:
                self.argparser.error(
                    'The --dialect option is only valid when --db is not specified.'
                )

            if self.args.insert and not self.args.connection_string:
                self.argparser.error(
                    'The --insert option is only valid when --db is also specified.'
                )

            if self.args.no_create and not self.args.insert:
                self.argparser.error(
                    'The --no-create option is only valid --insert is also specified.'
                )

            csv_table = table.Table.from_csv(
                f,
                name=table_name,
                snifflimit=self.args.snifflimit,
                blanks_as_nulls=(not self.args.blanks),
                **self.reader_kwargs)

            # The whole file has been parsed into memory; release the handle.
            f.close()

            # Direct connections to database
            if self.args.connection_string:
                try:
                    engine, metadata = sql.get_connection(
                        self.args.connection_string)
                except ImportError:
                    raise ImportError(
                        'You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n'
                    )

                sql_table = sql.make_table(csv_table, table_name,
                                           self.args.no_constraints, metadata)

                if not self.args.no_create:
                    sql_table.create()

                if self.args.insert:
                    insert = sql_table.insert()
                    headers = csv_table.headers()

                    # Insert all rows inside a single transaction.
                    conn = engine.connect()
                    trans = conn.begin()
                    for row in csv_table.to_rows():
                        conn.execute(insert, [
                            dict(zip(headers, row)),
                        ])
                    trans.commit()
                    conn.close()

            # Writing to file
            else:
                sql_table = sql.make_table(csv_table, table_name,
                                           self.args.no_constraints)
                self.output_file.write(
                    (u'%s\n' % sql.make_create_table_statement(
                        sql_table, dialect=self.args.dialect)).encode('utf-8'))
Exemplo n.º 24
0
 def test_make_create_table_statement_with_dialects(self):
     """Smoke test: statement generation must not raise for any
     supported dialect (the statement itself is not asserted on)."""
     for dialect in sql.DIALECTS:
         # A fresh table object is built for every dialect iteration.
         sql_table = sql.make_table(self.csv_table,
                                    'csvsql',
                                    db_schema='test_schema')
         statement = sql.make_create_table_statement(sql_table, dialect)
Exemplo n.º 25
0
    def main(self):
        """csvsql entry point: read CSV inputs (adding stdin automatically
        when piped), then either load them into a database
        (--db/--insert), run --query against them, or emit CREATE TABLE
        statements to the output file.
        """
        connection_string = self.args.connection_string
        do_insert = self.args.insert
        query = self.args.query

        self.input_files = []

        for path in self.args.input_paths:
            self.input_files.append(self._open_input_file(path))

        if self.args.table_names:
            table_names = self.args.table_names.split(',')
        else:
            table_names = []

        # If one or more filenames are specified, we need to add stdin ourselves (if available)
        if sys.stdin not in self.input_files:
            try:
                if not sys.stdin.isatty():
                    self.input_files.insert(0, sys.stdin)
            except:
                # Best-effort: environments without a usable stdin are ignored.
                pass

        # Create an SQLite database in memory if no connection string is specified
        if query and not connection_string:
            connection_string = "sqlite:///:memory:"
            do_insert = True

        if self.args.dialect and connection_string:
            self.argparser.error('The --dialect option is only valid when --db is not specified.')

        if do_insert and not connection_string:
            self.argparser.error('The --insert option is only valid when --db is also specified.')

        if self.args.no_create and not do_insert:
            self.argparser.error('The --no-create option is only valid --insert is also specified.')

        # Establish database validity before reading CSV files
        if connection_string:
            try:
                engine, metadata = sql.get_connection(connection_string)
            except ImportError:
                raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n')
            conn = engine.connect()
            trans = conn.begin()

        for f in self.input_files:
            try:
                # Try to use name specified via --table
                table_name = table_names.pop(0)
            except IndexError:
                if f == sys.stdin:
                    table_name = "stdin"
                else:
                    # Use filename as table name
                    table_name = os.path.splitext(os.path.split(f.name)[1])[0]

            csv_table = table.Table.from_csv(
                f,
                name=table_name,
                snifflimit=self.args.snifflimit,
                blanks_as_nulls=(not self.args.blanks),
                infer_types=(not self.args.no_inference),
                no_header_row=self.args.no_header_row,
                **self.reader_kwargs
            )

            # The whole file has been parsed into memory; release the handle.
            f.close()

            if connection_string:
                sql_table = sql.make_table(
                    csv_table,
                    table_name,
                    self.args.no_constraints,
                    self.args.db_schema,
                    metadata
                )

                # Create table
                if not self.args.no_create:
                    sql_table.create()

                # Insert data
                if do_insert and csv_table.count_rows() > 0:
                    insert = sql_table.insert()
                    headers = csv_table.headers()
                    conn.execute(insert, [dict(zip(headers, row)) for row in csv_table.to_rows()])

            # Output SQL statements
            else:
                sql_table = sql.make_table(csv_table, table_name, self.args.no_constraints)
                self.output_file.write('%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect))

        if connection_string:
            if query:
                # Execute specified SQL queries
                queries = query.split(';')
                rows = None

                for q in queries:
                    if q:
                        rows = conn.execute(q)

                # Output result of last query as CSV
                try:
                    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
                    if not self.args.no_header_row:
                        output.writerow(rows._metadata.keys)
                    for row in rows:
                        output.writerow(row)
                except AttributeError:
                    # Last statement produced no result rows (e.g. DDL/DML).
                    pass

            trans.commit()
            conn.close()
 # NOTE(review): this fragment begins mid-scope -- its enclosing definition
 # (and the origin of comm_urls, report_pattern, cache_dir, DB_NAME) is not
 # visible in this chunk.
 report_scraper = ReportScraper(url_pattern=report_pattern)
 # Configure on-disk caching for the scraper's HTTP fetches.
 report_scraper.cache_storage = scrapelib.cache.FileCache(cache_dir)
 report_scraper.cache_write_only = False
 conn = sqlite3.connect(DB_NAME)
 c = conn.cursor()
 for comm_url in comm_urls:
     for report_data in report_scraper.scrape_one(comm_url):
         # Tag each report with the committee id parsed from the source URL.
         comm_id = parse_qs(urlparse(comm_url).query)['id'][0]
         report_data['committee_id'] = comm_id
         # Round-trip the dict through CSV so csvkit can infer a schema.
         outp = StringIO()
         writer = UnicodeCSVDictWriter(outp, fieldnames=report_data.keys())
         writer.writeheader()
         writer.writerow(report_data)
         outp.seek(0)
         t = Table.from_csv(outp, name='reports')
         sql_table = make_table(t)
         try:
             c.execute('select * from reports limit 1')
         except sqlite3.OperationalError:
             # Table does not exist yet: create it from the inferred schema.
             create_st = make_create_table_statement(sql_table)
             c.execute(create_st)
             conn.commit()
         # Skip reports already stored (de-duplicate on the report id).
         c.execute('select * from reports where id = ?', (int(report_data['id']),))
         existing = c.fetchall()
         if not existing:
             insert = sql_table.insert()
             headers = t.headers()
             rows = [dict(zip(headers, row)) for row in t.to_rows()]
             for row in rows:
                 c.execute(str(insert), row)
             conn.commit()
Exemplo n.º 27
0
def process_csv_file(absolute_base_file, table_name_temp, new_table,
                     geom_table_name, geom_table_id, geom_table_columns,
                     geom_table_geom):
    """
    Import a CSV as a temporary PostgreSQL table, then join it with an
    existing geometry table into a new, indexed table.

    :param absolute_base_file: path of the uploaded CSV file
    :param table_name_temp: name for the temporary import table
    :param new_table: name of the final joined table to create
    :param geom_table_name: geometry table to join against (aliased ``g``)
    :param geom_table_id: join-key column; must equal the first CSV header
    :param geom_table_columns: column list selected from the geometry table
    :param geom_table_geom: geometry column used for the GIST index
    :return: ``(errormsgs_val, status_code)`` — ``('', 200)`` on success,
             an error message with ``'400'`` on validation failure, or
             ``(None, <exception text>)`` on unexpected errors
    """
    import csv

    no_header_row = False

    # Detect the delimiter actually used by the file.
    with open(absolute_base_file, 'rb') as csvfile:
        dialect = csv.Sniffer().sniff(csvfile.read())

    # Build a csvkit Table from the CSV (column types are inferred).
    f = open(absolute_base_file, 'rb')
    try:
        csv_table = table.Table.from_csv(f,
                                         name=table_name_temp,
                                         no_header_row=no_header_row,
                                         delimiter=dialect.delimiter)
    except Exception:
        f.close()  # close the handle on failure too (was leaked before)
        status_code = '400'
        errormsgs_val = "Failed to create the table from CSV."
        return errormsgs_val, status_code
    f.close()  # csvkit has consumed the file; release the handle (was leaked)

    idx = -1  # stays -1 when the CSV yields no columns at all (was a NameError)
    for idx, column in enumerate(csv_table):
        # Slugify header names into valid SQL identifiers.
        column.name = slugify(unicode(column.name)).replace('-', '_')
        # Check if the selected value from the dropdown menu matches the first value of the CSV header
        if idx == 0:
            print("column.name.strip()", column.name.strip())
            print("geom_table_id.strip()", geom_table_id.strip())
            if column.name.strip() != geom_table_id.strip():
                errormsgs_val = "The selected value of Layer Type doesn't match the one of the imported layer."
                status_code = '400'
                return errormsgs_val, status_code
    # Check if there are added columns in the CSV
    if idx < 2:
        errormsgs_val = "The CSV has no added columns. Please add extra columns."
        status_code = '400'
        return errormsgs_val, status_code
    else:
        try:
            sql_table = sql.make_table(csv_table, table_name_temp)
            create_table_sql = sql.make_create_table_statement(
                sql_table, dialect="postgresql")
            # Cap inferred VARCHAR lengths at 254 characters.
            create_table_sql = re.sub(r'VARCHAR\([0-9]*\)', 'VARCHAR(254)',
                                      create_table_sql)
        except Exception:
            return None, str(sys.exc_info()[0])

        constr = "dbname='{dbname}' user='******' host='{host}' password='******'".format(
            **{
                'dbname': settings.DATABASES['uploaded']['NAME'],
                'user': settings.DATABASES['uploaded']['USER'],
                'host': settings.DATABASES['uploaded']['HOST'],
                'password': settings.DATABASES['uploaded']['PASSWORD']
            })
        conn = psycopg2.connect(constr)

        try:
            # Check if there is already a table with the same name
            cur = conn.cursor()

            sqlstr = "SELECT EXISTS(SELECT * FROM information_schema.tables WHERE table_name='{new_table_name}');".format(
                **{'new_table_name': new_table})
            cur.execute(sqlstr)
            exists = cur.fetchone()[0]
            if exists:
                errormsgs_val = "There is already a layer with this name. Please choose another title."
                status_code = '400'
                return errormsgs_val, status_code

            #  If the temporary table exists, drop it, then create it and add a primary key
            cur.execute('DROP TABLE IF EXISTS %s CASCADE;' % table_name_temp)
            cur.execute(create_table_sql)
            conn.commit()
            sqlstr = "ALTER TABLE IF EXISTS {temp_table} ADD COLUMN fid SERIAL PRIMARY KEY;".format(
                **{'temp_table': table_name_temp})
            cur.execute(sqlstr)
            conn.commit()
        except Exception as e:
            logger.error("Error Creating Temporary table %s:%s",
                         table_name_temp, str(e))

        #  Copy the CSV rows into the temporary table via SQLAlchemy
        connection_string = "postgresql://%s:%s@%s:%s/%s" % (
            settings.DATABASES['uploaded']['USER'],
            settings.DATABASES['uploaded']['PASSWORD'],
            settings.DATABASES['uploaded']['HOST'],
            settings.DATABASES['uploaded']['PORT'],
            settings.DATABASES['uploaded']['NAME'])
        try:
            engine, metadata = sql.get_connection(connection_string)
        except ImportError:
            return None, str(sys.exc_info()[0])

        conn_eng = engine.connect()
        trans = conn_eng.begin()

        # Pull headers unconditionally: they are needed below for the join
        # column list even when the CSV has zero data rows (was a NameError).
        headers = csv_table.headers()
        if csv_table.count_rows() > 0:
            insert = sql_table.insert()
            try:
                conn_eng.execute(
                    insert,
                    [dict(zip(headers, row)) for row in csv_table.to_rows()])
            except Exception:
                return None, str(sys.exc_info()[0])

        trans.commit()
        conn_eng.close()

        # Create joined table - drop table_name_temp
        new_clmns = []
        for idx, item in enumerate(headers):
            # Skip the first two columns: the downloaded layer already
            # contains them from the global table, so don't include them again.
            if idx > 1:
                new_column = "{table_name}.{item}".format(**{
                    'table_name': table_name_temp,
                    'item': item
                })
                new_clmns.append(new_column)

        added_columns = ', '.join(new_clmns)
        try:

            # Joined table
            sqlstr = "CREATE TABLE {new_table_name} AS (SELECT {geom_table_columns}, {added_columns} FROM {geom_table} INNER JOIN {temp_table} ON (g.{id} = {temp_table}.{id}));".format(
                **{
                    'new_table_name': new_table,
                    'geom_table': geom_table_name,
                    'geom_table_columns': geom_table_columns,
                    'temp_table': table_name_temp,
                    'id': geom_table_id,
                    'added_columns': added_columns
                })
            cur.execute(sqlstr)
            conn.commit()
            sqlstr = "ALTER TABLE IF EXISTS {new_table_name} ADD COLUMN fid SERIAL PRIMARY KEY;".format(
                **{'new_table_name': new_table})
            cur.execute(sqlstr)
            conn.commit()

            # btree index on the join key, GIST index on the geometry column.
            sqlstr = "CREATE INDEX indx_{new_table_name} ON {new_table_name} USING btree({id});".format(
                **{
                    'new_table_name': new_table,
                    'id': geom_table_id,
                })
            cur.execute(sqlstr)
            conn.commit()
            sqlstr = "CREATE INDEX indx_geom_{new_table_name} ON {new_table_name} USING GIST({geom});".format(
                **{
                    'new_table_name': new_table,
                    'geom': geom_table_geom,
                })
            cur.execute(sqlstr)
            conn.commit()

        except Exception:
            # call form prints identically under Python 2 (single argument)
            print("Failed to create joined table.")
            logger.error("Failed to create joined table.")

        try:
            sqlstr = "DROP TABLE IF EXISTS {temp_table} CASCADE;".format(
                **{'temp_table': table_name_temp})
            cur.execute(sqlstr)
            conn.commit()
        except Exception:
            logger.error("Failed to drop temporary table.")
        conn.close()

        status_code = 200
        errormsgs_val = ''
        return errormsgs_val, status_code
Exemplo n.º 28
0
def process_csv_file(data_table,
                     is_dataverse_db,
                     delimiter=",",
                     no_header_row=False,
                     force_char_column=None):
    """
    Transform a csv file and add it to the postgres DataStore.

    :param data_table: DataTable whose uploaded CSV should be ingested
    :param is_dataverse_db: use the Dataverse datastore connection
    :param delimiter: CSV field delimiter
    :param no_header_row: True when the CSV has no header row
    :param force_char_column: optional column to force to a character type
    :return:
        success:  (data_table, "")
        err:    (None, error message)
    """
    assert isinstance(data_table,
                      DataTable), "data_table must be a DataTable object"

    # full path to csv file
    #
    csv_filename = data_table.uploaded_file.path

    # Standardize table_name for the DataTable
    #
    if data_table.id is not None:
        # This DataTable already has a unique name
        table_name = data_table.table_name
    else:
        # Get a unique name for the data table
        table_name = os.path.splitext(os.path.basename(csv_filename))[0]
        table_name = get_unique_tablename(table_name)

        data_table.table_name = table_name
        data_table.save()

    # -----------------------------------------------------
    # Transform csv file to csvkit Table
    # -----------------------------------------------------
    csv_file_handle = open(csv_filename, 'rb')

    try:
        csv_table = table.Table.from_csv(\
                                csv_file_handle,
                                name=table_name,
                                no_header_row=no_header_row,
                                delimiter=delimiter)
    except Exception:
        csv_file_handle.close()  # close handle on failure (was leaked)
        data_table.delete()
        err_msg = str(sys.exc_info()[0])
        LOGGER.error('Failed to convert csv file to table.  Error: %s'\
                , err_msg)
        return None, err_msg
    csv_file_handle.close()

    # -----------------------------------------------------
    # If needed, force a column to be character
    # -----------------------------------------------------
    csv_table = force_csv_column_tochar(csv_table,\
                    force_char_column)

    # -----------------------------------------------------
    # Create DataTableAttribute objects
    # -----------------------------------------------------
    try:
        # Iterate through header row
        #
        for column in csv_table:
            # Standardize column name
            #
            column.name = standardize_column_name(column.name)

            # Create DataTableAttribute object; '_unnamed' columns are hidden
            #
            is_visible = True
            if column.name == '_unnamed':
                is_visible = False

            attribute, created = DataTableAttribute.objects.get_or_create(\
                    datatable=data_table,
                    attribute=column.name,
                    attribute_label=column.name,
                    attribute_type=column.type.__name__,
                    display_order=column.order,
                    visible=is_visible)
    except Exception:
        # Deleting DataTable also deletes related DataTableAttribute objects
        data_table.delete()
        err_msg = 'Failed to convert csv file to table.  Error: %s' % str(
            sys.exc_info()[0])
        LOGGER.error(err_msg)
        return None, err_msg

    msg('process_csv_file 3')
    # -----------------------------------------------------
    # Generate SQL to create table from csv file
    # -----------------------------------------------------
    try:
        sql_table = sql.make_table(csv_table, table_name)
        create_table_sql = sql.make_create_table_statement(
            sql_table, dialect="postgresql")
        data_table.create_table_sql = create_table_sql
        data_table.save()
    except Exception:
        data_table.delete()
        err_msg = 'Generate SQL to create table from csv file.  Error: %s' % str(
            sys.exc_info()[0])
        LOGGER.error(err_msg)
        return None, err_msg

    msg('process_csv_file 4')

    # -----------------------------------------------------
    # Execute the SQL and Create the Table (No data is loaded)
    # -----------------------------------------------------
    conn = psycopg2.connect(
        get_datastore_connection_string(is_dataverse_db=is_dataverse_db))

    try:
        cur = conn.cursor()
        cur.execute('drop table if exists %s CASCADE;' % table_name)
        cur.execute(create_table_sql)
        conn.commit()
        cur.close()
    except Exception as e:
        # print_exc takes no exc_info tuple (fixed: was passed as `limit`)
        traceback.print_exc()
        err_msg = "Error Creating table %s:%s" % (data_table.name, str(e))
        LOGGER.error(err_msg)
        return None, err_msg

    finally:
        conn.close()

    # -----------------------------------------------------
    # Copy Data to postgres csv data to Postgres
    # -----------------------------------------------------
    connection_string = get_datastore_connection_string(\
                                        url_format=True,
                                        is_dataverse_db=is_dataverse_db)
    try:
        engine, metadata = sql.get_connection(connection_string)
    except ImportError:
        err_msg = ("Failed to get SQL connection"
                   "for copying csv data to database."
                   "\n{0}".format(str(sys.exc_info()[0])))
        LOGGER.error(err_msg)
        return None, err_msg

    # -----------------------------------------------------
    # Iterate through rows and add data
    # -----------------------------------------------------
    conn = engine.connect()
    trans = conn.begin()

    if csv_table.count_rows() > 0:
        insert = sql_table.insert()  # Generate insert statement
        headers = csv_table.headers()  # Pull table headers
        try:
            # create rows of { column : value } dict's
            #
            rows_to_add = [
                dict(zip(headers, row)) for row in csv_table.to_rows()
            ]

            # Add rows
            conn.execute(insert, rows_to_add)
        except Exception:
            # Clean up after ourselves
            trans.rollback()  # discard the failed transaction explicitly
            conn.close()
            data_table.delete()  # fixed: was undefined name 'instance'
            err_msg = "Failed to add csv DATA to table %s.\n%s" %\
                        (table_name, (sys.exc_info()[0]))
            LOGGER.error(err_msg)
            return None, err_msg

    # Commit new rows and close connection
    #
    trans.commit()
    conn.close()

    return data_table, ""
Exemplo n.º 29
0
 def test_make_create_table_statement_with_dialects(self):
     """Smoke test: CREATE TABLE generation succeeds for every SQL dialect."""
     for sql_dialect in sql.DIALECTS:
         # Build a fresh table per dialect so no state leaks between runs.
         dialect_table = sql.make_table(
             self.csv_table, 'csvsql', db_schema='test_schema')
         statement = sql.make_create_table_statement(dialect_table, sql_dialect)
Exemplo n.º 30
0
def csv_to_table2():
    """Reformat a tab-delimited CSV via csvkit, coercing the last column to text.

    NOTE(review): everything after the first bare ``return`` below is dead
    experimental code, and the trailing triple-quote on the last line opens
    an unterminated string literal — this snippet does not parse as-is.
    """
    # --------------------------------------
    msgt('(3) csvkit to table reformat')
    # --------------------------------------
    fh = open(t4_out, 'rb')
    # NOTE(review): csv_args is built but never passed to from_csv below.
    csv_args = dict(delimiter='\t',\
                    quotechar='"')
    csv_table = table.Table.from_csv(f=fh,\
                            name='tname',\
                            snifflimit=None,\
                            )
    print [c.name for c in csv_table]


    # Force the last column's type to text, then stringify its values.
    last_col = csv_table[-1]
    last_col.type = unicode

    for idx, val in enumerate(last_col):
        last_col[idx] = '%s' % val
    #last_col = ['%s' % x for x in last_col]
    #print last_col[0]

    msg(csv_table.to_rows())

    print [ '%s, %s' % (c.name, c.type) for c in csv_table]

    return

    # ---- dead code: unreachable after the return above ----
    print 'last_col', last_col.order
    col_num = len(csv_table)
    print 'col_num', col_num

    quoted_data = [u'"%s"' % val for val in last_col]
    print 'quoted_data', quoted_data
    #return

    # Replace the last column with a quoted-string copy of its data.
    new_column = table.Column(order=last_col.order,\
                            name=last_col.name,\
                            l=quoted_data,\
                            #normal_type=None,\
                            )
                            #normal_type=None)

    csv_table.pop(-1)



    csv_table.append(new_column)

    sql_table = csvkit_sql.make_table(csv_table, 'new_table')
    create_table_sql = csvkit_sql.make_create_table_statement(sql_table, dialect="postgresql")
    msg('create_table_sql: %s' % create_table_sql)

    msg(csv_table.to_rows())

    return
    msgt('new_column')
    msg(new_column)
    print new_column.name
    for val in new_column: print val
    #print len(new_column)

    # NOTE(review): the line below opens an unterminated string literal.
    """
Exemplo n.º 31
0
    # NOTE(review): fragment duplicated from csv_to_table2 above; it relies
    # on names defined outside this snippet (last_col, quoted_data,
    # csv_table, csvkit_sql, msg, msgt) and does not stand alone.
    #return

    # Replace the last column with a quoted-string copy of its data.
    new_column = table.Column(order=last_col.order,\
                            name=last_col.name,\
                            l=quoted_data,\
                            #normal_type=None,\
                            )
                            #normal_type=None)

    csv_table.pop(-1)



    csv_table.append(new_column)

    sql_table = csvkit_sql.make_table(csv_table, 'new_table')
    create_table_sql = csvkit_sql.make_create_table_statement(sql_table, dialect="postgresql")
    msg('create_table_sql: %s' % create_table_sql)

    msg(csv_table.to_rows())

    return
    # ---- dead code: unreachable after the return above ----
    msgt('new_column')
    msg(new_column)
    print new_column.name
    for val in new_column: print val
    #print len(new_column)

    # NOTE(review): the triple-quote below opens a string that swallows the
    # remaining lines of this fragment.
    """
    print csv_table.columns
    for col in csv_table:
 report_scraper = ReportScraper(url_pattern=report_pattern)
 report_scraper.cache_storage = scrapelib.cache.FileCache(cache_dir)
 report_scraper.cache_write_only = False
 conn = sqlite3.connect(DB_NAME)
 c = conn.cursor()
 for comm_url in comm_urls:
     for report_data in report_scraper.scrape_one(comm_url):
         comm_id = parse_qs(urlparse(comm_url).query)['id'][0]
         report_data['committee_id'] = comm_id
         outp = StringIO()
         writer = UnicodeCSVDictWriter(outp, fieldnames=report_data.keys())
         writer.writeheader()
         writer.writerow(report_data)
         outp.seek(0)
         t = Table.from_csv(outp, name='reports')
         sql_table = make_table(t)
         try:
             c.execute('select * from reports limit 1')
         except sqlite3.OperationalError:
             create_st = make_create_table_statement(sql_table)
             c.execute(create_st)
             conn.commit()
         c.execute('select * from reports where id = ?',
                   (int(report_data['id']), ))
         existing = c.fetchall()
         if not existing:
             insert = sql_table.insert()
             headers = t.headers()
             rows = [dict(zip(headers, row)) for row in t.to_rows()]
             for row in rows:
                 c.execute(str(insert), row)