示例#1
0
    def create_table(self):
        """
        (2) Make the Postgres table

        Drops any existing table of the same name (CASCADE), then runs the
        CREATE TABLE statement generated from self.sql_table.

        Returns True on success; False if a prior error was recorded
        (self.err_found) or the DDL fails (error recorded via add_error).
        """
        if self.err_found:
            return False

        create_table_sql = sql.make_create_table_statement(self.sql_table,\
                                            dialect="postgresql")

        conn = psycopg2.connect(get_db_connection_string())

        try:
            cur = conn.cursor()
            # NOTE(review): the table name is interpolated directly into the
            # DDL string (identifiers cannot be bound as parameters).  Safe
            # only while self.table_name is program-generated; if it can ever
            # come from user input, switch to psycopg2.sql.Identifier.
            cur.execute('drop table if exists %s CASCADE;' % self.table_name)
            cur.execute(create_table_sql)
            conn.commit()
            cur.close()
        except Exception as e:
            self.add_error("Error Creating table %s" % (str(e)))
            return False
        finally:
            # Connection is always closed, success or failure.
            conn.close()

        print 'Table created %s' % self.table_name
        return True
示例#2
0
def makeRawTable(contents):
    """
    Build an in-memory SQLite table named ``raw_table`` from a CSV string.

    Headers are slugified and every cell is run through preProcess(); a
    ``record_id INTEGER PRIMARY KEY`` column is spliced into the generated
    schema.  Returns ``(sql_dump_string, header_list)``.
    """
    inp = StringIO(contents)
    reader = UnicodeCSVReader(inp)
    header = reader.next()
    header = [slugify(h) for h in header]
    # Re-write the CSV with cleaned headers and pre-processed cells so
    # csvkit parses the normalized data, not the raw input.
    outp = StringIO()
    writer = UnicodeCSVWriter(outp)
    writer.writerow(header)
    writer.writerows([[preProcess(unicode(i)) for i in r] for r in reader])
    outp.seek(0)
    conn = sqlite3.connect(':memory:')
    # Type inference disabled: every column stays text exactly as written.
    t = Table.from_csv(outp, 
                       name='raw_table', 
                       blanks_as_nulls=False, 
                       infer_types=False)
    sql_table = make_table(t)
    create_st = make_create_table_statement(sql_table)
    # Splice a record_id primary key column into the generated DDL.
    parts = create_st.split('raw_table (')
    create_st = '{0} raw_table ( record_id INTEGER PRIMARY KEY,{1}'.format(*parts)
    insert = sql_table.insert()
    curs = conn.cursor()
    curs.execute(create_st)
    rows = [dict(zip(header, row)) for row in t.to_rows()]
    for row in rows:
        curs.execute(str(insert), row)
    # Serialize the populated database back to SQL text, ASCII-folded.
    dump = StringIO()
    for line in conn.iterdump():
        dump.write(unidecode(line))
    dump.seek(0)
    return dump.getvalue(), header
示例#3
0
    def main(self):
        """
        Generate a CREATE TABLE statement for each input CSV file, or load
        the files directly into a database when --db is given (--insert
        additionally loads the rows, one transaction per file).
        """
        # Ensure we're handling a list, even if it's just one file
        if not isinstance(self.args.files, list):
            self.args.files = [self.args.files]
        else:
            if self.args.table_name:
                self.argparser.error('The --table argument is only allowed when specifying a single file.')

        # These option combinations do not depend on the file being
        # processed, so validate them once up front instead of re-checking
        # them on every iteration of the file loop.
        if self.args.dialect and self.args.connection_string:
            self.argparser.error('The --dialect option is only valid when --db is not specified.')

        if self.args.insert and not self.args.connection_string:
            self.argparser.error('The --insert option is only valid when --db is also specified.')

        if self.args.no_create and not self.args.insert:
            # BUG FIX: the message previously read "only valid --insert",
            # missing the word "when".
            self.argparser.error('The --no-create option is only valid when --insert is also specified.')

        for f in self.args.files:
            if self.args.table_name:
                table_name = self.args.table_name
            elif f != sys.stdin:
                # Use filename as table name
                table_name = os.path.splitext(os.path.split(f.name)[1])[0]
            else:
                self.argparser.error('The --table argument is required when providing data over STDIN.')

            csv_table = table.Table.from_csv(f, name=table_name, snifflimit=self.args.snifflimit, blanks_as_nulls=(not self.args.blanks), **self.reader_kwargs)

            f.close()

            # Direct connections to database
            if self.args.connection_string:
                try:
                    engine, metadata = sql.get_connection(self.args.connection_string)
                except ImportError:
                    raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n')

                sql_table = sql.make_table(csv_table, table_name, self.args.no_constraints, metadata)

                if not self.args.no_create:
                    sql_table.create()

                if self.args.insert:
                    insert = sql_table.insert()
                    headers = csv_table.headers()

                    # All rows for this file load inside one transaction so
                    # a failure leaves the table empty, not half-populated.
                    conn = engine.connect()
                    trans = conn.begin()
                    for row in csv_table.to_rows():
                        conn.execute(insert, [dict(zip(headers, row)), ])
                    trans.commit()
                    conn.close()

            # Writing to file
            else:
                sql_table = sql.make_table(csv_table, table_name, self.args.no_constraints)
                self.output_file.write((u'%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect)).encode('utf-8'))
示例#4
0
    def main(self):
        """
        Generate a CREATE TABLE statement for the input CSV, or load it
        directly into a database when --db is specified (--insert also
        loads the rows).
        """
        if self.args.table_name:
            table_name = self.args.table_name
        elif self.args.file != sys.stdin:
            # Use filename as table name
            table_name = os.path.splitext(
                os.path.split(self.args.file.name)[1])[0]
        else:
            self.argparser.error(
                'The --table argument is required when providing data over STDIN.'
            )

        # BUG FIX: "loosey" was previously bound only when --loosey was
        # passed, which raised NameError in the make_table() calls below
        # for every run without the flag.  Always bind it.
        loosey = bool(self.args.loosey)

        if self.args.dialect and self.args.connection_string:
            self.argparser.error(
                'The --dialect option is only valid when --db is not specified.'
            )

        if self.args.insert and not self.args.connection_string:
            self.argparser.error(
                'The --insert option is only valid when --db is also specified.'
            )

        csv_table = table.Table.from_csv(self.args.file,
                                         name=table_name,
                                         snifflimit=self.args.snifflimit,
                                         **self.reader_kwargs)

        # Direct connections to database
        if self.args.connection_string:
            try:
                engine, metadata = sql.get_connection(
                    self.args.connection_string)
            except ImportError:
                raise ImportError(
                    'You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n'
                )

            sql_table = sql.make_table(csv_table, table_name, loosey, metadata)
            sql_table.create()

            if self.args.insert:
                insert = sql_table.insert()
                headers = csv_table.headers()

                for row in csv_table.to_rows(serialize_dates=True):
                    engine.execute(insert, [
                        dict(zip(headers, row)),
                    ])

        # Writing to file
        else:
            sql_table = sql.make_table(csv_table, table_name, loosey)
            self.output_file.write((u'%s\n' % sql.make_create_table_statement(
                sql_table, dialect=self.args.dialect)).encode('utf-8'))
示例#5
0
    def test_make_create_table_statement(self):
        """Generated DDL carries sized/constrained column types."""
        sql_table = sql.make_table(self.csv_table, 'csvsql')
        statement = sql.make_create_table_statement(sql_table)

        expected = (
            u"CREATE TABLE test_table (\n"
            u"\ttext VARCHAR(17) NOT NULL, \n"
            u"\tinteger INTEGER, \n"
            u"\tdatetime DATETIME, \n"
            u"\tempty_column VARCHAR(32)\n"
            u");"
        )
        self.assertEqual(statement, expected)
示例#6
0
    def test_make_create_table_statement_no_constraints(self):
        """With constraints disabled, columns carry bare (unsized) types."""
        sql_table = sql.make_table(self.csv_table, 'csvsql', True)
        statement = sql.make_create_table_statement(sql_table)

        expected = u"\n".join([
            u"CREATE TABLE test_table (",
            u"\ttext VARCHAR, ",
            u"\tinteger INTEGER, ",
            u"\tdatetime DATETIME, ",
            u"\tempty_column VARCHAR",
            u");",
        ])
        self.assertEqual(statement, expected)
示例#7
0
    def test_make_create_table_statement(self):
        """Generated DDL carries sized/constrained column types."""
        statement = sql.make_create_table_statement(
            sql.make_table(self.csv_table, 'csvsql'))

        lines = [
            u"CREATE TABLE test_table (",
            u"\ttext VARCHAR(17) NOT NULL, ",
            u"\tinteger INTEGER, ",
            u"\tdatetime DATETIME, ",
            u"\tempty_column VARCHAR(32)",
            u");",
        ]
        self.assertEqual(statement, u"\n".join(lines))
def make_db(fname, tblname):
    conn = sqlite3.connect(':memory:')
    t = Table.from_csv(open(fname, 'rb'), name=tblname)
    sql_table = make_table(t)
    create_st = make_create_table_statement(sql_table)
    print create_st
    insert = sql_table.insert()
    curs = conn.cursor()
    curs.execute(create_st)
    headers = t.headers()
    print headers
    rows = [dict(zip(headers, row)) for row in t.to_rows()]
    for row in rows:
        curs.execute(str(insert), row)
    return curs
def make_db(fname, tblname):
    """
    Load the CSV file ``fname`` into an in-memory SQLite table named
    ``tblname`` and return a cursor bound to the populated database.

    NOTE(review): this is a byte-identical redefinition of the make_db()
    directly above it; one of the two definitions is redundant and this
    later one is the binding that wins at import time.
    """
    conn = sqlite3.connect(':memory:')
    # The file handle opened here is never closed (leaks until GC).
    t = Table.from_csv(open(fname, 'rb'), name=tblname)
    sql_table = make_table(t)
    create_st = make_create_table_statement(sql_table)
    print create_st
    insert = sql_table.insert()
    curs = conn.cursor()
    curs.execute(create_st)
    headers = t.headers()
    print headers
    rows = [dict(zip(headers, row)) for row in t.to_rows()]
    for row in rows:
        curs.execute(str(insert), row)
    return curs
示例#10
0
 def main(self):
     """
     Load the input CSV into an in-memory SQLite table (named after the
     input file) and write the rows produced by the user-supplied SQL
     query back out as CSV.
     """
     # Table name = input filename without directory or extension.
     tabname = os.path.splitext(
         os.path.basename(self.args.file._lazy_args[0]))[0]
     tab = table.Table.from_csv(self.args.file,
                                name=tabname,
                                **self.reader_kwargs)
     stmt = make_create_table_statement(make_table(tab), dialect='sqlite')
     conn = sqlite3.connect(':memory:')
     c = conn.cursor()
     c.execute(stmt)
     # Parameterized INSERT: one "?" placeholder per column in the row.
     for row in tab.to_rows():
         vals = ','.join(['?'] * len(row))
         prepared = "INSERT INTO %s VALUES(%s)" % (tab.name, vals)
         c.execute(prepared, row)
     # Stream the query results straight to the output writer.
     output = CSVKitWriter(self.output_file, **self.writer_kwargs)
     for row in c.execute(self.args.query):
         output.writerow(row)
示例#11
0
    def main(self):
        """
        Generate a CREATE TABLE statement for the input CSV, or load it
        directly into a database when --db is specified (--insert also
        loads the rows).
        """
        if self.args.table_name:
            table_name = self.args.table_name
        elif self.args.file != sys.stdin:
            # Use filename as table name
            table_name = os.path.splitext(os.path.split(self.args.file.name)[1])[0]
        else:
            self.argparser.error('The --table argument is required when providing data over STDIN.')

        # BUG FIX: "loosey" was previously bound only when --loosey was
        # passed, which raised NameError in the make_table() calls below
        # for every run without the flag.  Always bind it.
        loosey = bool(self.args.loosey)

        if self.args.dialect and self.args.connection_string:
            self.argparser.error('The --dialect option is only valid when --db is not specified.')

        if self.args.insert and not self.args.connection_string:
            self.argparser.error('The --insert option is only valid when --db is also specified.')

        csv_table = table.Table.from_csv(self.args.file, name=table_name, snifflimit=self.args.snifflimit, **self.reader_kwargs)

        # Direct connections to database
        if self.args.connection_string:
            try:
                engine, metadata = sql.get_connection(self.args.connection_string)
            except ImportError:
                raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n')

            sql_table = sql.make_table(csv_table, table_name, loosey, metadata)
            sql_table.create()

            if self.args.insert:
                insert = sql_table.insert()
                headers = csv_table.headers()

                for row in csv_table.to_rows(serialize_dates=True):
                    engine.execute(insert, [dict(zip(headers, row)), ])

        # Writing to file
        else:
            sql_table = sql.make_table(csv_table, table_name, loosey)
            self.output_file.write((u'%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect)).encode('utf-8'))
示例#12
0
def csv_to_table():
    # --------------------------------------
    msgt('(2) csvkit to table')
    # --------------------------------------
    fh = open(t4_out, 'rb')
    csv_args = dict(delimiter='\t',\
                    quotechar='"')
    print 'QUOTE_NONE', QUOTE_NONE
    csv_table = table.Table.from_csv(f=fh,\
                            name='tname',\
                            snifflimit=None,\
                            #quoting=QUOTE_NONNUMERIC,\
        #                    **csv_args\
                            )
    for col in csv_table:
        msg('%s, %s' % (col.name, col.type))

    sql_table = csvkit_sql.make_table(csv_table, 'new_table')
    create_table_sql = csvkit_sql.make_create_table_statement(sql_table, dialect="postgresql")
    msg('create_table_sql: %s' % create_table_sql)
    msg(csv_table.to_rows())
示例#13
0
    def main(self):
        """
        Write a CREATE TABLE statement for the input CSV to the output
        file, followed by one INSERT statement per row when --inserts is
        given.
        """
        # Derive the table name from the input filename; fall back to a
        # fixed name when the data arrives over STDIN.
        source_name = self.args.file.name
        if source_name != "<stdin>":
            table_name = os.path.splitext(os.path.split(source_name)[1])[0]
        else:
            table_name = "csvsql_table"

        csv_table = table.Table.from_csv(
            self.args.file,
            name=table_name,
            snifflimit=self.args.snifflimit,
            **self.reader_kwargs
        )
        sql_table = sql.make_table(csv_table)

        create_statement = sql.make_create_table_statement(sql_table, dialect=self.args.dialect)
        self.output_file.write((u"%s\n" % create_statement).encode("utf-8"))

        if self.args.inserts:
            self.output_file.write("\n")
            for row in csv_table.to_rows(serialize_dates=True):
                insert_statement = sql.make_insert_statement(sql_table, row, dialect=self.args.dialect)
                self.output_file.write((u"%s\n" % insert_statement).encode("utf-8"))
 c = conn.cursor()
 # Scrape each committee page and store any reports not already saved.
 # (Fragment: conn, comm_urls and report_scraper are bound earlier, outside
 # this excerpt.)
 for comm_url in comm_urls:
     for report_data in report_scraper.scrape_one(comm_url):
         # Committee id comes from the "id" query parameter of the URL.
         comm_id = parse_qs(urlparse(comm_url).query)['id'][0]
         report_data['committee_id'] = comm_id
         # Round-trip the dict through CSV so csvkit can infer a schema.
         outp = StringIO()
         writer = UnicodeCSVDictWriter(outp, fieldnames=report_data.keys())
         writer.writeheader()
         writer.writerow(report_data)
         outp.seek(0)
         t = Table.from_csv(outp, name='reports')
         sql_table = make_table(t)
         try:
             c.execute('select * from reports limit 1')
         except sqlite3.OperationalError:
             # Table does not exist yet -- create it on first use.
             create_st = make_create_table_statement(sql_table)
             c.execute(create_st)
             conn.commit()
         # Skip reports that have already been stored.
         c.execute('select * from reports where id = ?',
                   (int(report_data['id']), ))
         existing = c.fetchall()
         if not existing:
             insert = sql_table.insert()
             headers = t.headers()
             rows = [dict(zip(headers, row)) for row in t.to_rows()]
             for row in rows:
                 c.execute(str(insert), row)
             conn.commit()
         else:
             print 'Already saved report %s' % report_data['detail_url']
 c.execute('select date_filed from reports order by date_filed limit 1')
示例#15
0
文件: seeder.py 项目: yilab/dbt
 def create_table(self, cursor, schema, table, virtual_table):
     """Create ``schema.table`` in Postgres from a csvkit virtual table."""
     model = csv_sql.make_table(virtual_table, db_schema=schema)
     ddl = csv_sql.make_create_table_statement(model, dialect='postgresql')
     logger.info("Creating table {}.{}".format(schema, table))
     cursor.execute(ddl)
示例#16
0
文件: csvsql.py 项目: rlugojr/csvkit
    def main(self):
        """
        Generate CREATE TABLE statements for the input CSVs, or, when a
        connection string is given (or --query forces an in-memory SQLite
        database), create and optionally populate the tables, run any
        queries, and write the last query's result set as CSV.
        """
        connection_string = self.args.connection_string
        do_insert = self.args.insert
        query = self.args.query

        self.input_files = []

        for path in self.args.input_paths:
            self.input_files.append(self._open_input_file(path))

        if self.args.table_names:
            table_names = self.args.table_names.split(',')
        else:
            table_names = []

        # Create an SQLite database in memory if no connection string is specified
        if query and not connection_string:
            connection_string = "sqlite:///:memory:"
            do_insert = True

        if self.args.dialect and connection_string:
            self.argparser.error('The --dialect option is only valid when --db is not specified.')

        if do_insert and not connection_string:
            self.argparser.error('The --insert option is only valid when --db is also specified.')

        if self.args.no_create and not do_insert:
            # BUG FIX: the message previously read "only valid --insert",
            # missing the word "when".
            self.argparser.error('The --no-create option is only valid when --insert is also specified.')

        # Establish database validity before reading CSV files
        if connection_string:
            try:
                engine, metadata = sql.get_connection(connection_string)
            except ImportError:
                raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n')
            conn = engine.connect()
            trans = conn.begin()

        for f in self.input_files:
            try:
                # Try to use name specified via --table
                table_name = table_names.pop(0)
            except IndexError:
                if f == sys.stdin:
                    table_name = "stdin"
                else:
                    # Use filename as table name
                    table_name = os.path.splitext(os.path.split(f.name)[1])[0]

            csv_table = table.Table.from_csv(
                f,
                name=table_name,
                sniff_limit=self.args.sniff_limit,
                blanks_as_nulls=(not self.args.blanks),
                infer_types=(not self.args.no_inference),
                no_header_row=self.args.no_header_row,
                **self.reader_kwargs
            )

            f.close()

            if csv_table:
                if connection_string:
                    sql_table = sql.make_table(
                        csv_table,
                        table_name,
                        self.args.no_constraints,
                        self.args.db_schema,
                        metadata
                    )

                    # Create table
                    if not self.args.no_create:
                        sql_table.create()

                    # Insert data
                    if do_insert and csv_table.count_rows() > 0:
                        insert = sql_table.insert()
                        headers = csv_table.headers()
                        conn.execute(insert, [dict(zip(headers, row)) for row in csv_table.to_rows()])

                # Output SQL statements
                else:
                    sql_table = sql.make_table(csv_table, table_name, self.args.no_constraints)
                    self.output_file.write('%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect))

        if connection_string:
            if query:
                # Execute specified SQL queries
                queries = query.split(';')
                rows = None

                for q in queries:
                    if q:
                        rows = conn.execute(q)

                # Output result of last query as CSV
                try:
                    output = agate.csv.writer(self.output_file, **self.writer_kwargs)
                    if not self.args.no_header_row:
                        output.writerow(rows._metadata.keys)
                    for row in rows:
                        output.writerow(row)
                except AttributeError:
                    # rows is None (no non-empty query) -- nothing to write.
                    pass

            trans.commit()
            conn.close()
示例#17
0
文件: csvsql.py 项目: higs4281/csvkit
    def main(self):
        """
        Generate CREATE TABLE statements for the input CSVs, or create and
        optionally populate the tables directly when --db is specified.
        """
        # Ensure we're handling a list, even if it's just one file
        if not isinstance(self.args.files, list):
            self.args.files = [self.args.files]
        else:
            if self.args.table_name:
                self.argparser.error(
                    'The --table argument is only allowed when specifying a single file.'
                )

        for f in self.args.files:
            if self.args.table_name:
                table_name = self.args.table_name
            elif f != sys.stdin:
                # Use filename as table name
                table_name = os.path.splitext(os.path.split(f.name)[1])[0]
            else:
                self.argparser.error(
                    'The --table argument is required when providing data over STDIN.'
                )

            if self.args.dialect and self.args.connection_string:
                self.argparser.error(
                    'The --dialect option is only valid when --db is not specified.'
                )

            if self.args.insert and not self.args.connection_string:
                self.argparser.error(
                    'The --insert option is only valid when --db is also specified.'
                )

            if self.args.no_create and not self.args.insert:
                # BUG FIX: the message previously read "only valid --insert",
                # missing the word "when".
                self.argparser.error(
                    'The --no-create option is only valid when --insert is also specified.'
                )

            csv_table = table.Table.from_csv(
                f,
                name=table_name,
                snifflimit=self.args.snifflimit,
                blanks_as_nulls=(not self.args.blanks),
                **self.reader_kwargs)

            f.close()

            # Direct connections to database
            if self.args.connection_string:
                try:
                    engine, metadata = sql.get_connection(
                        self.args.connection_string)
                except ImportError:
                    raise ImportError(
                        'You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n'
                    )

                sql_table = sql.make_table(csv_table, table_name,
                                           self.args.no_constraints, metadata)

                if not self.args.no_create:
                    sql_table.create()

                if self.args.insert:
                    insert = sql_table.insert()
                    headers = csv_table.headers()

                    # All rows for this file load inside one transaction.
                    conn = engine.connect()
                    trans = conn.begin()
                    for row in csv_table.to_rows():
                        conn.execute(insert, [
                            dict(zip(headers, row)),
                        ])
                    trans.commit()
                    conn.close()

            # Writing to file
            else:
                sql_table = sql.make_table(csv_table, table_name,
                                           self.args.no_constraints)
                self.output_file.write(
                    (u'%s\n' % sql.make_create_table_statement(
                        sql_table, dialect=self.args.dialect)).encode('utf-8'))
 c = conn.cursor()
 # Scrape each committee page and store any reports not already saved.
 # (Fragment: conn, comm_urls and report_scraper are bound earlier, outside
 # this excerpt.)
 for comm_url in comm_urls:
     for report_data in report_scraper.scrape_one(comm_url):
         # Committee id comes from the "id" query parameter of the URL.
         comm_id = parse_qs(urlparse(comm_url).query)['id'][0]
         report_data['committee_id'] = comm_id
         # Round-trip the dict through CSV so csvkit can infer a schema.
         outp = StringIO()
         writer = UnicodeCSVDictWriter(outp, fieldnames=report_data.keys())
         writer.writeheader()
         writer.writerow(report_data)
         outp.seek(0)
         t = Table.from_csv(outp, name='reports')
         sql_table = make_table(t)
         try:
             c.execute('select * from reports limit 1')
         except sqlite3.OperationalError:
             # Table does not exist yet -- create it on first use.
             create_st = make_create_table_statement(sql_table)
             c.execute(create_st)
             conn.commit()
         # Skip reports that have already been stored.
         c.execute('select * from reports where id = ?', (int(report_data['id']),))
         existing = c.fetchall()
         if not existing:
             insert = sql_table.insert()
             headers = t.headers()
             rows = [dict(zip(headers, row)) for row in t.to_rows()]
             for row in rows:
                 c.execute(str(insert), row)
             conn.commit()
         else:
             print 'Already saved report %s' % report_data['detail_url']
 c.execute('select date_filed from reports order by date_filed limit 1')
 # Year of the earliest filed report drives the scrape window below.
 oldest_year = parser.parse(c.fetchone()[0]).year
示例#19
0
    def main(self):
        """
        Generate CREATE TABLE statements for the input CSVs, or, when a
        connection string is given (or --query forces an in-memory SQLite
        database), create and optionally populate the tables, run any
        queries, and write the last query's result set as CSV.
        """
        connection_string = self.args.connection_string
        do_insert = self.args.insert
        query = self.args.query

        self.input_files = []

        for path in self.args.input_paths:
            self.input_files.append(self._open_input_file(path))

        if self.args.table_names:
            table_names = self.args.table_names.split(',')
        else:
            table_names = []

        # If one or more filenames are specified, we need to add stdin ourselves (if available)
        if sys.stdin not in self.input_files:
            try:
                if not sys.stdin.isatty():
                    self.input_files.insert(0, sys.stdin)
            # BUG FIX: was a bare "except:", which also swallows
            # KeyboardInterrupt/SystemExit; narrowed to Exception.
            except Exception:
                pass

        # Create an SQLite database in memory if no connection string is specified
        if query and not connection_string:
            connection_string = "sqlite:///:memory:"
            do_insert = True

        if self.args.dialect and connection_string:
            self.argparser.error('The --dialect option is only valid when --db is not specified.')

        if do_insert and not connection_string:
            self.argparser.error('The --insert option is only valid when --db is also specified.')

        if self.args.no_create and not do_insert:
            # BUG FIX: the message previously read "only valid --insert",
            # missing the word "when".
            self.argparser.error('The --no-create option is only valid when --insert is also specified.')

        # Establish database validity before reading CSV files
        if connection_string:
            try:
                engine, metadata = sql.get_connection(connection_string)
            except ImportError:
                raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n')
            conn = engine.connect()
            trans = conn.begin()

        for f in self.input_files:
            try:
                # Try to use name specified via --table
                table_name = table_names.pop(0)
            except IndexError:
                if f == sys.stdin:
                    table_name = "stdin"
                else:
                    # Use filename as table name
                    table_name = os.path.splitext(os.path.split(f.name)[1])[0]

            csv_table = table.Table.from_csv(
                f,
                name=table_name,
                snifflimit=self.args.snifflimit,
                blanks_as_nulls=(not self.args.blanks),
                infer_types=(not self.args.no_inference),
                no_header_row=self.args.no_header_row,
                **self.reader_kwargs
            )

            f.close()

            if connection_string:
                sql_table = sql.make_table(
                    csv_table,
                    table_name,
                    self.args.no_constraints,
                    self.args.db_schema,
                    metadata
                )

                # Create table
                if not self.args.no_create:
                    sql_table.create()

                # Insert data
                if do_insert and csv_table.count_rows() > 0:
                    insert = sql_table.insert()
                    headers = csv_table.headers()
                    conn.execute(insert, [dict(zip(headers, row)) for row in csv_table.to_rows()])

            # Output SQL statements
            else:
                sql_table = sql.make_table(csv_table, table_name, self.args.no_constraints)
                self.output_file.write('%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect))

        if connection_string:
            if query:
                # Execute specified SQL queries
                queries = query.split(';')
                rows = None

                for q in queries:
                    if q:
                        rows = conn.execute(q)

                # Output result of last query as CSV
                try:
                    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
                    if not self.args.no_header_row:
                        output.writerow(rows._metadata.keys)
                    for row in rows:
                        output.writerow(row)
                except AttributeError:
                    # rows is None (no non-empty query) -- nothing to write.
                    pass

            trans.commit()
            conn.close()
示例#20
0
def process_csv_file(data_table,
                     is_dataverse_db,
                     delimiter=",",
                     no_header_row=False,
                     force_char_column=None):
    """
    Transform csv file and add it to the postgres DataStore

    :param data_table: DataTable object; its uploaded_file is the csv source
    :param is_dataverse_db: passed through to the datastore connection helpers
    :param delimiter: csv field delimiter
    :param no_header_row: True when the csv file has no header row
    :param force_char_column: optional column name forced to character type
    :return:
        success:  (datatable, "")
        err:    (None, error message)
    """
    assert isinstance(data_table,
                      DataTable), "instance must be a DataTable object"

    # full path to csv file
    #
    csv_filename = data_table.uploaded_file.path

    # Standardize table_name for the DataTable
    #
    if data_table.id is not None:
        # This DataTable already has a unique name
        table_name = data_table.table_name
    else:
        # Get a unique name for the data table
        table_name = os.path.splitext(os.path.basename(csv_filename))[0]
        table_name = get_unique_tablename(table_name)

        data_table.table_name = table_name
        data_table.save()

    # -----------------------------------------------------
    # Transform csv file to csvkit Table
    # -----------------------------------------------------
    csv_file_handle = open(csv_filename, 'rb')

    try:
        csv_table = table.Table.from_csv(\
                                csv_file_handle,
                                name=table_name,
                                no_header_row=no_header_row,
                                delimiter=delimiter)
    except Exception:
        data_table.delete()
        err_msg = str(sys.exc_info()[0])
        LOGGER.error('Failed to convert csv file to table.  Error: %s'\
                , err_msg)
        return None, err_msg
    finally:
        # Close the handle whether or not parsing succeeded
        # (previously it leaked when from_csv raised)
        csv_file_handle.close()

    # -----------------------------------------------------
    # If needed, force a column to be character
    # -----------------------------------------------------
    csv_table = force_csv_column_tochar(csv_table,\
                    force_char_column)

    # -----------------------------------------------------
    # Create DataTableAttribute objects
    # -----------------------------------------------------
    try:
        # Iterate through header row
        #
        for column in csv_table:
            # Standardize column name
            #
            column.name = standardize_column_name(column.name)

            # Hide columns that csvkit could not name
            #
            is_visible = column.name != '_unnamed'

            attribute, created = DataTableAttribute.objects.get_or_create(\
                    datatable=data_table,
                    attribute=column.name,
                    attribute_label=column.name,
                    attribute_type=column.type.__name__,
                    display_order=column.order,
                    visible=is_visible)
    except Exception:
        # Deleting DataTable also deletes related DataTableAttribute objects
        data_table.delete()
        err_msg = 'Failed to convert csv file to table.  Error: %s' % str(
            sys.exc_info()[0])
        LOGGER.error(err_msg)
        return None, err_msg

    msg('process_csv_file 3')
    # -----------------------------------------------------
    # Generate SQL to create table from csv file
    # -----------------------------------------------------
    try:
        sql_table = sql.make_table(csv_table, table_name)
        create_table_sql = sql.make_create_table_statement(
            sql_table, dialect="postgresql")
        data_table.create_table_sql = create_table_sql
        data_table.save()
    except Exception:
        data_table.delete()
        err_msg = 'Generate SQL to create table from csv file.  Error: %s' % str(
            sys.exc_info()[0])
        LOGGER.error(err_msg)
        return None, err_msg

    msg('process_csv_file 4')

    # -----------------------------------------------------
    # Execute the SQL and Create the Table (No data is loaded)
    # -----------------------------------------------------
    conn = psycopg2.connect(
        get_datastore_connection_string(is_dataverse_db=is_dataverse_db))

    try:
        cur = conn.cursor()
        # Drop any stale table with the same name before creating
        cur.execute('drop table if exists %s CASCADE;' % table_name)
        cur.execute(create_table_sql)
        conn.commit()
        cur.close()
    except Exception as e:
        traceback.print_exc(sys.exc_info())
        err_msg = "Error Creating table %s:%s" % (data_table.name, str(e))
        LOGGER.error(err_msg)
        return None, err_msg

    finally:
        conn.close()

    # -----------------------------------------------------
    # Copy the csv data to Postgres
    # -----------------------------------------------------
    connection_string = get_datastore_connection_string(\
                                        url_format=True,
                                        is_dataverse_db=is_dataverse_db)
    try:
        engine, metadata = sql.get_connection(connection_string)
    except ImportError:
        err_msg = ("Failed to get SQL connection"
                   "for copying csv data to database."
                   "\n{0}".format(str(sys.exc_info()[0])))
        LOGGER.error(err_msg)
        return None, err_msg

    # -----------------------------------------------------
    # Iterate through rows and add data
    # -----------------------------------------------------
    conn = engine.connect()
    trans = conn.begin()

    if csv_table.count_rows() > 0:
        insert = sql_table.insert()  # Generate insert statement
        headers = csv_table.headers()  # Pull table headers
        try:
            # create rows of { column : value } dict's
            #
            rows_to_add = [
                dict(zip(headers, row)) for row in csv_table.to_rows()
            ]

            # Add rows
            conn.execute(insert, rows_to_add)
        except Exception:
            # Clean up after ourselves
            trans.rollback()
            conn.close()
            # Bug fix: original called undefined name 'instance' here,
            # which raised NameError and masked the real insert error
            data_table.delete()
            err_msg = "Failed to add csv DATA to table %s.\n%s" %\
                        (table_name, (sys.exc_info()[0]))
            LOGGER.error(err_msg)
            return None, err_msg

    # Commit new rows and close connection
    #
    trans.commit()
    conn.close()

    return data_table, ""
示例#21
0
def process_csv_file(absolute_base_file, table_name_temp, new_table,
                     geom_table_name, geom_table_id, geom_table_columns,
                     geom_table_geom):
    """
    Load a CSV into a temporary Postgres table, validate it against an
    existing geometry table, and materialize an inner join of the two as
    `new_table` (with a serial primary key plus btree and GIST indexes).

    :param absolute_base_file: filesystem path of the uploaded CSV
    :param table_name_temp: name for the temporary staging table
    :param new_table: name for the final joined table
    :param geom_table_name: geometry table to join against (aliased 'g' in the join SQL)
    :param geom_table_id: join key column; must equal the CSV's first header
    :param geom_table_columns: column list pulled from the geometry table
    :param geom_table_geom: geometry column used for the GIST index
    :return: (errormsgs_val, status_code) — ('', 200) on success,
             (message, '400') on validation failure,
             (None, message) on unexpected errors
    """
    # Create table based on CSV
    import csv
    # NOTE(review): 'f' is never closed on any path — leaks a file handle
    f = open(absolute_base_file, 'rb')
    no_header_row = False

    with open(absolute_base_file, 'rb') as csvfile:
        # get the type of delimiter
        dialect = csv.Sniffer().sniff(csvfile.read())

    try:
        csv_table = table.Table.from_csv(f,
                                         name=table_name_temp,
                                         no_header_row=no_header_row,
                                         delimiter=dialect.delimiter)
    except:
        status_code = '400'
        errormsgs_val = "Failed to create the table from CSV."
        return errormsgs_val, status_code

    # Slugify headers so they are valid Postgres identifiers
    for idx, column in enumerate(csv_table):
        column.name = slugify(unicode(column.name)).replace('-', '_')
        # Check if the selected value from the dropdown menu matches the first value of the CSV header
        if idx == 0:
            print("column.name.strip()", column.name.strip())
            print("geom_table_id.strip()", geom_table_id.strip())
            if column.name.strip() != geom_table_id.strip():
                errormsgs_val = "The selected value of Layer Type doesn't match the one of the imported layer."
                status_code = '400'
                return errormsgs_val, status_code
    # Check if there are added columns in the CSV
    # NOTE(review): 'idx' is the leftover loop variable — NameError if the CSV has no columns
    if idx < 2:
        errormsgs_val = "The CSV has no added columns. Please add extra columns."
        status_code = '400'
        return errormsgs_val, status_code
    else:
        try:
            sql_table = sql.make_table(csv_table, table_name_temp)
            create_table_sql = sql.make_create_table_statement(
                sql_table, dialect="postgresql")
            # Clamp every inferred VARCHAR width to 254
            create_table_sql = re.sub(r'VARCHAR\([0-9]*\)', 'VARCHAR(254)',
                                      create_table_sql)
        except:
            return None, str(sys.exc_info()[0])

        # NOTE(review): '******' placeholders look paste-censored; the user/password
        # fields are presumably settings.DATABASES['uploaded'] values — confirm upstream
        constr = "dbname='{dbname}' user='******' host='{host}' password='******'".format(
            **{
                'dbname': settings.DATABASES['uploaded']['NAME'],
                'user': settings.DATABASES['uploaded']['USER'],
                'host': settings.DATABASES['uploaded']['HOST'],
                'password': settings.DATABASES['uploaded']['PASSWORD']
            })
        conn = psycopg2.connect(constr)

        try:
            # Check if there is already a table with the same name
            cur = conn.cursor()

            sqlstr = "SELECT EXISTS(SELECT * FROM information_schema.tables WHERE table_name='{new_table_name}');".format(
                **{'new_table_name': new_table})
            cur.execute(sqlstr)
            exists = cur.fetchone()[0]
            if exists:
                errormsgs_val = "There is already a layer with this name. Please choose another title."
                status_code = '400'
                return errormsgs_val, status_code

            #  If temporary table exists then drop it - the create it and add primary key
            cur.execute('DROP TABLE IF EXISTS %s CASCADE;' % table_name_temp)
            cur.execute(create_table_sql)
            conn.commit()
            sqlstr = "ALTER TABLE IF EXISTS {temp_table} ADD COLUMN fid SERIAL PRIMARY KEY;".format(
                **{'temp_table': table_name_temp})
            cur.execute(sqlstr)
            conn.commit()
        except Exception as e:
            # NOTE(review): error is only logged; execution continues into the copy step
            logger.error("Error Creating Temporary table %s:%s",
                         table_name_temp, str(e))

        #  Copy data to table
        connection_string = "postgresql://%s:%s@%s:%s/%s" % (
            settings.DATABASES['uploaded']['USER'],
            settings.DATABASES['uploaded']['PASSWORD'],
            settings.DATABASES['uploaded']['HOST'],
            settings.DATABASES['uploaded']['PORT'],
            settings.DATABASES['uploaded']['NAME'])
        try:
            engine, metadata = sql.get_connection(connection_string)
        except ImportError:
            return None, str(sys.exc_info()[0])

        conn_eng = engine.connect()
        trans = conn_eng.begin()

        # Bulk-insert every CSV row as a {header: value} dict
        if csv_table.count_rows() > 0:
            insert = sql_table.insert()
            headers = csv_table.headers()
            try:
                conn_eng.execute(
                    insert,
                    [dict(zip(headers, row)) for row in csv_table.to_rows()])
            except:
                return None, str(sys.exc_info()[0])

        trans.commit()
        conn_eng.close()

        # Create joined table - drop table_name_temp
        # NOTE(review): 'headers' is only bound inside the count_rows() > 0 branch
        # above — NameError here for an empty CSV
        new_clmns = []
        for idx, item in enumerate(headers):
            if (
                    idx > 1
            ):  # The downloaded layer contains two columns from the global table, which do not include them again
                new_column = "{table_name}.{item}".format(**{
                    'table_name': table_name_temp,
                    'item': item
                })
                new_clmns.append(new_column)

        added_columns = ', '.join(new_clmns)
        try:

            # Joined table
            sqlstr = "CREATE TABLE {new_table_name} AS (SELECT {geom_table_columns}, {added_columns} FROM {geom_table} INNER JOIN {temp_table} ON (g.{id} = {temp_table}.{id}));".format(
                **{
                    'new_table_name': new_table,
                    'geom_table': geom_table_name,
                    'geom_table_columns': geom_table_columns,
                    'temp_table': table_name_temp,
                    'id': geom_table_id,
                    'added_columns': added_columns
                })
            cur.execute(sqlstr)
            conn.commit()
            # Give the joined table its own serial primary key
            sqlstr = "ALTER TABLE IF EXISTS {new_table_name} ADD COLUMN fid SERIAL PRIMARY KEY;".format(
                **{'new_table_name': new_table})
            cur.execute(sqlstr)
            conn.commit()

            # btree index on the join key
            sqlstr = "CREATE INDEX indx_{new_table_name} ON {new_table_name} USING btree({id});".format(
                **{
                    'new_table_name': new_table,
                    'id': geom_table_id,
                })
            cur.execute(sqlstr)
            conn.commit()
            # spatial (GIST) index on the geometry column
            sqlstr = "CREATE INDEX indx_geom_{new_table_name} ON {new_table_name} USING GIST({geom});".format(
                **{
                    'new_table_name': new_table,
                    'geom': geom_table_geom,
                })
            cur.execute(sqlstr)
            conn.commit()

        except:
            print "Failed to create joined table."
            logger.error("Failed to create joined table.")

        # Always try to drop the staging table, even if the join failed
        try:
            sqlstr = "DROP TABLE IF EXISTS {temp_table} CASCADE;".format(
                **{'temp_table': table_name_temp})
            cur.execute(sqlstr)
            conn.commit()
        except:
            logger.error("Failed to drop temporary table.")
        conn.close()

        status_code = 200
        errormsgs_val = ''
        return errormsgs_val, status_code
示例#22
0
 def test_make_create_table_statement_with_dialects(self):
     """Smoke-test CREATE TABLE rendering for every supported SQL dialect."""
     # Rebuild the schema-qualified table per dialect; an unsupported
     # dialect would raise inside make_create_table_statement.
     for current_dialect in sql.DIALECTS:
         schema_table = sql.make_table(
             self.csv_table, 'csvsql', db_schema='test_schema')
         statement = sql.make_create_table_statement(schema_table, current_dialect)
示例#23
0
文件: seeder.py 项目: cmcarthur/dbt
 def create_table(self, cursor, schema, table, virtual_table):
     """Render a postgres CREATE TABLE statement for virtual_table and execute it."""
     # Convert the csvkit virtual table into a schema-qualified SQL table,
     # then generate the postgres DDL for it.
     schema_table = csv_sql.make_table(virtual_table, db_schema=schema)
     ddl = csv_sql.make_create_table_statement(schema_table, dialect='postgresql')
     print("Creating table {}.{}".format(schema, table))
     cursor.execute(ddl)
示例#24
0
def csv_to_table2():
    """
    Scratch/debug routine: read a tab-delimited file into a csvkit Table,
    coerce the last column to unicode strings, and dump rows/column types.

    NOTE(review): everything after the first bare `return` is unreachable
    experimental code kept for reference; the trailing '"""' opens a
    commented-out block that continues beyond this snippet.
    """
    # --------------------------------------
    msgt('(3) csvkit to table reformat')
    # --------------------------------------
    fh = open(t4_out, 'rb')
    # NOTE(review): csv_args is built but never passed to from_csv
    csv_args = dict(delimiter='\t',\
                    quotechar='"')
    csv_table = table.Table.from_csv(f=fh,\
                            name='tname',\
                            snifflimit=None,\
                            )
    print [c.name for c in csv_table]


    # Force the last column to unicode and stringify its values in place
    last_col = csv_table[-1]
    last_col.type = unicode

    for idx, val in enumerate(last_col):
        last_col[idx] = '%s' % val
    #last_col = ['%s' % x for x in last_col]
    #print last_col[0]

    msg(csv_table.to_rows())

    print [ '%s, %s' % (c.name, c.type) for c in csv_table]

    return

    # ---- unreachable from here down (dead experimental code) ----
    print 'last_col', last_col.order
    col_num = len(csv_table)
    print 'col_num', col_num

    quoted_data = [u'"%s"' % val for val in last_col]
    print 'quoted_data', quoted_data
    #return

    # Rebuild the last column with quoted values and swap it into the table
    new_column = table.Column(order=last_col.order,\
                            name=last_col.name,\
                            l=quoted_data,\
                            #normal_type=None,\
                            )
                            #normal_type=None)

    csv_table.pop(-1)



    csv_table.append(new_column)

    sql_table = csvkit_sql.make_table(csv_table, 'new_table')
    create_table_sql = csvkit_sql.make_create_table_statement(sql_table, dialect="postgresql")
    msg('create_table_sql: %s' % create_table_sql)

    msg(csv_table.to_rows())

    return
    msgt('new_column')
    msg(new_column)
    print new_column.name
    for val in new_column: print val
    #print len(new_column)

    """
示例#25
0
 def test_make_create_table_statement_with_dialects(self):
     """Ensure make_create_table_statement succeeds for each known dialect."""
     for sql_dialect in sql.DIALECTS:
         # Build the table fresh each iteration, then render its DDL.
         built_table = sql.make_table(self.csv_table,
                                      'csvsql',
                                      db_schema='test_schema')
         statement = sql.make_create_table_statement(built_table, sql_dialect)
示例#26
0
    new_column = table.Column(order=last_col.order,\
                            name=last_col.name,\
                            l=quoted_data,\
                            #normal_type=None,\
                            )
                            #normal_type=None)

    csv_table.pop(-1)



    csv_table.append(new_column)

    sql_table = csvkit_sql.make_table(csv_table, 'new_table')
    create_table_sql = csvkit_sql.make_create_table_statement(sql_table, dialect="postgresql")
    msg('create_table_sql: %s' % create_table_sql)

    msg(csv_table.to_rows())

    return
    msgt('new_column')
    msg(new_column)
    print new_column.name
    for val in new_column: print val
    #print len(new_column)

    """
    print csv_table.columns
    for col in csv_table:
        msg('%s, %s' % (col.name, col.type))