def insert_data_from_file(self, filename):
    """Use PostgreSQL's "COPY FROM" statement to perform a bulk insert.

    COPY is only attempted when the table needs no cleanup, is
    comma-delimited with one header row, is not fixed-width, has no
    cross-tab columns, and has not opted out via do_not_bulk_insert.
    Otherwise — or if COPY fails — falls back to the generic
    Engine.insert_data_from_file row-by-row path.
    """
    self.get_cursor()
    # True when any column is a cross-tab ("ct-") column; those tables
    # cannot be bulk loaded directly.
    ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
    if (([self.table.cleanup.function,
          self.table.delimiter,
          self.table.header_rows] == [no_cleanup, ",", 1]) and
            not self.table.fixed_width and
            not ct and
            (not hasattr(self.table, "do_not_bulk_insert") or
             not self.table.do_not_bulk_insert)):
        columns = self.table.get_insert_columns()
        filename = os.path.abspath(filename)
        statement = """
COPY """ + self.table_name() + " (" + columns + """)
FROM '""" + filename.replace("\\", "\\\\") + """'
WITH DELIMITER ','
CSV HEADER;"""
        try:
            self.execute("BEGIN")
            self.execute(statement)
            self.execute("COMMIT")
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # propagate; any COPY failure rolls back and falls through to
            # the slower generic insert.
            self.connection.rollback()
            return Engine.insert_data_from_file(self, filename)
    else:
        return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Use PostgreSQL's "COPY FROM" statement to perform a bulk insert.

    Only attempts COPY for clean, comma-delimited, single-header-row,
    non-fixed-width, non-cross-tab tables that have not opted out of
    bulk inserting; anything else (or a COPY failure) delegates to the
    generic Engine.insert_data_from_file path.
    """
    self.get_cursor()
    # True when any column is a cross-tab ("ct-") column.
    ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
    if (([self.table.cleanup.function, self.table.delimiter,
          self.table.header_rows] == [no_cleanup, ",", 1]) and
            not self.table.fixed_width and not ct and
            (not hasattr(self.table, "do_not_bulk_insert") or
             not self.table.do_not_bulk_insert)):
        columns = self.table.get_insert_columns()
        filename = os.path.abspath(filename)
        statement = """
COPY """ + self.table_name() + " (" + columns + """)
FROM '""" + filename.replace("\\", "\\\\") + """'
WITH DELIMITER ','
CSV HEADER;"""
        try:
            self.execute("BEGIN")
            self.execute(statement)
            self.execute("COMMIT")
        except Exception:
            # Catch Exception rather than BaseException so
            # KeyboardInterrupt/SystemExit are not swallowed by the
            # fallback path.
            self.connection.rollback()
            return Engine.insert_data_from_file(self, filename)
    else:
        return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Calls MySQL "LOAD DATA LOCAL INFILE" statement to perform a bulk insert.

    Bulk loading is skipped (and the generic Engine insert used) when
    the table needs cleanup, is fixed-width, has cross-tab columns, or
    opts out via do_not_bulk_insert; it is also used as the fallback
    when LOAD DATA itself fails.
    """
    self.get_cursor()
    # True when any column is a cross-tab ("ct-") column.
    ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
    if (self.table.cleanup.function == no_cleanup and
            not self.table.fixed_width and
            not ct and
            (not hasattr(self.table, "do_not_bulk_insert") or
             not self.table.do_not_bulk_insert)):
        print("Inserting data from " + os.path.basename(filename) + "...")
        columns = self.table.get_insert_columns()
        statement = """
LOAD DATA LOCAL INFILE '""" + filename.replace("\\", "\\\\") + """'
INTO TABLE """ + self.table_name() + """
FIELDS TERMINATED BY '""" + self.table.delimiter + """'
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\\n'
IGNORE """ + str(self.table.header_rows) + """ LINES
(""" + columns + ")"
        try:
            self.cursor.execute(statement)
        except Exception as e:
            # Converted to a print() call — the original Python 2 print
            # statement is a SyntaxError under Python 3.
            print("Failed bulk insert (%s), inserting manually" % e)
            # If the execute fails the database connection can get hung up
            self.disconnect()
            return Engine.insert_data_from_file(self, filename)
    else:
        return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Use executemany to perform a high speed bulk insert

    Checks to see if a given file can be bulk inserted, and if so
    loads it in chunks and inserts those chunks into the database
    using executemany.
    """
    CHUNK_SIZE = 1000000
    self.get_cursor()
    # True when any column is a cross-tab ("ct-") column.
    ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
    if (([self.table.cleanup.function, self.table.header_rows] ==
         [no_cleanup, 1]) and
            not self.table.fixed_width and not ct and
            (not hasattr(self.table, "do_not_bulk_insert") or
             not self.table.do_not_bulk_insert)):
        filename = os.path.abspath(filename)
        try:
            bulk_insert_statement = self.get_bulk_insert_statement()
            with open(filename, 'r') as data_file:
                data_chunk = data_file.readlines(CHUNK_SIZE)
                # Drop the header row(s) from the first chunk.
                del data_chunk[:self.table.header_rows]
                while data_chunk:
                    data_chunk_split = [row.split(self.table.delimiter)
                                        for row in data_chunk]
                    self.cursor.executemany(bulk_insert_statement,
                                            data_chunk_split)
                    data_chunk = data_file.readlines(CHUNK_SIZE)
            self.connection.commit()
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # propagate; any failure rolls back and falls back to the
            # row-by-row insert. (Also removed an unused
            # get_insert_columns() local.)
            self.connection.rollback()
            return Engine.insert_data_from_file(self, filename)
    else:
        return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Calls MySQL "LOAD DATA LOCAL INFILE" statement to perform a bulk insert.

    Falls back to the generic Engine.insert_data_from_file when the
    table needs cleanup, is fixed-width, has cross-tab columns, opts
    out of bulk inserting, or when LOAD DATA fails.
    """
    self.get_cursor()
    # True when any column is a cross-tab ("ct-") column.
    ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
    if (self.table.cleanup.function == no_cleanup and
            not self.table.fixed_width and not ct and
            (not hasattr(self.table, "do_not_bulk_insert") or
             not self.table.do_not_bulk_insert)):
        print("Inserting data from " + os.path.basename(filename) + "...")
        columns = self.table.get_insert_columns()
        statement = """
LOAD DATA LOCAL INFILE '""" + filename.replace("\\", "\\\\") + """'
INTO TABLE """ + self.table_name() + """
FIELDS TERMINATED BY '""" + self.table.delimiter + """'
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\\n'
IGNORE """ + str(self.table.header_rows) + """ LINES
(""" + columns + ")"
        try:
            self.cursor.execute(statement)
        except Exception as e:
            # Converted to a print() call — the original Python 2 print
            # statement is a SyntaxError under Python 3.
            print("Failed bulk insert (%s), inserting manually" % e)
            # If the execute fails the database connection can get hung up
            self.disconnect()
            return Engine.insert_data_from_file(self, filename)
    else:
        return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Call MySQL "LOAD DATA LOCAL INFILE" statement to perform a bulk insert.

    Autocommit and uniqueness/foreign-key checks are switched off
    around the load for speed and restored afterwards. Falls back to
    the generic Engine insert for non-bulk-loadable tables or on a
    LOAD DATA failure.
    """
    mysql_set_autocommit_off = """SET autocommit=0; SET UNIQUE_CHECKS=0; SET FOREIGN_KEY_CHECKS=0; SET sql_log_bin=0;"""
    mysql_set_autocommit_on = """SET GLOBAL innodb_flush_log_at_trx_commit=1; COMMIT; SET autocommit=1; SET unique_checks=1; SET foreign_key_checks=1;"""
    self.get_cursor()
    # True when any column is a cross-tab ("ct-") column.
    ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
    if (self.table.cleanup.function == no_cleanup and
            not self.table.fixed_width and not ct and
            (not hasattr(self.table, "do_not_bulk_insert") or
             not self.table.do_not_bulk_insert)):
        print("Inserting data from " + os.path.basename(filename) + "...")
        columns = self.table.get_insert_columns()
        statement = """
LOAD DATA LOCAL INFILE '""" + filename.replace("\\", "\\\\") + """'
INTO TABLE """ + self.table_name() + """
FIELDS TERMINATED BY '""" + self.table.delimiter + """'
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\\n'
IGNORE """ + str(self.table.header_rows) + """ LINES
(""" + columns + ")"
        try:
            self.cursor.execute(mysql_set_autocommit_off)
            self.cursor.execute(statement)
            self.cursor.execute(mysql_set_autocommit_on)
        except Exception:
            # Dropped the unused "as e" binding.
            # If the execute fails the database connection can get hung up
            self.disconnect()
            # NOTE(review): this executes on the cursor right after
            # disconnect(); looks like it relies on disconnect() leaving a
            # usable cursor — confirm against the engine's disconnect
            # semantics.
            self.cursor.execute(mysql_set_autocommit_on)
            return Engine.insert_data_from_file(self, filename)
    else:
        return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Use PostgreSQL's "COPY FROM" statement to perform a bulk insert.

    Current postgres engine bulk only supports comma delimiter
    """
    self.get_cursor()
    bulk_checks = [self.check_bulk_insert(),
                   self.table.delimiter,
                   self.table.header_rows]
    if bulk_checks == [True, ",", 1]:
        insert_cols = self.table.get_insert_columns()
        abs_path = os.path.normpath(os.path.abspath(filename))
        copy_stmt = ("""
COPY """ + self.table_name() + " (" + insert_cols + """)
FROM '""" + abs_path + """'
WITH DELIMITER ','
CSV HEADER;""")
        try:
            # Wrap COPY in an explicit transaction.
            for sql in ("BEGIN", copy_stmt, "COMMIT"):
                self.execute(sql)
            print("Bulk insert on .. ", self.table_name())
            return True
        except Exception:
            self.connection.rollback()
    # Non-bulk-loadable table or failed COPY: generic row-by-row path.
    return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Call MySQL "LOAD DATA LOCAL INFILE" statement to perform a bulk insert."""
    mysql_set_autocommit_off = """SET autocommit=0;"""
    mysql_set_autocommit_on = """SET autocommit=1;"""
    self.get_cursor()
    if self.check_bulk_insert():
        print("Inserting data from " + os.path.basename(filename) + "...")
        insert_cols = self.table.get_insert_columns()
        load_stmt = ("""
BEGIN;
LOAD DATA LOCAL INFILE '""" + filename.replace("\\", "\\\\") + """'
INTO TABLE """ + self.table_name() + """
FIELDS TERMINATED BY '""" + self.table.delimiter + """'
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\\n'
IGNORE """ + str(self.table.header_rows) + """ LINES
(""" + insert_cols + "); COMMIT;")
        try:
            self.cursor.execute(mysql_set_autocommit_off)
            self.cursor.execute(load_stmt)
            self.cursor.execute(mysql_set_autocommit_on)
            return None
        except Exception:
            self.cursor.execute("ROLLBACK;")
            # A failed LOAD DATA can leave the connection hung up.
            self.disconnect()
            self.cursor.execute(mysql_set_autocommit_on)
    # Not bulk-loadable, or the bulk load failed: generic insert.
    return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Perform a high speed bulk insert

    Checks to see if a given file can be bulk inserted, and if so
    loads it in chunks and inserts those chunks into the database
    using executemany.
    """
    chunk_size = 1000000
    self.get_cursor()
    if [self.check_bulk_insert(), self.table.header_rows] == [True, 1]:
        filename = os.path.abspath(filename)
        try:
            bulk_insert_statement = self.get_bulk_insert_statement()
            line_endings = set(['\n', '\r', '\r\n'])
            with open(filename, 'r') as data_file:

                def clean_chunk(lines):
                    # Drop blank lines and strip trailing newlines.
                    return [line.rstrip('\r\n') for line in lines
                            if line not in line_endings]

                data_chunk = clean_chunk(data_file.readlines(chunk_size))
                del data_chunk[:self.table.header_rows]
                while data_chunk:
                    data_chunk_split = [row.split(self.table.delimiter)
                                        for row in data_chunk]
                    self.cursor.executemany(bulk_insert_statement,
                                            data_chunk_split)
                    # Fix: originally only the first chunk was cleaned, so
                    # later chunks kept '\n' in their last field.
                    data_chunk = clean_chunk(data_file.readlines(chunk_size))
            self.connection.commit()
            return True
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # propagate.
            self.connection.rollback()
    return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Perform a high speed bulk insert

    Checks to see if a given file can be bulk inserted, and if so
    loads it in chunks and inserts those chunks into the database
    using executemany.
    """
    chunk_size = 1000000
    self.get_cursor()
    # Determine if the dataset includes cross-tab data
    crosstab = len(
        [True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
    if (([self.table.cleanup.function, self.table.header_rows] ==
         [no_cleanup, 1]) and
            not self.table.fixed_width and not crosstab and
            (not hasattr(self.table, "do_not_bulk_insert") or
             not self.table.do_not_bulk_insert)):
        filename = os.path.abspath(filename)
        try:
            bulk_insert_statement = self.get_bulk_insert_statement()
            line_endings = set(['\n', '\r', '\r\n'])
            with open(filename, 'r') as data_file:

                def clean_chunk(lines):
                    # Drop blank lines and strip trailing newlines.
                    return [line.rstrip('\r\n') for line in lines
                            if line not in line_endings]

                data_chunk = clean_chunk(data_file.readlines(chunk_size))
                del data_chunk[:self.table.header_rows]
                while data_chunk:
                    data_chunk_split = [row.split(self.table.delimiter)
                                        for row in data_chunk]
                    self.cursor.executemany(bulk_insert_statement,
                                            data_chunk_split)
                    # Fix: originally only the first chunk was cleaned, so
                    # later chunks kept '\n' in their last field.
                    data_chunk = clean_chunk(data_file.readlines(chunk_size))
            self.connection.commit()
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # propagate.
            self.connection.rollback()
            return Engine.insert_data_from_file(self, filename)
    else:
        return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Use executemany to perform a high speed bulk insert

    Checks to see if a given file can be bulk inserted, and if so
    loads it in chunks and inserts those chunks into the database
    using executemany.
    """
    CHUNK_SIZE = 1000000
    self.get_cursor()
    # True when any column is a cross-tab ("ct-") column.
    ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
    if (([self.table.cleanup.function, self.table.header_rows] ==
         [no_cleanup, 1]) and
            not self.table.fixed_width and not ct and
            (not hasattr(self.table, "do_not_bulk_insert") or
             not self.table.do_not_bulk_insert)):
        filename = os.path.abspath(filename)
        try:
            bulk_insert_statement = self.get_bulk_insert_statement()
            with open(filename, 'r') as data_file:
                data_chunk = data_file.readlines(CHUNK_SIZE)
                # Drop the header row(s) from the first chunk.
                del data_chunk[:self.table.header_rows]
                while data_chunk:
                    data_chunk_split = [row.split(self.table.delimiter)
                                        for row in data_chunk]
                    self.cursor.executemany(bulk_insert_statement,
                                            data_chunk_split)
                    data_chunk = data_file.readlines(CHUNK_SIZE)
            self.connection.commit()
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # propagate; any failure falls back to row-by-row insert.
            # (Also removed an unused get_insert_columns() local.)
            self.connection.rollback()
            return Engine.insert_data_from_file(self, filename)
    else:
        return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Perform a high speed bulk insert

    Checks to see if a given file can be bulk inserted, and if so
    loads it in chunks and inserts those chunks into the database
    using executemany.
    """
    chunk_size = 1000000
    self.get_cursor()
    # Determine if the dataset includes cross-tab data
    crosstab = len([True for c in self.table.columns
                    if c[1][0][:3] == "ct-"]) != 0
    if (([self.table.cleanup.function, self.table.header_rows] ==
         [no_cleanup, 1]) and
            not self.table.fixed_width and not crosstab and
            (not hasattr(self.table, "do_not_bulk_insert") or
             not self.table.do_not_bulk_insert)):
        filename = os.path.abspath(filename)
        try:
            bulk_insert_statement = self.get_bulk_insert_statement()
            line_endings = set(['\n', '\r', '\r\n'])
            with open(filename, 'r') as data_file:

                def clean_chunk(lines):
                    # Drop blank lines and strip trailing newlines.
                    return [line.rstrip('\r\n') for line in lines
                            if line not in line_endings]

                data_chunk = clean_chunk(data_file.readlines(chunk_size))
                del data_chunk[:self.table.header_rows]
                while data_chunk:
                    data_chunk_split = [row.split(self.table.delimiter)
                                        for row in data_chunk]
                    self.cursor.executemany(bulk_insert_statement,
                                            data_chunk_split)
                    # Fix: originally only the first chunk was cleaned, so
                    # later chunks kept '\n' in their last field.
                    data_chunk = clean_chunk(data_file.readlines(chunk_size))
            self.connection.commit()
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # propagate.
            self.connection.rollback()
            return Engine.insert_data_from_file(self, filename)
    else:
        return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Perform a bulk insert (MS Access text-driver INSERT ... SELECT).

    For clean tab- or comma-delimited tables with at most one header
    row, builds an Access SQL statement that selects straight from the
    text file. When the table needs a generated record_id primary key,
    a "*_new" copy of the file is written with the id prepended to each
    line. Falls back to Engine.insert_data_from_file otherwise or on
    failure.
    """
    self.get_cursor()
    # True when any column is a cross-tab ("ct-") column.
    ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
    if ((self.table.cleanup.function == no_cleanup and
         not self.table.fixed_width and
         self.table.header_rows < 2) and
            (self.table.delimiter in ["\t", ","]) and
            not ct and
            (not hasattr(self.table, "do_not_bulk_insert") or
             not self.table.do_not_bulk_insert)):
        print("Inserting data from " + os.path.basename(filename) + "...")
        if self.table.delimiter == "\t":
            fmt = "TabDelimited"
        elif self.table.delimiter == ",":
            fmt = "CSVDelimited"
        if self.table.header_rows == 1:
            hdr = "Yes"
        else:
            hdr = "No"
        columns = self.table.get_insert_columns()
        need_to_delete = False
        add_to_record_id = 0
        if self.table.pk and not self.table.contains_pk:
            if '.' in os.path.basename(filename):
                proper_name = filename.split('.')
                newfilename = '.'.join(
                    (proper_name[0:-1]) if len(proper_name) > 0
                    else proper_name[0]
                ) + "_new." + filename.split(".")[-1]
            else:
                newfilename = filename + "_new"
            if not os.path.isfile(newfilename):
                print("Adding index to " + os.path.abspath(newfilename) + "...")
                # Text mode (was "rb"/"wb"): the str operations below fail
                # on bytes under Python 3.
                read = open(filename, "r")
                write = open(newfilename, "w")
                out_lines = []
                for line in read:
                    # Fix: originally str(id) stringified the *builtin* id
                    # function. Use the next record id instead —
                    # TODO confirm record_id is the next unused id.
                    record_id = self.table.record_id + add_to_record_id
                    out_lines.append(str(record_id) + self.table.delimiter +
                                     line.replace("\n", "\r\n"))
                    add_to_record_id += 1
                self.table.record_id += add_to_record_id
                # join() instead of quadratic string +=.
                write.write("".join(out_lines))
                write.close()
                read.close()
                need_to_delete = True
            columns = "record_id, " + columns
        else:
            newfilename = filename
        newfilename = os.path.abspath(newfilename)
        # Split the absolute path into directory and basename for the
        # Access "IN <dir>" clause.
        filename_length = (len(os.path.basename(newfilename)) * -1) - 1
        filepath = newfilename[:filename_length]
        statement = """
INSERT INTO """ + self.table_name() + " (" + columns + """)
SELECT * FROM [""" + os.path.basename(newfilename) + '''] IN "''' \
            + filepath + '''" "Text;FMT=''' + fmt + ''';HDR=''' + hdr + ''';"'''
        try:
            self.execute(statement)
        except Exception:
            # Repaired: the print string was broken across a line break
            # (SyntaxError) and the except was bare.
            print("Couldn't bulk insert. Trying manual insert.")
            self.connection.rollback()
            self.table.record_id -= add_to_record_id
            return Engine.insert_data_from_file(self, filename)
        if need_to_delete:
            os.remove(newfilename)
    else:
        return Engine.insert_data_from_file(self, filename)
def insert_data_from_file(self, filename):
    """Perform a bulk insert (MS Access text-driver INSERT ... SELECT).

    For bulk-insertable tab- or comma-delimited tables with at most one
    header row, builds an Access SQL statement selecting straight from
    the text file. When the table needs a generated record_id primary
    key, a "*_new" copy of the file is written with the id prepended to
    each line. Returns True on success, None on a failed bulk attempt,
    and otherwise delegates to Engine.insert_data_from_file.
    """
    self.get_cursor()
    if self.check_bulk_insert() and self.table.header_rows < 2 and (
            self.table.delimiter in ["\t", ","]):
        print("Inserting data from " + os.path.basename(filename) + "...")
        if self.table.delimiter == "\t":
            fmt = "TabDelimited"
        elif self.table.delimiter == ",":
            fmt = "CSVDelimited"
        if self.table.header_rows == 1:
            hdr = "Yes"
        else:
            hdr = "No"
        columns = self.table.get_insert_columns()
        need_to_delete = False
        add_to_record_id = 0
        if self.table.pk and not self.table.contains_pk:
            if '.' in os.path.basename(filename):
                proper_name = filename.split('.')
                len_name = len(proper_name)
                newfilename = '.'.join(
                    proper_name[0:-1] if len_name > 0 else proper_name[0]
                ) + "_new." + filename.split(".")[-1]
            else:
                newfilename = filename + "_new"
            if not os.path.isfile(newfilename):
                print("Adding index to " + os.path.abspath(newfilename) + "...")
                # Text mode (was "rb"/"wb"): the str operations below fail
                # on bytes under Python 3.
                read = open(filename, "r")
                write = open(newfilename, "w")
                out_lines = []
                for line in read:
                    # Fix: originally str(id) stringified the *builtin* id
                    # function. Use the next record id instead —
                    # TODO confirm record_id is the next unused id.
                    record_id = self.table.record_id + add_to_record_id
                    out_lines.append(str(record_id) + self.table.delimiter +
                                     line.strip())
                    add_to_record_id += 1
                self.table.record_id += add_to_record_id
                # Fix: stripped records were concatenated with no separator,
                # collapsing every row onto one line; terminate each record.
                write.write(os.linesep.join(out_lines) + os.linesep)
                write.close()
                read.close()
                need_to_delete = True
            columns = "record_id, " + columns
        else:
            newfilename = filename
        newfilename = os.path.abspath(newfilename)
        # Split the absolute path into directory and basename for the
        # Access "IN <dir>" clause.
        filename_length = (len(os.path.basename(newfilename)) * -1) - 1
        filepath = newfilename[:filename_length]
        statement = """
INSERT INTO """ + self.table_name() + " (" + columns + """)
SELECT * FROM [""" + os.path.basename(newfilename) + '''] IN "''' \
            + filepath + '''" "Text;FMT=''' + fmt + ''';HDR=''' + hdr + ''';"'''
        try:
            self.execute(statement)
            return True
        except Exception:
            # Catch Exception, not BaseException, so interrupts propagate.
            print("Couldn't bulk insert. Trying manual insert.")
            self.connection.rollback()
            self.table.record_id -= add_to_record_id
            return None
        finally:
            if need_to_delete:
                os.remove(newfilename)
    return Engine.insert_data_from_file(self, filename)