def sort_csv(filename):
    """Sort CSV rows minus the header and return the file.

    This function is used only for testing; sort_file loads every line
    into memory, so it is limited to files that fit in memory.
    """
    filename = os.path.normpath(filename)
    input_file = open_fr(filename)
    csv_reader_infile = csv.reader(input_file, escapechar="\\")
    # write the data to a temporary file and sort it
    temp_path = os.path.normpath("tempfile")
    temp_file = open_fw(temp_path)
    csv_writer = open_csvw(temp_file)
    i = 0
    for row in csv_reader_infile:
        if i == 0:
            # The first entry is the header line
            infields = row
            i += 1
        else:
            csv_writer.writerow(row)
    input_file.close()
    temp_file.close()
    # sort the temp file, then write the header followed by the sorted rows
    sorted_txt = sort_file(temp_path)
    tmp = open_fr(sorted_txt)
    in_txt = csv.reader(tmp, delimiter=',', escapechar="\\")
    csv_file = open_fw(filename)
    csv_writer = open_csvw(csv_file)
    csv_writer.writerow(infields)
    csv_writer.writerows(in_txt)
    tmp.close()
    csv_file.close()
    os.remove(os.path.normpath(temp_path))
    return filename
def sort_file(file_path):
    """Sort a file by line and return the file.

    This function is used only for testing; it reads every line into
    memory, so it is limited to files that fit in memory.
    """
    file_path = os.path.normpath(file_path)
    input_file = open_fr(file_path)
    lines = [line.strip().replace('\x00', '') for line in input_file]
    input_file.close()
    outfile = open_fw(file_path)
    lines.sort()
    for line in lines:
        outfile.write(line + "\n")
    outfile.close()
    return file_path
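# A minimal usage sketch for the two helpers above, assuming they are
# importable from this module. Sorting both files first makes a test
# comparison order-independent; the function and file names below are
# hypothetical, for illustration only.
def files_match_ignoring_row_order(produced_csv, expected_csv):
    with open(sort_csv(produced_csv)) as got, open(sort_csv(expected_csv)) as want:
        return got.read() == want.read()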
def disconnect(self):
    """Close out the JSON files and close all created file objects.

    Re-write each file, stripping off the trailing comma, then close it
    with a `\\n]`.
    """
    if self.table_names:
        for output_file_i, file_name in self.table_names:
            output_file_i.close()
            current_input_file = open_fr(file_name)
            file_contents = current_input_file.readlines()
            current_input_file.close()
            file_contents[-1] = file_contents[-1].strip(',\n')
            current_output_file = open_fw(file_name)
            current_output_file.writelines(file_contents)
            current_output_file.writelines(['\n]'])
            current_output_file.close()
        self.table_names = []
def disconnect(self):
    """Close out the XML files and close all created file objects.

    Re-write each file, stripping off the trailing comma, then close it
    with a closing tag.
    """
    if self.table_names:
        for output_file_i, file_name in self.table_names:
            output_file_i.close()
            current_input_file = open_fr(file_name, encode=False)
            file_contents = current_input_file.readlines()
            current_input_file.close()
            file_contents[-1] = file_contents[-1].strip(',')
            current_output_file = open_fw(file_name)
            current_output_file.writelines(file_contents)
            current_output_file.write(u'\n</root>')
            current_output_file.close()
        self.table_names = []
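# A standalone sketch of the finalize pattern shared by both disconnect()
# methods above: row-by-row writes leave a trailing comma, so the file is
# re-written without it and a closing token is appended. This version uses
# plain open() instead of open_fr/open_fw; the function name is hypothetical.
def finalize_streamed_file(path, closing_token):
    with open(path) as f:
        contents = f.readlines()
    contents[-1] = contents[-1].rstrip(',\n')  # drop the dangling comma
    with open(path, 'w') as f:
        f.writelines(contents)
        f.write('\n' + closing_token)  # e.g. ']' for JSON, '</root>' for XML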
def load_data(self, filename):
    """Generator returning lists of values from lines in a data file.

    1. Works on both delimited (csv module) and fixed width data
       (extract_fixed_width)
    2. Identifies the delimiter if not known
    3. Removes extra line endings
    """
    if not self.table.delimiter:
        self.set_table_delimiter(filename)
    dataset_file = open_fr(filename)
    if self.table.fixed_width:
        for row in dataset_file:
            yield self.extract_fixed_width(row)
    else:
        reg = re.compile("\r\n|\n|\r")
        for row in csv.reader(dataset_file, delimiter=self.table.delimiter):
            yield [reg.sub(" ", values) for values in row]
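# A self-contained illustration of step 3 above: a line ending embedded in a
# quoted CSV field is collapsed to a single space. The sample data is made up.
import csv
import io
import re

line_endings = re.compile("\r\n|\n|\r")
sample = io.StringIO('id,notes\n1,"line one\nline two"\n')
for row in csv.reader(sample, delimiter=','):
    print([line_endings.sub(" ", value) for value in row])
# prints ['id', 'notes'] then ['1', 'line one line two']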
def download(self, engine=None, debug=False): Script.download(self, engine, debug) self.engine.download_file(self.urls["main"], "Succession_sampling_03-07_data_original.txt") data_path = self.engine.format_filename("Succession_sampling_03-07_data.txt") old_data = open_fr(self.engine.find_file("Succession_sampling_03-07_data_original.txt")) new_data = open_fw(data_path) # original file's header contains an end of line charactor in the middle hence creating two lines # Read in the two lines and create the full header line1 = old_data.readline().strip() line2 = old_data.readline() newline = line1 + "\t" + line2 new_data.write(newline) for line in old_data: new_data.write(line) new_data.close() old_data.close() self.engine.auto_create_table(self.tables["main"], filename="Succession_sampling_03-07_data.txt") self.engine.insert_data_from_file(data_path)
def json2csv(input_file, output_file=None, header_values=None):
    """Convert a JSON file to CSV.

    This function is used only for testing; it reads the whole file into
    memory, so it is limited to files that fit in memory.
    """
    file_out = open_fr(input_file)
    # set output file name and write header
    if output_file is None:
        output_file = os.path.splitext(os.path.basename(input_file))[0] + ".csv"
    csv_out = open_fw(output_file, encode=False)
    if os.name == 'nt':
        outfile = csv.DictWriter(csv_out, dialect='excel', escapechar="\\",
                                 lineterminator='\n', fieldnames=header_values)
    else:
        outfile = csv.DictWriter(csv_out, dialect='excel', escapechar="\\",
                                 fieldnames=header_values)
    raw_data = json.loads(file_out.read())
    outfile.writeheader()
    for item in raw_data:
        outfile.writerow(item)
    file_out.close()
    # remove the input file portably instead of shelling out to `rm`
    os.remove(input_file)
    return output_file
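# A minimal usage sketch for json2csv, assuming it and its open_fr/open_fw
# helpers are importable from this module. The file name and keys are
# hypothetical; note that json2csv deletes its input file when done.
import json

with open("records.json", "w") as f:
    json.dump([{"id": 1, "name": "a"}, {"id": 2, "name": "b"}], f)
csv_path = json2csv("records.json", header_values=["id", "name"])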
def download(self, engine=None, debug=False): Script.download(self, engine, debug) for key in self.urls: self.engine.download_file(self.urls[key], self.urls[key].rpartition('/')[-1]) new_file_path = self.engine.format_filename("new" + key) old_data = open_fr(self.engine.find_file(self.urls[key].rpartition('/')[-1])) new_data = open_fw(new_file_path) with old_data as file_block: # after the metadata lines, set data to True data = False for lines in file_block.readlines(): # meta data contins line with no ";" and may have "(;;;;)+" or empty lines if not data and (";" not in lines or ";;;;" in lines): pass else: data = True new_data.write(lines) file_block.close() new_data.close() self.engine.auto_create_table(Table(key, cleanup=cleanup_func_table), filename=str("new" + key)) self.engine.insert_data_from_file(new_file_path)
def download(self, engine=None, debug=False): Script.download(self, engine, debug) for key in self.urls: self.engine.download_file(self.urls[key], self.urls[key].rpartition('/')[-1]) new_file_path = self.engine.format_filename("new" + key) old_data = open_fr(self.engine.find_file(self.urls[key].rpartition('/')[-1])) new_data = open_fw(new_file_path) with old_data as file_block: # after the metadata lines, set data to True data = False for lines in file_block.readlines(): # meta data contins line with no ";" and may have "(;;;;)+" or empty lines if not data and (";" not in lines or ";;;;" in lines): pass else: data = True new_data.write(lines) file_block.close() new_data.close() self.engine.auto_create_table(Table(key, cleanup=Cleanup(correct_invalid_value, nulls=[-999.9])),filename=str("new" + key)) self.engine.insert_data_from_file(new_file_path)
def xml2csv(input_file, outputfile=None, header_values=None, row_tag="row"):
    """Convert an XML file to CSV.

    This function is used only for testing; it reads the whole file into
    memory, so it is limited to files that fit in memory.
    """
    file_output = open_fr(input_file, encode=False)
    # set output file name and write header
    if outputfile is None:
        outputfile = os.path.splitext(os.path.basename(input_file))[0] + ".csv"
    csv_out = open_fw(outputfile)
    if os.name == 'nt':
        csv_writer = csv.writer(csv_out, dialect='excel', escapechar='\\',
                                lineterminator='\n')
    else:
        csv_writer = csv.writer(csv_out, dialect='excel', escapechar='\\')
    v = file_output.read()
    csv_writer.writerow(header_values)
    tree = ET.parse(newfile(v))
    root = tree.getroot()
    for rows in root.findall(row_tag):
        x = [element.text for name in header_values for element in rows.findall(name)]
        csv_writer.writerow(x)
    file_output.close()
    # remove the input file portably instead of shelling out to `rm`
    os.remove(input_file)
    return outputfile
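# A matching usage sketch for xml2csv, under the same assumptions as the
# json2csv example (it also relies on the module's newfile helper); the file
# name, tags, and header values are hypothetical.
with open("records.xml", "w") as f:
    f.write("<root><row><id>1</id><name>a</name></row></root>")
csv_path = xml2csv("records.xml", header_values=["id", "name"], row_tag="row")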
def download(self, engine=None, debug=False):
    Script.download(self, engine, debug)
    engine = self.engine

    # download and create species table
    table = Table('species')
    self.engine.auto_create_table(table, url=self.urls['species'])
    self.engine.insert_data_from_url(self.urls['species'])

    # State abbreviations with the year annual inventory began for that state
    stateslist = [('AL', 2001), ('AK', 2004), ('AZ', 2001), ('AR', 2000),
                  ('CA', 2001), ('CO', 2002), ('CT', 2003), ('DE', 2004),
                  ('FL', 2003), ('GA', 1998), ('ID', 2004), ('IL', 2001),
                  ('IN', 1999), ('IA', 1999), ('KS', 2001), ('KY', 1999),
                  ('LA', 2001), ('ME', 1999), ('MD', 2004), ('MA', 2003),
                  ('MI', 2000), ('MN', 1999), ('MO', 1999), ('MS', 2006),
                  ('MT', 2003), ('NE', 2001), ('NV', 2004), ('NH', 2002),
                  ('NJ', 2004), ('NM', 1999), ('NY', 2002), ('NC', 2003),
                  ('ND', 2001), ('OH', 2001), ('OK', 2008), ('OR', 2001),
                  ('PA', 2000), ('RI', 2003), ('SC', 1999), ('SD', 2001),
                  ('TN', 2000), ('TX', 2001), ('UT', 2000), ('VT', 2003),
                  ('VA', 1998), ('WA', 2002), ('WV', 2004), ('WI', 2000),
                  ('WY', 2000), ('PR', 2001)]

    tablelist = ["SURVEY", "PLOT", "COND", "SUBPLOT", "SUBP_COND", "TREE", "SEEDLING"]

    for table in tablelist:
        for state, year in stateslist:
            engine.download_files_from_archive(
                self.urls["main"] + state + "_" + table + ".ZIP",
                [state + "_" + table + ".csv"])

    for table in tablelist:
        print("Scanning data for table %s..." % table)
        prep_file_name = "%s.csv" % table
        prep_file = open_fw(engine.format_filename(prep_file_name))
        this_file = open_fr(engine.format_filename(stateslist[0][0] + "_" + table + ".csv"))
        col_names = this_file.readline()
        prep_file.write(col_names)
        column_names = [col.strip('"') for col in col_names.split(',')]
        year_column = column_names.index("INVYR")
        this_file.close()

        for state, year in stateslist:
            this_file = open_fr(engine.format_filename(state + "_" + table + ".csv"))
            this_file.readline()
            # keep only rows from the year the annual inventory began onwards
            for line in this_file:
                values = line.split(',')
                this_year = values[year_column]
                if int(this_year) >= year:
                    prep_file.write(line)
            this_file.close()  # close each state's file before moving on
        prep_file.close()
        engine.auto_create_table(Table(table), filename=prep_file_name)
        engine.insert_data_from_file(engine.format_filename(prep_file_name))
        try:
            os.remove(engine.format_filename(prep_file_name))
        except OSError:
            pass

    return engine
def download(self, engine=None, debug=False):
    try:
        Script.download(self, engine, debug)
        engine = self.engine

        # Species table
        table = Table("species", cleanup=Cleanup(), contains_pk=True, header_rows=9)
        table.columns = [("species_id", ("pk-int",)),
                         ("AOU", ("int",)),
                         ("english_common_name", ("char", 50)),
                         ("french_common_name", ("char", 50)),
                         ("spanish_common_name", ("char", 50)),
                         ("sporder", ("char", 30)),
                         ("family", ("char", 30)),
                         ("genus", ("char", 30)),
                         ("species", ("char", 50))]
        table.fixed_width = [7, 6, 51, 51, 51, 51, 51, 51, 50]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["species"])

        # Routes table
        engine.download_files_from_archive(self.urls["routes"], ["routes.csv"])
        engine.auto_create_table(Table("routes", cleanup=Cleanup()),
                                 filename="routes.csv")
        engine.insert_data_from_file(engine.format_filename("routes.csv"))

        # Weather table
        if not os.path.isfile(engine.format_filename("weather_new.csv")):
            engine.download_files_from_archive(self.urls["weather"], ["weather.csv"])
            read = open_fr(engine.format_filename("weather.csv"))
            write = open_fw(engine.format_filename("weather_new.csv"))
            print("Cleaning weather data...")
            for line in read:
                values = line.split(',')
                newvalues = []
                for value in values:
                    if ':' in value:
                        newvalues.append(value.replace(':', ''))
                    elif value == "N":
                        newvalues.append(None)
                    else:
                        newvalues.append(value)
                write.write(','.join(str(value) for value in newvalues))
            write.close()
            read.close()
        engine.auto_create_table(
            Table("weather", pk="RouteDataId",
                  cleanup=Cleanup(correct_invalid_value, nulls=['NULL'])),
            filename="weather_new.csv")
        engine.insert_data_from_file(engine.format_filename("weather_new.csv"))

        # Region_codes table
        table = Table("region_codes", pk=False, header_rows=11,
                      fixed_width=[11, 11, 30])

        def regioncodes_cleanup(value, engine):
            replace = {chr(225): "a", chr(233): "e", chr(237): "i", chr(243): "o"}
            newvalue = str(value)
            for key in list(replace.keys()):
                if key in newvalue:
                    newvalue = newvalue.replace(key, replace[key])
            return newvalue

        table.cleanup = Cleanup(regioncodes_cleanup)
        table.columns = [("countrynum", ("int",)),
                         ("regioncode", ("int",)),
                         ("regionname", ("char", 30))]
        engine.table = table
        engine.create_table()
        engine.insert_data_from_url(self.urls["region_codes"])

        # Counts table
        table = Table("counts", delimiter=',')
        table.columns = [("record_id", ("pk-auto",)),
                         ("countrynum", ("int",)),
                         ("statenum", ("int",)),
                         ("Route", ("int",)),
                         ("RPID", ("int",)),
                         ("Year", ("int",)),
                         ("Aou", ("int",)),
                         ("Count10", ("int",)),
                         ("Count20", ("int",)),
                         ("Count30", ("int",)),
                         ("Count40", ("int",)),
                         ("Count50", ("int",)),
                         ("StopTotal", ("int",)),
                         ("SpeciesTotal", ("int",))]

        stateslist = ["Alabama", "Alaska", "Arizona", "Arkansas", "California",
                      "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
                      "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
                      "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan",
                      "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska",
                      "Nevada", ["New Hampshire", "NHampsh"], ["New Jersey", "NJersey"],
                      ["New Mexico", "NMexico"], ["New York", "NYork"],
                      ["North Carolina", "NCaroli"], ["North Dakota", "NDakota"],
                      "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
                      ["Rhode Island", "RhodeIs"], ["South Carolina", "SCaroli"],
                      ["South Dakota", "SDakota"], "Tennessee", "Texas", "Utah",
                      "Vermont", "Virginia", "Washington", ["West Virginia", "W_Virgi"],
                      "Wisconsin", "Wyoming", "Alberta", ["British Columbia", "BritCol"],
                      "Manitoba", ["New Brunswick", "NBrunsw"],
                      ["Northwest Territories", "NWTerri"], "Newfoundland",
                      ["Nova Scotia", "NovaSco"], "Nunavut", "Ontario",
                      ["Prince Edward Island", "PEI"], "Quebec", "Saskatchewan",
                      "Yukon"]

        state = ""
        shortstate = ""

        engine.table = table
        engine.create_table()

        for state in stateslist:
            try:
                # plain strings use their first seven characters as the
                # archive name; [name, shortname] pairs are unpacked instead
                if len(state) > 2:
                    shortstate = state[0:7]
                else:
                    state, shortstate = state[0], state[1]

                print("Inserting data from " + state + "...")
                try:
                    engine.table.cleanup = Cleanup()
                    engine.insert_data_from_archive(
                        self.urls["counts"] + shortstate + ".zip",
                        [shortstate + ".csv"])
                except:
                    print("Failed bulk insert on " + state + ", inserting manually.")
                    engine.connection.rollback()
                    engine.table.cleanup = Cleanup(correct_invalid_value, nulls=['*'])
                    engine.insert_data_from_archive(
                        self.urls["counts"] + shortstate + ".zip",
                        [shortstate + ".csv"])
            except:
                print("There was an error in " + state + ".")
                raise
    except zipfile.BadZipfile:
        print("There was an unexpected error in the Breeding Bird Survey archives.")
        raise
    return engine
def set_table_delimiter(self, file_path):
    dataset_file = open_fr(file_path)
    self.auto_get_delimiter(dataset_file.readline())
    dataset_file.close()
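# auto_get_delimiter is given only the first line of the file. A comparable
# standalone approach using the stdlib's csv.Sniffer (an assumption, not
# necessarily how auto_get_delimiter is implemented):
import csv

def sniff_delimiter(file_path, candidates=",\t;|"):
    with open(file_path) as f:
        return csv.Sniffer().sniff(f.readline(), delimiters=candidates).delimiter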
def download(self, engine=None, debug=False): try: Script.download(self, engine, debug) engine = self.engine # Species table table = Table("species", cleanup=Cleanup(), contains_pk=True, header_rows=9) table.columns=[("species_id", ("pk-int",) ), ("AOU", ("int",) ), ("english_common_name", ("char",50) ), ("french_common_name", ("char",50) ), ("spanish_common_name", ("char",50) ), ("sporder", ("char",30) ), ("family", ("char",30) ), ("genus", ("char",30) ), ("species", ("char",50) ), ] table.fixed_width = [7,6,51,51,51,51,51,51,50] engine.table = table engine.create_table() engine.insert_data_from_url(self.urls["species"]) # Routes table engine.download_files_from_archive(self.urls["routes"], ["routes.csv"]) engine.auto_create_table(Table("routes", cleanup=Cleanup()), filename="routes.csv") engine.insert_data_from_file(engine.format_filename("routes.csv")) # Weather table if not os.path.isfile(engine.format_filename("weather_new.csv")): engine.download_files_from_archive(self.urls["weather"], ["weather.csv"]) read = open_fr(engine.format_filename("weather.csv")) write = open_fw(engine.format_filename("weather_new.csv")) print("Cleaning weather data...") for line in read: values = line.split(',') newvalues = [] for value in values: if ':' in value: newvalues.append(value.replace(':', '')) elif value == "N": newvalues.append(None) else: newvalues.append(value) write.write(','.join(str(value) for value in newvalues)) write.close() read.close() engine.auto_create_table(Table("weather", pk="RouteDataId", cleanup=Cleanup(correct_invalid_value, nulls=['NULL'])), filename="weather_new.csv") engine.insert_data_from_file(engine.format_filename("weather_new.csv")) # Region_codes table table = Table("region_codes", pk=False, header_rows=11, fixed_width=[11, 11, 30]) def regioncodes_cleanup(value, engine): replace = {chr(225):"a", chr(233):"e", chr(237):"i", chr(243):"o"} newvalue = str(value) for key in list(replace.keys()): if key in newvalue: newvalue = newvalue.replace(key, replace[key]) return newvalue table.cleanup = Cleanup(regioncodes_cleanup) table.columns=[("countrynum" , ("int",) ), ("regioncode" , ("int",) ), ("regionname" , ("char",30) )] engine.table = table engine.create_table() engine.insert_data_from_url(self.urls["region_codes"]) # Counts table table = Table("counts", delimiter=',') table.columns=[("record_id" , ("pk-auto",) ), ("countrynum" , ("int",) ), ("statenum" , ("int",) ), ("Route" , ("int",) ), ("RPID" , ("int",) ), ("Year" , ("int",) ), ("Aou" , ("int",) ), ("Count10" , ("int",) ), ("Count20" , ("int",) ), ("Count30" , ("int",) ), ("Count40" , ("int",) ), ("Count50" , ("int",) ), ("StopTotal" , ("int",) ), ("SpeciesTotal" , ("int",) )] stateslist = ["Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Delaware", "Florida", "Georgia", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", ["New Hampshire", "NHampsh"], ["New Jersey", "NJersey"], ["New Mexico", "NMexico"], ["New York", "NYork"], ["North Carolina", "NCaroli"], ["North Dakota", "NDakota"], "Ohio", "Oklahoma", "Oregon", "Pennsylvania", ["Rhode Island", "RhodeIs"], ["South Carolina", "SCaroli"], ["South Dakota", "SDakota"], "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", ["West Virginia", "W_Virgi"], "Wisconsin", "Wyoming", "Alberta", ["British Columbia", "BritCol"], "Manitoba", ["New Brunswick", "NBrunsw"], ["Northwest 
Territories", "NWTerri"], "Newfoundland", ["Nova Scotia", "NovaSco"], "Nunavut", "Ontario", ["Prince Edward Island", "PEI"], "Quebec", "Saskatchewan", "Yukon"] state = "" shortstate = "" engine.table = table engine.create_table() for state in stateslist: try: if len(state) > 2: shortstate = state[0:7] else: state, shortstate = state[0], state[1] print("Inserting data from " + state + "...") try: engine.table.cleanup = Cleanup() engine.insert_data_from_archive(self.urls["counts"] + shortstate + ".zip", [shortstate + ".csv"]) except: print("Failed bulk insert on " + state + ", inserting manually.") engine.connection.rollback() engine.table.cleanup = Cleanup(correct_invalid_value, nulls=['*']) engine.insert_data_from_archive(self.urls["counts"] + shortstate + ".zip", [shortstate + ".csv"]) except: print("There was an error in " + state + ".") raise except zipfile.BadZipfile: print("There was an unexpected error in the Breeding Bird Survey archives.") raise return engine
def download(self, engine=None, debug=False): try: Script.download(self, engine, debug) engine = self.engine # Species table table = Table("species", cleanup=Cleanup(), contains_pk=True, header_rows=9) table.columns=[("species_id", ("pk-int",) ), ("AOU", ("int",) ), ("english_common_name", ("char",50) ), ("french_common_name", ("char",50) ), ("spanish_common_name", ("char",50) ), ("sporder", ("char",30) ), ("family", ("char",30) ), ("genus", ("char",30) ), ("species", ("char",50) ), ] table.fixed_width = [7,6,51,51,51,51,51,51,50] engine.table = table engine.create_table() engine.insert_data_from_url(self.urls["species"]) # Routes table engine.download_files_from_archive(self.urls["routes"], ["routes.csv"]) engine.auto_create_table(Table("routes", cleanup=Cleanup()), filename="routes.csv") engine.insert_data_from_file(engine.format_filename("routes.csv")) # Weather table if not os.path.isfile(engine.format_filename("weather_new.csv")): engine.download_files_from_archive(self.urls["weather"], ["weather.csv"]) read = open_fr(engine.format_filename("weather.csv")) write = open_fw(engine.format_filename("weather_new.csv")) print("Cleaning weather data...") for line in read: values = line.split(',') newvalues = [] for value in values: if ':' in value: newvalues.append(value.replace(':', '')) elif value == "N": newvalues.append(None) else: newvalues.append(value) write.write(','.join(str(value) for value in newvalues)) write.close() read.close() engine.auto_create_table(Table("weather", pk="RouteDataId", cleanup=Cleanup(correct_invalid_value, nulls=['NULL'])), filename="weather_new.csv") engine.insert_data_from_file(engine.format_filename("weather_new.csv")) # Region_codes table table = Table("region_codes", pk=False, header_rows=11, fixed_width=[11, 11, 30]) def regioncodes_cleanup(value, engine): replace = {chr(225):"a", chr(233):"e", chr(237):"i", chr(243):"o"} newvalue = str(value) for key in list(replace.keys()): if key in newvalue: newvalue = newvalue.replace(key, replace[key]) return newvalue table.cleanup = Cleanup(regioncodes_cleanup) table.columns=[("countrynum" , ("int",) ), ("regioncode" , ("int",) ), ("regionname" , ("char",30) )] engine.table = table engine.create_table() engine.insert_data_from_url(self.urls["region_codes"]) # Counts table table = Table("counts", pk=False, delimiter=',') table.columns=[("RouteDataID" , ("int",) ), ("countrynum" , ("int",) ), ("statenum" , ("int",) ), ("Route" , ("int",) ), ("RPID" , ("int",) ), ("year" , ("int",) ), ("AOU" , ("int",) ), ("Stop1" , ("int",) ), ("Stop2" , ("int",) ), ("Stop3" , ("int",) ), ("Stop4" , ("int",) ), ("Stop5" , ("int",) ), ("Stop6" , ("int",) ), ("Stop7" , ("int",) ), ("Stop8" , ("int",) ), ("Stop9" , ("int",) ), ("Stop10" , ("int",) ), ("Stop11" , ("int",) ), ("Stop12" , ("int",) ), ("Stop13" , ("int",) ), ("Stop14" , ("int",) ), ("Stop15" , ("int",) ), ("Stop16" , ("int",) ), ("Stop17" , ("int",) ), ("Stop18" , ("int",) ), ("Stop19" , ("int",) ), ("Stop20" , ("int",) ), ("Stop21" , ("int",) ), ("Stop22" , ("int",) ), ("Stop23" , ("int",) ), ("Stop24" , ("int",) ), ("Stop25" , ("int",) ), ("Stop26" , ("int",) ), ("Stop27" , ("int",) ), ("Stop28" , ("int",) ), ("Stop29" , ("int",) ), ("Stop30" , ("int",) ), ("Stop31" , ("int",) ), ("Stop32" , ("int",) ), ("Stop33" , ("int",) ), ("Stop34" , ("int",) ), ("Stop35" , ("int",) ), ("Stop36" , ("int",) ), ("Stop37" , ("int",) ), ("Stop38" , ("int",) ), ("Stop39" , ("int",) ), ("Stop40" , ("int",) ), ("Stop41" , ("int",) ), ("Stop42" , ("int",) ), ("Stop43" , ("int",) ), 
("Stop44" , ("int",) ), ("Stop45" , ("int",) ), ("Stop46" , ("int",) ), ("Stop47" , ("int",) ), ("Stop48" , ("int",) ), ("Stop49" , ("int",) ), ("Stop50" , ("int",) )] part = "" engine.table = table engine.create_table() for part in range(1,11): part = str(part) try: print("Inserting data from part " + part + "...") try: engine.table.cleanup = Cleanup() engine.insert_data_from_archive(self.urls["counts"] + "Fifty" + part + ".zip", ["fifty" + part + ".csv"]) except: print("Failed bulk insert on " + part + ", inserting manually.") engine.connection.rollback() engine.table.cleanup = Cleanup(correct_invalid_value, nulls=['*']) engine.insert_data_from_archive(self.urls["counts"] + "Fifty" + part + ".zip", ["fifty" + part + ".csv"]) except: print("There was an error in part " + part + ".") raise except zipfile.BadZipfile: print("There was an unexpected error in the Breeding Bird Survey archives.") raise return engine