def download(self, engine=None, debug=False):
    """Download the source data and load it into four tables.

    ``structure_plot_year`` and ``plots`` are created and filled directly
    from their URLs.  The species descriptor CSV is downloaded, re-written
    as UTF-8 text under a new file name, and then loaded into ``species``.
    ``species_plot_year`` is created from an explicit column layout and
    filled from its URL.

    Args:
        engine: Forwarded unchanged to ``Script.download``.
        debug: Forwarded unchanged to ``Script.download``.

    Raises:
        UnicodeDecodeError: If the downloaded species file is not valid
            UTF-8.
    """
    # Function-scope import: io.open gives the same text-mode semantics on
    # Python 2 and Python 3.
    import io

    Script.download(self, engine, debug)
    engine = self.engine

    # structure_plot_year table
    engine.auto_create_table(Table("structure_plot_year"),
                             url=self.urls["structure_plot_year"])
    engine.insert_data_from_url(self.urls["structure_plot_year"])

    # plots table
    engine.auto_create_table(Table("plots"), url=self.urls["plots"])
    engine.insert_data_from_url(self.urls["plots"])

    # species table
    engine.download_file(self.urls["species"],
                         "original_MSH_SPECIES_DESCRIPTORS.csv")
    data_path = engine.format_filename("MSH_SPECIES_DESCRIPTORS.csv")
    old_data = os.path.normpath(
        engine.find_file("original_MSH_SPECIES_DESCRIPTORS.csv"))

    # Copy the file line by line through text-mode handles.  Reading in text
    # mode applies universal-newline translation (what the removed 'rU' mode
    # did), and the output handle does the UTF-8 encoding, so str is written
    # to a text file instead of bytes.  The `with` statement closes both
    # files.
    # NOTE(review): the input is decoded as UTF-8, a superset of the ASCII
    # that the old implicit Python 2 conversion accepted - confirm against
    # the real file's encoding.
    with io.open(old_data, 'r', encoding='utf-8') as infile, \
            io.open(data_path, 'w', encoding='utf-8') as new_data:
        for line in infile:
            new_data.write(line)

    engine.auto_create_table(Table("species"),
                             filename="MSH_SPECIES_DESCRIPTORS.csv")
    engine.insert_data_from_file(data_path)

    # species_plot_year table
    table = Table("species_plot_year")
    table.delimiter = ','
    table.columns = [
        ('record_id', ('pk-auto',)),
        ('plot_id_year', ('char',)),
        ('plot_name', ('char',)),
        ('plot_number', ('int',)),
        ('year', ('int',)),
        ('species', ('ct_column',)),
        ('count', ('ct-double',)),
    ]
    table.ct_column = 'species'
    table.ct_names = [
        'Abilas', 'Abipro', 'Achmil', 'Achocc', 'Agoaur', 'Agrexa',
        'Agrpal', 'Agrsca', 'Alnvir', 'Anamar', 'Antmic', 'Antros',
        'Aqifor', 'Arcnev', 'Arnlat', 'Astled', 'Athdis', 'Blespi',
        'Brocar', 'Brosit', 'Carmer', 'Carmic', 'Carpac', 'Carpay',
        'Carpha', 'Carros', 'Carspe', 'Casmin', 'Chaang', 'Cirarv',
        'Cisumb', 'Crycas', 'Danint', 'Descae', 'Elyely', 'Epiana',
        'Eriova', 'Eripyr', 'Fesocc', 'Fravir', 'Gencal', 'Hiealb',
        'Hiegra', 'Hyprad', 'Junmer', 'Junpar', 'Juncom', 'Leppun',
        'Lommar', 'Luepec', 'Luihyp', 'Luplat', 'Luplep', 'Luzpar',
        'Maiste', 'Pencar', 'Pencon', 'Penser', 'Phahas', 'Phlalp',
        'Phldif', 'Phyemp', 'Pincon', 'Poasec', 'Poldav', 'Polmin',
        'Pollon', 'Poljun', 'Popbal', 'Potarg', 'Psemen', 'Raccan',
        'Rumace', 'Salsit', 'Saxfer', 'Senspp', 'Sibpro', 'Sorsit',
        'Spiden', 'Trispi', 'Tsumer', 'Vacmem', 'Vervir', 'Vioadu',
        'Xerten',
    ]

    # This table is created from the explicit definition above rather than
    # through auto_create_table.
    engine.table = table
    engine.create_table()
    engine.insert_data_from_url(self.urls["species_plot_year"])
def download(self, engine=None, debug=False):
    """Fetch the zipped text files and load four tables from them.

    Loads ``species``, ``sites``, ``microplots`` and ``macroplots``, each
    from one of the text files extracted from the ``data`` archive.

    Args:
        engine: Passed through to ``Script.download``.
        debug: Passed through to ``Script.download``.
    """
    Script.download(self, engine, debug)
    engine = self.engine

    archive_members = ["Macroplot_data_Rev.txt", "Microplot_data.txt",
                       "Site_variables.txt", "Species_list.txt"]
    engine.download_files_from_archive(self.urls["data"], archive_members,
                                       filetype="zip")

    # species and sites: created through auto_create_table from the file,
    # each with its own Cleanup instance that lists 'NA' as a null marker.
    for table_name, source_file in (('species', "Species_list.txt"),
                                    ('sites', "Site_variables.txt")):
        na_cleanup = Cleanup(correct_invalid_value, nulls=['NA'])
        engine.auto_create_table(Table(table_name, cleanup=na_cleanup),
                                 filename=source_file)
        engine.insert_data_from_file(engine.format_filename(source_file))

    # microplots: the ct names are 'BSP<number>' for 1..104, except for the
    # plot numbers listed here, which are not part of the name list.
    absent_plot_numbers = (19, 32, 38, 39, 40, 77, 81, 103)
    microplots = Table('microplots')
    microplots.columns = [
        ('record_id', ('pk-auto',)),
        ('SpCode', ('char', '30')),
        ('Count', ('ct-int',)),
    ]
    microplots.ct_names = ['BSP%d' % number
                           for number in range(1, 105)
                           if number not in absent_plot_numbers]
    microplots.ct_column = 'PlotID'
    engine.auto_create_table(microplots, filename="Microplot_data.txt")
    engine.insert_data_from_file(engine.format_filename("Microplot_data.txt"))

    # macroplots: five TreeGirth columns are listed as the ct names.
    macroplots = Table('macroplots')
    macroplots.ct_names = ['TreeGirth%d' % tree for tree in range(1, 6)]
    macroplots.ct_column = 'Tree'
    macroplots.columns = [
        ('record_id', ('pk-auto',)),
        ('PlotID', ('char', '20')),
        ('SpCode', ('char', '30')),
        ('Girth', ('ct-int',)),
    ]
    engine.auto_create_table(macroplots, filename="Macroplot_data_Rev.txt")
    engine.insert_data_from_file(
        engine.format_filename("Macroplot_data_Rev.txt"))
def download(self, engine=None, debug=False):
    """Extract the archive's text files and populate the four tables.

    ``species`` and ``sites`` are created with ``self.cleanup_func_table``
    as their cleanup; ``microplots`` and ``macroplots`` are created from
    explicit column and ct-name definitions.

    Args:
        engine: Handed on to ``Script.download``.
        debug: Handed on to ``Script.download``.
    """
    Script.download(self, engine, debug)
    engine = self.engine

    engine.download_files_from_archive(
        self.urls["data"],
        ["Macroplot_data_Rev.txt", "Microplot_data.txt",
         "Site_variables.txt", "Species_list.txt"],
        filetype="zip",
    )

    def create_and_fill(table_def, text_file):
        # Create the table from the extracted file, then insert its rows.
        engine.auto_create_table(table_def, filename=text_file)
        engine.insert_data_from_file(engine.format_filename(text_file))

    # Table species
    create_and_fill(Table('species', cleanup=self.cleanup_func_table),
                    "Species_list.txt")

    # Table sites
    create_and_fill(Table('sites', cleanup=self.cleanup_func_table),
                    "Site_variables.txt")

    # Table microplots
    micro = Table('microplots')
    micro.columns = [
        ('record_id', ('pk-auto',)),
        ('SpCode', ('char', '30')),
        ('Count', ('ct-int',)),
    ]
    micro.ct_names = [
        'BSP1', 'BSP2', 'BSP3', 'BSP4', 'BSP5', 'BSP6', 'BSP7', 'BSP8',
        'BSP9', 'BSP10', 'BSP11', 'BSP12', 'BSP13', 'BSP14', 'BSP15',
        'BSP16', 'BSP17', 'BSP18', 'BSP20', 'BSP21', 'BSP22', 'BSP23',
        'BSP24', 'BSP25', 'BSP26', 'BSP27', 'BSP28', 'BSP29', 'BSP30',
        'BSP31', 'BSP33', 'BSP34', 'BSP35', 'BSP36', 'BSP37', 'BSP41',
        'BSP42', 'BSP43', 'BSP44', 'BSP45', 'BSP46', 'BSP47', 'BSP48',
        'BSP49', 'BSP50', 'BSP51', 'BSP52', 'BSP53', 'BSP54', 'BSP55',
        'BSP56', 'BSP57', 'BSP58', 'BSP59', 'BSP60', 'BSP61', 'BSP62',
        'BSP63', 'BSP64', 'BSP65', 'BSP66', 'BSP67', 'BSP68', 'BSP69',
        'BSP70', 'BSP71', 'BSP72', 'BSP73', 'BSP74', 'BSP75', 'BSP76',
        'BSP78', 'BSP79', 'BSP80', 'BSP82', 'BSP83', 'BSP84', 'BSP85',
        'BSP86', 'BSP87', 'BSP88', 'BSP89', 'BSP90', 'BSP91', 'BSP92',
        'BSP93', 'BSP94', 'BSP95', 'BSP96', 'BSP97', 'BSP98', 'BSP99',
        'BSP100', 'BSP101', 'BSP102', 'BSP104',
    ]
    micro.ct_column = 'PlotID'
    create_and_fill(micro, "Microplot_data.txt")

    # Table macroplots
    macro = Table('macroplots')
    macro.ct_names = [
        'TreeGirth1',
        'TreeGirth2',
        'TreeGirth3',
        'TreeGirth4',
        'TreeGirth5',
    ]
    macro.ct_column = 'Tree'
    macro.columns = [
        ('record_id', ('pk-auto',)),
        ('PlotID', ('char', '20')),
        ('SpCode', ('char', '30')),
        ('Girth', ('ct-int',)),
    ]
    create_and_fill(macro, "Macroplot_data_Rev.txt")
def download(self, engine=None, debug=False):
    """Extract three CSV files from the archive and load one table from each.

    Creates ``main``, ``references`` and ``range``.  All three use
    ``'trait'`` as their ``ct_column`` and ``self.cleanup_func_table`` as
    their cleanup; they differ in their ct names and column definitions.

    Args:
        engine: Passed on to ``Script.download``.
        debug: Passed on to ``Script.download``.
    """
    Script.download(self, engine, debug)
    engine = self.engine

    engine.download_files_from_archive(
        self.urls["data"],
        ["Data_Files/Amniote_Database_Aug_2015.csv",
         "Data_Files/Amniote_Database_References_Aug_2015.csv",
         "Data_Files/Amniote_Range_Count_Aug_2015.csv"],
        filetype="zip",
    )

    def build_and_load(table_name, trait_names, column_spec, csv_name):
        # Configure one comma-delimited table, create it and insert its rows.
        tbl = Table(table_name, delimiter=',', cleanup=self.cleanup_func_table)
        tbl.ct_column = 'trait'  # every table uses the same ct_column name
        tbl.ct_names = trait_names
        tbl.columns = column_spec
        engine.auto_create_table(tbl, filename=csv_name)
        engine.insert_data_from_file(engine.format_filename(csv_name))

    # Column fragments shared by the three column definitions.
    key_column = ('record_id', ('pk-auto',))
    name_columns = [
        ('genus', ('char', '20')),
        ('species', ('char', '50')),
        ('subspecies', ('char', '20')),
        ('common_name', ('char', '400')),
    ]
    plain_taxonomy = [
        ('class', ('char', '20')),
        ('order', ('char', '20')),
        ('family', ('char', '20')),
    ]
    # The range table spells these three column names with a trailing 'x'.
    suffixed_taxonomy = [
        ('classx', ('char', '20')),
        ('orderx', ('char', '20')),
        ('familyx', ('char', '20')),
    ]

    # The main and references files have the same headers, so one list of
    # ct names serves both tables.
    trait_names = [
        'female_maturity_d',
        'litter_or_clutch_size_n',
        'litters_or_clutches_per_y',
        'adult_body_mass_g',
        'maximum_longevity_y',
        'gestation_d',
        'weaning_d',
        'birth_or_hatching_weight_g',
        'weaning_weight_g',
        'egg_mass_g',
        'incubation_d',
        'fledging_age_d',
        'longevity_y',
        'male_maturity_d',
        'inter_litter_or_interbirth_interval_y',
        'female_body_mass_g',
        'male_body_mass_g',
        'no_sex_body_mass_g',
        'egg_width_mm',
        'egg_length_mm',
        'fledging_mass_g',
        'adult_svl_cm',
        'male_svl_cm',
        'female_svl_cm',
        'birth_or_hatching_svl_cm',
        'female_svl_at_maturity_cm',
        'female_body_mass_at_maturity_g',
        'no_sex_svl_cm',
        'no_sex_maturity_d',
    ]

    # Table main, from Amniote_Database_Aug_2015.csv
    main_columns = ([key_column] + plain_taxonomy + name_columns
                    + [('trait_value', ('ct-double',))])
    build_and_load('main', trait_names, main_columns,
                   "Amniote_Database_Aug_2015.csv")

    # Table references, from Amniote_Database_References_Aug_2015.csv
    reference_columns = ([key_column] + plain_taxonomy + name_columns
                         + [('reference', ('ct-char',))])
    build_and_load('references', trait_names, reference_columns,
                   "Amniote_Database_References_Aug_2015.csv")

    # Table range, from Amniote_Range_Count_Aug_2015.csv.  Its headers differ
    # from the two tables above: each stem below appears three times, as
    # min_<stem>, max_<stem> and count_<stem>, in that order.
    range_stems = [
        "female_maturity",
        "litter_clutch_size",
        "litters_clutches",
        "adult_body_mass",
        "maximum_longevity",
        "gestation",
        "weaning",
        "birth_hatching_weight",
        "weaning_weight",
        "egg_mass",
        "incubation",
        "fledging_age",
        "male_maturity",
        "inter_litter_interbirth_interval",
        "female_body_mass",
        "male_body_mass",
        "no_sex_body_mass",
        "egg_width",
        "egg_length",
        "fledging_mass",
        "adult_svl",
        "male_svl",
        "female_svl",
        "hatching_svl",
        "female_svl_at_maturity",
        "female_body_mass_at_maturity",
        "no_sex_svl",
        "no_sex_maturity",
    ]
    range_ct_names = [prefix + "_" + stem
                      for stem in range_stems
                      for prefix in ("min", "max", "count")]
    range_columns = ([key_column] + suffixed_taxonomy + name_columns
                     + [('trait_value', ('ct-double',))])
    build_and_load('range', range_ct_names, range_columns,
                   "Amniote_Range_Count_Aug_2015.csv")