def import_hitran_and_populate_particles_table():
    """Populate the particles table and import the HITRAN 2016 line lists.

    For every isotopologue listed in 'molecule_properties (copy).txt' this:
      1. inserts its properties into the particles table,
      2. downloads all of its transitions (nu in 0..1e9 cm-1) via hapi.fetch,
      3. bulk-loads the downloaded .data file with insert_hitran, and
      4. removes the temporary .data/.header files, because HAPI names them
         by mol_name (not iso_name) and a leftover file would collide with
         the next isotopologue of the same molecule.
    """
    start_time = time.time()

    version_name = 'HITRAN_2016'
    reference_link = r'https://www.sciencedirect.com/science/article/pii/S0022407317301073?via%3Dihub'

    # everything is loaded as strings; each list has length 124
    mol_ids, iso_ids, iso_names, iso_abundances, iso_masses, mol_names = \
        np.loadtxt('/home/toma/Desktop/molecule_properties (copy).txt', dtype='str',
                   skiprows=1, usecols=(1, 2, 3, 4, 5, 6), unpack=True)

    # point HAPI at the local 'data' folder once -- this call is
    # loop-invariant, so it is hoisted out of the per-isotopologue loop
    hapi.db_begin('data')

    for i in range(len(mol_ids)):
        # insert each molecule's properties into the particles table
        # (the trailing 1 is a temporary default_line_source_id placeholder)
        particle_property_query = "INSERT INTO particles VALUES('%s', '%s', '%s', '%s', '%s', null);" \
            % (mol_names[i], iso_names[i], iso_abundances[i], iso_masses[i], 1)
        sql_order(particle_property_query)

        # fetch all the data from HITRAN using HAPI; infinity cannot be the
        # upper limit, so a giant number (1e9) is used instead.
        # gpp is the upper state degeneracy.
        hapi.fetch(mol_names[i], int(mol_ids[i]), int(iso_ids[i]), 0, 1e9,
                   Parameters=['nu', 'a', 'gamma_air', 'n_air', 'delta_air',
                               'elower', 'gpp', 'gamma_H2', 'n_H2', 'delta_H2',
                               'gamma_He', 'n_He', 'delta_He'])

        # bulk-insert all parameters of the downloaded file into transitions
        filename = '/home/toma/Desktop/linelists-database/data/{}.data'.format(mol_names[i])
        insert_hitran.insert_hitran(filename, version_name, i + 1, reference_link)

        # delete the files since HAPI names them by mol_name instead of
        # iso_name, s.t. python won't get confused in the for loop
        header_filename = '/home/toma/Desktop/linelists-database/data/{}.header'.format(mol_names[i])
        os.remove(filename)
        os.remove(header_filename)

    print("Finished in %s seconds" % (time.time() - start_time))
def correct_default_line_source_id(total_particle_id=242):
    """Fill in particles.default_line_source_id for every particle.

    The sources available for each particle are ranked
    EXOMOL > HITEMP > HITRAN and the highest-ranked one's line_source_id
    is written back to the particles table.

    Args:
        total_particle_id: number of particles to correct; particle ids are
            assumed to be the contiguous range 1..total_particle_id.
            Defaults to 242 (the table size when this script was written).

    Raises:
        Exception: if a particle has no EXOMOL, HITEMP, or HITRAN source.
    """
    start_time = time.time()
    # preference order: first prefix found wins
    priority = ('EXOMOL', 'HITEMP', 'HITRAN')

    for i in range(total_particle_id):
        particle_id = i + 1
        print(particle_id)

        # get all the sources for that isotopologue
        get_line_sources = "SELECT line_source, line_source_id FROM source_properties WHERE particle_id = {};".format(particle_id)
        sources = fetch(get_line_sources)
        print(sources)

        # map each known version prefix to its (last seen) line_source_id
        found = {}
        for source_name, source_id in sources:
            for prefix in priority:
                if source_name.startswith(prefix):
                    found[prefix] = source_id

        # pick the best available source; fail loudly if there is none
        for prefix in priority:
            if prefix in found:
                default_line_source_id = found[prefix]
                break
        else:
            raise Exception(
                'Oh Damn this isotopologue has none of the versions HITRAN, HITEMP, or EXOMOL...umm problematic~'
            )

        print(default_line_source_id)
        update_default_line_source_id = 'UPDATE particles SET default_line_source_id = {} WHERE particle_id = {};'.format(
            default_line_source_id, particle_id)
        sql_order(update_default_line_source_id)
        print('Finished correcting particle ' + str(particle_id))

    print("Finished in %s seconds" % (time.time() - start_time))
compressed)) ''' #can run out of memory file = request.urlopen(bz2_url) #decompressor = bz2.BZ2Decompressor() compressed_data = file.read() data = bz2.decompress(compressed_data) #file_lines = data.decode() #store file info in outfile outfile = open('{}'.format(outfile_name), 'wb') outfile.write(data) outfile.close() ''' ################## if __name__ == '__main__': #disable autocommit to improve performance sql_order('SET autocommit = 0') sql_order('SET unique_checks = 0') sql_order('SET foreign_key_checks = 0') sql_order('SET sql_log_bin = 0') populate_all_exomol() #turn them back on sql_order('SET unique_checks = 1') sql_order('SET foreign_key_checks = 1') sql_order('SET sql_log_bin = 1')
def insert_hitran(filename, version_name, particle_id, reference_link):
    """Bulk-load one isotopologue's HITRAN line list into transitions.

    Registers version_name in source_properties (INSERT IGNORE), rewrites
    the comma-separated input file into a space-separated staging file
    (/home/toma/Desktop/hitran.txt) with '#' fields converted to MySQL's
    NULL marker '\\N', then imports it with LOAD DATA LOCAL INFILE.
    On any error the transaction is rolled back; the cursor and connection
    are always closed.

    Args:
        filename: path to the HAPI-produced .data file for this isotopologue.
        version_name: source tag, e.g. 'HITRAN_2016'.
        particle_id: particles-table id the lines belong to.
        reference_link: citation URL stored with the source.
    """
    # connect to the database
    # NOTE(review): credentials are hard-coded in source -- move to config
    db = MySQLdb.connect(host='localhost', user='******', passwd='Happy810@', db='linelist')
    # create a cursor object
    cursor = db.cursor()

    # disable autocommit and integrity checks to speed up the bulk load
    sql_order('SET autocommit = 0')
    sql_order('SET unique_checks = 0')
    sql_order('SET foreign_key_checks = 0')
    sql_order('SET sql_log_bin = 0')

    try:
        # insert the line_source into source_properties and get line_source_id
        insert_version_query = "INSERT IGNORE INTO source_properties(line_source, max_temperature, max_nu, num_lines, bool_air, \
bool_H2, bool_He, reference_link, particle_id, line_source_id) VALUES('%s', null, null, null, 'YES', 'YES', 'YES', '%s', \
'%s', null);" % (version_name, reference_link, particle_id)
        sql_order(insert_version_query)

        get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' AND \
particle_id = {}".format(version_name, particle_id)
        data = fetch(get_line_source_id_query)
        if len(data) != 1:
            raise Exception('should have exactly one line_source_id corresponding to one line_source')
        line_source_id = data[0][0]

        counter = 0
        # 'with' guarantees both files are closed even when a validation
        # check below raises mid-file (plain open() leaked them on that path)
        with open(filename) as infile, open('/home/toma/Desktop/hitran.txt', 'w') as f:
            for line in infile:
                # '#' marks a missing value; LOAD DATA reads \N as NULL
                data = ['\\N' if field == '#' else field for field in line.strip().split(',')]

                # transitions column arrangement for the tuple indexes:
                # (nu, a, gamma_air, n_air, delta_air, elower, g_upper, gamma_H2,
                #  n_H2, delta_H2, gamma_He, n_He, delta_He, line_source_id, particle_id, line_id)
                # (  0, 1,         2,     3,         4,      5,       6,        7,
                #    8,        9,       10,   11,       12,             13,          14,      15)
                # make sure at least one gamma and one n value is not null
                if data[2] == '\\N' and data[7] == '\\N' and data[10] == '\\N':
                    raise Exception('should have at least one gamma value')
                if data[3] == '\\N' and data[8] == '\\N' and data[11] == '\\N':
                    raise Exception('should have at least one n value')

                # write the parameters for each line, space separated
                for item in data:
                    f.write("%s " % item)
                f.write("\n")
                counter += 1

        print("Bulk inserting hitran data...")
        cursor.execute("LOAD DATA LOCAL INFILE '/home/toma/Desktop/hitran.txt' INTO TABLE transitions FIELDS TERMINATED BY ' ' LINES TERMINATED BY '\n' \
(@col1, @col2, @col3, @col4, @col5, @col6, @col7, @col8, @col9, @col10, @col11, @col12, @col13) SET nu=@col1, A=@col2, gamma_air=@col3, \
n_air=@col4, delta_air=@col5, elower=@col6, g_upper=@col7, gamma_H2=@col8, n_H2=@col9, delta_H2=@col10, gamma_He=@col11, n_He=@col12, \
delta_He=@col13, line_source_id={}, particle_id={};".format(line_source_id, particle_id))

        # commit changes
        db.commit()

        # turn the integrity checks back on
        sql_order('SET unique_checks = 1')
        sql_order('SET foreign_key_checks = 1')
        sql_order('SET sql_log_bin = 1')

        print('Executed {} lines of hitran data'.format(counter))
    except Exception as e:
        # if errors occur, undo the partial load
        db.rollback()
        print('insert hitran data failed', e)
    finally:
        # close up cursor and connection
        cursor.close()
        db.close()
def import_exomol_data(mol_name, iso_name, version_name, trans_fp, states_fp, partitions_fp, \
                       broad_H2_fp, broad_He_fp, default_gamma, default_n, trans_file_num, reference_link):
    """Import one isotopologue's EXOMOL line list into the transitions table.

    Workflow: ensure the particle row exists, register version_name in
    source_properties, insert the partition function, load the .states file
    (energy, degeneracy, J, and optionally the K column), then stream each
    .trans file into the database in chunks via insert_exomol().

    Args:
        mol_name, iso_name: molecule / isotopologue identifiers.
        version_name: EXOMOL line-list tag, e.g. 'EXOMOL_BT2'.
        trans_fp: path prefix of the .trans files; the file number (1-based)
            is appended to form each file name.
        states_fp, partitions_fp: .states and partition-function file paths.
        broad_H2_fp, broad_He_fp: H2/He .broad files -- pass both or neither.
        default_gamma, default_n: fallback broadening params; None becomes
            the SQL NULL marker '\\N'.
        trans_file_num: number of .trans files to load.
        reference_link: citation URL stored with the source.
    """
    if default_gamma is None:
        default_gamma = '\\N'  # LOAD DATA reads \N as SQL NULL
        print('Oh Damn this isotologue has no gamma at all in exomol data')
    if default_n is None:
        default_n = '\\N'
        print('Oh Damn this isotologue has no N at all in exomol data')

    one_iso_time = time.time()

    # connect to the database
    # NOTE(review): credentials are hard-coded in source -- move to config
    db = MySQLdb.connect(host='localhost', user='******', passwd='Happy810@', db='linelist')
    cursor = db.cursor()

    # get particle_id, inserting the particle first if it is not there yet
    get_particle_id = "SELECT particle_id FROM particles WHERE iso_name = '{}'".format(iso_name)
    check = fetch(get_particle_id)
    if check == ():
        # not in the particles table yet -- insert placeholders (0 abundance,
        # 0 mass, temporary source id 1) to be corrected later
        particle_property_query = "INSERT INTO particles VALUES('%s', '%s', '%s', '%s', '%s', null);" % (mol_name, iso_name, \
            0, 0, 1)
        sql_order(particle_property_query)
    data = fetch(get_particle_id)
    if len(data) != 1:
        raise Exception('iso_name should correspond to exactly one isotopologue in the database')
    particle_id = data[0][0]

    # load H2/He broadening params and build the matching source insert
    if broad_H2_fp is None and broad_He_fp is None:
        # no .broad files in exomol
        no_broadening_param = True
        H2_dict = None
        He_dict = None
        insert_version_query = "INSERT IGNORE INTO source_properties(line_source, max_temperature, max_nu, num_lines, bool_air, \
bool_H2, bool_He, reference_link, particle_id, line_source_id) VALUES('%s', null, null, null, 'NO', 'NO', 'NO', '%s', \
'%s', null);" % (version_name, reference_link, particle_id)
    elif broad_H2_fp is not None and broad_He_fp is not None:
        # both H2 and He .broad files present
        no_broadening_param = False
        H2_dict = temp_broad_param_dict(broad_H2_fp)
        He_dict = temp_broad_param_dict(broad_He_fp)
        insert_version_query = "INSERT IGNORE INTO source_properties(line_source, max_temperature, max_nu, num_lines, bool_air, \
bool_H2, bool_He, reference_link, particle_id, line_source_id) VALUES('%s', null, null, null, 'NO', 'YES', 'YES', '%s', \
'%s', null);" % (version_name, reference_link, particle_id)
    else:
        raise Exception('Should have either neither or both of the H2 and He broad param files')

    get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' AND \
particle_id = {}".format(version_name, particle_id)
    output = fetch(get_line_source_id_query)
    if output != ():
        # source was inserted already
        line_source_id = output[0][0]
    else:
        # insert the line_source into source_properties and re-read its id
        sql_order(insert_version_query)
        data = fetch(get_line_source_id_query)
        if len(data) != 1:
            raise Exception('should have exactly one line_source_id corresponding to one line_source')
        line_source_id = data[0][0]

    # insert partitions
    insert_partitions(partitions_fp, line_source_id, particle_id)
    db.commit()

    # load states (state ids start at 1 and are in order, for all files)
    states_time = time.time()
    print('Loading huge ass states file')
    Es, gs, Js = np.loadtxt(states_fp, usecols=(1, 2, 3), unpack=True)

    Ks = None
    if no_broadening_param is False:
        # K is needed only when a .broad key contains '_' (e.g. 'a1_K')
        has_K = any('_' in key for key in H2_dict) or any('_' in key for key in He_dict)
        if has_K:
            # dtype=str replaces the removed np.str alias (gone in NumPy 1.24)
            if mol_name == 'H2O':
                # the K column position differs per H2O line-list version
                if version_name == 'EXOMOL_POKAZATEL':
                    Ks = np.loadtxt(states_fp, usecols=4, unpack=True, dtype=str)
                elif version_name == 'EXOMOL_BT2':
                    Ks = np.loadtxt(states_fp, usecols=13, unpack=True, dtype=str)
                elif version_name == 'EXOMOL_HotWat78':
                    Ks = np.loadtxt(states_fp, usecols=4, unpack=True, dtype=str)
                elif version_name == 'EXOMOL_VTT':
                    Ks = np.loadtxt(states_fp, usecols=11, unpack=True, dtype=str)
                else:
                    raise Exception('Should not have versions other than POKAZATEL, BT2, HotWat78, and VTT for H2O in EXOMOL')
            else:
                # cases like PH3 and CH4
                Ks = np.loadtxt(states_fp, usecols=6, unpack=True, dtype=str)
    print('Finished loading states file in %s seconds' % (time.time() - states_time))

    # insert transition files, chunked so a chunk fits in memory
    counter = 0
    for file_num in range(1, trans_file_num + 1):
        curr_file = trans_fp + str(file_num)
        # count the lines first; 'with' closes the handle (a bare
        # open() in the generator expression leaked it)
        with open(curr_file) as count_f:
            length_trans = sum(1 for line in count_f)
        print(length_trans, 'lines : Opened the transition file')
        with open(curr_file) as trans:
            start_line = 0
            max_size = 1e7  # lines per chunk
            repeat = 0
            # islice inside insert_exomol resumes after the last read line,
            # so length_trans is decremented rather than advancing start_line
            while length_trans >= start_line + max_size:
                counter += insert_exomol(cursor, H2_dict, He_dict, Es, gs, Js, Ks, start_line,
                                         int(start_line + max_size), '/home/toma/Desktop/exomol.txt',
                                         trans, line_source_id, particle_id, default_gamma,
                                         default_n, no_broadening_param)
                length_trans -= max_size
                repeat += 1
            # remainder chunk: fewer than max_size lines are left
            counter += insert_exomol(cursor, H2_dict, He_dict, Es, gs, Js, Ks, start_line,
                                     int(length_trans), '/home/toma/Desktop/exomol.txt',
                                     trans, line_source_id, particle_id, default_gamma,
                                     default_n, no_broadening_param)
            # commit one file altogether at one time
            db.commit()
        print('Finished loading {} with {} lines of data'.format(curr_file, int(length_trans + repeat * max_size)))

    cursor.close()
    db.close()
    print("Finished inserting", counter, "lines of exomol", version_name, "data for", iso_name,
          "in %s seconds" % (time.time() - one_iso_time))
def insert_hitemp(fp, isotop_name, line_source, ref_link):
    """Bulk-load a pre-formatted HITEMP file into the transitions table.

    fp must be a space-separated file whose columns are
    (nu, A, gamma_air, n_air, delta_air, elower, g_upper); it is imported
    with LOAD DATA LOCAL INFILE under the given line_source and
    isotopologue. Rolls back on any error; always closes the cursor and
    connection.

    Args:
        fp: path of the file to import.
        isotop_name: isotopologue name, resolved to a particle_id.
        line_source: source tag, e.g. 'HITEMP_2010'.
        ref_link: citation URL stored with the source.
    """
    insert_time = time.time()

    # connect to the database
    # NOTE(review): credentials are hard-coded in source -- move to config
    db = MySQLdb.connect(host='localhost', user='******', passwd='Happy810@', db='linelist')
    # create a cursor object
    cursor = db.cursor()

    # disable autocommit and integrity checks to speed up the bulk load
    sql_order('SET autocommit = 0')
    sql_order('SET unique_checks = 0')
    sql_order('SET foreign_key_checks = 0')
    sql_order('SET sql_log_bin = 0')

    try:
        # get particle id
        particle_id = get_particle(isotop_name)[0]

        get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' AND \
particle_id = {}".format(line_source, particle_id)
        output = fetch(get_line_source_id_query)
        if output != ():
            # source inserted already
            line_source_id = output[0][0]
        else:
            # insert the line_source into source_properties, then re-read its id
            insert_version_query = "INSERT IGNORE INTO source_properties(line_source, max_temperature, max_nu, num_lines, bool_air, \
bool_H2, bool_He, reference_link, particle_id, line_source_id) VALUES('%s', null, null, null, 'YES', 'NO', 'NO', '%s', \
'%s', null);" % (line_source, ref_link, particle_id)
            sql_order(insert_version_query)
            data = fetch(get_line_source_id_query)
            if len(data) != 1:
                raise Exception('should have exactly one line_source_id corresponding to one line_source')
            line_source_id = data[0][0]

        # count the input lines; 'with' closes the handle (a bare open()
        # in the generator expression leaked it)
        with open(fp) as infile:
            file_length = sum(1 for line in infile)

        print("Bulk inserting hitemp data...")
        cursor.execute("LOAD DATA LOCAL INFILE '{}' INTO TABLE transitions FIELDS TERMINATED BY ' ' LINES TERMINATED BY '\n' \
(@col1, @col2, @col3, @col4, @col5, @col6, @col7) SET nu=@col1, A=@col2, gamma_air=@col3, n_air=@col4, \
delta_air=@col5, elower=@col6, g_upper=@col7, line_source_id={}, particle_id={};".format(fp, \
            line_source_id, particle_id))

        # commit changes
        db.commit()

        # turn the integrity checks back on
        sql_order('SET unique_checks = 1')
        sql_order('SET foreign_key_checks = 1')
        sql_order('SET sql_log_bin = 1')

        print('Executed {} lines of hitemp data'.format(file_length))
    except Exception as e:
        # if errors occur, undo the partial load
        db.rollback()
        print('insert hitemp data failed', e)
    finally:
        # close up cursor and connection
        cursor.close()
        db.close()

    print("Finished in %s seconds" % (time.time() - insert_time))