def main(functionals, func, category='Aminoindan', bins=600, reg_m=10**2):
    """Compare the spectra of two functionals for one category of KF files.

    The KF directory is listed, files whose name starts with ``category`` and
    contains one of the two functional tags are kept, a spectrum is built for
    each of them and the two groups of spectra are handed to ``func``.

    Parameters
    ----------
    functionals : sequence of str
        ``functionals[0]`` and ``functionals[1]`` are substrings looked up in
        the KF file names to decide which group a file belongs to.
    func : callable
        Called as ``func(ir1, ir2, reg_m=reg_m)``; its result must provide
        ``astype``.
    category : str
        File-name prefix selecting the files to compare.
    bins : int
        Forwarded to ``ir.get_spectrum_from_kf`` as ``n``.
    reg_m
        Forwarded unchanged to ``func``.

    Returns
    -------
    The result of ``func`` converted with ``astype(float)``.
    """
    #####################
    #### SETUP START ####
    #####################

    ############################################ Directory with KF files
    kf_dir = r"D:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\functionals"

    #####################
    ####  SETUP END  ####
    #####################

    tag_a, tag_b = functionals[0], functionals[1]

    # os.listdir returns entries in arbitrary order; sort so that the i-th
    # spectrum of each functional is picked reproducibly.
    kff = sorted(
        f for f in os.listdir(kf_dir)
        if f.startswith(category) and (tag_a in f or tag_b in f)
    )

    # Split on the file NAME: the directory part of the path is itself called
    # "functionals" and must not take part in the substring match.
    files_a = [kf_dir + '\\' + f for f in kff if tag_a in f]
    files_b = [kf_dir + '\\' + f for f in kff if tag_b in f]

    #get the spectra
    ir1 = [ir.get_spectrum_from_kf(f, width=50, n=bins) for f in files_a]
    ir2 = [ir.get_spectrum_from_kf(f, width=50, n=bins) for f in files_b]

    d = func(ir1, ir2, reg_m=reg_m)
    return d.astype(float)
kff_list = [kf_dir + '\\' + f for f in kff if f.startswith(category)] # kff_list = [ # r"D:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\functionals\ISO34_E8_DFTB3_DFTB.rkf", # r"D:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\functionals\ISO34_E7_LDA_DFT.t21", # ] kff_list = [ r"D:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\functionals\ISO34_E12_DFTB3_DFTB.rkf", r"D:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\functionals\ISO34_E22_LDA_DFT.t21", ] #get the spectra ir1 = [ir.get_spectrum_from_kf(f,width=50,n=bins) for f in kff_list if functionals[0] in f] ir2 = [ir.get_spectrum_from_kf(f,width=50,n=bins) for f in kff_list if functionals[1] in f] peaksa = [ir.get_freqs_intens(f) for f in kff_list if functionals[0] in f] freqsa, intensa = [list(c) for c in zip(*peaksa)] peaksa = [list(zip(freqsa[i], intensa[i])) for i in range(len(freqsa))] peaksb = [ir.get_freqs_intens(f) for f in kff_list if functionals[1] in f] freqsb, intensb = [list(c) for c in zip(*peaksb)] peaksb = [list(zip(freqsb[i], intensb[i])) for i in range(len(freqsb))] def setup(): global animation_folder animation_folder = os.getcwd() + r'\animation\\'
# r"C:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\conf_unopt\AI5_DFT.t21", # r"C:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\conf_unopt\AI6_DFT.t21"] # kf_dftb = [r"C:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\conf_unopt\AI1_DFTB.rkf", # r"C:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\conf_unopt\AI2_DFTB.rkf", # r"C:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\conf_unopt\AI3_DFTB.rkf", # r"C:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\conf_unopt\AI4_DFTB.rkf", # r"C:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\conf_unopt\AI5_DFTB.rkf", # r"C:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\conf_unopt\AI6_DFTB.rkf"] #get the spectra ir_dft = [ir.get_spectrum_from_kf(f,width=50) for f in kf_dft] ir_dftb = [ir.get_spectrum_from_kf(f,width=50) for f in kf_dftb] #normalize spectra ir_dft = [i/np.sum(i) for i in ir_dft] ir_dftb = [i/np.sum(i) for i in ir_dftb] #wasserstein distances Y, X = np.meshgrid(np.linspace(0,1,600), np.linspace(0,1,600)) C = abs(Y-X)**2 print('Case (Balanced Wasserstein)') d = np.zeros((len(ir_dft), len(ir_dftb))) for i, a in enumerate(ir_dft/np.sum(ir_dft)):
bottom_line = Border(bottom=Side(style='thin')) right_line = Border(right=Side(style='thin')) both_line = Border(right=Side(style='thin'), bottom=Side(style='thin')) all_line = Border(top=Side(style='thick'), left=Side(style='thick'), right=Side(style='thick'), bottom=Side(style='thick')) bold_huge = Font(bold=False, size=30) bold_large = Font(bold=False, size=16) bold_small = Font(bold=False, size=11) #write each category to a new sheet for cat in categories: #generate the spectra ir_dft = [ir.get_spectrum_from_kf(f,width=50) for f in kff_by_cat[cat] if f.endswith('.t21')] ir_dftb = [ir.get_spectrum_from_kf(f,width=50) for f in kff_by_cat[cat] if f.endswith('.rkf')] #setup new sheet ws = wb.create_sheet(cat) wb.active = ws ws['A1'] = cat ws['A1'].font = bold_huge data_col_offset = 5 #distance from the left side of the sheet where to write data n = len(ir_dft) #iterate over comparison functions for i, func in enumerate(funcs):
right_line = Border(right=Side(style='thin')) both_line = Border(right=Side(style='thin'), bottom=Side(style='thin')) all_line = Border(top=Side(style='thick'), left=Side(style='thick'), right=Side(style='thick'), bottom=Side(style='thick')) bold_huge = Font(bold=False, size=30) bold_large = Font(bold=False, size=16) bold_small = Font(bold=False, size=11) for i_cat, s_name, cat in zip(range(len(categories)), sheet_names, categories): #get the spectra ir_dft = [ ir.get_spectrum_from_kf(f, width=50) for f in kff_by_cat[cat] if f.endswith('.t21') ] ir_dftb = [ ir.get_spectrum_from_kf(f, width=50) for f in kff_by_cat[cat] if f.endswith('.rkf') ] # print(np.sum(ir_dft), np.sum(ir_dftb)) #setup sheet ws = wb.create_sheet(s_name) wb.active = ws ws['A1'] = s_name ws['A1'].font = bold_huge
import modules.plot as plot
import modules.barycenter as bc
import modules.ir as ir
import os

# Plot the IR spectrum of every Aminoindan conformer, once from the DFT
# result file (.t21) and once from the DFTB result file (.rkf).

# kf_path = r"C:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\[21-05](13-05-2020)\\"
# kfs = [kf_path + d for d in os.listdir(kf_path)]

# Directory holding the KF files of the second conformer set.
KF_DIR = r"C:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\second_set"

# Conformers 1..6, DFT file first and DFTB file second for each conformer.
kfs = [
    KF_DIR + '\\' + f"Aminoindan_CONF_{conf}_{suffix}"
    for conf in range(1, 7)
    for suffix in ("DFT.t21", "DFTB.rkf")
]

for kf in kfs:
    spec = ir.get_spectrum_from_kf(kf, xlim=(0, 4000), n=1000)
    # Title is the file name without its extension; the paths are Windows
    # paths, so split on the backslash explicitly.
    file_name = kf.split('\\')[-1]
    plot.plot_hist(spec,
                   title=os.path.splitext(file_name)[0],
                   axislabels=('Wavenumber ($cm^{-1}$)', 'Intensity'),
                   xlim=(0, 4000),
                   invert_xaxis=True)
#choose two histograms a and b: # a = hist.gaussian(400, 0.7, 0.1, 0) # a = hist.gaussian(400, 0.7, 0.1, 0) + 0.1 * hist.gaussian(400, 0.5,0.03) # a = hist.gaussian(400, 0.2, 0.06) + hist.gaussian(400, 0.5, 0.06)*2 + hist.gaussian(400, 0.8, 0.06) # a = hist.slater(400, 0.5, 30, 0) # a = hist.from_func(400, lambda x: 1-x**2) # a = hist.from_func(400, func) # a = hist.from_func(400, lambda x: 1-x,0) # a = hist.from_func(400, lambda x: np.cos(x*6*3.14)+1) # a = hist.from_func(400, lambda x: x) # a = hist.dirac_delta(400, 0.5) # a = hist.from_func(400, lambda x: ((x-0.5)*10)**4) + 3*hist.gaussian(400, 0.7, 0.1) # r = jobs.DFTJob('l-alanine', job_name='l-alanine_DFT').run(); a = ir.get_spectrum(r, xlim=(0,2000), n=400) a = ir.get_spectrum_from_kf( r"D:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\functionals\Aminoindan_CONF_4_LDA_DFT.t21", xlim=(0, 4000), n=400) # b = hist.gaussian(400, 0.3, 0.1, 0) # b = hist.gaussian(400, 0.8, 0.05, 0) + hist.gaussian(400, 0.2, 0.05, 0) # b = hist.gaussian(400, 0.5, 0.05, 0) # b = hist.gaussian(400, 0.2, 0.06)*2 + hist.gaussian(400, 0.5, 0.06) + hist.gaussian(400, 0.8, 0.06)*2 # b = hist.from_func(400, lambda x: np.sin(x*10*3.14)+1) # b = hist.from_func(400, func2) # b = hist.from_func(400, lambda x: x,0) # b = hist.from_func(400, lambda x: x**0) # b = hist.slater(400, 0.8, 30, 0) # b = hist.from_func(400, lambda x: np.cos(x*5*3.14)+1) # r = jobs.DFTBJob('l-alanine', job_name='l-alanine_DFT').run(); b = ir.get_spectrum(r, xlim=(0,2000), n=400) b = ir.get_spectrum_from_kf( r"D:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\functionals\Aminoindan_CONF_4_DFTB3_freq_DFTB.rkf",
def _peak_pairs(kf_paths):
    """Return, for every KF file, its peaks as a list of (frequency, intensity) tuples."""
    pairs = []
    for path in kf_paths:
        freqs, intens = ir.get_freqs_intens(path)
        pairs.append(list(zip(freqs, intens)))
    return pairs


def _normalise_scores(d, pick_best):
    """Rescale the score matrix ``d`` to [0, 1] so that 0 marks the best entry.

    ``pick_best`` is ``min`` when lower scores are better and ``max`` otherwise.
    A matrix whose entries are all equal maps to all zeros instead of dividing
    by zero.
    """
    lowest = d.min()
    span = d.max() - lowest
    if not span:
        return (d - lowest) * 0.0
    scaled = (d - lowest) / span
    return scaled if pick_best is min else 1 - scaled


def main(functionals, save_name=None, sub_dir='', bins=600, reg_m=10**2):
    """Write comparison tables between two functionals to an Excel workbook.

    For every category one sheet is created. On that sheet every enabled
    comparison function gets a table: rows are the files of
    ``functionals[0]``, columns the files of ``functionals[1]``, cells hold
    the comparison score and are coloured through the colour map. The best
    entry of each row gets a thick border, and three leading columns hold the
    best score of the row, the difference between the entry at the row's own
    index and that best score, and the same difference as a percentage.

    Parameters
    ----------
    functionals : sequence of str
        ``functionals[0]`` and ``functionals[1]`` are substrings looked up in
        the KF file names to decide which group a file belongs to.
    save_name : str or None
        Suffix of the workbook name; when ``None`` a name built from the two
        functional tags is used. The KF directory name is always prepended.
    sub_dir : str
        Sub-directory of ``excel_files`` the workbook is written to.
    bins : int
        Forwarded to ``ir.get_spectrum_from_kf`` as ``n`` and appended to the
        workbook file name.
    reg_m
        Forwarded unchanged to every comparison function.

    Raises
    ------
    ValueError
        If a category has no KF file for one of the two functionals.
    """
    #####################
    #### SETUP START ####
    #####################

    ############################################ Comparison functions
    funcs = [
        # comp_funcs.wasserstein_distance,
        # comp_funcs.wasserstein_distance_unbalanced,
        comp_funcs.freq_int_wasserstein,
        # comp_funcs.l2,
        # comp_funcs.diagonality,
        # comp_funcs.bhattacharyya,
        # comp_funcs.correlation,
        # comp_funcs.chi_square,
        # comp_funcs.kl_divergence,
    ]

    ############################################ Colour map for data colouring
    colour_map = cmap.WhiteGreen()

    ############################################ Directory with KF files
    kf_dir = r"D:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\functionals"
    # kf_dir = r"C:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\random_set"

    ############################################ Categories to split the KF files into
    # categories = ['Aminoindan_CONF_', 'ISO34_E', 'ISO34_P', 'SCONF']
    # categories = ['Aminoindan_CONF_', 'ISO34_E', 'SCONF']
    # categories = ['Aminoindan_CONF_']
    categories = ['']

    ############################################ Sheet names
    sheet_names = categories

    ############################################ Functionals to split the KF files into
    # functionals = ['OLYP_DFT', 'LDA_DFT', 'DFTB3_DFTB', 'DFTB3_freq_DFTB']
    # functionals = ['DFTB3_freq_DFTB', 'LDA_DFT']

    ############################################ Path to save xl file to
    dir_name = kf_dir.split('\\')[-1]
    if save_name is None:
        save_name = dir_name + f'_comparisons_{functionals[0]}_{functionals[1]}'
    else:
        save_name = dir_name + '_' + save_name
    # save_name = kf_dir.split('\\')[-1] + '_comparisons'

    #############################################

    #####################
    ####  SETUP END  ####
    #####################

    assert len(categories) == len(sheet_names)

    # Whether the best score of a comparison function is its lowest or highest value.
    func_best = {comp_funcs.wasserstein_distance: min,
                 comp_funcs.wasserstein_distance_unbalanced: min,
                 comp_funcs.freq_int_wasserstein: min,
                 comp_funcs.l2: min,
                 comp_funcs.diagonality: max,
                 comp_funcs.bhattacharyya: min,
                 comp_funcs.correlation: min,
                 comp_funcs.chi_square: min,
                 comp_funcs.kl_divergence: min}

    tag_a, tag_b = functionals[0], functionals[1]

    #get kffiles
    # os.listdir returns entries in arbitrary order; sort so that row/column
    # order (and therefore the pairing by index) is reproducible.
    kff = sorted(f for f in os.listdir(kf_dir) if tag_a in f or tag_b in f)

    #start writing
    out_dir = f"excel_files/{sub_dir}"
    out_file = f"excel_files/{sub_dir}/{save_name}_{bins}.xlsx"
    # Creates missing parents too and tolerates an existing directory, while
    # still reporting real failures such as missing permissions.
    os.makedirs(out_dir, exist_ok=True)

    wb = xl.Workbook()

    thin = Side(style='thin')
    thick = Side(style='thick')
    bottom_line = Border(bottom=thin)
    right_line = Border(right=thin)
    both_line = Border(right=thin, bottom=thin)
    all_line = Border(top=thick, left=thick, right=thick, bottom=thick)

    font_title = Font(bold=False, size=30)
    font_heading = Font(bold=False, size=16)
    font_body = Font(bold=False, size=11)

    for s_name, cat in zip(sheet_names, categories):
        # Labels, spectra and peaks are all derived from these two lists so
        # they can never get out of step. Matching is done on the file NAME:
        # the directory part of the path must not take part in it.
        files_a = [f for f in kff if f.startswith(cat) and tag_a in f]
        files_b = [f for f in kff if f.startswith(cat) and tag_b in f]
        for tag, found in ((tag_a, files_a), (tag_b, files_b)):
            if not found:
                raise ValueError(
                    f"no KF files starting with {cat!r} found for {tag!r} in {kf_dir}")

        # Table labels: file name without the category prefix and the extension.
        names_a = [os.path.splitext(f)[0][len(cat):] for f in files_a]
        names_b = [os.path.splitext(f)[0][len(cat):] for f in files_b]
        paths_a = [kf_dir + '\\' + f for f in files_a]
        paths_b = [kf_dir + '\\' + f for f in files_b]

        #get the spectra
        ir1 = [ir.get_spectrum_from_kf(p, width=50, n=bins) for p in paths_a]
        ir2 = [ir.get_spectrum_from_kf(p, width=50, n=bins) for p in paths_b]

        peaksa = _peak_pairs(paths_a)
        peaksb = _peak_pairs(paths_b)

        #setup sheet
        ws = wb.create_sheet(s_name)
        wb.active = ws
        ws['A1'] = s_name
        ws['A1'].font = font_title

        data_col_offset = 5  # column holding the row labels; data starts one to the right
        n = len(ir1)
        for i, func in enumerate(funcs):
            #setup new matching table
            # Each table takes n data rows plus title row, header row and a spacer.
            title_row = i * (n + 3) + 3
            header_row = title_row + 1
            first_data_row = title_row + 2

            fn = func.__name__
            print(cat + '_' + fn)

            title_cell = ws.cell(row=title_row, column=1)
            title_cell.value = fn.upper()
            title_cell.font = font_heading

            for col, label in enumerate(('Best', 'Error', 'Error rel'), start=1):
                cell = ws.cell(row=header_row, column=col)
                cell.value = label
                cell.border = bottom_line
                cell.font = font_body

            corner = ws.cell(row=header_row, column=data_col_offset)
            corner.value = tag_a + '\\' + tag_b
            corner.border = both_line
            corner.font = font_body

            d = func(ir1, ir2, reg_m=reg_m, peaksa=peaksa, peaksb=peaksb)

            pick_best = func_best[func]
            colours = colour_map.get_hex_colours(_normalise_scores(d, pick_best))

            for j, row in enumerate(d):
                sheet_row = first_data_row + j
                best = pick_best(row)
                # NOTE(review): row[j] presumes the j-th file of both
                # functionals describe the same molecule - confirm.
                error = row[j] - best

                label_cell = ws.cell(row=sheet_row, column=data_col_offset)
                label_cell.value = names_a[j]
                label_cell.border = right_line

                ws.cell(row=sheet_row, column=1).value = best
                ws.cell(row=sheet_row, column=2).value = error
                ws.cell(row=sheet_row, column=3).value = f'{error/row[j] * 100:.2f}%'

                for k, el in enumerate(row):
                    col = data_col_offset + 1 + k
                    if j == 0:
                        # Column headers only need to be written once per table.
                        head_cell = ws.cell(row=header_row, column=col)
                        head_cell.value = names_b[k]
                        head_cell.border = bottom_line

                    hex_colour = colours[j, k].decode("utf-8")
                    cell = ws.cell(row=sheet_row, column=col)
                    cell.value = el
                    cell.fill = PatternFill(start_color=hex_colour,
                                            end_color=hex_colour,
                                            fill_type="solid")
                    if el == best:
                        cell.border = all_line

            # Checkpoint after every finished table.
            wb.save(out_file)

    # Drop the empty default sheet that every new workbook starts with.
    del wb['Sheet']
    wb.save(out_file)
def main(functionals, func, category='Aminoindan', bins=600, reg_m=10**2, **kwargs):
    """Compare the spectra and peaks of two functionals for one category of KF files.

    The KF directory is listed, files whose name starts with ``category`` and
    contains one of the two functional tags are kept, and for each of them a
    spectrum and a peak list are built. Both groups are handed to ``func``.

    Parameters
    ----------
    functionals : sequence of str
        ``functionals[0]`` and ``functionals[1]`` are substrings looked up in
        the KF file names to decide which group a file belongs to.
    func : callable
        Called as ``func(ir1, ir2, reg_m=reg_m, peaksa=..., peaksb=...,
        **kwargs)``; its result must provide ``astype``.
    category : str
        File-name prefix selecting the files to compare.
    bins : int
        Forwarded to ``ir.get_spectrum_from_kf`` as ``n``.
    reg_m
        Forwarded unchanged to ``func``.
    **kwargs
        Extra keyword arguments forwarded unchanged to ``func``.

    Returns
    -------
    The result of ``func`` converted with ``astype(float)``.

    Raises
    ------
    ValueError
        If no KF file is found for one of the two functionals.
    """
    #####################
    #### SETUP START ####
    #####################

    ############################################ Directory with KF files
    kf_dir = r"D:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\functionals"

    #####################
    ####  SETUP END  ####
    #####################

    tag_a, tag_b = functionals[0], functionals[1]

    #get kffiles
    # os.listdir returns entries in arbitrary order; sort so that the i-th
    # entry of each functional is picked reproducibly.
    kff = sorted(
        f for f in os.listdir(kf_dir)
        if f.startswith(category) and (tag_a in f or tag_b in f)
    )

    # Split on the file NAME: the directory part of the path is itself called
    # "functionals" and must not take part in the substring match.
    files_a = [kf_dir + '\\' + f for f in kff if tag_a in f]
    files_b = [kf_dir + '\\' + f for f in kff if tag_b in f]
    for tag, found in ((tag_a, files_a), (tag_b, files_b)):
        if not found:
            raise ValueError(
                f"no KF files starting with {category!r} found for {tag!r} in {kf_dir}")

    #get the spectra
    ir1 = [ir.get_spectrum_from_kf(f, width=50, n=bins) for f in files_a]
    ir2 = [ir.get_spectrum_from_kf(f, width=50, n=bins) for f in files_b]

    # Per file: list of (frequency, intensity) tuples.
    peaksa = []
    for f in files_a:
        freqs, intens = ir.get_freqs_intens(f)
        peaksa.append(list(zip(freqs, intens)))
    peaksb = []
    for f in files_b:
        freqs, intens = ir.get_freqs_intens(f)
        peaksb.append(list(zip(freqs, intens)))

    d = func(ir1, ir2, reg_m=reg_m, peaksa=peaksa, peaksb=peaksb, **kwargs)
    return d.astype(float)
plt.legend() plt.show() # plot.plot_hists(specs, ('mol1 - DFTB3', 'mol2 - LDA'), line=False, scatter=True) sys.exit() specs = tuple([s / np.sum(s) for s in specs]) res = sink.sinkhorn(*specs, converge_thresh=converge_thresh, max_iter=1000) plot.plot_sink_results(res) plot.plot_hists(specs, ('mol1 - DFTB3', 'mol2 - LDA')) specs = [ ir.get_spectrum_from_kf( r"D:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\functionals\ISO34_E22_DFTB3_DFTB.rkf", xlim=(0, 4000), n=600), ir.get_spectrum_from_kf( r"D:\Users\Yuman\Desktop\Programmeren\bachelorproject\scripting\RUNS\#KFFiles\functionals\ISO34_E22_LDA_DFT.t21", xlim=(0, 4000), n=600), ] specs = tuple([s / np.sum(s) for s in specs]) res = sink.sinkhorn(*specs, converge_thresh=converge_thresh, max_iter=1000) plot.plot_sink_results(res) plot.plot_hists(specs, ('mol1 - DFTB3', 'mol1 - LDA')) sys.exit() Y, X = np.meshgrid(np.linspace(0, 1, len(a)), np.linspace(0, 1, len(b))) C = cost_fn(X, Y)