def main():
    """Run PBE (OT) followed by G0W0 (diagonalization) for one molecule.

    Command-line arguments (parsed here with argparse):
        -rank      array-job number; zero-padded to six digits and used as the
                   molecule id in file and folder names.
        -num_cpus  number of cpus requested per array job; one less is passed
                   to cp2k as the process count.

    For every basis-set cardinal number in ('2', '3', '4') the function writes
    an OT input and a DIAG+GW input into the scratch folder, runs cp2k on each
    (unless ``dummy_run``), parses the DIAG output, and stores the extracted
    values in a ``Cp2kOutput`` record.  The record is dumped to
    ``db/DB_<rank>.yaml``; the scratch folder is then copied home (debug or
    failed run) and removed.

    Values that cannot be parsed are stored as the string 'not extracted'.
    """
    # --- main settings (todo: inherit from the sh file / yaml) ---
    sim = 'sim'  # simulation folder name
    database_folder = 'db'
    bh5670 = 'bh5670'  # the outermost folder in the scratch where all other data are put
    my_offset = 15  # from the molecule to a vacuum. 15-20 is recommended!
    cutoff = 500
    rel_cutoff = 50
    basis_set_file_name = 'BASIS_CC_AUG_RI_NEW'  # RI5, 2-5 cc, all aug-cc, RIFIT-all
    my_basis_sets = [
        'aug-cc-pVDZ', 'aug-cc-pVTZ', 'aug-cc-pVQZ', 'aug-cc-pV5Z'
    ]
    my_ri_basis_sets = [
        'aug-cc-pVDZ-RIFIT', 'aug-cc-pVTZ-RIFIT', 'aug-cc-pVQZ-RIFIT',
        'aug-cc-pV5Z-RIFIT'
    ]
    debug = True
    dummy_run = False  # does not invoke cp2k if true

    # --- command line ---
    parser = argparse.ArgumentParser(description='rank and num of cpus')
    parser.add_argument('-rank')  # array job number
    parser.add_argument('-num_cpus')  # number of cpus you request for every array job
    args = parser.parse_args()

    threads = int(args.num_cpus) - 1  # cpus used to compute
    rank = '{:0>6}'.format(args.rank)  # transform rank from '1' to '000001' format

    prefix_xyz_file_name = 'dsgdb9nsd'
    xyz_file_name = f'{prefix_xyz_file_name}_{rank}.xyz'
    xyz_file_location = f'{prefix_xyz_file_name}/{xyz_file_name}'
    sim_folder_scratch = f'/scratch/{bh5670}/{sim}/{rank}'
    sim_folder_home = f'{sim}/{rank}'  # the parent sim folder at home has to exist

    # Start from an empty scratch folder: leftovers of previous runs are removed.
    if os.path.exists(sim_folder_scratch):
        rmtree(sim_folder_scratch)
    os.makedirs(sim_folder_scratch)  # also creates missing parent folders

    # The xyz object is read from home and written as a normal xyz into scratch.
    my_xyz_file_obj = XYZ.from_file(xyz_file_location)
    xyz_at_scratch = sim_folder_scratch + '/' + xyz_file_name
    my_xyz_file_obj.write(xyz_at_scratch)

    # The molecule object serves as the DB record.
    my_new_mol = Cp2kOutput(rank)

    # The box size is passed on as text; [1:-2] trims the surrounding characters
    # of the str() representation returned by compute_box_size.
    my_abc = str(my_xyz_file_obj.compute_box_size(offset=my_offset))[1:-2]

    # --- base settings ---
    my_potential_file_name = 'POTENTIAL'
    my_potential = 'ALL'
    my_project_name = "this_is_template"
    my_elements = ['H', 'C', 'N', 'O', 'F', 'P', 'S', 'Cl', 'Br', 'B', 'I']
    inp_file_name = 'test_2345.inp'
    my_vdw_parameters_file = 'dftd3.dat'
    activate_vdw = False
    wf_corr_num_proc = 1  # 16 in the ref paper; -1 to use all

    # ---------------- create the template shared by all runs ----------------
    calc = CP2K()
    calc.working_directory = './'
    calc.project_name = 'artem_gw_project'
    calc.mpi_n_processes = 1

    # pycp2k objects
    CP2K_INPUT = calc.CP2K_INPUT
    FORCE_EVAL = CP2K_INPUT.FORCE_EVAL_add()
    FORCE_EVAL.Method = 'QUICKSTEP'
    SUBSYS = FORCE_EVAL.SUBSYS
    DFT = FORCE_EVAL.DFT
    XC = DFT.XC
    SCF = DFT.SCF

    set_global(CP2K_INPUT, project_name=my_project_name)
    # SUBSYS
    set_unperiodic_cell(SUBSYS, abc=my_abc)
    set_nonperiodic_poisson(DFT)
    set_topology(SUBSYS, xyz_file_name=xyz_file_name)
    center_coordinates(SUBSYS)
    # DFT
    set_dft(DFT,
            potential_file_name=my_potential_file_name,
            basis_set_file_name=basis_set_file_name)
    set_cutoff(DFT, cutoff=cutoff, rel_cutoff=rel_cutoff, ngrids=5)
    set_scf(DFT, eps_scf=1.0E-9, max_scf=500, scf_guess='ATOMIC')
    add_ot(SCF, stepsize=0.05)
    set_pbe(XC)  # we start with pbe; no pbe0 in the beginning
    set_qs(DFT, eps_default=1.0E-10, eps_pgf_orb=np.sqrt(1.0E-10))
    if activate_vdw:
        add_vdw(XC, vdw_parameters_file=my_vdw_parameters_file)

    # ---------------- run cp2k two times per basis set ----------------
    basis_set_suffixes = ['2', '3', '4']  # cardinal numbers of the database
    cp2k_exe_path = '/home/ws/bh5670/cp2k/cp2k-7.1/exe/local/cp2k.popt'
    my_run_type = 'mpi'

    # The list used to share its name with the loop variable; it is now named
    # separately so the list stays accessible inside and after the loop.
    for i_bs, suffix in enumerate(basis_set_suffixes):
        calc_ = deepcopy(calc)  # per-basis-set copy of the template
        CP2K_INPUT_ = calc_.CP2K_INPUT
        FORCE_EVAL_ = CP2K_INPUT_.FORCE_EVAL_list[0]
        SUBSYS_ = FORCE_EVAL_.SUBSYS
        DFT_ = FORCE_EVAL_.DFT
        XC_ = DFT_.XC
        SCF_ = DFT_.SCF

        # The project name is the cardinal number; the restart file copied
        # below is named '<suffix>-RESTART.wfn' accordingly.
        set_global(CP2K_INPUT_, project_name=suffix)
        add_elements(SUBSYS_,
                     elements=my_elements,
                     basis=my_basis_sets[i_bs],
                     aux_basis=my_ri_basis_sets[i_bs],
                     pot=my_potential)

        ot_file_name = 'OT_' + f'{suffix}_' + inp_file_name
        diag_file_name = 'DIAG_' + f'{suffix}_' + inp_file_name

        if i_bs != 0:
            # TZ,QZ start from the RESTART of DZ,TZ
            set_scf(DFT_, eps_scf=1.0E-8, max_scf=500, scf_guess='RESTART')
            try:
                copy(sim_folder_scratch + '/' + f'{int(suffix)-1}-RESTART.wfn',
                     sim_folder_scratch + '/' + f'{suffix}-RESTART.wfn')
                print('copied restart file 2->3 or 3->4')
            except Exception:  # best effort: the run goes on without the copy
                print('not succesfull copy of the restart file')
        else:
            # DZ with ATOMIC guess
            set_scf(DFT_, eps_scf=1.0E-8, max_scf=500, scf_guess='ATOMIC')

        # --- first run: OT to converge quickly ---
        calc_.write_input_file(sim_folder_scratch + '/' + ot_file_name)
        print(f"Running PBE with OT (basis set = {suffix})...")
        if not dummy_run:
            cp2k_run(input_file=ot_file_name,
                     xyz_file=xyz_file_name,
                     run_type=my_run_type,
                     np=threads,
                     output_file=f'out_ot_{suffix}.out',
                     cp2k_executable=cp2k_exe_path,
                     execution_directory=sim_folder_scratch)
        print(f"I have finished cp2k with OT (basis set = {suffix})")

        # --- second run: DIAGONALIZATION to reliably compute HOMO, then GW ---
        remove_ot(SCF_)  # remove the OT method
        add_diagonalization(SCF_)  # change calculations to a diagonalization
        # NOTE(review): add_smear(SCF_) is treated as disabled here ("uses
        # final T."); the file's formatting was lost, so confirm this.
        add_mixing(SCF_)  # add or not?
        add_mos(SCF_, added_mos=1000)
        print_mo_cubes(DFT_.PRINT, nhomo=10, nlumo=10)
        set_scf(DFT_, eps_scf=1E-6, max_scf=200)
        # It is important to keep WF memory smaller than HF memory, otherwise
        # it crashes.
        add_gw_ver_0(XC_,
                     ev_sc_iter=1,
                     wf_corr_num_proc=wf_corr_num_proc,
                     rpa_num_quad_points=100,
                     max_memory_wf=4000,
                     max_memory_hf=500,
                     corr_occ=1,
                     corr_virt=1)
        calc_.write_input_file(sim_folder_scratch + '/' + diag_file_name)
        print(f"Running G0W0 with DIAG (basis set = {suffix})...")
        my_out_file2 = f'out_diag_{suffix}.out'
        if not dummy_run:
            cp2k_run(input_file=diag_file_name,
                     xyz_file=xyz_file_name,
                     output_file=my_out_file2,
                     run_type=my_run_type,
                     np=threads,
                     cp2k_executable=cp2k_exe_path,
                     execution_directory=sim_folder_scratch)
        print(f"I have finished cp2k with DIAG (basis set = {suffix})")

        # --- extract homo/lumo and gw homo/lumo from the cp2k output file ---
        path_to_out2_file = sim_folder_scratch + '/' + my_out_file2

        try:
            num_orb = extract_number_of_independent_orbital_function(
                path_to_out2_file)
            print(
                f'basis set = {suffix}, number of independent orbital functions: {num_orb}'
            )
        except Exception:
            print('number of orbatals was not extracted')
            num_orb = 'not extracted'

        try:
            homos, lumos = return_homo_lumo(path_to_out2_file)
            # NOTE(review): the helper is called eV_to_Hartree but the result is
            # labelled eV in the output; confirm the conversion direction.
            homo = homos[-1] * eV_to_Hartree()
            lumo = lumos[0] * eV_to_Hartree()
            print(f'basis set = {suffix} ', 'homo = ', homo, ' eV')
            print(f'basis set = {suffix} ', 'lumo = ', lumo, ' eV')
        except Exception:
            print(f'Homo/Lumo were not extracted')
            homo = 'not extracted'
            lumo = 'not extracted'

        try:
            gw_occ, gw_vir, homo_, lumo_ = return_gw_energies(
                path_to_out2_file)
            # Fall back to the values from the GW section only if the plain
            # extraction above left the 'not extracted' strings.
            if isinstance(homo, str) and isinstance(lumo, str):
                homo = homo_
                lumo = lumo_
            print(f'basis set = {suffix} ', 'homo = ', homo, ' eV')
            print(f'basis set = {suffix} ', 'lumo = ', lumo, ' eV')
            print(f'basis set = {suffix} ', 'gw homo = ', gw_occ, ' eV')
            print(f'basis set = {suffix} ', 'gw lumo = ', gw_vir, ' eV')
        except Exception:
            print("GW energies were not extracted")
            gw_occ = 'not extracted'
            gw_vir = 'not extracted'

        del calc_

        # put computed data into the molecule object
        my_new_mol.add_energies(int(suffix), homo, lumo, gw_occ, gw_vir)
        my_new_mol.add_num_orbitals(int(suffix), num_orb)

    # NOTE(review): statement nesting was lost in this file; the extrapolation
    # and the record export are done once, after every basis set was recorded.
    my_new_mol.extrapolate_energy()
    db_record = my_new_mol.yield_dict()  # becomes a record in the global library

    print("\nI am done\n")
    print('saving to DB...')
    with open(f'{database_folder}/DB_{rank}.yaml', 'w') as stream:
        yaml.safe_dump(db_record, stream)
    print(f"saved to {database_folder}/DB_{rank}.yaml")

    # --- clean up before leaving ---
    print('I will remove the content the sim folder')
    status = my_new_mol.status()
    if status == 'all_extracted':  # all quantities are extracted
        if debug:
            print(
                f'status: {status}, but debug is on ==> will move {sim_folder_scratch} to {sim_folder_home}'
            )
            # dirs_exist_ok: overwrite a home folder left by an earlier run
            # instead of raising FileExistsError.
            copytree(sim_folder_scratch, sim_folder_home, dirs_exist_ok=True)
        else:
            print(f'status: {status} ==> will remove {sim_folder_scratch}')
    else:
        print(f'status: {status} ==> will copy failed sim folder from scratch')
        copytree(sim_folder_scratch, sim_folder_home, dirs_exist_ok=True)
        print(f"I have copied {sim_folder_scratch} to {sim_folder_home}")
    # The scratch folder is removed exactly once, whatever branch was taken.
    try_to_remove_folder(sim_folder_scratch)
def main():
    """Run the yaml-configured OT -> DIAG/G0W0 CP2K workflow for one molecule.

    Command-line arguments (parsed here with argparse):
        -rank      1-based position of the molecule number inside the csv file.
        -num_cpus  number of cpus requested per array job (passed to cp2k as is).
        -i         path to the yaml input file.
        -mol_ids   optional path to a one-line csv file with molecule numbers;
                   defaults to 'db/trash_db_numbers.csv'.

    The yaml input supplies 'debug', 'dummy_run', 'folder_names'
    ('simulations', 'database', 'scratch'), 'prefix_xyz_file_name',
    'molecule_vacuum_offset', 'cp2k_exe_path', 'basis_set_suffix' and,
    optionally, 'mpi' (default 'openmpi').

    The process exits early when the DB record of the molecule already exists.
    Otherwise, for every basis-set suffix, an OT and a DIAG+GW input are
    written and run (unless dummy_run) and the DIAG output is parsed.  The
    record is saved to '<db>/DB_<rank>.yaml' (unless dummy_run) and the
    scratch folder is copied home (debug or failed run) and removed.

    Values that cannot be parsed are stored as the string 'not extracted'.
    """
    import sys  # local import: only needed for the early exit below

    # SCRATCH has to be in the env var dict. Normally, it is.
    scratch = os.environ.get('SCRATCH', 'scratch')

    # --- command line ---
    parser = argparse.ArgumentParser(description='rank and num of cpus')
    parser.add_argument('-rank')  # array job number
    parser.add_argument('-num_cpus')  # number of cpus you request for every array job
    parser.add_argument('-i')  # input_from_yaml yaml file
    parser.add_argument('-mol_ids')  # mol_ids to simulate (without prefix and suffix)
    args = parser.parse_args()

    # --- yaml input ---
    # An explicit SafeLoader is passed: PyYAML >= 6 refuses yaml.load without a
    # Loader, and a plain settings file needs nothing beyond the safe subset.
    # The dictionary is no longer called `input`, which shadowed the builtin.
    with open(args.i) as stream:
        my_input = yaml.load(stream=stream, Loader=yaml.SafeLoader)

    debug = my_input['debug']
    dummy_run = my_input['dummy_run']

    # folder names
    sim = my_input['folder_names']['simulations']
    db = my_input['folder_names']['database']
    bh5670 = my_input['folder_names']['scratch']  # outermost folder inside scratch
    prefix_xyz_file_name = my_input['prefix_xyz_file_name']
    my_offset = my_input['molecule_vacuum_offset']
    mpi = my_input.get('mpi', 'openmpi')

    threads = int(args.num_cpus)  # cpus used to compute. I do not subtract 1.

    # --- which molecule? ---
    # By default: molecules that did not fully converge.
    path_to_mol_ids = args.mol_ids
    if path_to_mol_ids is None:
        path_to_mol_ids = 'db/trash_db_numbers.csv'
    with open(path_to_mol_ids, 'r') as stream:
        # only one line in this csv format file, so we do not loop over
        all_numbers = next(csv.reader(stream))
    rank = all_numbers[int(args.rank) - 1]
    rank = '{:0>6}'.format(rank)  # '1' -> '000001'. This is not a general thing

    xyz_file_name = f'{prefix_xyz_file_name}_{rank}.xyz'
    xyz_file_location = f'{prefix_xyz_file_name}/{xyz_file_name}'
    db_record_path = f'{db}/DB_{rank}.yaml'  # file where the results will be saved

    # Nothing to do if the record already exists.
    if os.path.exists(db_record_path):
        print(
            f'The simulation results of mol. {rank} is already in the folder of reference'
        )
        sys.exit()

    if not dummy_run:
        sim_folder_scratch = f'/{scratch}/{bh5670}/{sim}/{rank}'
    else:
        sim_folder_scratch = f'{scratch}/{bh5670}/{sim}/{rank}'
    sim_folder_home = f'{sim}/{rank}'  # the parent sim folder at home has to exist

    # Start from an empty scratch folder: leftovers of previous runs are removed.
    if os.path.exists(sim_folder_scratch):
        rmtree(sim_folder_scratch)
    os.makedirs(sim_folder_scratch)  # also creates missing parent folders

    # The xyz object is read from home and written as a normal xyz into scratch.
    try:
        my_xyz_file_obj = XYZ.from_file(xyz_file_location)
    except Exception:  # test fallback
        my_xyz_file_obj = XYZ.from_file('H2O.xyz')
    xyz_at_scratch = sim_folder_scratch + '/' + xyz_file_name
    my_xyz_file_obj.write(xyz_at_scratch)

    # The molecule object serves as the DB record.
    my_new_mol = Cp2kOutput(rank)

    # The box size is passed on as text; [1:-2] trims the surrounding characters
    # of the str() representation returned by compute_box_size.
    my_abc = str(my_xyz_file_obj.compute_box_size(offset=my_offset))[1:-2]
    my_input['my_abc'] = my_abc
    my_input['xyz_file_name'] = xyz_file_name

    # ---------------- run cp2k two times per basis set ----------------
    cp2k_exe_path = my_input['cp2k_exe_path']
    my_run_type = 'mpi'
    suffixes = my_input['basis_set_suffix']  # todo: fix DZ --> 2, TZ --> 3, QZ --> 4

    # This initializes the class variables of InputFactory from the input.
    # The factory methods used below rely on it having been called.
    InputFactory.set_constants(input_from_yaml=my_input)

    # Shorthands: every run shares all parameters except the basis-set suffix
    # and the kind of run ('ot' or 'diag').
    def my_inp_file(suf, ot_or_diag):
        return f'{ot_or_diag}_{suf}.inp'

    def my_out_file(suf, ot_or_diag):
        return f'out_{ot_or_diag}_{suf}.out'

    def my_cp2k_run(suf='2', ot_or_diag='ot'):
        cp2k_run(input_file=my_inp_file(suf, ot_or_diag),
                 output_file=my_out_file(suf, ot_or_diag),
                 xyz_file=xyz_file_name,
                 run_type=my_run_type,
                 np=threads,
                 cp2k_executable=cp2k_exe_path,
                 execution_directory=sim_folder_scratch,
                 type_mpi=mpi)

    print('I am HERE')

    for i_bs, suf in enumerate(suffixes):
        # --> OT dft. (OT = orbital transformation)
        dft_ot_simulation = InputFactory.new_dft_ot(i_bs)
        dft_ot_simulation.write_input_file(
            f"{sim_folder_scratch}/{my_inp_file(suf=suf, ot_or_diag='ot')}")
        # Before the run, the RESTART of the previous basis set is copied
        # (it exists for all but the smallest basis set).
        try_to_copy_previous_restart_file(
            i_bs=i_bs, sim_folder_scratch=sim_folder_scratch, suf=suf)
        print(f"Running PBE with OT (basis set = {suf})...")
        if not dummy_run:
            my_cp2k_run(suf=suf, ot_or_diag='ot')
        print(f"I have finished cp2k with OT (basis set = {suf})")
        # <-- OT dft

        # --> GW following DIAG dft. (DIAG = diagonalization)
        diag_out_file = f"{sim_folder_scratch}/{my_out_file(suf=suf, ot_or_diag='diag')}"
        diag_inp_file = f"{sim_folder_scratch}/{my_inp_file(suf=suf, ot_or_diag='diag')}"
        gw_diag_simulations = InputFactory.new_gw(i_bs)
        gw_diag_simulations.write_input_file(diag_inp_file)
        print(f"Running G0W0 with DIAG (basis set = {suf})...")
        if not dummy_run:
            my_cp2k_run(suf=suf, ot_or_diag='diag')
        print(f"I have finished cp2k with DIAG (basis set = {suf})")

        # --> extract (from diag out)
        try:
            num_orb = extract_number_of_independent_orbital_function(
                diag_out_file)
            print(
                f'basis set = {suf}, number of independent orbital functions: {num_orb}'
            )
        except Exception:
            print('number of orbitals was not extracted')
            num_orb = 'not extracted'

        try:
            homos, lumos = return_homo_lumo(diag_out_file)
            # NOTE(review): the helper is called eV_to_Hartree but the result is
            # labelled eV in the output; confirm the conversion direction.
            homo = homos[-1] * eV_to_Hartree()
            lumo = lumos[0] * eV_to_Hartree()
            # The current suffix is reported (previously the whole list was).
            print(f'basis set = {suf} ', 'homo = ', homo, ' eV')
            print(f'basis set = {suf} ', 'lumo = ', lumo, ' eV')
        except Exception:
            print(f'Homo/Lumo were not extracted')
            homo = 'not extracted'
            lumo = 'not extracted'

        try:
            gw_occ, gw_vir, homo_, lumo_ = return_gw_energies(diag_out_file)
            homo, lumo = redefine_homo_lumo_if_not_extracted_before(
                homo_, lumo_, homo, lumo)
            print_extracted_energies(suf, homo, lumo, gw_occ, gw_vir)
        except SCQPSolutionNotFound:  # we know how to handle this error
            print(
                "GW is not extracted, because SCQPSolutionNotFound. Calling fallback ..."
            )
            # If the solution is not found, the number of quadrature points
            # may be insufficient: rerun DIAG with 500 points.
            gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[
                0].DFT.XC.WF_CORRELATION_list[
                    0].RI_RPA.Rpa_num_quad_points = 500
            print(
                "I write the fallback input file where num of q points = 500. It has the same name as before?"
            )
            gw_diag_simulations.write_input_file(diag_inp_file)
            if not dummy_run:  # a dummy run must never invoke cp2k
                my_cp2k_run(suf=suf, ot_or_diag='diag')
        except SCFNotConvergedNotPossibleToRunMP2:
            print(
                "GW is not extracted, because SCFNotConvergedNotPossibleToRunMP2. Calling fallback ..."
            )
            print('NOT IMPLEMENTED')
        finally:
            # The output is parsed once more whatever happened above; this is
            # what picks up the result of a fallback rerun.  An exception not
            # handled above still propagates after this block.
            try:
                gw_occ, gw_vir, homo_, lumo_ = return_gw_energies(
                    diag_out_file)
                homo, lumo = redefine_homo_lumo_if_not_extracted_before(
                    homo_, lumo_, homo, lumo)
                print_extracted_energies(suf, homo, lumo, gw_occ, gw_vir)
            except Exception:
                print("GW energies were not extracted even in the fallback")
                gw_occ = 'not extracted'
                gw_vir = 'not extracted'

        del dft_ot_simulation, gw_diag_simulations

        # put computed data into the molecule object
        my_new_mol.add_energies(int(suf), homo, lumo, gw_occ, gw_vir)
        my_new_mol.add_num_orbitals(int(suf), num_orb)
        # <-- END: GW run and extraction

    # NOTE(review): statement nesting was lost in this file; the extrapolation
    # and the record export are done once, after every basis set was recorded.
    my_new_mol.extrapolate_energy()
    db_record = my_new_mol.yield_dict()  # becomes a record in the global library

    print("\nI am done\n")

    if not dummy_run:
        print('saving to DB...')
        with open(f'{db}/DB_{rank}.yaml', 'w') as stream:
            yaml.safe_dump(db_record, stream)
        print(f"saved to {db}/DB_{rank}.yaml")

    # --- clean up before leaving ---
    print('I will remove the content of the sim folder')
    status = my_new_mol.status()
    if status == 'all_extracted':  # all quantities are extracted
        if debug:
            print(
                f'status: {status}, but debug is on ==> will move {sim_folder_scratch} to {sim_folder_home}'
            )
            copytree(sim_folder_scratch, sim_folder_home, dirs_exist_ok=True)
        else:
            print(f'status: {status} ==> will remove {sim_folder_scratch}')
    else:
        print(f'status: {status} ==> will copy failed sim folder from scratch')
        try:
            # dirs_exist_ok: an existing home folder used to make this copy
            # fail, after which the scratch data was removed anyway.
            copytree(sim_folder_scratch, sim_folder_home, dirs_exist_ok=True)
            print(f"I have copied {sim_folder_scratch} to {sim_folder_home}")
        except Exception:
            print(
                f"I could not copy {sim_folder_scratch} to {sim_folder_home}")
    # The scratch folder is removed exactly once, whatever branch was taken.
    try_to_remove_folder(sim_folder_scratch)
def main():
    """Build DB records from already existing DIAG outputs in the 'sim' folder.

    Every entry of 'sim' is taken as a molecule id.  For each molecule that
    has no 'db/DB_<id>.yaml' yet, the outputs 'sim/<id>/out_diag_<suf>.out'
    for suf in ('2', '3') are parsed and the extracted values are stored in a
    ``Cp2kOutput`` record, which is then dumped to 'db/DB_<id>.yaml'.  No cp2k
    run is started here.

    Values that cannot be parsed are stored as the string 'not extracted'.
    """

    def my_out_file(suf, ot_or_diag):
        return f'out_{ot_or_diag}_{suf}.out'

    SUFFIX = ['2', '3']

    my_path = os.path.abspath('')
    print(my_path)
    all_folders = os.listdir('sim')
    print(f'all folders of interest = {all_folders}')
    path_to_folders = [
        os.path.abspath('sim' + '/' + folder) for folder in all_folders
    ]
    print(path_to_folders)  # informational only; the loop uses relative paths

    for folder in all_folders:
        db_record_path = f'db/DB_{folder}.yaml'
        if os.path.exists(db_record_path):
            print(
                f'The simulation results of mol. {folder} is already in the folder of reference'
            )
            # Skip only this molecule.  Exiting here used to stop the whole
            # script at the first molecule that already had a record.
            continue

        print(f'\nTHIS IS MOL_NUM {folder}\n')
        my_new_mol = Cp2kOutput(folder)

        for suf in SUFFIX:
            diag_out_file = f"sim/{folder}/{my_out_file(suf=suf, ot_or_diag='diag')}"

            # extract number of orbitals:
            try:
                num_orb = extract_number_of_independent_orbital_function(
                    diag_out_file)
                print(
                    f'basis set = {suf}, number of independent orbital functions: {num_orb}'
                )
            except Exception:
                print('number of orbitals was not extracted')
                num_orb = 'not extracted'

            # extract energies:
            try:
                homos, lumos = return_homo_lumo(diag_out_file)
                # NOTE(review): the helper is called eV_to_Hartree but the
                # result is labelled eV in the output; confirm the direction.
                homo = homos[-1] * eV_to_Hartree()
                lumo = lumos[0] * eV_to_Hartree()
                print(f'basis set = {suf} ', 'homo = ', homo, ' eV')
                print(f'basis set = {suf} ', 'lumo = ', lumo, ' eV')
            except Exception:
                print(f'Homo/Lumo were not extracted')
                homo = 'not extracted'
                lumo = 'not extracted'

            try:
                gw_occ, gw_vir, homo_, lumo_ = return_gw_energies(
                    diag_out_file)
                homo, lumo = redefine_homo_lumo_if_not_extracted_before(
                    homo_, lumo_, homo, lumo)
                print_extracted_energies(suf, homo, lumo, gw_occ, gw_vir)
            except SCFNotConvergedNotPossibleToRunMP2:
                print(
                    "GW is not extracted, because SCFNotConvergedNotPossibleToRunMP2. Calling fallback ..."
                )
                print('NOT IMPLEMENTED')
            finally:
                # The output is parsed once more whatever happened above.  An
                # exception not handled above still propagates afterwards.
                try:
                    gw_occ, gw_vir, homo_, lumo_ = return_gw_energies(
                        diag_out_file)
                    homo, lumo = redefine_homo_lumo_if_not_extracted_before(
                        homo_, lumo_, homo, lumo)
                    print_extracted_energies(suf, homo, lumo, gw_occ, gw_vir)
                except Exception:
                    print(
                        "GW energies were not extracted even in the fallback")
                    gw_occ = 'not extracted'
                    gw_vir = 'not extracted'

            # put computed data into the molecule object
            my_new_mol.add_energies(int(suf), homo, lumo, gw_occ, gw_vir)
            my_new_mol.add_num_orbitals(int(suf), num_orb)

        # NOTE(review): statement nesting was lost in this file; extrapolation
        # and export are done once per molecule, after all basis sets.
        my_new_mol.extrapolate_energy()
        db_record = my_new_mol.yield_dict()  # becomes a record in the global library

        print('saving to DB...')
        with open(f'db/DB_{folder}.yaml', 'w') as stream:
            yaml.safe_dump(db_record, stream)
        print(f"saved to db/DB_{folder}.yaml")

    print('done')
def main(): try: scratch = os.environ['SCRATCH'] # SCRATCH has to be in the env var dict. Normally, it is. except: scratch = 'scratch' # parser begin parser = argparse.ArgumentParser(description='rank and num of cpus') parser.add_argument('-rank') # array job number parser.add_argument('-num_cpus') # number of cpus you request for every array job parser.add_argument('-i') # input_from_yaml yaml file parser.add_argument('-mol_ids') # path to csv file with some identifiers of molecules to be simulated. # Identifiers may be: numbers of the molecules (6 digits) or the full names of the molecules mol_ids to simulate args = parser.parse_args() # parser end # yaml file. my_input is the dictionaty containing input information. yaml_file_name = args.i with open(yaml_file_name) as stream: my_input = yaml.load(stream=stream, Loader=yaml.SafeLoader) # end: yaml file # todo: think over because it is imported twice # end: run-or-check settings debug = my_input['debug'] # if True will copy the content of the scratch folder back to sim/* folder. Actually, should be used dummy_run = my_input['dummy_run'] # if not at cluster: test # debug = True # dummy_run = True # end: if not at cluster # folders names sim = my_input['folder_names']['simulations'] db = my_input['folder_names']['database'] bh5670 = my_input['folder_names']['scratch'] # the outermost folder in the scratch folder where all other data are put my_offset = my_input['molecule_vacuum_offset'] try: mpi = my_input['mpi'] except: mpi = 'openmpi' # parsing input_from_yaml threads = int(args.num_cpus) # cpus used to compute. I do not subtract 1. 
This does not help # mol_id = parse_mixed_list() def determine_file_name_path(my_input, args): """ Depending on the format of the input dictionary (my_input['db_format']), returns the name of the input file and the path to it @param my_input: input dictionary, which was read in from the yaml input file @param args: command line arguments list @return: xyz_file_name, xyz_file_path, mol_identifier (self-explained) """ try: db_format = my_input['db_format'] # 'general' means that names are full mols identifiers except KeyError: db_format = 'dsgdb9nsd' # this is the format of the corresponding dataset only. default try: path_to_mol_ids = args.mol_ids except TypeError: print(f'path_to_mol_ids {args.mol_ids} is not found. Exiting...') exit() with open(path_to_mol_ids, 'r') as stream: csv_reader = csv.reader(stream) all_mols_ids = csv_reader.__next__() # only one line in this csv format file, so we do not loop over if db_format =='dsgdb9nsd': prefix_xyz_file_name = my_input['prefix_xyz_file_name'] mol_identifier_variable_digits = all_mols_ids[int(args.rank) - 1] # the variable "rank" is not actually a rank. This is here 6 digits read from *.csv file (only for db_format = 'gdb...' or if it is nnot specified). mol_identifier_6_digits = '{:0>6}'.format(mol_identifier_variable_digits) # transform rank from '1' to '000001' format. This is not a general thing xyz_file_name = f'{prefix_xyz_file_name}_{mol_identifier_6_digits}.xyz' # only file name xyz_file_path = f'../{prefix_xyz_file_name}/{xyz_file_name}' # path to the file. db outside working folder return xyz_file_name, xyz_file_path, mol_identifier_6_digits elif db_format == 'general': try: prefix_xyz_file_name = my_input['prefix_xyz_file_name'] # here, xyz file name is the name of the db folder. 
except KeyError: prefix_xyz_file_name = my_input['dataset_name'] mol_identifier = all_mols_ids[int(args.rank) - 1].split('.')[0] xyz_file_name = f'{mol_identifier}.xyz' # only file name xyz_file_path = f'../{prefix_xyz_file_name}/{xyz_file_name}' # path to the file. db outside working folder return xyz_file_name, xyz_file_path, mol_identifier else: print('Unknown db_format. Exiting...') exit() xyz_file_name, xyz_file_path, mol_identifier = determine_file_name_path(my_input, args) # db --> # check is the output exists in 'db' folder db_record_path = f'{db}/DB_{mol_identifier}.yaml' # file where the results will be saved if os.path.exists(db_record_path): print(f'The simulation results of mol. {mol_identifier} is already in the folder of reference') exit() # here one can check if the DB_ file is not broken # end: check if the output exists # <-- db # scratch --> # this below makes something. look carefully! if not dummy_run: print('This a productive run') sim_folder_scratch = f'{scratch}/{bh5670}/{sim}/{mol_identifier}' print(f'I set a sim_folder_scratch to: {sim_folder_scratch}') else: print('This is dummy run') sim_folder_scratch = f'{scratch}/{bh5670}/{sim}/{mol_identifier}' print(f'I set a sim_folder_scratch to: {sim_folder_scratch}') os.makedirs(sim_folder_scratch, exist_ok=True) #<-- scratch # sim --> sim_folder_home = f'{sim}/{mol_identifier}' # sim folder at home exists (has to exist beforehand). 
you create later {mol_id} folder if not os.path.exists(sim_folder_home): # home os.mkdir(sim_folder_home) else: print(f"I have found the folder {sim_folder_home} in the sim folder and will try to copy it to scratch ....") copytree(sim_folder_home, sim_folder_scratch, dirs_exist_ok=True) # will rewrite the folder print('...done!') print(f'now I will remove the {sim_folder_home} folder at home and create a new empty folder at its place...') rmtree(sim_folder_home) # leftovers from previous simulations will be removed os.mkdir(sim_folder_home) # and the new folder will be created print('...done') # <-- sim # xyz object created, normal xyz file is created at scratch # todo: H20.xyz is not relevant anymore try: my_xyz_file_obj = XYZ.from_file(xyz_file_path) # object created using the file from home except FileNotFoundError: # test my_xyz_file_obj = XYZ.from_file('H2O.xyz') # object created using the file from home xyz_at_scratch = sim_folder_scratch + '/' + xyz_file_name # my_xyz_file_obj.write(xyz_at_scratch) # writes a normal xyz (into scratch) # my molecule object is created. 
It will serve as a DB record my_new_mol = Cp2kOutput(mol_identifier) # rel_cutoff: 40; cutoff: 300; abc = 10 my_abc = str(my_xyz_file_obj.compute_box_size(offset=my_offset))[1:-2] # last changes to my_input: offsets and xyz_file_name my_input['my_abc'] = my_abc my_input['xyz_file_name'] = xyz_file_name my_xyz_file_obj.identify_atom_types() my_input['elements'] = my_xyz_file_obj.unique_atom_types # elements identified automatically # misc wf_corr_num_proc = 0 # 16 in the ref paper; -1 to use all inp_file_name = 'test_2344.inp' # base file name ######################################## BEGIN: RUN CP2K TWO TIMES ##################################################### # suffix = ['2', '3', '4'] # cardinal numbers of the database # begin: input_from_yaml # cp2k_exe_path = '/home/artem/soft/cp2k/cp2k-7.1/exe/local/cp2k.popt' # cp2k_exe_path = '/home/ws/bh5670/cp2k/cp2k-7.1/exe/local/cp2k.popt' cp2k_exe_path = my_input['cp2k_exe_path'] my_run_type = 'mpi' suffix = my_input['basis_set_suffix'] # todo: fix DZ --> 2, TZ --> 3, QZ --> 4 # this will initialize class variables (that is the class) according to the input # actually, this is probably a bad idea to make it like that, because if one forgets doing so, # class functions will not work InputFactory.set_constants(input_from_yaml=my_input) # --> my_cp2k_run: condensed function with just 2 argument. 
# my_inp_file, my_out_file: return names # reason: its other parameters are the same for all 6 (or more runs) # this is nothing more than a shorthand, this is why it is ugly def my_inp_file(suf, ot_or_diag): return f'{ot_or_diag}_{suf}.inp' def my_out_file(suf, ot_or_diag): return f'out_{ot_or_diag}_{suf}.out' def my_cp2k_run(suf='2', ot_or_diag='ot'): cp2k_run(input_file=my_inp_file(suf, ot_or_diag), output_file=my_out_file(suf, ot_or_diag), xyz_file=xyz_file_name, run_type=my_run_type, np=threads, cp2k_executable=cp2k_exe_path, execution_directory=sim_folder_scratch, type_mpi=mpi) # <-- my_cp2k_run # ot_file_names = [f'OT_{suffix}_{inp_file_name}' for suffix in suffix] # diag_file_names =[f'DIAG_{suffix}_{inp_file_name}' for suffix in suffix] # out_ot_file_names = [f'out_ot_{suffix}.out' for suffix in suffix] # out_diag_file_names =[f'out_diag_{suffix}.out' for suffix in suffix] print('I am HERE') for i_bs, suf in enumerate(suffix): # --> OT dft. (OT = orbital transformation) dft_ot_simulation = InputFactory.new_dft_ot(i_bs) ot_inp_file = f"{sim_folder_scratch}/{my_inp_file(suf=suf, ot_or_diag='ot')}" dft_ot_simulation.write_input_file(ot_inp_file) # OT dft run below ... # ... but before, we copy the RESTART from the previous basis set (it exists unless for the smallest basis set) try_to_copy_previous_restart_file(i_bs=i_bs, sim_folder_scratch=sim_folder_scratch, suf=suf) print(f"Running PBE with OT (basis set = {suf})...") if not dummy_run: my_cp2k_run(suf=suf, ot_or_diag='ot') print(f"I have finished cp2k with OT (basis set = {suf})") # <-- OT dft # --> GW following DIAG dft. 
(DIAG = diagonalization) diag_out_file = f"{sim_folder_scratch}/{my_out_file(suf=suf, ot_or_diag='diag')}" diag_inp_file = f"{sim_folder_scratch}/{my_inp_file(suf=suf, ot_or_diag='diag')}" gw_diag_simulations = InputFactory.new_gw(i_bs) gw_diag_simulations.write_input_file(diag_inp_file) print(f"Running G0W0 with DIAG (basis set = {suf})...") if not dummy_run: my_cp2k_run(suf=suf, ot_or_diag='diag') print(f"I have finished cp2k with DIAG (basis set = {suf})") # --> extract (from diag out) # extract number of orbitals: try: num_orb = extract_number_of_independent_orbital_function(diag_out_file) print(f'basis set = {suf}, number of independent orbital functions: {num_orb}') except: print('number of orbitals was not extracted') num_orb = 'not extracted' # extract energies: try: homos, lumos = [], [] homos, lumos = return_homo_lumo(diag_out_file) print(f'basis set = {suffix} ', 'h**o = ', homos[-1] * eV_to_Hartree(), ' eV') print(f'basis set = {suffix} ', 'lumo = ', lumos[0] * eV_to_Hartree(), ' eV') h**o = homos[-1] * eV_to_Hartree() lumo = lumos[0] * eV_to_Hartree() except: print(f'H**o/Lumo were not extracted') h**o = 'not extracted' lumo = 'not extracted' try: # first try to return gw energies --> occ, vir, homo_, lumo_, occ_scf, vir_scf, occ_0, vir_0 = return_gw_energies_advanced(diag_out_file) h**o, lumo = redefine_homo_lumo_if_not_extracted_before(homo_, lumo_, h**o, lumo) print_extracted_energies(suf, h**o, lumo, occ, vir) # on a screen except (IterationLimit, LargeSigc, SCQPSolutionNotFound, NaNInGW): # 20 iterations try: # xyz + 10 print("GW is extracted, but scf is not converged, because of IterationLimit. 
Calling fallback ...") my_abc_plus_10 = str(my_xyz_file_obj.compute_box_size(offset=my_offset+10.0))[1:-2] # todo: hard # replay ot with a larger xyz space +10 # ot dft_ot_simulation.CP2K_INPUT.FORCE_EVAL_list[0].SUBSYS.CELL.Abc = my_abc_plus_10 dft_ot_simulation.write_input_file(ot_inp_file) print("Replay ot with xyz + 10") my_cp2k_run(suf=suf, ot_or_diag='ot') print("... ot succesfull") # diag print("diag Replay ot with xyz + 10 and Femi offset of 5E-2") gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].SUBSYS.CELL.Abc = my_abc_plus_10 gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].DFT.XC.WF_CORRELATION_list[0].RI_RPA.RI_G0W0.Fermi_level_offset = 4.0E-2 gw_diag_simulations.write_input_file(diag_inp_file) my_cp2k_run(suf=suf, ot_or_diag='diag') print("... diag succesfull") # the following section is necessary to catch the error: occ, vir, homo_, lumo_, occ_scf, vir_scf, occ_0, vir_0 = return_gw_energies_advanced(diag_out_file) h**o, lumo = redefine_homo_lumo_if_not_extracted_before(homo_, lumo_, h**o, lumo) print_extracted_energies(suf, h**o, lumo, occ, vir) # on a screen except (IterationLimit, LargeSigc, SCQPSolutionNotFound, NaNInGW): try: print("GW is extracted, but scf is not converged AGAIN, because of IterationLimit. Calling fallback ...") # diag 200 Q points gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].DFT.XC.WF_CORRELATION_list[0].RI_RPA.Rpa_num_quad_points = 200 # this should help as well gw_diag_simulations.write_input_file(diag_inp_file) my_cp2k_run(suf=suf, ot_or_diag='diag') print("I write the fallback input file with QUAD points = 200") gw_diag_simulations.write_input_file(diag_inp_file) my_cp2k_run(suf=suf, ot_or_diag='diag') print("... 
diag succesful") # the following section is necessary to catch the error: occ, vir, homo_, lumo_, occ_scf, vir_scf, occ_0, vir_0 = return_gw_energies_advanced(diag_out_file) h**o, lumo = redefine_homo_lumo_if_not_extracted_before(homo_, lumo_, h**o, lumo) print_extracted_energies(suf, h**o, lumo, occ, vir) # on a screen except (IterationLimit, LargeSigc, SCQPSolutionNotFound): # replay ot with a larger cutoff then make diag with a larger cutoff # ot print("GW is extracted, but scf is not converged AGAIN AGAIN, because of IterationLimit. Calling fallback ...") print("Replay diag with FERMI offset 10E-2!") gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].DFT.XC.WF_CORRELATION_list[0].RI_RPA.RI_G0W0.Fermi_level_offset = 10.0E-2 gw_diag_simulations.write_input_file(diag_inp_file) my_cp2k_run(suf=suf, ot_or_diag='diag') print("... diag succesful") except SCQPSolutionNotFound: # we know how to handle this error try: print("GW is not extracted, because SCQPSolutionNotFound. Calling fallback ...") # --> of the solution not found, it could be that the number of quad points is insufficent gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].DFT.XC.WF_CORRELATION_list[0].RI_RPA.RI_G0W0.Crossing_search = 'BISECTION' # this alone does not always work print("I write the fallback input file the crossing search is set to BISECTION") gw_diag_simulations.write_input_file(diag_inp_file) my_cp2k_run(suf=suf, ot_or_diag='diag') # the following section is necessary to catch the error: occ, vir, homo_, lumo_, occ_scf, vir_scf, occ_0, vir_0 = return_gw_energies_advanced(diag_out_file) h**o, lumo = redefine_homo_lumo_if_not_extracted_before(homo_, lumo_, h**o, lumo) print_extracted_energies(suf, h**o, lumo, occ, vir) # on a screen except SCQPSolutionNotFound: print("GW is not extracted, because SCQPSolutionNotFound. 
Calling second fallback ...") gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].DFT.XC.WF_CORRELATION_list[0].RI_RPA.Rpa_num_quad_points = 500 # this should help as well gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].DFT.XC.WF_CORRELATION_list[0].RI_RPA.RI_G0W0.Crossing_search = 'BISECTION' print("I write the fallback input file with QUAD points = 500") gw_diag_simulations.write_input_file(diag_inp_file) my_cp2k_run(suf=suf, ot_or_diag='diag') except SCFNotConvergedNotPossibleToRunMP2: print("GW is not extracted, because SCFNotConvergedNotPossibleToRunMP2. Calling fallback ...") # replay ot with a larger cutoff then make diag with a larger cutoff # ot dft_ot_simulation.CP2K_INPUT.FORCE_EVAL_list[0].DFT.MGRID.Cutoff = 1000 dft_ot_simulation.CP2K_INPUT.FORCE_EVAL_list[0].DFT.MGRID.Rel_cutoff = 100 dft_ot_simulation.write_input_file(ot_inp_file) print("Replay ot with cutoff of 100 rel_cutoff of 100...") my_cp2k_run(suf=suf, ot_or_diag='ot') print("... ot succesfull") # diag gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].DFT.MGRID.Cutoff = 1000 gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].DFT.MGRID.Rel_cutoff = 100 gw_diag_simulations.write_input_file(diag_inp_file) my_cp2k_run(suf=suf, ot_or_diag='diag') # print('NOT IMPLEMENTED') except NaNInGW: try: print("GW is not extracted, because there is a NaN in the last frame of the SCF loop. Calling fallback") gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].DFT.XC.WF_CORRELATION_list[0].RI_RPA.RI_G0W0.Crossing_search = 'BISECTION' print("I wrote the fallback. 
The crossing search is set to BISECTION") gw_diag_simulations.write_input_file(diag_inp_file) my_cp2k_run(suf=suf, ot_or_diag='diag') # print("NOT IMPLEMENTED") occ, vir, homo_, lumo_, occ_scf, vir_scf, occ_0, vir_0 = return_gw_energies_advanced(diag_out_file) h**o, lumo = redefine_homo_lumo_if_not_extracted_before(homo_, lumo_, h**o, lumo) print_extracted_energies(suf, h**o, lumo, occ, vir) # on a screen except (NaNInGW, SCQPSolutionNotFound): print("GW is not extracted, because NaNInGW AGAIN. Calling second fallback (BISECTION and num_quad_points = 500) ...") gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].DFT.XC.WF_CORRELATION_list[0].RI_RPA.Rpa_num_quad_points = 500 # this should help as well gw_diag_simulations.CP2K_INPUT.FORCE_EVAL_list[0].DFT.XC.WF_CORRELATION_list[0].RI_RPA.RI_G0W0.Crossing_search = 'BISECTION' print("I wrote the fallback input file with QUAD points = 500") gw_diag_simulations.write_input_file(diag_inp_file) my_cp2k_run(suf=suf, ot_or_diag='diag') finally: try: occ, vir, homo_, lumo_, occ_scf, vir_scf, occ_0, vir_0 = return_gw_energies_advanced(diag_out_file) h**o, lumo = redefine_homo_lumo_if_not_extracted_before(homo_, lumo_, h**o, lumo) print_extracted_energies(suf, h**o, lumo, occ, vir) # on a screen # <--- except: print("GW energies were not extracted even in the fallback") # occ = 'not extracted' # vir = 'not extracted' occ, vir, occ_scf, vir_scf, occ_0, vir_0 = ['not extracted']*6 del dft_ot_simulation, gw_diag_simulations # put computed data into the molecule object my_new_mol.add_energies_advanced(int(suf), h**o, lumo, occ, vir, occ_0, vir_0, occ_scf, vir_scf) my_new_mol.add_num_orbitals(int(suf), num_orb) my_new_mol.extrapolate_energy_advanced() # level up? db_record = my_new_mol.yield_dict() # this dict will be written into yaml. 
it will be a record in the global library # <-- EMD: GW run and extraction ####################################### END: RUN CP2K TWO TIMES ##################################################### print("\nI am done\n") if not dummy_run: print('saving to DB...') with open(f'{db}/DB_{mol_identifier}.yaml', 'w') as stream: yaml.safe_dump(db_record, stream) print(f"saved to {db}/DB_{mol_identifier}.yaml") print('I will remove the content of the sim folder') # Clean up before leave status = my_new_mol.status() if status == 'all_extracted': # all quantities are extracted if debug: print(f'status: {status}, but debug is on ==> will move {sim_folder_scratch} to {sim_folder_home}') copytree(sim_folder_scratch, sim_folder_home, dirs_exist_ok=True) # will rewrite the folder else: print(f'status: {status} ==> will remove {sim_folder_scratch}') try_to_remove_folder(sim_folder_scratch) else: print(f'status: {status} ==> will copy failed sim folder from scratch') # if not os.path.exists(sim_folder_home): # os.mkdir(sim_folder_home) # will overwrite if exists try: copytree(sim_folder_scratch, sim_folder_home, dirs_exist_ok=True) # will rewrite the folder > 3.8 needed print(f"I have copied {sim_folder_scratch} to {sim_folder_home}") except: print(f"I could not copy {sim_folder_scratch} to {sim_folder_home}") try_to_remove_folder(sim_folder_scratch)
def main():
    """Run the OT-DFT and G0W0 (DIAG) CP2K calculations for one molecule.

    Command-line arguments:
        -rank      array-job number; zero-padded to six digits and used as
                   the molecule identifier.
        -num_cpus  number of CPUs requested for the array job.
        -i         path to the YAML settings file.

    Keys read from the settings file: ``debug``, ``dummy_run``,
    ``folder_names`` (``simulations``, ``database``, ``scratch``),
    ``prefix_xyz_file_name``, ``molecule_vacuum_offset``, ``cp2k_exe_path``,
    ``basis_set_suffix`` and, optionally, ``mpi`` (defaults to ``'openmpi'``).
    The keys ``my_abc`` and ``xyz_file_name`` are added to the settings
    before they are handed to ``InputFactory.set_constants``.

    For every basis-set suffix an OT DFT input and a G0W0 (DIAG) input are
    written to the scratch folder and run (unless ``dummy_run`` is set);
    the number of orbitals, HOMO/LUMO and GW energies are extracted from the
    DIAG output and stored in a ``Cp2kOutput`` record.  The record is dumped
    to ``<database>/DB_<rank>.yaml`` and the scratch folder is copied to the
    home simulation folder and/or removed depending on the record status.

    Raises:
        KeyError: if the ``SCRATCH`` environment variable or a mandatory
            settings key is missing.
        SystemExit: if the database record for this rank already exists
            (exit code 0), or if a command-line argument is missing.
    """
    scratch = os.environ['SCRATCH']  # SCRATCH has to be in the environment. Normally, it is.

    # --- command line ---------------------------------------------------
    parser = argparse.ArgumentParser(description='rank and num of cpus')
    parser.add_argument('-rank', required=True)  # array job number
    parser.add_argument('-num_cpus', required=True)  # number of cpus requested for every array job
    parser.add_argument('-i', required=True)  # yaml settings file
    args = parser.parse_args()

    # --- yaml settings --------------------------------------------------
    # safe_load: yaml.load() without an explicit Loader is rejected by
    # PyYAML >= 6 and can construct arbitrary objects on older versions.
    with open(args.i) as stream:
        settings = yaml.safe_load(stream)

    # run-or-check settings
    debug = settings['debug']
    dummy_run = settings['dummy_run']

    # folder names
    sim = settings['folder_names']['simulations']
    db = settings['folder_names']['database']
    # the outermost folder in the scratch folder where all other data are put
    scratch_subfolder = settings['folder_names']['scratch']

    prefix_xyz_file_name = settings['prefix_xyz_file_name']
    my_offset = settings['molecule_vacuum_offset']
    type_mpi = settings.get('mpi', 'openmpi')  # optional key
    cp2k_exe_path = settings['cp2k_exe_path']
    # todo: fix DZ --> 2, TZ --> 3, QZ --> 4
    basis_set_suffixes = settings['basis_set_suffix']

    # --- derived names --------------------------------------------------
    threads = int(args.num_cpus)  # cpus used to compute. 1 is not subtracted: it does not help
    rank = '{:0>6}'.format(args.rank)  # '1' --> '000001'. This is not a general thing
    xyz_file_name = f'{prefix_xyz_file_name}_{rank}.xyz'
    xyz_file_location = f'{prefix_xyz_file_name}/{xyz_file_name}'
    db_record_path = f'{db}/DB_{rank}.yaml'  # file where the results will be saved. todo: clean all of this up

    # nothing to do if the result is already in the database
    if os.path.exists(db_record_path):
        print(f'The simulation results of mol. {rank} is already in the folder of reference')
        raise SystemExit(0)

    sim_folder_scratch = f'{scratch}/{scratch_subfolder}/{sim}/{rank}'
    sim_folder_home = f'{sim}/{rank}'  # the sim folder at home exists; the {rank} folder is created later

    # start from an empty scratch folder: leftovers of previous runs are removed
    if os.path.exists(sim_folder_scratch):
        rmtree(sim_folder_scratch)
    os.makedirs(sim_folder_scratch)

    # --- geometry -------------------------------------------------------
    try:
        my_xyz_file_obj = XYZ.from_file(xyz_file_location)  # object created using the file from home
    except Exception as err:
        # test fallback kept from the original code; made visible because it
        # silently replaces the requested molecule
        print(f'could not read {xyz_file_location} ({err!r}); falling back to H2O.xyz')
        my_xyz_file_obj = XYZ.from_file('H2O.xyz')
    xyz_at_scratch = sim_folder_scratch + '/' + xyz_file_name
    # NOTE(review): the collapsed source is ambiguous about whether this call
    # is active; it is kept active because cp2k runs inside the scratch folder
    # with xyz_file_name -- confirm.
    my_xyz_file_obj.write(xyz_at_scratch)  # writes a normal xyz (into scratch)

    # my molecule object is created. It will serve as a DB record
    my_new_mol = Cp2kOutput(rank)

    # NOTE(review): [1:-2] drops the first and the last two characters of the
    # string form of the box size -- presumably brackets; confirm.
    my_abc = str(my_xyz_file_obj.compute_box_size(offset=my_offset))[1:-2]
    settings['my_abc'] = my_abc
    settings['xyz_file_name'] = xyz_file_name

    # misc
    inp_file_name = 'test_2344.inp'  # base file name
    my_run_type = 'mpi'

    ######################################## BEGIN: RUN CP2K TWO TIMES #################################################
    InputFactory.set_constants(input_from_yaml=settings)

    for i_bs, suffix in enumerate(basis_set_suffixes):
        # I/O
        ot_file_name = 'OT_' + f'{suffix}_' + inp_file_name  # for DFT (OT)
        diag_file_name = 'DIAG_' + f'{suffix}_' + inp_file_name  # for GW (DIAG)

        # --> OT dft: converges quickly
        dft_ot_simulation = InputFactory.new_dft_ot(i_bs)
        dft_ot_simulation.write_input_file(sim_folder_scratch + '/' + ot_file_name)

        # before the run, copy the RESTART of the previous basis set
        # (it exists for every basis set except the smallest one)
        if i_bs != 0:
            try:
                copy(sim_folder_scratch + '/' + f'{int(suffix) - 1}-RESTART.wfn',
                     sim_folder_scratch + '/' + f'{suffix}-RESTART.wfn')
                print('copied restart file 2->3 or 3->4')
            except Exception as err:  # best effort: the run also works without a restart file
                print('not succesfull copy of the restart file')
                print(f'reason: {err!r}')

        print(f"Running PBE with OT (basis set = {suffix})...")
        if not dummy_run:
            cp2k_run(input_file=ot_file_name,
                     xyz_file=xyz_file_name,
                     run_type=my_run_type,
                     np=threads,
                     output_file=f'out_ot_{suffix}.out',
                     cp2k_executable=cp2k_exe_path,
                     execution_directory=sim_folder_scratch,
                     type_mpi=type_mpi)
        print(f"I have finished cp2k with OT (basis set = {suffix})")
        # <-- OT dft

        # --> DIAGONALIZATION run to reliably compute HOMO and then GW
        gw_diag_simulations = InputFactory.new_gw(i_bs)
        gw_diag_simulations.write_input_file(sim_folder_scratch + '/' + diag_file_name)

        print(f"Running G0W0 with DIAG (basis set = {suffix})...")
        my_out_file2 = f'out_diag_{suffix}.out'
        if not dummy_run:
            cp2k_run(input_file=diag_file_name,
                     xyz_file=xyz_file_name,
                     output_file=my_out_file2,
                     run_type=my_run_type,
                     np=threads,
                     cp2k_executable=cp2k_exe_path,
                     execution_directory=sim_folder_scratch,
                     type_mpi=type_mpi)
        print(f"I have finished cp2k with DIAG (basis set = {suffix})")
        # <-- DIAG run

        # --> extraction from the DIAG output file; every quantity that
        #     cannot be extracted is stored as the string 'not extracted'
        path_to_out2_file = sim_folder_scratch + '/' + my_out_file2

        try:
            num_orb = extract_number_of_independent_orbital_function(path_to_out2_file)
            print(f'basis set = {suffix}, number of independent orbital functions: {num_orb}')
        except Exception:
            print('number of orbitals was not extracted')
            num_orb = 'not extracted'

        try:
            homos, lumos = return_homo_lumo(path_to_out2_file)
            homo = homos[-1] * eV_to_Hartree()
            lumo = lumos[0] * eV_to_Hartree()
            print(f'basis set = {suffix} ', 'homo = ', homo, ' eV')
            print(f'basis set = {suffix} ', 'lumo = ', lumo, ' eV')
        except Exception:
            print('Homo/Lumo were not extracted')
            homo = 'not extracted'
            lumo = 'not extracted'

        try:
            gw_occ, gw_vir, homo_, lumo_ = return_gw_energies(path_to_out2_file)
            # use the values reported in the GW section only if the direct
            # extraction above failed for both homo and lumo
            if isinstance(homo, str) and isinstance(lumo, str):
                homo = homo_
                lumo = lumo_
            print(f'basis set = {suffix} ', 'homo = ', homo, ' eV')
            print(f'basis set = {suffix} ', 'lumo = ', lumo, ' eV')
            print(f'basis set = {suffix} ', 'gw homo = ', gw_occ, ' eV')
            print(f'basis set = {suffix} ', 'gw lumo = ', gw_vir, ' eV')
        except Exception:
            print("GW energies were not extracted")
            gw_occ = 'not extracted'
            gw_vir = 'not extracted'
        # <-- extraction

        del dft_ot_simulation, gw_diag_simulations

        # put computed data into the molecule object
        my_new_mol.add_energies(int(suffix), homo, lumo, gw_occ, gw_vir)
        my_new_mol.add_num_orbitals(int(suffix), num_orb)

    # NOTE(review): executed once, after all basis sets were processed, so
    # that db_record exists even for an empty suffix list -- confirm that
    # extrapolate_energy() is not expected to run per basis set.
    my_new_mol.extrapolate_energy()
    # this dict will be written into yaml. It will be a record in the global library
    db_record = my_new_mol.yield_dict()
    ######################################## END: RUN CP2K TWO TIMES ###################################################

    print("\nI am done\n")

    if not dummy_run:
        print('saving to DB...')
        with open(db_record_path, 'w') as stream:
            yaml.safe_dump(db_record, stream)
        print(f"saved to {db_record_path}")

    print('I will remove the content of the sim folder')
    # Clean up before leave
    status = my_new_mol.status()
    if status == 'all_extracted':  # all quantities are extracted
        if debug:
            print(f'status: {status}, but debug is on ==> will move {sim_folder_scratch} to {sim_folder_home}')
            copytree(sim_folder_scratch, sim_folder_home, dirs_exist_ok=True)  # will rewrite the folder
        else:
            print(f'status: {status} ==> will remove {sim_folder_scratch}')
            try_to_remove_folder(sim_folder_scratch)
    else:
        print(f'status: {status} ==> will copy failed sim folder from scratch')
        try:
            # dirs_exist_ok (Python >= 3.8): overwrite a folder left at home by an
            # earlier attempt instead of failing and then losing the scratch data
            copytree(sim_folder_scratch, sim_folder_home, dirs_exist_ok=True)
            print(f"I have copied {sim_folder_scratch} to {sim_folder_home}")
        except Exception as err:
            print(f"I could not copy {sim_folder_scratch} to {sim_folder_home}")
            print(f'reason: {err!r}')
        # NOTE(review): placed in the failed-status branch; the collapsed
        # source does not show whether it belongs here or at function level.
        try_to_remove_folder(sim_folder_scratch)