def main(opt, s_id):

    if opt['database']['dropbox']:

        # Download database file:
        dbx = dbh.get_dropbox()
        tmp_db_path = os.path.join(SET_UP_PATH, 'temp_db')
        db_path = opt['database']['path']
        db_exists = dbh.check_dropbox_file_exist(dbx, db_path)
        if not db_exists:
            raise ValueError('Cannot find database on Dropbox. Exiting.')
        dbh.download_dropbox_file(dbx, db_path, tmp_db_path)
        db = read_pickle(tmp_db_path)

    else:
        db = read_pickle(opt['database']['path'])

    # Find the base options for this sid:
    base_opt = search_database_by_session_id(db, s_id)

    # For compatibility:
    if base_opt.get('set_up'):
        base_opt = {**base_opt, **base_opt['set_up']}

    src_path = os.path.join(base_opt['scratch']['path'], s_id)
    dst_path = os.path.join(base_opt['archive']['path'], s_id)
    sms_path = os.path.join(src_path, 'sims.pickle')

    print('session_id: {}'.format(s_id))
    print('Source path: {}'.format(src_path))
    print('Destination path: {}'.format(dst_path))

    error_idx = check_errors(sms_path, src_path, opt.get('skip_idx'))
    if len(error_idx) > 0:
        raise ValueError('Errors found! Exiting process.py.')

    # off_fls = base_opt['scratch']['offline_files']
    # move_offline_files(s_id, src_path, off_fls)

    if not os.path.isdir(src_path):
        raise ValueError(
            'Source path is not a directory: {}'.format(src_path))

    arch_opt = base_opt['archive']
    is_dropbox = arch_opt.get('dropbox')
    exclude = opt.get('exclude')

    cpy_msg = 'remote Dropbox' if is_dropbox else 'local computer'
    print('Copying completed sims to archive (on {}).'.format(cpy_msg))
    print('From path: {}'.format(src_path))
    print('To path: {}'.format(dst_path))

    if is_dropbox is True:
        if not opt['database']['dropbox']:
            # The database was read locally, so no Dropbox connection exists
            # yet; get one before uploading to the archive:
            dbx = dbh.get_dropbox()
        dbh.upload_dropbox_dir(dbx, src_path, dst_path, exclude=exclude)

    else:
        # If the archive is not on Dropbox, assume it is on the scratch
        # machine, i.e. the one from which this script (process.py) is run.
        copy_tree(src_path, dst_path)

    print('Archive copying finished.')
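# Hedged usage sketch (not part of the original module): `main` expects an
# `opt` dict containing at least the keys read above, plus a session ID
# string. The paths, session ID, and the `exclude`/`skip_idx` values below
# are hypothetical placeholders.
#
#     opt = {
#         'database': {
#             'dropbox': False,             # read the database from a local path
#             'path': '/path/to/db.pickle',
#         },
#         'skip_idx': [3, 7],               # optional: sim indices to ignore
#         'exclude': ['*.check'],           # optional: passed to the Dropbox upload
#     }
#     main(opt, '2017-01-01-1200_12345')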
def check_errors(sms_path, src_path, skip_idx=None):
    """Some basic checks for errors in sim series output files."""

    # Open the sims pickle, get list of AtomisticSimulation objects:
    sms = read_pickle(sms_path)
    all_sms = sms['all_sims']

    # Get options from first sim if they don't exist (legacy compatibility)
    base_opt = sms.get('base_options', all_sms[0].options)
    method = base_opt['method']

    error_idx = []
    s_count = 0
    for s_idx, sim_i in enumerate(all_sms):

        if skip_idx is not None and s_idx in skip_idx:
            continue

        s_count += 1

        srs_paths = []
        srs_id = sim_i.options.get('series_id')
        if srs_id is not None:

            # (legacy compatibility)
            if isinstance(srs_id, dict) and len(srs_id) == 1:
                new_srs_id = []
                for k, v in srs_id.items():
                    new_srs_id.append([{'name': k, **v}])
                srs_id = new_srs_id

            if not isinstance(srs_id, dict):
                for srs_id_lst in srs_id:
                    srs_paths.append('_'.join([i['path'] for i in srs_id_lst]))
            else:
                raise ValueError('Cannot parse `series_id` option from '
                                 's_idx: {}'.format(s_idx))

        calc_path = os.path.join(src_path, 'calcs', *srs_paths)

        try:
            if method == 'castep':
                out = castep.read_castep_output(calc_path)

            elif method == 'lammps':
                out = lammps.read_lammps_output(calc_path)

            if out['errors']:
                error_idx.append(s_idx)

        except OSError:
            print('Skipping sim index: {} (No output files?)'.format(s_idx))
            continue

    return error_idx
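# Illustrative sketch of the assumed (non-legacy) `series_id` shape: a list
# of lists of dicts, each with at least 'name' and 'path' keys. The series
# names below are hypothetical; only the path-joining logic mirrors the code
# above.
#
#     series_id = [
#         [{'name': 'kpoint', 'path': 'kpoint_0.05'}],
#         [{'name': 'cut_off_energy', 'path': 'cut_300'}],
#     ]
#     # srs_paths -> ['kpoint_0.05', 'cut_300']
#     # calc_path -> os.path.join(src_path, 'calcs', 'kpoint_0.05', 'cut_300')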
def read_results(sid, archive_path, skip_idx=None, overwrite=False,
                 query_all=False):
    """
    Parameters
    ----------
    sid : str
        Simulation series ID.
    archive_path : str
        Path to the archive directory containing the simulation series.
    skip_idx : list, optional
        List of simulation series indices to skip. Default is None, in which
        case none of the simulations are skipped. Useful for skipping failed
        simulations.
    overwrite : bool or str ("ask"), optional
        If True, overwrite previously recorded results. If False, do not
        overwrite previously recorded results. If "ask", query the user, in
        which case `query_all` controls whether the user is asked for each
        simulation in the series or just the first. Default is False.
    query_all : bool, optional
        Only applies if `overwrite` is "ask". If True, the user is asked
        whether to overwrite results for each simulation in the series. If
        False, the user is asked for only the first simulation and the
        answer is remembered for the remaining simulations. Default is False.

    """

    sid_path = os.path.join(archive_path, sid)
    sims = read_pickle(os.path.join(sid_path, 'sims.pickle'))

    # Get options from first sim if they don't exist (legacy compatibility)
    base_opt = sims.get('base_options', sims['all_sims'][0].options)
    method = base_opt['method']

    s_count = 0
    for s_idx, sim_i in enumerate(sims['all_sims']):

        if skip_idx is not None and s_idx in skip_idx:
            continue

        s_count += 1

        srs_paths = []
        srs_id = sim_i.options.get('series_id')
        if srs_id is not None:

            # (legacy compatibility)
            if isinstance(srs_id, dict) and len(srs_id) == 1:
                new_srs_id = []
                for k, v in srs_id.items():
                    new_srs_id.append([{'name': k, **v}])
                srs_id = new_srs_id

            if not isinstance(srs_id, dict):
                for srs_id_lst in srs_id:
                    srs_paths.append('_'.join([i['path'] for i in srs_id_lst]))
            else:
                raise ValueError('Cannot parse `series_id` option from '
                                 's_idx: {}'.format(s_idx))

        calc_path = os.path.join(sid_path, 'calcs', *srs_paths)

        if method == 'castep':
            out = castep.read_castep_output(calc_path)

        elif method == 'lammps':
            out = lammps.read_lammps_output(calc_path)

        results_exist = False
        if hasattr(sim_i, 'results'):
            results_exist = True
            if sim_i.results is None:
                results_exist = False

        if results_exist:

            if overwrite == True:
                # Overwrite without querying
                save_res = True

            elif overwrite == False:
                # Skip without querying
                continue

            elif overwrite == 'ask':
                # Ask user whether to overwrite. If `query_all` is True, the
                # user is asked for each simulation, otherwise the user is
                # asked for the first simulation, and the answer is applied
                # for remaining simulations.

                query_i = False
                if query_all:
                    query_i = True
                elif not query_all and s_count == 1:
                    query_i = True

                save_res = True
                if query_i:

                    save_res = False
                    msg = 'Results already collated for: {}'.format(sid)
                    if query_all:
                        msg += ' : {}'.format(s_idx)
                    msg += '. Overwrite?'

                    if utils.confirm(msg):
                        save_res = True
                    elif not query_all:
                        overwrite = False

        else:
            save_res = True

        if save_res:
            sims['all_sims'][s_idx].results = out

    pick_path = os.path.join(sid_path, 'sims.pickle')
    write_pickle(sims, pick_path)
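# Hedged example call (the series ID and archive path are hypothetical):
#
#     read_results('2017-01-01-1200_12345', '/path/to/archive',
#                  skip_idx=[2], overwrite='ask', query_all=False)
#
# With overwrite='ask' and query_all=False, the user is prompted once for the
# first simulation and the answer is reused for the rest of the series.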
def visualise(structure, show_iplot=False, save=False, save_args=None,
              plot_2d='xyz', ret_fig=False, group_atoms_by=None,
              group_lattice_sites_by=None, group_interstices_by=None,
              wrap_sym_op=True):
    """
    Parameters
    ----------
    structure : one of CrystalStructure, CrystalBox or AtomisticStructure
    use_interstice_names : bool, optional
        If True, bulk interstices are plotted by names given in
        `interstice_names` according to `interstice_names_idx`.
    group_atoms_by : list of str, optional
        If set, atoms are grouped according to one or more of their labels.
        For instance, if set to `species_count`, which is an atom label that
        is automatically added to the CrystalStructure, atoms will be grouped
        by their position in the motif within their species. So for a motif
        which has two X atoms, these atoms will be plotted on separate
        traces: "X (#1)" and "X (#2)". Note that atoms are grouped by species
        (e.g. "X") by default.
    group_lattice_sites_by : list of str, optional
        If set, lattice sites are grouped according to one or more of their
        labels.
    group_interstices_by : list of str, optional
        If set, interstices are grouped according to one or more of their
        labels.

    TODO: add `colour_{atoms, lattice_sites, interstices}_by` parameters,
    each a string that must be in the corresponding `group_{}_by` list. Or
    maybe don't have this restriction; would ideally want to be able to
    colour according to a colourscale, e.g. by volume per atom, bond order
    parameter, etc. Can do this in Plotly by setting marker.colorscale to an
    array of the same length as the number of markers. And for Matplotlib:
    https://stackoverflow.com/questions/6063876/matplotlib-colorbar-for-scatter

    TODO: consider merging parameters into a dict: `group_sites_by` = {
        atoms: [...], lattice_sites: [...], interstices: [...]}, etc.

    """

    if save:
        if save_args is None:
            save_args = {
                'filename': 'plots.html',
                'auto_open': False
            }
        elif save_args.get('filename') is None:
            save_args.update({'filename': 'plots.html'})

    if group_atoms_by is None:
        group_atoms_by = []

    if group_lattice_sites_by is None:
        group_lattice_sites_by = []

    if group_interstices_by is None:
        group_interstices_by = []

    for lab in group_atoms_by:
        if lab not in structure.atom_labels.keys():
            raise ValueError('"{}" is not a valid atom label.'.format(lab))

    for lab in group_lattice_sites_by:
        if lab not in structure.lattice_labels.keys():
            raise ValueError(
                '"{}" is not a valid lattice site label.'.format(lab))

    for lab in group_interstices_by:
        if lab not in structure.interstice_labels.keys():
            raise ValueError(
                '"{}" is not a valid interstice label.'.format(lab))

    # Get colours for atom species:
    atom_cols = readwrite.read_pickle(
        os.path.join(REF_PATH, 'jmol_colours.pickle'))

    # Add atom number labels:
    text = []
    text.append({
        'data': structure.atom_sites,
        'text': list(range(structure.atom_sites.shape[1])),
        'position': 'top',
        'colour': 'gray',
        'name': 'Atom labels',
        'visible': 'legendonly',
    })

    points = []

    # Add atoms by groupings
    atom_groups_names = []
    atom_groups = []
    for k, v in structure.atom_labels.items():
        if k in group_atoms_by:
            atom_groups_names.append(k)
            atom_groups.append(v[0][v[1]])

    atm_col = 'black'
    atm_sym = 'o'

    if len(atom_groups) > 0:

        atom_combs, atom_combs_idx = utils.combination_idx(*atom_groups)

        for ac_idx in range(len(atom_combs)):

            c = atom_combs[ac_idx]
            c_idx = atom_combs_idx[ac_idx]
            skip_idx = []
            atoms_name = 'Atoms'

            # Special treatment for species and species_count if grouping
            # requested:
            if 'species' in atom_groups_names:
                sp_group_name_idx = atom_groups_names.index('species')
                sp = c[sp_group_name_idx]
                atm_col = 'rgb' + str(atom_cols[sp])
                atoms_name += ': {}'.format(sp)
                skip_idx = [sp_group_name_idx]

            if 'species_count' in atom_groups_names:
                sp_ct_group_name_idx = atom_groups_names.index(
                    'species_count')
                atoms_name += ' #{}'.format(c[sp_ct_group_name_idx] + 1)
                skip_idx.append(sp_ct_group_name_idx)

            for idx, (i, j) in enumerate(zip(atom_groups_names, c)):
                if idx in skip_idx:
                    continue
                atoms_name += '; {}: {}'.format(i, j)

            points.append({
                'data': structure.atom_sites[:, c_idx],
                'symbol': atm_sym,
                'colour': atm_col,
                'name': atoms_name,
            })

    else:
        points.append({
            'data': structure.atom_sites,
            'symbol': atm_sym,
            'colour': atm_col,
            'name': 'Atoms',
        })

    # Add lattice sites by groupings
    if structure.lattice_sites is not None:

        lat_groups_names = []
        lat_groups = []
        for k, v in structure.lattice_labels.items():
            if k in group_lattice_sites_by:
                lat_groups_names.append(k)
                lat_groups.append(v[0][v[1]])

        lat_col = 'grey'
        lat_sym = 'x'

        if len(lat_groups) > 0:

            lat_combs, lat_combs_idx = utils.combination_idx(*lat_groups)

            for lc_idx in range(len(lat_combs)):

                c = lat_combs[lc_idx]
                c_idx = lat_combs_idx[lc_idx]
                skip_idx = []
                lats_name = 'Lattice sites'

                for idx, (i, j) in enumerate(zip(lat_groups_names, c)):
                    lats_name += '; {}: {}'.format(i, j)

                points.append({
                    'data': structure.lattice_sites[:, c_idx],
                    'symbol': lat_sym,
                    'colour': lat_col,
                    'name': lats_name,
                    'visible': 'legendonly',
                })

        else:
            points.append({
                'data': structure.lattice_sites,
                'symbol': lat_sym,
                'colour': lat_col,
                'name': 'Lattice sites',
                'visible': 'legendonly',
            })

    # Add interstices by groupings
    if structure.interstice_sites is not None:

        int_groups_names = []
        int_groups = []
        for k, v in structure.interstice_labels.items():
            if k in group_interstices_by:
                int_groups_names.append(k)
                int_groups.append(v[0][v[1]])

        int_col = 'orange'
        int_sym = 'x'

        if len(int_groups) > 0:

            int_combs, int_combs_idx = utils.combination_idx(*int_groups)

            for ic_idx in range(len(int_combs)):

                c = int_combs[ic_idx]
                c_idx = int_combs_idx[ic_idx]
                skip_idx = []
                ints_name = 'Interstices'

                for idx, (i, j) in enumerate(zip(int_groups_names, c)):
                    ints_name += '; {}: {}'.format(i, j)

                points.append({
                    'data': structure.interstice_sites[:, c_idx],
                    'symbol': int_sym,
                    'colour': int_col,
                    'name': ints_name,
                })

        else:
            points.append({
                'data': structure.interstice_sites,
                'symbol': int_sym,
                'colour': int_col,
                'name': 'Interstices',
            })

    boxes = []

    if hasattr(structure, 'bravais_lattice'):
        # CrystalStructure
        boxes.append({
            'edges': structure.bravais_lattice.vecs,
            'name': 'Unit cell',
            'colour': 'navy',
        })

    if hasattr(structure, 'box_vecs'):
        # CrystalBox
        boxes.append({
            'edges': structure.box_vecs,
            'origin': structure.origin,
            'name': 'Crystal box',
            'colour': 'green',
        })

        # Add the bounding box trace:
        boxes.append({
            'edges': structure.bounding_box['bound_box'][0],
            'origin': structure.bounding_box['bound_box_origin'],
            'name': 'Bounding box',
            'colour': 'red',
        })

    if hasattr(structure, 'supercell'):
        # AtomisticStructure
        boxes.append({
            'edges': structure.supercell,
            'origin': structure.origin,
            'name': 'Supercell',
            'colour': '#98df8a',
        })

        crystal_cols = [
            '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
            '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
        ]

        for c_idx, c in enumerate(structure.crystals):
            boxes.append({
                'edges': c['crystal'],
                'origin': c['origin'],
                'name': 'Crystal #{}'.format(c_idx + 1),
                'colour': crystal_cols[c_idx],
            })

        # Add a symmetry operation
        if hasattr(structure, 'symmetry_ops'):
            if structure.symmetry_ops:

                so = structure.symmetry_ops[0]
                as_sym = np.dot(so[0], structure.atom_sites)
                as_sym += np.dot(structure.supercell, so[1][:, np.newaxis])

                if wrap_sym_op:
                    as_sym_frac = np.dot(structure.supercell_inv, as_sym)
                    as_sym_frac -= np.floor(as_sym_frac)
                    as_sym = np.dot(structure.supercell, as_sym_frac)

                points.append({
                    'data': as_sym,
                    'symbol': 'diamond-open',
                    'colour': 'purple',
                    'name': 'Atoms (symmetry)',
                })

                text.append({
                    'data': structure.atom_sites,
                    'text': np.arange(structure.num_atoms),
                    'position': 'bottom center',
                    'font': {
                        'color': 'purple',
                    },
                    'name': 'Atoms (symmetry labels)',
                })

                # # Add lines mapping symmetrically connected atoms:
                # for a_idx, a in enumerate(atom_sites_sym.T):
                #     data.append({
                #         'type': 'scatter3d',
                #         'x': [a[0], self.atom_sites.T[a_idx][0]],
                #         'y': [a[1], self.atom_sites.T[a_idx][1]],
                #         'z': [a[2], self.atom_sites.T[a_idx][2]],
                #         'mode': 'lines',
                #         'name': 'Sym op',
                #         'legendgroup': 'Sym op',
                #         'showlegend': False,
                #         'line': {
                #             'color': 'purple',
                #         },
                #     })

    f3d, f2d = plotting.plot_geometry_plotly(points, boxes, text)

    if show_iplot:
        iplot(f3d)
        iplot(f2d)

    if save:

        div_2d = ''  # Avoid a NameError when the 2D plot is disabled
        if plot_2d != '':
            div_2d = plot(f2d, **save_args, output_type='div',
                          include_plotlyjs=False)

        div_3d = plot(f3d, **save_args, output_type='div',
                      include_plotlyjs=True)

        html_all = div_3d + div_2d
        with open(save_args.get('filename'), 'w') as plt_file:
            plt_file.write(html_all)

    if ret_fig:
        return (f3d, f2d)
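# Hedged usage sketch: `structure` must be one of the structure classes named
# in the docstring; the grouping label shown ('species_count') is taken from
# the docstring, and the output filename is a hypothetical placeholder.
#
#     f3d, f2d = visualise(
#         structure,
#         group_atoms_by=['species_count'],
#         save=True,
#         save_args={'filename': 'my_structure.html', 'auto_open': False},
#         ret_fig=True,
#     )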
def collate_results(res_opt, skip_idx=None, debug=False):
    """
    Save a JSON file containing the results of one or more simulation series.

    Idea is to build a dict (saved as a JSON file) which has results from
    simulations in flat lists.

    """

    rs_date, rs_num = utils.get_date_time_stamp(split=True)
    rs_id = rs_date + '_' + rs_num
    if debug:
        rs_id = '0000-00-00-0000_00000'

    def append_series_items(series_items, series_id, num_series, sim_idx,
                            srs_names):

        out = {'path': []}
        for i in series_id:

            path = []
            for j in i:

                srs_idx = srs_names.index(j['name'])
                for k, v in j.items():

                    if k == 'path':
                        path.append(v)
                        continue

                    if k not in out:
                        out.update({k: [None] * num_series})

                    if isinstance(v, np.ndarray):
                        v = v.tolist()

                    out[k][srs_idx] = v

            path_join = '_'.join([str(i) for i in path])
            out['path'].append(path_join)

        for k, v in out.items():
            if k in series_items:
                series_items[k].extend([v])
            else:
                blank = [None] * num_series
                series_items.update({k: [blank] * sim_idx + [v]})

        for k, v in series_items.items():
            if k not in out:
                blank = [None] * num_series
                series_items[k].append(blank)

        return series_items

    computes = []
    add_vars = []

    # Make a list of all variable ids
    var_ids = []

    # Variables ordered such that dependencies are listed first:
    ordered_vars = []

    # Loop though variables: do validation
    for vr_idx, vr in enumerate(res_opt['variables']):

        vr_type = vr['type']
        vr_name = vr['name']
        vr_name_idx = vr.get('idx')
        vr_id = vr['id']

        # Check type is allowed:
        if vr_type not in VAR_ALLOWED_REQUIRED:
            raise ValueError('"{}" is not an allowed variable type: {}'.format(
                vr_type, VAR_ALLOWED_REQUIRED.keys()))

        # Check all required keys are given:
        for rk in VAR_ALLOWED_REQUIRED[vr_type]:
            if rk not in vr:
                rk_error = 'Variable #{} must have key: {}'.format(vr_idx, rk)
                raise ValueError(rk_error)

        # Check `id` is not repeated
        if vr_id not in var_ids:
            var_ids.append(vr_id)
        else:
            raise ValueError('Variable #{} id is not unique.'.format(vr_idx))

        ordered_vars = get_reduced_depends(
            ordered_vars, vr, inc_id=True, inc_val=True)

    # Start building output dict, which will be saved as a JSON file:
    out = {
        'session_id': [],
        'session_id_idx': [],
        'idx': [],
        'series_name': [],
        'variables': ordered_vars,
        'rid': rs_id,
        'output_path': res_opt['output']['path'],
    }

    # Get a list of lists of sims:
    all_sims = []
    for sid in res_opt['sid']:
        path = os.path.join(res_opt['archive']['path'], sid)
        pick_path = os.path.join(path, 'sims.pickle')
        pick = read_pickle(pick_path)
        all_sims.append(pick['all_sims'])

    # Get a flat list of series names for this sim series and get all sims:
    all_srs_name = []
    for series_sims in all_sims:

        sm_0 = series_sims[0]
        sm_0_opt = sm_0.options

        srs_id = sm_0_opt.get('series_id')
        if srs_id is not None:

            # (legacy compatibility)
            if isinstance(srs_id, dict) and len(srs_id) == 1:
                new_srs_id = []
                for k, v in srs_id.items():
                    new_srs_id.append([{'name': k, **v}])
                srs_id = new_srs_id

            for series_id_list in srs_id:
                for series_id_sublist in series_id_list:
                    nm = series_id_sublist['name']
                    if nm not in all_srs_name:
                        all_srs_name.append(nm)

    # Need better logic later to avoid doing this:
    if 'gamma_surface' in all_srs_name:
        all_srs_name[all_srs_name.index('gamma_surface')] = 'relative_shift'

    out['series_name'] = all_srs_name

    # Collect common series info list for each simulation series:
    all_csi = []

    # Loop through each simulation series to append vals to `result`,
    # `parameter` and single `compute` variable types:
    all_ids = {}
    all_sim_idx = 0
    for sid_idx, sid in enumerate(res_opt['sid']):

        skips = skip_idx[sid_idx] if skip_idx is not None else []
        path = os.path.join(res_opt['archive']['path'], sid)

        # Open the pickle file associated with this simulation series:
        pick_path = os.path.join(path, 'sims.pickle')
        pick = read_pickle(pick_path)
        sims = pick['all_sims']

        # Get options from first sim if they don't exist (legacy compatibility)
        base_opt = pick.get('base_options', sims[0].options)

        all_csi.append(pick.get('common_series_info'))

        # Loop through each simulation for this series
        for sm_idx, sm in enumerate(sims):

            if sm_idx in skips:
                continue

            if sid in out['session_id']:
                sid_idx = out['session_id'].index(sid)
            else:
                out['session_id'].append(sid)
                sid_idx = len(out['session_id']) - 1

            out['session_id_idx'].append(sid_idx)
            out['idx'].append(sm_idx)

            srs_id = sm.options.get('series_id')
            if srs_id is not None:

                # (legacy compatibility)
                if isinstance(srs_id, dict) and len(srs_id) == 1:
                    new_srs_id = []
                    for k, v in srs_id.items():
                        new_srs_id.append([{'name': k, **v}])
                    srs_id = new_srs_id

            if srs_id is None:
                srs_id = [[]]

            all_ids = append_series_items(all_ids,
                                          srs_id,
                                          len(all_srs_name),
                                          all_sim_idx,
                                          all_srs_name)

            # Loop through requested variables:
            for vr_idx, vr in enumerate(out['variables']):

                vr_name = vr['name']
                vr_type = vr['type']
                args = {k: v for k, v in vr.items() if k not in VAR_STD_KEYS}

                if vr_type not in ['result', 'parameter', 'compute']:
                    continue

                if vr_type == 'result':
                    val = sm.results[vr_name]

                elif vr_type == 'parameter':
                    val = sm.options[vr_name]

                elif vr_type == 'compute':
                    func_name = SINGLE_COMPUTE_LOOKUP.get(vr_name)
                    if func_name is not None:
                        val = func_name(out, sm, all_sim_idx, **args)
                    else:
                        # Must be a multi compute
                        continue

                all_sub_idx = vr.get('idx')
                if all_sub_idx is not None:
                    for sub_idx in all_sub_idx:

                        if vr_type == 'parameter':
                            try:
                                val = val[sub_idx]
                            except KeyError:
                                val = vr.get('default')
                                break
                        else:
                            val = val[sub_idx]

                # To ensure the data is JSON compatible:
                if isinstance(val, np.ndarray):
                    val = val.tolist()
                elif isinstance(val, np.generic):
                    # (equivalent to the deprecated np.asscalar)
                    val = val.item()

                out['variables'][vr_idx]['vals'].append(val)

            all_sim_idx += 1

    all_ids = {k: v for k, v in all_ids.items() if k != 'name'}
    out['series_id'] = all_ids

    all_vrs = out['variables']

    # Now calculate variables which are multi `compute`s and `series_id`s:
    for vr_idx, vr in enumerate(all_vrs):

        vr_type = vr['type']
        vr_name = vr['name']

        if vr_type == 'series_id':

            cid = all_srs_name.index(vr['col_id'])
            vals = utils.get_col(all_ids[vr_name], cid)
            if vr.get('col_idx') is not None:
                vals = utils.get_col_none(vals, vr['col_idx'])

            all_vrs[vr_idx]['vals'] = vals

        elif vr_type == 'compute' and \
                SINGLE_COMPUTE_LOOKUP.get(vr_name) is None:

            func = MULTI_COMPUTE_LOOKUP[vr_name]
            req_vars_defn = get_reduced_depends(
                [], vr, inc_id=False, inc_val=False)
            req_vars = [dict_from_list(all_vrs, i) for i in req_vars_defn]

            if vr_name in REQUIRES_CSI:
                func(out, req_vars, common_series_info=all_csi)
            else:
                func(out, req_vars)

    return out
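# Hedged sketch of a `res_opt` dict consistent with the keys read above. The
# session ID, paths, and variable names/IDs are hypothetical; the allowed
# variable types and required keys are defined by VAR_ALLOWED_REQUIRED
# elsewhere in the module.
#
#     res_opt = {
#         'sid': ['2017-01-01-1200_12345'],
#         'archive': {'path': '/path/to/archive'},
#         'output': {'path': '/path/to/results'},
#         'variables': [
#             {'type': 'parameter', 'name': 'method', 'id': 'method'},
#             {'type': 'result', 'name': 'final_energy', 'id': 'energy'},
#         ],
#     }
#     results = collate_results(res_opt, skip_idx=[[]], debug=True)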