def get_data(A):
    """Load and combine the two fingerprint sets stored for ``A``.

    Two databases are opened through ``FingerprintDB``:

    * ``../../get_db_fp/{A}_fp.db`` -- fingerprints requested with
      ``params=np.arange(1, 108)``; the last returned column is split off
      as the target.
    * ``../../get_db_fp_catlearn/{A}_fp_catlearn.db`` -- fingerprints
      requested with ``params=np.arange(1, 1062)``.

    Metadata is requested from both with ``params=np.arange(1, 6)``.

    Returns:
        ``(features, target, metadata)`` where ``features`` is the
        column-wise concatenation of both fingerprint matrices (target
        column removed) and ``metadata`` comes from the first database.
        ``(None, None, None)`` is returned when the metadata of the two
        databases does not agree row by row.
    """

    def sanity_check(M, N):
        # Every field of every row in M must be present in the row of N
        # at the same position. All row pairs are inspected, not only
        # the first one.
        return all(
            all(field in n for field in m)
            for m, n in zip(M, N)
        )

    dpath = '../../get_db_fp/{}_fp.db'.format(A)
    with FingerprintDB(dpath) as fp:
        data = fp.get_fingerprints(params=np.arange(1, 108))
        metadata = fp.get_metadata(params=np.arange(1, 6))

    dpath = '../../get_db_fp_catlearn/{}_fp_catlearn.db'.format(A)
    with FingerprintDB(dpath) as fp:
        data_cl = fp.get_fingerprints(params=np.arange(1, 1062))
        metadata2 = fp.get_metadata(params=np.arange(1, 6))

    # The last column of the first fingerprint matrix is the target.
    target = data[:, -1]
    data = data[:, :-1]
    print(data.shape)
    print(data_cl.shape)
    features = np.concatenate((data, data_cl), axis=1)

    if not sanity_check(metadata2, metadata):
        return None, None, None
    return features, target, metadata
def get_data(A, l_A, C, l_C):
    """Pair up the last fingerprint column of two databases.

    ``A`` and ``C`` are handed to ``FingerprintDB``. From each database the
    fingerprints are requested with ``params=np.arange(1, l_A)`` (resp.
    ``l_C``) and only the last returned column is kept; metadata is
    requested with ``params=np.arange(1, 6)``.

    A row of ``A`` is paired with a row of ``C`` when metadata fields 1, 2
    and 4 are equal and field 3 is equivalent according to ``get_cond``.

    Returns:
        ``(x, y, metadata)`` -- three parallel lists. ``x`` holds the value
        from ``A``, ``y`` the value from ``C`` and ``metadata`` a 10-item
        list made of metadata fields 0-4 of the ``A`` row followed by
        fields 0-4 of the ``C`` row.
    """

    def get_cond(A, B):
        # Cheap rejection: both labels must use the same set of characters.
        if set(A) != set(B):
            return False
        if '|' not in A:
            # Plain label: compare the '_'-separated tokens as a multiset.
            return Counter(A.split('_')) == Counter(B.split('_'))
        # Label with '|': the second '|'-separated segment must be equal...
        if A.split('|')[1] != B.split('|')[1]:
            return False
        # ...and the tokens of the first segment must match as a multiset.
        tokens_a = Counter(A.split('|')[0].split('_'))
        tokens_b = Counter(B.split('|')[0].split('_'))
        return tokens_a == tokens_b

    with FingerprintDB(A) as fp:
        data_A = fp.get_fingerprints(params=np.arange(1, l_A))
        data_A = data_A[:, -1]
        metadata_A = fp.get_metadata(params=np.arange(1, 6))

    with FingerprintDB(C) as fp:
        data_C = fp.get_fingerprints(params=np.arange(1, l_C))
        data_C = data_C[:, -1]
        metadata_C = fp.get_metadata(params=np.arange(1, 6))

    x, y, metadata = [], [], []
    for value_a, row_a in zip(data_A, metadata_A):
        for value_c, row_c in zip(data_C, metadata_C):
            # All four checks are evaluated, in this order, before testing.
            checks = [
                row_a[1] == row_c[1],
                row_a[2] == row_c[2],
                get_cond(row_a[3], row_c[3]),
                row_a[4] == row_c[4],
            ]
            if not all(checks):
                continue
            x.append(value_a)
            y.append(value_c)
            metadata.append(
                [row_a[k] for k in range(5)] + [row_c[k] for k in range(5)]
            )
    return x, y, metadata
def get_data(A, B):
    """Match fingerprints stored for ``B`` with reaction energies for ``A``.

    Steps performed:

    1. Load fingerprints and metadata for ``B`` from
       ``../../../get_db_fp/{B}_fp.db`` and
       ``../../../get_db_fp_catlearn/{B}_fp_catlearn.db``; the last column
       of the first fingerprint matrix is the target, the remaining
       columns are concatenated with the catlearn fingerprints.
    2. Abort via ``sys.exit`` if the metadata of both databases disagrees.
    3. Load ``../../../get_data_cathub/{A}_data.npy`` and reduce every
       entry of ``['reactions']['edges']`` to
       ``[adsorbate, symbol, site, site_type, sb_symbol, reactionenergy]``.
       Entries are skipped when ``A`` is ``'N'``/``'H'`` and ``'0.5'`` is
       not in the reactants, when ``A`` is ``'S'`` and ``'Hstar'`` is in
       the products, when the site is not top/bridge/hollow, or when the
       resolved site type still contains ``':'``.
    4. Pair every fingerprint row with every processed entry whose
       fields 1, 2 and 4 equal the row's metadata fields 1, 2 and 4 and
       whose field 3 is equivalent according to ``get_cond``.

    Returns:
        ``(features, target, scaling_target, metadata)``: three numpy
        arrays plus a list of 10-item lists (metadata fields 0-4 of the
        fingerprint row followed by fields 0-4 of the matched entry).

    Raises:
        SystemExit: when the metadata sanity check fails.
        ValueError: when a surface composition cannot be resolved to
            exactly two known metals (this used to surface as an opaque
            ``TypeError`` while unpacking ``None``).
    """

    def get_sym(A):
        # Map a surface composition string to (symbol, structure label).
        ordered_metals = [
            'Y', 'La', 'Sc', 'Zr', 'Hf', 'Ti', 'Ta', 'Nb', 'V', 'Cr', 'Mo',
            'W', 'Re', 'Tc', 'Os', 'Ru', 'Ir', 'Rh', 'Ni', 'Co', 'Fe', 'Mn',
            'Pt', 'Pd', 'Au', 'Ag', 'Cu', 'Zn', 'Cd', 'Hg', 'Al', 'Ga', 'In',
            'Tl', 'Pb', 'Sn', 'Bi'
        ]
        if '3' in A:
            return A, 'L12'
        if A in ordered_metals:
            return A, 'A1'
        # Collect the known metals contained in A, in table order.
        sym = [metal for metal in ordered_metals if metal in A]
        if len(sym) != 2:
            raise ValueError(
                'Cannot resolve surface composition {!r}: expected exactly '
                'two known metals, found {}.'.format(A, sym))
        return '2'.join(sym) + '2', 'L10'

    def get_cond(A, B):
        # Cheap rejection: both labels must use the same set of characters.
        if set(A) != set(B):
            return False
        if '|' not in A:
            # Plain label: compare the '_'-separated tokens as a multiset.
            return Counter(A.split('_')) == Counter(B.split('_'))
        # Label with '|': the second '|'-separated segment must be equal
        # and the tokens of the first segment must match as a multiset.
        if A.split('|')[1] != B.split('|')[1]:
            return False
        return (Counter(A.split('|')[0].split('_')) ==
                Counter(B.split('|')[0].split('_')))

    def sanity_check(M, N):
        # Every field of every row in M must be present in the row of N
        # at the same position. All row pairs are inspected.
        return all(
            all(field in n for field in m)
            for m, n in zip(M, N)
        )

    dpath = '../../../get_db_fp/{}_fp.db'.format(B)
    with FingerprintDB(dpath) as fp:
        data = fp.get_fingerprints(params=np.arange(1, 108))
        md1 = fp.get_metadata(params=np.arange(1, 6))

    dpath = '../../../get_db_fp_catlearn/{}_fp_catlearn.db'.format(B)
    with FingerprintDB(dpath) as fp:
        data_cl = fp.get_fingerprints(params=np.arange(1, 1062))
        md2 = fp.get_metadata(params=np.arange(1, 6))

    # The last column of the first fingerprint matrix is the target.
    temp_target = data[:, -1]
    data = data[:, :-1]
    print(data.shape, data_cl.shape)
    temp_features = np.concatenate((data, data_cl), axis=1)
    del data, data_cl

    if not sanity_check(md2, md1):
        sys.exit('Something went wrong in sanity check.')

    # The file holds a pickled dict wrapped in a 0-d object array, so
    # allow_pickle must be enabled (NumPy refuses object arrays by
    # default). NOTE: unpickling executes arbitrary code -- only load
    # files produced by this project.
    scaling_data = np.load(
        '../../../get_data_cathub/{}_data.npy'.format(A),
        allow_pickle=True)[()]
    scaling_data = scaling_data['reactions']['edges']

    scaling_data_processed = []
    for d in scaling_data:
        node = d['node']
        if A in ['N', 'H']:
            if '0.5' not in node['reactants']:
                continue
        if A == 'S':
            if 'Hstar' in node['products']:
                continue

        adsorbate = re.findall(r'"([A-Z0-4]+)":', node['sites'])[0]
        reactionenergy = node['reactionEnergy']
        symbol, sb_symbol = get_sym(node['surfaceComposition'])
        site, site_type = re.findall(r'(?<=: )"(.+)"',
                                     node['sites'])[0].split('|', 1)
        if site not in ['top', 'bridge', 'hollow']:
            print('Site {} not a valid site_type.'.format(site))
            continue

        if sb_symbol == 'A1':
            site_type = site_type.replace('A', symbol)
        else:
            if sb_symbol == 'L12':
                a, b = symbol.split('3')
            else:
                a, b, _ = symbol.split('2')
                if a > b:
                    a, b = b, a
            # Go through lowercase placeholders so that an 'A' or 'B'
            # inside an inserted element symbol is not substituted again.
            site_type = site_type.replace('A', 'aaa').replace('B', 'bbb')
            site_type = site_type.replace('aaa', a).replace('bbb', b)

        if ':' in site_type:
            print('This is a problematic site type.\n\n\n')
            continue

        scaling_data_processed.append([
            adsorbate, symbol, site, site_type, sb_symbol, reactionenergy
        ])

    scaling_target = []
    metadata = []
    features = []
    target = []
    for meta_row, feature_row, target_value in zip(md1, temp_features,
                                                   temp_target):
        for entry in scaling_data_processed:
            # Cheap equality checks first; the Counter-based comparison
            # only runs when they all pass.
            if (meta_row[1] == entry[1] and meta_row[2] == entry[2]
                    and meta_row[4] == entry[4]
                    and get_cond(meta_row[3], entry[3])):
                features.append(feature_row)
                target.append(target_value)
                scaling_target.append(entry[5])
                metadata.append([
                    meta_row[0], meta_row[1], meta_row[2], meta_row[3],
                    meta_row[4], entry[0], entry[1], entry[2], entry[3],
                    entry[4]
                ])

    features = np.array(features)
    target = np.array(target)
    scaling_target = np.array(scaling_target)
    return features, target, scaling_target, metadata
import os from collections import Counter def get_refined_data(A, B): M = A.copy() N = B.copy() mask = np.invert(np.isnan(M) | np.isnan(N)) return M[mask], N[mask] adsorbates = ['CH', 'CH2', 'CH3', 'OH', 'SH', 'NH'] for ads in adsorbates: print('Working on adsorbates: {}'.format(ads)) with FingerprintDB('../get_data_object/{}_fp.db'.format(ads)) as f: data = f.get_fingerprints(params=np.arange(107, 110)) EA = data[:, 1] EAx = data[:, 2] E = data[:, 0] del data x, y = get_refined_data(EA, E) jac1 = LinearScaling(np.array(x), np.array(y), 'EA', 'E', 'eV') x, y = get_refined_data(EAx, E) jac2 = LinearScaling(np.array(x), np.array(y), 'EAx', 'E', 'eV') jac1.get_coeff() jac2.get_coeff() a1 = jac1.plot_scaling() a2 = jac2.plot_scaling() if not os.path.exists('Jacobsen_{}'.format(ads)):
operation_list_local += [
    'bonding_convolution', 'layered_sum', 'local_ads_metal_fp'
]

# (key template, description template) pairs registered for each slab
# index; both templates are formatted with that index.
_SLAB_PC_PARAMETERS = [
    ('AN_PC{}', 'Atomic Number Slab: pc {}'),
    ('AR_PC{}', 'Atomic Radius Slab: pc {}'),
    ('DBC_PC{}', 'Dband center Slab: pc {}'),
    ('DBW_PC{}', 'Dband Width Slab: pc {}'),
    ('DBS_PC{}', 'Dband Skewness Slab: pc {}'),
    ('DBK_PC{}', 'Dband Kurtosis Slab: pc {}'),
    ('DP_PC{}', 'Dipole Polarizability Slab: pc {}'),
]

for datum in data:
    #generate atoms object
    #get data obtained from cathub
    # The `[()]` unwraps a 0-d array; the cathub data files hold a pickled
    # object, which NumPy refuses to load unless allow_pickle is enabled.
    # NOTE: unpickling executes arbitrary code -- only load trusted files.
    w_data = np.load('../get_data_cathub/{}_data.npy'.format(datum),
                     allow_pickle=True)[()]
    #connect the database to store the objects
    db = connect('{}_atoms.db'.format(datum))
    fp_file = '{}_fp.db'.format(datum)
    with FingerprintDB(fp_file) as fpd:
        for i in range(2):
            # Same registration order as before: AN, AR, DBC, DBW, DBS,
            # DBK, DP for each index i.
            for key_template, description_template in _SLAB_PC_PARAMETERS:
                fpd.parameter_entry(key_template.format(i),
                                    description_template.format(i))
Adata[D][C][3]) except TypeError: q1 = None try: q2 = 0.33 * (Adata[D[0]][A][3] + Adata[D[0]][B][3] + Adata[D[0]][C][3]) except TypeError: q2 = None return q1, q2 data = ['CH', 'CH2', 'CH3', 'OH', 'NH', 'SH'] jacobsen_data = {d: None for d in data} for d in data: #f_loc = '/Users/osmanmamun/Delta_Learning_Paper/get_db_fp/{}_fp.db' with FingerprintDB('{}_fp.db'.format(d)) as f: metadata = f.get_metadata(params=np.arange(1, 6)) energies = f.get_fingerprints(params=np.array([107])) E = [] EA = [] EAx = [] md = [] for i, m in enumerate(metadata, 1): if m[-1] == 'A1': continue if not 'FCC' in m[3]: continue A, B, C = m[3].split('|')[0].split('_') E += [energies[i]] A, B = get_E(A, B, C, d) EA += [A]