def get_data(A):
    """Load and merge the two fingerprint sets stored under the name ``A``.

    Fingerprints are read from ``../../get_db_fp/{A}_fp.db`` (parameters
    1-107, the last column being split off as the target) and from
    ``../../get_db_fp_catlearn/{A}_fp_catlearn.db`` (parameters 1-1061).
    The two feature blocks are concatenated column-wise.

    Parameters
    ----------
    A
        Value formatted into both database file names.

    Returns
    -------
    tuple
        ``(features, target, metadata)`` when the metadata rows of both
        databases agree, otherwise ``(None, None, None)``.  ``metadata``
        is the metadata read from the first database.
    """
    def sanity_check(M, N):
        """Return True if every entry of each row of M occurs in the
        corresponding row of N (rows are paired with ``zip``)."""
        # The earlier implementation returned from inside the first loop
        # iteration, so only the first pair of rows was ever compared.
        return all(
            all(mm in n for mm in m)
            for m, n in zip(M, N)
        )

    dpath = '../../get_db_fp/{}_fp.db'.format(A)
    with FingerprintDB(dpath) as fp:
        data = fp.get_fingerprints(params=np.arange(1, 108))
        metadata = fp.get_metadata(params=np.arange(1, 6))
    dpath = '../../get_db_fp_catlearn/{}_fp_catlearn.db'.format(A)
    with FingerprintDB(dpath) as fp:
        data_cl = fp.get_fingerprints(params=np.arange(1, 1062))
        metadata2 = fp.get_metadata(params=np.arange(1, 6))

    # The last requested column is the target; the rest are features.
    target = data[:, -1]
    data = data[:, :-1]
    print(data.shape)
    print(data_cl.shape)
    features = np.concatenate((data, data_cl), axis=1)

    # Only hand the data back when both databases describe the same rows.
    if not sanity_check(metadata2, metadata):
        return None, None, None
    return features, target, metadata
示例#2
0
def get_data(A, l_A, C, l_C):
    """Pair the last fingerprint column of two databases by metadata.

    Parameters
    ----------
    A, C
        Arguments passed to ``FingerprintDB`` to open the two databases.
    l_A, l_C
        Exclusive upper bounds of the parameter ids requested from each
        database (ids ``1 .. l - 1``); only the last returned column is
        kept.

    Returns
    -------
    tuple of lists
        ``(x, y, metadata)``: for every pair of rows whose metadata
        fields 1, 2 and 4 are equal and whose field 3 passes
        ``get_cond``, ``x`` holds the value from ``A``, ``y`` the value
        from ``C`` and ``metadata`` the five metadata fields of the ``A``
        row followed by the five of the ``C`` row.
    """
    def get_cond(A, B):
        """Compare two labels: same character set, same second
        ``|``-separated part (if any) and the same multiset of
        ``_``-separated tokens in the first part."""
        if set(A) != set(B):
            return False
        if '|' not in A:
            return Counter(A.split('_')) == Counter(B.split('_'))
        pieces_a = A.split('|')
        pieces_b = B.split('|')
        if pieces_a[1] != pieces_b[1]:
            return False
        return Counter(pieces_a[0].split('_')) == \
            Counter(pieces_b[0].split('_'))

    def load_last_column(source, stop):
        """Read the last requested fingerprint column and the metadata."""
        with FingerprintDB(source) as fp:
            values = fp.get_fingerprints(params=np.arange(1, stop))[:, -1]
            meta = fp.get_metadata(params=np.arange(1, 6))
        return values, meta

    data_A, metadata_A = load_last_column(A, l_A)
    data_C, metadata_C = load_last_column(C, l_C)

    x, y, metadata = [], [], []

    # Compare every row of the first database with every row of the second.
    for value_a, meta_a in zip(data_A, metadata_A):
        for value_c, meta_c in zip(data_C, metadata_C):
            rows_match = all([
                meta_a[1] == meta_c[1],
                meta_a[2] == meta_c[2],
                get_cond(meta_a[3], meta_c[3]),
                meta_a[4] == meta_c[4],
            ])
            if not rows_match:
                continue
            x.append(value_a)
            y.append(value_c)
            metadata.append(
                [meta_a[k] for k in range(5)] +
                [meta_c[k] for k in range(5)]
            )

    return x, y, metadata
示例#3
0
def get_data(A, B):
    """Assemble features, targets and matching scaling energies.

    Fingerprints for ``B`` are read from ``../../../get_db_fp/{B}_fp.db``
    (parameters 1-107, last column split off as the target) and from
    ``../../../get_db_fp_catlearn/{B}_fp_catlearn.db`` (parameters 1-1061)
    and concatenated column-wise.  Reaction records for ``A`` are read
    from ``../../../get_data_cathub/{A}_data.npy``.  Every fingerprint row
    is then paired with every reaction record whose surface symbol, site,
    site type and structure label match the row's metadata fields 1-4.

    Parameters
    ----------
    A
        Name formatted into the cathub data file name.  The values
        ``'N'``/``'H'`` and ``'S'`` additionally enable record filters.
    B
        Name formatted into the two fingerprint database file names.

    Returns
    -------
    features : numpy.ndarray
        Feature rows, one per matched (fingerprint, record) pair.
    target : numpy.ndarray
        Target values of the matched fingerprint rows.
    scaling_target : numpy.ndarray
        Reaction energies of the matched records.
    metadata : list of list
        Five metadata fields of the fingerprint row followed by the
        adsorbate, symbol, site, site type and structure label of the
        matched record.

    Raises
    ------
    SystemExit
        If the metadata of the two fingerprint databases disagree.
    ValueError
        If the structure of a surface composition cannot be determined.
    """
    def get_sym(A):
        """Return ``(symbol, structure_label)`` for a surface composition.

        A composition containing ``'3'`` is labelled ``'L12'``, a single
        listed metal ``'A1'``; otherwise exactly two listed metals must be
        found and the result is ``'<m1>2<m2>2'`` labelled ``'L10'``.
        """
        ordered_metals = [
            'Y', 'La', 'Sc', 'Zr', 'Hf', 'Ti', 'Ta', 'Nb', 'V', 'Cr', 'Mo',
            'W', 'Re', 'Tc', 'Os', 'Ru', 'Ir', 'Rh', 'Ni', 'Co', 'Fe', 'Mn',
            'Pt', 'Pd', 'Au', 'Ag', 'Cu', 'Zn', 'Cd', 'Hg', 'Al', 'Ga', 'In',
            'Tl', 'Pb', 'Sn', 'Bi'
        ]
        if '3' in A:
            return A, 'L12'
        if A in ordered_metals:
            return A, 'A1'

        sym = [metal for metal in ordered_metals if metal in A]
        if len(sym) != 2:
            # Previously this path fell off the end and returned None,
            # which made the caller fail with an opaque unpacking
            # TypeError; fail with an explicit message instead.
            raise ValueError(
                'Cannot determine the structure of surface composition '
                '{!r}: expected exactly two known metals, found {}.'
                .format(A, len(sym)))
        return '2'.join(sym) + '2', 'L10'

    def get_cond(A, B):
        """Compare two site-type labels: same character set, same second
        ``|``-separated part (if any) and the same multiset of
        ``_``-separated tokens in the first part."""
        if set(A) != set(B):
            return False
        if '|' in A:
            if A.split('|')[1] != B.split('|')[1]:
                return False
            return Counter(A.split('|')[0].split('_')) == \
                Counter(B.split('|')[0].split('_'))
        return Counter(A.split('_')) == Counter(B.split('_'))

    def sanity_check(M, N):
        """Return True if every entry of each row of M occurs in the
        corresponding row of N (rows are paired with ``zip``)."""
        # The earlier implementation returned from inside the first loop
        # iteration, so only the first pair of rows was ever compared.
        return all(
            all(mm in n for mm in m)
            for m, n in zip(M, N)
        )

    dpath = '../../../get_db_fp/{}_fp.db'.format(B)
    with FingerprintDB(dpath) as fp:
        data = fp.get_fingerprints(params=np.arange(1, 108))
        md1 = fp.get_metadata(params=np.arange(1, 6))
    dpath = '../../../get_db_fp_catlearn/{}_fp_catlearn.db'.format(B)
    with FingerprintDB(dpath) as fp:
        data_cl = fp.get_fingerprints(params=np.arange(1, 1062))
        md2 = fp.get_metadata(params=np.arange(1, 6))

    # The last requested column is the target; the rest are features.
    temp_target = data[:, -1]
    data = data[:, :-1]
    print(data.shape, data_cl.shape)
    temp_features = np.concatenate((data, data_cl), axis=1)
    del data, data_cl
    if not sanity_check(md2, md1):
        sys.exit('Something went wrong in sanity check.')

    scaling_data_processed = []
    # The file holds a dict wrapped in a 0-d object array (hence ``[()]``).
    # NumPy >= 1.16.3 refuses to load object arrays unless allow_pickle is
    # set.  NOTE: unpickling can execute arbitrary code, so this file must
    # come from a trusted source.
    scaling_data = np.load(
        '../../../get_data_cathub/{}_data.npy'.format(A),
        allow_pickle=True)[()]
    scaling_data = scaling_data['reactions']['edges']
    for d in scaling_data:
        node = d['node']
        if A in ['N', 'H']:
            if '0.5' not in node['reactants']:
                continue
        if A == 'S':
            if 'Hstar' in node['products']:
                continue

        adsorbate = re.findall(r'"([A-Z0-4]+)":', node['sites'])[0]
        reactionenergy = node['reactionEnergy']
        symbol, sb_symbol = get_sym(node['surfaceComposition'])
        site, site_type = re.findall(r'(?<=: )"(.+)"',
                                     node['sites'])[0].split('|', 1)
        if site not in ['top', 'bridge', 'hollow']:
            print('Site {} not a valid site_type.'.format(site))
            continue

        # Substitute the generic A/B placeholders of the site type with
        # the actual element symbols.  Two-element cases go through the
        # intermediate tokens 'aaa'/'bbb' so that an element symbol that
        # itself contains 'A' or 'B' is not substituted a second time.
        if sb_symbol == 'A1':
            site_type = site_type.replace('A', symbol)
        else:
            if sb_symbol == 'L12':
                a, b = symbol.split('3')
            else:
                a, b, _ = symbol.split('2')
                if a > b:
                    a, b = b, a
            site_type = site_type.replace('A', 'aaa').replace('B', 'bbb')
            site_type = site_type.replace('aaa', a).replace('bbb', b)
        if ':' in site_type:
            print('This is a problematic site type.\n\n\n')
            continue
        scaling_data_processed.append([
            adsorbate, symbol, site, site_type, sb_symbol, reactionenergy
        ])

    scaling_target = []
    metadata = []
    features = []
    target = []

    # Pair every fingerprint row with every matching reaction record.
    for a, b, c in zip(md1, temp_features, temp_target):
        for d in scaling_data_processed:
            # Cheap equality tests first; get_cond only runs when they pass.
            if (a[1] == d[1] and a[2] == d[2] and a[4] == d[4]
                    and get_cond(a[3], d[3])):
                features.append(b)
                target.append(c)
                scaling_target.append(d[5])
                metadata.append([
                    a[0], a[1], a[2], a[3], a[4], d[0], d[1], d[2], d[3], d[4]
                ])

    features = np.array(features)
    target = np.array(target)
    scaling_target = np.array(scaling_target)

    return features, target, scaling_target, metadata
示例#4
0
import os
from collections import Counter


def get_refined_data(A, B):
    """Return copies of ``A`` and ``B`` restricted to the positions where
    neither of them is NaN.

    Parameters
    ----------
    A, B
        Arrays supporting ``.copy()`` and boolean-mask indexing.

    Returns
    -------
    tuple
        ``(A_filtered, B_filtered)`` filtered with the same mask.
    """
    first, second = A.copy(), B.copy()
    nan_in_first = np.isnan(first)
    nan_in_second = np.isnan(second)
    # Keep an entry only when both values at that position are valid.
    keep = ~(nan_in_first | nan_in_second)
    return first[keep], second[keep]


# Names used to locate one fingerprint database per loop iteration.
adsorbates = ['CH', 'CH2', 'CH3', 'OH', 'SH', 'NH']

for ads in adsorbates:
    print('Working on adsorbates: {}'.format(ads))
    # Request parameters 107-109; the three returned columns are used
    # below as E (column 0), EA (column 1) and EAx (column 2).
    with FingerprintDB('../get_data_object/{}_fp.db'.format(ads)) as f:
        data = f.get_fingerprints(params=np.arange(107, 110))
    EA = data[:, 1]
    EAx = data[:, 2]
    E = data[:, 0]
    del data
    # Drop the positions where either quantity is NaN before building each
    # LinearScaling object (LinearScaling is defined outside this excerpt).
    x, y = get_refined_data(EA, E)
    jac1 = LinearScaling(np.array(x), np.array(y), 'EA', 'E', 'eV')
    x, y = get_refined_data(EAx, E)
    jac2 = LinearScaling(np.array(x), np.array(y), 'EAx', 'E', 'eV')
    jac1.get_coeff()
    jac2.get_coeff()
    # presumably a1/a2 are plot handles used further down — TODO confirm
    a1 = jac1.plot_scaling()
    a2 = jac2.plot_scaling()

    if not os.path.exists('Jacobsen_{}'.format(ads)):
示例#5
0
# Add three more operation names to operation_list_local, which is defined
# before the start of this excerpt.
operation_list_local += [
                  'bonding_convolution',
                  'layered_sum',
                  'local_ads_metal_fp'
                 ]


for datum in data:
    # Load the cathub data saved for this entry; [()] unwraps the 0-d array.
    w_data = np.load('../get_data_cathub/{}_data.npy'.format(datum))[()]
    # Open the database that stores the atoms objects and name the
    # fingerprint database for this entry.
    db = connect('{}_atoms.db'.format(datum))
    fp_file = '{}_fp.db'.format(datum)

    # (name template, description template) pairs; each pair is registered
    # once per index i, in exactly this order.
    slab_parameter_templates = (
        ('AN_PC{}', 'Atomic Number Slab: pc {}'),
        ('AR_PC{}', 'Atomic Radius Slab: pc {}'),
        ('DBC_PC{}', 'Dband center Slab: pc {}'),
        ('DBW_PC{}', 'Dband Width Slab: pc {}'),
        ('DBS_PC{}', 'Dband Skewness Slab: pc {}'),
        ('DBK_PC{}', 'Dband Kurtosis Slab: pc {}'),
        ('DP_PC{}', 'Dipole Polarizability Slab: pc {}'),
    )

    with FingerprintDB(fp_file) as fpd:
        for i in range(2):
            for name_template, description_template in \
                    slab_parameter_templates:
                fpd.parameter_entry(name_template.format(i),
                                    description_template.format(i))
示例#6
0
                     Adata[D][C][3])
    except TypeError:
        q1 = None
    try:
        q2 = 0.33 * (Adata[D[0]][A][3] + 
                     Adata[D[0]][B][3] + 
                     Adata[D[0]][C][3])
    except TypeError:
        q2 = None
    return q1, q2

# Names used to locate one '<name>_fp.db' fingerprint database each.
data = ['CH', 'CH2', 'CH3', 'OH', 'NH', 'SH']
# One slot per name, initialised to None.
jacobsen_data = {d: None for d in data}
for d in data:
    #f_loc = '/Users/osmanmamun/Delta_Learning_Paper/get_db_fp/{}_fp.db'
    # Read metadata fields 1-5 and the single fingerprint parameter 107.
    with FingerprintDB('{}_fp.db'.format(d)) as f:
        metadata = f.get_metadata(params=np.arange(1, 6))
        energies = f.get_fingerprints(params=np.array([107]))
    E = []
    EA = []
    EAx = []
    md = []
    # NOTE(review): enumerate starts at 1, so energies[i] reads the row
    # *after* the metadata row m when both are 0-indexed and aligned (and
    # would run out of range on the last row if they have equal length) —
    # confirm this offset is intended.
    for i, m in enumerate(metadata, 1):
        # Skip rows whose last metadata field is 'A1'.
        if m[-1] == 'A1':
            continue
        # Only keep rows whose field 3 mentions 'FCC'.
        if not 'FCC' in m[3]:
            continue
        # The part of field 3 before '|' must hold exactly three
        # '_'-separated tokens, otherwise this unpacking raises.
        A, B, C = m[3].split('|')[0].split('_')
        E += [energies[i]]
        # get_E is defined outside the visible part of this excerpt; its
        # two results overwrite the tokens A and B unpacked above.
        A, B = get_E(A, B, C, d)
        EA += [A]