Exemplo n.º 1
0
def get_data(normalize_y, sample_size):
    """Load a random sample of the alloy database as train/test arrays.

    Reads ``00Total_DB.csv`` from ``./Data/metal-alloy-db.v2/``, draws
    ``sample_size`` rows at random, loads the ``.cif`` file named by each
    row's ``DBname`` column, encodes every structure with
    ``structure_encoder`` and pairs it with the row's ``FormationEnergy``.

    Args:
        normalize_y: If truthy, the targets are standardized as
            ``(value - mean) / std`` using the mean and standard deviation
            of the sampled ``FormationEnergy`` values (train and test rows
            together). Otherwise the raw values are returned.
        sample_size: Number of rows drawn from the CSV via
            ``DataFrame.sample``.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)`` of NumPy arrays. The
        first 90 % of the sampled rows form the training split and the
        following 10 % the test split. Each ``y`` array has one column.
    """
    # Arguments forwarded to structure_encoder for every structure.
    radius = 3
    max_neighbor_num = 10
    train_fraction = 0.9
    test_fraction = 0.1

    df = pd.read_csv("./Data/metal-alloy-db.v2/00Total_DB.csv")
    df = df.sample(n=sample_size)

    # One .cif path per sampled row, built from the DBname column.
    cifs = "./Data/metal-alloy-db.v2/" + df['DBname'] + ".cif"
    structures = [IStructure.from_file(cif) for cif in cifs]
    encoded_structures = [
        structure_encoder(structure, radius, max_neighbor_num)
        for structure in structures
    ]
    x_data = np.array(encoded_structures)

    formation_energy = df['FormationEnergy']
    if normalize_y:
        mean = formation_energy.mean()
        std = formation_energy.std()
        targets = (formation_energy - mean) / std
    else:
        targets = formation_energy
    # Column vector: one row per sample, a single target value per row.
    y_data = np.asarray(targets).reshape(-1, 1)

    total = len(df)
    # int() truncates, so n_train + n_test can be smaller than total; any
    # trailing rows are then left out of both splits.
    n_train = int(float(total) * train_fraction)
    n_test = int(float(total) * test_fraction)
    split_end = n_train + n_test

    x_train = x_data[:n_train]
    y_train = y_data[:n_train]
    x_test = x_data[n_train:split_end]
    y_test = y_data[n_train:split_end]

    return x_train, y_train, x_test, y_test
Exemplo n.º 2
0

if __name__ == "__main__":
    # Script entry point: load a random 500-row sample of the v1 alloy
    # database, encode every structure, and build the formation-energy
    # targets (optionally standardized).

    # Arguments passed to structure_encoder for every structure.
    radius = 3
    max_neighbor_num = 10
    # NOTE(review): these look like train/test split fractions, but neither
    # is used in the visible part of this script -- confirm against the rest.
    training = 0.9
    test = 0.1

    df = pd.read_csv("./Data/metal-alloy-db.v1/00Total_DB.csv")
    # Disabled alternative: sample every row (i.e. shuffle the whole table).
    # df = df.sample(n=len(df))
    # Keep a random subset of 500 rows.
    df = df.sample(n=500)

    # One .cif path per sampled row, built from the DBname column.
    cifs = "./Data/metal-alloy-db.v1/" + df['DBname'] + ".cif"
    structures = [IStructure.from_file(cif) for cif in cifs]

    encoded_structures = [structure_encoder(structure, radius, max_neighbor_num) for structure in structures]
    x_data = np.array(encoded_structures)
    # Insert a new length-1 axis at index 4.
    # NOTE(review): this requires x_data to have at least four dimensions --
    # confirm the shape produced by structure_encoder.
    x_data = np.expand_dims(x_data, axis=4)

    formation_energy = df['FormationEnergy']
    # Hard-coded switch: while False, the raw formation energies are used and
    # the standardization branch below is skipped.
    y_normalization = False
    if y_normalization:
        # Standardize with the mean / std of the sampled rows.
        mean = formation_energy.mean()
        std = formation_energy.std()
        norm_form_energy = (df['FormationEnergy'] - mean) / std

        def norm_back(val, mean, std):
            """Map a standardized value back to the original scale."""
            return val * std + mean
        # Wrap each target in its own single-element list (one column per row).
        y_data = [[val] for val in norm_form_energy]
    else:
        # Same single-column layout, using the unscaled values.
        y_data = [[val] for val in formation_energy]
Exemplo n.º 3
0
    '''

    formation_energy = df['FormationEnergy']
    mean = formation_energy.mean()
    std = formation_energy.std()
    norm_form_energy = (df['FormationEnergy'] - mean) / std

    def norm_back(val, mean, std):
        """Undo standardization: scale ``val`` by ``std``, then add ``mean``."""
        rescaled = val * std
        return rescaled + mean

    y_data = [[val] for val in norm_form_energy]
    y_data = np.array(y_data)

    print("Done")
    encoded_structures = [
        structure_encoder(structure, radius, max_neighbor_num)
        for structure in structures
    ]
    x_atom_fea = []
    x_nbr_fea_idx = []
    x_nbr_fea = []
    for i, each_structure in enumerate(encoded_structures):
        x_atom_fea.append(each_structure[0])
        x_nbr_fea_idx.append(each_structure[1])
        x_nbr_fea.append(each_structure[2])

    x_atom_fea = np.array(x_atom_fea, dtype='float32')
    x_nbr_fea_idx = np.array(x_nbr_fea_idx, dtype='float32')
    x_nbr_fea = np.array(x_nbr_fea, dtype='float32')

    atom_fea = tf.placeholder(tf.float32, [None, None, 92])