示例#1
0
def main(train_file, test_file, force):
    global train
    global test
    train = pd.read_feather(train_file)
    test = pd.read_feather(test_file)
    features = list(get_features(globals()))
    generate_features(features, force)
示例#2
0
        molecule_name_list = []

        for molecule_name in molecule_name_array:
            atom_size = np.amax(structures[structures["molecule_name"] == molecule_name]["atom_index"])
            atom_size_list.append(atom_size)
            molecule_name_list.append(molecule_name)

        molecule_df = pd.DataFrame()
        molecule_df["molecule_name"] = molecule_name_list
        molecule_df["atom_size"] = atom_size_list

        self.train = merge_df(train, molecule_df, "molecule_name", "molecule_name")
        self.test = merge_df(test, molecule_df, "molecule_name", "molecule_name")


def merge_df(df1, df2, column1, column2):
    df = pd.merge(df1, df2, how="left",
                  left_on=column1,
                  right_on=column2)
    return df


if __name__ == '__main__':
    args = get_arguments()

    train = pd.read_feather('../data/input/train.feather')
    test = pd.read_feather('../data/input/test.feather')
    structures = pd.read_feather('../data/input/structures.feather')

    generate_features(globals(), args.force)