def main():
    """Build train/test sets from the composition data and run the ensemble.

    Configuration is read from the module-level ``args`` object.

    * If ``args.test_path`` is truthy, the whole dataset loaded from
      ``args.data_path`` is used for training and a second
      ``CompositionData`` loaded from ``args.test_path`` is used for testing.
    * Otherwise the dataset indices are divided by ``split`` (seeded with
      ``args.seed``, test share ``args.test_size``); every
      ``args.sample``-th training index is kept for the training subset.

    The ``models/``, ``runs/`` and ``results/`` directories are created when
    missing, after which ``ensemble`` is invoked with ``args.data_id``.
    """
    dataset = CompositionData(data_path=args.data_path, fea_path=args.fea_path)
    orig_atom_fea_len = dataset.atom_fea_dim

    if args.test_path:
        print("using independent test set: {}".format(args.test_path))
        train_set = dataset
        test_set = CompositionData(data_path=args.test_path,
                                   fea_path=args.fea_path)
    else:
        print("using {} of training set as test set".format(args.test_size))
        indices = list(range(len(dataset)))
        train_idx, test_idx = split(indices, random_state=args.seed,
                                    test_size=args.test_size)
        # Stride through the training indices to subsample the training set.
        train_set = torch.utils.data.Subset(dataset, train_idx[0::args.sample])
        test_set = torch.utils.data.Subset(dataset, test_idx)

    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.isdir() test followed by os.makedirs().
    for out_dir in ("models/", "runs/", "results/"):
        os.makedirs(out_dir, exist_ok=True)

    ensemble(args.data_id, train_set, test_set, args.ensemble,
             orig_atom_fea_len)
def train_roost(args, model_name, csv_train, csv_val=None, val_frac=0.0,
                resume=False, transfer=None, fine_tune=None):
    """Prepare train/validation sets from CSV files and run ``ensemble``.

    ``args`` is mutated in place: ``data_path``, ``val_size`` and ``fea_len``
    are always overwritten, and at most one of ``resume``, ``transfer`` or
    ``fine_tune`` is set on it.

    Args:
        args: Configuration object; ``fea_path``, ``seed``, ``sample``,
            ``fold_id`` and ``ensemble`` are read from it.
        model_name: Name forwarded to ``ensemble``.
        csv_train: Training CSV path, relative to ``data/datasets/``.
        csv_val: Optional validation CSV path, relative to
            ``data/datasets/``. When ``None`` the validation set is carved
            out of the training data with ``split``.
        val_frac: Stored as ``args.val_size`` and passed to ``split`` as
            ``test_size`` when ``csv_val`` is ``None``.
            NOTE(review): the default of 0.0 is handed to ``split``
            unchanged - confirm ``split`` accepts a zero test size.
        resume: When truthy, stored as ``args.resume``; in that case
            ``transfer`` and ``fine_tune`` are ignored.
        transfer: When not ``None`` (and ``resume`` is falsy), stored as
            ``args.transfer``; ``fine_tune`` is then ignored.
        fine_tune: When not ``None`` (and neither of the above applies),
            stored as ``args.fine_tune``.
    """
    args.data_path = f'data/datasets/{csv_train}'
    args.val_size = val_frac

    dataset = CompositionData(data_path=args.data_path, fea_path=args.fea_path)
    orig_atom_fea_len = dataset.atom_fea_dim
    args.fea_len = orig_atom_fea_len

    # Precedence: resume > transfer > fine_tune; only one is applied.
    if resume:
        args.resume = resume
    elif transfer is not None:
        args.transfer = transfer
    elif fine_tune is not None:
        args.fine_tune = fine_tune

    if csv_val is None:
        indices = list(range(len(dataset)))
        train_idx, val_idx = split(indices, random_state=args.seed,
                                   test_size=args.val_size)
        # Stride through the training indices to subsample the training set.
        train_set = torch.utils.data.Subset(dataset, train_idx[0::args.sample])
        val_set = torch.utils.data.Subset(dataset, val_idx)
    else:
        train_set = dataset
        val_set = CompositionData(data_path=f'data/datasets/{csv_val}',
                                  fea_path=args.fea_path)

    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.isdir() test followed by os.makedirs().
    for out_dir in ("models/", "runs/", "results/"):
        os.makedirs(out_dir, exist_ok=True)

    ensemble(model_name, args.fold_id, train_set, val_set, args.ensemble,
             orig_atom_fea_len, args)
def predict_roost(args, model_name, csv_pred):
    """Load a hold-out CSV and hand it to ``test_ensemble``.

    Args:
        args: Configuration object. ``fold_id``, ``ensemble``, ``fea_len``
            and ``fea_path`` are read from it; ``data_path`` is overwritten
            with the hold-out file location.
        model_name: Name forwarded to ``test_ensemble``.
        csv_pred: Hold-out CSV path, relative to ``data/datasets/``.
    """
    # Capture the ensemble settings before touching args.data_path.
    fold, n_folds, feature_len = args.fold_id, args.ensemble, args.fea_len

    args.data_path = f'data/datasets/{csv_pred}'
    hold_out_set = CompositionData(data_path=args.data_path,
                                   fea_path=args.fea_path)

    test_ensemble(model_name, fold, n_folds, hold_out_set, feature_len, args)
def generate_standard_model(mat_prop, device):
    """Train a model on the AFLOW split for ``mat_prop`` and predict on test.

    Parses a fresh ``args`` via ``input_parser``, overrides the device,
    optimiser name, epoch count and embedding path, then calls
    ``train_roost`` with the train/val CSVs and ``predict_roost`` with the
    test CSV found under ``aflow/<mat_prop>/``.

    Args:
        mat_prop: Property name; selects the ``aflow/<mat_prop>/`` folder and
            is embedded in the model name.
        device: Value stored on ``args.device``.
    """
    args = input_parser()
    args.device = device
    args.optim = 'AdamW'
    args.epochs = 300
    args.fea_path = "data/embeddings/matscholar-embedding.json"

    model_name = f'{mat_prop}_model_{args.epochs}_epochs'
    data_dir = f'aflow/{mat_prop}'
    csv_train = f'{data_dir}/train.csv'
    csv_val = f'{data_dir}/val.csv'
    csv_test = f'{data_dir}/test.csv'

    # Feature length is taken from a dataset loaded at the parser's own
    # data_path (csv_train is not applied to args until train_roost runs).
    # NOTE(review): train_roost appears to set args.fea_len again - confirm
    # whether this preliminary load is still required.
    args.fea_len = CompositionData(data_path=args.data_path,
                                   fea_path=args.fea_path).atom_fea_dim

    # Train against the validation CSV, then evaluate on the test CSV.
    train_roost(args, model_name, csv_train, csv_val=csv_val)
    predict_roost(args, model_name, csv_test)
def generate_cgcnn_aflow_transfer_model(mat_prop):
    """Train on CGCNN predictions for ``mat_prop`` and predict on AFLOW test.

    Parses a fresh ``args`` via ``input_parser``, overrides the optimiser
    name, epoch count and embedding path, then calls ``train_roost`` with
    ``cgcnn_aflow/<mat_prop>_cgcnn_pred.csv`` as training data and
    ``aflow/<mat_prop>/val.csv`` as validation data, followed by
    ``predict_roost`` on ``aflow/<mat_prop>/test.csv``.

    Args:
        mat_prop: Property name; embedded in the model name and in all three
            CSV paths.
    """
    args = input_parser()
    args.optim = 'AdamW'
    args.epochs = 2
    args.fea_path = "data/embeddings/matscholar-embedding.json"

    model_name = f'{mat_prop}_cgcnn_aflow_transfer_model_{args.epochs}_epochs'
    # These must be f-strings: as plain literals the text "{mat_prop}" would
    # be passed through verbatim and end up inside the file paths.
    csv_train = f'cgcnn_aflow/{mat_prop}_cgcnn_pred.csv'
    csv_val = f'aflow/{mat_prop}/val.csv'
    csv_test = f'aflow/{mat_prop}/test.csv'

    # Feature length comes from a dataset loaded at the parser's own
    # data_path (csv_train is not applied to args until train_roost runs).
    dataset = CompositionData(data_path=args.data_path, fea_path=args.fea_path)
    orig_atom_fea_len = dataset.atom_fea_dim
    args.fea_len = orig_atom_fea_len

    train_roost(args, model_name, csv_train, csv_val=csv_val)
    predict_roost(args, model_name, csv_test)
def run():
    """Predict on the shear test CSV and report r2 / MAE with a plot.

    Parses ``args`` via ``input_parser``, overrides the optimiser name, epoch
    count and embedding path, calls ``predict_roost`` for the configured
    model name, then reads ``results/test_results_<model_name>.csv`` and
    compares its ``target`` column against ``pred-0``. The training calls
    are kept below as disabled alternatives.
    """
    args = input_parser()
    args.optim = 'AdamW'
    args.epochs = 300
    args.fea_path = "data/embeddings/matscholar-embedding.json"

    # Epoch-derived name; superseded straight away by the fixed name below.
    model_name = f'shear_train_{args.epochs}_epochs'
    model_name = 'finetune_500'
    # model_name = 'shear_cgcnn_aflow_fine_tune_500_epoch'

    # csv_train = 'cgcnn_predictions/shear_train_and_cgcnn_aflow_pcd_train.csv'
    csv_train = 'shear_train.csv'
    csv_val = 'shear_val.csv'
    csv_test = 'shear_test.csv'
    trained_model = 'models/checkpoint_shear_cgcnn_aflow_fine_tune_500_epoch'

    # Feature length is taken from a dataset at the parser's own data_path.
    args.fea_len = CompositionData(data_path=args.data_path,
                                   fea_path=args.fea_path).atom_fea_dim

    # Disabled training variants (enable one to retrain before predicting):
    # train_roost(args, model_name, csv_train, val_frac=0.05)
    # train_roost(args, model_name, csv_train, csv_val=csv_val)
    # train_roost(args, model_name, csv_train, csv_val=csv_val, resume=True)
    # train_roost(args, model_name, csv_train, csv_val=csv_val,
    #             fine_tune=trained_model)
    # train_roost(args, model_name, csv_train, csv_val=csv_val,
    #             transfer=trained_model)
    predict_roost(args, model_name, csv_test)

    predictions = pd.read_csv(f'results/test_results_{model_name}.csv')
    actual = predictions['target']
    predicted = predictions['pred-0']

    # choose to "log10" or "unlog10" your data before metrics/plots
    take_log10 = False
    undo_log10 = False
    if take_log10:
        actual, predicted = np.log10(actual), np.log10(predicted)
    if undo_log10:
        actual, predicted = 10**actual, 10**predicted

    r2 = r2_score(actual, predicted)
    mae = mean_absolute_error(actual, predicted)
    print('-------------------')
    print(f'r2: {r2:0.4f}, mae: {mae:0.4f}')
    pva_plot(actual, predicted, model_name)
def main():
    """Split the composition dataset and run the ensemble on one fold.

    Configuration is read from the module-level ``args`` object. The dataset
    at ``args.data_path`` is loaded, its indices are divided by ``split``
    (seeded with ``args.seed``, test share ``args.test_size``), and every
    ``args.sample``-th training index is kept for the training subset.

    The ``models/``, ``runs/`` and ``results/`` directories are created when
    missing, after which ``ensemble`` is invoked with ``args.fold_id``.
    """
    dataset = CompositionData(data_path=args.data_path, fea_path=args.fea_path)
    orig_atom_fea_len = dataset.atom_fea_dim

    indices = list(range(len(dataset)))
    train_idx, test_idx = split(indices, random_state=args.seed,
                                test_size=args.test_size)

    # Stride through the training indices to subsample the training set.
    train_set = torch.utils.data.Subset(dataset, train_idx[0::args.sample])
    test_set = torch.utils.data.Subset(dataset, test_idx)

    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.isdir() test followed by os.makedirs().
    for out_dir in ("models/", "runs/", "results/"):
        os.makedirs(out_dir, exist_ok=True)

    ensemble(args.fold_id, train_set, test_set, args.ensemble,
             orig_atom_fea_len)