import os

import torch

# Project helpers (configs, fix_seed, prepare_data, the model classes and their
# hyperparameter dicts, train_and_val, train_gc, train_syn, evaluate) are
# imported from the surrounding repository.


def main():
    args = configs.arg_parse()
    fix_seed(args.seed)

    # Load the dataset
    data = prepare_data(args.dataset, args.train_ratio, args.input_dim, args.seed)

    # Define and train the model
    if args.dataset in ['Cora', 'PubMed']:
        # Retrieve the model and training hyperparameters matching the
        # dataset/model given as input
        hyperparam = ''.join(['hparams_', args.dataset, '_', args.model])
        param = ''.join(['params_', args.dataset, '_', args.model])
        model = eval(args.model)(input_dim=data.num_features,
                                 output_dim=data.num_classes,
                                 **eval(hyperparam))
        train_and_val(model, data, **eval(param))

    elif args.dataset in ['syn6', 'Mutagenicity']:
        input_dims = data.x.shape[-1]
        model = GcnEncoderGraph(input_dims, args.hidden_dim, args.output_dim,
                                data.num_classes, args.num_gc_layers,
                                bn=args.bn, dropout=args.dropout, args=args)
        train_gc(data, model, args)

    else:
        # For a PyTorch Geometric model:
        # model = GCNNet(args.input_dim, args.hidden_dim,
        #                data.num_classes, args.num_gc_layers, args=args)
        input_dims = data.x.shape[-1]
        model = GcnEncoderNode(data.num_features, args.hidden_dim,
                               args.output_dim, data.num_classes,
                               args.num_gc_layers, bn=args.bn,
                               dropout=args.dropout, args=args)
        train_syn(data, model, args)

    # Evaluate on the test set (identical across all three branches)
    _, test_acc = evaluate(data, model, data.test_mask)
    print('Test accuracy is {:.4f}'.format(test_acc))

    # Save the model
    model_path = 'models/{}_model_{}.pth'.format(args.model, args.dataset)
    if not os.path.exists(model_path) or args.save:
        torch.save(model, model_path)
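# A hedged sketch of a safer dispatch than the eval() calls above: looking the
# model class and its hyperparameters up in explicit dicts avoids executing
# strings assembled from CLI input. The registry arguments are placeholders
# for whatever classes and hyperparameter dicts the repo actually defines.
def build_model(args, data, model_registry, hparams_registry):
    # e.g. model_registry = {'GCN': GCN}, hparams_registry = {('Cora', 'GCN'): {...}}
    model_cls = model_registry[args.model]
    hparams = hparams_registry[(args.dataset, args.model)]
    return model_cls(input_dim=data.num_features,
                     output_dim=data.num_classes,
                     **hparams)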
import numpy as np
import torch

# Project helpers (configs, fix_seed, prepare_data, evaluate, test, GraphSVX)
# are imported from the surrounding repository.


def main():
    args = configs.arg_parse()
    fix_seed(args.seed)

    # Load the dataset
    data = prepare_data(args.dataset, args.train_ratio, args.input_dim, args.seed)

    # Load the trained model
    model_path = 'models/{}_model_{}.pth'.format(args.model, args.dataset)
    model = torch.load(model_path)

    # Evaluate the model
    if args.dataset in ['Cora', 'PubMed']:
        _, test_acc = evaluate(data, model, data.test_mask)
    else:
        test_acc = test(data, model, data.test_mask)
    print('Test accuracy is {:.4f}'.format(test_acc))

    # Explain it with GraphSVX
    explainer = GraphSVX(data, model, args.gpu)

    # Distinguish graph classification from node classification
    if args.dataset in ['Mutagenicity', 'syn6']:
        explanations = explainer.explain_graphs(args.indexes, args.hops,
                                                args.num_samples, args.info,
                                                args.multiclass, args.fullempty,
                                                args.S, 'graph_classification',
                                                args.feat, args.coal, args.g,
                                                args.regu, True)
    else:
        explanations = explainer.explain(args.indexes, args.hops,
                                         args.num_samples, args.info,
                                         args.multiclass, args.fullempty,
                                         args.S, args.hv, args.feat,
                                         args.coal, args.g, args.regu, True)

    print('Sum explanations: ', [np.sum(explanation) for explanation in explanations])
    print('Base value: ', explainer.base_values)
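# Hedged sketch of a sanity check on the two prints above: for a
# Shapley-value-style decomposition, the base value plus the summed feature
# attributions should approximate the model's output for the explained
# instance. This assumes explainer.base_values aligns index-for-index with
# `explanations`.
import numpy as np

def reconstruct_predictions(explanations, base_values):
    # Rebuild each prediction from its additive decomposition.
    return [base + np.sum(expl) for expl, base in zip(explanations, base_values)]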
import pandas as pd

# DATA_FOLDER and the pipeline helpers (prepare_folds, train_folds, load_test,
# evaluate, prepare_submission) are defined elsewhere in the project.

config = {
    # ... earlier LightGBM parameters truncated in the original snippet
    'reg_lambda': .1,
    'subsample': .9,
    'min_split_gain': .01,
    'min_child_weight': 2,
    'colsample_bytree': .9,  # subsample ratio of columns when constructing each tree
    'scale_pos_weight': 9,   # because the training data is unbalanced
    'verbose': -1,
}

# Keep the 800 most important features from the previous run
features = list(pd.read_csv(DATA_FOLDER + '/v3/importances.csv', index_col=0).head(800).index)
train_features = [*features, "target"]

train = pd.read_pickle(DATA_FOLDER + '/v3/train.pkl')[train_features]
folds = prepare_folds(train)
models, result = train_folds(folds, config)

test = load_test(DATA_FOLDER + '/v3/test.pkl')[features]
test_target = evaluate(models, test)
print("AUC: %.4f, F1: %.4f" % (result['auc'], result['f1']))

# Average the per-fold feature importances and persist them for the next run
importance = result['importances'].groupby(['feature']) \
    .agg({'importance': 'mean'}) \
    .sort_values(by="importance", ascending=False)
importance.to_csv(DATA_FOLDER + "/v3/importances.csv")

prepare_submission(test_target, "v3_AUC_%.4f_F1_%.4f" % (result['auc'], result['f1']))
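# prepare_folds is a project helper; this is a minimal sketch of one plausible
# implementation, assuming stratified K-fold splits keyed on the unbalanced
# 'target' column (the fold count and seed here are illustrative, not the
# project's actual settings).
from sklearn.model_selection import StratifiedKFold

def prepare_folds(train, n_splits=5, seed=42):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    return [(train.iloc[tr_idx], train.iloc[va_idx])
            for tr_idx, va_idx in skf.split(train, train['target'])]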
    # (the ECBDataset(...) call is truncated in the original; only its
    # trailing keyword arguments are shown)
    gold_starts=gold_starts,
    gold_ends=gold_ends,
    cluster_ids=cluster_ids)

loader = DataLoader(dataset, batch_size=1, shuffle=False)

model.load_state_dict(torch.load(args['model_path']))
# print(model)
print(model.parameters)

evaluate(loader, model, device)

# if args['mode'] == 'predict':
#     all_sentences, tokens, batch_indices, mentions, gold_starts, gold_ends, clusters = \
#         process_ecb_plus(args['data_path'], args['mention_type'])
#     tokenizer = AutoTokenizer.from_pretrained("SpanBERT/spanbert-base-cased", use_fast=True)
#     encodings = tokenizer(all_sentences, return_offsets_mapping=True,
#                           is_split_into_words=True, truncation=True, padding=True)
#     encoded_tokens = fix_tokens_with_offsets(tokens, encodings.offset_mapping, batch_indices)
#     encoded_sentence_map = create_unmasked_sentence_map(encodings.offset_mapping, batch_indices)
#     encoded_tokens, gold_starts, gold_ends, mentions = process_gold_mentions(
#         encoded_tokens, gold_starts, gold_ends, mentions,
#         encodings.attention_mask, batch_indices)
#     cluster_ids = get_cluster_ids(mentions, clusters)
#     dataset = ECBDataset(
#         encodings=encodings,
#         batch_indices=batch_indices,
#         sentence_map=encoded_sentence_map,
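# Hedged sketch: a device-agnostic variant of the checkpoint loading above.
# torch.load's map_location argument is standard PyTorch; `device` is assumed
# to be the torch.device selected earlier in the script.
import torch

def load_checkpoint(model, path, device):
    # Remap stored tensors onto the target device, then switch to eval mode.
    state = torch.load(path, map_location=device)
    model.load_state_dict(state)
    model.to(device)
    model.eval()
    return model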