print('Pearson Correlation between feature corr and blosum', pearsonr(corr_scores, blos_scores)[0])


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('exp_name', help='Name of experiment')
    args = parser.parse_args()

    min_total = 100
    filetype = 'pdf'

    cache_path = get_cache_path() / f'{args.exp_name}.pickle'
    # Note: this rebinds `args` to the argument namespace saved by the compute
    # step, replacing the command-line namespace parsed above.
    args, feature_to_weighted_sums, weight_totals = pickle.load(open(cache_path, "rb"))
    print(args)
    print(weight_totals)

    report_dir = get_reports_path() / 'attention_analysis/blosum' / args.exp_name
    report_dir.mkdir(parents=True, exist_ok=True)

    create_figures(feature_to_weighted_sums, weight_totals, min_total, report_dir, filetype)

    with open(report_dir / 'args.json', 'w') as f:
        json.dump(vars(args), f)
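# --- Hedged sketch (not from the original source): how the cached weighted
# sums and totals loaded above are presumably reduced to per-head mean
# attention inside `create_figures`. `feature_to_weighted_sums` is assumed to
# map each feature name to an array of shape (num_layers, num_heads), and
# `weight_totals` is assumed to share that shape; heads whose total attention
# mass falls below `min_total` are masked out, as the threshold above suggests.
import numpy as np

def mean_attention_by_head(weighted_sums, weight_totals, min_total):
    """Return the proportion of attention associated with a feature,
    with under-sampled heads set to NaN."""
    weighted_sums = np.asarray(weighted_sums, dtype=float)
    weight_totals = np.asarray(weight_totals, dtype=float)
    with np.errstate(divide='ignore', invalid='ignore'):
        means = weighted_sums / weight_totals
    means[weight_totals < min_total] = np.nan  # too little attention mass to trust
    return means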
    elif feature_name == 'ss4':
        features.append(SecStructFeature())
    elif feature_name == 'binding_sites':
        features.append(BindingSiteFeature())
    elif feature_name == 'protein_modifications':
        features.append(ProteinModificationFeature())
    elif feature_name == 'contact_map':
        features.append(ContactMapFeature())
    else:
        raise ValueError(f"Invalid feature name: {feature_name}")

    feature_to_weighted_sum, weight_total = compute_mean_attention(
        model,
        num_layers,
        num_heads,
        items,
        features,
        tokenizer,
        args.model,
        model_version,
        cuda,
        max_seq_len=args.max_seq_len,
        min_attn=args.min_attn)

    # Cache the aggregated attention statistics so the reporting scripts can
    # load them without rerunning the model.
    cache_dir = get_cache_path()
    pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)
    path = cache_dir / f'{args.exp_name}.pickle'
    pickle.dump((args, dict(feature_to_weighted_sum), weight_total), open(path, 'wb'))
    print('Wrote to', path)
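# --- Hedged sketch (not the original implementation): the per-map aggregation
# that `compute_mean_attention` is assumed to perform. For each (layer, head),
# attention weights at or above `min_attn` contribute to the running weight
# total, and the subset of those weights whose token pair exhibits the feature
# (e.g. is in contact) also contributes to that feature's weighted sum.
# Dividing the accumulators afterwards yields the mean proportion of attention
# directed at the feature.
import numpy as np

def accumulate_one_map(attn, feature_mask, weighted_sum, weight_total, min_attn):
    """attn: (num_layers, num_heads, seq_len, seq_len) attention weights.
    feature_mask: (seq_len, seq_len) boolean, True where the feature holds.
    weighted_sum, weight_total: (num_layers, num_heads) float accumulators."""
    strong = attn >= min_attn                                   # ignore negligible weights
    weight_total += (attn * strong).sum(axis=(-2, -1))          # all retained attention
    weighted_sum += (attn * strong * feature_mask).sum(axis=(-2, -1))  # feature-aligned attention
    return weighted_sum, weight_total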
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('exp_names', nargs='+', help='Names of experiments')
args = parser.parse_args()

min_total = 100
filetype = 'pdf'

report_dir = get_reports_path() / 'attention_analysis/edge_features_combined'
pathlib.Path(report_dir).mkdir(parents=True, exist_ok=True)

feature_data = []
include_features = [contact_map_pattern, secondary_to_pattern, binding_site_pattern]
for exp in args.exp_names:
    cache_path = get_cache_path() / f'{exp}.pickle'
    args, feature_to_weighted_sum, weight_total = pickle.load(open(cache_path, "rb"))
    with open(report_dir / f'args_{exp}.json', 'w') as f:
        json.dump(vars(args), f)
    for feature, weighted_sum in feature_to_weighted_sum.items():
        for p in include_features:
            m = p.match(feature)
            desc = None
            if m:
                if p == contact_map_pattern:
                    desc = 'Contact'
                elif p == binding_site_pattern:
                    desc = 'Binding Site'
                elif p == secondary_to_pattern:
                    sec = m.group(1)
                    desc = ss4_names.get(sec)
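# --- Hedged sketch (assumed definitions, not from the original source): the
# include patterns and secondary-structure names referenced above. The exact
# regexes and four-class (ss4) labels are assumptions consistent with the
# matching logic: `secondary_to_pattern` must expose the structure code as
# group(1), and `ss4_names` maps that code to a display name.
import re

contact_map_pattern = re.compile(r'contact_map')        # assumed feature key
binding_site_pattern = re.compile(r'binding_site')      # assumed feature key
secondary_to_pattern = re.compile(r'sec_struct_to_([A-Z])')  # assumed; captures the ss code

ss4_names = {
    'H': 'Helix',
    'E': 'Strand',
    'T': 'Turn/Bend',
}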