# Report the Pearson correlation between the per-feature attention
# correlation scores and the corresponding BLOSUM substitution scores.
# NOTE(review): `pearsonr`, `corr_scores`, and `blos_scores` are not defined
# in this chunk — presumably bound earlier in the (truncated) enclosing
# function; confirm against the full file.
print('Pearson Correlation between feature corr and blosum',
          pearsonr(corr_scores, blos_scores)[0])


if __name__ == "__main__":

    import argparse

    # Single positional argument: which cached experiment to analyze.
    parser = argparse.ArgumentParser()
    parser.add_argument('exp_name', help='Name of experiment')
    args = parser.parse_args()

    # Plot settings: minimum total attention weight for inclusion, and the
    # output figure format.
    min_total = 100
    filetype = 'pdf'

    # Load the aggregated attention results cached by the compute script.
    # NOTE: this rebinds `args` to the argparse namespace stored inside the
    # pickle (which is expected to also carry `exp_name`). The cache is
    # trusted local output — pickle must never be used on untrusted data.
    cache_path = get_cache_path() / f'{args.exp_name}.pickle'
    # Use a context manager so the handle is closed promptly (the original
    # `pickle.load(open(...))` leaked the file object).
    with open(cache_path, 'rb') as f:
        args, feature_to_weighted_sums, weight_totals = pickle.load(f)
    print(args)
    print(weight_totals)

    report_dir = get_reports_path(
    ) / 'attention_analysis/blosum' / args.exp_name
    report_dir.mkdir(parents=True, exist_ok=True)

    create_figures(feature_to_weighted_sums, weight_totals, min_total,
                   report_dir, filetype)

    # Record the (pickled) arguments alongside the figures for reproducibility.
    with open(report_dir / 'args.json', 'w') as f:
        json.dump(vars(args), f)
        # NOTE(review): this `elif` chain is missing its opening `if` branch —
        # the file appears to be a corrupted concatenation of several script
        # fragments, and the enclosing function/main header is not visible here.
        # Each branch maps a feature-name string to a feature extractor object.
        elif feature_name == 'ss4':
            features.append(SecStructFeature())
        elif feature_name == 'binding_sites':
            features.append(BindingSiteFeature())
        elif feature_name == 'protein_modifications':
            features.append(ProteinModificationFeature())
        elif feature_name == 'contact_map':
            features.append(ContactMapFeature())
        else:
            raise ValueError(f"Invalid feature name: {feature_name}")

    # Aggregate attention over the dataset: per feature, a weighted sum of
    # attention per (layer, head) plus the total attention weight —
    # presumably; semantics of compute_mean_attention not visible here.
    feature_to_weighted_sum, weight_total = compute_mean_attention(
        model,
        num_layers,
        num_heads,
        items,
        features,
        tokenizer,
        args.model,
        model_version,
        cuda,
        max_seq_len=args.max_seq_len,
        min_attn=args.min_attn)

    # Cache the aggregated results for the downstream analysis scripts.
    cache_dir = get_cache_path()
    pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)
    path = cache_dir / f'{args.exp_name}.pickle'
    # NOTE(review): the open() handle below is never closed — prefer `with`.
    pickle.dump((args, dict(feature_to_weighted_sum), weight_total),
                open(path, 'wb'))
    print('Wrote to', path)
# 示例#3 ("Example #3") — separator artifact from the site this file was
# scraped from; the stray "0" that followed it was part of that artifact.
# The code below is a third, separate script fragment (its
# `if __name__ == "__main__":` header was lost to the artifact).
    # NOTE(review): fragment of a third script's main block — its enclosing
    # header is not visible, and the inner loop body is truncated at the end
    # of this chunk.
    import argparse

    # Accept one or more experiment names whose caches will be combined.
    parser = argparse.ArgumentParser()
    parser.add_argument('exp_names', nargs='+', help='Names of experiments')
    args = parser.parse_args()

    min_total = 100  # minimum total attention weight threshold (use not visible in this span)
    filetype = 'pdf'

    # Combined report directory for edge-feature figures across experiments.
    report_dir = get_reports_path() / 'attention_analysis/edge_features_combined'
    pathlib.Path(report_dir).mkdir(parents=True, exist_ok=True)

    feature_data = []
    # Regex patterns selecting which cached features to include —
    # presumably compiled module-level patterns; confirm against full file.
    include_features = [contact_map_pattern, secondary_to_pattern, binding_site_pattern]
    for exp in args.exp_names:
        # NOTE(review): rebinds `args` to the pickled namespace on each
        # iteration, and the open() handle is never closed — prefer `with`.
        cache_path = get_cache_path() / f'{exp}.pickle'
        args, feature_to_weighted_sum, weight_total = pickle.load(open(cache_path, "rb"))
        with open(report_dir / f'args_{exp}.json', 'w') as f:
            json.dump(vars(args), f)
        for feature, weighted_sum in feature_to_weighted_sum.items():
            for p in include_features:
                m = p.match(feature)
                desc = None
                if m:
                    # Map each matched pattern to a human-readable label.
                    if p == contact_map_pattern:
                        desc = 'Contact'
                    elif p == binding_site_pattern:
                        desc = 'Binding Site'
                    elif p == secondary_to_pattern:
                        sec = m.group(1)
                        desc = ss4_names.get(sec)