This repository has been archived by the owner on Oct 3, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
featurecalc.py
138 lines (99 loc) · 4.09 KB
/
featurecalc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""This script loads the current features from the pickle, adds in undownloaded
repos, then downloads and calculates any missing features. Since this can take
some time, the pickle is written out every 5 minutes while repos are processed."""
import argparse
import code
import datetime
import logging
import sys
from features import all_features
from models import Repo
import utils
# Send every log record (DEBUG and above) to calcfeatures.log; per-repo
# progress and failure tracebacks from calculate() end up there.
logging.basicConfig(filename='calcfeatures.log', level=logging.DEBUG)
def progress_bar(processed, total):
    """Redraw the single-line progress bar on stdout.

    Writes a carriage return, a 20-character bar (one '#' per 5% complete)
    and the integer percentage, then flushes so the update shows immediately.

    processed -- number of items handled so far
    total -- number of items overall; when it is zero there is nothing left
             to wait for, so the bar is drawn as complete instead of raising
             ZeroDivisionError
    """
    pct_done = int(100.0 * processed / total) if total else 100
    # Floor division keeps the repeat count an int on Python 3 (and under
    # `from __future__ import division`), where `/` would yield a float that
    # cannot be used to repeat a string.
    bar = ('#' * (pct_done // 5)).ljust(20)
    flush_right = ' ' * 20  # make sure to overwrite status messages
    sys.stdout.write("\rcalculating [{}] {}%{}".format(bar, pct_done, flush_right))
    sys.stdout.flush()
def _write_line(text=''):
    """Write `text` followed by a newline to stdout (valid on Python 2 and 3)."""
    sys.stdout.write(text + '\n')


def _report_failures(heading_format, failures):
    """Print the failure count via `heading_format`, then one line per repo."""
    if not failures:
        return
    _write_line(heading_format % len(failures))
    for failed in failures:
        _write_line(" %s" % failed)
    _write_line()


def calculate(f_to_calc, f_to_overwrite, console, download):
    """Calculate a list of features for every repo in the sample.

    f_to_calc -- features passed to repo.calculate_features() without the
                 overwrite flag; skipped when empty or None
    f_to_overwrite -- features passed to repo.calculate_features() with
                      overwrite=True; skipped when empty or None
    console -- when true, open an interactive console on this function's
               locals (including `repos`) before the final write-out
    download -- when true, utils.clone() each repo first; repos whose clone
                reports failure are skipped and listed at the end

    Progress is drawn on stdout and per-repo problems are logged. The sample
    is saved with Repo.write_update() whenever more than 5 minutes have
    passed since the last save (checked after each processed repo) and once
    more at the end.
    """
    sys.stdout.write('loading')
    sys.stdout.flush()
    repos = Repo.load_sample()

    total = len(repos)
    dl_failures = []
    calc_failures = []
    write_interval = datetime.timedelta(minutes=5)
    last_write = datetime.datetime.now()

    if f_to_calc or f_to_overwrite or download:
        for seen, repo in enumerate(repos, 1):
            if download and not utils.clone(repo):
                dl_failures.append(repo)
                continue

            try:
                if f_to_calc:
                    logging.info("calc: %s", repo)
                    repo.calculate_features(f_to_calc)

                if f_to_overwrite:
                    logging.info("calc: %s", repo)
                    repo.calculate_features(f_to_overwrite, overwrite=True)

                repo._clear_support_features()  # we're done with this repo now
            except Exception:
                # Deliberately not a bare `except:` -- KeyboardInterrupt and
                # SystemExit must still be able to stop a long run rather
                # than being recorded as a failure of the current repo.
                _write_line()  # from status line
                logging.exception("!problem: %s", repo)
                calc_failures.append(repo)
                _write_line()

            progress_bar(seen, total)

            # Periodically persist results so a crash loses little work.
            if datetime.datetime.now() - last_write > write_interval:
                sys.stdout.write("\r(writing results)")
                sys.stdout.flush()
                Repo.write_update(repos)
                last_write = datetime.datetime.now()

        _write_line()  # from progress bar line

    _report_failures("%s failed to download:", dl_failures)
    _report_failures("%s failed during calc:", calc_failures)

    if console:
        message = ('`repos` contains results;\n'
                   'use ^d to write out or `exit()` to cancel')
        # exit() raises SystemExit, which propagates and skips the write below.
        code.interact(message, local=locals())

    _write_line('writing out...')
    Repo.write_update(repos)
def main():
    """Parse the command line and run the requested feature calculation.

    Shows the usage text and returns without doing any work when neither
    --console nor any --calc/--overwrite features were supplied. A feature
    list that is exactly ``all`` is expanded to every key of `all_features`.
    """
    parser = argparse.ArgumentParser(description='Calculate features for the current sample.')

    # --calc and --overwrite both accept zero or more feature names.
    feature_list = dict(nargs='*', metavar='feature')
    parser.add_argument(
        '--calc',
        help='calculate the given features, but do not overwrite',
        **feature_list)
    parser.add_argument(
        '--overwrite',
        help='calculate the given features, overwriting any current value',
        **feature_list)
    parser.add_argument(
        '--console',
        action='store_true',
        help=('after calculation and before write-out, open a repl.'
              ' the list of Repos is available as `repos`.'
              ' call `exit()` to abort before writing out,'
              ' otherwise use EOF to continue.'))
    parser.add_argument(
        '--nodownload',
        action='store_true',
        help="do not download code if it's missing")

    args = parser.parse_args()

    # Nothing to compute and no console requested: just show usage.
    if not (args.console or args.calc or args.overwrite):
        parser.print_help()
        return

    to_calc = args.calc
    if to_calc == ['all']:
        to_calc = all_features.keys()

    to_overwrite = args.overwrite
    if to_overwrite == ['all']:
        to_overwrite = all_features.keys()

    calculate(to_calc, to_overwrite, args.console, not args.nodownload)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()