forked from JasonGross/coq-tools
/
split_definitions.py
188 lines (167 loc) · 9.8 KB
/
split_definitions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import re, time
from subprocess import Popen, PIPE, STDOUT
import split_definitions_old
from split_file import postprocess_split_proof_term
from coq_version import get_coq_accepts_time, get_proof_term_works_with_time
from custom_arguments import DEFAULT_LOG, DEFAULT_VERBOSITY
import util
__all__ = ["join_definitions", "split_statements_to_definitions"]
def get_definitions_diff(previous_definition_string, new_definition_string):
    """Compare two '|'-separated lists of definition names.

    Empty fields (as produced by leading/trailing/doubled '|') are
    ignored.  Returns a triple of tuples (definitions_removed,
    definitions_shared, definitions_added), each in the order the
    names appear in their originating string."""
    before = [name for name in previous_definition_string.split('|') if name]
    after = [name for name in new_definition_string.split('|') if name]

    if any(name != 'branch' for name in before + after):
        # Ordinary case: names are taken to be unique, so compare by
        # membership.
        gone = tuple(name for name in before if name not in after)
        kept = tuple(name for name in before if name in after)
        fresh = tuple(name for name in after if name not in before)
        return (gone, kept, fresh)

    # Work around bug #5577: when all theorem names are "branch", we
    # don't assume that names are unique, and instead go by ordering.
    gone, kept, fresh = [], [], []
    for old_name, new_name in zip(before, after):
        if old_name == new_name:
            kept.append(old_name)
        else:
            gone.append(old_name)
            fresh.append(new_name)
    # Whatever extends past the common prefix length exists on only
    # one side.
    common_length = min(len(before), len(after))
    gone.extend(before[common_length:])
    fresh.extend(after[common_length:])
    return (tuple(gone), tuple(kept), tuple(fresh))
def strip_newlines(string):
    """Remove a single newline from `string`: the leading one if
    present, otherwise the trailing one if present.  At most one
    character is removed; falsy inputs are returned unchanged."""
    if string:
        if string[0] == '\n':
            string = string[1:]
        elif string[-1] == '\n':
            string = string[:-1]
    return string
# Unfortunately, coqtop -emacs -time reports character locations as byte
# offsets rather than character offsets, so to handle unicode we slice
# and measure the encoded (bytes) form of the statements.
def slice_statements_string(statements_string, start=None, end=None):
    """Return statements_string[start:end], where `start` and `end`
    index into the result of util.b(statements_string) rather than
    into the string itself.  A `start` of None means 0; an `end` of
    None means the end of the converted value.  The slice is passed
    back through util.s before being returned."""
    encoded = util.b(statements_string)
    lower = 0 if start is None else start
    upper = len(encoded) if end is None else end
    return util.s(encoded[lower:upper])
def len_statements_string(statements_string):
    """Return the length of util.b(statements_string), i.e. the
    length in the same units that slice_statements_string indexes
    by."""
    encoded = util.b(statements_string)
    return len(encoded)
def split_statements_to_definitions(statements, verbose=DEFAULT_VERBOSITY, log=DEFAULT_LOG, coqtop='coqtop', coqtop_args=tuple(), **kwargs):
    """Splits a list of statements into chunks which make up
    independent definitions/hints/etc.

    The statements are joined with newlines and piped to
    `coqtop -q -emacs -time`.  The output is then parsed for the
    "Chars N - M" ranges and the <prompt>...</prompt> blocks, and the
    list of open definition names in each prompt is used to decide
    which statements belong together.

    Arguments:
      statements  -- iterable of statement strings
      verbose     -- verbosity level; truthy enables progress
                     messages, >= 2 and >= 3 enable more detail
      log         -- callable used for all messages
      coqtop      -- the coqtop executable to run
      coqtop_args -- extra command-line arguments for coqtop
      **kwargs    -- forwarded to get_proof_term_works_with_time and
                     postprocess_split_proof_term

    Returns a list of dicts, in source order, each with the keys
      'statements'    -- tuple of the statement strings in the chunk
      'statement'     -- those statements joined with newlines
      'terms_defined' -- tuple of names that coqtop reported as
                         "is defined" / "is assumed" in the chunk

    If coqtop does not support -time, the result of
    split_definitions_old.split_statements_to_definitions is returned
    instead."""
    def fallback():
        if verbose: log("Your version of coqtop doesn't support -time. Falling back to more error-prone method.")
        return split_definitions_old.split_statements_to_definitions(statements, verbose=verbose, log=log, coqtop=coqtop, coqtop_args=coqtop_args)

    def make_entry(entry_statements, entry_terms=()):
        # Build one element of the returned list.
        return {'statements':tuple(entry_statements),
                'statement':'\n'.join(entry_statements),
                'terms_defined':tuple(entry_terms)}

    # check for -time
    if not get_coq_accepts_time(coqtop, verbose=verbose, log=log):
        return fallback()
    if not get_proof_term_works_with_time(coqtop, is_coqtop=True, verbose=verbose, log=log, **kwargs):
        statements = postprocess_split_proof_term(statements, log=log, verbose=verbose, **kwargs)
    p = Popen([coqtop, '-q', '-emacs', '-time'] + list(coqtop_args), stdout=PIPE, stderr=STDOUT, stdin=PIPE)
    # Every space in the pattern below stands for "optional
    # whitespace".  The groups are: start offset, end offset, the
    # response text, and then the fields of the prompt.
    split_reg = re.compile(r'Chars ([0-9]+) - ([0-9]+) [^\s]+ (.*?)<prompt>([^<]*?) < ([0-9]+) ([^<]*?) ([0-9]+) < ([^<]*?)</prompt>'.replace(' ', r'\s*'),
                           flags=re.DOTALL)
    defined_reg = re.compile(r'^([^\s]+) is (?:defined|assumed)$', re.MULTILINE)
    # goals and definitions are on stdout, prompts are on stderr;
    # stderr is redirected into stdout above, so both arrive in one
    # stream.
    statements_string = '\n'.join(statements) + '\n\n'
    if verbose: log('Sending statements to coqtop...')
    if verbose >= 3: log(statements_string)
    (stdout, stderr) = p.communicate(input=util.b(statements_string))
    stdout = util.s(stdout)
    if 'know what to do with -time' in stdout.strip().split('\n')[0]:
        # we're using a version of coqtop that doesn't support -time
        return fallback()
    if verbose: log('Done. Splitting to definitions...')

    rtn = []
    # Maps a '|'-separated string of open definition names to the
    # statements and defined terms accumulated under it so far.
    cur_definition = {}
    last_definitions = '||'
    cur_definition_names = '||'
    last_char_end = 0

    #if verbose >= 3: log('re.findall(' + repr(r'Chars ([0-9]+) - ([0-9]+) [^\s]+ (.*?)<prompt>([^<]*?) < ([0-9]+) ([^<]*?) ([0-9]+) < ([^<]*?)</prompt>'.replace(' ', r'\s*')) + ', ' + repr(stdout) + ', ' + 'flags=re.DOTALL)')
    responses = split_reg.findall(stdout)
    for char_start, char_end, response_text, cur_name, line_num1, cur_definition_names, line_num2, unknown in responses:
        char_start, char_end = int(char_start), int(char_end)
        # Slice from the end of the previous statement (not from
        # char_start) so that nothing between statements is dropped.
        statement = strip_newlines(slice_statements_string(statements_string,last_char_end,char_end))
        last_char_end = char_end

        terms_defined = defined_reg.findall(response_text)

        definitions_removed, definitions_shared, definitions_added = get_definitions_diff(last_definitions, cur_definition_names)

        # first, to be on the safe side, we add the new
        # definitions key to the dict, if it wasn't already there.
        if cur_definition_names.strip('|') and cur_definition_names not in cur_definition:
            cur_definition[cur_definition_names] = {'statements':[], 'terms_defined':[]}

        if verbose >= 2: log((statement, (char_start, char_end), definitions_removed, terms_defined, 'last_definitions:', last_definitions, 'cur_definition_names:', cur_definition_names, cur_definition.get(last_definitions, []), cur_definition.get(cur_definition_names, []), response_text))

        # first, we handle the case where we have just finished
        # defining something.  This should correspond to
        # len(definitions_removed) > 0 and len(terms_defined) > 0.
        # If only len(definitions_removed) > 0, then we have
        # aborted something.  If only len(terms_defined) > 0, then
        # we have defined something with a one-liner.
        if definitions_removed:
            cur_definition[last_definitions]['statements'].append(statement)
            cur_definition[last_definitions]['terms_defined'] += terms_defined
            if cur_definition_names.strip('|'):
                # we are still inside a definition.  For now, we
                # flatten all definitions.
                #
                # TODO(jgross): Come up with a better story for
                # nested definitions.
                cur_definition[cur_definition_names]['statements'] += cur_definition[last_definitions]['statements']
                cur_definition[cur_definition_names]['terms_defined'] += cur_definition[last_definitions]['terms_defined']
                del cur_definition[last_definitions]
            else:
                # we're at top-level, so add this as a new
                # definition
                rtn.append(make_entry(cur_definition[last_definitions]['statements'],
                                      cur_definition[last_definitions]['terms_defined']))
                del cur_definition[last_definitions]
                # print('Adding:')
                # print(rtn[-1])
        elif terms_defined:
            if cur_definition_names.strip('|'):
                # we are still inside a definition.  For now, we
                # flatten all definitions.
                #
                # TODO(jgross): Come up with a better story for
                # nested definitions.
                cur_definition[cur_definition_names]['statements'].append(statement)
                cur_definition[cur_definition_names]['terms_defined'] += terms_defined
            else:
                # we're at top level, so add this as a new
                # definition
                rtn.append(make_entry((statement,), terms_defined))

        # now we handle the case where we have just opened a fresh
        # definition.  We've already added the key to the
        # dictionary.
        elif definitions_added:
            # print(definitions_added)
            cur_definition[cur_definition_names]['statements'].append(statement)
        else:
            # if we're in a definition, append the statement to
            # the queue, otherwise, just add it as its own
            # statement
            if cur_definition_names.strip('|'):
                cur_definition[cur_definition_names]['statements'].append(statement)
            else:
                rtn.append(make_entry((statement,)))

        last_definitions = cur_definition_names

    if verbose >= 2: log((last_definitions, cur_definition_names))
    # A definition that was still open when the input ended is
    # flushed as a final chunk.  last_definitions can only be
    # non-empty here if the loop ran, in which case it equals
    # cur_definition_names, so a single key is used throughout.
    if last_definitions.strip('|'):
        rtn.append(make_entry(cur_definition[last_definitions]['statements'],
                              cur_definition[last_definitions]['terms_defined']))
        del cur_definition[last_definitions]

    # Anything after the last range coqtop reported is kept as one
    # trailing statement.
    if last_char_end + 1 < len_statements_string(statements_string):
        if verbose >= 2: log('Appending end of code from %d to %d: %s' % (last_char_end, len_statements_string(statements_string), slice_statements_string(statements_string,last_char_end,None)))
        last_statement = strip_newlines(slice_statements_string(statements_string, last_char_end, None))
        # Must be a one-element tuple: tuple(last_statement) would
        # split the string into individual characters.
        rtn.append(make_entry((last_statement,)))

    return rtn
def join_definitions(definitions):
    """Concatenate the 'statement' field of every entry in
    `definitions`, in order, separated by newlines."""
    pieces = [definition['statement'] for definition in definitions]
    return '\n'.join(pieces)