This repository has been archived by the owner on May 28, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mixedsidgraph.py
242 lines (199 loc) · 8.96 KB
/
mixedsidgraph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
from pgfplot import PgfPlot
from plot import Plot
import statistics
class MixedSIDPgfPlot(PgfPlot):
    """Creates a single graph containing the results of a mixed set of search, insert, and delete operations
    on a set of data structures. Also plots the median data structure size at each plotted x-value."""

    def __init__(
        self,
        filename,
        searches,
        inserts,
        deletes,
        operations,  # Known internally as samples, same as PgfPlots.
        bm_startindex,
        bm_length,
        bm_interval,
        xlabel="",
        ylabel="",
        title="",
        template="figure.template.tex",
    ):
        """Configures a series of benchmarks on all the inputs provided and plots them, alongside the size of the
        data structure.

        searches, inserts, deletes: parallel sequences of callables, one entry per data structure. Entry i of
            each sequence is handed to the i-th MixedSIDBenchmarkPlot built by run(), so inserts and deletes
            need at least as many entries as searches. searches must support slicing, because a copy of it is
            passed to the base class.
        operations: a list of operations according to the MixedSIDBenchmarkPlot spec, i.e. (operation, key)
            pairs.
        bm_startindex, bm_length, bm_interval: forwarded to the base class and later passed to each
            MixedSIDBenchmarkPlot as its bm_start, bm_length and bm_interval.

        This constructor does not accept the startindex, stopindex, repeat, or combine_method options,
        since they do not apply here; fixed values (0, 0 and repeat=1) are passed to the base class instead.
        All other options are the same as PgfPlots and are directly passed through.
        """
        super(MixedSIDPgfPlot, self).__init__(
            filename,
            searches[:],  # Simply used for generating legend text, in our case. We'll augment this value in run().
            operations,
            0,  # NOTE(review): presumably startindex, unused here -- confirm against PgfPlot.
            0,  # NOTE(review): presumably stopindex, unused here -- confirm against PgfPlot.
            bm_startindex,
            bm_length,
            bm_interval,
            repeat=1,
            xlabel=xlabel,
            ylabel=ylabel,
            title=title,
            template=template
        )
        self.searches = searches
        self.inserts = inserts
        self.deletes = deletes

    def run(self):
        """Runs the benchmarks specified for this graph. Overrides PgfPlot.run() method and does NOT call it.

        Adds one MixedSIDBenchmarkPlot per data structure, followed by one plot of the data structure size.
        When no data structures were supplied there is nothing to benchmark and no size plot to derive, so
        nothing is added.

        Raises IndexError if inserts or deletes has fewer entries than searches.
        """
        # First, let's run all of the normal mixed-operation benchmarks.
        last_plot = None
        for position, search in enumerate(self.searches):
            # Store the last plot used, simply so we can use it for plotSizes later.
            last_plot = MixedSIDBenchmarkPlot(
                self.samples,
                search,
                self.inserts[position],
                self.deletes[position],
                self.bm_startindex,
                self.bm_length,
                self.bm_interval
            )
            self.addPlot(last_plot)

        # Without at least one benchmark there is no plot object to ask for the sizes;
        # previously this case failed with an AttributeError on None.
        if last_plot is None:
            return

        # Now, we'll add the plotSizes plot, which requires a little cleverness in MixedSIDBenchmarkPlot
        # to generate its legend key. The size plot only depends on the operations list and the benchmark
        # schedule, which are the same for every data structure, so any of the plots can produce it.
        self.functions.append(last_plot.plotSizes)
        self.addPlot(last_plot.plotSizes())

    def runAndWrite(self):
        """Overrides PgfPlot.runAndWrite() to call OUR version."""
        self.run()
        self.write()
class MixedSIDBenchmarkPlot(Plot):
    """Performs a sequence of operations of diverse types, reading instructions
    from an input list, and generating benchmarks from them.

    For all functions, the operations list follows the same format. It is a list of pairs, (o, key) where o is
    the operation and key is the data. o must be one of the constants defined in this class. key is the value to be
    passed to the proper function, e.g. (INSERT, 4) might call insert(4)."""

    # Constants for specifying operations.
    # Just in case it rules out some weirdness, let's not use 0.
    SEARCH = 1
    INSERT = 2
    DELETE = 3

    # A little cleverness so we can plot plotSizes with the PgfPlot class.
    legend_text = "Data Structure Size"

    # Shared by _simOp and doOperation so both report an unknown operation identically.
    _BAD_OP_MESSAGE = "op must be one of the constants for specifying operations listed in MixedSIDBenchmarkPlot."

    @staticmethod
    def _simOp(op):
        """Returns the change in data structure size as a result of the given operation.

        op is an operation constant (SEARCH, INSERT or DELETE), not an (operation, key) pair.
        Raises ValueError for any other value.
        """
        if op == MixedSIDBenchmarkPlot.SEARCH:
            return 0
        if op == MixedSIDBenchmarkPlot.INSERT:
            return 1
        if op == MixedSIDBenchmarkPlot.DELETE:
            return -1
        raise ValueError(MixedSIDBenchmarkPlot._BAD_OP_MESSAGE)

    @staticmethod
    def _checkSchedule(bm_length, bm_interval):
        """Rejects benchmark schedules that cannot make forward progress through the operations list."""
        if bm_length < 0:
            raise ValueError("bm_length must not be negative.")
        if bm_interval <= 0:
            # A non-positive interval never advances the cursor, so the benchmark loop would never end.
            raise ValueError("bm_interval must be positive.")

    def __init__(
        self,
        operations,
        search,
        insert,
        delete,
        bm_start,
        bm_length,
        bm_interval
    ):
        """Performs the operations sequence listed, producing the plot points listed.

        operations: list of (operation, key) pairs, see the class documentation.
        search, insert, delete: callables taking a single key; they are invoked for the matching operation.
        bm_start: index of the first benchmarked operation. Everything before it is executed untimed.
        bm_length: number of consecutive operations timed for each plot point.
        bm_interval: distance, in operations, between the starts of two consecutive benchmarks.

        Each benchmark appends (start index, time) to self.coordinates. Operations between two benchmark
        windows are executed untimed so the data structure stays in step with the operations list. If
        bm_interval is smaller than bm_length, the next benchmark starts right after the current window
        rather than inside it, so no operation is ever executed twice.

        Raises ValueError if bm_length is negative, bm_interval is not positive, or an operation is unknown.
        Raises IndexError if bm_start exceeds the number of operations.
        """
        super(MixedSIDBenchmarkPlot, self).__init__()
        MixedSIDBenchmarkPlot._checkSchedule(bm_length, bm_interval)

        self.operations = operations
        self.search = search
        self.insert = insert
        self.delete = delete
        self.bm_start = bm_start
        self.bm_length = bm_length
        self.bm_interval = bm_interval

        total = len(operations)

        # First, let's go up to bm_start.
        for index in range(bm_start):
            self.doOperation(delete, insert, search, operations[index])

        # Now, we'll do the standard benchmark loop: time one window, then catch up untimed
        # until the next benchmark is due.
        position = bm_start
        while position < total:
            # Don't overshoot the end of the list when benchmarking.
            window_end = min(position + bm_length, total)
            time = Plot.benchmark(
                lambda x: self.doOperation(delete, insert, search, x),
                operations,
                position,
                window_end
            )
            # NOTE(review): self.coordinates is presumably created by Plot.__init__ -- confirm.
            self.coordinates.append((position, time))

            # The next benchmark is due bm_interval operations after this one started, but never
            # before the end of the window we just measured: rewinding would replay operations
            # on the data structure a second time.
            next_start = max(window_end, min(position + bm_interval, total))
            for index in range(window_end, next_start):
                self.doOperation(delete, insert, search, operations[index])
            position = next_start

    def doOperation(self, delete, insert, search, op):
        """Executes a single (operation, key) pair by calling the matching callable with the key.

        delete, insert, search: the callables to dispatch to (passed explicitly rather than read from self).
        op: an (operation, key) pair.
        Raises ValueError if the operation is not SEARCH, INSERT or DELETE.
        """
        if op[0] == MixedSIDBenchmarkPlot.SEARCH:
            search(op[1])
        elif op[0] == MixedSIDBenchmarkPlot.INSERT:
            insert(op[1])
        elif op[0] == MixedSIDBenchmarkPlot.DELETE:
            delete(op[1])
        else:
            raise ValueError(MixedSIDBenchmarkPlot._BAD_OP_MESSAGE)

    def plotSizes(self):
        """Generates a plot of the size of a data structure without actually performing the operations themselves.

        Since the operations in bm_length may alter the data structure size significantly, all operations that are
        benchmarked will be simulated, and the median data structure size will be used for each plot point.
        The size starts at 0 and changes by +1 per INSERT, -1 per DELETE and 0 per SEARCH; whether an
        operation would actually succeed on a real data structure is not considered.

        Plot points use the same x-values as the benchmarks recorded by __init__. For an empty benchmark
        window (bm_length of 0) the size at that position is plotted.

        Returns a new Plot. Raises ValueError for an invalid schedule or an unknown operation.
        """
        # The schedule attributes are public, so re-check them in case they were changed after construction.
        MixedSIDBenchmarkPlot._checkSchedule(self.bm_length, self.bm_interval)

        operations = self.operations
        total = len(operations)
        delta = MixedSIDBenchmarkPlot._simOp

        size = 0
        plot = Plot()

        # Do operations until bm_start.
        for index in range(self.bm_start):
            size += delta(operations[index][0])

        # Now plot points until we run out of road.
        position = self.bm_start
        while position < total:
            # Establish the end of the current benchmark and the start of the next one, mirroring
            # the schedule used by __init__ (the next benchmark never starts inside this window).
            window_end = min(position + self.bm_length, total)
            next_start = max(window_end, min(position + self.bm_interval, total))

            # We're at a plot point, so gather the size after every benchmarked operation.
            window_sizes = []
            for index in range(position, window_end):
                size += delta(operations[index][0])
                window_sizes.append(size)

            # statistics.median rejects empty data, so an empty window falls back to the current size.
            plot.plotPoint(position, statistics.median(window_sizes) if window_sizes else size)

            # Having finished the benchmark, simulate operations up to the next benchmark, then loop.
            for index in range(window_end, next_start):
                size += delta(operations[index][0])
            position = next_start

        return plot