-
Notifications
You must be signed in to change notification settings - Fork 0
/
boss_array.py
116 lines (105 loc) · 4.14 KB
/
boss_array.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from yos.boss import ysearch
from yos.yql import db
class BossArray(object):
def __init__(self, term):
self.term = term
self.retrieved = []
self.length = None
def _cache(self, start, results,sort=True):
i = start
for result in results:
self.retrieved.append((i,result))
i = i + 1
if sort is True:
self.retrieved.sort()
def _cached_portion(self, start, count):
def contains_result(n):
for pos,val in self.retrieved:
if pos == n:
return val
return None
cached = []
not_cached = []
for i in xrange(start,start+count):
val = contains_result(i)
if val is not None:
cached.append((i,val))
else:
not_cached.append(i)
return cached,not_cached
def _download(self, start, count, sort=True):
rows = []
length = 0
for i in xrange(0, count, 50):
offset = i * 50
pos = start + offset
num_results = min(count - offset, 50)
data = ysearch.search(self.term,start=pos,count=num_results)
length = data['ysearchresponse']['totalhits']
rows = rows + db.create(data=data).rows
self.length = int(length)
self._cache(start,rows,sort=sort)
return rows
def __getitem__(self, i):
if type(i) == int:
cached, not_cached = self._cached_portion(i,1)
if cached == []:
return self._download(i,1)[0]
else:
return cached[0][1] # [(index,value),]
else:
cached, not_cached = self._cached_portion(i.start,i.stop-i.start)
length = len(not_cached)
if length == 0:
return tuple(x[1] for x in cached)
elif length == 1:
index = not_cached[0]
downloaded = [(index,self._download(index,1),)]
else:
runs = []
length = len(not_cached)
start_of_run = not_cached[0]
prev = start_of_run
for i in xrange(1,length):
val = not_cached[i]
if val - prev == 1:
if i != length-1:
pass
else:
runs.append((start_of_run,1+val-start_of_run))
else:
runs.append((start_of_run,val-1-start_of_run))
if i < length - 1:
start_of_run = not_cached[i]
prev = val
# First we break apart runs into
# groups of 50, since that is the
# maximum allowable number of results
# per request.
legal_runs = []
for index,count in runs:
if count <= 50:
legal_runs.append((index,count))
else:
num_runs = count / 50
if count % 50 > 0:
num_runs = num_runs + 1
for i in xrange(0,num_runs):
offset = i * 50
tmp_count = min(count - offset,50)
legal_runs.append((index+offset,tmp_count))
downloaded = []
for index,count in legal_runs:
tuples = zip(xrange(index,index+count),self._download(index,count,sort=False))
downloaded = downloaded + tuples
# We turned off sorting while downloading the runs, since
# we knew we'd be adding a lot of data at once and it would
# be easier to only sort all the additional data once.
self.retrieved.sort()
cached = cached + downloaded
cached.sort()
return tuple(x[1] for x in cached)
def __len__(self):
if self.length is None:
self._download(0,1)
return self.length