# data_manager.py
# -*- coding: utf-8 -*-
from price_crawler import PriceCrawler
from price_data_io import PriceWriter, PriceReader
from random import randint
from time import sleep
class IDManager( object ):
    '''
    Helpers for surveying commodities by their numeric object ID.
    '''

    @staticmethod
    def record_commodity_stats( startId , endId ):
        '''
        Records the maximum price, minimum price, maximum volume, and
        minimum volume for every commodity in an ID range. This data
        will later be used to determine which commodities are promising
        to trade. For example, we'd like to trade on Mithril Ore because
        the volume is high and the price fluctuates quite a bit, but we
        don't want to trade on Steel Longsword because nobody trades it
        and the price doesn't move that much.

        Note: This function appends one line per valid ID to the
        price_data/item_stats file, in the form
        "id,maxPrice,minPrice,maxVolume,minVolume". Make sure you look
        through the file first before giving it a start and end ID.

        If a commodity has no nonzero price (or volume) in the examined
        points, the initial sentinels (0 for the maximum, 999999999 for
        the minimum) are written unchanged.

        @param startId - the first ID for which to record commodity stats
        @param endId - one greater than the last ID for which to record
        commodity stats (the range includes the start, but does not
        include the end ID)
        '''
        for i in range( startId , endId ):
            print( "Processing " + str( i ) )
            testData = PriceCrawler.get_price_data_from_html( i )
            if testData is not None:
                maxPrice = 0
                minPrice = 999999999
                maxVolume = 0
                minVolume = 999999999
                allPoints = testData.get_all_datapoints()
                #we do not include the last data point because
                #that is today's data, which may be incomplete
                #and so the volume may be much less than what it
                #really is.
                #NOTE(review): the slice also skips the first data point
                #(index 0); kept as in the original -- confirm intent.
                for datapoint in allPoints[ 1 : len( allPoints ) - 1 ]:
                    #prices and volumes of 0 are invalid
                    price = datapoint.get_price()
                    if price != 0:
                        maxPrice = max( price , maxPrice )
                        minPrice = min( price , minPrice )
                    volume = datapoint.get_volume()
                    if volume != 0:
                        maxVolume = max( volume , maxVolume )
                        minVolume = min( volume , minVolume )
                #the with-block closes the file even if the write fails
                with open( "price_data/item_stats" , "a" ) as out:
                    out.write( str(i) + "," + str(maxPrice) + "," + str(minPrice) +
                               "," + str(maxVolume) + "," + str(minVolume) + "\n" )
            else:
                print( str( i ) + " was not a valid id" )
            #let's not get blocked for too many requests
            sleepInterval = 2
            print( "Sleeping " + str( sleepInterval ) )
            sleep( sleepInterval )
class DataManager( object ):
    '''
    Provides functions for managing data.

    There are two steps to this project: downloading and analyzing.
    Downloading requires lots of file IO and can be inefficient,
    so you want to minimize redundant downloads. That said, you must
    download all your data before you can analyze it.

    All state is kept at class level and every method is static, so the
    class is used directly and never instantiated. Call init() before
    any method that looks up names or IDs.
    '''

    #whether the DataManager has been initialized yet
    initialized = False

    #maps integer object IDs to their commodity string names
    idToName = {}

    #maps lowercase string names of commodities to their integer object IDs
    nameToId = {}

    @staticmethod
    def init():
        '''
        Initializes the DataManager by loading price_data/item_ids, where
        each line has the form "name,id". You cannot perform any name or
        ID lookups using the DataManager if init() has not yet been
        called. Calling it again after a successful load does nothing.
        '''
        #don't initialize twice and waste time
        if DataManager.initialized:
            return
        #the with-block guarantees the file handle is released
        with open( "price_data/item_ids" ) as f:
            for line in f:
                #tolerate blank lines (e.g. a trailing newline at the end)
                if not line.strip():
                    continue
                pairing = line.split( "," )
                objName = pairing[ 0 ]
                #int() ignores the trailing newline on the id field
                objID = int( pairing[ 1 ] )
                DataManager.idToName[ objID ] = objName
                DataManager.nameToId[ objName.lower() ] = objID
        DataManager.initialized = True

    @staticmethod
    def download_data_by_name_and_id( name , id ):
        '''
        Downloads the most recent data for the commodity with the given
        name and id and saves it through PriceWriter. The name IS CASE
        SENSITIVE and should appear with the same capitalization as in
        the file price_data/item_ids. If you do not know the correct
        capitalization, do not use this function!

        @param name - the name of the commodity, as a string. It is
        CASE SENSITIVE!
        @param id - the object id of the commodity, as an integer
        @raise ValueError - if the crawler returns no data for the pair
        '''
        data = PriceCrawler.get_price_data_from_json( name , id )
        if data is None:
            #raising a plain string is itself a TypeError on Python 2.6+,
            #so a real exception type carries the message instead
            raise ValueError( "Invalid commodity name or id." )
        PriceWriter.save_data( data )

    @staticmethod
    def download_data_by_name( objectName ):
        '''
        Downloads the most recent data for the commodity with the given
        name.

        @param objectName - the name of the commodity, as a string. The
        name is case insensitive
        @raise KeyError - if the name is not in the loaded ID table
        '''
        id = DataManager.nameToId[ objectName.lower() ]
        caseSensitiveName = DataManager.idToName[ id ]
        DataManager.download_data_by_name_and_id( caseSensitiveName , id )

    @staticmethod
    def download_data_by_names( *names ):
        '''
        Downloads the most recent data for all given commodities.

        @param names - any number of names (as strings) of commodities
        for which to download price data. The names are case insensitive.
        '''
        for name in names:
            DataManager.download_data_by_name( name )

    @staticmethod
    def download_data_by_id( objectId ):
        '''
        Downloads the most recent data for the commodity with the given ID.

        @param objectId - the ID of the commodity, as an integer
        @raise KeyError - if the ID is not in the loaded ID table
        '''
        name = DataManager.idToName[ objectId ]
        DataManager.download_data_by_name_and_id( name , objectId )

    @staticmethod
    def get_data_by_name( name ):
        '''
        Gets all known price data for the commodity with the given name.
        The name is not case sensitive.

        @param name - the name of a commodity, as a string
        @return - whatever PriceReader.get_price_data_from_csv returns
        for the commodity's ID
        @raise KeyError - if the name is not in the loaded ID table
        '''
        id = DataManager.nameToId[ name.lower() ]
        return PriceReader.get_price_data_from_csv( id )

    @staticmethod
    def get_data_by_id( id ):
        '''
        Gets all known price data for the commodity with the given ID.

        @param id - the ID of a commodity, as an integer
        @return - whatever PriceReader.get_price_data_from_csv returns
        for the given ID
        '''
        return PriceReader.get_price_data_from_csv( id )

    @staticmethod
    def _append_month_data( rtn , year , month , caseSensitiveName ):
        '''
        Reads one month of data and appends its usable entries to rtn.
        '''
        monthData = PriceReader.read_month_data( month , year , caseSensitiveName )
        for day in range( 1 , len( monthData.data ) ):
            prices = monthData.get( day )
            #we only add nonzero entries because a value of 0
            #means that there was no data for that given date
            if prices[ 0 ] != 0 and prices[ 1 ] != 0:
                rtn.append( ( year , month , day , prices[ 0 ] , prices[ 1 ] ) )

    @staticmethod
    def get_data_by_date_range( name , startMonth , startYear , endMonth , endYear ):
        '''
        Gets all price data for a given commodity starting at the given
        start date and ending at the given end date. These dates are
        inclusive.

        @param name - the name of a commodity. This is a case insensitive
        string.
        @param startMonth - the month of the start date, as an integer.
        @param startYear - the year of the start date, as an integer.
        @param endMonth - the month of the end date, as an integer.
        @param endYear - the year of the end date, as an integer.
        @return - a list of ( year , month , day , prices[ 0 ] , prices[ 1 ] )
        tuples, in chronological order, holding only the days on which
        both values are nonzero. The list is empty if the start year is
        after the end year.
        @raise KeyError - if the name is not in the loaded ID table
        '''
        #every month, including those of the final year, is read under
        #the canonical capitalization from price_data/item_ids
        caseSensitiveName = DataManager.idToName[ DataManager.nameToId[ name.lower() ] ]
        rtn = []
        currMonth = startMonth
        for currYear in range( startYear , endYear + 1 ):
            #years before the end year run through December; the end
            #year itself stops at the end month
            if currYear < endYear:
                lastMonth = 12
            else:
                lastMonth = endMonth
            while currMonth <= lastMonth:
                DataManager._append_month_data( rtn , currYear , currMonth , caseSensitiveName )
                currMonth = currMonth + 1
            #every year after the first starts again from January
            currMonth = 1
        return rtn
def main():
    '''
    Script entry point. Currently does nothing; the commented-out lines
    below are kept as usage examples to re-enable by hand.
    '''
    #DataManager.init()
    #DataManager.download_data_by_names( "mithril ore" , "mithril bar" , "coal" , "iron ore" , "steel bar" )
    #test = DataManager.get_data_by_date_range( "Mithril bar" , 12 , 2014 , 12 , 2015 )
    #print test
    #IDManager.record_commodity_stats( 12521 , 20001 )
    #test = PriceCrawler.get_price_data_from_html( 12621 )
    #print IDManager.is_interesting( test )
    return None


if __name__ == "__main__":
    main()