def get_price_data_from_csv( commodityId ):
    """Load a commodity's stored price history from its master-list CSV file.

    The file is looked up at "price_data/master_list/<commodityId>.csv".
    Its first line holds the commodity name; every following line is handed
    to DataPoint.from_csv_data to build one data point.

    Returns a CommodityPriceData built from the id, name and data points,
    or None when the file cannot be opened or read (IOError).
    """
    filename = "price_data/master_list/" + str( commodityId ) + ".csv"
    try:
        # The with-block guarantees the handle is closed on every path,
        # including when parsing a line raises.
        with open( filename , "r" ) as csvFile:
            # Strip only the line terminator. Slicing off the last character
            # unconditionally would eat a real character when the name line
            # has no trailing newline, and leave a stray "\r" on CRLF files.
            name = csvFile.readline().rstrip( "\r\n" )
            datapoints = [ DataPoint.from_csv_data( line ) for line in csvFile ]
        return CommodityPriceData( commodityId , name , datapoints )
    except IOError:
        # No stored data for this commodity.
        return None
def get_price_data_from_html( objectId ):
    """Scrape an item's price and volume history from its Grand Exchange page.

    Downloads the item's "viewitem" page and extracts:
      * the item name, taken from the page <title>;
      * one data point per pair of "average180.push(...)" and
        "trade180.push(...)" lines found in the page.

    If the page reports that this computer's IP has been temporarily blocked
    for making too many requests, the download is retried every 15 seconds
    until it succeeds.

    Returns a CommodityPriceData, or None when the site reports a problem
    with the request that is not an IP block (e.g. an invalid object ID).
    Raises AttributeError if the page has no title matching the expected
    "<name> - Grand Exchange" pattern.
    """
    url = "http://services.runescape.com/m=itemdb_oldschool/viewitem?obj=" + str( objectId )

    # Retry with a loop rather than by calling ourselves: a long-lasting
    # block would otherwise add one stack frame per attempt and eventually
    # exceed the interpreter's recursion limit.
    while True:
        html = requests.get( url ).text
        if "Sorry, there was a problem with your request." not in html:
            break

        blocked = ( "You've made too many requests recently." in html and
                    "As a result, your IP address has been temporarily blocked. Please try again later." in html )
        if not blocked:
            #invalid object ID
            return None

        # Parenthesised single argument prints identically whether print is
        # a statement or a function.
        print( "Computer IP has been blocked. Trying again in 15 seconds..." )
        sleep( 15 )

    #we can find the name in the title of the webpage.
    name = str( re.search( r'(?<=<title>)(.*)(?= - Grand Exchange)' , html ).group( 0 ) )

    #the price data is always pushed to the graphs on the webpage
    #with the command "average180.push( ... )" (and "trade180.push( ... )"
    #for volume), so we are only interested in lines with those commands
    priceLines = re.findall( r'average180.push.*' , html )
    volumeLines = re.findall( r'trade180.push.*' , html )

    datapoints = []
    for priceLine , volumeLine in zip( priceLines , volumeLines ):
        # Index 0 of each number list is the "180" in the variable name
        # itself, so the useful values start at index 1.
        priceNumbers = re.findall( r'\d+' , priceLine )

        #years, months and days stay in string form so that they keep
        #their leading zeros (e.g. "01" must not turn into 1).
        year = str( priceNumbers[ 1 ] )
        month = str( priceNumbers[ 2 ] )
        day = str( priceNumbers[ 3 ] )

        #prices are whole numbers of coins, so integers are safe here.
        price = int( priceNumbers[ 4 ] )
        average = int( priceNumbers[ 5 ] )

        #volume is likewise a whole number of trades per day.
        volumeNumbers = re.findall( r'\d+' , volumeLine )
        volume = int( volumeNumbers[ 4 ] )

        datapoints.append( DataPoint( year , month , day , price , average , volume ) )

    return CommodityPriceData( objectId , name , datapoints )
def read_month_data( month , year , commodity ):
    """Read one month of stored price data for a commodity.

    Looks for the file
    "price_data/<year> <DateUtils.format_month(month)>/<commodity>.csv".
    Each line is parsed with DataPoint.from_csv_month_data and stored in a
    MonthData under the data point's day number.

    commodity is concatenated directly into the path, so it must be a string.

    Returns the MonthData. If the file cannot be opened or read (IOError),
    the MonthData is returned exactly as constructed, with no days set.
    """
    rtn = MonthData( month , year )
    folder = "price_data/" + str( year ) + " " + DateUtils.format_month( month )
    try:
        # The with-block closes the handle even if parsing a line fails.
        with open( folder + "/" + commodity + ".csv" , "r" ) as monthFile:
            lines = monthFile.readlines()
            for line in lines:
                datapoint = DataPoint.from_csv_month_data( year , month , line )
                rtn.set( int( datapoint.get_day() ) , datapoint )
    except IOError:
        #there is no data, so ignore the error and return the MonthData
        #with whatever defaults it was constructed with
        pass
    return rtn
def get_price_data_from_json( name , objectId ):
    """Fetch an item's daily and average price history from the graph API.

    Downloads ".../api/graph/<objectId>.json" and builds one DataPoint per
    (timestamp, price) pair in the daily section, paired with the value at
    the same position in the average section.

    Returns a CommodityPriceData for objectId and name, or None if the
    response contains "404 - Page not found" (bad object ID).
    """
    url = "http://services.runescape.com/m=itemdb_oldschool/api/graph/" + str(objectId) + ".json"
    body = requests.get( url ).text

    #bad object ID
    if "404 - Page not found" in body:
        return None

    #everything before the word "average" is the daily price data;
    #everything from it onwards is the average price data
    splitAt = body.find( "average" )

    #data comes in the form {timestamp:value, timestamp:value, ...} with
    #integer timestamps and prices, so pulling out every run of digits
    #gives a flat list to walk two entries at a time.
    #both sections are expected to have the same length, so one offset
    #indexes both lists.
    dailyNumbers = re.findall( r'\d+' , body[ :splitAt ] )
    averageNumbers = re.findall( r'\d+' , body[ splitAt: ] )

    datapoints = []
    for offset in range( 0 , len( dailyNumbers ) , 2 ):
        stamp = dailyNumbers[ offset ]

        #timestamps are in milliseconds. an extra 12 hours (43200 seconds)
        #is added because Jagex is several hours ahead; only the date
        #matters here, not the hour of day.
        moment = datetime.datetime.fromtimestamp( int(stamp)/1000 + 43200 )

        #isoformat yields zero-padded "YYYY-MM-DD", so the parts stay strings
        year , month , day = moment.date().isoformat().split( "-" )

        price = int( dailyNumbers[ offset+1 ] )
        average = int( averageNumbers[ offset+1 ] )
        datapoints.append( DataPoint( year , month , day , price , average ) )

    return CommodityPriceData( objectId , name , datapoints )