def main():
    # Regression tests for price_data_io.py.
    # Exercises MonthData day counts, HTML/JSON price crawling,
    # CSV round-tripping, and 404 handling.
    # NOTE(review): the expected lengths below (30 for Feb 2012, 29 for
    # Feb 2013) appear to include header/padding entries beyond the raw
    # day count — confirm against MonthData's construction.
    test = MonthData( 2 , 2012 )
    assert len( test.data ) == 30
    test = MonthData( 2 , 2013 )
    assert len( test.data ) == 29
    test = MonthData( 3 , 2014 )
    assert len( test.data ) == 32
    test = MonthData( 4 , 2014 )
    assert len( test.data ) == 31
    #test = MonthData( 8 , 2015 )
    #PriceWriter.write_month_data_to_file( test , "test" )
    #test = PriceReader.read_month_data( 8 , 2015 , "test" )
    #print test
    #Mithril ore
    priceData = PriceCrawler.get_price_data_from_html( 447 )
    PriceWriter.save_data( priceData )
    #Mithril bar
    priceData = PriceCrawler.get_price_data_from_html( 2359 )
    PriceWriter.save_data( priceData )
    priceData = PriceCrawler.get_price_data_from_json( "Mithril bar" , 2359 )
    PriceWriter.save_data( priceData )
    #404 Error
    # An unknown item id should yield None rather than raising.
    priceData = PriceCrawler.get_price_data_from_html( 21736 )
    assert( priceData == None )
    # Round-trip check: data saved to CSV must equal a fresh HTML crawl.
    fromCSV = PriceReader.get_price_data_from_csv( 447 )
    fromHTML = PriceCrawler.get_price_data_from_html( 447 )
    assert fromCSV == fromHTML
    print "Regression testing for price_data_io.py passed."
def print_prices(self): for item in self.config['all_info'].keys(): pc = PriceCrawler(self.config, item) interested_in_length = len(item) print ('-' * 20) + ('-' * interested_in_length) + ('-' * 20) print ('-' * 20) + item + ('-' * 20) print ('-' * 20) + ('-' * interested_in_length) + ('-' * 20) [self.print_pretty(x) for x in pc.get_all()] print ''
def download_data_by_id( id ):
    """Crawl price data for *id* and persist it.

    Appends "<name>,<id>" to price_data/item_ids and writes the full
    price history to price_data/master_list/<id>.csv. Does nothing if
    the crawl returns None (e.g. unknown item id).

    NOTE: the parameter name shadows the builtin ``id``; kept for
    backward compatibility with existing callers.
    """
    priceData = PriceCrawler.get_price_data_from_html( id )
    if priceData is not None:
        # ``with`` guarantees the index file is closed even if write fails.
        with open( "price_data/item_ids" , "a" ) as f:
            f.write( priceData.get_name() + "," + str( id ) + "\n" )
        PriceWriter.write_price_data_to_csv( "price_data/master_list/" + str(id) + ".csv" , priceData )
def get_prices(self):
    """
    Generator method which produces a response dict for every item of
    interest (key in all_info dict)
    :return: response dict every time it is called
    """
    for item in self.model.get_all_items():
        for store in self.model.get_stores_for_item(item):
            crawler = PriceCrawler(self.config, self.model, item)
            record = {}
            record['timestamp'] = time.time()
            record['item_name'] = item
            # get_store_price may return None when the store has no
            # price; update(None) raises TypeError, which we treat as
            # "skip this store".
            try:
                record.update(crawler.get_store_price(store))
            except TypeError:
                continue  # no price for store
            yield record
def add_new_item(self, item):
    """Register a new item and take an initial price reading.

    For every store in ``item['stores']`` inserts a record into the items
    collection; for the 'skroutz' store additionally scrapes and stores
    the item's specs/quick-specs/image. Finally crawls the current price
    for each store and records it via ``self.add_record``.

    :param item: dict with keys 'item_name' and 'stores' (list of dicts
        each containing at least 'store_name', and 'url' for skroutz).
    """
    for store in item['stores']:
        rec = {
            'item_name': item['item_name'],
        }
        rec.update(store)
        self.db.mongodb[self.items_collection_name].insert(rec)
        # Skroutz is the canonical source for specs and images.
        if store['store_name'] == 'skroutz':
            skroutz_site_url = store['url']
            specs = {
                'item_name': item['item_name'],
                'specs': get_specs(skroutz_site_url),
                'quick_specs': get_quick_specs(skroutz_site_url),
                'image': get_image_for_item(skroutz_site_url)
            }
            self.db.mongodb[self.specs_collection_name].insert(specs)
        # Local import — presumably avoids a circular import between this
        # module and price_crawler; confirm before hoisting to file level.
        from price_crawler import PriceCrawler
        pc = PriceCrawler(self.config, self, item['item_name'])
        rec = {
            'timestamp': time.time(),
            'item_name': item['item_name'],
        }
        try:
            rec.update(pc.get_store_price(store['store_name']))
            self.add_record(rec)
        except TypeError:
            pass  # no price for store: get_store_price returned None
def record_commodity_stats(startId, endId): for i in range(startId, endId): print "Processing " + str(i) testData = PriceCrawler.get_price_data_from_html(i) if (not testData is None): maxPrice = 0 minPrice = 999999999 maxVolume = 0 minVolume = 999999999 allPoints = testData.get_all_datapoints() #we do not include the last data point because #that is today's data, which may be incomplete #and so the volume may be much less than what it #really is. for datapoint in allPoints[1:len(allPoints) - 1]: #prices and volumes of 0 are invalid if (datapoint.get_price != 0): maxPrice = max(datapoint.get_price(), maxPrice) minPrice = min(datapoint.get_price(), minPrice) if (datapoint.get_volume() != 0): maxVolume = max(datapoint.get_volume(), maxVolume) minVolume = min(datapoint.get_volume(), minVolume) out = open("price_data/item_stats", "a") out.write( str(i) + "," + str(maxPrice) + "," + str(minPrice) + \ "," + str(maxVolume) + "," + str(minVolume) + "\n" ) out.close() else: print str(i) + " was not a valid id" #let's not get blocked for too many requests sleepInterval = randint(1, 2) sleepInterval = 2 print "Sleeping " + str(sleepInterval) sleep(sleepInterval)
def record_commodity_stats( startId , endId ): for i in range( startId , endId ): print "Processing " + str( i ) testData = PriceCrawler.get_price_data_from_html( i ) if ( not testData is None ): maxPrice = 0 minPrice = 999999999 maxVolume = 0 minVolume = 999999999 allPoints = testData.get_all_datapoints() #we do not include the last data point because #that is today's data, which may be incomplete #and so the volume may be much less than what it #really is. for datapoint in allPoints[1:len(allPoints)-1]: #prices and volumes of 0 are invalid if ( datapoint.get_price != 0 ): maxPrice = max( datapoint.get_price() , maxPrice ) minPrice = min( datapoint.get_price() , minPrice ) if ( datapoint.get_volume() != 0 ): maxVolume = max( datapoint.get_volume() , maxVolume ) minVolume = min( datapoint.get_volume() , minVolume ) out = open( "price_data/item_stats" , "a" ) out.write( str(i) + "," + str(maxPrice) + "," + str(minPrice) + \ "," + str(maxVolume) + "," + str(minVolume) + "\n" ) out.close() else: print str( i ) + " was not a valid id" #let's not get blocked for too many requests sleepInterval = randint( 1 , 2 ) sleepInterval = 2 print "Sleeping " + str( sleepInterval ) sleep( sleepInterval )
def download_data_by_name_and_id(name, id):
    """Crawl JSON price data for (*name*, *id*) and persist it.

    :raises ValueError: if the crawl returns None (bad name or id).

    NOTE: the parameter name shadows the builtin ``id``; kept for
    backward compatibility with existing callers.
    """
    data = PriceCrawler.get_price_data_from_json(name, id)
    if data is not None:
        PriceWriter.save_data(data)
    else:
        # BUGFIX: was `raise "Invalid..."` — string exceptions are
        # illegal (TypeError at raise time on Python >= 2.6).
        raise ValueError("Invalid commodity name or id.")
def download_data_by_name_and_id( name , id ):
    """Crawl JSON price data for (*name*, *id*) and persist it.

    :raises ValueError: if the crawl returns None (bad name or id).

    NOTE: the parameter name shadows the builtin ``id``; kept for
    backward compatibility with existing callers.
    """
    data = PriceCrawler.get_price_data_from_json( name , id )
    if data is not None:
        PriceWriter.save_data( data )
    else:
        # BUGFIX: was `raise "Invalid..."` — string exceptions are
        # illegal (TypeError at raise time on Python >= 2.6).
        raise ValueError( "Invalid commodity name or id." )