def main():
    # Regression tests for price_data_io.py.
    # Exercises MonthData day counts, HTML/JSON price crawling,
    # CSV round-tripping, and 404 handling.
    # NOTE(review): the expected lengths below (30 for Feb 2012, 29 for
    # Feb 2013) appear to include header/padding entries beyond the raw
    # day count — confirm against MonthData's construction.
    test = MonthData( 2 , 2012 )
    assert len( test.data ) == 30
    test = MonthData( 2 , 2013 )
    assert len( test.data ) == 29
    test = MonthData( 3 , 2014 )
    assert len( test.data ) == 32
    test = MonthData( 4 , 2014 )
    assert len( test.data ) == 31
    #test = MonthData( 8 , 2015 )
    #PriceWriter.write_month_data_to_file( test , "test" )
    #test = PriceReader.read_month_data( 8 , 2015 , "test" )
    #print test
    #Mithril ore
    priceData = PriceCrawler.get_price_data_from_html( 447 )
    PriceWriter.save_data( priceData )
    #Mithril bar
    priceData = PriceCrawler.get_price_data_from_html( 2359 )
    PriceWriter.save_data( priceData )
    priceData = PriceCrawler.get_price_data_from_json( "Mithril bar" , 2359 )
    PriceWriter.save_data( priceData )
    #404 Error
    # An unknown item id should yield None rather than raising.
    priceData = PriceCrawler.get_price_data_from_html( 21736 )
    assert( priceData == None )
    # Round-trip check: data saved to CSV must equal a fresh HTML crawl.
    fromCSV = PriceReader.get_price_data_from_csv( 447 )
    fromHTML = PriceCrawler.get_price_data_from_html( 447 )
    assert fromCSV == fromHTML
    print "Regression testing for price_data_io.py passed."
def print_prices(self): for item in self.config['all_info'].keys(): pc = PriceCrawler(self.config, item) interested_in_length = len(item) print ('-' * 20) + ('-' * interested_in_length) + ('-' * 20) print ('-' * 20) + item + ('-' * 20) print ('-' * 20) + ('-' * interested_in_length) + ('-' * 20) [self.print_pretty(x) for x in pc.get_all()] print ''
def download_data_by_id( id ):
    """Crawl price data for *id* and persist it.

    Appends "<name>,<id>" to price_data/item_ids and writes the full
    price history to price_data/master_list/<id>.csv. Does nothing if
    the crawl returns None (e.g. unknown item id).

    NOTE: the parameter name shadows the builtin ``id``; kept for
    backward compatibility with existing callers.
    """
    priceData = PriceCrawler.get_price_data_from_html( id )
    if priceData is not None:
        # ``with`` guarantees the index file is closed even if write fails.
        with open( "price_data/item_ids" , "a" ) as f:
            f.write( priceData.get_name() + "," + str( id ) + "\n" )
        PriceWriter.write_price_data_to_csv( "price_data/master_list/" + str(id) + ".csv" , priceData )
def get_prices(self):
    """
    Generator method which produces a response dict for every item of
    interest (key in all_info dict)
    :return: response dict every time it is called
    """
    for item in self.model.get_all_items():
        for store in self.model.get_stores_for_item(item):
            crawler = PriceCrawler(self.config, self.model, item)
            record = {}
            record['timestamp'] = time.time()
            record['item_name'] = item
            # get_store_price may return None when the store has no
            # price; update(None) raises TypeError, which we treat as
            # "skip this store".
            try:
                record.update(crawler.get_store_price(store))
            except TypeError:
                continue  # no price for store
            yield record
def add_new_item(self, item):
    """Register a new item and take an initial price reading.

    For every store in ``item['stores']`` inserts a record into the items
    collection; for the 'skroutz' store additionally scrapes and stores
    the item's specs/quick-specs/image. Finally crawls the current price
    for each store and records it via ``self.add_record``.

    :param item: dict with keys 'item_name' and 'stores' (list of dicts
        each containing at least 'store_name', and 'url' for skroutz).
    """
    for store in item['stores']:
        rec = {
            'item_name': item['item_name'],
        }
        rec.update(store)
        self.db.mongodb[self.items_collection_name].insert(rec)
        # Skroutz is the canonical source for specs and images.
        if store['store_name'] == 'skroutz':
            skroutz_site_url = store['url']
            specs = {
                'item_name': item['item_name'],
                'specs': get_specs(skroutz_site_url),
                'quick_specs': get_quick_specs(skroutz_site_url),
                'image': get_image_for_item(skroutz_site_url)
            }
            self.db.mongodb[self.specs_collection_name].insert(specs)
        # Local import — presumably avoids a circular import between this
        # module and price_crawler; confirm before hoisting to file level.
        from price_crawler import PriceCrawler
        pc = PriceCrawler(self.config, self, item['item_name'])
        rec = {
            'timestamp': time.time(),
            'item_name': item['item_name'],
        }
        try:
            rec.update(pc.get_store_price(store['store_name']))
            self.add_record(rec)
        except TypeError:
            pass  # no price for store: get_store_price returned None
def record_commodity_stats(startId, endId): for i in range(startId, endId): print "Processing " + str(i) testData = PriceCrawler.get_price_data_from_html(i) if (not testData is None): maxPrice = 0 minPrice = 999999999 maxVolume = 0 minVolume = 999999999 allPoints = testData.get_all_datapoints() #we do not include the last data point because #that is today's data, which may be incomplete #and so the volume may be much less than what it #really is. for datapoint in allPoints[1:len(allPoints) - 1]: #prices and volumes of 0 are invalid if (datapoint.get_price != 0): maxPrice = max(datapoint.get_price(), maxPrice) minPrice = min(datapoint.get_price(), minPrice) if (datapoint.get_volume() != 0): maxVolume = max(datapoint.get_volume(), maxVolume) minVolume = min(datapoint.get_volume(), minVolume) out = open("price_data/item_stats", "a") out.write( str(i) + "," + str(maxPrice) + "," + str(minPrice) + \ "," + str(maxVolume) + "," + str(minVolume) + "\n" ) out.close() else: print str(i) + " was not a valid id" #let's not get blocked for too many requests sleepInterval = randint(1, 2) sleepInterval = 2 print "Sleeping " + str(sleepInterval) sleep(sleepInterval)
def record_commodity_stats( startId , endId ): for i in range( startId , endId ): print "Processing " + str( i ) testData = PriceCrawler.get_price_data_from_html( i ) if ( not testData is None ): maxPrice = 0 minPrice = 999999999 maxVolume = 0 minVolume = 999999999 allPoints = testData.get_all_datapoints() #we do not include the last data point because #that is today's data, which may be incomplete #and so the volume may be much less than what it #really is. for datapoint in allPoints[1:len(allPoints)-1]: #prices and volumes of 0 are invalid if ( datapoint.get_price != 0 ): maxPrice = max( datapoint.get_price() , maxPrice ) minPrice = min( datapoint.get_price() , minPrice ) if ( datapoint.get_volume() != 0 ): maxVolume = max( datapoint.get_volume() , maxVolume ) minVolume = min( datapoint.get_volume() , minVolume ) out = open( "price_data/item_stats" , "a" ) out.write( str(i) + "," + str(maxPrice) + "," + str(minPrice) + \ "," + str(maxVolume) + "," + str(minVolume) + "\n" ) out.close() else: print str( i ) + " was not a valid id" #let's not get blocked for too many requests sleepInterval = randint( 1 , 2 ) sleepInterval = 2 print "Sleeping " + str( sleepInterval ) sleep( sleepInterval )
def download_data_by_name_and_id(name, id):
    """Crawl JSON price data for (*name*, *id*) and persist it.

    :raises ValueError: if the crawl returns None (bad name or id).

    NOTE: the parameter name shadows the builtin ``id``; kept for
    backward compatibility with existing callers.
    """
    data = PriceCrawler.get_price_data_from_json(name, id)
    if data is not None:
        PriceWriter.save_data(data)
    else:
        # BUGFIX: was `raise "Invalid..."` — string exceptions are
        # illegal (TypeError at raise time on Python >= 2.6).
        raise ValueError("Invalid commodity name or id.")
def download_data_by_name_and_id( name , id ):
    """Crawl JSON price data for (*name*, *id*) and persist it.

    :raises ValueError: if the crawl returns None (bad name or id).

    NOTE: the parameter name shadows the builtin ``id``; kept for
    backward compatibility with existing callers.
    """
    data = PriceCrawler.get_price_data_from_json( name , id )
    if data is not None:
        PriceWriter.save_data( data )
    else:
        # BUGFIX: was `raise "Invalid..."` — string exceptions are
        # illegal (TypeError at raise time on Python >= 2.6).
        raise ValueError( "Invalid commodity name or id." )