Exemplo n.º 1
0
 def start(self):
     """Crawl Taobao search-result pages with Selenium and collect listings.

     Walks up to five result pages (44 items per page, addressed via the
     0-based ``&s=`` offset parameter), builds a Commodity for every
     listing, keeps only listings with an identifiable shopkeeper, then
     filters/dedupes/sorts the list and persists it.  Python 2 code.
     """
     start = time.time()
     print 'init'
     # First page: item offset 0.
     self.driver.get(self.url+"&s=0")
     print 'load...'
     # print self.driver.page_source
     print 'loaded'
     wait = WebDriverWait(self.driver, 10)
     # Total page count comes from the pager widget's text; the [2:-3]
     # slice strips the non-numeric wrapper around the number
     # (presumably Chinese "共 N 页" — TODO confirm against the live page).
     self.totalPage = int(wait.until(lambda x: x.find_element_by_xpath("//*[@id='mainsrp-pager']/div/div/div/div[1]").text)[2:-3])
     print "totalPage: ",self.totalPage
     # Cap the crawl at five pages.
     if self.totalPage > 5 :
         self.totalPage = 5
     count = 1
     for i in range(1, self.totalPage+1):
         print "page ",i ,":"
         if i != 1:
             # Subsequent pages: &s= offset advances 44 items per page.
             self.driver.get(self.url+"&s="+str((i-1)*44))
         elements = wait.until(lambda x: x.find_elements_by_class_name("J_IconMoreNew"))
         for element in elements:
             name = element.find_element_by_xpath("div[@class='row row-2 title']/a").text
             price = float(element.find_element_by_xpath("div[@class='row row-1 g-clearfix']/div[@class='price g_price g_price-highlight']/strong").text)
             # The deal-count text ends in a 3-character suffix (assumed
             # u'人付款', "people paid"); [:-3] leaves the bare number.
             pay = int(element.find_element_by_xpath("div[@class='row row-1 g-clearfix']/div[@class='deal-cnt']").text[:-3])
             try:
                 shopKeeper = element.find_element_by_xpath("div[@class='row row-3 g-clearfix']/div[@class='shop']/a/span[2]").text
             except NoSuchElementException:
                 # Listing without a shop span: treated as unknown seller.
                 shopKeeper = ""
             location = element.find_element_by_xpath("div[@class='row row-3 g-clearfix']/div[@class='location']").text
             link = element.find_element_by_xpath("div[@class='row row-2 title']/a").get_attribute("href")
             id = element.find_element_by_xpath("div[@class='row row-2 title']/a").get_attribute("data-nid")
             commodity = Commodity(name, price, pay, shopKeeper, location, link, id)
             # Tmall listings are recognisable from their link host / redirector.
             if "tmall" in link or "click.simba" in link:
                 commodity.setIsTmall(True)
             # Only keep listings whose seller could be identified.
             if shopKeeper!="":
                 self.commodityList.append(commodity)
             print count
             commodity.show()
             count += 1
     # Product-specific post-filtering, selected by the crawler's index.
     if self.number == 0:
         self.filterNano()
     elif self.number == 1:
         self.filterGear()
     elif self.number == 2:
         self.filterTheta()
     elif self.number == 3:
         self.filterLG()
     self.distinct()
     # self.getSales()
     self.driver.quit()
     self.getSalesByRequest()
     self.sort()
     # self.showList()
     self.save()
     end = time.time()
     print
     print end - start
Exemplo n.º 2
0
 def start(self):
     """Crawl search-result pages (JD.com-style markup) with Selenium.

     Pages through up to five result pages, extracting name, price,
     comment count, link, title and sku-id for every '.gl-item' listing
     and appending a Commodity for each, then applies the product-specific
     filter and de-duplicates.  Python 2 code.
     """
     print 'init'
     self.driver.get(self.url + "&page=1")
     print 'load...'
     wait = WebDriverWait(self.driver, 10)
     try:
         # Total page count from the bottom pager widget.
         self.totalPage = int(
             wait.until(lambda x: x.find_element_by_xpath(
                 "//*[@id='J_bottomPage']/span[2]/em[1]/b").text))
     except TimeoutException:
         # Pager never appeared: assume a single page of results.
         self.totalPage = 1
     # print "totalPage: ", self.totalPage
     # Cap the crawl at five pages.
     if self.totalPage > 5:
         self.totalPage = 5
     count = 1
     for i in range(1, self.totalPage + 1):
         # print "page ", i, ":"
         if i != 1:
             # Maps logical page i to the site's &page= value (1, 3, 5, ...).
             self.driver.get(self.url + "&page=" + str((i - 1) * 2 + 1))
         warp = wait.until(
             lambda x: x.find_elements_by_class_name("gl-warp"))[0]
         elements = warp.find_elements_by_class_name('gl-item')
         for element in elements:
             name = element.find_element_by_xpath(
                 "div/div[@class='p-name p-name-type-2']/a/em").text
             price = float(
                 element.find_element_by_xpath(
                     "div/div[@class='p-price']/strong/i").text)
             temp = element.find_element_by_xpath(
                 "div/div[@class='p-commit']/strong/a").text
             # NOTE(review): stripping the '万' (x10,000) suffix without
             # multiplying understates counts — e.g. '2万+' becomes 2,
             # not 20000.  Likely a bug; confirm before relying on `comment`.
             temp = temp.replace('+', '')
             temp = temp.replace('万', '')
             comment = int(temp)
             print comment
             link = element.find_element_by_xpath(
                 "div/div[@class='p-img']/a").get_attribute("href")
             title = element.find_element_by_xpath(
                 "div/div[@class='p-img']/a").get_attribute("title")
             id = element.get_attribute("data-sku")
             commodity = Commodity(name, price, comment, link, id, title)
             self.commodityList.append(commodity)
             # print count
             # commodity.show()
             count += 1
     # Product-specific post-filtering, selected by product name.
     if self.product == 'insta360 Nano':
         self.filterNano()
     elif self.product == 'Gear 360':
         self.filterGear()
     elif self.product == 'Ricoh theta':
         self.filterTheta()
     elif self.product == 'LG 360 CAM':
         self.filterLG()
     self.distinct()
Exemplo n.º 3
0
 def __init__(self, name, market, money=0, stock=None):
     """Create a trader operating in *market* with *money* funds.

     *stock* may be:
       - None: defaults to one stock entry per known Commodity;
       - a list containing Stock objects: re-keyed by commodity name;
       - a ready-made {name: Stock} dict, used as-is.
     Anything else raises Exception.
     """
     super(Trader, self).__init__()
     self.name = name
     self.market = market
     self.money = money
     if stock is None:
         # Default inventory: a fresh stock entry for every registered commodity.
         stock = {c.name: c.getStock() for c in Commodity.all()}
     elif isinstance(stock, list) and any(
             isinstance(s, Stock) for s in stock):
         # List of Stock objects: index them by commodity name.
         stock = {c.name: c for c in stock}
     elif not isinstance(stock, dict):
         raise Exception("The stock should be a dictionary.", stock)
     self.stock = stock
Exemplo n.º 4
0
def buildMarkets():
    """Seed the demo world: three markets, seven businesses, and some stock.

    Side-effect order is significant and mirrors the original setup:
    commodities are loaded first, then the markets, then the businesses,
    then the stock adjustments, and finally the markets are saved.
    """
    # Each market exports exactly one of the primary-colour commodities.
    red = Commodity.load('red')
    green = Commodity.load('green')
    blue = Commodity.load('blue')

    sc = Market("Santa Cruz", (5, 0), exports=[red])
    sf = Market('San Francisco', (-2, 7), exports=[green])
    rwc = Market('Redwood City', (10, -8), exports=[blue])

    # Only Nike and Adidas are referenced again; the rest exist purely
    # as side effects of construction.
    nike = Business("Nike", sc, 100)
    adidas = Business("Adidas", sc, 77)
    for label, home in (('Asic', sf), ('Reebok', sf), ('Sketchers', rwc),
                        ('Puma', rwc), ('Vans', sf)):
        Business(label, home, 200)

    # Give the two named businesses a small starting inventory.
    for business, colour in ((nike, 'Red'), (nike, 'Blue'),
                             (adidas, 'Red'), (adidas, 'Green')):
        business.stock[colour].quantity += 5

    for market in (sc, sf, rwc):
        market.save()
Exemplo n.º 5
0
            print(
                "No transaction could be made between %s and %s.\n Try lowering your sell price or raising your buy price."
                % (A, B))
            return False

    # self is the buyer; `business` is the seller.
    def buy(self, business, commodity, units):
        """Buy *units* of *commodity* from *business* via Trader.transact."""
        return Trader.transact(self, business, commodity, units, buy=True)

    # `business` is the buyer; self is the seller.
    def sell(self, business, commodity, units):
        """Sell *units* of *commodity* to *business* via Trader.transact."""
        return Trader.transact(self, business, commodity, units, sell=True)


if __name__ == '__main__':
    # Smoke-test scenario: two traders exchanging cod/tuna in one market.
    from Market import Market
    cod = Commodity('cod')
    tuna = Commodity('tuna')
    mall = Market('test', exports=[cod, tuna])
    # Stock(...) positional args assumed to be (commodity, quantity,
    # price-a, price-b) — TODO confirm against the Stock class definition.
    joe = Trader('joe',
                 mall,
                 money=100,
                 stock=[Stock(cod, 10, 15, 10),
                        Stock(tuna, 0, 10, 15)])
    ken = Trader('ken',
                 mall,
                 money=100,
                 stock=[Stock(cod, 0, 10, 15),
                        Stock(tuna, 10, 15, 10)])
    # joe attempts to sell 20 cod to ken.
    joe.sell(ken, cod, 20)
Exemplo n.º 6
0
from Commodity import Commodity
#from matplotlib.backends.backend_pdf import PdfPages
#Constans
os.chdir("../")  # NOTE: changes the process-wide working directory for the rest of the run

RELATIVE_PATH = "./data/Cleaned/"

# Input dataset: cleaned monthly commodity data (CSV, step 3 of the pipeline).
FILE_NAME = RELATIVE_PATH + "Monthly_data_cmo_step3"
FILE_FORMAT = ".csv"
GrouperColumns = ["CommodityId", "APMC"]

# NOTE(review): FILE_NAME already starts with "./", so this builds
# ".//data/..." — still resolves, but the extra "./" is redundant.
DF_Month = pd.read_csv("./%s%s" % (FILE_NAME, FILE_FORMAT))

# Parse the ISO-formatted date column into real datetimes.
DF_Month["date"] = pd.to_datetime(DF_Month["date"], format='%Y-%m-%d')

# Module-level Commodity helper (not used in the visible portion of this file).
commodityManager = Commodity()


def flagMostFluctuation(DataFrame_View):
    """Per-commodity fluctuation analysis (body appears truncated here).

    Groups the view by CommodityId and, for each group, computes the
    0.7-quantile of the monthly fluctuation rate.  `dataset` and
    `rate_frequency_fluc` are set up but never consumed in the visible
    code — presumably the remainder of the function continues beyond
    this excerpt; confirm against the full source.
    """
    by_group = DataFrame_View.groupby(["CommodityId"])

    #    by_group=sorted(by_group,  # iterates pairs of (key, corresponding subDataFrame)
    #                key=lambda x: len(x["rate_monthly_fluc"]),  # sort by number of rows (len of subDataFrame)
    #                reverse=True)  # reverse the sort i.e. largest first
    dataset = None
    for name, group in by_group:
        rate_monthly_fluc = group["rate_monthly_fluc"]
        rate_frequency_fluc = group["rate_frequency_fluc"]

        # Threshold at the 70th percentile of the monthly fluctuation rate.
        LIMIT = .7
        limitMonth = rate_monthly_fluc.quantile(LIMIT)
Exemplo n.º 7
0
 def start(self):
     """Crawl Taobao search-result pages with Selenium and collect listings.

     Walks up to five result pages (44 items per page via the 0-based
     ``&s=`` offset), builds a Commodity per listing, keeps only listings
     with an identifiable shopkeeper, then filters/dedupes/sorts and
     persists the list.  Python 2 code.
     """
     start = time.time()
     print 'init'
     # First page: item offset 0.
     self.driver.get(self.url + "&s=0")
     print 'load...'
     # print self.driver.page_source
     print 'loaded'
     wait = WebDriverWait(self.driver, 10)
     # Page count from the pager widget text; [2:-3] strips the
     # non-numeric wrapper around the number (presumably Chinese
     # "共 N 页" — TODO confirm against the live page).
     self.totalPage = int(
         wait.until(lambda x: x.find_element_by_xpath(
             "//*[@id='mainsrp-pager']/div/div/div/div[1]").text)[2:-3])
     print "totalPage: ", self.totalPage
     # Cap the crawl at five pages.
     if self.totalPage > 5:
         self.totalPage = 5
     count = 1
     for i in range(1, self.totalPage + 1):
         print "page ", i, ":"
         if i != 1:
             # Subsequent pages: &s= offset advances 44 items per page.
             self.driver.get(self.url + "&s=" + str((i - 1) * 44))
         elements = wait.until(
             lambda x: x.find_elements_by_class_name("J_IconMoreNew"))
         for element in elements:
             name = element.find_element_by_xpath(
                 "div[@class='row row-2 title']/a").text
             price = float(
                 element.find_element_by_xpath(
                     "div[@class='row row-1 g-clearfix']/div[@class='price g_price g_price-highlight']/strong"
                 ).text)
             # Deal-count text ends in a 3-character suffix (assumed
             # u'人付款'); [:-3] leaves the bare number.
             pay = int(
                 element.find_element_by_xpath(
                     "div[@class='row row-1 g-clearfix']/div[@class='deal-cnt']"
                 ).text[:-3])
             try:
                 shopKeeper = element.find_element_by_xpath(
                     "div[@class='row row-3 g-clearfix']/div[@class='shop']/a/span[2]"
                 ).text
             except NoSuchElementException:
                 # Listing without a shop span: treated as unknown seller.
                 shopKeeper = ""
             location = element.find_element_by_xpath(
                 "div[@class='row row-3 g-clearfix']/div[@class='location']"
             ).text
             link = element.find_element_by_xpath(
                 "div[@class='row row-2 title']/a").get_attribute("href")
             id = element.find_element_by_xpath(
                 "div[@class='row row-2 title']/a").get_attribute(
                     "data-nid")
             commodity = Commodity(name, price, pay, shopKeeper, location,
                                   link, id)
             # Tmall listings are recognisable from their link host / redirector.
             if "tmall" in link or "click.simba" in link:
                 commodity.setIsTmall(True)
             # Only keep listings whose seller could be identified.
             if shopKeeper != "":
                 self.commodityList.append(commodity)
             print count
             commodity.show()
             count += 1
     # Product-specific post-filtering, selected by the crawler's index.
     if self.number == 0:
         self.filterNano()
     elif self.number == 1:
         self.filterGear()
     elif self.number == 2:
         self.filterTheta()
     elif self.number == 3:
         self.filterLG()
     self.distinct()
     # self.getSales()
     self.driver.quit()
     self.getSalesByRequest()
     self.sort()
     # self.showList()
     self.save()
     end = time.time()
     print
     print end - start
Exemplo n.º 8
0
class TaobaoCrawler:
    """Scrapes Taobao search results for a fixed list of 360-degree cameras.

    Two strategies: ``start`` drives a PhantomJS browser through the
    rendered pages; ``startByJson`` fetches raw HTML with urllib2 and
    parses the ``g_page_config`` JSON blob embedded in the page.
    Python 2 code (print statements, urllib2).
    """

    def __init__(self, number=0):
        """Set up the PhantomJS driver and search URL for product #number."""
        self.number = number
        self.products = ['insta360 Nano', 'Gear 360', 'theta', 'LG 360 CAM']
        self.product = self.products[number]
        self.keyword = self.product.replace(' ', '+')
        # PhantomJS capabilities: 1 s resource timeout, no image loading,
        # allow local-to-remote access, spoofed Firefox user agent.
        self.cap = webdriver.DesiredCapabilities.PHANTOMJS
        self.cap["phantomjs.page.settings.resourceTimeout"] = 1000
        self.cap["phantomjs.page.settings.loadImages"] = False
        self.cap[
            "phantomjs.page.settings.localToRemoteUrlAccessEnabled"] = True
        self.cap[
            "userAgent"] = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0"
        self.cap["XSSAuditingEnabled"] = True
        self.driver = webdriver.PhantomJS(desired_capabilities=self.cap)
        # self.driver = webdriver.Chrome()
        self.date = time.strftime('%Y%m%d', time.localtime(time.time()))
        # NOTE(review): missing '&' before "sort=sale-desc" — the sort
        # parameter is fused onto "ie=utf8" ("...utf8sort=sale-desc") and
        # is likely ignored by Taobao.  Compare the other crawler variants,
        # which use "&ie=utf8&sort=sale-desc".
        self.url = "https://s.taobao.com/search?q=" + self.keyword + "&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_" + self.date + "&ie=utf8" + "sort=sale-desc"
        self.commodityList = []
        self.totalPage = 0
        user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0'
        self.headers = {'User-Agent': user_agent}

    def start(self):
        """Crawl up to five result pages with PhantomJS, collect Commodity
        objects, then filter, dedupe, fetch sales, sort and save."""
        start = time.time()
        print 'init'
        # First page: item offset 0 (&s= is 0-based, 44 items per page).
        self.driver.get(self.url + "&s=0")
        print 'load...'
        # print self.driver.page_source
        print 'loaded'
        wait = WebDriverWait(self.driver, 10)
        # Page count from the pager widget; [2:-3] strips the wrapper
        # text around the number (presumably "共 N 页" — TODO confirm).
        self.totalPage = int(
            wait.until(lambda x: x.find_element_by_xpath(
                "//*[@id='mainsrp-pager']/div/div/div/div[1]").text)[2:-3])
        print "totalPage: ", self.totalPage
        if self.totalPage > 5:
            self.totalPage = 5
        count = 1
        for i in range(1, self.totalPage + 1):
            print "page ", i, ":"
            if i != 1:
                self.driver.get(self.url + "&s=" + str((i - 1) * 44))
            elements = wait.until(
                lambda x: x.find_elements_by_class_name("J_IconMoreNew"))
            for element in elements:
                name = element.find_element_by_xpath(
                    "div[@class='row row-2 title']/a").text
                price = float(
                    element.find_element_by_xpath(
                        "div[@class='row row-1 g-clearfix']/div[@class='price g_price g_price-highlight']/strong"
                    ).text)
                # Deal-count text ends in a 3-char suffix (assumed u'人付款').
                pay = int(
                    element.find_element_by_xpath(
                        "div[@class='row row-1 g-clearfix']/div[@class='deal-cnt']"
                    ).text[:-3])
                try:
                    shopKeeper = element.find_element_by_xpath(
                        "div[@class='row row-3 g-clearfix']/div[@class='shop']/a/span[2]"
                    ).text
                except NoSuchElementException:
                    # Listing without a shop span: treated as unknown seller.
                    shopKeeper = ""
                location = element.find_element_by_xpath(
                    "div[@class='row row-3 g-clearfix']/div[@class='location']"
                ).text
                link = element.find_element_by_xpath(
                    "div[@class='row row-2 title']/a").get_attribute("href")
                id = element.find_element_by_xpath(
                    "div[@class='row row-2 title']/a").get_attribute(
                        "data-nid")
                commodity = Commodity(name, price, pay, shopKeeper, location,
                                      link, id)
                # Tmall listings are recognisable from their link host.
                if "tmall" in link or "click.simba" in link:
                    commodity.setIsTmall(True)
                # Only keep listings whose seller could be identified.
                if shopKeeper != "":
                    self.commodityList.append(commodity)
                print count
                commodity.show()
                count += 1
        # Product-specific post-filtering, selected by the crawler's index.
        if self.number == 0:
            self.filterNano()
        elif self.number == 1:
            self.filterGear()
        elif self.number == 2:
            self.filterTheta()
        elif self.number == 3:
            self.filterLG()
        self.distinct()
        # self.getSales()
        self.driver.quit()
        self.getSalesByRequest()
        self.sort()
        # self.showList()
        self.save()
        end = time.time()
        print
        print end - start

    def startByJson(self):
        """Crawl the same result pages via raw HTTP, parsing the
        ``g_page_config`` JSON blob that Taobao embeds in the page HTML."""
        start = time.time()
        print 'init'
        result = {}
        try:
            request = urllib2.Request(self.url + "&s=0", headers=self.headers)
            response = urllib2.urlopen(request)
            content = response.read()
            # Capture from 'g_page_config = {' up to the 'g_srp_loadCss'
            # marker; the [16:-19] slice then trims the assignment prefix
            # and the trailing boilerplate, leaving bare JSON.
            pattern = re.compile('g_page_config = {.*?g_srp_loadCss', re.S)
            items = re.findall(pattern, content)
            jsonResult = items[0][16:-19]
            print jsonResult
            result = json.loads(jsonResult, encoding="utf-8")
            print result
            self.totalPage = result['mods']['pager']['data']['totalPage']
            print self.totalPage
        except urllib2.URLError, e:
            if hasattr(e, "code"):
                print e.code
            if hasattr(e, "reason"):
                print e.reason

        # Cap the crawl at five pages.
        if self.totalPage > 5:
            self.totalPage = 5
        count = 1
        for i in range(1, self.totalPage + 1):
            print "page ", i, ":"
            if i != 1:
                try:
                    request = urllib2.Request(self.url + "&s=" + str(
                        (i - 1) * 44),
                                              headers=self.headers)
                    response = urllib2.urlopen(request)
                    content = response.read()
                    pattern = re.compile('g_page_config = {.*?g_srp_loadCss',
                                         re.S)
                    items = re.findall(pattern, content)
                    jsonResult = items[0][16:-19]
                    print jsonResult
                    result = json.loads(jsonResult, encoding="utf-8")
                except urllib2.URLError, e:
                    if hasattr(e, "code"):
                        print e.code
                    if hasattr(e, "reason"):
                        print e.reason
            # NOTE(review): if a later page fails to download, `result`
            # still holds the previous page and its items are re-processed.
            elements = result['mods']['itemlist']['data']['auctions']
            for element in elements:
                name = element['raw_title']
                price = float(element['view_price'])
                # view_sales ends in a 3-char suffix (assumed u'人付款').
                pay = int(element['view_sales'][:-3])
                shopKeeper = element['nick']
                location = element['item_loc']
                link = 'https:' + element['detail_url']
                id = str(element['nid'])
                commodity = Commodity(name, price, pay, shopKeeper, location,
                                      link, id)
                if "tmall" in link or "click.simba" in link:
                    commodity.setIsTmall(True)
                if shopKeeper != "":
                    self.commodityList.append(commodity)
                print count
                commodity.show()
                count += 1
Exemplo n.º 9
0
class TaobaoCrawler:
    """HTTP-only Taobao crawler: fetches search pages with urllib2 and
    parses the embedded ``g_page_config`` JSON; no browser involved.

    ``main`` iterates a list of camera products and returns a JSON report
    of per-product totals and store details.  Python 2 code.
    """

    def __init__(self):
        """Prepare the default search URL and request headers."""
        self.product = 'insta360 Nano'
        self.keyword = self.product.replace(' ', '+')
        self.date = time.strftime('%Y%m%d', time.localtime(time.time()))
        self.url = "https://s.taobao.com/search?q=" + self.keyword + "&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_" + self.date + "&ie=utf8&sort=sale-desc"
        self.commodityList = []
        self.totalPage = 0
        # user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0'
        user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
        # NOTE(review): hard-coded session cookie checked into source —
        # it will expire (timestamps suggest early 2017) and it leaks a
        # personal login session; move to configuration.
        cookie = 'miid=8929995663388453206; hng=CN%7Czh-cn%7CCNY; uc3=sg2=VFQmloNtynToEuMeFQKLTZ21PXTH85EtuHZVkHtdn%2FQ%3D&nk2=CNu7fvUK%2FEvBzGe9&id2=UonciUs0wvLz%2Bg%3D%3D&vt3=F8dARHfB55D4ceKVxQg%3D&lg2=UIHiLt3xD8xYTw%3D%3D; uss=W8hhc%2FiL5F3QQxDnorK5%2Bpxtk6UVQTxdX39qSJdTeNa%2FgPPtvy2njhEaqqM%3D; lgc=klqbtnsns123; tracknick=klqbtnsns123; _cc_=VT5L2FSpdA%3D%3D; tg=0; t=df50d2b4a1821ccecb466ada4db35fc8; mt=ci=-1_0; cookie2=31f737050ec6130e8a50270a329f6824; v=0; thw=cn; swfstore=151527; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; _m_h5_tk=a83b5b9c622408ab9af96c4a8f8ecf3f_1484714010722; _m_h5_tk_enc=77fdcaea2035bb430ad63b125727deed; _tb_token_=7e735e888a7e7; linezing_session=gsi35HL97qLeTfNpudWTOzFV_1484711392594XGpm_1; JSESSIONID=39D91055DEA807D78FB0FA80DE0A63AF; cna=JZgYEL1ENwACAXeJbdPXPixk; uc1=cookie14=UoW%2FWXYeb%2BAHbQ%3D%3D; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; l=AvX1oXyLVgE50p9P9DX54yLahXuvcqmE; isg=AtLSiT6D3hIxcyKNso1DDjwRI5jex9Z96qDZXpwr_gVwr3KphHMmjdjNabxp'
        pragma = 'no-cache'
        cache_control = 'no-cache'
        upgrade_insecure_requests = 1
        self.headers = {'User-Agent': user_agent, 'pragma': pragma, 'cache-control': cache_control, 'upgrade-insecure-requests':upgrade_insecure_requests, 'cookie':cookie}

    def main(self):
        """Crawl every product in the list and return a JSON report string
        of per-product totals plus the individual store records."""
        products = ['insta360 Nano', 'insta360 Air', 'Gear 360', 'theta', 'LG 360 CAM', '小米米家全景相机']
        result = []
        for product in products:
            # Rebuild the search URL and reset state for each product.
            self.product = product
            self.keyword = self.product.replace(' ', '+')
            self.url = "https://s.taobao.com/search?q=" + self.keyword + "&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_" + self.date + "&ie=utf8&sort=sale-desc"
            self.commodityList = []
            self.start()
            sales = self.getTotalSales()
            today = datetime.datetime.now().strftime('%Y-%m-%d')
            stores = []
            for commodity in self.commodityList:
                store = {
                    'name': commodity.name,
                    'price': commodity.price,
                    'pay': commodity.pay,
                    'shop_keeper': commodity.shopKeeper,
                    'shop': commodity.shop,
                    'location': commodity.location,
                    'link': commodity.link,
                    'store_id': commodity.id,
                    'sales': commodity.sales,
                    'is_tmall': commodity.isTmall,
                    'date': today,
                    'commodity': product
                }
                stores.append(store)
            temp = {'commodity': product, 'taobao_total_sales': sales, 'date': today, 'stores': stores}
            print temp
            result.append(temp)
        jsonResult = json.dumps(result)
        # print jsonResult
        return jsonResult

    def start(self):
        """Fetch up to five result pages over HTTP and fill commodityList."""
        result = {}
        try:
            request = urllib2.Request(self.url + "&s=0", headers=self.headers)
            response = urllib2.urlopen(request)
            content = response.read()
            # Capture from 'g_page_config = {' to the 'g_srp_loadCss'
            # marker; [16:-19] trims the assignment prefix and trailing
            # boilerplate, leaving bare JSON.
            pattern = re.compile('g_page_config = {.*?g_srp_loadCss', re.S)
            items = re.findall(pattern, content)
            jsonResult = items[0][16:-19]
            # print jsonResult
            result = json.loads(jsonResult, encoding="utf-8")
            # print result
            # NOTE(review): bare except silently defaults to one page on
            # any error — consider narrowing to KeyError/TypeError.
            try:
                self.totalPage = result['mods']['pager']['data']['totalPage']
            except:
                self.totalPage = 1
            # print self.totalPage
        except urllib2.URLError, e:
            if hasattr(e, "code"):
                print e.code
            if hasattr(e, "reason"):
                print e.reason

        if self.totalPage > 5:  # sales are essentially zero beyond page 5
            self.totalPage = 5
        count = 1
        for i in range(1, self.totalPage + 1):
            if i != 1:
                try:
                    request = urllib2.Request(self.url + "&s=" + str((i - 1) * 44), headers=self.headers)
                    response = urllib2.urlopen(request)
                    content = response.read()
                    pattern = re.compile('g_page_config = {.*?g_srp_loadCss', re.S)
                    items = re.findall(pattern, content)
                    jsonResult = items[0][16:-19]
                    # print jsonResult
                    result = json.loads(jsonResult, encoding="utf-8")
                except urllib2.URLError, e:
                    if hasattr(e, "code"):
                        print e.code
                    if hasattr(e, "reason"):
                        print e.reason
            # NOTE(review): if a later page fails to download, `result`
            # still holds the previous page and its items are re-processed.
            elements = result['mods']['itemlist']['data']['auctions']
            for element in elements:
                name = element['raw_title']
                price = float(element['view_price'])
                # view_sales ends in a 3-char suffix (assumed u'人付款').
                pay = int(element['view_sales'][:-3])
                shopKeeper = element['nick']
                location = element['item_loc']
                link = 'https:' + element['detail_url']
                id = str(element['nid'])
                commodity = Commodity(name, price, pay, shopKeeper, location, link, id)
                # Tmall listings are recognisable from their link host.
                if "tmall" in link or "click.simba" in link:
                    commodity.setIsTmall(True)
                if shopKeeper != "":
                    self.commodityList.append(commodity)
                # print count
                # commodity.show()
                count += 1
Exemplo n.º 10
0
def buildCommodities():
    """Create and persist the three primary-colour commodities.

    All commodities are constructed first, then saved, mirroring the
    original side-effect order.
    """
    from Commodity import Commodity
    colours = [Commodity(label) for label in ('red', 'blue', 'green')]
    for commodity in colours:
        commodity.save()
Exemplo n.º 11
0
class TaobaoCrawler:
    """HTTP-only Taobao crawler: fetches search pages with urllib2 and
    parses the embedded ``g_page_config`` JSON; no browser involved.

    ``main`` iterates four camera products and returns a JSON report of
    per-product totals and store records.  Python 2 code.
    """

    def __init__(self):
        """Prepare the default search URL and request headers."""
        self.product = 'insta360 Nano'
        self.keyword = self.product.replace(' ', '+')
        self.date = time.strftime('%Y%m%d', time.localtime(time.time()))
        self.url = "https://s.taobao.com/search?q=" + self.keyword + "&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_" + self.date + "&ie=utf8" + "&sort=sale-desc"
        self.commodityList = []
        self.totalPage = 0
        user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0'
        self.headers = {'User-Agent': user_agent}

    def main(self):
        """Crawl every product in the list and return a JSON report string
        of per-product totals plus the individual store records."""
        products = ['insta360 Nano', 'Gear 360', 'theta', 'LG 360 CAM']
        result = []
        for product in products:
            # Rebuild the search URL and reset state for each product.
            self.product = product
            self.keyword = self.product.replace(' ', '+')
            self.url = "https://s.taobao.com/search?q=" + self.keyword + "&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_" + self.date + "&ie=utf8" + "&sort=sale-desc"
            self.commodityList = []
            self.start()
            sales = self.getTotalSales()
            today = datetime.datetime.now().strftime('%Y-%m-%d')
            stores = []
            for commodity in self.commodityList:
                store = {
                    'name': commodity.name,
                    'price': commodity.price,
                    'pay': commodity.pay,
                    'shop_keeper': commodity.shopKeeper,
                    'shop': commodity.shop,
                    'location': commodity.location,
                    'link': commodity.link,
                    'store_id': commodity.id,
                    'sales': commodity.sales,
                    'is_tmall': commodity.isTmall,
                    'date': today,
                    'commodity': product
                }
                stores.append(store)
            temp = {
                'commodity': product,
                'taobao_total_sales': sales,
                'date': today,
                'stores': stores
            }
            # print temp
            result.append(temp)
        jsonResult = json.dumps(result)
        # print jsonResult
        return jsonResult

    def start(self):
        """Fetch up to three result pages over HTTP and fill commodityList."""
        result = {}
        try:
            request = urllib2.Request(self.url + "&s=0", headers=self.headers)
            response = urllib2.urlopen(request)
            content = response.read()
            # Capture from 'g_page_config = {' to the 'g_srp_loadCss'
            # marker; [16:-19] trims the assignment prefix and trailing
            # boilerplate, leaving bare JSON.
            pattern = re.compile('g_page_config = {.*?g_srp_loadCss', re.S)
            items = re.findall(pattern, content)
            jsonResult = items[0][16:-19]
            # print jsonResult
            result = json.loads(jsonResult, encoding="utf-8")
            # print result
            self.totalPage = result['mods']['pager']['data']['totalPage']
            # print self.totalPage
        except urllib2.URLError, e:
            if hasattr(e, "code"):
                print e.code
            if hasattr(e, "reason"):
                print e.reason

        # Cap the crawl at three pages (this variant is stricter than
        # the five-page siblings).
        if self.totalPage > 3:
            self.totalPage = 3
        count = 1
        for i in range(1, self.totalPage + 1):
            # print "page ",i ,":"
            if i != 1:
                try:
                    request = urllib2.Request(self.url + "&s=" + str(
                        (i - 1) * 44),
                                              headers=self.headers)
                    response = urllib2.urlopen(request)
                    content = response.read()
                    pattern = re.compile('g_page_config = {.*?g_srp_loadCss',
                                         re.S)
                    items = re.findall(pattern, content)
                    jsonResult = items[0][16:-19]
                    # print jsonResult
                    result = json.loads(jsonResult, encoding="utf-8")
                except urllib2.URLError, e:
                    if hasattr(e, "code"):
                        print e.code
                    if hasattr(e, "reason"):
                        print e.reason
            # NOTE(review): if a later page fails to download, `result`
            # still holds the previous page and its items are re-processed.
            elements = result['mods']['itemlist']['data']['auctions']
            for element in elements:
                name = element['raw_title']
                price = float(element['view_price'])
                # view_sales ends in a 3-char suffix (assumed u'人付款').
                pay = int(element['view_sales'][:-3])
                shopKeeper = element['nick']
                location = element['item_loc']
                link = 'https:' + element['detail_url']
                id = str(element['nid'])
                commodity = Commodity(name, price, pay, shopKeeper, location,
                                      link, id)
                # Tmall listings are recognisable from their link host.
                if "tmall" in link or "click.simba" in link:
                    commodity.setIsTmall(True)
                if shopKeeper != "":
                    self.commodityList.append(commodity)
                # print count
                # commodity.show()
                count += 1