Пример #1
0
    def fetch(self):
        """Crawl the store pages reachable from ALL_URL and announce every store.

        The crawl has three levels: the links matched by XPATH on the ALL_URL
        page, the links matched by XPATH on each of those pages, and finally
        the nodes matched by CITY_XPATH, whose 'li[2]' and 'li[3]' children
        supply the store name and address.

        Each address is passed to getGoogleAPI.getgoogleapi; stores for which
        it returns None are skipped. For every remaining store,
        collector.object_found and shopping.signals.shop_found are sent with
        brand 'LevisCollector'.
        """
        html_parser = etree.HTMLParser(encoding='utf-8')
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        index_html = urllib2.urlopen(ALL_URL).read()
        index_tree = etree.HTML(index_html, parser=html_parser)
        for region_link in index_tree.xpath(XPATH):
            print(region_link.text)
            city_url = urlparse.urljoin(ALL_URL, region_link.attrib['href'])
            city_html = urllib2.urlopen(city_url).read()
            city_tree = etree.HTML(city_html, parser=html_parser)
            for list_link in city_tree.xpath(XPATH):
                addr_url = urlparse.urljoin(city_url, list_link.attrib['href'])
                addr_html = urllib2.urlopen(addr_url).read()
                addr_tree = etree.HTML(addr_html, parser=html_parser)
                for store_node in addr_tree.xpath(CITY_XPATH):
                    storename = store_node.find('li[2]').text
                    print(u'店名:' + storename)
                    storeaddr = store_node.find('li[3]').text
                    print(u'地址:' + storeaddr)

                    message = u'店名: %s  地址: %s ' % (storename, storeaddr)
                    self.logger.info(message)

                    latlng = getGoogleAPI.getgoogleapi(storeaddr)
                    if latlng is None:
                        print("can not find the latlng!!!!")
                        continue
                    lat, lng = latlng[0], latlng[1]

                    # NOTE(review): the url prefix is 'hm-' although the brand
                    # is 'LevisCollector' -- confirm whether this is intended.
                    collector.object_found.send(
                        self,
                        time=today,
                        title=storename,
                        url='hm-' + storename + '-' + storeaddr,
                        storeaddr=storeaddr,
                        lat=lat,
                        lng=lng,
                        brand='LevisCollector',
                    )
                    # Imported at the point of use, as in the original code.
                    from shopping.signals import shop_found
                    shop_found.send(
                        self,
                        brand='LevisCollector',
                        address=storeaddr,
                        lat=lat,
                        lng=lng,
                    )
Пример #2
0
    def fetch(self):
        """Walk the city pages linked from ADDR_URL and announce each store.

        City links are the nodes matched by CITY_XPATH on the ADDR_URL page.
        On every city page the nodes matched by STORE_XPATH carry a 'rel'
        attribute from which a store id is cut; STORE_URL formatted with that
        id is then fetched, and NAME_XPATH / ADDR_XPATH give the store name
        and address (first match of each).

        Addresses that getGoogleAPI.getgoogleapi cannot resolve (None) are
        skipped; all others are sent through collector.object_found and
        shopping.signals.shop_found with brand 'CoachCollector'.
        """
        html_parser = etree.HTMLParser(encoding='utf-8')
        index_html = urllib2.urlopen(ADDR_URL).read()
        index_tree = etree.HTML(index_html, parser=html_parser)
        city_links = index_tree.xpath(CITY_XPATH)
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        for city_link in city_links:
            print(city_link.text)
            city_url = urlparse.urljoin(ADDR_URL, city_link.attrib['href'])
            print(city_url)
            city_html = urllib2.urlopen(city_url).read()
            city_tree = etree.HTML(city_html, parser=html_parser)
            for store in city_tree.xpath(STORE_XPATH):
                # Everything after the first 33 characters of the joined URL
                # is used as the id -- presumably a fixed-length URL prefix;
                # TODO confirm.
                joined = urlparse.urljoin(city_url, store.attrib['rel'])
                store_id = joined[33:]
                store_html = urllib2.urlopen(STORE_URL % store_id).read()
                store_tree = etree.HTML(store_html, parser=html_parser)

                storename = store_tree.xpath(NAME_XPATH)[0].text
                print(u'店名:' + storename)
                storeaddr = store_tree.xpath(ADDR_XPATH)[0].text
                print(u'地址:' + storeaddr)

                message = u'店名: %s  地址: %s ' % (storename, storeaddr)
                self.logger.info(message)

                latlng = getGoogleAPI.getgoogleapi(storeaddr)
                if latlng is None:
                    print("can not find the latlng!!!!")
                    continue
                lat, lng = latlng[0], latlng[1]

                collector.object_found.send(
                    self,
                    time=today,
                    title=storename,
                    url='coach-' + storename + '-' + storeaddr,
                    storeaddr=storeaddr,
                    lat=lat,
                    lng=lng,
                    brand='CoachCollector',
                )
                # Imported at the point of use, as in the original code.
                from shopping.signals import shop_found
                shop_found.send(
                    self,
                    brand='CoachCollector',
                    address=storeaddr,
                    lat=lat,
                    lng=lng,
                )
Пример #3
0
    def fetch(self):
        """Announce every store from the hard-coded C&A store table.

        The table below maps each 'city' entry to a list of name/address
        records. The store name is the 'city' value followed by the record's
        'name'. Each address is geocoded with getGoogleAPI.getgoogleapi;
        records for which it returns None are skipped. The rest are sent
        through collector.object_found and shopping.signals.shop_found with
        brand 'CACollector'.
        """
        today = datetime.datetime.now().strftime('%Y-%m-%d')
        catalog={'all':[{'city':'上海','info':[{'name':'正大广场店','addr':'上海市浦东新区陆家嘴西路168号正大广场内GF13-16'},{'name':'大宁店','addr':'上海市闸北区共和新路1868号大宁国际商业广场1(S2)栋一层101-106'},{'name':'龙之梦店','addr':'上海市长宁路1018号龙之梦购物中心1楼'},{'name':'淮海店','addr':'上海市淮海中路627-641号'},{'name':'金桥国际商业广场店','addr':'上海市浦东新区张扬路3611弄金桥国际商业广场3号一层二层101-103,105-108/201-207,210-214'},{'name':'四川北路店','addr':'上海市虹口区四川北路1318号盛邦国际大厦一层二层'},{'name':'长风景畔广场店','addr':'上海市普陀区大渡河路196号长风景畔广场娱乐商业中心C1幢113室、208室、209室和210室'},{'name':'上海百联徐汇商业广场店','addr':'华山路2038号百联徐汇商业广场一层二层'}]},
               {'city':'北京','info':[{'name':'新东安店','addr':'北京市东城区王府井大街138号新东安广场208-210'},{'name':'富力广场店','addr':'北京市朝阳区东三环中路65号218室'},{'name':'国瑞购物中心店','addr':'北京市崇文区崇文门外大街18号国瑞购物中心二层F2-18号'},{'name':'欧美汇购中心店','addr':'北京市海淀区丹棱街1号欧美汇购物中心二层三层F2-17, F3-13,F3-14'},{'name':'华联万柳店','addr':'北京市海淀区巴沟路2号北京华联万柳购物中心一层二层'}]},
               {'city':'辽宁','info':[{'name':'大悦城店','addr':'沈阳市大东区小东路5号大悦城B座一层二层B119-120,B219-220'},{'name':'万达店','addr':'沈阳市和平区太原南街2号沈阳万达广场城中城'},{'name':'大连天兴罗斯福国际中心店','addr':'大连沙河口区西安路139号罗斯福国际中心一层二层'},{'name':'沈阳龙之梦购物中心店','addr':'沈阳市大东区滂江街22号龙之梦购物中心一层'}]},
               {'city':'河北','info':[{'name':'石家庄裕华万达广场店','addr':'石家庄市裕华区建华南大街136号石家庄裕华万达广场一层1030室'},{'name':'唐山万达广场店','addr':'河北省唐山市路南区新华东道100号唐山万达广场1020和2023'}]},
               {'city':'河南','info':[{'name':'360国贸中心店','addr':'郑州市金水区花园路39号郑州国贸中心一层'},{'name':'郑州市','addr':'郑州市民主路88号印象城购物中心一层二层1024/2020'}]},
               {'city':'四川','info':[{'name':'富力天汇店','addr':'四川成都市顺城大街289号富力天汇购物中心2楼'}]},
               {'city':'浙江','info':[{'name':'宁波世纪东方广场','addr':'宁波市中山东路1083号宁波世纪东方广场一层二层'}]},
               {'city':'天津','info':[{'name':'利福广场店','addr':'天津市和平区滨江道219号利福广场一层二层'}]},
               {'city':'江苏','info':[{'name':'无锡市保利广场店','addr':'无锡市解放东路1000号保利广场一层185-187室,二层174-189室'},{'name':'苏州市印象城购物中心店','addr':'苏州市工业园区现代大道1699号印象城购物中心一层二层1001-1002/2001-2002'},{'name':'泰州万达广场店','addr':'江苏省泰州市海陵区济川东路226号泰州万达广场226-1-A'},{'name':'C&A无锡新之城店','addr':'无锡市新区新光路555号新之城全生活广场B区1F01与2F01'}]},
               {'city':'湖南','info':[{'name':'乐和城店','addr':'长沙市黄兴中路188号乐和城一层二层'}]},
               {'city':'湖北','info':[{'name':'光谷国际广场店','addr':'武汉市东湖新技术开发区珞瑜路889号光谷国际广场一层二层'},{'name':'武汉汉街店','addr':'武汉市武昌区汉街49号'},{'name':'汉商银座购物中心店','addr':'湖北武汉汉阳大道139号汉商银座购物中心地上一层和地上二层1-01,F1&F2,'},{'name':'武汉摩尔城店','addr':'武汉龙阳大道特六号,武汉摩尔城B栋一层'}]},
               {'city':'山东','info':[{'name':'济南和谐广场店','addr':'济南市槐荫区经十路22799号和谐广场一层二层L119-L121及L216-L218  1-2F'}]},
               {'city':'重庆','info':[{'name':'重庆日月光中心广场店','addr':'重庆市渝中区民权路89号日月光中心广场LG072-075'},{'name':'重庆南坪万达广场店','addr':'重庆市南岸区江南大道10号南坪万达广场一层JD1-2, JD1-3'}]}]
              }
        for region in catalog['all']:
            cityname = region['city']
            for record in region['info']:
                storename = cityname + record['name']
                storeaddr = record['addr']
                print('店名:' + storename)
                print('地址:' + storeaddr)

                message = '店名: %s  地址: %s ' % (storename, storeaddr)
                self.logger.info(message)

                latlng = getGoogleAPI.getgoogleapi(storeaddr)
                if latlng is None:
                    print("can not find the latlng!!!!")
                    continue
                lat, lng = latlng[0], latlng[1]

                collector.object_found.send(
                    self,
                    time=today,
                    title=storename,
                    url='ca-' + storename + '-' + storeaddr,
                    storeaddr=storeaddr,
                    lat=lat,
                    lng=lng,
                    brand='CACollector',
                )
                # Imported at the point of use, as in the original code.
                from shopping.signals import shop_found
                shop_found.send(
                    self,
                    brand='CACollector',
                    address=storeaddr,
                    lat=lat,
                    lng=lng,
                )
Пример #4
0
    def fetch(self):
        """Extract stores from the map links on the ADDR_URL page and announce them.

        Every node matched by XPATH carries an 'href' whose text after
        'geocode=&q=' is split on '+': field 0 is used as the store name and
        field 1 as the address. When there are exactly four fields, a city
        label is derived from field 3 (the two characters before '&ll=' plus
        u'市', or u'昆明' when the field contains 'KUNMING').

        The city label is only printed; the geocoder
        (getGoogleAPI.getgoogleapi) receives the bare address. Stores it
        cannot resolve (None) are skipped; the others are sent through
        collector.object_found and shopping.signals.shop_found with brand
        'MangoCollector'.

        Links without the 'geocode=&q=' marker, or with fewer than two
        '+'-separated fields, are skipped instead of yielding a garbage slice
        or raising IndexError.
        """
        from shopping.signals import shop_found

        today = datetime.datetime.now().strftime('%Y-%m-%d')
        html_parser = etree.HTMLParser(encoding='utf-8')
        text = urllib2.urlopen(ADDR_URL).read()
        tree = etree.HTML(text, parser=html_parser)

        marker = 'geocode=&q='
        for node in tree.xpath(XPATH):
            href = node.attrib['href']
            marker_pos = href.find(marker)
            if marker_pos < 0:
                # Not a map link in the expected format.
                continue

            fields = href[marker_pos + len(marker):].split('+')
            if len(fields) < 2:
                continue

            city = u''
            if len(fields) == 4:
                tail = fields[3]
                ll_pos = tail.find('&ll=')
                city = tail[ll_pos - 2:ll_pos] + u'市'
                # `in` also matches at index 0, which `find(...) > 0` missed;
                # the unicode literal avoids mixing a non-ASCII byte string
                # into the unicode concatenation below.
                if 'KUNMING' in tail:
                    city = u'昆明'

            storename = fields[0]
            print(u'店名:' + storename)
            storeaddr = fields[1]
            print(u'地址:' + city + storeaddr)

            self.logger.info(u'店名: %s  地址: %s ' % (storename, storeaddr))
            latlng = getGoogleAPI.getgoogleapi(storeaddr)
            if latlng is None:
                print("can not find the latlng!!!!")
                continue

            collector.object_found.send(
                self,
                time=today,
                title=storename,
                url='Mango-' + storename + '-' + storeaddr,
                storeaddr=storeaddr,
                lat=latlng[0],
                lng=latlng[1],
                brand='MangoCollector',
            )
            shop_found.send(
                self,
                brand='MangoCollector',
                address=storeaddr,
                lat=latlng[0],
                lng=latlng[1],
            )
Пример #5
0
    def fetch(self):
        """Read the JSON store feed at ADDR_URL and announce each store.

        Stores are the rows under
        ["storesCompleteResponse"]["storesComplete"]["storeComplete"]. The
        store name is the row's "city" followed by its "name". Only rows whose
        ["address"]["addressLine"] is a list with at least two items are
        used; the address is item 1 followed by item 0.

        Rows with any other address shape are skipped. Previously only int,
        str and unicode values were skipped, so any other type reached the
        geocoder with an undefined or stale address left over from the
        previous row.

        Addresses that getGoogleAPI.getgoogleapi cannot resolve (None) are
        skipped; the rest are sent through collector.object_found and
        shopping.signals.shop_found with brand 'HMCollector'.
        """
        from shopping.signals import shop_found

        text = urllib2.urlopen(ADDR_URL).read()
        payload = json.loads(text, encoding='utf-8')
        rows = payload["storesCompleteResponse"]["storesComplete"]["storeComplete"]
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        for row in rows:
            storename = row["city"] + row["name"]
            print(storename)

            address_lines = row["address"]["addressLine"]
            if not isinstance(address_lines, list) or len(address_lines) < 2:
                continue

            storeaddr = unicode(address_lines[1]) + unicode(address_lines[0])
            print(storeaddr)
            self.logger.info(u'店名: %s  地址: %s ' % (storename, storeaddr))

            latlng = getGoogleAPI.getgoogleapi(storeaddr)
            if latlng is None:
                print("can not find the latlng!!!!")
                continue

            collector.object_found.send(
                self,
                time=today,
                title=storename,
                url='hm-' + storename + '-' + storeaddr,
                storeaddr=storeaddr,
                lat=latlng[0],
                lng=latlng[1],
                brand='HMCollector',
            )
            shop_found.send(
                self,
                brand='HMCollector',
                address=storeaddr,
                lat=latlng[0],
                lng=latlng[1],
            )
Пример #6
0
    def getData(self,citycode,storenuum,cityname):
        """Fetch the JSON store list for one city code and announce its stores.

        The request URL is ADDF_URL formatted with (citycode, 1) followed by
        ADDL_URL. The first three bytes of the response are dropped before
        JSON decoding (presumably a UTF-8 BOM -- TODO confirm). Stores are
        read from ["wsResponse"]["result"], using "name" and "street1".

        Parameters:
            citycode:  value substituted into ADDF_URL.
            storenuum: not used by this method.
            cityname:  printed, and used in the url passed to
                       collector.object_found.

        Addresses that getGoogleAPI.getgoogleapi cannot resolve (None) are
        skipped; the rest are sent through collector.object_found and
        shopping.signals.shop_found with brand 'AdidasCollector'.
        """
        request_url = (ADDF_URL % (citycode, 1)) + ADDL_URL
        raw = urllib2.urlopen(request_url).read()[3:]
        payload = json.loads(raw, encoding='utf-8')

        response = payload["wsResponse"]
        nums = response["results"]
        print(nums)
        stores = response["result"]
        print(cityname)
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        # NOTE(review): indices run from 1 to int(nums) - 1, so stores[0] is
        # never visited -- confirm whether that is intended.
        for index in range(1, int(nums)):
            entry = stores[index]
            storename = entry["name"]
            storeaddr = stores[index]["street1"]
            print(u'店名:' + storename)
            print(u'地址:' + storeaddr)

            message = u'店名: %s  地址: %s ' % (storename, storeaddr)
            self.logger.info(message)

            latlng = getGoogleAPI.getgoogleapi(storeaddr)
            if latlng is None:
                print("can not find the latlng!!!!")
                continue
            lat, lng = latlng[0], latlng[1]

            collector.object_found.send(
                self,
                time=today,
                title=storename,
                url='adidas-' + cityname + '-' + storename,
                storeaddr=storeaddr,
                lat=lat,
                lng=lng,
                brand='AdidasCollector',
            )

            # Imported at the point of use, as in the original code.
            from shopping.signals import shop_found
            shop_found.send(
                self,
                brand='AdidasCollector',
                address=storeaddr,
                lat=lat,
                lng=lng,
            )
Пример #7
0
    def fetch(self):
        """Scrape the store list on the ADDR_URL page and announce each store.

        Each node matched by XPATH is serialised back to HTML; the address is
        the text between '&gt;</span>' and '</li>' in that markup, with
        'amp;' removed when it occurs after the first character. The store
        name is the text of the node's 'span' child.

        Addresses that getGoogleAPI.getgoogleapi cannot resolve (None) are
        skipped; the rest are sent through collector.object_found and
        shopping.signals.shop_found with brand 'CityMeCollector'.
        """
        html_parser = etree.HTMLParser(encoding='utf-8')
        page = urllib2.urlopen(ADDR_URL).read()
        page_tree = etree.HTML(page, parser=html_parser)

        store_nodes = page_tree.xpath(XPATH)
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        opening = '&gt;</span>'
        for store_node in store_nodes:
            markup = etree.tostring(store_node, method='html', encoding='utf-8')
            begin = markup.index(opening) + len(opening)
            end = markup.index('</li>')
            storeaddr = markup[begin:end]

            if storeaddr.find('amp;') > 0:
                storeaddr = storeaddr.replace('amp;', '')

            print(storeaddr)
            storename = store_node.find('span').text
            message = u'店名: %s  地址: %s ' % (storename, storeaddr)
            self.logger.info(message)

            latlng = getGoogleAPI.getgoogleapi(storeaddr)
            print(latlng)
            if latlng is None:
                print("can not find the latlng!!!!")
                continue
            lat, lng = latlng[0], latlng[1]

            collector.object_found.send(
                self,
                time=today,
                title=storename,
                url='CityMe-' + storeaddr,
                storeaddr=storeaddr,
                lat=lat,
                lng=lng,
                brand='CityMeCollector',
            )
            # Imported at the point of use, as in the original code.
            from shopping.signals import shop_found
            shop_found.send(
                self,
                brand='CityMeCollector',
                address=storeaddr,
                lat=lat,
                lng=lng,
            )
Пример #8
0
    def fetch(self):
        """Scrape the store table on the ADDR_URL page and announce each store.

        Every fourth node matched by XPATH (indices 0, 4, 8, ...) is read: its
        'td[1]' text is the store name and its 'td[3]' text the address.
        Nodes whose name text is None are skipped.

        Addresses that getGoogleAPI.getgoogleapi cannot resolve (None) are
        skipped; the rest are sent through collector.object_found and
        shopping.signals.shop_found with brand 'RoxyCollector'.
        """
        today = datetime.datetime.now().strftime('%Y-%m-%d')
        html_parser = etree.HTMLParser(encoding='utf-8')
        page = urllib2.urlopen(ADDR_URL).read()
        page_tree = etree.HTML(page, parser=html_parser)
        rows = page_tree.xpath(XPATH)

        for row in rows[::4]:
            storename = row.find('td[1]').text
            if storename is None:
                continue
            print(storename)
            storeaddr = row.find('td[3]').text
            print(storeaddr)
            message = u'店名: %s  地址: %s ' % (storename, storeaddr)
            self.logger.info(message)

            latlng = getGoogleAPI.getgoogleapi(storeaddr)
            if latlng is None:
                print("can not find the latlng!!!!")
                continue
            lat, lng = latlng[0], latlng[1]

            collector.object_found.send(
                self,
                time=today,
                title=storename,
                url='Roxy-' + storename + '-' + storeaddr,
                storeaddr=storeaddr,
                lat=lat,
                lng=lng,
                brand='RoxyCollector',
            )

            # Imported at the point of use, as in the original code.
            from shopping.signals import shop_found
            shop_found.send(
                self,
                brand='RoxyCollector',
                address=storeaddr,
                lat=lat,
                lng=lng,
            )
Пример #9
0
    def getData(self, code1, code2, cityname):
        """Scrape one store-listing page and announce each store on it.

        The page URL is ADDR_URL formatted with (code1, code2). For every node
        matched by XPATH, the store name is the stripped text of 'a/span'
        followed by the stripped text of its 'strong' child, and the address
        is the text of 'div/span/strong'.

        Parameters:
            code1, code2: values substituted into ADDR_URL.
            cityname:     only printed.

        Addresses that getGoogleAPI.getgoogleapi cannot resolve (None) are
        skipped; the rest are sent through collector.object_found and
        shopping.signals.shop_found with brand "ZaraCollector".
        """
        today = datetime.datetime.now().strftime("%Y-%m-%d")
        print(today)
        html_parser = etree.HTMLParser(encoding="utf-8")
        page = urllib2.urlopen(ADDR_URL % (code1, code2)).read()
        store_nodes = etree.HTML(page, parser=html_parser).xpath(XPATH)
        print(cityname)

        for store_node in store_nodes:
            label_node = store_node.find("a/span")
            strong_node = label_node.find("strong")

            storename = label_node.text.strip() + strong_node.text.strip()
            print(u"店名:" + storename)

            storeaddr = store_node.find("div/span/strong").text
            print(u"地址:" + storeaddr)

            message = u"店名: %s  地址: %s " % (storename, storeaddr)
            self.logger.info(message)

            latlng = getGoogleAPI.getgoogleapi(storeaddr)
            if latlng is None:
                print("can not find the latlng!!!!")
                continue
            lat, lng = latlng[0], latlng[1]

            collector.object_found.send(
                self,
                time=today,
                title=storename,
                url="Zara-" + storename + "-" + storeaddr,
                storeaddr=storeaddr,
                lat=lat,
                lng=lng,
                brand="ZaraCollector",
            )

            # Imported at the point of use, as in the original code.
            from shopping.signals import shop_found

            shop_found.send(
                self,
                brand="ZaraCollector",
                address=storeaddr,
                lat=lat,
                lng=lng,
            )
Пример #10
0
    def fetch(self):
        """Read the location feeds listed in ADDR_URL and announce each store.

        For every URL in ADDR_URL the response is cut from the first
        '{"locations":' up to (but excluding) its last two characters and
        decoded as JSON. Each entry of 'locations' supplies the store name
        ('name') and address ('street').

        Addresses that getGoogleAPI.getgoogleapi cannot resolve (None) are
        skipped; the rest are sent through collector.object_found and
        shopping.signals.shop_found with brand 'NikeCollector'.
        """
        today = datetime.datetime.now().strftime('%Y-%m-%d')
        for feed_url in ADDR_URL:
            raw = urllib2.urlopen(feed_url).read()
            json_start = raw.find('{"locations":')
            payload = json.loads(raw[json_start:-2], encoding='utf-8')
            for location in payload['locations']:
                storename = location['name']
                print(storename)
                storeaddr = location['street']
                print(storeaddr)

                message = u'店名: %s  地址: %s ' % (storename, storeaddr)
                self.logger.info(message)

                latlng = getGoogleAPI.getgoogleapi(storeaddr)
                if latlng is None:
                    print("can not find the latlng!!!!")
                    continue
                lat, lng = latlng[0], latlng[1]

                collector.object_found.send(
                    self,
                    time=today,
                    title=storename,
                    url='Nike-' + storename + '-' + storeaddr,
                    storeaddr=storeaddr,
                    lat=lat,
                    lng=lng,
                    brand='NikeCollector',
                )

                # Imported at the point of use, as in the original code.
                from shopping.signals import shop_found
                shop_found.send(
                    self,
                    brand='NikeCollector',
                    address=storeaddr,
                    lat=lat,
                    lng=lng,
                )
Пример #11
0
    def fetch(self):
        """Query the store locator once per listed state and announce each store.

        For every node matched by XPATH on the ADDR_URL page, its
        'option[2]' .. 'option[100]' children give state names. Each name is
        URL-quoted (spaces become '+') and appended to ADDR_URL as
        'state=<name>'. On the resulting page every node matched by
        STORE_XPATH is a store: the name is the text of 'p[1]' followed by the
        text of 'p[1]/b', and the address is the text of 'p[3]'.

        Iteration over the options stops at the first missing one, so it no
        longer crashes when a node has fewer than 100 options. Stores lacking
        a name node or an address are skipped; previously they were emitted
        with the name/address left over from the preceding store, or raised
        NameError on the first store.

        Addresses that getGoogleAPI.getgoogleapi cannot resolve (None) are
        skipped; the rest are sent through collector.object_found and
        shopping.signals.shop_found with brand 'CrocsCollector'.
        """
        from shopping.signals import shop_found

        html_parser = etree.HTMLParser(encoding='utf-8')
        text = urllib2.urlopen(ADDR_URL).read()
        tree = etree.HTML(text, parser=html_parser)
        nodes = tree.xpath(XPATH)
        today = datetime.datetime.now().strftime('%Y-%m-%d')
        print(nodes)

        for node in nodes:
            for position in range(2, 101):
                option = node.find('option[' + str(position) + ']')
                if option is None:
                    # Positional lookup: no option here means none after it.
                    break
                state = option.text
                if not state:
                    continue
                state = urllib.quote(state.replace(' ', '+').encode('utf-8'))

                # quote() escapes '+' as '%2B'; restore the literal '+'.
                state_url = (ADDR_URL + u'state=' + state).replace('%2B', '+')
                print(state_url)
                state_page = urllib2.urlopen(state_url).read()
                state_tree = etree.HTML(state_page, parser=html_parser)

                for store in state_tree.xpath(STORE_XPATH):
                    name_node = store.find('p[1]')
                    suffix_node = store.find('p[1]/b')
                    addr_node = store.find('p[3]')
                    if name_node is None or suffix_node is None or addr_node is None:
                        continue

                    storeaddr = addr_node.text
                    if not storeaddr:
                        continue
                    # Either text part may be None when the element starts
                    # with a child or is empty.
                    storename = (name_node.text or u'') + (suffix_node.text or u'')

                    print(storename)
                    self.logger.info(u'店名: %s ' % (storename))
                    print(storeaddr)
                    self.logger.info(u'地址: %s ' % (storeaddr))

                    latlng = getGoogleAPI.getgoogleapi(storeaddr)
                    if latlng is None:
                        print("can not find the latlng!!!!")
                        continue

                    collector.object_found.send(
                        self,
                        time=today,
                        title=storename,
                        url='crocs-' + storename + '-' + storeaddr,
                        storeaddr=storeaddr,
                        lat=latlng[0],
                        lng=latlng[1],
                        brand='CrocsCollector',
                    )
                    shop_found.send(
                        self,
                        brand='CrocsCollector',
                        address=storeaddr,
                        lat=latlng[0],
                        lng=latlng[1],
                    )
Пример #12
0
    def fetch(self):
        """Crawl the store pages behind the second ALL_XPATH link and announce each store.

        Only the second node (index 1) matched by ALL_XPATH on the ALL_URL
        page is followed; fewer than two matches raises IndexError. On that
        page every link matched by CITY_XPATH is fetched, and each node
        matched by NAME_XPATH there is a store: the name is the text of
        'h2/span', and the address is the text of 'div/div/table/tr[1]/td'
        or, when that is None, of its 'p' child.

        Stores lacking a name or an address are skipped; previously they were
        emitted with the values left over from the preceding store, or raised
        NameError/AttributeError.

        Addresses that getGoogleAPI.getgoogleapi cannot resolve (None) are
        skipped; the rest are sent through collector.object_found and
        shopping.signals.shop_found with brand 'UniqloCollector'.
        """
        from shopping.signals import shop_found

        html_parser = etree.HTMLParser(encoding='utf-8')
        today = datetime.datetime.now().strftime('%Y-%m-%d')
        text = urllib2.urlopen(ALL_URL).read()
        tree = etree.HTML(text, parser=html_parser)
        node = tree.xpath(ALL_XPATH)[1]
        city_url = urlparse.urljoin(ALL_URL, node.attrib['href'])
        print(city_url)
        city_page = urllib2.urlopen(city_url).read()
        city_tree = etree.HTML(city_page, parser=html_parser)

        for list_node in city_tree.xpath(CITY_XPATH):
            addr_url = urlparse.urljoin(city_url, list_node.attrib['href'])
            print(addr_url)
            addr_page = urllib2.urlopen(addr_url).read()
            addr_tree = etree.HTML(addr_page, parser=html_parser)

            for store_node in addr_tree.xpath(NAME_XPATH):
                name_node = store_node.find('h2/span')
                addr_node = store_node.find('div/div/table/tr[1]/td')
                if name_node is None or addr_node is None:
                    continue

                storename = name_node.text
                storeaddr = addr_node.text
                if storeaddr is None:
                    # Fall back to the <p> child when the cell has no direct text.
                    sub_addr = addr_node.find('p')
                    if sub_addr is not None:
                        storeaddr = sub_addr.text
                if storename is None or storeaddr is None:
                    continue

                print(name_node)
                print(u'店名:' + storename)
                self.logger.info(u'店名: %s' % (storename))
                print(addr_node)
                print(u'地址:' + storeaddr)
                self.logger.info(u'地址: %s ' % (storeaddr))

                latlng = getGoogleAPI.getgoogleapi(storeaddr)
                if latlng is None:
                    print("can not find the latlng!!!!")
                    continue

                collector.object_found.send(
                    self,
                    time=today,
                    title=storename,
                    url='uniqlo-' + storename + '-' + storeaddr,
                    storeaddr=storeaddr,
                    lat=latlng[0],
                    lng=latlng[1],
                    brand='UniqloCollector',
                )
                shop_found.send(
                    self,
                    brand='UniqloCollector',
                    address=storeaddr,
                    lat=latlng[0],
                    lng=latlng[1],
                )