Пример #1
0
    def _set_brands(self, session, insert):
        """Sync the brands listed on Net-a-Porter's designer A-Z page.

        Downloads the designer index and, for every designer link, makes sure
        an ``orm.Brand`` with that name and an ``orm.StoreBrand`` linking it
        to ``self.storeid`` for gender ``"female"`` exist.

        Args:
            session: Database session. Only ``query``, ``add``, ``flush`` and
                ``commit`` are called on it (presumably a SQLAlchemy
                session -- confirm against the caller).
            insert: Missing rows are added and flushed only when this is
                exactly ``True``; any other value makes this a dry run that
                only logs what is missing.

        Returns:
            list: One ``orm.Brand`` per designer link -- the row found in the
            database, or the newly built object when no row was found.

        Note:
            ``session.commit()`` is called at the end regardless of
            ``insert``.
        """
        brands = []

        brandsUrl = "https://www.net-a-porter.com/de/en/Shop/AZDesigners?cm_sp=topnav-_-designers-_-designera-z"
        gender = "female"
        olog.log("NetaporterTracker._set_brands > Calling <b>"+brandsUrl+"</b>", 'info')

        req = urllib2.Request(brandsUrl, headers=hdr)
        response = urllib2.urlopen(req)
        try:
            data = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()
        tree = lxml.html.fromstring(data)

        brand_data = tree.cssselect('div[class="designer_list_col"] ul li[class!="top-letter"] a')
        for b in brand_data:
            shop_url = ('http://www.net-a-porter.com' + b.attrib['href'] +
                        "?pn=1&npp=view_all&image_view=product&dScroll=0")
            name = unicode(b.attrib['title'].title()).encode('ascii', 'xmlcharrefreplace')

            brand_in_db = session.query(orm.Brand).filter_by(name=unicode(name)).first()
            if brand_in_db is None:
                brand_uuid = str(shortuuid.uuid(name))
                # No logo information is available on this page.
                br = orm.Brand(name, None, None, brand_uuid)
                if insert is True:
                    session.add(br)
                    # Flush so the database assigns br.id before it is used.
                    session.flush()
                # Always bind brandid for this iteration; on a dry run the
                # brand was not persisted, so this is presumably None.
                brandid = br.id
                olog.log("NetaporterTracker._set_brands <<< Inserted brand <b>"+str(br)+"</b> with id <b>" + str(brandid) + "</b>", "warning")
            else:
                br = brand_in_db
                olog.log("NetaporterTracker._set_brands <<< Brand <b>"+str(brand_in_db)+"</b> already in database", "info")
                brandid = brand_in_db.id

            if brandid is None:
                # A brand that was never persisted cannot have a link row;
                # skip the lookup instead of querying with a bogus id.
                storebrand_in_db = None
            else:
                storebrand_in_db = session.query(orm.StoreBrand).filter_by(
                    storeid=unicode(self.storeid)).filter_by(
                        brandid=brandid).filter_by(gender=gender).first()

            if storebrand_in_db is None:
                # This page exposes no store-specific brand key, hence None.
                sb = orm.StoreBrand(None, self.storeid, brandid, gender, shop_url)
                olog.log("NetaporterTracker._set_brands <<< Inserted <b>"+str(sb)+"</b>", "warning")
                if insert is True:
                    session.add(sb)
                    session.flush()
            else:
                olog.log("NetaporterTracker._set_brands <<< <b>"+str(storebrand_in_db)+"</b> already in database", "info")

            brands.append(br)

        session.commit()

        return brands
Пример #2
0
    def _set_brands(self, session, insert):
        """Register the single 'Ted Baker' brand for both genders.

        The site only sells its own brand, so nothing is scraped: for each of
        the men's and women's category URLs this makes sure an ``orm.Brand``
        named 'Ted Baker' and an ``orm.StoreBrand`` linking it to
        ``self.storeid`` for that gender exist.

        Args:
            session: Database session. Only ``query``, ``add``, ``flush`` and
                ``commit`` are called on it (presumably a SQLAlchemy
                session -- confirm against the caller).
            insert: Rows are added, flushed and finally committed only when
                this is exactly ``True``; any other value makes this a dry
                run that only logs.

        Returns:
            list: Two freshly built ``orm.Brand`` objects, one per gender
            pass (Male first, then Female).
        """
        brands = []

        brand_name = 'Ted Baker'  # Fixed. Website only sells Ted Baker
        targets = (
            ('Male', 'http://www.tedbaker.com/nl/Mens/c/category_mens'),
            ('Female', 'http://www.tedbaker.com/nl/Womens/c/category_womens'),
        )

        for gender, shop_url in targets:
            brand_uuid = str(shortuuid.uuid(brand_name))
            # No logo URLs are known for this store.
            br = orm.Brand(brand_name, None, None, brand_uuid)
            olog.log(
                "TedBakerTracker._set_brands << Found brand <b>" + str(br) +
                "</b>", 'debug')

            # Bind brandid on every pass so a dry run with a missing brand
            # cannot hit an unbound (or stale) value below.
            brandid = None
            brand_in_db = session.query(
                orm.Brand).filter_by(name=unicode(br.name)).first()
            if brand_in_db is None:
                if insert is True:
                    session.add(br)
                    # Flush so the database assigns br.id.
                    session.flush()
                    brandid = br.id
                    olog.log(
                        "TedBakerTracker._set_brands >>> Inserted brand <b>" +
                        br.name + "</b> with id <b>" + str(brandid) + "</b>",
                        "warning")
            else:
                brandid = brand_in_db.id
                olog.log(
                    "TedBakerTracker._set_brands << Brand <b>" +
                    brand_in_db.name +
                    "</b> already in database with id <b>" + str(brandid) +
                    "</b>", "info")

            if brandid is None:
                # The brand was not persisted, so no link row can exist.
                storebrand_in_db = None
            else:
                storebrand_in_db = session.query(orm.StoreBrand).filter_by(
                    storeid=unicode(self.storeid)).filter_by(
                        brandid=brandid).filter_by(gender=gender).first()

            if storebrand_in_db is None:
                # There is no store-specific brand key here, hence None.
                sb = orm.StoreBrand(None, self.storeid, brandid, gender,
                                    shop_url)
                olog.log(
                    "TedBakerTracker._set_brands << Inserted <b>" + str(sb) +
                    "</b>", "warning")
                if insert is True:
                    session.add(sb)
                    session.flush()
            else:
                olog.log(
                    "TedBakerTracker._set_brands << StoreBrand <b>" +
                    str(storebrand_in_db) +
                    "</b> already in database with id <b>" +
                    str(storebrand_in_db.id) + "</b>", "info")

            brands.append(br)

        if insert is True:
            session.commit()

        return brands
Пример #3
0
    def _set_brands(self, session, insert):
        """Sync the brands listed on Sarenza's men's and women's pages.

        Both brand index pages are rendered with a Firefox webdriver inside a
        ``Display``; the browser is closed before any brand is processed.
        For every brand link a search page and a product page are downloaded
        to find the large brand logo, then an ``orm.Brand`` and an
        ``orm.StoreBrand`` (per store, brand and gender) are looked up and
        created when missing.

        Args:
            session: Database session. Only ``query``, ``add``, ``flush`` and
                ``commit`` are called on it (presumably a SQLAlchemy
                session -- confirm against the caller).
            insert: Missing rows are added and flushed only when this is
                exactly ``True``; any other value makes this a dry run that
                only logs what is missing.

        Returns:
            list: One freshly built ``orm.Brand`` per brand link, male list
            first, then female list.

        Raises:
            IndexError: When a brand search yields no product link, or the
                product page lacks the expected logo / shop-link elements.

        Note:
            ``session.commit()`` is called at the end regardless of
            ``insert``.
        """
        brands = []

        maleBrandsUrl = "http://www.sarenza.nl/herenschoenen"
        femaleBrandsUrl = 'http://www.sarenza.nl/alle-damesschoenen'

        def fetch_tree(url):
            """Download *url* and return it parsed as an lxml HTML tree."""
            response = urllib2.urlopen(urllib2.Request(url, headers=hdr))
            try:
                return lxml.html.fromstring(response.read())
            finally:
                response.close()

        # Grab both rendered index pages first, so the browser and display
        # are always released, even if a later per-brand request fails.
        pages = []
        display = Display(visible=0, size=(1920, 1080))
        display.start()
        try:
            browser = webdriver.Firefox()
            try:
                for gender, url in (("male", maleBrandsUrl),
                                    ("female", femaleBrandsUrl)):
                    olog.log(
                        "SarenzaTracker._set_brands > Calling <b>" + url +
                        "</b>", 'info')
                    browser.get(url)
                    pages.append((gender, browser.page_source))
            finally:
                browser.quit()
        finally:
            display.stop()

        brand_key_re = re.compile('Brand=([0-9]*)')

        for gender, data in pages:
            tree = lxml.html.fromstring(data)
            for b in tree.cssselect('ul[class*="search-list"] li a'):
                # Strip surrounding whitespace so the male and female lists
                # resolve a brand to the same name.
                name = unicode(b.text_content()).encode(
                    'ascii', 'xmlcharrefreplace').strip()

                # NOTE(review): the query always ends in "dames", also for
                # the male list; it is only used to reach some product page
                # of the brand. Confirm this is intended for men-only brands.
                search_url = ("http://sarenza.nl/Search.aspx?Ftq=" + name +
                              "%20dames")
                atree = fetch_tree(search_url.replace(' ', '%20'))
                prod_data = atree.cssselect('ul[class*="vignettes"] li a')
                # NOTE(review): raises IndexError when the search is empty.
                btree = fetch_tree(prod_data[0].attrib['href'])
                logo_large_url = btree.cssselect(
                    'img[id*="ImgBrandName"]')[0].attrib['src'].split('?')[0]

                if gender == "male":
                    # The index link itself is the brand's shop page.
                    shop_url = "http://www.sarenza.nl" + b.attrib['href']
                else:
                    # For the female list the shop link is taken from the
                    # product page instead.
                    shop_url = btree.cssselect(
                        'div[class*="row-fl"] div[class*="item"] a'
                    )[0].attrib['href']

                result = brand_key_re.search(shop_url)
                key = result.group(1) if result else None

                brand_uuid = str(shortuuid.uuid(name))
                br = orm.Brand(name, None, logo_large_url, brand_uuid)

                brand_in_db = session.query(
                    orm.Brand).filter_by(name=unicode(br.name)).first()
                if brand_in_db is None:
                    if insert is True:
                        session.add(br)
                        # Flush so the database assigns br.id before use.
                        session.flush()
                    # Always bind brandid for this iteration; on a dry run
                    # the brand was not persisted, so this is presumably None.
                    brandid = br.id
                    olog.log(
                        "SarenzaTracker._set_brands <<< Inserted brand <b>" +
                        br.name + "</b> with id <b>" + str(brandid) + "</b>",
                        "warning")
                else:
                    brandid = brand_in_db.id
                    olog.log(
                        "Brand <b>" + str(brand_in_db) +
                        "</b> already in database with id <b>" +
                        str(brandid) + "</b>", "info")

                if brandid is None:
                    # The brand was not persisted, so no link row can exist.
                    storebrand_in_db = None
                else:
                    storebrand_in_db = session.query(
                        orm.StoreBrand).filter_by(
                            storeid=unicode(self.storeid)).filter_by(
                                brandid=brandid).filter_by(
                                    gender=gender).first()

                if storebrand_in_db is None:
                    sb = orm.StoreBrand(key, self.storeid, brandid, gender,
                                        shop_url)
                    olog.log(
                        "SarenzaTracker._set_brands <<< Inserted <b>" +
                        str(sb) + "</b>", "warning")
                    if insert is True:
                        session.add(sb)
                        session.flush()
                else:
                    olog.log(
                        "SarenzaTracker._set_brands <<< <b>" +
                        str(storebrand_in_db) + "</b> already in database",
                        "info")

                brands.append(br)

        session.commit()

        return brands
Пример #4
0
    def _get_brands(self, session, insert):
        """Collect the brands listed on Topshop's brands A-Z page.

        The page is rendered with a Firefox webdriver inside a ``Display``
        and parsed with lxml. One ``orm.Brand`` is built per brand link; when
        ``insert`` is ``True`` the brand and its ``orm.StoreBrand`` link
        (per store, brand and gender ``"female"``) are looked up and created
        when missing.

        Args:
            session: Database session. Only ``query``, ``add``, ``flush`` and
                ``commit`` are called on it (presumably a SQLAlchemy
                session -- confirm against the caller).
            insert: The database is queried and written only when this is
                exactly ``True``; otherwise brands are only collected.

        Returns:
            list: One freshly built ``orm.Brand`` per brand link.

        Note:
            ``session.commit()`` is called at the end regardless of
            ``insert``.
        """
        brands = []

        # NOTE: only the female (Topshop) list is scraped. The male list at
        # http://eu.topman.com/en/tmeu/category/brands-617803/view-all-brands-1700863
        # is not processed by this method.

        # Female
        femaleBrandsUrl = "http://eu.topshop.com/en/tseu/category/brands-a-to-z-4070022/home?TS=1422011935571"

        gender = "female"
        olog.log(
            "TopshopTracker._get_brands > Calling <b>" + femaleBrandsUrl +
            "</b>", 'info')

        display = Display(visible=0, size=(800, 600))
        display.start()
        try:
            browser = webdriver.Firefox()
            try:
                browser.get(femaleBrandsUrl)
                data = browser.page_source
            finally:
                # Always close the browser, even if loading the page fails.
                browser.quit()
        finally:
            display.stop()

        tree = lxml.html.fromstring(data)

        brand_data = tree.cssselect(
            'div[class*="a-to-z"] div[id*="jsonList"] div[class*="columns"] div div[class*="items"] a'
        )

        for b in brand_data:
            shop_url = b.attrib['href']
            name = unicode(b.attrib['title'].title()).encode(
                'ascii', 'xmlcharrefreplace')
            brand_uuid = str(shortuuid.uuid(name))
            # No logo information is available on this page.
            br = orm.Brand(name, None, None, brand_uuid)
            olog.log(
                "TopshopTracker._get_brands << Found brand <b>" + str(br) +
                "</b>", 'debug')

            if insert is True:
                brand_in_db = session.query(
                    orm.Brand).filter_by(name=unicode(br.name)).first()
                if brand_in_db is None:
                    session.add(br)
                    # Flush so the database assigns br.id.
                    session.flush()
                    brandid = br.id
                    olog.log(
                        "TopshopTracker._get_brands >>> Inserted brand <b>" +
                        br.name + "</b> with id <b>" + str(brandid) + "</b>",
                        "warning")
                else:
                    brandid = brand_in_db.id
                    olog.log(
                        "Brand <b>" + brand_in_db.name +
                        "</b> already in database with id <b>" + str(brandid) +
                        "</b>", "debug")

                # Match on gender as well, so a link stored for another
                # gender does not hide a missing female link.
                storebrand_in_db = session.query(orm.StoreBrand).filter_by(
                    storeid=unicode(self.storeid)).filter_by(
                        brandid=brandid).filter_by(gender=gender).first()
                if storebrand_in_db is None:
                    # This page exposes no store-specific brand key.
                    sb = orm.StoreBrand(None, self.storeid, brandid, gender,
                                        shop_url)
                    olog.log(
                        "TopshopTracker._get_brands >>> Inserted <b>" +
                        str(sb) + "</b>", "warning")
                    session.add(sb)
                    session.flush()

            brands.append(br)

        session.commit()

        return brands